Blame - drivers/scsi/scsi_error.c - kernel/msm-4.9

blob: e9c451ba71fc7e8ee28009552cc4c265cc1a16ba [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* scsi_error.c Copyright (C) 1997 Eric Youngdale
				3	*
				4	* SCSI error/timeout handling
				5	* Initial versions: Eric Youngdale. Based upon conversations with
				6	* Leonard Zubkoff and David Miller at Linux Expo,
				7	* ideas originating from all over the place.
				8	*
				9	* Restructured scsi_unjam_host and associated functions.
				10	* September 04, 2002 Mike Anderson (andmike@us.ibm.com)
				11	*
				12	* Forward port of Russell King's (rmk@arm.linux.org.uk) changes and
				13	* minor cleanups.
				14	* September 30, 2002 Mike Anderson (andmike@us.ibm.com)
				15	*/
				16
				17	#include <linux/module.h>
				18	#include <linux/sched.h>
				19	#include <linux/timer.h>
				20	#include <linux/string.h>
				21	#include <linux/slab.h>
				22	#include <linux/kernel.h>
				23	#include <linux/interrupt.h>
				24	#include <linux/blkdev.h>
				25	#include <linux/delay.h>
				26
				27	#include <scsi/scsi.h>
				28	#include <scsi/scsi_dbg.h>
				29	#include <scsi/scsi_device.h>
				30	#include <scsi/scsi_eh.h>
				31	#include <scsi/scsi_host.h>
				32	#include <scsi/scsi_ioctl.h>
				33	#include <scsi/scsi_request.h>
				34
				35	#include "scsi_priv.h"
				36	#include "scsi_logging.h"
				37
				38	#define SENSE_TIMEOUT (10*HZ)
				39	#define START_UNIT_TIMEOUT (30*HZ)
				40
				41	/*
				42	* These should probably be handled by the host itself.
				43	* Since it is allowed to sleep, it probably should.
				44	*/
				45	#define BUS_RESET_SETTLE_TIME (10)
				46	#define HOST_RESET_SETTLE_TIME (10)
				47
				48	/* called with shost->host_lock held */
				49	void scsi_eh_wakeup(struct Scsi_Host *shost)
				50	{
				51	if (shost->host_busy == shost->host_failed) {
				52	up(shost->eh_wait);
				53	SCSI_LOG_ERROR_RECOVERY(5,
				54	printk("Waking error handler thread\n"));
				55	}
				56	}
				57
				58	/**
				59	* scsi_eh_scmd_add - add scsi cmd to error handling.
				60	* @scmd: scmd to run eh on.
				61	* @eh_flag: optional SCSI_EH flag.
				62	*
				63	* Return value:
				64	* 0 on failure.
				65	**/
				66	int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
				67	{
				68	struct Scsi_Host *shost = scmd->device->host;
				69	unsigned long flags;
				70
				71	if (shost->eh_wait == NULL)
				72	return 0;
				73
				74	spin_lock_irqsave(shost->host_lock, flags);
				75
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	76	scmd->eh_eflags \|= eh_flag;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	77	list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
Mike Anderson	d330187	2005-06-16 11:12:38 -0700	[diff] [blame^]	78	scsi_host_set_state(shost, SHOST_RECOVERY);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	79	shost->host_failed++;
				80	scsi_eh_wakeup(shost);
				81	spin_unlock_irqrestore(shost->host_lock, flags);
				82	return 1;
				83	}
				84
				85	/**
				86	* scsi_add_timer - Start timeout timer for a single scsi command.
				87	* @scmd: scsi command that is about to start running.
				88	* @timeout: amount of time to allow this command to run.
				89	* @complete: timeout function to call if timer isn't canceled.
				90	*
				91	* Notes:
				92	* This should be turned into an inline function. Each scsi command
				93	* has its own timer, and as it is added to the queue, we set up the
				94	* timer. When the command completes, we cancel the timer.
				95	**/
				96	void scsi_add_timer(struct scsi_cmnd *scmd, int timeout,
				97	void (complete)(struct scsi_cmnd ))
				98	{
				99
				100	/*
				101	* If the clock was already running for this command, then
				102	* first delete the timer. The timer handling code gets rather
				103	* confused if we don't do this.
				104	*/
				105	if (scmd->eh_timeout.function)
				106	del_timer(&scmd->eh_timeout);
				107
				108	scmd->eh_timeout.data = (unsigned long)scmd;
				109	scmd->eh_timeout.expires = jiffies + timeout;
				110	scmd->eh_timeout.function = (void (*)(unsigned long)) complete;
				111
				112	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:"
				113	" %d, (%p)\n", __FUNCTION__,
				114	scmd, timeout, complete));
				115
				116	add_timer(&scmd->eh_timeout);
				117	}
				118	EXPORT_SYMBOL(scsi_add_timer);
				119
				120	/**
				121	* scsi_delete_timer - Delete/cancel timer for a given function.
				122	* @scmd: Cmd that we are canceling timer for
				123	*
				124	* Notes:
				125	* This should be turned into an inline function.
				126	*
				127	* Return value:
				128	* 1 if we were able to detach the timer. 0 if we blew it, and the
				129	* timer function has already started to run.
				130	**/
				131	int scsi_delete_timer(struct scsi_cmnd *scmd)
				132	{
				133	int rtn;
				134
				135	rtn = del_timer(&scmd->eh_timeout);
				136
				137	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p,"
				138	" rtn: %d\n", __FUNCTION__,
				139	scmd, rtn));
				140
				141	scmd->eh_timeout.data = (unsigned long)NULL;
				142	scmd->eh_timeout.function = NULL;
				143
				144	return rtn;
				145	}
				146	EXPORT_SYMBOL(scsi_delete_timer);
				147
				148	/**
				149	* scsi_times_out - Timeout function for normal scsi commands.
				150	* @scmd: Cmd that is timing out.
				151	*
				152	* Notes:
				153	* We do not need to lock this. There is the potential for a race
				154	* only in that the normal completion handling might run, but if the
				155	* normal completion function determines that the timer has already
				156	* fired, then it mustn't do anything.
				157	**/
				158	void scsi_times_out(struct scsi_cmnd *scmd)
				159	{
				160	scsi_log_completion(scmd, TIMEOUT_ERROR);
				161
				162	if (scmd->device->host->hostt->eh_timed_out)
				163	switch (scmd->device->host->hostt->eh_timed_out(scmd)) {
				164	case EH_HANDLED:
				165	__scsi_done(scmd);
				166	return;
				167	case EH_RESET_TIMER:
				168	/* This allows a single retry even of a command
				169	* with allowed == 0 */
				170	if (scmd->retries++ > scmd->allowed)
				171	break;
				172	scsi_add_timer(scmd, scmd->timeout_per_command,
				173	scsi_times_out);
				174	return;
				175	case EH_NOT_HANDLED:
				176	break;
				177	}
				178
				179	if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) {
				180	panic("Error handler thread not present at %p %p %s %d",
				181	scmd, scmd->device->host, __FILE__, __LINE__);
				182	}
				183	}
				184
				185	/**
				186	* scsi_block_when_processing_errors - Prevent cmds from being queued.
				187	* @sdev: Device on which we are performing recovery.
				188	*
				189	* Description:
				190	* We block until the host is out of error recovery, and then check to
				191	* see whether the host or the device is offline.
				192	*
				193	* Return value:
				194	* 0 when dev was taken offline by error recovery. 1 OK to proceed.
				195	**/
				196	int scsi_block_when_processing_errors(struct scsi_device *sdev)
				197	{
				198	int online;
				199
Mike Anderson	d330187	2005-06-16 11:12:38 -0700	[diff] [blame^]	200	wait_event(sdev->host->host_wait, (sdev->host->shost_state !=
				201	SHOST_RECOVERY));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	202
				203	online = scsi_device_online(sdev);
				204
				205	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: rtn: %d\n", __FUNCTION__,
				206	online));
				207
				208	return online;
				209	}
				210	EXPORT_SYMBOL(scsi_block_when_processing_errors);
				211
				212	#ifdef CONFIG_SCSI_LOGGING
				213	/**
				214	* scsi_eh_prt_fail_stats - Log info on failures.
				215	* @shost: scsi host being recovered.
				216	* @work_q: Queue of scsi cmds to process.
				217	**/
				218	static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost,
				219	struct list_head *work_q)
				220	{
				221	struct scsi_cmnd *scmd;
				222	struct scsi_device *sdev;
				223	int total_failures = 0;
				224	int cmd_failed = 0;
				225	int cmd_cancel = 0;
				226	int devices_failed = 0;
				227
				228	shost_for_each_device(sdev, shost) {
				229	list_for_each_entry(scmd, work_q, eh_entry) {
				230	if (scmd->device == sdev) {
				231	++total_failures;
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	232	if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	233	++cmd_cancel;
				234	else
				235	++cmd_failed;
				236	}
				237	}
				238
				239	if (cmd_cancel \|\| cmd_failed) {
				240	SCSI_LOG_ERROR_RECOVERY(3,
				241	printk("%s: %d:%d:%d:%d cmds failed: %d,"
				242	" cancel: %d\n",
				243	__FUNCTION__, shost->host_no,
				244	sdev->channel, sdev->id, sdev->lun,
				245	cmd_failed, cmd_cancel));
				246	cmd_cancel = 0;
				247	cmd_failed = 0;
				248	++devices_failed;
				249	}
				250	}
				251
				252	SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d commands on %d"
				253	" devices require eh work\n",
				254	total_failures, devices_failed));
				255	}
				256	#endif
				257
				258	/**
				259	* scsi_check_sense - Examine scsi cmd sense
				260	* @scmd: Cmd to have sense checked.
				261	*
				262	* Return value:
				263	* SUCCESS or FAILED or NEEDS_RETRY
				264	*
				265	* Notes:
				266	* When a deferred error is detected the current command has
				267	* not been executed and needs retrying.
				268	**/
				269	static int scsi_check_sense(struct scsi_cmnd *scmd)
				270	{
				271	struct scsi_sense_hdr sshdr;
				272
				273	if (! scsi_command_normalize_sense(scmd, &sshdr))
				274	return FAILED; /* no valid sense data */
				275
				276	if (scsi_sense_is_deferred(&sshdr))
				277	return NEEDS_RETRY;
				278
				279	/*
				280	* Previous logic looked for FILEMARK, EOM or ILI which are
				281	* mainly associated with tapes and returned SUCCESS.
				282	*/
				283	if (sshdr.response_code == 0x70) {
				284	/* fixed format */
				285	if (scmd->sense_buffer[2] & 0xe0)
				286	return SUCCESS;
				287	} else {
				288	/*
				289	* descriptor format: look for "stream commands sense data
				290	* descriptor" (see SSC-3). Assume single sense data
				291	* descriptor. Ignore ILI from SBC-2 READ LONG and WRITE LONG.
				292	*/
				293	if ((sshdr.additional_length > 3) &&
				294	(scmd->sense_buffer[8] == 0x4) &&
				295	(scmd->sense_buffer[11] & 0xe0))
				296	return SUCCESS;
				297	}
				298
				299	switch (sshdr.sense_key) {
				300	case NO_SENSE:
				301	return SUCCESS;
				302	case RECOVERED_ERROR:
				303	return /* soft_error */ SUCCESS;
				304
				305	case ABORTED_COMMAND:
				306	return NEEDS_RETRY;
				307	case NOT_READY:
				308	case UNIT_ATTENTION:
				309	/*
				310	* if we are expecting a cc/ua because of a bus reset that we
				311	* performed, treat this just as a retry. otherwise this is
				312	* information that we should pass up to the upper-level driver
				313	* so that we can deal with it there.
				314	*/
				315	if (scmd->device->expecting_cc_ua) {
				316	scmd->device->expecting_cc_ua = 0;
				317	return NEEDS_RETRY;
				318	}
				319	/*
				320	* if the device is in the process of becoming ready, we
				321	* should retry.
				322	*/
				323	if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01))
				324	return NEEDS_RETRY;
				325	/*
				326	* if the device is not started, we need to wake
				327	* the error handler to start the motor
				328	*/
				329	if (scmd->device->allow_restart &&
				330	(sshdr.asc == 0x04) && (sshdr.ascq == 0x02))
				331	return FAILED;
				332	return SUCCESS;
				333
				334	/* these three are not supported */
				335	case COPY_ABORTED:
				336	case VOLUME_OVERFLOW:
				337	case MISCOMPARE:
				338	return SUCCESS;
				339
				340	case MEDIUM_ERROR:
				341	return NEEDS_RETRY;
				342
				343	case HARDWARE_ERROR:
				344	if (scmd->device->retry_hwerror)
				345	return NEEDS_RETRY;
				346	else
				347	return SUCCESS;
				348
				349	case ILLEGAL_REQUEST:
				350	case BLANK_CHECK:
				351	case DATA_PROTECT:
				352	default:
				353	return SUCCESS;
				354	}
				355	}
				356
				357	/**
				358	* scsi_eh_completed_normally - Disposition a eh cmd on return from LLD.
				359	* @scmd: SCSI cmd to examine.
				360	*
				361	* Notes:
				362	* This is only called when we are examining the status of commands
				363	* queued during error recovery. the main difference here is that we
				364	* don't allow for the possibility of retries here, and we are a lot
				365	* more restrictive about what we consider acceptable.
				366	**/
				367	static int scsi_eh_completed_normally(struct scsi_cmnd *scmd)
				368	{
				369	/*
				370	* first check the host byte, to see if there is anything in there
				371	* that would indicate what we need to do.
				372	*/
				373	if (host_byte(scmd->result) == DID_RESET) {
				374	/*
				375	* rats. we are already in the error handler, so we now
				376	* get to try and figure out what to do next. if the sense
				377	* is valid, we have a pretty good idea of what to do.
				378	* if not, we mark it as FAILED.
				379	*/
				380	return scsi_check_sense(scmd);
				381	}
				382	if (host_byte(scmd->result) != DID_OK)
				383	return FAILED;
				384
				385	/*
				386	* next, check the message byte.
				387	*/
				388	if (msg_byte(scmd->result) != COMMAND_COMPLETE)
				389	return FAILED;
				390
				391	/*
				392	* now, check the status byte to see if this indicates
				393	* anything special.
				394	*/
				395	switch (status_byte(scmd->result)) {
				396	case GOOD:
				397	case COMMAND_TERMINATED:
				398	return SUCCESS;
				399	case CHECK_CONDITION:
				400	return scsi_check_sense(scmd);
				401	case CONDITION_GOOD:
				402	case INTERMEDIATE_GOOD:
				403	case INTERMEDIATE_C_GOOD:
				404	/*
				405	* who knows? FIXME(eric)
				406	*/
				407	return SUCCESS;
				408	case BUSY:
				409	case QUEUE_FULL:
				410	case RESERVATION_CONFLICT:
				411	default:
				412	return FAILED;
				413	}
				414	return FAILED;
				415	}
				416
				417	/**
				418	* scsi_eh_times_out - timeout function for error handling.
				419	* @scmd: Cmd that is timing out.
				420	*
				421	* Notes:
				422	* During error handling, the kernel thread will be sleeping waiting
				423	* for some action to complete on the device. our only job is to
				424	* record that it timed out, and to wake up the thread.
				425	**/
				426	static void scsi_eh_times_out(struct scsi_cmnd *scmd)
				427	{
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	428	scmd->eh_eflags \|= SCSI_EH_REC_TIMEOUT;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	429	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd:%p\n", __FUNCTION__,
				430	scmd));
				431
Tejun Heo	5b8ef84	2005-05-14 00:46:18 +0900	[diff] [blame]	432	up(scmd->device->host->eh_action);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	433	}
				434
				435	/**
				436	* scsi_eh_done - Completion function for error handling.
				437	* @scmd: Cmd that is done.
				438	**/
				439	static void scsi_eh_done(struct scsi_cmnd *scmd)
				440	{
				441	/*
				442	* if the timeout handler is already running, then just set the
				443	* flag which says we finished late, and return. we have no
				444	* way of stopping the timeout handler from running, so we must
				445	* always defer to it.
				446	*/
				447	if (del_timer(&scmd->eh_timeout)) {
				448	scmd->request->rq_status = RQ_SCSI_DONE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	449
				450	SCSI_LOG_ERROR_RECOVERY(3, printk("%s scmd: %p result: %x\n",
				451	__FUNCTION__, scmd, scmd->result));
				452
Tejun Heo	5b8ef84	2005-05-14 00:46:18 +0900	[diff] [blame]	453	up(scmd->device->host->eh_action);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	454	}
				455	}
				456
				457	/**
				458	* scsi_send_eh_cmnd - send a cmd to a device as part of error recovery.
				459	* @scmd: SCSI Cmd to send.
				460	* @timeout: Timeout for cmd.
				461	*
				462	* Notes:
				463	* The initialization of the structures is quite a bit different in
				464	* this case, and furthermore, there is a different completion handler
				465	* vs scsi_dispatch_cmd.
				466	* Return value:
				467	* SUCCESS or FAILED or NEEDS_RETRY
				468	**/
				469	static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout)
				470	{
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	471	struct scsi_device *sdev = scmd->device;
				472	struct Scsi_Host *shost = sdev->host;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	473	DECLARE_MUTEX_LOCKED(sem);
				474	unsigned long flags;
				475	int rtn = SUCCESS;
				476
				477	/*
				478	* we will use a queued command if possible, otherwise we will
				479	* emulate the queuing and calling of completion function ourselves.
				480	*/
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	481	if (sdev->scsi_level <= SCSI_2)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	482	scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) \|
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	483	(sdev->lun << 5 & 0xe0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	484
				485	scsi_add_timer(scmd, timeout, scsi_eh_times_out);
				486
				487	/*
				488	* set up the semaphore so we wait for the command to complete.
				489	*/
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	490	shost->eh_action = &sem;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	491	scmd->request->rq_status = RQ_SCSI_BUSY;
				492
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	493	spin_lock_irqsave(shost->host_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	494	scsi_log_send(scmd);
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	495	shost->hostt->queuecommand(scmd, scsi_eh_done);
				496	spin_unlock_irqrestore(shost->host_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	497
				498	down(&sem);
				499	scsi_log_completion(scmd, SUCCESS);
				500
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	501	shost->eh_action = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	502
				503	/*
				504	* see if timeout. if so, tell the host to forget about it.
				505	* in other words, we don't want a callback any more.
				506	*/
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	507	if (scmd->eh_eflags & SCSI_EH_REC_TIMEOUT) {
				508	scmd->eh_eflags &= ~SCSI_EH_REC_TIMEOUT;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	509
				510	/*
				511	* as far as the low level driver is
				512	* concerned, this command is still active, so
				513	* we must give the low level driver a chance
				514	* to abort it. (db)
				515	*
				516	* FIXME(eric) - we are not tracking whether we could
				517	* abort a timed out command or not. not sure how
				518	* we should treat them differently anyways.
				519	*/
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	520	if (shost->hostt->eh_abort_handler)
				521	shost->hostt->eh_abort_handler(scmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	522
				523	scmd->request->rq_status = RQ_SCSI_DONE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	524	rtn = FAILED;
				525	}
				526
				527	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd: %p, rtn:%x\n",
				528	__FUNCTION__, scmd, rtn));
				529
				530	/*
				531	* now examine the actual status codes to see whether the command
				532	* actually did complete normally.
				533	*/
				534	if (rtn == SUCCESS) {
				535	rtn = scsi_eh_completed_normally(scmd);
				536	SCSI_LOG_ERROR_RECOVERY(3,
				537	printk("%s: scsi_eh_completed_normally %x\n",
				538	__FUNCTION__, rtn));
				539	switch (rtn) {
				540	case SUCCESS:
				541	case NEEDS_RETRY:
				542	case FAILED:
				543	break;
				544	default:
				545	rtn = FAILED;
				546	break;
				547	}
				548	}
				549
				550	return rtn;
				551	}
				552
				553	/**
				554	* scsi_request_sense - Request sense data from a particular target.
				555	* @scmd: SCSI cmd for request sense.
				556	*
				557	* Notes:
				558	* Some hosts automatically obtain this information, others require
				559	* that we obtain it on our own. This function will not return until
				560	* the command either times out, or it completes.
				561	**/
				562	static int scsi_request_sense(struct scsi_cmnd *scmd)
				563	{
				564	static unsigned char generic_sense[6] =
				565	{REQUEST_SENSE, 0, 0, 0, 252, 0};
				566	unsigned char *scsi_result;
				567	int saved_result;
				568	int rtn;
				569
				570	memcpy(scmd->cmnd, generic_sense, sizeof(generic_sense));
				571
Al Viro	bc86120	2005-04-24 12:28:34 -0700	[diff] [blame]	572	scsi_result = kmalloc(252, GFP_ATOMIC \| ((scmd->device->host->hostt->unchecked_isa_dma) ? __GFP_DMA : 0));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	573
				574
				575	if (unlikely(!scsi_result)) {
				576	printk(KERN_ERR "%s: cannot allocate scsi_result.\n",
				577	__FUNCTION__);
				578	return FAILED;
				579	}
				580
				581	/*
				582	* zero the sense buffer. some host adapters automatically always
				583	* request sense, so it is not a good idea that
				584	* scmd->request_buffer and scmd->sense_buffer point to the same
				585	* address (db). 0 is not a valid sense code.
				586	*/
				587	memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer));
				588	memset(scsi_result, 0, 252);
				589
				590	saved_result = scmd->result;
				591	scmd->request_buffer = scsi_result;
				592	scmd->request_bufflen = 252;
				593	scmd->use_sg = 0;
				594	scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
				595	scmd->sc_data_direction = DMA_FROM_DEVICE;
				596	scmd->underflow = 0;
				597
				598	rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT);
				599
				600	/* last chance to have valid sense data */
				601	if(!SCSI_SENSE_VALID(scmd)) {
				602	memcpy(scmd->sense_buffer, scmd->request_buffer,
				603	sizeof(scmd->sense_buffer));
				604	}
				605
				606	kfree(scsi_result);
				607
				608	/*
				609	* when we eventually call scsi_finish, we really wish to complete
				610	* the original request, so let's restore the original data. (db)
				611	*/
				612	scsi_setup_cmd_retry(scmd);
				613	scmd->result = saved_result;
				614	return rtn;
				615	}
				616
				617	/**
				618	* scsi_eh_finish_cmd - Handle a cmd that eh is finished with.
				619	* @scmd: Original SCSI cmd that eh has finished.
				620	* @done_q: Queue for processed commands.
				621	*
				622	* Notes:
				623	* We don't want to use the normal command completion while we are are
				624	* still handling errors - it may cause other commands to be queued,
				625	* and that would disturb what we are doing. thus we really want to
				626	* keep a list of pending commands for final completion, and once we
				627	* are ready to leave error handling we handle completion for real.
				628	**/
				629	static void scsi_eh_finish_cmd(struct scsi_cmnd *scmd,
				630	struct list_head *done_q)
				631	{
				632	scmd->device->host->host_failed--;
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	633	scmd->eh_eflags = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	634
				635	/*
				636	* set this back so that the upper level can correctly free up
				637	* things.
				638	*/
				639	scsi_setup_cmd_retry(scmd);
				640	list_move_tail(&scmd->eh_entry, done_q);
				641	}
				642
				643	/**
				644	* scsi_eh_get_sense - Get device sense data.
				645	* @work_q: Queue of commands to process.
				646	* @done_q: Queue of proccessed commands..
				647	*
				648	* Description:
				649	* See if we need to request sense information. if so, then get it
				650	* now, so we have a better idea of what to do.
				651	*
				652	* Notes:
				653	* This has the unfortunate side effect that if a shost adapter does
				654	* not automatically request sense information, that we end up shutting
				655	* it down before we request it.
				656	*
				657	* All drivers should request sense information internally these days,
				658	* so for now all I have to say is tough noogies if you end up in here.
				659	*
				660	* XXX: Long term this code should go away, but that needs an audit of
				661	* all LLDDs first.
				662	**/
				663	static int scsi_eh_get_sense(struct list_head *work_q,
				664	struct list_head *done_q)
				665	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	666	struct scsi_cmnd scmd, next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	667	int rtn;
				668
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	669	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	670	if ((scmd->eh_eflags & SCSI_EH_CANCEL_CMD) \|\|
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	671	SCSI_SENSE_VALID(scmd))
				672	continue;
				673
				674	SCSI_LOG_ERROR_RECOVERY(2, printk("%s: requesting sense"
				675	" for id: %d\n",
				676	current->comm,
				677	scmd->device->id));
				678	rtn = scsi_request_sense(scmd);
				679	if (rtn != SUCCESS)
				680	continue;
				681
				682	SCSI_LOG_ERROR_RECOVERY(3, printk("sense requested for %p"
				683	" result %x\n", scmd,
				684	scmd->result));
				685	SCSI_LOG_ERROR_RECOVERY(3, scsi_print_sense("bh", scmd));
				686
				687	rtn = scsi_decide_disposition(scmd);
				688
				689	/*
				690	* if the result was normal, then just pass it along to the
				691	* upper level.
				692	*/
				693	if (rtn == SUCCESS)
				694	/* we don't want this command reissued, just
				695	* finished with the sense data, so set
				696	* retries to the max allowed to ensure it
				697	* won't get reissued */
				698	scmd->retries = scmd->allowed;
				699	else if (rtn != NEEDS_RETRY)
				700	continue;
				701
				702	scsi_eh_finish_cmd(scmd, done_q);
				703	}
				704
				705	return list_empty(work_q);
				706	}
				707
				708	/**
				709	* scsi_try_to_abort_cmd - Ask host to abort a running command.
				710	* @scmd: SCSI cmd to abort from Lower Level.
				711	*
				712	* Notes:
				713	* This function will not return until the user's completion function
				714	* has been called. there is no timeout on this operation. if the
				715	* author of the low-level driver wishes this operation to be timed,
				716	* they can provide this facility themselves. helper functions in
				717	* scsi_error.c can be supplied to make this easier to do.
				718	**/
				719	static int scsi_try_to_abort_cmd(struct scsi_cmnd *scmd)
				720	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	721	if (!scmd->device->host->hostt->eh_abort_handler)
Jeff Garzik	8fa728a	2005-05-28 07:54:40 -0400	[diff] [blame]	722	return FAILED;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	723
				724	/*
				725	* scsi_done was called just after the command timed out and before
				726	* we had a chance to process it. (db)
				727	*/
				728	if (scmd->serial_number == 0)
				729	return SUCCESS;
Jeff Garzik	8fa728a	2005-05-28 07:54:40 -0400	[diff] [blame]	730	return scmd->device->host->hostt->eh_abort_handler(scmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	731	}
				732
				733	/**
				734	* scsi_eh_tur - Send TUR to device.
				735	* @scmd: Scsi cmd to send TUR
				736	*
				737	* Return value:
				738	* 0 - Device is ready. 1 - Device NOT ready.
				739	**/
				740	static int scsi_eh_tur(struct scsi_cmnd *scmd)
				741	{
				742	static unsigned char tur_command[6] = {TEST_UNIT_READY, 0, 0, 0, 0, 0};
				743	int retry_cnt = 1, rtn;
Patrick Mansfield	793698c	2005-05-16 17:42:15 -0700	[diff] [blame]	744	int saved_result;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	745
				746	retry_tur:
				747	memcpy(scmd->cmnd, tur_command, sizeof(tur_command));
				748
				749	/*
				750	* zero the sense buffer. the scsi spec mandates that any
				751	* untransferred sense data should be interpreted as being zero.
				752	*/
				753	memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer));
				754
Patrick Mansfield	793698c	2005-05-16 17:42:15 -0700	[diff] [blame]	755	saved_result = scmd->result;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	756	scmd->request_buffer = NULL;
				757	scmd->request_bufflen = 0;
				758	scmd->use_sg = 0;
				759	scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
				760	scmd->underflow = 0;
				761	scmd->sc_data_direction = DMA_NONE;
				762
				763	rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT);
				764
				765	/*
				766	* when we eventually call scsi_finish, we really wish to complete
				767	* the original request, so let's restore the original data. (db)
				768	*/
				769	scsi_setup_cmd_retry(scmd);
Patrick Mansfield	793698c	2005-05-16 17:42:15 -0700	[diff] [blame]	770	scmd->result = saved_result;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	771
				772	/*
				773	* hey, we are done. let's look to see what happened.
				774	*/
				775	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n",
				776	__FUNCTION__, scmd, rtn));
				777	if (rtn == SUCCESS)
				778	return 0;
				779	else if (rtn == NEEDS_RETRY)
				780	if (retry_cnt--)
				781	goto retry_tur;
				782	return 1;
				783	}
				784
				785	/**
				786	* scsi_eh_abort_cmds - abort canceled commands.
				787	* @shost: scsi host being recovered.
				788	* @eh_done_q: list_head for processed commands.
				789	*
				790	* Decription:
				791	* Try and see whether or not it makes sense to try and abort the
				792	* running command. this only works out to be the case if we have one
				793	* command that has timed out. if the command simply failed, it makes
				794	* no sense to try and abort the command, since as far as the shost
				795	* adapter is concerned, it isn't running.
				796	**/
				797	static int scsi_eh_abort_cmds(struct list_head *work_q,
				798	struct list_head *done_q)
				799	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	800	struct scsi_cmnd scmd, next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	801	int rtn;
				802
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	803	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	804	if (!(scmd->eh_eflags & SCSI_EH_CANCEL_CMD))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	805	continue;
				806	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:"
				807	"0x%p\n", current->comm,
				808	scmd));
				809	rtn = scsi_try_to_abort_cmd(scmd);
				810	if (rtn == SUCCESS) {
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	811	scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	812	if (!scsi_device_online(scmd->device) \|\|
				813	!scsi_eh_tur(scmd)) {
				814	scsi_eh_finish_cmd(scmd, done_q);
				815	}
				816
				817	} else
				818	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting"
				819	" cmd failed:"
				820	"0x%p\n",
				821	current->comm,
				822	scmd));
				823	}
				824
				825	return list_empty(work_q);
				826	}
				827
				828	/**
				829	* scsi_try_bus_device_reset - Ask host to perform a BDR on a dev
				830	* @scmd: SCSI cmd used to send BDR
				831	*
				832	* Notes:
				833	* There is no timeout for this operation. if this operation is
				834	* unreliable for a given host, then the host itself needs to put a
				835	* timer on it, and set the host back to a consistent state prior to
				836	* returning.
				837	**/
				838	static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd)
				839	{
Jeff Garzik	94d0e7b8	2005-05-28 07:55:48 -0400	[diff] [blame]	840	int rtn;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	841
				842	if (!scmd->device->host->hostt->eh_device_reset_handler)
Jeff Garzik	94d0e7b8	2005-05-28 07:55:48 -0400	[diff] [blame]	843	return FAILED;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	844
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	845	rtn = scmd->device->host->hostt->eh_device_reset_handler(scmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	846	if (rtn == SUCCESS) {
				847	scmd->device->was_reset = 1;
				848	scmd->device->expecting_cc_ua = 1;
				849	}
				850
				851	return rtn;
				852	}
				853
				854	/**
				855	* scsi_eh_try_stu - Send START_UNIT to device.
				856	* @scmd: Scsi cmd to send START_UNIT
				857	*
				858	* Return value:
				859	* 0 - Device is ready. 1 - Device NOT ready.
				860	**/
				861	static int scsi_eh_try_stu(struct scsi_cmnd *scmd)
				862	{
				863	static unsigned char stu_command[6] = {START_STOP, 0, 0, 0, 1, 0};
				864	int rtn;
Patrick Mansfield	793698c	2005-05-16 17:42:15 -0700	[diff] [blame]	865	int saved_result;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	866
				867	if (!scmd->device->allow_restart)
				868	return 1;
				869
				870	memcpy(scmd->cmnd, stu_command, sizeof(stu_command));
				871
				872	/*
				873	* zero the sense buffer. the scsi spec mandates that any
				874	* untransferred sense data should be interpreted as being zero.
				875	*/
				876	memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer));
				877
Patrick Mansfield	793698c	2005-05-16 17:42:15 -0700	[diff] [blame]	878	saved_result = scmd->result;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	879	scmd->request_buffer = NULL;
				880	scmd->request_bufflen = 0;
				881	scmd->use_sg = 0;
				882	scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
				883	scmd->underflow = 0;
				884	scmd->sc_data_direction = DMA_NONE;
				885
				886	rtn = scsi_send_eh_cmnd(scmd, START_UNIT_TIMEOUT);
				887
				888	/*
				889	* when we eventually call scsi_finish, we really wish to complete
				890	* the original request, so let's restore the original data. (db)
				891	*/
				892	scsi_setup_cmd_retry(scmd);
Patrick Mansfield	793698c	2005-05-16 17:42:15 -0700	[diff] [blame]	893	scmd->result = saved_result;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	894
				895	/*
				896	* hey, we are done. let's look to see what happened.
				897	*/
				898	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n",
				899	__FUNCTION__, scmd, rtn));
				900	if (rtn == SUCCESS)
				901	return 0;
				902	return 1;
				903	}
				904
				905	/**
				906	* scsi_eh_stu - send START_UNIT if needed
				907	* @shost: scsi host being recovered.
				908	* @eh_done_q: list_head for processed commands.
				909	*
				910	* Notes:
				911	* If commands are failing due to not ready, initializing command required,
				912	* try revalidating the device, which will end up sending a start unit.
				913	**/
				914	static int scsi_eh_stu(struct Scsi_Host *shost,
				915	struct list_head *work_q,
				916	struct list_head *done_q)
				917	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	918	struct scsi_cmnd scmd, stu_scmd, *next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	919	struct scsi_device *sdev;
				920
				921	shost_for_each_device(sdev, shost) {
				922	stu_scmd = NULL;
				923	list_for_each_entry(scmd, work_q, eh_entry)
				924	if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) &&
				925	scsi_check_sense(scmd) == FAILED ) {
				926	stu_scmd = scmd;
				927	break;
				928	}
				929
				930	if (!stu_scmd)
				931	continue;
				932
				933	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending START_UNIT to sdev:"
				934	" 0x%p\n", current->comm, sdev));
				935
				936	if (!scsi_eh_try_stu(stu_scmd)) {
				937	if (!scsi_device_online(sdev) \|\|
				938	!scsi_eh_tur(stu_scmd)) {
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	939	list_for_each_entry_safe(scmd, next,
				940	work_q, eh_entry) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	941	if (scmd->device == sdev)
				942	scsi_eh_finish_cmd(scmd, done_q);
				943	}
				944	}
				945	} else {
				946	SCSI_LOG_ERROR_RECOVERY(3,
				947	printk("%s: START_UNIT failed to sdev:"
				948	" 0x%p\n", current->comm, sdev));
				949	}
				950	}
				951
				952	return list_empty(work_q);
				953	}
				954
				955
				956	/**
				957	* scsi_eh_bus_device_reset - send bdr if needed
				958	* @shost: scsi host being recovered.
				959	* @eh_done_q: list_head for processed commands.
				960	*
				961	* Notes:
				962	* Try a bus device reset. still, look to see whether we have multiple
				963	* devices that are jammed or not - if we have multiple devices, it
				964	* makes no sense to try bus_device_reset - we really would need to try
				965	* a bus_reset instead.
				966	**/
				967	static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,
				968	struct list_head *work_q,
				969	struct list_head *done_q)
				970	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	971	struct scsi_cmnd scmd, bdr_scmd, *next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	972	struct scsi_device *sdev;
				973	int rtn;
				974
				975	shost_for_each_device(sdev, shost) {
				976	bdr_scmd = NULL;
				977	list_for_each_entry(scmd, work_q, eh_entry)
				978	if (scmd->device == sdev) {
				979	bdr_scmd = scmd;
				980	break;
				981	}
				982
				983	if (!bdr_scmd)
				984	continue;
				985
				986	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BDR sdev:"
				987	" 0x%p\n", current->comm,
				988	sdev));
				989	rtn = scsi_try_bus_device_reset(bdr_scmd);
				990	if (rtn == SUCCESS) {
				991	if (!scsi_device_online(sdev) \|\|
				992	!scsi_eh_tur(bdr_scmd)) {
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	993	list_for_each_entry_safe(scmd, next,
				994	work_q, eh_entry) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	995	if (scmd->device == sdev)
				996	scsi_eh_finish_cmd(scmd,
				997	done_q);
				998	}
				999	}
				1000	} else {
				1001	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BDR"
				1002	" failed sdev:"
				1003	"0x%p\n",
				1004	current->comm,
				1005	sdev));
				1006	}
				1007	}
				1008
				1009	return list_empty(work_q);
				1010	}
				1011
				1012	/**
				1013	* scsi_try_bus_reset - ask host to perform a bus reset
				1014	* @scmd: SCSI cmd to send bus reset.
				1015	**/
				1016	static int scsi_try_bus_reset(struct scsi_cmnd *scmd)
				1017	{
				1018	unsigned long flags;
				1019	int rtn;
				1020
				1021	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Bus RST\n",
				1022	__FUNCTION__));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1023
				1024	if (!scmd->device->host->hostt->eh_bus_reset_handler)
				1025	return FAILED;
				1026
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1027	rtn = scmd->device->host->hostt->eh_bus_reset_handler(scmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1028
				1029	if (rtn == SUCCESS) {
				1030	if (!scmd->device->host->hostt->skip_settle_delay)
				1031	ssleep(BUS_RESET_SETTLE_TIME);
				1032	spin_lock_irqsave(scmd->device->host->host_lock, flags);
				1033	scsi_report_bus_reset(scmd->device->host, scmd->device->channel);
				1034	spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
				1035	}
				1036
				1037	return rtn;
				1038	}
				1039
				1040	/**
				1041	* scsi_try_host_reset - ask host adapter to reset itself
				1042	* @scmd: SCSI cmd to send hsot reset.
				1043	**/
				1044	static int scsi_try_host_reset(struct scsi_cmnd *scmd)
				1045	{
				1046	unsigned long flags;
				1047	int rtn;
				1048
				1049	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n",
				1050	__FUNCTION__));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1051
				1052	if (!scmd->device->host->hostt->eh_host_reset_handler)
				1053	return FAILED;
				1054
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1055	rtn = scmd->device->host->hostt->eh_host_reset_handler(scmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1056
				1057	if (rtn == SUCCESS) {
				1058	if (!scmd->device->host->hostt->skip_settle_delay)
				1059	ssleep(HOST_RESET_SETTLE_TIME);
				1060	spin_lock_irqsave(scmd->device->host->host_lock, flags);
				1061	scsi_report_bus_reset(scmd->device->host, scmd->device->channel);
				1062	spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
				1063	}
				1064
				1065	return rtn;
				1066	}
				1067
				1068	/**
				1069	* scsi_eh_bus_reset - send a bus reset
				1070	* @shost: scsi host being recovered.
				1071	* @eh_done_q: list_head for processed commands.
				1072	**/
				1073	static int scsi_eh_bus_reset(struct Scsi_Host *shost,
				1074	struct list_head *work_q,
				1075	struct list_head *done_q)
				1076	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1077	struct scsi_cmnd scmd, chan_scmd, *next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1078	unsigned int channel;
				1079	int rtn;
				1080
				1081	/*
				1082	* we really want to loop over the various channels, and do this on
				1083	* a channel by channel basis. we should also check to see if any
				1084	* of the failed commands are on soft_reset devices, and if so, skip
				1085	* the reset.
				1086	*/
				1087
				1088	for (channel = 0; channel <= shost->max_channel; channel++) {
				1089	chan_scmd = NULL;
				1090	list_for_each_entry(scmd, work_q, eh_entry) {
				1091	if (channel == scmd->device->channel) {
				1092	chan_scmd = scmd;
				1093	break;
				1094	/*
				1095	* FIXME add back in some support for
				1096	* soft_reset devices.
				1097	*/
				1098	}
				1099	}
				1100
				1101	if (!chan_scmd)
				1102	continue;
				1103	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BRST chan:"
				1104	" %d\n", current->comm,
				1105	channel));
				1106	rtn = scsi_try_bus_reset(chan_scmd);
				1107	if (rtn == SUCCESS) {
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1108	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1109	if (channel == scmd->device->channel)
				1110	if (!scsi_device_online(scmd->device) \|\|
				1111	!scsi_eh_tur(scmd))
				1112	scsi_eh_finish_cmd(scmd,
				1113	done_q);
				1114	}
				1115	} else {
				1116	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BRST"
				1117	" failed chan: %d\n",
				1118	current->comm,
				1119	channel));
				1120	}
				1121	}
				1122	return list_empty(work_q);
				1123	}
				1124
				1125	/**
				1126	* scsi_eh_host_reset - send a host reset
				1127	* @work_q: list_head for processed commands.
				1128	* @done_q: list_head for processed commands.
				1129	**/
				1130	static int scsi_eh_host_reset(struct list_head *work_q,
				1131	struct list_head *done_q)
				1132	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1133	struct scsi_cmnd scmd, next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1134	int rtn;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1135
				1136	if (!list_empty(work_q)) {
				1137	scmd = list_entry(work_q->next,
				1138	struct scsi_cmnd, eh_entry);
				1139
				1140	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending HRST\n"
				1141	, current->comm));
				1142
				1143	rtn = scsi_try_host_reset(scmd);
				1144	if (rtn == SUCCESS) {
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1145	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1146	if (!scsi_device_online(scmd->device) \|\|
				1147	(!scsi_eh_try_stu(scmd) && !scsi_eh_tur(scmd)) \|\|
				1148	!scsi_eh_tur(scmd))
				1149	scsi_eh_finish_cmd(scmd, done_q);
				1150	}
				1151	} else {
				1152	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: HRST"
				1153	" failed\n",
				1154	current->comm));
				1155	}
				1156	}
				1157	return list_empty(work_q);
				1158	}
				1159
				1160	/**
				1161	* scsi_eh_offline_sdevs - offline scsi devices that fail to recover
				1162	* @work_q: list_head for processed commands.
				1163	* @done_q: list_head for processed commands.
				1164	*
				1165	**/
				1166	static void scsi_eh_offline_sdevs(struct list_head *work_q,
				1167	struct list_head *done_q)
				1168	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1169	struct scsi_cmnd scmd, next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1170
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1171	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1172	printk(KERN_INFO "scsi: Device offlined - not"
				1173	" ready after error recovery: host"
				1174	" %d channel %d id %d lun %d\n",
				1175	scmd->device->host->host_no,
				1176	scmd->device->channel,
				1177	scmd->device->id,
				1178	scmd->device->lun);
				1179	scsi_device_set_state(scmd->device, SDEV_OFFLINE);
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	1180	if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1181	/*
				1182	* FIXME: Handle lost cmds.
				1183	*/
				1184	}
				1185	scsi_eh_finish_cmd(scmd, done_q);
				1186	}
				1187	return;
				1188	}
				1189
				1190	/**
				1191	* scsi_decide_disposition - Disposition a cmd on return from LLD.
				1192	* @scmd: SCSI cmd to examine.
				1193	*
				1194	* Notes:
				1195	* This is only called when we are examining the status after sending
				1196	* out the actual data command. any commands that are queued for error
				1197	* recovery (e.g. test_unit_ready) do not come through here.
				1198	*
				1199	* When this routine returns failed, it means the error handler thread
				1200	* is woken. In cases where the error code indicates an error that
				1201	* doesn't require the error handler read (i.e. we don't need to
				1202	* abort/reset), this function should return SUCCESS.
				1203	**/
				1204	int scsi_decide_disposition(struct scsi_cmnd *scmd)
				1205	{
				1206	int rtn;
				1207
				1208	/*
				1209	* if the device is offline, then we clearly just pass the result back
				1210	* up to the top level.
				1211	*/
				1212	if (!scsi_device_online(scmd->device)) {
				1213	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: device offline - report"
				1214	" as SUCCESS\n",
				1215	__FUNCTION__));
				1216	return SUCCESS;
				1217	}
				1218
				1219	/*
				1220	* first check the host byte, to see if there is anything in there
				1221	* that would indicate what we need to do.
				1222	*/
				1223	switch (host_byte(scmd->result)) {
				1224	case DID_PASSTHROUGH:
				1225	/*
				1226	* no matter what, pass this through to the upper layer.
				1227	* nuke this special code so that it looks like we are saying
				1228	* did_ok.
				1229	*/
				1230	scmd->result &= 0xff00ffff;
				1231	return SUCCESS;
				1232	case DID_OK:
				1233	/*
				1234	* looks good. drop through, and check the next byte.
				1235	*/
				1236	break;
				1237	case DID_NO_CONNECT:
				1238	case DID_BAD_TARGET:
				1239	case DID_ABORT:
				1240	/*
				1241	* note - this means that we just report the status back
				1242	* to the top level driver, not that we actually think
				1243	* that it indicates SUCCESS.
				1244	*/
				1245	return SUCCESS;
				1246	/*
				1247	* when the low level driver returns did_soft_error,
				1248	* it is responsible for keeping an internal retry counter
				1249	* in order to avoid endless loops (db)
				1250	*
				1251	* actually this is a bug in this function here. we should
				1252	* be mindful of the maximum number of retries specified
				1253	* and not get stuck in a loop.
				1254	*/
				1255	case DID_SOFT_ERROR:
				1256	goto maybe_retry;
				1257	case DID_IMM_RETRY:
				1258	return NEEDS_RETRY;
				1259
	bf34191	2005-04-12 17:49:09 -0500	[diff] [blame]	1260	case DID_REQUEUE:
				1261	return ADD_TO_MLQUEUE;
				1262
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1263	case DID_ERROR:
				1264	if (msg_byte(scmd->result) == COMMAND_COMPLETE &&
				1265	status_byte(scmd->result) == RESERVATION_CONFLICT)
				1266	/*
				1267	* execute reservation conflict processing code
				1268	* lower down
				1269	*/
				1270	break;
				1271	/* fallthrough */
				1272
				1273	case DID_BUS_BUSY:
				1274	case DID_PARITY:
				1275	goto maybe_retry;
				1276	case DID_TIME_OUT:
				1277	/*
				1278	* when we scan the bus, we get timeout messages for
				1279	* these commands if there is no device available.
				1280	* other hosts report did_no_connect for the same thing.
				1281	*/
				1282	if ((scmd->cmnd[0] == TEST_UNIT_READY \|\|
				1283	scmd->cmnd[0] == INQUIRY)) {
				1284	return SUCCESS;
				1285	} else {
				1286	return FAILED;
				1287	}
				1288	case DID_RESET:
				1289	return SUCCESS;
				1290	default:
				1291	return FAILED;
				1292	}
				1293
				1294	/*
				1295	* next, check the message byte.
				1296	*/
				1297	if (msg_byte(scmd->result) != COMMAND_COMPLETE)
				1298	return FAILED;
				1299
				1300	/*
				1301	* check the status byte to see if this indicates anything special.
				1302	*/
				1303	switch (status_byte(scmd->result)) {
				1304	case QUEUE_FULL:
				1305	/*
				1306	* the case of trying to send too many commands to a
				1307	* tagged queueing device.
				1308	*/
				1309	case BUSY:
				1310	/*
				1311	* device can't talk to us at the moment. Should only
				1312	* occur (SAM-3) when the task queue is empty, so will cause
				1313	* the empty queue handling to trigger a stall in the
				1314	* device.
				1315	*/
				1316	return ADD_TO_MLQUEUE;
				1317	case GOOD:
				1318	case COMMAND_TERMINATED:
				1319	case TASK_ABORTED:
				1320	return SUCCESS;
				1321	case CHECK_CONDITION:
				1322	rtn = scsi_check_sense(scmd);
				1323	if (rtn == NEEDS_RETRY)
				1324	goto maybe_retry;
				1325	/* if rtn == FAILED, we have no sense information;
				1326	* returning FAILED will wake the error handler thread
				1327	* to collect the sense and redo the decide
				1328	* disposition */
				1329	return rtn;
				1330	case CONDITION_GOOD:
				1331	case INTERMEDIATE_GOOD:
				1332	case INTERMEDIATE_C_GOOD:
				1333	case ACA_ACTIVE:
				1334	/*
				1335	* who knows? FIXME(eric)
				1336	*/
				1337	return SUCCESS;
				1338
				1339	case RESERVATION_CONFLICT:
				1340	printk(KERN_INFO "scsi: reservation conflict: host"
				1341	" %d channel %d id %d lun %d\n",
				1342	scmd->device->host->host_no, scmd->device->channel,
				1343	scmd->device->id, scmd->device->lun);
				1344	return SUCCESS; /* causes immediate i/o error */
				1345	default:
				1346	return FAILED;
				1347	}
				1348	return FAILED;
				1349
				1350	maybe_retry:
				1351
				1352	/* we requeue for retry because the error was retryable, and
				1353	* the request was not marked fast fail. Note that above,
				1354	* even if the request is marked fast fail, we still requeue
				1355	* for queue congestion conditions (QUEUE_FULL or BUSY) */
				1356	if ((++scmd->retries) < scmd->allowed
				1357	&& !blk_noretry_request(scmd->request)) {
				1358	return NEEDS_RETRY;
				1359	} else {
				1360	/*
				1361	* no more retries - report this one back to upper level.
				1362	*/
				1363	return SUCCESS;
				1364	}
				1365	}
				1366
				1367	/**
				1368	* scsi_eh_lock_done - done function for eh door lock request
				1369	* @scmd: SCSI command block for the door lock request
				1370	*
				1371	* Notes:
				1372	* We completed the asynchronous door lock request, and it has either
				1373	* locked the door or failed. We must free the command structures
				1374	* associated with this request.
				1375	**/
				1376	static void scsi_eh_lock_done(struct scsi_cmnd *scmd)
				1377	{
				1378	struct scsi_request *sreq = scmd->sc_request;
				1379
				1380	scsi_release_request(sreq);
				1381	}
				1382
				1383
				1384	/**
				1385	* scsi_eh_lock_door - Prevent medium removal for the specified device
				1386	* @sdev: SCSI device to prevent medium removal
				1387	*
				1388	* Locking:
				1389	* We must be called from process context; scsi_allocate_request()
				1390	* may sleep.
				1391	*
				1392	* Notes:
				1393	* We queue up an asynchronous "ALLOW MEDIUM REMOVAL" request on the
				1394	* head of the devices request queue, and continue.
				1395	*
				1396	* Bugs:
				1397	* scsi_allocate_request() may sleep waiting for existing requests to
				1398	* be processed. However, since we haven't kicked off any request
				1399	* processing for this host, this may deadlock.
				1400	*
				1401	* If scsi_allocate_request() fails for what ever reason, we
				1402	* completely forget to lock the door.
				1403	**/
				1404	static void scsi_eh_lock_door(struct scsi_device *sdev)
				1405	{
				1406	struct scsi_request *sreq = scsi_allocate_request(sdev, GFP_KERNEL);
				1407
				1408	if (unlikely(!sreq)) {
				1409	printk(KERN_ERR "%s: request allocate failed,"
				1410	"prevent media removal cmd not sent\n", __FUNCTION__);
				1411	return;
				1412	}
				1413
				1414	sreq->sr_cmnd[0] = ALLOW_MEDIUM_REMOVAL;
				1415	sreq->sr_cmnd[1] = 0;
				1416	sreq->sr_cmnd[2] = 0;
				1417	sreq->sr_cmnd[3] = 0;
				1418	sreq->sr_cmnd[4] = SCSI_REMOVAL_PREVENT;
				1419	sreq->sr_cmnd[5] = 0;
				1420	sreq->sr_data_direction = DMA_NONE;
				1421	sreq->sr_bufflen = 0;
				1422	sreq->sr_buffer = NULL;
				1423	sreq->sr_allowed = 5;
				1424	sreq->sr_done = scsi_eh_lock_done;
				1425	sreq->sr_timeout_per_command = 10 * HZ;
				1426	sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]);
				1427
				1428	scsi_insert_special_req(sreq, 1);
				1429	}
				1430
				1431
				1432	/**
				1433	* scsi_restart_operations - restart io operations to the specified host.
				1434	* @shost: Host we are restarting.
				1435	*
				1436	* Notes:
				1437	* When we entered the error handler, we blocked all further i/o to
				1438	* this device. we need to 'reverse' this process.
				1439	**/
				1440	static void scsi_restart_operations(struct Scsi_Host *shost)
				1441	{
				1442	struct scsi_device *sdev;
				1443
				1444	/*
				1445	* If the door was locked, we need to insert a door lock request
				1446	* onto the head of the SCSI request queue for the device. There
				1447	* is no point trying to lock the door of an off-line device.
				1448	*/
				1449	shost_for_each_device(sdev, shost) {
				1450	if (scsi_device_online(sdev) && sdev->locked)
				1451	scsi_eh_lock_door(sdev);
				1452	}
				1453
				1454	/*
				1455	* next free up anything directly waiting upon the host. this
				1456	* will be requests for character device operations, and also for
				1457	* ioctls to queued block devices.
				1458	*/
				1459	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n",
				1460	__FUNCTION__));
				1461
Mike Anderson	d330187	2005-06-16 11:12:38 -0700	[diff] [blame^]	1462	scsi_host_set_state(shost, SHOST_RUNNING);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1463
				1464	wake_up(&shost->host_wait);
				1465
				1466	/*
				1467	* finally we need to re-initiate requests that may be pending. we will
				1468	* have had everything blocked while error handling is taking place, and
				1469	* now that error recovery is done, we will need to ensure that these
				1470	* requests are started.
				1471	*/
				1472	scsi_run_host_queues(shost);
				1473	}
				1474
				1475	/**
				1476	* scsi_eh_ready_devs - check device ready state and recover if not.
				1477	* @shost: host to be recovered.
				1478	* @eh_done_q: list_head for processed commands.
				1479	*
				1480	**/
				1481	static void scsi_eh_ready_devs(struct Scsi_Host *shost,
				1482	struct list_head *work_q,
				1483	struct list_head *done_q)
				1484	{
				1485	if (!scsi_eh_stu(shost, work_q, done_q))
				1486	if (!scsi_eh_bus_device_reset(shost, work_q, done_q))
				1487	if (!scsi_eh_bus_reset(shost, work_q, done_q))
				1488	if (!scsi_eh_host_reset(work_q, done_q))
				1489	scsi_eh_offline_sdevs(work_q, done_q);
				1490	}
				1491
				1492	/**
				1493	* scsi_eh_flush_done_q - finish processed commands or retry them.
				1494	* @done_q: list_head of processed commands.
				1495	*
				1496	**/
				1497	static void scsi_eh_flush_done_q(struct list_head *done_q)
				1498	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1499	struct scsi_cmnd scmd, next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1500
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1501	list_for_each_entry_safe(scmd, next, done_q, eh_entry) {
				1502	list_del_init(&scmd->eh_entry);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1503	if (scsi_device_online(scmd->device) &&
				1504	!blk_noretry_request(scmd->request) &&
				1505	(++scmd->retries < scmd->allowed)) {
				1506	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush"
				1507	" retry cmd: %p\n",
				1508	current->comm,
				1509	scmd));
				1510	scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
				1511	} else {
Patrick Mansfield	793698c	2005-05-16 17:42:15 -0700	[diff] [blame]	1512	/*
				1513	* If just we got sense for the device (called
				1514	* scsi_eh_get_sense), scmd->result is already
				1515	* set, do not set DRIVER_TIMEOUT.
				1516	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1517	if (!scmd->result)
				1518	scmd->result \|= (DRIVER_TIMEOUT << 24);
				1519	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush finish"
				1520	" cmd: %p\n",
				1521	current->comm, scmd));
				1522	scsi_finish_command(scmd);
				1523	}
				1524	}
				1525	}
				1526
				1527	/**
				1528	* scsi_unjam_host - Attempt to fix a host which has a cmd that failed.
				1529	* @shost: Host to unjam.
				1530	*
				1531	* Notes:
				1532	* When we come in here, we know that all commands on the bus have
				1533	* either completed, failed or timed out. we also know that no further
				1534	* commands are being sent to the host, so things are relatively quiet
				1535	* and we have freedom to fiddle with things as we wish.
				1536	*
				1537	* This is only the default implementation. it is possible for
				1538	* individual drivers to supply their own version of this function, and
				1539	* if the maintainer wishes to do this, it is strongly suggested that
				1540	* this function be taken as a template and modified. this function
				1541	* was designed to correctly handle problems for about 95% of the
				1542	* different cases out there, and it should always provide at least a
				1543	* reasonable amount of error recovery.
				1544	*
				1545	* Any command marked 'failed' or 'timeout' must eventually have
				1546	* scsi_finish_cmd() called for it. we do all of the retry stuff
				1547	* here, so when we restart the host after we return it should have an
				1548	* empty queue.
				1549	**/
				1550	static void scsi_unjam_host(struct Scsi_Host *shost)
				1551	{
				1552	unsigned long flags;
				1553	LIST_HEAD(eh_work_q);
				1554	LIST_HEAD(eh_done_q);
				1555
				1556	spin_lock_irqsave(shost->host_lock, flags);
				1557	list_splice_init(&shost->eh_cmd_q, &eh_work_q);
				1558	spin_unlock_irqrestore(shost->host_lock, flags);
				1559
				1560	SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q));
				1561
				1562	if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q))
				1563	if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q))
				1564	scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
				1565
				1566	scsi_eh_flush_done_q(&eh_done_q);
				1567	}
				1568
				1569	/**
				1570	* scsi_error_handler - Handle errors/timeouts of SCSI cmds.
				1571	* @data: Host for which we are running.
				1572	*
				1573	* Notes:
				1574	* This is always run in the context of a kernel thread. The idea is
				1575	* that we start this thing up when the kernel starts up (one per host
				1576	* that we detect), and it immediately goes to sleep and waits for some
				1577	* event (i.e. failure). When this takes place, we have the job of
				1578	* trying to unjam the bus and restarting things.
				1579	**/
				1580	int scsi_error_handler(void *data)
				1581	{
				1582	struct Scsi_Host shost = (struct Scsi_Host ) data;
				1583	int rtn;
				1584	DECLARE_MUTEX_LOCKED(sem);
				1585
				1586	/*
				1587	* Flush resources
				1588	*/
				1589
				1590	daemonize("scsi_eh_%d", shost->host_no);
				1591
				1592	current->flags \|= PF_NOFREEZE;
				1593
				1594	shost->eh_wait = &sem;
				1595	shost->ehandler = current;
				1596
				1597	/*
				1598	* Wake up the thread that created us.
				1599	*/
				1600	SCSI_LOG_ERROR_RECOVERY(3, printk("Wake up parent of"
				1601	" scsi_eh_%d\n",shost->host_no));
				1602
				1603	complete(shost->eh_notify);
				1604
				1605	while (1) {
				1606	/*
				1607	* If we get a signal, it means we are supposed to go
				1608	* away and die. This typically happens if the user is
				1609	* trying to unload a module.
				1610	*/
				1611	SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
				1612	" scsi_eh_%d"
				1613	" sleeping\n",shost->host_no));
				1614
				1615	/*
				1616	* Note - we always use down_interruptible with the semaphore
				1617	* even if the module was loaded as part of the kernel. The
				1618	* reason is that down() will cause this thread to be counted
				1619	* in the load average as a running process, and down
				1620	* interruptible doesn't. Given that we need to allow this
				1621	* thread to die if the driver was loaded as a module, using
				1622	* semaphores isn't unreasonable.
				1623	*/
				1624	down_interruptible(&sem);
				1625	if (shost->eh_kill)
				1626	break;
				1627
				1628	SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
				1629	" scsi_eh_%d waking"
				1630	" up\n",shost->host_no));
				1631
				1632	shost->eh_active = 1;
				1633
				1634	/*
				1635	* We have a host that is failing for some reason. Figure out
				1636	* what we need to do to get it up and online again (if we can).
				1637	* If we fail, we end up taking the thing offline.
				1638	*/
				1639	if (shost->hostt->eh_strategy_handler)
				1640	rtn = shost->hostt->eh_strategy_handler(shost);
				1641	else
				1642	scsi_unjam_host(shost);
				1643
				1644	shost->eh_active = 0;
				1645
				1646	/*
				1647	* Note - if the above fails completely, the action is to take
				1648	* individual devices offline and flush the queue of any
				1649	* outstanding requests that may have been pending. When we
				1650	* restart, we restart any I/O to any other devices on the bus
				1651	* which are still online.
				1652	*/
				1653	scsi_restart_operations(shost);
				1654
				1655	}
				1656
				1657	SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d"
				1658	" exiting\n",shost->host_no));
				1659
				1660	/*
				1661	* Make sure that nobody tries to wake us up again.
				1662	*/
				1663	shost->eh_wait = NULL;
				1664
				1665	/*
				1666	* Knock this down too. From this point on, the host is flying
				1667	* without a pilot. If this is because the module is being unloaded,
				1668	* that's fine. If the user sent a signal to this thing, we are
				1669	* potentially in real danger.
				1670	*/
				1671	shost->eh_active = 0;
				1672	shost->ehandler = NULL;
				1673
				1674	/*
				1675	* If anyone is waiting for us to exit (i.e. someone trying to unload
				1676	* a driver), then wake up that process to let them know we are on
				1677	* the way out the door.
				1678	*/
				1679	complete_and_exit(shost->eh_notify, 0);
				1680	return 0;
				1681	}
				1682
				1683	/*
				1684	* Function: scsi_report_bus_reset()
				1685	*
				1686	* Purpose: Utility function used by low-level drivers to report that
				1687	* they have observed a bus reset on the bus being handled.
				1688	*
				1689	* Arguments: shost - Host in question
				1690	* channel - channel on which reset was observed.
				1691	*
				1692	* Returns: Nothing
				1693	*
				1694	* Lock status: Host lock must be held.
				1695	*
				1696	* Notes: This only needs to be called if the reset is one which
				1697	* originates from an unknown location. Resets originated
				1698	* by the mid-level itself don't need to call this, but there
				1699	* should be no harm.
				1700	*
				1701	* The main purpose of this is to make sure that a CHECK_CONDITION
				1702	* is properly treated.
				1703	*/
				1704	void scsi_report_bus_reset(struct Scsi_Host *shost, int channel)
				1705	{
				1706	struct scsi_device *sdev;
				1707
				1708	__shost_for_each_device(sdev, shost) {
				1709	if (channel == sdev->channel) {
				1710	sdev->was_reset = 1;
				1711	sdev->expecting_cc_ua = 1;
				1712	}
				1713	}
				1714	}
				1715	EXPORT_SYMBOL(scsi_report_bus_reset);
				1716
				1717	/*
				1718	* Function: scsi_report_device_reset()
				1719	*
				1720	* Purpose: Utility function used by low-level drivers to report that
				1721	* they have observed a device reset on the device being handled.
				1722	*
				1723	* Arguments: shost - Host in question
				1724	* channel - channel on which reset was observed
				1725	* target - target on which reset was observed
				1726	*
				1727	* Returns: Nothing
				1728	*
				1729	* Lock status: Host lock must be held
				1730	*
				1731	* Notes: This only needs to be called if the reset is one which
				1732	* originates from an unknown location. Resets originated
				1733	* by the mid-level itself don't need to call this, but there
				1734	* should be no harm.
				1735	*
				1736	* The main purpose of this is to make sure that a CHECK_CONDITION
				1737	* is properly treated.
				1738	*/
				1739	void scsi_report_device_reset(struct Scsi_Host *shost, int channel, int target)
				1740	{
				1741	struct scsi_device *sdev;
				1742
				1743	__shost_for_each_device(sdev, shost) {
				1744	if (channel == sdev->channel &&
				1745	target == sdev->id) {
				1746	sdev->was_reset = 1;
				1747	sdev->expecting_cc_ua = 1;
				1748	}
				1749	}
				1750	}
				1751	EXPORT_SYMBOL(scsi_report_device_reset);
				1752
				1753	static void
				1754	scsi_reset_provider_done_command(struct scsi_cmnd *scmd)
				1755	{
				1756	}
				1757
				1758	/*
				1759	* Function: scsi_reset_provider
				1760	*
				1761	* Purpose: Send requested reset to a bus or device at any phase.
				1762	*
				1763	* Arguments: device - device to send reset to
				1764	* flag - reset type (see scsi.h)
				1765	*
				1766	* Returns: SUCCESS/FAILURE.
				1767	*
				1768	* Notes: This is used by the SCSI Generic driver to provide
				1769	* Bus/Device reset capability.
				1770	*/
				1771	int
				1772	scsi_reset_provider(struct scsi_device *dev, int flag)
				1773	{
				1774	struct scsi_cmnd *scmd = scsi_get_command(dev, GFP_KERNEL);
				1775	struct request req;
				1776	int rtn;
				1777
				1778	scmd->request = &req;
				1779	memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout));
				1780	scmd->request->rq_status = RQ_SCSI_BUSY;
Christoph Hellwig	b4edcbc	2005-06-19 13:40:52 +0200	[diff] [blame]	1781
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1782	memset(&scmd->cmnd, '\0', sizeof(scmd->cmnd));
				1783
				1784	scmd->scsi_done = scsi_reset_provider_done_command;
				1785	scmd->done = NULL;
				1786	scmd->buffer = NULL;
				1787	scmd->bufflen = 0;
				1788	scmd->request_buffer = NULL;
				1789	scmd->request_bufflen = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1790
				1791	scmd->cmd_len = 0;
				1792
				1793	scmd->sc_data_direction = DMA_BIDIRECTIONAL;
				1794	scmd->sc_request = NULL;
				1795	scmd->sc_magic = SCSI_CMND_MAGIC;
				1796
				1797	init_timer(&scmd->eh_timeout);
				1798
				1799	/*
				1800	* Sometimes the command can get back into the timer chain,
				1801	* so use the pid as an identifier.
				1802	*/
				1803	scmd->pid = 0;
				1804
				1805	switch (flag) {
				1806	case SCSI_TRY_RESET_DEVICE:
				1807	rtn = scsi_try_bus_device_reset(scmd);
				1808	if (rtn == SUCCESS)
				1809	break;
				1810	/* FALLTHROUGH */
				1811	case SCSI_TRY_RESET_BUS:
				1812	rtn = scsi_try_bus_reset(scmd);
				1813	if (rtn == SUCCESS)
				1814	break;
				1815	/* FALLTHROUGH */
				1816	case SCSI_TRY_RESET_HOST:
				1817	rtn = scsi_try_host_reset(scmd);
				1818	break;
				1819	default:
				1820	rtn = FAILED;
				1821	}
				1822
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1823	scsi_next_command(scmd);
				1824	return rtn;
				1825	}
				1826	EXPORT_SYMBOL(scsi_reset_provider);
				1827
				1828	/**
				1829	* scsi_normalize_sense - normalize main elements from either fixed or
				1830	* descriptor sense data format into a common format.
				1831	*
				1832	* @sense_buffer: byte array containing sense data returned by device
				1833	* @sb_len: number of valid bytes in sense_buffer
				1834	* @sshdr: pointer to instance of structure that common
				1835	* elements are written to.
				1836	*
				1837	* Notes:
				1838	* The "main elements" from sense data are: response_code, sense_key,
				1839	* asc, ascq and additional_length (only for descriptor format).
				1840	*
				1841	* Typically this function can be called after a device has
				1842	* responded to a SCSI command with the CHECK_CONDITION status.
				1843	*
				1844	* Return value:
				1845	* 1 if valid sense data information found, else 0;
				1846	**/
				1847	int scsi_normalize_sense(const u8 *sense_buffer, int sb_len,
				1848	struct scsi_sense_hdr *sshdr)
				1849	{
				1850	if (!sense_buffer \|\| !sb_len \|\| (sense_buffer[0] & 0x70) != 0x70)
				1851	return 0;
				1852
				1853	memset(sshdr, 0, sizeof(struct scsi_sense_hdr));
				1854
				1855	sshdr->response_code = (sense_buffer[0] & 0x7f);
				1856	if (sshdr->response_code >= 0x72) {
				1857	/*
				1858	* descriptor format
				1859	*/
				1860	if (sb_len > 1)
				1861	sshdr->sense_key = (sense_buffer[1] & 0xf);
				1862	if (sb_len > 2)
				1863	sshdr->asc = sense_buffer[2];
				1864	if (sb_len > 3)
				1865	sshdr->ascq = sense_buffer[3];
				1866	if (sb_len > 7)
				1867	sshdr->additional_length = sense_buffer[7];
				1868	} else {
				1869	/*
				1870	* fixed format
				1871	*/
				1872	if (sb_len > 2)
				1873	sshdr->sense_key = (sense_buffer[2] & 0xf);
				1874	if (sb_len > 7) {
				1875	sb_len = (sb_len < (sense_buffer[7] + 8)) ?
				1876	sb_len : (sense_buffer[7] + 8);
				1877	if (sb_len > 12)
				1878	sshdr->asc = sense_buffer[12];
				1879	if (sb_len > 13)
				1880	sshdr->ascq = sense_buffer[13];
				1881	}
				1882	}
				1883
				1884	return 1;
				1885	}
				1886	EXPORT_SYMBOL(scsi_normalize_sense);
				1887
				1888	int scsi_request_normalize_sense(struct scsi_request *sreq,
				1889	struct scsi_sense_hdr *sshdr)
				1890	{
				1891	return scsi_normalize_sense(sreq->sr_sense_buffer,
				1892	sizeof(sreq->sr_sense_buffer), sshdr);
				1893	}
				1894	EXPORT_SYMBOL(scsi_request_normalize_sense);
				1895
				1896	int scsi_command_normalize_sense(struct scsi_cmnd *cmd,
				1897	struct scsi_sense_hdr *sshdr)
				1898	{
				1899	return scsi_normalize_sense(cmd->sense_buffer,
				1900	sizeof(cmd->sense_buffer), sshdr);
				1901	}
				1902	EXPORT_SYMBOL(scsi_command_normalize_sense);
				1903
				1904	/**
				1905	* scsi_sense_desc_find - search for a given descriptor type in
				1906	* descriptor sense data format.
				1907	*
				1908	* @sense_buffer: byte array of descriptor format sense data
				1909	* @sb_len: number of valid bytes in sense_buffer
				1910	* @desc_type: value of descriptor type to find
				1911	* (e.g. 0 -> information)
				1912	*
				1913	* Notes:
				1914	* only valid when sense data is in descriptor format
				1915	*
				1916	* Return value:
				1917	* pointer to start of (first) descriptor if found else NULL
				1918	**/
				1919	const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len,
				1920	int desc_type)
				1921	{
				1922	int add_sen_len, add_len, desc_len, k;
				1923	const u8 * descp;
				1924
				1925	if ((sb_len < 8) \|\| (0 == (add_sen_len = sense_buffer[7])))
				1926	return NULL;
				1927	if ((sense_buffer[0] < 0x72) \|\| (sense_buffer[0] > 0x73))
				1928	return NULL;
				1929	add_sen_len = (add_sen_len < (sb_len - 8)) ?
				1930	add_sen_len : (sb_len - 8);
				1931	descp = &sense_buffer[8];
				1932	for (desc_len = 0, k = 0; k < add_sen_len; k += desc_len) {
				1933	descp += desc_len;
				1934	add_len = (k < (add_sen_len - 1)) ? descp[1]: -1;
				1935	desc_len = add_len + 2;
				1936	if (descp[0] == desc_type)
				1937	return descp;
				1938	if (add_len < 0) // short descriptor ??
				1939	break;
				1940	}
				1941	return NULL;
				1942	}
				1943	EXPORT_SYMBOL(scsi_sense_desc_find);
				1944
				1945	/**
				1946	* scsi_get_sense_info_fld - attempts to get information field from
				1947	* sense data (either fixed or descriptor format)
				1948	*
				1949	* @sense_buffer: byte array of sense data
				1950	* @sb_len: number of valid bytes in sense_buffer
				1951	* @info_out: pointer to 64 integer where 8 or 4 byte information
				1952	* field will be placed if found.
				1953	*
				1954	* Return value:
				1955	* 1 if information field found, 0 if not found.
				1956	**/
				1957	int scsi_get_sense_info_fld(const u8 * sense_buffer, int sb_len,
				1958	u64 * info_out)
				1959	{
				1960	int j;
				1961	const u8 * ucp;
				1962	u64 ull;
				1963
				1964	if (sb_len < 7)
				1965	return 0;
				1966	switch (sense_buffer[0] & 0x7f) {
				1967	case 0x70:
				1968	case 0x71:
				1969	if (sense_buffer[0] & 0x80) {
				1970	*info_out = (sense_buffer[3] << 24) +
				1971	(sense_buffer[4] << 16) +
				1972	(sense_buffer[5] << 8) + sense_buffer[6];
				1973	return 1;
				1974	} else
				1975	return 0;
				1976	case 0x72:
				1977	case 0x73:
				1978	ucp = scsi_sense_desc_find(sense_buffer, sb_len,
				1979	0 /* info desc */);
				1980	if (ucp && (0xa == ucp[1])) {
				1981	ull = 0;
				1982	for (j = 0; j < 8; ++j) {
				1983	if (j > 0)
				1984	ull <<= 8;
				1985	ull \|= ucp[4 + j];
				1986	}
				1987	*info_out = ull;
				1988	return 1;
				1989	} else
				1990	return 0;
				1991	default:
				1992	return 0;
				1993	}
				1994	}
				1995	EXPORT_SYMBOL(scsi_get_sense_info_fld);