[SCSI] aacraid: add user initiated reset

Add the ability for an application to issue a hardware reset to the
adapter via sysfs. Typical uses include restarting the adapter after it
has been flashed. Bumped revision number for the driver and added a
feature to periodically check the adapter's health (check_interval),
update the adapter's concept of time (update_interval) and block
checking/resetting of the adapter (check_reset).

Signed-off-by: Mark Salyzyn <aacraid@adaptec.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c
index ef11c18..b3081b1 100644
--- a/drivers/scsi/aacraid/aachba.c
+++ b/drivers/scsi/aacraid/aachba.c
@@ -169,6 +169,18 @@
 module_param(acbsize, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(acbsize, "Request a specific adapter control block (FIB) size. Valid values are 512, 2048, 4096 and 8192. Default is to use suggestion from Firmware.");
 
+int update_interval = 30 * 60;
+module_param(update_interval, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(update_interval, "Interval in seconds between time sync updates issued to adapter.");
+
+int check_interval = 24 * 60 * 60;
+module_param(check_interval, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(check_interval, "Interval in seconds between adapter health checks.");
+
+int check_reset = 1;
+module_param(check_reset, int, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(check_reset, "If adapter fails health check, reset the adapter.");
+
 int expose_physicals = -1;
 module_param(expose_physicals, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(expose_physicals, "Expose physical components of the arrays. -1=protect 0=off, 1=on");
@@ -1197,6 +1209,12 @@
 			  (int)sizeof(dev->supplement_adapter_info.VpdInfo.Tsid),
 			  dev->supplement_adapter_info.VpdInfo.Tsid);
 		}
+		if (!check_reset ||
+		  (dev->supplement_adapter_info.SupportedOptions2 &
+		  le32_to_cpu(AAC_OPTION_IGNORE_RESET))) {
+			printk(KERN_INFO "%s%d: Reset Adapter Ignored\n",
+			  dev->name, dev->id);
+		}
 	}
 
 	dev->nondasd_support = 0;
diff --git a/drivers/scsi/aacraid/aacraid.h b/drivers/scsi/aacraid/aacraid.h
index fdbedb1..8abe4f9 100644
--- a/drivers/scsi/aacraid/aacraid.h
+++ b/drivers/scsi/aacraid/aacraid.h
@@ -12,8 +12,8 @@
  *----------------------------------------------------------------------------*/
 
 #ifndef AAC_DRIVER_BUILD
-# define AAC_DRIVER_BUILD 2437
-# define AAC_DRIVER_BRANCH "-mh4"
+# define AAC_DRIVER_BUILD 2447
+# define AAC_DRIVER_BRANCH "-ms"
 #endif
 #define MAXIMUM_NUM_CONTAINERS	32
 
@@ -860,10 +860,12 @@
 	__le32	FlashFirmwareBootBuild;
 	u8	MfgPcbaSerialNo[12];
 	u8	MfgWWNName[8];
-	__le32	MoreFeatureBits;
+	__le32	SupportedOptions2;
 	__le32	ReservedGrowth[1];
 };
 #define AAC_FEATURE_FALCON	0x00000010
+#define AAC_OPTION_MU_RESET	0x00000001
+#define AAC_OPTION_IGNORE_RESET	0x00000002
 #define AAC_SIS_VERSION_V3	3
 #define AAC_SIS_SLOT_UNKNOWN	0xFF
 
@@ -1260,6 +1262,19 @@
 	u8		data[16];
 };
 
+#define CT_PAUSE_IO    65
+#define CT_RELEASE_IO  66
+struct aac_pause {
+	__le32		command;	/* VM_ContainerConfig */
+	__le32		type;		/* CT_PAUSE_IO */
+	__le32		timeout;	/* 10ms ticks */
+	__le32		min;
+	__le32		noRescan;
+	__le32		parm3;
+	__le32		parm4;
+	__le32		count;	/* sizeof(((struct aac_pause_reply *)NULL)->data) */
+};
+
 struct aac_srb
 {
 	__le32		function;
@@ -1816,6 +1831,7 @@
 unsigned int aac_response_normal(struct aac_queue * q);
 unsigned int aac_command_normal(struct aac_queue * q);
 unsigned int aac_intr_normal(struct aac_dev * dev, u32 Index);
+int aac_reset_adapter(struct aac_dev * dev, int forced);
 int aac_check_health(struct aac_dev * dev);
 int aac_command_thread(void *data);
 int aac_close_fib_context(struct aac_dev * dev, struct aac_fib_context *fibctx);
@@ -1835,3 +1851,6 @@
 extern int expose_physicals;
 extern int aac_reset_devices;
 extern int aac_commit;
+extern int update_interval;
+extern int check_interval;
+extern int check_reset;
diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c
index 9aca57e..d510839 100644
--- a/drivers/scsi/aacraid/commsup.c
+++ b/drivers/scsi/aacraid/commsup.c
@@ -1021,7 +1021,7 @@
 
 }
 
-static int _aac_reset_adapter(struct aac_dev *aac)
+static int _aac_reset_adapter(struct aac_dev *aac, int forced)
 {
 	int index, quirks;
 	int retval;
@@ -1029,25 +1029,32 @@
 	struct scsi_device *dev;
 	struct scsi_cmnd *command;
 	struct scsi_cmnd *command_list;
+	int jafo = 0;
 
 	/*
 	 * Assumptions:
-	 *	- host is locked.
+	 *	- host is locked, unless called by the aacraid thread.
+	 *	  (a matter of convenience, due to legacy issues surrounding
+	 *	  eh_host_adapter_reset).
 	 *	- in_reset is asserted, so no new i/o is getting to the
 	 *	  card.
-	 *	- The card is dead.
+	 *	- The card is dead, or will be very shortly ;-/ so no new
+	 *	  commands are completing in the interrupt service.
 	 */
 	host = aac->scsi_host_ptr;
 	scsi_block_requests(host);
 	aac_adapter_disable_int(aac);
-	spin_unlock_irq(host->host_lock);
-	kthread_stop(aac->thread);
+	if (aac->thread->pid != current->pid) {
+		spin_unlock_irq(host->host_lock);
+		kthread_stop(aac->thread);
+		jafo = 1;
+	}
 
 	/*
 	 *	If a positive health, means in a known DEAD PANIC
 	 * state and the adapter could be reset to `try again'.
 	 */
-	retval = aac_adapter_restart(aac, aac_adapter_check_health(aac));
+	retval = aac_adapter_restart(aac, forced ? 0 : aac_adapter_check_health(aac));
 
 	if (retval)
 		goto out;
@@ -1104,10 +1111,12 @@
 	if (aac_get_driver_ident(index)->quirks & AAC_QUIRK_31BIT)
 		if ((retval = pci_set_dma_mask(aac->pdev, DMA_32BIT_MASK)))
 			goto out;
-	aac->thread = kthread_run(aac_command_thread, aac, aac->name);
-	if (IS_ERR(aac->thread)) {
-		retval = PTR_ERR(aac->thread);
-		goto out;
+	if (jafo) {
+		aac->thread = kthread_run(aac_command_thread, aac, aac->name);
+		if (IS_ERR(aac->thread)) {
+			retval = PTR_ERR(aac->thread);
+			goto out;
+		}
 	}
 	(void)aac_get_adapter_info(aac);
 	quirks = aac_get_driver_ident(index)->quirks;
@@ -1150,7 +1159,98 @@
 out:
 	aac->in_reset = 0;
 	scsi_unblock_requests(host);
-	spin_lock_irq(host->host_lock);
+	if (jafo) {
+		spin_lock_irq(host->host_lock);
+	}
+	return retval;
+}
+
+int aac_reset_adapter(struct aac_dev * aac, int forced)
+{
+	unsigned long flagv = 0;
+	int retval;
+	struct Scsi_Host * host;
+
+	if (spin_trylock_irqsave(&aac->fib_lock, flagv) == 0)
+		return -EBUSY;
+
+	if (aac->in_reset) {
+		spin_unlock_irqrestore(&aac->fib_lock, flagv);
+		return -EBUSY;
+	}
+	aac->in_reset = 1;
+	spin_unlock_irqrestore(&aac->fib_lock, flagv);
+
+	/*
+	 * Wait for all commands to complete to this specific
+	 * target (block maximum 60 seconds). Although not necessary,
+	 * it does make us a good storage citizen.
+	 */
+	host = aac->scsi_host_ptr;
+	scsi_block_requests(host);
+	if (forced < 2) for (retval = 60; retval; --retval) {
+		struct scsi_device * dev;
+		struct scsi_cmnd * command;
+		int active = 0;
+
+		__shost_for_each_device(dev, host) {
+			spin_lock_irqsave(&dev->list_lock, flagv);
+			list_for_each_entry(command, &dev->cmd_list, list) {
+				if (command->SCp.phase == AAC_OWNER_FIRMWARE) {
+					active++;
+					break;
+				}
+			}
+			spin_unlock_irqrestore(&dev->list_lock, flagv);
+			if (active)
+				break;
+
+		}
+		/*
+		 * We can exit If all the commands are complete
+		 */
+		if (active == 0)
+			break;
+		ssleep(1);
+	}
+
+	/* Quiesce build, flush cache, write through mode */
+	aac_send_shutdown(aac);
+	spin_lock_irqsave(host->host_lock, flagv);
+	retval = _aac_reset_adapter(aac, forced);
+	spin_unlock_irqrestore(host->host_lock, flagv);
+
+	if (retval == -ENODEV) {
+		/* Unwind aac_send_shutdown() IOP_RESET unsupported/disabled */
+		struct fib * fibctx = aac_fib_alloc(aac);
+		if (fibctx) {
+			struct aac_pause *cmd;
+			int status;
+
+			aac_fib_init(fibctx);
+
+			cmd = (struct aac_pause *) fib_data(fibctx);
+
+			cmd->command = cpu_to_le32(VM_ContainerConfig);
+			cmd->type = cpu_to_le32(CT_PAUSE_IO);
+			cmd->timeout = cpu_to_le32(1);
+			cmd->min = cpu_to_le32(1);
+			cmd->noRescan = cpu_to_le32(1);
+			cmd->count = cpu_to_le32(0);
+
+			status = aac_fib_send(ContainerCommand,
+			  fibctx,
+			  sizeof(struct aac_pause),
+			  FsaNormal,
+			  -2 /* Timeout silently */, 1,
+			  NULL, NULL);
+
+			if (status >= 0)
+				aac_fib_complete(fibctx);
+			aac_fib_free(fibctx);
+		}
+	}
+
 	return retval;
 }
 
@@ -1270,10 +1370,15 @@
 
 	printk(KERN_ERR "%s: Host adapter BLINK LED 0x%x\n", aac->name, BlinkLED);
 
+	if (!check_reset || (aac->supplement_adapter_info.SupportedOptions2 &
+	  le32_to_cpu(AAC_OPTION_IGNORE_RESET)))
+		goto out;
 	host = aac->scsi_host_ptr;
-	spin_lock_irqsave(host->host_lock, flagv);
-	BlinkLED = _aac_reset_adapter(aac);
-	spin_unlock_irqrestore(host->host_lock, flagv);
+	if (aac->thread->pid != current->pid)
+		spin_lock_irqsave(host->host_lock, flagv);
+	BlinkLED = _aac_reset_adapter(aac, 0);
+	if (aac->thread->pid != current->pid)
+		spin_unlock_irqrestore(host->host_lock, flagv);
 	return BlinkLED;
 
 out:
@@ -1300,6 +1405,9 @@
 	struct aac_fib_context *fibctx;
 	unsigned long flags;
 	DECLARE_WAITQUEUE(wait, current);
+	unsigned long next_jiffies = jiffies + HZ;
+	unsigned long next_check_jiffies = next_jiffies;
+	long difference = HZ;
 
 	/*
 	 *	We can only have one thread per adapter for AIF's.
@@ -1368,7 +1476,7 @@
 				     cpu_to_le32(AifCmdJobProgress))) {
 					aac_handle_aif(dev, fib);
 				}
- 				
+
 				time_now = jiffies/HZ;
 
 				/*
@@ -1507,11 +1615,79 @@
 		 *	There are no more AIF's
 		 */
 		spin_unlock_irqrestore(dev->queues->queue[HostNormCmdQueue].lock, flags);
-		schedule();
+
+		/*
+		 *	Background activity
+		 */
+		if ((time_before(next_check_jiffies,next_jiffies))
+		 && ((difference = next_check_jiffies - jiffies) <= 0)) {
+			next_check_jiffies = next_jiffies;
+			if (aac_check_health(dev) == 0) {
+				difference = ((long)(unsigned)check_interval)
+					   * HZ;
+				next_check_jiffies = jiffies + difference;
+			} else if (!dev->queues)
+				break;
+		}
+		if (!time_before(next_check_jiffies,next_jiffies)
+		 && ((difference = next_jiffies - jiffies) <= 0)) {
+			struct timeval now;
+			int ret;
+
+			/* Don't even try to talk to adapter if its sick */
+			ret = aac_check_health(dev);
+			if (!ret && !dev->queues)
+				break;
+			next_check_jiffies = jiffies
+					   + ((long)(unsigned)check_interval)
+					   * HZ;
+			do_gettimeofday(&now);
+
+			/* Synchronize our watches */
+			if (((1000000 - (1000000 / HZ)) > now.tv_usec)
+			 && (now.tv_usec > (1000000 / HZ)))
+				difference = (((1000000 - now.tv_usec) * HZ)
+				  + 500000) / 1000000;
+			else if (ret == 0) {
+				struct fib *fibptr;
+
+				if ((fibptr = aac_fib_alloc(dev))) {
+					u32 * info;
+
+					aac_fib_init(fibptr);
+
+					info = (u32 *) fib_data(fibptr);
+					if (now.tv_usec > 500000)
+						++now.tv_sec;
+
+					*info = cpu_to_le32(now.tv_sec);
+
+					(void)aac_fib_send(SendHostTime,
+						fibptr,
+						sizeof(*info),
+						FsaNormal,
+						1, 1,
+						NULL,
+						NULL);
+					aac_fib_complete(fibptr);
+					aac_fib_free(fibptr);
+				}
+				difference = (long)(unsigned)update_interval*HZ;
+			} else {
+				/* retry shortly */
+				difference = 10 * HZ;
+			}
+			next_jiffies = jiffies + difference;
+			if (time_before(next_check_jiffies,next_jiffies))
+				difference = next_check_jiffies - jiffies;
+		}
+		if (difference <= 0)
+			difference = 1;
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(difference);
 
 		if (kthread_should_stop())
 			break;
-		set_current_state(TASK_INTERRUPTIBLE);
 	}
 	if (dev->queues)
 		remove_wait_queue(&dev->queues->queue[HostNormCmdQueue].cmdready, &wait);
diff --git a/drivers/scsi/aacraid/linit.c b/drivers/scsi/aacraid/linit.c
index 6f92d07..f8c2aaf 100644
--- a/drivers/scsi/aacraid/linit.c
+++ b/drivers/scsi/aacraid/linit.c
@@ -39,10 +39,8 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include <linux/dma-mapping.h>
 #include <linux/syscalls.h>
 #include <linux/delay.h>
-#include <linux/smp_lock.h>
 #include <linux/kthread.h>
 #include <asm/semaphore.h>
 
@@ -581,6 +579,14 @@
 		ssleep(1);
 	}
 	printk(KERN_ERR "%s: SCSI bus appears hung\n", AAC_DRIVERNAME);
+	/*
+	 * This adapter needs a blind reset, only do so for Adapters that
+	 * support a register, instead of a commanded, reset.
+	 */
+	if ((aac->supplement_adapter_info.SupportedOptions2 &
+	  le32_to_cpu(AAC_OPTION_MU_RESET|AAC_OPTION_IGNORE_RESET)) ==
+	  le32_to_cpu(AAC_OPTION_MU_RESET))
+		aac_reset_adapter(aac, 2); /* Bypass wait for command quiesce */
 	return SUCCESS; /* Cause an immediate retry of the command with a ten second delay after successful tur */
 }
 
@@ -788,6 +794,31 @@
 	  class_to_shost(class_dev)->max_id);
 }
 
+static ssize_t aac_store_reset_adapter(struct class_device *class_dev,
+		const char *buf, size_t count)
+{
+	int retval = -EACCES;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return retval;
+	retval = aac_reset_adapter((struct aac_dev*)class_to_shost(class_dev)->hostdata, buf[0] == '!');
+	if (retval >= 0)
+		retval = count;
+	return retval;
+}
+
+static ssize_t aac_show_reset_adapter(struct class_device *class_dev,
+		char *buf)
+{
+	struct aac_dev *dev = (struct aac_dev*)class_to_shost(class_dev)->hostdata;
+	int len, tmp;
+
+	tmp = aac_adapter_check_health(dev);
+	if ((tmp == 0) && dev->in_reset)
+		tmp = -EBUSY;
+	len = snprintf(buf, PAGE_SIZE, "0x%x", tmp);
+	return len;
+}
 
 static struct class_device_attribute aac_model = {
 	.attr = {
@@ -845,6 +876,14 @@
 	},
 	.show = aac_show_max_id,
 };
+static struct class_device_attribute aac_reset = {
+	.attr = {
+		.name = "reset_host",
+		.mode = S_IWUSR|S_IRUGO,
+	},
+	.store = aac_store_reset_adapter,
+	.show = aac_show_reset_adapter,
+};
 
 static struct class_device_attribute *aac_attrs[] = {
 	&aac_model,
@@ -855,6 +894,7 @@
 	&aac_serial_number,
 	&aac_max_channel,
 	&aac_max_id,
+	&aac_reset,
 	NULL
 };
 
@@ -1118,7 +1158,7 @@
 {
 	int error;
 	
-	printk(KERN_INFO "Adaptec %s driver (%s)\n",
+	printk(KERN_INFO "Adaptec %s driver %s\n",
 	  AAC_DRIVERNAME, aac_driver_version);
 
 	error = pci_register_driver(&aac_pci_driver);
diff --git a/drivers/scsi/aacraid/rx.c b/drivers/scsi/aacraid/rx.c
index ae978a3..ebc65b9 100644
--- a/drivers/scsi/aacraid/rx.c
+++ b/drivers/scsi/aacraid/rx.c
@@ -464,21 +464,24 @@
 {
 	u32 var;
 
-	if (bled)
-		printk(KERN_ERR "%s%d: adapter kernel panic'd %x.\n",
-			dev->name, dev->id, bled);
-	else {
-		bled = aac_adapter_sync_cmd(dev, IOP_RESET_ALWAYS,
-		  0, 0, 0, 0, 0, 0, &var, NULL, NULL, NULL, NULL);
-		if (!bled && (var != 0x00000001))
-			bled = -EINVAL;
-	}
-	if (bled && (bled != -ETIMEDOUT))
-		bled = aac_adapter_sync_cmd(dev, IOP_RESET,
-		  0, 0, 0, 0, 0, 0, &var, NULL, NULL, NULL, NULL);
+	if (!(dev->supplement_adapter_info.SupportedOptions2 &
+	  le32_to_cpu(AAC_OPTION_MU_RESET)) || (bled >= 0) || (bled == -2)) {
+		if (bled)
+			printk(KERN_ERR "%s%d: adapter kernel panic'd %x.\n",
+				dev->name, dev->id, bled);
+		else {
+			bled = aac_adapter_sync_cmd(dev, IOP_RESET_ALWAYS,
+			  0, 0, 0, 0, 0, 0, &var, NULL, NULL, NULL, NULL);
+			if (!bled && (var != 0x00000001))
+				bled = -EINVAL;
+		}
+		if (bled && (bled != -ETIMEDOUT))
+			bled = aac_adapter_sync_cmd(dev, IOP_RESET,
+			  0, 0, 0, 0, 0, 0, &var, NULL, NULL, NULL, NULL);
 
-	if (bled && (bled != -ETIMEDOUT))
-		return -EINVAL;
+		if (bled && (bled != -ETIMEDOUT))
+			return -EINVAL;
+	}
 	if (bled || (var == 0x3803000F)) { /* USE_OTHER_METHOD */
 		rx_writel(dev, MUnit.reserved2, 3);
 		msleep(5000); /* Delay 5 seconds */
@@ -596,7 +599,7 @@
 		}
 		msleep(1);
 	}
-	if (restart)
+	if (restart && aac_commit)
 		aac_commit = 1;
 	/*
 	 *	Fill in the common function dispatch table.