msm: bam_dmux: handle timeouts due to modem crash

If bam_dmux times out waiting for a response from the modem, we assume it
has crashed.  Therefore, prepare for modem restart if it is enabled;
otherwise, report the error with BUG() as we are in an unrecoverable state.

Change-Id: I5f2419eae721a735a2396693da9fa2d28b14f122
CRs-Fixed: 331808
Signed-off-by: Jeffrey Hugo <jhugo@codeaurora.org>
diff --git a/arch/arm/mach-msm/bam_dmux.c b/arch/arm/mach-msm/bam_dmux.c
index d001282..5a10ddb 100644
--- a/arch/arm/mach-msm/bam_dmux.c
+++ b/arch/arm/mach-msm/bam_dmux.c
@@ -33,6 +33,7 @@
 #include <mach/msm_smsm.h>
 #include <mach/subsystem_notif.h>
 #include <mach/socinfo.h>
+#include <mach/subsystem_restart.h>
 
 #define BAM_CH_LOCAL_OPEN       0x1
 #define BAM_CH_REMOTE_OPEN      0x2
@@ -668,6 +669,8 @@
 	if (!bam_is_connected) {
 		read_unlock(&ul_wakeup_lock);
 		ul_wakeup();
+		if (unlikely(in_global_reset == 1))
+			return -EFAULT;
 		read_lock(&ul_wakeup_lock);
 		notify_all(BAM_DMUX_UL_CONNECTED, (unsigned long)(NULL));
 	}
@@ -805,6 +808,8 @@
 	if (!bam_is_connected) {
 		read_unlock(&ul_wakeup_lock);
 		ul_wakeup();
+		if (unlikely(in_global_reset == 1))
+			return -EFAULT;
 		read_lock(&ul_wakeup_lock);
 		notify_all(BAM_DMUX_UL_CONNECTED, (unsigned long)(NULL));
 	}
@@ -840,6 +845,8 @@
 	if (!bam_is_connected && !bam_ch_is_in_reset(id)) {
 		read_unlock(&ul_wakeup_lock);
 		ul_wakeup();
+		if (unlikely(in_global_reset == 1))
+			return -EFAULT;
 		read_lock(&ul_wakeup_lock);
 		notify_all(BAM_DMUX_UL_CONNECTED, (unsigned long)(NULL));
 	}
@@ -1288,6 +1295,8 @@
 	if (!bam_is_connected) {
 		read_unlock(&ul_wakeup_lock);
 		ul_wakeup();
+		if (unlikely(in_global_reset == 1))
+			return;
 		read_lock(&ul_wakeup_lock);
 		ul_packet_written = 1;
 		notify_all(BAM_DMUX_UL_CONNECTED, (unsigned long)(NULL));
@@ -1386,6 +1395,26 @@
 	write_unlock_irqrestore(&ul_wakeup_lock, flags);
 	ul_powerdown_finish();
 }
+
+static int ssrestart_check(void)
+{
+	/*
+	 * Handle a modem timeout.  If the restart level is RESET_SOC or
+	 * lower, SSR is not on, so the crashed modem will end up crashing
+	 * the system anyway; use BUG() to report the error.  Otherwise set
+	 * in_global_reset and return 1 so the caller can back out and wait
+	 * for the restart event, which should happen soon.
+	 */
+	DMUX_LOG_KERR("%s: modem timeout\n", __func__);
+	if (get_restart_level() <= RESET_SOC) {
+		BUG();
+		return 0;
+	} else {
+		in_global_reset = 1;
+		return 1;
+	}
+}
+
 static void ul_wakeup(void)
 {
 	int ret;
@@ -1429,17 +1458,29 @@
 		bam_dmux_log("%s waiting for previous ack\n", __func__);
 		ret = wait_for_completion_timeout(
 					&ul_wakeup_ack_completion, HZ);
-		BUG_ON(ret == 0);
 		wait_for_ack = 0;
+		if (unlikely(ret == 0) && ssrestart_check()) {
+			mutex_unlock(&wakeup_lock);
+			bam_dmux_log("%s timeout previous ack\n", __func__);
+			return;
+		}
 	}
 	INIT_COMPLETION(ul_wakeup_ack_completion);
 	power_vote(1);
 	bam_dmux_log("%s waiting for wakeup ack\n", __func__);
 	ret = wait_for_completion_timeout(&ul_wakeup_ack_completion, HZ);
-	BUG_ON(ret == 0);
+	if (unlikely(ret == 0) && ssrestart_check()) {
+		mutex_unlock(&wakeup_lock);
+		bam_dmux_log("%s timeout wakeup ack\n", __func__);
+		return;
+	}
 	bam_dmux_log("%s waiting completion\n", __func__);
 	ret = wait_for_completion_timeout(&bam_connection_completion, HZ);
-	BUG_ON(ret == 0);
+	if (unlikely(ret == 0) && ssrestart_check()) {
+		mutex_unlock(&wakeup_lock);
+		bam_dmux_log("%s timeout power on\n", __func__);
+		return;
+	}
 
 	bam_is_connected = 1;
 	bam_dmux_log("%s complete\n", __func__);
@@ -1610,6 +1651,11 @@
 		ul_powerdown();
 		wait_for_ack = 0;
 	}
+	/*
+	 * If the modem crashes during ul_wakeup(), power_vote is 1 and needs
+	 * to be reset to 0.  Harmless if bam_is_connected check above passes.
+	 */
+	power_vote(0);
 	write_unlock_irqrestore(&ul_wakeup_lock, flags);
 	ul_powerdown_finish();
 	a2_pc_disabled = 0;
diff --git a/drivers/net/msm_rmnet_bam.c b/drivers/net/msm_rmnet_bam.c
index a0924ed..f11fb60 100644
--- a/drivers/net/msm_rmnet_bam.c
+++ b/drivers/net/msm_rmnet_bam.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -311,7 +311,7 @@
 	/* if write() succeeds, skb access is unsafe in this process */
 	bam_ret = msm_bam_dmux_write(p->ch_id, skb);
 
-	if (bam_ret != 0 && bam_ret != -EAGAIN) {
+	if (bam_ret != 0 && bam_ret != -EAGAIN && bam_ret != -EFAULT) {
 		pr_err("[%s] %s: write returned error %d",
 			dev->name, __func__, bam_ret);
 		return -EPERM;
@@ -464,6 +464,16 @@
 	if (ret == -EPERM)
 		return NETDEV_TX_BUSY;
 
+	/*
+	 * detected SSR a bit early.  shut some things down now, and leave
+	 * the rest to the main ssr handling code when that happens later
+	 */
+	if (ret == -EFAULT) {
+		netif_carrier_off(dev);
+		dev_kfree_skb_any(skb);
+		return 0;
+	}
+
 	if (ret == -EAGAIN) {
 		/*
 		 * This should not happen
@@ -690,6 +700,7 @@
 
 	p = netdev_priv(netdevs[i]);
 	p->in_reset = 1;
+	p->waiting_for_ul = 0;
 	msm_bam_dmux_close(p->ch_id);
 	netif_carrier_off(netdevs[i]);
 	netif_stop_queue(netdevs[i]);