mei: use hbm idle state to prevent spurious resets
When a reset is caused by an hbm protocol mismatch or a timeout,
we might end up in an endless reset loop and the hbm protocol
will never sync.
Cc: <stable@vger.kernel.org>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
diff --git a/drivers/misc/mei/hbm.c b/drivers/misc/mei/hbm.c
index 8109b9a..836f92d 100644
--- a/drivers/misc/mei/hbm.c
+++ b/drivers/misc/mei/hbm.c
@@ -126,6 +126,17 @@
return false;
}
+/**
+ * mei_hbm_idle - set hbm to idle state and cancel the init clients timer
+ *
+ * @dev: the device structure
+ */
+void mei_hbm_idle(struct mei_device *dev)
+{
+ dev->init_clients_timer = 0;
+ dev->hbm_state = MEI_HBM_IDLE;
+}
+
int mei_hbm_start_wait(struct mei_device *dev)
{
int ret;
@@ -583,6 +594,14 @@
mei_read_slots(dev, dev->rd_msg_buf, hdr->length);
mei_msg = (struct mei_bus_message *)dev->rd_msg_buf;
+ /* ignore spurious message and prevent reset nesting
+ * hbm is put to idle during system reset
+ */
+ if (dev->hbm_state == MEI_HBM_IDLE) {
+ dev_dbg(&dev->pdev->dev, "hbm: state is idle ignore spurious messages\n");
+ return 0;
+ }
+
switch (mei_msg->hbm_cmd) {
case HOST_START_RES_CMD:
dev_dbg(&dev->pdev->dev, "hbm: start: response message received.\n");
diff --git a/drivers/misc/mei/hbm.h b/drivers/misc/mei/hbm.h
index f2540ff..5f92188 100644
--- a/drivers/misc/mei/hbm.h
+++ b/drivers/misc/mei/hbm.h
@@ -49,6 +49,7 @@
hdr->reserved = 0;
}
+void mei_hbm_idle(struct mei_device *dev);
int mei_hbm_start_req(struct mei_device *dev);
int mei_hbm_start_wait(struct mei_device *dev);
int mei_hbm_cl_flow_control_req(struct mei_device *dev, struct mei_cl *cl);
diff --git a/drivers/misc/mei/init.c b/drivers/misc/mei/init.c
index 87c077b..c47fa27 100644
--- a/drivers/misc/mei/init.c
+++ b/drivers/misc/mei/init.c
@@ -129,14 +129,19 @@
dev_warn(&dev->pdev->dev, "unexpected reset: dev_state = %s\n",
mei_dev_state_str(dev->dev_state));
+ /* we're already in reset, cancel the init timer
+ * if the reset was called due to an hbm protocol error
+ * we need to do this before hw start
+ * so the hbm watchdog won't kick in
+ */
+ mei_hbm_idle(dev);
+
ret = mei_hw_reset(dev, interrupts_enabled);
if (ret) {
dev_err(&dev->pdev->dev, "hw reset failed disabling the device\n");
interrupts_enabled = false;
- dev->dev_state = MEI_DEV_DISABLED;
}
- dev->hbm_state = MEI_HBM_IDLE;
if (dev->dev_state != MEI_DEV_INITIALIZING &&
dev->dev_state != MEI_DEV_POWER_UP) {
@@ -160,8 +165,6 @@
memset(&dev->wr_ext_msg, 0, sizeof(dev->wr_ext_msg));
}
- /* we're already in reset, cancel the init timer */
- dev->init_clients_timer = 0;
dev->me_clients_num = 0;
dev->rd_msg_hdr = 0;
@@ -169,6 +172,7 @@
if (!interrupts_enabled) {
dev_dbg(&dev->pdev->dev, "intr not enabled end of reset\n");
+ dev->dev_state = MEI_DEV_DISABLED;
return;
}
diff --git a/drivers/misc/mei/interrupt.c b/drivers/misc/mei/interrupt.c
index bbb61be..206dbe9 100644
--- a/drivers/misc/mei/interrupt.c
+++ b/drivers/misc/mei/interrupt.c
@@ -537,7 +537,6 @@
*
* @work: pointer to the work_struct structure
*
- * NOTE: This function is called by timer interrupt work
*/
void mei_timer(struct work_struct *work)
{
@@ -552,18 +551,24 @@
mutex_lock(&dev->device_lock);
- if (dev->dev_state != MEI_DEV_ENABLED) {
- if (dev->dev_state == MEI_DEV_INIT_CLIENTS) {
- if (dev->init_clients_timer) {
- if (--dev->init_clients_timer == 0) {
- dev_err(&dev->pdev->dev, "reset: init clients timeout hbm_state = %d.\n",
- dev->hbm_state);
- mei_reset(dev, 1);
- }
+
+ /* Catch interrupt stalls during HBM init handshake */
+ if (dev->dev_state == MEI_DEV_INIT_CLIENTS &&
+ dev->hbm_state != MEI_HBM_IDLE) {
+
+ if (dev->init_clients_timer) {
+ if (--dev->init_clients_timer == 0) {
+ dev_err(&dev->pdev->dev, "timer: init clients timeout hbm_state = %d.\n",
+ dev->hbm_state);
+ mei_reset(dev, 1);
+ goto out;
}
}
- goto out;
}
+
+ if (dev->dev_state != MEI_DEV_ENABLED)
+ goto out;
+
/*** connect/disconnect timeouts ***/
list_for_each_entry_safe(cl_pos, cl_next, &dev->file_list, link) {
if (cl_pos->timer_count) {