vmwgfx: Fix potential execbuf deadlocks

Perform all command stream validation in a bounce buffer separate from the
fifo. This makes the fifo available to all validation-generated commands,
which would otherwise attempt to grab the fifo recursively, causing a
deadlock. This is in preparation for GMR2 and swappable surfaces.

Also maintain references to all surfaces in the command stream until the
command stream has been fired in order to avoid racing with surface
destruction taking place after validation but before submission.

Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Jakob Bornecrantz <jakob@vmware.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 8ca3ddb..c6ff0e4 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -44,10 +44,36 @@
 	return 0;
 }
 
+
+static int vmw_resource_to_validate_list(struct vmw_sw_context *sw_context,
+					 struct vmw_resource **p_res)
+{
+	int ret = 0;
+	struct vmw_resource *res = *p_res;
+
+	if (!res->on_validate_list) {
+		if (sw_context->num_ref_resources >= VMWGFX_MAX_VALIDATIONS) {
+			DRM_ERROR("Too many resources referenced in "
+				  "command stream.\n");
+			ret = -ENOMEM;
+			goto out;
+		}
+		sw_context->resources[sw_context->num_ref_resources++] = res;
+		res->on_validate_list = true;
+		return 0;
+	}
+
+out:
+	vmw_resource_unreference(p_res);
+	return ret;
+}
+
 static int vmw_cmd_cid_check(struct vmw_private *dev_priv,
 			     struct vmw_sw_context *sw_context,
 			     SVGA3dCmdHeader *header)
 {
+	struct vmw_resource *ctx;
+
 	struct vmw_cid_cmd {
 		SVGA3dCmdHeader header;
 		__le32 cid;
@@ -58,7 +84,8 @@
 	if (likely(sw_context->cid_valid && cmd->cid == sw_context->last_cid))
 		return 0;
 
-	ret = vmw_context_check(dev_priv, sw_context->tfile, cmd->cid);
+	ret = vmw_context_check(dev_priv, sw_context->tfile, cmd->cid,
+				&ctx);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Could not find or use context %u\n",
 			  (unsigned) cmd->cid);
@@ -67,39 +94,43 @@
 
 	sw_context->last_cid = cmd->cid;
 	sw_context->cid_valid = true;
-
-	return 0;
+	return vmw_resource_to_validate_list(sw_context, &ctx);
 }
 
 static int vmw_cmd_sid_check(struct vmw_private *dev_priv,
 			     struct vmw_sw_context *sw_context,
 			     uint32_t *sid)
 {
+	struct vmw_surface *srf;
+	int ret;
+	struct vmw_resource *res;
+
 	if (*sid == SVGA3D_INVALID_ID)
 		return 0;
 
-	if (unlikely((!sw_context->sid_valid  ||
-		      *sid != sw_context->last_sid))) {
-		int real_id;
-		int ret = vmw_surface_check(dev_priv, sw_context->tfile,
-					    *sid, &real_id);
-
-		if (unlikely(ret != 0)) {
-			DRM_ERROR("Could ot find or use surface 0x%08x "
-				  "address 0x%08lx\n",
-				  (unsigned int) *sid,
-				  (unsigned long) sid);
-			return ret;
-		}
-
-		sw_context->last_sid = *sid;
-		sw_context->sid_valid = true;
-		*sid = real_id;
-		sw_context->sid_translation = real_id;
-	} else
+	if (likely((sw_context->sid_valid  &&
+		      *sid == sw_context->last_sid))) {
 		*sid = sw_context->sid_translation;
+		return 0;
+	}
 
-	return 0;
+	ret = vmw_user_surface_lookup_handle(dev_priv, sw_context->tfile,
+					     *sid, &srf);
+	if (unlikely(ret != 0)) {
+		DRM_ERROR("Could ot find or use surface 0x%08x "
+			  "address 0x%08lx\n",
+			  (unsigned int) *sid,
+			  (unsigned long) sid);
+		return ret;
+	}
+
+	sw_context->last_sid = *sid;
+	sw_context->sid_valid = true;
+	sw_context->sid_translation = srf->res.id;
+	*sid = sw_context->sid_translation;
+
+	res = &srf->res;
+	return vmw_resource_to_validate_list(sw_context, &res);
 }
 
 
@@ -213,7 +244,7 @@
 	reloc->location = ptr;
 
 	cur_validate_node = vmw_dmabuf_validate_node(bo, sw_context->cur_val_buf);
-	if (unlikely(cur_validate_node >= VMWGFX_MAX_GMRS)) {
+	if (unlikely(cur_validate_node >= VMWGFX_MAX_VALIDATIONS)) {
 		DRM_ERROR("Max number of DMA buffers per submission"
 			  " exceeded.\n");
 		ret = -EINVAL;
@@ -303,6 +334,7 @@
 		SVGA3dCmdSurfaceDMA dma;
 	} *cmd;
 	int ret;
+	struct vmw_resource *res;
 
 	cmd = container_of(header, struct vmw_dma_cmd, header);
 	ret = vmw_translate_guest_ptr(dev_priv, sw_context,
@@ -319,17 +351,16 @@
 		goto out_no_reloc;
 	}
 
-	/**
+	/*
 	 * Patch command stream with device SID.
 	 */
-
 	cmd->dma.host.sid = srf->res.id;
 	vmw_kms_cursor_snoop(srf, sw_context->tfile, bo, header);
-	/**
-	 * FIXME: May deadlock here when called from the
-	 * command parsing code.
-	 */
-	vmw_surface_unreference(&srf);
+
+	vmw_dmabuf_unreference(&vmw_bo);
+
+	res = &srf->res;
+	return vmw_resource_to_validate_list(sw_context, &res);
 
 out_no_reloc:
 	vmw_dmabuf_unreference(&vmw_bo);
@@ -501,8 +532,9 @@
 
 static int vmw_cmd_check_all(struct vmw_private *dev_priv,
 			     struct vmw_sw_context *sw_context,
-			     void *buf, uint32_t size)
+			     uint32_t size)
 {
+	void *buf = sw_context->cmd_bounce;
 	int32_t cur_size = size;
 	int ret;
 
@@ -551,7 +583,11 @@
 static void vmw_clear_validations(struct vmw_sw_context *sw_context)
 {
 	struct ttm_validate_buffer *entry, *next;
+	uint32_t i = sw_context->num_ref_resources;
 
+	/*
+	 * Drop references to DMA buffers held during command submission.
+	 */
 	list_for_each_entry_safe(entry, next, &sw_context->validate_nodes,
 				 head) {
 		list_del(&entry->head);
@@ -560,6 +596,14 @@
 		sw_context->cur_val_buf--;
 	}
 	BUG_ON(sw_context->cur_val_buf != 0);
+
+	/*
+	 * Drop references to resources held during command submission.
+	 */
+	while (i-- > 0) {
+		sw_context->resources[i]->on_validate_list = false;
+		vmw_resource_unreference(&sw_context->resources[i]);
+	}
 }
 
 static int vmw_validate_single_buffer(struct vmw_private *dev_priv,
@@ -603,6 +647,35 @@
 	return 0;
 }
 
+static int vmw_resize_cmd_bounce(struct vmw_sw_context *sw_context,
+				 uint32_t size)
+{
+	if (likely(sw_context->cmd_bounce_size >= size))
+		return 0;
+
+	if (sw_context->cmd_bounce_size == 0)
+		sw_context->cmd_bounce_size = VMWGFX_CMD_BOUNCE_INIT_SIZE;
+
+	while (sw_context->cmd_bounce_size < size) {
+		sw_context->cmd_bounce_size =
+			PAGE_ALIGN(sw_context->cmd_bounce_size +
+				   (sw_context->cmd_bounce_size >> 1));
+	}
+
+	if (sw_context->cmd_bounce != NULL)
+		vfree(sw_context->cmd_bounce);
+
+	sw_context->cmd_bounce = vmalloc(sw_context->cmd_bounce_size);
+
+	if (sw_context->cmd_bounce == NULL) {
+		DRM_ERROR("Failed to allocate command bounce buffer.\n");
+		sw_context->cmd_bounce_size = 0;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
 int vmw_execbuf_ioctl(struct drm_device *dev, void *data,
 		      struct drm_file *file_priv)
 {
@@ -627,20 +700,18 @@
 		goto out_no_cmd_mutex;
 	}
 
-	cmd = vmw_fifo_reserve(dev_priv, arg->command_size);
-	if (unlikely(cmd == NULL)) {
-		DRM_ERROR("Failed reserving fifo space for commands.\n");
-		ret = -ENOMEM;
+	ret = vmw_resize_cmd_bounce(sw_context, arg->command_size);
+	if (unlikely(ret != 0))
 		goto out_unlock;
-	}
 
 	user_cmd = (void __user *)(unsigned long)arg->commands;
-	ret = copy_from_user(cmd, user_cmd, arg->command_size);
+	ret = copy_from_user(sw_context->cmd_bounce,
+			     user_cmd, arg->command_size);
 
 	if (unlikely(ret != 0)) {
 		ret = -EFAULT;
 		DRM_ERROR("Failed copying commands.\n");
-		goto out_commit;
+		goto out_unlock;
 	}
 
 	sw_context->tfile = vmw_fpriv(file_priv)->tfile;
@@ -648,12 +719,14 @@
 	sw_context->sid_valid = false;
 	sw_context->cur_reloc = 0;
 	sw_context->cur_val_buf = 0;
+	sw_context->num_ref_resources = 0;
 
 	INIT_LIST_HEAD(&sw_context->validate_nodes);
 
-	ret = vmw_cmd_check_all(dev_priv, sw_context, cmd, arg->command_size);
+	ret = vmw_cmd_check_all(dev_priv, sw_context, arg->command_size);
 	if (unlikely(ret != 0))
 		goto out_err;
+
 	ret = ttm_eu_reserve_buffers(&sw_context->validate_nodes);
 	if (unlikely(ret != 0))
 		goto out_err;
@@ -669,9 +742,17 @@
 				   arg->throttle_us);
 
 		if (unlikely(ret != 0))
-			goto out_err;
+			goto out_throttle;
 	}
 
+	cmd = vmw_fifo_reserve(dev_priv, arg->command_size);
+	if (unlikely(cmd == NULL)) {
+		DRM_ERROR("Failed reserving fifo space for commands.\n");
+		ret = -ENOMEM;
+		goto out_err;
+	}
+
+	memcpy(cmd, sw_context->cmd_bounce, arg->command_size);
 	vmw_fifo_commit(dev_priv, arg->command_size);
 
 	ret = vmw_fifo_send_fence(dev_priv, &sequence);
@@ -708,10 +789,9 @@
 	return 0;
 out_err:
 	vmw_free_relocations(sw_context);
+out_throttle:
 	ttm_eu_backoff_reservation(&sw_context->validate_nodes);
 	vmw_clear_validations(sw_context);
-out_commit:
-	vmw_fifo_commit(dev_priv, 0);
 out_unlock:
 	mutex_unlock(&dev_priv->cmdbuf_mutex);
 out_no_cmd_mutex: