drm/radeon: rework page flip handling v3

Instead of trying to flip inside the vblank period when
the buffer is idle, offload blocking for idle to a kernel
thread and program the flip directly into the hardware.

v2: add error handling, fix EBUSY handling
v3: add proper exclusive_lock handling

Signed-off-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 4ae304d..736bfa2 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -676,14 +676,16 @@
  * IRQS.
  */
 
-struct radeon_unpin_work {
-	struct work_struct work;
-	struct radeon_device *rdev;
-	int crtc_id;
-	struct radeon_fence *fence;
+struct radeon_flip_work {
+	struct work_struct		flip_work;
+	struct work_struct		unpin_work;
+	struct radeon_device		*rdev;
+	int				crtc_id;
+	struct drm_framebuffer		*fb;
 	struct drm_pending_vblank_event *event;
-	struct radeon_bo *old_rbo;
-	u64 new_crtc_base;
+	struct radeon_bo		*old_rbo;
+	struct radeon_bo		*new_rbo;
+	struct radeon_fence		*fence;
 };
 
 struct r500_irq_stat_regs {
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 88e3cbe..6b3de5c 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -249,16 +249,21 @@
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
 
 	drm_crtc_cleanup(crtc);
+	destroy_workqueue(radeon_crtc->flip_queue);
 	kfree(radeon_crtc);
 }
 
-/*
- * Handle unpin events outside the interrupt handler proper.
+/**
+ * radeon_unpin_work_func - unpin old buffer object
+ *
+ * @__work - kernel work item
+ *
+ * Unpin the old frame buffer object outside of the interrupt handler
  */
 static void radeon_unpin_work_func(struct work_struct *__work)
 {
-	struct radeon_unpin_work *work =
-		container_of(__work, struct radeon_unpin_work, work);
+	struct radeon_flip_work *work =
+		container_of(__work, struct radeon_flip_work, unpin_work);
 	int r;
 
 	/* unpin of the old buffer */
@@ -279,30 +284,19 @@
 void radeon_crtc_handle_vblank(struct radeon_device *rdev, int crtc_id)
 {
 	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
-	struct radeon_unpin_work *work;
+	struct radeon_flip_work *work;
 	unsigned long flags;
 	u32 update_pending;
 	int vpos, hpos;
 
 	spin_lock_irqsave(&rdev->ddev->event_lock, flags);
-	work = radeon_crtc->unpin_work;
-	if (work == NULL ||
-	    (work->fence && !radeon_fence_signaled(work->fence))) {
+	work = radeon_crtc->flip_work;
+	if (work == NULL) {
 		spin_unlock_irqrestore(&rdev->ddev->event_lock, flags);
 		return;
 	}
-	/* New pageflip, or just completion of a previous one? */
-	if (!radeon_crtc->deferred_flip_completion) {
-		/* do the flip (mmio) */
-		radeon_page_flip(rdev, crtc_id, work->new_crtc_base);
-		update_pending = radeon_page_flip_pending(rdev, crtc_id);
-	} else {
-		/* This is just a completion of a flip queued in crtc
-		 * at last invocation. Make sure we go directly to
-		 * completion routine.
-		 */
-		update_pending = 0;
-	}
+
+	update_pending = radeon_page_flip_pending(rdev, crtc_id);
 
 	/* Has the pageflip already completed in crtc, or is it certain
 	 * to complete in this vblank?
@@ -320,19 +314,9 @@
 		 */
 		update_pending = 0;
 	}
-	if (update_pending) {
-		/* crtc didn't flip in this target vblank interval,
-		 * but flip is pending in crtc. It will complete it
-		 * in next vblank interval, so complete the flip at
-		 * next vblank irq.
-		 */
-		radeon_crtc->deferred_flip_completion = 1;
-		spin_unlock_irqrestore(&rdev->ddev->event_lock, flags);
-		return;
-	} else {
-		spin_unlock_irqrestore(&rdev->ddev->event_lock, flags);
+	spin_unlock_irqrestore(&rdev->ddev->event_lock, flags);
+	if (!update_pending)
 		radeon_crtc_handle_flip(rdev, crtc_id);
-	}
 }
 
 /**
@@ -346,7 +330,7 @@
 void radeon_crtc_handle_flip(struct radeon_device *rdev, int crtc_id)
 {
 	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[crtc_id];
-	struct radeon_unpin_work *work;
+	struct radeon_flip_work *work;
 	unsigned long flags;
 
 	/* this can happen at init */
@@ -354,15 +338,14 @@
 		return;
 
 	spin_lock_irqsave(&rdev->ddev->event_lock, flags);
-	work = radeon_crtc->unpin_work;
+	work = radeon_crtc->flip_work;
 	if (work == NULL) {
 		spin_unlock_irqrestore(&rdev->ddev->event_lock, flags);
 		return;
 	}
 
-	/* Pageflip (will be) certainly completed in this vblank. Clean up. */
-	radeon_crtc->unpin_work = NULL;
-	radeon_crtc->deferred_flip_completion = 0;
+	/* Pageflip completed. Clean up. */
+	radeon_crtc->flip_work = NULL;
 
 	/* wakeup userspace */
 	if (work->event)
@@ -372,83 +355,69 @@
 
 	radeon_fence_unref(&work->fence);
 	radeon_irq_kms_pflip_irq_get(rdev, work->crtc_id);
-	schedule_work(&work->work);
+	queue_work(radeon_crtc->flip_queue, &work->unpin_work);
 }
 
-static int radeon_crtc_page_flip(struct drm_crtc *crtc,
-				 struct drm_framebuffer *fb,
-				 struct drm_pending_vblank_event *event,
-				 uint32_t page_flip_flags)
+/**
+ * radeon_flip_work_func - page flip framebuffer
+ *
+ * @work - kernel work item
+ *
+ * Wait for the buffer object to become idle and do the actual page flip
+ */
+static void radeon_flip_work_func(struct work_struct *__work)
 {
-	struct drm_device *dev = crtc->dev;
-	struct radeon_device *rdev = dev->dev_private;
-	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
-	struct radeon_framebuffer *old_radeon_fb;
-	struct radeon_framebuffer *new_radeon_fb;
-	struct drm_gem_object *obj;
-	struct radeon_bo *rbo;
-	struct radeon_unpin_work *work;
+	struct radeon_flip_work *work =
+		container_of(__work, struct radeon_flip_work, flip_work);
+	struct radeon_device *rdev = work->rdev;
+	struct radeon_crtc *radeon_crtc = rdev->mode_info.crtcs[work->crtc_id];
+
+	struct drm_crtc *crtc = &radeon_crtc->base;
+	struct drm_framebuffer *fb = work->fb;
+
+	uint32_t tiling_flags, pitch_pixels;
+	uint64_t base;
+
 	unsigned long flags;
-	u32 tiling_flags, pitch_pixels;
-	u64 base;
 	int r;
 
-	work = kzalloc(sizeof *work, GFP_KERNEL);
-	if (work == NULL)
-		return -ENOMEM;
+        down_read(&rdev->exclusive_lock);
+	while (work->fence) {
+		r = radeon_fence_wait(work->fence, false);
+		if (r == -EDEADLK) {
+			up_read(&rdev->exclusive_lock);
+			r = radeon_gpu_reset(rdev);
+			down_read(&rdev->exclusive_lock);
+		}
 
-	work->event = event;
-	work->rdev = rdev;
-	work->crtc_id = radeon_crtc->crtc_id;
-	old_radeon_fb = to_radeon_framebuffer(crtc->primary->fb);
-	new_radeon_fb = to_radeon_framebuffer(fb);
-	/* schedule unpin of the old buffer */
-	obj = old_radeon_fb->obj;
-	/* take a reference to the old object */
-	drm_gem_object_reference(obj);
-	rbo = gem_to_radeon_bo(obj);
-	work->old_rbo = rbo;
-	obj = new_radeon_fb->obj;
-	rbo = gem_to_radeon_bo(obj);
-
-	spin_lock(&rbo->tbo.bdev->fence_lock);
-	if (rbo->tbo.sync_obj)
-		work->fence = radeon_fence_ref(rbo->tbo.sync_obj);
-	spin_unlock(&rbo->tbo.bdev->fence_lock);
-
-	INIT_WORK(&work->work, radeon_unpin_work_func);
-
-	/* We borrow the event spin lock for protecting unpin_work */
-	spin_lock_irqsave(&dev->event_lock, flags);
-	if (radeon_crtc->unpin_work) {
-		DRM_DEBUG_DRIVER("flip queue: crtc already busy\n");
-		r = -EBUSY;
-		goto unlock_free;
+		if (r) {
+			DRM_ERROR("failed to wait on page flip fence (%d)!\n",
+				  r);
+			goto cleanup;
+		} else
+			radeon_fence_unref(&work->fence);
 	}
-	radeon_crtc->unpin_work = work;
-	radeon_crtc->deferred_flip_completion = 0;
-	spin_unlock_irqrestore(&dev->event_lock, flags);
 
 	/* pin the new buffer */
 	DRM_DEBUG_DRIVER("flip-ioctl() cur_fbo = %p, cur_bbo = %p\n",
-			 work->old_rbo, rbo);
+			 work->old_rbo, work->new_rbo);
 
-	r = radeon_bo_reserve(rbo, false);
+	r = radeon_bo_reserve(work->new_rbo, false);
 	if (unlikely(r != 0)) {
 		DRM_ERROR("failed to reserve new rbo buffer before flip\n");
-		goto pflip_cleanup;
+		goto cleanup;
 	}
 	/* Only 27 bit offset for legacy CRTC */
-	r = radeon_bo_pin_restricted(rbo, RADEON_GEM_DOMAIN_VRAM,
+	r = radeon_bo_pin_restricted(work->new_rbo, RADEON_GEM_DOMAIN_VRAM,
 				     ASIC_IS_AVIVO(rdev) ? 0 : 1 << 27, &base);
 	if (unlikely(r != 0)) {
-		radeon_bo_unreserve(rbo);
+		radeon_bo_unreserve(work->new_rbo);
 		r = -EINVAL;
 		DRM_ERROR("failed to pin new rbo buffer before flip\n");
-		goto pflip_cleanup;
+		goto cleanup;
 	}
-	radeon_bo_get_tiling_flags(rbo, &tiling_flags, NULL);
-	radeon_bo_unreserve(rbo);
+	radeon_bo_get_tiling_flags(work->new_rbo, &tiling_flags, NULL);
+	radeon_bo_unreserve(work->new_rbo);
 
 	if (!ASIC_IS_AVIVO(rdev)) {
 		/* crtc offset is from display base addr not FB location */
@@ -486,28 +455,91 @@
 		base &= ~7;
 	}
 
-	spin_lock_irqsave(&dev->event_lock, flags);
-	work->new_crtc_base = base;
-	spin_unlock_irqrestore(&dev->event_lock, flags);
-
-	/* update crtc fb */
-	crtc->primary->fb = fb;
+	/* We borrow the event spin lock for protecting flip_work */
+	spin_lock_irqsave(&crtc->dev->event_lock, flags);
 
 	/* set the proper interrupt */
 	radeon_irq_kms_pflip_irq_get(rdev, radeon_crtc->crtc_id);
 
-	return 0;
+	/* do the flip (mmio) */
+	radeon_page_flip(rdev, radeon_crtc->crtc_id, base);
 
-pflip_cleanup:
-	spin_lock_irqsave(&dev->event_lock, flags);
-	radeon_crtc->unpin_work = NULL;
-unlock_free:
-	spin_unlock_irqrestore(&dev->event_lock, flags);
-	drm_gem_object_unreference_unlocked(old_radeon_fb->obj);
+	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+	up_read(&rdev->exclusive_lock);
+
+	return;
+
+cleanup:
+	drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base);
 	radeon_fence_unref(&work->fence);
 	kfree(work);
+	up_read(&rdev->exclusive_lock);
+}
 
-	return r;
+static int radeon_crtc_page_flip(struct drm_crtc *crtc,
+				 struct drm_framebuffer *fb,
+				 struct drm_pending_vblank_event *event,
+				 uint32_t page_flip_flags)
+{
+	struct drm_device *dev = crtc->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct radeon_framebuffer *old_radeon_fb;
+	struct radeon_framebuffer *new_radeon_fb;
+	struct drm_gem_object *obj;
+	struct radeon_flip_work *work;
+	unsigned long flags;
+
+	work = kzalloc(sizeof *work, GFP_KERNEL);
+	if (work == NULL)
+		return -ENOMEM;
+
+	INIT_WORK(&work->flip_work, radeon_flip_work_func);
+	INIT_WORK(&work->unpin_work, radeon_unpin_work_func);
+
+	work->rdev = rdev;
+	work->crtc_id = radeon_crtc->crtc_id;
+	work->fb = fb;
+	work->event = event;
+
+	/* schedule unpin of the old buffer */
+	old_radeon_fb = to_radeon_framebuffer(crtc->primary->fb);
+	obj = old_radeon_fb->obj;
+
+	/* take a reference to the old object */
+	drm_gem_object_reference(obj);
+	work->old_rbo = gem_to_radeon_bo(obj);
+
+	new_radeon_fb = to_radeon_framebuffer(fb);
+	obj = new_radeon_fb->obj;
+	work->new_rbo = gem_to_radeon_bo(obj);
+
+	spin_lock(&work->new_rbo->tbo.bdev->fence_lock);
+	if (work->new_rbo->tbo.sync_obj)
+		work->fence = radeon_fence_ref(work->new_rbo->tbo.sync_obj);
+	spin_unlock(&work->new_rbo->tbo.bdev->fence_lock);
+
+	/* update crtc fb */
+	crtc->primary->fb = fb;
+
+	/* We borrow the event spin lock for protecting flip_work */
+	spin_lock_irqsave(&crtc->dev->event_lock, flags);
+
+	if (radeon_crtc->flip_work) {
+		DRM_DEBUG_DRIVER("flip queue: crtc already busy\n");
+		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+		drm_gem_object_unreference_unlocked(&work->old_rbo->gem_base);
+		radeon_fence_unref(&work->fence);
+		kfree(work);
+		return -EBUSY;
+	}
+	radeon_crtc->flip_work = work;
+
+	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+
+	queue_work(radeon_crtc->flip_queue, &work->flip_work);
+
+	return 0;
 }
 
 static int
@@ -577,6 +609,7 @@
 
 	drm_mode_crtc_set_gamma_size(&radeon_crtc->base, 256);
 	radeon_crtc->crtc_id = index;
+	radeon_crtc->flip_queue = create_singlethread_workqueue("radeon-crtc");
 	rdev->mode_info.crtcs[index] = radeon_crtc;
 
 	if (rdev->family >= CHIP_BONAIRE) {
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 718be1a..ea72ad8 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -325,8 +325,8 @@
 	struct drm_display_mode native_mode;
 	int pll_id;
 	/* page flipping */
-	struct radeon_unpin_work *unpin_work;
-	int deferred_flip_completion;
+	struct workqueue_struct *flip_queue;
+	struct radeon_flip_work *flip_work;
 	/* pll sharing */
 	struct radeon_atom_ss ss;
 	bool ss_enabled;