drm/radeon/kms: add support for semaphores v3

They are used to sync between rings, while fences
sync between a ring and the cpu.

v2 Fix radeon_semaphore_driver_fini when no semaphore were
allocated.

v3 Initialize list early on to avoid issue in case or early
error

Signed-off-by: Christian König <deathsimple@vodafone.de>
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile
index cf8b4bc..94dcdc7 100644
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@@ -70,7 +70,8 @@
 	r200.o radeon_legacy_tv.o r600_cs.o r600_blit.o r600_blit_shaders.o \
 	r600_blit_kms.o radeon_pm.o atombios_dp.o r600_audio.o r600_hdmi.o \
 	evergreen.o evergreen_cs.o evergreen_blit_shaders.o evergreen_blit_kms.o \
-	radeon_trace_points.o ni.o cayman_blit_shaders.o atombios_encoders.o
+	radeon_trace_points.o ni.o cayman_blit_shaders.o atombios_encoders.o \
+	radeon_semaphore.o
 
 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
 radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
@@ -78,4 +79,4 @@
 
 obj-$(CONFIG_DRM_RADEON)+= radeon.o
 
-CFLAGS_radeon_trace_points.o := -I$(src)
\ No newline at end of file
+CFLAGS_radeon_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 1c7af56..233cbc0 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -1448,7 +1448,7 @@
 	tmp |= BUF_SWAP_32BIT;
 #endif
 	WREG32(CP_RB_CNTL, tmp);
-	WREG32(CP_SEM_WAIT_TIMER, 0x4);
+	WREG32(CP_SEM_WAIT_TIMER, 0x0);
 
 	/* Set the write pointer delay */
 	WREG32(CP_RB_WPTR_DELAY, 0);
@@ -3290,6 +3290,7 @@
 	evergreen_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
+	radeon_semaphore_driver_fini(rdev);
 	radeon_fence_driver_fini(rdev);
 	radeon_agp_fini(rdev);
 	radeon_bo_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 710ad8c..ef74995 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -1136,7 +1136,7 @@
 	WREG32(GRBM_SOFT_RESET, 0);
 	RREG32(GRBM_SOFT_RESET);
 
-	WREG32(CP_SEM_WAIT_TIMER, 0x4);
+	WREG32(CP_SEM_WAIT_TIMER, 0x0);
 
 	/* Set the write pointer delay */
 	WREG32(CP_RB_WPTR_DELAY, 0);
@@ -1557,6 +1557,7 @@
 	cayman_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
+	radeon_semaphore_driver_fini(rdev);
 	radeon_fence_driver_fini(rdev);
 	radeon_bo_fini(rdev);
 	radeon_atombios_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index d2dced5..2f18163 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -832,6 +832,14 @@
 	radeon_ring_write(rdev, RADEON_SW_INT_FIRE);
 }
 
+void r100_semaphore_ring_emit(struct radeon_device *rdev,
+			      struct radeon_semaphore *semaphore,
+			      unsigned ring, bool emit_wait)
+{
+	/* Unused on older asics, since we don't have semaphores or multiple rings */
+	BUG();
+}
+
 int r100_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset,
 		   uint64_t dst_offset,
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 2fff8ce..bd2b3d0 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2190,7 +2190,7 @@
 	tmp |= BUF_SWAP_32BIT;
 #endif
 	WREG32(CP_RB_CNTL, tmp);
-	WREG32(CP_SEM_WAIT_TIMER, 0x4);
+	WREG32(CP_SEM_WAIT_TIMER, 0x0);
 
 	/* Set the write pointer delay */
 	WREG32(CP_RB_WPTR_DELAY, 0);
@@ -2357,6 +2357,18 @@
 	}
 }
 
+void r600_semaphore_ring_emit(struct radeon_device *rdev,
+			      struct radeon_semaphore *semaphore,
+			      unsigned ring, bool emit_wait)
+{
+	uint64_t addr = semaphore->gpu_addr;
+	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
+
+	radeon_ring_write(rdev, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
+	radeon_ring_write(rdev, addr & 0xffffffff);
+	radeon_ring_write(rdev, (upper_32_bits(addr) & 0xff) | sel);
+}
+
 int r600_copy_blit(struct radeon_device *rdev,
 		   uint64_t src_offset,
 		   uint64_t dst_offset,
@@ -2649,6 +2661,7 @@
 	r600_vram_scratch_fini(rdev);
 	radeon_agp_fini(rdev);
 	radeon_gem_fini(rdev);
+	radeon_semaphore_driver_fini(rdev);
 	radeon_fence_driver_fini(rdev);
 	radeon_bo_fini(rdev);
 	radeon_atombios_fini(rdev);
diff --git a/drivers/gpu/drm/radeon/r600_cp.c b/drivers/gpu/drm/radeon/r600_cp.c
index c9db493..84c5462 100644
--- a/drivers/gpu/drm/radeon/r600_cp.c
+++ b/drivers/gpu/drm/radeon/r600_cp.c
@@ -1815,7 +1815,7 @@
 		     dev_priv->ring.size_l2qw);
 #endif
 
-	RADEON_WRITE(R600_CP_SEM_WAIT_TIMER, 0x4);
+	RADEON_WRITE(R600_CP_SEM_WAIT_TIMER, 0x0);
 
 	/* Set the write pointer delay */
 	RADEON_WRITE(R600_CP_RB_WPTR_DELAY, 0);
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index bfe1b5d..3ee1fd7 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -831,6 +831,8 @@
 #define	PACKET3_STRMOUT_BUFFER_UPDATE			0x34
 #define	PACKET3_INDIRECT_BUFFER_MP			0x38
 #define	PACKET3_MEM_SEMAPHORE				0x39
+#              define PACKET3_SEM_SEL_SIGNAL	    (0x6 << 29)
+#              define PACKET3_SEM_SEL_WAIT	    (0x7 << 29)
 #define	PACKET3_MPEG_INDEX				0x3A
 #define	PACKET3_WAIT_REG_MEM				0x3C
 #define	PACKET3_MEM_WRITE				0x3D
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8b93dec..6d84c64 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -228,6 +228,30 @@
 void radeon_fence_unref(struct radeon_fence **fence);
 
 /*
+ * Semaphores.
+ */
+struct radeon_semaphore_driver {
+	rwlock_t		lock;
+	struct list_head	free;
+};
+
+struct radeon_semaphore {
+	struct radeon_bo	*robj;
+	struct list_head	list;
+	uint64_t		gpu_addr;
+};
+
+void radeon_semaphore_driver_fini(struct radeon_device *rdev);
+int radeon_semaphore_create(struct radeon_device *rdev,
+			    struct radeon_semaphore **semaphore);
+void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
+				  struct radeon_semaphore *semaphore);
+void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
+				struct radeon_semaphore *semaphore);
+void radeon_semaphore_free(struct radeon_device *rdev,
+			   struct radeon_semaphore *semaphore);
+
+/*
  * Tiling registers
  */
 struct radeon_surface_reg {
@@ -921,6 +945,9 @@
 	int (*irq_process)(struct radeon_device *rdev);
 	u32 (*get_vblank_counter)(struct radeon_device *rdev, int crtc);
 	void (*fence_ring_emit)(struct radeon_device *rdev, struct radeon_fence *fence);
+	void (*semaphore_ring_emit)(struct radeon_device *rdev,
+				    struct radeon_semaphore *semaphore,
+				    unsigned ring, bool emit_wait);
 	int (*cs_parse)(struct radeon_cs_parser *p);
 	int (*copy_blit)(struct radeon_device *rdev,
 			 uint64_t src_offset,
@@ -1250,6 +1277,7 @@
 	struct radeon_mman		mman;
 	rwlock_t			fence_lock;
 	struct radeon_fence_driver	fence_drv[RADEON_NUM_RINGS];
+	struct radeon_semaphore_driver	semaphore_drv;
 	struct radeon_cp		cp;
 	/* cayman compute rings */
 	struct radeon_cp		cp1;
@@ -1470,6 +1498,7 @@
 #define radeon_irq_process(rdev) (rdev)->asic->irq_process((rdev))
 #define radeon_get_vblank_counter(rdev, crtc) (rdev)->asic->get_vblank_counter((rdev), (crtc))
 #define radeon_fence_ring_emit(rdev, fence) (rdev)->asic->fence_ring_emit((rdev), (fence))
+#define radeon_semaphore_ring_emit(rdev, semaphore, ring, emit_wait) (rdev)->asic->semaphore_ring_emit((rdev), (semaphore), (ring), (emit_wait))
 #define radeon_copy_blit(rdev, s, d, np, f) (rdev)->asic->copy_blit((rdev), (s), (d), (np), (f))
 #define radeon_copy_dma(rdev, s, d, np, f) (rdev)->asic->copy_dma((rdev), (s), (d), (np), (f))
 #define radeon_copy(rdev, s, d, np, f) (rdev)->asic->copy((rdev), (s), (d), (np), (f))
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index a2e1eae..8cfbbc77 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -146,6 +146,7 @@
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
 	.fence_ring_emit = &r100_fence_ring_emit,
+	.semaphore_ring_emit = &r100_semaphore_ring_emit,
 	.cs_parse = &r100_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = NULL,
@@ -194,6 +195,7 @@
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
 	.fence_ring_emit = &r100_fence_ring_emit,
+	.semaphore_ring_emit = &r100_semaphore_ring_emit,
 	.cs_parse = &r100_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = &r200_copy_dma,
@@ -241,6 +243,7 @@
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
 	.fence_ring_emit = &r300_fence_ring_emit,
+	.semaphore_ring_emit = &r100_semaphore_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = &r200_copy_dma,
@@ -289,6 +292,7 @@
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
 	.fence_ring_emit = &r300_fence_ring_emit,
+	.semaphore_ring_emit = &r100_semaphore_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = &r200_copy_dma,
@@ -336,6 +340,7 @@
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
 	.fence_ring_emit = &r300_fence_ring_emit,
+	.semaphore_ring_emit = &r100_semaphore_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = &r200_copy_dma,
@@ -384,6 +389,7 @@
 	.irq_process = &r100_irq_process,
 	.get_vblank_counter = &r100_get_vblank_counter,
 	.fence_ring_emit = &r300_fence_ring_emit,
+	.semaphore_ring_emit = &r100_semaphore_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = &r200_copy_dma,
@@ -432,6 +438,7 @@
 	.irq_process = &rs600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
 	.fence_ring_emit = &r300_fence_ring_emit,
+	.semaphore_ring_emit = &r100_semaphore_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = &r200_copy_dma,
@@ -480,6 +487,7 @@
 	.irq_process = &rs600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
 	.fence_ring_emit = &r300_fence_ring_emit,
+	.semaphore_ring_emit = &r100_semaphore_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = &r200_copy_dma,
@@ -528,6 +536,7 @@
 	.irq_process = &rs600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
 	.fence_ring_emit = &r300_fence_ring_emit,
+	.semaphore_ring_emit = &r100_semaphore_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = &r200_copy_dma,
@@ -576,6 +585,7 @@
 	.irq_process = &rs600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
 	.fence_ring_emit = &r300_fence_ring_emit,
+	.semaphore_ring_emit = &r100_semaphore_ring_emit,
 	.cs_parse = &r300_cs_parse,
 	.copy_blit = &r100_copy_blit,
 	.copy_dma = &r200_copy_dma,
@@ -623,6 +633,7 @@
 	.irq_process = &r600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
+	.semaphore_ring_emit = &r600_semaphore_ring_emit,
 	.cs_parse = &r600_cs_parse,
 	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
@@ -670,6 +681,7 @@
 	.irq_process = &r600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
+	.semaphore_ring_emit = &r600_semaphore_ring_emit,
 	.cs_parse = &r600_cs_parse,
 	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
@@ -717,6 +729,7 @@
 	.irq_process = &r600_irq_process,
 	.get_vblank_counter = &rs600_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
+	.semaphore_ring_emit = &r600_semaphore_ring_emit,
 	.cs_parse = &r600_cs_parse,
 	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
@@ -764,6 +777,7 @@
 	.irq_process = &evergreen_irq_process,
 	.get_vblank_counter = &evergreen_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
+	.semaphore_ring_emit = &r600_semaphore_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
 	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
@@ -811,6 +825,7 @@
 	.irq_process = &evergreen_irq_process,
 	.get_vblank_counter = &evergreen_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
+	.semaphore_ring_emit = &r600_semaphore_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
 	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
@@ -858,6 +873,7 @@
 	.irq_process = &evergreen_irq_process,
 	.get_vblank_counter = &evergreen_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
+	.semaphore_ring_emit = &r600_semaphore_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
 	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
@@ -905,6 +921,7 @@
 	.irq_process = &evergreen_irq_process,
 	.get_vblank_counter = &evergreen_get_vblank_counter,
 	.fence_ring_emit = &r600_fence_ring_emit,
+	.semaphore_ring_emit = &r600_semaphore_ring_emit,
 	.cs_parse = &evergreen_cs_parse,
 	.copy_blit = &r600_copy_blit,
 	.copy_dma = NULL,
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 5991484..6b589d5 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -69,6 +69,9 @@
 int r100_irq_process(struct radeon_device *rdev);
 void r100_fence_ring_emit(struct radeon_device *rdev,
 			  struct radeon_fence *fence);
+void r100_semaphore_ring_emit(struct radeon_device *rdev,
+			      struct radeon_semaphore *semaphore,
+			      unsigned ring, bool emit_wait);
 int r100_cs_parse(struct radeon_cs_parser *p);
 void r100_pll_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 uint32_t r100_pll_rreg(struct radeon_device *rdev, uint32_t reg);
@@ -300,6 +303,9 @@
 int r600_cs_parse(struct radeon_cs_parser *p);
 void r600_fence_ring_emit(struct radeon_device *rdev,
 			  struct radeon_fence *fence);
+void r600_semaphore_ring_emit(struct radeon_device *rdev,
+			      struct radeon_semaphore *semaphore,
+			      unsigned ring, bool emit_wait);
 bool r600_gpu_is_lockup(struct radeon_device *rdev);
 int r600_asic_reset(struct radeon_device *rdev);
 int r600_set_surface_reg(struct radeon_device *rdev, int reg,
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 4ed4eeb..36296ad 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -726,9 +726,11 @@
 	mutex_init(&rdev->pm.mutex);
 	mutex_init(&rdev->vram_mutex);
 	rwlock_init(&rdev->fence_lock);
+	rwlock_init(&rdev->semaphore_drv.lock);
 	INIT_LIST_HEAD(&rdev->gem.objects);
 	init_waitqueue_head(&rdev->irq.vblank_queue);
 	init_waitqueue_head(&rdev->irq.idle_queue);
+	INIT_LIST_HEAD(&rdev->semaphore_drv.free);
 
 	/* Set asic functions */
 	r = radeon_asic_init(rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c
new file mode 100644
index 0000000..f7d3104
--- /dev/null
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2011 Christian König.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+/*
+ * Authors:
+ *    Christian König <deathsimple@vodafone.de>
+ */
+#include "drmP.h"
+#include "drm.h"
+#include "radeon.h"
+
+static int allocate_semaphores(struct radeon_device *rdev)
+{
+	const unsigned long bo_size = PAGE_SIZE * 4;
+
+	struct radeon_bo *bo;
+	struct list_head new_entrys;
+	unsigned long irq_flags;
+	uint64_t gpu_addr;
+	void *map;
+	int i, r;
+
+	r = radeon_bo_create(rdev, bo_size, RADEON_GPU_PAGE_SIZE, true,
+			     RADEON_GEM_DOMAIN_GTT, &bo);
+	if (r) {
+		dev_err(rdev->dev, "(%d) failed to allocate semaphore bo\n", r);
+		return r;
+	}
+
+	r = radeon_bo_reserve(bo, false);
+	if (r) {
+		radeon_bo_unref(&bo);
+		dev_err(rdev->dev, "(%d) failed to reserve semaphore bo\n", r);
+		return r;
+	}
+
+	r = radeon_bo_kmap(bo, &map);
+	if (r) {
+		radeon_bo_unreserve(bo);
+		radeon_bo_unref(&bo);
+		dev_err(rdev->dev, "(%d) semaphore map failed\n", r);
+		return r;
+	}
+	memset(map, 0, bo_size);
+	radeon_bo_kunmap(bo);
+
+	r = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_VRAM, &gpu_addr);
+	if (r) {
+		radeon_bo_unreserve(bo);
+		radeon_bo_unref(&bo);
+		dev_err(rdev->dev, "(%d) semaphore pin failed\n", r);
+		return r;
+	}
+
+	INIT_LIST_HEAD(&new_entrys);
+	for (i = 0; i < bo_size/8; ++i) {
+		struct radeon_semaphore *sem = kmalloc(sizeof(struct radeon_semaphore), GFP_KERNEL);
+		ttm_bo_reference(&bo->tbo);
+		sem->robj = bo;
+		sem->gpu_addr = gpu_addr;
+		gpu_addr += 8;
+		list_add_tail(&sem->list, &new_entrys);
+	}
+
+	radeon_bo_unreserve(bo);
+	radeon_bo_unref(&bo);
+
+	write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
+	list_splice_tail(&new_entrys, &rdev->semaphore_drv.free);
+	write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
+
+	DRM_INFO("%d new semaphores allocated\n", (int)(bo_size/8));
+
+	return 0;
+}
+
+int radeon_semaphore_create(struct radeon_device *rdev,
+			    struct radeon_semaphore **semaphore)
+{
+	unsigned long irq_flags;
+
+	write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
+	if (list_empty(&rdev->semaphore_drv.free)) {
+		int r;
+		write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
+		r = allocate_semaphores(rdev);
+		if (r)
+			return r;
+		write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
+	}
+
+	*semaphore = list_first_entry(&rdev->semaphore_drv.free, struct radeon_semaphore, list);
+	list_del(&(*semaphore)->list);
+
+	write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
+	return 0;
+}
+
+void radeon_semaphore_emit_signal(struct radeon_device *rdev, int ring,
+			          struct radeon_semaphore *semaphore)
+{
+	radeon_semaphore_ring_emit(rdev, semaphore, ring, false);
+}
+
+void radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring,
+			        struct radeon_semaphore *semaphore)
+{
+	radeon_semaphore_ring_emit(rdev, semaphore, ring, true);
+}
+
+void radeon_semaphore_free(struct radeon_device *rdev,
+			  struct radeon_semaphore *semaphore)
+{
+	unsigned long irq_flags;
+
+	write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
+	list_add_tail(&semaphore->list, &rdev->semaphore_drv.free);
+	write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
+}
+
+void radeon_semaphore_driver_fini(struct radeon_device *rdev)
+{
+	struct radeon_semaphore *i, *n;
+	struct list_head entrys;
+	unsigned long irq_flags;
+
+	INIT_LIST_HEAD(&entrys);
+	write_lock_irqsave(&rdev->semaphore_drv.lock, irq_flags);
+	if (!list_empty(&rdev->semaphore_drv.free)) {
+		list_splice(&rdev->semaphore_drv.free, &entrys);
+	}
+	INIT_LIST_HEAD(&rdev->semaphore_drv.free);
+	write_unlock_irqrestore(&rdev->semaphore_drv.lock, irq_flags);
+
+	list_for_each_entry_safe(i, n, &entrys, list) {
+		radeon_bo_unref(&i->robj);
+		kfree(i);
+	}
+}
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 8637fd8..be02bee 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -1270,6 +1270,7 @@
 	rv770_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
+	radeon_semaphore_driver_fini(rdev);
 	radeon_fence_driver_fini(rdev);
 	radeon_agp_fini(rdev);
 	radeon_bo_fini(rdev);