drm/radeon: introduce a sub-allocator and convert ib pool to it v4

A somewhat specialized sub-allocator designed to perform sub-allocation
of command buffers, not only for the current cs ioctl but for future
command submission ioctls as well. The patch also converts the current
ib pool to use the sub-allocator. The idea is that the ib pool buffer
can be shared with other command buffer submissions without imposing a
64K granularity.
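
For illustration, a minimal sketch of the allocation path follows (the
function name is hypothetical, locking and fence waiting are left to
the caller, and this simplifies what the real allocator does; see also
the comment added to radeon.h below):

/* A minimal sketch of the allocation path: find the end of the
 * last sub-allocation, align it, and check whether the remaining
 * space fits the request. Assumes the radeon_sa_manager and
 * radeon_sa_bo definitions added by this patch.
 */
static int radeon_sa_bo_new_sketch(struct radeon_sa_manager *sa_manager,
				   struct radeon_sa_bo *sa_bo,
				   unsigned size, unsigned align)
{
	struct radeon_sa_bo *last;
	unsigned offset = 0, wasted;

	if (!list_empty(&sa_manager->sa_bo)) {
		/* the list is kept in offset order, so the last
		 * entry marks the end of the used range */
		last = list_entry(sa_manager->sa_bo.prev,
				  struct radeon_sa_bo, list);
		offset = last->offset + last->size;
	}
	/* honor alignment (bounded by the page size) */
	wasted = offset % align;
	if (wasted)
		offset += align - wasted;

	/* room at the end? holes are deliberately ignored */
	if ((sa_manager->size - offset) < size)
		return -ENOMEM; /* caller waits on a fence, then retries */

	sa_bo->manager = sa_manager;
	sa_bo->offset = offset;
	sa_bo->size = size;
	list_add_tail(&sa_bo->list, &sa_manager->sa_bo);
	return 0;
}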

v2 Harmonize pool handling and add suspend/resume callbacks to pin/unpin
the sa bo (tested on rv280, rv370, r420, rv515, rv610, rv710, redwood,
cayman, rs480, rs690, rs880); see the pin sketch below
v3 Simplify the allocator
v4 Fix the radeon_ib_get error path to properly free the fence
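
The v2 suspend/resume handling boils down to pinning the manager's
backing BO (and mapping it for CPU access) when the pool starts, and
undoing that on suspend. A hedged sketch of the start path using the
driver's existing radeon_bo_* helpers (the function name is
hypothetical, error handling abbreviated):

/* Sketch of the pool start path: pin the manager's backing BO in
 * its placement domain and grab a permanent CPU mapping. Suspend
 * does the reverse (kunmap + unpin).
 */
static int radeon_sa_manager_start_sketch(struct radeon_device *rdev,
					  struct radeon_sa_manager *sa_manager)
{
	int r;

	r = radeon_bo_reserve(sa_manager->bo, false);
	if (r)
		return r;
	r = radeon_bo_pin(sa_manager->bo, sa_manager->domain,
			  &sa_manager->gpu_addr);
	if (!r) {
		r = radeon_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr);
		if (r)
			radeon_bo_unpin(sa_manager->bo);
	}
	radeon_bo_unreserve(sa_manager->bo);
	return r;
}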

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index b4c2d0f..f29edbf 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -316,6 +316,48 @@
 	u32			tiling_flags;
 };
 
+/* sub-allocation manager, it has to be protected by another lock.
+ * By conception this is a helper for other parts of the driver
+ * like the indirect buffer or semaphore, which both have their
+ * own locking.
+ *
+ * The principle is simple: we keep a list of sub allocations in
+ * offset order (first entry has offset == 0, last entry has the
+ * highest offset).
+ *
+ * When allocating a new object we first check if there is room at
+ * the end: total_size - (last_object_offset + last_object_size) >=
+ * alloc_size. If so we allocate the new object there.
+ *
+ * When there is not enough room at the end, we start waiting on
+ * each sub object until we reach object_offset + object_size >=
+ * alloc_size; this object then becomes the sub object we return.
+ *
+ * Alignment can't be bigger than the page size.
+ *
+ * Holes are not considered for allocation to keep things simple.
+ * The assumption is that there won't be holes (all objects use the
+ * same alignment).
+ */
+struct radeon_sa_manager {
+	struct radeon_bo	*bo;
+	struct list_head	sa_bo;
+	unsigned		size;
+	uint64_t		gpu_addr;
+	void			*cpu_ptr;
+	uint32_t		domain;
+};
+
+struct radeon_sa_bo;
+
+/* sub-allocation buffer */
+struct radeon_sa_bo {
+	struct list_head		list;
+	struct radeon_sa_manager	*manager;
+	unsigned			offset;
+	unsigned			size;
+};
+
 /*
  * GEM objects.
  */
@@ -503,13 +545,12 @@
  */
 
 struct radeon_ib {
-	struct list_head	list;
+	struct radeon_sa_bo	sa_bo;
 	unsigned		idx;
-	uint64_t		gpu_addr;
-	struct radeon_fence	*fence;
-	uint32_t		*ptr;
 	uint32_t		length_dw;
-	bool			free;
+	uint64_t		gpu_addr;
+	uint32_t		*ptr;
+	struct radeon_fence	*fence;
 };
 
 /*
@@ -517,12 +558,11 @@
  * mutex protects scheduled_ibs, ready, alloc_bm
  */
 struct radeon_ib_pool {
-	struct mutex		mutex;
-	struct radeon_bo	*robj;
-	struct list_head	bogus_ib;
-	struct radeon_ib	ibs[RADEON_IB_POOL_SIZE];
-	bool			ready;
-	unsigned		head_id;
+	struct mutex			mutex;
+	struct radeon_sa_manager	sa_manager;
+	struct radeon_ib		ibs[RADEON_IB_POOL_SIZE];
+	bool				ready;
+	unsigned			head_id;
 };
 
 struct radeon_ring {
@@ -603,8 +643,9 @@
 int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib);
 int radeon_ib_pool_init(struct radeon_device *rdev);
 void radeon_ib_pool_fini(struct radeon_device *rdev);
+int radeon_ib_pool_start(struct radeon_device *rdev);
+int radeon_ib_pool_suspend(struct radeon_device *rdev);
 int radeon_ib_test(struct radeon_device *rdev);
-extern void radeon_ib_bogus_add(struct radeon_device *rdev, struct radeon_ib *ib);
 /* Ring access between begin & end cannot sleep */
 int radeon_ring_index(struct radeon_device *rdev, struct radeon_ring *cp);
 void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *cp);