u_queue: add util_queue_finish for waiting for previously added jobs

Schedule one job for every thread, and wait on a barrier inside the job
execution function.

v2: avoid alloca (fixes Windows build error)

Reviewed-by: Marek Olšák <marek.olsak@amd.com> (v1)
diff --git a/src/util/u_queue.c b/src/util/u_queue.c
index 8293ec6..706ee8b 100644
--- a/src/util/u_queue.c
+++ b/src/util/u_queue.c
@@ -25,7 +25,9 @@
  */
 
 #include "u_queue.h"
+
 #include "util/u_string.h"
+#include "util/u_thread.h"
 
 static void util_queue_killall_and_wait(struct util_queue *queue);
 
@@ -429,6 +431,70 @@
       util_queue_fence_wait(fence);
 }
 
+/**
+ * Job body for util_queue_finish: block on the shared barrier.
+ *
+ * "data" is the util_barrier set up by util_queue_finish; the value passed
+ * in "num_thread" is not used.
+ */
+static void
+util_queue_finish_execute(void *data, int num_thread)
+{
+   util_barrier *barrier = data;
+
+   util_barrier_wait(barrier);
+}
+
+/**
+ * Wait until all previously added jobs have completed.
+ *
+ * One barrier job is queued per worker thread, and the barrier only opens
+ * once queue->num_threads jobs are waiting on it, so every worker must have
+ * picked up one of them (presumably after finishing its earlier work, as jobs
+ * appear to be consumed in submission order).
+ *
+ * NOTE(review): concurrent callers on the same queue could interleave their
+ * barrier jobs so that neither barrier ever fills up; confirm that callers
+ * serialize calls to this function.
+ */
+void
+util_queue_finish(struct util_queue *queue)
+{
+   util_barrier barrier;
+   struct util_queue_fence *fences;
+
+   /* No worker threads: no barrier jobs are needed, and a barrier with a
+    * count of zero is not valid for POSIX-style barriers.
+    */
+   if (queue->num_threads == 0)
+      return;
+
+   fences = calloc(queue->num_threads, sizeof(*fences));
+   if (!fences) {
+      /* The wait cannot be performed without the fences, and returning early
+       * would let the caller assume the jobs are done, so fail loudly instead
+       * of dereferencing NULL.
+       */
+      abort();
+   }
+
+   util_barrier_init(&barrier, queue->num_threads);
+
+   for (unsigned i = 0; i < queue->num_threads; ++i) {
+      util_queue_fence_init(&fences[i]);
+      util_queue_add_job(queue, &barrier, &fences[i], util_queue_finish_execute, NULL);
+   }
+
+   for (unsigned i = 0; i < queue->num_threads; ++i) {
+      util_queue_fence_wait(&fences[i]);
+      util_queue_fence_destroy(&fences[i]);
+   }
+
+   util_barrier_destroy(&barrier);
+
+   free(fences);
+}
+
 int64_t
 util_queue_get_thread_time_nano(struct util_queue *queue, unsigned thread_index)
 {
diff --git a/src/util/u_queue.h b/src/util/u_queue.h
index dfe2153..a54ec71 100644
--- a/src/util/u_queue.h
+++ b/src/util/u_queue.h
@@ -214,6 +214,8 @@
 void util_queue_drop_job(struct util_queue *queue,
                          struct util_queue_fence *fence);
 
+void util_queue_finish(struct util_queue *queue);
+
 int64_t util_queue_get_thread_time_nano(struct util_queue *queue,
                                         unsigned thread_index);