gallium/u_threaded: avoid syncing in threaded_context_flush

We could always do the flush asynchronously, but if we're going to wait
for a fence anyway and the driver thread is currently idle, the additional
communication overhead isn't worth it.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index 84fbb22..ffa8247 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -231,13 +231,23 @@
  */
 void
 threaded_context_flush(struct pipe_context *_pipe,
-                       struct tc_unflushed_batch_token *token)
+                       struct tc_unflushed_batch_token *token,
+                       bool prefer_async)
 {
    struct threaded_context *tc = threaded_context(_pipe);
 
    /* This is called from the state-tracker / application thread. */
-   if (token->tc && token->tc == tc)
-      tc_sync(token->tc);
+   if (token->tc && token->tc == tc) {
+      struct tc_batch *last = &tc->batch_slots[tc->last];
+
+      /* Prefer to do the flush in the driver thread if it is already
+       * running. That should be better for cache locality.
+       */
+      if (prefer_async || !util_queue_fence_is_signalled(&last->fence))
+         tc_batch_flush(tc);
+      else
+         tc_sync(token->tc);
+   }
 }
 
 static void
diff --git a/src/gallium/auxiliary/util/u_threaded_context.h b/src/gallium/auxiliary/util/u_threaded_context.h
index 3408956..53c5a7e 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.h
+++ b/src/gallium/auxiliary/util/u_threaded_context.h
@@ -381,7 +381,8 @@
 
 void
 threaded_context_flush(struct pipe_context *_pipe,
-                       struct tc_unflushed_batch_token *token);
+                       struct tc_unflushed_batch_token *token,
+                       bool prefer_async);
 
 static inline struct threaded_context *
 threaded_context(struct pipe_context *pipe)
diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c
index 5163d65..9d6bcfe 100644
--- a/src/gallium/drivers/radeonsi/si_fence.c
+++ b/src/gallium/drivers/radeonsi/si_fence.c
@@ -203,7 +203,8 @@
 			 * be in flight in the driver thread, so the fence
 			 * may not be ready yet when this call returns.
 			 */
-			threaded_context_flush(ctx, rfence->tc_token);
+			threaded_context_flush(ctx, rfence->tc_token,
+					       timeout == 0);
 		}
 
 		if (timeout == PIPE_TIMEOUT_INFINITE) {