radeonsi: reduce the scope of sel->mutex in si_shader_select_with_key

We only need the lock to guard changes to the variant linked list. The
actual compilation can happen outside the lock, since each variant's
ready fence guards against using a shader that is still compiling.

v2: fix double-unlock

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 4c02924..75b1648 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1621,6 +1621,8 @@
 		/* Don't check the "current" shader. We checked it above. */
 		if (current != iter &&
 		    memcmp(&iter->key, key, sizeof(*key)) == 0) {
+			mtx_unlock(&sel->mutex);
+
 			if (unlikely(!util_queue_fence_is_signalled(&iter->ready))) {
 				/* If it's an optimized shader and its compilation has
 				 * been started but isn't done, use the unoptimized
@@ -1628,7 +1630,6 @@
 				 */
 				if (iter->is_optimized) {
 					memset(&key->opt, 0, sizeof(key->opt));
-					mtx_unlock(&sel->mutex);
 					goto again;
 				}
 
@@ -1636,12 +1637,10 @@
 			}
 
 			if (iter->compilation_failed) {
-				mtx_unlock(&sel->mutex);
 				return -1; /* skip the draw call */
 			}
 
 			state->current = iter;
-			mtx_unlock(&sel->mutex);
 			return 0;
 		}
 	}
@@ -1768,6 +1767,8 @@
 		sel->last_variant = shader;
 	}
 
+	mtx_unlock(&sel->mutex);
+
 	assert(!shader->is_optimized);
 	si_build_shader_variant(shader, thread_index, false);
 
@@ -1776,7 +1777,6 @@
 	if (!shader->compilation_failed)
 		state->current = shader;
 
-	mtx_unlock(&sel->mutex);
 	return shader->compilation_failed ? -1 : 0;
 }