radeonsi: Handle TGSI CONST registers

We now emit LLVM load instructions for TGSI CONST register reads,
which are lowered in the backend to S_LOAD_DWORD* instructions.
diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c
index e3623e0..4f99549 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_shader.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c
@@ -66,54 +66,85 @@
 #define CENTROID_OFSET 4
 
 #define USE_SGPR_MAX_SUFFIX_LEN 5
+#define CONST_ADDR_SPACE 2
 
 enum sgpr_type {
+	SGPR_CONST_PTR_F32,
+	SGPR_CONST_PTR_V4I32,
+	SGPR_CONST_PTR_V8I32,
 	SGPR_I32,
-	SGPR_I64,
-	SGPR_PTR_V4I32,
-	SGPR_PTR_V8I32
+	SGPR_I64
 };
 
+/**
+ * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad
+ *
+ * @param offset The offset parameter specifies the number of
+ * elements to offset, not the number of bytes or dwords.  An element is the
+ * the type pointed to by the base_ptr parameter (e.g. int is the element of
+ * an int* pointer)
+ *
+ * When LLVM lowers the load instruction, it will convert the element offset
+ * into a dword offset automatically.
+ *
+ */
+static LLVMValueRef build_indexed_load(
+	struct gallivm_state * gallivm,
+	LLVMValueRef base_ptr,
+	LLVMValueRef offset)
+{
+	LLVMValueRef computed_ptr = LLVMBuildGEP(
+		gallivm->builder, base_ptr, &offset, 1, "");
+
+	return LLVMBuildLoad(gallivm->builder, computed_ptr, "");
+}
+
+/*
+ * XXX: Instead of using an intrinsic to use a specific SGPR, we should be
+ * using load instructions.  The loads should load from the USER_SGPR address
+ * space and use the sgpr index as the pointer.
+ */
 static LLVMValueRef use_sgpr(
 	struct gallivm_state * gallivm,
 	enum sgpr_type type,
 	unsigned sgpr)
 {
 	LLVMValueRef sgpr_index;
-	LLVMValueRef sgpr_value;
 	LLVMTypeRef ret_type;
 
 	sgpr_index = lp_build_const_int32(gallivm, sgpr);
 
-	if (type == SGPR_I32) {
+	switch (type) {
+	case SGPR_CONST_PTR_F32:
+		ret_type = LLVMFloatTypeInContext(gallivm->context);
+		ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
+		return lp_build_intrinsic_unary(gallivm->builder,
+						"llvm.SI.use.sgprptrcf32.",
+						ret_type, sgpr_index);
+	case SGPR_I32:
 		ret_type = LLVMInt32TypeInContext(gallivm->context);
 		return lp_build_intrinsic_unary(gallivm->builder,
 						"llvm.SI.use.sgpr.i32",
 						ret_type, sgpr_index);
-	}
-
-	ret_type = LLVMInt64TypeInContext(gallivm->context);
-	sgpr_value = lp_build_intrinsic_unary(gallivm->builder,
+	case SGPR_I64:
+		ret_type= LLVMInt64TypeInContext(gallivm->context);
+		return lp_build_intrinsic_unary(gallivm->builder,
 				"llvm.SI.use.sgpr.i64",
 				 ret_type, sgpr_index);
-
-	switch (type) {
-	case SGPR_I64:
-		return sgpr_value;
-	case SGPR_PTR_V4I32:
+	case SGPR_CONST_PTR_V4I32:
 		ret_type = LLVMInt32TypeInContext(gallivm->context);
 		ret_type = LLVMVectorType(ret_type, 4);
-		ret_type = LLVMPointerType(ret_type,
-					0 /*XXX: Specify address space*/);
-		return LLVMBuildIntToPtr(gallivm->builder, sgpr_value,
-								ret_type, "");
-	case SGPR_PTR_V8I32:
+		ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
+		return lp_build_intrinsic_unary(gallivm->builder,
+						"llvm.SI.use.sgprptrci128.",
+						ret_type, sgpr_index);
+	case SGPR_CONST_PTR_V8I32:
 		ret_type = LLVMInt32TypeInContext(gallivm->context);
 		ret_type = LLVMVectorType(ret_type, 8);
-		ret_type = LLVMPointerType(ret_type,
-					0 /*XXX: Specify address space*/);
-		return LLVMBuildIntToPtr(gallivm->builder, sgpr_value,
-								ret_type, "");
+		ret_type = LLVMPointerType(ret_type, CONST_ADDR_SPACE);
+		return lp_build_intrinsic_unary(gallivm->builder,
+						"llvm.SI.use.sgprptrci256.",
+						ret_type, sgpr_index);
 	default:
 		assert(!"Unsupported SGPR type in use_sgpr()");
 		return NULL;
@@ -127,9 +158,10 @@
 {
 	LLVMValueRef t_list_ptr;
 	LLVMValueRef t_offset;
+	LLVMValueRef t_list;
 	LLVMValueRef attribute_offset;
 	LLVMValueRef buffer_index_reg;
-	LLVMValueRef args[4];
+	LLVMValueRef args[3];
 	LLVMTypeRef vec4_type;
 	LLVMValueRef input;
 	struct lp_build_context * uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
@@ -138,13 +170,17 @@
 	struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index];
 	unsigned chan;
 
+	/* Load the T list */
 	/* XXX: Communicate with the rest of the driver about which SGPR the T#
 	 * list pointer is going to be stored in.  Hard code to SGPR[6:7] for
  	 * now */
-	t_list_ptr = use_sgpr(base->gallivm, SGPR_I64, 3);
+	t_list_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_V4I32, 3);
 
-	t_offset = lp_build_const_int32(base->gallivm,
-					4 * velem->vertex_buffer_index);
+	t_offset = lp_build_const_int32(base->gallivm, velem->vertex_buffer_index);
+
+	t_list = build_indexed_load(base->gallivm, t_list_ptr, t_offset);
+
+	/* Build the attribute offset */
 	attribute_offset = lp_build_const_int32(base->gallivm, velem->src_offset);
 
 	/* Load the buffer index is always, which is always stored in VGPR0
@@ -153,12 +189,11 @@
 		"llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0);
 
 	vec4_type = LLVMVectorType(base->elem_type, 4);
-	args[0] = t_list_ptr;
-	args[1] = t_offset;
-	args[2] = attribute_offset;
-	args[3] = buffer_index_reg;
+	args[0] = t_list;
+	args[1] = attribute_offset;
+	args[2] = buffer_index_reg;
 	input = lp_build_intrinsic(base->gallivm->builder,
-		"llvm.SI.vs.load.input", vec4_type, args, 4);
+		"llvm.SI.vs.load.input", vec4_type, args, 3);
 
 	/* Break up the vec4 into individual components */
 	for (chan = 0; chan < 4; chan++) {
@@ -274,7 +309,7 @@
 
 	/* XXX: Assume the pointer to the constant buffer is being stored in
 	 * SGPR[0:1] */
-	const_ptr = use_sgpr(base->gallivm, SGPR_I64, 0);
+	const_ptr = use_sgpr(base->gallivm, SGPR_CONST_PTR_F32, 0);
 
 	/* XXX: This assumes that the constant buffer is not packed, so
 	 * CONST[0].x will have an offset of 0 and CONST[1].x will have an
@@ -282,8 +317,7 @@
 	offset = lp_build_const_int32(base->gallivm,
 					(reg->Register.Index * 4) + swizzle);
 
-	return lp_build_intrinsic_binary(base->gallivm->builder,
-		"llvm.SI.load.const", base->elem_type, const_ptr, offset);
+	return build_indexed_load(base->gallivm, const_ptr, offset);
 }
 
 
@@ -457,6 +491,9 @@
 	struct lp_build_tgsi_context * bld_base,
 	struct lp_build_emit_data * emit_data)
 {
+	LLVMValueRef ptr;
+	LLVMValueRef offset;
+
 	/* WriteMask */
 	emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm,
 				emit_data->inst->Dst[0].Register.WriteMask);
@@ -467,14 +504,18 @@
 							0, LP_CHAN_ALL);
 
 	/* Resource */
-	emit_data->args[2] = use_sgpr(bld_base->base.gallivm, SGPR_I64, 2);
-	emit_data->args[3] = lp_build_const_int32(bld_base->base.gallivm,
-						  8 * emit_data->inst->Src[1].Register.Index);
+	ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V8I32, 2);
+	offset = lp_build_const_int32(bld_base->base.gallivm,
+				  8 * emit_data->inst->Src[1].Register.Index);
+	emit_data->args[2] = build_indexed_load(bld_base->base.gallivm,
+						ptr, offset);
 
 	/* Sampler */
-	emit_data->args[4] = use_sgpr(bld_base->base.gallivm, SGPR_I64, 1);
-	emit_data->args[5] = lp_build_const_int32(bld_base->base.gallivm,
-						  4 * emit_data->inst->Src[1].Register.Index);
+	ptr = use_sgpr(bld_base->base.gallivm, SGPR_CONST_PTR_V4I32, 1);
+	offset = lp_build_const_int32(bld_base->base.gallivm,
+				  4 * emit_data->inst->Src[1].Register.Index);
+	emit_data->args[3] = build_indexed_load(bld_base->base.gallivm,
+						ptr, offset);
 
 	/* Dimensions */
 	/* XXX: We might want to pass this information to the shader at some. */
@@ -482,7 +523,7 @@
 					emit_data->inst->Texture.Texture);
 */
 
-	emit_data->arg_count = 6;
+	emit_data->arg_count = 4;
 	/* XXX: To optimize, we could use a float or v2f32, if the last bits of
 	 * the writemask are clear */
 	emit_data->dst_type = LLVMVectorType(