blob: 50f2e39314fa3bb7bdae969bc05dd5d49122f79e [file] [log] [blame]
Tom Stellarda75c6162012-01-06 17:38:37 -05001
2#include "gallivm/lp_bld_tgsi_action.h"
3#include "gallivm/lp_bld_const.h"
4#include "gallivm/lp_bld_intr.h"
5#include "gallivm/lp_bld_tgsi.h"
6#include "radeon_llvm.h"
7#include "tgsi/tgsi_info.h"
8#include "tgsi/tgsi_parse.h"
9#include "tgsi/tgsi_scan.h"
10#include "tgsi/tgsi_dump.h"
11
12#include "radeonsi_pipe.h"
13#include "radeonsi_shader.h"
14#include "sid.h"
15
16#include <assert.h>
17#include <errno.h>
18#include <stdio.h>
19
20/*
21static ps_remap_inputs(
22 struct tgsi_llvm_context * tl_ctx,
23 unsigned tgsi_index,
24 unsigned tgsi_chan)
25{
26 :
27}
28
29struct si_input
30{
31 struct list_head head;
32 unsigned tgsi_index;
33 unsigned tgsi_chan;
34 unsigned order;
35};
36*/
37
38
/* Per-shader compilation state for the radeonsi TGSI -> LLVM translator.
 *
 * NOTE(review): radeon_bld must remain the first member.  si_shader_context()
 * casts a struct lp_build_tgsi_context * (embedded within radeon_bld) back to
 * this struct, which is only valid while that embedding sits at offset zero
 * -- confirm against radeon_llvm_context's layout before reordering fields.
 */
struct si_shader_context
{
	struct radeon_llvm_context radeon_bld;
	struct r600_context *rctx;		/* Driver context this compile belongs to. */
	struct tgsi_parse_context parse;	/* Parser state for walking 'tokens'. */
	struct tgsi_token * tokens;		/* TGSI input program being translated. */
	struct si_pipe_shader *shader;		/* Output object being filled in. */
	unsigned type; /* TGSI_PROCESSOR_* specifies the type of shader. */
/*	unsigned num_inputs; */
/*	struct list_head inputs; */
/*	unsigned * input_mappings *//* From TGSI to SI hw */
/*	struct tgsi_shader_info info;*/
};
52
/* Recover the enclosing si_shader_context from a lp_build_tgsi_context
 * pointer.
 *
 * NOTE(review): assumes bld_base lives at offset zero of si_shader_context
 * (through the radeon_bld member chain), so a plain cast suffices -- verify
 * against the radeon_llvm_context definition.
 */
static struct si_shader_context * si_shader_context(
	struct lp_build_tgsi_context * bld_base)
{
	return (struct si_shader_context *)bld_base;
}
58
59
60#define PERSPECTIVE_BASE 0
61#define LINEAR_BASE 9
62
63#define SAMPLE_OFFSET 0
64#define CENTER_OFFSET 2
65#define CENTROID_OFSET 4
66
67#define USE_SGPR_MAX_SUFFIX_LEN 5
68
/* How a scalar-register value fetched by use_sgpr() should be typed in IR. */
enum sgpr_type {
	SGPR_I32,	/* Single SGPR read as an i32. */
	SGPR_I64,	/* SGPR pair read as an i64. */
	SGPR_PTR_V4I32,	/* SGPR pair converted to a pointer to <4 x i32>. */
	SGPR_PTR_V8I32	/* SGPR pair converted to a pointer to <8 x i32>. */
};
75
76static LLVMValueRef use_sgpr(
77 struct gallivm_state * gallivm,
78 enum sgpr_type type,
79 unsigned sgpr)
80{
81 LLVMValueRef sgpr_index;
82 LLVMValueRef sgpr_value;
83 LLVMTypeRef ret_type;
84
85 sgpr_index = lp_build_const_int32(gallivm, sgpr);
86
87 if (type == SGPR_I32) {
88 ret_type = LLVMInt32TypeInContext(gallivm->context);
89 return lp_build_intrinsic_unary(gallivm->builder,
90 "llvm.SI.use.sgpr.i32",
91 ret_type, sgpr_index);
92 }
93
94 ret_type = LLVMInt64TypeInContext(gallivm->context);
95 sgpr_value = lp_build_intrinsic_unary(gallivm->builder,
96 "llvm.SI.use.sgpr.i64",
97 ret_type, sgpr_index);
98
99 switch (type) {
100 case SGPR_I64:
101 return sgpr_value;
102 case SGPR_PTR_V4I32:
103 ret_type = LLVMInt32TypeInContext(gallivm->context);
104 ret_type = LLVMVectorType(ret_type, 4);
105 ret_type = LLVMPointerType(ret_type,
106 0 /*XXX: Specify address space*/);
107 return LLVMBuildIntToPtr(gallivm->builder, sgpr_value,
108 ret_type, "");
109 case SGPR_PTR_V8I32:
110 ret_type = LLVMInt32TypeInContext(gallivm->context);
111 ret_type = LLVMVectorType(ret_type, 8);
112 ret_type = LLVMPointerType(ret_type,
113 0 /*XXX: Specify address space*/);
114 return LLVMBuildIntToPtr(gallivm->builder, sgpr_value,
115 ret_type, "");
116 default:
117 assert(!"Unsupported SGPR type in use_sgpr()");
118 return NULL;
119 }
120}
121
122static void declare_input_vs(
123 struct si_shader_context * si_shader_ctx,
124 unsigned input_index,
125 const struct tgsi_full_declaration *decl)
126{
127 LLVMValueRef t_list_ptr;
128 LLVMValueRef t_offset;
129 LLVMValueRef attribute_offset;
130 LLVMValueRef buffer_index_reg;
131 LLVMValueRef args[4];
132 LLVMTypeRef vec4_type;
133 LLVMValueRef input;
134 struct lp_build_context * uint = &si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
135 struct lp_build_context * base = &si_shader_ctx->radeon_bld.soa.bld_base.base;
136 struct r600_context *rctx = si_shader_ctx->rctx;
137 struct pipe_vertex_element *velem = &rctx->vertex_elements->elements[input_index];
138 unsigned chan;
139
140 /* XXX: Communicate with the rest of the driver about which SGPR the T#
141 * list pointer is going to be stored in. Hard code to SGPR[0-1] for
142 * now */
143 t_list_ptr = use_sgpr(base->gallivm, SGPR_I64, 0);
144
145 t_offset = lp_build_const_int32(base->gallivm,
146 4 * velem->vertex_buffer_index);
147 attribute_offset = lp_build_const_int32(base->gallivm, velem->src_offset);
148
149 /* Load the buffer index is always, which is always stored in VGPR0
150 * for Vertex Shaders */
151 buffer_index_reg = lp_build_intrinsic(base->gallivm->builder,
152 "llvm.SI.vs.load.buffer.index", uint->elem_type, NULL, 0);
153
154 vec4_type = LLVMVectorType(base->elem_type, 4);
155 args[0] = t_list_ptr;
156 args[1] = t_offset;
157 args[2] = attribute_offset;
158 args[3] = buffer_index_reg;
159 input = lp_build_intrinsic(base->gallivm->builder,
160 "llvm.SI.vs.load.input", vec4_type, args, 4);
161
162 /* Break up the vec4 into individual components */
163 for (chan = 0; chan < 4; chan++) {
164 LLVMValueRef llvm_chan = lp_build_const_int32(base->gallivm, chan);
165 /* XXX: Use a helper function for this. There is one in
166 * tgsi_llvm.c. */
167 si_shader_ctx->radeon_bld.inputs[radeon_llvm_reg_index_soa(input_index, chan)] =
168 LLVMBuildExtractElement(base->gallivm->builder,
169 input, llvm_chan, "");
170 }
171}
172
173static void declare_input_fs(
174 struct si_shader_context * si_shader_ctx,
175 unsigned input_index,
176 const struct tgsi_full_declaration *decl)
177{
178 const char * intr_name;
179 unsigned chan;
180 struct lp_build_context * base =
181 &si_shader_ctx->radeon_bld.soa.bld_base.base;
182 struct gallivm_state * gallivm = base->gallivm;
183
184 /* This value is:
185 * [15:0] NewPrimMask (Bit mask for each quad. It is set it the
186 * quad begins a new primitive. Bit 0 always needs
187 * to be unset)
188 * [32:16] ParamOffset
189 *
190 */
191 LLVMValueRef params = use_sgpr(base->gallivm, SGPR_I32, 6);
192
193
194 /* XXX: Is this the input_index? */
195 LLVMValueRef attr_number = lp_build_const_int32(gallivm, input_index);
196
197 /* XXX: Handle all possible interpolation modes */
198 switch (decl->Declaration.Interpolate) {
199 case TGSI_INTERPOLATE_COLOR:
200 if (si_shader_ctx->rctx->rasterizer->flatshade)
201 intr_name = "llvm.SI.fs.interp.constant";
202 else
203 intr_name = "llvm.SI.fs.interp.linear.center";
204 break;
205 case TGSI_INTERPOLATE_CONSTANT:
206 intr_name = "llvm.SI.fs.interp.constant";
207 break;
208 case TGSI_INTERPOLATE_LINEAR:
209 intr_name = "llvm.SI.fs.interp.linear.center";
210 break;
211 default:
212 fprintf(stderr, "Warning: Unhandled interpolation mode.\n");
213 return;
214 }
215
216 /* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? */
217 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
218 LLVMValueRef args[3];
219 LLVMValueRef llvm_chan = lp_build_const_int32(gallivm, chan);
220 unsigned soa_index = radeon_llvm_reg_index_soa(input_index, chan);
221 LLVMTypeRef input_type = LLVMFloatTypeInContext(gallivm->context);
222 args[0] = llvm_chan;
223 args[1] = attr_number;
224 args[2] = params;
225 si_shader_ctx->radeon_bld.inputs[soa_index] =
226 lp_build_intrinsic(gallivm->builder, intr_name,
227 input_type, args, 3);
228 }
229}
230
231static void declare_input(
232 struct radeon_llvm_context * radeon_bld,
233 unsigned input_index,
234 const struct tgsi_full_declaration *decl)
235{
236 struct si_shader_context * si_shader_ctx =
237 si_shader_context(&radeon_bld->soa.bld_base);
238 if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
239 declare_input_vs(si_shader_ctx, input_index, decl);
240 } else if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) {
241 declare_input_fs(si_shader_ctx, input_index, decl);
242 } else {
243 fprintf(stderr, "Warning: Unsupported shader type,\n");
244 }
245}
246
247static LLVMValueRef fetch_constant(
248 struct lp_build_tgsi_context * bld_base,
249 const struct tgsi_full_src_register *reg,
250 enum tgsi_opcode_type type,
251 unsigned swizzle)
252{
253 struct lp_build_context * base = &bld_base->base;
254
255 LLVMValueRef const_ptr;
256 LLVMValueRef offset;
257
258 /* XXX: Assume the pointer to the constant buffer is being stored in
259 * SGPR[2:3] */
260 const_ptr = use_sgpr(base->gallivm, SGPR_I64, 1);
261
262 /* XXX: This assumes that the constant buffer is not packed, so
263 * CONST[0].x will have an offset of 0 and CONST[1].x will have an
264 * offset of 4. */
265 offset = lp_build_const_int32(base->gallivm,
266 (reg->Register.Index * 4) + swizzle);
267
268 return lp_build_intrinsic_binary(base->gallivm->builder,
269 "llvm.SI.load.const", base->elem_type, const_ptr, offset);
270}
271
272
273/* Declare some intrinsics with the correct attributes */
274static void si_llvm_emit_prologue(struct lp_build_tgsi_context * bld_base)
275{
276 LLVMValueRef function;
277 struct gallivm_state * gallivm = bld_base->base.gallivm;
278
279 LLVMTypeRef i64 = LLVMInt64TypeInContext(gallivm->context);
280 LLVMTypeRef i32 = LLVMInt32TypeInContext(gallivm->context);
281
282 /* declare i32 @llvm.SI.use.sgpr.i32(i32) */
283 function = lp_declare_intrinsic(gallivm->module, "llvm.SI.use.sgpr.i32",
284 i32, &i32, 1);
285 LLVMAddFunctionAttr(function, LLVMReadNoneAttribute);
286
287 /* declare i64 @llvm.SI.use.sgpr.i64(i32) */
288 function = lp_declare_intrinsic(gallivm->module, "llvm.SI.use.sgpr.i64",
289 i64, &i32, 1);
290 LLVMAddFunctionAttr(function, LLVMReadNoneAttribute);
291}
292
/* XXX: This is partially implemented for VS only at this point. It is not complete */
/* Epilogue hook: walk the shader's declaration tokens, record input/output
 * metadata into the r600_shader struct, and emit one llvm.SI.export per
 * output register.
 *
 * Export intrinsic argument layout (9 args):
 *   [0] component write mask, [1] valid-mask flag, [2] "done" (last export)
 *   flag, [3] export target, [4] COMPR flag, [5..8] the four channel values.
 *
 * The export for the VS position (or PS color) output is buffered in
 * last_args instead of being emitted immediately, so it can be re-emitted
 * at the end with the "done" bit set.
 *
 * NOTE(review): if the shader declares no position/color output, last_args
 * stays all-zero and the trailing export is still emitted with null channel
 * values -- looks like a latent bug, confirm intended behavior.
 */
static void si_llvm_emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
	struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
	struct r600_shader * shader = &si_shader_ctx->shader->shader;
	struct lp_build_context * base = &bld_base->base;
	struct lp_build_context * uint =
		&si_shader_ctx->radeon_bld.soa.bld_base.uint_bld;
	struct tgsi_parse_context *parse = &si_shader_ctx->parse;
	LLVMValueRef last_args[9] = { 0 };

	while (!tgsi_parse_end_of_tokens(parse)) {
		/* XXX: component_bits controls which components of the output
		 * registers actually get exported. (e.g bit 0 means export
		 * X component, bit 1 means export Y component, etc.) I'm
		 * hard coding this to 0xf for now. In the future, we might
		 * want to do something else. */
		unsigned component_bits = 0xf;
		unsigned chan;
		struct tgsi_full_declaration *d =
			&parse->FullToken.FullDeclaration;
		LLVMValueRef args[9];
		unsigned target;
		unsigned index;
		unsigned color_count = 0;
		unsigned param_count = 0;
		int i;

		tgsi_parse_token(parse);
		if (parse->FullToken.Token.Type != TGSI_TOKEN_TYPE_DECLARATION)
			continue;

		/* Record declaration metadata for the driver's state setup. */
		switch (d->Declaration.File) {
		case TGSI_FILE_INPUT:
			i = shader->ninput++;
			shader->input[i].name = d->Semantic.Name;
			shader->input[i].sid = d->Semantic.Index;
			shader->input[i].interpolate = d->Declaration.Interpolate;
			shader->input[i].centroid = d->Declaration.Centroid;
			break;
		case TGSI_FILE_OUTPUT:
			i = shader->noutput++;
			shader->output[i].name = d->Semantic.Name;
			shader->output[i].sid = d->Semantic.Index;
			shader->output[i].interpolate = d->Declaration.Interpolate;
			break;
		}

		/* Only outputs get exported below. */
		if (d->Declaration.File != TGSI_FILE_OUTPUT)
			continue;

		for (index = d->Range.First; index <= d->Range.Last; index++) {
			for (chan = 0; chan < 4; chan++ ) {
				LLVMValueRef out_ptr =
					si_shader_ctx->radeon_bld.soa.outputs
					[index][chan];
				/* +5 because the first output value will be
				 * the 6th argument to the intrinsic. */
				args[chan + 5]= LLVMBuildLoad(
					base->gallivm->builder, out_ptr, "");
			}

			/* XXX: We probably need to keep track of the output
			 * values, so we know what we are passing to the next
			 * stage. */

			/* Select the correct target */
			switch(d->Semantic.Name) {
			case TGSI_SEMANTIC_POSITION:
				target = V_008DFC_SQ_EXP_POS;
				break;
			case TGSI_SEMANTIC_COLOR:
				if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX) {
					target = V_008DFC_SQ_EXP_PARAM + param_count;
					param_count++;
				} else {
					target = V_008DFC_SQ_EXP_MRT + color_count;
					color_count++;
				}
				break;
			case TGSI_SEMANTIC_GENERIC:
				target = V_008DFC_SQ_EXP_PARAM + param_count;
				param_count++;
				break;
			default:
				target = 0;
				fprintf(stderr,
					"Warning: SI unhandled output type:%d\n",
					d->Semantic.Name);
			}

			/* Specify which components to enable */
			args[0] = lp_build_const_int32(base->gallivm,
					component_bits);

			/* Specify whether the EXEC mask represents the valid mask */
			args[1] = lp_build_const_int32(base->gallivm, 0);

			/* Specify whether this is the last export */
			args[2] = lp_build_const_int32(base->gallivm, 0);

			/* Specify the target we are exporting */
			args[3] = lp_build_const_int32(base->gallivm, target);

			/* Set COMPR flag to zero to export data as 32-bit */
			args[4] = uint->zero;

			/* Buffer the "key" export (VS position / PS color) so
			 * the final one can carry the "done" bit; flush any
			 * previously buffered export first. */
			if (si_shader_ctx->type == TGSI_PROCESSOR_VERTEX ?
			    (d->Semantic.Name == TGSI_SEMANTIC_POSITION) :
			    (d->Semantic.Name == TGSI_SEMANTIC_COLOR)) {
				if (last_args[0]) {
					lp_build_intrinsic(base->gallivm->builder,
						"llvm.SI.export",
						LLVMVoidTypeInContext(base->gallivm->context),
						last_args, 9);
				}

				memcpy(last_args, args, sizeof(args));
			} else {
				lp_build_intrinsic(base->gallivm->builder,
					"llvm.SI.export",
					LLVMVoidTypeInContext(base->gallivm->context),
					args, 9);
			}

		}
	}

	/* Specify whether the EXEC mask represents the valid mask */
	last_args[1] = lp_build_const_int32(base->gallivm,
			si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT);

	/* Specify that this is the last export */
	last_args[2] = lp_build_const_int32(base->gallivm, 1);

	lp_build_intrinsic(base->gallivm->builder,
		"llvm.SI.export",
		LLVMVoidTypeInContext(base->gallivm->context),
		last_args, 9);

/* XXX: Look up what this function does */
/* ctx->shader->output[i].spi_sid = r600_spi_sid(&ctx->shader->output[i]);*/
}
436
/* fetch_args hook for the texture-sampling action: marshal the operands of
 * the llvm.SI.sample intrinsic.
 *
 * Argument layout: [0] write mask, [1] coordinate vec4, [2]+[3] resource
 * descriptor pointer and byte offset, [4]+[5] sampler descriptor pointer
 * and byte offset.
 *
 * NOTE(review): both descriptor offsets are derived from Src[2], but this
 * action is bound to TGSI_OPCODE_TEX, whose sampler operand is normally
 * Src[1] -- confirm the intended source index.
 */
static void tex_fetch_args(
	struct lp_build_tgsi_context * bld_base,
	struct lp_build_emit_data * emit_data)
{
	/* WriteMask */
	emit_data->args[0] = lp_build_const_int32(bld_base->base.gallivm,
			emit_data->inst->Dst[0].Register.WriteMask);

	/* Coordinates */
	/* XXX: Not all sample instructions need 4 address arguments. */
	emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
			0, LP_CHAN_ALL);

	/* Resource */
	emit_data->args[2] = use_sgpr(bld_base->base.gallivm, SGPR_I64, 2);
	emit_data->args[3] = lp_build_const_int32(bld_base->base.gallivm,
			32 * emit_data->inst->Src[2].Register.Index);

	/* Sampler */
	emit_data->args[4] = use_sgpr(bld_base->base.gallivm, SGPR_I64, 1);
	emit_data->args[5] = lp_build_const_int32(bld_base->base.gallivm,
			16 * emit_data->inst->Src[2].Register.Index);

	/* Dimensions */
	/* XXX: We might want to pass this information to the shader at some. */
/*	emit_data->args[4] = lp_build_const_int32(bld_base->base.gallivm,
					emit_data->inst->Texture.Texture);
*/

	emit_data->arg_count = 6;
	/* XXX: To optimize, we could use a float or v2f32, if the last bits of
	 * the writemask are clear */
	emit_data->dst_type = LLVMVectorType(
			LLVMFloatTypeInContext(bld_base->base.gallivm->context),
			4);
}
473
/* Opcode action for texture sampling: tex_fetch_args() marshals the
 * operands and the generic intrinsic emitter lowers the opcode to a call
 * to llvm.SI.sample. */
static const struct lp_build_tgsi_action tex_action = {
	.fetch_args = tex_fetch_args,
	.emit = lp_build_tgsi_intrinsic,
	.intr_name = "llvm.SI.sample"
};
479
480
481int si_pipe_shader_create(
482 struct pipe_context *ctx,
483 struct si_pipe_shader *shader)
484{
485 struct r600_context *rctx = (struct r600_context*)ctx;
486 struct si_shader_context si_shader_ctx;
487 struct tgsi_shader_info shader_info;
488 struct lp_build_tgsi_context * bld_base;
489 LLVMModuleRef mod;
490 unsigned char * inst_bytes;
491 unsigned inst_byte_count;
492 unsigned i;
493
494 radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
495 bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
496
497 tgsi_scan_shader(shader->tokens, &shader_info);
498 bld_base->info = &shader_info;
499 bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
500 bld_base->emit_prologue = si_llvm_emit_prologue;
501 bld_base->emit_epilogue = si_llvm_emit_epilogue;
502
503 bld_base->op_actions[TGSI_OPCODE_TEX] = tex_action;
504
505 si_shader_ctx.radeon_bld.load_input = declare_input;
506 si_shader_ctx.tokens = shader->tokens;
507 tgsi_parse_init(&si_shader_ctx.parse, si_shader_ctx.tokens);
508 si_shader_ctx.shader = shader;
509 si_shader_ctx.type = si_shader_ctx.parse.FullHeader.Processor.Processor;
510 si_shader_ctx.rctx = rctx;
511
512 shader->shader.nr_cbufs = rctx->nr_cbufs;
513
514 lp_build_tgsi_llvm(bld_base, shader->tokens);
515
516 radeon_llvm_finalize_module(&si_shader_ctx.radeon_bld);
517
518 mod = bld_base->base.gallivm->module;
519 tgsi_dump(shader->tokens, 0);
520 LLVMDumpModule(mod);
521 radeon_llvm_compile(mod, &inst_bytes, &inst_byte_count, "SI", 1 /* dump */);
522 fprintf(stderr, "SI CODE:\n");
523 for (i = 0; i < inst_byte_count; i+=4 ) {
524 fprintf(stderr, "%02x%02x%02x%02x\n", inst_bytes[i + 3],
525 inst_bytes[i + 2], inst_bytes[i + 1],
526 inst_bytes[i]);
527 }
528
529 shader->num_sgprs = util_le32_to_cpu(*(uint32_t*)inst_bytes);
530 shader->num_vgprs = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 4));
531 shader->spi_ps_input_ena = util_le32_to_cpu(*(uint32_t*)(inst_bytes + 8));
532
533 tgsi_parse_free(&si_shader_ctx.parse);
534
535 /* copy new shader */
536 if (shader->bo == NULL) {
537 uint32_t *ptr;
538
539 shader->bo = (struct r600_resource*)
540 pipe_buffer_create(ctx->screen, PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, inst_byte_count);
541 if (shader->bo == NULL) {
542 return -ENOMEM;
543 }
544 ptr = (uint32_t*)rctx->ws->buffer_map(shader->bo->buf, rctx->cs, PIPE_TRANSFER_WRITE);
545 if (0 /*R600_BIG_ENDIAN*/) {
546 for (i = 0; i < (inst_byte_count-12)/4; ++i) {
547 ptr[i] = util_bswap32(*(uint32_t*)(inst_bytes+12 + i*4));
548 }
549 } else {
550 memcpy(ptr, inst_bytes + 12, inst_byte_count - 12);
551 }
552 rctx->ws->buffer_unmap(shader->bo->buf);
553 }
554
555 free(inst_bytes);
556
557 return 0;
558}
559
560void si_pipe_shader_destroy(struct pipe_context *ctx, struct si_pipe_shader *shader)
561{
562 pipe_resource_reference((struct pipe_resource**)&shader->bo, NULL);
563
564 memset(&shader->shader,0,sizeof(struct r600_shader));
565}