Blame - src/glsl/lower_instructions.cpp - platform/external/mesa3d

blob: d460ba1a97abd2d3fe00f368c2ef5eba4f0e7d55 [file] [log] [blame]

Kenneth Graunke	63684a9	2010-11-18 17:54:07 -0800	[diff] [blame^]	1	/*
				2	* Copyright © 2010 Intel Corporation
				3	*
				4	* Permission is hereby granted, free of charge, to any person obtaining a
				5	* copy of this software and associated documentation files (the "Software"),
				6	* to deal in the Software without restriction, including without limitation
				7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				8	* and/or sell copies of the Software, and to permit persons to whom the
				9	* Software is furnished to do so, subject to the following conditions:
				10	*
				11	* The above copyright notice and this permission notice (including the next
				12	* paragraph) shall be included in all copies or substantial portions of the
				13	* Software.
				14	*
				15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
				20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
				21	* DEALINGS IN THE SOFTWARE.
				22	*/
				23
				24	/**
				25	* \file lower_instructions.cpp
				26	*
				27	* Many GPUs lack native instructions for certain expression operations, and
				28	* must replace them with some other expression tree. This pass lowers some
				29	* of the most common cases, allowing the lowering code to be implemented once
				30	* rather than in each driver backend.
				31	*
				32	* Currently supported transformations:
				33	* - SUB_TO_ADD_NEG
				34	* - DIV_TO_MUL_RCP
				35	* - EXP_TO_EXP2
				36	* - LOG_TO_LOG2
				37	* - MOD_TO_FRACT
				38	*
				39	* SUB_TO_ADD_NEG:
				40	* ---------------
				41	* Breaks an ir_binop_sub expression down to add(op0, neg(op1))
				42	*
				43	* This simplifies expression reassociation, and for many backends
				44	* there is no subtract operation separate from adding the negation.
				45	* For backends with native subtract operations, they will probably
				46	* want to recognize add(op0, neg(op1)) or the other way around to
				47	* produce a subtract anyway.
				48	*
				49	* DIV_TO_MUL_RCP:
				50	* ---------------
				51	* Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
				52	*
				53	* Many GPUs don't have a divide instruction (945 and 965 included),
				54	* but they do have an RCP instruction to compute an approximate
				55	* reciprocal. By breaking the operation down, constant reciprocals
				56	* can get constant folded.
				57	*
				58	* EXP_TO_EXP2 and LOG_TO_LOG2:
				59	* ----------------------------
				60	* Many GPUs don't have a base e log or exponent instruction, but they
				61	* do have base 2 versions, so this pass converts exp and log to exp2
				62	* and log2 operations.
				63	*
				64	* MOD_TO_FRACT:
				65	* -------------
				66	* Breaks an ir_binop_mod expression down to (op1 * fract(op0 / op1))
				67	*
				68	* Many GPUs don't have a MOD instruction (945 and 965 included), and
				69	* if we have to break it down like this anyway, it gives an
				70	* opportunity to do things like constant fold the (1.0 / op1) easily.
				71	*/
				72
				73	#include "main/core.h" /* for M_E */
				74	#include "glsl_types.h"
				75	#include "ir.h"
				76	#include "ir_optimization.h"
				77
				78	class lower_instructions_visitor : public ir_hierarchical_visitor {
				79	public:
				80	lower_instructions_visitor(unsigned lower)
				81	: progress(false), lower(lower) { }
				82
				83	ir_visitor_status visit_leave(ir_expression *);
				84
				85	bool progress;
				86
				87	private:
				88	unsigned lower; /** Bitfield of which operations to lower */
				89
				90	void sub_to_add_neg(ir_expression *);
				91	void div_to_mul_rcp(ir_expression *);
				92	void mod_to_fract(ir_expression *);
				93	void exp_to_exp2(ir_expression *);
				94	void log_to_log2(ir_expression *);
				95	};
				96
				97	/**
				98	* Determine if a particular type of lowering should occur
				99	*/
				100	#define lowering(x) (this->lower & x)
				101
				102	bool
				103	lower_instructions(exec_list *instructions, unsigned what_to_lower)
				104	{
				105	lower_instructions_visitor v(what_to_lower);
				106
				107	visit_list_elements(&v, instructions);
				108	return v.progress;
				109	}
				110
				111	void
				112	lower_instructions_visitor::sub_to_add_neg(ir_expression *ir)
				113	{
				114	ir->operation = ir_binop_add;
				115	ir->operands[1] = new(ir) ir_expression(ir_unop_neg, ir->operands[1]->type,
				116	ir->operands[1], NULL);
				117	this->progress = true;
				118	}
				119
				120	void
				121	lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir)
				122	{
				123	if (!ir->operands[1]->type->is_integer()) {
				124	/* New expression for the 1.0 / op1 */
				125	ir_rvalue *expr;
				126	expr = new(ir) ir_expression(ir_unop_rcp,
				127	ir->operands[1]->type,
				128	ir->operands[1],
				129	NULL);
				130
				131	/* op0 / op1 -> op0 * (1.0 / op1) */
				132	ir->operation = ir_binop_mul;
				133	ir->operands[1] = expr;
				134	} else {
				135	/* Be careful with integer division -- we need to do it as a
				136	* float and re-truncate, since rcp(n > 1) of an integer would
				137	* just be 0.
				138	*/
				139	ir_rvalue op0, op1;
				140	const struct glsl_type *vec_type;
				141
				142	vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
				143	ir->operands[1]->type->vector_elements,
				144	ir->operands[1]->type->matrix_columns);
				145
				146	if (ir->operands[1]->type->base_type == GLSL_TYPE_INT)
				147	op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL);
				148	else
				149	op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL);
				150
				151	op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL);
				152
				153	vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
				154	ir->operands[0]->type->vector_elements,
				155	ir->operands[0]->type->matrix_columns);
				156
				157	if (ir->operands[0]->type->base_type == GLSL_TYPE_INT)
				158	op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL);
				159	else
				160	op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL);
				161
				162	op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1);
				163
				164	ir->operation = ir_unop_f2i;
				165	ir->operands[0] = op0;
				166	ir->operands[1] = NULL;
				167	}
				168
				169	this->progress = true;
				170	}
				171
				172	void
				173	lower_instructions_visitor::exp_to_exp2(ir_expression *ir)
				174	{
				175	ir_constant *log2_e = new(ir) ir_constant(log2f(M_E));
				176
				177	ir->operation = ir_unop_exp2;
				178	ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[0]->type,
				179	ir->operands[0], log2_e);
				180	this->progress = true;
				181	}
				182
				183	void
				184	lower_instructions_visitor::log_to_log2(ir_expression *ir)
				185	{
				186	ir->operation = ir_binop_mul;
				187	ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type,
				188	ir->operands[0], NULL);
				189	ir->operands[1] = new(ir) ir_constant(1.0f / log2f(M_E));
				190	this->progress = true;
				191	}
				192
				193	void
				194	lower_instructions_visitor::mod_to_fract(ir_expression *ir)
				195	{
				196	ir_variable *temp = new(ir) ir_variable(ir->operands[1]->type, "mod_b",
				197	ir_var_temporary);
				198	this->base_ir->insert_before(temp);
				199
				200	ir_assignment *const assign =
				201	new(ir) ir_assignment(new(ir) ir_dereference_variable(temp),
				202	ir->operands[1], NULL);
				203
				204	this->base_ir->insert_before(assign);
				205
				206	ir_expression *const div_expr =
				207	new(ir) ir_expression(ir_binop_div, ir->operands[0]->type,
				208	ir->operands[0],
				209	new(ir) ir_dereference_variable(temp));
				210
				211	/* Don't generate new IR that would need to be lowered in an additional
				212	* pass.
				213	*/
				214	if (lowering(DIV_TO_MUL_RCP))
				215	div_to_mul_rcp(div_expr);
				216
				217	ir_rvalue *expr = new(ir) ir_expression(ir_unop_fract,
				218	ir->operands[0]->type,
				219	div_expr,
				220	NULL);
				221
				222	ir->operation = ir_binop_mul;
				223	ir->operands[0] = new(ir) ir_dereference_variable(temp);
				224	ir->operands[1] = expr;
				225	this->progress = true;
				226	}
				227
				228	ir_visitor_status
				229	lower_instructions_visitor::visit_leave(ir_expression *ir)
				230	{
				231	switch (ir->operation) {
				232	case ir_binop_sub:
				233	if (lowering(SUB_TO_ADD_NEG))
				234	sub_to_add_neg(ir);
				235	break;
				236
				237	case ir_binop_div:
				238	if (lowering(DIV_TO_MUL_RCP))
				239	div_to_mul_rcp(ir);
				240	break;
				241
				242	case ir_unop_exp:
				243	if (lowering(EXP_TO_EXP2))
				244	exp_to_exp2(ir);
				245	break;
				246
				247	case ir_unop_log:
				248	if (lowering(LOG_TO_LOG2))
				249	log_to_log2(ir);
				250	break;
				251
				252	case ir_binop_mod:
				253	if (lowering(MOD_TO_FRACT))
				254	mod_to_fract(ir);
				255	break;
				256
				257	default:
				258	return visit_continue;
				259	}
				260
				261	return visit_continue;
				262	}