Blame - tcg/optimize.c - fp2-dev/platform/external/qemu

blob: ef83229a58eb08706bd151eb74937a8c35e6b230 [file] [log] [blame]

David 'Digit' Turner	86b1fb0	2014-03-21 15:20:21 +0100	[diff] [blame]	1	/*
				2	* Optimizations for Tiny Code Generator for QEMU
				3	*
				4	* Copyright (c) 2010 Samsung Electronics.
				5	* Contributed by Kirill Batuzov <batuzovk@ispras.ru>
				6	*
				7	* Permission is hereby granted, free of charge, to any person obtaining a copy
				8	* of this software and associated documentation files (the "Software"), to deal
				9	* in the Software without restriction, including without limitation the rights
				10	* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
				11	* copies of the Software, and to permit persons to whom the Software is
				12	* furnished to do so, subject to the following conditions:
				13	*
				14	* The above copyright notice and this permission notice shall be included in
				15	* all copies or substantial portions of the Software.
				16	*
				17	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				18	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				19	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				20	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				21	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
				22	* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
				23	* THE SOFTWARE.
				24	*/
				25
				26	#include "config.h"
				27
				28	#include <stdlib.h>
				29	#include <stdio.h>
				30
				31	#include "qemu-common.h"
				32	#include "cpu.h"
				33	#include "exec/exec-all.h"
				34	#include "tcg-op.h"
				35
				36	#define CASE_OP_32_64(x) \
				37	glue(glue(case INDEX_op_, x), _i32): \
				38	glue(glue(case INDEX_op_, x), _i64)
				39
				40	typedef enum {
				41	TCG_TEMP_UNDEF = 0,
				42	TCG_TEMP_CONST,
				43	TCG_TEMP_COPY,
				44	} tcg_temp_state;
				45
				46	struct tcg_temp_info {
				47	tcg_temp_state state;
				48	uint16_t prev_copy;
				49	uint16_t next_copy;
				50	tcg_target_ulong val;
				51	tcg_target_ulong mask;
				52	};
				53
				54	static struct tcg_temp_info temps[TCG_MAX_TEMPS];
				55
				56	/* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove
				57	the copy flag from the left temp. */
				58	static void reset_temp(TCGArg temp)
				59	{
				60	if (temps[temp].state == TCG_TEMP_COPY) {
				61	if (temps[temp].prev_copy == temps[temp].next_copy) {
				62	temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
				63	} else {
				64	temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
				65	temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
				66	}
				67	}
				68	temps[temp].state = TCG_TEMP_UNDEF;
				69	temps[temp].mask = -1;
				70	}
				71
				72	/* Reset all temporaries, given that there are NB_TEMPS of them. */
				73	static void reset_all_temps(int nb_temps)
				74	{
				75	int i;
				76	for (i = 0; i < nb_temps; i++) {
				77	temps[i].state = TCG_TEMP_UNDEF;
				78	temps[i].mask = -1;
				79	}
				80	}
				81
				82	static int op_bits(TCGOpcode op)
				83	{
				84	const TCGOpDef *def = &tcg_op_defs[op];
				85	return def->flags & TCG_OPF_64BIT ? 64 : 32;
				86	}
				87
				88	static TCGOpcode op_to_movi(TCGOpcode op)
				89	{
				90	switch (op_bits(op)) {
				91	case 32:
				92	return INDEX_op_movi_i32;
				93	case 64:
				94	return INDEX_op_movi_i64;
				95	default:
				96	fprintf(stderr, "op_to_movi: unexpected return value of "
				97	"function op_bits.\n");
				98	tcg_abort();
				99	}
				100	}
				101
				102	static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
				103	{
				104	TCGArg i;
				105
				106	/* If this is already a global, we can't do better. */
				107	if (temp < s->nb_globals) {
				108	return temp;
				109	}
				110
				111	/* Search for a global first. */
				112	for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
				113	if (i < s->nb_globals) {
				114	return i;
				115	}
				116	}
				117
				118	/* If it is a temp, search for a temp local. */
				119	if (!s->temps[temp].temp_local) {
				120	for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
				121	if (s->temps[i].temp_local) {
				122	return i;
				123	}
				124	}
				125	}
				126
				127	/* Failure to find a better representation, return the same temp. */
				128	return temp;
				129	}
				130
				131	static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
				132	{
				133	TCGArg i;
				134
				135	if (arg1 == arg2) {
				136	return true;
				137	}
				138
				139	if (temps[arg1].state != TCG_TEMP_COPY
				140	\|\| temps[arg2].state != TCG_TEMP_COPY) {
				141	return false;
				142	}
				143
				144	for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
				145	if (i == arg2) {
				146	return true;
				147	}
				148	}
				149
				150	return false;
				151	}
				152
				153	static void tcg_opt_gen_mov(TCGContext s, TCGArg gen_args,
				154	TCGArg dst, TCGArg src)
				155	{
				156	reset_temp(dst);
				157	temps[dst].mask = temps[src].mask;
				158	assert(temps[src].state != TCG_TEMP_CONST);
				159
				160	if (s->temps[src].type == s->temps[dst].type) {
				161	if (temps[src].state != TCG_TEMP_COPY) {
				162	temps[src].state = TCG_TEMP_COPY;
				163	temps[src].next_copy = src;
				164	temps[src].prev_copy = src;
				165	}
				166	temps[dst].state = TCG_TEMP_COPY;
				167	temps[dst].next_copy = temps[src].next_copy;
				168	temps[dst].prev_copy = src;
				169	temps[temps[dst].next_copy].prev_copy = dst;
				170	temps[src].next_copy = dst;
				171	}
				172
				173	gen_args[0] = dst;
				174	gen_args[1] = src;
				175	}
				176
				177	static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
				178	{
				179	reset_temp(dst);
				180	temps[dst].state = TCG_TEMP_CONST;
				181	temps[dst].val = val;
				182	temps[dst].mask = val;
				183	gen_args[0] = dst;
				184	gen_args[1] = val;
				185	}
				186
				187	static TCGOpcode op_to_mov(TCGOpcode op)
				188	{
				189	switch (op_bits(op)) {
				190	case 32:
				191	return INDEX_op_mov_i32;
				192	case 64:
				193	return INDEX_op_mov_i64;
				194	default:
				195	fprintf(stderr, "op_to_mov: unexpected return value of "
				196	"function op_bits.\n");
				197	tcg_abort();
				198	}
				199	}
				200
				201	static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
				202	{
				203	uint64_t l64, h64;
				204
				205	switch (op) {
				206	CASE_OP_32_64(add):
				207	return x + y;
				208
				209	CASE_OP_32_64(sub):
				210	return x - y;
				211
				212	CASE_OP_32_64(mul):
				213	return x * y;
				214
				215	CASE_OP_32_64(and):
				216	return x & y;
				217
				218	CASE_OP_32_64(or):
				219	return x \| y;
				220
				221	CASE_OP_32_64(xor):
				222	return x ^ y;
				223
				224	case INDEX_op_shl_i32:
				225	return (uint32_t)x << (uint32_t)y;
				226
				227	case INDEX_op_shl_i64:
				228	return (uint64_t)x << (uint64_t)y;
				229
				230	case INDEX_op_shr_i32:
				231	return (uint32_t)x >> (uint32_t)y;
				232
				233	case INDEX_op_shr_i64:
				234	return (uint64_t)x >> (uint64_t)y;
				235
				236	case INDEX_op_sar_i32:
				237	return (int32_t)x >> (int32_t)y;
				238
				239	case INDEX_op_sar_i64:
				240	return (int64_t)x >> (int64_t)y;
				241
				242	case INDEX_op_rotr_i32:
				243	return ror32(x, y);
				244
				245	case INDEX_op_rotr_i64:
				246	return ror64(x, y);
				247
				248	case INDEX_op_rotl_i32:
				249	return rol32(x, y);
				250
				251	case INDEX_op_rotl_i64:
				252	return rol64(x, y);
				253
				254	CASE_OP_32_64(not):
				255	return ~x;
				256
				257	CASE_OP_32_64(neg):
				258	return -x;
				259
				260	CASE_OP_32_64(andc):
				261	return x & ~y;
				262
				263	CASE_OP_32_64(orc):
				264	return x \| ~y;
				265
				266	CASE_OP_32_64(eqv):
				267	return ~(x ^ y);
				268
				269	CASE_OP_32_64(nand):
				270	return ~(x & y);
				271
				272	CASE_OP_32_64(nor):
				273	return ~(x \| y);
				274
				275	CASE_OP_32_64(ext8s):
				276	return (int8_t)x;
				277
				278	CASE_OP_32_64(ext16s):
				279	return (int16_t)x;
				280
				281	CASE_OP_32_64(ext8u):
				282	return (uint8_t)x;
				283
				284	CASE_OP_32_64(ext16u):
				285	return (uint16_t)x;
				286
				287	case INDEX_op_ext32s_i64:
				288	return (int32_t)x;
				289
				290	case INDEX_op_ext32u_i64:
				291	return (uint32_t)x;
				292
				293	case INDEX_op_muluh_i32:
				294	return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
				295	case INDEX_op_mulsh_i32:
				296	return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
				297
				298	case INDEX_op_muluh_i64:
				299	mulu64(&l64, &h64, x, y);
				300	return h64;
				301	case INDEX_op_mulsh_i64:
				302	muls64(&l64, &h64, x, y);
				303	return h64;
				304
				305	case INDEX_op_div_i32:
				306	/* Avoid crashing on divide by zero, otherwise undefined. */
				307	return (int32_t)x / ((int32_t)y ? : 1);
				308	case INDEX_op_divu_i32:
				309	return (uint32_t)x / ((uint32_t)y ? : 1);
				310	case INDEX_op_div_i64:
				311	return (int64_t)x / ((int64_t)y ? : 1);
				312	case INDEX_op_divu_i64:
				313	return (uint64_t)x / ((uint64_t)y ? : 1);
				314
				315	case INDEX_op_rem_i32:
				316	return (int32_t)x % ((int32_t)y ? : 1);
				317	case INDEX_op_remu_i32:
				318	return (uint32_t)x % ((uint32_t)y ? : 1);
				319	case INDEX_op_rem_i64:
				320	return (int64_t)x % ((int64_t)y ? : 1);
				321	case INDEX_op_remu_i64:
				322	return (uint64_t)x % ((uint64_t)y ? : 1);
				323
				324	default:
				325	fprintf(stderr,
				326	"Unrecognized operation %d in do_constant_folding.\n", op);
				327	tcg_abort();
				328	}
				329	}
				330
				331	static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
				332	{
				333	TCGArg res = do_constant_folding_2(op, x, y);
				334	if (op_bits(op) == 32) {
				335	res &= 0xffffffff;
				336	}
				337	return res;
				338	}
				339
				340	static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
				341	{
				342	switch (c) {
				343	case TCG_COND_EQ:
				344	return x == y;
				345	case TCG_COND_NE:
				346	return x != y;
				347	case TCG_COND_LT:
				348	return (int32_t)x < (int32_t)y;
				349	case TCG_COND_GE:
				350	return (int32_t)x >= (int32_t)y;
				351	case TCG_COND_LE:
				352	return (int32_t)x <= (int32_t)y;
				353	case TCG_COND_GT:
				354	return (int32_t)x > (int32_t)y;
				355	case TCG_COND_LTU:
				356	return x < y;
				357	case TCG_COND_GEU:
				358	return x >= y;
				359	case TCG_COND_LEU:
				360	return x <= y;
				361	case TCG_COND_GTU:
				362	return x > y;
				363	default:
				364	tcg_abort();
				365	}
				366	}
				367
				368	static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
				369	{
				370	switch (c) {
				371	case TCG_COND_EQ:
				372	return x == y;
				373	case TCG_COND_NE:
				374	return x != y;
				375	case TCG_COND_LT:
				376	return (int64_t)x < (int64_t)y;
				377	case TCG_COND_GE:
				378	return (int64_t)x >= (int64_t)y;
				379	case TCG_COND_LE:
				380	return (int64_t)x <= (int64_t)y;
				381	case TCG_COND_GT:
				382	return (int64_t)x > (int64_t)y;
				383	case TCG_COND_LTU:
				384	return x < y;
				385	case TCG_COND_GEU:
				386	return x >= y;
				387	case TCG_COND_LEU:
				388	return x <= y;
				389	case TCG_COND_GTU:
				390	return x > y;
				391	default:
				392	tcg_abort();
				393	}
				394	}
				395
				396	static bool do_constant_folding_cond_eq(TCGCond c)
				397	{
				398	switch (c) {
				399	case TCG_COND_GT:
				400	case TCG_COND_LTU:
				401	case TCG_COND_LT:
				402	case TCG_COND_GTU:
				403	case TCG_COND_NE:
				404	return 0;
				405	case TCG_COND_GE:
				406	case TCG_COND_GEU:
				407	case TCG_COND_LE:
				408	case TCG_COND_LEU:
				409	case TCG_COND_EQ:
				410	return 1;
				411	default:
				412	tcg_abort();
				413	}
				414	}
				415
				416	/* Return 2 if the condition can't be simplified, and the result
				417	of the condition (0 or 1) if it can */
				418	static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
				419	TCGArg y, TCGCond c)
				420	{
				421	if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
				422	switch (op_bits(op)) {
				423	case 32:
				424	return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
				425	case 64:
				426	return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
				427	default:
				428	tcg_abort();
				429	}
				430	} else if (temps_are_copies(x, y)) {
				431	return do_constant_folding_cond_eq(c);
				432	} else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
				433	switch (c) {
				434	case TCG_COND_LTU:
				435	return 0;
				436	case TCG_COND_GEU:
				437	return 1;
				438	default:
				439	return 2;
				440	}
				441	} else {
				442	return 2;
				443	}
				444	}
				445
				446	/* Return 2 if the condition can't be simplified, and the result
				447	of the condition (0 or 1) if it can */
				448	static TCGArg do_constant_folding_cond2(TCGArg p1, TCGArg p2, TCGCond c)
				449	{
				450	TCGArg al = p1[0], ah = p1[1];
				451	TCGArg bl = p2[0], bh = p2[1];
				452
				453	if (temps[bl].state == TCG_TEMP_CONST
				454	&& temps[bh].state == TCG_TEMP_CONST) {
				455	uint64_t b = ((uint64_t)temps[bh].val << 32) \| (uint32_t)temps[bl].val;
				456
				457	if (temps[al].state == TCG_TEMP_CONST
				458	&& temps[ah].state == TCG_TEMP_CONST) {
				459	uint64_t a;
				460	a = ((uint64_t)temps[ah].val << 32) \| (uint32_t)temps[al].val;
				461	return do_constant_folding_cond_64(a, b, c);
				462	}
				463	if (b == 0) {
				464	switch (c) {
				465	case TCG_COND_LTU:
				466	return 0;
				467	case TCG_COND_GEU:
				468	return 1;
				469	default:
				470	break;
				471	}
				472	}
				473	}
				474	if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
				475	return do_constant_folding_cond_eq(c);
				476	}
				477	return 2;
				478	}
				479
				480	static bool swap_commutative(TCGArg dest, TCGArg p1, TCGArg p2)
				481	{
				482	TCGArg a1 = p1, a2 = p2;
				483	int sum = 0;
				484	sum += temps[a1].state == TCG_TEMP_CONST;
				485	sum -= temps[a2].state == TCG_TEMP_CONST;
				486
				487	/* Prefer the constant in second argument, and then the form
				488	op a, a, b, which is better handled on non-RISC hosts. */
				489	if (sum > 0 \|\| (sum == 0 && dest == a2)) {
				490	*p1 = a2;
				491	*p2 = a1;
				492	return true;
				493	}
				494	return false;
				495	}
				496
				497	static bool swap_commutative2(TCGArg p1, TCGArg p2)
				498	{
				499	int sum = 0;
				500	sum += temps[p1[0]].state == TCG_TEMP_CONST;
				501	sum += temps[p1[1]].state == TCG_TEMP_CONST;
				502	sum -= temps[p2[0]].state == TCG_TEMP_CONST;
				503	sum -= temps[p2[1]].state == TCG_TEMP_CONST;
				504	if (sum > 0) {
				505	TCGArg t;
				506	t = p1[0], p1[0] = p2[0], p2[0] = t;
				507	t = p1[1], p1[1] = p2[1], p2[1] = t;
				508	return true;
				509	}
				510	return false;
				511	}
				512
				513	/* Propagate constants and copies, fold constant expressions. */
				514	static TCGArg tcg_constant_folding(TCGContext s, uint16_t *tcg_opc_ptr,
				515	TCGArg args, TCGOpDef tcg_op_defs)
				516	{
				517	int i, nb_ops, op_index, nb_temps, nb_globals, nb_call_args;
				518	tcg_target_ulong mask, affected;
				519	TCGOpcode op;
				520	const TCGOpDef *def;
				521	TCGArg *gen_args;
				522	TCGArg tmp;
				523
				524	/* Array VALS has an element for each temp.
				525	If this temp holds a constant then its value is kept in VALS' element.
				526	If this temp is a copy of other ones then the other copies are
				527	available through the doubly linked circular list. */
				528
				529	nb_temps = s->nb_temps;
				530	nb_globals = s->nb_globals;
				531	reset_all_temps(nb_temps);
				532
				533	nb_ops = tcg_opc_ptr - s->gen_opc_buf;
				534	gen_args = args;
				535	for (op_index = 0; op_index < nb_ops; op_index++) {
				536	op = s->gen_opc_buf[op_index];
				537	def = &tcg_op_defs[op];
				538	/* Do copy propagation */
				539	if (op == INDEX_op_call) {
				540	int nb_oargs = args[0] >> 16;
				541	int nb_iargs = args[0] & 0xffff;
				542	for (i = nb_oargs + 1; i < nb_oargs + nb_iargs + 1; i++) {
				543	if (temps[args[i]].state == TCG_TEMP_COPY) {
				544	args[i] = find_better_copy(s, args[i]);
				545	}
				546	}
				547	} else {
				548	for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
				549	if (temps[args[i]].state == TCG_TEMP_COPY) {
				550	args[i] = find_better_copy(s, args[i]);
				551	}
				552	}
				553	}
				554
				555	/* For commutative operations make constant second argument */
				556	switch (op) {
				557	CASE_OP_32_64(add):
				558	CASE_OP_32_64(mul):
				559	CASE_OP_32_64(and):
				560	CASE_OP_32_64(or):
				561	CASE_OP_32_64(xor):
				562	CASE_OP_32_64(eqv):
				563	CASE_OP_32_64(nand):
				564	CASE_OP_32_64(nor):
				565	CASE_OP_32_64(muluh):
				566	CASE_OP_32_64(mulsh):
				567	swap_commutative(args[0], &args[1], &args[2]);
				568	break;
				569	CASE_OP_32_64(brcond):
				570	if (swap_commutative(-1, &args[0], &args[1])) {
				571	args[2] = tcg_swap_cond(args[2]);
				572	}
				573	break;
				574	CASE_OP_32_64(setcond):
				575	if (swap_commutative(args[0], &args[1], &args[2])) {
				576	args[3] = tcg_swap_cond(args[3]);
				577	}
				578	break;
				579	CASE_OP_32_64(movcond):
				580	if (swap_commutative(-1, &args[1], &args[2])) {
				581	args[5] = tcg_swap_cond(args[5]);
				582	}
				583	/* For movcond, we canonicalize the "false" input reg to match
				584	the destination reg so that the tcg backend can implement
				585	a "move if true" operation. */
				586	if (swap_commutative(args[0], &args[4], &args[3])) {
				587	args[5] = tcg_invert_cond(args[5]);
				588	}
				589	break;
				590	CASE_OP_32_64(add2):
				591	swap_commutative(args[0], &args[2], &args[4]);
				592	swap_commutative(args[1], &args[3], &args[5]);
				593	break;
				594	CASE_OP_32_64(mulu2):
				595	CASE_OP_32_64(muls2):
				596	swap_commutative(args[0], &args[2], &args[3]);
				597	break;
				598	case INDEX_op_brcond2_i32:
				599	if (swap_commutative2(&args[0], &args[2])) {
				600	args[4] = tcg_swap_cond(args[4]);
				601	}
				602	break;
				603	case INDEX_op_setcond2_i32:
				604	if (swap_commutative2(&args[1], &args[3])) {
				605	args[5] = tcg_swap_cond(args[5]);
				606	}
				607	break;
				608	default:
				609	break;
				610	}
				611
				612	/* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
				613	and "sub r, 0, a => neg r, a" case. */
				614	switch (op) {
				615	CASE_OP_32_64(shl):
				616	CASE_OP_32_64(shr):
				617	CASE_OP_32_64(sar):
				618	CASE_OP_32_64(rotl):
				619	CASE_OP_32_64(rotr):
				620	if (temps[args[1]].state == TCG_TEMP_CONST
				621	&& temps[args[1]].val == 0) {
				622	s->gen_opc_buf[op_index] = op_to_movi(op);
				623	tcg_opt_gen_movi(gen_args, args[0], 0);
				624	args += 3;
				625	gen_args += 2;
				626	continue;
				627	}
				628	break;
				629	CASE_OP_32_64(sub):
				630	{
				631	TCGOpcode neg_op;
				632	bool have_neg;
				633
				634	if (temps[args[2]].state == TCG_TEMP_CONST) {
				635	/* Proceed with possible constant folding. */
				636	break;
				637	}
				638	if (op == INDEX_op_sub_i32) {
				639	neg_op = INDEX_op_neg_i32;
				640	have_neg = TCG_TARGET_HAS_neg_i32;
				641	} else {
				642	neg_op = INDEX_op_neg_i64;
				643	have_neg = TCG_TARGET_HAS_neg_i64;
				644	}
				645	if (!have_neg) {
				646	break;
				647	}
				648	if (temps[args[1]].state == TCG_TEMP_CONST
				649	&& temps[args[1]].val == 0) {
				650	s->gen_opc_buf[op_index] = neg_op;
				651	reset_temp(args[0]);
				652	gen_args[0] = args[0];
				653	gen_args[1] = args[2];
				654	args += 3;
				655	gen_args += 2;
				656	continue;
				657	}
				658	}
				659	break;
				660	default:
				661	break;
				662	}
				663
				664	/* Simplify expression for "op r, a, 0 => mov r, a" cases */
				665	switch (op) {
				666	CASE_OP_32_64(add):
				667	CASE_OP_32_64(sub):
				668	CASE_OP_32_64(shl):
				669	CASE_OP_32_64(shr):
				670	CASE_OP_32_64(sar):
				671	CASE_OP_32_64(rotl):
				672	CASE_OP_32_64(rotr):
				673	CASE_OP_32_64(or):
				674	CASE_OP_32_64(xor):
				675	if (temps[args[1]].state == TCG_TEMP_CONST) {
				676	/* Proceed with possible constant folding. */
				677	break;
				678	}
				679	if (temps[args[2]].state == TCG_TEMP_CONST
				680	&& temps[args[2]].val == 0) {
				681	if (temps_are_copies(args[0], args[1])) {
				682	s->gen_opc_buf[op_index] = INDEX_op_nop;
				683	} else {
				684	s->gen_opc_buf[op_index] = op_to_mov(op);
				685	tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
				686	gen_args += 2;
				687	}
				688	args += 3;
				689	continue;
				690	}
				691	break;
				692	default:
				693	break;
				694	}
				695
				696	/* Simplify using known-zero bits */
				697	mask = -1;
				698	affected = -1;
				699	switch (op) {
				700	CASE_OP_32_64(ext8s):
				701	if ((temps[args[1]].mask & 0x80) != 0) {
				702	break;
				703	}
				704	CASE_OP_32_64(ext8u):
				705	mask = 0xff;
				706	goto and_const;
				707	CASE_OP_32_64(ext16s):
				708	if ((temps[args[1]].mask & 0x8000) != 0) {
				709	break;
				710	}
				711	CASE_OP_32_64(ext16u):
				712	mask = 0xffff;
				713	goto and_const;
				714	case INDEX_op_ext32s_i64:
				715	if ((temps[args[1]].mask & 0x80000000) != 0) {
				716	break;
				717	}
				718	case INDEX_op_ext32u_i64:
				719	mask = 0xffffffffU;
				720	goto and_const;
				721
				722	CASE_OP_32_64(and):
				723	mask = temps[args[2]].mask;
				724	if (temps[args[2]].state == TCG_TEMP_CONST) {
				725	and_const:
				726	affected = temps[args[1]].mask & ~mask;
				727	}
				728	mask = temps[args[1]].mask & mask;
				729	break;
				730
				731	CASE_OP_32_64(sar):
				732	if (temps[args[2]].state == TCG_TEMP_CONST) {
				733	mask = ((tcg_target_long)temps[args[1]].mask
				734	>> temps[args[2]].val);
				735	}
				736	break;
				737
				738	CASE_OP_32_64(shr):
				739	if (temps[args[2]].state == TCG_TEMP_CONST) {
				740	mask = temps[args[1]].mask >> temps[args[2]].val;
				741	}
				742	break;
				743
				744	CASE_OP_32_64(shl):
				745	if (temps[args[2]].state == TCG_TEMP_CONST) {
				746	mask = temps[args[1]].mask << temps[args[2]].val;
				747	}
				748	break;
				749
				750	CASE_OP_32_64(neg):
				751	/* Set to 1 all bits to the left of the rightmost. */
				752	mask = -(temps[args[1]].mask & -temps[args[1]].mask);
				753	break;
				754
				755	CASE_OP_32_64(deposit):
				756	tmp = ((1ull << args[4]) - 1);
				757	mask = ((temps[args[1]].mask & ~(tmp << args[3]))
				758	\| ((temps[args[2]].mask & tmp) << args[3]));
				759	break;
				760
				761	CASE_OP_32_64(or):
				762	CASE_OP_32_64(xor):
				763	mask = temps[args[1]].mask \| temps[args[2]].mask;
				764	break;
				765
				766	CASE_OP_32_64(setcond):
				767	mask = 1;
				768	break;
				769
				770	CASE_OP_32_64(movcond):
				771	mask = temps[args[3]].mask \| temps[args[4]].mask;
				772	break;
				773
				774	default:
				775	break;
				776	}
				777
				778	if (mask == 0) {
				779	assert(def->nb_oargs == 1);
				780	s->gen_opc_buf[op_index] = op_to_movi(op);
				781	tcg_opt_gen_movi(gen_args, args[0], 0);
				782	args += def->nb_oargs + def->nb_iargs + def->nb_cargs;
				783	gen_args += 2;
				784	continue;
				785	}
				786	if (affected == 0) {
				787	assert(def->nb_oargs == 1);
				788	if (temps_are_copies(args[0], args[1])) {
				789	s->gen_opc_buf[op_index] = INDEX_op_nop;
				790	} else if (temps[args[1]].state != TCG_TEMP_CONST) {
				791	s->gen_opc_buf[op_index] = op_to_mov(op);
				792	tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
				793	gen_args += 2;
				794	} else {
				795	s->gen_opc_buf[op_index] = op_to_movi(op);
				796	tcg_opt_gen_movi(gen_args, args[0], temps[args[1]].val);
				797	gen_args += 2;
				798	}
				799	args += def->nb_iargs + 1;
				800	continue;
				801	}
				802
				803	/* Simplify expression for "op r, a, 0 => movi r, 0" cases */
				804	switch (op) {
				805	CASE_OP_32_64(and):
				806	CASE_OP_32_64(mul):
				807	CASE_OP_32_64(muluh):
				808	CASE_OP_32_64(mulsh):
				809	if ((temps[args[2]].state == TCG_TEMP_CONST
				810	&& temps[args[2]].val == 0)) {
				811	s->gen_opc_buf[op_index] = op_to_movi(op);
				812	tcg_opt_gen_movi(gen_args, args[0], 0);
				813	args += 3;
				814	gen_args += 2;
				815	continue;
				816	}
				817	break;
				818	default:
				819	break;
				820	}
				821
				822	/* Simplify expression for "op r, a, a => mov r, a" cases */
				823	switch (op) {
				824	CASE_OP_32_64(or):
				825	CASE_OP_32_64(and):
				826	if (temps_are_copies(args[1], args[2])) {
				827	if (temps_are_copies(args[0], args[1])) {
				828	s->gen_opc_buf[op_index] = INDEX_op_nop;
				829	} else {
				830	s->gen_opc_buf[op_index] = op_to_mov(op);
				831	tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
				832	gen_args += 2;
				833	}
				834	args += 3;
				835	continue;
				836	}
				837	break;
				838	default:
				839	break;
				840	}
				841
				842	/* Simplify expression for "op r, a, a => movi r, 0" cases */
				843	switch (op) {
				844	CASE_OP_32_64(sub):
				845	CASE_OP_32_64(xor):
				846	if (temps_are_copies(args[1], args[2])) {
				847	s->gen_opc_buf[op_index] = op_to_movi(op);
				848	tcg_opt_gen_movi(gen_args, args[0], 0);
				849	gen_args += 2;
				850	args += 3;
				851	continue;
				852	}
				853	break;
				854	default:
				855	break;
				856	}
				857
				858	/* Propagate constants through copy operations and do constant
				859	folding. Constants will be substituted to arguments by register
				860	allocator where needed and possible. Also detect copies. */
				861	switch (op) {
				862	CASE_OP_32_64(mov):
				863	if (temps_are_copies(args[0], args[1])) {
				864	args += 2;
				865	s->gen_opc_buf[op_index] = INDEX_op_nop;
				866	break;
				867	}
				868	if (temps[args[1]].state != TCG_TEMP_CONST) {
				869	tcg_opt_gen_mov(s, gen_args, args[0], args[1]);
				870	gen_args += 2;
				871	args += 2;
				872	break;
				873	}
				874	/* Source argument is constant. Rewrite the operation and
				875	let movi case handle it. */
				876	op = op_to_movi(op);
				877	s->gen_opc_buf[op_index] = op;
				878	args[1] = temps[args[1]].val;
				879	/* fallthrough */
				880	CASE_OP_32_64(movi):
				881	tcg_opt_gen_movi(gen_args, args[0], args[1]);
				882	gen_args += 2;
				883	args += 2;
				884	break;
				885
				886	CASE_OP_32_64(not):
				887	CASE_OP_32_64(neg):
				888	CASE_OP_32_64(ext8s):
				889	CASE_OP_32_64(ext8u):
				890	CASE_OP_32_64(ext16s):
				891	CASE_OP_32_64(ext16u):
				892	case INDEX_op_ext32s_i64:
				893	case INDEX_op_ext32u_i64:
				894	if (temps[args[1]].state == TCG_TEMP_CONST) {
				895	s->gen_opc_buf[op_index] = op_to_movi(op);
				896	tmp = do_constant_folding(op, temps[args[1]].val, 0);
				897	tcg_opt_gen_movi(gen_args, args[0], tmp);
				898	gen_args += 2;
				899	args += 2;
				900	break;
				901	}
				902	goto do_default;
				903
				904	CASE_OP_32_64(add):
				905	CASE_OP_32_64(sub):
				906	CASE_OP_32_64(mul):
				907	CASE_OP_32_64(or):
				908	CASE_OP_32_64(and):
				909	CASE_OP_32_64(xor):
				910	CASE_OP_32_64(shl):
				911	CASE_OP_32_64(shr):
				912	CASE_OP_32_64(sar):
				913	CASE_OP_32_64(rotl):
				914	CASE_OP_32_64(rotr):
				915	CASE_OP_32_64(andc):
				916	CASE_OP_32_64(orc):
				917	CASE_OP_32_64(eqv):
				918	CASE_OP_32_64(nand):
				919	CASE_OP_32_64(nor):
				920	CASE_OP_32_64(muluh):
				921	CASE_OP_32_64(mulsh):
				922	CASE_OP_32_64(div):
				923	CASE_OP_32_64(divu):
				924	CASE_OP_32_64(rem):
				925	CASE_OP_32_64(remu):
				926	if (temps[args[1]].state == TCG_TEMP_CONST
				927	&& temps[args[2]].state == TCG_TEMP_CONST) {
				928	s->gen_opc_buf[op_index] = op_to_movi(op);
				929	tmp = do_constant_folding(op, temps[args[1]].val,
				930	temps[args[2]].val);
				931	tcg_opt_gen_movi(gen_args, args[0], tmp);
				932	gen_args += 2;
				933	args += 3;
				934	break;
				935	}
				936	goto do_default;
				937
				938	CASE_OP_32_64(deposit):
				939	if (temps[args[1]].state == TCG_TEMP_CONST
				940	&& temps[args[2]].state == TCG_TEMP_CONST) {
				941	s->gen_opc_buf[op_index] = op_to_movi(op);
				942	tmp = ((1ull << args[4]) - 1);
				943	tmp = (temps[args[1]].val & ~(tmp << args[3]))
				944	\| ((temps[args[2]].val & tmp) << args[3]);
				945	tcg_opt_gen_movi(gen_args, args[0], tmp);
				946	gen_args += 2;
				947	args += 5;
				948	break;
				949	}
				950	goto do_default;
				951
				952	CASE_OP_32_64(setcond):
				953	tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
				954	if (tmp != 2) {
				955	s->gen_opc_buf[op_index] = op_to_movi(op);
				956	tcg_opt_gen_movi(gen_args, args[0], tmp);
				957	gen_args += 2;
				958	args += 4;
				959	break;
				960	}
				961	goto do_default;
				962
				963	CASE_OP_32_64(brcond):
				964	tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
				965	if (tmp != 2) {
				966	if (tmp) {
				967	reset_all_temps(nb_temps);
				968	s->gen_opc_buf[op_index] = INDEX_op_br;
				969	gen_args[0] = args[3];
				970	gen_args += 1;
				971	} else {
				972	s->gen_opc_buf[op_index] = INDEX_op_nop;
				973	}
				974	args += 4;
				975	break;
				976	}
				977	goto do_default;
				978
				979	CASE_OP_32_64(movcond):
				980	tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
				981	if (tmp != 2) {
				982	if (temps_are_copies(args[0], args[4-tmp])) {
				983	s->gen_opc_buf[op_index] = INDEX_op_nop;
				984	} else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
				985	s->gen_opc_buf[op_index] = op_to_movi(op);
				986	tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
				987	gen_args += 2;
				988	} else {
				989	s->gen_opc_buf[op_index] = op_to_mov(op);
				990	tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
				991	gen_args += 2;
				992	}
				993	args += 6;
				994	break;
				995	}
				996	goto do_default;
				997
				998	case INDEX_op_add2_i32:
				999	case INDEX_op_sub2_i32:
				1000	if (temps[args[2]].state == TCG_TEMP_CONST
				1001	&& temps[args[3]].state == TCG_TEMP_CONST
				1002	&& temps[args[4]].state == TCG_TEMP_CONST
				1003	&& temps[args[5]].state == TCG_TEMP_CONST) {
				1004	uint32_t al = temps[args[2]].val;
				1005	uint32_t ah = temps[args[3]].val;
				1006	uint32_t bl = temps[args[4]].val;
				1007	uint32_t bh = temps[args[5]].val;
				1008	uint64_t a = ((uint64_t)ah << 32) \| al;
				1009	uint64_t b = ((uint64_t)bh << 32) \| bl;
				1010	TCGArg rl, rh;
				1011
				1012	if (op == INDEX_op_add2_i32) {
				1013	a += b;
				1014	} else {
				1015	a -= b;
				1016	}
				1017
				1018	/* We emit the extra nop when we emit the add2/sub2. */
				1019	assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
				1020
				1021	rl = args[0];
				1022	rh = args[1];
				1023	s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
				1024	s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
				1025	tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)a);
				1026	tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(a >> 32));
				1027	gen_args += 4;
				1028	args += 6;
				1029	break;
				1030	}
				1031	goto do_default;
				1032
				1033	case INDEX_op_mulu2_i32:
				1034	if (temps[args[2]].state == TCG_TEMP_CONST
				1035	&& temps[args[3]].state == TCG_TEMP_CONST) {
				1036	uint32_t a = temps[args[2]].val;
				1037	uint32_t b = temps[args[3]].val;
				1038	uint64_t r = (uint64_t)a * b;
				1039	TCGArg rl, rh;
				1040
				1041	/* We emit the extra nop when we emit the mulu2. */
				1042	assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
				1043
				1044	rl = args[0];
				1045	rh = args[1];
				1046	s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
				1047	s->gen_opc_buf[++op_index] = INDEX_op_movi_i32;
				1048	tcg_opt_gen_movi(&gen_args[0], rl, (uint32_t)r);
				1049	tcg_opt_gen_movi(&gen_args[2], rh, (uint32_t)(r >> 32));
				1050	gen_args += 4;
				1051	args += 4;
				1052	break;
				1053	}
				1054	goto do_default;
				1055
				1056	case INDEX_op_brcond2_i32:
				1057	tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
				1058	if (tmp != 2) {
				1059	if (tmp) {
				1060	reset_all_temps(nb_temps);
				1061	s->gen_opc_buf[op_index] = INDEX_op_br;
				1062	gen_args[0] = args[5];
				1063	gen_args += 1;
				1064	} else {
				1065	s->gen_opc_buf[op_index] = INDEX_op_nop;
				1066	}
				1067	} else if ((args[4] == TCG_COND_LT \|\| args[4] == TCG_COND_GE)
				1068	&& temps[args[2]].state == TCG_TEMP_CONST
				1069	&& temps[args[3]].state == TCG_TEMP_CONST
				1070	&& temps[args[2]].val == 0
				1071	&& temps[args[3]].val == 0) {
				1072	/* Simplify LT/GE comparisons vs zero to a single compare
				1073	vs the high word of the input. */
				1074	reset_all_temps(nb_temps);
				1075	s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
				1076	gen_args[0] = args[1];
				1077	gen_args[1] = args[3];
				1078	gen_args[2] = args[4];
				1079	gen_args[3] = args[5];
				1080	gen_args += 4;
				1081	} else {
				1082	goto do_default;
				1083	}
				1084	args += 6;
				1085	break;
				1086
				1087	case INDEX_op_setcond2_i32:
				1088	tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
				1089	if (tmp != 2) {
				1090	s->gen_opc_buf[op_index] = INDEX_op_movi_i32;
				1091	tcg_opt_gen_movi(gen_args, args[0], tmp);
				1092	gen_args += 2;
				1093	} else if ((args[5] == TCG_COND_LT \|\| args[5] == TCG_COND_GE)
				1094	&& temps[args[3]].state == TCG_TEMP_CONST
				1095	&& temps[args[4]].state == TCG_TEMP_CONST
				1096	&& temps[args[3]].val == 0
				1097	&& temps[args[4]].val == 0) {
				1098	/* Simplify LT/GE comparisons vs zero to a single compare
				1099	vs the high word of the input. */
				1100	s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
				1101	reset_temp(args[0]);
				1102	gen_args[0] = args[0];
				1103	gen_args[1] = args[2];
				1104	gen_args[2] = args[4];
				1105	gen_args[3] = args[5];
				1106	gen_args += 4;
				1107	} else {
				1108	goto do_default;
				1109	}
				1110	args += 6;
				1111	break;
				1112
				1113	case INDEX_op_call:
				1114	nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
				1115	if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS \|
				1116	TCG_CALL_NO_WRITE_GLOBALS))) {
				1117	for (i = 0; i < nb_globals; i++) {
				1118	reset_temp(i);
				1119	}
				1120	}
				1121	for (i = 0; i < (args[0] >> 16); i++) {
				1122	reset_temp(args[i + 1]);
				1123	}
				1124	i = nb_call_args + 3;
				1125	while (i) {
				1126	gen_args = args;
				1127	args++;
				1128	gen_args++;
				1129	i--;
				1130	}
				1131	break;
				1132
				1133	default:
				1134	do_default:
				1135	/* Default case: we know nothing about operation (or were unable
				1136	to compute the operation result) so no propagation is done.
				1137	We trash everything if the operation is the end of a basic
				1138	block, otherwise we only trash the output args. "mask" is
				1139	the non-zero bits mask for the first output arg. */
				1140	if (def->flags & TCG_OPF_BB_END) {
				1141	reset_all_temps(nb_temps);
				1142	} else {
				1143	for (i = 0; i < def->nb_oargs; i++) {
				1144	reset_temp(args[i]);
				1145	}
				1146	}
				1147	for (i = 0; i < def->nb_args; i++) {
				1148	gen_args[i] = args[i];
				1149	}
				1150	args += def->nb_args;
				1151	gen_args += def->nb_args;
				1152	break;
				1153	}
				1154	}
				1155
				1156	return gen_args;
				1157	}
				1158
				1159	TCGArg tcg_optimize(TCGContext s, uint16_t *tcg_opc_ptr,
				1160	TCGArg args, TCGOpDef tcg_op_defs)
				1161	{
				1162	TCGArg *res;
				1163	res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
				1164	return res;
				1165	}