Blame - llvm/lib/Target/AArch64/AArch64ExpandImm.cpp - toolchain/llvm-project

blob: c764af80eb86081d8a4c97a68398f334a2c21059 [file] [log] [blame]

Adhemerval Zanella	8a595b1	2019-03-18 18:23:23 +0000	[diff] [blame]	1	//===- AArch64ExpandImm.h - AArch64 Immediate Expansion -------------------===//
				2	//
				3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				6	//
				7	//===----------------------------------------------------------------------===//
				8	//
				9	// This file implements the AArch64ExpandImm stuff.
				10	//
				11	//===----------------------------------------------------------------------===//
				12
				13	#include "AArch64.h"
				14	#include "AArch64ExpandImm.h"
				15	#include "MCTargetDesc/AArch64AddressingModes.h"
				16
				17	namespace llvm {
				18
				19	namespace AArch64_IMM {
				20
				21	/// Helper function which extracts the specified 16-bit chunk from a
				22	/// 64-bit value.
				23	static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
				24	assert(ChunkIdx < 4 && "Out of range chunk index specified!");
				25
				26	return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
				27	}
				28
				29	/// Check whether the given 16-bit chunk replicated to full 64-bit width
				30	/// can be materialized with an ORR instruction.
				31	static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
				32	Chunk = (Chunk << 48) \| (Chunk << 32) \| (Chunk << 16) \| Chunk;
				33
				34	return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
				35	}
				36
				37	/// Check for identical 16-bit chunks within the constant and if so
				38	/// materialize them with a single ORR instruction. The remaining one or two
				39	/// 16-bit chunks will be materialized with MOVK instructions.
				40	///
				41	/// This allows us to materialize constants like \|A\|B\|A\|A\| or \|A\|B\|C\|A\| (order
				42	/// of the chunks doesn't matter), assuming \|A\|A\|A\|A\| can be materialized with
				43	/// an ORR instruction.
				44	static bool tryToreplicateChunks(uint64_t UImm,
				45	SmallVectorImpl<ImmInsnModel> &Insn) {
				46	using CountMap = DenseMap<uint64_t, unsigned>;
				47
				48	CountMap Counts;
				49
				50	// Scan the constant and count how often every chunk occurs.
				51	for (unsigned Idx = 0; Idx < 4; ++Idx)
				52	++Counts[getChunk(UImm, Idx)];
				53
				54	// Traverse the chunks to find one which occurs more than once.
				55	for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
				56	Chunk != End; ++Chunk) {
				57	const uint64_t ChunkVal = Chunk->first;
				58	const unsigned Count = Chunk->second;
				59
				60	uint64_t Encoding = 0;
				61
				62	// We are looking for chunks which have two or three instances and can be
				63	// materialized with an ORR instruction.
				64	if ((Count != 2 && Count != 3) \|\| !canUseOrr(ChunkVal, Encoding))
				65	continue;
				66
				67	const bool CountThree = Count == 3;
				68
				69	Insn.push_back({ AArch64::ORRXri, 0, Encoding });
				70
				71	unsigned ShiftAmt = 0;
				72	uint64_t Imm16 = 0;
				73	// Find the first chunk not materialized with the ORR instruction.
				74	for (; ShiftAmt < 64; ShiftAmt += 16) {
				75	Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
				76
				77	if (Imm16 != ChunkVal)
				78	break;
				79	}
				80
				81	// Create the first MOVK instruction.
				82	Insn.push_back({ AArch64::MOVKXi, Imm16,
				83	AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt) });
				84
				85	// In case we have three instances the whole constant is now materialized
				86	// and we can exit.
				87	if (CountThree)
				88	return true;
				89
				90	// Find the remaining chunk which needs to be materialized.
				91	for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
				92	Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
				93
				94	if (Imm16 != ChunkVal)
				95	break;
				96	}
				97	Insn.push_back({ AArch64::MOVKXi, Imm16,
				98	AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt) });
				99	return true;
				100	}
				101
				102	return false;
				103	}
				104
				105	/// Check whether this chunk matches the pattern '1...0...'. This pattern
				106	/// starts a contiguous sequence of ones if we look at the bits from the LSB
				107	/// towards the MSB.
				108	static bool isStartChunk(uint64_t Chunk) {
				109	if (Chunk == 0 \|\| Chunk == std::numeric_limits<uint64_t>::max())
				110	return false;
				111
				112	return isMask_64(~Chunk);
				113	}
				114
				115	/// Check whether this chunk matches the pattern '0...1...' This pattern
				116	/// ends a contiguous sequence of ones if we look at the bits from the LSB
				117	/// towards the MSB.
				118	static bool isEndChunk(uint64_t Chunk) {
				119	if (Chunk == 0 \|\| Chunk == std::numeric_limits<uint64_t>::max())
				120	return false;
				121
				122	return isMask_64(Chunk);
				123	}
				124
				125	/// Clear or set all bits in the chunk at the given index.
				126	static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
				127	const uint64_t Mask = 0xFFFF;
				128
				129	if (Clear)
				130	// Clear chunk in the immediate.
				131	Imm &= ~(Mask << (Idx * 16));
				132	else
				133	// Set all bits in the immediate for the particular chunk.
				134	Imm \|= Mask << (Idx * 16);
				135
				136	return Imm;
				137	}
				138
				139	/// Check whether the constant contains a sequence of contiguous ones,
				140	/// which might be interrupted by one or two chunks. If so, materialize the
				141	/// sequence of contiguous ones with an ORR instruction.
				142	/// Materialize the chunks which are either interrupting the sequence or outside
				143	/// of the sequence with a MOVK instruction.
				144	///
				145	/// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk
				146	/// which ends the sequence (0...1...). Then we are looking for constants which
				147	/// contain at least one S and E chunk.
				148	/// E.g. \|E\|A\|B\|S\|, \|A\|E\|B\|S\| or \|A\|B\|E\|S\|.
				149	///
				150	/// We are also looking for constants like \|S\|A\|B\|E\| where the contiguous
				151	/// sequence of ones wraps around the MSB into the LSB.
				152	static bool trySequenceOfOnes(uint64_t UImm,
				153	SmallVectorImpl<ImmInsnModel> &Insn) {
				154	const int NotSet = -1;
				155	const uint64_t Mask = 0xFFFF;
				156
				157	int StartIdx = NotSet;
				158	int EndIdx = NotSet;
				159	// Try to find the chunks which start/end a contiguous sequence of ones.
				160	for (int Idx = 0; Idx < 4; ++Idx) {
				161	int64_t Chunk = getChunk(UImm, Idx);
				162	// Sign extend the 16-bit chunk to 64-bit.
				163	Chunk = (Chunk << 48) >> 48;
				164
				165	if (isStartChunk(Chunk))
				166	StartIdx = Idx;
				167	else if (isEndChunk(Chunk))
				168	EndIdx = Idx;
				169	}
				170
				171	// Early exit in case we can't find a start/end chunk.
				172	if (StartIdx == NotSet \|\| EndIdx == NotSet)
				173	return false;
				174
				175	// Outside of the contiguous sequence of ones everything needs to be zero.
				176	uint64_t Outside = 0;
				177	// Chunks between the start and end chunk need to have all their bits set.
				178	uint64_t Inside = Mask;
				179
				180	// If our contiguous sequence of ones wraps around from the MSB into the LSB,
				181	// just swap indices and pretend we are materializing a contiguous sequence
				182	// of zeros surrounded by a contiguous sequence of ones.
				183	if (StartIdx > EndIdx) {
				184	std::swap(StartIdx, EndIdx);
				185	std::swap(Outside, Inside);
				186	}
				187
				188	uint64_t OrrImm = UImm;
				189	int FirstMovkIdx = NotSet;
				190	int SecondMovkIdx = NotSet;
				191
				192	// Find out which chunks we need to patch up to obtain a contiguous sequence
				193	// of ones.
				194	for (int Idx = 0; Idx < 4; ++Idx) {
				195	const uint64_t Chunk = getChunk(UImm, Idx);
				196
				197	// Check whether we are looking at a chunk which is not part of the
				198	// contiguous sequence of ones.
				199	if ((Idx < StartIdx \|\| EndIdx < Idx) && Chunk != Outside) {
				200	OrrImm = updateImm(OrrImm, Idx, Outside == 0);
				201
				202	// Remember the index we need to patch.
				203	if (FirstMovkIdx == NotSet)
				204	FirstMovkIdx = Idx;
				205	else
				206	SecondMovkIdx = Idx;
				207
				208	// Check whether we are looking a chunk which is part of the contiguous
				209	// sequence of ones.
				210	} else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
				211	OrrImm = updateImm(OrrImm, Idx, Inside != Mask);
				212
				213	// Remember the index we need to patch.
				214	if (FirstMovkIdx == NotSet)
				215	FirstMovkIdx = Idx;
				216	else
				217	SecondMovkIdx = Idx;
				218	}
				219	}
				220	assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
				221
				222	// Create the ORR-immediate instruction.
				223	uint64_t Encoding = 0;
				224	AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
				225	Insn.push_back({ AArch64::ORRXri, 0, Encoding });
				226
				227	const bool SingleMovk = SecondMovkIdx == NotSet;
				228	Insn.push_back({ AArch64::MOVKXi, getChunk(UImm, FirstMovkIdx),
				229	AArch64_AM::getShifterImm(AArch64_AM::LSL,
				230	FirstMovkIdx * 16) });
				231
				232	// Early exit in case we only need to emit a single MOVK instruction.
				233	if (SingleMovk)
				234	return true;
				235
				236	// Create the second MOVK instruction.
				237	Insn.push_back({ AArch64::MOVKXi, getChunk(UImm, SecondMovkIdx),
				238	AArch64_AM::getShifterImm(AArch64_AM::LSL,
				239	SecondMovkIdx * 16) });
				240
				241	return true;
				242	}
				243
				244	/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to a
				245	/// MOVZ or MOVN of width BitSize followed by up to 3 MOVK instructions.
				246	static inline void expandMOVImmSimple(uint64_t Imm, unsigned BitSize,
				247	unsigned OneChunks, unsigned ZeroChunks,
				248	SmallVectorImpl<ImmInsnModel> &Insn) {
				249	const unsigned Mask = 0xFFFF;
				250
				251	// Use a MOVZ or MOVN instruction to set the high bits, followed by one or
				252	// more MOVK instructions to insert additional 16-bit portions into the
				253	// lower bits.
				254	bool isNeg = false;
				255
				256	// Use MOVN to materialize the high bits if we have more all one chunks
				257	// than all zero chunks.
				258	if (OneChunks > ZeroChunks) {
				259	isNeg = true;
				260	Imm = ~Imm;
				261	}
				262
				263	unsigned FirstOpc;
				264	if (BitSize == 32) {
				265	Imm &= (1LL << 32) - 1;
				266	FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
				267	} else {
				268	FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
				269	}
				270	unsigned Shift = 0; // LSL amount for high bits with MOVZ/MOVN
				271	unsigned LastShift = 0; // LSL amount for last MOVK
				272	if (Imm != 0) {
				273	unsigned LZ = countLeadingZeros(Imm);
				274	unsigned TZ = countTrailingZeros(Imm);
				275	Shift = (TZ / 16) * 16;
				276	LastShift = ((63 - LZ) / 16) * 16;
				277	}
				278	unsigned Imm16 = (Imm >> Shift) & Mask;
				279
				280	Insn.push_back({ FirstOpc, Imm16,
				281	AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift) });
				282
				283	if (Shift == LastShift)
				284	return;
				285
				286	// If a MOVN was used for the high bits of a negative value, flip the rest
				287	// of the bits back for use with MOVK.
				288	if (isNeg)
				289	Imm = ~Imm;
				290
				291	unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
				292	while (Shift < LastShift) {
				293	Shift += 16;
				294	Imm16 = (Imm >> Shift) & Mask;
				295	if (Imm16 == (isNeg ? Mask : 0))
				296	continue; // This 16-bit portion is already set correctly.
				297
				298	Insn.push_back({ Opc, Imm16,
				299	AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift) });
				300	}
				301	}
				302
				303	/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
				304	/// real move-immediate instructions to synthesize the immediate.
				305	void expandMOVImm(uint64_t Imm, unsigned BitSize,
				306	SmallVectorImpl<ImmInsnModel> &Insn) {
				307	const unsigned Mask = 0xFFFF;
				308
				309	// Scan the immediate and count the number of 16-bit chunks which are either
				310	// all ones or all zeros.
				311	unsigned OneChunks = 0;
				312	unsigned ZeroChunks = 0;
				313	for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
				314	const unsigned Chunk = (Imm >> Shift) & Mask;
				315	if (Chunk == Mask)
				316	OneChunks++;
				317	else if (Chunk == 0)
				318	ZeroChunks++;
				319	}
				320
Eli Friedman	92d0d13	2019-03-25 21:25:28 +0000	[diff] [blame]	321	// Prefer MOVZ/MOVN over ORR because of the rules for the "mov" alias.
				322	if ((BitSize / 16) - OneChunks <= 1 \|\| (BitSize / 16) - ZeroChunks <= 1) {
				323	expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
				324	return;
				325	}
Adhemerval Zanella	8a595b1	2019-03-18 18:23:23 +0000	[diff] [blame]	326
				327	// Try a single ORR.
				328	uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
				329	uint64_t Encoding;
				330	if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
				331	unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
				332	Insn.push_back({ Opc, 0, Encoding });
				333	return;
				334	}
				335
				336	// One to up three instruction sequences.
				337	//
				338	// Prefer MOVZ/MOVN followed by MOVK; it's more readable, and possibly the
				339	// fastest sequence with fast literal generation.
				340	if (OneChunks >= (BitSize / 16) - 2 \|\| ZeroChunks >= (BitSize / 16) - 2) {
				341	expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
				342	return;
				343	}
				344
				345	assert(BitSize == 64 && "All 32-bit immediates can be expanded with a"
				346	"MOVZ/MOVK pair");
				347
				348	// Try other two-instruction sequences.
				349
				350	// 64-bit ORR followed by MOVK.
				351	// We try to construct the ORR immediate in three different ways: either we
				352	// zero out the chunk which will be replaced, we fill the chunk which will
				353	// be replaced with ones, or we take the bit pattern from the other half of
				354	// the 64-bit immediate. This is comprehensive because of the way ORR
				355	// immediates are constructed.
				356	for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
				357	uint64_t ShiftedMask = (0xFFFFULL << Shift);
				358	uint64_t ZeroChunk = UImm & ~ShiftedMask;
				359	uint64_t OneChunk = UImm \| ShiftedMask;
				360	uint64_t RotatedImm = (UImm << 32) \| (UImm >> 32);
				361	uint64_t ReplicateChunk = ZeroChunk \| (RotatedImm & ShiftedMask);
				362	if (AArch64_AM::processLogicalImmediate(ZeroChunk, BitSize, Encoding) \|\|
				363	AArch64_AM::processLogicalImmediate(OneChunk, BitSize, Encoding) \|\|
				364	AArch64_AM::processLogicalImmediate(ReplicateChunk, BitSize,
				365	Encoding)) {
				366	// Create the ORR-immediate instruction.
				367	Insn.push_back({ AArch64::ORRXri, 0, Encoding });
				368
				369	// Create the MOVK instruction.
				370	const unsigned Imm16 = getChunk(UImm, Shift / 16);
				371	Insn.push_back({ AArch64::MOVKXi, Imm16,
				372	AArch64_AM::getShifterImm(AArch64_AM::LSL, Shift) });
				373	return;
				374	}
				375	}
				376
				377	// FIXME: Add more two-instruction sequences.
				378
				379	// Three instruction sequences.
				380	//
				381	// Prefer MOVZ/MOVN followed by two MOVK; it's more readable, and possibly
				382	// the fastest sequence with fast literal generation. (If neither MOVK is
				383	// part of a fast literal generation pair, it could be slower than the
				384	// four-instruction sequence, but we won't worry about that for now.)
				385	if (OneChunks \|\| ZeroChunks) {
				386	expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
				387	return;
				388	}
				389
				390	// Check for identical 16-bit chunks within the constant and if so materialize
				391	// them with a single ORR instruction. The remaining one or two 16-bit chunks
				392	// will be materialized with MOVK instructions.
				393	if (BitSize == 64 && tryToreplicateChunks(UImm, Insn))
				394	return;
				395
				396	// Check whether the constant contains a sequence of contiguous ones, which
				397	// might be interrupted by one or two chunks. If so, materialize the sequence
				398	// of contiguous ones with an ORR instruction. Materialize the chunks which
				399	// are either interrupting the sequence or outside of the sequence with a
				400	// MOVK instruction.
				401	if (BitSize == 64 && trySequenceOfOnes(UImm, Insn))
				402	return;
				403
				404	// We found no possible two or three instruction sequence; use the general
				405	// four-instruction sequence.
				406	expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
				407	}
				408
				409	} // end namespace AArch64_AM
				410
				411	} // end namespace llvm