//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}

AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
  : AMDGPUGenInstrInfo(-1, -1), ST(ST) {}

bool AMDGPUInstrInfo::enableClusterLoads() const {
  return true;
}

// FIXME: This behaves strangely. If, for example, you have 32 loads + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
// be clustered as expected. It should really split them into two batches of 16
// stores each.
//
// Loads are clustered until this returns false, rather than trying to schedule
// groups of stores. This also means we have to decide whether loads from
// different address spaces should be clustered, and whether to cluster loads
// that might cause bank conflicts.
//
// This hook might be deprecated, so it may not be worth much effort to fix.
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                              int64_t Offset0, int64_t Offset1,
                                              unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If we have 16 or fewer loads in a row, and their offsets are within 64
  // bytes, then schedule them together.

  // A cacheline is 64 bytes (for global memory).
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}
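
// As an illustration of the heuristic above (hypothetical values, not drawn
// from a real schedule): offsets 0 and 32 land in the same 64-byte cacheline,
// so those loads are clustered; offsets 0 and 128 do not, and a run of more
// than 16 loads stops clustering regardless of the offsets.
//
//   shouldScheduleLoadsNear(N0, N1, /*Offset0=*/0, /*Offset1=*/32,  /*NumLoads=*/2);  // true
//   shouldScheduleLoadsNear(N0, N1, /*Offset0=*/0, /*Offset1=*/128, /*NumLoads=*/2);  // false
//   shouldScheduleLoadsNear(N0, N1, /*Offset0=*/0, /*Offset1=*/32,  /*NumLoads=*/17); // false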

int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
  switch (Channels) {
  default: return Opcode;
  case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
  case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
  case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
  }
}
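
// A minimal usage sketch (the call site below is hypothetical; real callers
// pass MIMG opcodes from the generated tables): an image instruction whose
// dmask enables only one channel can be rewritten to its single-channel
// variant, e.g.
//
//   unsigned NewOpcode = TII->getMaskedMIMGOp(MI.getOpcode(), /*Channels=*/1);
//
// Channel counts other than 1-3 fall through to the default case and leave
// the opcode unchanged.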

// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
  SI = 0,
  VI = 1
};
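
// A hypothetical compile-time guard (not in the original file) that restates
// the expected values locally; it cannot see SIInstrInfo.td itself, but it
// would catch an accidental renumbering of the enum above:
static_assert(SIEncodingFamily::SI == 0 && SIEncodingFamily::VI == 1,
              "SIEncodingFamily values must match SIInstrInfo.td");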

// Wrapper for the TableGen'd function. The enum Subtarget is not defined in
// any header file, so we need to wrap it in a function that takes an unsigned
// instead.
namespace llvm {
namespace AMDGPU {
static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}
} // end namespace AMDGPU
} // end namespace llvm

static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
  switch (ST.getGeneration()) {
  case AMDGPUSubtarget::SOUTHERN_ISLANDS:
  case AMDGPUSubtarget::SEA_ISLANDS:
    return SIEncodingFamily::SI;
  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
    return SIEncodingFamily::VI;

  // FIXME: This should never be called for r600 GPUs.
  case AMDGPUSubtarget::R600:
  case AMDGPUSubtarget::R700:
  case AMDGPUSubtarget::EVERGREEN:
  case AMDGPUSubtarget::NORTHERN_ISLANDS:
    return SIEncodingFamily::SI;
  }

  llvm_unreachable("Unknown subtarget generation!");
}

int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
  int MCOp = AMDGPU::getMCOpcode(Opcode, subtargetEncodingFamily(ST));

  // -1 means that Opcode is already a native instruction.
  if (MCOp == -1)
    return Opcode;

  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
  // no encoding in the given subtarget generation.
  if (MCOp == (uint16_t)-1)
    return -1;

  return MCOp;
}
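
// Illustrative caller-side handling (the pseudo opcode name is hypothetical;
// callers typically live in the MC lowering code):
//
//   int MCOp = TII.pseudoToMCOpcode(AMDGPU::SOME_PSEUDO);
//   if (MCOp == -1)
//     report_fatal_error("pseudo has no encoding on this subtarget");
//   OutMI.setOpcode(MCOp);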