[AMDGPU] Assembler: better support for immediate literals in the assembler.

Summary:
Previously the assembler parsed all literals as either 32-bit integers or 32-bit floating-point values, so we could not support f64 literals.
E.g. in the instruction "v_fract_f64 v[0:1], 0.5", the literal 0.5 was encoded as the 32-bit literal 0x3f000000, which is incorrect: it would be interpreted as 3.0517578125E-5 instead of 0.5. The correct encoding is the inline constant 240 (optimal) or, at minimum, the 32-bit literal 0x3FE00000.
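
To see why, recall that for a 64-bit fp operand the hardware places a 32-bit literal in the high half of the operand and zero-fills the low half. A minimal standalone C++ sketch (illustration only, not part of the patch):

```
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // A 32-bit literal fills the high 32 bits of a 64-bit fp operand;
  // the low 32 bits become zero.
  uint64_t Bits = static_cast<uint64_t>(0x3f000000u) << 32; // f32 bits of 0.5
  double D;
  std::memcpy(&D, &Bits, sizeof(D));
  std::printf("%g\n", D); // prints 3.05176e-05 -- not 0.5

  Bits = static_cast<uint64_t>(0x3FE00000u) << 32; // high half of f64 0.5
  std::memcpy(&D, &Bits, sizeof(D));
  std::printf("%g\n", D); // prints 0.5
  return 0;
}
```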

This change reworks how immediate literals are parsed. All literals are now parsed as 64-bit values, either integer or floating-point. We then convert each parsed literal to the correct form based on how it was written (floating-point or integer token) and on the type of the expected instruction operand (f32/64 or b32/64).
Here are the rules for converting literals:
    - We parsed an fp literal:
        - The instruction expects a 64-bit operand:
            - If the literal is inlinable (e.g. v_fract_f64_e32 v[0:1], 0.5)
                - then we do nothing with this literal
            - Else if the literal is not inlinable but the instruction requires it to be inlined (e.g. this is an e64 encoding, v_fract_f64_e64 v[0:1], 1.5)
                - report an error
            - Else the literal is not inlinable but we can encode it as an additional 32-bit literal constant
                - If the instruction expects an fp operand type (f64)
                    - Check if the low 32 bits of the literal are zeroes (e.g. v_fract_f64 v[0:1], 1.5)
                        - If so, do nothing
                    - Else (e.g. v_fract_f64 v[0:1], 3.1415)
                        - report a warning that the low 32 bits will be set to zeroes and precision will be lost
                        - set the low 32 bits of the literal to zeroes
                - Else the instruction expects an integer operand type (e.g. s_mov_b64_e32 s[0:1], 1.5)
                    - report an error, as it is unclear how to encode this literal
        - The instruction expects a 32-bit operand:
            - Convert the parsed 64-bit fp literal to 32-bit fp. Allow loss of precision but not overflow or underflow.
            - If the instruction requires the literal to be inlined
                - If the literal is inlinable (e.g. v_trunc_f32_e64 v0, 0.5)
                    - do nothing
                - Else report an error
            - Else do nothing. We can encode any other 32-bit fp literal (e.g. v_trunc_f32 v0, 10000000.0)
    - We parsed an integer literal:
        - If the literal is inlinable (e.g. v_trunc_f32_e32 v0, 35)
            - do nothing
        - Else if the instruction requires it to be inlined (e.g. v_trunc_f32_e64 v0, 35)
            - report an error
        - Else the literal is not inlinable and we are not required to inline it
            - If the high 32 bits of the literal are all zeroes, or the value is a sign-extension of its low 32 bits (see the sketch after this list)
                - do nothing (e.g. v_trunc_f32 v0, 0xdeadbeef)
            - Else
                - report an error (e.g. v_trunc_f32 v0, 0x123456789abcdef0)
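
A minimal sketch of the two non-inline checks above (assumptions: simplified underflow detection via a plain double-to-float cast; the patch itself uses llvm::APFloat::convert and inspects its status flags):

```
#include <cmath>
#include <cstdint>

// Integer rule: a 64-bit integer literal fits the 32-bit literal slot if its
// high 32 bits are all zero, or the value is the sign-extension of its low
// 32 bits.
static bool fitsIn32BitIntLiteral(int64_t Val) {
  uint64_t Hi = static_cast<uint64_t>(Val) >> 32;
  int64_t SExt = static_cast<int64_t>(static_cast<int32_t>(Val));
  return Hi == 0 || Val == SExt; // 0xdeadbeef: yes; 0x123456789abcdef0: no
}

// FP rule (simplified): narrowing a 64-bit fp literal to f32 may lose
// precision, but overflow to infinity or underflow to zero is rejected.
static bool fitsInF32Literal(double D) {
  float F = static_cast<float>(D);
  if (std::isinf(F) && !std::isinf(D))
    return false; // overflow
  if (F == 0.0f && D != 0.0)
    return false; // underflow
  return true;
}
```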

For this change we need to know the operand types of each instruction (f32/64 or b32/64). I added several new register operands (extending the previous register operands) and gave them the corresponding operand types:
```
enum OperandType {
    OPERAND_REG_IMM32_INT,
    OPERAND_REG_IMM32_FP,
    OPERAND_REG_INLINE_C_INT,
    OPERAND_REG_INLINE_C_FP,
};
```
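
These operand types let later stages of the assembler and code emitter ask whether a source operand is fp or integer. A hypothetical helper in the spirit of the patch's AMDGPU::isSISrcOperand / isSISrcFPOperand (the actual implementations live in Utils/AMDGPUBaseInfo and may differ in detail):

```
// Does this operand type accept a source register or immediate?
// (Using the OperandType enum above.)
static bool isSrcOperandType(unsigned OpType) {
  return OpType == AMDGPU::OPERAND_REG_IMM32_INT ||
         OpType == AMDGPU::OPERAND_REG_IMM32_FP ||
         OpType == AMDGPU::OPERAND_REG_INLINE_C_INT ||
         OpType == AMDGPU::OPERAND_REG_INLINE_C_FP;
}

// Is this source operand floating-point?
static bool isSrcFPOperandType(unsigned OpType) {
  return OpType == AMDGPU::OPERAND_REG_IMM32_FP ||
         OpType == AMDGPU::OPERAND_REG_INLINE_C_FP;
}
```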

This is not working yet:
    - Several tests are failing
    - Problems with predicate methods for inline immediates
    - LLVM-generated assembler parts try to select the e64 encoding before the e32 one.
More changes are required for several AsmOperands.

Reviewers: vpykhtin, tstellarAMD

Subscribers: arsenm, kzhuravl, artem.tamazov

Differential Revision: https://reviews.llvm.org/D22922

llvm-svn: 281050
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 2845c24..702335b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -49,13 +49,6 @@
 def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
 def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
 
-// 32-bit VALU immediate operand that uses the constant bus.
-def u32kimm : Operand<i32> {
-  let OperandNamespace = "AMDGPU";
-  let OperandType = "OPERAND_KIMM32";
-  let PrintMethod = "printU32ImmOperand";
-}
-
 let OperandType = "OPERAND_IMMEDIATE" in {
 
 def u32imm : Operand<i32> {
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 2f0e602..ae09831 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineValueType.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCExpr.h"
 #include "llvm/MC/MCInst.h"
@@ -43,10 +44,15 @@
 
 namespace {
 
+class AMDGPUAsmParser;
 struct OptionalOperand;
 
 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
 
+//===----------------------------------------------------------------------===//
+// Operand
+//===----------------------------------------------------------------------===//
+
 class AMDGPUOperand : public MCParsedAsmOperand {
   enum KindTy {
     Token,
@@ -56,9 +62,11 @@
   } Kind;
 
   SMLoc StartLoc, EndLoc;
+  const AMDGPUAsmParser *AsmParser;
 
 public:
-  AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+  AMDGPUOperand(enum KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
+    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
 
   typedef std::unique_ptr<AMDGPUOperand> Ptr;
 
@@ -143,8 +151,6 @@
   };
 
   struct RegOp {
-    const MCRegisterInfo *TRI;
-    const MCSubtargetInfo *STI;
     unsigned RegNo;
     bool IsForcedVOP3;
     Modifiers Mods;
@@ -175,20 +181,8 @@
     return Kind == Immediate;
   }
 
-  bool isInlinableImm() const {
-    if (!isImmTy(ImmTyNone)) {
-      // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
-      return false;
-    }
-    // TODO: We should avoid using host float here. It would be better to
-    // check the float bit values which is what a few other places do.
-    // We've had bot failures before due to weird NaN support on mips hosts.
-    const float F = BitsToFloat(Imm.Val);
-    // TODO: Add 1/(2*pi) for VI
-    return (Imm.Val <= 64 && Imm.Val >= -16) ||
-           (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 ||
-           F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0);
-  }
+  bool isInlinableImm(MVT type) const;
+  bool isLiteralImm(MVT type) const;
 
   bool isRegKind() const {
     return Kind == Register;
@@ -198,8 +192,24 @@
     return isRegKind() && !Reg.Mods.hasModifiers();
   }
 
-  bool isRegOrImmWithInputMods() const {
-    return isRegKind() || isInlinableImm();
+  bool isRegOrImmWithInputMods(MVT type) const {
+    return isRegKind() || isInlinableImm(type);
+  }
+
+  bool isRegOrImmWithInt32InputMods() const {
+    return isRegOrImmWithInputMods(MVT::i32);
+  }
+
+  bool isRegOrImmWithInt64InputMods() const {
+    return isRegOrImmWithInputMods(MVT::i64);
+  }
+
+  bool isRegOrImmWithFP32InputMods() const {
+    return isRegOrImmWithInputMods(MVT::f32);
+  }
+
+  bool isRegOrImmWithFP64InputMods() const {
+    return isRegOrImmWithInputMods(MVT::f64);
   }
 
   bool isImmTy(ImmTy ImmT) const {
@@ -243,47 +253,76 @@
     return isReg() || isImm();
   }
 
-  bool isRegClass(unsigned RCID) const {
-    return isReg() && Reg.TRI->getRegClass(RCID).contains(getReg());
+  bool isRegClass(unsigned RCID) const;
+
+  bool isSCSrcB32() const {
+    return isRegClass(AMDGPU::SReg_32RegClassID) || isInlinableImm(MVT::i32);
   }
 
-  bool isSCSrc32() const {
-    return isInlinableImm() || isRegClass(AMDGPU::SReg_32RegClassID);
+  bool isSCSrcB64() const {
+    return isRegClass(AMDGPU::SReg_64RegClassID) || isInlinableImm(MVT::i64);
   }
 
-  bool isSCSrc64() const {
-    return isInlinableImm() || isRegClass(AMDGPU::SReg_64RegClassID);
+  bool isSCSrcF32() const {
+    return isRegClass(AMDGPU::SReg_32RegClassID) || isInlinableImm(MVT::f32);
   }
 
-  bool isSSrc32() const {
-    return isImm() || isSCSrc32() || isExpr();
+  bool isSCSrcF64() const {
+    return isRegClass(AMDGPU::SReg_64RegClassID) || isInlinableImm(MVT::f64);
   }
 
-  bool isSSrc64() const {
+  bool isSSrcB32() const {
+    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
+  }
+
+  bool isSSrcB64() const {
     // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
     // See isVSrc64().
-    return isImm() || isSCSrc64();
+    return isSCSrcB64() || isLiteralImm(MVT::i64);
   }
 
-  bool isVCSrc32() const {
-    return isInlinableImm() || isRegClass(AMDGPU::VS_32RegClassID);
+  bool isSSrcF32() const {
+    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
   }
 
-  bool isVCSrc64() const {
-    return isInlinableImm() || isRegClass(AMDGPU::VS_64RegClassID);
+  bool isSSrcF64() const {
+    return isSCSrcB64() || isLiteralImm(MVT::f64);
   }
 
-  bool isVSrc32() const {
-    return isImm() || isVCSrc32();
+  bool isVCSrcB32() const {
+    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(MVT::i32);
   }
 
-  bool isVSrc64() const {
-    // TODO: Check if the 64-bit value (coming from assembly source) can be
-    // narrowed to 32 bits (in the instruction stream). That require knowledge
-    // of instruction type (unsigned/signed, floating or "untyped"/B64),
-    // see [AMD GCN3 ISA 6.3.1].
-    // TODO: How 64-bit values are formed from 32-bit literals in _B64 insns?
-    return isImm() || isVCSrc64();
+  bool isVCSrcB64() const {
+    return isRegClass(AMDGPU::VS_64RegClassID) || isInlinableImm(MVT::i64);
+  }
+  
+  bool isVCSrcF32() const {
+    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(MVT::f32);
+  }
+
+  bool isVCSrcF64() const {
+    return isRegClass(AMDGPU::VS_64RegClassID) || isInlinableImm(MVT::f64);
+  }
+
+  bool isVSrcB32() const {
+    return isVCSrcF32() || isLiteralImm(MVT::i32);
+  }
+
+  bool isVSrcB64() const {
+    return isVCSrcF64() || isLiteralImm(MVT::i64);
+  }
+
+  bool isVSrcF32() const {
+    return isVCSrcF32() || isLiteralImm(MVT::f32);
+  }
+
+  bool isVSrcF64() const {
+    return isVCSrcF64() || isLiteralImm(MVT::f64);
+  }
+
+  bool isKImmFP32() const {
+    return isLiteralImm(MVT::f32);
   }
 
   bool isMem() const override {
@@ -368,29 +407,13 @@
     return getModifiers().hasIntModifiers();
   }
 
-  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const {
-    if (isImmTy(ImmTyNone) && ApplyModifiers && Imm.Mods.hasFPModifiers()) {
-      // Apply modifiers to immediate value 
-      int64_t Val = Imm.Val;
-      bool Negate = Imm.Mods.Neg; // Only negate can get here
-      if (Imm.IsFPImm) {
-        APFloat F(BitsToFloat(Val));
-        if (Negate) {
-          F.changeSign();
-        }
-        Val = F.bitcastToAPInt().getZExtValue();
-      } else {
-        Val = Negate ? -Val : Val;
-      }
-      Inst.addOperand(MCOperand::createImm(Val));
-    } else {
-      Inst.addOperand(MCOperand::createImm(getImm()));
-    }
-  }
+  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
 
-  void addRegOperands(MCInst &Inst, unsigned N) const {
-    Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), *Reg.STI)));
-  }
+  void addLiteralImmOperand(MCInst &Inst, int64_t Val) const;
+
+  void addKImmFP32Operands(MCInst &Inst, unsigned N) const;
+
+  void addRegOperands(MCInst &Inst, unsigned N) const;
 
   void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
     if (isRegKind())
@@ -484,10 +507,11 @@
     }
   }
 
-  static AMDGPUOperand::Ptr CreateImm(int64_t Val, SMLoc Loc,
+  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
+                                      int64_t Val, SMLoc Loc,
                                       enum ImmTy Type = ImmTyNone,
                                       bool IsFPImm = false) {
-    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
+    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
     Op->Imm.Val = Val;
     Op->Imm.IsFPImm = IsFPImm;
     Op->Imm.Type = Type;
@@ -497,9 +521,10 @@
     return Op;
   }
 
-  static AMDGPUOperand::Ptr CreateToken(StringRef Str, SMLoc Loc,
+  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
+                                        StringRef Str, SMLoc Loc,
                                         bool HasExplicitEncodingSize = true) {
-    auto Res = llvm::make_unique<AMDGPUOperand>(Token);
+    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
     Res->Tok.Data = Str.data();
     Res->Tok.Length = Str.size();
     Res->StartLoc = Loc;
@@ -507,15 +532,12 @@
     return Res;
   }
 
-  static AMDGPUOperand::Ptr CreateReg(unsigned RegNo, SMLoc S,
+  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
+                                      unsigned RegNo, SMLoc S,
                                       SMLoc E,
-                                      const MCRegisterInfo *TRI,
-                                      const MCSubtargetInfo *STI,
                                       bool ForceVOP3) {
-    auto Op = llvm::make_unique<AMDGPUOperand>(Register);
+    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
     Op->Reg.RegNo = RegNo;
-    Op->Reg.TRI = TRI;
-    Op->Reg.STI = STI;
     Op->Reg.Mods = {false, false, false};
     Op->Reg.IsForcedVOP3 = ForceVOP3;
     Op->StartLoc = S;
@@ -523,8 +545,9 @@
     return Op;
   }
 
-  static AMDGPUOperand::Ptr CreateExpr(const class MCExpr *Expr, SMLoc S) {
-    auto Op = llvm::make_unique<AMDGPUOperand>(Expression);
+  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
+                                       const class MCExpr *Expr, SMLoc S) {
+    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
     Op->Expr = Expr;
     Op->StartLoc = S;
     Op->EndLoc = S;
@@ -537,6 +560,10 @@
   return OS;
 }
 
+//===----------------------------------------------------------------------===//
+// AsmParser
+//===----------------------------------------------------------------------===//
+
 class AMDGPUAsmParser : public MCTargetAsmParser {
   const MCInstrInfo &MII;
   MCAsmParser &Parser;
@@ -545,22 +572,6 @@
   bool ForcedDPP;
   bool ForcedSDWA;
 
-  bool isSI() const {
-    return AMDGPU::isSI(getSTI());
-  }
-
-  bool isCI() const {
-    return AMDGPU::isCI(getSTI());
-  }
-
-  bool isVI() const {
-    return AMDGPU::isVI(getSTI());
-  }
-
-  bool hasSGPR102_SGPR103() const {
-    return !isVI();
-  }
-
   /// @name Auto-generated Match Functions
   /// {
 
@@ -624,11 +635,37 @@
     }
   }
 
+  bool isSI() const {
+    return AMDGPU::isSI(getSTI());
+  }
+
+  bool isCI() const {
+    return AMDGPU::isCI(getSTI());
+  }
+
+  bool isVI() const {
+    return AMDGPU::isVI(getSTI());
+  }
+
+  bool hasSGPR102_SGPR103() const {
+    return !isVI();
+  }
+
   AMDGPUTargetStreamer &getTargetStreamer() {
     MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
     return static_cast<AMDGPUTargetStreamer &>(TS);
   }
 
+  const MCRegisterInfo *getMRI() const {
+    // We need this const_cast because for some reason getContext() is not const
+    // in MCAsmParser.
+    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
+  }
+
+  const MCInstrInfo *getMII() const {
+    return &MII;
+  }
+
   void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
   void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
   void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
@@ -652,6 +689,7 @@
   StringRef parseMnemonicSuffix(StringRef Name);
   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                         SMLoc NameLoc, OperandVector &Operands) override;
+  //bool ProcessInstruction(MCInst &Inst);
 
   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
   OperandMatchResultTy parseIntWithPrefix(const char *Prefix,
@@ -738,6 +776,202 @@
 
 }
 
+//===----------------------------------------------------------------------===//
+// Operand
+//===----------------------------------------------------------------------===//
+
+bool AMDGPUOperand::isInlinableImm(MVT type) const {
+  if (!isImmTy(ImmTyNone)) {
+    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
+    return false;
+  }
+  // TODO: We should avoid using host float here. It would be better to
+  // check the float bit values which is what a few other places do.
+  // We've had bot failures before due to weird NaN support on mips hosts.
+
+  APInt Literal(64, Imm.Val);
+
+  if (Imm.IsFPImm) { // We got fp literal token
+    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
+      return AMDGPU::isInlinableLiteral64(Imm.Val, AsmParser->isVI());
+    } else { // Expected 32-bit operand
+      bool lost;
+      APFloat FPLiteral(APFloat::IEEEdouble, Literal);
+      // Convert literal to single precision
+      APFloat::opStatus status = FPLiteral.convert(APFloat::IEEEsingle,
+                                                    APFloat::rmNearestTiesToEven,
+                                                    &lost);
+      // We allow precision loss but not overflow or underflow
+      if (status != APFloat::opOK &&
+          lost &&
+          ((status & APFloat::opOverflow)  != 0 ||
+            (status & APFloat::opUnderflow) != 0)) {
+        return false;
+      }
+      // Check if single precision literal is inlinable
+      return AMDGPU::isInlinableLiteral32(
+              static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
+              AsmParser->isVI());
+    }
+  } else { // We got int literal token
+    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
+      return AMDGPU::isInlinableLiteral64(Imm.Val, AsmParser->isVI());
+    } else { // Expected 32-bit operand
+      return AMDGPU::isInlinableLiteral32(
+            static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
+            AsmParser->isVI());
+    }
+  }
+  return false;
+}
+
+bool AMDGPUOperand::isLiteralImm(MVT type) const {
+  // Check that this immediate can be added as a literal
+  if (!isImmTy(ImmTyNone)) {
+    return false;
+  }
+
+  APInt Literal(64, Imm.Val);
+
+  if (Imm.IsFPImm) { // We got fp literal token
+    if (type == MVT::f64) { // Expected 64-bit fp operand
+      // We would set the low 32 bits of the literal to zeroes, but we accept such literals
+      return true;
+    } else if (type == MVT::i64) { // Expected 64-bit int operand
+      // We don't allow fp literals in 64-bit integer instructions. It is
+      // unclear how we should encode them.
+      return false;
+    } else { // Expected 32-bit operand
+      bool lost;
+      APFloat FPLiteral(APFloat::IEEEdouble, Literal);
+      // Convert literal to single precision
+      APFloat::opStatus status = FPLiteral.convert(APFloat::IEEEsingle,
+                                                    APFloat::rmNearestTiesToEven,
+                                                    &lost);
+      // We allow precision loss but not overflow or underflow
+      if (status != APFloat::opOK &&
+          lost &&
+          ((status & APFloat::opOverflow)  != 0 ||
+            (status & APFloat::opUnderflow) != 0)) {
+        return false;
+      }
+      return true;
+    }
+  } else { // We got int literal token
+    APInt HiBits = Literal.getHiBits(32);
+    if (HiBits == 0xffffffff &&
+        (*Literal.getLoBits(32).getRawData() & 0x80000000) != 0) {
+      // If high 32 bits aren't zeroes then they all should be ones and 32nd
+      // bit should be set. So that this 64-bit literal is sign-extension of
+      // 32-bit value.
+      return true;
+    } else if (HiBits == 0) {
+      return true;
+    }
+  }
+  return false;
+}
+
+bool AMDGPUOperand::isRegClass(unsigned RCID) const {
+  return isReg() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
+}
+
+void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
+  int64_t Val = Imm.Val;
+  if (isImmTy(ImmTyNone) && ApplyModifiers && Imm.Mods.hasFPModifiers() && Imm.Mods.Neg) {
+    // Apply modifiers to immediate value. Only negate can get here
+    if (Imm.IsFPImm) {
+      APFloat F(BitsToDouble(Val));
+      F.changeSign();
+      Val = F.bitcastToAPInt().getZExtValue();
+    } else {
+      Val = -Val;
+    }
+  }
+
+  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), Inst.getNumOperands())) {
+    addLiteralImmOperand(Inst, Val);
+  } else {
+    Inst.addOperand(MCOperand::createImm(Val));
+  }
+}
+
+void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
+  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
+  auto OpNum = Inst.getNumOperands();
+  // Check that this operand accepts literals
+  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
+
+  APInt Literal(64, Val);
+  auto OpSize = AMDGPU::getRegOperandSize(AsmParser->getMRI(), InstDesc, OpNum); // expected operand size
+
+  if (Imm.IsFPImm) { // We got fp literal token
+    if (OpSize == 8) { // Expected 64-bit operand
+      // Check if literal is inlinable
+      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), AsmParser->isVI())) {
+        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
+      } else if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
+        // For fp operands we check if low 32 bits are zeros
+        if (Literal.getLoBits(32) != 0) {
+          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
+                            "Can't encode literal as exact 64-bit"
+                            " floating-point operand. Low 32-bits will be"
+                            " set to zero");
+        }
+        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
+      } else {
+        // We don't allow fp literals in 64-bit integer instructions. It is
+        // unclear how we should encode them. This case should be checked earlier
+        // in predicate methods (isLiteralImm())
+        llvm_unreachable("fp literal in 64-bit integer instruction.");
+      }
+    } else { // Expected 32-bit operand
+      bool lost;
+      APFloat FPLiteral(APFloat::IEEEdouble, Literal);
+      // Convert literal to single precision
+      FPLiteral.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost);
+      // We allow precision loss but not overflow or underflow. This should be
+      // checked earlier in isLiteralImm()
+      Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
+    }
+  } else { // We got int literal token
+    if (OpSize == 8) { // Expected 64-bit operand
+      auto LiteralVal = Literal.getZExtValue();
+      if (AMDGPU::isInlinableLiteral64(LiteralVal, AsmParser->isVI())) {
+        Inst.addOperand(MCOperand::createImm(LiteralVal));
+        return;
+      }
+    } else { // Expected 32-bit operand
+      auto LiteralVal = static_cast<int32_t>(Literal.getLoBits(32).getZExtValue());
+      if (AMDGPU::isInlinableLiteral32(LiteralVal, AsmParser->isVI())) {
+        Inst.addOperand(MCOperand::createImm(LiteralVal));
+        return;
+      }
+    }
+    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
+  }
+}
+
+void AMDGPUOperand::addKImmFP32Operands(MCInst &Inst, unsigned N) const {
+  APInt Literal(64, Imm.Val);
+  if (Imm.IsFPImm) { // We got fp literal
+    bool lost;
+    APFloat FPLiteral(APFloat::IEEEdouble, Literal);
+    FPLiteral.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost);
+    Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
+  } else { // We got int literal token
+    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue()));
+  }
+}
+
+void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
+  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
+}
+
+//===----------------------------------------------------------------------===//
+// AsmParser
+//===----------------------------------------------------------------------===//
+
 static int getRegClass(RegisterKind Is, unsigned RegWidth) {
   if (Is == IS_VGPR) {
     switch (RegWidth) {
@@ -952,20 +1186,18 @@
   const auto &Tok = Parser.getTok();
   SMLoc StartLoc = Tok.getLoc();
   SMLoc EndLoc = Tok.getEndLoc();
-  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
-
   RegisterKind RegKind;
   unsigned Reg, RegNum, RegWidth;
 
   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
     return nullptr;
   }
-  return AMDGPUOperand::CreateReg(Reg, StartLoc, EndLoc,
-                                  TRI, &getSTI(), false);
+  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
 }
 
 AMDGPUAsmParser::OperandMatchResultTy
 AMDGPUAsmParser::parseImm(OperandVector &Operands) {
+  // TODO: add syntactic sugar for 1/(2*PI)
   bool Minus = false;
   if (getLexer().getKind() == AsmToken::Minus) {
     Minus = true;
@@ -978,28 +1210,21 @@
     int64_t IntVal;
     if (getParser().parseAbsoluteExpression(IntVal))
       return MatchOperand_ParseFail;
-    if (!isInt<32>(IntVal) && !isUInt<32>(IntVal)) {
-      Error(S, "invalid immediate: only 32-bit values are legal");
-      return MatchOperand_ParseFail;
-    }
-
     if (Minus)
       IntVal *= -1;
-    Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
+    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
     return MatchOperand_Success;
   }
   case AsmToken::Real: {
-    // FIXME: We should emit an error if a double precisions floating-point
-    // value is used.  I'm not sure the best way to detect this.
     int64_t IntVal;
     if (getParser().parseAbsoluteExpression(IntVal))
       return MatchOperand_ParseFail;
 
-    APFloat F((float)BitsToDouble(IntVal));
+    APFloat F(BitsToDouble(IntVal));
     if (Minus)
       F.changeSign();
     Operands.push_back(
-        AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S,
+        AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
                                  AMDGPUOperand::ImmTyNone, true));
     return MatchOperand_Success;
   }
@@ -1505,11 +1730,11 @@
     SMLoc S = Tok.getLoc();
     const MCExpr *Expr = nullptr;
     if (!Parser.parseExpression(Expr)) {
-      Operands.push_back(AMDGPUOperand::CreateExpr(Expr, S));
+      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
       return MatchOperand_Success;
     }
 
-    Operands.push_back(AMDGPUOperand::CreateToken(Tok.getString(), Tok.getLoc()));
+    Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), Tok.getLoc()));
     Parser.Lex();
     return MatchOperand_Success;
   }
@@ -1543,7 +1768,7 @@
                                        SMLoc NameLoc, OperandVector &Operands) {
   // Add the instruction mnemonic
   Name = parseMnemonicSuffix(Name);
-  Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
+  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
 
   while (!getLexer().is(AsmToken::EndOfStatement)) {
     AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
@@ -1618,7 +1843,7 @@
     return MatchOperand_ParseFail;
   }
 
-  Operands.push_back(AMDGPUOperand::CreateImm(Value, S, ImmTy));
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
   return MatchOperand_Success;
 }
 
@@ -1650,7 +1875,7 @@
     }
   }
 
-  Operands.push_back(AMDGPUOperand::CreateImm(Bit, S, ImmTy));
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
   return MatchOperand_Success;
 }
 
@@ -1825,7 +2050,7 @@
       } while(getLexer().isNot(AsmToken::EndOfStatement));
       break;
   }
-  Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S));
+  Operands.push_back(AMDGPUOperand::CreateImm(this, CntVal, S));
   return MatchOperand_Success;
 }
 
@@ -1930,7 +2155,7 @@
       }
       break;
   }
-  Operands.push_back(AMDGPUOperand::CreateImm(Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
   return MatchOperand_Success;
 }
 
@@ -2113,7 +2338,7 @@
     }
     break;
   }
-  Operands.push_back(AMDGPUOperand::CreateImm(Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
   return MatchOperand_Success;
 }
 
@@ -2135,12 +2360,12 @@
       int64_t Imm;
       if (getParser().parseAbsoluteExpression(Imm))
         return MatchOperand_ParseFail;
-      Operands.push_back(AMDGPUOperand::CreateImm(Imm, S));
+      Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
       return MatchOperand_Success;
     }
 
     case AsmToken::Identifier:
-      Operands.push_back(AMDGPUOperand::CreateExpr(
+      Operands.push_back(AMDGPUOperand::CreateExpr(this,
           MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
                                   Parser.getTok().getString()), getContext()), S));
       Parser.Lex();
@@ -2153,15 +2378,15 @@
 //===----------------------------------------------------------------------===//
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyGLC);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTySLC);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultTFE() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyTFE);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyTFE);
 }
 
 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
@@ -2284,23 +2509,23 @@
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyDMask);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDMask);
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultUNorm() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyUNorm);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyUNorm);
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDA() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyDA);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDA);
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultR128() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyR128);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyR128);
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultLWE() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyLWE);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
 }
 
 //===----------------------------------------------------------------------===//
@@ -2321,11 +2546,11 @@
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
 }
 
 //===----------------------------------------------------------------------===//
@@ -2458,8 +2683,7 @@
 
   for (unsigned E = Operands.size(); I != E; ++I) {
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
-    if (Op.isRegOrImmWithInputMods()) {
-      // only fp modifiers allowed in VOP3
+    if (Desc.OpInfo[Inst.getNumOperands()].OperandType == AMDGPU::OPERAND_INPUT_MODS) {
       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
     } else if (Op.isImm()) {
       OptionalIdx[Op.getImmTy()] = I;
@@ -2605,21 +2829,20 @@
   }
   Parser.Lex(); // eat last token
 
-  Operands.push_back(AMDGPUOperand::CreateImm(Int, S,
-                                              AMDGPUOperand::ImmTyDppCtrl));
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
   return MatchOperand_Success;
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
-  return AMDGPUOperand::CreateImm(0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
+  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
-  return AMDGPUOperand::CreateImm(0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
+  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
 }
 
 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
-  return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
+  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
 }
 
 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
@@ -2634,8 +2857,7 @@
   for (unsigned E = Operands.size(); I != E; ++I) {
     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
     // Add the register arguments
-    if (Op.isRegOrImmWithInputMods()) {
-      // Only float modifiers supported in DPP
+    if (Desc.OpInfo[Inst.getNumOperands()].OperandType == AMDGPU::OPERAND_INPUT_MODS) {
       Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
     } else if (Op.isDPPCtrl()) {
       Op.addImmOperands(Inst, 1);
@@ -2684,7 +2906,7 @@
     return MatchOperand_ParseFail;
   }
 
-  Operands.push_back(AMDGPUOperand::CreateImm(Int, S, Type));
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
   return MatchOperand_Success;
 }
 
@@ -2711,8 +2933,7 @@
     return MatchOperand_ParseFail;
   }
 
-  Operands.push_back(AMDGPUOperand::CreateImm(Int, S,
-                                              AMDGPUOperand::ImmTySdwaDstUnused));
+  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
   return MatchOperand_Success;
 }
 
@@ -2746,8 +2967,8 @@
         Op.Reg.RegNo == AMDGPU::VCC) {
       // VOPC sdwa use "vcc" token as dst. Skip it.
       continue;
-    } else if (Op.isRegOrImmWithInputMods()) {
-       Op.addRegOrImmWithInputModsOperands(Inst, 2);
+    } else if (Desc.OpInfo[Inst.getNumOperands()].OperandType == AMDGPU::OPERAND_INPUT_MODS) {
+      Op.addRegOrImmWithInputModsOperands(Inst, 2);
     } else if (Op.isImm()) {
       // Handle optional arguments
       OptionalIdx[Op.getImmTy()] = I;
@@ -2817,14 +3038,16 @@
     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
   case MCK_offen:
     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
-  case MCK_SSrc32:
+  case MCK_SSrcB32:
     // When operands have expression values, they will return true for isToken,
     // because it is not possible to distinguish between a token and an
     // expression at parse time. MatchInstructionImpl() will always try to
     // match an operand as a token, when isToken returns true, and when the
     // name of the expression is not a valid token, the match will fail,
     // so we need to handle it here.
-    return Operand.isSSrc32() ? Match_Success : Match_InvalidOperand;
+    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
+  case MCK_SSrcF32:
+    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
   case MCK_SoppBrTarget:
     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
   default: return Match_InvalidOperand;
diff --git a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 7dd0f00..6556830 100644
--- a/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -359,7 +359,7 @@
   else if (Imm == DoubleToBits(-4.0))
     O << "-4.0";
   else {
-    assert(isUInt<32>(Imm));
+    assert(isUInt<32>(Imm) || Imm == 0x3fc45f306dc9c882);
 
     // In rare situations, we will have a 32-bit literal in a 64-bit
     // operand. This is technically allowed for the encoding of s_mov_b64.
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index ab7c830..2e5286e 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -18,6 +18,7 @@
 #include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIDefines.h"
+#include "Utils/AMDGPUBaseInfo.h"
 #include "llvm/MC/MCCodeEmitter.h"
 #include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCFixup.h"
@@ -38,11 +39,9 @@
   const MCInstrInfo &MCII;
   const MCRegisterInfo &MRI;
 
-  /// \brief Can this operand also contain immediate values?
-  bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
-
   /// \brief Encode an fp or int literal
-  uint32_t getLitEncoding(const MCOperand &MO, unsigned OpSize) const;
+  uint32_t getLitEncoding(const MCOperand &MO, unsigned OpSize,
+                          const MCSubtargetInfo &STI) const;
 
 public:
   SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
@@ -76,14 +75,6 @@
   return new SIMCCodeEmitter(MCII, MRI, Ctx);
 }
 
-bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc,
-                                   unsigned OpNo) const {
-  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
-
-  return OpType == AMDGPU::OPERAND_REG_IMM32 ||
-         OpType == AMDGPU::OPERAND_REG_INLINE_C;
-}
-
 // Returns the encoding value to use if the given integer is an integer inline
 // immediate value, or 0 if it is not.
 template <typename IntTy>
@@ -97,7 +88,7 @@
   return 0;
 }
 
-static uint32_t getLit32Encoding(uint32_t Val) {
+static uint32_t getLit32Encoding(uint32_t Val, const MCSubtargetInfo &STI) {
   uint32_t IntImm = getIntInlineImmEncoding(static_cast<int32_t>(Val));
   if (IntImm != 0)
     return IntImm;
@@ -126,10 +117,13 @@
   if (Val == FloatToBits(-4.0f))
     return 247;
 
+  if (AMDGPU::isVI(STI) && Val == 0x3e22f983) // 1/(2*pi)
+    return 248;
+
   return 255;
 }
 
-static uint32_t getLit64Encoding(uint64_t Val) {
+static uint32_t getLit64Encoding(uint64_t Val, const MCSubtargetInfo &STI) {
   uint32_t IntImm = getIntInlineImmEncoding(static_cast<int64_t>(Val));
   if (IntImm != 0)
     return IntImm;
@@ -158,11 +152,15 @@
   if (Val == DoubleToBits(-4.0))
     return 247;
 
+  if (AMDGPU::isVI(STI) && Val == 0x3fc45f306dc9c882) // 1/(2*pi)
+    return 248;
+
   return 255;
 }
 
 uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
-                                         unsigned OpSize) const {
+                                         unsigned OpSize,
+                                         const MCSubtargetInfo &STI) const {
 
   int64_t Imm;
   if (MO.isExpr()) {
@@ -182,11 +180,11 @@
   }
 
   if (OpSize == 4)
-    return getLit32Encoding(static_cast<uint32_t>(Imm));
+    return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
 
   assert(OpSize == 8);
 
-  return getLit64Encoding(static_cast<uint64_t>(Imm));
+  return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
 }
 
 void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
@@ -208,7 +206,7 @@
   for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) {
 
     // Check if this operand should be encoded as [SV]Src
-    if (!isSrcOperand(Desc, i))
+    if (!AMDGPU::isSISrcOperand(Desc, i))
       continue;
 
     int RCID = Desc.OpInfo[i].RegClass;
@@ -216,7 +214,7 @@
 
     // Is this operand a literal immediate?
     const MCOperand &Op = MI.getOperand(i);
-    if (getLitEncoding(Op, RC.getSize()) != 255)
+    if (getLitEncoding(Op, RC.getSize(), STI) != 255)
       continue;
 
     // Yes! Encode it
@@ -280,11 +278,10 @@
   }
 
   const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
-  if (isSrcOperand(Desc, OpNo)) {
-    int RCID = Desc.OpInfo[OpNo].RegClass;
-    const MCRegisterClass &RC = MRI.getRegClass(RCID);
-
-    uint32_t Enc = getLitEncoding(MO, RC.getSize());
+  if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
+    uint32_t Enc = getLitEncoding(MO,
+                                  AMDGPU::getRegOperandSize(&MRI, Desc, OpNo),
+                                  STI);
     if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4))
       return Enc;
 
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 7eac83c..744c2e2 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -49,14 +49,17 @@
 namespace llvm {
 namespace AMDGPU {
   enum OperandType {
-    /// Operand with register or 32-bit immediate
-    OPERAND_REG_IMM32 = MCOI::OPERAND_FIRST_TARGET,
-    /// Operand with register or inline constant
-    OPERAND_REG_INLINE_C,
+    /// Operands with register or 32-bit immediate
+    OPERAND_REG_IMM32_INT = MCOI::OPERAND_FIRST_TARGET,
+    OPERAND_REG_IMM32_FP,
+    /// Operands with register or inline constant
+    OPERAND_REG_INLINE_C_INT,
+    OPERAND_REG_INLINE_C_FP,
 
-    /// Operand with 32-bit immediate that uses the constant bus. The standard
-    /// OPERAND_IMMEDIATE should be used for special immediates such as source
-    /// modifiers.
+    // Operand for source modifiers for VOP instructions
+    OPERAND_INPUT_MODS,
+
+    /// Operand with 32-bit immediate that uses the constant bus.
     OPERAND_KIMM32
   };
 }
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4c69a39..c84847f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1689,9 +1689,11 @@
         return false;
       }
       break;
-    case AMDGPU::OPERAND_REG_IMM32:
+    case AMDGPU::OPERAND_REG_IMM32_INT:
+    case AMDGPU::OPERAND_REG_IMM32_FP:
       break;
-    case AMDGPU::OPERAND_REG_INLINE_C:
+    case AMDGPU::OPERAND_REG_INLINE_C_INT:
+    case AMDGPU::OPERAND_REG_INLINE_C_FP:
       if (isLiteralConstant(MI.getOperand(i),
                             RI.getRegClass(RegClass)->getSize())) {
         ErrInfo = "Illegal immediate value for operand.";
@@ -2030,8 +2032,8 @@
   // In order to be legal, the common sub-class must be equal to the
   // class of the current operand.  For example:
   //
-  // v_mov_b32 s0 ; Operand defined as vsrc_32
-  //              ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
+  // v_mov_b32 s0 ; Operand defined as vsrc_b32
+  //              ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL
   //
   // s_sendmsg 0, s0 ; Operand defined as m0reg
   //                 ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index ad129b8..f68eef2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -454,29 +454,56 @@
 } // End OperandType = "OPERAND_IMMEDIATE"
 
 
+// 32-bit VALU immediate operand that uses the constant bus.
+def KImmFP32MatchClass : AsmOperandClass {
+  let Name = "KImmFP32";
+  let PredicateMethod = "isKImmFP32";
+  let ParserMethod = "parseImm";
+  let RenderMethod = "addKImmFP32Operands";
+}
+
+def f32kimm : Operand<i32> {
+  let OperandNamespace = "AMDGPU";
+  let OperandType = "OPERAND_KIMM32";
+  let PrintMethod = "printU32ImmOperand";
+  let ParserMatchClass = KImmFP32MatchClass;
+}
+
 def VOPDstS64 : VOPDstOperand <SReg_64>;
 
-def FPInputModsMatchClass : AsmOperandClass {
-  let Name = "RegOrImmWithFPInputMods";
+class FPInputModsMatchClass <int opSize> : AsmOperandClass {
+  let Name = "RegOrImmWithFP"#opSize#"InputMods";
   let ParserMethod = "parseRegOrImmWithFPInputMods";
-  let PredicateMethod = "isRegOrImmWithInputMods";
+  let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
+}
+def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
+def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
+
+class InputMods <AsmOperandClass matchClass> : Operand <i32> {
+  let OperandNamespace = "AMDGPU";
+  let OperandType = "OPERAND_INPUT_MODS";
+  let ParserMatchClass = matchClass;
 }
 
-def FPInputMods : Operand <i32> {
+class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
   let PrintMethod = "printOperandAndFPInputMods";
-  let ParserMatchClass = FPInputModsMatchClass;
 }
+def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
+def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
 
-def IntInputModsMatchClass : AsmOperandClass {
-  let Name = "RegOrImmWithIntInputMods";
+class IntInputModsMatchClass <int opSize> : AsmOperandClass {
+  let Name = "RegOrImmWithInt"#opSize#"InputMods";
   let ParserMethod = "parseRegOrImmWithIntInputMods";
-  let PredicateMethod = "isRegOrImmWithInputMods";
+  let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
 }
+def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
+def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
 
-def IntInputMods: Operand <i32> {
+class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
   let PrintMethod = "printOperandAndIntInputMods";
-  let ParserMatchClass = IntInputModsMatchClass;
 }
+def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
+def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
 
 //===----------------------------------------------------------------------===//
 // Complex patterns
@@ -605,7 +632,13 @@
 // Returns the register class to use for source 0 of VOP[12C]
 // instructions for the given VT.
 class getVOPSrc0ForVT<ValueType VT> {
-  RegisterOperand ret = !if(!eq(VT.Size, 64), VSrc_64, VSrc_32);
+  bit isFP = !if(!eq(VT.Value, f16.Value), 1,
+             !if(!eq(VT.Value, f32.Value), 1,
+             !if(!eq(VT.Value, f64.Value), 1,
+             0)));
+  RegisterOperand ret = !if(isFP, 
+                            !if(!eq(VT.Size, 64), VSrc_f64, VSrc_f32),
+                            !if(!eq(VT.Size, 64), VSrc_b64, VSrc_b32));
 }
 
 // Returns the vreg register class to use for source operand given VT
@@ -617,14 +650,22 @@
 // Returns the register class to use for sources of VOP3 instructions for the
 // given VT.
 class getVOP3SrcForVT<ValueType VT> {
+  bit isFP = !if(!eq(VT.Value, f16.Value), 1,
+             !if(!eq(VT.Value, f32.Value), 1,
+             !if(!eq(VT.Value, f64.Value), 1,
+             0)));
   RegisterOperand ret =
-  !if(!eq(VT.Size, 64),
-      VCSrc_64,
-      !if(!eq(VT.Value, i1.Value),
-          SCSrc_64,
-          VCSrc_32
-       )
-    );
+    !if(!eq(VT.Size, 64),
+        !if(isFP,
+            VCSrc_f64,
+            VCSrc_b64),
+        !if(!eq(VT.Value, i1.Value),
+            SCSrc_b64,
+            !if(isFP,
+                VCSrc_f32,
+                VCSrc_b32)
+         )
+      );
 }
 
 // Returns 1 if the source arguments have modifiers, 0 if they do not.
@@ -636,6 +677,17 @@
     0));
 }
 
+// Return type of input modifiers operand for specified input operand
+class getSrcMod <ValueType VT> {
+  bit isFP = !if(!eq(VT.Value, f16.Value), 1,
+               !if(!eq(VT.Value, f32.Value), 1,
+               !if(!eq(VT.Value, f64.Value), 1,
+               0)));
+  Operand ret =  !if(!eq(VT.Size, 64),
+                     !if(isFP, FP64InputMods, Int64InputMods),
+                     !if(isFP, FP32InputMods, Int32InputMods));
+}
+
 // Returns the input arguments for VOP[12C] instructions for the given SrcVT.
 class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
   dag ret = !if(!eq(NumSrcArgs, 1), (ins Src0RC:$src0),               // VOP1
@@ -646,7 +698,8 @@
 // Returns the input arguments for VOP3 instructions for the given SrcVT.
 class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
                 RegisterOperand Src2RC, int NumSrcArgs,
-                bit HasModifiers> {
+                bit HasModifiers, Operand Src0Mod, Operand Src1Mod,
+                Operand Src2Mod> {
 
   dag ret =
     !if (!eq(NumSrcArgs, 0),
@@ -656,7 +709,7 @@
     !if (!eq(NumSrcArgs, 1),
       !if (!eq(HasModifiers, 1),
         // VOP1 with modifiers
-        (ins FPInputMods:$src0_modifiers, Src0RC:$src0,
+        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
              clampmod:$clamp, omod:$omod)
       /* else */,
         // VOP1 without modifiers
@@ -665,8 +718,8 @@
     !if (!eq(NumSrcArgs, 2),
       !if (!eq(HasModifiers, 1),
         // VOP 2 with modifiers
-        (ins FPInputMods:$src0_modifiers, Src0RC:$src0,
-             FPInputMods:$src1_modifiers, Src1RC:$src1,
+        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+             Src1Mod:$src1_modifiers, Src1RC:$src1,
              clampmod:$clamp, omod:$omod)
       /* else */,
         // VOP2 without modifiers
@@ -675,9 +728,9 @@
     /* NumSrcArgs == 3 */,
       !if (!eq(HasModifiers, 1),
         // VOP3 with modifiers
-        (ins FPInputMods:$src0_modifiers, Src0RC:$src0,
-             FPInputMods:$src1_modifiers, Src1RC:$src1,
-             FPInputMods:$src2_modifiers, Src2RC:$src2,
+        (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+             Src1Mod:$src1_modifiers, Src1RC:$src1,
+             Src2Mod:$src2_modifiers, Src2RC:$src2,
              clampmod:$clamp, omod:$omod)
       /* else */,
         // VOP3 without modifiers
@@ -686,7 +739,7 @@
 }
 
 class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
-                                                             bit HasModifiers> {
+                 bit HasModifiers, Operand Src0Mod, Operand Src1Mod> {
 
   dag ret = !if (!eq(NumSrcArgs, 0),
                 // VOP1 without input operands (V_NOP)
@@ -695,7 +748,7 @@
             !if (!eq(NumSrcArgs, 1),
               !if (!eq(HasModifiers, 1),
                 // VOP1_DPP with modifiers
-                (ins FPInputMods:$src0_modifiers, Src0RC:$src0,
+                (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
                      dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
               /* else */,
@@ -706,8 +759,8 @@
               /* NumSrcArgs == 2 */,
               !if (!eq(HasModifiers, 1),
                 // VOP2_DPP with modifiers
-                (ins FPInputMods:$src0_modifiers, Src0RC:$src0,
-                     FPInputMods:$src1_modifiers, Src1RC:$src1,
+                (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+                     Src1Mod:$src1_modifiers, Src1RC:$src1,
                      dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                      bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
               /* else */,
@@ -719,48 +772,45 @@
 }
 
 class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
-                  bit HasFloatModifiers, ValueType DstVT> {
+                  bit HasFloatModifiers, Operand Src0Mod, Operand Src1Mod,
+                  ValueType DstVT> {
 
   dag ret = !if(!eq(NumSrcArgs, 0),
                // VOP1 without input operands (V_NOP)
                (ins),
             !if(!eq(NumSrcArgs, 1),
-                !if(HasFloatModifiers,
+                !if(HasFloatModifiers, 
                     // VOP1_SDWA with float modifiers
-                    (ins FPInputMods:$src0_fmodifiers, Src0RC:$src0,
+                    (ins Src0Mod:$src0_fmodifiers, Src0RC:$src0,
                          clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
-                         src0_sel:$src0_sel)
-                /* else */,
-                    // VOP1_SDWA with sext modifier
-                    (ins IntInputMods:$src0_imodifiers, Src0RC:$src0,
+                         src0_sel:$src0_sel),
+                    // VOP1_SDWA with int modifiers
+                    (ins Src0Mod:$src0_imodifiers, Src0RC:$src0,
                          clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
-                         src0_sel:$src0_sel)
-                /* endif */)
+                         src0_sel:$src0_sel))
               /* NumSrcArgs == 2 */,
               !if(HasFloatModifiers,
                   !if(!eq(DstVT.Size, 1),
                       // VOPC_SDWA with float modifiers
-                      (ins FPInputMods:$src0_fmodifiers, Src0RC:$src0,
-                           FPInputMods:$src1_fmodifiers, Src1RC:$src1,
+                      (ins Src0Mod:$src0_fmodifiers, Src0RC:$src0,
+                           Src1Mod:$src1_fmodifiers, Src1RC:$src1,
                            clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
                       // VOP2_SDWA or VOPC_SDWA with float modifiers
-                      (ins FPInputMods:$src0_fmodifiers, Src0RC:$src0,
-                           FPInputMods:$src1_fmodifiers, Src1RC:$src1,
+                      (ins Src0Mod:$src0_fmodifiers, Src0RC:$src0,
+                           Src1Mod:$src1_fmodifiers, Src1RC:$src1,
                            clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
-                           src0_sel:$src0_sel, src1_sel:$src1_sel)
-                  ),
-              /* else */
-                !if(!eq(DstVT.Size, 1),
-                    // VOPC_SDWA with sext modifiers
-                    (ins IntInputMods:$src0_imodifiers, Src0RC:$src0,
-                         IntInputMods:$src1_imodifiers, Src1RC:$src1,
-                         clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
-                    // VOP2_SDWA or VOPC_SDWA with sext modifier
-                    (ins IntInputMods:$src0_imodifiers, Src0RC:$src0,
-                         IntInputMods:$src1_imodifiers, Src1RC:$src1,
-                         clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
-                         src0_sel:$src0_sel, src1_sel:$src1_sel)
-                )
+                           src0_sel:$src0_sel, src1_sel:$src1_sel)),
+              
+                  !if(!eq(DstVT.Size, 1),
+                      // VOPC_SDWA with int modifiers
+                      (ins Src0Mod:$src0_imodifiers, Src0RC:$src0,
+                           Src1Mod:$src1_imodifiers, Src1RC:$src1,
+                           clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
+                      // VOP2_SDWA or VOPC_SDWA with int modifiers
+                      (ins Src0Mod:$src0_imodifiers, Src0RC:$src0,
+                           Src1Mod:$src1_imodifiers, Src1RC:$src1,
+                           clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+                           src0_sel:$src0_sel, src1_sel:$src1_sel))
              /* endif */)));
 }
 
@@ -885,6 +935,9 @@
   field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
   field RegisterClass Src0SDWA = getVregSrcForVT<Src0VT>.ret;
   field RegisterClass Src1SDWA = getVregSrcForVT<Src1VT>.ret;
+  field Operand Src0Mod = getSrcMod<Src0VT>.ret;
+  field Operand Src1Mod = getSrcMod<Src1VT>.ret;
+  field Operand Src2Mod = getSrcMod<Src2VT>.ret;
 
   field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
   field bit HasDst32 = HasDst;
@@ -904,9 +957,11 @@
 
   field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
   field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
-                             HasModifiers>.ret;
-  field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs, HasModifiers>.ret;
-  field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs, HasModifiers, DstVT>.ret;
+                             HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
+  field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs,
+                               HasModifiers, Src0Mod, Src1Mod>.ret;
+  field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
+                                 HasModifiers, Src0Mod, Src1Mod, DstVT>.ret;
 
   field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
   field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
@@ -968,12 +1023,12 @@
   let Src0RC64 = VOPDstOperand<VGPR_32>;
 
   let Outs = (outs);
-  let Ins32 = (ins Src0RC32:$vdst, VSrc_32:$src0);
-  let Ins64 = (ins Src0RC64:$vdst, VSrc_32:$src0);
+  let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0);
+  let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0);
 
   let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                     bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
-  let InsSDWA = (ins Src0RC32:$vdst, IntInputMods:$src0_imodifiers, VCSrc_32:$src0,
+  let InsSDWA = (ins Src0RC32:$vdst, Int32InputMods:$src0_imodifiers, VCSrc_b32:$src0,
                      clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                      src0_sel:$src0_sel);
 
@@ -997,12 +1052,12 @@
 // Write out to vcc or arbitrary SGPR and read in from vcc or
 // arbitrary SGPR.
 def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
-  // We use VCSrc_32 to exclude literal constants, even though the
+  // We use VCSrc_b32 to exclude literal constants, even though the
   // encoding normally allows them since the implicit VCC use means
   // using one would always violate the constant bus
   // restriction. SGPRs are still allowed because it should
   // technically be possible to use VCC again as src0.
-  let Src0RC32 = VCSrc_32;
+  let Src0RC32 = VCSrc_b32;
   let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
   let Asm64 = "$vdst, $sdst, $src0, $src1, $src2";
   let Outs32 = (outs DstRC:$vdst);
@@ -1015,7 +1070,7 @@
 
 // Read in from vcc or arbitrary SGPR
 def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
-  let Src0RC32 = VCSrc_32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
+  let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
   let Asm32 = "$vdst, $src0, $src1, vcc";
   let Asm64 = "$vdst, $src0, $src1, $src2";
   let Outs32 = (outs DstRC:$vdst);
@@ -1052,10 +1107,10 @@
 }
 
 class VOPC_Class_Profile<ValueType vt> : VOPC_Profile<vt, i32> {
-  let Ins64 = (ins FPInputMods:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
+  let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
   let Asm64 = "$sdst, $src0_modifiers, $src1";
-  let InsSDWA = (ins FPInputMods:$src0_fmodifiers, Src0RC64:$src0,
-                     IntInputMods:$src1_imodifiers, Src1RC64:$src1,
+  let InsSDWA = (ins Src0Mod:$src0_fmodifiers, Src0RC64:$src0,
+                     Int32InputMods:$src1_imodifiers, Src1RC64:$src1,
                      clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel);
   let AsmSDWA = " vcc, $src0_fmodifiers, $src1_imodifiers$clamp $src0_sel $src1_sel";
 
@@ -1075,26 +1130,26 @@
 
 def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
 def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> {
-  field dag Ins32 = (ins VCSrc_32:$src0, VGPR_32:$src1, u32kimm:$imm);
+  field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, f32kimm:$imm);
   field string Asm32 = "$vdst, $src0, $src1, $imm";
   field bit HasExt = 0;
 }
 def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> {
-  field dag Ins32 = (ins VCSrc_32:$src0, u32kimm:$imm, VGPR_32:$src1);
+  field dag Ins32 = (ins VCSrc_f32:$src0, f32kimm:$imm, VGPR_32:$src1);
   field string Asm32 = "$vdst, $src0, $imm, $src1";
   field bit HasExt = 0;
 }
 def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
   let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
   let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
-                             HasModifiers>.ret;
-  let InsDPP = (ins FPInputMods:$src0_modifiers, Src0RC32:$src0,
-                    FPInputMods:$src1_modifiers, Src1RC32:$src1,
+                       HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
+  let InsDPP = (ins FP32InputMods:$src0_modifiers, Src0RC32:$src0,
+                    FP32InputMods:$src1_modifiers, Src1RC32:$src1,
                     VGPR_32:$src2, // stub argument
                     dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                     bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
-  let InsSDWA = (ins FPInputMods:$src0_fmodifiers, Src0RC32:$src0,
-                     FPInputMods:$src1_fmodifiers, Src1RC32:$src1,
+  let InsSDWA = (ins FP32InputMods:$src0_fmodifiers, Src0RC32:$src0,
+                     FP32InputMods:$src1_fmodifiers, Src1RC32:$src1,
                      VGPR_32:$src2, // stub argument
                      clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
                      src0_sel:$src0_sel, src1_sel:$src1_sel);
@@ -1968,11 +2023,7 @@
                           SDPatternOperator node = null_frag> : VOP3_Helper <
   op, opName,
   (outs P.DstRC.RegClass:$vdst),
-  (ins FPInputMods:$src0_modifiers, P.Src0RC64:$src0,
-       FPInputMods:$src1_modifiers, P.Src1RC64:$src1,
-       FPInputMods:$src2_modifiers, P.Src2RC64:$src2,
-       clampmod:$clamp,
-       omod:$omod),
+  P.Ins64,
   "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod",
   [(set P.DstVT:$vdst,
             (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
@@ -2086,7 +2137,7 @@
   op, opName, (outs),
   (ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
    i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr,
-   SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SCSrc_32:$soffset),
+   SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset),
   opName#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
         #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", []
 >;
@@ -2100,7 +2151,7 @@
   op, opName, (outs regClass:$dst),
   (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
        i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr, SReg_128:$srsrc,
-       i1imm:$slc, i1imm:$tfe, SCSrc_32:$soffset),
+       i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset),
   opName#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
         #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", []
 >;
@@ -2262,13 +2313,13 @@
       defm _ADDR64 : MUBUFAtomicAddr64_m <
         op, name#"_addr64", (outs),
         (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc,
-             SCSrc_32:$soffset, offset:$offset, slc:$slc),
+             SCSrc_b32:$soffset, offset:$offset, slc:$slc),
         name#" $vdata, $vaddr, $srsrc, $soffset addr64$offset$slc", [], 0
       >;
 
       defm _OFFSET : MUBUFAtomicOffset_m <
         op, name#"_offset", (outs),
-        (ins rc:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset, offset:$offset,
+        (ins rc:$vdata, SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset,
              slc:$slc),
         name#" $vdata, off, $srsrc, $soffset$offset$slc", [], 0
       >;
@@ -2276,7 +2327,7 @@
       let offen = 1, idxen = 0 in {
         defm _OFFEN : MUBUFAtomicOther_m <
           op, name#"_offen", (outs),
-          (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+          (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
                 offset:$offset, slc:$slc),
           name#" $vdata, $vaddr, $srsrc, $soffset offen$offset$slc", [], 0
         >;
@@ -2285,7 +2336,7 @@
       let offen = 0, idxen = 1 in {
         defm _IDXEN : MUBUFAtomicOther_m <
           op, name#"_idxen", (outs),
-          (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+          (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
                 offset:$offset, slc:$slc),
           name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset$slc", [], 0
         >;
@@ -2294,7 +2345,7 @@
       let offen = 1, idxen = 1 in {
         defm _BOTHEN : MUBUFAtomicOther_m <
           op, name#"_bothen", (outs),
-          (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+          (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
                 offset:$offset, slc:$slc),
           name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset$slc",
           [], 0
@@ -2310,7 +2361,7 @@
       defm _RTN_ADDR64 : MUBUFAtomicAddr64_m <
         op, name#"_rtn_addr64", (outs rc:$vdata),
         (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc,
-             SCSrc_32:$soffset, offset:$offset, slc:$slc),
+             SCSrc_b32:$soffset, offset:$offset, slc:$slc),
         name#" $vdata, $vaddr, $srsrc, $soffset addr64$offset glc$slc",
         [(set vt:$vdata,
          (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset,
@@ -2319,7 +2370,7 @@
 
       defm _RTN_OFFSET : MUBUFAtomicOffset_m <
         op, name#"_rtn_offset", (outs rc:$vdata),
-        (ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_32:$soffset,
+        (ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_b32:$soffset,
              offset:$offset, slc:$slc),
         name#" $vdata, off, $srsrc, $soffset$offset glc$slc",
         [(set vt:$vdata,
@@ -2330,7 +2381,7 @@
       let offen = 1, idxen = 0 in {
         defm _RTN_OFFEN : MUBUFAtomicOther_m <
           op, name#"_rtn_offen", (outs rc:$vdata),
-          (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+          (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
                 offset:$offset, slc:$slc),
           name#" $vdata, $vaddr, $srsrc, $soffset offen$offset glc$slc",
           [], 1
@@ -2340,7 +2391,7 @@
       let offen = 0, idxen = 1 in {
         defm _RTN_IDXEN : MUBUFAtomicOther_m <
           op, name#"_rtn_idxen", (outs rc:$vdata),
-          (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+          (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
                 offset:$offset, slc:$slc),
           name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset glc$slc",
           [], 1
@@ -2350,7 +2401,7 @@
       let offen = 1, idxen = 1 in {
         defm _RTN_BOTHEN : MUBUFAtomicOther_m <
           op, name#"_rtn_bothen", (outs rc:$vdata),
-          (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+          (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
                 offset:$offset, slc:$slc),
           name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset glc$slc",
           [], 1
@@ -2370,7 +2421,7 @@
   let mayLoad = 1, mayStore = 0 in {
     let offen = 0, idxen = 0, vaddr = 0 in {
       defm _OFFSET : MUBUF_m <op, name#"_offset", (outs regClass:$vdata),
-                           (ins SReg_128:$srsrc, SCSrc_32:$soffset,
+                           (ins SReg_128:$srsrc, SCSrc_b32:$soffset,
                            offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
                            name#" $vdata, off, $srsrc, $soffset$offset$glc$slc$tfe",
                            [(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc,
@@ -2381,7 +2432,7 @@
     let offen = 1, idxen = 0  in {
       defm _OFFEN  : MUBUF_m <op, name#"_offen", (outs regClass:$vdata),
                            (ins VGPR_32:$vaddr, SReg_128:$srsrc,
-                           SCSrc_32:$soffset, offset:$offset, glc:$glc, slc:$slc,
+                           SCSrc_b32:$soffset, offset:$offset, glc:$glc, slc:$slc,
                            tfe:$tfe),
                            name#" $vdata, $vaddr, $srsrc, $soffset offen$offset$glc$slc$tfe", []>;
     }
@@ -2389,14 +2440,14 @@
     let offen = 0, idxen = 1 in {
       defm _IDXEN  : MUBUF_m <op, name#"_idxen", (outs regClass:$vdata),
                            (ins VGPR_32:$vaddr, SReg_128:$srsrc,
-                           SCSrc_32:$soffset, offset:$offset, glc:$glc,
+                           SCSrc_b32:$soffset, offset:$offset, glc:$glc,
                            slc:$slc, tfe:$tfe),
                            name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset$glc$slc$tfe", []>;
     }
 
     let offen = 1, idxen = 1 in {
       defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs regClass:$vdata),
-                           (ins VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+                           (ins VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
                            offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
                            name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset$glc$slc$tfe", []>;
     }
@@ -2404,7 +2455,7 @@
     let offen = 0, idxen = 0 in {
       defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs regClass:$vdata),
                            (ins VReg_64:$vaddr, SReg_128:$srsrc,
-                                SCSrc_32:$soffset, offset:$offset,
+                                SCSrc_b32:$soffset, offset:$offset,
 				glc:$glc, slc:$slc, tfe:$tfe),
                            name#" $vdata, $vaddr, $srsrc, $soffset addr64$offset$glc$slc$tfe",
                            [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
@@ -2420,7 +2471,7 @@
   let mayLoad = 0, mayStore = 1 in {
     let offen = 0, idxen = 0, vaddr = 0 in {
       defm _OFFSET : MUBUF_m <op, name#"_offset",(outs),
-                              (ins vdataClass:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset,
+                              (ins vdataClass:$vdata, SReg_128:$srsrc, SCSrc_b32:$soffset,
                               offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
                               name#" $vdata, off, $srsrc, $soffset$offset$glc$slc$tfe",
                               [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
@@ -2430,7 +2481,7 @@
     let offen = 1, idxen = 0  in {
       defm _OFFEN : MUBUF_m <op, name#"_offen", (outs),
                              (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
-                              SCSrc_32:$soffset, offset:$offset, glc:$glc,
+                              SCSrc_b32:$soffset, offset:$offset, glc:$glc,
                               slc:$slc, tfe:$tfe),
                              name#" $vdata, $vaddr, $srsrc, $soffset offen"#
                              "$offset$glc$slc$tfe", []>;
@@ -2439,14 +2490,14 @@
     let offen = 0, idxen = 1 in {
       defm _IDXEN  : MUBUF_m <op, name#"_idxen", (outs),
                            (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
-                           SCSrc_32:$soffset, offset:$offset, glc:$glc,
+                           SCSrc_b32:$soffset, offset:$offset, glc:$glc,
                            slc:$slc, tfe:$tfe),
                            name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset$glc$slc$tfe", []>;
     }
 
     let offen = 1, idxen = 1 in {
       defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs),
-                           (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
+                           (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
                            offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
                            name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset$glc$slc$tfe", []>;
     }
@@ -2454,7 +2505,7 @@
     let offen = 0, idxen = 0 in {
       defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs),
                                     (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc,
-                                         SCSrc_32:$soffset,
+                                         SCSrc_b32:$soffset,
                                          offset:$offset, glc:$glc, slc:$slc,
                                          tfe:$tfe),
                                     name#" $vdata, $vaddr, $srsrc, $soffset addr64"#
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 63d2239..94506f2 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -848,7 +848,7 @@
   vop3 <0x001, 0x289>,
   "v_readlane_b32",
   (outs SReg_32:$vdst),
-  (ins VGPR_32:$src0, SCSrc_32:$src1),
+  (ins VGPR_32:$src0, SCSrc_b32:$src1),
   "v_readlane_b32 $vdst, $src0, $src1",
   [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))]
 >;
@@ -857,7 +857,7 @@
   vop3 <0x002, 0x28a>,
   "v_writelane_b32",
   (outs VGPR_32:$vdst),
-  (ins SReg_32:$src0, SCSrc_32:$src1),
+  (ins SReg_32:$src0, SCSrc_b32:$src1),
   "v_writelane_b32 $vdst, $src0, $src1"
 >;
 
@@ -1179,7 +1179,7 @@
 
 // For use in patterns
 def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst),
-  (ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", []> {
+  (ins VSrc_b64:$src0, VSrc_b64:$src1, SSrc_b64:$src2), "", []> {
   let isPseudo = 1;
   let isCodeGenOnly = 1;
   let usesCustomInserter = 1;
@@ -1187,7 +1187,7 @@
 
 // 64-bit vector move instruction.  This is mainly used by the SIFoldOperands
 // pass to enable folding of inline immediates.
-def V_MOV_B64_PSEUDO : PseudoInstSI <(outs VReg_64:$vdst), (ins VSrc_64:$src0)> {
+def V_MOV_B64_PSEUDO : PseudoInstSI <(outs VReg_64:$vdst), (ins VSrc_b64:$src0)> {
   let VALU = 1;
 }
 } // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
@@ -1263,14 +1263,14 @@
 
 let Uses = [EXEC], Defs = [EXEC,VCC] in {
 def SI_KILL : PseudoInstSI <
-  (outs), (ins VSrc_32:$src),
+  (outs), (ins VSrc_b32:$src),
   [(AMDGPUkill i32:$src)]> {
   let isConvergent = 1;
   let usesCustomInserter = 1;
 }
 
 def SI_KILL_TERMINATOR : SPseudoInstSI <
-  (outs), (ins VSrc_32:$src)> {
+  (outs), (ins VSrc_b32:$src)> {
   let isTerminator = 1;
 }
 
@@ -1288,7 +1288,7 @@
 // s_mov_b32 rather than a copy of another initialized
 // register. MachineCSE skips copies, and we don't want to have to
 // fold operands before it runs.
-def SI_INIT_M0 : SPseudoInstSI <(outs), (ins SSrc_32:$src)> {
+def SI_INIT_M0 : SPseudoInstSI <(outs), (ins SSrc_b32:$src)> {
   let Defs = [M0];
   let usesCustomInserter = 1;
   let isAsCheapAsAMove = 1;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index b6c5fb9..86de0e2 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -791,14 +791,16 @@
 }
 
 bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
-  return OpType == AMDGPU::OPERAND_REG_IMM32;
+  return OpType == AMDGPU::OPERAND_REG_IMM32_INT ||
+         OpType == AMDGPU::OPERAND_REG_IMM32_FP;
 }
 
 bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
   if (opCanUseLiteralConstant(OpType))
     return true;
 
-  return OpType == AMDGPU::OPERAND_REG_INLINE_C;
+  return OpType == AMDGPU::OPERAND_REG_INLINE_C_INT ||
+         OpType == AMDGPU::OPERAND_REG_INLINE_C_FP;
 }
 
 // FIXME: Most of these are flexible with HSA and we don't need to reserve them
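For clarity, the behavior the two rewritten predicates are meant to have for the four new operand types, as a hypothetical check (assumes a constructed SIRegisterInfo; this is not a test from the patch):
'''
#include "SIRegisterInfo.h"
#include <cassert>

// Sketch: IMM32 operands accept literals, INLINE_C operands do not,
// but every source operand type accepts inline constants.
static void checkOperandPredicates(const llvm::SIRegisterInfo &TRI) {
  using namespace llvm::AMDGPU;
  assert(TRI.opCanUseLiteralConstant(OPERAND_REG_IMM32_INT));
  assert(TRI.opCanUseLiteralConstant(OPERAND_REG_IMM32_FP));
  assert(!TRI.opCanUseLiteralConstant(OPERAND_REG_INLINE_C_INT));
  assert(!TRI.opCanUseLiteralConstant(OPERAND_REG_INLINE_C_FP));
  assert(TRI.opCanUseInlineConstant(OPERAND_REG_IMM32_FP));
  assert(TRI.opCanUseInlineConstant(OPERAND_REG_INLINE_C_FP));
}
'''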
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 9332666..657bed1 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -358,56 +358,59 @@
 //  Register operands
 //===----------------------------------------------------------------------===//
 
-class RegImmOperand <RegisterClass rc> : RegisterOperand<rc> {
-  let OperandNamespace = "AMDGPU";
-  let OperandType = "OPERAND_REG_IMM32";
-}
-
-class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> {
-  let OperandNamespace = "AMDGPU";
-  let OperandType = "OPERAND_REG_INLINE_C";
-}
-
 class RegImmMatcher<string name> : AsmOperandClass {
   let Name = name;
   let RenderMethod = "addRegOrImmOperands";
 }
 
+multiclass SIRegOperand <string rc, string MatchName, string opType> {
+  let OperandNamespace = "AMDGPU" in {
+    
+    def _b32 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+      let OperandType = opType#"_INT";
+      let ParserMatchClass = RegImmMatcher<MatchName#"B32">;
+    }
+  
+    def _f32 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+      let OperandType = opType#"_FP";
+      let ParserMatchClass = RegImmMatcher<MatchName#"F32">;
+    }
+    
+    def _b64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
+      let OperandType = opType#"_INT";
+      let ParserMatchClass = RegImmMatcher<MatchName#"B64">;
+    }
+
+    def _f64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
+      let OperandType = opType#"_FP";
+      let ParserMatchClass = RegImmMatcher<MatchName#"F64">;
+    }
+  }
+}
+
+multiclass RegImmOperand <string rc, string MatchName> 
+  : SIRegOperand<rc, MatchName, "OPERAND_REG_IMM32">;
+
+multiclass RegInlineOperand <string rc, string MatchName> 
+  : SIRegOperand<rc, MatchName, "OPERAND_REG_INLINE_C">;
+
 //===----------------------------------------------------------------------===//
 //  SSrc_* Operands with an SGPR or a 32-bit immediate
 //===----------------------------------------------------------------------===//
 
-def SSrc_32 : RegImmOperand<SReg_32> {
-  let ParserMatchClass = RegImmMatcher<"SSrc32">;
-}
-
-def SSrc_64 : RegImmOperand<SReg_64> {
-  let ParserMatchClass = RegImmMatcher<"SSrc64">;
-}
+defm SSrc : RegImmOperand<"SReg", "SSrc">;
 
 //===----------------------------------------------------------------------===//
 //  SCSrc_* Operands with an SGPR or a inline constant
 //===----------------------------------------------------------------------===//
 
-def SCSrc_32 : RegInlineOperand<SReg_32> {
-  let ParserMatchClass = RegImmMatcher<"SCSrc32">;
-}
-
-def SCSrc_64 : RegInlineOperand<SReg_64> {
-  let ParserMatchClass = RegImmMatcher<"SCSrc64">;
-}
+defm SCSrc : RegInlineOperand<"SReg", "SCSrc">;
 
 //===----------------------------------------------------------------------===//
 //  VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
 //===----------------------------------------------------------------------===//
 
-def VSrc_32 : RegImmOperand<VS_32> {
-  let ParserMatchClass = RegImmMatcher<"VSrc32">;
-}
-
-def VSrc_64 : RegImmOperand<VS_64> {
-  let ParserMatchClass = RegImmMatcher<"VSrc64">;
-}
+defm VSrc : RegImmOperand<"VS", "VSrc">;
 
 //===----------------------------------------------------------------------===//
 //  VSrc_* Operands with an VGPR
@@ -424,10 +427,4 @@
 //  VCSrc_* Operands with an SGPR, VGPR or an inline constant
 //===----------------------------------------------------------------------===//
 
-def VCSrc_32 : RegInlineOperand<VS_32> {
-  let ParserMatchClass = RegImmMatcher<"VCSrc32">;
-}
-
-def VCSrc_64 : RegInlineOperand<VS_64> {
-  let ParserMatchClass = RegImmMatcher<"VCSrc64">;
-}
+defm VCSrc : RegInlineOperand<"VS", "VCSrc">;
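Each defm above expands to four register operands, e.g. "defm VSrc" produces VSrc_b32, VSrc_f32, VSrc_b64 and VSrc_f64 (and likewise for SSrc, SCSrc and VCSrc), which is where the typed operand names used throughout the rest of this patch come from.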
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 5a8e2a1..7226f20 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -57,24 +57,24 @@
 }
 
 class SOP1_32 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
-  opName, (outs SReg_32:$sdst), (ins SSrc_32:$src0),
+  opName, (outs SReg_32:$sdst), (ins SSrc_b32:$src0),
   "$sdst, $src0", pattern
 >;
 
 class SOP1_64 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
-  opName, (outs SReg_64:$sdst), (ins SSrc_64:$src0),
+  opName, (outs SReg_64:$sdst), (ins SSrc_b64:$src0),
   "$sdst, $src0", pattern
 >;
 
 // 64-bit input, 32-bit output.
 class SOP1_32_64 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
-  opName, (outs SReg_32:$sdst), (ins SSrc_64:$src0),
+  opName, (outs SReg_32:$sdst), (ins SSrc_b64:$src0),
   "$sdst, $src0", pattern
 >;
 
 // 32-bit input, 64-bit output.
 class SOP1_64_32 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
-  opName, (outs SReg_64:$sdst), (ins SSrc_32:$src0),
+  opName, (outs SReg_64:$sdst), (ins SSrc_b32:$src0),
   "$sdst, $src0", pattern
 >;
 
@@ -254,22 +254,22 @@
 
 
 class SOP2_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
-  opName, (outs SReg_32:$sdst), (ins SSrc_32:$src0, SSrc_32:$src1),
+  opName, (outs SReg_32:$sdst), (ins SSrc_b32:$src0, SSrc_b32:$src1),
   "$sdst, $src0, $src1", pattern
 >;
 
 class SOP2_64 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
-  opName, (outs SReg_64:$sdst), (ins SSrc_64:$src0, SSrc_64:$src1),
+  opName, (outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1),
   "$sdst, $src0, $src1", pattern
 >;
 
 class SOP2_64_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
-  opName, (outs SReg_64:$sdst), (ins SSrc_64:$src0, SSrc_32:$src1),
+  opName, (outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b32:$src1),
   "$sdst, $src0, $src1", pattern
 >;
 
 class SOP2_64_32_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
-  opName, (outs SReg_64:$sdst), (ins SSrc_32:$src0, SSrc_32:$src1),
+  opName, (outs SReg_64:$sdst), (ins SSrc_b32:$src0, SSrc_b32:$src1),
   "$sdst, $src0, $src1", pattern
 >;
 
@@ -277,23 +277,23 @@
 let isCommutable = 1 in {
 def S_ADD_U32 : SOP2_32 <"s_add_u32">;
 def S_ADD_I32 : SOP2_32 <"s_add_i32",
-  [(set i32:$sdst, (add SSrc_32:$src0, SSrc_32:$src1))]
+  [(set i32:$sdst, (add SSrc_b32:$src0, SSrc_b32:$src1))]
 >;
 } // End isCommutable = 1
 
 def S_SUB_U32 : SOP2_32 <"s_sub_u32">;
 def S_SUB_I32 : SOP2_32 <"s_sub_i32",
-  [(set i32:$sdst, (sub SSrc_32:$src0, SSrc_32:$src1))]
+  [(set i32:$sdst, (sub SSrc_b32:$src0, SSrc_b32:$src1))]
 >;
 
 let Uses = [SCC] in { // Carry in comes from SCC
 let isCommutable = 1 in {
 def S_ADDC_U32 : SOP2_32 <"s_addc_u32",
-  [(set i32:$sdst, (adde (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
+  [(set i32:$sdst, (adde (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
 } // End isCommutable = 1
 
 def S_SUBB_U32 : SOP2_32 <"s_subb_u32",
-  [(set i32:$sdst, (sube (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
+  [(set i32:$sdst, (sube (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
 } // End Uses = [SCC]
 
 
@@ -614,13 +614,13 @@
 }
 
 class SOPC_CMP_32<bits<7> op, string opName, PatLeaf cond = COND_NULL>
-  : SOPC_Helper<op, SSrc_32, i32, opName, cond>;
+  : SOPC_Helper<op, SSrc_b32, i32, opName, cond>;
 
 class SOPC_32<bits<7> op, string opName, list<dag> pattern = []>
-  : SOPC_Base<op, SSrc_32, SSrc_32, opName, pattern>;
+  : SOPC_Base<op, SSrc_b32, SSrc_b32, opName, pattern>;
 
 class SOPC_64_32<bits<7> op, string opName, list<dag> pattern = []>
-  : SOPC_Base<op, SSrc_64, SSrc_32, opName, pattern>;
+  : SOPC_Base<op, SSrc_b64, SSrc_b32, opName, pattern>;
 
 
 def S_CMP_EQ_I32 : SOPC_CMP_32 <0x00, "s_cmp_eq_i32", COND_EQ>;
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 1fac266..51b56e0 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -8,10 +8,13 @@
 //===----------------------------------------------------------------------===//
 #include "AMDGPUBaseInfo.h"
 #include "AMDGPU.h"
+#include "SIDefines.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalValue.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/MC/MCSectionELF.h"
 #include "llvm/MC/MCSubtargetInfo.h"
 #include "llvm/MC/SubtargetFeature.h"
@@ -200,5 +203,72 @@
   return Reg;
 }
 
+bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
+
+  return OpType == AMDGPU::OPERAND_REG_IMM32_INT ||
+         OpType == AMDGPU::OPERAND_REG_IMM32_FP ||
+         OpType == AMDGPU::OPERAND_REG_INLINE_C_INT ||
+         OpType == AMDGPU::OPERAND_REG_INLINE_C_FP;
+}
+
+bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
+
+  return OpType == AMDGPU::OPERAND_REG_IMM32_FP ||
+         OpType == AMDGPU::OPERAND_REG_INLINE_C_FP;
+}
+
+bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
+
+  return OpType == AMDGPU::OPERAND_REG_INLINE_C_INT ||
+         OpType == AMDGPU::OPERAND_REG_INLINE_C_FP;
+}
+
+unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
+                           unsigned OpNo) {
+  int RCID = Desc.OpInfo[OpNo].RegClass;
+  const MCRegisterClass &RC = MRI->getRegClass(RCID);
+  return RC.getSize();
+}
+
+bool isInlinableLiteral64(int64_t Literal, bool IsVI) {
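+  // The raw value is compared first: -16..64 are the integer inline constants.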
+  if (Literal >= -16 && Literal <= 64)
+    return true;
+
+  double D = BitsToDouble(Literal);
+
+  if (D == 0.5 || D == -0.5 ||
+      D == 1.0 || D == -1.0 ||
+      D == 2.0 || D == -2.0 ||
+      D == 4.0 || D == -4.0)
+    return true;
+
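+  // 0x3fc45f306dc9c882 is the f64 bit pattern of 1/(2*pi), an inline
+  // constant that exists only on VI.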
+  if (IsVI && Literal == 0x3fc45f306dc9c882)
+    return true;
+
+  return false;
+}
+
+bool isInlinableLiteral32(int32_t Literal, bool IsVI) {
+  if (Literal >= -16 && Literal <= 64)
+    return true;
+
+  float F = BitsToFloat(Literal);
+
+  if (F == 0.5 || F == -0.5 ||
+      F == 1.0 || F == -1.0 ||
+      F == 2.0 || F == -2.0 ||
+      F == 4.0 || F == -4.0)
+    return true;
+
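+  // 0x3e22f983 is the f32 bit pattern of 1/(2*pi) (VI only).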
+  if (IsVI && Literal == 0x3e22f983)
+    return true;
+
+  return false;
+}
+
 } // End namespace AMDGPU
 } // End namespace llvm
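A concrete look at what the 64-bit predicate accepts, as a standalone sketch (it declares the helper locally; link against the file above to run it):
'''
// Sketch: how isInlinableLiteral64 classifies a few f64 bit patterns.
#include <cassert>
#include <cstdint>
#include <cstring>

namespace llvm { namespace AMDGPU {
bool isInlinableLiteral64(int64_t Literal, bool IsVI); // added above
}}

static int64_t f64Bits(double D) {
  int64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits)); // equivalent of DoubleToBits()
  return Bits;
}

int main() {
  // 0.5 (0x3FE0000000000000) is a hardware inline constant.
  assert(llvm::AMDGPU::isInlinableLiteral64(f64Bits(0.5), true));
  // 1.5 (0x3FF8000000000000) is not; note its low 32 bits are zero, so it
  // is still exactly representable via the 32-bit literal extension.
  assert(!llvm::AMDGPU::isInlinableLiteral64(f64Bits(1.5), true));
  assert((f64Bits(1.5) & 0xffffffff) == 0);
  // Small integers reuse the integer inline constants.
  assert(llvm::AMDGPU::isInlinableLiteral64(64, true));
  assert(!llvm::AMDGPU::isInlinableLiteral64(65, true));
  return 0;
}
'''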
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 744d7fb..1b8b8a0 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -19,6 +19,8 @@
 class Function;
 class GlobalValue;
 class MCContext;
+class MCInstrDesc;
+class MCRegisterInfo;
 class MCSection;
 class MCSubtargetInfo;
 
@@ -80,6 +82,23 @@
 /// \p STI otherwise return \p Reg.
 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
 
+/// \brief Can this operand also contain immediate values?
+bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
+
+/// \brief Is this floating-point operand?
+bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
+
+/// \brief Does this operand support only inlinable literals?
+bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
+
+/// \brief Get the size of a register operand in bytes
+unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
+                           unsigned OpNo);
+
+/// \brief Is this literal inlinable?
+bool isInlinableLiteral64(int64_t Literal, bool IsVI);
+bool isInlinableLiteral32(int32_t Literal, bool IsVI);
+
 } // end namespace AMDGPU
 } // end namespace llvm
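And a rough sketch of how these queries are meant to compose when a caller checks a parsed literal against a source operand (simplified; the real conversion logic lives in AMDGPUAsmParser and handles more cases):
'''
// Sketch: classify a 64-bit parsed literal for source operand OpNo.
#include "Utils/AMDGPUBaseInfo.h" // path as used from the AMDGPU target dir
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include <cstdint>

using namespace llvm;

enum class LiteralKind { NotASrc, Inline, Literal32, Invalid };

static LiteralKind classify(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                            unsigned OpNo, int64_t Literal, bool IsVI) {
  if (!AMDGPU::isSISrcOperand(Desc, OpNo))
    return LiteralKind::NotASrc;

  bool Is64 = AMDGPU::getRegOperandSize(MRI, Desc, OpNo) == 8;
  bool Inlinable = Is64 ? AMDGPU::isInlinableLiteral64(Literal, IsVI)
                        : AMDGPU::isInlinableLiteral32(int32_t(Literal), IsVI);
  if (Inlinable)
    return LiteralKind::Inline;
  if (AMDGPU::isSISrcInlinableOperand(Desc, OpNo))
    return LiteralKind::Invalid; // operand takes inline constants only
  // For 64-bit FP operands, isSISrcFPOperand() would additionally decide
  // whether the low 32 bits may be dropped when emitting the literal.
  return LiteralKind::Literal32; // fits the extra 32-bit literal dword
}
'''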
 
diff --git a/llvm/lib/Target/AMDGPU/VIInstructions.td b/llvm/lib/Target/AMDGPU/VIInstructions.td
index fc88f1fb..3c5a8a9 100644
--- a/llvm/lib/Target/AMDGPU/VIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VIInstructions.td
@@ -90,7 +90,7 @@
 
 class SI2_VI3Alias <string name, Instruction inst> : InstAlias <
   name#" $dst, $src0, $src1",
-  (inst VGPR_32:$dst, 0, VCSrc_32:$src0, 0, VCSrc_32:$src1, 0, 0)
+  (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0)
 >, PredicateControl {
   let UseInstAsmMatchConverter = 0;
   let AsmVariantName = AMDGPUAsmVariants.VOP3;