[X86] Fix for bugzilla 31576 - add support for "data32" instruction prefix

This patch fixes bugzilla 31576 (https://llvm.org/bugs/show_bug.cgi?id=31576).

"data32" instruction prefix was not defined in the llvm.
An exception had to be added to the X86 tablegen and AsmPrinter because both "data16" and "data32" are encoded to 0x66 (but in different modes).

Differential Revision: https://reviews.llvm.org/D28468

llvm-svn: 292352
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index e692118..f416d89 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -2360,7 +2360,7 @@
     Name == "lock" || Name == "rep" ||
     Name == "repe" || Name == "repz" ||
     Name == "repne" || Name == "repnz" ||
-    Name == "rex64" || Name == "data16";
+    Name == "rex64" || Name == "data16" || Name == "data32";
 
   bool CurlyAsEndOfStatement = false;
   // This does the actual operand parsing.  Don't parse any more if we have a
diff --git a/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 10b7e6f..76cb518 100644
--- a/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -61,6 +61,17 @@
     OS << "\tcallq\t";
     printPCRelImm(MI, 0, OS);
   }
+  // data16 and data32 both have the same encoding of 0x66. While data32 is
+  // valid only in 16 bit systems, data16 is valid in the rest.
+  // There seems to be some lack of support of the Requires clause that causes
+  // 0x66 to be interpreted as "data16" by the asm printer.
+  // Thus we add an adjustment here in order to print the "right" instruction.
+  else if (MI->getOpcode() == X86::DATA16_PREFIX &&
+    (STI.getFeatureBits()[X86::Mode16Bit])) {
+    MCInst Data32MI(*MI);
+    Data32MI.setOpcode(X86::DATA32_PREFIX);
+    printInstruction(&Data32MI, OS);
+  }
   // Try to print any aliases first.
   else if (!printAliasInstr(MI, OS))
     printInstruction(MI, OS);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 27a9adf..a647f69 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -1967,7 +1967,12 @@
                      Requires<[In64BitMode]>;
 
 // Data16 instruction prefix
-def DATA16_PREFIX : I<0x66, RawFrm, (outs),  (ins), "data16", []>;
+def DATA16_PREFIX : I<0x66, RawFrm, (outs),  (ins), "data16", []>, 
+                     Requires<[Not16BitMode]>;
+
+// Data instruction prefix
+def DATA32_PREFIX : I<0x66, RawFrm, (outs),  (ins), "data32", []>, 
+                     Requires<[In16BitMode]>;
 
 // Repeat string operation instruction prefixes
 // These uses the DF flag in the EFLAGS register to inc or dec ECX
diff --git a/llvm/test/MC/X86/data-prefix-fail.s b/llvm/test/MC/X86/data-prefix-fail.s
new file mode 100644
index 0000000..2b910cd
--- /dev/null
+++ b/llvm/test/MC/X86/data-prefix-fail.s
@@ -0,0 +1,25 @@
+// RUN: not llvm-mc -triple x86_64-unknown-unknown --show-encoding %s 2> %t.err | FileCheck --check-prefix=64 %s
+// RUN: FileCheck --check-prefix=ERR64 < %t.err %s
+// RUN: not llvm-mc -triple i386-unknown-unknown --show-encoding %s 2> %t.err | FileCheck --check-prefix=32 %s
+// RUN: FileCheck --check-prefix=ERR32 < %t.err %s
+// RUN: not llvm-mc -triple i386-unknown-unknown-code16 --show-encoding %s 2> %t.err | FileCheck --check-prefix=16 %s
+// RUN: FileCheck --check-prefix=ERR16 < %t.err %s
+
+// ERR64: error: instruction requires: 16-bit mode
+// ERR32: error: instruction requires: 16-bit mode
+// 16: data32
+// 16: encoding: [0x66]
+// 16: lgdtw 0
+// 16: encoding: [0x0f,0x01,0x16,0x00,0x00]
+data32 lgdt 0
+
+// 64: data16
+// 64: encoding: [0x66]
+// 64: lgdtq 0
+// 64: encoding: [0x0f,0x01,0x14,0x25,0x00,0x00,0x00,0x00]
+// 32: data16
+// 32: encoding: [0x66]
+// 32: lgdtl 0
+// 32: encoding: [0x0f,0x01,0x15,0x00,0x00,0x00,0x00]
+// ERR16: error: instruction requires: Not 16-bit mode
+data16 lgdt 0
diff --git a/llvm/test/MC/X86/data-prefix16.s b/llvm/test/MC/X86/data-prefix16.s
new file mode 100644
index 0000000..d90b9dc
--- /dev/null
+++ b/llvm/test/MC/X86/data-prefix16.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple i386-unknown-unknown-code16 -filetype=obj %s -o - | llvm-objdump -triple i386-unknown-unknown-code16 -d - | FileCheck %s
+
+# CHECK: 66 0f 01 16 00 00
+# CHECK: lgdtl 0
+data32 lgdt 0
+
+# CHECK: 66
+# CHECK: data32
+data32
diff --git a/llvm/test/MC/X86/data-prefix32.s b/llvm/test/MC/X86/data-prefix32.s
new file mode 100644
index 0000000..15a718b
--- /dev/null
+++ b/llvm/test/MC/X86/data-prefix32.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple=i386-unknown-unknown -filetype=obj %s -o - | llvm-objdump -triple=i386-unknown-unknown -d - | FileCheck %s
+
+# CHECK: 66 0f 01 15 00 00 00 00
+# CHECK: lgdtw 0
+data16 lgdt 0
+
+# CHECK: 66
+# CHECK: data16
+data16
diff --git a/llvm/test/MC/X86/data-prefix64.s b/llvm/test/MC/X86/data-prefix64.s
new file mode 100644
index 0000000..acd0db3
--- /dev/null
+++ b/llvm/test/MC/X86/data-prefix64.s
@@ -0,0 +1,9 @@
+# RUN: llvm-mc -triple=x86_64-unknown-unknown -filetype=obj %s -o - | llvm-objdump -triple=x86_64-unknown-unknown -d - | FileCheck %s
+
+# CHECK: 66 0f 01 14 25 00 00 00 00
+# CHECK: lgdtq 0
+data16 lgdt 0
+
+# CHECK: 66
+# CHECK: data16
+data16
diff --git a/llvm/test/MC/X86/x86-16.s b/llvm/test/MC/X86/x86-16.s
index f566935..b95f66e 100644
--- a/llvm/test/MC/X86/x86-16.s
+++ b/llvm/test/MC/X86/x86-16.s
@@ -959,3 +959,13 @@
 // CHECK: lretl
 // CHECK: encoding: [0x66,0xcb]
 lretl
+
+// CHECK: data32
+// CHECK: encoding: [0x66]
+data32
+
+// CHECK: data32
+// CHECK: encoding: [0x66]
+// CHECK: lgdtw 4(%eax)
+// CHECK:  encoding: [0x67,0x0f,0x01,0x50,0x04]
+data32 lgdt 4(%eax)
diff --git a/llvm/test/MC/X86/x86-32.s b/llvm/test/MC/X86/x86-32.s
index 7207652..c05cf41 100644
--- a/llvm/test/MC/X86/x86-32.s
+++ b/llvm/test/MC/X86/x86-32.s
@@ -1079,3 +1079,13 @@
 // CHECK: lretw
 // CHECK: encoding: [0x66,0xcb]
 lretw
+
+// CHECK: data16
+// CHECK: encoding: [0x66]
+data16
+
+// CHECK: data16
+// CHECK: encoding: [0x66]
+// CHECK: lgdtl 4(%eax)
+// CHECK:  encoding: [0x0f,0x01,0x50,0x04]
+data16 lgdt 4(%eax)
diff --git a/llvm/test/MC/X86/x86-64.s b/llvm/test/MC/X86/x86-64.s
index 1af25e5..89dc599 100644
--- a/llvm/test/MC/X86/x86-64.s
+++ b/llvm/test/MC/X86/x86-64.s
@@ -1119,6 +1119,12 @@
 rex64 // CHECK: rex64 # encoding: [0x48]
 data16 // CHECK: data16 # encoding: [0x66]
 
+// CHECK: data16
+// CHECK: encoding: [0x66]
+// CHECK: lgdtq 4(%rax)
+// CHECK:  encoding: [0x0f,0x01,0x50,0x04]
+data16 lgdt 4(%rax)
+
 // PR8855
 movq 18446744073709551615,%rbx   // CHECK: movq	-1, %rbx
 
diff --git a/llvm/utils/TableGen/X86DisassemblerTables.cpp b/llvm/utils/TableGen/X86DisassemblerTables.cpp
index 5b710e4..c9e36f9 100644
--- a/llvm/utils/TableGen/X86DisassemblerTables.cpp
+++ b/llvm/utils/TableGen/X86DisassemblerTables.cpp
@@ -879,6 +879,10 @@
                                            newInfo.name == "XCHG64ar"))
           continue; // special case for XCHG*ar and NOOP
 
+        if (previousInfo.name == "DATA16_PREFIX" &&
+            newInfo.name == "DATA32_PREFIX")
+          continue; // special case for data16 and data32
+
         if (outranks(previousInfo.insnContext, newInfo.insnContext))
           continue;