Added FMA3 Intel instructions.
I disabled FMA3 autodetection, since the result may differ from expected for some benchmarks.
I added tests for GodeGen and intrinsics.
I did not change llvm.fma.f32/64 - it may be done later.

llvm-svn: 157737
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index b23d756..856f3be 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -128,7 +128,8 @@
   X86TargetMachine &TM;
   const X86RegisterInfo RI;
 
-  /// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
+  /// RegOp2MemOpTable3Addr, RegOp2MemOpTable2Addr, 
+  /// RegOp2MemOpTable0, RegOp2MemOpTable1,
   /// RegOp2MemOpTable2 - Load / store folding opcode maps.
   ///
   typedef DenseMap<unsigned,
@@ -137,6 +138,7 @@
   RegOp2MemOpTableType RegOp2MemOpTable0;
   RegOp2MemOpTableType RegOp2MemOpTable1;
   RegOp2MemOpTableType RegOp2MemOpTable2;
+  RegOp2MemOpTableType RegOp2MemOpTable3;
 
   /// MemOp2RegOpTable - Load / store unfolding opcode map.
   ///