First round of support for doing scalar FP using the SSE2 ISA extension and
XMM registers.  There are many known deficiencies and fixmes, which will be
addressed ASAP.  The major benefit of this work is that it will allow the
LLVM register allocator to allocate FP registers across basic blocks.

The x86 backend will still default to x87 style FP.  To enable this work,
you must pass -enable-sse-scalar-fp and either -sse2 or -sse3 to llc.
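For reference, an invocation might look like the following (illustrative only;
"foo.bc" is a placeholder input module, and only the flags named above come
from this patch):

        llc -enable-sse-scalar-fp -sse2 foo.bc -o foo.s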

As an example of the before and after, consider this function:

double foo(double *P) {
  double Sum = 0; int i;
  for (i = 0; i < 1000; ++i)
    Sum += P[i];
  return Sum;
}

The inner loop looks like the following:
x87:
.LBB_foo_1:     # no_exit
        fldl (%esp)
        faddl (%eax,%ecx,8)
        fstpl (%esp)
        incl %ecx
        cmpl $1000, %ecx
        #FP_REG_KILL
        jne .LBB_foo_1  # no_exit

SSE2:
.LBB_foo_1:     # no_exit
        addsd (%eax,%ecx,8), %xmm0
        incl %ecx
        cmpl $1000, %ecx
        #FP_REG_KILL
        jne .LBB_foo_1  # no_exit


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@22340 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 08920cc..230debf 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -52,6 +52,7 @@
   case 32: return 2;
   case 64: return 3;   // FP in 64-bit spill mode.
   case 80: return 4;   // FP in 80-bit spill mode.
+  case 128: return 5;  // XMM reg in 128 bit mode.
   }
 }
 
@@ -59,18 +60,24 @@
                                           MachineBasicBlock::iterator MI,
                                           unsigned SrcReg, int FrameIdx) const {
   static const unsigned Opcode[] =
-    { X86::MOV8mr, X86::MOV16mr, X86::MOV32mr, X86::FST64m, X86::FSTP80m };
+    { X86::MOV8mr, X86::MOV16mr, X86::MOV32mr, X86::FST64m, X86::FSTP80m,
+      X86::MOVAPDmr };
   unsigned Idx = getIdx(getSpillSize(SrcReg));
-  addFrameReference(BuildMI(MBB, MI, Opcode[Idx], 5), FrameIdx).addReg(SrcReg);
+  unsigned Opc = Opcode[Idx];
+  if (X86ScalarSSE && Opc == X86::FST64m) Opc = X86::MOVSDmr;
+  addFrameReference(BuildMI(MBB, MI, Opc, 5), FrameIdx).addReg(SrcReg);
 }
 
 void X86RegisterInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MI,
                                            unsigned DestReg, int FrameIdx)const{
   static const unsigned Opcode[] =
-    { X86::MOV8rm, X86::MOV16rm, X86::MOV32rm, X86::FLD64m, X86::FLD80m };
+    { X86::MOV8rm, X86::MOV16rm, X86::MOV32rm, X86::FLD64m, X86::FLD80m,
+      X86::MOVAPDrm };
   unsigned Idx = getIdx(getSpillSize(DestReg));
-  addFrameReference(BuildMI(MBB, MI, Opcode[Idx], 4, DestReg), FrameIdx);
+  unsigned Opc = Opcode[Idx];
+  if (X86ScalarSSE && Opc == X86::FLD64m) Opc = X86::MOVSDrm;
+  addFrameReference(BuildMI(MBB, MI, Opc, 4, DestReg), FrameIdx);
 }
 
 void X86RegisterInfo::copyRegToReg(MachineBasicBlock &MBB,
@@ -78,8 +85,11 @@
                                    unsigned DestReg, unsigned SrcReg,
                                    const TargetRegisterClass *RC) const {
   static const unsigned Opcode[] =
-    { X86::MOV8rr, X86::MOV16rr, X86::MOV32rr, X86::FpMOV, X86::FpMOV };
-  BuildMI(MBB, MI, Opcode[getIdx(RC->getSize()*8)], 1, DestReg).addReg(SrcReg);
+    { X86::MOV8rr, X86::MOV16rr, X86::MOV32rr, X86::FpMOV, X86::FpMOV,
+      X86::MOVAPDrr };
+  unsigned Opc = Opcode[getIdx(RC->getSize()*8)];
+  if (X86ScalarSSE && Opc == X86::FpMOV) Opc = X86::MOVAPDrr;
+  BuildMI(MBB, MI, Opc, 1, DestReg).addReg(SrcReg);
 }
 
 static MachineInstr *MakeMInst(unsigned Opcode, unsigned FrameIndex,