[stack-protection] Add support for MSVC buffer security check

Summary:
This patch is adding support for the MSVC buffer security check implementation

The buffer security check is turned on with the '/GS' compiler switch.
  * https://msdn.microsoft.com/en-us/library/8dbf701c.aspx
  * To be added to clang here: http://reviews.llvm.org/D20347

Some overview of buffer security check feature and implementation:
  * https://msdn.microsoft.com/en-us/library/aa290051(VS.71).aspx
  * http://www.ksyash.com/2011/01/buffer-overflow-protection-3/
  * http://blog.osom.info/2012/02/understanding-vs-c-compilers-buffer.html


For the following example:
```
int example(int offset, int index) {
  char buffer[10];
  memset(buffer, 0xCC, index);
  return buffer[index];
}
```

The MSVC compiler is adding these instructions to perform stack integrity check:
```
        push        ebp  
        mov         ebp,esp  
        sub         esp,50h  
  [1]   mov         eax,dword ptr [__security_cookie (01068024h)]  
  [2]   xor         eax,ebp  
  [3]   mov         dword ptr [ebp-4],eax  
        push        ebx  
        push        esi  
        push        edi  
        mov         eax,dword ptr [index]  
        push        eax  
        push        0CCh  
        lea         ecx,[buffer]  
        push        ecx  
        call        _memset (010610B9h)  
        add         esp,0Ch  
        mov         eax,dword ptr [index]  
        movsx       eax,byte ptr buffer[eax]  
        pop         edi  
        pop         esi  
        pop         ebx  
  [4]   mov         ecx,dword ptr [ebp-4]  
  [5]   xor         ecx,ebp  
  [6]   call        @__security_check_cookie@4 (01061276h)  
        mov         esp,ebp  
        pop         ebp  
        ret  
```

The instrumentation above is:
  * [1] is loading the global security canary,
  * [3] is storing the local computed ([2]) canary to the guard slot,
  * [4] is loading the guard slot and ([5]) re-compute the global canary,
  * [6] is validating the resulting canary with the '__security_check_cookie' and performs error handling.

Overview of the current stack-protection implementation:
  * lib/CodeGen/StackProtector.cpp
    * There is a default stack-protection implementation applied on intermediate representation.
    * The target can overload 'getIRStackGuard' method if it has a standard location for the stack protector cookie.
    * An intrinsic 'Intrinsic::stackprotector' is added to the prologue. It will be expanded by the instruction selection pass (DAG or Fast).
    * Basic Blocks are added to every instrumented function to receive the code for handling stack guard validation and errors handling.
    * Guard manipulation and comparison are added directly to the intermediate representation.

  * lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
  * lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    * There is an implementation that adds instrumentation during instruction selection (for better handling of sibbling calls).
      * see long comment above 'class StackProtectorDescriptor' declaration.
    * The target needs to override 'getSDagStackGuard' to activate SDAG stack protection generation. (note: getIRStackGuard MUST be nullptr).
      * 'getSDagStackGuard' returns the appropriate stack guard (security cookie)
    * The code is generated by 'SelectionDAGBuilder.cpp' and 'SelectionDAGISel.cpp'.

  * include/llvm/Target/TargetLowering.h
    * Contains function to retrieve the default Guard 'Value'; should be overriden by each target to select which implementation is used and provide Guard 'Value'.

  * lib/Target/X86/X86ISelLowering.cpp
    * Contains the x86 specialisation; Guard 'Value' used by the SelectionDAG algorithm.

Function-based Instrumentation:
  * The MSVC doesn't inline the stack guard comparison in every function. Instead, a call to '__security_check_cookie' is added to the epilogue before every return instructions.
  * To support function-based instrumentation, this patch is
    * adding a function to get the function-based check (llvm 'Value', see include/llvm/Target/TargetLowering.h),
      * If provided, the stack protection instrumentation won't be inlined and a call to that function will be added to the prologue.
    * modifying (SelectionDAGISel.cpp) do avoid producing basic blocks used for inline instrumentation,
    * generating the function-based instrumentation during the ISEL pass (SelectionDAGBuilder.cpp),
    * if FastISEL (not SelectionDAG), using the fallback which rely on the same function-based implemented over intermediate representation (StackProtector.cpp).

Modifications
  * adding support for MSVC (lib/Target/X86/X86ISelLowering.cpp)
  * adding support function-based instrumentation (lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp, .h)

Results

  * IR generated instrumentation:
```
clang-cl /GS test.cc /Od /c -mllvm -print-isel-input
```

```
*** Final LLVM Code input to ISel ***

; Function Attrs: nounwind sspstrong
define i32 @"\01?example@@YAHHH@Z"(i32 %offset, i32 %index) #0 {
entry:
  %StackGuardSlot = alloca i8*                                                  <<<-- Allocated guard slot
  %0 = call i8* @llvm.stackguard()                                              <<<-- Loading Stack Guard value
  call void @llvm.stackprotector(i8* %0, i8** %StackGuardSlot)                  <<<-- Prologue intrinsic call (store to Guard slot)
  %index.addr = alloca i32, align 4
  %offset.addr = alloca i32, align 4
  %buffer = alloca [10 x i8], align 1
  store i32 %index, i32* %index.addr, align 4
  store i32 %offset, i32* %offset.addr, align 4
  %arraydecay = getelementptr inbounds [10 x i8], [10 x i8]* %buffer, i32 0, i32 0
  %1 = load i32, i32* %index.addr, align 4
  call void @llvm.memset.p0i8.i32(i8* %arraydecay, i8 -52, i32 %1, i32 1, i1 false)
  %2 = load i32, i32* %index.addr, align 4
  %arrayidx = getelementptr inbounds [10 x i8], [10 x i8]* %buffer, i32 0, i32 %2
  %3 = load i8, i8* %arrayidx, align 1
  %conv = sext i8 %3 to i32
  %4 = load volatile i8*, i8** %StackGuardSlot                                  <<<-- Loading Guard slot
  call void @__security_check_cookie(i8* %4)                                    <<<-- Epilogue function-based check
  ret i32 %conv
}
```

  * SelectionDAG generated instrumentation:

```
clang-cl /GS test.cc /O1 /c /FA
```

```
"?example@@YAHHH@Z":                    # @"\01?example@@YAHHH@Z"
# BB#0:                                 # %entry
        pushl   %esi
        subl    $16, %esp
        movl    ___security_cookie, %eax                                        <<<-- Loading Stack Guard value
        movl    28(%esp), %esi
        movl    %eax, 12(%esp)                                                  <<<-- Store to Guard slot
        leal    2(%esp), %eax
        pushl   %esi
        pushl   $204
        pushl   %eax
        calll   _memset
        addl    $12, %esp
        movsbl  2(%esp,%esi), %esi
        movl    12(%esp), %ecx                                                  <<<-- Loading Guard slot
        calll   @__security_check_cookie@4                                      <<<-- Epilogue function-based check
        movl    %esi, %eax
        addl    $16, %esp
        popl    %esi
        retl
```

Reviewers: kcc, pcc, eugenis, rnk

Subscribers: majnemer, llvm-commits, hans, thakis, rnk

Differential Revision: http://reviews.llvm.org/D20346

llvm-svn: 272053
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 2c4f37a..6a727a1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2040,6 +2040,40 @@
   const Module &M = *ParentBB->getParent()->getFunction()->getParent();
   unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
 
+  // Generate code to load the content of the guard slot.
+  SDValue StackSlot = DAG.getLoad(
+      PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
+      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true,
+      false, false, Align);
+
+  // Retrieve guard check function, nullptr if instrumentation is inlined.
+  if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
+    // The target provides a guard check function to validate the guard value.
+    // Generate a call to that function with the content of the guard slot as
+    // argument.
+    auto *Fn = cast<Function>(GuardCheck);
+    FunctionType *FnTy = Fn->getFunctionType();
+    assert(FnTy->getNumParams() == 1 && "Invalid function signature");
+
+    TargetLowering::ArgListTy Args;
+    TargetLowering::ArgListEntry Entry;
+    Entry.Node = StackSlot;
+    Entry.Ty = FnTy->getParamType(0);
+    if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
+      Entry.isInReg = true;
+    Args.push_back(Entry);
+
+    TargetLowering::CallLoweringInfo CLI(DAG);
+    CLI.setDebugLoc(getCurSDLoc())
+      .setChain(DAG.getEntryNode())
+      .setCallee(Fn->getCallingConv(), FnTy->getReturnType(),
+                 getValue(GuardCheck), std::move(Args));
+
+    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
+    DAG.setRoot(Result.second);
+    return;
+  }
+
   // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
   // Otherwise, emit a volatile load to retrieve the stack guard value.
   SDValue Chain = DAG.getEntryNode();
@@ -2054,11 +2088,6 @@
                     true, false, false, Align);
   }
 
-  SDValue StackSlot = DAG.getLoad(
-      PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
-      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), true,
-      false, false, Align);
-
   // Perform the comparison via a subtract/getsetcc.
   EVT VT = Guard.getValueType();
   SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index a7d2969..68af2d9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -456,7 +456,14 @@
   ///
   ///     c. After we finish selecting the basic block, in FinishBasicBlock if
   ///        the StackProtectorDescriptor attached to the SelectionDAGBuilder is
-  ///        initialized, we first find a splice point in the parent basic block
+  ///        initialized, we produce the validation code with one of these
+  ///        techniques:
+  ///          1) with a call to a guard check function
+  ///          2) with inlined instrumentation
+  ///
+  ///        1) We insert a call to the check function before the terminator.
+  ///
+  ///        2) We first find a splice point in the parent basic block
   ///        before the terminator and then splice the terminator of said basic
   ///        block into the success basic block. Then we code-gen a new tail for
   ///        the parent basic block consisting of the two loads, the comparison,
@@ -475,15 +482,22 @@
       return ParentMBB && SuccessMBB && FailureMBB;
     }
 
+    bool shouldEmitFunctionBasedCheckStackProtector() const {
+      return ParentMBB && !SuccessMBB && !FailureMBB;
+    }
+
     /// Initialize the stack protector descriptor structure for a new basic
     /// block.
-    void initialize(const BasicBlock *BB, MachineBasicBlock *MBB) {
+    void initialize(const BasicBlock *BB, MachineBasicBlock *MBB,
+                    bool FunctionBasedInstrumentation) {
       // Make sure we are not initialized yet.
       assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is "
              "already initialized!");
       ParentMBB = MBB;
-      SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true);
-      FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);
+      if (!FunctionBasedInstrumentation) {
+        SuccessMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ true);
+        FailureMBB = AddSuccessorMBB(BB, MBB, /* IsLikely */ false, FailureMBB);
+      }
     }
 
     /// Reset state that changes when we handle different basic blocks.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 0ae1a95..918c013 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1463,8 +1463,12 @@
         LowerArguments(Fn);
       }
     }
-    if (getAnalysis<StackProtector>().shouldEmitSDCheck(*LLVMBB))
-      SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB]);
+    if (getAnalysis<StackProtector>().shouldEmitSDCheck(*LLVMBB)) {
+      bool FunctionBasedInstrumentation =
+          TLI->getSSPStackGuardCheck(*Fn.getParent());
+      SDB->SPDescriptor.initialize(LLVMBB, FuncInfo->MBBMap[LLVMBB],
+                                   FunctionBasedInstrumentation);
+    }
 
     if (Begin != BI)
       ++NumDAGBlocks;
@@ -1552,7 +1556,7 @@
 /// terminator, but additionally the copies that move the vregs into the
 /// physical registers.
 static MachineBasicBlock::iterator
-FindSplitPointForStackProtector(MachineBasicBlock *BB, DebugLoc DL) {
+FindSplitPointForStackProtector(MachineBasicBlock *BB) {
   MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator();
   //
   if (SplitPoint == BB->begin())
@@ -1593,7 +1597,23 @@
   }
 
   // Handle stack protector.
-  if (SDB->SPDescriptor.shouldEmitStackProtector()) {
+  if (SDB->SPDescriptor.shouldEmitFunctionBasedCheckStackProtector()) {
+    // The target provides a guard check function. There is no need to
+    // generate error handling code or to split current basic block.
+    MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB();
+
+    // Add load and check to the basicblock.
+    FuncInfo->MBB = ParentMBB;
+    FuncInfo->InsertPt =
+        FindSplitPointForStackProtector(ParentMBB);
+    SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB);
+    CurDAG->setRoot(SDB->getRoot());
+    SDB->clear();
+    CodeGenAndEmitDAG();
+
+    // Clear the Per-BB State.
+    SDB->SPDescriptor.resetPerBBState();
+  } else if (SDB->SPDescriptor.shouldEmitStackProtector()) {
     MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB();
     MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB();
 
@@ -1604,7 +1624,7 @@
     // register allocation issues caused by us splitting the parent mbb. The
     // register allocator will clean up said virtual copies later on.
     MachineBasicBlock::iterator SplitPoint =
-      FindSplitPointForStackProtector(ParentMBB, SDB->getCurDebugLoc());
+        FindSplitPointForStackProtector(ParentMBB);
 
     // Splice the terminator of ParentMBB into SuccessMBB.
     SuccessMBB->splice(SuccessMBB->end(), ParentMBB,
diff --git a/llvm/lib/CodeGen/StackProtector.cpp b/llvm/lib/CodeGen/StackProtector.cpp
index 8a56c6a..5f58c04 100644
--- a/llvm/lib/CodeGen/StackProtector.cpp
+++ b/llvm/lib/CodeGen/StackProtector.cpp
@@ -18,6 +18,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/Passes.h"
@@ -95,11 +96,19 @@
   Attribute Attr = Fn.getFnAttribute("stack-protector-buffer-size");
   if (Attr.isStringAttribute() &&
       Attr.getValueAsString().getAsInteger(10, SSPBufferSize))
-      return false; // Invalid integer string
+    return false; // Invalid integer string
 
   if (!RequiresStackProtector())
     return false;
 
+  // TODO(etienneb): Functions with funclets are not correctly supported now.
+  // Do nothing if this is funclet-based personality.
+  if (Fn.hasPersonalityFn()) {
+    EHPersonality Personality = classifyEHPersonality(Fn.getPersonalityFn());
+    if (isFuncletEHPersonality(Personality))
+      return false;
+  }
+
   ++NumFunProtected;
   return InsertStackProtectors();
 }
@@ -313,9 +322,9 @@
   PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext());
   AI = B.CreateAlloca(PtrTy, nullptr, "StackGuardSlot");
 
-  Value *Guard = getStackGuard(TLI, M, B, &SupportsSelectionDAGSP);
+  Value *GuardSlot = getStackGuard(TLI, M, B, &SupportsSelectionDAGSP);
   B.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
-               {Guard, AI});
+               {GuardSlot, AI});
   return SupportsSelectionDAGSP;
 }
 
@@ -336,12 +345,36 @@
     if (!RI)
       continue;
 
+    // Generate prologue instrumentation if not already generated.
     if (!HasPrologue) {
       HasPrologue = true;
       SupportsSelectionDAGSP &= CreatePrologue(F, M, RI, TLI, AI);
     }
 
-    if (!SupportsSelectionDAGSP) {
+    // SelectionDAG based code generation. Nothing else needs to be done here.
+    // The epilogue instrumentation is postponed to SelectionDAG.
+    if (SupportsSelectionDAGSP)
+      break;
+
+    // Set HasIRCheck to true, so that SelectionDAG will not generate its own
+    // version. SelectionDAG called 'shouldEmitSDCheck' to check whether
+    // instrumentation has already been generated.
+    HasIRCheck = true;
+
+    // Generate epilogue instrumentation. The epilogue intrumentation can be
+    // function-based or inlined depending on which mechanism the target is
+    // providing.
+    if (Value* GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
+      // Generate the function-based epilogue instrumentation.
+      // The target provides a guard check function, generate a call to it.
+      IRBuilder<> B(RI);
+      LoadInst *Guard = B.CreateLoad(AI, true, "Guard");
+      CallInst *Call = B.CreateCall(GuardCheck, {Guard});
+      llvm::Function *Function = cast<llvm::Function>(GuardCheck);
+      Call->setAttributes(Function->getAttributes());
+      Call->setCallingConv(Function->getCallingConv());
+    } else {
+      // Generate the epilogue with inline instrumentation.
       // If we do not support SelectionDAG based tail calls, generate IR level
       // tail calls.
       //
@@ -372,10 +405,6 @@
       // fail BB generated by the stack protector pseudo instruction.
       BasicBlock *FailBB = CreateFailBB();
 
-      // Set HasIRCheck to true, so that SelectionDAG will not generate its own
-      // version.
-      HasIRCheck = true;
-
       // Split the basic block before the return instruction.
       BasicBlock *NewBB = BB->splitBasicBlock(RI->getIterator(), "SP_return");
 
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 97f85f5..67b2ee9 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -898,7 +898,7 @@
       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
     }
 
-    // For most targets @llvm.get.dynamic.area.offest just returns 0.
+    // For most targets @llvm.get.dynamic.area.offset just returns 0.
     setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
   }
 
@@ -1833,3 +1833,7 @@
 Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
   return M.getGlobalVariable("__stack_chk_guard");
 }
+
+Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
+  return nullptr;
+}