Revert r86077 because it caused crashes in 179.art and 175.vpr on ARM

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@86213 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index f4eb793..e710350 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -17,7 +17,6 @@
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Analysis/ConstantFolding.h"
-#include "llvm/Target/TargetData.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -97,47 +96,45 @@
   if (!CI)
     return NULL;
 
-  // The size of the malloc's result type must be known to determine array size.
+  // Type must be known to determine array size.
   const Type *T = getMallocAllocatedType(CI);
-  if (!T || !T->isSized() || !TD)
+  if (!T)
     return NULL;
 
   Value *MallocArg = CI->getOperand(1);
-  const Type *ArgType = MallocArg->getType();
   ConstantExpr *CO = dyn_cast<ConstantExpr>(MallocArg);
   BinaryOperator *BO = dyn_cast<BinaryOperator>(MallocArg);
 
-  unsigned ElementSizeInt = TD->getTypeAllocSize(T);
-  if (const StructType *ST = dyn_cast<StructType>(T))
-    ElementSizeInt = TD->getStructLayout(ST)->getSizeInBytes();
-  Constant *ElementSize = ConstantInt::get(ArgType, ElementSizeInt);
+  Constant *ElementSize = ConstantExpr::getSizeOf(T);
+  ElementSize = ConstantExpr::getTruncOrBitCast(ElementSize, 
+                                                MallocArg->getType());
+  Constant *FoldedElementSize =
+   ConstantFoldConstantExpression(cast<ConstantExpr>(ElementSize), Context, TD);
 
   // First, check if CI is a non-array malloc.
-  if (CO && CO == ElementSize)
+  if (CO && ((CO == ElementSize) ||
+             (FoldedElementSize && (CO == FoldedElementSize))))
     // Match CreateMalloc's use of constant 1 array-size for non-array mallocs.
-    return ConstantInt::get(ArgType, 1);
+    return ConstantInt::get(MallocArg->getType(), 1);
 
   // Second, check if CI is an array malloc whose array size can be determined.
-  if (isConstantOne(ElementSize))
+  if (isConstantOne(ElementSize) || 
+      (FoldedElementSize && isConstantOne(FoldedElementSize)))
     return MallocArg;
 
-  if (ConstantInt *CInt = dyn_cast<ConstantInt>(MallocArg))
-    if (CInt->getZExtValue() % ElementSizeInt == 0)
-      return ConstantInt::get(ArgType, CInt->getZExtValue() / ElementSizeInt);
-
   if (!CO && !BO)
     return NULL;
 
   Value *Op0 = NULL;
   Value *Op1 = NULL;
   unsigned Opcode = 0;
-  if (CO && ((CO->getOpcode() == Instruction::Mul) ||
+  if (CO && ((CO->getOpcode() == Instruction::Mul) || 
              (CO->getOpcode() == Instruction::Shl))) {
     Op0 = CO->getOperand(0);
     Op1 = CO->getOperand(1);
     Opcode = CO->getOpcode();
   }
-  if (BO && ((BO->getOpcode() == Instruction::Mul) ||
+  if (BO && ((BO->getOpcode() == Instruction::Mul) || 
              (BO->getOpcode() == Instruction::Shl))) {
     Op0 = BO->getOperand(0);
     Op1 = BO->getOperand(1);
@@ -147,10 +144,12 @@
   // Determine array size if malloc's argument is the product of a mul or shl.
   if (Op0) {
     if (Opcode == Instruction::Mul) {
-      if (Op1 == ElementSize)
+      if ((Op1 == ElementSize) ||
+          (FoldedElementSize && (Op1 == FoldedElementSize)))
         // ArraySize * ElementSize
         return Op0;
-      if (Op0 == ElementSize)
+      if ((Op0 == ElementSize) ||
+          (FoldedElementSize && (Op0 == FoldedElementSize)))
         // ElementSize * ArraySize
         return Op1;
     }
@@ -162,10 +161,11 @@
       uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
       Value *Op1Pow = ConstantInt::get(Context, 
                                   APInt(Op1Int.getBitWidth(), 0).set(BitToSet));
-      if (Op0 == ElementSize)
+      if (Op0 == ElementSize || (FoldedElementSize && Op0 == FoldedElementSize))
         // ArraySize << log2(ElementSize)
         return Op1Pow;
-      if (Op1Pow == ElementSize)
+      if (Op1Pow == ElementSize ||
+          (FoldedElementSize && Op1Pow == FoldedElementSize))
         // ElementSize << log2(ArraySize)
         return Op0;
     }
@@ -205,41 +205,35 @@
 }
 
 /// getMallocType - Returns the PointerType resulting from the malloc call.
-/// The PointerType depends on the number of bitcast uses of the malloc call:
-///   0: PointerType is the calls' return type.
-///   1: PointerType is the bitcast's result type.
-///  >1: Unique PointerType cannot be determined, return NULL.
+/// This PointerType is the result type of the call's only bitcast use.
+/// If there is no unique bitcast use, then return NULL.
 const PointerType *llvm::getMallocType(const CallInst *CI) {
   assert(isMalloc(CI) && "GetMallocType and not malloc call");
   
-  const PointerType *MallocType = NULL;
-  unsigned NumOfBitCastUses = 0;
-
+  const BitCastInst *BCI = NULL;
+  
   // Determine if CallInst has a bitcast use.
   for (Value::use_const_iterator UI = CI->use_begin(), E = CI->use_end();
        UI != E; )
-    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(*UI++)) {
-      MallocType = cast<PointerType>(BCI->getDestTy());
-      NumOfBitCastUses++;
-    }
+    if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++))))
+      break;
 
-  // Malloc call has 1 bitcast use, so type is the bitcast's destination type.
-  if (NumOfBitCastUses == 1)
-    return MallocType;
+  // Malloc call has 1 bitcast use and no other uses, so type is the bitcast's
+  // destination type.
+  if (BCI && CI->hasOneUse())
+    return cast<PointerType>(BCI->getDestTy());
 
   // Malloc call was not bitcast, so type is the malloc function's return type.
-  if (NumOfBitCastUses == 0)
+  if (!BCI)
     return cast<PointerType>(CI->getType());
 
   // Type could not be determined.
   return NULL;
 }
 
-/// getMallocAllocatedType - Returns the Type allocated by malloc call.
-/// The Type depends on the number of bitcast uses of the malloc call:
-///   0: PointerType is the malloc calls' return type.
-///   1: PointerType is the bitcast's result type.
-///  >1: Unique PointerType cannot be determined, return NULL.
+/// getMallocAllocatedType - Returns the Type allocated by malloc call. This
+/// Type is the result type of the call's only bitcast use. If there is no
+/// unique bitcast use, then return NULL.
 const Type *llvm::getMallocAllocatedType(const CallInst *CI) {
   const PointerType *PT = getMallocType(CI);
   return PT ? PT->getElementType() : NULL;
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 63af42d..b53d9383 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -3619,14 +3619,12 @@
   // Autoupgrade old malloc instruction to malloc call.
   // FIXME: Remove in LLVM 3.0.
   const Type *IntPtrTy = Type::getInt32Ty(Context);
-  Constant *AllocSize = ConstantExpr::getSizeOf(Ty);
-  AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, IntPtrTy);
   if (!MallocF)
     // Prototype malloc as "void *(int32)".
     // This function is renamed as "malloc" in ValidateEndOfModule().
     MallocF = cast<Function>(
        M->getOrInsertFunction("", Type::getInt8PtrTy(Context), IntPtrTy, NULL));
-  Inst = CallInst::CreateMalloc(BB, IntPtrTy, Ty, AllocSize, Size, MallocF);
+  Inst = CallInst::CreateMalloc(BB, IntPtrTy, Ty, Size, MallocF);
   return false;
 }
 
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 9916388..68527e3 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2101,10 +2101,8 @@
       if (!Ty || !Size) return Error("Invalid MALLOC record");
       if (!CurBB) return Error("Invalid malloc instruction with no BB");
       const Type *Int32Ty = IntegerType::getInt32Ty(CurBB->getContext());
-      Constant *AllocSize = ConstantExpr::getSizeOf(Ty->getElementType());
-      AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, Int32Ty);
       I = CallInst::CreateMalloc(CurBB, Int32Ty, Ty->getElementType(),
-                                 AllocSize, Size, NULL);
+                                 Size, NULL);
       InstructionList.push_back(I);
       break;
     }
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 234d0ec..5dab9ef 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -822,42 +822,32 @@
 /// malloc into a global, and any loads of GV as uses of the new global.
 static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
                                                      CallInst *CI,
-                                                     const Type *AllocTy,
+                                                     BitCastInst *BCI,
                                                      Value* NElems,
                                                      LLVMContext &Context,
                                                      TargetData* TD) {
-  DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << "  CALL = " << *CI << '\n');
+  DEBUG(errs() << "PROMOTING MALLOC GLOBAL: " << *GV
+               << "  CALL = " << *CI << "  BCI = " << *BCI << '\n');
 
   const Type *IntPtrTy = TD->getIntPtrType(Context);
   
-  // CI has either 0 or 1 bitcast uses (getMallocType() would otherwise have
-  // returned NULL and we would not be here).
-  BitCastInst *BCI = NULL;
-  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); UI != E; )
-    if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++))))
-      break;
-
   ConstantInt *NElements = cast<ConstantInt>(NElems);
   if (NElements->getZExtValue() != 1) {
     // If we have an array allocation, transform it to a single element
     // allocation to make the code below simpler.
-    Type *NewTy = ArrayType::get(AllocTy, NElements->getZExtValue());
-    unsigned TypeSize = TD->getTypeAllocSize(NewTy);
-    if (const StructType *ST = dyn_cast<StructType>(NewTy))
-      TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
-    Instruction *NewCI = CallInst::CreateMalloc(CI, IntPtrTy, NewTy,
-                                         ConstantInt::get(IntPtrTy, TypeSize));
+    Type *NewTy = ArrayType::get(getMallocAllocatedType(CI),
+                                 NElements->getZExtValue());
+    Value* NewM = CallInst::CreateMalloc(CI, IntPtrTy, NewTy);
+    Instruction* NewMI = cast<Instruction>(NewM);
     Value* Indices[2];
     Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy);
-    Value *NewGEP = GetElementPtrInst::Create(NewCI, Indices, Indices + 2,
-                                              NewCI->getName()+".el0", CI);
-    Value *Cast = new BitCastInst(NewGEP, CI->getType(), "el0", CI);
-    if (BCI) BCI->replaceAllUsesWith(NewGEP);
-    CI->replaceAllUsesWith(Cast);
-    if (BCI) BCI->eraseFromParent();
+    Value *NewGEP = GetElementPtrInst::Create(NewMI, Indices, Indices + 2,
+                                              NewMI->getName()+".el0", CI);
+    BCI->replaceAllUsesWith(NewGEP);
+    BCI->eraseFromParent();
     CI->eraseFromParent();
-    BCI = dyn_cast<BitCastInst>(NewCI);
-    CI = BCI ? extractMallocCallFromBitCast(BCI) : cast<CallInst>(NewCI);
+    BCI = cast<BitCastInst>(NewMI);
+    CI = extractMallocCallFromBitCast(NewMI);
   }
 
   // Create the new global variable.  The contents of the malloc'd memory is
@@ -871,9 +861,8 @@
                                              GV,
                                              GV->isThreadLocal());
   
-  // Anything that used the malloc or its bitcast now uses the global directly.
-  if (BCI) BCI->replaceAllUsesWith(NewGV);
-  CI->replaceAllUsesWith(new BitCastInst(NewGV, CI->getType(), "newgv", CI));
+  // Anything that used the malloc now uses the global directly.
+  BCI->replaceAllUsesWith(NewGV);
 
   Constant *RepValue = NewGV;
   if (NewGV->getType() != GV->getType()->getElementType())
@@ -941,9 +930,9 @@
     GV->getParent()->getGlobalList().insert(GV, InitBool);
 
 
-  // Now the GV is dead, nuke it and the malloc (both CI and BCI).
+  // Now the GV is dead, nuke it and the malloc.
   GV->eraseFromParent();
-  if (BCI) BCI->eraseFromParent();
+  BCI->eraseFromParent();
   CI->eraseFromParent();
 
   // To further other optimizations, loop over all users of NewGV and try to
@@ -1284,10 +1273,13 @@
 
 /// PerformHeapAllocSRoA - CI is an allocation of an array of structures.  Break
 /// it up into multiple allocations of arrays of the fields.
-static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
-                                            Value* NElems, LLVMContext &Context,
+static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV,
+                                            CallInst *CI, BitCastInst* BCI,
+                                            Value* NElems,
+                                            LLVMContext &Context,
                                             TargetData *TD) {
-  DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC = " << *CI << '\n');
+  DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << "  MALLOC CALL = " << *CI 
+               << " BITCAST = " << *BCI << '\n');
   const Type* MAT = getMallocAllocatedType(CI);
   const StructType *STy = cast<StructType>(MAT);
 
@@ -1295,8 +1287,8 @@
   // it into GV).  If there are other uses, change them to be uses of
   // the global to simplify later code.  This also deletes the store
   // into GV.
-  ReplaceUsesOfMallocWithGlobal(CI, GV);
-
+  ReplaceUsesOfMallocWithGlobal(BCI, GV);
+  
   // Okay, at this point, there are no users of the malloc.  Insert N
   // new mallocs at the same place as CI, and N globals.
   std::vector<Value*> FieldGlobals;
@@ -1314,16 +1306,11 @@
                          GV->isThreadLocal());
     FieldGlobals.push_back(NGV);
     
-    unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
-    if (const StructType* ST = dyn_cast<StructType>(FieldTy))
-      TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
-    const Type* IntPtrTy = TD->getIntPtrType(Context);
-    Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
-                                        ConstantInt::get(IntPtrTy, TypeSize),
-                                        NElems,
-                                        CI->getName() + ".f" + Twine(FieldNo));
+    Value *NMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context),
+                                        FieldTy, NElems,
+                                        BCI->getName() + ".f" + Twine(FieldNo));
     FieldMallocs.push_back(NMI);
-    new StoreInst(NMI, NGV, CI);
+    new StoreInst(NMI, NGV, BCI);
   }
   
   // The tricky aspect of this transformation is handling the case when malloc
@@ -1340,18 +1327,18 @@
   //    }
   Value *RunningOr = 0;
   for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
-    Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
-                             Constant::getNullValue(FieldMallocs[i]->getType()),
-                               "isnull");
+    Value *Cond = new ICmpInst(BCI, ICmpInst::ICMP_EQ, FieldMallocs[i],
+                              Constant::getNullValue(FieldMallocs[i]->getType()),
+                                  "isnull");
     if (!RunningOr)
       RunningOr = Cond;   // First seteq
     else
-      RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
+      RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", BCI);
   }
 
   // Split the basic block at the old malloc.
-  BasicBlock *OrigBB = CI->getParent();
-  BasicBlock *ContBB = OrigBB->splitBasicBlock(CI, "malloc_cont");
+  BasicBlock *OrigBB = BCI->getParent();
+  BasicBlock *ContBB = OrigBB->splitBasicBlock(BCI, "malloc_cont");
   
   // Create the block to check the first condition.  Put all these blocks at the
   // end of the function as they are unlikely to be executed.
@@ -1387,8 +1374,9 @@
   }
   
   BranchInst::Create(ContBB, NullPtrBlock);
-
-  // CI is no longer needed, remove it.
+  
+  // CI and BCI are no longer needed, remove them.
+  BCI->eraseFromParent();
   CI->eraseFromParent();
 
   /// InsertedScalarizedLoads - As we process loads, if we can't immediately
@@ -1475,10 +1463,14 @@
 /// cast of malloc.
 static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
                                                CallInst *CI,
-                                               const Type *AllocTy,
+                                               BitCastInst *BCI,
                                                Module::global_iterator &GVI,
                                                TargetData *TD,
                                                LLVMContext &Context) {
+  // If we can't figure out the type being malloced, then we can't optimize.
+  const Type *AllocTy = getMallocAllocatedType(CI);
+  assert(AllocTy);
+
   // If this is a malloc of an abstract type, don't touch it.
   if (!AllocTy->isSized())
     return false;
@@ -1499,7 +1491,7 @@
   // for.
   {
     SmallPtrSet<PHINode*, 8> PHIs;
-    if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(CI, GV, PHIs))
+    if (!ValueIsOnlyUsedLocallyOrStoredToOneGlobal(BCI, GV, PHIs))
       return false;
   }  
 
@@ -1507,16 +1499,16 @@
   // transform the program to use global memory instead of malloc'd memory.
   // This eliminates dynamic allocation, avoids an indirection accessing the
   // data, and exposes the resultant global to further GlobalOpt.
+  Value *NElems = getMallocArraySize(CI, Context, TD);
   // We cannot optimize the malloc if we cannot determine malloc array size.
-  if (Value *NElems = getMallocArraySize(CI, Context, TD)) {
+  if (NElems) {
     if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
       // Restrict this transformation to only working on small allocations
       // (2048 bytes currently), as we don't want to introduce a 16M global or
       // something.
       if (TD && 
           NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
-        GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems,
-                                            Context, TD);
+        GVI = OptimizeGlobalAddressOfMalloc(GV, CI, BCI, NElems, Context, TD);
         return true;
       }
   
@@ -1534,29 +1526,26 @@
       // This the structure has an unreasonable number of fields, leave it
       // alone.
       if (AllocSTy->getNumElements() <= 16 && AllocSTy->getNumElements() != 0 &&
-          AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, CI)) {
+          AllGlobalLoadUsesSimpleEnoughForHeapSRA(GV, BCI)) {
 
         // If this is a fixed size array, transform the Malloc to be an alloc of
         // structs.  malloc [100 x struct],1 -> malloc struct, 100
         if (const ArrayType *AT =
                               dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
-          const Type *IntPtrTy = TD->getIntPtrType(Context);
-          unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
-          Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
-          Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
-          Instruction *Malloc = CallInst::CreateMalloc(CI, IntPtrTy, AllocSTy,
-                                                       AllocSize, NumElements,
-                                                       CI->getName());
-          Instruction *Cast = new BitCastInst(Malloc, CI->getType(), "tmp", CI);
-          CI->replaceAllUsesWith(Cast);
+          Value* NumElements = ConstantInt::get(Type::getInt32Ty(Context),
+                                                AT->getNumElements());
+          Value* NewMI = CallInst::CreateMalloc(CI, TD->getIntPtrType(Context),
+                                                AllocSTy, NumElements,
+                                                BCI->getName());
+          Value *Cast = new BitCastInst(NewMI, getMallocType(CI), "tmp", CI);
+          BCI->replaceAllUsesWith(Cast);
+          BCI->eraseFromParent();
           CI->eraseFromParent();
-          CI = dyn_cast<BitCastInst>(Malloc) ?
-               extractMallocCallFromBitCast(Malloc):
-               cast<CallInst>(Malloc);
+          BCI = cast<BitCastInst>(NewMI);
+          CI = extractMallocCallFromBitCast(NewMI);
         }
       
-        GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, Context, TD), 
-                                   Context, TD);
+        GVI = PerformHeapAllocSRoA(GV, CI, BCI, NElems, Context, TD);
         return true;
       }
     }
@@ -1588,10 +1577,15 @@
       if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context))
         return true;
     } else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
-      const Type* MallocType = getMallocAllocatedType(CI);
-      if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType, 
-                                                           GVI, TD, Context))
-        return true;
+      if (getMallocAllocatedType(CI)) {
+        BitCastInst* BCI = NULL;
+        for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
+             UI != E; )
+          BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++));
+        if (BCI &&
+            TryToOptimizeStoreOfMallocToGlobal(GV, CI, BCI, GVI, TD, Context))
+          return true;
+      }
     }
   }
 
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 1a34180..9a49d42 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -1699,24 +1699,18 @@
 
 LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
                              const char *Name) {
-  const Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
-  Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
-  AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
-  Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(), 
-                                               ITy, unwrap(Ty), AllocSize, 
-                                               0, 0, "");
-  return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
+  const Type* IntPtrT = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
+  return wrap(unwrap(B)->Insert(CallInst::CreateMalloc(
+      unwrap(B)->GetInsertBlock(), IntPtrT, unwrap(Ty), 0, 0, ""),
+      Twine(Name)));
 }
 
 LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty,
                                   LLVMValueRef Val, const char *Name) {
-  const Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
-  Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty));
-  AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy);
-  Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(), 
-                                               ITy, unwrap(Ty), AllocSize, 
-                                               unwrap(Val), 0, "");
-  return wrap(unwrap(B)->Insert(Malloc, Twine(Name)));
+  const Type* IntPtrT = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext());
+  return wrap(unwrap(B)->Insert(CallInst::CreateMalloc(
+      unwrap(B)->GetInsertBlock(), IntPtrT, unwrap(Ty), unwrap(Val), 0, ""),
+      Twine(Name)));
 }
 
 LLVMValueRef LLVMBuildAlloca(LLVMBuilderRef B, LLVMTypeRef Ty,
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 279bc73..52d8735 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -24,7 +24,6 @@
 #include "llvm/Support/CallSite.h"
 #include "llvm/Support/ConstantRange.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetData.h"
 
 using namespace llvm;
 
@@ -449,11 +448,22 @@
   return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne();
 }
 
+static Value *checkArraySize(Value *Amt, const Type *IntPtrTy) {
+  if (!Amt)
+    Amt = ConstantInt::get(IntPtrTy, 1);
+  else {
+    assert(!isa<BasicBlock>(Amt) &&
+           "Passed basic block into malloc size parameter! Use other ctor");
+    assert(Amt->getType() == IntPtrTy &&
+           "Malloc array size is not an intptr!");
+  }
+  return Amt;
+}
+
 static Instruction *createMalloc(Instruction *InsertBefore,
                                  BasicBlock *InsertAtEnd, const Type *IntPtrTy,
-                                 const Type *AllocTy, Value *AllocSize, 
-                                 Value *ArraySize, Function *MallocF,
-                                 const Twine &Name) {
+                                 const Type *AllocTy, Value *ArraySize,
+                                 Function *MallocF, const Twine &NameStr) {
   assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
          "createMalloc needs either InsertBefore or InsertAtEnd");
 
@@ -461,14 +471,10 @@
   //       bitcast (i8* malloc(typeSize)) to type*
   // malloc(type, arraySize) becomes:
   //       bitcast (i8 *malloc(typeSize*arraySize)) to type*
-  if (!ArraySize)
-    ArraySize = ConstantInt::get(IntPtrTy, 1);
-  else if (ArraySize->getType() != IntPtrTy) {
-    if (InsertBefore)
-      ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false, "", InsertBefore);
-    else
-      ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false, "", InsertAtEnd);
-  }
+  Value *AllocSize = ConstantExpr::getSizeOf(AllocTy);
+  AllocSize = ConstantExpr::getTruncOrBitCast(cast<Constant>(AllocSize),
+                                              IntPtrTy);
+  ArraySize = checkArraySize(ArraySize, IntPtrTy);
 
   if (!IsConstantOne(ArraySize)) {
     if (IsConstantOne(AllocSize)) {
@@ -507,14 +513,14 @@
     Result = MCall;
     if (Result->getType() != AllocPtrType)
       // Create a cast instruction to convert to the right type...
-      Result = new BitCastInst(MCall, AllocPtrType, Name, InsertBefore);
+      Result = new BitCastInst(MCall, AllocPtrType, NameStr, InsertBefore);
   } else {
     MCall = CallInst::Create(MallocF, AllocSize, "malloccall");
     Result = MCall;
     if (Result->getType() != AllocPtrType) {
       InsertAtEnd->getInstList().push_back(MCall);
       // Create a cast instruction to convert to the right type...
-      Result = new BitCastInst(MCall, AllocPtrType, Name);
+      Result = new BitCastInst(MCall, AllocPtrType, NameStr);
     }
   }
   MCall->setTailCall();
@@ -532,9 +538,8 @@
 /// 3. Bitcast the result of the malloc call to the specified type.
 Instruction *CallInst::CreateMalloc(Instruction *InsertBefore,
                                     const Type *IntPtrTy, const Type *AllocTy,
-                                    Value *AllocSize, Value *ArraySize,
-                                    const Twine &Name) {
-  return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, AllocSize,
+                                    Value *ArraySize, const Twine &Name) {
+  return createMalloc(InsertBefore, NULL, IntPtrTy, AllocTy, 
                       ArraySize, NULL, Name);
 }
 
@@ -548,9 +553,9 @@
 /// responsibility of the caller.
 Instruction *CallInst::CreateMalloc(BasicBlock *InsertAtEnd,
                                     const Type *IntPtrTy, const Type *AllocTy,
-                                    Value *AllocSize, Value *ArraySize, 
-                                    Function *MallocF, const Twine &Name) {
-  return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy, AllocSize,
+                                    Value *ArraySize, Function* MallocF,
+                                    const Twine &Name) {
+  return createMalloc(NULL, InsertAtEnd, IntPtrTy, AllocTy,
                       ArraySize, MallocF, Name);
 }