Improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct arguments.  For example, we
used to compile this:

struct DeclGroup {
  unsigned NumDecls;
};

int foo(DeclGroup D);
void bar(DeclGroup *D) {
  foo(*D);
}

into:

define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
  %D.addr = alloca %struct.DeclGroup*, align 8    ; <%struct.DeclGroup**> [#uses=2]
  %agg.tmp = alloca %struct.DeclGroup, align 4    ; <%struct.DeclGroup*> [#uses=2]
  %tmp3 = alloca i64                              ; <i64*> [#uses=2]
  store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
  %tmp = load %struct.DeclGroup** %D.addr         ; <%struct.DeclGroup*> [#uses=1]
  %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
  %tmp2 = bitcast %struct.DeclGroup* %tmp to i8*  ; <i8*> [#uses=1]
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
  %0 = bitcast i64* %tmp3 to %struct.DeclGroup*   ; <%struct.DeclGroup*> [#uses=1]
  %1 = load %struct.DeclGroup* %agg.tmp           ; <%struct.DeclGroup> [#uses=1]
  store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
  %2 = load i64* %tmp3                            ; <i64> [#uses=1]
  call void @_Z3foo9DeclGroup(i64 %2)
  ret void
}

which would cause fastisel to bail out due to the first-class aggregate load %1.  With
this patch we now compile it into the (still awful):

define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
  %D.addr = alloca %struct.DeclGroup*, align 8    ; <%struct.DeclGroup**> [#uses=2]
  %agg.tmp = alloca %struct.DeclGroup, align 4    ; <%struct.DeclGroup*> [#uses=2]
  %tmp3 = alloca i64                              ; <i64*> [#uses=2]
  store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
  %tmp = load %struct.DeclGroup** %D.addr         ; <%struct.DeclGroup*> [#uses=1]
  %tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
  %tmp2 = bitcast %struct.DeclGroup* %tmp to i8*  ; <i8*> [#uses=1]
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
  %coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
  %0 = bitcast i64* %tmp3 to i32*                 ; <i32*> [#uses=1]
  %1 = load i32* %coerce.dive                     ; <i32> [#uses=1]
  store i32 %1, i32* %0, align 1
  %2 = load i64* %tmp3                            ; <i64> [#uses=1]
  %call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
  ret void
}

which doesn't bail out.  On CGStmt.ll, this reduces fastisel bail-outs from 958 to 935,
and is a precursor to better things to come.
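
Roughly, the idea behind the "coerce.dive" GEP is: when the source of the
coerced load is a single-element struct, step down to the element and load a
plain scalar instead of loading the struct as a first-class aggregate.  A
sketch of that dive step against the IRBuilder API (the helper name and the
exact conditions are illustrative, not the actual Clang implementation):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"

// If SrcTy is a struct wrapping a single element, step down to that element
// so the caller can load a scalar instead of a first-class aggregate.
static llvm::Value *enterSingleElementStruct(llvm::Value *SrcPtr,
                                             llvm::Type *&SrcTy,
                                             llvm::IRBuilder<> &Builder) {
  while (auto *STy = llvm::dyn_cast<llvm::StructType>(SrcTy)) {
    if (STy->getNumElements() != 1)
      break;
    // Emits: %coerce.dive = getelementptr %struct.T* %p, i32 0, i32 0
    SrcPtr = Builder.CreateStructGEP(STy, SrcPtr, 0, "coerce.dive");
    SrcTy = STy->getElementType(0);
  }
  return SrcPtr;
}

With the scalar in hand, the coercion then goes through memory as in the IR
above: store the i32 through a bitcast of the i64 slot and reload it as the
i64 the ABI wants, which fastisel handles fine.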




git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@106973 91177308-0d34-0410-b5e6-96231b3b80d8