[profile] Fix value profile runtime merging issues

This patch fixes the following issues:
(1) The strong definition of the merge hook function was not working which
breaks the online value profile merging. This patch removes the weak
attribute of VPMergeHook and assigns the value dynamically.
(2) Truncate the proifle file so that we don't have garbage data at the end of
the file.
(3) Add new __llvm_profile_instrument_target_value() interface to do the value
profile update in batch. This is needed as the original incremental by 1
in __llvm_profile_instrument_target() is too slow for online merge.

Differential Revision: https://reviews.llvm.org/D44847

llvm-svn: 328987
diff --git a/compiler-rt/lib/profile/InstrProfiling.h b/compiler-rt/lib/profile/InstrProfiling.h
index 945f1c4..740e2d4 100644
--- a/compiler-rt/lib/profile/InstrProfiling.h
+++ b/compiler-rt/lib/profile/InstrProfiling.h
@@ -106,6 +106,10 @@
 #include "InstrProfData.inc"
     );
 
+void __llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,
+                                            uint32_t CounterIndex,
+                                            uint64_t CounterValue);
+
 /*!
  * \brief Write instrumentation data to the current file.
  *
diff --git a/compiler-rt/lib/profile/InstrProfilingFile.c b/compiler-rt/lib/profile/InstrProfilingFile.c
index d7c0abb..6e6b8fa 100644
--- a/compiler-rt/lib/profile/InstrProfilingFile.c
+++ b/compiler-rt/lib/profile/InstrProfilingFile.c
@@ -183,8 +183,12 @@
 
   /* Now start merging */
   __llvm_profile_merge_from_buffer(ProfileBuffer, ProfileFileSize);
-  (void)munmap(ProfileBuffer, ProfileFileSize);
 
+  // Truncate the file in case merging of value profile did not happend to
+  // prevent from leaving garbage data at the end of the profile file.
+  ftruncate(fileno(ProfileFile), __llvm_profile_get_size_for_buffer());
+
+  (void)munmap(ProfileBuffer, ProfileFileSize);
   *MergeDone = 1;
 
   return 0;
@@ -234,6 +238,7 @@
   FILE *OutputFile;
 
   int MergeDone = 0;
+  VPMergeHook = &lprofMergeValueProfData;
   if (!doMerging())
     OutputFile = fopen(OutputName, "ab");
   else
diff --git a/compiler-rt/lib/profile/InstrProfilingMerge.c b/compiler-rt/lib/profile/InstrProfilingMerge.c
index a202115..2397250 100644
--- a/compiler-rt/lib/profile/InstrProfilingMerge.c
+++ b/compiler-rt/lib/profile/InstrProfilingMerge.c
@@ -17,8 +17,9 @@
 #define INSTR_PROF_VALUE_PROF_DATA
 #include "InstrProfData.inc"
 
-COMPILER_RT_WEAK void (*VPMergeHook)(ValueProfData *,
-                                     __llvm_profile_data *) = NULL;
+COMPILER_RT_VISIBILITY
+void (*VPMergeHook)(ValueProfData *, __llvm_profile_data *);
+
 COMPILER_RT_VISIBILITY
 uint64_t lprofGetLoadModuleSignature() {
   /* A very fast way to compute a module signature.  */
diff --git a/compiler-rt/lib/profile/InstrProfilingMergeFile.c b/compiler-rt/lib/profile/InstrProfilingMergeFile.c
index ac5ee9f..b41f216 100644
--- a/compiler-rt/lib/profile/InstrProfilingMergeFile.c
+++ b/compiler-rt/lib/profile/InstrProfilingMergeFile.c
@@ -17,24 +17,24 @@
 #define INSTR_PROF_VALUE_PROF_DATA
 #include "InstrProfData.inc"
 
-void (*VPMergeHook)(ValueProfData *,
-                    __llvm_profile_data *) = &lprofMergeValueProfData;
-
 /* Merge value profile data pointed to by SrcValueProfData into
  * in-memory profile counters pointed by to DstData.  */
 void lprofMergeValueProfData(ValueProfData *SrcValueProfData,
                              __llvm_profile_data *DstData) {
-  unsigned I, S, V, C;
+  unsigned I, S, V, DstIndex = 0;
   InstrProfValueData *VData;
   ValueProfRecord *VR = getFirstValueProfRecord(SrcValueProfData);
   for (I = 0; I < SrcValueProfData->NumValueKinds; I++) {
     VData = getValueProfRecordValueData(VR);
+    unsigned SrcIndex = 0;
     for (S = 0; S < VR->NumValueSites; S++) {
       uint8_t NV = VR->SiteCountArray[S];
       for (V = 0; V < NV; V++) {
-        for (C = 0; C < VData[V].Count; C++)
-          __llvm_profile_instrument_target(VData[V].Value, DstData, S);
+        __llvm_profile_instrument_target_value(VData[SrcIndex].Value, DstData,
+                                               DstIndex, VData[SrcIndex].Count);
+        ++SrcIndex;
       }
+      ++DstIndex;
     }
     VR = getValueProfRecordNext(VR);
   }
diff --git a/compiler-rt/lib/profile/InstrProfilingPort.h b/compiler-rt/lib/profile/InstrProfilingPort.h
index cd1264b..ede6aaa 100644
--- a/compiler-rt/lib/profile/InstrProfilingPort.h
+++ b/compiler-rt/lib/profile/InstrProfilingPort.h
@@ -22,12 +22,14 @@
 #define COMPILER_RT_ALLOCA _alloca
 /* Need to include <stdio.h> and <io.h> */
 #define COMPILER_RT_FTRUNCATE(f,l) _chsize(_fileno(f),l)
+#define COMPILER_RT_ALWAYS_INLINE __forceinline
 #elif __GNUC__
 #define COMPILER_RT_ALIGNAS(x) __attribute__((aligned(x)))
 #define COMPILER_RT_VISIBILITY __attribute__((visibility("hidden")))
 #define COMPILER_RT_WEAK __attribute__((weak))
 #define COMPILER_RT_ALLOCA __builtin_alloca
 #define COMPILER_RT_FTRUNCATE(f,l) ftruncate(fileno(f),l)
+#define COMPILER_RT_ALWAYS_INLINE inline __attribute((always_inline))
 #endif
 
 #if defined(__APPLE__)
diff --git a/compiler-rt/lib/profile/InstrProfilingValue.c b/compiler-rt/lib/profile/InstrProfilingValue.c
index 3db0de8..674a486 100644
--- a/compiler-rt/lib/profile/InstrProfilingValue.c
+++ b/compiler-rt/lib/profile/InstrProfilingValue.c
@@ -132,13 +132,14 @@
   return Node;
 }
 
-COMPILER_RT_VISIBILITY void
-__llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
-                                 uint32_t CounterIndex) {
+static COMPILER_RT_ALWAYS_INLINE void
+instrumentTargetValueImpl(uint64_t TargetValue, void *Data,
+                          uint32_t CounterIndex, uint64_t CountValue) {
   __llvm_profile_data *PData = (__llvm_profile_data *)Data;
   if (!PData)
     return;
-
+  if (!CountValue)
+    return;
   if (!PData->Values) {
     if (!allocateValueProfileCounters(PData))
       return;
@@ -153,7 +154,7 @@
   uint8_t VDataCount = 0;
   while (CurVNode) {
     if (TargetValue == CurVNode->Value) {
-      CurVNode->Count++;
+      CurVNode->Count += CountValue;
       return;
     }
     if (CurVNode->Count < MinCount) {
@@ -194,11 +195,13 @@
      * the runtime can wipe out more than one lowest count entries
      * to give space for hot targets.
      */
-    if (!MinCountVNode->Count || !(--MinCountVNode->Count)) {
+    if (MinCountVNode->Count <= CountValue) {
       CurVNode = MinCountVNode;
       CurVNode->Value = TargetValue;
-      CurVNode->Count++;
-    }
+      CurVNode->Count = CountValue;
+    } else
+      MinCountVNode->Count -= CountValue;
+
     return;
   }
 
@@ -206,7 +209,7 @@
   if (!CurVNode)
     return;
   CurVNode->Value = TargetValue;
-  CurVNode->Count++;
+  CurVNode->Count += CountValue;
 
   uint32_t Success = 0;
   if (!ValueCounters[CounterIndex])
@@ -221,6 +224,18 @@
   }
 }
 
+COMPILER_RT_VISIBILITY void
+__llvm_profile_instrument_target(uint64_t TargetValue, void *Data,
+                                 uint32_t CounterIndex) {
+  instrumentTargetValueImpl(TargetValue, Data, CounterIndex, 1);
+}
+COMPILER_RT_VISIBILITY void
+__llvm_profile_instrument_target_value(uint64_t TargetValue, void *Data,
+                                       uint32_t CounterIndex,
+                                       uint64_t CountValue) {
+  instrumentTargetValueImpl(TargetValue, Data, CounterIndex, CountValue);
+}
+
 /*
  * The target values are partitioned into multiple regions/ranges. There is one
  * contiguous region which is precise -- every value in the range is tracked
diff --git a/compiler-rt/test/profile/instrprof-value-merge.c b/compiler-rt/test/profile/instrprof-value-merge.c
new file mode 100644
index 0000000..902430a
--- /dev/null
+++ b/compiler-rt/test/profile/instrprof-value-merge.c
@@ -0,0 +1,79 @@
+// RUN: %clang_pgogen -o %t -O3 %s
+// RUN: rm -rf %t.profdir
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
+// RUN: env LLVM_PROFILE_FILE=%t.profdir/default_%m.profraw %run %t 1
+// RUN: llvm-profdata show -counts -function=main -ic-targets -memop-sizes %t.profdir/default_*.profraw | FileCheck %s
+
+#include <string.h>
+
+void (*f0)();
+void (*f1)();
+void (*f2)();
+
+char dst[200];
+char src[200];
+volatile int n;
+
+__attribute__((noinline)) void foo() {}
+
+__attribute__((noinline)) void bar() {
+  f0 = foo;
+  f1 = foo;
+  f2 = foo;
+  n = 4;
+}
+int main(int argc, char *argv[]) {
+  int i;
+  bar();
+  if (argc == 1) {
+    f0();
+    for (i = 0; i < 9; i++)
+      f1();
+    for (i = 0; i < 99; i++)
+      f2();
+  } else {
+    memcpy((void *)dst, (void *)src, n);
+    for (i = 0; i < 6; i++)
+      memcpy((void *)(dst + 2), (void *)src, n + 1);
+    for (i = 0; i < 66; i++)
+      memcpy((void *)(dst + 9), (void *)src, n + 2);
+  }
+}
+
+// CHECK: Counters:
+// CHECK:   main:
+// CHECK:     Hash: 0x00030012a7ab6e87
+// CHECK:     Counters: 6
+// CHECK:     Indirect Call Site Count: 3
+// CHECK:     Number of Memory Intrinsics Calls: 3
+// CHECK:     Block counts: [27, 297, 12, 132, 3, 2]
+// CHECK:     Indirect Target Results:
+// CHECK:         [ 0, foo, 3 ]
+// CHECK:         [ 1, foo, 27 ]
+// CHECK:         [ 2, foo, 297 ]
+// CHECK:     Memory Intrinsic Size Results:
+// CHECK:         [ 0, 4, 2 ]
+// CHECK:         [ 1, 5, 12 ]
+// CHECK:         [ 2, 6, 132 ]
+// CHECK: Instrumentation level: IR
+// CHECK: Functions shown: 1
+// CHECK: Total functions: 3
+// CHECK: Maximum function count: 327
+// CHECK: Maximum internal block count: 297
+// CHECK: Statistics for indirect call sites profile:
+// CHECK:   Total number of sites: 3
+// CHECK:   Total number of sites with values: 3
+// CHECK:   Total number of profiled values: 3
+// CHECK:   Value sites histogram:
+// CHECK:         NumTargets, SiteCount
+// CHECK:         1, 3
+// CHECK: Statistics for memory intrinsic calls sizes profile:
+// CHECK:   Total number of sites: 3
+// CHECK:   Total number of sites with values: 3
+// CHECK:   Total number of profiled values: 3
+// CHECK:   Value sites histogram:
+// CHECK:         NumTargets, SiteCount
+// CHECK:         1, 3