Bring r314809 back.

But now include a check for CPU_COUNT so we still build on 10 year old
versions of glibc.

Original message:

Use sched_getaffinity instead of std::thread::hardware_concurrency.

The issue with std::thread::hardware_concurrency is that it forwards
to libc and some implementations (like glibc) don't take thread
affinity into consideration.

With this change a llvm program that can execute in only 2 cores will
use 2 threads, even if the machine has 32 cores.

This makes benchmarking a lot easier, but should also help if someone
doesn't want to use all cores for compilation for example.

llvm-svn: 314931
diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake
index a1a16b9..f13e6f7 100644
--- a/llvm/cmake/config-ix.cmake
+++ b/llvm/cmake/config-ix.cmake
@@ -267,8 +267,11 @@
 check_symbol_exists(__GLIBC__ stdio.h LLVM_USING_GLIBC)
 if( LLVM_USING_GLIBC )
   add_definitions( -D_GNU_SOURCE )
+  list(APPEND CMAKE_REQUIRED_DEFINITIONS "-D_GNU_SOURCE")
 endif()
 # This check requires _GNU_SOURCE
+check_symbol_exists(sched_getaffinity sched.h HAVE_SCHED_GETAFFINITY)
+check_symbol_exists(CPU_COUNT sched.h HAVE_CPU_COUNT)
 if(HAVE_LIBPTHREAD)
   check_library_exists(pthread pthread_getname_np "" HAVE_PTHREAD_GETNAME_NP)
   check_library_exists(pthread pthread_setname_np "" HAVE_PTHREAD_SETNAME_NP)
diff --git a/llvm/include/llvm/Config/config.h.cmake b/llvm/include/llvm/Config/config.h.cmake
index d0d1e09..d67148f 100644
--- a/llvm/include/llvm/Config/config.h.cmake
+++ b/llvm/include/llvm/Config/config.h.cmake
@@ -185,6 +185,12 @@
 /* Define to 1 if you have the `setenv' function. */
 #cmakedefine HAVE_SETENV ${HAVE_SETENV}
 
+/* Define to 1 if you have the `sched_getaffinity' function. */
+#cmakedefine HAVE_SCHED_GETAFFINITY ${HAVE_SCHED_GETAFFINITY}
+
+/* Define to 1 if you have the `CPU_COUNT' macro. */
+#cmakedefine HAVE_CPU_COUNT ${HAVE_CPU_COUNT}
+
 /* Define to 1 if you have the `setrlimit' function. */
 #cmakedefine HAVE_SETRLIMIT ${HAVE_SETRLIMIT}
 
diff --git a/llvm/include/llvm/Support/ThreadPool.h b/llvm/include/llvm/Support/ThreadPool.h
index 9ada946..fb82559 100644
--- a/llvm/include/llvm/Support/ThreadPool.h
+++ b/llvm/include/llvm/Support/ThreadPool.h
@@ -38,8 +38,8 @@
   using TaskTy = std::function<void()>;
   using PackagedTaskTy = std::packaged_task<void()>;
 
-  /// Construct a pool with the number of core available on the system (or
-  /// whatever the value returned by std::thread::hardware_concurrency() is).
+  /// Construct a pool with the number of threads found by
+  /// hardware_concurrency().
   ThreadPool();
 
   /// Construct a pool of \p ThreadCount threads
diff --git a/llvm/include/llvm/Support/Threading.h b/llvm/include/llvm/Support/Threading.h
index 03963a2..6d813bc 100644
--- a/llvm/include/llvm/Support/Threading.h
+++ b/llvm/include/llvm/Support/Threading.h
@@ -131,6 +131,14 @@
   /// Returns 1 when LLVM is configured with LLVM_ENABLE_THREADS=OFF
   unsigned heavyweight_hardware_concurrency();
 
+  /// Get the number of threads that the current program can execute
+  /// concurrently. On some systems std::thread::hardware_concurrency() returns
+  /// the total number of cores, without taking affinity into consideration.
+  /// Returns 1 when LLVM is configured with LLVM_ENABLE_THREADS=OFF.
+  /// Fallback to std::thread::hardware_concurrency() if sched_getaffinity is
+  /// not available.
+  unsigned hardware_concurrency();
+
   /// \brief Return the current thread id, as used in various OS system calls.
   /// Note that not all platforms guarantee that the value returned will be
   /// unique across the entire system, so portable code should not assume
diff --git a/llvm/lib/Support/Parallel.cpp b/llvm/lib/Support/Parallel.cpp
index ab2cfde..010e429 100644
--- a/llvm/lib/Support/Parallel.cpp
+++ b/llvm/lib/Support/Parallel.cpp
@@ -9,6 +9,7 @@
 
 #include "llvm/Support/Parallel.h"
 #include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Threading.h"
 
 #include <atomic>
 #include <stack>
@@ -70,8 +71,7 @@
 ///   in filo order.
 class ThreadPoolExecutor : public Executor {
 public:
-  explicit ThreadPoolExecutor(
-      unsigned ThreadCount = std::thread::hardware_concurrency())
+  explicit ThreadPoolExecutor(unsigned ThreadCount = hardware_concurrency())
       : Done(ThreadCount) {
     // Spawn all but one of the threads in another thread as spawning threads
     // can take a while.
diff --git a/llvm/lib/Support/ThreadPool.cpp b/llvm/lib/Support/ThreadPool.cpp
index 22b7550..f1b5bdf 100644
--- a/llvm/lib/Support/ThreadPool.cpp
+++ b/llvm/lib/Support/ThreadPool.cpp
@@ -14,14 +14,15 @@
 #include "llvm/Support/ThreadPool.h"
 
 #include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Threading.h"
 #include "llvm/Support/raw_ostream.h"
 
 using namespace llvm;
 
 #if LLVM_ENABLE_THREADS
 
-// Default to std::thread::hardware_concurrency
-ThreadPool::ThreadPool() : ThreadPool(std::thread::hardware_concurrency()) {}
+// Default to hardware_concurrency
+ThreadPool::ThreadPool() : ThreadPool(hardware_concurrency()) {}
 
 ThreadPool::ThreadPool(unsigned ThreadCount)
     : ActiveThreads(0), EnableFlag(true) {
diff --git a/llvm/lib/Support/Threading.cpp b/llvm/lib/Support/Threading.cpp
index 6a10b98..473c848 100644
--- a/llvm/lib/Support/Threading.cpp
+++ b/llvm/lib/Support/Threading.cpp
@@ -47,6 +47,8 @@
 
 unsigned llvm::heavyweight_hardware_concurrency() { return 1; }
 
+unsigned llvm::hardware_concurrency() { return 1; }
+
 uint64_t llvm::get_threadid() { return 0; }
 
 uint32_t llvm::get_max_thread_name_length() { return 0; }
@@ -71,6 +73,18 @@
   return NumPhysical;
 }
 
+unsigned llvm::hardware_concurrency() {
+#if defined(HAVE_SCHED_GETAFFINITY) && defined(HAVE_CPU_COUNT)
+  cpu_set_t Set;
+  if (sched_getaffinity(0, sizeof(Set), &Set))
+    return CPU_COUNT(&Set);
+#endif
+  // Guard against std::thread::hardware_concurrency() returning 0.
+  if (unsigned Val = std::thread::hardware_concurrency())
+    return Val;
+  return 1;
+}
+
 // Include the platform-specific parts of this class.
 #ifdef LLVM_ON_UNIX
 #include "Unix/Threading.inc"
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index eee2421..8e21a7a9 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -211,8 +211,8 @@
 
   // If NumThreads is not specified, auto-detect a good default.
   if (NumThreads == 0)
-    NumThreads = std::max(1U, std::min(std::thread::hardware_concurrency(),
-                                       unsigned(Inputs.size() / 2)));
+    NumThreads =
+        std::min(hardware_concurrency(), unsigned((Inputs.size() + 1) / 2));
 
   // Initialize the writer contexts.
   SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;