Add posix implementation for platform/numa.h functions, relying
on hwloc.
PiperOrigin-RevId: 235742876
diff --git a/.bazelrc b/.bazelrc
index 17285af..1741091 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -67,6 +67,7 @@
build:gdr --define=with_gdr_support=true
build:ngraph --define=with_ngraph_support=true
build:verbs --define=with_verbs_support=true
+build:numa --define=with_numa_support=true
# Options to disable default on features
build:noaws --define=no_aws_support=true
diff --git a/configure.py b/configure.py
index 3eb09a1..673825c 100644
--- a/configure.py
+++ b/configure.py
@@ -1751,6 +1751,7 @@
config_info_line('gdr', 'Build with GDR support.')
config_info_line('verbs', 'Build with libverbs support.')
config_info_line('ngraph', 'Build with Intel nGraph support.')
+ config_info_line('numa', 'Build with NUMA support.')
config_info_line(
'dynamic_kernels',
'(Experimental) Build kernels into separate shared objects.')
diff --git a/tensorflow/BUILD b/tensorflow/BUILD
index f53982f..e1d988a 100644
--- a/tensorflow/BUILD
+++ b/tensorflow/BUILD
@@ -304,6 +304,12 @@
visibility = ["//visibility:public"],
)
+# Matches builds that pass --define=with_numa_support=true.
+config_setting(
+ name = "with_numa_support",
+ define_values = {"with_numa_support": "true"},
+ visibility = ["//visibility:public"],
+)
+
# Crosses between framework_shared_object and a bunch of other configurations
# due to limitations in nested select() statements.
config_setting(
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD
index 64aed37..8f5de68 100644
--- a/tensorflow/core/BUILD
+++ b/tensorflow/core/BUILD
@@ -128,6 +128,9 @@
"tf_additional_libdevice_srcs",
"tf_additional_minimal_lib_srcs",
"tf_additional_mpi_lib_defines",
+ "tf_additional_numa_deps",
+ "tf_additional_numa_lib_defines",
+ "tf_additional_numa_copts",
"tf_additional_proto_hdrs",
"tf_additional_proto_srcs",
"tf_additional_test_deps",
@@ -388,15 +391,15 @@
":platform_port_hdrs",
":platform_port_internal_hdrs",
],
- copts = tf_copts(),
+ copts = tf_copts() + tf_additional_numa_copts(),
visibility = ["//tensorflow/core:__subpackages__"],
deps = [
":lib_platform",
":platform_base",
- "//tensorflow/core/platform/default/build_config:port",
"@com_google_absl//absl/base",
+ "//tensorflow/core/platform/default/build_config:port",
"@snappy",
- ],
+ ] + tf_additional_numa_deps(),
)
filegroup(
@@ -2278,11 +2281,14 @@
]
# Replicated for lib_internal and lib_internal_impl.
-LIB_INTERNAL_DEFINES = (tf_additional_lib_defines() + [
- "TF_USE_SNAPPY",
- ] + tf_additional_verbs_lib_defines() +
- tf_additional_mpi_lib_defines() +
- tf_additional_gdr_lib_defines())
+LIB_INTERNAL_DEFINES = (
+ tf_additional_lib_defines() + [
+ "TF_USE_SNAPPY",
+ ] + tf_additional_verbs_lib_defines() +
+ tf_additional_mpi_lib_defines() +
+ tf_additional_gdr_lib_defines() +
+ tf_additional_numa_lib_defines()
+)
cc_library(
name = "lib_internal",
@@ -2355,19 +2361,20 @@
copts = tf_copts(),
defines = LIB_INTERNAL_DEFINES,
deps = tf_additional_lib_deps() + [
- ":lib_hash_crc32c_accelerate_internal",
- ":lib_proto_parsing",
- ":abi",
- ":core_stringpiece",
- "@com_google_absl//absl/memory",
- "@com_google_absl//absl/strings",
- "//third_party/eigen3",
- "//tensorflow/core/platform/default/build_config:platformlib",
- "@snappy",
- "@zlib_archive//:zlib",
- "@double_conversion//:double-conversion",
- "@protobuf_archive//:protobuf",
- ] + tf_protos_all_impl() + tf_protos_grappler_impl(),
+ ":lib_hash_crc32c_accelerate_internal",
+ ":lib_proto_parsing",
+ ":abi",
+ ":core_stringpiece",
+ "@com_google_absl//absl/memory",
+ "@com_google_absl//absl/strings",
+ "//third_party/eigen3",
+ "//tensorflow/core/platform/default/build_config:platformlib",
+ "@snappy",
+ "@zlib_archive//:zlib",
+ "@double_conversion//:double-conversion",
+ "@protobuf_archive//:protobuf",
+ ] + tf_protos_all_impl() + tf_protos_grappler_impl() +
+ tf_additional_numa_deps(),
)
# File compiled with extra flags to get cpu-specific acceleration.
diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl
index f9ac4ff..f6f449a 100644
--- a/tensorflow/core/platform/default/build_config.bzl
+++ b/tensorflow/core/platform/default/build_config.bzl
@@ -725,6 +725,12 @@
"//conditions:default": [],
})
+# Preprocessor defines for NUMA support: TENSORFLOW_USE_NUMA is returned only
+# when the //tensorflow:with_numa_support config_setting matches; otherwise
+# the list is empty.
+def tf_additional_numa_lib_defines():
+ return select({
+ "//tensorflow:with_numa_support": ["TENSORFLOW_USE_NUMA"],
+ "//conditions:default": [],
+ })
+
def tf_py_clif_cc(name, visibility = None, **kwargs):
pass
@@ -757,3 +763,26 @@
"//third_party/mkl:intel_binary_blob",
],
)
+
+# Extra deps for NUMA support: @hwloc by default, and nothing for the
+# android, ios, windows and darwin configurations.
+def tf_additional_numa_deps():
+ return select({
+ "//tensorflow:android": [],
+ "//tensorflow:ios": [],
+ "//tensorflow:windows": [],
+ "//tensorflow:darwin": [],
+ "//conditions:default": [
+ "@hwloc",
+ ],
+ })
+
+# Extra copts for NUMA support: the hwloc include path plus
+# -DTENSORFLOW_USE_NUMA by default, and nothing for the android, ios, windows
+# and darwin configurations.
+# NOTE(review): unlike tf_additional_numa_lib_defines(), this does not consult
+# //tensorflow:with_numa_support, so the define is set in the default
+# configuration regardless of that flag -- confirm this is intended.
+def tf_additional_numa_copts():
+ return select({
+ "//tensorflow:android": [],
+ "//tensorflow:ios": [],
+ "//tensorflow:windows": [],
+ "//tensorflow:darwin": [],
+ "//conditions:default": [
+ "-Ithird_party/hwloc/hwloc-master/include",
+ "-DTENSORFLOW_USE_NUMA",
+ ],
+ })
diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc
index 807e008..1561632 100644
--- a/tensorflow/core/platform/posix/port.cc
+++ b/tensorflow/core/platform/posix/port.cc
@@ -45,6 +45,10 @@
#include <thread>
#endif
+// hwloc is only included in NUMA-enabled builds. Use #ifdef so this guard
+// agrees with every other TENSORFLOW_USE_NUMA check in this file and still
+// works when the macro is defined without a value.
+#ifdef TENSORFLOW_USE_NUMA
+#include "hwloc.h"  // TF:hwloc
+#endif
+
namespace tensorflow {
namespace port {
@@ -115,16 +119,94 @@
return (ht_per_core > 0) ? ht_per_core : 1;
}
-bool NUMAEnabled() {
- // Not yet implemented: coming soon.
- return false;
+#ifdef TENSORFLOW_USE_NUMA
+namespace {
+static hwloc_topology_t hwloc_topology_handle;
+
+// Creates and loads the hwloc topology behind hwloc_topology_handle on the
+// first call and returns whether that succeeded. The outcome is cached in a
+// function-local static, so initialization is attempted only once.
+bool HaveHWLocTopology() {
+  // One time initialization
+  static bool init = []() {
+    if (hwloc_topology_init(&hwloc_topology_handle)) {
+      LOG(ERROR) << "Call to hwloc_topology_init() failed";
+      return false;
+    }
+    if (hwloc_topology_load(hwloc_topology_handle)) {
+      LOG(ERROR) << "Call to hwloc_topology_load() failed";
+      // The handle was initialized above; release it instead of leaking it
+      // when loading fails.
+      hwloc_topology_destroy(hwloc_topology_handle);
+      return false;
+    }
+    return true;
+  }();
+  return init;
}
-int NUMANumNodes() { return 1; }
+// Looks up the first hwloc object of type 'tp' whose os_index equals 'index'.
+// Yields nullptr when 'index' is negative or no such object exists.
+hwloc_obj_t GetHWLocTypeIndex(hwloc_obj_type_t tp, int index) {
+  if (index < 0) return nullptr;
+  hwloc_obj_t cursor =
+      hwloc_get_next_obj_by_type(hwloc_topology_handle, tp, nullptr);
+  for (; cursor != nullptr;
+       cursor = hwloc_get_next_obj_by_type(hwloc_topology_handle, tp, cursor)) {
+    if (cursor->os_index == index) return cursor;
+  }
+  return nullptr;
+}
+} // namespace
+#endif // TENSORFLOW_USE_NUMA
-void NUMASetThreadNodeAffinity(int node) {}
+// NUMA counts as enabled only when more than one NUMA node is reported.
+bool NUMAEnabled() {
+  const int node_count = NUMANumNodes();
+  return node_count > 1;
+}
-int NUMAGetThreadNodeAffinity() { return kNUMANoAffinity; }
+// Returns the number of NUMA nodes reported by hwloc, never less than 1.
+// Without NUMA support, or when the hwloc topology is unavailable, the
+// answer is 1.
+int NUMANumNodes() {
+  int node_count = 1;
+#ifdef TENSORFLOW_USE_NUMA
+  if (HaveHWLocTopology()) {
+    const int reported =
+        hwloc_get_nbobjs_by_type(hwloc_topology_handle, HWLOC_OBJ_NUMANODE);
+    node_count = std::max(1, reported);
+  }
+#endif  // TENSORFLOW_USE_NUMA
+  return node_count;
+}
+
+// Binds the calling thread to the cpuset of NUMA node 'node'. Does nothing
+// without NUMA support or when the hwloc topology is unavailable, and logs an
+// error when 'node' does not match any hwloc NUMA node.
+void NUMASetThreadNodeAffinity(int node) {
+#ifdef TENSORFLOW_USE_NUMA
+  if (!HaveHWLocTopology()) return;
+  // Resolve the requested node to its hwloc topology object.
+  hwloc_obj_t numa_obj = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
+  if (numa_obj == nullptr) {
+    LOG(ERROR) << "Could not find hwloc NUMA node " << node;
+    return;
+  }
+  const int bind_flags = HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT;
+  hwloc_set_cpubind(hwloc_topology_handle, numa_obj->cpuset, bind_flags);
+#endif  // TENSORFLOW_USE_NUMA
+}
+
+// Returns the os_index of the first NUMA node whose cpuset contains the
+// calling thread's CPU binding. Returns kNUMANoAffinity when no node
+// qualifies, when the binding cannot be queried, or when NUMA support or the
+// hwloc topology is unavailable.
+int NUMAGetThreadNodeAffinity() {
+  int node_index = kNUMANoAffinity;
+#ifdef TENSORFLOW_USE_NUMA
+  if (HaveHWLocTopology()) {
+    hwloc_cpuset_t thread_cpuset = hwloc_bitmap_alloc();
+    // Only trust the bitmap when the query succeeded. After a failed call it
+    // may still be empty, and an empty bitmap is considered included in every
+    // cpuset, which would wrongly report the first node as the affinity.
+    if (!hwloc_get_cpubind(hwloc_topology_handle, thread_cpuset,
+                           HWLOC_CPUBIND_THREAD)) {
+      hwloc_obj_t obj = nullptr;
+      // Return the first NUMA node whose cpuset is a (non-proper) superset of
+      // that of the current thread.
+      while ((obj = hwloc_get_next_obj_by_type(
+                  hwloc_topology_handle, HWLOC_OBJ_NUMANODE, obj)) != nullptr) {
+        if (hwloc_bitmap_isincluded(thread_cpuset, obj->cpuset)) {
+          node_index = obj->os_index;
+          break;
+        }
+      }
+    } else {
+      LOG(ERROR) << "Failed call to hwloc_get_cpubind.";
+    }
+    hwloc_bitmap_free(thread_cpuset);
+  }
+#endif  // TENSORFLOW_USE_NUMA
+  return node_index;
+}
void* AlignedMalloc(size_t size, int minimum_alignment) {
#if defined(__ANDROID__)
@@ -154,12 +236,54 @@
void Free(void* ptr) { free(ptr); }
+// Allocates 'size' bytes, bound to NUMA node 'node' when the hwloc topology
+// is available and the node exists. If the topology is available but the node
+// is not found, an error is logged and unbound memory is obtained from hwloc.
+// Without NUMA support or without a topology, falls back to AlignedMalloc().
+// NOTE(review): the hwloc paths do not use 'minimum_alignment'; hwloc
+// documents its allocations as page-aligned where possible -- confirm that
+// satisfies all callers.
void* NUMAMalloc(int node, size_t size, int minimum_alignment) {
+#ifdef TENSORFLOW_USE_NUMA
+  if (HaveHWLocTopology()) {
+    hwloc_obj_t numa_node = GetHWLocTypeIndex(HWLOC_OBJ_NUMANODE, node);
+    if (numa_node) {
+      return hwloc_alloc_membind(hwloc_topology_handle, size,
+                                 numa_node->nodeset, HWLOC_MEMBIND_BIND,
+                                 HWLOC_MEMBIND_BYNODESET);
+    }
+    LOG(ERROR) << "Failed to find hwloc NUMA node " << node;
+    // NUMAFree() releases memory with hwloc_free() whenever the topology is
+    // available, so the unbound fallback must also come from hwloc; memory
+    // from AlignedMalloc() must not be handed to hwloc_free().
+    return hwloc_alloc(hwloc_topology_handle, size);
+  }
+#endif  // TENSORFLOW_USE_NUMA
return AlignedMalloc(size, minimum_alignment);
}
-void NUMAFree(void* ptr, size_t size) { Free(ptr); }
+// Releases memory obtained from NUMAMalloc(). With NUMA support and a usable
+// hwloc topology the block is handed to hwloc_free() together with its
+// 'size'; in every other case it goes to Free().
+void NUMAFree(void* ptr, size_t size) {
+#ifdef TENSORFLOW_USE_NUMA
+  if (!HaveHWLocTopology()) {
+    Free(ptr);
+  } else {
+    hwloc_free(hwloc_topology_handle, ptr, size);
+  }
+#else
+  Free(ptr);
+#endif  // TENSORFLOW_USE_NUMA
+}
-int NUMAGetMemAffinity(const void* addr) { return kNUMANoAffinity; }
+// Returns the os_index of the first NUMA node whose nodeset contains the
+// location hwloc reports for the memory at 'addr'. Returns kNUMANoAffinity
+// when 'addr' is null, NUMA support or the hwloc topology is unavailable, the
+// query fails, or hwloc reports no location for the area.
+int NUMAGetMemAffinity(const void* addr) {
+  int node = kNUMANoAffinity;
+#ifdef TENSORFLOW_USE_NUMA
+  if (HaveHWLocTopology() && addr) {
+    hwloc_nodeset_t nodeset = hwloc_bitmap_alloc();
+    if (hwloc_get_area_memlocation(hwloc_topology_handle, addr, 4, nodeset,
+                                   HWLOC_MEMBIND_BYNODESET)) {
+      LOG(ERROR) << "Failed call to hwloc_get_area_memlocation.";
+    } else if (!hwloc_bitmap_iszero(nodeset)) {
+      // An empty nodeset is considered included in every node's nodeset, so
+      // it is skipped here rather than being reported as the first node.
+      hwloc_obj_t obj = nullptr;
+      while ((obj = hwloc_get_next_obj_by_type(
+                  hwloc_topology_handle, HWLOC_OBJ_NUMANODE, obj)) != nullptr) {
+        if (hwloc_bitmap_isincluded(nodeset, obj->nodeset)) {
+          node = obj->os_index;
+          break;
+        }
+      }
+    }
+    // Release the bitmap on every path, including the failure branch.
+    hwloc_bitmap_free(nodeset);
+  }
+#endif  // TENSORFLOW_USE_NUMA
+  return node;
+}
void MallocExtension_ReleaseToSystem(std::size_t num_bytes) {
// No-op.
diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD
index 0c81ebe..525c05b 100644
--- a/tensorflow/tools/lib_package/BUILD
+++ b/tensorflow/tools/lib_package/BUILD
@@ -130,6 +130,7 @@
"@gemmlowp//:LICENSE",
"@gif_archive//:COPYING",
"@highwayhash//:LICENSE",
+ "@hwloc//:LICENSE",
"@icu//:icu4c/LICENSE",
"@jpeg//:LICENSE.md",
"@llvm//:LICENSE.TXT",
@@ -199,6 +200,7 @@
"@gemmlowp//:LICENSE",
"@gif_archive//:COPYING",
"@highwayhash//:LICENSE",
+ "@hwloc//:LICENSE",
"@icu//:icu4j/main/shared/licenses/LICENSE",
"@jpeg//:LICENSE.md",
"@llvm//:LICENSE.TXT",
diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD
index 90dfca2..88f13a0 100644
--- a/tensorflow/tools/pip_package/BUILD
+++ b/tensorflow/tools/pip_package/BUILD
@@ -171,6 +171,7 @@
"@gemmlowp//:LICENSE",
"@gif_archive//:COPYING",
"@highwayhash//:LICENSE",
+ "@hwloc//:LICENSE",
"@icu//:icu4c/LICENSE",
"@jpeg//:LICENSE.md",
"@keras_applications_archive//:LICENSE",