Implement "Greedy by size planner" memory optimization

The planner optimizes intermediate tensor memory allocation to reduce the memory footprint. See http://arxiv.org/abs/2001.03288 for details.

This optimization can be turned off at build time by passing -DXNNPACK_ENABLE_MEMOPT=OFF to CMake, or --define=xnnpack_enable_memopt=false when using Bazel.

PiperOrigin-RevId: 311471060
diff --git a/CMakeLists.txt b/CMakeLists.txt
index db7146d..d42776d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -15,6 +15,7 @@
 SET(XNNPACK_LIBRARY_TYPE "default" CACHE STRING "Type of library (shared, static, or default) to build")
 SET_PROPERTY(CACHE XNNPACK_LIBRARY_TYPE PROPERTY STRINGS default static shared)
 OPTION(XNNPACK_ENABLE_ASSEMBLY "Build XNNPACK with assembly micro-kernels" ON)
+OPTION(XNNPACK_ENABLE_MEMOPT "Build XNNPACK with optimized memory allocation scheme" ON)
 OPTION(XNNPACK_BUILD_TESTS "Build XNNPACK unit tests" ON)
 OPTION(XNNPACK_BUILD_BENCHMARKS "Build XNNPACK benchmarks" ON)
 OPTION(XNNPACK_USE_SYSTEM_LIBS "Use system-provided dependency libraries" OFF)
@@ -32,6 +33,12 @@
   ADD_DEFINITIONS(-DXNN_ENABLE_ASSEMBLY=0)
 ENDIF()
 
+IF(XNNPACK_ENABLE_MEMOPT)
+  ADD_DEFINITIONS(-DXNN_ENABLE_MEMOPT=1)
+ELSE()
+  ADD_DEFINITIONS(-DXNN_ENABLE_MEMOPT=0)
+ENDIF()
+
 IF(CMAKE_C_COMPILER_ID STREQUAL "MSVC")
   # Disable "unary minus operator applied to unsigned type, result still unsigned" warning
   ADD_COMPILE_OPTIONS("/wd4146")
@@ -177,6 +184,7 @@
 LIST(APPEND XNNPACK_COLD_SRCS
   src/init.c
   src/memory.c
+  src/memory-planner.c
   src/operator-delete.c
   src/runtime.c
   src/subgraph.c