arm_compute v20.02

commit: 36ccc90158c65844b3c53c5462c6d7d852b97278 [log] [tgz]
author: Jenkins <bsgcomp@arm.com> Fri Feb 21 11:10:48 2020 +0000
committer: Jenkins <bsgcomp@arm.com> Fri Feb 21 11:10:48 2020 +0000
tree: 71d95e6c8af0bb8ca909bdc96dc3954963eafb59
parent: 7f09cf7dfba61aad6d6d588b1b096f56bafaad5e [diff] [blame]
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index 62f1f2c..379b0aa 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox

@@ -1,5 +1,5 @@
 ///
-/// Copyright (c) 2017-2019 ARM Limited.
+/// Copyright (c) 2017-2020 ARM Limited.
 ///
 /// SPDX-License-Identifier: MIT
 ///
@@ -76,9 +76,10 @@
 	│   ├── graph.h --> Includes all the Graph headers at once.
 	│   ├── core
 	│   │   ├── CL
+	│   │   │   ├── CLCoreRuntimeContext.h --> Manages all core OpenCL objects needed for kernel execution (cl_context, cl_kernel, cl_command_queue, etc.).
 	│   │   │   ├── CLKernelLibrary.h --> Manages all the OpenCL kernels compilation and caching, provides accessors for the OpenCL Context.
 	│   │   │   ├── CLKernels.h --> Includes all the OpenCL kernels at once
-	│   │   │   ├── CL specialisation of all the generic objects interfaces (ICLTensor, ICLImage, etc.)
+	│   │   │   ├── CL specialisation of all the generic objects interfaces (ICLTensor, ICLArray, etc.)
 	│   │   │   ├── kernels --> Folder containing all the OpenCL kernels
 	│   │   │   │   └── CL*Kernel.h
 	│   │   │   └── OpenCL.h --> Wrapper to configure the Khronos OpenCL C++ header
@@ -87,9 +88,10 @@
 	│   │   │   └── kernels --> Folder containing all the CPP kernels
 	│   │   │       └── CPP*Kernel.h
 	│   │   ├── GLES_COMPUTE
+	│   │   │   ├── GCCoreRuntimeContext.h --> Manages all core GLES objects needed for kernel execution.
 	│   │   │   ├── GCKernelLibrary.h --> Manages all the GLES kernels compilation and caching, provides accessors for the GLES Context.
 	│   │   │   ├── GCKernels.h --> Includes all the GLES kernels at once
-	│   │   │   ├── GLES specialisation of all the generic objects interfaces (IGCTensor, IGCImage, etc.)
+	│   │   │   ├── GLES specialisation of all the generic objects interfaces (IGCTensor, etc.)
 	│   │   │   ├── kernels --> Folder containing all the GLES kernels
 	│   │   │   │   └── GC*Kernel.h
 	│   │   │   └── OpenGLES.h --> Wrapper to configure the Khronos EGL and OpenGL ES C header
@@ -102,10 +104,15 @@
 	│   │   │   │   │   └── winograd --> headers for Winograd convolution assembly implementation
 	│   │   │   │   ├── detail --> Common code for several intrinsics implementations.
 	│   │   │   │   └── NE*Kernel.h
+	│   │   │   ├── wrapper --> NEON wrapper used to simplify code
+	│   │   │   │   ├── intrinsics --> NEON intrinsics' wrappers
+	│   │   │   │   ├── scalar --> Scalar operations
+	│   │   │   │   ├── traits.h --> Traits defined on NEON vectors
+	│   │   │   │   └── wrapper.h --> Includes all wrapper headers at once
 	│   │   │   └── NEKernels.h --> Includes all the NEON kernels at once
 	│   │   ├── All common basic types (Types.h, Window, Coordinates, Iterator, etc.)
-	│   │   ├── All generic objects interfaces (ITensor, IImage, etc.)
-	│   │   └── Objects metadata classes (ImageInfo, TensorInfo, MultiImageInfo)
+	│   │   ├── All generic objects interfaces (ITensor, IArray, etc.)
+	│   │   └── Objects metadata classes (TensorInfo, MultiImageInfo)
 	│   ├── graph
 	│   │   ├── algorithms
 	│   │   │   └── Generic algorithms used by the graph backend (e.g Order of traversal)
@@ -125,12 +132,15 @@
 	│   │   │   └── Debug printers
 	│   │   └── Graph objects ( INode, ITensorAccessor, Graph, etc.)
 	│   └── runtime
+	│       ├── common
+	│       │   └── Common utility code used by all backends
 	│       ├── CL
-	│       │   ├── CL objects & allocators (CLArray, CLImage, CLTensor, etc.)
+	│       │   ├── CL objects & allocators (CLArray, CLTensor, etc.)
 	│       │   ├── functions --> Folder containing all the OpenCL functions
 	│       │   │   └── CL*.h
 	│       │   ├── CLScheduler.h --> Interface to enqueue OpenCL kernels and get/set the OpenCL CommandQueue and ICLTuner.
 	│       │   ├── CLFunctions.h --> Includes all the OpenCL functions at once
+	│       │   ├── ICLTuner.h --> Interface used to tune the local work-group size of OpenCL kernels
 	│       │   └── tuners
 	│       │       └── Local workgroup size tuners for specific architectures / GPUs
 	│       ├── CPP
@@ -139,7 +149,7 @@
 	│       │   └── functions --> Folder containing all the CPP functions
 	│       │       └── CPP*.h
 	│       ├── GLES_COMPUTE
-	│       │   ├── GLES objects & allocators (GCArray, GCImage, GCTensor, etc.)
+	│       │   ├── GLES objects & allocators (GCArray, GCTensor, etc.)
 	│       │   ├── functions --> Folder containing all the GLES functions
 	│       │   │   └── GC*.h
 	│       │   ├── GCScheduler.h --> Interface to enqueue GLES kernels and get/set the GLES CommandQueue.
@@ -150,14 +160,14 @@
 	│       │   └── NEFunctions.h --> Includes all the NEON functions at once
 	│       ├── OMP
 	│       │   └── OMPScheduler.h --> OpenMP scheduler (Alternative to the CPPScheduler)
-	│       ├── Memory manager files (LifetimeManager, PoolManager, etc.)
-	│       └── Basic implementations of the generic object interfaces (Array, Image, Tensor, etc.)
-	├── data -> Contains test images and reference data dumps used by validation tests
-	├── docs -> Contains Doxyfile and Doxygen sources used to generate the HTML pages in the documentation folder.
+	│       ├── Memory & weights manager files (LifetimeManager, PoolManager, etc.)
+	│       └── Basic implementations of the generic object interfaces (Array, Tensor, etc.)
+	├── data --> Contains test images and reference data dumps used by validation tests
+	├── docs --> Contains Doxyfile and Doxygen sources used to generate the HTML pages in the documentation folder.
 	├── documentation
 	│   ├── index.xhtml
 	│   └── ...
-	├── documentation.xhtml -> documentation/index.xhtml
+	├── documentation.xhtml --> documentation/index.xhtml
 	├── examples
 	│   ├── cl_*.cpp --> OpenCL examples
 	│   ├── gc_*.cpp --> GLES compute shaders examples
@@ -169,13 +179,10 @@
 	│   │   └── Khronos OpenCL C headers and C++ wrapper
 	│   ├── half --> FP16 library available from http://half.sourceforge.net
 	│   ├── libnpy --> Library to load / write npy buffers, available from https://github.com/llohse/libnpy
-	│   └── linux --> Headers only needed for Linux builds
-	│       └── Khronos EGL and OpenGLES headers
-	├── opencl-1.2-stubs
-	│   └── opencl_stubs.c --> OpenCL stubs implementation
-	├── opengles-3.1-stubs
-	│   ├── EGL.c --> EGL stubs implementation
-	│   └── GLESv2.c --> GLESv2 stubs implementation
+	│   ├── linux --> Headers only needed for Linux builds
+	│   │   └── Khronos EGL and OpenGLES headers
+	│   └── stb
+	│       └── stb_image.h --> Single header library to load image files, available from https://github.com/nothings/stb
 	├── scripts
 	│   ├── caffe_data_extractor.py --> Basic script to export weights from Caffe to npy files
 	│   └── tensorflow_data_extractor.py --> Basic script to export weights from Tensor Flow to npy files
@@ -201,6 +208,7 @@
 	│   │   ├── CL --> OpenCL benchmarking tests
 	│   │   ├── GLES_COMPUTE --> GLES benchmarking tests
 	│   │   └── NEON --> NEON benchmarking tests
+	│   ├── benchmark_examples --> Sources needed to wrap examples to run through our benchmarking framework.
 	│   ├── CL --> OpenCL accessors
 	│   ├── GLES_COMPUTE --> GLES accessors
 	│   ├── NEON --> NEON accessors
@@ -208,6 +216,8 @@
 	│   │   └── Datasets for all the validation / benchmark tests, layer configurations for various networks, etc.
 	│   ├── framework
 	│   │   └── Boiler plate code for both validation and benchmark test suites (Command line parsers, instruments, output loggers, etc.)
+	│   ├── instruments --> User defined instruments that can be registered to the framework.
+	│   ├── validate_examples --> Sources needed to wrap examples to run through our validation framework.
 	│   └── validation --> Sources for validation
 	│       ├── Validation specific files
 	│       ├── fixtures
@@ -236,6 +246,53 @@
 
 @subsection S2_2_changelog Changelog
 
+v20.02 Public major release
+ - Various bug fixes.
+ - Various optimisations.
+ - Added new data type QASYMM8_SIGNED support for:
+     - @ref CLDepthwiseConvolutionLayer
+     - @ref CLDepthwiseConvolutionLayer3x3
+     - @ref CLGEMMConvolutionLayer
+     - @ref CLGEMMLowpMatrixMultiplyCore
+     - @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel
+     - @ref CLGEMMLowpMatrixMultiplyNativeKernel
+     - @ref NEActivationLayer
+     - @ref NEComparisonOperationKernel
+     - @ref NEConvolutionLayer
+     - @ref NEDepthwiseConvolutionLayer
+     - @ref NEDepthwiseConvolutionLayer3x3Kernel
+     - @ref NEDirectConvolutionLayerOutputStageKernel
+     - @ref NEElementwiseComparison
+     - @ref NEElementwiseMax
+     - @ref NEElementwiseMin
+     - @ref NEElementwiseSquaredDiff
+     - @ref NEFullyConnectedLayer
+     - @ref NEGEMMMatrixVectorMultiplyKernel
+     - @ref NEPixelWiseMultiplication
+     - @ref NEPoolingLayer
+     - @ref NEPReluLayer
+ - Added support for QSYMM8_PER_CHANNEL in:
+     - @ref NEDepthwiseConvolutionLayer3x3Kernel
+ - Added support for split sizes in:
+     - @ref CLSplit
+     - @ref NESplit
+ - New OpenCL kernels / functions:
+     - @ref CLFill
+     - @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel / @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
+ - New NEON kernels / functions:
+     - @ref NEFill
+     - @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel / @ref NEGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
+ - Deprecated functions / interfaces:
+     - @ref CLDepthwiseConvolutionLayer3x3
+     - @ref NEDepthwiseConvolutionLayerOptimized
+     - @ref PoolingLayerInfo constructors without Data Layout.
+ - Added support for quantization with multiplier greater than 1 on NEON and CL.
+ - Added support for quantized inputs of type QASYMM8_SIGNED and QASYMM8 to @ref CLQuantizationLayer.
+ - Added the ability to build bootcode for bare metal.
+ - Added support for generating synthetic QASYMM8 graphs.
+ - Added support for F16 datatype in VGG16.
+ - Removed pre-built binaries for GLES.
+
 v19.11.1 Public maintenance release
  - Fix offset calculation in NEReductionOperationKernel.
  - Fix data layout in NEScaleKernel for nhwc.
@@ -1221,6 +1278,7 @@
 @note If compiling using static libraries, this order must be followed when linking: arm_compute_graph_static, arm_compute, arm_compute_core
 
 @note These two commands assume libarm_compute.so is available in your library path, if not add the path to it using -L
+@note You might need to export the path to the OpenCL library as well in your LD_LIBRARY_PATH if Compute Library was built with OpenCL enabled.
 
 To run the built executable simply run:
 
@@ -1242,25 +1300,25 @@
 @subsection S3_3_android Building for Android
 
 For Android, the library was successfully built and tested using Google's standalone toolchains:
- - clang++ from NDK r17b for armv7a
- - clang++ from NDK r17b for arm64-v8a
+ - clang++ from NDK r17c for armv7a
+ - clang++ from NDK r17c for arm64-v8a
  - clang++ from NDK r18-beta1 for arm64-v8.2-a with FP16 support
 
 Here is a guide to <a href="https://developer.android.com/ndk/guides/standalone_toolchain.html">create your Android standalone toolchains from the NDK</a>
 
-- Download the NDK r17b from here: https://developer.android.com/ndk/downloads/index.html
+- Download the NDK r17c from here: https://developer.android.com/ndk/downloads/index.html
 - Make sure you have Python 2.7 installed on your machine.
 - Generate the 32 and/or 64 toolchains by running the following commands:
 
 
-	$NDK/build/tools/make_standalone_toolchain.py --arch arm64 --install-dir $MY_TOOLCHAINS/aarch64-linux-android-ndk-r17b --stl libc++ --api 21
-	$NDK/build/tools/make_standalone_toolchain.py --arch arm --install-dir $MY_TOOLCHAINS/arm-linux-android-ndk-r17b --stl libc++ --api 21
+	$NDK/build/tools/make_standalone_toolchain.py --arch arm64 --install-dir $MY_TOOLCHAINS/aarch64-linux-android-ndk-r17c --stl libc++ --api 21
+	$NDK/build/tools/make_standalone_toolchain.py --arch arm --install-dir $MY_TOOLCHAINS/arm-linux-android-ndk-r17c --stl libc++ --api 21
 
 @attention We used to use gnustl but as of NDK r17 it is deprecated so we switched to libc++
 
 @note Make sure to add the toolchains to your PATH:
 
-	export PATH=$PATH:$MY_TOOLCHAINS/aarch64-linux-android-ndk-r17b/bin:$MY_TOOLCHAINS/arm-linux-android-ndk-r17b/bin
+	export PATH=$PATH:$MY_TOOLCHAINS/aarch64-linux-android-ndk-r17c/bin:$MY_TOOLCHAINS/arm-linux-android-ndk-r17c/bin
 
 @subsubsection S3_3_1_library How to build the library ?
commit	36ccc90158c65844b3c53c5462c6d7d852b97278	[log] [tgz]
author	Jenkins <bsgcomp@arm.com>	Fri Feb 21 11:10:48 2020 +0000
committer	Jenkins <bsgcomp@arm.com>	Fri Feb 21 11:10:48 2020 +0000
tree	71d95e6c8af0bb8ca909bdc96dc3954963eafb59
parent	7f09cf7dfba61aad6d6d588b1b096f56bafaad5e [diff] [blame]