/* Copyright 2019 Google LLC. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Internal library of miscellaneous helpers.

#ifndef TENSORFLOW_LITE_EXPERIMENTAL_RUY_COMMON_H_
#define TENSORFLOW_LITE_EXPERIMENTAL_RUY_COMMON_H_

#include <atomic>
#include <limits>
#include <type_traits>
#include <utility>

#include "check_macros.h"
#include "matrix.h"
#include "opt_set.h"
#include "path.h"
#include "size_util.h"

#ifdef __aarch64__
#include <arm_neon.h>
#endif
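
// RUY_PREFETCH(X) expands to X only when the RUY_OPT_PREFETCH optimization is
// enabled in RUY_OPT_SET, so prefetch statements compile away otherwise.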
#if RUY_OPT_SET & RUY_OPT_PREFETCH
#define RUY_PREFETCH(X) X
#else
#define RUY_PREFETCH(X)
#endif
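
// Two-level stringization: RUY_STR(s) macro-expands s before turning it into
// a string literal.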
#define RUY_STR(s) RUY_STR_UNEXPANDED(s)
#define RUY_STR_UNEXPANDED(s) #s

namespace ruy {
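
// Initializes *layout as a packed layout of the given shape and storage
// order, with no kernel blocking (1x1 kernel block).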
inline void MakeSimpleLayout(int rows, int cols, Order order, Layout* layout) {
  layout->rows = rows;
  layout->cols = cols;
  layout->order = order;
  layout->stride = order == Order::kColMajor ? rows : cols;
  layout->kernel.order = order;
  layout->kernel.rows = 1;
  layout->kernel.cols = 1;
}
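
// Example (illustrative): MakeSimpleLayout(3, 4, Order::kColMajor, &layout)
// gives stride == 3, so with Offset() below, element (row, col) lives at
// offset row + col * 3.

// Layout predicates. A layout is "linear" when it has no kernel blocking
// (a 1x1 kernel block), and "packed" when its stride equals its inner
// dimension, i.e. there are no gaps between consecutive columns (col-major)
// or rows (row-major).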
inline bool IsLinear(const Layout& layout) {
  return layout.kernel.rows == 1 && layout.kernel.cols == 1;
}

inline bool IsPacked(const Layout& layout) {
  if (layout.order == Order::kColMajor) {
    return layout.stride == layout.rows;
  } else {
    return layout.stride == layout.cols;
  }
}

inline bool IsPackedLinear(const Layout& layout) {
  return IsPacked(layout) && IsLinear(layout);
}

inline bool IsRowMajor(const Layout& layout) {
  return layout.order == Order::kRowMajor;
}

inline bool IsColMajor(const Layout& layout) {
  return layout.order == Order::kColMajor;
}

inline bool IsLinearColMajor(const Layout& layout) {
  return IsLinear(layout) && IsColMajor(layout);
}

inline bool IsPackedLinearColMajor(const Layout& layout) {
  return IsLinearColMajor(layout) && IsPacked(layout);
}

inline bool IsLinearRowMajor(const Layout& layout) {
  return IsLinear(layout) && IsRowMajor(layout);
}

inline bool IsPackedLinearRowMajor(const Layout& layout) {
  return IsLinearRowMajor(layout) && IsPacked(layout);
}
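
// Number of scalar elements spanned by the layout in memory: stride times the
// outer (strided) dimension.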
inline int FlatSize(const Layout& layout) {
  const int outerdim =
      layout.order == Order::kColMajor ? layout.cols : layout.rows;
  return layout.stride * outerdim;
}
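
// Returns the offset of element (row, col) within the given layout: simple
// row/column strides for linear layouts, and an outer-block plus within-block
// decomposition for kernel-blocked layouts (kernel dims must be powers of
// two).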
// TODO(b/130417400) add a unit test
inline int Offset(const Layout& layout, int row, int col) {
  // TODO(benoitjacob) - should check this, but it makes the _slow tests take
  // 5x longer. Find a mitigation like in Eigen with an 'internal' variant
  // bypassing the check?
  // RUY_DCHECK_GE(row, 0);
  // RUY_DCHECK_GE(col, 0);
  // RUY_DCHECK_LT(row, layout.rows);
  // RUY_DCHECK_LT(col, layout.cols);
  if (IsLinear(layout)) {
    int row_stride = layout.order == Order::kColMajor ? 1 : layout.stride;
    int col_stride = layout.order == Order::kRowMajor ? 1 : layout.stride;
    return row * row_stride + col * col_stride;
  } else {
    RUY_DCHECK(is_pot(layout.kernel.rows));
    RUY_DCHECK(is_pot(layout.kernel.cols));
    int row_outer = row & ~(layout.kernel.rows - 1);
    int col_outer = col & ~(layout.kernel.cols - 1);
    int row_stride_outer =
        layout.order == Order::kColMajor ? layout.kernel.cols : layout.stride;
    int col_stride_outer =
        layout.order == Order::kRowMajor ? layout.kernel.rows : layout.stride;
    int offset_outer =
        row_outer * row_stride_outer + col_outer * col_stride_outer;
    int row_inner = row - row_outer;
    int col_inner = col - col_outer;
    int row_stride_inner =
        layout.kernel.order == Order::kColMajor ? 1 : layout.kernel.cols;
    int col_stride_inner =
        layout.kernel.order == Order::kRowMajor ? 1 : layout.kernel.rows;
    int offset_inner =
        row_inner * row_stride_inner + col_inner * col_stride_inner;
    return offset_outer + offset_inner;
  }
}
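
// Example (illustrative): with a col-major layout of stride 8 and a col-major
// 4x2 kernel block, Offset(layout, 5, 3) decomposes as
//   outer: (5 & ~3) * 2 + (3 & ~1) * 8 == 24
//   inner: (5 - 4) * 1 + (3 - 2) * 4  ==  5
// i.e. offset 29.

// Element accessors, addressing mat.data through Offset().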
template <typename Scalar>
const Scalar* ElementPtr(const Matrix<Scalar>& mat, int row, int col) {
  return mat.data.get() + Offset(mat.layout, row, col);
}

template <typename Scalar>
Scalar* ElementPtr(Matrix<Scalar>* mat, int row, int col) {
  return mat->data.get() + Offset(mat->layout, row, col);
}

template <typename Scalar>
Scalar Element(const Matrix<Scalar>& mat, int row, int col) {
  return *ElementPtr(mat, row, col);
}

// We need this where we have multiple threads potentially writing concurrently
// to the same memory location. That is currently the case for Pack (see
// the comment in TrMulTask where Pack is called) and in tracing.
//
// This is a strict-aliasing violation. For nicer things, see C++20 atomic_ref
// and the defunct N4013. (Thanks to hboehm@).
template <typename T>
void relaxed_atomic_store(T* ptr, T value) {
  static_assert(sizeof(std::atomic<T>) == sizeof(T), "");
  std::atomic<T>* atomic = reinterpret_cast<std::atomic<T>*>(ptr);
  RUY_DCHECK(atomic->is_lock_free());
  atomic->store(value, std::memory_order_relaxed);
}
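
// Zero point that makes the representable range of Scalar symmetric around
// it: 0 for floating-point and signed integral types, and the midpoint of the
// range (e.g. 128 for uint8_t) for unsigned integral types.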
template <typename Scalar>
Scalar SymmetricZeroPoint() {
  if (std::is_floating_point<Scalar>::value) {
    return 0;
  }
  if (std::is_signed<Scalar>::value) {
    return 0;
  }
  return std::numeric_limits<Scalar>::max() / 2 + 1;
}
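
// Forward declaration of the TrMul (transposed-LHS matrix multiplication)
// implementation template; specializations per Path and scalar/Spec types are
// provided elsewhere in ruy.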
template <Path ThePath, typename LhsScalar, typename RhsScalar,
          typename DstScalar, typename Spec>
struct TrMulImpl;
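
// Compile-time description of a kernel block layout: its storage order and
// fixed dimensions, exposed as constexpr members.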
template <Order tOrder, int tRows, int tCols>
struct FixedKernelLayout {
  static constexpr Order kOrder = tOrder;
  static constexpr int kRows = tRows;
  static constexpr int kCols = tCols;
};
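
// Transposition helpers: flip the storage order and swap row/column
// dimensions for an Order, a Layout (including its kernel block), or a
// Matrix.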
inline void Transpose(Order* order) {
  *order = *order == Order::kColMajor ? Order::kRowMajor : Order::kColMajor;
}

inline void Transpose(Layout* layout) {
  Transpose(&layout->order);
  Transpose(&layout->kernel.order);
  std::swap(layout->rows, layout->cols);
  std::swap(layout->kernel.rows, layout->kernel.cols);
}

template <typename Scalar>
inline void Transpose(Matrix<Scalar>* matrix) {
  Transpose(&matrix->layout);
}

}  // namespace ruy

#endif  // TENSORFLOW_LITE_EXPERIMENTAL_RUY_COMMON_H_