Blame - eval/f32-exp.cc - platform/external/XNNPACK

blob: d8f7c4d40c64a217c04879d4fb692ad27ccff648 [file] [log] [blame]

Marat Dukhan	6adff4e	2019-10-14 18:32:07 -0700	[diff] [blame]	1	// Copyright 2019 Google LLC
				2	//
				3	// This source code is licensed under the BSD-style license found in the
				4	// LICENSE file in the root directory of this source tree.
				5
				6	#include <algorithm>
				7	#include <cfloat>
				8	#include <cmath>
				9	#include <functional>
				10	#include <random>
				11	#include <vector>
				12
				13	#include <benchmark/benchmark.h>
				14	#include <fp16/fp16.h>
				15
				16	#include <xnnpack/AlignedAllocator.h>
				17	#include <xnnpack/common.h>
				18	#include <xnnpack/math-stubs.h>
				19
				20
				21	static void ExpError(benchmark::State& state,
				22	xnn_f32_unary_math_function exp,
				23	size_t tile_size)
				24	{
				25	// The smallest x for which expf(x) is non-zero (-0x1.9FE368p+6f).
				26	const uint32_t min_input = 0xC2CFF1B4;
				27	// The largest x for which expf(x) is finite (0x1.62E42Ep6f).
				28	const uint32_t max_input = 0x42B17217;
				29	// Number of tiles in one block of inputs/outputs. Combining multiple tiles in a block reduce function call overhead.
				30	const size_t num_tiles = 100;
				31
				32	double max_ulp_error = 0.0;
				33	std::vector<float, AlignedAllocator<float, 64>> x(tile_size * num_tiles);
				34	std::vector<float, AlignedAllocator<float, 64>> y(tile_size * num_tiles);
				35	for (auto _ : state) {
				36	for (uint32_t n = min_input; int32_t(n) < 0; n -= tile_size * num_tiles) {
				37	for (uint32_t i = 0; i < tile_size * num_tiles; i++) {
				38	x[i] = fp32_from_bits(std::max<uint32_t>(n - i, 0x80000000));
				39	}
				40	std::fill(y.begin(), y.end(), std::nanf(""));
				41
				42	exp(tile_size * num_tiles * sizeof(float), x.data(), y.data());
				43
				44	for (uint32_t i = 0; i < tile_size * num_tiles; i++) {
				45	const double y_ref = std::exp(double(x[i]));
				46	const double abs_error = std::abs(y_ref - double(y[i]));
				47	const float y_abs = std::abs(y_ref);
				48	const float y_ulp = fp32_from_bits(fp32_to_bits(y_abs) + 1) - y_abs;
				49	max_ulp_error = std::max<double>(max_ulp_error, abs_error / y_ulp);
				50	}
				51	}
				52	for (uint32_t n = 0; n < max_input; n += tile_size * num_tiles) {
				53	for (uint32_t i = 0; i < tile_size * num_tiles; i++) {
				54	x[i] = fp32_from_bits(std::min<uint32_t>(n + i, max_input));
				55	}
				56	std::fill(y.begin(), y.end(), std::nanf(""));
				57
				58	exp(tile_size * num_tiles * sizeof(float), x.data(), y.data());
				59
				60	for (uint32_t i = 0; i < tile_size * num_tiles; i++) {
				61	const double y_ref = std::exp(double(x[i]));
				62	const double abs_error = std::abs(y_ref - double(y[i]));
				63	const float y_abs = std::abs(y_ref);
				64	const float y_ulp = fp32_from_bits(fp32_to_bits(y_abs) + 1) - y_abs;
				65	max_ulp_error = std::max<double>(max_ulp_error, abs_error / y_ulp);
				66	}
				67	}
				68	}
				69
				70	state.counters["ULPERROR"] = benchmark::Counter(max_ulp_error);
				71	}
				72
				73	#if XNN_ARCH_X86 \|\| XNN_ARCH_X86_64
Marat Dukhan	ffd6840	2019-11-15 15:19:11 -0800	[diff] [blame]	74	static void f32_exp__sse2_p5(benchmark::State& state) {
				75	ExpError(state, xnn_math_f32_exp__sse2_p5, 4);
Marat Dukhan	6adff4e	2019-10-14 18:32:07 -0700	[diff] [blame]	76	}
				77	static void f32_exp__avx2_perm_p3(benchmark::State& state) {
				78	ExpError(state, xnn_math_f32_exp__avx2_perm_p3, 8);
				79	}
				80	static void f32_exp__avx2_perm_p4(benchmark::State& state) {
				81	ExpError(state, xnn_math_f32_exp__avx2_perm_p4, 8);
				82	}
				83	static void f32_exp__avx2_p5(benchmark::State& state) {
				84	ExpError(state, xnn_math_f32_exp__avx2_p5, 8);
				85	}
Marat Dukhan	ffd6840	2019-11-15 15:19:11 -0800	[diff] [blame]	86	static void f32_exp__avx512f_perm2_p2(benchmark::State& state) {
				87	ExpError(state, xnn_math_f32_exp__avx512f_perm2_p2, 16);
				88	}
				89	static void f32_exp__avx512f_perm_p3(benchmark::State& state) {
				90	ExpError(state, xnn_math_f32_exp__avx512f_perm_p3, 16);
				91	}
				92	static void f32_exp__avx512f_p5_scalef(benchmark::State& state) {
				93	ExpError(state, xnn_math_f32_exp__avx512f_p5_scalef, 16);
				94	}
				95	static void f32_exp__avx512f_p5(benchmark::State& state) {
				96	ExpError(state, xnn_math_f32_exp__avx512f_p5, 16);
				97	}
Marat Dukhan	6adff4e	2019-10-14 18:32:07 -0700	[diff] [blame]	98
Marat Dukhan	ffd6840	2019-11-15 15:19:11 -0800	[diff] [blame]	99	BENCHMARK(f32_exp__sse2_p5)->Unit(benchmark::kMillisecond)->Iterations(1);
Marat Dukhan	6adff4e	2019-10-14 18:32:07 -0700	[diff] [blame]	100	BENCHMARK(f32_exp__avx2_perm_p4)->Unit(benchmark::kMillisecond)->Iterations(1);
				101	BENCHMARK(f32_exp__avx2_perm_p3)->Unit(benchmark::kMillisecond)->Iterations(1);
				102	BENCHMARK(f32_exp__avx2_p5)->Unit(benchmark::kMillisecond)->Iterations(1);
Marat Dukhan	ffd6840	2019-11-15 15:19:11 -0800	[diff] [blame]	103	BENCHMARK(f32_exp__avx512f_perm2_p2)->Unit(benchmark::kMillisecond)->Iterations(1);
				104	BENCHMARK(f32_exp__avx512f_perm_p3)->Unit(benchmark::kMillisecond)->Iterations(1);
				105	BENCHMARK(f32_exp__avx512f_p5_scalef)->Unit(benchmark::kMillisecond)->Iterations(1);
				106	BENCHMARK(f32_exp__avx512f_p5)->Unit(benchmark::kMillisecond)->Iterations(1);
Marat Dukhan	6adff4e	2019-10-14 18:32:07 -0700	[diff] [blame]	107	#endif // XNN_ARCH_X86 \|\| XNN_ARCH_X86_64
				108
// ARM / ARM64 kernels. Each wrapper forwards one NEON+FMA kernel to ExpError
// with a tile size of 4 floats.
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
  static void f32_exp__neonfma_lut64_p2(benchmark::State& state) {
    ExpError(state, xnn_math_f32_exp__neonfma_lut64_p2, 4);
  }
  static void f32_exp__neonfma_p5(benchmark::State& state) {
    ExpError(state, xnn_math_f32_exp__neonfma_p5, 4);
  }

  // Every benchmark runs exactly one iteration: a single pass already sweeps
  // the whole input range inside ExpError.
  BENCHMARK(f32_exp__neonfma_lut64_p2)->Unit(benchmark::kMillisecond)->Iterations(1);
  BENCHMARK(f32_exp__neonfma_p5)->Unit(benchmark::kMillisecond)->Iterations(1);
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64
				120
Marat Dukhan	6adff4e	2019-10-14 18:32:07 -0700	[diff] [blame]	121	#ifndef XNNPACK_BENCHMARK_NO_MAIN
				122	BENCHMARK_MAIN();
				123	#endif