Blame - bench/average-pooling.cc - platform/external/XNNPACK

blob: a22890027292d37ce1c4ce728a470383b32061e8 [file] [log] [blame]

XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	1	// Copyright (c) Facebook, Inc. and its affiliates.
				2	// All rights reserved.
				3	//
				4	// Copyright 2019 Google LLC
				5	//
				6	// This source code is licensed under the BSD-style license found in the
				7	// LICENSE file in the root directory of this source tree.
				8
				9	#include <algorithm>
				10	#include <cfloat>
				11	#include <cmath>
				12	#include <functional>
Marat Dukhan	5ce30d9	2020-04-14 03:31:26 -0700	[diff] [blame]	13	#include <limits>
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	14	#include <random>
				15	#include <vector>
				16
				17	#include <xnnpack.h>
				18
				19	#include <benchmark/benchmark.h>
Marat Dukhan	7a16d8b	2020-03-11 04:22:44 -0700	[diff] [blame]	20	#ifdef BENCHMARK_TENSORFLOW_LITE
				21	#include "flatbuffers/include/flatbuffers/flatbuffers.h"
				22	#include "tensorflow/lite/interpreter.h"
				23	#include "tensorflow/lite/kernels/register.h"
				24	#include "tensorflow/lite/model.h"
				25	#include "tensorflow/lite/schema/schema_generated.h"
				26	#include "tensorflow/lite/version.h"
				27	#endif // BENCHMARK_TENSORFLOW_LITE
Frank Barchard	bb4c18b	2019-09-30 11:05:52 -0700	[diff] [blame]	28	#include "bench/utils.h"
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	29
Chao Mei	c664027	2020-07-23 09:35:11 -0700	[diff] [blame]	30	#ifndef XNN_NO_QU8_OPERATORS
Marat Dukhan	08b7a97	2020-07-14 18:17:29 -0700	[diff] [blame]	31	static void xnnpack_average_pooling_qu8(benchmark::State& state, const char* net) {
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	32	const size_t batch_size = state.range(0);
				33	const size_t input_height = state.range(1);
				34	const size_t input_width = state.range(2);
				35	const size_t pooling_size = state.range(3);
				36	const size_t padding_size = state.range(4);
				37	const size_t stride = state.range(5);
				38	const size_t channels = state.range(6);
				39
				40	std::random_device random_device;
				41	auto rng = std::mt19937(random_device());
Marat Dukhan	44f0ca7	2020-08-02 21:46:58 -0700	[diff] [blame^]	42	auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	43
				44	const size_t output_height = (2 * padding_size + input_height - pooling_size) / stride + 1;
				45	const size_t output_width = (2 * padding_size + input_width - pooling_size) / stride + 1;
				46
				47	std::vector<uint8_t> input(batch_size * input_height * input_width * channels + XNN_EXTRA_BYTES / sizeof(uint8_t));
				48	std::generate(input.begin(), input.end(), std::ref(u8rng));
				49	std::vector<uint8_t> output(batch_size * output_height * output_width * channels);
				50	std::fill(output.begin(), output.end(), 0xA5);
				51
Marat Dukhan	04f03be	2019-11-19 12:36:47 -0800	[diff] [blame]	52	xnn_status status = xnn_initialize(nullptr /* allocator */);
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	53	if (status != xnn_status_success) {
				54	state.SkipWithError("failed to initialize XNNPACK");
				55	return;
				56	}
				57
				58	xnn_operator_t pooling_op = nullptr;
Marat Dukhan	08b7a97	2020-07-14 18:17:29 -0700	[diff] [blame]	59	status = xnn_create_average_pooling2d_nhwc_qu8(
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	60	padding_size, padding_size, padding_size, padding_size,
				61	pooling_size, pooling_size,
				62	stride, stride,
				63	channels, channels /* input pixel stride /, channels / output pixel stride */,
				64	127 /* input zero point /, 0.75f / input scale */,
				65	127 /* output zero point /, 1.25f / output scale */,
				66	0, 255,
				67	0 /* flags */, &pooling_op);
				68	if (status != xnn_status_success) {
				69	state.SkipWithError("failed to create Average Pooling operator");
				70	return;
				71	}
				72
Marat Dukhan	08b7a97	2020-07-14 18:17:29 -0700	[diff] [blame]	73	status = xnn_setup_average_pooling2d_nhwc_qu8(
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	74	pooling_op,
				75	batch_size, input_height, input_width,
				76	input.data(), output.data(),
				77	nullptr /* thread pool */);
				78	if (status != xnn_status_success) {
				79	state.SkipWithError("failed to setup Average Pooling operator");
				80	return;
				81	}
				82
				83	for (auto _ : state) {
				84	status = xnn_run_operator(pooling_op, nullptr /* thread pool */);
				85	if (status != xnn_status_success) {
				86	state.SkipWithError("failed to run Average Pooling operator");
				87	return;
				88	}
				89	}
				90
				91	status = xnn_delete_operator(pooling_op);
				92	if (status != xnn_status_success) {
				93	state.SkipWithError("failed to delete Average Pooling operator");
				94	return;
				95	}
				96	pooling_op = nullptr;
				97
Frank Barchard	bb4c18b	2019-09-30 11:05:52 -0700	[diff] [blame]	98	state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
				99
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	100	state.counters["bytes"] = benchmark::Counter(
				101	uint64_t(state.iterations()) *
				102	batch_size * (input_height * input_width + output_height * output_width) * channels * sizeof(uint8_t),
				103	benchmark::Counter::kIsRate);
				104	}
Chao Mei	c664027	2020-07-23 09:35:11 -0700	[diff] [blame]	105	#endif // XNN_NO_QU8_OPERATORS
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	106
Marat Dukhan	7a16d8b	2020-03-11 04:22:44 -0700	[diff] [blame]	107	static void xnnpack_average_pooling_f32(benchmark::State& state, const char* net) {
				108	const size_t batch_size = state.range(0);
				109	const size_t input_height = state.range(1);
				110	const size_t input_width = state.range(2);
				111	const size_t pooling_size = state.range(3);
				112	const size_t padding_size = state.range(4);
				113	const size_t stride = state.range(5);
				114	const size_t channels = state.range(6);
				115
				116	std::random_device random_device;
				117	auto rng = std::mt19937(random_device());
Marat Dukhan	44f0ca7	2020-08-02 21:46:58 -0700	[diff] [blame^]	118	auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
Marat Dukhan	7a16d8b	2020-03-11 04:22:44 -0700	[diff] [blame]	119
				120	const size_t output_height = (2 * padding_size + input_height - pooling_size) / stride + 1;
				121	const size_t output_width = (2 * padding_size + input_width - pooling_size) / stride + 1;
				122
				123	std::vector<float> input(batch_size * input_height * input_width * channels + XNN_EXTRA_BYTES / sizeof(float));
				124	std::generate(input.begin(), input.end(), std::ref(f32rng));
				125	std::vector<float> output(batch_size * output_height * output_width * channels);
				126	std::fill(output.begin(), output.end(), std::nanf(""));
				127
				128	xnn_status status = xnn_initialize(nullptr /* allocator */);
				129	if (status != xnn_status_success) {
				130	state.SkipWithError("failed to initialize XNNPACK");
				131	return;
				132	}
				133
				134	xnn_operator_t pooling_op = nullptr;
				135	status = xnn_create_average_pooling2d_nhwc_f32(
				136	padding_size, padding_size, padding_size, padding_size,
				137	pooling_size, pooling_size,
				138	stride, stride,
				139	channels, channels /* input pixel stride /, channels / output pixel stride */,
				140	-std::numeric_limits<float>::infinity(), std::numeric_limits<float>::infinity(),
				141	0 /* flags */, &pooling_op);
				142	if (status != xnn_status_success) {
				143	state.SkipWithError("failed to create Average Pooling operator");
				144	return;
				145	}
				146
				147	status = xnn_setup_average_pooling2d_nhwc_f32(
				148	pooling_op,
				149	batch_size, input_height, input_width,
				150	input.data(), output.data(),
				151	nullptr /* thread pool */);
				152	if (status != xnn_status_success) {
				153	state.SkipWithError("failed to setup Average Pooling operator");
				154	return;
				155	}
				156
				157	for (auto _ : state) {
				158	status = xnn_run_operator(pooling_op, nullptr /* thread pool */);
				159	if (status != xnn_status_success) {
				160	state.SkipWithError("failed to run Average Pooling operator");
				161	return;
				162	}
				163	}
				164
				165	status = xnn_delete_operator(pooling_op);
				166	if (status != xnn_status_success) {
				167	state.SkipWithError("failed to delete Average Pooling operator");
				168	return;
				169	}
				170	pooling_op = nullptr;
				171
				172	state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
				173
				174	state.counters["bytes"] = benchmark::Counter(
				175	uint64_t(state.iterations()) *
				176	batch_size * (input_height * input_width + output_height * output_width) * channels * sizeof(float),
				177	benchmark::Counter::kIsRate);
				178	}
				179
				180	#ifdef BENCHMARK_TENSORFLOW_LITE
				181	void tflite_average_pooling_f32(benchmark::State& state, const char* net) {
				182	const size_t batch_size = state.range(0);
				183	const size_t input_height = state.range(1);
				184	const size_t input_width = state.range(2);
				185	const size_t pooling_size = state.range(3);
				186	const size_t padding_size = state.range(4);
				187	const size_t stride = state.range(5);
				188	const size_t channels = state.range(6);
				189
				190	std::random_device random_device;
				191	auto rng = std::mt19937(random_device());
Marat Dukhan	44f0ca7	2020-08-02 21:46:58 -0700	[diff] [blame^]	192	auto f32rng = std::bind(std::uniform_real_distribution<float>(), std::ref(rng));
Marat Dukhan	7a16d8b	2020-03-11 04:22:44 -0700	[diff] [blame]	193
				194	tflite::Padding padding = tflite::Padding_VALID;
				195	if (2 * padding_size == (pooling_size - 1)) {
				196	padding = tflite::Padding_SAME;
				197	} else if (padding_size == 0) {
				198	padding = tflite::Padding_VALID;
				199	} else {
				200	state.SkipWithError("unsupported padding");
				201	return;
				202	}
				203
				204	const size_t output_height = (2 * padding_size + input_height - pooling_size) / stride + 1;
				205	const size_t output_width = (2 * padding_size + input_width - pooling_size) / stride + 1;
				206
				207	std::vector<float> input(batch_size * input_height * input_width * channels + XNN_EXTRA_BYTES / sizeof(float));
				208	std::generate(input.begin(), input.end(), std::ref(f32rng));
				209	std::vector<float> output(batch_size * output_height * output_width * channels);
				210	std::fill(output.begin(), output.end(), std::nanf(""));
				211
				212	flatbuffers::FlatBufferBuilder builder;
				213	flatbuffers::Offset<tflite::OperatorCode> operator_code =
				214	CreateOperatorCode(builder, tflite::BuiltinOperator_AVERAGE_POOL_2D);
				215
				216	flatbuffers::Offset<tflite::Pool2DOptions> pool2d_options = CreatePool2DOptions(
				217	builder, padding,
				218	stride /* stride_w /, stride / stride_h */,
				219	pooling_size /* filter_width /, pooling_size / filter_height */,
				220	tflite::ActivationFunctionType_NONE);
				221
				222	flatbuffers::Offset<tflite::Buffer> buffers[1] = {
				223	tflite::CreateBuffer(builder, builder.CreateVector({})),
				224	};
				225
				226	const int32_t input_shape[4] = {
				227	static_cast<int32_t>(batch_size),
				228	static_cast<int32_t>(input_height),
				229	static_cast<int32_t>(input_width),
				230	static_cast<int32_t>(channels)
				231	};
				232	const int32_t output_shape[4] = {
				233	static_cast<int32_t>(batch_size),
				234	static_cast<int32_t>(output_height),
				235	static_cast<int32_t>(output_width),
				236	static_cast<int32_t>(channels)
				237	};
				238
				239	flatbuffers::Offset<tflite::Tensor> tensors[2] = {
				240	tflite::CreateTensor(builder,
				241	builder.CreateVector<int32_t>(input_shape, 4),
				242	tflite::TensorType_FLOAT32),
				243	tflite::CreateTensor(builder,
				244	builder.CreateVector<int32_t>(output_shape, 4),
				245	tflite::TensorType_FLOAT32),
				246	};
				247
				248	const int32_t op_inputs[1] = { 0 };
				249	const int32_t op_outputs[1] = { 1 };
				250	flatbuffers::Offset<tflite::Operator> op = CreateOperator(
				251	builder,
				252	0 /* opcode_index */,
				253	builder.CreateVector<int32_t>(op_inputs, 1),
				254	builder.CreateVector<int32_t>(op_outputs, 1),
				255	tflite::BuiltinOptions_Pool2DOptions,
				256	pool2d_options.Union());
				257
				258	const int32_t graph_inputs[1] = { 0 };
				259	const int32_t graph_outputs[1] = { 1 };
				260	flatbuffers::Offset<tflite::SubGraph> subgraph = CreateSubGraph(
				261	builder,
				262	builder.CreateVector(tensors, 2),
				263	builder.CreateVector<int32_t>(graph_inputs, 1),
				264	builder.CreateVector<int32_t>(graph_outputs, 1),
				265	builder.CreateVector(&op, 1));
				266
				267	flatbuffers::Offset<tflite::Model> model_buffer = tflite::CreateModel(builder,
				268	TFLITE_SCHEMA_VERSION,
				269	builder.CreateVector(&operator_code, 1),
				270	builder.CreateVector(&subgraph, 1),
				271	builder.CreateString("AVERAGE_POOL_2D model"),
				272	builder.CreateVector(buffers, 1));
				273
				274	builder.Finish(model_buffer);
				275
				276	const tflite::Model* model = tflite::GetModel(builder.GetBufferPointer());
				277	tflite::ops::builtin::BuiltinOpResolver resolver;
				278	tflite::InterpreterBuilder interpreterBuilder(model, resolver);
				279	std::unique_ptr<tflite::Interpreter> interpreter;
				280	if (interpreterBuilder(&interpreter) != kTfLiteOk) {
				281	state.SkipWithError("failed to create TFLite interpreter");
				282	return;
				283	}
				284	if (interpreter == nullptr) {
				285	state.SkipWithError("TFLite interpreter is null");
				286	return;
				287	}
				288	interpreter->SetNumThreads(1);
				289
				290	if (interpreter->AllocateTensors() != kTfLiteOk) {
				291	state.SkipWithError("failed to allocate tensors");
				292	return;
				293	}
				294
				295	std::generate(
				296	interpreter->typed_tensor<float>(0),
				297	interpreter->typed_tensor<float>(0) + batch_size * input_height * input_width * channels,
				298	std::ref(f32rng));
				299
				300	for (auto _ : state) {
				301	if (interpreter->Invoke() != kTfLiteOk) {
				302	state.SkipWithError("failed to invoke TFLite interpreter");
				303	return;
				304	}
				305	}
				306
				307	state.counters["Freq"] = benchmark::utils::GetCurrentCpuFrequency();
				308
				309	state.counters["bytes"] = benchmark::Counter(
				310	uint64_t(state.iterations()) *
				311	batch_size * (input_height * input_width + output_height * output_width) * channels * sizeof(float),
				312	benchmark::Counter::kIsRate);
				313	}
				314	#endif // BENCHMARK_TENSORFLOW_LITE
				315
				316	// Final global average pooling in ImageNet classification models.
				317	static void ImageNet(benchmark::internal::Benchmark* b) {
				318	b->ArgNames({"N", "H", "W", "K", "P", "S", "C"});
				319
				320	/* N H W K P S C */
				321	b->Args({1, 13, 13, 13, 0, 1, 1000});
				322	b->Args({1, 7, 7, 7, 0, 1, 1000});
				323	}
				324
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	325	// ShuffleNet v1 with 1 group.
				326	static void ShuffleNetV1G1(benchmark::internal::Benchmark* b) {
				327	b->ArgNames({"N", "H", "W", "K", "P", "S", "C"});
				328
				329	/* N H W K P S C */
				330	b->Args({1, 56, 56, 3, 1, 2, 24});
				331	b->Args({1, 28, 28, 3, 1, 2, 144});
				332	b->Args({1, 14, 14, 3, 1, 2, 288});
				333	b->Args({1, 7, 7, 3, 1, 2, 576});
				334	}
				335
				336	// ShuffleNet v1 with 2 groups.
				337	static void ShuffleNetV1G2(benchmark::internal::Benchmark* b) {
				338	b->ArgNames({"N", "H", "W", "K", "P", "S", "C"});
				339
				340	/* N H W K P S C */
				341	b->Args({1, 56, 56, 3, 1, 2, 24});
				342	b->Args({1, 28, 28, 3, 1, 2, 200});
				343	b->Args({1, 14, 14, 3, 1, 2, 400});
				344	b->Args({1, 7, 7, 3, 1, 2, 800});
				345	}
				346
				347	// ShuffleNet v1 with 3 groups.
				348	static void ShuffleNetV1G3(benchmark::internal::Benchmark* b) {
				349	b->ArgNames({"N", "H", "W", "K", "P", "S", "C"});
				350
				351	/* N H W K P S C */
				352	b->Args({1, 56, 56, 3, 1, 2, 24});
				353	b->Args({1, 28, 28, 3, 1, 2, 240});
				354	b->Args({1, 14, 14, 3, 1, 2, 480});
				355	b->Args({1, 7, 7, 3, 1, 2, 960});
				356	}
				357
				358	// ShuffleNet v1 with 4 groups.
				359	static void ShuffleNetV1G4(benchmark::internal::Benchmark* b) {
				360	b->ArgNames({"N", "H", "W", "K", "P", "S", "C"});
				361
				362	/* N H W K P S C */
				363	b->Args({1, 56, 56, 3, 1, 2, 24});
				364	b->Args({1, 28, 28, 3, 1, 2, 272});
				365	b->Args({1, 14, 14, 3, 1, 2, 576});
				366	b->Args({1, 7, 7, 3, 1, 2, 1088});
				367	}
				368
				369	// ShuffleNet v1 with 8 groups.
				370	static void ShuffleNetV1G8(benchmark::internal::Benchmark* b) {
				371	b->ArgNames({"N", "H", "W", "K", "P", "S", "C"});
				372
				373	/* N H W K P S C */
				374	b->Args({1, 56, 56, 3, 1, 2, 24});
				375	b->Args({1, 28, 28, 3, 1, 2, 384});
				376	b->Args({1, 14, 14, 3, 1, 2, 768});
				377	b->Args({1, 7, 7, 3, 1, 2, 1536});
				378	}
				379
Marat Dukhan	7a16d8b	2020-03-11 04:22:44 -0700	[diff] [blame]	380	BENCHMARK_CAPTURE(xnnpack_average_pooling_f32, imagenet, "ImageNet")->Apply(ImageNet)->UseRealTime();
				381	BENCHMARK_CAPTURE(xnnpack_average_pooling_f32, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
				382	BENCHMARK_CAPTURE(xnnpack_average_pooling_f32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
				383	BENCHMARK_CAPTURE(xnnpack_average_pooling_f32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
				384	BENCHMARK_CAPTURE(xnnpack_average_pooling_f32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
				385	BENCHMARK_CAPTURE(xnnpack_average_pooling_f32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
				386
				387	#ifdef BENCHMARK_TENSORFLOW_LITE
				388	BENCHMARK_CAPTURE(tflite_average_pooling_f32, imagenet, "ImageNet")->Apply(ImageNet)->UseRealTime();
				389	BENCHMARK_CAPTURE(tflite_average_pooling_f32, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
				390	BENCHMARK_CAPTURE(tflite_average_pooling_f32, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
				391	BENCHMARK_CAPTURE(tflite_average_pooling_f32, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
				392	BENCHMARK_CAPTURE(tflite_average_pooling_f32, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
				393	BENCHMARK_CAPTURE(tflite_average_pooling_f32, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
				394	#endif // BENCHMARK_TENSORFLOW_LITE
				395
Chao Mei	c664027	2020-07-23 09:35:11 -0700	[diff] [blame]	396	#ifndef XNN_NO_QU8_OPERATORS
Marat Dukhan	08b7a97	2020-07-14 18:17:29 -0700	[diff] [blame]	397	BENCHMARK_CAPTURE(xnnpack_average_pooling_qu8, imagenet, "ImageNet")->Apply(ImageNet)->UseRealTime();
				398	BENCHMARK_CAPTURE(xnnpack_average_pooling_qu8, shufflenet_v1_g1, "ShuffleNet v1 (1 group)")->Apply(ShuffleNetV1G1)->UseRealTime();
				399	BENCHMARK_CAPTURE(xnnpack_average_pooling_qu8, shufflenet_v1_g2, "ShuffleNet v1 (2 groups)")->Apply(ShuffleNetV1G2)->UseRealTime();
				400	BENCHMARK_CAPTURE(xnnpack_average_pooling_qu8, shufflenet_v1_g3, "ShuffleNet v1 (3 groups)")->Apply(ShuffleNetV1G3)->UseRealTime();
				401	BENCHMARK_CAPTURE(xnnpack_average_pooling_qu8, shufflenet_v1_g4, "ShuffleNet v1 (4 groups)")->Apply(ShuffleNetV1G4)->UseRealTime();
				402	BENCHMARK_CAPTURE(xnnpack_average_pooling_qu8, shufflenet_v1_g8, "ShuffleNet v1 (8 groups)")->Apply(ShuffleNetV1G8)->UseRealTime();
Chao Mei	c664027	2020-07-23 09:35:11 -0700	[diff] [blame]	403	#endif // XNN_NO_QU8_OPERATORS
XNNPACK Team	b455b12	2019-09-27 18:10:33 -0700	[diff] [blame]	404
				405	#ifndef XNNPACK_BENCHMARK_NO_MAIN
				406	BENCHMARK_MAIN();
				407	#endif