// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <limits>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/common.h>
#include <xnnpack/math-stubs.h>


constexpr int kBlockSize = 1024;
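
// The tests below sweep IEEE-754 binary32 bit patterns in blocks of kBlockSize
// consecutive values and compare each kernel against std::nearbyint(), i.e.
// rounding to the nearest integer with ties to even. Sweep boundaries:
//   0x4B800000 = 2**24; at and above this magnitude every binary32 value is integral
//   0x7F800000 = +infinity; 0x7F800001-0x7FBFFFFF are signaling NaNs
//   0x7FC00000-0x7FFFFFFF are quiet NaNs (OR-ing in 0x80000000 gives the negative counterparts)
// In the *_snan tests the mask 0xFFBFFFFF clears the quiet bit (bit 22), so those
// checks ignore whether the kernel quiets signaling NaNs; the *_snan_to_qnan tests
// compare the full bit pattern and therefore do require quieting.

// SSE variant, assumed (from its name) to round by adding and then subtracting a
// large magic constant rather than using a dedicated rounding instruction.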
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(ROUNDNE__SSE_ADDSUB, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDNE__SSE_ADDSUB, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDNE__SSE_ADDSUB, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, positive_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, negative_snan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE_ADDSUB, negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
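
// SSE2 variant, presumably rounding via conversion to int32 and back
// (CVTPS2DQ/CVTDQ2PS) with a separate path for inputs outside the int32 range.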
232#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan075088a2020-05-12 19:42:12 -0700233 TEST(ROUNDNE__SSE2_CVT, positive_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700234 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
235 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
236 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
237 for (uint32_t i = 0; i < kBlockSize; i++) {
238 inputs[i] = fp32_from_bits(n + i);
239 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700240 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700241 for (uint32_t i = 0; i < kBlockSize; i++) {
242 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
243 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
244 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
245 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
246 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
247 }
248 }
249 }
250
Marat Dukhan075088a2020-05-12 19:42:12 -0700251 TEST(ROUNDNE__SSE2_CVT, negative_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700252 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
253 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
254 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
255 for (uint32_t i = 0; i < kBlockSize; i++) {
256 inputs[i] = fp32_from_bits(n + i);
257 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700258 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700259 for (uint32_t i = 0; i < kBlockSize; i++) {
260 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
261 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
262 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
263 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
264 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
265 }
266 }
267 }
268
Marat Dukhan075088a2020-05-12 19:42:12 -0700269 TEST(ROUNDNE__SSE2_CVT, positive_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700270 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
271 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
272 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
273 for (uint32_t i = 0; i < kBlockSize; i++) {
274 inputs[i] = fp32_from_bits(n + i);
275 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700276 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700277 for (uint32_t i = 0; i < kBlockSize; i++) {
278 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
279 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
280 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
281 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
282 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
283 }
284 }
285 }
286
Marat Dukhan075088a2020-05-12 19:42:12 -0700287 TEST(ROUNDNE__SSE2_CVT, negative_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700288 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
289 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
290 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
291 for (uint32_t i = 0; i < kBlockSize; i++) {
292 inputs[i] = fp32_from_bits(n + i);
293 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700294 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700295 for (uint32_t i = 0; i < kBlockSize; i++) {
296 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
297 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
298 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
299 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
300 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
301 }
302 }
303 }
304
Marat Dukhan075088a2020-05-12 19:42:12 -0700305 TEST(ROUNDNE__SSE2_CVT, positive_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700306 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
307 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700308 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan075088a2020-05-12 19:42:12 -0700309 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700310 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
311 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
312 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
313 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
314 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
315 }
316
Marat Dukhan075088a2020-05-12 19:42:12 -0700317 TEST(ROUNDNE__SSE2_CVT, negative_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700318 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
319 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700320 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan075088a2020-05-12 19:42:12 -0700321 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700322 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
323 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
324 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
325 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
326 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
327 }
328
Marat Dukhan075088a2020-05-12 19:42:12 -0700329 TEST(ROUNDNE__SSE2_CVT, positive_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700330 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
331 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
332 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
333 for (uint32_t i = 0; i < kBlockSize; i++) {
334 inputs[i] = fp32_from_bits(n + i);
335 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700336 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700337 for (uint32_t i = 0; i < kBlockSize; i++) {
338 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
339 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
340 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
341 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
342 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
343 }
344 }
345 }
346
Marat Dukhan075088a2020-05-12 19:42:12 -0700347 TEST(ROUNDNE__SSE2_CVT, negative_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700348 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
349 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
350 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
351 for (uint32_t i = 0; i < kBlockSize; i++) {
352 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
353 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700354 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700355 for (uint32_t i = 0; i < kBlockSize; i++) {
356 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
357 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
358 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
359 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
360 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
361 }
362 }
363 }
364
Marat Dukhan075088a2020-05-12 19:42:12 -0700365 TEST(ROUNDNE__SSE2_CVT, positive_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700366 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
367 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
368 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
369 for (uint32_t i = 0; i < kBlockSize; i++) {
370 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
371 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700372 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700373 for (uint32_t i = 0; i < kBlockSize; i++) {
374 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
375 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
376 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
377 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
378 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
379 }
380 }
381 }
382
Marat Dukhan075088a2020-05-12 19:42:12 -0700383 TEST(ROUNDNE__SSE2_CVT, negative_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700384 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
385 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
386 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
387 for (uint32_t i = 0; i < kBlockSize; i++) {
388 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
389 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700390 xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700391 for (uint32_t i = 0; i < kBlockSize; i++) {
392 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
393 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
394 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
395 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
396 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
397 }
398 }
399 }
400
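
// The two snan_to_qnan checks below require the kernel to quiet signaling NaNs.
// They carry GoogleTest's DISABLED_ prefix, so they are registered but skipped by
// default, presumably because this variant does not guarantee that behavior.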
  TEST(ROUNDNE__SSE2_CVT, DISABLED_positive_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDNE__SSE2_CVT, DISABLED_negative_snan_to_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
      }
      xnn_math_f32_roundne__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }
#endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
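
// SSE4.1 variant, presumably a direct use of the ROUNDPS instruction with the
// round-to-nearest-even rounding mode.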
438#if XNN_ARCH_X86 || XNN_ARCH_X86_64
Marat Dukhan4781dd02020-05-12 15:40:18 -0700439 TEST(ROUNDNE__SSE41, positive_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700440 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
441 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
442 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
443 for (uint32_t i = 0; i < kBlockSize; i++) {
444 inputs[i] = fp32_from_bits(n + i);
445 }
446 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
447 for (uint32_t i = 0; i < kBlockSize; i++) {
448 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
449 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
450 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
451 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
452 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
453 }
454 }
455 }
456
Marat Dukhan4781dd02020-05-12 15:40:18 -0700457 TEST(ROUNDNE__SSE41, negative_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700458 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
459 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
460 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
461 for (uint32_t i = 0; i < kBlockSize; i++) {
462 inputs[i] = fp32_from_bits(n + i);
463 }
464 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
465 for (uint32_t i = 0; i < kBlockSize; i++) {
466 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
467 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
468 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
469 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
470 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
471 }
472 }
473 }
474
Marat Dukhan4781dd02020-05-12 15:40:18 -0700475 TEST(ROUNDNE__SSE41, positive_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700476 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
477 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
478 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
479 for (uint32_t i = 0; i < kBlockSize; i++) {
480 inputs[i] = fp32_from_bits(n + i);
481 }
482 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
483 for (uint32_t i = 0; i < kBlockSize; i++) {
484 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
485 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
486 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
487 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
488 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
489 }
490 }
491 }
492
Marat Dukhan4781dd02020-05-12 15:40:18 -0700493 TEST(ROUNDNE__SSE41, negative_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700494 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
495 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
496 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
497 for (uint32_t i = 0; i < kBlockSize; i++) {
498 inputs[i] = fp32_from_bits(n + i);
499 }
500 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
501 for (uint32_t i = 0; i < kBlockSize; i++) {
502 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
503 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
504 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
505 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
506 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
507 }
508 }
509 }
510
Marat Dukhan4781dd02020-05-12 15:40:18 -0700511 TEST(ROUNDNE__SSE41, positive_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700512 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
513 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700514 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan8853b822020-05-07 12:19:01 -0700515 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
516 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
517 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
518 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
519 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
520 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
521 }
522
Marat Dukhan4781dd02020-05-12 15:40:18 -0700523 TEST(ROUNDNE__SSE41, negative_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700524 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
525 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700526 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan8853b822020-05-07 12:19:01 -0700527 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
528 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
529 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
530 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
531 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
532 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
533 }
534
Marat Dukhan4781dd02020-05-12 15:40:18 -0700535 TEST(ROUNDNE__SSE41, positive_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700536 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
537 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
538 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
539 for (uint32_t i = 0; i < kBlockSize; i++) {
540 inputs[i] = fp32_from_bits(n + i);
541 }
542 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
543 for (uint32_t i = 0; i < kBlockSize; i++) {
544 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
545 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
546 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
547 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
548 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
549 }
550 }
551 }
552
Marat Dukhan4781dd02020-05-12 15:40:18 -0700553 TEST(ROUNDNE__SSE41, negative_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700554 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
555 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
556 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
557 for (uint32_t i = 0; i < kBlockSize; i++) {
558 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
559 }
560 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
561 for (uint32_t i = 0; i < kBlockSize; i++) {
562 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
563 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
564 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
565 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
566 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
567 }
568 }
569 }
570
Marat Dukhan4781dd02020-05-12 15:40:18 -0700571 TEST(ROUNDNE__SSE41, positive_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700572 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
573 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
574 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
575 for (uint32_t i = 0; i < kBlockSize; i++) {
576 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
577 }
578 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
579 for (uint32_t i = 0; i < kBlockSize; i++) {
580 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
581 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
582 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
583 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
584 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
585 }
586 }
587 }
588
Marat Dukhan4781dd02020-05-12 15:40:18 -0700589 TEST(ROUNDNE__SSE41, negative_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700590 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
591 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
592 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
593 for (uint32_t i = 0; i < kBlockSize; i++) {
594 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
595 }
596 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
597 for (uint32_t i = 0; i < kBlockSize; i++) {
598 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
599 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
600 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
601 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
602 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
603 }
604 }
605 }
606
Marat Dukhan4781dd02020-05-12 15:40:18 -0700607 TEST(ROUNDNE__SSE41, positive_snan_to_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700608 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
609 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
610 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
611 for (uint32_t i = 0; i < kBlockSize; i++) {
612 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
613 }
614 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
615 for (uint32_t i = 0; i < kBlockSize; i++) {
616 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
617 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
618 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
619 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
620 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
621 }
622 }
623 }
624
Marat Dukhan4781dd02020-05-12 15:40:18 -0700625 TEST(ROUNDNE__SSE41, negative_snan_to_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700626 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
627 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
628 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
629 for (uint32_t i = 0; i < kBlockSize; i++) {
630 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
631 }
632 xnn_math_f32_roundne__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
633 for (uint32_t i = 0; i < kBlockSize; i++) {
634 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
635 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
636 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
637 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
638 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
639 }
640 }
641 }
642#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
643
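
// NEON variant, assumed (from its name) to use the same add/subtract-a-magic-constant
// rounding approach as the SSE addsub variant above.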
644#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan075088a2020-05-12 19:42:12 -0700645 TEST(ROUNDNE__NEON_ADDSUB, positive_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700646 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
647 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
648 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
649 for (uint32_t i = 0; i < kBlockSize; i++) {
650 inputs[i] = fp32_from_bits(n + i);
651 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700652 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700653 for (uint32_t i = 0; i < kBlockSize; i++) {
654 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
655 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
656 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
657 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
658 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
659 }
660 }
661 }
662
Marat Dukhan075088a2020-05-12 19:42:12 -0700663 TEST(ROUNDNE__NEON_ADDSUB, negative_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700664 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
665 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
666 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
667 for (uint32_t i = 0; i < kBlockSize; i++) {
668 inputs[i] = fp32_from_bits(n + i);
669 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700670 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700671 for (uint32_t i = 0; i < kBlockSize; i++) {
672 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
673 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
674 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
675 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
676 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
677 }
678 }
679 }
680
Marat Dukhan075088a2020-05-12 19:42:12 -0700681 TEST(ROUNDNE__NEON_ADDSUB, positive_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700682 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
683 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
684 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
685 for (uint32_t i = 0; i < kBlockSize; i++) {
686 inputs[i] = fp32_from_bits(n + i);
687 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700688 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700689 for (uint32_t i = 0; i < kBlockSize; i++) {
690 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
691 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
692 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
693 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
694 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
695 }
696 }
697 }
698
Marat Dukhan075088a2020-05-12 19:42:12 -0700699 TEST(ROUNDNE__NEON_ADDSUB, negative_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700700 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
701 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
702 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
703 for (uint32_t i = 0; i < kBlockSize; i++) {
704 inputs[i] = fp32_from_bits(n + i);
705 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700706 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700707 for (uint32_t i = 0; i < kBlockSize; i++) {
708 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
709 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
710 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
711 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
712 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
713 }
714 }
715 }
716
Marat Dukhan075088a2020-05-12 19:42:12 -0700717 TEST(ROUNDNE__NEON_ADDSUB, positive_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700718 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
719 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700720 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan075088a2020-05-12 19:42:12 -0700721 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700722 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
723 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
724 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
725 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
726 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
727 }
728
Marat Dukhan075088a2020-05-12 19:42:12 -0700729 TEST(ROUNDNE__NEON_ADDSUB, negative_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700730 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
731 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700732 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan075088a2020-05-12 19:42:12 -0700733 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700734 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
735 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
736 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
737 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
738 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
739 }
740
Marat Dukhan075088a2020-05-12 19:42:12 -0700741 TEST(ROUNDNE__NEON_ADDSUB, positive_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700742 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
743 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
744 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
745 for (uint32_t i = 0; i < kBlockSize; i++) {
746 inputs[i] = fp32_from_bits(n + i);
747 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700748 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700749 for (uint32_t i = 0; i < kBlockSize; i++) {
750 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
751 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
752 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
753 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
754 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
755 }
756 }
757 }
758
Marat Dukhan075088a2020-05-12 19:42:12 -0700759 TEST(ROUNDNE__NEON_ADDSUB, negative_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700760 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
761 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
762 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
763 for (uint32_t i = 0; i < kBlockSize; i++) {
764 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
765 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700766 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700767 for (uint32_t i = 0; i < kBlockSize; i++) {
768 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
769 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
770 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
771 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
772 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
773 }
774 }
775 }
776
Marat Dukhan075088a2020-05-12 19:42:12 -0700777 TEST(ROUNDNE__NEON_ADDSUB, positive_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700778 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
779 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
780 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
781 for (uint32_t i = 0; i < kBlockSize; i++) {
782 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
783 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700784 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700785 for (uint32_t i = 0; i < kBlockSize; i++) {
786 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
787 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
788 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
789 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
790 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
791 }
792 }
793 }
794
Marat Dukhan075088a2020-05-12 19:42:12 -0700795 TEST(ROUNDNE__NEON_ADDSUB, negative_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700796 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
797 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
798 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
799 for (uint32_t i = 0; i < kBlockSize; i++) {
800 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
801 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700802 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700803 for (uint32_t i = 0; i < kBlockSize; i++) {
804 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
805 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
806 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
807 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
808 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
809 }
810 }
811 }
812
Marat Dukhan075088a2020-05-12 19:42:12 -0700813 TEST(ROUNDNE__NEON_ADDSUB, positive_snan_to_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700814 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
815 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
816 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
817 for (uint32_t i = 0; i < kBlockSize; i++) {
818 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
819 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700820 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700821 for (uint32_t i = 0; i < kBlockSize; i++) {
822 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
823 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
824 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
825 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
826 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
827 }
828 }
829 }
830
Marat Dukhan075088a2020-05-12 19:42:12 -0700831 TEST(ROUNDNE__NEON_ADDSUB, negative_snan_to_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700832 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
833 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
834 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
835 for (uint32_t i = 0; i < kBlockSize; i++) {
836 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
837 }
Marat Dukhan075088a2020-05-12 19:42:12 -0700838 xnn_math_f32_roundne__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -0700839 for (uint32_t i = 0; i < kBlockSize; i++) {
840 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
841 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
842 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
843 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
844 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
845 }
846 }
847 }
848#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
849
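
// ARMv8 NEON variant, presumably using the FRINTN (round to nearest, ties to even)
// instruction introduced in ARMv8.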
850#if XNN_ARCH_ARM || XNN_ARCH_ARM64
Marat Dukhan4781dd02020-05-12 15:40:18 -0700851 TEST(ROUNDNE__NEONV8, positive_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700852 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
853 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
854 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
855 for (uint32_t i = 0; i < kBlockSize; i++) {
856 inputs[i] = fp32_from_bits(n + i);
857 }
858 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
859 for (uint32_t i = 0; i < kBlockSize; i++) {
860 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
861 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
862 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
863 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
864 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
865 }
866 }
867 }
868
Marat Dukhan4781dd02020-05-12 15:40:18 -0700869 TEST(ROUNDNE__NEONV8, negative_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700870 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
871 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
872 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
873 for (uint32_t i = 0; i < kBlockSize; i++) {
874 inputs[i] = fp32_from_bits(n + i);
875 }
876 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
877 for (uint32_t i = 0; i < kBlockSize; i++) {
878 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
879 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
880 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
881 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
882 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
883 }
884 }
885 }
886
Marat Dukhan4781dd02020-05-12 15:40:18 -0700887 TEST(ROUNDNE__NEONV8, positive_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700888 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
889 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
890 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
891 for (uint32_t i = 0; i < kBlockSize; i++) {
892 inputs[i] = fp32_from_bits(n + i);
893 }
894 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
895 for (uint32_t i = 0; i < kBlockSize; i++) {
896 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
897 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
898 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
899 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
900 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
901 }
902 }
903 }
904
Marat Dukhan4781dd02020-05-12 15:40:18 -0700905 TEST(ROUNDNE__NEONV8, negative_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700906 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
907 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
908 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
909 for (uint32_t i = 0; i < kBlockSize; i++) {
910 inputs[i] = fp32_from_bits(n + i);
911 }
912 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
913 for (uint32_t i = 0; i < kBlockSize; i++) {
914 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
915 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
916 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
917 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
918 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
919 }
920 }
921 }
922
Marat Dukhan4781dd02020-05-12 15:40:18 -0700923 TEST(ROUNDNE__NEONV8, positive_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700924 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
925 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700926 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan8853b822020-05-07 12:19:01 -0700927 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
928 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
929 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
930 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
931 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
932 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
933 }
934
Marat Dukhan4781dd02020-05-12 15:40:18 -0700935 TEST(ROUNDNE__NEONV8, negative_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700936 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
937 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700938 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan8853b822020-05-07 12:19:01 -0700939 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
940 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
941 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
942 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
943 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
944 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
945 }
946
Marat Dukhan4781dd02020-05-12 15:40:18 -0700947 TEST(ROUNDNE__NEONV8, positive_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700948 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
949 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
950 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
951 for (uint32_t i = 0; i < kBlockSize; i++) {
952 inputs[i] = fp32_from_bits(n + i);
953 }
954 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
955 for (uint32_t i = 0; i < kBlockSize; i++) {
956 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
957 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
958 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
959 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
960 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
961 }
962 }
963 }
964
Marat Dukhan4781dd02020-05-12 15:40:18 -0700965 TEST(ROUNDNE__NEONV8, negative_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700966 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
967 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
968 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
969 for (uint32_t i = 0; i < kBlockSize; i++) {
970 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
971 }
972 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
973 for (uint32_t i = 0; i < kBlockSize; i++) {
974 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
975 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
976 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
977 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
978 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
979 }
980 }
981 }
982
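  // For signaling-NaN inputs the quiet bit (0x00400000) is masked out on both
  // sides of the comparison: std::nearbyint quiets the NaN, while the kernel
  // may legitimately pass it through unchanged, so only the sign and the
  // remaining payload bits are required to match.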
Marat Dukhan4781dd02020-05-12 15:40:18 -0700983 TEST(ROUNDNE__NEONV8, positive_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -0700984 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
985 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
986 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
987 for (uint32_t i = 0; i < kBlockSize; i++) {
988 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
989 }
990 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
991 for (uint32_t i = 0; i < kBlockSize; i++) {
992 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
993 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
994 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
995 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
996 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
997 }
998 }
999 }
1000
Marat Dukhan4781dd02020-05-12 15:40:18 -07001001 TEST(ROUNDNE__NEONV8, negative_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001002 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1003 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1004 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1005 for (uint32_t i = 0; i < kBlockSize; i++) {
1006 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1007 }
1008 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1009 for (uint32_t i = 0; i < kBlockSize; i++) {
1010 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1011 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1012 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1013 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1014 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1015 }
1016 }
1017 }
1018
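  // The snan_to_qnan tests, by contrast, require bit-exact agreement with
  // std::nearbyint, i.e. the kernel is expected to return the corresponding
  // quiet NaN for every signaling-NaN input.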
Marat Dukhan4781dd02020-05-12 15:40:18 -07001019 TEST(ROUNDNE__NEONV8, positive_snan_to_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001020 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1021 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1022 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1023 for (uint32_t i = 0; i < kBlockSize; i++) {
1024 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1025 }
1026 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1027 for (uint32_t i = 0; i < kBlockSize; i++) {
1028 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1029 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1030 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1031 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1032 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1033 }
1034 }
1035 }
1036
Marat Dukhan4781dd02020-05-12 15:40:18 -07001037 TEST(ROUNDNE__NEONV8, negative_snan_to_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001038 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1039 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1040 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1041 for (uint32_t i = 0; i < kBlockSize; i++) {
1042 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1043 }
1044 xnn_math_f32_roundne__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1045 for (uint32_t i = 0; i < kBlockSize; i++) {
1046 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1047 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1048 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1049 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1050 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1051 }
1052 }
1053 }
1054#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1055
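// The WAsm SIMD "addsub" variant presumably rounds by adding and then
// subtracting the magic constant 2**23 (see the scalar sketch further below);
// these tests check that the trick still matches std::nearbyint across the
// full range of finite, infinite, and NaN inputs.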
Marat Dukhan4c617792021-12-21 15:47:58 -08001056#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhand3f3d872020-06-24 13:08:25 -07001057 TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_normal) {
1058 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1059 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1060 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1061 for (uint32_t i = 0; i < kBlockSize; i++) {
1062 inputs[i] = fp32_from_bits(n + i);
1063 }
1064 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1065 for (uint32_t i = 0; i < kBlockSize; i++) {
1066 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1067 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1068 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1069 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1070 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1071 }
1072 }
1073 }
1074
1075 TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_normal) {
1076 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1077 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1078 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1079 for (uint32_t i = 0; i < kBlockSize; i++) {
1080 inputs[i] = fp32_from_bits(n + i);
1081 }
1082 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1083 for (uint32_t i = 0; i < kBlockSize; i++) {
1084 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1085 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1086 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1087 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1088 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1089 }
1090 }
1091 }
1092
1093 TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_integral) {
1094 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1095 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1096 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1097 for (uint32_t i = 0; i < kBlockSize; i++) {
1098 inputs[i] = fp32_from_bits(n + i);
1099 }
1100 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1101 for (uint32_t i = 0; i < kBlockSize; i++) {
1102 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1103 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1104 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1105 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1106 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1107 }
1108 }
1109 }
1110
1111 TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_integral) {
1112 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1113 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1114 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1115 for (uint32_t i = 0; i < kBlockSize; i++) {
1116 inputs[i] = fp32_from_bits(n + i);
1117 }
1118 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1119 for (uint32_t i = 0; i < kBlockSize; i++) {
1120 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1121 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1122 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1123 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1124 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1125 }
1126 }
1127 }
1128
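  // Rounding is an identity on non-finite values: infinities round to
  // themselves and NaNs stay NaN, so these tests expect std::nearbyint's
  // result bit-for-bit (sNaN quieting is exercised separately below).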
1129 TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_infinity) {
1130 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1131 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1132 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1133 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1134 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
1135 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1136 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1137 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1138 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1139 }
1140
1141 TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_infinity) {
1142 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1143 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1144 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1145 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1146 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
1147 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1148 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1149 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1150 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1151 }
1152
1153 TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_qnan) {
1154 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1155 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1156 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1157 for (uint32_t i = 0; i < kBlockSize; i++) {
1158 inputs[i] = fp32_from_bits(n + i);
1159 }
1160 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1161 for (uint32_t i = 0; i < kBlockSize; i++) {
1162 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1163 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1164 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1165 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1166 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1167 }
1168 }
1169 }
1170
1171 TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_qnan) {
1172 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1173 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1174 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1175 for (uint32_t i = 0; i < kBlockSize; i++) {
1176 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
1177 }
1178 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1179 for (uint32_t i = 0; i < kBlockSize; i++) {
1180 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1181 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1182 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1183 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1184 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1185 }
1186 }
1187 }
1188
1189 TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_snan) {
1190 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1191 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1192 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1193 for (uint32_t i = 0; i < kBlockSize; i++) {
1194 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1195 }
1196 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1197 for (uint32_t i = 0; i < kBlockSize; i++) {
1198 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1199 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1200 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1201 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1202 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1203 }
1204 }
1205 }
1206
1207 TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_snan) {
1208 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1209 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1210 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1211 for (uint32_t i = 0; i < kBlockSize; i++) {
1212 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1213 }
1214 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1215 for (uint32_t i = 0; i < kBlockSize; i++) {
1216 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1217 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1218 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1219 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1220 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1221 }
1222 }
1223 }
1224
1225 TEST(ROUNDNE__WASMSIMD_ADDSUB, positive_snan_to_qnan) {
1226 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1227 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1228 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1229 for (uint32_t i = 0; i < kBlockSize; i++) {
1230 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1231 }
1232 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1233 for (uint32_t i = 0; i < kBlockSize; i++) {
1234 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1235 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1236 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1237 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1238 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1239 }
1240 }
1241 }
1242
1243 TEST(ROUNDNE__WASMSIMD_ADDSUB, negative_snan_to_qnan) {
1244 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1245 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1246 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1247 for (uint32_t i = 0; i < kBlockSize; i++) {
1248 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1249 }
1250 xnn_math_f32_roundne__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1251 for (uint32_t i = 0; i < kBlockSize; i++) {
1252 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1253 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1254 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1255 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1256 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1257 }
1258 }
1259 }
Marat Dukhan4c617792021-12-21 15:47:58 -08001260#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhand3f3d872020-06-24 13:08:25 -07001261
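// The "native" WAsm SIMD variant presumably maps to the f32x4.nearest
// instruction, which already implements round-to-nearest-even, so the same
// exhaustive sweep is expected to match std::nearbyint exactly.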
Marat Dukhan4c617792021-12-21 15:47:58 -08001262#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan33b4f752021-09-03 10:53:53 -07001263 TEST(ROUNDNE__WASMSIMD_NATIVE, positive_normal) {
1264 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1265 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1266 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1267 for (uint32_t i = 0; i < kBlockSize; i++) {
1268 inputs[i] = fp32_from_bits(n + i);
1269 }
1270 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1271 for (uint32_t i = 0; i < kBlockSize; i++) {
1272 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1273 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1274 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1275 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1276 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1277 }
1278 }
1279 }
1280
1281 TEST(ROUNDNE__WASMSIMD_NATIVE, negative_normal) {
1282 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1283 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1284 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1285 for (uint32_t i = 0; i < kBlockSize; i++) {
1286 inputs[i] = fp32_from_bits(n + i);
1287 }
1288 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1289 for (uint32_t i = 0; i < kBlockSize; i++) {
1290 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1291 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1292 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1293 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1294 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1295 }
1296 }
1297 }
1298
1299 TEST(ROUNDNE__WASMSIMD_NATIVE, positive_integral) {
1300 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1301 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1302 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1303 for (uint32_t i = 0; i < kBlockSize; i++) {
1304 inputs[i] = fp32_from_bits(n + i);
1305 }
1306 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1307 for (uint32_t i = 0; i < kBlockSize; i++) {
1308 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1309 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1310 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1311 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1312 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1313 }
1314 }
1315 }
1316
1317 TEST(ROUNDNE__WASMSIMD_NATIVE, negative_integral) {
1318 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1319 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1320 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1321 for (uint32_t i = 0; i < kBlockSize; i++) {
1322 inputs[i] = fp32_from_bits(n + i);
1323 }
1324 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1325 for (uint32_t i = 0; i < kBlockSize; i++) {
1326 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1327 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1328 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1329 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1330 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1331 }
1332 }
1333 }
1334
1335 TEST(ROUNDNE__WASMSIMD_NATIVE, positive_infinity) {
1336 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1337 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1338 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1339 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1340 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
1341 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1342 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1343 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1344 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1345 }
1346
1347 TEST(ROUNDNE__WASMSIMD_NATIVE, negative_infinity) {
1348 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1349 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1350 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1351 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1352 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
1353 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1354 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1355 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1356 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1357 }
1358
1359 TEST(ROUNDNE__WASMSIMD_NATIVE, positive_qnan) {
1360 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1361 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1362 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1363 for (uint32_t i = 0; i < kBlockSize; i++) {
1364 inputs[i] = fp32_from_bits(n + i);
1365 }
1366 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1367 for (uint32_t i = 0; i < kBlockSize; i++) {
1368 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1369 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1370 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1371 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1372 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1373 }
1374 }
1375 }
1376
1377 TEST(ROUNDNE__WASMSIMD_NATIVE, negative_qnan) {
1378 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1379 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1380 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1381 for (uint32_t i = 0; i < kBlockSize; i++) {
1382 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
1383 }
1384 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1385 for (uint32_t i = 0; i < kBlockSize; i++) {
1386 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1387 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1388 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1389 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1390 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1391 }
1392 }
1393 }
1394
1395 TEST(ROUNDNE__WASMSIMD_NATIVE, positive_snan) {
1396 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1397 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1398 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1399 for (uint32_t i = 0; i < kBlockSize; i++) {
1400 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1401 }
1402 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1403 for (uint32_t i = 0; i < kBlockSize; i++) {
1404 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1405 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1406 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1407 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1408 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1409 }
1410 }
1411 }
1412
1413 TEST(ROUNDNE__WASMSIMD_NATIVE, negative_snan) {
1414 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1415 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1416 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1417 for (uint32_t i = 0; i < kBlockSize; i++) {
1418 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1419 }
1420 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1421 for (uint32_t i = 0; i < kBlockSize; i++) {
1422 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1423 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1424 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1425 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1426 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1427 }
1428 }
1429 }
1430
1431 TEST(ROUNDNE__WASMSIMD_NATIVE, positive_snan_to_qnan) {
1432 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1433 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1434 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1435 for (uint32_t i = 0; i < kBlockSize; i++) {
1436 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1437 }
1438 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1439 for (uint32_t i = 0; i < kBlockSize; i++) {
1440 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1441 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1442 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1443 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1444 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1445 }
1446 }
1447 }
1448
1449 TEST(ROUNDNE__WASMSIMD_NATIVE, negative_snan_to_qnan) {
1450 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1451 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1452 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1453 for (uint32_t i = 0; i < kBlockSize; i++) {
1454 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1455 }
1456 xnn_math_f32_roundne__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1457 for (uint32_t i = 0; i < kBlockSize; i++) {
1458 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1459 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1460 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1461 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1462 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1463 }
1464 }
1465 }
Marat Dukhan4c617792021-12-21 15:47:58 -08001466#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
Marat Dukhan33b4f752021-09-03 10:53:53 -07001467
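// A minimal sketch of the rounding technique that the *_addsub kernels under
// test presumably rely on (the real implementations live elsewhere in the
// tree and are vectorized): under the default round-to-nearest-even mode,
// adding and then subtracting 2**23 snaps any |x| < 2**23 to the nearest
// integer, and larger magnitudes are already integral. The helper below is
// illustrative only and is not called by the tests.
inline float roundne_addsub_sketch(float x) {
  const float magic = 8388608.0f;  // 2**23: the smallest float whose ULP is 1.0
  const float abs_x = std::abs(x);
  if (!(abs_x < magic)) {
    return x;  // already an integer, or infinity/NaN: pass through unchanged
  }
  const float rounded = (abs_x + magic) - magic;  // FPU rounds to nearest-even
  return std::copysign(rounded, x);  // restore the sign, including -0.0f
}
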
Marat Dukhan075088a2020-05-12 19:42:12 -07001468TEST(ROUNDNE__SCALAR_ADDSUB, positive_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001469 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1470 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1471 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1472 for (uint32_t i = 0; i < kBlockSize; i++) {
1473 inputs[i] = fp32_from_bits(n + i);
1474 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001475 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001476 for (uint32_t i = 0; i < kBlockSize; i++) {
1477 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1478 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1479 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1480 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1481 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1482 }
1483 }
1484}
1485
Marat Dukhan075088a2020-05-12 19:42:12 -07001486TEST(ROUNDNE__SCALAR_ADDSUB, negative_normal) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001487 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1488 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1489 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1490 for (uint32_t i = 0; i < kBlockSize; i++) {
1491 inputs[i] = fp32_from_bits(n + i);
1492 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001493 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001494 for (uint32_t i = 0; i < kBlockSize; i++) {
1495 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1496 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1497 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1498 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1499 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1500 }
1501 }
1502}
1503
Marat Dukhan075088a2020-05-12 19:42:12 -07001504TEST(ROUNDNE__SCALAR_ADDSUB, positive_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001505 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1506 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1507 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1508 for (uint32_t i = 0; i < kBlockSize; i++) {
1509 inputs[i] = fp32_from_bits(n + i);
1510 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001511 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001512 for (uint32_t i = 0; i < kBlockSize; i++) {
1513 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1514 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1515 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1516 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1517 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1518 }
1519 }
1520}
1521
Marat Dukhan075088a2020-05-12 19:42:12 -07001522TEST(ROUNDNE__SCALAR_ADDSUB, negative_integral) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001523 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1524 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1525 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1526 for (uint32_t i = 0; i < kBlockSize; i++) {
1527 inputs[i] = fp32_from_bits(n + i);
1528 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001529 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001530 for (uint32_t i = 0; i < kBlockSize; i++) {
1531 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1532 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1533 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1534 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1535 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1536 }
1537 }
1538}
1539
Marat Dukhan075088a2020-05-12 19:42:12 -07001540TEST(ROUNDNE__SCALAR_ADDSUB, positive_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001541 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1542 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -07001543 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan075088a2020-05-12 19:42:12 -07001544 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001545 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
1546 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1547 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1548 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1549 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1550}
1551
Marat Dukhan075088a2020-05-12 19:42:12 -07001552TEST(ROUNDNE__SCALAR_ADDSUB, negative_infinity) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001553 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1554 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -07001555 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan075088a2020-05-12 19:42:12 -07001556 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001557 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[0]));
1558 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1559 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1560 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1561 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1562}
1563
Marat Dukhan075088a2020-05-12 19:42:12 -07001564TEST(ROUNDNE__SCALAR_ADDSUB, positive_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001565 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1566 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1567 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1568 for (uint32_t i = 0; i < kBlockSize; i++) {
1569 inputs[i] = fp32_from_bits(n + i);
1570 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001571 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001572 for (uint32_t i = 0; i < kBlockSize; i++) {
1573 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1574 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1575 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1576 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1577 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1578 }
1579 }
1580}
1581
Marat Dukhan075088a2020-05-12 19:42:12 -07001582TEST(ROUNDNE__SCALAR_ADDSUB, negative_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001583 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1584 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1585 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1586 for (uint32_t i = 0; i < kBlockSize; i++) {
1587 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
1588 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001589 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001590 for (uint32_t i = 0; i < kBlockSize; i++) {
1591 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1592 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1593 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1594 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1595 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1596 }
1597 }
1598}
1599
Marat Dukhan075088a2020-05-12 19:42:12 -07001600TEST(ROUNDNE__SCALAR_ADDSUB, positive_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001601 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1602 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1603 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1604 for (uint32_t i = 0; i < kBlockSize; i++) {
1605 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1606 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001607 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001608 for (uint32_t i = 0; i < kBlockSize; i++) {
1609 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1610 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1611 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1612 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1613 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1614 }
1615 }
1616}
1617
Marat Dukhan075088a2020-05-12 19:42:12 -07001618TEST(ROUNDNE__SCALAR_ADDSUB, negative_snan) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001619 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1620 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1621 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1622 for (uint32_t i = 0; i < kBlockSize; i++) {
1623 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1624 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001625 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001626 for (uint32_t i = 0; i < kBlockSize; i++) {
1627 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1628 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1629 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1630 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1631 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1632 }
1633 }
1634}
1635
Marat Dukhan075088a2020-05-12 19:42:12 -07001636TEST(ROUNDNE__SCALAR_ADDSUB, positive_snan_to_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001637 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1638 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1639 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1640 for (uint32_t i = 0; i < kBlockSize; i++) {
1641 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1642 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001643 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001644 for (uint32_t i = 0; i < kBlockSize; i++) {
1645 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1646 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1647 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1648 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1649 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1650 }
1651 }
1652}
1653
Marat Dukhan075088a2020-05-12 19:42:12 -07001654TEST(ROUNDNE__SCALAR_ADDSUB, negative_snan_to_qnan) {
Marat Dukhan8853b822020-05-07 12:19:01 -07001655 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1656 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1657 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1658 for (uint32_t i = 0; i < kBlockSize; i++) {
1659 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1660 }
Marat Dukhan075088a2020-05-12 19:42:12 -07001661 xnn_math_f32_roundne__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
Marat Dukhan8853b822020-05-07 12:19:01 -07001662 for (uint32_t i = 0; i < kBlockSize; i++) {
1663 const uint32_t reference_output = fp32_to_bits(std::nearbyint(inputs[i]));
1664 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1665 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1666 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1667 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1668 }
1669 }
1670}