// Copyright 2020 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iomanip>
#include <ios>
#include <limits>
#include <vector>

#include <gtest/gtest.h>

#include <fp16.h>

#include <xnnpack/AlignedAllocator.h>
#include <xnnpack/common.h>
#include <xnnpack/math-stubs.h>


constexpr int kBlockSize = 1024;

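// The tests below sweep IEEE-754 single-precision bit patterns in blocks of
// kBlockSize values and compare each round-toward-zero kernel against std::trunc.
// Bit-pattern boundaries used by the test ranges: 0x4B800000 is 2**24, at and
// above which every finite float is already an integer; 0x7F800000 is +infinity;
// 0x7F800001-0x7FBFFFFF are signaling NaNs; 0x7FC00000-0x7FFFFFFF are quiet NaNs.
// The same ranges with the sign bit (0x80000000) set cover the negative inputs.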
#if XNN_ARCH_X86 || XNN_ARCH_X86_64
  TEST(ROUNDZ__SSE_ADDSUB, positive_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, negative_normal) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, positive_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, negative_integral) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, positive_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDZ__SSE_ADDSUB, negative_infinity) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
    xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
    ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
      << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
      << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
      << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
  }

  TEST(ROUNDZ__SSE_ADDSUB, positive_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(n + i);
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

  TEST(ROUNDZ__SSE_ADDSUB, negative_qnan) {
    std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
    std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
    for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
      for (uint32_t i = 0; i < kBlockSize; i++) {
        inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
      }
      xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
      for (uint32_t i = 0; i < kBlockSize; i++) {
        const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
        ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
          << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
          << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
          << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
      }
    }
  }

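  // The signaling-NaN tests below compare results with the quiet bit (0x00400000)
  // masked off, since a kernel may return either the original sNaN pattern or its
  // quieted counterpart; the separate *_snan_to_qnan tests instead require an exact
  // match with the std::trunc reference.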
159 TEST(ROUNDZ__SSE_ADDSUB, positive_snan) {
160 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
161 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
162 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
163 for (uint32_t i = 0; i < kBlockSize; i++) {
164 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
165 }
166 xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
167 for (uint32_t i = 0; i < kBlockSize; i++) {
168 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
169 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
170 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
171 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
172 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
173 }
174 }
175 }
176
177 TEST(ROUNDZ__SSE_ADDSUB, negative_snan) {
178 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
179 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
180 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
181 for (uint32_t i = 0; i < kBlockSize; i++) {
182 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
183 }
184 xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
185 for (uint32_t i = 0; i < kBlockSize; i++) {
186 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
187 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
188 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
189 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
190 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
191 }
192 }
193 }
194
195 TEST(ROUNDZ__SSE_ADDSUB, positive_snan_to_qnan) {
196 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
197 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
198 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
199 for (uint32_t i = 0; i < kBlockSize; i++) {
200 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
201 }
202 xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
203 for (uint32_t i = 0; i < kBlockSize; i++) {
204 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
205 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
206 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
207 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
208 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
209 }
210 }
211 }
212
213 TEST(ROUNDZ__SSE_ADDSUB, negative_snan_to_qnan) {
214 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
215 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
216 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
217 for (uint32_t i = 0; i < kBlockSize; i++) {
218 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
219 }
220 xnn_math_f32_roundz__sse_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
221 for (uint32_t i = 0; i < kBlockSize; i++) {
222 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
223 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
224 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
225 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
226 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
227 }
228 }
229 }
230#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
231
232#if XNN_ARCH_X86 || XNN_ARCH_X86_64
233 TEST(ROUNDZ__SSE2_CVT, positive_normal) {
234 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
235 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
236 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
237 for (uint32_t i = 0; i < kBlockSize; i++) {
238 inputs[i] = fp32_from_bits(n + i);
239 }
240 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
241 for (uint32_t i = 0; i < kBlockSize; i++) {
242 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
243 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
244 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
245 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
246 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
247 }
248 }
249 }
250
251 TEST(ROUNDZ__SSE2_CVT, negative_normal) {
252 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
253 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
254 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
255 for (uint32_t i = 0; i < kBlockSize; i++) {
256 inputs[i] = fp32_from_bits(n + i);
257 }
258 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
259 for (uint32_t i = 0; i < kBlockSize; i++) {
260 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
261 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
262 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
263 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
264 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
265 }
266 }
267 }
268
269 TEST(ROUNDZ__SSE2_CVT, positive_integral) {
270 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
271 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
272 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
273 for (uint32_t i = 0; i < kBlockSize; i++) {
274 inputs[i] = fp32_from_bits(n + i);
275 }
276 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
277 for (uint32_t i = 0; i < kBlockSize; i++) {
278 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
279 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
280 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
281 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
282 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
283 }
284 }
285 }
286
287 TEST(ROUNDZ__SSE2_CVT, negative_integral) {
288 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
289 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
290 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
291 for (uint32_t i = 0; i < kBlockSize; i++) {
292 inputs[i] = fp32_from_bits(n + i);
293 }
294 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
295 for (uint32_t i = 0; i < kBlockSize; i++) {
296 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
297 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
298 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
299 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
300 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
301 }
302 }
303 }
304
305 TEST(ROUNDZ__SSE2_CVT, positive_infinity) {
306 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
307 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700308 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700309 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
310 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
311 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
312 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
313 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
314 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
315 }
316
317 TEST(ROUNDZ__SSE2_CVT, negative_infinity) {
318 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
319 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700320 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700321 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
322 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
323 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
324 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
325 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
326 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
327 }
328
329 TEST(ROUNDZ__SSE2_CVT, positive_qnan) {
330 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
331 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
332 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
333 for (uint32_t i = 0; i < kBlockSize; i++) {
334 inputs[i] = fp32_from_bits(n + i);
335 }
336 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
337 for (uint32_t i = 0; i < kBlockSize; i++) {
338 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
339 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
340 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
341 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
342 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
343 }
344 }
345 }
346
347 TEST(ROUNDZ__SSE2_CVT, negative_qnan) {
348 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
349 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
350 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
351 for (uint32_t i = 0; i < kBlockSize; i++) {
352 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
353 }
354 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
355 for (uint32_t i = 0; i < kBlockSize; i++) {
356 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
357 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
358 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
359 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
360 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
361 }
362 }
363 }
364
365 TEST(ROUNDZ__SSE2_CVT, positive_snan) {
366 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
367 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
368 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
369 for (uint32_t i = 0; i < kBlockSize; i++) {
370 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
371 }
372 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
373 for (uint32_t i = 0; i < kBlockSize; i++) {
374 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
375 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
376 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
377 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
378 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
379 }
380 }
381 }
382
383 TEST(ROUNDZ__SSE2_CVT, negative_snan) {
384 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
385 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
386 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
387 for (uint32_t i = 0; i < kBlockSize; i++) {
388 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
389 }
390 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
391 for (uint32_t i = 0; i < kBlockSize; i++) {
392 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
393 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
394 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
395 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
396 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
397 }
398 }
399 }
400
401 TEST(ROUNDZ__SSE2_CVT, DISABLED_positive_snan_to_qnan) {
402 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
403 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
404 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
405 for (uint32_t i = 0; i < kBlockSize; i++) {
406 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
407 }
408 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
409 for (uint32_t i = 0; i < kBlockSize; i++) {
410 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
411 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
412 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
413 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
414 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
415 }
416 }
417 }
418
419 TEST(ROUNDZ__SSE2_CVT, DISABLED_negative_snan_to_qnan) {
420 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
421 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
422 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
423 for (uint32_t i = 0; i < kBlockSize; i++) {
424 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
425 }
426 xnn_math_f32_roundz__sse2_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
427 for (uint32_t i = 0; i < kBlockSize; i++) {
428 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
429 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
430 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
431 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
432 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
433 }
434 }
435 }
436#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
437
438#if XNN_ARCH_X86 || XNN_ARCH_X86_64
439 TEST(ROUNDZ__SSE41, positive_normal) {
440 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
441 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
442 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
443 for (uint32_t i = 0; i < kBlockSize; i++) {
444 inputs[i] = fp32_from_bits(n + i);
445 }
446 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
447 for (uint32_t i = 0; i < kBlockSize; i++) {
448 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
449 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
450 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
451 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
452 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
453 }
454 }
455 }
456
457 TEST(ROUNDZ__SSE41, negative_normal) {
458 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
459 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
460 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
461 for (uint32_t i = 0; i < kBlockSize; i++) {
462 inputs[i] = fp32_from_bits(n + i);
463 }
464 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
465 for (uint32_t i = 0; i < kBlockSize; i++) {
466 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
467 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
468 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
469 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
470 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
471 }
472 }
473 }
474
475 TEST(ROUNDZ__SSE41, positive_integral) {
476 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
477 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
478 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
479 for (uint32_t i = 0; i < kBlockSize; i++) {
480 inputs[i] = fp32_from_bits(n + i);
481 }
482 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
483 for (uint32_t i = 0; i < kBlockSize; i++) {
484 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
485 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
486 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
487 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
488 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
489 }
490 }
491 }
492
493 TEST(ROUNDZ__SSE41, negative_integral) {
494 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
495 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
496 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
497 for (uint32_t i = 0; i < kBlockSize; i++) {
498 inputs[i] = fp32_from_bits(n + i);
499 }
500 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
501 for (uint32_t i = 0; i < kBlockSize; i++) {
502 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
503 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
504 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
505 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
506 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
507 }
508 }
509 }
510
511 TEST(ROUNDZ__SSE41, positive_infinity) {
512 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
513 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700514 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700515 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
516 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
517 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
518 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
519 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
520 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
521 }
522
523 TEST(ROUNDZ__SSE41, negative_infinity) {
524 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
525 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700526 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700527 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
528 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
529 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
530 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
531 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
532 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
533 }
534
535 TEST(ROUNDZ__SSE41, positive_qnan) {
536 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
537 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
538 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
539 for (uint32_t i = 0; i < kBlockSize; i++) {
540 inputs[i] = fp32_from_bits(n + i);
541 }
542 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
543 for (uint32_t i = 0; i < kBlockSize; i++) {
544 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
545 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
546 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
547 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
548 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
549 }
550 }
551 }
552
553 TEST(ROUNDZ__SSE41, negative_qnan) {
554 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
555 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
556 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
557 for (uint32_t i = 0; i < kBlockSize; i++) {
558 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
559 }
560 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
561 for (uint32_t i = 0; i < kBlockSize; i++) {
562 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
563 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
564 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
565 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
566 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
567 }
568 }
569 }
570
571 TEST(ROUNDZ__SSE41, positive_snan) {
572 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
573 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
574 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
575 for (uint32_t i = 0; i < kBlockSize; i++) {
576 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
577 }
578 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
579 for (uint32_t i = 0; i < kBlockSize; i++) {
580 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
581 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
582 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
583 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
584 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
585 }
586 }
587 }
588
589 TEST(ROUNDZ__SSE41, negative_snan) {
590 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
591 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
592 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
593 for (uint32_t i = 0; i < kBlockSize; i++) {
594 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
595 }
596 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
597 for (uint32_t i = 0; i < kBlockSize; i++) {
598 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
599 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
600 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
601 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
602 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
603 }
604 }
605 }
606
607 TEST(ROUNDZ__SSE41, positive_snan_to_qnan) {
608 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
609 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
610 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
611 for (uint32_t i = 0; i < kBlockSize; i++) {
612 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
613 }
614 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
615 for (uint32_t i = 0; i < kBlockSize; i++) {
616 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
617 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
618 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
619 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
620 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
621 }
622 }
623 }
624
625 TEST(ROUNDZ__SSE41, negative_snan_to_qnan) {
626 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
627 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
628 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
629 for (uint32_t i = 0; i < kBlockSize; i++) {
630 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
631 }
632 xnn_math_f32_roundz__sse41(kBlockSize * sizeof(float), inputs.data(), outputs.data());
633 for (uint32_t i = 0; i < kBlockSize; i++) {
634 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
635 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
636 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
637 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
638 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
639 }
640 }
641 }
642#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64
643
644#if XNN_ARCH_ARM || XNN_ARCH_ARM64
645 TEST(ROUNDZ__NEON_ADDSUB, positive_normal) {
646 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
647 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
648 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
649 for (uint32_t i = 0; i < kBlockSize; i++) {
650 inputs[i] = fp32_from_bits(n + i);
651 }
652 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
653 for (uint32_t i = 0; i < kBlockSize; i++) {
654 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
655 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
656 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
657 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
658 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
659 }
660 }
661 }
662
663 TEST(ROUNDZ__NEON_ADDSUB, negative_normal) {
664 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
665 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
666 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
667 for (uint32_t i = 0; i < kBlockSize; i++) {
668 inputs[i] = fp32_from_bits(n + i);
669 }
670 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
671 for (uint32_t i = 0; i < kBlockSize; i++) {
672 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
673 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
674 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
675 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
676 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
677 }
678 }
679 }
680
681 TEST(ROUNDZ__NEON_ADDSUB, positive_integral) {
682 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
683 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
684 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
685 for (uint32_t i = 0; i < kBlockSize; i++) {
686 inputs[i] = fp32_from_bits(n + i);
687 }
688 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
689 for (uint32_t i = 0; i < kBlockSize; i++) {
690 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
691 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
692 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
693 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
694 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
695 }
696 }
697 }
698
699 TEST(ROUNDZ__NEON_ADDSUB, negative_integral) {
700 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
701 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
702 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
703 for (uint32_t i = 0; i < kBlockSize; i++) {
704 inputs[i] = fp32_from_bits(n + i);
705 }
706 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
707 for (uint32_t i = 0; i < kBlockSize; i++) {
708 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
709 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
710 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
711 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
712 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
713 }
714 }
715 }
716
717 TEST(ROUNDZ__NEON_ADDSUB, positive_infinity) {
718 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
719 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700720 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700721 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
722 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
723 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
724 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
725 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
726 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
727 }
728
729 TEST(ROUNDZ__NEON_ADDSUB, negative_infinity) {
730 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
731 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700732 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700733 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
734 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
735 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
736 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
737 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
738 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
739 }
740
741 TEST(ROUNDZ__NEON_ADDSUB, positive_qnan) {
742 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
743 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
744 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
745 for (uint32_t i = 0; i < kBlockSize; i++) {
746 inputs[i] = fp32_from_bits(n + i);
747 }
748 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
749 for (uint32_t i = 0; i < kBlockSize; i++) {
750 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
751 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
752 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
753 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
754 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
755 }
756 }
757 }
758
759 TEST(ROUNDZ__NEON_ADDSUB, negative_qnan) {
760 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
761 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
762 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
763 for (uint32_t i = 0; i < kBlockSize; i++) {
764 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
765 }
766 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
767 for (uint32_t i = 0; i < kBlockSize; i++) {
768 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
769 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
770 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
771 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
772 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
773 }
774 }
775 }
776
777 TEST(ROUNDZ__NEON_ADDSUB, positive_snan) {
778 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
779 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
780 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
781 for (uint32_t i = 0; i < kBlockSize; i++) {
782 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
783 }
784 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
785 for (uint32_t i = 0; i < kBlockSize; i++) {
786 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
787 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
788 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
789 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
790 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
791 }
792 }
793 }
794
795 TEST(ROUNDZ__NEON_ADDSUB, negative_snan) {
796 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
797 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
798 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
799 for (uint32_t i = 0; i < kBlockSize; i++) {
800 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
801 }
802 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
803 for (uint32_t i = 0; i < kBlockSize; i++) {
804 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
805 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
806 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
807 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
808 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
809 }
810 }
811 }
812
813 TEST(ROUNDZ__NEON_ADDSUB, positive_snan_to_qnan) {
814 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
815 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
816 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
817 for (uint32_t i = 0; i < kBlockSize; i++) {
818 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
819 }
820 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
821 for (uint32_t i = 0; i < kBlockSize; i++) {
822 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
823 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
824 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
825 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
826 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
827 }
828 }
829 }
830
831 TEST(ROUNDZ__NEON_ADDSUB, negative_snan_to_qnan) {
832 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
833 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
834 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
835 for (uint32_t i = 0; i < kBlockSize; i++) {
836 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
837 }
838 xnn_math_f32_roundz__neon_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
839 for (uint32_t i = 0; i < kBlockSize; i++) {
840 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
841 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
842 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
843 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
844 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
845 }
846 }
847 }
848#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
849
850#if XNN_ARCH_ARM || XNN_ARCH_ARM64
851 TEST(ROUNDZ__NEON_CVT, positive_normal) {
852 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
853 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
854 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
855 for (uint32_t i = 0; i < kBlockSize; i++) {
856 inputs[i] = fp32_from_bits(n + i);
857 }
858 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
859 for (uint32_t i = 0; i < kBlockSize; i++) {
860 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
861 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
862 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
863 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
864 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
865 }
866 }
867 }
868
869 TEST(ROUNDZ__NEON_CVT, negative_normal) {
870 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
871 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
872 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
873 for (uint32_t i = 0; i < kBlockSize; i++) {
874 inputs[i] = fp32_from_bits(n + i);
875 }
876 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
877 for (uint32_t i = 0; i < kBlockSize; i++) {
878 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
879 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
880 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
881 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
882 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
883 }
884 }
885 }
886
887 TEST(ROUNDZ__NEON_CVT, positive_integral) {
888 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
889 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
890 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
891 for (uint32_t i = 0; i < kBlockSize; i++) {
892 inputs[i] = fp32_from_bits(n + i);
893 }
894 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
895 for (uint32_t i = 0; i < kBlockSize; i++) {
896 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
897 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
898 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
899 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
900 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
901 }
902 }
903 }
904
905 TEST(ROUNDZ__NEON_CVT, negative_integral) {
906 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
907 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
908 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
909 for (uint32_t i = 0; i < kBlockSize; i++) {
910 inputs[i] = fp32_from_bits(n + i);
911 }
912 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
913 for (uint32_t i = 0; i < kBlockSize; i++) {
914 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
915 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
916 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
917 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
918 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
919 }
920 }
921 }
922
923 TEST(ROUNDZ__NEON_CVT, positive_infinity) {
924 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
925 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700926 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700927 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
928 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
929 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
930 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
931 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
932 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
933 }
934
935 TEST(ROUNDZ__NEON_CVT, negative_infinity) {
936 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
937 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -0700938 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan2dbb9442020-05-12 20:43:43 -0700939 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
940 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
941 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
942 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
943 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
944 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
945 }
946
947 TEST(ROUNDZ__NEON_CVT, positive_qnan) {
948 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
949 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
950 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
951 for (uint32_t i = 0; i < kBlockSize; i++) {
952 inputs[i] = fp32_from_bits(n + i);
953 }
954 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
955 for (uint32_t i = 0; i < kBlockSize; i++) {
956 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
957 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
958 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
959 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
960 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
961 }
962 }
963 }
964
965 TEST(ROUNDZ__NEON_CVT, negative_qnan) {
966 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
967 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
968 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
969 for (uint32_t i = 0; i < kBlockSize; i++) {
970 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
971 }
972 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
973 for (uint32_t i = 0; i < kBlockSize; i++) {
974 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
975 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
976 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
977 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
978 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
979 }
980 }
981 }
982
983 TEST(ROUNDZ__NEON_CVT, positive_snan) {
984 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
985 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
986 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
987 for (uint32_t i = 0; i < kBlockSize; i++) {
988 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
989 }
990 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
991 for (uint32_t i = 0; i < kBlockSize; i++) {
992 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
993 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
994 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
995 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
996 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
997 }
998 }
999 }
1000
1001 TEST(ROUNDZ__NEON_CVT, negative_snan) {
1002 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1003 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1004 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1005 for (uint32_t i = 0; i < kBlockSize; i++) {
1006 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1007 }
1008 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1009 for (uint32_t i = 0; i < kBlockSize; i++) {
1010 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1011 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1012 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1013 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1014 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1015 }
1016 }
1017 }
1018
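  // The snan_to_qnan variants below additionally require bit-exact agreement with
  // std::trunc on signaling-NaN inputs (i.e. the quiet bit must be set the same way).
  // They carry the DISABLED_ prefix for this kernel, so GoogleTest skips them by default
  // (run with --gtest_also_run_disabled_tests to include them), presumably because the
  // CVT-based kernel does not guarantee this behavior.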
1019 TEST(ROUNDZ__NEON_CVT, DISABLED_positive_snan_to_qnan) {
1020 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1021 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1022 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1023 for (uint32_t i = 0; i < kBlockSize; i++) {
1024 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1025 }
1026 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1027 for (uint32_t i = 0; i < kBlockSize; i++) {
1028 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1029 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1030 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1031 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1032 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1033 }
1034 }
1035 }
1036
1037 TEST(ROUNDZ__NEON_CVT, DISABLED_negative_snan_to_qnan) {
1038 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1039 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1040 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1041 for (uint32_t i = 0; i < kBlockSize; i++) {
1042 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1043 }
1044 xnn_math_f32_roundz__neon_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1045 for (uint32_t i = 0; i < kBlockSize; i++) {
1046 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1047 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1048 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1049 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1050 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1051 }
1052 }
1053 }
1054#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1055
1056#if XNN_ARCH_ARM || XNN_ARCH_ARM64
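  // ROUNDZ__NEONV8: round-toward-zero kernel, presumably backed by the ARMv8 FRINTZ
  // (vrndq_f32) instruction. As with the other kernels, the *_normal tests sweep every
  // finite bit pattern below 2^24 (0x4B800000) and the *_integral tests sweep the
  // remaining finite patterns, all of which already represent whole numbers, comparing
  // each result against std::trunc.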
1057 TEST(ROUNDZ__NEONV8, positive_normal) {
1058 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1059 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1060 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1061 for (uint32_t i = 0; i < kBlockSize; i++) {
1062 inputs[i] = fp32_from_bits(n + i);
1063 }
1064 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1065 for (uint32_t i = 0; i < kBlockSize; i++) {
1066 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1067 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1068 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1069 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1070 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1071 }
1072 }
1073 }
1074
1075 TEST(ROUNDZ__NEONV8, negative_normal) {
1076 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1077 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1078 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1079 for (uint32_t i = 0; i < kBlockSize; i++) {
1080 inputs[i] = fp32_from_bits(n + i);
1081 }
1082 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1083 for (uint32_t i = 0; i < kBlockSize; i++) {
1084 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1085 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1086 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1087 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1088 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1089 }
1090 }
1091 }
1092
1093 TEST(ROUNDZ__NEONV8, positive_integral) {
1094 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1095 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1096 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1097 for (uint32_t i = 0; i < kBlockSize; i++) {
1098 inputs[i] = fp32_from_bits(n + i);
1099 }
1100 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1101 for (uint32_t i = 0; i < kBlockSize; i++) {
1102 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1103 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1104 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1105 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1106 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1107 }
1108 }
1109 }
1110
1111 TEST(ROUNDZ__NEONV8, negative_integral) {
1112 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1113 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1114 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1115 for (uint32_t i = 0; i < kBlockSize; i++) {
1116 inputs[i] = fp32_from_bits(n + i);
1117 }
1118 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1119 for (uint32_t i = 0; i < kBlockSize; i++) {
1120 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1121 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1122 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1123 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1124 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1125 }
1126 }
1127 }
1128
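  // Infinities must pass through unchanged: std::trunc(+/-inf) == +/-inf.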
1129 TEST(ROUNDZ__NEONV8, positive_infinity) {
1130 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1131 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1132 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1133 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1134 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1135 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1136 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1137 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1138 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1139 }
1140
1141 TEST(ROUNDZ__NEONV8, negative_infinity) {
1142 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1143 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1144 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1145 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1146 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1147 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1148 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1149 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1150 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1151 }
1152
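  // Quiet NaNs occupy bit patterns 0x7FC00000-0x7FFFFFFF (with the sign bit set for the
  // negative variants); the kernel must reproduce std::trunc's output bit-for-bit here.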
1153 TEST(ROUNDZ__NEONV8, positive_qnan) {
1154 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1155 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1156 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1157 for (uint32_t i = 0; i < kBlockSize; i++) {
1158 inputs[i] = fp32_from_bits(n + i);
1159 }
1160 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1161 for (uint32_t i = 0; i < kBlockSize; i++) {
1162 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1163 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1164 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1165 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1166 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1167 }
1168 }
1169 }
1170
1171 TEST(ROUNDZ__NEONV8, negative_qnan) {
1172 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1173 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1174 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1175 for (uint32_t i = 0; i < kBlockSize; i++) {
1176 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
1177 }
1178 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1179 for (uint32_t i = 0; i < kBlockSize; i++) {
1180 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1181 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1182 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1183 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1184 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1185 }
1186 }
1187 }
1188
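  // Signaling NaNs occupy 0x7F800001-0x7FBFFFFF. The comparison masks out bit 22 with
  // 0xFFBFFFFF, so the kernel may quiet a signaling NaN as long as the sign and the
  // remaining payload bits match the reference.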
1189 TEST(ROUNDZ__NEONV8, positive_snan) {
1190 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1191 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1192 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1193 for (uint32_t i = 0; i < kBlockSize; i++) {
1194 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1195 }
1196 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1197 for (uint32_t i = 0; i < kBlockSize; i++) {
1198 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1199 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1200 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1201 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1202 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1203 }
1204 }
1205 }
1206
1207 TEST(ROUNDZ__NEONV8, negative_snan) {
1208 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1209 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1210 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1211 for (uint32_t i = 0; i < kBlockSize; i++) {
1212 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1213 }
1214 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1215 for (uint32_t i = 0; i < kBlockSize; i++) {
1216 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1217 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1218 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1219 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1220 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1221 }
1222 }
1223 }
1224
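  // Unlike the *_snan tests above, these require exact equality with std::trunc's
  // output, i.e. signaling NaNs must be quieted exactly as the reference does.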
1225 TEST(ROUNDZ__NEONV8, positive_snan_to_qnan) {
1226 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1227 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1228 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1229 for (uint32_t i = 0; i < kBlockSize; i++) {
1230 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1231 }
1232 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1233 for (uint32_t i = 0; i < kBlockSize; i++) {
1234 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1235 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1236 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1237 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1238 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1239 }
1240 }
1241 }
1242
1243 TEST(ROUNDZ__NEONV8, negative_snan_to_qnan) {
1244 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1245 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1246 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1247 for (uint32_t i = 0; i < kBlockSize; i++) {
1248 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1249 }
1250 xnn_math_f32_roundz__neonv8(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1251 for (uint32_t i = 0; i < kBlockSize; i++) {
1252 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1253 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1254 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1255 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1256 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1257 }
1258 }
1259 }
1260#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64
1261
1262#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
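  // ROUNDZ__WASMSIMD_ADDSUB: WAsm SIMD kernel, presumably implemented with the classic
  // add-then-subtract-2^23 magic-constant trick; it is exercised over the same
  // exhaustive bit-pattern ranges as the kernels above.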
1263 TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_normal) {
1264 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1265 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1266 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1267 for (uint32_t i = 0; i < kBlockSize; i++) {
1268 inputs[i] = fp32_from_bits(n + i);
1269 }
1270 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1271 for (uint32_t i = 0; i < kBlockSize; i++) {
1272 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1273 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1274 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1275 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1276 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1277 }
1278 }
1279 }
1280
1281 TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_normal) {
1282 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1283 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1284 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1285 for (uint32_t i = 0; i < kBlockSize; i++) {
1286 inputs[i] = fp32_from_bits(n + i);
1287 }
1288 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1289 for (uint32_t i = 0; i < kBlockSize; i++) {
1290 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1291 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1292 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1293 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1294 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1295 }
1296 }
1297 }
1298
1299 TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_integral) {
1300 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1301 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1302 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1303 for (uint32_t i = 0; i < kBlockSize; i++) {
1304 inputs[i] = fp32_from_bits(n + i);
1305 }
1306 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1307 for (uint32_t i = 0; i < kBlockSize; i++) {
1308 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1309 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1310 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1311 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1312 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1313 }
1314 }
1315 }
1316
1317 TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_integral) {
1318 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1319 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1320 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1321 for (uint32_t i = 0; i < kBlockSize; i++) {
1322 inputs[i] = fp32_from_bits(n + i);
1323 }
1324 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1325 for (uint32_t i = 0; i < kBlockSize; i++) {
1326 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1327 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1328 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1329 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1330 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1331 }
1332 }
1333 }
1334
1335 TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_infinity) {
1336 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1337 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1338 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1339 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1340 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1341 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1342 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1343 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1344 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1345 }
1346
1347 TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_infinity) {
1348 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1349 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1350 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1351 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1352 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1353 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1354 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1355 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1356 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1357 }
1358
1359 TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_qnan) {
1360 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1361 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1362 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1363 for (uint32_t i = 0; i < kBlockSize; i++) {
1364 inputs[i] = fp32_from_bits(n + i);
1365 }
1366 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1367 for (uint32_t i = 0; i < kBlockSize; i++) {
1368 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1369 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1370 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1371 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1372 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1373 }
1374 }
1375 }
1376
1377 TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_qnan) {
1378 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1379 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1380 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1381 for (uint32_t i = 0; i < kBlockSize; i++) {
1382 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
1383 }
1384 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1385 for (uint32_t i = 0; i < kBlockSize; i++) {
1386 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1387 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1388 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1389 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1390 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1391 }
1392 }
1393 }
1394
1395 TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_snan) {
1396 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1397 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1398 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1399 for (uint32_t i = 0; i < kBlockSize; i++) {
1400 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1401 }
1402 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1403 for (uint32_t i = 0; i < kBlockSize; i++) {
1404 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1405 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1406 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1407 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1408 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1409 }
1410 }
1411 }
1412
1413 TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_snan) {
1414 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1415 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1416 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1417 for (uint32_t i = 0; i < kBlockSize; i++) {
1418 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1419 }
1420 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1421 for (uint32_t i = 0; i < kBlockSize; i++) {
1422 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1423 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1424 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1425 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1426 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1427 }
1428 }
1429 }
1430
1431 TEST(ROUNDZ__WASMSIMD_ADDSUB, positive_snan_to_qnan) {
1432 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1433 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1434 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1435 for (uint32_t i = 0; i < kBlockSize; i++) {
1436 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1437 }
1438 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1439 for (uint32_t i = 0; i < kBlockSize; i++) {
1440 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1441 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1442 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1443 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1444 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1445 }
1446 }
1447 }
1448
1449 TEST(ROUNDZ__WASMSIMD_ADDSUB, negative_snan_to_qnan) {
1450 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1451 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1452 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1453 for (uint32_t i = 0; i < kBlockSize; i++) {
1454 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1455 }
1456 xnn_math_f32_roundz__wasmsimd_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1457 for (uint32_t i = 0; i < kBlockSize; i++) {
1458 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1459 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1460 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1461 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1462 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1463 }
1464 }
1465 }
1466#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1467
1468#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
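  // ROUNDZ__WASMSIMD_CVT: a conversion-based kernel (per its _cvt suffix). Its
  // snan_to_qnan tests further below are DISABLED, presumably because exact quiet-bit
  // handling of signaling NaNs is not guaranteed by this implementation.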
1469 TEST(ROUNDZ__WASMSIMD_CVT, positive_normal) {
1470 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1471 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1472 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1473 for (uint32_t i = 0; i < kBlockSize; i++) {
1474 inputs[i] = fp32_from_bits(n + i);
1475 }
1476 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1477 for (uint32_t i = 0; i < kBlockSize; i++) {
1478 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1479 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1480 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1481 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1482 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1483 }
1484 }
1485 }
1486
1487 TEST(ROUNDZ__WASMSIMD_CVT, negative_normal) {
1488 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1489 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1490 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1491 for (uint32_t i = 0; i < kBlockSize; i++) {
1492 inputs[i] = fp32_from_bits(n + i);
1493 }
1494 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1495 for (uint32_t i = 0; i < kBlockSize; i++) {
1496 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1497 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1498 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1499 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1500 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1501 }
1502 }
1503 }
1504
1505 TEST(ROUNDZ__WASMSIMD_CVT, positive_integral) {
1506 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1507 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1508 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1509 for (uint32_t i = 0; i < kBlockSize; i++) {
1510 inputs[i] = fp32_from_bits(n + i);
1511 }
1512 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1513 for (uint32_t i = 0; i < kBlockSize; i++) {
1514 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1515 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1516 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1517 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1518 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1519 }
1520 }
1521 }
1522
1523 TEST(ROUNDZ__WASMSIMD_CVT, negative_integral) {
1524 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1525 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1526 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1527 for (uint32_t i = 0; i < kBlockSize; i++) {
1528 inputs[i] = fp32_from_bits(n + i);
1529 }
1530 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1531 for (uint32_t i = 0; i < kBlockSize; i++) {
1532 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1533 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1534 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1535 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1536 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1537 }
1538 }
1539 }
1540
1541 TEST(ROUNDZ__WASMSIMD_CVT, positive_infinity) {
1542 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1543 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1544 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1545 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1546 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1547 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1548 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1549 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1550 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1551 }
1552
1553 TEST(ROUNDZ__WASMSIMD_CVT, negative_infinity) {
1554 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1555 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1556 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1557 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1558 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1559 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1560 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1561 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1562 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1563 }
1564
1565 TEST(ROUNDZ__WASMSIMD_CVT, positive_qnan) {
1566 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1567 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1568 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1569 for (uint32_t i = 0; i < kBlockSize; i++) {
1570 inputs[i] = fp32_from_bits(n + i);
1571 }
1572 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1573 for (uint32_t i = 0; i < kBlockSize; i++) {
1574 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1575 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1576 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1577 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1578 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1579 }
1580 }
1581 }
1582
1583 TEST(ROUNDZ__WASMSIMD_CVT, negative_qnan) {
1584 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1585 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1586 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1587 for (uint32_t i = 0; i < kBlockSize; i++) {
1588 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
1589 }
1590 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1591 for (uint32_t i = 0; i < kBlockSize; i++) {
1592 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1593 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1594 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1595 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1596 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1597 }
1598 }
1599 }
1600
1601 TEST(ROUNDZ__WASMSIMD_CVT, positive_snan) {
1602 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1603 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1604 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1605 for (uint32_t i = 0; i < kBlockSize; i++) {
1606 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1607 }
1608 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1609 for (uint32_t i = 0; i < kBlockSize; i++) {
1610 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1611 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1612 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1613 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1614 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1615 }
1616 }
1617 }
1618
1619 TEST(ROUNDZ__WASMSIMD_CVT, negative_snan) {
1620 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1621 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1622 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1623 for (uint32_t i = 0; i < kBlockSize; i++) {
1624 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1625 }
1626 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1627 for (uint32_t i = 0; i < kBlockSize; i++) {
1628 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1629 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1630 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1631 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1632 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1633 }
1634 }
1635 }
1636
1637 TEST(ROUNDZ__WASMSIMD_CVT, DISABLED_positive_snan_to_qnan) {
1638 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1639 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1640 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1641 for (uint32_t i = 0; i < kBlockSize; i++) {
1642 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1643 }
1644 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1645 for (uint32_t i = 0; i < kBlockSize; i++) {
1646 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1647 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1648 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1649 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1650 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1651 }
1652 }
1653 }
1654
1655 TEST(ROUNDZ__WASMSIMD_CVT, DISABLED_negative_snan_to_qnan) {
1656 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1657 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1658 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1659 for (uint32_t i = 0; i < kBlockSize; i++) {
1660 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1661 }
1662 xnn_math_f32_roundz__wasmsimd_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1663 for (uint32_t i = 0; i < kBlockSize; i++) {
1664 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1665 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1666 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1667 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1668 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1669 }
1670 }
1671 }
1672#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1673
1674#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
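  // ROUNDZ__WASMSIMD_NATIVE: kernel presumably using the native f32x4.trunc instruction;
  // none of its tests are disabled, so it is expected to match std::trunc bit-for-bit,
  // including the quieting of signaling NaNs.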
1675 TEST(ROUNDZ__WASMSIMD_NATIVE, positive_normal) {
1676 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1677 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1678 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1679 for (uint32_t i = 0; i < kBlockSize; i++) {
1680 inputs[i] = fp32_from_bits(n + i);
1681 }
1682 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1683 for (uint32_t i = 0; i < kBlockSize; i++) {
1684 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1685 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1686 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1687 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1688 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1689 }
1690 }
1691 }
1692
1693 TEST(ROUNDZ__WASMSIMD_NATIVE, negative_normal) {
1694 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1695 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1696 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1697 for (uint32_t i = 0; i < kBlockSize; i++) {
1698 inputs[i] = fp32_from_bits(n + i);
1699 }
1700 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1701 for (uint32_t i = 0; i < kBlockSize; i++) {
1702 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1703 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1704 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1705 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1706 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1707 }
1708 }
1709 }
1710
1711 TEST(ROUNDZ__WASMSIMD_NATIVE, positive_integral) {
1712 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1713 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1714 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1715 for (uint32_t i = 0; i < kBlockSize; i++) {
1716 inputs[i] = fp32_from_bits(n + i);
1717 }
1718 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1719 for (uint32_t i = 0; i < kBlockSize; i++) {
1720 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1721 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1722 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1723 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1724 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1725 }
1726 }
1727 }
1728
1729 TEST(ROUNDZ__WASMSIMD_NATIVE, negative_integral) {
1730 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1731 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1732 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1733 for (uint32_t i = 0; i < kBlockSize; i++) {
1734 inputs[i] = fp32_from_bits(n + i);
1735 }
1736 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1737 for (uint32_t i = 0; i < kBlockSize; i++) {
1738 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1739 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1740 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1741 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1742 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1743 }
1744 }
1745 }
1746
1747 TEST(ROUNDZ__WASMSIMD_NATIVE, positive_infinity) {
1748 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1749 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1750 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1751 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1752 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1753 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1754 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1755 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1756 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1757 }
1758
1759 TEST(ROUNDZ__WASMSIMD_NATIVE, negative_infinity) {
1760 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1761 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1762 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
1763 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1764 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1765 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1766 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1767 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1768 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1769 }
1770
1771 TEST(ROUNDZ__WASMSIMD_NATIVE, positive_qnan) {
1772 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1773 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1774 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1775 for (uint32_t i = 0; i < kBlockSize; i++) {
1776 inputs[i] = fp32_from_bits(n + i);
1777 }
1778 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1779 for (uint32_t i = 0; i < kBlockSize; i++) {
1780 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1781 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1782 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1783 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1784 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1785 }
1786 }
1787 }
1788
1789 TEST(ROUNDZ__WASMSIMD_NATIVE, negative_qnan) {
1790 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1791 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1792 for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
1793 for (uint32_t i = 0; i < kBlockSize; i++) {
1794 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
1795 }
1796 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1797 for (uint32_t i = 0; i < kBlockSize; i++) {
1798 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1799 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1800 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1801 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1802 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1803 }
1804 }
1805 }
1806
1807 TEST(ROUNDZ__WASMSIMD_NATIVE, positive_snan) {
1808 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1809 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1810 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1811 for (uint32_t i = 0; i < kBlockSize; i++) {
1812 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1813 }
1814 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1815 for (uint32_t i = 0; i < kBlockSize; i++) {
1816 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1817 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1818 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1819 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1820 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1821 }
1822 }
1823 }
1824
1825 TEST(ROUNDZ__WASMSIMD_NATIVE, negative_snan) {
1826 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1827 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1828 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1829 for (uint32_t i = 0; i < kBlockSize; i++) {
1830 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1831 }
1832 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1833 for (uint32_t i = 0; i < kBlockSize; i++) {
1834 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1835 ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
1836 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1837 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1838 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1839 }
1840 }
1841 }
1842
1843 TEST(ROUNDZ__WASMSIMD_NATIVE, positive_snan_to_qnan) {
1844 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1845 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1846 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1847 for (uint32_t i = 0; i < kBlockSize; i++) {
1848 inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1849 }
1850 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1851 for (uint32_t i = 0; i < kBlockSize; i++) {
1852 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1853 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1854 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1855 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1856 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1857 }
1858 }
1859 }
1860
1861 TEST(ROUNDZ__WASMSIMD_NATIVE, negative_snan_to_qnan) {
1862 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1863 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1864 for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
1865 for (uint32_t i = 0; i < kBlockSize; i++) {
1866 inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
1867 }
1868 xnn_math_f32_roundz__wasmsimd_native(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1869 for (uint32_t i = 0; i < kBlockSize; i++) {
1870 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1871 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1872 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1873 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1874 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1875 }
1876 }
1877 }
1878#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
1879
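// ROUNDZ__SCALAR_ADDSUB: portable scalar kernel; note it is not wrapped in an
// architecture #if guard, so these tests run on every platform.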
1880TEST(ROUNDZ__SCALAR_ADDSUB, positive_normal) {
1881 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1882 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1883 for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
1884 for (uint32_t i = 0; i < kBlockSize; i++) {
1885 inputs[i] = fp32_from_bits(n + i);
1886 }
1887 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1888 for (uint32_t i = 0; i < kBlockSize; i++) {
1889 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1890 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1891 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1892 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1893 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1894 }
1895 }
1896}
1897
1898TEST(ROUNDZ__SCALAR_ADDSUB, negative_normal) {
1899 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1900 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1901 for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
1902 for (uint32_t i = 0; i < kBlockSize; i++) {
1903 inputs[i] = fp32_from_bits(n + i);
1904 }
1905 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1906 for (uint32_t i = 0; i < kBlockSize; i++) {
1907 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1908 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1909 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1910 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1911 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1912 }
1913 }
1914}
1915
1916TEST(ROUNDZ__SCALAR_ADDSUB, positive_integral) {
1917 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1918 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1919 for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
1920 for (uint32_t i = 0; i < kBlockSize; i++) {
1921 inputs[i] = fp32_from_bits(n + i);
1922 }
1923 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1924 for (uint32_t i = 0; i < kBlockSize; i++) {
1925 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1926 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1927 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1928 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1929 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1930 }
1931 }
1932}
1933
1934TEST(ROUNDZ__SCALAR_ADDSUB, negative_integral) {
1935 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1936 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1937 for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
1938 for (uint32_t i = 0; i < kBlockSize; i++) {
1939 inputs[i] = fp32_from_bits(n + i);
1940 }
1941 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1942 for (uint32_t i = 0; i < kBlockSize; i++) {
1943 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
1944 ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
1945 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
1946 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1947 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
1948 }
1949 }
1950}
1951
1952TEST(ROUNDZ__SCALAR_ADDSUB, positive_infinity) {
1953 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1954 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
1955 std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
1956 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1957 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1958 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1959 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1960 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1961 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1962}
1963
1964TEST(ROUNDZ__SCALAR_ADDSUB, negative_infinity) {
1965 std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
1966 std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
Marat Dukhand3102142020-06-08 01:24:01 -07001967 std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
Marat Dukhan2dbb9442020-05-12 20:43:43 -07001968 xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
1969 const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
1970 ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
1971 << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
1972 << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
1973 << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
1974}
1975
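// Quiet NaNs occupy bit patterns in [0x7FC00000, 0x80000000) (plus the sign bit in the negative
// case). The comparison against std::trunc is bit-exact, so quiet NaNs are expected to propagate
// unchanged.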
TEST(ROUNDZ__SCALAR_ADDSUB, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_ADDSUB, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

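// Signaling NaNs occupy bit patterns in [0x7F800001, 0x7FC00000). The mask 0xFFBFFFFF below
// clears bit 22 (the quiet bit) before comparison, so the check tolerates kernels that quiet
// the NaN while still requiring the rest of the payload and the sign to match.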
TEST(ROUNDZ__SCALAR_ADDSUB, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_ADDSUB, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

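// Unlike the *_snan tests above, the *_snan_to_qnan tests compare all bits, i.e. the kernel is
// additionally expected to quiet signaling NaNs exactly the way std::trunc does.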
TEST(ROUNDZ__SCALAR_ADDSUB, positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_ADDSUB, negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_addsub(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

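// The same input partitions are repeated below for the xnn_math_f32_roundz__scalar_cvt kernel,
// the second scalar variant under test (presumably truncation via integer conversion, as the
// _cvt suffix suggests).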
TEST(ROUNDZ__SCALAR_CVT, positive_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x00000000); n < UINT32_C(0x4B800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_CVT, negative_normal) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x80000000); n < UINT32_C(0xCB800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_CVT, positive_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x4B800000); n < UINT32_C(0x7F800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_CVT, negative_integral) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0xCB800000); n < UINT32_C(0xFF800000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_CVT, positive_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), +std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(ROUNDZ__SCALAR_CVT, negative_infinity) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  std::fill(inputs.begin(), inputs.end(), -std::numeric_limits<float>::infinity());
  xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
  const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[0]));
  ASSERT_EQ(reference_output, fp32_to_bits(outputs[0]))
    << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[0])
    << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
    << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[0]);
}

TEST(ROUNDZ__SCALAR_CVT, positive_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(n + i);
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_CVT, negative_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7FC00000); n < UINT32_C(0x80000000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | (n + i));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_CVT, positive_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_CVT, negative_snan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output & UINT32_C(0xFFBFFFFF), fp32_to_bits(outputs[i]) & UINT32_C(0xFFBFFFFF))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

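// The snan_to_qnan checks are compiled but skipped (DISABLED_ prefix) for the cvt variant,
// presumably because conversion-based truncation is not guaranteed to quiet signaling NaNs.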
TEST(ROUNDZ__SCALAR_CVT, DISABLED_positive_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}

TEST(ROUNDZ__SCALAR_CVT, DISABLED_negative_snan_to_qnan) {
  std::vector<float, AlignedAllocator<float, 64>> inputs(kBlockSize);
  std::vector<float, AlignedAllocator<float, 64>> outputs(kBlockSize);
  for (uint32_t n = UINT32_C(0x7F800000); n < UINT32_C(0x7FC00000); n += kBlockSize) {
    for (uint32_t i = 0; i < kBlockSize; i++) {
      inputs[i] = fp32_from_bits(UINT32_C(0x80000000) | std::max<uint32_t>(n + i, UINT32_C(0x7F800001)));
    }
    xnn_math_f32_roundz__scalar_cvt(kBlockSize * sizeof(float), inputs.data(), outputs.data());
    for (uint32_t i = 0; i < kBlockSize; i++) {
      const uint32_t reference_output = fp32_to_bits(std::trunc(inputs[i]));
      ASSERT_EQ(reference_output, fp32_to_bits(outputs[i]))
        << "input = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(inputs[i])
        << ", reference = 0x" << std::hex << std::setw(8) << std::setfill('0') << reference_output
        << ", optimized = 0x" << std::hex << std::setw(8) << std::setfill('0') << fp32_to_bits(outputs[i]);
    }
  }
}