FP16 tests allow for near-zero results.

When the float reference computation produces a result whose magnitude is too small to be
represented as a half-precision float, the half-precision result rounds to zero.  The tests
check for 1% relative error, which fails on these near-zero reference values.  So a minimum
absolute error of 0.001 is allowed.

This change widens the range of test values to include near zero and in some cases
tightens the relative accuracy requirement, but allows the minimum absolute error.
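
For reference, a minimal sketch of the combined tolerance (the helper name
fp16_tolerance is hypothetical; the assertions in the diff below inline the
expression directly):

    #include <algorithm>  // std::max
    #include <cmath>      // std::abs

    // At least 1e-3 absolute error, or 1% of the reference magnitude,
    // whichever is larger.
    inline float fp16_tolerance(float ref) {
      return std::max(1.0e-3f, std::abs(ref) * 1.0e-2f);
    }

    // Usage: ASSERT_NEAR(fp16_ieee_to_fp32_value(output[i]), ref, fp16_tolerance(ref));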

PiperOrigin-RevId: 331656463
diff --git a/test/binary-elementwise-operator-tester.h b/test/binary-elementwise-operator-tester.h
index 62da9eb..2b2867b 100644
--- a/test/binary-elementwise-operator-tester.h
+++ b/test/binary-elementwise-operator-tester.h
@@ -346,7 +346,7 @@
 
     std::random_device random_device;
     auto rng = std::mt19937(random_device());
-    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
+    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
     auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
 
     // Compute generalized shapes.
@@ -482,7 +482,7 @@
                 for (size_t n = 0; n < output_dims[5]; n++) {
                   const size_t index =
                     i * output_strides[0] + j * output_strides[1] + k * output_strides[2] + l * output_strides[3] + m * output_strides[4] + n * output_strides[5];
-                  ASSERT_NEAR(fp16_ieee_to_fp32_value(output[index]), output_ref[index], 1.0e-2f * std::abs(output_ref[index]))
+                  ASSERT_NEAR(fp16_ieee_to_fp32_value(output[index]), output_ref[index], std::max(1.0e-3f, std::abs(output_ref[index]) * 1.0e-2f))
                     << "(i, j, k, l, m, n) = (" << i << ", " << j << ", " << k << ", " << l << ", " << m << ", " << n << ")";
                 }
               }
diff --git a/test/convolution-operator-tester.h b/test/convolution-operator-tester.h
index 975b38b..f0df7ff 100644
--- a/test/convolution-operator-tester.h
+++ b/test/convolution-operator-tester.h
@@ -1005,7 +1005,7 @@
   void TestNHWCxF16() const {
     std::random_device random_device;
     auto rng = std::mt19937(random_device());
-    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
+    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
     auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
 
     std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) +
@@ -1151,10 +1151,7 @@
 //                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
 //                ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_max)
 //                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
-                ASSERT_NEAR(
-                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
-                    fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]),
-                    1.0e-2 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
+                ASSERT_NEAR(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c], fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), std::max(1.0e-3f, std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]) * 1.0e-2f))
                   << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
               }
             }
@@ -1820,7 +1817,7 @@
 
     std::random_device random_device;
     auto rng = std::mt19937(random_device());
-    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
+    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
     auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
 
     std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + std::max(
@@ -1939,10 +1936,7 @@
                   << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                 ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_max)
                   << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
-                ASSERT_NEAR(
-                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
-                    fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]),
-                    1.0e-2 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
+                ASSERT_NEAR(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c], fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), std::max(1.0e-3f, std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]) * 1.0e-2f))
                   << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
               }
             }
@@ -2021,10 +2015,7 @@
                   << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                 ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_max)
                   << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
-                ASSERT_NEAR(
-                    next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
-                    fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]),
-                    1.0e-2 * std::abs(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c]))
+                ASSERT_NEAR(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c], fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), std::max(1.0e-3f, std::abs(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c]) * 1.0e-2f))
                   << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
               }
             }
diff --git a/test/dwconv-microkernel-tester.h b/test/dwconv-microkernel-tester.h
index 8bc8e9e..94ca1ef 100644
--- a/test/dwconv-microkernel-tester.h
+++ b/test/dwconv-microkernel-tester.h
@@ -397,7 +397,7 @@
   void Test(xnn_f16_dwconv_minmax_unipass_ukernel_function dwconv_minmax, Variant variant = Variant::Native) const {
     std::random_device random_device;
     auto rng = std::mt19937(random_device());
-    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
+    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
     auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
 
     std::vector<const uint16_t*> indirection((width() - 1) * step() + kr());
@@ -477,10 +477,7 @@
             << "x = " << x << ", channel = " << c;
           ASSERT_LE(fp16_ieee_to_fp32_value(output[x * output_stride() + c]), output_max)
             << "x = " << x << ", channel = " << c;
-          ASSERT_NEAR(
-              output_ref[x * channels() + c],
-              fp16_ieee_to_fp32_value(output[x * output_stride() + c]),
-              std::abs(output_ref[x * channels() + c]) * 1.0e-2)
+          ASSERT_NEAR(output_ref[x * channels() + c], fp16_ieee_to_fp32_value(output[x * output_stride() + c]), std::max(1.0e-3f, std::abs(output_ref[x * channels() + c]) * 1.0e-2f))
             << "x = " << x << ", channel = " << c;
         }
       }
diff --git a/test/gavgpool-microkernel-tester.h b/test/gavgpool-microkernel-tester.h
index 1e3fa5b..de041bf 100644
--- a/test/gavgpool-microkernel-tester.h
+++ b/test/gavgpool-microkernel-tester.h
@@ -507,7 +507,7 @@
           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
         ASSERT_GE(fp16_ieee_to_fp32_value(output[c]), output_min)
           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
-        ASSERT_NEAR(fp16_ieee_to_fp32_value(output[c]), output_ref[c], std::abs(output_ref[c]) * 1.0e-2f)
+        ASSERT_NEAR(fp16_ieee_to_fp32_value(output[c]), output_ref[c], std::max(1.0e-3f, std::abs(output_ref[c]) * 1.0e-2f))
           << "at position " << c << ", rows = " << rows() << ", channels = " << channels();
       }
     }
diff --git a/test/gemm-microkernel-tester.h b/test/gemm-microkernel-tester.h
index 24a8f8d..7b9ec53 100644
--- a/test/gemm-microkernel-tester.h
+++ b/test/gemm-microkernel-tester.h
@@ -811,10 +811,7 @@
       // Validate micro-kernel outputs.
       for (size_t i = 0; i < m(); i++) {
         for (size_t j = 0; j < n(); j++) {
-          ASSERT_NEAR(
-              fp16_ieee_to_fp32_value(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()]),
-              c_ref[i * n() + j],
-              std::abs(c_ref[i * n() + j]) * 1.0e-2f)
+          ASSERT_NEAR(fp16_ieee_to_fp32_value(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()]), c_ref[i * n() + j], std::max(1.0e-3f, std::abs(c_ref[i * n() + j]) * 1.0e-2f))
               << "at " << i << ", " << j << ": reference = " << c_ref[i * n() + j]
               << ", optimized = " << fp16_ieee_to_fp32_value(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()]) << ", Mr x Nr x Kr = " << mr() << " x " << nr()
               << " x " << kr() << ", M x N x K = " << m() << " x " << n() << " x " << k();
@@ -933,10 +930,7 @@
               << "at " << i << ", " << i << ": reference = " << c_ref[i * n() + j]
               << ", optimized = " << fp16_ieee_to_fp32_value(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()]) << ", Mr x Nr x Kr = " << mr() << " x " << nr()
               << " x " << kr() << ", M x N x KC x KS = " << m() << " x " << n() << " x " << k() << " x " << ks();
-          ASSERT_NEAR(
-              fp16_ieee_to_fp32_value(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()]),
-              c_ref[i * n() + j],
-              std::abs(c_ref[i * n() + j]) * 1.0e-1f)
+          ASSERT_NEAR(fp16_ieee_to_fp32_value(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()]), c_ref[i * n() + j], std::max(1.0e-3f, std::abs(c_ref[i * n() + j]) * 1.0e-2f))
               << "at " << i << ", " << i << ": reference = " << c_ref[i * n() + j]
               << ", optimized = " << fp16_ieee_to_fp32_value(c[i * cm_stride() + (j / nr()) * cn_stride() + j % nr()]) << ", Mr x Nr x Kr = " << mr() << " x " << nr()
               << " x " << kr() << ", M x N x KC x KS = " << m() << " x " << n() << " x " << k() << " x " << ks();
diff --git a/test/global-average-pooling-operator-tester.h b/test/global-average-pooling-operator-tester.h
index e1ab883..dff7d61 100644
--- a/test/global-average-pooling-operator-tester.h
+++ b/test/global-average-pooling-operator-tester.h
@@ -293,7 +293,7 @@
   void TestNWCxF16() const {
     std::random_device random_device;
     auto rng = std::mt19937(random_device());
-    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
+    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
     auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
 
     std::vector<uint16_t> input((batch_size() * width() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
@@ -360,7 +360,7 @@
         for (size_t c = 0; c < channels(); c++) {
           ASSERT_LE(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_max);
           ASSERT_GE(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_min);
-          ASSERT_NEAR(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_ref[i * channels() + c], std::abs(output_ref[i * channels() + c]) * 1.0e-2f)
+          ASSERT_NEAR(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_ref[i * channels() + c], std::max(1.0e-3f, std::abs(output_ref[i * channels() + c]) * 1.0e-2f))
             << "at batch index " << i << " / " << batch_size()
             << ", channel " << c << " / " << channels();
         }
diff --git a/test/hardswish-operator-tester.h b/test/hardswish-operator-tester.h
index d735327..5ae71b6 100644
--- a/test/hardswish-operator-tester.h
+++ b/test/hardswish-operator-tester.h
@@ -131,9 +131,7 @@
       // Verify results.
       for (size_t i = 0; i < batch_size(); i++) {
         for (size_t c = 0; c < channels(); c++) {
-          ASSERT_NEAR(fp16_ieee_to_fp32_value(output[i * output_stride() + c]),
-                      output_ref[i * channels() + c],
-                      std::abs(output_ref[i * channels() + c]) * 1.0e-2f + 1.0e-3f)
+          ASSERT_NEAR(fp16_ieee_to_fp32_value(output[i * output_stride() + c]), output_ref[i * channels() + c], std::max(1.0e-3f, std::abs(output_ref[i * channels() + c]) * 1.0e-2f))
             << "at position " << i << ", batch size = " << batch_size() << ", channels = " << channels();
         }
       }
diff --git a/test/hswish-microkernel-tester.h b/test/hswish-microkernel-tester.h
index 97ce8c3..e0b3f8d 100644
--- a/test/hswish-microkernel-tester.h
+++ b/test/hswish-microkernel-tester.h
@@ -88,7 +88,7 @@
 
       // Verify results.
       for (size_t i = 0; i < batch_size(); i++) {
-        ASSERT_NEAR(y_ref[i], fp16_ieee_to_fp32_value(y[i]), std::abs(y_ref[i]) * 1.0e-1f)
+        ASSERT_NEAR(y_ref[i], fp16_ieee_to_fp32_value(y[i]), std::max(1.0e-3f, std::abs(y_ref[i]) * 1.0e-2f))
           << "at position " << i << ", batch_size = " << batch_size();
       }
     }
diff --git a/test/spmm-microkernel-tester.h b/test/spmm-microkernel-tester.h
index 9d96966..e37bf4d 100644
--- a/test/spmm-microkernel-tester.h
+++ b/test/spmm-microkernel-tester.h
@@ -443,10 +443,7 @@
       // Validate micro-kernel outputs.
       for (size_t pxb = 0; pxb < n(); pxb++) {
         for (size_t oc = 0; oc < m(); oc++) {
-          ASSERT_NEAR(
-            fp16_ieee_to_fp32_value(c[pxb * m() + oc]),
-            c_ref[pxb * m() + oc],
-            std::abs(c_ref[pxb * m() + oc]) * 1.0e-2f)
+          ASSERT_NEAR(fp16_ieee_to_fp32_value(c[pxb * m() + oc]), c_ref[pxb * m() + oc], std::max(1.0e-3f, std::abs(c_ref[pxb * m() + oc]) * 1.0e-2f))
             << "at " << pxb << ", " << oc
             << ": Mr x Nr x Kr = " << mr() << " x " << nr()
             << ", M x N x K = " << m() << " x " << n() << " x " << k();
diff --git a/test/vbinary-microkernel-tester.h b/test/vbinary-microkernel-tester.h
index ea82645..ca992e4 100644
--- a/test/vbinary-microkernel-tester.h
+++ b/test/vbinary-microkernel-tester.h
@@ -97,7 +97,7 @@
   void Test(xnn_f16_vbinary_ukernel_function vbinary, OpType op_type) const {
     std::random_device random_device;
     auto rng = std::mt19937(random_device());
-    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng);
+    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
     auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
 
     std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
@@ -150,7 +150,7 @@
 
       // Verify results.
       for (size_t i = 0; i < batch_size(); i++) {
-        ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::abs(y_ref[i]) * 1.0e-2f)
+        ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-3f, std::abs(y_ref[i]) * 1.0e-2f))
           << "at " << i << " / " << batch_size();
       }
     }
@@ -159,7 +159,7 @@
   void Test(xnn_f16_vbinary_minmax_ukernel_function vbinary_minmax, OpType op_type) const {
     std::random_device random_device;
     auto rng = std::mt19937(random_device());
-    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng);
+    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
     auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
 
     std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
@@ -230,7 +230,7 @@
 
       // Verify results.
       for (size_t i = 0; i < batch_size(); i++) {
-        ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::abs(y_ref[i]) * 1.0e-2f)
+        ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-3f, std::abs(y_ref[i]) * 1.0e-2f))
           << "at " << i << " / " << batch_size();
       }
     }
diff --git a/test/vbinaryc-microkernel-tester.h b/test/vbinaryc-microkernel-tester.h
index ac2729a..451d5e3 100644
--- a/test/vbinaryc-microkernel-tester.h
+++ b/test/vbinaryc-microkernel-tester.h
@@ -90,7 +90,7 @@
   void Test(xnn_f16_vbinary_ukernel_function vbinaryc, OpType op_type) const {
     std::random_device random_device;
     auto rng = std::mt19937(random_device());
-    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng);
+    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
     auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
 
     std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
@@ -146,7 +146,7 @@
 
       // Verify results.
       for (size_t i = 0; i < batch_size(); i++) {
-        ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::abs(y_ref[i]) * 1.0e-2f)
+        ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-3f, std::abs(y_ref[i]) * 1.0e-2f))
           << "at " << i << " / " << batch_size();
       }
     }
@@ -155,7 +155,7 @@
   void Test(xnn_f16_vbinary_minmax_ukernel_function vbinaryc_minmax, OpType op_type) const {
     std::random_device random_device;
     auto rng = std::mt19937(random_device());
-    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.01f, 1.0f), rng);
+    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
     auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
 
     std::vector<uint16_t> a(batch_size() + XNN_EXTRA_BYTES / sizeof(uint16_t));
@@ -229,7 +229,7 @@
 
       // Verify results.
       for (size_t i = 0; i < batch_size(); i++) {
-        ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::abs(y_ref[i]) * 1.0e-2f)
+        ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i]), y_ref[i], std::max(1.0e-3f, std::abs(y_ref[i]) * 1.0e-2f))
           << "at " << i << " / " << batch_size();
       }
     }
diff --git a/test/vmulcaddc-microkernel-tester.h b/test/vmulcaddc-microkernel-tester.h
index 84f5f77..566da63 100644
--- a/test/vmulcaddc-microkernel-tester.h
+++ b/test/vmulcaddc-microkernel-tester.h
@@ -181,7 +181,7 @@
       // Verify results.
       for (size_t i = 0; i < rows(); i++) {
         for (size_t j = 0; j < channels(); j++) {
-          ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i * output_stride() + j]), y_ref[i * channels() + j], std::abs(y_ref[i * channels() + j]) * 1.0e-2f)
+          ASSERT_NEAR(fp16_ieee_to_fp32_value(y[i * output_stride() + j]), y_ref[i * channels() + j], std::max(1.0e-3f, std::abs(y_ref[i * channels() + j]) * 1.0e-2f))
             << "at pixel " << i << " / " << rows()
             << ", channel = " << j << " / " << channels();
         }