Support Convolution, Deconvolution, and Fully Connected operators without bias
PiperOrigin-RevId: 277524405
diff --git a/src/convolution-spnchw.c b/src/convolution-spnchw.c
index 63ea965..fd36480 100644
--- a/src/convolution-spnchw.c
+++ b/src/convolution-spnchw.c
@@ -280,8 +280,14 @@
size_t first_ic = 0, last_ic = 0;
bool first_nonzero = true;
for (size_t ocb = 0; ocb < round_down_po2(group_output_channels, output_channels_block_size); ocb += output_channels_block_size) {
- for (size_t oco = 0; oco < output_channels_block_size; oco++) {
- *nonzero_values++ = bias[ocb + oco];
+ if XNN_LIKELY(bias != NULL) {
+ for (size_t oco = 0; oco < output_channels_block_size; oco++) {
+ *nonzero_values++ = bias[ocb + oco];
+ }
+ } else {
+ for (size_t oco = 0; oco < output_channels_block_size; oco++) {
+ *nonzero_values++ = 0.0f;
+ }
}
for (size_t ic = 0; ic < group_input_channels; ic++) {
bool is_nonzero_block = false;
@@ -311,7 +317,11 @@
output_channel_nonzeros += 1;
}
for (size_t oc = round_down_po2(group_output_channels, output_channels_block_size); oc < group_output_channels; oc++) {
- *nonzero_values++ = bias[oc];
+ if XNN_LIKELY(bias != NULL) {
+ *nonzero_values++ = bias[oc];
+ } else {
+ *nonzero_values++ = 0.0f;
+ }
for (size_t ic = 0; ic < group_input_channels; ic++) {
const float weight = kernel[oc * group_input_channels + ic];
if (weight != 0.0f) {
diff --git a/src/xnnpack/pack.h b/src/xnnpack/pack.h
index ab13696..865c5d7 100644
--- a/src/xnnpack/pack.h
+++ b/src/xnnpack/pack.h
@@ -30,9 +30,17 @@
for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) {
const size_t nr_block_size = min(nc - nr_block_start, nr);
int32_t* packed_b = (int32_t*) packed_w;
- for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
- *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset] + boff;
- packed_w = (void*) ((uintptr_t) packed_w + sizeof(int32_t));
+ if XNN_LIKELY(b != NULL) {
+ for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
+ *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset] + boff;
+ packed_w = (void*) ((uintptr_t) packed_w + sizeof(int32_t));
+ }
+ } else {
+ size_t n = nr_block_size;
+ do {
+ *((int32_t*) packed_w) = boff;
+ packed_w = (void*) ((uintptr_t) packed_w + sizeof(int32_t));
+ } while (--n != 0);
}
packed_w = (void*) ((uintptr_t) packed_w + (nr - nr_block_size) * sizeof(int32_t));
for (size_t kr_block_start = 0; kr_block_start < kc; kr_block_start += kr) {
@@ -52,7 +60,9 @@
}
}
k += nc * kc;
- b += nc;
+ if XNN_UNPREDICTABLE(b != NULL) {
+ b += nc;
+ }
} while (--g != 0);
}
@@ -74,9 +84,17 @@
for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) {
const size_t nr_block_size = min(nc - nr_block_start, nr);
int32_t* packed_b = (int32_t*) packed_w;
- for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
- *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset] + boff;
- packed_w = (void*) ((uintptr_t) packed_w + sizeof(int32_t));
+ if XNN_LIKELY(b != NULL) {
+ for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
+ *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset] + boff;
+ packed_w = (void*) ((uintptr_t) packed_w + sizeof(int32_t));
+ }
+ } else {
+ size_t n = nr_block_size;
+ do {
+ *((int32_t*) packed_w) = boff;
+ packed_w = (void*) ((uintptr_t) packed_w + sizeof(int32_t));
+ } while (--n != 0);
}
packed_w = (void*) ((uintptr_t) packed_w + (nr - nr_block_size) * sizeof(int32_t));
for (size_t ki = 0; ki < ks; ki++) {
@@ -99,7 +117,9 @@
}
}
k += ks * kc * nc;
- b += nc;
+ if XNN_UNPREDICTABLE(b != NULL) {
+ b += nc;
+ }
} while (--g != 0);
}
@@ -120,9 +140,17 @@
for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) {
const size_t nr_block_size = min(nc - nr_block_start, nr);
int32_t* packed_b = (int32_t*) packed_w;
- for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
- *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset] + boff;
- packed_w = (void*) ((uintptr_t) packed_w + sizeof(int32_t));
+ if XNN_LIKELY(b != NULL) {
+ for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
+ *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset] + boff;
+ packed_w = (void*) ((uintptr_t) packed_w + sizeof(int32_t));
+ }
+ } else {
+ size_t n = nr_block_size;
+ do {
+ *((int32_t*) packed_w) = boff;
+ packed_w = (void*) ((uintptr_t) packed_w + sizeof(int32_t));
+ } while (--n != 0);
}
packed_w = (void*) ((uintptr_t) packed_w + (nr - nr_block_size) * sizeof(int32_t));
for (size_t ki = 0; ki < ks; ki++) {
@@ -137,7 +165,9 @@
}
}
k += nc;
- b += nc;
+ if XNN_UNPREDICTABLE(b != NULL) {
+ b += nc;
+ }
}
}
@@ -168,9 +198,17 @@
for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) {
const size_t nr_block_size = min(nc - nr_block_start, nr);
int32_t* packed_b = (int32_t*) packed_w;
- for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
- *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset] + boff;
- packed_w = (void*) ((uintptr_t) packed_w + sizeof(int32_t));
+ if XNN_LIKELY(b != NULL) {
+ for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
+ *((int32_t*) packed_w) = b[nr_block_start + nr_block_offset] + boff;
+ packed_w = (void*) ((uintptr_t) packed_w + sizeof(int32_t));
+ }
+ } else {
+ size_t n = nr_block_size;
+ do {
+ *((int32_t*) packed_w) = boff;
+ packed_w = (void*) ((uintptr_t) packed_w + sizeof(int32_t));
+ } while (--n != 0);
}
packed_w = (void*) ((uintptr_t) packed_w + (nr - nr_block_size) * sizeof(int32_t));
for (size_t ky = oy; ky < kh; ky += sh) {
@@ -197,7 +235,9 @@
}
}
k += kh * kw * kc * nc;
- b += nc;
+ if XNN_UNPREDICTABLE(b != NULL) {
+ b += nc;
+ }
}
}
@@ -216,9 +256,17 @@
for (size_t cr_block_start = 0; cr_block_start < c; cr_block_start += cr) {
const size_t cr_block_size = min(c - cr_block_start, cr);
int32_t* packed_b = (int32_t*) packed_w;
- for (size_t cr_block_offset = 0; cr_block_offset < cr_block_size; cr_block_offset++) {
- *((int32_t*) packed_w) = b[cr_block_start + cr_block_offset] + boff;
- packed_w = (void*) ((uintptr_t) packed_w + sizeof(int32_t));
+ if XNN_LIKELY(b != NULL) {
+ for (size_t cr_block_offset = 0; cr_block_offset < cr_block_size; cr_block_offset++) {
+ *((int32_t*) packed_w) = b[cr_block_start + cr_block_offset] + boff;
+ packed_w = (void*) ((uintptr_t) packed_w + sizeof(int32_t));
+ }
+ } else {
+ size_t n = cr_block_size;
+ do {
+ *((int32_t*) packed_w) = boff;
+ packed_w = (void*) ((uintptr_t) packed_w + sizeof(int32_t));
+ } while (--n != 0);
}
packed_w = (void*) ((uintptr_t) packed_w + (cr - cr_block_size) * sizeof(int32_t));
for (size_t x = 0; x < w; x++) {
@@ -250,9 +298,17 @@
for (size_t cr_block_start = 0; cr_block_start < c; cr_block_start += cr) {
const size_t cr_block_size = min(c - cr_block_start, cr);
int32_t* packed_b = (int32_t*) packed_w;
- for (size_t cr_block_offset = 0; cr_block_offset < cr_block_size; cr_block_offset++) {
- *((int32_t*) packed_w) = b[cr_block_start + cr_block_offset] + boff;
- packed_w = (void*) ((uintptr_t) packed_w + sizeof(int32_t));
+ if XNN_LIKELY(b != NULL) {
+ for (size_t cr_block_offset = 0; cr_block_offset < cr_block_size; cr_block_offset++) {
+ *((int32_t*) packed_w) = b[cr_block_start + cr_block_offset] + boff;
+ packed_w = (void*) ((uintptr_t) packed_w + sizeof(int32_t));
+ }
+ } else {
+ size_t n = cr_block_size;
+ do {
+ *((int32_t*) packed_w) = boff;
+ packed_w = (void*) ((uintptr_t) packed_w + sizeof(int32_t));
+ } while (--n != 0);
}
packed_w = (void*) ((uintptr_t) packed_w + (cr - cr_block_size) * sizeof(int32_t));
for (size_t x = 0; x < w; x++) {
@@ -282,10 +338,12 @@
do {
for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) {
const size_t nr_block_size = min(nc - nr_block_start, nr);
- for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
- *packed_w++ = b[nr_block_start + nr_block_offset];
+ if XNN_LIKELY(b != NULL) {
+ for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
+ packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset];
+ }
}
- packed_w += nr - nr_block_size;
+ packed_w += nr;
for (size_t kr_block_start = 0; kr_block_start < kc; kr_block_start += kr) {
const size_t kr_block_size = min(kc - kr_block_start, kr);
for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
@@ -299,7 +357,9 @@
}
}
k += nc * kc;
- b += nc;
+ if XNN_UNPREDICTABLE(b != NULL) {
+ b += nc;
+ }
} while (--g != 0);
}
@@ -320,10 +380,12 @@
do {
for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) {
const size_t nr_block_size = min(nc - nr_block_start, nr);
- for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
- *packed_w++ = b[nr_block_start + nr_block_offset];
+ if XNN_LIKELY(b != NULL) {
+ for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
+ packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset];
+ }
}
- packed_w += nr - nr_block_size;
+ packed_w += nr;
for (size_t kr_block_start = 0; kr_block_start < skc; kr_block_start += kr) {
for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
@@ -348,7 +410,9 @@
}
}
k += nc * kc;
- b += nc;
+ if XNN_UNPREDICTABLE(b != NULL) {
+ b += nc;
+ }
} while (--g != 0);
}
@@ -413,10 +477,12 @@
do {
for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) {
const size_t nr_block_size = min(nc - nr_block_start, nr);
- for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
- *packed_w++ = b[nr_block_start + nr_block_offset];
+ if XNN_LIKELY(b != NULL) {
+ for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
+ packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset];
+ }
}
- packed_w += nr - nr_block_size;
+ packed_w += nr;
for (size_t ki = 0; ki < ks; ki++) {
for (size_t kr_block_start = 0; kr_block_start < skc; kr_block_start += kr) {
@@ -443,7 +509,9 @@
}
}
k += ks * kc * nc;
- b += nc;
+ if XNN_UNPREDICTABLE(b != NULL) {
+ b += nc;
+ }
} while (--g != 0);
}
@@ -460,10 +528,12 @@
for (size_t i = 0; i < g; i++) {
for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) {
const size_t nr_block_size = min(nc - nr_block_start, nr);
- for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
- *packed_w++ = b[nr_block_start + nr_block_offset];
+ if XNN_LIKELY(b != NULL) {
+ for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
+ packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset];
+ }
}
- packed_w += nr - nr_block_size;
+ packed_w += nr;
for (size_t ki = 0; ki < ks; ki++) {
for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
*packed_w =
@@ -474,7 +544,9 @@
}
}
k += nc;
- b += nc;
+ if XNN_UNPREDICTABLE(b != NULL) {
+ b += nc;
+ }
}
}
@@ -490,8 +562,15 @@
{
for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) {
const size_t nr_block_size = min(nc - nr_block_start, nr);
- for (size_t nr_block_offset = 0; nr_block_offset < nr; nr_block_offset++) {
- *packed_w++ = b[nr_block_start + min(nr_block_offset, nr_block_size - 1)];
+ if XNN_LIKELY(b != NULL) {
+ for (size_t nr_block_offset = 0; nr_block_offset < nr; nr_block_offset++) {
+ *packed_w++ = b[min(nr_block_offset, nr_block_size - 1)];
+ }
+ } else {
+ size_t n = nr;
+ do {
+ *packed_w++ = 0.0f;
+ } while (--n != 0);
}
for (size_t kx = 0; kx < kw; kx++) {
@@ -503,6 +582,9 @@
}
}
}
+ if XNN_UNPREDICTABLE(b != NULL) {
+ b += nr;
+ }
}
}
@@ -533,10 +615,12 @@
}
for (size_t nr_block_start = 0; nr_block_start < nc; nr_block_start += nr) {
const size_t nr_block_size = min(nc - nr_block_start, nr);
- for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
- *packed_w++ = b[nr_block_start + nr_block_offset];
+ if XNN_LIKELY(b != NULL) {
+ for (size_t nr_block_offset = 0; nr_block_offset < nr_block_size; nr_block_offset++) {
+ packed_w[nr_block_offset] = b[nr_block_start + nr_block_offset];
+ }
}
- packed_w += nr - nr_block_size;
+ packed_w += nr;
for (size_t ky = oy; ky < kh; ky += sh) {
for (size_t kx = ox; kx < kw; kx += sw) {
for (size_t kr_block_start = 0; kr_block_start < skc; kr_block_start += kr) {
@@ -566,7 +650,9 @@
}
}
k += kh * kw * kc * nc;
- b += nc;
+ if XNN_UNPREDICTABLE(b != NULL) {
+ b += nc;
+ }
}
}
@@ -581,8 +667,15 @@
{
for (size_t cr_block_start = 0; cr_block_start < c; cr_block_start += cr) {
const size_t cr_block_size = min(c - cr_block_start, cr);
- for (size_t cr_block_offset = 0; cr_block_offset < cr_block_size; cr_block_offset++) {
- *packed_w++ = b[cr_block_start + cr_block_offset];
+ if XNN_LIKELY(b != NULL) {
+ for (size_t cr_block_offset = 0; cr_block_offset < cr_block_size; cr_block_offset++) {
+ *packed_w++ = b[cr_block_start + cr_block_offset];
+ }
+ } else {
+ size_t n = cr_block_size;
+ do {
+ *packed_w++ = 0.0f;
+ } while (--n != 0);
}
packed_w += cr - cr_block_size;
for (size_t x = 0; x < w; x++) {
@@ -608,8 +701,15 @@
{
for (size_t cr_block_start = 0; cr_block_start < c; cr_block_start += cr) {
const size_t cr_block_size = min(c - cr_block_start, cr);
- for (size_t cr_block_offset = 0; cr_block_offset < cr_block_size; cr_block_offset++) {
- *packed_w++ = b[cr_block_start + cr_block_offset];
+ if XNN_LIKELY(b != NULL) {
+ for (size_t cr_block_offset = 0; cr_block_offset < cr_block_size; cr_block_offset++) {
+ *packed_w++ = b[cr_block_start + cr_block_offset];
+ }
+ } else {
+ size_t n = cr_block_size;
+ do {
+ *packed_w++ = 0.0f;
+ } while (--n != 0);
}
packed_w += cr - cr_block_size;
for (size_t x = 0; x < w; x++) {
@@ -632,7 +732,12 @@
float* packed_weights)
{
for (size_t g = 0; g < groups; g++) {
- *packed_weights++ = *bias++;
+ if XNN_LIKELY(bias != NULL) {
+ *packed_weights = *bias++;
+ } else {
+ *packed_weights = 0.0f;
+ }
+ packed_weights += 1;
for (size_t i = 0; i < kernel_size; i++) {
*packed_weights++ = kernel[g * kernel_size + i];
}
@@ -649,12 +754,19 @@
for (size_t cr_block_start = 0; cr_block_start < c; cr_block_start += cr) {
const size_t cr_block_size = min(c - cr_block_start, cr);
for (size_t cr_block_offset = 0; cr_block_offset < cr_block_size; cr_block_offset++) {
- packed_w[cr_block_offset] = s[cr_block_start + cr_block_offset];
+ *packed_w++ = s[cr_block_start + cr_block_offset];
}
- packed_w += cr;
- for (size_t cr_block_offset = 0; cr_block_offset < cr_block_size; cr_block_offset++) {
- packed_w[cr_block_offset] = b[cr_block_start + cr_block_offset];
+ packed_w += cr - cr_block_size;
+ if XNN_LIKELY(b != NULL) {
+ for (size_t cr_block_offset = 0; cr_block_offset < cr_block_size; cr_block_offset++) {
+ *packed_w++ = b[cr_block_start + cr_block_offset];
+ }
+ } else {
+ size_t n = cr_block_size;
+ do {
+ *packed_w++ = 0.0f;
+ } while (--n != 0);
}
- packed_w += cr;
+ packed_w += cr - cr_block_size;
}
}
diff --git a/test/convolution-operator-tester.h b/test/convolution-operator-tester.h
index e637fac..115b846 100644
--- a/test/convolution-operator-tester.h
+++ b/test/convolution-operator-tester.h
@@ -467,6 +467,15 @@
return this->depthwise_layout_;
}
+ inline ConvolutionOperatorTester& has_bias(bool has_bias) {
+ this->has_bias_ = has_bias;
+ return *this;
+ }
+
+ inline bool has_bias() const {
+ return this->has_bias_;
+ }
+
inline ConvolutionOperatorTester& iterations(size_t iterations) {
this->iterations_ = iterations;
return *this;
@@ -500,17 +509,21 @@
std::fill(output.begin(), output.end(), 0xA5);
// Compute reference results, without renormalization.
- for (size_t i = 0; i < batch_size(); i++) {
- for (size_t oy = 0; oy < output_height(); oy++) {
- for (size_t ox = 0; ox < output_width(); ox++) {
- for (size_t g = 0; g < groups(); g++) {
- for (size_t oc = 0; oc < group_output_channels(); oc++) {
- accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
- bias[g * group_output_channels() + oc];
+ if (has_bias()) {
+ for (size_t i = 0; i < batch_size(); i++) {
+ for (size_t oy = 0; oy < output_height(); oy++) {
+ for (size_t ox = 0; ox < output_width(); ox++) {
+ for (size_t g = 0; g < groups(); g++) {
+ for (size_t oc = 0; oc < group_output_channels(); oc++) {
+ accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
+ bias[g * group_output_channels() + oc];
+ }
}
}
}
}
+ } else {
+ std::fill(accumulators.begin(), accumulators.end(), 0);
}
if (depthwise_layout()) {
ASSERT_EQ(group_input_channels(), 1);
@@ -596,7 +609,7 @@
input_pixel_stride(), output_pixel_stride(),
input_zero_point, 1.0f /* input scale */,
kernel_zero_point, 1.0f /* kernel scale */,
- kernel.data(), bias.data(),
+ kernel.data(), has_bias() ? bias.data() : nullptr,
output_zero_point, output_scale, qmin(), qmax(),
(depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
&convolution_op));
@@ -656,17 +669,21 @@
std::fill(output.begin(), output.end(), nanf(""));
// Compute reference results, without clamping.
- for (size_t i = 0; i < batch_size(); i++) {
- for (size_t oy = 0; oy < output_height(); oy++) {
- for (size_t ox = 0; ox < output_width(); ox++) {
- for (size_t g = 0; g < groups(); g++) {
- for (size_t oc = 0; oc < group_output_channels(); oc++) {
- output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
- bias[g * group_output_channels() + oc];
+ if (has_bias()) {
+ for (size_t i = 0; i < batch_size(); i++) {
+ for (size_t oy = 0; oy < output_height(); oy++) {
+ for (size_t ox = 0; ox < output_width(); ox++) {
+ for (size_t g = 0; g < groups(); g++) {
+ for (size_t oc = 0; oc < group_output_channels(); oc++) {
+ output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
+ bias[g * group_output_channels() + oc];
+ }
}
}
}
}
+ } else {
+ std::fill(output_ref.begin(), output_ref.end(), 0.0f);
}
if (depthwise_layout()) {
ASSERT_EQ(group_input_channels(), 1);
@@ -747,7 +764,7 @@
dilation_height(), dilation_width(),
groups(), group_input_channels(), group_output_channels(),
input_pixel_stride(), output_pixel_stride(),
- kernel.data(), bias.data(),
+ kernel.data(), has_bias() ? bias.data() : nullptr,
output_min, output_max,
(depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
&convolution_op));
@@ -819,17 +836,21 @@
std::fill(output.begin(), output.end(), 0xA5);
// Compute reference results, without renormalization.
- for (size_t i = 0; i < batch_size(); i++) {
- for (size_t oy = 0; oy < output_height(); oy++) {
- for (size_t ox = 0; ox < output_width(); ox++) {
- for (size_t g = 0; g < groups(); g++) {
- for (size_t oc = 0; oc < group_output_channels(); oc++) {
- accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
- bias[g * group_output_channels() + oc];
+ if (has_bias()) {
+ for (size_t i = 0; i < batch_size(); i++) {
+ for (size_t oy = 0; oy < output_height(); oy++) {
+ for (size_t ox = 0; ox < output_width(); ox++) {
+ for (size_t g = 0; g < groups(); g++) {
+ for (size_t oc = 0; oc < group_output_channels(); oc++) {
+ accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
+ bias[g * group_output_channels() + oc];
+ }
}
}
}
}
+ } else {
+ std::fill(accumulators.begin(), accumulators.end(), 0);
}
for (size_t i = 0; i < batch_size(); i++) {
for (size_t oy = 0; oy < output_height(); oy++) {
@@ -886,7 +907,7 @@
input_pixel_stride(), output_pixel_stride(),
input_zero_point, 1.0f /* input scale */,
kernel_zero_point, 1.0f /* kernel scale */,
- kernel.data(), bias.data(),
+ kernel.data(), has_bias() ? bias.data() : nullptr,
output_zero_point, output_scale, qmin(), qmax(),
0, &convolution_op));
@@ -929,17 +950,21 @@
std::fill(output.begin(), output.end(), 0xA5);
// Compute reference results for the second run, including renormalization.
- for (size_t i = 0; i < next_batch_size(); i++) {
- for (size_t oy = 0; oy < next_output_height(); oy++) {
- for (size_t ox = 0; ox < next_output_width(); ox++) {
- for (size_t g = 0; g < groups(); g++) {
- for (size_t oc = 0; oc < group_output_channels(); oc++) {
- next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
- bias[g * group_output_channels() + oc];
+ if (has_bias()) {
+ for (size_t i = 0; i < next_batch_size(); i++) {
+ for (size_t oy = 0; oy < next_output_height(); oy++) {
+ for (size_t ox = 0; ox < next_output_width(); ox++) {
+ for (size_t g = 0; g < groups(); g++) {
+ for (size_t oc = 0; oc < group_output_channels(); oc++) {
+ next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
+ bias[g * group_output_channels() + oc];
+ }
}
}
}
}
+ } else {
+ std::fill(next_accumulators.begin(), next_accumulators.end(), 0);
}
for (size_t i = 0; i < next_batch_size(); i++) {
for (size_t oy = 0; oy < next_output_height(); oy++) {
@@ -1030,17 +1055,21 @@
std::fill(output.begin(), output.end(), nanf(""));
// Compute reference results, without clamping.
- for (size_t i = 0; i < batch_size(); i++) {
- for (size_t oy = 0; oy < output_height(); oy++) {
- for (size_t ox = 0; ox < output_width(); ox++) {
- for (size_t g = 0; g < groups(); g++) {
- for (size_t oc = 0; oc < group_output_channels(); oc++) {
- output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
- bias[g * group_output_channels() + oc];
+ if (has_bias()) {
+ for (size_t i = 0; i < batch_size(); i++) {
+ for (size_t oy = 0; oy < output_height(); oy++) {
+ for (size_t ox = 0; ox < output_width(); ox++) {
+ for (size_t g = 0; g < groups(); g++) {
+ for (size_t oc = 0; oc < group_output_channels(); oc++) {
+ output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
+ bias[g * group_output_channels() + oc];
+ }
}
}
}
}
+ } else {
+ std::fill(output_ref.begin(), output_ref.end(), 0.0f);
}
for (size_t i = 0; i < batch_size(); i++) {
for (size_t oy = 0; oy < output_height(); oy++) {
@@ -1092,7 +1121,7 @@
dilation_height(), dilation_width(),
groups(), group_input_channels(), group_output_channels(),
input_pixel_stride(), output_pixel_stride(),
- kernel.data(), bias.data(),
+ kernel.data(), has_bias() ? bias.data() : nullptr,
output_min, output_max,
0, &convolution_op));
@@ -1135,17 +1164,21 @@
std::fill(output.begin(), output.end(), nanf(""));
// Compute reference results for the second run, including clamping.
- for (size_t i = 0; i < next_batch_size(); i++) {
- for (size_t oy = 0; oy < next_output_height(); oy++) {
- for (size_t ox = 0; ox < next_output_width(); ox++) {
- for (size_t g = 0; g < groups(); g++) {
- for (size_t oc = 0; oc < group_output_channels(); oc++) {
- next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
- bias[g * group_output_channels() + oc];
+ if (has_bias()) {
+ for (size_t i = 0; i < next_batch_size(); i++) {
+ for (size_t oy = 0; oy < next_output_height(); oy++) {
+ for (size_t ox = 0; ox < next_output_width(); ox++) {
+ for (size_t g = 0; g < groups(); g++) {
+ for (size_t oc = 0; oc < group_output_channels(); oc++) {
+ next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
+ bias[g * group_output_channels() + oc];
+ }
}
}
}
}
+ } else {
+ std::fill(next_output_ref.begin(), next_output_ref.end(), 0.0f);
}
for (size_t i = 0; i < next_batch_size(); i++) {
for (size_t oy = 0; oy < next_output_height(); oy++) {
@@ -1236,5 +1269,6 @@
uint8_t qmin_{0};
uint8_t qmax_{255};
bool depthwise_layout_{false};
+ bool has_bias_{true};
size_t iterations_{1};
};
diff --git a/test/convolution-spnchw-operator-tester.h b/test/convolution-spnchw-operator-tester.h
index d860add..a117633 100644
--- a/test/convolution-spnchw-operator-tester.h
+++ b/test/convolution-spnchw-operator-tester.h
@@ -359,6 +359,15 @@
return this->depthwise_layout_;
}
+ inline ConvolutionSpNCHWOperatorTester& has_bias(bool has_bias) {
+ this->has_bias_ = has_bias;
+ return *this;
+ }
+
+ inline bool has_bias() const {
+ return this->has_bias_;
+ }
+
inline ConvolutionSpNCHWOperatorTester& iterations(size_t iterations) {
this->iterations_ = iterations;
return *this;
@@ -397,17 +406,21 @@
std::fill(output.begin(), output.end(), nanf(""));
// Compute reference results, without clamping.
- for (size_t i = 0; i < batch_size(); i++) {
- for (size_t oy = 0; oy < output_height(); oy++) {
- for (size_t ox = 0; ox < output_width(); ox++) {
- for (size_t g = 0; g < groups(); g++) {
- for (size_t oc = 0; oc < group_output_channels(); oc++) {
- output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] =
- bias[g * group_output_channels() + oc];
+ if (has_bias()) {
+ for (size_t i = 0; i < batch_size(); i++) {
+ for (size_t oy = 0; oy < output_height(); oy++) {
+ for (size_t ox = 0; ox < output_width(); ox++) {
+ for (size_t g = 0; g < groups(); g++) {
+ for (size_t oc = 0; oc < group_output_channels(); oc++) {
+ output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] =
+ bias[g * group_output_channels() + oc];
+ }
}
}
}
}
+ } else {
+ std::fill(output_ref.begin(), output_ref.end(), 0.0f);
}
if (nhwc_input()) {
for (size_t i = 0; i < batch_size(); i++) {
@@ -486,7 +499,7 @@
subsampling_height(), subsampling_width(),
dilation_height(), dilation_width(),
groups(), group_input_channels(), group_output_channels(),
- kernel.data(), bias.data(),
+ kernel.data(), has_bias() ? bias.data() : nullptr,
output_min, output_max,
(depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (nhwc_input() ? XNN_FLAG_INPUT_NHWC : 0),
&convolution_op);
@@ -555,5 +568,6 @@
uint8_t qmin_{0};
uint8_t qmax_{255};
bool depthwise_layout_{false};
+ bool has_bias_{true};
size_t iterations_{1};
};
diff --git a/test/convolution-spnchw.cc b/test/convolution-spnchw.cc
index cad38eb..7690039 100644
--- a/test/convolution-spnchw.cc
+++ b/test/convolution-spnchw.cc
@@ -107,6 +107,17 @@
.TestF32();
}
+TEST(CONVOLUTION_SpNHWC_OP_F32, 1x1_without_bias) {
+ ConvolutionSpNCHWOperatorTester()
+ .has_bias(false)
+ .input_size(27, 29)
+ .kernel_size(1, 1)
+ .group_input_channels(23)
+ .group_output_channels(19)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** SPMM path, batched ****************************/
TEST(CONVOLUTION_SpNHWC_OP_F32, batched_1x1) {
@@ -238,6 +249,18 @@
.TestF32();
}
+TEST(CONVOLUTION_SpNHWC_OP_F32, batched_1x1_without_bias) {
+ ConvolutionSpNCHWOperatorTester()
+ .has_bias(false)
+ .batch_size(2)
+ .input_size(27, 29)
+ .kernel_size(1, 1)
+ .group_input_channels(23)
+ .group_output_channels(19)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** DConv 3x3c3s2 HWC->SpCHW path ****************************/
TEST(CONVOLUTION_HWC2SpNHWC_OP_F32, 3x3c3s2) {
@@ -330,6 +353,20 @@
.TestF32();
}
+TEST(CONVOLUTION_HWC2SpNHWC_OP_F32, 3x3c3s2_without_bias) {
+ ConvolutionSpNCHWOperatorTester()
+ .has_bias(false)
+ .input_size(27, 29)
+ .padding(1)
+ .kernel_size(3, 3)
+ .subsampling(2)
+ .group_input_channels(3)
+ .group_output_channels(19)
+ .nhwc_input(true)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** DConv 3x3c3s2 HWC->SpCHW path, batched ****************************/
TEST(CONVOLUTION_HWC2SpNHWC_OP_F32, batched_3x3c3s2) {
@@ -443,6 +480,21 @@
.TestF32();
}
+TEST(CONVOLUTION_HWC2SpNHWC_OP_F32, batched_3x3c3s2_without_bias) {
+ ConvolutionSpNCHWOperatorTester()
+ .has_bias(false)
+ .batch_size(2)
+ .input_size(27, 29)
+ .padding(1)
+ .kernel_size(3, 3)
+ .subsampling(2)
+ .group_input_channels(3)
+ .group_output_channels(19)
+ .nhwc_input(true)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** DWCONV 3x3 path ****************************/
TEST(CONVOLUTION_SpNHWC_OP_F32, depthwise_3x3) {
@@ -529,6 +581,17 @@
.TestF32();
}
+TEST(CONVOLUTION_SpNHWC_OP_F32, depthwise_3x3_without_bias) {
+ ConvolutionSpNCHWOperatorTester()
+ .has_bias(false)
+ .input_size(27, 29)
+ .kernel_size(3, 3)
+ .padding_width(1)
+ .groups(19)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** DWCONV 3x3 path, batched ****************************/
TEST(CONVOLUTION_SpNHWC_OP_F32, batched_depthwise_3x3) {
@@ -646,6 +709,18 @@
.TestF32();
}
+TEST(CONVOLUTION_SpNHWC_OP_F32, batched_depthwise_3x3_without_bias) {
+ ConvolutionSpNCHWOperatorTester()
+ .has_bias(false)
+ .batch_size(2)
+ .input_size(27, 29)
+ .kernel_size(3, 3)
+ .padding_width(1)
+ .groups(19)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** DWCONV 3x3 stride-2 path ****************************/
TEST(CONVOLUTION_SpNHWC_OP_F32, depthwise_3x3s2) {
@@ -739,6 +814,18 @@
.TestF32();
}
+TEST(CONVOLUTION_SpNHWC_OP_F32, depthwise_3x3s2_without_bias) {
+ ConvolutionSpNCHWOperatorTester()
+ .has_bias(false)
+ .input_size(27, 29)
+ .kernel_size(3, 3)
+ .padding_width(1)
+ .subsampling(2)
+ .groups(19)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** DWCONV 3x3 stride-2 path, batched ****************************/
TEST(CONVOLUTION_SpNHWC_OP_F32, batched_depthwise_3x3s2) {
@@ -864,3 +951,16 @@
.iterations(3)
.TestF32();
}
+
+TEST(CONVOLUTION_SpNHWC_OP_F32, batched_depthwise_3x3s2_without_bias) {
+ ConvolutionSpNCHWOperatorTester()
+ .has_bias(false)
+ .batch_size(2)
+ .input_size(27, 29)
+ .kernel_size(3, 3)
+ .padding_width(1)
+ .subsampling(2)
+ .groups(19)
+ .iterations(3)
+ .TestF32();
+}
diff --git a/test/convolution.cc b/test/convolution.cc
index ebbdfd3..a173619 100644
--- a/test/convolution.cc
+++ b/test/convolution.cc
@@ -65,6 +65,17 @@
.TestQ8();
}
+TEST(CONVOLUTION_OP_Q8, 1x1_without_bias) {
+ ConvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(13, 14)
+ .kernel_size(1, 1)
+ .group_input_channels(23)
+ .group_output_channels(19)
+ .iterations(3)
+ .TestQ8();
+}
+
TEST(CONVOLUTION_OP_Q8, 1x1_with_batch) {
ConvolutionOperatorTester()
.batch_size(3)
@@ -135,6 +146,18 @@
.TestQ8();
}
+TEST(CONVOLUTION_OP_Q8, grouped_1x1_without_bias) {
+ ConvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(24, 25)
+ .kernel_size(1, 1)
+ .groups(2)
+ .group_input_channels(17)
+ .group_output_channels(19)
+ .iterations(3)
+ .TestQ8();
+}
+
TEST(CONVOLUTION_OP_Q8, grouped_1x1_with_batch) {
ConvolutionOperatorTester()
.batch_size(3)
@@ -282,6 +305,18 @@
.TestQ8();
}
+TEST(CONVOLUTION_OP_Q8, 3x3_without_bias) {
+ ConvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(10, 9)
+ .padding(1)
+ .kernel_size(3, 3)
+ .group_input_channels(15)
+ .group_output_channels(17)
+ .iterations(3)
+ .TestQ8();
+}
+
TEST(CONVOLUTION_OP_Q8, 3x3_with_batch) {
ConvolutionOperatorTester()
.batch_size(3)
@@ -390,6 +425,19 @@
.TestQ8();
}
+TEST(CONVOLUTION_OP_Q8, grouped_3x3_without_bias) {
+ ConvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(10, 11)
+ .padding(1)
+ .kernel_size(3, 3)
+ .groups(2)
+ .group_input_channels(14)
+ .group_output_channels(13)
+ .iterations(3)
+ .TestQ8();
+}
+
TEST(CONVOLUTION_OP_Q8, grouped_3x3_with_batch) {
ConvolutionOperatorTester()
.batch_size(3)
@@ -533,6 +581,17 @@
.TestQ8();
}
+TEST(CONVOLUTION_OP_Q8, depthwise_3x3_without_bias) {
+ ConvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(15, 14)
+ .padding(1, 1)
+ .kernel_size(3, 3)
+ .groups(27)
+ .iterations(3)
+ .TestQ8();
+}
+
TEST(CONVOLUTION_OP_Q8, depthwise_3x3s2) {
ConvolutionOperatorTester()
.input_size(15, 14)
@@ -665,6 +724,17 @@
.TestQ8();
}
+TEST(DEPTHWISE_CONVOLUTION_OP_Q8, 1x1_without_bias) {
+ ConvolutionOperatorTester()
+ .depthwise_layout(true)
+ .has_bias(false)
+ .input_size(15, 14)
+ .kernel_size(1, 1)
+ .groups(24)
+ .iterations(3)
+ .TestQ8();
+}
+
TEST(DEPTHWISE_CONVOLUTION_OP_Q8, 3x3) {
ConvolutionOperatorTester()
.depthwise_layout(true)
@@ -688,6 +758,18 @@
.TestQ8();
}
+TEST(DEPTHWISE_CONVOLUTION_OP_Q8, 3x3_without_bias) {
+ ConvolutionOperatorTester()
+ .depthwise_layout(true)
+ .has_bias(false)
+ .input_size(15, 14)
+ .padding(1, 1)
+ .kernel_size(3, 3)
+ .groups(24)
+ .iterations(3)
+ .TestQ8();
+}
+
TEST(DEPTHWISE_CONVOLUTION_OP_Q8, 3x3s2_with_tf_same_padding) {
for (size_t input_height = 14; input_height <= 15; input_height++) {
for (size_t input_width = 14; input_width <= 15; input_width++) {
@@ -714,6 +796,18 @@
.TestQ8();
}
+TEST(DEPTHWISE_CONVOLUTION_OP_Q8, 5x5_without_bias) {
+ ConvolutionOperatorTester()
+ .depthwise_layout(true)
+ .has_bias(false)
+ .input_size(15, 14)
+ .padding(2, 2)
+ .kernel_size(5, 5)
+ .groups(24)
+ .iterations(3)
+ .TestQ8();
+}
+
TEST(CONVOLUTION_OP_Q8, setup_increasing_batch) {
ASSERT_EQ(xnn_status_success, xnn_initialize());
ConvolutionOperatorTester()
@@ -1055,6 +1149,17 @@
.TestF32();
}
+TEST(CONVOLUTION_OP_F32, 1x1_without_bias) {
+ ConvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(13, 14)
+ .kernel_size(1, 1)
+ .group_input_channels(23)
+ .group_output_channels(19)
+ .iterations(3)
+ .TestF32();
+}
+
TEST(CONVOLUTION_OP_F32, 1x1_with_batch) {
ConvolutionOperatorTester()
.batch_size(3)
@@ -1125,6 +1230,18 @@
.TestF32();
}
+TEST(CONVOLUTION_OP_F32, grouped_1x1_without_bias) {
+ ConvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(24, 25)
+ .kernel_size(1, 1)
+ .groups(2)
+ .group_input_channels(17)
+ .group_output_channels(19)
+ .iterations(3)
+ .TestF32();
+}
+
TEST(CONVOLUTION_OP_F32, grouped_1x1_with_batch) {
ConvolutionOperatorTester()
.batch_size(3)
@@ -1385,6 +1502,18 @@
.TestF32();
}
+TEST(CONVOLUTION_OP_F32, 3x3_without_bias) {
+ ConvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(10, 9)
+ .padding(1)
+ .kernel_size(3, 3)
+ .group_input_channels(15)
+ .group_output_channels(17)
+ .iterations(3)
+ .TestF32();
+}
+
TEST(CONVOLUTION_OP_F32, 3x3_with_batch) {
ConvolutionOperatorTester()
.batch_size(3)
@@ -1493,6 +1622,19 @@
.TestF32();
}
+TEST(CONVOLUTION_OP_F32, grouped_3x3_without_bias) {
+ ConvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(10, 11)
+ .padding(1)
+ .kernel_size(3, 3)
+ .groups(2)
+ .group_input_channels(14)
+ .group_output_channels(13)
+ .iterations(3)
+ .TestF32();
+}
+
TEST(CONVOLUTION_OP_F32, grouped_3x3_with_batch) {
ConvolutionOperatorTester()
.batch_size(3)
@@ -1758,6 +1900,16 @@
.TestF32();
}
+TEST(CONVOLUTION_OP_F32, depthwise_1x1_without_bias) {
+ ConvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(15, 14)
+ .kernel_size(1, 1)
+ .groups(24)
+ .iterations(3)
+ .TestF32();
+}
+
TEST(CONVOLUTION_OP_F32, depthwise_2x2) {
ConvolutionOperatorTester()
.input_size(15, 14)
@@ -1768,6 +1920,17 @@
.TestF32();
}
+TEST(CONVOLUTION_OP_F32, depthwise_2x2_without_bias) {
+ ConvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(15, 14)
+ .padding(1, 1)
+ .kernel_size(2, 2)
+ .groups(24)
+ .iterations(3)
+ .TestF32();
+}
+
TEST(CONVOLUTION_OP_F32, depthwise_2x2s2) {
ConvolutionOperatorTester()
.input_size(15, 14)
@@ -1844,6 +2007,17 @@
.TestF32();
}
+TEST(CONVOLUTION_OP_F32, depthwise_3x3_without_bias) {
+ ConvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(15, 14)
+ .padding(1, 1)
+ .kernel_size(3, 3)
+ .groups(24)
+ .iterations(3)
+ .TestF32();
+}
+
TEST(CONVOLUTION_OP_F32, depthwise_3x3s2) {
ConvolutionOperatorTester()
.input_size(15, 14)
@@ -1965,6 +2139,17 @@
.TestF32();
}
+TEST(CONVOLUTION_OP_F32, depthwise_5x5_without_bias) {
+ ConvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(15, 14)
+ .padding(2, 2)
+ .kernel_size(5, 5)
+ .groups(27)
+ .iterations(3)
+ .TestF32();
+}
+
TEST(CONVOLUTION_OP_F32, depthwise_5x5s2) {
ConvolutionOperatorTester()
.input_size(15, 14)
@@ -2052,6 +2237,17 @@
.TestF32();
}
+TEST(DEPTHWISE_CONVOLUTION_OP_F32, 1x1_without_bias) {
+ ConvolutionOperatorTester()
+ .depthwise_layout(true)
+ .has_bias(false)
+ .input_size(15, 14)
+ .kernel_size(1, 1)
+ .groups(24)
+ .iterations(3)
+ .TestF32();
+}
+
TEST(DEPTHWISE_CONVOLUTION_OP_F32, 2x2) {
ConvolutionOperatorTester()
.depthwise_layout(true)
@@ -2075,6 +2271,18 @@
.TestF32();
}
+TEST(DEPTHWISE_CONVOLUTION_OP_F32, 2x2_without_bias) {
+ ConvolutionOperatorTester()
+ .depthwise_layout(true)
+ .has_bias(false)
+ .input_size(15, 14)
+ .padding(1, 1)
+ .kernel_size(2, 2)
+ .groups(24)
+ .iterations(3)
+ .TestF32();
+}
+
TEST(DEPTHWISE_CONVOLUTION_OP_F32, 3x3) {
ConvolutionOperatorTester()
.depthwise_layout(true)
@@ -2098,6 +2306,18 @@
.TestF32();
}
+TEST(DEPTHWISE_CONVOLUTION_OP_F32, 3x3_without_bias) {
+ ConvolutionOperatorTester()
+ .depthwise_layout(true)
+ .has_bias(false)
+ .input_size(15, 14)
+ .padding(1, 1)
+ .kernel_size(3, 3)
+ .groups(24)
+ .iterations(3)
+ .TestF32();
+}
+
TEST(DEPTHWISE_CONVOLUTION_OP_F32, 3x3s2_with_tf_same_padding) {
for (size_t input_height = 14; input_height <= 15; input_height++) {
for (size_t input_width = 14; input_width <= 15; input_width++) {
@@ -2136,6 +2356,18 @@
.TestF32();
}
+TEST(DEPTHWISE_CONVOLUTION_OP_F32, 5x5_without_bias) {
+ ConvolutionOperatorTester()
+ .depthwise_layout(true)
+ .has_bias(false)
+ .input_size(15, 14)
+ .padding(2, 2)
+ .kernel_size(5, 5)
+ .groups(24)
+ .iterations(3)
+ .TestF32();
+}
+
TEST(DEPTHWISE_CONVOLUTION_OP_F32, 7x7) {
ConvolutionOperatorTester()
.depthwise_layout(true)
@@ -2147,6 +2379,18 @@
.TestF32();
}
+TEST(DEPTHWISE_CONVOLUTION_OP_F32, 7x7_without_bias) {
+ ConvolutionOperatorTester()
+ .depthwise_layout(true)
+ .has_bias(false)
+ .input_size(15, 14)
+ .padding(3, 3)
+ .kernel_size(7, 7)
+ .groups(24)
+ .iterations(3)
+ .TestF32();
+}
+
TEST(CONVOLUTION_OP_F32, setup_increasing_batch) {
ASSERT_EQ(xnn_status_success, xnn_initialize());
ConvolutionOperatorTester()
diff --git a/test/deconvolution-operator-tester.h b/test/deconvolution-operator-tester.h
index bb836a6..67f3548 100644
--- a/test/deconvolution-operator-tester.h
+++ b/test/deconvolution-operator-tester.h
@@ -409,6 +409,15 @@
return this->qmax_;
}
+ inline DeconvolutionOperatorTester& has_bias(bool has_bias) {
+ this->has_bias_ = has_bias;
+ return *this;
+ }
+
+ inline bool has_bias() const {
+ return this->has_bias_;
+ }
+
inline DeconvolutionOperatorTester& iterations(size_t iterations) {
this->iterations_ = iterations;
return *this;
@@ -442,17 +451,21 @@
std::fill(output.begin(), output.end(), 0xA5);
// Compute reference results, without renormalization.
- for (size_t i = 0; i < batch_size(); i++) {
- for (size_t oy = 0; oy < output_height(); oy++) {
- for (size_t ox = 0; ox < output_width(); ox++) {
- for (size_t g = 0; g < groups(); g++) {
- for (size_t oc = 0; oc < group_output_channels(); oc++) {
- accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
- bias[g * group_output_channels() + oc];
+ if (has_bias()) {
+ for (size_t i = 0; i < batch_size(); i++) {
+ for (size_t oy = 0; oy < output_height(); oy++) {
+ for (size_t ox = 0; ox < output_width(); ox++) {
+ for (size_t g = 0; g < groups(); g++) {
+ for (size_t oc = 0; oc < group_output_channels(); oc++) {
+ accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
+ bias[g * group_output_channels() + oc];
+ }
}
}
}
}
+ } else {
+ std::fill(accumulators.begin(), accumulators.end(), 0);
}
for (size_t i = 0; i < batch_size(); i++) {
for (size_t oy = 0; oy < output_height(); oy++) {
@@ -512,7 +525,7 @@
input_pixel_stride(), output_pixel_stride(),
input_zero_point, 1.0f /* input scale */,
kernel_zero_point, 1.0f /* kernel scale */,
- kernel.data(), bias.data(),
+ kernel.data(), has_bias() ? bias.data() : nullptr,
output_zero_point, output_scale, qmin(), qmax(),
0, &deconvolution_op));
@@ -572,17 +585,21 @@
std::fill(output_ref.begin(), output_ref.end(), 0.0f);
// Compute reference results, without clamping.
- for (size_t i = 0; i < batch_size(); i++) {
- for (size_t oy = 0; oy < output_height(); oy++) {
- for (size_t ox = 0; ox < output_width(); ox++) {
- for (size_t g = 0; g < groups(); g++) {
- for (size_t oc = 0; oc < group_output_channels(); oc++) {
- output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
- bias[g * group_output_channels() + oc];
+ if (has_bias()) {
+ for (size_t i = 0; i < batch_size(); i++) {
+ for (size_t oy = 0; oy < output_height(); oy++) {
+ for (size_t ox = 0; ox < output_width(); ox++) {
+ for (size_t g = 0; g < groups(); g++) {
+ for (size_t oc = 0; oc < group_output_channels(); oc++) {
+ output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
+ bias[g * group_output_channels() + oc];
+ }
}
}
}
}
+ } else {
+ std::fill(output_ref.begin(), output_ref.end(), 0.0f);
}
for (size_t i = 0; i < batch_size(); i++) {
for (size_t oy = 0; oy < output_height(); oy++) {
@@ -637,7 +654,7 @@
dilation_height(), dilation_width(),
groups(), group_input_channels(), group_output_channels(),
input_pixel_stride(), output_pixel_stride(),
- kernel.data(), bias.data(),
+ kernel.data(), has_bias() ? bias.data() : nullptr,
output_min, output_max,
0, &deconvolution_op));
@@ -706,17 +723,21 @@
std::fill(output.begin(), output.end(), 0xA5);
// Compute reference results, without renormalization.
- for (size_t i = 0; i < batch_size(); i++) {
- for (size_t oy = 0; oy < output_height(); oy++) {
- for (size_t ox = 0; ox < output_width(); ox++) {
- for (size_t g = 0; g < groups(); g++) {
- for (size_t oc = 0; oc < group_output_channels(); oc++) {
- accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
- bias[g * group_output_channels() + oc];
+ if (has_bias()) {
+ for (size_t i = 0; i < batch_size(); i++) {
+ for (size_t oy = 0; oy < output_height(); oy++) {
+ for (size_t ox = 0; ox < output_width(); ox++) {
+ for (size_t g = 0; g < groups(); g++) {
+ for (size_t oc = 0; oc < group_output_channels(); oc++) {
+ accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
+ bias[g * group_output_channels() + oc];
+ }
}
}
}
}
+ } else {
+ std::fill(accumulators.begin(), accumulators.end(), 0);
}
for (size_t i = 0; i < batch_size(); i++) {
for (size_t oy = 0; oy < output_height(); oy++) {
@@ -776,7 +797,7 @@
input_pixel_stride(), output_pixel_stride(),
input_zero_point, 1.0f /* input scale */,
kernel_zero_point, 1.0f /* kernel scale */,
- kernel.data(), bias.data(),
+ kernel.data(), has_bias() ? bias.data() : nullptr,
output_zero_point, output_scale, qmin(), qmax(),
0, &deconvolution_op));
@@ -819,17 +840,21 @@
std::fill(output.begin(), output.end(), 0xA5);
// Compute reference results for the second run, including renormalization.
- for (size_t i = 0; i < next_batch_size(); i++) {
- for (size_t oy = 0; oy < next_output_height(); oy++) {
- for (size_t ox = 0; ox < next_output_width(); ox++) {
- for (size_t g = 0; g < groups(); g++) {
- for (size_t oc = 0; oc < group_output_channels(); oc++) {
- next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
- bias[g * group_output_channels() + oc];
+ if (has_bias()) {
+ for (size_t i = 0; i < next_batch_size(); i++) {
+ for (size_t oy = 0; oy < next_output_height(); oy++) {
+ for (size_t ox = 0; ox < next_output_width(); ox++) {
+ for (size_t g = 0; g < groups(); g++) {
+ for (size_t oc = 0; oc < group_output_channels(); oc++) {
+ next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
+ bias[g * group_output_channels() + oc];
+ }
}
}
}
}
+ } else {
+ std::fill(next_accumulators.begin(), next_accumulators.end(), 0);
}
for (size_t i = 0; i < next_batch_size(); i++) {
for (size_t oy = 0; oy < next_output_height(); oy++) {
@@ -920,17 +945,21 @@
std::fill(output.begin(), output.end(), nanf(""));
// Compute reference results, without clamping.
- for (size_t i = 0; i < batch_size(); i++) {
- for (size_t oy = 0; oy < output_height(); oy++) {
- for (size_t ox = 0; ox < output_width(); ox++) {
- for (size_t g = 0; g < groups(); g++) {
- for (size_t oc = 0; oc < group_output_channels(); oc++) {
- output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
- bias[g * group_output_channels() + oc];
+ if (has_bias()) {
+ for (size_t i = 0; i < batch_size(); i++) {
+ for (size_t oy = 0; oy < output_height(); oy++) {
+ for (size_t ox = 0; ox < output_width(); ox++) {
+ for (size_t g = 0; g < groups(); g++) {
+ for (size_t oc = 0; oc < group_output_channels(); oc++) {
+ output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
+ bias[g * group_output_channels() + oc];
+ }
}
}
}
}
+ } else {
+ std::fill(output_ref.begin(), output_ref.end(), 0.0f);
}
for (size_t i = 0; i < batch_size(); i++) {
for (size_t oy = 0; oy < output_height(); oy++) {
@@ -985,7 +1014,7 @@
dilation_height(), dilation_width(),
groups(), group_input_channels(), group_output_channels(),
input_pixel_stride(), output_pixel_stride(),
- kernel.data(), bias.data(),
+ kernel.data(), has_bias() ? bias.data() : nullptr,
output_min, output_max,
0, &deconvolution_op));
@@ -1028,17 +1057,21 @@
std::fill(output.begin(), output.end(), nanf(""));
// Compute reference results for the second run, including clamping.
- for (size_t i = 0; i < next_batch_size(); i++) {
- for (size_t oy = 0; oy < next_output_height(); oy++) {
- for (size_t ox = 0; ox < next_output_width(); ox++) {
- for (size_t g = 0; g < groups(); g++) {
- for (size_t oc = 0; oc < group_output_channels(); oc++) {
- next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
- bias[g * group_output_channels() + oc];
+ if (has_bias()) {
+ for (size_t i = 0; i < next_batch_size(); i++) {
+ for (size_t oy = 0; oy < next_output_height(); oy++) {
+ for (size_t ox = 0; ox < next_output_width(); ox++) {
+ for (size_t g = 0; g < groups(); g++) {
+ for (size_t oc = 0; oc < group_output_channels(); oc++) {
+ next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
+ bias[g * group_output_channels() + oc];
+ }
}
}
}
}
+ } else {
+ std::fill(next_output_ref.begin(), next_output_ref.end(), 0.0f);
}
for (size_t i = 0; i < next_batch_size(); i++) {
for (size_t oy = 0; oy < next_output_height(); oy++) {
@@ -1131,5 +1164,6 @@
size_t next_batch_size_{0};
uint8_t qmin_{0};
uint8_t qmax_{255};
+ bool has_bias_{true};
size_t iterations_{1};
};
diff --git a/test/deconvolution.cc b/test/deconvolution.cc
index 470aa85..30ef3ea 100644
--- a/test/deconvolution.cc
+++ b/test/deconvolution.cc
@@ -123,6 +123,18 @@
.TestQ8();
}
+TEST(DECONVOLUTION_OP_Q8, 1x1_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(27, 29)
+ .kernel_size(1, 1)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestQ8();
+}
+
/**************************** Future GEMM path, grouped ****************************/
TEST(DECONVOLUTION_OP_Q8, grouped_1x1) {
@@ -245,6 +257,19 @@
.TestQ8();
}
+TEST(DECONVOLUTION_OP_Q8, grouped_1x1_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(27, 29)
+ .kernel_size(1, 1)
+ .groups(2)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestQ8();
+}
+
/**************************** Future GEMM path, batched ****************************/
TEST(DECONVOLUTION_OP_Q8, batched_1x1) {
@@ -367,6 +392,19 @@
.TestQ8();
}
+TEST(DECONVOLUTION_OP_Q8, batched_1x1_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .batch_size(2)
+ .input_size(27, 29)
+ .kernel_size(1, 1)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestQ8();
+}
+
/**************************** Future GEMM path, batched, grouped ****************************/
TEST(DECONVOLUTION_OP_Q8, batched_grouped_1x1) {
@@ -498,6 +536,20 @@
.TestQ8();
}
+TEST(DECONVOLUTION_OP_Q8, batched_grouped_1x1_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .batch_size(2)
+ .input_size(27, 29)
+ .kernel_size(1, 1)
+ .groups(2)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestQ8();
+}
+
/**************************** CONV path ****************************/
TEST(DECONVOLUTION_OP_Q8, 3x3) {
@@ -772,6 +824,19 @@
.TestQ8();
}
+TEST(DECONVOLUTION_OP_Q8, 3x3_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(13, 12)
+ .padding(1)
+ .kernel_size(3, 3)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestQ8();
+}
+
/**************************** CONV path, grouped ****************************/
TEST(DECONVOLUTION_OP_Q8, grouped_3x3) {
@@ -1065,6 +1130,20 @@
.TestQ8();
}
+TEST(DECONVOLUTION_OP_Q8, grouped_3x3_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(13, 12)
+ .padding(1)
+ .kernel_size(3, 3)
+ .groups(2)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestQ8();
+}
+
/**************************** CONV path, batched ****************************/
TEST(DECONVOLUTION_OP_Q8, batched_3x3) {
@@ -1358,6 +1437,20 @@
.TestQ8();
}
+TEST(DECONVOLUTION_OP_Q8, batched_3x3_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .batch_size(2)
+ .input_size(13, 12)
+ .padding(1)
+ .kernel_size(3, 3)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestQ8();
+}
+
/**************************** CONV path, grouped, batched ****************************/
TEST(DECONVOLUTION_OP_Q8, batched_grouped_3x3) {
@@ -1670,6 +1763,21 @@
.TestQ8();
}
+TEST(DECONVOLUTION_OP_Q8, batched_grouped_3x3_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .batch_size(2)
+ .input_size(13, 12)
+ .padding(1)
+ .kernel_size(3, 3)
+ .groups(2)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestQ8();
+}
+
/**************************** CONV path, setup ****************************/
TEST(DECONVOLUTION_OP_Q8, 3x3_setup_changing_batch) {
@@ -1980,6 +2088,20 @@
.TestQ8();
}
+TEST(DECONVOLUTION_OP_Q8, 3x3s2_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(10, 9)
+ .padding(1)
+ .kernel_size(3, 3)
+ .stride(2)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestQ8();
+}
+
/**************************** SUBCONV2D path, grouped ****************************/
TEST(DECONVOLUTION_OP_Q8, grouped_3x3s2) {
@@ -2260,6 +2382,21 @@
.TestQ8();
}
+TEST(DECONVOLUTION_OP_Q8, grouped_3x3s2_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(10, 9)
+ .padding(1)
+ .kernel_size(3, 3)
+ .stride(2)
+ .groups(2)
+ .group_input_channels(17)
+ .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestQ8();
+}
+
/**************************** SUBCONV2D path, batched ****************************/
TEST(DECONVOLUTION_OP_Q8, batched_3x3s2) {
@@ -2540,6 +2677,21 @@
.TestQ8();
}
+TEST(DECONVOLUTION_OP_Q8, batched_3x3s2_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .batch_size(2)
+ .input_size(10, 9)
+ .padding(1)
+ .kernel_size(3, 3)
+ .stride(2)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestQ8();
+}
+
/**************************** SUBCONV2D path, grouped, batched ****************************/
TEST(DECONVOLUTION_OP_Q8, batched_grouped_3x3s2) {
@@ -2837,6 +2989,22 @@
.TestQ8();
}
+TEST(DECONVOLUTION_OP_Q8, batched_grouped_3x3s2_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .batch_size(2)
+ .input_size(10, 9)
+ .padding(1)
+ .kernel_size(3, 3)
+ .stride(2)
+ .groups(2)
+ .group_input_channels(17)
+ .group_output_channels(xnn_params.q8.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestQ8();
+}
+
/**************************** SUBCONV2D path, setup ****************************/
TEST(DECONVOLUTION_OP_Q8, 3x3s2_setup_changing_batch) {
@@ -3000,6 +3168,18 @@
.TestF32();
}
+TEST(DECONVOLUTION_OP_F32, 1x1_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(27, 29)
+ .kernel_size(1, 1)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** Future GEMM path, grouped ****************************/
TEST(DECONVOLUTION_OP_F32, grouped_1x1) {
@@ -3122,6 +3302,19 @@
.TestF32();
}
+TEST(DECONVOLUTION_OP_F32, grouped_1x1_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(27, 29)
+ .kernel_size(1, 1)
+ .groups(2)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** Future GEMM path, batched ****************************/
TEST(DECONVOLUTION_OP_F32, batched_1x1) {
@@ -3244,6 +3437,19 @@
.TestF32();
}
+TEST(DECONVOLUTION_OP_F32, batched_1x1_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .batch_size(2)
+ .input_size(27, 29)
+ .kernel_size(1, 1)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** Future GEMM path, batched, grouped ****************************/
TEST(DECONVOLUTION_OP_F32, batched_grouped_1x1) {
@@ -3375,6 +3581,20 @@
.TestF32();
}
+TEST(DECONVOLUTION_OP_F32, batched_grouped_1x1_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .batch_size(2)
+ .input_size(27, 29)
+ .kernel_size(1, 1)
+ .groups(2)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** CONV path ****************************/
TEST(DECONVOLUTION_OP_F32, 3x3) {
@@ -3649,6 +3869,19 @@
.TestF32();
}
+TEST(DECONVOLUTION_OP_F32, 3x3_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(13, 12)
+ .padding(1)
+ .kernel_size(3, 3)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** CONV path, grouped ****************************/
TEST(DECONVOLUTION_OP_F32, grouped_3x3) {
@@ -3942,6 +4175,20 @@
.TestF32();
}
+TEST(DECONVOLUTION_OP_F32, grouped_3x3_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(13, 12)
+ .padding(1)
+ .kernel_size(3, 3)
+ .groups(2)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** CONV path, batched ****************************/
TEST(DECONVOLUTION_OP_F32, batched_3x3) {
@@ -4235,6 +4482,20 @@
.TestF32();
}
+TEST(DECONVOLUTION_OP_F32, batched_3x3_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .batch_size(2)
+ .input_size(13, 12)
+ .padding(1)
+ .kernel_size(3, 3)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** CONV path, grouped, batched ****************************/
TEST(DECONVOLUTION_OP_F32, batched_grouped_3x3) {
@@ -4547,6 +4808,21 @@
.TestF32();
}
+TEST(DECONVOLUTION_OP_F32, batched_grouped_3x3_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .batch_size(2)
+ .input_size(13, 12)
+ .padding(1)
+ .kernel_size(3, 3)
+ .groups(2)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** CONV path, setup ****************************/
TEST(DECONVOLUTION_OP_F32, 3x3_setup_changing_batch) {
@@ -4857,6 +5133,20 @@
.TestF32();
}
+TEST(DECONVOLUTION_OP_F32, 3x3s2_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(10, 9)
+ .padding(1)
+ .kernel_size(3, 3)
+ .stride(2)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** SUBCONV2D path, grouped ****************************/
TEST(DECONVOLUTION_OP_F32, grouped_3x3s2) {
@@ -5137,6 +5427,21 @@
.TestF32();
}
+TEST(DECONVOLUTION_OP_F32, grouped_3x3s2_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .input_size(10, 9)
+ .padding(1)
+ .kernel_size(3, 3)
+ .stride(2)
+ .groups(2)
+ .group_input_channels(17)
+ .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** SUBCONV2D path, batched ****************************/
TEST(DECONVOLUTION_OP_F32, batched_3x3s2) {
@@ -5417,6 +5722,21 @@
.TestF32();
}
+TEST(DECONVOLUTION_OP_F32, batched_3x3s2_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .batch_size(2)
+ .input_size(10, 9)
+ .padding(1)
+ .kernel_size(3, 3)
+ .stride(2)
+ .group_input_channels(23)
+ .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** SUBCONV2D path, grouped, batched ****************************/
TEST(DECONVOLUTION_OP_F32, batched_grouped_3x3s2) {
@@ -5714,6 +6034,22 @@
.TestF32();
}
+TEST(DECONVOLUTION_OP_F32, batched_grouped_3x3s2_without_bias) {
+ ASSERT_EQ(xnn_status_success, xnn_initialize());
+ DeconvolutionOperatorTester()
+ .has_bias(false)
+ .batch_size(2)
+ .input_size(10, 9)
+ .padding(1)
+ .kernel_size(3, 3)
+ .stride(2)
+ .groups(2)
+ .group_input_channels(17)
+ .group_output_channels(xnn_params.f32.gemm.nr * 2 + 3)
+ .iterations(3)
+ .TestF32();
+}
+
/**************************** SUBCONV2D path, setup ****************************/
TEST(DECONVOLUTION_OP_F32, 3x3s2_setup_changing_batch) {
diff --git a/test/fully-connected-operator-tester.h b/test/fully-connected-operator-tester.h
index f5b2cd6..5ddfc94 100644
--- a/test/fully-connected-operator-tester.h
+++ b/test/fully-connected-operator-tester.h
@@ -101,6 +101,15 @@
return this->qmax_;
}
+ inline FullyConnectedOperatorTester& has_bias(bool has_bias) {
+ this->has_bias_ = has_bias;
+ return *this;
+ }
+
+ inline bool has_bias() const {
+ return this->has_bias_;
+ }
+
inline FullyConnectedOperatorTester& iterations(size_t iterations) {
this->iterations_ = iterations;
return *this;
@@ -132,13 +141,16 @@
std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
std::generate(bias.begin(), bias.end(), std::ref(s32rng));
std::fill(output.begin(), output.end(), 0xA5);
- std::fill(accumulators.begin(), accumulators.end(), 0);
// Compute reference results, without renormalization.
- for (size_t i = 0; i < batch_size(); i++) {
- for (size_t oc = 0; oc < output_channels(); oc++) {
- accumulators[i * output_channels() + oc] = bias[oc];
+ if (has_bias()) {
+ for (size_t i = 0; i < batch_size(); i++) {
+ for (size_t oc = 0; oc < output_channels(); oc++) {
+ accumulators[i * output_channels() + oc] = bias[oc];
+ }
}
+ } else {
+ std::fill(accumulators.begin(), accumulators.end(), 0);
}
for (size_t i = 0; i < batch_size(); i++) {
for (size_t oc = 0; oc < output_channels(); oc++) {
@@ -175,7 +187,7 @@
input_stride(), output_stride(),
input_zero_point, 1.0f /* input scale */,
kernel_zero_point, 1.0f /* kernel scale */,
- kernel.data(), bias.data(),
+ kernel.data(), has_bias() ? bias.data() : nullptr,
output_zero_point, output_scale, qmin(), qmax(),
0, &fully_connected_op));
@@ -228,10 +240,14 @@
std::fill(output.begin(), output.end(), nanf(""));
// Compute reference results, without renormalization.
- for (size_t i = 0; i < batch_size(); i++) {
- for (size_t oc = 0; oc < output_channels(); oc++) {
- output_ref[i * output_channels() + oc] = bias[oc];
+ if (has_bias()) {
+ for (size_t i = 0; i < batch_size(); i++) {
+ for (size_t oc = 0; oc < output_channels(); oc++) {
+ output_ref[i * output_channels() + oc] = bias[oc];
+ }
}
+ } else {
+ std::fill(output_ref.begin(), output_ref.end(), 0.0f);
}
for (size_t i = 0; i < batch_size(); i++) {
for (size_t oc = 0; oc < output_channels(); oc++) {
@@ -262,7 +278,7 @@
xnn_create_fully_connected_nc_f32(
input_channels(), output_channels(),
input_stride(), output_stride(),
- kernel.data(), bias.data(),
+ kernel.data(), has_bias() ? bias.data() : nullptr,
output_min, output_max,
0, &fully_connected_op));
@@ -304,5 +320,6 @@
size_t batch_size_{1};
uint8_t qmin_{0};
uint8_t qmax_{255};
+ bool has_bias_{true};
size_t iterations_{1};
};
diff --git a/test/fully-connected.cc b/test/fully-connected.cc
index bff2db9..7f7cb8c 100644
--- a/test/fully-connected.cc
+++ b/test/fully-connected.cc
@@ -60,6 +60,16 @@
.TestQ8();
}
+TEST(FULLY_CONNECTED_OP_Q8, unit_batch_without_bias) {
+ FullyConnectedOperatorTester()
+ .has_bias(false)
+ .batch_size(1)
+ .input_channels(23)
+ .output_channels(19)
+ .iterations(3)
+ .TestQ8();
+}
+
TEST(FULLY_CONNECTED_OP_Q8, small_batch) {
FullyConnectedOperatorTester()
.batch_size(12)
@@ -109,6 +119,16 @@
.TestQ8();
}
+TEST(FULLY_CONNECTED_OP_Q8, small_batch_without_bias) {
+ FullyConnectedOperatorTester()
+ .has_bias(false)
+ .batch_size(12)
+ .input_channels(23)
+ .output_channels(19)
+ .iterations(3)
+ .TestQ8();
+}
+
TEST(FULLY_CONNECTED_OP_F32, unit_batch) {
FullyConnectedOperatorTester()
.batch_size(1)
@@ -158,6 +178,16 @@
.TestF32();
}
+TEST(FULLY_CONNECTED_OP_F32, unit_batch_without_bias) {
+ FullyConnectedOperatorTester()
+ .has_bias(false)
+ .batch_size(1)
+ .input_channels(23)
+ .output_channels(19)
+ .iterations(3)
+ .TestF32();
+}
+
TEST(FULLY_CONNECTED_OP_F32, small_batch) {
FullyConnectedOperatorTester()
.batch_size(12)
@@ -206,3 +236,13 @@
.iterations(3)
.TestF32();
}
+
+TEST(FULLY_CONNECTED_OP_F32, small_batch_without_bias) {
+ FullyConnectedOperatorTester()
+ .has_bias(false)
+ .batch_size(12)
+ .input_channels(23)
+ .output_channels(19)
+ .iterations(3)
+ .TestF32();
+}