// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>


class ConvolutionOperatorTester {
 public:
  enum class WeightsType {
    Default,
    FP32,
  };

  inline ConvolutionOperatorTester& padding_tf_same(bool padding_same) {
    if (padding_same) {
      assert(padding_top() == 0);
      assert(padding_left() == 0);
      assert(padding_bottom() == 0);
      assert(padding_right() == 0);
    }
    this->padding_tf_same_ = padding_same;
    return *this;
  }

  inline bool padding_tf_same() const {
    return this->padding_tf_same_;
  }

  inline ConvolutionOperatorTester& padding(uint32_t padding) {
    assert(!padding_tf_same());
    this->padding_top_ = padding;
    this->padding_right_ = padding;
    this->padding_bottom_ = padding;
    this->padding_left_ = padding;
    return *this;
  }

  inline ConvolutionOperatorTester& padding(uint32_t padding_height, uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_right_ = padding_width;
    this->padding_bottom_ = padding_height;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_height(uint32_t padding_height) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_bottom_ = padding_height;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_width(uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_width;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_top(uint32_t padding_top) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_top;
    return *this;
  }

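  // When TF SAME padding is requested, the explicit padding must stay zero; the effective padding is
  // derived from the output size, with the top/left side getting the smaller half of the total padding
  // and the bottom/right side getting the remainder.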
  inline uint32_t padding_top() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_height =
        (output_height() - 1) * subsampling_height() + dilated_kernel_height() - input_height();
      return total_padding_height / 2;
    } else {
      return this->padding_top_;
    }
  }

  inline ConvolutionOperatorTester& padding_left(uint32_t padding_left) {
    assert(!padding_tf_same());
    this->padding_left_ = padding_left;
    return *this;
  }

  inline uint32_t padding_left() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_width =
        (output_width() - 1) * subsampling_width() + dilated_kernel_width() - input_width();
      return total_padding_width / 2;
    } else {
      return this->padding_left_;
    }
  }

  inline ConvolutionOperatorTester& padding_bottom(uint32_t padding_bottom) {
    assert(!padding_tf_same());
    this->padding_bottom_ = padding_bottom;
    return *this;
  }

  inline uint32_t padding_bottom() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_height =
        (output_height() - 1) * subsampling_height() + dilated_kernel_height() - input_height();
      return total_padding_height - total_padding_height / 2;
    } else {
      return this->padding_bottom_;
    }
  }

  inline ConvolutionOperatorTester& padding_right(uint32_t padding_right) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_right;
    return *this;
  }

  inline uint32_t padding_right() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_width =
        (output_width() - 1) * subsampling_width() + dilated_kernel_width() - input_width();
      return total_padding_width - total_padding_width / 2;
    } else {
      return this->padding_right_;
    }
  }

  inline ConvolutionOperatorTester& input_size(uint32_t input_height, uint32_t input_width) {
    assert(input_height >= 1);
    assert(input_width >= 1);
    this->input_height_ = input_height;
    this->input_width_ = input_width;
    return *this;
  }

  inline ConvolutionOperatorTester& input_height(uint32_t input_height) {
    assert(input_height >= 1);
    this->input_height_ = input_height;
    return *this;
  }

  inline uint32_t input_height() const {
    return this->input_height_;
  }

  inline ConvolutionOperatorTester& input_width(uint32_t input_width) {
    assert(input_width >= 1);
    this->input_width_ = input_width;
    return *this;
  }

  inline uint32_t input_width() const {
    return this->input_width_;
  }

  inline ConvolutionOperatorTester& groups(uint32_t groups) {
    assert(groups >= 1);
    this->groups_ = groups;
    return *this;
  }

  inline uint32_t groups() const {
    return this->groups_;
  }

  inline ConvolutionOperatorTester& group_input_channels(size_t group_input_channels) {
    assert(group_input_channels >= 1);
    this->group_input_channels_ = group_input_channels;
    return *this;
  }

  inline size_t group_input_channels() const {
    return this->group_input_channels_;
  }

  inline ConvolutionOperatorTester& group_output_channels(size_t group_output_channels) {
    assert(group_output_channels >= 1);
    this->group_output_channels_ = group_output_channels;
    return *this;
  }

  inline size_t group_output_channels() const {
    return this->group_output_channels_;
  }

  inline ConvolutionOperatorTester& batch_size(size_t batch_size) {
    assert(batch_size >= 1);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline ConvolutionOperatorTester& kernel_size(uint32_t kernel_size) {
    assert(kernel_size >= 1);
    this->kernel_height_ = kernel_size;
    this->kernel_width_ = kernel_size;
    return *this;
  }

  inline ConvolutionOperatorTester& kernel_size(uint32_t kernel_height, uint32_t kernel_width) {
    assert(kernel_height >= 1);
    assert(kernel_width >= 1);
    this->kernel_height_ = kernel_height;
    this->kernel_width_ = kernel_width;
    return *this;
  }

  inline ConvolutionOperatorTester& kernel_height(uint32_t kernel_height) {
    assert(kernel_height >= 1);
    this->kernel_height_ = kernel_height;
    return *this;
  }

  inline uint32_t kernel_height() const {
    return this->kernel_height_;
  }

  inline ConvolutionOperatorTester& kernel_width(uint32_t kernel_width) {
    assert(kernel_width >= 1);
    this->kernel_width_ = kernel_width;
    return *this;
  }

  inline uint32_t kernel_width() const {
    return this->kernel_width_;
  }

  inline ConvolutionOperatorTester& dilation(uint32_t dilation) {
    assert(dilation >= 1);
    this->dilation_height_ = dilation;
    this->dilation_width_ = dilation;
    return *this;
  }

  inline ConvolutionOperatorTester& dilation(uint32_t dilation_height, uint32_t dilation_width) {
    assert(dilation_height >= 1);
    assert(dilation_width >= 1);
    this->dilation_height_ = dilation_height;
    this->dilation_width_ = dilation_width;
    return *this;
  }

  inline ConvolutionOperatorTester& dilation_height(uint32_t dilation_height) {
    assert(dilation_height >= 1);
    this->dilation_height_ = dilation_height;
    return *this;
  }

  inline uint32_t dilation_height() const {
    return this->dilation_height_;
  }

  inline ConvolutionOperatorTester& dilation_width(uint32_t dilation_width) {
    assert(dilation_width >= 1);
    this->dilation_width_ = dilation_width;
    return *this;
  }

  inline uint32_t dilation_width() const {
    return this->dilation_width_;
  }

  inline ConvolutionOperatorTester& subsampling(uint32_t subsampling) {
    assert(subsampling >= 1);
    this->subsampling_height_ = subsampling;
    this->subsampling_width_ = subsampling;
    return *this;
  }

  inline ConvolutionOperatorTester& subsampling(uint32_t subsampling_height, uint32_t subsampling_width) {
    assert(subsampling_height >= 1);
    assert(subsampling_width >= 1);
    this->subsampling_height_ = subsampling_height;
    this->subsampling_width_ = subsampling_width;
    return *this;
  }

  inline ConvolutionOperatorTester& subsampling_height(uint32_t subsampling_height) {
    assert(subsampling_height >= 1);
    this->subsampling_height_ = subsampling_height;
    return *this;
  }

  inline uint32_t subsampling_height() const {
    return this->subsampling_height_;
  }

  inline ConvolutionOperatorTester& subsampling_width(uint32_t subsampling_width) {
    assert(subsampling_width >= 1);
    this->subsampling_width_ = subsampling_width;
    return *this;
  }

  inline uint32_t subsampling_width() const {
    return this->subsampling_width_;
  }

  inline ConvolutionOperatorTester& input_channel_stride(size_t input_channel_stride) {
    assert(input_channel_stride >= 1);
    this->input_channel_stride_ = input_channel_stride;
    return *this;
  }

  inline size_t input_channel_stride() const {
    if (this->input_channel_stride_ == 0) {
      return group_input_channels() * groups();
    } else {
      assert(this->input_channel_stride_ >= group_input_channels() * groups());
      return this->input_channel_stride_;
    }
  }

  inline ConvolutionOperatorTester& output_channel_stride(size_t output_channel_stride) {
    assert(output_channel_stride >= 1);
    this->output_channel_stride_ = output_channel_stride;
    return *this;
  }

  inline size_t output_channel_stride() const {
    if (this->output_channel_stride_ == 0) {
      return group_output_channels() * groups();
    } else {
      assert(this->output_channel_stride_ >= group_output_channels() * groups());
      return this->output_channel_stride_;
    }
  }

  inline uint32_t dilated_kernel_height() const {
    return (kernel_height() - 1) * dilation_height() + 1;
  }

  inline uint32_t dilated_kernel_width() const {
    return (kernel_width() - 1) * dilation_width() + 1;
  }

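  // Output size follows the usual convolution arithmetic: ceil(input / stride) under TF SAME padding,
  // otherwise (padded input - dilated kernel) / stride + 1, clamped to a minimum of 1.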
  inline size_t output_height() const {
    if (padding_tf_same()) {
      return (input_height() + subsampling_height() - 1) / subsampling_height();
    } else {
      const size_t padded_input_height = padding_top() + input_height() + padding_bottom();
      if (padded_input_height <= dilated_kernel_height()) {
        return 1;
      } else {
        return (padded_input_height - dilated_kernel_height()) / subsampling_height() + 1;
      }
    }
  }

  inline size_t output_width() const {
    if (padding_tf_same()) {
      return (input_width() + subsampling_width() - 1) / subsampling_width();
    } else {
      const size_t padded_input_width = padding_left() + input_width() + padding_right();
      if (padded_input_width <= dilated_kernel_width()) {
        return 1;
      } else {
        return (padded_input_width - dilated_kernel_width()) / subsampling_width() + 1;
      }
    }
  }

  inline ConvolutionOperatorTester& next_input_size(uint32_t next_input_height, uint32_t next_input_width) {
    assert(next_input_height >= 1);
    assert(next_input_width >= 1);
    this->next_input_height_ = next_input_height;
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline ConvolutionOperatorTester& next_input_height(uint32_t next_input_height) {
    assert(next_input_height >= 1);
    this->next_input_height_ = next_input_height;
    return *this;
  }

  inline uint32_t next_input_height() const {
    if (this->next_input_height_ == 0) {
      return input_height();
    } else {
      return this->next_input_height_;
    }
  }

  inline ConvolutionOperatorTester& next_input_width(uint32_t next_input_width) {
    assert(next_input_width >= 1);
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline uint32_t next_input_width() const {
    if (this->next_input_width_ == 0) {
      return input_width();
    } else {
      return this->next_input_width_;
    }
  }

  inline size_t next_output_height() const {
    const size_t padded_input_height = padding_top() + next_input_height() + padding_bottom();
    if (padded_input_height <= dilated_kernel_height()) {
      return 1;
    } else {
      return (padded_input_height - dilated_kernel_height()) / subsampling_height() + 1;
    }
  }

  inline size_t next_output_width() const {
    const size_t padded_input_width = padding_left() + next_input_width() + padding_right();
    if (padded_input_width <= dilated_kernel_width()) {
      return 1;
    } else {
      return (padded_input_width - dilated_kernel_width()) / subsampling_width() + 1;
    }
  }

  inline ConvolutionOperatorTester& next_batch_size(size_t next_batch_size) {
    assert(next_batch_size >= 1);
    this->next_batch_size_ = next_batch_size;
    return *this;
  }

  inline size_t next_batch_size() const {
    if (this->next_batch_size_ == 0) {
      return batch_size();
    } else {
      return this->next_batch_size_;
    }
  }

  inline ConvolutionOperatorTester& sparsity(float sparsity) {
    this->sparsity_ = sparsity;
    return *this;
  }

  inline float sparsity() const {
    return this->sparsity_;
  }

  inline ConvolutionOperatorTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline ConvolutionOperatorTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline ConvolutionOperatorTester& force_nhwc_input(bool force_nhwc_input) {
    this->force_nhwc_input_ = force_nhwc_input;
    return *this;
  }

  inline bool force_nhwc_input() const {
    return this->force_nhwc_input_;
  }

  inline ConvolutionOperatorTester& depthwise_layout(bool depthwise_layout) {
    this->depthwise_layout_ = depthwise_layout;
    return *this;
  }

  inline bool depthwise_layout() const {
    return this->depthwise_layout_;
  }

  inline ConvolutionOperatorTester& has_bias(bool has_bias) {
    this->has_bias_ = has_bias;
    return *this;
  }

  inline bool has_bias() const {
    return this->has_bias_;
  }

  inline ConvolutionOperatorTester& weights_type(WeightsType weights_type) {
    this->weights_type_ = weights_type;
    return *this;
  }

  inline WeightsType weights_type() const {
    return this->weights_type_;
  }

  inline ConvolutionOperatorTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

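  // Tests the per-channel quantized (QC8) NHWC convolution path: computes int32 reference accumulators,
  // derives per-channel requantization scales from the observed accumulator range, then creates, sets up,
  // and runs the operator and compares its output against the renormalized reference.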
  void TestNHWCxQC8() const {
    ASSERT_EQ(weights_type(), WeightsType::Default);

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
    auto i8rng = std::bind(
      std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
      std::ref(rng));
    auto w8rng = std::bind(
      std::uniform_int_distribution<int32_t>(-std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::max()),
      std::ref(rng));

    std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) +
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()));
    std::vector<int8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<int32_t> bias(groups() * group_output_channels());
    std::vector<int8_t> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
    std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<float> requantization_scales(groups() * group_output_channels());

    const int8_t input_zero_point = -1;
    const int8_t output_zero_point = -1;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(i8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(w8rng));
      std::generate(bias.begin(), bias.end(), std::ref(i32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(accumulators.begin(), accumulators.end(), 0);
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g]) - int32_t(input_zero_point)) *
                            int32_t(kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc]);
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                              int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute renormalization parameters.
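      // For each output channel, pick a scale that keeps that channel's extreme accumulators inside
      // the int8 output range, capped just below 1.0 (0x1.FFFFFEp-1f).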
      for (size_t c = 0; c < groups() * group_output_channels(); c++) {
        int32_t accumulated_min = accumulators[c];
        int32_t accumulated_max = accumulators[c];
        for (size_t px = 0; px < batch_size() * output_height() * output_width(); px++) {
          accumulated_min = std::min(accumulated_min, accumulators[px * groups() * group_output_channels() + c]);
          accumulated_max = std::max(accumulated_max, accumulators[px * groups() * group_output_channels() + c]);
        }

        float requantization_scale = 0x1.0p-32f;
        if (accumulated_max != 0) {
          requantization_scale = std::max(requantization_scale,
            float(int32_t(std::numeric_limits<int8_t>::max()) - int32_t(output_zero_point)) / float(accumulated_max));
        }
        if (accumulated_min != 0) {
          requantization_scale = std::max(requantization_scale,
            float(int32_t(std::numeric_limits<int8_t>::min()) - int32_t(output_zero_point)) / float(accumulated_min));
        }
        requantization_scale = std::min(requantization_scale, 0x1.FFFFFEp-1f);

        requantization_scales[c] = requantization_scale;
      }

      // Renormalize reference results.
      for (size_t c = 0; c < groups() * group_output_channels(); c++) {
        for (size_t px = 0; px < batch_size() * output_height() * output_width(); px++) {
          output_ref[px * groups() * group_output_channels() + c] = double(int32_t(output_zero_point)) +
            double(accumulators[px * groups() * group_output_channels() + c]) * double(requantization_scales[c]);
        }
      }
      std::transform(output_ref.cbegin(), output_ref.cend(), output_ref.begin(),
        [this](double x) -> double {
          return std::max<double>(std::min<double>(x, double(qmax() - 0x80)), double(qmin() - 0x80));
        });

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      xnn_status status = xnn_create_convolution2d_nhwc_qc8(
        padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
        padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
        kernel_height(), kernel_width(),
        subsampling_height(), subsampling_width(),
        dilation_height(), dilation_width(),
        groups(), group_input_channels(), group_output_channels(),
        input_channel_stride(), output_channel_stride(),
        input_zero_point, 1.0f /* input scale */, requantization_scales.data(),
        kernel.data(), has_bias() ? bias.data() : nullptr,
        output_zero_point, 1.0f /* output scale */, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80),
        (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
        &convolution_op);
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, convolution_op);

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_qc8(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax() - 0x80))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin() - 0x80))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

  void TestNHWCxQS8() const {
    ASSERT_EQ(weights_type(), WeightsType::Default);

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
    auto i8rng = std::bind(
      std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
      std::ref(rng));
    auto w8rng = std::bind(
      std::uniform_int_distribution<int32_t>(-std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::max()),
      std::ref(rng));

    std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) +
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()));
    std::vector<int8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<int32_t> bias(groups() * group_output_channels());
    std::vector<int8_t> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
    std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    const int8_t input_zero_point = -1;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(i8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(w8rng));
      std::generate(bias.begin(), bias.end(), std::ref(i32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(accumulators.begin(), accumulators.end(), 0);
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g]) - int32_t(input_zero_point)) *
                            int32_t(kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc]);
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                              int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute renormalization parameters.
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const int8_t output_zero_point = int8_t(std::max(std::min(
        lrint(-0.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<int8_t>::max())), long(std::numeric_limits<int8_t>::min())));

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax() - 0x80) - output_zero_point), double(qmin() - 0x80) - output_zero_point);
        });

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      xnn_status status = xnn_create_convolution2d_nhwc_qs8(
        padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
        padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
        kernel_height(), kernel_width(),
        subsampling_height(), subsampling_width(),
        dilation_height(), dilation_width(),
        groups(), group_input_channels(), group_output_channels(),
        input_channel_stride(), output_channel_stride(),
        input_zero_point, 1.0f /* input scale */, 1.0f /* kernel scale */,
        kernel.data(), has_bias() ? bias.data() : nullptr,
        output_zero_point, output_scale, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80),
        (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
        &convolution_op);
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, convolution_op);

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_qs8(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax() - 0x80))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin() - 0x80))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

  void TestNHWCxQU8() const {
    ASSERT_EQ(weights_type(), WeightsType::Default);

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
    auto u8rng = std::bind(
      std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()));
    std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<int32_t> bias(groups() * group_output_channels());
    std::vector<uint8_t> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
    std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    const uint8_t input_zero_point = 127;
    const uint8_t kernel_zero_point = 127;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
      std::generate(bias.begin(), bias.end(), std::ref(i32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(accumulators.begin(), accumulators.end(), 0);
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                              (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute renormalization parameters.
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const uint8_t output_zero_point = uint8_t(std::max(std::min(
        lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      xnn_status status = xnn_create_convolution2d_nhwc_qu8(
        padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
        padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
        kernel_height(), kernel_width(),
        subsampling_height(), subsampling_width(),
        dilation_height(), dilation_width(),
        groups(), group_input_channels(), group_output_channels(),
        input_channel_stride(), output_channel_stride(),
        input_zero_point, 1.0f /* input scale */,
        kernel_zero_point, 1.0f /* kernel scale */,
        kernel.data(), has_bias() ? bias.data() : nullptr,
        output_zero_point, output_scale, qmin(), qmax(),
        (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
        &convolution_op);
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, convolution_op);

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_qu8(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

  void TestNHWCxF32() const {
    ASSERT_EQ(weights_type(), WeightsType::Default);

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), std::ref(rng));

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()));
    std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g] *
                            kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic] *
                              kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

      const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      xnn_status status = xnn_create_convolution2d_nhwc_f32(
        padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
        padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
        kernel_height(), kernel_width(),
        subsampling_height(), subsampling_width(),
        dilation_height(), dilation_width(),
        groups(), group_input_channels(), group_output_channels(),
        input_channel_stride(), output_channel_stride(),
        kernel.data(), has_bias() ? bias.data() : nullptr,
        output_min, output_max,
        (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
        &convolution_op);
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, convolution_op);

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c],
                  1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

Frank Barchard49b4dcc2020-06-26 14:07:19 -07001226 void TestNHWCxF16() const {
Marat Dukhan6989ec42022-01-14 17:14:35 -08001227 switch (weights_type()) {
1228 case WeightsType::Default:
1229 break;
1230 case WeightsType::FP32:
1231 break;
1232 default:
1233 GTEST_FAIL() << "unexpected weights type";
1234 }
1235
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001236 std::random_device random_device;
1237 auto rng = std::mt19937(random_device());
Marat Dukhan57c78272021-08-10 22:20:20 -07001238 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001239 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
1240
1241 std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) +
1242 batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()));
1243 std::vector<uint16_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
Marat Dukhan6989ec42022-01-14 17:14:35 -08001244 std::vector<float> kernel_as_float(kernel.size());
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001245 std::vector<uint16_t> bias(groups() * group_output_channels());
Marat Dukhan6989ec42022-01-14 17:14:35 -08001246 std::vector<float> bias_as_float(bias.size());
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001247 std::vector<uint16_t> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
1248 std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1249
1250 for (size_t iteration = 0; iteration < iterations(); iteration++) {
1251 std::generate(input.begin(), input.end(), std::ref(f16rng));
1252 std::generate(kernel.begin(), kernel.end(), std::ref(f16rng));
Marat Dukhan6989ec42022-01-14 17:14:35 -08001253 std::transform(kernel.cbegin(), kernel.cend(), kernel_as_float.begin(), fp16_ieee_to_fp32_value);
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001254 std::generate(bias.begin(), bias.end(), std::ref(f16rng));
Marat Dukhan6989ec42022-01-14 17:14:35 -08001255 std::transform(bias.cbegin(), bias.cend(), bias_as_float.begin(), fp16_ieee_to_fp32_value);
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001256 std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
1257
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001259 // Compute reference results, without clamping.
1260 if (has_bias()) {
1261 for (size_t i = 0; i < batch_size(); i++) {
1262 for (size_t oy = 0; oy < output_height(); oy++) {
1263 for (size_t ox = 0; ox < output_width(); ox++) {
1264 for (size_t g = 0; g < groups(); g++) {
1265 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1266 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1267 fp16_ieee_to_fp32_value(bias[g * group_output_channels() + oc]);
1268 }
1269 }
1270 }
1271 }
1272 }
1273 } else {
1274 std::fill(output_ref.begin(), output_ref.end(), 0.0f);
1275 }
1276 if (depthwise_layout()) {
1277 ASSERT_EQ(group_input_channels(), 1);
1278
1279 for (size_t i = 0; i < batch_size(); i++) {
1280 for (size_t oy = 0; oy < output_height(); oy++) {
1281 for (size_t ox = 0; ox < output_width(); ox++) {
1282 for (size_t ky = 0; ky < kernel_height(); ky++) {
1283 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1284 if (iy < input_height()) {
1285 for (size_t kx = 0; kx < kernel_width(); kx++) {
1286 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1287 if (ix < input_width()) {
1288 for (size_t g = 0; g < groups(); g++) {
1289 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1290 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
1291 fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g]) *
1292 fp16_ieee_to_fp32_value(kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc]);
1293 }
1294 }
1295 }
1296 }
1297 }
1298 }
1299 }
1300 }
1301 }
1302 } else {
1303 for (size_t i = 0; i < batch_size(); i++) {
1304 for (size_t oy = 0; oy < output_height(); oy++) {
1305 for (size_t ox = 0; ox < output_width(); ox++) {
1306 for (size_t ky = 0; ky < kernel_height(); ky++) {
1307 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1308 if (iy < input_height()) {
1309 for (size_t kx = 0; kx < kernel_width(); kx++) {
1310 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1311 if (ix < input_width()) {
1312 for (size_t g = 0; g < groups(); g++) {
1313 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1314 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1315 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
1316 fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) *
1317 fp16_ieee_to_fp32_value(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
1318 }
1319 }
1320 }
1321 }
1322 }
1323 }
1324 }
1325 }
1326 }
1327 }
1328 }
1329
1330 // Compute clamping parameters.
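      // Scheme used below: qmin()/qmax() select clamp bounds that lie qmin/255 above the minimum
      // and (255 - qmax)/255 below the maximum of the reference accumulator range, rounded through
      // fp16 so the bounds are representable by the f16 operator. If rounding collapses both bounds
      // to the same value, clamping is disabled (+/-infinity).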
1331 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
1332 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
1333 const float accumulated_range = accumulated_max - accumulated_min;
1334 const float scaled_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + accumulated_range / 255.0f * float(qmin())));
1335 const float scaled_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - accumulated_range / 255.0f * float(255 - qmax())));
1336 const float output_min = scaled_min == scaled_max ? -std::numeric_limits<float>::infinity() : scaled_min;
1337 const float output_max = scaled_min == scaled_max ? +std::numeric_limits<float>::infinity() : scaled_max;
1338
1339 // Clamp reference results.
1340 for (float& value : output_ref) {
1341 value = std::max(std::min(value, output_max), output_min);
1342 }
1343
1344      // Create, set up, run, and destroy the Convolution operator.
1345 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1346 xnn_operator_t convolution_op = nullptr;
1347
Marat Dukhan6989ec42022-01-14 17:14:35 -08001348 const void* kernel_data = kernel.data();
1349 const void* bias_data = bias.data();
1350 if (weights_type() == WeightsType::FP32) {
1351 kernel_data = kernel_as_float.data();
1352 bias_data = bias_as_float.data();
1353 }
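      // When weights_type() is WeightsType::FP32, the fp32 copies of the kernel and bias are passed
      // to the f16 operator together with XNN_FLAG_FP32_STATIC_WEIGHTS (assumption: the flag makes
      // the operator convert the static fp32 weights to fp16 internally at creation time).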
1354 uint32_t flags = 0;
1355 if (depthwise_layout()) {
1356 flags |= XNN_FLAG_DEPTHWISE_CONVOLUTION;
1357 }
1358 if (padding_tf_same()) {
1359 flags |= XNN_FLAG_TENSORFLOW_SAME_PADDING;
1360 }
1361 if (weights_type() == WeightsType::FP32) {
1362 flags |= XNN_FLAG_FP32_STATIC_WEIGHTS;
1363 }
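      // With XNN_FLAG_TENSORFLOW_SAME_PADDING set, the explicit padding arguments below are passed
      // as zero and the operator derives the SAME padding from the kernel, stride, and input size.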
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001364 xnn_status status = xnn_create_convolution2d_nhwc_f16(
1365 padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
1366 padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
1367 kernel_height(), kernel_width(),
1368 subsampling_height(), subsampling_width(),
1369 dilation_height(), dilation_width(),
1370 groups(), group_input_channels(), group_output_channels(),
1371 input_channel_stride(), output_channel_stride(),
Marat Dukhan6989ec42022-01-14 17:14:35 -08001372 kernel_data, has_bias() ? bias_data : nullptr,
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001373 output_min, output_max,
Marat Dukhan6989ec42022-01-14 17:14:35 -08001374 flags,
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001375 &convolution_op);
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001376 if (status == xnn_status_unsupported_hardware) {
1377 GTEST_SKIP();
1378 }
1379 ASSERT_EQ(xnn_status_success, status);
1380 ASSERT_NE(nullptr, convolution_op);
1381
1382 // Smart pointer to automatically delete convolution_op.
1383 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
1384
1385 ASSERT_EQ(xnn_status_success,
1386 xnn_setup_convolution2d_nhwc_f16(
1387 convolution_op,
1388 batch_size(), input_height(), input_width(),
1389 input.data(), output.data(),
1390 nullptr /* thread pool */));
1391
1392 ASSERT_EQ(xnn_status_success,
1393 xnn_run_operator(convolution_op, nullptr /* thread pool */));
1394
1395 // Verify results.
1396 for (size_t i = 0; i < batch_size(); i++) {
1397 for (size_t y = 0; y < output_height(); y++) {
1398 for (size_t x = 0; x < output_width(); x++) {
1399 for (size_t g = 0; g < groups(); g++) {
1400 for (size_t c = 0; c < group_output_channels(); c++) {
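                // The min/max range checks below are kept commented out (assumption: fp16 rounding
                // of the clamped outputs can land marginally outside the fp32 clamp bounds), so only
                // the relative-error comparison is used to validate f16 outputs.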
1401// ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_min)
1402// << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1403// ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_max)
1404// << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
Frank Barchard2b9d29b2020-09-17 12:03:39 -07001405 ASSERT_NEAR(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c], fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), std::max(1.0e-4f, std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]) * 1.0e-2f))
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001406 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1407 }
1408 }
1409 }
1410 }
1411 }
1412 }
1413 }
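  // Usage sketch (hypothetical test, assuming the tester's usual setter methods such as
  // input_size()/kernel_size()/groups()/group_input_channels()/group_output_channels()/iterations()):
  //
  //   TEST(CONVOLUTION_NHWC_F16, grouped_3x3) {
  //     ConvolutionOperatorTester()
  //       .input_size(13, 14)
  //       .kernel_size(3, 3)
  //       .groups(2)
  //       .group_input_channels(15)
  //       .group_output_channels(17)
  //       .iterations(3)
  //       .TestNHWCxF16();
  //   }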
1414
Marat Dukhanefc47b82019-11-18 09:25:38 -08001415 void TestNCHWxF32() const {
Marat Dukhan6989ec42022-01-14 17:14:35 -08001416 ASSERT_EQ(weights_type(), WeightsType::Default);
1417
Marat Dukhanefc47b82019-11-18 09:25:38 -08001418 std::random_device random_device;
1419 auto rng = std::mt19937(random_device());
Marat Dukhan57c78272021-08-10 22:20:20 -07001420 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), std::ref(rng));
Marat Dukhanefc47b82019-11-18 09:25:38 -08001421 auto prng = std::bind(std::uniform_real_distribution<float>(), rng);
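    // prng drives the random kernel sparsification below: each weight is zeroed with probability
    // sparsity(), presumably to exercise sparse-kernel code paths in the NCHW implementation.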
1422
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001423 std::vector<float> input(2 * XNN_EXTRA_BYTES / sizeof(float) +
1424 ((batch_size() - 1) * input_channel_stride() + groups() * group_input_channels()) * input_height() * input_width());
Marat Dukhanefc47b82019-11-18 09:25:38 -08001425 std::vector<float> kernel(
1426 groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
1427 std::vector<float> bias(groups() * group_output_channels());
1428 std::vector<float> output(
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001429 ((batch_size() - 1) * output_channel_stride() + groups() * group_output_channels()) * output_height() * output_width());
Marat Dukhanefc47b82019-11-18 09:25:38 -08001430 std::vector<float> output_ref(batch_size() * groups() * group_output_channels() * output_height() * output_width());
1431
1432 for (size_t iteration = 0; iteration < iterations(); iteration++) {
1433 std::generate(input.begin(), input.end(), std::ref(f32rng));
1434 std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
1435 for (float& k : kernel) {
1436 if (prng() <= sparsity()) {
1437 k = 0.0f;
1438 }
1439 }
1440 std::generate(bias.begin(), bias.end(), std::ref(f32rng));
1441 std::fill(output.begin(), output.end(), nanf(""));
1442
1443 // Compute reference results, without clamping.
1444 if (has_bias()) {
1445 for (size_t i = 0; i < batch_size(); i++) {
1446 for (size_t oy = 0; oy < output_height(); oy++) {
1447 for (size_t ox = 0; ox < output_width(); ox++) {
1448 for (size_t g = 0; g < groups(); g++) {
1449 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1450 output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] =
1451 bias[g * group_output_channels() + oc];
1452 }
1453 }
1454 }
1455 }
1456 }
1457 } else {
1458 std::fill(output_ref.begin(), output_ref.end(), 0.0f);
1459 }
1460 if (force_nhwc_input()) {
1461 for (size_t i = 0; i < batch_size(); i++) {
1462 for (size_t oy = 0; oy < output_height(); oy++) {
1463 for (size_t ox = 0; ox < output_width(); ox++) {
1464 for (size_t ky = 0; ky < kernel_height(); ky++) {
1465 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1466 if (iy < input_height()) {
1467 for (size_t kx = 0; kx < kernel_width(); kx++) {
1468 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1469 if (ix < input_width()) {
1470 for (size_t g = 0; g < groups(); g++) {
1471 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1472 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1473 output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
1474 input[((((i * input_height() + iy) * input_width() + ix) * groups() + g) * group_input_channels() + ic)] *
1475 kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
1476 }
1477 }
1478 }
1479 }
1480 }
1481 }
1482 }
1483 }
1484 }
1485 }
Marat Dukhan33032712020-06-18 11:06:04 -07001486 } else if (depthwise_layout()) {
1487 ASSERT_EQ(group_input_channels(), 1);
1488
1489 for (size_t i = 0; i < batch_size(); i++) {
1490 for (size_t oy = 0; oy < output_height(); oy++) {
1491 for (size_t ox = 0; ox < output_width(); ox++) {
1492 for (size_t ky = 0; ky < kernel_height(); ky++) {
1493 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1494 if (iy < input_height()) {
1495 for (size_t kx = 0; kx < kernel_width(); kx++) {
1496 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1497 if (ix < input_width()) {
1498 for (size_t g = 0; g < groups(); g++) {
1499 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1500 output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
1501 input[((i * input_channel_stride() + g) * input_height() + iy) * input_width() + ix] *
1502 kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc];
1503 }
1504 }
1505 }
1506 }
1507 }
1508 }
1509 }
1510 }
1511 }
Marat Dukhanefc47b82019-11-18 09:25:38 -08001512 } else {
1513 for (size_t i = 0; i < batch_size(); i++) {
1514 for (size_t oy = 0; oy < output_height(); oy++) {
1515 for (size_t ox = 0; ox < output_width(); ox++) {
1516 for (size_t ky = 0; ky < kernel_height(); ky++) {
1517 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1518 if (iy < input_height()) {
1519 for (size_t kx = 0; kx < kernel_width(); kx++) {
1520 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1521 if (ix < input_width()) {
1522 for (size_t g = 0; g < groups(); g++) {
1523 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1524 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1525 output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001526 input[((i * input_channel_stride() + g * group_input_channels() + ic) * input_height() + iy) * input_width() + ix] *
Marat Dukhanefc47b82019-11-18 09:25:38 -08001527 kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
1528 }
1529 }
1530 }
1531 }
1532 }
1533 }
1534 }
1535 }
1536 }
1537 }
1538 }
1539
1540 // Compute clamping parameters.
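      // qmin() == 0 and qmax() == 255 mean "no clamping" on the respective side; otherwise the
      // bound is placed qmin/255 (resp. (255 - qmax)/255) of the way into the accumulator range.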
1541 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
1542 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
1543
Marat Dukhan869c62d2020-04-09 17:17:55 -07001544 const float output_min = qmin() == 0 ? -std::numeric_limits<float>::infinity() :
1545 accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
1546 const float output_max = qmax() == 255 ? std::numeric_limits<float>::infinity() :
1547 accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());
Marat Dukhanefc47b82019-11-18 09:25:38 -08001548
1549 // Clamp reference results.
1550 for (float& value : output_ref) {
1551 value = std::max(std::min(value, output_max), output_min);
1552 }
1553
1554      // Create, set up, run, and destroy the Convolution operator.
Marat Dukhan04f03be2019-11-19 12:36:47 -08001555 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
Marat Dukhanefc47b82019-11-18 09:25:38 -08001556 xnn_operator_t convolution_op = nullptr;
1557
1558 xnn_status status = xnn_create_convolution2d_nchw_f32(
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07001559 padding_top(), padding_right(), padding_bottom(), padding_left(),
1560 kernel_height(), kernel_width(),
1561 subsampling_height(), subsampling_width(),
1562 dilation_height(), dilation_width(),
1563 groups(), group_input_channels(), group_output_channels(),
1564 input_channel_stride(), output_channel_stride(),
1565 kernel.data(), has_bias() ? bias.data() : nullptr,
1566 output_min, output_max,
1567 (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (force_nhwc_input() ? XNN_FLAG_INPUT_NHWC : 0),
1568 &convolution_op);
Marat Dukhanefc47b82019-11-18 09:25:38 -08001569 if (status == xnn_status_unsupported_parameter) {
1570 GTEST_SKIP();
1571 }
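      // The NCHW (channels-first) path implements only a subset of convolution parameters, so
      // unsupported combinations report xnn_status_unsupported_parameter and the test is skipped.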
1572 ASSERT_EQ(xnn_status_success, status);
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07001573 ASSERT_NE(nullptr, convolution_op);
Marat Dukhanefc47b82019-11-18 09:25:38 -08001574
1575 // Smart pointer to automatically delete convolution_op.
1576 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
1577
1578 ASSERT_EQ(xnn_status_success,
1579 xnn_setup_convolution2d_nchw_f32(
1580 convolution_op,
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001581 batch_size(), input_height(), input_width(),
Marat Dukhanefc47b82019-11-18 09:25:38 -08001582 input.data(), output.data(),
1583 nullptr /* thread pool */));
1584
1585 ASSERT_EQ(xnn_status_success,
1586 xnn_run_operator(convolution_op, nullptr /* thread pool */));
1587
1588 // Verify results.
1589 for (size_t i = 0; i < batch_size(); i++) {
1590 for (size_t y = 0; y < output_height(); y++) {
1591 for (size_t x = 0; x < output_width(); x++) {
1592 for (size_t g = 0; g < groups(); g++) {
1593 for (size_t c = 0; c < group_output_channels(); c++) {
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001594 ASSERT_GE(output[((i * output_channel_stride() + g * group_output_channels() + c) * output_height() + y) * output_width() + x], output_min)
Marat Dukhanefc47b82019-11-18 09:25:38 -08001595 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001596 ASSERT_LE(output[((i * output_channel_stride() + g * group_output_channels() + c) * output_height() + y) * output_width() + x], output_max)
Marat Dukhanefc47b82019-11-18 09:25:38 -08001597 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
1598 ASSERT_NEAR(
1599 output_ref[(((i * groups() + g) * group_output_channels() + c) * output_height() + y) * output_width() + x],
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001600 output[((i * output_channel_stride() + g * group_output_channels() + c) * output_height() + y) * output_width() + x],
Marat Dukhanefc47b82019-11-18 09:25:38 -08001601 1.0e-4 * std::abs(output_ref[(((i * groups() + g) * group_output_channels() + c) * output_height() + y) * output_width() + x]))
1602 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
1603 }
1604 }
1605 }
1606 }
1607 }
1608 }
1609 }
1610
Marat Dukhan97262462021-06-18 16:14:17 -07001611 void TestSetupNHWCxQC8() const {
Marat Dukhan6989ec42022-01-14 17:14:35 -08001612 ASSERT_EQ(weights_type(), WeightsType::Default);
1613
Marat Dukhan97262462021-06-18 16:14:17 -07001614 ASSERT_FALSE(depthwise_layout());
1615
1616 std::random_device random_device;
1617 auto rng = std::mt19937(random_device());
Marat Dukhan57c78272021-08-10 22:20:20 -07001618 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
Marat Dukhan97262462021-06-18 16:14:17 -07001619 auto i8rng = std::bind(
Marat Dukhan57c78272021-08-10 22:20:20 -07001620 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
1621 std::ref(rng));
1622 auto w8rng = std::bind(
1623 std::uniform_int_distribution<int32_t>(-std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::max()),
1624 std::ref(rng));
Marat Dukhan97262462021-06-18 16:14:17 -07001625
1626 std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) + std::max(
1627 batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
1628 next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())));
1629 std::vector<int8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
1630 std::vector<int32_t> bias(groups() * group_output_channels());
1631 std::vector<int8_t> output(std::max(
1632 batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
1633 next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
1634 std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1635 std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1636 std::vector<float> requantization_scales(groups() * group_output_channels());
1637 std::vector<int32_t> next_accumulators(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1638 std::vector<double> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1639 std::vector<float> next_requantization_scales(groups() * group_output_channels());
1640
1641 const int8_t input_zero_point = -1;
1642 const int8_t output_zero_point = -1;
1643
1644 for (size_t iteration = 0; iteration < iterations(); iteration++) {
1645 std::generate(input.begin(), input.end(), std::ref(i8rng));
Marat Dukhan57c78272021-08-10 22:20:20 -07001646 std::generate(kernel.begin(), kernel.end(), std::ref(w8rng));
Marat Dukhan97262462021-06-18 16:14:17 -07001647 std::generate(bias.begin(), bias.end(), std::ref(i32rng));
1648 std::fill(output.begin(), output.end(), 0xA5);
1649
1650 // Compute reference results, without renormalization.
1651 if (has_bias()) {
1652 for (size_t i = 0; i < batch_size(); i++) {
1653 for (size_t oy = 0; oy < output_height(); oy++) {
1654 for (size_t ox = 0; ox < output_width(); ox++) {
1655 for (size_t g = 0; g < groups(); g++) {
1656 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1657 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1658 bias[g * group_output_channels() + oc];
1659 }
1660 }
1661 }
1662 }
1663 }
1664 } else {
1665 std::fill(accumulators.begin(), accumulators.end(), 0);
1666 }
1667 for (size_t i = 0; i < batch_size(); i++) {
1668 for (size_t oy = 0; oy < output_height(); oy++) {
1669 for (size_t ox = 0; ox < output_width(); ox++) {
1670 for (size_t ky = 0; ky < kernel_height(); ky++) {
1671 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1672 if (iy < input_height()) {
1673 for (size_t kx = 0; kx < kernel_width(); kx++) {
1674 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1675 if (ix < input_width()) {
1676 for (size_t g = 0; g < groups(); g++) {
1677 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1678 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1679 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
1680 (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
1681 int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
1682 }
1683 }
1684 }
1685 }
1686 }
1687 }
1688 }
1689 }
1690 }
1691 }
1692
1693 // Compute renormalization parameters.
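      // Per-channel requantization: the scale is chosen from the channel's extreme accumulators so
      // that the quantized outputs span most of the int8 range around the output zero point, and is
      // clamped to [2^-32, 0x1.FFFFFEp-1] to keep it positive and strictly below 1.0.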
1694 for (size_t c = 0; c < groups() * group_output_channels(); c++) {
1695 int32_t accumulated_min = accumulators[c];
1696 int32_t accumulated_max = accumulators[c];
1697 for (size_t px = 0; px < batch_size() * output_height() * output_width(); px++) {
1698 accumulated_min = std::min(accumulated_min, accumulators[px * groups() * group_output_channels() + c]);
1699 accumulated_max = std::max(accumulated_max, accumulators[px * groups() * group_output_channels() + c]);
1700 }
1701
1702 float requantization_scale = 0x1.0p-32f;
1703 if (accumulated_max != 0) {
1704 requantization_scale = std::max(requantization_scale,
1705 float(int32_t(std::numeric_limits<int8_t>::max()) - int32_t(output_zero_point)) / float(accumulated_max));
1706 }
1707 if (accumulated_min != 0) {
1708 requantization_scale = std::max(requantization_scale,
1709 float(int32_t(std::numeric_limits<int8_t>::min()) - int32_t(output_zero_point)) / float(accumulated_min));
1710 }
1711 requantization_scale = std::min(requantization_scale, 0x1.FFFFFEp-1f);
1712
1713 requantization_scales[c] = requantization_scale;
1714 }
1715
1716 // Renormalize reference results.
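      // The reference output is output_zero_point + accumulator * per-channel scale, computed in
      // double precision and then clamped to the [qmin() - 128, qmax() - 128] signed window below.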
1717 for (size_t c = 0; c < groups() * group_output_channels(); c++) {
1718 for (size_t px = 0; px < batch_size() * output_height() * output_width(); px++) {
1719 output_ref[px * groups() * group_output_channels() + c] = double(int32_t(output_zero_point)) +
1720 double(accumulators[px * groups() * group_output_channels() + c]) * double(requantization_scales[c]);
1721 }
1722 }
1723 std::transform(output_ref.cbegin(), output_ref.cend(), output_ref.begin(),
1724 [this](double x) -> double {
1725 return std::max<double>(std::min<double>(x, double(qmax() - 0x80)), double(qmin() - 0x80));
1726 });
1727
1728      // Create, set up, and run the Convolution operator once.
1729 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1730 xnn_operator_t convolution_op = nullptr;
1731
1732 xnn_status status = xnn_create_convolution2d_nhwc_qc8(
1733 padding_top(), padding_right(), padding_bottom(), padding_left(),
1734 kernel_height(), kernel_width(),
1735 subsampling_height(), subsampling_width(),
1736 dilation_height(), dilation_width(),
1737 groups(), group_input_channels(), group_output_channels(),
1738 input_channel_stride(), output_channel_stride(),
1739 input_zero_point, 1.0f /* input scale */, requantization_scales.data(),
1740 kernel.data(), has_bias() ? bias.data() : nullptr,
1741 output_zero_point, 1.0f /* output scale */, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80),
1742 0, &convolution_op);
1743 if (status == xnn_status_unsupported_hardware) {
1744 GTEST_SKIP();
1745 }
1746 ASSERT_EQ(xnn_status_success, status);
1747 ASSERT_NE(nullptr, convolution_op);
1748
1749 // Smart pointer to automatically delete convolution_op.
1750 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
1751
1752 ASSERT_EQ(xnn_status_success,
1753 xnn_setup_convolution2d_nhwc_qc8(
1754 convolution_op,
1755 batch_size(), input_height(), input_width(),
1756 input.data(), output.data(),
1757 nullptr /* thread pool */));
1758
1759 ASSERT_EQ(xnn_status_success,
1760 xnn_run_operator(convolution_op, nullptr /* thread pool */));
1761
1762 // Verify results of the first run.
1763 for (size_t i = 0; i < batch_size(); i++) {
1764 for (size_t y = 0; y < output_height(); y++) {
1765 for (size_t x = 0; x < output_width(); x++) {
1766 for (size_t g = 0; g < groups(); g++) {
1767 for (size_t c = 0; c < group_output_channels(); c++) {
1768 ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax() - 0x80))
1769 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1770 ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin() - 0x80))
1771 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1772 ASSERT_NEAR(
1773 output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
1774 double(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]),
1775 0.9)
1776 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1777 }
1778 }
1779 }
1780 }
1781 }
1782
1783 // Re-generate data for the second run.
1784 std::generate(input.begin(), input.end(), std::ref(i8rng));
1785 std::fill(output.begin(), output.end(), 0xA5);
1786
1787 // Compute reference results for the second run, including renormalization.
1788 if (has_bias()) {
1789 for (size_t i = 0; i < next_batch_size(); i++) {
1790 for (size_t oy = 0; oy < next_output_height(); oy++) {
1791 for (size_t ox = 0; ox < next_output_width(); ox++) {
1792 for (size_t g = 0; g < groups(); g++) {
1793 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1794 next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1795 bias[g * group_output_channels() + oc];
1796 }
1797 }
1798 }
1799 }
1800 }
1801 } else {
1802 std::fill(next_accumulators.begin(), next_accumulators.end(), 0);
1803 }
1804 for (size_t i = 0; i < next_batch_size(); i++) {
1805 for (size_t oy = 0; oy < next_output_height(); oy++) {
1806 for (size_t ox = 0; ox < next_output_width(); ox++) {
1807 for (size_t ky = 0; ky < kernel_height(); ky++) {
1808 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1809 if (iy < next_input_height()) {
1810 for (size_t kx = 0; kx < kernel_width(); kx++) {
1811 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1812 if (ix < next_input_width()) {
1813 for (size_t g = 0; g < groups(); g++) {
1814 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1815 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1816 next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
1817 (int32_t(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
1818 int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
1819 }
1820 }
1821 }
1822 }
1823 }
1824 }
1825 }
1826 }
1827 }
1828 }
1829 for (size_t c = 0; c < groups() * group_output_channels(); c++) {
1830 for (size_t px = 0; px < next_batch_size() * next_output_height() * next_output_width(); px++) {
1831 next_output_ref[px * groups() * group_output_channels() + c] = double(int32_t(output_zero_point)) +
1832 double(next_accumulators[px * groups() * group_output_channels() + c]) * double(requantization_scales[c]);
1833 }
1834 }
1835 std::transform(next_output_ref.cbegin(), next_output_ref.cend(), next_output_ref.begin(),
1836 [this](double x) -> double {
1837 return std::max<double>(std::min<double>(x, double(qmax() - 0x80)), double(qmin() - 0x80));
1838 });
1839
1840      // Set up and run the Convolution operator a second time, then destroy the operator.
1841 ASSERT_EQ(xnn_status_success,
1842 xnn_setup_convolution2d_nhwc_qc8(
1843 convolution_op,
1844 next_batch_size(), next_input_height(), next_input_width(),
1845 input.data(), output.data(),
1846 nullptr /* thread pool */));
1847
1848 ASSERT_EQ(xnn_status_success,
1849 xnn_run_operator(convolution_op, nullptr /* thread pool */));
1850
1851 // Verify results of the second run.
1852 for (size_t i = 0; i < next_batch_size(); i++) {
1853 for (size_t y = 0; y < next_output_height(); y++) {
1854 for (size_t x = 0; x < next_output_width(); x++) {
1855 for (size_t g = 0; g < groups(); g++) {
1856 for (size_t c = 0; c < group_output_channels(); c++) {
1857 ASSERT_LE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax() - 0x80))
1858 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1859 ASSERT_GE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin() - 0x80))
1860 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1861 ASSERT_NEAR(
1862 next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
1863 double(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]),
1864 0.9)
1865 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1866 }
1867 }
1868 }
1869 }
1870 }
1871 }
1872 }
1873
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07001874 void TestSetupNHWCxQS8() const {
Marat Dukhan6989ec42022-01-14 17:14:35 -08001875 ASSERT_EQ(weights_type(), WeightsType::Default);
1876
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07001877 ASSERT_FALSE(depthwise_layout());
1878
1879 std::random_device random_device;
1880 auto rng = std::mt19937(random_device());
Marat Dukhan57c78272021-08-10 22:20:20 -07001881 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07001882 auto i8rng = std::bind(
Marat Dukhan57c78272021-08-10 22:20:20 -07001883 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()),
1884 std::ref(rng));
1885 auto w8rng = std::bind(
1886 std::uniform_int_distribution<int32_t>(-std::numeric_limits<int8_t>::max(), std::numeric_limits<int8_t>::max()),
1887 std::ref(rng));
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07001888
1889 std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) + std::max(
1890 batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
Marat Dukhan97262462021-06-18 16:14:17 -07001891 next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())));
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07001892 std::vector<int8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
1893 std::vector<int32_t> bias(groups() * group_output_channels());
1894 std::vector<int8_t> output(std::max(
1895 batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
1896 next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
1897 std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1898 std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1899 std::vector<int32_t> next_accumulators(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1900 std::vector<double> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1901
1902 const int8_t input_zero_point = -1;
1903
1904 for (size_t iteration = 0; iteration < iterations(); iteration++) {
1905 std::generate(input.begin(), input.end(), std::ref(i8rng));
Marat Dukhan57c78272021-08-10 22:20:20 -07001906 std::generate(kernel.begin(), kernel.end(), std::ref(w8rng));
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07001907 std::generate(bias.begin(), bias.end(), std::ref(i32rng));
1908 std::fill(output.begin(), output.end(), 0xA5);
1909
1910 // Compute reference results, without renormalization.
1911 if (has_bias()) {
1912 for (size_t i = 0; i < batch_size(); i++) {
1913 for (size_t oy = 0; oy < output_height(); oy++) {
1914 for (size_t ox = 0; ox < output_width(); ox++) {
1915 for (size_t g = 0; g < groups(); g++) {
1916 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1917 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1918 bias[g * group_output_channels() + oc];
1919 }
1920 }
1921 }
1922 }
1923 }
1924 } else {
1925 std::fill(accumulators.begin(), accumulators.end(), 0);
1926 }
1927 for (size_t i = 0; i < batch_size(); i++) {
1928 for (size_t oy = 0; oy < output_height(); oy++) {
1929 for (size_t ox = 0; ox < output_width(); ox++) {
1930 for (size_t ky = 0; ky < kernel_height(); ky++) {
1931 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1932 if (iy < input_height()) {
1933 for (size_t kx = 0; kx < kernel_width(); kx++) {
1934 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1935 if (ix < input_width()) {
1936 for (size_t g = 0; g < groups(); g++) {
1937 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1938 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1939 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
1940 (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
1941 int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
1942 }
1943 }
1944 }
1945 }
1946 }
1947 }
1948 }
1949 }
1950 }
1951 }
1952
1953 // Compute renormalization parameters.
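      // The output scale spreads the full accumulator range over 255 steps, and the output zero
      // point is chosen to center that range within int8 (clamped to [-128, 127]).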
1954 const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
1955 const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());
1956
1957 const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
1958 const int8_t output_zero_point = int8_t(std::max(std::min(
1959 lrint(-0.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
1960 long(std::numeric_limits<int8_t>::max())), long(std::numeric_limits<int8_t>::min())));
1961
1962 // Renormalize reference results.
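      // The reference is kept relative to the output zero point: accumulator / output_scale,
      // clamped to the [qmin() - 128, qmax() - 128] window shifted by -output_zero_point.
      // The checks below therefore subtract output_zero_point from the actual outputs.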
1963 std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
1964 [this, output_scale, output_zero_point](int32_t x) -> double {
1965 return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax() - 0x80) - output_zero_point), double(qmin() - 0x80) - output_zero_point);
1966 });
1967
1968      // Create, set up, and run the Convolution operator once.
1969 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1970 xnn_operator_t convolution_op = nullptr;
1971
1972 xnn_status status = xnn_create_convolution2d_nhwc_qs8(
1973 padding_top(), padding_right(), padding_bottom(), padding_left(),
1974 kernel_height(), kernel_width(),
1975 subsampling_height(), subsampling_width(),
1976 dilation_height(), dilation_width(),
1977 groups(), group_input_channels(), group_output_channels(),
1978 input_channel_stride(), output_channel_stride(),
1979 input_zero_point, 1.0f /* input scale */, 1.0f /* kernel scale */,
1980 kernel.data(), has_bias() ? bias.data() : nullptr,
1981 output_zero_point, output_scale, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80),
1982 0, &convolution_op);
1983 if (status == xnn_status_unsupported_hardware) {
1984 GTEST_SKIP();
1985 }
1986 ASSERT_EQ(xnn_status_success, status);
1987 ASSERT_NE(nullptr, convolution_op);
1988
1989 // Smart pointer to automatically delete convolution_op.
1990 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
1991
1992 ASSERT_EQ(xnn_status_success,
1993 xnn_setup_convolution2d_nhwc_qs8(
1994 convolution_op,
1995 batch_size(), input_height(), input_width(),
1996 input.data(), output.data(),
1997 nullptr /* thread pool */));
1998
1999 ASSERT_EQ(xnn_status_success,
2000 xnn_run_operator(convolution_op, nullptr /* thread pool */));
2001
2002 // Verify results of the first run.
2003 for (size_t i = 0; i < batch_size(); i++) {
2004 for (size_t y = 0; y < output_height(); y++) {
2005 for (size_t x = 0; x < output_width(); x++) {
2006 for (size_t g = 0; g < groups(); g++) {
2007 for (size_t c = 0; c < group_output_channels(); c++) {
2008 ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax() - 0x80))
2009 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2010 ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin() - 0x80))
2011 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2012 ASSERT_NEAR(
2013 output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
2014 double(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
2015 0.9)
2016 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2017 }
2018 }
2019 }
2020 }
2021 }
2022
2023 // Re-generate data for the second run.
2024 std::generate(input.begin(), input.end(), std::ref(i8rng));
2025 std::fill(output.begin(), output.end(), 0xA5);
2026
2027 // Compute reference results for the second run, including renormalization.
2028 if (has_bias()) {
2029 for (size_t i = 0; i < next_batch_size(); i++) {
2030 for (size_t oy = 0; oy < next_output_height(); oy++) {
2031 for (size_t ox = 0; ox < next_output_width(); ox++) {
2032 for (size_t g = 0; g < groups(); g++) {
2033 for (size_t oc = 0; oc < group_output_channels(); oc++) {
2034 next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
2035 bias[g * group_output_channels() + oc];
2036 }
2037 }
2038 }
2039 }
2040 }
2041 } else {
2042 std::fill(next_accumulators.begin(), next_accumulators.end(), 0);
2043 }
2044 for (size_t i = 0; i < next_batch_size(); i++) {
2045 for (size_t oy = 0; oy < next_output_height(); oy++) {
2046 for (size_t ox = 0; ox < next_output_width(); ox++) {
2047 for (size_t ky = 0; ky < kernel_height(); ky++) {
2048 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
2049 if (iy < next_input_height()) {
2050 for (size_t kx = 0; kx < kernel_width(); kx++) {
2051 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
2052 if (ix < next_input_width()) {
2053 for (size_t g = 0; g < groups(); g++) {
2054 for (size_t oc = 0; oc < group_output_channels(); oc++) {
2055 for (size_t ic = 0; ic < group_input_channels(); ic++) {
2056 next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
2057 (int32_t(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
2058 int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
2059 }
2060 }
2061 }
2062 }
2063 }
2064 }
2065 }
2066 }
2067 }
2068 }
2069 std::transform(next_accumulators.cbegin(), next_accumulators.cend(), next_output_ref.begin(),
2070 [this, output_scale, output_zero_point](int32_t x) -> double {
2071 return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax() - 0x80) - output_zero_point), double(qmin() - 0x80) - output_zero_point);
2072 });
2073
2074      // Set up and run the Convolution operator a second time, then destroy the operator.
2075 ASSERT_EQ(xnn_status_success,
2076 xnn_setup_convolution2d_nhwc_qs8(
2077 convolution_op,
2078 next_batch_size(), next_input_height(), next_input_width(),
2079 input.data(), output.data(),
2080 nullptr /* thread pool */));
2081
2082 ASSERT_EQ(xnn_status_success,
2083 xnn_run_operator(convolution_op, nullptr /* thread pool */));
2084
2085 // Verify results of the second run.
2086 for (size_t i = 0; i < next_batch_size(); i++) {
2087 for (size_t y = 0; y < next_output_height(); y++) {
2088 for (size_t x = 0; x < next_output_width(); x++) {
2089 for (size_t g = 0; g < groups(); g++) {
2090 for (size_t c = 0; c < group_output_channels(); c++) {
2091 ASSERT_LE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax() - 0x80))
2092 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2093 ASSERT_GE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin() - 0x80))
2094 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2095 ASSERT_NEAR(
2096 next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
2097 double(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
2098 0.9)
2099 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2100 }
2101 }
2102 }
2103 }
2104 }
2105 }
2106 }
2107
Marat Dukhan08b7a972020-07-14 18:17:29 -07002108 void TestSetupNHWCxQU8() const {
Marat Dukhan6989ec42022-01-14 17:14:35 -08002109 ASSERT_EQ(weights_type(), WeightsType::Default);
2110
XNNPACK Teamb455b122019-09-27 18:10:33 -07002111 ASSERT_FALSE(depthwise_layout());
2112
2113 std::random_device random_device;
2114 auto rng = std::mt19937(random_device());
Marat Dukhan57c78272021-08-10 22:20:20 -07002115 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
2116 auto u8rng = std::bind(
2117 std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -07002118
2119 std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + std::max(
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07002120 batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
Marat Dukhan97262462021-06-18 16:14:17 -07002121 next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())));
XNNPACK Teamb455b122019-09-27 18:10:33 -07002122 std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
2123 std::vector<int32_t> bias(groups() * group_output_channels());
2124 std::vector<uint8_t> output(std::max(
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07002125 batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
2126 next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
XNNPACK Teamb455b122019-09-27 18:10:33 -07002127 std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
2128 std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
2129 std::vector<int32_t> next_accumulators(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
2130 std::vector<double> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
2131
2132 const uint8_t input_zero_point = 127;
2133 const uint8_t kernel_zero_point = 127;
2134
2135 for (size_t iteration = 0; iteration < iterations(); iteration++) {
2136 std::generate(input.begin(), input.end(), std::ref(u8rng));
2137 std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
Marat Dukhanecd83112020-08-03 21:50:28 -07002138 std::generate(bias.begin(), bias.end(), std::ref(i32rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -07002139 std::fill(output.begin(), output.end(), 0xA5);
2140
2141 // Compute reference results, without renormalization.
Marat Dukhanf568f082019-10-30 09:47:07 -07002142 if (has_bias()) {
2143 for (size_t i = 0; i < batch_size(); i++) {
2144 for (size_t oy = 0; oy < output_height(); oy++) {
2145 for (size_t ox = 0; ox < output_width(); ox++) {
2146 for (size_t g = 0; g < groups(); g++) {
2147 for (size_t oc = 0; oc < group_output_channels(); oc++) {
2148 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
2149 bias[g * group_output_channels() + oc];
2150 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07002151 }
2152 }
2153 }
2154 }
Marat Dukhanf568f082019-10-30 09:47:07 -07002155 } else {
2156 std::fill(accumulators.begin(), accumulators.end(), 0);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002157 }
2158 for (size_t i = 0; i < batch_size(); i++) {
2159 for (size_t oy = 0; oy < output_height(); oy++) {
2160 for (size_t ox = 0; ox < output_width(); ox++) {
2161 for (size_t ky = 0; ky < kernel_height(); ky++) {
2162 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
2163 if (iy < input_height()) {
2164 for (size_t kx = 0; kx < kernel_width(); kx++) {
2165 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
2166 if (ix < input_width()) {
2167 for (size_t g = 0; g < groups(); g++) {
2168 for (size_t oc = 0; oc < group_output_channels(); oc++) {
2169 for (size_t ic = 0; ic < group_input_channels(); ic++) {
2170 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07002171 (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
XNNPACK Teamb455b122019-09-27 18:10:33 -07002172 (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
2173 }
2174 }
2175 }
2176 }
2177 }
2178 }
2179 }
2180 }
2181 }
2182 }
2183
2184 // Compute renormalization parameters.
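      // Same scheme as the signed path, except the zero point is centered around 127.5 so that the
      // accumulator range maps into the unsigned [0, 255] output range.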
2185 const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
2186 const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());
2187
2188 const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
2189 const uint8_t output_zero_point = uint8_t(std::max(std::min(
2190 lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
2191 long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));
2192
2193 // Renormalize reference results.
2194 std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
2195 [this, output_scale, output_zero_point](int32_t x) -> double {
2196 return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
2197 });
2198
2199      // Create, set up, and run the Convolution operator once.
Marat Dukhan04f03be2019-11-19 12:36:47 -08002200 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
XNNPACK Teamb455b122019-09-27 18:10:33 -07002201 xnn_operator_t convolution_op = nullptr;
2202
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07002203 xnn_status status = xnn_create_convolution2d_nhwc_qu8(
XNNPACK Teamb455b122019-09-27 18:10:33 -07002204 padding_top(), padding_right(), padding_bottom(), padding_left(),
2205 kernel_height(), kernel_width(),
2206 subsampling_height(), subsampling_width(),
2207 dilation_height(), dilation_width(),
2208 groups(), group_input_channels(), group_output_channels(),
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07002209 input_channel_stride(), output_channel_stride(),
XNNPACK Teamb455b122019-09-27 18:10:33 -07002210 input_zero_point, 1.0f /* input scale */,
2211 kernel_zero_point, 1.0f /* kernel scale */,
Marat Dukhanf568f082019-10-30 09:47:07 -07002212 kernel.data(), has_bias() ? bias.data() : nullptr,
XNNPACK Teamb455b122019-09-27 18:10:33 -07002213 output_zero_point, output_scale, qmin(), qmax(),
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07002214 0, &convolution_op);
2215 if (status == xnn_status_unsupported_hardware) {
2216 GTEST_SKIP();
2217 }
2218 ASSERT_EQ(xnn_status_success, status);
2219 ASSERT_NE(nullptr, convolution_op);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002220
2221 // Smart pointer to automatically delete convolution_op.
2222 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
2223
2224 ASSERT_EQ(xnn_status_success,
Marat Dukhan08b7a972020-07-14 18:17:29 -07002225 xnn_setup_convolution2d_nhwc_qu8(
XNNPACK Teamb455b122019-09-27 18:10:33 -07002226 convolution_op,
2227 batch_size(), input_height(), input_width(),
2228 input.data(), output.data(),
2229 nullptr /* thread pool */));
2230
2231 ASSERT_EQ(xnn_status_success,
2232 xnn_run_operator(convolution_op, nullptr /* thread pool */));
2233
2234 // Verify results of the first run.
2235 for (size_t i = 0; i < batch_size(); i++) {
2236 for (size_t y = 0; y < output_height(); y++) {
2237 for (size_t x = 0; x < output_width(); x++) {
2238 for (size_t g = 0; g < groups(); g++) {
2239 for (size_t c = 0; c < group_output_channels(); c++) {
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07002240 ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                    double(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                    0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results for the second run, including renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < next_batch_size(); i++) {
          for (size_t oy = 0; oy < next_output_height(); oy++) {
            for (size_t ox = 0; ox < next_output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(next_accumulators.begin(), next_accumulators.end(), 0);
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      std::transform(next_accumulators.cbegin(), next_accumulators.cend(), next_output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });
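      // The reference values above live in the zero-point-free domain: accumulator / output_scale,
      // clamped to [qmin() - output_zero_point, qmax() - output_zero_point]. The checks below
      // therefore subtract output_zero_point from the quantized output before comparing, and
      // accept up to 0.9 of one quantization step of requantization error.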

      // Setup and run Convolution operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_qu8(
          convolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
                    double(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                    0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

  void TestSetupNHWCxF16() const {
    ASSERT_EQ(weights_type(), WeightsType::Default);

    ASSERT_FALSE(depthwise_layout());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + std::max(
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
      next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())));
    std::vector<uint16_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<uint16_t> bias(groups() * group_output_channels());
    std::vector<uint16_t> output(std::max(
      batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
      next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f16rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f16rng));
      std::generate(bias.begin(), bias.end(), std::ref(f16rng));
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    fp16_ieee_to_fp32_value(bias[g * group_output_channels() + oc]);
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) *
                            fp16_ieee_to_fp32_value(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float scaled_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + accumulated_range / 255.0f * float(qmin())));
      const float scaled_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - accumulated_range / 255.0f * float(255 - qmax())));
      const float output_min = scaled_min == scaled_max ? -std::numeric_limits<float>::infinity() : scaled_min;
      const float output_max = scaled_min == scaled_max ? +std::numeric_limits<float>::infinity() : scaled_max;
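      // The clamping thresholds are derived from the un-clamped reference range: qmin() and
      // 255 - qmax() select what fraction (in 255ths) is trimmed from the bottom and top of the
      // range. Both thresholds are rounded through fp16 so they are exactly representable in the
      // output type; if that rounding collapses the range to a single value, clamping is
      // effectively disabled by substituting +/-infinity.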

      for (float& output_value : output_ref) {
        output_value = std::min(std::max(output_value, output_min), output_max);
      }

      // Create, setup, and run Convolution operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      xnn_status status = xnn_create_convolution2d_nhwc_f16(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_channel_stride(), output_channel_stride(),
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_min, output_max,
          0, &convolution_op);
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, convolution_op);

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f16(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c], fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), std::max(1.0e-4f, std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]) * 1.0e-2f))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), std::ref(f16rng));
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

      // Compute reference results for the second run, including clamping.
      if (has_bias()) {
        for (size_t i = 0; i < next_batch_size(); i++) {
          for (size_t oy = 0; oy < next_output_height(); oy++) {
            for (size_t ox = 0; ox < next_output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    fp16_ieee_to_fp32_value(bias[g * group_output_channels() + oc]);
                }
              }
            }
          }
        }
      } else {
        std::fill(next_output_ref.begin(), next_output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            fp16_ieee_to_fp32_value(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) *
                            fp16_ieee_to_fp32_value(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      for (float& value : next_output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Setup and run Convolution operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f16(
          convolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c], fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), std::max(1.0e-4f, std::abs(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c]) * 1.0e-2f))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

  void TestSetupNHWCxF32() const {
    ASSERT_EQ(weights_type(), WeightsType::Default);

    ASSERT_FALSE(depthwise_layout());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), std::ref(rng));

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + std::max(
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
      next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())));
    std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output(std::max(
      batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
      next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic] *
                            kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

      const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());
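      // Example: with the default qmin() == 0 and qmax() == 255 the accumulated range is kept
      // as-is; with qmin() == 64 the lower threshold is raised by 64/255 (about 25%) of the
      // accumulated range while the upper threshold stays at accumulated_max.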

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, and run Convolution operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      xnn_status status = xnn_create_convolution2d_nhwc_f32(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_channel_stride(), output_channel_stride(),
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_min, output_max,
          0, &convolution_op);
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, convolution_op);

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                    output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c],
                    1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results for the second run, including clamping.
      if (has_bias()) {
        for (size_t i = 0; i < next_batch_size(); i++) {
          for (size_t oy = 0; oy < next_output_height(); oy++) {
            for (size_t ox = 0; ox < next_output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(next_output_ref.begin(), next_output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic] *
                            kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      for (float& value : next_output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Setup and run Convolution operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
                    output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c],
                    1.0e-4 * std::abs(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

 private:
  uint32_t padding_top_{0};
  uint32_t padding_right_{0};
  uint32_t padding_bottom_{0};
  uint32_t padding_left_{0};
  bool padding_tf_same_{false};
  size_t input_height_{1};
  size_t input_width_{1};
  uint32_t groups_{1};
  size_t group_input_channels_{1};
  size_t input_channel_stride_{0};
  size_t group_output_channels_{1};
  size_t output_channel_stride_{0};
  size_t batch_size_{1};
  uint32_t kernel_height_{1};
  uint32_t kernel_width_{1};
  uint32_t dilation_height_{1};
  uint32_t dilation_width_{1};
  uint32_t subsampling_height_{1};
  uint32_t subsampling_width_{1};
  size_t next_input_height_{0};
  size_t next_input_width_{0};
  size_t next_batch_size_{0};
  float sparsity_{0.0f};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  bool depthwise_layout_{false};
  bool force_nhwc_input_{false};
  bool has_bias_{true};
  WeightsType weights_type_{WeightsType::Default};
  size_t iterations_{1};
};
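// A minimal usage sketch (not part of the tester itself). It assumes the fluent setters follow the
// same pattern as padding() above and mirror the private fields (batch_size(), input_height(),
// kernel_height(), next_batch_size(), ...); the exact setter names used by the real test files may
// differ, so treat the snippet below as illustrative only:
//
//   TEST(CONVOLUTION_NHWC_F32, setup_changing_batch) {
//     ConvolutionOperatorTester()
//       .batch_size(2)
//       .next_batch_size(5)
//       .input_height(8)
//       .input_width(8)
//       .kernel_height(3)
//       .kernel_width(3)
//       .group_input_channels(15)
//       .group_output_channels(17)
//       .TestSetupNHWCxF32();
//   }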