// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <functional>
#include <limits>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>


class ConvolutionOperatorTester {
 public:
  inline ConvolutionOperatorTester& padding_tf_same(bool padding_same) {
    if (padding_same) {
      assert(padding_top() == 0);
      assert(padding_left() == 0);
      assert(padding_bottom() == 0);
      assert(padding_right() == 0);
    }
    this->padding_tf_same_ = padding_same;
    return *this;
  }

  inline bool padding_tf_same() const {
    return this->padding_tf_same_;
  }

  inline ConvolutionOperatorTester& padding(uint32_t padding) {
    assert(!padding_tf_same());
    this->padding_top_ = padding;
    this->padding_right_ = padding;
    this->padding_bottom_ = padding;
    this->padding_left_ = padding;
    return *this;
  }

  inline ConvolutionOperatorTester& padding(uint32_t padding_height, uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_right_ = padding_width;
    this->padding_bottom_ = padding_height;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_height(uint32_t padding_height) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_bottom_ = padding_height;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_width(uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_width;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_top(uint32_t padding_top) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_top;
    return *this;
  }

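  // With TF SAME padding, the implied padding is derived from the input/output sizes:
  // the total padding along each axis is split between the two sides, with the odd
  // pixel (if any) going to the bottom/right side, matching TensorFlow's convention.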
  inline uint32_t padding_top() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_height =
        (output_height() - 1) * subsampling_height() + dilated_kernel_height() - input_height();
      return total_padding_height / 2;
    } else {
      return this->padding_top_;
    }
  }

  inline ConvolutionOperatorTester& padding_left(uint32_t padding_left) {
    assert(!padding_tf_same());
    this->padding_left_ = padding_left;
    return *this;
  }

  inline uint32_t padding_left() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_width =
        (output_width() - 1) * subsampling_width() + dilated_kernel_width() - input_width();
      return total_padding_width / 2;
    } else {
      return this->padding_left_;
    }
  }

  inline ConvolutionOperatorTester& padding_bottom(uint32_t padding_bottom) {
    assert(!padding_tf_same());
    this->padding_bottom_ = padding_bottom;
    return *this;
  }

  inline uint32_t padding_bottom() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_height =
        (output_height() - 1) * subsampling_height() + dilated_kernel_height() - input_height();
      return total_padding_height - total_padding_height / 2;
    } else {
      return this->padding_bottom_;
    }
  }

  inline ConvolutionOperatorTester& padding_right(uint32_t padding_right) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_right;
    return *this;
  }

  inline uint32_t padding_right() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_width =
        (output_width() - 1) * subsampling_width() + dilated_kernel_width() - input_width();
      return total_padding_width - total_padding_width / 2;
    } else {
      return this->padding_right_;
    }
  }

  inline ConvolutionOperatorTester& input_size(uint32_t input_height, uint32_t input_width) {
    assert(input_height >= 1);
    assert(input_width >= 1);
    this->input_height_ = input_height;
    this->input_width_ = input_width;
    return *this;
  }

  inline ConvolutionOperatorTester& input_height(uint32_t input_height) {
    assert(input_height >= 1);
    this->input_height_ = input_height;
    return *this;
  }

  inline uint32_t input_height() const {
    return this->input_height_;
  }

  inline ConvolutionOperatorTester& input_width(uint32_t input_width) {
    assert(input_width >= 1);
    this->input_width_ = input_width;
    return *this;
  }

  inline uint32_t input_width() const {
    return this->input_width_;
  }

  inline ConvolutionOperatorTester& groups(uint32_t groups) {
    assert(groups >= 1);
    this->groups_ = groups;
    return *this;
  }

  inline uint32_t groups() const {
    return this->groups_;
  }

  inline ConvolutionOperatorTester& group_input_channels(size_t group_input_channels) {
    assert(group_input_channels >= 1);
    this->group_input_channels_ = group_input_channels;
    return *this;
  }

  inline size_t group_input_channels() const {
    return this->group_input_channels_;
  }

  inline ConvolutionOperatorTester& group_output_channels(size_t group_output_channels) {
    assert(group_output_channels >= 1);
    this->group_output_channels_ = group_output_channels;
    return *this;
  }

  inline size_t group_output_channels() const {
    return this->group_output_channels_;
  }

  inline ConvolutionOperatorTester& batch_size(size_t batch_size) {
    assert(batch_size >= 1);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline ConvolutionOperatorTester& kernel_size(uint32_t kernel_size) {
    assert(kernel_size >= 1);
    this->kernel_height_ = kernel_size;
    this->kernel_width_ = kernel_size;
    return *this;
  }

  inline ConvolutionOperatorTester& kernel_size(uint32_t kernel_height, uint32_t kernel_width) {
    assert(kernel_height >= 1);
    assert(kernel_width >= 1);
    this->kernel_height_ = kernel_height;
    this->kernel_width_ = kernel_width;
    return *this;
  }

  inline ConvolutionOperatorTester& kernel_height(uint32_t kernel_height) {
    assert(kernel_height >= 1);
    this->kernel_height_ = kernel_height;
    return *this;
  }

  inline uint32_t kernel_height() const {
    return this->kernel_height_;
  }

  inline ConvolutionOperatorTester& kernel_width(uint32_t kernel_width) {
    assert(kernel_width >= 1);
    this->kernel_width_ = kernel_width;
    return *this;
  }

  inline uint32_t kernel_width() const {
    return this->kernel_width_;
  }

  inline ConvolutionOperatorTester& dilation(uint32_t dilation) {
    assert(dilation >= 1);
    this->dilation_height_ = dilation;
    this->dilation_width_ = dilation;
    return *this;
  }

  inline ConvolutionOperatorTester& dilation(uint32_t dilation_height, uint32_t dilation_width) {
    assert(dilation_height >= 1);
    assert(dilation_width >= 1);
    this->dilation_height_ = dilation_height;
    this->dilation_width_ = dilation_width;
    return *this;
  }

  inline ConvolutionOperatorTester& dilation_height(uint32_t dilation_height) {
    assert(dilation_height >= 1);
    this->dilation_height_ = dilation_height;
    return *this;
  }

  inline uint32_t dilation_height() const {
    return this->dilation_height_;
  }

  inline ConvolutionOperatorTester& dilation_width(uint32_t dilation_width) {
    assert(dilation_width >= 1);
    this->dilation_width_ = dilation_width;
    return *this;
  }

  inline uint32_t dilation_width() const {
    return this->dilation_width_;
  }

  inline ConvolutionOperatorTester& subsampling(uint32_t subsampling) {
    assert(subsampling >= 1);
    this->subsampling_height_ = subsampling;
    this->subsampling_width_ = subsampling;
    return *this;
  }

  inline ConvolutionOperatorTester& subsampling(uint32_t subsampling_height, uint32_t subsampling_width) {
    assert(subsampling_height >= 1);
    assert(subsampling_width >= 1);
    this->subsampling_height_ = subsampling_height;
    this->subsampling_width_ = subsampling_width;
    return *this;
  }

  inline ConvolutionOperatorTester& subsampling_height(uint32_t subsampling_height) {
    assert(subsampling_height >= 1);
    this->subsampling_height_ = subsampling_height;
    return *this;
  }

  inline uint32_t subsampling_height() const {
    return this->subsampling_height_;
  }

  inline ConvolutionOperatorTester& subsampling_width(uint32_t subsampling_width) {
    assert(subsampling_width >= 1);
    this->subsampling_width_ = subsampling_width;
    return *this;
  }

  inline uint32_t subsampling_width() const {
    return this->subsampling_width_;
  }

  inline ConvolutionOperatorTester& input_channel_stride(size_t input_channel_stride) {
    assert(input_channel_stride >= 1);
    this->input_channel_stride_ = input_channel_stride;
    return *this;
  }

  inline size_t input_channel_stride() const {
    if (this->input_channel_stride_ == 0) {
      return group_input_channels() * groups();
    } else {
      assert(this->input_channel_stride_ >= group_input_channels() * groups());
      return this->input_channel_stride_;
    }
  }

  inline ConvolutionOperatorTester& output_channel_stride(size_t output_channel_stride) {
    assert(output_channel_stride >= 1);
    this->output_channel_stride_ = output_channel_stride;
    return *this;
  }

  inline size_t output_channel_stride() const {
    if (this->output_channel_stride_ == 0) {
      return group_output_channels() * groups();
    } else {
      assert(this->output_channel_stride_ >= group_output_channels() * groups());
      return this->output_channel_stride_;
    }
  }

  inline uint32_t dilated_kernel_height() const {
    return (kernel_height() - 1) * dilation_height() + 1;
  }

  inline uint32_t dilated_kernel_width() const {
    return (kernel_width() - 1) * dilation_width() + 1;
  }

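  // Output size follows the usual convolution arithmetic:
  //   output = (padded input - dilated kernel) / subsampling + 1, and at least 1;
  // with TF SAME padding it is simply ceil(input / subsampling).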
  inline size_t output_height() const {
    if (padding_tf_same()) {
      return (input_height() + subsampling_height() - 1) / subsampling_height();
    } else {
      const size_t padded_input_height = padding_top() + input_height() + padding_bottom();
      if (padded_input_height <= dilated_kernel_height()) {
        return 1;
      } else {
        return (padded_input_height - dilated_kernel_height()) / subsampling_height() + 1;
      }
    }
  }

  inline size_t output_width() const {
    if (padding_tf_same()) {
      return (input_width() + subsampling_width() - 1) / subsampling_width();
    } else {
      const size_t padded_input_width = padding_left() + input_width() + padding_right();
      if (padded_input_width <= dilated_kernel_width()) {
        return 1;
      } else {
        return (padded_input_width - dilated_kernel_width()) / subsampling_width() + 1;
      }
    }
  }

  inline ConvolutionOperatorTester& next_input_size(uint32_t next_input_height, uint32_t next_input_width) {
    assert(next_input_height >= 1);
    assert(next_input_width >= 1);
    this->next_input_height_ = next_input_height;
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline ConvolutionOperatorTester& next_input_height(uint32_t next_input_height) {
    assert(next_input_height >= 1);
    this->next_input_height_ = next_input_height;
    return *this;
  }

  inline uint32_t next_input_height() const {
    if (this->next_input_height_ == 0) {
      return input_height();
    } else {
      return this->next_input_height_;
    }
  }

  inline ConvolutionOperatorTester& next_input_width(uint32_t next_input_width) {
    assert(next_input_width >= 1);
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline uint32_t next_input_width() const {
    if (this->next_input_width_ == 0) {
      return input_width();
    } else {
      return this->next_input_width_;
    }
  }

  inline size_t next_output_height() const {
    const size_t padded_input_height = padding_top() + next_input_height() + padding_bottom();
    if (padded_input_height <= dilated_kernel_height()) {
      return 1;
    } else {
      return (padded_input_height - dilated_kernel_height()) / subsampling_height() + 1;
    }
  }

  inline size_t next_output_width() const {
    const size_t padded_input_width = padding_left() + next_input_width() + padding_right();
    if (padded_input_width <= dilated_kernel_width()) {
      return 1;
    } else {
      return (padded_input_width - dilated_kernel_width()) / subsampling_width() + 1;
    }
  }

  inline ConvolutionOperatorTester& next_batch_size(size_t next_batch_size) {
    assert(next_batch_size >= 1);
    this->next_batch_size_ = next_batch_size;
    return *this;
  }

  inline size_t next_batch_size() const {
    if (this->next_batch_size_ == 0) {
      return batch_size();
    } else {
      return this->next_batch_size_;
    }
  }

  inline ConvolutionOperatorTester& sparsity(float sparsity) {
    this->sparsity_ = sparsity;
    return *this;
  }

  inline float sparsity() const {
    return this->sparsity_;
  }

  inline ConvolutionOperatorTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline ConvolutionOperatorTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline ConvolutionOperatorTester& force_nhwc_input(bool force_nhwc_input) {
    this->force_nhwc_input_ = force_nhwc_input;
    return *this;
  }

  inline bool force_nhwc_input() const {
    return this->force_nhwc_input_;
  }

  inline ConvolutionOperatorTester& depthwise_layout(bool depthwise_layout) {
    this->depthwise_layout_ = depthwise_layout;
    return *this;
  }

  inline bool depthwise_layout() const {
    return this->depthwise_layout_;
  }

  inline ConvolutionOperatorTester& has_bias(bool has_bias) {
    this->has_bias_ = has_bias;
    return *this;
  }

  inline bool has_bias() const {
    return this->has_bias_;
  }

  inline ConvolutionOperatorTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

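  // Typical usage (illustrative sketch: the test suite and case names below are
  // hypothetical, only the tester methods are real):
  //
  //   TEST(CONVOLUTION_NHWC_F32, example_grouped_3x3) {
  //     ConvolutionOperatorTester()
  //       .input_size(13, 14)
  //       .kernel_size(3, 3)
  //       .padding(1)
  //       .groups(2)
  //       .group_input_channels(15)
  //       .group_output_channels(17)
  //       .iterations(3)
  //       .TestNHWCxF32();
  //   }
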
  void TestNHWCxQS8() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
    auto i8rng = std::bind(
      std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), rng);

    std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) +
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()) + 8);
    std::vector<int8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<int32_t> bias(groups() * group_output_channels());
    std::vector<int8_t> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
    std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    const int8_t input_zero_point = -1;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(i8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(i8rng));
      std::generate(bias.begin(), bias.end(), std::ref(i32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(accumulators.begin(), accumulators.end(), 0);
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g]) - int32_t(input_zero_point)) *
                            int32_t(kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc]);
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                              int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute renormalization parameters.
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

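      // The output scale maps the observed accumulator range onto the 256 representable
      // quantized values, and the zero point is chosen to center that range around zero.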
      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const int8_t output_zero_point = int8_t(std::max(std::min(
        lrint(-0.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<int8_t>::max())), long(std::numeric_limits<int8_t>::min())));

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax() - 0x80) - output_zero_point), double(qmin() - 0x80) - output_zero_point);
        });

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      xnn_status status = xnn_create_convolution2d_nhwc_qs8(
          padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
          padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_channel_stride(), output_channel_stride(),
          input_zero_point, 1.0f /* input scale */, 1.0f /* kernel scale */,
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_zero_point, output_scale, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80),
          (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
          &convolution_op);
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, convolution_op);

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_qs8(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax() - 0x80))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin() - 0x80))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                    double(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                    0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

  void TestNHWCxQU8() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
    auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()) + 8);
    std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<int32_t> bias(groups() * group_output_channels());
    std::vector<uint8_t> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
    std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    const uint8_t input_zero_point = 127;
    const uint8_t kernel_zero_point = 127;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
      std::generate(bias.begin(), bias.end(), std::ref(i32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(accumulators.begin(), accumulators.end(), 0);
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                              (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute renormalization parameters.
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const uint8_t output_zero_point = uint8_t(std::max(std::min(
        lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      xnn_status status = xnn_create_convolution2d_nhwc_qu8(
          padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
          padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_channel_stride(), output_channel_stride(),
          input_zero_point, 1.0f /* input scale */,
          kernel_zero_point, 1.0f /* kernel scale */,
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_zero_point, output_scale, qmin(), qmax(),
          (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
          &convolution_op);
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, convolution_op);

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_qu8(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                    double(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                    0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

  void TestNHWCxF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()));
    std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g] *
                            kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic] *
                              kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

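      // qmin()/qmax() are given on a [0, 255] scale; they are mapped onto the observed
      // accumulator range to derive the float clamping bounds passed to the operator.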
      const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      xnn_status status = xnn_create_convolution2d_nhwc_f32(
          padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
          padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_channel_stride(), output_channel_stride(),
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_min, output_max,
          (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
          &convolution_op);
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, convolution_op);

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                    output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c],
                    1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

  void TestNHWCxF16() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) +
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()));
    std::vector<uint16_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<uint16_t> bias(groups() * group_output_channels());
    std::vector<uint16_t> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f16rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f16rng));
      std::generate(bias.begin(), bias.end(), std::ref(f16rng));
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    fp16_ieee_to_fp32_value(bias[g * group_output_channels() + oc]);
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g]) *
                            fp16_ieee_to_fp32_value(kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc]);
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) *
                              fp16_ieee_to_fp32_value(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
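      // The clamping bounds are rounded through fp16, matching the precision the operator
      // computes in; if rounding collapses both bounds to the same value, clamping is
      // effectively disabled (set to +/-infinity) so the test does not reject every output.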
      const float scaled_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + accumulated_range / 255.0f * float(qmin())));
      const float scaled_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - accumulated_range / 255.0f * float(255 - qmax())));
      const float output_min = scaled_min == scaled_max ? -std::numeric_limits<float>::infinity() : scaled_min;
      const float output_max = scaled_min == scaled_max ? +std::numeric_limits<float>::infinity() : scaled_max;

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      xnn_status status = xnn_create_convolution2d_nhwc_f16(
          padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
          padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_channel_stride(), output_channel_stride(),
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_min, output_max,
          (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
          &convolution_op);
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, convolution_op);

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f16(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
//                ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_min)
//                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
//                ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_max)
//                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                    fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]),
                    std::max(1.0e-4f, std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]) * 1.0e-2f))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

  void TestNCHWxF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
    auto prng = std::bind(std::uniform_real_distribution<float>(), rng);

    std::vector<float> input(2 * XNN_EXTRA_BYTES / sizeof(float) +
      ((batch_size() - 1) * input_channel_stride() + groups() * group_input_channels()) * input_height() * input_width());
    std::vector<float> kernel(
      groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output(
      ((batch_size() - 1) * output_channel_stride() + groups() * group_output_channels()) * output_height() * output_width());
    std::vector<float> output_ref(batch_size() * groups() * group_output_channels() * output_height() * output_width());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
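      // Zero out a random fraction of the kernel weights so that sparse kernels are
      // exercised at (approximately) the requested sparsity level.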
1182 for (float& k : kernel) {
1183 if (prng() <= sparsity()) {
1184 k = 0.0f;
1185 }
1186 }
1187 std::generate(bias.begin(), bias.end(), std::ref(f32rng));
1188 std::fill(output.begin(), output.end(), nanf(""));
1189
1190 // Compute reference results, without clamping.
1191 if (has_bias()) {
1192 for (size_t i = 0; i < batch_size(); i++) {
1193 for (size_t oy = 0; oy < output_height(); oy++) {
1194 for (size_t ox = 0; ox < output_width(); ox++) {
1195 for (size_t g = 0; g < groups(); g++) {
1196 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1197 output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] =
1198 bias[g * group_output_channels() + oc];
1199 }
1200 }
1201 }
1202 }
1203 }
1204 } else {
1205 std::fill(output_ref.begin(), output_ref.end(), 0.0f);
1206 }
1207 if (force_nhwc_input()) {
1208 for (size_t i = 0; i < batch_size(); i++) {
1209 for (size_t oy = 0; oy < output_height(); oy++) {
1210 for (size_t ox = 0; ox < output_width(); ox++) {
1211 for (size_t ky = 0; ky < kernel_height(); ky++) {
1212 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1213 if (iy < input_height()) {
1214 for (size_t kx = 0; kx < kernel_width(); kx++) {
1215 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1216 if (ix < input_width()) {
1217 for (size_t g = 0; g < groups(); g++) {
1218 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1219 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1220 output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
1221 input[((((i * input_height() + iy) * input_width() + ix) * groups() + g) * group_input_channels() + ic)] *
1222 kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
1223 }
1224 }
1225 }
1226 }
1227 }
1228 }
1229 }
1230 }
1231 }
1232 }
Marat Dukhan33032712020-06-18 11:06:04 -07001233 } else if (depthwise_layout()) {
1234 ASSERT_EQ(group_input_channels(), 1);
1235
1236 for (size_t i = 0; i < batch_size(); i++) {
1237 for (size_t oy = 0; oy < output_height(); oy++) {
1238 for (size_t ox = 0; ox < output_width(); ox++) {
1239 for (size_t ky = 0; ky < kernel_height(); ky++) {
1240 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1241 if (iy < input_height()) {
1242 for (size_t kx = 0; kx < kernel_width(); kx++) {
1243 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1244 if (ix < input_width()) {
1245 for (size_t g = 0; g < groups(); g++) {
1246 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1247 output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
1248 input[((i * input_channel_stride() + g) * input_height() + iy) * input_width() + ix] *
1249 kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc];
1250 }
1251 }
1252 }
1253 }
1254 }
1255 }
1256 }
1257 }
1258 }
Marat Dukhanefc47b82019-11-18 09:25:38 -08001259 } else {
1260 for (size_t i = 0; i < batch_size(); i++) {
1261 for (size_t oy = 0; oy < output_height(); oy++) {
1262 for (size_t ox = 0; ox < output_width(); ox++) {
1263 for (size_t ky = 0; ky < kernel_height(); ky++) {
1264 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1265 if (iy < input_height()) {
1266 for (size_t kx = 0; kx < kernel_width(); kx++) {
1267 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1268 if (ix < input_width()) {
1269 for (size_t g = 0; g < groups(); g++) {
1270 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1271 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1272 output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001273 input[((i * input_channel_stride() + g * group_input_channels() + ic) * input_height() + iy) * input_width() + ix] *
Marat Dukhanefc47b82019-11-18 09:25:38 -08001274 kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
1275 }
1276 }
1277 }
1278 }
1279 }
1280 }
1281 }
1282 }
1283 }
1284 }
1285 }
1286
1287 // Compute clamping parameters.
1288 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
1289 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
1290
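      // qmin()/qmax() select the clamping thresholds as fractions of the accumulated range:
      // e.g. qmin() == 64 places output_min roughly a quarter of the way up the range
      // (64/255 ~ 0.25), while qmin() == 0 / qmax() == 255 disable the respective bound entirely.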
Marat Dukhan869c62d2020-04-09 17:17:55 -07001291 const float output_min = qmin() == 0 ? -std::numeric_limits<float>::infinity() :
1292 accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
1293 const float output_max = qmax() == 255 ? std::numeric_limits<float>::infinity() :
1294 accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());
Marat Dukhanefc47b82019-11-18 09:25:38 -08001295
1296 // Clamp reference results.
1297 for (float& value : output_ref) {
1298 value = std::max(std::min(value, output_max), output_min);
1299 }
1300
1301 // Create, setup, run, and destroy Convolution operator.
Marat Dukhan04f03be2019-11-19 12:36:47 -08001302 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
Marat Dukhanefc47b82019-11-18 09:25:38 -08001303 xnn_operator_t convolution_op = nullptr;
1304
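      // Create the NCHW convolution. XNN_FLAG_DEPTHWISE_CONVOLUTION and XNN_FLAG_INPUT_NHWC
      // request the depthwise kernel layout and the NHWC input layout exercised by the reference
      // code above; parameter combinations the operator does not support are skipped below.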
1305 xnn_status status = xnn_create_convolution2d_nchw_f32(
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07001306 padding_top(), padding_right(), padding_bottom(), padding_left(),
1307 kernel_height(), kernel_width(),
1308 subsampling_height(), subsampling_width(),
1309 dilation_height(), dilation_width(),
1310 groups(), group_input_channels(), group_output_channels(),
1311 input_channel_stride(), output_channel_stride(),
1312 kernel.data(), has_bias() ? bias.data() : nullptr,
1313 output_min, output_max,
1314 (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (force_nhwc_input() ? XNN_FLAG_INPUT_NHWC : 0),
1315 &convolution_op);
Marat Dukhanefc47b82019-11-18 09:25:38 -08001316 if (status == xnn_status_unsupported_parameter) {
1317 GTEST_SKIP();
1318 }
1319 ASSERT_EQ(xnn_status_success, status);
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07001320 ASSERT_NE(nullptr, convolution_op);
Marat Dukhanefc47b82019-11-18 09:25:38 -08001321
1322 // Smart pointer to automatically delete convolution_op.
1323 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
1324
1325 ASSERT_EQ(xnn_status_success,
1326 xnn_setup_convolution2d_nchw_f32(
1327 convolution_op,
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001328 batch_size(), input_height(), input_width(),
Marat Dukhanefc47b82019-11-18 09:25:38 -08001329 input.data(), output.data(),
1330 nullptr /* thread pool */));
1331
1332 ASSERT_EQ(xnn_status_success,
1333 xnn_run_operator(convolution_op, nullptr /* thread pool */));
1334
1335 // Verify results.
1336 for (size_t i = 0; i < batch_size(); i++) {
1337 for (size_t y = 0; y < output_height(); y++) {
1338 for (size_t x = 0; x < output_width(); x++) {
1339 for (size_t g = 0; g < groups(); g++) {
1340 for (size_t c = 0; c < group_output_channels(); c++) {
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001341 ASSERT_GE(output[((i * output_channel_stride() + g * group_output_channels() + c) * output_height() + y) * output_width() + x], output_min)
Marat Dukhanefc47b82019-11-18 09:25:38 -08001342 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001343 ASSERT_LE(output[((i * output_channel_stride() + g * group_output_channels() + c) * output_height() + y) * output_width() + x], output_max)
Marat Dukhanefc47b82019-11-18 09:25:38 -08001344 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
1345 ASSERT_NEAR(
1346 output_ref[(((i * groups() + g) * group_output_channels() + c) * output_height() + y) * output_width() + x],
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001347 output[((i * output_channel_stride() + g * group_output_channels() + c) * output_height() + y) * output_width() + x],
Marat Dukhanefc47b82019-11-18 09:25:38 -08001348 1.0e-4 * std::abs(output_ref[(((i * groups() + g) * group_output_channels() + c) * output_height() + y) * output_width() + x]))
1349 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
1350 }
1351 }
1352 }
1353 }
1354 }
1355 }
1356 }
1357
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07001358 void TestSetupNHWCxQS8() const {
1359 ASSERT_FALSE(depthwise_layout());
1360
1361 std::random_device random_device;
1362 auto rng = std::mt19937(random_device());
1363 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
1364 auto i8rng = std::bind(
1365 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), rng);
1366
1367 std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) + std::max(
1368 batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
1369 next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())) + 8);
1370 std::vector<int8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
1371 std::vector<int32_t> bias(groups() * group_output_channels());
1372 std::vector<int8_t> output(std::max(
1373 batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
1374 next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
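    // The input and output buffers are sized for the larger of the two runs, since the same
    // buffers are reused when the operator is set up again with the 'next' shape.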
1375 std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1376 std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1377 std::vector<int32_t> next_accumulators(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1378 std::vector<double> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1379
1380 const int8_t input_zero_point = -1;
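    // A non-zero input zero point keeps the test sensitive to zero-point handling in the
    // quantized kernels (an assumption about intent; the particular value is arbitrary).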
1381
1382 for (size_t iteration = 0; iteration < iterations(); iteration++) {
1383 std::generate(input.begin(), input.end(), std::ref(i8rng));
1384 std::generate(kernel.begin(), kernel.end(), std::ref(i8rng));
1385 std::generate(bias.begin(), bias.end(), std::ref(i32rng));
1386 std::fill(output.begin(), output.end(), 0xA5);
1387
1388 // Compute reference results, without renormalization.
1389 if (has_bias()) {
1390 for (size_t i = 0; i < batch_size(); i++) {
1391 for (size_t oy = 0; oy < output_height(); oy++) {
1392 for (size_t ox = 0; ox < output_width(); ox++) {
1393 for (size_t g = 0; g < groups(); g++) {
1394 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1395 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1396 bias[g * group_output_channels() + oc];
1397 }
1398 }
1399 }
1400 }
1401 }
1402 } else {
1403 std::fill(accumulators.begin(), accumulators.end(), 0);
1404 }
1405 for (size_t i = 0; i < batch_size(); i++) {
1406 for (size_t oy = 0; oy < output_height(); oy++) {
1407 for (size_t ox = 0; ox < output_width(); ox++) {
1408 for (size_t ky = 0; ky < kernel_height(); ky++) {
1409 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1410 if (iy < input_height()) {
1411 for (size_t kx = 0; kx < kernel_width(); kx++) {
1412 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1413 if (ix < input_width()) {
1414 for (size_t g = 0; g < groups(); g++) {
1415 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1416 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1417 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
1418 (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
1419 int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
1420 }
1421 }
1422 }
1423 }
1424 }
1425 }
1426 }
1427 }
1428 }
1429 }
1430
1431 // Compute renormalization parameters.
1432 const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
1433 const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());
1434
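      // Derive a quantization scale that maps the full accumulator range onto the 255 steps of
      // int8, and a zero point that roughly centers that range. For example (illustrative numbers,
      // not taken from the test), accumulators spanning [-1000, 1040] give output_scale = 8.0 and
      // output_zero_point = lrint(-0.5 - 0.5 * 40 / 8.0) = -3.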
1435 const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
1436 const int8_t output_zero_point = int8_t(std::max(std::min(
1437 lrint(-0.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
1438 long(std::numeric_limits<int8_t>::max())), long(std::numeric_limits<int8_t>::min())));
1439
1440 // Renormalize reference results.
1441 std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
1442 [this, output_scale, output_zero_point](int32_t x) -> double {
1443 return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax() - 0x80) - output_zero_point), double(qmin() - 0x80) - output_zero_point);
1444 });
1445
1446 // Create, setup, and run Convolution operator once.
1447 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1448 xnn_operator_t convolution_op = nullptr;
1449
1450 xnn_status status = xnn_create_convolution2d_nhwc_qs8(
1451 padding_top(), padding_right(), padding_bottom(), padding_left(),
1452 kernel_height(), kernel_width(),
1453 subsampling_height(), subsampling_width(),
1454 dilation_height(), dilation_width(),
1455 groups(), group_input_channels(), group_output_channels(),
1456 input_channel_stride(), output_channel_stride(),
1457 input_zero_point, 1.0f /* input scale */, 1.0f /* kernel scale */,
1458 kernel.data(), has_bias() ? bias.data() : nullptr,
1459 output_zero_point, output_scale, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80),
1460 0, &convolution_op);
1461 if (status == xnn_status_unsupported_hardware) {
1462 GTEST_SKIP();
1463 }
1464 ASSERT_EQ(xnn_status_success, status);
1465 ASSERT_NE(nullptr, convolution_op);
1466
1467 // Smart pointer to automatically delete convolution_op.
1468 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
1469
1470 ASSERT_EQ(xnn_status_success,
1471 xnn_setup_convolution2d_nhwc_qs8(
1472 convolution_op,
1473 batch_size(), input_height(), input_width(),
1474 input.data(), output.data(),
1475 nullptr /* thread pool */));
1476
1477 ASSERT_EQ(xnn_status_success,
1478 xnn_run_operator(convolution_op, nullptr /* thread pool */));
1479
1480 // Verify results of the first run.
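      // The 0.9 tolerance is expressed in output quantization steps: the reference value is kept
      // in double precision, so the integer output is allowed to differ from it by just under one step.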
1481 for (size_t i = 0; i < batch_size(); i++) {
1482 for (size_t y = 0; y < output_height(); y++) {
1483 for (size_t x = 0; x < output_width(); x++) {
1484 for (size_t g = 0; g < groups(); g++) {
1485 for (size_t c = 0; c < group_output_channels(); c++) {
1486 ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax() - 0x80))
1487 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1488 ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin() - 0x80))
1489 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1490 ASSERT_NEAR(
1491 output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
1492 double(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
1493 0.9)
1494 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1495 }
1496 }
1497 }
1498 }
1499 }
1500
1501      // Re-generate the input for the second run (the kernel and bias are already packed inside the operator).
1502 std::generate(input.begin(), input.end(), std::ref(i8rng));
1503 std::fill(output.begin(), output.end(), 0xA5);
1504
1505 // Compute reference results for the second run, including renormalization.
1506 if (has_bias()) {
1507 for (size_t i = 0; i < next_batch_size(); i++) {
1508 for (size_t oy = 0; oy < next_output_height(); oy++) {
1509 for (size_t ox = 0; ox < next_output_width(); ox++) {
1510 for (size_t g = 0; g < groups(); g++) {
1511 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1512 next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1513 bias[g * group_output_channels() + oc];
1514 }
1515 }
1516 }
1517 }
1518 }
1519 } else {
1520 std::fill(next_accumulators.begin(), next_accumulators.end(), 0);
1521 }
1522 for (size_t i = 0; i < next_batch_size(); i++) {
1523 for (size_t oy = 0; oy < next_output_height(); oy++) {
1524 for (size_t ox = 0; ox < next_output_width(); ox++) {
1525 for (size_t ky = 0; ky < kernel_height(); ky++) {
1526 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1527 if (iy < next_input_height()) {
1528 for (size_t kx = 0; kx < kernel_width(); kx++) {
1529 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1530 if (ix < next_input_width()) {
1531 for (size_t g = 0; g < groups(); g++) {
1532 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1533 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1534 next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
1535 (int32_t(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
1536 int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
1537 }
1538 }
1539 }
1540 }
1541 }
1542 }
1543 }
1544 }
1545 }
1546 }
1547 std::transform(next_accumulators.cbegin(), next_accumulators.cend(), next_output_ref.begin(),
1548 [this, output_scale, output_zero_point](int32_t x) -> double {
1549 return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax() - 0x80) - output_zero_point), double(qmin() - 0x80) - output_zero_point);
1550 });
1551
1552 // Setup and run Convolution operator the second time, and destroy the operator.
1553 ASSERT_EQ(xnn_status_success,
1554 xnn_setup_convolution2d_nhwc_qs8(
1555 convolution_op,
1556 next_batch_size(), next_input_height(), next_input_width(),
1557 input.data(), output.data(),
1558 nullptr /* thread pool */));
1559
1560 ASSERT_EQ(xnn_status_success,
1561 xnn_run_operator(convolution_op, nullptr /* thread pool */));
1562
1563 // Verify results of the second run.
1564 for (size_t i = 0; i < next_batch_size(); i++) {
1565 for (size_t y = 0; y < next_output_height(); y++) {
1566 for (size_t x = 0; x < next_output_width(); x++) {
1567 for (size_t g = 0; g < groups(); g++) {
1568 for (size_t c = 0; c < group_output_channels(); c++) {
1569 ASSERT_LE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax() - 0x80))
1570 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1571 ASSERT_GE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin() - 0x80))
1572 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1573 ASSERT_NEAR(
1574 next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
1575 double(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
1576 0.9)
1577 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1578 }
1579 }
1580 }
1581 }
1582 }
1583 }
1584 }
1585
Marat Dukhan08b7a972020-07-14 18:17:29 -07001586 void TestSetupNHWCxQU8() const {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001587 ASSERT_FALSE(depthwise_layout());
1588
1589 std::random_device random_device;
1590 auto rng = std::mt19937(random_device());
Marat Dukhanecd83112020-08-03 21:50:28 -07001591 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
Marat Dukhan5ce30d92020-04-14 03:31:26 -07001592 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001593
1594 std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + std::max(
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001595 batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
1596 next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())) + 8);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001597 std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
1598 std::vector<int32_t> bias(groups() * group_output_channels());
1599 std::vector<uint8_t> output(std::max(
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001600 batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
1601 next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
XNNPACK Teamb455b122019-09-27 18:10:33 -07001602 std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1603 std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1604 std::vector<int32_t> next_accumulators(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1605 std::vector<double> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1606
1607 const uint8_t input_zero_point = 127;
1608 const uint8_t kernel_zero_point = 127;
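    // Zero points of 127 roughly center the uint8 range, so effective input and kernel values
    // span about [-127, 128].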
1609
1610 for (size_t iteration = 0; iteration < iterations(); iteration++) {
1611 std::generate(input.begin(), input.end(), std::ref(u8rng));
1612 std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
Marat Dukhanecd83112020-08-03 21:50:28 -07001613 std::generate(bias.begin(), bias.end(), std::ref(i32rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -07001614 std::fill(output.begin(), output.end(), 0xA5);
1615
1616 // Compute reference results, without renormalization.
Marat Dukhanf568f082019-10-30 09:47:07 -07001617 if (has_bias()) {
1618 for (size_t i = 0; i < batch_size(); i++) {
1619 for (size_t oy = 0; oy < output_height(); oy++) {
1620 for (size_t ox = 0; ox < output_width(); ox++) {
1621 for (size_t g = 0; g < groups(); g++) {
1622 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1623 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1624 bias[g * group_output_channels() + oc];
1625 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001626 }
1627 }
1628 }
1629 }
Marat Dukhanf568f082019-10-30 09:47:07 -07001630 } else {
1631 std::fill(accumulators.begin(), accumulators.end(), 0);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001632 }
1633 for (size_t i = 0; i < batch_size(); i++) {
1634 for (size_t oy = 0; oy < output_height(); oy++) {
1635 for (size_t ox = 0; ox < output_width(); ox++) {
1636 for (size_t ky = 0; ky < kernel_height(); ky++) {
1637 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1638 if (iy < input_height()) {
1639 for (size_t kx = 0; kx < kernel_width(); kx++) {
1640 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1641 if (ix < input_width()) {
1642 for (size_t g = 0; g < groups(); g++) {
1643 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1644 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1645 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001646 (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
XNNPACK Teamb455b122019-09-27 18:10:33 -07001647 (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
1648 }
1649 }
1650 }
1651 }
1652 }
1653 }
1654 }
1655 }
1656 }
1657 }
1658
1659 // Compute renormalization parameters.
1660 const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
1661 const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());
1662
1663 const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
1664 const uint8_t output_zero_point = uint8_t(std::max(std::min(
1665 lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
1666 long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));
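      // For the unsigned path the midpoint of [0, 255] is 127.5, so the zero point is chosen to
      // center the accumulated range within that interval and is then clamped to representable
      // uint8 values.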
1667
1668 // Renormalize reference results.
1669 std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
1670 [this, output_scale, output_zero_point](int32_t x) -> double {
1671 return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
1672 });
1673
1674 // Create, setup, and run Convolution operator once.
Marat Dukhan04f03be2019-11-19 12:36:47 -08001675 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
XNNPACK Teamb455b122019-09-27 18:10:33 -07001676 xnn_operator_t convolution_op = nullptr;
1677
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07001678 xnn_status status = xnn_create_convolution2d_nhwc_qu8(
XNNPACK Teamb455b122019-09-27 18:10:33 -07001679 padding_top(), padding_right(), padding_bottom(), padding_left(),
1680 kernel_height(), kernel_width(),
1681 subsampling_height(), subsampling_width(),
1682 dilation_height(), dilation_width(),
1683 groups(), group_input_channels(), group_output_channels(),
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001684 input_channel_stride(), output_channel_stride(),
XNNPACK Teamb455b122019-09-27 18:10:33 -07001685 input_zero_point, 1.0f /* input scale */,
1686 kernel_zero_point, 1.0f /* kernel scale */,
Marat Dukhanf568f082019-10-30 09:47:07 -07001687 kernel.data(), has_bias() ? bias.data() : nullptr,
XNNPACK Teamb455b122019-09-27 18:10:33 -07001688 output_zero_point, output_scale, qmin(), qmax(),
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07001689 0, &convolution_op);
1690 if (status == xnn_status_unsupported_hardware) {
1691 GTEST_SKIP();
1692 }
1693 ASSERT_EQ(xnn_status_success, status);
1694 ASSERT_NE(nullptr, convolution_op);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001695
1696 // Smart pointer to automatically delete convolution_op.
1697 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
1698
1699 ASSERT_EQ(xnn_status_success,
Marat Dukhan08b7a972020-07-14 18:17:29 -07001700 xnn_setup_convolution2d_nhwc_qu8(
XNNPACK Teamb455b122019-09-27 18:10:33 -07001701 convolution_op,
1702 batch_size(), input_height(), input_width(),
1703 input.data(), output.data(),
1704 nullptr /* thread pool */));
1705
1706 ASSERT_EQ(xnn_status_success,
1707 xnn_run_operator(convolution_op, nullptr /* thread pool */));
1708
1709 // Verify results of the first run.
1710 for (size_t i = 0; i < batch_size(); i++) {
1711 for (size_t y = 0; y < output_height(); y++) {
1712 for (size_t x = 0; x < output_width(); x++) {
1713 for (size_t g = 0; g < groups(); g++) {
1714 for (size_t c = 0; c < group_output_channels(); c++) {
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001715 ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
XNNPACK Teamb455b122019-09-27 18:10:33 -07001716 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001717 ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
XNNPACK Teamb455b122019-09-27 18:10:33 -07001718 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1719 ASSERT_NEAR(
1720 output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001721 double(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
XNNPACK Teamb455b122019-09-27 18:10:33 -07001722 0.9)
1723 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1724 }
1725 }
1726 }
1727 }
1728 }
1729
1730      // Re-generate the input for the second run (the kernel and bias are already packed inside the operator).
1731 std::generate(input.begin(), input.end(), std::ref(u8rng));
1732 std::fill(output.begin(), output.end(), 0xA5);
1733
1734 // Compute reference results for the second run, including renormalization.
Marat Dukhanf568f082019-10-30 09:47:07 -07001735 if (has_bias()) {
1736 for (size_t i = 0; i < next_batch_size(); i++) {
1737 for (size_t oy = 0; oy < next_output_height(); oy++) {
1738 for (size_t ox = 0; ox < next_output_width(); ox++) {
1739 for (size_t g = 0; g < groups(); g++) {
1740 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1741 next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1742 bias[g * group_output_channels() + oc];
1743 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07001744 }
1745 }
1746 }
1747 }
Marat Dukhanf568f082019-10-30 09:47:07 -07001748 } else {
1749 std::fill(next_accumulators.begin(), next_accumulators.end(), 0);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001750 }
1751 for (size_t i = 0; i < next_batch_size(); i++) {
1752 for (size_t oy = 0; oy < next_output_height(); oy++) {
1753 for (size_t ox = 0; ox < next_output_width(); ox++) {
1754 for (size_t ky = 0; ky < kernel_height(); ky++) {
1755 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1756 if (iy < next_input_height()) {
1757 for (size_t kx = 0; kx < kernel_width(); kx++) {
1758 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1759 if (ix < next_input_width()) {
1760 for (size_t g = 0; g < groups(); g++) {
1761 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1762 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1763 next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001764 (int32_t(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
XNNPACK Teamb455b122019-09-27 18:10:33 -07001765 (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
1766 }
1767 }
1768 }
1769 }
1770 }
1771 }
1772 }
1773 }
1774 }
1775 }
1776 std::transform(next_accumulators.cbegin(), next_accumulators.cend(), next_output_ref.begin(),
1777 [this, output_scale, output_zero_point](int32_t x) -> double {
1778 return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
1779 });
1780
1781 // Setup and run Convolution operator the second time, and destroy the operator.
1782 ASSERT_EQ(xnn_status_success,
Marat Dukhan08b7a972020-07-14 18:17:29 -07001783 xnn_setup_convolution2d_nhwc_qu8(
XNNPACK Teamb455b122019-09-27 18:10:33 -07001784 convolution_op,
1785 next_batch_size(), next_input_height(), next_input_width(),
1786 input.data(), output.data(),
1787 nullptr /* thread pool */));
1788
1789 ASSERT_EQ(xnn_status_success,
1790 xnn_run_operator(convolution_op, nullptr /* thread pool */));
1791
1792 // Verify results of the second run.
1793 for (size_t i = 0; i < next_batch_size(); i++) {
1794 for (size_t y = 0; y < next_output_height(); y++) {
1795 for (size_t x = 0; x < next_output_width(); x++) {
1796 for (size_t g = 0; g < groups(); g++) {
1797 for (size_t c = 0; c < group_output_channels(); c++) {
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001798 ASSERT_LE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
XNNPACK Teamb455b122019-09-27 18:10:33 -07001799 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001800 ASSERT_GE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
XNNPACK Teamb455b122019-09-27 18:10:33 -07001801 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1802 ASSERT_NEAR(
1803 next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001804 double(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
XNNPACK Teamb455b122019-09-27 18:10:33 -07001805 0.9)
1806 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1807 }
1808 }
1809 }
1810 }
1811 }
1812 }
1813 }
1814
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001815 void TestSetupNHWCxF16() const {
1816 ASSERT_FALSE(depthwise_layout());
1817
1818 std::random_device random_device;
1819 auto rng = std::mt19937(random_device());
Frank Barchard7d2c1f22020-09-14 16:43:53 -07001820 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), rng);
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001821 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
1822
1823 std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + std::max(
1824 batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
1825 next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())));
1826 std::vector<uint16_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
1827 std::vector<uint16_t> bias(groups() * group_output_channels());
1828 std::vector<uint16_t> output(std::max(
1829 batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
1830 next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
1831 std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1832 std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1833
1834 for (size_t iteration = 0; iteration < iterations(); iteration++) {
1835 std::generate(input.begin(), input.end(), std::ref(f16rng));
1836 std::generate(kernel.begin(), kernel.end(), std::ref(f16rng));
1837 std::generate(bias.begin(), bias.end(), std::ref(f16rng));
1838 std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
1839
1840 // Compute reference results, without clamping.
1841 if (has_bias()) {
1842 for (size_t i = 0; i < batch_size(); i++) {
1843 for (size_t oy = 0; oy < output_height(); oy++) {
1844 for (size_t ox = 0; ox < output_width(); ox++) {
1845 for (size_t g = 0; g < groups(); g++) {
1846 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1847 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1848 fp16_ieee_to_fp32_value(bias[g * group_output_channels() + oc]);
1849 }
1850 }
1851 }
1852 }
1853 }
1854 } else {
1855 std::fill(output_ref.begin(), output_ref.end(), 0.0f);
1856 }
1857 for (size_t i = 0; i < batch_size(); i++) {
1858 for (size_t oy = 0; oy < output_height(); oy++) {
1859 for (size_t ox = 0; ox < output_width(); ox++) {
1860 for (size_t ky = 0; ky < kernel_height(); ky++) {
1861 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1862 if (iy < input_height()) {
1863 for (size_t kx = 0; kx < kernel_width(); kx++) {
1864 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1865 if (ix < input_width()) {
1866 for (size_t g = 0; g < groups(); g++) {
1867 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1868 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1869 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
1870 fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) *
1871 fp16_ieee_to_fp32_value(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
1872 }
1873 }
1874 }
1875 }
1876 }
1877 }
1878 }
1879 }
1880 }
1881 }
1882
1883 // Compute clamping parameters.
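      // The clamping bounds are first rounded through fp16 so the limits passed to the fp16
      // operator are exactly representable; if that rounding collapses them to the same value,
      // clamping is effectively disabled by substituting infinities.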
1884 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
1885 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
1886 const float accumulated_range = accumulated_max - accumulated_min;
1887 const float scaled_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + accumulated_range / 255.0f * float(qmin())));
1888 const float scaled_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - accumulated_range / 255.0f * float(255 - qmax())));
1889 const float output_min = scaled_min == scaled_max ? -std::numeric_limits<float>::infinity() : scaled_min;
1890 const float output_max = scaled_min == scaled_max ? +std::numeric_limits<float>::infinity() : scaled_max;
1891
1892 for (float& output_value : output_ref) {
1893 output_value = std::min(std::max(output_value, output_min), output_max);
1894 }
1895
1896 // Create, setup, and run Convolution operator once.
1897 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1898 xnn_operator_t convolution_op = nullptr;
1899
1900 xnn_status status = xnn_create_convolution2d_nhwc_f16(
1901 padding_top(), padding_right(), padding_bottom(), padding_left(),
1902 kernel_height(), kernel_width(),
1903 subsampling_height(), subsampling_width(),
1904 dilation_height(), dilation_width(),
1905 groups(), group_input_channels(), group_output_channels(),
1906 input_channel_stride(), output_channel_stride(),
1907 kernel.data(), has_bias() ? bias.data() : nullptr,
1908 output_min, output_max,
1909 0, &convolution_op);
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001910 if (status == xnn_status_unsupported_hardware) {
1911 GTEST_SKIP();
1912 }
1913 ASSERT_EQ(xnn_status_success, status);
1914 ASSERT_NE(nullptr, convolution_op);
1915
1916 // Smart pointer to automatically delete convolution_op.
1917 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
1918
1919 ASSERT_EQ(xnn_status_success,
1920 xnn_setup_convolution2d_nhwc_f16(
1921 convolution_op,
1922 batch_size(), input_height(), input_width(),
1923 input.data(), output.data(),
1924 nullptr /* thread pool */));
1925
1926 ASSERT_EQ(xnn_status_success,
1927 xnn_run_operator(convolution_op, nullptr /* thread pool */));
1928
1929 // Verify results of the first run.
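      // fp16 carries about 11 bits of significand, so the comparison uses the larger of an
      // absolute 1.0e-4 and a 1% relative tolerance to absorb accumulation error.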
1930 for (size_t i = 0; i < batch_size(); i++) {
1931 for (size_t y = 0; y < output_height(); y++) {
1932 for (size_t x = 0; x < output_width(); x++) {
1933 for (size_t g = 0; g < groups(); g++) {
1934 for (size_t c = 0; c < group_output_channels(); c++) {
1935 ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_min)
1936 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1937 ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_max)
1938 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
Frank Barchard2b9d29b2020-09-17 12:03:39 -07001939 ASSERT_NEAR(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c], fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), std::max(1.0e-4f, std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]) * 1.0e-2f))
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001940 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1941 }
1942 }
1943 }
1944 }
1945 }
1946
1947      // Re-generate the input for the second run (the kernel and bias are already packed inside the operator).
1948 std::generate(input.begin(), input.end(), std::ref(f16rng));
1949 std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
1950
1951 // Compute reference results for the second run, including clamping.
1952 if (has_bias()) {
1953 for (size_t i = 0; i < next_batch_size(); i++) {
1954 for (size_t oy = 0; oy < next_output_height(); oy++) {
1955 for (size_t ox = 0; ox < next_output_width(); ox++) {
1956 for (size_t g = 0; g < groups(); g++) {
1957 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1958 next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1959 fp16_ieee_to_fp32_value(bias[g * group_output_channels() + oc]);
1960 }
1961 }
1962 }
1963 }
1964 }
1965 } else {
1966 std::fill(next_output_ref.begin(), next_output_ref.end(), 0.0f);
1967 }
1968 for (size_t i = 0; i < next_batch_size(); i++) {
1969 for (size_t oy = 0; oy < next_output_height(); oy++) {
1970 for (size_t ox = 0; ox < next_output_width(); ox++) {
1971 for (size_t ky = 0; ky < kernel_height(); ky++) {
1972 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1973 if (iy < next_input_height()) {
1974 for (size_t kx = 0; kx < kernel_width(); kx++) {
1975 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1976 if (ix < next_input_width()) {
1977 for (size_t g = 0; g < groups(); g++) {
1978 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1979 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1980 next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
1981 fp16_ieee_to_fp32_value(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) *
1982 fp16_ieee_to_fp32_value(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
1983 }
1984 }
1985 }
1986 }
1987 }
1988 }
1989 }
1990 }
1991 }
1992 }
1993 for (float& value : next_output_ref) {
1994 value = std::max(std::min(value, output_max), output_min);
1995 }
1996
1997 // Setup and run Convolution operator the second time, and destroy the operator.
1998 ASSERT_EQ(xnn_status_success,
1999 xnn_setup_convolution2d_nhwc_f16(
2000 convolution_op,
2001 next_batch_size(), next_input_height(), next_input_width(),
2002 input.data(), output.data(),
2003 nullptr /* thread pool */));
2004
2005 ASSERT_EQ(xnn_status_success,
2006 xnn_run_operator(convolution_op, nullptr /* thread pool */));
2007
2008 // Verify results of the second run.
2009 for (size_t i = 0; i < next_batch_size(); i++) {
2010 for (size_t y = 0; y < next_output_height(); y++) {
2011 for (size_t x = 0; x < next_output_width(); x++) {
2012 for (size_t g = 0; g < groups(); g++) {
2013 for (size_t c = 0; c < group_output_channels(); c++) {
2014 ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_min)
2015 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2016 ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_max)
2017 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
Frank Barchard2b9d29b2020-09-17 12:03:39 -07002018 ASSERT_NEAR(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c], fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), std::max(1.0e-4f, std::abs(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c]) * 1.0e-2f))
Frank Barchard49b4dcc2020-06-26 14:07:19 -07002019 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2020 }
2021 }
2022 }
2023 }
2024 }
2025 }
2026 }
2027
Marat Dukhanefc47b82019-11-18 09:25:38 -08002028 void TestSetupNHWCxF32() const {
XNNPACK Teamb455b122019-09-27 18:10:33 -07002029 ASSERT_FALSE(depthwise_layout());
2030
2031 std::random_device random_device;
2032 auto rng = std::mt19937(random_device());
2033 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
2034
2035 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + std::max(
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07002036 batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
2037 next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())));
XNNPACK Teamb455b122019-09-27 18:10:33 -07002038 std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
2039 std::vector<float> bias(groups() * group_output_channels());
2040 std::vector<float> output(std::max(
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07002041 batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
2042 next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
XNNPACK Teamb455b122019-09-27 18:10:33 -07002043 std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
2044 std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
2045
2046 for (size_t iteration = 0; iteration < iterations(); iteration++) {
2047 std::generate(input.begin(), input.end(), std::ref(f32rng));
2048 std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
2049 std::generate(bias.begin(), bias.end(), std::ref(f32rng));
2050 std::fill(output.begin(), output.end(), nanf(""));
2051
2052 // Compute reference results, without clamping.
Marat Dukhanf568f082019-10-30 09:47:07 -07002053 if (has_bias()) {
2054 for (size_t i = 0; i < batch_size(); i++) {
2055 for (size_t oy = 0; oy < output_height(); oy++) {
2056 for (size_t ox = 0; ox < output_width(); ox++) {
2057 for (size_t g = 0; g < groups(); g++) {
2058 for (size_t oc = 0; oc < group_output_channels(); oc++) {
2059 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
2060 bias[g * group_output_channels() + oc];
2061 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07002062 }
2063 }
2064 }
2065 }
Marat Dukhanf568f082019-10-30 09:47:07 -07002066 } else {
2067 std::fill(output_ref.begin(), output_ref.end(), 0.0f);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002068 }
2069 for (size_t i = 0; i < batch_size(); i++) {
2070 for (size_t oy = 0; oy < output_height(); oy++) {
2071 for (size_t ox = 0; ox < output_width(); ox++) {
2072 for (size_t ky = 0; ky < kernel_height(); ky++) {
2073 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
2074 if (iy < input_height()) {
2075 for (size_t kx = 0; kx < kernel_width(); kx++) {
2076 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
2077 if (ix < input_width()) {
2078 for (size_t g = 0; g < groups(); g++) {
2079 for (size_t oc = 0; oc < group_output_channels(); oc++) {
2080 for (size_t ic = 0; ic < group_input_channels(); ic++) {
2081 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07002082 input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic] *
XNNPACK Teamb455b122019-09-27 18:10:33 -07002083 kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
2084 }
2085 }
2086 }
2087 }
2088 }
2089 }
2090 }
2091 }
2092 }
2093 }
2094
2095 // Compute clamping parameters.
2096 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
2097 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
2098
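      // Unlike the NCHW F32 test above, the bounds are not special-cased here: with the default
      // qmin() == 0 and qmax() == 255 they simply equal the accumulated extrema, which still
      // leaves the reference results unchanged by clamping.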
2099 const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
2100 const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());
2101
2102 // Clamp reference results.
2103 for (float& value : output_ref) {
2104 value = std::max(std::min(value, output_max), output_min);
2105 }
2106
2107 // Create, setup, and run Convolution operator once.
Marat Dukhan04f03be2019-11-19 12:36:47 -08002108 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
XNNPACK Teamb455b122019-09-27 18:10:33 -07002109 xnn_operator_t convolution_op = nullptr;
2110
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07002111 xnn_status status = xnn_create_convolution2d_nhwc_f32(
XNNPACK Teamb455b122019-09-27 18:10:33 -07002112 padding_top(), padding_right(), padding_bottom(), padding_left(),
2113 kernel_height(), kernel_width(),
2114 subsampling_height(), subsampling_width(),
2115 dilation_height(), dilation_width(),
2116 groups(), group_input_channels(), group_output_channels(),
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07002117 input_channel_stride(), output_channel_stride(),
Marat Dukhanf568f082019-10-30 09:47:07 -07002118 kernel.data(), has_bias() ? bias.data() : nullptr,
XNNPACK Teamb455b122019-09-27 18:10:33 -07002119 output_min, output_max,
Marat Dukhan16f1e1a2020-08-04 16:38:22 -07002120 0, &convolution_op);
2121 if (status == xnn_status_unsupported_hardware) {
2122 GTEST_SKIP();
2123 }
2124 ASSERT_EQ(xnn_status_success, status);
2125 ASSERT_NE(nullptr, convolution_op);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002126
2127 // Smart pointer to automatically delete convolution_op.
2128 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
2129
2130 ASSERT_EQ(xnn_status_success,
2131 xnn_setup_convolution2d_nhwc_f32(
2132 convolution_op,
2133 batch_size(), input_height(), input_width(),
2134 input.data(), output.data(),
2135 nullptr /* thread pool */));
2136
2137 ASSERT_EQ(xnn_status_success,
2138 xnn_run_operator(convolution_op, nullptr /* thread pool */));
2139
2140 // Verify results of the first run.
2141 for (size_t i = 0; i < batch_size(); i++) {
2142 for (size_t y = 0; y < output_height(); y++) {
2143 for (size_t x = 0; x < output_width(); x++) {
2144 for (size_t g = 0; g < groups(); g++) {
2145 for (size_t c = 0; c < group_output_channels(); c++) {
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07002146 ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_min)
XNNPACK Teamb455b122019-09-27 18:10:33 -07002147 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07002148 ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_max)
XNNPACK Teamb455b122019-09-27 18:10:33 -07002149 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2150 ASSERT_NEAR(
2151 output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07002152 output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c],
XNNPACK Teamb455b122019-09-27 18:10:33 -07002153 1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
2154 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2155 }
2156 }
2157 }
2158 }
2159 }
2160
2161      // Re-generate the input for the second run (the kernel and bias are already packed inside the operator).
2162 std::generate(input.begin(), input.end(), std::ref(f32rng));
2163 std::fill(output.begin(), output.end(), nanf(""));
2164
2165 // Compute reference results for the second run, including clamping.
Marat Dukhanf568f082019-10-30 09:47:07 -07002166 if (has_bias()) {
2167 for (size_t i = 0; i < next_batch_size(); i++) {
2168 for (size_t oy = 0; oy < next_output_height(); oy++) {
2169 for (size_t ox = 0; ox < next_output_width(); ox++) {
2170 for (size_t g = 0; g < groups(); g++) {
2171 for (size_t oc = 0; oc < group_output_channels(); oc++) {
2172 next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
2173 bias[g * group_output_channels() + oc];
2174 }
XNNPACK Teamb455b122019-09-27 18:10:33 -07002175 }
2176 }
2177 }
2178 }
Marat Dukhanf568f082019-10-30 09:47:07 -07002179 } else {
2180 std::fill(next_output_ref.begin(), next_output_ref.end(), 0.0f);
XNNPACK Teamb455b122019-09-27 18:10:33 -07002181 }
2182 for (size_t i = 0; i < next_batch_size(); i++) {
2183 for (size_t oy = 0; oy < next_output_height(); oy++) {
2184 for (size_t ox = 0; ox < next_output_width(); ox++) {
2185 for (size_t ky = 0; ky < kernel_height(); ky++) {
2186 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
2187 if (iy < next_input_height()) {
2188 for (size_t kx = 0; kx < kernel_width(); kx++) {
2189 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
2190 if (ix < next_input_width()) {
2191 for (size_t g = 0; g < groups(); g++) {
2192 for (size_t oc = 0; oc < group_output_channels(); oc++) {
2193 for (size_t ic = 0; ic < group_input_channels(); ic++) {
2194 next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07002195 input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic] *
XNNPACK Teamb455b122019-09-27 18:10:33 -07002196 kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
2197 }
2198 }
2199 }
2200 }
2201 }
2202 }
2203 }
2204 }
2205 }
2206 }
2207 for (float& value : next_output_ref) {
2208 value = std::max(std::min(value, output_max), output_min);
2209 }
2210
2211 // Setup and run Convolution operator the second time, and destroy the operator.
2212 ASSERT_EQ(xnn_status_success,
2213 xnn_setup_convolution2d_nhwc_f32(
2214 convolution_op,
2215 next_batch_size(), next_input_height(), next_input_width(),
2216 input.data(), output.data(),
2217 nullptr /* thread pool */));
2218
2219 ASSERT_EQ(xnn_status_success,
2220 xnn_run_operator(convolution_op, nullptr /* thread pool */));
2221
2222 // Verify results of the second run.
2223 for (size_t i = 0; i < next_batch_size(); i++) {
2224 for (size_t y = 0; y < next_output_height(); y++) {
2225 for (size_t x = 0; x < next_output_width(); x++) {
2226 for (size_t g = 0; g < groups(); g++) {
2227 for (size_t c = 0; c < group_output_channels(); c++) {
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07002228 ASSERT_GE(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_min)
XNNPACK Teamb455b122019-09-27 18:10:33 -07002229 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07002230 ASSERT_LE(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_max)
XNNPACK Teamb455b122019-09-27 18:10:33 -07002231 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2232 ASSERT_NEAR(
2233 next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07002234 output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c],
XNNPACK Teamb455b122019-09-27 18:10:33 -07002235 1.0e-4 * std::abs(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c]))
2236 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2237 }
2238 }
2239 }
2240 }
2241 }
2242 }
2243 }
2244
2245 private:
2246 uint32_t padding_top_{0};
2247 uint32_t padding_right_{0};
2248 uint32_t padding_bottom_{0};
2249 uint32_t padding_left_{0};
Marat Dukhan8440fde2019-10-24 12:46:13 -07002250 bool padding_tf_same_{false};
XNNPACK Teamb455b122019-09-27 18:10:33 -07002251 size_t input_height_{1};
2252 size_t input_width_{1};
2253 uint32_t groups_{1};
2254 size_t group_input_channels_{1};
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07002255 size_t input_channel_stride_{0};
XNNPACK Teamb455b122019-09-27 18:10:33 -07002256 size_t group_output_channels_{1};
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07002257 size_t output_channel_stride_{0};
XNNPACK Teamb455b122019-09-27 18:10:33 -07002258 size_t batch_size_{1};
2259 uint32_t kernel_height_{1};
2260 uint32_t kernel_width_{1};
2261 uint32_t dilation_height_{1};
2262 uint32_t dilation_width_{1};
2263 uint32_t subsampling_height_{1};
2264 uint32_t subsampling_width_{1};
2265 size_t next_input_height_{0};
2266 size_t next_input_width_{0};
2267 size_t next_batch_size_{0};
Marat Dukhanefc47b82019-11-18 09:25:38 -08002268 float sparsity_{0.0f};
XNNPACK Teamb455b122019-09-27 18:10:33 -07002269 uint8_t qmin_{0};
2270 uint8_t qmax_{255};
2271 bool depthwise_layout_{false};
Marat Dukhanefc47b82019-11-18 09:25:38 -08002272 bool force_nhwc_input_{false};
Marat Dukhanf568f082019-10-30 09:47:07 -07002273 bool has_bias_{true};
XNNPACK Teamb455b122019-09-27 18:10:33 -07002274 size_t iterations_{1};
2275};