blob: 975b38bb6af0400ae94c0eac8345999ace570d63 [file] [log] [blame]
XNNPACK Teamb455b122019-09-27 18:10:33 -07001// Copyright (c) Facebook, Inc. and its affiliates.
2// All rights reserved.
3//
4// Copyright 2019 Google LLC
5//
6// This source code is licensed under the BSD-style license found in the
7// LICENSE file in the root directory of this source tree.
8
9#pragma once
10
11#include <gtest/gtest.h>
12
13#include <algorithm>
14#include <cassert>
15#include <cmath>
16#include <cstddef>
17#include <cstdlib>
18#include <functional>
Marat Dukhan5ce30d92020-04-14 03:31:26 -070019#include <limits>
XNNPACK Teamb455b122019-09-27 18:10:33 -070020#include <random>
21#include <vector>
22
Frank Barchard49b4dcc2020-06-26 14:07:19 -070023#include <fp16.h>
24
XNNPACK Teamb455b122019-09-27 18:10:33 -070025#include <xnnpack.h>
26
27
28class ConvolutionOperatorTester {
29 public:
Marat Dukhan8440fde2019-10-24 12:46:13 -070030 inline ConvolutionOperatorTester& padding_tf_same(bool padding_same) {
31 if (padding_same) {
32 assert(padding_top() == 0);
33 assert(padding_left() == 0);
34 assert(padding_bottom() == 0);
35 assert(padding_right() == 0);
36 }
37 this->padding_tf_same_ = padding_same;
38 return *this;
39 }
40
41 inline bool padding_tf_same() const {
42 return this->padding_tf_same_;
43 }
44
XNNPACK Teamb455b122019-09-27 18:10:33 -070045 inline ConvolutionOperatorTester& padding(uint32_t padding) {
Marat Dukhan8440fde2019-10-24 12:46:13 -070046 assert(!padding_tf_same());
XNNPACK Teamb455b122019-09-27 18:10:33 -070047 this->padding_top_ = padding;
48 this->padding_right_ = padding;
49 this->padding_bottom_ = padding;
50 this->padding_left_ = padding;
51 return *this;
52 }
53
54 inline ConvolutionOperatorTester& padding(uint32_t padding_height, uint32_t padding_width) {
Marat Dukhan8440fde2019-10-24 12:46:13 -070055 assert(!padding_tf_same());
XNNPACK Teamb455b122019-09-27 18:10:33 -070056 this->padding_top_ = padding_height;
57 this->padding_right_ = padding_width;
58 this->padding_bottom_ = padding_height;
59 this->padding_left_ = padding_width;
60 return *this;
61 }
62
63 inline ConvolutionOperatorTester& padding_height(uint32_t padding_height) {
Marat Dukhan8440fde2019-10-24 12:46:13 -070064 assert(!padding_tf_same());
XNNPACK Teamb455b122019-09-27 18:10:33 -070065 this->padding_top_ = padding_height;
66 this->padding_bottom_ = padding_height;
67 return *this;
68 }
69
70 inline ConvolutionOperatorTester& padding_width(uint32_t padding_width) {
Marat Dukhan8440fde2019-10-24 12:46:13 -070071 assert(!padding_tf_same());
XNNPACK Teamb455b122019-09-27 18:10:33 -070072 this->padding_right_ = padding_width;
73 this->padding_left_ = padding_width;
74 return *this;
75 }
76
77 inline ConvolutionOperatorTester& padding_top(uint32_t padding_top) {
Marat Dukhan8440fde2019-10-24 12:46:13 -070078 assert(!padding_tf_same());
XNNPACK Teamb455b122019-09-27 18:10:33 -070079 this->padding_top_ = padding_top;
80 return *this;
81 }
82
83 inline uint32_t padding_top() const {
Marat Dukhan8440fde2019-10-24 12:46:13 -070084 if (padding_tf_same()) {
85 const uint32_t total_padding_height =
86 (output_height() - 1) * subsampling_height() + dilated_kernel_height() - input_height();
87 return total_padding_height / 2;
88 } else {
89 return this->padding_top_;
90 }
XNNPACK Teamb455b122019-09-27 18:10:33 -070091 }
92
93 inline ConvolutionOperatorTester& padding_left(uint32_t padding_left) {
Marat Dukhan8440fde2019-10-24 12:46:13 -070094 assert(!padding_tf_same());
XNNPACK Teamb455b122019-09-27 18:10:33 -070095 this->padding_left_ = padding_left;
96 return *this;
97 }
98
99 inline uint32_t padding_left() const {
Marat Dukhan8440fde2019-10-24 12:46:13 -0700100 if (padding_tf_same()) {
101 const uint32_t total_padding_width =
102 (output_width() - 1) * subsampling_width() + dilated_kernel_width() - input_width();
103 return total_padding_width / 2;
104 } else {
105 return this->padding_left_;
106 }
107 }
108
109 inline ConvolutionOperatorTester& padding_bottom(uint32_t padding_bottom) {
110 assert(!padding_tf_same());
111 this->padding_bottom_ = padding_bottom;
112 return *this;
113 }
114
115 inline uint32_t padding_bottom() const {
116 if (padding_tf_same()) {
117 const uint32_t total_padding_height =
118 (output_height() - 1) * subsampling_height() + dilated_kernel_height() - input_height();
119 return total_padding_height - total_padding_height / 2;
120 } else {
121 return this->padding_bottom_;
122 }
123 }
124
125 inline ConvolutionOperatorTester& padding_right(uint32_t padding_right) {
126 assert(!padding_tf_same());
127 this->padding_right_ = padding_right;
128 return *this;
129 }
130
131 inline uint32_t padding_right() const {
132 if (padding_tf_same()) {
133 const uint32_t total_padding_width =
134 (output_width() - 1) * subsampling_width() + dilated_kernel_width() - input_width();
135 return total_padding_width - total_padding_width / 2;
136 } else {
137 return this->padding_right_;
138 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700139 }
140
141 inline ConvolutionOperatorTester& input_size(uint32_t input_height, uint32_t input_width) {
142 assert(input_height >= 1);
143 assert(input_width >= 1);
144 this->input_height_ = input_height;
145 this->input_width_ = input_width;
146 return *this;
147 }
148
149 inline ConvolutionOperatorTester& input_height(uint32_t input_height) {
150 assert(input_height >= 1);
151 this->input_height_ = input_height;
152 return *this;
153 }
154
155 inline uint32_t input_height() const {
156 return this->input_height_;
157 }
158
159 inline ConvolutionOperatorTester& input_width(uint32_t input_width) {
160 assert(input_width >= 1);
161 this->input_width_ = input_width;
162 return *this;
163 }
164
165 inline uint32_t input_width() const {
166 return this->input_width_;
167 }
168
169 inline ConvolutionOperatorTester& groups(uint32_t groups) {
170 assert(groups >= 1);
171 this->groups_ = groups;
172 return *this;
173 }
174
175 inline uint32_t groups() const {
176 return this->groups_;
177 }
178
179 inline ConvolutionOperatorTester& group_input_channels(size_t group_input_channels) {
180 assert(group_input_channels >= 1);
181 this->group_input_channels_ = group_input_channels;
182 return *this;
183 }
184
185 inline size_t group_input_channels() const {
186 return this->group_input_channels_;
187 }
188
189 inline ConvolutionOperatorTester& group_output_channels(size_t group_output_channels) {
190 assert(group_output_channels >= 1);
191 this->group_output_channels_ = group_output_channels;
192 return *this;
193 }
194
195 inline size_t group_output_channels() const {
196 return this->group_output_channels_;
197 }
198
199 inline ConvolutionOperatorTester& batch_size(size_t batch_size) {
200 assert(batch_size >= 1);
201 this->batch_size_ = batch_size;
202 return *this;
203 }
204
205 inline size_t batch_size() const {
206 return this->batch_size_;
207 }
208
209 inline ConvolutionOperatorTester& kernel_size(uint32_t kernel_size) {
210 assert(kernel_size >= 1);
211 this->kernel_height_ = kernel_size;
212 this->kernel_width_ = kernel_size;
213 return *this;
214 }
215
216 inline ConvolutionOperatorTester& kernel_size(uint32_t kernel_height, uint32_t kernel_width) {
217 assert(kernel_height >= 1);
218 assert(kernel_width >= 1);
219 this->kernel_height_ = kernel_height;
220 this->kernel_width_ = kernel_width;
221 return *this;
222 }
223
224 inline ConvolutionOperatorTester& kernel_height(uint32_t kernel_height) {
225 assert(kernel_height >= 1);
226 this->kernel_height_ = kernel_height;
227 return *this;
228 }
229
230 inline uint32_t kernel_height() const {
231 return this->kernel_height_;
232 }
233
234 inline ConvolutionOperatorTester& kernel_width(uint32_t kernel_width) {
235 assert(kernel_width >= 1);
236 this->kernel_width_ = kernel_width;
237 return *this;
238 }
239
240 inline uint32_t kernel_width() const {
241 return this->kernel_width_;
242 }
243
244 inline ConvolutionOperatorTester& dilation(uint32_t dilation) {
245 assert(dilation >= 1);
246 this->dilation_height_ = dilation;
247 this->dilation_width_ = dilation;
248 return *this;
249 }
250
251 inline ConvolutionOperatorTester& dilation(uint32_t dilation_height, uint32_t dilation_width) {
252 assert(dilation_height >= 1);
253 assert(dilation_width >= 1);
254 this->dilation_height_ = dilation_height;
255 this->dilation_width_ = dilation_width;
256 return *this;
257 }
258
259 inline ConvolutionOperatorTester& dilation_height(uint32_t dilation_height) {
260 assert(dilation_height >= 1);
261 this->dilation_height_ = dilation_height;
262 return *this;
263 }
264
265 inline uint32_t dilation_height() const {
266 return this->dilation_height_;
267 }
268
269 inline ConvolutionOperatorTester& dilation_width(uint32_t dilation_width) {
270 assert(dilation_width >= 1);
271 this->dilation_width_ = dilation_width;
272 return *this;
273 }
274
275 inline uint32_t dilation_width() const {
276 return this->dilation_width_;
277 }
278
279 inline ConvolutionOperatorTester& subsampling(uint32_t subsampling) {
280 assert(subsampling >= 1);
281 this->subsampling_height_ = subsampling;
282 this->subsampling_width_ = subsampling;
283 return *this;
284 }
285
286 inline ConvolutionOperatorTester& subsampling(uint32_t subsampling_height, uint32_t subsampling_width) {
287 assert(subsampling_height >= 1);
288 assert(subsampling_width >= 1);
289 this->subsampling_height_ = subsampling_height;
290 this->subsampling_width_ = subsampling_width;
291 return *this;
292 }
293
294 inline ConvolutionOperatorTester& subsampling_height(uint32_t subsampling_height) {
295 assert(subsampling_height >= 1);
296 this->subsampling_height_ = subsampling_height;
297 return *this;
298 }
299
300 inline uint32_t subsampling_height() const {
301 return this->subsampling_height_;
302 }
303
304 inline ConvolutionOperatorTester& subsampling_width(uint32_t subsampling_width) {
305 assert(subsampling_width >= 1);
306 this->subsampling_width_ = subsampling_width;
307 return *this;
308 }
309
310 inline uint32_t subsampling_width() const {
311 return this->subsampling_width_;
312 }
313
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700314 inline ConvolutionOperatorTester& input_channel_stride(size_t input_channel_stride) {
315 assert(input_channel_stride >= 1);
316 this->input_channel_stride_ = input_channel_stride;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700317 return *this;
318 }
319
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700320 inline size_t input_channel_stride() const {
321 if (this->input_channel_stride_ == 0) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700322 return group_input_channels() * groups();
323 } else {
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700324 assert(this->input_channel_stride_ >= group_input_channels() * groups());
325 return this->input_channel_stride_;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700326 }
327 }
328
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700329 inline ConvolutionOperatorTester& output_channel_stride(size_t output_channel_stride) {
330 assert(output_channel_stride >= 1);
331 this->output_channel_stride_ = output_channel_stride;
XNNPACK Teamb455b122019-09-27 18:10:33 -0700332 return *this;
333 }
334
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700335 inline size_t output_channel_stride() const {
336 if (this->output_channel_stride_ == 0) {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700337 return group_output_channels() * groups();
338 } else {
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700339 assert(this->output_channel_stride_ >= group_output_channels() * groups());
340 return this->output_channel_stride_;
Marat Dukhanefc47b82019-11-18 09:25:38 -0800341 }
342 }
343
XNNPACK Teamb455b122019-09-27 18:10:33 -0700344 inline uint32_t dilated_kernel_height() const {
345 return (kernel_height() - 1) * dilation_height() + 1;
346 }
347
348 inline uint32_t dilated_kernel_width() const {
349 return (kernel_width() - 1) * dilation_width() + 1;
350 }
351
352 inline size_t output_height() const {
Marat Dukhan8440fde2019-10-24 12:46:13 -0700353 if (padding_tf_same()) {
354 return (input_height() + subsampling_height() - 1) / subsampling_height();
XNNPACK Teamb455b122019-09-27 18:10:33 -0700355 } else {
Marat Dukhan8440fde2019-10-24 12:46:13 -0700356 const size_t padded_input_height = padding_top() + input_height() + padding_bottom();
357 if (padded_input_height <= dilated_kernel_height()) {
358 return 1;
359 } else {
360 return (padded_input_height - dilated_kernel_height()) / subsampling_height() + 1;
361 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700362 }
363 }
364
365 inline size_t output_width() const {
Marat Dukhan8440fde2019-10-24 12:46:13 -0700366 if (padding_tf_same()) {
367 return (input_width() + subsampling_width() - 1) / subsampling_width();
XNNPACK Teamb455b122019-09-27 18:10:33 -0700368 } else {
Marat Dukhan8440fde2019-10-24 12:46:13 -0700369 const size_t padded_input_width = padding_left() + input_width() + padding_right();
370 if (padded_input_width <= dilated_kernel_width()) {
371 return 1;
372 } else {
373 return (padded_input_width - dilated_kernel_width()) / subsampling_width() + 1;
374 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700375 }
376 }
377
378 inline ConvolutionOperatorTester& next_input_size(uint32_t next_input_height, uint32_t next_input_width) {
379 assert(next_input_height >= 1);
380 assert(next_input_width >= 1);
381 this->next_input_height_ = next_input_height;
382 this->next_input_width_ = next_input_width;
383 return *this;
384 }
385
386 inline ConvolutionOperatorTester& next_input_height(uint32_t next_input_height) {
387 assert(next_input_height >= 1);
388 this->next_input_height_ = next_input_height;
389 return *this;
390 }
391
392 inline uint32_t next_input_height() const {
393 if (this->next_input_height_ == 0) {
394 return input_height();
395 } else {
396 return this->next_input_height_;
397 }
398 }
399
400 inline ConvolutionOperatorTester& next_input_width(uint32_t next_input_width) {
401 assert(next_input_width >= 1);
402 this->next_input_width_ = next_input_width;
403 return *this;
404 }
405
406 inline uint32_t next_input_width() const {
407 if (this->next_input_width_ == 0) {
408 return input_width();
409 } else {
410 return this->next_input_width_;
411 }
412 }
413
414 inline size_t next_output_height() const {
415 const size_t padded_input_height = padding_top() + next_input_height() + padding_bottom();
416 if (padded_input_height <= dilated_kernel_height()) {
417 return 1;
418 } else {
419 return (padded_input_height - dilated_kernel_height()) / subsampling_height() + 1;
420 }
421 }
422
423 inline size_t next_output_width() const {
424 const size_t padded_input_width = padding_left() + next_input_width() + padding_right();
425 if (padded_input_width <= dilated_kernel_width()) {
426 return 1;
427 } else {
428 return (padded_input_width - dilated_kernel_width()) / subsampling_width() + 1;
429 }
430 }
431
432 inline ConvolutionOperatorTester& next_batch_size(size_t next_batch_size) {
433 assert(next_batch_size >= 1);
434 this->next_batch_size_ = next_batch_size;
435 return *this;
436 }
437
438 inline size_t next_batch_size() const {
439 if (this->next_batch_size_ == 0) {
440 return batch_size();
441 } else {
442 return this->next_batch_size_;
443 }
444 }
445
Marat Dukhanefc47b82019-11-18 09:25:38 -0800446 inline ConvolutionOperatorTester& sparsity(float sparsity) {
447 this->sparsity_ = sparsity;
448 return *this;
449 }
450
451 inline float sparsity() const {
452 return this->sparsity_;
453 }
454
XNNPACK Teamb455b122019-09-27 18:10:33 -0700455 inline ConvolutionOperatorTester& qmin(uint8_t qmin) {
456 this->qmin_ = qmin;
457 return *this;
458 }
459
460 inline uint8_t qmin() const {
461 return this->qmin_;
462 }
463
464 inline ConvolutionOperatorTester& qmax(uint8_t qmax) {
465 this->qmax_ = qmax;
466 return *this;
467 }
468
469 inline uint8_t qmax() const {
470 return this->qmax_;
471 }
472
Marat Dukhanefc47b82019-11-18 09:25:38 -0800473 inline ConvolutionOperatorTester& force_nhwc_input(bool force_nhwc_input) {
474 this->force_nhwc_input_ = force_nhwc_input;
475 return *this;
476 }
477
478 inline bool force_nhwc_input() const {
479 return this->force_nhwc_input_;
480 }
481
XNNPACK Teamb455b122019-09-27 18:10:33 -0700482 inline ConvolutionOperatorTester& depthwise_layout(bool depthwise_layout) {
483 this->depthwise_layout_ = depthwise_layout;
484 return *this;
485 }
486
487 inline bool depthwise_layout() const {
488 return this->depthwise_layout_;
489 }
490
Marat Dukhanf568f082019-10-30 09:47:07 -0700491 inline ConvolutionOperatorTester& has_bias(bool has_bias) {
492 this->has_bias_ = has_bias;
493 return *this;
494 }
495
496 inline bool has_bias() const {
497 return this->has_bias_;
498 }
499
XNNPACK Teamb455b122019-09-27 18:10:33 -0700500 inline ConvolutionOperatorTester& iterations(size_t iterations) {
501 this->iterations_ = iterations;
502 return *this;
503 }
504
505 inline size_t iterations() const {
506 return this->iterations_;
507 }
508
Marat Dukhan16f1e1a2020-08-04 16:38:22 -0700509 void TestNHWCxQS8() const {
510 std::random_device random_device;
511 auto rng = std::mt19937(random_device());
512 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
513 auto i8rng = std::bind(
514 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), rng);
515
516 std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) +
517 batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()) + 8);
518 std::vector<int8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
519 std::vector<int32_t> bias(groups() * group_output_channels());
520 std::vector<int8_t> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
521 std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
522 std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
523
524 const int8_t input_zero_point = -1;
525
526 for (size_t iteration = 0; iteration < iterations(); iteration++) {
527 std::generate(input.begin(), input.end(), std::ref(i8rng));
528 std::generate(kernel.begin(), kernel.end(), std::ref(i8rng));
529 std::generate(bias.begin(), bias.end(), std::ref(i32rng));
530 std::fill(output.begin(), output.end(), 0xA5);
531
532 // Compute reference results, without renormalization.
533 if (has_bias()) {
534 for (size_t i = 0; i < batch_size(); i++) {
535 for (size_t oy = 0; oy < output_height(); oy++) {
536 for (size_t ox = 0; ox < output_width(); ox++) {
537 for (size_t g = 0; g < groups(); g++) {
538 for (size_t oc = 0; oc < group_output_channels(); oc++) {
539 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
540 bias[g * group_output_channels() + oc];
541 }
542 }
543 }
544 }
545 }
546 } else {
547 std::fill(accumulators.begin(), accumulators.end(), 0);
548 }
549 if (depthwise_layout()) {
550 ASSERT_EQ(group_input_channels(), 1);
551
552 for (size_t i = 0; i < batch_size(); i++) {
553 for (size_t oy = 0; oy < output_height(); oy++) {
554 for (size_t ox = 0; ox < output_width(); ox++) {
555 for (size_t ky = 0; ky < kernel_height(); ky++) {
556 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
557 if (iy < input_height()) {
558 for (size_t kx = 0; kx < kernel_width(); kx++) {
559 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
560 if (ix < input_width()) {
561 for (size_t g = 0; g < groups(); g++) {
562 for (size_t oc = 0; oc < group_output_channels(); oc++) {
563 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
564 (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g]) - int32_t(input_zero_point)) *
565 int32_t(kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc]);
566 }
567 }
568 }
569 }
570 }
571 }
572 }
573 }
574 }
575 } else {
576 for (size_t i = 0; i < batch_size(); i++) {
577 for (size_t oy = 0; oy < output_height(); oy++) {
578 for (size_t ox = 0; ox < output_width(); ox++) {
579 for (size_t ky = 0; ky < kernel_height(); ky++) {
580 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
581 if (iy < input_height()) {
582 for (size_t kx = 0; kx < kernel_width(); kx++) {
583 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
584 if (ix < input_width()) {
585 for (size_t g = 0; g < groups(); g++) {
586 for (size_t oc = 0; oc < group_output_channels(); oc++) {
587 for (size_t ic = 0; ic < group_input_channels(); ic++) {
588 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
589 (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
590 int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
591 }
592 }
593 }
594 }
595 }
596 }
597 }
598 }
599 }
600 }
601 }
602
603 // Compute renormalization parameters.
604 const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
605 const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());
606
607 const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
608 const int8_t output_zero_point = int8_t(std::max(std::min(
609 lrint(-0.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
610 long(std::numeric_limits<int8_t>::max())), long(std::numeric_limits<int8_t>::min())));
611
612 // Renormalize reference results.
613 std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
614 [this, output_scale, output_zero_point](int32_t x) -> double {
615 return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax() - 0x80) - output_zero_point), double(qmin() - 0x80) - output_zero_point);
616 });
617
618 // Create, setup, run, and destroy Convolution operator.
619 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
620 xnn_operator_t convolution_op = nullptr;
621
622 xnn_status status = xnn_create_convolution2d_nhwc_qs8(
623 padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
624 padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
625 kernel_height(), kernel_width(),
626 subsampling_height(), subsampling_width(),
627 dilation_height(), dilation_width(),
628 groups(), group_input_channels(), group_output_channels(),
629 input_channel_stride(), output_channel_stride(),
630 input_zero_point, 1.0f /* input scale */, 1.0f /* kernel scale */,
631 kernel.data(), has_bias() ? bias.data() : nullptr,
632 output_zero_point, output_scale, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80),
633 (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
634 &convolution_op);
635 if (status == xnn_status_unsupported_hardware) {
636 GTEST_SKIP();
637 }
638 ASSERT_EQ(xnn_status_success, status);
639 ASSERT_NE(nullptr, convolution_op);
640
641 // Smart pointer to automatically delete convolution_op.
642 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
643
644 ASSERT_EQ(xnn_status_success,
645 xnn_setup_convolution2d_nhwc_qs8(
646 convolution_op,
647 batch_size(), input_height(), input_width(),
648 input.data(), output.data(),
649 nullptr /* thread pool */));
650
651 ASSERT_EQ(xnn_status_success,
652 xnn_run_operator(convolution_op, nullptr /* thread pool */));
653
654 // Verify results.
655 for (size_t i = 0; i < batch_size(); i++) {
656 for (size_t y = 0; y < output_height(); y++) {
657 for (size_t x = 0; x < output_width(); x++) {
658 for (size_t g = 0; g < groups(); g++) {
659 for (size_t c = 0; c < group_output_channels(); c++) {
660 ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax() - 0x80))
661 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
662 ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin() - 0x80))
663 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
664 ASSERT_NEAR(
665 output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
666 double(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
667 0.9)
668 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
669 }
670 }
671 }
672 }
673 }
674 }
675 }
676
Marat Dukhan08b7a972020-07-14 18:17:29 -0700677 void TestNHWCxQU8() const {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700678 std::random_device random_device;
679 auto rng = std::mt19937(random_device());
Marat Dukhanecd83112020-08-03 21:50:28 -0700680 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
Marat Dukhan5ce30d92020-04-14 03:31:26 -0700681 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700682
683 std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700684 batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()) + 8);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700685 std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
686 std::vector<int32_t> bias(groups() * group_output_channels());
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700687 std::vector<uint8_t> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700688 std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
689 std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
690
691 const uint8_t input_zero_point = 127;
692 const uint8_t kernel_zero_point = 127;
693
694 for (size_t iteration = 0; iteration < iterations(); iteration++) {
695 std::generate(input.begin(), input.end(), std::ref(u8rng));
696 std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
Marat Dukhanecd83112020-08-03 21:50:28 -0700697 std::generate(bias.begin(), bias.end(), std::ref(i32rng));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700698 std::fill(output.begin(), output.end(), 0xA5);
699
700 // Compute reference results, without renormalization.
Marat Dukhanf568f082019-10-30 09:47:07 -0700701 if (has_bias()) {
702 for (size_t i = 0; i < batch_size(); i++) {
703 for (size_t oy = 0; oy < output_height(); oy++) {
704 for (size_t ox = 0; ox < output_width(); ox++) {
705 for (size_t g = 0; g < groups(); g++) {
706 for (size_t oc = 0; oc < group_output_channels(); oc++) {
707 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
708 bias[g * group_output_channels() + oc];
709 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700710 }
711 }
712 }
713 }
Marat Dukhanf568f082019-10-30 09:47:07 -0700714 } else {
715 std::fill(accumulators.begin(), accumulators.end(), 0);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700716 }
717 if (depthwise_layout()) {
718 ASSERT_EQ(group_input_channels(), 1);
719
720 for (size_t i = 0; i < batch_size(); i++) {
721 for (size_t oy = 0; oy < output_height(); oy++) {
722 for (size_t ox = 0; ox < output_width(); ox++) {
723 for (size_t ky = 0; ky < kernel_height(); ky++) {
724 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
725 if (iy < input_height()) {
726 for (size_t kx = 0; kx < kernel_width(); kx++) {
727 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
728 if (ix < input_width()) {
729 for (size_t g = 0; g < groups(); g++) {
730 for (size_t oc = 0; oc < group_output_channels(); oc++) {
731 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700732 (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g]) - int32_t(input_zero_point)) *
XNNPACK Teamb455b122019-09-27 18:10:33 -0700733 (int32_t(kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc]) - int32_t(kernel_zero_point));
734 }
735 }
736 }
737 }
738 }
739 }
740 }
741 }
742 }
743 } else {
744 for (size_t i = 0; i < batch_size(); i++) {
745 for (size_t oy = 0; oy < output_height(); oy++) {
746 for (size_t ox = 0; ox < output_width(); ox++) {
747 for (size_t ky = 0; ky < kernel_height(); ky++) {
748 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
749 if (iy < input_height()) {
750 for (size_t kx = 0; kx < kernel_width(); kx++) {
751 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
752 if (ix < input_width()) {
753 for (size_t g = 0; g < groups(); g++) {
754 for (size_t oc = 0; oc < group_output_channels(); oc++) {
755 for (size_t ic = 0; ic < group_input_channels(); ic++) {
756 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700757 (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
XNNPACK Teamb455b122019-09-27 18:10:33 -0700758 (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
759 }
760 }
761 }
762 }
763 }
764 }
765 }
766 }
767 }
768 }
769 }
770
771 // Compute renormalization parameters.
772 const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
773 const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());
774
775 const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
776 const uint8_t output_zero_point = uint8_t(std::max(std::min(
777 lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
778 long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));
779
780 // Renormalize reference results.
781 std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
782 [this, output_scale, output_zero_point](int32_t x) -> double {
783 return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
784 });
785
786 // Create, setup, run, and destroy Convolution operator.
Marat Dukhan04f03be2019-11-19 12:36:47 -0800787 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700788 xnn_operator_t convolution_op = nullptr;
789
Marat Dukhan16f1e1a2020-08-04 16:38:22 -0700790 xnn_status status = xnn_create_convolution2d_nhwc_qu8(
Marat Dukhan8440fde2019-10-24 12:46:13 -0700791 padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
792 padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700793 kernel_height(), kernel_width(),
794 subsampling_height(), subsampling_width(),
795 dilation_height(), dilation_width(),
796 groups(), group_input_channels(), group_output_channels(),
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700797 input_channel_stride(), output_channel_stride(),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700798 input_zero_point, 1.0f /* input scale */,
799 kernel_zero_point, 1.0f /* kernel scale */,
Marat Dukhanf568f082019-10-30 09:47:07 -0700800 kernel.data(), has_bias() ? bias.data() : nullptr,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700801 output_zero_point, output_scale, qmin(), qmax(),
Marat Dukhan8440fde2019-10-24 12:46:13 -0700802 (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
Marat Dukhan16f1e1a2020-08-04 16:38:22 -0700803 &convolution_op);
804 if (status == xnn_status_unsupported_hardware) {
805 GTEST_SKIP();
806 }
807 ASSERT_EQ(xnn_status_success, status);
808 ASSERT_NE(nullptr, convolution_op);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700809
810 // Smart pointer to automatically delete convolution_op.
811 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
812
813 ASSERT_EQ(xnn_status_success,
Marat Dukhan08b7a972020-07-14 18:17:29 -0700814 xnn_setup_convolution2d_nhwc_qu8(
XNNPACK Teamb455b122019-09-27 18:10:33 -0700815 convolution_op,
816 batch_size(), input_height(), input_width(),
817 input.data(), output.data(),
818 nullptr /* thread pool */));
819
820 ASSERT_EQ(xnn_status_success,
821 xnn_run_operator(convolution_op, nullptr /* thread pool */));
822
823 // Verify results.
824 for (size_t i = 0; i < batch_size(); i++) {
825 for (size_t y = 0; y < output_height(); y++) {
826 for (size_t x = 0; x < output_width(); x++) {
827 for (size_t g = 0; g < groups(); g++) {
828 for (size_t c = 0; c < group_output_channels(); c++) {
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700829 ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
XNNPACK Teamb455b122019-09-27 18:10:33 -0700830 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700831 ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
XNNPACK Teamb455b122019-09-27 18:10:33 -0700832 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
833 ASSERT_NEAR(
834 output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700835 double(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700836 0.9)
837 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
838 }
839 }
840 }
841 }
842 }
843 }
844 }
845
Marat Dukhanefc47b82019-11-18 09:25:38 -0800846 void TestNHWCxF32() const {
XNNPACK Teamb455b122019-09-27 18:10:33 -0700847 std::random_device random_device;
848 auto rng = std::mt19937(random_device());
849 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
850
851 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700852 batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700853 std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
854 std::vector<float> bias(groups() * group_output_channels());
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700855 std::vector<float> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700856 std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
857
858 for (size_t iteration = 0; iteration < iterations(); iteration++) {
859 std::generate(input.begin(), input.end(), std::ref(f32rng));
860 std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
861 std::generate(bias.begin(), bias.end(), std::ref(f32rng));
862 std::fill(output.begin(), output.end(), nanf(""));
863
864 // Compute reference results, without clamping.
Marat Dukhanf568f082019-10-30 09:47:07 -0700865 if (has_bias()) {
866 for (size_t i = 0; i < batch_size(); i++) {
867 for (size_t oy = 0; oy < output_height(); oy++) {
868 for (size_t ox = 0; ox < output_width(); ox++) {
869 for (size_t g = 0; g < groups(); g++) {
870 for (size_t oc = 0; oc < group_output_channels(); oc++) {
871 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
872 bias[g * group_output_channels() + oc];
873 }
XNNPACK Teamb455b122019-09-27 18:10:33 -0700874 }
875 }
876 }
877 }
Marat Dukhanf568f082019-10-30 09:47:07 -0700878 } else {
879 std::fill(output_ref.begin(), output_ref.end(), 0.0f);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700880 }
881 if (depthwise_layout()) {
882 ASSERT_EQ(group_input_channels(), 1);
883
884 for (size_t i = 0; i < batch_size(); i++) {
885 for (size_t oy = 0; oy < output_height(); oy++) {
886 for (size_t ox = 0; ox < output_width(); ox++) {
887 for (size_t ky = 0; ky < kernel_height(); ky++) {
888 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
889 if (iy < input_height()) {
890 for (size_t kx = 0; kx < kernel_width(); kx++) {
891 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
892 if (ix < input_width()) {
893 for (size_t g = 0; g < groups(); g++) {
894 for (size_t oc = 0; oc < group_output_channels(); oc++) {
895 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700896 input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g] *
XNNPACK Teamb455b122019-09-27 18:10:33 -0700897 kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc];
898 }
899 }
900 }
901 }
902 }
903 }
904 }
905 }
906 }
907 } else {
908 for (size_t i = 0; i < batch_size(); i++) {
909 for (size_t oy = 0; oy < output_height(); oy++) {
910 for (size_t ox = 0; ox < output_width(); ox++) {
911 for (size_t ky = 0; ky < kernel_height(); ky++) {
912 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
913 if (iy < input_height()) {
914 for (size_t kx = 0; kx < kernel_width(); kx++) {
915 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
916 if (ix < input_width()) {
917 for (size_t g = 0; g < groups(); g++) {
918 for (size_t oc = 0; oc < group_output_channels(); oc++) {
919 for (size_t ic = 0; ic < group_input_channels(); ic++) {
920 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700921 input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic] *
XNNPACK Teamb455b122019-09-27 18:10:33 -0700922 kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
923 }
924 }
925 }
926 }
927 }
928 }
929 }
930 }
931 }
932 }
933 }
934
935 // Compute clamping parameters.
936 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
937 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
938
939 const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
940 const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());
941
942 // Clamp reference results.
943 for (float& value : output_ref) {
944 value = std::max(std::min(value, output_max), output_min);
945 }
946
947 // Create, setup, run, and destroy Convolution operator.
Marat Dukhan04f03be2019-11-19 12:36:47 -0800948 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
XNNPACK Teamb455b122019-09-27 18:10:33 -0700949 xnn_operator_t convolution_op = nullptr;
950
Marat Dukhan16f1e1a2020-08-04 16:38:22 -0700951 xnn_status status = xnn_create_convolution2d_nhwc_f32(
Marat Dukhan8440fde2019-10-24 12:46:13 -0700952 padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
953 padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
XNNPACK Teamb455b122019-09-27 18:10:33 -0700954 kernel_height(), kernel_width(),
955 subsampling_height(), subsampling_width(),
956 dilation_height(), dilation_width(),
957 groups(), group_input_channels(), group_output_channels(),
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700958 input_channel_stride(), output_channel_stride(),
Marat Dukhanf568f082019-10-30 09:47:07 -0700959 kernel.data(), has_bias() ? bias.data() : nullptr,
XNNPACK Teamb455b122019-09-27 18:10:33 -0700960 output_min, output_max,
Marat Dukhan8440fde2019-10-24 12:46:13 -0700961 (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
Marat Dukhan16f1e1a2020-08-04 16:38:22 -0700962 &convolution_op);
963 if (status == xnn_status_unsupported_hardware) {
964 GTEST_SKIP();
965 }
966 ASSERT_EQ(xnn_status_success, status);
967 ASSERT_NE(nullptr, convolution_op);
XNNPACK Teamb455b122019-09-27 18:10:33 -0700968
969 // Smart pointer to automatically delete convolution_op.
970 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
971
972 ASSERT_EQ(xnn_status_success,
973 xnn_setup_convolution2d_nhwc_f32(
974 convolution_op,
975 batch_size(), input_height(), input_width(),
976 input.data(), output.data(),
977 nullptr /* thread pool */));
978
979 ASSERT_EQ(xnn_status_success,
980 xnn_run_operator(convolution_op, nullptr /* thread pool */));
981
982 // Verify results.
983 for (size_t i = 0; i < batch_size(); i++) {
984 for (size_t y = 0; y < output_height(); y++) {
985 for (size_t x = 0; x < output_width(); x++) {
986 for (size_t g = 0; g < groups(); g++) {
987 for (size_t c = 0; c < group_output_channels(); c++) {
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700988 ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_min)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700989 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700990 ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_max)
XNNPACK Teamb455b122019-09-27 18:10:33 -0700991 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
992 ASSERT_NEAR(
993 output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
Marat Dukhanc3d52cf2020-06-18 07:56:25 -0700994 output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c],
XNNPACK Teamb455b122019-09-27 18:10:33 -0700995 1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
996 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
997 }
998 }
999 }
1000 }
1001 }
1002 }
1003 }
1004
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001005 void TestNHWCxF16() const {
1006 std::random_device random_device;
1007 auto rng = std::mt19937(random_device());
1008 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
1009 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
1010
1011 std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) +
1012 batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()));
1013 std::vector<uint16_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
1014 std::vector<uint16_t> bias(groups() * group_output_channels());
1015 std::vector<uint16_t> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
1016 std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1017
1018 for (size_t iteration = 0; iteration < iterations(); iteration++) {
1019 std::generate(input.begin(), input.end(), std::ref(f16rng));
1020 std::generate(kernel.begin(), kernel.end(), std::ref(f16rng));
1021 std::generate(bias.begin(), bias.end(), std::ref(f16rng));
1022 std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
1023
1024 // Compute reference results, without clamping.
1025 if (has_bias()) {
1026 for (size_t i = 0; i < batch_size(); i++) {
1027 for (size_t oy = 0; oy < output_height(); oy++) {
1028 for (size_t ox = 0; ox < output_width(); ox++) {
1029 for (size_t g = 0; g < groups(); g++) {
1030 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1031 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1032 fp16_ieee_to_fp32_value(bias[g * group_output_channels() + oc]);
1033 }
1034 }
1035 }
1036 }
1037 }
1038 } else {
1039 std::fill(output_ref.begin(), output_ref.end(), 0.0f);
1040 }
1041 if (depthwise_layout()) {
1042 ASSERT_EQ(group_input_channels(), 1);
1043
1044 for (size_t i = 0; i < batch_size(); i++) {
1045 for (size_t oy = 0; oy < output_height(); oy++) {
1046 for (size_t ox = 0; ox < output_width(); ox++) {
1047 for (size_t ky = 0; ky < kernel_height(); ky++) {
1048 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1049 if (iy < input_height()) {
1050 for (size_t kx = 0; kx < kernel_width(); kx++) {
1051 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1052 if (ix < input_width()) {
1053 for (size_t g = 0; g < groups(); g++) {
1054 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1055 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
1056 fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g]) *
1057 fp16_ieee_to_fp32_value(kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc]);
1058 }
1059 }
1060 }
1061 }
1062 }
1063 }
1064 }
1065 }
1066 }
1067 } else {
1068 for (size_t i = 0; i < batch_size(); i++) {
1069 for (size_t oy = 0; oy < output_height(); oy++) {
1070 for (size_t ox = 0; ox < output_width(); ox++) {
1071 for (size_t ky = 0; ky < kernel_height(); ky++) {
1072 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1073 if (iy < input_height()) {
1074 for (size_t kx = 0; kx < kernel_width(); kx++) {
1075 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1076 if (ix < input_width()) {
1077 for (size_t g = 0; g < groups(); g++) {
1078 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1079 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1080 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
1081 fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) *
1082 fp16_ieee_to_fp32_value(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
1083 }
1084 }
1085 }
1086 }
1087 }
1088 }
1089 }
1090 }
1091 }
1092 }
1093 }
1094
1095 // Compute clamping parameters.
1096 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
1097 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
1098 const float accumulated_range = accumulated_max - accumulated_min;
1099 const float scaled_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + accumulated_range / 255.0f * float(qmin())));
1100 const float scaled_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - accumulated_range / 255.0f * float(255 - qmax())));
1101 const float output_min = scaled_min == scaled_max ? -std::numeric_limits<float>::infinity() : scaled_min;
1102 const float output_max = scaled_min == scaled_max ? +std::numeric_limits<float>::infinity() : scaled_max;
1103
1104 // Clamp reference results.
1105 for (float& value : output_ref) {
1106 value = std::max(std::min(value, output_max), output_min);
1107 }
1108
1109 // Create, setup, run, and destroy Convolution operator.
1110 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1111 xnn_operator_t convolution_op = nullptr;
1112
1113 xnn_status status = xnn_create_convolution2d_nhwc_f16(
1114 padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
1115 padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
1116 kernel_height(), kernel_width(),
1117 subsampling_height(), subsampling_width(),
1118 dilation_height(), dilation_width(),
1119 groups(), group_input_channels(), group_output_channels(),
1120 input_channel_stride(), output_channel_stride(),
1121 kernel.data(), has_bias() ? bias.data() : nullptr,
1122 output_min, output_max,
1123 (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
1124 &convolution_op);
Frank Barchard49b4dcc2020-06-26 14:07:19 -07001125 if (status == xnn_status_unsupported_hardware) {
1126 GTEST_SKIP();
1127 }
1128 ASSERT_EQ(xnn_status_success, status);
1129 ASSERT_NE(nullptr, convolution_op);
1130
1131 // Smart pointer to automatically delete convolution_op.
1132 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
1133
1134 ASSERT_EQ(xnn_status_success,
1135 xnn_setup_convolution2d_nhwc_f16(
1136 convolution_op,
1137 batch_size(), input_height(), input_width(),
1138 input.data(), output.data(),
1139 nullptr /* thread pool */));
1140
1141 ASSERT_EQ(xnn_status_success,
1142 xnn_run_operator(convolution_op, nullptr /* thread pool */));
1143
1144 // Verify results.
1145 for (size_t i = 0; i < batch_size(); i++) {
1146 for (size_t y = 0; y < output_height(); y++) {
1147 for (size_t x = 0; x < output_width(); x++) {
1148 for (size_t g = 0; g < groups(); g++) {
1149 for (size_t c = 0; c < group_output_channels(); c++) {
1150// ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_min)
1151// << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1152// ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_max)
1153// << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1154 ASSERT_NEAR(
1155 output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
1156 fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]),
1157 1.0e-2 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
1158 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1159 }
1160 }
1161 }
1162 }
1163 }
1164 }
1165 }
1166
Marat Dukhanefc47b82019-11-18 09:25:38 -08001167 void TestNCHWxF32() const {
Marat Dukhanefc47b82019-11-18 09:25:38 -08001168 std::random_device random_device;
1169 auto rng = std::mt19937(random_device());
1170 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
1171 auto prng = std::bind(std::uniform_real_distribution<float>(), rng);
1172
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001173 std::vector<float> input(2 * XNN_EXTRA_BYTES / sizeof(float) +
1174 ((batch_size() - 1) * input_channel_stride() + groups() * group_input_channels()) * input_height() * input_width());
Marat Dukhanefc47b82019-11-18 09:25:38 -08001175 std::vector<float> kernel(
1176 groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
1177 std::vector<float> bias(groups() * group_output_channels());
1178 std::vector<float> output(
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001179 ((batch_size() - 1) * output_channel_stride() + groups() * group_output_channels()) * output_height() * output_width());
Marat Dukhanefc47b82019-11-18 09:25:38 -08001180 std::vector<float> output_ref(batch_size() * groups() * group_output_channels() * output_height() * output_width());
1181
1182 for (size_t iteration = 0; iteration < iterations(); iteration++) {
1183 std::generate(input.begin(), input.end(), std::ref(f32rng));
1184 std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
1185 for (float& k : kernel) {
1186 if (prng() <= sparsity()) {
1187 k = 0.0f;
1188 }
1189 }
1190 std::generate(bias.begin(), bias.end(), std::ref(f32rng));
1191 std::fill(output.begin(), output.end(), nanf(""));
1192
1193 // Compute reference results, without clamping.
1194 if (has_bias()) {
1195 for (size_t i = 0; i < batch_size(); i++) {
1196 for (size_t oy = 0; oy < output_height(); oy++) {
1197 for (size_t ox = 0; ox < output_width(); ox++) {
1198 for (size_t g = 0; g < groups(); g++) {
1199 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1200 output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] =
1201 bias[g * group_output_channels() + oc];
1202 }
1203 }
1204 }
1205 }
1206 }
1207 } else {
1208 std::fill(output_ref.begin(), output_ref.end(), 0.0f);
1209 }
1210 if (force_nhwc_input()) {
1211 for (size_t i = 0; i < batch_size(); i++) {
1212 for (size_t oy = 0; oy < output_height(); oy++) {
1213 for (size_t ox = 0; ox < output_width(); ox++) {
1214 for (size_t ky = 0; ky < kernel_height(); ky++) {
1215 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1216 if (iy < input_height()) {
1217 for (size_t kx = 0; kx < kernel_width(); kx++) {
1218 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1219 if (ix < input_width()) {
1220 for (size_t g = 0; g < groups(); g++) {
1221 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1222 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1223 output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
1224 input[((((i * input_height() + iy) * input_width() + ix) * groups() + g) * group_input_channels() + ic)] *
1225 kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
1226 }
1227 }
1228 }
1229 }
1230 }
1231 }
1232 }
1233 }
1234 }
1235 }
Marat Dukhan33032712020-06-18 11:06:04 -07001236 } else if (depthwise_layout()) {
1237 ASSERT_EQ(group_input_channels(), 1);
1238
1239 for (size_t i = 0; i < batch_size(); i++) {
1240 for (size_t oy = 0; oy < output_height(); oy++) {
1241 for (size_t ox = 0; ox < output_width(); ox++) {
1242 for (size_t ky = 0; ky < kernel_height(); ky++) {
1243 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1244 if (iy < input_height()) {
1245 for (size_t kx = 0; kx < kernel_width(); kx++) {
1246 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1247 if (ix < input_width()) {
1248 for (size_t g = 0; g < groups(); g++) {
1249 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1250 output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
1251 input[((i * input_channel_stride() + g) * input_height() + iy) * input_width() + ix] *
1252 kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc];
1253 }
1254 }
1255 }
1256 }
1257 }
1258 }
1259 }
1260 }
1261 }
Marat Dukhanefc47b82019-11-18 09:25:38 -08001262 } else {
1263 for (size_t i = 0; i < batch_size(); i++) {
1264 for (size_t oy = 0; oy < output_height(); oy++) {
1265 for (size_t ox = 0; ox < output_width(); ox++) {
1266 for (size_t ky = 0; ky < kernel_height(); ky++) {
1267 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1268 if (iy < input_height()) {
1269 for (size_t kx = 0; kx < kernel_width(); kx++) {
1270 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1271 if (ix < input_width()) {
1272 for (size_t g = 0; g < groups(); g++) {
1273 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1274 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1275 output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
1276 input[((i * input_channel_stride() + g * group_input_channels() + ic) * input_height() + iy) * input_width() + ix] *
1277 kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
1278 }
1279 }
1280 }
1281 }
1282 }
1283 }
1284 }
1285 }
1286 }
1287 }
1288 }
1289
1290 // Compute clamping parameters.
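// qmin()/qmax() in [0, 255] are mapped onto the observed accumulation range to derive float clamping bounds; the defaults 0/255 disable clamping via +/-infinity.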
1291 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
1292 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
1293
1294 const float output_min = qmin() == 0 ? -std::numeric_limits<float>::infinity() :
1295 accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
1296 const float output_max = qmax() == 255 ? std::numeric_limits<float>::infinity() :
1297 accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());
1298
1299 // Clamp reference results.
1300 for (float& value : output_ref) {
1301 value = std::max(std::min(value, output_max), output_min);
1302 }
1303
1304 // Create, setup, run, and destroy Convolution operator.
1305 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1306 xnn_operator_t convolution_op = nullptr;
1307
1308 xnn_status status = xnn_create_convolution2d_nchw_f32(
1309 padding_top(), padding_right(), padding_bottom(), padding_left(),
1310 kernel_height(), kernel_width(),
1311 subsampling_height(), subsampling_width(),
1312 dilation_height(), dilation_width(),
1313 groups(), group_input_channels(), group_output_channels(),
1314 input_channel_stride(), output_channel_stride(),
1315 kernel.data(), has_bias() ? bias.data() : nullptr,
1316 output_min, output_max,
1317 (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (force_nhwc_input() ? XNN_FLAG_INPUT_NHWC : 0),
1318 &convolution_op);
1319 if (status == xnn_status_unsupported_parameter) {
1320 GTEST_SKIP();
1321 }
1322 ASSERT_EQ(xnn_status_success, status);
1323 ASSERT_NE(nullptr, convolution_op);
1324
1325 // Smart pointer to automatically delete convolution_op.
1326 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
1327
1328 ASSERT_EQ(xnn_status_success,
1329 xnn_setup_convolution2d_nchw_f32(
1330 convolution_op,
1331 batch_size(), input_height(), input_width(),
1332 input.data(), output.data(),
1333 nullptr /* thread pool */));
1334
1335 ASSERT_EQ(xnn_status_success,
1336 xnn_run_operator(convolution_op, nullptr /* thread pool */));
1337
1338 // Verify results.
1339 for (size_t i = 0; i < batch_size(); i++) {
1340 for (size_t y = 0; y < output_height(); y++) {
1341 for (size_t x = 0; x < output_width(); x++) {
1342 for (size_t g = 0; g < groups(); g++) {
1343 for (size_t c = 0; c < group_output_channels(); c++) {
1344 ASSERT_GE(output[((i * output_channel_stride() + g * group_output_channels() + c) * output_height() + y) * output_width() + x], output_min)
1345 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
1346 ASSERT_LE(output[((i * output_channel_stride() + g * group_output_channels() + c) * output_height() + y) * output_width() + x], output_max)
1347 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
1348 ASSERT_NEAR(
1349 output_ref[(((i * groups() + g) * group_output_channels() + c) * output_height() + y) * output_width() + x],
1350 output[((i * output_channel_stride() + g * group_output_channels() + c) * output_height() + y) * output_width() + x],
1351 1.0e-4 * std::abs(output_ref[(((i * groups() + g) * group_output_channels() + c) * output_height() + y) * output_width() + x]))
1352 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
1353 }
1354 }
1355 }
1356 }
1357 }
1358 }
1359 }
1360
1361 void TestSetupNHWCxQS8() const {
1362 ASSERT_FALSE(depthwise_layout());
1363
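// The operator is created and run once at the initial shape, then set up again with the next_* shape to verify that re-setup of an existing operator still produces correct results.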
1364 std::random_device random_device;
1365 auto rng = std::mt19937(random_device());
1366 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
1367 auto i8rng = std::bind(
1368 std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), rng);
1369
1370 std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) + std::max(
1371 batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
1372 next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())) + 8);
1373 std::vector<int8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
1374 std::vector<int32_t> bias(groups() * group_output_channels());
1375 std::vector<int8_t> output(std::max(
1376 batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
1377 next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
1378 std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1379 std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1380 std::vector<int32_t> next_accumulators(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1381 std::vector<double> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1382
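// A non-zero input zero point makes the test sensitive to implementations that forget to subtract it; the reference computation below subtracts it explicitly.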
1383 const int8_t input_zero_point = -1;
1384
1385 for (size_t iteration = 0; iteration < iterations(); iteration++) {
1386 std::generate(input.begin(), input.end(), std::ref(i8rng));
1387 std::generate(kernel.begin(), kernel.end(), std::ref(i8rng));
1388 std::generate(bias.begin(), bias.end(), std::ref(i32rng));
1389 std::fill(output.begin(), output.end(), 0xA5);
1390
1391 // Compute reference results, without renormalization.
1392 if (has_bias()) {
1393 for (size_t i = 0; i < batch_size(); i++) {
1394 for (size_t oy = 0; oy < output_height(); oy++) {
1395 for (size_t ox = 0; ox < output_width(); ox++) {
1396 for (size_t g = 0; g < groups(); g++) {
1397 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1398 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1399 bias[g * group_output_channels() + oc];
1400 }
1401 }
1402 }
1403 }
1404 }
1405 } else {
1406 std::fill(accumulators.begin(), accumulators.end(), 0);
1407 }
1408 for (size_t i = 0; i < batch_size(); i++) {
1409 for (size_t oy = 0; oy < output_height(); oy++) {
1410 for (size_t ox = 0; ox < output_width(); ox++) {
1411 for (size_t ky = 0; ky < kernel_height(); ky++) {
1412 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1413 if (iy < input_height()) {
1414 for (size_t kx = 0; kx < kernel_width(); kx++) {
1415 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1416 if (ix < input_width()) {
1417 for (size_t g = 0; g < groups(); g++) {
1418 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1419 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1420 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
1421 (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
1422 int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
1423 }
1424 }
1425 }
1426 }
1427 }
1428 }
1429 }
1430 }
1431 }
1432 }
1433
1434 // Compute renormalization parameters.
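// output_scale spreads the observed accumulator range over roughly 255 quantized steps, and output_zero_point re-centers that range within int8.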
1435 const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
1436 const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());
1437
1438 const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
1439 const int8_t output_zero_point = int8_t(std::max(std::min(
1440 lrint(-0.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
1441 long(std::numeric_limits<int8_t>::max())), long(std::numeric_limits<int8_t>::min())));
1442
1443 // Renormalize reference results.
1444 std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
1445 [this, output_scale, output_zero_point](int32_t x) -> double {
1446 return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax() - 0x80) - output_zero_point), double(qmin() - 0x80) - output_zero_point);
1447 });
1448
1449 // Create, setup, and run Convolution operator once.
1450 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1451 xnn_operator_t convolution_op = nullptr;
1452
1453 xnn_status status = xnn_create_convolution2d_nhwc_qs8(
1454 padding_top(), padding_right(), padding_bottom(), padding_left(),
1455 kernel_height(), kernel_width(),
1456 subsampling_height(), subsampling_width(),
1457 dilation_height(), dilation_width(),
1458 groups(), group_input_channels(), group_output_channels(),
1459 input_channel_stride(), output_channel_stride(),
1460 input_zero_point, 1.0f /* input scale */, 1.0f /* kernel scale */,
1461 kernel.data(), has_bias() ? bias.data() : nullptr,
1462 output_zero_point, output_scale, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80),
1463 0, &convolution_op);
1464 if (status == xnn_status_unsupported_hardware) {
1465 GTEST_SKIP();
1466 }
1467 ASSERT_EQ(xnn_status_success, status);
1468 ASSERT_NE(nullptr, convolution_op);
1469
1470 // Smart pointer to automatically delete convolution_op.
1471 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
1472
1473 ASSERT_EQ(xnn_status_success,
1474 xnn_setup_convolution2d_nhwc_qs8(
1475 convolution_op,
1476 batch_size(), input_height(), input_width(),
1477 input.data(), output.data(),
1478 nullptr /* thread pool */));
1479
1480 ASSERT_EQ(xnn_status_success,
1481 xnn_run_operator(convolution_op, nullptr /* thread pool */));
1482
1483 // Verify results of the first run.
1484 for (size_t i = 0; i < batch_size(); i++) {
1485 for (size_t y = 0; y < output_height(); y++) {
1486 for (size_t x = 0; x < output_width(); x++) {
1487 for (size_t g = 0; g < groups(); g++) {
1488 for (size_t c = 0; c < group_output_channels(); c++) {
1489 ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax() - 0x80))
1490 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1491 ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin() - 0x80))
1492 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1493 ASSERT_NEAR(
1494 output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
1495 double(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
1496 0.9)
1497 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1498 }
1499 }
1500 }
1501 }
1502 }
1503
1504 // Re-generate data for the second run.
1505 std::generate(input.begin(), input.end(), std::ref(i8rng));
1506 std::fill(output.begin(), output.end(), 0xA5);
1507
1508 // Compute reference results for the second run, including renormalization.
1509 if (has_bias()) {
1510 for (size_t i = 0; i < next_batch_size(); i++) {
1511 for (size_t oy = 0; oy < next_output_height(); oy++) {
1512 for (size_t ox = 0; ox < next_output_width(); ox++) {
1513 for (size_t g = 0; g < groups(); g++) {
1514 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1515 next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1516 bias[g * group_output_channels() + oc];
1517 }
1518 }
1519 }
1520 }
1521 }
1522 } else {
1523 std::fill(next_accumulators.begin(), next_accumulators.end(), 0);
1524 }
1525 for (size_t i = 0; i < next_batch_size(); i++) {
1526 for (size_t oy = 0; oy < next_output_height(); oy++) {
1527 for (size_t ox = 0; ox < next_output_width(); ox++) {
1528 for (size_t ky = 0; ky < kernel_height(); ky++) {
1529 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1530 if (iy < next_input_height()) {
1531 for (size_t kx = 0; kx < kernel_width(); kx++) {
1532 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1533 if (ix < next_input_width()) {
1534 for (size_t g = 0; g < groups(); g++) {
1535 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1536 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1537 next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
1538 (int32_t(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
1539 int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
1540 }
1541 }
1542 }
1543 }
1544 }
1545 }
1546 }
1547 }
1548 }
1549 }
1550 std::transform(next_accumulators.cbegin(), next_accumulators.cend(), next_output_ref.begin(),
1551 [this, output_scale, output_zero_point](int32_t x) -> double {
1552 return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax() - 0x80) - output_zero_point), double(qmin() - 0x80) - output_zero_point);
1553 });
1554
1555 // Setup and run Convolution operator the second time, and destroy the operator.
1556 ASSERT_EQ(xnn_status_success,
1557 xnn_setup_convolution2d_nhwc_qs8(
1558 convolution_op,
1559 next_batch_size(), next_input_height(), next_input_width(),
1560 input.data(), output.data(),
1561 nullptr /* thread pool */));
1562
1563 ASSERT_EQ(xnn_status_success,
1564 xnn_run_operator(convolution_op, nullptr /* thread pool */));
1565
1566 // Verify results of the second run.
1567 for (size_t i = 0; i < next_batch_size(); i++) {
1568 for (size_t y = 0; y < next_output_height(); y++) {
1569 for (size_t x = 0; x < next_output_width(); x++) {
1570 for (size_t g = 0; g < groups(); g++) {
1571 for (size_t c = 0; c < group_output_channels(); c++) {
1572 ASSERT_LE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax() - 0x80))
1573 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1574 ASSERT_GE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin() - 0x80))
1575 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1576 ASSERT_NEAR(
1577 next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
1578 double(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
1579 0.9)
1580 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1581 }
1582 }
1583 }
1584 }
1585 }
1586 }
1587 }
1588
1589 void TestSetupNHWCxQU8() const {
1590 ASSERT_FALSE(depthwise_layout());
1591
1592 std::random_device random_device;
1593 auto rng = std::mt19937(random_device());
1594 auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
1595 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);
1596
1597 std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + std::max(
1598 batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
1599 next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())) + 8);
1600 std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
1601 std::vector<int32_t> bias(groups() * group_output_channels());
1602 std::vector<uint8_t> output(std::max(
1603 batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
1604 next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
1605 std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1606 std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1607 std::vector<int32_t> next_accumulators(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1608 std::vector<double> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1609
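// Mid-range zero points keep the random uint8 data roughly symmetric around zero after zero-point subtraction.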
1610 const uint8_t input_zero_point = 127;
1611 const uint8_t kernel_zero_point = 127;
1612
1613 for (size_t iteration = 0; iteration < iterations(); iteration++) {
1614 std::generate(input.begin(), input.end(), std::ref(u8rng));
1615 std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
1616 std::generate(bias.begin(), bias.end(), std::ref(i32rng));
1617 std::fill(output.begin(), output.end(), 0xA5);
1618
1619 // Compute reference results, without renormalization.
1620 if (has_bias()) {
1621 for (size_t i = 0; i < batch_size(); i++) {
1622 for (size_t oy = 0; oy < output_height(); oy++) {
1623 for (size_t ox = 0; ox < output_width(); ox++) {
1624 for (size_t g = 0; g < groups(); g++) {
1625 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1626 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1627 bias[g * group_output_channels() + oc];
1628 }
1629 }
1630 }
1631 }
1632 }
1633 } else {
1634 std::fill(accumulators.begin(), accumulators.end(), 0);
1635 }
1636 for (size_t i = 0; i < batch_size(); i++) {
1637 for (size_t oy = 0; oy < output_height(); oy++) {
1638 for (size_t ox = 0; ox < output_width(); ox++) {
1639 for (size_t ky = 0; ky < kernel_height(); ky++) {
1640 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1641 if (iy < input_height()) {
1642 for (size_t kx = 0; kx < kernel_width(); kx++) {
1643 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1644 if (ix < input_width()) {
1645 for (size_t g = 0; g < groups(); g++) {
1646 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1647 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1648 accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
1649 (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
1650 (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
1651 }
1652 }
1653 }
1654 }
1655 }
1656 }
1657 }
1658 }
1659 }
1660 }
1661
1662 // Compute renormalization parameters.
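// Same renormalization as in the QS8 test above, except the zero point is offset by 127.5 so that it lands in the unsigned uint8 range.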
1663 const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
1664 const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());
1665
1666 const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
1667 const uint8_t output_zero_point = uint8_t(std::max(std::min(
1668 lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
1669 long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));
1670
1671 // Renormalize reference results.
1672 std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
1673 [this, output_scale, output_zero_point](int32_t x) -> double {
1674 return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
1675 });
1676
1677 // Create, setup, and run Convolution operator once.
1678 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1679 xnn_operator_t convolution_op = nullptr;
1680
1681 xnn_status status = xnn_create_convolution2d_nhwc_qu8(
1682 padding_top(), padding_right(), padding_bottom(), padding_left(),
1683 kernel_height(), kernel_width(),
1684 subsampling_height(), subsampling_width(),
1685 dilation_height(), dilation_width(),
1686 groups(), group_input_channels(), group_output_channels(),
1687 input_channel_stride(), output_channel_stride(),
1688 input_zero_point, 1.0f /* input scale */,
1689 kernel_zero_point, 1.0f /* kernel scale */,
1690 kernel.data(), has_bias() ? bias.data() : nullptr,
1691 output_zero_point, output_scale, qmin(), qmax(),
1692 0, &convolution_op);
1693 if (status == xnn_status_unsupported_hardware) {
1694 GTEST_SKIP();
1695 }
1696 ASSERT_EQ(xnn_status_success, status);
1697 ASSERT_NE(nullptr, convolution_op);
1698
1699 // Smart pointer to automatically delete convolution_op.
1700 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
1701
1702 ASSERT_EQ(xnn_status_success,
1703 xnn_setup_convolution2d_nhwc_qu8(
1704 convolution_op,
1705 batch_size(), input_height(), input_width(),
1706 input.data(), output.data(),
1707 nullptr /* thread pool */));
1708
1709 ASSERT_EQ(xnn_status_success,
1710 xnn_run_operator(convolution_op, nullptr /* thread pool */));
1711
1712 // Verify results of the first run.
1713 for (size_t i = 0; i < batch_size(); i++) {
1714 for (size_t y = 0; y < output_height(); y++) {
1715 for (size_t x = 0; x < output_width(); x++) {
1716 for (size_t g = 0; g < groups(); g++) {
1717 for (size_t c = 0; c < group_output_channels(); c++) {
1718 ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
1719 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1720 ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
1721 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1722 ASSERT_NEAR(
1723 output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
1724 double(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
1725 0.9)
1726 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1727 }
1728 }
1729 }
1730 }
1731 }
1732
1733 // Re-generate data for the second run.
1734 std::generate(input.begin(), input.end(), std::ref(u8rng));
1735 std::fill(output.begin(), output.end(), 0xA5);
1736
1737 // Compute reference results for the second run, including renormalization.
1738 if (has_bias()) {
1739 for (size_t i = 0; i < next_batch_size(); i++) {
1740 for (size_t oy = 0; oy < next_output_height(); oy++) {
1741 for (size_t ox = 0; ox < next_output_width(); ox++) {
1742 for (size_t g = 0; g < groups(); g++) {
1743 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1744 next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1745 bias[g * group_output_channels() + oc];
1746 }
1747 }
1748 }
1749 }
1750 }
1751 } else {
1752 std::fill(next_accumulators.begin(), next_accumulators.end(), 0);
1753 }
1754 for (size_t i = 0; i < next_batch_size(); i++) {
1755 for (size_t oy = 0; oy < next_output_height(); oy++) {
1756 for (size_t ox = 0; ox < next_output_width(); ox++) {
1757 for (size_t ky = 0; ky < kernel_height(); ky++) {
1758 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1759 if (iy < next_input_height()) {
1760 for (size_t kx = 0; kx < kernel_width(); kx++) {
1761 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1762 if (ix < next_input_width()) {
1763 for (size_t g = 0; g < groups(); g++) {
1764 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1765 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1766 next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
1767 (int32_t(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
1768 (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
1769 }
1770 }
1771 }
1772 }
1773 }
1774 }
1775 }
1776 }
1777 }
1778 }
1779 std::transform(next_accumulators.cbegin(), next_accumulators.cend(), next_output_ref.begin(),
1780 [this, output_scale, output_zero_point](int32_t x) -> double {
1781 return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
1782 });
1783
1784 // Setup and run Convolution operator the second time, and destroy the operator.
1785 ASSERT_EQ(xnn_status_success,
1786 xnn_setup_convolution2d_nhwc_qu8(
1787 convolution_op,
1788 next_batch_size(), next_input_height(), next_input_width(),
1789 input.data(), output.data(),
1790 nullptr /* thread pool */));
1791
1792 ASSERT_EQ(xnn_status_success,
1793 xnn_run_operator(convolution_op, nullptr /* thread pool */));
1794
1795 // Verify results of the second run.
1796 for (size_t i = 0; i < next_batch_size(); i++) {
1797 for (size_t y = 0; y < next_output_height(); y++) {
1798 for (size_t x = 0; x < next_output_width(); x++) {
1799 for (size_t g = 0; g < groups(); g++) {
1800 for (size_t c = 0; c < group_output_channels(); c++) {
1801 ASSERT_LE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
1802 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1803 ASSERT_GE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
1804 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1805 ASSERT_NEAR(
1806 next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
1807 double(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
1808 0.9)
1809 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1810 }
1811 }
1812 }
1813 }
1814 }
1815 }
1816 }
1817
1818 void TestSetupNHWCxF16() const {
1819 ASSERT_FALSE(depthwise_layout());
1820
1821 std::random_device random_device;
1822 auto rng = std::mt19937(random_device());
1823 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
1824 auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
1825
1826 std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + std::max(
1827 batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
1828 next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())));
1829 std::vector<uint16_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
1830 std::vector<uint16_t> bias(groups() * group_output_channels());
1831 std::vector<uint16_t> output(std::max(
1832 batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
1833 next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
1834 std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1835 std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1836
1837 for (size_t iteration = 0; iteration < iterations(); iteration++) {
1838 std::generate(input.begin(), input.end(), std::ref(f16rng));
1839 std::generate(kernel.begin(), kernel.end(), std::ref(f16rng));
1840 std::generate(bias.begin(), bias.end(), std::ref(f16rng));
1841 std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
1842
1843 // Compute reference results, without clamping.
1844 if (has_bias()) {
1845 for (size_t i = 0; i < batch_size(); i++) {
1846 for (size_t oy = 0; oy < output_height(); oy++) {
1847 for (size_t ox = 0; ox < output_width(); ox++) {
1848 for (size_t g = 0; g < groups(); g++) {
1849 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1850 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1851 fp16_ieee_to_fp32_value(bias[g * group_output_channels() + oc]);
1852 }
1853 }
1854 }
1855 }
1856 }
1857 } else {
1858 std::fill(output_ref.begin(), output_ref.end(), 0.0f);
1859 }
1860 for (size_t i = 0; i < batch_size(); i++) {
1861 for (size_t oy = 0; oy < output_height(); oy++) {
1862 for (size_t ox = 0; ox < output_width(); ox++) {
1863 for (size_t ky = 0; ky < kernel_height(); ky++) {
1864 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1865 if (iy < input_height()) {
1866 for (size_t kx = 0; kx < kernel_width(); kx++) {
1867 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1868 if (ix < input_width()) {
1869 for (size_t g = 0; g < groups(); g++) {
1870 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1871 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1872 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
1873 fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) *
1874 fp16_ieee_to_fp32_value(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
1875 }
1876 }
1877 }
1878 }
1879 }
1880 }
1881 }
1882 }
1883 }
1884 }
1885
1886 // Compute clamping parameters.
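// The clamping bounds are rounded through fp16; if rounding collapses them to a single value, clamping is effectively disabled by substituting +/-infinity.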
1887 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
1888 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
1889 const float accumulated_range = accumulated_max - accumulated_min;
1890 const float scaled_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + accumulated_range / 255.0f * float(qmin())));
1891 const float scaled_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - accumulated_range / 255.0f * float(255 - qmax())));
1892 const float output_min = scaled_min == scaled_max ? -std::numeric_limits<float>::infinity() : scaled_min;
1893 const float output_max = scaled_min == scaled_max ? +std::numeric_limits<float>::infinity() : scaled_max;
1894
1895 for (float& output_value : output_ref) {
1896 output_value = std::min(std::max(output_value, output_min), output_max);
1897 }
1898
1899 // Create, setup, and run Convolution operator once.
1900 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
1901 xnn_operator_t convolution_op = nullptr;
1902
1903 xnn_status status = xnn_create_convolution2d_nhwc_f16(
1904 padding_top(), padding_right(), padding_bottom(), padding_left(),
1905 kernel_height(), kernel_width(),
1906 subsampling_height(), subsampling_width(),
1907 dilation_height(), dilation_width(),
1908 groups(), group_input_channels(), group_output_channels(),
1909 input_channel_stride(), output_channel_stride(),
1910 kernel.data(), has_bias() ? bias.data() : nullptr,
1911 output_min, output_max,
1912 0, &convolution_op);
1913 if (status == xnn_status_unsupported_hardware) {
1914 GTEST_SKIP();
1915 }
1916 ASSERT_EQ(xnn_status_success, status);
1917 ASSERT_NE(nullptr, convolution_op);
1918
1919 // Smart pointer to automatically delete convolution_op.
1920 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
1921
1922 ASSERT_EQ(xnn_status_success,
1923 xnn_setup_convolution2d_nhwc_f16(
1924 convolution_op,
1925 batch_size(), input_height(), input_width(),
1926 input.data(), output.data(),
1927 nullptr /* thread pool */));
1928
1929 ASSERT_EQ(xnn_status_success,
1930 xnn_run_operator(convolution_op, nullptr /* thread pool */));
1931
1932 // Verify results of the first run.
1933 for (size_t i = 0; i < batch_size(); i++) {
1934 for (size_t y = 0; y < output_height(); y++) {
1935 for (size_t x = 0; x < output_width(); x++) {
1936 for (size_t g = 0; g < groups(); g++) {
1937 for (size_t c = 0; c < group_output_channels(); c++) {
1938 ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_min)
1939 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1940 ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_max)
1941 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1942 ASSERT_NEAR(
1943 output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
1944 fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]),
1945 1.0e-2 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
1946 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
1947 }
1948 }
1949 }
1950 }
1951 }
1952
1953 // Re-generate data for the second run.
1954 std::generate(input.begin(), input.end(), std::ref(f16rng));
1955 std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
1956
1957 // Compute reference results for the second run, including clamping.
1958 if (has_bias()) {
1959 for (size_t i = 0; i < next_batch_size(); i++) {
1960 for (size_t oy = 0; oy < next_output_height(); oy++) {
1961 for (size_t ox = 0; ox < next_output_width(); ox++) {
1962 for (size_t g = 0; g < groups(); g++) {
1963 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1964 next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
1965 fp16_ieee_to_fp32_value(bias[g * group_output_channels() + oc]);
1966 }
1967 }
1968 }
1969 }
1970 }
1971 } else {
1972 std::fill(next_output_ref.begin(), next_output_ref.end(), 0.0f);
1973 }
1974 for (size_t i = 0; i < next_batch_size(); i++) {
1975 for (size_t oy = 0; oy < next_output_height(); oy++) {
1976 for (size_t ox = 0; ox < next_output_width(); ox++) {
1977 for (size_t ky = 0; ky < kernel_height(); ky++) {
1978 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
1979 if (iy < next_input_height()) {
1980 for (size_t kx = 0; kx < kernel_width(); kx++) {
1981 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
1982 if (ix < next_input_width()) {
1983 for (size_t g = 0; g < groups(); g++) {
1984 for (size_t oc = 0; oc < group_output_channels(); oc++) {
1985 for (size_t ic = 0; ic < group_input_channels(); ic++) {
1986 next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
1987 fp16_ieee_to_fp32_value(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) *
1988 fp16_ieee_to_fp32_value(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
1989 }
1990 }
1991 }
1992 }
1993 }
1994 }
1995 }
1996 }
1997 }
1998 }
1999 for (float& value : next_output_ref) {
2000 value = std::max(std::min(value, output_max), output_min);
2001 }
2002
2003 // Setup and run Convolution operator the second time, and destroy the operator.
2004 ASSERT_EQ(xnn_status_success,
2005 xnn_setup_convolution2d_nhwc_f16(
2006 convolution_op,
2007 next_batch_size(), next_input_height(), next_input_width(),
2008 input.data(), output.data(),
2009 nullptr /* thread pool */));
2010
2011 ASSERT_EQ(xnn_status_success,
2012 xnn_run_operator(convolution_op, nullptr /* thread pool */));
2013
2014 // Verify results of the second run.
2015 for (size_t i = 0; i < next_batch_size(); i++) {
2016 for (size_t y = 0; y < next_output_height(); y++) {
2017 for (size_t x = 0; x < next_output_width(); x++) {
2018 for (size_t g = 0; g < groups(); g++) {
2019 for (size_t c = 0; c < group_output_channels(); c++) {
2020 ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_min)
2021 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2022 ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_max)
2023 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2024 ASSERT_NEAR(
2025 next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
2026 fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]),
2027 1.0e-2 * std::abs(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c]))
2028 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2029 }
2030 }
2031 }
2032 }
2033 }
2034 }
2035 }
2036
2037 void TestSetupNHWCxF32() const {
2038 ASSERT_FALSE(depthwise_layout());
2039
2040 std::random_device random_device;
2041 auto rng = std::mt19937(random_device());
2042 auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
2043
2044 std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + std::max(
2045 batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
2046 next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())));
2047 std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
2048 std::vector<float> bias(groups() * group_output_channels());
2049 std::vector<float> output(std::max(
2050 batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
2051 next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
2052 std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
2053 std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
2054
2055 for (size_t iteration = 0; iteration < iterations(); iteration++) {
2056 std::generate(input.begin(), input.end(), std::ref(f32rng));
2057 std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
2058 std::generate(bias.begin(), bias.end(), std::ref(f32rng));
2059 std::fill(output.begin(), output.end(), nanf(""));
2060
2061 // Compute reference results, without clamping.
2062 if (has_bias()) {
2063 for (size_t i = 0; i < batch_size(); i++) {
2064 for (size_t oy = 0; oy < output_height(); oy++) {
2065 for (size_t ox = 0; ox < output_width(); ox++) {
2066 for (size_t g = 0; g < groups(); g++) {
2067 for (size_t oc = 0; oc < group_output_channels(); oc++) {
2068 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
2069 bias[g * group_output_channels() + oc];
2070 }
2071 }
2072 }
2073 }
2074 }
2075 } else {
2076 std::fill(output_ref.begin(), output_ref.end(), 0.0f);
2077 }
2078 for (size_t i = 0; i < batch_size(); i++) {
2079 for (size_t oy = 0; oy < output_height(); oy++) {
2080 for (size_t ox = 0; ox < output_width(); ox++) {
2081 for (size_t ky = 0; ky < kernel_height(); ky++) {
2082 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
2083 if (iy < input_height()) {
2084 for (size_t kx = 0; kx < kernel_width(); kx++) {
2085 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
2086 if (ix < input_width()) {
2087 for (size_t g = 0; g < groups(); g++) {
2088 for (size_t oc = 0; oc < group_output_channels(); oc++) {
2089 for (size_t ic = 0; ic < group_input_channels(); ic++) {
2090 output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
2091 input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic] *
2092 kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
2093 }
2094 }
2095 }
2096 }
2097 }
2098 }
2099 }
2100 }
2101 }
2102 }
2103
2104 // Compute clamping parameters.
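// Unlike the NCHW test above, these bounds stay finite: qmin() == 0 / qmax() == 255 clamp to the extremes of the accumulated range, which is a no-op in practice.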
2105 const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
2106 const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
2107
2108 const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
2109 const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());
2110
2111 // Clamp reference results.
2112 for (float& value : output_ref) {
2113 value = std::max(std::min(value, output_max), output_min);
2114 }
2115
2116 // Create, setup, and run Convolution operator once.
2117 ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
2118 xnn_operator_t convolution_op = nullptr;
2119
2120 xnn_status status = xnn_create_convolution2d_nhwc_f32(
2121 padding_top(), padding_right(), padding_bottom(), padding_left(),
2122 kernel_height(), kernel_width(),
2123 subsampling_height(), subsampling_width(),
2124 dilation_height(), dilation_width(),
2125 groups(), group_input_channels(), group_output_channels(),
2126 input_channel_stride(), output_channel_stride(),
2127 kernel.data(), has_bias() ? bias.data() : nullptr,
2128 output_min, output_max,
2129 0, &convolution_op);
2130 if (status == xnn_status_unsupported_hardware) {
2131 GTEST_SKIP();
2132 }
2133 ASSERT_EQ(xnn_status_success, status);
2134 ASSERT_NE(nullptr, convolution_op);
2135
2136 // Smart pointer to automatically delete convolution_op.
2137 std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);
2138
2139 ASSERT_EQ(xnn_status_success,
2140 xnn_setup_convolution2d_nhwc_f32(
2141 convolution_op,
2142 batch_size(), input_height(), input_width(),
2143 input.data(), output.data(),
2144 nullptr /* thread pool */));
2145
2146 ASSERT_EQ(xnn_status_success,
2147 xnn_run_operator(convolution_op, nullptr /* thread pool */));
2148
2149 // Verify results of the first run.
2150 for (size_t i = 0; i < batch_size(); i++) {
2151 for (size_t y = 0; y < output_height(); y++) {
2152 for (size_t x = 0; x < output_width(); x++) {
2153 for (size_t g = 0; g < groups(); g++) {
2154 for (size_t c = 0; c < group_output_channels(); c++) {
2155 ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_min)
2156 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2157 ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_max)
2158 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2159 ASSERT_NEAR(
2160 output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
2161 output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c],
2162 1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
2163 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2164 }
2165 }
2166 }
2167 }
2168 }
2169
2170 // Re-generate data for the second run.
2171 std::generate(input.begin(), input.end(), std::ref(f32rng));
2172 std::fill(output.begin(), output.end(), nanf(""));
2173
2174 // Compute reference results for the second run, including clamping.
2175 if (has_bias()) {
2176 for (size_t i = 0; i < next_batch_size(); i++) {
2177 for (size_t oy = 0; oy < next_output_height(); oy++) {
2178 for (size_t ox = 0; ox < next_output_width(); ox++) {
2179 for (size_t g = 0; g < groups(); g++) {
2180 for (size_t oc = 0; oc < group_output_channels(); oc++) {
2181 next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
2182 bias[g * group_output_channels() + oc];
2183 }
2184 }
2185 }
2186 }
2187 }
2188 } else {
2189 std::fill(next_output_ref.begin(), next_output_ref.end(), 0.0f);
2190 }
2191 for (size_t i = 0; i < next_batch_size(); i++) {
2192 for (size_t oy = 0; oy < next_output_height(); oy++) {
2193 for (size_t ox = 0; ox < next_output_width(); ox++) {
2194 for (size_t ky = 0; ky < kernel_height(); ky++) {
2195 const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
2196 if (iy < next_input_height()) {
2197 for (size_t kx = 0; kx < kernel_width(); kx++) {
2198 const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
2199 if (ix < next_input_width()) {
2200 for (size_t g = 0; g < groups(); g++) {
2201 for (size_t oc = 0; oc < group_output_channels(); oc++) {
2202 for (size_t ic = 0; ic < group_input_channels(); ic++) {
2203 next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
2204 input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic] *
2205 kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
2206 }
2207 }
2208 }
2209 }
2210 }
2211 }
2212 }
2213 }
2214 }
2215 }
2216 for (float& value : next_output_ref) {
2217 value = std::max(std::min(value, output_max), output_min);
2218 }
2219
2220 // Setup and run Convolution operator the second time, and destroy the operator.
2221 ASSERT_EQ(xnn_status_success,
2222 xnn_setup_convolution2d_nhwc_f32(
2223 convolution_op,
2224 next_batch_size(), next_input_height(), next_input_width(),
2225 input.data(), output.data(),
2226 nullptr /* thread pool */));
2227
2228 ASSERT_EQ(xnn_status_success,
2229 xnn_run_operator(convolution_op, nullptr /* thread pool */));
2230
2231 // Verify results of the second run.
2232 for (size_t i = 0; i < next_batch_size(); i++) {
2233 for (size_t y = 0; y < next_output_height(); y++) {
2234 for (size_t x = 0; x < next_output_width(); x++) {
2235 for (size_t g = 0; g < groups(); g++) {
2236 for (size_t c = 0; c < group_output_channels(); c++) {
2237 ASSERT_GE(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_min)
2238 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2239 ASSERT_LE(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_max)
2240 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2241 ASSERT_NEAR(
2242 next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
2243 output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c],
2244 1.0e-4 * std::abs(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c]))
2245 << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
2246 }
2247 }
2248 }
2249 }
2250 }
2251 }
2252 }
2253
2254 private:
2255 uint32_t padding_top_{0};
2256 uint32_t padding_right_{0};
2257 uint32_t padding_bottom_{0};
2258 uint32_t padding_left_{0};
2259 bool padding_tf_same_{false};
2260 size_t input_height_{1};
2261 size_t input_width_{1};
2262 uint32_t groups_{1};
2263 size_t group_input_channels_{1};
2264 size_t input_channel_stride_{0};
2265 size_t group_output_channels_{1};
2266 size_t output_channel_stride_{0};
2267 size_t batch_size_{1};
2268 uint32_t kernel_height_{1};
2269 uint32_t kernel_width_{1};
2270 uint32_t dilation_height_{1};
2271 uint32_t dilation_width_{1};
2272 uint32_t subsampling_height_{1};
2273 uint32_t subsampling_width_{1};
2274 size_t next_input_height_{0};
2275 size_t next_input_width_{0};
2276 size_t next_batch_size_{0};
2277 float sparsity_{0.0f};
2278 uint8_t qmin_{0};
2279 uint8_t qmax_{255};
2280 bool depthwise_layout_{false};
2281 bool force_nhwc_input_{false};
2282 bool has_bias_{true};
2283 size_t iterations_{1};
2284};
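// A minimal usage sketch (hypothetical, for illustration only). The builder-style
// setters assumed here (input_size, padding, kernel_size, subsampling, groups,
// group_input_channels, group_output_channels, iterations) mirror the accessors
// used above, and TestNHWCxF32() is assumed to be the single-shape counterpart of
// TestSetupNHWCxF32(); the actual GoogleTest cases live in the convolution test
// sources rather than in this header.
//
//   TEST(CONVOLUTION_NHWC_F32, sketch_grouped_3x3s2_with_padding) {
//     ConvolutionOperatorTester()
//         .input_size(13, 14)
//         .padding(1)
//         .kernel_size(3, 3)
//         .subsampling(2)
//         .groups(2)
//         .group_input_channels(15)
//         .group_output_channels(17)
//         .iterations(3)
//         .TestNHWCxF32();
//   }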