// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <limits>
#include <memory>
#include <random>
#include <vector>

#include <fp16.h>

#include <xnnpack.h>


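// Tester harness for XNNPACK 2D convolution operators: chainable setters
// configure the convolution geometry, and the Test*() methods create the
// operator, run it on random inputs, and compare against a naive reference
// convolution.
//
// Illustrative usage sketch (parameter values are arbitrary examples, not
// taken from any particular test):
//
//   ConvolutionOperatorTester()
//     .input_size(13, 14)
//     .kernel_size(3, 3)
//     .groups(2)
//     .group_input_channels(15)
//     .group_output_channels(17)
//     .TestNHWCxF32();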
class ConvolutionOperatorTester {
 public:
  inline ConvolutionOperatorTester& padding_tf_same(bool padding_same) {
    if (padding_same) {
      assert(padding_top() == 0);
      assert(padding_left() == 0);
      assert(padding_bottom() == 0);
      assert(padding_right() == 0);
    }
    this->padding_tf_same_ = padding_same;
    return *this;
  }

  inline bool padding_tf_same() const {
    return this->padding_tf_same_;
  }

  inline ConvolutionOperatorTester& padding(uint32_t padding) {
    assert(!padding_tf_same());
    this->padding_top_ = padding;
    this->padding_right_ = padding;
    this->padding_bottom_ = padding;
    this->padding_left_ = padding;
    return *this;
  }

  inline ConvolutionOperatorTester& padding(uint32_t padding_height, uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_right_ = padding_width;
    this->padding_bottom_ = padding_height;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_height(uint32_t padding_height) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_bottom_ = padding_height;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_width(uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_width;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline ConvolutionOperatorTester& padding_top(uint32_t padding_top) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_top;
    return *this;
  }

  inline uint32_t padding_top() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_height =
        (output_height() - 1) * subsampling_height() + dilated_kernel_height() - input_height();
      return total_padding_height / 2;
    } else {
      return this->padding_top_;
    }
  }

  inline ConvolutionOperatorTester& padding_left(uint32_t padding_left) {
    assert(!padding_tf_same());
    this->padding_left_ = padding_left;
    return *this;
  }

  inline uint32_t padding_left() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_width =
        (output_width() - 1) * subsampling_width() + dilated_kernel_width() - input_width();
      return total_padding_width / 2;
    } else {
      return this->padding_left_;
    }
  }

  inline ConvolutionOperatorTester& padding_bottom(uint32_t padding_bottom) {
    assert(!padding_tf_same());
    this->padding_bottom_ = padding_bottom;
    return *this;
  }

  inline uint32_t padding_bottom() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_height =
        (output_height() - 1) * subsampling_height() + dilated_kernel_height() - input_height();
      return total_padding_height - total_padding_height / 2;
    } else {
      return this->padding_bottom_;
    }
  }

  inline ConvolutionOperatorTester& padding_right(uint32_t padding_right) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_right;
    return *this;
  }

  inline uint32_t padding_right() const {
    if (padding_tf_same()) {
      const uint32_t total_padding_width =
        (output_width() - 1) * subsampling_width() + dilated_kernel_width() - input_width();
      return total_padding_width - total_padding_width / 2;
    } else {
      return this->padding_right_;
    }
  }

  inline ConvolutionOperatorTester& input_size(uint32_t input_height, uint32_t input_width) {
    assert(input_height >= 1);
    assert(input_width >= 1);
    this->input_height_ = input_height;
    this->input_width_ = input_width;
    return *this;
  }

  inline ConvolutionOperatorTester& input_height(uint32_t input_height) {
    assert(input_height >= 1);
    this->input_height_ = input_height;
    return *this;
  }

  inline uint32_t input_height() const {
    return this->input_height_;
  }

  inline ConvolutionOperatorTester& input_width(uint32_t input_width) {
    assert(input_width >= 1);
    this->input_width_ = input_width;
    return *this;
  }

  inline uint32_t input_width() const {
    return this->input_width_;
  }

  inline ConvolutionOperatorTester& groups(uint32_t groups) {
    assert(groups >= 1);
    this->groups_ = groups;
    return *this;
  }

  inline uint32_t groups() const {
    return this->groups_;
  }

  inline ConvolutionOperatorTester& group_input_channels(size_t group_input_channels) {
    assert(group_input_channels >= 1);
    this->group_input_channels_ = group_input_channels;
    return *this;
  }

  inline size_t group_input_channels() const {
    return this->group_input_channels_;
  }

  inline ConvolutionOperatorTester& group_output_channels(size_t group_output_channels) {
    assert(group_output_channels >= 1);
    this->group_output_channels_ = group_output_channels;
    return *this;
  }

  inline size_t group_output_channels() const {
    return this->group_output_channels_;
  }

  inline ConvolutionOperatorTester& batch_size(size_t batch_size) {
    assert(batch_size >= 1);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline ConvolutionOperatorTester& kernel_size(uint32_t kernel_size) {
    assert(kernel_size >= 1);
    this->kernel_height_ = kernel_size;
    this->kernel_width_ = kernel_size;
    return *this;
  }

  inline ConvolutionOperatorTester& kernel_size(uint32_t kernel_height, uint32_t kernel_width) {
    assert(kernel_height >= 1);
    assert(kernel_width >= 1);
    this->kernel_height_ = kernel_height;
    this->kernel_width_ = kernel_width;
    return *this;
  }

  inline ConvolutionOperatorTester& kernel_height(uint32_t kernel_height) {
    assert(kernel_height >= 1);
    this->kernel_height_ = kernel_height;
    return *this;
  }

  inline uint32_t kernel_height() const {
    return this->kernel_height_;
  }

  inline ConvolutionOperatorTester& kernel_width(uint32_t kernel_width) {
    assert(kernel_width >= 1);
    this->kernel_width_ = kernel_width;
    return *this;
  }

  inline uint32_t kernel_width() const {
    return this->kernel_width_;
  }

  inline ConvolutionOperatorTester& dilation(uint32_t dilation) {
    assert(dilation >= 1);
    this->dilation_height_ = dilation;
    this->dilation_width_ = dilation;
    return *this;
  }

  inline ConvolutionOperatorTester& dilation(uint32_t dilation_height, uint32_t dilation_width) {
    assert(dilation_height >= 1);
    assert(dilation_width >= 1);
    this->dilation_height_ = dilation_height;
    this->dilation_width_ = dilation_width;
    return *this;
  }

  inline ConvolutionOperatorTester& dilation_height(uint32_t dilation_height) {
    assert(dilation_height >= 1);
    this->dilation_height_ = dilation_height;
    return *this;
  }

  inline uint32_t dilation_height() const {
    return this->dilation_height_;
  }

  inline ConvolutionOperatorTester& dilation_width(uint32_t dilation_width) {
    assert(dilation_width >= 1);
    this->dilation_width_ = dilation_width;
    return *this;
  }

  inline uint32_t dilation_width() const {
    return this->dilation_width_;
  }

  inline ConvolutionOperatorTester& subsampling(uint32_t subsampling) {
    assert(subsampling >= 1);
    this->subsampling_height_ = subsampling;
    this->subsampling_width_ = subsampling;
    return *this;
  }

  inline ConvolutionOperatorTester& subsampling(uint32_t subsampling_height, uint32_t subsampling_width) {
    assert(subsampling_height >= 1);
    assert(subsampling_width >= 1);
    this->subsampling_height_ = subsampling_height;
    this->subsampling_width_ = subsampling_width;
    return *this;
  }

  inline ConvolutionOperatorTester& subsampling_height(uint32_t subsampling_height) {
    assert(subsampling_height >= 1);
    this->subsampling_height_ = subsampling_height;
    return *this;
  }

  inline uint32_t subsampling_height() const {
    return this->subsampling_height_;
  }

  inline ConvolutionOperatorTester& subsampling_width(uint32_t subsampling_width) {
    assert(subsampling_width >= 1);
    this->subsampling_width_ = subsampling_width;
    return *this;
  }

  inline uint32_t subsampling_width() const {
    return this->subsampling_width_;
  }

  inline ConvolutionOperatorTester& input_channel_stride(size_t input_channel_stride) {
    assert(input_channel_stride >= 1);
    this->input_channel_stride_ = input_channel_stride;
    return *this;
  }

  inline size_t input_channel_stride() const {
    if (this->input_channel_stride_ == 0) {
      return group_input_channels() * groups();
    } else {
      assert(this->input_channel_stride_ >= group_input_channels() * groups());
      return this->input_channel_stride_;
    }
  }

  inline ConvolutionOperatorTester& output_channel_stride(size_t output_channel_stride) {
    assert(output_channel_stride >= 1);
    this->output_channel_stride_ = output_channel_stride;
    return *this;
  }

  inline size_t output_channel_stride() const {
    if (this->output_channel_stride_ == 0) {
      return group_output_channels() * groups();
    } else {
      assert(this->output_channel_stride_ >= group_output_channels() * groups());
      return this->output_channel_stride_;
    }
  }

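  // Effective kernel extent once dilation is applied:
  // dilation * (kernel_size - 1) + 1.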
  inline uint32_t dilated_kernel_height() const {
    return (kernel_height() - 1) * dilation_height() + 1;
  }

  inline uint32_t dilated_kernel_width() const {
    return (kernel_width() - 1) * dilation_width() + 1;
  }

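  // Output size follows standard convolution arithmetic: TF-SAME padding
  // yields ceil(input / stride); otherwise
  // floor((padded_input - dilated_kernel) / stride) + 1, with a minimum of 1.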
  inline size_t output_height() const {
    if (padding_tf_same()) {
      return (input_height() + subsampling_height() - 1) / subsampling_height();
    } else {
      const size_t padded_input_height = padding_top() + input_height() + padding_bottom();
      if (padded_input_height <= dilated_kernel_height()) {
        return 1;
      } else {
        return (padded_input_height - dilated_kernel_height()) / subsampling_height() + 1;
      }
    }
  }

  inline size_t output_width() const {
    if (padding_tf_same()) {
      return (input_width() + subsampling_width() - 1) / subsampling_width();
    } else {
      const size_t padded_input_width = padding_left() + input_width() + padding_right();
      if (padded_input_width <= dilated_kernel_width()) {
        return 1;
      } else {
        return (padded_input_width - dilated_kernel_width()) / subsampling_width() + 1;
      }
    }
  }

  inline ConvolutionOperatorTester& next_input_size(uint32_t next_input_height, uint32_t next_input_width) {
    assert(next_input_height >= 1);
    assert(next_input_width >= 1);
    this->next_input_height_ = next_input_height;
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline ConvolutionOperatorTester& next_input_height(uint32_t next_input_height) {
    assert(next_input_height >= 1);
    this->next_input_height_ = next_input_height;
    return *this;
  }

  inline uint32_t next_input_height() const {
    if (this->next_input_height_ == 0) {
      return input_height();
    } else {
      return this->next_input_height_;
    }
  }

  inline ConvolutionOperatorTester& next_input_width(uint32_t next_input_width) {
    assert(next_input_width >= 1);
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline uint32_t next_input_width() const {
    if (this->next_input_width_ == 0) {
      return input_width();
    } else {
      return this->next_input_width_;
    }
  }

  inline size_t next_output_height() const {
    const size_t padded_input_height = padding_top() + next_input_height() + padding_bottom();
    if (padded_input_height <= dilated_kernel_height()) {
      return 1;
    } else {
      return (padded_input_height - dilated_kernel_height()) / subsampling_height() + 1;
    }
  }

  inline size_t next_output_width() const {
    const size_t padded_input_width = padding_left() + next_input_width() + padding_right();
    if (padded_input_width <= dilated_kernel_width()) {
      return 1;
    } else {
      return (padded_input_width - dilated_kernel_width()) / subsampling_width() + 1;
    }
  }

  inline ConvolutionOperatorTester& next_batch_size(size_t next_batch_size) {
    assert(next_batch_size >= 1);
    this->next_batch_size_ = next_batch_size;
    return *this;
  }

  inline size_t next_batch_size() const {
    if (this->next_batch_size_ == 0) {
      return batch_size();
    } else {
      return this->next_batch_size_;
    }
  }

  inline ConvolutionOperatorTester& sparsity(float sparsity) {
    this->sparsity_ = sparsity;
    return *this;
  }

  inline float sparsity() const {
    return this->sparsity_;
  }

  inline ConvolutionOperatorTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline ConvolutionOperatorTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline ConvolutionOperatorTester& force_nhwc_input(bool force_nhwc_input) {
    this->force_nhwc_input_ = force_nhwc_input;
    return *this;
  }

  inline bool force_nhwc_input() const {
    return this->force_nhwc_input_;
  }

  inline ConvolutionOperatorTester& depthwise_layout(bool depthwise_layout) {
    this->depthwise_layout_ = depthwise_layout;
    return *this;
  }

  inline bool depthwise_layout() const {
    return this->depthwise_layout_;
  }

  inline ConvolutionOperatorTester& has_bias(bool has_bias) {
    this->has_bias_ = has_bias;
    return *this;
  }

  inline bool has_bias() const {
    return this->has_bias_;
  }

  inline ConvolutionOperatorTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

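  // End-to-end test of the quantized (Q8) NHWC convolution: int32 reference
  // accumulators are computed first, the output scale and zero point are
  // derived from the observed accumulator range, and operator outputs must
  // match the renormalized reference within 0.9.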
  void TestNHWCxQ8() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto s32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
    auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()) + 8);
    std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<int32_t> bias(groups() * group_output_channels());
    std::vector<uint8_t> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
    std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    const uint8_t input_zero_point = 127;
    const uint8_t kernel_zero_point = 127;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
      std::generate(bias.begin(), bias.end(), std::ref(s32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(accumulators.begin(), accumulators.end(), 0);
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                              (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

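      // The output scale maps the observed accumulator range onto the 255
      // representable quantized steps; the zero point centers that range.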
      // Compute renormalization parameters.
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const uint8_t output_zero_point = uint8_t(std::max(std::min(
        lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_q8(
          padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
          padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_channel_stride(), output_channel_stride(),
          input_zero_point, 1.0f /* input scale */,
          kernel_zero_point, 1.0f /* kernel scale */,
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_zero_point, output_scale, qmin(), qmax(),
          (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
          &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_q8(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                    double(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                    0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

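  // End-to-end test of the single-precision NHWC convolution: the reference
  // result is clamped to [output_min, output_max] derived from qmin()/qmax(),
  // and outputs must match within 1.0e-4 relative tolerance.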
  void TestNHWCxF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()));
    std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g] *
                            kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic] *
                              kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

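      // Map qmin()/qmax() from the [0, 255] quantized domain onto the observed
      // accumulator range to obtain float clamping bounds.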
      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

      const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_f32(
          padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
          padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_channel_stride(), output_channel_stride(),
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_min, output_max,
          (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
          &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                    output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c],
                    1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

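  // End-to-end test of the half-precision NHWC convolution: reference math is
  // done in fp32 on fp16-rounded values, the test is skipped when the hardware
  // lacks F16 support, and outputs must match within 1.0e-2 relative
  // tolerance.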
  void TestNHWCxF16() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

    std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) +
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()));
    std::vector<uint16_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<uint16_t> bias(groups() * group_output_channels());
    std::vector<uint16_t> output(batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f16rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f16rng));
      std::generate(bias.begin(), bias.end(), std::ref(f16rng));
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    fp16_ieee_to_fp32_value(bias[g * group_output_channels() + oc]);
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g]) *
                            fp16_ieee_to_fp32_value(kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc]);
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                              fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) *
                              fp16_ieee_to_fp32_value(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

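      // fp16 rounding may collapse the scaled min and max to the same value;
      // in that case fall back to an unbounded clamping range.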
      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float scaled_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + accumulated_range / 255.0f * float(qmin())));
      const float scaled_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - accumulated_range / 255.0f * float(255 - qmax())));
      const float output_min = scaled_min == scaled_max ? -std::numeric_limits<float>::infinity() : scaled_min;
      const float output_max = scaled_min == scaled_max ? +std::numeric_limits<float>::infinity() : scaled_max;

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      xnn_status status = xnn_create_convolution2d_nhwc_f16(
        padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
        padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
        kernel_height(), kernel_width(),
        subsampling_height(), subsampling_width(),
        dilation_height(), dilation_width(),
        groups(), group_input_channels(), group_output_channels(),
        input_channel_stride(), output_channel_stride(),
        kernel.data(), has_bias() ? bias.data() : nullptr,
        output_min, output_max,
        (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0),
        &convolution_op);

      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, convolution_op);

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f16(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
//                ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_min)
//                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
//                ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_max)
//                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                    output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                    fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]),
                    1.0e-2 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

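  // End-to-end test of the single-precision NCHW convolution: a sparsity()
  // fraction of kernel weights is zeroed to exercise sparse kernels, and
  // force_nhwc_input() covers the XNN_FLAG_INPUT_NHWC path; unsupported
  // parameter combinations are skipped.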
  void TestNCHWxF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
    auto prng = std::bind(std::uniform_real_distribution<float>(), rng);

    std::vector<float> input(2 * XNN_EXTRA_BYTES / sizeof(float) +
      ((batch_size() - 1) * input_channel_stride() + groups() * group_input_channels()) * input_height() * input_width());
    std::vector<float> kernel(
      groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output(
      ((batch_size() - 1) * output_channel_stride() + groups() * group_output_channels()) * output_height() * output_width());
    std::vector<float> output_ref(batch_size() * groups() * group_output_channels() * output_height() * output_width());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      for (float& k : kernel) {
        if (prng() <= sparsity()) {
          k = 0.0f;
        }
      }
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      if (force_nhwc_input()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
                              input[((((i * input_height() + iy) * input_width() + ix) * groups() + g) * group_input_channels() + ic)] *
                              kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else if (depthwise_layout()) {
        ASSERT_EQ(group_input_channels(), 1);

        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
                            input[((i * input_channel_stride() + g) * input_height() + iy) * input_width() + ix] *
                            kernel[((ky * kernel_width() + kx) * groups() + g) * group_output_channels() + oc];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      } else {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t ky = 0; ky < kernel_height(); ky++) {
                const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
                if (iy < input_height()) {
                  for (size_t kx = 0; kx < kernel_width(); kx++) {
                    const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                    if (ix < input_width()) {
                      for (size_t g = 0; g < groups(); g++) {
                        for (size_t oc = 0; oc < group_output_channels(); oc++) {
                          for (size_t ic = 0; ic < group_input_channels(); ic++) {
                            output_ref[(((i * groups() + g) * group_output_channels() + oc) * output_height() + oy) * output_width() + ox] +=
                              input[((i * input_channel_stride() + g * group_input_channels() + ic) * input_height() + iy) * input_width() + ix] *
                              kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

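      // qmin() == 0 and qmax() == 255 mean "no clamping", so the corresponding
      // bound is left at infinity.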
      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

      const float output_min = qmin() == 0 ? -std::numeric_limits<float>::infinity() :
        accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = qmax() == 255 ? std::numeric_limits<float>::infinity() :
        accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Convolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      xnn_status status = xnn_create_convolution2d_nchw_f32(
        padding_top(), padding_right(), padding_bottom(), padding_left(),
        kernel_height(), kernel_width(),
        subsampling_height(), subsampling_width(),
        dilation_height(), dilation_width(),
        groups(), group_input_channels(), group_output_channels(),
        input_channel_stride(), output_channel_stride(),
        kernel.data(), has_bias() ? bias.data() : nullptr,
        output_min, output_max,
        (depthwise_layout() ? XNN_FLAG_DEPTHWISE_CONVOLUTION : 0) | (force_nhwc_input() ? XNN_FLAG_INPUT_NHWC : 0),
        &convolution_op);
      if (status == xnn_status_unsupported_parameter) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nchw_f32(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_channel_stride() + g * group_output_channels() + c) * output_height() + y) * output_width() + x], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
                ASSERT_LE(output[((i * output_channel_stride() + g * group_output_channels() + c) * output_height() + y) * output_width() + x], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
                ASSERT_NEAR(
                    output_ref[(((i * groups() + g) * group_output_channels() + c) * output_height() + y) * output_width() + x],
                    output[((i * output_channel_stride() + g * group_output_channels() + c) * output_height() + y) * output_width() + x],
                    1.0e-4 * std::abs(output_ref[(((i * groups() + g) * group_output_channels() + c) * output_height() + y) * output_width() + x]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c << ", image = " << i;
              }
            }
          }
        }
      }
    }
  }

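  // Tests re-setup of the Q8 NHWC convolution: buffers are sized with
  // std::max() to fit both the initial and the next_*() dimensions, and the
  // operator is created, set up, and run once at the initial size before being
  // verified.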
1185 void TestSetupNHWCxQ8() const {
XNNPACK Teamb455b122019-09-27 18:10:33 -07001186 ASSERT_FALSE(depthwise_layout());
1187
1188 std::random_device random_device;
1189 auto rng = std::mt19937(random_device());
1190 auto s32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
Marat Dukhan5ce30d92020-04-14 03:31:26 -07001191 auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001192
1193 std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + std::max(
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001194 batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
1195 next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())) + 8);
XNNPACK Teamb455b122019-09-27 18:10:33 -07001196 std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
1197 std::vector<int32_t> bias(groups() * group_output_channels());
1198 std::vector<uint8_t> output(std::max(
Marat Dukhanc3d52cf2020-06-18 07:56:25 -07001199 batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
1200 next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
XNNPACK Teamb455b122019-09-27 18:10:33 -07001201 std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1202 std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
1203 std::vector<int32_t> next_accumulators(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1204 std::vector<double> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
1205
1206 const uint8_t input_zero_point = 127;
1207 const uint8_t kernel_zero_point = 127;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
      std::generate(bias.begin(), bias.end(), std::ref(s32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(accumulators.begin(), accumulators.end(), 0);
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute renormalization parameters.
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const uint8_t output_zero_point = uint8_t(std::max(std::min(
        lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));
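      // The scale spreads the observed accumulator range over the 255 representable steps of
      // uint8, and the zero point recenters it:
      //   output_scale = (accumulated_max - accumulated_min) / 255
      //   output_zero_point = round(127.5 - (accumulated_min + accumulated_max) / (2 * output_scale))
      // clamped to [0, 255], so the requantized results span the full output range.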

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Create, setup, and run Convolution operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_q8(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_channel_stride(), output_channel_stride(),
          input_zero_point, 1.0f /* input scale */,
          kernel_zero_point, 1.0f /* kernel scale */,
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_zero_point, output_scale, qmin(), qmax(),
          0, &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_q8(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results for the second run, including renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < next_batch_size(); i++) {
          for (size_t oy = 0; oy < next_output_height(); oy++) {
            for (size_t ox = 0; ox < next_output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(next_accumulators.begin(), next_accumulators.end(), 0);
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      std::transform(next_accumulators.cbegin(), next_accumulators.cend(), next_output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Setup and run Convolution operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_q8(
          convolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

  void TestSetupNHWCxF16() const {
    ASSERT_FALSE(depthwise_layout());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);
    auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);
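    // f16rng yields random floats in [0.1, 1.0] converted to IEEE half-precision bit
    // patterns (stored in uint16_t) via the fp16 library.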

    std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + std::max(
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
      next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())));
    std::vector<uint16_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<uint16_t> bias(groups() * group_output_channels());
    std::vector<uint16_t> output(std::max(
      batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
      next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f16rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f16rng));
      std::generate(bias.begin(), bias.end(), std::ref(f16rng));
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);
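      // 0x7E00 is a canonical half-precision NaN: any output element the operator fails to
      // overwrite will trip the comparisons below.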

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    fp16_ieee_to_fp32_value(bias[g * group_output_channels() + oc]);
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            fp16_ieee_to_fp32_value(input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) *
                            fp16_ieee_to_fp32_value(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_range = accumulated_max - accumulated_min;
      const float scaled_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + accumulated_range / 255.0f * float(qmin())));
      const float scaled_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - accumulated_range / 255.0f * float(255 - qmax())));
      const float output_min = scaled_min == scaled_max ? -std::numeric_limits<float>::infinity() : scaled_min;
      const float output_max = scaled_min == scaled_max ? +std::numeric_limits<float>::infinity() : scaled_max;
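      // The clamp bounds are rounded through half precision to match what the operator can
      // represent; if rounding collapses them to a single value, clamping is effectively
      // disabled by widening the bounds to +/-infinity.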

      for (float& output_value : output_ref) {
        output_value = std::min(std::max(output_value, output_min), output_max);
      }

      // Create, setup, and run Convolution operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      xnn_status status = xnn_create_convolution2d_nhwc_f16(
        padding_top(), padding_right(), padding_bottom(), padding_left(),
        kernel_height(), kernel_width(),
        subsampling_height(), subsampling_width(),
        dilation_height(), dilation_width(),
        groups(), group_input_channels(), group_output_channels(),
        input_channel_stride(), output_channel_stride(),
        kernel.data(), has_bias() ? bias.data() : nullptr,
        output_min, output_max,
        0, &convolution_op);

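      // Half-precision convolution may be unsupported on this CPU, in which case the test is
      // skipped rather than failed.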
      if (status == xnn_status_unsupported_hardware) {
        GTEST_SKIP();
      }
      ASSERT_EQ(xnn_status_success, status);
      ASSERT_NE(nullptr, convolution_op);

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f16(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  fp16_ieee_to_fp32_value(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c]),
                  1.0e-2 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), std::ref(f16rng));
      std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

      // Compute reference results for the second run, including clamping.
      if (has_bias()) {
        for (size_t i = 0; i < next_batch_size(); i++) {
          for (size_t oy = 0; oy < next_output_height(); oy++) {
            for (size_t ox = 0; ox < next_output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    fp16_ieee_to_fp32_value(bias[g * group_output_channels() + oc]);
                }
              }
            }
          }
        }
      } else {
        std::fill(next_output_ref.begin(), next_output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            fp16_ieee_to_fp32_value(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic]) *
                            fp16_ieee_to_fp32_value(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]);
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      for (float& value : next_output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Setup and run Convolution operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f16(
          convolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]), output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
                  fp16_ieee_to_fp32_value(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c]),
                  1.0e-2 * std::abs(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

  void TestSetupNHWCxF32() const {
    ASSERT_FALSE(depthwise_layout());

    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + std::max(
      batch_size() * ((input_height() * input_width() - 1) * input_channel_stride() + groups() * group_input_channels()),
      next_batch_size() * ((next_input_height() * next_input_width() - 1) * input_channel_stride() + groups() * group_input_channels())));
    std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output(std::max(
      batch_size() * ((output_height() * output_width() - 1) * output_channel_stride() + groups() * group_output_channels()),
      next_batch_size() * ((next_output_height() * next_output_width() - 1) * output_channel_stride() + groups() * group_output_channels())));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * input_height() + iy) * input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic] *
                            kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

      const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());
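      // qmin()/qmax() select which fraction of the accumulated range survives clamping:
      // qmin = 0 and qmax = 255 leave the output effectively unclamped, while tighter values
      // trim the bottom and top of the range proportionally.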

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, and run Convolution operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t convolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_convolution2d_nhwc_f32(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          kernel_height(), kernel_width(),
          subsampling_height(), subsampling_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_channel_stride(), output_channel_stride(),
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_min, output_max,
          0, &convolution_op));

      // Smart pointer to automatically delete convolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_convolution_op(convolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          batch_size(), input_height(), input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  output[((i * output_height() + y) * output_width() + x) * output_channel_stride() + g * group_output_channels() + c],
                  1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results for the second run, including clamping.
      if (has_bias()) {
        for (size_t i = 0; i < next_batch_size(); i++) {
          for (size_t oy = 0; oy < next_output_height(); oy++) {
            for (size_t ox = 0; ox < next_output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(next_output_ref.begin(), next_output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t iy = oy * subsampling_height() + ky * dilation_height() - padding_top();
              if (iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t ix = ox * subsampling_width() + kx * dilation_width() - padding_left();
                  if (ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * next_input_height() + iy) * next_input_width() + ix) * input_channel_stride() + g * group_input_channels() + ic] *
                            kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      for (float& value : next_output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Setup and run Convolution operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_convolution2d_nhwc_f32(
          convolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(convolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
                  output[((i * next_output_height() + y) * next_output_width() + x) * output_channel_stride() + g * group_output_channels() + c],
                  1.0e-4 * std::abs(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

 private:
  uint32_t padding_top_{0};
  uint32_t padding_right_{0};
  uint32_t padding_bottom_{0};
  uint32_t padding_left_{0};
  bool padding_tf_same_{false};
  size_t input_height_{1};
  size_t input_width_{1};
  uint32_t groups_{1};
  size_t group_input_channels_{1};
  size_t input_channel_stride_{0};
  size_t group_output_channels_{1};
  size_t output_channel_stride_{0};
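  // A channel stride of 0 appears to be a sentinel: the input_channel_stride() and
  // output_channel_stride() accessors defined earlier in this class presumably fall back to
  // the dense default of groups() * group_input_channels() (resp. group_output_channels()).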
  size_t batch_size_{1};
  uint32_t kernel_height_{1};
  uint32_t kernel_width_{1};
  uint32_t dilation_height_{1};
  uint32_t dilation_width_{1};
  uint32_t subsampling_height_{1};
  uint32_t subsampling_width_{1};
  size_t next_input_height_{0};
  size_t next_input_width_{0};
  size_t next_batch_size_{0};
  float sparsity_{0.0f};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  bool depthwise_layout_{false};
  bool force_nhwc_input_{false};
  bool has_bias_{true};
  size_t iterations_{1};
};
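
// A minimal usage sketch (hypothetical test case, not part of this header), assuming the
// fluent setters defined earlier in this class; it exercises the resize-and-rerun path that
// TestSetupNHWCxF32() verifies:
//
//   TEST(CONVOLUTION_NHWC_F32, setup_changing_batch) {
//     ConvolutionOperatorTester()
//       .batch_size(3)
//       .next_batch_size(5)
//       .input_size(8, 8)
//       .kernel_size(3)
//       .padding(1)
//       .groups(2)
//       .group_input_channels(15)
//       .group_output_channels(17)
//       .TestSetupNHWCxF32();
//   }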