// Copyright (c) Facebook, Inc. and its affiliates.
// All rights reserved.
//
// Copyright 2019 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <functional>
#include <limits>
#include <memory>
#include <random>
#include <vector>

#include <xnnpack.h>

namespace {

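// doz ("difference or zero") computes the saturating difference max(a - b, 0),
// used below to keep unsigned padding arithmetic from wrapping around.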
template<class T>
inline T doz(T a, T b) {
  return a > b ? a - b : T(0);
}

}  // namespace

class DeconvolutionOperatorTester {
 public:
  inline DeconvolutionOperatorTester& padding_tf_same(bool padding_same) {
    if (padding_same) {
      assert(padding_top() == 0);
      assert(padding_left() == 0);
      assert(padding_bottom() == 0);
      assert(padding_right() == 0);
    }
    this->padding_tf_same_ = padding_same;
    return *this;
  }

  inline bool padding_tf_same() const {
    return this->padding_tf_same_;
  }

  inline DeconvolutionOperatorTester& padding(uint32_t padding) {
    assert(!padding_tf_same());
    this->padding_top_ = padding;
    this->padding_right_ = padding;
    this->padding_bottom_ = padding;
    this->padding_left_ = padding;
    return *this;
  }

  inline DeconvolutionOperatorTester& padding(uint32_t padding_height, uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_right_ = padding_width;
    this->padding_bottom_ = padding_height;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline DeconvolutionOperatorTester& padding_height(uint32_t padding_height) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_height;
    this->padding_bottom_ = padding_height;
    return *this;
  }

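  // In TF SAME mode the total padding is derived from the kernel, dilation,
  // stride, and input size; doz() clamps at zero, so the getter below evaluates
  // max(dilated_kernel_height() - 1 - (input_height() - 1) % stride_height(), 0).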
  inline uint32_t padding_height() const {
    if (padding_tf_same()) {
      return doz(dilated_kernel_height() - 1, static_cast<uint32_t>((input_height() - 1) % stride_height()));
    } else {
      return this->padding_top_ + this->padding_bottom_;
    }
  }

  inline DeconvolutionOperatorTester& padding_width(uint32_t padding_width) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_width;
    this->padding_left_ = padding_width;
    return *this;
  }

  inline uint32_t padding_width() const {
    if (padding_tf_same()) {
      return doz(dilated_kernel_width() - 1, static_cast<uint32_t>((input_width() - 1) % stride_width()));
    } else {
      return this->padding_left_ + this->padding_right_;
    }
  }

  inline DeconvolutionOperatorTester& padding_top(uint32_t padding_top) {
    assert(!padding_tf_same());
    this->padding_top_ = padding_top;
    return *this;
  }

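  // In TF SAME mode the total padding is split between the two edges, with any
  // odd leftover pixel assigned to the bottom/right edge, as TensorFlow does.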
  inline uint32_t padding_top() const {
    if (padding_tf_same()) {
      return padding_height() / 2;
    } else {
      return this->padding_top_;
    }
  }

  inline DeconvolutionOperatorTester& padding_right(uint32_t padding_right) {
    assert(!padding_tf_same());
    this->padding_right_ = padding_right;
    return *this;
  }

  inline uint32_t padding_right() const {
    if (padding_tf_same()) {
      return padding_width() - padding_left();
    } else {
      return this->padding_right_;
    }
  }

  inline DeconvolutionOperatorTester& padding_bottom(uint32_t padding_bottom) {
    assert(!padding_tf_same());
    this->padding_bottom_ = padding_bottom;
    return *this;
  }

  inline uint32_t padding_bottom() const {
    if (padding_tf_same()) {
      return padding_height() - padding_top();
    } else {
      return this->padding_bottom_;
    }
  }

  inline DeconvolutionOperatorTester& padding_left(uint32_t padding_left) {
    assert(!padding_tf_same());
    this->padding_left_ = padding_left;
    return *this;
  }

  inline uint32_t padding_left() const {
    if (padding_tf_same()) {
      return padding_width() / 2;
    } else {
      return this->padding_left_;
    }
  }

  inline DeconvolutionOperatorTester& adjustment_height(uint32_t adjustment_height) {
    this->adjustment_height_ = adjustment_height;
    return *this;
  }

  inline uint32_t adjustment_height() const {
    return this->adjustment_height_;
  }

  inline DeconvolutionOperatorTester& adjustment_width(uint32_t adjustment_width) {
    this->adjustment_width_ = adjustment_width;
    return *this;
  }

  inline uint32_t adjustment_width() const {
    return this->adjustment_width_;
  }

  inline DeconvolutionOperatorTester& input_size(uint32_t input_height, uint32_t input_width) {
    assert(input_height >= 1);
    assert(input_width >= 1);
    this->input_height_ = input_height;
    this->input_width_ = input_width;
    return *this;
  }

  inline DeconvolutionOperatorTester& input_height(uint32_t input_height) {
    assert(input_height >= 1);
    this->input_height_ = input_height;
    return *this;
  }

  inline uint32_t input_height() const {
    return this->input_height_;
  }

  inline DeconvolutionOperatorTester& input_width(uint32_t input_width) {
    assert(input_width >= 1);
    this->input_width_ = input_width;
    return *this;
  }

  inline uint32_t input_width() const {
    return this->input_width_;
  }

  inline DeconvolutionOperatorTester& groups(uint32_t groups) {
    assert(groups >= 1);
    this->groups_ = groups;
    return *this;
  }

  inline uint32_t groups() const {
    return this->groups_;
  }

  inline DeconvolutionOperatorTester& group_input_channels(size_t group_input_channels) {
    assert(group_input_channels >= 1);
    this->group_input_channels_ = group_input_channels;
    return *this;
  }

  inline size_t group_input_channels() const {
    return this->group_input_channels_;
  }

  inline DeconvolutionOperatorTester& group_output_channels(size_t group_output_channels) {
    assert(group_output_channels >= 1);
    this->group_output_channels_ = group_output_channels;
    return *this;
  }

  inline size_t group_output_channels() const {
    return this->group_output_channels_;
  }

  inline DeconvolutionOperatorTester& batch_size(size_t batch_size) {
    assert(batch_size >= 1);
    this->batch_size_ = batch_size;
    return *this;
  }

  inline size_t batch_size() const {
    return this->batch_size_;
  }

  inline DeconvolutionOperatorTester& kernel_size(uint32_t kernel_size) {
    assert(kernel_size >= 1);
    this->kernel_height_ = kernel_size;
    this->kernel_width_ = kernel_size;
    return *this;
  }

  inline DeconvolutionOperatorTester& kernel_size(uint32_t kernel_height, uint32_t kernel_width) {
    assert(kernel_height >= 1);
    assert(kernel_width >= 1);
    this->kernel_height_ = kernel_height;
    this->kernel_width_ = kernel_width;
    return *this;
  }

  inline DeconvolutionOperatorTester& kernel_height(uint32_t kernel_height) {
    assert(kernel_height >= 1);
    this->kernel_height_ = kernel_height;
    return *this;
  }

  inline uint32_t kernel_height() const {
    return this->kernel_height_;
  }

  inline DeconvolutionOperatorTester& kernel_width(uint32_t kernel_width) {
    assert(kernel_width >= 1);
    this->kernel_width_ = kernel_width;
    return *this;
  }

  inline uint32_t kernel_width() const {
    return this->kernel_width_;
  }

  inline DeconvolutionOperatorTester& dilation(uint32_t dilation) {
    assert(dilation >= 1);
    this->dilation_height_ = dilation;
    this->dilation_width_ = dilation;
    return *this;
  }

  inline DeconvolutionOperatorTester& dilation(uint32_t dilation_height, uint32_t dilation_width) {
    assert(dilation_height >= 1);
    assert(dilation_width >= 1);
    this->dilation_height_ = dilation_height;
    this->dilation_width_ = dilation_width;
    return *this;
  }

  inline DeconvolutionOperatorTester& dilation_height(uint32_t dilation_height) {
    assert(dilation_height >= 1);
    this->dilation_height_ = dilation_height;
    return *this;
  }

  inline uint32_t dilation_height() const {
    return this->dilation_height_;
  }

  inline DeconvolutionOperatorTester& dilation_width(uint32_t dilation_width) {
    assert(dilation_width >= 1);
    this->dilation_width_ = dilation_width;
    return *this;
  }

  inline uint32_t dilation_width() const {
    return this->dilation_width_;
  }

  inline DeconvolutionOperatorTester& stride(uint32_t stride) {
    assert(stride >= 1);
    this->stride_height_ = stride;
    this->stride_width_ = stride;
    return *this;
  }

  inline DeconvolutionOperatorTester& stride(uint32_t stride_height, uint32_t stride_width) {
    assert(stride_height >= 1);
    assert(stride_width >= 1);
    this->stride_height_ = stride_height;
    this->stride_width_ = stride_width;
    return *this;
  }

  inline DeconvolutionOperatorTester& stride_height(uint32_t stride_height) {
    assert(stride_height >= 1);
    this->stride_height_ = stride_height;
    return *this;
  }

  inline uint32_t stride_height() const {
    return this->stride_height_;
  }

  inline DeconvolutionOperatorTester& stride_width(uint32_t stride_width) {
    assert(stride_width >= 1);
    this->stride_width_ = stride_width;
    return *this;
  }

  inline uint32_t stride_width() const {
    return this->stride_width_;
  }

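  // Pixel strides default to 0, which the getters below interpret as a dense
  // layout, i.e. a stride of groups() * group_{input,output}_channels().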
  inline DeconvolutionOperatorTester& input_pixel_stride(size_t input_pixel_stride) {
    assert(input_pixel_stride >= 1);
    this->input_pixel_stride_ = input_pixel_stride;
    return *this;
  }

  inline size_t input_pixel_stride() const {
    if (this->input_pixel_stride_ == 0) {
      return group_input_channels() * groups();
    } else {
      assert(this->input_pixel_stride_ >= group_input_channels() * groups());
      return this->input_pixel_stride_;
    }
  }

  inline DeconvolutionOperatorTester& output_pixel_stride(size_t output_pixel_stride) {
    assert(output_pixel_stride >= 1);
    this->output_pixel_stride_ = output_pixel_stride;
    return *this;
  }

  inline size_t output_pixel_stride() const {
    if (this->output_pixel_stride_ == 0) {
      return group_output_channels() * groups();
    } else {
      assert(this->output_pixel_stride_ >= group_output_channels() * groups());
      return this->output_pixel_stride_;
    }
  }

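  // The dilated kernel extent is the window a kernel covers once dilation
  // gaps are counted: (kernel_size - 1) * dilation + 1.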
  inline uint32_t dilated_kernel_height() const {
    return (kernel_height() - 1) * dilation_height() + 1;
  }

  inline uint32_t dilated_kernel_width() const {
    return (kernel_width() - 1) * dilation_width() + 1;
  }

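  // Output size follows the standard deconvolution shape formula:
  // output = stride * (input - 1) + adjustment + dilated_kernel - total_padding.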
  inline size_t output_height() const {
    return stride_height() * (input_height() - 1) + adjustment_height() + dilated_kernel_height() - padding_height();
  }

  inline size_t output_width() const {
    return stride_width() * (input_width() - 1) + adjustment_width() + dilated_kernel_width() - padding_width();
  }

  inline DeconvolutionOperatorTester& next_input_size(uint32_t next_input_height, uint32_t next_input_width) {
    assert(next_input_height >= 1);
    assert(next_input_width >= 1);
    this->next_input_height_ = next_input_height;
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline DeconvolutionOperatorTester& next_input_height(uint32_t next_input_height) {
    assert(next_input_height >= 1);
    this->next_input_height_ = next_input_height;
    return *this;
  }

  inline uint32_t next_input_height() const {
    if (this->next_input_height_ == 0) {
      return input_height();
    } else {
      return this->next_input_height_;
    }
  }

  inline DeconvolutionOperatorTester& next_input_width(uint32_t next_input_width) {
    assert(next_input_width >= 1);
    this->next_input_width_ = next_input_width;
    return *this;
  }

  inline uint32_t next_input_width() const {
    if (this->next_input_width_ == 0) {
      return input_width();
    } else {
      return this->next_input_width_;
    }
  }

  inline size_t next_output_height() const {
    return stride_height() * (next_input_height() - 1) + adjustment_height() + dilated_kernel_height() - padding_height();
  }

  inline size_t next_output_width() const {
    return stride_width() * (next_input_width() - 1) + adjustment_width() + dilated_kernel_width() - padding_width();
  }

  inline DeconvolutionOperatorTester& next_batch_size(size_t next_batch_size) {
    assert(next_batch_size >= 1);
    this->next_batch_size_ = next_batch_size;
    return *this;
  }

  inline size_t next_batch_size() const {
    if (this->next_batch_size_ == 0) {
      return batch_size();
    } else {
      return this->next_batch_size_;
    }
  }

  inline DeconvolutionOperatorTester& qmin(uint8_t qmin) {
    this->qmin_ = qmin;
    return *this;
  }

  inline uint8_t qmin() const {
    return this->qmin_;
  }

  inline DeconvolutionOperatorTester& qmax(uint8_t qmax) {
    this->qmax_ = qmax;
    return *this;
  }

  inline uint8_t qmax() const {
    return this->qmax_;
  }

  inline DeconvolutionOperatorTester& has_bias(bool has_bias) {
    this->has_bias_ = has_bias;
    return *this;
  }

  inline bool has_bias() const {
    return this->has_bias_;
  }

  inline DeconvolutionOperatorTester& iterations(size_t iterations) {
    this->iterations_ = iterations;
    return *this;
  }

  inline size_t iterations() const {
    return this->iterations_;
  }

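  // Runs the quantized (QU8) deconvolution test: fills random inputs, computes
  // a reference result in int32, derives an output scale/zero point spanning
  // the accumulator range, then checks the operator against the reference
  // within 0.9 of a quantization step.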
  void TestQU8() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
    auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) +
      (batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels());
    std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<int32_t> bias(groups() * group_output_channels());
    std::vector<uint8_t> output((batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels());
    std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    const uint8_t input_zero_point = 127;
    const uint8_t kernel_zero_point = 127;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
      std::generate(bias.begin(), bias.end(), std::ref(i32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(accumulators.begin(), accumulators.end(), 0);
      }
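      // Accumulate contributions: output pixel (oy, ox) receives input pixel
      // (iy, ix) through kernel tap (ky, kx) only when oy + padding_top() -
      // ky * dilation_height() equals iy * stride_height() with iy in range
      // (and likewise along the width); unsigned underflow safely fails the check.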
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t y = oy + padding_top() - ky * dilation_height();
              const size_t iy = y / stride_height();
              if (iy * stride_height() == y && iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t x = ox + padding_left() - kx * dilation_width();
                  const size_t ix = x / stride_width();
                  if (ix * stride_width() == x && ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute renormalization parameters.
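      // The scale maps the observed accumulator range onto the 255 representable
      // steps, and the zero point centers that range within [0, 255].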
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const uint8_t output_zero_point = uint8_t(std::max(std::min(
        lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Create, setup, run, and destroy Deconvolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t deconvolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_deconvolution2d_nhwc_qu8(
          padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
          padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
          kernel_height(), kernel_width(),
          stride_height(), stride_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          input_zero_point, 1.0f /* input scale */,
          kernel_zero_point, 1.0f /* kernel scale */,
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_zero_point, output_scale, qmin(), qmax(),
          padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0,
          &deconvolution_op));

      // Smart pointer to automatically delete deconvolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_deconvolution_op(deconvolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_deconvolution2d_nhwc_qu8(
          deconvolution_op,
          batch_size(), input_height(), input_width(),
          adjustment_height(), adjustment_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(deconvolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

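  // Runs the F32 deconvolution test: the reference result is computed in float,
  // clamped to [output_min, output_max] derived from qmin()/qmax(), and the
  // operator output is checked within a 1.0e-4 relative tolerance.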
  void TestF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) +
      (batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels());
    std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output((batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels());
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));
      std::fill(output_ref.begin(), output_ref.end(), 0.0f);

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t y = oy + padding_top() - ky * dilation_height();
              const size_t iy = y / stride_height();
              if (iy * stride_height() == y && iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t x = ox + padding_left() - kx * dilation_width();
                  const size_t ix = x / stride_width();
                  if (ix * stride_width() == x && ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic] *
                            kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

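      // qmin()/qmax() are reused as fractional positions within the accumulated
      // range; the extreme values 0 and 255 disable the respective bound.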
      const float output_min = qmin() == 0 ? -std::numeric_limits<float>::infinity() :
        accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = qmax() == 255 ? std::numeric_limits<float>::infinity() :
        accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, run, and destroy Deconvolution operator.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t deconvolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_deconvolution2d_nhwc_f32(
          padding_tf_same() ? 0 : padding_top(), padding_tf_same() ? 0 : padding_right(),
          padding_tf_same() ? 0 : padding_bottom(), padding_tf_same() ? 0 : padding_left(),
          kernel_height(), kernel_width(),
          stride_height(), stride_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_min, output_max,
          padding_tf_same() ? XNN_FLAG_TENSORFLOW_SAME_PADDING : 0,
          &deconvolution_op));

      // Smart pointer to automatically delete deconvolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_deconvolution_op(deconvolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_deconvolution2d_nhwc_f32(
          deconvolution_op,
          batch_size(), input_height(), input_width(),
          adjustment_height(), adjustment_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(deconvolution_op, nullptr /* thread pool */));

      // Verify results.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c],
                  1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

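  // Same as TestQU8, but additionally re-setups the operator with the "next"
  // batch/input dimensions and runs it a second time, verifying that an
  // existing operator can be reshaped without being recreated.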
  void TestSetupQU8() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), rng);
    auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), rng);

    std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + std::max(
      (batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels(),
      (next_batch_size() * next_input_height() * next_input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()));
    std::vector<uint8_t> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<int32_t> bias(groups() * group_output_channels());
    std::vector<uint8_t> output(std::max(
      (batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels(),
      (next_batch_size() * next_output_height() * next_output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()));
    std::vector<int32_t> accumulators(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<double> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<int32_t> next_accumulators(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());
    std::vector<double> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());

    const uint8_t input_zero_point = 127;
    const uint8_t kernel_zero_point = 127;

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
      std::generate(bias.begin(), bias.end(), std::ref(i32rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results, without renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(accumulators.begin(), accumulators.end(), 0);
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t y = oy + padding_top() - ky * dilation_height();
              const size_t iy = y / stride_height();
              if (iy * stride_height() == y && iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t x = ox + padding_left() - kx * dilation_width();
                  const size_t ix = x / stride_width();
                  if (ix * stride_width() == x && ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          accumulators[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute renormalization parameters.
      const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
      const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());

      const double output_scale = double(uint32_t(accumulated_max - accumulated_min)) / 255.0;
      const uint8_t output_zero_point = uint8_t(std::max(std::min(
        lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
        long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));

      // Renormalize reference results.
      std::transform(accumulators.cbegin(), accumulators.cend(), output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Create, setup, and run Deconvolution operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t deconvolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_deconvolution2d_nhwc_qu8(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          kernel_height(), kernel_width(),
          stride_height(), stride_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          input_zero_point, 1.0f /* input scale */,
          kernel_zero_point, 1.0f /* kernel scale */,
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_zero_point, output_scale, qmin(), qmax(),
          0, &deconvolution_op));

      // Smart pointer to automatically delete deconvolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_deconvolution_op(deconvolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_deconvolution2d_nhwc_qu8(
          deconvolution_op,
          batch_size(), input_height(), input_width(),
          adjustment_height(), adjustment_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(deconvolution_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), std::ref(u8rng));
      std::fill(output.begin(), output.end(), 0xA5);

      // Compute reference results for the second run, including renormalization.
      if (has_bias()) {
        for (size_t i = 0; i < next_batch_size(); i++) {
          for (size_t oy = 0; oy < next_output_height(); oy++) {
            for (size_t ox = 0; ox < next_output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(next_accumulators.begin(), next_accumulators.end(), 0);
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t y = oy + padding_top() - ky * dilation_height();
              const size_t iy = y / stride_height();
              if (iy * stride_height() == y && iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t x = ox + padding_left() - kx * dilation_width();
                  const size_t ix = x / stride_width();
                  if (ix * stride_width() == x && ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_accumulators[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            (int32_t(input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic]) - int32_t(input_zero_point)) *
                            (int32_t(kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic]) - int32_t(kernel_zero_point));
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      std::transform(next_accumulators.cbegin(), next_accumulators.cend(), next_output_ref.begin(),
        [this, output_scale, output_zero_point](int32_t x) -> double {
          return std::max<double>(std::min<double>(double(x) / output_scale, double(qmax()) - output_zero_point), double(qmin()) - output_zero_point);
        });

      // Setup and run Deconvolution operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_deconvolution2d_nhwc_qu8(
          deconvolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          adjustment_height(), adjustment_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(deconvolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_LE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmax()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_GE(int32_t(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]), int32_t(qmin()))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
                  double(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c]) - double(output_zero_point),
                  0.9)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

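  // F32 counterpart of TestSetupQU8: runs once, then reshapes to the "next"
  // dimensions via xnn_setup_deconvolution2d_nhwc_f32 and verifies the second run.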
  void TestSetupF32() const {
    std::random_device random_device;
    auto rng = std::mt19937(random_device());
    auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), rng);

    std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + std::max(
      (batch_size() * input_height() * input_width() - 1) * input_pixel_stride() + groups() * group_input_channels(),
      (next_batch_size() * next_input_height() * next_input_width() - 1) * input_pixel_stride() + groups() * group_input_channels()));
    std::vector<float> kernel(groups() * group_output_channels() * kernel_height() * kernel_width() * group_input_channels());
    std::vector<float> bias(groups() * group_output_channels());
    std::vector<float> output(std::max(
      (batch_size() * output_height() * output_width() - 1) * output_pixel_stride() + groups() * group_output_channels(),
      (next_batch_size() * next_output_height() * next_output_width() - 1) * output_pixel_stride() + groups() * group_output_channels()));
    std::vector<float> output_ref(batch_size() * output_height() * output_width() * groups() * group_output_channels());
    std::vector<float> next_output_ref(next_batch_size() * next_output_height() * next_output_width() * groups() * group_output_channels());

    for (size_t iteration = 0; iteration < iterations(); iteration++) {
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
      std::generate(bias.begin(), bias.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results, without clamping.
      if (has_bias()) {
        for (size_t i = 0; i < batch_size(); i++) {
          for (size_t oy = 0; oy < output_height(); oy++) {
            for (size_t ox = 0; ox < output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(output_ref.begin(), output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t oy = 0; oy < output_height(); oy++) {
          for (size_t ox = 0; ox < output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t y = oy + padding_top() - ky * dilation_height();
              const size_t iy = y / stride_height();
              if (iy * stride_height() == y && iy < input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t x = ox + padding_left() - kx * dilation_width();
                  const size_t ix = x / stride_width();
                  if (ix * stride_width() == x && ix < input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          output_ref[(((i * output_height() + oy) * output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * input_height() + iy) * input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic] *
                            kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }

      // Compute clamping parameters.
      const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
      const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());

      const float output_min = accumulated_min + (accumulated_max - accumulated_min) / 255.0f * float(qmin());
      const float output_max = accumulated_max - (accumulated_max - accumulated_min) / 255.0f * float(255 - qmax());

      // Clamp reference results.
      for (float& value : output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Create, setup, and run Deconvolution operator once.
      ASSERT_EQ(xnn_status_success, xnn_initialize(nullptr /* allocator */));
      xnn_operator_t deconvolution_op = nullptr;

      ASSERT_EQ(xnn_status_success,
        xnn_create_deconvolution2d_nhwc_f32(
          padding_top(), padding_right(), padding_bottom(), padding_left(),
          kernel_height(), kernel_width(),
          stride_height(), stride_width(),
          dilation_height(), dilation_width(),
          groups(), group_input_channels(), group_output_channels(),
          input_pixel_stride(), output_pixel_stride(),
          kernel.data(), has_bias() ? bias.data() : nullptr,
          output_min, output_max,
          0, &deconvolution_op));

      // Smart pointer to automatically delete deconvolution_op.
      std::unique_ptr<xnn_operator, decltype(&xnn_delete_operator)> auto_deconvolution_op(deconvolution_op, xnn_delete_operator);

      ASSERT_EQ(xnn_status_success,
        xnn_setup_deconvolution2d_nhwc_f32(
          deconvolution_op,
          batch_size(), input_height(), input_width(),
          adjustment_height(), adjustment_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(deconvolution_op, nullptr /* thread pool */));

      // Verify results of the first run.
      for (size_t i = 0; i < batch_size(); i++) {
        for (size_t y = 0; y < output_height(); y++) {
          for (size_t x = 0; x < output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c],
                  output[((i * output_height() + y) * output_width() + x) * output_pixel_stride() + g * group_output_channels() + c],
                  1.0e-4 * std::abs(output_ref[(((i * output_height() + y) * output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }

      // Re-generate data for the second run.
      std::generate(input.begin(), input.end(), std::ref(f32rng));
      std::fill(output.begin(), output.end(), nanf(""));

      // Compute reference results for the second run, including clamping.
      if (has_bias()) {
        for (size_t i = 0; i < next_batch_size(); i++) {
          for (size_t oy = 0; oy < next_output_height(); oy++) {
            for (size_t ox = 0; ox < next_output_width(); ox++) {
              for (size_t g = 0; g < groups(); g++) {
                for (size_t oc = 0; oc < group_output_channels(); oc++) {
                  next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] =
                    bias[g * group_output_channels() + oc];
                }
              }
            }
          }
        }
      } else {
        std::fill(next_output_ref.begin(), next_output_ref.end(), 0.0f);
      }
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t oy = 0; oy < next_output_height(); oy++) {
          for (size_t ox = 0; ox < next_output_width(); ox++) {
            for (size_t ky = 0; ky < kernel_height(); ky++) {
              const size_t y = oy + padding_top() - ky * dilation_height();
              const size_t iy = y / stride_height();
              if (iy * stride_height() == y && iy < next_input_height()) {
                for (size_t kx = 0; kx < kernel_width(); kx++) {
                  const size_t x = ox + padding_left() - kx * dilation_width();
                  const size_t ix = x / stride_width();
                  if (ix * stride_width() == x && ix < next_input_width()) {
                    for (size_t g = 0; g < groups(); g++) {
                      for (size_t oc = 0; oc < group_output_channels(); oc++) {
                        for (size_t ic = 0; ic < group_input_channels(); ic++) {
                          next_output_ref[(((i * next_output_height() + oy) * next_output_width() + ox) * groups() + g) * group_output_channels() + oc] +=
                            input[((i * next_input_height() + iy) * next_input_width() + ix) * input_pixel_stride() + g * group_input_channels() + ic] *
                            kernel[(((g * group_output_channels() + oc) * kernel_height() + ky) * kernel_width() + kx) * group_input_channels() + ic];
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
      for (float& value : next_output_ref) {
        value = std::max(std::min(value, output_max), output_min);
      }

      // Setup and run Deconvolution operator the second time, and destroy the operator.
      ASSERT_EQ(xnn_status_success,
        xnn_setup_deconvolution2d_nhwc_f32(
          deconvolution_op,
          next_batch_size(), next_input_height(), next_input_width(),
          adjustment_height(), adjustment_width(),
          input.data(), output.data(),
          nullptr /* thread pool */));

      ASSERT_EQ(xnn_status_success,
        xnn_run_operator(deconvolution_op, nullptr /* thread pool */));

      // Verify results of the second run.
      for (size_t i = 0; i < next_batch_size(); i++) {
        for (size_t y = 0; y < next_output_height(); y++) {
          for (size_t x = 0; x < next_output_width(); x++) {
            for (size_t g = 0; g < groups(); g++) {
              for (size_t c = 0; c < group_output_channels(); c++) {
                ASSERT_GE(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_min)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_LE(output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c], output_max)
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
                ASSERT_NEAR(
                  next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c],
                  output[((i * next_output_height() + y) * next_output_width() + x) * output_pixel_stride() + g * group_output_channels() + c],
                  1.0e-4 * std::abs(next_output_ref[(((i * next_output_height() + y) * next_output_width() + x) * groups() + g) * group_output_channels() + c]))
                  << "(x, y) = (" << x << ", " << y << "), group = " << g << ", channel = " << c;
              }
            }
          }
        }
      }
    }
  }

 private:
  uint32_t padding_top_{0};
  uint32_t padding_right_{0};
  uint32_t padding_bottom_{0};
  uint32_t padding_left_{0};
  bool padding_tf_same_{false};
  size_t input_height_{1};
  size_t input_width_{1};
  uint32_t groups_{1};
  size_t group_input_channels_{1};
  size_t input_pixel_stride_{0};
  size_t group_output_channels_{1};
  size_t output_pixel_stride_{0};
  size_t batch_size_{1};
  uint32_t kernel_height_{1};
  uint32_t kernel_width_{1};
  uint32_t adjustment_height_{0};
  uint32_t adjustment_width_{0};
  uint32_t dilation_height_{1};
  uint32_t dilation_width_{1};
  uint32_t stride_height_{1};
  uint32_t stride_width_{1};
  size_t next_input_height_{0};
  size_t next_input_width_{0};
  size_t next_batch_size_{0};
  uint8_t qmin_{0};
  uint8_t qmax_{255};
  bool has_bias_{true};
  size_t iterations_{1};
};
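
// A minimal usage sketch (hypothetical test case, for illustration only; the
// parameter values below are assumptions, not taken from this header):
//
//   TEST(DECONVOLUTION_NHWC_F32, sketch) {
//     DeconvolutionOperatorTester()
//       .input_size(10, 10)
//       .kernel_size(3, 3)
//       .stride(2)
//       .groups(1)
//       .group_input_channels(15)
//       .group_output_channels(17)
//       .iterations(3)
//       .TestF32();
//   }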