Blame - test/dwconv-microkernel-tester.h - platform/external/XNNPACK

2019-09-27 18:10:33 -0700

[diff] [blame]

1

// Copyright (c) Facebook, Inc. and its affiliates.

//

//

// This source code is licensed under the BSD-style license found in the

7

// LICENSE file in the root directory of this source tree.

#pragma once

#include <gtest/gtest.h>

#include <algorithm>

#include <cassert>

#include <cmath>

#include <cstddef>

#include <cstdlib>

#include <functional>

Marat Dukhan

5ce30d9

2020-04-14 03:31:26 -0700

[diff] [blame]

19

#include <limits>

XNNPACK Team

2019-09-27 18:10:33 -0700

[diff] [blame]

#include <random>

#include <vector>

Frank Barchard

2020-06-04 20:12:44 -0700

[diff] [blame]

23

#include <fp16.h>

24

XNNPACK Team

2019-09-27 18:10:33 -0700

[diff] [blame]

25

#include <xnnpack.h>

26

#include <xnnpack/AlignedAllocator.h>

27

#include <xnnpack/pack.h>

Marat Dukhan

eeaa7bd

2019-10-25 17:31:25 -0700

[diff] [blame]

28

#include <xnnpack/params-init.h>

XNNPACK Team

2019-09-27 18:10:33 -0700

[diff] [blame]

29

#include <xnnpack/params.h>

30

#include <xnnpack/requantization.h>

31

32

33

class DWConvMicrokernelTester {

public:

// Selects which parameter-initialization routine the Test() overloads use:
// Native picks the xnn_init_*_params functions, Scalar picks the
// xnn_init_scalar_*_params functions.
enum class Variant {

Native,

Scalar,

};

// Sets the number of output positions that Test() iterates over and passes to
// the micro-kernel. Must be non-zero. Returns *this for call chaining.
inline DWConvMicrokernelTester& width(uint32_t width) {
  assert(width != 0);
  width_ = width;
  return *this;
}

inline uint32_t width() const {

return this->width_;

}

// Sets the distance, in indirection-buffer entries, between the pointer groups
// of consecutive output positions (Test() reads indirection[x * step() + k]).
// Must be non-zero. Returns *this for call chaining.
inline DWConvMicrokernelTester& step(uint32_t step) {
  assert(step != 0);
  step_ = step;
  return *this;
}

inline uint32_t step() const {

return this->step_;

}

// Sets the number of channels processed per output position. Must be non-zero.
// Returns *this for call chaining.
inline DWConvMicrokernelTester& channels(uint32_t channels) {
  assert(channels != 0);
  channels_ = channels;
  return *this;
}

inline uint32_t channels() const {

67

return this->channels_;

68

}

69

70

// Sets the channel tile: it is forwarded to the weight-packing routines and is
// the granularity packed_channels() rounds up to. Must be non-zero.
// Returns *this for call chaining.
inline DWConvMicrokernelTester& cr(uint32_t cr) {
  assert(cr >= 1);
  cr_ = cr;
  return *this;
}

inline uint32_t cr() const {

return this->cr_;

}

// Sets the number of kernel taps accumulated per output element (the kernel
// buffer holds channels() * kr() weights). Must be non-zero.
// Returns *this for call chaining.
inline DWConvMicrokernelTester& kr(uint32_t kr) {
  assert(kr >= 1);
  kr_ = kr;
  return *this;
}

inline uint32_t kr() const {

return this->kr_;

}

inline uint32_t packed_channels() const {

91

return (channels() / cr() + !!(channels() % cr())) * cr();

92

}

93

94

// Sets the distance, in elements, between the outputs of consecutive output
// positions. Must be non-zero (0 is reserved internally to mean "not set").
// Returns *this for call chaining.
inline DWConvMicrokernelTester& output_stride(uint32_t output_stride) {
  assert(output_stride >= 1);
  output_stride_ = output_stride;
  return *this;
}

inline uint32_t output_stride() const {

101

if (this->output_stride_ == 0) {

102

return channels();

103

} else {

104

assert(this->output_stride_ >= channels());

105

return this->output_stride_;

}

}

// Sets the input zero point used by the quantized Test() overloads (the QS8
// overload uses it shifted by 0x80). Returns *this for call chaining.
inline DWConvMicrokernelTester& input_zero_point(uint8_t input_zero_point) {
  input_zero_point_ = input_zero_point;
  return *this;
}

inline uint8_t input_zero_point() const {

115

return this->input_zero_point_;

116

}

117

118

// Sets the kernel zero point used by the QU8 Test() overload.
// Returns *this for call chaining.
inline DWConvMicrokernelTester& kernel_zero_point(uint8_t kernel_zero_point) {
  kernel_zero_point_ = kernel_zero_point;
  return *this;
}

inline uint8_t kernel_zero_point() const {

124

return this->kernel_zero_point_;

125

}

126

127

// Sets the lower output clamp, expressed on a 0..255 scale. The quantized
// Test() overloads use it as a quantized bound (QS8 shifts it by 0x80); the
// floating-point minmax overloads map it onto the accumulated output range.
// Returns *this for call chaining.
inline DWConvMicrokernelTester& qmin(uint8_t qmin) {
  qmin_ = qmin;
  return *this;
}

inline uint8_t qmin() const {

return this->qmin_;

}

// Sets the upper output clamp, expressed on a 0..255 scale (see qmin for how
// the Test() overloads interpret it). Returns *this for call chaining.
inline DWConvMicrokernelTester& qmax(uint8_t qmax) {
  qmax_ = qmax;
  return *this;
}

inline uint8_t qmax() const {

return this->qmax_;

}

Frank Barchard

2020-05-17 16:10:36 -0700

[diff] [blame]

145

// Sets the element offset that Test() subtracts from every input pointer in
// the indirection buffer and passes (scaled to bytes) to the micro-kernel; the
// reference computation adds it back when reading inputs.
// Returns *this for call chaining.
inline DWConvMicrokernelTester& input_offset(size_t input_offset) {
  input_offset_ = input_offset;
  return *this;
}

inline size_t input_offset() const {

151

return this->input_offset_;

152

}

153

154

// Sets the position, within each kr()-sized group of indirection entries, that
// Test() overwrites with the pointer to the zero buffer. The default SIZE_MAX
// disables the substitution. Returns *this for call chaining.
inline DWConvMicrokernelTester& zero_index(size_t zero_index) {
  zero_index_ = zero_index;
  return *this;
}

inline size_t zero_index() const {

160

return this->zero_index_;

161

}

162

XNNPACK Team

2019-09-27 18:10:33 -0700

[diff] [blame]

163

// Sets how many randomized rounds each Test() overload runs.
// Returns *this for call chaining.
inline DWConvMicrokernelTester& iterations(size_t iterations) {
  iterations_ = iterations;
  return *this;
}

inline size_t iterations() const {

169

return this->iterations_;

170

}

171

Marat Dukhan

2020-07-14 18:17:29 -0700

[diff] [blame]

172

// Checks a QU8 unipass depthwise-convolution micro-kernel against a scalar
// reference computed here.
//
// dwconv_minmax: micro-kernel under test.
// variant: Native initializes the kernel parameters with
//          xnn_init_qu8_gemm_params, Scalar with xnn_init_scalar_qu8_gemm_params.
//
// Each of the iterations() rounds generates random inputs/weights/biases, packs
// the weights, builds a shuffled indirection buffer (optionally substituting
// the zero buffer at zero_index()), derives requantization parameters from the
// reference accumulators, runs the micro-kernel, and asserts that every output
// lies within [qmin(), qmax()] and equals the requantized reference.
void Test(xnn_qu8_dwconv_minmax_unipass_ukernel_function dwconv_minmax, Variant variant = Variant::Native) const {
  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  // Bind the engine by reference: binding by value would give every generator
  // its own copy of the same engine state, producing correlated streams.
  auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
  auto u8rng = std::bind(std::uniform_int_distribution<uint32_t>(0, std::numeric_limits<uint8_t>::max()), std::ref(rng));

  std::vector<const uint8_t*> indirection((width() - 1) * step() + kr());
  std::vector<uint8_t> input(XNN_EXTRA_BYTES / sizeof(uint8_t) + indirection.size() * channels());
  std::vector<uint8_t> kernel(channels() * kr());
  std::vector<int32_t> bias(channels());
  std::vector<uint8_t, AlignedAllocator<uint8_t, 64>> packed_weights((kr() + sizeof(int32_t) / sizeof(uint8_t)) * packed_channels());
  std::vector<uint8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint8_t));
  std::vector<uint8_t> output((width() - 1) * output_stride() + channels());
  std::vector<int32_t> accumulators(width() * channels());
  std::vector<uint8_t> output_ref(width() * channels());

  for (size_t iteration = 0; iteration < iterations(); iteration++) {
    // Regenerate until the data is not constant (when it has more than one element).
    do {
      std::generate(input.begin(), input.end(), std::ref(u8rng));
    } while (input.size() > 1 && *std::max_element(input.cbegin(), input.cend()) == *std::min_element(input.cbegin(), input.cend()));
    do {
      std::generate(kernel.begin(), kernel.end(), std::ref(u8rng));
    } while (kernel.size() > 1 && *std::max_element(kernel.cbegin(), kernel.cend()) == *std::min_element(kernel.cbegin(), kernel.cend()));
    std::generate(bias.begin(), bias.end(), std::ref(i32rng));
    std::fill(zero.begin(), zero.end(), input_zero_point());
    // Canary value so untouched output elements are detectable.
    std::fill(output.begin(), output.end(), 0xA5);

    std::fill(packed_weights.begin(), packed_weights.end(), 0);
    const xnn_qu8_packing_params packing_params = { input_zero_point(), kernel_zero_point() };
    xnn_pack_qu8_dwconv_ghw_w(
      kr(), 1, channels(), cr(),
      kernel.data(), bias.data(), packed_weights.data(), &packing_params);
    for (size_t i = 0; i < indirection.size(); i++) {
      indirection[i] = input.data() + i * channels() - input_offset();
    }
    std::shuffle(indirection.begin(), indirection.end(), rng);
    if (zero_index() != SIZE_MAX) {
      // indirection.size() is (width() - 1) * step() + kr(), which need not be a
      // multiple of kr(); bound the written index itself so the last group
      // cannot write past the end of the buffer.
      for (size_t i = zero_index(); i < indirection.size(); i += kr()) {
        indirection[i] = zero.data();
      }
    }

    // Compute reference results, without renormalization.
    for (size_t x = 0; x < width(); x++) {
      for (size_t c = 0; c < channels(); c++) {
        // Accumulate in int32_t: the products are integers and the result is
        // stored into an int32_t accumulator.
        int32_t acc = bias[c];
        for (size_t k = 0; k < kr(); k++) {
          if (indirection[x * step() + k] != zero.data()) {
            acc +=
              (int32_t(indirection[x * step() + k][c + input_offset()]) - int32_t(input_zero_point())) *
              (int32_t(kernel[c * kr() + k]) - int32_t(kernel_zero_point()));
          }
        }
        accumulators[x * channels() + c] = acc;
      }
    }

    // Compute renormalization parameters.
    const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
    const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());
    const uint32_t accumulated_range = uint32_t(accumulated_max) - uint32_t(accumulated_min);
    const double output_scale = accumulated_range >= 256 ? double(accumulated_range) / 255.0 : 1.00001;
    const uint8_t output_zero_point = uint8_t(std::max(std::min(
      lrint(127.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
      long(std::numeric_limits<uint8_t>::max())), long(std::numeric_limits<uint8_t>::min())));

    // Prepare parameters.
    const float requantization_scale = 1.0f / float(output_scale);
    union xnn_qu8_gemm_params quantization_params = { };
    switch (variant) {
      case Variant::Native:
        quantization_params = xnn_init_qu8_gemm_params(
          kernel_zero_point(), requantization_scale, output_zero_point, qmin(), qmax());
        break;
      case Variant::Scalar:
        quantization_params = xnn_init_scalar_qu8_gemm_params(
          kernel_zero_point(), requantization_scale, output_zero_point, qmin(), qmax());
        break;
    }
    const union xnn_qu8_requantization_params scalar_requantization_params =
      xnn_init_scalar_qu8_requantization_params(requantization_scale, output_zero_point, qmin(), qmax());

    // Renormalize reference results.
    for (size_t x = 0; x < width(); x++) {
      for (size_t c = 0; c < channels(); c++) {
        output_ref[x * channels() + c] = xnn_qu8_requantize_q31(accumulators[x * channels() + c], scalar_requantization_params);
      }
    }

    // Call optimized micro-kernel.
    dwconv_minmax(
      channels(), width(),
      indirection.data(), packed_weights.data(), output.data(),
      step() * sizeof(void*),
      (output_stride() - channels()) * sizeof(uint8_t),
      input_offset() * sizeof(uint8_t), zero.data(),
      &quantization_params);

    // Verify results.
    for (size_t x = 0; x < width(); x++) {
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_GE(uint32_t(output[x * output_stride() + c]), uint32_t(qmin()))
          << "x = " << x << ", channel = " << c;
        ASSERT_LE(uint32_t(output[x * output_stride() + c]), uint32_t(qmax()))
          << "x = " << x << ", channel = " << c;
        ASSERT_EQ(uint32_t(output[x * output_stride() + c]), uint32_t(output_ref[x * channels() + c]))
          << "x = " << x << ", channel = " << c << ", accumulator = " << accumulators[x * channels() + c];
      }
    }
  }
}

Marat Dukhan

2020-08-04 13:59:04 -0700

[diff] [blame]

284

// Checks a QS8 unipass depthwise-convolution micro-kernel against a scalar
// reference computed here.
//
// dwconv_minmax: micro-kernel under test.
// variant: Native initializes the kernel parameters with
//          xnn_init_qs8_gemm_params, Scalar with xnn_init_scalar_qs8_gemm_params.
//
// The tester stores input_zero_point(), qmin() and qmax() as uint8_t; this
// overload shifts each of them by 0x80 before use. Each of the iterations()
// rounds generates random data, packs the weights, builds a shuffled
// indirection buffer (optionally substituting the zero buffer at zero_index()),
// derives requantization parameters from the reference accumulators, runs the
// micro-kernel, and asserts that every output lies within the shifted
// [qmin(), qmax()] range and equals the requantized reference.
void Test(xnn_qs8_dwconv_minmax_unipass_ukernel_function dwconv_minmax, Variant variant = Variant::Native) const {
  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  // Bind the engine by reference: binding by value would give every generator
  // its own copy of the same engine state, producing correlated streams.
  auto i32rng = std::bind(std::uniform_int_distribution<int32_t>(-10000, 10000), std::ref(rng));
  // The distribution must use a signed type: with an unsigned type the negative
  // lower bound wraps to a huge value, making the lower bound exceed the upper
  // bound, which violates the distribution's precondition.
  auto i8rng = std::bind(
    std::uniform_int_distribution<int32_t>(std::numeric_limits<int8_t>::min(), std::numeric_limits<int8_t>::max()), std::ref(rng));

  std::vector<const int8_t*> indirection((width() - 1) * step() + kr());
  std::vector<int8_t> input(XNN_EXTRA_BYTES / sizeof(int8_t) + indirection.size() * channels());
  std::vector<int8_t> kernel(channels() * kr());
  std::vector<int32_t> bias(channels());
  std::vector<int8_t, AlignedAllocator<int8_t, 64>> packed_weights((kr() + sizeof(int32_t) / sizeof(int8_t)) * packed_channels());
  std::vector<int8_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(int8_t));
  std::vector<int8_t> output((width() - 1) * output_stride() + channels());
  std::vector<int32_t> accumulators(width() * channels());
  std::vector<int8_t> output_ref(width() * channels());

  for (size_t iteration = 0; iteration < iterations(); iteration++) {
    // Regenerate until the data is not constant (when it has more than one element).
    do {
      std::generate(input.begin(), input.end(), std::ref(i8rng));
    } while (input.size() > 1 && *std::max_element(input.cbegin(), input.cend()) == *std::min_element(input.cbegin(), input.cend()));
    do {
      std::generate(kernel.begin(), kernel.end(), std::ref(i8rng));
    } while (kernel.size() > 1 && *std::max_element(kernel.cbegin(), kernel.cend()) == *std::min_element(kernel.cbegin(), kernel.cend()));
    std::generate(bias.begin(), bias.end(), std::ref(i32rng));
    std::fill(zero.begin(), zero.end(), int8_t(input_zero_point() - 0x80));
    // Canary value so untouched output elements are detectable.
    std::fill(output.begin(), output.end(), 0xA5);

    std::fill(packed_weights.begin(), packed_weights.end(), 0);
    const xnn_qs8_packing_params packing_params = { int8_t(input_zero_point() - 0x80) };
    xnn_pack_qs8_dwconv_ghw_w(
      kr(), 1, channels(), cr(),
      kernel.data(), bias.data(), packed_weights.data(), &packing_params);
    for (size_t i = 0; i < indirection.size(); i++) {
      indirection[i] = input.data() + i * channels() - input_offset();
    }
    std::shuffle(indirection.begin(), indirection.end(), rng);
    if (zero_index() != SIZE_MAX) {
      // indirection.size() is (width() - 1) * step() + kr(), which need not be a
      // multiple of kr(); bound the written index itself so the last group
      // cannot write past the end of the buffer.
      for (size_t i = zero_index(); i < indirection.size(); i += kr()) {
        indirection[i] = zero.data();
      }
    }

    // Compute reference results, without renormalization.
    for (size_t x = 0; x < width(); x++) {
      for (size_t c = 0; c < channels(); c++) {
        // Accumulate in int32_t: the products are integers and the result is
        // stored into an int32_t accumulator.
        int32_t acc = bias[c];
        for (size_t k = 0; k < kr(); k++) {
          if (indirection[x * step() + k] != zero.data()) {
            acc +=
              (int32_t(indirection[x * step() + k][c + input_offset()]) - int32_t(input_zero_point() - 0x80)) *
              int32_t(kernel[c * kr() + k]);
          }
        }
        accumulators[x * channels() + c] = acc;
      }
    }

    // Compute renormalization parameters.
    const int32_t accumulated_min = *std::min_element(accumulators.cbegin(), accumulators.cend());
    const int32_t accumulated_max = *std::max_element(accumulators.cbegin(), accumulators.cend());
    const uint32_t accumulated_range = uint32_t(accumulated_max) - uint32_t(accumulated_min);
    const double output_scale = accumulated_range >= 256 ? double(accumulated_range) / 255.0 : 1.00001;
    const int8_t output_zero_point = int8_t(std::max(std::min(
      lrint(-0.5 - 0.5 * double(accumulated_min + accumulated_max) / output_scale),
      long(std::numeric_limits<int8_t>::max())), long(std::numeric_limits<int8_t>::min())));

    // Prepare parameters.
    const float requantization_scale = 1.0f / float(output_scale);
    union xnn_qs8_gemm_params quantization_params = { };
    switch (variant) {
      case Variant::Native:
        quantization_params = xnn_init_qs8_gemm_params(
          requantization_scale, output_zero_point, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
        break;
      case Variant::Scalar:
        quantization_params = xnn_init_scalar_qs8_gemm_params(
          requantization_scale, output_zero_point, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));
        break;
    }
    const union xnn_qs8_requantization_params scalar_requantization_params =
      xnn_init_scalar_qs8_requantization_params(requantization_scale, output_zero_point, int8_t(qmin() - 0x80), int8_t(qmax() - 0x80));

    // Renormalize reference results.
    for (size_t x = 0; x < width(); x++) {
      for (size_t c = 0; c < channels(); c++) {
        output_ref[x * channels() + c] = xnn_qs8_requantize_q31(accumulators[x * channels() + c], scalar_requantization_params);
      }
    }

    // Call optimized micro-kernel.
    dwconv_minmax(
      channels(), width(),
      indirection.data(), packed_weights.data(), output.data(),
      step() * sizeof(void*),
      (output_stride() - channels()) * sizeof(int8_t),
      input_offset() * sizeof(int8_t), zero.data(),
      &quantization_params);

    // Verify results.
    for (size_t x = 0; x < width(); x++) {
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_GE(int32_t(output[x * output_stride() + c]), int32_t(qmin()) - 0x80)
          << "x = " << x << ", channel = " << c;
        ASSERT_LE(int32_t(output[x * output_stride() + c]), int32_t(qmax()) - 0x80)
          << "x = " << x << ", channel = " << c;
        ASSERT_EQ(int32_t(output[x * output_stride() + c]), int32_t(output_ref[x * channels() + c]))
          << "x = " << x << ", channel = " << c << ", accumulator = " << accumulators[x * channels() + c];
      }
    }
  }
}

Frank Barchard

2020-06-04 20:12:44 -0700

[diff] [blame]

397

// Checks an F16 unipass depthwise-convolution micro-kernel with output
// clamping against a reference accumulated in single precision.
//
// dwconv_minmax: micro-kernel under test.
// variant: accepted for parity with the other overloads; this overload does
//          not read it.
//
// Each of the iterations() rounds generates random half-precision data, packs
// the weights, builds a shuffled indirection buffer (optionally substituting
// the zero buffer at zero_index()), derives clamping bounds from the reference
// output range and qmin()/qmax(), runs the micro-kernel, and asserts that every
// output lies within the bounds and is within 1% of the clamped reference.
void Test(xnn_f16_dwconv_minmax_unipass_ukernel_function dwconv_minmax, Variant variant = Variant::Native) const {
  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  // Bind the engine by reference so draws advance the one shared engine
  // instead of a private copy of its initial state.
  auto f32rng = std::bind(std::uniform_real_distribution<float>(0.1f, 1.0f), std::ref(rng));
  auto f16rng = std::bind(fp16_ieee_from_fp32_value, f32rng);

  std::vector<const uint16_t*> indirection((width() - 1) * step() + kr());
  std::vector<uint16_t> input(XNN_EXTRA_BYTES / sizeof(uint16_t) + indirection.size() * channels());
  std::vector<uint16_t> kernel(channels() * kr());
  std::vector<uint16_t> bias(channels());
  std::vector<uint16_t, AlignedAllocator<uint16_t, 64>> packed_weights((kr() + 1) * packed_channels());
  std::vector<uint16_t> zero(channels() + XNN_EXTRA_BYTES / sizeof(uint16_t));
  std::vector<uint16_t> output((width() - 1) * output_stride() + channels());
  std::vector<float> output_ref(width() * channels());

  for (size_t iteration = 0; iteration < iterations(); iteration++) {
    std::generate(input.begin(), input.end(), std::ref(f16rng));
    std::generate(kernel.begin(), kernel.end(), std::ref(f16rng));
    std::generate(bias.begin(), bias.end(), std::ref(f16rng));
    std::fill(zero.begin(), zero.end(), 0);
    std::fill(output_ref.begin(), output_ref.end(), 0.0f);
    std::fill(output.begin(), output.end(), UINT16_C(0x7E00) /* NaN */);

    std::fill(packed_weights.begin(), packed_weights.end(), 0);
    xnn_pack_f16_dwconv_ghw_w(
      kr(), 1, channels(), cr(),
      kernel.data(), bias.data(), packed_weights.data(), nullptr);
    for (size_t i = 0; i < indirection.size(); i++) {
      indirection[i] = input.data() + i * channels() - input_offset();
    }
    std::shuffle(indirection.begin(), indirection.end(), rng);
    if (zero_index() != SIZE_MAX) {
      // indirection.size() is (width() - 1) * step() + kr(), which need not be a
      // multiple of kr(); bound the written index itself so the last group
      // cannot write past the end of the buffer.
      for (size_t i = zero_index(); i < indirection.size(); i += kr()) {
        indirection[i] = zero.data();
      }
    }

    // Compute reference results, without clamping.
    for (size_t x = 0; x < width(); x++) {
      for (size_t c = 0; c < channels(); c++) {
        float acc = fp16_ieee_to_fp32_value(bias[c]);
        for (size_t k = 0; k < kr(); k++) {
          if (indirection[x * step() + k] != zero.data()) {
            acc += fp16_ieee_to_fp32_value(indirection[x * step() + k][c + input_offset()]) * fp16_ieee_to_fp32_value(kernel[c * kr() + k]);
          }
        }
        output_ref[x * channels() + c] = acc;
      }
    }

    // Compute clamping parameters.
    const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
    const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
    const float accumulated_range = accumulated_max - accumulated_min;
    // Round-trip the bounds through half precision so the reference clamps to
    // the same values that are handed to the micro-kernel.
    const float output_min = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_min + accumulated_range / 255.0f * float(qmin())));
    const float output_max = fp16_ieee_to_fp32_value(fp16_ieee_from_fp32_value(accumulated_max - accumulated_range / 255.0f * float(255 - qmax())));

    // Prepare parameters.
    xnn_f16_minmax_params params = xnn_init_f16_minmax_params(
      fp16_ieee_from_fp32_value(output_min),
      fp16_ieee_from_fp32_value(output_max));

    // Clamp reference results.
    for (float& output_val : output_ref) {
      output_val = std::max(std::min(output_val, output_max), output_min);
    }

    // Call optimized micro-kernel.
    dwconv_minmax(
      channels(), width(),
      reinterpret_cast<const void**>(indirection.data()), packed_weights.data(), output.data(),
      step() * sizeof(void*),
      (output_stride() - channels()) * sizeof(uint16_t),
      input_offset() * sizeof(uint16_t), zero.data(),
      &params);

    // Verify results.
    for (size_t x = 0; x < width(); x++) {
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_GE(fp16_ieee_to_fp32_value(output[x * output_stride() + c]), output_min)
          << "x = " << x << ", channel = " << c;
        ASSERT_LE(fp16_ieee_to_fp32_value(output[x * output_stride() + c]), output_max)
          << "x = " << x << ", channel = " << c;
        ASSERT_NEAR(
            output_ref[x * channels() + c],
            fp16_ieee_to_fp32_value(output[x * output_stride() + c]),
            std::abs(output_ref[x * channels() + c]) * 1.0e-2)
          << "x = " << x << ", channel = " << c;
      }
    }
  }
}

Marat Dukhan

2020-04-09 04:19:26 -0700

[diff] [blame]

490

// Checks an F32 unipass depthwise-convolution micro-kernel (no output
// clamping) against a scalar reference computed here.
//
// dwconv: micro-kernel under test; it is called with a null parameters pointer.
//
// Each of the iterations() rounds generates random data, packs the weights,
// builds a shuffled indirection buffer (optionally substituting the zero buffer
// at zero_index()), runs the micro-kernel, and asserts that every output is
// within a relative tolerance of 1e-5 of the reference.
void Test(xnn_f32_dwconv_unipass_ukernel_function dwconv) const {
  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  // Bind the engine by reference so draws advance the one shared engine
  // instead of a private copy of its initial state.
  auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));

  std::vector<const float*> indirection((width() - 1) * step() + kr());
  std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + indirection.size() * channels());
  std::vector<float> kernel(channels() * kr());
  std::vector<float> bias(channels());
  std::vector<float, AlignedAllocator<float, 64>> packed_weights((kr() + 1) * packed_channels());
  std::vector<float> zero(channels() + XNN_EXTRA_BYTES / sizeof(float));
  std::vector<float> output((width() - 1) * output_stride() + channels());
  std::vector<float> output_ref(width() * channels());

  for (size_t iteration = 0; iteration < iterations(); iteration++) {
    std::generate(input.begin(), input.end(), std::ref(f32rng));
    std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
    std::generate(bias.begin(), bias.end(), std::ref(f32rng));
    std::fill(zero.begin(), zero.end(), 0.0f);
    // NaN canaries so unwritten elements fail the comparison below.
    std::fill(output_ref.begin(), output_ref.end(), nanf(""));
    std::fill(output.begin(), output.end(), nanf(""));

    std::fill(packed_weights.begin(), packed_weights.end(), 0.0f);
    xnn_pack_f32_dwconv_ghw_w(
      kr(), 1, channels(), cr(),
      kernel.data(), bias.data(), packed_weights.data(), nullptr);
    for (size_t i = 0; i < indirection.size(); i++) {
      indirection[i] = input.data() + i * channels() - input_offset();
    }
    std::shuffle(indirection.begin(), indirection.end(), rng);
    if (zero_index() != SIZE_MAX) {
      // indirection.size() is (width() - 1) * step() + kr(), which need not be a
      // multiple of kr(); bound the written index itself so the last group
      // cannot write past the end of the buffer.
      for (size_t i = zero_index(); i < indirection.size(); i += kr()) {
        indirection[i] = zero.data();
      }
    }

    // Compute reference results, without clamping.
    for (size_t x = 0; x < width(); x++) {
      for (size_t c = 0; c < channels(); c++) {
        float acc = bias[c];
        for (size_t k = 0; k < kr(); k++) {
          if (indirection[x * step() + k] != zero.data()) {
            acc += indirection[x * step() + k][c + input_offset()] * kernel[c * kr() + k];
          }
        }
        output_ref[x * channels() + c] = acc;
      }
    }

    // Call optimized micro-kernel.
    dwconv(
      channels(), width(),
      indirection.data(), packed_weights.data(), output.data(),
      step() * sizeof(void*),
      (output_stride() - channels()) * sizeof(float),
      input_offset() * sizeof(float), zero.data(),
      nullptr);

    // Verify results.
    for (size_t x = 0; x < width(); x++) {
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_NEAR(
            output_ref[x * channels() + c],
            output[x * output_stride() + c],
            std::abs(output_ref[x * channels() + c]) * 1.0e-5)
          << "x = " << x << ", channel = " << c;
      }
    }
  }
}

// Checks an F32 unipass depthwise-convolution micro-kernel with output
// clamping against a scalar reference computed here.
//
// dwconv_minmax: micro-kernel under test.
// variant: Native initializes the clamping parameters with
//          xnn_init_f32_minmax_params, Scalar with
//          xnn_init_scalar_f32_minmax_params.
//
// Each of the iterations() rounds generates random data, packs the weights,
// builds a shuffled indirection buffer (optionally substituting the zero buffer
// at zero_index()), derives clamping bounds from the reference output range and
// qmin()/qmax(), runs the micro-kernel, and asserts that every output lies
// within the bounds and within a relative tolerance of 1e-5 of the clamped
// reference.
void Test(xnn_f32_dwconv_minmax_unipass_ukernel_function dwconv_minmax, Variant variant = Variant::Native) const {
  std::random_device random_device;
  auto rng = std::mt19937(random_device());
  // Bind the engine by reference so draws advance the one shared engine
  // instead of a private copy of its initial state.
  auto f32rng = std::bind(std::uniform_real_distribution<float>(0.0f, 1.0f), std::ref(rng));

  std::vector<const float*> indirection((width() - 1) * step() + kr());
  std::vector<float> input(XNN_EXTRA_BYTES / sizeof(float) + indirection.size() * channels());
  std::vector<float> kernel(channels() * kr());
  std::vector<float> bias(channels());
  std::vector<float, AlignedAllocator<float, 64>> packed_weights((kr() + 1) * packed_channels());
  std::vector<float> zero(channels() + XNN_EXTRA_BYTES / sizeof(float));
  std::vector<float> output((width() - 1) * output_stride() + channels());
  std::vector<float> output_ref(width() * channels());

  for (size_t iteration = 0; iteration < iterations(); iteration++) {
    std::generate(input.begin(), input.end(), std::ref(f32rng));
    std::generate(kernel.begin(), kernel.end(), std::ref(f32rng));
    std::generate(bias.begin(), bias.end(), std::ref(f32rng));
    std::fill(zero.begin(), zero.end(), 0.0f);
    // NaN canaries so unwritten elements fail the comparisons below.
    std::fill(output_ref.begin(), output_ref.end(), nanf(""));
    std::fill(output.begin(), output.end(), nanf(""));

    std::fill(packed_weights.begin(), packed_weights.end(), 0.0f);
    xnn_pack_f32_dwconv_ghw_w(
      kr(), 1, channels(), cr(),
      kernel.data(), bias.data(), packed_weights.data(), nullptr);
    for (size_t i = 0; i < indirection.size(); i++) {
      indirection[i] = input.data() + i * channels() - input_offset();
    }
    std::shuffle(indirection.begin(), indirection.end(), rng);
    if (zero_index() != SIZE_MAX) {
      // indirection.size() is (width() - 1) * step() + kr(), which need not be a
      // multiple of kr(); bound the written index itself so the last group
      // cannot write past the end of the buffer.
      for (size_t i = zero_index(); i < indirection.size(); i += kr()) {
        indirection[i] = zero.data();
      }
    }

    // Compute reference results, without clamping.
    for (size_t x = 0; x < width(); x++) {
      for (size_t c = 0; c < channels(); c++) {
        float acc = bias[c];
        for (size_t k = 0; k < kr(); k++) {
          if (indirection[x * step() + k] != zero.data()) {
            acc += indirection[x * step() + k][c + input_offset()] * kernel[c * kr() + k];
          }
        }
        output_ref[x * channels() + c] = acc;
      }
    }

    // Compute clamping parameters.
    const float accumulated_min = *std::min_element(output_ref.cbegin(), output_ref.cend());
    const float accumulated_max = *std::max_element(output_ref.cbegin(), output_ref.cend());
    const float accumulated_range = accumulated_max - accumulated_min;
    const float output_min = accumulated_min + accumulated_range / 255.0f * float(qmin());
    const float output_max = accumulated_max - accumulated_range / 255.0f * float(255 - qmax());

    // Prepare parameters.
    xnn_f32_minmax_params params = { };
    switch (variant) {
      case Variant::Native:
        params = xnn_init_f32_minmax_params(output_min, output_max);
        break;
      case Variant::Scalar:
        params = xnn_init_scalar_f32_minmax_params(output_min, output_max);
        break;
    }

    // Clamp reference results.
    for (float& output_val : output_ref) {
      output_val = std::max(std::min(output_val, output_max), output_min);
    }

    // Call optimized micro-kernel.
    dwconv_minmax(
      channels(), width(),
      indirection.data(), packed_weights.data(), output.data(),
      step() * sizeof(void*),
      (output_stride() - channels()) * sizeof(float),
      input_offset() * sizeof(float), zero.data(),
      &params);

    // Verify results.
    for (size_t x = 0; x < width(); x++) {
      for (size_t c = 0; c < channels(); c++) {
        ASSERT_GE(output[x * output_stride() + c], output_min)
          << "x = " << x << ", channel = " << c;
        ASSERT_LE(output[x * output_stride() + c], output_max)
          << "x = " << x << ", channel = " << c;
        ASSERT_NEAR(
            output_ref[x * channels() + c],
            output[x * output_stride() + c],
            std::abs(output_ref[x * channels() + c]) * 1.0e-5)
          << "x = " << x << ", channel = " << c;
      }
    }
  }
}

private:

// Configuration set through the chained setters; the initializers below are
// the values used when a setter is never called.
uint32_t channels_ = 1;
uint32_t cr_ = 1;
uint32_t kr_ = 1;
uint32_t width_ = 1;
uint32_t step_ = 1;
// 0 is a sentinel: output_stride() then reports channels().
uint32_t output_stride_ = 0;
uint8_t input_zero_point_ = 127;
uint8_t kernel_zero_point_ = 127;
uint8_t qmin_ = 0;
uint8_t qmax_ = 255;
size_t input_offset_ = 0;
// SIZE_MAX is a sentinel: Test() performs no zero-buffer substitution.
size_t zero_index_ = SIZE_MAX;

XNNPACK Team