Blame - src/indirection.c - platform/external/XNNPACK

XNNPACK Team

b455b12

2019-09-27 18:10:33 -0700

[diff] [blame]

1

// Copyright (c) Facebook, Inc. and its affiliates.
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

8

9

#include <stddef.h>

Marat Dukhan

0ab7553

2021-11-24 16:50:30 -0800

[diff] [blame]

10

#include <math.h>

XNNPACK Team

b455b12

2019-09-27 18:10:33 -0700

[diff] [blame]

#include <fxdiv.h>

#include <xnnpack/indirection.h>

15

#include <xnnpack/operator.h>

16

#include <xnnpack/math.h>

17

18

19

void xnn_indirection_init_conv2d(

20

xnn_operator_t op,

21

size_t output_tile_size,

22

uint32_t log2_element_size)

23

{

24

const void** indirection_buffer = op->indirection_buffer;

25

const void* input = op->input;

26

const void* zero = op->zero_buffer;

27

const size_t input_pixel_stride = op->input_pixel_stride << log2_element_size;

28

const size_t input_height = op->input_height;

29

const size_t input_width = op->input_width;

30

const size_t output_height = op->output_height;

31

const size_t output_width = op->output_width;

32

const size_t kernel_height = op->kernel_height;

33

const size_t kernel_width = op->kernel_width;

34

const size_t stride_height = op->stride_height;

35

const size_t stride_width = op->stride_width;

36

const size_t dilation_height = op->dilation_height;

37

const size_t dilation_width = op->dilation_width;

38

const size_t input_padding_top = op->padding_top;

39

const size_t input_padding_left = op->padding_left;

40

41

const size_t output_size = output_height * output_width;

42

const size_t tiled_output_size = round_up(output_size, output_tile_size);

43

const size_t kernel_size = kernel_height * kernel_width;

44

45

const struct fxdiv_divisor_size_t output_width_divisor = fxdiv_init_size_t(output_width);

46

47

for (size_t output_tile_start = 0; output_tile_start < tiled_output_size; output_tile_start += output_tile_size) {

48

for (size_t output_tile_offset = 0; output_tile_offset < output_tile_size; output_tile_offset++) {

49

const size_t output_index = min(output_tile_start + output_tile_offset, output_size - 1);

50

const struct fxdiv_result_size_t output_y_x = fxdiv_divide_size_t(output_index, output_width_divisor);

51

const size_t output_x = output_y_x.remainder;

52

const size_t output_y = output_y_x.quotient;

53

for (size_t kernel_y = 0; kernel_y < kernel_height; kernel_y++) {

54

const size_t input_y = output_y * stride_height + kernel_y * dilation_height - input_padding_top;

55

if (input_y < input_height) {

56

for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) {

57

const size_t input_x = output_x * stride_width + kernel_x * dilation_width - input_padding_left;

58

const size_t kernel_index = kernel_y * kernel_width + kernel_x;

59

const size_t index = output_tile_start * kernel_size + kernel_index * output_tile_size + output_tile_offset;

60

if (input_x < input_width) {

61

indirection_buffer[index] = (const void*)

62

((uintptr_t) input + (input_y * input_width + input_x) * input_pixel_stride);

63

} else {

64

indirection_buffer[index] = zero;

}

}

} else {

for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) {

69

const size_t kernel_index = kernel_y * kernel_width + kernel_x;

70

const size_t index = output_tile_start * kernel_size + kernel_index * output_tile_size + output_tile_offset;

71

indirection_buffer[index] = zero;

}

}

}

}

}

}

XNNPACK Team

2019-09-27 18:10:33 -0700

[diff] [blame]

79

void xnn_indirection_init_deconv2d(

80

xnn_operator_t op,

81

size_t output_tile_size,

82

uint32_t log2_element_size)

83

{

84

const void** indirection_buffer = op->indirection_buffer;

85

const void* input = op->input;

86

const size_t input_pixel_stride = op->input_pixel_stride << log2_element_size;

87

const void* zero = op->zero_buffer;

88

const size_t input_height = op->input_height;

89

const size_t input_width = op->input_width;

90

const size_t output_height = op->output_height;

91

const size_t output_width = op->output_width;

92

const size_t kernel_height = op->kernel_height;

93

const size_t kernel_width = op->kernel_width;

94

const size_t stride_height = op->stride_height;

95

const size_t stride_width = op->stride_width;

96

const size_t dilation_height = op->dilation_height;

97

const size_t dilation_width = op->dilation_width;

98

const size_t padding_top = op->padding_top;

99

const size_t padding_left = op->padding_left;

100

101

const size_t output_size = output_height * output_width;

102

const size_t tiled_output_size = round_up(output_size, output_tile_size);

103

const size_t kernel_size = kernel_height * kernel_width;

104

105

const struct fxdiv_divisor_size_t output_width_divisor = fxdiv_init_size_t(output_width);

106

const struct fxdiv_divisor_size_t stride_height_divisor = fxdiv_init_size_t(stride_height);

107

const struct fxdiv_divisor_size_t stride_width_divisor = fxdiv_init_size_t(stride_width);

108

109

for (size_t output_tile_start = 0; output_tile_start < tiled_output_size; output_tile_start += output_tile_size) {

110

for (size_t output_tile_offset = 0; output_tile_offset < output_tile_size; output_tile_offset++) {

111

const size_t output_index = min(output_tile_start + output_tile_offset, output_size - 1);

112

const struct fxdiv_result_size_t output_y_x = fxdiv_divide_size_t(output_index, output_width_divisor);

113

const size_t output_x = output_y_x.remainder;

114

const size_t output_y = output_y_x.quotient;

115

for (size_t kernel_y = 0; kernel_y < kernel_height; kernel_y++) {

116

const size_t y = output_y + padding_top - kernel_y * dilation_height;

117

const size_t input_y = fxdiv_quotient_size_t(y, stride_height_divisor);

118

for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) {

119

const size_t x = output_x + padding_left - kernel_x * dilation_width;

120

const size_t input_x = fxdiv_quotient_size_t(x, stride_width_divisor);

121

const size_t kernel_index = kernel_y * kernel_width + kernel_x;

122

const size_t index = output_tile_start * kernel_size + kernel_index * output_tile_size + output_tile_offset;

123

if (input_y * stride_height == y && input_y < input_height && input_x * stride_width == x && input_x < input_width) {

124

indirection_buffer[index] = (const void*) ((uintptr_t) input + (input_y * input_width + input_x) * input_pixel_stride);

125

} else {

126

indirection_buffer[index] = zero;

}

}

}

}

}

}

void xnn_indirection_init_subconv2d(

135

xnn_operator_t op,

136

size_t output_tile_size,

137

uint32_t log2_element_size)

138

{

139

const void** indirection_buffer = op->indirection_buffer;

140

struct subconvolution_params* subconvolution_params = op->subconvolution_buffer;

141

const void* input = op->input;

142

const size_t input_pixel_stride = op->input_pixel_stride << log2_element_size;

143

const void* zero = op->zero_buffer;

144

const size_t input_height = op->input_height;

145

const size_t input_width = op->input_width;

146

const size_t output_height = op->output_height;

147

const size_t output_width = op->output_width;

148

const size_t kernel_height = op->kernel_height;

149

const size_t kernel_width = op->kernel_width;

150

const size_t stride_height = op->stride_height;

151

const size_t stride_width = op->stride_width;

152

const size_t padding_top = op->padding_top;

153

const size_t padding_left = op->padding_left;

154

155

const size_t modulo_padding_top = padding_top % stride_height;

156

const size_t modulo_padding_left = padding_left % stride_width;

157

for (size_t offset_y = 0; offset_y < stride_height; offset_y++) {

158

const size_t output_y_start = subtract_modulo(offset_y, modulo_padding_top, stride_height);

159

for (size_t offset_x = 0; offset_x < stride_width; offset_x++) {

160

const size_t output_x_start = subtract_modulo(offset_x, modulo_padding_left, stride_width);

161

const size_t sliced_output_width = divide_round_up(output_width - output_x_start, stride_width);

162

163

subconvolution_params->indirection_buffer = indirection_buffer;

164

subconvolution_params->indirection_y_stride =

165

subconvolution_params->indirection_x_stride * round_up(sliced_output_width, output_tile_size);

166

++subconvolution_params;

167

168

for (size_t output_y = output_y_start; output_y < output_height; output_y += stride_height) {

169

for (size_t output_tile_start = 0; output_tile_start < sliced_output_width; output_tile_start += output_tile_size) {

170

for (size_t kernel_y = offset_y; kernel_y < kernel_height; kernel_y += stride_height) {

171

assert(doz(output_y + padding_top, kernel_y) % stride_height == 0);

172

const size_t y = output_y + padding_top - kernel_y;

173

const size_t input_y = y / stride_height;

174

175

for (size_t kernel_x = offset_x; kernel_x < kernel_width; kernel_x += stride_width) {

176

for (size_t output_tile_offset = 0; output_tile_offset < output_tile_size; output_tile_offset++) {

177

const size_t sliced_output_x = min(output_tile_start + output_tile_offset, sliced_output_width - 1);

178

const size_t output_x = output_x_start + sliced_output_x * stride_width;

179

180

assert(doz(output_x + padding_left, kernel_x) % stride_width == 0);

181

const size_t x = output_x + padding_left - kernel_x;

182

const size_t input_x = x / stride_width;

183

184

if (input_y < input_height && input_x < input_width) {

185

*indirection_buffer++ =

186

(const void*) ((uintptr_t) input + (input_y * input_width + input_x) * input_pixel_stride);

187

} else {

188

*indirection_buffer++ = zero;

}

}

}

}

}

}

}

}

}

Marat Dukhan

2020-10-15 09:04:21 -0700

[diff] [blame]

199

// Populates op->indirection_buffer for a 2D depthwise convolution.
//
// The slot for output pixel (output_y, output_x) and kernel element
// (kernel_y, kernel_x) is
//   output_y * step_height + output_x * step_width * kernel_height
//     + kernel_x * kernel_height + kernel_y
// and receives the address of the input pixel read by that tap, or
// op->zero_buffer when the tap lies outside of the input image.
//
//   op                - operator providing the geometry, the input pointer,
//                       the zero buffer and the destination buffer.
//   step_height       - multiplier applied to the output row in the slot index.
//   step_width        - multiplier applied (together with kernel_height) to
//                       the output column in the slot index.
//   log2_element_size - op->input_pixel_stride is shifted left by this amount
//                       to obtain the byte stride between input pixels.
void xnn_indirection_init_dwconv2d(
  xnn_operator_t op,
  size_t step_height,
  size_t step_width,
  uint32_t log2_element_size)
{
  const void** buffer = op->indirection_buffer;
  const uintptr_t input_base = (uintptr_t) op->input;
  const size_t pixel_bytes = op->input_pixel_stride << log2_element_size;
  const void* zero_pixel = op->zero_buffer;

  const size_t in_h = op->input_height;
  const size_t in_w = op->input_width;
  const size_t out_h = op->output_height;
  const size_t out_w = op->output_width;
  const size_t kernel_h = op->kernel_height;
  const size_t kernel_w = op->kernel_width;
  const size_t stride_h = op->stride_height;
  const size_t stride_w = op->stride_width;
  const size_t dilation_h = op->dilation_height;
  const size_t dilation_w = op->dilation_width;
  const size_t pad_top = op->padding_top;
  const size_t pad_left = op->padding_left;

  for (size_t out_row = 0; out_row < out_h; out_row++) {
    for (size_t ky = 0; ky < kernel_h; ky++) {
      // Unsigned arithmetic: a tap above the image wraps around to a huge
      // value and therefore fails the "< in_h" range check.
      const size_t in_row = out_row * stride_h + ky * dilation_h - pad_top;
      const bool row_inside = in_row < in_h;

      for (size_t out_col = 0; out_col < out_w; out_col++) {
        for (size_t kx = 0; kx < kernel_w; kx++) {
          const size_t in_col = out_col * stride_w + kx * dilation_w - pad_left;
          const size_t slot =
            out_row * step_height + out_col * step_width * kernel_h + kx * kernel_h + ky;

          if (row_inside && in_col < in_w) {
            buffer[slot] = (const void*) (input_base + (in_row * in_w + in_col) * pixel_bytes);
          } else {
            buffer[slot] = zero_pixel;
          }
        }
      }
    }
  }
}

XNNPACK Team

2019-09-27 18:10:33 -0700

[diff] [blame]

250

void xnn_indirection_init_maxpool2d(

251

xnn_operator_t op,

XNNPACK Team

b455b12

2019-09-27 18:10:33 -0700

[diff] [blame]

252

size_t step_height,

253

size_t step_width,

254

uint32_t log2_element_size)

255

{

256

const void** indirection_buffer = op->indirection_buffer;

257

const void* input = op->input;

258

const size_t input_pixel_stride = op->input_pixel_stride << log2_element_size;

XNNPACK Team

b455b12

2019-09-27 18:10:33 -0700

[diff] [blame]

259

const size_t input_height = op->input_height;

260

const size_t input_width = op->input_width;

261

const size_t output_height = op->output_height;

262

const size_t output_width = op->output_width;

263

const size_t pooling_height = op->kernel_height;

264

const size_t pooling_width = op->kernel_width;

265

const size_t stride_height = op->stride_height;

266

const size_t stride_width = op->stride_width;

267

const size_t dilation_height = op->dilation_height;

268

const size_t dilation_width = op->dilation_width;

269

const size_t input_padding_top = op->padding_top;

270

const size_t input_padding_left = op->padding_left;

271

Marat Dukhan

c58bd34

2020-03-19 18:53:05 -0700

[diff] [blame]

272

const bool any_dilation = (dilation_height | dilation_width) > 1;

273

274

if (any_dilation) {

275

// Clamp to the border doesn't work for pooling with dilation.

276

const size_t adjusted_padding_top = input_padding_top % dilation_height;

277

const size_t adjusted_padding_left = input_padding_left % dilation_width;

278

for (size_t output_y = 0; output_y < output_height; output_y++) {

279

for (size_t pooling_y = 0; pooling_y < pooling_height; pooling_y++) {

280

size_t safe_input_y = output_y * stride_height;

281

if XNN_UNPREDICTABLE(safe_input_y < adjusted_padding_top) {

282

safe_input_y += dilation_height;

283

}

284

safe_input_y -= adjusted_padding_top;

285

286

size_t input_y = output_y * stride_height + pooling_y * dilation_height - input_padding_top;

287

if XNN_UNPREDICTABLE(input_y >= input_height) {

288

input_y = safe_input_y;

289

}

290

291

for (size_t output_x = 0; output_x < output_width; output_x++) {

292

for (size_t pooling_x = 0; pooling_x < pooling_width; pooling_x++) {

293

size_t safe_input_x = output_x * stride_width;

294

if XNN_UNPREDICTABLE(safe_input_x < adjusted_padding_left) {

295

safe_input_x += dilation_width;

296

}

297

safe_input_x -= adjusted_padding_left;

298

299

size_t input_x = output_x * stride_width + pooling_x * dilation_width - input_padding_left;

300

if XNN_UNPREDICTABLE(input_x >= input_width) {

301

input_x = safe_input_x;

302

}

303

304

const size_t index = output_y * step_height + output_x * step_width * pooling_height + pooling_x * pooling_height + pooling_y;

Marat Dukhan

bdc8099

2020-04-13 01:21:18 -0700

[diff] [blame]

305

indirection_buffer[index] = (const void*) ((uintptr_t) input + (input_y * input_width + input_x) * input_pixel_stride);

Marat Dukhan

c58bd34

2020-03-19 18:53:05 -0700

[diff] [blame]

}

}

}

}

} else {

const size_t input_x_max = input_width - 1;

312

const size_t input_y_max = input_height - 1;

313

for (size_t output_y = 0; output_y < output_height; output_y++) {

314

for (size_t pooling_y = 0; pooling_y < pooling_height; pooling_y++) {

315

const size_t input_y = min(doz(output_y * stride_height + pooling_y * dilation_height, input_padding_top), input_y_max);

316

for (size_t output_x = 0; output_x < output_width; output_x++) {

317

for (size_t pooling_x = 0; pooling_x < pooling_width; pooling_x++) {

318

const size_t input_x = min(doz(output_x * stride_width + pooling_x * dilation_width, input_padding_left), input_x_max);

319

const size_t index = output_y * step_height + output_x * step_width * pooling_height + pooling_x * pooling_height + pooling_y;

Marat Dukhan

bdc8099

2020-04-13 01:21:18 -0700

[diff] [blame]

320

indirection_buffer[index] = (const void*) ((uintptr_t) input + (input_y * input_width + input_x) * input_pixel_stride);

Marat Dukhan

c58bd34

2020-03-19 18:53:05 -0700

[diff] [blame]

321

}

XNNPACK Team

b455b12

2019-09-27 18:10:33 -0700

[diff] [blame]

}

}

}

}

}

Artsiom Ablavatski

2020-10-27 15:52:59 -0700

[diff] [blame]

328

// Builds the indirection buffer and interpolation weights for bilinear
// resizing with float weights.
//
// For every output pixel, in row-major order, four pointers are appended to
// indirection_buffer (top-left, top-right, bottom-left, bottom-right input
// pixels) and two floats are appended to packed_weights (the horizontal
// fraction alpha_x followed by the vertical fraction alpha_y).
//
//   input_pixel_stride - byte distance between consecutive input pixels.
//   input_height/width - input image size; non-zero and below 2**24.
//   output_height/width- output image size; non-zero and below 2**24.
//   input              - address of the first input pixel.
//   indirection_buffer - receives 4 pointers per output pixel.
//   packed_weights     - receives 2 floats per output pixel.
//   align_corners      - when set (and the output extent is not 1), the scale
//                        is (input - 1) / (output - 1) instead of
//                        input / output.
//   tensorflow_legacy  - when this or align_corners is set, the source
//                        coordinate is output * scale; otherwise it is
//                        output * scale + (0.5 * scale - 0.5), limited to
//                        [0, input - 1].
void xnn_indirection_init_resize_bilinear2d_hwc_f32(
  size_t input_pixel_stride,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const void* input,
  const void** indirection_buffer,
  float* packed_weights,
  bool align_corners,
  bool tensorflow_legacy)
{
  assert(input_height != 0);
  assert(input_height < 16777216 /* 2**24 */);
  assert(input_width != 0);
  assert(input_width < 16777216 /* 2**24 */);
  assert(output_height != 0);
  assert(output_height < 16777216 /* 2**24 */);
  assert(output_width != 0);
  assert(output_width < 16777216 /* 2**24 */);

  const int32_t height_adjustment = (int32_t) (align_corners && output_height != 1);
  const int32_t width_adjustment = (int32_t) (align_corners && output_width != 1);
  const float height_scale =
    (float) ((int32_t) input_height - height_adjustment) / (float) ((int32_t) output_height - height_adjustment);
  const float width_scale =
    (float) ((int32_t) input_width - width_adjustment) / (float) ((int32_t) output_width - width_adjustment);

  const uint32_t input_x_max = (uint32_t) input_width - 1;
  const uint32_t input_y_max = (uint32_t) input_height - 1;
  const uintptr_t base = (uintptr_t) input;

  if (tensorflow_legacy || align_corners) {
    for (size_t output_y = 0; output_y < output_height; output_y++) {
      const float input_y = (float) (int32_t) output_y * height_scale;
      assert(input_y >= 0.0f);
      assert(input_y < (float) input_height);

      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
      // The row below the top one, limited to the last input row.
      const uint32_t input_y_bottom = input_y_top + 1 < input_y_max ? input_y_top + 1 : input_y_max;
      const float alpha_y = input_y - (float) input_y_top;
      const size_t top_row = input_y_top * input_width;
      const size_t bottom_row = input_y_bottom * input_width;

      for (size_t output_x = 0; output_x < output_width; output_x++) {
        const float input_x = (float) (int32_t) output_x * width_scale;
        assert(input_x >= 0.0f);
        assert(input_x < (float) input_width);

        const uint32_t input_x_left = (uint32_t) (int32_t) input_x;
        // The column right of the left one, limited to the last input column.
        const uint32_t input_x_right = input_x_left + 1 < input_x_max ? input_x_left + 1 : input_x_max;
        const float alpha_x = input_x - (float) input_x_left;

        *indirection_buffer++ = (void*) (base + (top_row + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (void*) (base + (top_row + input_x_right) * input_pixel_stride);
        *indirection_buffer++ = (void*) (base + (bottom_row + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (void*) (base + (bottom_row + input_x_right) * input_pixel_stride);
        *packed_weights++ = alpha_x;
        *packed_weights++ = alpha_y;
      }
    }
  } else {
    const float height_offset = 0.5f * height_scale - 0.5f;
    const float width_offset = 0.5f * width_scale - 0.5f;
    const float input_y_limit = (float) input_y_max;
    const float input_x_limit = (float) input_x_max;

    for (size_t output_y = 0; output_y < output_height; output_y++) {
      float input_y = (float) (int32_t) output_y * height_scale + height_offset;
      // Limit the source row to [0, input_height - 1].
      if (!(0.0f < input_y)) {
        input_y = 0.0f;
      }
      if (input_y_limit < input_y) {
        input_y = input_y_limit;
      }

      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
      assert((int32_t) input_y_top >= 0);
      const uint32_t input_y_bottom = input_y_top + 1 < input_y_max ? input_y_top + 1 : input_y_max;
      const float alpha_y = input_y - (float) input_y_top;
      const size_t top_row = input_y_top * input_width;
      const size_t bottom_row = input_y_bottom * input_width;

      for (size_t output_x = 0; output_x < output_width; output_x++) {
        float input_x = (float) (int32_t) output_x * width_scale + width_offset;
        // Limit the source column to [0, input_width - 1].
        if (!(0.0f < input_x)) {
          input_x = 0.0f;
        }
        if (input_x_limit < input_x) {
          input_x = input_x_limit;
        }

        const uint32_t input_x_left = (uint32_t) (int32_t) input_x;
        assert((int32_t) input_x_left >= 0);
        const uint32_t input_x_right = input_x_left + 1 < input_x_max ? input_x_left + 1 : input_x_max;
        const float alpha_x = input_x - (float) input_x_left;

        *indirection_buffer++ = (void*) (base + (top_row + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (void*) (base + (top_row + input_x_right) * input_pixel_stride);
        *indirection_buffer++ = (void*) (base + (bottom_row + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (void*) (base + (bottom_row + input_x_right) * input_pixel_stride);
        *packed_weights++ = alpha_x;
        *packed_weights++ = alpha_y;
      }
    }
  }
}

Marat Dukhan

2021-11-24 16:50:30 -0800

[diff] [blame]

423

void xnn_indirection_init_resize_bilinear2d_hwc_q11(

424

size_t input_pixel_stride,

425

size_t input_height,

426

size_t input_width,

427

size_t output_height,

428

size_t output_width,

429

const void* input,

430

const void** indirection_buffer,

431

int16_t* packed_weights,

432

bool align_corners,

433

bool tensorflow_legacy)

434

{

435

assert(input_height != 0);

436

assert(input_height < 16777216 /* 2**24 */);

437

assert(input_width != 0);

438

assert(input_width < 16777216 /* 2**24 */);

439

assert(output_height != 0);

440

assert(output_height < 16777216 /* 2**24 */);

441

assert(output_width != 0);

442

assert(output_width < 16777216 /* 2**24 */);

443

444

const int32_t width_adjustment = (int32_t) (align_corners && output_width != 1);

445

const int32_t height_adjustment = (int32_t) (align_corners && output_height != 1);

446

const float width_scale =

447

(float) ((int32_t) input_width - width_adjustment) / (float) ((int32_t) output_width - width_adjustment);

448

const float height_scale =

449

(float) ((int32_t) input_height - height_adjustment) / (float) ((int32_t) output_height - height_adjustment);

450

451

const uint32_t input_y_max = (uint32_t) input_height - 1;

452

const uint32_t input_x_max = (uint32_t) input_width - 1;

453

if (tensorflow_legacy || align_corners) {

454

for (size_t output_y = 0; output_y < output_height; output_y++) {

455

const float input_y = (float) (int32_t) output_y * height_scale;

456

assert(input_y >= 0.0f);

457

assert(input_y < (float) input_height);

458

459

const uint32_t input_y_top = (uint32_t) (int32_t) input_y;

460

const uint32_t input_y_bottom = math_min_u32(input_y_top + 1, input_y_max);

461

const float alpha_y = input_y - (float) input_y_top;

462

for (size_t output_x = 0; output_x < output_width; output_x++) {

463

const float input_x = (float) (int32_t) output_x * width_scale;

464

assert(input_x >= 0.0f);

465

assert(input_x < (float) input_width);

466

467

const uint32_t input_x_left = (uint32_t) (int32_t) input_x;

468

const uint32_t input_x_right = math_min_u32(input_x_left + 1, input_x_max);

469

const float alpha_x = input_x - (float) input_x_left;

470

indirection_buffer[0] =

471

(void*) ((uintptr_t) input + (input_y_top * input_width + input_x_left) * input_pixel_stride);

472

indirection_buffer[1] =

473

(void*) ((uintptr_t) input + (input_y_top * input_width + input_x_right) * input_pixel_stride);

474

indirection_buffer[2] =

475

(void*) ((uintptr_t) input + (input_y_bottom * input_width + input_x_left) * input_pixel_stride);

476

indirection_buffer[3] =

477

(void*) ((uintptr_t) input + (input_y_bottom * input_width + input_x_right) * input_pixel_stride);

478

packed_weights[0] = (int16_t) lrintf(alpha_x * 0x1.0p+11f);

479

packed_weights[1] = (int16_t) lrintf(alpha_y * 0x1.0p+11f);

480

indirection_buffer += 4;

packed_weights += 2;

}

}

} else {

const float height_offset = 0.5f * height_scale - 0.5f;

486

const float width_offset = 0.5f * width_scale - 0.5f;

487

for (size_t output_y = 0; output_y < output_height; output_y++) {

488

float input_y = (float) (int32_t) output_y * height_scale + height_offset;

489

input_y = math_min_f32(math_max_f32(input_y, 0.0f), (float) input_y_max);

490

const uint32_t input_y_top = (uint32_t) (int32_t) input_y;

491

assert((int32_t) input_y_top >= 0);

492

const uint32_t input_y_bottom = math_min_u32(input_y_top + 1, input_y_max);

493

const float alpha_y = input_y - (float) input_y_top;

494

for (size_t output_x = 0; output_x < output_width; output_x++) {

495

float input_x = (float) (int32_t) output_x * width_scale + width_offset;

496

input_x = math_min_f32(math_max_f32(input_x, 0.0f), (float) input_x_max);

497

const uint32_t input_x_left = (uint32_t) (int32_t) input_x;

498

assert((int32_t) input_x_left >= 0);

499

const uint32_t input_x_right = math_min_u32(input_x_left + 1, input_x_max);

500

const float alpha_x = input_x - (float) input_x_left;

501

indirection_buffer[0] =

502

(void*) ((uintptr_t) input + (input_y_top * input_width + input_x_left) * input_pixel_stride);

503

indirection_buffer[1] =

504

(void*) ((uintptr_t) input + (input_y_top * input_width + input_x_right) * input_pixel_stride);

505

indirection_buffer[2] =

506

(void*) ((uintptr_t) input + (input_y_bottom * input_width + input_x_left) * input_pixel_stride);

507

indirection_buffer[3] =

508

(void*) ((uintptr_t) input + (input_y_bottom * input_width + input_x_right) * input_pixel_stride);

509

packed_weights[0] = (int16_t) lrintf(alpha_x * 0x1.0p+11f);

510

packed_weights[1] = (int16_t) lrintf(alpha_y * 0x1.0p+11f);

511

indirection_buffer += 4;

packed_weights += 2;

}

}

}

}

Artsiom Ablavatski

2020-10-27 15:52:59 -0700

[diff] [blame]

518

// Builds the indirection buffer and interpolation weights for bilinear
// resizing in the CHW variant.
//
// For every output pixel, in row-major order, two pointers are appended to
// indirection_buffer (the left pixel of the top row and the left pixel of the
// bottom row) and two floats are appended to packed_weights (alpha_x followed
// by alpha_y). When the left pixel would be the last column, it is moved one
// column to the left and alpha_x is set to 1, so that a pixel always exists to
// the right of the one pointed at.
//
//   input_pixel_stride - byte distance between consecutive input pixels.
//   input_height/width - input image size; greater than 1 and below 2**24.
//   output_height/width- output image size; non-zero and below 2**24.
//   input              - address of the first input pixel.
//   indirection_buffer - receives 2 pointers per output pixel.
//   packed_weights     - receives 2 floats per output pixel.
//   align_corners      - when set (and the output extent is not 1), the scale
//                        is (input - 1) / (output - 1) instead of
//                        input / output.
//   tensorflow_legacy  - when this or align_corners is set, the source
//                        coordinate is output * scale; otherwise it is
//                        output * scale + (0.5 * scale - 0.5), limited to
//                        [0, input - 1].
void xnn_indirection_init_resize_bilinear2d_chw_f32(
  size_t input_pixel_stride,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const void* input,
  const void** indirection_buffer,
  float* packed_weights,
  bool align_corners,
  bool tensorflow_legacy)
{
  assert(input_height > 1);
  assert(input_height < 16777216 /* 2**24 */);
  assert(input_width > 1);
  assert(input_width < 16777216 /* 2**24 */);
  assert(output_height != 0);
  assert(output_height < 16777216 /* 2**24 */);
  assert(output_width != 0);
  assert(output_width < 16777216 /* 2**24 */);

  const int32_t height_adjustment = (int32_t) (align_corners && output_height != 1);
  const int32_t width_adjustment = (int32_t) (align_corners && output_width != 1);
  const float height_scale =
    (float) ((int32_t) input_height - height_adjustment) / (float) ((int32_t) output_height - height_adjustment);
  const float width_scale =
    (float) ((int32_t) input_width - width_adjustment) / (float) ((int32_t) output_width - width_adjustment);

  const uint32_t input_x_max = (uint32_t) input_width - 1;
  const uint32_t input_y_max = (uint32_t) input_height - 1;
  const uintptr_t base = (uintptr_t) input;

  if (tensorflow_legacy || align_corners) {
    for (size_t output_y = 0; output_y < output_height; output_y++) {
      const float input_y = (float) (int32_t) output_y * height_scale;
      assert(input_y >= 0.0f);
      assert(input_y < (float) input_height);

      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
      // The row below the top one, limited to the last input row.
      const uint32_t input_y_bottom = input_y_top + 1 < input_y_max ? input_y_top + 1 : input_y_max;
      const float alpha_y = input_y - (float) input_y_top;
      const size_t top_row = input_y_top * input_width;
      const size_t bottom_row = input_y_bottom * input_width;

      for (size_t output_x = 0; output_x < output_width; output_x++) {
        const float input_x = (float) (int32_t) output_x * width_scale;
        assert(input_x >= 0.0f);
        assert(input_x < (float) input_width);

        uint32_t input_x_left = (uint32_t) (int32_t) input_x;
        float alpha_x = input_x - (float) input_x_left;
        if (input_x_left == input_x_max) {
          // Ensure that there is a pixel to the right of the one pointed at,
          // as required by some CHW kernels.
          input_x_left -= 1;
          alpha_x = 1.0f;
        }

        *indirection_buffer++ = (void*) (base + (top_row + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (void*) (base + (bottom_row + input_x_left) * input_pixel_stride);
        *packed_weights++ = alpha_x;
        *packed_weights++ = alpha_y;
      }
    }
  } else {
    const float height_offset = 0.5f * height_scale - 0.5f;
    const float width_offset = 0.5f * width_scale - 0.5f;
    const float input_y_limit = (float) input_y_max;
    const float input_x_limit = (float) input_x_max;

    for (size_t output_y = 0; output_y < output_height; output_y++) {
      float input_y = (float) (int32_t) output_y * height_scale + height_offset;
      // Limit the source row to [0, input_height - 1].
      if (!(0.0f < input_y)) {
        input_y = 0.0f;
      }
      if (input_y_limit < input_y) {
        input_y = input_y_limit;
      }

      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
      assert((int32_t) input_y_top >= 0);
      const uint32_t input_y_bottom = input_y_top + 1 < input_y_max ? input_y_top + 1 : input_y_max;
      const float alpha_y = input_y - (float) input_y_top;
      const size_t top_row = input_y_top * input_width;
      const size_t bottom_row = input_y_bottom * input_width;

      for (size_t output_x = 0; output_x < output_width; output_x++) {
        float input_x = (float) (int32_t) output_x * width_scale + width_offset;
        // Limit the source column to [0, input_width - 1].
        if (!(0.0f < input_x)) {
          input_x = 0.0f;
        }
        if (input_x_limit < input_x) {
          input_x = input_x_limit;
        }

        uint32_t input_x_left = (uint32_t) (int32_t) input_x;
        assert((int32_t) input_x_left >= 0);
        float alpha_x = input_x - (float) input_x_left;
        if (input_x_left == input_x_max) {
          // Ensure that there is a pixel to the right of the one pointed at,
          // as required by some CHW kernels.
          input_x_left -= 1;
          alpha_x = 1.0f;
        }

        *indirection_buffer++ = (void*) (base + (top_row + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (void*) (base + (bottom_row + input_x_left) * input_pixel_stride);
        *packed_weights++ = alpha_x;
        *packed_weights++ = alpha_y;
      }
    }
  }
}

XNNPACK Team

2019-09-27 18:10:33 -0700

[diff] [blame]

618

void xnn_indirection_init_unpool2d(

619

xnn_operator_t op,

620

size_t batch_start,

621

uint32_t log2_element_size)

622

{

623

const void** indirection_buffer = op->indirection_buffer;

624

const void* output = op->output;

625

const size_t output_pixel_stride = op->output_pixel_stride << log2_element_size;

626

const size_t batch_size = op->batch_size;

627

const size_t input_height = op->input_height;

628

const size_t input_width = op->input_width;

629

const size_t output_height = op->output_height;

630

const size_t output_width = op->output_width;

631

const size_t pooling_height = op->kernel_height;

632

const size_t pooling_width = op->kernel_width;

633

const size_t output_padding_top = op->padding_top;

634

const size_t output_padding_left = op->padding_left;

635

636

for (size_t image = batch_start; image < batch_size; image++) {

637

for (size_t input_y = 0; input_y < input_height; input_y++) {

638

for (size_t pooling_y = 0; pooling_y < pooling_height; pooling_y++) {

639

const size_t output_y = min(doz(input_y * pooling_height + pooling_y, output_padding_top), output_height - 1);

640

for (size_t input_x = 0; input_x < input_width; input_x++) {

641

for (size_t pooling_x = 0; pooling_x < pooling_width; pooling_x++) {

642

const size_t output_x = min(doz(input_x * pooling_width + pooling_x, output_padding_left), output_width - 1);

643

indirection_buffer[(((image * input_height + input_y) * input_width + input_x) * pooling_width + pooling_x) * pooling_height + pooling_y] =

Marat Dukhan

bdc8099

2020-04-13 01:21:18 -0700

[diff] [blame]

644

(const void*) ((uintptr_t) output + ((image * output_height + output_y) * output_width + output_x) * output_pixel_stride);

XNNPACK Team

b455b12

2019-09-27 18:10:33 -0700

[diff] [blame]

}

}

}

}

}

}