Blame - src/indirection.c - platform/external/XNNPACK

const size_t index = (batch_index * output_height + output_y) * step_height + output_x * step_width * kernel_height + kernel_x * kernel_height + kernel_y;

112

if (input_x < input_width) {

113

indirection_buffer[index] =

114

(const void*) ((uintptr_t) input + ((batch_index * input_height + input_y) * input_width + input_x) * input_pixel_stride);

115

} else {

116

indirection_buffer[index] = zero;

}

}

}

} else {

for (size_t output_x = 0; output_x < output_width; output_x++) {

122

for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) {

123

const size_t index = (batch_index * output_height + output_y) * step_height + output_x * step_width * kernel_height + kernel_x * kernel_height + kernel_y;

124

indirection_buffer[index] = zero;

}

}

}

}

}

}

}

void xnn_indirection_init_deconv2d(

134

xnn_operator_t op,

135

size_t output_tile_size,

136

uint32_t log2_element_size)

137

{

138

const void** indirection_buffer = op->indirection_buffer;

139

const void* input = op->input;

140

const size_t input_pixel_stride = op->input_pixel_stride << log2_element_size;

141

const void* zero = op->zero_buffer;

142

const size_t input_height = op->input_height;

143

const size_t input_width = op->input_width;

144

const size_t output_height = op->output_height;

145

const size_t output_width = op->output_width;

146

const size_t kernel_height = op->kernel_height;

147

const size_t kernel_width = op->kernel_width;

148

const size_t stride_height = op->stride_height;

149

const size_t stride_width = op->stride_width;

150

const size_t dilation_height = op->dilation_height;

151

const size_t dilation_width = op->dilation_width;

152

const size_t padding_top = op->padding_top;

153

const size_t padding_left = op->padding_left;

154

155

const size_t output_size = output_height * output_width;

156

const size_t tiled_output_size = round_up(output_size, output_tile_size);

157

const size_t kernel_size = kernel_height * kernel_width;

158

159

const struct fxdiv_divisor_size_t output_width_divisor = fxdiv_init_size_t(output_width);

160

const struct fxdiv_divisor_size_t stride_height_divisor = fxdiv_init_size_t(stride_height);

161

const struct fxdiv_divisor_size_t stride_width_divisor = fxdiv_init_size_t(stride_width);

162

163

for (size_t output_tile_start = 0; output_tile_start < tiled_output_size; output_tile_start += output_tile_size) {

164

for (size_t output_tile_offset = 0; output_tile_offset < output_tile_size; output_tile_offset++) {

165

const size_t output_index = min(output_tile_start + output_tile_offset, output_size - 1);

166

const struct fxdiv_result_size_t output_y_x = fxdiv_divide_size_t(output_index, output_width_divisor);

167

const size_t output_x = output_y_x.remainder;

168

const size_t output_y = output_y_x.quotient;

169

for (size_t kernel_y = 0; kernel_y < kernel_height; kernel_y++) {

170

const size_t y = output_y + padding_top - kernel_y * dilation_height;

171

const size_t input_y = fxdiv_quotient_size_t(y, stride_height_divisor);

172

for (size_t kernel_x = 0; kernel_x < kernel_width; kernel_x++) {

173

const size_t x = output_x + padding_left - kernel_x * dilation_width;

174

const size_t input_x = fxdiv_quotient_size_t(x, stride_width_divisor);

175

const size_t kernel_index = kernel_y * kernel_width + kernel_x;

176

const size_t index = output_tile_start * kernel_size + kernel_index * output_tile_size + output_tile_offset;

177

if (input_y * stride_height == y && input_y < input_height && input_x * stride_width == x && input_x < input_width) {

178

indirection_buffer[index] = (const void*) ((uintptr_t) input + (input_y * input_width + input_x) * input_pixel_stride);

179

} else {

180

indirection_buffer[index] = zero;

}

}

}

}

}

}

void xnn_indirection_init_subconv2d(

189

xnn_operator_t op,

190

size_t output_tile_size,

191

uint32_t log2_element_size)

192

{

193

const void** indirection_buffer = op->indirection_buffer;

194

struct subconvolution_params* subconvolution_params = op->subconvolution_buffer;

195

const void* input = op->input;

196

const size_t input_pixel_stride = op->input_pixel_stride << log2_element_size;

197

const void* zero = op->zero_buffer;

198

const size_t input_height = op->input_height;

199

const size_t input_width = op->input_width;

200

const size_t output_height = op->output_height;

201

const size_t output_width = op->output_width;

202

const size_t kernel_height = op->kernel_height;

203

const size_t kernel_width = op->kernel_width;

204

const size_t stride_height = op->stride_height;

205

const size_t stride_width = op->stride_width;

206

const size_t padding_top = op->padding_top;

207

const size_t padding_left = op->padding_left;

208

209

const size_t modulo_padding_top = padding_top % stride_height;

210

const size_t modulo_padding_left = padding_left % stride_width;

211

for (size_t offset_y = 0; offset_y < stride_height; offset_y++) {

212

const size_t output_y_start = subtract_modulo(offset_y, modulo_padding_top, stride_height);

213

for (size_t offset_x = 0; offset_x < stride_width; offset_x++) {

214

const size_t output_x_start = subtract_modulo(offset_x, modulo_padding_left, stride_width);

215

const size_t sliced_output_width = divide_round_up(output_width - output_x_start, stride_width);

216

217

subconvolution_params->indirection_buffer = indirection_buffer;

218

subconvolution_params->indirection_y_stride =

219

subconvolution_params->indirection_x_stride * round_up(sliced_output_width, output_tile_size);

220

++subconvolution_params;

221

222

for (size_t output_y = output_y_start; output_y < output_height; output_y += stride_height) {

223

for (size_t output_tile_start = 0; output_tile_start < sliced_output_width; output_tile_start += output_tile_size) {

224

for (size_t kernel_y = offset_y; kernel_y < kernel_height; kernel_y += stride_height) {

225

assert(doz(output_y + padding_top, kernel_y) % stride_height == 0);

226

const size_t y = output_y + padding_top - kernel_y;

227

const size_t input_y = y / stride_height;

228

229

for (size_t kernel_x = offset_x; kernel_x < kernel_width; kernel_x += stride_width) {

230

for (size_t output_tile_offset = 0; output_tile_offset < output_tile_size; output_tile_offset++) {

231

const size_t sliced_output_x = min(output_tile_start + output_tile_offset, sliced_output_width - 1);

232

const size_t output_x = output_x_start + sliced_output_x * stride_width;

233

234

assert(doz(output_x + padding_left, kernel_x) % stride_width == 0);

235

const size_t x = output_x + padding_left - kernel_x;

236

const size_t input_x = x / stride_width;

237

238

if (input_y < input_height && input_x < input_width) {

239

*indirection_buffer++ =

240

(const void*) ((uintptr_t) input + (input_y * input_width + input_x) * input_pixel_stride);

241

} else {

242

*indirection_buffer++ = zero;

}

}

}

}

}

}

}

}

}

void xnn_indirection_init_maxpool2d(

254

xnn_operator_t op,

XNNPACK Team

b455b12

2019-09-27 18:10:33 -0700

[diff] [blame]

255

size_t step_height,

256

size_t step_width,

257

uint32_t log2_element_size)

258

{

259

const void** indirection_buffer = op->indirection_buffer;

260

const void* input = op->input;

261

const size_t input_pixel_stride = op->input_pixel_stride << log2_element_size;

XNNPACK Team

b455b12

2019-09-27 18:10:33 -0700

[diff] [blame]

262

const size_t input_height = op->input_height;

263

const size_t input_width = op->input_width;

264

const size_t output_height = op->output_height;

265

const size_t output_width = op->output_width;

266

const size_t pooling_height = op->kernel_height;

267

const size_t pooling_width = op->kernel_width;

268

const size_t stride_height = op->stride_height;

269

const size_t stride_width = op->stride_width;

270

const size_t dilation_height = op->dilation_height;

271

const size_t dilation_width = op->dilation_width;

272

const size_t input_padding_top = op->padding_top;

273

const size_t input_padding_left = op->padding_left;

274

Marat Dukhan

329da64

2019-11-19 21:44:39 -0800

[diff] [blame]

275

for (size_t output_y = 0; output_y < output_height; output_y++) {

276

for (size_t pooling_y = 0; pooling_y < pooling_height; pooling_y++) {

277

const size_t input_y = doz(output_y * stride_height + pooling_y * dilation_height, input_padding_top);

278

const size_t clamped_input_y = min(input_y, input_height - 1);

279

for (size_t output_x = 0; output_x < output_width; output_x++) {

280

for (size_t pooling_x = 0; pooling_x < pooling_width; pooling_x++) {

281

const size_t input_x = doz(output_x * stride_width + pooling_x * dilation_width, input_padding_left);

282

const size_t clamped_input_x = min(input_x, input_width - 1);

283

const size_t index = output_y * step_height + output_x * step_width * pooling_height + pooling_x * pooling_height + pooling_y;

284

indirection_buffer[index] = input + (clamped_input_y * input_width + clamped_input_x) * input_pixel_stride;

XNNPACK Team

b455b12

2019-09-27 18:10:33 -0700

[diff] [blame]

}

}

}

}

}

Marat Dukhan

2019-11-11 19:55:50 -0800

[diff] [blame]

291

// Builds the indirection buffer and interpolation weights for a 2D bilinear
// resize with float weights.
//
// For every output pixel, in row-major order, this writes
//   - four pointers to indirection_buffer: top-left, top-right, bottom-left
//     and bottom-right input pixels, and
//   - two floats to packed_weights: alpha_x then alpha_y, the fractional
//     parts of the source x and y coordinates.
//
// Coordinate mapping:
//   - scale = (input_dim - adj) / (output_dim - adj), where adj is 1 when
//     align_corners is set and the output dimension is not 1, else 0;
//   - tensorflow_legacy: source = output * scale (asserted to lie in
//     [0, input_dim));
//   - otherwise: source = output * scale + (0.5 * scale - 0.5), clamped to
//     [0, input_dim - 1].
// The right/bottom neighbor index is limited to input_dim - 1.
//
//   input_pixel_stride - multiplier turning a pixel index into an address
//                        offset from `input`.
//   input_height/width, output_height/width - image extents; each must be
//                        non-zero and below 2**24 (asserted).
//   input              - base address that pixel offsets are added to.
//   indirection_buffer - receives 4 * output_height * output_width pointers.
//   packed_weights     - receives 2 * output_height * output_width floats.
void xnn_indirection_init_resize_bilinear2d_f32(
  size_t input_pixel_stride,
  size_t input_height,
  size_t input_width,
  size_t output_height,
  size_t output_width,
  const void* input,
  const void** indirection_buffer,
  float* packed_weights,
  bool align_corners,
  bool tensorflow_legacy)
{
  assert(input_height != 0);
  assert(input_height < 16777216 /* 2**24 */);
  assert(input_width != 0);
  assert(input_width < 16777216 /* 2**24 */);
  assert(output_height != 0);
  assert(output_height < 16777216 /* 2**24 */);
  assert(output_width != 0);
  assert(output_width < 16777216 /* 2**24 */);

  const int32_t width_adjustment = (align_corners && output_width != 1) ? 1 : 0;
  const int32_t height_adjustment = (align_corners && output_height != 1) ? 1 : 0;
  const float width_scale =
    (float) ((int32_t) input_width - width_adjustment) / (float) ((int32_t) output_width - width_adjustment);
  const float height_scale =
    (float) ((int32_t) input_height - height_adjustment) / (float) ((int32_t) output_height - height_adjustment);

  const uintptr_t input_base = (uintptr_t) input;
  const uint32_t input_y_max = (uint32_t) input_height - 1;
  const uint32_t input_x_max = (uint32_t) input_width - 1;

  if (tensorflow_legacy) {
    for (size_t output_y = 0; output_y < output_height; output_y++) {
      const float input_y = (float) (int32_t) output_y * height_scale;
      assert(input_y >= 0.0f);
      assert(input_y < (float) input_height);

      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
      const uint32_t input_y_bottom = math_min_u32(input_y_top + 1, input_y_max);
      const float alpha_y = input_y - (float) input_y_top;

      // Pixel index of the first column of the two source rows.
      const size_t top_row = input_y_top * input_width;
      const size_t bottom_row = input_y_bottom * input_width;

      for (size_t output_x = 0; output_x < output_width; output_x++) {
        const float input_x = (float) (int32_t) output_x * width_scale;
        assert(input_x >= 0.0f);
        assert(input_x < (float) input_width);

        const uint32_t input_x_left = (uint32_t) (int32_t) input_x;
        const uint32_t input_x_right = math_min_u32(input_x_left + 1, input_x_max);
        const float alpha_x = input_x - (float) input_x_left;

        *indirection_buffer++ = (void*) (input_base + (top_row + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (void*) (input_base + (top_row + input_x_right) * input_pixel_stride);
        *indirection_buffer++ = (void*) (input_base + (bottom_row + input_x_left) * input_pixel_stride);
        *indirection_buffer++ = (void*) (input_base + (bottom_row + input_x_right) * input_pixel_stride);
        *packed_weights++ = alpha_x;
        *packed_weights++ = alpha_y;
      }
    }
    return;
  }

  const float height_offset = 0.5f * height_scale - 0.5f;
  const float width_offset = 0.5f * width_scale - 0.5f;

  for (size_t output_y = 0; output_y < output_height; output_y++) {
    float input_y = (float) (int32_t) output_y * height_scale + height_offset;
    input_y = math_min_f32(math_max_f32(input_y, 0.0f), (float) input_y_max);

    const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
    assert((int32_t) input_y_top >= 0);
    const uint32_t input_y_bottom = math_min_u32(input_y_top + 1, input_y_max);
    const float alpha_y = input_y - (float) input_y_top;

    // Pixel index of the first column of the two source rows.
    const size_t top_row = input_y_top * input_width;
    const size_t bottom_row = input_y_bottom * input_width;

    for (size_t output_x = 0; output_x < output_width; output_x++) {
      float input_x = (float) (int32_t) output_x * width_scale + width_offset;
      input_x = math_min_f32(math_max_f32(input_x, 0.0f), (float) input_x_max);

      const uint32_t input_x_left = (uint32_t) (int32_t) input_x;
      assert((int32_t) input_x_left >= 0);
      const uint32_t input_x_right = math_min_u32(input_x_left + 1, input_x_max);
      const float alpha_x = input_x - (float) input_x_left;

      *indirection_buffer++ = (void*) (input_base + (top_row + input_x_left) * input_pixel_stride);
      *indirection_buffer++ = (void*) (input_base + (top_row + input_x_right) * input_pixel_stride);
      *indirection_buffer++ = (void*) (input_base + (bottom_row + input_x_left) * input_pixel_stride);
      *indirection_buffer++ = (void*) (input_base + (bottom_row + input_x_right) * input_pixel_stride);
      *packed_weights++ = alpha_x;
      *packed_weights++ = alpha_y;
    }
  }
}

XNNPACK Team

2019-09-27 18:10:33 -0700

[diff] [blame]

386

void xnn_indirection_init_unpool2d(

387

xnn_operator_t op,

388

size_t batch_start,

389

uint32_t log2_element_size)

390

{

391

const void** indirection_buffer = op->indirection_buffer;

392

const void* output = op->output;

393

const size_t output_pixel_stride = op->output_pixel_stride << log2_element_size;

394

const size_t batch_size = op->batch_size;

395

const size_t input_height = op->input_height;

396

const size_t input_width = op->input_width;

397

const size_t output_height = op->output_height;

398

const size_t output_width = op->output_width;

399

const size_t pooling_height = op->kernel_height;

400

const size_t pooling_width = op->kernel_width;

401

const size_t output_padding_top = op->padding_top;

402

const size_t output_padding_left = op->padding_left;

403

404

for (size_t image = batch_start; image < batch_size; image++) {

405

for (size_t input_y = 0; input_y < input_height; input_y++) {

406

for (size_t pooling_y = 0; pooling_y < pooling_height; pooling_y++) {

407

const size_t output_y = min(doz(input_y * pooling_height + pooling_y, output_padding_top), output_height - 1);

408

for (size_t input_x = 0; input_x < input_width; input_x++) {

409

for (size_t pooling_x = 0; pooling_x < pooling_width; pooling_x++) {

410

const size_t output_x = min(doz(input_x * pooling_width + pooling_x, output_padding_left), output_width - 1);

411

indirection_buffer[(((image * input_height + input_y) * input_width + input_x) * pooling_width + pooling_x) * pooling_height + pooling_y] =

412

output + ((image * output_height + output_y) * output_width + output_x) * output_pixel_stride;

}

}

}

}

}

}