S8 & U8 Resize Bilinear NHWC operators
PiperOrigin-RevId: 412158448
diff --git a/src/indirection.c b/src/indirection.c
index 14b8aa9..872d4cc 100644
--- a/src/indirection.c
+++ b/src/indirection.c
@@ -7,6 +7,7 @@
// LICENSE file in the root directory of this source tree.
#include <stddef.h>
+#include <math.h>
#include <fxdiv.h>
@@ -419,6 +420,101 @@
}
}
+// Populates the indirection buffer and packed Q11 weights for a 2D bilinear
+// resize over HWC (channels-last) pixels.
+//
+// For each output pixel, four pointers into `input` are written to
+// `indirection_buffer` (top-left, top-right, bottom-left, bottom-right
+// neighbors, clamped to the image border) and two interpolation weights are
+// written to `packed_weights`. Weights are stored in signed 16-bit fixed
+// point with 11 fractional bits (Q11), i.e. lrintf(alpha * 2**11), so alpha
+// in [0, 1] maps to 0..2048 and fits int16_t comfortably.
+//
+//   input_pixel_stride - distance between horizontally adjacent pixels, bytes
+//   align_corners      - corner-aligned coordinate mapping: first/last output
+//                        samples land exactly on first/last input pixels
+//   tensorflow_legacy  - legacy TensorFlow mapping (input = output * scale);
+//                        when both flags are false, half-pixel centers are used
+void xnn_indirection_init_resize_bilinear2d_hwc_q11(
+  size_t input_pixel_stride,
+  size_t input_height,
+  size_t input_width,
+  size_t output_height,
+  size_t output_width,
+  const void* input,
+  const void** indirection_buffer,
+  int16_t* packed_weights,
+  bool align_corners,
+  bool tensorflow_legacy)
+{
+  assert(input_height != 0);
+  assert(input_height < 16777216 /* 2**24 */);
+  assert(input_width != 0);
+  assert(input_width < 16777216 /* 2**24 */);
+  assert(output_height != 0);
+  assert(output_height < 16777216 /* 2**24 */);
+  assert(output_width != 0);
+  assert(output_width < 16777216 /* 2**24 */);
+
+  // With align_corners, scale = (in - 1) / (out - 1); the adjustment is
+  // suppressed for single-pixel outputs to avoid a division by zero.
+  const int32_t width_adjustment = (int32_t) (align_corners && output_width != 1);
+  const int32_t height_adjustment = (int32_t) (align_corners && output_height != 1);
+  const float width_scale =
+    (float) ((int32_t) input_width - width_adjustment) / (float) ((int32_t) output_width - width_adjustment);
+  const float height_scale =
+    (float) ((int32_t) input_height - height_adjustment) / (float) ((int32_t) output_height - height_adjustment);
+
+  const uint32_t input_y_max = (uint32_t) input_height - 1;
+  const uint32_t input_x_max = (uint32_t) input_width - 1;
+  if (tensorflow_legacy || align_corners) {
+    // Legacy / corner-aligned mapping: input = output * scale, no offset.
+    for (size_t output_y = 0; output_y < output_height; output_y++) {
+      const float input_y = (float) (int32_t) output_y * height_scale;
+      assert(input_y >= 0.0f);
+      assert(input_y < (float) input_height);
+
+      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
+      const uint32_t input_y_bottom = math_min_u32(input_y_top + 1, input_y_max);
+      const float alpha_y = input_y - (float) input_y_top;
+      for (size_t output_x = 0; output_x < output_width; output_x++) {
+        const float input_x = (float) (int32_t) output_x * width_scale;
+        assert(input_x >= 0.0f);
+        assert(input_x < (float) input_width);
+
+        const uint32_t input_x_left = (uint32_t) (int32_t) input_x;
+        const uint32_t input_x_right = math_min_u32(input_x_left + 1, input_x_max);
+        const float alpha_x = input_x - (float) input_x_left;
+        // Compute linear pixel indices in size_t: with dimensions up to
+        // 2**24 - 1, y * width + x can exceed 2**32 and would wrap in uint32_t.
+        indirection_buffer[0] =
+          (void*) ((uintptr_t) input + ((size_t) input_y_top * input_width + input_x_left) * input_pixel_stride);
+        indirection_buffer[1] =
+          (void*) ((uintptr_t) input + ((size_t) input_y_top * input_width + input_x_right) * input_pixel_stride);
+        indirection_buffer[2] =
+          (void*) ((uintptr_t) input + ((size_t) input_y_bottom * input_width + input_x_left) * input_pixel_stride);
+        indirection_buffer[3] =
+          (void*) ((uintptr_t) input + ((size_t) input_y_bottom * input_width + input_x_right) * input_pixel_stride);
+        // Round-to-nearest Q11 quantization of the interpolation weights.
+        packed_weights[0] = (int16_t) lrintf(alpha_x * 0x1.0p+11f);
+        packed_weights[1] = (int16_t) lrintf(alpha_y * 0x1.0p+11f);
+        indirection_buffer += 4;
+        packed_weights += 2;
+      }
+    }
+  } else {
+    // Half-pixel-centers mapping: input = (output + 0.5) * scale - 0.5,
+    // clamped into [0, max] because the offset can push coordinates negative.
+    const float height_offset = 0.5f * height_scale - 0.5f;
+    const float width_offset = 0.5f * width_scale - 0.5f;
+    for (size_t output_y = 0; output_y < output_height; output_y++) {
+      float input_y = (float) (int32_t) output_y * height_scale + height_offset;
+      input_y = math_min_f32(math_max_f32(input_y, 0.0f), (float) input_y_max);
+      const uint32_t input_y_top = (uint32_t) (int32_t) input_y;
+      assert((int32_t) input_y_top >= 0);
+      const uint32_t input_y_bottom = math_min_u32(input_y_top + 1, input_y_max);
+      const float alpha_y = input_y - (float) input_y_top;
+      for (size_t output_x = 0; output_x < output_width; output_x++) {
+        float input_x = (float) (int32_t) output_x * width_scale + width_offset;
+        input_x = math_min_f32(math_max_f32(input_x, 0.0f), (float) input_x_max);
+        const uint32_t input_x_left = (uint32_t) (int32_t) input_x;
+        assert((int32_t) input_x_left >= 0);
+        const uint32_t input_x_right = math_min_u32(input_x_left + 1, input_x_max);
+        const float alpha_x = input_x - (float) input_x_left;
+        // size_t promotion: see comment in the branch above.
+        indirection_buffer[0] =
+          (void*) ((uintptr_t) input + ((size_t) input_y_top * input_width + input_x_left) * input_pixel_stride);
+        indirection_buffer[1] =
+          (void*) ((uintptr_t) input + ((size_t) input_y_top * input_width + input_x_right) * input_pixel_stride);
+        indirection_buffer[2] =
+          (void*) ((uintptr_t) input + ((size_t) input_y_bottom * input_width + input_x_left) * input_pixel_stride);
+        indirection_buffer[3] =
+          (void*) ((uintptr_t) input + ((size_t) input_y_bottom * input_width + input_x_right) * input_pixel_stride);
+        packed_weights[0] = (int16_t) lrintf(alpha_x * 0x1.0p+11f);
+        packed_weights[1] = (int16_t) lrintf(alpha_y * 0x1.0p+11f);
+        indirection_buffer += 4;
+        packed_weights += 2;
+      }
+    }
+  }
+}
+
void xnn_indirection_init_resize_bilinear2d_chw_f32(
size_t input_pixel_stride,
size_t input_height,
diff --git a/src/operator-strings.c b/src/operator-strings.c
index 3db439f..5e4cffb 100644
--- a/src/operator-strings.c
+++ b/src/operator-strings.c
@@ -138,6 +138,10 @@
return "PReLU (NC, F32)";
case xnn_operator_type_resize_bilinear_nhwc_f32:
return "Resize Bilinear (NHWC, F32)";
+ case xnn_operator_type_resize_bilinear_nhwc_s8:
+ return "Resize Bilinear (NHWC, S8)";
+ case xnn_operator_type_resize_bilinear_nhwc_u8:
+ return "Resize Bilinear (NHWC, U8)";
case xnn_operator_type_resize_bilinear_nchw_f32:
return "Resize Bilinear (NCHW, F32)";
case xnn_operator_type_sigmoid_nc_f32:
diff --git a/src/operators/resize-bilinear-nhwc.c b/src/operators/resize-bilinear-nhwc.c
index 9a898dd..ac1836d 100644
--- a/src/operators/resize-bilinear-nhwc.c
+++ b/src/operators/resize-bilinear-nhwc.c
@@ -21,11 +21,12 @@
#include <xnnpack/indirection.h>
-enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
+static enum xnn_status create_resize_bilinear2d_nhwc(
size_t channels,
size_t input_pixel_stride,
size_t output_pixel_stride,
uint32_t flags,
+ enum xnn_operator_type operator_type,
xnn_operator_t* resize_op_out)
{
xnn_operator_t resize_op = NULL;
@@ -33,7 +34,7 @@
if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
xnn_log_error("failed to create %s operator: XNNPACK is not initialized",
- xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32));
+ xnn_operator_type_to_string(operator_type));
goto error;
}
@@ -42,7 +43,7 @@
if (channels == 0) {
xnn_log_error(
"failed to create %s operator with %zu channels: number of channels must be non-zero",
- xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32), channels);
+ xnn_operator_type_to_string(operator_type), channels);
goto error;
}
@@ -50,7 +51,7 @@
xnn_log_error(
"failed to create %s operator with input pixel stride of %zu: "
"stride must be at least as large as the number of channels (%zu)",
- xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32), input_pixel_stride, channels);
+ xnn_operator_type_to_string(operator_type), input_pixel_stride, channels);
goto error;
}
@@ -58,7 +59,7 @@
xnn_log_error(
"failed to create %s operator with output pixel stride of %zu: "
"stride must be at least as large as the number of channels (%zu)",
- xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32), output_pixel_stride, channels);
+ xnn_operator_type_to_string(operator_type), output_pixel_stride, channels);
goto error;
}
@@ -68,7 +69,7 @@
if (resize_op == NULL) {
xnn_log_error(
"failed to allocate %zu bytes for %s operator descriptor",
- sizeof(struct xnn_operator), xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32));
+ sizeof(struct xnn_operator), xnn_operator_type_to_string(operator_type));
goto error;
}
@@ -76,7 +77,7 @@
resize_op->input_pixel_stride = input_pixel_stride;
resize_op->output_pixel_stride = output_pixel_stride;
- resize_op->type = xnn_operator_type_resize_bilinear_nhwc_f32;
+ resize_op->type = operator_type;
resize_op->flags = flags;
resize_op->state = xnn_run_state_invalid;
@@ -89,20 +90,73 @@
return status;
}
-enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
+// Public F32 entry point: forwards to the shared type-generic creator,
+// tagging the operator as Resize Bilinear (NHWC, F32) for error reporting
+// and for the setup-time type check.
+enum xnn_status xnn_create_resize_bilinear2d_nhwc_f32(
+ size_t channels,
+ size_t input_pixel_stride,
+ size_t output_pixel_stride,
+ uint32_t flags,
+ xnn_operator_t* resize_op_out)
+{
+ return create_resize_bilinear2d_nhwc(
+ channels,
+ input_pixel_stride,
+ output_pixel_stride,
+ flags,
+ xnn_operator_type_resize_bilinear_nhwc_f32,
+ resize_op_out);
+}
+
+// Public S8 (signed 8-bit quantized) entry point: forwards to the shared
+// type-generic creator with the S8 operator type tag.
+enum xnn_status xnn_create_resize_bilinear2d_nhwc_s8(
+ size_t channels,
+ size_t input_pixel_stride,
+ size_t output_pixel_stride,
+ uint32_t flags,
+ xnn_operator_t* resize_op_out)
+{
+ return create_resize_bilinear2d_nhwc(
+ channels,
+ input_pixel_stride,
+ output_pixel_stride,
+ flags,
+ xnn_operator_type_resize_bilinear_nhwc_s8,
+ resize_op_out);
+}
+
+// Public U8 (unsigned 8-bit quantized) entry point: forwards to the shared
+// type-generic creator with the U8 operator type tag.
+enum xnn_status xnn_create_resize_bilinear2d_nhwc_u8(
+ size_t channels,
+ size_t input_pixel_stride,
+ size_t output_pixel_stride,
+ uint32_t flags,
+ xnn_operator_t* resize_op_out)
+{
+ return create_resize_bilinear2d_nhwc(
+ channels,
+ input_pixel_stride,
+ output_pixel_stride,
+ flags,
+ xnn_operator_type_resize_bilinear_nhwc_u8,
+ resize_op_out);
+}
+
+static enum xnn_status setup_resize_bilinear2d_nhwc(
xnn_operator_t resize_op,
+ enum xnn_operator_type expected_operator_type,
size_t batch_size,
size_t input_height,
size_t input_width,
size_t output_height,
size_t output_width,
- const float* input,
- float* output,
- pthreadpool_t threadpool)
+ const void* input,
+ void* output,
+ uint32_t log2_element_size,
+ uint32_t log2_weight_element_size,
+ xnn_indirection_init_resize_bilinear2d_hwc_fn indirection_init,
+ const struct ibilinear_parameters ibilinear[restrict XNN_MIN_ELEMENTS(1)],
+ size_t num_threads)
{
- if (resize_op->type != xnn_operator_type_resize_bilinear_nhwc_f32) {
+ if (resize_op->type != expected_operator_type) {
xnn_log_error("failed to setup operator: operator type mismatch (expected %s, got %s)",
- xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32),
+ xnn_operator_type_to_string(expected_operator_type),
xnn_operator_type_to_string(resize_op->type));
return xnn_status_invalid_parameter;
}
@@ -110,35 +164,35 @@
if ((xnn_params.init_flags & XNN_INIT_FLAG_XNNPACK) == 0) {
xnn_log_error("failed to setup %s operator: XNNPACK is not initialized",
- xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32));
+ xnn_operator_type_to_string(resize_op->type));
return xnn_status_uninitialized;
}
if (input_width == 0 || input_height == 0) {
xnn_log_error(
"failed to setup %s operator with %zux%zu input: input dimensions must be non-zero",
- xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32), input_width, input_height);
+ xnn_operator_type_to_string(resize_op->type), input_width, input_height);
return xnn_status_invalid_parameter;
}
if (max(input_width, input_height) >= 16777216) {
xnn_log_error(
"failed to setup %s operator with %zux%zu input: input dimensions must be below 2**24",
- xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32), input_width, input_height);
+ xnn_operator_type_to_string(resize_op->type), input_width, input_height);
return xnn_status_unsupported_parameter;
}
if (output_width == 0 || output_height == 0) {
xnn_log_error(
"failed to setup %s operator with %zux%zu output: output dimensions must be non-zero",
- xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32), output_width, output_height);
+ xnn_operator_type_to_string(resize_op->type), output_width, output_height);
return xnn_status_invalid_parameter;
}
if (max(output_width, output_height) >= 16777216) {
xnn_log_error(
"failed to setup %s operator with %zux%zu output: output dimensions must be below 2**24",
- xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32), output_width, output_height);
+ xnn_operator_type_to_string(resize_op->type), output_width, output_height);
return xnn_status_unsupported_parameter;
}
@@ -149,13 +203,13 @@
if (output_height * output_width != resize_op->last_output_height * resize_op->last_output_width) {
const size_t indirection_buffer_size = sizeof(void*) * (output_height * output_width * 4);
- const size_t packed_weights_size = sizeof(float) * (output_height * output_width * 2);
+ const size_t packed_weights_size = (output_height * output_width * 2) << log2_weight_element_size;
const void** indirection_buffer = (const void**) xnn_reallocate_memory(resize_op->indirection_buffer, indirection_buffer_size);
if (indirection_buffer == NULL) {
xnn_log_error(
"failed to allocate %zu bytes for %s operator indirection buffer",
- indirection_buffer_size, xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32));
+ indirection_buffer_size, xnn_operator_type_to_string(resize_op->type));
return xnn_status_out_of_memory;
}
resize_op->indirection_buffer = indirection_buffer;
@@ -166,19 +220,19 @@
if (resize_op->packed_weights == NULL) {
xnn_log_error(
"failed to allocate %zu bytes for %s operator packed weights",
- packed_weights_size, xnn_operator_type_to_string(xnn_operator_type_resize_bilinear_nhwc_f32));
+ packed_weights_size, xnn_operator_type_to_string(resize_op->type));
return xnn_status_out_of_memory;
}
}
- const size_t input_pixel_stride_in_bytes = resize_op->input_pixel_stride * sizeof(float);
+ const size_t input_pixel_stride_in_bytes = resize_op->input_pixel_stride << log2_element_size;
if (input_height != resize_op->last_input_height ||
input_width != resize_op->last_input_width ||
output_height != resize_op->last_output_height ||
output_width != resize_op->last_output_width)
{
const uint32_t flags = resize_op->flags;
- xnn_indirection_init_resize_bilinear2d_hwc_f32(
+ indirection_init(
input_pixel_stride_in_bytes,
input_height, input_width,
output_height, output_width,
@@ -193,9 +247,9 @@
resize_op->last_output_width = output_width;
}
- const size_t output_pixel_stride_in_bytes = resize_op->output_pixel_stride * sizeof(float);
+ const size_t output_pixel_stride_in_bytes = resize_op->output_pixel_stride << log2_element_size;
resize_op->context.resize_bilinear = (struct resize_bilinear_context) {
- .scaled_channels = resize_op->channels * sizeof(float),
+ .scaled_channels = resize_op->channels << log2_element_size,
.indirect_input = resize_op->indirection_buffer,
.input_offset = (size_t) ((uintptr_t) input - (uintptr_t) resize_op->last_input),
.input_batch_stride = input_pixel_stride_in_bytes * input_height * input_width,
@@ -203,18 +257,17 @@
.output = output,
.output_pixel_stride = output_pixel_stride_in_bytes,
.output_batch_stride = output_pixel_stride_in_bytes * output_height * output_width,
- .log2_wsize = 3 /* log2(2 * sizeof(float)) */,
- .ukernel = xnn_params.f32.ibilinear.ukernel,
+ .log2_wsize = 1 + log2_weight_element_size /* log2(2 * sizeof(weight)) */,
+ .ukernel = ibilinear->ukernel,
};
const size_t output_size = output_height * output_width;
size_t output_size_tile = output_size;
- const size_t num_threads = pthreadpool_get_threads_count(threadpool);
if (num_threads > 1) {
const size_t target_tiles_per_thread = 5;
const size_t max_output_size_tile = divide_round_up(output_size, num_threads * target_tiles_per_thread);
if (max_output_size_tile < output_size_tile) {
- const uint32_t output_size_subtile = xnn_params.f32.ibilinear.pixel_tile;
+ const uint32_t output_size_subtile = ibilinear->pixel_tile;
output_size_tile =
min(output_size_tile,
divide_round_up(output_size_tile, max_output_size_tile * output_size_subtile) * output_size_subtile);
@@ -229,3 +282,87 @@
return xnn_status_success;
}
+
+// Public F32 setup: delegates to the shared setup with float-sized elements
+// and float interpolation weights (both log2 sizes == 2), the F32 indirection
+// initializer, and the F32 ibilinear microkernel parameters.
+enum xnn_status xnn_setup_resize_bilinear2d_nhwc_f32(
+ xnn_operator_t resize_op,
+ size_t batch_size,
+ size_t input_height,
+ size_t input_width,
+ size_t output_height,
+ size_t output_width,
+ const float* input,
+ float* output,
+ pthreadpool_t threadpool)
+{
+ return setup_resize_bilinear2d_nhwc(
+ resize_op,
+ xnn_operator_type_resize_bilinear_nhwc_f32,
+ batch_size,
+ input_height,
+ input_width,
+ output_height,
+ output_width,
+ input,
+ output,
+ 2 /* log2(element size) == log2(sizeof(float)) */,
+ 2 /* log2(weight element size) == log2(sizeof(float)) */,
+ (xnn_indirection_init_resize_bilinear2d_hwc_fn) xnn_indirection_init_resize_bilinear2d_hwc_f32,
+ &xnn_params.f32.ibilinear,
+ pthreadpool_get_threads_count(threadpool));
+}
+
+// Public S8 setup: byte-sized elements (log2 == 0) with int16_t Q11
+// interpolation weights (log2 == 1), the Q11 indirection initializer, and
+// the S8 ibilinear microkernel parameters.
+enum xnn_status xnn_setup_resize_bilinear2d_nhwc_s8(
+ xnn_operator_t resize_op,
+ size_t batch_size,
+ size_t input_height,
+ size_t input_width,
+ size_t output_height,
+ size_t output_width,
+ const int8_t* input,
+ int8_t* output,
+ pthreadpool_t threadpool)
+{
+ return setup_resize_bilinear2d_nhwc(
+ resize_op,
+ xnn_operator_type_resize_bilinear_nhwc_s8,
+ batch_size,
+ input_height,
+ input_width,
+ output_height,
+ output_width,
+ input,
+ output,
+ 0 /* log2(element size) == log2(sizeof(int8_t)) */,
+ 1 /* log2(weight element size) == log2(sizeof(int16_t)) */,
+ (xnn_indirection_init_resize_bilinear2d_hwc_fn) xnn_indirection_init_resize_bilinear2d_hwc_q11,
+ &xnn_params.s8.ibilinear,
+ pthreadpool_get_threads_count(threadpool));
+}
+
+// Public U8 setup: byte-sized elements (log2 == 0) with int16_t Q11
+// interpolation weights (log2 == 1); shares the Q11 indirection initializer
+// with S8 (weight layout is identical), but uses U8 microkernel parameters.
+enum xnn_status xnn_setup_resize_bilinear2d_nhwc_u8(
+ xnn_operator_t resize_op,
+ size_t batch_size,
+ size_t input_height,
+ size_t input_width,
+ size_t output_height,
+ size_t output_width,
+ const uint8_t* input,
+ uint8_t* output,
+ pthreadpool_t threadpool)
+{
+ return setup_resize_bilinear2d_nhwc(
+ resize_op,
+ xnn_operator_type_resize_bilinear_nhwc_u8,
+ batch_size,
+ input_height,
+ input_width,
+ output_height,
+ output_width,
+ input,
+ output,
+ 0 /* log2(element size) == log2(sizeof(uint8_t)) */,
+ 1 /* log2(weight element size) == log2(sizeof(int16_t)) */,
+ (xnn_indirection_init_resize_bilinear2d_hwc_fn) xnn_indirection_init_resize_bilinear2d_hwc_q11,
+ &xnn_params.u8.ibilinear,
+ pthreadpool_get_threads_count(threadpool));
+}
diff --git a/src/xnnpack/indirection.h b/src/xnnpack/indirection.h
index a342df2..67bda52 100644
--- a/src/xnnpack/indirection.h
+++ b/src/xnnpack/indirection.h
@@ -58,6 +58,30 @@
bool align_corners,
bool tensorflow_legacy);
+// Generic signature shared by the per-datatype resize-bilinear indirection
+// initializers. packed_weights is untyped (void*) because the weight element
+// type differs per implementation (float for F32, int16_t Q11 for S8/U8);
+// callers cast the concrete initializer to this type.
+typedef void (*xnn_indirection_init_resize_bilinear2d_hwc_fn)(
+ size_t input_pixel_stride,
+ size_t input_height,
+ size_t input_width,
+ size_t output_height,
+ size_t output_width,
+ const void* input,
+ const void** indirection_buffer,
+ void* packed_weights,
+ bool align_corners,
+ bool tensorflow_legacy);
+
+// Q11 fixed-point variant used by the S8/U8 NHWC resize-bilinear operators:
+// writes interpolation weights as int16_t values equal to lrintf(alpha * 2**11).
+XNN_INTERNAL void xnn_indirection_init_resize_bilinear2d_hwc_q11(
+ size_t input_pixel_stride,
+ size_t input_height,
+ size_t input_width,
+ size_t output_height,
+ size_t output_width,
+ const void* input,
+ const void** indirection_buffer,
+ int16_t* packed_weights,
+ bool align_corners,
+ bool tensorflow_legacy);
+
XNN_INTERNAL void xnn_indirection_init_resize_bilinear2d_chw_f32(
size_t input_pixel_stride,
size_t input_height,
diff --git a/src/xnnpack/operator.h b/src/xnnpack/operator.h
index bf23dfd..7c0a4a0 100644
--- a/src/xnnpack/operator.h
+++ b/src/xnnpack/operator.h
@@ -93,6 +93,8 @@
xnn_operator_type_prelu_nc_f32,
xnn_operator_type_resize_bilinear_nchw_f32,
xnn_operator_type_resize_bilinear_nhwc_f32,
+ xnn_operator_type_resize_bilinear_nhwc_s8,
+ xnn_operator_type_resize_bilinear_nhwc_u8,
xnn_operator_type_sigmoid_nc_f32,
xnn_operator_type_sigmoid_nc_qs8,
xnn_operator_type_sigmoid_nc_qu8,