Support overriding memory allocation functions
- Let users provide their own memory management functions for XNNPACK
PiperOrigin-RevId: 281355722
diff --git a/src/add-nc.c b/src/add-nc.c
index 8fa534e..8a86617 100644
--- a/src/add-nc.c
+++ b/src/add-nc.c
@@ -122,7 +122,7 @@
status = xnn_status_out_of_memory;
- add_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ add_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (add_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Add operator descriptor", sizeof(struct xnn_operator));
goto error;
@@ -222,7 +222,7 @@
status = xnn_status_out_of_memory;
- add_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ add_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (add_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Add operator descriptor", sizeof(struct xnn_operator));
goto error;
diff --git a/src/argmax-pooling-nhwc.c b/src/argmax-pooling-nhwc.c
index d4902ef..e4c359a 100644
--- a/src/argmax-pooling-nhwc.c
+++ b/src/argmax-pooling-nhwc.c
@@ -128,7 +128,7 @@
status = xnn_status_out_of_memory;
- argmax_pooling_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ argmax_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (argmax_pooling_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Argmax Pooling operator descriptor", sizeof(struct xnn_operator));
goto error;
@@ -236,7 +236,7 @@
// Micro-kernel may read up to (mr - 1) elements after the end of indirection buffer.
const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height);
- const void** indirection_buffer = (const void**) realloc(argmax_pooling_op->indirection_buffer, indirection_buffer_size);
+ const void** indirection_buffer = (const void**) xnn_reallocate_memory(argmax_pooling_op->indirection_buffer, indirection_buffer_size);
if (indirection_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
return xnn_status_out_of_memory;
diff --git a/src/average-pooling-nhwc.c b/src/average-pooling-nhwc.c
index a6b3584..20ff0c5 100644
--- a/src/average-pooling-nhwc.c
+++ b/src/average-pooling-nhwc.c
@@ -155,7 +155,7 @@
status = xnn_status_out_of_memory;
- average_pooling_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (average_pooling_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Average Pooling operator descriptor", sizeof(struct xnn_operator));
goto error;
@@ -165,7 +165,7 @@
const uint32_t mr = xnn_params.q8.avgpool.mr;
const uint32_t qr = xnn_params.q8.avgpool.qr;
if (any_padding || pooling_size < mr || (pooling_size - mr) % qr != 0) {
- void* zero_buffer = xnn_allocate_memory(channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
+ void* zero_buffer = xnn_allocate_simd_memory(channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
if (zero_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for Average Pooling zero padding",
channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
@@ -303,7 +303,7 @@
status = xnn_status_out_of_memory;
- average_pooling_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (average_pooling_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Average Pooling operator descriptor", sizeof(struct xnn_operator));
goto error;
@@ -313,13 +313,12 @@
const uint32_t mr = xnn_params.f32.avgpool.mr;
const uint32_t qr = xnn_params.f32.avgpool.qr;
if (any_padding || pooling_size < mr || (pooling_size - mr) % qr != 0) {
- void* zero_buffer = xnn_allocate_memory(channels * sizeof(float) + XNN_EXTRA_BYTES);
+ void* zero_buffer = xnn_allocate_zero_simd_memory(channels * sizeof(float) + XNN_EXTRA_BYTES);
if (zero_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for Average Pooling zero padding",
channels * sizeof(float) + XNN_EXTRA_BYTES);
goto error;
}
- memset(zero_buffer, 0, channels * sizeof(float));
average_pooling_op->zero_buffer = zero_buffer;
}
@@ -431,7 +430,7 @@
const size_t step_height = pooling_size + (output_width * step_width - 1) * pooling_height;
const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height);
- const void** indirection_buffer = (const void**) realloc(average_pooling_op->indirection_buffer, indirection_buffer_size);
+ const void** indirection_buffer = (const void**) xnn_reallocate_memory(average_pooling_op->indirection_buffer, indirection_buffer_size);
if (indirection_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
return xnn_status_out_of_memory;
@@ -559,7 +558,7 @@
const size_t step_height = pooling_size + (output_width * step_width - 1) * pooling_height;
const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height);
- const void** indirection_buffer = (const void**) realloc(average_pooling_op->indirection_buffer, indirection_buffer_size);
+ const void** indirection_buffer = (const void**) xnn_reallocate_memory(average_pooling_op->indirection_buffer, indirection_buffer_size);
if (indirection_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
return xnn_status_out_of_memory;
@@ -611,7 +610,7 @@
input_width != average_pooling_op->last_input_width)
{
const size_t pixelwise_buffer_size = output_height * output_width * sizeof(float);
- float* pixelwise_buffer = (float*) realloc(average_pooling_op->pixelwise_buffer, pixelwise_buffer_size);
+ float* pixelwise_buffer = (float*) xnn_reallocate_memory(average_pooling_op->pixelwise_buffer, pixelwise_buffer_size);
if (pixelwise_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for pixelwise buffer", pixelwise_buffer_size);
return xnn_status_out_of_memory;
diff --git a/src/channel-pad-nc.c b/src/channel-pad-nc.c
index 608c4d8..b40facf 100644
--- a/src/channel-pad-nc.c
+++ b/src/channel-pad-nc.c
@@ -62,7 +62,7 @@
status = xnn_status_out_of_memory;
- channel_pad_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ channel_pad_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (channel_pad_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Channel Pad operator descriptor", sizeof(struct xnn_operator));
goto error;
diff --git a/src/channel-shuffle-nc.c b/src/channel-shuffle-nc.c
index c2fd2f1..fc01b89 100644
--- a/src/channel-shuffle-nc.c
+++ b/src/channel-shuffle-nc.c
@@ -70,7 +70,7 @@
status = xnn_status_out_of_memory;
- channel_shuffle_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ channel_shuffle_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (channel_shuffle_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Channel Shuffle operator descriptor", sizeof(struct xnn_operator));
goto error;
diff --git a/src/clamp-nc.c b/src/clamp-nc.c
index 852d926..97c1f74 100644
--- a/src/clamp-nc.c
+++ b/src/clamp-nc.c
@@ -69,7 +69,7 @@
status = xnn_status_out_of_memory;
- clamp_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ clamp_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (clamp_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Clamp operator descriptor", sizeof(struct xnn_operator));
goto error;
@@ -155,7 +155,7 @@
status = xnn_status_out_of_memory;
- clamp_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ clamp_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (clamp_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Clamp operator descriptor", sizeof(struct xnn_operator));
goto error;
diff --git a/src/convolution-nchw.c b/src/convolution-nchw.c
index 24c61a6..5b55553 100644
--- a/src/convolution-nchw.c
+++ b/src/convolution-nchw.c
@@ -193,7 +193,7 @@
status = xnn_status_out_of_memory;
- convolution_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ convolution_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (convolution_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Convolution operator descriptor", sizeof(struct xnn_operator));
goto error;
@@ -273,7 +273,7 @@
const size_t packed_weights_size = num_output_channel_blocks * sizeof(uint32_t) +
(num_nonzero_blocks * 2) * sizeof(int32_t) + (num_nonzero_values + group_output_channels) * sizeof(float);
- convolution_op->packed_weights = xnn_allocate_memory(packed_weights_size);
+ convolution_op->packed_weights = xnn_allocate_simd_memory(packed_weights_size);
if (convolution_op->packed_weights == NULL) {
xnn_log_error("failed to allocate %zu bytes for packed weights", packed_weights_size);
goto error;
@@ -384,7 +384,7 @@
round_up(group_output_channels, xnn_params.f32.hwc2spchw_dconv3x3c3s2.output_channel_tile);
const size_t packed_weights_size = groups * packed_group_output_channels *
(group_input_channels * kernel_height * kernel_width + 1 /* bias */) * sizeof(float);
- convolution_op->packed_weights = xnn_allocate_memory(packed_weights_size);
+ convolution_op->packed_weights = xnn_allocate_simd_memory(packed_weights_size);
if (convolution_op->packed_weights == NULL) {
xnn_log_error("failed to allocate %zu bytes for packed weights", packed_weights_size);
goto error;
@@ -412,7 +412,7 @@
assert(group_output_channels == 1);
const size_t packed_weights_size = groups * (kernel_height * kernel_width + 1 /* bias */) * sizeof(float);
- convolution_op->packed_weights = xnn_allocate_memory(packed_weights_size);
+ convolution_op->packed_weights = xnn_allocate_simd_memory(packed_weights_size);
if (convolution_op->packed_weights == NULL) {
xnn_log_error("failed to allocate %zu bytes for packed weights", packed_weights_size);
goto error;
@@ -608,7 +608,7 @@
case xnn_ukernel_type_dconv2d_hwc2spchw:
{
const size_t zero_size = (input_width * convolution_op->group_input_channels << log2_input_element_size) + XNN_EXTRA_BYTES;
- void* zero_buffer = realloc(convolution_op->zero_buffer, zero_size);
+ void* zero_buffer = xnn_reallocate_memory(convolution_op->zero_buffer, zero_size);
if (zero_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for zero padding", sizeof(struct xnn_operator));
return xnn_status_out_of_memory;
diff --git a/src/convolution-nhwc.c b/src/convolution-nhwc.c
index 946cd6d..ff8f199 100644
--- a/src/convolution-nhwc.c
+++ b/src/convolution-nhwc.c
@@ -221,7 +221,7 @@
status = xnn_status_out_of_memory;
- convolution_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ convolution_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (convolution_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Convolution operator descriptor", sizeof(struct xnn_operator));
goto error;
@@ -250,7 +250,7 @@
const uint32_t c_stride = round_up_po2(groups, dwconv_parameters->cr);
const size_t packed_weights_size = (sizeof(uint8_t) * kernel_size + sizeof(int32_t)) * c_stride;
- convolution_op->packed_weights = xnn_allocate_memory(packed_weights_size);
+ convolution_op->packed_weights = xnn_allocate_simd_memory(packed_weights_size);
if (convolution_op->packed_weights == NULL) {
xnn_log_error("failed to allocate %zu bytes for packed weights", packed_weights_size);
goto error;
@@ -289,7 +289,7 @@
const size_t packed_group_weights_size =
(sizeof(uint8_t) * kernel_size * k_stride + sizeof(int32_t)) * n_stride;
- convolution_op->packed_weights = xnn_allocate_memory(packed_group_weights_size * groups);
+ convolution_op->packed_weights = xnn_allocate_simd_memory(packed_group_weights_size * groups);
if (convolution_op->packed_weights == NULL) {
xnn_log_error("failed to allocate %zu bytes for packed weights", packed_group_weights_size * groups);
goto error;
@@ -344,7 +344,7 @@
const bool tf_same_padding = (flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0 && kernel_size != 1;
if (any_padding || tf_same_padding) {
- void* zero_buffer = xnn_allocate_memory(zero_size);
+ void* zero_buffer = xnn_allocate_simd_memory(zero_size);
if (zero_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for zero padding", zero_size);
goto error;
@@ -530,7 +530,7 @@
status = xnn_status_out_of_memory;
- convolution_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ convolution_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (convolution_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Convolution operator descriptor", sizeof(struct xnn_operator));
goto error;
@@ -559,7 +559,7 @@
{
const uint32_t c_stride = round_up_po2(groups, xnn_params.f32.vmulcaddc.channel_tile);
const size_t packed_weights_size = 2 * sizeof(float) * c_stride;
- convolution_op->packed_weights = xnn_allocate_memory(packed_weights_size);
+ convolution_op->packed_weights = xnn_allocate_simd_memory(packed_weights_size);
if (convolution_op->packed_weights == NULL) {
xnn_log_error("failed to allocate %zu bytes for packed weights", packed_weights_size);
goto error;
@@ -582,7 +582,7 @@
const uint32_t c_stride = round_up_po2(groups, dwconv_parameters->cr);
const size_t packed_weights_size = (kernel_size + 1) * sizeof(float) * c_stride;
- convolution_op->packed_weights = xnn_allocate_memory(packed_weights_size);
+ convolution_op->packed_weights = xnn_allocate_simd_memory(packed_weights_size);
if (convolution_op->packed_weights == NULL) {
xnn_log_error("failed to allocate %zu bytes for packed weights", packed_weights_size);
goto error;
@@ -619,7 +619,7 @@
const uint32_t k_stride = round_up_po2(group_input_channels, kr);
const size_t packed_group_weights_size = (kernel_size * k_stride + 1) * sizeof(float) * n_stride;
- convolution_op->packed_weights = xnn_allocate_memory(packed_group_weights_size * groups);
+ convolution_op->packed_weights = xnn_allocate_simd_memory(packed_group_weights_size * groups);
if (convolution_op->packed_weights == NULL) {
xnn_log_error("failed to allocate %zu bytes for packed weights", packed_group_weights_size * groups);
goto error;
@@ -673,7 +673,7 @@
const bool tf_same_padding = (flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0 && kernel_size != 1;
if (any_padding || tf_same_padding) {
- void* zero_buffer = xnn_allocate_zero_memory(zero_size);
+ void* zero_buffer = xnn_allocate_zero_simd_memory(zero_size);
if (zero_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for zero padding", zero_size);
goto error;
@@ -876,7 +876,7 @@
if (input_height != convolution_op->last_input_height ||
input_width != convolution_op->last_input_width)
{
- const void** indirection_buffer = (const void**) realloc(convolution_op->indirection_buffer, indirection_buffer_size);
+ const void** indirection_buffer = (const void**) xnn_reallocate_memory(convolution_op->indirection_buffer, indirection_buffer_size);
if (indirection_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
return xnn_status_out_of_memory;
@@ -970,7 +970,7 @@
const size_t indirection_buffer_size = sizeof(void*) * batch_size * output_height * step_height;
const void** indirection_buffer =
- (const void**) realloc(convolution_op->indirection_buffer, indirection_buffer_size);
+ (const void**) xnn_reallocate_memory(convolution_op->indirection_buffer, indirection_buffer_size);
if (indirection_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
return xnn_status_out_of_memory;
diff --git a/src/deconvolution-nhwc.c b/src/deconvolution-nhwc.c
index 2aaed66..3b3b091 100644
--- a/src/deconvolution-nhwc.c
+++ b/src/deconvolution-nhwc.c
@@ -181,7 +181,7 @@
status = xnn_status_out_of_memory;
- deconvolution_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ deconvolution_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (deconvolution_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Deconvolution operator descriptor", sizeof(struct xnn_operator));
goto error;
@@ -204,7 +204,7 @@
(sizeof(uint8_t) * kernel_size * k_stride + sizeof(int32_t) * subkernels);
const size_t subconvolution_buffer_size = sizeof(struct subconvolution_params) * subkernels;
- deconvolution_op->subconvolution_buffer = xnn_allocate_zero_memory(subconvolution_buffer_size);
+ deconvolution_op->subconvolution_buffer = xnn_allocate_zero_simd_memory(subconvolution_buffer_size);
if (deconvolution_op->subconvolution_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for subconvolution buffer", subconvolution_buffer_size);
goto error;
@@ -223,7 +223,7 @@
}
}
}
- deconvolution_op->packed_weights = xnn_allocate_memory(packed_group_weights_size * groups);
+ deconvolution_op->packed_weights = xnn_allocate_simd_memory(packed_group_weights_size * groups);
if (deconvolution_op->packed_weights == NULL) {
xnn_log_error("failed to allocate %zu bytes for packed weights", packed_group_weights_size * groups);
goto error;
@@ -251,7 +251,7 @@
}
size_t zero_size = sizeof(uint8_t) * k_stride + XNN_EXTRA_BYTES;
- void* zero_buffer = xnn_allocate_memory(zero_size);
+ void* zero_buffer = xnn_allocate_simd_memory(zero_size);
if (zero_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for zero padding", zero_size);
goto error;
@@ -419,7 +419,7 @@
status = xnn_status_out_of_memory;
- deconvolution_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ deconvolution_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (deconvolution_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Deconvolution operator descriptor", sizeof(struct xnn_operator));
goto error;
@@ -453,7 +453,7 @@
(sizeof(float) * kernel_size * k_stride + sizeof(float) * subkernels);
const size_t subconvolution_buffer_size = sizeof(struct subconvolution_params) * subkernels;
- deconvolution_op->subconvolution_buffer = xnn_allocate_zero_memory(subconvolution_buffer_size);
+ deconvolution_op->subconvolution_buffer = xnn_allocate_zero_simd_memory(subconvolution_buffer_size);
if (deconvolution_op->subconvolution_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for subconvolution buffer", subconvolution_buffer_size);
goto error;
@@ -472,7 +472,7 @@
}
}
}
- deconvolution_op->packed_weights = xnn_allocate_memory(packed_group_weights_size * groups);
+ deconvolution_op->packed_weights = xnn_allocate_simd_memory(packed_group_weights_size * groups);
if (deconvolution_op->packed_weights == NULL) {
xnn_log_error("failed to allocate %zu bytes for packed weights", packed_group_weights_size * groups);
goto error;
@@ -498,7 +498,7 @@
}
const size_t zero_size = k_stride * sizeof(float) + XNN_EXTRA_BYTES;
- void* zero_buffer = xnn_allocate_zero_memory(zero_size);
+ void* zero_buffer = xnn_allocate_zero_simd_memory(zero_size);
if (zero_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for zero padding", zero_size);
goto error;
@@ -574,7 +574,7 @@
if (input_height != deconvolution_op->last_input_height ||
input_width != deconvolution_op->last_input_width)
{
- const void** indirection_buffer = (const void**) realloc(deconvolution_op->indirection_buffer, indirection_buffer_size);
+ const void** indirection_buffer = (const void**) xnn_reallocate_memory(deconvolution_op->indirection_buffer, indirection_buffer_size);
if (indirection_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
return xnn_status_out_of_memory;
@@ -681,7 +681,7 @@
if (input_height != deconvolution_op->last_input_height ||
input_width != deconvolution_op->last_input_width)
{
- const void** indirection_buffer = (const void**) realloc(deconvolution_op->indirection_buffer, indirection_buffer_size);
+ const void** indirection_buffer = (const void**) xnn_reallocate_memory(deconvolution_op->indirection_buffer, indirection_buffer_size);
if (indirection_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
return xnn_status_out_of_memory;
diff --git a/src/fully-connected-nc.c b/src/fully-connected-nc.c
index 3913934..9399a80 100644
--- a/src/fully-connected-nc.c
+++ b/src/fully-connected-nc.c
@@ -123,7 +123,7 @@
status = xnn_status_out_of_memory;
- fully_connected_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ fully_connected_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (fully_connected_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Fully Connected operator descriptor", sizeof(struct xnn_operator));
goto error;
@@ -135,7 +135,7 @@
const uint32_t n_stride = round_up(output_channels, nr);
const uint32_t k_stride = round_up_po2(input_channels, kr);
- fully_connected_op->packed_weights = xnn_allocate_memory(n_stride * (k_stride * sizeof(uint8_t) + sizeof(int32_t)));
+ fully_connected_op->packed_weights = xnn_allocate_simd_memory(n_stride * (k_stride * sizeof(uint8_t) + sizeof(int32_t)));
if (fully_connected_op->packed_weights == NULL) {
xnn_log_error("failed to allocate %zu bytes for packed weights",
n_stride * (k_stride * sizeof(uint8_t) + sizeof(int32_t)));
@@ -255,7 +255,7 @@
status = xnn_status_out_of_memory;
- fully_connected_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ fully_connected_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (fully_connected_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Fully Connected operator descriptor", sizeof(struct xnn_operator));
goto error;
@@ -268,7 +268,7 @@
const uint32_t n_stride = round_up(output_channels, nr);
const uint32_t k_stride = round_up_po2(input_channels, kr);
- fully_connected_op->packed_weights = xnn_allocate_memory(n_stride * (k_stride * sizeof(float) + sizeof(float)));
+ fully_connected_op->packed_weights = xnn_allocate_simd_memory(n_stride * (k_stride * sizeof(float) + sizeof(float)));
if (fully_connected_op->packed_weights == NULL) {
xnn_log_error("failed to allocate %zu bytes for packed weights",
n_stride * (k_stride * sizeof(float) + sizeof(float)));
diff --git a/src/global-average-pooling-ncw.c b/src/global-average-pooling-ncw.c
index 6bf0dc1..2f6db07 100644
--- a/src/global-average-pooling-ncw.c
+++ b/src/global-average-pooling-ncw.c
@@ -71,7 +71,7 @@
status = xnn_status_out_of_memory;
- global_average_pooling_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ global_average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (global_average_pooling_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Global Average Pooling operator descriptor", sizeof(struct xnn_operator));
goto error;
diff --git a/src/global-average-pooling-nwc.c b/src/global-average-pooling-nwc.c
index 6bf781f..5f757ab 100644
--- a/src/global-average-pooling-nwc.c
+++ b/src/global-average-pooling-nwc.c
@@ -103,13 +103,13 @@
status = xnn_status_out_of_memory;
- global_average_pooling_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ global_average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (global_average_pooling_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Global Average Pooling operator descriptor", sizeof(struct xnn_operator));
goto error;
}
- void* zero_buffer = xnn_allocate_zero_memory(channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
+ void* zero_buffer = xnn_allocate_zero_simd_memory(channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
if (zero_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for Global Average Pooling zero padding",
channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
@@ -204,13 +204,13 @@
status = xnn_status_out_of_memory;
- global_average_pooling_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ global_average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (global_average_pooling_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Global Average Pooling operator descriptor", sizeof(struct xnn_operator));
goto error;
}
- void* zero_buffer = xnn_allocate_zero_memory(channels * sizeof(float) + XNN_EXTRA_BYTES);
+ void* zero_buffer = xnn_allocate_zero_simd_memory(channels * sizeof(float) + XNN_EXTRA_BYTES);
if (zero_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for Global Average Pooling zero padding",
channels * sizeof(float) + XNN_EXTRA_BYTES);
diff --git a/src/hardswish-nc.c b/src/hardswish-nc.c
index 4253c08..c3f4b6c 100644
--- a/src/hardswish-nc.c
+++ b/src/hardswish-nc.c
@@ -9,6 +9,7 @@
#include <stdlib.h>
#include <xnnpack.h>
+#include <xnnpack/allocator.h>
#include <xnnpack/log.h>
#include <xnnpack/operator.h>
#include <xnnpack/params-init.h>
@@ -56,7 +57,7 @@
status = xnn_status_out_of_memory;
- hardswish_op = calloc(1, sizeof(struct xnn_operator));
+ hardswish_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (hardswish_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for xnn_operator structure", sizeof(struct xnn_operator));
goto error;
diff --git a/src/init.c b/src/init.c
index f43fcf1..f63d7a8 100644
--- a/src/init.c
+++ b/src/init.c
@@ -9,6 +9,7 @@
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
+#include <string.h>
#include <pthread.h>
@@ -31,6 +32,7 @@
#include <xnnpack/log.h>
#include <xnnpack/lut.h>
#include <xnnpack/maxpool.h>
+#include <xnnpack/memory.h>
#include <xnnpack/pad.h>
#include <xnnpack/params.h>
#include <xnnpack/pavgpool.h>
@@ -1150,7 +1152,7 @@
xnn_params.initialized = true;
}
-enum xnn_status xnn_initialize(void) {
+enum xnn_status xnn_initialize(const struct xnn_allocator* allocator) {
#ifndef __EMSCRIPTEN__
if (!cpuinfo_initialize()) {
return xnn_status_out_of_memory;
@@ -1158,6 +1160,15 @@
#endif
pthread_once(&init_guard, &init);
if (xnn_params.initialized) {
+ if (allocator != NULL) {
+ memcpy(&xnn_params.allocator, allocator, sizeof(struct xnn_allocator));
+ } else {
+ xnn_params.allocator.allocate = &xnn_allocate;
+ xnn_params.allocator.reallocate = &xnn_reallocate;
+ xnn_params.allocator.deallocate = &xnn_deallocate;
+ xnn_params.allocator.aligned_allocate = &xnn_aligned_allocate;
+ xnn_params.allocator.aligned_deallocate = &xnn_aligned_deallocate;
+ }
return xnn_status_success;
} else {
return xnn_status_unsupported_hardware;
diff --git a/src/leaky-relu-nc.c b/src/leaky-relu-nc.c
index 860f44a..fda6b3a 100644
--- a/src/leaky-relu-nc.c
+++ b/src/leaky-relu-nc.c
@@ -112,13 +112,13 @@
status = xnn_status_out_of_memory;
- leaky_relu_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ leaky_relu_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (leaky_relu_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Leaky ReLU operator descriptor", sizeof(struct xnn_operator));
goto error;
}
- leaky_relu_op->lookup_table = xnn_allocate_memory(256 * sizeof(uint8_t));
+ leaky_relu_op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint8_t));
if (leaky_relu_op->lookup_table == NULL) {
xnn_log_error("failed to allocate 256 bytes for Leaky ReLU lookup table");
goto error;
diff --git a/src/max-pooling-nhwc.c b/src/max-pooling-nhwc.c
index fb159bb..8f13af6 100644
--- a/src/max-pooling-nhwc.c
+++ b/src/max-pooling-nhwc.c
@@ -128,7 +128,7 @@
status = xnn_status_out_of_memory;
- max_pooling_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ max_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (max_pooling_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Max Pooling operator descriptor", sizeof(struct xnn_operator));
goto error;
@@ -269,7 +269,7 @@
status = xnn_status_out_of_memory;
- max_pooling_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ max_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (max_pooling_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Max Pooling operator descriptor", sizeof(struct xnn_operator));
goto error;
@@ -380,7 +380,7 @@
const size_t step_height = pooling_size + (output_width * step_width - 1) * pooling_height;
const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height);
- const void** indirection_buffer = (const void**) realloc(max_pooling_op->indirection_buffer, indirection_buffer_size);
+ const void** indirection_buffer = (const void**) xnn_reallocate_memory(max_pooling_op->indirection_buffer, indirection_buffer_size);
if (indirection_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
return xnn_status_out_of_memory;
@@ -502,7 +502,7 @@
const size_t step_height = pooling_size + (output_width * step_width - 1) * pooling_height;
const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height);
- const void** indirection_buffer = (const void**) realloc(max_pooling_op->indirection_buffer, indirection_buffer_size);
+ const void** indirection_buffer = (const void**) xnn_reallocate_memory(max_pooling_op->indirection_buffer, indirection_buffer_size);
if (indirection_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
return xnn_status_out_of_memory;
diff --git a/src/memory.c b/src/memory.c
new file mode 100644
index 0000000..a9683bd
--- /dev/null
+++ b/src/memory.c
@@ -0,0 +1,53 @@
+// Copyright 2019 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+#ifdef __ANDROID__
+ #include <malloc.h>
+#endif
+
+#include <xnnpack/common.h>
+#include <xnnpack/memory.h>
+
+
+extern int posix_memalign(void **memptr, size_t alignment, size_t size);
+
+
+void* xnn_allocate(void* context, size_t size) {
+ return malloc(size);
+}
+
+void* xnn_reallocate(void* context, void* pointer, size_t size) {
+ return realloc(pointer, size);
+}
+
+void xnn_deallocate(void* context, void* pointer) {
+ if XNN_LIKELY(pointer != NULL) {
+ free(pointer);
+ }
+}
+
+void* xnn_aligned_allocate(void* context, size_t alignment, size_t size) {
+#if XNN_ARCH_ASMJS || XNN_ARCH_WASM
+ assert(alignment <= 2 * sizeof(void*));
+ return malloc(size);
+#elif defined(__ANDROID__)
+ return memalign(alignment, size);
+#else
+ void* memory_ptr = NULL;
+ if (posix_memalign(&memory_ptr, alignment, size) != 0) {
+ return NULL;
+ }
+ return memory_ptr;
+#endif
+}
+
+void xnn_aligned_deallocate(void* context, void* pointer) {
+ if XNN_LIKELY(pointer != NULL) {
+ free(pointer);
+ }
+}
diff --git a/src/multiply-nd.c b/src/multiply-nd.c
index 9070648..27dc4cf 100644
--- a/src/multiply-nd.c
+++ b/src/multiply-nd.c
@@ -54,7 +54,7 @@
status = xnn_status_out_of_memory;
- multiply_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ multiply_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (multiply_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Multiply operator descriptor", sizeof(struct xnn_operator));
goto error;
diff --git a/src/operator-delete.c b/src/operator-delete.c
index 9d225b5..3e0eeaa 100644
--- a/src/operator-delete.c
+++ b/src/operator-delete.c
@@ -26,12 +26,12 @@
return xnn_status_invalid_parameter;
}
- free(op->indirection_buffer);
- xnn_release_memory(op->packed_weights);
- xnn_release_memory(op->zero_buffer);
- free(op->pixelwise_buffer);
- free(op->subconvolution_buffer);
- xnn_release_memory(op->lookup_table);
- xnn_release_memory(op);
+ xnn_release_memory(op->indirection_buffer);
+ xnn_release_simd_memory(op->packed_weights);
+ xnn_release_simd_memory(op->zero_buffer);
+ xnn_release_memory(op->pixelwise_buffer);
+ xnn_release_memory(op->subconvolution_buffer);
+ xnn_release_simd_memory(op->lookup_table);
+ xnn_release_simd_memory(op);
return xnn_status_success;
}
diff --git a/src/prelu-nc.c b/src/prelu-nc.c
index 9b71362..49a122a 100644
--- a/src/prelu-nc.c
+++ b/src/prelu-nc.c
@@ -68,14 +68,14 @@
status = xnn_status_out_of_memory;
- prelu_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ prelu_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (prelu_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for PReLU operator descriptor", sizeof(struct xnn_operator));
goto error;
}
const size_t packed_channels = round_up_po2(channels, XNN_EXTRA_BYTES / sizeof(float));
- prelu_op->packed_weights = xnn_allocate_memory(packed_channels * sizeof(float));
+ prelu_op->packed_weights = xnn_allocate_simd_memory(packed_channels * sizeof(float));
if (prelu_op->packed_weights == NULL) {
xnn_log_error("failed to allocate %zu bytes for packed slope data",
packed_channels * sizeof(float));
diff --git a/src/resize-bilinear-nhwc.c b/src/resize-bilinear-nhwc.c
index 363bbba..f8ff604 100644
--- a/src/resize-bilinear-nhwc.c
+++ b/src/resize-bilinear-nhwc.c
@@ -62,7 +62,7 @@
status = xnn_status_out_of_memory;
- resize_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ resize_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (resize_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Resize Bilinear operator descriptor", sizeof(struct xnn_operator));
goto error;
@@ -147,14 +147,14 @@
const size_t indirection_buffer_size = sizeof(void*) * (output_height * output_width * 4);
const size_t packed_weights_size = sizeof(float) * (output_height * output_width * 2);
- const void** indirection_buffer = (const void**) realloc(resize_op->indirection_buffer, indirection_buffer_size);
+ const void** indirection_buffer = (const void**) xnn_reallocate_memory(resize_op->indirection_buffer, indirection_buffer_size);
if (indirection_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
return xnn_status_out_of_memory;
}
resize_op->indirection_buffer = indirection_buffer;
- float* packed_weights = (float*) realloc(resize_op->packed_weights, packed_weights_size);
+ float* packed_weights = (float*) xnn_reallocate_memory(resize_op->packed_weights, packed_weights_size);
if (packed_weights == NULL) {
xnn_log_error("failed to allocate %zu bytes for packed weights", packed_weights_size);
return xnn_status_out_of_memory;
diff --git a/src/sigmoid-nc.c b/src/sigmoid-nc.c
index f74fa2c..8ea899c 100644
--- a/src/sigmoid-nc.c
+++ b/src/sigmoid-nc.c
@@ -102,13 +102,13 @@
status = xnn_status_out_of_memory;
- sigmoid_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ sigmoid_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (sigmoid_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Sigmoid operator descriptor", sizeof(struct xnn_operator));
goto error;
}
- sigmoid_op->lookup_table = xnn_allocate_memory(256 * sizeof(uint8_t));
+ sigmoid_op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint8_t));
if (sigmoid_op->lookup_table == NULL) {
xnn_log_error("failed to allocate 256 bytes for Sigmoid lookup table");
goto error;
@@ -197,7 +197,7 @@
status = xnn_status_out_of_memory;
- sigmoid_op = calloc(1, sizeof(struct xnn_operator));
+ sigmoid_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (sigmoid_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for xnn_operator structure", sizeof(struct xnn_operator));
goto error;
diff --git a/src/softargmax-nc.c b/src/softargmax-nc.c
index 3ac1671..c84915b 100644
--- a/src/softargmax-nc.c
+++ b/src/softargmax-nc.c
@@ -93,13 +93,13 @@
status = xnn_status_out_of_memory;
- softargmax_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ softargmax_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (softargmax_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for SoftArgMax operator descriptor", sizeof(struct xnn_operator));
goto error;
}
- softargmax_op->lookup_table = xnn_allocate_memory(256 * sizeof(uint32_t));
+ softargmax_op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint32_t));
if (softargmax_op->lookup_table == NULL) {
xnn_log_error("failed to allocate 256 bytes for SoftArgMax lookup table");
goto error;
diff --git a/src/unpooling-nhwc.c b/src/unpooling-nhwc.c
index de8e14a..a423053 100644
--- a/src/unpooling-nhwc.c
+++ b/src/unpooling-nhwc.c
@@ -92,7 +92,7 @@
status = xnn_status_out_of_memory;
- unpooling_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+ unpooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
if (unpooling_op == NULL) {
xnn_log_error("failed to allocate %zu bytes for Unpooling operator descriptor", sizeof(struct xnn_operator));
goto error;
@@ -187,7 +187,7 @@
const size_t indirection_buffer_size = sizeof(void*) * (batch_size * input_height * input_width * pooling_size);
- void** indirection_buffer = (void**) realloc(unpooling_op->indirection_buffer, indirection_buffer_size);
+ void** indirection_buffer = (void**) xnn_reallocate_memory(unpooling_op->indirection_buffer, indirection_buffer_size);
if (indirection_buffer == NULL) {
xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
return xnn_status_out_of_memory;
diff --git a/src/xnnpack/allocator.h b/src/xnnpack/allocator.h
index c946656..8aef8b2 100644
--- a/src/xnnpack/allocator.h
+++ b/src/xnnpack/allocator.h
@@ -8,40 +8,42 @@
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
-#ifdef __ANDROID__
- #include <malloc.h>
-#endif
#include <xnnpack/common.h>
-
-extern int posix_memalign(void **memptr, size_t alignment, size_t size);
+#include <xnnpack/params.h>
-#define XNN_ALLOCATION_ALIGNMENT 16
-
+#if XNN_ARCH_ASMJS || XNN_ARCH_WASM
+ #define XNN_ALLOCATION_ALIGNMENT 4
+#else
+ #define XNN_ALLOCATION_ALIGNMENT 16
+#endif
inline static void* xnn_allocate_memory(size_t memory_size) {
- void* memory_ptr = NULL;
-#if XNN_ARCH_ASMJS || XNN_ARCH_WASM
- memory_ptr = malloc(memory_size);
-#elif defined(__ANDROID__)
- memory_ptr = memalign(XNN_ALLOCATION_ALIGNMENT, memory_size);
-#else
- if (posix_memalign(&memory_ptr, XNN_ALLOCATION_ALIGNMENT, memory_size) != 0) {
- return NULL;
- }
-#endif
- return memory_ptr;
+ // Route unaligned allocations through the user-overridable allocator table.
+ return xnn_params.allocator.allocate(xnn_params.allocator.context, memory_size);
}
-inline static void* xnn_allocate_zero_memory(size_t memory_size) {
- void* memory_ptr = xnn_allocate_memory(memory_size);
- if (memory_ptr != NULL) {
- memset(memory_ptr, 0, memory_size);
- }
- return memory_ptr;
+// Resizes a block from the unaligned allocator via the overridable table;
+// NOTE(review): presumably mirrors realloc semantics (old block stays valid
+// on failure) -- the contract is defined by the installed allocator.
+inline static void* xnn_reallocate_memory(void* memory_pointer, size_t memory_size) {
+ return xnn_params.allocator.reallocate(xnn_params.allocator.context, memory_pointer, memory_size);
}
-inline static void xnn_release_memory(void* memory_ptr) {
- free(memory_ptr);
+// Releases a block obtained from xnn_allocate_memory/xnn_reallocate_memory.
+inline static void xnn_release_memory(void* memory_pointer) {
+ xnn_params.allocator.deallocate(xnn_params.allocator.context, memory_pointer);
+}
+
+// Allocates memory aligned to XNN_ALLOCATION_ALIGNMENT for SIMD access.
+inline static void* xnn_allocate_simd_memory(size_t memory_size) {
+ return xnn_params.allocator.aligned_allocate(xnn_params.allocator.context, XNN_ALLOCATION_ALIGNMENT, memory_size);
+}
+
+// Allocates SIMD-aligned memory and zero-fills it on success; returns NULL
+// (without touching memory) when the aligned allocation fails.
+inline static void* xnn_allocate_zero_simd_memory(size_t memory_size) {
+ void* memory_pointer = xnn_params.allocator.aligned_allocate(
+ xnn_params.allocator.context, XNN_ALLOCATION_ALIGNMENT, memory_size);
+ if (memory_pointer != NULL) {
+ memset(memory_pointer, 0, memory_size);
+ }
+ return memory_pointer;
+}
+
+// Releases memory obtained from the SIMD-aligned allocation functions above.
+inline static void xnn_release_simd_memory(void* memory_pointer) {
+ xnn_params.allocator.aligned_deallocate(xnn_params.allocator.context, memory_pointer);
}
diff --git a/src/xnnpack/memory.h b/src/xnnpack/memory.h
new file mode 100644
index 0000000..a45a915
--- /dev/null
+++ b/src/xnnpack/memory.h
@@ -0,0 +1,17 @@
+// Copyright 2019 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include <stddef.h>
+
+#include <xnnpack/common.h>
+
+
+XNN_INTERNAL void* xnn_allocate(void* context, size_t size);
+XNN_INTERNAL void* xnn_reallocate(void* context, void* pointer, size_t size);
+XNN_INTERNAL void xnn_deallocate(void* context, void* pointer);
+XNN_INTERNAL void* xnn_aligned_allocate(void* context, size_t alignment, size_t size);
+XNN_INTERNAL void xnn_aligned_deallocate(void* context, void* pointer);
diff --git a/src/xnnpack/params.h b/src/xnnpack/params.h
index 028e3ba..88e56d0 100644
--- a/src/xnnpack/params.h
+++ b/src/xnnpack/params.h
@@ -12,6 +12,7 @@
#include <stddef.h>
#include <stdint.h>
+#include <xnnpack.h>
#include <xnnpack/common.h>
struct xnn_f16_output_params {
@@ -1321,6 +1322,7 @@
struct xnn_parameters {
bool initialized;
+ struct xnn_allocator allocator;
struct {
struct gemm_parameters gemm;
struct dwconv_parameters dwconv[XNN_MAX_Q8_DWCONV_UKERNELS];