Support overriding memory allocation functions

- Let users provide their own memory management functions for XNNPACK

PiperOrigin-RevId: 281355722
diff --git a/src/add-nc.c b/src/add-nc.c
index 8fa534e..8a86617 100644
--- a/src/add-nc.c
+++ b/src/add-nc.c
@@ -122,7 +122,7 @@
 
   status = xnn_status_out_of_memory;
 
-  add_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  add_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (add_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Add operator descriptor", sizeof(struct xnn_operator));
     goto error;
@@ -222,7 +222,7 @@
 
   status = xnn_status_out_of_memory;
 
-  add_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  add_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (add_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Add operator descriptor", sizeof(struct xnn_operator));
     goto error;
diff --git a/src/argmax-pooling-nhwc.c b/src/argmax-pooling-nhwc.c
index d4902ef..e4c359a 100644
--- a/src/argmax-pooling-nhwc.c
+++ b/src/argmax-pooling-nhwc.c
@@ -128,7 +128,7 @@
 
   status = xnn_status_out_of_memory;
 
-  argmax_pooling_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  argmax_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (argmax_pooling_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Argmax Pooling operator descriptor", sizeof(struct xnn_operator));
     goto error;
@@ -236,7 +236,7 @@
   // Micro-kernel may read up to (mr - 1) elements after the end of indirection buffer.
   const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height);
 
-  const void** indirection_buffer = (const void**) realloc(argmax_pooling_op->indirection_buffer, indirection_buffer_size);
+  const void** indirection_buffer = (const void**) xnn_reallocate_memory(argmax_pooling_op->indirection_buffer, indirection_buffer_size);
   if (indirection_buffer == NULL) {
     xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
     return xnn_status_out_of_memory;
diff --git a/src/average-pooling-nhwc.c b/src/average-pooling-nhwc.c
index a6b3584..20ff0c5 100644
--- a/src/average-pooling-nhwc.c
+++ b/src/average-pooling-nhwc.c
@@ -155,7 +155,7 @@
 
   status = xnn_status_out_of_memory;
 
-  average_pooling_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (average_pooling_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Average Pooling operator descriptor", sizeof(struct xnn_operator));
     goto error;
@@ -165,7 +165,7 @@
   const uint32_t mr = xnn_params.q8.avgpool.mr;
   const uint32_t qr = xnn_params.q8.avgpool.qr;
   if (any_padding || pooling_size < mr || (pooling_size - mr) % qr != 0) {
-    void* zero_buffer = xnn_allocate_memory(channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
+    void* zero_buffer = xnn_allocate_simd_memory(channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
     if (zero_buffer == NULL) {
       xnn_log_error("failed to allocate %zu bytes for Average Pooling zero padding",
         channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
@@ -303,7 +303,7 @@
 
   status = xnn_status_out_of_memory;
 
-  average_pooling_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (average_pooling_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Average Pooling operator descriptor", sizeof(struct xnn_operator));
     goto error;
@@ -313,13 +313,12 @@
   const uint32_t mr = xnn_params.f32.avgpool.mr;
   const uint32_t qr = xnn_params.f32.avgpool.qr;
   if (any_padding || pooling_size < mr || (pooling_size - mr) % qr != 0) {
-    void* zero_buffer = xnn_allocate_memory(channels * sizeof(float) + XNN_EXTRA_BYTES);
+    void* zero_buffer = xnn_allocate_zero_simd_memory(channels * sizeof(float) + XNN_EXTRA_BYTES);
     if (zero_buffer == NULL) {
       xnn_log_error("failed to allocate %zu bytes for Average Pooling zero padding",
         channels * sizeof(float) + XNN_EXTRA_BYTES);
       goto error;
     }
-    memset(zero_buffer, 0, channels * sizeof(float));
     average_pooling_op->zero_buffer = zero_buffer;
   }
 
@@ -431,7 +430,7 @@
   const size_t step_height = pooling_size + (output_width * step_width - 1) * pooling_height;
   const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height);
 
-  const void** indirection_buffer = (const void**) realloc(average_pooling_op->indirection_buffer, indirection_buffer_size);
+  const void** indirection_buffer = (const void**) xnn_reallocate_memory(average_pooling_op->indirection_buffer, indirection_buffer_size);
   if (indirection_buffer == NULL) {
     xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
     return xnn_status_out_of_memory;
@@ -559,7 +558,7 @@
   const size_t step_height = pooling_size + (output_width * step_width - 1) * pooling_height;
   const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height);
 
-  const void** indirection_buffer = (const void**) realloc(average_pooling_op->indirection_buffer, indirection_buffer_size);
+  const void** indirection_buffer = (const void**) xnn_reallocate_memory(average_pooling_op->indirection_buffer, indirection_buffer_size);
   if (indirection_buffer == NULL) {
     xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
     return xnn_status_out_of_memory;
@@ -611,7 +610,7 @@
           input_width != average_pooling_op->last_input_width)
       {
         const size_t pixelwise_buffer_size = output_height * output_width * sizeof(float);
-        float* pixelwise_buffer = (float*) realloc(average_pooling_op->pixelwise_buffer, pixelwise_buffer_size);
+        float* pixelwise_buffer = (float*) xnn_reallocate_memory(average_pooling_op->pixelwise_buffer, pixelwise_buffer_size);
         if (pixelwise_buffer == NULL) {
           xnn_log_error("failed to allocate %zu bytes for pixelwise buffer", pixelwise_buffer_size);
           return xnn_status_out_of_memory;
diff --git a/src/channel-pad-nc.c b/src/channel-pad-nc.c
index 608c4d8..b40facf 100644
--- a/src/channel-pad-nc.c
+++ b/src/channel-pad-nc.c
@@ -62,7 +62,7 @@
 
   status = xnn_status_out_of_memory;
 
-  channel_pad_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  channel_pad_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (channel_pad_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Channel Pad operator descriptor", sizeof(struct xnn_operator));
     goto error;
diff --git a/src/channel-shuffle-nc.c b/src/channel-shuffle-nc.c
index c2fd2f1..fc01b89 100644
--- a/src/channel-shuffle-nc.c
+++ b/src/channel-shuffle-nc.c
@@ -70,7 +70,7 @@
 
   status = xnn_status_out_of_memory;
 
-  channel_shuffle_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  channel_shuffle_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (channel_shuffle_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Channel Shuffle operator descriptor", sizeof(struct xnn_operator));
     goto error;
diff --git a/src/clamp-nc.c b/src/clamp-nc.c
index 852d926..97c1f74 100644
--- a/src/clamp-nc.c
+++ b/src/clamp-nc.c
@@ -69,7 +69,7 @@
 
   status = xnn_status_out_of_memory;
 
-  clamp_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  clamp_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (clamp_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Clamp operator descriptor", sizeof(struct xnn_operator));
     goto error;
@@ -155,7 +155,7 @@
 
   status = xnn_status_out_of_memory;
 
-  clamp_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  clamp_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (clamp_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Clamp operator descriptor", sizeof(struct xnn_operator));
     goto error;
diff --git a/src/convolution-nchw.c b/src/convolution-nchw.c
index 24c61a6..5b55553 100644
--- a/src/convolution-nchw.c
+++ b/src/convolution-nchw.c
@@ -193,7 +193,7 @@
 
   status = xnn_status_out_of_memory;
 
-  convolution_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  convolution_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (convolution_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Convolution operator descriptor", sizeof(struct xnn_operator));
     goto error;
@@ -273,7 +273,7 @@
       const size_t packed_weights_size = num_output_channel_blocks * sizeof(uint32_t) +
         (num_nonzero_blocks * 2) * sizeof(int32_t) + (num_nonzero_values + group_output_channels) * sizeof(float);
 
-      convolution_op->packed_weights = xnn_allocate_memory(packed_weights_size);
+      convolution_op->packed_weights = xnn_allocate_simd_memory(packed_weights_size);
       if (convolution_op->packed_weights == NULL) {
         xnn_log_error("failed to allocate %zu bytes for packed weights", packed_weights_size);
         goto error;
@@ -384,7 +384,7 @@
         round_up(group_output_channels, xnn_params.f32.hwc2spchw_dconv3x3c3s2.output_channel_tile);
       const size_t packed_weights_size = groups * packed_group_output_channels *
         (group_input_channels * kernel_height * kernel_width + 1 /* bias */) * sizeof(float);
-      convolution_op->packed_weights = xnn_allocate_memory(packed_weights_size);
+      convolution_op->packed_weights = xnn_allocate_simd_memory(packed_weights_size);
       if (convolution_op->packed_weights == NULL) {
         xnn_log_error("failed to allocate %zu bytes for packed weights", packed_weights_size);
         goto error;
@@ -412,7 +412,7 @@
       assert(group_output_channels == 1);
 
       const size_t packed_weights_size = groups * (kernel_height * kernel_width + 1 /* bias */) * sizeof(float);
-      convolution_op->packed_weights = xnn_allocate_memory(packed_weights_size);
+      convolution_op->packed_weights = xnn_allocate_simd_memory(packed_weights_size);
       if (convolution_op->packed_weights == NULL) {
         xnn_log_error("failed to allocate %zu bytes for packed weights", packed_weights_size);
         goto error;
@@ -608,7 +608,7 @@
     case xnn_ukernel_type_dconv2d_hwc2spchw:
     {
       const size_t zero_size = (input_width * convolution_op->group_input_channels << log2_input_element_size) + XNN_EXTRA_BYTES;
-      void* zero_buffer = realloc(convolution_op->zero_buffer, zero_size);
+      void* zero_buffer = xnn_reallocate_memory(convolution_op->zero_buffer, zero_size);
       if (zero_buffer == NULL) {
         xnn_log_error("failed to allocate %zu bytes for zero padding", sizeof(struct xnn_operator));
         return xnn_status_out_of_memory;
diff --git a/src/convolution-nhwc.c b/src/convolution-nhwc.c
index 946cd6d..ff8f199 100644
--- a/src/convolution-nhwc.c
+++ b/src/convolution-nhwc.c
@@ -221,7 +221,7 @@
 
   status = xnn_status_out_of_memory;
 
-  convolution_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  convolution_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (convolution_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Convolution operator descriptor", sizeof(struct xnn_operator));
     goto error;
@@ -250,7 +250,7 @@
 
       const uint32_t c_stride = round_up_po2(groups, dwconv_parameters->cr);
       const size_t packed_weights_size = (sizeof(uint8_t) * kernel_size + sizeof(int32_t)) * c_stride;
-      convolution_op->packed_weights = xnn_allocate_memory(packed_weights_size);
+      convolution_op->packed_weights = xnn_allocate_simd_memory(packed_weights_size);
       if (convolution_op->packed_weights == NULL) {
         xnn_log_error("failed to allocate %zu bytes for packed weights", packed_weights_size);
         goto error;
@@ -289,7 +289,7 @@
 
       const size_t packed_group_weights_size =
         (sizeof(uint8_t) * kernel_size * k_stride + sizeof(int32_t)) * n_stride;
-      convolution_op->packed_weights = xnn_allocate_memory(packed_group_weights_size * groups);
+      convolution_op->packed_weights = xnn_allocate_simd_memory(packed_group_weights_size * groups);
       if (convolution_op->packed_weights == NULL) {
         xnn_log_error("failed to allocate %zu bytes for packed weights", packed_group_weights_size * groups);
         goto error;
@@ -344,7 +344,7 @@
 
   const bool tf_same_padding = (flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0 && kernel_size != 1;
   if (any_padding || tf_same_padding) {
-    void* zero_buffer = xnn_allocate_memory(zero_size);
+    void* zero_buffer = xnn_allocate_simd_memory(zero_size);
     if (zero_buffer == NULL) {
       xnn_log_error("failed to allocate %zu bytes for zero padding", zero_size);
       goto error;
@@ -530,7 +530,7 @@
 
   status = xnn_status_out_of_memory;
 
-  convolution_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  convolution_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (convolution_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Convolution operator descriptor", sizeof(struct xnn_operator));
     goto error;
@@ -559,7 +559,7 @@
     {
       const uint32_t c_stride = round_up_po2(groups, xnn_params.f32.vmulcaddc.channel_tile);
       const size_t packed_weights_size = 2 * sizeof(float) * c_stride;
-      convolution_op->packed_weights = xnn_allocate_memory(packed_weights_size);
+      convolution_op->packed_weights = xnn_allocate_simd_memory(packed_weights_size);
       if (convolution_op->packed_weights == NULL) {
         xnn_log_error("failed to allocate %zu bytes for packed weights", packed_weights_size);
         goto error;
@@ -582,7 +582,7 @@
 
       const uint32_t c_stride = round_up_po2(groups, dwconv_parameters->cr);
       const size_t packed_weights_size = (kernel_size + 1) * sizeof(float) * c_stride;
-      convolution_op->packed_weights = xnn_allocate_memory(packed_weights_size);
+      convolution_op->packed_weights = xnn_allocate_simd_memory(packed_weights_size);
       if (convolution_op->packed_weights == NULL) {
         xnn_log_error("failed to allocate %zu bytes for packed weights", packed_weights_size);
         goto error;
@@ -619,7 +619,7 @@
       const uint32_t k_stride = round_up_po2(group_input_channels, kr);
 
       const size_t packed_group_weights_size = (kernel_size * k_stride + 1) * sizeof(float) * n_stride;
-      convolution_op->packed_weights = xnn_allocate_memory(packed_group_weights_size * groups);
+      convolution_op->packed_weights = xnn_allocate_simd_memory(packed_group_weights_size * groups);
       if (convolution_op->packed_weights == NULL) {
         xnn_log_error("failed to allocate %zu bytes for packed weights", packed_group_weights_size * groups);
         goto error;
@@ -673,7 +673,7 @@
 
   const bool tf_same_padding = (flags & XNN_FLAG_TENSORFLOW_SAME_PADDING) != 0 && kernel_size != 1;
   if (any_padding || tf_same_padding) {
-    void* zero_buffer = xnn_allocate_zero_memory(zero_size);
+    void* zero_buffer = xnn_allocate_zero_simd_memory(zero_size);
     if (zero_buffer == NULL) {
       xnn_log_error("failed to allocate %zu bytes for zero padding", zero_size);
       goto error;
@@ -876,7 +876,7 @@
       if (input_height != convolution_op->last_input_height ||
           input_width != convolution_op->last_input_width)
       {
-        const void** indirection_buffer = (const void**) realloc(convolution_op->indirection_buffer, indirection_buffer_size);
+        const void** indirection_buffer = (const void**) xnn_reallocate_memory(convolution_op->indirection_buffer, indirection_buffer_size);
         if (indirection_buffer == NULL) {
           xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
           return xnn_status_out_of_memory;
@@ -970,7 +970,7 @@
       const size_t indirection_buffer_size = sizeof(void*) * batch_size * output_height * step_height;
 
       const void** indirection_buffer =
-        (const void**) realloc(convolution_op->indirection_buffer, indirection_buffer_size);
+        (const void**) xnn_reallocate_memory(convolution_op->indirection_buffer, indirection_buffer_size);
       if (indirection_buffer == NULL) {
         xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
         return xnn_status_out_of_memory;
diff --git a/src/deconvolution-nhwc.c b/src/deconvolution-nhwc.c
index 2aaed66..3b3b091 100644
--- a/src/deconvolution-nhwc.c
+++ b/src/deconvolution-nhwc.c
@@ -181,7 +181,7 @@
 
   status = xnn_status_out_of_memory;
 
-  deconvolution_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  deconvolution_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (deconvolution_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Deconvolution operator descriptor", sizeof(struct xnn_operator));
     goto error;
@@ -204,7 +204,7 @@
       (sizeof(uint8_t) * kernel_size * k_stride + sizeof(int32_t) * subkernels);
 
     const size_t subconvolution_buffer_size = sizeof(struct subconvolution_params) * subkernels;
-    deconvolution_op->subconvolution_buffer = xnn_allocate_zero_memory(subconvolution_buffer_size);
+    deconvolution_op->subconvolution_buffer = xnn_allocate_zero_simd_memory(subconvolution_buffer_size);
     if (deconvolution_op->subconvolution_buffer == NULL) {
       xnn_log_error("failed to allocate %zu bytes for subconvolution buffer", subconvolution_buffer_size);
       goto error;
@@ -223,7 +223,7 @@
       }
     }
   }
-  deconvolution_op->packed_weights = xnn_allocate_memory(packed_group_weights_size * groups);
+  deconvolution_op->packed_weights = xnn_allocate_simd_memory(packed_group_weights_size * groups);
   if (deconvolution_op->packed_weights == NULL) {
     xnn_log_error("failed to allocate %zu bytes for packed weights", packed_group_weights_size * groups);
     goto error;
@@ -251,7 +251,7 @@
   }
 
   size_t zero_size = sizeof(uint8_t) * k_stride + XNN_EXTRA_BYTES;
-  void* zero_buffer = xnn_allocate_memory(zero_size);
+  void* zero_buffer = xnn_allocate_simd_memory(zero_size);
   if (zero_buffer == NULL) {
     xnn_log_error("failed to allocate %zu bytes for zero padding", zero_size);
     goto error;
@@ -419,7 +419,7 @@
 
   status = xnn_status_out_of_memory;
 
-  deconvolution_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  deconvolution_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (deconvolution_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Deconvolution operator descriptor", sizeof(struct xnn_operator));
     goto error;
@@ -453,7 +453,7 @@
       (sizeof(float) * kernel_size * k_stride + sizeof(float) * subkernels);
 
     const size_t subconvolution_buffer_size = sizeof(struct subconvolution_params) * subkernels;
-    deconvolution_op->subconvolution_buffer = xnn_allocate_zero_memory(subconvolution_buffer_size);
+    deconvolution_op->subconvolution_buffer = xnn_allocate_zero_simd_memory(subconvolution_buffer_size);
     if (deconvolution_op->subconvolution_buffer == NULL) {
       xnn_log_error("failed to allocate %zu bytes for subconvolution buffer", subconvolution_buffer_size);
       goto error;
@@ -472,7 +472,7 @@
       }
     }
   }
-  deconvolution_op->packed_weights = xnn_allocate_memory(packed_group_weights_size * groups);
+  deconvolution_op->packed_weights = xnn_allocate_simd_memory(packed_group_weights_size * groups);
   if (deconvolution_op->packed_weights == NULL) {
     xnn_log_error("failed to allocate %zu bytes for packed weights", packed_group_weights_size * groups);
     goto error;
@@ -498,7 +498,7 @@
   }
 
   const size_t zero_size = k_stride * sizeof(float) + XNN_EXTRA_BYTES;
-  void* zero_buffer = xnn_allocate_zero_memory(zero_size);
+  void* zero_buffer = xnn_allocate_zero_simd_memory(zero_size);
   if (zero_buffer == NULL) {
     xnn_log_error("failed to allocate %zu bytes for zero padding", zero_size);
     goto error;
@@ -574,7 +574,7 @@
   if (input_height != deconvolution_op->last_input_height ||
       input_width != deconvolution_op->last_input_width)
   {
-    const void** indirection_buffer = (const void**) realloc(deconvolution_op->indirection_buffer, indirection_buffer_size);
+    const void** indirection_buffer = (const void**) xnn_reallocate_memory(deconvolution_op->indirection_buffer, indirection_buffer_size);
     if (indirection_buffer == NULL) {
       xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
       return xnn_status_out_of_memory;
@@ -681,7 +681,7 @@
   if (input_height != deconvolution_op->last_input_height ||
       input_width != deconvolution_op->last_input_width)
   {
-    const void** indirection_buffer = (const void**) realloc(deconvolution_op->indirection_buffer, indirection_buffer_size);
+    const void** indirection_buffer = (const void**) xnn_reallocate_memory(deconvolution_op->indirection_buffer, indirection_buffer_size);
     if (indirection_buffer == NULL) {
       xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
       return xnn_status_out_of_memory;
diff --git a/src/fully-connected-nc.c b/src/fully-connected-nc.c
index 3913934..9399a80 100644
--- a/src/fully-connected-nc.c
+++ b/src/fully-connected-nc.c
@@ -123,7 +123,7 @@
 
   status = xnn_status_out_of_memory;
 
-  fully_connected_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  fully_connected_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (fully_connected_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Fully Connected operator descriptor", sizeof(struct xnn_operator));
     goto error;
@@ -135,7 +135,7 @@
   const uint32_t n_stride = round_up(output_channels, nr);
   const uint32_t k_stride = round_up_po2(input_channels, kr);
 
-  fully_connected_op->packed_weights = xnn_allocate_memory(n_stride * (k_stride * sizeof(uint8_t) + sizeof(int32_t)));
+  fully_connected_op->packed_weights = xnn_allocate_simd_memory(n_stride * (k_stride * sizeof(uint8_t) + sizeof(int32_t)));
   if (fully_connected_op->packed_weights == NULL) {
     xnn_log_error("failed to allocate %zu bytes for packed weights",
       n_stride * (k_stride * sizeof(uint8_t) + sizeof(int32_t)));
@@ -255,7 +255,7 @@
 
   status = xnn_status_out_of_memory;
 
-  fully_connected_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  fully_connected_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (fully_connected_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Fully Connected operator descriptor", sizeof(struct xnn_operator));
     goto error;
@@ -268,7 +268,7 @@
   const uint32_t n_stride = round_up(output_channels, nr);
   const uint32_t k_stride = round_up_po2(input_channels, kr);
 
-  fully_connected_op->packed_weights = xnn_allocate_memory(n_stride * (k_stride * sizeof(float) + sizeof(float)));
+  fully_connected_op->packed_weights = xnn_allocate_simd_memory(n_stride * (k_stride * sizeof(float) + sizeof(float)));
   if (fully_connected_op->packed_weights == NULL) {
     xnn_log_error("failed to allocate %zu bytes for packed weights",
       n_stride * (k_stride * sizeof(float) + sizeof(float)));
diff --git a/src/global-average-pooling-ncw.c b/src/global-average-pooling-ncw.c
index 6bf0dc1..2f6db07 100644
--- a/src/global-average-pooling-ncw.c
+++ b/src/global-average-pooling-ncw.c
@@ -71,7 +71,7 @@
 
   status = xnn_status_out_of_memory;
 
-  global_average_pooling_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  global_average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (global_average_pooling_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Global Average Pooling operator descriptor", sizeof(struct xnn_operator));
     goto error;
diff --git a/src/global-average-pooling-nwc.c b/src/global-average-pooling-nwc.c
index 6bf781f..5f757ab 100644
--- a/src/global-average-pooling-nwc.c
+++ b/src/global-average-pooling-nwc.c
@@ -103,13 +103,13 @@
 
   status = xnn_status_out_of_memory;
 
-  global_average_pooling_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  global_average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (global_average_pooling_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Global Average Pooling operator descriptor", sizeof(struct xnn_operator));
     goto error;
   }
 
-  void* zero_buffer = xnn_allocate_zero_memory(channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
+  void* zero_buffer = xnn_allocate_zero_simd_memory(channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
   if (zero_buffer == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Global Average Pooling zero padding",
       channels * sizeof(uint8_t) + XNN_EXTRA_BYTES);
@@ -204,13 +204,13 @@
 
   status = xnn_status_out_of_memory;
 
-  global_average_pooling_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  global_average_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (global_average_pooling_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Global Average Pooling operator descriptor", sizeof(struct xnn_operator));
     goto error;
   }
 
-  void* zero_buffer = xnn_allocate_zero_memory(channels * sizeof(float) + XNN_EXTRA_BYTES);
+  void* zero_buffer = xnn_allocate_zero_simd_memory(channels * sizeof(float) + XNN_EXTRA_BYTES);
   if (zero_buffer == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Global Average Pooling zero padding",
       channels * sizeof(float) + XNN_EXTRA_BYTES);
diff --git a/src/hardswish-nc.c b/src/hardswish-nc.c
index 4253c08..c3f4b6c 100644
--- a/src/hardswish-nc.c
+++ b/src/hardswish-nc.c
@@ -9,6 +9,7 @@
 #include <stdlib.h>
 
 #include <xnnpack.h>
+#include <xnnpack/allocator.h>
 #include <xnnpack/log.h>
 #include <xnnpack/operator.h>
 #include <xnnpack/params-init.h>
@@ -56,7 +57,7 @@
 
   status = xnn_status_out_of_memory;
 
-  hardswish_op = calloc(1, sizeof(struct xnn_operator));
+  hardswish_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (hardswish_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for xnn_operator structure", sizeof(struct xnn_operator));
     goto error;
diff --git a/src/init.c b/src/init.c
index f43fcf1..f63d7a8 100644
--- a/src/init.c
+++ b/src/init.c
@@ -9,6 +9,7 @@
 #include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
+#include <string.h>
 
 #include <pthread.h>
 
@@ -31,6 +32,7 @@
 #include <xnnpack/log.h>
 #include <xnnpack/lut.h>
 #include <xnnpack/maxpool.h>
+#include <xnnpack/memory.h>
 #include <xnnpack/pad.h>
 #include <xnnpack/params.h>
 #include <xnnpack/pavgpool.h>
@@ -1150,7 +1152,7 @@
   xnn_params.initialized = true;
 }
 
-enum xnn_status xnn_initialize(void) {
+enum xnn_status xnn_initialize(const struct xnn_allocator* allocator) {
   #ifndef __EMSCRIPTEN__
     if (!cpuinfo_initialize()) {
       return xnn_status_out_of_memory;
@@ -1158,6 +1160,15 @@
   #endif
   pthread_once(&init_guard, &init);
   if (xnn_params.initialized) {
+    if (allocator != NULL) {
+      memcpy(&xnn_params.allocator, allocator, sizeof(struct xnn_allocator));
+    } else {
+      xnn_params.allocator.allocate = &xnn_allocate;
+      xnn_params.allocator.reallocate = &xnn_reallocate;
+      xnn_params.allocator.deallocate = &xnn_deallocate;
+      xnn_params.allocator.aligned_allocate = &xnn_aligned_allocate;
+      xnn_params.allocator.aligned_deallocate = &xnn_aligned_deallocate;
+    }
     return xnn_status_success;
   } else {
     return xnn_status_unsupported_hardware;
diff --git a/src/leaky-relu-nc.c b/src/leaky-relu-nc.c
index 860f44a..fda6b3a 100644
--- a/src/leaky-relu-nc.c
+++ b/src/leaky-relu-nc.c
@@ -112,13 +112,13 @@
 
   status = xnn_status_out_of_memory;
 
-  leaky_relu_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  leaky_relu_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (leaky_relu_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Leaky ReLU operator descriptor", sizeof(struct xnn_operator));
     goto error;
   }
 
-  leaky_relu_op->lookup_table = xnn_allocate_memory(256 * sizeof(uint8_t));
+  leaky_relu_op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint8_t));
   if (leaky_relu_op->lookup_table == NULL) {
     xnn_log_error("failed to allocate 256 bytes for Leaky ReLU lookup table");
     goto error;
diff --git a/src/max-pooling-nhwc.c b/src/max-pooling-nhwc.c
index fb159bb..8f13af6 100644
--- a/src/max-pooling-nhwc.c
+++ b/src/max-pooling-nhwc.c
@@ -128,7 +128,7 @@
 
   status = xnn_status_out_of_memory;
 
-  max_pooling_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  max_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (max_pooling_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Max Pooling operator descriptor", sizeof(struct xnn_operator));
     goto error;
@@ -269,7 +269,7 @@
 
   status = xnn_status_out_of_memory;
 
-  max_pooling_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  max_pooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (max_pooling_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Max Pooling operator descriptor", sizeof(struct xnn_operator));
     goto error;
@@ -380,7 +380,7 @@
   const size_t step_height = pooling_size + (output_width * step_width - 1) * pooling_height;
   const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height);
 
-  const void** indirection_buffer = (const void**) realloc(max_pooling_op->indirection_buffer, indirection_buffer_size);
+  const void** indirection_buffer = (const void**) xnn_reallocate_memory(max_pooling_op->indirection_buffer, indirection_buffer_size);
   if (indirection_buffer == NULL) {
     xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
     return xnn_status_out_of_memory;
@@ -502,7 +502,7 @@
   const size_t step_height = pooling_size + (output_width * step_width - 1) * pooling_height;
   const size_t indirection_buffer_size = sizeof(void*) * ((mr - 1) + batch_size * output_height * step_height);
 
-  const void** indirection_buffer = (const void**) realloc(max_pooling_op->indirection_buffer, indirection_buffer_size);
+  const void** indirection_buffer = (const void**) xnn_reallocate_memory(max_pooling_op->indirection_buffer, indirection_buffer_size);
   if (indirection_buffer == NULL) {
     xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
     return xnn_status_out_of_memory;
diff --git a/src/memory.c b/src/memory.c
new file mode 100644
index 0000000..a9683bd
--- /dev/null
+++ b/src/memory.c
@@ -0,0 +1,53 @@
+// Copyright 2019 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+#ifdef __ANDROID__
+  #include <malloc.h>
+#endif
+
+#include <xnnpack/common.h>
+#include <xnnpack/memory.h>
+
+
+extern int posix_memalign(void **memptr, size_t alignment, size_t size);
+
+
+void* xnn_allocate(void* context, size_t size) {
+  return malloc(size);
+}
+
+void* xnn_reallocate(void* context, void* pointer, size_t size) {
+  return realloc(pointer, size);
+}
+
+void xnn_deallocate(void* context, void* pointer) {
+  if XNN_LIKELY(pointer != NULL) {
+    free(pointer);
+  }
+}
+
+void* xnn_aligned_allocate(void* context, size_t alignment, size_t size) {
+#if XNN_ARCH_ASMJS || XNN_ARCH_WASM
+  assert(alignment <= 2 * sizeof(void*));
+  return malloc(size);
+#elif defined(__ANDROID__)
+  return memalign(alignment, size);
+#else
+  void* memory_ptr = NULL;
+  if (posix_memalign(&memory_ptr, alignment, size) != 0) {
+    return NULL;
+  }
+  return memory_ptr;
+#endif
+}
+
+void xnn_aligned_deallocate(void* context, void* pointer) {
+  if XNN_LIKELY(pointer != NULL) {
+    free(pointer);
+  }
+}
diff --git a/src/multiply-nd.c b/src/multiply-nd.c
index 9070648..27dc4cf 100644
--- a/src/multiply-nd.c
+++ b/src/multiply-nd.c
@@ -54,7 +54,7 @@
 
   status = xnn_status_out_of_memory;
 
-  multiply_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  multiply_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (multiply_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Multiply operator descriptor", sizeof(struct xnn_operator));
     goto error;
diff --git a/src/operator-delete.c b/src/operator-delete.c
index 9d225b5..3e0eeaa 100644
--- a/src/operator-delete.c
+++ b/src/operator-delete.c
@@ -26,12 +26,12 @@
     return xnn_status_invalid_parameter;
   }
 
-  free(op->indirection_buffer);
-  xnn_release_memory(op->packed_weights);
-  xnn_release_memory(op->zero_buffer);
-  free(op->pixelwise_buffer);
-  free(op->subconvolution_buffer);
-  xnn_release_memory(op->lookup_table);
-  xnn_release_memory(op);
+  xnn_release_memory(op->indirection_buffer);
+  xnn_release_simd_memory(op->packed_weights);
+  xnn_release_simd_memory(op->zero_buffer);
+  xnn_release_memory(op->pixelwise_buffer);
+  xnn_release_memory(op->subconvolution_buffer);
+  xnn_release_simd_memory(op->lookup_table);
+  xnn_release_simd_memory(op);
   return xnn_status_success;
 }
diff --git a/src/prelu-nc.c b/src/prelu-nc.c
index 9b71362..49a122a 100644
--- a/src/prelu-nc.c
+++ b/src/prelu-nc.c
@@ -68,14 +68,14 @@
 
   status = xnn_status_out_of_memory;
 
-  prelu_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  prelu_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (prelu_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for PReLU operator descriptor", sizeof(struct xnn_operator));
     goto error;
   }
 
   const size_t packed_channels = round_up_po2(channels, XNN_EXTRA_BYTES / sizeof(float));
-  prelu_op->packed_weights = xnn_allocate_memory(packed_channels * sizeof(float));
+  prelu_op->packed_weights = xnn_allocate_simd_memory(packed_channels * sizeof(float));
   if (prelu_op->packed_weights == NULL) {
     xnn_log_error("failed to allocate %zu bytes for packed slope data",
       packed_channels * sizeof(float));
diff --git a/src/resize-bilinear-nhwc.c b/src/resize-bilinear-nhwc.c
index 363bbba..f8ff604 100644
--- a/src/resize-bilinear-nhwc.c
+++ b/src/resize-bilinear-nhwc.c
@@ -62,7 +62,7 @@
 
   status = xnn_status_out_of_memory;
 
-  resize_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  resize_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (resize_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Resize Bilinear operator descriptor", sizeof(struct xnn_operator));
     goto error;
@@ -147,14 +147,14 @@
     const size_t indirection_buffer_size = sizeof(void*) * (output_height * output_width * 4);
     const size_t packed_weights_size = sizeof(float) * (output_height * output_width * 2);
 
-    const void** indirection_buffer = (const void**) realloc(resize_op->indirection_buffer, indirection_buffer_size);
+    const void** indirection_buffer = (const void**) xnn_reallocate_memory(resize_op->indirection_buffer, indirection_buffer_size);
     if (indirection_buffer == NULL) {
       xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
       return xnn_status_out_of_memory;
     }
     resize_op->indirection_buffer = indirection_buffer;
 
-    float* packed_weights = (float*) realloc(resize_op->packed_weights, packed_weights_size);
+    float* packed_weights = (float*) xnn_reallocate_memory(resize_op->packed_weights, packed_weights_size);
     if (packed_weights == NULL) {
       xnn_log_error("failed to allocate %zu bytes for packed weights", packed_weights_size);
       return xnn_status_out_of_memory;
diff --git a/src/sigmoid-nc.c b/src/sigmoid-nc.c
index f74fa2c..8ea899c 100644
--- a/src/sigmoid-nc.c
+++ b/src/sigmoid-nc.c
@@ -102,13 +102,13 @@
 
   status = xnn_status_out_of_memory;
 
-  sigmoid_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  sigmoid_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (sigmoid_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Sigmoid operator descriptor", sizeof(struct xnn_operator));
     goto error;
   }
 
-  sigmoid_op->lookup_table = xnn_allocate_memory(256 * sizeof(uint8_t));
+  sigmoid_op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint8_t));
   if (sigmoid_op->lookup_table == NULL) {
     xnn_log_error("failed to allocate 256 bytes for Sigmoid lookup table");
     goto error;
@@ -197,7 +197,7 @@
 
   status = xnn_status_out_of_memory;
 
-  sigmoid_op = calloc(1, sizeof(struct xnn_operator));
+  sigmoid_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (sigmoid_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for xnn_operator structure", sizeof(struct xnn_operator));
     goto error;
diff --git a/src/softargmax-nc.c b/src/softargmax-nc.c
index 3ac1671..c84915b 100644
--- a/src/softargmax-nc.c
+++ b/src/softargmax-nc.c
@@ -93,13 +93,13 @@
 
   status = xnn_status_out_of_memory;
 
-  softargmax_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  softargmax_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (softargmax_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for SoftArgMax operator descriptor", sizeof(struct xnn_operator));
     goto error;
   }
 
-  softargmax_op->lookup_table = xnn_allocate_memory(256 * sizeof(uint32_t));
+  softargmax_op->lookup_table = xnn_allocate_simd_memory(256 * sizeof(uint32_t));
   if (softargmax_op->lookup_table == NULL) {
     xnn_log_error("failed to allocate 256 bytes for SoftArgMax lookup table");
     goto error;
diff --git a/src/unpooling-nhwc.c b/src/unpooling-nhwc.c
index de8e14a..a423053 100644
--- a/src/unpooling-nhwc.c
+++ b/src/unpooling-nhwc.c
@@ -92,7 +92,7 @@
 
   status = xnn_status_out_of_memory;
 
-  unpooling_op = xnn_allocate_zero_memory(sizeof(struct xnn_operator));
+  unpooling_op = xnn_allocate_zero_simd_memory(sizeof(struct xnn_operator));
   if (unpooling_op == NULL) {
     xnn_log_error("failed to allocate %zu bytes for Unpooling operator descriptor", sizeof(struct xnn_operator));
     goto error;
@@ -187,7 +187,7 @@
 
   const size_t indirection_buffer_size = sizeof(void*) * (batch_size * input_height * input_width * pooling_size);
 
-  void** indirection_buffer = (void**) realloc(unpooling_op->indirection_buffer, indirection_buffer_size);
+  void** indirection_buffer = (void**) xnn_reallocate_memory(unpooling_op->indirection_buffer, indirection_buffer_size);
   if (indirection_buffer == NULL) {
     xnn_log_error("failed to allocate %zu bytes for indirection buffer", indirection_buffer_size);
     return xnn_status_out_of_memory;
diff --git a/src/xnnpack/allocator.h b/src/xnnpack/allocator.h
index c946656..8aef8b2 100644
--- a/src/xnnpack/allocator.h
+++ b/src/xnnpack/allocator.h
@@ -8,40 +8,42 @@
 #include <stddef.h>
 #include <stdlib.h>
 #include <string.h>
-#ifdef __ANDROID__
-  #include <malloc.h>
-#endif
 
 #include <xnnpack/common.h>
-
-extern int posix_memalign(void **memptr, size_t alignment, size_t size);
+#include <xnnpack/params.h>  // for xnn_params.allocator (user-overridable callbacks)
 
 
-#define XNN_ALLOCATION_ALIGNMENT 16
-
+#if XNN_ARCH_ASMJS || XNN_ARCH_WASM
+  #define XNN_ALLOCATION_ALIGNMENT 4  // NOTE(review): smaller alignment on Web targets — presumably no wide SIMD loads to serve; confirm
+#else
+  #define XNN_ALLOCATION_ALIGNMENT 16
+#endif
 
 inline static void* xnn_allocate_memory(size_t memory_size) {
-  void* memory_ptr = NULL;
-#if XNN_ARCH_ASMJS || XNN_ARCH_WASM
-  memory_ptr = malloc(memory_size);
-#elif defined(__ANDROID__)
-  memory_ptr = memalign(XNN_ALLOCATION_ALIGNMENT, memory_size);
-#else
-  if (posix_memalign(&memory_ptr, XNN_ALLOCATION_ALIGNMENT, memory_size) != 0) {
-    return NULL;
-  }
-#endif
-  return memory_ptr;
+  return xnn_params.allocator.allocate(xnn_params.allocator.context, memory_size);  // dispatch to the (possibly user-provided) allocator
 }
 
-inline static void* xnn_allocate_zero_memory(size_t memory_size) {
-  void* memory_ptr = xnn_allocate_memory(memory_size);
-  if (memory_ptr != NULL) {
-    memset(memory_ptr, 0, memory_size);
-  }
-  return memory_ptr;
+inline static void* xnn_reallocate_memory(void* memory_pointer, size_t memory_size) {
+  return xnn_params.allocator.reallocate(xnn_params.allocator.context, memory_pointer, memory_size);  // user-overridable realloc
 }
 
-inline static void xnn_release_memory(void* memory_ptr) {
-  free(memory_ptr);
+inline static void xnn_release_memory(void* memory_pointer) {
+  xnn_params.allocator.deallocate(xnn_params.allocator.context, memory_pointer);  // user-overridable free
+}
+
+inline static void* xnn_allocate_simd_memory(size_t memory_size) {
+  return xnn_params.allocator.aligned_allocate(xnn_params.allocator.context, XNN_ALLOCATION_ALIGNMENT, memory_size);  // aligned for SIMD access (hence "simd" in the name)
+}
+
+inline static void* xnn_allocate_zero_simd_memory(size_t memory_size) {
+  void* memory_pointer = xnn_params.allocator.aligned_allocate(
+    xnn_params.allocator.context, XNN_ALLOCATION_ALIGNMENT, memory_size);
+  if (memory_pointer != NULL) {
+    memset(memory_pointer, 0, memory_size);  // zero-fill on success, mirroring calloc semantics
+  }
+  return memory_pointer;
+}
+
+inline static void xnn_release_simd_memory(void* memory_pointer) {
+  xnn_params.allocator.aligned_deallocate(xnn_params.allocator.context, memory_pointer);  // must pair with xnn_allocate_(zero_)simd_memory
 }
diff --git a/src/xnnpack/memory.h b/src/xnnpack/memory.h
new file mode 100644
index 0000000..a45a915
--- /dev/null
+++ b/src/xnnpack/memory.h
@@ -0,0 +1,17 @@
+// Copyright 2019 Google LLC
+//
+// This source code is licensed under the BSD-style license found in the
+// LICENSE file in the root directory of this source tree.
+
+#pragma once
+
+#include <stddef.h>
+
+#include <xnnpack/common.h>
+
+
+XNN_INTERNAL void* xnn_allocate(void* context, size_t size);  // default allocate callback (malloc-based); context is ignored
+XNN_INTERNAL void* xnn_reallocate(void* context, void* pointer, size_t size);  // default reallocate callback (realloc-based); context is ignored
+XNN_INTERNAL void xnn_deallocate(void* context, void* pointer);  // default deallocate callback (free-based); NULL-safe
+XNN_INTERNAL void* xnn_aligned_allocate(void* context, size_t alignment, size_t size);  // default aligned allocation (posix_memalign / memalign / malloc, per platform)
+XNN_INTERNAL void xnn_aligned_deallocate(void* context, void* pointer);  // default aligned deallocation (free-based); NULL-safe
diff --git a/src/xnnpack/params.h b/src/xnnpack/params.h
index 028e3ba..88e56d0 100644
--- a/src/xnnpack/params.h
+++ b/src/xnnpack/params.h
@@ -12,6 +12,7 @@
 #include <stddef.h>
 #include <stdint.h>
 
+#include <xnnpack.h>
 #include <xnnpack/common.h>
 
 struct xnn_f16_output_params {
@@ -1321,6 +1322,7 @@
 
 struct xnn_parameters {
   bool initialized;
+  struct xnn_allocator allocator;
   struct {
     struct gemm_parameters gemm;
     struct dwconv_parameters dwconv[XNN_MAX_Q8_DWCONV_UKERNELS];