Unify operator implementation filenames and tests

- Always include layout in operator name
- Standardize layout names (see the naming sketch after this list)
- Prepare for greater variability in supported layouts
- Merge testers for different layouts of the same operator
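
As context for the mechanical renames below: operator types, source
filenames, and setup log messages now follow an
<operator>_<layout>_<datatype> pattern (for example,
xnn_operator_type_convolution_spnchw_f32 becomes
xnn_operator_type_convolution_nchw_f32), and setup errors report the
layout alongside the data type, e.g. "Convolution (NCHW, F32)". The
standalone C sketch below is illustrative only; example_operator_type
and example_operator_type_to_string are hypothetical names, not
XNNPACK APIs.

    /* Hypothetical sketch of the <operator>_<layout>_<datatype> naming
     * scheme; uses a local enum and helper, not XNNPACK declarations. */
    #include <stdio.h>

    enum example_operator_type {
      example_operator_type_add_nc_f32,                      /* was: ..._add_f32 */
      example_operator_type_convolution_nhwc_q8,             /* was: ..._convolution_q8 */
      example_operator_type_convolution_nchw_f32,            /* was: ..._convolution_spnchw_f32 */
      example_operator_type_global_average_pooling_nwc_f32,  /* was: ..._global_average_pooling_f32 */
    };

    /* Maps an operator type to the "(LAYOUT, DTYPE)" form used in log messages. */
    static const char* example_operator_type_to_string(enum example_operator_type type) {
      switch (type) {
        case example_operator_type_add_nc_f32:
          return "Add (NC, F32)";
        case example_operator_type_convolution_nhwc_q8:
          return "Convolution (NHWC, Q8)";
        case example_operator_type_convolution_nchw_f32:
          return "Convolution (NCHW, F32)";
        case example_operator_type_global_average_pooling_nwc_f32:
          return "Global Average Pooling (NWC, F32)";
      }
      return "Unknown";
    }

    int main(void) {
      printf("%s\n", example_operator_type_to_string(example_operator_type_convolution_nchw_f32));
      return 0;
    }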

PiperOrigin-RevId: 281084963
diff --git a/src/add.c b/src/add-nc.c
similarity index 96%
rename from src/add.c
rename to src/add-nc.c
index 7cc1d16..8fa534e 100644
--- a/src/add.c
+++ b/src/add-nc.c
@@ -138,7 +138,7 @@
       a_scale / sum_scale, b_scale / sum_scale,
       sum_min, sum_max);
 
-  add_op->type = xnn_operator_type_add_q8;
+  add_op->type = xnn_operator_type_add_nc_q8;
   add_op->ukernel.type = xnn_ukernel_type_add;
 
   add_op->state = xnn_run_state_invalid;
@@ -234,7 +234,7 @@
   add_op->output_pixel_stride = sum_stride;
   add_op->f32_output_params = xnn_init_f32_output_params(sum_min, sum_max);
 
-  add_op->type = xnn_operator_type_add_f32;
+  add_op->type = xnn_operator_type_add_nc_f32;
   add_op->ukernel.type = xnn_ukernel_type_add;
 
   add_op->state = xnn_run_state_invalid;
@@ -255,8 +255,8 @@
     uint8_t* sum,
     pthreadpool_t threadpool)
 {
-  if (add_op->type != xnn_operator_type_add_q8) {
-    xnn_log_error("failed to setup Add (Q8) operator: operator type mismatch");
+  if (add_op->type != xnn_operator_type_add_nc_q8) {
+    xnn_log_error("failed to setup Add (NC, Q8) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
   add_op->state = xnn_run_state_invalid;
@@ -318,8 +318,8 @@
     float* sum,
     pthreadpool_t threadpool)
 {
-  if (add_op->type != xnn_operator_type_add_f32) {
-    xnn_log_error("failed to setup Add (F32) operator: operator type mismatch");
+  if (add_op->type != xnn_operator_type_add_nc_f32) {
+    xnn_log_error("failed to setup Add (NC, F32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
   add_op->state = xnn_run_state_invalid;
diff --git a/src/argmax-pooling.c b/src/argmax-pooling-nhwc.c
similarity index 96%
rename from src/argmax-pooling.c
rename to src/argmax-pooling-nhwc.c
index 45c2331..d4902ef 100644
--- a/src/argmax-pooling.c
+++ b/src/argmax-pooling-nhwc.c
@@ -58,7 +58,7 @@
   enum xnn_status status = xnn_status_uninitialized;
 
   if (!xnn_params.initialized) {
-    xnn_log_error("failed to create ArgMax Pooling operator: XNNPACK is not initialized");
+    xnn_log_error("failed to create Argmax Pooling operator: XNNPACK is not initialized");
     goto error;
   }
 
@@ -151,7 +151,7 @@
 
   argmax_pooling_op->f32_output_params = xnn_init_f32_output_params(output_min, output_max);
 
-  argmax_pooling_op->type = xnn_operator_type_argmax_pooling_f32;
+  argmax_pooling_op->type = xnn_operator_type_argmax_pooling_nhwc_f32;
   argmax_pooling_op->ukernel.type = xnn_ukernel_type_argmax_pooling;
 
   argmax_pooling_op->state = xnn_run_state_invalid;
@@ -174,20 +174,20 @@
     uint32_t* index,
     pthreadpool_t threadpool)
 {
-  if (argmax_pooling_op->type != xnn_operator_type_argmax_pooling_f32) {
-    xnn_log_error("failed to setup ArgMax Pooling (F32) operator: operator type mismatch");
+  if (argmax_pooling_op->type != xnn_operator_type_argmax_pooling_nhwc_f32) {
+    xnn_log_error("failed to setup Argmax Pooling (NHWC, F32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
   argmax_pooling_op->state = xnn_run_state_invalid;
 
   if (!xnn_params.initialized) {
-    xnn_log_error("failed to setup ArgMax Pooling operator: XNNPACK is not initialized");
+    xnn_log_error("failed to setup Argmax Pooling operator: XNNPACK is not initialized");
     return xnn_status_uninitialized;
   }
 
   if (input_width == 0 || input_height == 0) {
     xnn_log_error(
-      "failed to setup ArgMax Pooling operator with %zux%zu input: input dimensions must be non-zero",
+      "failed to setup Argmax Pooling operator with %zux%zu input: input dimensions must be non-zero",
       input_width, input_height);
     return xnn_status_invalid_parameter;
   }
diff --git a/src/average-pooling.c b/src/average-pooling-nhwc.c
similarity index 98%
rename from src/average-pooling.c
rename to src/average-pooling-nhwc.c
index 54f2596..a6b3584 100644
--- a/src/average-pooling.c
+++ b/src/average-pooling-nhwc.c
@@ -198,7 +198,7 @@
       input_scale / (output_scale * (float) pooling_size),
       output_zero_point, output_min, output_max);
 
-  average_pooling_op->type = xnn_operator_type_average_pooling_q8;
+  average_pooling_op->type = xnn_operator_type_average_pooling_nhwc_q8;
   average_pooling_op->ukernel.type = xnn_ukernel_type_average_pooling;
 
   *average_pooling_op_out = average_pooling_op;
@@ -338,7 +338,7 @@
   average_pooling_op->input_pixel_stride = input_pixel_stride;
   average_pooling_op->output_pixel_stride = output_pixel_stride;
 
-  average_pooling_op->type = xnn_operator_type_average_pooling_f32;
+  average_pooling_op->type = xnn_operator_type_average_pooling_nhwc_f32;
   if (any_padding) {
     average_pooling_op->f32_output_params =
       xnn_init_f32_output_params(output_min, output_max);
@@ -368,7 +368,7 @@
     uint8_t* output,
     pthreadpool_t threadpool)
 {
-  if (average_pooling_op->type != xnn_operator_type_average_pooling_q8) {
+  if (average_pooling_op->type != xnn_operator_type_average_pooling_nhwc_q8) {
     xnn_log_error("failed to setup Average Pooling (Q8) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
@@ -495,7 +495,7 @@
     float* output,
     pthreadpool_t threadpool)
 {
-  if (average_pooling_op->type != xnn_operator_type_average_pooling_f32) {
+  if (average_pooling_op->type != xnn_operator_type_average_pooling_nhwc_f32) {
     xnn_log_error("failed to setup Average Pooling (F32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
diff --git a/src/channel-pad.c b/src/channel-pad-nc.c
similarity index 96%
rename from src/channel-pad.c
rename to src/channel-pad-nc.c
index f0380cd..608c4d8 100644
--- a/src/channel-pad.c
+++ b/src/channel-pad-nc.c
@@ -75,7 +75,7 @@
   channel_pad_op->output_pixel_stride = output_stride;
   channel_pad_op->pad_value = *((const uint32_t*) pad_value);
 
-  channel_pad_op->type = xnn_operator_type_channel_pad_x32;
+  channel_pad_op->type = xnn_operator_type_channel_pad_nc_x32;
   channel_pad_op->ukernel.type = xnn_ukernel_type_pad;
 
   channel_pad_op->state = xnn_run_state_invalid;
@@ -95,7 +95,7 @@
     void* output,
     pthreadpool_t threadpool)
 {
-  if (channel_pad_op->type != xnn_operator_type_channel_pad_x32) {
+  if (channel_pad_op->type != xnn_operator_type_channel_pad_nc_x32) {
     xnn_log_error("failed to setup Channel Pad (X32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
diff --git a/src/channel-shuffle.c b/src/channel-shuffle-nc.c
similarity index 95%
rename from src/channel-shuffle.c
rename to src/channel-shuffle-nc.c
index c27d6d3..c2fd2f1 100644
--- a/src/channel-shuffle.c
+++ b/src/channel-shuffle-nc.c
@@ -109,7 +109,7 @@
     input_stride,
     output_stride,
     flags,
-    xnn_operator_type_channel_shuffle_x8,
+    xnn_operator_type_channel_shuffle_nc_x8,
     channel_shuffle_op_out);
 }
 
@@ -127,7 +127,7 @@
     input_stride,
     output_stride,
     flags,
-    xnn_operator_type_channel_shuffle_x32,
+    xnn_operator_type_channel_shuffle_nc_x32,
     channel_shuffle_op_out);
 }
 
@@ -199,8 +199,8 @@
     void* output,
     pthreadpool_t threadpool)
 {
-  if (channel_shuffle_op->type != xnn_operator_type_channel_shuffle_x8) {
-    xnn_log_error("failed to setup Channel Shuffle (X8) operator: operator type mismatch");
+  if (channel_shuffle_op->type != xnn_operator_type_channel_shuffle_nc_x8) {
+    xnn_log_error("failed to setup Channel Shuffle (NC, X8) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
 
@@ -220,8 +220,8 @@
     void* output,
     pthreadpool_t threadpool)
 {
-  if (channel_shuffle_op->type != xnn_operator_type_channel_shuffle_x32) {
-    xnn_log_error("failed to setup Channel Shuffle (X32) operator: operator type mismatch");
+  if (channel_shuffle_op->type != xnn_operator_type_channel_shuffle_nc_x32) {
+    xnn_log_error("failed to setup Channel Shuffle (NC, X32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
 
diff --git a/src/clamp.c b/src/clamp-nc.c
similarity index 95%
rename from src/clamp.c
rename to src/clamp-nc.c
index 0ef2a8a..852d926 100644
--- a/src/clamp.c
+++ b/src/clamp-nc.c
@@ -80,7 +80,7 @@
   clamp_op->output_pixel_stride = output_stride;
   clamp_op->u8_output_params = xnn_init_u8_output_params(output_min, output_max);
 
-  clamp_op->type = xnn_operator_type_clamp_u8;
+  clamp_op->type = xnn_operator_type_clamp_nc_u8;
   clamp_op->ukernel.type = xnn_ukernel_type_clamp;
 
   clamp_op->state = xnn_run_state_invalid;
@@ -166,7 +166,7 @@
   clamp_op->output_pixel_stride = output_stride;
   clamp_op->f32_output_params = xnn_init_f32_output_params(output_min, output_max);
 
-  clamp_op->type = xnn_operator_type_clamp_f32;
+  clamp_op->type = xnn_operator_type_clamp_nc_f32;
   clamp_op->ukernel.type = xnn_ukernel_type_clamp;
 
   clamp_op->state = xnn_run_state_invalid;
@@ -186,8 +186,8 @@
     uint8_t* output,
     pthreadpool_t threadpool)
 {
-  if (clamp_op->type != xnn_operator_type_clamp_u8) {
-    xnn_log_error("failed to setup Clamp (U8) operator: operator type mismatch");
+  if (clamp_op->type != xnn_operator_type_clamp_nc_u8) {
+    xnn_log_error("failed to setup Clamp (NC, U8) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
   clamp_op->state = xnn_run_state_invalid;
@@ -246,8 +246,8 @@
     float* output,
     pthreadpool_t threadpool)
 {
-  if (clamp_op->type != xnn_operator_type_clamp_f32) {
-    xnn_log_error("failed to setup Clamp (F32) operator: operator type mismatch");
+  if (clamp_op->type != xnn_operator_type_clamp_nc_f32) {
+    xnn_log_error("failed to setup Clamp (NC, F32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
   clamp_op->state = xnn_run_state_invalid;
diff --git a/src/convolution-spnchw.c b/src/convolution-nchw.c
similarity index 98%
rename from src/convolution-spnchw.c
rename to src/convolution-nchw.c
index cf18fa9..24c61a6 100644
--- a/src/convolution-spnchw.c
+++ b/src/convolution-nchw.c
@@ -34,7 +34,7 @@
   return doz(padded_input_dimension, effective_kernel_dimension) / subsampling_dimension + 1;
 }
 
-enum xnn_status xnn_create_convolution2d_spnchw_f32(
+enum xnn_status xnn_create_convolution2d_nchw_f32(
     uint32_t input_padding_top,
     uint32_t input_padding_right,
     uint32_t input_padding_bottom,
@@ -455,7 +455,7 @@
     convolution_op->f32_output_params = xnn_init_f32_output_params(output_min, output_max);
   }
 
-  convolution_op->type = xnn_operator_type_convolution_spnchw_f32;
+  convolution_op->type = xnn_operator_type_convolution_nchw_f32;
   convolution_op->ukernel.type = ukernel_type;
 
   convolution_op->state = xnn_run_state_invalid;
@@ -468,7 +468,7 @@
   return status;
 }
 
-static enum xnn_status setup_convolution2d_spnchw(
+static enum xnn_status setup_convolution2d_nchw(
   xnn_operator_t convolution_op,
   size_t batch_size,
   size_t input_batch_stride,
@@ -688,7 +688,7 @@
   }
 }
 
-enum xnn_status xnn_setup_convolution2d_spnchw_f32(
+enum xnn_status xnn_setup_convolution2d_nchw_f32(
     xnn_operator_t convolution_op,
     size_t batch_size,
     size_t input_batch_stride,
@@ -699,12 +699,12 @@
     float* output,
     pthreadpool_t threadpool)
 {
-  if (convolution_op->type != xnn_operator_type_convolution_spnchw_f32) {
-    xnn_log_error("failed to setup Convolution (F32, SpNCHW) operator: operator type mismatch");
+  if (convolution_op->type != xnn_operator_type_convolution_nchw_f32) {
+    xnn_log_error("failed to setup Convolution (NCHW, F32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
 
-  return setup_convolution2d_spnchw(
+  return setup_convolution2d_nchw(
     convolution_op,
     batch_size, input_batch_stride, output_batch_stride,
     input_height, input_width,
diff --git a/src/convolution.c b/src/convolution-nhwc.c
similarity index 98%
rename from src/convolution.c
rename to src/convolution-nhwc.c
index 02cbf5e..946cd6d 100644
--- a/src/convolution.c
+++ b/src/convolution-nhwc.c
@@ -377,7 +377,7 @@
       input_zero_point, kernel_zero_point,
       convolution_scale, output_zero_point, output_min, output_max);
 
-  convolution_op->type = xnn_operator_type_convolution_q8;
+  convolution_op->type = xnn_operator_type_convolution_nhwc_q8;
   convolution_op->ukernel.type = ukernel_type;
   if (tf_same_padding) {
     convolution_op->flags |= XNN_FLAG_TENSORFLOW_SAME_PADDING;
@@ -700,7 +700,7 @@
 
   convolution_op->f32_output_params = xnn_init_f32_output_params(output_min, output_max);
 
-  convolution_op->type = xnn_operator_type_convolution_f32;
+  convolution_op->type = xnn_operator_type_convolution_nhwc_f32;
   convolution_op->ukernel.type = ukernel_type;
   if (tf_same_padding) {
     convolution_op->flags |= XNN_FLAG_TENSORFLOW_SAME_PADDING;
@@ -1052,8 +1052,8 @@
     uint8_t* output,
     pthreadpool_t threadpool)
 {
-  if (convolution_op->type != xnn_operator_type_convolution_q8) {
-    xnn_log_error("failed to setup Convolution (Q8) operator: operator type mismatch");
+  if (convolution_op->type != xnn_operator_type_convolution_nhwc_q8) {
+    xnn_log_error("failed to setup Convolution (NHWC, Q8) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
 
@@ -1078,8 +1078,8 @@
     float* output,
     pthreadpool_t threadpool)
 {
-  if (convolution_op->type != xnn_operator_type_convolution_f32) {
-    xnn_log_error("failed to setup Convolution (F32) operator: operator type mismatch");
+  if (convolution_op->type != xnn_operator_type_convolution_nhwc_f32) {
+    xnn_log_error("failed to setup Convolution (NHWC, F32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
 
diff --git a/src/deconvolution.c b/src/deconvolution-nhwc.c
similarity index 98%
rename from src/deconvolution.c
rename to src/deconvolution-nhwc.c
index ef5721d..2aaed66 100644
--- a/src/deconvolution.c
+++ b/src/deconvolution-nhwc.c
@@ -283,7 +283,7 @@
       input_zero_point, kernel_zero_point,
       deconvolution_scale, output_zero_point, output_min, output_max);
 
-  deconvolution_op->type = xnn_operator_type_deconvolution_q8;
+  deconvolution_op->type = xnn_operator_type_deconvolution_nhwc_q8;
   deconvolution_op->ukernel.type = ukernel_type;
   deconvolution_op->ukernel.igemm = (struct xnn_ukernel_igemm) {
     .default_function = ukernel_function,
@@ -524,7 +524,7 @@
 
   deconvolution_op->f32_output_params = xnn_init_f32_output_params(output_min, output_max);
 
-  deconvolution_op->type = xnn_operator_type_deconvolution_f32;
+  deconvolution_op->type = xnn_operator_type_deconvolution_nhwc_f32;
   deconvolution_op->ukernel.type = ukernel_type;
   deconvolution_op->ukernel.igemm = (struct xnn_ukernel_igemm) {
     .default_function = ukernel_function,
@@ -873,8 +873,8 @@
     uint8_t* output,
     pthreadpool_t threadpool)
 {
-  if (deconvolution_op->type != xnn_operator_type_deconvolution_q8) {
-    xnn_log_error("failed to setup Deconvolution (Q8) operator: operator type mismatch");
+  if (deconvolution_op->type != xnn_operator_type_deconvolution_nhwc_q8) {
+    xnn_log_error("failed to setup Deconvolution (NHWC, Q8) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
 
@@ -902,8 +902,8 @@
     float* output,
     pthreadpool_t threadpool)
 {
-  if (deconvolution_op->type != xnn_operator_type_deconvolution_f32) {
-    xnn_log_error("failed to setup Deconvolution (F32) operator: operator type mismatch");
+  if (deconvolution_op->type != xnn_operator_type_deconvolution_nhwc_f32) {
+    xnn_log_error("failed to setup Deconvolution (NHWC, F32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
 
diff --git a/src/fully-connected.c b/src/fully-connected-nc.c
similarity index 97%
rename from src/fully-connected.c
rename to src/fully-connected-nc.c
index 1b97b90..3913934 100644
--- a/src/fully-connected.c
+++ b/src/fully-connected-nc.c
@@ -162,7 +162,7 @@
       input_zero_point, kernel_zero_point,
       requantization_scale, output_zero_point, output_min, output_max);
 
-  fully_connected_op->type = xnn_operator_type_fully_connected_q8;
+  fully_connected_op->type = xnn_operator_type_fully_connected_nc_q8;
 
   fully_connected_op->ukernel.type = xnn_ukernel_type_gemm;
   fully_connected_op->ukernel.gemm = (struct xnn_ukernel_gemm) {
@@ -289,7 +289,7 @@
 
   fully_connected_op->f32_output_params = xnn_init_f32_output_params(output_min, output_max);
 
-  fully_connected_op->type = xnn_operator_type_fully_connected_f32;
+  fully_connected_op->type = xnn_operator_type_fully_connected_nc_f32;
 
   fully_connected_op->ukernel.type = xnn_ukernel_type_gemm;
   fully_connected_op->ukernel.gemm = (struct xnn_ukernel_gemm) {
@@ -396,8 +396,8 @@
     uint8_t* output,
     pthreadpool_t threadpool)
 {
-  if (fully_connected_op->type != xnn_operator_type_fully_connected_q8) {
-    xnn_log_error("failed to setup Fully Connected (Q8) operator: operator type mismatch");
+  if (fully_connected_op->type != xnn_operator_type_fully_connected_nc_q8) {
+    xnn_log_error("failed to setup Fully Connected (NC, Q8) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
 
@@ -420,8 +420,8 @@
     float* output,
     pthreadpool_t threadpool)
 {
-  if (fully_connected_op->type != xnn_operator_type_fully_connected_f32) {
-    xnn_log_error("failed to setup Fully Connected (F32) operator: operator type mismatch");
+  if (fully_connected_op->type != xnn_operator_type_fully_connected_nc_f32) {
+    xnn_log_error("failed to setup Fully Connected (NC, F32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
 
diff --git a/src/global-average-pooling-spnchw.c b/src/global-average-pooling-ncw.c
similarity index 81%
rename from src/global-average-pooling-spnchw.c
rename to src/global-average-pooling-ncw.c
index f2c196b..6bf0dc1 100644
--- a/src/global-average-pooling-spnchw.c
+++ b/src/global-average-pooling-ncw.c
@@ -17,7 +17,7 @@
 #include <xnnpack/params.h>
 
 
-enum xnn_status xnn_create_global_average_pooling_spnchw_f32(
+enum xnn_status xnn_create_global_average_pooling_ncw_f32(
     size_t channels,
     float output_min,
     float output_max,
@@ -80,7 +80,7 @@
   global_average_pooling_op->channels = channels;
   global_average_pooling_op->f32_gavgpool_params = xnn_init_f32_gavgpool_params(nanf(""), output_min, output_max, 0);
 
-  global_average_pooling_op->type = xnn_operator_type_global_average_pooling_spnchw_f32;
+  global_average_pooling_op->type = xnn_operator_type_global_average_pooling_ncw_f32;
   global_average_pooling_op->ukernel.type = xnn_ukernel_type_global_average_pooling;
 
   global_average_pooling_op->state = xnn_run_state_invalid;
@@ -93,17 +93,16 @@
   return status;
 }
 
-enum xnn_status xnn_setup_global_average_pooling_spnchw_f32(
+enum xnn_status xnn_setup_global_average_pooling_ncw_f32(
     xnn_operator_t global_average_pooling_op,
     size_t batch_size,
-    size_t height,
     size_t width,
     const float* input,
     float* output,
     pthreadpool_t threadpool)
 {
-  if (global_average_pooling_op->type != xnn_operator_type_global_average_pooling_spnchw_f32) {
-    xnn_log_error("failed to setup Global Average Pooling (F32, SpNCHW) operator: operator type mismatch");
+  if (global_average_pooling_op->type != xnn_operator_type_global_average_pooling_ncw_f32) {
+    xnn_log_error("failed to setup Global Average Pooling (F32, NCW) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
   global_average_pooling_op->state = xnn_run_state_invalid;
@@ -113,11 +112,6 @@
     return xnn_status_uninitialized;
   }
 
-  if (height == 0) {
-    xnn_log_error("failed to setup Global Average Pooling operator with height %zu: height must be non-zero", height);
-    return xnn_status_invalid_parameter;
-  }
-
   if (width == 0) {
     xnn_log_error("failed to setup Global Average Pooling operator with width %zu: width must be non-zero", width);
     return xnn_status_invalid_parameter;
@@ -128,20 +122,14 @@
     return xnn_status_success;
   }
 
-  global_average_pooling_op->batch_size = batch_size;
-  global_average_pooling_op->input_height = height;
-  global_average_pooling_op->input_width = width;
-  global_average_pooling_op->input = input;
-  global_average_pooling_op->output = output;
-
   xnn_update_f32_gavgpool_params(&global_average_pooling_op->f32_gavgpool_params,
-    1.0f / (float) (width * height), width * height);
+    1.0f / (float) width, width);
 
-  global_average_pooling_op->context.global_average_pooling_spnchw = (struct global_average_pooling_spnchw_context) {
-    .input_elements = width * height * sizeof(float),
+  global_average_pooling_op->context.global_average_pooling_ncw = (struct global_average_pooling_ncw_context) {
+    .input_elements = width * sizeof(float),
     .input = input,
-    .input_channel_stride = width * height * sizeof(float),
-    .input_batch_stride = global_average_pooling_op->channels * width * height * sizeof(float),
+    .input_channel_stride = width * sizeof(float),
+    .input_batch_stride = global_average_pooling_op->channels * width * sizeof(float),
     .output = output,
     .output_channel_stride = sizeof(float),
     .output_batch_stride = global_average_pooling_op->channels * sizeof(float),
@@ -151,7 +139,7 @@
 
   global_average_pooling_op->compute.type = xnn_parallelization_type_2d_tile_1d;
   global_average_pooling_op->compute.task_2d_tile_1d =
-    (pthreadpool_task_2d_tile_1d_t) xnn_compute_global_average_pooling_spnchw;
+    (pthreadpool_task_2d_tile_1d_t) xnn_compute_global_average_pooling_ncw;
   global_average_pooling_op->compute.range[0] = batch_size;
   global_average_pooling_op->compute.range[1] = global_average_pooling_op->channels;
   global_average_pooling_op->compute.tile[0] = global_average_pooling_op->channels; //xnn_params.f32.spchw_gavgpool.channel_tile;
diff --git a/src/global-average-pooling.c b/src/global-average-pooling-nwc.c
similarity index 90%
rename from src/global-average-pooling.c
rename to src/global-average-pooling-nwc.c
index d4a6d2f..6bf781f 100644
--- a/src/global-average-pooling.c
+++ b/src/global-average-pooling-nwc.c
@@ -127,7 +127,7 @@
   global_average_pooling_op->output_min = output_min;
   global_average_pooling_op->output_max = output_max;
 
-  global_average_pooling_op->type = xnn_operator_type_global_average_pooling_q8;
+  global_average_pooling_op->type = xnn_operator_type_global_average_pooling_nwc_q8;
   global_average_pooling_op->ukernel.type = xnn_ukernel_type_global_average_pooling;
 
   global_average_pooling_op->state = xnn_run_state_invalid;
@@ -223,7 +223,7 @@
   global_average_pooling_op->output_pixel_stride = output_stride;
   global_average_pooling_op->f32_avgpool_params = xnn_init_f32_avgpool_params(nanf(""), output_min, output_max);
 
-  global_average_pooling_op->type = xnn_operator_type_global_average_pooling_f32;
+  global_average_pooling_op->type = xnn_operator_type_global_average_pooling_nwc_f32;
   global_average_pooling_op->ukernel.type = xnn_ukernel_type_global_average_pooling;
 
   global_average_pooling_op->state = xnn_run_state_invalid;
@@ -244,8 +244,8 @@
     uint8_t* output,
     pthreadpool_t threadpool)
 {
-  if (global_average_pooling_op->type != xnn_operator_type_global_average_pooling_q8) {
-    xnn_log_error("failed to setup Global Average Pooling (Q8) operator: operator type mismatch");
+  if (global_average_pooling_op->type != xnn_operator_type_global_average_pooling_nwc_q8) {
+    xnn_log_error("failed to setup Global Average Pooling (NWC, Q8) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
   global_average_pooling_op->state = xnn_run_state_invalid;
@@ -280,7 +280,7 @@
 
   const size_t input_stride_in_bytes = global_average_pooling_op->input_pixel_stride * sizeof(uint8_t);
   const size_t channels = global_average_pooling_op->channels;
-  global_average_pooling_op->context.global_average_pooling = (struct global_average_pooling_context) {
+  global_average_pooling_op->context.global_average_pooling_nwc = (struct global_average_pooling_nwc_context) {
       .input = input,
       .zero = global_average_pooling_op->zero_buffer,
       .input_pixel_stride = input_stride_in_bytes,
@@ -295,11 +295,11 @@
   global_average_pooling_op->compute.range[0] = batch_size;
 
   if (width <= xnn_params.q8.gavgpool.mr) {
-    global_average_pooling_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_unipass;
-    global_average_pooling_op->context.global_average_pooling.unipass_ukernel = xnn_params.q8.gavgpool.up;
+    global_average_pooling_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_nwc_unipass;
+    global_average_pooling_op->context.global_average_pooling_nwc.unipass_ukernel = xnn_params.q8.gavgpool.up;
   } else {
-    global_average_pooling_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_multipass;
-    global_average_pooling_op->context.global_average_pooling.multipass_ukernel = xnn_params.q8.gavgpool.mp;
+    global_average_pooling_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_nwc_multipass;
+    global_average_pooling_op->context.global_average_pooling_nwc.multipass_ukernel = xnn_params.q8.gavgpool.mp;
   }
   global_average_pooling_op->state = xnn_run_state_ready;
 
@@ -314,8 +314,8 @@
     float* output,
     pthreadpool_t threadpool)
 {
-  if (global_average_pooling_op->type != xnn_operator_type_global_average_pooling_f32) {
-    xnn_log_error("failed to setup Global Average Pooling (F32) operator: operator type mismatch");
+  if (global_average_pooling_op->type != xnn_operator_type_global_average_pooling_nwc_f32) {
+    xnn_log_error("failed to setup Global Average Pooling (NWC, F32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
   global_average_pooling_op->state = xnn_run_state_invalid;
@@ -344,7 +344,7 @@
 
   const size_t input_stride_in_bytes = global_average_pooling_op->input_pixel_stride * sizeof(float);
   const size_t channels = global_average_pooling_op->channels;
-  global_average_pooling_op->context.global_average_pooling = (struct global_average_pooling_context) {
+  global_average_pooling_op->context.global_average_pooling_nwc = (struct global_average_pooling_nwc_context) {
       .input = input,
       .zero = global_average_pooling_op->zero_buffer,
       .input_pixel_stride = input_stride_in_bytes,
@@ -359,11 +359,11 @@
   global_average_pooling_op->compute.range[0] = batch_size;
 
   if (width <= xnn_params.f32.gavgpool.mr) {
-    global_average_pooling_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_unipass;
-    global_average_pooling_op->context.global_average_pooling.unipass_ukernel = xnn_params.f32.gavgpool.up;
+    global_average_pooling_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_nwc_unipass;
+    global_average_pooling_op->context.global_average_pooling_nwc.unipass_ukernel = xnn_params.f32.gavgpool.up;
   } else {
-    global_average_pooling_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_multipass;
-    global_average_pooling_op->context.global_average_pooling.multipass_ukernel = xnn_params.f32.gavgpool.mp;
+    global_average_pooling_op->compute.task_1d = (pthreadpool_task_1d_t) xnn_compute_global_average_pooling_nwc_multipass;
+    global_average_pooling_op->context.global_average_pooling_nwc.multipass_ukernel = xnn_params.f32.gavgpool.mp;
   }
   global_average_pooling_op->state = xnn_run_state_ready;
 
diff --git a/src/hardswish.c b/src/hardswish-nc.c
similarity index 96%
rename from src/hardswish.c
rename to src/hardswish-nc.c
index 4b0d7eb..4253c08 100644
--- a/src/hardswish.c
+++ b/src/hardswish-nc.c
@@ -67,7 +67,7 @@
   hardswish_op->output_pixel_stride = output_stride;
   hardswish_op->f32_hswish_params = xnn_init_f32_hswish_params();
 
-  hardswish_op->type = xnn_operator_type_hswish_f32;
+  hardswish_op->type = xnn_operator_type_hardswish_nc_f32;
   hardswish_op->ukernel.type = xnn_ukernel_type_hswish;
 
   hardswish_op->state = xnn_run_state_invalid;
@@ -87,7 +87,7 @@
     float* output,
     pthreadpool_t threadpool)
 {
-  if (hardswish_op->type != xnn_operator_type_hswish_f32) {
+  if (hardswish_op->type != xnn_operator_type_hardswish_nc_f32) {
     xnn_log_error("failed to setup HardSwish (F32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
diff --git a/src/init.c b/src/init.c
index 72fecb9..bec4f39 100644
--- a/src/init.c
+++ b/src/init.c
@@ -480,7 +480,7 @@
       .channel_tile = 4,
       .row_tile = 2,
     };
-    #ifndef XNN_NO_SPNCHW_OPERATORS
+    #ifndef XNN_NO_NCHW_OPERATORS
       xnn_params.f32.spmm = (struct spmm_parameters) {
         .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_16x1__neonfma_pipelined,
         .mr = 16,
@@ -531,7 +531,7 @@
         .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__neon_x4,
         .channel_tile = 4,
       };
-    #endif  // XNN_NO_SPNCHW_OPERATORS
+    #endif  // XNN_NO_NCHW_OPERATORS
   #endif  // XNN_NO_F32_OPERATORS
 
   /**************************** X32 micro-kernels ****************************/
@@ -697,7 +697,7 @@
       .channel_tile = 4,
       .row_tile = 2,
     };
-    #ifndef XNN_NO_SPNCHW_OPERATORS
+    #ifndef XNN_NO_NCHW_OPERATORS
       xnn_params.f32.spmm = (struct spmm_parameters) {
         .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_4x1__sse,
         .mr = 4,
@@ -719,7 +719,7 @@
         .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__sse_x4,
         .channel_tile = 4,
       };
-    #endif  // XNN_NO_SPNCHW_OPERATORS
+    #endif  // XNN_NO_NCHW_OPERATORS
   #endif  // XNN_NO_F32_OPERATORS
 
   /**************************** X32 micro-kernels ****************************/
@@ -1074,7 +1074,7 @@
       .channel_tile = 1,
       .row_tile = 2,
     };
-    #ifndef XNN_NO_SPNCHW_OPERATORS
+    #ifndef XNN_NO_NCHW_OPERATORS
       xnn_params.f32.spmm = (struct spmm_parameters) {
         .ukernel = (xnn_spmm_ukernel_function) xnn_f32_spmm_ukernel_8x1__scalar,
         .mr = 8,
@@ -1125,7 +1125,7 @@
         .ukernel = (xnn_gavgpool_spchw_ukernel_function) xnn_f32_gavgpool_spchw_ukernel__scalar_x1,
         .channel_tile = 1,
       };
-    #endif  // XNN_NO_SPNCHW_OPERATORS
+    #endif  // XNN_NO_NCHW_OPERATORS
   #endif  // XNN_NO_F32_OPERATORS
 
   /**************************** X32 micro-kernels ****************************/
diff --git a/src/leaky-relu.c b/src/leaky-relu-nc.c
similarity index 96%
rename from src/leaky-relu.c
rename to src/leaky-relu-nc.c
index 74ca098..860f44a 100644
--- a/src/leaky-relu.c
+++ b/src/leaky-relu-nc.c
@@ -143,7 +143,7 @@
   leaky_relu_op->input_pixel_stride = input_stride;
   leaky_relu_op->output_pixel_stride = output_stride;
 
-  leaky_relu_op->type = xnn_operator_type_leaky_relu_q8;
+  leaky_relu_op->type = xnn_operator_type_leaky_relu_nc_q8;
   leaky_relu_op->ukernel.type = xnn_ukernel_type_lut;
 
   leaky_relu_op->state = xnn_run_state_invalid;
@@ -163,8 +163,8 @@
     uint8_t* output,
     pthreadpool_t threadpool)
 {
-  if (leaky_relu_op->type != xnn_operator_type_leaky_relu_q8) {
-    xnn_log_error("failed to setup Leaky ReLU (Q8) operator: operator type mismatch");
+  if (leaky_relu_op->type != xnn_operator_type_leaky_relu_nc_q8) {
+    xnn_log_error("failed to setup Leaky ReLU (NC, Q8) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
   leaky_relu_op->state = xnn_run_state_invalid;
diff --git a/src/max-pooling.c b/src/max-pooling-nhwc.c
similarity index 97%
rename from src/max-pooling.c
rename to src/max-pooling-nhwc.c
index a658ef4..fb159bb 100644
--- a/src/max-pooling.c
+++ b/src/max-pooling-nhwc.c
@@ -151,7 +151,7 @@
 
   max_pooling_op->u8_output_params = xnn_init_u8_output_params(output_min, output_max);
 
-  max_pooling_op->type = xnn_operator_type_max_pooling_u8;
+  max_pooling_op->type = xnn_operator_type_max_pooling_nhwc_u8;
   max_pooling_op->ukernel.type = xnn_ukernel_type_max_pooling;
 
   max_pooling_op->state = xnn_run_state_invalid;
@@ -292,7 +292,7 @@
 
   max_pooling_op->f32_output_params = xnn_init_f32_output_params(output_min, output_max);
 
-  max_pooling_op->type = xnn_operator_type_max_pooling_f32;
+  max_pooling_op->type = xnn_operator_type_max_pooling_nhwc_f32;
   max_pooling_op->ukernel.type = xnn_ukernel_type_max_pooling;
 
   max_pooling_op->state = xnn_run_state_invalid;
@@ -314,8 +314,8 @@
     uint8_t* output,
     pthreadpool_t threadpool)
 {
-  if (max_pooling_op->type != xnn_operator_type_max_pooling_u8) {
-    xnn_log_error("failed to setup Max Pooling (U8) operator: operator type mismatch");
+  if (max_pooling_op->type != xnn_operator_type_max_pooling_nhwc_u8) {
+    xnn_log_error("failed to setup Max Pooling (NHWC, U8) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
   max_pooling_op->state = xnn_run_state_invalid;
@@ -435,8 +435,8 @@
     float* output,
     pthreadpool_t threadpool)
 {
-  if (max_pooling_op->type != xnn_operator_type_max_pooling_f32) {
-    xnn_log_error("failed to setup Max Pooling (F32) operator: operator type mismatch");
+  if (max_pooling_op->type != xnn_operator_type_max_pooling_nhwc_f32) {
+    xnn_log_error("failed to setup Max Pooling (NHWC, F32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
   max_pooling_op->state = xnn_run_state_invalid;
diff --git a/src/multiply.c b/src/multiply-nd.c
similarity index 97%
rename from src/multiply.c
rename to src/multiply-nd.c
index a49646c..9070648 100644
--- a/src/multiply.c
+++ b/src/multiply-nd.c
@@ -62,7 +62,7 @@
 
   multiply_op->f32_output_params = xnn_init_f32_output_params(output_min, output_max);
 
-  multiply_op->type = xnn_operator_type_multiply_f32;
+  multiply_op->type = xnn_operator_type_multiply_nd_f32;
   multiply_op->ukernel.type = xnn_ukernel_type_multiply;
 
   multiply_op->state = xnn_run_state_invalid;
@@ -86,8 +86,8 @@
     float* output,
     pthreadpool_t threadpool)
 {
-  if (multiply_op->type != xnn_operator_type_multiply_f32) {
-    xnn_log_error("failed to setup Multiply (F32) operator: operator type mismatch");
+  if (multiply_op->type != xnn_operator_type_multiply_nd_f32) {
+    xnn_log_error("failed to setup Multiply (ND, F32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
   multiply_op->state = xnn_run_state_invalid;
diff --git a/src/operator-run.c b/src/operator-run.c
index b331521..28c0b5a 100644
--- a/src/operator-run.c
+++ b/src/operator-run.c
@@ -429,8 +429,8 @@
     &context->params);
 }
 
-void xnn_compute_global_average_pooling_unipass(
-    const struct global_average_pooling_context context[restrict static 1],
+void xnn_compute_global_average_pooling_nwc_unipass(
+    const struct global_average_pooling_nwc_context context[restrict static 1],
     size_t batch_index)
 {
   const void* input =
@@ -448,8 +448,8 @@
     &context->params);
 }
 
-void xnn_compute_global_average_pooling_multipass(
-    const struct global_average_pooling_context context[restrict static 1],
+void xnn_compute_global_average_pooling_nwc_multipass(
+    const struct global_average_pooling_nwc_context context[restrict static 1],
     size_t batch_index)
 {
   const void* input =
@@ -469,16 +469,16 @@
     &context->params);
 }
 
-void xnn_compute_global_average_pooling_spnchw(
-    const struct global_average_pooling_spnchw_context context[restrict static 1],
+void xnn_compute_global_average_pooling_ncw(
+    const struct global_average_pooling_ncw_context context[restrict static 1],
     size_t batch_index,
     size_t channels_start,
     size_t channels_slice)
 {
-  const void* input =
-    (const void*) ((uintptr_t) context->input + channels_start * context->input_channel_stride + batch_index * context->input_batch_stride);
-  void* output =
-    (void*) ((uintptr_t) context->output + channels_start * context->output_channel_stride + batch_index * context->output_batch_stride);
+  const void* input = (const void*) ((uintptr_t) context->input +
+    channels_start * context->input_channel_stride + batch_index * context->input_batch_stride);
+  void* output = (void*) ((uintptr_t) context->output +
+    channels_start * context->output_channel_stride + batch_index * context->output_batch_stride);
 
   context->ukernel(
     context->input_elements,
diff --git a/src/prelu.c b/src/prelu-nc.c
similarity index 95%
rename from src/prelu.c
rename to src/prelu-nc.c
index 7f3a6e6..9b71362 100644
--- a/src/prelu.c
+++ b/src/prelu-nc.c
@@ -88,7 +88,7 @@
   prelu_op->output_pixel_stride = output_stride;
   prelu_op->f32_output_params = xnn_init_f32_output_params(output_min, output_max);
 
-  prelu_op->type = xnn_operator_type_prelu_f32;
+  prelu_op->type = xnn_operator_type_prelu_nc_f32;
   prelu_op->ukernel.type = xnn_ukernel_type_prelu;
 
   prelu_op->state = xnn_run_state_invalid;
@@ -108,8 +108,8 @@
     float* output,
     pthreadpool_t threadpool)
 {
-  if (prelu_op->type != xnn_operator_type_prelu_f32) {
-    xnn_log_error("failed to setup PReLU (F32) operator: operator type mismatch");
+  if (prelu_op->type != xnn_operator_type_prelu_nc_f32) {
+    xnn_log_error("failed to setup PReLU (NC, F32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
   prelu_op->state = xnn_run_state_invalid;
diff --git a/src/resize-bilinear.c b/src/resize-bilinear-nhwc.c
similarity index 96%
rename from src/resize-bilinear.c
rename to src/resize-bilinear-nhwc.c
index 0b28f13..363bbba 100644
--- a/src/resize-bilinear.c
+++ b/src/resize-bilinear-nhwc.c
@@ -72,7 +72,7 @@
   resize_op->input_pixel_stride = input_pixel_stride;
   resize_op->output_pixel_stride = output_pixel_stride;
 
-  resize_op->type = xnn_operator_type_resize_bilinear_f32;
+  resize_op->type = xnn_operator_type_resize_bilinear_nhwc_f32;
   resize_op->ukernel.type = xnn_ukernel_type_unpooling;
   resize_op->flags = flags;
 
@@ -97,8 +97,8 @@
     float* output,
     pthreadpool_t threadpool)
 {
-  if (resize_op->type != xnn_operator_type_resize_bilinear_f32) {
-    xnn_log_error("failed to setup Resize Bilinear (F32) operator: operator type mismatch");
+  if (resize_op->type != xnn_operator_type_resize_bilinear_nhwc_f32) {
+    xnn_log_error("failed to setup Resize Bilinear (NHWC, F32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
   resize_op->state = xnn_run_state_invalid;
diff --git a/src/sigmoid.c b/src/sigmoid-nc.c
similarity index 97%
rename from src/sigmoid.c
rename to src/sigmoid-nc.c
index 0d80e83..f74fa2c 100644
--- a/src/sigmoid.c
+++ b/src/sigmoid-nc.c
@@ -134,7 +134,7 @@
   sigmoid_op->input_pixel_stride = input_stride;
   sigmoid_op->output_pixel_stride = output_stride;
 
-  sigmoid_op->type = xnn_operator_type_sigmoid_q8;
+  sigmoid_op->type = xnn_operator_type_sigmoid_nc_q8;
   sigmoid_op->ukernel.type = xnn_ukernel_type_lut;
 
   sigmoid_op->state = xnn_run_state_invalid;
@@ -207,7 +207,7 @@
   sigmoid_op->input_pixel_stride = input_stride;
   sigmoid_op->output_pixel_stride = output_stride;
 
-  sigmoid_op->type = xnn_operator_type_sigmoid_f32;
+  sigmoid_op->type = xnn_operator_type_sigmoid_nc_f32;
   sigmoid_op->ukernel.type = xnn_ukernel_type_sigmoid;
 
   sigmoid_op->state = xnn_run_state_invalid;
@@ -227,7 +227,7 @@
     uint8_t* output,
     pthreadpool_t threadpool)
 {
-  if (sigmoid_op->type != xnn_operator_type_sigmoid_q8) {
+  if (sigmoid_op->type != xnn_operator_type_sigmoid_nc_q8) {
     xnn_log_error("failed to setup Sigmoid (Q8) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
@@ -291,7 +291,7 @@
     float* output,
     pthreadpool_t threadpool)
 {
-  if (sigmoid_op->type != xnn_operator_type_sigmoid_f32) {
+  if (sigmoid_op->type != xnn_operator_type_sigmoid_nc_f32) {
     xnn_log_error("failed to setup Sigmoid (F32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
diff --git a/src/softargmax.c b/src/softargmax-nc.c
similarity index 95%
rename from src/softargmax.c
rename to src/softargmax-nc.c
index 5228b0d..3ac1671 100644
--- a/src/softargmax.c
+++ b/src/softargmax-nc.c
@@ -116,7 +116,7 @@
   softargmax_op->input_pixel_stride = input_stride;
   softargmax_op->output_pixel_stride = output_stride;
 
-  softargmax_op->type = xnn_operator_type_softargmax_q8;
+  softargmax_op->type = xnn_operator_type_softargmax_nc_q8;
   softargmax_op->ukernel.type = xnn_ukernel_type_softargmax;
 
   softargmax_op->state = xnn_run_state_invalid;
@@ -136,8 +136,8 @@
     uint8_t* output,
     pthreadpool_t threadpool)
 {
-  if (softargmax_op->type != xnn_operator_type_softargmax_q8) {
-    xnn_log_error("failed to setup SoftArgMax (Q8) operator: operator type mismatch");
+  if (softargmax_op->type != xnn_operator_type_softargmax_nc_q8) {
+    xnn_log_error("failed to setup SoftArgMax (NC, Q8) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
   softargmax_op->state = xnn_run_state_invalid;
diff --git a/src/unpooling.c b/src/unpooling-nhwc.c
similarity index 97%
rename from src/unpooling.c
rename to src/unpooling-nhwc.c
index 6264f17..de8e14a 100644
--- a/src/unpooling.c
+++ b/src/unpooling-nhwc.c
@@ -109,7 +109,7 @@
   unpooling_op->input_pixel_stride = input_pixel_stride;
   unpooling_op->output_pixel_stride = output_pixel_stride;
 
-  unpooling_op->type = xnn_operator_type_unpooling_x32;
+  unpooling_op->type = xnn_operator_type_unpooling_nhwc_x32;
   unpooling_op->ukernel.type = xnn_ukernel_type_unpooling;
 
   unpooling_op->state = xnn_run_state_invalid;
@@ -132,7 +132,7 @@
     void* output,
     pthreadpool_t threadpool)
 {
-  if (unpooling_op->type != xnn_operator_type_unpooling_x32) {
+  if (unpooling_op->type != xnn_operator_type_unpooling_nhwc_x32) {
     xnn_log_error("failed to setup Unpooling (X32) operator: operator type mismatch");
     return xnn_status_invalid_parameter;
   }
diff --git a/src/xnnpack/compute.h b/src/xnnpack/compute.h
index c87b143..2f7e31a 100644
--- a/src/xnnpack/compute.h
+++ b/src/xnnpack/compute.h
@@ -447,7 +447,7 @@
       size_t output_y);
 #endif
 
-struct global_average_pooling_context {
+struct global_average_pooling_nwc_context {
   const void* input;
   const void* zero;
   size_t input_pixel_stride;
@@ -467,16 +467,16 @@
 };
 
 #ifndef __cplusplus
-  XNN_PRIVATE void xnn_compute_global_average_pooling_unipass(
-      const struct global_average_pooling_context context[restrict static 1],
+  XNN_PRIVATE void xnn_compute_global_average_pooling_nwc_unipass(
+      const struct global_average_pooling_nwc_context context[restrict static 1],
       size_t batch_index);
 
-  XNN_PRIVATE void xnn_compute_global_average_pooling_multipass(
-      const struct global_average_pooling_context context[restrict static 1],
+  XNN_PRIVATE void xnn_compute_global_average_pooling_nwc_multipass(
+      const struct global_average_pooling_nwc_context context[restrict static 1],
       size_t batch_index);
 #endif
 
-struct global_average_pooling_spnchw_context {
+struct global_average_pooling_ncw_context {
   size_t input_elements;
   const void* input;
   size_t input_channel_stride;
@@ -491,8 +491,8 @@
 };
 
 #ifndef __cplusplus
-  XNN_PRIVATE void xnn_compute_global_average_pooling_spnchw(
-      const struct global_average_pooling_spnchw_context context[restrict static 1],
+  XNN_PRIVATE void xnn_compute_global_average_pooling_ncw(
+      const struct global_average_pooling_ncw_context context[restrict static 1],
       size_t batch_index,
       size_t channels_start,
       size_t channels_slice);
diff --git a/src/xnnpack/operator.h b/src/xnnpack/operator.h
index ffe9e3a..c65af56 100644
--- a/src/xnnpack/operator.h
+++ b/src/xnnpack/operator.h
@@ -46,37 +46,37 @@
 
 enum xnn_operator_type {
   xnn_operator_type_none = 0,
-  xnn_operator_type_add_f32,
-  xnn_operator_type_add_q8,
-  xnn_operator_type_argmax_pooling_f32,
-  xnn_operator_type_average_pooling_f32,
-  xnn_operator_type_average_pooling_q8,
-  xnn_operator_type_channel_pad_x32,
-  xnn_operator_type_channel_shuffle_x32,
-  xnn_operator_type_channel_shuffle_x8,
-  xnn_operator_type_clamp_f32,
-  xnn_operator_type_clamp_u8,
-  xnn_operator_type_convolution_f32,
-  xnn_operator_type_convolution_q8,
-  xnn_operator_type_convolution_spnchw_f32,
-  xnn_operator_type_deconvolution_f32,
-  xnn_operator_type_deconvolution_q8,
-  xnn_operator_type_fully_connected_f32,
-  xnn_operator_type_fully_connected_q8,
-  xnn_operator_type_global_average_pooling_f32,
-  xnn_operator_type_global_average_pooling_q8,
-  xnn_operator_type_global_average_pooling_spnchw_f32,
-  xnn_operator_type_hswish_f32,
-  xnn_operator_type_leaky_relu_q8,
-  xnn_operator_type_max_pooling_f32,
-  xnn_operator_type_max_pooling_u8,
-  xnn_operator_type_multiply_f32,
-  xnn_operator_type_prelu_f32,
-  xnn_operator_type_resize_bilinear_f32,
-  xnn_operator_type_sigmoid_f32,
-  xnn_operator_type_sigmoid_q8,
-  xnn_operator_type_softargmax_q8,
-  xnn_operator_type_unpooling_x32,
+  xnn_operator_type_add_nc_f32,
+  xnn_operator_type_add_nc_q8,
+  xnn_operator_type_argmax_pooling_nhwc_f32,
+  xnn_operator_type_average_pooling_nhwc_f32,
+  xnn_operator_type_average_pooling_nhwc_q8,
+  xnn_operator_type_channel_pad_nc_x32,
+  xnn_operator_type_channel_shuffle_nc_x32,
+  xnn_operator_type_channel_shuffle_nc_x8,
+  xnn_operator_type_clamp_nc_f32,
+  xnn_operator_type_clamp_nc_u8,
+  xnn_operator_type_convolution_nhwc_f32,
+  xnn_operator_type_convolution_nhwc_q8,
+  xnn_operator_type_convolution_nchw_f32,
+  xnn_operator_type_deconvolution_nhwc_f32,
+  xnn_operator_type_deconvolution_nhwc_q8,
+  xnn_operator_type_fully_connected_nc_f32,
+  xnn_operator_type_fully_connected_nc_q8,
+  xnn_operator_type_global_average_pooling_nwc_f32,
+  xnn_operator_type_global_average_pooling_nwc_q8,
+  xnn_operator_type_global_average_pooling_ncw_f32,
+  xnn_operator_type_hardswish_nc_f32,
+  xnn_operator_type_leaky_relu_nc_q8,
+  xnn_operator_type_max_pooling_nhwc_f32,
+  xnn_operator_type_max_pooling_nhwc_u8,
+  xnn_operator_type_multiply_nd_f32,
+  xnn_operator_type_prelu_nc_f32,
+  xnn_operator_type_resize_bilinear_nhwc_f32,
+  xnn_operator_type_sigmoid_nc_f32,
+  xnn_operator_type_sigmoid_nc_q8,
+  xnn_operator_type_softargmax_nc_q8,
+  xnn_operator_type_unpooling_nhwc_x32,
 };
 
 struct xnn_ukernel_dconv2d {
@@ -260,8 +260,8 @@
     struct dwconv_context dwconv;
     struct elementwise_binary_context elementwise_binary;
     struct gemm_context gemm;
-    struct global_average_pooling_context global_average_pooling;
-    struct global_average_pooling_spnchw_context global_average_pooling_spnchw;
+    struct global_average_pooling_nwc_context global_average_pooling_nwc;
+    struct global_average_pooling_ncw_context global_average_pooling_ncw;
     struct igemm_context igemm;
     struct lut_contiguous_context lut_contiguous;
     struct lut_strided_context lut_strided;