Fix bugs in Depthwise Convolution and Average Pooling

The threading context was not fully updated when the input buffer,
height, and width were unchanged from the previous run but the output
buffer had changed.

PiperOrigin-RevId: 281632335
diff --git a/src/convolution-nhwc.c b/src/convolution-nhwc.c
index ff8f199..f1cc5ae 100644
--- a/src/convolution-nhwc.c
+++ b/src/convolution-nhwc.c
@@ -955,6 +955,7 @@
         valid_batch_size = convolution_op->valid_batch_size;
         if (batch_size <= valid_batch_size) {
           convolution_op->compute.range[0] = batch_size * convolution_op->output_height;
+          convolution_op->context.dwconv.output = output;
           convolution_op->state = xnn_run_state_ready;
           return xnn_status_success;
         }