Integrate JIT generated GEMM microkernels into create_convolution2d_nhwc

Introduce a new field, generator, in struct gemm_parameters; it contains JIT code generators for gemm, igemm, gemm1, and igemm1. When it is set, convolution operator creation will try to generate the microkernel using the JIT. (Right now only gemm is supported; the rest will follow in future patches.)

The xnn_ukernel_gemm and xnn_ukernel_igemm structs also each have two new fields of type struct xnn_code_buffer, general_code_buffer and mr1_code_buffer, where the generated code will be kept; these buffers are released when the operator is deleted.

The generator field is only set in the E2E benchmarks; the F32 E2E benchmarks are updated to support testing JIT-generated microkernels.

PiperOrigin-RevId: 425700057
diff --git a/src/xnnpack/operator.h b/src/xnnpack/operator.h
index 2d69669..f9c8969 100644
--- a/src/xnnpack/operator.h
+++ b/src/xnnpack/operator.h
@@ -13,6 +13,7 @@
 
 #include <pthreadpool.h>
 
+#include <xnnpack/allocator.h>
 #include <xnnpack/params.h>
 #include <xnnpack/compute.h>
 
@@ -145,6 +146,8 @@
 struct xnn_ukernel_gemm {
   struct xnn_hmp_gemm_ukernel general_case;
   struct xnn_hmp_gemm_ukernel mr1_case;
+  struct xnn_code_buffer general_code_buffer;
+  struct xnn_code_buffer mr1_code_buffer;
   uint8_t mr;
   uint8_t nr;
   uint8_t kr;
@@ -155,6 +158,8 @@
   struct xnn_hmp_igemm_ukernel general_case;
   struct xnn_hmp_igemm_ukernel mr1_case;
   struct xnn_hmp_gemm_ukernel gemm_case;
+  struct xnn_code_buffer general_code_buffer;
+  struct xnn_code_buffer mr1_code_buffer;
   uint8_t mr;
   uint8_t nr;
   uint8_t kr;