Rename QS8 GEMM/IGEMM/DWCONV microkernels

Include requantization scheme in the microkernel name

PiperOrigin-RevId: 375612878
diff --git a/src/qs8-gemm/1x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S b/src/qs8-gemm/1x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S
index 69669ef..7146da6 100644
--- a/src/qs8-gemm/1x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S
+++ b/src/qs8-gemm/1x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -25,7 +25,7 @@
 # C0  x6 v28 v29 v30 v31
 # unused v4 v5 v6 v7 v8 v9 v10 v11 v12 v13 v14 v15
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32
 0:
         # Load initial bias from w into accumulators
         ADD     x2, x2, 3               // kc = (kc + 3) & ~3
@@ -111,7 +111,7 @@
 6:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld32
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld32
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/1x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S b/src/qs8-gemm/1x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S
index 7c8b25b..73113db 100644
--- a/src/qs8-gemm/1x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S
+++ b/src/qs8-gemm/1x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -25,7 +25,7 @@
 # C0  x6 v28 v29 v30 v31
 # unused v8 v9 v10 v11 v12 v13 v14 v15
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64
         ADD     x2, x2, 3               // kc = (kc + 3) & ~3
         BIC     x2, x2, 3
 
@@ -145,7 +145,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x16c4__aarch64_neondot_ld64
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__aarch64_neondot_ld64
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/1x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in b/src/qs8-gemm/1x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in
index 30baf88..2e8ce51 100644
--- a/src/qs8-gemm/1x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in
+++ b/src/qs8-gemm/1x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -27,7 +27,7 @@
 # x16, x17, x7 tenporary a53 gpr load data
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
 
         LDP     x10, x9, [sp]           // cn_stride, params
 
@@ -272,7 +272,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/1x8c8-aarch64-neon-mlal-padal.S.in b/src/qs8-gemm/1x8c8-aarch64-neon-mlal-padal.S.in
index df90eff..99f6cb2 100644
--- a/src/qs8-gemm/1x8c8-aarch64-neon-mlal-padal.S.in
+++ b/src/qs8-gemm/1x8c8-aarch64-neon-mlal-padal.S.in
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -26,7 +26,7 @@
 # temp0  v17 v19 v21 v23
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
 
         LDP     x10, x9, [sp]           // cn_stride, params
 
@@ -247,7 +247,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/2x8c16-minmax-gemmlowp-aarch64-neon-mlal-padal.S b/src/qs8-gemm/2x8c16-minmax-gemmlowp-aarch64-neon-mlal-padal.S
index 40fe81b..9f8c28b 100644
--- a/src/qs8-gemm/2x8c16-minmax-gemmlowp-aarch64-neon-mlal-padal.S
+++ b/src/qs8-gemm/2x8c16-minmax-gemmlowp-aarch64-neon-mlal-padal.S
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -29,7 +29,7 @@
 # temp1   v3 v11 v13 v15
 # unused  v8 v9
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -210,7 +210,7 @@
         LDP     d10, d11, [sp], 48
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c16__aarch64_neon_mlal_padal
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__aarch64_neon_mlal_padal
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/2x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in b/src/qs8-gemm/2x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in
index 9f47636..f598124 100644
--- a/src/qs8-gemm/2x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in
+++ b/src/qs8-gemm/2x8c8-aarch64-neon-mlal-padal-cortex-a53.S.in
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -30,7 +30,7 @@
 # x16, x17, x20, x21 tenporary a53 gpr load data
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -406,7 +406,7 @@
         LDP     d8, d9, [sp], 80
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/2x8c8-aarch64-neon-mlal-padal.S.in b/src/qs8-gemm/2x8c8-aarch64-neon-mlal-padal.S.in
index 408db1a..398aee8 100644
--- a/src/qs8-gemm/2x8c8-aarch64-neon-mlal-padal.S.in
+++ b/src/qs8-gemm/2x8c8-aarch64-neon-mlal-padal.S.in
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -29,7 +29,7 @@
 # temp1   v3 v11 v13 v15
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -353,7 +353,7 @@
         LDP     d8, d9, [sp], 64
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal${"_prfm" if PREFETCH else ""}
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/2x8c8-minmax-gemmlowp-aarch64-neon-mull-padal.S b/src/qs8-gemm/2x8c8-minmax-gemmlowp-aarch64-neon-mull-padal.S
index f4e75da..a5a1444 100644
--- a/src/qs8-gemm/2x8c8-minmax-gemmlowp-aarch64-neon-mull-padal.S
+++ b/src/qs8-gemm/2x8c8-minmax-gemmlowp-aarch64-neon-mull-padal.S
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -29,7 +29,7 @@
 # temp1   v3 v11 v13 v15
 # unused  v8 v9
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -193,7 +193,7 @@
         LDP     d10, d11, [sp], 48
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mull_padal
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mull_padal
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/4x16-aarch64-neon-mlal-lane-cortex-a53.S.in b/src/qs8-gemm/4x16-aarch64-neon-mlal-lane-cortex-a53.S.in
index ce92cbf..cb0facf 100644
--- a/src/qs8-gemm/4x16-aarch64-neon-mlal-lane-cortex-a53.S.in
+++ b/src/qs8-gemm/4x16-aarch64-neon-mlal-lane-cortex-a53.S.in
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane${"_prfm" if PREFETCH else ""}_cortex_a53(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane${"_prfm" if PREFETCH else ""}_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -34,7 +34,7 @@
 
 # x10 x17 a53 temp registers
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane${"_prfm" if PREFETCH else ""}_cortex_a53
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane${"_prfm" if PREFETCH else ""}_cortex_a53
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -816,7 +816,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane${"_prfm" if PREFETCH else ""}_cortex_a53
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane${"_prfm" if PREFETCH else ""}_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-cortex-a55.S b/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-cortex-a55.S
index 5b3e213..811cd5c 100644
--- a/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-cortex-a55.S
+++ b/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-cortex-a55.S
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -31,7 +31,7 @@
 # C3  x7 v19 v23 v27 v31
 # unused v12 v13 v14 v15
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -658,7 +658,7 @@
         LDP     d8,  d9, [sp], 32
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_cortex_a55
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_cortex_a55
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S b/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S
index e92e6fe..169d7eb 100644
--- a/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S
+++ b/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld32.S
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -31,7 +31,7 @@
 # C3  x7 v19 v23 v27 v31
 # unused v8 v9 v10 v11 v12 v13 v14 v15
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -289,7 +289,7 @@
 6:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld32
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld32
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S b/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S
index bf65995..efb6234 100644
--- a/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S
+++ b/src/qs8-gemm/4x16c4-minmax-gemmlowp-aarch64-neondot-ld64.S
@@ -5,7 +5,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -31,7 +31,7 @@
 # C3  x7 v19 v23 v27 v31
 # unused v8 v9 v10 v11 v12 v13 v14 v15
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -340,7 +340,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16c4__aarch64_neondot_ld64
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__aarch64_neondot_ld64
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/MRx16c8-avx512skx.c.in b/src/qs8-gemm/MRx16c8-avx512skx.c.in
index 7870b6c..12e073f 100644
--- a/src/qs8-gemm/MRx16c8-avx512skx.c.in
+++ b/src/qs8-gemm/MRx16c8-avx512skx.c.in
@@ -16,7 +16,7 @@
 
 
 $GEMM_SUFFIX = "_xw" if VARIANT == "EXTENDED" else ""
-void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_ukernel_${MR}x16c8__avx512skx(
+void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_gemmlowp_ukernel_${MR}x16c8__avx512skx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/MRx4c2-sse.c.in b/src/qs8-gemm/MRx4c2-sse.c.in
index 7669bfa..e13c6f2 100644
--- a/src/qs8-gemm/MRx4c2-sse.c.in
+++ b/src/qs8-gemm/MRx4c2-sse.c.in
@@ -29,7 +29,7 @@
 $GEMM_SUFFIX = "_xw" if VARIANT == "EXTENDED" else ""
 $PARAMS_STRUCT = "sse4" if SSE >= 4 else "sse2"
 $ISA = "xop" if XOP else "avx" if AVX else {2: "sse2", 3: "ssse3", 4: "sse41"}[SSE]
-void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_ukernel_${MR}x4c2__${ISA}${LOAD_SUFFIX}(
+void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_gemmlowp_ukernel_${MR}x4c2__${ISA}${LOAD_SUFFIX}(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/MRx4c8-sse.c.in b/src/qs8-gemm/MRx4c8-sse.c.in
index 0f2ed51..f06aed9 100644
--- a/src/qs8-gemm/MRx4c8-sse.c.in
+++ b/src/qs8-gemm/MRx4c8-sse.c.in
@@ -29,7 +29,7 @@
 $GEMM_SUFFIX = "_xw" if VARIANT == "EXTENDED" else ""
 $PARAMS_STRUCT = "sse4" if SSE >= 4 else "sse2"
 $ISA = "xop" if XOP else "avx" if AVX else {2: "sse2", 3: "ssse3", 4: "sse41"}[SSE]
-void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_ukernel_${MR}x4c8__${ISA}${LOAD_SUFFIX}(
+void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_gemmlowp_ukernel_${MR}x4c8__${ISA}${LOAD_SUFFIX}(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/MRx4c8-wasmsimd.c.in b/src/qs8-gemm/MRx4c8-wasmsimd.c.in
index 4fced4a..46c1b72 100644
--- a/src/qs8-gemm/MRx4c8-wasmsimd.c.in
+++ b/src/qs8-gemm/MRx4c8-wasmsimd.c.in
@@ -15,7 +15,7 @@
 
 $LOAD_SUFFIX = {"LD128": "_ld128", "LD64": "_ld64", "EXTENDED": ""}[VARIANT]
 $GEMM_SUFFIX = "_xw" if VARIANT == "EXTENDED" else ""
-void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_ukernel_${MR}x4c8__wasmsimd${LOAD_SUFFIX}(
+void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_gemmlowp_ukernel_${MR}x4c8__wasmsimd${LOAD_SUFFIX}(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/MRx8c8-avx2.c.in b/src/qs8-gemm/MRx8c8-avx2.c.in
index bac1ce2..20e4af5 100644
--- a/src/qs8-gemm/MRx8c8-avx2.c.in
+++ b/src/qs8-gemm/MRx8c8-avx2.c.in
@@ -15,7 +15,7 @@
 
 
 $GEMM_SUFFIX = "_xw" if VARIANT == "EXTENDED" else ""
-void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_ukernel_${MR}x8c8__avx2(
+void xnn_qs8_gemm${GEMM_SUFFIX}_minmax_gemmlowp_ukernel_${MR}x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/MRxNRc4-neondot.c.in b/src/qs8-gemm/MRxNRc4-neondot.c.in
index b037bd8..dded8ae 100644
--- a/src/qs8-gemm/MRxNRc4-neondot.c.in
+++ b/src/qs8-gemm/MRxNRc4-neondot.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_${MR}x${NR}c4__neondot(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_${MR}x${NR}c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/c16-neon-mlal-padal.c.in b/src/qs8-gemm/c16-neon-mlal-padal.c.in
index 15f28b7..cb7ae46 100644
--- a/src/qs8-gemm/c16-neon-mlal-padal.c.in
+++ b/src/qs8-gemm/c16-neon-mlal-padal.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_${MR}x${NR}c16__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_${MR}x${NR}c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/c2-neon-mull-padal-dup.c.in b/src/qs8-gemm/c2-neon-mull-padal-dup.c.in
index f9281fa..235d9e0 100644
--- a/src/qs8-gemm/c2-neon-mull-padal-dup.c.in
+++ b/src/qs8-gemm/c2-neon-mull-padal-dup.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_${MR}x${NR}c2__neon_${"mlal" if MLA else "mull"}_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_${MR}x${NR}c2__neon_${"mlal" if MLA else "mull"}_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/c8-neon-mull-padal.c.in b/src/qs8-gemm/c8-neon-mull-padal.c.in
index 3a39bdf..3895a74 100644
--- a/src/qs8-gemm/c8-neon-mull-padal.c.in
+++ b/src/qs8-gemm/c8-neon-mull-padal.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_${MR}x${NR}c8__neon_${"mlal" if MLA else "mull"}_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_${MR}x${NR}c8__neon_${"mlal" if MLA else "mull"}_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 884d3d9..cd26fa5 100644
--- a/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c
index 72f6a33..bb363e9 100644
--- a/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c
index 0d1e159..319a9d0 100644
--- a/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/1x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c
index 8777d3d..1735cc8 100644
--- a/src/qs8-gemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/1x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 7a60c35..262b710 100644
--- a/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 6ab52c5..407cc9b 100644
--- a/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/1x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/1x16c4-minmax-gemmlowp-neondot.c
index f73dd33..9439336 100644
--- a/src/qs8-gemm/gen/1x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/1x16c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c
index 7729f91..08fff83 100644
--- a/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-avx512skx.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__avx512skx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c
index a45769e..f2610c8 100644
--- a/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c
index ac60087..540f4ee 100644
--- a/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/1x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x16c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x2-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/1x2-minmax-gemmlowp-scalar.c
index e206a4b..4454ec1 100644
--- a/src/qs8-gemm/gen/1x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/1x2-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x2__scalar(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x2__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/1x4-minmax-gemmlowp-scalar.c
index 731650e..58ce276 100644
--- a/src/qs8-gemm/gen/1x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/1x4-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4__scalar(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c
index 8350dc1..2601b09 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c
index 63362ee..d3e3979 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__avx_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c
index 7df288a..d7f76e3 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c
index d809fb1..d4703ba 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__sse2_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c
index 6b57e90..19ea8d5 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c
index 5a75904..e8112b9 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__sse41_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c
index b3ea64b..cdcd535 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c
index 1401dc4..e24e6cc 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__ssse3_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c
index 227d0ab..b92097b 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c
index 1a3f597..1d4e12c 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c2__xop_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c2__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-avx.c
index 4c04278..e3e799e 100644
--- a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-avx.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__avx(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__avx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse2.c
index 299d609..1954255 100644
--- a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse2.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse41.c
index 975d67c..e3fff33 100644
--- a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-sse41.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__sse41(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__sse41(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-ssse3.c
index 1b56035..ab14a17 100644
--- a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-ssse3.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__ssse3(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__ssse3(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-xop.c
index 5565473..9e37aeb 100644
--- a/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/1x4c2-xw-minmax-gemmlowp-xop.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c2__xop(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c2__xop(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c
index 9f3ee44..7154798 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c
index d537eeb..884a744 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__avx_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c
index 6370331..0a3c9bf 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c
index 89adced..d824489 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__sse2_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c
index a68701d..eec86d2 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c
index 17a890e..e8f39f1 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__sse41_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c
index 33104b1..ab3766d 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c
index 535b406..b2f0dd1 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__ssse3_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c
index 358cb92..f5b73f0 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c
index 7cd2050..96d437c 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-wasmsimd-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__wasmsimd_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__wasmsimd_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c
index 34e3cb0..a8f1eb7 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c
index ebcc4aa..0a3b7b1 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x4c8__xop_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x4c8__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-avx.c
index 6d4827c..608678e 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-avx.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__avx(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__avx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse2.c
index c5da5d5..8304817 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse2.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse41.c
index c0ee9fd..cbf6c84 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-sse41.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__sse41(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__sse41(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-ssse3.c
index 9967e0f..a1a6121 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-ssse3.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__ssse3(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__ssse3(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-wasmsimd.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-wasmsimd.c
index 6983219..2984f88 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-wasmsimd.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-wasmsimd.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__wasmsimd(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__wasmsimd(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-xop.c
index 44bc8cc..afa66cd 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-gemmlowp-xop.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x4c8__xop(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x4c8__xop(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 4a0861d..9279a2f 100644
--- a/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
index a00f139..0039d94 100644
--- a/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
index 2e98ffc..61a756b 100644
--- a/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/1x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8__neon_mull_addw_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c
index 41d2ce5..206e8a0 100644
--- a/src/qs8-gemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/1x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8c16__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 02257a1..b830974 100644
--- a/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mlal_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index d5f3247..08c447a 100644
--- a/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/1x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8c2__neon_mull_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/1x8c4-minmax-gemmlowp-neondot.c
index 7c6318d..88822ea 100644
--- a/src/qs8-gemm/gen/1x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/1x8c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8c4__neondot(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
index a7f167c..453eb25 100644
--- a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
+++ b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -31,7 +31,7 @@
 # x16, x17, x7 tenporary a53 gpr load data
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53
 
         LDP     x10, x9, [sp]           // cn_stride, params
 
@@ -270,7 +270,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
index 3491d88..587c39c 100644
--- a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
+++ b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -31,7 +31,7 @@
 # x16, x17, x7 tenporary a53 gpr load data
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
 
         LDP     x10, x9, [sp]           // cn_stride, params
 
@@ -273,7 +273,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
index 120e13f..c876f1b 100644
--- a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
+++ b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -30,7 +30,7 @@
 # temp0  v17 v19 v21 v23
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm
 
         LDP     x10, x9, [sp]           // cn_stride, params
 
@@ -245,7 +245,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal_prfm
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
index 5084b31..d835538 100644
--- a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
+++ b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -30,7 +30,7 @@
 # temp0  v17 v19 v21 v23
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal
 
         LDP     x10, x9, [sp]           // cn_stride, params
 
@@ -239,7 +239,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_1x8c8__aarch64_neon_mlal_padal
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__aarch64_neon_mlal_padal
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-avx2.c b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-avx2.c
index 5f0a77c..44c0264 100644
--- a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-avx2.c
+++ b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-avx2.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8c8__avx2(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c
index ed094a7..b086df7 100644
--- a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c
index 971d6a4..c97a2dc 100644
--- a/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/1x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_1x8c8__neon_mull_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_1x8c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/1x8c8-xw-minmax-gemmlowp-avx2.c b/src/qs8-gemm/gen/1x8c8-xw-minmax-gemmlowp-avx2.c
index 69b6b7c..9146b41 100644
--- a/src/qs8-gemm/gen/1x8c8-xw-minmax-gemmlowp-avx2.c
+++ b/src/qs8-gemm/gen/1x8c8-xw-minmax-gemmlowp-avx2.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_1x8c8__avx2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_1x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 49580f8..9128db9 100644
--- a/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
index b7d4071..d883ca9 100644
--- a/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x16__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c
index 5850903..cab6ad6 100644
--- a/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/2x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x16__neon_mull_addw_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c
index eafacd8..6ddcd89 100644
--- a/src/qs8-gemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/2x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x16c16__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 20959ce..3b33cc3 100644
--- a/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mlal_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index a306ef6..8d2862d 100644
--- a/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/2x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c
index cab5fd0..c437aea 100644
--- a/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-avx512skx.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x16c8__avx512skx(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__avx512skx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c
index de9c3ea..a766207 100644
--- a/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c
index 84a40e3..c96eaec 100644
--- a/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/2x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x16c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x2-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/2x2-minmax-gemmlowp-scalar.c
index 523cf90..9f4d9b4 100644
--- a/src/qs8-gemm/gen/2x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/2x2-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x2__scalar(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x2__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/2x4-minmax-gemmlowp-scalar.c
index 48188eb..01f2ab8 100644
--- a/src/qs8-gemm/gen/2x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/2x4-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4__scalar(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c
index a6bb8cf..e9b0921 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c
index 2b72822..d9fde6f 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__avx_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c
index c4ade3b..21bac85 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c
index 672ce4a..f0f634f 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__sse2_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c
index 60a7aa1..5b3d4ba 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c
index 7f5ba39..07bedc7 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__sse41_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c
index 53bf77c..be538ab 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c
index c2aec7c..49afa05 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__ssse3_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c
index 6266b33..6ff8eaf 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c
index a4f12a4..864db27 100644
--- a/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/2x4c2-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c2__xop_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c2__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-avx.c
index dad6b55..5b70a39 100644
--- a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-avx.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__avx(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__avx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse2.c
index 4beb018..3391f82 100644
--- a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse2.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__sse2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__sse2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse41.c
index a1575e9..2a7e69e 100644
--- a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-sse41.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__sse41(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__sse41(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-ssse3.c
index d51e490..4edb2ab 100644
--- a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-ssse3.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__ssse3(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__ssse3(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-xop.c
index 19dcda2..f202ba3 100644
--- a/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/2x4c2-xw-minmax-gemmlowp-xop.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c2__xop(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c2__xop(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c
index fc48e65..ebe7be8 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c
index 713a705..1224615 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__avx_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c
index 978e912..41ae006 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c
index 8d7725e..eed4666 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__sse2_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c
index 6236921..1efa46e 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c
index d9db58d..67248e0 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__sse41_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c
index 26c49c9..4edb5ad 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c
index 163a9f8..40cc948 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__ssse3_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c
index a48f0c7..e3dee05 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c
index 2c37105..3996eaa 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-wasmsimd-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__wasmsimd_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__wasmsimd_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c
index 8f2ec5e..160f770 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c
index 16bbdcc..f240a7e 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x4c8__xop_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x4c8__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-avx.c
index 3a1a964..db3d8c3 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-avx.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__avx(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__avx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse2.c
index 7764dd8..0f2e070 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse2.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse41.c
index 1903f6b..ff3ec8f 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-sse41.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__sse41(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__sse41(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-ssse3.c
index bde1b65..771050c 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-ssse3.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__ssse3(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__ssse3(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-wasmsimd.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-wasmsimd.c
index e9444f8..4b866f1 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-wasmsimd.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-wasmsimd.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__wasmsimd(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__wasmsimd(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-xop.c
index b15a8f9..60defec 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-gemmlowp-xop.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x4c8__xop(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x4c8__xop(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 35fa5c9..3e70e46 100644
--- a/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
index 296571f..c24f9af 100644
--- a/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x8__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c
index b5a7b2f..2938a65 100644
--- a/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/2x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x8__neon_mull_addw_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c
index 21b3acc..34a5978 100644
--- a/src/qs8-gemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/2x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x8c16__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 530d297..3d4e374 100644
--- a/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mlal_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 83ab0d1..3ba6953 100644
--- a/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/2x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x8c2__neon_mull_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
index a9e2aa4..3dba71d 100644
--- a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
+++ b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-cortex-a53.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -34,7 +34,7 @@
 # x16, x17, x20, x21 tenporary a53 gpr load data
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -402,7 +402,7 @@
         LDP     d8, d9, [sp], 80
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
index 1470573..caa2e3b 100644
--- a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
+++ b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm-cortex-a53.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -34,7 +34,7 @@
 # x16, x17, x20, x21 tenporary a53 gpr load data
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -406,7 +406,7 @@
         LDP     d8, d9, [sp], 80
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
index 661ae54..e5be317 100644
--- a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
+++ b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal-prfm.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -33,7 +33,7 @@
 # temp1   v3 v11 v13 v15
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -353,7 +353,7 @@
         LDP     d8, d9, [sp], 64
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal_prfm
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
index f6df72a..6b0e996 100644
--- a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
+++ b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-aarch64-neon-mlal-padal.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -33,7 +33,7 @@
 # temp1   v3 v11 v13 v15
 
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -349,7 +349,7 @@
         LDP     d8, d9, [sp], 64
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_2x8c8__aarch64_neon_mlal_padal
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__aarch64_neon_mlal_padal
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-avx2.c b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-avx2.c
index be11da6..be9fc9a 100644
--- a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-avx2.c
+++ b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-avx2.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x8c8__avx2(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c
index 19f7b55..9660125 100644
--- a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c
index 41b37d7..d840aba 100644
--- a/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/2x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_2x8c8__neon_mull_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_2x8c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/2x8c8-xw-minmax-gemmlowp-avx2.c b/src/qs8-gemm/gen/2x8c8-xw-minmax-gemmlowp-avx2.c
index f4e707b..732835b 100644
--- a/src/qs8-gemm/gen/2x8c8-xw-minmax-gemmlowp-avx2.c
+++ b/src/qs8-gemm/gen/2x8c8-xw-minmax-gemmlowp-avx2.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_2x8c8__avx2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_2x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 0fb5e2e..b16dcd3 100644
--- a/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
index 9c498fc..4e234e0 100644
--- a/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x16__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c
index 84b77b5..e1b0ad0 100644
--- a/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/3x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x16__neon_mull_addw_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c
index 5bd1d85..2a9e2cf 100644
--- a/src/qs8-gemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/3x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x16c16__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 782bb6b..259cc2e 100644
--- a/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mlal_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index f5b5951..9912589 100644
--- a/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/3x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x16c2__neon_mull_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c
index b6d6a6d..40903b2 100644
--- a/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-avx512skx.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x16c8__avx512skx(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__avx512skx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c
index 9b457f2..03f31f0 100644
--- a/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c
index 77450dd..93aae3c 100644
--- a/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/3x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x16c8__neon_mull_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x16c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x2-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/3x2-minmax-gemmlowp-scalar.c
index 04253c0..461568a 100644
--- a/src/qs8-gemm/gen/3x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/3x2-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x2__scalar(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x2__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/3x4-minmax-gemmlowp-scalar.c
index 30a6d51..09eb67a 100644
--- a/src/qs8-gemm/gen/3x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/3x4-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4__scalar(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c
index 06a9192..2010ae1 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c
index cff57f6..31002b2 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__avx_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c
index a3e6c9f..f6b4b10 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c
index a6a69b4..629aa5f 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__sse2_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c
index 396470b..bd727a6 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c
index 227d609..b379ade 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__sse41_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c
index 306b6e5..762c63d 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c
index 664fd04..09db9ec 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__ssse3_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c
index f599282..17c0ea1 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c
index c7a4ceb..b05257b 100644
--- a/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/3x4c2-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c2__xop_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c2__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-avx.c
index 0f5d9b1..29abd40 100644
--- a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-avx.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__avx(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__avx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse2.c
index 0b0c916..a54a94f 100644
--- a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse2.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__sse2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__sse2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse41.c
index c4b2953..b6f21c4 100644
--- a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-sse41.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__sse41(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__sse41(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-ssse3.c
index 4cd3e0d..e44996f 100644
--- a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-ssse3.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__ssse3(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__ssse3(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-xop.c
index 86d4b3f..6687fef 100644
--- a/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/3x4c2-xw-minmax-gemmlowp-xop.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c2__xop(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c2__xop(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c
index a2bf9d9..2b106c2 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c
index 72b3074..96980f4 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__avx_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c
index 0f57e0f..2da3d50 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c
index 10fa7cd..55cc4a7 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__sse2_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c
index 6d8579c..4a255bc 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c
index 62b0ba2..0ddf914 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__sse41_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c
index 81d0de0..9a8dd50 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c
index fa0121b..0857eaf 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__ssse3_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c
index 2c11faf..35d7f78 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c
index 678e305..26f58fc 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-wasmsimd-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__wasmsimd_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__wasmsimd_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c
index 4a09afe..d840ac7 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c
index 0c874fc..1bb8e6f 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x4c8__xop_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x4c8__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-avx.c
index a4192f6..5168a36 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-avx.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__avx(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__avx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse2.c
index 4d4d116..98bfb01 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse2.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse41.c
index a00a4ca..b9eda39 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-sse41.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__sse41(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__sse41(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-ssse3.c
index 13681e3..c402de7 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-ssse3.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__ssse3(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__ssse3(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-wasmsimd.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-wasmsimd.c
index d0df439..5fe2220 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-wasmsimd.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-wasmsimd.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__wasmsimd(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__wasmsimd(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-xop.c
index e471b8e..2bf623f 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-gemmlowp-xop.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x4c8__xop(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x4c8__xop(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 94ddff9..ca03c7b 100644
--- a/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
index 6e4a6b4..051e8f2 100644
--- a/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x8__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c
index b66556d..49a11d6 100644
--- a/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/3x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x8__neon_mull_addw_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c
index e4ec320..6053eac 100644
--- a/src/qs8-gemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/3x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x8c16__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index f273682..851629c 100644
--- a/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mlal_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index c2510de..8001d85 100644
--- a/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/3x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x8c2__neon_mull_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-avx2.c b/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-avx2.c
index d41e047..e87cbef 100644
--- a/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-avx2.c
+++ b/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-avx2.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x8c8__avx2(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c
index f7cdbe7..faa1eb7 100644
--- a/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c
index 0b7651e..18d17a1 100644
--- a/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/3x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_3x8c8__neon_mull_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_3x8c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/3x8c8-xw-minmax-gemmlowp-avx2.c b/src/qs8-gemm/gen/3x8c8-xw-minmax-gemmlowp-avx2.c
index d589e3c..f7d5a09 100644
--- a/src/qs8-gemm/gen/3x8c8-xw-minmax-gemmlowp-avx2.c
+++ b/src/qs8-gemm/gen/3x8c8-xw-minmax-gemmlowp-avx2.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_3x8c8__avx2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_3x8c8__avx2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-cortex-a53.S b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-cortex-a53.S
index 81c990c..0e84934 100644
--- a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-cortex-a53.S
+++ b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-cortex-a53.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -38,7 +38,7 @@
 
 # x10 x17 a53 temp registers
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -808,7 +808,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-prfm-cortex-a53.S b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-prfm-cortex-a53.S
index 4940ffe..4250a81 100644
--- a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-prfm-cortex-a53.S
+++ b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-aarch64-neon-mlal-lane-prfm-cortex-a53.S
@@ -9,7 +9,7 @@
 
 #include <xnnpack/assembly.h>
 
-# void xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53(
+# void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53(
 #     size_t mr,                 x0
 #     size_t nc,                 x1
 #     size_t kc,                 x2 / x0
@@ -38,7 +38,7 @@
 
 # x10 x17 a53 temp registers
 
-BEGIN_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53
+BEGIN_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53
 
         # Clamp A and C pointers
         CMP     x0, 2                   // if mr < 2
@@ -814,7 +814,7 @@
 8:
         RET
 
-END_FUNCTION xnn_qs8_gemm_minmax_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53
+END_FUNCTION xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__aarch64_neon_mlal_lane_prfm_cortex_a53
 
 #ifdef __ELF__
 .section ".note.GNU-stack","",%progbits
diff --git a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index c84ae2d..89c6da0 100644
--- a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c
index 4dfd5af..6311f6b 100644
--- a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c
index cd1d411..546e6c0 100644
--- a/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/4x16-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16__neon_mull_addw_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c
index 9831c12..88a8672 100644
--- a/src/qs8-gemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/4x16c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16c16__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index 1a21b08..045bcc0 100644
--- a/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mlal_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 8f6c1a2..1e2eb9a 100644
--- a/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/4x16c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16c2__neon_mull_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/4x16c4-minmax-gemmlowp-neondot.c
index 2744fe9..d62c105 100644
--- a/src/qs8-gemm/gen/4x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/4x16c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c b/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c
index 2a7b6ca..d3cea1a 100644
--- a/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c
+++ b/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-avx512skx.c
@@ -16,7 +16,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16c8__avx512skx(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__avx512skx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c
index 2f5cbb0..1768564 100644
--- a/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c
index 12294e8..f64cfb1 100644
--- a/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/4x16c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x16c8__neon_mull_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x16c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x2-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/4x2-minmax-gemmlowp-scalar.c
index 5d9c3ae..e2050cd 100644
--- a/src/qs8-gemm/gen/4x2-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/4x2-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x2__scalar(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x2__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4-minmax-gemmlowp-scalar.c b/src/qs8-gemm/gen/4x4-minmax-gemmlowp-scalar.c
index 725583d..35cf43c 100644
--- a/src/qs8-gemm/gen/4x4-minmax-gemmlowp-scalar.c
+++ b/src/qs8-gemm/gen/4x4-minmax-gemmlowp-scalar.c
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4__scalar(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4__scalar(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c
index d3c0a28..0f19279 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c
index 43fdc24..83572e9 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-avx-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__avx_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__avx_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c
index 20a69e6..daff859 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c
index aed4404..5ab288b 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse2-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__sse2_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse2_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c
index 31a3cda..180fabe 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c
index b2cf9d6..1bb000d 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-sse41-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__sse41_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__sse41_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c
index c1a756e..94f18c0 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld128.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c
index d1b058d..757c664 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-ssse3-ld64.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__ssse3_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__ssse3_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c
index b1b2b86..885d0f6 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld128.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld128(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld128(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c
index 1bf5524..ff09bbe 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-gemmlowp-xop-ld64.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x4c2__xop_ld64(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x4c2__xop_ld64(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-avx.c b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-avx.c
index 8179aeb..0ff4f2d 100644
--- a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-avx.c
+++ b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-avx.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__avx(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__avx(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse2.c b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse2.c
index 60593d2..6fad423 100644
--- a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse2.c
+++ b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse2.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse2(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse2(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse41.c b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse41.c
index ca5a399..b59daa4 100644
--- a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse41.c
+++ b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-sse41.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__sse41(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__sse41(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-ssse3.c b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-ssse3.c
index a2cd7d0..a56c299 100644
--- a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-ssse3.c
+++ b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-ssse3.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__ssse3(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__ssse3(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-xop.c b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-xop.c
index 699237d..99ea366 100644
--- a/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-xop.c
+++ b/src/qs8-gemm/gen/4x4c2-xw-minmax-gemmlowp-xop.c
@@ -20,7 +20,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_xw_minmax_ukernel_4x4c2__xop(
+void xnn_qs8_gemm_xw_minmax_gemmlowp_ukernel_4x4c2__xop(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 2a00c63..427252d 100644
--- a/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
index 6742b5c..04c1e2b 100644
--- a/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x8__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c b/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c
index 23b5f6f..abb9621 100644
--- a/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c
+++ b/src/qs8-gemm/gen/4x8-minmax-gemmlowp-neon-mull-addw-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x8__neon_mull_addw_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c
index 8fc445d..5245b23 100644
--- a/src/qs8-gemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/4x8c16-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x8c16__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c16__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c b/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
index fa2daf0..d9f855b 100644
--- a/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
+++ b/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mlal-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mlal_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mlal_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c b/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
index 879fdc7..8a81abc 100644
--- a/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
+++ b/src/qs8-gemm/gen/4x8c2-minmax-gemmlowp-neon-mull-padal-dup.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x8c2__neon_mull_padal_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c2__neon_mull_padal_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x8c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/4x8c4-minmax-gemmlowp-neondot.c
index b609be0..b31b68e 100644
--- a/src/qs8-gemm/gen/4x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/4x8c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x8c4__neondot(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c b/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c
index 96764f3..bd2e83e 100644
--- a/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c
+++ b/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mlal-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mlal_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mlal_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c b/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c
index 520ab4a..e37c3a1 100644
--- a/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c
+++ b/src/qs8-gemm/gen/4x8c8-minmax-gemmlowp-neon-mull-padal.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_4x8c8__neon_mull_padal(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_4x8c8__neon_mull_padal(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
index f8d3267..8218751 100644
--- a/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
index 00e6d40..db38867 100644
--- a/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/6x16-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_6x16__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/6x16c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/6x16c4-minmax-gemmlowp-neondot.c
index 1bd39bc..5dddcb2 100644
--- a/src/qs8-gemm/gen/6x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/6x16c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_6x16c4__neondot(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x16c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c b/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
index 8a4308a..405d6d0 100644
--- a/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
+++ b/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane-prfm.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane_prfm(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane_prfm(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c b/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
index 63d1f02..577417f 100644
--- a/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
+++ b/src/qs8-gemm/gen/6x8-minmax-gemmlowp-neon-mlal-lane.c
@@ -15,7 +15,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_6x8__neon_mlal_lane(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8__neon_mlal_lane(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/6x8c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/6x8c4-minmax-gemmlowp-neondot.c
index 489e824..7466ac0 100644
--- a/src/qs8-gemm/gen/6x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/6x8c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_6x8c4__neondot(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_6x8c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/8x16c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/8x16c4-minmax-gemmlowp-neondot.c
index 88d722e..cd39d25 100644
--- a/src/qs8-gemm/gen/8x16c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/8x16c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_8x16c4__neondot(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x16c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/gen/8x8c4-minmax-gemmlowp-neondot.c b/src/qs8-gemm/gen/8x8c4-minmax-gemmlowp-neondot.c
index 47375be..fdbc6e3 100644
--- a/src/qs8-gemm/gen/8x8c4-minmax-gemmlowp-neondot.c
+++ b/src/qs8-gemm/gen/8x8c4-minmax-gemmlowp-neondot.c
@@ -15,7 +15,7 @@
 #include <xnnpack/math.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_8x8c4__neondot(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_8x8c4__neondot(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/neon-mlal-lane.c.in b/src/qs8-gemm/neon-mlal-lane.c.in
index 0d9bf20..04e952c 100644
--- a/src/qs8-gemm/neon-mlal-lane.c.in
+++ b/src/qs8-gemm/neon-mlal-lane.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_${MR}x${NR}__neon_mlal_lane${"_prfm" if PREFETCH else ""}(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_${MR}x${NR}__neon_mlal_lane${"_prfm" if PREFETCH else ""}(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/neon-mull-addw-dup.c.in b/src/qs8-gemm/neon-mull-addw-dup.c.in
index 3d38ef2..0508020 100644
--- a/src/qs8-gemm/neon-mull-addw-dup.c.in
+++ b/src/qs8-gemm/neon-mull-addw-dup.c.in
@@ -14,7 +14,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_${MR}x${NR}__neon_mull_addw_dup(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_${MR}x${NR}__neon_mull_addw_dup(
     size_t mr,
     size_t nc,
     size_t kc,
diff --git a/src/qs8-gemm/scalar.c.in b/src/qs8-gemm/scalar.c.in
index 314ff76..e94033f 100644
--- a/src/qs8-gemm/scalar.c.in
+++ b/src/qs8-gemm/scalar.c.in
@@ -10,7 +10,7 @@
 #include <xnnpack/gemm.h>
 
 
-void xnn_qs8_gemm_minmax_ukernel_${MR}x${NR}__scalar(
+void xnn_qs8_gemm_minmax_gemmlowp_ukernel_${MR}x${NR}__scalar(
     size_t mr,
     size_t nc,
     size_t kc,