Make SSE2 transpose microkernels consistent with the NEON zip microkernels.
 - The DEC variants are renamed to MOV (e.g. 2x2_multi_dec_sse2 is now 2x2_multi_mov_sse2).

PiperOrigin-RevId: 425319775
diff --git a/test/x64-transpose.cc b/test/x64-transpose.cc
index a3991f8..9d52acd 100644
--- a/test/x64-transpose.cc
+++ b/test/x64-transpose.cc
@@ -1378,7 +1378,7 @@
 }
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(X64_TRANSPOSE__2X2_MULTI_DEC_SSE2, bh_2_bw_2) {
+  TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_2_bw_2) {
     TEST_REQUIRES_X86_SSE2;
     TransposeMicrokernelTester()
       .input_stride(2)
@@ -1386,10 +1386,10 @@
       .block_width(2)
       .block_height(2)
       .iterations(1)
-      .Test(xnn_x64_transpose_ukernel__2x2_multi_dec_sse2);
+      .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
   }
 
-  TEST(X64_TRANSPOSE__2X2_MULTI_DEC_SSE2, bh_1_4_bw_1_4) {
+  TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_1_4_bw_1_4) {
     TEST_REQUIRES_X86_SSE2;
     for(size_t i = 1; i <= 4; ++i){
       for(size_t j = 1; j <= 4; ++j){
@@ -1399,12 +1399,12 @@
           .block_width(j)
           .block_height(i)
           .iterations(1)
-          .Test(xnn_x64_transpose_ukernel__2x2_multi_dec_sse2);
+          .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
       }
     }
   }
 
-  TEST(X64_TRANSPOSE__2X2_MULTI_DEC_SSE2, bh_2_bw_4) {
+  TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_2_bw_4) {
     TEST_REQUIRES_X86_SSE2;
     TransposeMicrokernelTester()
       .input_stride(4)
@@ -1412,10 +1412,10 @@
       .block_width(4)
       .block_height(2)
       .iterations(1)
-      .Test(xnn_x64_transpose_ukernel__2x2_multi_dec_sse2);
+      .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
   }
 
-  TEST(X64_TRANSPOSE__2X2_MULTI_DEC_SSE2, bh_2_bw_3_4) {
+  TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_2_bw_3_4) {
     TEST_REQUIRES_X86_SSE2;
     for(size_t i = 3; i < 4; ++i){
       TransposeMicrokernelTester()
@@ -1424,11 +1424,11 @@
         .block_width(i)
         .block_height(2)
         .iterations(1)
-        .Test(xnn_x64_transpose_ukernel__2x2_multi_dec_sse2);
+        .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
     }
   }
 
-  TEST(X64_TRANSPOSE__2X2_MULTI_DEC_SSE2, bh_4_bw_3_4) {
+  TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_4_bw_3_4) {
     TEST_REQUIRES_X86_SSE2;
     for(size_t i = 3; i < 4; ++i){
       TransposeMicrokernelTester()
@@ -1437,11 +1437,11 @@
         .block_width(i)
         .block_height(4)
         .iterations(1)
-        .Test(xnn_x64_transpose_ukernel__2x2_multi_dec_sse2);
+        .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
     }
   }
 
-  TEST(X64_TRANSPOSE__2X2_MULTI_DEC_SSE2, bh_4_bw_2) {
+  TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_4_bw_2) {
     TEST_REQUIRES_X86_SSE2;
     TransposeMicrokernelTester()
       .input_stride(2)
@@ -1449,10 +1449,10 @@
       .block_width(2)
       .block_height(4)
       .iterations(1)
-      .Test(xnn_x64_transpose_ukernel__2x2_multi_dec_sse2);
+      .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
   }
 
-  TEST(X64_TRANSPOSE__2X2_MULTI_DEC_SSE2, bh_3_4_bw_2){
+  TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_3_4_bw_2){
     TEST_REQUIRES_X86_SSE2;
     for(size_t i = 3; i < 4; ++i){
       TransposeMicrokernelTester()
@@ -1461,11 +1461,11 @@
         .block_width(2)
         .block_height(i)
         .iterations(1)
-        .Test(xnn_x64_transpose_ukernel__2x2_multi_dec_sse2);
+        .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
     }
   }
 
-  TEST(X64_TRANSPOSE__2X2_MULTI_DEC_SSE2, bh_3_4_bw_4){
+  TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_3_4_bw_4){
     TEST_REQUIRES_X86_SSE2;
     for(size_t i = 3; i < 4; ++i){
       TransposeMicrokernelTester()
@@ -1474,11 +1474,11 @@
         .block_width(4)
         .block_height(i)
         .iterations(1)
-        .Test(xnn_x64_transpose_ukernel__2x2_multi_dec_sse2);
+        .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
     }
   }
 
-  TEST(X64_TRANSPOSE__2X2_MULTI_DEC_SSE2, bh_3_4_bw_3_4) {
+  TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_3_4_bw_3_4) {
     TEST_REQUIRES_X86_SSE2;
     for(size_t i = 3; i < 4; ++i){
       for(size_t j = 3; j < 4; ++j){
@@ -1488,12 +1488,12 @@
           .block_width(j)
           .block_height(i)
           .iterations(1)
-          .Test(xnn_x64_transpose_ukernel__2x2_multi_dec_sse2);
+          .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
       }
     }
   }
 
-  TEST(X64_TRANSPOSE__2X2_MULTI_DEC_SSE2, bh_2_bw_2_is_4) {
+  TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_2_bw_2_is_4) {
     TEST_REQUIRES_X86_SSE2;
     TransposeMicrokernelTester()
       .input_stride(4)
@@ -1501,10 +1501,10 @@
       .block_width(2)
       .block_height(2)
       .iterations(1)
-      .Test(xnn_x64_transpose_ukernel__2x2_multi_dec_sse2);
+      .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
   }
 
-  TEST(X64_TRANSPOSE__2X2_MULTI_DEC_SSE2, bh_2_bw_2_os_4) {
+  TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_2_bw_2_os_4) {
     TEST_REQUIRES_X86_SSE2;
     TransposeMicrokernelTester()
       .input_stride(2)
@@ -1512,10 +1512,10 @@
       .block_width(2)
       .block_height(2)
       .iterations(1)
-      .Test(xnn_x64_transpose_ukernel__2x2_multi_dec_sse2);
+      .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
   }
 
-  TEST(X64_TRANSPOSE__2X2_MULTI_DEC_SSE2, bh_2_bw_2_is_4_os_4) {
+  TEST(X64_TRANSPOSE__2X2_MULTI_MOV_SSE2, bh_2_bw_2_is_4_os_4) {
     TEST_REQUIRES_X86_SSE2;
     TransposeMicrokernelTester()
       .input_stride(4)
@@ -1523,7 +1523,7 @@
       .block_width(2)
       .block_height(2)
       .iterations(1)
-      .Test(xnn_x64_transpose_ukernel__2x2_multi_dec_sse2);
+      .Test(xnn_x64_transpose_ukernel__2x2_multi_mov_sse2);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64
 
@@ -1831,7 +1831,7 @@
 
 
 #if XNN_ARCH_X86 || XNN_ARCH_X86_64
-  TEST(X64_TRANSPOSE__2X2_REUSE_DEC_SSE2, bh_2_bw_2) {
+  TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_2_bw_2) {
     TEST_REQUIRES_X86_SSE2;
     TransposeMicrokernelTester()
       .input_stride(2)
@@ -1839,10 +1839,10 @@
       .block_width(2)
       .block_height(2)
       .iterations(1)
-      .Test(xnn_x64_transpose_ukernel__2x2_reuse_dec_sse2);
+      .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
   }
 
-  TEST(X64_TRANSPOSE__2X2_REUSE_DEC_SSE2, bh_1_4_bw_1_4) {
+  TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_1_4_bw_1_4) {
     TEST_REQUIRES_X86_SSE2;
     for(size_t i = 1; i <= 4; ++i){
       for(size_t j = 1; j <= 4; ++j){
@@ -1852,12 +1852,12 @@
           .block_width(j)
           .block_height(i)
           .iterations(1)
-          .Test(xnn_x64_transpose_ukernel__2x2_reuse_dec_sse2);
+          .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
       }
     }
   }
 
-  TEST(X64_TRANSPOSE__2X2_REUSE_DEC_SSE2, bh_2_bw_4) {
+  TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_2_bw_4) {
     TEST_REQUIRES_X86_SSE2;
     TransposeMicrokernelTester()
       .input_stride(4)
@@ -1865,10 +1865,10 @@
       .block_width(4)
       .block_height(2)
       .iterations(1)
-      .Test(xnn_x64_transpose_ukernel__2x2_reuse_dec_sse2);
+      .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
   }
 
-  TEST(X64_TRANSPOSE__2X2_REUSE_DEC_SSE2, bh_2_bw_3_4) {
+  TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_2_bw_3_4) {
     TEST_REQUIRES_X86_SSE2;
     for(size_t i = 3; i < 4; ++i){
       TransposeMicrokernelTester()
@@ -1877,11 +1877,11 @@
         .block_width(i)
         .block_height(2)
         .iterations(1)
-        .Test(xnn_x64_transpose_ukernel__2x2_reuse_dec_sse2);
+        .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
     }
   }
 
-  TEST(X64_TRANSPOSE__2X2_REUSE_DEC_SSE2, bh_4_bw_3_4) {
+  TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_4_bw_3_4) {
     TEST_REQUIRES_X86_SSE2;
     for(size_t i = 3; i < 4; ++i){
       TransposeMicrokernelTester()
@@ -1890,11 +1890,11 @@
         .block_width(i)
         .block_height(4)
         .iterations(1)
-        .Test(xnn_x64_transpose_ukernel__2x2_reuse_dec_sse2);
+        .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
     }
   }
 
-  TEST(X64_TRANSPOSE__2X2_REUSE_DEC_SSE2, bh_4_bw_2) {
+  TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_4_bw_2) {
     TEST_REQUIRES_X86_SSE2;
     TransposeMicrokernelTester()
       .input_stride(2)
@@ -1902,10 +1902,10 @@
       .block_width(2)
       .block_height(4)
       .iterations(1)
-      .Test(xnn_x64_transpose_ukernel__2x2_reuse_dec_sse2);
+      .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
   }
 
-  TEST(X64_TRANSPOSE__2X2_REUSE_DEC_SSE2, bh_3_4_bw_2){
+  TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_3_4_bw_2){
     TEST_REQUIRES_X86_SSE2;
     for(size_t i = 3; i < 4; ++i){
       TransposeMicrokernelTester()
@@ -1914,11 +1914,11 @@
         .block_width(2)
         .block_height(i)
         .iterations(1)
-        .Test(xnn_x64_transpose_ukernel__2x2_reuse_dec_sse2);
+        .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
     }
   }
 
-  TEST(X64_TRANSPOSE__2X2_REUSE_DEC_SSE2, bh_3_4_bw_4){
+  TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_3_4_bw_4){
     TEST_REQUIRES_X86_SSE2;
     for(size_t i = 3; i < 4; ++i){
       TransposeMicrokernelTester()
@@ -1927,11 +1927,11 @@
         .block_width(4)
         .block_height(i)
         .iterations(1)
-        .Test(xnn_x64_transpose_ukernel__2x2_reuse_dec_sse2);
+        .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
     }
   }
 
-  TEST(X64_TRANSPOSE__2X2_REUSE_DEC_SSE2, bh_3_4_bw_3_4) {
+  TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_3_4_bw_3_4) {
     TEST_REQUIRES_X86_SSE2;
     for(size_t i = 3; i < 4; ++i){
       for(size_t j = 3; j < 4; ++j){
@@ -1941,12 +1941,12 @@
           .block_width(j)
           .block_height(i)
           .iterations(1)
-          .Test(xnn_x64_transpose_ukernel__2x2_reuse_dec_sse2);
+          .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
       }
     }
   }
 
-  TEST(X64_TRANSPOSE__2X2_REUSE_DEC_SSE2, bh_2_bw_2_is_4) {
+  TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_2_bw_2_is_4) {
     TEST_REQUIRES_X86_SSE2;
     TransposeMicrokernelTester()
       .input_stride(4)
@@ -1954,10 +1954,10 @@
       .block_width(2)
       .block_height(2)
       .iterations(1)
-      .Test(xnn_x64_transpose_ukernel__2x2_reuse_dec_sse2);
+      .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
   }
 
-  TEST(X64_TRANSPOSE__2X2_REUSE_DEC_SSE2, bh_2_bw_2_os_4) {
+  TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_2_bw_2_os_4) {
     TEST_REQUIRES_X86_SSE2;
     TransposeMicrokernelTester()
       .input_stride(2)
@@ -1965,10 +1965,10 @@
       .block_width(2)
       .block_height(2)
       .iterations(1)
-      .Test(xnn_x64_transpose_ukernel__2x2_reuse_dec_sse2);
+      .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
   }
 
-  TEST(X64_TRANSPOSE__2X2_REUSE_DEC_SSE2, bh_2_bw_2_is_4_os_4) {
+  TEST(X64_TRANSPOSE__2X2_REUSE_MOV_SSE2, bh_2_bw_2_is_4_os_4) {
     TEST_REQUIRES_X86_SSE2;
     TransposeMicrokernelTester()
       .input_stride(4)
@@ -1976,7 +1976,7 @@
       .block_width(2)
       .block_height(2)
       .iterations(1)
-      .Test(xnn_x64_transpose_ukernel__2x2_reuse_dec_sse2);
+      .Test(xnn_x64_transpose_ukernel__2x2_reuse_mov_sse2);
   }
 #endif  // XNN_ARCH_X86 || XNN_ARCH_X86_64