Include XOP headers in a clang-cl-compatible way. Fixes #1382.

PiperOrigin-RevId: 366463649
diff --git a/src/qs8-gemm/MRx4c2-sse.c.in b/src/qs8-gemm/MRx4c2-sse.c.in
index 53eb918..5011fae 100644
--- a/src/qs8-gemm/MRx4c2-sse.c.in
+++ b/src/qs8-gemm/MRx4c2-sse.c.in
@@ -9,7 +9,7 @@
 #include <assert.h>
 
 $if SSE == 5:
-  #ifdef __GNUC__
+  #if defined(__GNUC__) || defined(__clang__)
     #include <x86intrin.h>
   #else
     #include <immintrin.h>
diff --git a/src/qs8-gemm/MRx4c8-sse.c.in b/src/qs8-gemm/MRx4c8-sse.c.in
index 9e98cfd..613c2e7 100644
--- a/src/qs8-gemm/MRx4c8-sse.c.in
+++ b/src/qs8-gemm/MRx4c8-sse.c.in
@@ -9,7 +9,7 @@
 #include <assert.h>
 
 $if SSE == 5:
-  #ifdef __GNUC__
+  #if defined(__GNUC__) || defined(__clang__)
     #include <x86intrin.h>
   #else
     #include <immintrin.h>
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-xop-ld128.c b/src/qs8-gemm/gen/1x4c2-minmax-xop-ld128.c
index 7473e30..cc52508 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-xop-ld128.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-xop-ld128.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-gemm/gen/1x4c2-minmax-xop-ld64.c b/src/qs8-gemm/gen/1x4c2-minmax-xop-ld64.c
index ece3f81..1741eae 100644
--- a/src/qs8-gemm/gen/1x4c2-minmax-xop-ld64.c
+++ b/src/qs8-gemm/gen/1x4c2-minmax-xop-ld64.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-gemm/gen/1x4c2-xw-minmax-xop.c b/src/qs8-gemm/gen/1x4c2-xw-minmax-xop.c
index b4a2490..1adb00a 100644
--- a/src/qs8-gemm/gen/1x4c2-xw-minmax-xop.c
+++ b/src/qs8-gemm/gen/1x4c2-xw-minmax-xop.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-xop-ld128.c b/src/qs8-gemm/gen/1x4c8-minmax-xop-ld128.c
index 5411da3..3c3f2bd 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-xop-ld128.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-xop-ld128.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-gemm/gen/1x4c8-minmax-xop-ld64.c b/src/qs8-gemm/gen/1x4c8-minmax-xop-ld64.c
index 6bab96c..cd0970a 100644
--- a/src/qs8-gemm/gen/1x4c8-minmax-xop-ld64.c
+++ b/src/qs8-gemm/gen/1x4c8-minmax-xop-ld64.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-gemm/gen/1x4c8-xw-minmax-xop.c b/src/qs8-gemm/gen/1x4c8-xw-minmax-xop.c
index d4f4020..efdf22a 100644
--- a/src/qs8-gemm/gen/1x4c8-xw-minmax-xop.c
+++ b/src/qs8-gemm/gen/1x4c8-xw-minmax-xop.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-xop-ld128.c b/src/qs8-gemm/gen/2x4c8-minmax-xop-ld128.c
index 610ecd3..01a7394 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-xop-ld128.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-xop-ld128.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-gemm/gen/2x4c8-minmax-xop-ld64.c b/src/qs8-gemm/gen/2x4c8-minmax-xop-ld64.c
index 5ef963f..c4e6fd8 100644
--- a/src/qs8-gemm/gen/2x4c8-minmax-xop-ld64.c
+++ b/src/qs8-gemm/gen/2x4c8-minmax-xop-ld64.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-gemm/gen/2x4c8-xw-minmax-xop.c b/src/qs8-gemm/gen/2x4c8-xw-minmax-xop.c
index 6e41cc5..361b629 100644
--- a/src/qs8-gemm/gen/2x4c8-xw-minmax-xop.c
+++ b/src/qs8-gemm/gen/2x4c8-xw-minmax-xop.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-xop-ld128.c b/src/qs8-gemm/gen/3x4c8-minmax-xop-ld128.c
index df0e5d5..bbfa397 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-xop-ld128.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-xop-ld128.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-gemm/gen/3x4c8-minmax-xop-ld64.c b/src/qs8-gemm/gen/3x4c8-minmax-xop-ld64.c
index 090bd73..5a6411a 100644
--- a/src/qs8-gemm/gen/3x4c8-minmax-xop-ld64.c
+++ b/src/qs8-gemm/gen/3x4c8-minmax-xop-ld64.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-gemm/gen/3x4c8-xw-minmax-xop.c b/src/qs8-gemm/gen/3x4c8-xw-minmax-xop.c
index 79cdbf1..17f34af 100644
--- a/src/qs8-gemm/gen/3x4c8-xw-minmax-xop.c
+++ b/src/qs8-gemm/gen/3x4c8-xw-minmax-xop.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-xop-ld128.c b/src/qs8-gemm/gen/4x4c2-minmax-xop-ld128.c
index 860d51b..433576d 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-xop-ld128.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-xop-ld128.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-gemm/gen/4x4c2-minmax-xop-ld64.c b/src/qs8-gemm/gen/4x4c2-minmax-xop-ld64.c
index 5f89488..0580e6c 100644
--- a/src/qs8-gemm/gen/4x4c2-minmax-xop-ld64.c
+++ b/src/qs8-gemm/gen/4x4c2-minmax-xop-ld64.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-gemm/gen/4x4c2-xw-minmax-xop.c b/src/qs8-gemm/gen/4x4c2-xw-minmax-xop.c
index 471b856..ac409c8 100644
--- a/src/qs8-gemm/gen/4x4c2-xw-minmax-xop.c
+++ b/src/qs8-gemm/gen/4x4c2-xw-minmax-xop.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-igemm/MRx4c2-sse.c.in b/src/qs8-igemm/MRx4c2-sse.c.in
index d111fa1..ba38430 100644
--- a/src/qs8-igemm/MRx4c2-sse.c.in
+++ b/src/qs8-igemm/MRx4c2-sse.c.in
@@ -8,7 +8,7 @@
 #include <assert.h>
 
 $if SSE == 5:
-  #ifdef __GNUC__
+  #if defined(__GNUC__) || defined(__clang__)
     #include <x86intrin.h>
   #else
     #include <immintrin.h>
diff --git a/src/qs8-igemm/MRx4c8-sse.c.in b/src/qs8-igemm/MRx4c8-sse.c.in
index 030d144..3ec96d5 100644
--- a/src/qs8-igemm/MRx4c8-sse.c.in
+++ b/src/qs8-igemm/MRx4c8-sse.c.in
@@ -8,7 +8,7 @@
 #include <assert.h>
 
 $if SSE == 5:
-  #ifdef __GNUC__
+  #if defined(__GNUC__) || defined(__clang__)
     #include <x86intrin.h>
   #else
     #include <immintrin.h>
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-xop-ld128.c b/src/qs8-igemm/gen/1x4c2-minmax-xop-ld128.c
index fe4d345..dd36ce8 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-xop-ld128.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-xop-ld128.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-igemm/gen/1x4c2-minmax-xop-ld64.c b/src/qs8-igemm/gen/1x4c2-minmax-xop-ld64.c
index 7417f8e..76e6063 100644
--- a/src/qs8-igemm/gen/1x4c2-minmax-xop-ld64.c
+++ b/src/qs8-igemm/gen/1x4c2-minmax-xop-ld64.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-xop-ld128.c b/src/qs8-igemm/gen/1x4c8-minmax-xop-ld128.c
index aa7eca9..33cddcb 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-xop-ld128.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-xop-ld128.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-igemm/gen/1x4c8-minmax-xop-ld64.c b/src/qs8-igemm/gen/1x4c8-minmax-xop-ld64.c
index 1204750..4269c26 100644
--- a/src/qs8-igemm/gen/1x4c8-minmax-xop-ld64.c
+++ b/src/qs8-igemm/gen/1x4c8-minmax-xop-ld64.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-xop-ld128.c b/src/qs8-igemm/gen/2x4c8-minmax-xop-ld128.c
index e58f425..415b366 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-xop-ld128.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-xop-ld128.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-igemm/gen/2x4c8-minmax-xop-ld64.c b/src/qs8-igemm/gen/2x4c8-minmax-xop-ld64.c
index 02bbdf4..aca6372 100644
--- a/src/qs8-igemm/gen/2x4c8-minmax-xop-ld64.c
+++ b/src/qs8-igemm/gen/2x4c8-minmax-xop-ld64.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-xop-ld128.c b/src/qs8-igemm/gen/3x4c8-minmax-xop-ld128.c
index 4adc8c1..e38a8b9 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-xop-ld128.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-xop-ld128.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-igemm/gen/3x4c8-minmax-xop-ld64.c b/src/qs8-igemm/gen/3x4c8-minmax-xop-ld64.c
index 6bb8aaa..aa63645 100644
--- a/src/qs8-igemm/gen/3x4c8-minmax-xop-ld64.c
+++ b/src/qs8-igemm/gen/3x4c8-minmax-xop-ld64.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-xop-ld128.c b/src/qs8-igemm/gen/4x4c2-minmax-xop-ld128.c
index e6faba2..5b614fb 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-xop-ld128.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-xop-ld128.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-igemm/gen/4x4c2-minmax-xop-ld64.c b/src/qs8-igemm/gen/4x4c2-minmax-xop-ld64.c
index 0253dfa..47102db 100644
--- a/src/qs8-igemm/gen/4x4c2-minmax-xop-ld64.c
+++ b/src/qs8-igemm/gen/4x4c2-minmax-xop-ld64.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-vadd/gen/minmax-xop-mul32-ld32-x16.c b/src/qs8-vadd/gen/minmax-xop-mul32-ld32-x16.c
index a7f1d3d..646eba9 100644
--- a/src/qs8-vadd/gen/minmax-xop-mul32-ld32-x16.c
+++ b/src/qs8-vadd/gen/minmax-xop-mul32-ld32-x16.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-vadd/gen/minmax-xop-mul32-ld32-x24.c b/src/qs8-vadd/gen/minmax-xop-mul32-ld32-x24.c
index 1396345..75800b7 100644
--- a/src/qs8-vadd/gen/minmax-xop-mul32-ld32-x24.c
+++ b/src/qs8-vadd/gen/minmax-xop-mul32-ld32-x24.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-vadd/gen/minmax-xop-mul32-ld32-x32.c b/src/qs8-vadd/gen/minmax-xop-mul32-ld32-x32.c
index 764b529..b7052e0 100644
--- a/src/qs8-vadd/gen/minmax-xop-mul32-ld32-x32.c
+++ b/src/qs8-vadd/gen/minmax-xop-mul32-ld32-x32.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-vadd/gen/minmax-xop-mul32-ld32-x8.c b/src/qs8-vadd/gen/minmax-xop-mul32-ld32-x8.c
index 3b2298f..dcae71b 100644
--- a/src/qs8-vadd/gen/minmax-xop-mul32-ld32-x8.c
+++ b/src/qs8-vadd/gen/minmax-xop-mul32-ld32-x8.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-vadd/sse-mul32-ld32.c.in b/src/qs8-vadd/sse-mul32-ld32.c.in
index 7c4caba..a0a8da3 100644
--- a/src/qs8-vadd/sse-mul32-ld32.c.in
+++ b/src/qs8-vadd/sse-mul32-ld32.c.in
@@ -9,7 +9,7 @@
 #include <assert.h>
 
 $if SSE == 5:
-  #ifdef __GNUC__
+  #if defined(__GNUC__) || defined(__clang__)
     #include <x86intrin.h>
   #else
     #include <immintrin.h>
diff --git a/src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x16.c b/src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x16.c
index 6f419ca..8c39f6a 100644
--- a/src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x16.c
+++ b/src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x16.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x24.c b/src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x24.c
index c488e52..ff006d1 100644
--- a/src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x24.c
+++ b/src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x24.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x32.c b/src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x32.c
index 400a38b..72d8894 100644
--- a/src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x32.c
+++ b/src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x32.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x8.c b/src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x8.c
index 358a8d2..abbf33d 100644
--- a/src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x8.c
+++ b/src/qs8-vaddc/gen/minmax-xop-mul32-ld32-x8.c
@@ -9,7 +9,7 @@
 
 #include <assert.h>
 
-#ifdef __GNUC__
+#if defined(__GNUC__) || defined(__clang__)
   #include <x86intrin.h>
 #else
   #include <immintrin.h>
diff --git a/src/qs8-vaddc/sse-mul32-ld32.c.in b/src/qs8-vaddc/sse-mul32-ld32.c.in
index 8d68545..2ba8f9a 100644
--- a/src/qs8-vaddc/sse-mul32-ld32.c.in
+++ b/src/qs8-vaddc/sse-mul32-ld32.c.in
@@ -9,7 +9,7 @@
 #include <assert.h>
 
 $if SSE == 5:
-  #ifdef __GNUC__
+  #if defined(__GNUC__) || defined(__clang__)
     #include <x86intrin.h>
   #else
     #include <immintrin.h>