__set_neon_cumulative_sat() modifies the contents of the QC flag, and
some intrinsics do so too: this patch adds an explicit dependency to
the asm statement, to avoid code reordering or removal.
When writing QC, the asm statement now has a fake input dependency,
which is the output of the intrinsic being tested. The
__set_neon_cumulative_sat macro has to be modified so that it can
accept all the possible input types.
Update the generic code in ref_v_binary_sat_op.c and ref_v_unary_sat_op.c
accordingly, as well as all the tests involving QC.
diff --git a/ref_vqshlu_n.c b/ref_vqshlu_n.c
index b72261c..27d53de 100644
--- a/ref_vqshlu_n.c
+++ b/ref_vqshlu_n.c
@@ -40,15 +40,15 @@
FNNAME (INSN)
{
/* Basic test: v2=vqshlu_n(v1,v), then store the result. */
-#define TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V) \
- Set_Neon_Cumulative_Sat(0); \
- VECT_VAR(vector_res, T3, W, N) = \
- INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \
- V); \
- vst1##Q##_##T4##W(VECT_VAR(result, T3, W, N), \
- VECT_VAR(vector_res, T3, W, N)); \
- dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_n_##T2##W), \
- xSTR(T1), W, N)
+#define TEST_VQSHLU_N2(INSN, Q, T1, T2, T3, T4, W, N, V) \
+ Set_Neon_Cumulative_Sat(0, VECT_VAR(vector_res, T3, W, N)); \
+ VECT_VAR(vector_res, T3, W, N) = \
+ INSN##Q##_n_##T2##W(VECT_VAR(vector, T1, W, N), \
+ V); \
+ vst1##Q##_##T4##W(VECT_VAR(result, T3, W, N), \
+ VECT_VAR(vector_res, T3, W, N)); \
+ dump_neon_cumulative_sat(TEST_MSG, xSTR(INSN##Q##_n_##T2##W), \
+ xSTR(T1), W, N)
/* Two auxliary macros are necessary to expand INSN */
#define TEST_VQSHLU_N1(INSN, Q, T1, T2, T3, T4, W, N, V) \