syscall_filter: Add a small operand optimization

Since all <, <=, >, >= operands are unsigned, when the immediate fits in
32 bits (which should be the vast majority of the time), we can omit one
of the comparisons that would normally occur. So, for

 arg1 >= K

That would roughly translate to

 if (hi(arg1) > hi(K)) jump NEXT;
 if (hi(arg1) == hi(K) && lo(arg1) >= lo(K)) jump NEXT;
 jump KILL;
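
To make the equivalence easier to follow, here is a minimal C model of
that split comparison (a sketch for illustration only: |hi()|, |lo()|
and |ge64_split()| are hypothetical helpers, not the cBPF that bpf.c
emits):

 #include <assert.h>
 #include <stddef.h>
 #include <stdint.h>

 static uint32_t hi(uint64_t x) { return (uint32_t)(x >> 32); }
 static uint32_t lo(uint64_t x) { return (uint32_t)x; }

 /* Two 32-bit compares that together implement |arg1 >= K|. */
 static int ge64_split(uint64_t arg1, uint64_t K)
 {
 	if (hi(arg1) > hi(K))
 		return 1; /* jump NEXT */
 	if (hi(arg1) == hi(K) && lo(arg1) >= lo(K))
 		return 1; /* jump NEXT */
 	return 0; /* jump KILL */
 }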

If the first check (|hi(arg1) > hi(K)|) fails, we then evaluate the
whole second expression. But if |hi(K) == 0|, the only way the first
check can fail is with |hi(arg1) == 0|, so we don't need to evaluate
|hi(arg1) == hi(K)| at all: we already know it's true. In other words,

 // given that |hi(K) == 0|,
 if (hi(arg1) > 0) jump NEXT;
 // if the code gets here, |hi(arg1) == 0|.
 if (lo(arg1) >= lo(K)) jump NEXT;
 jump KILL;
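
Modeled the same way (reusing |hi()| and |lo()| from the sketch above,
and likewise just an illustration), the shortened sequence becomes:

 /* Valid only when the immediate fits in 32 bits, i.e. |hi(K) == 0|. */
 static int ge64_short(uint64_t arg1, uint64_t K)
 {
 	assert(hi(K) == 0);
 	if (hi(arg1) > 0)
 		return 1; /* jump NEXT */
 	/* Here |hi(arg1) == 0 == hi(K)|, so only the low words matter. */
 	return lo(arg1) >= lo(K); /* jump NEXT or jump KILL */
 }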

The case for > is identical. </<= are first rewritten as >=/> with the
jump targets swapped, since cBPF only supports the latter two
operators. That covers all four operators and concludes the proof of
correctness for this optimization.
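
In the model above, that rewrite is just the usual identity
|a < K| == |!(a >= K)| (and likewise |a <= K| == |!(a > K)|); as a
hypothetical sketch, not the generated cBPF:

 static int lt64(uint64_t arg1, uint64_t K)
 {
 	/* Same opcodes as >=, with the jt/jf jump targets swapped. */
 	return !ge64_split(arg1, K);
 }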

This saves one opcode per affected comparison.
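
As a quick sanity check of the two models above (again just a sketch,
not part of this change), the full and shortened sequences agree at
the boundary values:

 int main(void)
 {
 	const uint64_t K = 0xff; /* fits in 32 bits, so |hi(K) == 0| */
 	const uint64_t probes[] = { 0, K - 1, K, K + 1, 1ULL << 32,
 				    UINT64_MAX };
 	for (size_t i = 0; i < sizeof(probes) / sizeof(probes[0]); i++)
 		assert(ge64_split(probes[i], K) == ge64_short(probes[i], K));
 	return 0;
 }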

Bug: 111726641
Test: make tests
Test: echo 'read: arg1 <= 0xbadc0ffee0ddf00d' | \
      ./parse_seccomp_policy --dump - | \
      ./libseccomp/tools/scmp_bpf_disasm
Test: echo 'read: arg1 <= 0xff' | ./parse_seccomp_policy --dump - | \
      ./libseccomp/tools/scmp_bpf_disasm

Change-Id: Ia00362ce92ff5e858c7366dab013e2db88c09818
diff --git a/bpf.c b/bpf.c
index f4b980b..3697f1c 100644
--- a/bpf.c
+++ b/bpf.c
@@ -121,8 +121,15 @@
 	struct sock_filter *curr_block = filter;
 
 	/* bpf_load_arg leaves |hi| in A. */
-	curr_block += bpf_comp_jgt32(curr_block, hi, SKIPN(3) + jt, NEXT);
-	curr_block += bpf_comp_jeq32(curr_block, hi, NEXT, SKIPN(2) + jf);
+	if (hi == 0) {
+		curr_block +=
+		    bpf_comp_jgt32(curr_block, hi, SKIPN(2) + jt, NEXT);
+	} else {
+		curr_block +=
+		    bpf_comp_jgt32(curr_block, hi, SKIPN(3) + jt, NEXT);
+		curr_block +=
+		    bpf_comp_jeq32(curr_block, hi, NEXT, SKIPN(2) + jf);
+	}
 	set_bpf_stmt(curr_block++, BPF_LD + BPF_MEM, 0); /* swap in |lo| */
 	curr_block += bpf_comp_jgt32(curr_block, lo, jt, jf);
 
@@ -138,8 +145,15 @@
 	struct sock_filter *curr_block = filter;
 
 	/* bpf_load_arg leaves |hi| in A. */
-	curr_block += bpf_comp_jgt32(curr_block, hi, SKIPN(3) + jt, NEXT);
-	curr_block += bpf_comp_jeq32(curr_block, hi, NEXT, SKIPN(2) + jf);
+	if (hi == 0) {
+		curr_block +=
+		    bpf_comp_jgt32(curr_block, hi, SKIPN(2) + jt, NEXT);
+	} else {
+		curr_block +=
+		    bpf_comp_jgt32(curr_block, hi, SKIPN(3) + jt, NEXT);
+		curr_block +=
+		    bpf_comp_jeq32(curr_block, hi, NEXT, SKIPN(2) + jf);
+	}
 	set_bpf_stmt(curr_block++, BPF_LD + BPF_MEM, 0); /* swap in |lo| */
 	curr_block += bpf_comp_jge32(curr_block, lo, jt, jf);
 
@@ -190,7 +204,7 @@
 	return bpf_comp_jset(filter, negative_mask, jf, jt);
 }
 
-static size_t bpf_arg_comp_len(int op)
+static size_t bpf_arg_comp_len(int op, unsigned long c attribute_unused)
 {
 	/* The comparisons that use gt/ge internally may have extra opcodes. */
 	switch (op) {
@@ -198,6 +212,13 @@
 	case LE:
 	case GT:
 	case GE:
+#if defined(BITS64)
+		/*
+		 * |c| can only have a high 32-bit part when running on 64 bits.
+		 */
+		if ((c >> 32) == 0)
+			return BPF_ARG_SHORT_GT_GE_COMP_LEN + 1;
+#endif
 		return BPF_ARG_GT_GE_COMP_LEN + 1;
 	default:
 		return BPF_ARG_COMP_LEN + 1;
@@ -207,7 +228,7 @@
 size_t bpf_arg_comp(struct sock_filter **pfilter, int op, int argidx,
 		    unsigned long c, unsigned int label_id)
 {
-	size_t filter_len = bpf_arg_comp_len(op);
+	size_t filter_len = bpf_arg_comp_len(op, c);
 	struct sock_filter *filter =
 	    calloc(filter_len, sizeof(struct sock_filter));
 	struct sock_filter *curr_block = filter;