blob: 2b4c0ea5bbd75573d7cfff0f94ea08a5210a934f [file] [log] [blame]
buzbee1452bee2015-03-06 14:43:04 -08001 /*
2 * Compare two 64-bit values. Puts 0, 1, or -1 into the destination
3 * register based on the results of the comparison.
4 *
5 * We load the full values with LDM, but in practice many values could
6 * be resolved by only looking at the high word. This could be made
7 * faster or slower by splitting the LDM into a pair of LDRs.
8 *
9 * If we just wanted to set condition flags, we could do this:
10 * subs ip, r0, r2
11 * sbcs ip, r1, r3
12 * subeqs ip, r0, r2
13 * Leaving { <0, 0, >0 } in ip. However, we have to set it to a specific
14 * integer value, which we can do with 2 conditional mov/mvn instructions
15 * (set 1, set -1; if they're equal we already have 0 in ip), giving
16 * us a constant 5-cycle path plus a branch at the end to the
17 * instruction epilogue code. The multi-compare approach below needs
18 * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
19 * in the worst case (the 64-bit values are equal).
20 */
21 /* cmp-long vAA, vBB, vCC */
@ Stores -1, 0 or 1 into vAA when the signed 64-bit value in vBB/vBB+1 is
@ less than, equal to, or greater than the one in vCC/vCC+1.
@ The high words are compared first as signed values; only when they are
@ equal do the low words decide, and those are compared as unsigned.
@ r9 (AA) and r1 (the result) are consumed by the tail code at the
@ _less/_greater/_finish labels.
22 FETCH r0, 1 @ r0<- CCBB (both source vreg indices)
23 mov r9, rINST, lsr #8 @ r9<- AA (destination vreg index)
24 and r2, r0, #255 @ r2<- BB (low byte of CCBB)
25 mov r3, r0, lsr #8 @ r3<- CC (high byte of CCBB)
26 add r2, rFP, r2, lsl #2 @ r2<- &fp[BB] (index scaled by 4)
27 add r3, rFP, r3, lsl #2 @ r3<- &fp[CC] (index scaled by 4)
28 ldmia r2, {r0-r1} @ r0/r1<- vBB/vBB+1 (low word/high word)
29 ldmia r3, {r2-r3} @ r2/r3<- vCC/vCC+1 (low word/high word)
30 cmp r1, r3 @ compare high words (vBB+1, vCC+1)
31 blt .L${opcode}_less @ signed compare on high part
32 bgt .L${opcode}_greater
33 subs r1, r0, r2 @ r1<- r0 - r2; high words equal, low words decide
34 bhi .L${opcode}_greater @ unsigned compare on low part
35 bne .L${opcode}_less @ not higher and not equal, so unsigned lower
36 b .L${opcode}_finish @ equal; r1 already holds 0
37%break
38
39.L${opcode}_less:
@ Branch target for vBB < vCC. Expects r9 to hold AA, as set up by the
@ code that branches here.
40 mvn r1, #0 @ r1<- -1 (bitwise NOT of 0)
41 @ Want to cond code the next mov so we can avoid branch, but don't see it;
42 @ instead, we just replicate the tail end.
@ (The four lines below are a copy of the tail at the _finish label.)
43 FETCH_ADVANCE_INST 2 @ advance rPC, load rINST (2 is presumably this instruction's width in code units)
44 SET_VREG r1, r9 @ vAA<- r1
45 GET_INST_OPCODE ip @ extract opcode from rINST
46 GOTO_OPCODE ip @ jump to next instruction
47
48.L${opcode}_greater:
@ Branch target for vBB > vCC: load the result 1 and share the tail below.
49 mov r1, #1 @ r1<- 1
50 @ fall through to _finish
51
52.L${opcode}_finish:
@ Common tail: store the result held in r1 into vAA (index in r9) and
@ dispatch the next instruction. Also entered directly with r1 == 0 when
@ the two values are equal.
53 FETCH_ADVANCE_INST 2 @ advance rPC, load rINST (2 is presumably this instruction's width in code units)
54 SET_VREG r1, r9 @ vAA<- r1
55 GET_INST_OPCODE ip @ extract opcode from rINST
56 GOTO_OPCODE ip @ jump to next instruction
56 GOTO_OPCODE ip @ jump to next instruction