Move flushrs into a better position (after most of the prefetching
stores are done) to maximize overlap of memory misses.
(Logical change 1.191)
diff --git a/src/ia64/getcontext-ia64.S b/src/ia64/getcontext-ia64.S
index d325f5a..b5901c0 100644
--- a/src/ia64/getcontext-ia64.S
+++ b/src/ia64/getcontext-ia64.S
@@ -41,33 +41,34 @@
_Uia64_getcontext:
.prologue
alloc rPFS = ar.pfs, 1, 0, 0, 0
- flushrs // save dirty partition on rbs
+ mov.m rFPSR = ar.fpsr
add r2 = SC_MASK, r32
;;
st8 [r2] = r0 // clear sc->sc_mask
- mov.m rFPSR = ar.fpsr
+ mov.m rRSC = ar.rsc
add r2 = GR(1), r32
;;
- mov.m rRSC = ar.rsc
mov.m rBSP = ar.bsp
- mov rPR = pr
-
.save ar.unat, rUNAT
mov.m rUNAT = ar.unat
.body
- st8.spill [r2] = r1, (GR(12) - GR(1))
- add r3 = SC_NAT, r32
+ add r3 = GR(12), r32
;;
- st8.spill [r2] = sp, (SC_PR - GR(12))
- lfetch.fault.nt1 [r3] // prefetch nat...ar.lc
- adds r3 = FR(2), r32
+.mem.offset 0,0; st8.spill [r2] = r1, (SC_NAT - GR(1))
+.mem.offset 8,0; st8.spill [r3] = sp, (SC_PR - GR(12))
+ mov rPR = pr
;;
- st8 [r2] = rPR
- stf.spill [r3] = f2, (FR(16) - FR(2))
- add r2 = FR(24), r32
+ lfetch.fault.nt1 [r2] // prefetch nat...ar.lc
+ st8 [r3] = rPR
+ adds r2 = FR(2), r32
;;
- stf.spill [r2] = f24, (FR(31) - FR(24))
- stf.spill [r3] = f16
+ stf.spill [r2] = f2, (FR(16) - FR(2))
+ ;;
+ stf.spill [r2] = f16, (FR(31) - FR(16))
+ add r3 = FR(24), r32
+ ;;
+ flushrs // save dirty partition on rbs
+ stf.spill [r3] = f24
add r3 = GR(4), r32
;;
stf.spill [r2] = f31