[AVR32] Optimize the TLB miss handler
Reorder some instructions and change the register usage to reduce
the number of pipeline stalls. Also use the bfextu and bfins
instructions for bitfield manipulations instead of shifting and
masking.
This makes gzipping an 80MB file approximately 2% faster.
Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
diff --git a/arch/avr32/kernel/entry-avr32b.S b/arch/avr32/kernel/entry-avr32b.S
index 484e083..42657f1 100644
--- a/arch/avr32/kernel/entry-avr32b.S
+++ b/arch/avr32/kernel/entry-avr32b.S
@@ -100,55 +100,49 @@
.global tlb_miss_common
tlb_miss_common:
- mfsr r0, SYSREG_PTBR
- mfsr r1, SYSREG_TLBEAR
+ mfsr r0, SYSREG_TLBEAR
+ mfsr r1, SYSREG_PTBR
/* Is it the vmalloc space? */
- bld r1, 31
+ bld r0, 31
brcs handle_vmalloc_miss
/* First level lookup */
pgtbl_lookup:
- lsr r2, r1, PGDIR_SHIFT
- ld.w r0, r0[r2 << 2]
- bld r0, _PAGE_BIT_PRESENT
+ lsr r2, r0, PGDIR_SHIFT
+ ld.w r3, r1[r2 << 2]
+ bfextu r1, r0, PAGE_SHIFT, PGDIR_SHIFT - PAGE_SHIFT
+ bld r3, _PAGE_BIT_PRESENT
brcc page_table_not_present
- /* TODO: Check access rights on page table if necessary */
-
/* Translate to virtual address in P1. */
- andl r0, 0xf000
- sbr r0, 31
+ andl r3, 0xf000
+ sbr r3, 31
/* Second level lookup */
- lsl r1, (32 - PGDIR_SHIFT)
- lsr r1, (32 - PGDIR_SHIFT) + PAGE_SHIFT
- add r2, r0, r1 << 2
- ld.w r1, r2[0]
- bld r1, _PAGE_BIT_PRESENT
+ ld.w r2, r3[r1 << 2]
+ mfsr r0, SYSREG_TLBARLO
+ bld r2, _PAGE_BIT_PRESENT
brcc page_not_present
/* Mark the page as accessed */
- sbr r1, _PAGE_BIT_ACCESSED
- st.w r2[0], r1
+ sbr r2, _PAGE_BIT_ACCESSED
+ st.w r3[r1 << 2], r2
/* Drop software flags */
- andl r1, _PAGE_FLAGS_HARDWARE_MASK & 0xffff
- mtsr SYSREG_TLBELO, r1
+ andl r2, _PAGE_FLAGS_HARDWARE_MASK & 0xffff
+ mtsr SYSREG_TLBELO, r2
/* Figure out which entry we want to replace */
- mfsr r0, SYSREG_TLBARLO
+ mfsr r1, SYSREG_MMUCR
clz r2, r0
brcc 1f
- mov r1, -1 /* All entries have been accessed, */
- mtsr SYSREG_TLBARLO, r1 /* so reset TLBAR */
- mov r2, 0 /* and start at 0 */
-1: mfsr r1, SYSREG_MMUCR
- lsl r2, 14
- andl r1, 0x3fff, COH
- or r1, r2
- mtsr SYSREG_MMUCR, r1
+ mov r3, -1 /* All entries have been accessed, */
+ mov r2, 0 /* so start at 0 */
+ mtsr SYSREG_TLBARLO, r3 /* and reset TLBAR */
+1: bfins r1, r2, SYSREG_DRP_OFFSET, SYSREG_DRP_SIZE
+ mtsr SYSREG_MMUCR, r1
tlbw
tlbmiss_restore
@@ -156,8 +150,8 @@
handle_vmalloc_miss:
/* Simply do the lookup in init's page table */
- mov r0, lo(swapper_pg_dir)
- orh r0, hi(swapper_pg_dir)
+ mov r1, lo(swapper_pg_dir)
+ orh r1, hi(swapper_pg_dir)
rjmp pgtbl_lookup