Christophe Lyon | 073831a | 2011-01-24 17:37:40 +0100 | [diff] [blame] | 1 | ;================================================================== |
| 2 | ; Copyright ARM Ltd 2005. All rights reserved. |
| 3 | ; |
| 4 | ; Cortex-A8 Dhrystone example - Startup Code |
| 5 | ;================================================================== |
| 6 | |
| 7 | PRESERVE8 |
| 8 | AREA CORTEXA8, CODE, READONLY |
| 9 | |
| 10 | ENTRY |
| 11 | |
| 12 | ; Processor mode values (PSR mode field) and interrupt mask flags (I & F) |
| 13 | |
| 14 | Mode_USR EQU 2_10000 ; 0x10 |
| 15 | Mode_FIQ EQU 2_10001 ; 0x11 |
| 16 | Mode_IRQ EQU 2_10010 ; 0x12 |
| 17 | Mode_SVC EQU 2_10011 ; 0x13 |
| 18 | Mode_ABT EQU 2_10111 ; 0x17 |
| 19 | Mode_UNDEF EQU 2_11011 ; 0x1B |
| 20 | Mode_SYS EQU 2_11111 ; 0x1F |
| 21 | |
| 22 | I_Bit EQU (1 << 7) ; 0x80 - IRQ is disabled while this PSR bit is set |
| 23 | F_Bit EQU (1 << 6) ; 0x40 - FIQ is disabled while this PSR bit is set |
| 24 | |
| 25 | ;================================================================== |
| 26 | ; Disable Cortex-A8 MMU if enabled |
| 27 | ;================================================================== |
| 28 | ; Start is the exported entry label of this startup sequence. |
| 29 | EXPORT Start |
| 30 | |
| 31 | Start |
| 32 | ; The NE-conditional instructions run only when TST finds bit 0 set, so the register is written back only if the MMU was on. |
| 33 | MRC p15, 0, r0, c1, c0, 0 ; Read CP15 Control Register into r0 |
| 34 | TST r0, #0x1 ; Is the MMU enabled? (bit 0; Z is clear when it is set) |
| 35 | BICNE r0, r0, #0x1 ; MMU on: clear bit 0 in the copy held in r0 |
| 36 | MCRNE p15, 0, r0, c1, c0, 0 ; MMU on: write the modified value back |
| 37 | |
| 38 | ;================================================================== |
| 39 | ; Initialise Supervisor Mode Stack |
| 40 | ; Note stack must be 8 byte aligned. |
| 41 | ;================================================================== |
| 42 | ; No mode switch is done here, so SP is set for the mode the core is already in (presumably Supervisor after reset - confirm). |
| 43 | IMPORT ||Image$$STACK$$ZI$$Limit|| ; Linker symbol from scatter file |
| 44 | LDR SP, =||Image$$STACK$$ZI$$Limit|| ; SP = limit address of the STACK region (loaded via literal pool) |
| 45 | |
| 46 | ;================================================================== |
| 47 | ; TLB maintenance, Invalidate Data and Instruction TLB's |
| 48 | ;================================================================== |
| 49 | ; Done before any translation table is written; the tables are built further down. |
| 50 | MOV r0,#0 ; zero operand for the invalidate operation |
| 51 | MCR p15, 0, r0, c8, c7, 0 ; Cortex-A8 I-TLB and D-TLB invalidation |
| 52 | |
| 53 | ;================================================================== |
| 54 | ; Cache Invalidation code for Cortex-A8 |
| 55 | ;================================================================== |
| 56 | |
| 57 | ; Invalidate L1 Instruction Cache |
| 58 | ; The invalidate is issued only when the low two bits of CLIDR (part of the level-1 cache type field) are non-zero. |
| 59 | MRC p15, 1, r0, c0, c0, 1 ; Read CLIDR |
| 60 | TST r0, #0x3 ; Any of the low two cache-type bits set? (original note: Harvard cache?) |
| 61 | MOV r0, #0 ; zero operand; MOV without S keeps the flags from TST |
| 62 | MCRNE p15, 0, r0, c7, c5, 0 ; Invalidate Instruction Cache (only if the TST result was non-zero) |
| 63 | |
| 64 | ; Invalidate Data/Unified Caches |
| 65 | ; Walks every cache level reported by CLIDR and issues a clean AND invalidate (c7, c14, 2) for each set/way of each data or unified cache. |
| 66 | MRC p15, 1, r0, c0, c0, 1 ; Read CLIDR |
| 67 | ANDS r3, r0, #&7000000 |
| 68 | MOV r3, r3, LSR #23 ; r3 = CLIDR bits [26:24] << 1 (level count, same scaling as r10); flags from ANDS are kept |
| 69 | BEQ Finished |
| 70 | ; Register use: r0 = CLIDR, r3 = loop bound, r10 = level << 1, r2 = shift amount, r4/r9 = way, r7 = set, r11 = operand |
| 71 | MOV r10, #0 ; R10 holds current cache level << 1 |
| 72 | Loop1 ADD r2, r10, r10, LSR #1 ; r2 = r10 + r10/2 = 3 x level: bit offset of this level's cache-type field in CLIDR |
| 73 | MOV r1, r0, LSR r2 ; Bottom 3 bits are the Cache-type for this level |
| 74 | AND r1, R1, #7 ; Get those 3 bits alone |
| 75 | CMP r1, #2 |
| 76 | BLT Skip ; No cache or only instruction cache at this level |
| 77 | ; Select this level, then read its geometry from the Cache Size ID register. |
| 78 | MCR p15, 2, r10, c0, c0, 0 ; Write the Cache Size selection register |
| 79 | MOV r1, #0 |
| 80 | MCR p15, 0, r1, c7, c5, 4 ; PrefetchFlush to sync the change to the CacheSizeID reg |
| 81 | MRC p15, 1, r1, c0, c0, 0 ; Reads current Cache Size ID register |
| 82 | AND r2, r1, #&7 ; Extract the line length field (bits [2:0]) |
| 83 | ADD r2, r2, #4 ; Add 4 for the line length offset (log2 16 bytes); r2 becomes the shift for the set number |
| 84 | LDR r4, =0x3FF |
| 85 | ANDS r4, r4, r1, LSR #3 ; r4 = highest way number (10-bit field at bits [12:3], right aligned); the flags set here are not used |
| 86 | CLZ r5, r4 ; r5 = leading zeros of r4 = left shift that puts the way number in the top bits of the operand |
| 87 | LDR r7, =0x00007FFF |
| 88 | ANDS r7, r7, r1, LSR #13 ; r7 = highest set (index) number (15-bit field at bits [27:13], right aligned) |
| 89 | ; Outer loop (Loop2) counts sets down in r7; inner loop (Loop3) counts ways down in r9. |
| 90 | Loop2 MOV r9, r4 ; R9 working copy of the max way size (right aligned) |
| 91 | |
| 92 | Loop3 ORR r11, r10, r9, LSL r5 ; Factor in the Way number and cache number into R11 |
| 93 | ORR r11, r11, r7, LSL r2 ; Factor in the Set number |
| 94 | MCR p15, 0, r11, c7, c14, 2 ; Clean and Invalidate by set/way |
| 95 | SUBS r9, r9, #1 ; Decrement the Way number |
| 96 | BGE Loop3 |
| 97 | SUBS r7, r7, #1 ; Decrement the Set number |
| 98 | BGE Loop2 |
| 99 | Skip ADD r10, r10, #2 ; increment the cache number |
| 100 | CMP r3, r10 |
| 101 | BGT Loop1 |
| 102 | ; NOTE(review): no barrier follows the set/way operations; consider a DSB here to guarantee completion - confirm against the ARM ARM. |
| 103 | Finished |
| 104 | |
| 105 | |
| 106 | ;=================================================================== |
| 107 | ; Cortex-A8 MMU Configuration |
| 108 | ; Set translation table base |
| 109 | ;=================================================================== |
| 110 | |
| 111 | |
| 112 | IMPORT ||Image$$TTB$$ZI$$Base|| ; from scatter file |
| 113 | |
| 114 | ; Cortex-A8 supports two translation tables |
| 115 | ; Configure translation table base (TTB) control register cp15,c2 |
| 116 | ; to a value of all zeros, which indicates we are using TTB register 0. |
| 117 | |
| 118 | MOV r0,#0x0 ; value for the TTB control register |
| 119 | MCR p15, 0, r0, c2, c0, 2 ; write TTB control register |
| 120 | |
| 121 | ; Write the address of our page table base to TTB register 0. |
| 122 | ; Bits [4:3] are left zero (outer non-cacheable); assumes the table base is aligned so its low bits are zero - TODO confirm in scatter file. |
| 123 | |
| 124 | LDR r0,=||Image$$TTB$$ZI$$Base|| ; r0 = table base; kept in r0 for the table-fill code that follows |
| 125 | MCR p15, 0, r0, c2, c0, 0 ; write TTB register 0 |
| 126 | |
| 127 | |
| 128 | ;=================================================================== |
| 129 | ; Cortex-A8 PAGE TABLE generation, using standard Arch v6 tables |
| 130 | ; |
| 131 | ; AP[11:10] - Access Permissions = b11, Read/Write Access |
| 132 | ; Domain[8:5] - Domain = b1111, Domain 15 |
| 133 | ; Type[1:0] - Descriptor Type = b10, 1Mb descriptors |
| 134 | ; |
| 135 | ; TEX C B |
| 136 | ; 000 0 0 Strongly Ordered |
| 137 | ; 001 1 1 Outer and inner write back, write allocate Normal |
| 138 | ;=================================================================== |
| 139 | |
| 140 | LDR r1,=0xfff ; loop counter: index of the last of the 4096 entries |
| 141 | LDR r2,=2_00000000000000000000110111100010 ; template: AP=b11, Domain=b1111, TEX/C/B all zero, Type=b10 |
| 142 | |
| 143 | ; r0 contains the address of the translation table base |
| 144 | ; r1 is loop counter |
| 145 | ; r2 is level1 descriptor (bits 19:0) |
| 146 | |
| 147 | ; Use the loop counter to create 4096 individual table entries. |
| 148 | ; This writes from TTB base + 0x3FFC down to TTB base + 0 in word steps (0x7FFC down to 0x4000 if the table is placed at 0x4000). |
| 149 | ; Entry n gets section base address n << 20, i.e. a flat (virtual = physical) mapping. |
| 150 | init_ttb_1 |
| 151 | |
| 152 | ORR r3, r2, r1, LSL#20 ; r3 now contains full level1 descriptor to write |
| 153 | STR r3, [r0, r1, LSL#2] ; str table entry at TTB base + loopcount*4 |
| 154 | SUBS r1, r1, #1 ; decrement loop counter |
| 155 | BPL init_ttb_1 ; repeat until r1 goes negative; r3 is left holding the entry-0 descriptor |
| 156 | |
| 157 | ; In this example we change the cacheable attribute of the first 13 descriptors. |
| 158 | ; Virtual memory from 0 to 13MB (sections 0 to 12) will be cacheable (write back mode), still flat mapped. |
| 159 | ; TEX[14:12]=001 and CB[3:2]= 11, Outer and inner write back, write allocate. |
| 160 | ; |
| 161 | ; On entry: r0 = translation table base, r3 = section-0 descriptor left by init_ttb_1 (base address 0). |
| 162 | ; Clobbers: r1 (ends at -1), r2, r3. The table contents match the former unrolled ADD/STR sequence. |
| 163 | |
| 164 | CACHED_SECTIONS EQU 13 ; number of 1MB sections, starting at address 0, made cacheable |
| 165 | |
| 166 | ORR r3,r3,#2_0000000001100 ; Set CB bits |
| 167 | ORR r3,r3,#2_1000000000000 ; Set TEX bits |
| 168 | MOV r1, #(CACHED_SECTIONS - 1) ; r1 = section index, counts down to 0 |
| 169 | |
| 170 | init_ttb_2 |
| 171 | |
| 172 | ADD r2, r3, r1, LSL#20 ; r2 = cacheable descriptor with section base address r1 x 1MB (flat mapping) |
| 173 | STR r2, [r0, r1, LSL#2] ; overwrite the table entry at TTB base + r1*4 |
| 174 | SUBS r1, r1, #1 ; next lower section |
| 175 | BPL init_ttb_2 ; finished once section 0 has been rewritten |
| 200 | |
| 201 | ;=================================================================== |
| 202 | ; Setup domain control register - Enable all domains to client mode |
| 203 | ;=================================================================== |
| 204 | |
| 205 | ; All 16 two-bit domain fields are replaced, so the old register value is not read first. |
| 206 | LDR r0, =0x55555555 ; Initialize every domain entry to b01 (client) |
| 207 | MCR p15, 0, r0, c3, c0, 0 ; Write Domain Access Control Register |
| 208 | |
| 209 | ;=================================================================== |
| 210 | ; Setup L2 Cache - L2 Cache Auxiliary Control |
| 211 | ;=================================================================== |
| 212 | ; The register write below is commented out, so this section currently only zeroes r0. |
| 213 | MOV r0, #0 ; value for the disabled write below |
| 214 | ;MCR p15, 1, r0, c9, c0, 2 ; Write L2 Auxiliary Control Register (disabled) |
| 215 | |
| 216 | ;================================================================== |
| 217 | ; Enable access to NEON/VFP by enabling access to Coprocessors 10 and 11. |
| 218 | ; Enables Full Access i.e. in both priv and non priv modes |
| 219 | ;================================================================== |
| 220 | |
| 221 | MRC p15, 0, r0, c1, c0, 2 ; read CP access register |
| 222 | ORR r0, r0, #(0x3 <<20) ; enable access CP 10 |
| 223 | ORR r0, r0, #(0x3 <<22) ; enable access CP 11 |
| 224 | MCR p15, 0, r0, c1, c0, 2 ; write CP access register back |
| 225 | ISB ; the new access rights must be in effect before the FMXR below touches the VFP |
| 226 | ;================================================================== |
| 227 | ; Switch on the VFP and Neon Hardware |
| 228 | ;================================================================= |
| 229 | |
| 230 | MOV r0, #0 ; Set up a register |
| 231 | ORR r0, r0, #(0x1 << 30) ; r0 = FPEXC value with only the EN bit (bit 30) set |
| 232 | FMXR FPEXC, r0 ; Write FPEXC register, EN bit set. |
| 233 | |
| 234 | ;=================================================================== |
| 235 | ; Enable MMU and Branch to __main |
| 236 | ;=================================================================== |
| 237 | |
| 238 | IMPORT __main ; before MMU enabled import label to __main |
| 239 | LDR r12,=__main ; save this in register for possible long jump |
| 240 | |
| 241 | DSB ; translation table stores and earlier cache/TLB maintenance must complete before the MMU is turned on |
| 242 | MRC p15, 0, r0, c1, c0, 0 ; read CP15 register 1 into r0 |
| 243 | ORR r0, r0, #0x1 ; enable MMU before scatter loading |
| 244 | MCR p15, 0, r0, c1, c0, 0 ; write CP15 register 1 |
| 245 | ISB ; make the new control register value take effect before any further instruction executes |
| 246 | |
| 247 | ; Now the MMU is enabled, virtual to physical address translations will occur. |
| 248 | ; This will affect the next instruction fetches. |
| 249 | ; |
| 250 | ; The translation table built above maps every 1MB section onto itself |
| 251 | ; (flat mapping), so the instructions that follow the write are fetched |
| 252 | ; from the same addresses as before. The ISB above, rather than any |
| 253 | ; assumption about how many instructions were already in the pipeline, |
| 254 | ; guarantees that the branch below runs with translation active. |
| 255 | |
| 256 | BX r12 ; branch to __main C library entry point |
| 257 | |
| 258 | END ; mark the end of this file |
| 259 | |