/*
   Copyright 2003 Richard Curnow, SuperH (UK) Ltd.

   This file is subject to the terms and conditions of the GNU General Public
   License.  See the file "COPYING" in the main directory of this archive
   for more details.

   Tight version of memset for the case of just clearing a page.  It turns out
   that having the alloco instructions spaced out slightly by the
   increment/branch pair causes them to contend less for access to the cache.
   Similarly, keeping the stores apart from the alloco instructions causes
   less contention.  => Do two separate loops.  Do multiple stores per loop
   iteration to amortise the increment/branch cost a little.

   Parameters:
   r2 : destination effective address (start of the page to clear)

   Always clears 4096 bytes.

   Note: each alloco is followed by a synco to avoid the TAKum03020 erratum.

*/
| 23 | |
/*
 * sh64_page_clear - zero one 4096-byte page.
 *
 * In:       r2  = address of the first byte of the page
 *           r18 = address branched to on exit via tr0 (presumably the
 *                 caller's return address - confirm against the ABI)
 * Out:      nothing
 * Clobbers: r6, r7, tr0, tr1, tr2
 *
 * The page is walked twice in 32-byte steps: the first pass issues one
 * alloco per step, the second pass writes four zero quadwords per step.
 * The two passes are kept as separate loops on purpose; do not merge them
 * or reorder the instructions inside them.
 */
	.section	.text..SHmedia32,"ax"
	.little

	.balign	8
	.global	sh64_page_clear
sh64_page_clear:
	! Load every branch target up front: tr1/tr2 are the two loop heads,
	! tr0 is the exit target taken from r18.
	pta/l	.Lsh64_page_clear_alloc, tr1
	pta/l	.Lsh64_page_clear_store, tr2
	ptabs/l	r18, tr0

	movi	4096, r7		! number of bytes to clear
	add	r2, r7, r7		! r7 = first address past the page
	add	r2, r63, r6		! r6 = cursor, starts at the page base

	! Pass 1: one alloco per 32-byte step across the whole page.
.Lsh64_page_clear_alloc:
	alloco	r6, 0
	synco				! TAKum03020
	addi	r6, 32, r6
	bgt/l	r7, r6, tr1		! loop while cursor < end of page

	add	r2, r63, r6		! rewind the cursor to the page base

	! Pass 2: store r63 (the zero register) to 4 x 8 bytes per step.
.Lsh64_page_clear_store:
	st.q	r6, 0, r63
	st.q	r6, 8, r63
	st.q	r6, 16, r63
	st.q	r6, 24, r63
	addi	r6, 32, r6
	bgt/l	r7, r6, tr2		! loop while cursor < end of page

	blink	tr0, r63		! exit through tr0; link value discarded in r63
| 53 | |
| 54 | |