/* memset.S: optimised assembly memset
 *
 * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */


# Code section; .p2align 4 places the entry point on a 16-byte boundary.
	.text
	.p2align	4

###############################################################################
#
# void *memset(void *p, char ch, size_t count)
#
# On entry:  GR8 = p, GR9 = ch, GR10 = count
# On return: GR8 is never written here, so it still holds p
#
# Working registers:
#	GR4	  - address tracking pointer
#	GR5	  - number of bytes still to be written
#	GR6	  - size of the chunk currently being subtracted from GR5
#	GR7	  - index (step) register for the update-form stores
#	GR12/GR13 - the fill byte replicated through both registers
#	ICC3	  - set by each conditional subtract; every BEQLR returns as
#		    soon as the remaining count reaches zero
#	CC7, CC5  - predicates guarding the conditional stores
#
# - NOTE: must not use any stack. exception detection performs function return
#         to the fixup routine of the caller, aborting the remainder of the set
#         GR4, GR7, GR8, and GR11 must be managed
#
# - the fixup routine in this file (__memset_user_error_handler) takes
#   GR4 + GR7 as the address at which the set stopped, so that sum has to be
#   the address being stored to at every store instruction below
#
###############################################################################
	.globl		memset,__memset_end
	.type		memset,@function
memset:
	orcc.p		gr10,gr0,gr5,icc3		; GR5 = count
	andi		gr9,#0xff,gr9			; reduce ch to its low 8 bits
	or.p		gr8,gr0,gr4			; GR4 = address
	beqlr		icc3,#0				; count == 0, nothing to write

	# the alignment stores below index from GR0 rather than GR7, and GR7 is
	# not otherwise written until the 64-byte loop is set up, so clear it
	# here to keep GR4 + GR7 equal to the store address for the fixup
	# routine (the 1-byte remnant at the end does the same thing)
	setlos		#0,gr7

	# conditionally write a byte to 2b-align the address
	setlos.p	#1,gr6
	andicc		gr4,#1,gr0,icc0
	ckne		icc0,cc7			; CC7 = address is odd
	cstb.p		gr9,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	# conditionally write a word to 4b-align the address
	# - the store is made only when bit 1 of the address is set (ICC0) and
	#   at least 2 bytes remain (ICC1); CC7 ends up as the AND of the two
	andicc.p	gr4,#2,gr0,icc0
	subicc		gr5,#2,gr0,icc1
	setlos.p	#2,gr6
	ckne		icc0,cc7
	slli.p		gr9,#8,gr12			; need to double up the pattern
	cknc		icc1,cc5
	or.p		gr9,gr12,gr12
	andcr		cc7,cc5,cc7

	csth.p		gr12,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	# conditionally write a dword to 8b-align the address
	# - same scheme as above: bit 2 of the address set and at least 4 bytes
	#   remaining
	andicc.p	gr4,#4,gr0,icc0
	subicc		gr5,#4,gr0,icc1
	setlos.p	#4,gr6
	ckne		icc0,cc7
	slli.p		gr12,#16,gr13			; need to quadruple-up the pattern
	cknc		icc1,cc5
	or.p		gr13,gr12,gr12
	andcr		cc7,cc5,cc7

	cst.p		gr12,@(gr4,gr0)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cadd.p		gr4,gr6,gr4		,cc7,#1
	beqlr		icc3,#0

	or.p		gr12,gr12,gr13			; need to octuple-up the pattern

	# the address is now 8b-aligned - loop around writing 64b chunks
	# - the update-form stores address GR4 + GR7, so GR4 is kept one step
	#   (8 bytes) below the next address to be written
	# - each pass makes eight 8-byte stores, all guarded by CC7, which is
	#   set only while at least 64 bytes remain
	setlos		#8,gr7
	subi.p		gr4,#8,gr4			; store with update index does weird stuff
	setlos		#64,gr6

	subicc		gr5,#64,gr0,icc0
0:	cknc		icc0,cc7
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu		gr12,@(gr4,gr7)		,cc7,#1
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	subicc		gr5,#64,gr0,icc0		; is there another full 64 bytes?
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0
	bnc		icc0,#2,0b

	# now do 32-byte remnant
	# - four 8-byte stores; GR6 is reloaded with 16 and ICC0 recomputed on
	#   the way through, ready for the 16-byte remnant
	subicc.p	gr5,#32,gr0,icc0
	setlos		#32,gr6
	cknc		icc0,cc7
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	setlos		#16,gr6
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	subicc		gr5,#16,gr0,icc0
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0

	# now do 16-byte remnant
	cknc		icc0,cc7
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr6,gr5		,cc7,#1	; also set ICC3
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	beqlr		icc3,#0

	# now do 8-byte remnant
	# - GR7 still holds 8 here, so it doubles as the amount to subtract
	subicc		gr5,#8,gr0,icc1
	cknc		icc1,cc7
	cstdu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	setlos.p	#4,gr7
	beqlr		icc3,#0

	# now do 4-byte remnant
	# - the step shrinks from 8 to 4, so GR4 is moved up by the difference
	#   to keep GR4 + GR7 on the next address to be written
	subicc		gr5,#4,gr0,icc0
	addi.p		gr4,#4,gr4
	cknc		icc0,cc7
	cstu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	subicc.p	gr5,#2,gr0,icc1
	beqlr		icc3,#0

	# now do 2-byte remnant
	# - as above, with the step shrinking from 4 to 2
	setlos		#2,gr7
	addi.p		gr4,#2,gr4
	cknc		icc1,cc7
	csthu.p		gr12,@(gr4,gr7)		,cc7,#1
	csubcc		gr5,gr7,gr5		,cc7,#1	; also set ICC3
	subicc.p	gr5,#1,gr0,icc0
	beqlr		icc3,#0

	# now do 1-byte remnant
	# - the final store indexes from GR0, so GR7 is zeroed and GR4 is moved
	#   up to the byte address itself
	setlos		#0,gr7
	addi.p		gr4,#2,gr4
	cknc		icc0,cc7
	cstb.p		gr12,@(gr4,gr0)		,cc7,#1
	bralr
__memset_end:

	.size		memset, __memset_end-memset

###############################################################################
#
# long __memset_user(void *p, size_t count)
#
# zero a range of userspace memory
# - the value returned is the number of bytes that were left uncleared, so a
#   return of 0 means that the whole range was cleared
#
###############################################################################
	.globl		__memset_user, __memset_user_error_lr, __memset_user_error_handler
	.type		__memset_user,@function
__memset_user:
	movsg		lr,gr11				; return is made through GR11, not LR

	# recast (p, count) as memset(p, 0, count) and let memset do the work
	or.p		gr9,gr0,gr10			; GR10 = count
	setlos		#0,gr9				; GR9 = fill byte of zero
	call		memset
__memset_user_error_lr:
	# memset ran to completion: hand back 0 bytes uncleared
	jmpl.p		@(gr11,gr0)
	setlos		#0,gr8

	# recovery path for an exception raised by a store inside memset
	# register state inherited from memset at that point:
	#	GR4  - address tracking pointer
	#	GR7  - step value (the index register of the store insns)
	#	GR8  - start address originally passed in
	#	GR10 - count originally passed in
__memset_user_error_handler:
	add.p		gr7,gr4,gr4			; GR4 = address at which the set stopped
	add		gr10,gr8,gr8			; GR8 = end of the requested range
	jmpl.p		@(gr11,gr0)
	sub		gr8,gr4,gr8			; return the amount left uncleared

	.size		__memset_user, .-__memset_user