andrew@webrtc.org | fccf64c | 2013-09-18 17:40:46 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Written by Wilco Dijkstra, 1996. The following email exchange establishes the |
| 3 | * license. |
| 4 | * |
| 5 | * From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com> |
| 6 | * Date: Fri, Jun 24, 2011 at 3:20 AM |
| 7 | * Subject: Re: sqrt routine |
| 8 | * To: Kevin Ma <kma@google.com> |
| 9 | * Hi Kevin, |
| 10 | * Thanks for asking. Those routines are public domain (originally posted to |
| 11 | * comp.sys.arm a long time ago), so you can use them freely for any purpose. |
| 12 | * Cheers, |
| 13 | * Wilco |
| 14 | * |
| 15 | * ----- Original Message ----- |
| 16 | * From: "Kevin Ma" <kma@google.com> |
| 17 | * To: <Wilco.Dijkstra@ntlworld.com> |
| 18 | * Sent: Thursday, June 23, 2011 11:44 PM |
| 19 | * Subject: Fwd: sqrt routine |
| 20 | * Hi Wilco, |
| 21 | * I saw your sqrt routine from several web sites, including |
| 22 | * http://www.finesse.demon.co.uk/steven/sqrt.html. |
| 23 | * Just wonder if there's any copyright information with your Successive |
| 24 | * approximation routines, or if I can freely use it for any purpose. |
| 25 | * Thanks. |
| 26 | * Kevin |
| 27 | */ |
| 28 | |
| 29 | // Minor modifications in code style for WebRTC, 2012. |
| 30 | // Code optimizations for MIPS, 2013. |
| 31 | |
| 32 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" |
| 33 | |
| 34 | /* |
| 35 | * Algorithm: |
| 36 | * Successive approximation of the equation (root + delta) ^ 2 = N |
| 37 | * until delta < 1. If delta < 1 we have the integer part of SQRT (N). |
| 38 | * Use delta = 2^i for i = 15 .. 0. |
| 39 | * |
| 40 | * Output precision is 16 bits. Note for large input values (close to |
| 41 | * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word) |
| 42 | * contains the MSB information (a non-sign value). Do with caution |
| 43 | * if you need to cast the output to int16_t type. |
| 44 | * |
| 45 | * If the input value is negative, it returns 0. |
| 46 | */ |
| 47 | |
| 48 | |
| 49 | int32_t WebRtcSpl_SqrtFloor(int32_t value) |
| 50 | { |
| 51 | int32_t root = 0, tmp1, tmp2, tmp3, tmp4; |
| 52 | |
| 53 | __asm __volatile( |
| 54 | ".set push \n\t" |
| 55 | ".set noreorder \n\t" |
| 56 | |
| 57 | "lui %[tmp1], 0x4000 \n\t" |
| 58 | "slt %[tmp2], %[value], %[tmp1] \n\t" |
| 59 | "sub %[tmp3], %[value], %[tmp1] \n\t" |
| 60 | "lui %[tmp1], 0x1 \n\t" |
| 61 | "or %[tmp4], %[root], %[tmp1] \n\t" |
| 62 | "movz %[value], %[tmp3], %[tmp2] \n\t" |
| 63 | "movz %[root], %[tmp4], %[tmp2] \n\t" |
| 64 | |
| 65 | "addiu %[tmp1], $0, 0x4000 \n\t" |
| 66 | "addu %[tmp1], %[tmp1], %[root] \n\t" |
| 67 | "sll %[tmp1], 14 \n\t" |
| 68 | "slt %[tmp2], %[value], %[tmp1] \n\t" |
| 69 | "subu %[tmp3], %[value], %[tmp1] \n\t" |
| 70 | "ori %[tmp4], %[root], 0x8000 \n\t" |
| 71 | "movz %[value], %[tmp3], %[tmp2] \n\t" |
| 72 | "movz %[root], %[tmp4], %[tmp2] \n\t" |
| 73 | |
| 74 | "addiu %[tmp1], $0, 0x2000 \n\t" |
| 75 | "addu %[tmp1], %[tmp1], %[root] \n\t" |
| 76 | "sll %[tmp1], 13 \n\t" |
| 77 | "slt %[tmp2], %[value], %[tmp1] \n\t" |
| 78 | "subu %[tmp3], %[value], %[tmp1] \n\t" |
| 79 | "ori %[tmp4], %[root], 0x4000 \n\t" |
| 80 | "movz %[value], %[tmp3], %[tmp2] \n\t" |
| 81 | "movz %[root], %[tmp4], %[tmp2] \n\t" |
| 82 | |
| 83 | "addiu %[tmp1], $0, 0x1000 \n\t" |
| 84 | "addu %[tmp1], %[tmp1], %[root] \n\t" |
| 85 | "sll %[tmp1], 12 \n\t" |
| 86 | "slt %[tmp2], %[value], %[tmp1] \n\t" |
| 87 | "subu %[tmp3], %[value], %[tmp1] \n\t" |
| 88 | "ori %[tmp4], %[root], 0x2000 \n\t" |
| 89 | "movz %[value], %[tmp3], %[tmp2] \n\t" |
| 90 | "movz %[root], %[tmp4], %[tmp2] \n\t" |
| 91 | |
| 92 | "addiu %[tmp1], $0, 0x800 \n\t" |
| 93 | "addu %[tmp1], %[tmp1], %[root] \n\t" |
| 94 | "sll %[tmp1], 11 \n\t" |
| 95 | "slt %[tmp2], %[value], %[tmp1] \n\t" |
| 96 | "subu %[tmp3], %[value], %[tmp1] \n\t" |
| 97 | "ori %[tmp4], %[root], 0x1000 \n\t" |
| 98 | "movz %[value], %[tmp3], %[tmp2] \n\t" |
| 99 | "movz %[root], %[tmp4], %[tmp2] \n\t" |
| 100 | |
| 101 | "addiu %[tmp1], $0, 0x400 \n\t" |
| 102 | "addu %[tmp1], %[tmp1], %[root] \n\t" |
| 103 | "sll %[tmp1], 10 \n\t" |
| 104 | "slt %[tmp2], %[value], %[tmp1] \n\t" |
| 105 | "subu %[tmp3], %[value], %[tmp1] \n\t" |
| 106 | "ori %[tmp4], %[root], 0x800 \n\t" |
| 107 | "movz %[value], %[tmp3], %[tmp2] \n\t" |
| 108 | "movz %[root], %[tmp4], %[tmp2] \n\t" |
| 109 | |
| 110 | "addiu %[tmp1], $0, 0x200 \n\t" |
| 111 | "addu %[tmp1], %[tmp1], %[root] \n\t" |
| 112 | "sll %[tmp1], 9 \n\t" |
| 113 | "slt %[tmp2], %[value], %[tmp1] \n\t" |
| 114 | "subu %[tmp3], %[value], %[tmp1] \n\t" |
| 115 | "ori %[tmp4], %[root], 0x400 \n\t" |
| 116 | "movz %[value], %[tmp3], %[tmp2] \n\t" |
| 117 | "movz %[root], %[tmp4], %[tmp2] \n\t" |
| 118 | |
| 119 | "addiu %[tmp1], $0, 0x100 \n\t" |
| 120 | "addu %[tmp1], %[tmp1], %[root] \n\t" |
| 121 | "sll %[tmp1], 8 \n\t" |
| 122 | "slt %[tmp2], %[value], %[tmp1] \n\t" |
| 123 | "subu %[tmp3], %[value], %[tmp1] \n\t" |
| 124 | "ori %[tmp4], %[root], 0x200 \n\t" |
| 125 | "movz %[value], %[tmp3], %[tmp2] \n\t" |
| 126 | "movz %[root], %[tmp4], %[tmp2] \n\t" |
| 127 | |
| 128 | "addiu %[tmp1], $0, 0x80 \n\t" |
| 129 | "addu %[tmp1], %[tmp1], %[root] \n\t" |
| 130 | "sll %[tmp1], 7 \n\t" |
| 131 | "slt %[tmp2], %[value], %[tmp1] \n\t" |
| 132 | "subu %[tmp3], %[value], %[tmp1] \n\t" |
| 133 | "ori %[tmp4], %[root], 0x100 \n\t" |
| 134 | "movz %[value], %[tmp3], %[tmp2] \n\t" |
| 135 | "movz %[root], %[tmp4], %[tmp2] \n\t" |
| 136 | |
| 137 | "addiu %[tmp1], $0, 0x40 \n\t" |
| 138 | "addu %[tmp1], %[tmp1], %[root] \n\t" |
| 139 | "sll %[tmp1], 6 \n\t" |
| 140 | "slt %[tmp2], %[value], %[tmp1] \n\t" |
| 141 | "subu %[tmp3], %[value], %[tmp1] \n\t" |
| 142 | "ori %[tmp4], %[root], 0x80 \n\t" |
| 143 | "movz %[value], %[tmp3], %[tmp2] \n\t" |
| 144 | "movz %[root], %[tmp4], %[tmp2] \n\t" |
| 145 | |
| 146 | "addiu %[tmp1], $0, 0x20 \n\t" |
| 147 | "addu %[tmp1], %[tmp1], %[root] \n\t" |
| 148 | "sll %[tmp1], 5 \n\t" |
| 149 | "slt %[tmp2], %[value], %[tmp1] \n\t" |
| 150 | "subu %[tmp3], %[value], %[tmp1] \n\t" |
| 151 | "ori %[tmp4], %[root], 0x40 \n\t" |
| 152 | "movz %[value], %[tmp3], %[tmp2] \n\t" |
| 153 | "movz %[root], %[tmp4], %[tmp2] \n\t" |
| 154 | |
| 155 | "addiu %[tmp1], $0, 0x10 \n\t" |
| 156 | "addu %[tmp1], %[tmp1], %[root] \n\t" |
| 157 | "sll %[tmp1], 4 \n\t" |
| 158 | "slt %[tmp2], %[value], %[tmp1] \n\t" |
| 159 | "subu %[tmp3], %[value], %[tmp1] \n\t" |
| 160 | "ori %[tmp4], %[root], 0x20 \n\t" |
| 161 | "movz %[value], %[tmp3], %[tmp2] \n\t" |
| 162 | "movz %[root], %[tmp4], %[tmp2] \n\t" |
| 163 | |
| 164 | "addiu %[tmp1], $0, 0x8 \n\t" |
| 165 | "addu %[tmp1], %[tmp1], %[root] \n\t" |
| 166 | "sll %[tmp1], 3 \n\t" |
| 167 | "slt %[tmp2], %[value], %[tmp1] \n\t" |
| 168 | "subu %[tmp3], %[value], %[tmp1] \n\t" |
| 169 | "ori %[tmp4], %[root], 0x10 \n\t" |
| 170 | "movz %[value], %[tmp3], %[tmp2] \n\t" |
| 171 | "movz %[root], %[tmp4], %[tmp2] \n\t" |
| 172 | |
| 173 | "addiu %[tmp1], $0, 0x4 \n\t" |
| 174 | "addu %[tmp1], %[tmp1], %[root] \n\t" |
| 175 | "sll %[tmp1], 2 \n\t" |
| 176 | "slt %[tmp2], %[value], %[tmp1] \n\t" |
| 177 | "subu %[tmp3], %[value], %[tmp1] \n\t" |
| 178 | "ori %[tmp4], %[root], 0x8 \n\t" |
| 179 | "movz %[value], %[tmp3], %[tmp2] \n\t" |
| 180 | "movz %[root], %[tmp4], %[tmp2] \n\t" |
| 181 | |
| 182 | "addiu %[tmp1], $0, 0x2 \n\t" |
| 183 | "addu %[tmp1], %[tmp1], %[root] \n\t" |
| 184 | "sll %[tmp1], 1 \n\t" |
| 185 | "slt %[tmp2], %[value], %[tmp1] \n\t" |
| 186 | "subu %[tmp3], %[value], %[tmp1] \n\t" |
| 187 | "ori %[tmp4], %[root], 0x4 \n\t" |
| 188 | "movz %[value], %[tmp3], %[tmp2] \n\t" |
| 189 | "movz %[root], %[tmp4], %[tmp2] \n\t" |
| 190 | |
| 191 | "addiu %[tmp1], $0, 0x1 \n\t" |
| 192 | "addu %[tmp1], %[tmp1], %[root] \n\t" |
| 193 | "slt %[tmp2], %[value], %[tmp1] \n\t" |
| 194 | "ori %[tmp4], %[root], 0x2 \n\t" |
| 195 | "movz %[root], %[tmp4], %[tmp2] \n\t" |
| 196 | |
| 197 | ".set pop \n\t" |
| 198 | |
| 199 | : [root] "+r" (root), [value] "+r" (value), |
| 200 | [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), |
| 201 | [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4) |
| 202 | : |
| 203 | ); |
| 204 | |
| 205 | return root >> 1; |
| 206 | } |
| 207 | |