Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2010 Broadcom Corporation |
| 3 | * |
| 4 | * Permission to use, copy, modify, and/or distribute this software for any |
| 5 | * purpose with or without fee is hereby granted, provided that the above |
| 6 | * copyright notice and this permission notice appear in all copies. |
| 7 | * |
| 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
| 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
| 11 | * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
| 13 | * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
| 14 | * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 15 | */ |
| 16 | |
Greg Kroah-Hartman | a1c16ed | 2010-10-21 11:17:44 -0700 | [diff] [blame^] | 17 | #include <linux/types.h> |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 18 | #include "qmath.h" |
| 19 | |
| 20 | /* |
| 21 | Description: This function saturate input 32 bit number into a 16 bit number. |
| 22 | If input number is greater than 0x7fff then output is saturated to 0x7fff. |
| 23 | else if input number is less than 0xffff8000 then output is saturated to 0xffff8000 |
| 24 | else output is same as input. |
| 25 | */ |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 26 | s16 qm_sat32(s32 op) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 27 | { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 28 | s16 result; |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 29 | if (op > (s32) 0x7fff) { |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 30 | result = 0x7fff; |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 31 | } else if (op < (s32) 0xffff8000) { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 32 | result = (s16) (0x8000); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 33 | } else { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 34 | result = (s16) op; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 35 | } |
| 36 | return result; |
| 37 | } |
| 38 | |
| 39 | /* |
| 40 | Description: This function multiply two input 16 bit numbers and return the 32 bit result. |
| 41 | This multiplication is similar to compiler multiplication. This operation is defined if |
| 42 | 16 bit multiplication on the processor platform is cheaper than 32 bit multiplication (as |
| 43 | the most of qmath functions can be replaced with processor intrinsic instructions). |
| 44 | */ |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 45 | s32 qm_mul321616(s16 op1, s16 op2) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 46 | { |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 47 | return (s32) (op1) * (s32) (op2); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 48 | } |
| 49 | |
| 50 | /* |
| 51 | Description: This function make 16 bit multiplication and return the result in 16 bits. |
| 52 | To fit the result into 16 bits the 32 bit multiplication result is right |
| 53 | shifted by 16 bits. |
| 54 | */ |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 55 | s16 qm_mul16(s16 op1, s16 op2) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 56 | { |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 57 | s32 result; |
| 58 | result = ((s32) (op1) * (s32) (op2)); |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 59 | return (s16) (result >> 16); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 60 | } |
| 61 | |
| 62 | /* |
| 63 | Description: This function multiply two 16 bit numbers and return the result in 32 bits. |
| 64 | This function remove the extra sign bit created by the multiplication by leftshifting the |
| 65 | 32 bit multiplication result by 1 bit before returning the result. So the output is |
| 66 | twice that of compiler multiplication. (i.e. qm_muls321616(2,3)=12). |
| 67 | When both input 16 bit numbers are 0x8000, then the result is saturated to 0x7fffffff. |
| 68 | */ |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 69 | s32 qm_muls321616(s16 op1, s16 op2) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 70 | { |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 71 | s32 result; |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 72 | if (op1 == (s16) (0x8000) && op2 == (s16) (0x8000)) { |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 73 | result = 0x7fffffff; |
| 74 | } else { |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 75 | result = ((s32) (op1) * (s32) (op2)); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 76 | result = result << 1; |
| 77 | } |
| 78 | return result; |
| 79 | } |
| 80 | |
| 81 | /* |
| 82 | Description: This function make 16 bit unsigned multiplication. To fit the output into |
| 83 | 16 bits the 32 bit multiplication result is right shifted by 16 bits. |
| 84 | */ |
Greg Kroah-Hartman | 7d4df48 | 2010-10-07 17:04:47 -0700 | [diff] [blame] | 85 | u16 qm_mulu16(u16 op1, u16 op2) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 86 | { |
Greg Kroah-Hartman | 66cbd3a | 2010-10-08 11:05:47 -0700 | [diff] [blame] | 87 | return (u16) (((u32) op1 * (u32) op2) >> 16); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 88 | } |
| 89 | |
| 90 | /* |
| 91 | Description: This function make 16 bit multiplication and return the result in 16 bits. |
| 92 | To fit the multiplication result into 16 bits the multiplication result is right shifted by |
| 93 | 15 bits. Right shifting 15 bits instead of 16 bits is done to remove the extra sign bit formed |
| 94 | due to the multiplication. |
| 95 | When both the 16bit inputs are 0x8000 then the output is saturated to 0x7fffffff. |
| 96 | */ |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 97 | s16 qm_muls16(s16 op1, s16 op2) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 98 | { |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 99 | s32 result; |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 100 | if (op1 == (s16) 0x8000 && op2 == (s16) 0x8000) { |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 101 | result = 0x7fffffff; |
| 102 | } else { |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 103 | result = ((s32) (op1) * (s32) (op2)); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 104 | } |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 105 | return (s16) (result >> 15); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 106 | } |
| 107 | |
| 108 | /* |
| 109 | Description: This function add two 32 bit numbers and return the 32bit result. |
| 110 | If the result overflow 32 bits, the output will be saturated to 32bits. |
| 111 | */ |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 112 | s32 qm_add32(s32 op1, s32 op2) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 113 | { |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 114 | s32 result; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 115 | result = op1 + op2; |
| 116 | if (op1 < 0 && op2 < 0 && result > 0) { |
| 117 | result = 0x80000000; |
| 118 | } else if (op1 > 0 && op2 > 0 && result < 0) { |
| 119 | result = 0x7fffffff; |
| 120 | } |
| 121 | return result; |
| 122 | } |
| 123 | |
| 124 | /* |
| 125 | Description: This function add two 16 bit numbers and return the 16bit result. |
| 126 | If the result overflow 16 bits, the output will be saturated to 16bits. |
| 127 | */ |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 128 | s16 qm_add16(s16 op1, s16 op2) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 129 | { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 130 | s16 result; |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 131 | s32 temp = (s32) op1 + (s32) op2; |
| 132 | if (temp > (s32) 0x7fff) { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 133 | result = (s16) 0x7fff; |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 134 | } else if (temp < (s32) 0xffff8000) { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 135 | result = (s16) 0xffff8000; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 136 | } else { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 137 | result = (s16) temp; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 138 | } |
| 139 | return result; |
| 140 | } |
| 141 | |
| 142 | /* |
| 143 | Description: This function make 16 bit subtraction and return the 16bit result. |
| 144 | If the result overflow 16 bits, the output will be saturated to 16bits. |
| 145 | */ |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 146 | s16 qm_sub16(s16 op1, s16 op2) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 147 | { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 148 | s16 result; |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 149 | s32 temp = (s32) op1 - (s32) op2; |
| 150 | if (temp > (s32) 0x7fff) { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 151 | result = (s16) 0x7fff; |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 152 | } else if (temp < (s32) 0xffff8000) { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 153 | result = (s16) 0xffff8000; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 154 | } else { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 155 | result = (s16) temp; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 156 | } |
| 157 | return result; |
| 158 | } |
| 159 | |
| 160 | /* |
| 161 | Description: This function make 32 bit subtraction and return the 32bit result. |
| 162 | If the result overflow 32 bits, the output will be saturated to 32bits. |
| 163 | */ |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 164 | s32 qm_sub32(s32 op1, s32 op2) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 165 | { |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 166 | s32 result; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 167 | result = op1 - op2; |
| 168 | if (op1 >= 0 && op2 < 0 && result < 0) { |
| 169 | result = 0x7fffffff; |
| 170 | } else if (op1 < 0 && op2 > 0 && result > 0) { |
| 171 | result = 0x80000000; |
| 172 | } |
| 173 | return result; |
| 174 | } |
| 175 | |
| 176 | /* |
| 177 | Description: This function multiply input 16 bit numbers and accumulate the result |
| 178 | into the input 32 bit number and return the 32 bit accumulated result. |
| 179 | If the accumulation result in overflow, then the output will be saturated. |
| 180 | */ |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 181 | s32 qm_mac321616(s32 acc, s16 op1, s16 op2) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 182 | { |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 183 | s32 result; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 184 | result = qm_add32(acc, qm_mul321616(op1, op2)); |
| 185 | return result; |
| 186 | } |
| 187 | |
| 188 | /* |
| 189 | Description: This function make a 32 bit saturated left shift when the specified shift |
| 190 | is +ve. This function will make a 32 bit right shift when the specified shift is -ve. |
| 191 | This function return the result after shifting operation. |
| 192 | */ |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 193 | s32 qm_shl32(s32 op, int shift) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 194 | { |
| 195 | int i; |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 196 | s32 result; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 197 | result = op; |
| 198 | if (shift > 31) |
| 199 | shift = 31; |
| 200 | else if (shift < -31) |
| 201 | shift = -31; |
| 202 | if (shift >= 0) { |
| 203 | for (i = 0; i < shift; i++) { |
| 204 | result = qm_add32(result, result); |
| 205 | } |
| 206 | } else { |
| 207 | result = result >> (-shift); |
| 208 | } |
| 209 | return result; |
| 210 | } |
| 211 | |
| 212 | /* |
| 213 | Description: This function make a 32 bit right shift when shift is +ve. |
| 214 | This function make a 32 bit saturated left shift when shift is -ve. This function |
| 215 | return the result of the shift operation. |
| 216 | */ |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 217 | s32 qm_shr32(s32 op, int shift) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 218 | { |
| 219 | return qm_shl32(op, -shift); |
| 220 | } |
| 221 | |
| 222 | /* |
| 223 | Description: This function make a 16 bit saturated left shift when the specified shift |
| 224 | is +ve. This function will make a 16 bit right shift when the specified shift is -ve. |
| 225 | This function return the result after shifting operation. |
| 226 | */ |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 227 | s16 qm_shl16(s16 op, int shift) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 228 | { |
| 229 | int i; |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 230 | s16 result; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 231 | result = op; |
| 232 | if (shift > 15) |
| 233 | shift = 15; |
| 234 | else if (shift < -15) |
| 235 | shift = -15; |
| 236 | if (shift > 0) { |
| 237 | for (i = 0; i < shift; i++) { |
| 238 | result = qm_add16(result, result); |
| 239 | } |
| 240 | } else { |
| 241 | result = result >> (-shift); |
| 242 | } |
| 243 | return result; |
| 244 | } |
| 245 | |
| 246 | /* |
| 247 | Description: This function make a 16 bit right shift when shift is +ve. |
| 248 | This function make a 16 bit saturated left shift when shift is -ve. This function |
| 249 | return the result of the shift operation. |
| 250 | */ |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 251 | s16 qm_shr16(s16 op, int shift) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 252 | { |
| 253 | return qm_shl16(op, -shift); |
| 254 | } |
| 255 | |
| 256 | /* |
| 257 | Description: This function return the number of redundant sign bits in a 16 bit number. |
| 258 | Example: qm_norm16(0x0080) = 7. |
| 259 | */ |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 260 | s16 qm_norm16(s16 op) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 261 | { |
Greg Kroah-Hartman | 7d4df48 | 2010-10-07 17:04:47 -0700 | [diff] [blame] | 262 | u16 u16extraSignBits; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 263 | if (op == 0) { |
| 264 | return 15; |
| 265 | } else { |
| 266 | u16extraSignBits = 0; |
| 267 | while ((op >> 15) == (op >> 14)) { |
| 268 | u16extraSignBits++; |
| 269 | op = op << 1; |
| 270 | } |
| 271 | } |
| 272 | return u16extraSignBits; |
| 273 | } |
| 274 | |
| 275 | /* |
| 276 | Description: This function return the number of redundant sign bits in a 32 bit number. |
| 277 | Example: qm_norm32(0x00000080) = 23 |
| 278 | */ |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 279 | s16 qm_norm32(s32 op) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 280 | { |
Greg Kroah-Hartman | 7d4df48 | 2010-10-07 17:04:47 -0700 | [diff] [blame] | 281 | u16 u16extraSignBits; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 282 | if (op == 0) { |
| 283 | return 31; |
| 284 | } else { |
| 285 | u16extraSignBits = 0; |
| 286 | while ((op >> 31) == (op >> 30)) { |
| 287 | u16extraSignBits++; |
| 288 | op = op << 1; |
| 289 | } |
| 290 | } |
| 291 | return u16extraSignBits; |
| 292 | } |
| 293 | |
| 294 | /* |
| 295 | Description: This function divide two 16 bit unsigned numbers. |
| 296 | The numerator should be less than denominator. So the quotient is always less than 1. |
| 297 | This function return the quotient in q.15 format. |
| 298 | */ |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 299 | s16 qm_div_s(s16 num, s16 denom) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 300 | { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 301 | s16 var_out; |
| 302 | s16 iteration; |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 303 | s32 L_num; |
| 304 | s32 L_denom; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 305 | L_num = (num) << 15; |
| 306 | L_denom = (denom) << 15; |
| 307 | for (iteration = 0; iteration < 15; iteration++) { |
| 308 | L_num <<= 1; |
| 309 | if (L_num >= L_denom) { |
| 310 | L_num = qm_sub32(L_num, L_denom); |
| 311 | L_num = qm_add32(L_num, 1); |
| 312 | } |
| 313 | } |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 314 | var_out = (s16) (L_num & 0x7fff); |
Jason Cooper | 90ea229 | 2010-09-14 09:45:32 -0400 | [diff] [blame] | 315 | return var_out; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 316 | } |
| 317 | |
| 318 | /* |
| 319 | Description: This function compute the absolute value of a 16 bit number. |
| 320 | */ |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 321 | s16 qm_abs16(s16 op) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 322 | { |
| 323 | if (op < 0) { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 324 | if (op == (s16) 0xffff8000) { |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 325 | return 0x7fff; |
| 326 | } else { |
| 327 | return -op; |
| 328 | } |
| 329 | } else { |
| 330 | return op; |
| 331 | } |
| 332 | } |
| 333 | |
| 334 | /* |
| 335 | Description: This function divide two 16 bit numbers. |
| 336 | The quotient is returned through return value. |
| 337 | The qformat of the quotient is returned through the pointer (qQuotient) passed |
| 338 | to this function. The qformat of quotient is adjusted appropriately such that |
| 339 | the quotient occupies all 16 bits. |
| 340 | */ |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 341 | s16 qm_div16(s16 num, s16 denom, s16 *qQuotient) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 342 | { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 343 | s16 sign; |
| 344 | s16 nNum, nDenom; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 345 | sign = num ^ denom; |
| 346 | num = qm_abs16(num); |
| 347 | denom = qm_abs16(denom); |
| 348 | nNum = qm_norm16(num); |
| 349 | nDenom = qm_norm16(denom); |
| 350 | num = qm_shl16(num, nNum - 1); |
| 351 | denom = qm_shl16(denom, nDenom); |
| 352 | *qQuotient = nNum - 1 - nDenom + 15; |
| 353 | if (sign >= 0) { |
| 354 | return qm_div_s(num, denom); |
| 355 | } else { |
| 356 | return -qm_div_s(num, denom); |
| 357 | } |
| 358 | } |
| 359 | |
| 360 | /* |
| 361 | Description: This function compute absolute value of a 32 bit number. |
| 362 | */ |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 363 | s32 qm_abs32(s32 op) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 364 | { |
| 365 | if (op < 0) { |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 366 | if (op == (s32) 0x80000000) { |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 367 | return 0x7fffffff; |
| 368 | } else { |
| 369 | return -op; |
| 370 | } |
| 371 | } else { |
| 372 | return op; |
| 373 | } |
| 374 | } |
| 375 | |
| 376 | /* |
| 377 | Description: This function divide two 32 bit numbers. The division is performed |
| 378 | by considering only important 16 bits in 32 bit numbers. |
| 379 | The quotient is returned through return value. |
| 380 | The qformat of the quotient is returned through the pointer (qquotient) passed |
| 381 | to this function. The qformat of quotient is adjusted appropriately such that |
| 382 | the quotient occupies all 16 bits. |
| 383 | */ |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 384 | s16 qm_div163232(s32 num, s32 denom, s16 *qquotient) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 385 | { |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 386 | s32 sign; |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 387 | s16 nNum, nDenom; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 388 | sign = num ^ denom; |
| 389 | num = qm_abs32(num); |
| 390 | denom = qm_abs32(denom); |
| 391 | nNum = qm_norm32(num); |
| 392 | nDenom = qm_norm32(denom); |
| 393 | num = qm_shl32(num, nNum - 1); |
| 394 | denom = qm_shl32(denom, nDenom); |
| 395 | *qquotient = nNum - 1 - nDenom + 15; |
| 396 | if (sign >= 0) { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 397 | return qm_div_s((s16) (num >> 16), (s16) (denom >> 16)); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 398 | } else { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 399 | return -qm_div_s((s16) (num >> 16), (s16) (denom >> 16)); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 400 | } |
| 401 | } |
| 402 | |
| 403 | /* |
| 404 | Description: This function multiply a 32 bit number with a 16 bit number. |
| 405 | The multiplicaton result is right shifted by 16 bits to fit the result |
| 406 | into 32 bit output. |
| 407 | */ |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 408 | s32 qm_mul323216(s32 op1, s16 op2) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 409 | { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 410 | s16 hi; |
Greg Kroah-Hartman | 7d4df48 | 2010-10-07 17:04:47 -0700 | [diff] [blame] | 411 | u16 lo; |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 412 | s32 result; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 413 | hi = op1 >> 16; |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 414 | lo = (s16) (op1 & 0xffff); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 415 | result = qm_mul321616(hi, op2); |
| 416 | result = result + (qm_mulsu321616(op2, lo) >> 16); |
| 417 | return result; |
| 418 | } |
| 419 | |
| 420 | /* |
| 421 | Description: This function multiply signed 16 bit number with unsigned 16 bit number and return |
| 422 | the result in 32 bits. |
| 423 | */ |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 424 | s32 qm_mulsu321616(s16 op1, u16 op2) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 425 | { |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 426 | return (s32) (op1) * op2; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 427 | } |
| 428 | |
| 429 | /* |
| 430 | Description: This function multiply 32 bit number with 16 bit number. The multiplication result is |
| 431 | right shifted by 15 bits to fit the result into 32 bits. Right shifting by only 15 bits instead of |
| 432 | 16 bits is done to remove the extra sign bit formed by multiplication from the return value. |
| 433 | When the input numbers are 0x80000000, 0x8000 the return value is saturated to 0x7fffffff. |
| 434 | */ |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 435 | s32 qm_muls323216(s32 op1, s16 op2) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 436 | { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 437 | s16 hi; |
Greg Kroah-Hartman | 7d4df48 | 2010-10-07 17:04:47 -0700 | [diff] [blame] | 438 | u16 lo; |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 439 | s32 result; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 440 | hi = op1 >> 16; |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 441 | lo = (s16) (op1 & 0xffff); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 442 | result = qm_muls321616(hi, op2); |
| 443 | result = qm_add32(result, (qm_mulsu321616(op2, lo) >> 15)); |
| 444 | return result; |
| 445 | } |
| 446 | |
| 447 | /* |
| 448 | Description: This function multiply two 32 bit numbers. The multiplication result is right |
| 449 | shifted by 32 bits to fit the multiplication result into 32 bits. The right shifted |
| 450 | multiplication result is returned as output. |
| 451 | */ |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 452 | s32 qm_mul32(s32 a, s32 b) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 453 | { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 454 | s16 hi1, hi2; |
Greg Kroah-Hartman | 7d4df48 | 2010-10-07 17:04:47 -0700 | [diff] [blame] | 455 | u16 lo1, lo2; |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 456 | s32 result; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 457 | hi1 = a >> 16; |
| 458 | hi2 = b >> 16; |
Greg Kroah-Hartman | 7d4df48 | 2010-10-07 17:04:47 -0700 | [diff] [blame] | 459 | lo1 = (u16) (a & 0xffff); |
| 460 | lo2 = (u16) (b & 0xffff); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 461 | result = qm_mul321616(hi1, hi2); |
| 462 | result = result + (qm_mulsu321616(hi1, lo2) >> 16); |
| 463 | result = result + (qm_mulsu321616(hi2, lo1) >> 16); |
| 464 | return result; |
| 465 | } |
| 466 | |
| 467 | /* |
| 468 | Description: This function multiply two 32 bit numbers. The multiplication result is |
| 469 | right shifted by 31 bits to fit the multiplication result into 32 bits. The right |
| 470 | shifted multiplication result is returned as output. Right shifting by only 31 bits |
| 471 | instead of 32 bits is done to remove the extra sign bit formed by multiplication. |
| 472 | When the input numbers are 0x80000000, 0x80000000 the return value is saturated to |
| 473 | 0x7fffffff. |
| 474 | */ |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 475 | s32 qm_muls32(s32 a, s32 b) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 476 | { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 477 | s16 hi1, hi2; |
Greg Kroah-Hartman | 7d4df48 | 2010-10-07 17:04:47 -0700 | [diff] [blame] | 478 | u16 lo1, lo2; |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 479 | s32 result; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 480 | hi1 = a >> 16; |
| 481 | hi2 = b >> 16; |
Greg Kroah-Hartman | 7d4df48 | 2010-10-07 17:04:47 -0700 | [diff] [blame] | 482 | lo1 = (u16) (a & 0xffff); |
| 483 | lo2 = (u16) (b & 0xffff); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 484 | result = qm_muls321616(hi1, hi2); |
| 485 | result = qm_add32(result, (qm_mulsu321616(hi1, lo2) >> 15)); |
| 486 | result = qm_add32(result, (qm_mulsu321616(hi2, lo1) >> 15)); |
| 487 | result = qm_add32(result, (qm_mulu16(lo1, lo2) >> 15)); |
| 488 | return result; |
| 489 | } |
| 490 | |
| 491 | /* This table is log2(1+(i/32)) where i=[0:1:31], in q.15 format */ |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 492 | static const s16 log_table[] = { |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 493 | 0, |
| 494 | 1455, |
| 495 | 2866, |
| 496 | 4236, |
| 497 | 5568, |
| 498 | 6863, |
| 499 | 8124, |
| 500 | 9352, |
| 501 | 10549, |
| 502 | 11716, |
| 503 | 12855, |
| 504 | 13968, |
| 505 | 15055, |
| 506 | 16117, |
| 507 | 17156, |
| 508 | 18173, |
| 509 | 19168, |
| 510 | 20143, |
| 511 | 21098, |
| 512 | 22034, |
| 513 | 22952, |
| 514 | 23852, |
| 515 | 24736, |
| 516 | 25604, |
| 517 | 26455, |
| 518 | 27292, |
| 519 | 28114, |
| 520 | 28922, |
| 521 | 29717, |
| 522 | 30498, |
| 523 | 31267, |
| 524 | 32024 |
| 525 | }; |
| 526 | |
| 527 | #define LOG_TABLE_SIZE 32 /* log_table size */ |
| 528 | #define LOG2_LOG_TABLE_SIZE 5 /* log2(log_table size) */ |
| 529 | #define Q_LOG_TABLE 15 /* qformat of log_table */ |
| 530 | #define LOG10_2 19728 /* log10(2) in q.16 */ |
| 531 | |
| 532 | /* |
| 533 | Description: |
| 534 | This routine takes the input number N and its q format qN and compute |
| 535 | the log10(N). This routine first normalizes the input no N. Then N is in mag*(2^x) format. |
| 536 | mag is any number in the range 2^30-(2^31 - 1). Then log2(mag * 2^x) = log2(mag) + x is computed. |
| 537 | From that log10(mag * 2^x) = log2(mag * 2^x) * log10(2) is computed. |
| 538 | This routine looks the log2 value in the table considering LOG2_LOG_TABLE_SIZE+1 MSBs. |
| 539 | As the MSB is always 1, only next LOG2_OF_LOG_TABLE_SIZE MSBs are used for table lookup. |
| 540 | Next 16 MSBs are used for interpolation. |
| 541 | Inputs: |
| 542 | N - number to which log10 has to be found. |
| 543 | qN - q format of N |
| 544 | log10N - address where log10(N) will be written. |
| 545 | qLog10N - address where log10N qformat will be written. |
| 546 | Note/Problem: |
| 547 | For accurate results input should be in normalized or near normalized form. |
| 548 | */ |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 549 | void qm_log10(s32 N, s16 qN, s16 *log10N, s16 *qLog10N) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 550 | { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 551 | s16 s16norm, s16tableIndex, s16errorApproximation; |
Greg Kroah-Hartman | 7d4df48 | 2010-10-07 17:04:47 -0700 | [diff] [blame] | 552 | u16 u16offset; |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 553 | s32 s32log; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 554 | |
| 555 | /* Logerithm of negative values is undefined. |
| 556 | * assert N is greater than 0. |
| 557 | */ |
| 558 | /* ASSERT(N > 0); */ |
| 559 | |
| 560 | /* normalize the N. */ |
| 561 | s16norm = qm_norm32(N); |
| 562 | N = N << s16norm; |
| 563 | |
| 564 | /* The qformat of N after normalization. |
| 565 | * -30 is added to treat the no as between 1.0 to 2.0 |
| 566 | * i.e. after adding the -30 to the qformat the decimal point will be |
| 567 | * just rigtht of the MSB. (i.e. after sign bit and 1st MSB). i.e. |
| 568 | * at the right side of 30th bit. |
| 569 | */ |
| 570 | qN = qN + s16norm - 30; |
| 571 | |
| 572 | /* take the table index as the LOG2_OF_LOG_TABLE_SIZE bits right of the MSB */ |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 573 | s16tableIndex = (s16) (N >> (32 - (2 + LOG2_LOG_TABLE_SIZE))); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 574 | |
| 575 | /* remove the MSB. the MSB is always 1 after normalization. */ |
| 576 | s16tableIndex = |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 577 | s16tableIndex & (s16) ((1 << LOG2_LOG_TABLE_SIZE) - 1); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 578 | |
| 579 | /* remove the (1+LOG2_OF_LOG_TABLE_SIZE) MSBs in the N. */ |
| 580 | N = N & ((1 << (32 - (2 + LOG2_LOG_TABLE_SIZE))) - 1); |
| 581 | |
| 582 | /* take the offset as the 16 MSBS after table index. |
| 583 | */ |
Greg Kroah-Hartman | 7d4df48 | 2010-10-07 17:04:47 -0700 | [diff] [blame] | 584 | u16offset = (u16) (N >> (32 - (2 + LOG2_LOG_TABLE_SIZE + 16))); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 585 | |
| 586 | /* look the log value in the table. */ |
| 587 | s32log = log_table[s16tableIndex]; /* q.15 format */ |
| 588 | |
| 589 | /* interpolate using the offset. */ |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 590 | s16errorApproximation = (s16) qm_mulu16(u16offset, (u16) (log_table[s16tableIndex + 1] - log_table[s16tableIndex])); /* q.15 */ |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 591 | |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 592 | s32log = qm_add16((s16) s32log, s16errorApproximation); /* q.15 format */ |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 593 | |
| 594 | /* adjust for the qformat of the N as |
| 595 | * log2(mag * 2^x) = log2(mag) + x |
| 596 | */ |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 597 | s32log = qm_add32(s32log, ((s32) -qN) << 15); /* q.15 format */ |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 598 | |
| 599 | /* normalize the result. */ |
| 600 | s16norm = qm_norm32(s32log); |
| 601 | |
| 602 | /* bring all the important bits into lower 16 bits */ |
| 603 | s32log = qm_shl32(s32log, s16norm - 16); /* q.15+s16norm-16 format */ |
| 604 | |
| 605 | /* compute the log10(N) by multiplying log2(N) with log10(2). |
| 606 | * as log10(mag * 2^x) = log2(mag * 2^x) * log10(2) |
| 607 | * log10N in q.15+s16norm-16+1 (LOG10_2 is in q.16) |
| 608 | */ |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 609 | *log10N = qm_muls16((s16) s32log, (s16) LOG10_2); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 610 | |
| 611 | /* write the q format of the result. */ |
| 612 | *qLog10N = 15 + s16norm - 16 + 1; |
| 613 | |
| 614 | return; |
| 615 | } |
| 616 | |
| 617 | /* |
| 618 | Description: |
| 619 | This routine compute 1/N. |
| 620 | This routine reformates the given no N as N * 2^qN where N is in between 0.5 and 1.0 |
| 621 | in q.15 format in 16 bits. So the problem now boils down to finding the inverse of a |
| 622 | q.15 no in 16 bits which is in the range of 0.5 to 1.0. The output is always between |
| 623 | 2.0 to 1. So the output is 2.0 to 1.0 in q.30 format. Once the final output format is found |
| 624 | by taking the qN into account. Inverse is found with newton rapson method. Initially |
| 625 | inverse (x) is guessed as 1/0.75 (with appropriate sign). The new guess is calculated |
| 626 | using the formula x' = 2*x - N*x*x. After 4 or 5 iterations the inverse is very close to |
| 627 | inverse of N. |
| 628 | Inputs: |
| 629 | N - number to which 1/N has to be found. |
| 630 | qn - q format of N. |
| 631 | sqrtN - address where 1/N has to be written. |
| 632 | qsqrtN - address where q format of 1/N has to be written. |
| 633 | */ |
| 634 | #define qx 29 |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 635 | void qm_1byN(s32 N, s16 qN, s32 *result, s16 *qResult) |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 636 | { |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 637 | s16 normN; |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 638 | s32 s32firstTerm, s32secondTerm, x; |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 639 | int i; |
| 640 | |
| 641 | normN = qm_norm32(N); |
| 642 | |
| 643 | /* limit N to least significant 16 bits. 15th bit is the sign bit. */ |
| 644 | N = qm_shl32(N, normN - 16); |
| 645 | qN = qN + normN - 16 - 15; |
| 646 | /* -15 is added to treat N as 16 bit q.15 number in the range from 0.5 to 1 */ |
| 647 | |
| 648 | /* Take the initial guess as 1/0.75 in qx format with appropriate sign. */ |
| 649 | if (N >= 0) { |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 650 | x = (s32) ((1 / 0.75) * (1 << qx)); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 651 | /* input no is in the range 0.5 to 1. So 1/0.75 is taken as initial guess. */ |
| 652 | } else { |
Greg Kroah-Hartman | 3e26416 | 2010-10-08 11:11:13 -0700 | [diff] [blame] | 653 | x = (s32) ((1 / -0.75) * (1 << qx)); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 654 | /* input no is in the range -0.5 to -1. So 1/-0.75 is taken as initial guess. */ |
| 655 | } |
| 656 | |
| 657 | /* iterate the equation x = 2*x - N*x*x for 4 times. */ |
| 658 | for (i = 0; i < 4; i++) { |
| 659 | s32firstTerm = qm_shl32(x, 1); /* s32firstTerm = 2*x in q.29 */ |
| 660 | s32secondTerm = |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 661 | qm_muls321616((s16) (s32firstTerm >> 16), |
| 662 | (s16) (s32firstTerm >> 16)); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 663 | /* s32secondTerm = x*x in q.(29+1-16)*2+1 */ |
| 664 | s32secondTerm = |
Greg Kroah-Hartman | e59fe08 | 2010-10-07 17:08:21 -0700 | [diff] [blame] | 665 | qm_muls321616((s16) (s32secondTerm >> 16), (s16) N); |
Henry Ptasinski | a9533e7 | 2010-09-08 21:04:42 -0700 | [diff] [blame] | 666 | /* s32secondTerm = N*x*x in q.((29+1-16)*2+1)-16+15+1 i.e. in q.29 */ |
| 667 | x = qm_sub32(s32firstTerm, s32secondTerm); |
| 668 | /* can be added directly as both are in q.29 */ |
| 669 | } |
| 670 | |
| 671 | /* Bring the x to q.30 format. */ |
| 672 | *result = qm_shl32(x, 1); |
| 673 | /* giving the output in q.30 format for q.15 input in 16 bits. */ |
| 674 | |
| 675 | /* compute the final q format of the result. */ |
| 676 | *qResult = -qN + 30; /* adjusting the q format of actual output */ |
| 677 | |
| 678 | return; |
| 679 | } |
| 680 | |
| 681 | #undef qx |