Andy Green | 3faf44e | 2014-10-06 16:47:20 +0800 | [diff] [blame] | 1 | /* |
| 2 | * minilex.c |
| 3 | * |
| 4 | * High efficiency lexical state parser |
| 5 | * |
| 6 | * Copyright (C)2011-2014 Andy Green <andy@warmcat.com> |
| 7 | * |
| 8 | * Licensed under LGPL2 |
| 9 | * |
| 10 | * Usage: gcc minihuf.c -o minihuf && ./minihuf > huftable.h |
| 11 | * |
| 12 | * Run it twice to test parsing on the generated table on stderr |
| 13 | */ |
| 14 | |
| 15 | #include <stdio.h> |
| 16 | #include <stdlib.h> |
| 17 | #include <string.h> |
| 18 | |
| 19 | #define ARRAY_SIZE(n) (sizeof(n) / sizeof(n[0])) |
| 20 | |
| 21 | struct huf { |
| 22 | unsigned int code; |
| 23 | unsigned char len; |
| 24 | }; |
| 25 | |
| 26 | static struct huf huf_literal[] = { |
| 27 | /* 0x00 */ { 0x1ff8, 13 }, |
| 28 | /* 0x01 */ { 0x7fffd8, 23 }, |
| 29 | /* 0x02 */ { 0xfffffe2, 28 }, |
| 30 | /* 0x03 */ { 0xfffffe3, 28 }, |
| 31 | /* 0x04 */ { 0xfffffe4, 28 }, |
| 32 | /* 0x05 */ { 0xfffffe5, 28 }, |
| 33 | /* 0x06 */ { 0xfffffe6, 28 }, |
| 34 | /* 0x07 */ { 0xfffffe7, 28 }, |
| 35 | /* 0x08 */ { 0xfffffe8, 28 }, |
| 36 | /* 0x09 */ { 0xffffea, 24 }, |
| 37 | /* 0x0a */ { 0x3ffffffc, 30 }, |
| 38 | /* 0x0b */ { 0xfffffe9, 28 }, |
| 39 | |
| 40 | /* 0x0c */ { 0xfffffea, 28 }, |
| 41 | /* 0x0d */ { 0x3ffffffd, 30 }, |
| 42 | /* 0x0e */ { 0xfffffeb, 28 }, |
| 43 | /* 0x0f */ { 0xfffffec, 28 }, |
| 44 | /* 0x10 */ { 0xfffffed, 28 }, |
| 45 | /* 0x11 */ { 0xfffffee, 28 }, |
| 46 | /* 0x12 */ { 0xfffffef, 28 }, |
| 47 | /* 0x13 */ { 0xffffff0, 28 }, |
| 48 | /* 0x14 */ { 0xffffff1, 28 }, |
| 49 | /* 0x15 */ { 0xffffff2, 28 }, |
| 50 | /* 0x16 */ { 0x3ffffffe, 30 }, |
| 51 | /* 0x17 */ { 0xffffff3, 28 }, |
| 52 | /* 0x18 */ { 0xffffff4, 28 }, |
| 53 | /* 0x19 */ { 0xffffff5, 28 }, |
| 54 | /* 0x1a */ { 0xffffff6, 28 }, |
| 55 | /* 0x1b */ { 0xffffff7, 28 }, |
| 56 | /* 0x1c */ { 0xffffff8, 28 }, |
| 57 | /* 0x1d */ { 0xffffff9, 28 }, |
| 58 | /* 0x1e */ { 0xffffffa, 28 }, |
| 59 | /* 0x1f */ { 0xffffffb, 28 }, |
| 60 | /* 0x20 */ { 0x14, 6 }, |
| 61 | /* 0x21 */ { 0x3f8, 10 }, |
| 62 | /* 0x22 */ { 0x3f9, 10 }, |
| 63 | /* 0x23 */ { 0xffa, 12 }, |
| 64 | /* 0x24 */ { 0x1ff9, 13 }, |
| 65 | /* 0x25 */ { 0x15, 6 }, |
| 66 | /* 0x26 */ { 0xf8, 8 }, |
| 67 | /* 0x27 */ { 0x7fa, 11 }, |
| 68 | /* 0x28 */ { 0x3fa, 10 }, |
| 69 | /* 0x29 */ { 0x3fb, 10 }, |
| 70 | /* 0x2a */ { 0xf9, 8 }, |
| 71 | /* 0x2b */ { 0x7fb, 11 }, |
| 72 | /* 0x2c */ { 0xfa, 8 }, |
| 73 | /* 0x2d */ { 0x16, 6 }, |
| 74 | /* 0x2e */ { 0x17, 6 }, |
| 75 | /* 0x2f */ { 0x18, 6 }, |
| 76 | /* 0x30 */ { 0x0, 5 }, |
| 77 | /* 0x31 */ { 0x1, 5 }, |
| 78 | /* 0x32 */ { 0x2, 5 }, |
| 79 | /* 0x33 */ { 0x19, 6 }, |
| 80 | /* 0x34 */ { 0x1a, 6 }, |
| 81 | /* 0x35 */ { 0x1b, 6 }, |
| 82 | /* 0x36 */ { 0x1c, 6 }, |
| 83 | /* 0x37 */ { 0x1d, 6 }, |
| 84 | /* 0x38 */ { 0x1e, 6 }, |
| 85 | /* 0x39 */ { 0x1f, 6 }, |
| 86 | /* 0x3a */ { 0x5c, 7 }, |
| 87 | /* 0x3b */ { 0xfb, 8 }, |
| 88 | |
| 89 | /* 0x3c */ { 0x7ffc, 15 }, |
| 90 | /* 0x3d */ { 0x20, 6 }, |
| 91 | /* 0x3e */ { 0xffb, 12 }, |
| 92 | /* 0x3f */ { 0x3fc, 10 }, |
| 93 | /* 0x40 */ { 0x1ffa, 13 }, |
| 94 | /* 0x41 */ { 0x21, 6 }, |
| 95 | /* 0x42 */ { 0x5d, 7 }, |
| 96 | /* 0x43 */ { 0x5e, 7 }, |
| 97 | /* 0x44 */ { 0x5f, 7 }, |
| 98 | /* 0x45 */ { 0x60, 7 }, |
| 99 | /* 0x46 */ { 0x61, 7 }, |
| 100 | /* 0x47 */ { 0x62, 7 }, |
| 101 | /* 0x48 */ { 0x63, 7 }, |
| 102 | /* 0x49 */ { 0x64, 7 }, |
| 103 | /* 0x4a */ { 0x65, 7 }, |
| 104 | /* 0x4b */ { 0x66, 7 }, |
| 105 | /* 0x4c */ { 0x67, 7 }, |
| 106 | /* 0x4d */ { 0x68, 7 }, |
| 107 | /* 0x4e */ { 0x69, 7 }, |
| 108 | /* 0x4f */ { 0x6a, 7 }, |
| 109 | /* 0x50 */ { 0x6b, 7 }, |
| 110 | /* 0x51 */ { 0x6c, 7 }, |
| 111 | /* 0x52 */ { 0x6d, 7 }, |
| 112 | /* 0x53 */ { 0x6e, 7 }, |
| 113 | /* 0x54 */ { 0x6f, 7 }, |
| 114 | /* 0x55 */ { 0x70, 7 }, |
| 115 | /* 0x56 */ { 0x71, 7 }, |
| 116 | /* 0x57 */ { 0x72, 7 }, |
| 117 | /* 0x58 */ { 0xfc, 8 }, |
| 118 | /* 0x59 */ { 0x73, 7 }, |
| 119 | /* 0x5a */ { 0xfd, 8 }, |
| 120 | /* 0x5b */ { 0x1ffb, 13 }, |
| 121 | /* 0x5c */ { 0x7fff0, 19 }, |
| 122 | /* 0x5d */ { 0x1ffc, 13 }, |
| 123 | /* 0x5e */ { 0x3ffc, 14 }, |
| 124 | /* 0x5f */ { 0x22, 6 }, |
| 125 | /* 0x60 */ { 0x7ffd, 15 }, |
| 126 | /* 0x61 */ { 0x3, 5 }, |
| 127 | /* 0x62 */ { 0x23, 6 }, |
| 128 | /* 0x63 */ { 0x4, 5 }, |
| 129 | /* 0x64 */ { 0x24, 6 }, |
| 130 | /* 0x65 */ { 0x5, 5 }, |
| 131 | /* 0x66 */ { 0x25, 6 }, |
| 132 | /* 0x67 */ { 0x26, 6 }, |
| 133 | /* 0x68 */ { 0x27, 6 }, |
| 134 | /* 0x69 */ { 0x6, 5 }, |
| 135 | /* 0x6a */ { 0x74, 7 }, |
| 136 | /* 0x6b */ { 0x75, 7 }, |
| 137 | |
| 138 | |
| 139 | /* 0x6c */ { 0x28, 6 }, |
| 140 | /* 0x6d */ { 0x29, 6 }, |
| 141 | /* 0x6e */ { 0x2a, 6 }, |
| 142 | /* 0x6f */ { 0x7, 5 }, |
| 143 | /* 0x70 */ { 0x2b, 6 }, |
| 144 | /* 0x71 */ { 0x76, 7 }, |
| 145 | /* 0x72 */ { 0x2c, 6 }, |
| 146 | /* 0x73 */ { 0x8, 5 }, |
| 147 | /* 0x74 */ { 0x9, 5 }, |
| 148 | /* 0x75 */ { 0x2d, 6 }, |
| 149 | /* 0x76 */ { 0x77, 7 }, |
| 150 | /* 0x77 */ { 0x78, 7 }, |
| 151 | /* 0x78 */ { 0x79, 7 }, |
| 152 | /* 0x79 */ { 0x7a, 7 }, |
| 153 | /* 0x7a */ { 0x7b, 7 }, |
| 154 | /* 0x7b */ { 0x7ffe, 15 }, |
| 155 | /* 0x7c */ { 0x7fc, 11 }, |
| 156 | /* 0x7d */ { 0x3ffd, 14 }, |
| 157 | /* 0x7e */ { 0x1ffd, 13 }, |
| 158 | /* 0x7f */ { 0xffffffc, 28 }, |
| 159 | /* 0x80 */ { 0xfffe6, 20 }, |
| 160 | /* 0x81 */ { 0x3fffd2, 22 }, |
| 161 | /* 0x82 */ { 0xfffe7, 20 }, |
| 162 | /* 0x83 */ { 0xfffe8, 20 }, |
| 163 | /* 0x84 */ { 0x3fffd3, 22 }, |
| 164 | /* 0x85 */ { 0x3fffd4, 22 }, |
| 165 | /* 0x86 */ { 0x3fffd5, 22 }, |
| 166 | /* 0x87 */ { 0x7fffd9, 23 }, |
| 167 | /* 0x88 */ { 0x3fffd6, 22 }, |
| 168 | /* 0x89 */ { 0x7fffda, 23 }, |
| 169 | /* 0x8a */ { 0x7fffdb, 23 }, |
| 170 | /* 0x8b */ { 0x7fffdc, 23 }, |
| 171 | /* 0x8c */ { 0x7fffdd, 23 }, |
| 172 | /* 0x8d */ { 0x7fffde, 23 }, |
| 173 | /* 0x8e */ { 0xffffeb, 24 }, |
| 174 | /* 0x8f */ { 0x7fffdf, 23 }, |
| 175 | /* 0x90 */ { 0xffffec, 24 }, |
| 176 | /* 0x91 */ { 0xffffed, 24 }, |
| 177 | /* 0x92 */ { 0x3fffd7, 22 }, |
| 178 | /* 0x93 */ { 0x7fffe0, 23 }, |
| 179 | /* 0x94 */ { 0xffffee, 24 }, |
| 180 | /* 0x95 */ { 0x7fffe1, 23 }, |
| 181 | /* 0x96 */ { 0x7fffe2, 23 }, |
| 182 | /* 0x97 */ { 0x7fffe3, 23 }, |
| 183 | /* 0x98 */ { 0x7fffe4, 23 }, |
| 184 | /* 0x99 */ { 0x1fffdc, 21 }, |
| 185 | /* 0x9a */ { 0x3fffd8, 22 }, |
| 186 | /* 0x9b */ { 0x7fffe5, 23 }, |
| 187 | |
| 188 | /* 0x9c */ { 0x3fffd9, 22 }, |
| 189 | /* 0x9d */ { 0x7fffe6, 23 }, |
| 190 | /* 0x9e */ { 0x7fffe7, 23 }, |
| 191 | /* 0x9f */ { 0xffffef, 24 }, |
| 192 | /* 0xa0 */ { 0x3fffda, 22 }, |
| 193 | /* 0xa1 */ { 0x1fffdd, 21 }, |
| 194 | /* 0xa2 */ { 0xfffe9, 20 }, |
| 195 | /* 0xa3 */ { 0x3fffdb, 22 }, |
| 196 | /* 0xa4 */ { 0x3fffdc, 22 }, |
| 197 | /* 0xa5 */ { 0x7fffe8, 23 }, |
| 198 | /* 0xa6 */ { 0x7fffe9, 23 }, |
| 199 | /* 0xa7 */ { 0x1fffde, 21 }, |
| 200 | /* 0xa8 */ { 0x7fffea, 23 }, |
| 201 | /* 0xa9 */ { 0x3fffdd, 22 }, |
| 202 | /* 0xaa */ { 0x3fffde, 22 }, |
| 203 | /* 0xab */ { 0xfffff0, 24 }, |
| 204 | /* 0xac */ { 0x1fffdf, 21 }, |
| 205 | /* 0xad */ { 0x3fffdf, 22 }, |
| 206 | /* 0xae */ { 0x7fffeb, 23 }, |
| 207 | /* 0xaf */ { 0x7fffec, 23 }, |
| 208 | /* 0xb0 */ { 0x1fffe0, 21 }, |
| 209 | /* 0xb1 */ { 0x1fffe1, 21 }, |
| 210 | /* 0xb2 */ { 0x3fffe0, 22 }, |
| 211 | /* 0xb3 */ { 0x1fffe2, 21 }, |
| 212 | /* 0xb4 */ { 0x7fffed, 23 }, |
| 213 | /* 0xb5 */ { 0x3fffe1, 22 }, |
| 214 | /* 0xb6 */ { 0x7fffee, 23 }, |
| 215 | /* 0xb7 */ { 0x7fffef, 23 }, |
| 216 | /* 0xb8 */ { 0xfffea, 20 }, |
| 217 | /* 0xb9 */ { 0x3fffe2, 22 }, |
| 218 | /* 0xba */ { 0x3fffe3, 22 }, |
| 219 | /* 0xbb */ { 0x3fffe4, 22 }, |
| 220 | /* 0xbc */ { 0x7ffff0, 23 }, |
| 221 | /* 0xbd */ { 0x3fffe5, 22 }, |
| 222 | /* 0xbe */ { 0x3fffe6, 22 }, |
| 223 | /* 0xbf */ { 0x7ffff1, 23 }, |
| 224 | /* 0xc0 */ { 0x3ffffe0, 26 }, |
| 225 | /* 0xc1 */ { 0x3ffffe1, 26 }, |
| 226 | /* 0xc2 */ { 0xfffeb, 20 }, |
| 227 | /* 0xc3 */ { 0x7fff1, 19 }, |
| 228 | /* 0xc4 */ { 0x3fffe7, 22 }, |
| 229 | /* 0xc5 */ { 0x7ffff2, 23 }, |
| 230 | /* 0xc6 */ { 0x3fffe8, 22 }, |
| 231 | /* 0xc7 */ { 0x1ffffec, 25 }, |
| 232 | /* 0xc8 */ { 0x3ffffe2, 26 }, |
| 233 | /* 0xc9 */ { 0x3ffffe3, 26 }, |
| 234 | /* 0xca */ { 0x3ffffe4, 26 }, |
| 235 | /* 0xcb */ { 0x7ffffde, 27 }, |
| 236 | |
| 237 | /* 0xcc */ { 0x7ffffdf, 27 }, |
| 238 | /* 0xcd */ { 0x3ffffe5, 26 }, |
| 239 | /* 0xce */ { 0xfffff1, 24 }, |
| 240 | /* 0xcf */ { 0x1ffffed, 25 }, |
| 241 | /* 0xd0 */ { 0x7fff2, 19 }, |
| 242 | /* 0xd1 */ { 0x1fffe3, 21 }, |
| 243 | /* 0xd2 */ { 0x3ffffe6, 26 }, |
| 244 | /* 0xd3 */ { 0x7ffffe0, 27 }, |
| 245 | /* 0xd4 */ { 0x7ffffe1, 27 }, |
| 246 | /* 0xd5 */ { 0x3ffffe7, 26 }, |
| 247 | /* 0xd6 */ { 0x7ffffe2, 27 }, |
| 248 | /* 0xd7 */ { 0xfffff2, 24 }, |
| 249 | /* 0xd8 */ { 0x1fffe4, 21 }, |
| 250 | /* 0xd9 */ { 0x1fffe5, 21 }, |
| 251 | /* 0xda */ { 0x3ffffe8, 26 }, |
| 252 | /* 0xdb */ { 0x3ffffe9, 26 }, |
| 253 | /* 0xdc */ { 0xffffffd, 28 }, |
| 254 | /* 0xdd */ { 0x7ffffe3, 27 }, |
| 255 | /* 0xde */ { 0x7ffffe4, 27 }, |
| 256 | /* 0xdf */ { 0x7ffffe5, 27 }, |
| 257 | /* 0xe0 */ { 0xfffec, 20 }, |
| 258 | /* 0xe1 */ { 0xfffff3, 24 }, |
| 259 | /* 0xe2 */ { 0xfffed, 20 }, |
| 260 | /* 0xe3 */ { 0x1fffe6, 21 }, |
| 261 | /* 0xe4 */ { 0x3fffe9, 22 }, |
| 262 | /* 0xe5 */ { 0x1fffe7, 21 }, |
| 263 | /* 0xe6 */ { 0x1fffe8, 21 }, |
| 264 | /* 0xe7 */ { 0x7ffff3, 23 }, |
| 265 | /* 0xe8 */ { 0x3fffea, 22 }, |
| 266 | /* 0xe9 */ { 0x3fffeb, 22 }, |
| 267 | /* 0xea */ { 0x1ffffee, 25 }, |
| 268 | /* 0xeb */ { 0x1ffffef, 25 }, |
| 269 | /* 0xec */ { 0xfffff4, 24 }, |
| 270 | /* 0xed */ { 0xfffff5, 24 }, |
| 271 | /* 0xee */ { 0x3ffffea, 26 }, |
| 272 | /* 0xef */ { 0x7ffff4, 23 }, |
| 273 | /* 0xf0 */ { 0x3ffffeb, 26 }, |
| 274 | /* 0xf1 */ { 0x7ffffe6, 27 }, |
| 275 | /* 0xf2 */ { 0x3ffffec, 26 }, |
| 276 | /* 0xf3 */ { 0x3ffffed, 26 }, |
| 277 | /* 0xf4 */ { 0x7ffffe7, 27 }, |
| 278 | /* 0xf5 */ { 0x7ffffe8, 27 }, |
| 279 | /* 0xf6 */ { 0x7ffffe9, 27 }, |
| 280 | /* 0xf7 */ { 0x7ffffea, 27 }, |
| 281 | /* 0xf8 */ { 0x7ffffeb, 27 }, |
| 282 | /* 0xf9 */ { 0xffffffe, 28 }, |
| 283 | /* 0xfa */ { 0x7ffffec, 27 }, |
| 284 | /* 0xfb */ { 0x7ffffed, 27 }, |
| 285 | |
| 286 | /* 0xfc */ { 0x7ffffee, 27 }, |
| 287 | /* 0xfd */ { 0x7ffffef, 27 }, |
| 288 | /* 0xfe */ { 0x7fffff0, 27 }, |
| 289 | /* 0xff */ { 0x3ffffee, 26 }, |
| 290 | /* 0x100 */ { 0x3fffffff, 30 }, |
| 291 | }; |
| 292 | |
| 293 | int code_bit(int idx, int bit) |
| 294 | { |
| 295 | if (bit < huf_literal[idx].len) |
| 296 | return !!(huf_literal[idx].code & (1 << (huf_literal[idx].len - 1 - bit))); |
| 297 | |
| 298 | return -1; |
| 299 | } |
| 300 | |
| 301 | #include "huftable.h" |
| 302 | |
| 303 | #define PARALLEL 2 |
| 304 | |
| 305 | struct state { |
| 306 | int terminal; |
| 307 | int state[PARALLEL]; |
| 308 | int bytepos; |
| 309 | |
| 310 | int real_pos; |
| 311 | }; |
| 312 | |
| 313 | struct state state[2000]; |
| 314 | unsigned char terms[2000]; |
| 315 | int next = 1; |
| 316 | |
| 317 | int lextable_decode(int pos, char c) |
| 318 | { |
| 319 | int q = pos + !!c; |
| 320 | |
| 321 | if (lextable_terms[q >> 3] & (1 << (q & 7))) /* terminal */ |
| 322 | return lextable[q] | 0x8000; |
| 323 | |
| 324 | return pos + (lextable[q] << 1); |
| 325 | } |
| 326 | |
| 327 | int main(void) |
| 328 | { |
| 329 | int n = 0; |
| 330 | int m = 0; |
| 331 | int prev; |
| 332 | char c; |
| 333 | int walk; |
| 334 | int saw; |
| 335 | int y; |
| 336 | int j; |
| 337 | int q; |
| 338 | int pos = 0; |
| 339 | int biggest = 0; |
| 340 | int fails = 0; |
| 341 | |
| 342 | m = 0; |
| 343 | while (m < ARRAY_SIZE(state)) { |
| 344 | for (j = 0; j < PARALLEL; j++) { |
| 345 | state[m].state[j] = 0xffff; |
| 346 | state[m].terminal = 0; |
| 347 | } |
| 348 | m++; |
| 349 | } |
| 350 | |
| 351 | while (n < ARRAY_SIZE(huf_literal)) { |
| 352 | |
| 353 | m = 0; |
| 354 | walk = 0; |
| 355 | prev = 0; |
| 356 | |
| 357 | while (m < huf_literal[n].len) { |
| 358 | |
| 359 | saw = 0; |
| 360 | if (state[walk].state[code_bit(n, m)] != 0xffff) { |
| 361 | /* exists -- go forward */ |
| 362 | walk = state[walk].state[code_bit(n, m)]; |
| 363 | goto again; |
| 364 | } |
| 365 | |
| 366 | /* something we didn't see before */ |
| 367 | |
| 368 | state[walk].state[code_bit(n, m)] = next; |
| 369 | walk = next++; |
| 370 | again: |
| 371 | m++; |
| 372 | } |
| 373 | |
| 374 | state[walk].terminal = n++; |
| 375 | state[walk].state[0] = 0; /* terminal marker */ |
| 376 | } |
| 377 | |
| 378 | walk = 0; |
| 379 | for (n = 0; n < next; n++) { |
| 380 | state[n].bytepos = walk; |
| 381 | walk += (2 * 2); |
| 382 | } |
| 383 | |
| 384 | /* compute everyone's position first */ |
| 385 | |
| 386 | pos = 0; |
| 387 | walk = 0; |
| 388 | for (n = 0; n < next; n++) { |
| 389 | |
| 390 | state[n].real_pos = pos; |
| 391 | |
| 392 | if (state[n].state[0]) /* nonterminal */ |
| 393 | pos += 2; |
| 394 | |
| 395 | walk ++; |
| 396 | } |
| 397 | |
| 398 | fprintf(stdout, "static unsigned char lextable[] = {\n"); |
| 399 | |
| 400 | #define TERMINAL_MASK 0x8000 |
| 401 | |
| 402 | walk = 0; |
| 403 | pos = 0; |
| 404 | q = 0; |
| 405 | for (n = 0; n < next; n++) { |
| 406 | q = pos; |
| 407 | for (m = 0; m < 2; m++) { |
| 408 | saw = state[n].state[m]; |
| 409 | |
| 410 | if (saw == 0) { // c is a terminal then |
| 411 | m = 2; |
| 412 | continue; |
| 413 | } |
| 414 | if (!m) |
| 415 | fprintf(stdout, "/* pos %04x: %3d */ ", |
| 416 | state[n].real_pos, n); |
| 417 | else |
| 418 | fprintf(stdout, " "); |
| 419 | |
| 420 | if (saw == 0xffff) { |
| 421 | fprintf(stdout, |
| 422 | " 0xff, 0xff, /* 0 = fail */\n "); |
| 423 | pos ++; /* fail */ |
| 424 | fails++; |
| 425 | continue; |
| 426 | } |
| 427 | |
| 428 | if (state[saw].state[0] == 0) { /* points to terminal */ |
| 429 | fprintf(stdout, " /* terminal %d */ 0x%02X,\n", |
| 430 | state[saw].terminal, |
| 431 | state[saw].terminal & 0xff); |
| 432 | terms[(state[n].real_pos + m) >> 3] |= |
| 433 | 1 << ((state[n].real_pos + m) & 7); |
| 434 | pos++; |
| 435 | walk++; |
| 436 | continue; |
| 437 | } |
| 438 | |
| 439 | j = (state[saw].real_pos - q) >> 1; |
| 440 | |
| 441 | if (j > biggest) |
| 442 | biggest = j; |
| 443 | |
| 444 | if (j > 0xffff) { |
| 445 | fprintf(stderr, |
| 446 | "Jump > 64K bytes ahead (%d to %d)\n", |
| 447 | state[n].real_pos, state[saw].real_pos); |
| 448 | return 1; |
| 449 | } |
| 450 | |
| 451 | fprintf(stdout, " /* %d */ 0x%02X " |
| 452 | "/* (to 0x%04X state %3d) */,\n", |
| 453 | m, |
| 454 | j & 0xff, |
| 455 | state[saw].real_pos, saw); |
| 456 | pos++; |
| 457 | |
| 458 | walk++; |
| 459 | } |
| 460 | } |
| 461 | |
| 462 | fprintf(stdout, "/* total size %d bytes, biggest jump %d/256, fails=%d */\n};\n" |
| 463 | "\n static unsigned char lextable_terms[] = {\n", |
| 464 | pos, biggest, fails); |
| 465 | |
| 466 | for (n = 0; n < (walk + 7) / 8; n++) { |
| 467 | if (!(n & 7)) |
| 468 | fprintf(stdout, "\n\t"); |
| 469 | fprintf(stdout, "0x%02x, ", terms[n]); |
| 470 | } |
| 471 | fprintf(stdout, "\n};\n"); |
| 472 | |
| 473 | /* |
| 474 | * Try to parse every legal input string |
| 475 | */ |
| 476 | |
| 477 | for (n = 0; n < ARRAY_SIZE(huf_literal); n++) { |
| 478 | walk = 0; |
| 479 | m = 0; |
| 480 | y = -1; |
| 481 | |
| 482 | fprintf(stderr, " trying %d\n", n); |
| 483 | |
| 484 | while (m < huf_literal[n].len) { |
Andy Green | 2add634 | 2014-10-12 08:38:16 +0800 | [diff] [blame] | 485 | prev = walk; |
Andy Green | 3faf44e | 2014-10-06 16:47:20 +0800 | [diff] [blame] | 486 | walk = lextable_decode(walk, code_bit(n, m)); |
| 487 | |
| 488 | if (walk == 0xffff) { |
| 489 | fprintf(stderr, "failed\n"); |
| 490 | return 3; |
| 491 | } |
| 492 | |
| 493 | if (walk & 0x8000) { |
| 494 | y = walk & 0x7fff; |
Andy Green | 2add634 | 2014-10-12 08:38:16 +0800 | [diff] [blame] | 495 | if (y == 0 && m == 29) { |
Andy Green | 3faf44e | 2014-10-06 16:47:20 +0800 | [diff] [blame] | 496 | y |= 0x100; |
Andy Green | 2add634 | 2014-10-12 08:38:16 +0800 | [diff] [blame] | 497 | fprintf(stdout, |
| 498 | "\n/* state that points to " |
| 499 | "0x100 for disambiguation with " |
| 500 | "0x0 */\n" |
| 501 | "#define HUFTABLE_0x100_PREV " |
| 502 | "%d\n", prev); |
| 503 | } |
Andy Green | 3faf44e | 2014-10-06 16:47:20 +0800 | [diff] [blame] | 504 | break; |
| 505 | } |
| 506 | m++; |
| 507 | } |
| 508 | |
| 509 | if (y != n) { |
| 510 | fprintf(stderr, "decode failed %d got %d (0x%x)\n", n, y, y); |
| 511 | return 4; |
| 512 | } |
| 513 | } |
| 514 | |
| 515 | fprintf(stderr, "All decode OK\n"); |
| 516 | |
| 517 | return 0; |
| 518 | } |