Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* Generate assembler source containing symbol information |
| 2 | * |
| 3 | * Copyright 2002 by Kai Germaschewski |
| 4 | * |
| 5 | * This software may be used and distributed according to the terms |
| 6 | * of the GNU General Public License, incorporated herein by reference. |
| 7 | * |
| 8 | * Usage: nm -n vmlinux | scripts/kallsyms [--all-symbols] > symbols.S |
| 9 | * |
| 10 | * ChangeLog: |
| 11 | * |
| 12 | * (25/Aug/2004) Paulo Marques <pmarques@grupopie.com> |
| 13 | * Changed the compression method from stem compression to "table lookup" |
| 14 | * compression |
| 15 | * |
| 16 | * Table compression uses all the unused char codes on the symbols and |
| 17 | * maps these to the most used substrings (tokens). For instance, it might |
| 18 | * map char code 0xF7 to represent "write_" and then in every symbol where |
| 19 | * "write_" appears it can be replaced by 0xF7, saving 5 bytes. |
| 20 | * The used codes themselves are also placed in the table so that the |
| 21 | * decompresion can work without "special cases". |
| 22 | * Applied to kernel symbols, this usually produces a compression ratio |
| 23 | * of about 50%. |
| 24 | * |
| 25 | */ |
| 26 | |
| 27 | #include <stdio.h> |
| 28 | #include <stdlib.h> |
| 29 | #include <string.h> |
| 30 | #include <ctype.h> |
| 31 | |
| 32 | /* maximum token length used. It doesn't pay to increase it a lot, because |
| 33 | * very long substrings probably don't repeat themselves too often. */ |
| 34 | #define MAX_TOK_SIZE 11 |
| 35 | #define KSYM_NAME_LEN 127 |
| 36 | |
| 37 | /* we use only a subset of the complete symbol table to gather the token count, |
| 38 | * to speed up compression, at the expense of a little compression ratio */ |
| 39 | #define WORKING_SET 1024 |
| 40 | |
| 41 | /* first find the best token only on the list of tokens that would profit more |
| 42 | * than GOOD_BAD_THRESHOLD. Only if this list is empty go to the "bad" list. |
| 43 | * Increasing this value will put less tokens on the "good" list, so the search |
| 44 | * is faster. However, if the good list runs out of tokens, we must painfully |
| 45 | * search the bad list. */ |
| 46 | #define GOOD_BAD_THRESHOLD 10 |
| 47 | |
| 48 | /* token hash parameters */ |
| 49 | #define HASH_BITS 18 |
| 50 | #define HASH_TABLE_SIZE (1 << HASH_BITS) |
| 51 | #define HASH_MASK (HASH_TABLE_SIZE - 1) |
| 52 | #define HASH_BASE_OFFSET 2166136261U |
| 53 | #define HASH_FOLD(a) ((a)&(HASH_MASK)) |
| 54 | |
| 55 | /* flags to mark symbols */ |
| 56 | #define SYM_FLAG_VALID 1 |
| 57 | #define SYM_FLAG_SAMPLED 2 |
| 58 | |
| 59 | struct sym_entry { |
| 60 | unsigned long long addr; |
| 61 | char type; |
| 62 | unsigned char flags; |
| 63 | unsigned char len; |
| 64 | unsigned char *sym; |
| 65 | }; |
| 66 | |
| 67 | |
| 68 | static struct sym_entry *table; |
| 69 | static int size, cnt; |
| 70 | static unsigned long long _stext, _etext, _sinittext, _einittext; |
| 71 | static int all_symbols = 0; |
| 72 | |
| 73 | struct token { |
| 74 | unsigned char data[MAX_TOK_SIZE]; |
| 75 | unsigned char len; |
| 76 | /* profit: the number of bytes that could be saved by inserting this |
| 77 | * token into the table */ |
| 78 | int profit; |
| 79 | struct token *next; /* next token on the hash list */ |
| 80 | struct token *right; /* next token on the good/bad list */ |
| 81 | struct token *left; /* previous token on the good/bad list */ |
| 82 | struct token *smaller; /* token that is less one letter than this one */ |
| 83 | }; |
| 84 | |
| 85 | struct token bad_head, good_head; |
| 86 | struct token *hash_table[HASH_TABLE_SIZE]; |
| 87 | |
| 88 | /* the table that holds the result of the compression */ |
| 89 | unsigned char best_table[256][MAX_TOK_SIZE+1]; |
| 90 | unsigned char best_table_len[256]; |
| 91 | |
| 92 | |
| 93 | static void |
| 94 | usage(void) |
| 95 | { |
| 96 | fprintf(stderr, "Usage: kallsyms [--all-symbols] < in.map > out.S\n"); |
| 97 | exit(1); |
| 98 | } |
| 99 | |
| 100 | /* |
| 101 | * This ignores the intensely annoying "mapping symbols" found |
| 102 | * in ARM ELF files: $a, $t and $d. |
| 103 | */ |
| 104 | static inline int |
| 105 | is_arm_mapping_symbol(const char *str) |
| 106 | { |
| 107 | return str[0] == '$' && strchr("atd", str[1]) |
| 108 | && (str[2] == '\0' || str[2] == '.'); |
| 109 | } |
| 110 | |
| 111 | static int |
| 112 | read_symbol(FILE *in, struct sym_entry *s) |
| 113 | { |
| 114 | char str[500]; |
| 115 | int rc; |
| 116 | |
| 117 | rc = fscanf(in, "%llx %c %499s\n", &s->addr, &s->type, str); |
| 118 | if (rc != 3) { |
| 119 | if (rc != EOF) { |
| 120 | /* skip line */ |
| 121 | fgets(str, 500, in); |
| 122 | } |
| 123 | return -1; |
| 124 | } |
| 125 | |
| 126 | /* Ignore most absolute/undefined (?) symbols. */ |
| 127 | if (strcmp(str, "_stext") == 0) |
| 128 | _stext = s->addr; |
| 129 | else if (strcmp(str, "_etext") == 0) |
| 130 | _etext = s->addr; |
| 131 | else if (strcmp(str, "_sinittext") == 0) |
| 132 | _sinittext = s->addr; |
| 133 | else if (strcmp(str, "_einittext") == 0) |
| 134 | _einittext = s->addr; |
| 135 | else if (toupper(s->type) == 'A') |
| 136 | { |
| 137 | /* Keep these useful absolute symbols */ |
| 138 | if (strcmp(str, "__kernel_syscall_via_break") && |
| 139 | strcmp(str, "__kernel_syscall_via_epc") && |
| 140 | strcmp(str, "__kernel_sigtramp") && |
| 141 | strcmp(str, "__gp")) |
| 142 | return -1; |
| 143 | |
| 144 | } |
| 145 | else if (toupper(s->type) == 'U' || |
| 146 | is_arm_mapping_symbol(str)) |
| 147 | return -1; |
| 148 | |
| 149 | /* include the type field in the symbol name, so that it gets |
| 150 | * compressed together */ |
| 151 | s->len = strlen(str) + 1; |
| 152 | s->sym = (char *) malloc(s->len + 1); |
| 153 | strcpy(s->sym + 1, str); |
| 154 | s->sym[0] = s->type; |
| 155 | |
| 156 | return 0; |
| 157 | } |
| 158 | |
| 159 | static int |
| 160 | symbol_valid(struct sym_entry *s) |
| 161 | { |
| 162 | /* Symbols which vary between passes. Passes 1 and 2 must have |
| 163 | * identical symbol lists. The kallsyms_* symbols below are only added |
| 164 | * after pass 1, they would be included in pass 2 when --all-symbols is |
| 165 | * specified so exclude them to get a stable symbol list. |
| 166 | */ |
| 167 | static char *special_symbols[] = { |
| 168 | "kallsyms_addresses", |
| 169 | "kallsyms_num_syms", |
| 170 | "kallsyms_names", |
| 171 | "kallsyms_markers", |
| 172 | "kallsyms_token_table", |
| 173 | "kallsyms_token_index", |
| 174 | |
| 175 | /* Exclude linker generated symbols which vary between passes */ |
| 176 | "_SDA_BASE_", /* ppc */ |
| 177 | "_SDA2_BASE_", /* ppc */ |
| 178 | NULL }; |
| 179 | int i; |
| 180 | |
| 181 | /* if --all-symbols is not specified, then symbols outside the text |
| 182 | * and inittext sections are discarded */ |
| 183 | if (!all_symbols) { |
| 184 | if ((s->addr < _stext || s->addr > _etext) |
| 185 | && (s->addr < _sinittext || s->addr > _einittext)) |
| 186 | return 0; |
| 187 | /* Corner case. Discard any symbols with the same value as |
| 188 | * _etext or _einittext, they can move between pass 1 and 2 |
| 189 | * when the kallsyms data is added. If these symbols move then |
| 190 | * they may get dropped in pass 2, which breaks the kallsyms |
| 191 | * rules. |
| 192 | */ |
| 193 | if ((s->addr == _etext && strcmp(s->sym + 1, "_etext")) || |
| 194 | (s->addr == _einittext && strcmp(s->sym + 1, "_einittext"))) |
| 195 | return 0; |
| 196 | } |
| 197 | |
| 198 | /* Exclude symbols which vary between passes. */ |
| 199 | if (strstr(s->sym + 1, "_compiled.")) |
| 200 | return 0; |
| 201 | |
| 202 | for (i = 0; special_symbols[i]; i++) |
| 203 | if( strcmp(s->sym + 1, special_symbols[i]) == 0 ) |
| 204 | return 0; |
| 205 | |
| 206 | return 1; |
| 207 | } |
| 208 | |
| 209 | static void |
| 210 | read_map(FILE *in) |
| 211 | { |
| 212 | while (!feof(in)) { |
| 213 | if (cnt >= size) { |
| 214 | size += 10000; |
| 215 | table = realloc(table, sizeof(*table) * size); |
| 216 | if (!table) { |
| 217 | fprintf(stderr, "out of memory\n"); |
| 218 | exit (1); |
| 219 | } |
| 220 | } |
| 221 | if (read_symbol(in, &table[cnt]) == 0) |
| 222 | cnt++; |
| 223 | } |
| 224 | } |
| 225 | |
| 226 | static void output_label(char *label) |
| 227 | { |
| 228 | printf(".globl %s\n",label); |
| 229 | printf("\tALGN\n"); |
| 230 | printf("%s:\n",label); |
| 231 | } |
| 232 | |
| 233 | /* uncompress a compressed symbol. When this function is called, the best table |
| 234 | * might still be compressed itself, so the function needs to be recursive */ |
| 235 | static int expand_symbol(unsigned char *data, int len, char *result) |
| 236 | { |
| 237 | int c, rlen, total=0; |
| 238 | |
| 239 | while (len) { |
| 240 | c = *data; |
| 241 | /* if the table holds a single char that is the same as the one |
| 242 | * we are looking for, then end the search */ |
| 243 | if (best_table[c][0]==c && best_table_len[c]==1) { |
| 244 | *result++ = c; |
| 245 | total++; |
| 246 | } else { |
| 247 | /* if not, recurse and expand */ |
| 248 | rlen = expand_symbol(best_table[c], best_table_len[c], result); |
| 249 | total += rlen; |
| 250 | result += rlen; |
| 251 | } |
| 252 | data++; |
| 253 | len--; |
| 254 | } |
| 255 | *result=0; |
| 256 | |
| 257 | return total; |
| 258 | } |
| 259 | |
| 260 | static void |
| 261 | write_src(void) |
| 262 | { |
| 263 | int i, k, off, valid; |
| 264 | unsigned int best_idx[256]; |
| 265 | unsigned int *markers; |
| 266 | char buf[KSYM_NAME_LEN+1]; |
| 267 | |
| 268 | printf("#include <asm/types.h>\n"); |
| 269 | printf("#if BITS_PER_LONG == 64\n"); |
| 270 | printf("#define PTR .quad\n"); |
| 271 | printf("#define ALGN .align 8\n"); |
| 272 | printf("#else\n"); |
| 273 | printf("#define PTR .long\n"); |
| 274 | printf("#define ALGN .align 4\n"); |
| 275 | printf("#endif\n"); |
| 276 | |
| 277 | printf(".data\n"); |
| 278 | |
| 279 | output_label("kallsyms_addresses"); |
| 280 | valid = 0; |
| 281 | for (i = 0; i < cnt; i++) { |
| 282 | if (table[i].flags & SYM_FLAG_VALID) { |
| 283 | printf("\tPTR\t%#llx\n", table[i].addr); |
| 284 | valid++; |
| 285 | } |
| 286 | } |
| 287 | printf("\n"); |
| 288 | |
| 289 | output_label("kallsyms_num_syms"); |
| 290 | printf("\tPTR\t%d\n", valid); |
| 291 | printf("\n"); |
| 292 | |
| 293 | /* table of offset markers, that give the offset in the compressed stream |
| 294 | * every 256 symbols */ |
| 295 | markers = (unsigned int *) malloc(sizeof(unsigned int)*((valid + 255) / 256)); |
| 296 | |
| 297 | output_label("kallsyms_names"); |
| 298 | valid = 0; |
| 299 | off = 0; |
| 300 | for (i = 0; i < cnt; i++) { |
| 301 | |
| 302 | if (!table[i].flags & SYM_FLAG_VALID) |
| 303 | continue; |
| 304 | |
| 305 | if ((valid & 0xFF) == 0) |
| 306 | markers[valid >> 8] = off; |
| 307 | |
| 308 | printf("\t.byte 0x%02x", table[i].len); |
| 309 | for (k = 0; k < table[i].len; k++) |
| 310 | printf(", 0x%02x", table[i].sym[k]); |
| 311 | printf("\n"); |
| 312 | |
| 313 | off += table[i].len + 1; |
| 314 | valid++; |
| 315 | } |
| 316 | printf("\n"); |
| 317 | |
| 318 | output_label("kallsyms_markers"); |
| 319 | for (i = 0; i < ((valid + 255) >> 8); i++) |
| 320 | printf("\tPTR\t%d\n", markers[i]); |
| 321 | printf("\n"); |
| 322 | |
| 323 | free(markers); |
| 324 | |
| 325 | output_label("kallsyms_token_table"); |
| 326 | off = 0; |
| 327 | for (i = 0; i < 256; i++) { |
| 328 | best_idx[i] = off; |
| 329 | expand_symbol(best_table[i],best_table_len[i],buf); |
| 330 | printf("\t.asciz\t\"%s\"\n", buf); |
| 331 | off += strlen(buf) + 1; |
| 332 | } |
| 333 | printf("\n"); |
| 334 | |
| 335 | output_label("kallsyms_token_index"); |
| 336 | for (i = 0; i < 256; i++) |
| 337 | printf("\t.short\t%d\n", best_idx[i]); |
| 338 | printf("\n"); |
| 339 | } |
| 340 | |
| 341 | |
| 342 | /* table lookup compression functions */ |
| 343 | |
| 344 | static inline unsigned int rehash_token(unsigned int hash, unsigned char data) |
| 345 | { |
| 346 | return ((hash * 16777619) ^ data); |
| 347 | } |
| 348 | |
| 349 | static unsigned int hash_token(unsigned char *data, int len) |
| 350 | { |
| 351 | unsigned int hash=HASH_BASE_OFFSET; |
| 352 | int i; |
| 353 | |
| 354 | for (i = 0; i < len; i++) |
| 355 | hash = rehash_token(hash, data[i]); |
| 356 | |
| 357 | return HASH_FOLD(hash); |
| 358 | } |
| 359 | |
| 360 | /* find a token given its data and hash value */ |
| 361 | static struct token *find_token_hash(unsigned char *data, int len, unsigned int hash) |
| 362 | { |
| 363 | struct token *ptr; |
| 364 | |
| 365 | ptr = hash_table[hash]; |
| 366 | |
| 367 | while (ptr) { |
| 368 | if ((ptr->len == len) && (memcmp(ptr->data, data, len) == 0)) |
| 369 | return ptr; |
| 370 | ptr=ptr->next; |
| 371 | } |
| 372 | |
| 373 | return NULL; |
| 374 | } |
| 375 | |
| 376 | static inline void insert_token_in_group(struct token *head, struct token *ptr) |
| 377 | { |
| 378 | ptr->right = head->right; |
| 379 | ptr->right->left = ptr; |
| 380 | head->right = ptr; |
| 381 | ptr->left = head; |
| 382 | } |
| 383 | |
| 384 | static inline void remove_token_from_group(struct token *ptr) |
| 385 | { |
| 386 | ptr->left->right = ptr->right; |
| 387 | ptr->right->left = ptr->left; |
| 388 | } |
| 389 | |
| 390 | |
| 391 | /* build the counts for all the tokens that start with "data", and have lenghts |
| 392 | * from 2 to "len" */ |
| 393 | static void learn_token(unsigned char *data, int len) |
| 394 | { |
| 395 | struct token *ptr,*last_ptr; |
| 396 | int i, newprofit; |
| 397 | unsigned int hash = HASH_BASE_OFFSET; |
| 398 | unsigned int hashes[MAX_TOK_SIZE + 1]; |
| 399 | |
| 400 | if (len > MAX_TOK_SIZE) |
| 401 | len = MAX_TOK_SIZE; |
| 402 | |
| 403 | /* calculate and store the hash values for all the sub-tokens */ |
| 404 | hash = rehash_token(hash, data[0]); |
| 405 | for (i = 2; i <= len; i++) { |
| 406 | hash = rehash_token(hash, data[i-1]); |
| 407 | hashes[i] = HASH_FOLD(hash); |
| 408 | } |
| 409 | |
| 410 | last_ptr = NULL; |
| 411 | ptr = NULL; |
| 412 | |
| 413 | for (i = len; i >= 2; i--) { |
| 414 | hash = hashes[i]; |
| 415 | |
| 416 | if (!ptr) ptr = find_token_hash(data, i, hash); |
| 417 | |
| 418 | if (!ptr) { |
| 419 | /* create a new token entry */ |
| 420 | ptr = (struct token *) malloc(sizeof(*ptr)); |
| 421 | |
| 422 | memcpy(ptr->data, data, i); |
| 423 | ptr->len = i; |
| 424 | |
| 425 | /* when we create an entry, it's profit is 0 because |
| 426 | * we also take into account the size of the token on |
| 427 | * the compressed table. We then subtract GOOD_BAD_THRESHOLD |
| 428 | * so that the test to see if this token belongs to |
| 429 | * the good or bad list, is a comparison to zero */ |
| 430 | ptr->profit = -GOOD_BAD_THRESHOLD; |
| 431 | |
| 432 | ptr->next = hash_table[hash]; |
| 433 | hash_table[hash] = ptr; |
| 434 | |
| 435 | insert_token_in_group(&bad_head, ptr); |
| 436 | |
| 437 | ptr->smaller = NULL; |
| 438 | } else { |
| 439 | newprofit = ptr->profit + (ptr->len - 1); |
| 440 | /* check to see if this token needs to be moved to a |
| 441 | * different list */ |
| 442 | if((ptr->profit < 0) && (newprofit >= 0)) { |
| 443 | remove_token_from_group(ptr); |
| 444 | insert_token_in_group(&good_head,ptr); |
| 445 | } |
| 446 | ptr->profit = newprofit; |
| 447 | } |
| 448 | |
| 449 | if (last_ptr) last_ptr->smaller = ptr; |
| 450 | last_ptr = ptr; |
| 451 | |
| 452 | ptr = ptr->smaller; |
| 453 | } |
| 454 | } |
| 455 | |
| 456 | /* decrease the counts for all the tokens that start with "data", and have lenghts |
| 457 | * from 2 to "len". This function is much simpler than learn_token because we have |
| 458 | * more guarantees (tho tokens exist, the ->smaller pointer is set, etc.) |
| 459 | * The two separate functions exist only because of compression performance */ |
| 460 | static void forget_token(unsigned char *data, int len) |
| 461 | { |
| 462 | struct token *ptr; |
| 463 | int i, newprofit; |
| 464 | unsigned int hash=0; |
| 465 | |
| 466 | if (len > MAX_TOK_SIZE) len = MAX_TOK_SIZE; |
| 467 | |
| 468 | hash = hash_token(data, len); |
| 469 | ptr = find_token_hash(data, len, hash); |
| 470 | |
| 471 | for (i = len; i >= 2; i--) { |
| 472 | |
| 473 | newprofit = ptr->profit - (ptr->len - 1); |
| 474 | if ((ptr->profit >= 0) && (newprofit < 0)) { |
| 475 | remove_token_from_group(ptr); |
| 476 | insert_token_in_group(&bad_head, ptr); |
| 477 | } |
| 478 | ptr->profit=newprofit; |
| 479 | |
| 480 | ptr=ptr->smaller; |
| 481 | } |
| 482 | } |
| 483 | |
| 484 | /* count all the possible tokens in a symbol */ |
| 485 | static void learn_symbol(unsigned char *symbol, int len) |
| 486 | { |
| 487 | int i; |
| 488 | |
| 489 | for (i = 0; i < len - 1; i++) |
| 490 | learn_token(symbol + i, len - i); |
| 491 | } |
| 492 | |
| 493 | /* decrease the count for all the possible tokens in a symbol */ |
| 494 | static void forget_symbol(unsigned char *symbol, int len) |
| 495 | { |
| 496 | int i; |
| 497 | |
| 498 | for (i = 0; i < len - 1; i++) |
| 499 | forget_token(symbol + i, len - i); |
| 500 | } |
| 501 | |
| 502 | /* set all the symbol flags and do the initial token count */ |
| 503 | static void build_initial_tok_table(void) |
| 504 | { |
| 505 | int i, use_it, valid; |
| 506 | |
| 507 | valid = 0; |
| 508 | for (i = 0; i < cnt; i++) { |
| 509 | table[i].flags = 0; |
| 510 | if ( symbol_valid(&table[i]) ) { |
| 511 | table[i].flags |= SYM_FLAG_VALID; |
| 512 | valid++; |
| 513 | } |
| 514 | } |
| 515 | |
| 516 | use_it = 0; |
| 517 | for (i = 0; i < cnt; i++) { |
| 518 | |
| 519 | /* subsample the available symbols. This method is almost like |
| 520 | * a Bresenham's algorithm to get uniformly distributed samples |
| 521 | * across the symbol table */ |
| 522 | if (table[i].flags & SYM_FLAG_VALID) { |
| 523 | |
| 524 | use_it += WORKING_SET; |
| 525 | |
| 526 | if (use_it >= valid) { |
| 527 | table[i].flags |= SYM_FLAG_SAMPLED; |
| 528 | use_it -= valid; |
| 529 | } |
| 530 | } |
| 531 | if (table[i].flags & SYM_FLAG_SAMPLED) |
| 532 | learn_symbol(table[i].sym, table[i].len); |
| 533 | } |
| 534 | } |
| 535 | |
| 536 | /* replace a given token in all the valid symbols. Use the sampled symbols |
| 537 | * to update the counts */ |
| 538 | static void compress_symbols(unsigned char *str, int tlen, int idx) |
| 539 | { |
| 540 | int i, len, learn, size; |
| 541 | unsigned char *p; |
| 542 | |
| 543 | for (i = 0; i < cnt; i++) { |
| 544 | |
| 545 | if (!(table[i].flags & SYM_FLAG_VALID)) continue; |
| 546 | |
| 547 | len = table[i].len; |
| 548 | learn = 0; |
| 549 | p = table[i].sym; |
| 550 | |
| 551 | do { |
| 552 | /* find the token on the symbol */ |
| 553 | p = (unsigned char *) strstr((char *) p, (char *) str); |
| 554 | if (!p) break; |
| 555 | |
| 556 | if (!learn) { |
| 557 | /* if this symbol was used to count, decrease it */ |
| 558 | if (table[i].flags & SYM_FLAG_SAMPLED) |
| 559 | forget_symbol(table[i].sym, len); |
| 560 | learn = 1; |
| 561 | } |
| 562 | |
| 563 | *p = idx; |
| 564 | size = (len - (p - table[i].sym)) - tlen + 1; |
| 565 | memmove(p + 1, p + tlen, size); |
| 566 | p++; |
| 567 | len -= tlen - 1; |
| 568 | |
| 569 | } while (size >= tlen); |
| 570 | |
| 571 | if(learn) { |
| 572 | table[i].len = len; |
| 573 | /* if this symbol was used to count, learn it again */ |
| 574 | if(table[i].flags & SYM_FLAG_SAMPLED) |
| 575 | learn_symbol(table[i].sym, len); |
| 576 | } |
| 577 | } |
| 578 | } |
| 579 | |
| 580 | /* search the token with the maximum profit */ |
| 581 | static struct token *find_best_token(void) |
| 582 | { |
| 583 | struct token *ptr,*best,*head; |
| 584 | int bestprofit; |
| 585 | |
| 586 | bestprofit=-10000; |
| 587 | |
| 588 | /* failsafe: if the "good" list is empty search from the "bad" list */ |
| 589 | if(good_head.right == &good_head) head = &bad_head; |
| 590 | else head = &good_head; |
| 591 | |
| 592 | ptr = head->right; |
| 593 | best = NULL; |
| 594 | while (ptr != head) { |
| 595 | if (ptr->profit > bestprofit) { |
| 596 | bestprofit = ptr->profit; |
| 597 | best = ptr; |
| 598 | } |
| 599 | ptr = ptr->right; |
| 600 | } |
| 601 | |
| 602 | return best; |
| 603 | } |
| 604 | |
| 605 | /* this is the core of the algorithm: calculate the "best" table */ |
| 606 | static void optimize_result(void) |
| 607 | { |
| 608 | struct token *best; |
| 609 | int i; |
| 610 | |
| 611 | /* using the '\0' symbol last allows compress_symbols to use standard |
| 612 | * fast string functions */ |
| 613 | for (i = 255; i >= 0; i--) { |
| 614 | |
| 615 | /* if this table slot is empty (it is not used by an actual |
| 616 | * original char code */ |
| 617 | if (!best_table_len[i]) { |
| 618 | |
| 619 | /* find the token with the breates profit value */ |
| 620 | best = find_best_token(); |
| 621 | |
| 622 | /* place it in the "best" table */ |
| 623 | best_table_len[i] = best->len; |
| 624 | memcpy(best_table[i], best->data, best_table_len[i]); |
| 625 | /* zero terminate the token so that we can use strstr |
| 626 | in compress_symbols */ |
| 627 | best_table[i][best_table_len[i]]='\0'; |
| 628 | |
| 629 | /* replace this token in all the valid symbols */ |
| 630 | compress_symbols(best_table[i], best_table_len[i], i); |
| 631 | } |
| 632 | } |
| 633 | } |
| 634 | |
| 635 | /* start by placing the symbols that are actually used on the table */ |
| 636 | static void insert_real_symbols_in_table(void) |
| 637 | { |
| 638 | int i, j, c; |
| 639 | |
| 640 | memset(best_table, 0, sizeof(best_table)); |
| 641 | memset(best_table_len, 0, sizeof(best_table_len)); |
| 642 | |
| 643 | for (i = 0; i < cnt; i++) { |
| 644 | if (table[i].flags & SYM_FLAG_VALID) { |
| 645 | for (j = 0; j < table[i].len; j++) { |
| 646 | c = table[i].sym[j]; |
| 647 | best_table[c][0]=c; |
| 648 | best_table_len[c]=1; |
| 649 | } |
| 650 | } |
| 651 | } |
| 652 | } |
| 653 | |
| 654 | static void optimize_token_table(void) |
| 655 | { |
| 656 | memset(hash_table, 0, sizeof(hash_table)); |
| 657 | |
| 658 | good_head.left = &good_head; |
| 659 | good_head.right = &good_head; |
| 660 | |
| 661 | bad_head.left = &bad_head; |
| 662 | bad_head.right = &bad_head; |
| 663 | |
| 664 | build_initial_tok_table(); |
| 665 | |
| 666 | insert_real_symbols_in_table(); |
| 667 | |
| 668 | optimize_result(); |
| 669 | } |
| 670 | |
| 671 | |
| 672 | int |
| 673 | main(int argc, char **argv) |
| 674 | { |
| 675 | if (argc == 2 && strcmp(argv[1], "--all-symbols") == 0) |
| 676 | all_symbols = 1; |
| 677 | else if (argc != 1) |
| 678 | usage(); |
| 679 | |
| 680 | read_map(stdin); |
| 681 | optimize_token_table(); |
| 682 | write_src(); |
| 683 | |
| 684 | return 0; |
| 685 | } |
| 686 | |