Andy Lutomirski | 98eedc3 | 2011-07-13 09:24:16 -0400 | [diff] [blame] | 1 | /* |
| 2 | * parse_vdso.c: Linux reference vDSO parser |
| 3 | * Written by Andrew Lutomirski, 2011. |
| 4 | * |
| 5 | * This code is meant to be linked in to various programs that run on Linux. |
| 6 | * As such, it is available with as few restrictions as possible. This file |
| 7 | * is licensed under the Creative Commons Zero License, version 1.0, |
| 8 | * available at http://creativecommons.org/publicdomain/zero/1.0/legalcode |
| 9 | * |
| 10 | * The vDSO is a regular ELF DSO that the kernel maps into user space when |
| 11 | * it starts a program. It works equally well in statically and dynamically |
| 12 | * linked binaries. |
| 13 | * |
| 14 | * This code is tested on x86_64. In principle it should work on any 64-bit |
| 15 | * architecture that has a vDSO. |
| 16 | */ |
| 17 | |
| 18 | #include <stdbool.h> |
| 19 | #include <stdint.h> |
| 20 | #include <string.h> |
| 21 | #include <elf.h> |
| 22 | |
| 23 | /* |
| 24 | * To use this vDSO parser, first call one of the vdso_init_* functions. |
| 25 | * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR |
| 26 | * to vdso_init_from_sysinfo_ehdr. Otherwise pass auxv to vdso_init_from_auxv. |
| 27 | * Then call vdso_sym for each symbol you want. For example, to look up |
| 28 | * gettimeofday on x86_64, use: |
| 29 | * |
| 30 | * <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday"); |
| 31 | * or |
| 32 | * <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday"); |
| 33 | * |
| 34 | * vdso_sym will return 0 if the symbol doesn't exist or if the init function |
| 35 | * failed or was not called. vdso_sym is a little slow, so its return value |
| 36 | * should be cached. |
| 37 | * |
| 38 | * vdso_sym is threadsafe; the init functions are not. |
| 39 | * |
| 40 | * These are the prototypes: |
| 41 | */ |
/*
 * Scan an auxiliary vector (terminated by an AT_NULL entry) for
 * AT_SYSINFO_EHDR and initialize the parser from it.
 */
void vdso_init_from_auxv(void *auxv);

/* Initialize the parser from an already-known AT_SYSINFO_EHDR value. */
void vdso_init_from_sysinfo_ehdr(uintptr_t base);

/*
 * Look up the function `name` carrying symbol version `version`.
 * Returns its address, or 0 if it is not found or the parser is not
 * initialized.
 */
void *vdso_sym(const char *version, const char *name);
| 45 | |
| 46 | |
| 47 | /* And here's the code. */ |
| 48 | |
/* Only x86_64 is supported so far; stop the build on anything else. */
#if !defined(__x86_64__)
#error Not yet ported to non-x86_64 architectures
#endif
| 52 | |
/*
 * Parser state, filled in by vdso_init_from_sysinfo_ehdr() and read by
 * vdso_sym().  There is exactly one instance, private to this file.
 */
static struct vdso_info {
	/* Set to true only once initialization has fully succeeded. */
	bool valid;

	/* Load information */
	uintptr_t load_addr;	/* base address passed to the init function */
	uintptr_t load_offset;	/* load_addr - recorded vaddr */

	/* Symbol table (from DT_SYMTAB / DT_STRTAB) */
	Elf64_Sym *symtab;
	const char *symstrings;

	/* Hash table (from DT_HASH): arrays first, then their lengths */
	Elf64_Word *bucket;
	Elf64_Word *chain;
	Elf64_Word nbucket;
	Elf64_Word nchain;

	/* Version table (from DT_VERSYM / DT_VERDEF) */
	Elf64_Versym *versym;
	Elf64_Verdef *verdef;
} vdso_info;
| 71 | |
/*
 * The hash function from the ELF specification, applied to the
 * NUL-terminated byte string `name`.
 *
 * The accumulator is deliberately 32 bits wide.  With a 64-bit
 * unsigned long, (h << 4) + c can carry into bit 32, which the
 * 0xf0000000 mask never clears; the result would then disagree with the
 * 32-bit hashes stored in the ELF hash and version tables.
 *
 * Returns the hash; the top four bits of the 32-bit value are always 0.
 */
static unsigned long elf_hash(const unsigned char *name)
{
	uint32_t h = 0;

	while (*name) {
		uint32_t g;

		h = (h << 4) + *name++;
		g = h & 0xf0000000u;
		if (g)
			h ^= g >> 24;
		h &= ~g;
	}
	return h;
}
| 85 | |
| 86 | void vdso_init_from_sysinfo_ehdr(uintptr_t base) |
| 87 | { |
| 88 | size_t i; |
| 89 | bool found_vaddr = false; |
| 90 | |
| 91 | vdso_info.valid = false; |
| 92 | |
| 93 | vdso_info.load_addr = base; |
| 94 | |
| 95 | Elf64_Ehdr *hdr = (Elf64_Ehdr*)base; |
| 96 | Elf64_Phdr *pt = (Elf64_Phdr*)(vdso_info.load_addr + hdr->e_phoff); |
| 97 | Elf64_Dyn *dyn = 0; |
| 98 | |
| 99 | /* |
| 100 | * We need two things from the segment table: the load offset |
| 101 | * and the dynamic table. |
| 102 | */ |
| 103 | for (i = 0; i < hdr->e_phnum; i++) |
| 104 | { |
| 105 | if (pt[i].p_type == PT_LOAD && !found_vaddr) { |
| 106 | found_vaddr = true; |
| 107 | vdso_info.load_offset = base |
| 108 | + (uintptr_t)pt[i].p_offset |
| 109 | - (uintptr_t)pt[i].p_vaddr; |
| 110 | } else if (pt[i].p_type == PT_DYNAMIC) { |
| 111 | dyn = (Elf64_Dyn*)(base + pt[i].p_offset); |
| 112 | } |
| 113 | } |
| 114 | |
| 115 | if (!found_vaddr || !dyn) |
| 116 | return; /* Failed */ |
| 117 | |
| 118 | /* |
| 119 | * Fish out the useful bits of the dynamic table. |
| 120 | */ |
| 121 | Elf64_Word *hash = 0; |
| 122 | vdso_info.symstrings = 0; |
| 123 | vdso_info.symtab = 0; |
| 124 | vdso_info.versym = 0; |
| 125 | vdso_info.verdef = 0; |
| 126 | for (i = 0; dyn[i].d_tag != DT_NULL; i++) { |
| 127 | switch (dyn[i].d_tag) { |
| 128 | case DT_STRTAB: |
| 129 | vdso_info.symstrings = (const char *) |
| 130 | ((uintptr_t)dyn[i].d_un.d_ptr |
| 131 | + vdso_info.load_offset); |
| 132 | break; |
| 133 | case DT_SYMTAB: |
| 134 | vdso_info.symtab = (Elf64_Sym *) |
| 135 | ((uintptr_t)dyn[i].d_un.d_ptr |
| 136 | + vdso_info.load_offset); |
| 137 | break; |
| 138 | case DT_HASH: |
| 139 | hash = (Elf64_Word *) |
| 140 | ((uintptr_t)dyn[i].d_un.d_ptr |
| 141 | + vdso_info.load_offset); |
| 142 | break; |
| 143 | case DT_VERSYM: |
| 144 | vdso_info.versym = (Elf64_Versym *) |
| 145 | ((uintptr_t)dyn[i].d_un.d_ptr |
| 146 | + vdso_info.load_offset); |
| 147 | break; |
| 148 | case DT_VERDEF: |
| 149 | vdso_info.verdef = (Elf64_Verdef *) |
| 150 | ((uintptr_t)dyn[i].d_un.d_ptr |
| 151 | + vdso_info.load_offset); |
| 152 | break; |
| 153 | } |
| 154 | } |
| 155 | if (!vdso_info.symstrings || !vdso_info.symtab || !hash) |
| 156 | return; /* Failed */ |
| 157 | |
| 158 | if (!vdso_info.verdef) |
| 159 | vdso_info.versym = 0; |
| 160 | |
| 161 | /* Parse the hash table header. */ |
| 162 | vdso_info.nbucket = hash[0]; |
| 163 | vdso_info.nchain = hash[1]; |
| 164 | vdso_info.bucket = &hash[2]; |
| 165 | vdso_info.chain = &hash[vdso_info.nbucket + 2]; |
| 166 | |
| 167 | /* That's all we need. */ |
| 168 | vdso_info.valid = true; |
| 169 | } |
| 170 | |
| 171 | static bool vdso_match_version(Elf64_Versym ver, |
| 172 | const char *name, Elf64_Word hash) |
| 173 | { |
| 174 | /* |
| 175 | * This is a helper function to check if the version indexed by |
| 176 | * ver matches name (which hashes to hash). |
| 177 | * |
| 178 | * The version definition table is a mess, and I don't know how |
| 179 | * to do this in better than linear time without allocating memory |
| 180 | * to build an index. I also don't know why the table has |
| 181 | * variable size entries in the first place. |
| 182 | * |
| 183 | * For added fun, I can't find a comprehensible specification of how |
| 184 | * to parse all the weird flags in the table. |
| 185 | * |
| 186 | * So I just parse the whole table every time. |
| 187 | */ |
| 188 | |
| 189 | /* First step: find the version definition */ |
| 190 | ver &= 0x7fff; /* Apparently bit 15 means "hidden" */ |
| 191 | Elf64_Verdef *def = vdso_info.verdef; |
| 192 | while(true) { |
| 193 | if ((def->vd_flags & VER_FLG_BASE) == 0 |
| 194 | && (def->vd_ndx & 0x7fff) == ver) |
| 195 | break; |
| 196 | |
| 197 | if (def->vd_next == 0) |
| 198 | return false; /* No definition. */ |
| 199 | |
| 200 | def = (Elf64_Verdef *)((char *)def + def->vd_next); |
| 201 | } |
| 202 | |
| 203 | /* Now figure out whether it matches. */ |
| 204 | Elf64_Verdaux *aux = (Elf64_Verdaux*)((char *)def + def->vd_aux); |
| 205 | return def->vd_hash == hash |
| 206 | && !strcmp(name, vdso_info.symstrings + aux->vda_name); |
| 207 | } |
| 208 | |
| 209 | void *vdso_sym(const char *version, const char *name) |
| 210 | { |
| 211 | unsigned long ver_hash; |
| 212 | if (!vdso_info.valid) |
| 213 | return 0; |
| 214 | |
| 215 | ver_hash = elf_hash(version); |
| 216 | Elf64_Word chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket]; |
| 217 | |
| 218 | for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) { |
| 219 | Elf64_Sym *sym = &vdso_info.symtab[chain]; |
| 220 | |
| 221 | /* Check for a defined global or weak function w/ right name. */ |
| 222 | if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) |
| 223 | continue; |
| 224 | if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && |
| 225 | ELF64_ST_BIND(sym->st_info) != STB_WEAK) |
| 226 | continue; |
| 227 | if (sym->st_shndx == SHN_UNDEF) |
| 228 | continue; |
| 229 | if (strcmp(name, vdso_info.symstrings + sym->st_name)) |
| 230 | continue; |
| 231 | |
| 232 | /* Check symbol version. */ |
| 233 | if (vdso_info.versym |
| 234 | && !vdso_match_version(vdso_info.versym[chain], |
| 235 | version, ver_hash)) |
| 236 | continue; |
| 237 | |
| 238 | return (void *)(vdso_info.load_offset + sym->st_value); |
| 239 | } |
| 240 | |
| 241 | return 0; |
| 242 | } |
| 243 | |
| 244 | void vdso_init_from_auxv(void *auxv) |
| 245 | { |
| 246 | Elf64_auxv_t *elf_auxv = auxv; |
| 247 | for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++) |
| 248 | { |
| 249 | if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) { |
| 250 | vdso_init_from_sysinfo_ehdr(elf_auxv[i].a_un.a_val); |
| 251 | return; |
| 252 | } |
| 253 | } |
| 254 | |
| 255 | vdso_info.valid = false; |
| 256 | } |