/*
 * parse_vdso.c: Linux reference vDSO parser
 * Written by Andrew Lutomirski, 2011.
 *
 * This code is meant to be linked in to various programs that run on Linux.
 * As such, it is available with as few restrictions as possible.  This file
 * is licensed under the Creative Commons Zero License, version 1.0,
 * available at http://creativecommons.org/publicdomain/zero/1.0/legalcode
 *
 * The vDSO is a regular ELF DSO that the kernel maps into user space when
 * it starts a program.  It works equally well in statically and dynamically
 * linked binaries.
 *
 * This code is tested on x86_64.  In principle it should work on any 64-bit
 * architecture that has a vDSO.
 */

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#include <elf.h>

/*
 * To use this vDSO parser, first call one of the vdso_init_* functions.
 * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR
 * to vdso_init_from_sysinfo_ehdr.  Otherwise pass auxv to vdso_init_from_auxv.
 * Then call vdso_sym for each symbol you want.  For example, to look up
 * gettimeofday on x86_64, use:
 *
 *     <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday");
 * or
 *     <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
 *
 * vdso_sym will return 0 if the symbol doesn't exist or if the init function
 * failed or was not called.  vdso_sym is a little slow, so its return value
 * should be cached.
 *
 * vdso_sym is threadsafe; the init functions are not.
 *
 * These are the prototypes:
 */
/* Find AT_SYSINFO_EHDR in an auxv array and initialize the parser from it. */
void vdso_init_from_auxv(void *auxv);

/* Initialize the parser from the vDSO base address (AT_SYSINFO_EHDR value). */
void vdso_init_from_sysinfo_ehdr(uintptr_t base);

/* Look up a function symbol by version and name; returns 0 if not found. */
void *vdso_sym(const char *version, const char *name);


/* And here's the code. */

/* The code below is written against the Elf64_* types; only x86_64 is enabled. */
#if !defined(__x86_64__)
# error Not yet ported to non-x86_64 architectures
#endif

/*
 * Parser state.  Written by vdso_init_from_sysinfo_ehdr() (and cleared by
 * vdso_init_from_auxv() when no vDSO is found); read by vdso_sym().
 * There is exactly one file-local instance.
 */
static struct vdso_info
{
	/* Set to true only after the image has been parsed successfully. */
	bool valid;

	/* Load information */
	uintptr_t load_addr;	/* base address handed to the init function */
	uintptr_t load_offset;	/* load_addr - recorded vaddr */

	/* Symbol table and the string table its names point into */
	Elf64_Sym *symtab;
	const char *symstrings;

	/* DT_HASH table: bucket and chain arrays with their lengths */
	Elf64_Word *bucket;
	Elf64_Word *chain;
	Elf64_Word nbucket;
	Elf64_Word nchain;

	/* Version tables; versym is cleared when there is no verdef */
	Elf64_Versym *versym;
	Elf64_Verdef *verdef;
} vdso_info;

/*
 * SysV ELF hash of a NUL-terminated name, as used by DT_HASH tables.
 *
 * The hash is defined as a 32-bit quantity.  The textbook version from the
 * ELF specification accumulates in an unsigned long; where that type is
 * 64 bits wide, a carry out of bit 31 survives because the 0xf0000000 mask
 * never sees bit 32 and above, and the result stops matching the table.
 * Accumulating in uint32_t makes the arithmetic wrap at 32 bits, so the
 * returned value always fits in the low 28 bits.
 *
 * name: NUL-terminated byte string to hash (must not be NULL).
 * Returns the hash value.
 */
static unsigned long elf_hash(const unsigned char *name)
{
	uint32_t h = 0;

	while (*name) {
		uint32_t g;

		h = (h << 4) + *name++;
		g = h & 0xf0000000u;
		if (g)
			h ^= g >> 24;	/* fold the top nibble back in */
		h &= ~g;		/* and clear it */
	}
	return h;
}

86void vdso_init_from_sysinfo_ehdr(uintptr_t base)
87{
88 size_t i;
89 bool found_vaddr = false;
90
91 vdso_info.valid = false;
92
93 vdso_info.load_addr = base;
94
95 Elf64_Ehdr *hdr = (Elf64_Ehdr*)base;
96 Elf64_Phdr *pt = (Elf64_Phdr*)(vdso_info.load_addr + hdr->e_phoff);
97 Elf64_Dyn *dyn = 0;
98
99 /*
100 * We need two things from the segment table: the load offset
101 * and the dynamic table.
102 */
103 for (i = 0; i < hdr->e_phnum; i++)
104 {
105 if (pt[i].p_type == PT_LOAD && !found_vaddr) {
106 found_vaddr = true;
107 vdso_info.load_offset = base
108 + (uintptr_t)pt[i].p_offset
109 - (uintptr_t)pt[i].p_vaddr;
110 } else if (pt[i].p_type == PT_DYNAMIC) {
111 dyn = (Elf64_Dyn*)(base + pt[i].p_offset);
112 }
113 }
114
115 if (!found_vaddr || !dyn)
116 return; /* Failed */
117
118 /*
119 * Fish out the useful bits of the dynamic table.
120 */
121 Elf64_Word *hash = 0;
122 vdso_info.symstrings = 0;
123 vdso_info.symtab = 0;
124 vdso_info.versym = 0;
125 vdso_info.verdef = 0;
126 for (i = 0; dyn[i].d_tag != DT_NULL; i++) {
127 switch (dyn[i].d_tag) {
128 case DT_STRTAB:
129 vdso_info.symstrings = (const char *)
130 ((uintptr_t)dyn[i].d_un.d_ptr
131 + vdso_info.load_offset);
132 break;
133 case DT_SYMTAB:
134 vdso_info.symtab = (Elf64_Sym *)
135 ((uintptr_t)dyn[i].d_un.d_ptr
136 + vdso_info.load_offset);
137 break;
138 case DT_HASH:
139 hash = (Elf64_Word *)
140 ((uintptr_t)dyn[i].d_un.d_ptr
141 + vdso_info.load_offset);
142 break;
143 case DT_VERSYM:
144 vdso_info.versym = (Elf64_Versym *)
145 ((uintptr_t)dyn[i].d_un.d_ptr
146 + vdso_info.load_offset);
147 break;
148 case DT_VERDEF:
149 vdso_info.verdef = (Elf64_Verdef *)
150 ((uintptr_t)dyn[i].d_un.d_ptr
151 + vdso_info.load_offset);
152 break;
153 }
154 }
155 if (!vdso_info.symstrings || !vdso_info.symtab || !hash)
156 return; /* Failed */
157
158 if (!vdso_info.verdef)
159 vdso_info.versym = 0;
160
161 /* Parse the hash table header. */
162 vdso_info.nbucket = hash[0];
163 vdso_info.nchain = hash[1];
164 vdso_info.bucket = &hash[2];
165 vdso_info.chain = &hash[vdso_info.nbucket + 2];
166
167 /* That's all we need. */
168 vdso_info.valid = true;
169}
170
171static bool vdso_match_version(Elf64_Versym ver,
172 const char *name, Elf64_Word hash)
173{
174 /*
175 * This is a helper function to check if the version indexed by
176 * ver matches name (which hashes to hash).
177 *
178 * The version definition table is a mess, and I don't know how
179 * to do this in better than linear time without allocating memory
180 * to build an index. I also don't know why the table has
181 * variable size entries in the first place.
182 *
183 * For added fun, I can't find a comprehensible specification of how
184 * to parse all the weird flags in the table.
185 *
186 * So I just parse the whole table every time.
187 */
188
189 /* First step: find the version definition */
190 ver &= 0x7fff; /* Apparently bit 15 means "hidden" */
191 Elf64_Verdef *def = vdso_info.verdef;
192 while(true) {
193 if ((def->vd_flags & VER_FLG_BASE) == 0
194 && (def->vd_ndx & 0x7fff) == ver)
195 break;
196
197 if (def->vd_next == 0)
198 return false; /* No definition. */
199
200 def = (Elf64_Verdef *)((char *)def + def->vd_next);
201 }
202
203 /* Now figure out whether it matches. */
204 Elf64_Verdaux *aux = (Elf64_Verdaux*)((char *)def + def->vd_aux);
205 return def->vd_hash == hash
206 && !strcmp(name, vdso_info.symstrings + aux->vda_name);
207}
208
209void *vdso_sym(const char *version, const char *name)
210{
211 unsigned long ver_hash;
212 if (!vdso_info.valid)
213 return 0;
214
215 ver_hash = elf_hash(version);
216 Elf64_Word chain = vdso_info.bucket[elf_hash(name) % vdso_info.nbucket];
217
218 for (; chain != STN_UNDEF; chain = vdso_info.chain[chain]) {
219 Elf64_Sym *sym = &vdso_info.symtab[chain];
220
221 /* Check for a defined global or weak function w/ right name. */
222 if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC)
223 continue;
224 if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL &&
225 ELF64_ST_BIND(sym->st_info) != STB_WEAK)
226 continue;
227 if (sym->st_shndx == SHN_UNDEF)
228 continue;
229 if (strcmp(name, vdso_info.symstrings + sym->st_name))
230 continue;
231
232 /* Check symbol version. */
233 if (vdso_info.versym
234 && !vdso_match_version(vdso_info.versym[chain],
235 version, ver_hash))
236 continue;
237
238 return (void *)(vdso_info.load_offset + sym->st_value);
239 }
240
241 return 0;
242}
243
244void vdso_init_from_auxv(void *auxv)
245{
246 Elf64_auxv_t *elf_auxv = auxv;
247 for (int i = 0; elf_auxv[i].a_type != AT_NULL; i++)
248 {
249 if (elf_auxv[i].a_type == AT_SYSINFO_EHDR) {
250 vdso_init_from_sysinfo_ehdr(elf_auxv[i].a_un.a_val);
251 return;
252 }
253 }
254
255 vdso_info.valid = false;
256}