/*
 * 32-bit syscall ABI conformance test.
 *
 * Copyright (c) 2015 Denys Vlasenko
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
/*
 * Can be built statically:
 * gcc -Os -Wall -static -m32 test_syscall_vdso.c thunks_32.S
 */
#undef _GNU_SOURCE
#define _GNU_SOURCE 1
#undef __USE_GNU
#define __USE_GNU 1
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/select.h>
#include <sys/time.h>
#include <elf.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
| 34 | |
| 35 | #if !defined(__i386__) |
/*
 * Fallback entry point used when the program is not compiled for
 * 32-bit x86 (__i386__ undefined): report that the test is skipped
 * and exit successfully.
 */
int main(int argc, char **argv, char **envp)
{
	(void)argc;
	(void)argv;
	(void)envp;

	printf("[SKIP]\tNot a 32-bit x86 userspace\n");
	return 0;
}
| 41 | #else |
| 42 | |
/*
 * Entry point that run_syscall() invokes through "call *syscall_addr".
 * main() initialises it from get_syscall(); 0 means none was found.
 */
long syscall_addr;

/*
 * Look up the AT_SYSINFO entry of the ELF auxiliary vector.
 *
 * The memory immediately following the NULL terminator of @envp is
 * interpreted as an array of Elf32_auxv_t terminated by AT_NULL.
 *
 * Returns the value of the AT_SYSINFO entry, or 0 (after printing a
 * warning) when no such entry exists.
 */
long get_syscall(char **envp)
{
	Elf32_auxv_t *auxv;

	/* Step over every environment string and the terminating NULL. */
	while (*envp != NULL)
		envp++;
	envp++;

	for (auxv = (Elf32_auxv_t *)envp; auxv->a_type != AT_NULL; auxv++) {
		if (auxv->a_type == AT_SYSINFO)
			return auxv->a_un.a_val;
	}

	printf("[WARN]\tAT_SYSINFO not supplied\n");
	return 0;
}
| 55 | |
| 56 | asm ( |
| 57 | " .pushsection .text\n" |
| 58 | " .global int80\n" |
| 59 | "int80:\n" |
| 60 | " int $0x80\n" |
| 61 | " ret\n" |
| 62 | " .popsection\n" |
| 63 | ); |
| 64 | extern char int80; |
| 65 | |
/*
 * Snapshot of the sixteen 64-bit general purpose registers.
 *
 * The field order is relied upon by get_regs64(), which stores each
 * register at a fixed 8-byte slot (index * 8) from the start of the
 * object; do not reorder or pad.
 */
struct regs64 {
	uint64_t rax, rbx, rcx, rdx;
	uint64_t rsi, rdi, rbp, rsp;
	uint64_t r8, r9, r10, r11;
	uint64_t r12, r13, r14, r15;
};

/* Destination buffer filled in by get_regs64(). */
struct regs64 regs64;

/* Non-zero when main() decided the 64-bit register checks should run. */
int kernel_is_64bit;
| 74 | |
| 75 | asm ( |
| 76 | " .pushsection .text\n" |
| 77 | " .code64\n" |
| 78 | "get_regs64:\n" |
| 79 | " push %rax\n" |
| 80 | " mov $regs64, %eax\n" |
| 81 | " pop 0*8(%rax)\n" |
| 82 | " movq %rbx, 1*8(%rax)\n" |
| 83 | " movq %rcx, 2*8(%rax)\n" |
| 84 | " movq %rdx, 3*8(%rax)\n" |
| 85 | " movq %rsi, 4*8(%rax)\n" |
| 86 | " movq %rdi, 5*8(%rax)\n" |
| 87 | " movq %rbp, 6*8(%rax)\n" |
| 88 | " movq %rsp, 7*8(%rax)\n" |
| 89 | " movq %r8, 8*8(%rax)\n" |
| 90 | " movq %r9, 9*8(%rax)\n" |
| 91 | " movq %r10, 10*8(%rax)\n" |
| 92 | " movq %r11, 11*8(%rax)\n" |
| 93 | " movq %r12, 12*8(%rax)\n" |
| 94 | " movq %r13, 13*8(%rax)\n" |
| 95 | " movq %r14, 14*8(%rax)\n" |
| 96 | " movq %r15, 15*8(%rax)\n" |
| 97 | " ret\n" |
| 98 | "poison_regs64:\n" |
| 99 | " movq $0x7f7f7f7f, %r8\n" |
| 100 | " shl $32, %r8\n" |
| 101 | " orq $0x7f7f7f7f, %r8\n" |
| 102 | " movq %r8, %r9\n" |
Andy Lutomirski | 8bb2610 | 2018-04-17 07:36:36 -0700 | [diff] [blame] | 103 | " incq %r9\n" |
| 104 | " movq %r9, %r10\n" |
| 105 | " incq %r10\n" |
| 106 | " movq %r10, %r11\n" |
| 107 | " incq %r11\n" |
| 108 | " movq %r11, %r12\n" |
| 109 | " incq %r12\n" |
| 110 | " movq %r12, %r13\n" |
| 111 | " incq %r13\n" |
| 112 | " movq %r13, %r14\n" |
| 113 | " incq %r14\n" |
| 114 | " movq %r14, %r15\n" |
| 115 | " incq %r15\n" |
Denys Vlasenko | c25be94 | 2015-09-16 20:23:29 +0200 | [diff] [blame] | 116 | " ret\n" |
| 117 | " .code32\n" |
| 118 | " .popsection\n" |
| 119 | ); |
| 120 | extern void get_regs64(void); |
| 121 | extern void poison_regs64(void); |
| 122 | extern unsigned long call64_from_32(void (*function)(void)); |
| 123 | void print_regs64(void) |
| 124 | { |
| 125 | if (!kernel_is_64bit) |
| 126 | return; |
| 127 | printf("ax:%016llx bx:%016llx cx:%016llx dx:%016llx\n", regs64.rax, regs64.rbx, regs64.rcx, regs64.rdx); |
| 128 | printf("si:%016llx di:%016llx bp:%016llx sp:%016llx\n", regs64.rsi, regs64.rdi, regs64.rbp, regs64.rsp); |
| 129 | printf(" 8:%016llx 9:%016llx 10:%016llx 11:%016llx\n", regs64.r8 , regs64.r9 , regs64.r10, regs64.r11); |
| 130 | printf("12:%016llx 13:%016llx 14:%016llx 15:%016llx\n", regs64.r12, regs64.r13, regs64.r14, regs64.r15); |
| 131 | } |
| 132 | |
| 133 | int check_regs64(void) |
| 134 | { |
| 135 | int err = 0; |
| 136 | int num = 8; |
| 137 | uint64_t *r64 = ®s64.r8; |
Andy Lutomirski | 8bb2610 | 2018-04-17 07:36:36 -0700 | [diff] [blame] | 138 | uint64_t expected = 0x7f7f7f7f7f7f7f7fULL; |
Denys Vlasenko | c25be94 | 2015-09-16 20:23:29 +0200 | [diff] [blame] | 139 | |
| 140 | if (!kernel_is_64bit) |
| 141 | return 0; |
| 142 | |
| 143 | do { |
Andy Lutomirski | 8bb2610 | 2018-04-17 07:36:36 -0700 | [diff] [blame] | 144 | if (*r64 == expected++) |
Denys Vlasenko | c25be94 | 2015-09-16 20:23:29 +0200 | [diff] [blame] | 145 | continue; /* register did not change */ |
| 146 | if (syscall_addr != (long)&int80) { |
| 147 | /* |
| 148 | * Non-INT80 syscall entrypoints are allowed to clobber R8+ regs: |
| 149 | * either clear them to 0, or for R11, load EFLAGS. |
| 150 | */ |
| 151 | if (*r64 == 0) |
| 152 | continue; |
| 153 | if (num == 11) { |
| 154 | printf("[NOTE]\tR11 has changed:%016llx - assuming clobbered by SYSRET insn\n", *r64); |
| 155 | continue; |
| 156 | } |
| 157 | } else { |
Andy Lutomirski | 8bb2610 | 2018-04-17 07:36:36 -0700 | [diff] [blame] | 158 | /* |
| 159 | * INT80 syscall entrypoint can be used by |
Denys Vlasenko | c25be94 | 2015-09-16 20:23:29 +0200 | [diff] [blame] | 160 | * 64-bit programs too, unlike SYSCALL/SYSENTER. |
| 161 | * Therefore it must preserve R12+ |
| 162 | * (they are callee-saved registers in 64-bit C ABI). |
| 163 | * |
Andy Lutomirski | 8bb2610 | 2018-04-17 07:36:36 -0700 | [diff] [blame] | 164 | * Starting in Linux 4.17 (and any kernel that |
| 165 | * backports the change), R8..11 are preserved. |
| 166 | * Historically (and probably unintentionally), they |
| 167 | * were clobbered or zeroed. |
Denys Vlasenko | c25be94 | 2015-09-16 20:23:29 +0200 | [diff] [blame] | 168 | */ |
Denys Vlasenko | c25be94 | 2015-09-16 20:23:29 +0200 | [diff] [blame] | 169 | } |
| 170 | printf("[FAIL]\tR%d has changed:%016llx\n", num, *r64); |
| 171 | err++; |
| 172 | } while (r64++, ++num < 16); |
| 173 | |
| 174 | if (!err) |
| 175 | printf("[OK]\tR8..R15 did not leak kernel data\n"); |
| 176 | return err; |
| 177 | } |
| 178 | |
/*
 * Arguments for the pselect call issued by run_syscall().
 * They are globals because the inline assembly refers to them by
 * symbol name; prep_args() fills them in before each call.
 */
int nfds;
fd_set rfds;
fd_set wfds;
fd_set efds;
struct timespec timeout;
sigset_t sigmask;
/* Sixth argument: pointer to the signal mask plus its size in bytes. */
struct {
	sigset_t *sp;
	int sz;
} sigmask_desc;
| 189 | |
| 190 | void prep_args() |
| 191 | { |
| 192 | nfds = 42; |
| 193 | FD_ZERO(&rfds); |
| 194 | FD_ZERO(&wfds); |
| 195 | FD_ZERO(&efds); |
| 196 | FD_SET(0, &rfds); |
| 197 | FD_SET(1, &wfds); |
| 198 | FD_SET(2, &efds); |
| 199 | timeout.tv_sec = 0; |
| 200 | timeout.tv_nsec = 123; |
| 201 | sigemptyset(&sigmask); |
| 202 | sigaddset(&sigmask, SIGINT); |
| 203 | sigaddset(&sigmask, SIGUSR2); |
| 204 | sigaddset(&sigmask, SIGRTMAX); |
| 205 | sigmask_desc.sp = &sigmask; |
| 206 | sigmask_desc.sz = 8; /* bytes */ |
| 207 | } |
| 208 | |
/*
 * Print "<name>=<r as 16 hex digits> " followed by a decoded view of
 * EFLAGS bits 21..0, most significant bit first. The text for bit 0
 * supplies the trailing newline. A note is printed first if any bit
 * above bit 21 is set.
 */
static void print_flags(const char *name, unsigned long r)
{
	/* Two strings per flag bit: [2 * bit] when clear, [2 * bit + 1] when set. */
	static const char *const bitarray[] = {
		"\n" ,"c\n" ,/* Carry Flag */
		"0 " ,"1 " ,/* Bit 1 - always on */
		"" ,"p " ,/* Parity Flag */
		"0 " ,"3? " ,
		"" ,"a " ,/* Auxiliary carry Flag */
		"0 " ,"5? " ,
		"" ,"z " ,/* Zero Flag */
		"" ,"s " ,/* Sign Flag */
		"" ,"t " ,/* Trap Flag */
		"" ,"i " ,/* Interrupt Flag */
		"" ,"d " ,/* Direction Flag */
		"" ,"o " ,/* Overflow Flag */
		"0 " ,"1 " ,/* I/O Privilege Level (2 bits) */
		"0" ,"1" ,/* I/O Privilege Level (2 bits) */
		"" ,"n " ,/* Nested Task */
		"0 " ,"15? ",
		"" ,"r " ,/* Resume Flag */
		"" ,"v " ,/* Virtual Mode */
		"" ,"ac " ,/* Alignment Check/Access Control */
		"" ,"vif ",/* Virtual Interrupt Flag */
		"" ,"vip ",/* Virtual Interrupt Pending */
		"" ,"id " ,/* CPUID detection */
	};
	int bit;

	printf("%s=%016lx ", name, r);
	if ((r >> 22) != 0)
		printf("(extra bits are set) ");

	for (bit = 21; bit >= 0; bit--) {
		const char *text = bitarray[2 * bit + ((r >> bit) & 1)];

		/* Empty entries mean "print nothing for this state". */
		if (text[0] != '\0')
			fputs(text, stdout);
	}
}
| 251 | |
| 252 | int run_syscall(void) |
| 253 | { |
| 254 | long flags, bad_arg; |
| 255 | |
| 256 | prep_args(); |
| 257 | |
| 258 | if (kernel_is_64bit) |
| 259 | call64_from_32(poison_regs64); |
| 260 | /*print_regs64();*/ |
| 261 | |
| 262 | asm("\n" |
| 263 | /* Try 6-arg syscall: pselect. It should return quickly */ |
| 264 | " push %%ebp\n" |
| 265 | " mov $308, %%eax\n" /* PSELECT */ |
| 266 | " mov nfds, %%ebx\n" /* ebx arg1 */ |
| 267 | " mov $rfds, %%ecx\n" /* ecx arg2 */ |
| 268 | " mov $wfds, %%edx\n" /* edx arg3 */ |
| 269 | " mov $efds, %%esi\n" /* esi arg4 */ |
| 270 | " mov $timeout, %%edi\n" /* edi arg5 */ |
| 271 | " mov $sigmask_desc, %%ebp\n" /* %ebp arg6 */ |
| 272 | " push $0x200ed7\n" /* set almost all flags */ |
| 273 | " popf\n" /* except TF, IOPL, NT, RF, VM, AC, VIF, VIP */ |
| 274 | " call *syscall_addr\n" |
| 275 | /* Check that registers are not clobbered */ |
| 276 | " pushf\n" |
| 277 | " pop %%eax\n" |
| 278 | " cld\n" |
| 279 | " cmp nfds, %%ebx\n" /* ebx arg1 */ |
| 280 | " mov $1, %%ebx\n" |
| 281 | " jne 1f\n" |
| 282 | " cmp $rfds, %%ecx\n" /* ecx arg2 */ |
| 283 | " mov $2, %%ebx\n" |
| 284 | " jne 1f\n" |
| 285 | " cmp $wfds, %%edx\n" /* edx arg3 */ |
| 286 | " mov $3, %%ebx\n" |
| 287 | " jne 1f\n" |
| 288 | " cmp $efds, %%esi\n" /* esi arg4 */ |
| 289 | " mov $4, %%ebx\n" |
| 290 | " jne 1f\n" |
| 291 | " cmp $timeout, %%edi\n" /* edi arg5 */ |
| 292 | " mov $5, %%ebx\n" |
| 293 | " jne 1f\n" |
| 294 | " cmpl $sigmask_desc, %%ebp\n" /* %ebp arg6 */ |
| 295 | " mov $6, %%ebx\n" |
| 296 | " jne 1f\n" |
| 297 | " mov $0, %%ebx\n" |
| 298 | "1:\n" |
| 299 | " pop %%ebp\n" |
| 300 | : "=a" (flags), "=b" (bad_arg) |
| 301 | : |
| 302 | : "cx", "dx", "si", "di" |
| 303 | ); |
| 304 | |
| 305 | if (kernel_is_64bit) { |
| 306 | memset(®s64, 0x77, sizeof(regs64)); |
| 307 | call64_from_32(get_regs64); |
| 308 | /*print_regs64();*/ |
| 309 | } |
| 310 | |
| 311 | /* |
| 312 | * On paravirt kernels, flags are not preserved across syscalls. |
| 313 | * Thus, we do not consider it a bug if some are changed. |
| 314 | * We just show ones which do. |
| 315 | */ |
| 316 | if ((0x200ed7 ^ flags) != 0) { |
| 317 | print_flags("[WARN]\tFlags before", 0x200ed7); |
| 318 | print_flags("[WARN]\tFlags after", flags); |
| 319 | print_flags("[WARN]\tFlags change", (0x200ed7 ^ flags)); |
| 320 | } |
| 321 | |
| 322 | if (bad_arg) { |
| 323 | printf("[FAIL]\targ#%ld clobbered\n", bad_arg); |
| 324 | return 1; |
| 325 | } |
| 326 | printf("[OK]\tArguments are preserved across syscall\n"); |
| 327 | |
| 328 | return check_regs64(); |
| 329 | } |
| 330 | |
| 331 | int run_syscall_twice() |
| 332 | { |
| 333 | int exitcode = 0; |
| 334 | long sv; |
| 335 | |
| 336 | if (syscall_addr) { |
| 337 | printf("[RUN]\tExecuting 6-argument 32-bit syscall via VDSO\n"); |
| 338 | exitcode = run_syscall(); |
| 339 | } |
| 340 | sv = syscall_addr; |
| 341 | syscall_addr = (long)&int80; |
| 342 | printf("[RUN]\tExecuting 6-argument 32-bit syscall via INT 80\n"); |
| 343 | exitcode += run_syscall(); |
| 344 | syscall_addr = sv; |
| 345 | return exitcode; |
| 346 | } |
| 347 | |
/*
 * Fork so that the remaining tests run in a child traced by the parent.
 *
 * Child:  requests tracing, stops itself with SIGSTOP and returns to
 *         the caller; exits with 0 if PTRACE_TRACEME fails.
 * Parent: never returns. It resumes the child with PTRACE_SYSCALL on
 *         every stop and exits with the child's exit status, with the
 *         terminating signal number if the child was killed, or with
 *         255 on an unexpected wait result. Exits with 1 if fork fails.
 */
void ptrace_me(void)
{
	pid_t pid;

	/* Flush stdio first so buffered output is not duplicated by fork. */
	fflush(NULL);
	pid = fork();
	if (pid < 0)
		exit(1);
	if (pid == 0) {
		/* child */
		if (ptrace(PTRACE_TRACEME, 0L, 0L, 0L) != 0)
			exit(0);
		raise(SIGSTOP);
		return;
	}

	/* parent */
	printf("[RUN]\tRunning tests under ptrace\n");
	while (1) {
		int status = 0;

		pid = waitpid(-1, &status, __WALL);
		/*
		 * Check the return value before looking at status: on
		 * failure status is not filled in.
		 */
		if (pid <= 0)
			exit(255);
		if (WIFEXITED(status))
			exit(WEXITSTATUS(status));
		if (WIFSIGNALED(status))
			exit(WTERMSIG(status));
		if (!WIFSTOPPED(status)) /* paranoia */
			exit(255);
		/*
		 * Note: we do not inject sig = WSTOPSIG(status).
		 * We probably should, but careful: do not inject SIGTRAP
		 * generated by syscall entry/exit stops.
		 * That kills the child.
		 */
		ptrace(PTRACE_SYSCALL, pid, 0L, 0L /*sig*/);
	}
}
| 383 | |
| 384 | int main(int argc, char **argv, char **envp) |
| 385 | { |
| 386 | int exitcode = 0; |
| 387 | int cs; |
| 388 | |
| 389 | asm("\n" |
| 390 | " movl %%cs, %%eax\n" |
| 391 | : "=a" (cs) |
| 392 | ); |
| 393 | kernel_is_64bit = (cs == 0x23); |
| 394 | if (!kernel_is_64bit) |
| 395 | printf("[NOTE]\tNot a 64-bit kernel, won't test R8..R15 leaks\n"); |
| 396 | |
| 397 | /* This only works for non-static builds: |
| 398 | * syscall_addr = dlsym(dlopen("linux-gate.so.1", RTLD_NOW), "__kernel_vsyscall"); |
| 399 | */ |
| 400 | syscall_addr = get_syscall(envp); |
| 401 | |
| 402 | exitcode += run_syscall_twice(); |
| 403 | ptrace_me(); |
| 404 | exitcode += run_syscall_twice(); |
| 405 | |
| 406 | return exitcode; |
| 407 | } |
| 408 | #endif |