Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 2 | * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3 | * Licensed under the GPL |
| 4 | */ |
| 5 | |
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <string.h>
#include <sys/resource.h>
#include "as-layout.h"
#include "init.h"
#include "kern_util.h"
#include "os.h"
#include "um_malloc.h"
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 18 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 19 | #define PGD_BOUND (4 * 1024 * 1024) |
| 20 | #define STACKSIZE (8 * 1024 * 1024) |
| 21 | #define THREAD_NAME_LEN (256) |
| 22 | |
Richard Weinberger | b743ac5 | 2011-07-25 17:12:52 -0700 | [diff] [blame] | 23 | long elf_aux_hwcap; |
| 24 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 25 | static void set_stklim(void) |
| 26 | { |
| 27 | struct rlimit lim; |
| 28 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 29 | if (getrlimit(RLIMIT_STACK, &lim) < 0) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 30 | perror("getrlimit"); |
| 31 | exit(1); |
| 32 | } |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 33 | if ((lim.rlim_cur == RLIM_INFINITY) || (lim.rlim_cur > STACKSIZE)) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 34 | lim.rlim_cur = STACKSIZE; |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 35 | if (setrlimit(RLIMIT_STACK, &lim) < 0) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 36 | perror("setrlimit"); |
| 37 | exit(1); |
| 38 | } |
| 39 | } |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 40 | } |
| 41 | |
| 42 | static __init void do_uml_initcalls(void) |
| 43 | { |
| 44 | initcall_t *call; |
| 45 | |
| 46 | call = &__uml_initcall_start; |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 47 | while (call < &__uml_initcall_end) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 48 | (*call)(); |
| 49 | call++; |
| 50 | } |
| 51 | } |
| 52 | |
/*
 * Signal handler of last resort: run uml_cleanup() and terminate the
 * process with exit status 1.
 *
 * @sig: number of the signal being handled; not used.
 */
static void last_ditch_exit(int sig)
{
	uml_cleanup();
	exit(1);
}
| 58 | |
Jeff Dike | 4b84c69 | 2006-09-25 23:33:04 -0700 | [diff] [blame] | 59 | static void install_fatal_handler(int sig) |
| 60 | { |
| 61 | struct sigaction action; |
| 62 | |
| 63 | /* All signals are enabled in this handler ... */ |
| 64 | sigemptyset(&action.sa_mask); |
| 65 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 66 | /* |
| 67 | * ... including the signal being handled, plus we want the |
Jeff Dike | 4b84c69 | 2006-09-25 23:33:04 -0700 | [diff] [blame] | 68 | * handler reset to the default behavior, so that if an exit |
| 69 | * handler is hanging for some reason, the UML will just die |
| 70 | * after this signal is sent a second time. |
| 71 | */ |
| 72 | action.sa_flags = SA_RESETHAND | SA_NODEFER; |
| 73 | action.sa_restorer = NULL; |
| 74 | action.sa_handler = last_ditch_exit; |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 75 | if (sigaction(sig, &action, NULL) < 0) { |
Jeff Dike | 4b84c69 | 2006-09-25 23:33:04 -0700 | [diff] [blame] | 76 | printf("failed to install handler for signal %d - errno = %d\n", |
WANG Cong | c9a3072 | 2008-02-04 22:30:35 -0800 | [diff] [blame] | 77 | sig, errno); |
Jeff Dike | 4b84c69 | 2006-09-25 23:33:04 -0700 | [diff] [blame] | 78 | exit(1); |
| 79 | } |
| 80 | } |
| 81 | |
Richard Weinberger | 0ce451a | 2011-05-24 17:13:00 -0700 | [diff] [blame] | 82 | #define UML_LIB_PATH ":" OS_LIB_PATH "/uml" |
Mattia Dongili | cb98cdc | 2006-05-01 12:16:01 -0700 | [diff] [blame] | 83 | |
| 84 | static void setup_env_path(void) |
| 85 | { |
| 86 | char *new_path = NULL; |
| 87 | char *old_path = NULL; |
| 88 | int path_len = 0; |
| 89 | |
| 90 | old_path = getenv("PATH"); |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 91 | /* |
| 92 | * if no PATH variable is set or it has an empty value |
Mattia Dongili | cb98cdc | 2006-05-01 12:16:01 -0700 | [diff] [blame] | 93 | * just use the default + /usr/lib/uml |
| 94 | */ |
| 95 | if (!old_path || (path_len = strlen(old_path)) == 0) { |
WANG Cong | c9a3072 | 2008-02-04 22:30:35 -0800 | [diff] [blame] | 96 | if (putenv("PATH=:/bin:/usr/bin/" UML_LIB_PATH)) |
| 97 | perror("couldn't putenv"); |
Mattia Dongili | cb98cdc | 2006-05-01 12:16:01 -0700 | [diff] [blame] | 98 | return; |
| 99 | } |
| 100 | |
| 101 | /* append /usr/lib/uml to the existing path */ |
| 102 | path_len += strlen("PATH=" UML_LIB_PATH) + 1; |
| 103 | new_path = malloc(path_len); |
| 104 | if (!new_path) { |
WANG Cong | c9a3072 | 2008-02-04 22:30:35 -0800 | [diff] [blame] | 105 | perror("couldn't malloc to set a new PATH"); |
Mattia Dongili | cb98cdc | 2006-05-01 12:16:01 -0700 | [diff] [blame] | 106 | return; |
| 107 | } |
| 108 | snprintf(new_path, path_len, "PATH=%s" UML_LIB_PATH, old_path); |
WANG Cong | c9a3072 | 2008-02-04 22:30:35 -0800 | [diff] [blame] | 109 | if (putenv(new_path)) { |
| 110 | perror("couldn't putenv to set a new PATH"); |
| 111 | free(new_path); |
| 112 | } |
Mattia Dongili | cb98cdc | 2006-05-01 12:16:01 -0700 | [diff] [blame] | 113 | } |
| 114 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 115 | extern void scan_elf_aux( char **envp); |
| 116 | |
Jeff Dike | 36e4546 | 2007-05-06 14:51:11 -0700 | [diff] [blame] | 117 | int __init main(int argc, char **argv, char **envp) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 118 | { |
| 119 | char **new_argv; |
Jeff Dike | 92515da | 2005-05-28 15:51:56 -0700 | [diff] [blame] | 120 | int ret, i, err; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 121 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 122 | set_stklim(); |
| 123 | |
Mattia Dongili | cb98cdc | 2006-05-01 12:16:01 -0700 | [diff] [blame] | 124 | setup_env_path(); |
| 125 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 126 | new_argv = malloc((argc + 1) * sizeof(char *)); |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 127 | if (new_argv == NULL) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 128 | perror("Mallocing argv"); |
| 129 | exit(1); |
| 130 | } |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 131 | for (i = 0; i < argc; i++) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 132 | new_argv[i] = strdup(argv[i]); |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 133 | if (new_argv[i] == NULL) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 134 | perror("Mallocing an arg"); |
| 135 | exit(1); |
| 136 | } |
| 137 | } |
| 138 | new_argv[argc] = NULL; |
| 139 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 140 | /* |
| 141 | * Allow these signals to bring down a UML if all other |
Jeff Dike | 4b84c69 | 2006-09-25 23:33:04 -0700 | [diff] [blame] | 142 | * methods of control fail. |
| 143 | */ |
| 144 | install_fatal_handler(SIGINT); |
| 145 | install_fatal_handler(SIGTERM); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 146 | |
Richard Weinberger | b743ac5 | 2011-07-25 17:12:52 -0700 | [diff] [blame] | 147 | #ifdef CONFIG_ARCH_REUSE_HOST_VSYSCALL_AREA |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 148 | scan_elf_aux(envp); |
Richard Weinberger | b743ac5 | 2011-07-25 17:12:52 -0700 | [diff] [blame] | 149 | #endif |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 150 | |
| 151 | do_uml_initcalls(); |
| 152 | ret = linux_main(argc, argv); |
| 153 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 154 | /* |
| 155 | * Disable SIGPROF - I have no idea why libc doesn't do this or turn |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 156 | * off the profiling time, but UML dies with a SIGPROF just before |
| 157 | * exiting when profiling is active. |
| 158 | */ |
| 159 | change_sig(SIGPROF, 0); |
| 160 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 161 | /* |
| 162 | * This signal stuff used to be in the reboot case. However, |
Jeff Dike | 52c653b | 2005-11-07 00:58:50 -0800 | [diff] [blame] | 163 | * sometimes a SIGVTALRM can come in when we're halting (reproducably |
| 164 | * when writing out gcov information, presumably because that takes |
| 165 | * some time) and cause a segfault. |
| 166 | */ |
Jeff Dike | 92515da | 2005-05-28 15:51:56 -0700 | [diff] [blame] | 167 | |
Jeff Dike | 61b63c5 | 2007-10-16 01:27:27 -0700 | [diff] [blame] | 168 | /* stop timers and set SIGVTALRM to be ignored */ |
Jeff Dike | 52c653b | 2005-11-07 00:58:50 -0800 | [diff] [blame] | 169 | disable_timer(); |
Jeff Dike | 92515da | 2005-05-28 15:51:56 -0700 | [diff] [blame] | 170 | |
Jeff Dike | 52c653b | 2005-11-07 00:58:50 -0800 | [diff] [blame] | 171 | /* disable SIGIO for the fds and set SIGIO to be ignored */ |
| 172 | err = deactivate_all_fds(); |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 173 | if (err) |
Jeff Dike | 52c653b | 2005-11-07 00:58:50 -0800 | [diff] [blame] | 174 | printf("deactivate_all_fds failed, errno = %d\n", -err); |
Jeff Dike | 92515da | 2005-05-28 15:51:56 -0700 | [diff] [blame] | 175 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 176 | /* |
| 177 | * Let any pending signals fire now. This ensures |
Jeff Dike | 52c653b | 2005-11-07 00:58:50 -0800 | [diff] [blame] | 178 | * that they won't be delivered after the exec, when |
| 179 | * they are definitely not expected. |
| 180 | */ |
| 181 | unblock_signals(); |
Jeff Dike | 92515da | 2005-05-28 15:51:56 -0700 | [diff] [blame] | 182 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 183 | /* Reboot */ |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 184 | if (ret) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 185 | printf("\n"); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 186 | execvp(new_argv[0], new_argv); |
| 187 | perror("Failed to exec kernel"); |
| 188 | ret = 1; |
| 189 | } |
| 190 | printf("\n"); |
Jeff Dike | a5ed1ff | 2007-05-06 14:50:58 -0700 | [diff] [blame] | 191 | return uml_exitcode; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 192 | } |
| 193 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 194 | extern void *__real_malloc(int); |
| 195 | |
| 196 | void *__wrap_malloc(int size) |
| 197 | { |
| 198 | void *ret; |
| 199 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 200 | if (!kmalloc_ok) |
Jeff Dike | a5ed1ff | 2007-05-06 14:50:58 -0700 | [diff] [blame] | 201 | return __real_malloc(size); |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 202 | else if (size <= UM_KERN_PAGE_SIZE) |
Jeff Dike | c539ab7 | 2007-06-16 10:16:09 -0700 | [diff] [blame] | 203 | /* finding contiguous pages can be hard*/ |
Jeff Dike | 43f5b30 | 2008-05-12 14:01:52 -0700 | [diff] [blame] | 204 | ret = uml_kmalloc(size, UM_GFP_KERNEL); |
Jeff Dike | e4c4bf9 | 2007-07-15 23:38:56 -0700 | [diff] [blame] | 205 | else ret = vmalloc(size); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 206 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 207 | /* |
| 208 | * glibc people insist that if malloc fails, errno should be |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 209 | * set by malloc as well. So we do. |
| 210 | */ |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 211 | if (ret == NULL) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 212 | errno = ENOMEM; |
| 213 | |
Jeff Dike | a5ed1ff | 2007-05-06 14:50:58 -0700 | [diff] [blame] | 214 | return ret; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 215 | } |
| 216 | |
/*
 * calloc() replacement built on __wrap_malloc().
 *
 * @n:    number of elements.
 * @size: size of each element in bytes.
 *
 * Returns a zero-filled buffer of n * size bytes, or NULL on failure.
 * A negative argument, or a product that does not fit in an int, is
 * rejected with errno set to ENOMEM instead of being multiplied: signed
 * overflow is undefined behaviour and could otherwise yield a buffer
 * smaller than the caller expects.
 */
void *__wrap_calloc(int n, int size)
{
	void *ptr;
	int total;

	/* Check before multiplying so that n * size can never overflow */
	if (n < 0 || size < 0 || (size != 0 && n > INT_MAX / size)) {
		errno = ENOMEM;
		return NULL;
	}
	total = n * size;

	ptr = __wrap_malloc(total);
	if (ptr == NULL)
		return NULL;

	memset(ptr, 0, total);
	return ptr;
}
| 226 | |
| 227 | extern void __real_free(void *); |
| 228 | |
| 229 | extern unsigned long high_physmem; |
| 230 | |
| 231 | void __wrap_free(void *ptr) |
| 232 | { |
| 233 | unsigned long addr = (unsigned long) ptr; |
| 234 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 235 | /* |
| 236 | * We need to know how the allocation happened, so it can be correctly |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 237 | * freed. This is done by seeing what region of memory the pointer is |
| 238 | * in - |
| 239 | * physical memory - kmalloc/kfree |
| 240 | * kernel virtual memory - vmalloc/vfree |
| 241 | * anywhere else - malloc/free |
| 242 | * If kmalloc is not yet possible, then either high_physmem and/or |
| 243 | * end_vm are still 0 (as at startup), in which case we call free, or |
| 244 | * we have set them, but anyway addr has not been allocated from those |
| 245 | * areas. So, in both cases __real_free is called. |
| 246 | * |
| 247 | * CAN_KMALLOC is checked because it would be bad to free a buffer |
| 248 | * with kmalloc/vmalloc after they have been turned off during |
| 249 | * shutdown. |
| 250 | * XXX: However, we sometimes shutdown CAN_KMALLOC temporarily, so |
| 251 | * there is a possibility for memory leaks. |
| 252 | */ |
| 253 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 254 | if ((addr >= uml_physmem) && (addr < high_physmem)) { |
| 255 | if (kmalloc_ok) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 256 | kfree(ptr); |
| 257 | } |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 258 | else if ((addr >= start_vm) && (addr < end_vm)) { |
| 259 | if (kmalloc_ok) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 260 | vfree(ptr); |
| 261 | } |
| 262 | else __real_free(ptr); |
| 263 | } |