Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 2 | * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3 | * Licensed under the GPL |
| 4 | */ |
| 5 | |
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <string.h>
#include <sys/resource.h>
#include "as-layout.h"
#include "init.h"
#include "kern_constants.h"
#include "kern_util.h"
#include "os.h"
#include "um_malloc.h"
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 19 | |
#define PGD_BOUND (4 * 1024 * 1024)
/* Ceiling, in bytes, that set_stklim() applies to the soft stack limit. */
#define STACKSIZE (8 * 1024 * 1024)
#define THREAD_NAME_LEN (256)

/*
 * Cap the soft RLIMIT_STACK of this process at STACKSIZE.
 *
 * The limit is only ever lowered: a soft limit that is already at or
 * below STACKSIZE is left untouched.  A failing getrlimit() or
 * setrlimit() is reported with perror() and ends the process with
 * exit status 1.
 */
static void set_stklim(void)
{
	struct rlimit stack_lim;
	int needs_cap;

	if (getrlimit(RLIMIT_STACK, &stack_lim) < 0) {
		perror("getrlimit");
		exit(1);
	}

	needs_cap = (stack_lim.rlim_cur == RLIM_INFINITY) ||
		    (stack_lim.rlim_cur > STACKSIZE);
	if (!needs_cap)
		return;

	stack_lim.rlim_cur = STACKSIZE;
	if (setrlimit(RLIMIT_STACK, &stack_lim) < 0) {
		perror("setrlimit");
		exit(1);
	}
}
| 40 | |
| 41 | static __init void do_uml_initcalls(void) |
| 42 | { |
| 43 | initcall_t *call; |
| 44 | |
| 45 | call = &__uml_initcall_start; |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 46 | while (call < &__uml_initcall_end) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 47 | (*call)(); |
| 48 | call++; |
| 49 | } |
| 50 | } |
| 51 | |
/*
 * Signal handler installed by install_fatal_handler(): run
 * uml_cleanup() and then terminate the process with exit status 1.
 * The signal number itself is not used.
 */
static void last_ditch_exit(int sig)
{
	uml_cleanup();
	exit(1);
}
| 57 | |
Jeff Dike | 4b84c69 | 2006-09-25 23:33:04 -0700 | [diff] [blame] | 58 | static void install_fatal_handler(int sig) |
| 59 | { |
| 60 | struct sigaction action; |
| 61 | |
| 62 | /* All signals are enabled in this handler ... */ |
| 63 | sigemptyset(&action.sa_mask); |
| 64 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 65 | /* |
| 66 | * ... including the signal being handled, plus we want the |
Jeff Dike | 4b84c69 | 2006-09-25 23:33:04 -0700 | [diff] [blame] | 67 | * handler reset to the default behavior, so that if an exit |
| 68 | * handler is hanging for some reason, the UML will just die |
| 69 | * after this signal is sent a second time. |
| 70 | */ |
| 71 | action.sa_flags = SA_RESETHAND | SA_NODEFER; |
| 72 | action.sa_restorer = NULL; |
| 73 | action.sa_handler = last_ditch_exit; |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 74 | if (sigaction(sig, &action, NULL) < 0) { |
Jeff Dike | 4b84c69 | 2006-09-25 23:33:04 -0700 | [diff] [blame] | 75 | printf("failed to install handler for signal %d - errno = %d\n", |
WANG Cong | c9a3072 | 2008-02-04 22:30:35 -0800 | [diff] [blame] | 76 | sig, errno); |
Jeff Dike | 4b84c69 | 2006-09-25 23:33:04 -0700 | [diff] [blame] | 77 | exit(1); |
| 78 | } |
| 79 | } |
| 80 | |
/* PATH component (with its leading separator) added by setup_env_path(). */
#define UML_LIB_PATH ":/usr/lib/uml"

/*
 * Make sure PATH ends with UML_LIB_PATH.
 *
 * With PATH unset or empty, a fixed default followed by UML_LIB_PATH is
 * installed.  Otherwise UML_LIB_PATH is appended to the current value.
 * Failures are reported with perror() and are not fatal; PATH is then
 * simply left as it was.
 */
static void setup_env_path(void)
{
	char *cur = getenv("PATH");
	int len = cur ? strlen(cur) : 0;
	char *entry;

	/* nothing usable in the environment: default + /usr/lib/uml */
	if (len == 0) {
		if (putenv("PATH=:/bin:/usr/bin/" UML_LIB_PATH))
			perror("couldn't putenv");
		return;
	}

	/* room for "PATH=", the old value, the suffix and the final NUL */
	len += strlen("PATH=" UML_LIB_PATH) + 1;
	entry = malloc(len);
	if (!entry) {
		perror("couldn't malloc to set a new PATH");
		return;
	}
	snprintf(entry, len, "PATH=%s" UML_LIB_PATH, cur);

	/*
	 * On success the string handed to putenv() stays referenced by the
	 * environment, so it is released only when putenv() fails.
	 */
	if (putenv(entry)) {
		perror("couldn't putenv to set a new PATH");
		free(entry);
	}
}
| 113 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 114 | extern void scan_elf_aux( char **envp); |
| 115 | |
Jeff Dike | 36e4546 | 2007-05-06 14:51:11 -0700 | [diff] [blame] | 116 | int __init main(int argc, char **argv, char **envp) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 117 | { |
| 118 | char **new_argv; |
Jeff Dike | 92515da | 2005-05-28 15:51:56 -0700 | [diff] [blame] | 119 | int ret, i, err; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 120 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 121 | set_stklim(); |
| 122 | |
Mattia Dongili | cb98cdc | 2006-05-01 12:16:01 -0700 | [diff] [blame] | 123 | setup_env_path(); |
| 124 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 125 | new_argv = malloc((argc + 1) * sizeof(char *)); |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 126 | if (new_argv == NULL) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 127 | perror("Mallocing argv"); |
| 128 | exit(1); |
| 129 | } |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 130 | for (i = 0; i < argc; i++) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 131 | new_argv[i] = strdup(argv[i]); |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 132 | if (new_argv[i] == NULL) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 133 | perror("Mallocing an arg"); |
| 134 | exit(1); |
| 135 | } |
| 136 | } |
| 137 | new_argv[argc] = NULL; |
| 138 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 139 | /* |
| 140 | * Allow these signals to bring down a UML if all other |
Jeff Dike | 4b84c69 | 2006-09-25 23:33:04 -0700 | [diff] [blame] | 141 | * methods of control fail. |
| 142 | */ |
| 143 | install_fatal_handler(SIGINT); |
| 144 | install_fatal_handler(SIGTERM); |
| 145 | install_fatal_handler(SIGHUP); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 146 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 147 | scan_elf_aux(envp); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 148 | |
| 149 | do_uml_initcalls(); |
| 150 | ret = linux_main(argc, argv); |
| 151 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 152 | /* |
| 153 | * Disable SIGPROF - I have no idea why libc doesn't do this or turn |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 154 | * off the profiling time, but UML dies with a SIGPROF just before |
| 155 | * exiting when profiling is active. |
| 156 | */ |
| 157 | change_sig(SIGPROF, 0); |
| 158 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 159 | /* |
| 160 | * This signal stuff used to be in the reboot case. However, |
Jeff Dike | 52c653b | 2005-11-07 00:58:50 -0800 | [diff] [blame] | 161 | * sometimes a SIGVTALRM can come in when we're halting (reproducably |
| 162 | * when writing out gcov information, presumably because that takes |
| 163 | * some time) and cause a segfault. |
| 164 | */ |
Jeff Dike | 92515da | 2005-05-28 15:51:56 -0700 | [diff] [blame] | 165 | |
Jeff Dike | 61b63c5 | 2007-10-16 01:27:27 -0700 | [diff] [blame] | 166 | /* stop timers and set SIGVTALRM to be ignored */ |
Jeff Dike | 52c653b | 2005-11-07 00:58:50 -0800 | [diff] [blame] | 167 | disable_timer(); |
Jeff Dike | 92515da | 2005-05-28 15:51:56 -0700 | [diff] [blame] | 168 | |
Jeff Dike | 52c653b | 2005-11-07 00:58:50 -0800 | [diff] [blame] | 169 | /* disable SIGIO for the fds and set SIGIO to be ignored */ |
| 170 | err = deactivate_all_fds(); |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 171 | if (err) |
Jeff Dike | 52c653b | 2005-11-07 00:58:50 -0800 | [diff] [blame] | 172 | printf("deactivate_all_fds failed, errno = %d\n", -err); |
Jeff Dike | 92515da | 2005-05-28 15:51:56 -0700 | [diff] [blame] | 173 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 174 | /* |
| 175 | * Let any pending signals fire now. This ensures |
Jeff Dike | 52c653b | 2005-11-07 00:58:50 -0800 | [diff] [blame] | 176 | * that they won't be delivered after the exec, when |
| 177 | * they are definitely not expected. |
| 178 | */ |
| 179 | unblock_signals(); |
Jeff Dike | 92515da | 2005-05-28 15:51:56 -0700 | [diff] [blame] | 180 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 181 | /* Reboot */ |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 182 | if (ret) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 183 | printf("\n"); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 184 | execvp(new_argv[0], new_argv); |
| 185 | perror("Failed to exec kernel"); |
| 186 | ret = 1; |
| 187 | } |
| 188 | printf("\n"); |
Jeff Dike | a5ed1ff | 2007-05-06 14:50:58 -0700 | [diff] [blame] | 189 | return uml_exitcode; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 190 | } |
| 191 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 192 | extern void *__real_malloc(int); |
| 193 | |
| 194 | void *__wrap_malloc(int size) |
| 195 | { |
| 196 | void *ret; |
| 197 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 198 | if (!kmalloc_ok) |
Jeff Dike | a5ed1ff | 2007-05-06 14:50:58 -0700 | [diff] [blame] | 199 | return __real_malloc(size); |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 200 | else if (size <= UM_KERN_PAGE_SIZE) |
Jeff Dike | c539ab7 | 2007-06-16 10:16:09 -0700 | [diff] [blame] | 201 | /* finding contiguous pages can be hard*/ |
Jeff Dike | 43f5b30 | 2008-05-12 14:01:52 -0700 | [diff] [blame] | 202 | ret = uml_kmalloc(size, UM_GFP_KERNEL); |
Jeff Dike | e4c4bf9 | 2007-07-15 23:38:56 -0700 | [diff] [blame] | 203 | else ret = vmalloc(size); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 204 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 205 | /* |
| 206 | * glibc people insist that if malloc fails, errno should be |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 207 | * set by malloc as well. So we do. |
| 208 | */ |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 209 | if (ret == NULL) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 210 | errno = ENOMEM; |
| 211 | |
Jeff Dike | a5ed1ff | 2007-05-06 14:50:58 -0700 | [diff] [blame] | 212 | return ret; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 213 | } |
| 214 | |
/*
 * calloc() replacement built on __wrap_malloc().
 *
 * @n:    number of elements
 * @size: size of one element in bytes
 *
 * Returns zero-filled memory for n * size bytes, or NULL on failure.
 * A negative argument, or a product that does not fit in an int (the
 * type __wrap_malloc() takes), fails with errno set to ENOMEM instead
 * of overflowing and handing back a buffer smaller than requested.
 */
void *__wrap_calloc(int n, int size)
{
	void *ptr;
	int total;

	/* checked by division so the signed multiply below cannot overflow */
	if (n < 0 || size < 0 || (size != 0 && n > INT_MAX / size)) {
		errno = ENOMEM;
		return NULL;
	}
	total = n * size;

	ptr = __wrap_malloc(total);
	if (ptr == NULL)
		return NULL;
	memset(ptr, 0, total);
	return ptr;
}
| 224 | |
| 225 | extern void __real_free(void *); |
| 226 | |
| 227 | extern unsigned long high_physmem; |
| 228 | |
| 229 | void __wrap_free(void *ptr) |
| 230 | { |
| 231 | unsigned long addr = (unsigned long) ptr; |
| 232 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 233 | /* |
| 234 | * We need to know how the allocation happened, so it can be correctly |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 235 | * freed. This is done by seeing what region of memory the pointer is |
| 236 | * in - |
| 237 | * physical memory - kmalloc/kfree |
| 238 | * kernel virtual memory - vmalloc/vfree |
| 239 | * anywhere else - malloc/free |
| 240 | * If kmalloc is not yet possible, then either high_physmem and/or |
| 241 | * end_vm are still 0 (as at startup), in which case we call free, or |
| 242 | * we have set them, but anyway addr has not been allocated from those |
| 243 | * areas. So, in both cases __real_free is called. |
| 244 | * |
| 245 | * CAN_KMALLOC is checked because it would be bad to free a buffer |
| 246 | * with kmalloc/vmalloc after they have been turned off during |
| 247 | * shutdown. |
| 248 | * XXX: However, we sometimes shutdown CAN_KMALLOC temporarily, so |
| 249 | * there is a possibility for memory leaks. |
| 250 | */ |
| 251 | |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 252 | if ((addr >= uml_physmem) && (addr < high_physmem)) { |
| 253 | if (kmalloc_ok) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 254 | kfree(ptr); |
| 255 | } |
Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 256 | else if ((addr >= start_vm) && (addr < end_vm)) { |
| 257 | if (kmalloc_ok) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 258 | vfree(ptr); |
| 259 | } |
| 260 | else __real_free(ptr); |
| 261 | } |