/**
 * @file op_syscalls.c
 * Tracing of system calls
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon
 * @author Philippe Elie
 */
| 11 | |
| 12 | #include <linux/sched.h> |
| 13 | #include <linux/unistd.h> |
| 14 | #include <linux/mman.h> |
| 15 | #include <linux/file.h> |
| 16 | |
| 17 | #include "oprofile.h" |
| 18 | |
| 19 | void oprof_put_note(struct op_note * samp); |
| 20 | void __oprof_put_note(struct op_note * samp); |
| 21 | |
| 22 | extern spinlock_t note_lock; |
| 23 | |
| 24 | /* ------------ system calls --------------- */ |
| 25 | |
/*
 * Argument block for the old-style mmap system call: user space passes a
 * single pointer to one of these, which my_old_mmap() copies in with
 * copy_from_user().
 */
struct mmap_arg_struct {
	unsigned long addr;
	unsigned long len;
	unsigned long prot;	/* tested against PROT_EXEC */
	unsigned long flags;
	unsigned long fd;	/* looked up with fget() in out_mmap() */
	unsigned long offset;
};
| 34 | |
| 35 | asmlinkage static int (*old_sys_fork)(struct pt_regs); |
| 36 | asmlinkage static int (*old_sys_vfork)(struct pt_regs); |
| 37 | asmlinkage static int (*old_sys_clone)(struct pt_regs); |
| 38 | asmlinkage static int (*old_sys_execve)(struct pt_regs); |
| 39 | asmlinkage static int (*old_old_mmap)(struct mmap_arg_struct *); |
| 40 | #ifdef HAVE_MMAP2 |
| 41 | asmlinkage static long (*old_sys_mmap2)(ulong, ulong, ulong, ulong, ulong, ulong); |
| 42 | #endif |
| 43 | asmlinkage static long (*old_sys_init_module)(char const *, struct module *); |
| 44 | asmlinkage static long (*old_sys_exit)(int); |
| 45 | |
| 46 | /* called with note_lock held */ |
| 47 | static void oprof_output_map(ulong addr, ulong len, ulong offset, |
| 48 | struct file * file, int is_execve) |
| 49 | { |
| 50 | struct op_note note; |
| 51 | |
| 52 | /* don't bother with /dev/zero mappings etc. */ |
| 53 | if (!len) |
| 54 | return; |
| 55 | |
| 56 | note.pid = current->pid; |
| 57 | note.tgid = op_get_tgid(); |
| 58 | note.addr = addr; |
| 59 | note.len = len; |
| 60 | note.offset = offset; |
| 61 | note.type = is_execve ? OP_EXEC : OP_MAP; |
| 62 | note.hash = hash_path(file); |
| 63 | if (note.hash == -1) |
| 64 | return; |
| 65 | /* holding note lock */ |
| 66 | __oprof_put_note(¬e); |
| 67 | } |
| 68 | |
| 69 | static int oprof_output_maps(struct task_struct * task) |
| 70 | { |
| 71 | int size=0; |
| 72 | struct mm_struct * mm; |
| 73 | struct vm_area_struct * map; |
| 74 | |
| 75 | /* we don't need to worry about mm_users here, since there is at |
| 76 | least one user (current), and if there's other code using this |
| 77 | mm, then mm_users must be at least 2; we should never have to |
| 78 | mmput() here. */ |
| 79 | |
| 80 | if (!(mm = task->mm)) |
| 81 | goto out; |
| 82 | |
| 83 | lock_mmap(mm); |
| 84 | spin_lock(¬e_lock); |
| 85 | |
| 86 | /* We need two pass, daemon assume than the first mmap notification |
| 87 | * is for the executable but some process doesn't follow this model. |
| 88 | */ |
| 89 | for (map = mm->mmap; map; map = map->vm_next) { |
| 90 | if (!(map->vm_flags & VM_EXEC) || !map->vm_file) |
| 91 | continue; |
| 92 | if (!(map->vm_flags & VM_EXECUTABLE)) |
| 93 | continue; |
| 94 | |
| 95 | oprof_output_map(map->vm_start, map->vm_end-map->vm_start, |
| 96 | GET_VM_OFFSET(map), map->vm_file, 1); |
| 97 | } |
| 98 | for (map = mm->mmap; map; map = map->vm_next) { |
| 99 | if (!(map->vm_flags & VM_EXEC) || !map->vm_file) |
| 100 | continue; |
| 101 | if (map->vm_flags & VM_EXECUTABLE) |
| 102 | continue; |
| 103 | |
| 104 | oprof_output_map(map->vm_start, map->vm_end-map->vm_start, |
| 105 | GET_VM_OFFSET(map), map->vm_file, 0); |
| 106 | } |
| 107 | |
| 108 | spin_unlock(¬e_lock); |
| 109 | unlock_mmap(mm); |
| 110 | |
| 111 | out: |
| 112 | return size; |
| 113 | } |
| 114 | |
| 115 | asmlinkage static int my_sys_execve(struct pt_regs regs) |
| 116 | { |
| 117 | char * filename; |
| 118 | int ret; |
| 119 | |
| 120 | MOD_INC_USE_COUNT; |
| 121 | |
| 122 | lock_execve(); |
| 123 | |
| 124 | filename = getname((char *)regs.ebx); |
| 125 | if (IS_ERR(filename)) { |
| 126 | ret = PTR_ERR(filename); |
| 127 | goto out; |
| 128 | } |
| 129 | ret = do_execve(filename, (char **)regs.ecx, (char **)regs.edx, ®s); |
| 130 | |
| 131 | if (!ret) { |
| 132 | PTRACE_OFF(current); |
| 133 | oprof_output_maps(current); |
| 134 | } |
| 135 | |
| 136 | putname(filename); |
| 137 | |
| 138 | out: |
| 139 | unlock_execve(); |
| 140 | MOD_DEC_USE_COUNT; |
| 141 | return ret; |
| 142 | } |
| 143 | |
| 144 | static void out_mmap(ulong addr, ulong len, ulong prot, ulong flags, ulong fd, |
| 145 | ulong offset) |
| 146 | { |
| 147 | struct file * file; |
| 148 | |
| 149 | lock_out_mmap(); |
| 150 | |
| 151 | file = fget(fd); |
| 152 | if (!file) |
| 153 | goto out; |
| 154 | |
| 155 | spin_lock(¬e_lock); |
| 156 | oprof_output_map(addr, len, offset, file, 0); |
| 157 | spin_unlock(¬e_lock); |
| 158 | |
| 159 | fput(file); |
| 160 | |
| 161 | out: |
| 162 | unlock_out_mmap(); |
| 163 | } |
| 164 | |
#ifdef HAVE_MMAP2
/*
 * Replacement mmap2 entry point: forwards to the saved handler and, for
 * a non-negative result on a PROT_EXEC request, reports the mapping.
 * pgoff is shifted left by PAGE_SHIFT before being passed on as the
 * offset.
 *
 * NOTE(review): the result is held in a signed int, so a returned
 * address with the top bit set fails the "ret >= 0" test and is not
 * reported - confirm whether that is acceptable.
 */
asmlinkage static int my_sys_mmap2(ulong addr, ulong len,
	ulong prot, ulong flags, ulong fd, ulong pgoff)
{
	int ret;

	MOD_INC_USE_COUNT;

	ret = old_sys_mmap2(addr, len, prot, flags, fd, pgoff);

	if (ret >= 0 && (prot & PROT_EXEC))
		out_mmap(ret, len, prot, flags, fd, pgoff << PAGE_SHIFT);

	MOD_DEC_USE_COUNT;
	return ret;
}
#endif
| 182 | |
| 183 | asmlinkage static int my_old_mmap(struct mmap_arg_struct * arg) |
| 184 | { |
| 185 | int ret; |
| 186 | |
| 187 | MOD_INC_USE_COUNT; |
| 188 | |
| 189 | ret = old_old_mmap(arg); |
| 190 | |
| 191 | if (ret >= 0) { |
| 192 | struct mmap_arg_struct a; |
| 193 | |
| 194 | if (copy_from_user(&a, arg, sizeof(a))) { |
| 195 | ret = -EFAULT; |
| 196 | goto out; |
| 197 | } |
| 198 | |
| 199 | if (a.prot&PROT_EXEC) |
| 200 | out_mmap(ret, a.len, a.prot, a.flags, a.fd, a.offset); |
| 201 | } |
| 202 | |
| 203 | out: |
| 204 | MOD_DEC_USE_COUNT; |
| 205 | return ret; |
| 206 | } |
| 207 | |
| 208 | inline static void oprof_report_fork(u32 old_pid, u32 new_pid, u32 old_tgid, u32 new_tgid) |
| 209 | { |
| 210 | struct op_note note; |
| 211 | |
| 212 | note.type = OP_FORK; |
| 213 | note.pid = old_pid; |
| 214 | note.tgid = old_tgid; |
| 215 | note.addr = new_pid; |
| 216 | note.len = new_tgid; |
| 217 | oprof_put_note(¬e); |
| 218 | } |
| 219 | |
| 220 | asmlinkage static int my_sys_fork(struct pt_regs regs) |
| 221 | { |
| 222 | u32 pid = current->pid; |
| 223 | u32 tgid = op_get_tgid(); |
| 224 | int ret; |
| 225 | |
| 226 | MOD_INC_USE_COUNT; |
| 227 | |
| 228 | ret = old_sys_fork(regs); |
| 229 | if (ret) |
| 230 | oprof_report_fork(pid, ret, tgid, ret); |
| 231 | MOD_DEC_USE_COUNT; |
| 232 | return ret; |
| 233 | } |
| 234 | |
| 235 | asmlinkage static int my_sys_vfork(struct pt_regs regs) |
| 236 | { |
| 237 | u32 pid = current->pid; |
| 238 | u32 tgid = op_get_tgid(); |
| 239 | int ret; |
| 240 | |
| 241 | MOD_INC_USE_COUNT; |
| 242 | ret = old_sys_vfork(regs); |
| 243 | if (ret) |
| 244 | oprof_report_fork(pid, ret, tgid, ret); |
| 245 | MOD_DEC_USE_COUNT; |
| 246 | return ret; |
| 247 | } |
| 248 | |
| 249 | asmlinkage static int my_sys_clone(struct pt_regs regs) |
| 250 | { |
| 251 | u32 pid = current->pid; |
| 252 | u32 tgid = op_get_tgid(); |
| 253 | #if V_AT_LEAST(2, 4, 0) |
| 254 | u32 clone_flags = regs.ebx; |
| 255 | #endif |
| 256 | int ret; |
| 257 | |
| 258 | MOD_INC_USE_COUNT; |
| 259 | ret = old_sys_clone(regs); |
| 260 | if (ret) { |
| 261 | #if V_AT_LEAST(2, 4, 0) |
| 262 | if (clone_flags & CLONE_THREAD) |
| 263 | oprof_report_fork(pid, ret, tgid, tgid); |
| 264 | else |
| 265 | #endif |
| 266 | oprof_report_fork(pid, ret, tgid, ret); |
| 267 | } |
| 268 | MOD_DEC_USE_COUNT; |
| 269 | return ret; |
| 270 | } |
| 271 | |
| 272 | asmlinkage static long my_sys_init_module(char const * name_user, struct module * mod_user) |
| 273 | { |
| 274 | long ret; |
| 275 | |
| 276 | MOD_INC_USE_COUNT; |
| 277 | |
| 278 | ret = old_sys_init_module(name_user, mod_user); |
| 279 | |
| 280 | if (ret >= 0) { |
| 281 | struct op_note note; |
| 282 | |
| 283 | note.type = OP_DROP_MODULES; |
| 284 | oprof_put_note(¬e); |
| 285 | } |
| 286 | MOD_DEC_USE_COUNT; |
| 287 | return ret; |
| 288 | } |
| 289 | |
| 290 | /* used from do_nmi */ |
| 291 | asmlinkage long my_sys_exit(int error_code) |
| 292 | { |
| 293 | struct op_note note; |
| 294 | |
| 295 | MOD_INC_USE_COUNT; |
| 296 | |
| 297 | note.type = OP_EXIT; |
| 298 | note.pid = current->pid; |
| 299 | note.tgid = op_get_tgid(); |
| 300 | oprof_put_note(¬e); |
| 301 | |
| 302 | /* this looks UP-dangerous, as the exit sleeps and we don't |
| 303 | * have a use count, but in fact its ok as sys_exit is noreturn, |
| 304 | * so we can never come back to this non-existent exec page |
| 305 | */ |
| 306 | MOD_DEC_USE_COUNT; |
| 307 | return old_sys_exit(error_code); |
| 308 | } |
| 309 | |
/* System call table, indexed by __NR_* number; defined outside this file. */
extern void * sys_call_table[];
| 311 | |
| 312 | void op_save_syscalls(void) |
| 313 | { |
| 314 | old_sys_fork = sys_call_table[__NR_fork]; |
| 315 | old_sys_vfork = sys_call_table[__NR_vfork]; |
| 316 | old_sys_clone = sys_call_table[__NR_clone]; |
| 317 | old_sys_execve = sys_call_table[__NR_execve]; |
| 318 | old_old_mmap = sys_call_table[__NR_mmap]; |
| 319 | #ifdef HAVE_MMAP2 |
| 320 | old_sys_mmap2 = sys_call_table[__NR_mmap2]; |
| 321 | #endif |
| 322 | old_sys_init_module = sys_call_table[__NR_init_module]; |
| 323 | old_sys_exit = sys_call_table[__NR_exit]; |
| 324 | } |
| 325 | |
| 326 | void op_intercept_syscalls(void) |
| 327 | { |
| 328 | sys_call_table[__NR_fork] = my_sys_fork; |
| 329 | sys_call_table[__NR_vfork] = my_sys_vfork; |
| 330 | sys_call_table[__NR_clone] = my_sys_clone; |
| 331 | sys_call_table[__NR_execve] = my_sys_execve; |
| 332 | sys_call_table[__NR_mmap] = my_old_mmap; |
| 333 | #ifdef HAVE_MMAP2 |
| 334 | sys_call_table[__NR_mmap2] = my_sys_mmap2; |
| 335 | #endif |
| 336 | sys_call_table[__NR_init_module] = my_sys_init_module; |
| 337 | sys_call_table[__NR_exit] = my_sys_exit; |
| 338 | } |
| 339 | |
| 340 | void op_restore_syscalls(void) |
| 341 | { |
| 342 | sys_call_table[__NR_fork] = old_sys_fork; |
| 343 | sys_call_table[__NR_vfork] = old_sys_vfork; |
| 344 | sys_call_table[__NR_clone] = old_sys_clone; |
| 345 | sys_call_table[__NR_execve] = old_sys_execve; |
| 346 | sys_call_table[__NR_mmap] = old_old_mmap; |
| 347 | #ifdef HAVE_MMAP2 |
| 348 | sys_call_table[__NR_mmap2] = old_sys_mmap2; |
| 349 | #endif |
| 350 | sys_call_table[__NR_init_module] = old_sys_init_module; |
| 351 | sys_call_table[__NR_exit] = old_sys_exit; |
| 352 | } |