| /* |
| * kvm trace |
| * |
| * It is designed to allow debugging traces of kvm to be generated |
| * on UP / SMP machines. Each trace entry can be timestamped so that |
| * it's possible to reconstruct a chronological record of trace events. |
| * The implementation refers to blktrace kernel support. |
| * |
| * Copyright (c) 2008 Intel Corporation |
| * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> |
| * |
| * Authors: Feng(Eric) Liu, eric.e.liu@intel.com |
| * |
| * Date: Feb 2008 |
| */ |
| |
| #include <linux/module.h> |
| #include <linux/relay.h> |
| #include <linux/debugfs.h> |
| |
| #include <linux/kvm_host.h> |
| |
/*
 * Values held in kvm_trace.trace_state; each state is a distinct bit.
 */
#define KVM_TRACE_STATE_RUNNING		(1 << 0)	/* probes write records */
#define KVM_TRACE_STATE_PAUSE		(1 << 1)	/* set by kvm_trace_pause() */
#define KVM_TRACE_STATE_CLEARUP		(1 << 2)	/* set while tearing down */
| |
| struct kvm_trace { |
| int trace_state; |
| struct rchan *rchan; |
| struct dentry *lost_file; |
| atomic_t lost_records; |
| }; |
| static struct kvm_trace *kvm_trace; |
| |
| struct kvm_trace_probe { |
| const char *name; |
| const char *format; |
| u32 cycle_in; |
| marker_probe_func *probe_func; |
| }; |
| |
| static inline int calc_rec_size(int cycle, int extra) |
| { |
| int rec_size = KVM_TRC_HEAD_SIZE; |
| |
| rec_size += extra; |
| return cycle ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; |
| } |
| |
| static void kvm_add_trace(void *probe_private, void *call_data, |
| const char *format, va_list *args) |
| { |
| struct kvm_trace_probe *p = probe_private; |
| struct kvm_trace *kt = kvm_trace; |
| struct kvm_trace_rec rec; |
| struct kvm_vcpu *vcpu; |
| int i, extra, size; |
| |
| if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING)) |
| return; |
| |
| rec.event = va_arg(*args, u32); |
| vcpu = va_arg(*args, struct kvm_vcpu *); |
| rec.pid = current->tgid; |
| rec.vcpu_id = vcpu->vcpu_id; |
| |
| extra = va_arg(*args, u32); |
| WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX)); |
| extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); |
| rec.extra_u32 = extra; |
| |
| rec.cycle_in = p->cycle_in; |
| |
| if (rec.cycle_in) { |
| u64 cycle = 0; |
| |
| cycle = get_cycles(); |
| rec.u.cycle.cycle_lo = (u32)cycle; |
| rec.u.cycle.cycle_hi = (u32)(cycle >> 32); |
| |
| for (i = 0; i < rec.extra_u32; i++) |
| rec.u.cycle.extra_u32[i] = va_arg(*args, u32); |
| } else { |
| for (i = 0; i < rec.extra_u32; i++) |
| rec.u.nocycle.extra_u32[i] = va_arg(*args, u32); |
| } |
| |
| size = calc_rec_size(rec.cycle_in, rec.extra_u32 * sizeof(u32)); |
| relay_write(kt->rchan, &rec, size); |
| } |
| |
| static struct kvm_trace_probe kvm_trace_probes[] = { |
| { "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace }, |
| { "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace }, |
| }; |
| |
| static int lost_records_get(void *data, u64 *val) |
| { |
| struct kvm_trace *kt = data; |
| |
| *val = atomic_read(&kt->lost_records); |
| return 0; |
| } |
| |
| DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n"); |
| |
| /* |
| * The relay channel is used in "no-overwrite" mode, it keeps trace of how |
| * many times we encountered a full subbuffer, to tell user space app the |
| * lost records there were. |
| */ |
| static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, |
| void *prev_subbuf, size_t prev_padding) |
| { |
| struct kvm_trace *kt; |
| |
| if (!relay_buf_full(buf)) |
| return 1; |
| |
| kt = buf->chan->private_data; |
| atomic_inc(&kt->lost_records); |
| |
| return 0; |
| } |
| |
| static struct dentry *kvm_create_buf_file_callack(const char *filename, |
| struct dentry *parent, |
| int mode, |
| struct rchan_buf *buf, |
| int *is_global) |
| { |
| return debugfs_create_file(filename, mode, parent, buf, |
| &relay_file_operations); |
| } |
| |
/*
 * relay remove_buf_file hook: removes the debugfs entry @dentry.
 * Always returns 0.
 */
static int kvm_remove_buf_file_callback(struct dentry *dentry)
{
	debugfs_remove(dentry);

	return 0;
}
| |
| static struct rchan_callbacks kvm_relay_callbacks = { |
| .subbuf_start = kvm_subbuf_start_callback, |
| .create_buf_file = kvm_create_buf_file_callack, |
| .remove_buf_file = kvm_remove_buf_file_callback, |
| }; |
| |
| static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts) |
| { |
| struct kvm_trace *kt; |
| int i, r = -ENOMEM; |
| |
| if (!kuts->buf_size || !kuts->buf_nr) |
| return -EINVAL; |
| |
| kt = kzalloc(sizeof(*kt), GFP_KERNEL); |
| if (!kt) |
| goto err; |
| |
| r = -EIO; |
| atomic_set(&kt->lost_records, 0); |
| kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir, |
| kt, &kvm_trace_lost_ops); |
| if (!kt->lost_file) |
| goto err; |
| |
| kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size, |
| kuts->buf_nr, &kvm_relay_callbacks, kt); |
| if (!kt->rchan) |
| goto err; |
| |
| kvm_trace = kt; |
| |
| for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { |
| struct kvm_trace_probe *p = &kvm_trace_probes[i]; |
| |
| r = marker_probe_register(p->name, p->format, p->probe_func, p); |
| if (r) |
| printk(KERN_INFO "Unable to register probe %s\n", |
| p->name); |
| } |
| |
| kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING; |
| |
| return 0; |
| err: |
| if (kt) { |
| if (kt->lost_file) |
| debugfs_remove(kt->lost_file); |
| if (kt->rchan) |
| relay_close(kt->rchan); |
| kfree(kt); |
| } |
| return r; |
| } |
| |
| static int kvm_trace_enable(char __user *arg) |
| { |
| struct kvm_user_trace_setup kuts; |
| int ret; |
| |
| ret = copy_from_user(&kuts, arg, sizeof(kuts)); |
| if (ret) |
| return -EFAULT; |
| |
| ret = do_kvm_trace_enable(&kuts); |
| if (ret) |
| return ret; |
| |
| return 0; |
| } |
| |
| static int kvm_trace_pause(void) |
| { |
| struct kvm_trace *kt = kvm_trace; |
| int r = -EINVAL; |
| |
| if (kt == NULL) |
| return r; |
| |
| if (kt->trace_state == KVM_TRACE_STATE_RUNNING) { |
| kt->trace_state = KVM_TRACE_STATE_PAUSE; |
| relay_flush(kt->rchan); |
| r = 0; |
| } |
| |
| return r; |
| } |
| |
| void kvm_trace_cleanup(void) |
| { |
| struct kvm_trace *kt = kvm_trace; |
| int i; |
| |
| if (kt == NULL) |
| return; |
| |
| if (kt->trace_state == KVM_TRACE_STATE_RUNNING || |
| kt->trace_state == KVM_TRACE_STATE_PAUSE) { |
| |
| kt->trace_state = KVM_TRACE_STATE_CLEARUP; |
| |
| for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { |
| struct kvm_trace_probe *p = &kvm_trace_probes[i]; |
| marker_probe_unregister(p->name, p->probe_func, p); |
| } |
| |
| relay_close(kt->rchan); |
| debugfs_remove(kt->lost_file); |
| kfree(kt); |
| } |
| } |
| |
| int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg) |
| { |
| void __user *argp = (void __user *)arg; |
| long r = -EINVAL; |
| |
| if (!capable(CAP_SYS_ADMIN)) |
| return -EPERM; |
| |
| switch (ioctl) { |
| case KVM_TRACE_ENABLE: |
| r = kvm_trace_enable(argp); |
| break; |
| case KVM_TRACE_PAUSE: |
| r = kvm_trace_pause(); |
| break; |
| case KVM_TRACE_DISABLE: |
| r = 0; |
| kvm_trace_cleanup(); |
| break; |
| } |
| |
| return r; |
| } |