Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 1 | /* |
| 2 | * kvm trace |
| 3 | * |
| 4 | * It is designed to allow debugging traces of kvm to be generated |
| 5 | * on UP / SMP machines. Each trace entry can be timestamped so that |
| 6 | * it's possible to reconstruct a chronological record of trace events. |
| 7 | * The implementation refers to blktrace kernel support. |
| 8 | * |
| 9 | * Copyright (c) 2008 Intel Corporation |
| 10 | * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk> |
| 11 | * |
| 12 | * Authors: Feng(Eric) Liu, eric.e.liu@intel.com |
| 13 | * |
| 14 | * Date: Feb 2008 |
| 15 | */ |
| 16 | |
| 17 | #include <linux/module.h> |
| 18 | #include <linux/relay.h> |
| 19 | #include <linux/debugfs.h> |
Christian Ehrhardt | 3f7f95c | 2008-07-14 14:00:01 +0200 | [diff] [blame] | 20 | #include <linux/ktime.h> |
Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 21 | |
| 22 | #include <linux/kvm_host.h> |
| 23 | |
/*
 * Values stored in kvm_trace.trace_state.  Each state has its own bit, but
 * the code in this file only ever stores and compares one state at a time.
 */
#define KVM_TRACE_STATE_RUNNING		(1 << 0)	/* probes record events */
#define KVM_TRACE_STATE_PAUSE		(1 << 1)	/* recording suspended */
#define KVM_TRACE_STATE_CLEARUP		(1 << 2)	/* teardown in progress */
| 27 | |
| 28 | struct kvm_trace { |
| 29 | int trace_state; |
| 30 | struct rchan *rchan; |
| 31 | struct dentry *lost_file; |
| 32 | atomic_t lost_records; |
| 33 | }; |
| 34 | static struct kvm_trace *kvm_trace; |
| 35 | |
| 36 | struct kvm_trace_probe { |
| 37 | const char *name; |
| 38 | const char *format; |
Christian Ehrhardt | 3f7f95c | 2008-07-14 14:00:01 +0200 | [diff] [blame] | 39 | u32 timestamp_in; |
Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 40 | marker_probe_func *probe_func; |
| 41 | }; |
| 42 | |
Christian Ehrhardt | 3f7f95c | 2008-07-14 14:00:01 +0200 | [diff] [blame] | 43 | static inline int calc_rec_size(int timestamp, int extra) |
Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 44 | { |
| 45 | int rec_size = KVM_TRC_HEAD_SIZE; |
| 46 | |
| 47 | rec_size += extra; |
Christian Ehrhardt | 3f7f95c | 2008-07-14 14:00:01 +0200 | [diff] [blame] | 48 | return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size; |
Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 49 | } |
| 50 | |
| 51 | static void kvm_add_trace(void *probe_private, void *call_data, |
| 52 | const char *format, va_list *args) |
| 53 | { |
| 54 | struct kvm_trace_probe *p = probe_private; |
| 55 | struct kvm_trace *kt = kvm_trace; |
| 56 | struct kvm_trace_rec rec; |
| 57 | struct kvm_vcpu *vcpu; |
Christian Ehrhardt | e32c8f2 | 2008-07-14 14:00:00 +0200 | [diff] [blame] | 58 | int i, size; |
| 59 | u32 extra; |
Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 60 | |
| 61 | if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING)) |
| 62 | return; |
| 63 | |
Christian Ehrhardt | e32c8f2 | 2008-07-14 14:00:00 +0200 | [diff] [blame] | 64 | rec.rec_val = TRACE_REC_EVENT_ID(va_arg(*args, u32)); |
Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 65 | vcpu = va_arg(*args, struct kvm_vcpu *); |
| 66 | rec.pid = current->tgid; |
| 67 | rec.vcpu_id = vcpu->vcpu_id; |
| 68 | |
| 69 | extra = va_arg(*args, u32); |
| 70 | WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX)); |
| 71 | extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX); |
Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 72 | |
Christian Ehrhardt | 3f7f95c | 2008-07-14 14:00:01 +0200 | [diff] [blame] | 73 | rec.rec_val |= TRACE_REC_TCS(p->timestamp_in) |
Christian Ehrhardt | e32c8f2 | 2008-07-14 14:00:00 +0200 | [diff] [blame] | 74 | | TRACE_REC_NUM_DATA_ARGS(extra); |
Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 75 | |
Christian Ehrhardt | 3f7f95c | 2008-07-14 14:00:01 +0200 | [diff] [blame] | 76 | if (p->timestamp_in) { |
| 77 | rec.u.timestamp.timestamp = ktime_to_ns(ktime_get()); |
Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 78 | |
Christian Ehrhardt | e32c8f2 | 2008-07-14 14:00:00 +0200 | [diff] [blame] | 79 | for (i = 0; i < extra; i++) |
Christian Ehrhardt | 3f7f95c | 2008-07-14 14:00:01 +0200 | [diff] [blame] | 80 | rec.u.timestamp.extra_u32[i] = va_arg(*args, u32); |
Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 81 | } else { |
Christian Ehrhardt | e32c8f2 | 2008-07-14 14:00:00 +0200 | [diff] [blame] | 82 | for (i = 0; i < extra; i++) |
Christian Ehrhardt | 3f7f95c | 2008-07-14 14:00:01 +0200 | [diff] [blame] | 83 | rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32); |
Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 84 | } |
| 85 | |
Christian Ehrhardt | 3f7f95c | 2008-07-14 14:00:01 +0200 | [diff] [blame] | 86 | size = calc_rec_size(p->timestamp_in, extra * sizeof(u32)); |
Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 87 | relay_write(kt->rchan, &rec, size); |
| 88 | } |
| 89 | |
| 90 | static struct kvm_trace_probe kvm_trace_probes[] = { |
| 91 | { "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace }, |
| 92 | { "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace }, |
| 93 | }; |
| 94 | |
| 95 | static int lost_records_get(void *data, u64 *val) |
| 96 | { |
| 97 | struct kvm_trace *kt = data; |
| 98 | |
| 99 | *val = atomic_read(&kt->lost_records); |
| 100 | return 0; |
| 101 | } |
| 102 | |
| 103 | DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n"); |
| 104 | |
| 105 | /* |
| 106 | * The relay channel is used in "no-overwrite" mode, it keeps trace of how |
| 107 | * many times we encountered a full subbuffer, to tell user space app the |
| 108 | * lost records there were. |
| 109 | */ |
| 110 | static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf, |
| 111 | void *prev_subbuf, size_t prev_padding) |
| 112 | { |
| 113 | struct kvm_trace *kt; |
| 114 | |
Tan, Li | 9ef621d | 2008-05-23 14:54:09 +0800 | [diff] [blame] | 115 | if (!relay_buf_full(buf)) { |
| 116 | if (!prev_subbuf) { |
| 117 | /* |
| 118 | * executed only once when the channel is opened |
| 119 | * save metadata as first record |
| 120 | */ |
| 121 | subbuf_start_reserve(buf, sizeof(u32)); |
| 122 | *(u32 *)subbuf = 0x12345678; |
| 123 | } |
| 124 | |
Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 125 | return 1; |
Tan, Li | 9ef621d | 2008-05-23 14:54:09 +0800 | [diff] [blame] | 126 | } |
Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 127 | |
| 128 | kt = buf->chan->private_data; |
| 129 | atomic_inc(&kt->lost_records); |
| 130 | |
| 131 | return 0; |
| 132 | } |
| 133 | |
| 134 | static struct dentry *kvm_create_buf_file_callack(const char *filename, |
| 135 | struct dentry *parent, |
| 136 | int mode, |
| 137 | struct rchan_buf *buf, |
| 138 | int *is_global) |
| 139 | { |
| 140 | return debugfs_create_file(filename, mode, parent, buf, |
| 141 | &relay_file_operations); |
| 142 | } |
| 143 | |
/*
 * relay remove_buf_file hook: delete the debugfs file that backs a relay
 * buffer.  Always returns 0.
 */
static int kvm_remove_buf_file_callback(struct dentry *dentry)
{
	debugfs_remove(dentry);

	return 0;
}
| 149 | |
| 150 | static struct rchan_callbacks kvm_relay_callbacks = { |
| 151 | .subbuf_start = kvm_subbuf_start_callback, |
| 152 | .create_buf_file = kvm_create_buf_file_callack, |
| 153 | .remove_buf_file = kvm_remove_buf_file_callback, |
| 154 | }; |
| 155 | |
| 156 | static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts) |
| 157 | { |
| 158 | struct kvm_trace *kt; |
| 159 | int i, r = -ENOMEM; |
| 160 | |
| 161 | if (!kuts->buf_size || !kuts->buf_nr) |
| 162 | return -EINVAL; |
| 163 | |
| 164 | kt = kzalloc(sizeof(*kt), GFP_KERNEL); |
| 165 | if (!kt) |
| 166 | goto err; |
| 167 | |
| 168 | r = -EIO; |
| 169 | atomic_set(&kt->lost_records, 0); |
Hollis Blanchard | 76f7c87 | 2008-04-15 16:05:42 -0500 | [diff] [blame] | 170 | kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir, |
Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 171 | kt, &kvm_trace_lost_ops); |
| 172 | if (!kt->lost_file) |
| 173 | goto err; |
| 174 | |
Hollis Blanchard | 76f7c87 | 2008-04-15 16:05:42 -0500 | [diff] [blame] | 175 | kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size, |
Feng(Eric) Liu | d4c9ff2 | 2008-04-10 08:47:53 -0400 | [diff] [blame] | 176 | kuts->buf_nr, &kvm_relay_callbacks, kt); |
| 177 | if (!kt->rchan) |
| 178 | goto err; |
| 179 | |
| 180 | kvm_trace = kt; |
| 181 | |
| 182 | for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { |
| 183 | struct kvm_trace_probe *p = &kvm_trace_probes[i]; |
| 184 | |
| 185 | r = marker_probe_register(p->name, p->format, p->probe_func, p); |
| 186 | if (r) |
| 187 | printk(KERN_INFO "Unable to register probe %s\n", |
| 188 | p->name); |
| 189 | } |
| 190 | |
| 191 | kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING; |
| 192 | |
| 193 | return 0; |
| 194 | err: |
| 195 | if (kt) { |
| 196 | if (kt->lost_file) |
| 197 | debugfs_remove(kt->lost_file); |
| 198 | if (kt->rchan) |
| 199 | relay_close(kt->rchan); |
| 200 | kfree(kt); |
| 201 | } |
| 202 | return r; |
| 203 | } |
| 204 | |
| 205 | static int kvm_trace_enable(char __user *arg) |
| 206 | { |
| 207 | struct kvm_user_trace_setup kuts; |
| 208 | int ret; |
| 209 | |
| 210 | ret = copy_from_user(&kuts, arg, sizeof(kuts)); |
| 211 | if (ret) |
| 212 | return -EFAULT; |
| 213 | |
| 214 | ret = do_kvm_trace_enable(&kuts); |
| 215 | if (ret) |
| 216 | return ret; |
| 217 | |
| 218 | return 0; |
| 219 | } |
| 220 | |
| 221 | static int kvm_trace_pause(void) |
| 222 | { |
| 223 | struct kvm_trace *kt = kvm_trace; |
| 224 | int r = -EINVAL; |
| 225 | |
| 226 | if (kt == NULL) |
| 227 | return r; |
| 228 | |
| 229 | if (kt->trace_state == KVM_TRACE_STATE_RUNNING) { |
| 230 | kt->trace_state = KVM_TRACE_STATE_PAUSE; |
| 231 | relay_flush(kt->rchan); |
| 232 | r = 0; |
| 233 | } |
| 234 | |
| 235 | return r; |
| 236 | } |
| 237 | |
| 238 | void kvm_trace_cleanup(void) |
| 239 | { |
| 240 | struct kvm_trace *kt = kvm_trace; |
| 241 | int i; |
| 242 | |
| 243 | if (kt == NULL) |
| 244 | return; |
| 245 | |
| 246 | if (kt->trace_state == KVM_TRACE_STATE_RUNNING || |
| 247 | kt->trace_state == KVM_TRACE_STATE_PAUSE) { |
| 248 | |
| 249 | kt->trace_state = KVM_TRACE_STATE_CLEARUP; |
| 250 | |
| 251 | for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) { |
| 252 | struct kvm_trace_probe *p = &kvm_trace_probes[i]; |
| 253 | marker_probe_unregister(p->name, p->probe_func, p); |
| 254 | } |
| 255 | |
| 256 | relay_close(kt->rchan); |
| 257 | debugfs_remove(kt->lost_file); |
| 258 | kfree(kt); |
| 259 | } |
| 260 | } |
| 261 | |
| 262 | int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg) |
| 263 | { |
| 264 | void __user *argp = (void __user *)arg; |
| 265 | long r = -EINVAL; |
| 266 | |
| 267 | if (!capable(CAP_SYS_ADMIN)) |
| 268 | return -EPERM; |
| 269 | |
| 270 | switch (ioctl) { |
| 271 | case KVM_TRACE_ENABLE: |
| 272 | r = kvm_trace_enable(argp); |
| 273 | break; |
| 274 | case KVM_TRACE_PAUSE: |
| 275 | r = kvm_trace_pause(); |
| 276 | break; |
| 277 | case KVM_TRACE_DISABLE: |
| 278 | r = 0; |
| 279 | kvm_trace_cleanup(); |
| 280 | break; |
| 281 | } |
| 282 | |
| 283 | return r; |
| 284 | } |