blob: 41dcc845f78c34d65a9d13e72cc7023a8a14f142 [file] [log] [blame]
/*
 * kvm trace
 *
 * It is designed to allow debugging traces of kvm to be generated
 * on UP / SMP machines. Each trace entry can be timestamped so that
 * it's possible to reconstruct a chronological record of trace events.
 * The implementation refers to blktrace kernel support.
 *
 * Copyright (c) 2008 Intel Corporation
 * Copyright (C) 2006 Jens Axboe <axboe@kernel.dk>
 *
 * Authors: Feng(Eric) Liu, eric.e.liu@intel.com
 *
 * Date: Feb 2008
 */
16
17#include <linux/module.h>
18#include <linux/relay.h>
19#include <linux/debugfs.h>
Christian Ehrhardt3f7f95c2008-07-14 14:00:01 +020020#include <linux/ktime.h>
Feng(Eric) Liud4c9ff22008-04-10 08:47:53 -040021
22#include <linux/kvm_host.h>
23
/* Values stored in kvm_trace.trace_state; each state occupies its own bit. */
#define KVM_TRACE_STATE_RUNNING		(1 << 0)	/* set when tracing is enabled */
#define KVM_TRACE_STATE_PAUSE		(1 << 1)	/* set by kvm_trace_pause() */
#define KVM_TRACE_STATE_CLEARUP		(1 << 2)	/* set while tearing down */
27
28struct kvm_trace {
29 int trace_state;
30 struct rchan *rchan;
31 struct dentry *lost_file;
32 atomic_t lost_records;
33};
34static struct kvm_trace *kvm_trace;
35
36struct kvm_trace_probe {
37 const char *name;
38 const char *format;
Christian Ehrhardt3f7f95c2008-07-14 14:00:01 +020039 u32 timestamp_in;
Feng(Eric) Liud4c9ff22008-04-10 08:47:53 -040040 marker_probe_func *probe_func;
41};
42
Christian Ehrhardt3f7f95c2008-07-14 14:00:01 +020043static inline int calc_rec_size(int timestamp, int extra)
Feng(Eric) Liud4c9ff22008-04-10 08:47:53 -040044{
45 int rec_size = KVM_TRC_HEAD_SIZE;
46
47 rec_size += extra;
Christian Ehrhardt3f7f95c2008-07-14 14:00:01 +020048 return timestamp ? rec_size += KVM_TRC_CYCLE_SIZE : rec_size;
Feng(Eric) Liud4c9ff22008-04-10 08:47:53 -040049}
50
51static void kvm_add_trace(void *probe_private, void *call_data,
52 const char *format, va_list *args)
53{
54 struct kvm_trace_probe *p = probe_private;
55 struct kvm_trace *kt = kvm_trace;
56 struct kvm_trace_rec rec;
57 struct kvm_vcpu *vcpu;
Christian Ehrhardte32c8f22008-07-14 14:00:00 +020058 int i, size;
59 u32 extra;
Feng(Eric) Liud4c9ff22008-04-10 08:47:53 -040060
61 if (unlikely(kt->trace_state != KVM_TRACE_STATE_RUNNING))
62 return;
63
Christian Ehrhardte32c8f22008-07-14 14:00:00 +020064 rec.rec_val = TRACE_REC_EVENT_ID(va_arg(*args, u32));
Feng(Eric) Liud4c9ff22008-04-10 08:47:53 -040065 vcpu = va_arg(*args, struct kvm_vcpu *);
66 rec.pid = current->tgid;
67 rec.vcpu_id = vcpu->vcpu_id;
68
69 extra = va_arg(*args, u32);
70 WARN_ON(!(extra <= KVM_TRC_EXTRA_MAX));
71 extra = min_t(u32, extra, KVM_TRC_EXTRA_MAX);
Feng(Eric) Liud4c9ff22008-04-10 08:47:53 -040072
Christian Ehrhardt3f7f95c2008-07-14 14:00:01 +020073 rec.rec_val |= TRACE_REC_TCS(p->timestamp_in)
Christian Ehrhardte32c8f22008-07-14 14:00:00 +020074 | TRACE_REC_NUM_DATA_ARGS(extra);
Feng(Eric) Liud4c9ff22008-04-10 08:47:53 -040075
Christian Ehrhardt3f7f95c2008-07-14 14:00:01 +020076 if (p->timestamp_in) {
77 rec.u.timestamp.timestamp = ktime_to_ns(ktime_get());
Feng(Eric) Liud4c9ff22008-04-10 08:47:53 -040078
Christian Ehrhardte32c8f22008-07-14 14:00:00 +020079 for (i = 0; i < extra; i++)
Christian Ehrhardt3f7f95c2008-07-14 14:00:01 +020080 rec.u.timestamp.extra_u32[i] = va_arg(*args, u32);
Feng(Eric) Liud4c9ff22008-04-10 08:47:53 -040081 } else {
Christian Ehrhardte32c8f22008-07-14 14:00:00 +020082 for (i = 0; i < extra; i++)
Christian Ehrhardt3f7f95c2008-07-14 14:00:01 +020083 rec.u.notimestamp.extra_u32[i] = va_arg(*args, u32);
Feng(Eric) Liud4c9ff22008-04-10 08:47:53 -040084 }
85
Christian Ehrhardt3f7f95c2008-07-14 14:00:01 +020086 size = calc_rec_size(p->timestamp_in, extra * sizeof(u32));
Feng(Eric) Liud4c9ff22008-04-10 08:47:53 -040087 relay_write(kt->rchan, &rec, size);
88}
89
90static struct kvm_trace_probe kvm_trace_probes[] = {
91 { "kvm_trace_entryexit", "%u %p %u %u %u %u %u %u", 1, kvm_add_trace },
92 { "kvm_trace_handler", "%u %p %u %u %u %u %u %u", 0, kvm_add_trace },
93};
94
95static int lost_records_get(void *data, u64 *val)
96{
97 struct kvm_trace *kt = data;
98
99 *val = atomic_read(&kt->lost_records);
100 return 0;
101}
102
103DEFINE_SIMPLE_ATTRIBUTE(kvm_trace_lost_ops, lost_records_get, NULL, "%llu\n");
104
105/*
106 * The relay channel is used in "no-overwrite" mode, it keeps trace of how
107 * many times we encountered a full subbuffer, to tell user space app the
108 * lost records there were.
109 */
110static int kvm_subbuf_start_callback(struct rchan_buf *buf, void *subbuf,
111 void *prev_subbuf, size_t prev_padding)
112{
113 struct kvm_trace *kt;
114
Tan, Li9ef621d2008-05-23 14:54:09 +0800115 if (!relay_buf_full(buf)) {
116 if (!prev_subbuf) {
117 /*
118 * executed only once when the channel is opened
119 * save metadata as first record
120 */
121 subbuf_start_reserve(buf, sizeof(u32));
122 *(u32 *)subbuf = 0x12345678;
123 }
124
Feng(Eric) Liud4c9ff22008-04-10 08:47:53 -0400125 return 1;
Tan, Li9ef621d2008-05-23 14:54:09 +0800126 }
Feng(Eric) Liud4c9ff22008-04-10 08:47:53 -0400127
128 kt = buf->chan->private_data;
129 atomic_inc(&kt->lost_records);
130
131 return 0;
132}
133
134static struct dentry *kvm_create_buf_file_callack(const char *filename,
135 struct dentry *parent,
136 int mode,
137 struct rchan_buf *buf,
138 int *is_global)
139{
140 return debugfs_create_file(filename, mode, parent, buf,
141 &relay_file_operations);
142}
143
/* relay remove_buf_file hook: remove the debugfs entry; always returns 0. */
static int kvm_remove_buf_file_callback(struct dentry *dentry)
{
	debugfs_remove(dentry);

	return 0;
}
149
150static struct rchan_callbacks kvm_relay_callbacks = {
151 .subbuf_start = kvm_subbuf_start_callback,
152 .create_buf_file = kvm_create_buf_file_callack,
153 .remove_buf_file = kvm_remove_buf_file_callback,
154};
155
156static int do_kvm_trace_enable(struct kvm_user_trace_setup *kuts)
157{
158 struct kvm_trace *kt;
159 int i, r = -ENOMEM;
160
161 if (!kuts->buf_size || !kuts->buf_nr)
162 return -EINVAL;
163
164 kt = kzalloc(sizeof(*kt), GFP_KERNEL);
165 if (!kt)
166 goto err;
167
168 r = -EIO;
169 atomic_set(&kt->lost_records, 0);
Hollis Blanchard76f7c872008-04-15 16:05:42 -0500170 kt->lost_file = debugfs_create_file("lost_records", 0444, kvm_debugfs_dir,
Feng(Eric) Liud4c9ff22008-04-10 08:47:53 -0400171 kt, &kvm_trace_lost_ops);
172 if (!kt->lost_file)
173 goto err;
174
Hollis Blanchard76f7c872008-04-15 16:05:42 -0500175 kt->rchan = relay_open("trace", kvm_debugfs_dir, kuts->buf_size,
Feng(Eric) Liud4c9ff22008-04-10 08:47:53 -0400176 kuts->buf_nr, &kvm_relay_callbacks, kt);
177 if (!kt->rchan)
178 goto err;
179
180 kvm_trace = kt;
181
182 for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
183 struct kvm_trace_probe *p = &kvm_trace_probes[i];
184
185 r = marker_probe_register(p->name, p->format, p->probe_func, p);
186 if (r)
187 printk(KERN_INFO "Unable to register probe %s\n",
188 p->name);
189 }
190
191 kvm_trace->trace_state = KVM_TRACE_STATE_RUNNING;
192
193 return 0;
194err:
195 if (kt) {
196 if (kt->lost_file)
197 debugfs_remove(kt->lost_file);
198 if (kt->rchan)
199 relay_close(kt->rchan);
200 kfree(kt);
201 }
202 return r;
203}
204
205static int kvm_trace_enable(char __user *arg)
206{
207 struct kvm_user_trace_setup kuts;
208 int ret;
209
210 ret = copy_from_user(&kuts, arg, sizeof(kuts));
211 if (ret)
212 return -EFAULT;
213
214 ret = do_kvm_trace_enable(&kuts);
215 if (ret)
216 return ret;
217
218 return 0;
219}
220
221static int kvm_trace_pause(void)
222{
223 struct kvm_trace *kt = kvm_trace;
224 int r = -EINVAL;
225
226 if (kt == NULL)
227 return r;
228
229 if (kt->trace_state == KVM_TRACE_STATE_RUNNING) {
230 kt->trace_state = KVM_TRACE_STATE_PAUSE;
231 relay_flush(kt->rchan);
232 r = 0;
233 }
234
235 return r;
236}
237
238void kvm_trace_cleanup(void)
239{
240 struct kvm_trace *kt = kvm_trace;
241 int i;
242
243 if (kt == NULL)
244 return;
245
246 if (kt->trace_state == KVM_TRACE_STATE_RUNNING ||
247 kt->trace_state == KVM_TRACE_STATE_PAUSE) {
248
249 kt->trace_state = KVM_TRACE_STATE_CLEARUP;
250
251 for (i = 0; i < ARRAY_SIZE(kvm_trace_probes); i++) {
252 struct kvm_trace_probe *p = &kvm_trace_probes[i];
253 marker_probe_unregister(p->name, p->probe_func, p);
254 }
255
256 relay_close(kt->rchan);
257 debugfs_remove(kt->lost_file);
258 kfree(kt);
259 }
260}
261
262int kvm_trace_ioctl(unsigned int ioctl, unsigned long arg)
263{
264 void __user *argp = (void __user *)arg;
265 long r = -EINVAL;
266
267 if (!capable(CAP_SYS_ADMIN))
268 return -EPERM;
269
270 switch (ioctl) {
271 case KVM_TRACE_ENABLE:
272 r = kvm_trace_enable(argp);
273 break;
274 case KVM_TRACE_PAUSE:
275 r = kvm_trace_pause();
276 break;
277 case KVM_TRACE_DISABLE:
278 r = 0;
279 kvm_trace_cleanup();
280 break;
281 }
282
283 return r;
284}