/**
 * @file cpu_buffer.c
 *
 * @remark Copyright 2002 OProfile authors
 * @remark Read the file COPYING
 *
 * @author John Levon <levon@movementarian.org>
 *
 * Each CPU has a local buffer that stores PC value/event
 * pairs. We also log context switches when we notice them.
 * Eventually each CPU's buffer is processed into the global
 * event buffer by sync_buffer().
 *
 * We use a local buffer for two reasons: an NMI or similar
 * interrupt cannot synchronise, and high sampling rates
 * would lead to catastrophic global synchronisation if
 * a global buffer was used.
 */
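/*
 * Implementation note (summarising the code below): each per-CPU buffer
 * is a single-producer/single-consumer ring of struct op_sample entries.
 * The sampling interrupt writes at head_pos and sync_buffer() reads at
 * tail_pos, so no locking is needed.  Metadata (task switches,
 * kernel/user transitions, backtrace markers) is encoded in-band as an
 * entry whose eip field is ESCAPE_CODE and whose event field carries
 * the code.
 */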

#include <linux/sched.h>
#include <linux/oprofile.h>
#include <linux/vmalloc.h>
#include <linux/errno.h>

#include "event_buffer.h"
#include "cpu_buffer.h"
#include "buffer_sync.h"
#include "oprof.h"

struct oprofile_cpu_buffer cpu_buffer[NR_CPUS] __cacheline_aligned;

static void wq_sync_buffer(struct work_struct *work);

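/* How often each CPU buffer is synced into the event buffer:
 * HZ / 10 jiffies, i.e. every 100ms. */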
#define DEFAULT_TIMER_EXPIRE (HZ / 10)
static int work_enabled;

void free_cpu_buffers(void)
{
	int i;

	for_each_online_cpu(i)
		vfree(cpu_buffer[i].buffer);
}

int alloc_cpu_buffers(void)
{
	int i;

	unsigned long buffer_size = fs_cpu_buffer_size;

	for_each_online_cpu(i) {
		struct oprofile_cpu_buffer *b = &cpu_buffer[i];

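		/* Allocate each buffer on the NUMA node local to its CPU. */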
		b->buffer = vmalloc_node(sizeof(struct op_sample) * buffer_size,
			cpu_to_node(i));
		if (!b->buffer)
			goto fail;

		b->last_task = NULL;
		b->last_is_kernel = -1;
		b->tracing = 0;
		b->buffer_size = buffer_size;
		b->tail_pos = 0;
		b->head_pos = 0;
		b->sample_received = 0;
		b->sample_lost_overflow = 0;
		b->backtrace_aborted = 0;
		b->sample_invalid_eip = 0;
		b->cpu = i;
		INIT_DELAYED_WORK(&b->work, wq_sync_buffer);
	}
	return 0;

fail:
	free_cpu_buffers();
	return -ENOMEM;
}

void start_cpu_work(void)
{
	int i;

	work_enabled = 1;

	for_each_online_cpu(i) {
		struct oprofile_cpu_buffer *b = &cpu_buffer[i];

		/*
		 * Spread the work by 1 jiffy per cpu so they don't all
		 * fire at once.
		 */
		schedule_delayed_work_on(i, &b->work, DEFAULT_TIMER_EXPIRE + i);
	}
}

void end_cpu_work(void)
{
	int i;

	work_enabled = 0;

	for_each_online_cpu(i) {
		struct oprofile_cpu_buffer *b = &cpu_buffer[i];

		cancel_delayed_work(&b->work);
	}

	flush_scheduled_work();
}

/* Resets the cpu buffer to a sane state. */
void cpu_buffer_reset(struct oprofile_cpu_buffer *cpu_buf)
{
	/* reset these to invalid values; the next sample
	 * collected will populate the buffer with proper
	 * values to initialize the buffer
	 */
	cpu_buf->last_is_kernel = -1;
	cpu_buf->last_task = NULL;
}

/* compute number of available slots in cpu_buffer queue */
static unsigned long nr_available_slots(struct oprofile_cpu_buffer const *b)
{
	unsigned long head = b->head_pos;
	unsigned long tail = b->tail_pos;

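	/*
	 * One slot is always kept unused so that head == tail can only
	 * mean "empty", never "full"; hence the -1 in both branches.
	 */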
	if (tail > head)
		return (tail - head) - 1;

	return tail + (b->buffer_size - head) - 1;
}

static void increment_head(struct oprofile_cpu_buffer *b)
{
	unsigned long new_head = b->head_pos + 1;

	/* Ensure anything written to the slot before we
	 * increment is visible */
	wmb();

	if (new_head < b->buffer_size)
		b->head_pos = new_head;
	else
		b->head_pos = 0;
}

static inline void
add_sample(struct oprofile_cpu_buffer *cpu_buf,
	   unsigned long pc, unsigned long event)
{
	struct op_sample *entry = &cpu_buf->buffer[cpu_buf->head_pos];
	entry->eip = pc;
	entry->event = event;
	increment_head(cpu_buf);
}

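/* An escape entry is an op_sample whose eip is ESCAPE_CODE; the event
 * field then carries the metadata code rather than an event count. */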
static inline void
add_code(struct oprofile_cpu_buffer *buffer, unsigned long value)
{
	add_sample(buffer, ESCAPE_CODE, value);
}

/* This must be safe from any context. It's safe writing here
 * because of the head/tail separation of the writer and reader
 * of the CPU buffer.
 *
 * is_kernel is needed because on some architectures you cannot
 * tell if you are in kernel or user space simply by looking at
 * pc. We tag this in the buffer by generating kernel enter/exit
 * events whenever is_kernel changes
 */
static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
		      int is_kernel, unsigned long event)
{
	struct task_struct *task;

	cpu_buf->sample_received++;

	if (pc == ESCAPE_CODE) {
		cpu_buf->sample_invalid_eip++;
		return 0;
	}

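	/*
	 * Worst case we need three slots: a kernel/user transition code,
	 * a task switch code, and the sample itself.
	 */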
	if (nr_available_slots(cpu_buf) < 3) {
		cpu_buf->sample_lost_overflow++;
		return 0;
	}

	is_kernel = !!is_kernel;

	task = current;

	/* notice a switch from user->kernel or vice versa */
	if (cpu_buf->last_is_kernel != is_kernel) {
		cpu_buf->last_is_kernel = is_kernel;
		add_code(cpu_buf, is_kernel);
	}

	/* notice a task switch */
	if (cpu_buf->last_task != task) {
		cpu_buf->last_task = task;
		add_code(cpu_buf, (unsigned long)task);
	}

	add_sample(cpu_buf, pc, event);
	return 1;
}

static int oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
{
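	/*
	 * Four slots leaves room for the CPU_TRACE_BEGIN code and,
	 * presumably, the worst-case log_sample() that follows it.
	 */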
	if (nr_available_slots(cpu_buf) < 4) {
		cpu_buf->sample_lost_overflow++;
		return 0;
	}

	add_code(cpu_buf, CPU_TRACE_BEGIN);
	cpu_buf->tracing = 1;
	return 1;
}

static void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf)
{
	cpu_buf->tracing = 0;
}

void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
			     unsigned long event, int is_kernel)
{
	struct oprofile_cpu_buffer *cpu_buf = &cpu_buffer[smp_processor_id()];

	if (!backtrace_depth) {
		log_sample(cpu_buf, pc, is_kernel, event);
		return;
	}

	if (!oprofile_begin_trace(cpu_buf))
		return;

	/* if log_sample() fails we can't backtrace since we lost the
	 * source of this event */
	if (log_sample(cpu_buf, pc, is_kernel, event))
		oprofile_ops.backtrace(regs, backtrace_depth);
	oprofile_end_trace(cpu_buf);
}

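/* Entry point for architecture code; typically called from the
 * performance counter overflow (or NMI) handler. */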
void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
{
	int is_kernel = !user_mode(regs);
	unsigned long pc = profile_pc(regs);

	oprofile_add_ext_sample(pc, regs, event, is_kernel);
}

void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
	struct oprofile_cpu_buffer *cpu_buf = &cpu_buffer[smp_processor_id()];
	log_sample(cpu_buf, pc, is_kernel, event);
}

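/* Log one address of a backtrace; only meaningful between
 * oprofile_begin_trace() and oprofile_end_trace(), i.e. while
 * cpu_buf->tracing is set. */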
void oprofile_add_trace(unsigned long pc)
{
	struct oprofile_cpu_buffer *cpu_buf = &cpu_buffer[smp_processor_id()];

	if (!cpu_buf->tracing)
		return;

	if (nr_available_slots(cpu_buf) < 1) {
		cpu_buf->tracing = 0;
		cpu_buf->sample_lost_overflow++;
		return;
	}

	/* broken frame can give an eip with the same value as an escape code,
	 * abort the trace if we get it */
	if (pc == ESCAPE_CODE) {
		cpu_buf->tracing = 0;
		cpu_buf->backtrace_aborted++;
		return;
	}

	add_sample(cpu_buf, pc, 0);
}

/*
 * This serves to avoid cpu buffer overflow, and makes sure
 * the task mortuary progresses
 *
 * By using schedule_delayed_work_on and then schedule_delayed_work
 * we guarantee this will stay on the correct cpu
 */
static void wq_sync_buffer(struct work_struct *work)
{
	struct oprofile_cpu_buffer *b =
		container_of(work, struct oprofile_cpu_buffer, work.work);
	if (b->cpu != smp_processor_id()) {
		printk("WQ on CPU%d, prefer CPU%d\n",
		       smp_processor_id(), b->cpu);
	}
	sync_buffer(b->cpu);

	/* don't re-add the work if we're shutting down */
	if (work_enabled)
		schedule_delayed_work(&b->work, DEFAULT_TIMER_EXPIRE);
}