blob: 93d82038af4b541853420a48502df0fc4c9c2407 [file] [log] [blame]
/* Support for MMIO probes.
 * Benefits from much of the kprobes code
 * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
 *     2007 Alexander Eichner
 *     2008 Pekka Paalanen <pq@iki.fi>
 */
7
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +02008#include <linux/list.h>
Ingo Molnar668a6c32008-05-19 13:35:24 +02009#include <linux/rculist.h>
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +020010#include <linux/spinlock.h>
11#include <linux/hash.h>
12#include <linux/init.h>
13#include <linux/module.h>
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +020014#include <linux/kernel.h>
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +020015#include <linux/uaccess.h>
16#include <linux/ptrace.h>
17#include <linux/preempt.h>
Pekka Paalanenf5136382008-05-12 21:20:57 +020018#include <linux/percpu.h>
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +020019#include <linux/kdebug.h>
Pekka Paalanend61fc442008-05-12 21:20:57 +020020#include <linux/mutex.h>
Pekka Paalanen970e6fa2008-05-12 21:21:03 +020021#include <linux/io.h>
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +020022#include <asm/cacheflush.h>
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +020023#include <asm/tlbflush.h>
Pekka Paalanen970e6fa2008-05-12 21:21:03 +020024#include <linux/errno.h>
Pekka Paalanen13829532008-05-12 21:20:58 +020025#include <asm/debugreg.h>
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +020026#include <linux/mmiotrace.h>
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +020027
/* Hash-table size for armed fault pages: 2^4 = 16 buckets. */
#define KMMIO_PAGE_HASH_BITS 4
#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
30
/*
 * One armed (not-present) page. Reference-counted so the page is only
 * disarmed and freed when the last probe covering it is released.
 */
struct kmmio_fault_page {
	struct list_head list;		/* entry in a kmmio_page_table bucket */
	struct kmmio_fault_page *release_next;	/* delayed-release chain */
	unsigned long page; /* location of the fault page */

	/*
	 * Number of times this page has been registered as a part
	 * of a probe. If zero, page is disarmed and this may be freed.
	 * Used only by writers (RCU).
	 */
	int count;
};
43
/* Carries a chain of disarmed fault pages through an RCU grace period. */
struct kmmio_delayed_release {
	struct rcu_head rcu;
	struct kmmio_fault_page *release_list;	/* linked via release_next */
};
48
/*
 * Per-CPU state of one probe hit that is currently being single-stepped.
 * Valid only between kmmio_handler() and the matching post_kmmio_handler().
 */
struct kmmio_context {
	struct kmmio_fault_page *fpage;	/* armed page that faulted */
	struct kmmio_probe *probe;	/* probe covering addr, may be NULL */
	unsigned long saved_flags;	/* saved TF/IF bits of regs->flags */
	unsigned long addr;		/* faulting address */
	int active;			/* non-zero while single-stepping */
};
56
/* Serializes all writers of the probe list and the fault page table. */
static DEFINE_SPINLOCK(kmmio_lock);

/* Number of registered probes. Protected by kmmio_lock. */
unsigned int kmmio_count;

/* Read-protected by RCU, write-protected by kmmio_lock. */
static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
static LIST_HEAD(kmmio_probes);
65
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +020066static struct list_head *kmmio_page_list(unsigned long page)
67{
68 return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)];
69}
70
/* Accessed per-cpu; claimed via get_cpu_var() in the fault/debug handlers. */
static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +020073
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +020074/*
75 * this is basically a dynamic stabbing problem:
76 * Could use the existing prio tree code or
77 * Possible better implementations:
78 * The Interval Skip List: A Data Structure for Finding All Intervals That
79 * Overlap a Point (might be simple)
80 * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup
81 */
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +020082/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +020083static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
84{
85 struct kmmio_probe *p;
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +020086 list_for_each_entry_rcu(p, &kmmio_probes, list) {
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +020087 if (addr >= p->addr && addr <= (p->addr + p->len))
88 return p;
89 }
90 return NULL;
91}
92
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +020093/* You must be holding RCU read lock. */
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +020094static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page)
95{
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +020096 struct list_head *head;
97 struct kmmio_fault_page *p;
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +020098
99 page &= PAGE_MASK;
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +0200100 head = kmmio_page_list(page);
101 list_for_each_entry_rcu(p, head, list) {
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200102 if (p->page == page)
103 return p;
104 }
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200105 return NULL;
106}
107
/*
 * Set or clear the _PAGE_PRESENT bit of the mapping for @addr, handling
 * both 4k and 2M mappings, then flush the TLB entry. If @pglevel is
 * non-NULL, the page level found by lookup_address() is reported there.
 */
static void set_page_present(unsigned long addr, bool present,
							unsigned int *pglevel)
{
	pteval_t pteval;
	pmdval_t pmdval;
	unsigned int level;
	pmd_t *pmd;
	pte_t *pte = lookup_address(addr, &level);

	if (!pte) {
		pr_err("kmmio: no pte for page 0x%08lx\n", addr);
		return;
	}

	if (pglevel)
		*pglevel = level;

	switch (level) {
	case PG_LEVEL_2M:
		/* For a 2M mapping, the returned "pte" is really a pmd. */
		pmd = (pmd_t *)pte;
		pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT;
		if (present)
			pmdval |= _PAGE_PRESENT;
		set_pmd(pmd, __pmd(pmdval));
		break;

	case PG_LEVEL_4K:
		pteval = pte_val(*pte) & ~_PAGE_PRESENT;
		if (present)
			pteval |= _PAGE_PRESENT;
		set_pte_atomic(pte, __pte(pteval));
		break;

	default:
		pr_err("kmmio: unexpected page level 0x%x.\n", level);
		return;
	}

	/* Make the new present bit visible to this CPU's TLB. */
	__flush_tlb_one(addr);
}
Pekka Paalanen75bb8832008-05-12 21:20:56 +0200148
/** Mark the given page as not present. Access to it will trigger a fault. */
static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
{
	set_page_present(page & PAGE_MASK, false, pglevel);
}
154
/** Mark the given page as present again, so accesses no longer fault. */
static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel)
{
	set_page_present(page & PAGE_MASK, true, pglevel);
}
160
/*
 * This is being called from do_page_fault().
 *
 * We may be in an interrupt or a critical section. Also prefetching may
 * trigger a page fault. We may be in the middle of process switch.
 * We cannot take any locks, because we could be executing especially
 * within a kmmio critical section.
 *
 * Local interrupts are disabled, so preemption cannot happen.
 * Do not enable interrupts, do not sleep, and watch out for other CPUs.
 */
/*
 * Interrupts are disabled on entry as trap3 is an interrupt gate
 * and they remain disabled throughout this function.
 *
 * Returns 1 if the fault was caused by an armed kmmio page and has been
 * handled (single-stepping armed), 0 otherwise.
 */
int kmmio_handler(struct pt_regs *regs, unsigned long addr)
{
	struct kmmio_context *ctx;
	struct kmmio_fault_page *faultpage;
	int ret = 0; /* default to fault not handled */

	/*
	 * Preemption is now disabled to prevent process switch during
	 * single stepping. We can only handle one active kmmio trace
	 * per cpu, so ensure that we finish it before something else
	 * gets to run. We also hold the RCU read lock over single
	 * stepping to avoid looking up the probe and kmmio_fault_page
	 * again.
	 */
	preempt_disable();
	rcu_read_lock();

	faultpage = get_kmmio_fault_page(addr);
	if (!faultpage) {
		/*
		 * Either this page fault is not caused by kmmio, or
		 * another CPU just pulled the kmmio probe from under
		 * our feet. The latter case should not be possible.
		 */
		goto no_kmmio;
	}

	ctx = &get_cpu_var(kmmio_ctx);
	if (ctx->active) {
		/* Disarm so the faulting access can complete on retry. */
		disarm_kmmio_fault_page(faultpage->page, NULL);
		if (addr == ctx->addr) {
			/*
			 * On SMP we sometimes get recursive probe hits on the
			 * same address. Context is already saved, fall out.
			 */
			pr_debug("kmmio: duplicate probe hit on CPU %d, for "
					"address 0x%08lx.\n",
					smp_processor_id(), addr);
			ret = 1;
			goto no_kmmio_ctx;
		}
		/*
		 * Prevent overwriting already in-flight context.
		 * This should not happen, let's hope disarming at least
		 * prevents a panic.
		 */
		pr_emerg("kmmio: recursive probe hit on CPU %d, "
				"for address 0x%08lx. Ignoring.\n",
				smp_processor_id(), addr);
		pr_emerg("kmmio: previous hit was at 0x%08lx.\n",
					ctx->addr);
		goto no_kmmio_ctx;
	}
	ctx->active++;

	/* Save everything the debug-trap handler will need. */
	ctx->fpage = faultpage;
	ctx->probe = get_kmmio_probe(addr);
	ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
	ctx->addr = addr;

	if (ctx->probe && ctx->probe->pre_handler)
		ctx->probe->pre_handler(ctx->probe, regs, addr);

	/*
	 * Enable single-stepping and disable interrupts for the faulting
	 * context. Local interrupts must not get enabled during stepping.
	 */
	regs->flags |= X86_EFLAGS_TF;
	regs->flags &= ~X86_EFLAGS_IF;

	/* Now we set present bit in PTE and single step. */
	disarm_kmmio_fault_page(ctx->fpage->page, NULL);

	/*
	 * If another cpu accesses the same page while we are stepping,
	 * the access will not be caught. It will simply succeed and the
	 * only downside is we lose the event. If this becomes a problem,
	 * the user should drop to single cpu before tracing.
	 */

	/* preempt/RCU stay held; released in post_kmmio_handler(). */
	put_cpu_var(kmmio_ctx);
	return 1; /* fault handled */

no_kmmio_ctx:
	put_cpu_var(kmmio_ctx);
no_kmmio:
	rcu_read_unlock();
	preempt_enable_no_resched();
	return ret;
}
266
/*
 * Interrupts are disabled on entry as trap1 is an interrupt gate
 * and they remain disabled throughout this function.
 * This must always get called as the pair to kmmio_handler().
 *
 * Returns 1 if the debug trap belonged to us and was consumed,
 * 0 if do_debug should keep processing it.
 */
static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
{
	int ret = 0;
	struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx);

	if (!ctx->active) {
		/* Single-step trap that was not initiated by us. */
		pr_debug("kmmio: spurious debug trap on CPU %d.\n",
							smp_processor_id());
		goto out;
	}

	if (ctx->probe && ctx->probe->post_handler)
		ctx->probe->post_handler(ctx->probe, condition, regs);

	/* The traced access has completed; re-arm the page. */
	arm_kmmio_fault_page(ctx->fpage->page, NULL);

	/* Restore the TF/IF bits saved in kmmio_handler(). */
	regs->flags &= ~X86_EFLAGS_TF;
	regs->flags |= ctx->saved_flags;

	/* These were acquired in kmmio_handler(). */
	ctx->active--;
	BUG_ON(ctx->active);
	rcu_read_unlock();
	preempt_enable_no_resched();

	/*
	 * if somebody else is singlestepping across a probe point, flags
	 * will have TF set, in which case, continue the remaining processing
	 * of do_debug, as if this is not a probe hit.
	 */
	if (!(regs->flags & X86_EFLAGS_TF))
		ret = 1;
out:
	put_cpu_var(kmmio_ctx);
	return ret;
}
308
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +0200309/* You must be holding kmmio_lock. */
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200310static int add_kmmio_fault_page(unsigned long page)
311{
312 struct kmmio_fault_page *f;
313
314 page &= PAGE_MASK;
315 f = get_kmmio_fault_page(page);
316 if (f) {
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +0200317 if (!f->count)
318 arm_kmmio_fault_page(f->page, NULL);
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200319 f->count++;
320 return 0;
321 }
322
323 f = kmalloc(sizeof(*f), GFP_ATOMIC);
324 if (!f)
325 return -1;
326
327 f->count = 1;
328 f->page = page;
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +0200329 list_add_rcu(&f->list, kmmio_page_list(f->page));
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200330
331 arm_kmmio_fault_page(f->page, NULL);
332
333 return 0;
334}
335
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +0200336/* You must be holding kmmio_lock. */
337static void release_kmmio_fault_page(unsigned long page,
338 struct kmmio_fault_page **release_list)
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200339{
340 struct kmmio_fault_page *f;
341
342 page &= PAGE_MASK;
343 f = get_kmmio_fault_page(page);
344 if (!f)
345 return;
346
347 f->count--;
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +0200348 BUG_ON(f->count < 0);
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200349 if (!f->count) {
350 disarm_kmmio_fault_page(f->page, NULL);
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +0200351 f->release_next = *release_list;
352 *release_list = f;
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200353 }
354}
355
Pekka Paalanen87e547f2008-05-12 21:21:03 +0200356/*
357 * With page-unaligned ioremaps, one or two armed pages may contain
358 * addresses from outside the intended mapping. Events for these addresses
359 * are currently silently dropped. The events may result only from programming
360 * mistakes by accessing addresses before the beginning or past the end of a
361 * mapping.
362 */
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200363int register_kmmio_probe(struct kmmio_probe *p)
364{
Pekka Paalanend61fc442008-05-12 21:20:57 +0200365 unsigned long flags;
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200366 int ret = 0;
367 unsigned long size = 0;
Pekka Paalanen87e547f2008-05-12 21:21:03 +0200368 const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200369
Pekka Paalanend61fc442008-05-12 21:20:57 +0200370 spin_lock_irqsave(&kmmio_lock, flags);
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200371 if (get_kmmio_probe(p->addr)) {
372 ret = -EEXIST;
373 goto out;
374 }
Pekka Paalanend61fc442008-05-12 21:20:57 +0200375 kmmio_count++;
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +0200376 list_add_rcu(&p->list, &kmmio_probes);
Pekka Paalanen87e547f2008-05-12 21:21:03 +0200377 while (size < size_lim) {
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200378 if (add_kmmio_fault_page(p->addr + size))
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +0200379 pr_err("kmmio: Unable to set page fault.\n");
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200380 size += PAGE_SIZE;
381 }
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200382out:
Pekka Paalanend61fc442008-05-12 21:20:57 +0200383 spin_unlock_irqrestore(&kmmio_lock, flags);
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200384 /*
385 * XXX: What should I do here?
386 * Here was a call to global_flush_tlb(), but it does not exist
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +0200387 * anymore. It seems it's not needed after all.
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200388 */
389 return ret;
390}
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +0200391EXPORT_SYMBOL(register_kmmio_probe);
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200392
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +0200393static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200394{
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +0200395 struct kmmio_delayed_release *dr = container_of(
396 head,
397 struct kmmio_delayed_release,
398 rcu);
399 struct kmmio_fault_page *p = dr->release_list;
400 while (p) {
401 struct kmmio_fault_page *next = p->release_next;
402 BUG_ON(p->count);
403 kfree(p);
404 p = next;
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200405 }
Pekka Paalanen0fd0e3d2008-05-12 21:20:57 +0200406 kfree(dr);
407}
408
409static void remove_kmmio_fault_pages(struct rcu_head *head)
410{
411 struct kmmio_delayed_release *dr = container_of(
412 head,
413 struct kmmio_delayed_release,
414 rcu);
415 struct kmmio_fault_page *p = dr->release_list;
416 struct kmmio_fault_page **prevp = &dr->release_list;
417 unsigned long flags;
418 spin_lock_irqsave(&kmmio_lock, flags);
419 while (p) {
420 if (!p->count)
421 list_del_rcu(&p->list);
422 else
423 *prevp = p->release_next;
424 prevp = &p->release_next;
425 p = p->release_next;
426 }
427 spin_unlock_irqrestore(&kmmio_lock, flags);
428 /* This is the real RCU destroy call. */
429 call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200430}
431
/*
 * Remove a kmmio probe. You have to synchronize_rcu() before you can be
 * sure that the callbacks will not be called anymore. Only after that
 * you may actually release your struct kmmio_probe.
 *
 * Unregistering a kmmio fault page has three steps:
 * 1. release_kmmio_fault_page()
 *    Disarm the page, wait a grace period to let all faults finish.
 * 2. remove_kmmio_fault_pages()
 *    Remove the pages from kmmio_page_table.
 * 3. rcu_free_kmmio_fault_pages()
 *    Actually free the kmmio_fault_page structs as with RCU.
 */
void unregister_kmmio_probe(struct kmmio_probe *p)
{
	unsigned long flags;
	unsigned long size = 0;
	/* Same page-coverage computation as in register_kmmio_probe(). */
	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
	struct kmmio_fault_page *release_list = NULL;
	struct kmmio_delayed_release *drelease;

	spin_lock_irqsave(&kmmio_lock, flags);
	while (size < size_lim) {
		release_kmmio_fault_page(p->addr + size, &release_list);
		size += PAGE_SIZE;
	}
	list_del_rcu(&p->list);
	kmmio_count--;
	spin_unlock_irqrestore(&kmmio_lock, flags);

	drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
	if (!drelease) {
		pr_crit("kmmio: leaking kmmio_fault_page objects.\n");
		return;
	}
	drelease->release_list = release_list;

	/*
	 * This is not really RCU here. We have just disarmed a set of
	 * pages so that they cannot trigger page faults anymore. However,
	 * we cannot remove the pages from kmmio_page_table,
	 * because a probe hit might be in flight on another CPU. The
	 * pages are collected into a list, and they will be removed from
	 * kmmio_page_table when it is certain that no probe hit related to
	 * these pages can be in flight. RCU grace period sounds like a
	 * good choice.
	 *
	 * If we removed the pages too early, kmmio page fault handler might
	 * not find the respective kmmio_fault_page and determine it's not
	 * a kmmio fault, when it actually is. This would lead to madness.
	 */
	call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
}
EXPORT_SYMBOL(unregister_kmmio_probe);
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200486
487static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val,
488 void *args)
489{
490 struct die_args *arg = args;
491
Pekka Paalanen13829532008-05-12 21:20:58 +0200492 if (val == DIE_DEBUG && (arg->err & DR_STEP))
Pekka Paalanen8b7d89d2008-05-12 21:20:56 +0200493 if (post_kmmio_handler(arg->err, arg->regs) == 1)
494 return NOTIFY_STOP;
495
496 return NOTIFY_DONE;
497}
Pekka Paalanen13829532008-05-12 21:20:58 +0200498
/* Hook into the die notifier chain to catch single-step debug traps. */
static struct notifier_block nb_die = {
	.notifier_call = kmmio_die_notifier
};
502
503static int __init init_kmmio(void)
504{
505 int i;
506 for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
507 INIT_LIST_HEAD(&kmmio_page_table[i]);
508 return register_die_notifier(&nb_die);
509}
510fs_initcall(init_kmmio); /* should be before device_initcall() */