blob: 51f2b228723f2c00d09512f8c73dc94343a6f58d [file] [log] [blame]
/*
 * Copyright © 2015 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 */
15
16#include <linux/intel-iommu.h>
David Woodhouse2f26e0a2015-09-09 11:40:47 +010017#include <linux/mmu_notifier.h>
18#include <linux/sched.h>
19#include <linux/slab.h>
20#include <linux/intel-svm.h>
21#include <linux/rculist.h>
22#include <linux/pci.h>
23#include <linux/pci-ats.h>
David Woodhousea222a7f2015-10-07 23:35:18 +010024#include <linux/dmar.h>
25#include <linux/interrupt.h>
26
27static irqreturn_t prq_event_thread(int irq, void *d);
David Woodhouse2f26e0a2015-09-09 11:40:47 +010028
29struct pasid_entry {
30 u64 val;
31};
David Woodhouse8a94ade2015-03-24 14:54:56 +000032
David Woodhouse907fea32015-10-13 14:11:13 +010033struct pasid_state_entry {
34 u64 val;
35};
36
David Woodhouse8a94ade2015-03-24 14:54:56 +000037int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
38{
39 struct page *pages;
40 int order;
41
David Woodhouse91017042016-09-12 10:49:11 +080042 /* Start at 2 because it's defined as 2^(1+PSS) */
43 iommu->pasid_max = 2 << ecap_pss(iommu->ecap);
David Woodhouse8a94ade2015-03-24 14:54:56 +000044
David Woodhouse91017042016-09-12 10:49:11 +080045 /* Eventually I'm promised we will get a multi-level PASID table
46 * and it won't have to be physically contiguous. Until then,
47 * limit the size because 8MiB contiguous allocations can be hard
48 * to come by. The limit of 0x20000, which is 1MiB for each of
49 * the PASID and PASID-state tables, is somewhat arbitrary. */
50 if (iommu->pasid_max > 0x20000)
51 iommu->pasid_max = 0x20000;
52
53 order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
David Woodhouse8a94ade2015-03-24 14:54:56 +000054 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
55 if (!pages) {
56 pr_warn("IOMMU: %s: Failed to allocate PASID table\n",
57 iommu->name);
58 return -ENOMEM;
59 }
60 iommu->pasid_table = page_address(pages);
61 pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order);
62
63 if (ecap_dis(iommu->ecap)) {
David Woodhouse91017042016-09-12 10:49:11 +080064 /* Just making it explicit... */
65 BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry));
David Woodhouse8a94ade2015-03-24 14:54:56 +000066 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
67 if (pages)
68 iommu->pasid_state_table = page_address(pages);
69 else
70 pr_warn("IOMMU: %s: Failed to allocate PASID state table\n",
71 iommu->name);
72 }
73
David Woodhouse2f26e0a2015-09-09 11:40:47 +010074 idr_init(&iommu->pasid_idr);
75
David Woodhouse8a94ade2015-03-24 14:54:56 +000076 return 0;
77}
78
79int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
80{
David Woodhouse91017042016-09-12 10:49:11 +080081 int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
David Woodhouse8a94ade2015-03-24 14:54:56 +000082
83 if (iommu->pasid_table) {
84 free_pages((unsigned long)iommu->pasid_table, order);
85 iommu->pasid_table = NULL;
86 }
87 if (iommu->pasid_state_table) {
88 free_pages((unsigned long)iommu->pasid_state_table, order);
89 iommu->pasid_state_table = NULL;
90 }
David Woodhouse2f26e0a2015-09-09 11:40:47 +010091 idr_destroy(&iommu->pasid_idr);
David Woodhouse8a94ade2015-03-24 14:54:56 +000092 return 0;
93}
David Woodhouse2f26e0a2015-09-09 11:40:47 +010094
/*
 * Page allocation order of the page request queue (0 = a single page).
 * The same value is OR'ed into the queue address written to DMAR_PQA_REG.
 */
#define PRQ_ORDER 0
96
97int intel_svm_enable_prq(struct intel_iommu *iommu)
98{
99 struct page *pages;
100 int irq, ret;
101
102 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
103 if (!pages) {
104 pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
105 iommu->name);
106 return -ENOMEM;
107 }
108 iommu->prq = page_address(pages);
109
110 irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
111 if (irq <= 0) {
112 pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
113 iommu->name);
114 ret = -EINVAL;
115 err:
116 free_pages((unsigned long)iommu->prq, PRQ_ORDER);
117 iommu->prq = NULL;
118 return ret;
119 }
120 iommu->pr_irq = irq;
121
122 snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
123
124 ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
125 iommu->prq_name, iommu);
126 if (ret) {
127 pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
128 iommu->name);
129 dmar_free_hwirq(irq);
130 goto err;
131 }
132 dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
133 dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
134 dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
135
136 return 0;
137}
138
139int intel_svm_finish_prq(struct intel_iommu *iommu)
140{
141 dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
142 dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
143 dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
144
145 free_irq(iommu->pr_irq, iommu);
146 dmar_free_hwirq(iommu->pr_irq);
147 iommu->pr_irq = 0;
148
149 free_pages((unsigned long)iommu->prq, PRQ_ORDER);
150 iommu->prq = NULL;
151
152 return 0;
153}
154
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100155static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev,
David Woodhouse5d52f482015-10-20 15:52:13 +0100156 unsigned long address, unsigned long pages, int ih, int gl)
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100157{
158 struct qi_desc desc;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100159
David Woodhouse5d52f482015-10-20 15:52:13 +0100160 if (pages == -1) {
David Woodhousee0349922015-10-16 19:36:53 +0100161 /* For global kernel pages we have to flush them in *all* PASIDs
162 * because that's the only option the hardware gives us. Despite
163 * the fact that they are actually only accessible through one. */
164 if (gl)
165 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
166 QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) | QI_EIOTLB_TYPE;
167 else
168 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
169 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100170 desc.high = 0;
171 } else {
David Woodhouse5d52f482015-10-20 15:52:13 +0100172 int mask = ilog2(__roundup_pow_of_two(pages));
173
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100174 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
175 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE;
David Woodhousee0349922015-10-16 19:36:53 +0100176 desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(gl) |
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100177 QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask);
178 }
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100179 qi_submit_sync(&desc, svm->iommu);
180
181 if (sdev->dev_iotlb) {
182 desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) |
183 QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE;
David Woodhouse5d52f482015-10-20 15:52:13 +0100184 if (pages == -1) {
185 desc.high = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | QI_DEV_EIOTLB_SIZE;
186 } else if (pages > 1) {
187 /* The least significant zero bit indicates the size. So,
188 * for example, an "address" value of 0x12345f000 will
189 * flush from 0x123440000 to 0x12347ffff (256KiB). */
190 unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT);
191 unsigned long mask = __rounddown_pow_of_two(address ^ last);;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100192
David Woodhouse5d52f482015-10-20 15:52:13 +0100193 desc.high = QI_DEV_EIOTLB_ADDR((address & ~mask) | (mask - 1)) | QI_DEV_EIOTLB_SIZE;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100194 } else {
195 desc.high = QI_DEV_EIOTLB_ADDR(address);
196 }
197 qi_submit_sync(&desc, svm->iommu);
198 }
199}
200
201static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
David Woodhouse5d52f482015-10-20 15:52:13 +0100202 unsigned long pages, int ih, int gl)
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100203{
204 struct intel_svm_dev *sdev;
205
David Woodhouse907fea32015-10-13 14:11:13 +0100206 /* Try deferred invalidate if available */
207 if (svm->iommu->pasid_state_table &&
208 !cmpxchg64(&svm->iommu->pasid_state_table[svm->pasid].val, 0, 1ULL << 63))
209 return;
210
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100211 rcu_read_lock();
212 list_for_each_entry_rcu(sdev, &svm->devs, list)
David Woodhousee0349922015-10-16 19:36:53 +0100213 intel_flush_svm_range_dev(svm, sdev, address, pages, ih, gl);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100214 rcu_read_unlock();
215}
216
217static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
218 unsigned long address, pte_t pte)
219{
220 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
221
David Woodhousee0349922015-10-16 19:36:53 +0100222 intel_flush_svm_range(svm, address, 1, 1, 0);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100223}
224
225static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
226 unsigned long address)
227{
228 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
229
David Woodhousee0349922015-10-16 19:36:53 +0100230 intel_flush_svm_range(svm, address, 1, 1, 0);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100231}
232
233/* Pages have been freed at this point */
234static void intel_invalidate_range(struct mmu_notifier *mn,
235 struct mm_struct *mm,
236 unsigned long start, unsigned long end)
237{
238 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
239
240 intel_flush_svm_range(svm, start,
David Woodhousee0349922015-10-16 19:36:53 +0100241 (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100242}
243
244
David Woodhouse5a10ba22015-10-24 21:06:39 +0200245static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev, int pasid)
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100246{
247 struct qi_desc desc;
248
249 desc.high = 0;
David Woodhouse5a10ba22015-10-24 21:06:39 +0200250 desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100251
252 qi_submit_sync(&desc, svm->iommu);
253}
254
255static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
256{
257 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
David Woodhousee57e58b2016-01-12 19:18:06 +0000258 struct intel_svm_dev *sdev;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100259
David Woodhousee57e58b2016-01-12 19:18:06 +0000260 /* This might end up being called from exit_mmap(), *before* the page
261 * tables are cleared. And __mmu_notifier_release() will delete us from
262 * the list of notifiers so that our invalidate_range() callback doesn't
263 * get called when the page tables are cleared. So we need to protect
264 * against hardware accessing those page tables.
265 *
266 * We do it by clearing the entry in the PASID table and then flushing
267 * the IOTLB and the PASID table caches. This might upset hardware;
268 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
269 * page) so that we end up taking a fault that the hardware really
270 * *has* to handle gracefully without affecting other processes.
271 */
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100272 svm->iommu->pasid_table[svm->pasid].val = 0;
David Woodhousee57e58b2016-01-12 19:18:06 +0000273 wmb();
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100274
David Woodhousee57e58b2016-01-12 19:18:06 +0000275 rcu_read_lock();
276 list_for_each_entry_rcu(sdev, &svm->devs, list) {
277 intel_flush_pasid_dev(svm, sdev, svm->pasid);
278 intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
279 }
280 rcu_read_unlock();
281
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100282}
283
284static const struct mmu_notifier_ops intel_mmuops = {
285 .release = intel_mm_release,
286 .change_pte = intel_change_pte,
287 .invalidate_page = intel_invalidate_page,
288 .invalidate_range = intel_invalidate_range,
289};
290
/* Held across the bodies of intel_svm_bind_mm() and intel_svm_unbind_mm(). */
static DEFINE_MUTEX(pasid_mutex);
292
David Woodhouse0204a492015-10-13 17:18:10 +0100293int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100294{
295 struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
296 struct intel_svm_dev *sdev;
297 struct intel_svm *svm = NULL;
David Woodhouse5cec7532015-10-15 15:52:15 +0100298 struct mm_struct *mm = NULL;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100299 int pasid_max;
300 int ret;
301
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100302 if (WARN_ON(!iommu))
303 return -EINVAL;
304
305 if (dev_is_pci(dev)) {
306 pasid_max = pci_max_pasids(to_pci_dev(dev));
307 if (pasid_max < 0)
308 return -EINVAL;
309 } else
310 pasid_max = 1 << 20;
311
David Woodhouse5cec7532015-10-15 15:52:15 +0100312 if ((flags & SVM_FLAG_SUPERVISOR_MODE)) {
313 if (!ecap_srs(iommu->ecap))
314 return -EINVAL;
315 } else if (pasid) {
316 mm = get_task_mm(current);
317 BUG_ON(!mm);
318 }
319
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100320 mutex_lock(&pasid_mutex);
David Woodhouse569e4f72015-10-15 13:59:14 +0100321 if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100322 int i;
323
324 idr_for_each_entry(&iommu->pasid_idr, svm, i) {
David Woodhouse5cec7532015-10-15 15:52:15 +0100325 if (svm->mm != mm ||
David Woodhouse569e4f72015-10-15 13:59:14 +0100326 (svm->flags & SVM_FLAG_PRIVATE_PASID))
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100327 continue;
328
329 if (svm->pasid >= pasid_max) {
330 dev_warn(dev,
331 "Limited PASID width. Cannot use existing PASID %d\n",
332 svm->pasid);
333 ret = -ENOSPC;
334 goto out;
335 }
336
337 list_for_each_entry(sdev, &svm->devs, list) {
338 if (dev == sdev->dev) {
David Woodhouse0204a492015-10-13 17:18:10 +0100339 if (sdev->ops != ops) {
340 ret = -EBUSY;
341 goto out;
342 }
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100343 sdev->users++;
344 goto success;
345 }
346 }
347
348 break;
349 }
350 }
351
352 sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
353 if (!sdev) {
354 ret = -ENOMEM;
355 goto out;
356 }
357 sdev->dev = dev;
358
359 ret = intel_iommu_enable_pasid(iommu, sdev);
360 if (ret || !pasid) {
361 /* If they don't actually want to assign a PASID, this is
362 * just an enabling check/preparation. */
363 kfree(sdev);
364 goto out;
365 }
366 /* Finish the setup now we know we're keeping it */
367 sdev->users = 1;
David Woodhouse0204a492015-10-13 17:18:10 +0100368 sdev->ops = ops;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100369 init_rcu_head(&sdev->rcu);
370
371 if (!svm) {
372 svm = kzalloc(sizeof(*svm), GFP_KERNEL);
373 if (!svm) {
374 ret = -ENOMEM;
375 kfree(sdev);
376 goto out;
377 }
378 svm->iommu = iommu;
379
David Woodhouse91017042016-09-12 10:49:11 +0800380 if (pasid_max > iommu->pasid_max)
381 pasid_max = iommu->pasid_max;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100382
David Woodhouse5a10ba22015-10-24 21:06:39 +0200383 /* Do not use PASID 0 in caching mode (virtualised IOMMU) */
384 ret = idr_alloc(&iommu->pasid_idr, svm,
385 !!cap_caching_mode(iommu->cap),
386 pasid_max - 1, GFP_KERNEL);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100387 if (ret < 0) {
388 kfree(svm);
389 goto out;
390 }
391 svm->pasid = ret;
392 svm->notifier.ops = &intel_mmuops;
David Woodhouse5cec7532015-10-15 15:52:15 +0100393 svm->mm = mm;
David Woodhouse569e4f72015-10-15 13:59:14 +0100394 svm->flags = flags;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100395 INIT_LIST_HEAD_RCU(&svm->devs);
396 ret = -ENOMEM;
David Woodhouse5cec7532015-10-15 15:52:15 +0100397 if (mm) {
398 ret = mmu_notifier_register(&svm->notifier, mm);
399 if (ret) {
400 idr_remove(&svm->iommu->pasid_idr, svm->pasid);
401 kfree(svm);
402 kfree(sdev);
403 goto out;
404 }
405 iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1;
David Woodhouse5cec7532015-10-15 15:52:15 +0100406 } else
407 iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100408 wmb();
David Woodhouse5a10ba22015-10-24 21:06:39 +0200409 /* In caching mode, we still have to flush with PASID 0 when
410 * a PASID table entry becomes present. Not entirely clear
411 * *why* that would be the case — surely we could just issue
412 * a flush with the PASID value that we've changed? The PASID
413 * is the index into the table, after all. It's not like domain
414 * IDs in the case of the equivalent context-entry change in
415 * caching mode. And for that matter it's not entirely clear why
416 * a VMM would be in the business of caching the PASID table
417 * anyway. Surely that can be left entirely to the guest? */
418 if (cap_caching_mode(iommu->cap))
419 intel_flush_pasid_dev(svm, sdev, 0);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100420 }
421 list_add_rcu(&sdev->list, &svm->devs);
422
423 success:
424 *pasid = svm->pasid;
425 ret = 0;
426 out:
427 mutex_unlock(&pasid_mutex);
David Woodhouse5cec7532015-10-15 15:52:15 +0100428 if (mm)
429 mmput(mm);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100430 return ret;
431}
432EXPORT_SYMBOL_GPL(intel_svm_bind_mm);
433
434int intel_svm_unbind_mm(struct device *dev, int pasid)
435{
436 struct intel_svm_dev *sdev;
437 struct intel_iommu *iommu;
438 struct intel_svm *svm;
439 int ret = -EINVAL;
440
441 mutex_lock(&pasid_mutex);
442 iommu = intel_svm_device_to_iommu(dev);
443 if (!iommu || !iommu->pasid_table)
444 goto out;
445
446 svm = idr_find(&iommu->pasid_idr, pasid);
447 if (!svm)
448 goto out;
449
450 list_for_each_entry(sdev, &svm->devs, list) {
451 if (dev == sdev->dev) {
452 ret = 0;
453 sdev->users--;
454 if (!sdev->users) {
455 list_del_rcu(&sdev->list);
456 /* Flush the PASID cache and IOTLB for this device.
457 * Note that we do depend on the hardware *not* using
458 * the PASID any more. Just as we depend on other
459 * devices never using PASIDs that they have no right
460 * to use. We have a *shared* PASID table, because it's
461 * large and has to be physically contiguous. So it's
462 * hard to be as defensive as we might like. */
David Woodhouse5a10ba22015-10-24 21:06:39 +0200463 intel_flush_pasid_dev(svm, sdev, svm->pasid);
David Woodhousee0349922015-10-16 19:36:53 +0100464 intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100465 kfree_rcu(sdev, rcu);
466
467 if (list_empty(&svm->devs)) {
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100468
469 idr_remove(&svm->iommu->pasid_idr, svm->pasid);
David Woodhouse5cec7532015-10-15 15:52:15 +0100470 if (svm->mm)
David Woodhousee57e58b2016-01-12 19:18:06 +0000471 mmu_notifier_unregister(&svm->notifier, svm->mm);
472
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100473 /* We mandate that no page faults may be outstanding
474 * for the PASID when intel_svm_unbind_mm() is called.
475 * If that is not obeyed, subtle errors will happen.
476 * Let's make them less subtle... */
477 memset(svm, 0x6b, sizeof(*svm));
478 kfree(svm);
479 }
480 }
481 break;
482 }
483 }
484 out:
485 mutex_unlock(&pasid_mutex);
486
487 return ret;
488}
489EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);
David Woodhousea222a7f2015-10-07 23:35:18 +0100490
491/* Page request queue descriptor */
492struct page_req_dsc {
493 u64 srr:1;
494 u64 bof:1;
495 u64 pasid_present:1;
496 u64 lpig:1;
497 u64 pasid:20;
498 u64 bus:8;
499 u64 private:23;
500 u64 prg_index:9;
501 u64 rd_req:1;
502 u64 wr_req:1;
503 u64 exe_req:1;
504 u64 priv_req:1;
505 u64 devfn:8;
506 u64 addr:52;
507};
508
/*
 * Mask applied to the queue head/tail byte offsets: the ring spans
 * (0x1000 << PRQ_ORDER) bytes and offsets advance in 0x10-byte steps.
 */
#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10)
Joerg Roedel7f8312a2015-11-17 16:11:39 +0100510
511static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req)
512{
513 unsigned long requested = 0;
514
515 if (req->exe_req)
516 requested |= VM_EXEC;
517
518 if (req->rd_req)
519 requested |= VM_READ;
520
521 if (req->wr_req)
522 requested |= VM_WRITE;
523
524 return (requested & ~vma->vm_flags) != 0;
525}
526
David Woodhousea222a7f2015-10-07 23:35:18 +0100527static irqreturn_t prq_event_thread(int irq, void *d)
528{
529 struct intel_iommu *iommu = d;
530 struct intel_svm *svm = NULL;
531 int head, tail, handled = 0;
532
David Woodhouse46924002016-02-15 12:42:38 +0000533 /* Clear PPR bit before reading head/tail registers, to
534 * ensure that we get a new interrupt if needed. */
535 writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
536
David Woodhousea222a7f2015-10-07 23:35:18 +0100537 tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
538 head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
539 while (head != tail) {
David Woodhouse0204a492015-10-13 17:18:10 +0100540 struct intel_svm_dev *sdev;
David Woodhousea222a7f2015-10-07 23:35:18 +0100541 struct vm_area_struct *vma;
542 struct page_req_dsc *req;
543 struct qi_desc resp;
544 int ret, result;
545 u64 address;
546
547 handled = 1;
548
549 req = &iommu->prq[head / sizeof(*req)];
550
551 result = QI_RESP_FAILURE;
David Woodhouse7f92a2e2015-10-16 17:22:31 +0100552 address = (u64)req->addr << VTD_PAGE_SHIFT;
David Woodhousea222a7f2015-10-07 23:35:18 +0100553 if (!req->pasid_present) {
554 pr_err("%s: Page request without PASID: %08llx %08llx\n",
555 iommu->name, ((unsigned long long *)req)[0],
556 ((unsigned long long *)req)[1]);
557 goto bad_req;
558 }
559
560 if (!svm || svm->pasid != req->pasid) {
561 rcu_read_lock();
562 svm = idr_find(&iommu->pasid_idr, req->pasid);
563 /* It *can't* go away, because the driver is not permitted
564 * to unbind the mm while any page faults are outstanding.
565 * So we only need RCU to protect the internal idr code. */
566 rcu_read_unlock();
567
568 if (!svm) {
569 pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n",
570 iommu->name, req->pasid, ((unsigned long long *)req)[0],
571 ((unsigned long long *)req)[1]);
David Woodhouse26322ab2015-10-15 21:12:56 +0100572 goto no_pasid;
David Woodhousea222a7f2015-10-07 23:35:18 +0100573 }
574 }
575
576 result = QI_RESP_INVALID;
David Woodhouse5cec7532015-10-15 15:52:15 +0100577 /* Since we're using init_mm.pgd directly, we should never take
578 * any faults on kernel addresses. */
579 if (!svm->mm)
580 goto bad_req;
David Woodhousee57e58b2016-01-12 19:18:06 +0000581 /* If the mm is already defunct, don't handle faults. */
Vegard Nossum388f7932017-02-27 14:30:13 -0800582 if (!mmget_not_zero(svm->mm))
David Woodhousee57e58b2016-01-12 19:18:06 +0000583 goto bad_req;
David Woodhousea222a7f2015-10-07 23:35:18 +0100584 down_read(&svm->mm->mmap_sem);
585 vma = find_extend_vma(svm->mm, address);
586 if (!vma || address < vma->vm_start)
587 goto invalid;
588
Joerg Roedel7f8312a2015-11-17 16:11:39 +0100589 if (access_error(vma, req))
590 goto invalid;
591
Kirill A. Shutemovdcddffd2016-07-26 15:25:18 -0700592 ret = handle_mm_fault(vma, address,
David Woodhousea222a7f2015-10-07 23:35:18 +0100593 req->wr_req ? FAULT_FLAG_WRITE : 0);
594 if (ret & VM_FAULT_ERROR)
595 goto invalid;
596
597 result = QI_RESP_SUCCESS;
598 invalid:
599 up_read(&svm->mm->mmap_sem);
David Woodhousee57e58b2016-01-12 19:18:06 +0000600 mmput(svm->mm);
David Woodhousea222a7f2015-10-07 23:35:18 +0100601 bad_req:
602 /* Accounting for major/minor faults? */
David Woodhouse0204a492015-10-13 17:18:10 +0100603 rcu_read_lock();
604 list_for_each_entry_rcu(sdev, &svm->devs, list) {
Dan Carpenter3c7c2f32015-10-17 08:18:47 +0300605 if (sdev->sid == PCI_DEVID(req->bus, req->devfn))
David Woodhouse0204a492015-10-13 17:18:10 +0100606 break;
607 }
608 /* Other devices can go away, but the drivers are not permitted
609 * to unbind while any page faults might be in flight. So it's
610 * OK to drop the 'lock' here now we have it. */
611 rcu_read_unlock();
612
613 if (WARN_ON(&sdev->list == &svm->devs))
614 sdev = NULL;
615
616 if (sdev && sdev->ops && sdev->ops->fault_cb) {
617 int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
David Woodhouse0bdec952015-10-28 15:14:09 +0900618 (req->exe_req << 1) | (req->priv_req);
David Woodhouse0204a492015-10-13 17:18:10 +0100619 sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, req->private, rwxp, result);
620 }
David Woodhouse26322ab2015-10-15 21:12:56 +0100621 /* We get here in the error case where the PASID lookup failed,
622 and these can be NULL. Do not use them below this point! */
623 sdev = NULL;
624 svm = NULL;
625 no_pasid:
David Woodhousea222a7f2015-10-07 23:35:18 +0100626 if (req->lpig) {
627 /* Page Group Response */
628 resp.low = QI_PGRP_PASID(req->pasid) |
629 QI_PGRP_DID((req->bus << 8) | req->devfn) |
630 QI_PGRP_PASID_P(req->pasid_present) |
631 QI_PGRP_RESP_TYPE;
632 resp.high = QI_PGRP_IDX(req->prg_index) |
633 QI_PGRP_PRIV(req->private) | QI_PGRP_RESP_CODE(result);
634
David Woodhouse26322ab2015-10-15 21:12:56 +0100635 qi_submit_sync(&resp, iommu);
David Woodhousea222a7f2015-10-07 23:35:18 +0100636 } else if (req->srr) {
637 /* Page Stream Response */
638 resp.low = QI_PSTRM_IDX(req->prg_index) |
639 QI_PSTRM_PRIV(req->private) | QI_PSTRM_BUS(req->bus) |
640 QI_PSTRM_PASID(req->pasid) | QI_PSTRM_RESP_TYPE;
641 resp.high = QI_PSTRM_ADDR(address) | QI_PSTRM_DEVFN(req->devfn) |
642 QI_PSTRM_RESP_CODE(result);
643
David Woodhouse26322ab2015-10-15 21:12:56 +0100644 qi_submit_sync(&resp, iommu);
David Woodhousea222a7f2015-10-07 23:35:18 +0100645 }
646
647 head = (head + sizeof(*req)) & PRQ_RING_MASK;
648 }
649
650 dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
651
652 return IRQ_RETVAL(handled);
653}