blob: 7dc2f8d415b6921aafd1f07fb0fa785cb536e7a1 [file] [log] [blame]
David Woodhouse8a94ade2015-03-24 14:54:56 +00001/*
2 * Copyright © 2015 Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * Authors: David Woodhouse <dwmw2@infradead.org>
14 */
15
16#include <linux/intel-iommu.h>
David Woodhouse2f26e0a2015-09-09 11:40:47 +010017#include <linux/mmu_notifier.h>
18#include <linux/sched.h>
19#include <linux/slab.h>
20#include <linux/intel-svm.h>
21#include <linux/rculist.h>
22#include <linux/pci.h>
23#include <linux/pci-ats.h>
David Woodhousea222a7f2015-10-07 23:35:18 +010024#include <linux/dmar.h>
25#include <linux/interrupt.h>
26
27static irqreturn_t prq_event_thread(int irq, void *d);
David Woodhouse2f26e0a2015-09-09 11:40:47 +010028
29struct pasid_entry {
30 u64 val;
31};
David Woodhouse8a94ade2015-03-24 14:54:56 +000032
David Woodhouse907fea32015-10-13 14:11:13 +010033struct pasid_state_entry {
34 u64 val;
35};
36
David Woodhouse8a94ade2015-03-24 14:54:56 +000037int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu)
38{
39 struct page *pages;
40 int order;
41
David Woodhouse91017042016-09-12 10:49:11 +080042 /* Start at 2 because it's defined as 2^(1+PSS) */
43 iommu->pasid_max = 2 << ecap_pss(iommu->ecap);
David Woodhouse8a94ade2015-03-24 14:54:56 +000044
David Woodhouse91017042016-09-12 10:49:11 +080045 /* Eventually I'm promised we will get a multi-level PASID table
46 * and it won't have to be physically contiguous. Until then,
47 * limit the size because 8MiB contiguous allocations can be hard
48 * to come by. The limit of 0x20000, which is 1MiB for each of
49 * the PASID and PASID-state tables, is somewhat arbitrary. */
50 if (iommu->pasid_max > 0x20000)
51 iommu->pasid_max = 0x20000;
52
53 order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
David Woodhouse8a94ade2015-03-24 14:54:56 +000054 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
55 if (!pages) {
56 pr_warn("IOMMU: %s: Failed to allocate PASID table\n",
57 iommu->name);
58 return -ENOMEM;
59 }
60 iommu->pasid_table = page_address(pages);
61 pr_info("%s: Allocated order %d PASID table.\n", iommu->name, order);
62
63 if (ecap_dis(iommu->ecap)) {
David Woodhouse91017042016-09-12 10:49:11 +080064 /* Just making it explicit... */
65 BUILD_BUG_ON(sizeof(struct pasid_entry) != sizeof(struct pasid_state_entry));
David Woodhouse8a94ade2015-03-24 14:54:56 +000066 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
67 if (pages)
68 iommu->pasid_state_table = page_address(pages);
69 else
70 pr_warn("IOMMU: %s: Failed to allocate PASID state table\n",
71 iommu->name);
72 }
73
David Woodhouse2f26e0a2015-09-09 11:40:47 +010074 idr_init(&iommu->pasid_idr);
75
David Woodhouse8a94ade2015-03-24 14:54:56 +000076 return 0;
77}
78
79int intel_svm_free_pasid_tables(struct intel_iommu *iommu)
80{
David Woodhouse91017042016-09-12 10:49:11 +080081 int order = get_order(sizeof(struct pasid_entry) * iommu->pasid_max);
David Woodhouse8a94ade2015-03-24 14:54:56 +000082
83 if (iommu->pasid_table) {
84 free_pages((unsigned long)iommu->pasid_table, order);
85 iommu->pasid_table = NULL;
86 }
87 if (iommu->pasid_state_table) {
88 free_pages((unsigned long)iommu->pasid_state_table, order);
89 iommu->pasid_state_table = NULL;
90 }
David Woodhouse2f26e0a2015-09-09 11:40:47 +010091 idr_destroy(&iommu->pasid_idr);
David Woodhouse8a94ade2015-03-24 14:54:56 +000092 return 0;
93}
David Woodhouse2f26e0a2015-09-09 11:40:47 +010094
/* Page order of the page request queue allocation; the same value is
 * OR-ed into the address written to DMAR_PQA_REG. */
#define PRQ_ORDER 0
96
97int intel_svm_enable_prq(struct intel_iommu *iommu)
98{
99 struct page *pages;
100 int irq, ret;
101
102 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
103 if (!pages) {
104 pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
105 iommu->name);
106 return -ENOMEM;
107 }
108 iommu->prq = page_address(pages);
109
110 irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
111 if (irq <= 0) {
112 pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
113 iommu->name);
114 ret = -EINVAL;
115 err:
116 free_pages((unsigned long)iommu->prq, PRQ_ORDER);
117 iommu->prq = NULL;
118 return ret;
119 }
120 iommu->pr_irq = irq;
121
122 snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);
123
124 ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
125 iommu->prq_name, iommu);
126 if (ret) {
127 pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
128 iommu->name);
129 dmar_free_hwirq(irq);
Jerry Snitselaar62088f52017-12-20 09:48:56 -0700130 iommu->pr_irq = 0;
David Woodhousea222a7f2015-10-07 23:35:18 +0100131 goto err;
132 }
133 dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
134 dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
135 dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);
136
137 return 0;
138}
139
140int intel_svm_finish_prq(struct intel_iommu *iommu)
141{
142 dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
143 dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
144 dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);
145
Jerry Snitselaar62088f52017-12-20 09:48:56 -0700146 if (iommu->pr_irq) {
147 free_irq(iommu->pr_irq, iommu);
148 dmar_free_hwirq(iommu->pr_irq);
149 iommu->pr_irq = 0;
150 }
David Woodhousea222a7f2015-10-07 23:35:18 +0100151
152 free_pages((unsigned long)iommu->prq, PRQ_ORDER);
153 iommu->prq = NULL;
154
155 return 0;
156}
157
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100158static void intel_flush_svm_range_dev (struct intel_svm *svm, struct intel_svm_dev *sdev,
David Woodhouse5d52f482015-10-20 15:52:13 +0100159 unsigned long address, unsigned long pages, int ih, int gl)
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100160{
161 struct qi_desc desc;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100162
David Woodhouse5d52f482015-10-20 15:52:13 +0100163 if (pages == -1) {
David Woodhousee0349922015-10-16 19:36:53 +0100164 /* For global kernel pages we have to flush them in *all* PASIDs
165 * because that's the only option the hardware gives us. Despite
166 * the fact that they are actually only accessible through one. */
167 if (gl)
168 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
169 QI_EIOTLB_GRAN(QI_GRAN_ALL_ALL) | QI_EIOTLB_TYPE;
170 else
171 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
172 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100173 desc.high = 0;
174 } else {
David Woodhouse5d52f482015-10-20 15:52:13 +0100175 int mask = ilog2(__roundup_pow_of_two(pages));
176
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100177 desc.low = QI_EIOTLB_PASID(svm->pasid) | QI_EIOTLB_DID(sdev->did) |
178 QI_EIOTLB_GRAN(QI_GRAN_PSI_PASID) | QI_EIOTLB_TYPE;
David Woodhousee0349922015-10-16 19:36:53 +0100179 desc.high = QI_EIOTLB_ADDR(address) | QI_EIOTLB_GL(gl) |
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100180 QI_EIOTLB_IH(ih) | QI_EIOTLB_AM(mask);
181 }
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100182 qi_submit_sync(&desc, svm->iommu);
183
184 if (sdev->dev_iotlb) {
185 desc.low = QI_DEV_EIOTLB_PASID(svm->pasid) | QI_DEV_EIOTLB_SID(sdev->sid) |
186 QI_DEV_EIOTLB_QDEP(sdev->qdep) | QI_DEIOTLB_TYPE;
David Woodhouse5d52f482015-10-20 15:52:13 +0100187 if (pages == -1) {
188 desc.high = QI_DEV_EIOTLB_ADDR(-1ULL >> 1) | QI_DEV_EIOTLB_SIZE;
189 } else if (pages > 1) {
190 /* The least significant zero bit indicates the size. So,
191 * for example, an "address" value of 0x12345f000 will
192 * flush from 0x123440000 to 0x12347ffff (256KiB). */
193 unsigned long last = address + ((unsigned long)(pages - 1) << VTD_PAGE_SHIFT);
194 unsigned long mask = __rounddown_pow_of_two(address ^ last);;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100195
David Woodhouse5d52f482015-10-20 15:52:13 +0100196 desc.high = QI_DEV_EIOTLB_ADDR((address & ~mask) | (mask - 1)) | QI_DEV_EIOTLB_SIZE;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100197 } else {
198 desc.high = QI_DEV_EIOTLB_ADDR(address);
199 }
200 qi_submit_sync(&desc, svm->iommu);
201 }
202}
203
204static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
David Woodhouse5d52f482015-10-20 15:52:13 +0100205 unsigned long pages, int ih, int gl)
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100206{
207 struct intel_svm_dev *sdev;
208
David Woodhouse907fea32015-10-13 14:11:13 +0100209 /* Try deferred invalidate if available */
210 if (svm->iommu->pasid_state_table &&
211 !cmpxchg64(&svm->iommu->pasid_state_table[svm->pasid].val, 0, 1ULL << 63))
212 return;
213
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100214 rcu_read_lock();
215 list_for_each_entry_rcu(sdev, &svm->devs, list)
David Woodhousee0349922015-10-16 19:36:53 +0100216 intel_flush_svm_range_dev(svm, sdev, address, pages, ih, gl);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100217 rcu_read_unlock();
218}
219
220static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
221 unsigned long address, pte_t pte)
222{
223 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
224
David Woodhousee0349922015-10-16 19:36:53 +0100225 intel_flush_svm_range(svm, address, 1, 1, 0);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100226}
227
228static void intel_invalidate_page(struct mmu_notifier *mn, struct mm_struct *mm,
229 unsigned long address)
230{
231 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
232
David Woodhousee0349922015-10-16 19:36:53 +0100233 intel_flush_svm_range(svm, address, 1, 1, 0);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100234}
235
236/* Pages have been freed at this point */
237static void intel_invalidate_range(struct mmu_notifier *mn,
238 struct mm_struct *mm,
239 unsigned long start, unsigned long end)
240{
241 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
242
243 intel_flush_svm_range(svm, start,
David Woodhousee0349922015-10-16 19:36:53 +0100244 (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0, 0);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100245}
246
247
David Woodhouse5a10ba22015-10-24 21:06:39 +0200248static void intel_flush_pasid_dev(struct intel_svm *svm, struct intel_svm_dev *sdev, int pasid)
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100249{
250 struct qi_desc desc;
251
252 desc.high = 0;
David Woodhouse5a10ba22015-10-24 21:06:39 +0200253 desc.low = QI_PC_TYPE | QI_PC_DID(sdev->did) | QI_PC_PASID_SEL | QI_PC_PASID(pasid);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100254
255 qi_submit_sync(&desc, svm->iommu);
256}
257
258static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
259{
260 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
David Woodhousee57e58b2016-01-12 19:18:06 +0000261 struct intel_svm_dev *sdev;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100262
David Woodhousee57e58b2016-01-12 19:18:06 +0000263 /* This might end up being called from exit_mmap(), *before* the page
264 * tables are cleared. And __mmu_notifier_release() will delete us from
265 * the list of notifiers so that our invalidate_range() callback doesn't
266 * get called when the page tables are cleared. So we need to protect
267 * against hardware accessing those page tables.
268 *
269 * We do it by clearing the entry in the PASID table and then flushing
270 * the IOTLB and the PASID table caches. This might upset hardware;
271 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
272 * page) so that we end up taking a fault that the hardware really
273 * *has* to handle gracefully without affecting other processes.
274 */
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100275 svm->iommu->pasid_table[svm->pasid].val = 0;
David Woodhousee57e58b2016-01-12 19:18:06 +0000276 wmb();
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100277
David Woodhousee57e58b2016-01-12 19:18:06 +0000278 rcu_read_lock();
279 list_for_each_entry_rcu(sdev, &svm->devs, list) {
280 intel_flush_pasid_dev(svm, sdev, svm->pasid);
281 intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
282 }
283 rcu_read_unlock();
284
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100285}
286
287static const struct mmu_notifier_ops intel_mmuops = {
288 .release = intel_mm_release,
289 .change_pte = intel_change_pte,
290 .invalidate_page = intel_invalidate_page,
291 .invalidate_range = intel_invalidate_range,
292};
293
/* Held across the whole of intel_svm_bind_mm() and intel_svm_unbind_mm(). */
static DEFINE_MUTEX(pasid_mutex);
295
David Woodhouse0204a492015-10-13 17:18:10 +0100296int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ops *ops)
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100297{
298 struct intel_iommu *iommu = intel_svm_device_to_iommu(dev);
299 struct intel_svm_dev *sdev;
300 struct intel_svm *svm = NULL;
David Woodhouse5cec7532015-10-15 15:52:15 +0100301 struct mm_struct *mm = NULL;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100302 int pasid_max;
303 int ret;
304
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100305 if (WARN_ON(!iommu))
306 return -EINVAL;
307
308 if (dev_is_pci(dev)) {
309 pasid_max = pci_max_pasids(to_pci_dev(dev));
310 if (pasid_max < 0)
311 return -EINVAL;
312 } else
313 pasid_max = 1 << 20;
314
David Woodhouse5cec7532015-10-15 15:52:15 +0100315 if ((flags & SVM_FLAG_SUPERVISOR_MODE)) {
316 if (!ecap_srs(iommu->ecap))
317 return -EINVAL;
318 } else if (pasid) {
319 mm = get_task_mm(current);
320 BUG_ON(!mm);
321 }
322
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100323 mutex_lock(&pasid_mutex);
David Woodhouse569e4f72015-10-15 13:59:14 +0100324 if (pasid && !(flags & SVM_FLAG_PRIVATE_PASID)) {
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100325 int i;
326
327 idr_for_each_entry(&iommu->pasid_idr, svm, i) {
David Woodhouse5cec7532015-10-15 15:52:15 +0100328 if (svm->mm != mm ||
David Woodhouse569e4f72015-10-15 13:59:14 +0100329 (svm->flags & SVM_FLAG_PRIVATE_PASID))
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100330 continue;
331
332 if (svm->pasid >= pasid_max) {
333 dev_warn(dev,
334 "Limited PASID width. Cannot use existing PASID %d\n",
335 svm->pasid);
336 ret = -ENOSPC;
337 goto out;
338 }
339
340 list_for_each_entry(sdev, &svm->devs, list) {
341 if (dev == sdev->dev) {
David Woodhouse0204a492015-10-13 17:18:10 +0100342 if (sdev->ops != ops) {
343 ret = -EBUSY;
344 goto out;
345 }
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100346 sdev->users++;
347 goto success;
348 }
349 }
350
351 break;
352 }
353 }
354
355 sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
356 if (!sdev) {
357 ret = -ENOMEM;
358 goto out;
359 }
360 sdev->dev = dev;
361
362 ret = intel_iommu_enable_pasid(iommu, sdev);
363 if (ret || !pasid) {
364 /* If they don't actually want to assign a PASID, this is
365 * just an enabling check/preparation. */
366 kfree(sdev);
367 goto out;
368 }
369 /* Finish the setup now we know we're keeping it */
370 sdev->users = 1;
David Woodhouse0204a492015-10-13 17:18:10 +0100371 sdev->ops = ops;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100372 init_rcu_head(&sdev->rcu);
373
374 if (!svm) {
375 svm = kzalloc(sizeof(*svm), GFP_KERNEL);
376 if (!svm) {
377 ret = -ENOMEM;
378 kfree(sdev);
379 goto out;
380 }
381 svm->iommu = iommu;
382
David Woodhouse91017042016-09-12 10:49:11 +0800383 if (pasid_max > iommu->pasid_max)
384 pasid_max = iommu->pasid_max;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100385
David Woodhouse5a10ba22015-10-24 21:06:39 +0200386 /* Do not use PASID 0 in caching mode (virtualised IOMMU) */
387 ret = idr_alloc(&iommu->pasid_idr, svm,
388 !!cap_caching_mode(iommu->cap),
389 pasid_max - 1, GFP_KERNEL);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100390 if (ret < 0) {
391 kfree(svm);
Lu Baolu94911a02018-02-24 13:42:27 +0800392 kfree(sdev);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100393 goto out;
394 }
395 svm->pasid = ret;
396 svm->notifier.ops = &intel_mmuops;
David Woodhouse5cec7532015-10-15 15:52:15 +0100397 svm->mm = mm;
David Woodhouse569e4f72015-10-15 13:59:14 +0100398 svm->flags = flags;
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100399 INIT_LIST_HEAD_RCU(&svm->devs);
400 ret = -ENOMEM;
David Woodhouse5cec7532015-10-15 15:52:15 +0100401 if (mm) {
402 ret = mmu_notifier_register(&svm->notifier, mm);
403 if (ret) {
404 idr_remove(&svm->iommu->pasid_idr, svm->pasid);
405 kfree(svm);
406 kfree(sdev);
407 goto out;
408 }
409 iommu->pasid_table[svm->pasid].val = (u64)__pa(mm->pgd) | 1;
David Woodhouse5cec7532015-10-15 15:52:15 +0100410 } else
411 iommu->pasid_table[svm->pasid].val = (u64)__pa(init_mm.pgd) | 1 | (1ULL << 11);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100412 wmb();
David Woodhouse5a10ba22015-10-24 21:06:39 +0200413 /* In caching mode, we still have to flush with PASID 0 when
414 * a PASID table entry becomes present. Not entirely clear
415 * *why* that would be the case — surely we could just issue
416 * a flush with the PASID value that we've changed? The PASID
417 * is the index into the table, after all. It's not like domain
418 * IDs in the case of the equivalent context-entry change in
419 * caching mode. And for that matter it's not entirely clear why
420 * a VMM would be in the business of caching the PASID table
421 * anyway. Surely that can be left entirely to the guest? */
422 if (cap_caching_mode(iommu->cap))
423 intel_flush_pasid_dev(svm, sdev, 0);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100424 }
425 list_add_rcu(&sdev->list, &svm->devs);
426
427 success:
428 *pasid = svm->pasid;
429 ret = 0;
430 out:
431 mutex_unlock(&pasid_mutex);
David Woodhouse5cec7532015-10-15 15:52:15 +0100432 if (mm)
433 mmput(mm);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100434 return ret;
435}
436EXPORT_SYMBOL_GPL(intel_svm_bind_mm);
437
438int intel_svm_unbind_mm(struct device *dev, int pasid)
439{
440 struct intel_svm_dev *sdev;
441 struct intel_iommu *iommu;
442 struct intel_svm *svm;
443 int ret = -EINVAL;
444
445 mutex_lock(&pasid_mutex);
446 iommu = intel_svm_device_to_iommu(dev);
447 if (!iommu || !iommu->pasid_table)
448 goto out;
449
450 svm = idr_find(&iommu->pasid_idr, pasid);
451 if (!svm)
452 goto out;
453
454 list_for_each_entry(sdev, &svm->devs, list) {
455 if (dev == sdev->dev) {
456 ret = 0;
457 sdev->users--;
458 if (!sdev->users) {
459 list_del_rcu(&sdev->list);
460 /* Flush the PASID cache and IOTLB for this device.
461 * Note that we do depend on the hardware *not* using
462 * the PASID any more. Just as we depend on other
463 * devices never using PASIDs that they have no right
464 * to use. We have a *shared* PASID table, because it's
465 * large and has to be physically contiguous. So it's
466 * hard to be as defensive as we might like. */
David Woodhouse5a10ba22015-10-24 21:06:39 +0200467 intel_flush_pasid_dev(svm, sdev, svm->pasid);
David Woodhousee0349922015-10-16 19:36:53 +0100468 intel_flush_svm_range_dev(svm, sdev, 0, -1, 0, !svm->mm);
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100469 kfree_rcu(sdev, rcu);
470
471 if (list_empty(&svm->devs)) {
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100472
473 idr_remove(&svm->iommu->pasid_idr, svm->pasid);
David Woodhouse5cec7532015-10-15 15:52:15 +0100474 if (svm->mm)
David Woodhousee57e58b2016-01-12 19:18:06 +0000475 mmu_notifier_unregister(&svm->notifier, svm->mm);
476
David Woodhouse2f26e0a2015-09-09 11:40:47 +0100477 /* We mandate that no page faults may be outstanding
478 * for the PASID when intel_svm_unbind_mm() is called.
479 * If that is not obeyed, subtle errors will happen.
480 * Let's make them less subtle... */
481 memset(svm, 0x6b, sizeof(*svm));
482 kfree(svm);
483 }
484 }
485 break;
486 }
487 }
488 out:
489 mutex_unlock(&pasid_mutex);
490
491 return ret;
492}
493EXPORT_SYMBOL_GPL(intel_svm_unbind_mm);
David Woodhousea222a7f2015-10-07 23:35:18 +0100494
495/* Page request queue descriptor */
496struct page_req_dsc {
497 u64 srr:1;
498 u64 bof:1;
499 u64 pasid_present:1;
500 u64 lpig:1;
501 u64 pasid:20;
502 u64 bus:8;
503 u64 private:23;
504 u64 prg_index:9;
505 u64 rd_req:1;
506 u64 wr_req:1;
507 u64 exe_req:1;
508 u64 priv_req:1;
509 u64 devfn:8;
510 u64 addr:52;
511};
512
/* Mask applied to the queue head/tail byte offsets: wraps them inside
 * the (0x1000 << PRQ_ORDER)-byte ring and clears the low four bits. */
#define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10)
Joerg Roedel7f8312a2015-11-17 16:11:39 +0100514
515static bool access_error(struct vm_area_struct *vma, struct page_req_dsc *req)
516{
517 unsigned long requested = 0;
518
519 if (req->exe_req)
520 requested |= VM_EXEC;
521
522 if (req->rd_req)
523 requested |= VM_READ;
524
525 if (req->wr_req)
526 requested |= VM_WRITE;
527
528 return (requested & ~vma->vm_flags) != 0;
529}
530
David Woodhousea222a7f2015-10-07 23:35:18 +0100531static irqreturn_t prq_event_thread(int irq, void *d)
532{
533 struct intel_iommu *iommu = d;
534 struct intel_svm *svm = NULL;
535 int head, tail, handled = 0;
536
David Woodhouse46924002016-02-15 12:42:38 +0000537 /* Clear PPR bit before reading head/tail registers, to
538 * ensure that we get a new interrupt if needed. */
539 writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);
540
David Woodhousea222a7f2015-10-07 23:35:18 +0100541 tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
542 head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
543 while (head != tail) {
David Woodhouse0204a492015-10-13 17:18:10 +0100544 struct intel_svm_dev *sdev;
David Woodhousea222a7f2015-10-07 23:35:18 +0100545 struct vm_area_struct *vma;
546 struct page_req_dsc *req;
547 struct qi_desc resp;
548 int ret, result;
549 u64 address;
550
551 handled = 1;
552
553 req = &iommu->prq[head / sizeof(*req)];
554
555 result = QI_RESP_FAILURE;
David Woodhouse7f92a2e2015-10-16 17:22:31 +0100556 address = (u64)req->addr << VTD_PAGE_SHIFT;
David Woodhousea222a7f2015-10-07 23:35:18 +0100557 if (!req->pasid_present) {
558 pr_err("%s: Page request without PASID: %08llx %08llx\n",
559 iommu->name, ((unsigned long long *)req)[0],
560 ((unsigned long long *)req)[1]);
Lu Baolu19c60742018-11-05 10:18:58 +0800561 goto no_pasid;
David Woodhousea222a7f2015-10-07 23:35:18 +0100562 }
563
564 if (!svm || svm->pasid != req->pasid) {
565 rcu_read_lock();
566 svm = idr_find(&iommu->pasid_idr, req->pasid);
567 /* It *can't* go away, because the driver is not permitted
568 * to unbind the mm while any page faults are outstanding.
569 * So we only need RCU to protect the internal idr code. */
570 rcu_read_unlock();
571
572 if (!svm) {
573 pr_err("%s: Page request for invalid PASID %d: %08llx %08llx\n",
574 iommu->name, req->pasid, ((unsigned long long *)req)[0],
575 ((unsigned long long *)req)[1]);
David Woodhouse26322ab2015-10-15 21:12:56 +0100576 goto no_pasid;
David Woodhousea222a7f2015-10-07 23:35:18 +0100577 }
578 }
579
580 result = QI_RESP_INVALID;
David Woodhouse5cec7532015-10-15 15:52:15 +0100581 /* Since we're using init_mm.pgd directly, we should never take
582 * any faults on kernel addresses. */
583 if (!svm->mm)
584 goto bad_req;
David Woodhousee57e58b2016-01-12 19:18:06 +0000585 /* If the mm is already defunct, don't handle faults. */
586 if (!atomic_inc_not_zero(&svm->mm->mm_users))
587 goto bad_req;
David Woodhousea222a7f2015-10-07 23:35:18 +0100588 down_read(&svm->mm->mmap_sem);
589 vma = find_extend_vma(svm->mm, address);
590 if (!vma || address < vma->vm_start)
591 goto invalid;
592
Joerg Roedel7f8312a2015-11-17 16:11:39 +0100593 if (access_error(vma, req))
594 goto invalid;
595
Kirill A. Shutemovdcddffd2016-07-26 15:25:18 -0700596 ret = handle_mm_fault(vma, address,
David Woodhousea222a7f2015-10-07 23:35:18 +0100597 req->wr_req ? FAULT_FLAG_WRITE : 0);
598 if (ret & VM_FAULT_ERROR)
599 goto invalid;
600
601 result = QI_RESP_SUCCESS;
602 invalid:
603 up_read(&svm->mm->mmap_sem);
David Woodhousee57e58b2016-01-12 19:18:06 +0000604 mmput(svm->mm);
David Woodhousea222a7f2015-10-07 23:35:18 +0100605 bad_req:
606 /* Accounting for major/minor faults? */
David Woodhouse0204a492015-10-13 17:18:10 +0100607 rcu_read_lock();
608 list_for_each_entry_rcu(sdev, &svm->devs, list) {
Dan Carpenter3c7c2f32015-10-17 08:18:47 +0300609 if (sdev->sid == PCI_DEVID(req->bus, req->devfn))
David Woodhouse0204a492015-10-13 17:18:10 +0100610 break;
611 }
612 /* Other devices can go away, but the drivers are not permitted
613 * to unbind while any page faults might be in flight. So it's
614 * OK to drop the 'lock' here now we have it. */
615 rcu_read_unlock();
616
617 if (WARN_ON(&sdev->list == &svm->devs))
618 sdev = NULL;
619
620 if (sdev && sdev->ops && sdev->ops->fault_cb) {
621 int rwxp = (req->rd_req << 3) | (req->wr_req << 2) |
David Woodhouse0bdec952015-10-28 15:14:09 +0900622 (req->exe_req << 1) | (req->priv_req);
David Woodhouse0204a492015-10-13 17:18:10 +0100623 sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, req->private, rwxp, result);
624 }
David Woodhouse26322ab2015-10-15 21:12:56 +0100625 /* We get here in the error case where the PASID lookup failed,
626 and these can be NULL. Do not use them below this point! */
627 sdev = NULL;
628 svm = NULL;
629 no_pasid:
David Woodhousea222a7f2015-10-07 23:35:18 +0100630 if (req->lpig) {
631 /* Page Group Response */
632 resp.low = QI_PGRP_PASID(req->pasid) |
633 QI_PGRP_DID((req->bus << 8) | req->devfn) |
634 QI_PGRP_PASID_P(req->pasid_present) |
635 QI_PGRP_RESP_TYPE;
636 resp.high = QI_PGRP_IDX(req->prg_index) |
637 QI_PGRP_PRIV(req->private) | QI_PGRP_RESP_CODE(result);
638
David Woodhouse26322ab2015-10-15 21:12:56 +0100639 qi_submit_sync(&resp, iommu);
David Woodhousea222a7f2015-10-07 23:35:18 +0100640 } else if (req->srr) {
641 /* Page Stream Response */
642 resp.low = QI_PSTRM_IDX(req->prg_index) |
643 QI_PSTRM_PRIV(req->private) | QI_PSTRM_BUS(req->bus) |
644 QI_PSTRM_PASID(req->pasid) | QI_PSTRM_RESP_TYPE;
645 resp.high = QI_PSTRM_ADDR(address) | QI_PSTRM_DEVFN(req->devfn) |
646 QI_PSTRM_RESP_CODE(result);
647
David Woodhouse26322ab2015-10-15 21:12:56 +0100648 qi_submit_sync(&resp, iommu);
David Woodhousea222a7f2015-10-07 23:35:18 +0100649 }
650
651 head = (head + sizeof(*req)) & PRQ_RING_MASK;
652 }
653
654 dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);
655
656 return IRQ_RETVAL(handled);
657}