/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 */

#include <linux/init.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/dma-mapping.h>
#include <linux/mempool.h>
#include <linux/timer.h>
#include <linux/iova.h>
#include <linux/intel-iommu.h>
#include <asm/cacheflush.h>
#include <asm/iommu.h>
#include "pci.h"

#define ROOT_SIZE		VTD_PAGE_SIZE
#define CONTEXT_SIZE		VTD_PAGE_SIZE

#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)

#define IOAPIC_RANGE_START	(0xfee00000)
#define IOAPIC_RANGE_END	(0xfeefffff)
#define IOVA_START_ADDR		(0x1000)

#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48

#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)

#define IOVA_PFN(addr)		((addr) >> PAGE_SHIFT)
#define DMA_32BIT_PFN		IOVA_PFN(DMA_32BIT_MASK)
#define DMA_64BIT_PFN		IOVA_PFN(DMA_64BIT_MASK)

/* global iommu list, set NULL for ignored DMAR units */
static struct intel_iommu **g_iommus;

/*
 * 0: Present
 * 1-11: Reserved
 * 12-63: Context Ptr (12 - (haw-1))
 * 64-127: Reserved
 */
struct root_entry {
	u64	val;
	u64	rsvd1;
};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
static inline bool root_present(struct root_entry *root)
{
	return (root->val & 1);
}
static inline void set_root_present(struct root_entry *root)
{
	root->val |= 1;
}
static inline void set_root_value(struct root_entry *root, unsigned long value)
{
	root->val |= value & VTD_PAGE_MASK;
}

static inline struct context_entry *
get_context_addr_from_root(struct root_entry *root)
{
	return (struct context_entry *)
		(root_present(root) ? phys_to_virt(
		root->val & VTD_PAGE_MASK) :
		NULL);
}

/*
 * low 64 bits:
 * 0: present
 * 1: fault processing disable
 * 2-3: translation type
 * 12-63: address space root
 * high 64 bits:
 * 0-2: address width
 * 3-6: aval
 * 8-23: domain id
 */
struct context_entry {
	u64 lo;
	u64 hi;
};

static inline bool context_present(struct context_entry *context)
{
	return (context->lo & 1);
}
static inline void context_set_present(struct context_entry *context)
{
	context->lo |= 1;
}

static inline void context_set_fault_enable(struct context_entry *context)
{
	context->lo &= (((u64)-1) << 2) | 1;
}

#define CONTEXT_TT_MULTI_LEVEL 0

static inline void context_set_translation_type(struct context_entry *context,
						unsigned long value)
{
	context->lo &= (((u64)-1) << 4) | 3;
	context->lo |= (value & 3) << 2;
}

static inline void context_set_address_root(struct context_entry *context,
					    unsigned long value)
{
	context->lo |= value & VTD_PAGE_MASK;
}

static inline void context_set_address_width(struct context_entry *context,
					     unsigned long value)
{
	context->hi |= value & 7;
}

static inline void context_set_domain_id(struct context_entry *context,
					 unsigned long value)
{
	context->hi |= (value & ((1 << 16) - 1)) << 8;
}

static inline void context_clear_entry(struct context_entry *context)
{
	context->lo = 0;
	context->hi = 0;
}

/*
 * 0: readable
 * 1: writable
 * 2-6: reserved
 * 7: super page
 * 8-11: available
 * 12-63: Host physical address
 */
struct dma_pte {
	u64 val;
};

static inline void dma_clear_pte(struct dma_pte *pte)
{
	pte->val = 0;
}

static inline void dma_set_pte_readable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_READ;
}

static inline void dma_set_pte_writable(struct dma_pte *pte)
{
	pte->val |= DMA_PTE_WRITE;
}

static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
{
	pte->val = (pte->val & ~3) | (prot & 3);
}

static inline u64 dma_pte_addr(struct dma_pte *pte)
{
	return (pte->val & VTD_PAGE_MASK);
}

static inline void dma_set_pte_addr(struct dma_pte *pte, u64 addr)
{
	pte->val |= (addr & VTD_PAGE_MASK);
}

static inline bool dma_pte_present(struct dma_pte *pte)
{
	return (pte->val & 3) != 0;
}

/* devices under the same p2p bridge are owned in one domain */
#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)

/* domain represents a virtual machine; more than one device
 * across iommus may be owned by one domain, e.g. a kvm guest.
 */
#define DOMAIN_FLAG_VIRTUAL_MACHINE	(1 << 1)

struct dmar_domain {
	int	id;			/* domain id */
	unsigned long iommu_bmp;	/* bitmap of iommus this domain uses */

	struct list_head devices;	/* all devices' list */
	struct iova_domain iovad;	/* iova's that belong to this domain */

	struct dma_pte	*pgd;		/* virtual address */
	spinlock_t	mapping_lock;	/* page table lock */
	int		gaw;		/* max guest address width */

	/* adjusted guest address width, 0 is level 2 30-bit */
	int		agaw;

	int		flags;		/* flags to find out type of domain */

	int		iommu_coherency;/* indicate coherency of iommu access */
	int		iommu_count;	/* reference count of iommu */
	spinlock_t	iommu_lock;	/* protect iommu set in domain */
};

/* PCI domain-device relationship */
struct device_domain_info {
	struct list_head link;	/* link to domain siblings */
	struct list_head global; /* link to global list */
	u8 bus;			/* PCI bus number */
	u8 devfn;		/* PCI devfn number */
	struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
	struct dmar_domain *domain; /* pointer to domain */
};

static void flush_unmaps_timeout(unsigned long data);

DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);

#define HIGH_WATER_MARK 250
struct deferred_flush_tables {
	int next;
	struct iova *iova[HIGH_WATER_MARK];
	struct dmar_domain *domain[HIGH_WATER_MARK];
};

static struct deferred_flush_tables *deferred_flush;

/* bitmap for indexing intel_iommus */
static int g_num_of_iommus;

static DEFINE_SPINLOCK(async_umap_flush_lock);
static LIST_HEAD(unmaps_to_do);

static int timer_on;
static long list_size;

static void domain_remove_dev_info(struct dmar_domain *domain);

int dmar_disabled;
static int __initdata dmar_map_gfx = 1;
static int dmar_forcedac;
static int intel_iommu_strict;

#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
static DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);

static int __init intel_iommu_setup(char *str)
{
	if (!str)
		return -EINVAL;
	while (*str) {
		if (!strncmp(str, "off", 3)) {
			dmar_disabled = 1;
			printk(KERN_INFO "Intel-IOMMU: disabled\n");
		} else if (!strncmp(str, "igfx_off", 8)) {
			dmar_map_gfx = 0;
			printk(KERN_INFO
				"Intel-IOMMU: disable GFX device mapping\n");
		} else if (!strncmp(str, "forcedac", 8)) {
			printk(KERN_INFO
				"Intel-IOMMU: Forcing DAC for PCI devices\n");
			dmar_forcedac = 1;
		} else if (!strncmp(str, "strict", 6)) {
			printk(KERN_INFO
				"Intel-IOMMU: disable batched IOTLB flush\n");
			intel_iommu_strict = 1;
		}

		str += strcspn(str, ",");
		while (*str == ',')
			str++;
	}
	return 0;
}
__setup("intel_iommu=", intel_iommu_setup);

static struct kmem_cache *iommu_domain_cache;
static struct kmem_cache *iommu_devinfo_cache;
static struct kmem_cache *iommu_iova_cache;

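/*
 * The allocation helpers below briefly set PF_MEMALLOC so that the
 * GFP_ATOMIC allocations can dip into memory reserves; the caller's
 * original flag state is restored before returning.
 */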
static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
{
	unsigned int flags;
	void *vaddr;

	/* trying to avoid low memory issues */
	flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
	current->flags &= (~PF_MEMALLOC | flags);
	return vaddr;
}


static inline void *alloc_pgtable_page(void)
{
	unsigned int flags;
	void *vaddr;

	/* trying to avoid low memory issues */
	flags = current->flags & PF_MEMALLOC;
	current->flags |= PF_MEMALLOC;
	vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
	current->flags &= (~PF_MEMALLOC | flags);
	return vaddr;
}

static inline void free_pgtable_page(void *vaddr)
{
	free_page((unsigned long)vaddr);
}

static inline void *alloc_domain_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_domain_cache);
}

static void free_domain_mem(void *vaddr)
{
	kmem_cache_free(iommu_domain_cache, vaddr);
}

static inline void *alloc_devinfo_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_devinfo_cache);
}

static inline void free_devinfo_mem(void *vaddr)
{
	kmem_cache_free(iommu_devinfo_cache, vaddr);
}

struct iova *alloc_iova_mem(void)
{
	return iommu_kmem_cache_alloc(iommu_iova_cache);
}

void free_iova_mem(struct iova *iova)
{
	kmem_cache_free(iommu_iova_cache, iova);
}


static inline int width_to_agaw(int width);

/* Calculate agaw for each iommu.
 * "SAGAW" may be different across iommus; use the default agaw, and
 * fall back to a smaller supported agaw for iommus that don't support
 * the default agaw.
 */
int iommu_calculate_agaw(struct intel_iommu *iommu)
{
	unsigned long sagaw;
	int agaw = -1;

	sagaw = cap_sagaw(iommu->cap);
	for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH);
	     agaw >= 0; agaw--) {
		if (test_bit(agaw, &sagaw))
			break;
	}

	return agaw;
}

/* in native case, each domain is related to only one iommu */
static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
{
	int iommu_id;

	BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);

	iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
	if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
		return NULL;

	return g_iommus[iommu_id];
}

/* "Coherency" capability may be different across iommus */
static void domain_update_iommu_coherency(struct dmar_domain *domain)
{
	int i;

	domain->iommu_coherency = 1;

	i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
	for (; i < g_num_of_iommus; ) {
		if (!ecap_coherent(g_iommus[i]->ecap)) {
			domain->iommu_coherency = 0;
			break;
		}
		i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
	}
}

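/*
 * Find the IOMMU (DRHD unit) that translates the device identified by
 * bus/devfn; a unit with include_all set covers any device not claimed
 * by a more specific unit.
 */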
static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn)
{
	struct dmar_drhd_unit *drhd = NULL;
	int i;

	for_each_drhd_unit(drhd) {
		if (drhd->ignored)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++)
			if (drhd->devices[i]->bus->number == bus &&
			    drhd->devices[i]->devfn == devfn)
				return drhd->iommu;

		if (drhd->include_all)
			return drhd->iommu;
	}

	return NULL;
}

static void domain_flush_cache(struct dmar_domain *domain,
			       void *addr, int size)
{
	if (!domain->iommu_coherency)
		clflush_cache_range(addr, size);
}

/* Gets context entry for a given bus and devfn */
static struct context_entry *device_to_context_entry(struct intel_iommu *iommu,
		u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long phy_addr;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		context = (struct context_entry *)alloc_pgtable_page();
		if (!context) {
			spin_unlock_irqrestore(&iommu->lock, flags);
			return NULL;
		}
		__iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
		phy_addr = virt_to_phys((void *)context);
		set_root_value(root, phy_addr);
		set_root_present(root);
		__iommu_flush_cache(iommu, root, sizeof(*root));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
	return &context[devfn];
}

static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (!context) {
		ret = 0;
		goto out;
	}
	ret = context_present(&context[devfn]);
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
	return ret;
}

static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
{
	struct root_entry *root;
	struct context_entry *context;
	unsigned long flags;

	spin_lock_irqsave(&iommu->lock, flags);
	root = &iommu->root_entry[bus];
	context = get_context_addr_from_root(root);
	if (context) {
		context_clear_entry(&context[devfn]);
		__iommu_flush_cache(iommu, &context[devfn],
				    sizeof(*context));
	}
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static void free_context_table(struct intel_iommu *iommu)
{
	struct root_entry *root;
	int i;
	unsigned long flags;
	struct context_entry *context;

	spin_lock_irqsave(&iommu->lock, flags);
	if (!iommu->root_entry) {
		goto out;
	}
	for (i = 0; i < ROOT_ENTRY_NR; i++) {
		root = &iommu->root_entry[i];
		context = get_context_addr_from_root(root);
		if (context)
			free_pgtable_page(context);
	}
	free_pgtable_page(iommu->root_entry);
	iommu->root_entry = NULL;
out:
	spin_unlock_irqrestore(&iommu->lock, flags);
}

/* page table handling */
#define LEVEL_STRIDE		(9)
#define LEVEL_MASK		(((u64)1 << LEVEL_STRIDE) - 1)

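/*
 * Each page-table level decodes LEVEL_STRIDE (9) address bits and the
 * smallest supported width is 30 bits (a 2-level table), so agaw
 * 0/1/2/3 maps to a 2/3/4/5-level table covering 30/39/48/57 bits.
 */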
static inline int agaw_to_level(int agaw)
{
	return agaw + 2;
}

static inline int agaw_to_width(int agaw)
{
	return 30 + agaw * LEVEL_STRIDE;
}

static inline int width_to_agaw(int width)
{
	return (width - 30) / LEVEL_STRIDE;
}

static inline unsigned int level_to_offset_bits(int level)
{
	return (12 + (level - 1) * LEVEL_STRIDE);
}

static inline int address_level_offset(u64 addr, int level)
{
	return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
}

static inline u64 level_mask(int level)
{
	return ((u64)-1 << level_to_offset_bits(level));
}

static inline u64 level_size(int level)
{
	return ((u64)1 << level_to_offset_bits(level));
}

static inline u64 align_to_level(u64 addr, int level)
{
	return ((addr + level_size(level) - 1) & level_mask(level));
}

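/*
 * Walk the domain's page table down to the last level for @addr,
 * allocating and flushing intermediate page-table pages on demand.
 * Returns the last-level PTE, or NULL on allocation failure.
 */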
static struct dma_pte *addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
{
	int addr_width = agaw_to_width(domain->agaw);
	struct dma_pte *parent, *pte = NULL;
	int level = agaw_to_level(domain->agaw);
	int offset;
	unsigned long flags;

	BUG_ON(!domain->pgd);

	addr &= (((u64)1) << addr_width) - 1;
	parent = domain->pgd;

	spin_lock_irqsave(&domain->mapping_lock, flags);
	while (level > 0) {
		void *tmp_page;

		offset = address_level_offset(addr, level);
		pte = &parent[offset];
		if (level == 1)
			break;

		if (!dma_pte_present(pte)) {
			tmp_page = alloc_pgtable_page();

			if (!tmp_page) {
				spin_unlock_irqrestore(&domain->mapping_lock,
					flags);
				return NULL;
			}
			domain_flush_cache(domain, tmp_page, PAGE_SIZE);
			dma_set_pte_addr(pte, virt_to_phys(tmp_page));
			/*
			 * high level table always sets r/w, last level page
			 * table control read/write
			 */
			dma_set_pte_readable(pte);
			dma_set_pte_writable(pte);
			domain_flush_cache(domain, pte, sizeof(*pte));
		}
		parent = phys_to_virt(dma_pte_addr(pte));
		level--;
	}

	spin_unlock_irqrestore(&domain->mapping_lock, flags);
	return pte;
}

/* return address's pte at specific level */
static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
		int level)
{
	struct dma_pte *parent, *pte = NULL;
	int total = agaw_to_level(domain->agaw);
	int offset;

	parent = domain->pgd;
	while (level <= total) {
		offset = address_level_offset(addr, total);
		pte = &parent[offset];
		if (level == total)
			return pte;

		if (!dma_pte_present(pte))
			break;
		parent = phys_to_virt(dma_pte_addr(pte));
		total--;
	}
	return NULL;
}

/* clear one page's page table */
static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
{
	struct dma_pte *pte = NULL;

	/* get last level pte */
	pte = dma_addr_level_pte(domain, addr, 1);

	if (pte) {
		dma_clear_pte(pte);
		domain_flush_cache(domain, pte, sizeof(*pte));
	}
}

/* clear last level pte; a tlb flush should follow */
static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
{
	int addr_width = agaw_to_width(domain->agaw);

	start &= (((u64)1) << addr_width) - 1;
	end &= (((u64)1) << addr_width) - 1;
	/* in case it's a partial page */
	start = PAGE_ALIGN(start);
	end &= PAGE_MASK;

	/* we don't need a lock here; nobody else touches the iova range */
	while (start < end) {
		dma_pte_clear_one(domain, start);
		start += VTD_PAGE_SIZE;
	}
}

/* free page table pages. last level pte should already be cleared */
static void dma_pte_free_pagetable(struct dmar_domain *domain,
	u64 start, u64 end)
{
	int addr_width = agaw_to_width(domain->agaw);
	struct dma_pte *pte;
	int total = agaw_to_level(domain->agaw);
	int level;
	u64 tmp;

	start &= (((u64)1) << addr_width) - 1;
	end &= (((u64)1) << addr_width) - 1;

	/* we don't need lock here, nobody else touches the iova range */
	level = 2;
	while (level <= total) {
		tmp = align_to_level(start, level);
		if (tmp >= end || (tmp + level_size(level) > end))
			return;

		while (tmp < end) {
			pte = dma_addr_level_pte(domain, tmp, level);
			if (pte) {
				free_pgtable_page(
					phys_to_virt(dma_pte_addr(pte)));
				dma_clear_pte(pte);
				domain_flush_cache(domain, pte, sizeof(*pte));
			}
			tmp += level_size(level);
		}
		level++;
	}
	/* free pgd */
	if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
		free_pgtable_page(domain->pgd);
		domain->pgd = NULL;
	}
}

/* iommu handling */
static int iommu_alloc_root_entry(struct intel_iommu *iommu)
{
	struct root_entry *root;
	unsigned long flags;

	root = (struct root_entry *)alloc_pgtable_page();
	if (!root)
		return -ENOMEM;

	__iommu_flush_cache(iommu, root, ROOT_SIZE);

	spin_lock_irqsave(&iommu->lock, flags);
	iommu->root_entry = root;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return 0;
}

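/*
 * Program the root-entry table address into the hardware and issue a
 * "Set Root Table Pointer" command, then poll until the status bit
 * reports completion.
 */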
static void iommu_set_root_entry(struct intel_iommu *iommu)
{
	void *addr;
	u32 cmd, sts;
	unsigned long flag;

	addr = iommu->root_entry;

	spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));

	cmd = iommu->gcmd | DMA_GCMD_SRTP;
	writel(cmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (sts & DMA_GSTS_RTPS), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static void iommu_flush_write_buffer(struct intel_iommu *iommu)
{
	u32 val;
	unsigned long flag;

	if (!cap_rwbf(iommu->cap))
		return;
	val = iommu->gcmd | DMA_GCMD_WBF;

	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(val, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
			readl, (!(val & DMA_GSTS_WBFS)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

/* return value determines if we need a write buffer flush */
static int __iommu_flush_context(struct intel_iommu *iommu,
	u16 did, u16 source_id, u8 function_mask, u64 type,
	int non_present_entry_flush)
{
	u64 val = 0;
	unsigned long flag;

	/*
	 * In the non-present entry flush case, if hardware doesn't cache
	 * non-present entries we do nothing, and if hardware does cache
	 * non-present entries, we flush entries of domain 0 (the domain id
	 * is used to cache any non-present entries)
	 */
	if (non_present_entry_flush) {
		if (!cap_caching_mode(iommu->cap))
			return 1;
		else
			did = 0;
	}

	switch (type) {
	case DMA_CCMD_GLOBAL_INVL:
		val = DMA_CCMD_GLOBAL_INVL;
		break;
	case DMA_CCMD_DOMAIN_INVL:
		val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
		break;
	case DMA_CCMD_DEVICE_INVL:
		val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
			| DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
		break;
	default:
		BUG();
	}
	val |= DMA_CCMD_ICC;

	spin_lock_irqsave(&iommu->register_lock, flag);
	dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
		dmar_readq, (!(val & DMA_CCMD_ICC)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* flushing a context entry will implicitly flush the write buffer */
	return 0;
}

/* return value determines if we need a write buffer flush */
static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
	u64 addr, unsigned int size_order, u64 type,
	int non_present_entry_flush)
{
	int tlb_offset = ecap_iotlb_offset(iommu->ecap);
	u64 val = 0, val_iva = 0;
	unsigned long flag;

	/*
	 * In the non-present entry flush case, if hardware doesn't cache
	 * non-present entries we do nothing, and if hardware does cache
	 * non-present entries, we flush entries of domain 0 (the domain id
	 * is used to cache any non-present entries)
	 */
	if (non_present_entry_flush) {
		if (!cap_caching_mode(iommu->cap))
			return 1;
		else
			did = 0;
	}

	switch (type) {
	case DMA_TLB_GLOBAL_FLUSH:
		/* global flush doesn't need to set IVA_REG */
		val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
		break;
	case DMA_TLB_DSI_FLUSH:
		val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		break;
	case DMA_TLB_PSI_FLUSH:
		val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
		/* Note: always flush non-leaf currently */
		val_iva = size_order | addr;
		break;
	default:
		BUG();
	}
	/* Note: set drain read/write */
#if 0
	/*
	 * This is probably to be super secure.. Looks like we can
	 * ignore it without any impact.
	 */
	if (cap_read_drain(iommu->cap))
		val |= DMA_TLB_READ_DRAIN;
#endif
	if (cap_write_drain(iommu->cap))
		val |= DMA_TLB_WRITE_DRAIN;

	spin_lock_irqsave(&iommu->register_lock, flag);
	/* Note: Only uses first TLB reg currently */
	if (val_iva)
		dmar_writeq(iommu->reg + tlb_offset, val_iva);
	dmar_writeq(iommu->reg + tlb_offset + 8, val);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, tlb_offset + 8,
		dmar_readq, (!(val & DMA_TLB_IVT)), val);

	spin_unlock_irqrestore(&iommu->register_lock, flag);

	/* check IOTLB invalidation granularity */
	if (DMA_TLB_IAIG(val) == 0)
		printk(KERN_ERR "IOMMU: flush IOTLB failed\n");
	if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
		pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
			(unsigned long long)DMA_TLB_IIRG(type),
			(unsigned long long)DMA_TLB_IAIG(val));
	/* flushing an iotlb entry will implicitly flush the write buffer */
	return 0;
}

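/*
 * Page-selective IOTLB invalidation: fall back to a domain-selective
 * flush when the hardware lacks PSI support or the requested range is
 * larger than the maximum address mask it can handle.
 */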
static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
	u64 addr, unsigned int pages, int non_present_entry_flush)
{
	unsigned int mask;

	BUG_ON(addr & (~VTD_PAGE_MASK));
	BUG_ON(pages == 0);

	/* Fall back to domain-selective flush if there is no PSI support */
	if (!cap_pgsel_inv(iommu->cap))
		return iommu->flush.flush_iotlb(iommu, did, 0, 0,
						DMA_TLB_DSI_FLUSH,
						non_present_entry_flush);

	/*
	 * PSI requires the page size to be 2 ^ x, and the base address is
	 * naturally aligned to the size
	 */
	mask = ilog2(__roundup_pow_of_two(pages));
	/* Fall back to domain-selective flush if the size is too big */
	if (mask > cap_max_amask_val(iommu->cap))
		return iommu->flush.flush_iotlb(iommu, did, 0, 0,
			DMA_TLB_DSI_FLUSH, non_present_entry_flush);

	return iommu->flush.flush_iotlb(iommu, did, addr, mask,
					DMA_TLB_PSI_FLUSH,
					non_present_entry_flush);
}

static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
{
	u32 pmen;
	unsigned long flags;

	spin_lock_irqsave(&iommu->register_lock, flags);
	pmen = readl(iommu->reg + DMAR_PMEN_REG);
	pmen &= ~DMA_PMEN_EPM;
	writel(pmen, iommu->reg + DMAR_PMEN_REG);

	/* wait for the protected region status bit to clear */
	IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
		readl, !(pmen & DMA_PMEN_PRS), pmen);

	spin_unlock_irqrestore(&iommu->register_lock, flags);
}

static int iommu_enable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;

	spin_lock_irqsave(&iommu->register_lock, flags);
	writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (sts & DMA_GSTS_TES), sts);

	iommu->gcmd |= DMA_GCMD_TE;
	spin_unlock_irqrestore(&iommu->register_lock, flags);
	return 0;
}

static int iommu_disable_translation(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	iommu->gcmd &= ~DMA_GCMD_TE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware completes it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
		readl, (!(sts & DMA_GSTS_TES)), sts);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
	return 0;
}

/* iommu interrupt handling. Most of it is MSI-like. */

static const char *fault_reason_strings[] =
{
	"Software",
	"Present bit in root entry is clear",
	"Present bit in context entry is clear",
	"Invalid context entry",
	"Access beyond MGAW",
	"PTE Write access is not set",
	"PTE Read access is not set",
	"Next page table ptr is invalid",
	"Root table address invalid",
	"Context table ptr is invalid",
	"non-zero reserved fields in RTP",
	"non-zero reserved fields in CTP",
	"non-zero reserved fields in PTE",
};
#define MAX_FAULT_REASON_IDX	(ARRAY_SIZE(fault_reason_strings) - 1)

const char *dmar_get_fault_reason(u8 fault_reason)
{
	if (fault_reason > MAX_FAULT_REASON_IDX)
		return "Unknown";
	else
		return fault_reason_strings[fault_reason];
}

void dmar_msi_unmask(unsigned int irq)
{
	struct intel_iommu *iommu = get_irq_data(irq);
	unsigned long flag;

	/* unmask it */
	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(0, iommu->reg + DMAR_FECTL_REG);
	/* Read a reg to force flush the post write */
	readl(iommu->reg + DMAR_FECTL_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_mask(unsigned int irq)
{
	unsigned long flag;
	struct intel_iommu *iommu = get_irq_data(irq);

	/* mask it */
	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
	/* Read a reg to force flush the post write */
	readl(iommu->reg + DMAR_FECTL_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_write(int irq, struct msi_msg *msg)
{
	struct intel_iommu *iommu = get_irq_data(irq);
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
	writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
	writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_read(int irq, struct msi_msg *msg)
{
	struct intel_iommu *iommu = get_irq_data(irq);
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
	msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
	msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
	spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
		u8 fault_reason, u16 source_id, unsigned long long addr)
{
	const char *reason;

	reason = dmar_get_fault_reason(fault_reason);

	printk(KERN_ERR
		"DMAR:[%s] Request device [%02x:%02x.%d] "
		"fault addr %llx \n"
		"DMAR:[fault reason %02d] %s\n",
		(type ? "DMA Read" : "DMA Write"),
		(source_id >> 8), PCI_SLOT(source_id & 0xFF),
		PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
	return 0;
}

#define PRIMARY_FAULT_REG_LEN (16)
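/*
 * Fault interrupt handler: walk the primary fault recording registers,
 * log and clear each pending fault, and finally clear the primary
 * fault overflow status.
 */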
static irqreturn_t iommu_page_fault(int irq, void *dev_id)
{
	struct intel_iommu *iommu = dev_id;
	int reg, fault_index;
	u32 fault_status;
	unsigned long flag;

	spin_lock_irqsave(&iommu->register_lock, flag);
	fault_status = readl(iommu->reg + DMAR_FSTS_REG);

	/* TBD: ignore advanced fault log currently */
	if (!(fault_status & DMA_FSTS_PPF))
		goto clear_overflow;

	fault_index = dma_fsts_fault_record_index(fault_status);
	reg = cap_fault_reg_offset(iommu->cap);
	while (1) {
		u8 fault_reason;
		u16 source_id;
		u64 guest_addr;
		int type;
		u32 data;

		/* highest 32 bits */
		data = readl(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN + 12);
		if (!(data & DMA_FRCD_F))
			break;

		fault_reason = dma_frcd_fault_reason(data);
		type = dma_frcd_type(data);

		data = readl(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN + 8);
		source_id = dma_frcd_source_id(data);

		guest_addr = dmar_readq(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN);
		guest_addr = dma_frcd_page_addr(guest_addr);
		/* clear the fault */
		writel(DMA_FRCD_F, iommu->reg + reg +
			fault_index * PRIMARY_FAULT_REG_LEN + 12);

		spin_unlock_irqrestore(&iommu->register_lock, flag);

		iommu_page_fault_do_one(iommu, type, fault_reason,
				source_id, guest_addr);

		fault_index++;
		if (fault_index > cap_num_fault_regs(iommu->cap))
			fault_index = 0;
		spin_lock_irqsave(&iommu->register_lock, flag);
	}
clear_overflow:
	/* clear primary fault overflow */
	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
	if (fault_status & DMA_FSTS_PFO)
		writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);

	spin_unlock_irqrestore(&iommu->register_lock, flag);
	return IRQ_HANDLED;
}

int dmar_set_interrupt(struct intel_iommu *iommu)
{
	int irq, ret;

	irq = create_irq();
	if (!irq) {
		printk(KERN_ERR "IOMMU: no free vectors\n");
		return -EINVAL;
	}

	set_irq_data(irq, iommu);
	iommu->irq = irq;

	ret = arch_setup_dmar_msi(irq);
	if (ret) {
		set_irq_data(irq, NULL);
		iommu->irq = 0;
		destroy_irq(irq);
		return 0;
	}

	/* Force fault register is cleared */
	iommu_page_fault(irq, iommu);

	ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
	if (ret)
		printk(KERN_ERR "IOMMU: can't request irq\n");
	return ret;
}

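/*
 * Allocate the per-iommu domain id bitmap and domain pointer array,
 * sized from the hardware's number-of-domains capability.
 */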
static int iommu_init_domains(struct intel_iommu *iommu)
{
	unsigned long ndomains;
	unsigned long nlongs;

	ndomains = cap_ndoms(iommu->cap);
	pr_debug("Number of Domains supported <%ld>\n", ndomains);
	nlongs = BITS_TO_LONGS(ndomains);

	/* TBD: there might be 64K domains,
	 * consider other allocation for future chip
	 */
	iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
	if (!iommu->domain_ids) {
		printk(KERN_ERR "Allocating domain id array failed\n");
		return -ENOMEM;
	}
	iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
			GFP_KERNEL);
	if (!iommu->domains) {
		printk(KERN_ERR "Allocating domain array failed\n");
		kfree(iommu->domain_ids);
		return -ENOMEM;
	}

	spin_lock_init(&iommu->lock);

	/*
	 * if Caching mode is set, then invalid translations are tagged
	 * with domainid 0. Hence we need to pre-allocate it.
	 */
	if (cap_caching_mode(iommu->cap))
		set_bit(0, iommu->domain_ids);
	return 0;
}


static void domain_exit(struct dmar_domain *domain);
static void vm_domain_exit(struct dmar_domain *domain);

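/*
 * Tear down all state attached to one iommu: drop its reference on
 * every domain still using it, disable translation, release the fault
 * interrupt and free the context tables.
 */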
void free_dmar_iommu(struct intel_iommu *iommu)
{
	struct dmar_domain *domain;
	int i;
	unsigned long flags;

	i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
	for (; i < cap_ndoms(iommu->cap); ) {
		domain = iommu->domains[i];
		clear_bit(i, iommu->domain_ids);

		spin_lock_irqsave(&domain->iommu_lock, flags);
		if (--domain->iommu_count == 0) {
			if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
				vm_domain_exit(domain);
			else
				domain_exit(domain);
		}
		spin_unlock_irqrestore(&domain->iommu_lock, flags);

		i = find_next_bit(iommu->domain_ids,
			cap_ndoms(iommu->cap), i+1);
	}

	if (iommu->gcmd & DMA_GCMD_TE)
		iommu_disable_translation(iommu);

	if (iommu->irq) {
		set_irq_data(iommu->irq, NULL);
		/* This will mask the irq */
		free_irq(iommu->irq, iommu);
		destroy_irq(iommu->irq);
	}

	kfree(iommu->domains);
	kfree(iommu->domain_ids);

	g_iommus[iommu->seq_id] = NULL;

	/* if all iommus are freed, free g_iommus */
	for (i = 0; i < g_num_of_iommus; i++) {
		if (g_iommus[i])
			break;
	}

	if (i == g_num_of_iommus)
		kfree(g_iommus);

	/* free context mapping */
	free_context_table(iommu);
}

static struct dmar_domain *iommu_alloc_domain(struct intel_iommu *iommu)
{
	unsigned long num;
	unsigned long ndomains;
	struct dmar_domain *domain;
	unsigned long flags;

	domain = alloc_domain_mem();
	if (!domain)
		return NULL;

	ndomains = cap_ndoms(iommu->cap);

	spin_lock_irqsave(&iommu->lock, flags);
	num = find_first_zero_bit(iommu->domain_ids, ndomains);
	if (num >= ndomains) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		free_domain_mem(domain);
		printk(KERN_ERR "IOMMU: no free domain ids\n");
		return NULL;
	}

	set_bit(num, iommu->domain_ids);
	domain->id = num;
	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
	set_bit(iommu->seq_id, &domain->iommu_bmp);
	domain->flags = 0;
	iommu->domains[num] = domain;
	spin_unlock_irqrestore(&iommu->lock, flags);

	return domain;
}

static void iommu_free_domain(struct dmar_domain *domain)
{
	unsigned long flags;
	struct intel_iommu *iommu;

	iommu = domain_get_iommu(domain);

	spin_lock_irqsave(&iommu->lock, flags);
	clear_bit(domain->id, iommu->domain_ids);
	spin_unlock_irqrestore(&iommu->lock, flags);
}

static struct iova_domain reserved_iova_list;
static struct lock_class_key reserved_alloc_key;
static struct lock_class_key reserved_rbtree_key;

static void dmar_init_reserved_ranges(void)
{
	struct pci_dev *pdev = NULL;
	struct iova *iova;
	int i;
	u64 addr, size;

	init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);

	lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
		&reserved_alloc_key);
	lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
		&reserved_rbtree_key);

	/* IOAPIC ranges shouldn't be accessed by DMA */
	iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
		IOVA_PFN(IOAPIC_RANGE_END));
	if (!iova)
		printk(KERN_ERR "Reserve IOAPIC range failed\n");

	/* Reserve all PCI MMIO to avoid peer-to-peer access */
	for_each_pci_dev(pdev) {
		struct resource *r;

		for (i = 0; i < PCI_NUM_RESOURCES; i++) {
			r = &pdev->resource[i];
			if (!r->flags || !(r->flags & IORESOURCE_MEM))
				continue;
			addr = r->start;
			addr &= PAGE_MASK;
			size = r->end - addr;
			size = PAGE_ALIGN(size);
			iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
				IOVA_PFN(size + addr) - 1);
			if (!iova)
				printk(KERN_ERR "Reserve iova failed\n");
		}
	}

}

static void domain_reserve_special_ranges(struct dmar_domain *domain)
{
	copy_reserved_iova(&reserved_iova_list, &domain->iovad);
}

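/*
 * Round the guest address width up so that (gaw - 12) is a multiple of
 * the 9-bit level stride, capped at 64 bits.
 */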
static inline int guestwidth_to_adjustwidth(int gaw)
{
	int agaw;
	int r = (gaw - 12) % 9;

	if (r == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - r;
	if (agaw > 64)
		agaw = 64;
	return agaw;
}

static int domain_init(struct dmar_domain *domain, int guest_width)
{
	struct intel_iommu *iommu;
	int adjust_width, agaw;
	unsigned long sagaw;

	init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
	spin_lock_init(&domain->mapping_lock);
	spin_lock_init(&domain->iommu_lock);

	domain_reserve_special_ranges(domain);

	/* calculate AGAW */
	iommu = domain_get_iommu(domain);
	if (guest_width > cap_mgaw(iommu->cap))
		guest_width = cap_mgaw(iommu->cap);
	domain->gaw = guest_width;
	adjust_width = guestwidth_to_adjustwidth(guest_width);
	agaw = width_to_agaw(adjust_width);
	sagaw = cap_sagaw(iommu->cap);
	if (!test_bit(agaw, &sagaw)) {
		/* hardware doesn't support it, choose a bigger one */
		pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
		agaw = find_next_bit(&sagaw, 5, agaw);
		if (agaw >= 5)
			return -ENODEV;
	}
	domain->agaw = agaw;
	INIT_LIST_HEAD(&domain->devices);

	if (ecap_coherent(iommu->ecap))
		domain->iommu_coherency = 1;
	else
		domain->iommu_coherency = 0;

	domain->iommu_count = 1;

	/* always allocate the top pgd */
	domain->pgd = (struct dma_pte *)alloc_pgtable_page();
	if (!domain->pgd)
		return -ENOMEM;
	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
	return 0;
}

static void domain_exit(struct dmar_domain *domain)
{
	u64 end;

	/* Domain 0 is reserved, so don't process it */
	if (!domain)
		return;

	domain_remove_dev_info(domain);
	/* destroy iovas */
	put_iova_domain(&domain->iovad);
	end = DOMAIN_MAX_ADDR(domain->gaw);
	end = end & (~PAGE_MASK);

	/* clear ptes */
	dma_pte_clear_range(domain, 0, end);

	/* free page tables */
	dma_pte_free_pagetable(domain, 0, end);

	iommu_free_domain(domain);
	free_domain_mem(domain);
}

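/*
 * Install a context entry for bus/devfn pointing at the domain's page
 * table. For virtual-machine domains this may allocate a per-iommu
 * domain id and skip top page-table levels when this iommu supports a
 * smaller agaw than the domain's default.
 */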
static int domain_context_mapping_one(struct dmar_domain *domain,
		u8 bus, u8 devfn)
{
	struct context_entry *context;
	unsigned long flags;
	struct intel_iommu *iommu;
	struct dma_pte *pgd;
	unsigned long num;
	unsigned long ndomains;
	int id;
	int agaw;

	pr_debug("Set context mapping for %02x:%02x.%d\n",
		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
	BUG_ON(!domain->pgd);

	iommu = device_to_iommu(bus, devfn);
	if (!iommu)
		return -ENODEV;

	context = device_to_context_entry(iommu, bus, devfn);
	if (!context)
		return -ENOMEM;
	spin_lock_irqsave(&iommu->lock, flags);
	if (context_present(context)) {
		spin_unlock_irqrestore(&iommu->lock, flags);
		return 0;
	}

	id = domain->id;
	pgd = domain->pgd;

	if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) {
		int found = 0;

		/* find an available domain id for this device in iommu */
		ndomains = cap_ndoms(iommu->cap);
		num = find_first_bit(iommu->domain_ids, ndomains);
		for (; num < ndomains; ) {
			if (iommu->domains[num] == domain) {
				id = num;
				found = 1;
				break;
			}
			num = find_next_bit(iommu->domain_ids,
					cap_ndoms(iommu->cap), num+1);
		}

		if (found == 0) {
			num = find_first_zero_bit(iommu->domain_ids, ndomains);
			if (num >= ndomains) {
				spin_unlock_irqrestore(&iommu->lock, flags);
				printk(KERN_ERR "IOMMU: no free domain ids\n");
				return -EFAULT;
			}

			set_bit(num, iommu->domain_ids);
			iommu->domains[num] = domain;
			id = num;
		}

		/* Skip top levels of page tables for
		 * iommu which has less agaw than default.
		 */
		for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
			pgd = phys_to_virt(dma_pte_addr(pgd));
			if (!dma_pte_present(pgd)) {
				spin_unlock_irqrestore(&iommu->lock, flags);
				return -ENOMEM;
			}
		}
	}

	context_set_domain_id(context, id);
	context_set_address_width(context, iommu->agaw);
	context_set_address_root(context, virt_to_phys(pgd));
	context_set_translation_type(context, CONTEXT_TT_MULTI_LEVEL);
	context_set_fault_enable(context);
	context_set_present(context);
	domain_flush_cache(domain, context, sizeof(*context));

	/* it's a non-present to present mapping */
	if (iommu->flush.flush_context(iommu, domain->id,
		(((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT,
		DMA_CCMD_DEVICE_INVL, 1))
		iommu_flush_write_buffer(iommu);
	else
		iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH, 0);

	spin_unlock_irqrestore(&iommu->lock, flags);

	spin_lock_irqsave(&domain->iommu_lock, flags);
	if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
		domain->iommu_count++;
		domain_update_iommu_coherency(domain);
	}
	spin_unlock_irqrestore(&domain->iommu_lock, flags);
	return 0;
}

static int
domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
{
	int ret;
	struct pci_dev *tmp, *parent;

	ret = domain_context_mapping_one(domain, pdev->bus->number,
		pdev->devfn);
	if (ret)
		return ret;

	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	if (!tmp)
		return 0;
	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	while (parent != tmp) {
		ret = domain_context_mapping_one(domain, parent->bus->number,
			parent->devfn);
		if (ret)
			return ret;
		parent = parent->bus->self;
	}
	if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
		return domain_context_mapping_one(domain,
			tmp->subordinate->number, 0);
	else /* this is a legacy PCI bridge */
		return domain_context_mapping_one(domain,
			tmp->bus->number, tmp->devfn);
}

static int domain_context_mapped(struct pci_dev *pdev)
{
	int ret;
	struct pci_dev *tmp, *parent;
	struct intel_iommu *iommu;

	iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
	if (!iommu)
		return -ENODEV;

	ret = device_context_mapped(iommu,
		pdev->bus->number, pdev->devfn);
	if (!ret)
		return ret;
	/* dependent device mapping */
	tmp = pci_find_upstream_pcie_bridge(pdev);
	if (!tmp)
		return ret;
	/* Secondary interface's bus number and devfn 0 */
	parent = pdev->bus->self;
	while (parent != tmp) {
		ret = device_context_mapped(iommu, parent->bus->number,
			parent->devfn);
		if (!ret)
			return ret;
		parent = parent->bus->self;
	}
	if (tmp->is_pcie)
		return device_context_mapped(iommu,
			tmp->subordinate->number, 0);
	else
		return device_context_mapped(iommu,
			tmp->bus->number, tmp->devfn);
}

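/*
 * Map the physical range [hpa, hpa + size) at iova by filling in one
 * last-level PTE per VT-d page with the requested protection bits.
 */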
static int
domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
			u64 hpa, size_t size, int prot)
{
	u64 start_pfn, end_pfn;
	struct dma_pte *pte;
	int index;
	int addr_width = agaw_to_width(domain->agaw);

	hpa &= (((u64)1) << addr_width) - 1;

	if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
		return -EINVAL;
	iova &= PAGE_MASK;
	start_pfn = ((u64)hpa) >> VTD_PAGE_SHIFT;
	end_pfn = (VTD_PAGE_ALIGN(((u64)hpa) + size)) >> VTD_PAGE_SHIFT;
	index = 0;
	while (start_pfn < end_pfn) {
		pte = addr_to_dma_pte(domain, iova + VTD_PAGE_SIZE * index);
		if (!pte)
			return -ENOMEM;
		/* We don't need lock here, nobody else
		 * touches the iova range
		 */
		BUG_ON(dma_pte_addr(pte));
		dma_set_pte_addr(pte, start_pfn << VTD_PAGE_SHIFT);
		dma_set_pte_prot(pte, prot);
		domain_flush_cache(domain, pte, sizeof(*pte));
		start_pfn++;
		index++;
	}
	return 0;
}
1651
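/*
 * Editor's note: a short worked example of the pfn arithmetic in
 * domain_page_mapping() above, assuming 4KiB VT-d pages
 * (VTD_PAGE_SHIFT == 12).  For hpa == 0x1fff and size == 0x2 the buffer
 * straddles a page boundary: start_pfn == 0x1fff >> 12 == 1 and
 * end_pfn == VTD_PAGE_ALIGN(0x2001) >> 12 == 3, so pfns 1 and 2 are both
 * mapped even though only two bytes were requested.
 */
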
Weidong Hanc7151a82008-12-08 22:51:37 +08001652static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001653{
Weidong Hanc7151a82008-12-08 22:51:37 +08001654 if (!iommu)
1655 return;
Weidong Han8c11e792008-12-08 15:29:22 +08001656
1657 clear_context_table(iommu, bus, devfn);
1658 iommu->flush.flush_context(iommu, 0, 0, 0,
Youquan Songa77b67d2008-10-16 16:31:56 -07001659 DMA_CCMD_GLOBAL_INVL, 0);
Weidong Han8c11e792008-12-08 15:29:22 +08001660 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
Youquan Songa77b67d2008-10-16 16:31:56 -07001661 DMA_TLB_GLOBAL_FLUSH, 0);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001662}
1663
1664static void domain_remove_dev_info(struct dmar_domain *domain)
1665{
1666 struct device_domain_info *info;
1667 unsigned long flags;
Weidong Hanc7151a82008-12-08 22:51:37 +08001668 struct intel_iommu *iommu;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001669
1670 spin_lock_irqsave(&device_domain_lock, flags);
1671 while (!list_empty(&domain->devices)) {
1672 info = list_entry(domain->devices.next,
1673 struct device_domain_info, link);
1674 list_del(&info->link);
1675 list_del(&info->global);
1676 if (info->dev)
Keshavamurthy, Anil S358dd8a2007-10-21 16:41:59 -07001677 info->dev->dev.archdata.iommu = NULL;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001678 spin_unlock_irqrestore(&device_domain_lock, flags);
1679
Weidong Hanc7151a82008-12-08 22:51:37 +08001680 iommu = device_to_iommu(info->bus, info->devfn);
1681 iommu_detach_dev(iommu, info->bus, info->devfn);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001682 free_devinfo_mem(info);
1683
1684 spin_lock_irqsave(&device_domain_lock, flags);
1685 }
1686 spin_unlock_irqrestore(&device_domain_lock, flags);
1687}
1688
1689/*
1690 * find_domain
Keshavamurthy, Anil S358dd8a2007-10-21 16:41:59 -07001691 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001692 */
Kay, Allen M38717942008-09-09 18:37:29 +03001693static struct dmar_domain *
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001694find_domain(struct pci_dev *pdev)
1695{
1696 struct device_domain_info *info;
1697
 1698 /* No lock here; assume no domain exit in the normal case */
Keshavamurthy, Anil S358dd8a2007-10-21 16:41:59 -07001699 info = pdev->dev.archdata.iommu;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001700 if (info)
1701 return info->domain;
1702 return NULL;
1703}
1704
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001705/* domain is initialized */
1706static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1707{
1708 struct dmar_domain *domain, *found = NULL;
1709 struct intel_iommu *iommu;
1710 struct dmar_drhd_unit *drhd;
1711 struct device_domain_info *info, *tmp;
1712 struct pci_dev *dev_tmp;
1713 unsigned long flags;
1714 int bus = 0, devfn = 0;
1715
1716 domain = find_domain(pdev);
1717 if (domain)
1718 return domain;
1719
1720 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1721 if (dev_tmp) {
1722 if (dev_tmp->is_pcie) {
1723 bus = dev_tmp->subordinate->number;
1724 devfn = 0;
1725 } else {
1726 bus = dev_tmp->bus->number;
1727 devfn = dev_tmp->devfn;
1728 }
1729 spin_lock_irqsave(&device_domain_lock, flags);
1730 list_for_each_entry(info, &device_domain_list, global) {
1731 if (info->bus == bus && info->devfn == devfn) {
1732 found = info->domain;
1733 break;
1734 }
1735 }
1736 spin_unlock_irqrestore(&device_domain_lock, flags);
 1737 /* pcie-pci bridge already has a domain, use it */
1738 if (found) {
1739 domain = found;
1740 goto found_domain;
1741 }
1742 }
1743
1744 /* Allocate new domain for the device */
1745 drhd = dmar_find_matched_drhd_unit(pdev);
1746 if (!drhd) {
1747 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1748 pci_name(pdev));
1749 return NULL;
1750 }
1751 iommu = drhd->iommu;
1752
1753 domain = iommu_alloc_domain(iommu);
1754 if (!domain)
1755 goto error;
1756
1757 if (domain_init(domain, gaw)) {
1758 domain_exit(domain);
1759 goto error;
1760 }
1761
1762 /* register pcie-to-pci device */
1763 if (dev_tmp) {
1764 info = alloc_devinfo_mem();
1765 if (!info) {
1766 domain_exit(domain);
1767 goto error;
1768 }
1769 info->bus = bus;
1770 info->devfn = devfn;
1771 info->dev = NULL;
1772 info->domain = domain;
1773 /* This domain is shared by devices under p2p bridge */
Weidong Han3b5410e2008-12-08 09:17:15 +08001774 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001775
 1776 /* pcie-to-pci bridge already has a domain, use it */
1777 found = NULL;
1778 spin_lock_irqsave(&device_domain_lock, flags);
1779 list_for_each_entry(tmp, &device_domain_list, global) {
1780 if (tmp->bus == bus && tmp->devfn == devfn) {
1781 found = tmp->domain;
1782 break;
1783 }
1784 }
1785 if (found) {
1786 free_devinfo_mem(info);
1787 domain_exit(domain);
1788 domain = found;
1789 } else {
1790 list_add(&info->link, &domain->devices);
1791 list_add(&info->global, &device_domain_list);
1792 }
1793 spin_unlock_irqrestore(&device_domain_lock, flags);
1794 }
1795
1796found_domain:
1797 info = alloc_devinfo_mem();
1798 if (!info)
1799 goto error;
1800 info->bus = pdev->bus->number;
1801 info->devfn = pdev->devfn;
1802 info->dev = pdev;
1803 info->domain = domain;
1804 spin_lock_irqsave(&device_domain_lock, flags);
 1805 /* somebody else was faster and set it up already */
1806 found = find_domain(pdev);
1807 if (found != NULL) {
1808 spin_unlock_irqrestore(&device_domain_lock, flags);
1809 if (found != domain) {
1810 domain_exit(domain);
1811 domain = found;
1812 }
1813 free_devinfo_mem(info);
1814 return domain;
1815 }
1816 list_add(&info->link, &domain->devices);
1817 list_add(&info->global, &device_domain_list);
Keshavamurthy, Anil S358dd8a2007-10-21 16:41:59 -07001818 pdev->dev.archdata.iommu = info;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001819 spin_unlock_irqrestore(&device_domain_lock, flags);
1820 return domain;
1821error:
 1822 /* recheck it here; someone else may have set it */
1823 return find_domain(pdev);
1824}
1825
Fenghua Yu5b6985c2008-10-16 18:02:32 -07001826static int iommu_prepare_identity_map(struct pci_dev *pdev,
1827 unsigned long long start,
1828 unsigned long long end)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001829{
1830 struct dmar_domain *domain;
1831 unsigned long size;
Fenghua Yu5b6985c2008-10-16 18:02:32 -07001832 unsigned long long base;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001833 int ret;
1834
1835 printk(KERN_INFO
1836 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1837 pci_name(pdev), start, end);
1838 /* page table init */
1839 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1840 if (!domain)
1841 return -ENOMEM;
1842
1843 /* The address might not be aligned */
Fenghua Yu5b6985c2008-10-16 18:02:32 -07001844 base = start & PAGE_MASK;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001845 size = end - base;
Fenghua Yu5b6985c2008-10-16 18:02:32 -07001846 size = PAGE_ALIGN(size);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001847 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1848 IOVA_PFN(base + size) - 1)) {
1849 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1850 ret = -ENOMEM;
1851 goto error;
1852 }
1853
1854 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1855 size, base, pci_name(pdev));
1856 /*
 1857 * The RMRR range might overlap with a physical memory range;
1858 * clear it first
1859 */
1860 dma_pte_clear_range(domain, base, base + size);
1861
1862 ret = domain_page_mapping(domain, base, base, size,
1863 DMA_PTE_READ|DMA_PTE_WRITE);
1864 if (ret)
1865 goto error;
1866
1867 /* context entry init */
1868 ret = domain_context_mapping(domain, pdev);
1869 if (!ret)
1870 return 0;
1871error:
1872 domain_exit(domain);
1873 return ret;
1874
1875}
1876
1877static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1878 struct pci_dev *pdev)
1879{
Keshavamurthy, Anil S358dd8a2007-10-21 16:41:59 -07001880 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001881 return 0;
1882 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1883 rmrr->end_address + 1);
1884}
1885
Keshavamurthy, Anil Se8204822007-10-21 16:41:55 -07001886#ifdef CONFIG_DMAR_GFX_WA
Yinghai Lud52d53b2008-06-16 20:10:55 -07001887struct iommu_prepare_data {
1888 struct pci_dev *pdev;
1889 int ret;
1890};
1891
1892static int __init iommu_prepare_work_fn(unsigned long start_pfn,
1893 unsigned long end_pfn, void *datax)
1894{
1895 struct iommu_prepare_data *data;
1896
1897 data = (struct iommu_prepare_data *)datax;
1898
1899 data->ret = iommu_prepare_identity_map(data->pdev,
1900 start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
1901 return data->ret;
1902
1903}
1904
1905static int __init iommu_prepare_with_active_regions(struct pci_dev *pdev)
1906{
1907 int nid;
1908 struct iommu_prepare_data data;
1909
1910 data.pdev = pdev;
1911 data.ret = 0;
1912
1913 for_each_online_node(nid) {
1914 work_with_active_regions(nid, iommu_prepare_work_fn, &data);
1915 if (data.ret)
1916 return data.ret;
1917 }
1918 return data.ret;
1919}
1920
Keshavamurthy, Anil Se8204822007-10-21 16:41:55 -07001921static void __init iommu_prepare_gfx_mapping(void)
1922{
1923 struct pci_dev *pdev = NULL;
Keshavamurthy, Anil Se8204822007-10-21 16:41:55 -07001924 int ret;
1925
1926 for_each_pci_dev(pdev) {
Keshavamurthy, Anil S358dd8a2007-10-21 16:41:59 -07001927 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
Keshavamurthy, Anil Se8204822007-10-21 16:41:55 -07001928 !IS_GFX_DEVICE(pdev))
1929 continue;
1930 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1931 pci_name(pdev));
Yinghai Lud52d53b2008-06-16 20:10:55 -07001932 ret = iommu_prepare_with_active_regions(pdev);
1933 if (ret)
1934 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
Keshavamurthy, Anil Se8204822007-10-21 16:41:55 -07001935 }
1936}
Mark McLoughlin2abd7e12008-11-20 15:49:50 +00001937#else /* !CONFIG_DMAR_GFX_WA */
1938static inline void iommu_prepare_gfx_mapping(void)
1939{
1940 return;
1941}
Keshavamurthy, Anil Se8204822007-10-21 16:41:55 -07001942#endif
1943
Keshavamurthy, Anil S49a04292007-10-21 16:41:57 -07001944#ifdef CONFIG_DMAR_FLOPPY_WA
1945static inline void iommu_prepare_isa(void)
1946{
1947 struct pci_dev *pdev;
1948 int ret;
1949
1950 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1951 if (!pdev)
1952 return;
1953
1954 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1955 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1956
1957 if (ret)
 1958 printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
1959 "floppy might not work\n");
1960
1961}
1962#else
1963static inline void iommu_prepare_isa(void)
1964{
1965 return;
1966}
 1967#endif /* !CONFIG_DMAR_FLOPPY_WA */
1968
Mark McLoughlin519a0542008-11-20 14:21:13 +00001969static int __init init_dmars(void)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001970{
1971 struct dmar_drhd_unit *drhd;
1972 struct dmar_rmrr_unit *rmrr;
1973 struct pci_dev *pdev;
1974 struct intel_iommu *iommu;
mark gross80b20dd2008-04-18 13:53:58 -07001975 int i, ret, unit = 0;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001976
1977 /*
1978 * for each drhd
1979 * allocate root
1980 * initialize and program root entry to not present
1981 * endfor
1982 */
1983 for_each_drhd_unit(drhd) {
mark gross5e0d2a62008-03-04 15:22:08 -08001984 g_num_of_iommus++;
1985 /*
 1986 * lock not needed as this is only incremented in the
 1987 * single-threaded kernel __init code path; all other
 1988 * accesses are read only
1989 */
1990 }
1991
Weidong Hand9630fe2008-12-08 11:06:32 +08001992 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
1993 GFP_KERNEL);
1994 if (!g_iommus) {
1995 printk(KERN_ERR "Allocating global iommu array failed\n");
1996 ret = -ENOMEM;
1997 goto error;
1998 }
1999
mark gross80b20dd2008-04-18 13:53:58 -07002000 deferred_flush = kzalloc(g_num_of_iommus *
2001 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2002 if (!deferred_flush) {
Weidong Hand9630fe2008-12-08 11:06:32 +08002003 kfree(g_iommus);
mark gross5e0d2a62008-03-04 15:22:08 -08002004 ret = -ENOMEM;
2005 goto error;
2006 }
2007
mark gross5e0d2a62008-03-04 15:22:08 -08002008 for_each_drhd_unit(drhd) {
2009 if (drhd->ignored)
2010 continue;
Suresh Siddha1886e8a2008-07-10 11:16:37 -07002011
2012 iommu = drhd->iommu;
Weidong Hand9630fe2008-12-08 11:06:32 +08002013 g_iommus[iommu->seq_id] = iommu;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002014
Suresh Siddhae61d98d2008-07-10 11:16:35 -07002015 ret = iommu_init_domains(iommu);
2016 if (ret)
2017 goto error;
2018
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002019 /*
2020 * TBD:
2021 * we could share the same root & context tables
 2022 * among all IOMMUs. Need to split it later.
2023 */
2024 ret = iommu_alloc_root_entry(iommu);
2025 if (ret) {
2026 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2027 goto error;
2028 }
2029 }
2030
Youquan Songa77b67d2008-10-16 16:31:56 -07002031 for_each_drhd_unit(drhd) {
2032 if (drhd->ignored)
2033 continue;
2034
2035 iommu = drhd->iommu;
2036 if (dmar_enable_qi(iommu)) {
2037 /*
2038 * Queued Invalidate not enabled, use Register Based
2039 * Invalidate
2040 */
2041 iommu->flush.flush_context = __iommu_flush_context;
2042 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2043 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
FUJITA Tomonorib4e0f9e2008-11-19 13:53:42 +09002044 "invalidation\n",
2045 (unsigned long long)drhd->reg_base_addr);
Youquan Songa77b67d2008-10-16 16:31:56 -07002046 } else {
2047 iommu->flush.flush_context = qi_flush_context;
2048 iommu->flush.flush_iotlb = qi_flush_iotlb;
2049 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
FUJITA Tomonorib4e0f9e2008-11-19 13:53:42 +09002050 "invalidation\n",
2051 (unsigned long long)drhd->reg_base_addr);
Youquan Songa77b67d2008-10-16 16:31:56 -07002052 }
2053 }
2054
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002055 /*
2056 * For each rmrr
2057 * for each dev attached to rmrr
2058 * do
2059 * locate drhd for dev, alloc domain for dev
2060 * allocate free domain
2061 * allocate page table entries for rmrr
2062 * if context not allocated for bus
2063 * allocate and init context
2064 * set present in root table for this bus
2065 * init context with domain, translation etc
2066 * endfor
2067 * endfor
2068 */
2069 for_each_rmrr_units(rmrr) {
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002070 for (i = 0; i < rmrr->devices_cnt; i++) {
2071 pdev = rmrr->devices[i];
 2072 /* some BIOSes list non-existent devices in the DMAR table */
2073 if (!pdev)
2074 continue;
2075 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2076 if (ret)
2077 printk(KERN_ERR
2078 "IOMMU: mapping reserved region failed\n");
2079 }
2080 }
2081
Keshavamurthy, Anil Se8204822007-10-21 16:41:55 -07002082 iommu_prepare_gfx_mapping();
2083
Keshavamurthy, Anil S49a04292007-10-21 16:41:57 -07002084 iommu_prepare_isa();
2085
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002086 /*
2087 * for each drhd
2088 * enable fault log
2089 * global invalidate context cache
2090 * global invalidate iotlb
2091 * enable translation
2092 */
2093 for_each_drhd_unit(drhd) {
2094 if (drhd->ignored)
2095 continue;
2096 iommu = drhd->iommu;
 2097 sprintf(iommu->name, "dmar%d", unit++);
2098
2099 iommu_flush_write_buffer(iommu);
2100
Keshavamurthy, Anil S3460a6d2007-10-21 16:41:54 -07002101 ret = dmar_set_interrupt(iommu);
2102 if (ret)
2103 goto error;
2104
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002105 iommu_set_root_entry(iommu);
2106
Youquan Songa77b67d2008-10-16 16:31:56 -07002107 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
2108 0);
2109 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
2110 0);
mark grossf8bab732008-02-08 04:18:38 -08002111 iommu_disable_protect_mem_regions(iommu);
2112
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002113 ret = iommu_enable_translation(iommu);
2114 if (ret)
2115 goto error;
2116 }
2117
2118 return 0;
2119error:
2120 for_each_drhd_unit(drhd) {
2121 if (drhd->ignored)
2122 continue;
2123 iommu = drhd->iommu;
2124 free_iommu(iommu);
2125 }
Weidong Hand9630fe2008-12-08 11:06:32 +08002126 kfree(g_iommus);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002127 return ret;
2128}
2129
2130static inline u64 aligned_size(u64 host_addr, size_t size)
2131{
2132 u64 addr;
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002133 addr = (host_addr & (~PAGE_MASK)) + size;
2134 return PAGE_ALIGN(addr);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002135}
2136
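/*
 * Editor's note: aligned_size() above rounds a possibly unaligned buffer
 * up to whole pages.  With 4KiB pages, host_addr == 0x1ffe and
 * size == 0x4 give (0xffe + 0x4) == 0x1002, which PAGE_ALIGN() rounds up
 * to 0x2000: two pages, matching the two pages the buffer touches.
 */
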
2137struct iova *
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002138iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002139{
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002140 struct iova *piova;
2141
2142 /* Make sure it's in range */
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002143 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002144 if (!size || (IOVA_START_ADDR + size > end))
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002145 return NULL;
2146
2147 piova = alloc_iova(&domain->iovad,
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002148 size >> PAGE_SHIFT, IOVA_PFN(end), 1);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002149 return piova;
2150}
2151
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002152static struct iova *
2153__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
FUJITA Tomonoribb9e6d62008-10-15 16:08:28 +09002154 size_t size, u64 dma_mask)
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002155{
2156 struct pci_dev *pdev = to_pci_dev(dev);
2157 struct iova *iova = NULL;
2158
FUJITA Tomonoribb9e6d62008-10-15 16:08:28 +09002159 if (dma_mask <= DMA_32BIT_MASK || dmar_forcedac)
2160 iova = iommu_alloc_iova(domain, size, dma_mask);
2161 else {
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002162 /*
 2163 * First try to allocate an I/O virtual address within
 2164 * DMA_32BIT_MASK and, if that fails, try allocating
Joe Perches36098012007-12-17 11:40:11 -08002165 * from the higher range
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002166 */
2167 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
2168 if (!iova)
FUJITA Tomonoribb9e6d62008-10-15 16:08:28 +09002169 iova = iommu_alloc_iova(domain, size, dma_mask);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002170 }
2171
2172 if (!iova) {
 2173 printk(KERN_ERR "Allocating iova for %s failed\n", pci_name(pdev));
2174 return NULL;
2175 }
2176
2177 return iova;
2178}
2179
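/*
 * Editor's note on the policy above: devices with a dma_mask at or below
 * 32 bits, or with dmar_forcedac set, allocate directly against their
 * mask; everyone else tries the 32-bit space first and falls back to the
 * full mask only when it is exhausted.  Staying below 4GiB avoids forcing
 * dual address cycle (DAC) transactions for 64-bit capable devices.
 */
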
2180static struct dmar_domain *
2181get_valid_domain_for_dev(struct pci_dev *pdev)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002182{
2183 struct dmar_domain *domain;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002184 int ret;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002185
2186 domain = get_domain_for_dev(pdev,
2187 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2188 if (!domain) {
2189 printk(KERN_ERR
2190 "Allocating domain for %s failed", pci_name(pdev));
Al Viro4fe05bb2007-10-29 04:51:16 +00002191 return NULL;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002192 }
2193
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002194 /* make sure context mapping is ok */
Weidong Han5331fe62008-12-08 23:00:00 +08002195 if (unlikely(!domain_context_mapped(pdev))) {
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002196 ret = domain_context_mapping(domain, pdev);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002197 if (ret) {
2198 printk(KERN_ERR
2199 "Domain context map for %s failed",
2200 pci_name(pdev));
Al Viro4fe05bb2007-10-29 04:51:16 +00002201 return NULL;
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002202 }
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002203 }
2204
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002205 return domain;
2206}
2207
FUJITA Tomonoribb9e6d62008-10-15 16:08:28 +09002208static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2209 size_t size, int dir, u64 dma_mask)
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002210{
2211 struct pci_dev *pdev = to_pci_dev(hwdev);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002212 struct dmar_domain *domain;
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002213 phys_addr_t start_paddr;
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002214 struct iova *iova;
2215 int prot = 0;
Ingo Molnar6865f0d2008-04-22 11:09:04 +02002216 int ret;
Weidong Han8c11e792008-12-08 15:29:22 +08002217 struct intel_iommu *iommu;
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002218
2219 BUG_ON(dir == DMA_NONE);
Keshavamurthy, Anil S358dd8a2007-10-21 16:41:59 -07002220 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
Ingo Molnar6865f0d2008-04-22 11:09:04 +02002221 return paddr;
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002222
2223 domain = get_valid_domain_for_dev(pdev);
2224 if (!domain)
2225 return 0;
2226
Weidong Han8c11e792008-12-08 15:29:22 +08002227 iommu = domain_get_iommu(domain);
Ingo Molnar6865f0d2008-04-22 11:09:04 +02002228 size = aligned_size((u64)paddr, size);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002229
FUJITA Tomonoribb9e6d62008-10-15 16:08:28 +09002230 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002231 if (!iova)
2232 goto error;
2233
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002234 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002235
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002236 /*
2237 * Check if DMAR supports zero-length reads on write only
2238 * mappings..
2239 */
2240 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
Weidong Han8c11e792008-12-08 15:29:22 +08002241 !cap_zlr(iommu->cap))
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002242 prot |= DMA_PTE_READ;
2243 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2244 prot |= DMA_PTE_WRITE;
2245 /*
Ingo Molnar6865f0d2008-04-22 11:09:04 +02002246 * paddr .. (paddr + size) might cover only part of a page; we should map
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002247 * the whole page. Note: if two parts of one page are mapped separately,
Ingo Molnar6865f0d2008-04-22 11:09:04 +02002248 * we might end up with two guest addresses mapping to the same host
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002249 * paddr, but this is not a big problem
2250 */
Ingo Molnar6865f0d2008-04-22 11:09:04 +02002251 ret = domain_page_mapping(domain, start_paddr,
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002252 ((u64)paddr) & PAGE_MASK, size, prot);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002253 if (ret)
2254 goto error;
2255
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002256 /* it's a non-present to present mapping */
Weidong Han8c11e792008-12-08 15:29:22 +08002257 ret = iommu_flush_iotlb_psi(iommu, domain->id,
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002258 start_paddr, size >> VTD_PAGE_SHIFT, 1);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002259 if (ret)
Weidong Han8c11e792008-12-08 15:29:22 +08002260 iommu_flush_write_buffer(iommu);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002261
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002262 return start_paddr + ((u64)paddr & (~PAGE_MASK));
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002263
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002264error:
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002265 if (iova)
2266 __free_iova(&domain->iovad, iova);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002267 printk(KERN_ERR "Device %s request: %lx@%llx dir %d --- failed\n",
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002268 pci_name(pdev), size, (unsigned long long)paddr, dir);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002269 return 0;
2270}
2271
FUJITA Tomonoribb9e6d62008-10-15 16:08:28 +09002272dma_addr_t intel_map_single(struct device *hwdev, phys_addr_t paddr,
2273 size_t size, int dir)
2274{
2275 return __intel_map_single(hwdev, paddr, size, dir,
2276 to_pci_dev(hwdev)->dma_mask);
2277}
2278
mark gross5e0d2a62008-03-04 15:22:08 -08002279static void flush_unmaps(void)
2280{
mark gross80b20dd2008-04-18 13:53:58 -07002281 int i, j;
mark gross5e0d2a62008-03-04 15:22:08 -08002282
mark gross5e0d2a62008-03-04 15:22:08 -08002283 timer_on = 0;
2284
2285 /* just flush them all */
2286 for (i = 0; i < g_num_of_iommus; i++) {
Weidong Hana2bb8452008-12-08 11:24:12 +08002287 struct intel_iommu *iommu = g_iommus[i];
2288 if (!iommu)
2289 continue;
Suresh Siddhac42d9f32008-07-10 11:16:36 -07002290
Weidong Hana2bb8452008-12-08 11:24:12 +08002291 if (deferred_flush[i].next) {
Youquan Songa77b67d2008-10-16 16:31:56 -07002292 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
2293 DMA_TLB_GLOBAL_FLUSH, 0);
mark gross80b20dd2008-04-18 13:53:58 -07002294 for (j = 0; j < deferred_flush[i].next; j++) {
2295 __free_iova(&deferred_flush[i].domain[j]->iovad,
2296 deferred_flush[i].iova[j]);
2297 }
2298 deferred_flush[i].next = 0;
2299 }
mark gross5e0d2a62008-03-04 15:22:08 -08002300 }
2301
mark gross5e0d2a62008-03-04 15:22:08 -08002302 list_size = 0;
mark gross5e0d2a62008-03-04 15:22:08 -08002303}
2304
2305static void flush_unmaps_timeout(unsigned long data)
2306{
mark gross80b20dd2008-04-18 13:53:58 -07002307 unsigned long flags;
2308
2309 spin_lock_irqsave(&async_umap_flush_lock, flags);
mark gross5e0d2a62008-03-04 15:22:08 -08002310 flush_unmaps();
mark gross80b20dd2008-04-18 13:53:58 -07002311 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
mark gross5e0d2a62008-03-04 15:22:08 -08002312}
2313
2314static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2315{
2316 unsigned long flags;
mark gross80b20dd2008-04-18 13:53:58 -07002317 int next, iommu_id;
Weidong Han8c11e792008-12-08 15:29:22 +08002318 struct intel_iommu *iommu;
mark gross5e0d2a62008-03-04 15:22:08 -08002319
2320 spin_lock_irqsave(&async_umap_flush_lock, flags);
mark gross80b20dd2008-04-18 13:53:58 -07002321 if (list_size == HIGH_WATER_MARK)
2322 flush_unmaps();
2323
Weidong Han8c11e792008-12-08 15:29:22 +08002324 iommu = domain_get_iommu(dom);
2325 iommu_id = iommu->seq_id;
Suresh Siddhac42d9f32008-07-10 11:16:36 -07002326
mark gross80b20dd2008-04-18 13:53:58 -07002327 next = deferred_flush[iommu_id].next;
2328 deferred_flush[iommu_id].domain[next] = dom;
2329 deferred_flush[iommu_id].iova[next] = iova;
2330 deferred_flush[iommu_id].next++;
mark gross5e0d2a62008-03-04 15:22:08 -08002331
2332 if (!timer_on) {
2333 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2334 timer_on = 1;
2335 }
2336 list_size++;
2337 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2338}
2339
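/*
 * Editor's note on the deferred-unmap scheme above: in the default
 * (non-strict) mode intel_unmap_single() does not flush the IOTLB for
 * every unmap.  add_unmap() parks the iova in the per-iommu
 * deferred_flush table and flush_unmaps() later issues one global IOTLB
 * flush per iommu before handing the iovas back to the allocator.  The
 * batch drains either when HIGH_WATER_MARK entries have accumulated or
 * when the 10ms unmap_timer fires, trading a short window of stale IOTLB
 * entries for far fewer invalidation operations.
 */
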
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002340void intel_unmap_single(struct device *dev, dma_addr_t dev_addr, size_t size,
2341 int dir)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002342{
2343 struct pci_dev *pdev = to_pci_dev(dev);
2344 struct dmar_domain *domain;
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002345 unsigned long start_addr;
2346 struct iova *iova;
Weidong Han8c11e792008-12-08 15:29:22 +08002347 struct intel_iommu *iommu;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002348
Keshavamurthy, Anil S358dd8a2007-10-21 16:41:59 -07002349 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002350 return;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002351 domain = find_domain(pdev);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002352 BUG_ON(!domain);
2353
Weidong Han8c11e792008-12-08 15:29:22 +08002354 iommu = domain_get_iommu(domain);
2355
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002356 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
2357 if (!iova)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002358 return;
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002359
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002360 start_addr = iova->pfn_lo << PAGE_SHIFT;
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002361 size = aligned_size((u64)dev_addr, size);
2362
2363 pr_debug("Device %s unmapping: %lx@%llx\n",
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002364 pci_name(pdev), size, (unsigned long long)start_addr);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002365
2366 /* clear the whole page */
2367 dma_pte_clear_range(domain, start_addr, start_addr + size);
2368 /* free page tables */
2369 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
mark gross5e0d2a62008-03-04 15:22:08 -08002370 if (intel_iommu_strict) {
Weidong Han8c11e792008-12-08 15:29:22 +08002371 if (iommu_flush_iotlb_psi(iommu,
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002372 domain->id, start_addr, size >> VTD_PAGE_SHIFT, 0))
Weidong Han8c11e792008-12-08 15:29:22 +08002373 iommu_flush_write_buffer(iommu);
mark gross5e0d2a62008-03-04 15:22:08 -08002374 /* free iova */
2375 __free_iova(&domain->iovad, iova);
2376 } else {
2377 add_unmap(domain, iova);
2378 /*
 2379 * queue up the release of the unmap to save the roughly 1/6th of
 2380 * the cpu time otherwise used up by the iotlb flush operation...
2381 */
mark gross5e0d2a62008-03-04 15:22:08 -08002382 }
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002383}
2384
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002385void *intel_alloc_coherent(struct device *hwdev, size_t size,
2386 dma_addr_t *dma_handle, gfp_t flags)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002387{
2388 void *vaddr;
2389 int order;
2390
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002391 size = PAGE_ALIGN(size);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002392 order = get_order(size);
2393 flags &= ~(GFP_DMA | GFP_DMA32);
2394
2395 vaddr = (void *)__get_free_pages(flags, order);
2396 if (!vaddr)
2397 return NULL;
2398 memset(vaddr, 0, size);
2399
FUJITA Tomonoribb9e6d62008-10-15 16:08:28 +09002400 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2401 DMA_BIDIRECTIONAL,
2402 hwdev->coherent_dma_mask);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002403 if (*dma_handle)
2404 return vaddr;
2405 free_pages((unsigned long)vaddr, order);
2406 return NULL;
2407}
2408
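/*
 * Editor's note: intel_alloc_coherent() above strips GFP_DMA/GFP_DMA32 on
 * purpose.  With remapping active the backing pages may live anywhere in
 * physical memory, since __intel_map_single() picks an IOVA that fits the
 * device's coherent_dma_mask regardless of where the pages actually are.
 */
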
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002409void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2410 dma_addr_t dma_handle)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002411{
2412 int order;
2413
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002414 size = PAGE_ALIGN(size);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002415 order = get_order(size);
2416
2417 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
2418 free_pages((unsigned long)vaddr, order);
2419}
2420
FUJITA Tomonori12d4d402007-10-23 09:32:25 +02002421#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002422
2423void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2424 int nelems, int dir)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002425{
2426 int i;
2427 struct pci_dev *pdev = to_pci_dev(hwdev);
2428 struct dmar_domain *domain;
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002429 unsigned long start_addr;
2430 struct iova *iova;
2431 size_t size = 0;
2432 void *addr;
FUJITA Tomonoric03ab372007-10-21 16:42:00 -07002433 struct scatterlist *sg;
Weidong Han8c11e792008-12-08 15:29:22 +08002434 struct intel_iommu *iommu;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002435
Keshavamurthy, Anil S358dd8a2007-10-21 16:41:59 -07002436 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002437 return;
2438
2439 domain = find_domain(pdev);
Weidong Han8c11e792008-12-08 15:29:22 +08002440 BUG_ON(!domain);
2441
2442 iommu = domain_get_iommu(domain);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002443
FUJITA Tomonoric03ab372007-10-21 16:42:00 -07002444 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002445 if (!iova)
2446 return;
FUJITA Tomonoric03ab372007-10-21 16:42:00 -07002447 for_each_sg(sglist, sg, nelems, i) {
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002448 addr = SG_ENT_VIRT_ADDRESS(sg);
2449 size += aligned_size((u64)addr, sg->length);
2450 }
2451
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002452 start_addr = iova->pfn_lo << PAGE_SHIFT;
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002453
2454 /* clear the whole page */
2455 dma_pte_clear_range(domain, start_addr, start_addr + size);
2456 /* free page tables */
2457 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2458
Weidong Han8c11e792008-12-08 15:29:22 +08002459 if (iommu_flush_iotlb_psi(iommu, domain->id, start_addr,
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002460 size >> VTD_PAGE_SHIFT, 0))
Weidong Han8c11e792008-12-08 15:29:22 +08002461 iommu_flush_write_buffer(iommu);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002462
2463 /* free iova */
2464 __free_iova(&domain->iovad, iova);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002465}
2466
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002467static int intel_nontranslate_map_sg(struct device *hddev,
FUJITA Tomonoric03ab372007-10-21 16:42:00 -07002468 struct scatterlist *sglist, int nelems, int dir)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002469{
2470 int i;
FUJITA Tomonoric03ab372007-10-21 16:42:00 -07002471 struct scatterlist *sg;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002472
FUJITA Tomonoric03ab372007-10-21 16:42:00 -07002473 for_each_sg(sglist, sg, nelems, i) {
FUJITA Tomonori12d4d402007-10-23 09:32:25 +02002474 BUG_ON(!sg_page(sg));
FUJITA Tomonoric03ab372007-10-21 16:42:00 -07002475 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2476 sg->dma_length = sg->length;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002477 }
2478 return nelems;
2479}
2480
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002481int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2482 int dir)
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002483{
2484 void *addr;
2485 int i;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002486 struct pci_dev *pdev = to_pci_dev(hwdev);
2487 struct dmar_domain *domain;
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002488 size_t size = 0;
2489 int prot = 0;
2490 size_t offset = 0;
2491 struct iova *iova = NULL;
2492 int ret;
FUJITA Tomonoric03ab372007-10-21 16:42:00 -07002493 struct scatterlist *sg;
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002494 unsigned long start_addr;
Weidong Han8c11e792008-12-08 15:29:22 +08002495 struct intel_iommu *iommu;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002496
2497 BUG_ON(dir == DMA_NONE);
Keshavamurthy, Anil S358dd8a2007-10-21 16:41:59 -07002498 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
FUJITA Tomonoric03ab372007-10-21 16:42:00 -07002499 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002500
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002501 domain = get_valid_domain_for_dev(pdev);
2502 if (!domain)
2503 return 0;
2504
Weidong Han8c11e792008-12-08 15:29:22 +08002505 iommu = domain_get_iommu(domain);
2506
FUJITA Tomonoric03ab372007-10-21 16:42:00 -07002507 for_each_sg(sglist, sg, nelems, i) {
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002508 addr = SG_ENT_VIRT_ADDRESS(sg);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002509 addr = (void *)virt_to_phys(addr);
2510 size += aligned_size((u64)addr, sg->length);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002511 }
2512
FUJITA Tomonoribb9e6d62008-10-15 16:08:28 +09002513 iova = __intel_alloc_iova(hwdev, domain, size, pdev->dma_mask);
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002514 if (!iova) {
FUJITA Tomonoric03ab372007-10-21 16:42:00 -07002515 sglist->dma_length = 0;
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002516 return 0;
2517 }
2518
2519 /*
2520 * Check if DMAR supports zero-length reads on write only
2521 * mappings..
2522 */
2523 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
Weidong Han8c11e792008-12-08 15:29:22 +08002524 !cap_zlr(iommu->cap))
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002525 prot |= DMA_PTE_READ;
2526 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2527 prot |= DMA_PTE_WRITE;
2528
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002529 start_addr = iova->pfn_lo << PAGE_SHIFT;
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002530 offset = 0;
FUJITA Tomonoric03ab372007-10-21 16:42:00 -07002531 for_each_sg(sglist, sg, nelems, i) {
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002532 addr = SG_ENT_VIRT_ADDRESS(sg);
2533 addr = (void *)virt_to_phys(addr);
2534 size = aligned_size((u64)addr, sg->length);
2535 ret = domain_page_mapping(domain, start_addr + offset,
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002536 ((u64)addr) & PAGE_MASK,
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002537 size, prot);
2538 if (ret) {
2539 /* clear the page */
2540 dma_pte_clear_range(domain, start_addr,
2541 start_addr + offset);
2542 /* free page tables */
2543 dma_pte_free_pagetable(domain, start_addr,
2544 start_addr + offset);
2545 /* free iova */
2546 __free_iova(&domain->iovad, iova);
2547 return 0;
2548 }
2549 sg->dma_address = start_addr + offset +
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002550 ((u64)addr & (~PAGE_MASK));
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07002551 sg->dma_length = sg->length;
2552 offset += size;
2553 }
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002554
2555 /* it's a non-present to present mapping */
Weidong Han8c11e792008-12-08 15:29:22 +08002556 if (iommu_flush_iotlb_psi(iommu, domain->id,
Fenghua Yu5b6985c2008-10-16 18:02:32 -07002557 start_addr, offset >> VTD_PAGE_SHIFT, 1))
Weidong Han8c11e792008-12-08 15:29:22 +08002558 iommu_flush_write_buffer(iommu);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002559 return nelems;
2560}
2561
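/*
 * Editor's note: intel_map_sg() above works in two passes.  The first
 * for_each_sg() walk only sums the page-aligned element sizes so that one
 * contiguous IOVA region can be allocated for the whole scatterlist; the
 * second walk maps each element back to back inside that region and
 * rewrites sg->dma_address accordingly, so a single IOTLB flush at the
 * end covers every element.
 */
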
2562static struct dma_mapping_ops intel_dma_ops = {
2563 .alloc_coherent = intel_alloc_coherent,
2564 .free_coherent = intel_free_coherent,
2565 .map_single = intel_map_single,
2566 .unmap_single = intel_unmap_single,
2567 .map_sg = intel_map_sg,
2568 .unmap_sg = intel_unmap_sg,
2569};
2570
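/*
 * Editor's note: an illustrative sketch (guarded out, not part of the
 * driver) of how a device driver ends up in the intel_dma_ops handlers
 * above.  Once intel_iommu_init() installs intel_dma_ops as the global
 * dma_ops, ordinary generic DMA API calls are routed to
 * intel_map_single() and friends.  The device, buffer and length below
 * are hypothetical; note that this driver's map path returns 0 on
 * failure.
 */
#if 0
static int example_dma_to_device(struct pci_dev *pdev, void *buf, size_t len)
{
	dma_addr_t handle;

	/* routed to intel_map_single() via intel_dma_ops */
	handle = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
	if (!handle)	/* __intel_map_single() returns 0 on error */
		return -EIO;

	/* ... program "handle" into the device and wait for completion ... */

	/* routed to intel_unmap_single() */
	dma_unmap_single(&pdev->dev, handle, len, DMA_TO_DEVICE);
	return 0;
}
#endif
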
2571static inline int iommu_domain_cache_init(void)
2572{
2573 int ret = 0;
2574
2575 iommu_domain_cache = kmem_cache_create("iommu_domain",
2576 sizeof(struct dmar_domain),
2577 0,
2578 SLAB_HWCACHE_ALIGN,
2579
2580 NULL);
2581 if (!iommu_domain_cache) {
2582 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2583 ret = -ENOMEM;
2584 }
2585
2586 return ret;
2587}
2588
2589static inline int iommu_devinfo_cache_init(void)
2590{
2591 int ret = 0;
2592
2593 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2594 sizeof(struct device_domain_info),
2595 0,
2596 SLAB_HWCACHE_ALIGN,
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002597 NULL);
2598 if (!iommu_devinfo_cache) {
2599 printk(KERN_ERR "Couldn't create devinfo cache\n");
2600 ret = -ENOMEM;
2601 }
2602
2603 return ret;
2604}
2605
2606static inline int iommu_iova_cache_init(void)
2607{
2608 int ret = 0;
2609
2610 iommu_iova_cache = kmem_cache_create("iommu_iova",
2611 sizeof(struct iova),
2612 0,
2613 SLAB_HWCACHE_ALIGN,
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002614 NULL);
2615 if (!iommu_iova_cache) {
2616 printk(KERN_ERR "Couldn't create iova cache\n");
2617 ret = -ENOMEM;
2618 }
2619
2620 return ret;
2621}
2622
2623static int __init iommu_init_mempool(void)
2624{
2625 int ret;
2626 ret = iommu_iova_cache_init();
2627 if (ret)
2628 return ret;
2629
2630 ret = iommu_domain_cache_init();
2631 if (ret)
2632 goto domain_error;
2633
2634 ret = iommu_devinfo_cache_init();
2635 if (!ret)
2636 return ret;
2637
2638 kmem_cache_destroy(iommu_domain_cache);
2639domain_error:
2640 kmem_cache_destroy(iommu_iova_cache);
2641
2642 return -ENOMEM;
2643}
2644
2645static void __init iommu_exit_mempool(void)
2646{
2647 kmem_cache_destroy(iommu_devinfo_cache);
2648 kmem_cache_destroy(iommu_domain_cache);
2649 kmem_cache_destroy(iommu_iova_cache);
2650
2651}
2652
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002653static void __init init_no_remapping_devices(void)
2654{
2655 struct dmar_drhd_unit *drhd;
2656
2657 for_each_drhd_unit(drhd) {
2658 if (!drhd->include_all) {
2659 int i;
2660 for (i = 0; i < drhd->devices_cnt; i++)
2661 if (drhd->devices[i] != NULL)
2662 break;
 2663 /* ignore DMAR unit if no PCI devices exist under it */
2664 if (i == drhd->devices_cnt)
2665 drhd->ignored = 1;
2666 }
2667 }
2668
2669 if (dmar_map_gfx)
2670 return;
2671
2672 for_each_drhd_unit(drhd) {
2673 int i;
2674 if (drhd->ignored || drhd->include_all)
2675 continue;
2676
2677 for (i = 0; i < drhd->devices_cnt; i++)
2678 if (drhd->devices[i] &&
2679 !IS_GFX_DEVICE(drhd->devices[i]))
2680 break;
2681
2682 if (i < drhd->devices_cnt)
2683 continue;
2684
2685 /* bypass IOMMU if it is just for gfx devices */
2686 drhd->ignored = 1;
2687 for (i = 0; i < drhd->devices_cnt; i++) {
2688 if (!drhd->devices[i])
2689 continue;
Keshavamurthy, Anil S358dd8a2007-10-21 16:41:59 -07002690 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002691 }
2692 }
2693}
2694
2695int __init intel_iommu_init(void)
2696{
2697 int ret = 0;
2698
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002699 if (dmar_table_init())
2700 return -ENODEV;
2701
Suresh Siddha1886e8a2008-07-10 11:16:37 -07002702 if (dmar_dev_scope_init())
2703 return -ENODEV;
2704
Suresh Siddha2ae21012008-07-10 11:16:43 -07002705 /*
2706 * Check the need for DMA-remapping initialization now.
 2707 * The above initialization is also used by Interrupt-remapping.
2708 */
2709 if (no_iommu || swiotlb || dmar_disabled)
2710 return -ENODEV;
2711
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002712 iommu_init_mempool();
2713 dmar_init_reserved_ranges();
2714
2715 init_no_remapping_devices();
2716
2717 ret = init_dmars();
2718 if (ret) {
2719 printk(KERN_ERR "IOMMU: dmar init failed\n");
2720 put_iova_domain(&reserved_iova_list);
2721 iommu_exit_mempool();
2722 return ret;
2723 }
2724 printk(KERN_INFO
2725 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2726
mark gross5e0d2a62008-03-04 15:22:08 -08002727 init_timer(&unmap_timer);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07002728 force_iommu = 1;
2729 dma_ops = &intel_dma_ops;
2730 return 0;
2731}
Keshavamurthy, Anil Se8204822007-10-21 16:41:55 -07002732
Weidong Hanc7151a82008-12-08 22:51:37 +08002733static int vm_domain_add_dev_info(struct dmar_domain *domain,
2734 struct pci_dev *pdev)
2735{
2736 struct device_domain_info *info;
2737 unsigned long flags;
2738
2739 info = alloc_devinfo_mem();
2740 if (!info)
2741 return -ENOMEM;
2742
2743 info->bus = pdev->bus->number;
2744 info->devfn = pdev->devfn;
2745 info->dev = pdev;
2746 info->domain = domain;
2747
2748 spin_lock_irqsave(&device_domain_lock, flags);
2749 list_add(&info->link, &domain->devices);
2750 list_add(&info->global, &device_domain_list);
2751 pdev->dev.archdata.iommu = info;
2752 spin_unlock_irqrestore(&device_domain_lock, flags);
2753
2754 return 0;
2755}
2756
2757static void vm_domain_remove_one_dev_info(struct dmar_domain *domain,
2758 struct pci_dev *pdev)
2759{
2760 struct device_domain_info *info;
2761 struct intel_iommu *iommu;
2762 unsigned long flags;
2763 int found = 0;
2764 struct list_head *entry, *tmp;
2765
2766 iommu = device_to_iommu(pdev->bus->number, pdev->devfn);
2767 if (!iommu)
2768 return;
2769
2770 spin_lock_irqsave(&device_domain_lock, flags);
2771 list_for_each_safe(entry, tmp, &domain->devices) {
2772 info = list_entry(entry, struct device_domain_info, link);
2773 if (info->bus == pdev->bus->number &&
2774 info->devfn == pdev->devfn) {
2775 list_del(&info->link);
2776 list_del(&info->global);
2777 if (info->dev)
2778 info->dev->dev.archdata.iommu = NULL;
2779 spin_unlock_irqrestore(&device_domain_lock, flags);
2780
2781 iommu_detach_dev(iommu, info->bus, info->devfn);
2782 free_devinfo_mem(info);
2783
2784 spin_lock_irqsave(&device_domain_lock, flags);
2785
2786 if (found)
2787 break;
2788 else
2789 continue;
2790 }
2791
 2792 /* if there are no other devices under the same iommu
 2793 * owned by this domain, clear this iommu in iommu_bmp,
 2794 * update iommu count and coherency
2795 */
2796 if (device_to_iommu(info->bus, info->devfn) == iommu)
2797 found = 1;
2798 }
2799
2800 if (found == 0) {
2801 unsigned long tmp_flags;
2802 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
2803 clear_bit(iommu->seq_id, &domain->iommu_bmp);
2804 domain->iommu_count--;
2805 domain_update_iommu_coherency(domain);
2806 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
2807 }
2808
2809 spin_unlock_irqrestore(&device_domain_lock, flags);
2810}
2811
2812static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
2813{
2814 struct device_domain_info *info;
2815 struct intel_iommu *iommu;
2816 unsigned long flags1, flags2;
2817
2818 spin_lock_irqsave(&device_domain_lock, flags1);
2819 while (!list_empty(&domain->devices)) {
2820 info = list_entry(domain->devices.next,
2821 struct device_domain_info, link);
2822 list_del(&info->link);
2823 list_del(&info->global);
2824 if (info->dev)
2825 info->dev->dev.archdata.iommu = NULL;
2826
2827 spin_unlock_irqrestore(&device_domain_lock, flags1);
2828
2829 iommu = device_to_iommu(info->bus, info->devfn);
2830 iommu_detach_dev(iommu, info->bus, info->devfn);
2831
2832 /* clear this iommu in iommu_bmp, update iommu count
2833 * and coherency
2834 */
2835 spin_lock_irqsave(&domain->iommu_lock, flags2);
2836 if (test_and_clear_bit(iommu->seq_id,
2837 &domain->iommu_bmp)) {
2838 domain->iommu_count--;
2839 domain_update_iommu_coherency(domain);
2840 }
2841 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
2842
2843 free_devinfo_mem(info);
2844 spin_lock_irqsave(&device_domain_lock, flags1);
2845 }
2846 spin_unlock_irqrestore(&device_domain_lock, flags1);
2847}
2848
Weidong Han5e98c4b2008-12-08 23:03:27 +08002849/* domain id for virtual machine; it won't be set in the context entry */
2850static unsigned long vm_domid;
2851
2852static struct dmar_domain *iommu_alloc_vm_domain(void)
2853{
2854 struct dmar_domain *domain;
2855
2856 domain = alloc_domain_mem();
2857 if (!domain)
2858 return NULL;
2859
2860 domain->id = vm_domid++;
2861 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
2862 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
2863
2864 return domain;
2865}
2866
2867static int vm_domain_init(struct dmar_domain *domain, int guest_width)
2868{
2869 int adjust_width;
2870
2871 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
2872 spin_lock_init(&domain->mapping_lock);
2873 spin_lock_init(&domain->iommu_lock);
2874
2875 domain_reserve_special_ranges(domain);
2876
2877 /* calculate AGAW */
2878 domain->gaw = guest_width;
2879 adjust_width = guestwidth_to_adjustwidth(guest_width);
2880 domain->agaw = width_to_agaw(adjust_width);
2881
2882 INIT_LIST_HEAD(&domain->devices);
2883
2884 domain->iommu_count = 0;
2885 domain->iommu_coherency = 0;
2886
2887 /* always allocate the top pgd */
2888 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
2889 if (!domain->pgd)
2890 return -ENOMEM;
2891 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
2892 return 0;
2893}
2894
2895static void iommu_free_vm_domain(struct dmar_domain *domain)
2896{
2897 unsigned long flags;
2898 struct dmar_drhd_unit *drhd;
2899 struct intel_iommu *iommu;
2900 unsigned long i;
2901 unsigned long ndomains;
2902
2903 for_each_drhd_unit(drhd) {
2904 if (drhd->ignored)
2905 continue;
2906 iommu = drhd->iommu;
2907
2908 ndomains = cap_ndoms(iommu->cap);
2909 i = find_first_bit(iommu->domain_ids, ndomains);
2910 for (; i < ndomains; ) {
2911 if (iommu->domains[i] == domain) {
2912 spin_lock_irqsave(&iommu->lock, flags);
2913 clear_bit(i, iommu->domain_ids);
2914 iommu->domains[i] = NULL;
2915 spin_unlock_irqrestore(&iommu->lock, flags);
2916 break;
2917 }
2918 i = find_next_bit(iommu->domain_ids, ndomains, i+1);
2919 }
2920 }
2921}
2922
2923static void vm_domain_exit(struct dmar_domain *domain)
2924{
2925 u64 end;
2926
 2927 /* Domain 0 is reserved, so don't process it */
2928 if (!domain)
2929 return;
2930
2931 vm_domain_remove_all_dev_info(domain);
2932 /* destroy iovas */
2933 put_iova_domain(&domain->iovad);
2934 end = DOMAIN_MAX_ADDR(domain->gaw);
2935 end = end & (~VTD_PAGE_MASK);
2936
2937 /* clear ptes */
2938 dma_pte_clear_range(domain, 0, end);
2939
2940 /* free page tables */
2941 dma_pte_free_pagetable(domain, 0, end);
2942
2943 iommu_free_vm_domain(domain);
2944 free_domain_mem(domain);
2945}
2946
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08002947struct dmar_domain *intel_iommu_alloc_domain(void)
Kay, Allen M38717942008-09-09 18:37:29 +03002948{
Kay, Allen M38717942008-09-09 18:37:29 +03002949 struct dmar_domain *domain;
Kay, Allen M38717942008-09-09 18:37:29 +03002950
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08002951 domain = iommu_alloc_vm_domain();
Kay, Allen M38717942008-09-09 18:37:29 +03002952 if (!domain) {
2953 printk(KERN_ERR
2954 "intel_iommu_domain_alloc: domain == NULL\n");
2955 return NULL;
2956 }
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08002957 if (vm_domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
Kay, Allen M38717942008-09-09 18:37:29 +03002958 printk(KERN_ERR
2959 "intel_iommu_domain_alloc: domain_init() failed\n");
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08002960 vm_domain_exit(domain);
Kay, Allen M38717942008-09-09 18:37:29 +03002961 return NULL;
2962 }
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08002963
Kay, Allen M38717942008-09-09 18:37:29 +03002964 return domain;
2965}
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08002966EXPORT_SYMBOL_GPL(intel_iommu_alloc_domain);
Kay, Allen M38717942008-09-09 18:37:29 +03002967
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08002968void intel_iommu_free_domain(struct dmar_domain *domain)
Kay, Allen M38717942008-09-09 18:37:29 +03002969{
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08002970 vm_domain_exit(domain);
Kay, Allen M38717942008-09-09 18:37:29 +03002971}
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08002972EXPORT_SYMBOL_GPL(intel_iommu_free_domain);
Kay, Allen M38717942008-09-09 18:37:29 +03002973
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08002974int intel_iommu_attach_device(struct dmar_domain *domain,
2975 struct pci_dev *pdev)
Kay, Allen M38717942008-09-09 18:37:29 +03002976{
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08002977 int ret;
Kay, Allen M38717942008-09-09 18:37:29 +03002978
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08002979 /* normally pdev is not mapped */
2980 if (unlikely(domain_context_mapped(pdev))) {
2981 struct dmar_domain *old_domain;
2982
2983 old_domain = find_domain(pdev);
2984 if (old_domain) {
2985 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
2986 vm_domain_remove_one_dev_info(old_domain, pdev);
2987 else
2988 domain_remove_dev_info(old_domain);
2989 }
2990 }
2991
2992 ret = domain_context_mapping(domain, pdev);
2993 if (ret)
2994 return ret;
2995
2996 ret = vm_domain_add_dev_info(domain, pdev);
2997 return ret;
2998}
2999EXPORT_SYMBOL_GPL(intel_iommu_attach_device);
3000
3001void intel_iommu_detach_device(struct dmar_domain *domain,
3002 struct pci_dev *pdev)
Kay, Allen M38717942008-09-09 18:37:29 +03003003{
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08003004 vm_domain_remove_one_dev_info(domain, pdev);
Kay, Allen M38717942008-09-09 18:37:29 +03003005}
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08003006EXPORT_SYMBOL_GPL(intel_iommu_detach_device);
Kay, Allen M38717942008-09-09 18:37:29 +03003007
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08003008int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova,
3009 u64 hpa, size_t size, int prot)
Kay, Allen M38717942008-09-09 18:37:29 +03003010{
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08003011 int ret;
3012 ret = domain_page_mapping(domain, iova, hpa, size, prot);
3013 return ret;
Kay, Allen M38717942008-09-09 18:37:29 +03003014}
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08003015EXPORT_SYMBOL_GPL(intel_iommu_map_address);
3016
3017void intel_iommu_unmap_address(struct dmar_domain *domain,
3018 dma_addr_t iova, size_t size)
3019{
3020 dma_addr_t base;
3021
3022 /* The address might not be aligned */
3023 base = iova & VTD_PAGE_MASK;
3024 size = VTD_PAGE_ALIGN(size);
3025 dma_pte_clear_range(domain, base, base + size);
3026}
3027EXPORT_SYMBOL_GPL(intel_iommu_unmap_address);
Kay, Allen M38717942008-09-09 18:37:29 +03003028
3029int intel_iommu_found(void)
3030{
3031 return g_num_of_iommus;
3032}
3033EXPORT_SYMBOL_GPL(intel_iommu_found);
3034
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08003035u64 intel_iommu_iova_to_phys(struct dmar_domain *domain, u64 iova)
Kay, Allen M38717942008-09-09 18:37:29 +03003036{
3037 struct dma_pte *pte;
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08003038 u64 phys = 0;
Kay, Allen M38717942008-09-09 18:37:29 +03003039
Kay, Allen M38717942008-09-09 18:37:29 +03003040 pte = addr_to_dma_pte(domain, iova);
Kay, Allen M38717942008-09-09 18:37:29 +03003041 if (pte)
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08003042 phys = dma_pte_addr(pte);
Kay, Allen M38717942008-09-09 18:37:29 +03003043
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08003044 return phys;
Kay, Allen M38717942008-09-09 18:37:29 +03003045}
Weidong Hanfaa3d6f2008-12-08 23:09:29 +08003046EXPORT_SYMBOL_GPL(intel_iommu_iova_to_phys);
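
/*
 * Editor's note: an illustrative sketch (guarded out, not part of the
 * driver) of the intended life cycle of the exported domain API above,
 * roughly what a virtualization layer doing device assignment would
 * perform.  The device pointer, guest/host addresses and size are
 * hypothetical.
 */
#if 0
static int example_assign_device(struct pci_dev *pdev, u64 gpa, u64 hpa,
				 size_t size)
{
	struct dmar_domain *domain;
	int ret;

	if (!intel_iommu_found())
		return -ENODEV;

	domain = intel_iommu_alloc_domain();
	if (!domain)
		return -ENOMEM;

	ret = intel_iommu_attach_device(domain, pdev);
	if (ret)
		goto out_free;

	/* map guest-physical gpa to host-physical hpa, read/write */
	ret = intel_iommu_map_address(domain, gpa, hpa, size,
				      DMA_PTE_READ | DMA_PTE_WRITE);
	if (ret)
		goto out_detach;

	/* the mapping can be queried back with intel_iommu_iova_to_phys() */
	return 0;

out_detach:
	intel_iommu_detach_device(domain, pdev);
out_free:
	intel_iommu_free_domain(domain);
	return ret;
}
#endif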