1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 */
22
23#include <linux/init.h>
24#include <linux/bitmap.h>
25#include <linux/slab.h>
26#include <linux/irq.h>
27#include <linux/interrupt.h>
28#include <linux/sysdev.h>
29#include <linux/spinlock.h>
30#include <linux/pci.h>
31#include <linux/dmar.h>
32#include <linux/dma-mapping.h>
33#include <linux/mempool.h>
34#include "iova.h"
35#include "intel-iommu.h"
36#include <asm/proto.h> /* force_iommu in this header in x86-64*/
37#include <asm/cacheflush.h>
38#include <asm/gart.h>
39#include "pci.h"
40
41#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
42#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
43
44#define IOAPIC_RANGE_START (0xfee00000)
45#define IOAPIC_RANGE_END (0xfeefffff)
46#define IOVA_START_ADDR (0x1000)
47
48#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
49
50#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */
51
52#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
53
54static void domain_remove_dev_info(struct dmar_domain *domain);
55
56static int dmar_disabled;
57static int __initdata dmar_map_gfx = 1;
58static int dmar_forcedac;
59
60#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
61static DEFINE_SPINLOCK(device_domain_lock);
62static LIST_HEAD(device_domain_list);
63
64static int __init intel_iommu_setup(char *str)
65{
66 if (!str)
67 return -EINVAL;
68 while (*str) {
69 if (!strncmp(str, "off", 3)) {
70 dmar_disabled = 1;
71 printk(KERN_INFO"Intel-IOMMU: disabled\n");
72 } else if (!strncmp(str, "igfx_off", 8)) {
73 dmar_map_gfx = 0;
74 printk(KERN_INFO
75 "Intel-IOMMU: disable GFX device mapping\n");
76 } else if (!strncmp(str, "forcedac", 8)) {
77 printk(KERN_INFO
78 "Intel-IOMMU: Forcing DAC for PCI devices\n");
79 dmar_forcedac = 1;
80 }
81
82 str += strcspn(str, ",");
83 while (*str == ',')
84 str++;
85 }
86 return 0;
87}
88__setup("intel_iommu=", intel_iommu_setup);
89
90static struct kmem_cache *iommu_domain_cache;
91static struct kmem_cache *iommu_devinfo_cache;
92static struct kmem_cache *iommu_iova_cache;
93
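/*
 * Slab allocation helper: temporarily set PF_MEMALLOC so the GFP_ATOMIC
 * allocation may dip into the emergency reserves, then restore the
 * caller's original PF_MEMALLOC state.
 */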
94static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
95{
96 unsigned int flags;
97 void *vaddr;
98
99 /* trying to avoid low memory issues */
100 flags = current->flags & PF_MEMALLOC;
101 current->flags |= PF_MEMALLOC;
102 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
103 current->flags &= (~PF_MEMALLOC | flags);
104 return vaddr;
105}
106
107
108static inline void *alloc_pgtable_page(void)
109{
110 unsigned int flags;
111 void *vaddr;
112
113 /* trying to avoid low memory issues */
114 flags = current->flags & PF_MEMALLOC;
115 current->flags |= PF_MEMALLOC;
116 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
117 current->flags &= (~PF_MEMALLOC | flags);
118 return vaddr;
119}
120
121static inline void free_pgtable_page(void *vaddr)
122{
123 free_page((unsigned long)vaddr);
124}
125
126static inline void *alloc_domain_mem(void)
127{
128 return iommu_kmem_cache_alloc(iommu_domain_cache);
129}
130
131static inline void free_domain_mem(void *vaddr)
132{
133 kmem_cache_free(iommu_domain_cache, vaddr);
134}
135
136static inline void * alloc_devinfo_mem(void)
137{
138 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
139}
140
141static inline void free_devinfo_mem(void *vaddr)
142{
143 kmem_cache_free(iommu_devinfo_cache, vaddr);
144}
145
146struct iova *alloc_iova_mem(void)
147{
148 return iommu_kmem_cache_alloc(iommu_iova_cache);
149}
150
151void free_iova_mem(struct iova *iova)
152{
153 kmem_cache_free(iommu_iova_cache, iova);
154}
155
156static inline void __iommu_flush_cache(
157 struct intel_iommu *iommu, void *addr, int size)
158{
159 if (!ecap_coherent(iommu->ecap))
160 clflush_cache_range(addr, size);
161}
162
163/* Gets context entry for a given bus and devfn */
164static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
165 u8 bus, u8 devfn)
166{
167 struct root_entry *root;
168 struct context_entry *context;
169 unsigned long phy_addr;
170 unsigned long flags;
171
172 spin_lock_irqsave(&iommu->lock, flags);
173 root = &iommu->root_entry[bus];
174 context = get_context_addr_from_root(root);
175 if (!context) {
176 context = (struct context_entry *)alloc_pgtable_page();
177 if (!context) {
178 spin_unlock_irqrestore(&iommu->lock, flags);
179 return NULL;
180 }
181 __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
182 phy_addr = virt_to_phys((void *)context);
183 set_root_value(root, phy_addr);
184 set_root_present(root);
185 __iommu_flush_cache(iommu, root, sizeof(*root));
186 }
187 spin_unlock_irqrestore(&iommu->lock, flags);
188 return &context[devfn];
189}
190
191static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
192{
193 struct root_entry *root;
194 struct context_entry *context;
195 int ret;
196 unsigned long flags;
197
198 spin_lock_irqsave(&iommu->lock, flags);
199 root = &iommu->root_entry[bus];
200 context = get_context_addr_from_root(root);
201 if (!context) {
202 ret = 0;
203 goto out;
204 }
205 ret = context_present(context[devfn]);
206out:
207 spin_unlock_irqrestore(&iommu->lock, flags);
208 return ret;
209}
210
211static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
212{
213 struct root_entry *root;
214 struct context_entry *context;
215 unsigned long flags;
216
217 spin_lock_irqsave(&iommu->lock, flags);
218 root = &iommu->root_entry[bus];
219 context = get_context_addr_from_root(root);
220 if (context) {
221 context_clear_entry(context[devfn]);
222 __iommu_flush_cache(iommu, &context[devfn], \
223 sizeof(*context));
224 }
225 spin_unlock_irqrestore(&iommu->lock, flags);
226}
227
228static void free_context_table(struct intel_iommu *iommu)
229{
230 struct root_entry *root;
231 int i;
232 unsigned long flags;
233 struct context_entry *context;
234
235 spin_lock_irqsave(&iommu->lock, flags);
236 if (!iommu->root_entry) {
237 goto out;
238 }
239 for (i = 0; i < ROOT_ENTRY_NR; i++) {
240 root = &iommu->root_entry[i];
241 context = get_context_addr_from_root(root);
242 if (context)
243 free_pgtable_page(context);
244 }
245 free_pgtable_page(iommu->root_entry);
246 iommu->root_entry = NULL;
247out:
248 spin_unlock_irqrestore(&iommu->lock, flags);
249}
250
251/* page table handling */
252#define LEVEL_STRIDE (9)
253#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
254
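/*
 * AGAW <-> page-table geometry helpers: each level decodes LEVEL_STRIDE
 * (9) bits, so an adjusted guest address width (AGAW) of 0/1/2 maps to a
 * 2/3/4-level table covering a 30/39/48-bit address space.
 */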
255static inline int agaw_to_level(int agaw)
256{
257 return agaw + 2;
258}
259
260static inline int agaw_to_width(int agaw)
261{
262 return 30 + agaw * LEVEL_STRIDE;
263
264}
265
266static inline int width_to_agaw(int width)
267{
268 return (width - 30) / LEVEL_STRIDE;
269}
270
271static inline unsigned int level_to_offset_bits(int level)
272{
273 return (12 + (level - 1) * LEVEL_STRIDE);
274}
275
276static inline int address_level_offset(u64 addr, int level)
277{
278 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
279}
280
281static inline u64 level_mask(int level)
282{
283 return ((u64)-1 << level_to_offset_bits(level));
284}
285
286static inline u64 level_size(int level)
287{
288 return ((u64)1 << level_to_offset_bits(level));
289}
290
291static inline u64 align_to_level(u64 addr, int level)
292{
293 return ((addr + level_size(level) - 1) & level_mask(level));
294}
295
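/*
 * Walk the domain's page table for @addr, allocating any missing
 * intermediate page-table pages along the way, and return a pointer to
 * the last-level PTE for that address.
 */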
296static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
297{
298 int addr_width = agaw_to_width(domain->agaw);
299 struct dma_pte *parent, *pte = NULL;
300 int level = agaw_to_level(domain->agaw);
301 int offset;
302 unsigned long flags;
303
304 BUG_ON(!domain->pgd);
305
306 addr &= (((u64)1) << addr_width) - 1;
307 parent = domain->pgd;
308
309 spin_lock_irqsave(&domain->mapping_lock, flags);
310 while (level > 0) {
311 void *tmp_page;
312
313 offset = address_level_offset(addr, level);
314 pte = &parent[offset];
315 if (level == 1)
316 break;
317
318 if (!dma_pte_present(*pte)) {
319 tmp_page = alloc_pgtable_page();
320
321 if (!tmp_page) {
322 spin_unlock_irqrestore(&domain->mapping_lock,
323 flags);
324 return NULL;
325 }
326 __iommu_flush_cache(domain->iommu, tmp_page,
327 PAGE_SIZE_4K);
328 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
329 /*
330 * higher-level tables always set r/w; the last-level page
331 * table controls read/write permission
332 */
333 dma_set_pte_readable(*pte);
334 dma_set_pte_writable(*pte);
335 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
336 }
337 parent = phys_to_virt(dma_pte_addr(*pte));
338 level--;
339 }
340
341 spin_unlock_irqrestore(&domain->mapping_lock, flags);
342 return pte;
343}
344
345/* return address's pte at specific level */
346static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
347 int level)
348{
349 struct dma_pte *parent, *pte = NULL;
350 int total = agaw_to_level(domain->agaw);
351 int offset;
352
353 parent = domain->pgd;
354 while (level <= total) {
355 offset = address_level_offset(addr, total);
356 pte = &parent[offset];
357 if (level == total)
358 return pte;
359
360 if (!dma_pte_present(*pte))
361 break;
362 parent = phys_to_virt(dma_pte_addr(*pte));
363 total--;
364 }
365 return NULL;
366}
367
368/* clear one page's page table */
369static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
370{
371 struct dma_pte *pte = NULL;
372
373 /* get last level pte */
374 pte = dma_addr_level_pte(domain, addr, 1);
375
376 if (pte) {
377 dma_clear_pte(*pte);
378 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
379 }
380}
381
382/* clear last level pte, a tlb flush should be followed */
383static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
384{
385 int addr_width = agaw_to_width(domain->agaw);
386
387 start &= (((u64)1) << addr_width) - 1;
388 end &= (((u64)1) << addr_width) - 1;
389 /* in case it's a partial page */
390 start = PAGE_ALIGN_4K(start);
391 end &= PAGE_MASK_4K;
392
393 /* we don't need lock here, nobody else touches the iova range */
394 while (start < end) {
395 dma_pte_clear_one(domain, start);
396 start += PAGE_SIZE_4K;
397 }
398}
399
400/* free page table pages. last level pte should already be cleared */
401static void dma_pte_free_pagetable(struct dmar_domain *domain,
402 u64 start, u64 end)
403{
404 int addr_width = agaw_to_width(domain->agaw);
405 struct dma_pte *pte;
406 int total = agaw_to_level(domain->agaw);
407 int level;
408 u64 tmp;
409
410 start &= (((u64)1) << addr_width) - 1;
411 end &= (((u64)1) << addr_width) - 1;
412
413 /* we don't need lock here, nobody else touches the iova range */
414 level = 2;
415 while (level <= total) {
416 tmp = align_to_level(start, level);
417 if (tmp >= end || (tmp + level_size(level) > end))
418 return;
419
420 while (tmp < end) {
421 pte = dma_addr_level_pte(domain, tmp, level);
422 if (pte) {
423 free_pgtable_page(
424 phys_to_virt(dma_pte_addr(*pte)));
425 dma_clear_pte(*pte);
426 __iommu_flush_cache(domain->iommu,
427 pte, sizeof(*pte));
428 }
429 tmp += level_size(level);
430 }
431 level++;
432 }
433 /* free pgd */
434 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
435 free_pgtable_page(domain->pgd);
436 domain->pgd = NULL;
437 }
438}
439
440/* iommu handling */
441static int iommu_alloc_root_entry(struct intel_iommu *iommu)
442{
443 struct root_entry *root;
444 unsigned long flags;
445
446 root = (struct root_entry *)alloc_pgtable_page();
447 if (!root)
448 return -ENOMEM;
449
450 __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
451
452 spin_lock_irqsave(&iommu->lock, flags);
453 iommu->root_entry = root;
454 spin_unlock_irqrestore(&iommu->lock, flags);
455
456 return 0;
457}
458
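/*
 * Poll an IOMMU register with @op until @cond becomes true; panic if the
 * hardware has not responded within DMAR_OPERATION_TIMEOUT.
 */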
459#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
460{\
461 unsigned long start_time = jiffies;\
462 while (1) {\
463 sts = op (iommu->reg + offset);\
464 if (cond)\
465 break;\
466 if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
467 panic("DMAR hardware is malfunctioning\n");\
468 cpu_relax();\
469 }\
470}
471
472static void iommu_set_root_entry(struct intel_iommu *iommu)
473{
474 void *addr;
475 u32 cmd, sts;
476 unsigned long flag;
477
478 addr = iommu->root_entry;
479
480 spin_lock_irqsave(&iommu->register_lock, flag);
481 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
482
483 cmd = iommu->gcmd | DMA_GCMD_SRTP;
484 writel(cmd, iommu->reg + DMAR_GCMD_REG);
485
486 /* Make sure the hardware completes it */
487 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
488 readl, (sts & DMA_GSTS_RTPS), sts);
489
490 spin_unlock_irqrestore(&iommu->register_lock, flag);
491}
492
493static void iommu_flush_write_buffer(struct intel_iommu *iommu)
494{
495 u32 val;
496 unsigned long flag;
497
498 if (!cap_rwbf(iommu->cap))
499 return;
500 val = iommu->gcmd | DMA_GCMD_WBF;
501
502 spin_lock_irqsave(&iommu->register_lock, flag);
503 writel(val, iommu->reg + DMAR_GCMD_REG);
504
505 /* Make sure the hardware completes it */
506 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
507 readl, (!(val & DMA_GSTS_WBFS)), val);
508
509 spin_unlock_irqrestore(&iommu->register_lock, flag);
510}
511
512/* the return value indicates whether the caller needs a write buffer flush */
513static int __iommu_flush_context(struct intel_iommu *iommu,
514 u16 did, u16 source_id, u8 function_mask, u64 type,
515 int non_present_entry_flush)
516{
517 u64 val = 0;
518 unsigned long flag;
519
520 /*
521 * In the non-present entry flush case: if the hardware doesn't cache
522 * non-present entries, we do nothing; if it does cache them, we flush
523 * the entries of domain 0 (the domain id that is used to cache
524 * any non-present entries)
525 */
526 if (non_present_entry_flush) {
527 if (!cap_caching_mode(iommu->cap))
528 return 1;
529 else
530 did = 0;
531 }
532
533 switch (type) {
534 case DMA_CCMD_GLOBAL_INVL:
535 val = DMA_CCMD_GLOBAL_INVL;
536 break;
537 case DMA_CCMD_DOMAIN_INVL:
538 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
539 break;
540 case DMA_CCMD_DEVICE_INVL:
541 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
542 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
543 break;
544 default:
545 BUG();
546 }
547 val |= DMA_CCMD_ICC;
548
549 spin_lock_irqsave(&iommu->register_lock, flag);
550 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
551
552 /* Make sure the hardware completes it */
553 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
554 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
555
556 spin_unlock_irqrestore(&iommu->register_lock, flag);
557
558 /* flushing a context entry implicitly flushes the write buffer */
559 return 0;
560}
561
562static int inline iommu_flush_context_global(struct intel_iommu *iommu,
563 int non_present_entry_flush)
564{
565 return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
566 non_present_entry_flush);
567}
568
569static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did,
570 int non_present_entry_flush)
571{
572 return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
573 non_present_entry_flush);
574}
575
576static int inline iommu_flush_context_device(struct intel_iommu *iommu,
577 u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
578{
579 return __iommu_flush_context(iommu, did, source_id, function_mask,
580 DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
581}
582
583/* the return value indicates whether the caller needs a write buffer flush */
584static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
585 u64 addr, unsigned int size_order, u64 type,
586 int non_present_entry_flush)
587{
588 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
589 u64 val = 0, val_iva = 0;
590 unsigned long flag;
591
592 /*
593 * In the non-present entry flush case: if the hardware doesn't cache
594 * non-present entries, we do nothing; if it does cache them, we flush
595 * the entries of domain 0 (the domain id that is used to cache
596 * any non-present entries)
597 */
598 if (non_present_entry_flush) {
599 if (!cap_caching_mode(iommu->cap))
600 return 1;
601 else
602 did = 0;
603 }
604
605 switch (type) {
606 case DMA_TLB_GLOBAL_FLUSH:
607 /* a global flush doesn't need the IVA_REG set */
608 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
609 break;
610 case DMA_TLB_DSI_FLUSH:
611 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
612 break;
613 case DMA_TLB_PSI_FLUSH:
614 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
615 /* Note: always flush non-leaf currently */
616 val_iva = size_order | addr;
617 break;
618 default:
619 BUG();
620 }
621 /* Note: set drain read/write */
622#if 0
623 /*
624 * This is probably only here to be extra safe; it looks like we can
625 * ignore it without any impact.
626 */
627 if (cap_read_drain(iommu->cap))
628 val |= DMA_TLB_READ_DRAIN;
629#endif
630 if (cap_write_drain(iommu->cap))
631 val |= DMA_TLB_WRITE_DRAIN;
632
633 spin_lock_irqsave(&iommu->register_lock, flag);
634 /* Note: Only uses first TLB reg currently */
635 if (val_iva)
636 dmar_writeq(iommu->reg + tlb_offset, val_iva);
637 dmar_writeq(iommu->reg + tlb_offset + 8, val);
638
639 /* Make sure the hardware completes it */
640 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
641 dmar_readq, (!(val & DMA_TLB_IVT)), val);
642
643 spin_unlock_irqrestore(&iommu->register_lock, flag);
644
645 /* check IOTLB invalidation granularity */
646 if (DMA_TLB_IAIG(val) == 0)
647 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
648 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
649 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
650 DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
651 /* flushing an IOTLB entry implicitly flushes the write buffer */
652 return 0;
653}
654
655static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu,
656 int non_present_entry_flush)
657{
658 return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
659 non_present_entry_flush);
660}
661
662static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
663 int non_present_entry_flush)
664{
665 return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
666 non_present_entry_flush);
667}
668
669static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
670 u64 addr, unsigned int pages, int non_present_entry_flush)
671{
672 unsigned int mask;
673
674 BUG_ON(addr & (~PAGE_MASK_4K));
675 BUG_ON(pages == 0);
676
677 /* Fallback to domain selective flush if no PSI support */
678 if (!cap_pgsel_inv(iommu->cap))
679 return iommu_flush_iotlb_dsi(iommu, did,
680 non_present_entry_flush);
681
682 /*
683 * PSI requires the number of pages to be a power of two, and the base
684 * address to be naturally aligned to the size of the region
685 */
686 mask = ilog2(__roundup_pow_of_two(pages));
687 /* Fallback to domain selective flush if size is too big */
688 if (mask > cap_max_amask_val(iommu->cap))
689 return iommu_flush_iotlb_dsi(iommu, did,
690 non_present_entry_flush);
691
692 return __iommu_flush_iotlb(iommu, did, addr, mask,
693 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
694}
695
696static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
697{
698 u32 pmen;
699 unsigned long flags;
700
701 spin_lock_irqsave(&iommu->register_lock, flags);
702 pmen = readl(iommu->reg + DMAR_PMEN_REG);
703 pmen &= ~DMA_PMEN_EPM;
704 writel(pmen, iommu->reg + DMAR_PMEN_REG);
705
706 /* wait for the protected region status bit to clear */
707 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
708 readl, !(pmen & DMA_PMEN_PRS), pmen);
709
710 spin_unlock_irqrestore(&iommu->register_lock, flags);
711}
712
713static int iommu_enable_translation(struct intel_iommu *iommu)
714{
715 u32 sts;
716 unsigned long flags;
717
718 spin_lock_irqsave(&iommu->register_lock, flags);
719 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
720
721 /* Make sure the hardware completes it */
722 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
723 readl, (sts & DMA_GSTS_TES), sts);
724
725 iommu->gcmd |= DMA_GCMD_TE;
726 spin_unlock_irqrestore(&iommu->register_lock, flags);
727 return 0;
728}
729
730static int iommu_disable_translation(struct intel_iommu *iommu)
731{
732 u32 sts;
733 unsigned long flag;
734
735 spin_lock_irqsave(&iommu->register_lock, flag);
736 iommu->gcmd &= ~DMA_GCMD_TE;
737 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
738
739 /* Make sure the hardware completes it */
740 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
741 readl, (!(sts & DMA_GSTS_TES)), sts);
742
743 spin_unlock_irqrestore(&iommu->register_lock, flag);
744 return 0;
745}
746
747/* IOMMU interrupt handling. Most of this is MSI-like. */
748
749static const char *fault_reason_strings[] =
750{
751 "Software",
752 "Present bit in root entry is clear",
753 "Present bit in context entry is clear",
754 "Invalid context entry",
755 "Access beyond MGAW",
756 "PTE Write access is not set",
757 "PTE Read access is not set",
758 "Next page table ptr is invalid",
759 "Root table address invalid",
760 "Context table ptr is invalid",
761 "non-zero reserved fields in RTP",
762 "non-zero reserved fields in CTP",
763 "non-zero reserved fields in PTE",
764};
765#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
766
767const char *dmar_get_fault_reason(u8 fault_reason)
768{
769 if (fault_reason > MAX_FAULT_REASON_IDX)
770 return "Unknown";
771 else
772 return fault_reason_strings[fault_reason];
773}
774
775void dmar_msi_unmask(unsigned int irq)
776{
777 struct intel_iommu *iommu = get_irq_data(irq);
778 unsigned long flag;
779
780 /* unmask it */
781 spin_lock_irqsave(&iommu->register_lock, flag);
782 writel(0, iommu->reg + DMAR_FECTL_REG);
783 /* Read a reg to force flush the post write */
784 readl(iommu->reg + DMAR_FECTL_REG);
785 spin_unlock_irqrestore(&iommu->register_lock, flag);
786}
787
788void dmar_msi_mask(unsigned int irq)
789{
790 unsigned long flag;
791 struct intel_iommu *iommu = get_irq_data(irq);
792
793 /* mask it */
794 spin_lock_irqsave(&iommu->register_lock, flag);
795 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
796 /* Read a reg to force flush the post write */
797 readl(iommu->reg + DMAR_FECTL_REG);
798 spin_unlock_irqrestore(&iommu->register_lock, flag);
799}
800
801void dmar_msi_write(int irq, struct msi_msg *msg)
802{
803 struct intel_iommu *iommu = get_irq_data(irq);
804 unsigned long flag;
805
806 spin_lock_irqsave(&iommu->register_lock, flag);
807 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
808 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
809 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
810 spin_unlock_irqrestore(&iommu->register_lock, flag);
811}
812
813void dmar_msi_read(int irq, struct msi_msg *msg)
814{
815 struct intel_iommu *iommu = get_irq_data(irq);
816 unsigned long flag;
817
818 spin_lock_irqsave(&iommu->register_lock, flag);
819 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
820 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
821 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
822 spin_unlock_irqrestore(&iommu->register_lock, flag);
823}
824
825static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
826 u8 fault_reason, u16 source_id, u64 addr)
827{
828 const char *reason;
829
830 reason = dmar_get_fault_reason(fault_reason);
831
832 printk(KERN_ERR
833 "DMAR:[%s] Request device [%02x:%02x.%d] "
834 "fault addr %llx \n"
835 "DMAR:[fault reason %02d] %s\n",
836 (type ? "DMA Read" : "DMA Write"),
837 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
838 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
839 return 0;
840}
841
842#define PRIMARY_FAULT_REG_LEN (16)
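/*
 * Fault-event interrupt handler: walk the primary fault recording
 * registers, log and clear each pending fault, and finally clear the
 * primary fault overflow bit.
 */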
843static irqreturn_t iommu_page_fault(int irq, void *dev_id)
844{
845 struct intel_iommu *iommu = dev_id;
846 int reg, fault_index;
847 u32 fault_status;
848 unsigned long flag;
849
850 spin_lock_irqsave(&iommu->register_lock, flag);
851 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
852
853 /* TBD: ignore advanced fault log currently */
854 if (!(fault_status & DMA_FSTS_PPF))
855 goto clear_overflow;
856
857 fault_index = dma_fsts_fault_record_index(fault_status);
858 reg = cap_fault_reg_offset(iommu->cap);
859 while (1) {
860 u8 fault_reason;
861 u16 source_id;
862 u64 guest_addr;
863 int type;
864 u32 data;
865
866 /* highest 32 bits */
867 data = readl(iommu->reg + reg +
868 fault_index * PRIMARY_FAULT_REG_LEN + 12);
869 if (!(data & DMA_FRCD_F))
870 break;
871
872 fault_reason = dma_frcd_fault_reason(data);
873 type = dma_frcd_type(data);
874
875 data = readl(iommu->reg + reg +
876 fault_index * PRIMARY_FAULT_REG_LEN + 8);
877 source_id = dma_frcd_source_id(data);
878
879 guest_addr = dmar_readq(iommu->reg + reg +
880 fault_index * PRIMARY_FAULT_REG_LEN);
881 guest_addr = dma_frcd_page_addr(guest_addr);
882 /* clear the fault */
883 writel(DMA_FRCD_F, iommu->reg + reg +
884 fault_index * PRIMARY_FAULT_REG_LEN + 12);
885
886 spin_unlock_irqrestore(&iommu->register_lock, flag);
887
888 iommu_page_fault_do_one(iommu, type, fault_reason,
889 source_id, guest_addr);
890
891 fault_index++;
892 if (fault_index > cap_num_fault_regs(iommu->cap))
893 fault_index = 0;
894 spin_lock_irqsave(&iommu->register_lock, flag);
895 }
896clear_overflow:
897 /* clear primary fault overflow */
898 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
899 if (fault_status & DMA_FSTS_PFO)
900 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
901
902 spin_unlock_irqrestore(&iommu->register_lock, flag);
903 return IRQ_HANDLED;
904}
905
906int dmar_set_interrupt(struct intel_iommu *iommu)
907{
908 int irq, ret;
909
910 irq = create_irq();
911 if (!irq) {
912 printk(KERN_ERR "IOMMU: no free vectors\n");
913 return -EINVAL;
914 }
915
916 set_irq_data(irq, iommu);
917 iommu->irq = irq;
918
919 ret = arch_setup_dmar_msi(irq);
920 if (ret) {
921 set_irq_data(irq, NULL);
922 iommu->irq = 0;
923 destroy_irq(irq);
924 return 0;
925 }
926
927 /* Force fault register is cleared */
928 iommu_page_fault(irq, iommu);
929
930 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
931 if (ret)
932 printk(KERN_ERR "IOMMU: can't request irq\n");
933 return ret;
934}
935
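/*
 * Allocate this IOMMU's domain-id bitmap and domain pointer array, sized
 * from the number-of-domains capability field. Domain id 0 is reserved
 * when caching mode is set.
 */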
936static int iommu_init_domains(struct intel_iommu *iommu)
937{
938 unsigned long ndomains;
939 unsigned long nlongs;
940
941 ndomains = cap_ndoms(iommu->cap);
942 pr_debug("Number of Domains supported <%ld>\n", ndomains);
943 nlongs = BITS_TO_LONGS(ndomains);
944
945 /* TBD: there might be 64K domains,
946 * consider other allocation for future chip
947 */
948 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
949 if (!iommu->domain_ids) {
950 printk(KERN_ERR "Allocating domain id array failed\n");
951 return -ENOMEM;
952 }
953 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
954 GFP_KERNEL);
955 if (!iommu->domains) {
956 printk(KERN_ERR "Allocating domain array failed\n");
957 kfree(iommu->domain_ids);
958 return -ENOMEM;
959 }
960
961 /*
962 * if Caching mode is set, then invalid translations are tagged
963 * with domainid 0. Hence we need to pre-allocate it.
964 */
965 if (cap_caching_mode(iommu->cap))
966 set_bit(0, iommu->domain_ids);
967 return 0;
968}
969
970static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
971{
972 struct intel_iommu *iommu;
973 int ret;
974 int map_size;
975 u32 ver;
976
977 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
978 if (!iommu)
979 return NULL;
980 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
981 if (!iommu->reg) {
982 printk(KERN_ERR "IOMMU: can't map the region\n");
983 goto error;
984 }
985 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
986 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
987
988 /* the registers might be more than one page */
989 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
990 cap_max_fault_reg_offset(iommu->cap));
991 map_size = PAGE_ALIGN_4K(map_size);
992 if (map_size > PAGE_SIZE_4K) {
993 iounmap(iommu->reg);
994 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
995 if (!iommu->reg) {
996 printk(KERN_ERR "IOMMU: can't map the region\n");
997 goto error;
998 }
999 }
1000
1001 ver = readl(iommu->reg + DMAR_VER_REG);
1002 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
1003 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1004 iommu->cap, iommu->ecap);
1005 ret = iommu_init_domains(iommu);
1006 if (ret)
1007 goto error_unmap;
1008 spin_lock_init(&iommu->lock);
1009 spin_lock_init(&iommu->register_lock);
1010
1011 drhd->iommu = iommu;
1012 return iommu;
1013error_unmap:
1014 iounmap(iommu->reg);
1015error:
1016 kfree(iommu);
1017 return NULL;
1018}
1019
1020static void domain_exit(struct dmar_domain *domain);
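/*
 * Tear down an IOMMU: destroy every domain still allocated on it, disable
 * translation, release its fault interrupt, and free the root/context
 * tables and register mapping.
 */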
1021static void free_iommu(struct intel_iommu *iommu)
1022{
1023 struct dmar_domain *domain;
1024 int i;
1025
1026 if (!iommu)
1027 return;
1028
1029 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1030 for (; i < cap_ndoms(iommu->cap); ) {
1031 domain = iommu->domains[i];
1032 clear_bit(i, iommu->domain_ids);
1033 domain_exit(domain);
1034 i = find_next_bit(iommu->domain_ids,
1035 cap_ndoms(iommu->cap), i+1);
1036 }
1037
1038 if (iommu->gcmd & DMA_GCMD_TE)
1039 iommu_disable_translation(iommu);
1040
1041 if (iommu->irq) {
1042 set_irq_data(iommu->irq, NULL);
1043 /* This will mask the irq */
1044 free_irq(iommu->irq, iommu);
1045 destroy_irq(iommu->irq);
1046 }
1047
1048 kfree(iommu->domains);
1049 kfree(iommu->domain_ids);
1050
1051 /* free context mapping */
1052 free_context_table(iommu);
1053
1054 if (iommu->reg)
1055 iounmap(iommu->reg);
1056 kfree(iommu);
1057}
1058
1059static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1060{
1061 unsigned long num;
1062 unsigned long ndomains;
1063 struct dmar_domain *domain;
1064 unsigned long flags;
1065
1066 domain = alloc_domain_mem();
1067 if (!domain)
1068 return NULL;
1069
1070 ndomains = cap_ndoms(iommu->cap);
1071
1072 spin_lock_irqsave(&iommu->lock, flags);
1073 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1074 if (num >= ndomains) {
1075 spin_unlock_irqrestore(&iommu->lock, flags);
1076 free_domain_mem(domain);
1077 printk(KERN_ERR "IOMMU: no free domain ids\n");
1078 return NULL;
1079 }
1080
1081 set_bit(num, iommu->domain_ids);
1082 domain->id = num;
1083 domain->iommu = iommu;
1084 iommu->domains[num] = domain;
1085 spin_unlock_irqrestore(&iommu->lock, flags);
1086
1087 return domain;
1088}
1089
1090static void iommu_free_domain(struct dmar_domain *domain)
1091{
1092 unsigned long flags;
1093
1094 spin_lock_irqsave(&domain->iommu->lock, flags);
1095 clear_bit(domain->id, domain->iommu->domain_ids);
1096 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1097}
1098
1099static struct iova_domain reserved_iova_list;
1100static struct lock_class_key reserved_alloc_key;
1101static struct lock_class_key reserved_rbtree_key;
1102
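/*
 * Reserve IOVA ranges that must never be handed out for DMA: the IOAPIC
 * MMIO window and every PCI device's MMIO resources (so peer-to-peer
 * ranges are never mapped).
 */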
1103static void dmar_init_reserved_ranges(void)
1104{
1105 struct pci_dev *pdev = NULL;
1106 struct iova *iova;
1107 int i;
1108 u64 addr, size;
1109
1110 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1111
1112 lockdep_set_class(&reserved_iova_list.iova_alloc_lock,
1113 &reserved_alloc_key);
1114 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1115 &reserved_rbtree_key);
1116
1117 /* IOAPIC ranges shouldn't be accessed by DMA */
1118 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1119 IOVA_PFN(IOAPIC_RANGE_END));
1120 if (!iova)
1121 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1122
1123 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1124 for_each_pci_dev(pdev) {
1125 struct resource *r;
1126
1127 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1128 r = &pdev->resource[i];
1129 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1130 continue;
1131 addr = r->start;
1132 addr &= PAGE_MASK_4K;
1133 size = r->end - addr;
1134 size = PAGE_ALIGN_4K(size);
1135 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1136 IOVA_PFN(size + addr) - 1);
1137 if (!iova)
1138 printk(KERN_ERR "Reserve iova failed\n");
1139 }
1140 }
1141
1142}
1143
1144static void domain_reserve_special_ranges(struct dmar_domain *domain)
1145{
1146 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1147}
1148
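/*
 * Round a guest address width up so that (width - 12) is a multiple of
 * the 9-bit level stride, capped at 64 bits.
 */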
1149static inline int guestwidth_to_adjustwidth(int gaw)
1150{
1151 int agaw;
1152 int r = (gaw - 12) % 9;
1153
1154 if (r == 0)
1155 agaw = gaw;
1156 else
1157 agaw = gaw + 9 - r;
1158 if (agaw > 64)
1159 agaw = 64;
1160 return agaw;
1161}
1162
1163static int domain_init(struct dmar_domain *domain, int guest_width)
1164{
1165 struct intel_iommu *iommu;
1166 int adjust_width, agaw;
1167 unsigned long sagaw;
1168
1169 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1170 spin_lock_init(&domain->mapping_lock);
1171
1172 domain_reserve_special_ranges(domain);
1173
1174 /* calculate AGAW */
1175 iommu = domain->iommu;
1176 if (guest_width > cap_mgaw(iommu->cap))
1177 guest_width = cap_mgaw(iommu->cap);
1178 domain->gaw = guest_width;
1179 adjust_width = guestwidth_to_adjustwidth(guest_width);
1180 agaw = width_to_agaw(adjust_width);
1181 sagaw = cap_sagaw(iommu->cap);
1182 if (!test_bit(agaw, &sagaw)) {
1183 /* hardware doesn't support it, choose a bigger one */
1184 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1185 agaw = find_next_bit(&sagaw, 5, agaw);
1186 if (agaw >= 5)
1187 return -ENODEV;
1188 }
1189 domain->agaw = agaw;
1190 INIT_LIST_HEAD(&domain->devices);
1191
1192 /* always allocate the top pgd */
1193 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1194 if (!domain->pgd)
1195 return -ENOMEM;
1196 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
1197 return 0;
1198}
1199
1200static void domain_exit(struct dmar_domain *domain)
1201{
1202 u64 end;
1203
1204 /* Domain 0 is reserved, so don't process it */
1205 if (!domain)
1206 return;
1207
1208 domain_remove_dev_info(domain);
1209 /* destroy iovas */
1210 put_iova_domain(&domain->iovad);
1211 end = DOMAIN_MAX_ADDR(domain->gaw);
1212 end = end & (~PAGE_MASK_4K);
1213
1214 /* clear ptes */
1215 dma_pte_clear_range(domain, 0, end);
1216
1217 /* free page tables */
1218 dma_pte_free_pagetable(domain, 0, end);
1219
1220 iommu_free_domain(domain);
1221 free_domain_mem(domain);
1222}
1223
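/*
 * Install the context entry for @bus/@devfn so it points at this domain's
 * page tables, then invalidate the context cache and IOTLB for the new
 * entry (or just flush the write buffer when the hardware does not cache
 * non-present entries).
 */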
1224static int domain_context_mapping_one(struct dmar_domain *domain,
1225 u8 bus, u8 devfn)
1226{
1227 struct context_entry *context;
1228 struct intel_iommu *iommu = domain->iommu;
1229 unsigned long flags;
1230
1231 pr_debug("Set context mapping for %02x:%02x.%d\n",
1232 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1233 BUG_ON(!domain->pgd);
1234 context = device_to_context_entry(iommu, bus, devfn);
1235 if (!context)
1236 return -ENOMEM;
1237 spin_lock_irqsave(&iommu->lock, flags);
1238 if (context_present(*context)) {
1239 spin_unlock_irqrestore(&iommu->lock, flags);
1240 return 0;
1241 }
1242
1243 context_set_domain_id(*context, domain->id);
1244 context_set_address_width(*context, domain->agaw);
1245 context_set_address_root(*context, virt_to_phys(domain->pgd));
1246 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1247 context_set_fault_enable(*context);
1248 context_set_present(*context);
1249 __iommu_flush_cache(iommu, context, sizeof(*context));
1250
1251 /* it's a non-present to present mapping */
1252 if (iommu_flush_context_device(iommu, domain->id,
1253 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
1254 iommu_flush_write_buffer(iommu);
1255 else
1256 iommu_flush_iotlb_dsi(iommu, 0, 0);
1257 spin_unlock_irqrestore(&iommu->lock, flags);
1258 return 0;
1259}
1260
1261static int
1262domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1263{
1264 int ret;
1265 struct pci_dev *tmp, *parent;
1266
1267 ret = domain_context_mapping_one(domain, pdev->bus->number,
1268 pdev->devfn);
1269 if (ret)
1270 return ret;
1271
1272 /* dependent device mapping */
1273 tmp = pci_find_upstream_pcie_bridge(pdev);
1274 if (!tmp)
1275 return 0;
1276 /* Secondary interface's bus number and devfn 0 */
1277 parent = pdev->bus->self;
1278 while (parent != tmp) {
1279 ret = domain_context_mapping_one(domain, parent->bus->number,
1280 parent->devfn);
1281 if (ret)
1282 return ret;
1283 parent = parent->bus->self;
1284 }
1285 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1286 return domain_context_mapping_one(domain,
1287 tmp->subordinate->number, 0);
1288 else /* this is a legacy PCI bridge */
1289 return domain_context_mapping_one(domain,
1290 tmp->bus->number, tmp->devfn);
1291}
1292
1293static int domain_context_mapped(struct dmar_domain *domain,
1294 struct pci_dev *pdev)
1295{
1296 int ret;
1297 struct pci_dev *tmp, *parent;
1298
1299 ret = device_context_mapped(domain->iommu,
1300 pdev->bus->number, pdev->devfn);
1301 if (!ret)
1302 return ret;
1303 /* dependent device mapping */
1304 tmp = pci_find_upstream_pcie_bridge(pdev);
1305 if (!tmp)
1306 return ret;
1307 /* Secondary interface's bus number and devfn 0 */
1308 parent = pdev->bus->self;
1309 while (parent != tmp) {
1310 ret = device_context_mapped(domain->iommu, parent->bus->number,
1311 parent->devfn);
1312 if (!ret)
1313 return ret;
1314 parent = parent->bus->self;
1315 }
1316 if (tmp->is_pcie)
1317 return device_context_mapped(domain->iommu,
1318 tmp->subordinate->number, 0);
1319 else
1320 return device_context_mapped(domain->iommu,
1321 tmp->bus->number, tmp->devfn);
1322}
1323
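/*
 * Map the physical range starting at @hpa into the domain at IOVA @iova,
 * one 4K page at a time, filling last-level PTEs with the requested
 * read/write protection bits.
 */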
1324static int
1325domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1326 u64 hpa, size_t size, int prot)
1327{
1328 u64 start_pfn, end_pfn;
1329 struct dma_pte *pte;
1330 int index;
1331
1332 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1333 return -EINVAL;
1334 iova &= PAGE_MASK_4K;
1335 start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
1336 end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
1337 index = 0;
1338 while (start_pfn < end_pfn) {
1339 pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
1340 if (!pte)
1341 return -ENOMEM;
1342 /* We don't need lock here, nobody else
1343 * touches the iova range
1344 */
1345 BUG_ON(dma_pte_addr(*pte));
1346 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1347 dma_set_pte_prot(*pte, prot);
1348 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1349 start_pfn++;
1350 index++;
1351 }
1352 return 0;
1353}
1354
1355static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1356{
1357 clear_context_table(domain->iommu, bus, devfn);
1358 iommu_flush_context_global(domain->iommu, 0);
1359 iommu_flush_iotlb_global(domain->iommu, 0);
1360}
1361
1362static void domain_remove_dev_info(struct dmar_domain *domain)
1363{
1364 struct device_domain_info *info;
1365 unsigned long flags;
1366
1367 spin_lock_irqsave(&device_domain_lock, flags);
1368 while (!list_empty(&domain->devices)) {
1369 info = list_entry(domain->devices.next,
1370 struct device_domain_info, link);
1371 list_del(&info->link);
1372 list_del(&info->global);
1373 if (info->dev)
1374 info->dev->dev.archdata.iommu = NULL;
1375 spin_unlock_irqrestore(&device_domain_lock, flags);
1376
1377 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1378 free_devinfo_mem(info);
1379
1380 spin_lock_irqsave(&device_domain_lock, flags);
1381 }
1382 spin_unlock_irqrestore(&device_domain_lock, flags);
1383}
1384
1385/*
1386 * find_domain
1387 * Note: the domain info is stored in struct pci_dev->dev.archdata.iommu
1388 */
1389struct dmar_domain *
1390find_domain(struct pci_dev *pdev)
1391{
1392 struct device_domain_info *info;
1393
1394 /* No lock here, assumes no domain exit in normal case */
1395 info = pdev->dev.archdata.iommu;
1396 if (info)
1397 return info->domain;
1398 return NULL;
1399}
1400
1401static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
1402 struct pci_dev *dev)
1403{
1404 int index;
1405
1406 while (dev) {
1407 for (index = 0; index < cnt; index ++)
1408 if (dev == devices[index])
1409 return 1;
1410
1411 /* Check our parent */
1412 dev = dev->bus->self;
1413 }
1414
1415 return 0;
1416}
1417
1418static struct dmar_drhd_unit *
1419dmar_find_matched_drhd_unit(struct pci_dev *dev)
1420{
1421 struct dmar_drhd_unit *drhd = NULL;
1422
1423 list_for_each_entry(drhd, &dmar_drhd_units, list) {
1424 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
1425 drhd->devices_cnt, dev))
1426 return drhd;
1427 }
1428
1429 return NULL;
1430}
1431
1432/* find the domain for a device, allocating and initializing one if needed */
1433static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1434{
1435 struct dmar_domain *domain, *found = NULL;
1436 struct intel_iommu *iommu;
1437 struct dmar_drhd_unit *drhd;
1438 struct device_domain_info *info, *tmp;
1439 struct pci_dev *dev_tmp;
1440 unsigned long flags;
1441 int bus = 0, devfn = 0;
1442
1443 domain = find_domain(pdev);
1444 if (domain)
1445 return domain;
1446
1447 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1448 if (dev_tmp) {
1449 if (dev_tmp->is_pcie) {
1450 bus = dev_tmp->subordinate->number;
1451 devfn = 0;
1452 } else {
1453 bus = dev_tmp->bus->number;
1454 devfn = dev_tmp->devfn;
1455 }
1456 spin_lock_irqsave(&device_domain_lock, flags);
1457 list_for_each_entry(info, &device_domain_list, global) {
1458 if (info->bus == bus && info->devfn == devfn) {
1459 found = info->domain;
1460 break;
1461 }
1462 }
1463 spin_unlock_irqrestore(&device_domain_lock, flags);
1464 /* pcie-pci bridge already has a domain, uses it */
1465 if (found) {
1466 domain = found;
1467 goto found_domain;
1468 }
1469 }
1470
1471 /* Allocate new domain for the device */
1472 drhd = dmar_find_matched_drhd_unit(pdev);
1473 if (!drhd) {
1474 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1475 pci_name(pdev));
1476 return NULL;
1477 }
1478 iommu = drhd->iommu;
1479
1480 domain = iommu_alloc_domain(iommu);
1481 if (!domain)
1482 goto error;
1483
1484 if (domain_init(domain, gaw)) {
1485 domain_exit(domain);
1486 goto error;
1487 }
1488
1489 /* register pcie-to-pci device */
1490 if (dev_tmp) {
1491 info = alloc_devinfo_mem();
1492 if (!info) {
1493 domain_exit(domain);
1494 goto error;
1495 }
1496 info->bus = bus;
1497 info->devfn = devfn;
1498 info->dev = NULL;
1499 info->domain = domain;
1500 /* This domain is shared by devices under p2p bridge */
1501 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1502
1503 /* pcie-to-pci bridge already has a domain, uses it */
1504 found = NULL;
1505 spin_lock_irqsave(&device_domain_lock, flags);
1506 list_for_each_entry(tmp, &device_domain_list, global) {
1507 if (tmp->bus == bus && tmp->devfn == devfn) {
1508 found = tmp->domain;
1509 break;
1510 }
1511 }
1512 if (found) {
1513 free_devinfo_mem(info);
1514 domain_exit(domain);
1515 domain = found;
1516 } else {
1517 list_add(&info->link, &domain->devices);
1518 list_add(&info->global, &device_domain_list);
1519 }
1520 spin_unlock_irqrestore(&device_domain_lock, flags);
1521 }
1522
1523found_domain:
1524 info = alloc_devinfo_mem();
1525 if (!info)
1526 goto error;
1527 info->bus = pdev->bus->number;
1528 info->devfn = pdev->devfn;
1529 info->dev = pdev;
1530 info->domain = domain;
1531 spin_lock_irqsave(&device_domain_lock, flags);
1532 /* somebody is fast */
1533 found = find_domain(pdev);
1534 if (found != NULL) {
1535 spin_unlock_irqrestore(&device_domain_lock, flags);
1536 if (found != domain) {
1537 domain_exit(domain);
1538 domain = found;
1539 }
1540 free_devinfo_mem(info);
1541 return domain;
1542 }
1543 list_add(&info->link, &domain->devices);
1544 list_add(&info->global, &device_domain_list);
1545 pdev->dev.archdata.iommu = info;
1546 spin_unlock_irqrestore(&device_domain_lock, flags);
1547 return domain;
1548error:
1549 /* recheck it here, maybe others set it */
1550 return find_domain(pdev);
1551}
1552
1553static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
1554{
1555 struct dmar_domain *domain;
1556 unsigned long size;
1557 u64 base;
1558 int ret;
1559
1560 printk(KERN_INFO
1561 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1562 pci_name(pdev), start, end);
1563 /* page table init */
1564 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1565 if (!domain)
1566 return -ENOMEM;
1567
1568 /* The address might not be aligned */
1569 base = start & PAGE_MASK_4K;
1570 size = end - base;
1571 size = PAGE_ALIGN_4K(size);
1572 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1573 IOVA_PFN(base + size) - 1)) {
1574 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1575 ret = -ENOMEM;
1576 goto error;
1577 }
1578
1579 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1580 size, base, pci_name(pdev));
1581 /*
1582 * RMRR range might have overlap with physical memory range,
1583 * clear it first
1584 */
1585 dma_pte_clear_range(domain, base, base + size);
1586
1587 ret = domain_page_mapping(domain, base, base, size,
1588 DMA_PTE_READ|DMA_PTE_WRITE);
1589 if (ret)
1590 goto error;
1591
1592 /* context entry init */
1593 ret = domain_context_mapping(domain, pdev);
1594 if (!ret)
1595 return 0;
1596error:
1597 domain_exit(domain);
1598 return ret;
1599
1600}
1601
1602static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1603 struct pci_dev *pdev)
1604{
1605 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1606 return 0;
1607 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1608 rmrr->end_address + 1);
1609}
1610
1611#ifdef CONFIG_DMAR_GFX_WA
1612extern int arch_get_ram_range(int slot, u64 *addr, u64 *size);
1613static void __init iommu_prepare_gfx_mapping(void)
1614{
1615 struct pci_dev *pdev = NULL;
1616 u64 base, size;
1617 int slot;
1618 int ret;
1619
1620 for_each_pci_dev(pdev) {
1621 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1622 !IS_GFX_DEVICE(pdev))
1623 continue;
1624 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1625 pci_name(pdev));
1626 slot = arch_get_ram_range(0, &base, &size);
1627 while (slot >= 0) {
1628 ret = iommu_prepare_identity_map(pdev,
1629 base, base + size);
1630 if (ret)
1631 goto error;
1632 slot = arch_get_ram_range(slot, &base, &size);
1633 }
1634 continue;
1635error:
1636 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1637 }
1638}
1639#endif
1640
1641#ifdef CONFIG_DMAR_FLOPPY_WA
1642static inline void iommu_prepare_isa(void)
1643{
1644 struct pci_dev *pdev;
1645 int ret;
1646
1647 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1648 if (!pdev)
1649 return;
1650
1651 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1652 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1653
1654 if (ret)
1655 printk(KERN_ERR "IOMMU: Failed to create 0-16M identity map, "
1656 "floppy might not work\n");
1657
1658}
1659#else
1660static inline void iommu_prepare_isa(void)
1661{
1662 return;
1663}
1664#endif /* CONFIG_DMAR_FLOPPY_WA */
1665
1666int __init init_dmars(void)
1667{
1668 struct dmar_drhd_unit *drhd;
1669 struct dmar_rmrr_unit *rmrr;
1670 struct pci_dev *pdev;
1671 struct intel_iommu *iommu;
1672 int ret, unit = 0;
1673
1674 /*
1675 * for each drhd
1676 * allocate root
1677 * initialize and program root entry to not present
1678 * endfor
1679 */
1680 for_each_drhd_unit(drhd) {
1681 if (drhd->ignored)
1682 continue;
1683 iommu = alloc_iommu(drhd);
1684 if (!iommu) {
1685 ret = -ENOMEM;
1686 goto error;
1687 }
1688
1689 /*
1690 * TBD:
1691 * we could share the same root & context tables
1692 * among all IOMMUs. Need to split it later.
1693 */
1694 ret = iommu_alloc_root_entry(iommu);
1695 if (ret) {
1696 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1697 goto error;
1698 }
1699 }
1700
1701 /*
1702 * For each rmrr
1703 * for each dev attached to rmrr
1704 * do
1705 * locate drhd for dev, alloc domain for dev
1706 * allocate free domain
1707 * allocate page table entries for rmrr
1708 * if context not allocated for bus
1709 * allocate and init context
1710 * set present in root table for this bus
1711 * init context with domain, translation etc
1712 * endfor
1713 * endfor
1714 */
1715 for_each_rmrr_units(rmrr) {
1716 int i;
1717 for (i = 0; i < rmrr->devices_cnt; i++) {
1718 pdev = rmrr->devices[i];
1719 /* some BIOSes list non-existent devices in the DMAR table */
1720 if (!pdev)
1721 continue;
1722 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1723 if (ret)
1724 printk(KERN_ERR
1725 "IOMMU: mapping reserved region failed\n");
1726 }
1727 }
1728
1729 iommu_prepare_gfx_mapping();
1730
1731 iommu_prepare_isa();
1732
1733 /*
1734 * for each drhd
1735 * enable fault log
1736 * global invalidate context cache
1737 * global invalidate iotlb
1738 * enable translation
1739 */
1740 for_each_drhd_unit(drhd) {
1741 if (drhd->ignored)
1742 continue;
1743 iommu = drhd->iommu;
1744 sprintf (iommu->name, "dmar%d", unit++);
1745
1746 iommu_flush_write_buffer(iommu);
1747
1748 ret = dmar_set_interrupt(iommu);
1749 if (ret)
1750 goto error;
1751
1752 iommu_set_root_entry(iommu);
1753
1754 iommu_flush_context_global(iommu, 0);
1755 iommu_flush_iotlb_global(iommu, 0);
1756
1757 iommu_disable_protect_mem_regions(iommu);
1758
1759 ret = iommu_enable_translation(iommu);
1760 if (ret)
1761 goto error;
1762 }
1763
1764 return 0;
1765error:
1766 for_each_drhd_unit(drhd) {
1767 if (drhd->ignored)
1768 continue;
1769 iommu = drhd->iommu;
1770 free_iommu(iommu);
1771 }
1772 return ret;
1773}
1774
1775static inline u64 aligned_size(u64 host_addr, size_t size)
1776{
1777 u64 addr;
1778 addr = (host_addr & (~PAGE_MASK_4K)) + size;
1779 return PAGE_ALIGN_4K(addr);
1780}
1781
1782struct iova *
1783iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1784{
1785 struct iova *piova;
1786
1787 /* Make sure it's in range */
1788 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1789 if (!size || (IOVA_START_ADDR + size > end))
1790 return NULL;
1791
1792 piova = alloc_iova(&domain->iovad,
1793 size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
1794 return piova;
1795}
1796
1797static struct iova *
1798__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1799 size_t size)
1800{
1801 struct pci_dev *pdev = to_pci_dev(dev);
1802 struct iova *iova = NULL;
1803
1804 if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
1805 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1806 } else {
1807 /*
1808 * First try to allocate an io virtual address in
1809 * DMA_32BIT_MASK and if that fails then try allocating
1810 * from a higher range
1811 */
1812 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1813 if (!iova)
1814 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1815 }
1816
1817 if (!iova) {
1818 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1819 return NULL;
1820 }
1821
1822 return iova;
1823}
1824
1825static struct dmar_domain *
1826get_valid_domain_for_dev(struct pci_dev *pdev)
1827{
1828 struct dmar_domain *domain;
1829 int ret;
1830
1831 domain = get_domain_for_dev(pdev,
1832 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1833 if (!domain) {
1834 printk(KERN_ERR
1835 "Allocating domain for %s failed", pci_name(pdev));
1836 return NULL;
1837 }
1838
1839 /* make sure context mapping is ok */
1840 if (unlikely(!domain_context_mapped(domain, pdev))) {
1841 ret = domain_context_mapping(domain, pdev);
1842 if (ret) {
1843 printk(KERN_ERR
1844 "Domain context map for %s failed",
1845 pci_name(pdev));
1846 return NULL;
1847 }
1848 }
1849
1850 return domain;
1851}
1852
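/*
 * DMA-API map_single implementation: allocate an IOVA range for the
 * buffer, create the page-table mappings, flush the IOTLB for the
 * non-present-to-present change, and return the resulting bus address.
 */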
1853static dma_addr_t intel_map_single(struct device *hwdev, void *addr,
1854 size_t size, int dir)
1855{
1856 struct pci_dev *pdev = to_pci_dev(hwdev);
1857 int ret;
1858 struct dmar_domain *domain;
1859 unsigned long start_addr;
1860 struct iova *iova;
1861 int prot = 0;
1862
1863 BUG_ON(dir == DMA_NONE);
1864 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1865 return virt_to_bus(addr);
1866
1867 domain = get_valid_domain_for_dev(pdev);
1868 if (!domain)
1869 return 0;
1870
1871 addr = (void *)virt_to_phys(addr);
1872 size = aligned_size((u64)addr, size);
1873
1874 iova = __intel_alloc_iova(hwdev, domain, size);
1875 if (!iova)
1876 goto error;
1877
1878 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1879
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001880 /*
1881 * Check if DMAR supports zero-length reads on write only
1882 * mappings..
1883 */
1884 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1885 !cap_zlr(domain->iommu->cap))
1886 prot |= DMA_PTE_READ;
1887 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1888 prot |= DMA_PTE_WRITE;
1889 /*
1890 * addr - (addr + size) might be partial page, we should map the whole
1891 * page. Note: if two part of one page are separately mapped, we
1892 * might have two guest_addr mapping to the same host addr, but this
1893 * is not a big problem
1894 */
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001895 ret = domain_page_mapping(domain, start_addr,
1896 ((u64)addr) & PAGE_MASK_4K, size, prot);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001897 if (ret)
1898 goto error;
1899
1900 pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1901 pci_name(pdev), size, (u64)addr,
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001902 size, (u64)start_addr, dir);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001903
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001904 /* it's a non-present to present mapping */
1905 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1906 start_addr, size >> PAGE_SHIFT_4K, 1);
1907 if (ret)
1908 iommu_flush_write_buffer(domain->iommu);
1909
1910 return (start_addr + ((u64)addr & (~PAGE_MASK_4K)));
1911
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001912error:
Keshavamurthy, Anil Sf76aec72007-10-21 16:41:58 -07001913 if (iova)
1914 __free_iova(&domain->iovad, iova);
Keshavamurthy, Anil Sba395922007-10-21 16:41:49 -07001915 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
1916 pci_name(pdev), size, (u64)addr, dir);
1917 return 0;
1918}
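
/*
 * Usage sketch (illustrative, not part of this driver): once intel_dma_ops
 * is installed as dma_ops by intel_iommu_init() below, drivers reach
 * intel_map_single()/intel_unmap_single() through the generic DMA API:
 *
 *	dma_addr_t handle;
 *
 *	handle = dma_map_single(&pdev->dev, buf, len, DMA_TO_DEVICE);
 *	if (!handle)
 *		return -ENOMEM;
 *	... program the device with "handle" and run the transfer ...
 *	dma_unmap_single(&pdev->dev, handle, len, DMA_TO_DEVICE);
 *
 * "pdev", "buf" and "len" are hypothetical names belonging to the calling
 * driver, not to this file.
 */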

static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
	size_t size, int dir)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct dmar_domain *domain;
	unsigned long start_addr;
	struct iova *iova;

	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return;
	domain = find_domain(pdev);
	BUG_ON(!domain);

	iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
	if (!iova)
		return;

	start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
	size = aligned_size((u64)dev_addr, size);

	pr_debug("Device %s unmapping: %lx@%llx\n",
		pci_name(pdev), size, (u64)start_addr);

	/* clear the whole page */
	dma_pte_clear_range(domain, start_addr, start_addr + size);
	/* free page tables */
	dma_pte_free_pagetable(domain, start_addr, start_addr + size);

	if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
			size >> PAGE_SHIFT_4K, 0))
		iommu_flush_write_buffer(domain->iommu);

	/* free iova */
	__free_iova(&domain->iovad, iova);
}

static void *intel_alloc_coherent(struct device *hwdev, size_t size,
			dma_addr_t *dma_handle, gfp_t flags)
{
	void *vaddr;
	int order;

	size = PAGE_ALIGN_4K(size);
	order = get_order(size);
	flags &= ~(GFP_DMA | GFP_DMA32);

	vaddr = (void *)__get_free_pages(flags, order);
	if (!vaddr)
		return NULL;
	memset(vaddr, 0, size);

	*dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL);
	if (*dma_handle)
		return vaddr;
	free_pages((unsigned long)vaddr, order);
	return NULL;
}

static void intel_free_coherent(struct device *hwdev, size_t size,
	void *vaddr, dma_addr_t dma_handle)
{
	int order;

	size = PAGE_ALIGN_4K(size);
	order = get_order(size);

	intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
	free_pages((unsigned long)vaddr, order);
}
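
/*
 * Usage sketch (illustrative): coherent allocations go through the same ops
 * table, so a driver allocating, say, a descriptor ring would write:
 *
 *	dma_addr_t ring_dma;
 *	void *ring;
 *
 *	ring = dma_alloc_coherent(&pdev->dev, RING_BYTES, &ring_dma,
 *				  GFP_KERNEL);
 *	if (!ring)
 *		return -ENOMEM;
 *	...
 *	dma_free_coherent(&pdev->dev, RING_BYTES, ring, ring_dma);
 *
 * "pdev" and RING_BYTES are hypothetical.  intel_alloc_coherent() above
 * zeroes the buffer and maps it DMA_BIDIRECTIONAL before returning.
 */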

#define SG_ENT_VIRT_ADDRESS(sg)	(sg_virt((sg)))
static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
	int nelems, int dir)
{
	int i;
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	unsigned long start_addr;
	struct iova *iova;
	size_t size = 0;
	void *addr;
	struct scatterlist *sg;

	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return;

	domain = find_domain(pdev);

	iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
	if (!iova)
		return;
	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		size += aligned_size((u64)addr, sg->length);
	}

	start_addr = iova->pfn_lo << PAGE_SHIFT_4K;

	/* clear the whole page */
	dma_pte_clear_range(domain, start_addr, start_addr + size);
	/* free page tables */
	dma_pte_free_pagetable(domain, start_addr, start_addr + size);

	if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
			size >> PAGE_SHIFT_4K, 0))
		iommu_flush_write_buffer(domain->iommu);

	/* free iova */
	__free_iova(&domain->iovad, iova);
}

static int intel_nontranslate_map_sg(struct device *hwdev,
	struct scatterlist *sglist, int nelems, int dir)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sglist, sg, nelems, i) {
		BUG_ON(!sg_page(sg));
		sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
		sg->dma_length = sg->length;
	}
	return nelems;
}

static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
				int nelems, int dir)
{
	void *addr;
	int i;
	struct pci_dev *pdev = to_pci_dev(hwdev);
	struct dmar_domain *domain;
	size_t size = 0;
	int prot = 0;
	size_t offset = 0;
	struct iova *iova = NULL;
	int ret;
	struct scatterlist *sg;
	unsigned long start_addr;

	BUG_ON(dir == DMA_NONE);
	if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
		return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);

	domain = get_valid_domain_for_dev(pdev);
	if (!domain)
		return 0;

	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		addr = (void *)virt_to_phys(addr);
		size += aligned_size((u64)addr, sg->length);
	}

	iova = __intel_alloc_iova(hwdev, domain, size);
	if (!iova) {
		sglist->dma_length = 0;
		return 0;
	}

	/*
	 * Check if DMAR supports zero-length reads on write-only
	 * mappings.
	 */
	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL ||
			!cap_zlr(domain->iommu->cap))
		prot |= DMA_PTE_READ;
	if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
		prot |= DMA_PTE_WRITE;

	start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
	offset = 0;
	for_each_sg(sglist, sg, nelems, i) {
		addr = SG_ENT_VIRT_ADDRESS(sg);
		addr = (void *)virt_to_phys(addr);
		size = aligned_size((u64)addr, sg->length);
		ret = domain_page_mapping(domain, start_addr + offset,
			((u64)addr) & PAGE_MASK_4K,
			size, prot);
		if (ret) {
			/* clear the page */
			dma_pte_clear_range(domain, start_addr,
					start_addr + offset);
			/* free page tables */
			dma_pte_free_pagetable(domain, start_addr,
					start_addr + offset);
			/* free iova */
			__free_iova(&domain->iovad, iova);
			return 0;
		}
		sg->dma_address = start_addr + offset +
				((u64)addr & (~PAGE_MASK_4K));
		sg->dma_length = sg->length;
		offset += size;
	}

	/* it's a non-present to present mapping */
	if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
			start_addr, offset >> PAGE_SHIFT_4K, 1))
		iommu_flush_write_buffer(domain->iommu);
	return nelems;
}
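
/*
 * Usage sketch (illustrative): scatter-gather mappings reach intel_map_sg()
 * via dma_map_sg().  A hypothetical caller with a two-entry table might do:
 *
 *	struct scatterlist sgl[2], *sg;
 *	int i, count;
 *
 *	sg_init_table(sgl, 2);
 *	sg_set_buf(&sgl[0], buf0, len0);
 *	sg_set_buf(&sgl[1], buf1, len1);
 *	count = dma_map_sg(&pdev->dev, sgl, 2, DMA_FROM_DEVICE);
 *	if (!count)
 *		return -ENOMEM;
 *	for_each_sg(sgl, sg, count, i)
 *		... program sg_dma_address(sg) and sg_dma_len(sg) ...
 *	dma_unmap_sg(&pdev->dev, sgl, 2, DMA_FROM_DEVICE);
 *
 * The buffer names are hypothetical.  Note that on failure this
 * implementation returns 0 and sets sglist->dma_length to 0.
 */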

static struct dma_mapping_ops intel_dma_ops = {
	.alloc_coherent = intel_alloc_coherent,
	.free_coherent = intel_free_coherent,
	.map_single = intel_map_single,
	.unmap_single = intel_unmap_single,
	.map_sg = intel_map_sg,
	.unmap_sg = intel_unmap_sg,
};

static inline int iommu_domain_cache_init(void)
{
	int ret = 0;

	iommu_domain_cache = kmem_cache_create("iommu_domain",
					 sizeof(struct dmar_domain),
					 0,
					 SLAB_HWCACHE_ALIGN,
					 NULL);
	if (!iommu_domain_cache) {
		printk(KERN_ERR "Couldn't create iommu_domain cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static inline int iommu_devinfo_cache_init(void)
{
	int ret = 0;

	iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
					 sizeof(struct device_domain_info),
					 0,
					 SLAB_HWCACHE_ALIGN,
					 NULL);
	if (!iommu_devinfo_cache) {
		printk(KERN_ERR "Couldn't create devinfo cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static inline int iommu_iova_cache_init(void)
{
	int ret = 0;

	iommu_iova_cache = kmem_cache_create("iommu_iova",
					 sizeof(struct iova),
					 0,
					 SLAB_HWCACHE_ALIGN,
					 NULL);
	if (!iommu_iova_cache) {
		printk(KERN_ERR "Couldn't create iova cache\n");
		ret = -ENOMEM;
	}

	return ret;
}

static int __init iommu_init_mempool(void)
{
	int ret;
	ret = iommu_iova_cache_init();
	if (ret)
		return ret;

	ret = iommu_domain_cache_init();
	if (ret)
		goto domain_error;

	ret = iommu_devinfo_cache_init();
	if (!ret)
		return ret;

	kmem_cache_destroy(iommu_domain_cache);
domain_error:
	kmem_cache_destroy(iommu_iova_cache);

	return -ENOMEM;
}

static void __init iommu_exit_mempool(void)
{
	kmem_cache_destroy(iommu_devinfo_cache);
	kmem_cache_destroy(iommu_domain_cache);
	kmem_cache_destroy(iommu_iova_cache);
}

void __init detect_intel_iommu(void)
{
	if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
		return;
	if (early_dmar_detect()) {
		iommu_detected = 1;
	}
}

static void __init init_no_remapping_devices(void)
{
	struct dmar_drhd_unit *drhd;

	for_each_drhd_unit(drhd) {
		if (!drhd->include_all) {
			int i;
			for (i = 0; i < drhd->devices_cnt; i++)
				if (drhd->devices[i] != NULL)
					break;
			/* ignore DMAR unit if no pci devices exist */
			if (i == drhd->devices_cnt)
				drhd->ignored = 1;
		}
	}

	if (dmar_map_gfx)
		return;

	for_each_drhd_unit(drhd) {
		int i;
		if (drhd->ignored || drhd->include_all)
			continue;

		for (i = 0; i < drhd->devices_cnt; i++)
			if (drhd->devices[i] &&
				!IS_GFX_DEVICE(drhd->devices[i]))
				break;

		if (i < drhd->devices_cnt)
			continue;

		/* bypass IOMMU if it is just for gfx devices */
		drhd->ignored = 1;
		for (i = 0; i < drhd->devices_cnt; i++) {
			if (!drhd->devices[i])
				continue;
			drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
		}
	}
}

int __init intel_iommu_init(void)
{
	int ret = 0;

	if (no_iommu || swiotlb || dmar_disabled)
		return -ENODEV;

	if (dmar_table_init())
		return -ENODEV;

	iommu_init_mempool();
	dmar_init_reserved_ranges();

	init_no_remapping_devices();

	ret = init_dmars();
	if (ret) {
		printk(KERN_ERR "IOMMU: dmar init failed\n");
		put_iova_domain(&reserved_iova_list);
		iommu_exit_mempool();
		return ret;
	}
	printk(KERN_INFO
	"PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");

	force_iommu = 1;
	dma_ops = &intel_dma_ops;
	return 0;
}