blob: 2c119aca28c9b2169d33d16c4845be4d435898a2 [file] [log] [blame]
Liam Mark4d4fbba2017-02-08 10:30:49 -08001/* Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -07002 *
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 and
5 * only version 2 as published by the Free Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 */
12
13#include <linux/dma-contiguous.h>
14#include <linux/dma-mapping.h>
15#include <linux/dma-mapping-fast.h>
16#include <linux/io-pgtable-fast.h>
Patrick Daly7bcb5462016-08-03 17:27:36 -070017#include <linux/vmalloc.h>
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -070018#include <asm/cacheflush.h>
19#include <asm/dma-iommu.h>
Charan Teja Reddy29f61402017-02-09 20:44:29 +053020#include <linux/slab.h>
21#include <linux/vmalloc.h>
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -070022
/*
 * Geometry and PTE field definitions for the "fast" page-table format.
 * Some of these duplicate definitions elsewhere... :(
 * TODO: move to io-pgtable-fast.h
 */
#define FAST_PAGE_SHIFT		12
#define FAST_PAGE_SIZE (1UL << FAST_PAGE_SHIFT)
/*
 * Derived from FAST_PAGE_SIZE (not the CPU PAGE_SIZE) so that offsets
 * computed with ~FAST_PAGE_MASK always match the 4K granule used by
 * FAST_PAGE_SHIFT, whatever the kernel page size is.
 */
#define FAST_PAGE_MASK (~(FAST_PAGE_SIZE - 1))
#define FAST_PTE_ADDR_MASK		((av8l_fast_iopte)0xfffffffff000)
#define FAST_MAIR_ATTR_IDX_CACHE	1
#define FAST_PTE_ATTRINDX_SHIFT		2
#define FAST_PTE_ATTRINDX_MASK		0x7
#define FAST_PTE_SH_SHIFT		8
#define FAST_PTE_SH_MASK	(((av8l_fast_iopte)0x3) << FAST_PTE_SH_SHIFT)
#define FAST_PTE_SH_OS		(((av8l_fast_iopte)2) << FAST_PTE_SH_SHIFT)
#define FAST_PTE_SH_IS		(((av8l_fast_iopte)3) << FAST_PTE_SH_SHIFT)
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -070035
Mitchel Humpherys425d03d2016-06-23 13:25:12 -070036static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
37 bool coherent)
38{
39 if (attrs & DMA_ATTR_STRONGLY_ORDERED)
40 return pgprot_noncached(prot);
41 else if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE))
42 return pgprot_writecombine(prot);
43 return prot;
44}
45
46static int __get_iommu_pgprot(unsigned long attrs, int prot,
47 bool coherent)
48{
49 if (!(attrs & DMA_ATTR_EXEC_MAPPING))
50 prot |= IOMMU_NOEXEC;
51 if ((attrs & DMA_ATTR_STRONGLY_ORDERED))
52 prot |= IOMMU_MMIO;
53 if (coherent)
54 prot |= IOMMU_CACHE;
55
56 return prot;
57}
58
Mitchel Humpherys9de66db2016-06-07 11:09:44 -070059static void fast_dmac_clean_range(struct dma_fast_smmu_mapping *mapping,
60 void *start, void *end)
61{
62 if (!mapping->is_smmu_pt_coherent)
63 dmac_clean_range(start, end);
64}
65
Liam Mark83a9f86e2017-02-08 09:37:17 -080066static bool __fast_is_pte_coherent(av8l_fast_iopte *ptep)
67{
68 int attr_idx = (*ptep & (FAST_PTE_ATTRINDX_MASK <<
69 FAST_PTE_ATTRINDX_SHIFT)) >>
70 FAST_PTE_ATTRINDX_SHIFT;
71
72 if ((attr_idx == FAST_MAIR_ATTR_IDX_CACHE) &&
73 (((*ptep & FAST_PTE_SH_MASK) == FAST_PTE_SH_IS) ||
74 (*ptep & FAST_PTE_SH_MASK) == FAST_PTE_SH_OS))
75 return true;
76
77 return false;
78}
79
80static bool is_dma_coherent(struct device *dev, unsigned long attrs)
81{
82 bool is_coherent;
83
84 if (attrs & DMA_ATTR_FORCE_COHERENT)
85 is_coherent = true;
86 else if (attrs & DMA_ATTR_FORCE_NON_COHERENT)
87 is_coherent = false;
88 else if (is_device_dma_coherent(dev))
89 is_coherent = true;
90 else
91 is_coherent = false;
92
93 return is_coherent;
94}
95
/*
 * Checks if the allocated range (ending at @end) covered the upcoming
 * stale bit.  We don't need to know exactly where the range starts since
 * we already know where the candidate search range started.  If, starting
 * from the beginning of the candidate search range, we had to step over
 * (or landed directly on top of) the upcoming stale bit, then we return
 * true.
 *
 * The search "wrapped" when the allocation ended below where the search
 * started (@end < @search_start).  That gives two scenarios:
 *
 * (1) @search_start > @upcoming_stale: the stale bit can only be reached
 *     by wrapping.  Without a wrap it was not crossed; with a wrap it was
 *     crossed iff @end >= @upcoming_stale.
 *
 * (2) @search_start <= @upcoming_stale: a wrap always crosses the stale
 *     bit; without a wrap it was crossed iff @end >= @upcoming_stale.
 */
static bool __bit_covered_stale(unsigned long upcoming_stale,
				unsigned long search_start,
				unsigned long end)
{
	bool wrapped = end < search_start;
	bool reached = end >= upcoming_stale;

	if (search_start > upcoming_stale)
		return wrapped && reached;

	return wrapped || reached;
}
150
151static dma_addr_t __fast_smmu_alloc_iova(struct dma_fast_smmu_mapping *mapping,
Mitchel Humpherys5c704e02015-12-21 15:06:34 -0800152 unsigned long attrs,
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700153 size_t size)
154{
155 unsigned long bit, prev_search_start, nbits = size >> FAST_PAGE_SHIFT;
156 unsigned long align = (1 << get_order(size)) - 1;
157
158 bit = bitmap_find_next_zero_area(
159 mapping->bitmap, mapping->num_4k_pages, mapping->next_start,
160 nbits, align);
161 if (unlikely(bit > mapping->num_4k_pages)) {
162 /* try wrapping */
163 mapping->next_start = 0; /* TODO: SHOULD I REALLY DO THIS?!? */
164 bit = bitmap_find_next_zero_area(
165 mapping->bitmap, mapping->num_4k_pages, 0, nbits,
166 align);
167 if (unlikely(bit > mapping->num_4k_pages))
168 return DMA_ERROR_CODE;
169 }
170
171 bitmap_set(mapping->bitmap, bit, nbits);
172 prev_search_start = mapping->next_start;
173 mapping->next_start = bit + nbits;
174 if (unlikely(mapping->next_start >= mapping->num_4k_pages))
175 mapping->next_start = 0;
176
177 /*
178 * If we just re-allocated a VA whose TLB hasn't been invalidated
179 * since it was last used and unmapped, we need to invalidate it
180 * here. We actually invalidate the entire TLB so that we don't
181 * have to invalidate the TLB again until we wrap back around.
182 */
183 if (mapping->have_stale_tlbs &&
184 __bit_covered_stale(mapping->upcoming_stale_bit,
185 prev_search_start,
186 bit + nbits - 1)) {
Mitchel Humpherys5c704e02015-12-21 15:06:34 -0800187 bool skip_sync = (attrs & DMA_ATTR_SKIP_CPU_SYNC);
188
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700189 iommu_tlbiall(mapping->domain);
190 mapping->have_stale_tlbs = false;
Mitchel Humpherys5c704e02015-12-21 15:06:34 -0800191 av8l_fast_clear_stale_ptes(mapping->pgtbl_pmds, skip_sync);
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700192 }
193
194 return (bit << FAST_PAGE_SHIFT) + mapping->base;
195}
196
197/*
198 * Checks whether the candidate bit will be allocated sooner than the
199 * current upcoming stale bit. We can say candidate will be upcoming
200 * sooner than the current upcoming stale bit if it lies between the
201 * starting bit of the next search range and the upcoming stale bit
202 * (allowing for wrap-around).
203 *
204 * Stated differently, we're checking the relative ordering of three
205 * unsigned numbers. So we need to check all 6 (i.e. 3!) permutations,
206 * namely:
207 *
208 * 0 |---A---B---C---| TOP (Case 1)
209 * 0 |---A---C---B---| TOP (Case 2)
210 * 0 |---B---A---C---| TOP (Case 3)
211 * 0 |---B---C---A---| TOP (Case 4)
212 * 0 |---C---A---B---| TOP (Case 5)
213 * 0 |---C---B---A---| TOP (Case 6)
214 *
215 * Note that since we're allowing numbers to wrap, the following three
216 * scenarios are all equivalent for Case 1:
217 *
218 * 0 |---A---B---C---| TOP
219 * 0 |---C---A---B---| TOP (C has wrapped. This is Case 5.)
220 * 0 |---B---C---A---| TOP (C and B have wrapped. This is Case 4.)
221 *
222 * In any of these cases, if we start searching from A, we will find B
223 * before we find C.
224 *
225 * We can also find two equivalent cases for Case 2:
226 *
227 * 0 |---A---C---B---| TOP
228 * 0 |---B---A---C---| TOP (B has wrapped. This is Case 3.)
229 * 0 |---C---B---A---| TOP (B and C have wrapped. This is Case 6.)
230 *
231 * In any of these cases, if we start searching from A, we will find C
232 * before we find B.
233 */
234static bool __bit_is_sooner(unsigned long candidate,
235 struct dma_fast_smmu_mapping *mapping)
236{
237 unsigned long A = mapping->next_start;
238 unsigned long B = candidate;
239 unsigned long C = mapping->upcoming_stale_bit;
240
241 if ((A < B && B < C) || /* Case 1 */
242 (C < A && A < B) || /* Case 5 */
243 (B < C && C < A)) /* Case 4 */
244 return true;
245
246 if ((A < C && C < B) || /* Case 2 */
247 (B < A && A < C) || /* Case 3 */
248 (C < B && B < A)) /* Case 6 */
249 return false;
250
251 /*
252 * For simplicity, we've been ignoring the possibility of any of
253 * our three numbers being equal. Handle those cases here (they
254 * shouldn't happen very often, (I think?)).
255 */
256
257 /*
258 * If candidate is the next bit to be searched then it's definitely
259 * sooner.
260 */
261 if (A == B)
262 return true;
263
264 /*
265 * If candidate is the next upcoming stale bit we'll return false
266 * to avoid doing `upcoming = candidate' in the caller (which would
267 * be useless since they're already equal)
268 */
269 if (B == C)
270 return false;
271
272 /*
273 * If next start is the upcoming stale bit then candidate can't
274 * possibly be sooner. The "soonest" bit is already selected.
275 */
276 if (A == C)
277 return false;
278
279 /* We should have covered all logical combinations. */
280 WARN(1, "Well, that's awkward. A=%ld, B=%ld, C=%ld\n", A, B, C);
281 return true;
282}
283
284static void __fast_smmu_free_iova(struct dma_fast_smmu_mapping *mapping,
285 dma_addr_t iova, size_t size)
286{
287 unsigned long start_bit = (iova - mapping->base) >> FAST_PAGE_SHIFT;
288 unsigned long nbits = size >> FAST_PAGE_SHIFT;
289
290 /*
291 * We don't invalidate TLBs on unmap. We invalidate TLBs on map
292 * when we're about to re-allocate a VA that was previously
293 * unmapped but hasn't yet been invalidated. So we need to keep
294 * track of which bit is the closest to being re-allocated here.
295 */
296 if (__bit_is_sooner(start_bit, mapping))
297 mapping->upcoming_stale_bit = start_bit;
298
299 bitmap_clear(mapping->bitmap, start_bit, nbits);
300 mapping->have_stale_tlbs = true;
301}
302
303
304static void __fast_dma_page_cpu_to_dev(struct page *page, unsigned long off,
305 size_t size, enum dma_data_direction dir)
306{
307 __dma_map_area(page_address(page) + off, size, dir);
308}
309
310static void __fast_dma_page_dev_to_cpu(struct page *page, unsigned long off,
311 size_t size, enum dma_data_direction dir)
312{
313 __dma_unmap_area(page_address(page) + off, size, dir);
314
315 /* TODO: WHAT IS THIS? */
316 /*
317 * Mark the D-cache clean for this page to avoid extra flushing.
318 */
319 if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE)
320 set_bit(PG_dcache_clean, &page->flags);
321}
322
323static int __fast_dma_direction_to_prot(enum dma_data_direction dir)
324{
325 switch (dir) {
326 case DMA_BIDIRECTIONAL:
327 return IOMMU_READ | IOMMU_WRITE;
328 case DMA_TO_DEVICE:
329 return IOMMU_READ;
330 case DMA_FROM_DEVICE:
331 return IOMMU_WRITE;
332 default:
333 return 0;
334 }
335}
336
337static dma_addr_t fast_smmu_map_page(struct device *dev, struct page *page,
338 unsigned long offset, size_t size,
339 enum dma_data_direction dir,
340 unsigned long attrs)
341{
342 struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
343 dma_addr_t iova;
344 unsigned long flags;
345 av8l_fast_iopte *pmd;
346 phys_addr_t phys_plus_off = page_to_phys(page) + offset;
347 phys_addr_t phys_to_map = round_down(phys_plus_off, FAST_PAGE_SIZE);
348 unsigned long offset_from_phys_to_map = phys_plus_off & ~FAST_PAGE_MASK;
349 size_t len = ALIGN(size + offset_from_phys_to_map, FAST_PAGE_SIZE);
350 int nptes = len >> FAST_PAGE_SHIFT;
351 bool skip_sync = (attrs & DMA_ATTR_SKIP_CPU_SYNC);
352 int prot = __fast_dma_direction_to_prot(dir);
Liam Mark83a9f86e2017-02-08 09:37:17 -0800353 bool is_coherent = is_dma_coherent(dev, attrs);
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700354
Mitchel Humpherys425d03d2016-06-23 13:25:12 -0700355 prot = __get_iommu_pgprot(attrs, prot, is_coherent);
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700356
Mitchel Humpherys425d03d2016-06-23 13:25:12 -0700357 if (!skip_sync && !is_coherent)
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700358 __fast_dma_page_cpu_to_dev(phys_to_page(phys_to_map),
359 offset_from_phys_to_map, size, dir);
360
361 spin_lock_irqsave(&mapping->lock, flags);
362
Mitchel Humpherys5c704e02015-12-21 15:06:34 -0800363 iova = __fast_smmu_alloc_iova(mapping, attrs, len);
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700364
365 if (unlikely(iova == DMA_ERROR_CODE))
366 goto fail;
367
368 pmd = iopte_pmd_offset(mapping->pgtbl_pmds, iova);
369
370 if (unlikely(av8l_fast_map_public(pmd, phys_to_map, len, prot)))
371 goto fail_free_iova;
372
Mitchel Humpherys9de66db2016-06-07 11:09:44 -0700373 fast_dmac_clean_range(mapping, pmd, pmd + nptes);
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700374
375 spin_unlock_irqrestore(&mapping->lock, flags);
376 return iova + offset_from_phys_to_map;
377
378fail_free_iova:
379 __fast_smmu_free_iova(mapping, iova, size);
380fail:
381 spin_unlock_irqrestore(&mapping->lock, flags);
382 return DMA_ERROR_CODE;
383}
384
385static void fast_smmu_unmap_page(struct device *dev, dma_addr_t iova,
386 size_t size, enum dma_data_direction dir,
387 unsigned long attrs)
388{
389 struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
390 unsigned long flags;
391 av8l_fast_iopte *pmd = iopte_pmd_offset(mapping->pgtbl_pmds, iova);
392 unsigned long offset = iova & ~FAST_PAGE_MASK;
393 size_t len = ALIGN(size + offset, FAST_PAGE_SIZE);
394 int nptes = len >> FAST_PAGE_SHIFT;
395 struct page *page = phys_to_page((*pmd & FAST_PTE_ADDR_MASK));
396 bool skip_sync = (attrs & DMA_ATTR_SKIP_CPU_SYNC);
Liam Mark83a9f86e2017-02-08 09:37:17 -0800397 bool is_coherent = is_dma_coherent(dev, attrs);
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700398
Mitchel Humpherys425d03d2016-06-23 13:25:12 -0700399 if (!skip_sync && !is_coherent)
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700400 __fast_dma_page_dev_to_cpu(page, offset, size, dir);
401
402 spin_lock_irqsave(&mapping->lock, flags);
403 av8l_fast_unmap_public(pmd, len);
Mitchel Humpherys9de66db2016-06-07 11:09:44 -0700404 fast_dmac_clean_range(mapping, pmd, pmd + nptes);
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700405 __fast_smmu_free_iova(mapping, iova, len);
406 spin_unlock_irqrestore(&mapping->lock, flags);
407}
408
Liam Mark78d7fb52016-12-01 13:05:31 -0800409static void fast_smmu_sync_single_for_cpu(struct device *dev,
410 dma_addr_t iova, size_t size, enum dma_data_direction dir)
411{
412 struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
413 av8l_fast_iopte *pmd = iopte_pmd_offset(mapping->pgtbl_pmds, iova);
414 unsigned long offset = iova & ~FAST_PAGE_MASK;
415 struct page *page = phys_to_page((*pmd & FAST_PTE_ADDR_MASK));
416
Liam Mark83a9f86e2017-02-08 09:37:17 -0800417 if (!__fast_is_pte_coherent(pmd))
Liam Mark78d7fb52016-12-01 13:05:31 -0800418 __fast_dma_page_dev_to_cpu(page, offset, size, dir);
419}
420
421static void fast_smmu_sync_single_for_device(struct device *dev,
422 dma_addr_t iova, size_t size, enum dma_data_direction dir)
423{
424 struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
425 av8l_fast_iopte *pmd = iopte_pmd_offset(mapping->pgtbl_pmds, iova);
426 unsigned long offset = iova & ~FAST_PAGE_MASK;
427 struct page *page = phys_to_page((*pmd & FAST_PTE_ADDR_MASK));
428
Liam Mark83a9f86e2017-02-08 09:37:17 -0800429 if (!__fast_is_pte_coherent(pmd))
Liam Mark78d7fb52016-12-01 13:05:31 -0800430 __fast_dma_page_cpu_to_dev(page, offset, size, dir);
431}
432
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700433static int fast_smmu_map_sg(struct device *dev, struct scatterlist *sg,
434 int nents, enum dma_data_direction dir,
435 unsigned long attrs)
436{
Patrick Daly36c547a2017-09-06 19:13:02 -0700437 /* 0 indicates error */
438 return 0;
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700439}
440
441static void fast_smmu_unmap_sg(struct device *dev,
442 struct scatterlist *sg, int nents,
443 enum dma_data_direction dir,
444 unsigned long attrs)
445{
446 WARN_ON_ONCE(1);
447}
448
Liam Mark78d7fb52016-12-01 13:05:31 -0800449static void fast_smmu_sync_sg_for_cpu(struct device *dev,
450 struct scatterlist *sg, int nents, enum dma_data_direction dir)
451{
452 WARN_ON_ONCE(1);
453}
454
455static void fast_smmu_sync_sg_for_device(struct device *dev,
456 struct scatterlist *sg, int nents, enum dma_data_direction dir)
457{
458 WARN_ON_ONCE(1);
459}
460
/*
 * Free the first @count pages referenced by @pages, then release the
 * @pages array itself with kvfree().
 */
static void __fast_smmu_free_pages(struct page **pages, int count)
{
	int idx = 0;

	while (idx < count) {
		__free_page(pages[idx]);
		idx++;
	}

	kvfree(pages);
}
469
470static struct page **__fast_smmu_alloc_pages(unsigned int count, gfp_t gfp)
471{
472 struct page **pages;
473 unsigned int i = 0, array_size = count * sizeof(*pages);
474
475 if (array_size <= PAGE_SIZE)
476 pages = kzalloc(array_size, GFP_KERNEL);
477 else
478 pages = vzalloc(array_size);
479 if (!pages)
480 return NULL;
481
482 /* IOMMU can map any pages, so himem can also be used here */
483 gfp |= __GFP_NOWARN | __GFP_HIGHMEM;
484
485 for (i = 0; i < count; ++i) {
486 struct page *page = alloc_page(gfp);
487
488 if (!page) {
489 __fast_smmu_free_pages(pages, i);
490 return NULL;
491 }
492 pages[i] = page;
493 }
494 return pages;
495}
496
497static void *fast_smmu_alloc(struct device *dev, size_t size,
498 dma_addr_t *handle, gfp_t gfp,
499 unsigned long attrs)
500{
501 struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
502 struct sg_table sgt;
503 dma_addr_t dma_addr, iova_iter;
504 void *addr;
505 av8l_fast_iopte *ptep;
506 unsigned long flags;
507 struct sg_mapping_iter miter;
508 unsigned int count = ALIGN(size, SZ_4K) >> PAGE_SHIFT;
509 int prot = IOMMU_READ | IOMMU_WRITE; /* TODO: extract from attrs */
Liam Mark83a9f86e2017-02-08 09:37:17 -0800510 bool is_coherent = is_dma_coherent(dev, attrs);
Mitchel Humpherys425d03d2016-06-23 13:25:12 -0700511 pgprot_t remap_prot = __get_dma_pgprot(attrs, PAGE_KERNEL, is_coherent);
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700512 struct page **pages;
513
Mitchel Humpherys425d03d2016-06-23 13:25:12 -0700514 prot = __get_iommu_pgprot(attrs, prot, is_coherent);
515
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700516 *handle = DMA_ERROR_CODE;
517
518 pages = __fast_smmu_alloc_pages(count, gfp);
519 if (!pages) {
520 dev_err(dev, "no pages\n");
521 return NULL;
522 }
523
524 size = ALIGN(size, SZ_4K);
525 if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, gfp)) {
526 dev_err(dev, "no sg tablen\n");
527 goto out_free_pages;
528 }
529
Mitchel Humpherys425d03d2016-06-23 13:25:12 -0700530 if (!is_coherent) {
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700531 /*
532 * The CPU-centric flushing implied by SG_MITER_TO_SG isn't
533 * sufficient here, so skip it by using the "wrong" direction.
534 */
535 sg_miter_start(&miter, sgt.sgl, sgt.orig_nents,
536 SG_MITER_FROM_SG);
537 while (sg_miter_next(&miter))
Kyle Yan65be4a52016-10-31 15:05:00 -0700538 __dma_flush_area(miter.addr, miter.length);
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700539 sg_miter_stop(&miter);
540 }
541
542 spin_lock_irqsave(&mapping->lock, flags);
Mitchel Humpherys5c704e02015-12-21 15:06:34 -0800543 dma_addr = __fast_smmu_alloc_iova(mapping, attrs, size);
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700544 if (dma_addr == DMA_ERROR_CODE) {
545 dev_err(dev, "no iova\n");
546 spin_unlock_irqrestore(&mapping->lock, flags);
547 goto out_free_sg;
548 }
549 iova_iter = dma_addr;
550 sg_miter_start(&miter, sgt.sgl, sgt.orig_nents,
551 SG_MITER_FROM_SG | SG_MITER_ATOMIC);
552 while (sg_miter_next(&miter)) {
553 int nptes = miter.length >> FAST_PAGE_SHIFT;
554
555 ptep = iopte_pmd_offset(mapping->pgtbl_pmds, iova_iter);
556 if (unlikely(av8l_fast_map_public(
557 ptep, page_to_phys(miter.page),
558 miter.length, prot))) {
559 dev_err(dev, "no map public\n");
560 /* TODO: unwind previously successful mappings */
561 goto out_free_iova;
562 }
Mitchel Humpherys9de66db2016-06-07 11:09:44 -0700563 fast_dmac_clean_range(mapping, ptep, ptep + nptes);
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700564 iova_iter += miter.length;
565 }
566 sg_miter_stop(&miter);
567 spin_unlock_irqrestore(&mapping->lock, flags);
568
569 addr = dma_common_pages_remap(pages, size, VM_USERMAP, remap_prot,
570 __builtin_return_address(0));
571 if (!addr) {
572 dev_err(dev, "no common pages\n");
573 goto out_unmap;
574 }
575
576 *handle = dma_addr;
577 sg_free_table(&sgt);
578 return addr;
579
580out_unmap:
581 /* need to take the lock again for page tables and iova */
582 spin_lock_irqsave(&mapping->lock, flags);
583 ptep = iopte_pmd_offset(mapping->pgtbl_pmds, dma_addr);
584 av8l_fast_unmap_public(ptep, size);
Mitchel Humpherys9de66db2016-06-07 11:09:44 -0700585 fast_dmac_clean_range(mapping, ptep, ptep + count);
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700586out_free_iova:
587 __fast_smmu_free_iova(mapping, dma_addr, size);
588 spin_unlock_irqrestore(&mapping->lock, flags);
589out_free_sg:
590 sg_free_table(&sgt);
591out_free_pages:
592 __fast_smmu_free_pages(pages, count);
593 return NULL;
594}
595
596static void fast_smmu_free(struct device *dev, size_t size,
597 void *vaddr, dma_addr_t dma_handle,
598 unsigned long attrs)
599{
600 struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
601 struct vm_struct *area;
602 struct page **pages;
603 size_t count = ALIGN(size, SZ_4K) >> FAST_PAGE_SHIFT;
604 av8l_fast_iopte *ptep;
605 unsigned long flags;
606
607 size = ALIGN(size, SZ_4K);
608
609 area = find_vm_area(vaddr);
610 if (WARN_ON_ONCE(!area))
611 return;
612
613 pages = area->pages;
614 dma_common_free_remap(vaddr, size, VM_USERMAP, false);
615 ptep = iopte_pmd_offset(mapping->pgtbl_pmds, dma_handle);
616 spin_lock_irqsave(&mapping->lock, flags);
617 av8l_fast_unmap_public(ptep, size);
Mitchel Humpherys9de66db2016-06-07 11:09:44 -0700618 fast_dmac_clean_range(mapping, ptep, ptep + count);
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700619 __fast_smmu_free_iova(mapping, dma_handle, size);
620 spin_unlock_irqrestore(&mapping->lock, flags);
621 __fast_smmu_free_pages(pages, count);
622}
623
Patrick Daly7bcb5462016-08-03 17:27:36 -0700624static int fast_smmu_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
625 void *cpu_addr, dma_addr_t dma_addr,
626 size_t size, unsigned long attrs)
627{
628 struct vm_struct *area;
629 unsigned long uaddr = vma->vm_start;
630 struct page **pages;
631 int i, nr_pages, ret = 0;
Liam Mark83a9f86e2017-02-08 09:37:17 -0800632 bool coherent = is_dma_coherent(dev, attrs);
Patrick Daly7bcb5462016-08-03 17:27:36 -0700633
Mitchel Humpherys425d03d2016-06-23 13:25:12 -0700634 vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
635 coherent);
Patrick Daly7bcb5462016-08-03 17:27:36 -0700636 area = find_vm_area(cpu_addr);
637 if (!area)
638 return -EINVAL;
639
640 pages = area->pages;
641 nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
642 for (i = vma->vm_pgoff; i < nr_pages && uaddr < vma->vm_end; i++) {
643 ret = vm_insert_page(vma, uaddr, pages[i]);
644 if (ret)
645 break;
646 uaddr += PAGE_SIZE;
647 }
648
649 return ret;
650}
651
Patrick Daly9c79f382017-06-12 18:15:25 -0700652static int fast_smmu_get_sgtable(struct device *dev, struct sg_table *sgt,
653 void *cpu_addr, dma_addr_t dma_addr,
654 size_t size, unsigned long attrs)
655{
656 unsigned int n_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
657 struct vm_struct *area;
658
659 area = find_vm_area(cpu_addr);
660 if (!area || !area->pages)
661 return -EINVAL;
662
663 return sg_alloc_table_from_pages(sgt, area->pages, n_pages, 0, size,
664 GFP_KERNEL);
665}
666
Patrick Daly199fa672017-05-04 15:30:16 -0700667static dma_addr_t fast_smmu_dma_map_resource(
668 struct device *dev, phys_addr_t phys_addr,
669 size_t size, enum dma_data_direction dir,
670 unsigned long attrs)
671{
672 struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
673 size_t offset = phys_addr & ~FAST_PAGE_MASK;
674 size_t len = round_up(size + offset, FAST_PAGE_SIZE);
675 dma_addr_t dma_addr;
676 int prot;
677 unsigned long flags;
678
679 spin_lock_irqsave(&mapping->lock, flags);
680 dma_addr = __fast_smmu_alloc_iova(mapping, attrs, len);
681 spin_unlock_irqrestore(&mapping->lock, flags);
682
683 if (dma_addr == DMA_ERROR_CODE)
684 return dma_addr;
685
686 prot = __fast_dma_direction_to_prot(dir);
687 prot |= IOMMU_MMIO;
688
689 if (iommu_map(mapping->domain, dma_addr, phys_addr - offset,
690 len, prot)) {
691 spin_lock_irqsave(&mapping->lock, flags);
692 __fast_smmu_free_iova(mapping, dma_addr, len);
693 spin_unlock_irqrestore(&mapping->lock, flags);
694 return DMA_ERROR_CODE;
695 }
696 return dma_addr + offset;
697}
698
699static void fast_smmu_dma_unmap_resource(
700 struct device *dev, dma_addr_t addr,
701 size_t size, enum dma_data_direction dir,
702 unsigned long attrs)
703{
704 struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
705 size_t offset = addr & ~FAST_PAGE_MASK;
706 size_t len = round_up(size + offset, FAST_PAGE_SIZE);
707 unsigned long flags;
708
709 iommu_unmap(mapping->domain, addr - offset, len);
710 spin_lock_irqsave(&mapping->lock, flags);
711 __fast_smmu_free_iova(mapping, addr, len);
712 spin_unlock_irqrestore(&mapping->lock, flags);
713}
714
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700715static int fast_smmu_mapping_error(struct device *dev,
716 dma_addr_t dma_addr)
717{
718 return dma_addr == DMA_ERROR_CODE;
719}
720
Mitchel Humpherys5c704e02015-12-21 15:06:34 -0800721static void __fast_smmu_mapped_over_stale(struct dma_fast_smmu_mapping *fast,
722 void *data)
723{
724 av8l_fast_iopte *ptep = data;
725 dma_addr_t iova;
726 unsigned long bitmap_idx;
727
728 bitmap_idx = (unsigned long)(ptep - fast->pgtbl_pmds);
729 iova = bitmap_idx << FAST_PAGE_SHIFT;
730 dev_err(fast->dev, "Mapped over stale tlb at %pa\n", &iova);
731 dev_err(fast->dev, "bitmap (failure at idx %lu):\n", bitmap_idx);
732 dev_err(fast->dev, "ptep: %p pmds: %p diff: %lu\n", ptep,
Charan Teja Reddy29f61402017-02-09 20:44:29 +0530733 fast->pgtbl_pmds, bitmap_idx);
Mitchel Humpherys5c704e02015-12-21 15:06:34 -0800734 print_hex_dump(KERN_ERR, "bmap: ", DUMP_PREFIX_ADDRESS,
735 32, 8, fast->bitmap, fast->bitmap_size, false);
736}
737
738static int fast_smmu_notify(struct notifier_block *self,
739 unsigned long action, void *data)
740{
741 struct dma_fast_smmu_mapping *fast = container_of(
742 self, struct dma_fast_smmu_mapping, notifier);
743
744 switch (action) {
745 case MAPPED_OVER_STALE_TLB:
746 __fast_smmu_mapped_over_stale(fast, data);
747 return NOTIFY_OK;
748 default:
749 WARN(1, "Unhandled notifier action");
750 return NOTIFY_DONE;
751 }
752}
753
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700754static const struct dma_map_ops fast_smmu_dma_ops = {
755 .alloc = fast_smmu_alloc,
756 .free = fast_smmu_free,
Patrick Daly7bcb5462016-08-03 17:27:36 -0700757 .mmap = fast_smmu_mmap_attrs,
Patrick Daly9c79f382017-06-12 18:15:25 -0700758 .get_sgtable = fast_smmu_get_sgtable,
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700759 .map_page = fast_smmu_map_page,
760 .unmap_page = fast_smmu_unmap_page,
Liam Mark78d7fb52016-12-01 13:05:31 -0800761 .sync_single_for_cpu = fast_smmu_sync_single_for_cpu,
762 .sync_single_for_device = fast_smmu_sync_single_for_device,
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700763 .map_sg = fast_smmu_map_sg,
764 .unmap_sg = fast_smmu_unmap_sg,
Liam Mark78d7fb52016-12-01 13:05:31 -0800765 .sync_sg_for_cpu = fast_smmu_sync_sg_for_cpu,
766 .sync_sg_for_device = fast_smmu_sync_sg_for_device,
Patrick Daly199fa672017-05-04 15:30:16 -0700767 .map_resource = fast_smmu_dma_map_resource,
768 .unmap_resource = fast_smmu_dma_unmap_resource,
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700769 .mapping_error = fast_smmu_mapping_error,
770};
771
772/**
773 * __fast_smmu_create_mapping_sized
774 * @base: bottom of the VA range
775 * @size: size of the VA range in bytes
776 *
777 * Creates a mapping structure which holds information about used/unused IO
778 * address ranges, which is required to perform mapping with IOMMU aware
779 * functions. The only VA range supported is [0, 4GB).
780 *
781 * The client device need to be attached to the mapping with
782 * fast_smmu_attach_device function.
783 */
784static struct dma_fast_smmu_mapping *__fast_smmu_create_mapping_sized(
Charan Teja Reddy29f61402017-02-09 20:44:29 +0530785 dma_addr_t base, u64 size)
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700786{
787 struct dma_fast_smmu_mapping *fast;
788
789 fast = kzalloc(sizeof(struct dma_fast_smmu_mapping), GFP_KERNEL);
790 if (!fast)
791 goto err;
792
793 fast->base = base;
794 fast->size = size;
795 fast->num_4k_pages = size >> FAST_PAGE_SHIFT;
796 fast->bitmap_size = BITS_TO_LONGS(fast->num_4k_pages) * sizeof(long);
797
Liam Mark4d4fbba2017-02-08 10:30:49 -0800798 fast->bitmap = kzalloc(fast->bitmap_size, GFP_KERNEL | __GFP_NOWARN |
799 __GFP_NORETRY);
800 if (!fast->bitmap)
801 fast->bitmap = vzalloc(fast->bitmap_size);
802
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700803 if (!fast->bitmap)
804 goto err2;
805
806 spin_lock_init(&fast->lock);
807
808 return fast;
809err2:
810 kfree(fast);
811err:
812 return ERR_PTR(-ENOMEM);
813}
814
Patrick Dalybc8b1cb2017-05-04 17:10:10 -0700815/*
816 * Based off of similar code from dma-iommu.c, but modified to use a different
817 * iova allocator
818 */
819static void fast_smmu_reserve_pci_windows(struct device *dev,
820 struct dma_fast_smmu_mapping *mapping)
821{
822 struct pci_host_bridge *bridge;
823 struct resource_entry *window;
824 phys_addr_t start, end;
825 struct pci_dev *pci_dev;
826 unsigned long flags;
827
828 if (!dev_is_pci(dev))
829 return;
830
831 pci_dev = to_pci_dev(dev);
832 bridge = pci_find_host_bridge(pci_dev->bus);
833
834 spin_lock_irqsave(&mapping->lock, flags);
835 resource_list_for_each_entry(window, &bridge->windows) {
836 if (resource_type(window->res) != IORESOURCE_MEM &&
837 resource_type(window->res) != IORESOURCE_IO)
838 continue;
839
840 start = round_down(window->res->start - window->offset,
841 FAST_PAGE_SIZE);
842 end = round_up(window->res->end - window->offset,
843 FAST_PAGE_SIZE);
844 start = max_t(unsigned long, mapping->base, start);
845 end = min_t(unsigned long, mapping->base + mapping->size, end);
846 if (start >= end)
847 continue;
848
849 dev_dbg(dev, "iova allocator reserved 0x%pa-0x%pa\n",
850 &start, &end);
851
852 start = (start - mapping->base) >> FAST_PAGE_SHIFT;
853 end = (end - mapping->base) >> FAST_PAGE_SHIFT;
854 bitmap_set(mapping->bitmap, start, end - start);
855 }
856 spin_unlock_irqrestore(&mapping->lock, flags);
857}
858
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700859/**
Patrick Daly1748f082017-09-05 21:32:52 -0700860 * fast_smmu_init_mapping
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700861 * @dev: valid struct device pointer
862 * @mapping: io address space mapping structure (returned from
Patrick Daly1748f082017-09-05 21:32:52 -0700863 * arm_iommu_create_mapping)
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700864 *
Patrick Daly1748f082017-09-05 21:32:52 -0700865 * Called the first time a device is attached to this mapping.
866 * Not for dma client use.
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700867 */
Patrick Daly1748f082017-09-05 21:32:52 -0700868int fast_smmu_init_mapping(struct device *dev,
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700869 struct dma_iommu_mapping *mapping)
870{
Patrick Daly0df84ac2017-10-11 17:32:41 -0700871 int err;
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700872 struct iommu_domain *domain = mapping->domain;
Mitchel Humpherysad9df1f2016-05-27 14:58:31 -0700873 struct iommu_group *group;
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700874 struct iommu_pgtbl_info info;
Charan Teja Reddy29f61402017-02-09 20:44:29 +0530875 u64 size = (u64)mapping->bits << PAGE_SHIFT;
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700876
Patrick Daly1748f082017-09-05 21:32:52 -0700877 if (mapping->base + size > (SZ_1G * 4ULL)) {
878 dev_err(dev, "Iova end address too large\n");
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700879 return -EINVAL;
Patrick Daly1748f082017-09-05 21:32:52 -0700880 }
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700881
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700882 mapping->fast = __fast_smmu_create_mapping_sized(mapping->base, size);
883 if (IS_ERR(mapping->fast))
884 return -ENOMEM;
885 mapping->fast->domain = domain;
886 mapping->fast->dev = dev;
887
Patrick Dalybc8b1cb2017-05-04 17:10:10 -0700888 fast_smmu_reserve_pci_windows(dev, mapping->fast);
889
Mitchel Humpherysad9df1f2016-05-27 14:58:31 -0700890 group = dev->iommu_group;
891 if (!group) {
892 dev_err(dev, "No iommu associated with device\n");
Patrick Daly1748f082017-09-05 21:32:52 -0700893 err = -ENODEV;
894 goto release_mapping;
Mitchel Humpherysad9df1f2016-05-27 14:58:31 -0700895 }
896
897 if (iommu_get_domain_for_dev(dev)) {
898 dev_err(dev, "Device already attached to other iommu_domain\n");
Patrick Daly1748f082017-09-05 21:32:52 -0700899 err = -EINVAL;
900 goto release_mapping;
Mitchel Humpherysad9df1f2016-05-27 14:58:31 -0700901 }
902
Patrick Daly1748f082017-09-05 21:32:52 -0700903 /*
904 * Need to attach prior to calling DOMAIN_ATTR_PGTBL_INFO and then
905 * detach to be in the expected state. Its a bit messy.
906 */
907 if (iommu_attach_group(mapping->domain, group)) {
908 err = -EINVAL;
909 goto release_mapping;
910 }
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700911
912 if (iommu_domain_get_attr(domain, DOMAIN_ATTR_PGTBL_INFO,
913 &info)) {
914 dev_err(dev, "Couldn't get page table info\n");
Patrick Daly1748f082017-09-05 21:32:52 -0700915 err = -EINVAL;
916 goto detach_group;
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700917 }
918 mapping->fast->pgtbl_pmds = info.pmds;
919
Mitchel Humpherys9de66db2016-06-07 11:09:44 -0700920 if (iommu_domain_get_attr(domain, DOMAIN_ATTR_PAGE_TABLE_IS_COHERENT,
Patrick Daly1748f082017-09-05 21:32:52 -0700921 &mapping->fast->is_smmu_pt_coherent)) {
922 err = -EINVAL;
923 goto detach_group;
924 }
Mitchel Humpherys9de66db2016-06-07 11:09:44 -0700925
Mitchel Humpherys5c704e02015-12-21 15:06:34 -0800926 mapping->fast->notifier.notifier_call = fast_smmu_notify;
927 av8l_register_notify(&mapping->fast->notifier);
928
Patrick Daly1748f082017-09-05 21:32:52 -0700929 iommu_detach_group(mapping->domain, group);
930 mapping->ops = &fast_smmu_dma_ops;
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700931 return 0;
Patrick Daly1748f082017-09-05 21:32:52 -0700932
933detach_group:
934 iommu_detach_group(mapping->domain, group);
935release_mapping:
936 kfree(mapping->fast->bitmap);
937 kfree(mapping->fast);
938 return err;
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700939}
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700940
941/**
Patrick Daly1748f082017-09-05 21:32:52 -0700942 * fast_smmu_release_mapping
943 * @kref: dma_iommu_mapping->kref
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700944 *
Patrick Daly1748f082017-09-05 21:32:52 -0700945 * Cleans up the given iommu mapping.
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700946 */
Patrick Daly1748f082017-09-05 21:32:52 -0700947void fast_smmu_release_mapping(struct kref *kref)
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700948{
Patrick Daly1748f082017-09-05 21:32:52 -0700949 struct dma_iommu_mapping *mapping =
950 container_of(kref, struct dma_iommu_mapping, kref);
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700951
Liam Mark4d4fbba2017-02-08 10:30:49 -0800952 kvfree(mapping->fast->bitmap);
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700953 kfree(mapping->fast);
Patrick Daly1748f082017-09-05 21:32:52 -0700954 iommu_domain_free(mapping->domain);
955 kfree(mapping);
Mitchel Humpherys0e43f0a2015-10-08 15:03:09 -0700956}