/* Copyright (c) 2016, The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 */

#include <linux/dma-contiguous.h>
#include <linux/dma-mapping.h>
#include <linux/dma-mapping-fast.h>
#include <linux/io-pgtable-fast.h>
#include <asm/cacheflush.h>
#include <asm/dma-iommu.h>


/* some redundant definitions... :( TODO: move to io-pgtable-fast.h */
#define FAST_PAGE_SHIFT		12
#define FAST_PAGE_SIZE		(1UL << FAST_PAGE_SHIFT)
#define FAST_PAGE_MASK		(~(PAGE_SIZE - 1))
#define FAST_PTE_ADDR_MASK	((av8l_fast_iopte)0xfffffffff000)

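/*
 * Example: with the 4K fast-page geometry above, a (hypothetical) physical
 * address such as 0x80001234 splits into
 *
 *	round_down(0x80001234, FAST_PAGE_SIZE)	== 0x80001000	(page to map)
 *	0x80001234 & ~FAST_PAGE_MASK		== 0x234	(offset in page)
 *
 * which is how fast_smmu_map_page() below derives phys_to_map and
 * offset_from_phys_to_map from phys_plus_off.
 */
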
/*
 * Checks if the allocated range (ending at @end) covered the upcoming
 * stale bit. We don't need to know exactly where the range starts since
 * we already know where the candidate search range started. If, starting
 * from the beginning of the candidate search range, we had to step over
 * (or landed directly on top of) the upcoming stale bit, then we return
 * true.
 *
 * Due to wrapping, there are two scenarios we'll need to check: (1) if the
 * range [search_start, upcoming_stale] spans 0 (i.e. search_start >
 * upcoming_stale), and, (2) if the range: [search_start, upcoming_stale]
 * does *not* span 0 (i.e. search_start <= upcoming_stale). And for each
 * of those two scenarios we need to handle three cases: (1) the bit was
 * found before wrapping, (2) the bit was found after wrapping and we
 * crossed (or landed on top of) upcoming_stale, or (3) the bit was found
 * after wrapping without reaching upcoming_stale.
 */
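/*
 * Two concrete (hypothetical) cases for illustration, using small bit
 * indices:
 *
 *	upcoming_stale = 10, search_start = 4, end = 12
 *		=> no wrap; the range ran past bit 10, so it covered it (true)
 *	upcoming_stale = 3, search_start = 8, end = 2
 *		=> wrapped past 0 but stopped at bit 2, short of bit 3 (false)
 */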
static bool __bit_covered_stale(unsigned long upcoming_stale,
				unsigned long search_start,
				unsigned long end)
{
	if (search_start > upcoming_stale) {
		if (end >= search_start) {
			/*
			 * We started searching above upcoming_stale and we
			 * didn't wrap, so we couldn't have crossed
			 * upcoming_stale.
			 */
			return false;
		}
		/*
		 * We wrapped. Did we cross (or land on top of)
		 * upcoming_stale?
		 */
		return end >= upcoming_stale;
	}

	if (search_start <= upcoming_stale) {
		if (end >= search_start) {
			/*
			 * We didn't wrap. Did we cross (or land on top
			 * of) upcoming_stale?
			 */
			return end >= upcoming_stale;
		}
		/*
		 * We wrapped. So we must have crossed upcoming_stale
		 * (since we started searching below it).
		 */
		return true;
	}

	/* we should have covered all logical combinations... */
	WARN_ON(1);
	return true;
}

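/*
 * Allocate a contiguous run of 4K IOVA pages from the mapping's bitmap,
 * aligned to the power-of-two order of the requested size, starting at the
 * rolling next_start cursor and wrapping around once if necessary. If the
 * newly allocated range covers the "upcoming stale" bit (a VA freed earlier
 * whose TLB entries were never invalidated), the entire TLB is invalidated
 * before the IOVA is handed out. Returns DMA_ERROR_CODE when no suitable
 * run is free. Caller must hold mapping->lock.
 */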
static dma_addr_t __fast_smmu_alloc_iova(struct dma_fast_smmu_mapping *mapping,
					 size_t size)
{
	unsigned long bit, prev_search_start, nbits = size >> FAST_PAGE_SHIFT;
	unsigned long align = (1 << get_order(size)) - 1;

	bit = bitmap_find_next_zero_area(
		mapping->bitmap, mapping->num_4k_pages, mapping->next_start,
		nbits, align);
	if (unlikely(bit > mapping->num_4k_pages)) {
		/* try wrapping */
		mapping->next_start = 0; /* TODO: SHOULD I REALLY DO THIS?!? */
		bit = bitmap_find_next_zero_area(
			mapping->bitmap, mapping->num_4k_pages, 0, nbits,
			align);
		if (unlikely(bit > mapping->num_4k_pages))
			return DMA_ERROR_CODE;
	}

	bitmap_set(mapping->bitmap, bit, nbits);
	prev_search_start = mapping->next_start;
	mapping->next_start = bit + nbits;
	if (unlikely(mapping->next_start >= mapping->num_4k_pages))
		mapping->next_start = 0;

	/*
	 * If we just re-allocated a VA whose TLB hasn't been invalidated
	 * since it was last used and unmapped, we need to invalidate it
	 * here. We actually invalidate the entire TLB so that we don't
	 * have to invalidate the TLB again until we wrap back around.
	 */
	if (mapping->have_stale_tlbs &&
	    __bit_covered_stale(mapping->upcoming_stale_bit,
				prev_search_start,
				bit + nbits - 1)) {
		iommu_tlbiall(mapping->domain);
		mapping->have_stale_tlbs = false;
	}

	return (bit << FAST_PAGE_SHIFT) + mapping->base;
}

/*
 * Checks whether the candidate bit will be allocated sooner than the
 * current upcoming stale bit. We can say candidate will be upcoming
 * sooner than the current upcoming stale bit if it lies between the
 * starting bit of the next search range and the upcoming stale bit
 * (allowing for wrap-around).
 *
 * Stated differently, we're checking the relative ordering of three
 * unsigned numbers. So we need to check all 6 (i.e. 3!) permutations,
 * namely:
 *
 *	0 |---A---B---C---| TOP (Case 1)
 *	0 |---A---C---B---| TOP (Case 2)
 *	0 |---B---A---C---| TOP (Case 3)
 *	0 |---B---C---A---| TOP (Case 4)
 *	0 |---C---A---B---| TOP (Case 5)
 *	0 |---C---B---A---| TOP (Case 6)
 *
 * Note that since we're allowing numbers to wrap, the following three
 * scenarios are all equivalent for Case 1:
 *
 *	0 |---A---B---C---| TOP
 *	0 |---C---A---B---| TOP (C has wrapped. This is Case 5.)
 *	0 |---B---C---A---| TOP (C and B have wrapped. This is Case 4.)
 *
 * In any of these cases, if we start searching from A, we will find B
 * before we find C.
 *
 * We can also find two equivalent cases for Case 2:
 *
 *	0 |---A---C---B---| TOP
 *	0 |---B---A---C---| TOP (B has wrapped. This is Case 3.)
 *	0 |---C---B---A---| TOP (B and C have wrapped. This is Case 6.)
 *
 * In any of these cases, if we start searching from A, we will find C
 * before we find B.
 */
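/*
 * A hypothetical example: if the next search starts at bit A = 100, the
 * current upcoming stale bit is C = 900, and a VA at bit B = 300 is being
 * freed, then A < B < C (Case 1) and B is sooner, so the caller updates
 * upcoming_stale_bit to 300. Freeing bit B = 950 instead gives
 * A < C < B (Case 2), so the existing stale bit is kept.
 */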
static bool __bit_is_sooner(unsigned long candidate,
			    struct dma_fast_smmu_mapping *mapping)
{
	unsigned long A = mapping->next_start;
	unsigned long B = candidate;
	unsigned long C = mapping->upcoming_stale_bit;

	if ((A < B && B < C) ||	/* Case 1 */
	    (C < A && A < B) ||	/* Case 5 */
	    (B < C && C < A))	/* Case 4 */
		return true;

	if ((A < C && C < B) ||	/* Case 2 */
	    (B < A && A < C) ||	/* Case 3 */
	    (C < B && B < A))	/* Case 6 */
		return false;

	/*
	 * For simplicity, we've been ignoring the possibility of any of
	 * our three numbers being equal. Handle those cases here (they
	 * shouldn't happen very often, I think).
	 */

	/*
	 * If candidate is the next bit to be searched then it's definitely
	 * sooner.
	 */
	if (A == B)
		return true;

	/*
	 * If candidate is the next upcoming stale bit we'll return false
	 * to avoid doing `upcoming = candidate' in the caller (which would
	 * be useless since they're already equal)
	 */
	if (B == C)
		return false;

	/*
	 * If next start is the upcoming stale bit then candidate can't
	 * possibly be sooner. The "soonest" bit is already selected.
	 */
	if (A == C)
		return false;

	/* We should have covered all logical combinations. */
	WARN(1, "Well, that's awkward. A=%lu, B=%lu, C=%lu\n", A, B, C);
	return true;
}

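/*
 * Return an IOVA range to the bitmap. The underlying TLB entries are NOT
 * invalidated here; instead the range is remembered (via upcoming_stale_bit
 * and have_stale_tlbs) so that __fast_smmu_alloc_iova() can invalidate the
 * TLB lazily, just before the range would be handed out again. Caller must
 * hold mapping->lock.
 */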
static void __fast_smmu_free_iova(struct dma_fast_smmu_mapping *mapping,
				  dma_addr_t iova, size_t size)
{
	unsigned long start_bit = (iova - mapping->base) >> FAST_PAGE_SHIFT;
	unsigned long nbits = size >> FAST_PAGE_SHIFT;

	/*
	 * We don't invalidate TLBs on unmap. We invalidate TLBs on map
	 * when we're about to re-allocate a VA that was previously
	 * unmapped but hasn't yet been invalidated. So we need to keep
	 * track of which bit is the closest to being re-allocated here.
	 */
	if (__bit_is_sooner(start_bit, mapping))
		mapping->upcoming_stale_bit = start_bit;

	bitmap_clear(mapping->bitmap, start_bit, nbits);
	mapping->have_stale_tlbs = true;
}


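/*
 * CPU cache maintenance around DMA: clean/invalidate the CPU caches for the
 * buffer before handing it to the device, and again as needed when taking
 * it back, using the low-level __dma_map_area()/__dma_unmap_area() helpers.
 */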
static void __fast_dma_page_cpu_to_dev(struct page *page, unsigned long off,
				       size_t size, enum dma_data_direction dir)
{
	__dma_map_area(page_address(page) + off, size, dir);
}

static void __fast_dma_page_dev_to_cpu(struct page *page, unsigned long off,
				       size_t size, enum dma_data_direction dir)
{
	__dma_unmap_area(page_address(page) + off, size, dir);

	/* TODO: WHAT IS THIS? */
	/*
	 * Mark the D-cache clean for this page to avoid extra flushing.
	 */
	if (dir != DMA_TO_DEVICE && off == 0 && size >= PAGE_SIZE)
		set_bit(PG_dcache_clean, &page->flags);
}

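/*
 * Translate a DMA data direction into IOMMU page-table permissions: the
 * device needs read access to send data out (DMA_TO_DEVICE) and write
 * access to deposit data into memory (DMA_FROM_DEVICE); bidirectional
 * transfers need both.
 */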
static int __fast_dma_direction_to_prot(enum dma_data_direction dir)
{
	switch (dir) {
	case DMA_BIDIRECTIONAL:
		return IOMMU_READ | IOMMU_WRITE;
	case DMA_TO_DEVICE:
		return IOMMU_READ;
	case DMA_FROM_DEVICE:
		return IOMMU_WRITE;
	default:
		return 0;
	}
}

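/*
 * Map a single page (or sub-page region) for DMA: expand the request to
 * whole fast pages, sync the CPU caches unless DMA_ATTR_SKIP_CPU_SYNC is
 * set, carve an IOVA out of the bitmap, and write the PTEs directly via
 * av8l_fast_map_public().
 *
 * For example (hypothetical numbers): offset = 0x1234 and size = 0x100
 * give phys_to_map rounded down to the 4K boundary, an in-page offset of
 * 0x234, and len = ALIGN(0x334, 4K) = 0x1000, i.e. one PTE to write. The
 * returned handle is the allocated IOVA plus that 0x234 offset.
 */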
static dma_addr_t fast_smmu_map_page(struct device *dev, struct page *page,
				     unsigned long offset, size_t size,
				     enum dma_data_direction dir,
				     unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
	dma_addr_t iova;
	unsigned long flags;
	av8l_fast_iopte *pmd;
	phys_addr_t phys_plus_off = page_to_phys(page) + offset;
	phys_addr_t phys_to_map = round_down(phys_plus_off, FAST_PAGE_SIZE);
	unsigned long offset_from_phys_to_map = phys_plus_off & ~FAST_PAGE_MASK;
	size_t len = ALIGN(size + offset_from_phys_to_map, FAST_PAGE_SIZE);
	int nptes = len >> FAST_PAGE_SHIFT;
	bool skip_sync = (attrs & DMA_ATTR_SKIP_CPU_SYNC);
	int prot = __fast_dma_direction_to_prot(dir);

	if (attrs & DMA_ATTR_STRONGLY_ORDERED)
		prot |= IOMMU_MMIO;

	if (!skip_sync)
		__fast_dma_page_cpu_to_dev(phys_to_page(phys_to_map),
					   offset_from_phys_to_map, size, dir);

	spin_lock_irqsave(&mapping->lock, flags);

	iova = __fast_smmu_alloc_iova(mapping, len);

	if (unlikely(iova == DMA_ERROR_CODE))
		goto fail;

	pmd = iopte_pmd_offset(mapping->pgtbl_pmds, iova);

	if (unlikely(av8l_fast_map_public(pmd, phys_to_map, len, prot)))
		goto fail_free_iova;

	if (!skip_sync)	/* TODO: should ask SMMU if coherent */
		dmac_clean_range(pmd, pmd + nptes);

	spin_unlock_irqrestore(&mapping->lock, flags);
	return iova + offset_from_phys_to_map;

fail_free_iova:
	__fast_smmu_free_iova(mapping, iova, len);
fail:
	spin_unlock_irqrestore(&mapping->lock, flags);
	return DMA_ERROR_CODE;
}

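/*
 * Tear down a mapping created by fast_smmu_map_page(): sync the CPU caches
 * back to the CPU side (unless skipped), clear the PTEs, and release the
 * IOVA range. TLB invalidation is deferred until the range is about to be
 * reused (see __fast_smmu_free_iova()).
 */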
static void fast_smmu_unmap_page(struct device *dev, dma_addr_t iova,
				 size_t size, enum dma_data_direction dir,
				 unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
	unsigned long flags;
	av8l_fast_iopte *pmd = iopte_pmd_offset(mapping->pgtbl_pmds, iova);
	unsigned long offset = iova & ~FAST_PAGE_MASK;
	size_t len = ALIGN(size + offset, FAST_PAGE_SIZE);
	int nptes = len >> FAST_PAGE_SHIFT;
	struct page *page = phys_to_page((*pmd & FAST_PTE_ADDR_MASK));
	bool skip_sync = (attrs & DMA_ATTR_SKIP_CPU_SYNC);

	if (!skip_sync)
		__fast_dma_page_dev_to_cpu(page, offset, size, dir);

	spin_lock_irqsave(&mapping->lock, flags);
	av8l_fast_unmap_public(pmd, len);
	if (!skip_sync)	/* TODO: should ask SMMU if coherent */
		dmac_clean_range(pmd, pmd + nptes);
	__fast_smmu_free_iova(mapping, iova, len);
	spin_unlock_irqrestore(&mapping->lock, flags);
}

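/*
 * Scatter-gather mapping is not implemented by the fast mapper yet:
 * fast_smmu_map_sg() simply fails, and fast_smmu_unmap_sg() warns if it is
 * ever reached.
 */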
static int fast_smmu_map_sg(struct device *dev, struct scatterlist *sg,
			    int nents, enum dma_data_direction dir,
			    unsigned long attrs)
{
	return -EINVAL;
}

static void fast_smmu_unmap_sg(struct device *dev,
			       struct scatterlist *sg, int nents,
			       enum dma_data_direction dir,
			       unsigned long attrs)
{
	WARN_ON_ONCE(1);
}

static void __fast_smmu_free_pages(struct page **pages, int count)
{
	int i;

	for (i = 0; i < count; i++)
		__free_page(pages[i]);
	kvfree(pages);
}

static struct page **__fast_smmu_alloc_pages(unsigned int count, gfp_t gfp)
{
	struct page **pages;
	unsigned int i = 0, array_size = count * sizeof(*pages);

	if (array_size <= PAGE_SIZE)
		pages = kzalloc(array_size, GFP_KERNEL);
	else
		pages = vzalloc(array_size);
	if (!pages)
		return NULL;

	/* IOMMU can map any pages, so highmem can also be used here */
	gfp |= __GFP_NOWARN | __GFP_HIGHMEM;

	for (i = 0; i < count; ++i) {
		struct page *page = alloc_page(gfp);

		if (!page) {
			__fast_smmu_free_pages(pages, i);
			return NULL;
		}
		pages[i] = page;
	}
	return pages;
}

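/*
 * Coherent allocation path: grab 'count' individual pages, describe them
 * with an sg_table, flush them out of the CPU caches for the non-cacheable
 * mapping, map them to a contiguous IOVA range chunk by chunk, and finally
 * give the CPU a writecombine remap of the same pages. On failure the
 * completed steps are unwound via the labels at the bottom (unwinding a
 * partially written sg mapping is still a TODO in the code).
 */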
static void *fast_smmu_alloc(struct device *dev, size_t size,
			     dma_addr_t *handle, gfp_t gfp,
			     unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
	struct sg_table sgt;
	dma_addr_t dma_addr, iova_iter;
	void *addr;
	av8l_fast_iopte *ptep;
	unsigned long flags;
	struct sg_mapping_iter miter;
	unsigned int count = ALIGN(size, SZ_4K) >> PAGE_SHIFT;
	int prot = IOMMU_READ | IOMMU_WRITE; /* TODO: extract from attrs */
	pgprot_t remap_prot = pgprot_writecombine(PAGE_KERNEL);
	struct page **pages;

	*handle = DMA_ERROR_CODE;

	pages = __fast_smmu_alloc_pages(count, gfp);
	if (!pages) {
		dev_err(dev, "no pages\n");
		return NULL;
	}

	size = ALIGN(size, SZ_4K);
	if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, gfp)) {
		dev_err(dev, "no sg table\n");
		goto out_free_pages;
	}

	if (!(prot & IOMMU_CACHE)) {
		/*
		 * The CPU-centric flushing implied by SG_MITER_TO_SG isn't
		 * sufficient here, so skip it by using the "wrong" direction.
		 */
		sg_miter_start(&miter, sgt.sgl, sgt.orig_nents,
			       SG_MITER_FROM_SG);
		while (sg_miter_next(&miter))
			__dma_flush_range(miter.addr,
					  miter.addr + miter.length);
		sg_miter_stop(&miter);
	}

	spin_lock_irqsave(&mapping->lock, flags);
	dma_addr = __fast_smmu_alloc_iova(mapping, size);
	if (dma_addr == DMA_ERROR_CODE) {
		dev_err(dev, "no iova\n");
		spin_unlock_irqrestore(&mapping->lock, flags);
		goto out_free_sg;
	}
	iova_iter = dma_addr;
	sg_miter_start(&miter, sgt.sgl, sgt.orig_nents,
		       SG_MITER_FROM_SG | SG_MITER_ATOMIC);
	while (sg_miter_next(&miter)) {
		int nptes = miter.length >> FAST_PAGE_SHIFT;

		ptep = iopte_pmd_offset(mapping->pgtbl_pmds, iova_iter);
		if (unlikely(av8l_fast_map_public(
				     ptep, page_to_phys(miter.page),
				     miter.length, prot))) {
			dev_err(dev, "no map public\n");
			/* TODO: unwind previously successful mappings */
			goto out_free_iova;
		}
		dmac_clean_range(ptep, ptep + nptes);
		iova_iter += miter.length;
	}
	sg_miter_stop(&miter);
	spin_unlock_irqrestore(&mapping->lock, flags);

	addr = dma_common_pages_remap(pages, size, VM_USERMAP, remap_prot,
				      __builtin_return_address(0));
	if (!addr) {
		dev_err(dev, "no common pages\n");
		goto out_unmap;
	}

	*handle = dma_addr;
	sg_free_table(&sgt);
	return addr;

out_unmap:
	/* need to take the lock again for page tables and iova */
	spin_lock_irqsave(&mapping->lock, flags);
	ptep = iopte_pmd_offset(mapping->pgtbl_pmds, dma_addr);
	av8l_fast_unmap_public(ptep, size);
	dmac_clean_range(ptep, ptep + count);
out_free_iova:
	__fast_smmu_free_iova(mapping, dma_addr, size);
	spin_unlock_irqrestore(&mapping->lock, flags);
out_free_sg:
	sg_free_table(&sgt);
out_free_pages:
	__fast_smmu_free_pages(pages, count);
	return NULL;
}

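/*
 * Undo fast_smmu_alloc(): tear down the CPU remap, clear the PTEs, release
 * the IOVA range, and free the backing pages. The page array is recovered
 * from the vm_struct that dma_common_pages_remap() created.
 */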
static void fast_smmu_free(struct device *dev, size_t size,
			   void *vaddr, dma_addr_t dma_handle,
			   unsigned long attrs)
{
	struct dma_fast_smmu_mapping *mapping = dev->archdata.mapping->fast;
	struct vm_struct *area;
	struct page **pages;
	size_t count = ALIGN(size, SZ_4K) >> FAST_PAGE_SHIFT;
	av8l_fast_iopte *ptep;
	unsigned long flags;

	size = ALIGN(size, SZ_4K);

	area = find_vm_area(vaddr);
	if (WARN_ON_ONCE(!area))
		return;

	pages = area->pages;
	dma_common_free_remap(vaddr, size, VM_USERMAP, false);
	ptep = iopte_pmd_offset(mapping->pgtbl_pmds, dma_handle);
	spin_lock_irqsave(&mapping->lock, flags);
	av8l_fast_unmap_public(ptep, size);
	dmac_clean_range(ptep, ptep + count);
	__fast_smmu_free_iova(mapping, dma_handle, size);
	spin_unlock_irqrestore(&mapping->lock, flags);
	__fast_smmu_free_pages(pages, count);
}

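/*
 * The fast mapper only hands out IOVAs below 4GB (see
 * __fast_smmu_create_mapping_sized() below), so only 32-bit DMA masks can
 * be satisfied.
 */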
static int fast_smmu_dma_supported(struct device *dev, u64 mask)
{
	return mask <= 0xffffffff;
}

static int fast_smmu_mapping_error(struct device *dev,
				   dma_addr_t dma_addr)
{
	return dma_addr == DMA_ERROR_CODE;
}

static const struct dma_map_ops fast_smmu_dma_ops = {
	.alloc = fast_smmu_alloc,
	.free = fast_smmu_free,
	.map_page = fast_smmu_map_page,
	.unmap_page = fast_smmu_unmap_page,
	.map_sg = fast_smmu_map_sg,
	.unmap_sg = fast_smmu_unmap_sg,
	.dma_supported = fast_smmu_dma_supported,
	.mapping_error = fast_smmu_mapping_error,
};

/**
 * __fast_smmu_create_mapping_sized
 * @base: bottom of the VA range
 * @size: size of the VA range in bytes
 *
 * Creates a mapping structure which holds information about used/unused IO
 * address ranges, which is required to perform mapping with IOMMU aware
 * functions. The only VA range supported is [0, 4GB).
 *
 * The client device needs to be attached to the mapping with the
 * fast_smmu_attach_device() function.
 */
static struct dma_fast_smmu_mapping *__fast_smmu_create_mapping_sized(
	dma_addr_t base, size_t size)
{
	struct dma_fast_smmu_mapping *fast;

	fast = kzalloc(sizeof(struct dma_fast_smmu_mapping), GFP_KERNEL);
	if (!fast)
		goto err;

	fast->base = base;
	fast->size = size;
	fast->num_4k_pages = size >> FAST_PAGE_SHIFT;
	fast->bitmap_size = BITS_TO_LONGS(fast->num_4k_pages) * sizeof(long);

	fast->bitmap = kzalloc(fast->bitmap_size, GFP_KERNEL);
	if (!fast->bitmap)
		goto err2;

	spin_lock_init(&fast->lock);

	return fast;
err2:
	kfree(fast);
err:
	return ERR_PTR(-ENOMEM);
}


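/*
 * Rough page-table memory budget for a full 4GB fast mapping (an
 * interpretation of the constant below): one 4K top-level table, four 4K
 * intermediate tables (each covering 1GB), and 2048 4K leaf tables, since
 * 4GB / 4K = 2^20 PTEs at 8 bytes each = 8MB = 2048 pages.
 */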
#define PGTBL_MEM_SIZE (SZ_4K + (4 * SZ_4K) + (2048 * SZ_4K))


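/*
 * A minimal usage sketch (assumption: the client obtains the
 * dma_iommu_mapping via the arm_iommu_create_mapping() helper from
 * asm/dma-iommu.h; the base address and size below are hypothetical):
 *
 *	struct dma_iommu_mapping *mapping;
 *
 *	mapping = arm_iommu_create_mapping(&platform_bus_type,
 *					   0x10000000, SZ_1G);
 *	if (IS_ERR_OR_NULL(mapping))
 *		return -ENOMEM;
 *	if (fast_smmu_attach_device(dev, mapping))
 *		return -EINVAL;
 *
 * After a successful attach, dev uses fast_smmu_dma_ops for its DMA API
 * calls until fast_smmu_detach_device() is called.
 */
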
/**
 * fast_smmu_attach_device
 * @dev: valid struct device pointer
 * @mapping: io address space mapping structure (returned from
 * fast_smmu_create_mapping)
 *
 * Attaches the specified io address space mapping to the provided device.
 * This replaces the dma operations (dma_map_ops pointer) with the
 * IOMMU-aware version. More than one client might be attached to
 * the same io address space mapping.
 */
int fast_smmu_attach_device(struct device *dev,
			    struct dma_iommu_mapping *mapping)
{
	int atomic_domain = 1;
	struct iommu_domain *domain = mapping->domain;
	struct iommu_pgtbl_info info;
	size_t size = mapping->bits << PAGE_SHIFT;

	if (mapping->base + size > (SZ_1G * 4ULL))
		return -EINVAL;

	if (iommu_domain_set_attr(domain, DOMAIN_ATTR_ATOMIC,
				  &atomic_domain))
		return -EINVAL;

	mapping->fast = __fast_smmu_create_mapping_sized(mapping->base, size);
	if (IS_ERR(mapping->fast))
		return -ENOMEM;
	mapping->fast->domain = domain;
	mapping->fast->dev = dev;

	if (iommu_attach_device(domain, dev))
		return -EINVAL;

	if (iommu_domain_get_attr(domain, DOMAIN_ATTR_PGTBL_INFO,
				  &info)) {
		dev_err(dev, "Couldn't get page table info\n");
		fast_smmu_detach_device(dev, mapping);
		return -EINVAL;
	}
	mapping->fast->pgtbl_pmds = info.pmds;

	dev->archdata.mapping = mapping;
	set_dma_ops(dev, &fast_smmu_dma_ops);

	return 0;
}
EXPORT_SYMBOL(fast_smmu_attach_device);

/**
 * fast_smmu_detach_device
 * @dev: valid struct device pointer
 * @mapping: io address space mapping structure to detach from
 *
 * Detaches the provided device from a previously attached map.
 * This clears the dma operations (dma_map_ops pointer).
 */
void fast_smmu_detach_device(struct device *dev,
			     struct dma_iommu_mapping *mapping)
{
	iommu_detach_device(mapping->domain, dev);
	dev->archdata.mapping = NULL;
	set_dma_ops(dev, NULL);

	kfree(mapping->fast->bitmap);
	kfree(mapping->fast);
}
EXPORT_SYMBOL(fast_smmu_detach_device);