/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon.h"
#include "radeon_trace.h"

/*
 * GPUVM
 * GPUVM is similar to the legacy gart on older asics, however
 * rather than there being a single global gart table
 * for the entire GPU, there are multiple VM page tables active
 * at any given time. The VM page tables can contain a mix of
 * vram pages and system memory pages. System memory pages
 * can be mapped as snooped (cached system pages) or unsnooped
 * (uncached system pages).
 * Each VM has an ID associated with it and there is a page table
 * associated with each VMID. When executing a command buffer,
 * the kernel tells the ring what VMID to use for that command
 * buffer. VMIDs are allocated dynamically as commands are submitted.
 * The userspace drivers maintain their own address space and the kernel
 * sets up their page tables accordingly when they submit their
 * command buffers and a VMID is assigned.
 * Cayman/Trinity support up to 8 active VMs at any given time;
 * SI supports 16.
 */
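
/*
 * Illustrative sketch of how a VM is typically driven through the helpers
 * below (an assumption drawn from the function documentation in this file,
 * not copied from a real caller; locking and error handling are simplified):
 *
 *   struct radeon_vm vm;
 *
 *   radeon_vm_init(rdev, &vm);
 *   ...
 *   mutex_lock(&rdev->vm_manager.lock);
 *   mutex_lock(&vm.mutex);
 *   r = radeon_vm_alloc_pt(rdev, &vm);     - back the VM with a page directory
 *   radeon_vm_add_to_lru(rdev, &vm);       - make it a candidate for eviction
 *   mutex_unlock(&vm.mutex);
 *   mutex_unlock(&rdev->vm_manager.lock);
 *   ...
 *   radeon_vm_fini(rdev, &vm);             - frees page tables and mappings
 */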

/**
 * radeon_vm_num_pdes - return the number of page directory entries
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the number of page directory entries (cayman+).
 */
static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
{
        return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE;
}

/**
 * radeon_vm_directory_size - returns the size of the page directory in bytes
 *
 * @rdev: radeon_device pointer
 *
 * Calculate the size of the page directory in bytes (cayman+).
 */
static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
{
        return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
}
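
/*
 * For a feel of the numbers: assuming RADEON_VM_BLOCK_SIZE is 9 (512 PTEs
 * per page table) and max_pfn describes a 4GB address space of 4KB GPU
 * pages (max_pfn = 1 << 20), radeon_vm_num_pdes() yields 1 << 11 = 2048
 * entries and radeon_vm_directory_size() a 16KB page directory
 * (2048 * 8 bytes). The concrete values depend on how the vm_manager is
 * configured for the asic in use.
 */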

/**
 * radeon_vm_manager_init - init the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Init the vm manager (cayman+).
 * Returns 0 for success, error for failure.
 */
int radeon_vm_manager_init(struct radeon_device *rdev)
{
        struct radeon_vm *vm;
        struct radeon_bo_va *bo_va;
        int r;
        unsigned size;

        if (!rdev->vm_manager.enabled) {
                /* allocate enough for 2 full VM pts */
                size = radeon_vm_directory_size(rdev);
                size += rdev->vm_manager.max_pfn * 8;
                size *= 2;
                r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
                                              RADEON_GPU_PAGE_ALIGN(size),
                                              RADEON_VM_PTB_ALIGN_SIZE,
                                              RADEON_GEM_DOMAIN_VRAM);
                if (r) {
                        dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
                                (rdev->vm_manager.max_pfn * 8) >> 10);
                        return r;
                }

                r = radeon_asic_vm_init(rdev);
                if (r)
                        return r;

                rdev->vm_manager.enabled = true;

                r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager);
                if (r)
                        return r;
        }

        /* restore page table */
        list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
                if (vm->page_directory == NULL)
                        continue;

                list_for_each_entry(bo_va, &vm->va, vm_list) {
                        bo_va->valid = false;
                }
        }
        return 0;
}

/**
 * radeon_vm_free_pt - free the page table for a specific vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm to unbind
 *
 * Free the page table of a specific vm (cayman+).
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_free_pt(struct radeon_device *rdev,
                              struct radeon_vm *vm)
{
        struct radeon_bo_va *bo_va;
        int i;

        if (!vm->page_directory)
                return;

        list_del_init(&vm->list);
        radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);

        list_for_each_entry(bo_va, &vm->va, vm_list) {
                bo_va->valid = false;
        }

        if (vm->page_tables == NULL)
                return;

        for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
                radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence);

        kfree(vm->page_tables);
}

/**
 * radeon_vm_manager_fini - tear down the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the VM manager (cayman+).
 */
void radeon_vm_manager_fini(struct radeon_device *rdev)
{
        struct radeon_vm *vm, *tmp;
        int i;

        if (!rdev->vm_manager.enabled)
                return;

        mutex_lock(&rdev->vm_manager.lock);
        /* free all allocated page tables */
        list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
                mutex_lock(&vm->mutex);
                radeon_vm_free_pt(rdev, vm);
                mutex_unlock(&vm->mutex);
        }
        for (i = 0; i < RADEON_NUM_VM; ++i) {
                radeon_fence_unref(&rdev->vm_manager.active[i]);
        }
        radeon_asic_vm_fini(rdev);
        mutex_unlock(&rdev->vm_manager.lock);

        radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager);
        radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager);
        rdev->vm_manager.enabled = false;
}

/**
 * radeon_vm_evict - evict page table to make room for new one
 *
 * @rdev: radeon_device pointer
 * @vm: VM we want to allocate something for
 *
 * Evict a VM from the lru, making sure that it isn't @vm (cayman+).
 * Returns 0 for success, -ENOMEM for failure.
 *
 * Global and local mutex must be locked!
 */
static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm)
{
        struct radeon_vm *vm_evict;

        if (list_empty(&rdev->vm_manager.lru_vm))
                return -ENOMEM;

        vm_evict = list_first_entry(&rdev->vm_manager.lru_vm,
                                    struct radeon_vm, list);
        if (vm_evict == vm)
                return -ENOMEM;

        mutex_lock(&vm_evict->mutex);
        radeon_vm_free_pt(rdev, vm_evict);
        mutex_unlock(&vm_evict->mutex);
        return 0;
}

/**
 * radeon_vm_alloc_pt - allocates a page table for a VM
 *
 * @rdev: radeon_device pointer
 * @vm: vm to bind
 *
 * Allocate a page table for the requested vm (cayman+).
 * Returns 0 for success, error for failure.
 *
 * Global and local mutex must be locked!
 */
int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
{
        unsigned pd_size, pd_entries, pts_size;
        struct radeon_ib ib;
        int r;

        if (vm == NULL) {
                return -EINVAL;
        }

        if (vm->page_directory != NULL) {
                return 0;
        }

        pd_size = radeon_vm_directory_size(rdev);
        pd_entries = radeon_vm_num_pdes(rdev);

retry:
        r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
                             &vm->page_directory, pd_size,
                             RADEON_VM_PTB_ALIGN_SIZE, false);
        if (r == -ENOMEM) {
                r = radeon_vm_evict(rdev, vm);
                if (r)
                        return r;
                goto retry;

        } else if (r) {
                return r;
        }

        vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory);

        /* Initially clear the page directory */
        r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib,
                          NULL, pd_entries * 2 + 64);
        if (r) {
                radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
                return r;
        }

        ib.length_dw = 0;

        radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr,
                                0, pd_entries, 0, 0);

        radeon_semaphore_sync_to(ib.semaphore, vm->fence);
        r = radeon_ib_schedule(rdev, &ib, NULL);
        if (r) {
                radeon_ib_free(rdev, &ib);
                radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
                return r;
        }
        radeon_fence_unref(&vm->fence);
        vm->fence = radeon_fence_ref(ib.fence);
        radeon_ib_free(rdev, &ib);
        radeon_fence_unref(&vm->last_flush);

        /* allocate page table array */
        pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *);
        vm->page_tables = kzalloc(pts_size, GFP_KERNEL);

        if (vm->page_tables == NULL) {
                DRM_ERROR("Cannot allocate memory for page table array\n");
                radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
                return -ENOMEM;
        }

        return 0;
}

/**
 * radeon_vm_add_to_lru - add a VM's page table to the LRU list
 *
 * @rdev: radeon_device pointer
 * @vm: vm to add to LRU
 *
 * Add the allocated page table to the LRU list (cayman+).
 *
 * Global mutex must be locked!
 */
void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm)
{
        list_del_init(&vm->list);
        list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
}

/**
 * radeon_vm_grab_id - allocate the next free VMID
 *
 * @rdev: radeon_device pointer
 * @vm: vm to allocate id for
 * @ring: ring we want to submit job to
 *
 * Allocate an id for the vm (cayman+).
 * Returns the fence we need to sync to (if any).
 *
 * Global and local mutex must be locked!
 */
struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
                                       struct radeon_vm *vm, int ring)
{
        struct radeon_fence *best[RADEON_NUM_RINGS] = {};
        unsigned choices[2] = {};
        unsigned i;

        /* check if the id is still valid */
        if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
                return NULL;

        /* we definitely need to flush */
        radeon_fence_unref(&vm->last_flush);

        /* skip over VMID 0, since it is the system VM */
        for (i = 1; i < rdev->vm_manager.nvm; ++i) {
                struct radeon_fence *fence = rdev->vm_manager.active[i];

                if (fence == NULL) {
                        /* found a free one */
                        vm->id = i;
                        trace_radeon_vm_grab_id(vm->id, ring);
                        return NULL;
                }

                if (radeon_fence_is_earlier(fence, best[fence->ring])) {
                        best[fence->ring] = fence;
                        choices[fence->ring == ring ? 0 : 1] = i;
                }
        }

        for (i = 0; i < 2; ++i) {
                if (choices[i]) {
                        vm->id = choices[i];
                        trace_radeon_vm_grab_id(vm->id, ring);
                        return rdev->vm_manager.active[choices[i]];
                }
        }

        /* should never happen */
        BUG();
        return NULL;
}

/**
 * radeon_vm_flush - hardware flush the vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to flush
 * @ring: ring to use for flush
 *
 * Flush the vm (cayman+).
 *
 * Global and local mutex must be locked!
 */
void radeon_vm_flush(struct radeon_device *rdev,
                     struct radeon_vm *vm,
                     int ring)
{
        /* if we can't remember our last VM flush then flush now! */
        /* XXX figure out why we have to flush all the time */
        if (!vm->last_flush || true)
                radeon_ring_vm_flush(rdev, ring, vm);
}

/**
 * radeon_vm_fence - remember fence for vm
 *
 * @rdev: radeon_device pointer
 * @vm: vm we want to fence
 * @fence: fence to remember
 *
 * Fence the vm (cayman+).
 * Set the fence used to protect page table and id.
 *
 * Global and local mutex must be locked!
 */
void radeon_vm_fence(struct radeon_device *rdev,
                     struct radeon_vm *vm,
                     struct radeon_fence *fence)
{
        radeon_fence_unref(&vm->fence);
        vm->fence = radeon_fence_ref(fence);

        radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
        rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);

        radeon_fence_unref(&vm->last_id_use);
        vm->last_id_use = radeon_fence_ref(fence);

        /* we just flushed the VM, remember that */
        if (!vm->last_flush)
                vm->last_flush = radeon_fence_ref(fence);
}

/**
 * radeon_vm_bo_find - find the bo_va for a specific vm & bo
 *
 * @vm: requested vm
 * @bo: requested buffer object
 *
 * Find @bo inside the requested vm (cayman+).
 * Search inside the @bo's vm list for the requested vm.
 * Returns the found bo_va or NULL if none is found.
 *
 * Object has to be reserved!
 */
struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
                                       struct radeon_bo *bo)
{
        struct radeon_bo_va *bo_va;

        list_for_each_entry(bo_va, &bo->va, bo_list) {
                if (bo_va->vm == vm) {
                        return bo_va;
                }
        }
        return NULL;
}

/**
 * radeon_vm_bo_add - add a bo to a specific vm
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @bo: radeon buffer object
 *
 * Add @bo into the requested vm (cayman+).
 * Add @bo to the list of bos associated with the vm.
 * Returns the newly added bo_va or NULL for failure.
 *
 * Object has to be reserved!
 */
struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
                                      struct radeon_vm *vm,
                                      struct radeon_bo *bo)
{
        struct radeon_bo_va *bo_va;

        bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
        if (bo_va == NULL) {
                return NULL;
        }
        bo_va->vm = vm;
        bo_va->bo = bo;
        bo_va->soffset = 0;
        bo_va->eoffset = 0;
        bo_va->flags = 0;
        bo_va->valid = false;
        bo_va->ref_count = 1;
        INIT_LIST_HEAD(&bo_va->bo_list);
        INIT_LIST_HEAD(&bo_va->vm_list);

        mutex_lock(&vm->mutex);
        list_add(&bo_va->vm_list, &vm->va);
        list_add_tail(&bo_va->bo_list, &bo->va);
        mutex_unlock(&vm->mutex);

        return bo_va;
}

/**
 * radeon_vm_bo_set_addr - set the bo's virtual address inside a vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: bo_va to store the address
 * @soffset: requested offset of the buffer in the VM address space
 * @flags: attributes of pages (read/write/valid/etc.)
 *
 * Set offset of @bo_va (cayman+).
 * Validate and set the offset requested within the vm address space.
 * Returns 0 for success, error for failure.
 *
 * Object has to be reserved!
 */
int radeon_vm_bo_set_addr(struct radeon_device *rdev,
                          struct radeon_bo_va *bo_va,
                          uint64_t soffset,
                          uint32_t flags)
{
        uint64_t size = radeon_bo_size(bo_va->bo);
        uint64_t eoffset, last_offset = 0;
        struct radeon_vm *vm = bo_va->vm;
        struct radeon_bo_va *tmp;
        struct list_head *head;
        unsigned last_pfn;

        if (soffset) {
                /* make sure object fits at this offset */
                eoffset = soffset + size;
                if (soffset >= eoffset) {
                        return -EINVAL;
                }

                last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
                if (last_pfn > rdev->vm_manager.max_pfn) {
                        dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
                                last_pfn, rdev->vm_manager.max_pfn);
                        return -EINVAL;
                }

        } else {
                eoffset = last_pfn = 0;
        }

        mutex_lock(&vm->mutex);
        head = &vm->va;
        last_offset = 0;
        list_for_each_entry(tmp, &vm->va, vm_list) {
                if (bo_va == tmp) {
                        /* skip over currently modified bo */
                        continue;
                }

                if (soffset >= last_offset && eoffset <= tmp->soffset) {
                        /* bo can be added before this one */
                        break;
                }
                if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
                        /* bo and tmp overlap, invalid offset */
                        dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
                                bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
                                (unsigned)tmp->soffset, (unsigned)tmp->eoffset);
                        mutex_unlock(&vm->mutex);
                        return -EINVAL;
                }
                last_offset = tmp->eoffset;
                head = &tmp->vm_list;
        }

        bo_va->soffset = soffset;
        bo_va->eoffset = eoffset;
        bo_va->flags = flags;
        bo_va->valid = false;
        list_move(&bo_va->vm_list, head);

        mutex_unlock(&vm->mutex);
        return 0;
}

/**
 * radeon_vm_map_gart - get the physical address of a gart page
 *
 * @rdev: radeon_device pointer
 * @addr: the unmapped addr
 *
 * Look up the physical address of the page that the pte resolves
 * to (cayman+).
 * Returns the physical address of the page.
 */
uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
{
        uint64_t result;

        /* page table offset */
        result = rdev->gart.pages_addr[addr >> PAGE_SHIFT];

        /* in case cpu page size != gpu page size */
        result |= addr & (~PAGE_MASK);

        return result;
}
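
/*
 * As a concrete illustration (assuming a 4KB CPU PAGE_SIZE): for
 * addr = 0x12345 this reads pages_addr[0x12], the physical base of the
 * backing CPU page, and ORs back the low 0x345 bytes, so the sub-page
 * offset is preserved when the CPU page size is larger than the GPU
 * page size.
 */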

/**
 * radeon_vm_page_flags - translate page flags to what the hw uses
 *
 * @flags: flags coming from userspace
 *
 * Translate the flags the userspace ABI uses to hw flags.
 */
static uint32_t radeon_vm_page_flags(uint32_t flags)
{
        uint32_t hw_flags = 0;
        hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
        hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
        hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
        if (flags & RADEON_VM_PAGE_SYSTEM) {
                hw_flags |= R600_PTE_SYSTEM;
                hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
        }
        return hw_flags;
}
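
/*
 * For example, a snooped system page mapped read/write
 * (RADEON_VM_PAGE_VALID | READABLE | WRITEABLE | SYSTEM | SNOOPED)
 * becomes R600_PTE_VALID | READABLE | WRITEABLE | SYSTEM | SNOOPED,
 * while RADEON_VM_PAGE_SNOOPED without RADEON_VM_PAGE_SYSTEM is ignored.
 */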

/**
 * radeon_vm_update_pdes - make sure that page directory is valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @ib: indirect buffer to fill with the update commands
 * @start: start of GPU address range
 * @end: end of GPU address range
 *
 * Allocates new page tables if necessary
 * and updates the page directory (cayman+).
 * Returns 0 for success, error for failure.
 *
 * Global and local mutex must be locked!
 */
static int radeon_vm_update_pdes(struct radeon_device *rdev,
                                 struct radeon_vm *vm,
                                 struct radeon_ib *ib,
                                 uint64_t start, uint64_t end)
{
        static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;

        uint64_t last_pde = ~0, last_pt = ~0;
        unsigned count = 0;
        uint64_t pt_idx;
        int r;

        start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
        end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;

        /* walk over the address space and update the page directory */
        for (pt_idx = start; pt_idx <= end; ++pt_idx) {
                uint64_t pde, pt;

                if (vm->page_tables[pt_idx])
                        continue;

retry:
                r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
                                     &vm->page_tables[pt_idx],
                                     RADEON_VM_PTE_COUNT * 8,
                                     RADEON_GPU_PAGE_SIZE, false);

                if (r == -ENOMEM) {
                        r = radeon_vm_evict(rdev, vm);
                        if (r)
                                return r;
                        goto retry;
                } else if (r) {
                        return r;
                }

                pde = vm->pd_gpu_addr + pt_idx * 8;

                pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);

                if (((last_pde + 8 * count) != pde) ||
                    ((last_pt + incr * count) != pt)) {

                        if (count) {
                                radeon_asic_vm_set_page(rdev, ib, last_pde,
                                                        last_pt, count, incr,
                                                        R600_PTE_VALID);

                                count *= RADEON_VM_PTE_COUNT;
                                radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
                                                        count, 0, 0);
                        }

                        count = 1;
                        last_pde = pde;
                        last_pt = pt;
                } else {
                        ++count;
                }
        }

        if (count) {
                radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count,
                                        incr, R600_PTE_VALID);

                count *= RADEON_VM_PTE_COUNT;
                radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
                                        count, 0, 0);
        }

        return 0;
}

/**
 * radeon_vm_update_ptes - make sure that page tables are valid
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @ib: indirect buffer to fill with the update commands
 * @start: start of GPU address range
 * @end: end of GPU address range
 * @dst: destination address to map to
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end (cayman+).
 *
 * Global and local mutex must be locked!
 */
static void radeon_vm_update_ptes(struct radeon_device *rdev,
                                  struct radeon_vm *vm,
                                  struct radeon_ib *ib,
                                  uint64_t start, uint64_t end,
                                  uint64_t dst, uint32_t flags)
{
        static const uint64_t mask = RADEON_VM_PTE_COUNT - 1;

        uint64_t last_pte = ~0, last_dst = ~0;
        unsigned count = 0;
        uint64_t addr;

        start = start / RADEON_GPU_PAGE_SIZE;
        end = end / RADEON_GPU_PAGE_SIZE;

        /* walk over the address space and update the page tables */
        for (addr = start; addr < end; ) {
                uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE;
                unsigned nptes;
                uint64_t pte;

                if ((addr & ~mask) == (end & ~mask))
                        nptes = end - addr;
                else
                        nptes = RADEON_VM_PTE_COUNT - (addr & mask);

                pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
                pte += (addr & mask) * 8;

                if ((last_pte + 8 * count) != pte) {

                        if (count) {
                                radeon_asic_vm_set_page(rdev, ib, last_pte,
                                                        last_dst, count,
                                                        RADEON_GPU_PAGE_SIZE,
                                                        flags);
                        }

                        count = nptes;
                        last_pte = pte;
                        last_dst = dst;
                } else {
                        count += nptes;
                }

                addr += nptes;
                dst += nptes * RADEON_GPU_PAGE_SIZE;
        }

        if (count) {
                radeon_asic_vm_set_page(rdev, ib, last_pte,
                                        last_dst, count,
                                        RADEON_GPU_PAGE_SIZE, flags);
        }
}

/**
 * radeon_vm_bo_update - map a bo into the vm page table
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 * @bo: radeon buffer object
 * @mem: ttm mem
 *
 * Fill in the page table entries for @bo (cayman+).
 * Returns 0 for success, -EINVAL for failure.
 *
 * Object has to be reserved and global and local mutex must be locked!
 */
int radeon_vm_bo_update(struct radeon_device *rdev,
                        struct radeon_vm *vm,
                        struct radeon_bo *bo,
                        struct ttm_mem_reg *mem)
{
        struct radeon_ib ib;
        struct radeon_bo_va *bo_va;
        unsigned nptes, npdes, ndw;
        uint64_t addr;
        int r;

        /* nothing to do if vm isn't bound */
        if (vm->page_directory == NULL)
                return 0;

        bo_va = radeon_vm_bo_find(vm, bo);
        if (bo_va == NULL) {
                dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
                return -EINVAL;
        }

        if (!bo_va->soffset) {
                dev_err(rdev->dev, "bo %p doesn't have a mapping in vm %p\n",
                        bo, vm);
                return -EINVAL;
        }

        if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL))
                return 0;

        bo_va->flags &= ~RADEON_VM_PAGE_VALID;
        bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
        if (mem) {
                addr = mem->start << PAGE_SHIFT;
                if (mem->mem_type != TTM_PL_SYSTEM) {
                        bo_va->flags |= RADEON_VM_PAGE_VALID;
                        bo_va->valid = true;
                }
                if (mem->mem_type == TTM_PL_TT) {
                        bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
                } else {
                        addr += rdev->vm_manager.vram_base_offset;
                }
        } else {
                addr = 0;
                bo_va->valid = false;
        }

        trace_radeon_vm_bo_update(bo_va);

        nptes = radeon_bo_ngpu_pages(bo);

        /* assume two extra pdes in case the mapping overlaps the borders */
        npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;

        /* padding, etc. */
        ndw = 64;

        if (RADEON_VM_BLOCK_SIZE > 11)
                /* reserve space for one header for every 2k dwords */
                ndw += (nptes >> 11) * 4;
        else
                /* reserve space for one header for
                   every (1 << BLOCK_SIZE) entries */
                ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4;

        /* reserve space for pte addresses */
        ndw += nptes * 2;

        /* reserve space for one header for every 2k dwords */
        ndw += (npdes >> 11) * 4;

        /* reserve space for pde addresses */
        ndw += npdes * 2;

        /* reserve space for clearing new page tables */
        ndw += npdes * 2 * RADEON_VM_PTE_COUNT;

        /* update too big for an IB */
        if (ndw > 0xfffff)
                return -ENOMEM;

        r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
        if (r)
                return r;
        ib.length_dw = 0;

        r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset);
        if (r) {
                radeon_ib_free(rdev, &ib);
                return r;
        }

        radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
                              addr, radeon_vm_page_flags(bo_va->flags));

        radeon_semaphore_sync_to(ib.semaphore, vm->fence);
        r = radeon_ib_schedule(rdev, &ib, NULL);
        if (r) {
                radeon_ib_free(rdev, &ib);
                return r;
        }
        radeon_fence_unref(&vm->fence);
        vm->fence = radeon_fence_ref(ib.fence);
        radeon_ib_free(rdev, &ib);
        radeon_fence_unref(&vm->last_flush);

        return 0;
}

/**
 * radeon_vm_bo_rmv - remove a bo from a specific vm
 *
 * @rdev: radeon_device pointer
 * @bo_va: requested bo_va
 *
 * Remove @bo_va->bo from the requested vm (cayman+).
 * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and
 * remove the ptes for @bo_va in the page table.
 * Returns 0 for success.
 *
 * Object has to be reserved!
 */
int radeon_vm_bo_rmv(struct radeon_device *rdev,
                     struct radeon_bo_va *bo_va)
{
        int r = 0;

        mutex_lock(&rdev->vm_manager.lock);
        mutex_lock(&bo_va->vm->mutex);
        if (bo_va->soffset) {
                r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL);
        }
        mutex_unlock(&rdev->vm_manager.lock);
        list_del(&bo_va->vm_list);
        mutex_unlock(&bo_va->vm->mutex);
        list_del(&bo_va->bo_list);

        kfree(bo_va);
        return r;
}

/**
 * radeon_vm_bo_invalidate - mark the bo as invalid
 *
 * @rdev: radeon_device pointer
 * @bo: radeon buffer object
 *
 * Mark @bo as invalid (cayman+).
 */
void radeon_vm_bo_invalidate(struct radeon_device *rdev,
                             struct radeon_bo *bo)
{
        struct radeon_bo_va *bo_va;

        list_for_each_entry(bo_va, &bo->va, bo_list) {
                bo_va->valid = false;
        }
}

/**
 * radeon_vm_init - initialize a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Init @vm fields (cayman+).
 */
void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
{
        vm->id = 0;
        vm->fence = NULL;
        vm->last_flush = NULL;
        vm->last_id_use = NULL;
        mutex_init(&vm->mutex);
        INIT_LIST_HEAD(&vm->list);
        INIT_LIST_HEAD(&vm->va);
}

/**
 * radeon_vm_fini - tear down a vm instance
 *
 * @rdev: radeon_device pointer
 * @vm: requested vm
 *
 * Tear down @vm (cayman+).
 * Unbind the VM and remove all bos from the vm bo list.
 */
void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
{
        struct radeon_bo_va *bo_va, *tmp;
        int r;

        mutex_lock(&rdev->vm_manager.lock);
        mutex_lock(&vm->mutex);
        radeon_vm_free_pt(rdev, vm);
        mutex_unlock(&rdev->vm_manager.lock);

        if (!list_empty(&vm->va)) {
                dev_err(rdev->dev, "still active bo inside vm\n");
        }
        list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
                list_del_init(&bo_va->vm_list);
                r = radeon_bo_reserve(bo_va->bo, false);
                if (!r) {
                        list_del_init(&bo_va->bo_list);
                        radeon_bo_unreserve(bo_va->bo);
                        kfree(bo_va);
                }
        }
        radeon_fence_unref(&vm->fence);
        radeon_fence_unref(&vm->last_flush);
        radeon_fence_unref(&vm->last_id_use);
        mutex_unlock(&vm->mutex);
}