Blame - drivers/gpu/drm/vc4/vc4_gem.c - kernel/msm-4.19

blob: 936dddfa890f29e2d866e520ab969b357ec82b1e [file] [log] [blame]

Eric Anholt	d5b1a78	2015-11-30 12:13:37 -0800	[diff] [blame^]	1	/*
				2	* Copyright © 2014 Broadcom
				3	*
				4	* Permission is hereby granted, free of charge, to any person obtaining a
				5	* copy of this software and associated documentation files (the "Software"),
				6	* to deal in the Software without restriction, including without limitation
				7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
				8	* and/or sell copies of the Software, and to permit persons to whom the
				9	* Software is furnished to do so, subject to the following conditions:
				10	*
				11	* The above copyright notice and this permission notice (including the next
				12	* paragraph) shall be included in all copies or substantial portions of the
				13	* Software.
				14	*
				15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
				16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
				17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
				18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
				19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
				20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
				21	* IN THE SOFTWARE.
				22	*/
				23
				24	#include <linux/module.h>
				25	#include <linux/platform_device.h>
				26	#include <linux/device.h>
				27	#include <linux/io.h>
				28
				29	#include "uapi/drm/vc4_drm.h"
				30	#include "vc4_drv.h"
				31	#include "vc4_regs.h"
				32	#include "vc4_trace.h"
				33
				34	static void
				35	vc4_queue_hangcheck(struct drm_device *dev)
				36	{
				37	struct vc4_dev *vc4 = to_vc4_dev(dev);
				38
				39	mod_timer(&vc4->hangcheck.timer,
				40	round_jiffies_up(jiffies + msecs_to_jiffies(100)));
				41	}
				42
				43	static void
				44	vc4_reset(struct drm_device *dev)
				45	{
				46	struct vc4_dev *vc4 = to_vc4_dev(dev);
				47
				48	DRM_INFO("Resetting GPU.\n");
				49	vc4_v3d_set_power(vc4, false);
				50	vc4_v3d_set_power(vc4, true);
				51
				52	vc4_irq_reset(dev);
				53
				54	/* Rearm the hangcheck -- another job might have been waiting
				55	* for our hung one to get kicked off, and vc4_irq_reset()
				56	* would have started it.
				57	*/
				58	vc4_queue_hangcheck(dev);
				59	}
				60
				61	static void
				62	vc4_reset_work(struct work_struct *work)
				63	{
				64	struct vc4_dev *vc4 =
				65	container_of(work, struct vc4_dev, hangcheck.reset_work);
				66
				67	vc4_reset(vc4->dev);
				68	}
				69
				70	static void
				71	vc4_hangcheck_elapsed(unsigned long data)
				72	{
				73	struct drm_device dev = (struct drm_device )data;
				74	struct vc4_dev *vc4 = to_vc4_dev(dev);
				75	uint32_t ct0ca, ct1ca;
				76
				77	/* If idle, we can stop watching for hangs. */
				78	if (list_empty(&vc4->job_list))
				79	return;
				80
				81	ct0ca = V3D_READ(V3D_CTNCA(0));
				82	ct1ca = V3D_READ(V3D_CTNCA(1));
				83
				84	/* If we've made any progress in execution, rearm the timer
				85	* and wait.
				86	*/
				87	if (ct0ca != vc4->hangcheck.last_ct0ca \|\|
				88	ct1ca != vc4->hangcheck.last_ct1ca) {
				89	vc4->hangcheck.last_ct0ca = ct0ca;
				90	vc4->hangcheck.last_ct1ca = ct1ca;
				91	vc4_queue_hangcheck(dev);
				92	return;
				93	}
				94
				95	/* We've gone too long with no progress, reset. This has to
				96	* be done from a work struct, since resetting can sleep and
				97	* this timer hook isn't allowed to.
				98	*/
				99	schedule_work(&vc4->hangcheck.reset_work);
				100	}
				101
				102	static void
				103	submit_cl(struct drm_device *dev, uint32_t thread, uint32_t start, uint32_t end)
				104	{
				105	struct vc4_dev *vc4 = to_vc4_dev(dev);
				106
				107	/* Set the current and end address of the control list.
				108	* Writing the end register is what starts the job.
				109	*/
				110	V3D_WRITE(V3D_CTNCA(thread), start);
				111	V3D_WRITE(V3D_CTNEA(thread), end);
				112	}
				113
				114	int
				115	vc4_wait_for_seqno(struct drm_device *dev, uint64_t seqno, uint64_t timeout_ns,
				116	bool interruptible)
				117	{
				118	struct vc4_dev *vc4 = to_vc4_dev(dev);
				119	int ret = 0;
				120	unsigned long timeout_expire;
				121	DEFINE_WAIT(wait);
				122
				123	if (vc4->finished_seqno >= seqno)
				124	return 0;
				125
				126	if (timeout_ns == 0)
				127	return -ETIME;
				128
				129	timeout_expire = jiffies + nsecs_to_jiffies(timeout_ns);
				130
				131	trace_vc4_wait_for_seqno_begin(dev, seqno, timeout_ns);
				132	for (;;) {
				133	prepare_to_wait(&vc4->job_wait_queue, &wait,
				134	interruptible ? TASK_INTERRUPTIBLE :
				135	TASK_UNINTERRUPTIBLE);
				136
				137	if (interruptible && signal_pending(current)) {
				138	ret = -ERESTARTSYS;
				139	break;
				140	}
				141
				142	if (vc4->finished_seqno >= seqno)
				143	break;
				144
				145	if (timeout_ns != ~0ull) {
				146	if (time_after_eq(jiffies, timeout_expire)) {
				147	ret = -ETIME;
				148	break;
				149	}
				150	schedule_timeout(timeout_expire - jiffies);
				151	} else {
				152	schedule();
				153	}
				154	}
				155
				156	finish_wait(&vc4->job_wait_queue, &wait);
				157	trace_vc4_wait_for_seqno_end(dev, seqno);
				158
				159	if (ret && ret != -ERESTARTSYS) {
				160	DRM_ERROR("timeout waiting for render thread idle\n");
				161	return ret;
				162	}
				163
				164	return 0;
				165	}
				166
				167	static void
				168	vc4_flush_caches(struct drm_device *dev)
				169	{
				170	struct vc4_dev *vc4 = to_vc4_dev(dev);
				171
				172	/* Flush the GPU L2 caches. These caches sit on top of system
				173	* L3 (the 128kb or so shared with the CPU), and are
				174	* non-allocating in the L3.
				175	*/
				176	V3D_WRITE(V3D_L2CACTL,
				177	V3D_L2CACTL_L2CCLR);
				178
				179	V3D_WRITE(V3D_SLCACTL,
				180	VC4_SET_FIELD(0xf, V3D_SLCACTL_T1CC) \|
				181	VC4_SET_FIELD(0xf, V3D_SLCACTL_T0CC) \|
				182	VC4_SET_FIELD(0xf, V3D_SLCACTL_UCC) \|
				183	VC4_SET_FIELD(0xf, V3D_SLCACTL_ICC));
				184	}
				185
				186	/* Sets the registers for the next job to be actually be executed in
				187	* the hardware.
				188	*
				189	* The job_lock should be held during this.
				190	*/
				191	void
				192	vc4_submit_next_job(struct drm_device *dev)
				193	{
				194	struct vc4_dev *vc4 = to_vc4_dev(dev);
				195	struct vc4_exec_info *exec = vc4_first_job(vc4);
				196
				197	if (!exec)
				198	return;
				199
				200	vc4_flush_caches(dev);
				201
				202	/* Disable the binner's pre-loaded overflow memory address */
				203	V3D_WRITE(V3D_BPOA, 0);
				204	V3D_WRITE(V3D_BPOS, 0);
				205
				206	if (exec->ct0ca != exec->ct0ea)
				207	submit_cl(dev, 0, exec->ct0ca, exec->ct0ea);
				208	submit_cl(dev, 1, exec->ct1ca, exec->ct1ea);
				209	}
				210
				211	static void
				212	vc4_update_bo_seqnos(struct vc4_exec_info *exec, uint64_t seqno)
				213	{
				214	struct vc4_bo *bo;
				215	unsigned i;
				216
				217	for (i = 0; i < exec->bo_count; i++) {
				218	bo = to_vc4_bo(&exec->bo[i]->base);
				219	bo->seqno = seqno;
				220	}
				221
				222	list_for_each_entry(bo, &exec->unref_list, unref_head) {
				223	bo->seqno = seqno;
				224	}
				225	}
				226
				227	/* Queues a struct vc4_exec_info for execution. If no job is
				228	* currently executing, then submits it.
				229	*
				230	* Unlike most GPUs, our hardware only handles one command list at a
				231	* time. To queue multiple jobs at once, we'd need to edit the
				232	* previous command list to have a jump to the new one at the end, and
				233	* then bump the end address. That's a change for a later date,
				234	* though.
				235	*/
				236	static void
				237	vc4_queue_submit(struct drm_device dev, struct vc4_exec_info exec)
				238	{
				239	struct vc4_dev *vc4 = to_vc4_dev(dev);
				240	uint64_t seqno;
				241	unsigned long irqflags;
				242
				243	spin_lock_irqsave(&vc4->job_lock, irqflags);
				244
				245	seqno = ++vc4->emit_seqno;
				246	exec->seqno = seqno;
				247	vc4_update_bo_seqnos(exec, seqno);
				248
				249	list_add_tail(&exec->head, &vc4->job_list);
				250
				251	/* If no job was executing, kick ours off. Otherwise, it'll
				252	* get started when the previous job's frame done interrupt
				253	* occurs.
				254	*/
				255	if (vc4_first_job(vc4) == exec) {
				256	vc4_submit_next_job(dev);
				257	vc4_queue_hangcheck(dev);
				258	}
				259
				260	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
				261	}
				262
				263	/**
				264	* Looks up a bunch of GEM handles for BOs and stores the array for
				265	* use in the command validator that actually writes relocated
				266	* addresses pointing to them.
				267	*/
				268	static int
				269	vc4_cl_lookup_bos(struct drm_device *dev,
				270	struct drm_file *file_priv,
				271	struct vc4_exec_info *exec)
				272	{
				273	struct drm_vc4_submit_cl *args = exec->args;
				274	uint32_t *handles;
				275	int ret = 0;
				276	int i;
				277
				278	exec->bo_count = args->bo_handle_count;
				279
				280	if (!exec->bo_count) {
				281	/* See comment on bo_index for why we have to check
				282	* this.
				283	*/
				284	DRM_ERROR("Rendering requires BOs to validate\n");
				285	return -EINVAL;
				286	}
				287
				288	exec->bo = kcalloc(exec->bo_count, sizeof(struct drm_gem_cma_object *),
				289	GFP_KERNEL);
				290	if (!exec->bo) {
				291	DRM_ERROR("Failed to allocate validated BO pointers\n");
				292	return -ENOMEM;
				293	}
				294
				295	handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
				296	if (!handles) {
				297	DRM_ERROR("Failed to allocate incoming GEM handles\n");
				298	goto fail;
				299	}
				300
				301	ret = copy_from_user(handles,
				302	(void __user *)(uintptr_t)args->bo_handles,
				303	exec->bo_count * sizeof(uint32_t));
				304	if (ret) {
				305	DRM_ERROR("Failed to copy in GEM handles\n");
				306	goto fail;
				307	}
				308
				309	spin_lock(&file_priv->table_lock);
				310	for (i = 0; i < exec->bo_count; i++) {
				311	struct drm_gem_object *bo = idr_find(&file_priv->object_idr,
				312	handles[i]);
				313	if (!bo) {
				314	DRM_ERROR("Failed to look up GEM BO %d: %d\n",
				315	i, handles[i]);
				316	ret = -EINVAL;
				317	spin_unlock(&file_priv->table_lock);
				318	goto fail;
				319	}
				320	drm_gem_object_reference(bo);
				321	exec->bo[i] = (struct drm_gem_cma_object *)bo;
				322	}
				323	spin_unlock(&file_priv->table_lock);
				324
				325	fail:
				326	kfree(handles);
				327	return 0;
				328	}
				329
				330	static int
				331	vc4_get_bcl(struct drm_device dev, struct vc4_exec_info exec)
				332	{
				333	struct drm_vc4_submit_cl *args = exec->args;
				334	void *temp = NULL;
				335	void *bin;
				336	int ret = 0;
				337	uint32_t bin_offset = 0;
				338	uint32_t shader_rec_offset = roundup(bin_offset + args->bin_cl_size,
				339	16);
				340	uint32_t uniforms_offset = shader_rec_offset + args->shader_rec_size;
				341	uint32_t exec_size = uniforms_offset + args->uniforms_size;
				342	uint32_t temp_size = exec_size + (sizeof(struct vc4_shader_state) *
				343	args->shader_rec_count);
				344	struct vc4_bo *bo;
				345
				346	if (uniforms_offset < shader_rec_offset \|\|
				347	exec_size < uniforms_offset \|\|
				348	args->shader_rec_count >= (UINT_MAX /
				349	sizeof(struct vc4_shader_state)) \|\|
				350	temp_size < exec_size) {
				351	DRM_ERROR("overflow in exec arguments\n");
				352	goto fail;
				353	}
				354
				355	/* Allocate space where we'll store the copied in user command lists
				356	* and shader records.
				357	*
				358	* We don't just copy directly into the BOs because we need to
				359	* read the contents back for validation, and I think the
				360	* bo->vaddr is uncached access.
				361	*/
				362	temp = kmalloc(temp_size, GFP_KERNEL);
				363	if (!temp) {
				364	DRM_ERROR("Failed to allocate storage for copying "
				365	"in bin/render CLs.\n");
				366	ret = -ENOMEM;
				367	goto fail;
				368	}
				369	bin = temp + bin_offset;
				370	exec->shader_rec_u = temp + shader_rec_offset;
				371	exec->uniforms_u = temp + uniforms_offset;
				372	exec->shader_state = temp + exec_size;
				373	exec->shader_state_size = args->shader_rec_count;
				374
				375	ret = copy_from_user(bin,
				376	(void __user *)(uintptr_t)args->bin_cl,
				377	args->bin_cl_size);
				378	if (ret) {
				379	DRM_ERROR("Failed to copy in bin cl\n");
				380	goto fail;
				381	}
				382
				383	ret = copy_from_user(exec->shader_rec_u,
				384	(void __user *)(uintptr_t)args->shader_rec,
				385	args->shader_rec_size);
				386	if (ret) {
				387	DRM_ERROR("Failed to copy in shader recs\n");
				388	goto fail;
				389	}
				390
				391	ret = copy_from_user(exec->uniforms_u,
				392	(void __user *)(uintptr_t)args->uniforms,
				393	args->uniforms_size);
				394	if (ret) {
				395	DRM_ERROR("Failed to copy in uniforms cl\n");
				396	goto fail;
				397	}
				398
				399	bo = vc4_bo_create(dev, exec_size, true);
				400	if (!bo) {
				401	DRM_ERROR("Couldn't allocate BO for binning\n");
				402	ret = PTR_ERR(exec->exec_bo);
				403	goto fail;
				404	}
				405	exec->exec_bo = &bo->base;
				406
				407	list_add_tail(&to_vc4_bo(&exec->exec_bo->base)->unref_head,
				408	&exec->unref_list);
				409
				410	exec->ct0ca = exec->exec_bo->paddr + bin_offset;
				411
				412	exec->bin_u = bin;
				413
				414	exec->shader_rec_v = exec->exec_bo->vaddr + shader_rec_offset;
				415	exec->shader_rec_p = exec->exec_bo->paddr + shader_rec_offset;
				416	exec->shader_rec_size = args->shader_rec_size;
				417
				418	exec->uniforms_v = exec->exec_bo->vaddr + uniforms_offset;
				419	exec->uniforms_p = exec->exec_bo->paddr + uniforms_offset;
				420	exec->uniforms_size = args->uniforms_size;
				421
				422	ret = vc4_validate_bin_cl(dev,
				423	exec->exec_bo->vaddr + bin_offset,
				424	bin,
				425	exec);
				426	if (ret)
				427	goto fail;
				428
				429	ret = vc4_validate_shader_recs(dev, exec);
				430
				431	fail:
				432	kfree(temp);
				433	return ret;
				434	}
				435
				436	static void
				437	vc4_complete_exec(struct drm_device dev, struct vc4_exec_info exec)
				438	{
				439	unsigned i;
				440
				441	/* Need the struct lock for drm_gem_object_unreference(). */
				442	mutex_lock(&dev->struct_mutex);
				443	if (exec->bo) {
				444	for (i = 0; i < exec->bo_count; i++)
				445	drm_gem_object_unreference(&exec->bo[i]->base);
				446	kfree(exec->bo);
				447	}
				448
				449	while (!list_empty(&exec->unref_list)) {
				450	struct vc4_bo *bo = list_first_entry(&exec->unref_list,
				451	struct vc4_bo, unref_head);
				452	list_del(&bo->unref_head);
				453	drm_gem_object_unreference(&bo->base.base);
				454	}
				455	mutex_unlock(&dev->struct_mutex);
				456
				457	kfree(exec);
				458	}
				459
				460	void
				461	vc4_job_handle_completed(struct vc4_dev *vc4)
				462	{
				463	unsigned long irqflags;
				464
				465	spin_lock_irqsave(&vc4->job_lock, irqflags);
				466	while (!list_empty(&vc4->job_done_list)) {
				467	struct vc4_exec_info *exec =
				468	list_first_entry(&vc4->job_done_list,
				469	struct vc4_exec_info, head);
				470	list_del(&exec->head);
				471
				472	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
				473	vc4_complete_exec(vc4->dev, exec);
				474	spin_lock_irqsave(&vc4->job_lock, irqflags);
				475	}
				476	spin_unlock_irqrestore(&vc4->job_lock, irqflags);
				477	}
				478
				479	/* Scheduled when any job has been completed, this walks the list of
				480	* jobs that had completed and unrefs their BOs and frees their exec
				481	* structs.
				482	*/
				483	static void
				484	vc4_job_done_work(struct work_struct *work)
				485	{
				486	struct vc4_dev *vc4 =
				487	container_of(work, struct vc4_dev, job_done_work);
				488
				489	vc4_job_handle_completed(vc4);
				490	}
				491
				492	static int
				493	vc4_wait_for_seqno_ioctl_helper(struct drm_device *dev,
				494	uint64_t seqno,
				495	uint64_t *timeout_ns)
				496	{
				497	unsigned long start = jiffies;
				498	int ret = vc4_wait_for_seqno(dev, seqno, *timeout_ns, true);
				499
				500	if ((ret == -EINTR \|\| ret == -ERESTARTSYS) && *timeout_ns != ~0ull) {
				501	uint64_t delta = jiffies_to_nsecs(jiffies - start);
				502
				503	if (*timeout_ns >= delta)
				504	*timeout_ns -= delta;
				505	}
				506
				507	return ret;
				508	}
				509
				510	int
				511	vc4_wait_seqno_ioctl(struct drm_device dev, void data,
				512	struct drm_file *file_priv)
				513	{
				514	struct drm_vc4_wait_seqno *args = data;
				515
				516	return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
				517	&args->timeout_ns);
				518	}
				519
				520	int
				521	vc4_wait_bo_ioctl(struct drm_device dev, void data,
				522	struct drm_file *file_priv)
				523	{
				524	int ret;
				525	struct drm_vc4_wait_bo *args = data;
				526	struct drm_gem_object *gem_obj;
				527	struct vc4_bo *bo;
				528
				529	gem_obj = drm_gem_object_lookup(dev, file_priv, args->handle);
				530	if (!gem_obj) {
				531	DRM_ERROR("Failed to look up GEM BO %d\n", args->handle);
				532	return -EINVAL;
				533	}
				534	bo = to_vc4_bo(gem_obj);
				535
				536	ret = vc4_wait_for_seqno_ioctl_helper(dev, bo->seqno,
				537	&args->timeout_ns);
				538
				539	drm_gem_object_unreference_unlocked(gem_obj);
				540	return ret;
				541	}
				542
				543	/**
				544	* Submits a command list to the VC4.
				545	*
				546	* This is what is called batchbuffer emitting on other hardware.
				547	*/
				548	int
				549	vc4_submit_cl_ioctl(struct drm_device dev, void data,
				550	struct drm_file *file_priv)
				551	{
				552	struct vc4_dev *vc4 = to_vc4_dev(dev);
				553	struct drm_vc4_submit_cl *args = data;
				554	struct vc4_exec_info *exec;
				555	int ret;
				556
				557	if ((args->flags & ~VC4_SUBMIT_CL_USE_CLEAR_COLOR) != 0) {
				558	DRM_ERROR("Unknown flags: 0x%02x\n", args->flags);
				559	return -EINVAL;
				560	}
				561
				562	exec = kcalloc(1, sizeof(*exec), GFP_KERNEL);
				563	if (!exec) {
				564	DRM_ERROR("malloc failure on exec struct\n");
				565	return -ENOMEM;
				566	}
				567
				568	exec->args = args;
				569	INIT_LIST_HEAD(&exec->unref_list);
				570
				571	ret = vc4_cl_lookup_bos(dev, file_priv, exec);
				572	if (ret)
				573	goto fail;
				574
				575	if (exec->args->bin_cl_size != 0) {
				576	ret = vc4_get_bcl(dev, exec);
				577	if (ret)
				578	goto fail;
				579	} else {
				580	exec->ct0ca = 0;
				581	exec->ct0ea = 0;
				582	}
				583
				584	ret = vc4_get_rcl(dev, exec);
				585	if (ret)
				586	goto fail;
				587
				588	/* Clear this out of the struct we'll be putting in the queue,
				589	* since it's part of our stack.
				590	*/
				591	exec->args = NULL;
				592
				593	vc4_queue_submit(dev, exec);
				594
				595	/* Return the seqno for our job. */
				596	args->seqno = vc4->emit_seqno;
				597
				598	return 0;
				599
				600	fail:
				601	vc4_complete_exec(vc4->dev, exec);
				602
				603	return ret;
				604	}
				605
				606	void
				607	vc4_gem_init(struct drm_device *dev)
				608	{
				609	struct vc4_dev *vc4 = to_vc4_dev(dev);
				610
				611	INIT_LIST_HEAD(&vc4->job_list);
				612	INIT_LIST_HEAD(&vc4->job_done_list);
				613	spin_lock_init(&vc4->job_lock);
				614
				615	INIT_WORK(&vc4->hangcheck.reset_work, vc4_reset_work);
				616	setup_timer(&vc4->hangcheck.timer,
				617	vc4_hangcheck_elapsed,
				618	(unsigned long)dev);
				619
				620	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
				621	}
				622
				623	void
				624	vc4_gem_destroy(struct drm_device *dev)
				625	{
				626	struct vc4_dev *vc4 = to_vc4_dev(dev);
				627
				628	/* Waiting for exec to finish would need to be done before
				629	* unregistering V3D.
				630	*/
				631	WARN_ON(vc4->emit_seqno != vc4->finished_seqno);
				632
				633	/* V3D should already have disabled its interrupt and cleared
				634	* the overflow allocation registers. Now free the object.
				635	*/
				636	if (vc4->overflow_mem) {
				637	drm_gem_object_unreference_unlocked(&vc4->overflow_mem->base.base);
				638	vc4->overflow_mem = NULL;
				639	}
				640
				641	vc4_bo_cache_destroy(dev);
				642	}