Blame - drivers/ntb/test/ntb_perf.c - kernel/msm-4.9

blob: db4dc61164ca87a73b66f54dacbb1bd04d95683f [file] [log] [blame]

Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	1	/*
				2	* This file is provided under a dual BSD/GPLv2 license. When using or
				3	* redistributing this file, you may do so under either license.
				4	*
				5	* GPL LICENSE SUMMARY
				6	*
				7	* Copyright(c) 2015 Intel Corporation. All rights reserved.
				8	*
				9	* This program is free software; you can redistribute it and/or modify
				10	* it under the terms of version 2 of the GNU General Public License as
				11	* published by the Free Software Foundation.
				12	*
				13	* BSD LICENSE
				14	*
				15	* Copyright(c) 2015 Intel Corporation. All rights reserved.
				16	*
				17	* Redistribution and use in source and binary forms, with or without
				18	* modification, are permitted provided that the following conditions
				19	* are met:
				20	*
				21	* * Redistributions of source code must retain the above copyright
				22	* notice, this list of conditions and the following disclaimer.
				23	* * Redistributions in binary form must reproduce the above copy
				24	* notice, this list of conditions and the following disclaimer in
				25	* the documentation and/or other materials provided with the
				26	* distribution.
				27	* * Neither the name of Intel Corporation nor the names of its
				28	* contributors may be used to endorse or promote products derived
				29	* from this software without specific prior written permission.
				30	*
				31	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				32	* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				33	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				34	* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				35	* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				36	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				37	* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				38	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				39	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				40	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				41	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				42	*
				43	* PCIe NTB Perf Linux driver
				44	*/
				45
				46	#include <linux/init.h>
				47	#include <linux/kernel.h>
				48	#include <linux/module.h>
				49	#include <linux/kthread.h>
				50	#include <linux/time.h>
				51	#include <linux/timer.h>
				52	#include <linux/dma-mapping.h>
				53	#include <linux/pci.h>
				54	#include <linux/slab.h>
				55	#include <linux/spinlock.h>
				56	#include <linux/debugfs.h>
				57	#include <linux/dmaengine.h>
				58	#include <linux/delay.h>
				59	#include <linux/sizes.h>
				60	#include <linux/ntb.h>
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	61	#include <linux/mutex.h>
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	62
				63	#define DRIVER_NAME "ntb_perf"
				64	#define DRIVER_DESCRIPTION "PCIe NTB Performance Measurement Tool"
				65
				66	#define DRIVER_LICENSE "Dual BSD/GPL"
				67	#define DRIVER_VERSION "1.0"
				68	#define DRIVER_AUTHOR "Dave Jiang <dave.jiang@intel.com>"
				69
				70	#define PERF_LINK_DOWN_TIMEOUT 10
				71	#define PERF_VERSION 0xffff0001
				72	#define MAX_THREADS 32
				73	#define MAX_TEST_SIZE SZ_1M
				74	#define MAX_SRCS 32
				75	#define DMA_OUT_RESOURCE_TO 50
				76	#define DMA_RETRIES 20
				77	#define SZ_4G (1ULL << 32)
				78	#define MAX_SEG_ORDER 20 /* no larger than 1M for kmalloc buffer */
				79
				80	MODULE_LICENSE(DRIVER_LICENSE);
				81	MODULE_VERSION(DRIVER_VERSION);
				82	MODULE_AUTHOR(DRIVER_AUTHOR);
				83	MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
				84
				85	static struct dentry *perf_debugfs_dir;
				86
Logan Gunthorpe	4aae977	2016-06-03 14:50:31 -0600	[diff] [blame]	87	static unsigned long max_mw_size;
				88	module_param(max_mw_size, ulong, 0644);
				89	MODULE_PARM_DESC(max_mw_size, "Limit size of large memory windows");
				90
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	91	static unsigned int seg_order = 19; /* 512K */
				92	module_param(seg_order, uint, 0644);
				93	MODULE_PARM_DESC(seg_order, "size order [n^2] of buffer segment for testing");
				94
				95	static unsigned int run_order = 32; /* 4G */
				96	module_param(run_order, uint, 0644);
				97	MODULE_PARM_DESC(run_order, "size order [n^2] of total data to transfer");
				98
				99	static bool use_dma; /* default to 0 */
				100	module_param(use_dma, bool, 0644);
				101	MODULE_PARM_DESC(use_dma, "Using DMA engine to measure performance");
				102
				103	struct perf_mw {
				104	phys_addr_t phys_addr;
				105	resource_size_t phys_size;
				106	resource_size_t xlat_align;
				107	resource_size_t xlat_align_size;
				108	void __iomem *vbase;
				109	size_t xlat_size;
				110	size_t buf_size;
				111	void *virt_addr;
				112	dma_addr_t dma_addr;
				113	};
				114
				115	struct perf_ctx;
				116
				117	struct pthr_ctx {
				118	struct task_struct *thread;
				119	struct perf_ctx *perf;
				120	atomic_t dma_sync;
				121	struct dma_chan *dma_chan;
				122	int dma_prep_err;
				123	int src_idx;
				124	void *srcs[MAX_SRCS];
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	125	wait_queue_head_t *wq;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	126	};
				127
				128	struct perf_ctx {
				129	struct ntb_dev *ntb;
				130	spinlock_t db_lock;
				131	struct perf_mw mw;
				132	bool link_is_up;
				133	struct work_struct link_cleanup;
				134	struct delayed_work link_work;
				135	struct dentry *debugfs_node_dir;
				136	struct dentry *debugfs_run;
				137	struct dentry *debugfs_threads;
				138	u8 perf_threads;
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	139	/* mutex ensures only one set of threads run at once */
				140	struct mutex run_mutex;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	141	struct pthr_ctx pthr_ctx[MAX_THREADS];
				142	atomic_t tsync;
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	143	atomic_t tdone;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	144	};
				145
				146	enum {
				147	VERSION = 0,
				148	MW_SZ_HIGH,
				149	MW_SZ_LOW,
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	150	MAX_SPAD
				151	};
				152
				153	static void perf_link_event(void *ctx)
				154	{
				155	struct perf_ctx *perf = ctx;
				156
				157	if (ntb_link_is_up(perf->ntb, NULL, NULL) == 1)
				158	schedule_delayed_work(&perf->link_work, 2*HZ);
				159	else
				160	schedule_work(&perf->link_cleanup);
				161	}
				162
				163	static void perf_db_event(void *ctx, int vec)
				164	{
				165	struct perf_ctx *perf = ctx;
				166	u64 db_bits, db_mask;
				167
				168	db_mask = ntb_db_vector_mask(perf->ntb, vec);
				169	db_bits = ntb_db_read(perf->ntb);
				170
				171	dev_dbg(&perf->ntb->dev, "doorbell vec %d mask %#llx bits %#llx\n",
				172	vec, db_mask, db_bits);
				173	}
				174
				175	static const struct ntb_ctx_ops perf_ops = {
				176	.link_event = perf_link_event,
				177	.db_event = perf_db_event,
				178	};
				179
				180	static void perf_copy_callback(void *data)
				181	{
				182	struct pthr_ctx *pctx = data;
				183
				184	atomic_dec(&pctx->dma_sync);
				185	}
				186
Arnd Bergmann	1985a88	2016-01-26 10:31:45 +0100	[diff] [blame]	187	static ssize_t perf_copy(struct pthr_ctx pctx, char __iomem dst,
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	188	char *src, size_t size)
				189	{
				190	struct perf_ctx *perf = pctx->perf;
				191	struct dma_async_tx_descriptor *txd;
				192	struct dma_chan *chan = pctx->dma_chan;
				193	struct dma_device *device;
				194	struct dmaengine_unmap_data *unmap;
				195	dma_cookie_t cookie;
				196	size_t src_off, dst_off;
				197	struct perf_mw *mw = &perf->mw;
Arnd Bergmann	1985a88	2016-01-26 10:31:45 +0100	[diff] [blame]	198	void __iomem *vbase;
				199	void __iomem *dst_vaddr;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	200	dma_addr_t dst_phys;
				201	int retries = 0;
				202
				203	if (!use_dma) {
				204	memcpy_toio(dst, src, size);
				205	return size;
				206	}
				207
				208	if (!chan) {
				209	dev_err(&perf->ntb->dev, "DMA engine does not exist\n");
				210	return -EINVAL;
				211	}
				212
				213	device = chan->device;
Arnd Bergmann	1985a88	2016-01-26 10:31:45 +0100	[diff] [blame]	214	src_off = (uintptr_t)src & ~PAGE_MASK;
				215	dst_off = (uintptr_t __force)dst & ~PAGE_MASK;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	216
				217	if (!is_dma_copy_aligned(device, src_off, dst_off, size))
				218	return -ENODEV;
				219
Arnd Bergmann	1985a88	2016-01-26 10:31:45 +0100	[diff] [blame]	220	vbase = mw->vbase;
				221	dst_vaddr = dst;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	222	dst_phys = mw->phys_addr + (dst_vaddr - vbase);
				223
				224	unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT);
				225	if (!unmap)
				226	return -ENOMEM;
				227
				228	unmap->len = size;
				229	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
				230	src_off, size, DMA_TO_DEVICE);
				231	if (dma_mapping_error(device->dev, unmap->addr[0]))
				232	goto err_get_unmap;
				233
				234	unmap->to_cnt = 1;
				235
				236	do {
				237	txd = device->device_prep_dma_memcpy(chan, dst_phys,
				238	unmap->addr[0],
				239	size, DMA_PREP_INTERRUPT);
				240	if (!txd) {
				241	set_current_state(TASK_INTERRUPTIBLE);
				242	schedule_timeout(DMA_OUT_RESOURCE_TO);
				243	}
				244	} while (!txd && (++retries < DMA_RETRIES));
				245
				246	if (!txd) {
				247	pctx->dma_prep_err++;
				248	goto err_get_unmap;
				249	}
				250
				251	txd->callback = perf_copy_callback;
				252	txd->callback_param = pctx;
				253	dma_set_unmap(txd, unmap);
				254
				255	cookie = dmaengine_submit(txd);
				256	if (dma_submit_error(cookie))
				257	goto err_set_unmap;
				258
				259	atomic_inc(&pctx->dma_sync);
				260	dma_async_issue_pending(chan);
				261
				262	return size;
				263
				264	err_set_unmap:
				265	dmaengine_unmap_put(unmap);
				266	err_get_unmap:
				267	dmaengine_unmap_put(unmap);
				268	return 0;
				269	}
				270
Arnd Bergmann	1985a88	2016-01-26 10:31:45 +0100	[diff] [blame]	271	static int perf_move_data(struct pthr_ctx pctx, char __iomem dst, char *src,
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	272	u64 buf_size, u64 win_size, u64 total)
				273	{
				274	int chunks, total_chunks, i;
				275	int copied_chunks = 0;
				276	u64 copied = 0, result;
Arnd Bergmann	1985a88	2016-01-26 10:31:45 +0100	[diff] [blame]	277	char __iomem *tmp = dst;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	278	u64 perf, diff_us;
				279	ktime_t kstart, kstop, kdiff;
Logan Gunthorpe	fd2ecd8	2016-06-20 13:15:04 -0600	[diff] [blame]	280	unsigned long last_sleep = jiffies;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	281
				282	chunks = div64_u64(win_size, buf_size);
				283	total_chunks = div64_u64(total, buf_size);
				284	kstart = ktime_get();
				285
				286	for (i = 0; i < total_chunks; i++) {
				287	result = perf_copy(pctx, tmp, src, buf_size);
				288	copied += result;
				289	copied_chunks++;
				290	if (copied_chunks == chunks) {
				291	tmp = dst;
				292	copied_chunks = 0;
				293	} else
				294	tmp += buf_size;
				295
Logan Gunthorpe	fd2ecd8	2016-06-20 13:15:04 -0600	[diff] [blame]	296	/* Probably should schedule every 5s to prevent soft hang. */
				297	if (unlikely((jiffies - last_sleep) > 5 * HZ)) {
				298	last_sleep = jiffies;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	299	set_current_state(TASK_INTERRUPTIBLE);
				300	schedule_timeout(1);
				301	}
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	302
				303	if (unlikely(kthread_should_stop()))
				304	break;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	305	}
				306
				307	if (use_dma) {
				308	pr_info("%s: All DMA descriptors submitted\n", current->comm);
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	309	while (atomic_read(&pctx->dma_sync) != 0) {
				310	if (kthread_should_stop())
				311	break;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	312	msleep(20);
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	313	}
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	314	}
				315
				316	kstop = ktime_get();
				317	kdiff = ktime_sub(kstop, kstart);
				318	diff_us = ktime_to_us(kdiff);
				319
				320	pr_info("%s: copied %llu bytes\n", current->comm, copied);
				321
				322	pr_info("%s: lasted %llu usecs\n", current->comm, diff_us);
				323
				324	perf = div64_u64(copied, diff_us);
				325
				326	pr_info("%s: MBytes/s: %llu\n", current->comm, perf);
				327
				328	return 0;
				329	}
				330
				331	static bool perf_dma_filter_fn(struct dma_chan chan, void node)
				332	{
				333	return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
				334	}
				335
				336	static int ntb_perf_thread(void *data)
				337	{
				338	struct pthr_ctx *pctx = data;
				339	struct perf_ctx *perf = pctx->perf;
				340	struct pci_dev *pdev = perf->ntb->pdev;
				341	struct perf_mw *mw = &perf->mw;
Arnd Bergmann	1985a88	2016-01-26 10:31:45 +0100	[diff] [blame]	342	char __iomem *dst;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	343	u64 win_size, buf_size, total;
				344	void *src;
				345	int rc, node, i;
				346	struct dma_chan *dma_chan = NULL;
				347
				348	pr_info("kthread %s starting...\n", current->comm);
				349
				350	node = dev_to_node(&pdev->dev);
				351
				352	if (use_dma && !pctx->dma_chan) {
				353	dma_cap_mask_t dma_mask;
				354
				355	dma_cap_zero(dma_mask);
				356	dma_cap_set(DMA_MEMCPY, dma_mask);
				357	dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
				358	(void *)(unsigned long)node);
				359	if (!dma_chan) {
				360	pr_warn("%s: cannot acquire DMA channel, quitting\n",
				361	current->comm);
				362	return -ENODEV;
				363	}
				364	pctx->dma_chan = dma_chan;
				365	}
				366
				367	for (i = 0; i < MAX_SRCS; i++) {
				368	pctx->srcs[i] = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
				369	if (!pctx->srcs[i]) {
				370	rc = -ENOMEM;
				371	goto err;
				372	}
				373	}
				374
				375	win_size = mw->phys_size;
				376	buf_size = 1ULL << seg_order;
				377	total = 1ULL << run_order;
				378
				379	if (buf_size > MAX_TEST_SIZE)
				380	buf_size = MAX_TEST_SIZE;
				381
Arnd Bergmann	1985a88	2016-01-26 10:31:45 +0100	[diff] [blame]	382	dst = (char __iomem *)mw->vbase;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	383
				384	atomic_inc(&perf->tsync);
				385	while (atomic_read(&perf->tsync) != perf->perf_threads)
				386	schedule();
				387
				388	src = pctx->srcs[pctx->src_idx];
				389	pctx->src_idx = (pctx->src_idx + 1) & (MAX_SRCS - 1);
				390
				391	rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);
				392
				393	atomic_dec(&perf->tsync);
				394
				395	if (rc < 0) {
				396	pr_err("%s: failed\n", current->comm);
				397	rc = -ENXIO;
				398	goto err;
				399	}
				400
				401	for (i = 0; i < MAX_SRCS; i++) {
				402	kfree(pctx->srcs[i]);
				403	pctx->srcs[i] = NULL;
				404	}
				405
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	406	atomic_inc(&perf->tdone);
				407	wake_up(pctx->wq);
				408	rc = 0;
				409	goto done;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	410
				411	err:
				412	for (i = 0; i < MAX_SRCS; i++) {
				413	kfree(pctx->srcs[i]);
				414	pctx->srcs[i] = NULL;
				415	}
				416
				417	if (dma_chan) {
				418	dma_release_channel(dma_chan);
				419	pctx->dma_chan = NULL;
				420	}
				421
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	422	done:
				423	/* Wait until we are told to stop */
				424	for (;;) {
				425	set_current_state(TASK_INTERRUPTIBLE);
				426	if (kthread_should_stop())
				427	break;
				428	schedule();
				429	}
				430	__set_current_state(TASK_RUNNING);
				431
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	432	return rc;
				433	}
				434
				435	static void perf_free_mw(struct perf_ctx *perf)
				436	{
				437	struct perf_mw *mw = &perf->mw;
				438	struct pci_dev *pdev = perf->ntb->pdev;
				439
				440	if (!mw->virt_addr)
				441	return;
				442
				443	ntb_mw_clear_trans(perf->ntb, 0);
				444	dma_free_coherent(&pdev->dev, mw->buf_size,
				445	mw->virt_addr, mw->dma_addr);
				446	mw->xlat_size = 0;
				447	mw->buf_size = 0;
				448	mw->virt_addr = NULL;
				449	}
				450
				451	static int perf_set_mw(struct perf_ctx *perf, resource_size_t size)
				452	{
				453	struct perf_mw *mw = &perf->mw;
				454	size_t xlat_size, buf_size;
Dave Jiang	ee5f750	2016-03-07 15:57:25 -0700	[diff] [blame]	455	int rc;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	456
				457	if (!size)
				458	return -EINVAL;
				459
				460	xlat_size = round_up(size, mw->xlat_align_size);
				461	buf_size = round_up(size, mw->xlat_align);
				462
				463	if (mw->xlat_size == xlat_size)
				464	return 0;
				465
				466	if (mw->buf_size)
				467	perf_free_mw(perf);
				468
				469	mw->xlat_size = xlat_size;
				470	mw->buf_size = buf_size;
				471
				472	mw->virt_addr = dma_alloc_coherent(&perf->ntb->pdev->dev, buf_size,
				473	&mw->dma_addr, GFP_KERNEL);
				474	if (!mw->virt_addr) {
				475	mw->xlat_size = 0;
				476	mw->buf_size = 0;
				477	}
				478
Dave Jiang	ee5f750	2016-03-07 15:57:25 -0700	[diff] [blame]	479	rc = ntb_mw_set_trans(perf->ntb, 0, mw->dma_addr, mw->xlat_size);
				480	if (rc) {
				481	dev_err(&perf->ntb->dev, "Unable to set mw0 translation\n");
				482	perf_free_mw(perf);
				483	return -EIO;
				484	}
				485
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	486	return 0;
				487	}
				488
				489	static void perf_link_work(struct work_struct *work)
				490	{
				491	struct perf_ctx *perf =
				492	container_of(work, struct perf_ctx, link_work.work);
				493	struct ntb_dev *ndev = perf->ntb;
				494	struct pci_dev *pdev = ndev->pdev;
				495	u32 val;
				496	u64 size;
				497	int rc;
				498
				499	dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
				500
				501	size = perf->mw.phys_size;
Logan Gunthorpe	4aae977	2016-06-03 14:50:31 -0600	[diff] [blame]	502
				503	if (max_mw_size && size > max_mw_size)
				504	size = max_mw_size;
				505
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	506	ntb_peer_spad_write(ndev, MW_SZ_HIGH, upper_32_bits(size));
				507	ntb_peer_spad_write(ndev, MW_SZ_LOW, lower_32_bits(size));
				508	ntb_peer_spad_write(ndev, VERSION, PERF_VERSION);
				509
				510	/* now read what peer wrote */
				511	val = ntb_spad_read(ndev, VERSION);
				512	if (val != PERF_VERSION) {
				513	dev_dbg(&pdev->dev, "Remote version = %#x\n", val);
				514	goto out;
				515	}
				516
				517	val = ntb_spad_read(ndev, MW_SZ_HIGH);
				518	size = (u64)val << 32;
				519
				520	val = ntb_spad_read(ndev, MW_SZ_LOW);
				521	size \|= val;
				522
				523	dev_dbg(&pdev->dev, "Remote MW size = %#llx\n", size);
				524
				525	rc = perf_set_mw(perf, size);
				526	if (rc)
				527	goto out1;
				528
				529	perf->link_is_up = true;
				530
				531	return;
				532
				533	out1:
				534	perf_free_mw(perf);
				535
				536	out:
				537	if (ntb_link_is_up(ndev, NULL, NULL) == 1)
				538	schedule_delayed_work(&perf->link_work,
				539	msecs_to_jiffies(PERF_LINK_DOWN_TIMEOUT));
				540	}
				541
				542	static void perf_link_cleanup(struct work_struct *work)
				543	{
				544	struct perf_ctx *perf = container_of(work,
				545	struct perf_ctx,
				546	link_cleanup);
				547
				548	dev_dbg(&perf->ntb->pdev->dev, "%s called\n", __func__);
				549
				550	if (!perf->link_is_up)
				551	cancel_delayed_work_sync(&perf->link_work);
				552	}
				553
				554	static int perf_setup_mw(struct ntb_dev ntb, struct perf_ctx perf)
				555	{
				556	struct perf_mw *mw;
				557	int rc;
				558
				559	mw = &perf->mw;
				560
				561	rc = ntb_mw_get_range(ntb, 0, &mw->phys_addr, &mw->phys_size,
				562	&mw->xlat_align, &mw->xlat_align_size);
				563	if (rc)
				564	return rc;
				565
				566	perf->mw.vbase = ioremap_wc(mw->phys_addr, mw->phys_size);
				567	if (!mw->vbase)
				568	return -ENOMEM;
				569
				570	return 0;
				571	}
				572
				573	static ssize_t debugfs_run_read(struct file filp, char __user ubuf,
				574	size_t count, loff_t *offp)
				575	{
				576	struct perf_ctx *perf = filp->private_data;
				577	char *buf;
				578	ssize_t ret, out_offset;
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	579	int running;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	580
				581	if (!perf)
				582	return 0;
				583
				584	buf = kmalloc(64, GFP_KERNEL);
Sudip Mukherjee	2572c7f	2016-03-10 17:51:11 +0530	[diff] [blame]	585	if (!buf)
				586	return -ENOMEM;
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	587
				588	running = mutex_is_locked(&perf->run_mutex);
				589	out_offset = snprintf(buf, 64, "%d\n", running);
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	590	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
				591	kfree(buf);
				592
				593	return ret;
				594	}
				595
Dave Jiang	838850e	2016-03-18 16:39:47 -0700	[diff] [blame]	596	static void threads_cleanup(struct perf_ctx *perf)
				597	{
				598	struct pthr_ctx *pctx;
				599	int i;
				600
Dave Jiang	838850e	2016-03-18 16:39:47 -0700	[diff] [blame]	601	for (i = 0; i < MAX_THREADS; i++) {
				602	pctx = &perf->pthr_ctx[i];
				603	if (pctx->thread) {
				604	kthread_stop(pctx->thread);
				605	pctx->thread = NULL;
				606	}
				607	}
				608	}
				609
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	610	static ssize_t debugfs_run_write(struct file filp, const char __user ubuf,
				611	size_t count, loff_t *offp)
				612	{
				613	struct perf_ctx *perf = filp->private_data;
				614	int node, i;
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	615	DECLARE_WAIT_QUEUE_HEAD(wq);
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	616
				617	if (!perf->link_is_up)
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	618	return -ENOLINK;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	619
				620	if (perf->perf_threads == 0)
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	621	return -EINVAL;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	622
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	623	if (!mutex_trylock(&perf->run_mutex))
				624	return -EBUSY;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	625
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	626	if (perf->perf_threads > MAX_THREADS) {
				627	perf->perf_threads = MAX_THREADS;
				628	pr_info("Reset total threads to: %u\n", MAX_THREADS);
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	629	}
				630
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	631	/* no greater than 1M */
				632	if (seg_order > MAX_SEG_ORDER) {
				633	seg_order = MAX_SEG_ORDER;
				634	pr_info("Fix seg_order to %u\n", seg_order);
				635	}
				636
				637	if (run_order < seg_order) {
				638	run_order = seg_order;
				639	pr_info("Fix run_order to %u\n", run_order);
				640	}
				641
				642	node = dev_to_node(&perf->ntb->pdev->dev);
				643	atomic_set(&perf->tdone, 0);
				644
				645	/* launch kernel thread */
				646	for (i = 0; i < perf->perf_threads; i++) {
				647	struct pthr_ctx *pctx;
				648
				649	pctx = &perf->pthr_ctx[i];
				650	atomic_set(&pctx->dma_sync, 0);
				651	pctx->perf = perf;
				652	pctx->wq = &wq;
				653	pctx->thread =
				654	kthread_create_on_node(ntb_perf_thread,
				655	(void *)pctx,
				656	node, "ntb_perf %d", i);
				657	if (IS_ERR(pctx->thread)) {
				658	pctx->thread = NULL;
				659	goto err;
				660	} else {
				661	wake_up_process(pctx->thread);
				662	}
				663	}
				664
				665	wait_event_interruptible(wq,
				666	atomic_read(&perf->tdone) == perf->perf_threads);
				667
				668	threads_cleanup(perf);
				669	mutex_unlock(&perf->run_mutex);
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	670	return count;
Dave Jiang	838850e	2016-03-18 16:39:47 -0700	[diff] [blame]	671
				672	err:
				673	threads_cleanup(perf);
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	674	mutex_unlock(&perf->run_mutex);
Dave Jiang	838850e	2016-03-18 16:39:47 -0700	[diff] [blame]	675	return -ENXIO;
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	676	}
				677
				678	static const struct file_operations ntb_perf_debugfs_run = {
				679	.owner = THIS_MODULE,
				680	.open = simple_open,
				681	.read = debugfs_run_read,
				682	.write = debugfs_run_write,
				683	};
				684
				685	static int perf_debugfs_setup(struct perf_ctx *perf)
				686	{
				687	struct pci_dev *pdev = perf->ntb->pdev;
				688
				689	if (!debugfs_initialized())
				690	return -ENODEV;
				691
				692	if (!perf_debugfs_dir) {
				693	perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
				694	if (!perf_debugfs_dir)
				695	return -ENODEV;
				696	}
				697
				698	perf->debugfs_node_dir = debugfs_create_dir(pci_name(pdev),
				699	perf_debugfs_dir);
				700	if (!perf->debugfs_node_dir)
				701	return -ENODEV;
				702
				703	perf->debugfs_run = debugfs_create_file("run", S_IRUSR \| S_IWUSR,
				704	perf->debugfs_node_dir, perf,
				705	&ntb_perf_debugfs_run);
				706	if (!perf->debugfs_run)
				707	return -ENODEV;
				708
				709	perf->debugfs_threads = debugfs_create_u8("threads", S_IRUSR \| S_IWUSR,
				710	perf->debugfs_node_dir,
				711	&perf->perf_threads);
				712	if (!perf->debugfs_threads)
				713	return -ENODEV;
				714
				715	return 0;
				716	}
				717
				718	static int perf_probe(struct ntb_client client, struct ntb_dev ntb)
				719	{
				720	struct pci_dev *pdev = ntb->pdev;
				721	struct perf_ctx *perf;
				722	int node;
				723	int rc = 0;
				724
Logan Gunthorpe	19645a0	2016-06-07 11:20:22 -0600	[diff] [blame]	725	if (ntb_spad_count(ntb) < MAX_SPAD) {
				726	dev_err(&ntb->dev, "Not enough scratch pad registers for %s",
				727	DRIVER_NAME);
				728	return -EIO;
				729	}
				730
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	731	node = dev_to_node(&pdev->dev);
				732
				733	perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, node);
				734	if (!perf) {
				735	rc = -ENOMEM;
				736	goto err_perf;
				737	}
				738
				739	perf->ntb = ntb;
				740	perf->perf_threads = 1;
				741	atomic_set(&perf->tsync, 0);
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	742	mutex_init(&perf->run_mutex);
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	743	spin_lock_init(&perf->db_lock);
				744	perf_setup_mw(ntb, perf);
				745	INIT_DELAYED_WORK(&perf->link_work, perf_link_work);
				746	INIT_WORK(&perf->link_cleanup, perf_link_cleanup);
				747
				748	rc = ntb_set_ctx(ntb, perf, &perf_ops);
				749	if (rc)
				750	goto err_ctx;
				751
				752	perf->link_is_up = false;
				753	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
				754	ntb_link_event(ntb);
				755
				756	rc = perf_debugfs_setup(perf);
				757	if (rc)
				758	goto err_ctx;
				759
				760	return 0;
				761
				762	err_ctx:
				763	cancel_delayed_work_sync(&perf->link_work);
				764	cancel_work_sync(&perf->link_cleanup);
				765	kfree(perf);
				766	err_perf:
				767	return rc;
				768	}
				769
				770	static void perf_remove(struct ntb_client client, struct ntb_dev ntb)
				771	{
				772	struct perf_ctx *perf = ntb->ctx;
				773	int i;
				774
				775	dev_dbg(&perf->ntb->dev, "%s called\n", __func__);
				776
Logan Gunthorpe	da573ea	2016-06-20 13:15:05 -0600	[diff] [blame^]	777	mutex_lock(&perf->run_mutex);
				778
Dave Jiang	8a7b6a7	2016-01-13 13:29:48 -0700	[diff] [blame]	779	cancel_delayed_work_sync(&perf->link_work);
				780	cancel_work_sync(&perf->link_cleanup);
				781
				782	ntb_clear_ctx(ntb);
				783	ntb_link_disable(ntb);
				784
				785	debugfs_remove_recursive(perf_debugfs_dir);
				786	perf_debugfs_dir = NULL;
				787
				788	if (use_dma) {
				789	for (i = 0; i < MAX_THREADS; i++) {
				790	struct pthr_ctx *pctx = &perf->pthr_ctx[i];
				791
				792	if (pctx->dma_chan)
				793	dma_release_channel(pctx->dma_chan);
				794	}
				795	}
				796
				797	kfree(perf);
				798	}
				799
				800	static struct ntb_client perf_client = {
				801	.ops = {
				802	.probe = perf_probe,
				803	.remove = perf_remove,
				804	},
				805	};
				806	module_ntb_client(perf_client);