Blame - drivers/net/ethernet/intel/ice/ice_txrx.c - kernel/msm-5.4

blob: 1ccf8e69b85a0d808c21468496d5ea089f99a3c1 [file] [log] [blame]

Anirudh Venkataramanan	cdedef5	2018-03-20 07:58:13 -0700	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2	/* Copyright (c) 2018, Intel Corporation. */
				3
				4	/* The driver transmit and receive code */
				5
				6	#include <linux/prefetch.h>
				7	#include <linux/mm.h>
				8	#include "ice.h"
				9
Anirudh Venkataramanan	2b245cb	2018-03-20 07:58:14 -0700	[diff] [blame^]	10	#define ICE_RX_HDR_SIZE 256
				11
Anirudh Venkataramanan	cdedef5	2018-03-20 07:58:13 -0700	[diff] [blame]	12	/**
				13	* ice_unmap_and_free_tx_buf - Release a Tx buffer
				14	* @ring: the ring that owns the buffer
				15	* @tx_buf: the buffer to free
				16	*/
				17	static void
				18	ice_unmap_and_free_tx_buf(struct ice_ring ring, struct ice_tx_buf tx_buf)
				19	{
				20	if (tx_buf->skb) {
				21	dev_kfree_skb_any(tx_buf->skb);
				22	if (dma_unmap_len(tx_buf, len))
				23	dma_unmap_single(ring->dev,
				24	dma_unmap_addr(tx_buf, dma),
				25	dma_unmap_len(tx_buf, len),
				26	DMA_TO_DEVICE);
				27	} else if (dma_unmap_len(tx_buf, len)) {
				28	dma_unmap_page(ring->dev,
				29	dma_unmap_addr(tx_buf, dma),
				30	dma_unmap_len(tx_buf, len),
				31	DMA_TO_DEVICE);
				32	}
				33
				34	tx_buf->next_to_watch = NULL;
				35	tx_buf->skb = NULL;
				36	dma_unmap_len_set(tx_buf, len, 0);
				37	/* tx_buf must be completely set up in the transmit path */
				38	}
				39
				40	static struct netdev_queue txring_txq(const struct ice_ring ring)
				41	{
				42	return netdev_get_tx_queue(ring->netdev, ring->q_index);
				43	}
				44
				45	/**
				46	* ice_clean_tx_ring - Free any empty Tx buffers
				47	* @tx_ring: ring to be cleaned
				48	*/
				49	void ice_clean_tx_ring(struct ice_ring *tx_ring)
				50	{
				51	unsigned long size;
				52	u16 i;
				53
				54	/* ring already cleared, nothing to do */
				55	if (!tx_ring->tx_buf)
				56	return;
				57
				58	/* Free all the Tx ring sk_bufss */
				59	for (i = 0; i < tx_ring->count; i++)
				60	ice_unmap_and_free_tx_buf(tx_ring, &tx_ring->tx_buf[i]);
				61
				62	size = sizeof(struct ice_tx_buf) * tx_ring->count;
				63	memset(tx_ring->tx_buf, 0, size);
				64
				65	/* Zero out the descriptor ring */
				66	memset(tx_ring->desc, 0, tx_ring->size);
				67
				68	tx_ring->next_to_use = 0;
				69	tx_ring->next_to_clean = 0;
				70
				71	if (!tx_ring->netdev)
				72	return;
				73
				74	/* cleanup Tx queue statistics */
				75	netdev_tx_reset_queue(txring_txq(tx_ring));
				76	}
				77
				78	/**
				79	* ice_free_tx_ring - Free Tx resources per queue
				80	* @tx_ring: Tx descriptor ring for a specific queue
				81	*
				82	* Free all transmit software resources
				83	*/
				84	void ice_free_tx_ring(struct ice_ring *tx_ring)
				85	{
				86	ice_clean_tx_ring(tx_ring);
				87	devm_kfree(tx_ring->dev, tx_ring->tx_buf);
				88	tx_ring->tx_buf = NULL;
				89
				90	if (tx_ring->desc) {
				91	dmam_free_coherent(tx_ring->dev, tx_ring->size,
				92	tx_ring->desc, tx_ring->dma);
				93	tx_ring->desc = NULL;
				94	}
				95	}
				96
				97	/**
Anirudh Venkataramanan	2b245cb	2018-03-20 07:58:14 -0700	[diff] [blame^]	98	* ice_clean_tx_irq - Reclaim resources after transmit completes
				99	* @vsi: the VSI we care about
				100	* @tx_ring: Tx ring to clean
				101	* @napi_budget: Used to determine if we are in netpoll
				102	*
				103	* Returns true if there's any budget left (e.g. the clean is finished)
				104	*/
				105	static bool ice_clean_tx_irq(struct ice_vsi vsi, struct ice_ring tx_ring,
				106	int napi_budget)
				107	{
				108	unsigned int total_bytes = 0, total_pkts = 0;
				109	unsigned int budget = vsi->work_lmt;
				110	s16 i = tx_ring->next_to_clean;
				111	struct ice_tx_desc *tx_desc;
				112	struct ice_tx_buf *tx_buf;
				113
				114	tx_buf = &tx_ring->tx_buf[i];
				115	tx_desc = ICE_TX_DESC(tx_ring, i);
				116	i -= tx_ring->count;
				117
				118	do {
				119	struct ice_tx_desc *eop_desc = tx_buf->next_to_watch;
				120
				121	/* if next_to_watch is not set then there is no work pending */
				122	if (!eop_desc)
				123	break;
				124
				125	smp_rmb(); /* prevent any other reads prior to eop_desc */
				126
				127	/* if the descriptor isn't done, no work yet to do */
				128	if (!(eop_desc->cmd_type_offset_bsz &
				129	cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
				130	break;
				131
				132	/* clear next_to_watch to prevent false hangs */
				133	tx_buf->next_to_watch = NULL;
				134
				135	/* update the statistics for this packet */
				136	total_bytes += tx_buf->bytecount;
				137	total_pkts += tx_buf->gso_segs;
				138
				139	/* free the skb */
				140	napi_consume_skb(tx_buf->skb, napi_budget);
				141
				142	/* unmap skb header data */
				143	dma_unmap_single(tx_ring->dev,
				144	dma_unmap_addr(tx_buf, dma),
				145	dma_unmap_len(tx_buf, len),
				146	DMA_TO_DEVICE);
				147
				148	/* clear tx_buf data */
				149	tx_buf->skb = NULL;
				150	dma_unmap_len_set(tx_buf, len, 0);
				151
				152	/* unmap remaining buffers */
				153	while (tx_desc != eop_desc) {
				154	tx_buf++;
				155	tx_desc++;
				156	i++;
				157	if (unlikely(!i)) {
				158	i -= tx_ring->count;
				159	tx_buf = tx_ring->tx_buf;
				160	tx_desc = ICE_TX_DESC(tx_ring, 0);
				161	}
				162
				163	/* unmap any remaining paged data */
				164	if (dma_unmap_len(tx_buf, len)) {
				165	dma_unmap_page(tx_ring->dev,
				166	dma_unmap_addr(tx_buf, dma),
				167	dma_unmap_len(tx_buf, len),
				168	DMA_TO_DEVICE);
				169	dma_unmap_len_set(tx_buf, len, 0);
				170	}
				171	}
				172
				173	/* move us one more past the eop_desc for start of next pkt */
				174	tx_buf++;
				175	tx_desc++;
				176	i++;
				177	if (unlikely(!i)) {
				178	i -= tx_ring->count;
				179	tx_buf = tx_ring->tx_buf;
				180	tx_desc = ICE_TX_DESC(tx_ring, 0);
				181	}
				182
				183	prefetch(tx_desc);
				184
				185	/* update budget accounting */
				186	budget--;
				187	} while (likely(budget));
				188
				189	i += tx_ring->count;
				190	tx_ring->next_to_clean = i;
				191	u64_stats_update_begin(&tx_ring->syncp);
				192	tx_ring->stats.bytes += total_bytes;
				193	tx_ring->stats.pkts += total_pkts;
				194	u64_stats_update_end(&tx_ring->syncp);
				195	tx_ring->q_vector->tx.total_bytes += total_bytes;
				196	tx_ring->q_vector->tx.total_pkts += total_pkts;
				197
				198	netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts,
				199	total_bytes);
				200
				201	#define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
				202	if (unlikely(total_pkts && netif_carrier_ok(tx_ring->netdev) &&
				203	(ICE_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
				204	/* Make sure that anybody stopping the queue after this
				205	* sees the new next_to_clean.
				206	*/
				207	smp_mb();
				208	if (__netif_subqueue_stopped(tx_ring->netdev,
				209	tx_ring->q_index) &&
				210	!test_bit(__ICE_DOWN, vsi->state)) {
				211	netif_wake_subqueue(tx_ring->netdev,
				212	tx_ring->q_index);
				213	++tx_ring->tx_stats.restart_q;
				214	}
				215	}
				216
				217	return !!budget;
				218	}
				219
				220	/**
Anirudh Venkataramanan	cdedef5	2018-03-20 07:58:13 -0700	[diff] [blame]	221	* ice_setup_tx_ring - Allocate the Tx descriptors
				222	* @tx_ring: the tx ring to set up
				223	*
				224	* Return 0 on success, negative on error
				225	*/
				226	int ice_setup_tx_ring(struct ice_ring *tx_ring)
				227	{
				228	struct device *dev = tx_ring->dev;
				229	int bi_size;
				230
				231	if (!dev)
				232	return -ENOMEM;
				233
				234	/* warn if we are about to overwrite the pointer */
				235	WARN_ON(tx_ring->tx_buf);
				236	bi_size = sizeof(struct ice_tx_buf) * tx_ring->count;
				237	tx_ring->tx_buf = devm_kzalloc(dev, bi_size, GFP_KERNEL);
				238	if (!tx_ring->tx_buf)
				239	return -ENOMEM;
				240
				241	/* round up to nearest 4K */
				242	tx_ring->size = tx_ring->count * sizeof(struct ice_tx_desc);
				243	tx_ring->size = ALIGN(tx_ring->size, 4096);
				244	tx_ring->desc = dmam_alloc_coherent(dev, tx_ring->size, &tx_ring->dma,
				245	GFP_KERNEL);
				246	if (!tx_ring->desc) {
				247	dev_err(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
				248	tx_ring->size);
				249	goto err;
				250	}
				251
				252	tx_ring->next_to_use = 0;
				253	tx_ring->next_to_clean = 0;
				254	return 0;
				255
				256	err:
				257	devm_kfree(dev, tx_ring->tx_buf);
				258	tx_ring->tx_buf = NULL;
				259	return -ENOMEM;
				260	}
				261
				262	/**
				263	* ice_clean_rx_ring - Free Rx buffers
				264	* @rx_ring: ring to be cleaned
				265	*/
				266	void ice_clean_rx_ring(struct ice_ring *rx_ring)
				267	{
				268	struct device *dev = rx_ring->dev;
				269	unsigned long size;
				270	u16 i;
				271
				272	/* ring already cleared, nothing to do */
				273	if (!rx_ring->rx_buf)
				274	return;
				275
				276	/* Free all the Rx ring sk_buffs */
				277	for (i = 0; i < rx_ring->count; i++) {
				278	struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
				279
				280	if (rx_buf->skb) {
				281	dev_kfree_skb(rx_buf->skb);
				282	rx_buf->skb = NULL;
				283	}
				284	if (!rx_buf->page)
				285	continue;
				286
				287	dma_unmap_page(dev, rx_buf->dma, PAGE_SIZE, DMA_FROM_DEVICE);
				288	__free_pages(rx_buf->page, 0);
				289
				290	rx_buf->page = NULL;
				291	rx_buf->page_offset = 0;
				292	}
				293
				294	size = sizeof(struct ice_rx_buf) * rx_ring->count;
				295	memset(rx_ring->rx_buf, 0, size);
				296
				297	/* Zero out the descriptor ring */
				298	memset(rx_ring->desc, 0, rx_ring->size);
				299
				300	rx_ring->next_to_alloc = 0;
				301	rx_ring->next_to_clean = 0;
				302	rx_ring->next_to_use = 0;
				303	}
				304
				305	/**
				306	* ice_free_rx_ring - Free Rx resources
				307	* @rx_ring: ring to clean the resources from
				308	*
				309	* Free all receive software resources
				310	*/
				311	void ice_free_rx_ring(struct ice_ring *rx_ring)
				312	{
				313	ice_clean_rx_ring(rx_ring);
				314	devm_kfree(rx_ring->dev, rx_ring->rx_buf);
				315	rx_ring->rx_buf = NULL;
				316
				317	if (rx_ring->desc) {
				318	dmam_free_coherent(rx_ring->dev, rx_ring->size,
				319	rx_ring->desc, rx_ring->dma);
				320	rx_ring->desc = NULL;
				321	}
				322	}
				323
				324	/**
				325	* ice_setup_rx_ring - Allocate the Rx descriptors
				326	* @rx_ring: the rx ring to set up
				327	*
				328	* Return 0 on success, negative on error
				329	*/
				330	int ice_setup_rx_ring(struct ice_ring *rx_ring)
				331	{
				332	struct device *dev = rx_ring->dev;
				333	int bi_size;
				334
				335	if (!dev)
				336	return -ENOMEM;
				337
				338	/* warn if we are about to overwrite the pointer */
				339	WARN_ON(rx_ring->rx_buf);
				340	bi_size = sizeof(struct ice_rx_buf) * rx_ring->count;
				341	rx_ring->rx_buf = devm_kzalloc(dev, bi_size, GFP_KERNEL);
				342	if (!rx_ring->rx_buf)
				343	return -ENOMEM;
				344
				345	/* round up to nearest 4K */
				346	rx_ring->size = rx_ring->count * sizeof(union ice_32byte_rx_desc);
				347	rx_ring->size = ALIGN(rx_ring->size, 4096);
				348	rx_ring->desc = dmam_alloc_coherent(dev, rx_ring->size, &rx_ring->dma,
				349	GFP_KERNEL);
				350	if (!rx_ring->desc) {
				351	dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
				352	rx_ring->size);
				353	goto err;
				354	}
				355
				356	rx_ring->next_to_use = 0;
				357	rx_ring->next_to_clean = 0;
				358	return 0;
				359
				360	err:
				361	devm_kfree(dev, rx_ring->rx_buf);
				362	rx_ring->rx_buf = NULL;
				363	return -ENOMEM;
				364	}
				365
				366	/**
				367	* ice_release_rx_desc - Store the new tail and head values
				368	* @rx_ring: ring to bump
				369	* @val: new head index
				370	*/
				371	static void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
				372	{
				373	rx_ring->next_to_use = val;
				374
				375	/* update next to alloc since we have filled the ring */
				376	rx_ring->next_to_alloc = val;
				377
				378	/* Force memory writes to complete before letting h/w
				379	* know there are new descriptors to fetch. (Only
				380	* applicable for weak-ordered memory model archs,
				381	* such as IA-64).
				382	*/
				383	wmb();
				384	writel(val, rx_ring->tail);
				385	}
				386
				387	/**
				388	* ice_alloc_mapped_page - recycle or make a new page
				389	* @rx_ring: ring to use
				390	* @bi: rx_buf struct to modify
				391	*
				392	* Returns true if the page was successfully allocated or
				393	* reused.
				394	*/
				395	static bool ice_alloc_mapped_page(struct ice_ring *rx_ring,
				396	struct ice_rx_buf *bi)
				397	{
				398	struct page *page = bi->page;
				399	dma_addr_t dma;
				400
				401	/* since we are recycling buffers we should seldom need to alloc */
Anirudh Venkataramanan	2b245cb	2018-03-20 07:58:14 -0700	[diff] [blame^]	402	if (likely(page)) {
				403	rx_ring->rx_stats.page_reuse_count++;
Anirudh Venkataramanan	cdedef5	2018-03-20 07:58:13 -0700	[diff] [blame]	404	return true;
Anirudh Venkataramanan	2b245cb	2018-03-20 07:58:14 -0700	[diff] [blame^]	405	}
Anirudh Venkataramanan	cdedef5	2018-03-20 07:58:13 -0700	[diff] [blame]	406
				407	/* alloc new page for storage */
				408	page = alloc_page(GFP_ATOMIC \| __GFP_NOWARN);
Anirudh Venkataramanan	2b245cb	2018-03-20 07:58:14 -0700	[diff] [blame^]	409	if (unlikely(!page)) {
				410	rx_ring->rx_stats.alloc_page_failed++;
Anirudh Venkataramanan	cdedef5	2018-03-20 07:58:13 -0700	[diff] [blame]	411	return false;
Anirudh Venkataramanan	2b245cb	2018-03-20 07:58:14 -0700	[diff] [blame^]	412	}
Anirudh Venkataramanan	cdedef5	2018-03-20 07:58:13 -0700	[diff] [blame]	413
				414	/* map page for use */
				415	dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE);
				416
				417	/* if mapping failed free memory back to system since
				418	* there isn't much point in holding memory we can't use
				419	*/
				420	if (dma_mapping_error(rx_ring->dev, dma)) {
				421	__free_pages(page, 0);
Anirudh Venkataramanan	2b245cb	2018-03-20 07:58:14 -0700	[diff] [blame^]	422	rx_ring->rx_stats.alloc_page_failed++;
Anirudh Venkataramanan	cdedef5	2018-03-20 07:58:13 -0700	[diff] [blame]	423	return false;
				424	}
				425
				426	bi->dma = dma;
				427	bi->page = page;
				428	bi->page_offset = 0;
				429
				430	return true;
				431	}
				432
				433	/**
				434	* ice_alloc_rx_bufs - Replace used receive buffers
				435	* @rx_ring: ring to place buffers on
				436	* @cleaned_count: number of buffers to replace
				437	*
				438	* Returns false if all allocations were successful, true if any fail
				439	*/
				440	bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
				441	{
				442	union ice_32b_rx_flex_desc *rx_desc;
				443	u16 ntu = rx_ring->next_to_use;
				444	struct ice_rx_buf *bi;
				445
				446	/* do nothing if no valid netdev defined */
				447	if (!rx_ring->netdev \|\| !cleaned_count)
				448	return false;
				449
				450	/* get the RX descriptor and buffer based on next_to_use */
				451	rx_desc = ICE_RX_DESC(rx_ring, ntu);
				452	bi = &rx_ring->rx_buf[ntu];
				453
				454	do {
				455	if (!ice_alloc_mapped_page(rx_ring, bi))
				456	goto no_bufs;
				457
				458	/* Refresh the desc even if buffer_addrs didn't change
				459	* because each write-back erases this info.
				460	*/
				461	rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
				462
				463	rx_desc++;
				464	bi++;
				465	ntu++;
				466	if (unlikely(ntu == rx_ring->count)) {
				467	rx_desc = ICE_RX_DESC(rx_ring, 0);
				468	bi = rx_ring->rx_buf;
				469	ntu = 0;
				470	}
				471
				472	/* clear the status bits for the next_to_use descriptor */
				473	rx_desc->wb.status_error0 = 0;
				474
				475	cleaned_count--;
				476	} while (cleaned_count);
				477
				478	if (rx_ring->next_to_use != ntu)
				479	ice_release_rx_desc(rx_ring, ntu);
				480
				481	return false;
				482
				483	no_bufs:
				484	if (rx_ring->next_to_use != ntu)
				485	ice_release_rx_desc(rx_ring, ntu);
				486
				487	/* make sure to come back via polling to try again after
				488	* allocation failure
				489	*/
				490	return true;
				491	}
Anirudh Venkataramanan	2b245cb	2018-03-20 07:58:14 -0700	[diff] [blame^]	492
				493	/**
				494	* ice_page_is_reserved - check if reuse is possible
				495	* @page: page struct to check
				496	*/
				497	static bool ice_page_is_reserved(struct page *page)
				498	{
				499	return (page_to_nid(page) != numa_mem_id()) \|\| page_is_pfmemalloc(page);
				500	}
				501
				502	/**
				503	* ice_add_rx_frag - Add contents of Rx buffer to sk_buff
				504	* @rx_buf: buffer containing page to add
				505	* @rx_desc: descriptor containing length of buffer written by hardware
				506	* @skb: sk_buf to place the data into
				507	*
				508	* This function will add the data contained in rx_buf->page to the skb.
				509	* This is done either through a direct copy if the data in the buffer is
				510	* less than the skb header size, otherwise it will just attach the page as
				511	* a frag to the skb.
				512	*
				513	* The function will then update the page offset if necessary and return
				514	* true if the buffer can be reused by the adapter.
				515	*/
				516	static bool ice_add_rx_frag(struct ice_rx_buf *rx_buf,
				517	union ice_32b_rx_flex_desc *rx_desc,
				518	struct sk_buff *skb)
				519	{
				520	#if (PAGE_SIZE < 8192)
				521	unsigned int truesize = ICE_RXBUF_2048;
				522	#else
				523	unsigned int last_offset = PAGE_SIZE - ICE_RXBUF_2048;
				524	unsigned int truesize;
				525	#endif /* PAGE_SIZE < 8192) */
				526
				527	struct page *page;
				528	unsigned int size;
				529
				530	size = le16_to_cpu(rx_desc->wb.pkt_len) &
				531	ICE_RX_FLX_DESC_PKT_LEN_M;
				532
				533	page = rx_buf->page;
				534
				535	#if (PAGE_SIZE >= 8192)
				536	truesize = ALIGN(size, L1_CACHE_BYTES);
				537	#endif /* PAGE_SIZE >= 8192) */
				538
				539	/* will the data fit in the skb we allocated? if so, just
				540	* copy it as it is pretty small anyway
				541	*/
				542	if (size <= ICE_RX_HDR_SIZE && !skb_is_nonlinear(skb)) {
				543	unsigned char *va = page_address(page) + rx_buf->page_offset;
				544
				545	memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
				546
				547	/* page is not reserved, we can reuse buffer as-is */
				548	if (likely(!ice_page_is_reserved(page)))
				549	return true;
				550
				551	/* this page cannot be reused so discard it */
				552	__free_pages(page, 0);
				553	return false;
				554	}
				555
				556	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
				557	rx_buf->page_offset, size, truesize);
				558
				559	/* avoid re-using remote pages */
				560	if (unlikely(ice_page_is_reserved(page)))
				561	return false;
				562
				563	#if (PAGE_SIZE < 8192)
				564	/* if we are only owner of page we can reuse it */
				565	if (unlikely(page_count(page) != 1))
				566	return false;
				567
				568	/* flip page offset to other buffer */
				569	rx_buf->page_offset ^= truesize;
				570	#else
				571	/* move offset up to the next cache line */
				572	rx_buf->page_offset += truesize;
				573
				574	if (rx_buf->page_offset > last_offset)
				575	return false;
				576	#endif /* PAGE_SIZE < 8192) */
				577
				578	/* Even if we own the page, we are not allowed to use atomic_set()
				579	* This would break get_page_unless_zero() users.
				580	*/
				581	get_page(rx_buf->page);
				582
				583	return true;
				584	}
				585
				586	/**
				587	* ice_reuse_rx_page - page flip buffer and store it back on the ring
				588	* @rx_ring: rx descriptor ring to store buffers on
				589	* @old_buf: donor buffer to have page reused
				590	*
				591	* Synchronizes page for reuse by the adapter
				592	*/
				593	static void ice_reuse_rx_page(struct ice_ring *rx_ring,
				594	struct ice_rx_buf *old_buf)
				595	{
				596	u16 nta = rx_ring->next_to_alloc;
				597	struct ice_rx_buf *new_buf;
				598
				599	new_buf = &rx_ring->rx_buf[nta];
				600
				601	/* update, and store next to alloc */
				602	nta++;
				603	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
				604
				605	/* transfer page from old buffer to new buffer */
				606	new_buf = old_buf;
				607	}
				608
				609	/**
				610	* ice_fetch_rx_buf - Allocate skb and populate it
				611	* @rx_ring: rx descriptor ring to transact packets on
				612	* @rx_desc: descriptor containing info written by hardware
				613	*
				614	* This function allocates an skb on the fly, and populates it with the page
				615	* data from the current receive descriptor, taking care to set up the skb
				616	* correctly, as well as handling calling the page recycle function if
				617	* necessary.
				618	*/
				619	static struct sk_buff ice_fetch_rx_buf(struct ice_ring rx_ring,
				620	union ice_32b_rx_flex_desc *rx_desc)
				621	{
				622	struct ice_rx_buf *rx_buf;
				623	struct sk_buff *skb;
				624	struct page *page;
				625
				626	rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
				627	page = rx_buf->page;
				628	prefetchw(page);
				629
				630	skb = rx_buf->skb;
				631
				632	if (likely(!skb)) {
				633	u8 *page_addr = page_address(page) + rx_buf->page_offset;
				634
				635	/* prefetch first cache line of first page */
				636	prefetch(page_addr);
				637	#if L1_CACHE_BYTES < 128
				638	prefetch((void *)(page_addr + L1_CACHE_BYTES));
				639	#endif /* L1_CACHE_BYTES */
				640
				641	/* allocate a skb to store the frags */
				642	skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
				643	ICE_RX_HDR_SIZE,
				644	GFP_ATOMIC \| __GFP_NOWARN);
				645	if (unlikely(!skb)) {
				646	rx_ring->rx_stats.alloc_buf_failed++;
				647	return NULL;
				648	}
				649
				650	/* we will be copying header into skb->data in
				651	* pskb_may_pull so it is in our interest to prefetch
				652	* it now to avoid a possible cache miss
				653	*/
				654	prefetchw(skb->data);
				655
				656	skb_record_rx_queue(skb, rx_ring->q_index);
				657	} else {
				658	/* we are reusing so sync this buffer for CPU use */
				659	dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma,
				660	rx_buf->page_offset,
				661	ICE_RXBUF_2048,
				662	DMA_FROM_DEVICE);
				663
				664	rx_buf->skb = NULL;
				665	}
				666
				667	/* pull page into skb */
				668	if (ice_add_rx_frag(rx_buf, rx_desc, skb)) {
				669	/* hand second half of page back to the ring */
				670	ice_reuse_rx_page(rx_ring, rx_buf);
				671	rx_ring->rx_stats.page_reuse_count++;
				672	} else {
				673	/* we are not reusing the buffer so unmap it */
				674	dma_unmap_page(rx_ring->dev, rx_buf->dma, PAGE_SIZE,
				675	DMA_FROM_DEVICE);
				676	}
				677
				678	/* clear contents of buffer_info */
				679	rx_buf->page = NULL;
				680
				681	return skb;
				682	}
				683
				684	/**
				685	* ice_pull_tail - ice specific version of skb_pull_tail
				686	* @skb: pointer to current skb being adjusted
				687	*
				688	* This function is an ice specific version of __pskb_pull_tail. The
				689	* main difference between this version and the original function is that
				690	* this function can make several assumptions about the state of things
				691	* that allow for significant optimizations versus the standard function.
				692	* As a result we can do things like drop a frag and maintain an accurate
				693	* truesize for the skb.
				694	*/
				695	static void ice_pull_tail(struct sk_buff *skb)
				696	{
				697	struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
				698	unsigned int pull_len;
				699	unsigned char *va;
				700
				701	/* it is valid to use page_address instead of kmap since we are
				702	* working with pages allocated out of the lomem pool per
				703	* alloc_page(GFP_ATOMIC)
				704	*/
				705	va = skb_frag_address(frag);
				706
				707	/* we need the header to contain the greater of either ETH_HLEN or
				708	* 60 bytes if the skb->len is less than 60 for skb_pad.
				709	*/
				710	pull_len = eth_get_headlen(va, ICE_RX_HDR_SIZE);
				711
				712	/* align pull length to size of long to optimize memcpy performance */
				713	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
				714
				715	/* update all of the pointers */
				716	skb_frag_size_sub(frag, pull_len);
				717	frag->page_offset += pull_len;
				718	skb->data_len -= pull_len;
				719	skb->tail += pull_len;
				720	}
				721
				722	/**
				723	* ice_cleanup_headers - Correct empty headers
				724	* @skb: pointer to current skb being fixed
				725	*
				726	* Also address the case where we are pulling data in on pages only
				727	* and as such no data is present in the skb header.
				728	*
				729	* In addition if skb is not at least 60 bytes we need to pad it so that
				730	* it is large enough to qualify as a valid Ethernet frame.
				731	*
				732	* Returns true if an error was encountered and skb was freed.
				733	*/
				734	static bool ice_cleanup_headers(struct sk_buff *skb)
				735	{
				736	/* place header in linear portion of buffer */
				737	if (skb_is_nonlinear(skb))
				738	ice_pull_tail(skb);
				739
				740	/* if eth_skb_pad returns an error the skb was freed */
				741	if (eth_skb_pad(skb))
				742	return true;
				743
				744	return false;
				745	}
				746
				747	/**
				748	* ice_test_staterr - tests bits in Rx descriptor status and error fields
				749	* @rx_desc: pointer to receive descriptor (in le64 format)
				750	* @stat_err_bits: value to mask
				751	*
				752	* This function does some fast chicanery in order to return the
				753	* value of the mask which is really only used for boolean tests.
				754	* The status_error_len doesn't need to be shifted because it begins
				755	* at offset zero.
				756	*/
				757	static bool ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc,
				758	const u16 stat_err_bits)
				759	{
				760	return !!(rx_desc->wb.status_error0 &
				761	cpu_to_le16(stat_err_bits));
				762	}
				763
				764	/**
				765	* ice_is_non_eop - process handling of non-EOP buffers
				766	* @rx_ring: Rx ring being processed
				767	* @rx_desc: Rx descriptor for current buffer
				768	* @skb: Current socket buffer containing buffer in progress
				769	*
				770	* This function updates next to clean. If the buffer is an EOP buffer
				771	* this function exits returning false, otherwise it will place the
				772	* sk_buff in the next buffer to be chained and return true indicating
				773	* that this is in fact a non-EOP buffer.
				774	*/
				775	static bool ice_is_non_eop(struct ice_ring *rx_ring,
				776	union ice_32b_rx_flex_desc *rx_desc,
				777	struct sk_buff *skb)
				778	{
				779	u32 ntc = rx_ring->next_to_clean + 1;
				780
				781	/* fetch, update, and store next to clean */
				782	ntc = (ntc < rx_ring->count) ? ntc : 0;
				783	rx_ring->next_to_clean = ntc;
				784
				785	prefetch(ICE_RX_DESC(rx_ring, ntc));
				786
				787	/* if we are the last buffer then there is nothing else to do */
				788	#define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)
				789	if (likely(ice_test_staterr(rx_desc, ICE_RXD_EOF)))
				790	return false;
				791
				792	/* place skb in next buffer to be received */
				793	rx_ring->rx_buf[ntc].skb = skb;
				794	rx_ring->rx_stats.non_eop_descs++;
				795
				796	return true;
				797	}
				798
				799	/**
				800	* ice_receive_skb - Send a completed packet up the stack
				801	* @rx_ring: rx ring in play
				802	* @skb: packet to send up
				803	* @vlan_tag: vlan tag for packet
				804	*
				805	* This function sends the completed packet (via. skb) up the stack using
				806	* gro receive functions (with/without vlan tag)
				807	*/
				808	static void ice_receive_skb(struct ice_ring rx_ring, struct sk_buff skb,
				809	u16 vlan_tag)
				810	{
				811	if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
				812	(vlan_tag & VLAN_VID_MASK)) {
				813	__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
				814	}
				815	napi_gro_receive(&rx_ring->q_vector->napi, skb);
				816	}
				817
				818	/**
				819	* ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
				820	* @rx_ring: rx descriptor ring to transact packets on
				821	* @budget: Total limit on number of packets to process
				822	*
				823	* This function provides a "bounce buffer" approach to Rx interrupt
				824	* processing. The advantage to this is that on systems that have
				825	* expensive overhead for IOMMU access this provides a means of avoiding
				826	* it by maintaining the mapping of the page to the system.
				827	*
				828	* Returns amount of work completed
				829	*/
				830	static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
				831	{
				832	unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
				833	u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
				834	bool failure = false;
				835
				836	/* start the loop to process RX packets bounded by 'budget' */
				837	while (likely(total_rx_pkts < (unsigned int)budget)) {
				838	union ice_32b_rx_flex_desc *rx_desc;
				839	struct sk_buff *skb;
				840	u16 stat_err_bits;
				841	u16 vlan_tag = 0;
				842
				843	/* return some buffers to hardware, one at a time is too slow */
				844	if (cleaned_count >= ICE_RX_BUF_WRITE) {
				845	failure = failure \|\|
				846	ice_alloc_rx_bufs(rx_ring, cleaned_count);
				847	cleaned_count = 0;
				848	}
				849
				850	/* get the RX desc from RX ring based on 'next_to_clean' */
				851	rx_desc = ICE_RX_DESC(rx_ring, rx_ring->next_to_clean);
				852
				853	/* status_error_len will always be zero for unused descriptors
				854	* because it's cleared in cleanup, and overlaps with hdr_addr
				855	* which is always zero because packet split isn't used, if the
				856	* hardware wrote DD then it will be non-zero
				857	*/
				858	stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S);
				859	if (!ice_test_staterr(rx_desc, stat_err_bits))
				860	break;
				861
				862	/* This memory barrier is needed to keep us from reading
				863	* any other fields out of the rx_desc until we know the
				864	* DD bit is set.
				865	*/
				866	dma_rmb();
				867
				868	/* allocate (if needed) and populate skb */
				869	skb = ice_fetch_rx_buf(rx_ring, rx_desc);
				870	if (!skb)
				871	break;
				872
				873	cleaned_count++;
				874
				875	/* skip if it is NOP desc */
				876	if (ice_is_non_eop(rx_ring, rx_desc, skb))
				877	continue;
				878
				879	stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S);
				880	if (unlikely(ice_test_staterr(rx_desc, stat_err_bits))) {
				881	dev_kfree_skb_any(skb);
				882	continue;
				883	}
				884
				885	stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_L2TAG1P_S);
				886	if (ice_test_staterr(rx_desc, stat_err_bits))
				887	vlan_tag = le16_to_cpu(rx_desc->wb.l2tag1);
				888
				889	/* correct empty headers and pad skb if needed (to make valid
				890	* ethernet frame
				891	*/
				892	if (ice_cleanup_headers(skb)) {
				893	skb = NULL;
				894	continue;
				895	}
				896
				897	/* probably a little skewed due to removing CRC */
				898	total_rx_bytes += skb->len;
				899
				900	/* send completed skb up the stack */
				901	ice_receive_skb(rx_ring, skb, vlan_tag);
				902
				903	/* update budget accounting */
				904	total_rx_pkts++;
				905	}
				906
				907	/* update queue and vector specific stats */
				908	u64_stats_update_begin(&rx_ring->syncp);
				909	rx_ring->stats.pkts += total_rx_pkts;
				910	rx_ring->stats.bytes += total_rx_bytes;
				911	u64_stats_update_end(&rx_ring->syncp);
				912	rx_ring->q_vector->rx.total_pkts += total_rx_pkts;
				913	rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
				914
				915	/* guarantee a trip back through this routine if there was a failure */
				916	return failure ? budget : (int)total_rx_pkts;
				917	}
				918
				919	/**
				920	* ice_napi_poll - NAPI polling Rx/Tx cleanup routine
				921	* @napi: napi struct with our devices info in it
				922	* @budget: amount of work driver is allowed to do this pass, in packets
				923	*
				924	* This function will clean all queues associated with a q_vector.
				925	*
				926	* Returns the amount of work done
				927	*/
				928	int ice_napi_poll(struct napi_struct *napi, int budget)
				929	{
				930	struct ice_q_vector *q_vector =
				931	container_of(napi, struct ice_q_vector, napi);
				932	struct ice_vsi *vsi = q_vector->vsi;
				933	struct ice_pf *pf = vsi->back;
				934	bool clean_complete = true;
				935	int budget_per_ring = 0;
				936	struct ice_ring *ring;
				937	int work_done = 0;
				938
				939	/* Since the actual Tx work is minimal, we can give the Tx a larger
				940	* budget and be more aggressive about cleaning up the Tx descriptors.
				941	*/
				942	ice_for_each_ring(ring, q_vector->tx)
				943	if (!ice_clean_tx_irq(vsi, ring, budget))
				944	clean_complete = false;
				945
				946	/* Handle case where we are called by netpoll with a budget of 0 */
				947	if (budget <= 0)
				948	return budget;
				949
				950	/* We attempt to distribute budget to each Rx queue fairly, but don't
				951	* allow the budget to go below 1 because that would exit polling early.
				952	*/
				953	if (q_vector->num_ring_rx)
				954	budget_per_ring = max(budget / q_vector->num_ring_rx, 1);
				955
				956	ice_for_each_ring(ring, q_vector->rx) {
				957	int cleaned;
				958
				959	cleaned = ice_clean_rx_irq(ring, budget_per_ring);
				960	work_done += cleaned;
				961	/* if we clean as many as budgeted, we must not be done */
				962	if (cleaned >= budget_per_ring)
				963	clean_complete = false;
				964	}
				965
				966	/* If work not completed, return budget and polling will return */
				967	if (!clean_complete)
				968	return budget;
				969
				970	/* Work is done so exit the polling mode and re-enable the interrupt */
				971	napi_complete_done(napi, work_done);
				972	if (test_bit(ICE_FLAG_MSIX_ENA, pf->flags))
				973	ice_irq_dynamic_ena(&vsi->back->hw, vsi, q_vector);
				974	return 0;
				975	}
				976
				977	/* helper function for building cmd/type/offset */
				978	static __le64
				979	build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
				980	{
				981	return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA \|
				982	(td_cmd << ICE_TXD_QW1_CMD_S) \|
				983	(td_offset << ICE_TXD_QW1_OFFSET_S) \|
				984	((u64)size << ICE_TXD_QW1_TX_BUF_SZ_S) \|
				985	(td_tag << ICE_TXD_QW1_L2TAG1_S));
				986	}
				987
				988	/**
				989	* __ice_maybe_stop_tx - 2nd level check for tx stop conditions
				990	* @tx_ring: the ring to be checked
				991	* @size: the size buffer we want to assure is available
				992	*
				993	* Returns -EBUSY if a stop is needed, else 0
				994	*/
				995	static int __ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size)
				996	{
				997	netif_stop_subqueue(tx_ring->netdev, tx_ring->q_index);
				998	/* Memory barrier before checking head and tail */
				999	smp_mb();
				1000
				1001	/* Check again in a case another CPU has just made room available. */
				1002	if (likely(ICE_DESC_UNUSED(tx_ring) < size))
				1003	return -EBUSY;
				1004
				1005	/* A reprieve! - use start_subqueue because it doesn't call schedule */
				1006	netif_start_subqueue(tx_ring->netdev, tx_ring->q_index);
				1007	++tx_ring->tx_stats.restart_q;
				1008	return 0;
				1009	}
				1010
				1011	/**
				1012	* ice_maybe_stop_tx - 1st level check for tx stop conditions
				1013	* @tx_ring: the ring to be checked
				1014	* @size: the size buffer we want to assure is available
				1015	*
				1016	* Returns 0 if stop is not needed
				1017	*/
				1018	static int ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size)
				1019	{
				1020	if (likely(ICE_DESC_UNUSED(tx_ring) >= size))
				1021	return 0;
				1022	return __ice_maybe_stop_tx(tx_ring, size);
				1023	}
				1024
				1025	/**
				1026	* ice_tx_map - Build the Tx descriptor
				1027	* @tx_ring: ring to send buffer on
				1028	* @first: first buffer info buffer to use
				1029	*
				1030	* This function loops over the skb data pointed to by *first
				1031	* and gets a physical address for each memory location and programs
				1032	* it and the length into the transmit descriptor.
				1033	*/
				1034	static void ice_tx_map(struct ice_ring tx_ring, struct ice_tx_buf first)
				1035	{
				1036	u64 td_offset = 0, td_tag = 0, td_cmd = 0;
				1037	u16 i = tx_ring->next_to_use;
				1038	struct skb_frag_struct *frag;
				1039	unsigned int data_len, size;
				1040	struct ice_tx_desc *tx_desc;
				1041	struct ice_tx_buf *tx_buf;
				1042	struct sk_buff *skb;
				1043	dma_addr_t dma;
				1044
				1045	skb = first->skb;
				1046
				1047	data_len = skb->data_len;
				1048	size = skb_headlen(skb);
				1049
				1050	tx_desc = ICE_TX_DESC(tx_ring, i);
				1051
				1052	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
				1053
				1054	tx_buf = first;
				1055
				1056	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
				1057	unsigned int max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;
				1058
				1059	if (dma_mapping_error(tx_ring->dev, dma))
				1060	goto dma_error;
				1061
				1062	/* record length, and DMA address */
				1063	dma_unmap_len_set(tx_buf, len, size);
				1064	dma_unmap_addr_set(tx_buf, dma, dma);
				1065
				1066	/* align size to end of page */
				1067	max_data += -dma & (ICE_MAX_READ_REQ_SIZE - 1);
				1068	tx_desc->buf_addr = cpu_to_le64(dma);
				1069
				1070	/* account for data chunks larger than the hardware
				1071	* can handle
				1072	*/
				1073	while (unlikely(size > ICE_MAX_DATA_PER_TXD)) {
				1074	tx_desc->cmd_type_offset_bsz =
				1075	build_ctob(td_cmd, td_offset, max_data, td_tag);
				1076
				1077	tx_desc++;
				1078	i++;
				1079
				1080	if (i == tx_ring->count) {
				1081	tx_desc = ICE_TX_DESC(tx_ring, 0);
				1082	i = 0;
				1083	}
				1084
				1085	dma += max_data;
				1086	size -= max_data;
				1087
				1088	max_data = ICE_MAX_DATA_PER_TXD_ALIGNED;
				1089	tx_desc->buf_addr = cpu_to_le64(dma);
				1090	}
				1091
				1092	if (likely(!data_len))
				1093	break;
				1094
				1095	tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
				1096	size, td_tag);
				1097
				1098	tx_desc++;
				1099	i++;
				1100
				1101	if (i == tx_ring->count) {
				1102	tx_desc = ICE_TX_DESC(tx_ring, 0);
				1103	i = 0;
				1104	}
				1105
				1106	size = skb_frag_size(frag);
				1107	data_len -= size;
				1108
				1109	dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
				1110	DMA_TO_DEVICE);
				1111
				1112	tx_buf = &tx_ring->tx_buf[i];
				1113	}
				1114
				1115	/* record bytecount for BQL */
				1116	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
				1117
				1118	/* record SW timestamp if HW timestamp is not available */
				1119	skb_tx_timestamp(first->skb);
				1120
				1121	i++;
				1122	if (i == tx_ring->count)
				1123	i = 0;
				1124
				1125	/* write last descriptor with RS and EOP bits */
				1126	td_cmd \|= (u64)(ICE_TX_DESC_CMD_EOP \| ICE_TX_DESC_CMD_RS);
				1127	tx_desc->cmd_type_offset_bsz =
				1128	build_ctob(td_cmd, td_offset, size, td_tag);
				1129
				1130	/* Force memory writes to complete before letting h/w know there
				1131	* are new descriptors to fetch.
				1132	*
				1133	* We also use this memory barrier to make certain all of the
				1134	* status bits have been updated before next_to_watch is written.
				1135	*/
				1136	wmb();
				1137
				1138	/* set next_to_watch value indicating a packet is present */
				1139	first->next_to_watch = tx_desc;
				1140
				1141	tx_ring->next_to_use = i;
				1142
				1143	ice_maybe_stop_tx(tx_ring, DESC_NEEDED);
				1144
				1145	/* notify HW of packet */
				1146	if (netif_xmit_stopped(txring_txq(tx_ring)) \|\| !skb->xmit_more) {
				1147	writel(i, tx_ring->tail);
				1148
				1149	/* we need this if more than one processor can write to our tail
				1150	* at a time, it synchronizes IO on IA64/Altix systems
				1151	*/
				1152	mmiowb();
				1153	}
				1154
				1155	return;
				1156
				1157	dma_error:
				1158	/* clear dma mappings for failed tx_buf map */
				1159	for (;;) {
				1160	tx_buf = &tx_ring->tx_buf[i];
				1161	ice_unmap_and_free_tx_buf(tx_ring, tx_buf);
				1162	if (tx_buf == first)
				1163	break;
				1164	if (i == 0)
				1165	i = tx_ring->count;
				1166	i--;
				1167	}
				1168
				1169	tx_ring->next_to_use = i;
				1170	}
				1171
				1172	/**
				1173	* ice_txd_use_count - estimate the number of descriptors needed for Tx
				1174	* @size: transmit request size in bytes
				1175	*
				1176	* Due to hardware alignment restrictions (4K alignment), we need to
				1177	* assume that we can have no more than 12K of data per descriptor, even
				1178	* though each descriptor can take up to 16K - 1 bytes of aligned memory.
				1179	* Thus, we need to divide by 12K. But division is slow! Instead,
				1180	* we decompose the operation into shifts and one relatively cheap
				1181	* multiply operation.
				1182	*
				1183	* To divide by 12K, we first divide by 4K, then divide by 3:
				1184	* To divide by 4K, shift right by 12 bits
				1185	* To divide by 3, multiply by 85, then divide by 256
				1186	* (Divide by 256 is done by shifting right by 8 bits)
				1187	* Finally, we add one to round up. Because 256 isn't an exact multiple of
				1188	* 3, we'll underestimate near each multiple of 12K. This is actually more
				1189	* accurate as we have 4K - 1 of wiggle room that we can fit into the last
				1190	* segment. For our purposes this is accurate out to 1M which is orders of
				1191	* magnitude greater than our largest possible GSO size.
				1192	*
				1193	* This would then be implemented as:
				1194	* return (((size >> 12) * 85) >> 8) + 1;
				1195	*
				1196	* Since multiplication and division are commutative, we can reorder
				1197	* operations into:
				1198	* return ((size * 85) >> 20) + 1;
				1199	*/
				1200	static unsigned int ice_txd_use_count(unsigned int size)
				1201	{
				1202	return ((size * 85) >> 20) + 1;
				1203	}
				1204
				1205	/**
				1206	* ice_xmit_desc_count - calculate number of tx descriptors needed
				1207	* @skb: send buffer
				1208	*
				1209	* Returns number of data descriptors needed for this skb.
				1210	*/
				1211	static unsigned int ice_xmit_desc_count(struct sk_buff *skb)
				1212	{
				1213	const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
				1214	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
				1215	unsigned int count = 0, size = skb_headlen(skb);
				1216
				1217	for (;;) {
				1218	count += ice_txd_use_count(size);
				1219
				1220	if (!nr_frags--)
				1221	break;
				1222
				1223	size = skb_frag_size(frag++);
				1224	}
				1225
				1226	return count;
				1227	}
				1228
				1229	/**
				1230	* __ice_chk_linearize - Check if there are more than 8 buffers per packet
				1231	* @skb: send buffer
				1232	*
				1233	* Note: This HW can't DMA more than 8 buffers to build a packet on the wire
				1234	* and so we need to figure out the cases where we need to linearize the skb.
				1235	*
				1236	* For TSO we need to count the TSO header and segment payload separately.
				1237	* As such we need to check cases where we have 7 fragments or more as we
				1238	* can potentially require 9 DMA transactions, 1 for the TSO header, 1 for
				1239	* the segment payload in the first descriptor, and another 7 for the
				1240	* fragments.
				1241	*/
				1242	static bool __ice_chk_linearize(struct sk_buff *skb)
				1243	{
				1244	const struct skb_frag_struct frag, stale;
				1245	int nr_frags, sum;
				1246
				1247	/* no need to check if number of frags is less than 7 */
				1248	nr_frags = skb_shinfo(skb)->nr_frags;
				1249	if (nr_frags < (ICE_MAX_BUF_TXD - 1))
				1250	return false;
				1251
				1252	/* We need to walk through the list and validate that each group
				1253	* of 6 fragments totals at least gso_size.
				1254	*/
				1255	nr_frags -= ICE_MAX_BUF_TXD - 2;
				1256	frag = &skb_shinfo(skb)->frags[0];
				1257
				1258	/* Initialize size to the negative value of gso_size minus 1. We
				1259	* use this as the worst case scenerio in which the frag ahead
				1260	* of us only provides one byte which is why we are limited to 6
				1261	* descriptors for a single transmit as the header and previous
				1262	* fragment are already consuming 2 descriptors.
				1263	*/
				1264	sum = 1 - skb_shinfo(skb)->gso_size;
				1265
				1266	/* Add size of frags 0 through 4 to create our initial sum */
				1267	sum += skb_frag_size(frag++);
				1268	sum += skb_frag_size(frag++);
				1269	sum += skb_frag_size(frag++);
				1270	sum += skb_frag_size(frag++);
				1271	sum += skb_frag_size(frag++);
				1272
				1273	/* Walk through fragments adding latest fragment, testing it, and
				1274	* then removing stale fragments from the sum.
				1275	*/
				1276	stale = &skb_shinfo(skb)->frags[0];
				1277	for (;;) {
				1278	sum += skb_frag_size(frag++);
				1279
				1280	/* if sum is negative we failed to make sufficient progress */
				1281	if (sum < 0)
				1282	return true;
				1283
				1284	if (!nr_frags--)
				1285	break;
				1286
				1287	sum -= skb_frag_size(stale++);
				1288	}
				1289
				1290	return false;
				1291	}
				1292
				1293	/**
				1294	* ice_chk_linearize - Check if there are more than 8 fragments per packet
				1295	* @skb: send buffer
				1296	* @count: number of buffers used
				1297	*
				1298	* Note: Our HW can't scatter-gather more than 8 fragments to build
				1299	* a packet on the wire and so we need to figure out the cases where we
				1300	* need to linearize the skb.
				1301	*/
				1302	static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count)
				1303	{
				1304	/* Both TSO and single send will work if count is less than 8 */
				1305	if (likely(count < ICE_MAX_BUF_TXD))
				1306	return false;
				1307
				1308	if (skb_is_gso(skb))
				1309	return __ice_chk_linearize(skb);
				1310
				1311	/* we can support up to 8 data buffers for a single send */
				1312	return count != ICE_MAX_BUF_TXD;
				1313	}
				1314
				1315	/**
				1316	* ice_xmit_frame_ring - Sends buffer on Tx ring
				1317	* @skb: send buffer
				1318	* @tx_ring: ring to send buffer on
				1319	*
				1320	* Returns NETDEV_TX_OK if sent, else an error code
				1321	*/
				1322	static netdev_tx_t
				1323	ice_xmit_frame_ring(struct sk_buff skb, struct ice_ring tx_ring)
				1324	{
				1325	struct ice_tx_buf *first;
				1326	unsigned int count;
				1327
				1328	count = ice_xmit_desc_count(skb);
				1329	if (ice_chk_linearize(skb, count)) {
				1330	if (__skb_linearize(skb))
				1331	goto out_drop;
				1332	count = ice_txd_use_count(skb->len);
				1333	tx_ring->tx_stats.tx_linearize++;
				1334	}
				1335
				1336	/* need: 1 descriptor per page * PAGE_SIZE/ICE_MAX_DATA_PER_TXD,
				1337	* + 1 desc for skb_head_len/ICE_MAX_DATA_PER_TXD,
				1338	* + 4 desc gap to avoid the cache line where head is,
				1339	* + 1 desc for context descriptor,
				1340	* otherwise try next time
				1341	*/
				1342	if (ice_maybe_stop_tx(tx_ring, count + 4 + 1)) {
				1343	tx_ring->tx_stats.tx_busy++;
				1344	return NETDEV_TX_BUSY;
				1345	}
				1346
				1347	/* record the location of the first descriptor for this packet */
				1348	first = &tx_ring->tx_buf[tx_ring->next_to_use];
				1349	first->skb = skb;
				1350	first->bytecount = max_t(unsigned int, skb->len, ETH_ZLEN);
				1351	first->gso_segs = 1;
				1352
				1353	ice_tx_map(tx_ring, first);
				1354	return NETDEV_TX_OK;
				1355
				1356	out_drop:
				1357	dev_kfree_skb_any(skb);
				1358	return NETDEV_TX_OK;
				1359	}
				1360
				1361	/**
				1362	* ice_start_xmit - Selects the correct VSI and Tx queue to send buffer
				1363	* @skb: send buffer
				1364	* @netdev: network interface device structure
				1365	*
				1366	* Returns NETDEV_TX_OK if sent, else an error code
				1367	*/
				1368	netdev_tx_t ice_start_xmit(struct sk_buff skb, struct net_device netdev)
				1369	{
				1370	struct ice_netdev_priv *np = netdev_priv(netdev);
				1371	struct ice_vsi *vsi = np->vsi;
				1372	struct ice_ring *tx_ring;
				1373
				1374	tx_ring = vsi->tx_rings[skb->queue_mapping];
				1375
				1376	/* hardware can't handle really short frames, hardware padding works
				1377	* beyond this point
				1378	*/
				1379	if (skb_put_padto(skb, ICE_MIN_TX_LEN))
				1380	return NETDEV_TX_OK;
				1381
				1382	return ice_xmit_frame_ring(skb, tx_ring);
				1383	}