drivers/net/intel: use napi_complete_done()

As per Eric Dumazet's previous patches:
(see commit (24d2e4a50737) - tg3: use napi_complete_done())

Quoting verbatim:
Using napi_complete_done() instead of napi_complete() allows
us to use /sys/class/net/ethX/gro_flush_timeout

GRO layer can aggregate more packets if the flush is delayed a bit,
without having to set too big coalescing parameters that impact
latencies.
</end quote>

Tested
configuration: low latency via ethtool -C ethx adaptive-rx off
				rx-usecs 10 adaptive-tx off tx-usecs 15
workload: streaming rx using netperf TCP_MAERTS

igb:
MIGRATED TCP MAERTS TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.0.0.1 () port 0 AF_INET : demo
...
Interim result:  941.48 10^6bits/s over 1.000 seconds ending at 1440193171.589

Alignment      Offset         Bytes    Bytes       Recvs   Bytes    Sends
Local  Remote  Local  Remote  Xfered   Per                 Per
Recv   Send    Recv   Send             Recv (avg)          Send (avg)
    8       8      0       0 1176930056  1475.36    797726   16384.00  71905

MIGRATED TCP MAERTS TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 10.0.0.1 () port 0 AF_INET : demo
...
Interim result:  941.49 10^6bits/s over 0.997 seconds ending at 1440193142.763

Alignment      Offset         Bytes    Bytes       Recvs   Bytes    Sends
Local  Remote  Local  Remote  Xfered   Per                 Per
Recv   Send    Recv   Send             Recv (avg)          Send (avg)
    8       8      0       0 1175182320  50476.00     23282   16384.00  71816

i40e:
Hard to test because the traffic is incoming so fast (24Gb/s) that GRO
always receives 87kB, even at the highest interrupt rate.

Other drivers were only compile tested.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 7e62675..ea7b098 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -151,7 +151,7 @@
 #endif /* CONFIG_IGB_DCA */
 static int igb_poll(struct napi_struct *, int);
 static bool igb_clean_tx_irq(struct igb_q_vector *);
-static bool igb_clean_rx_irq(struct igb_q_vector *, int);
+static int igb_clean_rx_irq(struct igb_q_vector *, int);
 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
 static void igb_tx_timeout(struct net_device *);
 static void igb_reset_task(struct work_struct *);
@@ -6364,6 +6364,7 @@
 						     struct igb_q_vector,
 						     napi);
 	bool clean_complete = true;
+	int work_done = 0;
 
 #ifdef CONFIG_IGB_DCA
 	if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
@@ -6372,15 +6373,19 @@
 	if (q_vector->tx.ring)
 		clean_complete = igb_clean_tx_irq(q_vector);
 
-	if (q_vector->rx.ring)
-		clean_complete &= igb_clean_rx_irq(q_vector, budget);
+	if (q_vector->rx.ring) {
+		int cleaned = igb_clean_rx_irq(q_vector, budget);
+
+		work_done += cleaned;
+		clean_complete &= (cleaned < budget);
+	}
 
 	/* If all work not completed, return budget and keep polling */
 	if (!clean_complete)
 		return budget;
 
 	/* If not enough Rx work done, exit the polling mode */
-	napi_complete(napi);
+	napi_complete_done(napi, work_done);
 	igb_ring_irq_enable(q_vector);
 
 	return 0;
@@ -6904,7 +6909,7 @@
 	skb->protocol = eth_type_trans(skb, rx_ring->netdev);
 }
 
-static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
+static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
 {
 	struct igb_ring *rx_ring = q_vector->rx.ring;
 	struct sk_buff *skb = rx_ring->skb;
@@ -6978,7 +6983,7 @@
 	if (cleaned_count)
 		igb_alloc_rx_buffers(rx_ring, cleaned_count);
 
-	return total_packets < budget;
+	return total_packets;
 }
 
 static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,