qla3xxx: Kernel Panic on pSeries under stress conditions. To reproduce this panic consistently, we run an intensive network application like 'netperf'. After waiting for a couple of seconds, you will see a stack trace and a kernel panic where we are calling pci_unmap_single() in ql_poll(). Changes: 1) Check the flags on the Response MAC IO Control block to check for errors. 2) Ensure that if we are on the 4022 we only use one segment. 3) Before, we were reading the memory-mapped producer index register every time we iterated in the loop when clearing the queue. We should only be iterating to a known point, not chasing the producer index while it is being updated. Signed-off-by: Benjamin Li <benjamin.li@qlogic.com> Signed-off-by: Ron Mercer <ron.mercer@qlogic.com> Signed-off-by: Jeff Garzik <jeff@garzik.org>

commit: e8f4df2491d07fc369e9d631e0638da03a9b3a38 [log] [tgz]
author: Benjamin Li <benjamin.li@qlogic.com> Mon Feb 26 11:06:42 2007 -0800
committer: Jeff Garzik <jeff@garzik.org> Tue Feb 27 04:21:45 2007 -0500
tree: dc4b183711b05b7717e36cf5fb2c5b915b357c45
parent: 63b66d12de57d8455615d9f619e18824137ed547 [diff] [blame]
diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c
index 5bf446f..e44e850 100755
--- a/drivers/net/qla3xxx.c
+++ b/drivers/net/qla3xxx.c

@@ -1747,8 +1747,31 @@
 {
 	struct ql_tx_buf_cb *tx_cb;
 	int i;
+	int retval = 0;
 
+	if(mac_rsp->flags & OB_MAC_IOCB_RSP_S) {
+		printk(KERN_WARNING "Frame short but, frame was padded and sent.\n");
+	}
+	
 	tx_cb = &qdev->tx_buf[mac_rsp->transaction_id];
+
+	/*  Check the transmit response flags for any errors */
+	if(mac_rsp->flags & OB_MAC_IOCB_RSP_S) {
+		printk(KERN_ERR "Frame too short to be legal, frame not sent.\n");
+
+		qdev->stats.tx_errors++;
+		retval = -EIO;
+		goto frame_not_sent;
+	}
+
+	if(tx_cb->seg_count == 0) {
+		printk(KERN_ERR "tx_cb->seg_count == 0: %d\n", mac_rsp->transaction_id);
+
+		qdev->stats.tx_errors++;
+		retval = -EIO;
+		goto invalid_seg_count;
+	}
+
 	pci_unmap_single(qdev->pdev,
 			 pci_unmap_addr(&tx_cb->map[0], mapaddr),
 			 pci_unmap_len(&tx_cb->map[0], maplen),
@@ -1765,8 +1788,12 @@
 	}
 	qdev->stats.tx_packets++;
 	qdev->stats.tx_bytes += tx_cb->skb->len;
+
+frame_not_sent:
 	dev_kfree_skb_irq(tx_cb->skb);
 	tx_cb->skb = NULL;
+
+invalid_seg_count:
 	atomic_inc(&qdev->tx_count);
 }
 
@@ -1923,8 +1950,10 @@
 	unsigned long hw_flags;
 	int work_done = 0;
 
+	u32 rsp_producer_index = le32_to_cpu(*(qdev->prsp_producer_index));
+
 	/* While there are entries in the completion queue. */
-	while ((cpu_to_le32(*(qdev->prsp_producer_index)) !=
+	while ((rsp_producer_index !=
 		qdev->rsp_consumer_index) && (work_done < work_to_do)) {
 
 		net_rsp = qdev->rsp_current;
@@ -2004,13 +2033,6 @@
 		}
 
 		spin_unlock_irqrestore(&qdev->hw_lock, hw_flags);
-
-		if (unlikely(netif_queue_stopped(qdev->ndev))) {
-			if (netif_queue_stopped(qdev->ndev) &&
-			    (atomic_read(&qdev->tx_count) > 
-			     (NUM_REQ_Q_ENTRIES / 4)))
-				netif_wake_queue(qdev->ndev);
-		}
 	}
 
 	return *tx_cleaned + *rx_cleaned;
@@ -2031,7 +2053,8 @@
 	*budget -= rx_cleaned;
 	ndev->quota -= rx_cleaned;
 
-	if ((!tx_cleaned && !rx_cleaned) || !netif_running(ndev)) {
+	if( tx_cleaned + rx_cleaned != work_to_do ||
+	    !netif_running(ndev)) {
 quit_polling:
 		netif_rx_complete(ndev);
 
@@ -2093,8 +2116,8 @@
 		queue_delayed_work(qdev->workqueue, &qdev->reset_work, 0);
 		spin_unlock(&qdev->adapter_lock);
 	} else if (value & ISP_IMR_DISABLE_CMPL_INT) {
+		ql_disable_interrupts(qdev);
 		if (likely(netif_rx_schedule_prep(ndev))) {
-			ql_disable_interrupts(qdev);
 			__netif_rx_schedule(ndev);
 		}
 	} else {
@@ -2113,8 +2136,12 @@
  * the next AOL if more frags are coming.  
  * That is why the frags:segment count  ratio is not linear.
  */
-static int ql_get_seg_count(unsigned short frags)
+static int ql_get_seg_count(struct ql3_adapter *qdev,
+			    unsigned short frags)
 {
+	if (qdev->device_id == QL3022_DEVICE_ID)
+		return 1;
+
 	switch(frags) {
 	case 0:	return 1;	/* just the skb->data seg */
 	case 1:	return 2;	/* skb->data + 1 frag */
@@ -2183,14 +2210,15 @@
 {
 	struct oal *oal;
 	struct oal_entry *oal_entry;
-	int len = skb_headlen(skb);
+	int len = skb->len;
 	dma_addr_t map;
 	int err;
 	int completed_segs, i;
 	int seg_cnt, seg = 0;
 	int frag_cnt = (int)skb_shinfo(skb)->nr_frags;
 
-	seg_cnt = tx_cb->seg_count = ql_get_seg_count((skb_shinfo(skb)->nr_frags));
+	seg_cnt = tx_cb->seg_count = ql_get_seg_count(qdev,
+						      (skb_shinfo(skb)->nr_frags));
 	if(seg_cnt == -1) {
 		printk(KERN_ERR PFX"%s: invalid segment count!\n",__func__);
 		return NETDEV_TX_BUSY;
@@ -2216,7 +2244,7 @@
 	pci_unmap_len_set(&tx_cb->map[seg], maplen, len);
 	seg++;
 
-	if (!skb_shinfo(skb)->nr_frags) {
+	if (seg_cnt == 1) {
 		/* Terminate the last segment. */
 		oal_entry->len =
 		    cpu_to_le32(le32_to_cpu(oal_entry->len) | OAL_LAST_ENTRY);
@@ -2341,13 +2369,12 @@
 	struct ob_mac_iocb_req *mac_iocb_ptr;
 
 	if (unlikely(atomic_read(&qdev->tx_count) < 2)) {
-		if (!netif_queue_stopped(ndev))
-			netif_stop_queue(ndev);
 		return NETDEV_TX_BUSY;
 	}
 	
 	tx_cb = &qdev->tx_buf[qdev->req_producer_index] ;
-	if((tx_cb->seg_count = ql_get_seg_count((skb_shinfo(skb)->nr_frags))) == -1) {
+	if((tx_cb->seg_count = ql_get_seg_count(qdev,
+						(skb_shinfo(skb)->nr_frags))) == -1) {
 		printk(KERN_ERR PFX"%s: invalid segment count!\n",__func__);
 		return NETDEV_TX_OK;
 	}
@@ -2359,7 +2386,8 @@
 	mac_iocb_ptr->transaction_id = qdev->req_producer_index;
 	mac_iocb_ptr->data_len = cpu_to_le16((u16) tot_len);
 	tx_cb->skb = skb;
-	if (skb->ip_summed == CHECKSUM_PARTIAL)
+	if (qdev->device_id == QL3032_DEVICE_ID &&
+	    skb->ip_summed == CHECKSUM_PARTIAL)
 		ql_hw_csum_setup(skb, mac_iocb_ptr);
 	
 	if(ql_send_map(qdev,mac_iocb_ptr,tx_cb,skb) != NETDEV_TX_OK) {
commit	e8f4df2491d07fc369e9d631e0638da03a9b3a38	[log] [tgz]
author	Benjamin Li <benjamin.li@qlogic.com>	Mon Feb 26 11:06:42 2007 -0800
committer	Jeff Garzik <jeff@garzik.org>	Tue Feb 27 04:21:45 2007 -0500
tree	dc4b183711b05b7717e36cf5fb2c5b915b357c45
parent	63b66d12de57d8455615d9f619e18824137ed547 [diff] [blame]