IB/ipath: Performance optimization for CPU differences
Different processors have different ordering restrictions for write
combining. By taking advantage of this, we can eliminate some write
barriers when writing to the send buffers.
Signed-off-by: Ralph Campbell <ralph.campbell@qlogic.com>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 16aa61f..559d4a6 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -631,7 +631,7 @@
#endif
static void copy_io(u32 __iomem *piobuf, struct ipath_sge_state *ss,
- u32 length)
+ u32 length, unsigned flush_wc)
{
u32 extra = 0;
u32 data = 0;
@@ -757,11 +757,14 @@
}
/* Update address before sending packet. */
update_sge(ss, length);
- /* must flush early everything before trigger word */
- ipath_flush_wc();
- __raw_writel(last, piobuf);
- /* be sure trigger word is written */
- ipath_flush_wc();
+ if (flush_wc) {
+ /* must flush early everything before trigger word */
+ ipath_flush_wc();
+ __raw_writel(last, piobuf);
+ /* be sure trigger word is written */
+ ipath_flush_wc();
+ } else
+ __raw_writel(last, piobuf);
}
/**
@@ -776,6 +779,7 @@
u32 *hdr, u32 len, struct ipath_sge_state *ss)
{
u32 __iomem *piobuf;
+ unsigned flush_wc;
u32 plen;
int ret;
@@ -799,47 +803,55 @@
* or WC buffer can be written out of order.
*/
writeq(plen, piobuf);
- ipath_flush_wc();
piobuf += 2;
+
+ flush_wc = dd->ipath_flags & IPATH_PIO_FLUSH_WC;
if (len == 0) {
/*
* If there is just the header portion, must flush before
* writing last word of header for correctness, and after
* the last header word (trigger word).
*/
- __iowrite32_copy(piobuf, hdr, hdrwords - 1);
- ipath_flush_wc();
- __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
- ipath_flush_wc();
- ret = 0;
- goto bail;
+ if (flush_wc) {
+ ipath_flush_wc();
+ __iowrite32_copy(piobuf, hdr, hdrwords - 1);
+ ipath_flush_wc();
+ __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
+ ipath_flush_wc();
+ } else
+ __iowrite32_copy(piobuf, hdr, hdrwords);
+ goto done;
}
+ if (flush_wc)
+ ipath_flush_wc();
__iowrite32_copy(piobuf, hdr, hdrwords);
piobuf += hdrwords;
/* The common case is aligned and contained in one segment. */
if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
!((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
- u32 w;
+ u32 dwords;
u32 *addr = (u32 *) ss->sge.vaddr;
/* Update address before sending packet. */
update_sge(ss, len);
/* Need to round up for the last dword in the packet. */
- w = (len + 3) >> 2;
- __iowrite32_copy(piobuf, addr, w - 1);
- /* must flush early everything before trigger word */
- ipath_flush_wc();
- __raw_writel(addr[w - 1], piobuf + w - 1);
- /* be sure trigger word is written */
- ipath_flush_wc();
- ret = 0;
- goto bail;
+ dwords = (len + 3) >> 2;
+ if (flush_wc) {
+ __iowrite32_copy(piobuf, addr, dwords - 1);
+ /* must flush early everything before trigger word */
+ ipath_flush_wc();
+ __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
+ /* be sure trigger word is written */
+ ipath_flush_wc();
+ } else
+ __iowrite32_copy(piobuf, addr, dwords);
+ goto done;
}
- copy_io(piobuf, ss, len);
+ copy_io(piobuf, ss, len, flush_wc);
+done:
ret = 0;
-
bail:
return ret;
}