ioat2,3: cacheline align software descriptor allocations

All the necessary fields for handling an ioat2,3 ring entry can fit into
one cacheline.  Move ->len prior to ->txd in struct ioat_ring_ent, and
move allocation of these entries to a hw-cache-aligned kmem cache to
reduce the number of cachelines dirtied for descriptor management.
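
For reference, a minimal sketch of the two pieces this describes.  The
struct is reduced to just the fields named above, and ioat2_cache_init()
is a hypothetical helper standing in for wherever the cache is actually
created at module load; SLAB_HWCACHE_ALIGN is what provides the
hw-cache alignment:

struct ioat_ring_ent {
	struct ioat_dma_descriptor *hw;
	size_t len;	/* moved ahead of ->txd; hot fields share one line */
	struct dma_async_tx_descriptor txd;
};

static struct kmem_cache *ioat2_cache;

static int __init ioat2_cache_init(void)
{
	/* SLAB_HWCACHE_ALIGN starts every entry on a cacheline boundary */
	ioat2_cache = kmem_cache_create("ioat2",
					sizeof(struct ioat_ring_ent),
					0, SLAB_HWCACHE_ALIGN, NULL);
	return ioat2_cache ? 0 : -ENOMEM;
}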

Signed-off-by: Dan Williams <dan.j.williams@intel.com>

diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c
index 460b773..fa3d6db 100644
--- a/drivers/dma/ioat/dma_v2.c
+++ b/drivers/dma/ioat/dma_v2.c
@@ -399,11 +399,12 @@
 		return NULL;
 	memset(hw, 0, sizeof(*hw));
 
-	desc = kzalloc(sizeof(*desc), flags);
+	desc = kmem_cache_alloc(ioat2_cache, flags);
 	if (!desc) {
 		pci_pool_free(dma->dma_pool, hw, phys);
 		return NULL;
 	}
+	memset(desc, 0, sizeof(*desc));
 
 	dma_async_tx_descriptor_init(&desc->txd, chan);
 	desc->txd.tx_submit = ioat2_tx_submit_unlock;
@@ -418,7 +419,7 @@
 
 	dma = to_ioatdma_device(chan->device);
 	pci_pool_free(dma->dma_pool, desc->hw, desc->txd.phys);
-	kfree(desc);
+	kmem_cache_free(ioat2_cache, desc);
 }
 
 static struct ioat_ring_ent **ioat2_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
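
Note that kmem_cache_alloc(), unlike the kzalloc() it replaces, returns
uninitialized memory, hence the added memset().  Assuming full zeroing
of the entry is still wanted, the alloc-plus-memset pair could
equivalently be written with kmem_cache_zalloc(); a sketch:

	desc = kmem_cache_zalloc(ioat2_cache, flags);
	if (!desc) {
		pci_pool_free(dma->dma_pool, hw, phys);
		return NULL;
	}

Either way, entries must be released back to the same cache with
kmem_cache_free(), as the second hunk does.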