async_tx: replace 'int_en' with operation preparation flags

Pass a full set of flags ('unsigned long flags') to drivers' per-operation
'prep' routines in place of the old 'int_en' argument.  Currently the only
flag passed is DMA_PREP_INTERRUPT, which preserves the existing behaviour of
requesting a completion interrupt when a callback is supplied.  The
expectation is that arch-specific async_tx_find_channel() implementations can
exploit this capability to find the best channel for an operation.
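
For illustration only (not part of this patch), a minimal sketch of how a
driver's prep routine consumes the new flags; the 'foo_*' names and the
'foo_desc' structure are hypothetical, only dma_chan, dma_addr_t,
dma_async_tx_descriptor and DMA_PREP_INTERRUPT are real interfaces:

	static struct dma_async_tx_descriptor *
	foo_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest,
			    dma_addr_t src, size_t len, unsigned long flags)
	{
		/* hypothetical driver-private descriptor */
		struct foo_desc *desc = foo_alloc_desc(chan);

		if (!desc)
			return NULL;

		/* program the hardware descriptor for the copy */
		foo_desc_set_copy(desc, dest, src, len);

		/* only arm the completion interrupt when the caller asked
		 * for one via DMA_PREP_INTERRUPT (previously 'int_en')
		 */
		if (flags & DMA_PREP_INTERRUPT)
			foo_desc_enable_irq(desc);

		return &desc->async_tx;
	}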

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Shannon Nelson <shannon.nelson@intel.com>
Reviewed-by: Haavard Skinnemoen <hskinnemoen@atmel.com>
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index faca0bc..25dcf33 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -52,6 +52,7 @@
 
 	if (device) {
 		dma_addr_t dma_dest, dma_src;
+		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
 
 		dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
 					DMA_FROM_DEVICE);
@@ -60,7 +61,7 @@
 				       DMA_TO_DEVICE);
 
 		tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src,
-						    len, cb_fn != NULL);
+						    len, dma_prep_flags);
 	}
 
 	if (tx) {
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
index 0c94851..8e98ab0 100644
--- a/crypto/async_tx/async_memset.c
+++ b/crypto/async_tx/async_memset.c
@@ -52,12 +52,13 @@
 
 	if (device) {
 		dma_addr_t dma_dest;
+		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
 
 		dma_dest = dma_map_page(device->dev, dest, offset, len,
 					DMA_FROM_DEVICE);
 
 		tx = device->device_prep_dma_memset(chan, dma_dest, val, len,
-						    cb_fn != NULL);
+						    dma_prep_flags);
 	}
 
 	if (tx) {
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 12cba1a..68d2fe4 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -45,6 +45,7 @@
 	dma_addr_t *dma_src = (dma_addr_t *) src_list;
 	struct dma_async_tx_descriptor *tx;
 	int i;
+	unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
 
 	pr_debug("%s: len: %zu\n", __FUNCTION__, len);
 
@@ -60,7 +61,7 @@
 	 * in case they can not provide a descriptor
 	 */
 	tx = device->device_prep_dma_xor(chan, dma_dest, dma_src, src_cnt, len,
-					 cb_fn != NULL);
+					 dma_prep_flags);
 	if (!tx) {
 		if (depend_tx)
 			dma_wait_for_async_tx(depend_tx);
@@ -68,7 +69,7 @@
 		while (!tx)
 			tx = device->device_prep_dma_xor(chan, dma_dest,
 							 dma_src, src_cnt, len,
-							 cb_fn != NULL);
+							 dma_prep_flags);
 	}
 
 	async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
@@ -268,6 +269,7 @@
 
 	if (device) {
 		dma_addr_t *dma_src = (dma_addr_t *) src_list;
+		unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0;
 		int i;
 
 		pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len);
@@ -278,7 +280,7 @@
 
 		tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt,
 						      len, result,
-						      cb_fn != NULL);
+						      dma_prep_flags);
 		if (!tx) {
 			if (depend_tx)
 				dma_wait_for_async_tx(depend_tx);
@@ -286,7 +288,7 @@
 			while (!tx)
 				tx = device->device_prep_dma_zero_sum(chan,
 					dma_src, src_cnt, len, result,
-					cb_fn != NULL);
+					dma_prep_flags);
 		}
 
 		async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param);
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
index 5bcfc55..dff38ac 100644
--- a/drivers/dma/ioat_dma.c
+++ b/drivers/dma/ioat_dma.c
@@ -701,7 +701,7 @@
 						dma_addr_t dma_dest,
 						dma_addr_t dma_src,
 						size_t len,
-						int int_en)
+						unsigned long flags)
 {
 	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
 	struct ioat_desc_sw *new;
@@ -724,7 +724,7 @@
 						dma_addr_t dma_dest,
 						dma_addr_t dma_src,
 						size_t len,
-						int int_en)
+						unsigned long flags)
 {
 	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
 	struct ioat_desc_sw *new;
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index eda841c..3986d54 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -537,7 +537,7 @@
 
 static struct dma_async_tx_descriptor *
 iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
-			 dma_addr_t dma_src, size_t len, int int_en)
+			 dma_addr_t dma_src, size_t len, unsigned long flags)
 {
 	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
 	struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -555,7 +555,7 @@
 	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
 	if (sw_desc) {
 		grp_start = sw_desc->group_head;
-		iop_desc_init_memcpy(grp_start, int_en);
+		iop_desc_init_memcpy(grp_start, flags);
 		iop_desc_set_byte_count(grp_start, iop_chan, len);
 		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
 		iop_desc_set_memcpy_src_addr(grp_start, dma_src);
@@ -569,7 +569,7 @@
 
 static struct dma_async_tx_descriptor *
 iop_adma_prep_dma_memset(struct dma_chan *chan, dma_addr_t dma_dest,
-			 int value, size_t len, int int_en)
+			 int value, size_t len, unsigned long flags)
 {
 	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
 	struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -587,7 +587,7 @@
 	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
 	if (sw_desc) {
 		grp_start = sw_desc->group_head;
-		iop_desc_init_memset(grp_start, int_en);
+		iop_desc_init_memset(grp_start, flags);
 		iop_desc_set_byte_count(grp_start, iop_chan, len);
 		iop_desc_set_block_fill_val(grp_start, value);
 		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
@@ -602,7 +602,7 @@
 static struct dma_async_tx_descriptor *
 iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
 		      dma_addr_t *dma_src, unsigned int src_cnt, size_t len,
-		      int int_en)
+		      unsigned long flags)
 {
 	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
 	struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -613,15 +613,15 @@
 	BUG_ON(unlikely(len > IOP_ADMA_XOR_MAX_BYTE_COUNT));
 
 	dev_dbg(iop_chan->device->common.dev,
-		"%s src_cnt: %d len: %u int_en: %d\n",
-		__FUNCTION__, src_cnt, len, int_en);
+		"%s src_cnt: %d len: %u flags: %lx\n",
+		__FUNCTION__, src_cnt, len, flags);
 
 	spin_lock_bh(&iop_chan->lock);
 	slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op);
 	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
 	if (sw_desc) {
 		grp_start = sw_desc->group_head;
-		iop_desc_init_xor(grp_start, src_cnt, int_en);
+		iop_desc_init_xor(grp_start, src_cnt, flags);
 		iop_desc_set_byte_count(grp_start, iop_chan, len);
 		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
 		sw_desc->unmap_src_cnt = src_cnt;
@@ -638,7 +638,7 @@
 static struct dma_async_tx_descriptor *
 iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
 			   unsigned int src_cnt, size_t len, u32 *result,
-			   int int_en)
+			   unsigned long flags)
 {
 	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
 	struct iop_adma_desc_slot *sw_desc, *grp_start;
@@ -655,7 +655,7 @@
 	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
 	if (sw_desc) {
 		grp_start = sw_desc->group_head;
-		iop_desc_init_zero_sum(grp_start, src_cnt, int_en);
+		iop_desc_init_zero_sum(grp_start, src_cnt, flags);
 		iop_desc_set_zero_sum_byte_count(grp_start, len);
 		grp_start->xor_check_result = result;
 		pr_debug("\t%s: grp_start->xor_check_result: %p\n",
diff --git a/include/asm-arm/arch-iop13xx/adma.h b/include/asm-arm/arch-iop13xx/adma.h
index 04006c1..efd9a5e 100644
--- a/include/asm-arm/arch-iop13xx/adma.h
+++ b/include/asm-arm/arch-iop13xx/adma.h
@@ -247,7 +247,7 @@
 }
 
 static inline void
-iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en)
+iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, unsigned long flags)
 {
 	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
 	union {
@@ -257,13 +257,13 @@
 
 	u_desc_ctrl.value = 0;
 	u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
-	u_desc_ctrl.field.int_en = int_en;
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
 	hw_desc->desc_ctrl = u_desc_ctrl.value;
 	hw_desc->crc_addr = 0;
 }
 
 static inline void
-iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en)
+iop_desc_init_memset(struct iop_adma_desc_slot *desc, unsigned long flags)
 {
 	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
 	union {
@@ -274,14 +274,15 @@
 	u_desc_ctrl.value = 0;
 	u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
 	u_desc_ctrl.field.block_fill_en = 1;
-	u_desc_ctrl.field.int_en = int_en;
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
 	hw_desc->desc_ctrl = u_desc_ctrl.value;
 	hw_desc->crc_addr = 0;
 }
 
 /* to do: support buffers larger than ADMA_MAX_BYTE_COUNT */
 static inline void
-iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
+iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt,
+		  unsigned long flags)
 {
 	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
 	union {
@@ -292,7 +293,7 @@
 	u_desc_ctrl.value = 0;
 	u_desc_ctrl.field.src_select = src_cnt - 1;
 	u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
-	u_desc_ctrl.field.int_en = int_en;
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
 	hw_desc->desc_ctrl = u_desc_ctrl.value;
 	hw_desc->crc_addr = 0;
 
@@ -301,7 +302,8 @@
 
 /* to do: support buffers larger than ADMA_MAX_BYTE_COUNT */
 static inline int
-iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
+iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
+		       unsigned long flags)
 {
 	struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc;
 	union {
@@ -314,7 +316,7 @@
 	u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */
 	u_desc_ctrl.field.zero_result = 1;
 	u_desc_ctrl.field.status_write_back_en = 1;
-	u_desc_ctrl.field.int_en = int_en;
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
 	hw_desc->desc_ctrl = u_desc_ctrl.value;
 	hw_desc->crc_addr = 0;
 
diff --git a/include/asm-arm/hardware/iop3xx-adma.h b/include/asm-arm/hardware/iop3xx-adma.h
index 10834b5..5c529e6 100644
--- a/include/asm-arm/hardware/iop3xx-adma.h
+++ b/include/asm-arm/hardware/iop3xx-adma.h
@@ -414,7 +414,7 @@
 }
 
 static inline void
-iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en)
+iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, unsigned long flags)
 {
 	struct iop3xx_desc_dma *hw_desc = desc->hw_desc;
 	union {
@@ -425,14 +425,14 @@
 	u_desc_ctrl.value = 0;
 	u_desc_ctrl.field.mem_to_mem_en = 1;
 	u_desc_ctrl.field.pci_transaction = 0xe; /* memory read block */
-	u_desc_ctrl.field.int_en = int_en;
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
 	hw_desc->desc_ctrl = u_desc_ctrl.value;
 	hw_desc->upper_pci_src_addr = 0;
 	hw_desc->crc_addr = 0;
 }
 
 static inline void
-iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en)
+iop_desc_init_memset(struct iop_adma_desc_slot *desc, unsigned long flags)
 {
 	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
 	union {
@@ -443,12 +443,13 @@
 	u_desc_ctrl.value = 0;
 	u_desc_ctrl.field.blk1_cmd_ctrl = 0x2; /* memory block fill */
 	u_desc_ctrl.field.dest_write_en = 1;
-	u_desc_ctrl.field.int_en = int_en;
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
 	hw_desc->desc_ctrl = u_desc_ctrl.value;
 }
 
 static inline u32
-iop3xx_desc_init_xor(struct iop3xx_desc_aau *hw_desc, int src_cnt, int int_en)
+iop3xx_desc_init_xor(struct iop3xx_desc_aau *hw_desc, int src_cnt,
+		     unsigned long flags)
 {
 	int i, shift;
 	u32 edcr;
@@ -509,21 +510,23 @@
 
 	u_desc_ctrl.field.dest_write_en = 1;
 	u_desc_ctrl.field.blk1_cmd_ctrl = 0x7; /* direct fill */
-	u_desc_ctrl.field.int_en = int_en;
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
 	hw_desc->desc_ctrl = u_desc_ctrl.value;
 
 	return u_desc_ctrl.value;
 }
 
 static inline void
-iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
+iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt,
+		  unsigned long flags)
 {
-	iop3xx_desc_init_xor(desc->hw_desc, src_cnt, int_en);
+	iop3xx_desc_init_xor(desc->hw_desc, src_cnt, flags);
 }
 
 /* return the number of operations */
 static inline int
-iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
+iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
+		       unsigned long flags)
 {
 	int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
 	struct iop3xx_desc_aau *hw_desc, *prev_hw_desc, *iter;
@@ -538,10 +541,10 @@
 	for (i = 0, j = 0; (slot_cnt -= slots_per_op) >= 0;
 		i += slots_per_op, j++) {
 		iter = iop_hw_desc_slot_idx(hw_desc, i);
-		u_desc_ctrl.value = iop3xx_desc_init_xor(iter, src_cnt, int_en);
+		u_desc_ctrl.value = iop3xx_desc_init_xor(iter, src_cnt, flags);
 		u_desc_ctrl.field.dest_write_en = 0;
 		u_desc_ctrl.field.zero_result_en = 1;
-		u_desc_ctrl.field.int_en = int_en;
+		u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
 		iter->desc_ctrl = u_desc_ctrl.value;
 
 		/* for the subsequent descriptors preserve the store queue
@@ -559,7 +562,8 @@
 }
 
 static inline void
-iop_desc_init_null_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en)
+iop_desc_init_null_xor(struct iop_adma_desc_slot *desc, int src_cnt,
+		       unsigned long flags)
 {
 	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
 	union {
@@ -591,7 +595,7 @@
 	}
 
 	u_desc_ctrl.field.dest_write_en = 0;
-	u_desc_ctrl.field.int_en = int_en;
+	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
 	hw_desc->desc_ctrl = u_desc_ctrl.value;
 }
 
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index b0864f5..acbb364 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -95,6 +95,15 @@
 #define DMA_TX_TYPE_END (DMA_INTERRUPT + 1)
 
 /**
+ * enum dma_prep_flags - DMA flags to augment operation preparation
+ * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
+ * 	this transaction
+ */
+enum dma_prep_flags {
+	DMA_PREP_INTERRUPT = (1 << 0),
+};
+
+/**
  * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t.
  * See linux/cpumask.h
  */
@@ -274,16 +283,16 @@
 
 	struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)(
 		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
-		size_t len, int int_en);
+		size_t len, unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_xor)(
 		struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
-		unsigned int src_cnt, size_t len, int int_en);
+		unsigned int src_cnt, size_t len, unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)(
 		struct dma_chan *chan, dma_addr_t *src,	unsigned int src_cnt,
-		size_t len, u32 *result, int int_en);
+		size_t len, u32 *result, unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_memset)(
 		struct dma_chan *chan, dma_addr_t dest, int value, size_t len,
-		int int_en);
+		unsigned long flags);
 	struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)(
 		struct dma_chan *chan);