firewire: octlet AT payloads can be stack-allocated

We do not need slab allocations anymore in order to satisfy
streaming DMA mapping constraints, thanks to commit da28947e7e36
"firewire: ohci: avoid separate DMA mapping for small AT payloads".

(Besides, the slab-allocated buffers that firewire-core, firewire-sbp2,
and firedtv used to provide for 8-byte write and lock requests were
still not fully portable since they crossed cacheline boundaries or
shared a cacheline with unrelated CPU-accessed data.  snd-firewire-lib
got this aspect right by using an extra kmalloc/ kfree just for the
8-byte transaction buffer.)

This change replaces kmalloc'ed lock transaction scratch buffers in
firewire-core, firedtv, and snd-firewire-lib by local stack allocations.
Perhaps the most notable result of the change is simpler locking because
there is no need to serialize usages of preallocated per-device buffers
anymore.  Also, allocations and deallocations are simpler.

Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
Acked-by: Clemens Ladisch <clemens@ladisch.de>
diff --git a/drivers/firewire/core-card.c b/drivers/firewire/core-card.c
index 3c44fbc8..e119f1e 100644
--- a/drivers/firewire/core-card.c
+++ b/drivers/firewire/core-card.c
@@ -258,8 +258,7 @@
 
 	if (!card->broadcast_channel_allocated) {
 		fw_iso_resource_manage(card, generation, 1ULL << 31,
-				       &channel, &bandwidth, true,
-				       card->bm_transaction_data);
+				       &channel, &bandwidth, true);
 		if (channel != 31) {
 			fw_notify("failed to allocate broadcast channel\n");
 			return;
@@ -294,6 +293,7 @@
 	bool root_device_is_cmc;
 	bool irm_is_1394_1995_only;
 	bool keep_this_irm;
+	__be32 transaction_data[2];
 
 	spin_lock_irq(&card->lock);
 
@@ -355,21 +355,21 @@
 			goto pick_me;
 		}
 
-		card->bm_transaction_data[0] = cpu_to_be32(0x3f);
-		card->bm_transaction_data[1] = cpu_to_be32(local_id);
+		transaction_data[0] = cpu_to_be32(0x3f);
+		transaction_data[1] = cpu_to_be32(local_id);
 
 		spin_unlock_irq(&card->lock);
 
 		rcode = fw_run_transaction(card, TCODE_LOCK_COMPARE_SWAP,
 				irm_id, generation, SCODE_100,
 				CSR_REGISTER_BASE + CSR_BUS_MANAGER_ID,
-				card->bm_transaction_data, 8);
+				transaction_data, 8);
 
 		if (rcode == RCODE_GENERATION)
 			/* Another bus reset, BM work has been rescheduled. */
 			goto out;
 
-		bm_id = be32_to_cpu(card->bm_transaction_data[0]);
+		bm_id = be32_to_cpu(transaction_data[0]);
 
 		spin_lock_irq(&card->lock);
 		if (rcode == RCODE_COMPLETE && generation == card->generation)
@@ -490,11 +490,11 @@
 		/*
 		 * Make sure that the cycle master sends cycle start packets.
 		 */
-		card->bm_transaction_data[0] = cpu_to_be32(CSR_STATE_BIT_CMSTR);
+		transaction_data[0] = cpu_to_be32(CSR_STATE_BIT_CMSTR);
 		rcode = fw_run_transaction(card, TCODE_WRITE_QUADLET_REQUEST,
 				root_id, generation, SCODE_100,
 				CSR_REGISTER_BASE + CSR_STATE_SET,
-				card->bm_transaction_data, 4);
+				transaction_data, 4);
 		if (rcode == RCODE_GENERATION)
 			goto out;
 	}
diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c
index 62ac111..2a3f1c4 100644
--- a/drivers/firewire/core-cdev.c
+++ b/drivers/firewire/core-cdev.c
@@ -141,7 +141,6 @@
 	int generation;
 	u64 channels;
 	s32 bandwidth;
-	__be32 transaction_data[2];
 	struct iso_resource_event *e_alloc, *e_dealloc;
 };
 
@@ -1229,8 +1228,7 @@
 			r->channels, &channel, &bandwidth,
 			todo == ISO_RES_ALLOC ||
 			todo == ISO_RES_REALLOC ||
-			todo == ISO_RES_ALLOC_ONCE,
-			r->transaction_data);
+			todo == ISO_RES_ALLOC_ONCE);
 	/*
 	 * Is this generation outdated already?  As long as this resource sticks
 	 * in the idr, it will be scheduled again for a newer generation or at
diff --git a/drivers/firewire/core-iso.c b/drivers/firewire/core-iso.c
index 481056d..f872ede 100644
--- a/drivers/firewire/core-iso.c
+++ b/drivers/firewire/core-iso.c
@@ -196,9 +196,10 @@
  */
 
 static int manage_bandwidth(struct fw_card *card, int irm_id, int generation,
-			    int bandwidth, bool allocate, __be32 data[2])
+			    int bandwidth, bool allocate)
 {
 	int try, new, old = allocate ? BANDWIDTH_AVAILABLE_INITIAL : 0;
+	__be32 data[2];
 
 	/*
 	 * On a 1394a IRM with low contention, try < 1 is enough.
@@ -233,9 +234,10 @@
 }
 
 static int manage_channel(struct fw_card *card, int irm_id, int generation,
-		u32 channels_mask, u64 offset, bool allocate, __be32 data[2])
+		u32 channels_mask, u64 offset, bool allocate)
 {
 	__be32 bit, all, old;
+	__be32 data[2];
 	int channel, ret = -EIO, retry = 5;
 
 	old = all = allocate ? cpu_to_be32(~0) : 0;
@@ -284,7 +286,7 @@
 }
 
 static void deallocate_channel(struct fw_card *card, int irm_id,
-			       int generation, int channel, __be32 buffer[2])
+			       int generation, int channel)
 {
 	u32 mask;
 	u64 offset;
@@ -293,7 +295,7 @@
 	offset = channel < 32 ? CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_HI :
 				CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_LO;
 
-	manage_channel(card, irm_id, generation, mask, offset, false, buffer);
+	manage_channel(card, irm_id, generation, mask, offset, false);
 }
 
 /**
@@ -322,7 +324,7 @@
  */
 void fw_iso_resource_manage(struct fw_card *card, int generation,
 			    u64 channels_mask, int *channel, int *bandwidth,
-			    bool allocate, __be32 buffer[2])
+			    bool allocate)
 {
 	u32 channels_hi = channels_mask;	/* channels 31...0 */
 	u32 channels_lo = channels_mask >> 32;	/* channels 63...32 */
@@ -335,11 +337,11 @@
 	if (channels_hi)
 		c = manage_channel(card, irm_id, generation, channels_hi,
 				CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_HI,
-				allocate, buffer);
+				allocate);
 	if (channels_lo && c < 0) {
 		c = manage_channel(card, irm_id, generation, channels_lo,
 				CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_LO,
-				allocate, buffer);
+				allocate);
 		if (c >= 0)
 			c += 32;
 	}
@@ -351,14 +353,13 @@
 	if (*bandwidth == 0)
 		return;
 
-	ret = manage_bandwidth(card, irm_id, generation, *bandwidth,
-			       allocate, buffer);
+	ret = manage_bandwidth(card, irm_id, generation, *bandwidth, allocate);
 	if (ret < 0)
 		*bandwidth = 0;
 
 	if (allocate && ret < 0) {
 		if (c >= 0)
-			deallocate_channel(card, irm_id, generation, c, buffer);
+			deallocate_channel(card, irm_id, generation, c);
 		*channel = ret;
 	}
 }
diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c
index d00f8ce..77275fd 100644
--- a/drivers/firewire/core-transaction.c
+++ b/drivers/firewire/core-transaction.c
@@ -326,8 +326,8 @@
  * It will contain tag, channel, and sy data instead of a node ID then.
  *
  * The payload buffer at @data is going to be DMA-mapped except in case of
- * quadlet-sized payload or of local (loopback) requests.  Hence make sure that
- * the buffer complies with the restrictions for DMA-mapped memory.  The
+ * @length <= 8 or of local (loopback) requests.  Hence make sure that the
+ * buffer complies with the restrictions of the streaming DMA mapping API.
  * @payload must not be freed before the @callback is called.
  *
  * In case of request types without payload, @data is NULL and @length is 0.
@@ -411,7 +411,8 @@
  *
  * Returns the RCODE.  See fw_send_request() for parameter documentation.
  * Unlike fw_send_request(), @data points to the payload of the request or/and
- * to the payload of the response.
+ * to the payload of the response.  DMA mapping restrictions apply to outbound
+ * request payloads of >= 8 bytes but not to inbound response payloads.
  */
 int fw_run_transaction(struct fw_card *card, int tcode, int destination_id,
 		       int generation, int speed, unsigned long long offset,
diff --git a/drivers/media/dvb/firewire/firedtv-avc.c b/drivers/media/dvb/firewire/firedtv-avc.c
index fc5ccd8..21c52e3 100644
--- a/drivers/media/dvb/firewire/firedtv-avc.c
+++ b/drivers/media/dvb/firewire/firedtv-avc.c
@@ -1320,14 +1320,10 @@
 {
 	int ret;
 
-	mutex_lock(&fdtv->avc_mutex);
-
 	ret = fdtv_read(fdtv, addr, data);
 	if (ret < 0)
 		dev_err(fdtv->device, "CMP: read I/O error\n");
 
-	mutex_unlock(&fdtv->avc_mutex);
-
 	return ret;
 }
 
@@ -1335,18 +1331,9 @@
 {
 	int ret;
 
-	mutex_lock(&fdtv->avc_mutex);
-
-	/* data[] is stack-allocated and should not be DMA-mapped. */
-	memcpy(fdtv->avc_data, data, 8);
-
-	ret = fdtv_lock(fdtv, addr, fdtv->avc_data);
+	ret = fdtv_lock(fdtv, addr, data);
 	if (ret < 0)
 		dev_err(fdtv->device, "CMP: lock I/O error\n");
-	else
-		memcpy(data, fdtv->avc_data, 8);
-
-	mutex_unlock(&fdtv->avc_mutex);
 
 	return ret;
 }