firewire: optimize iso queueing by setting wake only after the last packet

When queueing iso packets, the run time is dominated by the two
MMIO accesses that set the DMA context's wake bit.  Because most
drivers submit packets in batches, we can save much time by
removing all but the last wakeup.

The internal kernel API is changed to require a call to
fw_iso_context_queue_flush() after a batch of queued packets.
The user space API does not change, so one call to
FW_CDEV_IOC_QUEUE_ISO must specify multiple packets to take
advantage of this optimization.

In my measurements, this patch reduces the time needed to queue
fifty skip packets from userspace to one sixth on a 2.5 GHz CPU,
or to one third at 800 MHz.

Signed-off-by: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Stefan Richter <stefanr@s5r6.in-berlin.de>
diff --git a/drivers/firewire/ohci.c b/drivers/firewire/ohci.c
index f9f5570..438e6c8 100644
--- a/drivers/firewire/ohci.c
+++ b/drivers/firewire/ohci.c
@@ -1192,9 +1192,6 @@
 	wmb(); /* finish init of new descriptors before branch_address update */
 	ctx->prev->branch_address = cpu_to_le32(d_bus | z);
 	ctx->prev = find_branch_descriptor(d, z);
-
-	reg_write(ctx->ohci, CONTROL_SET(ctx->regs), CONTEXT_WAKE);
-	flush_writes(ctx->ohci);
 }
 
 static void context_stop(struct context *ctx)
@@ -1348,8 +1345,12 @@
 
 	context_append(ctx, d, z, 4 - z);
 
-	if (!ctx->running)
+	if (ctx->running) {
+		reg_write(ohci, CONTROL_SET(ctx->regs), CONTEXT_WAKE);
+		flush_writes(ohci);
+	} else {
 		context_run(ctx, 0);
+	}
 
 	return 0;
 }
@@ -3121,6 +3122,15 @@
 	return ret;
 }
 
+static void ohci_flush_queue_iso(struct fw_iso_context *base)
+{
+	struct context *ctx =
+			&container_of(base, struct iso_context, base)->context;
+
+	reg_write(ctx->ohci, CONTROL_SET(ctx->regs), CONTEXT_WAKE);
+	flush_writes(ctx->ohci);
+}
+
 static const struct fw_card_driver ohci_driver = {
 	.enable			= ohci_enable,
 	.read_phy_reg		= ohci_read_phy_reg,
@@ -3137,6 +3147,7 @@
 	.free_iso_context	= ohci_free_iso_context,
 	.set_iso_channels	= ohci_set_iso_channels,
 	.queue_iso		= ohci_queue_iso,
+	.flush_queue_iso	= ohci_flush_queue_iso,
 	.start_iso		= ohci_start_iso,
 	.stop_iso		= ohci_stop_iso,
 };