ALSA: improve buffer size computations for USB PCM audio

This patch changes the way URBs are allocated and their sizes are
determined for PCM playback in the snd-usb-audio driver.  Currently
the driver allocates too few URBs for endpoints that don't use
implicit sync, making underruns more likely to occur.  This may be a
holdover from before I/O delays could be measured accurately; in any
case, it is no longer necessary.

The patch allocates as many URBs as possible, subject to four
limitations:

	The total number of URBs for the endpoint is not allowed to
	exceed MAX_URBS (which the patch increases from 8 to 12).

	The total number of packets per URB is not allowed to exceed
	MAX_PACKS (or MAX_PACKS_HS for high-speed devices), which is
	decreased from 20 to 6.

	The total duration of queued data is not allowed to exceed
	MAX_QUEUE, which is decreased from 24 ms to 18 ms.

	The total number of ALSA frames in the output queue is not
	allowed to exceed the ALSA buffer size.

The last requirement is the hardest to implement.  Currently the
number of URBs needed to fill a buffer cannot be determined in
advance, because a buffer contains a fixed number of frames whereas
the number of frames in an URB varies to match shifts in the device's
clock rate.  To solve this problem, the patch changes the logic for
deciding how many packets an URB should contain.  Rather than using as
many as possible without exceeding an ALSA period boundary, now the
driver uses only as many packets as needed to transfer a predetermined
number of frames.  As a result, unless the device's clock has an
exceedingly variable rate, the number of URBs making up each period
(and hence each buffer) will remain constant.

The overall effect of the patch is that playback works better in
low-latency settings.  The user can still specify values for
frames/period and periods/buffer that exceed the capabilities of the
hardware, of course.  But for values that are within those
capabilities, the performance will be improved.  For example, testing
shows that a high-speed device can handle 32 frames/period and 3
periods/buffer at 48 KHz, whereas the current driver starts to get
glitchy at 64 frames/period and 2 periods/buffer.

A side effect of these changes is that the "nrpacks" module parameter
is no longer used.  The patch removes it.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Clemens Ladisch <clemens@ladisch.de>
Tested-by: Daniel Mack <zonque@gmail.com>
Tested-by: Eldad Zack <eldad@fogrefinery.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c
index 93e970f..21dc642 100644
--- a/sound/usb/endpoint.c
+++ b/sound/usb/endpoint.c
@@ -574,11 +574,14 @@
 			      snd_pcm_format_t pcm_format,
 			      unsigned int channels,
 			      unsigned int period_bytes,
+			      unsigned int frames_per_period,
+			      unsigned int periods_per_buffer,
 			      struct audioformat *fmt,
 			      struct snd_usb_endpoint *sync_ep)
 {
-	unsigned int maxsize, i, urb_packs, total_packs, packs_per_ms;
-	int is_playback = usb_pipeout(ep->pipe);
+	unsigned int maxsize, minsize, packs_per_ms, max_packs_per_urb;
+	unsigned int max_packs_per_period, urbs_per_period, urb_packs;
+	unsigned int max_urbs, i;
 	int frame_bits = snd_pcm_format_physical_width(pcm_format) * channels;
 
 	if (pcm_format == SNDRV_PCM_FORMAT_DSD_U16_LE && fmt->dsd_dop) {
@@ -611,58 +614,67 @@
 	else
 		ep->curpacksize = maxsize;
 
-	if (snd_usb_get_speed(ep->chip->dev) != USB_SPEED_FULL)
+	if (snd_usb_get_speed(ep->chip->dev) != USB_SPEED_FULL) {
 		packs_per_ms = 8 >> ep->datainterval;
-	else
-		packs_per_ms = 1;
-
-	if (is_playback && !snd_usb_endpoint_implicit_feedback_sink(ep)) {
-		urb_packs = max(ep->chip->nrpacks, 1);
-		urb_packs = min(urb_packs, (unsigned int) MAX_PACKS);
+		max_packs_per_urb = MAX_PACKS_HS;
 	} else {
-		urb_packs = 1;
+		packs_per_ms = 1;
+		max_packs_per_urb = MAX_PACKS;
 	}
-
-	urb_packs *= packs_per_ms;
-
 	if (sync_ep && !snd_usb_endpoint_implicit_feedback_sink(ep))
-		urb_packs = min(urb_packs, 1U << sync_ep->syncinterval);
+		max_packs_per_urb = min(max_packs_per_urb,
+					1U << sync_ep->syncinterval);
+	max_packs_per_urb = max(1u, max_packs_per_urb >> ep->datainterval);
 
-	/* decide how many packets to be used */
-	if (is_playback && !snd_usb_endpoint_implicit_feedback_sink(ep)) {
-		unsigned int minsize, maxpacks;
+	/*
+	 * Capture endpoints need to use small URBs because there's no way
+	 * to tell in advance where the next period will end, and we don't
+	 * want the next URB to complete much after the period ends.
+	 *
+	 * Playback endpoints with implicit sync much use the same parameters
+	 * as their corresponding capture endpoint.
+	 */
+	if (usb_pipein(ep->pipe) ||
+			snd_usb_endpoint_implicit_feedback_sink(ep)) {
+
+		/* make capture URBs <= 1 ms and smaller than a period */
+		urb_packs = min(max_packs_per_urb, packs_per_ms);
+		while (urb_packs > 1 && urb_packs * maxsize >= period_bytes)
+			urb_packs >>= 1;
+		ep->nurbs = MAX_URBS;
+
+	/*
+	 * Playback endpoints without implicit sync are adjusted so that
+	 * a period fits as evenly as possible in the smallest number of
+	 * URBs.  The total number of URBs is adjusted to the size of the
+	 * ALSA buffer, subject to the MAX_URBS and MAX_QUEUE limits.
+	 */
+	} else {
 		/* determine how small a packet can be */
-		minsize = (ep->freqn >> (16 - ep->datainterval))
-			  * (frame_bits >> 3);
+		minsize = (ep->freqn >> (16 - ep->datainterval)) *
+				(frame_bits >> 3);
 		/* with sync from device, assume it can be 12% lower */
 		if (sync_ep)
 			minsize -= minsize >> 3;
 		minsize = max(minsize, 1u);
-		total_packs = (period_bytes + minsize - 1) / minsize;
-		/* we need at least two URBs for queueing */
-		if (total_packs < 2) {
-			total_packs = 2;
-		} else {
-			/* and we don't want too long a queue either */
-			maxpacks = max(MAX_QUEUE * packs_per_ms, urb_packs * 2);
-			total_packs = min(total_packs, maxpacks);
-		}
-	} else {
-		while (urb_packs > 1 && urb_packs * maxsize >= period_bytes)
-			urb_packs >>= 1;
-		total_packs = MAX_URBS * urb_packs;
-	}
 
-	ep->nurbs = (total_packs + urb_packs - 1) / urb_packs;
-	if (ep->nurbs > MAX_URBS) {
-		/* too much... */
-		ep->nurbs = MAX_URBS;
-		total_packs = MAX_URBS * urb_packs;
-	} else if (ep->nurbs < 2) {
-		/* too little - we need at least two packets
-		 * to ensure contiguous playback/capture
-		 */
-		ep->nurbs = 2;
+		/* how many packets will contain an entire ALSA period? */
+		max_packs_per_period = DIV_ROUND_UP(period_bytes, minsize);
+
+		/* how many URBs will contain a period? */
+		urbs_per_period = DIV_ROUND_UP(max_packs_per_period,
+				max_packs_per_urb);
+		/* how many packets are needed in each URB? */
+		urb_packs = DIV_ROUND_UP(max_packs_per_period, urbs_per_period);
+
+		/* limit the number of frames in a single URB */
+		ep->max_urb_frames = DIV_ROUND_UP(frames_per_period,
+					urbs_per_period);
+
+		/* try to use enough URBs to contain an entire ALSA buffer */
+		max_urbs = min((unsigned) MAX_URBS,
+				MAX_QUEUE * packs_per_ms / urb_packs);
+		ep->nurbs = min(max_urbs, urbs_per_period * periods_per_buffer);
 	}
 
 	/* allocate and initialize data urbs */
@@ -670,8 +682,7 @@
 		struct snd_urb_ctx *u = &ep->urb[i];
 		u->index = i;
 		u->ep = ep;
-		u->packets = (i + 1) * total_packs / ep->nurbs
-			- i * total_packs / ep->nurbs;
+		u->packets = urb_packs;
 		u->buffer_size = maxsize * u->packets;
 
 		if (fmt->fmt_type == UAC_FORMAT_TYPE_II)
@@ -748,6 +759,8 @@
  * @pcm_format: the audio fomat.
  * @channels: the number of audio channels.
  * @period_bytes: the number of bytes in one alsa period.
+ * @period_frames: the number of frames in one alsa period.
+ * @buffer_periods: the number of periods in one alsa buffer.
  * @rate: the frame rate.
  * @fmt: the USB audio format information
  * @sync_ep: the sync endpoint to use, if any
@@ -760,6 +773,8 @@
 				snd_pcm_format_t pcm_format,
 				unsigned int channels,
 				unsigned int period_bytes,
+				unsigned int period_frames,
+				unsigned int buffer_periods,
 				unsigned int rate,
 				struct audioformat *fmt,
 				struct snd_usb_endpoint *sync_ep)
@@ -793,7 +808,8 @@
 	switch (ep->type) {
 	case  SND_USB_ENDPOINT_TYPE_DATA:
 		err = data_ep_set_params(ep, pcm_format, channels,
-					 period_bytes, fmt, sync_ep);
+					 period_bytes, period_frames,
+					 buffer_periods, fmt, sync_ep);
 		break;
 	case  SND_USB_ENDPOINT_TYPE_SYNC:
 		err = sync_ep_set_params(ep, fmt);