[DCCP] ccid3: Finer-grained resolution of sending rates

This patch
 * resolves a bug where packets smaller than 32/64 bytes resulted in sending rates of 0
 * supports all sending rates from 1/64 bytes/second up to 4Gbyte/second
 * simplifies the present overflow problems in calculations

Current sending rate X and the cached value X_recv of the receiver-estimated
sending rate are both scaled by 64 (2^6) in order to
 * cope with low sending rates (minimally 1 byte/second)
 * allow upgrading to use a packets-per-second implementation of CCID 3
 * avoid calculation errors due to integer arithmetic cut-off

The patch implements a revised strategy from
http://www.mail-archive.com/dccp@vger.kernel.org/msg01040.html

The only difference with regard to that strategy is that t_ipi is already
used in the calculation of the nofeedback timeout, which saves one division.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Acked-by: Ian McDonald <ian.mcdonald@jandi.co.nz>
Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index c54663f..aa355d4 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -108,8 +108,9 @@
 {
 	timeval_sub_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
 
-	/* Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst */
-	hctx->ccid3hctx_t_ipi = usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_x);
+	/* Calculate new t_ipi = s / X_inst (X_inst is in 64 * bytes/second) */
+	hctx->ccid3hctx_t_ipi = scaled_div(hctx->ccid3hctx_s,
+					   hctx->ccid3hctx_x >> 6);
 
 	/* Update nominal send time with regard to the new t_ipi */
 	timeval_add_usecs(&hctx->ccid3hctx_t_nom, hctx->ccid3hctx_t_ipi);
@@ -128,26 +129,33 @@
  *          X = max(min(2 * X, 2 * X_recv), s / R);
  *          tld = now;
  *
+ * Note: X and X_recv are both stored in units of 64 * bytes/second, to support
+ *       fine-grained resolution of sending rates. This requires scaling by 2^6
+ *       throughout the code. Only X_calc is unscaled (in bytes/second).
+ *
  * If X has changed, we also update the scheduled send time t_now,
  * the inter-packet interval t_ipi, and the delta value.
- */ 
+ */
 static void ccid3_hc_tx_update_x(struct sock *sk, struct timeval *now)
 
 {
 	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
-	const __u32 old_x = hctx->ccid3hctx_x;
+	const  __u64 old_x = hctx->ccid3hctx_x;
 
 	if (hctx->ccid3hctx_p > 0) {
-		hctx->ccid3hctx_x = max_t(u32, min(hctx->ccid3hctx_x_calc,
-						   hctx->ccid3hctx_x_recv * 2),
-					       hctx->ccid3hctx_s / TFRC_T_MBI);
+
+		hctx->ccid3hctx_x = min_t(u64, hctx->ccid3hctx_x_calc << 6,
+					       hctx->ccid3hctx_x_recv * 2  );
+		hctx->ccid3hctx_x = max_t(u64, hctx->ccid3hctx_x,
+					  (hctx->ccid3hctx_s << 6)/TFRC_T_MBI);
 
 	} else if (timeval_delta(now, &hctx->ccid3hctx_t_ld) >=
 							  hctx->ccid3hctx_rtt) {
-		hctx->ccid3hctx_x = max(min(hctx->ccid3hctx_x_recv,
-					    hctx->ccid3hctx_x      ) * 2,
-					usecs_div(hctx->ccid3hctx_s,
-					       	  hctx->ccid3hctx_rtt)   );
+
+		hctx->ccid3hctx_x = max(2 * min(hctx->ccid3hctx_x,
+						hctx->ccid3hctx_x_recv),
+					scaled_div(hctx->ccid3hctx_s << 6,
+						   hctx->ccid3hctx_rtt    ));
 		hctx->ccid3hctx_t_ld = *now;
 	}
 
@@ -194,13 +202,13 @@
 	case TFRC_SSTATE_NO_FBACK:
 		/* RFC 3448, 4.4: Halve send rate directly */
 		hctx->ccid3hctx_x = max_t(u32, hctx->ccid3hctx_x / 2,
-					       hctx->ccid3hctx_s / TFRC_T_MBI);
+					  (hctx->ccid3hctx_s << 6)/TFRC_T_MBI);
 
-		ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d "
+		ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %u "
 			       "bytes/s\n",
 			       dccp_role(sk), sk,
 			       ccid3_tx_state_name(hctx->ccid3hctx_state),
-			       hctx->ccid3hctx_x);
+			       (unsigned)(hctx->ccid3hctx_x >> 6));
 		/* The value of R is still undefined and so we can not recompute
 		 * the timout value. Keep initial value as per [RFC 4342, 5]. */
 		t_nfb = TFRC_INITIAL_TIMEOUT;
@@ -209,11 +217,11 @@
 	case TFRC_SSTATE_FBACK:
 		/*
 		 * Check if IDLE since last timeout and recv rate is less than
-		 * 4 packets per RTT
+		 * 4 packets (in units of 64*bytes/sec) per RTT
 		 */
 		if (!hctx->ccid3hctx_idle ||
-		    (hctx->ccid3hctx_x_recv >=
-		     4 * usecs_div(hctx->ccid3hctx_s, hctx->ccid3hctx_rtt))) {
+		    (hctx->ccid3hctx_x_recv >= 4 *
+		     scaled_div(hctx->ccid3hctx_s << 6, hctx->ccid3hctx_rtt))) {
 			struct timeval now;
 
 			ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n",
@@ -227,17 +235,23 @@
 			 *    X_recv = max(X_recv / 2, s / (2 * t_mbi));
 			 *  Else
 			 *    X_recv = X_calc / 4;
+			 *
+			 *  Note that X_recv is scaled by 2^6 while X_calc is not
 			 */
 			BUG_ON(hctx->ccid3hctx_p && !hctx->ccid3hctx_x_calc);
 
 			if (hctx->ccid3hctx_p  == 0 ||
-			    hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv)  {
-				hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2,
-								    hctx->ccid3hctx_s / (2 * TFRC_T_MBI));
+			    hctx->ccid3hctx_x_calc > (hctx->ccid3hctx_x_recv >> 5))  {
+
+				hctx->ccid3hctx_x_recv =
+					max_t(u64, hctx->ccid3hctx_x_recv / 2,
+					      	  (hctx->ccid3hctx_s << 6) /
+					      			(2*TFRC_T_MBI));
+
 				if (hctx->ccid3hctx_p == 0)
 					dccp_timestamp(sk, &now);
 			} else
-				hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4;
+				hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc << 4;
 
 			/* Now recalculate X [RFC 3448, 4.3, step (4)] */
 			ccid3_hc_tx_update_x(sk, &now);
@@ -315,9 +329,9 @@
 		hctx->ccid3hctx_t_last_win_count = now;
 		ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
 
-		/* Set initial sending rate to 1 packet per second */
+		/* Set initial sending rate X/s to 1pps (X is scaled by 2^6) */
 		ccid3_hc_tx_update_s(hctx, skb->len);
-		hctx->ccid3hctx_x     = hctx->ccid3hctx_s;
+		hctx->ccid3hctx_x = hctx->ccid3hctx_s << 6;
 
 		/* First timeout, according to [RFC 3448, 4.2], is 1 second */
 		hctx->ccid3hctx_t_ipi = USEC_PER_SEC;
@@ -438,8 +452,8 @@
 			return;
 		}
 
-		/* Update receive rate */
-		hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate;
+		/* Update receive rate in units of 64 * bytes/second */
+		hctx->ccid3hctx_x_recv = opt_recv->ccid3or_receive_rate << 6;
 
 		/* Update loss event rate */
 		pinv = opt_recv->ccid3or_loss_event_rate;
@@ -475,12 +489,14 @@
 		 * q is a constant, RFC 3448 recomments 0.9
 		 */
 		if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
-			/* Use Larger Initial Windows [RFC 4342, sec. 5]
-			 * We deviate in that we use `s' instead of `MSS'. */
+			/*
+			 * Larger Initial Windows [RFC 4342, sec. 5]
+			 * We deviate in that we use `s' instead of `MSS'.
+			 */
 			u16 w_init = min(    4 * hctx->ccid3hctx_s,
 					 max(2 * hctx->ccid3hctx_s, 4380));
 			hctx->ccid3hctx_rtt  = r_sample;
-			hctx->ccid3hctx_x    = usecs_div(w_init, r_sample);
+			hctx->ccid3hctx_x    = scaled_div(w_init<< 6, r_sample);
 			hctx->ccid3hctx_t_ld = now;
 
 			ccid3_update_send_time(hctx);
@@ -488,7 +504,7 @@
 			ccid3_pr_debug("%s(%p), s=%u, w_init=%u, "
 				       "R_sample=%ldus, X=%u\n", dccp_role(sk),
 				       sk, hctx->ccid3hctx_s, w_init, r_sample,
-				       hctx->ccid3hctx_x);
+				       (unsigned)(hctx->ccid3hctx_x >> 6));
 
 			ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
 		} else {
@@ -508,7 +524,7 @@
 				       sk, hctx->ccid3hctx_rtt, r_sample,
 				       hctx->ccid3hctx_s, hctx->ccid3hctx_p,
 				       hctx->ccid3hctx_x_calc,
-				       hctx->ccid3hctx_x);
+				       (unsigned)(hctx->ccid3hctx_x >> 6));
 		}
 
 		/* unschedule no feedback timer */