Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6 
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 007c290..8bf4bac 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -432,7 +432,10 @@
 	struct ccid			*dccps_hc_rx_ccid;
 	struct ccid			*dccps_hc_tx_ccid;
 	struct dccp_options_received	dccps_options_received;
+	struct timeval			dccps_epoch;
 	enum dccp_role			dccps_role:2;
+	__u8				dccps_hc_rx_insert_options:1;
+	__u8				dccps_hc_tx_insert_options:1;
 };
  
 static inline struct dccp_sock *dccp_sk(const struct sock *sk)
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 7bf3b3a..ea30012d 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -43,12 +43,22 @@
 #include "ccid3.h"
 
 /*
- * Reason for maths with 10 here is to avoid 32 bit overflow when a is big.
+ * Approximates a * USEC_PER_SEC / b without 32 bit overflow when a is big:
+ * USEC_PER_SEC and b are divided by the smallest div (max 100000) avoiding it.
  */
 static inline u32 usecs_div(const u32 a, const u32 b)
 {
-	const u32 tmp = a * (USEC_PER_SEC / 10);
-	return b > 20 ? tmp / (b / 10) : tmp;
+	const u32 div = a < (UINT_MAX / (USEC_PER_SEC /    10)) ?    10 :
+			a < (UINT_MAX / (USEC_PER_SEC /    50)) ?    50 :
+			a < (UINT_MAX / (USEC_PER_SEC /   100)) ?   100 :
+			a < (UINT_MAX / (USEC_PER_SEC /   500)) ?   500 :
+			a < (UINT_MAX / (USEC_PER_SEC /  1000)) ?  1000 :
+			a < (UINT_MAX / (USEC_PER_SEC /  5000)) ?  5000 :
+			a < (UINT_MAX / (USEC_PER_SEC / 10000)) ? 10000 :
+			a < (UINT_MAX / (USEC_PER_SEC / 50000)) ? 50000 :
+								 100000;
+	const u32 tmp = a * (USEC_PER_SEC / div);
+	return (b >= 2 * div) ? tmp / (b / div) : tmp;
 }
 
 static int ccid3_debug;
@@ -102,8 +112,7 @@
 static inline void ccid3_hc_tx_set_state(struct sock *sk,
 					 enum ccid3_hc_tx_states state)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 	enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state;
 
 	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
@@ -144,8 +153,7 @@
  */ 
 static void ccid3_hc_tx_update_x(struct sock *sk)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 
 	/* To avoid large error in calcX */
 	if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) {
@@ -159,7 +167,7 @@
 	} else {
 		struct timeval now;
 
-		do_gettimeofday(&now);
+		dccp_timestamp(sk, &now);
 	       	if (timeval_delta(&now, &hctx->ccid3hctx_t_ld) >=
 		    hctx->ccid3hctx_rtt) {
 			hctx->ccid3hctx_x = max_t(u32, min_t(u32, hctx->ccid3hctx_x_recv,
@@ -174,9 +182,8 @@
 static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
 {
 	struct sock *sk = (struct sock *)data;
-	struct dccp_sock *dp = dccp_sk(sk);
 	unsigned long next_tmout = 0;
-	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 
 	bh_lock_sock(sk);
 	if (sock_owned_by_user(sk)) {
@@ -274,7 +281,7 @@
 				   struct sk_buff *skb, int len)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 	struct dccp_tx_hist_entry *new_packet;
 	struct timeval now;
 	long delay;
@@ -307,7 +314,7 @@
 		dccp_tx_hist_add_entry(&hctx->ccid3hctx_hist, new_packet);
 	}
 
-	do_gettimeofday(&now);
+	dccp_timestamp(sk, &now);
 
 	switch (hctx->ccid3hctx_state) {
 	case TFRC_SSTATE_NO_SENT:
@@ -348,18 +355,20 @@
 	}
 
 	/* Can we send? if so add options and add to packet history */
-	if (rc == 0)
+	if (rc == 0) {
+		dp->dccps_hc_tx_insert_options = 1;
 		new_packet->dccphtx_ccval =
 			DCCP_SKB_CB(skb)->dccpd_ccval =
 				hctx->ccid3hctx_last_win_count;
+	}
 out:
 	return rc;
 }
 
 static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+	const struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 	struct timeval now;
 
 	BUG_ON(hctx == NULL);
@@ -370,7 +379,7 @@
 		return;
 	}
 
-	do_gettimeofday(&now);
+	dccp_timestamp(sk, &now);
 
 	/* check if we have sent a data packet */
 	if (len > 0) {
@@ -445,10 +454,11 @@
 
 static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+	const struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 	struct ccid3_options_received *opt_recv;
 	struct dccp_tx_hist_entry *packet;
+	struct timeval now;
 	unsigned long next_tmout; 
 	u32 t_elapsed;
 	u32 pinv;
@@ -471,7 +481,7 @@
 
 	opt_recv = &hctx->ccid3hctx_options_received;
 
-	t_elapsed = dp->dccps_options_received.dccpor_elapsed_time;
+	t_elapsed = dp->dccps_options_received.dccpor_elapsed_time * 10;
 	x_recv = opt_recv->ccid3or_receive_rate;
 	pinv = opt_recv->ccid3or_loss_event_rate;
 
@@ -496,9 +506,14 @@
 		}
 
 		/* Update RTT */
-		r_sample = timeval_now_delta(&packet->dccphtx_tstamp);
-		/* FIXME: */
-		// r_sample -= usecs_to_jiffies(t_elapsed * 10);
+		dccp_timestamp(sk, &now);
+		r_sample = timeval_delta(&now, &packet->dccphtx_tstamp);
+		if (unlikely(r_sample <= t_elapsed))
+			LIMIT_NETDEBUG(KERN_WARNING
+				       "%s: r_sample=%uus, t_elapsed=%uus\n",
+				       __FUNCTION__, r_sample, t_elapsed);
+		else
+			r_sample -= t_elapsed;
 
 		/* Update RTT estimate by 
 		 * If (No feedback recv)
@@ -591,8 +606,7 @@
 
 static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb)
 {
-	const struct dccp_sock *dp = dccp_sk(sk);
-	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 
 	if (hctx == NULL || !(sk->sk_state == DCCP_OPEN ||
 			      sk->sk_state == DCCP_PARTOPEN))
@@ -606,8 +620,8 @@
 				     unsigned char *value)
 {
 	int rc = 0;
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+	const struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 	struct ccid3_options_received *opt_recv;
 
 	if (hctx == NULL)
@@ -670,11 +684,11 @@
 
 	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
 
-	hctx = dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx),
-						      gfp_any());
-	if (hctx == NULL)
+	dp->dccps_hc_tx_ccid_private = kmalloc(sizeof(*hctx), gfp_any());
+	if (dp->dccps_hc_tx_ccid_private == NULL)
 		return -ENOMEM;
 
+	hctx = ccid3_hc_tx_sk(sk);
 	memset(hctx, 0, sizeof(*hctx));
 
 	if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
@@ -696,7 +710,7 @@
 static void ccid3_hc_tx_exit(struct sock *sk)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+	struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 
 	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
 	BUG_ON(hctx == NULL);
@@ -738,8 +752,7 @@
 static inline void ccid3_hc_rx_set_state(struct sock *sk,
 					 enum ccid3_hc_rx_states state)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	enum ccid3_hc_rx_states oldstate = hcrx->ccid3hcrx_state;
 
 	ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
@@ -751,14 +764,14 @@
 
 static void ccid3_hc_rx_send_feedback(struct sock *sk)
 {
+	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
 	struct dccp_rx_hist_entry *packet;
 	struct timeval now;
 
 	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
 
-	do_gettimeofday(&now);
+	dccp_timestamp(sk, &now);
 
 	switch (hcrx->ccid3hcrx_state) {
 	case TFRC_RSTATE_NO_DATA:
@@ -767,11 +780,8 @@
 	case TFRC_RSTATE_DATA: {
 		const u32 delta = timeval_delta(&now,
 					&hcrx->ccid3hcrx_tstamp_last_feedback);
-
-		hcrx->ccid3hcrx_x_recv = (hcrx->ccid3hcrx_bytes_recv *
-					  USEC_PER_SEC);
-		if (likely(delta > 1))
-			hcrx->ccid3hcrx_x_recv /= delta;
+		hcrx->ccid3hcrx_x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv,
+						   delta);
 	}
 		break;
 	default:
@@ -801,14 +811,14 @@
 		hcrx->ccid3hcrx_pinv = ~0;
 	else
 		hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
+	dp->dccps_hc_rx_insert_options = 1;
 	dccp_send_ack(sk);
 }
 
 static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
 {
-	const struct dccp_sock *dp = dccp_sk(sk);
+	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	u32 x_recv, pinv;
-	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
 
 	if (hcrx == NULL || !(sk->sk_state == DCCP_OPEN ||
 			      sk->sk_state == DCCP_PARTOPEN))
@@ -837,8 +847,7 @@
 
 static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	struct dccp_rx_hist_entry *entry, *next, *tail = NULL;
 	u32 rtt, delta, x_recv, fval, p, tmp2;
 	struct timeval tstamp = { 0, };
@@ -889,10 +898,9 @@
 	if (rtt == 0)
 		rtt = 1;
 
-	delta = timeval_now_delta(&hcrx->ccid3hcrx_tstamp_last_feedback);
-	x_recv = hcrx->ccid3hcrx_bytes_recv * USEC_PER_SEC;
-	if (likely(delta > 1))
-		x_recv /= delta;
+	dccp_timestamp(sk, &tstamp);
+	delta = timeval_delta(&tstamp, &hcrx->ccid3hcrx_tstamp_last_feedback);
+	x_recv = usecs_div(hcrx->ccid3hcrx_bytes_recv, delta);
 
 	tmp1 = (u64)x_recv * (u64)rtt;
 	do_div(tmp1,10000000);
@@ -911,8 +919,7 @@
 
 static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 
 	if (seq_loss != DCCP_MAX_SEQNO + 1 &&
 	    list_empty(&hcrx->ccid3hcrx_li_hist)) {
@@ -930,8 +937,7 @@
 
 static void ccid3_hc_rx_detect_loss(struct sock *sk)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	u8 win_loss;
 	const u64 seq_loss = dccp_rx_hist_detect_loss(&hcrx->ccid3hcrx_hist,
 						      &hcrx->ccid3hcrx_li_hist,
@@ -942,13 +948,12 @@
 
 static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
 {
-	struct dccp_sock *dp = dccp_sk(sk);
-	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	const struct dccp_options_received *opt_recv;
 	struct dccp_rx_hist_entry *packet;
 	struct timeval now;
 	u8 win_count;
-	u32 p_prev;
+	u32 p_prev, r_sample, t_elapsed;
 	int ins;
 
 	if (hcrx == NULL)
@@ -957,7 +962,7 @@
 	BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA ||
 		 hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA));
 
-	opt_recv = &dp->dccps_options_received;
+	opt_recv = &dccp_sk(sk)->dccps_options_received;
 
 	switch (DCCP_SKB_CB(skb)->dccpd_type) {
 	case DCCP_PKT_ACK:
@@ -967,10 +972,24 @@
 		if (opt_recv->dccpor_timestamp_echo == 0)
 			break;
 		p_prev = hcrx->ccid3hcrx_rtt;
-		do_gettimeofday(&now);
-		hcrx->ccid3hcrx_rtt = timeval_usecs(&now) -
-				     (opt_recv->dccpor_timestamp_echo -
-				      opt_recv->dccpor_elapsed_time) * 10;
+		dccp_timestamp(sk, &now);
+		timeval_sub_usecs(&now, opt_recv->dccpor_timestamp_echo * 10);
+		r_sample = timeval_usecs(&now);
+		t_elapsed = opt_recv->dccpor_elapsed_time * 10;
+
+		if (unlikely(r_sample <= t_elapsed))
+			LIMIT_NETDEBUG(KERN_WARNING
+				       "%s: r_sample=%uus, t_elapsed=%uus\n",
+				       __FUNCTION__, r_sample, t_elapsed);
+		else
+			r_sample -= t_elapsed;
+
+		if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
+			hcrx->ccid3hcrx_rtt = r_sample;
+		else
+			hcrx->ccid3hcrx_rtt = (hcrx->ccid3hcrx_rtt * 9) / 10 +
+					      r_sample / 10;
+
 		if (p_prev != hcrx->ccid3hcrx_rtt)
-			ccid3_pr_debug("%s, New RTT=%luus, elapsed time=%u\n",
+			ccid3_pr_debug("%s, New RTT=%uus, elapsed time=%u\n",
 				       dccp_role(sk), hcrx->ccid3hcrx_rtt,
@@ -985,7 +1004,7 @@
 		return;
 	}
 
-	packet = dccp_rx_hist_entry_new(ccid3_rx_hist, opt_recv->dccpor_ndp,
+	packet = dccp_rx_hist_entry_new(ccid3_rx_hist, sk, opt_recv->dccpor_ndp,
 					skb, SLAB_ATOMIC);
 	if (packet == NULL) {
 		ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet "
@@ -1017,7 +1036,7 @@
 		if (ins != 0)
 			break;
 
-		do_gettimeofday(&now);
+		dccp_timestamp(sk, &now);
 		if (timeval_delta(&now, &hcrx->ccid3hcrx_tstamp_last_ack) >=
 		    hcrx->ccid3hcrx_rtt) {
 			hcrx->ccid3hcrx_tstamp_last_ack = now;
@@ -1056,11 +1075,11 @@
 
 	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
 
-	hcrx = dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx),
-						      gfp_any());
-	if (hcrx == NULL)
+	dp->dccps_hc_rx_ccid_private = kmalloc(sizeof(*hcrx), gfp_any());
+	if (dp->dccps_hc_rx_ccid_private == NULL)
 		return -ENOMEM;
 
+	hcrx = ccid3_hc_rx_sk(sk);
 	memset(hcrx, 0, sizeof(*hcrx));
 
 	if (dp->dccps_packet_size >= TFRC_MIN_PACKET_SIZE &&
@@ -1072,18 +1091,16 @@
 	hcrx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
 	INIT_LIST_HEAD(&hcrx->ccid3hcrx_hist);
 	INIT_LIST_HEAD(&hcrx->ccid3hcrx_li_hist);
-	/*
-	 * XXX this seems to be paranoid, need to think more about this, for
-	 * now start with something different than zero. -acme
-	 */
-	hcrx->ccid3hcrx_rtt = USEC_PER_SEC / 5;
+	dccp_timestamp(sk, &hcrx->ccid3hcrx_tstamp_last_ack);
+	hcrx->ccid3hcrx_tstamp_last_feedback = hcrx->ccid3hcrx_tstamp_last_ack;
+	hcrx->ccid3hcrx_rtt = 5000; /* XXX 5ms for now... */
 	return 0;
 }
 
 static void ccid3_hc_rx_exit(struct sock *sk)
 {
+	struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
-	struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
 
 	ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
 
@@ -1104,8 +1121,7 @@
 
 static void ccid3_hc_rx_get_info(struct sock *sk, struct tcp_info *info)
 {
-	const struct dccp_sock *dp = dccp_sk(sk);
-	const struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+	const struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk);
 
 	if (hcrx == NULL)
 		return;
@@ -1117,8 +1133,7 @@
 
 static void ccid3_hc_tx_get_info(struct sock *sk, struct tcp_info *info)
 {
-	const struct dccp_sock *dp = dccp_sk(sk);
-	const struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+	const struct ccid3_hc_tx_sock *hctx = ccid3_hc_tx_sk(sk);
 
 	if (hctx == NULL)
 		return;
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
index ee8cbac..d16f00d 100644
--- a/net/dccp/ccids/ccid3.h
+++ b/net/dccp/ccids/ccid3.h
@@ -115,7 +115,7 @@
   	u64			ccid3hcrx_seqno_last_counter:48,
 				ccid3hcrx_state:8,
 				ccid3hcrx_last_counter:4;
-	unsigned long		ccid3hcrx_rtt;
+	u32			ccid3hcrx_rtt;
   	u32			ccid3hcrx_p;
   	u32			ccid3hcrx_bytes_recv;
   	struct timeval		ccid3hcrx_tstamp_last_feedback;
@@ -128,10 +128,14 @@
   	u32			ccid3hcrx_x_recv;
 };
 
-#define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \
-    ((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field)
+static inline struct ccid3_hc_tx_sock *ccid3_hc_tx_sk(const struct sock *sk)
+{
+    return dccp_sk(sk)->dccps_hc_tx_ccid_private;
+}
 
-#define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \
-    ((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field)
+static inline struct ccid3_hc_rx_sock *ccid3_hc_rx_sk(const struct sock *sk)
+{
+    return dccp_sk(sk)->dccps_hc_rx_ccid_private;
+}
 
 #endif /* _DCCP_CCID3_H_ */
diff --git a/net/dccp/ccids/lib/packet_history.h b/net/dccp/ccids/lib/packet_history.h
index fb90a91..b375ebd 100644
--- a/net/dccp/ccids/lib/packet_history.h
+++ b/net/dccp/ccids/lib/packet_history.h
@@ -134,6 +134,7 @@
 
 static inline struct dccp_rx_hist_entry *
 		     dccp_rx_hist_entry_new(struct dccp_rx_hist *hist,
+				     	    const struct sock *sk, 
 				     	    const u32 ndp, 
 					    const struct sk_buff *skb,
 					    const unsigned int __nocast prio)
@@ -148,7 +149,7 @@
 		entry->dccphrx_ccval = dh->dccph_ccval;
 		entry->dccphrx_type  = dh->dccph_type;
 		entry->dccphrx_ndp   = ndp;
-		do_gettimeofday(&(entry->dccphrx_tstamp));
+		dccp_timestamp(sk, &entry->dccphrx_tstamp);
 	}
 
 	return entry;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 33456c0..95c4630 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -426,10 +426,13 @@
 		dccp_ackpkts_alloc(unsigned int len,
 				  const unsigned int __nocast priority);
 extern void dccp_ackpkts_free(struct dccp_ackpkts *ap);
-extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state);
+extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk,
+			    u64 ackno, u8 state);
 extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap,
 					 struct sock *sk, u64 ackno);
 
+extern void dccp_timestamp(const struct sock *sk, struct timeval *tv);
+
 static inline suseconds_t timeval_usecs(const struct timeval *tv)
 {
 	return tv->tv_sec * USEC_PER_SEC + tv->tv_usec;
@@ -468,17 +471,6 @@
 	}
 }
 
-/*
- * Returns the difference in usecs between timeval
- * passed in and current time
- */
-static inline suseconds_t timeval_now_delta(const struct timeval *tv)
-{
-	struct timeval now;
-	do_gettimeofday(&now);
-	return timeval_delta(&now, tv);
-}
-
 #ifdef CONFIG_IP_DCCP_DEBUG
 extern void dccp_ackvector_print(const u64 ackno,
 				 const unsigned char *vector, int len);
diff --git a/net/dccp/input.c b/net/dccp/input.c
index ef29cef..c60bc34 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -170,7 +170,7 @@
 	if (dp->dccps_options.dccpo_send_ack_vector) {
 		struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
 
-		if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
+		if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk,
 				     DCCP_SKB_CB(skb)->dccpd_seq,
 				     DCCP_ACKPKTS_STATE_RECEIVED)) {
 			LIMIT_NETDEBUG(KERN_WARNING "DCCP: acknowledgeable "
@@ -498,7 +498,7 @@
 		 * DCCP_ACKPKTS_STATE_ECN_MARKED
 		 */
 		if (dp->dccps_options.dccpo_send_ack_vector) {
-			if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
+			if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk,
 					     DCCP_SKB_CB(skb)->dccpd_seq,
 					     DCCP_ACKPKTS_STATE_RECEIVED))
 				goto discard;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 3fc75db..fee9a8c 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -1243,6 +1243,7 @@
 	static int dccp_ctl_socket_init = 1;
 
 	dccp_options_init(&dp->dccps_options);
+	do_gettimeofday(&dp->dccps_epoch);
 
 	if (dp->dccps_options.dccpo_send_ack_vector) {
 		dp->dccps_hc_rx_ackpkts =
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index ce5dff4..18461bc 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -96,6 +96,7 @@
 		newdp->dccps_hc_rx_ackpkts = NULL;
 		newdp->dccps_role = DCCP_ROLE_SERVER;
 		newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
+		do_gettimeofday(&newdp->dccps_epoch);
 
 		if (newdp->dccps_options.dccpo_send_ack_vector) {
 			newdp->dccps_hc_rx_ackpkts =
diff --git a/net/dccp/options.c b/net/dccp/options.c
index 382c589..d4c4242 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -72,6 +72,7 @@
 	struct dccp_options_received *opt_recv = &dp->dccps_options_received;
 	unsigned char opt, len;
 	unsigned char *value;
+	u32 elapsed_time;
 
 	memset(opt_recv, 0, sizeof(*opt_recv));
 
@@ -139,7 +140,7 @@
 			opt_recv->dccpor_timestamp = ntohl(*(u32 *)value);
 
 			dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
-			do_gettimeofday(&dp->dccps_timestamp_time);
+			dccp_timestamp(sk, &dp->dccps_timestamp_time);
 
 			dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n",
 				      debug_prefix, opt_recv->dccpor_timestamp,
@@ -159,18 +160,18 @@
 				      (unsigned long long)
 				      DCCP_SKB_CB(skb)->dccpd_ack_seq);
 
-			if (len > 4) {
-				if (len == 6)
-					opt_recv->dccpor_elapsed_time =
-						 ntohs(*(u16 *)(value + 4));
-				else
-					opt_recv->dccpor_elapsed_time =
-						 ntohl(*(u32 *)(value + 4));
 
-				dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n",
-				      debug_prefix,
-				      opt_recv->dccpor_elapsed_time);
-			}
+			if (len == 4)
+				break;
+
+			if (len == 6)
+				elapsed_time = ntohs(*(u16 *)(value + 4));
+			else
+				elapsed_time = ntohl(*(u32 *)(value + 4));
+
+			/* Give precedence to the biggest ELAPSED_TIME */
+			if (elapsed_time > opt_recv->dccpor_elapsed_time)
+				opt_recv->dccpor_elapsed_time = elapsed_time;
 			break;
 		case DCCPO_ELAPSED_TIME:
 			if (len != 2 && len != 4)
@@ -180,14 +181,15 @@
 				continue;
 
 			if (len == 2)
-				opt_recv->dccpor_elapsed_time =
-							ntohs(*(u16 *)value);
+				elapsed_time = ntohs(*(u16 *)value);
 			else
-				opt_recv->dccpor_elapsed_time =
-							ntohl(*(u32 *)value);
+				elapsed_time = ntohl(*(u32 *)value);
+
+			if (elapsed_time > opt_recv->dccpor_elapsed_time)
+				opt_recv->dccpor_elapsed_time = elapsed_time;
 
 			dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix,
-				      opt_recv->dccpor_elapsed_time);
+				      elapsed_time);
 			break;
 			/*
 			 * From draft-ietf-dccp-spec-11.txt:
@@ -359,9 +361,13 @@
 #endif
 	struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
 	int len = ap->dccpap_buf_vector_len + 2;
-	const u32 elapsed_time = timeval_now_delta(&ap->dccpap_time) / 10;
+	struct timeval now;
+	u32 elapsed_time;
 	unsigned char *to, *from;
 
+	dccp_timestamp(sk, &now);
+	elapsed_time = timeval_delta(&now, &ap->dccpap_time) / 10;
+
 	if (elapsed_time != 0)
 		dccp_insert_option_elapsed_time(sk, skb, elapsed_time);
 
@@ -426,13 +432,29 @@
 		      (unsigned long long) ap->dccpap_ack_ackno);
 }
 
+void dccp_timestamp(const struct sock *sk, struct timeval *tv)
+{
+	const struct dccp_sock *dp = dccp_sk(sk);
+
+	do_gettimeofday(tv);
+	tv->tv_sec  -= dp->dccps_epoch.tv_sec;
+	tv->tv_usec -= dp->dccps_epoch.tv_usec;
+
+	while (tv->tv_usec < 0) {
+		tv->tv_sec--;
+		tv->tv_usec += USEC_PER_SEC;
+	}
+}
+
+EXPORT_SYMBOL_GPL(dccp_timestamp);
+
 void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
 {
 	struct timeval tv;
 	u32 now;
 	
-	do_gettimeofday(&tv);
-	now = (tv.tv_sec * USEC_PER_SEC + tv.tv_usec) / 10;
+	dccp_timestamp(sk, &tv);
+	now = timeval_usecs(&tv) / 10;
 	/* yes this will overflow but that is the point as we want a
 	 * 10 usec 32 bit timer which mean it wraps every 11.9 hours */
 
@@ -450,13 +472,17 @@
 	const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ?
 					"CLIENT TX opt: " : "server TX opt: ";
 #endif
+	struct timeval now;
 	u32 tstamp_echo;
-	const u32 elapsed_time =
-			timeval_now_delta(&dp->dccps_timestamp_time) / 10;
-	const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
-	const int len = 6 + elapsed_time_len;
+	u32 elapsed_time;
+	int len, elapsed_time_len;
 	unsigned char *to;
 
+	dccp_timestamp(sk, &now);
+	elapsed_time = timeval_delta(&now, &dp->dccps_timestamp_time) / 10;
+	elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
+	len = 6 + elapsed_time_len;
+
 	if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
 		LIMIT_NETDEBUG(KERN_INFO "DCCP: packet too small to insert "
 					 "timestamp echo!\n");
@@ -505,13 +531,18 @@
 		    (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno !=
 		     DCCP_MAX_SEQNO + 1))
 			dccp_insert_option_ack_vector(sk, skb);
-
 		if (dp->dccps_timestamp_echo != 0)
 			dccp_insert_option_timestamp_echo(sk, skb);
 	}
 
-	ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb);
-	ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb);
+	if (dp->dccps_hc_rx_insert_options) {
+		ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb);
+		dp->dccps_hc_rx_insert_options = 0;
+	}
+	if (dp->dccps_hc_tx_insert_options) {
+		ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb);
+		dp->dccps_hc_tx_insert_options = 0;
+	}
 
 	/* XXX: insert other options when appropriate */
 
@@ -616,7 +647,8 @@
 /*
  * Implements the draft-ietf-dccp-spec-11.txt Appendix A
  */
-int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state)
+int dccp_ackpkts_add(struct dccp_ackpkts *ap, const struct sock *sk,
+		     u64 ackno, u8 state)
 {
 	/*
 	 * Check at the right places if the buffer is full, if it is, tell the
@@ -697,7 +729,7 @@
 	}
 
 	ap->dccpap_buf_ackno = ackno;
-	do_gettimeofday(&ap->dccpap_time);
+	dccp_timestamp(sk, &ap->dccpap_time);
 out:
 	dccp_pr_debug("");
 	dccp_ackpkts_print(ap);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index bf147f8..a9d84f9 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1248,11 +1248,6 @@
 /* ------------------------------------------------------------------------ */
 
 #ifdef CONFIG_PROC_FS
-#ifdef CONFIG_IP_FIB_TRIE
-extern int  fib_stat_proc_init(void);
-extern void fib_stat_proc_exit(void);
-#endif
-
 static int __init ipv4_proc_init(void)
 {
 	int rc = 0;
@@ -1265,19 +1260,11 @@
 		goto out_udp;
 	if (fib_proc_init())
 		goto out_fib;
-#ifdef CONFIG_IP_FIB_TRIE
-         if (fib_stat_proc_init())
-                 goto out_fib_stat;
-#endif
 	if (ip_misc_proc_init())
 		goto out_misc;
 out:
 	return rc;
 out_misc:
-#ifdef CONFIG_IP_FIB_TRIE
- 	fib_stat_proc_exit();
-out_fib_stat:
-#endif
 	fib_proc_exit();
 out_fib:
 	udp4_proc_exit();
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index b2dea4e..1b63b48 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -43,7 +43,7 @@
  *		2 of the License, or (at your option) any later version.
  */
 
-#define VERSION "0.402"
+#define VERSION "0.403"
 
 #include <linux/config.h>
 #include <asm/uaccess.h>
@@ -164,7 +164,6 @@
 static struct tnode *inflate(struct trie *t, struct tnode *tn);
 static struct tnode *halve(struct trie *t, struct tnode *tn);
 static void tnode_free(struct tnode *tn);
-static void trie_dump_seq(struct seq_file *seq, struct trie *t);
 
 static kmem_cache_t *fn_alias_kmem __read_mostly;
 static struct trie *trie_local = NULL, *trie_main = NULL;
@@ -1971,378 +1970,137 @@
 	return tb;
 }
 
-/* Trie dump functions */
-
-static void putspace_seq(struct seq_file *seq, int n)
-{
-	while (n--)
-		seq_printf(seq, " ");
-}
-
-static void printbin_seq(struct seq_file *seq, unsigned int v, int bits)
-{
-	while (bits--)
-		seq_printf(seq, "%s", (v & (1<<bits))?"1":"0");
-}
-
-static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
-		   int pend, int cindex, int bits)
-{
-	putspace_seq(seq, indent);
-	if (IS_LEAF(n))
-		seq_printf(seq, "|");
-	else
-		seq_printf(seq, "+");
-	if (bits) {
-		seq_printf(seq, "%d/", cindex);
-		printbin_seq(seq, cindex, bits);
-		seq_printf(seq, ": ");
-	} else
-		seq_printf(seq, "<root>: ");
-	seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n);
-
-	if (IS_LEAF(n)) {
-		struct leaf *l = (struct leaf *)n;
-		struct fib_alias *fa;
-		int i;
-
-		seq_printf(seq, "key=%d.%d.%d.%d\n",
-			   n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256);
-
-		for (i = 32; i >= 0; i--)
-			if (find_leaf_info(&l->list, i)) {
-				struct list_head *fa_head = get_fa_head(l, i);
-
-				if (!fa_head)
-					continue;
-
-				if (list_empty(fa_head))
-					continue;
-
-				putspace_seq(seq, indent+2);
-				seq_printf(seq, "{/%d...dumping}\n", i);
-
-				list_for_each_entry_rcu(fa, fa_head, fa_list) {
-					putspace_seq(seq, indent+2);
-					if (fa->fa_info == NULL) {
-						seq_printf(seq, "Error fa_info=NULL\n");
-						continue;
-					}
-					if (fa->fa_info->fib_nh == NULL) {
-						seq_printf(seq, "Error _fib_nh=NULL\n");
-						continue;
-					}
-
-					seq_printf(seq, "{type=%d scope=%d TOS=%d}\n",
-					      fa->fa_type,
-					      fa->fa_scope,
-					      fa->fa_tos);
-				}
-			}
-	} else {
-		struct tnode *tn = (struct tnode *)n;
-		int plen = ((struct tnode *)n)->pos;
-		t_key prf = MASK_PFX(n->key, plen);
-
-		seq_printf(seq, "key=%d.%d.%d.%d/%d\n",
-			   prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen);
-
-		putspace_seq(seq, indent); seq_printf(seq, "|    ");
-		seq_printf(seq, "{key prefix=%08x/", tn->key & TKEY_GET_MASK(0, tn->pos));
-		printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos);
-		seq_printf(seq, "}\n");
-		putspace_seq(seq, indent); seq_printf(seq, "|    ");
-		seq_printf(seq, "{pos=%d", tn->pos);
-		seq_printf(seq, " (skip=%d bits)", tn->pos - pend);
-		seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits));
-		putspace_seq(seq, indent); seq_printf(seq, "|    ");
-		seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children);
-	}
-}
-
-static void trie_dump_seq(struct seq_file *seq, struct trie *t)
-{
-	struct node *n;
-	int cindex = 0;
-	int indent = 1;
-	int pend = 0;
-	int depth = 0;
-	struct tnode *tn;
-
-	rcu_read_lock();
-	n = rcu_dereference(t->trie);
-	seq_printf(seq, "------ trie_dump of t=%p ------\n", t);
-
-	if (!n) {
-		seq_printf(seq, "------ trie is empty\n");
-
-		rcu_read_unlock();
-		return;
-	}
-
-	printnode_seq(seq, indent, n, pend, cindex, 0);
-
-	if (!IS_TNODE(n)) {
-		rcu_read_unlock();
-		return;
-	}
-
-	tn = (struct tnode *)n;
-	pend = tn->pos+tn->bits;
-	putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
-	indent += 3;
-	depth++;
-
-	while (tn && cindex < (1 << tn->bits)) {
-		struct node *child = rcu_dereference(tn->child[cindex]);
-		if (!child)
-			cindex++;
-		else {
-			/* Got a child */
-			printnode_seq(seq, indent, child, pend,
-				      cindex, tn->bits);
-
-			if (IS_LEAF(child))
-				cindex++;
-
-			else {
-				/*
-				 * New tnode. Decend one level
-				 */
-
-				depth++;
-				n = child;
-				tn = (struct tnode *)n;
-				pend = tn->pos+tn->bits;
-				putspace_seq(seq, indent);
-				seq_printf(seq, "\\--\n");
-				indent += 3;
-				cindex = 0;
-			}
-		}
-
-		/*
-		 * Test if we are done
-		 */
-
-		while (cindex >= (1 << tn->bits)) {
-			/*
-			 * Move upwards and test for root
-			 * pop off all traversed  nodes
-			 */
-
-			if (NODE_PARENT(tn) == NULL) {
-				tn = NULL;
-				break;
-			}
-
-			cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
-			cindex++;
-			tn = NODE_PARENT(tn);
-			pend = tn->pos + tn->bits;
-			indent -= 3;
-			depth--;
-		}
-	}
-	rcu_read_unlock();
-}
-
-static struct trie_stat *trie_stat_new(void)
-{
-	struct trie_stat *s;
-	int i;
-
-	s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL);
-	if (!s)
-		return NULL;
-
-	s->totdepth = 0;
-	s->maxdepth = 0;
-	s->tnodes = 0;
-	s->leaves = 0;
-	s->nullpointers = 0;
-
-	for (i = 0; i < MAX_CHILDS; i++)
-		s->nodesizes[i] = 0;
-
-	return s;
-}
-
-static struct trie_stat *trie_collect_stats(struct trie *t)
-{
-	struct node *n;
-	struct trie_stat *s = trie_stat_new();
-	int cindex = 0;
-	int pend = 0;
-	int depth = 0;
-
-	if (!s)
-		return NULL;
-
-	rcu_read_lock();
-	n = rcu_dereference(t->trie);
-
-	if (!n)
-		return s;
-
-	if (IS_TNODE(n)) {
-		struct tnode *tn = (struct tnode *)n;
-		pend = tn->pos+tn->bits;
-		s->nodesizes[tn->bits]++;
-		depth++;
-
-		while (tn && cindex < (1 << tn->bits)) {
-			struct node *ch = rcu_dereference(tn->child[cindex]);
-			if (ch) {
-
-				/* Got a child */
-
-				if (IS_LEAF(tn->child[cindex])) {
-					cindex++;
-
-					/* stats */
-					if (depth > s->maxdepth)
-						s->maxdepth = depth;
-					s->totdepth += depth;
-					s->leaves++;
-				} else {
-					/*
-					 * New tnode. Decend one level
-					 */
-
-					s->tnodes++;
-					s->nodesizes[tn->bits]++;
-					depth++;
-
-					n = ch;
-					tn = (struct tnode *)n;
-					pend = tn->pos+tn->bits;
-
-					cindex = 0;
-				}
-			} else {
-				cindex++;
-				s->nullpointers++;
-			}
-
-			/*
-			 * Test if we are done
-			 */
-
-			while (cindex >= (1 << tn->bits)) {
-				/*
-				 * Move upwards and test for root
-				 * pop off all traversed  nodes
-				 */
-
-				if (NODE_PARENT(tn) == NULL) {
-					tn = NULL;
-					n = NULL;
-					break;
-				}
-
-				cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
-				tn = NODE_PARENT(tn);
-				cindex++;
-				n = (struct node *)tn;
-				pend = tn->pos+tn->bits;
-				depth--;
- 			}
-		}
-	}
-
-	rcu_read_unlock();
-	return s;
-}
-
 #ifdef CONFIG_PROC_FS
+/* Depth first Trie walk iterator */
+struct fib_trie_iter {
+	struct tnode *tnode;
+	struct trie *trie;
+	unsigned index;
+	unsigned depth;
+};
 
-static struct fib_alias *fib_triestat_get_first(struct seq_file *seq)
+static struct node *fib_trie_get_next(struct fib_trie_iter *iter)
 {
+	struct tnode *tn = iter->tnode;
+	unsigned cindex = iter->index;
+	struct tnode *p;
+
+	pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
+		 iter->tnode, iter->index, iter->depth);
+rescan:
+	while (cindex < (1<<tn->bits)) {
+		struct node *n = tnode_get_child(tn, cindex);
+
+		if (n) {
+			if (IS_LEAF(n)) {
+				iter->tnode = tn;
+				iter->index = cindex + 1;
+			} else {
+				/* push down one level */
+				iter->tnode = (struct tnode *) n;
+				iter->index = 0;
+				++iter->depth;
+			}
+			return n;
+		}
+
+		++cindex;
+	}
+
+	/* Current node exhausted, pop back up */
+	p = NODE_PARENT(tn);
+	if (p) {
+		cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
+		tn = p;
+		--iter->depth;
+		goto rescan;
+	}
+
+	/* got root? */
 	return NULL;
 }
 
-static struct fib_alias *fib_triestat_get_next(struct seq_file *seq)
+static struct node *fib_trie_get_first(struct fib_trie_iter *iter,
+				       struct trie *t)
 {
+	struct node *n = t ? rcu_dereference(t->trie) : NULL;
+
+	/* t is NULL if that table was never set up; nothing to walk then */
+	if (n && IS_TNODE(n)) {
+		iter->tnode = (struct tnode *) n;
+		iter->trie = t;
+		iter->index = iter->depth = 0;
+		return n;
+	}
 	return NULL;
 }
 
-static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos)
+static void trie_collect_stats(struct trie *t, struct trie_stat *s)
 {
-	if (!ip_fib_main_table)
-		return NULL;
+	struct node *n;
+	struct fib_trie_iter iter;
 
-	if (*pos)
-		return fib_triestat_get_next(seq);
-	else
-		return SEQ_START_TOKEN;
-}
+	memset(s, 0, sizeof(*s));
 
-static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
-	++*pos;
-	if (v == SEQ_START_TOKEN)
-		return fib_triestat_get_first(seq);
-	else
-		return fib_triestat_get_next(seq);
-}
+	rcu_read_lock();
+	for (n = fib_trie_get_first(&iter, t); n;
+	     n = fib_trie_get_next(&iter)) {
+		if (IS_LEAF(n)) {
+			s->leaves++;
+			s->totdepth += iter.depth;
+			if (iter.depth > s->maxdepth)
+				s->maxdepth = iter.depth;
+		} else {
+			const struct tnode *tn = (const struct tnode *) n;
+			int i;
 
-static void fib_triestat_seq_stop(struct seq_file *seq, void *v)
-{
-
+			s->tnodes++;
+			s->nodesizes[tn->bits]++;
+			for (i = 0; i < (1<<tn->bits); i++)
+				if (!tn->child[i])
+					s->nullpointers++;
+		}
+	}
+	rcu_read_unlock();
 }
 
 /*
  *	This outputs /proc/net/fib_triestats
- *
- *	It always works in backward compatibility mode.
- *	The format of the file is not supposed to be changed.
  */
-
-static void collect_and_show(struct trie *t, struct seq_file *seq)
+static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
 {
-	int bytes = 0; /* How many bytes are used, a ref is 4 bytes */
-	int i, max, pointers;
-	struct trie_stat *stat;
-	int avdepth;
+	unsigned i, max, pointers, bytes, avdepth;
 
-	stat = trie_collect_stats(t);
+	if (stat->leaves)
+		avdepth = stat->totdepth*100 / stat->leaves;
+	else
+		avdepth = 0;
 
-	bytes = 0;
-	seq_printf(seq, "trie=%p\n", t);
+	seq_printf(seq, "\tAver depth:     %u.%02u\n", avdepth / 100, avdepth % 100);
+	seq_printf(seq, "\tMax depth:      %u\n", stat->maxdepth);
 
-	if (stat) {
-		if (stat->leaves)
-			avdepth = stat->totdepth*100 / stat->leaves;
-		else
-			avdepth = 0;
-		seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100);
-		seq_printf(seq, "Max depth: %4d\n", stat->maxdepth);
+	seq_printf(seq, "\tLeaves:         %u\n", stat->leaves);
 
-		seq_printf(seq, "Leaves: %d\n", stat->leaves);
-		bytes += sizeof(struct leaf) * stat->leaves;
-		seq_printf(seq, "Internal nodes: %d\n", stat->tnodes);
-		bytes += sizeof(struct tnode) * stat->tnodes;
+	bytes = sizeof(struct leaf) * stat->leaves;
+	seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
+	bytes += sizeof(struct tnode) * stat->tnodes;
 
-		max = MAX_CHILDS-1;
+	max = MAX_CHILDS-1;
+	while (max > 0 && stat->nodesizes[max] == 0)
+		max--;	/* unsigned: must stop at 0, not wrap */
 
-		while (max >= 0 && stat->nodesizes[max] == 0)
-			max--;
-		pointers = 0;
+	pointers = 0;
+	for (i = 1; i <= max; i++)
+		if (stat->nodesizes[i] != 0) {
+			seq_printf(seq, "  %u: %u",  i, stat->nodesizes[i]);
+			pointers += (1<<i) * stat->nodesizes[i];
+		}
+	seq_putc(seq, '\n');
+	seq_printf(seq, "\tPointers: %u\n", pointers);
 
-		for (i = 1; i <= max; i++)
-			if (stat->nodesizes[i] != 0) {
-				seq_printf(seq, "  %d: %d",  i, stat->nodesizes[i]);
-				pointers += (1<<i) * stat->nodesizes[i];
-			}
-		seq_printf(seq, "\n");
-		seq_printf(seq, "Pointers: %d\n", pointers);
-		bytes += sizeof(struct node *) * pointers;
-		seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
-		seq_printf(seq, "Total size: %d  kB\n", bytes / 1024);
-
-		kfree(stat);
-	}
+	bytes += sizeof(struct node *) * pointers;
+	seq_printf(seq, "\tNull ptrs: %u\n", stat->nullpointers);
+	seq_printf(seq, "\tTotal size: %u  kB\n", (bytes + 1023) / 1024);
 
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 	seq_printf(seq, "Counters:\n---------\n");
@@ -2360,169 +2118,377 @@
 
 static int fib_triestat_seq_show(struct seq_file *seq, void *v)
 {
-	char bf[128];
+	struct trie_stat *stat;
 
-	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
-			   sizeof(struct leaf), sizeof(struct tnode));
-		if (trie_local)
-			collect_and_show(trie_local, seq);
+	stat = kmalloc(sizeof(*stat), GFP_KERNEL);
+	if (!stat)
+		return -ENOMEM;
 
-		if (trie_main)
-			collect_and_show(trie_main, seq);
-	} else {
-		snprintf(bf, sizeof(bf), "*\t%08X\t%08X", 200, 400);
+	seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
+		   sizeof(struct leaf), sizeof(struct tnode));
 
-		seq_printf(seq, "%-127s\n", bf);
+	if (trie_local) {
+		seq_printf(seq, "Local:\n");
+		trie_collect_stats(trie_local, stat);
+		trie_show_stats(seq, stat);
 	}
+
+	if (trie_main) {
+		seq_printf(seq, "Main:\n");
+		trie_collect_stats(trie_main, stat);
+		trie_show_stats(seq, stat);
+	}
+	kfree(stat);
+
 	return 0;
 }
 
-static struct seq_operations fib_triestat_seq_ops = {
-	.start = fib_triestat_seq_start,
-	.next  = fib_triestat_seq_next,
-	.stop  = fib_triestat_seq_stop,
-	.show  = fib_triestat_seq_show,
-};
-
 static int fib_triestat_seq_open(struct inode *inode, struct file *file)
 {
-	struct seq_file *seq;
-	int rc = -ENOMEM;
-
-	rc = seq_open(file, &fib_triestat_seq_ops);
-	if (rc)
-		goto out_kfree;
-
-	seq = file->private_data;
-out:
-	return rc;
-out_kfree:
-	goto out;
+	return single_open(file, fib_triestat_seq_show, NULL);
 }
 
-static struct file_operations fib_triestat_seq_fops = {
+static struct file_operations fib_triestat_fops = {
 	.owner	= THIS_MODULE,
 	.open	= fib_triestat_seq_open,
 	.read	= seq_read,
 	.llseek	= seq_lseek,
-	.release = seq_release_private,
+	.release = single_release,
 };
 
-int __init fib_stat_proc_init(void)
+static struct node *fib_trie_get_idx(struct fib_trie_iter *iter,
+				      loff_t pos)
 {
-	if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops))
-		return -ENOMEM;
-	return 0;
-}
+	loff_t idx = 0;
+	struct node *n;
 
-void __init fib_stat_proc_exit(void)
-{
-	proc_net_remove("fib_triestat");
-}
+	for (n = fib_trie_get_first(iter, trie_local);
+	     n; ++idx, n = fib_trie_get_next(iter)) {
+		if (pos == idx)
+			return n;
+	}
 
-static struct fib_alias *fib_trie_get_first(struct seq_file *seq)
-{
-	return NULL;
-}
-
-static struct fib_alias *fib_trie_get_next(struct seq_file *seq)
-{
+	for (n = fib_trie_get_first(iter, trie_main);
+	     n; ++idx, n = fib_trie_get_next(iter)) {
+		if (pos == idx)
+			return n;
+	}
 	return NULL;
 }
 
 static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	if (!ip_fib_main_table)
-		return NULL;
-
-	if (*pos)
-		return fib_trie_get_next(seq);
-	else
+	rcu_read_lock();
+	if (*pos == 0)
 		return SEQ_START_TOKEN;
+	return fib_trie_get_idx(seq->private, *pos - 1);
 }
 
 static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
+	struct fib_trie_iter *iter = seq->private;
+	void *l = v;
+
 	++*pos;
 	if (v == SEQ_START_TOKEN)
-		return fib_trie_get_first(seq);
-	else
-		return fib_trie_get_next(seq);
+		return fib_trie_get_idx(iter, 0);
 
+	v = fib_trie_get_next(iter);
+	BUG_ON(v == l);
+	if (v)
+		return v;
+
+	/* continue scan in next trie */
+	if (iter->trie == trie_local)
+		return fib_trie_get_first(iter, trie_main);
+
+	return NULL;
 }
 
 static void fib_trie_seq_stop(struct seq_file *seq, void *v)
 {
+	rcu_read_unlock();
 }
 
-/*
- *	This outputs /proc/net/fib_trie.
- *
- *	It always works in backward compatibility mode.
- *	The format of the file is not supposed to be changed.
- */
+static void seq_indent(struct seq_file *seq, int n)
+{
+	while (n-- > 0) seq_puts(seq, "   ");
+}
 
+static inline const char *rtn_scope(enum rt_scope_t s)
+{
+	static char buf[32];
+
+	switch(s) {
+	case RT_SCOPE_UNIVERSE: return "universe";
+	case RT_SCOPE_SITE:	return "site";
+	case RT_SCOPE_LINK:	return "link";
+	case RT_SCOPE_HOST:	return "host";
+	case RT_SCOPE_NOWHERE:	return "nowhere";
+	default:
+		snprintf(buf, sizeof(buf), "scope=%d", s);
+		return buf;
+	}
+}
+
+static const char *rtn_type_names[__RTN_MAX] = {
+	[RTN_UNSPEC] = "UNSPEC",
+	[RTN_UNICAST] = "UNICAST",
+	[RTN_LOCAL] = "LOCAL",
+	[RTN_BROADCAST] = "BROADCAST",
+	[RTN_ANYCAST] = "ANYCAST",
+	[RTN_MULTICAST] = "MULTICAST",
+	[RTN_BLACKHOLE] = "BLACKHOLE",
+	[RTN_UNREACHABLE] = "UNREACHABLE",
+	[RTN_PROHIBIT] = "PROHIBIT",
+	[RTN_THROW] = "THROW",
+	[RTN_NAT] = "NAT",
+	[RTN_XRESOLVE] = "XRESOLVE",
+};
+
+static inline const char *rtn_type(unsigned t)
+{
+	static char buf[32];
+
+	if (t < __RTN_MAX && rtn_type_names[t])
+		return rtn_type_names[t];
+	snprintf(buf, sizeof(buf), "type %d", t);
+	return buf;
+}
+
+/* Pretty print one node: a tnode prefix line, or a leaf plus its aliases */
 static int fib_trie_seq_show(struct seq_file *seq, void *v)
 {
-	char bf[128];
+	const struct fib_trie_iter *iter = seq->private;
+	struct node *n = v;
 
-	if (v == SEQ_START_TOKEN) {
-		if (trie_local)
-			trie_dump_seq(seq, trie_local);
+	if (v == SEQ_START_TOKEN)
+		return 0;
 
-		if (trie_main)
-			trie_dump_seq(seq, trie_main);
+	if (IS_TNODE(n)) {
+		struct tnode *tn = (struct tnode *) n;
+		t_key prf = ntohl(MASK_PFX(tn->key, tn->pos));
+
+		if (!NODE_PARENT(n)) {
+			if (iter->trie == trie_local)
+				seq_puts(seq, "<local>:\n");
+			else
+				seq_puts(seq, "<main>:\n");
+		} else {
+			seq_indent(seq, iter->depth-1);
+			seq_printf(seq, "  +-- %d.%d.%d.%d/%d\n",
+				   NIPQUAD(prf), tn->pos);
+		}
 	} else {
-		snprintf(bf, sizeof(bf),
-			 "*\t%08X\t%08X", 200, 400);
-		seq_printf(seq, "%-127s\n", bf);
+		struct leaf *l = (struct leaf *) n;
+		int i;
+		u32 val = ntohl(l->key);
+
+		seq_indent(seq, iter->depth);
+		seq_printf(seq, "  |-- %d.%d.%d.%d\n", NIPQUAD(val));
+		for (i = 32; i >= 0; i--) {
+			struct leaf_info *li = find_leaf_info(&l->list, i);
+			if (li) {
+				struct fib_alias *fa;
+				list_for_each_entry_rcu(fa, &li->falh, fa_list) {
+					seq_indent(seq, iter->depth+1);
+					seq_printf(seq, "  /%d %s %s", i,
+						   rtn_scope(fa->fa_scope),
+						   rtn_type(fa->fa_type));
+					if (fa->fa_tos)
+						seq_printf(seq, " tos=%d",
+							   fa->fa_tos);
+					seq_putc(seq, '\n');
+				}
+			}
+		}
 	}
 
 	return 0;
 }
 
 static struct seq_operations fib_trie_seq_ops = {
-	.start = fib_trie_seq_start,
-	.next  = fib_trie_seq_next,
-	.stop  = fib_trie_seq_stop,
-	.show  = fib_trie_seq_show,
+	.start  = fib_trie_seq_start,
+	.next   = fib_trie_seq_next,
+	.stop   = fib_trie_seq_stop,
+	.show   = fib_trie_seq_show,
 };
 
 static int fib_trie_seq_open(struct inode *inode, struct file *file)
 {
 	struct seq_file *seq;
 	int rc = -ENOMEM;
+	struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+	if (!s)
+		goto out;
 
 	rc = seq_open(file, &fib_trie_seq_ops);
 	if (rc)
 		goto out_kfree;
 
-	seq = file->private_data;
+	seq	     = file->private_data;
+	seq->private = s;
+	memset(s, 0, sizeof(*s));
 out:
 	return rc;
 out_kfree:
+	kfree(s);
 	goto out;
 }
 
-static struct file_operations fib_trie_seq_fops = {
-	.owner	= THIS_MODULE,
-	.open	= fib_trie_seq_open,
-	.read	= seq_read,
-	.llseek	= seq_lseek,
-	.release= seq_release_private,
+static struct file_operations fib_trie_fops = {
+	.owner  = THIS_MODULE,
+	.open   = fib_trie_seq_open,
+	.read   = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
+
+static unsigned fib_flag_trans(int type, u32 mask, const struct fib_info *fi)
+{
+	static unsigned type2flags[RTN_MAX + 1] = {
+		[7] = RTF_REJECT, [8] = RTF_REJECT,
+	};
+	unsigned flags = type2flags[type];
+
+	if (fi && fi->fib_nh->nh_gw)
+		flags |= RTF_GATEWAY;
+	if (mask == 0xFFFFFFFF)
+		flags |= RTF_HOST;
+	flags |= RTF_UP;
+	return flags;
+}
+
+/*
+ *	This outputs /proc/net/route.
+ *	The format of the file is not supposed to be changed
+ * 	and needs to be same as fib_hash output to avoid breaking
+ *	legacy utilities
+ */
+static int fib_route_seq_show(struct seq_file *seq, void *v)
+{
+	struct leaf *l = v;
+	int i;
+	char bf[128];
+
+	if (v == SEQ_START_TOKEN) {
+		seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
+			   "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
+			   "\tWindow\tIRTT");
+		return 0;
+	}
+
+	if (IS_TNODE(l))
+		return 0;
+
+	for (i=32; i>=0; i--) {
+		struct leaf_info *li = find_leaf_info(&l->list, i);
+		struct fib_alias *fa;
+		u32 mask, prefix;
+
+		if (!li)
+			continue;
+
+		mask = inet_make_mask(li->plen);
+		prefix = htonl(l->key);
+
+		list_for_each_entry_rcu(fa, &li->falh, fa_list) {
+			const struct fib_info *fi = rcu_dereference(fa->fa_info);
+			unsigned flags = fib_flag_trans(fa->fa_type, mask, fi);
+
+			if (fa->fa_type == RTN_BROADCAST
+			    || fa->fa_type == RTN_MULTICAST)
+				continue;
+
+			if (fi)
+				snprintf(bf, sizeof(bf),
+					 "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
+					 fi->fib_dev ? fi->fib_dev->name : "*",
+					 prefix,
+					 fi->fib_nh->nh_gw, flags, 0, 0,
+					 fi->fib_priority,
+					 mask,
+					 (fi->fib_advmss ? fi->fib_advmss + 40 : 0),
+					 fi->fib_window,
+					 fi->fib_rtt >> 3);
+			else
+				snprintf(bf, sizeof(bf),
+					 "*\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
+					 prefix, 0, flags, 0, 0, 0,
+					 mask, 0, 0, 0);
+
+			seq_printf(seq, "%-127s\n", bf);
+		}
+	}
+
+	return 0;
+}
+
+static struct seq_operations fib_route_seq_ops = {
+	.start  = fib_trie_seq_start,
+	.next   = fib_trie_seq_next,
+	.stop   = fib_trie_seq_stop,
+	.show   = fib_route_seq_show,
+};
+
+static int fib_route_seq_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int rc = -ENOMEM;
+	struct fib_trie_iter *s = kmalloc(sizeof(*s), GFP_KERNEL);
+
+	if (!s)
+		goto out;
+
+	rc = seq_open(file, &fib_route_seq_ops);
+	if (rc)
+		goto out_kfree;
+
+	seq	     = file->private_data;
+	seq->private = s;
+	memset(s, 0, sizeof(*s));
+out:
+	return rc;
+out_kfree:
+	kfree(s);
+	goto out;
+}
+
+static struct file_operations fib_route_fops = {
+	.owner  = THIS_MODULE,
+	.open   = fib_route_seq_open,
+	.read   = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
 };
 
 int __init fib_proc_init(void)
 {
-	if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops))
-		return -ENOMEM;
+	if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_fops))
+		goto out1;
+
+	if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_fops))
+		goto out2;
+
+	if (!proc_net_fops_create("route", S_IRUGO, &fib_route_fops))
+		goto out3;
+
 	return 0;
+
+out3:
+	proc_net_remove("fib_triestat");
+out2:
+	proc_net_remove("fib_trie");
+out1:
+	return -ENOMEM;
 }
 
 void __init fib_proc_exit(void)
 {
 	proc_net_remove("fib_trie");
+	proc_net_remove("fib_triestat");
+	proc_net_remove("route");
 }
 
 #endif /* CONFIG_PROC_FS */