rto support for ip command
Enable users of ip to specify the times for rtt, rttvar and rto_min
in human-friendly terms a la "tc" while maintaining backwards
compatibility with the previous "raw" mechanism. Builds upon
David Miller's uncommitted patch to set rto_min.
Signed-off-by: Rick Jones <rick.jones2@hp.com>
Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
diff --git a/include/utils.h b/include/utils.h
index a3fd335..7da2b29 100644
--- a/include/utils.h
+++ b/include/utils.h
@@ -77,6 +77,7 @@
extern int get_integer(int *val, const char *arg, int base);
extern int get_unsigned(unsigned *val, const char *arg, int base);
+extern int get_jiffies(unsigned *val, const char *arg, int base, int *raw);
#define get_byte get_u8
#define get_ushort get_u16
#define get_short get_s16
diff --git a/ip/iproute.c b/ip/iproute.c
index 9694bc7..77bdf83 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -51,6 +51,7 @@
[RTAX_HOPLIMIT] = "hoplimit",
[RTAX_INITCWND] = "initcwnd",
[RTAX_FEATURES] = "features",
+ [RTAX_RTO_MIN] = "rto_min",
};
static void usage(void) __attribute__((noreturn));
@@ -71,9 +72,10 @@
fprintf(stderr, "INFO_SPEC := NH OPTIONS FLAGS [ nexthop NH ]...\n");
fprintf(stderr, "NH := [ via ADDRESS ] [ dev STRING ] [ weight NUMBER ] NHFLAGS\n");
fprintf(stderr, "OPTIONS := FLAGS [ mtu NUMBER ] [ advmss NUMBER ]\n");
- fprintf(stderr, " [ rtt NUMBER ] [ rttvar NUMBER ]\n");
+ fprintf(stderr, " [ rtt TIME ] [ rttvar TIME ]\n");
fprintf(stderr, " [ window NUMBER] [ cwnd NUMBER ] [ initcwnd NUMBER ]\n");
fprintf(stderr, " [ ssthresh NUMBER ] [ realms REALM ]\n");
+ fprintf(stderr, " [ rto_min TIME ]\n");
fprintf(stderr, "TYPE := [ unicast | local | broadcast | multicast | throw |\n");
fprintf(stderr, " unreachable | prohibit | blackhole | nat ]\n");
fprintf(stderr, "TABLE_ID := [ local | main | default | all | NUMBER ]\n");
@@ -82,6 +84,7 @@
fprintf(stderr, "MP_ALGO := { rr | drr | random | wrandom }\n");
fprintf(stderr, "NHFLAGS := [ onlink | pervasive ]\n");
fprintf(stderr, "RTPROTO := [ kernel | boot | static | NUMBER ]\n");
+ fprintf(stderr, "TIME := NUMBER[s|ms|us|ns|j]\n");
exit(-1);
}
@@ -520,7 +523,8 @@
if (mxlock & (1<<i))
fprintf(fp, " lock");
- if (i != RTAX_RTT && i != RTAX_RTTVAR)
+ if (i != RTAX_RTT && i != RTAX_RTTVAR &&
+ i != RTAX_RTO_MIN)
fprintf(fp, " %u", *(unsigned*)RTA_DATA(mxrta[i]));
else {
unsigned val = *(unsigned*)RTA_DATA(mxrta[i]);
@@ -528,7 +532,7 @@
val *= 1000;
if (i == RTAX_RTT)
val /= 8;
- else
+ else if (i == RTAX_RTTVAR)
val /= 4;
if (val >= hz)
fprintf(fp, " %ums", val/hz);
@@ -693,6 +697,7 @@
int table_ok = 0;
int proto_ok = 0;
int type_ok = 0;
+ int raw = 0;
memset(&req, 0, sizeof(req));
@@ -800,9 +805,19 @@
mxlock |= (1<<RTAX_RTT);
NEXT_ARG();
}
- if (get_unsigned(&rtt, *argv, 0))
+ if (get_jiffies(&rtt, *argv, 0, &raw))
invarg("\"rtt\" value is invalid\n", *argv);
- rta_addattr32(mxrta, sizeof(mxbuf), RTAX_RTT, rtt);
+ rta_addattr32(mxrta, sizeof(mxbuf), RTAX_RTT,
+ (raw) ? rtt : rtt * 8);
+ } else if (strcmp(*argv, "rto_min") == 0) {
+ unsigned rto_min;
+ NEXT_ARG();
+ mxlock |= (1<<RTAX_RTO_MIN);
+ if (get_jiffies(&rto_min, *argv, 0, &raw))
+ invarg("\"rto_min\" value is invalid\n",
+ *argv);
+ rta_addattr32(mxrta, sizeof(mxbuf), RTAX_RTO_MIN,
+ rto_min);
} else if (matches(*argv, "window") == 0) {
unsigned win;
NEXT_ARG();
@@ -840,9 +855,10 @@
mxlock |= (1<<RTAX_RTTVAR);
NEXT_ARG();
}
- if (get_unsigned(&win, *argv, 0))
+ if (get_jiffies(&win, *argv, 0, &raw))
invarg("\"rttvar\" value is invalid\n", *argv);
- rta_addattr32(mxrta, sizeof(mxbuf), RTAX_RTTVAR, win);
+ rta_addattr32(mxrta, sizeof(mxbuf), RTAX_RTTVAR,
+ (raw) ? win : win * 4);
} else if (matches(*argv, "ssthresh") == 0) {
unsigned win;
NEXT_ARG();
diff --git a/lib/utils.c b/lib/utils.c
index 4f35a60..4c42dfd 100644
--- a/lib/utils.c
+++ b/lib/utils.c
@@ -61,6 +61,70 @@
return 0;
}
+/*
+ * get_jiffies parses ARG as a time and stores it, rounded up, in *jiffies.
+ * It mirrors get_time in tc_util.c but produces jiffies, not microseconds.
+ * A bare number is kept unscaled and sets *raw = 1; a suffix sets *raw = 0
+ * and s/ms/us/ns scale by HZ while j/hz/jiffies leave the value as-is.
+ * BASE is used only if ARG has no '.'.  Returns 0 if ok, -1 on bad input.
+ */
+
+int get_jiffies(unsigned *jiffies, const char *arg, int base, int *raw)
+{
+ double t;
+ unsigned long res;
+ char *p;
+
+ if (strchr(arg,'.') != NULL) {
+ t = strtod(arg,&p);
+ if (t < 0.0)
+ return -1;
+ }
+ else {
+ res = strtoul(arg,&p,base);
+ if (res > UINT_MAX)
+ return -1;
+ t = (double)res;
+ }
+ if (p == arg)
+ return -1;
+
+ if (__iproute2_hz_internal == 0)
+ __iproute2_hz_internal = __get_hz();
+
+ *raw = 1;
+
+ if (*p) {
+ *raw = 0;
+ if (strcasecmp(p, "s") == 0 || strcasecmp(p, "sec")==0 ||
+ strcasecmp(p, "secs")==0)
+ t *= __iproute2_hz_internal;
+ else if (strcasecmp(p, "ms") == 0 || strcasecmp(p, "msec")==0 ||
+ strcasecmp(p, "msecs") == 0)
+ t *= __iproute2_hz_internal/1000.0;
+ else if (strcasecmp(p, "us") == 0 || strcasecmp(p, "usec")==0 ||
+ strcasecmp(p, "usecs") == 0)
+ t *= __iproute2_hz_internal/1000000.0;
+ else if (strcasecmp(p, "ns") == 0 || strcasecmp(p, "nsec")==0 ||
+ strcasecmp(p, "nsecs") == 0)
+ t *= __iproute2_hz_internal/1000000000.0;
+ else if (strcasecmp(p, "j") == 0 || strcasecmp(p, "hz") == 0 ||
+ strcasecmp(p,"jiffies") == 0)
+ t *= 1.0; /* allow suffix, do nothing */
+ else
+ return -1;
+ }
+
+ /* round up like ceil() without needing -lm; non-zero times become >= 1 */
+
+ *jiffies = t;
+ if (*jiffies < t)
+ *jiffies += 1;
+
+ return 0;
+
+}
+
int get_u64(__u64 *val, const char *arg, int base)
{
unsigned long long res;
diff --git a/man/man8/ip.8 b/man/man8/ip.8
index a9132da..7181054 100644
--- a/man/man8/ip.8
+++ b/man/man8/ip.8
@@ -169,9 +169,9 @@
.B advmss
.IR NUMBER " ] [ "
.B rtt
-.IR NUMBER " ] [ "
+.IR TIME " ] [ "
.B rttvar
-.IR NUMBER " ] [ "
+.IR TIME " ] [ "
.B window
.IR NUMBER " ] [ "
.B cwnd
@@ -179,7 +179,9 @@
.B ssthresh
.IR REALM " ] [ "
.B realms
-.IR REALM " ]"
+.IR REALM " ] [ "
+.B rto_min
+.IR TIME " ]"
.ti -8
.IR TYPE " := [ "
@@ -301,6 +303,9 @@
.IR KEY " := { " DOTTED_QUAD " | " NUMBER " }"
.ti -8
+.IR TIME " := " NUMBER "[s|ms|us|ns|j]"
+
+.ti -8
.BR "ip maddr" " [ " add " | " del " ]"
.IB MULTIADDR " dev " STRING
@@ -1062,12 +1067,29 @@
peers are allowed to send to us.
.TP
-.BI rtt " NUMBER"
-the initial RTT ('Round Trip Time') estimate.
+.BI rtt " TIME"
+the initial RTT ('Round Trip Time') estimate. If no suffix is
+specified the units are raw values passed directly to the
+routing code to maintain compatibility with previous releases.
+Otherwise if a suffix of s, sec or secs is used to specify
+seconds; ms, msec or msecs to specify milliseconds; us, usec
+or usecs to specify microseconds; ns, nsec or nsecs to specify
+nanoseconds; j, hz or jiffies to specify jiffies, the value is
+converted to what the routing code expects.
+
.TP
-.BI rttvar " NUMBER " "(2.3.15+ only)"
-the initial RTT variance estimate.
+.BI rttvar " TIME " "(2.3.15+ only)"
+the initial RTT variance estimate. Values are specified as with
+.BI rtt
+above.
+
+.TP
+.BI rto_min " TIME " "(2.6.23+ only)"
+the minimum TCP Retransmission TimeOut to use when communicating with this
+destination. Values are specified as with
+.BI rtt
+above.
.TP
.BI ssthresh " NUMBER " "(2.3.15+ only)"