Blame - doc/ip-cref.tex - platform/external/iproute2

blob: 242cc266b7acd34adcf390e6272944e333d6160e [file] [log] [blame]

osdl.org!shemminger	aba5acd	2004-04-15 20:56:59 +0000	[diff] [blame]	1	\documentstyle[12pt,twoside]{article}
				2	\def\TITLE{IP Command Reference}
				3	\input preamble
				4	\begin{center}
				5	\Large\bf IP Command Reference.
				6	\end{center}
				7
				8
				9	\begin{center}
				10	{ \large Alexey~N.~Kuznetsov } \\
				11	\em Institute for Nuclear Research, Moscow \\
				12	\verb\|kuznet@ms2.inr.ac.ru\| \\
				13	\rm April 14, 1999
				14	\end{center}
				15
				16	\vspace{5mm}
				17
				18	\tableofcontents
				19
				20	\newpage
				21
				22	\section{About this document}
				23
				24	This document presents a comprehensive description of the \verb\|ip\| utility
				25	from the \verb\|iproute2\| package. It is not a tutorial or user's guide.
				26	It is a {\em dictionary\/}, not explaining terms,
				27	but translating them into other terms, which may also be unknown to the reader.
				28	However, the document is self-contained and the reader, provided they have a
				29	basic networking background, will find enough information
				30	and examples to understand and configure Linux-2.2 IP and IPv6
				31	networking.
				32
				33	This document is split into sections explaining \verb\|ip\| commands
				34	and options, deciphering \verb\|ip\| output and containing a few examples.
				35	More voluminous examples and some topics, which require more elaborate
				36	discussion, are in the appendix.
				37
				38	The paragraphs beginning with NB contain side notes, warnings about
				39	bugs and design drawbacks. They may be skipped at the first reading.
				40
				41	\section{{\tt ip} --- command syntax}
				42
				43	The generic form of an \verb\|ip\| command is:
				44	\begin{verbatim}
				45	ip [ OPTIONS ] OBJECT [ COMMAND [ ARGUMENTS ]]
				46	\end{verbatim}
				47	where \verb\|OPTIONS\| is a set of optional modifiers affecting the
				48	general behaviour of the \verb\|ip\| utility or changing its output. All options
				49	begin with the character \verb\|'-'\| and may be used in either long or abbreviated
				50	forms. Currently, the following options are available:
				51
				52	\begin{itemize}
				53	\item \verb\|-V\|, \verb\|-Version\|
				54
				55	--- print the version of the \verb\|ip\| utility and exit.
				56
				57
				58	\item \verb\|-s\|, \verb\|-stats\|, \verb\|-statistics\|
				59
				60	--- output more information. If the option
				61	appears twice or more, the amount of information increases.
				62	As a rule, the information is statistics or some time values.
				63
vadimk	5cb6aa0	2014-11-04 18:54:30 +0200	[diff] [blame]	64	\item \verb\|-d\|, \verb\|-details\|
				65
				66	--- output more detailed information.
osdl.org!shemminger	aba5acd	2004-04-15 20:56:59 +0000	[diff] [blame]	67
				68	\item \verb\|-f\|, \verb\|-family\| followed by a protocol family
				69	identifier: \verb\|inet\|, \verb\|inet6\| or \verb\|link\|.
				70
				71	--- enforce the protocol family to use. If the option is not present,
				72	the protocol family is guessed from other arguments. If the rest of the command
				73	line does not give enough information to guess the family, \verb\|ip\| falls back to the default
				74	one, usually \verb\|inet\| or \verb\|any\|. \verb\|link\| is a special family
				75	identifier meaning that no networking protocol is involved.
				76
				77	\item \verb\|-4\|
				78
				79	--- shortcut for \verb\|-family inet\|.
				80
				81	\item \verb\|-6\|
				82
				83	--- shortcut for \verb\|-family inet6\|.
				84
				85	\item \verb\|-0\|
				86
				87	--- shortcut for \verb\|-family link\|.
				88
				89
				90	\item \verb\|-o\|, \verb\|-oneline\|
				91
				92	--- output each record on a single line, replacing line feeds
				93	with the \verb\|'\'\| character. This is convenient when you want to
				94	count records with \verb\|wc\| or to \verb\|grep\| the output. The trivial
				95	script \verb\|rtpr\| converts the output back into readable form.
				96
				97	\item \verb\|-r\|, \verb\|-resolve\|
				98
				99	--- use the system's name resolver to print DNS names instead of
				100	host addresses.
				101
				102	\begin{NB}
				103	Do not use this option when reporting bugs or asking for advice.
				104	\end{NB}
				105	\begin{NB}
				106	\verb\|ip\| never uses DNS to resolve names to addresses.
				107	\end{NB}
				108
vadimk	5cb6aa0	2014-11-04 18:54:30 +0200	[diff] [blame]	109	\item \verb\|-b\|, \verb\|-batch FILE\|
				110
				111	--- read commands from the provided file or standard input and invoke them.
				112	The first failure will cause termination of \verb\|ip\|.
				113	In the batch \verb\|FILE\|, everything which begins with the \verb\|#\| symbol is
				114	ignored and can be used for comments.
				115	\paragraph{Example:}
				116	\begin{verbatim}
				117	kuznet@kaiser $ cat /tmp/ip_batch.ip
				118	# This is a comment
				119	tuntap add mode tap tap1 # This is another comment
				120	link set up dev tap1
				121	addr add 10.0.0.1/24 dev tap1
				122	kuznet@kaiser $ sudo ip -b /tmp/ip_batch.ip
				123	\end{verbatim}
				124	or from standard input:
				125	\begin{verbatim}
				126	kuznet@kaiser $ cat /tmp/ip_batch.ip \| sudo ip -b -
				127	\end{verbatim}
				128
				129	\item \verb\|-force\|
				130
				131	--- don't terminate ip on errors in batch mode.
				132	If there were any errors during execution of the commands,
				133	the application return code will be non-zero.
				134
				135	\item \verb\|-l\|, \verb\|-loops COUNT\|
				136
				137	--- specify the maximum number of loops the 'ip addr flush' logic will attempt
				138	before giving up. The default is 10. Zero (0) means loop until all
				139	addresses are removed.
				140
osdl.org!shemminger	aba5acd	2004-04-15 20:56:59 +0000	[diff] [blame]	141	\end{itemize}
				142
				143	\verb\|OBJECT\| is the object to manage or to get information about.
				144	The object types currently understood by \verb\|ip\| are:
				145
				146	\begin{itemize}
				147	\item \verb\|link\| --- network device
				148	\item \verb\|address\| --- protocol (IP or IPv6) address on a device
				149	\item \verb\|neighbour\| --- ARP or NDISC cache entry
				150	\item \verb\|route\| --- routing table entry
				151	\item \verb\|rule\| --- rule in routing policy database
				152	\item \verb\|maddress\| --- multicast address
				153	\item \verb\|mroute\| --- multicast routing cache entry
				154	\item \verb\|tunnel\| --- tunnel over IP
				155	\end{itemize}
				156
				157	Again, the names of all objects may be written in full or
				158	abbreviated form, f.e.\ \verb\|address\| is abbreviated as \verb\|addr\|
				159	or just \verb\|a\|.
				160
				161	\verb\|COMMAND\| specifies the action to perform on the object.
				162	The set of possible actions depends on the object type.
				163	As a rule, it is possible to \verb\|add\|, \verb\|delete\| and
				164	\verb\|show\| (or \verb\|list\|) objects, but some objects
				165	do not allow all of these operations or have some additional commands.
				166	The \verb\|help\| command is available for all objects. It prints
				167	out a list of available commands and argument syntax conventions.
				168
				169	If no command is given, some default command is assumed.
				170	Usually it is \verb\|list\| or, if the objects of this class
				171	cannot be listed, \verb\|help\|.
				172
				173	\verb\|ARGUMENTS\| is a list of arguments to the command.
				174	The arguments depend on the command and object. There are two types of arguments:
				175	{\em flags\/}, consisting of a single keyword, and {\em parameters\/},
				176	consisting of a keyword followed by a value. For convenience,
				177	each command has some {\em default parameter\/}
				178	which may be omitted. F.e.\ parameter \verb\|dev\| is the default
				179	for the {\tt ip link} command, so {\tt ip link ls eth0} is equivalent
				180	to {\tt ip link ls dev eth0}.
				181	In the command descriptions below such parameters
				182	are distinguished with the marker: ``(default)''.
				183
				184	Almost all keywords may be abbreviated with several first (or even single)
				185	letters. The shortcuts are convenient when \verb\|ip\| is used interactively,
				186	but they are not recommended in scripts or when reporting bugs
				187	or asking for advice. ``Officially'' allowed abbreviations are listed
				188	in the document body.
				189
				190
				191
				192	\section{{\tt ip} --- error messages}
				193
				194	\verb\|ip\| may fail for one of the following reasons:
				195
				196	\begin{itemize}
				197	\item
				198	A syntax error on the command line: an unknown keyword, incorrectly formatted
				199	IP address {\em et al\/}. In this case \verb\|ip\| prints an error message
				200	and exits. As a rule, the error message will contain information
				201	about the reason for the failure. Sometimes it also prints a help page.
				202
				203	\item
				204	The arguments did not pass verification for self-consistency.
				205
				206	\item
				207	\verb\|ip\| failed to compile a kernel request from the arguments
				208	because the user didn't give enough information.
				209
				210	\item
				211	The kernel returned an error to some syscall. In this case \verb\|ip\|
				212	prints the error message, as it is output with \verb\|perror(3)\|,
				213	prefixed with a comment and a syscall identifier.
				214
				215	\item
				216	The kernel returned an error to some RTNETLINK request.
				217	In this case \verb\|ip\| prints the error message, as it is output
				218	with \verb\|perror(3)\| prefixed with ``RTNETLINK answers:''.
				219
				220	\end{itemize}
				221
				222	All the operations are atomic, i.e.\
				223	if the \verb\|ip\| utility fails, it does not change anything
				224	in the system. One harmful exception is the \verb\|ip link\| command
				225	(Sec.\ref{IP-LINK}, p.\pageref{IP-LINK}),
				226	which may change only some of the device parameters given
				227	on the command line.
				228
				229	It is difficult to list all the error messages (especially
				230	syntax errors). However, as a rule, their meaning is clear
				231	from the context of the command.
				232
				233	The most common mistakes are:
				234
				235	\begin{enumerate}
				236	\item Netlink is not configured in the kernel. The message is:
				237	\begin{verbatim}
				238	Cannot open netlink socket: Invalid value
				239	\end{verbatim}
				240
				241	\item RTNETLINK is not configured in the kernel. In this case
				242	one of the following messages may be printed, depending on the command:
				243	\begin{verbatim}
				244	Cannot talk to rtnetlink: Connection refused
				245	Cannot send dump request: Connection refused
				246	\end{verbatim}
				247
				248	\item The \verb\|CONFIG_IP_MULTIPLE_TABLES\| option was not selected
				249	when configuring the kernel. In this case any attempt to use the
				250	\verb\|ip\| \verb\|rule\| command will fail, f.e.
				251	\begin{verbatim}
				252	kuznet@kaiser $ ip rule list
				253	RTNETLINK error: Invalid argument
				254	dump terminated
				255	\end{verbatim}
				256
				257	\end{enumerate}
				258
				259
				260	\section{{\tt ip link} --- network device configuration}
				261	\label{IP-LINK}
				262
				263	\paragraph{Object:} A \verb\|link\| is a network device and the corresponding
				264	commands display and change the state of devices.
				265
				266	\paragraph{Commands:} \verb\|set\| and \verb\|show\| (or \verb\|list\|).
				267
				268	\subsection{{\tt ip link set} --- change device attributes}
				269
				270	\paragraph{Abbreviations:} \verb\|set\|, \verb\|s\|.
				271
				272	\paragraph{Arguments:}
				273
				274	\begin{itemize}
				275	\item \verb\|dev NAME\| (default)
				276
				277	--- \verb\|NAME\| specifies the network device on which to operate.
				278
				279	\item \verb\|up\| and \verb\|down\|
				280
				281	--- change the state of the device to \verb\|UP\| or \verb\|DOWN\|.
				282
				283	\item \verb\|arp on\| or \verb\|arp off\|
				284
				285	--- change the \verb\|NOARP\| flag on the device.
				286
				287	\begin{NB}
				288	This operation is {\em not allowed\/} if the device is in state \verb\|UP\|.
				289	However, neither the \verb\|ip\| utility nor the kernel checks for this condition.
				290	You can get unpredictable results changing this flag while the
				291	device is running.
				292	\end{NB}
				293
				294	\item \verb\|multicast on\| or \verb\|multicast off\|
				295
				296	--- change the \verb\|MULTICAST\| flag on the device.
				297
				298	\item \verb\|dynamic on\| or \verb\|dynamic off\|
				299
				300	--- change the \verb\|DYNAMIC\| flag on the device.
				301
				302	\item \verb\|name NAME\|
				303
				304	--- change the name of the device. This operation is not
				305	recommended if the device is running or has some addresses
				306	already configured.
				307
				308	\item \verb\|txqueuelen NUMBER\| or \verb\|txqlen NUMBER\|
				309
				310	--- change the transmit queue length of the device.
				311
				312	\item \verb\|mtu NUMBER\|
				313
				314	--- change the MTU of the device.
				315
				316	\item \verb\|address LLADDRESS\|
				317
				318	--- change the station address of the interface.
				319
				320	\item \verb\|broadcast LLADDRESS\|, \verb\|brd LLADDRESS\| or \verb\|peer LLADDRESS\|
				321
				322	--- change the link layer broadcast address or the peer address when
				323	the interface is \verb\|POINTOPOINT\|.
				324
				325	\vskip 1mm
				326	\begin{NB}
				327	For most devices (f.e.\ for Ethernet) changing the link layer
				328	broadcast address will break networking.
				329	Do not use it, if you do not understand what this operation really does.
				330	\end{NB}
				331
Benjamin Thery	e2613dc	2008-06-20 11:07:35 +0200	[diff] [blame]	332	\item \verb\|netns PID\|
				333
				334	--- move the device to the network namespace associated with the process PID.
				335
osdl.org!shemminger	aba5acd	2004-04-15 20:56:59 +0000	[diff] [blame]	336	\end{itemize}
				337
				338	\vskip 1mm
				339	\begin{NB}
Tomas Janousek	e17b733	2007-11-20 15:38:21 +0100	[diff] [blame]	340	The \verb\|PROMISC\| and \verb\|ALLMULTI\| flags are considered
				341	obsolete and should not be changed administratively, though
				342	the {\tt ip} utility will allow that.
osdl.org!shemminger	aba5acd	2004-04-15 20:56:59 +0000	[diff] [blame]	343	\end{NB}
				344
				345	\paragraph{Warning:} If multiple parameter changes are requested,
				346	\verb\|ip\| aborts immediately after any of the changes has failed.
				347	This is the only case when \verb\|ip\| can move the system to
				348	an unpredictable state. The solution is to avoid changing
				349	several parameters with one {\tt ip link set} call.
				350
				351	\paragraph{Examples:}
				352	\begin{itemize}
				353	\item \verb\|ip link set dummy address 00:00:00:00:00:01\|
				354
				355	--- change the station address of the interface \verb\|dummy\|.
				356
				357	\item \verb\|ip link set dummy up\|
				358
				359	--- start the interface \verb\|dummy\|.
				360
				361	\end{itemize}
				362
				363
				364	\subsection{{\tt ip link show} --- display device attributes}
				365	\label{IP-LINK-SHOW}
				366
				367	\paragraph{Abbreviations:} \verb\|show\|, \verb\|list\|, \verb\|lst\|, \verb\|sh\|, \verb\|ls\|,
				368	\verb\|l\|.
				369
				370	\paragraph{Arguments:}
				371	\begin{itemize}
				372	\item \verb\|dev NAME\| (default)
				373
				374	--- \verb\|NAME\| specifies the network device to show.
				375	If this argument is omitted all devices are listed.
				376
				377	\item \verb\|up\|
				378
				379	--- only display running interfaces.
				380
				381	\end{itemize}
				382
				383
				384	\paragraph{Output format:}
				385
				386	\begin{verbatim}
				387	kuznet@alisa:~ $ ip link ls eth0
				388	3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
				389	link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
				390	kuznet@alisa:~ $ ip link ls sit0
				391	5: sit0@NONE: <NOARP,UP> mtu 1480 qdisc noqueue
				392	link/sit 0.0.0.0 brd 0.0.0.0
				393	kuznet@alisa:~ $ ip link ls dummy
				394	2: dummy: <BROADCAST,NOARP> mtu 1500 qdisc noop
				395	link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
				396	kuznet@alisa:~ $
				397	\end{verbatim}
				398
				399
				400	The number before each colon is an {\em interface index\/} or {\em ifindex\/}.
				401	This number uniquely identifies the interface. This is followed by the {\em interface name\/}
				402	(\verb\|eth0\|, \verb\|sit0\| etc.). The interface name is also
				403	unique at every given moment. However, the interface may disappear from the
				404	list (f.e.\ when the corresponding driver module is unloaded) and another
				405	one with the same name may be created later. Besides that,
				406	the administrator may change the name of any device with
				407	\verb\|ip\| \verb\|link\| \verb\|set\| \verb\|name\|
				408	to make it more intelligible.
				409
				410	The interface name may have another name or \verb\|NONE\| appended
				411	after the \verb\|@\| sign. This means that this device is bound to some other
				412	device,
				413	i.e.\ packets sent through it are encapsulated and sent via the ``master''
				414	device. If the name is \verb\|NONE\|, the master is unknown.
				415
				416	Then we see the interface {\em mtu\/} (``maximal transfer unit''). This determines
				417	the maximal size of data which can be sent as a single packet over this interface.
				418
				419	{\em qdisc\/} (``queuing discipline'') shows the queuing algorithm used
				420	on the interface. Particularly, \verb\|noqueue\| means that this interface
				421	does not queue anything and \verb\|noop\| means that the interface is in blackhole
				422	mode i.e.\ all packets sent to it are immediately discarded.
				423	{\em qlen\/} is the default transmit queue length of the device measured
				424	in packets.
				425
				426	The interface flags are summarized in the angle brackets.
				427
				428	\begin{itemize}
				429	\item \verb\|UP\| --- the device is turned on. It is ready to accept
				430	packets for transmission and it may inject into the kernel packets received
				431	from other nodes on the network.
				432
				433	\item \verb\|LOOPBACK\| --- the interface does not communicate with other
				434	hosts. All packets sent through it will be returned
				435	and nothing but bounced packets can be received.
				436
				437	\item \verb\|BROADCAST\| --- the device has the facility to send packets
				438	to all hosts sharing the same link. A typical example is an Ethernet link.
				439
				440	\item \verb\|POINTOPOINT\| --- the link has only two ends with one node
				441	attached to each end. All packets sent to this link will reach the peer
				442	and all packets received by us came from this single peer.
				443
				444	If neither \verb\|LOOPBACK\| nor \verb\|BROADCAST\| nor \verb\|POINTOPOINT\|
				445	are set, the interface is assumed to be NBMA (Non-Broadcast Multi-Access).
				446	This is the most generic type of device and the most complicated one, because
				447	the host attached to an NBMA link has no means to send to anyone
				448	without additionally configured information.
				449
				450	\item \verb\|MULTICAST\| --- is an advisory flag indicating that the interface
				451	is aware of multicasting i.e.\ sending packets to some subset of neighbouring
				452	nodes. Broadcasting is a particular case of multicasting, where the multicast
				453	group consists of all nodes on the link. It is important to emphasize
				454	that software {\em must not\/} interpret the absence of this flag as the inability
				455	to use multicasting on this interface. Any \verb\|POINTOPOINT\| and
				456	\verb\|BROADCAST\| link is multicasting by definition, because we have
				457	direct access to all the neighbours and, hence, to any part of them.
				458	Certainly, the use of high bandwidth multicast transfers is not recommended
				459	on broadcast-only links because of high expense, but it is not strictly
				460	prohibited.
				461
				462	\item \verb\|PROMISC\| --- the device listens to and feeds to the kernel all
				463	traffic on the link even if it is not destined for us, not broadcasted
				464	and not destined for a multicast group of which we are a member. Usually
				465	this mode exists only on broadcast links and is used by bridges and for network
				466	monitoring.
				467
				468	\item \verb\|ALLMULTI\| --- the device receives all multicast packets
				469	wandering on the link. This mode is used by multicast routers.
				470
				471	\item \verb\|NOARP\| --- this flag is different from the other ones. It has
				472	no invariant value and its interpretation depends on the network protocols
				473	involved. As a rule, it indicates that the device needs no address
				474	resolution and that the software or hardware knows how to deliver packets
				475	without any help from the protocol stacks.
				476
				477	\item \verb\|DYNAMIC\| --- is an advisory flag indicating that the interface is
				478	dynamically created and destroyed.
				479
				480	\item \verb\|SLAVE\| --- this interface is bonded to some other interfaces
				481	to share link capacities.
				482
				483	\end{itemize}
				484
				485	\vskip 1mm
				486	\begin{NB}
				487	There are other flags but they are either obsolete (\verb\|NOTRAILERS\|)
				488	or not implemented (\verb\|DEBUG\|) or specific to some devices
				489	(\verb\|MASTER\|, \verb\|AUTOMEDIA\| and \verb\|PORTSEL\|). We do not discuss
				490	them here.
				491	\end{NB}
osdl.org!shemminger	aba5acd	2004-04-15 20:56:59 +0000	[diff] [blame]	492
				493
				494	The second line contains information on the link layer addresses
				495	associated with the device. The first word (\verb\|ether\|, \verb\|sit\|)
				496	defines the interface hardware type. This type determines the format and semantics
				497	of the addresses and is logically part of the address.
				498	The default format of the station address and the broadcast address
				499	(or the peer address for pointopoint links) is a
				500	sequence of hexadecimal bytes separated by colons, but some link
				501	types may have their natural address format, f.e.\ addresses
				502	of tunnels over IP are printed as dotted-quad IP addresses.
				503
				504	\vskip 1mm
				505	\begin{NB}
				506	NBMA links have no well-defined broadcast or peer address,
				507	however this field may contain useful information, f.e.\
				508	about the address of broadcast relay or about the address of the ARP server.
				509	\end{NB}
				510	\begin{NB}
				511	Multicast addresses are not shown by this command, see
				512	\verb\|ip maddr ls\| in~Sec.\ref{IP-MADDR} (p.\pageref{IP-MADDR} of this
				513	document).
				514	\end{NB}
				515
				516
				517	\paragraph{Statistics:} With the \verb\|-statistics\| option, \verb\|ip\| also
				518	prints interface statistics:
				519
				520	\begin{verbatim}
				521	kuznet@alisa:~ $ ip -s link ls eth0
				522	3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
				523	link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
				524	RX: bytes packets errors dropped overrun mcast
				525	2449949362 2786187 0 0 0 0
				526	TX: bytes packets errors dropped carrier collsns
				527	178558497 1783945 332 0 332 35172
				528	kuznet@alisa:~ $
				529	\end{verbatim}
				530	\verb\|RX:\| and \verb\|TX:\| lines summarize receiver and transmitter
				531	statistics. They contain:
				532	\begin{itemize}
				533	\item \verb\|bytes\| --- the total number of bytes received or transmitted
				534	on the interface. This number wraps when the maximal length of the data type
				535	natural for the architecture is exceeded, so continuous monitoring requires
				536	a user level daemon snapping it periodically.
				537	\item \verb\|packets\| --- the total number of packets received or transmitted
				538	on the interface.
				539	\item \verb\|errors\| --- the total number of receiver or transmitter errors.
				540	\item \verb\|dropped\| --- the total number of packets dropped due to lack
				541	of resources.
				542	\item \verb\|overrun\| --- the total number of receiver overruns resulting
				543	in dropped packets. As a rule, if the interface is overrun, it means
				544	serious problems in the kernel or that your machine is too slow
				545	for this interface.
				546	\item \verb\|mcast\| --- the total number of received multicast packets. This option
				547	is only supported by a few devices.
				548	\item \verb\|carrier\| --- the total number of link media failures, f.e.\ because
				549	of lost carrier.
				550	\item \verb\|collsns\| --- the total number of collision events
				551	on Ethernet-like media. This number may have a different sense on other
				552	link types.
				553	\item \verb\|compressed\| --- the total number of compressed packets. This is
				554	available only for links using VJ header compression.
				555	\end{itemize}
				556
				557
				558	If the \verb\|-s\| option is entered twice or more,
				559	\verb\|ip\| prints more detailed statistics on receiver
				560	and transmitter errors.
				561
				562	\begin{verbatim}
				563	kuznet@alisa:~ $ ip -s -s link ls eth0
				564	3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
				565	link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
				566	RX: bytes packets errors dropped overrun mcast
				567	2449949362 2786187 0 0 0 0
				568	RX errors: length crc frame fifo missed
				569	0 0 0 0 0
				570	TX: bytes packets errors dropped carrier collsns
				571	178558497 1783945 332 0 332 35172
				572	TX errors: aborted fifo window heartbeat
				573	0 0 0 332
				574	kuznet@alisa:~ $
				575	\end{verbatim}
				576	These error names are pure Ethernetisms. Other devices
				577	may have non-zero values in these fields but they may be
				578	interpreted differently.
				579
				580
				581	\section{{\tt ip address} --- protocol address management}
				582
				583	\paragraph{Abbreviations:} \verb\|address\|, \verb\|addr\|, \verb\|a\|.
				584
				585	\paragraph{Object:} The \verb\|address\| is a protocol (IP or IPv6) address attached
				586	to a network device. Each device must have at least one address
				587	to use the corresponding protocol. It is possible to have several
				588	different addresses attached to one device. These addresses are not
				589	discriminated, so that the term {\em alias\/} is not quite appropriate
				590	for them and we do not use it in this document.
				591
				592	The \verb\|ip addr\| command displays addresses and their properties,
				593	adds new addresses and deletes old ones.
				594
				595	\paragraph{Commands:} \verb\|add\|, \verb\|delete\|, \verb\|flush\| and \verb\|show\|
				596	(or \verb\|list\|).
				597
				598
				599	\subsection{{\tt ip address add} --- add a new protocol address}
				600	\label{IP-ADDR-ADD}
				601
				602	\paragraph{Abbreviations:} \verb\|add\|, \verb\|a\|.
				603
				604	\paragraph{Arguments:}
				605
				606	\begin{itemize}
				607	\item \verb\|dev NAME\|
				608
				609	\noindent--- the name of the device to add the address to.
				610
				611	\item \verb\|local ADDRESS\| (default)
				612
				613	--- the address of the interface. The format of the address depends
				614	on the protocol. It is a dotted quad for IP and a sequence of hexadecimal halfwords
				615	separated by colons for IPv6. The \verb\|ADDRESS\| may be followed by
				616	a slash and a decimal number which encodes the network prefix length.
				617
				618
				619	\item \verb\|peer ADDRESS\|
				620
				621	--- the address of the remote endpoint for pointopoint interfaces.
				622	Again, the \verb\|ADDRESS\| may be followed by a slash and a decimal number,
				623	encoding the network prefix length. If a peer address is specified,
				624	the local address {\em cannot\/} have a prefix length. The network prefix is associated
				625	with the peer rather than with the local address.
				626
				627
				628	\item \verb\|broadcast ADDRESS\|
				629
				630	--- the broadcast address on the interface.
				631
				632	It is possible to use the special symbols \verb\|'+'\| and \verb\|'-'\|
				633	instead of the broadcast address. In this case, the broadcast address
				634	is derived by setting/resetting the host bits of the interface prefix.
				635
				636	\vskip 1mm
				637	\begin{NB}
				638	Unlike \verb\|ifconfig\|, the \verb\|ip\| utility {\em does not\/} set any broadcast
				639	address unless explicitly requested.
				640	\end{NB}
				641
				642
				643	\item \verb\|label NAME\|
				644
				645	--- Each address may be tagged with a label string.
				646	In order to preserve compatibility with Linux-2.0 net aliases,
				647	this string must coincide with the name of the device or must be prefixed
				648	with the device name followed by colon.
				649
				650
				651	\item \verb\|scope SCOPE_VALUE\|
				652
				653	--- the scope of the area where this address is valid.
				654	The available scopes are listed in file \verb\|/etc/iproute2/rt_scopes\|.
				655	Predefined scope values are:
				656
				657	\begin{itemize}
				658	\item \verb\|global\| --- the address is globally valid.
				659	\item \verb\|site\| --- (IPv6 only) the address is site local,
				660	i.e.\ it is valid inside this site.
				661	\item \verb\|link\| --- the address is link local, i.e.\
				662	it is valid only on this device.
				663	\item \verb\|host\| --- the address is valid only inside this host.
				664	\end{itemize}
				665
				666	Appendix~\ref{ADDR-SEL} (p.\pageref{ADDR-SEL} of this document)
				667	contains more details on address scopes.
				668
				669	\end{itemize}
				670
				671	\paragraph{Examples:}
				672	\begin{itemize}
				673	\item \verb\|ip addr add 127.0.0.1/8 dev lo brd + scope host\|
				674
				675	--- add the usual loopback address to the loopback device.
				676
				677	\item \verb\|ip addr add 10.0.0.1/24 brd + dev eth0 label eth0:Alias\|
				678
				679	--- add the address 10.0.0.1 with prefix length 24 (i.e.\ netmask
				680	\verb\|255.255.255.0\|), standard broadcast and label \verb\|eth0:Alias\|
				681	to the interface \verb\|eth0\|.
				682	\end{itemize}
				683
				684
				685	\subsection{{\tt ip address delete} --- delete a protocol address}
				686
				687	\paragraph{Abbreviations:} \verb\|delete\|, \verb\|del\|, \verb\|d\|.
				688
				689	\paragraph{Arguments:} coincide with the arguments of \verb\|ip addr add\|.
				690	The device name is a required argument. The rest are optional.
				691	If no arguments are given, the first address is deleted.
				692
				693	\paragraph{Examples:}
				694	\begin{itemize}
				695	\item \verb\|ip addr del 127.0.0.1/8 dev lo\|
				696
				697	--- deletes the loopback address from the loopback device.
				698	It would be best not to repeat this experiment.
				699
				700	\item Disable IP on the interface \verb\|eth0\|:
				701	\begin{verbatim}
				702	while ip -f inet addr del dev eth0; do
				703	: nothing
				704	done
				705	\end{verbatim}
				706	Another method to disable IP on an interface using {\tt ip addr flush}
				707	may be found in Sec.\ref{IP-ADDR-FLUSH}, p.\pageref{IP-ADDR-FLUSH}.
				708
				709	\end{itemize}
				710
				711
				712	\subsection{{\tt ip address show} --- display protocol addresses}
				713
				714	\paragraph{Abbreviations:} \verb\|show\|, \verb\|list\|, \verb\|lst\|, \verb\|sh\|, \verb\|ls\|,
				715	\verb\|l\|.
				716
				717	\paragraph{Arguments:}
				718
				719	\begin{itemize}
				720	\item \verb\|dev NAME\| (default)
				721
				722	--- the name of the device.
				723
				724	\item \verb\|scope SCOPE_VAL\|
				725
				726	--- only list addresses with this scope.
				727
				728	\item \verb\|to PREFIX\|
				729
				730	--- only list addresses matching this prefix.
				731
				732	\item \verb\|label PATTERN\|
				733
				734	--- only list addresses with labels matching the \verb\|PATTERN\|.
				735	\verb\|PATTERN\| is a usual shell style pattern.
				736
				737
				738	\item \verb\|dynamic\| and \verb\|permanent\|
				739
				740	--- (IPv6 only) only list addresses installed due to stateless
				741	address configuration or only list permanent (not dynamic) addresses.
				742
				743	\item \verb\|tentative\|
				744
				745	--- (IPv6 only) only list addresses which did not pass duplicate
				746	address detection.
				747
				748	\item \verb\|deprecated\|
				749
				750	--- (IPv6 only) only list deprecated addresses.
				751
				752
				753	\item \verb\|primary\| and \verb\|secondary\|
				754
				755	--- only list primary (or secondary) addresses.
				756
				757	\end{itemize}
				758
				759
				760	\paragraph{Output format:}
				761
				762	\begin{verbatim}
				763	kuznet@alisa:~ $ ip addr ls eth0
				764	3: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
				765	link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
				766	inet 193.233.7.90/24 brd 193.233.7.255 scope global eth0
				767	inet6 3ffe:2400:0:1:2a0:ccff:fe66:1878/64 scope global dynamic
				768	valid_lft forever preferred_lft 604746sec
				769	inet6 fe80::2a0:ccff:fe66:1878/10 scope link
				770	kuznet@alisa:~ $
				771	\end{verbatim}
				772
				773	The first two lines coincide with the output of \verb\|ip link ls\|.
				774	It is natural to interpret link layer addresses
				775	as addresses of the protocol family \verb\|AF_PACKET\|.
				776
				777	Then the list of IP and IPv6 addresses follows, accompanied by
				778	additional address attributes: scope value (see Sec.\ref{IP-ADDR-ADD},
				779	p.\pageref{IP-ADDR-ADD} above), flags and the address label.
				780
				781	Address flags are set by the kernel and cannot be changed
				782	administratively. Currently, the following flags are defined:
				783
				784	\begin{enumerate}
				785	\item \verb\|secondary\|
				786
				787	--- the address is not used when selecting the default source address
				788	of outgoing packets (cf.\ Appendix~\ref{ADDR-SEL}, p.\pageref{ADDR-SEL}).
				789	An IP address becomes secondary if another address with the same
				790	prefix bits already exists. The first address is primary.
				791	It is the leader of the group of all secondary addresses. When the leader
				792	is deleted, all secondaries are purged too.
Andreas Henriksson	3a9e482	2008-01-12 17:17:02 +0100	[diff] [blame]	793	There is a tweak in \verb\|/proc/sys/net/ipv4/conf/<dev>/promote_secondaries\|
				794	which activates promotion of secondaries when a primary is deleted.
				795	To permanently enable this feature on all devices add
				796	\verb\|net.ipv4.conf.all.promote_secondaries=1\| to \verb\|/etc/sysctl.conf\|.
				797	This tweak is available in Linux 2.6.15 and later.
osdl.org!shemminger	aba5acd	2004-04-15 20:56:59 +0000	[diff] [blame]	798
				799
				800	\item \verb\|dynamic\|
				801
				802	--- the address was created due to stateless autoconfiguration~\cite{RFC-ADDRCONF}.
				803	In this case the output also contains information on times, when
				804	the address is still valid. After \verb\|preferred_lft\| expires the address is
				805	moved to the deprecated state. After \verb\|valid_lft\| expires the address
				806	is finally invalidated.
				807
				808	\item \verb\|deprecated\|
				809
				810	--- the address is deprecated, i.e.\ it is still valid, but cannot
				811	be used by newly created connections.
				812
				813	\item \verb\|tentative\|
				814
				815	--- the address is not used because duplicate address detection~\cite{RFC-ADDRCONF}
				816	is still not complete or failed.
				817
				818	\end{enumerate}
				819
				820
				821	\subsection{{\tt ip address flush} --- flush protocol addresses}
				822	\label{IP-ADDR-FLUSH}
				823
				824	\paragraph{Abbreviations:} \verb\|flush\|, \verb\|f\|.
				825
				826	\paragraph{Description:}This command flushes the protocol addresses
				827	selected by some criteria.
				828
				829	\paragraph{Arguments:} This command has the same arguments as \verb\|show\|.
				830	The difference is that it does not run when no arguments are given.
				831
				832	\paragraph{Warning:} This command (and other \verb\|flush\| commands
				833	described below) is pretty dangerous. If you make a mistake, it will
				834	not forgive it, but will cruelly purge all the addresses.
				835
				836	\paragraph{Statistics:} With the \verb\|-statistics\| option, the command
				837	becomes verbose. It prints out the number of deleted addresses and the number
				838	of rounds made to flush the address list. If this option is given
				839	twice, \verb\|ip addr flush\| also dumps all the deleted addresses
				840	in the format described in the previous subsection.
				841
				842	\paragraph{Example:} Delete all the addresses from the private network
				843	10.0.0.0/8:
				844	\begin{verbatim}
				845	netadm@amber:~ # ip -s -s a f to 10/8
				846	2: dummy inet 10.7.7.7/16 brd 10.7.255.255 scope global dummy
				847	3: eth0 inet 10.10.7.7/16 brd 10.10.255.255 scope global eth0
				848	4: eth1 inet 10.8.7.7/16 brd 10.8.255.255 scope global eth1
				849
				850	* Round 1, deleting 3 addresses *
				851	* Flush is complete after 1 round *
				852	netadm@amber:~ #
				853	\end{verbatim}
				854	Another instructive example is disabling IP on all the Ethernets:
				855	\begin{verbatim}
				856	netadm@amber:~ # ip -4 addr flush label "eth*"
				857	\end{verbatim}
				858	And the last example shows how to flush all the IPv6 addresses
				859	acquired by the host from stateless address autoconfiguration
				860	after you enabled forwarding or disabled autoconfiguration.
				861	\begin{verbatim}
				862	netadm@amber:~ # ip -6 addr flush dynamic
				863	\end{verbatim}
				864
				865
				866
				867	\section{{\tt ip neighbour} --- neighbour/arp tables management}
				868
				869	\paragraph{Abbreviations:} \verb\|neighbour\|, \verb\|neighbor\|, \verb\|neigh\|,
				870	\verb\|n\|.
				871
				872	\paragraph{Object:} \verb\|neighbour\| objects establish bindings between protocol
				873	addresses and link layer addresses for hosts sharing the same link.
				874	Neighbour entries are organized into tables. The IPv4 neighbour table
				875	is known by another name --- the ARP table.
				876
				877	The corresponding commands display neighbour bindings
				878	and their properties, add new neighbour entries and delete old ones.
				879
				880	\paragraph{Commands:} \verb\|add\|, \verb\|change\|, \verb\|replace\|,
				881	\verb\|delete\|, \verb\|flush\| and \verb\|show\| (or \verb\|list\|).
				882
				883	\paragraph{See also:} Appendix~\ref{PROXY-NEIGH}, p.\pageref{PROXY-NEIGH}
				884	describes how to manage proxy ARP/NDISC with the \verb\|ip\| utility.
				885
				886
				887	\subsection{{\tt ip neighbour add} --- add a new neighbour entry\\
				888	{\tt ip neighbour change} --- change an existing entry\\
				889	{\tt ip neighbour replace} --- add a new entry or change an existing one}
				890
				891	\paragraph{Abbreviations:} \verb\|add\|, \verb\|a\|; \verb\|change\|, \verb\|chg\|;
				892	\verb\|replace\|, \verb\|repl\|.
				893
				894	\paragraph{Description:} These commands create new neighbour records
				895	or update existing ones.
				896
				897	\paragraph{Arguments:}
				898
				899	\begin{itemize}
				900	\item \verb\|to ADDRESS\| (default)
				901
				902	--- the protocol address of the neighbour. It is either an IPv4 or IPv6 address.
				903
				904	\item \verb\|dev NAME\|
				905
				906	--- the interface to which this neighbour is attached.
				907
				908
				909	\item \verb\|lladdr LLADDRESS\|
				910
				911	--- the link layer address of the neighbour. \verb\|LLADDRESS\| can also be
				912	\verb\|null\|.
				913
				914	\item \verb\|nud NUD_STATE\|
				915
				916	--- the state of the neighbour entry. \verb\|nud\| is an abbreviation for ``Neighbour
				917	Unreachability Detection''. The state can take one of the following values:
				918
				919	\begin{enumerate}
				920	\item \verb\|permanent\| --- the neighbour entry is valid forever and can only be removed
				921	administratively.
				922	\item \verb\|noarp\| --- the neighbour entry is valid. No attempts to validate
				923	this entry will be made but it can be removed when its lifetime expires.
				924	\item \verb\|reachable\| --- the neighbour entry is valid until the reachability
				925	timeout expires.
				926	\item \verb\|stale\| --- the neighbour entry is valid but suspicious.
				927	This option to \verb\|ip neigh\| does not change the neighbour state if
				928	it was valid and the address is not changed by this command.
				929	\end{enumerate}
				930
				931	\end{itemize}
				932
				933	\paragraph{Examples:}
				934	\begin{itemize}
				935	\item \verb\|ip neigh add 10.0.0.3 lladdr 0:0:0:0:0:1 dev eth0 nud perm\|
				936
				937	--- add a permanent ARP entry for the neighbour 10.0.0.3 on the device \verb\|eth0\|.
				938
				939	\item \verb\|ip neigh chg 10.0.0.3 dev eth0 nud reachable\|
				940
				941	--- change its state to \verb\|reachable\|.
				942	\end{itemize}
				943
				944
				945	\subsection{{\tt ip neighbour delete} --- delete a neighbour entry}
				946
				947	\paragraph{Abbreviations:} \verb\|delete\|, \verb\|del\|, \verb\|d\|.
				948
				949	\paragraph{Description:} This command invalidates a neighbour entry.
				950
				951	\paragraph{Arguments:} The arguments are the same as with \verb\|ip neigh add\|,
				952	except that \verb\|lladdr\| and \verb\|nud\| are ignored.
				953
				954
				955	\paragraph{Example:}
				956	\begin{itemize}
				957	\item \verb\|ip neigh del 10.0.0.3 dev eth0\|
				958
				959	--- invalidate an ARP entry for the neighbour 10.0.0.3 on the device \verb\|eth0\|.
				960
				961	\end{itemize}
				962
				963	\begin{NB}
				964	The deleted neighbour entry will not disappear from the tables
				965	immediately. If it is in use it cannot be deleted until the last
				966	client releases it. Otherwise it will be destroyed during
				967	the next garbage collection.
				968	\end{NB}
				969
				970
				971	\paragraph{Warning:} Attempts to delete or manually change
				972	a \verb\|noarp\| entry created by the kernel may result in unpredictable behaviour.
				973	Particularly, the kernel may try to resolve this address even
				974	on a \verb\|NOARP\| interface or if the address is multicast or broadcast.
				975
				976
				977	\subsection{{\tt ip neighbour show} --- list neighbour entries}
				978
				979	\paragraph{Abbreviations:} \verb\|show\|, \verb\|list\|, \verb\|sh\|, \verb\|ls\|.
				980
				981	\paragraph{Description:}This command displays neighbour tables.
				982
				983	\paragraph{Arguments:}
				984
				985	\begin{itemize}
				986
				987	\item \verb\|to ADDRESS\| (default)
				988
				989	--- the prefix selecting the neighbours to list.
				990
				991	\item \verb\|dev NAME\|
				992
				993	--- only list the neighbours attached to this device.
				994
				995	\item \verb\|unused\|
				996
				997	--- only list neighbours which are not currently in use.
				998
				999	\item \verb\|nud NUD_STATE\|
				1000
				1001	--- only list neighbour entries in this state. \verb\|NUD_STATE\| takes
				1002	values listed below or the special value \verb\|all\| which means all states.
				1003	This option may occur more than once. If this option is absent, \verb\|ip\|
				1004	lists all entries except for \verb\|none\| and \verb\|noarp\|.
				1005
				1006	\end{itemize}
				1007
				1008
				1009	\paragraph{Output format:}
				1010
				1011	\begin{verbatim}
				1012	kuznet@alisa:~ $ ip neigh ls
				1013	:: dev lo lladdr 00:00:00:00:00:00 nud noarp
				1014	fe80::200:cff:fe76:3f85 dev eth0 lladdr 00:00:0c:76:3f:85 router \
				1015	nud stale
				1016	0.0.0.0 dev lo lladdr 00:00:00:00:00:00 nud noarp
				1017	193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 nud reachable
				1018	193.233.7.85 dev eth0 lladdr 00:e0:1e:63:39:00 nud stale
				1019	kuznet@alisa:~ $
				1020	\end{verbatim}
				1021
				1022	The first word of each line is the protocol address of the neighbour.
				1023	Then the device name follows. The rest of the line describes the contents of
				1024	the neighbour entry identified by the pair (device, address).
				1025
				1026	\verb\|lladdr\| is the link layer address of the neighbour.
				1027
				1028	\verb\|nud\| is the state of the ``neighbour unreachability detection'' machine
				1029	for this entry. The detailed description of the neighbour
				1030	state machine can be found in~\cite{RFC-NDISC}. Here is the full list
				1031	of the states with short descriptions:
				1032
				1033	\begin{enumerate}
				1034	\item\verb\|none\| --- the state of the neighbour is void.
				1035	\item\verb\|incomplete\| --- the neighbour is in the process of resolution.
				1036	\item\verb\|reachable\| --- the neighbour is valid and apparently reachable.
				1037	\item\verb\|stale\| --- the neighbour is valid, but is probably already
				1038	unreachable, so the kernel will try to check it at the first transmission.
				1039	\item\verb\|delay\| --- a packet has been sent to the stale neighbour and the kernel is waiting
				1040	for confirmation.
				1041	\item\verb\|probe\| --- the delay timer expired but no confirmation was received.
				1042	The kernel has started to probe the neighbour with ARP/NDISC messages.
				1043	\item\verb\|failed\| --- resolution has failed.
				1044	\item\verb\|noarp\| --- the neighbour is valid. No attempts to check the entry
				1045	will be made.
				1046	\item\verb\|permanent\| --- it is a \verb\|noarp\| entry, but only the administrator
				1047	may remove the entry from the neighbour table.
				1048	\end{enumerate}
				1049
				1050	The link layer address is valid in all states except for \verb\|none\|,
				1051	\verb\|failed\| and \verb\|incomplete\|.
				1052
				1053	IPv6 neighbours can be marked with the additional flag \verb\|router\|
				1054	which means that the neighbour introduced itself as an IPv6 router~\cite{RFC-NDISC}.
				1055
				1056	\paragraph{Statistics:} The \verb\|-statistics\| option displays some usage
				1057	statistics, f.e.\
				1058
				1059	\begin{verbatim}
				1060	kuznet@alisa:~ $ ip -s n ls 193.233.7.254
				1061	193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 ref 5 used 12/13/20 \
				1062	nud reachable
				1063	kuznet@alisa:~ $
				1064	\end{verbatim}
				1065
				1066	Here \verb\|ref\| is the number of users of this entry
				1067	and \verb\|used\| is a triplet of time intervals in seconds
				1068	separated by slashes. In this case they show that:
				1069
				1070	\begin{enumerate}
				1071	\item the entry was used 12 seconds ago.
				1072	\item the entry was confirmed 13 seconds ago.
				1073	\item the entry was updated 20 seconds ago.
				1074	\end{enumerate}
				1075
				1076	\subsection{{\tt ip neighbour flush} --- flush neighbour entries}
				1077
				1078	\paragraph{Abbreviations:} \verb\|flush\|, \verb\|f\|.
				1079
				1080	\paragraph{Description:}This command flushes neighbour tables, selecting
				1081	entries to flush by some criteria.
				1082
				1083	\paragraph{Arguments:} This command has the same arguments as \verb\|show\|.
				1084	The differences are that it does not run when no arguments are given,
				1085	and that the default neighbour states to be flushed do not include
				1086	\verb\|permanent\| and \verb\|noarp\|.
				1087
				1088
				1089	\paragraph{Statistics:} With the \verb\|-statistics\| option, the command
				1090	becomes verbose. It prints out the number of deleted neighbours and the number
				1091	of rounds made to flush the neighbour table. If the option is given
				1092	twice, \verb\|ip neigh flush\| also dumps all the deleted neighbours
				1093	in the format described in the previous subsection.
				1094
				1095	\paragraph{Example:}
				1096	\begin{verbatim}
				1097	netadm@alisa:~ # ip -s -s n f 193.233.7.254
				1098	193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 ref 5 used 12/13/20 \
				1099	nud reachable
				1100
				1101	* Round 1, deleting 1 entries *
				1102	* Flush is complete after 1 round *
				1103	netadm@alisa:~ #
				1104	\end{verbatim}
				1105
				1106
				1107	\section{{\tt ip route} --- routing table management}
				1108	\label{IP-ROUTE}
				1109
				1110	\paragraph{Abbreviations:} \verb\|route\|, \verb\|ro\|, \verb\|r\|.
				1111
				1112	\paragraph{Object:} \verb\|route\| entries in the kernel routing tables keep
				1113	information about paths to other networked nodes.
				1114
				1115	Each route entry has a {\em key\/} consisting of a {\em prefix\/}
				1116	(i.e.\ a pair containing a network address and the length of its mask) and,
				1117	optionally, the TOS value. An IP packet matches the route if the highest
				1118	bits of its destination address are equal to the route prefix at least
				1119	up to the prefix length and if the TOS of the route is zero or equal to
				1120	the TOS of the packet.
				1121
				1122	If several routes match the packet, the following pruning rules
				1123	are used to select the best one (see~\cite{RFC1812}):
				1124	\begin{enumerate}
				1125	\item The longest matching prefix is selected. All shorter ones
				1126	are dropped.
				1127
				1128	\item If the TOS of some route with the longest prefix is equal to the TOS
				1129	of the packet, the routes with different TOS are dropped.
				1130
				1131	If no exact TOS match was found and routes with TOS=0 exist,
				1132	the rest of the routes are pruned.
				1133
				1134	Otherwise, the route lookup fails.
				1135
				1136	\item If several routes remain after the previous steps, then
				1137	the routes with the best preference values are selected.
				1138
				1139	\item If we still have several routes, then the {\em first\/} of them
				1140	is selected.
				1141
				1142	\begin{NB}
				1143	Note the ambiguity of the last step. Unfortunately, Linux
				1144	historically allows such a bizarre situation. The sense of the
				1145	word ``first'' depends on the order of route additions and it is practically
				1146	impossible to maintain a bundle of such routes in this order.
				1147	\end{NB}
				1148
				1149	For simplicity we will limit ourselves to the case where such a situation
				1150	is impossible and routes are uniquely identified by the triplet
				1151	\{prefix, tos, preference\}. Actually, it is impossible to create
				1152	non-unique routes with \verb\|ip\| commands described in this section.
				1153
				1154	One useful exception to this rule is the default route on non-forwarding
				1155	hosts. It is ``officially'' allowed to have several fallback routes
				1156	when several routers are present on directly connected networks.
				1157	In this case, Linux-2.2 makes ``dead gateway detection''~\cite{RFC1122}
				1158	controlled by neighbour unreachability detection and by advice
				1159	from transport protocols to select a working router, so the order
				1160	of the routes is not essential. However, in this case,
				1161	fiddling with default routes manually is not recommended. Use the Router Discovery
				1162	protocol (see Appendix~\ref{EXAMPLE-SETUP}, p.\pageref{EXAMPLE-SETUP})
				1163	instead. Actually, Linux-2.2 IPv6 does not give user level applications
				1164	any access to default routes.
				1165	\end{enumerate}
				1166
				1167	Certainly, the steps above are not performed exactly
				1168	in this sequence. Instead, the routing table in the kernel is kept
				1169	in some data structure to achieve the final result
				1170	with minimal cost. However, not depending on a particular
				1171	routing algorithm implemented in the kernel, we can summarize
				1172	the statements above as: a route is identified by the triplet
				1173	\{prefix, tos, preference\}. This {\em key\/} lets us locate
				1174	the route in the routing table.
				1175
				1176	\paragraph{Route attributes:} Each route key refers to a routing
				1177	information record containing
				1178	the data required to deliver IP packets (f.e.\ output device and
				1179	next hop router) and some optional attributes (f.e.\ the path MTU or
				1180	the preferred source address when communicating with this destination).
				1181	These attributes are described in the following subsection.
				1182
				1183	\paragraph{Route types:} \label{IP-ROUTE-TYPES}
				1184	It is important that the set
				1185	of required and optional attributes depend on the route {\em type\/}.
				1186	The most important route type
				1187	is \verb\|unicast\|. It describes real paths to other hosts.
				1188	As a rule, common routing tables contain only such routes. However,
				1189	there are other types of routes with different semantics. The
				1190	full list of types understood by Linux-2.2 is:
				1191	\begin{itemize}
				1192	\item \verb\|unicast\| --- the route entry describes real paths to the
				1193	destinations covered by the route prefix.
				1194	\item \verb\|unreachable\| --- these destinations are unreachable. Packets
				1195	are discarded and the ICMP message {\em host unreachable\/} is generated.
				1196	The local senders get an \verb\|EHOSTUNREACH\| error.
				1197	\item \verb\|blackhole\| --- these destinations are unreachable. Packets
				1198	are discarded silently. The local senders get an \verb\|EINVAL\| error.
				1199	\item \verb\|prohibit\| --- these destinations are unreachable. Packets
				1200	are discarded and the ICMP message {\em communication administratively
				1201	prohibited\/} is generated. The local senders get an \verb\|EACCES\| error.
				1202	\item \verb\|local\| --- the destinations are assigned to this
				1203	host. The packets are looped back and delivered locally.
				1204	\item \verb\|broadcast\| --- the destinations are broadcast addresses.
				1205	The packets are sent as link broadcasts.
				1206	\item \verb\|throw\| --- a special control route used together with policy
				1207	rules (see sec.\ref{IP-RULE}, p.\pageref{IP-RULE}). If such a route is selected, lookup
				1208	in this table is terminated pretending that no route was found.
				1209	Without policy routing it is equivalent to the absence of the route in the routing
				1210	table. The packets are dropped and the ICMP message {\em net unreachable\/}
				1211	is generated. The local senders get an \verb\|ENETUNREACH\| error.
				1212	\item \verb\|nat\| --- a special NAT route. Destinations covered by the prefix
				1213	are considered to be dummy (or external) addresses which require translation
				1214	to real (or internal) ones before forwarding. The addresses to translate to
				1215	are selected with the attribute \verb\|via\|. More about NAT is
				1216	in Appendix~\ref{ROUTE-NAT}, p.\pageref{ROUTE-NAT}.
				1217	\item \verb\|anycast\| --- ({\em not implemented\/}) the destinations are
				1218	{\em anycast\/} addresses assigned to this host. They are mainly equivalent
				1219	to \verb\|local\| with one difference: such addresses are invalid when used
				1220	as the source address of any packet.
				1221	\item \verb\|multicast\| --- a special type used for multicast routing.
				1222	It is not present in normal routing tables.
				1223	\end{itemize}
				1224
				1225	\paragraph{Route tables:} Linux-2.2 can pack routes into several routing
				1226	tables identified by a number in the range from 1 to 255 or by
				1227	name from the file \verb\|/etc/iproute2/rt_tables\|. By default all normal
				1228	routes are inserted into the \verb\|main\| table (ID 254) and the kernel only uses
				1229	this table when calculating routes.
				1230
				1231	Actually, one other table always exists, which is invisible but
				1232	even more important. It is the \verb\|local\| table (ID 255). This table
				1233	consists of routes for local and broadcast addresses. The kernel maintains
				1234	this table automatically and the administrator usually need not modify it
				1235	or even look at it.
				1236
				1237	The multiple routing tables enter the game when {\em policy routing\/}
				1238	is used. See sec.\ref{IP-RULE}, p.\pageref{IP-RULE}.
				1239	In this case, the table identifier effectively becomes
				1240	one more parameter, which should be added to the triplet
				1241	\{prefix, tos, preference\} to uniquely identify the route.
				1242
				1243
				1244	\subsection{{\tt ip route add} --- add a new route\\
				1245	{\tt ip route change} --- change a route\\
				1246	{\tt ip route replace} --- change a route or add a new one}
				1247	\label{IP-ROUTE-ADD}
				1248
				1249	\paragraph{Abbreviations:} \verb\|add\|, \verb\|a\|; \verb\|change\|, \verb\|chg\|;
				1250	\verb\|replace\|, \verb\|repl\|.
				1251
				1252
				1253	\paragraph{Arguments:}
				1254	\begin{itemize}
				1255	\item \verb\|to PREFIX\| or \verb\|to TYPE PREFIX\| (default)
				1256
				1257	--- the destination prefix of the route. If \verb\|TYPE\| is omitted,
				1258	\verb\|ip\| assumes type \verb\|unicast\|. Other values of \verb\|TYPE\|
				1259	are listed above. \verb\|PREFIX\| is an IP or IPv6 address optionally followed
				1260	by a slash and the prefix length. If the length of the prefix is missing,
				1261	\verb\|ip\| assumes a full-length host route. There is also a special
				1262	\verb\|PREFIX\| --- \verb\|default\| --- which is equivalent to IP \verb\|0/0\| or
				1263	to IPv6 \verb\|::/0\|.
				1264
				1265	\item \verb\|tos TOS\| or \verb\|dsfield TOS\|
				1266
				1267	--- the Type Of Service (TOS) key. This key has no associated mask and
				1268	the longest match is understood as: First, compare the TOS
				1269	of the route and of the packet. If they are not equal, then the packet
				1270	may still match a route with a zero TOS. \verb\|TOS\| is either an 8 bit hexadecimal
				1271	number or an identifier from {\tt /etc/iproute2/rt\_dsfield}.
				1272
				1273
				1274	\item \verb\|metric NUMBER\| or \verb\|preference NUMBER\|
				1275
				1276	--- the preference value of the route. \verb\|NUMBER\| is an arbitrary 32bit number.
				1277
				1278	\item \verb\|table TABLEID\|
				1279
				1280	--- the table to add this route to.
				1281	\verb\|TABLEID\| may be a number or a string from the file
				1282	\verb\|/etc/iproute2/rt_tables\|. If this parameter is omitted,
				1283	\verb\|ip\| assumes the \verb\|main\| table, with the exception of
				1284	\verb\|local\|, \verb\|broadcast\| and \verb\|nat\| routes, which are
				1285	put into the \verb\|local\| table by default.
				1286
				1287	\item \verb\|dev NAME\|
				1288
				1289	--- the output device name.
				1290
				1291	\item \verb\|via ADDRESS\|
				1292
				1293	--- the address of the nexthop router. Actually, the sense of this field depends
				1294	on the route type. For normal \verb\|unicast\| routes it is either the true nexthop
				1295	router or, if it is a direct route installed in BSD compatibility mode,
				1296	it can be a local address of the interface.
				1297	For NAT routes it is the first address of the block of translated IP destinations.
				1298
				1299	\item \verb\|src ADDRESS\|
				1300
				1301	--- the source address to prefer when sending to the destinations
				1302	covered by the route prefix.
				1303
				1304	\item \verb\|realm REALMID\|
				1305
				1306	--- the realm to which this route is assigned.
				1307	\verb\|REALMID\| may be a number or a string from the file
				1308	\verb\|/etc/iproute2/rt_realms\|. Sec.\ref{RT-REALMS} (p.\pageref{RT-REALMS})
				1309	contains more information on realms.
				1310
				1311	\item \verb\|mtu MTU\| or \verb\|mtu lock MTU\|
				1312
				1313	--- the MTU along the path to the destination. If the modifier \verb\|lock\| is
				1314	not used, the MTU may be updated by the kernel due to Path MTU Discovery.
				1315	If the modifier \verb\|lock\| is used, no path MTU discovery will be tried,
				1316	all packets will be sent without the DF bit in the IPv4 case
				1317	or fragmented to MTU for IPv6.
				1318
				1319	\item \verb\|window NUMBER\|
				1320
				1321	--- the maximal window for TCP to advertise to these destinations,
				1322	measured in bytes. It limits maximal data bursts that our TCP
				1323	peers are allowed to send to us.
				1324
				1325	\item \verb\|rtt NUMBER\|
				1326
				1327	--- the initial RTT (``Round Trip Time'') estimate.
				1328
				1329
				1330	\item \verb\|rttvar NUMBER\|
				1331
				1332	--- \threeonly the initial RTT variance estimate.
				1333
				1334
				1335	\item \verb\|ssthresh NUMBER\|
				1336
				1337	--- \threeonly an estimate for the initial slow start threshold.
				1338
				1339
				1340	\item \verb\|cwnd NUMBER\|
				1341
				1342	--- \threeonly the clamp for congestion window. It is ignored if the \verb\|lock\|
				1343	flag is not used.
				1344
				1345
				1346	\item \verb\|advmss NUMBER\|
				1347
				1348	--- \threeonly the MSS (``Maximal Segment Size'') to advertise to these
				1349	destinations when establishing TCP connections. If it is not given,
				1350	Linux uses a default value calculated from the first hop device MTU.
				1351
				1352	\begin{NB}
				1353	If the path to these destinations is asymmetric, this guess may be wrong.
				1354	\end{NB}
				1355
				1356	\item \verb\|reordering NUMBER\|
				1357
				1358	--- \threeonly Maximal reordering on the path to this destination.
				1359	If it is not given, Linux uses the value selected with \verb\|sysctl\|
				1360	variable \verb\|net/ipv4/tcp_reordering\|.
				1361
Gilad Ben-Yossef	71e5815	2009-10-06 15:40:34 +0200	[diff] [blame]	1362	\item \verb\|hoplimit NUMBER\|
osdl.org!shemminger	aba5acd	2004-04-15 20:56:59 +0000	[diff] [blame]	1363
Gilad Ben-Yossef	71e5815	2009-10-06 15:40:34 +0200	[diff] [blame]	1364	--- [2.5.74+ only] Maximum number of hops on the path to this destination.
				1365	The default is the value selected with the \verb\|sysctl\| variable
				1366	\verb\|net/ipv4/ip_default_ttl\|.
				1367
				1368	\item \verb\|initcwnd NUMBER\|
				1369	--- [2.5.70+ only] Initial congestion window size for connections to
				1370	this destination. Actual window size is this value multiplied by the
				1371	MSS (``Maximal Segment Size'') for the same connection. The default is
				1372	zero, meaning to use the values specified in~\cite{RFC2414}.
osdl.org!shemminger	aba5acd	2004-04-15 20:56:59 +0000	[diff] [blame]	1373
laurent chavey	f5fd800	2009-12-15 13:05:15 +0000	[diff] [blame]	1374	\item \verb\|initrwnd NUMBER\|
				1375
				1376	--- [2.6.33+ only] Initial receive window size for connections to
				1377	this destination. The actual window size is this value multiplied
				1378	by the MSS (``Maximal Segment Size'') of the connection. The default
				1379	value is zero, meaning to use the Slow Start value.
				1380
osdl.org!shemminger	aba5acd	2004-04-15 20:56:59 +0000	[diff] [blame]	1381	\item \verb\|nexthop NEXTHOP\|
				1382
				1383	--- the nexthop of a multipath route. \verb\|NEXTHOP\| is a complex value
				1384	with its own syntax similar to the top level argument lists:
				1385	\begin{itemize}
				1386	\item \verb\|via ADDRESS\| is the nexthop router.
				1387	\item \verb\|dev NAME\| is the output device.
				1388	\item \verb\|weight NUMBER\| is a weight for this element of a multipath
				1389	route reflecting its relative bandwidth or quality.
				1390	\end{itemize}
				1391
				1392	\item \verb\|scope SCOPE_VAL\|
				1393
				1394	--- the scope of the destinations covered by the route prefix.
				1395	\verb\|SCOPE_VAL\| may be a number or a string from the file
				1396	\verb\|/etc/iproute2/rt_scopes\|.
				1397	If this parameter is omitted,
				1398	\verb\|ip\| assumes scope \verb\|global\| for all gatewayed \verb\|unicast\|
				1399	routes, scope \verb\|link\| for direct \verb\|unicast\| and \verb\|broadcast\| routes
				1400	and scope \verb\|host\| for \verb\|local\| routes.
				1401
				1402	\item \verb\|protocol RTPROTO\|
				1403
				1404	--- the routing protocol identifier of this route.
				1405	\verb\|RTPROTO\| may be a number or a string from the file
				1406	\verb\|/etc/iproute2/rt_protos\|. If the routing protocol ID is
				1407	not given, \verb\|ip\| assumes protocol \verb\|boot\| (i.e.\
				1408	it assumes the route was added by someone who doesn't
				1409	understand what they are doing). Several protocol values have a fixed interpretation.
				1410	Namely:
				1411	\begin{itemize}
				1412	\item \verb\|redirect\| --- the route was installed due to an ICMP redirect.
				1413	\item \verb\|kernel\| --- the route was installed by the kernel during
				1414	autoconfiguration.
				1415	\item \verb\|boot\| --- the route was installed during the bootup sequence.
				1416	If a routing daemon starts, it will purge all of them.
				1417	\item \verb\|static\| --- the route was installed by the administrator
				1418	to override dynamic routing. A routing daemon will respect them
				1419	and, probably, even advertise them to its peers.
				1420	\item \verb\|ra\| --- the route was installed by the Router Discovery protocol.
				1421	\end{itemize}
				1422	The rest of the values are not reserved and the administrator is free
				1423	to assign (or not to assign) protocol tags. At least, routing
				1424	daemons should take care of setting some unique protocol values,
				1425	f.e.\ as they are assigned in \verb\|rtnetlink.h\| or in \verb\|rt_protos\|
				1426	database.
				1427
				1428
				1429	\item \verb\|onlink\|
				1430
				1431	--- pretend that the nexthop is directly attached to this link,
				1432	even if it does not match any interface prefix. One application of this
				1433	option may be found in~\cite{IP-TUNNELS}.
				1434
Lubomir Rintel	194e9b8	2015-03-16 16:01:47 +0100	[diff] [blame]	1435	\item \verb\|pref PREF\|
				1436
				1437	--- the IPv6 route preference.
				1438	\verb\|PREF\| is a string specifying the route preference as defined in
				1439	RFC4191 for Router Discovery messages. Namely:
				1440	\begin{itemize}
				1441	\item \verb\|low\| --- the route has the lowest priority.
				1442	\item \verb\|medium\| --- the route has the default priority.
				1443	\item \verb\|high\| --- the route has the highest priority.
				1444	\end{itemize}
				1445
osdl.org!shemminger	aba5acd	2004-04-15 20:56:59 +0000	[diff] [blame]	1446	\end{itemize}
				1447
				1448
				1449	\begin{NB}
				1450	Actually there are more commands: \verb\|prepend\| does the same
				1451	thing as classic \verb\|route add\|, i.e.\ adds a route, even if another
				1452	route to the same destination exists. Its opposite case is \verb\|append\|,
				1453	which adds the route to the end of the list. Avoid these
				1454	features.
				1455	\end{NB}
				1456	\begin{NB}
				1457	More sad news, IPv6 only understands the \verb\|append\| command correctly.
				1458	All the others are translated into \verb\|append\| commands. Certainly,
				1459	this will change in the future.
				1460	\end{NB}
				1461
				1462	\paragraph{Examples:}
				1463	\begin{itemize}
				1464	\item add a plain route to network 10.0.0/24 via gateway 193.233.7.65
				1465	\begin{verbatim}
				1466	ip route add 10.0.0/24 via 193.233.7.65
				1467	\end{verbatim}
				1468	\item change it to a direct route via the \verb\|dummy\| device
				1469	\begin{verbatim}
				1470	ip ro chg 10.0.0/24 dev dummy
				1471	\end{verbatim}
				1472	\item add a default multipath route splitting the load between \verb\|ppp0\|
				1473	and \verb\|ppp1\|
				1474	\begin{verbatim}
				1475	ip route add default scope global nexthop dev ppp0 \
				1476	nexthop dev ppp1
				1477	\end{verbatim}
				1478	Note the scope value. It is not necessary but it informs the kernel
				1479	that this route is gatewayed rather than direct. Actually, if you
				1480	know the addresses of remote endpoints it would be better to use the
				1481	\verb\|via\| parameter.
				1482	\item announce that the address 192.203.80.144 is not a real one, but
				1483	should be translated to 193.233.7.83 before forwarding
				1484	\begin{verbatim}
				1485	ip route add nat 192.203.80.144 via 193.233.7.83
				1486	\end{verbatim}
				1487	Backward translation is set up with policy rules described
				1488	in the following section (sec.\ref{IP-RULE}, p.\pageref{IP-RULE}).
				1489	\end{itemize}
				1490
				1491	\subsection{{\tt ip route delete} --- delete a route}
				1492
				1493	\paragraph{Abbreviations:} \verb\|delete\|, \verb\|del\|, \verb\|d\|.
				1494
				1495	\paragraph{Arguments:} \verb\|ip route del\| has the same arguments as
				1496	\verb\|ip route add\|, but their semantics are a bit different.
				1497
				1498	Key values (\verb\|to\|, \verb\|tos\|, \verb\|preference\| and \verb\|table\|)
				1499	select the route to delete. If optional attributes are present, \verb\|ip\|
				1500	verifies that they coincide with the attributes of the route to delete.
				1501	If no route with the given key and attributes was found, \verb\|ip route del\|
				1502	fails.
				1503	\begin{NB}
				1504	Linux-2.0 had the option to delete a route selected only by prefix address,
				1505	ignoring its length (i.e.\ netmask). This option no longer exists
				1506	because it was ambiguous. However, look at {\tt ip route flush}
				1507	(sec.\ref{IP-ROUTE-FLUSH}, p.\pageref{IP-ROUTE-FLUSH}) which
				1508	provides similar and even richer functionality.
				1509	\end{NB}
				1510
				1511	\paragraph{Example:}
				1512	\begin{itemize}
				1513	\item delete the multipath route created by the command in the previous subsection
				1514	\begin{verbatim}
				1515	ip route del default scope global nexthop dev ppp0 \
				1516	nexthop dev ppp1
				1517	\end{verbatim}
				1518	\end{itemize}
				1519
				1520
				1521
				1522	\subsection{{\tt ip route show} --- list routes}
				1523
				1524	\paragraph{Abbreviations:} \verb\|show\|, \verb\|list\|, \verb\|sh\|, \verb\|ls\|, \verb\|l\|.
				1525
				1526	\paragraph{Description:} the command displays the contents of the routing tables
				1527	or the route(s) selected by some criteria.
				1528
				1529
				1530	\paragraph{Arguments:}
				1531	\begin{itemize}
				1532	\item \verb\|to SELECTOR\| (default)
				1533
				1534	--- only select routes from the given range of destinations. \verb\|SELECTOR\|
				1535	consists of an optional modifier (\verb\|root\|, \verb\|match\| or \verb\|exact\|)
				1536	and a prefix. \verb\|root PREFIX\| selects routes with prefixes not shorter
				1537	than \verb\|PREFIX\|. F.e.\ \verb\|root 0/0\| selects the entire routing table.
				1538	\verb\|match PREFIX\| selects routes with prefixes not longer than
				1539	\verb\|PREFIX\|. F.e.\ \verb\|match 10.0/16\| selects \verb\|10.0/16\|,
				1540	\verb\|10/8\| and \verb\|0/0\|, but it does not select \verb\|10.1/16\| and
				1541	\verb\|10.0.0/24\|. And \verb\|exact PREFIX\| (or just \verb\|PREFIX\|)
				1542	selects routes with this exact prefix. If none of these options
				1543	is present, \verb\|ip\| assumes \verb\|root 0/0\| i.e.\ it lists the entire table.
				1544
				1545
				1546	\item \verb\|tos TOS\| or \verb\|dsfield TOS\|
				1547
				1548	--- only select routes with the given TOS.
				1549
				1550
				1551	\item \verb\|table TABLEID\|
				1552
				1553	--- show the routes from this table(s). The default setting is to show
				1554	\verb\|table\| \verb\|main\|. \verb\|TABLEID\| may either be the ID of a real table
				1555	or one of the special values:
				1556	\begin{itemize}
				1557	\item \verb\|all\| --- list all of the tables.
				1558	\item \verb\|cache\| --- dump the routing cache.
				1559	\end{itemize}
				1560	\begin{NB}
				1561	IPv6 has a single table. However, splitting it into \verb\|main\|, \verb\|local\|
				1562	and \verb\|cache\| is emulated by the \verb\|ip\| utility.
				1563	\end{NB}
				1564
				1565	\item \verb\|cloned\| or \verb\|cached\|
				1566
				1567	--- list cloned routes i.e.\ routes which were dynamically forked from
				1568	other routes because some route attribute (f.e.\ MTU) was updated.
				1569	Actually, it is equivalent to \verb\|table cache\|.
				1570
				1571	\item \verb\|from SELECTOR\|
				1572
				1573	--- the same syntax as for \verb\|to\|, but it binds the source address range
				1574	rather than destinations. Note that the \verb\|from\| option only works with
				1575	cloned routes.
				1576
				1577	\item \verb\|protocol RTPROTO\|
				1578
				1579	--- only list routes of this protocol.
				1580
				1581
				1582	\item \verb\|scope SCOPE_VAL\|
				1583
				1584	--- only list routes with this scope.
				1585
				1586	\item \verb\|type TYPE\|
				1587
				1588	--- only list routes of this type.
				1589
				1590	\item \verb\|dev NAME\|
				1591
				1592	--- only list routes going via this device.
				1593
				1594	\item \verb\|via PREFIX\|
				1595
				1596	--- only list routes going via the nexthop routers selected by \verb\|PREFIX\|.
				1597
				1598	\item \verb\|src PREFIX\|
				1599
				1600	--- only list routes with preferred source addresses selected
				1601	by \verb\|PREFIX\|.
				1602
				1603	\item \verb\|realm REALMID\| or \verb\|realms FROMREALM/TOREALM\|
				1604
				1605	--- only list routes with these realms.
				1606
				1607	\end{itemize}
				1608
				1609	\paragraph{Examples:} Let us count routes of protocol \verb\|gated/bgp\|
				1610	on a router:
				1611	\begin{verbatim}
				1612	kuznet@amber:~ $ ip ro ls proto gated/bgp \| wc
				1613	1413 9891 79010
				1614	kuznet@amber:~ $
				1615	\end{verbatim}
				1616	To count the size of the routing cache, we have to use the \verb\|-o\| option
				1617	because cached attributes can take more than one line of output:
				1618	\begin{verbatim}
				1619	kuznet@amber:~ $ ip -o ro ls cloned \| wc
				1620	159 2543 18707
				1621	kuznet@amber:~ $
				1622	\end{verbatim}
				1623
				1624
				1625	\paragraph{Output format:} The output of this command consists
				1626	of per route records separated by line feeds.
				1627	However, some records may consist
				1628	of more than one line: particularly, this is the case when the route
				1629	is cloned or you requested additional statistics. If the
				1630	\verb\|-o\| option was given, then line feeds separating lines inside
				1631	records are replaced with the backslash sign.
				1632
				1633	The output has the same syntax as arguments given to {\tt ip route add},
				1634	so that it can be understood easily. F.e.\
				1635	\begin{verbatim}
				1636	kuznet@amber:~ $ ip ro ls 193.233.7/24
				1637	193.233.7.0/24 dev eth0 proto gated/conn scope link \
				1638	src 193.233.7.65 realms inr.ac
				1639	kuznet@amber:~ $
				1640	\end{verbatim}
				1641
				1642	If you list cloned entries, the output contains other attributes which
				1643	are evaluated during route calculation and updated during route
				1644	lifetime. An example of the output is:
				1645	\begin{verbatim}
				1646	kuznet@amber:~ $ ip ro ls 193.233.7.82 tab cache
				1647	193.233.7.82 from 193.233.7.82 dev eth0 src 193.233.7.65 \
				1648	realms inr.ac/inr.ac
				1649	cache <src-direct,redirect> mtu 1500 rtt 300 iif eth0
				1650	193.233.7.82 dev eth0 src 193.233.7.65 realms inr.ac
				1651	cache mtu 1500 rtt 300
				1652	kuznet@amber:~ $
				1653	\end{verbatim}
				1654	\begin{NB}
				1655	\label{NB-strange-route}
				1656	The route looks a bit strange, doesn't it? Did you notice that
				1657	it is a path from 193.233.7.82 back to 193.233.7.82? Well, you will
				1658	see in the section on \verb\|ip route get\| (p.\pageref{NB-nature-of-strangeness})
				1659	how it appeared.
				1660	\end{NB}
				1661	The second line, starting with the word \verb\|cache\|, shows
				1662	additional attributes which normal routes do not possess.
				1663	Cached flags are summarized in angle brackets:
				1664	\begin{itemize}
				1665	\item \verb\|local\| --- packets are delivered locally.
				1666	It stands for loopback unicast routes, for broadcast routes
				1667	and for multicast routes, if this host is a member of the corresponding
				1668	group.
				1669
				1670	\item \verb\|reject\| --- the path is bad. Any attempt to use it results
				1671	in an error. See attribute \verb\|error\| below (p.\pageref{IP-ROUTE-GET-error}).
				1672
				1673	\item \verb\|mc\| --- the destination is multicast.
				1674
				1675	\item \verb\|brd\| --- the destination is broadcast.
				1676
				1677	\item \verb\|src-direct\| --- the source is on a directly connected
				1678	interface.
				1679
				1680	\item \verb\|redirected\| --- the route was created by an ICMP Redirect.
				1681
				1682	\item \verb\|redirect\| --- packets going via this route will
				1683	trigger an ICMP redirect.
				1684
				1685	\item \verb\|fastroute\| --- the route is eligible to be used for fastroute.
				1686
				1687	\item \verb\|equalize\| --- make packet by packet randomization
				1688	along this path.
				1689
				1690	\item \verb\|dst-nat\| --- the destination address requires translation.
				1691
				1692	\item \verb\|src-nat\| --- the source address requires translation.
				1693
				1694	\item \verb\|masq\| --- the source address requires masquerading.
				1695	This feature disappeared in Linux-2.4.
				1696
				1697	\item \verb\|notify\| --- ({\em not implemented}) change/deletion
				1698	of this route will trigger RTNETLINK notification.
				1699	\end{itemize}
				1700
				1701	Then some optional attributes follow:
				1702	\begin{itemize}
				1703	\item \verb\|error\| --- on \verb\|reject\| routes it is the error code
				1704	returned to local senders when they try to use this route.
				1705	These error codes are translated into ICMP error codes, sent to remote
				1706	senders, according to the rules described above in the subsection
				1707	devoted to route types (p.\pageref{IP-ROUTE-TYPES}).
				1708	\label{IP-ROUTE-GET-error}
				1709
				1710	\item \verb\|expires\| --- this entry will expire after this timeout.
				1711
				1712	\item \verb\|iif\| --- the packets for this path are expected to arrive
				1713	on this interface.
				1714	\end{itemize}
				1715
				1716	\paragraph{Statistics:} With the \verb\|-statistics\| option, more
				1717	information about this route is shown:
				1718	\begin{itemize}
				1719	\item \verb\|users\| --- the number of users of this entry.
				1720	\item \verb\|age\| --- shows when this route was last used.
				1721	\item \verb\|used\| --- the number of lookups of this route since its creation.
				1722	\end{itemize}
				1723
Dan Smith	f4ff11e	2010-12-01 11:24:58 -0800	[diff] [blame]	1724	\subsection{{\tt ip route save} --- save routing tables}
				1725	\label{IP-ROUTE-SAVE}
				1726
				1727	\paragraph{Description:} this command saves the contents of the routing
				1728	tables or the route(s) selected by some criteria to standard output.
				1729
				1730	\paragraph{Arguments:} \verb\|ip route save\| has the same arguments as
				1731	\verb\|ip route show\|.
				1732
				1733	\paragraph{Example:} This saves all the routes to the {\tt saved\_routes}
				1734	file:
				1735	\begin{verbatim}
				1736	dan@caffeine:~ # ip route save > saved_routes
				1737	\end{verbatim}
				1738
				1739	\paragraph{Output format:} The format of the data stream provided by
				1740	\verb\|ip route save\| is that of \verb\|rtnetlink\|. See
				1741	\verb\|rtnetlink(7)\| for more information.
				1742
				1743	\subsection{{\tt ip route restore} --- restore routing tables}
				1744	\label{IP-ROUTE-RESTORE}
				1745
				1746	\paragraph{Description:} this command restores the contents of the routing
				1747	tables according to a data stream as provided by \verb\|ip route save\| via
				1748	standard input. Note that any routes already in the table are left unchanged.
				1749	Any routes in the input stream that already exist in the tables are ignored.
				1750
				1751	\paragraph{Arguments:} This command takes no arguments.
				1752
				1753	\paragraph{Example:} This restores all routes that were saved to the
				1754	{\tt saved\_routes} file:
				1755
				1756	\begin{verbatim}
				1757	dan@caffeine:~ # ip route restore < saved_routes
				1758	\end{verbatim}
osdl.org!shemminger	aba5acd	2004-04-15 20:56:59 +0000	[diff] [blame]	1759
				1760	\subsection{{\tt ip route flush} --- flush routing tables}
				1761	\label{IP-ROUTE-FLUSH}
				1762
				1763	\paragraph{Abbreviations:} \verb\|flush\|, \verb\|f\|.
				1764
				1765	\paragraph{Description:} this command flushes routes selected
				1766	by some criteria.
				1767
				1768	\paragraph{Arguments:} the arguments have the same syntax and semantics
				1769	as the arguments of \verb\|ip route show\|, but routing tables are not
				1770	listed but purged. The only difference is the default action: \verb\|show\|
				1771	dumps all the IP main routing table but \verb\|flush\| prints the helper page.
				1772	The reason for this difference does not require any explanation, does it?
				1773
				1774
				1775	\paragraph{Statistics:} With the \verb\|-statistics\| option, the command
				1776	becomes verbose. It prints out the number of deleted routes and the number
				1777	of rounds made to flush the routing table. If the option is given
				1778	twice, \verb\|ip route flush\| also dumps all the deleted routes
				1779	in the format described in the previous subsection.
				1780
				1781	\paragraph{Examples:} The first example flushes all the
				1782	gatewayed routes from the main table (f.e.\ after a routing daemon crash).
				1783	\begin{verbatim}
				1784	netadm@amber:~ # ip -4 ro flush scope global type unicast
				1785	\end{verbatim}
				1786	This option deserves to be put into a scriptlet \verb\|routef\|.
				1787	\begin{NB}
				1788	This option was described in the \verb\|route(8)\| man page borrowed
				1789	from BSD, but was never implemented in Linux.
				1790	\end{NB}
				1791
				1792	The second example flushes all IPv6 cloned routes:
				1793	\begin{verbatim}
				1794	netadm@amber:~ # ip -6 -s -s ro flush cache
				1795	3ffe:2400::220:afff:fef4:c5d1 via 3ffe:2400::220:afff:fef4:c5d1 \
				1796	dev eth0 metric 0
				1797	cache used 2 age 12sec mtu 1500 rtt 300
				1798	3ffe:2400::280:adff:feb7:8034 via 3ffe:2400::280:adff:feb7:8034 \
				1799	dev eth0 metric 0
				1800	cache used 2 age 15sec mtu 1500 rtt 300
				1801	3ffe:2400::280:c8ff:fe59:5bcc via 3ffe:2400::280:c8ff:fe59:5bcc \
				1802	dev eth0 metric 0
				1803	cache users 1 used 1 age 23sec mtu 1500 rtt 300
				1804	3ffe:2400:0:1:2a0:ccff:fe66:1878 via 3ffe:2400:0:1:2a0:ccff:fe66:1878 \
				1805	dev eth1 metric 0
				1806	cache used 2 age 20sec mtu 1500 rtt 300
				1807	3ffe:2400:0:1:a00:20ff:fe71:fb30 via 3ffe:2400:0:1:a00:20ff:fe71:fb30 \
				1808	dev eth1 metric 0
				1809	cache used 2 age 33sec mtu 1500 rtt 300
				1810	ff02::1 via ff02::1 dev eth1 metric 0
				1811	cache users 1 used 1 age 45sec mtu 1500 rtt 300
				1812
				1813	* Round 1, deleting 6 entries *
				1814	* Flush is complete after 1 round *
				1815	netadm@amber:~ # ip -6 -s -s ro flush cache
				1816	Nothing to flush.
				1817	netadm@amber:~ #
				1818	\end{verbatim}
				1819
				1820	The third example flushes BGP routing tables after a \verb\|gated\|
				1821	death.
				1822	\begin{verbatim}
				1823	netadm@amber:~ # ip ro ls proto gated/bgp \| wc
				1824	1408 9856 78730
				1825	netadm@amber:~ # ip -s ro f proto gated/bgp
				1826
				1827	* Round 1, deleting 1408 entries *
				1828	* Flush is complete after 1 round *
				1829	netadm@amber:~ # ip ro f proto gated/bgp
				1830	Nothing to flush.
				1831	netadm@amber:~ # ip ro ls proto gated/bgp
				1832	netadm@amber:~ #
				1833	\end{verbatim}
				1834
				1835
				1836	\subsection{{\tt ip route get} --- get a single route}
				1837	\label{IP-ROUTE-GET}
				1838
				1839	\paragraph{Abbreviations:} \verb\|get\|, \verb\|g\|.
				1840
				1841	\paragraph{Description:} this command gets a single route to a destination
				1842	and prints its contents exactly as the kernel sees it.
				1843
				1844	\paragraph{Arguments:}
				1845	\begin{itemize}
				1846	\item \verb\|to ADDRESS\| (default)
				1847
				1848	--- the destination address.
				1849
				1850	\item \verb\|from ADDRESS\|
				1851
				1852	--- the source address.
				1853
				1854	\item \verb\|tos TOS\| or \verb\|dsfield TOS\|
				1855
				1856	--- the Type Of Service.
				1857
				1858	\item \verb\|iif NAME\|
				1859
				1860	--- the device from which this packet is expected to arrive.
				1861
				1862	\item \verb\|oif NAME\|
				1863
				1864	--- force the output device on which this packet will be routed.
				1865
				1866	\item \verb\|connected\|
				1867
				1868	--- if no source address (option \verb\|from\|) was given, relookup
				1869	the route with the source set to the preferred address received from the first lookup.
				1870	If policy routing is used, it may be a different route.
				1871
				1872	\end{itemize}
				1873
				1874	Note that this operation is not equivalent to \verb\|ip route show\|.
				1875	\verb\|show\| shows existing routes. \verb\|get\| resolves them and
				1876	creates new clones if necessary. Essentially, \verb\|get\|
				1877	is equivalent to sending a packet along this path.
				1878	If the \verb\|iif\| argument is not given, the kernel creates a route
				1879	to output packets towards the requested destination.
				1880	This is equivalent to pinging the destination
				1881	with a subsequent {\tt ip route ls cache}, however, no packets are
				1882	actually sent. With the \verb\|iif\| argument, the kernel pretends
				1883	that a packet arrived from this interface and searches for
				1884	a path to forward the packet.
				1885
				1886	\paragraph{Output format:} This command outputs routes in the same
				1887	format as \verb\|ip route ls\|.
				1888
				1889	\paragraph{Examples:}
				1890	\begin{itemize}
				1891	\item Find a route to output packets to 193.233.7.82:
				1892	\begin{verbatim}
				1893	kuznet@amber:~ $ ip route get 193.233.7.82
				1894	193.233.7.82 dev eth0 src 193.233.7.65 realms inr.ac
				1895	cache mtu 1500 rtt 300
				1896	kuznet@amber:~ $
				1897	\end{verbatim}
				1898
				1899	\item Find a route to forward packets arriving on \verb\|eth0\|
				1900	from 193.233.7.82 and destined for 193.233.7.82:
				1901	\begin{verbatim}
				1902	kuznet@amber:~ $ ip r g 193.233.7.82 from 193.233.7.82 iif eth0
				1903	193.233.7.82 from 193.233.7.82 dev eth0 src 193.233.7.65 \
				1904	realms inr.ac/inr.ac
				1905	cache <src-direct,redirect> mtu 1500 rtt 300 iif eth0
				1906	kuznet@amber:~ $
				1907	\end{verbatim}
				1908	\begin{NB}
				1909	\label{NB-nature-of-strangeness}
				1910	This is the command that created the funny route from 193.233.7.82
				1911	looped back to 193.233.7.82 (cf.\ NB on~p.\pageref{NB-strange-route}).
				1912	Note the \verb\|redirect\| flag on it.
				1913	\end{NB}
				1914
				1915	\item Find a multicast route for packets arriving on \verb\|eth0\|
				1916	from host 193.233.7.82 and destined for multicast group 224.2.127.254
				1917	(it is assumed that a multicast routing daemon is running.
				1918	In this case, it is \verb\|pimd\|)
				1919	\begin{verbatim}
				1920	kuznet@amber:~ $ ip r g 224.2.127.254 from 193.233.7.82 iif eth0
				1921	multicast 224.2.127.254 from 193.233.7.82 dev lo \
				1922	src 193.233.7.65 realms inr.ac/cosmos
				1923	cache <mc> iif eth0 Oifs: eth1 pimreg
				1924	kuznet@amber:~ $
				1925	\end{verbatim}
				1926	This route differs from the ones seen before. It contains a ``normal'' part
				1927	and a ``multicast'' part. The normal part is used to deliver (or not to
				1928	deliver) the packet to local IP listeners. In this case the router
				1929	is not a member
				1930	of this group, so that route has no \verb\|local\| flag and only
				1931	forwards packets. The output device for such entries is always loopback.
				1932	The multicast part consists of an additional \verb\|Oifs:\| list showing
				1933	the output interfaces.
				1934	\end{itemize}
				1935
				1936
				1937	It is time for a more complicated example. Let us add an invalid
				1938	gatewayed route for a destination which is really directly connected:
				1939	\begin{verbatim}
				1940	netadm@alisa:~ # ip route add 193.233.7.98 via 193.233.7.254
				1941	netadm@alisa:~ # ip route get 193.233.7.98
				1942	193.233.7.98 via 193.233.7.254 dev eth0 src 193.233.7.90
				1943	cache mtu 1500 rtt 3072
				1944	netadm@alisa:~ #
				1945	\end{verbatim}
				1946	and probe it with ping:
				1947	\begin{verbatim}
				1948	netadm@alisa:~ # ping -n 193.233.7.98
				1949	PING 193.233.7.98 (193.233.7.98) from 193.233.7.90 : 56 data bytes
				1950	From 193.233.7.254: Redirect Host(New nexthop: 193.233.7.98)
				1951	64 bytes from 193.233.7.98: icmp_seq=0 ttl=255 time=3.5 ms
				1952	From 193.233.7.254: Redirect Host(New nexthop: 193.233.7.98)
				1953	64 bytes from 193.233.7.98: icmp_seq=1 ttl=255 time=2.2 ms
				1954	64 bytes from 193.233.7.98: icmp_seq=2 ttl=255 time=0.4 ms
				1955	64 bytes from 193.233.7.98: icmp_seq=3 ttl=255 time=0.4 ms
				1956	64 bytes from 193.233.7.98: icmp_seq=4 ttl=255 time=0.4 ms
				1957	^C
				1958	--- 193.233.7.98 ping statistics ---
				1959	5 packets transmitted, 5 packets received, 0% packet loss
				1960	round-trip min/avg/max = 0.4/1.3/3.5 ms
				1961	netadm@alisa:~ #
				1962	\end{verbatim}
				1963	What happened? Router 193.233.7.254 understood that we have a much
				1964	better path to the destination and sent us an ICMP redirect message.
				1965	We may retry \verb\|ip route get\| to see what we have in the routing
				1966	tables now:
				1967	\begin{verbatim}
				1968	netadm@alisa:~ # ip route get 193.233.7.98
				1969	193.233.7.98 dev eth0 src 193.233.7.90
				1970	cache <redirected> mtu 1500 rtt 3072
				1971	netadm@alisa:~ #
				1972	\end{verbatim}
				1973
				1974
				1975
				1976	\section{{\tt ip rule} --- routing policy database management}
				1977	\label{IP-RULE}
				1978
				1979	\paragraph{Abbreviations:} \verb\|rule\|, \verb\|ru\|.
				1980
				1981	\paragraph{Object:} \verb\|rule\|s in the routing policy database control
				1982	the route selection algorithm.
				1983
				1984	Classic routing algorithms used in the Internet make routing decisions
				1985	based only on the destination address of packets (and in theory,
				1986	but not in practice, on the TOS field). The seminal review of classic
				1987	routing algorithms and their modifications can be found in~\cite{RFC1812}.
				1988
				1989	In some circumstances we want to route packets differently depending not only
				1990	on destination addresses, but also on other packet fields: source address,
				1991	IP protocol, transport protocol ports or even packet payload.
				1992	This task is called ``policy routing''.
				1993
				1994	\begin{NB}
				1995	``policy routing'' $\neq$ ``routing policy''.
				1996
				1997	\noindent ``policy routing'' $=$ ``cunning routing''.
				1998
				1999	\noindent ``routing policy'' $=$ ``routing tactics'' or ``routing plan''.
				2000	\end{NB}
				2001
				2002	To solve this task, the conventional destination based routing table, ordered
				2003	according to the longest match rule, is replaced with a ``routing policy
				2004	database'' (or RPDB), which selects routes
				2005	by executing some set of rules. The rules may have lots of keys of different
				2006	natures and therefore they have no natural ordering, but one imposed
				2007	by the administrator. Linux-2.2 RPDB is a linear list of rules
				2008	ordered by numeric priority value.
				2009	RPDB explicitly allows matching a few packet fields:
				2010
				2011	\begin{itemize}
				2012	\item packet source address.
				2013	\item packet destination address.
				2014	\item TOS.
				2015	\item incoming interface (which is packet metadata, rather than a packet field).
				2016	\end{itemize}
				2017
				2018	Matching IP protocols and transport ports is also possible,
				2019	indirectly, via \verb\|ipchains\|, by exploiting their ability
				2020	to mark some classes of packets with \verb\|fwmark\|. Therefore,
				2021	\verb\|fwmark\| is also included in the set of keys checked by rules.
				2022
				2023	Each policy routing rule consists of a {\em selector\/} and an {\em action\/}
				2024	predicate. The RPDB is scanned in the order of increasing priority. The selector
				2025	of each rule is applied to \{source address, destination address, incoming
				2026	interface, tos, fwmark\} and, if the selector matches the packet,
				2027	the action is performed. The action predicate may return with success.
				2028	In this case, it will either give a route or failure indication
				2029	and the RPDB lookup is terminated. Otherwise, the RPDB program
				2030	continues on the next rule.
				2031
				2032	What is the action, semantically? The natural action is to select the
				2033	nexthop and the output device. This is what
				2034	Cisco IOS~\cite{IOS} does. Let us call it ``match \& set''.
				2035	The Linux-2.2 approach is more flexible. The action includes
				2036	lookups in destination-based routing tables and selecting
				2037	a route from these tables according to the classic longest match algorithm.
				2038	The ``match \& set'' approach is the simplest case of the Linux one. It is realized
				2039	when a second level routing table contains a single default route.
				2040	Recall that Linux-2.2 supports multiple tables
				2041	managed with the \verb\|ip route\| command, described in the previous section.
				2042
				2043	At startup time the kernel configures the default RPDB consisting of three
				2044	rules:
				2045
				2046	\begin{enumerate}
				2047	\item Priority: 0, Selector: match anything, Action: lookup routing
				2048	table \verb\|local\| (ID 255).
				2049	The \verb\|local\| table is a special routing table containing
				2050	high priority control routes for local and broadcast addresses.
				2051
osdl.org!shemminger	aba5acd	2004-04-15 20:56:59 +0000	[diff] [blame]	2052	\item Priority: 32766, Selector: match anything, Action: lookup routing
				2053	table \verb\|main\| (ID 254).
				2054	The \verb\|main\| table is the normal routing table containing all non-policy
				2055	routes. This rule may be deleted and/or overridden with other
				2056	ones by the administrator.
				2057
				2058	\item Priority: 32767, Selector: match anything, Action: lookup routing
				2059	table \verb\|default\| (ID 253).
				2060	The \verb\|default\| table is empty. It is reserved for some
				2061	post-processing if no previous default rules selected the packet.
				2062	This rule may also be deleted.
				2063
				2064	\end{enumerate}
				2065
				2066	Do not confuse routing tables with rules: rules point to routing tables,
				2067	several rules may refer to one routing table and some routing tables
				2068	may have no rules pointing to them. If the administrator deletes all the rules
				2069	referring to a table, the table is not used, but it still exists
				2070	and will disappear only after all the routes contained in it are deleted.
				2071
				2072
				2073	\paragraph{Rule attributes:} Each RPDB entry has additional
				2074	attributes. F.e.\ each rule has a pointer to some routing
				2075	table. NAT and masquerading rules have an attribute to select a new IP
				2076	address to translate/masquerade. Besides that, rules have some
				2077	optional attributes, which routes have, namely \verb\|realms\|.
				2078	These values do not override those contained in the routing tables. They
				2079	are only used if the route did not select any attributes.
				2080
				2081
				2082	\paragraph{Rule types:} The RPDB may contain rules of the following
				2083	types:
				2084	\begin{itemize}
				2085	\item \verb\|unicast\| --- the rule prescribes to return the route found
				2086	in the routing table referenced by the rule.
				2087	\item \verb\|blackhole\| --- the rule prescribes to silently drop the packet.
				2088	\item \verb\|unreachable\| --- the rule prescribes to generate a ``Network
				2089	is unreachable'' error.
				2090	\item \verb\|prohibit\| --- the rule prescribes to generate a
				2091	``Communication is administratively prohibited'' error.
				2092	\item \verb\|nat\| --- the rule prescribes to translate the source address
				2093	of the IP packet into some other value. More about NAT is
				2094	in Appendix~\ref{ROUTE-NAT}, p.\pageref{ROUTE-NAT}.
				2095	\end{itemize}
				2096
				2097
				2098	\paragraph{Commands:} \verb\|add\|, \verb\|delete\| and \verb\|show\|
				2099	(or \verb\|list\|).
				2100
				2101	\subsection{{\tt ip rule add} --- insert a new rule\\
				2102	{\tt ip rule delete} --- delete a rule}
				2103	\label{IP-RULE-ADD}
				2104
				2105	\paragraph{Abbreviations:} \verb\|add\|, \verb\|a\|; \verb\|delete\|, \verb\|del\|,
				2106	\verb\|d\|.
				2107
				2108	\paragraph{Arguments:}
				2109
				2110	\begin{itemize}
				2111	\item \verb\|type TYPE\| (default)
				2112
				2113	--- the type of this rule. The list of valid types was given in the previous
				2114	subsection.
				2115
				2116	\item \verb\|from PREFIX\|
				2117
				2118	--- select the source prefix to match.
				2119
				2120	\item \verb\|to PREFIX\|
				2121
				2122	--- select the destination prefix to match.
				2123
				2124	\item \verb\|iif NAME\|
				2125
				2126	--- select the incoming device to match. If the interface is loopback,
				2127	the rule only matches packets originating from this host. This means that you
				2128	may create separate routing tables for forwarded and local packets and,
				2129	hence, completely segregate them.
				2130
				2131	\item \verb\|tos TOS\| or \verb\|dsfield TOS\|
				2132
				2133	--- select the TOS value to match.
				2134
				2135	\item \verb\|fwmark MARK\|
				2136
				2137	--- select the \verb\|fwmark\| value to match.
				2138
				2139	\item \verb\|priority PREFERENCE\|
				2140
				2141	--- the priority of this rule. Each rule should have an explicitly
				2142	set {\em unique\/} priority value.
				2143	\begin{NB}
				2144	Really, for historical reasons \verb\|ip rule add\| does not require a
				2145	priority value and allows them to be non-unique.
				2146	If the user does not supply a priority, it is selected by the kernel.
				2147	If the user creates a rule with a priority value that
				2148	already exists, the kernel does not reject the request. It adds
				2149	the new rule before all old rules of the same priority.
				2150
				2151	It is a mistake in design, no more. And it will be fixed one day,
				2152	so do not rely on this feature. Use explicit priorities.
				2153	\end{NB}
				2154
				2155
				2156	\item \verb\|table TABLEID\|
				2157
				2158	--- the routing table identifier to look up if the rule selector matches.
				2159
				2160	\item \verb\|realms FROM/TO\|
				2161
				2162	--- Realms to select if the rule matched and the routing table lookup
				2163	succeeded. Realm \verb\|TO\| is only used if the route did not select
				2164	any realm.
				2165
				2166	\item \verb\|nat ADDRESS\|
				2167
				2168	--- The base of the IP address block to translate (for source addresses).
				2169	The \verb\|ADDRESS\| may be either the start of the block of NAT addresses
				2170	(selected by NAT routes) or in linux-2.2 a local host address (or even zero).
				2171	In the last case the router does not translate the packets,
				2172	but masquerades them to this address; this feature disappeared in 2.4.
				2173	More about NAT is in Appendix~\ref{ROUTE-NAT},
				2174	p.\pageref{ROUTE-NAT}.
				2175
				2176	\end{itemize}
				2177
				2178	\paragraph{Warning:} Changes to the RPDB made with these commands
				2179	do not become active immediately. It is assumed that after
				2180	a script finishes a batch of updates, it flushes the routing cache
				2181	with \verb\|ip route flush cache\|.
				2182
				2183	\paragraph{Examples:}
				2184	\begin{itemize}
				2185	\item Route packets with source addresses from 192.203.80/24
				2186	according to routing table \verb\|inr.ruhep\|:
				2187	\begin{verbatim}
				2188	ip ru add from 192.203.80.0/24 table inr.ruhep prio 220
				2189	\end{verbatim}
				2190
				2191	\item Translate packet source address 193.233.7.83 into 192.203.80.144
				2192	and route it according to table \#1 (actually, it is \verb\|inr.ruhep\|):
				2193	\begin{verbatim}
				2194	ip ru add from 193.233.7.83 nat 192.203.80.144 table 1 prio 320
				2195	\end{verbatim}
				2196
				2197	\item Delete the unused default rule:
				2198	\begin{verbatim}
				2199	ip ru del prio 32767
				2200	\end{verbatim}
				2201
				2202	\end{itemize}
				2203
				2204
				2205
				2206	\subsection{{\tt ip rule show} --- list rules}
				2207	\label{IP-RULE-SHOW}
				2208
				2209	\paragraph{Abbreviations:} \verb\|show\|, \verb\|list\|, \verb\|sh\|, \verb\|ls\|, \verb\|l\|.
				2210
				2211
				2212	\paragraph{Arguments:} Good news, this is one command that has no arguments.
				2213
				2214	\paragraph{Output format:}
				2215
				2216	\begin{verbatim}
				2217	kuznet@amber:~ $ ip ru ls
				2218	0: from all lookup local
				2219	200: from 192.203.80.0/24 to 193.233.7.0/24 lookup main
				2220	210: from 192.203.80.0/24 to 192.203.80.0/24 lookup main
				2221	220: from 192.203.80.0/24 lookup inr.ruhep realms inr.ruhep/radio-msu
				2222	300: from 193.233.7.83 to 193.233.7.0/24 lookup main
				2223	310: from 193.233.7.83 to 192.203.80.0/24 lookup main
				2224	320: from 193.233.7.83 lookup inr.ruhep map-to 192.203.80.144
				2225	32766: from all lookup main
				2226	kuznet@amber:~ $
				2227	\end{verbatim}
				2228
				2229	In the first column is the rule priority value followed
				2230	by a colon. Then the selectors follow. Each key is prefixed
				2231	with the same keyword that was used to create the rule.
				2232
				2233	The keyword \verb\|lookup\| is followed by a routing table identifier,
				2234	as it is recorded in the file \verb\|/etc/iproute2/rt_tables\|.
				2235
				2236	If the rule does NAT (f.e.\ rule \#320), it is shown by the keyword
				2237	\verb\|map-to\| followed by the start of the block of addresses to map.
				2238
				2239	The sense of this example is pretty simple. The prefixes
				2240	192.203.80.0/24 and 193.233.7.0/24 form the internal network, but
				2241	they are routed differently when the packets leave it.
				2242	Besides that, the host 193.233.7.83 is translated into
				2243	another prefix to look like 192.203.80.144 when talking
				2244	to the outer world.
				2245
Kirill Tkhai	2f4e171	2015-10-20 13:41:48 +0300	[diff] [blame]	2246	\subsection{{\tt ip rule save} --- save rules tables}
				2247	\label{IP-RULE-SAVE}
				2248
				2249	\paragraph{Description:} this command saves the contents of the rules
				2250	tables or the rule(s) selected by some criteria to standard output.
				2251
				2252	\paragraph{Arguments:} \verb\|ip rule save\| has the same arguments as
				2253	\verb\|ip rule show\|.
				2254
				2255	\paragraph{Example:} This saves all the rules to the {\tt saved\_rules}
				2256	file:
				2257	\begin{verbatim}
				2258	dan@caffeine:~ # ip rule save > saved_rules
				2259	\end{verbatim}
				2260
				2261	\paragraph{Output format:} The format of the data stream provided by
				2262	\verb\|ip rule save\| is that of \verb\|rtnetlink\|. See
				2263	\verb\|rtnetlink(7)\| for more information.
				2264
				2265	\subsection{{\tt ip rule restore} --- restore rules tables}
				2266	\label{IP-RULE-RESTORE}
				2267
				2268	\paragraph{Description:} this command restores the contents of the rules
				2269	tables according to a data stream as provided by \verb\|ip rule save\| via
				2270	standard input. Note that any rules already in the table are left unchanged,
				2271	and duplicates are not ignored.
				2272
				2273	\paragraph{Arguments:} This command takes no arguments.
				2274
				2275	\paragraph{Example:} This restores all rules that were saved to the
				2276	{\tt saved\_rules} file:
				2277
				2278	\begin{verbatim}
				2279	dan@caffeine:~ # ip rule restore < saved_rules
				2280	\end{verbatim}
				2281
osdl.org!shemminger	aba5acd	2004-04-15 20:56:59 +0000	[diff] [blame]	2282
				2283
				2284	\section{{\tt ip maddress} --- multicast addresses management}
				2285	\label{IP-MADDR}
				2286
				2287	\paragraph{Object:} \verb\|maddress\| objects are multicast addresses.
				2288
				2289	\paragraph{Commands:} \verb\|add\|, \verb\|delete\|, \verb\|show\| (or \verb\|list\|).
				2290
				2291	\subsection{{\tt ip maddress show} --- list multicast addresses}
				2292
				2293	\paragraph{Abbreviations:} \verb\|show\|, \verb\|list\|, \verb\|sh\|, \verb\|ls\|, \verb\|l\|.
				2294
				2295	\paragraph{Arguments:}
				2296
				2297	\begin{itemize}
				2298
				2299	\item \verb\|dev NAME\| (default)
				2300
				2301	--- the device name.
				2302
				2303	\end{itemize}
				2304
				2305	\paragraph{Output format:}
				2306
				2307	\begin{verbatim}
				2308	kuznet@alisa:~ $ ip maddr ls dummy
				2309	2: dummy
				2310	link 33:33:00:00:00:01
				2311	link 01:00:5e:00:00:01
				2312	inet 224.0.0.1 users 2
				2313	inet6 ff02::1
				2314	kuznet@alisa:~ $
				2315	\end{verbatim}
				2316
				2317	The first line of the output shows the interface index and its name.
				2318	Then the multicast address list follows. Each line starts with the
				2319	protocol identifier. The word \verb\|link\| denotes a link layer
				2320	multicast address.
				2321
				2322	If a multicast address has more than one user, the number
				2323	of users is shown after the \verb\|users\| keyword.
				2324
				2325	One additional feature not present in the example above
				2326	is the \verb\|static\| flag, which indicates that the address was joined
				2327	with \verb\|ip maddr add\|. See the following subsection.
				2328
				2329
				2330
				2331	\subsection{{\tt ip maddress add} --- add a multicast address\\
				2332	{\tt ip maddress delete} --- delete a multicast address}
				2333
				2334	\paragraph{Abbreviations:} \verb\|add\|, \verb\|a\|; \verb\|delete\|, \verb\|del\|, \verb\|d\|.
				2335
				2336	\paragraph{Description:} these commands attach/detach
				2337	a static link layer multicast address to listen on the interface.
				2338	Note that it is impossible to join protocol multicast groups
				2339	statically. This command only manages link layer addresses.
				2340
				2341
				2342	\paragraph{Arguments:}
				2343
				2344	\begin{itemize}
				2345	\item \verb\|address LLADDRESS\| (default)
				2346
				2347	--- the link layer multicast address.
				2348
				2349	\item \verb\|dev NAME\|
				2350
				2351	--- the device to join/leave this multicast address.
				2352
				2353	\end{itemize}
				2354
				2355
				2356	\paragraph{Example:} Let us continue with the example from the previous subsection.
				2357
				2358	\begin{verbatim}
				2359	netadm@alisa:~ # ip maddr add 33:33:00:00:00:01 dev dummy
				2360	netadm@alisa:~ # ip -0 maddr ls dummy
				2361	2: dummy
				2362	link 33:33:00:00:00:01 users 2 static
				2363	link 01:00:5e:00:00:01
				2364	netadm@alisa:~ # ip maddr del 33:33:00:00:00:01 dev dummy
				2365	\end{verbatim}
				2366
				2367	\begin{NB}
				2368	Neither \verb\|ip\| nor the kernel checks for multicast address validity.
				2369	Particularly, this means that you can try to load a unicast address
				2370	instead of a multicast address. Most drivers will ignore such addresses,
				2371	but several (f.e.\ Tulip) will intern them to their on-board filter.
				2372	The effects may be strange. Namely, the addresses become additional
				2373	local link addresses and, if you loaded the address of another host
				2374	to the router, wait for duplicated packets on the wire.
				2375	It is not a bug, but rather a hole in the API and intra-kernel interfaces.
				2376	This feature is really more useful for traffic monitoring, but using it
				2377	with Linux-2.2 you {\em have to\/} be sure that the host is not
				2378	a router and, especially, that it is not a transparent proxy or masquerading
				2379	agent.
				2380	\end{NB}
				2381
				2382
				2383
				2384	\section{{\tt ip mroute} --- multicast routing cache management}
				2385	\label{IP-MROUTE}
				2386
				2387	\paragraph{Abbreviations:} \verb\|mroute\|, \verb\|mr\|.
				2388
				2389	\paragraph{Object:} \verb\|mroute\| objects are multicast routing cache
				2390	entries created by a user level mrouting daemon
				2391	(f.e.\ \verb\|pimd\| or \verb\|mrouted\|).
				2392
				2393	Due to the limitations of the current interface to the multicast routing
				2394	engine, it is impossible to change \verb\|mroute\| objects administratively,
				2395	so we may only display them. This limitation will be removed
				2396	in the future.
				2397
				2398	\paragraph{Commands:} \verb\|show\| (or \verb\|list\|).
				2399
				2400
				2401	\subsection{{\tt ip mroute show} --- list mroute cache entries}
				2402
				2403	\paragraph{Abbreviations:} \verb\|show\|, \verb\|list\|, \verb\|sh\|, \verb\|ls\|, \verb\|l\|.
				2404
				2405	\paragraph{Arguments:}
				2406
				2407	\begin{itemize}
				2408	\item \verb\|to PREFIX\| (default)
				2409
				2410	--- the prefix selecting the destination multicast addresses to list.
				2411
				2412
				2413	\item \verb\|iif NAME\|
				2414
				2415	--- the interface on which multicast packets are received.
				2416
				2417
				2418	\item \verb\|from PREFIX\|
				2419
				2420	--- the prefix selecting the IP source addresses of the multicast route.
				2421
				2422
				2423	\end{itemize}
				2424
				2425	\paragraph{Output format:}
				2426
				2427	\begin{verbatim}
				2428	kuznet@amber:~ $ ip mroute ls
				2429	(193.232.127.6, 224.0.1.39) Iif: unresolved
				2430	(193.232.244.34, 224.0.1.40) Iif: unresolved
				2431	(193.233.7.65, 224.66.66.66) Iif: eth0 Oifs: pimreg
				2432	kuznet@amber:~ $
				2433	\end{verbatim}
				2434
				2435	Each line shows one (S,G) entry in the multicast routing cache,
				2436	where S is the source address and G is the multicast group. \verb\|Iif\| is
				2437	the interface on which multicast packets are expected to arrive.
				2438	If the word \verb\|unresolved\| is there instead of the interface name,
				2439	it means that the routing daemon still hasn't resolved this entry.
				2440	The keyword \verb\|Oifs\| is followed by a list of output interfaces, separated
				2441	by spaces. If a multicast routing entry is created with non-trivial
				2442	TTL scope, administrative distances are appended to the device names
				2443	in the \verb\|Oifs\| list.
				2444
				2445	\paragraph{Statistics:} The \verb\|-statistics\| option also prints the
				2446	number of packets and bytes forwarded along this route and
				2447	the number of packets that arrived on the wrong interface, if this number is not zero.
				2448
				2449	\begin{verbatim}
				2450	kuznet@amber:~ $ ip -s mr ls 224.66/16
				2451	(193.233.7.65, 224.66.66.66) Iif: eth0 Oifs: pimreg
				2452	9383 packets, 300256 bytes
				2453	kuznet@amber:~ $
				2454	\end{verbatim}
				2455
				2456
				2457	\section{{\tt ip tunnel} --- tunnel configuration}
				2458	\label{IP-TUNNEL}
				2459
				2460	\paragraph{Abbreviations:} \verb\|tunnel\|, \verb\|tunl\|.
				2461
				2462	\paragraph{Object:} \verb\|tunnel\| objects are tunnels, encapsulating
				2463	packets in IPv4 packets and then sending them over the IP infrastructure.
				2464
				2465	\paragraph{Commands:} \verb\|add\|, \verb\|delete\|, \verb\|change\|, \verb\|show\|
				2466	(or \verb\|list\|).
				2467
				2468	\paragraph{See also:} A more informal discussion of tunneling
				2469	over IP and the \verb\|ip tunnel\| command can be found in~\cite{IP-TUNNELS}.
				2470
				2471	\subsection{{\tt ip tunnel add} --- add a new tunnel\\
				2472	{\tt ip tunnel change} --- change an existing tunnel\\
				2473	{\tt ip tunnel delete} --- destroy a tunnel}
				2474
				2475	\paragraph{Abbreviations:} \verb\|add\|, \verb\|a\|; \verb\|change\|, \verb\|chg\|;
				2476	\verb\|delete\|, \verb\|del\|, \verb\|d\|.
				2477
				2478
				2479	\paragraph{Arguments:}
				2480
				2481	\begin{itemize}
				2482
				2483	\item \verb\|name NAME\| (default)
				2484
				2485	--- select the tunnel device name.
				2486
				2487	\item \verb\|mode MODE\|
				2488
				2489	--- set the tunnel mode. Three modes are currently available:
				2490	\verb\|ipip\|, \verb\|sit\| and \verb\|gre\|.
				2491
				2492	\item \verb\|remote ADDRESS\|
				2493
				2494	--- set the remote endpoint of the tunnel.
				2495
				2496	\item \verb\|local ADDRESS\|
				2497
				2498	--- set the fixed local address for tunneled packets.
				2499	It must be an address on another interface of this host.
				2500
				2501	\item \verb\|ttl N\|
				2502
				2503	--- set a fixed TTL \verb\|N\| on tunneled packets.
				2504	\verb\|N\| is a number in the range 1--255. 0 is a special value
				2505	meaning that packets inherit the TTL value.
				2506	The default value is: \verb\|inherit\|.
				2507
				2508	\item \verb\|tos T\| or \verb\|dsfield T\|
				2509
				2510	--- set a fixed TOS \verb\|T\| on tunneled packets.
				2511	The default value is: \verb\|inherit\|.
				2512
				2513
				2514
				2515	\item \verb\|dev NAME\|
				2516
				2517	--- bind the tunnel to the device \verb\|NAME\| so that
				2518	tunneled packets will only be routed via this device and will
				2519	not be able to escape to another device when the route to endpoint changes.
				2520
				2521	\item \verb\|nopmtudisc\|
				2522
				2523	--- disable Path MTU Discovery on this tunnel.
				2524	It is enabled by default. Note that a fixed TTL is incompatible
				2525	with this option: tunneling with a fixed TTL always performs PMTU discovery.
				2526
				2527	\item \verb\|key K\|, \verb\|ikey K\|, \verb\|okey K\|
				2528
				2529	--- (only GRE tunnels) use keyed GRE with key \verb\|K\|. \verb\|K\| is
				2530	either a number or an IP address-like dotted quad.
				2531	The \verb\|key\| parameter sets the key to use in both directions.
				2532	The \verb\|ikey\| and \verb\|okey\| parameters set different keys for input and output.
				2533
				2534
				2535	\item \verb\|csum\|, \verb\|icsum\|, \verb\|ocsum\|
				2536
				2537	--- (only GRE tunnels) generate/require checksums for tunneled packets.
				2538	The \verb\|ocsum\| flag calculates checksums for outgoing packets.
				2539	The \verb\|icsum\| flag requires that all input packets have the correct
				2540	checksum. The \verb\|csum\| flag is equivalent to the combination
				2541	``\verb\|icsum\| \verb\|ocsum\|''.
				2542
				2543	\item \verb\|seq\|, \verb\|iseq\|, \verb\|oseq\|
				2544
				2545	--- (only GRE tunnels) serialize packets.
				2546	The \verb\|oseq\| flag enables sequencing of outgoing packets.
				2547	The \verb\|iseq\| flag requires that all input packets are serialized.
				2548	The \verb\|seq\| flag is equivalent to the combination ``\verb\|iseq\| \verb\|oseq\|''.
				2549
				2550	\begin{NB}
				2551	I think this option does not
				2552	work. At least, I did not test it, did not debug it and
				2553	do not even understand how it is supposed to work or for what
				2554	purpose Cisco planned to use it. Do not use it.
				2555	\end{NB}
				2556
				2557
				2558	\end{itemize}
				2559
				2560	\paragraph{Example:} Create a point-to-point IPv6 tunnel with maximal TTL of 32.
				2561	\begin{verbatim}
				2562	netadm@amber:~ # ip tunl add Cisco mode sit remote 192.31.7.104 \
				2563	local 192.203.80.142 ttl 32
				2564	\end{verbatim}
				2565
				2566	\subsection{{\tt ip tunnel show} --- list tunnels}
				2567
				2568	\paragraph{Abbreviations:} \verb\|show\|, \verb\|list\|, \verb\|sh\|, \verb\|ls\|, \verb\|l\|.
				2569
				2570
				2571	\paragraph{Arguments:} None.
				2572
				2573	\paragraph{Output format:}
				2574	\begin{verbatim}
				2575	kuznet@amber:~ $ ip tunl ls Cisco
				2576	Cisco: ipv6/ip remote 192.31.7.104 local 192.203.80.142 ttl 32
				2577	kuznet@amber:~ $
				2578	\end{verbatim}
				2579	The line starts with the tunnel device name followed by a colon.
				2580	Then the tunnel mode follows. The parameters of the tunnel are listed
				2581	with the same keywords that were used when creating the tunnel.
				2582
				2583	\paragraph{Statistics:}
				2584
				2585	\begin{verbatim}
				2586	kuznet@amber:~ $ ip -s tunl ls Cisco
				2587	Cisco: ipv6/ip remote 192.31.7.104 local 192.203.80.142 ttl 32
				2588	RX: Packets Bytes Errors CsumErrs OutOfSeq Mcasts
				2589	12566 1707516 0 0 0 0
				2590	TX: Packets Bytes Errors DeadLoop NoRoute NoBufs
				2591	13445 1879677 0 0 0 0
				2592	kuznet@amber:~ $
				2593	\end{verbatim}
				2594	Essentially, these numbers are the same as the numbers
				2595	printed with {\tt ip -s link show}
				2596	(sec.\ref{IP-LINK-SHOW}, p.\pageref{IP-LINK-SHOW}) but the tags are different
				2597	to reflect that they are tunnel specific.
				2598	\begin{itemize}
				2599	\item \verb\|CsumErrs\| --- the total number of packets dropped
				2600	because of checksum failures for a GRE tunnel with checksumming enabled.
				2601	\item \verb\|OutOfSeq\| --- the total number of packets dropped
				2602	because they arrived out of sequence for a GRE tunnel with
				2603	serialization enabled.
				2604	\item \verb\|Mcasts\| --- the total number of multicast packets
				2605	received on a broadcast GRE tunnel.
				2606	\item \verb\|DeadLoop\| --- the total number of packets which were not
				2607	transmitted because the tunnel is looped back to itself.
				2608	\item \verb\|NoRoute\| --- the total number of packets which were not
				2609	transmitted because there is no IP route to the remote endpoint.
				2610	\item \verb\|NoBufs\| --- the total number of packets which were not
				2611	transmitted because the kernel failed to allocate a buffer.
				2612	\end{itemize}
				2613
				2614
				2615	\section{{\tt ip monitor} and {\tt rtmon} --- state monitoring}
				2616	\label{IP-MONITOR}
				2617
				2618	The \verb\|ip\| utility can monitor the state of devices, addresses
				2619	and routes continuously. This option has a slightly different format.
				2620	Namely,
				2621	the \verb\|monitor\| command is the first in the command line and then
				2622	the object list follows:
				2623	\begin{verbatim}
Martin Schwenke	488c41d	2013-08-19 15:43:30 +1000	[diff] [blame]	2624	ip monitor [ file FILE ] [ all \| OBJECT-LIST ] [ label ]
osdl.org!shemminger	aba5acd	2004-04-15 20:56:59 +0000	[diff] [blame]	2625	\end{verbatim}
Martin Schwenke	488c41d	2013-08-19 15:43:30 +1000	[diff] [blame]	2626	\verb\|OBJECT-LIST\| is the list of object types that we want to
				2627	monitor. It may contain \verb\|link\|, \verb\|address\| and \verb\|route\|.
				2628	Specifying \verb\|label\| indicates that output lines should be labelled
				2629	with the type of object being printed --- this happens by default if
				2630	\verb\|all\| is specified. If no \verb\|file\| argument is given,
				2631	\verb\|ip\| opens RTNETLINK, listens on it and dumps state changes in
				2632	the format described in previous sections.
osdl.org!shemminger	aba5acd	2004-04-15 20:56:59 +0000	[diff] [blame]	2633
				2634	If a file name is given, it does not listen on RTNETLINK,
				2635	but opens the file containing RTNETLINK messages saved in binary format
				2636	and dumps them. Such a history file can be generated with the
				2637	\verb\|rtmon\| utility. This utility has a command line syntax similar to
				2638	\verb\|ip monitor\|.
				2639	Ideally, \verb\|rtmon\| should be started before
				2640	the first network configuration command is issued. F.e.\ if
				2641	you insert:
				2642	\begin{verbatim}
				2643	rtmon file /var/log/rtmon.log
				2644	\end{verbatim}
				2645	in a startup script, you will be able to view the full history
				2646	later.
				2647
				2648	Certainly, it is possible to start \verb\|rtmon\| at any time.
				2649	It prepends the history with the state snapshot dumped at the moment
				2650	of starting.
				2651
				2652
				2653	\section{Route realms and policy propagation, {\tt rtacct}}
				2654	\label{RT-REALMS}
				2655
				2656	On routers using OSPF ASE or, especially, the BGP protocol, routing
				2657	tables may be huge. If we want to classify or to account for the packets
				2658	per route, we will have to keep lots of information. Even worse, if we
				2659	want to distinguish the packets not only by their destination, but
				2660	also by their source, the task gets quadratic complexity and its solution
				2661	is physically impossible.
				2662
				2663	One approach to propagating the policy from routing protocols
				2664	to the forwarding engine has been proposed in~\cite{IOS-BGP-PP}.
				2665	Essentially, Cisco Policy Propagation via BGP is based on the fact
				2666	that dedicated routers all have the RIB (Routing Information Base)
				2667	close to the forwarding engine, so policy routing rules can
				2668	check all the route attributes, including ASPATH information
				2669	and community strings.
				2670
				2671	The Linux architecture, splitting the RIB (maintained by a user level
				2672	daemon) and the kernel based FIB (Forwarding Information Base),
				2673	does not allow such a simple approach.
				2674
				2675	Fortunately, there is another solution
				2676	which allows even more flexible policy and richer semantics.
				2677
				2678	Namely, routes can be clustered together in user space, based on their
				2679	attributes. F.e.\ a BGP router knows a route's ASPATH and its community;
				2680	an OSPF router knows the route tag or its area. The administrator, when adding
				2681	routes manually, also knows their nature. Provided that the number of such
				2682	aggregates (we call them {\em realms\/}) is low, the task of full
				2683	classification both by source and destination becomes quite manageable.
				2684
				2685	So each route may be assigned to a realm. It is assumed that
				2686	this identification is made by a routing daemon, but static routes
				2687	can also be handled manually with \verb\|ip route\| (see sec.\ref{IP-ROUTE},
				2688	p.\pageref{IP-ROUTE}).
				2689	\begin{NB}
				2690	There is a patch to \verb\|gated\|, allowing classification of routes
				2691	to realms with the full set of policy rules implemented in \verb\|gated\|:
				2692	by prefix, by ASPATH, by origin, by tag etc.
				2693	\end{NB}
				2694
				2695	To facilitate the construction (f.e.\ in case the routing
				2696	daemon is not aware of realms), missing realms may be completed
				2697	with routing policy rules, see sec.~\ref{IP-RULE}, p.\pageref{IP-RULE}.
				2698
				2699	For each packet the kernel calculates a tuple of realms: source realm
				2700	and destination realm, using the following algorithm:
				2701
				2702	\begin{enumerate}
				2703	\item If the route has a realm, the destination realm of the packet is set to it.
				2704	\item If the rule has a source realm, the source realm of the packet is set to it.
				2705	If the destination realm was not inherited from the route and the rule has a destination realm,
				2706	it is also set.
				2707	\item If at least one of the realms is still unknown, the kernel finds
				2708	the reversed route to the source of the packet.
				2709	\item If the source realm is still unknown, get it from the reversed route.
				2710	\item If one of the realms is still unknown, swap the realms of reversed
				2711	routes and apply step 2 again.
				2712	\end{enumerate}
				2713
				2714	After this procedure is completed we know what realm the packet
				2715	arrived from and the realm where it is going to propagate to.
				2716	If some of the realms are unknown, they are initialized to zero
				2717	(or realm \verb\|unknown\|).
				2718
				2719	The main application of realms is the TC \verb\|route\| classifier~\cite{TC-CREF},
				2720	where they are used to help assign packets to traffic classes,
				2721	to account, police and schedule them according to this
				2722	classification.
				2723
				2724	A much simpler but still very useful application is incoming packet
				2725	accounting by realms. The kernel gathers a packet statistics summary
				2726	which can be viewed with the \verb\|rtacct\| utility.
				2727	\begin{verbatim}
				2728	kuznet@amber:~ $ rtacct russia
				2729	Realm BytesTo PktsTo BytesFrom PktsFrom
				2730	russia 20576778 169176 47080168 153805
				2731	kuznet@amber:~ $
				2732	\end{verbatim}
				2733	This shows that this router received 153805 packets from
				2734	the realm \verb\|russia\| and forwarded 169176 packets to \verb\|russia\|.
				2735	The realm \verb\|russia\| consists of routes with ASPATHs not leaving
				2736	Russia.
				2737
				2738	Note that locally originating packets are not accounted here,
				2739	\verb\|rtacct\| shows incoming packets only. Using the \verb\|route\|
				2740	classifier (see~\cite{TC-CREF}) you can get even more detailed
				2741	accounting information about outgoing packets, optionally
				2742	summarizing traffic not only by source or destination, but
				2743	by any pair of source and destination realms.
				2744
				2745
				2746	\begin{thebibliography}{99}
				2747	\addcontentsline{toc}{section}{References}
				2748	\bibitem{RFC-NDISC} T.~Narten, E.~Nordmark, W.~Simpson.
				2749	``Neighbor Discovery for IP Version 6 (IPv6)'', RFC-2461.
				2750
				2751	\bibitem{RFC-ADDRCONF} S.~Thomson, T.~Narten.
				2752	``IPv6 Stateless Address Autoconfiguration'', RFC-2462.
				2753
				2754	\bibitem{RFC1812} F.~Baker.
				2755	``Requirements for IP Version 4 Routers'', RFC-1812.
				2756
				2757	\bibitem{RFC1122} R.~T.~Braden.
				2758	``Requirements for Internet hosts --- communication layers'', RFC-1122.
				2759
				2760	\bibitem{IOS} ``Cisco IOS Release 12.0 Network Protocols
				2761	Command Reference, Part 1'' and
				2762	``Cisco IOS Release 12.0 Quality of Service Solutions
				2763	Configuration Guide: Configuring Policy-Based Routing'',\\
				2764	http://www.cisco.com/univercd/cc/td/doc/product/software/ios120.
				2765
				2766	\bibitem{IP-TUNNELS} A.~N.~Kuznetsov.
				2767	``Tunnels over IP in Linux-2.2'', \\
				2768	In: {\tt ftp://ftp.inr.ac.ru/ip-routing/iproute2-current.tar.gz}.
				2769
				2770	\bibitem{TC-CREF} A.~N.~Kuznetsov. ``TC Command Reference'',\\
				2771	In: {\tt ftp://ftp.inr.ac.ru/ip-routing/iproute2-current.tar.gz}.
				2772
				2773	\bibitem{IOS-BGP-PP} ``Cisco IOS Release 12.0 Quality of Service Solutions
				2774	Configuration Guide: Configuring QoS Policy Propagation via
				2775	Border Gateway Protocol'',\\
				2776	http://www.cisco.com/univercd/cc/td/doc/product/software/ios120.
				2777
				2778	\bibitem{RFC-DHCP} R.~Droms.
				2779	``Dynamic Host Configuration Protocol'', RFC-2131.
				2780
Gilad Ben-Yossef	71e5815	2009-10-06 15:40:34 +0200	[diff] [blame]	2781	\bibitem{RFC2414} M.~Allman, S.~Floyd, C.~Partridge.
				2782	``Increasing TCP's Initial Window'', RFC-2414.
				2783
osdl.org!shemminger	aba5acd	2004-04-15 20:56:59 +0000	[diff] [blame]	2784	\end{thebibliography}
				2785
				2786
				2787
				2788
				2789	\appendix
				2790	\addcontentsline{toc}{section}{Appendix}
				2791
				2792	\section{Source address selection}
				2793	\label{ADDR-SEL}
				2794
				2795	When a host creates an IP packet, it must select some source
				2796	address. Correct source address selection is a critical procedure,
				2797	because it gives the receiver the information needed to deliver a
				2798	reply. If the source is selected incorrectly, in the best case,
				2799	the backward path may appear different from the forward one, which
				2800	is harmful for performance. In the worst case, when the addresses
				2801	are administratively scoped, the reply may be lost entirely.
				2802
				2803	Linux-2.2 selects source addresses using the following algorithm:
				2804
				2805	\begin{itemize}
				2806	\item
				2807	The application may select a source address explicitly with the \verb\|bind(2)\|
				2808	syscall or by supplying it to \verb\|sendmsg(2)\| via the ancillary data object
				2809	\verb\|IP_PKTINFO\|. In this case the kernel only checks the validity
				2810	of the address and never tries to ``improve'' an incorrect user choice,
				2811	generating an error instead.
				2812	\begin{NB}
				2813	Never say ``Never''. The sysctl option \verb\|ip_dynaddr\| breaks
				2814	this axiom. It has been made deliberately with the purpose
				2815	of automatically reselecting the address on hosts with dynamic dial-out interfaces.
				2816	However, this hack {\em must not\/} be used on multihomed hosts
				2817	and especially on routers: it would break them.
				2818	\end{NB}
				2819
				2820
				2821	\item Otherwise, IP routing tables can contain an explicit source
				2822	address hint for this destination. The hint is set with the \verb\|src\| parameter
				2823	to the \verb\|ip route\| command, sec.\ref{IP-ROUTE}, p.\pageref{IP-ROUTE}.
				2824
				2825
				2826	\item Otherwise, the kernel searches through the list of addresses
				2827	attached to the interface through which the packets will be routed.
				2828	The search strategies are different for IP and IPv6. Namely:
				2829
				2830	\begin{itemize}
				2831	\item IPv6 searches for the first valid, not deprecated address
				2832	with the same scope as the destination.
				2833
				2834	\item IP searches for the first valid address with a scope wider
				2835	than the scope of the destination but it prefers addresses
				2836	which fall into the same subnet as the nexthop of the route
				2837	to the destination. Unlike IPv6, the scopes of IPv4 destinations
				2838	are not encoded in their addresses but are supplied
				2839	in routing tables instead (the \verb\|scope\| parameter to the \verb\|ip route\| command,
				2840	sec.\ref{IP-ROUTE}, p.\pageref{IP-ROUTE}).
				2841
				2842	\end{itemize}
				2843
				2844
				2845	\item Otherwise, if the scope of the destination is \verb\|link\| or \verb\|host\|,
				2846	the algorithm fails and returns a zero source address.
				2847
				2848	\item Otherwise, all interfaces are scanned to search for an address
				2849	with an appropriate scope. The loopback device \verb\|lo\| is always the first
				2850	in the search list, so that if an address with global scope (not 127.0.0.1!)
				2851	is configured on loopback, it is always preferred.
				2852
				2853	\end{itemize}
				2854
				2855
				2856	\section{Proxy ARP/NDISC}
				2857	\label{PROXY-NEIGH}
				2858
				2859	Routers may answer ARP/NDISC solicitations on behalf of other hosts.
				2860	In Linux-2.2 proxy ARP on an interface may be enabled
				2861	by setting the kernel \verb\|sysctl\| variable
				2862	\verb\|/proc/sys/net/ipv4/conf/<dev>/proxy_arp\| to 1. After this, the router
				2863	starts to answer ARP requests on the interface \verb\|<dev>\|, provided
				2864	the route to the requested destination does {\em not\/} go back via the same
				2865	device.
				2866
				2867	The variable \verb\|/proc/sys/net/ipv4/conf/all/proxy_arp\| enables proxy
				2868	ARP on all the IP devices.
				2869
				2870	However, this approach fails in the case of IPv6 because the router
				2871	must join the solicited node multicast address to listen for the corresponding
				2872	NDISC queries. It means that proxy NDISC is possible only on a per destination
				2873	basis.
				2874
				2875	Logically, proxy ARP/NDISC is not a kernel task. It can easily be implemented
				2876	in user space. However, similar functionality was present in BSD kernels
				2877	and in Linux-2.0, so we have to preserve it at least to the extent that
				2878	is standardized in BSD.
				2879	\begin{NB}
				2880	Linux-2.0 ARP had a feature called {\em subnet\/} proxy ARP.
				2881	It is replaced with the sysctl flag in Linux-2.2.
				2882	\end{NB}
				2883
				2884
				2885	The \verb\|ip\| utility provides a way to manage proxy ARP/NDISC
				2886	with the \verb\|ip neigh\| command, namely:
				2887	\begin{verbatim}
				2888	ip neigh add proxy ADDRESS [ dev NAME ]
				2889	\end{verbatim}
				2890	adds a new proxy ARP/NDISC record and
				2891	\begin{verbatim}
				2892	ip neigh del proxy ADDRESS [ dev NAME ]
				2893	\end{verbatim}
				2894	deletes it.
				2895
				2896	If the name of the device is not given, the router will answer solicitations
				2897	for address \verb\|ADDRESS\| on all devices, otherwise it will only serve
				2898	the device \verb\|NAME\|. Even if the proxy entry is created with
				2899	\verb\|ip neigh\|, the router {\em will not\/} answer a query if the route
				2900	to the destination goes back via the interface from which the solicitation
				2901	was received.
				2902
				2903	It is important to emphasize that proxy entries have {\em no\/}
				2904	parameters other than these (IP/IPv6 address and optional device).
				2905	Particularly, the entry does not store any link layer address.
				2906	It always advertises the station address of the interface
				2907	on which it sends advertisements (i.e. its own station address).
				2908
				2909	\section{Route NAT status}
				2910	\label{ROUTE-NAT}
				2911
				2912	NAT (or ``Network Address Translation'') remaps some parts
				2913	of the IP address space into other ones. Linux-2.2 route NAT is supposed
				2914	to be used to facilitate policy routing by rewriting addresses
				2915	to other routing domains or to help while renumbering sites
				2916	to another prefix.
				2917
				2918	\paragraph{What it is not:}
				2919	It is necessary to emphasize that {\em it is not supposed\/}
				2920	to be used to compress address space or to split load.
				2921	This is not missing functionality but a design principle.
				2922	Route NAT is {\em stateless\/}. It does not hold any state
				2923	about translated sessions. This means that it handles any number
				2924	of sessions flawlessly. But it also means that it is {\em static\/}.
				2925	It cannot detect the moment when the last TCP client stops
				2926	using an address. For the same reason, it will not help to split
				2927	load between several servers.
				2928	\begin{NB}
				2929	It is a pretty commonly held belief that it is useful to split load between
				2930	several servers with NAT. This is a mistake. All you get from this
				2931	is the requirement that the router keep the state of all the TCP connections
				2932	going via it. Well, if the router is so powerful, run apache on it. 8)
				2933	\end{NB}
				2934
				2935	The second feature: it does not touch packet payload,
				2936	does not try to ``improve'' broken protocols by looking
				2937	through its data and mangling it. It mangles IP addresses,
				2938	only IP addresses and nothing but IP addresses.
				2939	This, too, is a design principle rather than missing functionality.
				2940
				2941	To summarize: if you need to compress address space or keep
				2942	active FTP clients happy, your choice is not route NAT but masquerading,
				2943	port forwarding, NAPT etc.
				2944	\begin{NB}
				2945	By the way, you may also want to look at
				2946	http://www.suse.com/\~{}mha/HyperNews/get/linux-ip-nat.html
				2947	\end{NB}
				2948
				2949
				2950	\paragraph{How it works.}
				2951	Some part of the address space is reserved for dummy addresses
				2952	which will look for all the world like some host addresses
				2953	inside your network. No other hosts may use these addresses,
				2954	however other routers may also be configured to translate them.
				2955	\begin{NB}
				2956	A great advantage of route NAT is that it may be used not
				2957	only in stub networks but in environments with arbitrarily complicated
				2958	structure. It does not firewall, it {\em forwards.}
				2959	\end{NB}
				2960	These addresses are selected by the \verb\|ip route\| command
				2961	(sec.\ref{IP-ROUTE-ADD}, p.\pageref{IP-ROUTE-ADD}). F.e.\
				2962	\begin{verbatim}
				2963	ip route add nat 192.203.80.144 via 193.233.7.83
				2964	\end{verbatim}
				2965	states that the single address 192.203.80.144 is a dummy NAT address.
				2966	For all the world it looks like a host address inside our network.
				2967	For neighbouring hosts and routers it looks like the local address
				2968	of the translating router. The router answers ARP for it, advertises
				2969	this address as routed via it, {\em et al\/}. When the router
				2970	receives a packet destined for 192.203.80.144, it replaces
				2971	this address with 193.233.7.83 which is the address of some real
				2972	host and forwards the packet. If you need to remap
				2973	blocks of addresses, you may use a command like:
				2974	\begin{verbatim}
				2975	ip route add nat 192.203.80.192/26 via 193.233.7.64
				2976	\end{verbatim}
				2977	This command will map a block of 64 addresses 192.203.80.192-255 to
				2978	193.233.7.64-127.
				2979
				2980	When an internal host (193.233.7.83 in the example above)
				2981	sends something to the outer world and these packets are forwarded
				2982	by our router, it should translate the source address 193.233.7.83
				2983	into 192.203.80.144. This task is solved by setting a special
				2984	policy rule (sec.\ref{IP-RULE-ADD}, p.\pageref{IP-RULE-ADD}):
				2985	\begin{verbatim}
				2986	ip rule add prio 320 from 193.233.7.83 nat 192.203.80.144
				2987	\end{verbatim}
				2988	This rule says that the source address 193.233.7.83
				2989	should be translated into 192.203.80.144 before forwarding.
				2990	It is important that the address after the \verb\|nat\| keyword
				2991	is some NAT address, declared by {\tt ip route add nat}.
				2992	If it is just a random address the router will not map to it.
				2993	\begin{NB}
				2994	The exception is when the address is a local address of this
				2995	router (or 0.0.0.0) and masquerading is configured in the linux-2.2
				2996	kernel. In this case the router will masquerade the packets as this address.
				2997	If 0.0.0.0 is selected, the result is equivalent to one
				2998	obtained with firewalling rules. Otherwise, you have the way
				2999	to order Linux to masquerade to this fixed address.
				3000	The NAT mechanism used in linux-2.4 is more flexible than
				3001	masquerading, so this feature has lost its meaning and is disabled.
				3002	\end{NB}
				3003
				3004	If the network has non-trivial internal structure, it is
				3005	useful and even necessary to add rules disabling translation
				3006	when a packet does not leave this network. Let us return to the
				3007	example from sec.\ref{IP-RULE-SHOW} (p.\pageref{IP-RULE-SHOW}).
				3008	\begin{verbatim}
				3009	300: from 193.233.7.83 to 193.233.7.0/24 lookup main
				3010	310: from 193.233.7.83 to 192.203.80.0/24 lookup main
				3011	320: from 193.233.7.83 lookup inr.ruhep map-to 192.203.80.144
				3012	\end{verbatim}
				3013	This block of rules causes normal forwarding when
				3014	packets from 193.233.7.83 do not leave networks 193.233.7/24
				3015	and 192.203.80/24. Also, if the \verb\|inr.ruhep\| table does not
				3016	contain a route to the destination (which means that the routing
				3017	domain owning addresses from 192.203.80/24 is dead), no translation
				3018	will occur. Otherwise, the packets are translated.
				3019
				3020	\paragraph{How to only translate selected ports:}
				3021	If you only want to translate selected ports (f.e.\ http)
				3022	and leave the rest intact, you may use \verb\|ipchains\|
				3023	to \verb\|fwmark\| a class of packets.
				3024	Suppose you did and all the packets from 193.233.7.83
				3025	destined for port 80 are marked with marker 0x1234 in input fwchain.
				3026	In this case you may replace rule \#320 with:
				3027	\begin{verbatim}
				3028	320: from 193.233.7.83 fwmark 1234 lookup main map-to 192.203.80.144
				3029	\end{verbatim}
				3030	and translation will only be enabled for outgoing http requests.
				3031
				3032	\section{Example: minimal host setup}
				3033	\label{EXAMPLE-SETUP}
				3034
				3035	The following script gives an example of a fault safe
				3036	setup of IP (and IPv6, if it is compiled into the kernel)
				3037	in the common case of a node attached to a single broadcast
				3038	network. A more advanced script, which may be used both on multihomed
				3039	hosts and on routers, is described in the following
				3040	section.
				3041
				3042	The utilities used in the script may be found in the
				3043	directory ftp://ftp.inr.ac.ru/ip-routing/:
				3044	\begin{enumerate}
				3045	\item \verb\|ip\| --- package \verb\|iproute2\|.
				3046	\item \verb\|arping\| --- package \verb\|iputils\|.
				3047	\item \verb\|rdisc\| --- package \verb\|iputils\|.
				3048	\end{enumerate}
				3049	\begin{NB}
				3050	It also refers to a DHCP client, \verb\|dhcpcd\|. I should refrain from
				3051	recommending a good DHCP client to use. All that I can
				3052	say is that ISC \verb\|dhcp-2.0b1pl6\| patched with the patch that
				3053	can be found in the \verb\|dhcp.bootp.rarp\| subdirectory of
				3054	the same ftp site {\em does\/} work,
				3055	at least on Ethernet and Token Ring.
				3056	\end{NB}
				3057
				3058	\begin{verbatim}
				3059	#! /bin/bash
				3060	\end{verbatim}
				3061	\begin{flushleft}
				3062	\# {\bf Usage: \verb\|ifone ADDRESS[/PREFIX-LENGTH] [DEVICE]\|}\\
				3063	\# {\bf Parameters:}\\
				3064	\# \$1 --- Static IP address, optionally followed by prefix length.\\
				3065	\# \$2 --- Device name. If it is missing, \verb\|eth0\| is assumed.\\
				3066	\# F.e. \verb\|ifone 193.233.7.90\|
				3067	\end{flushleft}
				3068	\begin{verbatim}
				3069	dev=$2
				3070	: ${dev:=eth0}
				3071	ipaddr=
				3072	\end{verbatim}
				3073	\# Parse IP address, splitting prefix length.
				3074	\begin{verbatim}
				3075	if [ "$1" != "" ]; then
				3076	ipaddr=${1%/*}
				3077	if [ "$1" != "$ipaddr" ]; then
				3078	pfxlen=${1#*/}
				3079	fi
				3080	: ${pfxlen:=24}
				3081	fi
				3082	pfx="${ipaddr}/${pfxlen}"
				3083	\end{verbatim}
				3084
				3085	\begin{flushleft}
				3086	\# {\bf Step 0} --- enable loopback.\\
				3087	\#\\
				3088	\# This step is necessary on any networked box before attempting\\
				3089	\# to configure any other device.\\
				3090	\end{flushleft}
				3091	\begin{verbatim}
				3092	ip link set up dev lo
				3093	ip addr add 127.0.0.1/8 dev lo brd + scope host
				3094	\end{verbatim}
				3095	\begin{flushleft}
				3096	\# IPv6 autoconfigures itself on loopback.\\
				3097	\#\\
				3098	\# If the user gave loopback as the device, we add the address as an alias and exit.
				3099	\end{flushleft}
				3100	\begin{verbatim}
				3101	if [ "$dev" = "lo" ]; then
				3102	if [ "$ipaddr" != "" -a "$ipaddr" != "127.0.0.1" ]; then
				3103	ip address add $ipaddr dev $dev
				3104	exit $?
				3105	fi
				3106	exit 0
				3107	fi
				3108	\end{verbatim}
				3109
				3110	\noindent\# {\bf Step 1} --- enable device \verb\|$dev\|
				3111
				3112	\begin{verbatim}
				3113	if ! ip link set up dev $dev ; then
				3114	echo "Cannot enable interface $dev. Aborting." 1>&2
				3115	exit 1
				3116	fi
				3117	\end{verbatim}
				3118	\begin{flushleft}
				3119	\# The interface is \verb\|UP\|. IPv6 started stateless autoconfiguration itself,\\
				3120	\# and its configuration finishes here. However,\\
				3121	\# IP still needs some static preconfigured address.
				3122	\end{flushleft}
				3123	\begin{verbatim}
				3124	if [ "$ipaddr" = "" ]; then
				3125	echo "No address for $dev is configured, trying DHCP..." 1>&2
				3126	dhcpcd
				3127	exit $?
				3128	fi
				3129	\end{verbatim}
				3130
				3131	\begin{flushleft}
				3132	\# {\bf Step 2} --- IP Duplicate Address Detection~\cite{RFC-DHCP}.\\
				3133	\# Send two probes and wait for result for 3 seconds.\\
				3134	\# If the interface opens slower f.e.\ due to long media detection,\\
				3135	\# you want to increase the timeout.\\
				3136	\end{flushleft}
				3137	\begin{verbatim}
				3138	if ! arping -q -c 2 -w 3 -D -I $dev $ipaddr ; then
				3139	echo "Address $ipaddr is busy, trying DHCP..." 1>&2
				3140	dhcpcd
				3141	exit $?
				3142	fi
				3143	\end{verbatim}
				3144	\begin{flushleft}
				3145	\# OK, the address is unique, we may add it on the interface.\\
				3146	\#\\
				3147	\# {\bf Step 3} --- Configure the address on the interface.
				3148	\end{flushleft}
				3149
				3150	\begin{verbatim}
				3151	if ! ip address add $pfx brd + dev $dev; then
				3152	echo "Failed to add $pfx on $dev, trying DHCP..." 1>&2
				3153	dhcpcd
				3154	exit $?
				3155	fi
				3156	\end{verbatim}
				3157
				3158	\noindent\# {\bf Step 4} --- Announce our presence on the link.
				3159	\begin{verbatim}
				3160	arping -A -c 1 -I $dev $ipaddr
				3161	noarp=$?
				3162	( sleep 2;
				3163	arping -U -c 1 -I $dev $ipaddr ) >& /dev/null </dev/null &
				3164	\end{verbatim}
				3165
				3166	\begin{flushleft}
				3167	\# {\bf Step 5} (optional) --- Add some control routes.\\
				3168	\#\\
				3169	\# 1. Prohibit link local multicast addresses.\\
				3170	\# 2. Prohibit link local (alias, limited) broadcast.\\
				3171	\# 3. Add default multicast route.
				3172	\end{flushleft}
				3173	\begin{verbatim}
				3174	ip route add unreachable 224.0.0.0/24
				3175	ip route add unreachable 255.255.255.255
				3176	if [ `ip link ls $dev \| grep -c MULTICAST` -ge 1 ]; then
				3177	ip route add 224.0.0.0/4 dev $dev scope global
				3178	fi
				3179	\end{verbatim}
				3180
				3181	\begin{flushleft}
				3182	\# {\bf Step 6} --- Add fallback default route with huge metric.\\
				3183	\# If a proxy ARP server is present on the interface, we will be\\
				3184	\# able to talk to all the Internet without further configuration.\\
				3185	\# It is not so cheap though and we still hope that this route\\
				3186	\# will be overridden by a more correct one by rdisc.\\
				3187	\# Do not perform this step if the device is not ARPable,\\
				3188	\# because dead nexthop detection does not work on them.
				3189	\end{flushleft}
				3190	\begin{verbatim}
				3191	if [ "$noarp" = "0" ]; then
				3192	ip ro add default dev $dev metric 30000 scope global
				3193	fi
				3194	\end{verbatim}
				3195
				3196	\begin{flushleft}
				3197	\# {\bf Step 7} --- Restart router discovery and exit.
				3198	\end{flushleft}
				3199	\begin{verbatim}
				3200	killall -HUP rdisc \|\| rdisc -fs
				3201	exit 0
				3202	\end{verbatim}
				3203
				3204
				3205	\section{Example: {\protect\tt ifcfg} --- interface address management}
				3206	\label{EXAMPLE-IFCFG}
				3207
				3208	This is a simplistic script replacing one option of \verb\|ifconfig\|,
				3209	namely, IP address management. It not only adds
				3210	addresses, but also carries out Duplicate Address Detection~\cite{RFC-DHCP},
				3211	sends unsolicited ARP to update the caches of other hosts sharing
				3212	the interface, adds some control routes and restarts Router Discovery
				3213	when it is necessary.
				3214
				3215	I strongly recommend using it {\em instead\/} of \verb\|ifconfig\| both
				3216	on hosts and on routers.
				3217
				3218	\begin{verbatim}
				3219	#! /bin/bash
				3220	\end{verbatim}
				3221	\begin{flushleft}
				3222	\# {\bf Usage: \verb?ifcfg DEVICE[:ALIAS] [add\|del] ADDRESS[/LENGTH] [PEER]?}\\
				3223	\# {\bf Parameters:}\\
				3224	\# ---Device name. It may have alias suffix, separated by colon.\\
				3225	\# ---Command: add, delete or stop.\\
				3226	\# ---IP address, optionally followed by prefix length.\\
				3227	\# ---Optional peer address for pointopoint interfaces.\\
				3228	\# F.e. \verb\|ifcfg eth0 193.233.7.90/24\|
				3229
				3230	\noindent\# This function determines whether it is a router or a host.\\
				3231	\# It returns 0 if the host is apparently not a router.
				3232	\end{flushleft}
				3233	\begin{verbatim}
				3234	CheckForwarding () {
				3235	local sbase fwd
				3236	sbase=/proc/sys/net/ipv4/conf
				3237	fwd=0
				3238	if [ -d $sbase ]; then
				3239	for dir in $sbase/*/forwarding; do
				3240	fwd=$[$fwd + `cat $dir`]
				3241	done
				3242	else
				3243	fwd=2
				3244	fi
				3245	return $fwd
				3246	}
				3247	\end{verbatim}
				3248	\begin{flushleft}
				3249	\# This function restarts Router Discovery.\\
				3250	\end{flushleft}
				3251	\begin{verbatim}
				3252	RestartRDISC () {
				3253	killall -HUP rdisc \|\| rdisc -fs
				3254	}
				3255	\end{verbatim}
				3256	\begin{flushleft}
				3257	\# Calculate ABC "natural" mask length\\
				3258	\# Arg: \$1 = dotquad address
				3259	\end{flushleft}
				3260	\begin{verbatim}
				3261	ABCMaskLen () {
				3262	local class;
				3263	class=${1%%.*}
				3264	if [ $class -eq 0 -o $class -ge 224 ]; then return 0
				3265	elif [ $class -ge 192 ]; then return 24
				3266	elif [ $class -ge 128 ]; then return 16
				3267	else return 8 ; fi
				3268	}
				3269	\end{verbatim}
				3270
				3271
				3272	\begin{flushleft}
				3273	\# {\bf MAIN()}\\
				3274	\#\\
				3275	\# Strip alias suffix separated by colon.
				3276	\end{flushleft}
				3277	\begin{verbatim}
				3278	label="label $1"
				3279	ldev=$1
				3280	dev=${1%:*}
				3281	if [ "$dev" = "" -o "$1" = "help" ]; then
				3282	echo "Usage: ifcfg DEV [[add\|del [ADDR[/LEN]] [PEER] \| stop]" 1>&2
				3283	echo " add - add new address" 1>&2
				3284	echo " del - delete address" 1>&2
				3285	echo " stop - completely disable IP" 1>&2
				3286	exit 1
				3287	fi
				3288	shift
				3289
				3290	CheckForwarding
				3291	fwd=$?
				3292	\end{verbatim}
				3293	\begin{flushleft}
				3294	\# Parse command. If it is ``stop'', flush and exit.
				3295	\end{flushleft}
				3296	\begin{verbatim}
				3297	deleting=0
				3298	case "$1" in
				3299	add) shift ;;
				3300	stop)
				3301	if [ "$ldev" != "$dev" ]; then
				3302	echo "Cannot stop alias $ldev" 1>&2
				3303	exit 1;
				3304	fi
				3305	ip -4 addr flush dev $dev $label \|\| exit 1
				3306	if [ $fwd -eq 0 ]; then RestartRDISC; fi
				3307	exit 0 ;;
				3308	del*)
				3309	deleting=1; shift ;;
				3310	*)
				3311	esac
				3312	\end{verbatim}
				3313	\begin{flushleft}
				3314	\# Parse prefix, split prefix length, separated by slash.
				3315	\end{flushleft}
				3316	\begin{verbatim}
				3317	ipaddr=
				3318	pfxlen=
				3319	if [ "$1" != "" ]; then
				3320	ipaddr=${1%/*}
				3321	if [ "$1" != "$ipaddr" ]; then
				3322	pfxlen=${1#*/}
				3323	fi
				3324	if [ "$ipaddr" = "" ]; then
				3325	echo "$1 is bad IP address." 1>&2
				3326	exit 1
				3327	fi
				3328	fi
				3329	shift
				3330	\end{verbatim}
				3331	\begin{flushleft}
				3332	\# If peer address is present, prefix length is 32.\\
				3333	\# Otherwise, if prefix length was not given, guess it.
				3334	\end{flushleft}
				3335	\begin{verbatim}
				3336	peer=$1
				3337	if [ "$peer" != "" ]; then
				3338	if [ "$pfxlen" != "" -a "$pfxlen" != "32" ]; then
				3339	echo "Peer address with non-trivial netmask." 1>&2
				3340	exit 1
				3341	fi
				3342	pfx="$ipaddr peer $peer"
				3343	else
				3344	if [ "$pfxlen" = "" ]; then
				3345	ABCMaskLen $ipaddr
				3346	pfxlen=$?
				3347	fi
				3348	pfx="$ipaddr/$pfxlen"
				3349	fi
				3350	if [ "$ldev" = "$dev" -a "$ipaddr" != "" ]; then
				3351	label=
				3352	fi
				3353	\end{verbatim}
				3354	\begin{flushleft}
				3355	\# If deletion was requested, delete the address and restart RDISC
				3356	\end{flushleft}
				3357	\begin{verbatim}
				3358	if [ $deleting -ne 0 ]; then
				3359	ip addr del $pfx dev $dev $label \|\| exit 1
				3360	if [ $fwd -eq 0 ]; then RestartRDISC; fi
				3361	exit 0
				3362	fi
				3363	\end{verbatim}
				3364	\begin{flushleft}
				3365	\# Start interface initialization.\\
				3366	\#\\
				3367	\# {\bf Step 0} --- enable device \verb\|$dev\|
				3368	\end{flushleft}
				3369	\begin{verbatim}
				3370	if ! ip link set up dev $dev ; then
				3371	echo "Error: cannot enable interface $dev." 1>&2
				3372	exit 1
				3373	fi
				3374	if [ "$ipaddr" = "" ]; then exit 0; fi
				3375	\end{verbatim}
				3376	\begin{flushleft}
				3377	\# {\bf Step 1} --- IP Duplicate Address Detection~\cite{RFC-DHCP}.\\
				3378	\# Send two probes and wait for result for 3 seconds.\\
				3379	\# If the interface opens slower f.e.\ due to long media detection,\\
				3380	\# you want to increase the timeout.\\
				3381	\end{flushleft}
				3382	\begin{verbatim}
				3383	if ! arping -q -c 2 -w 3 -D -I $dev $ipaddr ; then
				3384	echo "Error: some host already uses address $ipaddr on $dev." 1>&2
				3385	exit 1
				3386	fi
				3387	\end{verbatim}
				3388	\begin{flushleft}
				3389	\# OK, the address is unique. We may add it to the interface.\\
				3390	\#\\
				3391	\# {\bf Step 2} --- Configure the address on the interface.
				3392	\end{flushleft}
				3393	\begin{verbatim}
				3394	if ! ip address add $pfx brd + dev $dev $label; then
				3395	echo "Error: failed to add $pfx on $dev." 1>&2
				3396	exit 1
				3397	fi
				3398	\end{verbatim}
				3399	\noindent\# {\bf Step 3} --- Announce our presence on the link
				3400	\begin{verbatim}
				3401	arping -q -A -c 1 -I $dev $ipaddr
				3402	noarp=$?
				3403	( sleep 2 ;
				3404	arping -q -U -c 1 -I $dev $ipaddr ) >& /dev/null </dev/null &
				3405	\end{verbatim}
				3406	\begin{flushleft}
				3407	\# {\bf Step 4} (optional) --- Add some control routes.\\
				3408	\#\\
				3409	\# 1. Prohibit link local multicast addresses.\\
				3410	\# 2. Prohibit link local (alias, limited) broadcast.\\
				3411	\# 3. Add default multicast route.
				3412	\end{flushleft}
				3413	\begin{verbatim}
				3414	ip route add unreachable 224.0.0.0/24 >& /dev/null
				3415	ip route add unreachable 255.255.255.255 >& /dev/null
				3416	if [ `ip link ls $dev \| grep -c MULTICAST` -ge 1 ]; then
				3417	ip route add 224.0.0.0/4 dev $dev scope global >& /dev/null
				3418	fi
				3419	\end{verbatim}
				3420	\begin{flushleft}
				3421	\# {\bf Step 5} --- Add fallback default route with huge metric.\\
				3422	\# If a proxy ARP server is present on the interface, we will be\\
				3423	\# able to talk to all the Internet without further configuration.\\
				3424	\# Do not perform this step on a router or if the device is not ARPable,\\
				3425	\# because dead nexthop detection does not work on them.
				3426	\end{flushleft}
				3427	\begin{verbatim}
				3428	if [ $fwd -eq 0 ]; then
				3429	if [ $noarp -eq 0 ]; then
				3430	ip ro append default dev $dev metric 30000 scope global
				3431	elif [ "$peer" != "" ]; then
				3432	if ping -q -c 2 -w 4 $peer ; then
				3433	ip ro append default via $peer dev $dev metric 30001
				3434	fi
				3435	fi
				3436	RestartRDISC
				3437	fi
				3438
				3439	exit 0
				3440	\end{verbatim}
				3441	\begin{flushleft}
				3442	\# End of {\bf MAIN()}
				3443	\end{flushleft}
				3444
				3445
				3446	\end{document}