blob: ea147950ff4af1795301db26a1ed937120bf76a0 [file] [log] [blame]
osdl.org!shemmingeraba5acd2004-04-15 20:56:59 +00001\documentstyle[12pt,twoside]{article}
2\def\TITLE{IP Command Reference}
3\input preamble
4\begin{center}
5\Large\bf IP Command Reference.
6\end{center}
7
8
9\begin{center}
10{ \large Alexey~N.~Kuznetsov } \\
11\em Institute for Nuclear Research, Moscow \\
12\verb|kuznet@ms2.inr.ac.ru| \\
13\rm April 14, 1999
14\end{center}
15
16\vspace{5mm}
17
18\tableofcontents
19
20\newpage
21
22\section{About this document}
23
24This document presents a comprehensive description of the \verb|ip| utility
25from the \verb|iproute2| package. It is not a tutorial or user's guide.
26It is a {\em dictionary\/}, not explaining terms,
27but translating them into other terms, which may also be unknown to the reader.
28However, the document is self-contained and the reader, provided they have a
29basic networking background, will find enough information
30and examples to understand and configure Linux-2.2 IP and IPv6
31networking.
32
33This document is split into sections explaining \verb|ip| commands
34and options, decrypting \verb|ip| output and containing a few examples.
35More voluminous examples and some topics, which require more elaborate
36discussion, are in the appendix.
37
38The paragraphs beginning with NB contain side notes, warnings about
39bugs and design drawbacks. They may be skipped at the first reading.
40
41\section{{\tt ip} --- command syntax}
42
43The generic form of an \verb|ip| command is:
44\begin{verbatim}
45ip [ OPTIONS ] OBJECT [ COMMAND [ ARGUMENTS ]]
46\end{verbatim}
47where \verb|OPTIONS| is a set of optional modifiers affecting the
48general behaviour of the \verb|ip| utility or changing its output. All options
49begin with the character \verb|'-'| and may be used in either long or abbreviated
50forms. Currently, the following options are available:
51
52\begin{itemize}
53\item \verb|-V|, \verb|-Version|
54
55--- print the version of the \verb|ip| utility and exit.
56
57
58\item \verb|-s|, \verb|-stats|, \verb|-statistics|
59
60--- output more information. If the option
61appears twice or more, the amount of information increases.
62As a rule, the information is statistics or some time values.
63
vadimk5cb6aa02014-11-04 18:54:30 +020064\item \verb|-d|, \verb|-details|
65
66--- output more detailed information.
osdl.org!shemmingeraba5acd2004-04-15 20:56:59 +000067
68\item \verb|-f|, \verb|-family| followed by a protocol family
69identifier: \verb|inet|, \verb|inet6| or \verb|link|.
70
71--- enforce the protocol family to use. If the option is not present,
72the protocol family is guessed from other arguments. If the rest of the command
73line does not give enough information to guess the family, \verb|ip| falls back to the default
74one, usually \verb|inet| or \verb|any|. \verb|link| is a special family
75identifier meaning that no networking protocol is involved.
76
77\item \verb|-4|
78
79--- shortcut for \verb|-family inet|.
80
81\item \verb|-6|
82
83--- shortcut for \verb|-family inet6|.
84
85\item \verb|-0|
86
87--- shortcut for \verb|-family link|.
88
89
90\item \verb|-o|, \verb|-oneline|
91
92--- output each record on a single line, replacing line feeds
93with the \verb|'\'| character. This is convenient when you want to
94count records with \verb|wc| or to \verb|grep| the output. The trivial
95script \verb|rtpr| converts the output back into readable form.
96
97\item \verb|-r|, \verb|-resolve|
98
99--- use the system's name resolver to print DNS names instead of
100host addresses.
101
102\begin{NB}
103 Do not use this option when reporting bugs or asking for advice.
104\end{NB}
105\begin{NB}
106 \verb|ip| never uses DNS to resolve names to addresses.
107\end{NB}
108
vadimk5cb6aa02014-11-04 18:54:30 +0200109\item \verb|-b|, \verb|-batch FILE|
110
111--- read commands from the provided file or standard input and invoke them.
112The first failure will cause termination of \verb|ip|.
113In batch \verb|FILE| everything which begins with \verb|#| symbol is
114ignored and can be used for comments.
115\paragraph{Example:}
116\begin{verbatim}
117kuznet@kaiser $ cat /tmp/ip_batch.ip
118# This is a comment
119tuntap add mode tap tap1 # This is an another comment
120link set up dev tap1
121addr add 10.0.0.1/24 dev tap1
122kuznet@kaiser $ sudo ip -b /tmp/ip_batch.ip
123\end{verbatim}
124or from standard input:
125\begin{verbatim}
126kuznet@kaiser $ cat /tmp/ip_batch.ip | sudo ip -b -
127\end{verbatim}
128
129\item \verb|-force|
130
131--- don't terminate ip on errors in batch mode.
132If there were any errors during execution of the commands,
133the application return code will be non-zero.
134
135\item \verb|-l|, \verb|-loops COUNT|
136
137--- specify maximum number of loops the 'ip addr flush' logic will attempt
138before giving up. The default is 10. Zero (0) means loop until all
139addresses are removed.
140
osdl.org!shemmingeraba5acd2004-04-15 20:56:59 +0000141\end{itemize}
142
143\verb|OBJECT| is the object to manage or to get information about.
144The object types currently understood by \verb|ip| are:
145
146\begin{itemize}
147\item \verb|link| --- network device
148\item \verb|address| --- protocol (IP or IPv6) address on a device
149\item \verb|neighbour| --- ARP or NDISC cache entry
150\item \verb|route| --- routing table entry
151\item \verb|rule| --- rule in routing policy database
152\item \verb|maddress| --- multicast address
153\item \verb|mroute| --- multicast routing cache entry
154\item \verb|tunnel| --- tunnel over IP
155\end{itemize}
156
157Again, the names of all objects may be written in full or
158abbreviated form, f.e.\ \verb|address| is abbreviated as \verb|addr|
159or just \verb|a|.
160
161\verb|COMMAND| specifies the action to perform on the object.
162The set of possible actions depends on the object type.
163As a rule, it is possible to \verb|add|, \verb|delete| and
164\verb|show| (or \verb|list|) objects, but some objects
165do not allow all of these operations or have some additional commands.
166The \verb|help| command is available for all objects. It prints
167out a list of available commands and argument syntax conventions.
168
169If no command is given, some default command is assumed.
170Usually it is \verb|list| or, if the objects of this class
171cannot be listed, \verb|help|.
172
173\verb|ARGUMENTS| is a list of arguments to the command.
174The arguments depend on the command and object. There are two types of arguments:
175{\em flags\/}, consisting of a single keyword, and {\em parameters\/},
176consisting of a keyword followed by a value. For convenience,
177each command has some {\em default parameter\/}
178which may be omitted. F.e.\ parameter \verb|dev| is the default
179for the {\tt ip link} command, so {\tt ip link ls eth0} is equivalent
180to {\tt ip link ls dev eth0}.
181In the command descriptions below such parameters
182are distinguished with the marker: ``(default)''.
183
184Almost all keywords may be abbreviated with several first (or even single)
185letters. The shortcuts are convenient when \verb|ip| is used interactively,
186but they are not recommended in scripts or when reporting bugs
187or asking for advice. ``Officially'' allowed abbreviations are listed
188in the document body.
189
190
191
192\section{{\tt ip} --- error messages}
193
194\verb|ip| may fail for one of the following reasons:
195
196\begin{itemize}
197\item
198A syntax error on the command line: an unknown keyword, incorrectly formatted
199IP address {\em et al\/}. In this case \verb|ip| prints an error message
200and exits. As a rule, the error message will contain information
201about the reason for the failure. Sometimes it also prints a help page.
202
203\item
204The arguments did not pass verification for self-consistency.
205
206\item
207\verb|ip| failed to compile a kernel request from the arguments
208because the user didn't give enough information.
209
210\item
211The kernel returned an error to some syscall. In this case \verb|ip|
212prints the error message, as it is output with \verb|perror(3)|,
213prefixed with a comment and a syscall identifier.
214
215\item
216The kernel returned an error to some RTNETLINK request.
217In this case \verb|ip| prints the error message, as it is output
218with \verb|perror(3)| prefixed with ``RTNETLINK answers:''.
219
220\end{itemize}
221
222All the operations are atomic, i.e.\
223if the \verb|ip| utility fails, it does not change anything
224in the system. One harmful exception is the \verb|ip link| command
225(Sec.\ref{IP-LINK}, p.\pageref{IP-LINK}),
226which may change only some of the device parameters given
227on the command line.
228
229It is difficult to list all the error messages (especially
230syntax errors). However, as a rule, their meaning is clear
231from the context of the command.
232
233The most common mistakes are:
234
235\begin{enumerate}
236\item Netlink is not configured in the kernel. The message is:
237\begin{verbatim}
238Cannot open netlink socket: Invalid value
239\end{verbatim}
240
241\item RTNETLINK is not configured in the kernel. In this case
242one of the following messages may be printed, depending on the command:
243\begin{verbatim}
244Cannot talk to rtnetlink: Connection refused
245Cannot send dump request: Connection refused
246\end{verbatim}
247
248\item The \verb|CONFIG_IP_MULTIPLE_TABLES| option was not selected
249when configuring the kernel. In this case any attempt to use the
250\verb|ip| \verb|rule| command will fail, f.e.
251\begin{verbatim}
252kuznet@kaiser $ ip rule list
253RTNETLINK error: Invalid argument
254dump terminated
255\end{verbatim}
256
257\end{enumerate}
258
259
260\section{{\tt ip link} --- network device configuration}
261\label{IP-LINK}
262
263\paragraph{Object:} A \verb|link| is a network device and the corresponding
264commands display and change the state of devices.
265
266\paragraph{Commands:} \verb|set| and \verb|show| (or \verb|list|).
267
268\subsection{{\tt ip link set} --- change device attributes}
269
270\paragraph{Abbreviations:} \verb|set|, \verb|s|.
271
272\paragraph{Arguments:}
273
274\begin{itemize}
275\item \verb|dev NAME| (default)
276
277--- \verb|NAME| specifies the network device on which to operate.
278
279\item \verb|up| and \verb|down|
280
281--- change the state of the device to \verb|UP| or \verb|DOWN|.
282
283\item \verb|arp on| or \verb|arp off|
284
285--- change the \verb|NOARP| flag on the device.
286
287\begin{NB}
288This operation is {\em not allowed\/} if the device is in state \verb|UP|.
289However, neither the \verb|ip| utility nor the kernel checks for this condition.
290You can get unpredictable results changing this flag while the
291device is running.
292\end{NB}
293
294\item \verb|multicast on| or \verb|multicast off|
295
296--- change the \verb|MULTICAST| flag on the device.
297
298\item \verb|dynamic on| or \verb|dynamic off|
299
300--- change the \verb|DYNAMIC| flag on the device.
301
302\item \verb|name NAME|
303
304--- change the name of the device. This operation is not
305recommended if the device is running or has some addresses
306already configured.
307
308\item \verb|txqueuelen NUMBER| or \verb|txqlen NUMBER|
309
310--- change the transmit queue length of the device.
311
312\item \verb|mtu NUMBER|
313
314--- change the MTU of the device.
315
316\item \verb|address LLADDRESS|
317
318--- change the station address of the interface.
319
320\item \verb|broadcast LLADDRESS|, \verb|brd LLADDRESS| or \verb|peer LLADDRESS|
321
322--- change the link layer broadcast address or the peer address when
323the interface is \verb|POINTOPOINT|.
324
325\vskip 1mm
326\begin{NB}
327For most devices (f.e.\ for Ethernet) changing the link layer
328broadcast address will break networking.
329Do not use it, if you do not understand what this operation really does.
330\end{NB}
331
Benjamin Therye2613dc2008-06-20 11:07:35 +0200332\item \verb|netns PID|
333
334--- move the device to the network namespace associated with the process PID.
335
osdl.org!shemmingeraba5acd2004-04-15 20:56:59 +0000336\end{itemize}
337
338\vskip 1mm
339\begin{NB}
Tomas Janouseke17b7332007-11-20 15:38:21 +0100340The \verb|PROMISC| and \verb|ALLMULTI| flags are considered
341obsolete and should not be changed administratively, though
342the {\tt ip} utility will allow that.
osdl.org!shemmingeraba5acd2004-04-15 20:56:59 +0000343\end{NB}
344
345\paragraph{Warning:} If multiple parameter changes are requested,
346\verb|ip| aborts immediately after any of the changes has failed.
347This is the only case when \verb|ip| can move the system to
348an unpredictable state. The solution is to avoid changing
349several parameters with one {\tt ip link set} call.
350
351\paragraph{Examples:}
352\begin{itemize}
353\item \verb|ip link set dummy address 00:00:00:00:00:01|
354
355--- change the station address of the interface \verb|dummy|.
356
357\item \verb|ip link set dummy up|
358
359--- start the interface \verb|dummy|.
360
361\end{itemize}
362
363
364\subsection{{\tt ip link show} --- display device attributes}
365\label{IP-LINK-SHOW}
366
367\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|lst|, \verb|sh|, \verb|ls|,
368\verb|l|.
369
370\paragraph{Arguments:}
371\begin{itemize}
372\item \verb|dev NAME| (default)
373
374--- \verb|NAME| specifies the network device to show.
375If this argument is omitted all devices are listed.
376
377\item \verb|up|
378
379--- only display running interfaces.
380
381\end{itemize}
382
383
384\paragraph{Output format:}
385
386\begin{verbatim}
387kuznet@alisa:~ $ ip link ls eth0
3883: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
389 link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
390kuznet@alisa:~ $ ip link ls sit0
3915: sit0@NONE: <NOARP,UP> mtu 1480 qdisc noqueue
392 link/sit 0.0.0.0 brd 0.0.0.0
393kuznet@alisa:~ $ ip link ls dummy
3942: dummy: <BROADCAST,NOARP> mtu 1500 qdisc noop
395 link/ether 00:00:00:00:00:00 brd ff:ff:ff:ff:ff:ff
396kuznet@alisa:~ $
397\end{verbatim}
398
399
400The number before each colon is an {\em interface index\/} or {\em ifindex\/}.
401This number uniquely identifies the interface. This is followed by the {\em interface name\/}
402(\verb|eth0|, \verb|sit0| etc.). The interface name is also
403unique at every given moment. However, the interface may disappear from the
404list (f.e.\ when the corresponding driver module is unloaded) and another
405one with the same name may be created later. Besides that,
406the administrator may change the name of any device with
407\verb|ip| \verb|link| \verb|set| \verb|name|
408to make it more intelligible.
409
410The interface name may have another name or \verb|NONE| appended
411after the \verb|@| sign. This means that this device is bound to some other
412device,
413i.e.\ packets sent through it are encapsulated and sent via the ``master''
414device. If the name is \verb|NONE|, the master is unknown.
415
416Then we see the interface {\em mtu\/} (``maximum transmission unit''). This determines
417the maximal size of data which can be sent as a single packet over this interface.
418
419{\em qdisc\/} (``queuing discipline'') shows the queuing algorithm used
420on the interface. Particularly, \verb|noqueue| means that this interface
421does not queue anything and \verb|noop| means that the interface is in blackhole
422mode i.e.\ all packets sent to it are immediately discarded.
423{\em qlen\/} is the default transmit queue length of the device measured
424in packets.
425
426The interface flags are summarized in the angle brackets.
427
428\begin{itemize}
429\item \verb|UP| --- the device is turned on. It is ready to accept
430packets for transmission and it may inject into the kernel packets received
431from other nodes on the network.
432
433\item \verb|LOOPBACK| --- the interface does not communicate with other
434hosts. All packets sent through it will be returned
435and nothing but bounced packets can be received.
436
437\item \verb|BROADCAST| --- the device has the facility to send packets
438to all hosts sharing the same link. A typical example is an Ethernet link.
439
440\item \verb|POINTOPOINT| --- the link has only two ends with one node
441attached to each end. All packets sent to this link will reach the peer
442and all packets received by us came from this single peer.
443
444If neither \verb|LOOPBACK| nor \verb|BROADCAST| nor \verb|POINTOPOINT|
445are set, the interface is assumed to be NBMA (Non-Broadcast Multi-Access).
446This is the most generic type of device and the most complicated one, because
447the host attached to a NBMA link has no means to send to anyone
448without additionally configured information.
449
450\item \verb|MULTICAST| --- is an advisory flag indicating that the interface
451is aware of multicasting i.e.\ sending packets to some subset of neighbouring
452nodes. Broadcasting is a particular case of multicasting, where the multicast
453group consists of all nodes on the link. It is important to emphasize
454that software {\em must not\/} interpret the absence of this flag as the inability
455to use multicasting on this interface. Any \verb|POINTOPOINT| and
456\verb|BROADCAST| link is multicasting by definition, because we have
457direct access to all the neighbours and, hence, to any part of them.
458Certainly, the use of high bandwidth multicast transfers is not recommended
459on broadcast-only links because of high expense, but it is not strictly
460prohibited.
461
462\item \verb|PROMISC| --- the device listens to and feeds to the kernel all
463traffic on the link even if it is not destined for us, not broadcasted
464and not destined for a multicast group of which we are a member. Usually
465this mode exists only on broadcast links and is used by bridges and for network
466monitoring.
467
468\item \verb|ALLMULTI| --- the device receives all multicast packets
469wandering on the link. This mode is used by multicast routers.
470
471\item \verb|NOARP| --- this flag is different from the other ones. It has
472no invariant value and its interpretation depends on the network protocols
473involved. As a rule, it indicates that the device needs no address
474resolution and that the software or hardware knows how to deliver packets
475without any help from the protocol stacks.
476
477\item \verb|DYNAMIC| --- is an advisory flag indicating that the interface is
478dynamically created and destroyed.
479
480\item \verb|SLAVE| --- this interface is bonded to some other interfaces
481to share link capacities.
482
483\end{itemize}
484
485\vskip 1mm
486\begin{NB}
487There are other flags but they are either obsolete (\verb|NOTRAILERS|)
488or not implemented (\verb|DEBUG|) or specific to some devices
489(\verb|MASTER|, \verb|AUTOMEDIA| and \verb|PORTSEL|). We do not discuss
490them here.
491\end{NB}
osdl.org!shemmingeraba5acd2004-04-15 20:56:59 +0000492
493
494The second line contains information on the link layer addresses
495associated with the device. The first word (\verb|ether|, \verb|sit|)
496defines the interface hardware type. This type determines the format and semantics
497of the addresses and is logically part of the address.
498The default format of the station address and the broadcast address
499(or the peer address for pointopoint links) is a
500sequence of hexadecimal bytes separated by colons, but some link
501types may have their natural address format, f.e.\ addresses
502of tunnels over IP are printed as dotted-quad IP addresses.
503
504\vskip 1mm
505\begin{NB}
506 NBMA links have no well-defined broadcast or peer address,
507 however this field may contain useful information, f.e.\
508 about the address of broadcast relay or about the address of the ARP server.
509\end{NB}
510\begin{NB}
511Multicast addresses are not shown by this command, see
512\verb|ip maddr ls| in~Sec.\ref{IP-MADDR} (p.\pageref{IP-MADDR} of this
513document).
514\end{NB}
515
516
517\paragraph{Statistics:} With the \verb|-statistics| option, \verb|ip| also
518prints interface statistics:
519
520\begin{verbatim}
521kuznet@alisa:~ $ ip -s link ls eth0
5223: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
523 link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
524 RX: bytes packets errors dropped overrun mcast
525 2449949362 2786187 0 0 0 0
526 TX: bytes packets errors dropped carrier collsns
527 178558497 1783945 332 0 332 35172
528kuznet@alisa:~ $
529\end{verbatim}
530\verb|RX:| and \verb|TX:| lines summarize receiver and transmitter
531statistics. They contain:
532\begin{itemize}
533\item \verb|bytes| --- the total number of bytes received or transmitted
534on the interface. This number wraps when the maximal length of the data type
535natural for the architecture is exceeded, so continuous monitoring requires
536a user level daemon snapping it periodically.
537\item \verb|packets| --- the total number of packets received or transmitted
538on the interface.
539\item \verb|errors| --- the total number of receiver or transmitter errors.
540\item \verb|dropped| --- the total number of packets dropped due to lack
541of resources.
542\item \verb|overrun| --- the total number of receiver overruns resulting
543in dropped packets. As a rule, if the interface is overrun, it means
544serious problems in the kernel or that your machine is too slow
545for this interface.
546\item \verb|mcast| --- the total number of received multicast packets. This counter
547is only supported by a few devices.
548\item \verb|carrier| --- total number of link media failures f.e.\ because
549of lost carrier.
550\item \verb|collsns| --- the total number of collision events
551on Ethernet-like media. This number may have a different sense on other
552link types.
553\item \verb|compressed| --- the total number of compressed packets. This is
554available only for links using VJ header compression.
555\end{itemize}
556
557
558If the \verb|-s| option is entered twice or more,
559\verb|ip| prints more detailed statistics on receiver
560and transmitter errors.
561
562\begin{verbatim}
563kuznet@alisa:~ $ ip -s -s link ls eth0
5643: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
565 link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
566 RX: bytes packets errors dropped overrun mcast
567 2449949362 2786187 0 0 0 0
568 RX errors: length crc frame fifo missed
569 0 0 0 0 0
570 TX: bytes packets errors dropped carrier collsns
571 178558497 1783945 332 0 332 35172
572 TX errors: aborted fifo window heartbeat
573 0 0 0 332
574kuznet@alisa:~ $
575\end{verbatim}
576These error names are pure Ethernetisms. Other devices
577may have non-zero values in these fields but they may be
578interpreted differently.
579
580
581\section{{\tt ip address} --- protocol address management}
582
583\paragraph{Abbreviations:} \verb|address|, \verb|addr|, \verb|a|.
584
585\paragraph{Object:} The \verb|address| is a protocol (IP or IPv6) address attached
586to a network device. Each device must have at least one address
587to use the corresponding protocol. It is possible to have several
588different addresses attached to one device. These addresses are not
589discriminated, so that the term {\em alias\/} is not quite appropriate
590for them and we do not use it in this document.
591
592The \verb|ip addr| command displays addresses and their properties,
593adds new addresses and deletes old ones.
594
595\paragraph{Commands:} \verb|add|, \verb|delete|, \verb|flush| and \verb|show|
596(or \verb|list|).
597
598
599\subsection{{\tt ip address add} --- add a new protocol address}
600\label{IP-ADDR-ADD}
601
602\paragraph{Abbreviations:} \verb|add|, \verb|a|.
603
604\paragraph{Arguments:}
605
606\begin{itemize}
607\item \verb|dev NAME|
608
609\noindent--- the name of the device to add the address to.
610
611\item \verb|local ADDRESS| (default)
612
613--- the address of the interface. The format of the address depends
614on the protocol. It is a dotted quad for IP and a sequence of hexadecimal halfwords
615separated by colons for IPv6. The \verb|ADDRESS| may be followed by
616a slash and a decimal number which encodes the network prefix length.
617
618
619\item \verb|peer ADDRESS|
620
621--- the address of the remote endpoint for pointopoint interfaces.
622Again, the \verb|ADDRESS| may be followed by a slash and a decimal number,
623encoding the network prefix length. If a peer address is specified,
624the local address {\em cannot\/} have a prefix length. The network prefix is associated
625with the peer rather than with the local address.
626
627
628\item \verb|broadcast ADDRESS|
629
630--- the broadcast address on the interface.
631
632It is possible to use the special symbols \verb|'+'| and \verb|'-'|
633instead of the broadcast address. In this case, the broadcast address
634is derived by setting/resetting the host bits of the interface prefix.
635
636\vskip 1mm
637\begin{NB}
638Unlike \verb|ifconfig|, the \verb|ip| utility {\em does not\/} set any broadcast
639address unless explicitly requested.
640\end{NB}
641
642
643\item \verb|label NAME|
644
645--- Each address may be tagged with a label string.
646In order to preserve compatibility with Linux-2.0 net aliases,
647this string must coincide with the name of the device or must be prefixed
648with the device name followed by colon.
649
650
651\item \verb|scope SCOPE_VALUE|
652
653--- the scope of the area where this address is valid.
654The available scopes are listed in file \verb|/etc/iproute2/rt_scopes|.
655Predefined scope values are:
656
657 \begin{itemize}
658 \item \verb|global| --- the address is globally valid.
659 \item \verb|site| --- (IPv6 only) the address is site local,
660 i.e.\ it is valid inside this site.
661 \item \verb|link| --- the address is link local, i.e.\
662 it is valid only on this device.
663 \item \verb|host| --- the address is valid only inside this host.
664 \end{itemize}
665
666Appendix~\ref{ADDR-SEL} (p.\pageref{ADDR-SEL} of this document)
667contains more details on address scopes.
668
669\end{itemize}
670
671\paragraph{Examples:}
672\begin{itemize}
673\item \verb|ip addr add 127.0.0.1/8 dev lo brd + scope host|
674
675--- add the usual loopback address to the loopback device.
676
677\item \verb|ip addr add 10.0.0.1/24 brd + dev eth0 label eth0:Alias|
678
679--- add the address 10.0.0.1 with prefix length 24 (i.e.\ netmask
680\verb|255.255.255.0|), standard broadcast and label \verb|eth0:Alias|
681to the interface \verb|eth0|.
682\end{itemize}
683
684
685\subsection{{\tt ip address delete} --- delete a protocol address}
686
687\paragraph{Abbreviations:} \verb|delete|, \verb|del|, \verb|d|.
688
689\paragraph{Arguments:} coincide with the arguments of \verb|ip addr add|.
690The device name is a required argument. The rest are optional.
691If no arguments are given, the first address is deleted.
692
693\paragraph{Examples:}
694\begin{itemize}
695\item \verb|ip addr del 127.0.0.1/8 dev lo|
696
697--- deletes the loopback address from the loopback device.
698It would be best not to repeat this experiment.
699
700\item Disable IP on the interface \verb|eth0|:
701\begin{verbatim}
702 while ip -f inet addr del dev eth0; do
703 : nothing
704 done
705\end{verbatim}
706Another method to disable IP on an interface using {\tt ip addr flush}
707may be found in sec.\ref{IP-ADDR-FLUSH}, p.\pageref{IP-ADDR-FLUSH}.
708
709\end{itemize}
710
711
712\subsection{{\tt ip address show} --- display protocol addresses}
713
714\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|lst|, \verb|sh|, \verb|ls|,
715\verb|l|.
716
717\paragraph{Arguments:}
718
719\begin{itemize}
720\item \verb|dev NAME| (default)
721
722--- the name of the device.
723
724\item \verb|scope SCOPE_VAL|
725
726--- only list addresses with this scope.
727
728\item \verb|to PREFIX|
729
730--- only list addresses matching this prefix.
731
732\item \verb|label PATTERN|
733
734--- only list addresses with labels matching the \verb|PATTERN|.
735\verb|PATTERN| is a usual shell style pattern.
736
737
738\item \verb|dynamic| and \verb|permanent|
739
740--- (IPv6 only) only list addresses installed due to stateless
741address configuration or only list permanent (not dynamic) addresses.
742
743\item \verb|tentative|
744
745--- (IPv6 only) only list addresses which did not pass duplicate
746address detection.
747
748\item \verb|deprecated|
749
750--- (IPv6 only) only list deprecated addresses.
751
752
753\item \verb|primary| and \verb|secondary|
754
755--- only list primary (or secondary) addresses.
756
757\end{itemize}
758
759
760\paragraph{Output format:}
761
762\begin{verbatim}
763kuznet@alisa:~ $ ip addr ls eth0
7643: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc cbq qlen 100
765 link/ether 00:a0:cc:66:18:78 brd ff:ff:ff:ff:ff:ff
766 inet 193.233.7.90/24 brd 193.233.7.255 scope global eth0
767 inet6 3ffe:2400:0:1:2a0:ccff:fe66:1878/64 scope global dynamic
768 valid_lft forever preferred_lft 604746sec
769 inet6 fe80::2a0:ccff:fe66:1878/10 scope link
770kuznet@alisa:~ $
771\end{verbatim}
772
773The first two lines coincide with the output of \verb|ip link ls|.
774It is natural to interpret link layer addresses
775as addresses of the protocol family \verb|AF_PACKET|.
776
777Then the list of IP and IPv6 addresses follows, accompanied by
778additional address attributes: scope value (see Sec.\ref{IP-ADDR-ADD},
779p.\pageref{IP-ADDR-ADD} above), flags and the address label.
780
781Address flags are set by the kernel and cannot be changed
782administratively. Currently, the following flags are defined:
783
784\begin{enumerate}
785\item \verb|secondary|
786
787--- the address is not used when selecting the default source address
788of outgoing packets (Cf.\ Appendix~\ref{ADDR-SEL}, p.\pageref{ADDR-SEL}.).
789An IP address becomes secondary if another address with the same
790prefix bits already exists. The first address is primary.
791It is the leader of the group of all secondary addresses. When the leader
792is deleted, all secondaries are purged too.
Andreas Henriksson3a9e4822008-01-12 17:17:02 +0100793There is a tweak in \verb|/proc/sys/net/ipv4/conf/<dev>/promote_secondaries|
794which activates promotion of secondaries when a primary is deleted.
795To permanently enable this feature on all devices add
796\verb|net.ipv4.conf.all.promote_secondaries=1| to \verb|/etc/sysctl.conf|.
797This tweak is available in Linux 2.6.15 and later.
osdl.org!shemmingeraba5acd2004-04-15 20:56:59 +0000798
799
800\item \verb|dynamic|
801
802--- the address was created due to stateless autoconfiguration~\cite{RFC-ADDRCONF}.
803In this case the output also contains information on times, when
804the address is still valid. After \verb|preferred_lft| expires the address is
805moved to the deprecated state. After \verb|valid_lft| expires the address
806is finally invalidated.
807
808\item \verb|deprecated|
809
810--- the address is deprecated, i.e.\ it is still valid, but cannot
811be used by newly created connections.
812
813\item \verb|tentative|
814
815--- the address is not used because duplicate address detection~\cite{RFC-ADDRCONF}
816is still not complete or failed.
817
818\end{enumerate}
819
820
821\subsection{{\tt ip address flush} --- flush protocol addresses}
822\label{IP-ADDR-FLUSH}
823
824\paragraph{Abbreviations:} \verb|flush|, \verb|f|.
825
826\paragraph{Description:}This command flushes the protocol addresses
827selected by some criteria.
828
829\paragraph{Arguments:} This command has the same arguments as \verb|show|.
830The difference is that it does not run when no arguments are given.
831
832\paragraph{Warning:} This command (and other \verb|flush| commands
833described below) is pretty dangerous. If you make a mistake, it will
834not forgive it, but will cruelly purge all the addresses.
835
836\paragraph{Statistics:} With the \verb|-statistics| option, the command
837becomes verbose. It prints out the number of deleted addresses and the number
838of rounds made to flush the address list. If this option is given
839twice, \verb|ip addr flush| also dumps all the deleted addresses
840in the format described in the previous subsection.
841
842\paragraph{Example:} Delete all the addresses from the private network
84310.0.0.0/8:
844\begin{verbatim}
845netadm@amber:~ # ip -s -s a f to 10/8
8462: dummy inet 10.7.7.7/16 brd 10.7.255.255 scope global dummy
8473: eth0 inet 10.10.7.7/16 brd 10.10.255.255 scope global eth0
8484: eth1 inet 10.8.7.7/16 brd 10.8.255.255 scope global eth1
849
850*** Round 1, deleting 3 addresses ***
851*** Flush is complete after 1 round ***
852netadm@amber:~ #
853\end{verbatim}
854Another instructive example is disabling IP on all the Ethernets:
855\begin{verbatim}
856netadm@amber:~ # ip -4 addr flush label "eth*"
857\end{verbatim}
858And the last example shows how to flush all the IPv6 addresses
859acquired by the host from stateless address autoconfiguration
860after you enabled forwarding or disabled autoconfiguration.
861\begin{verbatim}
862netadm@amber:~ # ip -6 addr flush dynamic
863\end{verbatim}
864
865
866
867\section{{\tt ip neighbour} --- neighbour/arp tables management}
868
869\paragraph{Abbreviations:} \verb|neighbour|, \verb|neighbor|, \verb|neigh|,
870\verb|n|.
871
872\paragraph{Object:} \verb|neighbour| objects establish bindings between protocol
873addresses and link layer addresses for hosts sharing the same link.
874Neighbour entries are organized into tables. The IPv4 neighbour table
875is known by another name --- the ARP table.
876
877The corresponding commands display neighbour bindings
878and their properties, add new neighbour entries and delete old ones.
879
880\paragraph{Commands:} \verb|add|, \verb|change|, \verb|replace|,
881\verb|delete|, \verb|flush| and \verb|show| (or \verb|list|).
882
883\paragraph{See also:} Appendix~\ref{PROXY-NEIGH}, p.\pageref{PROXY-NEIGH}
884describes how to manage proxy ARP/NDISC with the \verb|ip| utility.
885
886
887\subsection{{\tt ip neighbour add} --- add a new neighbour entry\\
888 {\tt ip neighbour change} --- change an existing entry\\
889 {\tt ip neighbour replace} --- add a new entry or change an existing one}
890
891\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|change|, \verb|chg|;
892\verb|replace|, \verb|repl|.
893
894\paragraph{Description:} These commands create new neighbour records
895or update existing ones.
896
897\paragraph{Arguments:}
898
899\begin{itemize}
900\item \verb|to ADDRESS| (default)
901
902--- the protocol address of the neighbour. It is either an IPv4 or IPv6 address.
903
904\item \verb|dev NAME|
905
906--- the interface to which this neighbour is attached.
907
908
909\item \verb|lladdr LLADDRESS|
910
911--- the link layer address of the neighbour. \verb|LLADDRESS| can also be
912\verb|null|.
913
914\item \verb|nud NUD_STATE|
915
916--- the state of the neighbour entry. \verb|nud| is an abbreviation for ``Neighbour
917Unreachability Detection''. The state can take one of the following values:
918
919\begin{enumerate}
\item \verb|permanent| --- the neighbour entry is valid forever and can only be removed
921administratively.
922\item \verb|noarp| --- the neighbour entry is valid. No attempts to validate
923this entry will be made but it can be removed when its lifetime expires.
924\item \verb|reachable| --- the neighbour entry is valid until the reachability
925timeout expires.
926\item \verb|stale| --- the neighbour entry is valid but suspicious.
927This option to \verb|ip neigh| does not change the neighbour state if
928it was valid and the address is not changed by this command.
929\end{enumerate}
930
931\end{itemize}
932
933\paragraph{Examples:}
934\begin{itemize}
935\item \verb|ip neigh add 10.0.0.3 lladdr 0:0:0:0:0:1 dev eth0 nud perm|
936
937--- add a permanent ARP entry for the neighbour 10.0.0.3 on the device \verb|eth0|.
938
939\item \verb|ip neigh chg 10.0.0.3 dev eth0 nud reachable|
940
941--- change its state to \verb|reachable|.
942\end{itemize}
943
944
945\subsection{{\tt ip neighbour delete} --- delete a neighbour entry}
946
947\paragraph{Abbreviations:} \verb|delete|, \verb|del|, \verb|d|.
948
949\paragraph{Description:} This command invalidates a neighbour entry.
950
951\paragraph{Arguments:} The arguments are the same as with \verb|ip neigh add|,
952except that \verb|lladdr| and \verb|nud| are ignored.
953
954
955\paragraph{Example:}
956\begin{itemize}
957\item \verb|ip neigh del 10.0.0.3 dev eth0|
958
959--- invalidate an ARP entry for the neighbour 10.0.0.3 on the device \verb|eth0|.
960
961\end{itemize}
962
963\begin{NB}
964 The deleted neighbour entry will not disappear from the tables
965 immediately. If it is in use it cannot be deleted until the last
966 client releases it. Otherwise it will be destroyed during
967 the next garbage collection.
968\end{NB}
969
970
971\paragraph{Warning:} Attempts to delete or manually change
972a \verb|noarp| entry created by the kernel may result in unpredictable behaviour.
973Particularly, the kernel may try to resolve this address even
974on a \verb|NOARP| interface or if the address is multicast or broadcast.
975
976
977\subsection{{\tt ip neighbour show} --- list neighbour entries}
978
979\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|.
980
\paragraph{Description:} This command displays neighbour tables.
982
983\paragraph{Arguments:}
984
985\begin{itemize}
986
987\item \verb|to ADDRESS| (default)
988
989--- the prefix selecting the neighbours to list.
990
991\item \verb|dev NAME|
992
993--- only list the neighbours attached to this device.
994
995\item \verb|unused|
996
997--- only list neighbours which are not currently in use.
998
999\item \verb|nud NUD_STATE|
1000
1001--- only list neighbour entries in this state. \verb|NUD_STATE| takes
1002values listed below or the special value \verb|all| which means all states.
1003This option may occur more than once. If this option is absent, \verb|ip|
1004lists all entries except for \verb|none| and \verb|noarp|.
1005
1006\end{itemize}
1007
1008
1009\paragraph{Output format:}
1010
1011\begin{verbatim}
1012kuznet@alisa:~ $ ip neigh ls
1013:: dev lo lladdr 00:00:00:00:00:00 nud noarp
1014fe80::200:cff:fe76:3f85 dev eth0 lladdr 00:00:0c:76:3f:85 router \
1015 nud stale
10160.0.0.0 dev lo lladdr 00:00:00:00:00:00 nud noarp
1017193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 nud reachable
1018193.233.7.85 dev eth0 lladdr 00:e0:1e:63:39:00 nud stale
1019kuznet@alisa:~ $
1020\end{verbatim}
1021
1022The first word of each line is the protocol address of the neighbour.
1023Then the device name follows. The rest of the line describes the contents of
1024the neighbour entry identified by the pair (device, address).
1025
1026\verb|lladdr| is the link layer address of the neighbour.
1027
1028\verb|nud| is the state of the ``neighbour unreachability detection'' machine
1029for this entry. The detailed description of the neighbour
1030state machine can be found in~\cite{RFC-NDISC}. Here is the full list
1031of the states with short descriptions:
1032
1033\begin{enumerate}
1034\item\verb|none| --- the state of the neighbour is void.
1035\item\verb|incomplete| --- the neighbour is in the process of resolution.
1036\item\verb|reachable| --- the neighbour is valid and apparently reachable.
1037\item\verb|stale| --- the neighbour is valid, but is probably already
1038unreachable, so the kernel will try to check it at the first transmission.
1039\item\verb|delay| --- a packet has been sent to the stale neighbour and the kernel is waiting
1040for confirmation.
1041\item\verb|probe| --- the delay timer expired but no confirmation was received.
1042The kernel has started to probe the neighbour with ARP/NDISC messages.
1043\item\verb|failed| --- resolution has failed.
1044\item\verb|noarp| --- the neighbour is valid. No attempts to check the entry
1045will be made.
1046\item\verb|permanent| --- it is a \verb|noarp| entry, but only the administrator
1047may remove the entry from the neighbour table.
1048\end{enumerate}
1049
1050The link layer address is valid in all states except for \verb|none|,
1051\verb|failed| and \verb|incomplete|.
1052
1053IPv6 neighbours can be marked with the additional flag \verb|router|
1054which means that the neighbour introduced itself as an IPv6 router~\cite{RFC-NDISC}.
1055
1056\paragraph{Statistics:} The \verb|-statistics| option displays some usage
1057statistics, f.e.\
1058
1059\begin{verbatim}
1060kuznet@alisa:~ $ ip -s n ls 193.233.7.254
1061193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 ref 5 used 12/13/20 \
1062 nud reachable
1063kuznet@alisa:~ $
1064\end{verbatim}
1065
1066Here \verb|ref| is the number of users of this entry
1067and \verb|used| is a triplet of time intervals in seconds
1068separated by slashes. In this case they show that:
1069
1070\begin{enumerate}
1071\item the entry was used 12 seconds ago.
1072\item the entry was confirmed 13 seconds ago.
1073\item the entry was updated 20 seconds ago.
1074\end{enumerate}
1075
1076\subsection{{\tt ip neighbour flush} --- flush neighbour entries}
1077
1078\paragraph{Abbreviations:} \verb|flush|, \verb|f|.
1079
1080\paragraph{Description:}This command flushes neighbour tables, selecting
1081entries to flush by some criteria.
1082
1083\paragraph{Arguments:} This command has the same arguments as \verb|show|.
1084The differences are that it does not run when no arguments are given,
1085and that the default neighbour states to be flushed do not include
1086\verb|permanent| and \verb|noarp|.
1087
1088
1089\paragraph{Statistics:} With the \verb|-statistics| option, the command
1090becomes verbose. It prints out the number of deleted neighbours and the number
1091of rounds made to flush the neighbour table. If the option is given
1092twice, \verb|ip neigh flush| also dumps all the deleted neighbours
1093in the format described in the previous subsection.
1094
1095\paragraph{Example:}
1096\begin{verbatim}
1097netadm@alisa:~ # ip -s -s n f 193.233.7.254
1098193.233.7.254 dev eth0 lladdr 00:00:0c:76:3f:85 ref 5 used 12/13/20 \
1099 nud reachable
1100
1101*** Round 1, deleting 1 entries ***
1102*** Flush is complete after 1 round ***
1103netadm@alisa:~ #
1104\end{verbatim}
1105
1106
1107\section{{\tt ip route} --- routing table management}
1108\label{IP-ROUTE}
1109
1110\paragraph{Abbreviations:} \verb|route|, \verb|ro|, \verb|r|.
1111
1112\paragraph{Object:} \verb|route| entries in the kernel routing tables keep
1113information about paths to other networked nodes.
1114
1115Each route entry has a {\em key\/} consisting of a {\em prefix\/}
1116(i.e.\ a pair containing a network address and the length of its mask) and,
1117optionally, the TOS value. An IP packet matches the route if the highest
1118bits of its destination address are equal to the route prefix at least
1119up to the prefix length and if the TOS of the route is zero or equal to
1120the TOS of the packet.
1121
1122If several routes match the packet, the following pruning rules
1123are used to select the best one (see~\cite{RFC1812}):
1124\begin{enumerate}
1125\item The longest matching prefix is selected. All shorter ones
1126are dropped.
1127
1128\item If the TOS of some route with the longest prefix is equal to the TOS
1129of the packet, the routes with different TOS are dropped.
1130
1131If no exact TOS match was found and routes with TOS=0 exist,
the rest of the routes are pruned.
1133
1134Otherwise, the route lookup fails.
1135
1136\item If several routes remain after the previous steps, then
1137the routes with the best preference values are selected.
1138
1139\item If we still have several routes, then the {\em first\/} of them
1140is selected.
1141
1142\begin{NB}
1143 Note the ambiguity of the last step. Unfortunately, Linux
1144 historically allows such a bizarre situation. The sense of the
1145word ``first'' depends on the order of route additions and it is practically
1146impossible to maintain a bundle of such routes in this order.
1147\end{NB}
1148
1149For simplicity we will limit ourselves to the case where such a situation
1150is impossible and routes are uniquely identified by the triplet
1151\{prefix, tos, preference\}. Actually, it is impossible to create
1152non-unique routes with \verb|ip| commands described in this section.
1153
1154One useful exception to this rule is the default route on non-forwarding
1155hosts. It is ``officially'' allowed to have several fallback routes
1156when several routers are present on directly connected networks.
1157In this case, Linux-2.2 makes ``dead gateway detection''~\cite{RFC1122}
1158controlled by neighbour unreachability detection and by advice
1159from transport protocols to select a working router, so the order
1160of the routes is not essential. However, in this case,
1161fiddling with default routes manually is not recommended. Use the Router Discovery
1162protocol (see Appendix~\ref{EXAMPLE-SETUP}, p.\pageref{EXAMPLE-SETUP})
1163instead. Actually, Linux-2.2 IPv6 does not give user level applications
1164any access to default routes.
1165\end{enumerate}
1166
1167Certainly, the steps above are not performed exactly
1168in this sequence. Instead, the routing table in the kernel is kept
1169in some data structure to achieve the final result
1170with minimal cost. However, not depending on a particular
1171routing algorithm implemented in the kernel, we can summarize
1172the statements above as: a route is identified by the triplet
1173\{prefix, tos, preference\}. This {\em key\/} lets us locate
1174the route in the routing table.
1175
1176\paragraph{Route attributes:} Each route key refers to a routing
1177information record containing
1178the data required to deliver IP packets (f.e.\ output device and
next hop router) and some optional attributes (f.e.\ the path MTU or
1180the preferred source address when communicating with this destination).
1181These attributes are described in the following subsection.
1182
1183\paragraph{Route types:} \label{IP-ROUTE-TYPES}
1184It is important that the set
of required and optional attributes depends on the route {\em type\/}.
1186The most important route type
1187is \verb|unicast|. It describes real paths to other hosts.
1188As a rule, common routing tables contain only such routes. However,
1189there are other types of routes with different semantics. The
1190full list of types understood by Linux-2.2 is:
1191\begin{itemize}
1192\item \verb|unicast| --- the route entry describes real paths to the
1193destinations covered by the route prefix.
1194\item \verb|unreachable| --- these destinations are unreachable. Packets
1195are discarded and the ICMP message {\em host unreachable\/} is generated.
1196The local senders get an \verb|EHOSTUNREACH| error.
1197\item \verb|blackhole| --- these destinations are unreachable. Packets
1198are discarded silently. The local senders get an \verb|EINVAL| error.
1199\item \verb|prohibit| --- these destinations are unreachable. Packets
1200are discarded and the ICMP message {\em communication administratively
1201prohibited\/} is generated. The local senders get an \verb|EACCES| error.
1202\item \verb|local| --- the destinations are assigned to this
1203host. The packets are looped back and delivered locally.
1204\item \verb|broadcast| --- the destinations are broadcast addresses.
1205The packets are sent as link broadcasts.
1206\item \verb|throw| --- a special control route used together with policy
1207rules (see sec.\ref{IP-RULE}, p.\pageref{IP-RULE}). If such a route is selected, lookup
1208in this table is terminated pretending that no route was found.
1209Without policy routing it is equivalent to the absence of the route in the routing
1210table. The packets are dropped and the ICMP message {\em net unreachable\/}
1211is generated. The local senders get an \verb|ENETUNREACH| error.
1212\item \verb|nat| --- a special NAT route. Destinations covered by the prefix
1213are considered to be dummy (or external) addresses which require translation
1214to real (or internal) ones before forwarding. The addresses to translate to
1215are selected with the attribute \verb|via|. More about NAT is
1216in Appendix~\ref{ROUTE-NAT}, p.\pageref{ROUTE-NAT}.
1217\item \verb|anycast| --- ({\em not implemented\/}) the destinations are
1218{\em anycast\/} addresses assigned to this host. They are mainly equivalent
1219to \verb|local| with one difference: such addresses are invalid when used
1220as the source address of any packet.
1221\item \verb|multicast| --- a special type used for multicast routing.
1222It is not present in normal routing tables.
1223\end{itemize}
1224
1225\paragraph{Route tables:} Linux-2.2 can pack routes into several routing
1226tables identified by a number in the range from 1 to 255 or by
1227name from the file \verb|/etc/iproute2/rt_tables|. By default all normal
1228routes are inserted into the \verb|main| table (ID 254) and the kernel only uses
1229this table when calculating routes.
1230
1231Actually, one other table always exists, which is invisible but
1232even more important. It is the \verb|local| table (ID 255). This table
1233consists of routes for local and broadcast addresses. The kernel maintains
1234this table automatically and the administrator usually need not modify it
1235or even look at it.
1236
1237The multiple routing tables enter the game when {\em policy routing\/}
1238is used. See sec.\ref{IP-RULE}, p.\pageref{IP-RULE}.
1239In this case, the table identifier effectively becomes
1240one more parameter, which should be added to the triplet
1241\{prefix, tos, preference\} to uniquely identify the route.
1242
1243
1244\subsection{{\tt ip route add} --- add a new route\\
1245 {\tt ip route change} --- change a route\\
1246 {\tt ip route replace} --- change a route or add a new one}
1247\label{IP-ROUTE-ADD}
1248
1249\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|change|, \verb|chg|;
1250 \verb|replace|, \verb|repl|.
1251
1252
1253\paragraph{Arguments:}
1254\begin{itemize}
1255\item \verb|to PREFIX| or \verb|to TYPE PREFIX| (default)
1256
1257--- the destination prefix of the route. If \verb|TYPE| is omitted,
1258\verb|ip| assumes type \verb|unicast|. Other values of \verb|TYPE|
1259are listed above. \verb|PREFIX| is an IP or IPv6 address optionally followed
1260by a slash and the prefix length. If the length of the prefix is missing,
1261\verb|ip| assumes a full-length host route. There is also a special
1262\verb|PREFIX| --- \verb|default| --- which is equivalent to IP \verb|0/0| or
1263to IPv6 \verb|::/0|.
1264
1265\item \verb|tos TOS| or \verb|dsfield TOS|
1266
1267--- the Type Of Service (TOS) key. This key has no associated mask and
1268the longest match is understood as: First, compare the TOS
1269of the route and of the packet. If they are not equal, then the packet
may still match a route with a zero TOS. \verb|TOS| is either an 8-bit hexadecimal
1271number or an identifier from {\tt /etc/iproute2/rt\_dsfield}.
1272
1273
1274\item \verb|metric NUMBER| or \verb|preference NUMBER|
1275
--- the preference value of the route. \verb|NUMBER| is an arbitrary 32-bit number.
1277
1278\item \verb|table TABLEID|
1279
1280--- the table to add this route to.
1281\verb|TABLEID| may be a number or a string from the file
1282\verb|/etc/iproute2/rt_tables|. If this parameter is omitted,
1283\verb|ip| assumes the \verb|main| table, with the exception of
1284\verb|local|, \verb|broadcast| and \verb|nat| routes, which are
1285put into the \verb|local| table by default.
1286
1287\item \verb|dev NAME|
1288
1289--- the output device name.
1290
1291\item \verb|via ADDRESS|
1292
1293--- the address of the nexthop router. Actually, the sense of this field depends
1294on the route type. For normal \verb|unicast| routes it is either the true nexthop
1295router or, if it is a direct route installed in BSD compatibility mode,
1296it can be a local address of the interface.
1297For NAT routes it is the first address of the block of translated IP destinations.
1298
1299\item \verb|src ADDRESS|
1300
1301--- the source address to prefer when sending to the destinations
1302covered by the route prefix.
1303
1304\item \verb|realm REALMID|
1305
1306--- the realm to which this route is assigned.
1307\verb|REALMID| may be a number or a string from the file
1308\verb|/etc/iproute2/rt_realms|. Sec.\ref{RT-REALMS} (p.\pageref{RT-REALMS})
1309contains more information on realms.
1310
1311\item \verb|mtu MTU| or \verb|mtu lock MTU|
1312
1313--- the MTU along the path to the destination. If the modifier \verb|lock| is
1314not used, the MTU may be updated by the kernel due to Path MTU Discovery.
If the modifier \verb|lock| is used, no path MTU discovery will be tried and
all packets will be sent without the DF bit in the IPv4 case
or fragmented to MTU for IPv6.
1318
1319\item \verb|window NUMBER|
1320
1321--- the maximal window for TCP to advertise to these destinations,
1322measured in bytes. It limits maximal data bursts that our TCP
1323peers are allowed to send to us.
1324
1325\item \verb|rtt NUMBER|
1326
1327--- the initial RTT (``Round Trip Time'') estimate.
1328
1329
1330\item \verb|rttvar NUMBER|
1331
1332--- \threeonly the initial RTT variance estimate.
1333
1334
1335\item \verb|ssthresh NUMBER|
1336
1337--- \threeonly an estimate for the initial slow start threshold.
1338
1339
1340\item \verb|cwnd NUMBER|
1341
--- \threeonly the clamp for the congestion window. It is ignored if the \verb|lock|
1343 flag is not used.
1344
1345
1346\item \verb|advmss NUMBER|
1347
1348--- \threeonly the MSS (``Maximal Segment Size'') to advertise to these
1349 destinations when establishing TCP connections. If it is not given,
1350 Linux uses a default value calculated from the first hop device MTU.
1351
1352\begin{NB}
 If the path to these destinations is asymmetric, this guess may be wrong.
1354\end{NB}
1355
1356\item \verb|reordering NUMBER|
1357
1358--- \threeonly Maximal reordering on the path to this destination.
1359 If it is not given, Linux uses the value selected with \verb|sysctl|
1360 variable \verb|net/ipv4/tcp_reordering|.
1361
Gilad Ben-Yossef71e58152009-10-06 15:40:34 +02001362\item \verb|hoplimit NUMBER|
osdl.org!shemmingeraba5acd2004-04-15 20:56:59 +00001363
Gilad Ben-Yossef71e58152009-10-06 15:40:34 +02001364--- [2.5.74+ only] Maximum number of hops on the path to this destination.
1365 The default is the value selected with the \verb|sysctl| variable
1366 \verb|net/ipv4/ip_default_ttl|.
1367
1368\item \verb|initcwnd NUMBER|
1369--- [2.5.70+ only] Initial congestion window size for connections to
 this destination. The actual window size is this value multiplied by the
 MSS (``Maximal Segment Size'') for the same connection. The default is
1372 zero, meaning to use the values specified in~\cite{RFC2414}.
osdl.org!shemmingeraba5acd2004-04-15 20:56:59 +00001373
\item \verb|initrwnd NUMBER|

--- [2.6.33+ only] Initial receive window size for connections to
 this destination. The actual window size is this value multiplied
 by the MSS (``Maximal Segment Size'') of the connection. The default
 value is zero, meaning to use the Slow Start value.
1380
osdl.org!shemmingeraba5acd2004-04-15 20:56:59 +00001381\item \verb|nexthop NEXTHOP|
1382
1383--- the nexthop of a multipath route. \verb|NEXTHOP| is a complex value
1384with its own syntax similar to the top level argument lists:
1385\begin{itemize}
1386\item \verb|via ADDRESS| is the nexthop router.
1387\item \verb|dev NAME| is the output device.
1388\item \verb|weight NUMBER| is a weight for this element of a multipath
1389route reflecting its relative bandwidth or quality.
1390\end{itemize}
1391
1392\item \verb|scope SCOPE_VAL|
1393
1394--- the scope of the destinations covered by the route prefix.
1395\verb|SCOPE_VAL| may be a number or a string from the file
1396\verb|/etc/iproute2/rt_scopes|.
1397If this parameter is omitted,
1398\verb|ip| assumes scope \verb|global| for all gatewayed \verb|unicast|
1399routes, scope \verb|link| for direct \verb|unicast| and \verb|broadcast| routes
1400and scope \verb|host| for \verb|local| routes.
1401
1402\item \verb|protocol RTPROTO|
1403
1404--- the routing protocol identifier of this route.
1405\verb|RTPROTO| may be a number or a string from the file
1406\verb|/etc/iproute2/rt_protos|. If the routing protocol ID is
1407not given, \verb|ip| assumes protocol \verb|boot| (i.e.\
1408it assumes the route was added by someone who doesn't
1409understand what they are doing). Several protocol values have a fixed interpretation.
1410Namely:
1411\begin{itemize}
1412\item \verb|redirect| --- the route was installed due to an ICMP redirect.
1413\item \verb|kernel| --- the route was installed by the kernel during
1414autoconfiguration.
1415\item \verb|boot| --- the route was installed during the bootup sequence.
1416If a routing daemon starts, it will purge all of them.
1417\item \verb|static| --- the route was installed by the administrator
to override dynamic routing. A routing daemon will respect them
and, probably, even advertise them to its peers.
\item \verb|ra| --- the route was installed by the Router Discovery protocol.
1421\end{itemize}
1422The rest of the values are not reserved and the administrator is free
1423to assign (or not to assign) protocol tags. At least, routing
1424daemons should take care of setting some unique protocol values,
f.e.\ as they are assigned in \verb|rtnetlink.h| or in the \verb|rt_protos|
database.
1427
1428
1429\item \verb|onlink|
1430
1431--- pretend that the nexthop is directly attached to this link,
1432even if it does not match any interface prefix. One application of this
1433option may be found in~\cite{IP-TUNNELS}.
1434
Lubomir Rintel194e9b82015-03-16 16:01:47 +01001435\item \verb|pref PREF|
1436
1437--- the IPv6 route preference.
\verb|PREF| is a string specifying the route preference as defined in
RFC4191 for Router Discovery messages. Namely:
\begin{itemize}
\item \verb|low| --- the route has the lowest priority.
\item \verb|medium| --- the route has the default priority.
\item \verb|high| --- the route has the highest priority.
1444\end{itemize}
1445
osdl.org!shemmingeraba5acd2004-04-15 20:56:59 +00001446\end{itemize}
1447
1448
1449\begin{NB}
1450 Actually there are more commands: \verb|prepend| does the same
1451 thing as classic \verb|route add|, i.e.\ adds a route, even if another
1452 route to the same destination exists. Its opposite case is \verb|append|,
1453 which adds the route to the end of the list. Avoid these
1454 features.
1455\end{NB}
1456\begin{NB}
1457 More sad news, IPv6 only understands the \verb|append| command correctly.
1458 All the others are translated into \verb|append| commands. Certainly,
1459 this will change in the future.
1460\end{NB}
1461
1462\paragraph{Examples:}
1463\begin{itemize}
1464\item add a plain route to network 10.0.0/24 via gateway 193.233.7.65
1465\begin{verbatim}
1466 ip route add 10.0.0/24 via 193.233.7.65
1467\end{verbatim}
1468\item change it to a direct route via the \verb|dummy| device
1469\begin{verbatim}
1470 ip ro chg 10.0.0/24 dev dummy
1471\end{verbatim}
1472\item add a default multipath route splitting the load between \verb|ppp0|
1473and \verb|ppp1|
1474\begin{verbatim}
1475 ip route add default scope global nexthop dev ppp0 \
1476 nexthop dev ppp1
1477\end{verbatim}
1478Note the scope value. It is not necessary but it informs the kernel
1479that this route is gatewayed rather than direct. Actually, if you
1480know the addresses of remote endpoints it would be better to use the
1481\verb|via| parameter.
1482\item announce that the address 192.203.80.144 is not a real one, but
1483should be translated to 193.233.7.83 before forwarding
1484\begin{verbatim}
1485 ip route add nat 192.203.80.144 via 193.233.7.83
1486\end{verbatim}
Backward translation is set up with policy rules described
1488in the following section (sec.\ref{IP-RULE}, p.\pageref{IP-RULE}).
1489\end{itemize}
1490
1491\subsection{{\tt ip route delete} --- delete a route}
1492
1493\paragraph{Abbreviations:} \verb|delete|, \verb|del|, \verb|d|.
1494
1495\paragraph{Arguments:} \verb|ip route del| has the same arguments as
1496\verb|ip route add|, but their semantics are a bit different.
1497
1498Key values (\verb|to|, \verb|tos|, \verb|preference| and \verb|table|)
1499select the route to delete. If optional attributes are present, \verb|ip|
1500verifies that they coincide with the attributes of the route to delete.
1501If no route with the given key and attributes was found, \verb|ip route del|
1502fails.
1503\begin{NB}
1504Linux-2.0 had the option to delete a route selected only by prefix address,
1505ignoring its length (i.e.\ netmask). This option no longer exists
1506because it was ambiguous. However, look at {\tt ip route flush}
1507(sec.\ref{IP-ROUTE-FLUSH}, p.\pageref{IP-ROUTE-FLUSH}) which
1508provides similar and even richer functionality.
1509\end{NB}
1510
1511\paragraph{Example:}
1512\begin{itemize}
\item delete the multipath route created by the command in the previous subsection
1514\begin{verbatim}
1515 ip route del default scope global nexthop dev ppp0 \
1516 nexthop dev ppp1
1517\end{verbatim}
1518\end{itemize}
1519
1520
1521
1522\subsection{{\tt ip route show} --- list routes}
1523
1524\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
1525
\paragraph{Description:} The command displays the contents of the routing tables
1527or the route(s) selected by some criteria.
1528
1529
1530\paragraph{Arguments:}
1531\begin{itemize}
1532\item \verb|to SELECTOR| (default)
1533
1534--- only select routes from the given range of destinations. \verb|SELECTOR|
1535consists of an optional modifier (\verb|root|, \verb|match| or \verb|exact|)
1536and a prefix. \verb|root PREFIX| selects routes with prefixes not shorter
1537than \verb|PREFIX|. F.e.\ \verb|root 0/0| selects the entire routing table.
1538\verb|match PREFIX| selects routes with prefixes not longer than
1539\verb|PREFIX|. F.e.\ \verb|match 10.0/16| selects \verb|10.0/16|,
1540\verb|10/8| and \verb|0/0|, but it does not select \verb|10.1/16| and
1541\verb|10.0.0/24|. And \verb|exact PREFIX| (or just \verb|PREFIX|)
selects routes with this exact prefix. If none of these options
is present, \verb|ip| assumes \verb|root 0/0|, i.e.\ it lists the entire table.
1544
1545
1546\item \verb|tos TOS| or \verb|dsfield TOS|
1547
1548 --- only select routes with the given TOS.
1549
1550
1551\item \verb|table TABLEID|
1552
 --- show the routes from the given table(s). The default setting is to show
1554\verb|table| \verb|main|. \verb|TABLEID| may either be the ID of a real table
1555or one of the special values:
1556 \begin{itemize}
1557 \item \verb|all| --- list all of the tables.
1558 \item \verb|cache| --- dump the routing cache.
1559 \end{itemize}
1560\begin{NB}
1561 IPv6 has a single table. However, splitting it into \verb|main|, \verb|local|
1562 and \verb|cache| is emulated by the \verb|ip| utility.
1563\end{NB}
1564
1565\item \verb|cloned| or \verb|cached|
1566
1567--- list cloned routes i.e.\ routes which were dynamically forked from
1568other routes because some route attribute (f.e.\ MTU) was updated.
1569Actually, it is equivalent to \verb|table cache|.
1570
1571\item \verb|from SELECTOR|
1572
1573--- the same syntax as for \verb|to|, but it binds the source address range
1574rather than destinations. Note that the \verb|from| option only works with
1575cloned routes.
1576
1577\item \verb|protocol RTPROTO|
1578
1579--- only list routes of this protocol.
1580
1581
1582\item \verb|scope SCOPE_VAL|
1583
1584--- only list routes with this scope.
1585
1586\item \verb|type TYPE|
1587
1588--- only list routes of this type.
1589
1590\item \verb|dev NAME|
1591
1592--- only list routes going via this device.
1593
1594\item \verb|via PREFIX|
1595
1596--- only list routes going via the nexthop routers selected by \verb|PREFIX|.
1597
1598\item \verb|src PREFIX|
1599
1600--- only list routes with preferred source addresses selected
1601by \verb|PREFIX|.
1602
1603\item \verb|realm REALMID| or \verb|realms FROMREALM/TOREALM|
1604
1605--- only list routes with these realms.
1606
1607\end{itemize}
1608
1609\paragraph{Examples:} Let us count routes of protocol \verb|gated/bgp|
1610on a router:
1611\begin{verbatim}
1612kuznet@amber:~ $ ip ro ls proto gated/bgp | wc
1613 1413 9891 79010
1614kuznet@amber:~ $
1615\end{verbatim}
1616To count the size of the routing cache, we have to use the \verb|-o| option
1617because cached attributes can take more than one line of output:
1618\begin{verbatim}
1619kuznet@amber:~ $ ip -o ro ls cloned | wc
1620 159 2543 18707
1621kuznet@amber:~ $
1622\end{verbatim}
1623
1624
1625\paragraph{Output format:} The output of this command consists
1626of per route records separated by line feeds.
1627However, some records may consist
1628of more than one line: particularly, this is the case when the route
1629is cloned or you requested additional statistics. If the
1630\verb|-o| option was given, then line feeds separating lines inside
1631records are replaced with the backslash sign.
1632
1633The output has the same syntax as arguments given to {\tt ip route add},
1634so that it can be understood easily. F.e.\
1635\begin{verbatim}
1636kuznet@amber:~ $ ip ro ls 193.233.7/24
1637193.233.7.0/24 dev eth0 proto gated/conn scope link \
1638 src 193.233.7.65 realms inr.ac
1639kuznet@amber:~ $
1640\end{verbatim}
1641
1642If you list cloned entries, the output contains other attributes which
1643are evaluated during route calculation and updated during route
1644lifetime. An example of the output is:
1645\begin{verbatim}
1646kuznet@amber:~ $ ip ro ls 193.233.7.82 tab cache
1647193.233.7.82 from 193.233.7.82 dev eth0 src 193.233.7.65 \
1648 realms inr.ac/inr.ac
1649 cache <src-direct,redirect> mtu 1500 rtt 300 iif eth0
1650193.233.7.82 dev eth0 src 193.233.7.65 realms inr.ac
1651 cache mtu 1500 rtt 300
1652kuznet@amber:~ $
1653\end{verbatim}
1654\begin{NB}
1655 \label{NB-strange-route}
1656 The route looks a bit strange, doesn't it? Did you notice that
1657 it is a path from 193.233.7.82 back to 193.233.7.82? Well, you will
1658 see in the section on \verb|ip route get| (p.\pageref{NB-nature-of-strangeness})
1659 how it appeared.
1660\end{NB}
1661The second line, starting with the word \verb|cache|, shows
1662additional attributes which normal routes do not possess.
1663Cached flags are summarized in angle brackets:
1664\begin{itemize}
1665\item \verb|local| --- packets are delivered locally.
1666It stands for loopback unicast routes, for broadcast routes
1667and for multicast routes, if this host is a member of the corresponding
1668group.
1669
1670\item \verb|reject| --- the path is bad. Any attempt to use it results
1671in an error. See attribute \verb|error| below (p.\pageref{IP-ROUTE-GET-error}).
1672
1673\item \verb|mc| --- the destination is multicast.
1674
1675\item \verb|brd| --- the destination is broadcast.
1676
1677\item \verb|src-direct| --- the source is on a directly connected
1678interface.
1679
1680\item \verb|redirected| --- the route was created by an ICMP Redirect.
1681
1682\item \verb|redirect| --- packets going via this route will
1683trigger an ICMP redirect.
1684
1685\item \verb|fastroute| --- the route is eligible to be used for fastroute.
1686
1687\item \verb|equalize| --- make packet by packet randomization
1688along this path.
1689
1690\item \verb|dst-nat| --- the destination address requires translation.
1691
1692\item \verb|src-nat| --- the source address requires translation.
1693
1694\item \verb|masq| --- the source address requires masquerading.
1695This feature disappeared in linux-2.4.
1696
1697\item \verb|notify| --- ({\em not implemented}) change/deletion
1698of this route will trigger RTNETLINK notification.
1699\end{itemize}
1700
1701Then some optional attributes follow:
1702\begin{itemize}
1703\item \verb|error| --- on \verb|reject| routes it is the error code
1704returned to local senders when they try to use this route.
1705These error codes are translated into ICMP error codes, sent to remote
1706senders, according to the rules described above in the subsection
1707devoted to route types (p.\pageref{IP-ROUTE-TYPES}).
1708\label{IP-ROUTE-GET-error}
1709
1710\item \verb|expires| --- this entry will expire after this timeout.
1711
1712\item \verb|iif| --- the packets for this path are expected to arrive
1713on this interface.
1714\end{itemize}
1715
1716\paragraph{Statistics:} With the \verb|-statistics| option, more
1717information about this route is shown:
1718\begin{itemize}
1719\item \verb|users| --- the number of users of this entry.
1720\item \verb|age| --- shows when this route was last used.
1721\item \verb|used| --- the number of lookups of this route since its creation.
1722\end{itemize}
1723
1724\subsection{{\tt ip route save} --- save routing tables}
1725\label{IP-ROUTE-SAVE}
1726
1727\paragraph{Description:} this command saves the contents of the routing
1728tables or the route(s) selected by some criteria to standard output.
1729
1730\paragraph{Arguments:} \verb|ip route save| has the same arguments as
1731\verb|ip route show|.
1732
1733\paragraph{Example:} This saves all the routes to the {\tt saved\_routes}
1734file:
1735\begin{verbatim}
1736dan@caffeine:~ # ip route save > saved_routes
1737\end{verbatim}
1738
1739\paragraph{Output format:} The format of the data stream provided by
1740\verb|ip route save| is that of \verb|rtnetlink|. See
1741\verb|rtnetlink(7)| for more information.
1742
1743\subsection{{\tt ip route restore} --- restore routing tables}
1744\label{IP-ROUTE-RESTORE}
1745
1746\paragraph{Description:} this command restores the contents of the routing
1747tables according to a data stream as provided by \verb|ip route save| via
1748standard input. Note that any routes already in the table are left unchanged.
1749Any routes in the input stream that already exist in the tables are ignored.
1750
1751\paragraph{Arguments:} This command takes no arguments.
1752
1753\paragraph{Example:} This restores all routes that were saved to the
1754{\tt saved\_routes} file:
1755
1756\begin{verbatim}
1757dan@caffeine:~ # ip route restore < saved_routes
1758\end{verbatim}
1759
1760\subsection{{\tt ip route flush} --- flush routing tables}
1761\label{IP-ROUTE-FLUSH}
1762
1763\paragraph{Abbreviations:} \verb|flush|, \verb|f|.
1764
1765\paragraph{Description:} this command flushes routes selected
1766by some criteria.
1767
1768\paragraph{Arguments:} the arguments have the same syntax and semantics
1769as the arguments of \verb|ip route show|, but routing tables are not
1770listed but purged. The only difference is the default action: \verb|show|
1771dumps the entire IP main routing table but \verb|flush| prints the help page.
1772The reason for this difference does not require any explanation, does it?
1773
1774
1775\paragraph{Statistics:} With the \verb|-statistics| option, the command
1776becomes verbose. It prints out the number of deleted routes and the number
1777of rounds made to flush the routing table. If the option is given
1778twice, \verb|ip route flush| also dumps all the deleted routes
1779in the format described in the previous subsection.
1780
1781\paragraph{Examples:} The first example flushes all the
1782gatewayed routes from the main table (f.e.\ after a routing daemon crash).
1783\begin{verbatim}
1784netadm@amber:~ # ip -4 ro flush scope global type unicast
1785\end{verbatim}
1786This option deserves to be put into a scriptlet \verb|routef|.
1787\begin{NB}
1788This option was described in the \verb|route(8)| man page borrowed
1789from BSD, but was never implemented in Linux.
1790\end{NB}
1791
1792The second example flushes all IPv6 cloned routes:
1793\begin{verbatim}
1794netadm@amber:~ # ip -6 -s -s ro flush cache
17953ffe:2400::220:afff:fef4:c5d1 via 3ffe:2400::220:afff:fef4:c5d1 \
1796 dev eth0 metric 0
1797 cache used 2 age 12sec mtu 1500 rtt 300
17983ffe:2400::280:adff:feb7:8034 via 3ffe:2400::280:adff:feb7:8034 \
1799 dev eth0 metric 0
1800 cache used 2 age 15sec mtu 1500 rtt 300
18013ffe:2400::280:c8ff:fe59:5bcc via 3ffe:2400::280:c8ff:fe59:5bcc \
1802 dev eth0 metric 0
1803 cache users 1 used 1 age 23sec mtu 1500 rtt 300
18043ffe:2400:0:1:2a0:ccff:fe66:1878 via 3ffe:2400:0:1:2a0:ccff:fe66:1878 \
1805 dev eth1 metric 0
1806 cache used 2 age 20sec mtu 1500 rtt 300
18073ffe:2400:0:1:a00:20ff:fe71:fb30 via 3ffe:2400:0:1:a00:20ff:fe71:fb30 \
1808 dev eth1 metric 0
1809 cache used 2 age 33sec mtu 1500 rtt 300
1810ff02::1 via ff02::1 dev eth1 metric 0
1811 cache users 1 used 1 age 45sec mtu 1500 rtt 300
1812
1813*** Round 1, deleting 6 entries ***
1814*** Flush is complete after 1 round ***
1815netadm@amber:~ # ip -6 -s -s ro flush cache
1816Nothing to flush.
1817netadm@amber:~ #
1818\end{verbatim}
1819
1820The third example flushes BGP routing tables after a \verb|gated|
1821death.
1822\begin{verbatim}
1823netadm@amber:~ # ip ro ls proto gated/bgp | wc
1824 1408 9856 78730
1825netadm@amber:~ # ip -s ro f proto gated/bgp
1826
1827*** Round 1, deleting 1408 entries ***
1828*** Flush is complete after 1 round ***
1829netadm@amber:~ # ip ro f proto gated/bgp
1830Nothing to flush.
1831netadm@amber:~ # ip ro ls proto gated/bgp
1832netadm@amber:~ #
1833\end{verbatim}
1834
1835
1836\subsection{{\tt ip route get} --- get a single route}
1837\label{IP-ROUTE-GET}
1838
1839\paragraph{Abbreviations:} \verb|get|, \verb|g|.
1840
1841\paragraph{Description:} this command gets a single route to a destination
1842and prints its contents exactly as the kernel sees it.
1843
1844\paragraph{Arguments:}
1845\begin{itemize}
1846\item \verb|to ADDRESS| (default)
1847
1848--- the destination address.
1849
1850\item \verb|from ADDRESS|
1851
1852--- the source address.
1853
1854\item \verb|tos TOS| or \verb|dsfield TOS|
1855
1856--- the Type Of Service.
1857
1858\item \verb|iif NAME|
1859
1860--- the device from which this packet is expected to arrive.
1861
1862\item \verb|oif NAME|
1863
1864--- force the output device on which this packet will be routed.
1865
1866\item \verb|connected|
1867
1868--- if no source address (option \verb|from|) was given, relookup
1869the route with the source set to the preferred address received from the first lookup.
1870If policy routing is used, it may be a different route.
1871
1872\end{itemize}
1873
1874Note that this operation is not equivalent to \verb|ip route show|.
1875\verb|show| shows existing routes. \verb|get| resolves them and
1876creates new clones if necessary. Essentially, \verb|get|
1877is equivalent to sending a packet along this path.
1878If the \verb|iif| argument is not given, the kernel creates a route
1879to output packets towards the requested destination.
1880This is equivalent to pinging the destination
1881with a subsequent {\tt ip route ls cache}, however, no packets are
1882actually sent. With the \verb|iif| argument, the kernel pretends
1883that a packet arrived from this interface and searches for
1884a path to forward the packet.
1885
1886\paragraph{Output format:} This command outputs routes in the same
1887format as \verb|ip route ls|.
1888
1889\paragraph{Examples:}
1890\begin{itemize}
1891\item Find a route to output packets to 193.233.7.82:
1892\begin{verbatim}
1893kuznet@amber:~ $ ip route get 193.233.7.82
1894193.233.7.82 dev eth0 src 193.233.7.65 realms inr.ac
1895 cache mtu 1500 rtt 300
1896kuznet@amber:~ $
1897\end{verbatim}
1898
1899\item Find a route to forward packets arriving on \verb|eth0|
1900from 193.233.7.82 and destined for 193.233.7.82:
1901\begin{verbatim}
1902kuznet@amber:~ $ ip r g 193.233.7.82 from 193.233.7.82 iif eth0
1903193.233.7.82 from 193.233.7.82 dev eth0 src 193.233.7.65 \
1904 realms inr.ac/inr.ac
1905 cache <src-direct,redirect> mtu 1500 rtt 300 iif eth0
1906kuznet@amber:~ $
1907\end{verbatim}
1908\begin{NB}
1909 \label{NB-nature-of-strangeness}
1910 This is the command that created the funny route from 193.233.7.82
1911 looped back to 193.233.7.82 (cf.\ NB on~p.\pageref{NB-strange-route}).
1912 Note the \verb|redirect| flag on it.
1913\end{NB}
1914
1915\item Find a multicast route for packets arriving on \verb|eth0|
1916from host 193.233.7.82 and destined for multicast group 224.2.127.254
1917(it is assumed that a multicast routing daemon is running.
1918In this case, it is \verb|pimd|)
1919\begin{verbatim}
1920kuznet@amber:~ $ ip r g 224.2.127.254 from 193.233.7.82 iif eth0
1921multicast 224.2.127.254 from 193.233.7.82 dev lo \
1922 src 193.233.7.65 realms inr.ac/cosmos
1923 cache <mc> iif eth0 Oifs: eth1 pimreg
1924kuznet@amber:~ $
1925\end{verbatim}
1926This route differs from the ones seen before. It contains a ``normal'' part
1927and a ``multicast'' part. The normal part is used to deliver (or not to
1928deliver) the packet to local IP listeners. In this case the router
1929is not a member
1930of this group, so that route has no \verb|local| flag and only
1931forwards packets. The output device for such entries is always loopback.
1932The multicast part consists of an additional \verb|Oifs:| list showing
1933the output interfaces.
1934\end{itemize}
1935
1936
1937It is time for a more complicated example. Let us add an invalid
1938gatewayed route for a destination which is really directly connected:
1939\begin{verbatim}
1940netadm@alisa:~ # ip route add 193.233.7.98 via 193.233.7.254
1941netadm@alisa:~ # ip route get 193.233.7.98
1942193.233.7.98 via 193.233.7.254 dev eth0 src 193.233.7.90
1943 cache mtu 1500 rtt 3072
1944netadm@alisa:~ #
1945\end{verbatim}
1946and probe it with ping:
1947\begin{verbatim}
1948netadm@alisa:~ # ping -n 193.233.7.98
1949PING 193.233.7.98 (193.233.7.98) from 193.233.7.90 : 56 data bytes
1950From 193.233.7.254: Redirect Host(New nexthop: 193.233.7.98)
195164 bytes from 193.233.7.98: icmp_seq=0 ttl=255 time=3.5 ms
1952From 193.233.7.254: Redirect Host(New nexthop: 193.233.7.98)
195364 bytes from 193.233.7.98: icmp_seq=1 ttl=255 time=2.2 ms
195464 bytes from 193.233.7.98: icmp_seq=2 ttl=255 time=0.4 ms
195564 bytes from 193.233.7.98: icmp_seq=3 ttl=255 time=0.4 ms
195664 bytes from 193.233.7.98: icmp_seq=4 ttl=255 time=0.4 ms
1957^C
1958--- 193.233.7.98 ping statistics ---
19595 packets transmitted, 5 packets received, 0% packet loss
1960round-trip min/avg/max = 0.4/1.3/3.5 ms
1961netadm@alisa:~ #
1962\end{verbatim}
1963What happened? Router 193.233.7.254 understood that we have a much
1964better path to the destination and sent us an ICMP redirect message.
1965We may retry \verb|ip route get| to see what we have in the routing
1966tables now:
1967\begin{verbatim}
1968netadm@alisa:~ # ip route get 193.233.7.98
1969193.233.7.98 dev eth0 src 193.233.7.90
1970 cache <redirected> mtu 1500 rtt 3072
1971netadm@alisa:~ #
1972\end{verbatim}
1973
1974
1975
1976\section{{\tt ip rule} --- routing policy database management}
1977\label{IP-RULE}
1978
1979\paragraph{Abbreviations:} \verb|rule|, \verb|ru|.
1980
1981\paragraph{Object:} \verb|rule|s in the routing policy database control
1982the route selection algorithm.
1983
1984Classic routing algorithms used in the Internet make routing decisions
1985based only on the destination address of packets (and in theory,
1986but not in practice, on the TOS field). The seminal review of classic
1987routing algorithms and their modifications can be found in~\cite{RFC1812}.
1988
1989In some circumstances we want to route packets differently depending not only
1990on destination addresses, but also on other packet fields: source address,
1991IP protocol, transport protocol ports or even packet payload.
1992This task is called ``policy routing''.
1993
1994\begin{NB}
1995 ``policy routing'' $\neq$ ``routing policy''.
1996
1997\noindent ``policy routing'' $=$ ``cunning routing''.
1998
1999\noindent ``routing policy'' $=$ ``routing tactics'' or ``routing plan''.
2000\end{NB}
2001
2002To solve this task, the conventional destination based routing table, ordered
2003according to the longest match rule, is replaced with a ``routing policy
2004database'' (or RPDB), which selects routes
2005by executing some set of rules. The rules may have lots of keys of different
2006natures and therefore they have no natural ordering, but one imposed
2007by the administrator. Linux-2.2 RPDB is a linear list of rules
2008ordered by numeric priority value.
2009RPDB explicitly allows matching a few packet fields:
2010
2011\begin{itemize}
2012\item packet source address.
2013\item packet destination address.
2014\item TOS.
2015\item incoming interface (which is packet metadata, rather than a packet field).
2016\end{itemize}
2017
2018Matching IP protocols and transport ports is also possible,
2019indirectly, via \verb|ipchains|, by exploiting their ability
2020to mark some classes of packets with \verb|fwmark|. Therefore,
2021\verb|fwmark| is also included in the set of keys checked by rules.
2022
2023Each policy routing rule consists of a {\em selector\/} and an {\em action\/}
2024predicate. The RPDB is scanned in the order of increasing priority. The selector
2025of each rule is applied to \{source address, destination address, incoming
2026interface, tos, fwmark\} and, if the selector matches the packet,
2027the action is performed. The action predicate may return with success.
2028In this case, it will either give a route or failure indication
2029and the RPDB lookup is terminated. Otherwise, the RPDB program
2030continues on the next rule.
2031
2032What is the action, semantically? The natural action is to select the
2033nexthop and the output device. This is what
2034Cisco IOS~\cite{IOS} does. Let us call it ``match \& set''.
2035The Linux-2.2 approach is more flexible. The action includes
2036lookups in destination-based routing tables and selecting
2037a route from these tables according to the classic longest match algorithm.
2038The ``match \& set'' approach is the simplest case of the Linux one. It is realized
2039when a second level routing table contains a single default route.
2040Recall that Linux-2.2 supports multiple tables
2041managed with the \verb|ip route| command, described in the previous section.
2042
2043At startup time the kernel configures the default RPDB consisting of three
2044rules:
2045
2046\begin{enumerate}
2047\item Priority: 0, Selector: match anything, Action: lookup routing
2048table \verb|local| (ID 255).
2049The \verb|local| table is a special routing table containing
2050high priority control routes for local and broadcast addresses.
2051
2052Rule 0 is special. It cannot be deleted or overridden.
2053
2054
2055\item Priority: 32766, Selector: match anything, Action: lookup routing
2056table \verb|main| (ID 254).
2057The \verb|main| table is the normal routing table containing all non-policy
2058routes. This rule may be deleted and/or overridden with other
2059ones by the administrator.
2060
2061\item Priority: 32767, Selector: match anything, Action: lookup routing
2062table \verb|default| (ID 253).
2063The \verb|default| table is empty. It is reserved for some
2064post-processing if no previous default rules selected the packet.
2065This rule may also be deleted.
2066
2067\end{enumerate}
2068
2069Do not confuse routing tables with rules: rules point to routing tables,
2070several rules may refer to one routing table and some routing tables
2071may have no rules pointing to them. If the administrator deletes all the rules
2072referring to a table, the table is not used, but it still exists
2073and will disappear only after all the routes contained in it are deleted.
2074
2075
2076\paragraph{Rule attributes:} Each RPDB entry has additional
2077attributes. F.e.\ each rule has a pointer to some routing
2078table. NAT and masquerading rules have an attribute to select new IP
2079address to translate/masquerade. Besides that, rules have some
2080optional attributes, which routes have, namely \verb|realms|.
2081These values do not override those contained in the routing tables. They
2082are only used if the route did not select any attributes.
2083
2084
2085\paragraph{Rule types:} The RPDB may contain rules of the following
2086types:
2087\begin{itemize}
2088\item \verb|unicast| --- the rule prescribes to return the route found
2089in the routing table referenced by the rule.
2090\item \verb|blackhole| --- the rule prescribes to silently drop the packet.
2091\item \verb|unreachable| --- the rule prescribes to generate a ``Network
2092is unreachable'' error.
2093\item \verb|prohibit| --- the rule prescribes to generate
2094``Communication is administratively prohibited'' error.
2095\item \verb|nat| --- the rule prescribes to translate the source address
2096of the IP packet into some other value. More about NAT is
2097in Appendix~\ref{ROUTE-NAT}, p.\pageref{ROUTE-NAT}.
2098\end{itemize}
2099
2100
2101\paragraph{Commands:} \verb|add|, \verb|delete| and \verb|show|
2102(or \verb|list|).
2103
2104\subsection{{\tt ip rule add} --- insert a new rule\\
2105 {\tt ip rule delete} --- delete a rule}
2106\label{IP-RULE-ADD}
2107
2108\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|delete|, \verb|del|,
2109 \verb|d|.
2110
2111\paragraph{Arguments:}
2112
2113\begin{itemize}
2114\item \verb|type TYPE| (default)
2115
2116--- the type of this rule. The list of valid types was given in the previous
2117subsection.
2118
2119\item \verb|from PREFIX|
2120
2121--- select the source prefix to match.
2122
2123\item \verb|to PREFIX|
2124
2125--- select the destination prefix to match.
2126
2127\item \verb|iif NAME|
2128
2129--- select the incoming device to match. If the interface is loopback,
2130the rule only matches packets originating from this host. This means that you
2131may create separate routing tables for forwarded and local packets and,
2132hence, completely segregate them.
2133
2134\item \verb|tos TOS| or \verb|dsfield TOS|
2135
2136--- select the TOS value to match.
2137
2138\item \verb|fwmark MARK|
2139
2140--- select the \verb|fwmark| value to match.
2141
2142\item \verb|priority PREFERENCE|
2143
2144--- the priority of this rule. Each rule should have an explicitly
2145set {\em unique\/} priority value.
2146\begin{NB}
2147 Really, for historical reasons \verb|ip rule add| does not require a
2148 priority value and allows them to be non-unique.
2149 If the user does not supply a priority, it is selected by the kernel.
2150 If the user creates a rule with a priority value that
2151 already exists, the kernel does not reject the request. It adds
2152 the new rule before all old rules of the same priority.
2153
2154 It is a mistake in design, no more. And it will be fixed one day,
2155 so do not rely on this feature. Use explicit priorities.
2156\end{NB}
2157
2158
2159\item \verb|table TABLEID|
2160
2161--- the routing table identifier to look up if the rule selector matches.
2162
2163\item \verb|realms FROM/TO|
2164
2165--- Realms to select if the rule matched and the routing table lookup
2166succeeded. Realm \verb|TO| is only used if the route did not select
2167any realm.
2168
2169\item \verb|nat ADDRESS|
2170
2171--- The base of the IP address block to translate (for source addresses).
2172The \verb|ADDRESS| may be either the start of the block of NAT addresses
2173(selected by NAT routes) or in linux-2.2 a local host address (or even zero).
2174In the last case the router does not translate the packets,
2175but masquerades them to this address; this feature disappeared in 2.4.
2176More about NAT is in Appendix~\ref{ROUTE-NAT},
2177p.\pageref{ROUTE-NAT}.
2178
2179\end{itemize}
2180
2181\paragraph{Warning:} Changes to the RPDB made with these commands
2182do not become active immediately. It is assumed that after
2183a script finishes a batch of updates, it flushes the routing cache
2184with \verb|ip route flush cache|.
2185
2186\paragraph{Examples:}
2187\begin{itemize}
2188\item Route packets with source addresses from 192.203.80/24
2189according to routing table \verb|inr.ruhep|:
2190\begin{verbatim}
2191ip ru add from 192.203.80.0/24 table inr.ruhep prio 220
2192\end{verbatim}
2193
2194\item Translate packet source address 193.233.7.83 into 192.203.80.144
2195and route it according to table \#1 (actually, it is \verb|inr.ruhep|):
2196\begin{verbatim}
2197ip ru add from 193.233.7.83 nat 192.203.80.144 table 1 prio 320
2198\end{verbatim}
2199
2200\item Delete the unused default rule:
2201\begin{verbatim}
2202ip ru del prio 32767
2203\end{verbatim}
2204
2205\end{itemize}
2206
2207
2208
2209\subsection{{\tt ip rule show} --- list rules}
2210\label{IP-RULE-SHOW}
2211
2212\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
2213
2214
2215\paragraph{Arguments:} Good news, this is one command that has no arguments.
2216
2217\paragraph{Output format:}
2218
2219\begin{verbatim}
2220kuznet@amber:~ $ ip ru ls
22210: from all lookup local
2222200: from 192.203.80.0/24 to 193.233.7.0/24 lookup main
2223210: from 192.203.80.0/24 to 192.203.80.0/24 lookup main
2224220: from 192.203.80.0/24 lookup inr.ruhep realms inr.ruhep/radio-msu
2225300: from 193.233.7.83 to 193.233.7.0/24 lookup main
2226310: from 193.233.7.83 to 192.203.80.0/24 lookup main
2227320: from 193.233.7.83 lookup inr.ruhep map-to 192.203.80.144
222832766: from all lookup main
2229kuznet@amber:~ $
2230\end{verbatim}
2231
2232In the first column is the rule priority value followed
2233by a colon. Then the selectors follow. Each key is prefixed
2234with the same keyword that was used to create the rule.
2235
2236The keyword \verb|lookup| is followed by a routing table identifier,
2237as it is recorded in the file \verb|/etc/iproute2/rt_tables|.
2238
2239If the rule does NAT (f.e.\ rule \#320), it is shown by the keyword
2240\verb|map-to| followed by the start of the block of addresses to map.
2241
2242The sense of this example is pretty simple. The prefixes
2243192.203.80.0/24 and 193.233.7.0/24 form the internal network, but
2244they are routed differently when the packets leave it.
2245Besides that, the host 193.233.7.83 is translated into
2246another prefix to look like 192.203.80.144 when talking
2247to the outer world.
2248
2249
2250
2251\section{{\tt ip maddress} --- multicast addresses management}
2252\label{IP-MADDR}
2253
2254\paragraph{Object:} \verb|maddress| objects are multicast addresses.
2255
2256\paragraph{Commands:} \verb|add|, \verb|delete|, \verb|show| (or \verb|list|).
2257
2258\subsection{{\tt ip maddress show} --- list multicast addresses}
2259
2260\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
2261
2262\paragraph{Arguments:}
2263
2264\begin{itemize}
2265
2266\item \verb|dev NAME| (default)
2267
2268--- the device name.
2269
2270\end{itemize}
2271
2272\paragraph{Output format:}
2273
2274\begin{verbatim}
2275kuznet@alisa:~ $ ip maddr ls dummy
22762: dummy
2277 link 33:33:00:00:00:01
2278 link 01:00:5e:00:00:01
2279 inet 224.0.0.1 users 2
2280 inet6 ff02::1
2281kuznet@alisa:~ $
2282\end{verbatim}
2283
2284The first line of the output shows the interface index and its name.
2285Then the multicast address list follows. Each line starts with the
2286protocol identifier. The word \verb|link| denotes a link layer
2287multicast address.
2288
2289If a multicast address has more than one user, the number
2290of users is shown after the \verb|users| keyword.
2291
2292One additional feature not present in the example above
2293is the \verb|static| flag, which indicates that the address was joined
2294with \verb|ip maddr add|. See the following subsection.
2295
2296
2297
2298\subsection{{\tt ip maddress add} --- add a multicast address\\
2299 {\tt ip maddress delete} --- delete a multicast address}
2300
2301\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|delete|, \verb|del|, \verb|d|.
2302
2303\paragraph{Description:} these commands attach/detach
2304a static link layer multicast address to listen on the interface.
2305Note that it is impossible to join protocol multicast groups
2306statically. This command only manages link layer addresses.
2307
2308
2309\paragraph{Arguments:}
2310
2311\begin{itemize}
2312\item \verb|address LLADDRESS| (default)
2313
2314--- the link layer multicast address.
2315
2316\item \verb|dev NAME|
2317
2318--- the device to join/leave this multicast address.
2319
2320\end{itemize}
2321
2322
2323\paragraph{Example:} Let us continue with the example from the previous subsection.
2324
2325\begin{verbatim}
2326netadm@alisa:~ # ip maddr add 33:33:00:00:00:01 dev dummy
2327netadm@alisa:~ # ip -0 maddr ls dummy
23282: dummy
2329 link 33:33:00:00:00:01 users 2 static
2330 link 01:00:5e:00:00:01
2331netadm@alisa:~ # ip maddr del 33:33:00:00:00:01 dev dummy
2332\end{verbatim}
2333
2334\begin{NB}
2335 Neither \verb|ip| nor the kernel check for multicast address validity.
2336 Particularly, this means that you can try to load a unicast address
2337 instead of a multicast address. Most drivers will ignore such addresses,
2338 but several (f.e.\ Tulip) will intern them into their on-board filter.
2339 The effects may be strange. Namely, the addresses become additional
2340 local link addresses and, if you loaded the address of another host
2341 to the router, wait for duplicated packets on the wire.
2342 It is not a bug, but rather a hole in the API and intra-kernel interfaces.
2343 This feature is really more useful for traffic monitoring, but using it
2344 with Linux-2.2 you {\em have to\/} be sure that the host is not
2345 a router and, especially, that it is not a transparent proxy or masquerading
2346 agent.
2347\end{NB}
2348
2349
2350
2351\section{{\tt ip mroute} --- multicast routing cache management}
2352\label{IP-MROUTE}
2353
2354\paragraph{Abbreviations:} \verb|mroute|, \verb|mr|.
2355
2356\paragraph{Object:} \verb|mroute| objects are multicast routing cache
2357entries created by a user level mrouting daemon
2358(f.e.\ \verb|pimd| or \verb|mrouted|).
2359
2360Due to the limitations of the current interface to the multicast routing
2361engine, it is impossible to change \verb|mroute| objects administratively,
2362so we may only display them. This limitation will be removed
2363in the future.
2364
2365\paragraph{Commands:} \verb|show| (or \verb|list|).
2366
2367
2368\subsection{{\tt ip mroute show} --- list mroute cache entries}
2369
2370\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
2371
2372\paragraph{Arguments:}
2373
2374\begin{itemize}
2375\item \verb|to PREFIX| (default)
2376
2377--- the prefix selecting the destination multicast addresses to list.
2378
2379
2380\item \verb|iif NAME|
2381
2382--- the interface on which multicast packets are received.
2383
2384
2385\item \verb|from PREFIX|
2386
2387--- the prefix selecting the IP source addresses of the multicast route.
2388
2389
2390\end{itemize}
2391
2392\paragraph{Output format:}
2393
2394\begin{verbatim}
2395kuznet@amber:~ $ ip mroute ls
2396(193.232.127.6, 224.0.1.39) Iif: unresolved
2397(193.232.244.34, 224.0.1.40) Iif: unresolved
2398(193.233.7.65, 224.66.66.66) Iif: eth0 Oifs: pimreg
2399kuznet@amber:~ $
2400\end{verbatim}
2401
2402Each line shows one (S,G) entry in the multicast routing cache,
2403where S is the source address and G is the multicast group. \verb|Iif| is
2404the interface on which multicast packets are expected to arrive.
2405If the word \verb|unresolved| is there instead of the interface name,
2406it means that the routing daemon still hasn't resolved this entry.
2407The keyword \verb|Oifs:| is followed by a list of output interfaces, separated
2408by spaces. If a multicast routing entry is created with non-trivial
2409TTL scope, administrative distances are appended to the device names
2410in the \verb|Oifs:| list.
2411
2412\paragraph{Statistics:} The \verb|-statistics| option also prints the
2413number of packets and bytes forwarded along this route and
2414the number of packets that arrived on the wrong interface, if this number is not zero.
2415
2416\begin{verbatim}
2417kuznet@amber:~ $ ip -s mr ls 224.66/16
2418(193.233.7.65, 224.66.66.66) Iif: eth0 Oifs: pimreg
2419 9383 packets, 300256 bytes
2420kuznet@amber:~ $
2421\end{verbatim}
2422
2423
2424\section{{\tt ip tunnel} --- tunnel configuration}
2425\label{IP-TUNNEL}
2426
2427\paragraph{Abbreviations:} \verb|tunnel|, \verb|tunl|.
2428
2429\paragraph{Object:} \verb|tunnel| objects are tunnels, encapsulating
2430packets in IPv4 packets and then sending them over the IP infrastructure.
2431
2432\paragraph{Commands:} \verb|add|, \verb|delete|, \verb|change|, \verb|show|
2433(or \verb|list|).
2434
2435\paragraph{See also:} A more informal discussion of tunneling
2436over IP and the \verb|ip tunnel| command can be found in~\cite{IP-TUNNELS}.
2437
2438\subsection{{\tt ip tunnel add} --- add a new tunnel\\
2439 {\tt ip tunnel change} --- change an existing tunnel\\
2440 {\tt ip tunnel delete} --- destroy a tunnel}
2441
2442\paragraph{Abbreviations:} \verb|add|, \verb|a|; \verb|change|, \verb|chg|;
2443\verb|delete|, \verb|del|, \verb|d|.
2444
2445
2446\paragraph{Arguments:}
2447
2448\begin{itemize}
2449
2450\item \verb|name NAME| (default)
2451
2452--- select the tunnel device name.
2453
2454\item \verb|mode MODE|
2455
2456--- set the tunnel mode. Three modes are currently available:
2457 \verb|ipip|, \verb|sit| and \verb|gre|.
2458
2459\item \verb|remote ADDRESS|
2460
2461--- set the remote endpoint of the tunnel.
2462
2463\item \verb|local ADDRESS|
2464
2465--- set the fixed local address for tunneled packets.
2466It must be an address on another interface of this host.
2467
2468\item \verb|ttl N|
2469
2470--- set a fixed TTL \verb|N| on tunneled packets.
2471 \verb|N| is a number in the range 1--255. 0 is a special value
2472 meaning that packets inherit the TTL value.
2473 The default value is: \verb|inherit|.
2474
2475\item \verb|tos T| or \verb|dsfield T|
2476
2477--- set a fixed TOS \verb|T| on tunneled packets.
2478 The default value is: \verb|inherit|.
2479
2480
2481
2482\item \verb|dev NAME|
2483
2484--- bind the tunnel to the device \verb|NAME| so that
2485 tunneled packets will only be routed via this device and will
2486 not be able to escape to another device when the route to endpoint changes.
2487
2488\item \verb|nopmtudisc|
2489
2490--- disable Path MTU Discovery on this tunnel.
 It is enabled by default. Note that a fixed TTL is incompatible
 with this option: tunnelling with a fixed TTL always performs Path MTU Discovery.
2493
2494\item \verb|key K|, \verb|ikey K|, \verb|okey K|
2495
2496--- (only GRE tunnels) use keyed GRE with key \verb|K|. \verb|K| is
2497 either a number or an IP address-like dotted quad.
2498 The \verb|key| parameter sets the key to use in both directions.
2499 The \verb|ikey| and \verb|okey| parameters set different keys for input and output.
2500
2501
2502\item \verb|csum|, \verb|icsum|, \verb|ocsum|
2503
2504--- (only GRE tunnels) generate/require checksums for tunneled packets.
2505 The \verb|ocsum| flag calculates checksums for outgoing packets.
2506 The \verb|icsum| flag requires that all input packets have the correct
2507 checksum. The \verb|csum| flag is equivalent to the combination
2508 ``\verb|icsum| \verb|ocsum|''.
2509
2510\item \verb|seq|, \verb|iseq|, \verb|oseq|
2511
2512--- (only GRE tunnels) serialize packets.
2513 The \verb|oseq| flag enables sequencing of outgoing packets.
2514 The \verb|iseq| flag requires that all input packets are serialized.
2515 The \verb|seq| flag is equivalent to the combination ``\verb|iseq| \verb|oseq|''.
2516
2517\begin{NB}
2518 I think this option does not
2519 work. At least, I did not test it, did not debug it and
2520 do not even understand how it is supposed to work or for what
2521 purpose Cisco planned to use it. Do not use it.
2522\end{NB}
2523
2524
2525\end{itemize}
2526
2527\paragraph{Example:} Create a pointopoint IPv6 tunnel with maximal TTL of 32.
2528\begin{verbatim}
2529netadm@amber:~ # ip tunl add Cisco mode sit remote 192.31.7.104 \
2530 local 192.203.80.142 ttl 32
2531\end{verbatim}
2532
2533\subsection{{\tt ip tunnel show} --- list tunnels}
2534
2535\paragraph{Abbreviations:} \verb|show|, \verb|list|, \verb|sh|, \verb|ls|, \verb|l|.
2536
2537
2538\paragraph{Arguments:} None.
2539
2540\paragraph{Output format:}
2541\begin{verbatim}
2542kuznet@amber:~ $ ip tunl ls Cisco
2543Cisco: ipv6/ip remote 192.31.7.104 local 192.203.80.142 ttl 32
2544kuznet@amber:~ $
2545\end{verbatim}
2546The line starts with the tunnel device name followed by a colon.
2547Then the tunnel mode follows. The parameters of the tunnel are listed
2548with the same keywords that were used when creating the tunnel.
2549
2550\paragraph{Statistics:}
2551
2552\begin{verbatim}
2553kuznet@amber:~ $ ip -s tunl ls Cisco
2554Cisco: ipv6/ip remote 192.31.7.104 local 192.203.80.142 ttl 32
2555RX: Packets Bytes Errors CsumErrs OutOfSeq Mcasts
2556 12566 1707516 0 0 0 0
2557TX: Packets Bytes Errors DeadLoop NoRoute NoBufs
2558 13445 1879677 0 0 0 0
2559kuznet@amber:~ $
2560\end{verbatim}
2561Essentially, these numbers are the same as the numbers
2562printed with {\tt ip -s link show}
2563(sec.\ref{IP-LINK-SHOW}, p.\pageref{IP-LINK-SHOW}) but the tags are different
2564to reflect that they are tunnel specific.
2565\begin{itemize}
2566\item \verb|CsumErrs| --- the total number of packets dropped
2567because of checksum failures for a GRE tunnel with checksumming enabled.
2568\item \verb|OutOfSeq| --- the total number of packets dropped
2569because they arrived out of sequence for a GRE tunnel with
2570serialization enabled.
2571\item \verb|Mcasts| --- the total number of multicast packets
2572received on a broadcast GRE tunnel.
2573\item \verb|DeadLoop| --- the total number of packets which were not
2574transmitted because the tunnel is looped back to itself.
2575\item \verb|NoRoute| --- the total number of packets which were not
2576transmitted because there is no IP route to the remote endpoint.
2577\item \verb|NoBufs| --- the total number of packets which were not
2578transmitted because the kernel failed to allocate a buffer.
2579\end{itemize}
2580
2581
2582\section{{\tt ip monitor} and {\tt rtmon} --- state monitoring}
2583\label{IP-MONITOR}
2584
2585The \verb|ip| utility can monitor the state of devices, addresses
2586and routes continuously. This option has a slightly different format.
2587Namely,
2588the \verb|monitor| command is the first in the command line and then
2589the object list follows:
2590\begin{verbatim}
 ip monitor [ file FILE ] [ all | OBJECT-LIST ] [ label ]
\end{verbatim}
\verb|OBJECT-LIST| is the list of object types that we want to
2594monitor. It may contain \verb|link|, \verb|address| and \verb|route|.
2595Specifying \verb|label| indicates that output lines should be labelled
2596with the type of object being printed --- this happens by default if
2597\verb|all| is specified. If no \verb|file| argument is given,
2598\verb|ip| opens RTNETLINK, listens on it and dumps state changes in
the format described in previous sections.

If a file name is given, it does not listen on RTNETLINK,
2602but opens the file containing RTNETLINK messages saved in binary format
2603and dumps them. Such a history file can be generated with the
2604\verb|rtmon| utility. This utility has a command line syntax similar to
2605\verb|ip monitor|.
2606Ideally, \verb|rtmon| should be started before
2607the first network configuration command is issued. F.e.\ if
2608you insert:
2609\begin{verbatim}
2610 rtmon file /var/log/rtmon.log
2611\end{verbatim}
2612in a startup script, you will be able to view the full history
2613later.
2614
2615Certainly, it is possible to start \verb|rtmon| at any time.
2616It prepends the history with the state snapshot dumped at the moment
2617of starting.
2618
2619
2620\section{Route realms and policy propagation, {\tt rtacct}}
2621\label{RT-REALMS}
2622
2623On routers using OSPF ASE or, especially, the BGP protocol, routing
2624tables may be huge. If we want to classify or to account for the packets
2625per route, we will have to keep lots of information. Even worse, if we
2626want to distinguish the packets not only by their destination, but
2627also by their source, the task gets quadratic complexity and its solution
2628is physically impossible.
2629
2630One approach to propagating the policy from routing protocols
2631to the forwarding engine has been proposed in~\cite{IOS-BGP-PP}.
2632Essentially, Cisco Policy Propagation via BGP is based on the fact
2633that dedicated routers all have the RIB (Routing Information Base)
2634close to the forwarding engine, so policy routing rules can
2635check all the route attributes, including ASPATH information
2636and community strings.
2637
2638The Linux architecture, splitting the RIB (maintained by a user level
2639daemon) and the kernel based FIB (Forwarding Information Base),
2640does not allow such a simple approach.
2641
Fortunately, there is another solution
which allows even more flexible policy and richer semantics.
2644
2645Namely, routes can be clustered together in user space, based on their
attributes. F.e.\ a BGP router knows the route's ASPATH and its community;
an OSPF router knows the route tag or its area. The administrator, when adding
routes manually, also knows their nature. Provided that the number of such
2649aggregates (we call them {\em realms\/}) is low, the task of full
2650classification both by source and destination becomes quite manageable.
2651
2652So each route may be assigned to a realm. It is assumed that
2653this identification is made by a routing daemon, but static routes
2654can also be handled manually with \verb|ip route| (see sec.\ref{IP-ROUTE},
2655p.\pageref{IP-ROUTE}).
2656\begin{NB}
2657 There is a patch to \verb|gated|, allowing classification of routes
 to realms with the full set of policy rules implemented in \verb|gated|:
2659 by prefix, by ASPATH, by origin, by tag etc.
2660\end{NB}
2661
2662To facilitate the construction (f.e.\ in case the routing
2663daemon is not aware of realms), missing realms may be completed
2664with routing policy rules, see sec.~\ref{IP-RULE}, p.\pageref{IP-RULE}.
2665
2666For each packet the kernel calculates a tuple of realms: source realm
2667and destination realm, using the following algorithm:
2668
2669\begin{enumerate}
2670\item If the route has a realm, the destination realm of the packet is set to it.
2671\item If the rule has a source realm, the source realm of the packet is set to it.
2672If the destination realm was not inherited from the route and the rule has a destination realm,
2673it is also set.
2674\item If at least one of the realms is still unknown, the kernel finds
2675the reversed route to the source of the packet.
2676\item If the source realm is still unknown, get it from the reversed route.
2677\item If one of the realms is still unknown, swap the realms of reversed
2678routes and apply step 2 again.
2679\end{enumerate}
2680
2681After this procedure is completed we know what realm the packet
2682arrived from and the realm where it is going to propagate to.
2683If some of the realms are unknown, they are initialized to zero
2684(or realm \verb|unknown|).
2685
2686The main application of realms is the TC \verb|route| classifier~\cite{TC-CREF},
2687where they are used to help assign packets to traffic classes,
2688to account, police and schedule them according to this
2689classification.
2690
2691A much simpler but still very useful application is incoming packet
2692accounting by realms. The kernel gathers a packet statistics summary
2693which can be viewed with the \verb|rtacct| utility.
2694\begin{verbatim}
2695kuznet@amber:~ $ rtacct russia
2696Realm BytesTo PktsTo BytesFrom PktsFrom
2697russia 20576778 169176 47080168 153805
2698kuznet@amber:~ $
2699\end{verbatim}
2700This shows that this router received 153805 packets from
2701the realm \verb|russia| and forwarded 169176 packets to \verb|russia|.
2702The realm \verb|russia| consists of routes with ASPATHs not leaving
2703Russia.
2704
2705Note that locally originating packets are not accounted here,
2706\verb|rtacct| shows incoming packets only. Using the \verb|route|
2707classifier (see~\cite{TC-CREF}) you can get even more detailed
2708accounting information about outgoing packets, optionally
2709summarizing traffic not only by source or destination, but
2710by any pair of source and destination realms.
2711
2712
2713\begin{thebibliography}{99}
2714\addcontentsline{toc}{section}{References}
2715\bibitem{RFC-NDISC} T.~Narten, E.~Nordmark, W.~Simpson.
2716``Neighbor Discovery for IP Version 6 (IPv6)'', RFC-2461.
2717
2718\bibitem{RFC-ADDRCONF} S.~Thomson, T.~Narten.
2719``IPv6 Stateless Address Autoconfiguration'', RFC-2462.
2720
2721\bibitem{RFC1812} F.~Baker.
2722``Requirements for IP Version 4 Routers'', RFC-1812.
2723
2724\bibitem{RFC1122} R.~T.~Braden.
2725``Requirements for Internet hosts --- communication layers'', RFC-1122.
2726
2727\bibitem{IOS} ``Cisco IOS Release 12.0 Network Protocols
2728Command Reference, Part 1'' and
2729``Cisco IOS Release 12.0 Quality of Service Solutions
2730Configuration Guide: Configuring Policy-Based Routing'',\\
2731http://www.cisco.com/univercd/cc/td/doc/product/software/ios120.
2732
2733\bibitem{IP-TUNNELS} A.~N.~Kuznetsov.
2734``Tunnels over IP in Linux-2.2'', \\
2735In: {\tt ftp://ftp.inr.ac.ru/ip-routing/iproute2-current.tar.gz}.
2736
2737\bibitem{TC-CREF} A.~N.~Kuznetsov. ``TC Command Reference'',\\
2738In: {\tt ftp://ftp.inr.ac.ru/ip-routing/iproute2-current.tar.gz}.
2739
2740\bibitem{IOS-BGP-PP} ``Cisco IOS Release 12.0 Quality of Service Solutions
2741Configuration Guide: Configuring QoS Policy Propagation via
2742Border Gateway Protocol'',\\
2743http://www.cisco.com/univercd/cc/td/doc/product/software/ios120.
2744
2745\bibitem{RFC-DHCP} R.~Droms.
``Dynamic Host Configuration Protocol'', RFC-2131.
2747
\bibitem{RFC2414} M.~Allman, S.~Floyd, C.~Partridge.
2749``Increasing TCP's Initial Window'', RFC-2414.
2750
\end{thebibliography}
2752
2753
2754
2755
2756\appendix
2757\addcontentsline{toc}{section}{Appendix}
2758
2759\section{Source address selection}
2760\label{ADDR-SEL}
2761
2762When a host creates an IP packet, it must select some source
2763address. Correct source address selection is a critical procedure,
2764because it gives the receiver the information needed to deliver a
2765reply. If the source is selected incorrectly, in the best case,
2766the backward path may appear different to the forward one which
2767is harmful for performance. In the worst case, when the addresses
2768are administratively scoped, the reply may be lost entirely.
2769
2770Linux-2.2 selects source addresses using the following algorithm:
2771
2772\begin{itemize}
2773\item
The application may select a source address explicitly with the \verb|bind(2)|
syscall or by supplying it to \verb|sendmsg(2)| via the ancillary data object
2776\verb|IP_PKTINFO|. In this case the kernel only checks the validity
2777of the address and never tries to ``improve'' an incorrect user choice,
2778generating an error instead.
2779\begin{NB}
2780 Never say ``Never''. The sysctl option \verb|ip_dynaddr| breaks
2781 this axiom. It has been made deliberately with the purpose
2782 of automatically reselecting the address on hosts with dynamic dial-out interfaces.
2783 However, this hack {\em must not\/} be used on multihomed hosts
2784 and especially on routers: it would break them.
2785\end{NB}
2786
2787
2788\item Otherwise, IP routing tables can contain an explicit source
2789address hint for this destination. The hint is set with the \verb|src| parameter
2790to the \verb|ip route| command, sec.\ref{IP-ROUTE}, p.\pageref{IP-ROUTE}.
2791
2792
2793\item Otherwise, the kernel searches through the list of addresses
2794attached to the interface through which the packets will be routed.
2795The search strategies are different for IP and IPv6. Namely:
2796
2797\begin{itemize}
2798\item IPv6 searches for the first valid, not deprecated address
2799with the same scope as the destination.
2800
2801\item IP searches for the first valid address with a scope wider
2802than the scope of the destination but it prefers addresses
which fall into the same subnet as the nexthop of the route
2804to the destination. Unlike IPv6, the scopes of IPv4 destinations
2805are not encoded in their addresses but are supplied
2806in routing tables instead (the \verb|scope| parameter to the \verb|ip route| command,
2807sec.\ref{IP-ROUTE}, p.\pageref{IP-ROUTE}).
2808
2809\end{itemize}
2810
2811
2812\item Otherwise, if the scope of the destination is \verb|link| or \verb|host|,
2813the algorithm fails and returns a zero source address.
2814
2815\item Otherwise, all interfaces are scanned to search for an address
2816with an appropriate scope. The loopback device \verb|lo| is always the first
2817in the search list, so that if an address with global scope (not 127.0.0.1!)
2818is configured on loopback, it is always preferred.
2819
2820\end{itemize}
2821
2822
2823\section{Proxy ARP/NDISC}
2824\label{PROXY-NEIGH}
2825
2826Routers may answer ARP/NDISC solicitations on behalf of other hosts.
2827In Linux-2.2 proxy ARP on an interface may be enabled
2828by setting the kernel \verb|sysctl| variable
2829\verb|/proc/sys/net/ipv4/conf/<dev>/proxy_arp| to 1. After this, the router
2830starts to answer ARP requests on the interface \verb|<dev>|, provided
2831the route to the requested destination does {\em not\/} go back via the same
2832device.
2833
2834The variable \verb|/proc/sys/net/ipv4/conf/all/proxy_arp| enables proxy
2835ARP on all the IP devices.
2836
2837However, this approach fails in the case of IPv6 because the router
2838must join the solicited node multicast address to listen for the corresponding
2839NDISC queries. It means that proxy NDISC is possible only on a per destination
2840basis.
2841
2842Logically, proxy ARP/NDISC is not a kernel task. It can easily be implemented
2843in user space. However, similar functionality was present in BSD kernels
2844and in Linux-2.0, so we have to preserve it at least to the extent that
2845is standardized in BSD.
2846\begin{NB}
2847 Linux-2.0 ARP had a feature called {\em subnet\/} proxy ARP.
2848 It is replaced with the sysctl flag in Linux-2.2.
2849\end{NB}
2850
2851
2852The \verb|ip| utility provides a way to manage proxy ARP/NDISC
2853with the \verb|ip neigh| command, namely:
2854\begin{verbatim}
2855 ip neigh add proxy ADDRESS [ dev NAME ]
2856\end{verbatim}
2857adds a new proxy ARP/NDISC record and
2858\begin{verbatim}
2859 ip neigh del proxy ADDRESS [ dev NAME ]
2860\end{verbatim}
2861deletes it.
2862
2863If the name of the device is not given, the router will answer solicitations
2864for address \verb|ADDRESS| on all devices, otherwise it will only serve
2865the device \verb|NAME|. Even if the proxy entry is created with
2866\verb|ip neigh|, the router {\em will not\/} answer a query if the route
2867to the destination goes back via the interface from which the solicitation
2868was received.
2869
2870It is important to emphasize that proxy entries have {\em no\/}
2871parameters other than these (IP/IPv6 address and optional device).
2872Particularly, the entry does not store any link layer address.
2873It always advertises the station address of the interface
on which it sends advertisements (i.e.\ its own station address).
2875
2876\section{Route NAT status}
2877\label{ROUTE-NAT}
2878
2879NAT (or ``Network Address Translation'') remaps some parts
2880of the IP address space into other ones. Linux-2.2 route NAT is supposed
2881to be used to facilitate policy routing by rewriting addresses
2882to other routing domains or to help while renumbering sites
2883to another prefix.
2884
2885\paragraph{What it is not:}
2886It is necessary to emphasize that {\em it is not supposed\/}
2887to be used to compress address space or to split load.
2888This is not missing functionality but a design principle.
2889Route NAT is {\em stateless\/}. It does not hold any state
2890about translated sessions. This means that it handles any number
2891of sessions flawlessly. But it also means that it is {\em static\/}.
2892It cannot detect the moment when the last TCP client stops
2893using an address. For the same reason, it will not help to split
2894load between several servers.
2895\begin{NB}
2896It is a pretty commonly held belief that it is useful to split load between
2897several servers with NAT. This is a mistake. All you get from this
2898is the requirement that the router keep the state of all the TCP connections
2899going via it. Well, if the router is so powerful, run apache on it. 8)
2900\end{NB}
2901
2902The second feature: it does not touch packet payload,
2903does not try to ``improve'' broken protocols by looking
2904through its data and mangling it. It mangles IP addresses,
2905only IP addresses and nothing but IP addresses.
This, too, is not missing functionality.
2907
To summarize: if you need to compress address space or keep
2909active FTP clients happy, your choice is not route NAT but masquerading,
2910port forwarding, NAPT etc.
2911\begin{NB}
2912By the way, you may also want to look at
\verb|http://www.suse.com/~mha/HyperNews/get/linux-ip-nat.html|
2914\end{NB}
2915
2916
2917\paragraph{How it works.}
2918Some part of the address space is reserved for dummy addresses
2919which will look for all the world like some host addresses
2920inside your network. No other hosts may use these addresses,
2921however other routers may also be configured to translate them.
2922\begin{NB}
2923A great advantage of route NAT is that it may be used not
2924only in stub networks but in environments with arbitrarily complicated
2925structure. It does not firewall, it {\em forwards.}
2926\end{NB}
2927These addresses are selected by the \verb|ip route| command
2928(sec.\ref{IP-ROUTE-ADD}, p.\pageref{IP-ROUTE-ADD}). F.e.\
2929\begin{verbatim}
2930 ip route add nat 192.203.80.144 via 193.233.7.83
2931\end{verbatim}
2932states that the single address 192.203.80.144 is a dummy NAT address.
2933For all the world it looks like a host address inside our network.
2934For neighbouring hosts and routers it looks like the local address
2935of the translating router. The router answers ARP for it, advertises
2936this address as routed via it, {\em et al\/}. When the router
2937receives a packet destined for 192.203.80.144, it replaces
2938this address with 193.233.7.83 which is the address of some real
2939host and forwards the packet. If you need to remap
2940blocks of addresses, you may use a command like:
2941\begin{verbatim}
2942 ip route add nat 192.203.80.192/26 via 193.233.7.64
2943\end{verbatim}
This command will map a block of 64 addresses 192.203.80.192--255 to
193.233.7.64--127.
2946
2947When an internal host (193.233.7.83 in the example above)
2948sends something to the outer world and these packets are forwarded
2949by our router, it should translate the source address 193.233.7.83
2950into 192.203.80.144. This task is solved by setting a special
2951policy rule (sec.\ref{IP-RULE-ADD}, p.\pageref{IP-RULE-ADD}):
2952\begin{verbatim}
2953 ip rule add prio 320 from 193.233.7.83 nat 192.203.80.144
2954\end{verbatim}
2955This rule says that the source address 193.233.7.83
2956should be translated into 192.203.80.144 before forwarding.
2957It is important that the address after the \verb|nat| keyword
2958is some NAT address, declared by {\tt ip route add nat}.
2959If it is just a random address the router will not map to it.
2960\begin{NB}
2961The exception is when the address is a local address of this
2962router (or 0.0.0.0) and masquerading is configured in the linux-2.2
2963kernel. In this case the router will masquerade the packets as this address.
2964If 0.0.0.0 is selected, the result is equivalent to one
obtained with firewalling rules. Otherwise, this gives you a way
to order Linux to masquerade to this fixed address.
The NAT mechanism used in linux-2.4 is more flexible than
masquerading, so this feature has lost its meaning and has been disabled.
2969\end{NB}
2970
2971If the network has non-trivial internal structure, it is
2972useful and even necessary to add rules disabling translation
2973when a packet does not leave this network. Let us return to the
2974example from sec.\ref{IP-RULE-SHOW} (p.\pageref{IP-RULE-SHOW}).
2975\begin{verbatim}
2976300: from 193.233.7.83 to 193.233.7.0/24 lookup main
2977310: from 193.233.7.83 to 192.203.80.0/24 lookup main
2978320: from 193.233.7.83 lookup inr.ruhep map-to 192.203.80.144
2979\end{verbatim}
2980This block of rules causes normal forwarding when
2981packets from 193.233.7.83 do not leave networks 193.233.7/24
2982and 192.203.80/24. Also, if the \verb|inr.ruhep| table does not
2983contain a route to the destination (which means that the routing
2984domain owning addresses from 192.203.80/24 is dead), no translation
2985will occur. Otherwise, the packets are translated.
2986
2987\paragraph{How to only translate selected ports:}
2988If you only want to translate selected ports (f.e.\ http)
2989and leave the rest intact, you may use \verb|ipchains|
2990to \verb|fwmark| a class of packets.
2991Suppose you did and all the packets from 193.233.7.83
2992destined for port 80 are marked with marker 0x1234 in input fwchain.
2993In this case you may replace rule \#320 with:
2994\begin{verbatim}
2995320: from 193.233.7.83 fwmark 1234 lookup main map-to 192.203.80.144
2996\end{verbatim}
2997and translation will only be enabled for outgoing http requests.
2998
2999\section{Example: minimal host setup}
3000\label{EXAMPLE-SETUP}
3001
3002The following script gives an example of a fault safe
3003setup of IP (and IPv6, if it is compiled into the kernel)
3004in the common case of a node attached to a single broadcast
3005network. A more advanced script, which may be used both on multihomed
3006hosts and on routers, is described in the following
3007section.
3008
3009The utilities used in the script may be found in the
3010directory ftp://ftp.inr.ac.ru/ip-routing/:
3011\begin{enumerate}
3012\item \verb|ip| --- package \verb|iproute2|.
3013\item \verb|arping| --- package \verb|iputils|.
3014\item \verb|rdisc| --- package \verb|iputils|.
3015\end{enumerate}
3016\begin{NB}
3017It also refers to a DHCP client, \verb|dhcpcd|. I should refrain from
3018recommending a good DHCP client to use. All that I can
3019say is that ISC \verb|dhcp-2.0b1pl6| patched with the patch that
3020can be found in the \verb|dhcp.bootp.rarp| subdirectory of
3021the same ftp site {\em does\/} work,
3022at least on Ethernet and Token Ring.
3023\end{NB}
3024
3025\begin{verbatim}
3026#! /bin/bash
3027\end{verbatim}
3028\begin{flushleft}
3029\# {\bf Usage: \verb|ifone ADDRESS[/PREFIX-LENGTH] [DEVICE]|}\\
3030\# {\bf Parameters:}\\
3031\# \$1 --- Static IP address, optionally followed by prefix length.\\
\# \$2 --- Device name. If it is missing, \verb|eth0| is assumed.\\
3033\# F.e. \verb|ifone 193.233.7.90|
3034\end{flushleft}
3035\begin{verbatim}
3036dev=$2
3037: ${dev:=eth0}
3038ipaddr=
3039\end{verbatim}
3040\# Parse IP address, splitting prefix length.
3041\begin{verbatim}
3042if [ "$1" != "" ]; then
3043 ipaddr=${1%/*}
3044 if [ "$1" != "$ipaddr" ]; then
3045 pfxlen=${1#*/}
3046 fi
3047 : ${pfxlen:=24}
3048fi
3049pfx="${ipaddr}/${pfxlen}"
3050\end{verbatim}
3051
3052\begin{flushleft}
3053\# {\bf Step 0} --- enable loopback.\\
3054\#\\
\# This step is necessary on any networked box before attempting\\
3056\# to configure any other device.\\
3057\end{flushleft}
3058\begin{verbatim}
3059ip link set up dev lo
3060ip addr add 127.0.0.1/8 dev lo brd + scope host
3061\end{verbatim}
3062\begin{flushleft}
\# IPv6 autoconfigures itself on loopback.\\
\#\\
\# If the user gave loopback as the device, we add the address as an alias and exit.
3066\end{flushleft}
3067\begin{verbatim}
3068if [ "$dev" = "lo" ]; then
3069 if [ "$ipaddr" != "" -a "$ipaddr" != "127.0.0.1" ]; then
3070 ip address add $ipaddr dev $dev
3071 exit $?
3072 fi
3073 exit 0
3074fi
3075\end{verbatim}
3076
3077\noindent\# {\bf Step 1} --- enable device \verb|$dev|
3078
3079\begin{verbatim}
3080if ! ip link set up dev $dev ; then
3081 echo "Cannot enable interface $dev. Aborting." 1>&2
3082 exit 1
3083fi
3084\end{verbatim}
3085\begin{flushleft}
3086\# The interface is \verb|UP|. IPv6 started stateless autoconfiguration itself,\\
3087\# and its configuration finishes here. However,\\
3088\# IP still needs some static preconfigured address.
3089\end{flushleft}
3090\begin{verbatim}
3091if [ "$ipaddr" = "" ]; then
3092 echo "No address for $dev is configured, trying DHCP..." 1>&2
3093 dhcpcd
3094 exit $?
3095fi
3096\end{verbatim}
3097
3098\begin{flushleft}
3099\# {\bf Step 2} --- IP Duplicate Address Detection~\cite{RFC-DHCP}.\\
3100\# Send two probes and wait for result for 3 seconds.\\
3101\# If the interface opens slower f.e.\ due to long media detection,\\
3102\# you want to increase the timeout.\\
3103\end{flushleft}
3104\begin{verbatim}
3105if ! arping -q -c 2 -w 3 -D -I $dev $ipaddr ; then
3106 echo "Address $ipaddr is busy, trying DHCP..." 1>&2
3107 dhcpcd
3108 exit $?
3109fi
3110\end{verbatim}
3111\begin{flushleft}
3112\# OK, the address is unique, we may add it on the interface.\\
3113\#\\
3114\# {\bf Step 3} --- Configure the address on the interface.
3115\end{flushleft}
3116
3117\begin{verbatim}
3118if ! ip address add $pfx brd + dev $dev; then
3119 echo "Failed to add $pfx on $dev, trying DHCP..." 1>&2
3120 dhcpcd
3121 exit $?
3122fi
3123\end{verbatim}
3124
3125\noindent\# {\bf Step 4} --- Announce our presence on the link.
3126\begin{verbatim}
3127arping -A -c 1 -I $dev $ipaddr
3128noarp=$?
3129( sleep 2;
3130 arping -U -c 1 -I $dev $ipaddr ) >& /dev/null </dev/null &
3131\end{verbatim}
3132
3133\begin{flushleft}
3134\# {\bf Step 5} (optional) --- Add some control routes.\\
3135\#\\
3136\# 1. Prohibit link local multicast addresses.\\
3137\# 2. Prohibit link local (alias, limited) broadcast.\\
3138\# 3. Add default multicast route.
3139\end{flushleft}
3140\begin{verbatim}
3141ip route add unreachable 224.0.0.0/24
3142ip route add unreachable 255.255.255.255
3143if [ `ip link ls $dev | grep -c MULTICAST` -ge 1 ]; then
3144 ip route add 224.0.0.0/4 dev $dev scope global
3145fi
3146\end{verbatim}
3147
3148\begin{flushleft}
3149\# {\bf Step 6} --- Add fallback default route with huge metric.\\
3150\# If a proxy ARP server is present on the interface, we will be\\
3151\# able to talk to all the Internet without further configuration.\\
3152\# It is not so cheap though and we still hope that this route\\
\# will be overridden by a more correct one from rdisc.\\
\# Do not perform this step if the device is not ARPable,\\
\# because dead nexthop detection does not work on such devices.
3156\end{flushleft}
3157\begin{verbatim}
3158if [ "$noarp" = "0" ]; then
3159 ip ro add default dev $dev metric 30000 scope global
3160fi
3161\end{verbatim}
3162
3163\begin{flushleft}
3164\# {\bf Step 7} --- Restart router discovery and exit.
3165\end{flushleft}
3166\begin{verbatim}
3167killall -HUP rdisc || rdisc -fs
3168exit 0
3169\end{verbatim}
3170
3171
3172\section{Example: {\protect\tt ifcfg} --- interface address management}
3173\label{EXAMPLE-IFCFG}
3174
3175This is a simplistic script replacing one option of \verb|ifconfig|,
3176namely, IP address management. It not only adds
3177addresses, but also carries out Duplicate Address Detection~\cite{RFC-DHCP},
3178sends unsolicited ARP to update the caches of other hosts sharing
3179the interface, adds some control routes and restarts Router Discovery
3180when it is necessary.
3181
3182I strongly recommend using it {\em instead\/} of \verb|ifconfig| both
3183on hosts and on routers.
3184
3185\begin{verbatim}
3186#! /bin/bash
3187\end{verbatim}
3188\begin{flushleft}
3189\# {\bf Usage: \verb?ifcfg DEVICE[:ALIAS] [add|del] ADDRESS[/LENGTH] [PEER]?}\\
3190\# {\bf Parameters:}\\
3191\# ---Device name. It may have alias suffix, separated by colon.\\
3192\# ---Command: add, delete or stop.\\
3193\# ---IP address, optionally followed by prefix length.\\
3194\# ---Optional peer address for pointopoint interfaces.\\
3195\# E.g.\ \verb|ifcfg eth0 193.233.7.90/24|
3196
3197\noindent\# This function determines whether the machine is a router or a host.\\
3198\# It returns 0 if the host is apparently not a router.
3199\end{flushleft}
3200\begin{verbatim}
3201CheckForwarding () {
3202 local sbase fwd
3203 sbase=/proc/sys/net/ipv4/conf
3204 fwd=0
3205 if [ -d $sbase ]; then
3206 for dir in $sbase/*/forwarding; do
3207 fwd=$[$fwd + `cat $dir`]
3208 done
3209 else
3210 fwd=2
3211 fi
3212 return $fwd
3213}
3214\end{verbatim}
3215\begin{flushleft}
3216\# This function restarts Router Discovery.
3217\end{flushleft}
3218\begin{verbatim}
3219RestartRDISC () {
3220 killall -HUP rdisc || rdisc -fs
3221}
3222\end{verbatim}
3223\begin{flushleft}
3224\# Calculate ABC "natural" mask length\\
3225\# Arg: \$1 = dotquad address
3226\end{flushleft}
3227\begin{verbatim}
3228ABCMaskLen () {
3229 local class;
3230 class=${1%%.*}
3231 if [ $class -eq 0 -o $class -ge 224 ]; then return 0
3232 elif [ $class -ge 192 ]; then return 24
3233 elif [ $class -ge 128 ]; then return 16
3234 else return 8 ; fi
3235}
3236\end{verbatim}
3237
3238
3239\begin{flushleft}
3240\# {\bf MAIN()}\\
3241\#\\
3242\# Strip alias suffix separated by colon.
3243\end{flushleft}
3244\begin{verbatim}
3245label="label $1"
3246ldev=$1
3247dev=${1%:*}
3248if [ "$dev" = "" -o "$1" = "help" ]; then
3249 echo "Usage: ifcfg DEV [[add|del] [ADDR[/LEN]] [PEER] | stop]" 1>&2
3250 echo " add - add new address" 1>&2
3251 echo " del - delete address" 1>&2
3252 echo " stop - completely disable IP" 1>&2
3253 exit 1
3254fi
3255shift
3256
3257CheckForwarding
3258fwd=$?
3259\end{verbatim}
3260\begin{flushleft}
3261\# Parse command. If it is ``stop'', flush and exit.
3262\end{flushleft}
3263\begin{verbatim}
3264deleting=0
3265case "$1" in
3266add) shift ;;
3267stop)
3268 if [ "$ldev" != "$dev" ]; then
3269 echo "Cannot stop alias $ldev" 1>&2
3270 exit 1;
3271 fi
3272 ip -4 addr flush dev $dev $label || exit 1
3273 if [ $fwd -eq 0 ]; then RestartRDISC; fi
3274 exit 0 ;;
3275del*)
3276 deleting=1; shift ;;
3277*)
3278esac
3279\end{verbatim}
3280\begin{flushleft}
3281\# Parse prefix, split prefix length, separated by slash.
3282\end{flushleft}
3283\begin{verbatim}
3284ipaddr=
3285pfxlen=
3286if [ "$1" != "" ]; then
3287 ipaddr=${1%/*}
3288 if [ "$1" != "$ipaddr" ]; then
3289 pfxlen=${1#*/}
3290 fi
3291 if [ "$ipaddr" = "" ]; then
3292 echo "$1 is bad IP address." 1>&2
3293 exit 1
3294 fi
3295fi
3296shift
3297\end{verbatim}
3298\begin{flushleft}
3299\# If peer address is present, prefix length is 32.\\
3300\# Otherwise, if prefix length was not given, guess it.
3301\end{flushleft}
3302\begin{verbatim}
3303peer=$1
3304if [ "$peer" != "" ]; then
3305 if [ "$pfxlen" != "" -a "$pfxlen" != "32" ]; then
3306 echo "Peer address with non-trivial netmask." 1>&2
3307 exit 1
3308 fi
3309 pfx="$ipaddr peer $peer"
3310else
3311 if [ "$pfxlen" = "" ]; then
3312 ABCMaskLen $ipaddr
3313 pfxlen=$?
3314 fi
3315 pfx="$ipaddr/$pfxlen"
3316fi
3317if [ "$ldev" = "$dev" -a "$ipaddr" != "" ]; then
3318 label=
3319fi
3320\end{verbatim}
3321\begin{flushleft}
3322\# If deletion was requested, delete the address and restart RDISC
3323\end{flushleft}
3324\begin{verbatim}
3325if [ $deleting -ne 0 ]; then
3326 ip addr del $pfx dev $dev $label || exit 1
3327 if [ $fwd -eq 0 ]; then RestartRDISC; fi
3328 exit 0
3329fi
3330\end{verbatim}
3331\begin{flushleft}
3332\# Start interface initialization.\\
3333\#\\
3334\# {\bf Step 0} --- enable device \verb|$dev|
3335\end{flushleft}
3336\begin{verbatim}
3337if ! ip link set up dev $dev ; then
3338 echo "Error: cannot enable interface $dev." 1>&2
3339 exit 1
3340fi
3341if [ "$ipaddr" = "" ]; then exit 0; fi
3342\end{verbatim}
3343\begin{flushleft}
3344\# {\bf Step 1} --- IP Duplicate Address Detection~\cite{RFC-DHCP}.\\
3345\# Send two probes and wait for result for 3 seconds.\\
3346\# If the interface comes up more slowly, e.g.\ due to long media detection,\\
3347\# you may want to increase the timeout.
3348\end{flushleft}
3349\begin{verbatim}
3350if ! arping -q -c 2 -w 3 -D -I $dev $ipaddr ; then
3351 echo "Error: some host already uses address $ipaddr on $dev." 1>&2
3352 exit 1
3353fi
3354\end{verbatim}
3355\begin{flushleft}
3356\# OK, the address is unique. We may add it to the interface.\\
3357\#\\
3358\# {\bf Step 2} --- Configure the address on the interface.
3359\end{flushleft}
3360\begin{verbatim}
3361if ! ip address add $pfx brd + dev $dev $label; then
3362 echo "Error: failed to add $pfx on $dev." 1>&2
3363 exit 1
3364fi
3365\end{verbatim}
3366\noindent\# {\bf Step 3} --- Announce our presence on the link
3367\begin{verbatim}
3368arping -q -A -c 1 -I $dev $ipaddr
3369noarp=$?
3370( sleep 2 ;
3371 arping -q -U -c 1 -I $dev $ipaddr ) >& /dev/null </dev/null &
3372\end{verbatim}
3373\begin{flushleft}
3374\# {\bf Step 4} (optional) --- Add some control routes.\\
3375\#\\
3376\# 1. Prohibit link local multicast addresses.\\
3377\# 2. Prohibit link local (alias, limited) broadcast.\\
3378\# 3. Add default multicast route.
3379\end{flushleft}
3380\begin{verbatim}
3381ip route add unreachable 224.0.0.0/24 >& /dev/null
3382ip route add unreachable 255.255.255.255 >& /dev/null
3383if [ `ip link ls $dev | grep -c MULTICAST` -ge 1 ]; then
3384 ip route add 224.0.0.0/4 dev $dev scope global >& /dev/null
3385fi
3386\end{verbatim}
3387\begin{flushleft}
3388\# {\bf Step 5} --- Add fallback default route with huge metric.\\
3389\# If a proxy ARP server is present on the interface, we will be\\
3390\# able to talk to all the Internet without further configuration.\\
3391\# Do not perform this step on a router or if the device is not ARPable,\\
3392\# because dead nexthop detection does not work on them.
3393\end{flushleft}
3394\begin{verbatim}
3395if [ $fwd -eq 0 ]; then
3396 if [ $noarp -eq 0 ]; then
3397 ip ro append default dev $dev metric 30000 scope global
3398 elif [ "$peer" != "" ]; then
3399 if ping -q -c 2 -w 4 $peer ; then
3400 ip ro append default via $peer dev $dev metric 30001
3401 fi
3402 fi
3403 RestartRDISC
3404fi
3405
3406exit 0
3407\end{verbatim}
3408\begin{flushleft}
3409\# End of {\bf MAIN()}
3410\end{flushleft}
3411
3412
3413\end{document}