Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
| 2 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
| 3 | * operating system. INET is implemented using the BSD Socket |
| 4 | * interface as the means of communication with the user level. |
| 5 | * |
| 6 | * IPv4 Forwarding Information Base: policy rules. |
| 7 | * |
| 8 | * Version: $Id: fib_rules.c,v 1.17 2001/10/31 21:55:54 davem Exp $ |
| 9 | * |
| 10 | * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> |
| 11 | * |
| 12 | * This program is free software; you can redistribute it and/or |
| 13 | * modify it under the terms of the GNU General Public License |
| 14 | * as published by the Free Software Foundation; either version |
| 15 | * 2 of the License, or (at your option) any later version. |
| 16 | * |
| 17 | * Fixes: |
| 18 | * Rani Assaf : local_rule cannot be deleted |
| 19 | * Marc Boucher : routing by fwmark |
| 20 | */ |
| 21 | |
| 22 | #include <linux/config.h> |
| 23 | #include <asm/uaccess.h> |
| 24 | #include <asm/system.h> |
| 25 | #include <linux/bitops.h> |
| 26 | #include <linux/types.h> |
| 27 | #include <linux/kernel.h> |
| 28 | #include <linux/sched.h> |
| 29 | #include <linux/mm.h> |
| 30 | #include <linux/string.h> |
| 31 | #include <linux/socket.h> |
| 32 | #include <linux/sockios.h> |
| 33 | #include <linux/errno.h> |
| 34 | #include <linux/in.h> |
| 35 | #include <linux/inet.h> |
| 36 | #include <linux/netdevice.h> |
| 37 | #include <linux/if_arp.h> |
| 38 | #include <linux/proc_fs.h> |
| 39 | #include <linux/skbuff.h> |
| 40 | #include <linux/netlink.h> |
| 41 | #include <linux/init.h> |
| 42 | |
| 43 | #include <net/ip.h> |
| 44 | #include <net/protocol.h> |
| 45 | #include <net/route.h> |
| 46 | #include <net/tcp.h> |
| 47 | #include <net/sock.h> |
| 48 | #include <net/ip_fib.h> |
| 49 | |
/* Debug trace hook for this file; expands to nothing, so its arguments are discarded. */
#define FRprintk(args...)
| 51 | |
| 52 | struct fib_rule |
| 53 | { |
| 54 | struct fib_rule *r_next; |
| 55 | atomic_t r_clntref; |
| 56 | u32 r_preference; |
| 57 | unsigned char r_table; |
| 58 | unsigned char r_action; |
| 59 | unsigned char r_dst_len; |
| 60 | unsigned char r_src_len; |
| 61 | u32 r_src; |
| 62 | u32 r_srcmask; |
| 63 | u32 r_dst; |
| 64 | u32 r_dstmask; |
| 65 | u32 r_srcmap; |
| 66 | u8 r_flags; |
| 67 | u8 r_tos; |
| 68 | #ifdef CONFIG_IP_ROUTE_FWMARK |
| 69 | u32 r_fwmark; |
| 70 | #endif |
| 71 | int r_ifindex; |
| 72 | #ifdef CONFIG_NET_CLS_ROUTE |
| 73 | __u32 r_tclassid; |
| 74 | #endif |
| 75 | char r_ifname[IFNAMSIZ]; |
| 76 | int r_dead; |
| 77 | }; |
| 78 | |
| 79 | static struct fib_rule default_rule = { |
| 80 | .r_clntref = ATOMIC_INIT(2), |
| 81 | .r_preference = 0x7FFF, |
| 82 | .r_table = RT_TABLE_DEFAULT, |
| 83 | .r_action = RTN_UNICAST, |
| 84 | }; |
| 85 | |
| 86 | static struct fib_rule main_rule = { |
| 87 | .r_next = &default_rule, |
| 88 | .r_clntref = ATOMIC_INIT(2), |
| 89 | .r_preference = 0x7FFE, |
| 90 | .r_table = RT_TABLE_MAIN, |
| 91 | .r_action = RTN_UNICAST, |
| 92 | }; |
| 93 | |
| 94 | static struct fib_rule local_rule = { |
| 95 | .r_next = &main_rule, |
| 96 | .r_clntref = ATOMIC_INIT(2), |
| 97 | .r_table = RT_TABLE_LOCAL, |
| 98 | .r_action = RTN_UNICAST, |
| 99 | }; |
| 100 | |
| 101 | static struct fib_rule *fib_rules = &local_rule; |
| 102 | static DEFINE_RWLOCK(fib_rules_lock); |
| 103 | |
| 104 | int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) |
| 105 | { |
| 106 | struct rtattr **rta = arg; |
| 107 | struct rtmsg *rtm = NLMSG_DATA(nlh); |
| 108 | struct fib_rule *r, **rp; |
| 109 | int err = -ESRCH; |
| 110 | |
| 111 | for (rp=&fib_rules; (r=*rp) != NULL; rp=&r->r_next) { |
| 112 | if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) && |
| 113 | rtm->rtm_src_len == r->r_src_len && |
| 114 | rtm->rtm_dst_len == r->r_dst_len && |
| 115 | (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 4) == 0) && |
| 116 | rtm->rtm_tos == r->r_tos && |
| 117 | #ifdef CONFIG_IP_ROUTE_FWMARK |
| 118 | (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) && |
| 119 | #endif |
| 120 | (!rtm->rtm_type || rtm->rtm_type == r->r_action) && |
| 121 | (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) && |
| 122 | (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) && |
| 123 | (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) { |
| 124 | err = -EPERM; |
| 125 | if (r == &local_rule) |
| 126 | break; |
| 127 | |
| 128 | write_lock_bh(&fib_rules_lock); |
| 129 | *rp = r->r_next; |
| 130 | r->r_dead = 1; |
| 131 | write_unlock_bh(&fib_rules_lock); |
| 132 | fib_rule_put(r); |
| 133 | err = 0; |
| 134 | break; |
| 135 | } |
| 136 | } |
| 137 | return err; |
| 138 | } |
| 139 | |
| 140 | /* Allocate new unique table id */ |
| 141 | |
| 142 | static struct fib_table *fib_empty_table(void) |
| 143 | { |
| 144 | int id; |
| 145 | |
| 146 | for (id = 1; id <= RT_TABLE_MAX; id++) |
| 147 | if (fib_tables[id] == NULL) |
| 148 | return __fib_new_table(id); |
| 149 | return NULL; |
| 150 | } |
| 151 | |
| 152 | void fib_rule_put(struct fib_rule *r) |
| 153 | { |
| 154 | if (atomic_dec_and_test(&r->r_clntref)) { |
| 155 | if (r->r_dead) |
| 156 | kfree(r); |
| 157 | else |
| 158 | printk("Freeing alive rule %p\n", r); |
| 159 | } |
| 160 | } |
| 161 | |
| 162 | int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) |
| 163 | { |
| 164 | struct rtattr **rta = arg; |
| 165 | struct rtmsg *rtm = NLMSG_DATA(nlh); |
| 166 | struct fib_rule *r, *new_r, **rp; |
| 167 | unsigned char table_id; |
| 168 | |
| 169 | if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 || |
| 170 | (rtm->rtm_tos & ~IPTOS_TOS_MASK)) |
| 171 | return -EINVAL; |
| 172 | |
| 173 | if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ) |
| 174 | return -EINVAL; |
| 175 | |
| 176 | table_id = rtm->rtm_table; |
| 177 | if (table_id == RT_TABLE_UNSPEC) { |
| 178 | struct fib_table *table; |
| 179 | if (rtm->rtm_type == RTN_UNICAST) { |
| 180 | if ((table = fib_empty_table()) == NULL) |
| 181 | return -ENOBUFS; |
| 182 | table_id = table->tb_id; |
| 183 | } |
| 184 | } |
| 185 | |
| 186 | new_r = kmalloc(sizeof(*new_r), GFP_KERNEL); |
| 187 | if (!new_r) |
| 188 | return -ENOMEM; |
| 189 | memset(new_r, 0, sizeof(*new_r)); |
| 190 | if (rta[RTA_SRC-1]) |
| 191 | memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4); |
| 192 | if (rta[RTA_DST-1]) |
| 193 | memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 4); |
| 194 | if (rta[RTA_GATEWAY-1]) |
| 195 | memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 4); |
| 196 | new_r->r_src_len = rtm->rtm_src_len; |
| 197 | new_r->r_dst_len = rtm->rtm_dst_len; |
| 198 | new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len); |
| 199 | new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len); |
| 200 | new_r->r_tos = rtm->rtm_tos; |
| 201 | #ifdef CONFIG_IP_ROUTE_FWMARK |
| 202 | if (rta[RTA_PROTOINFO-1]) |
| 203 | memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4); |
| 204 | #endif |
| 205 | new_r->r_action = rtm->rtm_type; |
| 206 | new_r->r_flags = rtm->rtm_flags; |
| 207 | if (rta[RTA_PRIORITY-1]) |
| 208 | memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4); |
| 209 | new_r->r_table = table_id; |
| 210 | if (rta[RTA_IIF-1]) { |
| 211 | struct net_device *dev; |
| 212 | rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ); |
| 213 | new_r->r_ifindex = -1; |
| 214 | dev = __dev_get_by_name(new_r->r_ifname); |
| 215 | if (dev) |
| 216 | new_r->r_ifindex = dev->ifindex; |
| 217 | } |
| 218 | #ifdef CONFIG_NET_CLS_ROUTE |
| 219 | if (rta[RTA_FLOW-1]) |
| 220 | memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4); |
| 221 | #endif |
| 222 | |
| 223 | rp = &fib_rules; |
| 224 | if (!new_r->r_preference) { |
| 225 | r = fib_rules; |
| 226 | if (r && (r = r->r_next) != NULL) { |
| 227 | rp = &fib_rules->r_next; |
| 228 | if (r->r_preference) |
| 229 | new_r->r_preference = r->r_preference - 1; |
| 230 | } |
| 231 | } |
| 232 | |
| 233 | while ( (r = *rp) != NULL ) { |
| 234 | if (r->r_preference > new_r->r_preference) |
| 235 | break; |
| 236 | rp = &r->r_next; |
| 237 | } |
| 238 | |
| 239 | new_r->r_next = r; |
| 240 | atomic_inc(&new_r->r_clntref); |
| 241 | write_lock_bh(&fib_rules_lock); |
| 242 | *rp = new_r; |
| 243 | write_unlock_bh(&fib_rules_lock); |
| 244 | return 0; |
| 245 | } |
| 246 | |
#ifdef CONFIG_NET_CLS_ROUTE
/* Return the r_tclassid of the rule attached to a lookup result, or 0 when none is attached. */
u32 fib_rules_tclass(struct fib_result *res)
{
	return res->r ? res->r->r_tclassid : 0;
}
#endif
| 255 | |
| 256 | |
| 257 | static void fib_rules_detach(struct net_device *dev) |
| 258 | { |
| 259 | struct fib_rule *r; |
| 260 | |
| 261 | for (r=fib_rules; r; r=r->r_next) { |
| 262 | if (r->r_ifindex == dev->ifindex) { |
| 263 | write_lock_bh(&fib_rules_lock); |
| 264 | r->r_ifindex = -1; |
| 265 | write_unlock_bh(&fib_rules_lock); |
| 266 | } |
| 267 | } |
| 268 | } |
| 269 | |
| 270 | static void fib_rules_attach(struct net_device *dev) |
| 271 | { |
| 272 | struct fib_rule *r; |
| 273 | |
| 274 | for (r=fib_rules; r; r=r->r_next) { |
| 275 | if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) { |
| 276 | write_lock_bh(&fib_rules_lock); |
| 277 | r->r_ifindex = dev->ifindex; |
| 278 | write_unlock_bh(&fib_rules_lock); |
| 279 | } |
| 280 | } |
| 281 | } |
| 282 | |
| 283 | int fib_lookup(const struct flowi *flp, struct fib_result *res) |
| 284 | { |
| 285 | int err; |
| 286 | struct fib_rule *r, *policy; |
| 287 | struct fib_table *tb; |
| 288 | |
| 289 | u32 daddr = flp->fl4_dst; |
| 290 | u32 saddr = flp->fl4_src; |
| 291 | |
| 292 | FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ", |
| 293 | NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src)); |
| 294 | read_lock(&fib_rules_lock); |
| 295 | for (r = fib_rules; r; r=r->r_next) { |
| 296 | if (((saddr^r->r_src) & r->r_srcmask) || |
| 297 | ((daddr^r->r_dst) & r->r_dstmask) || |
| 298 | (r->r_tos && r->r_tos != flp->fl4_tos) || |
| 299 | #ifdef CONFIG_IP_ROUTE_FWMARK |
| 300 | (r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) || |
| 301 | #endif |
| 302 | (r->r_ifindex && r->r_ifindex != flp->iif)) |
| 303 | continue; |
| 304 | |
| 305 | FRprintk("tb %d r %d ", r->r_table, r->r_action); |
| 306 | switch (r->r_action) { |
| 307 | case RTN_UNICAST: |
| 308 | policy = r; |
| 309 | break; |
| 310 | case RTN_UNREACHABLE: |
| 311 | read_unlock(&fib_rules_lock); |
| 312 | return -ENETUNREACH; |
| 313 | default: |
| 314 | case RTN_BLACKHOLE: |
| 315 | read_unlock(&fib_rules_lock); |
| 316 | return -EINVAL; |
| 317 | case RTN_PROHIBIT: |
| 318 | read_unlock(&fib_rules_lock); |
| 319 | return -EACCES; |
| 320 | } |
| 321 | |
| 322 | if ((tb = fib_get_table(r->r_table)) == NULL) |
| 323 | continue; |
| 324 | err = tb->tb_lookup(tb, flp, res); |
| 325 | if (err == 0) { |
| 326 | res->r = policy; |
| 327 | if (policy) |
| 328 | atomic_inc(&policy->r_clntref); |
| 329 | read_unlock(&fib_rules_lock); |
| 330 | return 0; |
| 331 | } |
| 332 | if (err < 0 && err != -EAGAIN) { |
| 333 | read_unlock(&fib_rules_lock); |
| 334 | return err; |
| 335 | } |
| 336 | } |
| 337 | FRprintk("FAILURE\n"); |
| 338 | read_unlock(&fib_rules_lock); |
| 339 | return -ENETUNREACH; |
| 340 | } |
| 341 | |
| 342 | void fib_select_default(const struct flowi *flp, struct fib_result *res) |
| 343 | { |
| 344 | if (res->r && res->r->r_action == RTN_UNICAST && |
| 345 | FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { |
| 346 | struct fib_table *tb; |
| 347 | if ((tb = fib_get_table(res->r->r_table)) != NULL) |
| 348 | tb->tb_select_default(tb, flp, res); |
| 349 | } |
| 350 | } |
| 351 | |
| 352 | static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) |
| 353 | { |
| 354 | struct net_device *dev = ptr; |
| 355 | |
| 356 | if (event == NETDEV_UNREGISTER) |
| 357 | fib_rules_detach(dev); |
| 358 | else if (event == NETDEV_REGISTER) |
| 359 | fib_rules_attach(dev); |
| 360 | return NOTIFY_DONE; |
| 361 | } |
| 362 | |
| 363 | |
| 364 | static struct notifier_block fib_rules_notifier = { |
| 365 | .notifier_call =fib_rules_event, |
| 366 | }; |
| 367 | |
| 368 | static __inline__ int inet_fill_rule(struct sk_buff *skb, |
| 369 | struct fib_rule *r, |
Jamal Hadi Salim | b6544c0 | 2005-06-18 22:54:12 -0700 | [diff] [blame] | 370 | struct netlink_callback *cb, |
| 371 | unsigned int flags) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 372 | { |
| 373 | struct rtmsg *rtm; |
| 374 | struct nlmsghdr *nlh; |
| 375 | unsigned char *b = skb->tail; |
| 376 | |
Jamal Hadi Salim | b6544c0 | 2005-06-18 22:54:12 -0700 | [diff] [blame] | 377 | nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 378 | rtm = NLMSG_DATA(nlh); |
| 379 | rtm->rtm_family = AF_INET; |
| 380 | rtm->rtm_dst_len = r->r_dst_len; |
| 381 | rtm->rtm_src_len = r->r_src_len; |
| 382 | rtm->rtm_tos = r->r_tos; |
| 383 | #ifdef CONFIG_IP_ROUTE_FWMARK |
| 384 | if (r->r_fwmark) |
| 385 | RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark); |
| 386 | #endif |
| 387 | rtm->rtm_table = r->r_table; |
| 388 | rtm->rtm_protocol = 0; |
| 389 | rtm->rtm_scope = 0; |
| 390 | rtm->rtm_type = r->r_action; |
| 391 | rtm->rtm_flags = r->r_flags; |
| 392 | |
| 393 | if (r->r_dst_len) |
| 394 | RTA_PUT(skb, RTA_DST, 4, &r->r_dst); |
| 395 | if (r->r_src_len) |
| 396 | RTA_PUT(skb, RTA_SRC, 4, &r->r_src); |
| 397 | if (r->r_ifname[0]) |
| 398 | RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname); |
| 399 | if (r->r_preference) |
| 400 | RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference); |
| 401 | if (r->r_srcmap) |
| 402 | RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap); |
| 403 | #ifdef CONFIG_NET_CLS_ROUTE |
| 404 | if (r->r_tclassid) |
| 405 | RTA_PUT(skb, RTA_FLOW, 4, &r->r_tclassid); |
| 406 | #endif |
| 407 | nlh->nlmsg_len = skb->tail - b; |
| 408 | return skb->len; |
| 409 | |
| 410 | nlmsg_failure: |
| 411 | rtattr_failure: |
| 412 | skb_trim(skb, b - skb->data); |
| 413 | return -1; |
| 414 | } |
| 415 | |
| 416 | int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) |
| 417 | { |
| 418 | int idx; |
| 419 | int s_idx = cb->args[0]; |
| 420 | struct fib_rule *r; |
| 421 | |
| 422 | read_lock(&fib_rules_lock); |
| 423 | for (r=fib_rules, idx=0; r; r = r->r_next, idx++) { |
| 424 | if (idx < s_idx) |
| 425 | continue; |
Jamal Hadi Salim | b6544c0 | 2005-06-18 22:54:12 -0700 | [diff] [blame] | 426 | if (inet_fill_rule(skb, r, cb, NLM_F_MULTI) < 0) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 427 | break; |
| 428 | } |
| 429 | read_unlock(&fib_rules_lock); |
| 430 | cb->args[0] = idx; |
| 431 | |
| 432 | return skb->len; |
| 433 | } |
| 434 | |
| 435 | void __init fib_rules_init(void) |
| 436 | { |
| 437 | register_netdevice_notifier(&fib_rules_notifier); |
| 438 | } |