/*
 * net/sched/cls_tcindex.c	Packet classifier for skb->tc_index
 *
 * Written 1998,1999 by Werner Almesberger, EPFL ICA
 */
| 6 | |
| 7 | #include <linux/config.h> |
| 8 | #include <linux/module.h> |
| 9 | #include <linux/types.h> |
| 10 | #include <linux/kernel.h> |
| 11 | #include <linux/skbuff.h> |
| 12 | #include <linux/errno.h> |
| 13 | #include <linux/netdevice.h> |
| 14 | #include <net/ip.h> |
| 15 | #include <net/act_api.h> |
| 16 | #include <net/pkt_cls.h> |
| 17 | #include <net/route.h> |
| 18 | |
| 19 | |
/*
 * Not quite sure if we need all the xchgs Alexey uses when accessing things.
 * Can always add them later ... :)
 */
| 24 | |
/*
 * Passing parameters to the root seems to be done more awkwardly than really
 * necessary. At least, u32 doesn't seem to use such dirty hacks. To be
 * verified. FIXME.
 */
| 30 | |
/* Hash sizing defaults. */
#define PERFECT_HASH_THRESHOLD	64	/* use perfect hash if not bigger */
#define DEFAULT_HASH_SIZE	64	/* optimized for diffserv */


/* Control-path tracing: currently compiled in. */
#if 1 /* control */
#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args)
#else
#define DPRINTK(format,args...)
#endif

/* Data-path tracing: currently compiled out. */
#if 0 /* data */
#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args)
#else
#define D2PRINTK(format,args...)
#endif


/* The classifier's private data is stored in tp->root. */
#define PRIV(tp) ((struct tcindex_data *) (tp)->root)
| 49 | |
| 50 | |
| 51 | struct tcindex_filter_result { |
| 52 | struct tcf_exts exts; |
| 53 | struct tcf_result res; |
| 54 | }; |
| 55 | |
| 56 | struct tcindex_filter { |
| 57 | u16 key; |
| 58 | struct tcindex_filter_result result; |
| 59 | struct tcindex_filter *next; |
| 60 | }; |
| 61 | |
| 62 | |
| 63 | struct tcindex_data { |
| 64 | struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */ |
| 65 | struct tcindex_filter **h; /* imperfect hash; only used if !perfect; |
| 66 | NULL if unused */ |
| 67 | u16 mask; /* AND key with mask */ |
| 68 | int shift; /* shift ANDed key to the right */ |
| 69 | int hash; /* hash table size; 0 if undefined */ |
| 70 | int alloc_hash; /* allocated size */ |
| 71 | int fall_through; /* 0: only classify if explicit match */ |
| 72 | }; |
| 73 | |
| 74 | static struct tcf_ext_map tcindex_ext_map = { |
| 75 | .police = TCA_TCINDEX_POLICE, |
| 76 | .action = TCA_TCINDEX_ACT |
| 77 | }; |
| 78 | |
| 79 | static inline int |
| 80 | tcindex_filter_is_set(struct tcindex_filter_result *r) |
| 81 | { |
| 82 | return tcf_exts_is_predicative(&r->exts) || r->res.classid; |
| 83 | } |
| 84 | |
| 85 | static struct tcindex_filter_result * |
| 86 | tcindex_lookup(struct tcindex_data *p, u16 key) |
| 87 | { |
| 88 | struct tcindex_filter *f; |
| 89 | |
| 90 | if (p->perfect) |
| 91 | return tcindex_filter_is_set(p->perfect + key) ? |
| 92 | p->perfect + key : NULL; |
| 93 | else if (p->h) { |
| 94 | for (f = p->h[key % p->hash]; f; f = f->next) |
| 95 | if (f->key == key) |
| 96 | return &f->result; |
| 97 | } |
| 98 | |
| 99 | return NULL; |
| 100 | } |
| 101 | |
| 102 | |
| 103 | static int tcindex_classify(struct sk_buff *skb, struct tcf_proto *tp, |
| 104 | struct tcf_result *res) |
| 105 | { |
| 106 | struct tcindex_data *p = PRIV(tp); |
| 107 | struct tcindex_filter_result *f; |
| 108 | int key = (skb->tc_index & p->mask) >> p->shift; |
| 109 | |
| 110 | D2PRINTK("tcindex_classify(skb %p,tp %p,res %p),p %p\n",skb,tp,res,p); |
| 111 | |
| 112 | f = tcindex_lookup(p, key); |
| 113 | if (!f) { |
| 114 | if (!p->fall_through) |
| 115 | return -1; |
| 116 | res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key); |
| 117 | res->class = 0; |
| 118 | D2PRINTK("alg 0x%x\n",res->classid); |
| 119 | return 0; |
| 120 | } |
| 121 | *res = f->res; |
| 122 | D2PRINTK("map 0x%x\n",res->classid); |
| 123 | |
| 124 | return tcf_exts_exec(skb, &f->exts, res); |
| 125 | } |
| 126 | |
| 127 | |
| 128 | static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle) |
| 129 | { |
| 130 | struct tcindex_data *p = PRIV(tp); |
| 131 | struct tcindex_filter_result *r; |
| 132 | |
| 133 | DPRINTK("tcindex_get(tp %p,handle 0x%08x)\n",tp,handle); |
| 134 | if (p->perfect && handle >= p->alloc_hash) |
| 135 | return 0; |
| 136 | r = tcindex_lookup(p, handle); |
| 137 | return r && tcindex_filter_is_set(r) ? (unsigned long) r : 0UL; |
| 138 | } |
| 139 | |
| 140 | |
/* Release a reference obtained from tcindex_get(); only traces the call. */
static void tcindex_put(struct tcf_proto *tp, unsigned long f)
{
	DPRINTK("tcindex_put(tp %p,f 0x%lx)\n",tp,f);
}
| 145 | |
| 146 | |
| 147 | static int tcindex_init(struct tcf_proto *tp) |
| 148 | { |
| 149 | struct tcindex_data *p; |
| 150 | |
| 151 | DPRINTK("tcindex_init(tp %p)\n",tp); |
| 152 | p = kmalloc(sizeof(struct tcindex_data),GFP_KERNEL); |
| 153 | if (!p) |
| 154 | return -ENOMEM; |
| 155 | |
| 156 | memset(p, 0, sizeof(*p)); |
| 157 | p->mask = 0xffff; |
| 158 | p->hash = DEFAULT_HASH_SIZE; |
| 159 | p->fall_through = 1; |
| 160 | |
| 161 | tp->root = p; |
| 162 | return 0; |
| 163 | } |
| 164 | |
| 165 | |
| 166 | static int |
| 167 | __tcindex_delete(struct tcf_proto *tp, unsigned long arg, int lock) |
| 168 | { |
| 169 | struct tcindex_data *p = PRIV(tp); |
| 170 | struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg; |
| 171 | struct tcindex_filter *f = NULL; |
| 172 | |
| 173 | DPRINTK("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n",tp,arg,p,f); |
| 174 | if (p->perfect) { |
| 175 | if (!r->res.class) |
| 176 | return -ENOENT; |
| 177 | } else { |
| 178 | int i; |
| 179 | struct tcindex_filter **walk = NULL; |
| 180 | |
| 181 | for (i = 0; i < p->hash; i++) |
| 182 | for (walk = p->h+i; *walk; walk = &(*walk)->next) |
| 183 | if (&(*walk)->result == r) |
| 184 | goto found; |
| 185 | return -ENOENT; |
| 186 | |
| 187 | found: |
| 188 | f = *walk; |
| 189 | if (lock) |
| 190 | tcf_tree_lock(tp); |
| 191 | *walk = f->next; |
| 192 | if (lock) |
| 193 | tcf_tree_unlock(tp); |
| 194 | } |
| 195 | tcf_unbind_filter(tp, &r->res); |
| 196 | tcf_exts_destroy(tp, &r->exts); |
| 197 | if (f) |
| 198 | kfree(f); |
| 199 | return 0; |
| 200 | } |
| 201 | |
/* .delete hook: remove one filter, taking the tree lock while unlinking. */
static int tcindex_delete(struct tcf_proto *tp, unsigned long arg)
{
	const int take_lock = 1;

	return __tcindex_delete(tp, arg, take_lock);
}
| 206 | |
| 207 | static inline int |
| 208 | valid_perfect_hash(struct tcindex_data *p) |
| 209 | { |
| 210 | return p->hash > (p->mask >> p->shift); |
| 211 | } |
| 212 | |
| 213 | static int |
| 214 | tcindex_set_parms(struct tcf_proto *tp, unsigned long base, u32 handle, |
| 215 | struct tcindex_data *p, struct tcindex_filter_result *r, |
| 216 | struct rtattr **tb, struct rtattr *est) |
| 217 | { |
| 218 | int err, balloc = 0; |
| 219 | struct tcindex_filter_result new_filter_result, *old_r = r; |
| 220 | struct tcindex_filter_result cr; |
| 221 | struct tcindex_data cp; |
| 222 | struct tcindex_filter *f = NULL; /* make gcc behave */ |
| 223 | struct tcf_exts e; |
| 224 | |
| 225 | err = tcf_exts_validate(tp, tb, est, &e, &tcindex_ext_map); |
| 226 | if (err < 0) |
| 227 | return err; |
| 228 | |
| 229 | memcpy(&cp, p, sizeof(cp)); |
| 230 | memset(&new_filter_result, 0, sizeof(new_filter_result)); |
| 231 | |
| 232 | if (old_r) |
| 233 | memcpy(&cr, r, sizeof(cr)); |
| 234 | else |
| 235 | memset(&cr, 0, sizeof(cr)); |
| 236 | |
| 237 | err = -EINVAL; |
| 238 | if (tb[TCA_TCINDEX_HASH-1]) { |
| 239 | if (RTA_PAYLOAD(tb[TCA_TCINDEX_HASH-1]) < sizeof(u32)) |
| 240 | goto errout; |
| 241 | cp.hash = *(u32 *) RTA_DATA(tb[TCA_TCINDEX_HASH-1]); |
| 242 | } |
| 243 | |
| 244 | if (tb[TCA_TCINDEX_MASK-1]) { |
| 245 | if (RTA_PAYLOAD(tb[TCA_TCINDEX_MASK-1]) < sizeof(u16)) |
| 246 | goto errout; |
| 247 | cp.mask = *(u16 *) RTA_DATA(tb[TCA_TCINDEX_MASK-1]); |
| 248 | } |
| 249 | |
| 250 | if (tb[TCA_TCINDEX_SHIFT-1]) { |
| 251 | if (RTA_PAYLOAD(tb[TCA_TCINDEX_SHIFT-1]) < sizeof(u16)) |
| 252 | goto errout; |
| 253 | cp.shift = *(u16 *) RTA_DATA(tb[TCA_TCINDEX_SHIFT-1]); |
| 254 | } |
| 255 | |
| 256 | err = -EBUSY; |
| 257 | /* Hash already allocated, make sure that we still meet the |
| 258 | * requirements for the allocated hash. |
| 259 | */ |
| 260 | if (cp.perfect) { |
| 261 | if (!valid_perfect_hash(&cp) || |
| 262 | cp.hash > cp.alloc_hash) |
| 263 | goto errout; |
| 264 | } else if (cp.h && cp.hash != cp.alloc_hash) |
| 265 | goto errout; |
| 266 | |
| 267 | err = -EINVAL; |
| 268 | if (tb[TCA_TCINDEX_FALL_THROUGH-1]) { |
| 269 | if (RTA_PAYLOAD(tb[TCA_TCINDEX_FALL_THROUGH-1]) < sizeof(u32)) |
| 270 | goto errout; |
| 271 | cp.fall_through = |
| 272 | *(u32 *) RTA_DATA(tb[TCA_TCINDEX_FALL_THROUGH-1]); |
| 273 | } |
| 274 | |
| 275 | if (!cp.hash) { |
| 276 | /* Hash not specified, use perfect hash if the upper limit |
| 277 | * of the hashing index is below the threshold. |
| 278 | */ |
| 279 | if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD) |
| 280 | cp.hash = (cp.mask >> cp.shift)+1; |
| 281 | else |
| 282 | cp.hash = DEFAULT_HASH_SIZE; |
| 283 | } |
| 284 | |
| 285 | if (!cp.perfect && !cp.h) |
| 286 | cp.alloc_hash = cp.hash; |
| 287 | |
| 288 | /* Note: this could be as restrictive as if (handle & ~(mask >> shift)) |
| 289 | * but then, we'd fail handles that may become valid after some future |
| 290 | * mask change. While this is extremely unlikely to ever matter, |
| 291 | * the check below is safer (and also more backwards-compatible). |
| 292 | */ |
| 293 | if (cp.perfect || valid_perfect_hash(&cp)) |
| 294 | if (handle >= cp.alloc_hash) |
| 295 | goto errout; |
| 296 | |
| 297 | |
| 298 | err = -ENOMEM; |
| 299 | if (!cp.perfect && !cp.h) { |
| 300 | if (valid_perfect_hash(&cp)) { |
| 301 | cp.perfect = kmalloc(cp.hash * sizeof(*r), GFP_KERNEL); |
| 302 | if (!cp.perfect) |
| 303 | goto errout; |
| 304 | memset(cp.perfect, 0, cp.hash * sizeof(*r)); |
| 305 | balloc = 1; |
| 306 | } else { |
| 307 | cp.h = kmalloc(cp.hash * sizeof(f), GFP_KERNEL); |
| 308 | if (!cp.h) |
| 309 | goto errout; |
| 310 | memset(cp.h, 0, cp.hash * sizeof(f)); |
| 311 | balloc = 2; |
| 312 | } |
| 313 | } |
| 314 | |
| 315 | if (cp.perfect) |
| 316 | r = cp.perfect + handle; |
| 317 | else |
| 318 | r = tcindex_lookup(&cp, handle) ? : &new_filter_result; |
| 319 | |
| 320 | if (r == &new_filter_result) { |
| 321 | f = kmalloc(sizeof(*f), GFP_KERNEL); |
| 322 | if (!f) |
| 323 | goto errout_alloc; |
| 324 | memset(f, 0, sizeof(*f)); |
| 325 | } |
| 326 | |
| 327 | if (tb[TCA_TCINDEX_CLASSID-1]) { |
| 328 | cr.res.classid = *(u32 *) RTA_DATA(tb[TCA_TCINDEX_CLASSID-1]); |
| 329 | tcf_bind_filter(tp, &cr.res, base); |
| 330 | } |
| 331 | |
| 332 | tcf_exts_change(tp, &cr.exts, &e); |
| 333 | |
| 334 | tcf_tree_lock(tp); |
| 335 | if (old_r && old_r != r) |
| 336 | memset(old_r, 0, sizeof(*old_r)); |
| 337 | |
| 338 | memcpy(p, &cp, sizeof(cp)); |
| 339 | memcpy(r, &cr, sizeof(cr)); |
| 340 | |
| 341 | if (r == &new_filter_result) { |
| 342 | struct tcindex_filter **fp; |
| 343 | |
| 344 | f->key = handle; |
| 345 | f->result = new_filter_result; |
| 346 | f->next = NULL; |
| 347 | for (fp = p->h+(handle % p->hash); *fp; fp = &(*fp)->next) |
| 348 | /* nothing */; |
| 349 | *fp = f; |
| 350 | } |
| 351 | tcf_tree_unlock(tp); |
| 352 | |
| 353 | return 0; |
| 354 | |
| 355 | errout_alloc: |
| 356 | if (balloc == 1) |
| 357 | kfree(cp.perfect); |
| 358 | else if (balloc == 2) |
| 359 | kfree(cp.h); |
| 360 | errout: |
| 361 | tcf_exts_destroy(tp, &e); |
| 362 | return err; |
| 363 | } |
| 364 | |
| 365 | static int |
| 366 | tcindex_change(struct tcf_proto *tp, unsigned long base, u32 handle, |
| 367 | struct rtattr **tca, unsigned long *arg) |
| 368 | { |
| 369 | struct rtattr *opt = tca[TCA_OPTIONS-1]; |
| 370 | struct rtattr *tb[TCA_TCINDEX_MAX]; |
| 371 | struct tcindex_data *p = PRIV(tp); |
| 372 | struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg; |
| 373 | |
| 374 | DPRINTK("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p," |
| 375 | "p %p,r %p,*arg 0x%lx\n", |
| 376 | tp, handle, tca, arg, opt, p, r, arg ? *arg : 0L); |
| 377 | |
| 378 | if (!opt) |
| 379 | return 0; |
| 380 | |
| 381 | if (rtattr_parse_nested(tb, TCA_TCINDEX_MAX, opt) < 0) |
| 382 | return -EINVAL; |
| 383 | |
| 384 | return tcindex_set_parms(tp, base, handle, p, r, tb, tca[TCA_RATE-1]); |
| 385 | } |
| 386 | |
| 387 | |
| 388 | static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) |
| 389 | { |
| 390 | struct tcindex_data *p = PRIV(tp); |
| 391 | struct tcindex_filter *f,*next; |
| 392 | int i; |
| 393 | |
| 394 | DPRINTK("tcindex_walk(tp %p,walker %p),p %p\n",tp,walker,p); |
| 395 | if (p->perfect) { |
| 396 | for (i = 0; i < p->hash; i++) { |
| 397 | if (!p->perfect[i].res.class) |
| 398 | continue; |
| 399 | if (walker->count >= walker->skip) { |
| 400 | if (walker->fn(tp, |
| 401 | (unsigned long) (p->perfect+i), walker) |
| 402 | < 0) { |
| 403 | walker->stop = 1; |
| 404 | return; |
| 405 | } |
| 406 | } |
| 407 | walker->count++; |
| 408 | } |
| 409 | } |
| 410 | if (!p->h) |
| 411 | return; |
| 412 | for (i = 0; i < p->hash; i++) { |
| 413 | for (f = p->h[i]; f; f = next) { |
| 414 | next = f->next; |
| 415 | if (walker->count >= walker->skip) { |
| 416 | if (walker->fn(tp,(unsigned long) &f->result, |
| 417 | walker) < 0) { |
| 418 | walker->stop = 1; |
| 419 | return; |
| 420 | } |
| 421 | } |
| 422 | walker->count++; |
| 423 | } |
| 424 | } |
| 425 | } |
| 426 | |
| 427 | |
/*
 * Walker callback used by tcindex_destroy(): delete one filter without
 * taking the tree lock.
 */
static int tcindex_destroy_element(struct tcf_proto *tp,
    unsigned long arg, struct tcf_walker *walker)
{
	const int take_lock = 0;

	return __tcindex_delete(tp, arg, take_lock);
}
| 433 | |
| 434 | |
| 435 | static void tcindex_destroy(struct tcf_proto *tp) |
| 436 | { |
| 437 | struct tcindex_data *p = PRIV(tp); |
| 438 | struct tcf_walker walker; |
| 439 | |
| 440 | DPRINTK("tcindex_destroy(tp %p),p %p\n",tp,p); |
| 441 | walker.count = 0; |
| 442 | walker.skip = 0; |
| 443 | walker.fn = &tcindex_destroy_element; |
| 444 | tcindex_walk(tp,&walker); |
| 445 | if (p->perfect) |
| 446 | kfree(p->perfect); |
| 447 | if (p->h) |
| 448 | kfree(p->h); |
| 449 | kfree(p); |
| 450 | tp->root = NULL; |
| 451 | } |
| 452 | |
| 453 | |
| 454 | static int tcindex_dump(struct tcf_proto *tp, unsigned long fh, |
| 455 | struct sk_buff *skb, struct tcmsg *t) |
| 456 | { |
| 457 | struct tcindex_data *p = PRIV(tp); |
| 458 | struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; |
| 459 | unsigned char *b = skb->tail; |
| 460 | struct rtattr *rta; |
| 461 | |
| 462 | DPRINTK("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p,b %p\n", |
| 463 | tp,fh,skb,t,p,r,b); |
| 464 | DPRINTK("p->perfect %p p->h %p\n",p->perfect,p->h); |
| 465 | rta = (struct rtattr *) b; |
| 466 | RTA_PUT(skb,TCA_OPTIONS,0,NULL); |
| 467 | if (!fh) { |
| 468 | t->tcm_handle = ~0; /* whatever ... */ |
| 469 | RTA_PUT(skb,TCA_TCINDEX_HASH,sizeof(p->hash),&p->hash); |
| 470 | RTA_PUT(skb,TCA_TCINDEX_MASK,sizeof(p->mask),&p->mask); |
| 471 | RTA_PUT(skb,TCA_TCINDEX_SHIFT,sizeof(p->shift),&p->shift); |
| 472 | RTA_PUT(skb,TCA_TCINDEX_FALL_THROUGH,sizeof(p->fall_through), |
| 473 | &p->fall_through); |
| 474 | rta->rta_len = skb->tail-b; |
| 475 | } else { |
| 476 | if (p->perfect) { |
| 477 | t->tcm_handle = r-p->perfect; |
| 478 | } else { |
| 479 | struct tcindex_filter *f; |
| 480 | int i; |
| 481 | |
| 482 | t->tcm_handle = 0; |
| 483 | for (i = 0; !t->tcm_handle && i < p->hash; i++) { |
| 484 | for (f = p->h[i]; !t->tcm_handle && f; |
| 485 | f = f->next) { |
| 486 | if (&f->result == r) |
| 487 | t->tcm_handle = f->key; |
| 488 | } |
| 489 | } |
| 490 | } |
| 491 | DPRINTK("handle = %d\n",t->tcm_handle); |
| 492 | if (r->res.class) |
| 493 | RTA_PUT(skb, TCA_TCINDEX_CLASSID, 4, &r->res.classid); |
| 494 | |
| 495 | if (tcf_exts_dump(skb, &r->exts, &tcindex_ext_map) < 0) |
| 496 | goto rtattr_failure; |
| 497 | rta->rta_len = skb->tail-b; |
| 498 | |
| 499 | if (tcf_exts_dump_stats(skb, &r->exts, &tcindex_ext_map) < 0) |
| 500 | goto rtattr_failure; |
| 501 | } |
| 502 | |
| 503 | return skb->len; |
| 504 | |
| 505 | rtattr_failure: |
| 506 | skb_trim(skb, b - skb->data); |
| 507 | return -1; |
| 508 | } |
| 509 | |
| 510 | static struct tcf_proto_ops cls_tcindex_ops = { |
| 511 | .next = NULL, |
| 512 | .kind = "tcindex", |
| 513 | .classify = tcindex_classify, |
| 514 | .init = tcindex_init, |
| 515 | .destroy = tcindex_destroy, |
| 516 | .get = tcindex_get, |
| 517 | .put = tcindex_put, |
| 518 | .change = tcindex_change, |
| 519 | .delete = tcindex_delete, |
| 520 | .walk = tcindex_walk, |
| 521 | .dump = tcindex_dump, |
| 522 | .owner = THIS_MODULE, |
| 523 | }; |
| 524 | |
| 525 | static int __init init_tcindex(void) |
| 526 | { |
| 527 | return register_tcf_proto_ops(&cls_tcindex_ops); |
| 528 | } |
| 529 | |
| 530 | static void __exit exit_tcindex(void) |
| 531 | { |
| 532 | unregister_tcf_proto_ops(&cls_tcindex_ops); |
| 533 | } |
| 534 | |
| 535 | module_init(init_tcindex) |
| 536 | module_exit(exit_tcindex) |
| 537 | MODULE_LICENSE("GPL"); |