/*
 * This is a module which is used for setting the MSS option in TCP packets.
 *
 * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
| 10 | |
| 11 | #include <linux/module.h> |
| 12 | #include <linux/skbuff.h> |
| 13 | |
| 14 | #include <linux/ip.h> |
| 15 | #include <net/tcp.h> |
| 16 | |
| 17 | #include <linux/netfilter_ipv4/ip_tables.h> |
| 18 | #include <linux/netfilter_ipv4/ipt_TCPMSS.h> |
| 19 | |
| 20 | MODULE_LICENSE("GPL"); |
| 21 | MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); |
| 22 | MODULE_DESCRIPTION("iptables TCP MSS modification module"); |
| 23 | |
/*
 * Debug tracing switch: with "#if 0" DEBUGP() expands to nothing, so the
 * trace calls in this file compile away. Change it to "#if 1" to make
 * DEBUGP an alias for printk.
 */
#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif
| 29 | |
| 30 | static u_int16_t |
| 31 | cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) |
| 32 | { |
| 33 | u_int32_t diffs[] = { oldvalinv, newval }; |
| 34 | return csum_fold(csum_partial((char *)diffs, sizeof(diffs), |
| 35 | oldcheck^0xFFFF)); |
| 36 | } |
| 37 | |
/*
 * Return how many bytes to advance past the TCP option at opt[offset].
 *
 * Option kinds up to and including TCPOPT_NOP are treated as one byte long
 * and their length byte is not read. For any other kind the length byte
 * opt[offset + 1] is returned, except that a length of zero is reported as
 * 1 so that a caller walking the option list always makes progress.
 *
 * The caller must guarantee that opt[offset + 1] is readable whenever the
 * kind is greater than TCPOPT_NOP.
 */
static inline unsigned int
optlen(const u_int8_t *opt, unsigned int offset)
{
	const u_int8_t kind = opt[offset];
	u_int8_t len;

	if (kind <= TCPOPT_NOP)
		return 1;

	len = opt[offset + 1];
	return len ? len : 1;
}
| 45 | |
| 46 | static unsigned int |
| 47 | ipt_tcpmss_target(struct sk_buff **pskb, |
| 48 | const struct net_device *in, |
| 49 | const struct net_device *out, |
| 50 | unsigned int hooknum, |
| 51 | const void *targinfo, |
| 52 | void *userinfo) |
| 53 | { |
| 54 | const struct ipt_tcpmss_info *tcpmssinfo = targinfo; |
| 55 | struct tcphdr *tcph; |
| 56 | struct iphdr *iph; |
| 57 | u_int16_t tcplen, newtotlen, oldval, newmss; |
| 58 | unsigned int i; |
| 59 | u_int8_t *opt; |
| 60 | |
| 61 | if (!skb_ip_make_writable(pskb, (*pskb)->len)) |
| 62 | return NF_DROP; |
| 63 | |
| 64 | iph = (*pskb)->nh.iph; |
| 65 | tcplen = (*pskb)->len - iph->ihl*4; |
| 66 | |
| 67 | tcph = (void *)iph + iph->ihl*4; |
| 68 | |
| 69 | /* Since it passed flags test in tcp match, we know it is is |
| 70 | not a fragment, and has data >= tcp header length. SYN |
| 71 | packets should not contain data: if they did, then we risk |
| 72 | running over MTU, sending Frag Needed and breaking things |
| 73 | badly. --RR */ |
| 74 | if (tcplen != tcph->doff*4) { |
| 75 | if (net_ratelimit()) |
| 76 | printk(KERN_ERR |
| 77 | "ipt_tcpmss_target: bad length (%d bytes)\n", |
| 78 | (*pskb)->len); |
| 79 | return NF_DROP; |
| 80 | } |
| 81 | |
| 82 | if(tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) { |
| 83 | if(!(*pskb)->dst) { |
| 84 | if (net_ratelimit()) |
| 85 | printk(KERN_ERR |
| 86 | "ipt_tcpmss_target: no dst?! can't determine path-MTU\n"); |
| 87 | return NF_DROP; /* or IPT_CONTINUE ?? */ |
| 88 | } |
| 89 | |
| 90 | if(dst_mtu((*pskb)->dst) <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) { |
| 91 | if (net_ratelimit()) |
| 92 | printk(KERN_ERR |
| 93 | "ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", dst_mtu((*pskb)->dst)); |
| 94 | return NF_DROP; /* or IPT_CONTINUE ?? */ |
| 95 | } |
| 96 | |
| 97 | newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - sizeof(struct tcphdr); |
| 98 | } else |
| 99 | newmss = tcpmssinfo->mss; |
| 100 | |
| 101 | opt = (u_int8_t *)tcph; |
| 102 | for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)){ |
| 103 | if ((opt[i] == TCPOPT_MSS) && |
| 104 | ((tcph->doff*4 - i) >= TCPOLEN_MSS) && |
| 105 | (opt[i+1] == TCPOLEN_MSS)) { |
| 106 | u_int16_t oldmss; |
| 107 | |
| 108 | oldmss = (opt[i+2] << 8) | opt[i+3]; |
| 109 | |
| 110 | if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && |
| 111 | (oldmss <= newmss)) |
| 112 | return IPT_CONTINUE; |
| 113 | |
| 114 | opt[i+2] = (newmss & 0xff00) >> 8; |
| 115 | opt[i+3] = (newmss & 0x00ff); |
| 116 | |
| 117 | tcph->check = cheat_check(htons(oldmss)^0xFFFF, |
| 118 | htons(newmss), |
| 119 | tcph->check); |
| 120 | |
| 121 | DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" |
| 122 | "->%u.%u.%u.%u:%hu changed TCP MSS option" |
| 123 | " (from %u to %u)\n", |
| 124 | NIPQUAD((*pskb)->nh.iph->saddr), |
| 125 | ntohs(tcph->source), |
| 126 | NIPQUAD((*pskb)->nh.iph->daddr), |
| 127 | ntohs(tcph->dest), |
| 128 | oldmss, newmss); |
| 129 | goto retmodified; |
| 130 | } |
| 131 | } |
| 132 | |
| 133 | /* |
| 134 | * MSS Option not found ?! add it.. |
| 135 | */ |
| 136 | if (skb_tailroom((*pskb)) < TCPOLEN_MSS) { |
| 137 | struct sk_buff *newskb; |
| 138 | |
| 139 | newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), |
| 140 | TCPOLEN_MSS, GFP_ATOMIC); |
| 141 | if (!newskb) { |
| 142 | if (net_ratelimit()) |
| 143 | printk(KERN_ERR "ipt_tcpmss_target:" |
| 144 | " unable to allocate larger skb\n"); |
| 145 | return NF_DROP; |
| 146 | } |
| 147 | |
| 148 | kfree_skb(*pskb); |
| 149 | *pskb = newskb; |
| 150 | iph = (*pskb)->nh.iph; |
| 151 | tcph = (void *)iph + iph->ihl*4; |
| 152 | } |
| 153 | |
| 154 | skb_put((*pskb), TCPOLEN_MSS); |
| 155 | |
| 156 | opt = (u_int8_t *)tcph + sizeof(struct tcphdr); |
| 157 | memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); |
| 158 | |
| 159 | tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF, |
| 160 | htons(tcplen + TCPOLEN_MSS), tcph->check); |
| 161 | tcplen += TCPOLEN_MSS; |
| 162 | |
| 163 | opt[0] = TCPOPT_MSS; |
| 164 | opt[1] = TCPOLEN_MSS; |
| 165 | opt[2] = (newmss & 0xff00) >> 8; |
| 166 | opt[3] = (newmss & 0x00ff); |
| 167 | |
| 168 | tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check); |
| 169 | |
| 170 | oldval = ((u_int16_t *)tcph)[6]; |
| 171 | tcph->doff += TCPOLEN_MSS/4; |
| 172 | tcph->check = cheat_check(oldval ^ 0xFFFF, |
| 173 | ((u_int16_t *)tcph)[6], tcph->check); |
| 174 | |
| 175 | newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS); |
| 176 | iph->check = cheat_check(iph->tot_len ^ 0xFFFF, |
| 177 | newtotlen, iph->check); |
| 178 | iph->tot_len = newtotlen; |
| 179 | |
| 180 | DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" |
| 181 | "->%u.%u.%u.%u:%hu added TCP MSS option (%u)\n", |
| 182 | NIPQUAD((*pskb)->nh.iph->saddr), |
| 183 | ntohs(tcph->source), |
| 184 | NIPQUAD((*pskb)->nh.iph->daddr), |
| 185 | ntohs(tcph->dest), |
| 186 | newmss); |
| 187 | |
| 188 | retmodified: |
| 189 | /* We never hw checksum SYN packets. */ |
| 190 | BUG_ON((*pskb)->ip_summed == CHECKSUM_HW); |
| 191 | |
| 192 | (*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED; |
| 193 | return IPT_CONTINUE; |
| 194 | } |
| 195 | |
| 196 | #define TH_SYN 0x02 |
| 197 | |
| 198 | static inline int find_syn_match(const struct ipt_entry_match *m) |
| 199 | { |
| 200 | const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data; |
| 201 | |
| 202 | if (strcmp(m->u.kernel.match->name, "tcp") == 0 |
| 203 | && (tcpinfo->flg_cmp & TH_SYN) |
| 204 | && !(tcpinfo->invflags & IPT_TCP_INV_FLAGS)) |
| 205 | return 1; |
| 206 | |
| 207 | return 0; |
| 208 | } |
| 209 | |
| 210 | /* Must specify -p tcp --syn/--tcp-flags SYN */ |
| 211 | static int |
| 212 | ipt_tcpmss_checkentry(const char *tablename, |
| 213 | const struct ipt_entry *e, |
| 214 | void *targinfo, |
| 215 | unsigned int targinfosize, |
| 216 | unsigned int hook_mask) |
| 217 | { |
| 218 | const struct ipt_tcpmss_info *tcpmssinfo = targinfo; |
| 219 | |
| 220 | if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tcpmss_info))) { |
| 221 | DEBUGP("ipt_tcpmss_checkentry: targinfosize %u != %u\n", |
| 222 | targinfosize, IPT_ALIGN(sizeof(struct ipt_tcpmss_info))); |
| 223 | return 0; |
| 224 | } |
| 225 | |
| 226 | |
| 227 | if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && |
| 228 | ((hook_mask & ~((1 << NF_IP_FORWARD) |
| 229 | | (1 << NF_IP_LOCAL_OUT) |
| 230 | | (1 << NF_IP_POST_ROUTING))) != 0)) { |
| 231 | printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); |
| 232 | return 0; |
| 233 | } |
| 234 | |
| 235 | if (e->ip.proto == IPPROTO_TCP |
| 236 | && !(e->ip.invflags & IPT_INV_PROTO) |
| 237 | && IPT_MATCH_ITERATE(e, find_syn_match)) |
| 238 | return 1; |
| 239 | |
| 240 | printk("TCPMSS: Only works on TCP SYN packets\n"); |
| 241 | return 0; |
| 242 | } |
| 243 | |
| 244 | static struct ipt_target ipt_tcpmss_reg = { |
| 245 | .name = "TCPMSS", |
| 246 | .target = ipt_tcpmss_target, |
| 247 | .checkentry = ipt_tcpmss_checkentry, |
| 248 | .me = THIS_MODULE, |
| 249 | }; |
| 250 | |
| 251 | static int __init init(void) |
| 252 | { |
| 253 | return ipt_register_target(&ipt_tcpmss_reg); |
| 254 | } |
| 255 | |
| 256 | static void __exit fini(void) |
| 257 | { |
| 258 | ipt_unregister_target(&ipt_tcpmss_reg); |
| 259 | } |
| 260 | |
| 261 | module_init(init); |
| 262 | module_exit(fini); |