blob: d952d67f904d1124ed0c5adfa20a51f82207181c [file] [log] [blame]
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +01001#include <linux/kernel.h>
2#include <linux/ip.h>
3#include <linux/sctp.h>
4#include <net/ip.h>
5#include <net/ip6_checksum.h>
6#include <linux/netfilter.h>
7#include <linux/netfilter_ipv4.h>
8#include <net/sctp/checksum.h>
9#include <net/ip_vs.h>
10
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +010011static int
Eric W. Biedermand8f44c32015-09-21 13:02:43 -050012sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb,
13 struct ip_vs_proto_data *pd,
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +020014 int *verdict, struct ip_vs_conn **cpp,
15 struct ip_vs_iphdr *iph)
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +010016{
17 struct ip_vs_service *svc;
18 sctp_chunkhdr_t _schunkh, *sch;
19 sctp_sctphdr_t *sh, _sctph;
Alex Gartrell5e26b1b2015-08-26 09:40:41 -070020 __be16 _ports[2], *ports = NULL;
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +010021
Alex Gartrell5e26b1b2015-08-26 09:40:41 -070022 if (likely(!ip_vs_iph_icmp(iph))) {
23 sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph);
24 if (sh) {
25 sch = skb_header_pointer(
26 skb, iph->len + sizeof(sctp_sctphdr_t),
27 sizeof(_schunkh), &_schunkh);
28 if (sch && (sch->type == SCTP_CID_INIT ||
29 sysctl_sloppy_sctp(ipvs)))
30 ports = &sh->source;
31 }
32 } else {
33 ports = skb_header_pointer(
34 skb, iph->len, sizeof(_ports), &_ports);
Alex Gartrell6044eef2015-08-26 09:40:37 -070035 }
36
Alex Gartrell5e26b1b2015-08-26 09:40:41 -070037 if (!ports) {
Daniel Borkmann6e7cd272013-10-25 11:05:04 +020038 *verdict = NF_DROP;
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +010039 return 0;
Daniel Borkmann6e7cd272013-10-25 11:05:04 +020040 }
41
Julian Anastasovceec4c32013-03-22 11:46:53 +020042 rcu_read_lock();
Alex Gartrell5e26b1b2015-08-26 09:40:41 -070043 if (likely(!ip_vs_iph_inverse(iph)))
Eric W. Biederman0a4fd6c2015-09-21 13:01:49 -050044 svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
Alex Gartrell5e26b1b2015-08-26 09:40:41 -070045 &iph->daddr, ports[1]);
46 else
Eric W. Biederman0a4fd6c2015-09-21 13:01:49 -050047 svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol,
Alex Gartrell5e26b1b2015-08-26 09:40:41 -070048 &iph->saddr, ports[0]);
49 if (svc) {
Julian Anastasov190ecd22010-10-17 16:24:37 +030050 int ignored;
51
Alexander Frolkinc6c96c12013-06-13 08:56:15 +010052 if (ip_vs_todrop(ipvs)) {
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +010053 /*
54 * It seems that we are very loaded.
55 * We have to drop this packet :(
56 */
Julian Anastasovceec4c32013-03-22 11:46:53 +020057 rcu_read_unlock();
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +010058 *verdict = NF_DROP;
59 return 0;
60 }
61 /*
62 * Let the virtual server select a real server for the
63 * incoming connection, and create a connection entry.
64 */
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +020065 *cpp = ip_vs_schedule(svc, skb, pd, &ignored, iph);
Hans Schillstroma5959d52010-11-19 14:25:10 +010066 if (!*cpp && ignored <= 0) {
67 if (!ignored)
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +020068 *verdict = ip_vs_leave(svc, skb, pd, iph);
Julian Anastasovceec4c32013-03-22 11:46:53 +020069 else
Hans Schillstroma5959d52010-11-19 14:25:10 +010070 *verdict = NF_DROP;
Julian Anastasovceec4c32013-03-22 11:46:53 +020071 rcu_read_unlock();
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +010072 return 0;
73 }
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +010074 }
Julian Anastasovceec4c32013-03-22 11:46:53 +020075 rcu_read_unlock();
Hans Schillstroma5959d52010-11-19 14:25:10 +010076 /* NF_ACCEPT */
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +010077 return 1;
78}
79
Daniel Borkmann4b47bc92013-02-05 17:21:31 +010080static void sctp_nat_csum(struct sk_buff *skb, sctp_sctphdr_t *sctph,
81 unsigned int sctphoff)
82{
Joe Stringer024ec3d2013-07-25 10:52:05 +090083 sctph->checksum = sctp_compute_cksum(skb, sctphoff);
Daniel Borkmann4b47bc92013-02-05 17:21:31 +010084 skb->ip_summed = CHECKSUM_UNNECESSARY;
85}
86
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +010087static int
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +020088sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
89 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +010090{
91 sctp_sctphdr_t *sctph;
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +020092 unsigned int sctphoff = iph->len;
Daniel Borkmann97203ab2013-10-28 10:56:20 +010093 bool payload_csum = false;
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +010094
95#ifdef CONFIG_IP_VS_IPV6
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +020096 if (cp->af == AF_INET6 && iph->fragoffs)
Jesper Dangaard Brouer63dca2c2012-09-26 14:06:41 +020097 return 1;
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +010098#endif
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +010099
100 /* csum_check requires unshared skb */
101 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
102 return 0;
103
104 if (unlikely(cp->app != NULL)) {
Daniel Borkmann97203ab2013-10-28 10:56:20 +0100105 int ret;
106
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100107 /* Some checks before mangling */
108 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
109 return 0;
110
111 /* Call application helper if needed */
Daniel Borkmann97203ab2013-10-28 10:56:20 +0100112 ret = ip_vs_app_pkt_out(cp, skb);
113 if (ret == 0)
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100114 return 0;
Daniel Borkmann97203ab2013-10-28 10:56:20 +0100115 /* ret=2: csum update is needed after payload mangling */
116 if (ret == 2)
117 payload_csum = true;
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100118 }
119
120 sctph = (void *) skb_network_header(skb) + sctphoff;
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100121
Daniel Borkmann97203ab2013-10-28 10:56:20 +0100122 /* Only update csum if we really have to */
123 if (sctph->source != cp->vport || payload_csum ||
124 skb->ip_summed == CHECKSUM_PARTIAL) {
125 sctph->source = cp->vport;
126 sctp_nat_csum(skb, sctph, sctphoff);
127 } else {
128 skb->ip_summed = CHECKSUM_UNNECESSARY;
129 }
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100130
131 return 1;
132}
133
134static int
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +0200135sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp,
136 struct ip_vs_conn *cp, struct ip_vs_iphdr *iph)
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100137{
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100138 sctp_sctphdr_t *sctph;
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +0200139 unsigned int sctphoff = iph->len;
Daniel Borkmann97203ab2013-10-28 10:56:20 +0100140 bool payload_csum = false;
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100141
142#ifdef CONFIG_IP_VS_IPV6
Jesper Dangaard Brouerd4383f02012-09-26 14:07:17 +0200143 if (cp->af == AF_INET6 && iph->fragoffs)
Jesper Dangaard Brouer63dca2c2012-09-26 14:06:41 +0200144 return 1;
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100145#endif
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100146
147 /* csum_check requires unshared skb */
148 if (!skb_make_writable(skb, sctphoff + sizeof(*sctph)))
149 return 0;
150
151 if (unlikely(cp->app != NULL)) {
Daniel Borkmann97203ab2013-10-28 10:56:20 +0100152 int ret;
153
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100154 /* Some checks before mangling */
155 if (pp->csum_check && !pp->csum_check(cp->af, skb, pp))
156 return 0;
157
158 /* Call application helper if needed */
Daniel Borkmann97203ab2013-10-28 10:56:20 +0100159 ret = ip_vs_app_pkt_in(cp, skb);
160 if (ret == 0)
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100161 return 0;
Daniel Borkmann97203ab2013-10-28 10:56:20 +0100162 /* ret=2: csum update is needed after payload mangling */
163 if (ret == 2)
164 payload_csum = true;
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100165 }
166
167 sctph = (void *) skb_network_header(skb) + sctphoff;
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100168
Daniel Borkmann97203ab2013-10-28 10:56:20 +0100169 /* Only update csum if we really have to */
170 if (sctph->dest != cp->dport || payload_csum ||
171 (skb->ip_summed == CHECKSUM_PARTIAL &&
Tom Herbert53692b12015-12-14 11:19:41 -0800172 !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) {
Daniel Borkmann97203ab2013-10-28 10:56:20 +0100173 sctph->dest = cp->dport;
174 sctp_nat_csum(skb, sctph, sctphoff);
175 } else if (skb->ip_summed != CHECKSUM_PARTIAL) {
176 skb->ip_summed = CHECKSUM_UNNECESSARY;
177 }
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100178
179 return 1;
180}
181
182static int
183sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
184{
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100185 unsigned int sctphoff;
186 struct sctphdr *sh, _sctph;
Joe Stringer024ec3d2013-07-25 10:52:05 +0900187 __le32 cmp, val;
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100188
189#ifdef CONFIG_IP_VS_IPV6
190 if (af == AF_INET6)
191 sctphoff = sizeof(struct ipv6hdr);
192 else
193#endif
194 sctphoff = ip_hdrlen(skb);
195
196 sh = skb_header_pointer(skb, sctphoff, sizeof(_sctph), &_sctph);
197 if (sh == NULL)
198 return 0;
199
200 cmp = sh->checksum;
Joe Stringer024ec3d2013-07-25 10:52:05 +0900201 val = sctp_compute_cksum(skb, sctphoff);
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100202
203 if (val != cmp) {
204 /* CRC failure, dump it. */
Julian Anastasov0d796412010-10-17 16:46:17 +0300205 IP_VS_DBG_RL_PKT(0, af, pp, skb, 0,
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100206 "Failed checksum for");
207 return 0;
208 }
209 return 1;
210}
211
/*
 * Events fed into the sctp_states transition table.  The numeric order
 * matters: it is the row order of every direction in sctp_states.
 */
enum ipvs_sctp_event_t {
	IP_VS_SCTP_DATA = 0,		/* DATA, SACK, HEARTBEATs */
	IP_VS_SCTP_INIT,
	IP_VS_SCTP_INIT_ACK,
	IP_VS_SCTP_COOKIE_ECHO,
	IP_VS_SCTP_COOKIE_ACK,
	IP_VS_SCTP_SHUTDOWN,
	IP_VS_SCTP_SHUTDOWN_ACK,
	IP_VS_SCTP_SHUTDOWN_COMPLETE,
	IP_VS_SCTP_ERROR,
	IP_VS_SCTP_ABORT,
	IP_VS_SCTP_EVENT_LAST		/* number of events, not an event */
};
225
Julian Anastasov61e7c422013-06-18 10:08:07 +0300226/* RFC 2960, 3.2 Chunk Field Descriptions */
227static __u8 sctp_events[] = {
228 [SCTP_CID_DATA] = IP_VS_SCTP_DATA,
229 [SCTP_CID_INIT] = IP_VS_SCTP_INIT,
230 [SCTP_CID_INIT_ACK] = IP_VS_SCTP_INIT_ACK,
231 [SCTP_CID_SACK] = IP_VS_SCTP_DATA,
232 [SCTP_CID_HEARTBEAT] = IP_VS_SCTP_DATA,
233 [SCTP_CID_HEARTBEAT_ACK] = IP_VS_SCTP_DATA,
234 [SCTP_CID_ABORT] = IP_VS_SCTP_ABORT,
235 [SCTP_CID_SHUTDOWN] = IP_VS_SCTP_SHUTDOWN,
236 [SCTP_CID_SHUTDOWN_ACK] = IP_VS_SCTP_SHUTDOWN_ACK,
237 [SCTP_CID_ERROR] = IP_VS_SCTP_ERROR,
238 [SCTP_CID_COOKIE_ECHO] = IP_VS_SCTP_COOKIE_ECHO,
239 [SCTP_CID_COOKIE_ACK] = IP_VS_SCTP_COOKIE_ACK,
240 [SCTP_CID_ECN_ECNE] = IP_VS_SCTP_DATA,
241 [SCTP_CID_ECN_CWR] = IP_VS_SCTP_DATA,
242 [SCTP_CID_SHUTDOWN_COMPLETE] = IP_VS_SCTP_SHUTDOWN_COMPLETE,
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100243};
244
Julian Anastasov61e7c422013-06-18 10:08:07 +0300245/* SCTP States:
246 * See RFC 2960, 4. SCTP Association State Diagram
247 *
248 * New states (not in diagram):
249 * - INIT1 state: use shorter timeout for dropped INIT packets
250 * - REJECTED state: use shorter timeout if INIT is rejected with ABORT
251 * - INIT, COOKIE_SENT, COOKIE_REPLIED, COOKIE states: for better debugging
252 *
253 * The states are as seen in real server. In the diagram, INIT1, INIT,
254 * COOKIE_SENT and COOKIE_REPLIED processing happens in CLOSED state.
255 *
256 * States as per packets from client (C) and server (S):
257 *
258 * Setup of client connection:
259 * IP_VS_SCTP_S_INIT1: First C:INIT sent, wait for S:INIT-ACK
260 * IP_VS_SCTP_S_INIT: Next C:INIT sent, wait for S:INIT-ACK
261 * IP_VS_SCTP_S_COOKIE_SENT: S:INIT-ACK sent, wait for C:COOKIE-ECHO
262 * IP_VS_SCTP_S_COOKIE_REPLIED: C:COOKIE-ECHO sent, wait for S:COOKIE-ACK
263 *
264 * Setup of server connection:
265 * IP_VS_SCTP_S_COOKIE_WAIT: S:INIT sent, wait for C:INIT-ACK
266 * IP_VS_SCTP_S_COOKIE: C:INIT-ACK sent, wait for S:COOKIE-ECHO
267 * IP_VS_SCTP_S_COOKIE_ECHOED: S:COOKIE-ECHO sent, wait for C:COOKIE-ACK
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100268 */
Julian Anastasov61e7c422013-06-18 10:08:07 +0300269
270#define sNO IP_VS_SCTP_S_NONE
271#define sI1 IP_VS_SCTP_S_INIT1
272#define sIN IP_VS_SCTP_S_INIT
273#define sCS IP_VS_SCTP_S_COOKIE_SENT
274#define sCR IP_VS_SCTP_S_COOKIE_REPLIED
275#define sCW IP_VS_SCTP_S_COOKIE_WAIT
276#define sCO IP_VS_SCTP_S_COOKIE
277#define sCE IP_VS_SCTP_S_COOKIE_ECHOED
278#define sES IP_VS_SCTP_S_ESTABLISHED
279#define sSS IP_VS_SCTP_S_SHUTDOWN_SENT
280#define sSR IP_VS_SCTP_S_SHUTDOWN_RECEIVED
281#define sSA IP_VS_SCTP_S_SHUTDOWN_ACK_SENT
282#define sRJ IP_VS_SCTP_S_REJECTED
283#define sCL IP_VS_SCTP_S_CLOSED
284
285static const __u8 sctp_states
286 [IP_VS_DIR_LAST][IP_VS_SCTP_EVENT_LAST][IP_VS_SCTP_S_LAST] = {
287 { /* INPUT */
288/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
289/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
290/* i */{sI1, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
291/* i_a */{sCW, sCW, sCW, sCS, sCR, sCO, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
292/* c_e */{sCR, sIN, sIN, sCR, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
293/* c_a */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sES, sES, sSS, sSR, sSA, sRJ, sCL},
294/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
295/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sES, sCL, sSR, sCL, sRJ, sCL},
296/* s_c */{sCL, sCL, sCL, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sCL, sRJ, sCL},
297/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCL, sES, sSS, sSR, sSA, sRJ, sCL},
298/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
299 },
300 { /* OUTPUT */
301/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
302/* d */{sES, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
303/* i */{sCW, sCW, sCW, sCW, sCW, sCW, sCW, sCW, sES, sCW, sCW, sCW, sCW, sCW},
304/* i_a */{sCS, sCS, sCS, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
305/* c_e */{sCE, sCE, sCE, sCE, sCE, sCE, sCE, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
306/* c_a */{sES, sES, sES, sES, sES, sES, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
307/* s */{sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSS, sSR, sSA, sRJ, sCL},
308/* s_a */{sSA, sSA, sSA, sSA, sSA, sCW, sCO, sCE, sES, sSA, sSA, sSA, sRJ, sCL},
309/* s_c */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
310/* err */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
311/* ab */{sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
312 },
313 { /* INPUT-ONLY */
314/* sNO, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL*/
315/* d */{sES, sI1, sIN, sCS, sCR, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
316/* i */{sI1, sIN, sIN, sIN, sIN, sIN, sCO, sCE, sES, sSS, sSR, sSA, sIN, sIN},
317/* i_a */{sCE, sCE, sCE, sCE, sCE, sCE, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
318/* c_e */{sES, sES, sES, sES, sES, sES, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
319/* c_a */{sES, sI1, sIN, sES, sES, sCW, sES, sES, sES, sSS, sSR, sSA, sRJ, sCL},
320/* s */{sSR, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sSR, sSS, sSR, sSA, sRJ, sCL},
321/* s_a */{sCL, sIN, sIN, sCS, sCR, sCW, sCO, sCE, sCL, sCL, sSR, sCL, sRJ, sCL},
322/* s_c */{sCL, sCL, sCL, sCL, sCL, sCW, sCO, sCE, sES, sSS, sCL, sCL, sRJ, sCL},
323/* err */{sCL, sI1, sIN, sCS, sCR, sCW, sCO, sCE, sES, sSS, sSR, sSA, sRJ, sCL},
324/* ab */{sCL, sCL, sCL, sCL, sCL, sRJ, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
325 },
326};
327
328#define IP_VS_SCTP_MAX_RTO ((60 + 1) * HZ)
329
330/* Timeout table[state] */
Hans Schillstrom9d934872011-01-03 14:44:49 +0100331static const int sctp_timeouts[IP_VS_SCTP_S_LAST + 1] = {
Julian Anastasov61e7c422013-06-18 10:08:07 +0300332 [IP_VS_SCTP_S_NONE] = 2 * HZ,
333 [IP_VS_SCTP_S_INIT1] = (0 + 3 + 1) * HZ,
334 [IP_VS_SCTP_S_INIT] = IP_VS_SCTP_MAX_RTO,
335 [IP_VS_SCTP_S_COOKIE_SENT] = IP_VS_SCTP_MAX_RTO,
336 [IP_VS_SCTP_S_COOKIE_REPLIED] = IP_VS_SCTP_MAX_RTO,
337 [IP_VS_SCTP_S_COOKIE_WAIT] = IP_VS_SCTP_MAX_RTO,
338 [IP_VS_SCTP_S_COOKIE] = IP_VS_SCTP_MAX_RTO,
339 [IP_VS_SCTP_S_COOKIE_ECHOED] = IP_VS_SCTP_MAX_RTO,
340 [IP_VS_SCTP_S_ESTABLISHED] = 15 * 60 * HZ,
341 [IP_VS_SCTP_S_SHUTDOWN_SENT] = IP_VS_SCTP_MAX_RTO,
342 [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = IP_VS_SCTP_MAX_RTO,
343 [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = IP_VS_SCTP_MAX_RTO,
344 [IP_VS_SCTP_S_REJECTED] = (0 + 3 + 1) * HZ,
345 [IP_VS_SCTP_S_CLOSED] = IP_VS_SCTP_MAX_RTO,
346 [IP_VS_SCTP_S_LAST] = 2 * HZ,
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100347};
348
349static const char *sctp_state_name_table[IP_VS_SCTP_S_LAST + 1] = {
Julian Anastasov61e7c422013-06-18 10:08:07 +0300350 [IP_VS_SCTP_S_NONE] = "NONE",
351 [IP_VS_SCTP_S_INIT1] = "INIT1",
352 [IP_VS_SCTP_S_INIT] = "INIT",
353 [IP_VS_SCTP_S_COOKIE_SENT] = "C-SENT",
354 [IP_VS_SCTP_S_COOKIE_REPLIED] = "C-REPLIED",
355 [IP_VS_SCTP_S_COOKIE_WAIT] = "C-WAIT",
356 [IP_VS_SCTP_S_COOKIE] = "COOKIE",
357 [IP_VS_SCTP_S_COOKIE_ECHOED] = "C-ECHOED",
358 [IP_VS_SCTP_S_ESTABLISHED] = "ESTABLISHED",
359 [IP_VS_SCTP_S_SHUTDOWN_SENT] = "S-SENT",
360 [IP_VS_SCTP_S_SHUTDOWN_RECEIVED] = "S-RECEIVED",
361 [IP_VS_SCTP_S_SHUTDOWN_ACK_SENT] = "S-ACK-SENT",
362 [IP_VS_SCTP_S_REJECTED] = "REJECTED",
363 [IP_VS_SCTP_S_CLOSED] = "CLOSED",
364 [IP_VS_SCTP_S_LAST] = "BUG!",
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100365};
366
367
368static const char *sctp_state_name(int state)
369{
370 if (state >= IP_VS_SCTP_S_LAST)
371 return "ERR!";
372 if (sctp_state_name_table[state])
373 return sctp_state_name_table[state];
374 return "?";
375}
376
Simon Horman4a516f12011-09-16 14:11:49 +0900377static inline void
Hans Schillstrom93304192011-01-03 14:44:51 +0100378set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100379 int direction, const struct sk_buff *skb)
380{
381 sctp_chunkhdr_t _sctpch, *sch;
382 unsigned char chunk_type;
383 int event, next_state;
Julian Anastasovcf2e3942013-03-09 23:25:06 +0200384 int ihl, cofs;
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100385
386#ifdef CONFIG_IP_VS_IPV6
387 ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
388#else
389 ihl = ip_hdrlen(skb);
390#endif
391
Julian Anastasovcf2e3942013-03-09 23:25:06 +0200392 cofs = ihl + sizeof(sctp_sctphdr_t);
393 sch = skb_header_pointer(skb, cofs, sizeof(_sctpch), &_sctpch);
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100394 if (sch == NULL)
Simon Horman4a516f12011-09-16 14:11:49 +0900395 return;
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100396
397 chunk_type = sch->type;
398 /*
399 * Section 3: Multiple chunks can be bundled into one SCTP packet
400 * up to the MTU size, except for the INIT, INIT ACK, and
401 * SHUTDOWN COMPLETE chunks. These chunks MUST NOT be bundled with
402 * any other chunk in a packet.
403 *
404 * Section 3.3.7: DATA chunks MUST NOT be bundled with ABORT. Control
405 * chunks (except for INIT, INIT ACK, and SHUTDOWN COMPLETE) MAY be
406 * bundled with an ABORT, but they MUST be placed before the ABORT
407 * in the SCTP packet or they will be ignored by the receiver.
408 */
409 if ((sch->type == SCTP_CID_COOKIE_ECHO) ||
410 (sch->type == SCTP_CID_COOKIE_ACK)) {
Julian Anastasovcf2e3942013-03-09 23:25:06 +0200411 int clen = ntohs(sch->length);
412
413 if (clen >= sizeof(sctp_chunkhdr_t)) {
414 sch = skb_header_pointer(skb, cofs + ALIGN(clen, 4),
415 sizeof(_sctpch), &_sctpch);
416 if (sch && sch->type == SCTP_CID_ABORT)
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100417 chunk_type = sch->type;
418 }
419 }
420
Julian Anastasov61e7c422013-06-18 10:08:07 +0300421 event = (chunk_type < sizeof(sctp_events)) ?
422 sctp_events[chunk_type] : IP_VS_SCTP_DATA;
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100423
Julian Anastasov61e7c422013-06-18 10:08:07 +0300424 /* Update direction to INPUT_ONLY if necessary
425 * or delete NO_OUTPUT flag if output packet detected
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100426 */
Julian Anastasov61e7c422013-06-18 10:08:07 +0300427 if (cp->flags & IP_VS_CONN_F_NOOUTPUT) {
428 if (direction == IP_VS_DIR_OUTPUT)
429 cp->flags &= ~IP_VS_CONN_F_NOOUTPUT;
430 else
431 direction = IP_VS_DIR_INPUT_ONLY;
432 }
433
434 next_state = sctp_states[direction][event][cp->state];
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100435
436 if (next_state != cp->state) {
437 struct ip_vs_dest *dest = cp->dest;
438
439 IP_VS_DBG_BUF(8, "%s %s %s:%d->"
440 "%s:%d state: %s->%s conn->refcnt:%d\n",
Hans Schillstrom93304192011-01-03 14:44:51 +0100441 pd->pp->name,
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100442 ((direction == IP_VS_DIR_OUTPUT) ?
443 "output " : "input "),
Julian Anastasovf18ae722014-09-09 16:40:38 -0700444 IP_VS_DBG_ADDR(cp->daf, &cp->daddr),
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100445 ntohs(cp->dport),
446 IP_VS_DBG_ADDR(cp->af, &cp->caddr),
447 ntohs(cp->cport),
448 sctp_state_name(cp->state),
449 sctp_state_name(next_state),
450 atomic_read(&cp->refcnt));
451 if (dest) {
452 if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
453 (next_state != IP_VS_SCTP_S_ESTABLISHED)) {
454 atomic_dec(&dest->activeconns);
455 atomic_inc(&dest->inactconns);
456 cp->flags |= IP_VS_CONN_F_INACTIVE;
457 } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
458 (next_state == IP_VS_SCTP_S_ESTABLISHED)) {
459 atomic_inc(&dest->activeconns);
460 atomic_dec(&dest->inactconns);
461 cp->flags &= ~IP_VS_CONN_F_INACTIVE;
462 }
463 }
464 }
Hans Schillstrom9d934872011-01-03 14:44:49 +0100465 if (likely(pd))
466 cp->timeout = pd->timeout_table[cp->state = next_state];
467 else /* What to do ? */
468 cp->timeout = sctp_timeouts[cp->state = next_state];
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100469}
470
Simon Horman4a516f12011-09-16 14:11:49 +0900471static void
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100472sctp_state_transition(struct ip_vs_conn *cp, int direction,
Hans Schillstrom93304192011-01-03 14:44:51 +0100473 const struct sk_buff *skb, struct ip_vs_proto_data *pd)
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100474{
Julian Anastasovac692692013-03-22 11:46:54 +0200475 spin_lock_bh(&cp->lock);
Simon Horman4a516f12011-09-16 14:11:49 +0900476 set_sctp_state(pd, cp, direction, skb);
Julian Anastasovac692692013-03-22 11:46:54 +0200477 spin_unlock_bh(&cp->lock);
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100478}
479
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100480static inline __u16 sctp_app_hashkey(__be16 port)
481{
482 return (((__force u16)port >> SCTP_APP_TAB_BITS) ^ (__force u16)port)
483 & SCTP_APP_TAB_MASK;
484}
485
Eric W. Biederman19648912015-09-21 13:02:29 -0500486static int sctp_register_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100487{
488 struct ip_vs_app *i;
489 __u16 hash;
490 __be16 port = inc->port;
491 int ret = 0;
Eric W. Biederman18d6ade2015-09-21 13:02:01 -0500492 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100493
494 hash = sctp_app_hashkey(port);
495
Hans Schillstrom9d934872011-01-03 14:44:49 +0100496 list_for_each_entry(i, &ipvs->sctp_apps[hash], p_list) {
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100497 if (i->port == port) {
498 ret = -EEXIST;
499 goto out;
500 }
501 }
Julian Anastasov363c97d2013-03-21 11:58:07 +0200502 list_add_rcu(&inc->p_list, &ipvs->sctp_apps[hash]);
Hans Schillstrom9bbac6a2011-01-03 14:44:52 +0100503 atomic_inc(&pd->appcnt);
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100504out:
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100505
506 return ret;
507}
508
Eric W. Biederman19648912015-09-21 13:02:29 -0500509static void sctp_unregister_app(struct netns_ipvs *ipvs, struct ip_vs_app *inc)
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100510{
Eric W. Biederman19648912015-09-21 13:02:29 -0500511 struct ip_vs_proto_data *pd = ip_vs_proto_data_get(ipvs, IPPROTO_SCTP);
Hans Schillstrom9d934872011-01-03 14:44:49 +0100512
Hans Schillstrom9bbac6a2011-01-03 14:44:52 +0100513 atomic_dec(&pd->appcnt);
Julian Anastasov363c97d2013-03-21 11:58:07 +0200514 list_del_rcu(&inc->p_list);
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100515}
516
517static int sctp_app_conn_bind(struct ip_vs_conn *cp)
518{
Eric W. Biederman58dbc6f2015-09-21 13:01:41 -0500519 struct netns_ipvs *ipvs = cp->ipvs;
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100520 int hash;
521 struct ip_vs_app *inc;
522 int result = 0;
523
524 /* Default binding: bind app only for NAT */
525 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ)
526 return 0;
527 /* Lookup application incarnations and bind the right one */
528 hash = sctp_app_hashkey(cp->vport);
529
Julian Anastasov363c97d2013-03-21 11:58:07 +0200530 rcu_read_lock();
531 list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) {
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100532 if (inc->port == cp->vport) {
533 if (unlikely(!ip_vs_app_inc_get(inc)))
534 break;
Julian Anastasov363c97d2013-03-21 11:58:07 +0200535 rcu_read_unlock();
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100536
537 IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->"
538 "%s:%u to app %s on port %u\n",
539 __func__,
540 IP_VS_DBG_ADDR(cp->af, &cp->caddr),
541 ntohs(cp->cport),
542 IP_VS_DBG_ADDR(cp->af, &cp->vaddr),
543 ntohs(cp->vport),
544 inc->name, ntohs(inc->port));
545 cp->app = inc;
546 if (inc->init_conn)
547 result = inc->init_conn(inc, cp);
548 goto out;
549 }
550 }
Julian Anastasov363c97d2013-03-21 11:58:07 +0200551 rcu_read_unlock();
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100552out:
553 return result;
554}
555
Hans Schillstrom9d934872011-01-03 14:44:49 +0100556/* ---------------------------------------------
557 * timeouts is netns related now.
558 * ---------------------------------------------
559 */
Eric W. Biederman1281a9c22015-09-21 13:02:36 -0500560static int __ip_vs_sctp_init(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100561{
Hans Schillstrom9d934872011-01-03 14:44:49 +0100562 ip_vs_init_hash_table(ipvs->sctp_apps, SCTP_APP_TAB_SIZE);
Hans Schillstrom9d934872011-01-03 14:44:49 +0100563 pd->timeout_table = ip_vs_create_timeout_table((int *)sctp_timeouts,
564 sizeof(sctp_timeouts));
Hans Schillstrom582b8e32012-04-26 09:45:35 +0200565 if (!pd->timeout_table)
566 return -ENOMEM;
567 return 0;
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100568}
569
Eric W. Biederman1281a9c22015-09-21 13:02:36 -0500570static void __ip_vs_sctp_exit(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd)
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100571{
Hans Schillstrom9d934872011-01-03 14:44:49 +0100572 kfree(pd->timeout_table);
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100573}
574
575struct ip_vs_protocol ip_vs_protocol_sctp = {
Hans Schillstrom9d934872011-01-03 14:44:49 +0100576 .name = "SCTP",
577 .protocol = IPPROTO_SCTP,
578 .num_states = IP_VS_SCTP_S_LAST,
579 .dont_defrag = 0,
580 .init = NULL,
581 .exit = NULL,
582 .init_netns = __ip_vs_sctp_init,
583 .exit_netns = __ip_vs_sctp_exit,
584 .register_app = sctp_register_app,
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100585 .unregister_app = sctp_unregister_app,
Hans Schillstrom9d934872011-01-03 14:44:49 +0100586 .conn_schedule = sctp_conn_schedule,
587 .conn_in_get = ip_vs_conn_in_get_proto,
588 .conn_out_get = ip_vs_conn_out_get_proto,
589 .snat_handler = sctp_snat_handler,
590 .dnat_handler = sctp_dnat_handler,
591 .csum_check = sctp_csum_check,
592 .state_name = sctp_state_name,
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100593 .state_transition = sctp_state_transition,
Hans Schillstrom9d934872011-01-03 14:44:49 +0100594 .app_conn_bind = sctp_app_conn_bind,
595 .debug_packet = ip_vs_tcpudp_debug_packet,
596 .timeout_change = NULL,
Venkata Mohan Reddy2906f662010-02-18 12:31:05 +0100597};