Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
| 2 | * IPVS: Source Hashing scheduling module |
| 3 | * |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 4 | * Authors: Wensong Zhang <wensong@gnuchina.org> |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License |
| 8 | * as published by the Free Software Foundation; either version |
| 9 | * 2 of the License, or (at your option) any later version. |
| 10 | * |
| 11 | * Changes: |
| 12 | * |
| 13 | */ |
| 14 | |
/*
 * The sh algorithm is to select a server by the hash key of the source IP
 * address. The pseudo code is as follows:
 *
 *       n <- servernode[src_ip];
 *       if (n is dead) OR
 *          (n is overloaded) or (n.weight <= 0) then
 *                 return NULL;
 *
 *       return n;
 *
 * Note that servernode is a 256-bucket hash table that maps the hash
 * index derived from the packet source IP address to the current server
 * array. If the sh scheduler is used in a cache cluster, it is good to
 * combine it with the cache_bypass feature. When the statically assigned
 * server is dead or overloaded, the load balancer can bypass the cache
 * server and send requests to the original server directly.
 *
 * The weight destination attribute can be used to control the
 * distribution of connections to the destinations in servernode. The
 * greater the weight, the more connections the destination
 * will receive.
 *
 */
| 39 | |
Hannes Eder | 9aada7a | 2009-07-30 14:29:44 -0700 | [diff] [blame] | 40 | #define KMSG_COMPONENT "IPVS" |
| 41 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
| 42 | |
Arnaldo Carvalho de Melo | 14c8502 | 2005-12-27 02:43:12 -0200 | [diff] [blame] | 43 | #include <linux/ip.h> |
Tejun Heo | 5a0e3ad | 2010-03-24 17:04:11 +0900 | [diff] [blame] | 44 | #include <linux/slab.h> |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 45 | #include <linux/module.h> |
| 46 | #include <linux/kernel.h> |
Arnaldo Carvalho de Melo | 14c8502 | 2005-12-27 02:43:12 -0200 | [diff] [blame] | 47 | #include <linux/skbuff.h> |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 48 | |
| 49 | #include <net/ip_vs.h> |
| 50 | |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 51 | #include <net/tcp.h> |
| 52 | #include <linux/udp.h> |
| 53 | #include <linux/sctp.h> |
| 54 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 55 | |
/*
 *      IPVS SH bucket
 *
 *      One slot of the source-hash table. The slot either is NULL or
 *      points to a destination on which the table holds a reference
 *      (taken in ip_vs_sh_reassign(), dropped in ip_vs_sh_reassign()
 *      and ip_vs_sh_flush()).
 */
struct ip_vs_sh_bucket {
	struct ip_vs_dest __rcu *dest; /* real server (cache) */
};
| 62 | |
/*
 *      for IPVS SH entry hash table
 *
 *      CONFIG_IP_VS_SH_TAB_BITS falls back to 8 when it is not already
 *      defined, which gives a table of 1 << 8 = 256 buckets.
 *      IP_VS_SH_TAB_MASK is used to reduce a hash value to a bucket index.
 */
#ifndef CONFIG_IP_VS_SH_TAB_BITS
#define CONFIG_IP_VS_SH_TAB_BITS 8
#endif
#define IP_VS_SH_TAB_BITS CONFIG_IP_VS_SH_TAB_BITS
#define IP_VS_SH_TAB_SIZE (1 << IP_VS_SH_TAB_BITS)
#define IP_VS_SH_TAB_MASK (IP_VS_SH_TAB_SIZE - 1)
| 72 | |
/*
 * Per-service scheduler state, stored in svc->sched_data.
 * rcu_head is the member handed to kfree_rcu() in ip_vs_sh_done_svc();
 * buckets is the source-hash table itself.
 */
struct ip_vs_sh_state {
	struct rcu_head rcu_head;
	struct ip_vs_sh_bucket buckets[IP_VS_SH_TAB_SIZE];
};
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 77 | |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 78 | /* Helper function to determine if server is unavailable */ |
| 79 | static inline bool is_unavailable(struct ip_vs_dest *dest) |
| 80 | { |
| 81 | return atomic_read(&dest->weight) <= 0 || |
| 82 | dest->flags & IP_VS_DEST_F_OVERLOAD; |
| 83 | } |
| 84 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 85 | /* |
| 86 | * Returns hash value for IPVS SH entry |
| 87 | */ |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 88 | static inline unsigned int |
| 89 | ip_vs_sh_hashkey(int af, const union nf_inet_addr *addr, |
| 90 | __be16 port, unsigned int offset) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 91 | { |
Julius Volz | 20971a0 | 2008-11-01 13:13:19 +0000 | [diff] [blame] | 92 | __be32 addr_fold = addr->ip; |
| 93 | |
| 94 | #ifdef CONFIG_IP_VS_IPV6 |
| 95 | if (af == AF_INET6) |
| 96 | addr_fold = addr->ip6[0]^addr->ip6[1]^ |
| 97 | addr->ip6[2]^addr->ip6[3]; |
| 98 | #endif |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 99 | return (offset + (ntohs(port) + ntohl(addr_fold))*2654435761UL) & |
| 100 | IP_VS_SH_TAB_MASK; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 101 | } |
| 102 | |
| 103 | |
| 104 | /* |
| 105 | * Get ip_vs_dest associated with supplied parameters. |
| 106 | */ |
| 107 | static inline struct ip_vs_dest * |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 108 | ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s, |
| 109 | const union nf_inet_addr *addr, __be16 port) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 110 | { |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 111 | unsigned int hash = ip_vs_sh_hashkey(svc->af, addr, port, 0); |
| 112 | struct ip_vs_dest *dest = rcu_dereference(s->buckets[hash].dest); |
| 113 | |
| 114 | return (!dest || is_unavailable(dest)) ? NULL : dest; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 115 | } |
| 116 | |
| 117 | |
Alexander Frolkin | 1255ce5 | 2013-09-27 11:06:23 +0100 | [diff] [blame] | 118 | /* As ip_vs_sh_get, but with fallback if selected server is unavailable |
| 119 | * |
| 120 | * The fallback strategy loops around the table starting from a "random" |
| 121 | * point (in fact, it is chosen to be the original hash value to make the |
| 122 | * algorithm deterministic) to find a new server. |
| 123 | */ |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 124 | static inline struct ip_vs_dest * |
| 125 | ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s, |
| 126 | const union nf_inet_addr *addr, __be16 port) |
| 127 | { |
Alexander Frolkin | 1255ce5 | 2013-09-27 11:06:23 +0100 | [diff] [blame] | 128 | unsigned int offset, roffset; |
| 129 | unsigned int hash, ihash; |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 130 | struct ip_vs_dest *dest; |
| 131 | |
Alexander Frolkin | 1255ce5 | 2013-09-27 11:06:23 +0100 | [diff] [blame] | 132 | /* first try the dest it's supposed to go to */ |
| 133 | ihash = ip_vs_sh_hashkey(svc->af, addr, port, 0); |
| 134 | dest = rcu_dereference(s->buckets[ihash].dest); |
| 135 | if (!dest) |
| 136 | return NULL; |
| 137 | if (!is_unavailable(dest)) |
| 138 | return dest; |
| 139 | |
| 140 | IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting", |
Julian Anastasov | 4d316f3 | 2014-09-17 00:09:00 +0300 | [diff] [blame] | 141 | IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port)); |
Alexander Frolkin | 1255ce5 | 2013-09-27 11:06:23 +0100 | [diff] [blame] | 142 | |
| 143 | /* if the original dest is unavailable, loop around the table |
| 144 | * starting from ihash to find a new dest |
| 145 | */ |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 146 | for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) { |
Alexander Frolkin | 1255ce5 | 2013-09-27 11:06:23 +0100 | [diff] [blame] | 147 | roffset = (offset + ihash) % IP_VS_SH_TAB_SIZE; |
| 148 | hash = ip_vs_sh_hashkey(svc->af, addr, port, roffset); |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 149 | dest = rcu_dereference(s->buckets[hash].dest); |
| 150 | if (!dest) |
| 151 | break; |
Alexander Frolkin | 1255ce5 | 2013-09-27 11:06:23 +0100 | [diff] [blame] | 152 | if (!is_unavailable(dest)) |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 153 | return dest; |
Alexander Frolkin | 1255ce5 | 2013-09-27 11:06:23 +0100 | [diff] [blame] | 154 | IP_VS_DBG_BUF(6, "SH: selected unavailable " |
| 155 | "server %s:%d (offset %d), reselecting", |
Julian Anastasov | 4d316f3 | 2014-09-17 00:09:00 +0300 | [diff] [blame] | 156 | IP_VS_DBG_ADDR(dest->af, &dest->addr), |
Alexander Frolkin | 1255ce5 | 2013-09-27 11:06:23 +0100 | [diff] [blame] | 157 | ntohs(dest->port), roffset); |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 158 | } |
| 159 | |
| 160 | return NULL; |
| 161 | } |
| 162 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 163 | /* |
| 164 | * Assign all the hash buckets of the specified table with the service. |
| 165 | */ |
| 166 | static int |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 167 | ip_vs_sh_reassign(struct ip_vs_sh_state *s, struct ip_vs_service *svc) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 168 | { |
| 169 | int i; |
| 170 | struct ip_vs_sh_bucket *b; |
| 171 | struct list_head *p; |
| 172 | struct ip_vs_dest *dest; |
Michael Maxim | 76ad94f | 2011-12-08 10:55:09 -0500 | [diff] [blame] | 173 | int d_count; |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 174 | bool empty; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 175 | |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 176 | b = &s->buckets[0]; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 177 | p = &svc->destinations; |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 178 | empty = list_empty(p); |
Michael Maxim | 76ad94f | 2011-12-08 10:55:09 -0500 | [diff] [blame] | 179 | d_count = 0; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 180 | for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 181 | dest = rcu_dereference_protected(b->dest, 1); |
| 182 | if (dest) |
| 183 | ip_vs_dest_put(dest); |
| 184 | if (empty) |
| 185 | RCU_INIT_POINTER(b->dest, NULL); |
| 186 | else { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 187 | if (p == &svc->destinations) |
| 188 | p = p->next; |
| 189 | |
| 190 | dest = list_entry(p, struct ip_vs_dest, n_list); |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 191 | ip_vs_dest_hold(dest); |
| 192 | RCU_INIT_POINTER(b->dest, dest); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 193 | |
Michael Maxim | 76ad94f | 2011-12-08 10:55:09 -0500 | [diff] [blame] | 194 | IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n", |
Julian Anastasov | 4d316f3 | 2014-09-17 00:09:00 +0300 | [diff] [blame] | 195 | i, IP_VS_DBG_ADDR(dest->af, &dest->addr), |
Michael Maxim | 76ad94f | 2011-12-08 10:55:09 -0500 | [diff] [blame] | 196 | atomic_read(&dest->weight)); |
| 197 | |
| 198 | /* Don't move to next dest until filling weight */ |
| 199 | if (++d_count >= atomic_read(&dest->weight)) { |
| 200 | p = p->next; |
| 201 | d_count = 0; |
| 202 | } |
| 203 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 204 | } |
| 205 | b++; |
| 206 | } |
| 207 | return 0; |
| 208 | } |
| 209 | |
| 210 | |
| 211 | /* |
| 212 | * Flush all the hash buckets of the specified table. |
| 213 | */ |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 214 | static void ip_vs_sh_flush(struct ip_vs_sh_state *s) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 215 | { |
| 216 | int i; |
| 217 | struct ip_vs_sh_bucket *b; |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 218 | struct ip_vs_dest *dest; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 219 | |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 220 | b = &s->buckets[0]; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 221 | for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 222 | dest = rcu_dereference_protected(b->dest, 1); |
| 223 | if (dest) { |
| 224 | ip_vs_dest_put(dest); |
| 225 | RCU_INIT_POINTER(b->dest, NULL); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 226 | } |
| 227 | b++; |
| 228 | } |
| 229 | } |
| 230 | |
| 231 | |
| 232 | static int ip_vs_sh_init_svc(struct ip_vs_service *svc) |
| 233 | { |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 234 | struct ip_vs_sh_state *s; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 235 | |
| 236 | /* allocate the SH table for this service */ |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 237 | s = kzalloc(sizeof(struct ip_vs_sh_state), GFP_KERNEL); |
| 238 | if (s == NULL) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 239 | return -ENOMEM; |
Joe Perches | 0a9ee81 | 2011-08-29 14:17:25 -0700 | [diff] [blame] | 240 | |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 241 | svc->sched_data = s; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 242 | IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) allocated for " |
| 243 | "current service\n", |
| 244 | sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); |
| 245 | |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 246 | /* assign the hash buckets with current dests */ |
| 247 | ip_vs_sh_reassign(s, svc); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 248 | |
| 249 | return 0; |
| 250 | } |
| 251 | |
| 252 | |
Julian Anastasov | ed3ffc4 | 2013-03-22 11:46:50 +0200 | [diff] [blame] | 253 | static void ip_vs_sh_done_svc(struct ip_vs_service *svc) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 254 | { |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 255 | struct ip_vs_sh_state *s = svc->sched_data; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 256 | |
| 257 | /* got to clean up hash buckets here */ |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 258 | ip_vs_sh_flush(s); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 259 | |
| 260 | /* release the table itself */ |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 261 | kfree_rcu(s, rcu_head); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 262 | IP_VS_DBG(6, "SH hash table (memory=%Zdbytes) released\n", |
| 263 | sizeof(struct ip_vs_sh_bucket)*IP_VS_SH_TAB_SIZE); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 264 | } |
| 265 | |
| 266 | |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 267 | static int ip_vs_sh_dest_changed(struct ip_vs_service *svc, |
| 268 | struct ip_vs_dest *dest) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 269 | { |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 270 | struct ip_vs_sh_state *s = svc->sched_data; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 271 | |
| 272 | /* assign the hash buckets with the updated service */ |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 273 | ip_vs_sh_reassign(s, svc); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 274 | |
| 275 | return 0; |
| 276 | } |
| 277 | |
| 278 | |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 279 | /* Helper function to get port number */ |
| 280 | static inline __be16 |
| 281 | ip_vs_sh_get_port(const struct sk_buff *skb, struct ip_vs_iphdr *iph) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 282 | { |
Alex Gartrell | 1471f35 | 2015-08-26 09:40:36 -0700 | [diff] [blame] | 283 | __be16 _ports[2], *ports; |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 284 | |
Alex Gartrell | 1471f35 | 2015-08-26 09:40:36 -0700 | [diff] [blame] | 285 | /* At this point we know that we have a valid packet of some kind. |
| 286 | * Because ICMP packets are only guaranteed to have the first 8 |
| 287 | * bytes, let's just grab the ports. Fortunately they're in the |
| 288 | * same position for all three of the protocols we care about. |
| 289 | */ |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 290 | switch (iph->protocol) { |
| 291 | case IPPROTO_TCP: |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 292 | case IPPROTO_UDP: |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 293 | case IPPROTO_SCTP: |
Alex Gartrell | 1471f35 | 2015-08-26 09:40:36 -0700 | [diff] [blame] | 294 | ports = skb_header_pointer(skb, iph->len, sizeof(_ports), |
| 295 | &_ports); |
| 296 | if (unlikely(!ports)) |
Daniel Borkmann | 54e35cc | 2013-08-06 11:20:23 +0200 | [diff] [blame] | 297 | return 0; |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 298 | |
Alex Gartrell | 1471f35 | 2015-08-26 09:40:36 -0700 | [diff] [blame] | 299 | if (likely(!ip_vs_iph_inverse(iph))) |
| 300 | return ports[0]; |
| 301 | else |
| 302 | return ports[1]; |
| 303 | default: |
| 304 | return 0; |
| 305 | } |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 306 | } |
| 307 | |
| 308 | |
| 309 | /* |
| 310 | * Source Hashing scheduling |
| 311 | */ |
| 312 | static struct ip_vs_dest * |
Julian Anastasov | bba54de | 2013-06-16 09:09:36 +0300 | [diff] [blame] | 313 | ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, |
| 314 | struct ip_vs_iphdr *iph) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 315 | { |
| 316 | struct ip_vs_dest *dest; |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 317 | struct ip_vs_sh_state *s; |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 318 | __be16 port = 0; |
Alex Gartrell | 1471f35 | 2015-08-26 09:40:36 -0700 | [diff] [blame] | 319 | const union nf_inet_addr *hash_addr; |
| 320 | |
| 321 | hash_addr = ip_vs_iph_inverse(iph) ? &iph->daddr : &iph->saddr; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 322 | |
| 323 | IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n"); |
| 324 | |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 325 | if (svc->flags & IP_VS_SVC_F_SCHED_SH_PORT) |
| 326 | port = ip_vs_sh_get_port(skb, iph); |
| 327 | |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 328 | s = (struct ip_vs_sh_state *) svc->sched_data; |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 329 | |
| 330 | if (svc->flags & IP_VS_SVC_F_SCHED_SH_FALLBACK) |
Alex Gartrell | 1471f35 | 2015-08-26 09:40:36 -0700 | [diff] [blame] | 331 | dest = ip_vs_sh_get_fallback(svc, s, hash_addr, port); |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 332 | else |
Alex Gartrell | 1471f35 | 2015-08-26 09:40:36 -0700 | [diff] [blame] | 333 | dest = ip_vs_sh_get(svc, s, hash_addr, port); |
Alexander Frolkin | eba3b5a | 2013-06-19 10:54:25 +0100 | [diff] [blame] | 334 | |
| 335 | if (!dest) { |
Patrick Schaaf | 41ac51e | 2011-02-11 14:01:12 +0100 | [diff] [blame] | 336 | ip_vs_scheduler_err(svc, "no destination available"); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 337 | return NULL; |
| 338 | } |
| 339 | |
Julius Volz | 20971a0 | 2008-11-01 13:13:19 +0000 | [diff] [blame] | 340 | IP_VS_DBG_BUF(6, "SH: source IP address %s --> server %s:%d\n", |
Alex Gartrell | 1471f35 | 2015-08-26 09:40:36 -0700 | [diff] [blame] | 341 | IP_VS_DBG_ADDR(svc->af, hash_addr), |
Julian Anastasov | 4d316f3 | 2014-09-17 00:09:00 +0300 | [diff] [blame] | 342 | IP_VS_DBG_ADDR(dest->af, &dest->addr), |
Julius Volz | 20971a0 | 2008-11-01 13:13:19 +0000 | [diff] [blame] | 343 | ntohs(dest->port)); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 344 | |
| 345 | return dest; |
| 346 | } |
| 347 | |
| 348 | |
| 349 | /* |
| 350 | * IPVS SH Scheduler structure |
| 351 | */ |
| 352 | static struct ip_vs_scheduler ip_vs_sh_scheduler = |
| 353 | { |
| 354 | .name = "sh", |
| 355 | .refcnt = ATOMIC_INIT(0), |
| 356 | .module = THIS_MODULE, |
Sven Wegener | d149ccc | 2008-08-10 09:18:02 +0000 | [diff] [blame] | 357 | .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 358 | .init_service = ip_vs_sh_init_svc, |
| 359 | .done_service = ip_vs_sh_done_svc, |
Julian Anastasov | 1acb7f6 | 2013-03-22 11:46:46 +0200 | [diff] [blame] | 360 | .add_dest = ip_vs_sh_dest_changed, |
| 361 | .del_dest = ip_vs_sh_dest_changed, |
| 362 | .upd_dest = ip_vs_sh_dest_changed, |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 363 | .schedule = ip_vs_sh_schedule, |
| 364 | }; |
| 365 | |
| 366 | |
/*
 * Module entry point: register the "sh" scheduler and return the
 * result of register_ip_vs_scheduler() unchanged.
 */
static int __init ip_vs_sh_init(void)
{
	return register_ip_vs_scheduler(&ip_vs_sh_scheduler);
}
| 371 | |
| 372 | |
/*
 * Module exit point: unregister the scheduler, then wait for an RCU
 * grace period before returning.
 */
static void __exit ip_vs_sh_cleanup(void)
{
	unregister_ip_vs_scheduler(&ip_vs_sh_scheduler);
	/* NOTE(review): presumably lets in-flight RCU readers of the
	 * scheduler finish before the module text goes away -- confirm
	 */
	synchronize_rcu();
}
| 378 | |
| 379 | |
/* Module load/unload entry points and license declaration */
module_init(ip_vs_sh_init);
module_exit(ip_vs_sh_cleanup);
MODULE_LICENSE("GPL");