Blame - net/netfilter/ipvs/ip_vs_sed.c - kernel/msm-4.9

blob: f8e2d00f528b945e774564854fc66f53dbc61970 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* IPVS: Shortest Expected Delay scheduling module
				3	*
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	4	* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
				5	*
				6	* This program is free software; you can redistribute it and/or
				7	* modify it under the terms of the GNU General Public License
				8	* as published by the Free Software Foundation; either version
				9	* 2 of the License, or (at your option) any later version.
				10	*
				11	* Changes:
				12	*
				13	*/
				14
				15	/*
				16	* The SED algorithm attempts to minimize each job's expected delay until
				17	* completion. The expected delay that the job will experience is
				18	* (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of
Michael Opdenacker	59c5159	2007-05-09 08:57:56 +0200	[diff] [blame]	19	* jobs on the ith server and Ui is the fixed service rate (weight) of
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	20	* the ith server. The SED algorithm adopts a greedy policy in which each
				21	* job does what is in its own best interest, i.e. it joins the queue that
				22	* would minimize its expected delay of completion.
				23	*
				24	* See the following paper for more information:
				25	* A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
				26	* in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
				27	* pages 986-994, 1988.
				28	*
				29	* Thanks must go to Marko Buuri <marko@buuri.name> for talking SED to me.
				30	*
				31	* The difference between SED and WLC is that SED includes the incoming
				32	* job in the cost function (the increment of 1). SED may outperform
				33	* WLC, while scheduling big jobs under larger heterogeneous systems
				34	* (the server weight varies a lot).
				35	*
				36	*/
				37
Hannes Eder	9aada7a	2009-07-30 14:29:44 -0700	[diff] [blame]	38	#define KMSG_COMPONENT "IPVS"
				39	#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
				40
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	41	#include <linux/module.h>
				42	#include <linux/kernel.h>
				43
				44	#include <net/ip_vs.h>
				45
				46
Simon Kirby	c16526a	2013-08-10 01:26:18 -0700	[diff] [blame]	47	static inline int
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	48	ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
				49	{
				50	/*
				51	* We only use the active connection number in the cost
				52	* calculation here.
				53	*/
				54	return atomic_read(&dest->activeconns) + 1;
				55	}
				56
				57
				58	/*
				59	* Weighted Least Connection scheduling
				60	*/
				61	static struct ip_vs_dest *
Julian Anastasov	bba54de	2013-06-16 09:09:36 +0300	[diff] [blame]	62	ip_vs_sed_schedule(struct ip_vs_service svc, const struct sk_buff skb,
				63	struct ip_vs_iphdr *iph)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	64	{
				65	struct ip_vs_dest dest, least;
Simon Kirby	c16526a	2013-08-10 01:26:18 -0700	[diff] [blame]	66	int loh, doh;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	67
Hannes Eder	1e3e238	2009-08-02 11:05:41 +0000	[diff] [blame]	68	IP_VS_DBG(6, "%s(): Scheduling...\n", __func__);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	69
				70	/*
				71	* We calculate the load of each dest server as follows:
				72	* (server expected overhead) / dest->weight
				73	*
				74	* Remember -- no floats in kernel mode!!!
				75	* The comparison of h1w2 > h2w1 is equivalent to that of
				76	* h1/w1 > h2/w2
				77	* if every weight is larger than zero.
				78	*
				79	* The server with weight=0 is quiesced and will not receive any
				80	* new connections.
				81	*/
				82
Julian Anastasov	9be52ab	2013-03-22 11:46:45 +0200	[diff] [blame]	83	list_for_each_entry_rcu(dest, &svc->destinations, n_list) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	84	if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
				85	atomic_read(&dest->weight) > 0) {
				86	least = dest;
				87	loh = ip_vs_sed_dest_overhead(least);
				88	goto nextstage;
				89	}
				90	}
Patrick Schaaf	41ac51e	2011-02-11 14:01:12 +0100	[diff] [blame]	91	ip_vs_scheduler_err(svc, "no destination available");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	92	return NULL;
				93
				94	/*
				95	* Find the destination with the least load.
				96	*/
				97	nextstage:
Julian Anastasov	9be52ab	2013-03-22 11:46:45 +0200	[diff] [blame]	98	list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	99	if (dest->flags & IP_VS_DEST_F_OVERLOAD)
				100	continue;
				101	doh = ip_vs_sed_dest_overhead(dest);
Simon Kirby	c16526a	2013-08-10 01:26:18 -0700	[diff] [blame]	102	if ((__s64)loh * atomic_read(&dest->weight) >
				103	(__s64)doh * atomic_read(&least->weight)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	104	least = dest;
				105	loh = doh;
				106	}
				107	}
				108
Julius Volz	b14198f	2008-09-02 15:55:39 +0200	[diff] [blame]	109	IP_VS_DBG_BUF(6, "SED: server %s:%u "
				110	"activeconns %d refcnt %d weight %d overhead %d\n",
Julian Anastasov	4d316f3	2014-09-17 00:09:00 +0300	[diff] [blame]	111	IP_VS_DBG_ADDR(least->af, &least->addr),
				112	ntohs(least->port),
Julius Volz	b14198f	2008-09-02 15:55:39 +0200	[diff] [blame]	113	atomic_read(&least->activeconns),
				114	atomic_read(&least->refcnt),
				115	atomic_read(&least->weight), loh);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	116
				117	return least;
				118	}
				119
				120
				121	static struct ip_vs_scheduler ip_vs_sed_scheduler =
				122	{
				123	.name = "sed",
				124	.refcnt = ATOMIC_INIT(0),
				125	.module = THIS_MODULE,
Sven Wegener	d149ccc	2008-08-10 09:18:02 +0000	[diff] [blame]	126	.n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	127	.schedule = ip_vs_sed_schedule,
				128	};
				129
				130
				131	static int __init ip_vs_sed_init(void)
				132	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	133	return register_ip_vs_scheduler(&ip_vs_sed_scheduler);
				134	}
				135
				136	static void __exit ip_vs_sed_cleanup(void)
				137	{
				138	unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
Julian Anastasov	ceec4c3	2013-03-22 11:46:53 +0200	[diff] [blame]	139	synchronize_rcu();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	140	}
				141
				142	module_init(ip_vs_sed_init);
				143	module_exit(ip_vs_sed_cleanup);
				144	MODULE_LICENSE("GPL");