Blame - net/ipv4/ipvs/ip_vs_sed.c - kernel/msm-4.9

blob: 2a7d31358181fea64782bda6aac11a27552f450c [file] [log] [blame]

/*
 * IPVS:        Shortest Expected Delay scheduling module
 *
 * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
 *
 *              This program is free software; you can redistribute it and/or
 *              modify it under the terms of the GNU General Public License
 *              as published by the Free Software Foundation; either version
 *              2 of the License, or (at your option) any later version.
 *
 * Changes:
 *
 */
				14
				/*
				 * The SED algorithm attempts to minimize each job's expected delay until
				 * completion. The expected delay that the job will experience is
				 * (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of
				 * jobs on the ith server and Ui is the fixed service rate (weight) of
				 * the ith server. The SED algorithm adopts a greedy policy in which each
				 * job does what is in its own best interest, i.e. it joins the queue that
				 * would minimize its expected delay of completion.
				 *
				 * See the following paper for more information:
				 * A. Weinrib and S. Shenker, Greed is not enough: Adaptive load sharing
				 * in large heterogeneous systems. In Proceedings IEEE INFOCOM'88,
				 * pages 986-994, 1988.
				 *
				 * Thanks must go to Marko Buuri <marko@buuri.name> for talking SED to me.
				 *
				 * The difference between SED and WLC is that SED includes the incoming
				 * job in the cost function (the increment of 1). SED may outperform
				 * WLC, while scheduling big jobs under larger heterogeneous systems
				 * (the server weight varies a lot).
				 *
				 */
				37
				38	#include <linux/module.h>
				39	#include <linux/kernel.h>
				40
				41	#include <net/ip_vs.h>
				42
				43
				/*
				 * Service-initialisation callback (installed as .init_service).
				 * This scheduler does nothing with @svc here and always returns 0.
				 */
				static int
				ip_vs_sed_init_svc(struct ip_vs_service *svc)
				{
					return 0;
				}
				49
				50
				/*
				 * Service-teardown callback (installed as .done_service).
				 * Nothing is released here; @svc is untouched and 0 is always returned.
				 */
				static int
				ip_vs_sed_done_svc(struct ip_vs_service *svc)
				{
					return 0;
				}
				56
				57
				/*
				 * Service-update callback (installed as .update_service).
				 * No state is recomputed here; @svc is untouched and 0 is always returned.
				 */
				static int
				ip_vs_sed_update_svc(struct ip_vs_service *svc)
				{
					return 0;
				}
				63
				64
				65	static inline unsigned int
				66	ip_vs_sed_dest_overhead(struct ip_vs_dest *dest)
				67	{
				68	/*
				69	* We only use the active connection number in the cost
				70	* calculation here.
				71	*/
				72	return atomic_read(&dest->activeconns) + 1;
				73	}
				74
				75
				76	/*
				77	* Weighted Least Connection scheduling
				78	*/
				79	static struct ip_vs_dest *
				80	ip_vs_sed_schedule(struct ip_vs_service svc, const struct sk_buff skb)
				81	{
				82	struct ip_vs_dest dest, least;
				83	unsigned int loh, doh;
				84
				85	IP_VS_DBG(6, "ip_vs_sed_schedule(): Scheduling...\n");
				86
				87	/*
				88	* We calculate the load of each dest server as follows:
				89	* (server expected overhead) / dest->weight
				90	*
				91	* Remember -- no floats in kernel mode!!!
				92	* The comparison of h1w2 > h2w1 is equivalent to that of
				93	* h1/w1 > h2/w2
				94	* if every weight is larger than zero.
				95	*
				96	* The server with weight=0 is quiesced and will not receive any
				97	* new connections.
				98	*/
				99
				100	list_for_each_entry(dest, &svc->destinations, n_list) {
				101	if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
				102	atomic_read(&dest->weight) > 0) {
				103	least = dest;
				104	loh = ip_vs_sed_dest_overhead(least);
				105	goto nextstage;
				106	}
				107	}
				108	return NULL;
				109
				110	/*
				111	* Find the destination with the least load.
				112	*/
				113	nextstage:
				114	list_for_each_entry_continue(dest, &svc->destinations, n_list) {
				115	if (dest->flags & IP_VS_DEST_F_OVERLOAD)
				116	continue;
				117	doh = ip_vs_sed_dest_overhead(dest);
				118	if (loh * atomic_read(&dest->weight) >
				119	doh * atomic_read(&least->weight)) {
				120	least = dest;
				121	loh = doh;
				122	}
				123	}
				124
				125	IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u "
				126	"activeconns %d refcnt %d weight %d overhead %d\n",
				127	NIPQUAD(least->addr), ntohs(least->port),
				128	atomic_read(&least->activeconns),
				129	atomic_read(&least->refcnt),
				130	atomic_read(&least->weight), loh);
				131
				132	return least;
				133	}
				134
				135
				136	static struct ip_vs_scheduler ip_vs_sed_scheduler =
				137	{
				138	.name = "sed",
				139	.refcnt = ATOMIC_INIT(0),
				140	.module = THIS_MODULE,
				141	.init_service = ip_vs_sed_init_svc,
				142	.done_service = ip_vs_sed_done_svc,
				143	.update_service = ip_vs_sed_update_svc,
				144	.schedule = ip_vs_sed_schedule,
				145	};
				146
				147
				148	static int __init ip_vs_sed_init(void)
				149	{
				150	INIT_LIST_HEAD(&ip_vs_sed_scheduler.n_list);
				151	return register_ip_vs_scheduler(&ip_vs_sed_scheduler);
				152	}
				153
				154	static void __exit ip_vs_sed_cleanup(void)
				155	{
				156	unregister_ip_vs_scheduler(&ip_vs_sed_scheduler);
				157	}
				158
				159	module_init(ip_vs_sed_init);
				160	module_exit(ip_vs_sed_cleanup);
				161	MODULE_LICENSE("GPL");