Blame - include/net/red.h - kernel/msm-4.9

blob: a4eb37946f2cdf1c63f22cc19341a88e6bd76cca [file] [log] [blame]

Thomas Graf	a783474	2005-11-05 21:14:03 +0100	[diff] [blame]	1	#ifndef __NET_SCHED_RED_H
				2	#define __NET_SCHED_RED_H
				3
Thomas Graf	a783474	2005-11-05 21:14:03 +0100	[diff] [blame]	4	#include <linux/types.h>
				5	#include <net/pkt_sched.h>
				6	#include <net/inet_ecn.h>
				7	#include <net/dsfield.h>
				8
				9	/* Random Early Detection (RED) algorithm.
				10	=======================================
				11
				12	Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways
				13	for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking.
				14
				15	This file codes a "divisionless" version of RED algorithm
				16	as written down in Fig.17 of the paper.
				17
				18	Short description.
				19	------------------
				20
				21	When a new packet arrives we calculate the average queue length:
				22
				23	avg = (1-W)avg + Wcurrent_queue_len,
				24
				25	W is the filter time constant (chosen as 2^(-Wlog)), it controls
				26	the inertia of the algorithm. To allow larger bursts, W should be
				27	decreased.
				28
				29	if (avg > th_max) -> packet marked (dropped).
				30	if (avg < th_min) -> packet passes.
				31	if (th_min < avg < th_max) we calculate probability:
				32
				33	Pb = max_P * (avg - th_min)/(th_max-th_min)
				34
				35	and mark (drop) packet with this probability.
				36	Pb changes from 0 (at avg==th_min) to max_P (avg==th_max).
				37	max_P should be small (not 1), usually 0.01..0.02 is good value.
				38
				39	max_P is chosen as a number, so that max_P/(th_max-th_min)
				40	is a negative power of two in order arithmetics to contain
				41	only shifts.
				42
				43
				44	Parameters, settable by user:
				45	-----------------------------
				46
				47	qth_min - bytes (should be < qth_max/2)
				48	qth_max - bytes (should be at least 2*qth_min and less limit)
				49	Wlog - bits (<32) log(1/W).
				50	Plog - bits (<32)
				51
				52	Plog is related to max_P by formula:
				53
				54	max_P = (qth_max-qth_min)/2^Plog;
				55
				56	F.e. if qth_max=128K and qth_min=32K, then Plog=22
				57	corresponds to max_P=0.02
				58
				59	Scell_log
				60	Stab
				61
				62	Lookup table for log((1-W)^(t/t_ave).
				63
				64
				65	NOTES:
				66
				67	Upper bound on W.
				68	-----------------
				69
				70	If you want to allow bursts of L packets of size S,
				71	you should choose W:
				72
				73	L + 1 - th_min/S < (1-(1-W)^L)/W
				74
				75	th_min/S = 32 th_min/S = 4
				76
				77	log(W) L
				78	-1 33
				79	-2 35
				80	-3 39
				81	-4 46
				82	-5 57
				83	-6 75
				84	-7 101
				85	-8 135
				86	-9 190
				87	etc.
				88	*/
				89
				90	#define RED_STAB_SIZE 256
				91	#define RED_STAB_MASK (RED_STAB_SIZE - 1)
				92
				93	struct red_stats
				94	{
				95	u32 prob_drop; /* Early probability drops */
				96	u32 prob_mark; /* Early probability marks */
				97	u32 forced_drop; /* Forced drops, qavg > max_thresh */
				98	u32 forced_mark; /* Forced marks, qavg > max_thresh */
				99	u32 pdrop; /* Drops due to queue limits */
				100	u32 other; /* Drops due to drop() calls */
				101	u32 backlog;
				102	};
				103
				104	struct red_parms
				105	{
				106	/* Parameters */
				107	u32 qth_min; /* Min avg length threshold: A scaled */
				108	u32 qth_max; /* Max avg length threshold: A scaled */
				109	u32 Scell_max;
				110	u32 Rmask; /* Cached random mask, see red_rmask */
				111	u8 Scell_log;
				112	u8 Wlog; /* log(W) */
				113	u8 Plog; /* random number bits */
				114	u8 Stab[RED_STAB_SIZE];
				115
				116	/* Variables */
				117	int qcount; /* Number of packets since last random
				118	number generation */
				119	u32 qR; /* Cached random number */
				120
				121	unsigned long qavg; /* Average queue length: A scaled */
				122	psched_time_t qidlestart; /* Start of current idle period */
				123	};
				124
				125	static inline u32 red_rmask(u8 Plog)
				126	{
				127	return Plog < 32 ? ((1 << Plog) - 1) : ~0UL;
				128	}
				129
				130	static inline void red_set_parms(struct red_parms *p,
				131	u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
				132	u8 Scell_log, u8 *stab)
				133	{
				134	/* Reset average queue length, the value is strictly bound
				135	* to the parameters below, reseting hurts a bit but leaving
				136	* it might result in an unreasonable qavg for a while. --TGR
				137	*/
				138	p->qavg = 0;
				139
				140	p->qcount = -1;
				141	p->qth_min = qth_min << Wlog;
				142	p->qth_max = qth_max << Wlog;
				143	p->Wlog = Wlog;
				144	p->Plog = Plog;
				145	p->Rmask = red_rmask(Plog);
				146	p->Scell_log = Scell_log;
				147	p->Scell_max = (255 << Scell_log);
				148
				149	memcpy(p->Stab, stab, sizeof(p->Stab));
				150	}
				151
				152	static inline int red_is_idling(struct red_parms *p)
				153	{
				154	return !PSCHED_IS_PASTPERFECT(p->qidlestart);
				155	}
				156
				157	static inline void red_start_of_idle_period(struct red_parms *p)
				158	{
				159	PSCHED_GET_TIME(p->qidlestart);
				160	}
				161
				162	static inline void red_end_of_idle_period(struct red_parms *p)
				163	{
				164	PSCHED_SET_PASTPERFECT(p->qidlestart);
				165	}
				166
				167	static inline void red_restart(struct red_parms *p)
				168	{
				169	red_end_of_idle_period(p);
				170	p->qavg = 0;
				171	p->qcount = -1;
				172	}
				173
				174	static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p)
				175	{
				176	psched_time_t now;
				177	long us_idle;
				178	int shift;
				179
				180	PSCHED_GET_TIME(now);
				181	us_idle = PSCHED_TDIFF_SAFE(now, p->qidlestart, p->Scell_max);
				182
				183	/*
				184	* The problem: ideally, average length queue recalcultion should
				185	* be done over constant clock intervals. This is too expensive, so
				186	* that the calculation is driven by outgoing packets.
				187	* When the queue is idle we have to model this clock by hand.
				188	*
				189	* SF+VJ proposed to "generate":
				190	*
				191	* m = idletime / (average_pkt_size / bandwidth)
				192	*
				193	* dummy packets as a burst after idle time, i.e.
				194	*
				195	* p->qavg *= (1-W)^m
				196	*
				197	* This is an apparently overcomplicated solution (f.e. we have to
				198	* precompute a table to make this calculation in reasonable time)
				199	* I believe that a simpler model may be used here,
				200	* but it is field for experiments.
				201	*/
				202
				203	shift = p->Stab[(us_idle >> p->Scell_log) & RED_STAB_MASK];
				204
				205	if (shift)
				206	return p->qavg >> shift;
				207	else {
				208	/* Approximate initial part of exponent with linear function:
				209	*
				210	* (1-W)^m ~= 1-mW + ...
				211	*
				212	* Seems, it is the best solution to
				213	* problem of too coarse exponent tabulation.
				214	*/
Ilpo Järvinen	c4c0ce5	2006-08-04 16:36:18 -0700	[diff] [blame]	215	us_idle = (p->qavg * (u64)us_idle) >> p->Scell_log;
Thomas Graf	a783474	2005-11-05 21:14:03 +0100	[diff] [blame]	216
				217	if (us_idle < (p->qavg >> 1))
				218	return p->qavg - us_idle;
				219	else
				220	return p->qavg >> 1;
				221	}
				222	}
				223
				224	static inline unsigned long red_calc_qavg_no_idle_time(struct red_parms *p,
				225	unsigned int backlog)
				226	{
				227	/*
				228	* NOTE: p->qavg is fixed point number with point at Wlog.
				229	* The formula below is equvalent to floating point
				230	* version:
				231	*
				232	* qavg = qavg(1-W) + backlogW;
				233	*
				234	* --ANK (980924)
				235	*/
				236	return p->qavg + (backlog - (p->qavg >> p->Wlog));
				237	}
				238
				239	static inline unsigned long red_calc_qavg(struct red_parms *p,
				240	unsigned int backlog)
				241	{
				242	if (!red_is_idling(p))
				243	return red_calc_qavg_no_idle_time(p, backlog);
				244	else
				245	return red_calc_qavg_from_idle_time(p);
				246	}
				247
				248	static inline u32 red_random(struct red_parms *p)
				249	{
				250	return net_random() & p->Rmask;
				251	}
				252
				253	static inline int red_mark_probability(struct red_parms *p, unsigned long qavg)
				254	{
				255	/* The formula used below causes questions.
				256
				257	OK. qR is random number in the interval 0..Rmask
				258	i.e. 0..(2^Plog). If we used floating point
				259	arithmetics, it would be: (2^Plog)*rnd_num,
				260	where rnd_num is less 1.
				261
				262	Taking into account, that qavg have fixed
				263	point at Wlog, and Plog is related to max_P by
				264	max_P = (qth_max-qth_min)/2^Plog; two lines
				265	below have the following floating point equivalent:
				266
				267	max_P*(qavg - qth_min)/(qth_max-qth_min) < rnd/qcount
				268
				269	Any questions? --ANK (980924)
				270	*/
				271	return !(((qavg - p->qth_min) >> p->Wlog) * p->qcount < p->qR);
				272	}
				273
				274	enum {
				275	RED_BELOW_MIN_THRESH,
				276	RED_BETWEEN_TRESH,
				277	RED_ABOVE_MAX_TRESH,
				278	};
				279
				280	static inline int red_cmp_thresh(struct red_parms *p, unsigned long qavg)
				281	{
				282	if (qavg < p->qth_min)
				283	return RED_BELOW_MIN_THRESH;
				284	else if (qavg >= p->qth_max)
				285	return RED_ABOVE_MAX_TRESH;
				286	else
				287	return RED_BETWEEN_TRESH;
				288	}
				289
				290	enum {
				291	RED_DONT_MARK,
				292	RED_PROB_MARK,
				293	RED_HARD_MARK,
				294	};
				295
				296	static inline int red_action(struct red_parms *p, unsigned long qavg)
				297	{
				298	switch (red_cmp_thresh(p, qavg)) {
				299	case RED_BELOW_MIN_THRESH:
				300	p->qcount = -1;
				301	return RED_DONT_MARK;
				302
				303	case RED_BETWEEN_TRESH:
				304	if (++p->qcount) {
				305	if (red_mark_probability(p, qavg)) {
				306	p->qcount = 0;
				307	p->qR = red_random(p);
				308	return RED_PROB_MARK;
				309	}
				310	} else
				311	p->qR = red_random(p);
				312
				313	return RED_DONT_MARK;
				314
				315	case RED_ABOVE_MAX_TRESH:
				316	p->qcount = -1;
				317	return RED_HARD_MARK;
				318	}
				319
				320	BUG();
				321	return RED_DONT_MARK;
				322	}
				323
				324	#endif