/* red.h source code [linux/include/net/red.h] */

/* SPDX-License-Identifier: GPL-2.0 */
2	#ifndef __NET_SCHED_RED_H
3	#define __NET_SCHED_RED_H
4
5	#include <linux/types.h>
6	#include <linux/bug.h>
7	#include <net/pkt_sched.h>
8	#include <net/inet_ecn.h>
9	#include <net/dsfield.h>
10	#include <linux/reciprocal_div.h>
11
/*	Random Early Detection (RED) algorithm.
	=======================================

	Source: Sally Floyd and Van Jacobson, "Random Early Detection Gateways
	for Congestion Avoidance", 1993, IEEE/ACM Transactions on Networking.

	This file codes a "divisionless" version of RED algorithm
	as written down in Fig.17 of the paper.

	Short description.
	------------------

	When a new packet arrives we calculate the average queue length:

	avg = (1-W)*avg + W*current_queue_len,

	W is the filter time constant (chosen as 2^(-Wlog)), it controls
	the inertia of the algorithm. To allow larger bursts, W should be
	decreased.

	if (avg > th_max) -> packet marked (dropped).
	if (avg < th_min) -> packet passes.
	if (th_min < avg < th_max) we calculate probability:

	Pb = max_P * (avg - th_min)/(th_max-th_min)

	and mark (drop) packet with this probability.
	Pb changes from 0 (at avg==th_min) to max_P (avg==th_max).
	max_P should be small (not 1), usually 0.01..0.02 is good value.

	max_P is chosen as a number, so that max_P/(th_max-th_min)
	is a negative power of two in order arithmetic to contain
	only shifts.


	Parameters, settable by user:
	-----------------------------

	qth_min		- bytes (should be < qth_max/2)
	qth_max		- bytes (should be at least 2*qth_min and less limit)
	Wlog		- bits (<32) log(1/W).
	Plog		- bits (<32)

	Plog is related to max_P by formula:

	max_P = (qth_max-qth_min)/2^Plog;

	F.e. if qth_max=128K and qth_min=32K, then Plog=22
	corresponds to max_P=0.02

	Scell_log
	Stab

	Lookup table for log((1-W)^(t/t_ave).


	NOTES:

	Upper bound on W.
	-----------------

	If you want to allow bursts of L packets of size S,
	you should choose W:

	L + 1 - th_min/S < (1-(1-W)^L)/W

	th_min/S = 32         th_min/S = 4

	log(W)	L
	-1	33
	-2	35
	-3	39
	-4	46
	-5	57
	-6	75
	-7	101
	-8	135
	-9	190
	etc.
 */
92
/*
 * Adaptive RED: An Algorithm for Increasing the Robustness of RED's AQM
 * (Sally Floyd, Ramakrishna Gummadi, and Scott Shenker) August 2001
 *
 * Every 500 ms:
 *  if (avg > target and max_p <= 0.5)
 *   increase max_p : max_p += alpha;
 *  else if (avg < target and max_p >= 0.01)
 *   decrease max_p : max_p *= beta;
 *
 * target : [qth_min + 0.4*(qth_max - qth_min),
 *           qth_min + 0.6*(qth_max - qth_min)].
 * alpha : min(0.01, max_p / 4)
 * beta : 0.9
 * max_P is a Q0.32 fixed point number (with 32 bits mantissa)
 * max_P between 0.01 and 0.5 (1% - 50%) [ It's no longer a negative power of two ]
 */

/* 1% expressed in the Q0.32 fixed-point format used for max_P. */
#define RED_ONE_PERCENT ((u32)DIV_ROUND_CLOSEST(1ULL<<32, 100))

/* Bounds between which the adaptive algorithm moves max_P (1% and 50%). */
#define MAX_P_MIN (1 * RED_ONE_PERCENT)
#define MAX_P_MAX (50 * RED_ONE_PERCENT)
/*
 * Additive increase step: min(1%, val / 4).
 * The argument is parenthesized so that expressions such as a + b
 * are divided as a whole.
 */
#define MAX_P_ALPHA(val) min(MAX_P_MIN, (val) / 4)

/* Number of entries in red_parms.Stab and the mask used to index it. */
#define RED_STAB_SIZE	256
#define RED_STAB_MASK	(RED_STAB_SIZE - 1)
118
119	struct red_stats {
120	u32 prob_drop; / Early probability drops /
121	u32 prob_mark; / Early probability marks /
122	u32 forced_drop; / Forced drops, qavg > max_thresh /
123	u32 forced_mark; / Forced marks, qavg > max_thresh /
124	u32 pdrop; / Drops due to queue limits /
125	};
126
127	struct red_parms {
128	/ Parameters /
129	u32 qth_min; / Min avg length threshold: Wlog scaled /
130	u32 qth_max; / Max avg length threshold: Wlog scaled /
131	u32 Scell_max;
132	u32 max_P; / probability, [0 .. 1.0] 32 scaled /
133	/ reciprocal_value(max_P / qth_delta) /
134	struct reciprocal_value max_P_reciprocal;
135	u32 qth_delta; / max_th - min_th /
136	u32 target_min; / min_th + 0.4(max_th - min_th) /*
137	u32 target_max; / min_th + 0.6(max_th - min_th) /*
138	u8 Scell_log;
139	u8 Wlog; / log(W) /
140	u8 Plog; / random number bits /
141	u8 Stab[RED_STAB_SIZE];
142	};
143
144	struct red_vars {
145	/ Variables /
146	int qcount; / Number of packets since last random*
147	number generation /*
148	u32 qR; / Cached random number /
149
150	unsigned long qavg; / Average queue length: Wlog scaled /
151	ktime_t qidlestart; / Start of current idle period /
152	};
153
154	static inline u32 red_maxp(u8 Plog)
155	{
156	return Plog < `32` ? (~`0U` >> Plog) : ~`0U`;
157	}
158
159	static inline void red_set_vars(struct red_vars *v)
160	{
161	/ Reset average queue length, the value is strictly bound*
162	* to the parameters below, resetting hurts a bit but leaving
163	* it might result in an unreasonable qavg for a while. --TGR
164	*/
165	v->qavg = `0`;
166
167	v->qcount = -`1`;
168	}
169
170	static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog,
171	u8 Scell_log, u8 *stab)
172	{
173	if (fls(x: qth_min) + Wlog >= `32`)
174	return false;
175	if (fls(x: qth_max) + Wlog >= `32`)
176	return false;
177	if (Scell_log >= `32`)
178	return false;
179	if (qth_max < qth_min)
180	return false;
181	if (stab) {
182	int i;
183
184	for (i = `0`; i < RED_STAB_SIZE; i++)
185	if (stab[i] >= `32`)
186	return false;
187	}
188	return true;
189	}
190
191	static inline int red_get_flags(unsigned char qopt_flags,
192	unsigned char historic_mask,
193	struct nlattr *flags_attr,
194	unsigned char supported_mask,
195	struct nla_bitfield32 *p_flags,
196	unsigned char *p_userbits,
197	struct netlink_ext_ack *extack)
198	{
199	struct nla_bitfield32 flags;
200
201	if (qopt_flags && flags_attr) {
202	NL_SET_ERR_MSG_MOD(extack, "flags should be passed either through qopt, or through a dedicated attribute");
203	return -EINVAL;
204	}
205
206	if (flags_attr) {
207	flags = nla_get_bitfield32(nla: flags_attr);
208	} else {
209	flags.selector = historic_mask;
210	flags.value = qopt_flags & historic_mask;
211	}
212
213	*p_flags = flags;
214	*p_userbits = qopt_flags & ~historic_mask;
215	return `0`;
216	}
217
218	static inline int red_validate_flags(unsigned char flags,
219	struct netlink_ext_ack *extack)
220	{
221	if ((flags & TC_RED_NODROP) && !(flags & TC_RED_ECN)) {
222	NL_SET_ERR_MSG_MOD(extack, "nodrop mode is only meaningful with ECN");
223	return -EINVAL;
224	}
225
226	return `0`;
227	}
228
229	static inline void red_set_parms(struct red_parms *p,
230	u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
231	u8 Scell_log, u8 *stab, u32 max_P)
232	{
233	int delta = qth_max - qth_min;
234	u32 max_p_delta;
235
236	WRITE_ONCE(p->qth_min, qth_min << Wlog);
237	WRITE_ONCE(p->qth_max, qth_max << Wlog);
238	WRITE_ONCE(p->Wlog, Wlog);
239	WRITE_ONCE(p->Plog, Plog);
240	if (delta <= `0`)
241	delta = `1`;
242	p->qth_delta = delta;
243	if (!max_P) {
244	max_P = red_maxp(Plog);
245	max_P = delta; /* max_P = (qth_max - qth_min)/2^Plog /
246	}
247	WRITE_ONCE(p->max_P, max_P);
248	max_p_delta = max_P / delta;
249	max_p_delta = max(max_p_delta, `1U`);
250	p->max_P_reciprocal = reciprocal_value(d: max_p_delta);
251
252	/ RED Adaptative target :*
253	* [min_th + 0.4*(min_th - max_th),
254	* min_th + 0.6*(min_th - max_th)].
255	*/
256	delta /= `5`;
257	p->target_min = qth_min + `2`*delta;
258	p->target_max = qth_min + `3`*delta;
259
260	WRITE_ONCE(p->Scell_log, Scell_log);
261	p->Scell_max = (`255` << Scell_log);
262
263	if (stab)
264	memcpy(p->Stab, stab, sizeof(p->Stab));
265	}
266
267	static inline int red_is_idling(const struct red_vars *v)
268	{
269	return v->qidlestart != `0`;
270	}
271
272	static inline void red_start_of_idle_period(struct red_vars *v)
273	{
274	v->qidlestart = ktime_get();
275	}
276
277	static inline void red_end_of_idle_period(struct red_vars *v)
278	{
279	v->qidlestart = `0`;
280	}
281
282	static inline void red_restart(struct red_vars *v)
283	{
284	red_end_of_idle_period(v);
285	v->qavg = `0`;
286	v->qcount = -`1`;
287	}
288
289	static inline unsigned long red_calc_qavg_from_idle_time(const struct red_parms *p,
290	const struct red_vars *v)
291	{
292	s64 delta = ktime_us_delta(later: ktime_get(), earlier: v->qidlestart);
293	long us_idle = min_t(s64, delta, p->Scell_max);
294	int shift;
295
296	/*
297	* The problem: ideally, average length queue recalculation should
298	* be done over constant clock intervals. This is too expensive, so
299	* that the calculation is driven by outgoing packets.
300	* When the queue is idle we have to model this clock by hand.
301	*
302	* SF+VJ proposed to "generate":
303	*
304	* m = idletime / (average_pkt_size / bandwidth)
305	*
306	* dummy packets as a burst after idle time, i.e.
307	*
308	* v->qavg *= (1-W)^m
309	*
310	* This is an apparently overcomplicated solution (f.e. we have to
311	* precompute a table to make this calculation in reasonable time)
312	* I believe that a simpler model may be used here,
313	* but it is field for experiments.
314	*/
315
316	shift = p->Stab[(us_idle >> p->Scell_log) & RED_STAB_MASK];
317
318	if (shift)
319	return v->qavg >> shift;
320	else {
321	/ Approximate initial part of exponent with linear function:*
322	*
323	* (1-W)^m ~= 1-mW + ...
324	*
325	* Seems, it is the best solution to
326	* problem of too coarse exponent tabulation.
327	*/
328	us_idle = (v->qavg * (u64)us_idle) >> p->Scell_log;
329
330	if (us_idle < (v->qavg >> `1`))
331	return v->qavg - us_idle;
332	else
333	return v->qavg >> `1`;
334	}
335	}
336
337	static inline unsigned long red_calc_qavg_no_idle_time(const struct red_parms *p,
338	const struct red_vars *v,
339	unsigned int backlog)
340	{
341	/*
342	* NOTE: v->qavg is fixed point number with point at Wlog.
343	* The formula below is equivalent to floating point
344	* version:
345	*
346	* qavg = qavg(1-W) + backlogW;
347	*
348	* --ANK (980924)
349	*/
350	return v->qavg + (backlog - (v->qavg >> p->Wlog));
351	}
352
/*
 * Return the updated average queue length: the backlog-driven update when
 * the queue is active, the idle-time decay when it is idling.
 */
static inline unsigned long red_calc_qavg(const struct red_parms *p,
					  const struct red_vars *v,
					  unsigned int backlog)
{
	if (!red_is_idling(v))
		return red_calc_qavg_no_idle_time(p, v, backlog);
	else
		return red_calc_qavg_from_idle_time(p, v);
}
362
363
364	static inline u32 red_random(const struct red_parms *p)
365	{
366	return reciprocal_divide(a: get_random_u32(), R: p->max_P_reciprocal);
367	}
368
369	static inline int red_mark_probability(const struct red_parms *p,
370	const struct red_vars *v,
371	unsigned long qavg)
372	{
373	/ The formula used below causes questions.*
374
375	OK. qR is random number in the interval
376	(0..1/max_P)(qth_max-qth_min)*
377	i.e. 0..(2^Plog). If we used floating point
378	arithmetic, it would be: (2^Plog)rnd_num,*
379	where rnd_num is less 1.
380
381	Taking into account, that qavg have fixed
382	point at Wlog, two lines
383	below have the following floating point equivalent:
384
385	max_P(qavg - qth_min)/(qth_max-qth_min) < rnd/qcount*
386
387	Any questions? --ANK (980924)
388	*/
389	return !(((qavg - p->qth_min) >> p->Wlog) * v->qcount < v->qR);
390	}
391
/* Position of the average queue length relative to the two thresholds. */
enum {
	RED_BELOW_MIN_THRESH,
	RED_BETWEEN_TRESH,
	RED_ABOVE_MAX_TRESH,
};
397
398	static inline int red_cmp_thresh(const struct red_parms p, unsigned* long qavg)
399	{
400	if (qavg < p->qth_min)
401	return RED_BELOW_MIN_THRESH;
402	else if (qavg >= p->qth_max)
403	return RED_ABOVE_MAX_TRESH;
404	else
405	return RED_BETWEEN_TRESH;
406	}
407
/* Verdicts returned by red_action(). */
enum {
	RED_DONT_MARK,
	RED_PROB_MARK,
	RED_HARD_MARK,
};
413
414	static inline int red_action(const struct red_parms *p,
415	struct red_vars *v,
416	unsigned long qavg)
417	{
418	switch (red_cmp_thresh(p, qavg)) {
419	case RED_BELOW_MIN_THRESH:
420	v->qcount = -`1`;
421	return RED_DONT_MARK;
422
423	case RED_BETWEEN_TRESH:
424	if (++v->qcount) {
425	if (red_mark_probability(p, v, qavg)) {
426	v->qcount = `0`;
427	v->qR = red_random(p);
428	return RED_PROB_MARK;
429	}
430	} else
431	v->qR = red_random(p);
432
433	return RED_DONT_MARK;
434
435	case RED_ABOVE_MAX_TRESH:
436	v->qcount = -`1`;
437	return RED_HARD_MARK;
438	}
439
440	BUG();
441	return RED_DONT_MARK;
442	}
443
444	static inline void red_adaptative_algo(struct red_parms p, struct* red_vars *v)
445	{
446	unsigned long qavg;
447	u32 max_p_delta;
448
449	qavg = v->qavg;
450	if (red_is_idling(v))
451	qavg = red_calc_qavg_from_idle_time(p, v);
452
453	/ v->qavg is fixed point number with point at Wlog /
454	qavg >>= p->Wlog;
455
456	if (qavg > p->target_max && p->max_P <= MAX_P_MAX)
457	p->max_P += MAX_P_ALPHA(p->max_P); / maxp = maxp + alpha /
458	else if (qavg < p->target_min && p->max_P >= MAX_P_MIN)
459	p->max_P = (p->max_P/`10`)`9`; /* maxp = maxp * Beta /
460
461	max_p_delta = DIV_ROUND_CLOSEST(p->max_P, p->qth_delta);
462	max_p_delta = max(max_p_delta, `1U`);
463	p->max_P_reciprocal = reciprocal_value(d: max_p_delta);
464	}
465	#endif
466

/* source code of linux/include/net/red.h */