
/net/sched/sch_red.c

http://github.com/mirrors/linux
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/*
  3 * net/sched/sch_red.c	Random Early Detection queue.
  4 *
  5 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  6 *
  7 * Changes:
  8 * J Hadi Salim 980914:	computation fixes
  9 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 10 * J Hadi Salim 980816:  ECN support
 11 */
 12
 13#include <linux/module.h>
 14#include <linux/types.h>
 15#include <linux/kernel.h>
 16#include <linux/skbuff.h>
 17#include <net/pkt_sched.h>
 18#include <net/pkt_cls.h>
 19#include <net/inet_ecn.h>
 20#include <net/red.h>
 21
 22
 23/*	Parameters, settable by user:
 24	-----------------------------
 25
 26	limit		- bytes (must be > qth_max + burst)
 27
 28	Hard limit on queue length; it should be chosen > qth_max
 29	to allow packet bursts. This parameter does not
 30	affect the algorithm's behaviour and can be chosen
 31	arbitrarily high (well, less than RAM size).
 32	Really, this limit will never be reached
 33	if RED works correctly.
 34 */
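/* Illustrative sizing (hypothetical numbers, not taken from this file):
 * with qth_min = 30000 bytes, qth_max = 90000 bytes and room for transient
 * bursts of roughly 60000 bytes on top of qth_max, a limit of 200000 bytes
 * satisfies limit > qth_max + burst, so bursts are absorbed by the FIFO
 * child rather than tail-dropped before RED gets a chance to react.
 */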
 35
 36struct red_sched_data {
 37	u32			limit;		/* HARD maximal queue length */
 38
 39	unsigned char		flags;
 40	/* Non-flags in tc_red_qopt.flags. */
 41	unsigned char		userbits;
 42
 43	struct timer_list	adapt_timer;
 44	struct Qdisc		*sch;
 45	struct red_parms	parms;
 46	struct red_vars		vars;
 47	struct red_stats	stats;
 48	struct Qdisc		*qdisc;
 49};
 50
 51static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS | TC_RED_NODROP;
 52
 53static inline int red_use_ecn(struct red_sched_data *q)
 54{
 55	return q->flags & TC_RED_ECN;
 56}
 57
 58static inline int red_use_harddrop(struct red_sched_data *q)
 59{
 60	return q->flags & TC_RED_HARDDROP;
 61}
 62
 63static int red_use_nodrop(struct red_sched_data *q)
 64{
 65	return q->flags & TC_RED_NODROP;
 66}
 67
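/* Enqueue path below: qavg is an EWMA of the child qdisc's backlog.
 * red_action() returns one of three verdicts: RED_DONT_MARK (queue the
 * packet normally), RED_PROB_MARK (probabilistic congestion signal) or
 * RED_HARD_MARK (qavg is above qth_max).  A "mark" becomes an ECN CE mark
 * when TC_RED_ECN is set and the packet is ECN-capable; otherwise it is a
 * drop, except that TC_RED_NODROP queues non-ECT packets instead of
 * dropping them, and TC_RED_HARDDROP forces a drop at RED_HARD_MARK even
 * when ECN is enabled.
 */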
 68static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 69		       struct sk_buff **to_free)
 70{
 71	struct red_sched_data *q = qdisc_priv(sch);
 72	struct Qdisc *child = q->qdisc;
 73	int ret;
 74
 75	q->vars.qavg = red_calc_qavg(&q->parms,
 76				     &q->vars,
 77				     child->qstats.backlog);
 78
 79	if (red_is_idling(&q->vars))
 80		red_end_of_idle_period(&q->vars);
 81
 82	switch (red_action(&q->parms, &q->vars, q->vars.qavg)) {
 83	case RED_DONT_MARK:
 84		break;
 85
 86	case RED_PROB_MARK:
 87		qdisc_qstats_overlimit(sch);
 88		if (!red_use_ecn(q)) {
 89			q->stats.prob_drop++;
 90			goto congestion_drop;
 91		}
 92
 93		if (INET_ECN_set_ce(skb)) {
 94			q->stats.prob_mark++;
 95		} else if (!red_use_nodrop(q)) {
 96			q->stats.prob_drop++;
 97			goto congestion_drop;
 98		}
 99
100		/* Non-ECT packet in ECN nodrop mode: queue it. */
101		break;
102
103	case RED_HARD_MARK:
104		qdisc_qstats_overlimit(sch);
105		if (red_use_harddrop(q) || !red_use_ecn(q)) {
106			q->stats.forced_drop++;
107			goto congestion_drop;
108		}
109
110		if (INET_ECN_set_ce(skb)) {
111			q->stats.forced_mark++;
112		} else if (!red_use_nodrop(q)) {
113			q->stats.forced_drop++;
114			goto congestion_drop;
115		}
116
117		/* Non-ECT packet in ECN nodrop mode: queue it. */
118		break;
119	}
120
121	ret = qdisc_enqueue(skb, child, to_free);
122	if (likely(ret == NET_XMIT_SUCCESS)) {
123		qdisc_qstats_backlog_inc(sch, skb);
124		sch->q.qlen++;
125	} else if (net_xmit_drop_count(ret)) {
126		q->stats.pdrop++;
127		qdisc_qstats_drop(sch);
128	}
129	return ret;
130
131congestion_drop:
132	qdisc_drop(skb, sch, to_free);
133	return NET_XMIT_CN;
134}
135
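/* Dequeue side: when the child queue runs empty, red_start_of_idle_period()
 * records the moment so that the next enqueue can age qavg for the idle
 * interval (see the red_is_idling()/red_end_of_idle_period() calls above).
 */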
136static struct sk_buff *red_dequeue(struct Qdisc *sch)
137{
138	struct sk_buff *skb;
139	struct red_sched_data *q = qdisc_priv(sch);
140	struct Qdisc *child = q->qdisc;
141
142	skb = child->dequeue(child);
143	if (skb) {
144		qdisc_bstats_update(sch, skb);
145		qdisc_qstats_backlog_dec(sch, skb);
146		sch->q.qlen--;
147	} else {
148		if (!red_is_idling(&q->vars))
149			red_start_of_idle_period(&q->vars);
150	}
151	return skb;
152}
153
154static struct sk_buff *red_peek(struct Qdisc *sch)
155{
156	struct red_sched_data *q = qdisc_priv(sch);
157	struct Qdisc *child = q->qdisc;
158
159	return child->ops->peek(child);
160}
161
162static void red_reset(struct Qdisc *sch)
163{
164	struct red_sched_data *q = qdisc_priv(sch);
165
166	qdisc_reset(q->qdisc);
167	sch->qstats.backlog = 0;
168	sch->q.qlen = 0;
169	red_restart(&q->vars);
170}
171
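/* Offload note: red_set_parms() stores qth_min/qth_max left-shifted by Wlog
 * (the fixed-point scaling also used for qavg), so they are shifted back to
 * their user-visible byte values before being handed to the driver here and
 * in red_dump() below.
 */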
172static int red_offload(struct Qdisc *sch, bool enable)
173{
174	struct red_sched_data *q = qdisc_priv(sch);
175	struct net_device *dev = qdisc_dev(sch);
176	struct tc_red_qopt_offload opt = {
177		.handle = sch->handle,
178		.parent = sch->parent,
179	};
180
181	if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
182		return -EOPNOTSUPP;
183
184	if (enable) {
185		opt.command = TC_RED_REPLACE;
186		opt.set.min = q->parms.qth_min >> q->parms.Wlog;
187		opt.set.max = q->parms.qth_max >> q->parms.Wlog;
188		opt.set.probability = q->parms.max_P;
189		opt.set.limit = q->limit;
190		opt.set.is_ecn = red_use_ecn(q);
191		opt.set.is_harddrop = red_use_harddrop(q);
192		opt.set.is_nodrop = red_use_nodrop(q);
193		opt.set.qstats = &sch->qstats;
194	} else {
195		opt.command = TC_RED_DESTROY;
196	}
197
198	return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt);
199}
200
201static void red_destroy(struct Qdisc *sch)
202{
203	struct red_sched_data *q = qdisc_priv(sch);
204
205	del_timer_sync(&q->adapt_timer);
206	red_offload(sch, false);
207	qdisc_put(q->qdisc);
208}
209
210static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
211	[TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS },
212	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
213	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
214	[TCA_RED_MAX_P] = { .type = NLA_U32 },
215	[TCA_RED_FLAGS] = { .type = NLA_BITFIELD32,
216			    .validation_data = &red_supported_flags },
217};
218
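/* red_change() below: parse and validate the netlink attributes, create a
 * bfifo child sized to the new limit, swap flags, limit, parameters and the
 * child in under sch_tree_lock(), restart the adaptive timer when
 * TC_RED_ADAPTATIVE is requested, and finally (re)program any hardware
 * offload outside the lock.
 */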
219static int red_change(struct Qdisc *sch, struct nlattr *opt,
220		      struct netlink_ext_ack *extack)
221{
222	struct Qdisc *old_child = NULL, *child = NULL;
223	struct red_sched_data *q = qdisc_priv(sch);
224	struct nlattr *tb[TCA_RED_MAX + 1];
225	struct nla_bitfield32 flags_bf;
226	struct tc_red_qopt *ctl;
227	unsigned char userbits;
228	unsigned char flags;
229	int err;
230	u32 max_P;
231
232	if (opt == NULL)
233		return -EINVAL;
234
235	err = nla_parse_nested_deprecated(tb, TCA_RED_MAX, opt, red_policy,
236					  NULL);
237	if (err < 0)
238		return err;
239
240	if (tb[TCA_RED_PARMS] == NULL ||
241	    tb[TCA_RED_STAB] == NULL)
242		return -EINVAL;
243
244	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
245
246	ctl = nla_data(tb[TCA_RED_PARMS]);
247	if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog))
248		return -EINVAL;
249
250	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
251			    tb[TCA_RED_FLAGS], red_supported_flags,
252			    &flags_bf, &userbits, extack);
253	if (err)
254		return err;
255
256	if (ctl->limit > 0) {
257		child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit,
258					 extack);
259		if (IS_ERR(child))
260			return PTR_ERR(child);
261
262		/* child is fifo, no need to check for noop_qdisc */
263		qdisc_hash_add(child, true);
264	}
265
266	sch_tree_lock(sch);
267
268	flags = (q->flags & ~flags_bf.selector) | flags_bf.value;
269	err = red_validate_flags(flags, extack);
270	if (err)
271		goto unlock_out;
272
273	q->flags = flags;
274	q->userbits = userbits;
275	q->limit = ctl->limit;
276	if (child) {
277		qdisc_tree_flush_backlog(q->qdisc);
278		old_child = q->qdisc;
279		q->qdisc = child;
280	}
281
282	red_set_parms(&q->parms,
283		      ctl->qth_min, ctl->qth_max, ctl->Wlog,
284		      ctl->Plog, ctl->Scell_log,
285		      nla_data(tb[TCA_RED_STAB]),
286		      max_P);
287	red_set_vars(&q->vars);
288
289	del_timer(&q->adapt_timer);
290	if (ctl->flags & TC_RED_ADAPTATIVE)
291		mod_timer(&q->adapt_timer, jiffies + HZ/2);
292
293	if (!q->qdisc->q.qlen)
294		red_start_of_idle_period(&q->vars);
295
296	sch_tree_unlock(sch);
297
298	red_offload(sch, true);
299
300	if (old_child)
301		qdisc_put(old_child);
302	return 0;
303
304unlock_out:
305	sch_tree_unlock(sch);
306	if (child)
307		qdisc_put(child);
308	return err;
309}
310
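/* Adaptive RED: when TC_RED_ADAPTATIVE is set, this timer fires every
 * 500 ms (HZ/2), takes the root qdisc lock and lets red_adaptative_algo()
 * nudge max_P so that qavg stays between qth_min and qth_max.
 */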
311static inline void red_adaptative_timer(struct timer_list *t)
312{
313	struct red_sched_data *q = from_timer(q, t, adapt_timer);
314	struct Qdisc *sch = q->sch;
315	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
316
317	spin_lock(root_lock);
318	red_adaptative_algo(&q->parms, &q->vars);
319	mod_timer(&q->adapt_timer, jiffies + HZ/2);
320	spin_unlock(root_lock);
321}
322
323static int red_init(struct Qdisc *sch, struct nlattr *opt,
324		    struct netlink_ext_ack *extack)
325{
326	struct red_sched_data *q = qdisc_priv(sch);
327
328	q->qdisc = &noop_qdisc;
329	q->sch = sch;
330	timer_setup(&q->adapt_timer, red_adaptative_timer, 0);
331	return red_change(sch, opt, extack);
332}
333
334static int red_dump_offload_stats(struct Qdisc *sch)
335{
336	struct tc_red_qopt_offload hw_stats = {
337		.command = TC_RED_STATS,
338		.handle = sch->handle,
339		.parent = sch->parent,
340		{
341			.stats.bstats = &sch->bstats,
342			.stats.qstats = &sch->qstats,
343		},
344	};
345
346	return qdisc_offload_dump_helper(sch, TC_SETUP_QDISC_RED, &hw_stats);
347}
348
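/* Dump note: only the historic flag bits (plus any preserved user bits) are
 * reported in the legacy tc_red_qopt.flags field; the full flag set,
 * including TC_RED_NODROP, is exposed through the TCA_RED_FLAGS
 * bitfield32 attribute.
 */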
349static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
350{
351	struct red_sched_data *q = qdisc_priv(sch);
352	struct nlattr *opts = NULL;
353	struct tc_red_qopt opt = {
354		.limit		= q->limit,
355		.flags		= (q->flags & TC_RED_HISTORIC_FLAGS) |
356				  q->userbits,
357		.qth_min	= q->parms.qth_min >> q->parms.Wlog,
358		.qth_max	= q->parms.qth_max >> q->parms.Wlog,
359		.Wlog		= q->parms.Wlog,
360		.Plog		= q->parms.Plog,
361		.Scell_log	= q->parms.Scell_log,
362	};
363	int err;
364
365	err = red_dump_offload_stats(sch);
366	if (err)
367		goto nla_put_failure;
368
369	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
370	if (opts == NULL)
371		goto nla_put_failure;
372	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
373	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
374	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
375			       q->flags, red_supported_flags))
376		goto nla_put_failure;
377	return nla_nest_end(skb, opts);
378
379nla_put_failure:
380	nla_nest_cancel(skb, opts);
381	return -EMSGSIZE;
382}
383
384static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
385{
386	struct red_sched_data *q = qdisc_priv(sch);
387	struct net_device *dev = qdisc_dev(sch);
388	struct tc_red_xstats st = {0};
389
390	if (sch->flags & TCQ_F_OFFLOADED) {
391		struct tc_red_qopt_offload hw_stats_request = {
392			.command = TC_RED_XSTATS,
393			.handle = sch->handle,
394			.parent = sch->parent,
395			{
396				.xstats = &q->stats,
397			},
398		};
399		dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED,
400					      &hw_stats_request);
401	}
402	st.early = q->stats.prob_drop + q->stats.forced_drop;
403	st.pdrop = q->stats.pdrop;
404	st.other = q->stats.other;
405	st.marked = q->stats.prob_mark + q->stats.forced_mark;
406
407	return gnet_stats_copy_app(d, &st, sizeof(st));
408}
409
410static int red_dump_class(struct Qdisc *sch, unsigned long cl,
411			  struct sk_buff *skb, struct tcmsg *tcm)
412{
413	struct red_sched_data *q = qdisc_priv(sch);
414
415	tcm->tcm_handle |= TC_H_MIN(1);
416	tcm->tcm_info = q->qdisc->handle;
417	return 0;
418}
419
420static void red_graft_offload(struct Qdisc *sch,
421			      struct Qdisc *new, struct Qdisc *old,
422			      struct netlink_ext_ack *extack)
423{
424	struct tc_red_qopt_offload graft_offload = {
425		.handle		= sch->handle,
426		.parent		= sch->parent,
427		.child_handle	= new->handle,
428		.command	= TC_RED_GRAFT,
429	};
430
431	qdisc_offload_graft_helper(qdisc_dev(sch), sch, new, old,
432				   TC_SETUP_QDISC_RED, &graft_offload, extack);
433}
434
435static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
436		     struct Qdisc **old, struct netlink_ext_ack *extack)
437{
438	struct red_sched_data *q = qdisc_priv(sch);
439
440	if (new == NULL)
441		new = &noop_qdisc;
442
443	*old = qdisc_replace(sch, new, &q->qdisc);
444
445	red_graft_offload(sch, new, *old, extack);
446	return 0;
447}
448
449static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
450{
451	struct red_sched_data *q = qdisc_priv(sch);
452	return q->qdisc;
453}
454
455static unsigned long red_find(struct Qdisc *sch, u32 classid)
456{
457	return 1;
458}
459
460static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
461{
462	if (!walker->stop) {
463		if (walker->count >= walker->skip)
464			if (walker->fn(sch, 1, walker) < 0) {
465				walker->stop = 1;
466				return;
467			}
468		walker->count++;
469	}
470}
471
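/* Classful interface: RED owns exactly one child slot.  red_find() always
 * returns class handle 1, red_walk() visits that single class, and
 * red_graft() swaps the child qdisc (falling back to noop_qdisc when the
 * new child is NULL).
 */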
472static const struct Qdisc_class_ops red_class_ops = {
473	.graft		=	red_graft,
474	.leaf		=	red_leaf,
475	.find		=	red_find,
476	.walk		=	red_walk,
477	.dump		=	red_dump_class,
478};
479
480static struct Qdisc_ops red_qdisc_ops __read_mostly = {
481	.id		=	"red",
482	.priv_size	=	sizeof(struct red_sched_data),
483	.cl_ops		=	&red_class_ops,
484	.enqueue	=	red_enqueue,
485	.dequeue	=	red_dequeue,
486	.peek		=	red_peek,
487	.init		=	red_init,
488	.reset		=	red_reset,
489	.destroy	=	red_destroy,
490	.change		=	red_change,
491	.dump		=	red_dump,
492	.dump_stats	=	red_dump_stats,
493	.owner		=	THIS_MODULE,
494};
495
496static int __init red_module_init(void)
497{
498	return register_qdisc(&red_qdisc_ops);
499}
500
501static void __exit red_module_exit(void)
502{
503	unregister_qdisc(&red_qdisc_ops);
504}
505
506module_init(red_module_init)
507module_exit(red_module_exit)
508
509MODULE_LICENSE("GPL");
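For context, the tc_red_qopt_offload structures filled in by red_offload(), red_dump_offload_stats(), red_dump_stats() and red_graft_offload() above are consumed by a driver's ndo_setup_tc() callback. The sketch below shows one plausible shape of that dispatch, using only the commands and fields visible in this file; it is not part of sch_red.c, and the example_* helpers are hypothetical placeholders for real hardware-programming code.

static int example_setup_tc_red(struct net_device *dev,
				struct tc_red_qopt_offload *opt)
{
	switch (opt->command) {
	case TC_RED_REPLACE:
		/* opt->set carries the min/max thresholds (Wlog scaling
		 * already removed), max_P as opt->set.probability, the queue
		 * limit and the is_ecn/is_harddrop/is_nodrop flags.
		 */
		return example_hw_red_install(dev, opt->handle, opt->parent,
					      &opt->set); /* hypothetical */
	case TC_RED_DESTROY:
		example_hw_red_remove(dev, opt->handle);  /* hypothetical */
		return 0;
	case TC_RED_STATS:
		/* Update opt->stats.bstats/qstats for red_dump_offload_stats(). */
		return example_hw_red_stats(dev, opt->handle, &opt->stats);
	case TC_RED_XSTATS:
		/* Fill the struct red_stats at opt->xstats for red_dump_stats(). */
		return example_hw_red_xstats(dev, opt->handle, opt->xstats);
	case TC_RED_GRAFT:
		/* opt->child_handle identifies the newly grafted child qdisc. */
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}

static int example_ndo_setup_tc(struct net_device *dev,
				enum tc_setup_type type, void *type_data)
{
	if (type == TC_SETUP_QDISC_RED)
		return example_setup_tc_red(dev, type_data);
	return -EOPNOTSUPP;
}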