PageRenderTime 59ms CodeModel.GetById 16ms app.highlight 38ms RepoModel.GetById 1ms app.codeStats 1ms

/net/sched/sch_teql.c

http://github.com/mirrors/linux
C | 523 lines | 398 code | 85 blank | 40 comment | 72 complexity | 1e702144282e5d718f2d34a2fb4535c5 MD5 | raw file
  1// SPDX-License-Identifier: GPL-2.0-or-later
  2/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
  3 *
  4 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  5 */
  6
  7#include <linux/module.h>
  8#include <linux/types.h>
  9#include <linux/kernel.h>
 10#include <linux/slab.h>
 11#include <linux/string.h>
 12#include <linux/errno.h>
 13#include <linux/if_arp.h>
 14#include <linux/netdevice.h>
 15#include <linux/init.h>
 16#include <linux/skbuff.h>
 17#include <linux/moduleparam.h>
 18#include <net/dst.h>
 19#include <net/neighbour.h>
 20#include <net/pkt_sched.h>
 21
 22/*
 23   How to set it up.
 24   ----------------
 25
 26   After loading this module you will find a new device teqlN
 27   and new qdisc with the same name. To join a slave to the equalizer
 28   you should just set this qdisc on a device f.e.
 29
 30   # tc qdisc add dev eth0 root teql0
 31   # tc qdisc add dev eth1 root teql0
 32
 33   That's all. Full PnP 8)
 34
 35   Applicability.
 36   --------------
 37
 38   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
 39      signal and generate EOI events. If you want to equalize virtual devices
 40      like tunnels, use a normal eql device.
 41   2. This device puts no limitations on physical slave characteristics
 42      f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
 43      Certainly, large difference in link speeds will make the resulting
 44      equalized link unusable, because of huge packet reordering.
 45      I estimate an upper useful difference as ~10 times.
 46   3. If the slave requires address resolution, only protocols using
 47      neighbour cache (IPv4/IPv6) will work over the equalized link.
 48      Other protocols are still allowed to use the slave device directly,
 49      which will not break load balancing, though native slave
 50      traffic will have the highest priority.  */
 51
 52struct teql_master {
 53	struct Qdisc_ops qops;
 54	struct net_device *dev;
 55	struct Qdisc *slaves;
 56	struct list_head master_list;
 57	unsigned long	tx_bytes;
 58	unsigned long	tx_packets;
 59	unsigned long	tx_errors;
 60	unsigned long	tx_dropped;
 61};
 62
 63struct teql_sched_data {
 64	struct Qdisc *next;
 65	struct teql_master *m;
 66	struct sk_buff_head q;
 67};
 68
 69#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)
 70
 71#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
 72
 73/* "teql*" qdisc routines */
 74
 75static int
 76teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
 77{
 78	struct net_device *dev = qdisc_dev(sch);
 79	struct teql_sched_data *q = qdisc_priv(sch);
 80
 81	if (q->q.qlen < dev->tx_queue_len) {
 82		__skb_queue_tail(&q->q, skb);
 83		return NET_XMIT_SUCCESS;
 84	}
 85
 86	return qdisc_drop(skb, sch, to_free);
 87}
 88
 89static struct sk_buff *
 90teql_dequeue(struct Qdisc *sch)
 91{
 92	struct teql_sched_data *dat = qdisc_priv(sch);
 93	struct netdev_queue *dat_queue;
 94	struct sk_buff *skb;
 95	struct Qdisc *q;
 96
 97	skb = __skb_dequeue(&dat->q);
 98	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
 99	q = rcu_dereference_bh(dat_queue->qdisc);
100
101	if (skb == NULL) {
102		struct net_device *m = qdisc_dev(q);
103		if (m) {
104			dat->m->slaves = sch;
105			netif_wake_queue(m);
106		}
107	} else {
108		qdisc_bstats_update(sch, skb);
109	}
110	sch->q.qlen = dat->q.qlen + q->q.qlen;
111	return skb;
112}
113
114static struct sk_buff *
115teql_peek(struct Qdisc *sch)
116{
117	/* teql is meant to be used as root qdisc */
118	return NULL;
119}
120
121static void
122teql_reset(struct Qdisc *sch)
123{
124	struct teql_sched_data *dat = qdisc_priv(sch);
125
126	skb_queue_purge(&dat->q);
127	sch->q.qlen = 0;
128}
129
130static void
131teql_destroy(struct Qdisc *sch)
132{
133	struct Qdisc *q, *prev;
134	struct teql_sched_data *dat = qdisc_priv(sch);
135	struct teql_master *master = dat->m;
136
137	prev = master->slaves;
138	if (prev) {
139		do {
140			q = NEXT_SLAVE(prev);
141			if (q == sch) {
142				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
143				if (q == master->slaves) {
144					master->slaves = NEXT_SLAVE(q);
145					if (q == master->slaves) {
146						struct netdev_queue *txq;
147						spinlock_t *root_lock;
148
149						txq = netdev_get_tx_queue(master->dev, 0);
150						master->slaves = NULL;
151
152						root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
153						spin_lock_bh(root_lock);
154						qdisc_reset(rtnl_dereference(txq->qdisc));
155						spin_unlock_bh(root_lock);
156					}
157				}
158				skb_queue_purge(&dat->q);
159				break;
160			}
161
162		} while ((prev = q) != master->slaves);
163	}
164}
165
166static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
167			   struct netlink_ext_ack *extack)
168{
169	struct net_device *dev = qdisc_dev(sch);
170	struct teql_master *m = (struct teql_master *)sch->ops;
171	struct teql_sched_data *q = qdisc_priv(sch);
172
173	if (dev->hard_header_len > m->dev->hard_header_len)
174		return -EINVAL;
175
176	if (m->dev == dev)
177		return -ELOOP;
178
179	q->m = m;
180
181	skb_queue_head_init(&q->q);
182
183	if (m->slaves) {
184		if (m->dev->flags & IFF_UP) {
185			if ((m->dev->flags & IFF_POINTOPOINT &&
186			     !(dev->flags & IFF_POINTOPOINT)) ||
187			    (m->dev->flags & IFF_BROADCAST &&
188			     !(dev->flags & IFF_BROADCAST)) ||
189			    (m->dev->flags & IFF_MULTICAST &&
190			     !(dev->flags & IFF_MULTICAST)) ||
191			    dev->mtu < m->dev->mtu)
192				return -EINVAL;
193		} else {
194			if (!(dev->flags&IFF_POINTOPOINT))
195				m->dev->flags &= ~IFF_POINTOPOINT;
196			if (!(dev->flags&IFF_BROADCAST))
197				m->dev->flags &= ~IFF_BROADCAST;
198			if (!(dev->flags&IFF_MULTICAST))
199				m->dev->flags &= ~IFF_MULTICAST;
200			if (dev->mtu < m->dev->mtu)
201				m->dev->mtu = dev->mtu;
202		}
203		q->next = NEXT_SLAVE(m->slaves);
204		NEXT_SLAVE(m->slaves) = sch;
205	} else {
206		q->next = sch;
207		m->slaves = sch;
208		m->dev->mtu = dev->mtu;
209		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
210	}
211	return 0;
212}
213
214
215static int
216__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
217	       struct net_device *dev, struct netdev_queue *txq,
218	       struct dst_entry *dst)
219{
220	struct neighbour *n;
221	int err = 0;
222
223	n = dst_neigh_lookup_skb(dst, skb);
224	if (!n)
225		return -ENOENT;
226
227	if (dst->dev != dev) {
228		struct neighbour *mn;
229
230		mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
231		neigh_release(n);
232		if (IS_ERR(mn))
233			return PTR_ERR(mn);
234		n = mn;
235	}
236
237	if (neigh_event_send(n, skb_res) == 0) {
238		int err;
239		char haddr[MAX_ADDR_LEN];
240
241		neigh_ha_snapshot(haddr, n, dev);
242		err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)),
243				      haddr, NULL, skb->len);
244
245		if (err < 0)
246			err = -EINVAL;
247	} else {
248		err = (skb_res == NULL) ? -EAGAIN : 1;
249	}
250	neigh_release(n);
251	return err;
252}
253
254static inline int teql_resolve(struct sk_buff *skb,
255			       struct sk_buff *skb_res,
256			       struct net_device *dev,
257			       struct netdev_queue *txq)
258{
259	struct dst_entry *dst = skb_dst(skb);
260	int res;
261
262	if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
263		return -ENODEV;
264
265	if (!dev->header_ops || !dst)
266		return 0;
267
268	rcu_read_lock();
269	res = __teql_resolve(skb, skb_res, dev, txq, dst);
270	rcu_read_unlock();
271
272	return res;
273}
274
275static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
276{
277	struct teql_master *master = netdev_priv(dev);
278	struct Qdisc *start, *q;
279	int busy;
280	int nores;
281	int subq = skb_get_queue_mapping(skb);
282	struct sk_buff *skb_res = NULL;
283
284	start = master->slaves;
285
286restart:
287	nores = 0;
288	busy = 0;
289
290	q = start;
291	if (!q)
292		goto drop;
293
294	do {
295		struct net_device *slave = qdisc_dev(q);
296		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
297
298		if (slave_txq->qdisc_sleeping != q)
299			continue;
300		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
301		    !netif_running(slave)) {
302			busy = 1;
303			continue;
304		}
305
306		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
307		case 0:
308			if (__netif_tx_trylock(slave_txq)) {
309				unsigned int length = qdisc_pkt_len(skb);
310
311				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
312				    netdev_start_xmit(skb, slave, slave_txq, false) ==
313				    NETDEV_TX_OK) {
314					__netif_tx_unlock(slave_txq);
315					master->slaves = NEXT_SLAVE(q);
316					netif_wake_queue(dev);
317					master->tx_packets++;
318					master->tx_bytes += length;
319					return NETDEV_TX_OK;
320				}
321				__netif_tx_unlock(slave_txq);
322			}
323			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
324				busy = 1;
325			break;
326		case 1:
327			master->slaves = NEXT_SLAVE(q);
328			return NETDEV_TX_OK;
329		default:
330			nores = 1;
331			break;
332		}
333		__skb_pull(skb, skb_network_offset(skb));
334	} while ((q = NEXT_SLAVE(q)) != start);
335
336	if (nores && skb_res == NULL) {
337		skb_res = skb;
338		goto restart;
339	}
340
341	if (busy) {
342		netif_stop_queue(dev);
343		return NETDEV_TX_BUSY;
344	}
345	master->tx_errors++;
346
347drop:
348	master->tx_dropped++;
349	dev_kfree_skb(skb);
350	return NETDEV_TX_OK;
351}
352
353static int teql_master_open(struct net_device *dev)
354{
355	struct Qdisc *q;
356	struct teql_master *m = netdev_priv(dev);
357	int mtu = 0xFFFE;
358	unsigned int flags = IFF_NOARP | IFF_MULTICAST;
359
360	if (m->slaves == NULL)
361		return -EUNATCH;
362
363	flags = FMASK;
364
365	q = m->slaves;
366	do {
367		struct net_device *slave = qdisc_dev(q);
368
369		if (slave == NULL)
370			return -EUNATCH;
371
372		if (slave->mtu < mtu)
373			mtu = slave->mtu;
374		if (slave->hard_header_len > LL_MAX_HEADER)
375			return -EINVAL;
376
377		/* If all the slaves are BROADCAST, master is BROADCAST
378		   If all the slaves are PtP, master is PtP
379		   Otherwise, master is NBMA.
380		 */
381		if (!(slave->flags&IFF_POINTOPOINT))
382			flags &= ~IFF_POINTOPOINT;
383		if (!(slave->flags&IFF_BROADCAST))
384			flags &= ~IFF_BROADCAST;
385		if (!(slave->flags&IFF_MULTICAST))
386			flags &= ~IFF_MULTICAST;
387	} while ((q = NEXT_SLAVE(q)) != m->slaves);
388
389	m->dev->mtu = mtu;
390	m->dev->flags = (m->dev->flags&~FMASK) | flags;
391	netif_start_queue(m->dev);
392	return 0;
393}
394
/* Take the master device down: stop its transmit queue. Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}
400
401static void teql_master_stats64(struct net_device *dev,
402				struct rtnl_link_stats64 *stats)
403{
404	struct teql_master *m = netdev_priv(dev);
405
406	stats->tx_packets	= m->tx_packets;
407	stats->tx_bytes		= m->tx_bytes;
408	stats->tx_errors	= m->tx_errors;
409	stats->tx_dropped	= m->tx_dropped;
410}
411
412static int teql_master_mtu(struct net_device *dev, int new_mtu)
413{
414	struct teql_master *m = netdev_priv(dev);
415	struct Qdisc *q;
416
417	q = m->slaves;
418	if (q) {
419		do {
420			if (new_mtu > qdisc_dev(q)->mtu)
421				return -EINVAL;
422		} while ((q = NEXT_SLAVE(q)) != m->slaves);
423	}
424
425	dev->mtu = new_mtu;
426	return 0;
427}
428
429static const struct net_device_ops teql_netdev_ops = {
430	.ndo_open	= teql_master_open,
431	.ndo_stop	= teql_master_close,
432	.ndo_start_xmit	= teql_master_xmit,
433	.ndo_get_stats64 = teql_master_stats64,
434	.ndo_change_mtu	= teql_master_mtu,
435};
436
437static __init void teql_master_setup(struct net_device *dev)
438{
439	struct teql_master *master = netdev_priv(dev);
440	struct Qdisc_ops *ops = &master->qops;
441
442	master->dev	= dev;
443	ops->priv_size  = sizeof(struct teql_sched_data);
444
445	ops->enqueue	=	teql_enqueue;
446	ops->dequeue	=	teql_dequeue;
447	ops->peek	=	teql_peek;
448	ops->init	=	teql_qdisc_init;
449	ops->reset	=	teql_reset;
450	ops->destroy	=	teql_destroy;
451	ops->owner	=	THIS_MODULE;
452
453	dev->netdev_ops =       &teql_netdev_ops;
454	dev->type		= ARPHRD_VOID;
455	dev->mtu		= 1500;
456	dev->min_mtu		= 68;
457	dev->max_mtu		= 65535;
458	dev->tx_queue_len	= 100;
459	dev->flags		= IFF_NOARP;
460	dev->hard_header_len	= LL_MAX_HEADER;
461	netif_keep_dst(dev);
462}
463
464static LIST_HEAD(master_dev_list);
465static int max_equalizers = 1;
466module_param(max_equalizers, int, 0);
467MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
468
469static int __init teql_init(void)
470{
471	int i;
472	int err = -ENODEV;
473
474	for (i = 0; i < max_equalizers; i++) {
475		struct net_device *dev;
476		struct teql_master *master;
477
478		dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
479				   NET_NAME_UNKNOWN, teql_master_setup);
480		if (!dev) {
481			err = -ENOMEM;
482			break;
483		}
484
485		if ((err = register_netdev(dev))) {
486			free_netdev(dev);
487			break;
488		}
489
490		master = netdev_priv(dev);
491
492		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
493		err = register_qdisc(&master->qops);
494
495		if (err) {
496			unregister_netdev(dev);
497			free_netdev(dev);
498			break;
499		}
500
501		list_add_tail(&master->master_list, &master_dev_list);
502	}
503	return i ? 0 : err;
504}
505
506static void __exit teql_exit(void)
507{
508	struct teql_master *master, *nxt;
509
510	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
511
512		list_del(&master->master_list);
513
514		unregister_qdisc(&master->qops);
515		unregister_netdev(master->dev);
516		free_netdev(master->dev);
517	}
518}
519
520module_init(teql_init);
521module_exit(teql_exit);
522
523MODULE_LICENSE("GPL");