PageRenderTime 176ms CodeModel.GetById 26ms app.highlight 118ms RepoModel.GetById 1ms app.codeStats 4ms

/net/core/dev.c

http://github.com/mirrors/linux
C | 10568 lines | 6899 code | 1665 blank | 2004 comment | 1201 complexity | b85658a488468d1c3f7f658206f1a07c MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1// SPDX-License-Identifier: GPL-2.0-or-later
   2/*
   3 *      NET3    Protocol independent device support routines.
   4 *
   5 *	Derived from the non IP parts of dev.c 1.0.19
   6 *              Authors:	Ross Biro
   7 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
   8 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
   9 *
  10 *	Additional Authors:
  11 *		Florian la Roche <rzsfl@rz.uni-sb.de>
  12 *		Alan Cox <gw4pts@gw4pts.ampr.org>
  13 *		David Hinds <dahinds@users.sourceforge.net>
  14 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  15 *		Adam Sulmicki <adam@cfar.umd.edu>
  16 *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  17 *
  18 *	Changes:
  19 *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  20 *                                      to 2 if register_netdev gets called
  21 *                                      before net_dev_init & also removed a
  22 *                                      few lines of code in the process.
  23 *		Alan Cox	:	device private ioctl copies fields back.
  24 *		Alan Cox	:	Transmit queue code does relevant
  25 *					stunts to keep the queue safe.
  26 *		Alan Cox	:	Fixed double lock.
  27 *		Alan Cox	:	Fixed promisc NULL pointer trap
  28 *		????????	:	Support the full private ioctl range
  29 *		Alan Cox	:	Moved ioctl permission check into
  30 *					drivers
  31 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
  32 *		Alan Cox	:	100 backlog just doesn't cut it when
  33 *					you start doing multicast video 8)
  34 *		Alan Cox	:	Rewrote net_bh and list manager.
  35 *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
  36 *		Alan Cox	:	Took out transmit every packet pass
  37 *					Saved a few bytes in the ioctl handler
  38 *		Alan Cox	:	Network driver sets packet type before
  39 *					calling netif_rx. Saves a function
  40 *					call a packet.
  41 *		Alan Cox	:	Hashed net_bh()
  42 *		Richard Kooijman:	Timestamp fixes.
  43 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
  44 *		Alan Cox	:	Device lock protection.
  45 *              Alan Cox        :       Fixed nasty side effect of device close
  46 *					changes.
  47 *		Rudi Cilibrasi	:	Pass the right thing to
  48 *					set_mac_address()
  49 *		Dave Miller	:	32bit quantity for the device lock to
  50 *					make it work out on a Sparc.
  51 *		Bjorn Ekwall	:	Added KERNELD hack.
  52 *		Alan Cox	:	Cleaned up the backlog initialise.
  53 *		Craig Metz	:	SIOCGIFCONF fix if space for under
  54 *					1 device.
  55 *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
  56 *					is no device open function.
  57 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
  58 *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
  59 *		Cyrus Durgin	:	Cleaned for KMOD
  60 *		Adam Sulmicki   :	Bug Fix : Network Device Unload
  61 *					A network device unload needs to purge
  62 *					the backlog queue.
  63 *	Paul Rusty Russell	:	SIOCSIFNAME
  64 *              Pekka Riikonen  :	Netdev boot-time settings code
  65 *              Andrew Morton   :       Make unregister_netdevice wait
  66 *                                      indefinitely on dev->refcnt
  67 *              J Hadi Salim    :       - Backlog queue sampling
  68 *				        - netif_rx() feedback
  69 */
  70
  71#include <linux/uaccess.h>
  72#include <linux/bitops.h>
  73#include <linux/capability.h>
  74#include <linux/cpu.h>
  75#include <linux/types.h>
  76#include <linux/kernel.h>
  77#include <linux/hash.h>
  78#include <linux/slab.h>
  79#include <linux/sched.h>
  80#include <linux/sched/mm.h>
  81#include <linux/mutex.h>
  82#include <linux/string.h>
  83#include <linux/mm.h>
  84#include <linux/socket.h>
  85#include <linux/sockios.h>
  86#include <linux/errno.h>
  87#include <linux/interrupt.h>
  88#include <linux/if_ether.h>
  89#include <linux/netdevice.h>
  90#include <linux/etherdevice.h>
  91#include <linux/ethtool.h>
  92#include <linux/skbuff.h>
  93#include <linux/bpf.h>
  94#include <linux/bpf_trace.h>
  95#include <net/net_namespace.h>
  96#include <net/sock.h>
  97#include <net/busy_poll.h>
  98#include <linux/rtnetlink.h>
  99#include <linux/stat.h>
 100#include <net/dst.h>
 101#include <net/dst_metadata.h>
 102#include <net/pkt_sched.h>
 103#include <net/pkt_cls.h>
 104#include <net/checksum.h>
 105#include <net/xfrm.h>
 106#include <linux/highmem.h>
 107#include <linux/init.h>
 108#include <linux/module.h>
 109#include <linux/netpoll.h>
 110#include <linux/rcupdate.h>
 111#include <linux/delay.h>
 112#include <net/iw_handler.h>
 113#include <asm/current.h>
 114#include <linux/audit.h>
 115#include <linux/dmaengine.h>
 116#include <linux/err.h>
 117#include <linux/ctype.h>
 118#include <linux/if_arp.h>
 119#include <linux/if_vlan.h>
 120#include <linux/ip.h>
 121#include <net/ip.h>
 122#include <net/mpls.h>
 123#include <linux/ipv6.h>
 124#include <linux/in.h>
 125#include <linux/jhash.h>
 126#include <linux/random.h>
 127#include <trace/events/napi.h>
 128#include <trace/events/net.h>
 129#include <trace/events/skb.h>
 130#include <linux/inetdevice.h>
 131#include <linux/cpu_rmap.h>
 132#include <linux/static_key.h>
 133#include <linux/hashtable.h>
 134#include <linux/vmalloc.h>
 135#include <linux/if_macvlan.h>
 136#include <linux/errqueue.h>
 137#include <linux/hrtimer.h>
 138#include <linux/netfilter_ingress.h>
 139#include <linux/crash_dump.h>
 140#include <linux/sctp.h>
 141#include <net/udp_tunnel.h>
 142#include <linux/net_namespace.h>
 143#include <linux/indirect_call_wrapper.h>
 144#include <net/devlink.h>
 145
 146#include "net-sysfs.h"
 147
#define MAX_GRO_SKBS 8

/* This should be increased if a protocol with a bigger head is added. */
#define GRO_MAX_HEAD (MAX_HEADER + 128)

/* Writers take these spinlocks; readers walk the lists under RCU
 * (entries are inserted with list_add_rcu() below).
 */
static DEFINE_SPINLOCK(ptype_lock);
static DEFINE_SPINLOCK(offload_lock);
/* Protocol handlers, hashed by packet type. */
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
struct list_head ptype_all __read_mostly;	/* Taps */
/* Offload handlers, kept sorted by ->priority (see dev_add_offload()). */
static struct list_head offload_base __read_mostly;

/* Forward declarations for functions defined later in this file. */
static int netif_rx_internal(struct sk_buff *skb);
static int call_netdevice_notifiers_info(unsigned long val,
					 struct netdev_notifier_info *info);
static int call_netdevice_notifiers_extack(unsigned long val,
					   struct net_device *dev,
					   struct netlink_ext_ack *extack);
static struct napi_struct *napi_by_id(unsigned int napi_id);
 166
 167/*
 168 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 169 * semaphore.
 170 *
 171 * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
 172 *
 173 * Writers must hold the rtnl semaphore while they loop through the
 174 * dev_base_head list, and hold dev_base_lock for writing when they do the
 175 * actual updates.  This allows pure readers to access the list even
 176 * while a writer is preparing to update it.
 177 *
 178 * To put it another way, dev_base_lock is held for writing only to
 179 * protect against pure readers; the rtnl semaphore provides the
 180 * protection against other writers.
 181 *
 182 * See, for example usages, register_netdevice() and
 183 * unregister_netdevice(), which must be called with the rtnl
 184 * semaphore held.
 185 */
DEFINE_RWLOCK(dev_base_lock);
EXPORT_SYMBOL(dev_base_lock);

/* NOTE(review): presumably serializes dev->ifalias updates — confirm at
 * the users of this mutex (not visible in this chunk).
 */
static DEFINE_MUTEX(ifalias_mutex);

/* protects napi_hash addition/deletion and napi_gen_id */
static DEFINE_SPINLOCK(napi_hash_lock);

static unsigned int napi_gen_id = NR_CPUS;
static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);

/* Bumped around device renames; netdev_get_name() retries its read
 * section when it observes a change.
 */
static seqcount_t devnet_rename_seq;
 198
 199static inline void dev_base_seq_inc(struct net *net)
 200{
 201	while (++net->dev_base_seq == 0)
 202		;
 203}
 204
 205static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
 206{
 207	unsigned int hash = full_name_hash(net, name, strnlen(name, IFNAMSIZ));
 208
 209	return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
 210}
 211
 212static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 213{
 214	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
 215}
 216
/* Serialize access to this CPU's input_pkt_queue; a no-op without RPS. */
static inline void rps_lock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_lock(&sd->input_pkt_queue.lock);
#endif
}
 223
/* Release the lock taken by rps_lock(); a no-op without RPS. */
static inline void rps_unlock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
	spin_unlock(&sd->input_pkt_queue.lock);
#endif
}
 230
 231static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
 232						       const char *name)
 233{
 234	struct netdev_name_node *name_node;
 235
 236	name_node = kmalloc(sizeof(*name_node), GFP_KERNEL);
 237	if (!name_node)
 238		return NULL;
 239	INIT_HLIST_NODE(&name_node->hlist);
 240	name_node->dev = dev;
 241	name_node->name = name;
 242	return name_node;
 243}
 244
 245static struct netdev_name_node *
 246netdev_name_node_head_alloc(struct net_device *dev)
 247{
 248	struct netdev_name_node *name_node;
 249
 250	name_node = netdev_name_node_alloc(dev, dev->name);
 251	if (!name_node)
 252		return NULL;
 253	INIT_LIST_HEAD(&name_node->list);
 254	return name_node;
 255}
 256
/* Release a node from netdev_name_node_alloc(); the name string itself is
 * not freed here (the alt-name destroy path frees it separately).
 */
static void netdev_name_node_free(struct netdev_name_node *name_node)
{
	kfree(name_node);
}
 261
/* Hash @name_node into @net's name lookup table; RCU-safe for readers. */
static void netdev_name_node_add(struct net *net,
				 struct netdev_name_node *name_node)
{
	hlist_add_head_rcu(&name_node->hlist,
			   dev_name_hash(net, name_node->name));
}
 268
/* Unhash @name_node; RCU readers may still see it until a grace period
 * has elapsed, so the caller must not free it immediately.
 */
static void netdev_name_node_del(struct netdev_name_node *name_node)
{
	hlist_del_rcu(&name_node->hlist);
}
 273
 274static struct netdev_name_node *netdev_name_node_lookup(struct net *net,
 275							const char *name)
 276{
 277	struct hlist_head *head = dev_name_hash(net, name);
 278	struct netdev_name_node *name_node;
 279
 280	hlist_for_each_entry(name_node, head, hlist)
 281		if (!strcmp(name_node->name, name))
 282			return name_node;
 283	return NULL;
 284}
 285
 286static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net,
 287							    const char *name)
 288{
 289	struct hlist_head *head = dev_name_hash(net, name);
 290	struct netdev_name_node *name_node;
 291
 292	hlist_for_each_entry_rcu(name_node, head, hlist)
 293		if (!strcmp(name_node->name, name))
 294			return name_node;
 295	return NULL;
 296}
 297
/* Register @name as an alternative name for @dev.
 * Returns -EEXIST if the name is already in use in this namespace,
 * -ENOMEM on allocation failure, 0 on success.
 * NOTE(review): the node stores @name by pointer and the destroy path
 * kfree()s it, so the caller must hand over ownership of a
 * kmalloc()-allocated string — confirm against callers.
 */
int netdev_name_node_alt_create(struct net_device *dev, const char *name)
{
	struct netdev_name_node *name_node;
	struct net *net = dev_net(dev);

	name_node = netdev_name_node_lookup(net, name);
	if (name_node)
		return -EEXIST;
	name_node = netdev_name_node_alloc(dev, name);
	if (!name_node)
		return -ENOMEM;
	netdev_name_node_add(net, name_node);
	/* The node that holds dev->name acts as a head of per-device list. */
	list_add_tail(&name_node->list, &dev->name_node->list);

	return 0;
}
 315EXPORT_SYMBOL(netdev_name_node_alt_create);
 316
/* Unlink @name_node from the per-device list and the name hash, then free
 * both the name string and the node.
 * NOTE(review): kfree(name_node->name) implies the alternative-name string
 * was kmalloc()-allocated with ownership transferred at create time —
 * confirm at the creation sites.
 */
static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
{
	list_del(&name_node->list);
	netdev_name_node_del(name_node);
	kfree(name_node->name);
	netdev_name_node_free(name_node);
}
 324
 325int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
 326{
 327	struct netdev_name_node *name_node;
 328	struct net *net = dev_net(dev);
 329
 330	name_node = netdev_name_node_lookup(net, name);
 331	if (!name_node)
 332		return -ENOENT;
 333	/* lookup might have found our primary name or a name belonging
 334	 * to another device.
 335	 */
 336	if (name_node == dev->name_node || name_node->dev != dev)
 337		return -EINVAL;
 338
 339	__netdev_name_node_alt_destroy(name_node);
 340
 341	return 0;
 342}
 343EXPORT_SYMBOL(netdev_name_node_alt_destroy);
 344
/* Destroy all alternative names of @dev.  The _safe iterator is required
 * because each destroy unlinks the node from the list being walked.
 */
static void netdev_name_node_alt_flush(struct net_device *dev)
{
	struct netdev_name_node *name_node, *tmp;

	list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list)
		__netdev_name_node_alt_destroy(name_node);
}
 352
/* Device list insertion: link @dev into its namespace's base list, name
 * hash, and ifindex hash.  Caller must hold RTNL.
 */
static void list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	/* The write lock keeps dev_base_lock readers out while all three
	 * structures change; the _rcu inserts keep lockless readers safe.
	 */
	write_lock_bh(&dev_base_lock);
	list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
	netdev_name_node_add(net, dev->name_node);
	hlist_add_head_rcu(&dev->index_hlist,
			   dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);

	/* Advance the namespace's device-list generation counter. */
	dev_base_seq_inc(net);
}
 369
/* Device list removal
 * caller must respect a RCU grace period before freeing/reusing dev
 */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del_rcu(&dev->dev_list);
	netdev_name_node_del(dev->name_node);
	hlist_del_rcu(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);

	/* Advance the namespace's device-list generation counter. */
	dev_base_seq_inc(dev_net(dev));
}
 386
/*
 *	Our notifier list: the chain of netdevice event subscribers.
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */

DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
EXPORT_PER_CPU_SYMBOL(softnet_data);
 400
 401/*******************************************************************************
 402 *
 403 *		Protocol management and registration routines
 404 *
 405 *******************************************************************************/
 406
 407
 408/*
 409 *	Add a protocol ID to the list. Now that the input handler is
 410 *	smarter we can dispense with all the messy stuff that used to be
 411 *	here.
 412 *
 413 *	BEWARE!!! Protocol handlers, mangling input packets,
 414 *	MUST BE last in hash buckets and checking protocol handlers
 415 *	MUST start from promiscuous ptype_all chain in net_bh.
 416 *	It is true now, do not change it.
 417 *	Explanation follows: if protocol handler, mangling packet, will
 418 *	be the first on list, it is not able to sense, that packet
 419 *	is cloned and should be copied-on-write, so that it will
 420 *	change it and subsequent readers will get broken packet.
 421 *							--ANK (980803)
 422 */
 423
 424static inline struct list_head *ptype_head(const struct packet_type *pt)
 425{
 426	if (pt->type == htons(ETH_P_ALL))
 427		return pt->dev ? &pt->dev->ptype_all : &ptype_all;
 428	else
 429		return pt->dev ? &pt->dev->ptype_specific :
 430				 &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
 431}
 432
 433/**
 434 *	dev_add_pack - add packet handler
 435 *	@pt: packet type declaration
 436 *
 437 *	Add a protocol handler to the networking stack. The passed &packet_type
 438 *	is linked into kernel lists and may not be freed until it has been
 439 *	removed from the kernel lists.
 440 *
 441 *	This call does not sleep therefore it can not
 442 *	guarantee all CPU's that are in middle of receiving packets
 443 *	will see the new packet type (until the next received packet).
 444 */
 445
void dev_add_pack(struct packet_type *pt)
{
	struct list_head *head = ptype_head(pt);

	/* ptype_lock serializes writers; readers traverse the list under
	 * RCU, hence the RCU insert.
	 */
	spin_lock(&ptype_lock);
	list_add_rcu(&pt->list, head);
	spin_unlock(&ptype_lock);
}
 454EXPORT_SYMBOL(dev_add_pack);
 455
 456/**
 457 *	__dev_remove_pack	 - remove packet handler
 458 *	@pt: packet type declaration
 459 *
 460 *	Remove a protocol handler that was previously added to the kernel
 461 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 462 *	from the kernel lists and can be freed or reused once this function
 463 *	returns.
 464 *
 465 *      The packet type might still be in use by receivers
 466 *	and must not be freed until after all the CPU's have gone
 467 *	through a quiescent state.
 468 */
 469void __dev_remove_pack(struct packet_type *pt)
 470{
 471	struct list_head *head = ptype_head(pt);
 472	struct packet_type *pt1;
 473
 474	spin_lock(&ptype_lock);
 475
 476	list_for_each_entry(pt1, head, list) {
 477		if (pt == pt1) {
 478			list_del_rcu(&pt->list);
 479			goto out;
 480		}
 481	}
 482
 483	pr_warn("dev_remove_pack: %p not found\n", pt);
 484out:
 485	spin_unlock(&ptype_lock);
 486}
 487EXPORT_SYMBOL(__dev_remove_pack);
 488
 489/**
 490 *	dev_remove_pack	 - remove packet handler
 491 *	@pt: packet type declaration
 492 *
 493 *	Remove a protocol handler that was previously added to the kernel
 494 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 495 *	from the kernel lists and can be freed or reused once this function
 496 *	returns.
 497 *
 498 *	This call sleeps to guarantee that no CPU is looking at the packet
 499 *	type after return.
 500 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	/* Wait out in-flight RCU readers before the caller may free @pt. */
	synchronize_net();
}
 507EXPORT_SYMBOL(dev_remove_pack);
 508
 509
 510/**
 511 *	dev_add_offload - register offload handlers
 512 *	@po: protocol offload declaration
 513 *
 514 *	Add protocol offload handlers to the networking stack. The passed
 515 *	&proto_offload is linked into kernel lists and may not be freed until
 516 *	it has been removed from the kernel lists.
 517 *
 518 *	This call does not sleep therefore it can not
 519 *	guarantee all CPU's that are in middle of receiving packets
 520 *	will see the new offload handlers (until the next received packet).
 521 */
void dev_add_offload(struct packet_offload *po)
{
	struct packet_offload *elem;

	spin_lock(&offload_lock);
	/* Keep the list sorted by ascending ->priority: find the first
	 * entry with a strictly greater priority value...
	 */
	list_for_each_entry(elem, &offload_base, list) {
		if (po->priority < elem->priority)
			break;
	}
	/* ...and insert in front of it.  If the loop ran off the end,
	 * &elem->list is the list head itself, so this appends at the tail.
	 */
	list_add_rcu(&po->list, elem->list.prev);
	spin_unlock(&offload_lock);
}
 534EXPORT_SYMBOL(dev_add_offload);
 535
 536/**
 537 *	__dev_remove_offload	 - remove offload handler
 538 *	@po: packet offload declaration
 539 *
 540 *	Remove a protocol offload handler that was previously added to the
 541 *	kernel offload handlers by dev_add_offload(). The passed &offload_type
 542 *	is removed from the kernel lists and can be freed or reused once this
 543 *	function returns.
 544 *
 545 *      The packet type might still be in use by receivers
 546 *	and must not be freed until after all the CPU's have gone
 547 *	through a quiescent state.
 548 */
 549static void __dev_remove_offload(struct packet_offload *po)
 550{
 551	struct list_head *head = &offload_base;
 552	struct packet_offload *po1;
 553
 554	spin_lock(&offload_lock);
 555
 556	list_for_each_entry(po1, head, list) {
 557		if (po == po1) {
 558			list_del_rcu(&po->list);
 559			goto out;
 560		}
 561	}
 562
 563	pr_warn("dev_remove_offload: %p not found\n", po);
 564out:
 565	spin_unlock(&offload_lock);
 566}
 567
 568/**
 569 *	dev_remove_offload	 - remove packet offload handler
 570 *	@po: packet offload declaration
 571 *
 572 *	Remove a packet offload handler that was previously added to the kernel
 573 *	offload handlers by dev_add_offload(). The passed &offload_type is
 574 *	removed from the kernel lists and can be freed or reused once this
 575 *	function returns.
 576 *
 577 *	This call sleeps to guarantee that no CPU is looking at the packet
 578 *	type after return.
 579 */
void dev_remove_offload(struct packet_offload *po)
{
	__dev_remove_offload(po);

	/* Wait out in-flight RCU readers before the caller may free @po. */
	synchronize_net();
}
 586EXPORT_SYMBOL(dev_remove_offload);
 587
 588/******************************************************************************
 589 *
 590 *		      Device Boot-time Settings Routines
 591 *
 592 ******************************************************************************/
 593
/* Boot time configuration table: filled by netdev_boot_setup_add() from
 * the "netdev=" kernel parameter, consulted by netdev_boot_setup_check()
 * and netdev_boot_base().
 */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 596
 597/**
 598 *	netdev_boot_setup_add	- add new setup entry
 599 *	@name: name of the device
 600 *	@map: configured settings for the device
 601 *
 602 *	Adds new setup entry to the dev_boot_setup list.  The function
 603 *	returns 0 on error and 1 on success.  This is a generic routine to
 604 *	all netdevices.
 605 */
 606static int netdev_boot_setup_add(char *name, struct ifmap *map)
 607{
 608	struct netdev_boot_setup *s;
 609	int i;
 610
 611	s = dev_boot_setup;
 612	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 613		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 614			memset(s[i].name, 0, sizeof(s[i].name));
 615			strlcpy(s[i].name, name, IFNAMSIZ);
 616			memcpy(&s[i].map, map, sizeof(s[i].map));
 617			break;
 618		}
 619	}
 620
 621	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 622}
 623
 624/**
 625 * netdev_boot_setup_check	- check boot time settings
 626 * @dev: the netdevice
 627 *
 628 * Check boot time settings for the device.
 629 * The found settings are set for the device to be used
 630 * later in the device probing.
 631 * Returns 0 if no settings found, 1 if they are.
 632 */
 633int netdev_boot_setup_check(struct net_device *dev)
 634{
 635	struct netdev_boot_setup *s = dev_boot_setup;
 636	int i;
 637
 638	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 639		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 640		    !strcmp(dev->name, s[i].name)) {
 641			dev->irq = s[i].map.irq;
 642			dev->base_addr = s[i].map.base_addr;
 643			dev->mem_start = s[i].map.mem_start;
 644			dev->mem_end = s[i].map.mem_end;
 645			return 1;
 646		}
 647	}
 648	return 0;
 649}
 650EXPORT_SYMBOL(netdev_boot_setup_check);
 651
 652
 653/**
 654 * netdev_boot_base	- get address from boot time settings
 655 * @prefix: prefix for network device
 656 * @unit: id for network device
 657 *
 658 * Check boot time settings for the base address of device.
 659 * The found settings are set for the device to be used
 660 * later in the device probing.
 661 * Returns 0 if no settings found.
 662 */
 663unsigned long netdev_boot_base(const char *prefix, int unit)
 664{
 665	const struct netdev_boot_setup *s = dev_boot_setup;
 666	char name[IFNAMSIZ];
 667	int i;
 668
 669	sprintf(name, "%s%d", prefix, unit);
 670
 671	/*
 672	 * If device already registered then return base of 1
 673	 * to indicate not to probe for this interface
 674	 */
 675	if (__dev_get_by_name(&init_net, name))
 676		return 1;
 677
 678	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 679		if (!strcmp(name, s[i].name))
 680			return s[i].map.base_addr;
 681	return 0;
 682}
 683
 684/*
 685 * Saves at boot time configured settings for any netdevice.
 686 */
/* Parser for the "netdev=irq,base,mem_start,mem_end,name" boot option. */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	/* Parse up to four leading comma-separated integers; @str is
	 * advanced past them to the device name.
	 */
	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	/* ints[0] holds how many integers were actually parsed. */
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}
 710
 711__setup("netdev=", netdev_boot_setup);
 712
 713/*******************************************************************************
 714 *
 715 *			    Device Interface Subroutines
 716 *
 717 *******************************************************************************/
 718
 719/**
 720 *	dev_get_iflink	- get 'iflink' value of a interface
 721 *	@dev: targeted interface
 722 *
 723 *	Indicates the ifindex the interface is linked to.
 724 *	Physical interfaces have the same 'ifindex' and 'iflink' values.
 725 */
 726
 727int dev_get_iflink(const struct net_device *dev)
 728{
 729	if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
 730		return dev->netdev_ops->ndo_get_iflink(dev);
 731
 732	return dev->ifindex;
 733}
 734EXPORT_SYMBOL(dev_get_iflink);
 735
 736/**
 737 *	dev_fill_metadata_dst - Retrieve tunnel egress information.
 738 *	@dev: targeted interface
 739 *	@skb: The packet.
 740 *
 741 *	For better visibility of tunnel traffic OVS needs to retrieve
 742 *	egress tunnel information for a packet. Following API allows
 743 *	user to get this info.
 744 */
int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
	struct ip_tunnel_info *info;

	/* The driver must implement the fill hook. */
	if (!dev->netdev_ops  || !dev->netdev_ops->ndo_fill_metadata_dst)
		return -EINVAL;

	/* Get a private copy of the skb's tunnel info to fill in; NULL
	 * means the unclone failed (presumably out of memory — the only
	 * visible failure path here).
	 */
	info = skb_tunnel_info_unclone(skb);
	if (!info)
		return -ENOMEM;
	/* Only TX metadata can describe an egress tunnel. */
	if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
		return -EINVAL;

	return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
}
 760EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
 761
 762/**
 763 *	__dev_get_by_name	- find a device by its name
 764 *	@net: the applicable net namespace
 765 *	@name: name to find
 766 *
 767 *	Find an interface by name. Must be called under RTNL semaphore
 768 *	or @dev_base_lock. If the name is found a pointer to the device
 769 *	is returned. If the name is not found then %NULL is returned. The
 770 *	reference counters are not incremented so the caller must be
 771 *	careful with locks.
 772 */
 773
 774struct net_device *__dev_get_by_name(struct net *net, const char *name)
 775{
 776	struct netdev_name_node *node_name;
 777
 778	node_name = netdev_name_node_lookup(net, name);
 779	return node_name ? node_name->dev : NULL;
 780}
 781EXPORT_SYMBOL(__dev_get_by_name);
 782
 783/**
 784 * dev_get_by_name_rcu	- find a device by its name
 785 * @net: the applicable net namespace
 786 * @name: name to find
 787 *
 788 * Find an interface by name.
 789 * If the name is found a pointer to the device is returned.
 790 * If the name is not found then %NULL is returned.
 791 * The reference counters are not incremented so the caller must be
 792 * careful with locks. The caller must hold RCU lock.
 793 */
 794
 795struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
 796{
 797	struct netdev_name_node *node_name;
 798
 799	node_name = netdev_name_node_lookup_rcu(net, name);
 800	return node_name ? node_name->dev : NULL;
 801}
 802EXPORT_SYMBOL(dev_get_by_name_rcu);
 803
 804/**
 805 *	dev_get_by_name		- find a device by its name
 806 *	@net: the applicable net namespace
 807 *	@name: name to find
 808 *
 809 *	Find an interface by name. This can be called from any
 810 *	context and does its own locking. The returned handle has
 811 *	the usage count incremented and the caller must use dev_put() to
 812 *	release it when it is no longer needed. %NULL is returned if no
 813 *	matching device is found.
 814 */
 815
 816struct net_device *dev_get_by_name(struct net *net, const char *name)
 817{
 818	struct net_device *dev;
 819
 820	rcu_read_lock();
 821	dev = dev_get_by_name_rcu(net, name);
 822	if (dev)
 823		dev_hold(dev);
 824	rcu_read_unlock();
 825	return dev;
 826}
 827EXPORT_SYMBOL(dev_get_by_name);
 828
 829/**
 830 *	__dev_get_by_index - find a device by its ifindex
 831 *	@net: the applicable net namespace
 832 *	@ifindex: index of device
 833 *
 834 *	Search for an interface by index. Returns %NULL if the device
 835 *	is not found or a pointer to the device. The device has not
 836 *	had its reference counter increased so the caller must be careful
 837 *	about locking. The caller must hold either the RTNL semaphore
 838 *	or @dev_base_lock.
 839 */
 840
 841struct net_device *__dev_get_by_index(struct net *net, int ifindex)
 842{
 843	struct net_device *dev;
 844	struct hlist_head *head = dev_index_hash(net, ifindex);
 845
 846	hlist_for_each_entry(dev, head, index_hlist)
 847		if (dev->ifindex == ifindex)
 848			return dev;
 849
 850	return NULL;
 851}
 852EXPORT_SYMBOL(__dev_get_by_index);
 853
 854/**
 855 *	dev_get_by_index_rcu - find a device by its ifindex
 856 *	@net: the applicable net namespace
 857 *	@ifindex: index of device
 858 *
 859 *	Search for an interface by index. Returns %NULL if the device
 860 *	is not found or a pointer to the device. The device has not
 861 *	had its reference counter increased so the caller must be careful
 862 *	about locking. The caller must hold RCU lock.
 863 */
 864
 865struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
 866{
 867	struct net_device *dev;
 868	struct hlist_head *head = dev_index_hash(net, ifindex);
 869
 870	hlist_for_each_entry_rcu(dev, head, index_hlist)
 871		if (dev->ifindex == ifindex)
 872			return dev;
 873
 874	return NULL;
 875}
 876EXPORT_SYMBOL(dev_get_by_index_rcu);
 877
 878
 879/**
 880 *	dev_get_by_index - find a device by its ifindex
 881 *	@net: the applicable net namespace
 882 *	@ifindex: index of device
 883 *
 884 *	Search for an interface by index. Returns NULL if the device
 885 *	is not found or a pointer to the device. The device returned has
 886 *	had a reference added and the pointer is safe until the user calls
 887 *	dev_put to indicate they have finished with it.
 888 */
 889
 890struct net_device *dev_get_by_index(struct net *net, int ifindex)
 891{
 892	struct net_device *dev;
 893
 894	rcu_read_lock();
 895	dev = dev_get_by_index_rcu(net, ifindex);
 896	if (dev)
 897		dev_hold(dev);
 898	rcu_read_unlock();
 899	return dev;
 900}
 901EXPORT_SYMBOL(dev_get_by_index);
 902
 903/**
 904 *	dev_get_by_napi_id - find a device by napi_id
 905 *	@napi_id: ID of the NAPI struct
 906 *
 907 *	Search for an interface by NAPI ID. Returns %NULL if the device
 908 *	is not found or a pointer to the device. The device has not had
 909 *	its reference counter increased so the caller must be careful
 910 *	about locking. The caller must hold RCU lock.
 911 */
 912
 913struct net_device *dev_get_by_napi_id(unsigned int napi_id)
 914{
 915	struct napi_struct *napi;
 916
 917	WARN_ON_ONCE(!rcu_read_lock_held());
 918
 919	if (napi_id < MIN_NAPI_ID)
 920		return NULL;
 921
 922	napi = napi_by_id(napi_id);
 923
 924	return napi ? napi->dev : NULL;
 925}
 926EXPORT_SYMBOL(dev_get_by_napi_id);
 927
 928/**
 929 *	netdev_get_name - get a netdevice name, knowing its ifindex.
 930 *	@net: network namespace
 931 *	@name: a pointer to the buffer where the name will be stored.
 932 *	@ifindex: the ifindex of the interface to get the name from.
 933 *
 934 *	The use of raw_seqcount_begin() and cond_resched() before
 935 *	retrying is required as we want to give the writers a chance
 936 *	to complete when CONFIG_PREEMPTION is not set.
 937 */
int netdev_get_name(struct net *net, char *name, int ifindex)
{
	struct net_device *dev;
	unsigned int seq;

retry:
	/* Snapshot the rename seqcount; if a rename runs concurrently the
	 * whole read section is retried so we never return a torn name.
	 */
	seq = raw_seqcount_begin(&devnet_rename_seq);
	rcu_read_lock();
	dev = dev_get_by_index_rcu(net, ifindex);
	if (!dev) {
		rcu_read_unlock();
		return -ENODEV;
	}

	strcpy(name, dev->name);
	rcu_read_unlock();
	if (read_seqcount_retry(&devnet_rename_seq, seq)) {
		/* A writer was active: yield so it can finish (matters
		 * without CONFIG_PREEMPTION), then try again.
		 */
		cond_resched();
		goto retry;
	}

	return 0;
}
 961
 962/**
 963 *	dev_getbyhwaddr_rcu - find a device by its hardware address
 964 *	@net: the applicable net namespace
 965 *	@type: media type of device
 966 *	@ha: hardware address
 967 *
 968 *	Search for an interface by MAC address. Returns NULL if the device
 969 *	is not found or a pointer to the device.
 970 *	The caller must hold RCU or RTNL.
 971 *	The returned device has not had its ref count increased
 972 *	and the caller must therefore be careful about locking
 973 *
 974 */
 975
 976struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
 977				       const char *ha)
 978{
 979	struct net_device *dev;
 980
 981	for_each_netdev_rcu(net, dev)
 982		if (dev->type == type &&
 983		    !memcmp(dev->dev_addr, ha, dev->addr_len))
 984			return dev;
 985
 986	return NULL;
 987}
 988EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
 989
 990struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
 991{
 992	struct net_device *dev;
 993
 994	ASSERT_RTNL();
 995	for_each_netdev(net, dev)
 996		if (dev->type == type)
 997			return dev;
 998
 999	return NULL;
1000}
1001EXPORT_SYMBOL(__dev_getfirstbyhwtype);
1002
1003struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
1004{
1005	struct net_device *dev, *ret = NULL;
1006
1007	rcu_read_lock();
1008	for_each_netdev_rcu(net, dev)
1009		if (dev->type == type) {
1010			dev_hold(dev);
1011			ret = dev;
1012			break;
1013		}
1014	rcu_read_unlock();
1015	return ret;
1016}
1017EXPORT_SYMBOL(dev_getfirstbyhwtype);
1018
1019/**
1020 *	__dev_get_by_flags - find any device with given flags
1021 *	@net: the applicable net namespace
1022 *	@if_flags: IFF_* values
1023 *	@mask: bitmask of bits in if_flags to check
1024 *
1025 *	Search for any interface with the given flags. Returns NULL if a device
1026 *	is not found or a pointer to the device. Must be called inside
1027 *	rtnl_lock(), and result refcount is unchanged.
1028 */
1029
1030struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
1031				      unsigned short mask)
1032{
1033	struct net_device *dev, *ret;
1034
1035	ASSERT_RTNL();
1036
1037	ret = NULL;
1038	for_each_netdev(net, dev) {
1039		if (((dev->flags ^ if_flags) & mask) == 0) {
1040			ret = dev;
1041			break;
1042		}
1043	}
1044	return ret;
1045}
1046EXPORT_SYMBOL(__dev_get_by_flags);
1047
1048/**
1049 *	dev_valid_name - check if name is okay for network device
1050 *	@name: name string
1051 *
1052 *	Network device names need to be valid file names to
1053 *	to allow sysfs to work.  We also disallow any kind of
1054 *	whitespace.
1055 */
1056bool dev_valid_name(const char *name)
1057{
1058	if (*name == '\0')
1059		return false;
1060	if (strnlen(name, IFNAMSIZ) == IFNAMSIZ)
1061		return false;
1062	if (!strcmp(name, ".") || !strcmp(name, ".."))
1063		return false;
1064
1065	while (*name) {
1066		if (*name == '/' || *name == ':' || isspace(*name))
1067			return false;
1068		name++;
1069	}
1070	return true;
1071}
1072EXPORT_SYMBOL(dev_valid_name);
1073
1074/**
1075 *	__dev_alloc_name - allocate a name for a device
1076 *	@net: network namespace to allocate the device name in
1077 *	@name: name format string
1078 *	@buf:  scratch buffer and result name string
1079 *
1080 *	Passed a format string - eg "lt%d" it will try and find a suitable
1081 *	id. It scans list of devices to build up a free map, then chooses
1082 *	the first empty slot. The caller must hold the dev_base or rtnl lock
1083 *	while allocating the name and adding the device in order to avoid
1084 *	duplicates.
1085 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
1086 *	Returns the number of the unit assigned or a negative errno code.
1087 */
1088
1089static int __dev_alloc_name(struct net *net, const char *name, char *buf)
1090{
1091	int i = 0;
1092	const char *p;
1093	const int max_netdevices = 8*PAGE_SIZE;
1094	unsigned long *inuse;
1095	struct net_device *d;
1096
1097	if (!dev_valid_name(name))
1098		return -EINVAL;
1099
1100	p = strchr(name, '%');
1101	if (p) {
1102		/*
1103		 * Verify the string as this thing may have come from
1104		 * the user.  There must be either one "%d" and no other "%"
1105		 * characters.
1106		 */
1107		if (p[1] != 'd' || strchr(p + 2, '%'))
1108			return -EINVAL;
1109
1110		/* Use one page as a bit array of possible slots */
1111		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
1112		if (!inuse)
1113			return -ENOMEM;
1114
1115		for_each_netdev(net, d) {
1116			if (!sscanf(d->name, name, &i))
1117				continue;
1118			if (i < 0 || i >= max_netdevices)
1119				continue;
1120
1121			/*  avoid cases where sscanf is not exact inverse of printf */
1122			snprintf(buf, IFNAMSIZ, name, i);
1123			if (!strncmp(buf, d->name, IFNAMSIZ))
1124				set_bit(i, inuse);
1125		}
1126
1127		i = find_first_zero_bit(inuse, max_netdevices);
1128		free_page((unsigned long) inuse);
1129	}
1130
1131	snprintf(buf, IFNAMSIZ, name, i);
1132	if (!__dev_get_by_name(net, buf))
1133		return i;
1134
1135	/* It is possible to run out of possible slots
1136	 * when the name is long and there isn't enough space left
1137	 * for the digits, or if all bits are used.
1138	 */
1139	return -ENFILE;
1140}
1141
1142static int dev_alloc_name_ns(struct net *net,
1143			     struct net_device *dev,
1144			     const char *name)
1145{
1146	char buf[IFNAMSIZ];
1147	int ret;
1148
1149	BUG_ON(!net);
1150	ret = __dev_alloc_name(net, name, buf);
1151	if (ret >= 0)
1152		strlcpy(dev->name, buf, IFNAMSIZ);
1153	return ret;
1154}
1155
1156/**
1157 *	dev_alloc_name - allocate a name for a device
1158 *	@dev: device
1159 *	@name: name format string
1160 *
1161 *	Passed a format string - eg "lt%d" it will try and find a suitable
1162 *	id. It scans list of devices to build up a free map, then chooses
1163 *	the first empty slot. The caller must hold the dev_base or rtnl lock
1164 *	while allocating the name and adding the device in order to avoid
1165 *	duplicates.
1166 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
1167 *	Returns the number of the unit assigned or a negative errno code.
1168 */
1169
1170int dev_alloc_name(struct net_device *dev, const char *name)
1171{
1172	return dev_alloc_name_ns(dev_net(dev), dev, name);
1173}
1174EXPORT_SYMBOL(dev_alloc_name);
1175
1176static int dev_get_valid_name(struct net *net, struct net_device *dev,
1177			      const char *name)
1178{
1179	BUG_ON(!net);
1180
1181	if (!dev_valid_name(name))
1182		return -EINVAL;
1183
1184	if (strchr(name, '%'))
1185		return dev_alloc_name_ns(net, dev, name);
1186	else if (__dev_get_by_name(net, name))
1187		return -EEXIST;
1188	else if (dev->name != name)
1189		strlcpy(dev->name, name, IFNAMSIZ);
1190
1191	return 0;
1192}
1193
1194/**
1195 *	dev_change_name - change name of a device
1196 *	@dev: device
1197 *	@newname: name (or format string) must be at least IFNAMSIZ
1198 *
1199 *	Change name of a device, can pass format strings "eth%d".
1200 *	for wildcarding.
1201 */
1202int dev_change_name(struct net_device *dev, const char *newname)
1203{
1204	unsigned char old_assign_type;
1205	char oldname[IFNAMSIZ];
1206	int err = 0;
1207	int ret;
1208	struct net *net;
1209
1210	ASSERT_RTNL();
1211	BUG_ON(!dev_net(dev));
1212
1213	net = dev_net(dev);
1214
1215	/* Some auto-enslaved devices e.g. failover slaves are
1216	 * special, as userspace might rename the device after
1217	 * the interface had been brought up and running since
1218	 * the point kernel initiated auto-enslavement. Allow
1219	 * live name change even when these slave devices are
1220	 * up and running.
1221	 *
1222	 * Typically, users of these auto-enslaving devices
1223	 * don't actually care about slave name change, as
1224	 * they are supposed to operate on master interface
1225	 * directly.
1226	 */
1227	if (dev->flags & IFF_UP &&
1228	    likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK)))
1229		return -EBUSY;
1230
1231	write_seqcount_begin(&devnet_rename_seq);
1232
1233	if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
1234		write_seqcount_end(&devnet_rename_seq);
1235		return 0;
1236	}
1237
1238	memcpy(oldname, dev->name, IFNAMSIZ);
1239
1240	err = dev_get_valid_name(net, dev, newname);
1241	if (err < 0) {
1242		write_seqcount_end(&devnet_rename_seq);
1243		return err;
1244	}
1245
1246	if (oldname[0] && !strchr(oldname, '%'))
1247		netdev_info(dev, "renamed from %s\n", oldname);
1248
1249	old_assign_type = dev->name_assign_type;
1250	dev->name_assign_type = NET_NAME_RENAMED;
1251
1252rollback:
1253	ret = device_rename(&dev->dev, dev->name);
1254	if (ret) {
1255		memcpy(dev->name, oldname, IFNAMSIZ);
1256		dev->name_assign_type = old_assign_type;
1257		write_seqcount_end(&devnet_rename_seq);
1258		return ret;
1259	}
1260
1261	write_seqcount_end(&devnet_rename_seq);
1262
1263	netdev_adjacent_rename_links(dev, oldname);
1264
1265	write_lock_bh(&dev_base_lock);
1266	netdev_name_node_del(dev->name_node);
1267	write_unlock_bh(&dev_base_lock);
1268
1269	synchronize_rcu();
1270
1271	write_lock_bh(&dev_base_lock);
1272	netdev_name_node_add(net, dev->name_node);
1273	write_unlock_bh(&dev_base_lock);
1274
1275	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1276	ret = notifier_to_errno(ret);
1277
1278	if (ret) {
1279		/* err >= 0 after dev_alloc_name() or stores the first errno */
1280		if (err >= 0) {
1281			err = ret;
1282			write_seqcount_begin(&devnet_rename_seq);
1283			memcpy(dev->name, oldname, IFNAMSIZ);
1284			memcpy(oldname, newname, IFNAMSIZ);
1285			dev->name_assign_type = old_assign_type;
1286			old_assign_type = NET_NAME_RENAMED;
1287			goto rollback;
1288		} else {
1289			pr_err("%s: name change rollback failed: %d\n",
1290			       dev->name, ret);
1291		}
1292	}
1293
1294	return err;
1295}
1296
1297/**
1298 *	dev_set_alias - change ifalias of a device
1299 *	@dev: device
1300 *	@alias: name up to IFALIASZ
1301 *	@len: limit of bytes to copy from info
1302 *
1303 *	Set ifalias for a device,
1304 */
1305int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1306{
1307	struct dev_ifalias *new_alias = NULL;
1308
1309	if (len >= IFALIASZ)
1310		return -EINVAL;
1311
1312	if (len) {
1313		new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL);
1314		if (!new_alias)
1315			return -ENOMEM;
1316
1317		memcpy(new_alias->ifalias, alias, len);
1318		new_alias->ifalias[len] = 0;
1319	}
1320
1321	mutex_lock(&ifalias_mutex);
1322	new_alias = rcu_replace_pointer(dev->ifalias, new_alias,
1323					mutex_is_locked(&ifalias_mutex));
1324	mutex_unlock(&ifalias_mutex);
1325
1326	if (new_alias)
1327		kfree_rcu(new_alias, rcuhead);
1328
1329	return len;
1330}
1331EXPORT_SYMBOL(dev_set_alias);
1332
1333/**
1334 *	dev_get_alias - get ifalias of a device
1335 *	@dev: device
1336 *	@name: buffer to store name of ifalias
1337 *	@len: size of buffer
1338 *
1339 *	get ifalias for a device.  Caller must make sure dev cannot go
1340 *	away,  e.g. rcu read lock or own a reference count to device.
1341 */
1342int dev_get_alias(const struct net_device *dev, char *name, size_t len)
1343{
1344	const struct dev_ifalias *alias;
1345	int ret = 0;
1346
1347	rcu_read_lock();
1348	alias = rcu_dereference(dev->ifalias);
1349	if (alias)
1350		ret = snprintf(name, len, "%s", alias->ifalias);
1351	rcu_read_unlock();
1352
1353	return ret;
1354}
1355
1356/**
1357 *	netdev_features_change - device changes features
1358 *	@dev: device to cause notification
1359 *
1360 *	Called to indicate a device has changed features.
1361 */
1362void netdev_features_change(struct net_device *dev)
1363{
1364	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1365}
1366EXPORT_SYMBOL(netdev_features_change);
1367
1368/**
1369 *	netdev_state_change - device changes state
1370 *	@dev: device to cause notification
1371 *
1372 *	Called to indicate a device has changed state. This function calls
1373 *	the notifier chains for netdev_chain and sends a NEWLINK message
1374 *	to the routing socket.
1375 */
1376void netdev_state_change(struct net_device *dev)
1377{
1378	if (dev->flags & IFF_UP) {
1379		struct netdev_notifier_change_info change_info = {
1380			.info.dev = dev,
1381		};
1382
1383		call_netdevice_notifiers_info(NETDEV_CHANGE,
1384					      &change_info.info);
1385		rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
1386	}
1387}
1388EXPORT_SYMBOL(netdev_state_change);
1389
1390/**
1391 * netdev_notify_peers - notify network peers about existence of @dev
1392 * @dev: network device
1393 *
1394 * Generate traffic such that interested network peers are aware of
1395 * @dev, such as by generating a gratuitous ARP. This may be used when
1396 * a device wants to inform the rest of the network about some sort of
1397 * reconfiguration such as a failover event or virtual machine
1398 * migration.
1399 */
1400void netdev_notify_peers(struct net_device *dev)
1401{
1402	rtnl_lock();
1403	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
1404	call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev);
1405	rtnl_unlock();
1406}
1407EXPORT_SYMBOL(netdev_notify_peers);
1408
1409static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
1410{
1411	const struct net_device_ops *ops = dev->netdev_ops;
1412	int ret;
1413
1414	ASSERT_RTNL();
1415
1416	if (!netif_device_present(dev))
1417		return -ENODEV;
1418
1419	/* Block netpoll from trying to do any rx path servicing.
1420	 * If we don't do this there is a chance ndo_poll_controller
1421	 * or ndo_poll may be running while we open the device
1422	 */
1423	netpoll_poll_disable(dev);
1424
1425	ret = call_netdevice_notifiers_extack(NETDEV_PRE_UP, dev, extack);
1426	ret = notifier_to_errno(ret);
1427	if (ret)
1428		return ret;
1429
1430	set_bit(__LINK_STATE_START, &dev->state);
1431
1432	if (ops->ndo_validate_addr)
1433		ret = ops->ndo_validate_addr(dev);
1434
1435	if (!ret && ops->ndo_open)
1436		ret = ops->ndo_open(dev);
1437
1438	netpoll_poll_enable(dev);
1439
1440	if (ret)
1441		clear_bit(__LINK_STATE_START, &dev->state);
1442	else {
1443		dev->flags |= IFF_UP;
1444		dev_set_rx_mode(dev);
1445		dev_activate(dev);
1446		add_device_randomness(dev->dev_addr, dev->addr_len);
1447	}
1448
1449	return ret;
1450}
1451
1452/**
1453 *	dev_open	- prepare an interface for use.
1454 *	@dev: device to open
1455 *	@extack: netlink extended ack
1456 *
1457 *	Takes a device from down to up state. The device's private open
1458 *	function is invoked and then the multicast lists are loaded. Finally
1459 *	the device is moved into the up state and a %NETDEV_UP message is
1460 *	sent to the netdev notifier chain.
1461 *
1462 *	Calling this function on an active interface is a nop. On a failure
1463 *	a negative errno code is returned.
1464 */
1465int dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
1466{
1467	int ret;
1468
1469	if (dev->flags & IFF_UP)
1470		return 0;
1471
1472	ret = __dev_open(dev, extack);
1473	if (ret < 0)
1474		return ret;
1475
1476	rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
1477	call_netdevice_notifiers(NETDEV_UP, dev);
1478
1479	return ret;
1480}
1481EXPORT_SYMBOL(dev_open);
1482
/* Take every device on @head (linked via close_list) down: stop the TX
 * path, deactivate qdiscs, then call the drivers' ndo_stop().  Caller
 * holds RTNL; may sleep.
 */
static void __dev_close_many(struct list_head *head)
{
	struct net_device *dev;

	ASSERT_RTNL();
	might_sleep();

	list_for_each_entry(dev, head, close_list) {
		/* Temporarily disable netpoll until the interface is down */
		netpoll_poll_disable(dev);

		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

		clear_bit(__LINK_STATE_START, &dev->state);

		/* Synchronize to scheduled poll. We cannot touch poll list, it
		 * can be even on different cpu. So just clear netif_running().
		 *
		 * dev->stop() will invoke napi_disable() on all of it's
		 * napi_struct instances on this device.
		 */
		smp_mb__after_atomic(); /* Commit netif_running(). */
	}

	/* All devices must be marked !running before qdiscs are torn down. */
	dev_deactivate_many(head);

	list_for_each_entry(dev, head, close_list) {
		const struct net_device_ops *ops = dev->netdev_ops;

		/*
		 *	Call the device specific close. This cannot fail.
		 *	Only if device is UP
		 *
		 *	We allow it to be called even after a DETACH hot-plug
		 *	event.
		 */
		if (ops->ndo_stop)
			ops->ndo_stop(dev);

		dev->flags &= ~IFF_UP;
		netpoll_poll_enable(dev);
	}
}
1526
/* Close a single device by wrapping it in a one-element close list. */
static void __dev_close(struct net_device *dev)
{
	LIST_HEAD(single);

	list_add(&dev->close_list, &single);
	__dev_close_many(&single);
	/* Detach the stack-local list head before it goes out of scope. */
	list_del(&single);
}
1535
/* Close all devices on @head that are up, then notify userspace and the
 * netdev chain.  If @unlink is true each device is also removed from the
 * close list afterwards.  Caller holds RTNL.
 */
void dev_close_many(struct list_head *head, bool unlink)
{
	struct net_device *dev, *tmp;

	/* Remove the devices that don't need to be closed */
	list_for_each_entry_safe(dev, tmp, head, close_list)
		if (!(dev->flags & IFF_UP))
			list_del_init(&dev->close_list);

	__dev_close_many(head);

	list_for_each_entry_safe(dev, tmp, head, close_list) {
		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
		call_netdevice_notifiers(NETDEV_DOWN, dev);
		if (unlink)
			list_del_init(&dev->close_list);
	}
}
EXPORT_SYMBOL(dev_close_many);
1555
1556/**
1557 *	dev_close - shutdown an interface.
1558 *	@dev: device to shutdown
1559 *
1560 *	This function moves an active device into down state. A
1561 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1562 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1563 *	chain.
1564 */
1565void dev_close(struct net_device *dev)
1566{
1567	if (dev->flags & IFF_UP) {
1568		LIST_HEAD(single);
1569
1570		list_add(&dev->close_list, &single);
1571		dev_close_many(&single, true);
1572		list_del(&single);
1573	}
1574}
1575EXPORT_SYMBOL(dev_close);
1576
1577
1578/**
1579 *	dev_disable_lro - disable Large Receive Offload on a device
1580 *	@dev: device
1581 *
1582 *	Disable Large Receive Offload (LRO) on a net device.  Must be
1583 *	called under RTNL.  This is needed if received packets may be
1584 *	forwarded to another interface.
1585 */
1586void dev_disable_lro(struct net_device *dev)
1587{
1588	struct net_device *lower_dev;
1589	struct list_head *iter;
1590
1591	dev->wanted_features &= ~NETIF_F_LRO;
1592	netdev_update_features(dev);
1593
1594	if (unlikely(dev->features & NETIF_F_LRO))
1595		netdev_WARN(dev, "failed to disable LRO!\n");
1596
1597	netdev_for_each_lower_dev(dev, lower_dev, iter)
1598		dev_disable_lro(lower_dev);
1599}
1600EXPORT_SYMBOL(dev_disable_lro);
1601
1602/**
1603 *	dev_disable_gro_hw - disable HW Generic Receive Offload on a device
1604 *	@dev: device
1605 *
1606 *	Disable HW Generic Receive Offload (GRO_HW) on a net device.  Must be
1607 *	called under RTNL.  This is needed if Generic XDP is installed on
1608 *	the device.
1609 */
1610static void dev_disable_gro_hw(struct net_device *dev)
1611{
1612	dev->wanted_features &= ~NETIF_F_GRO_HW;
1613	netdev_update_features(dev);
1614
1615	if (unlikely(dev->features & NETIF_F_GRO_HW))
1616		netdev_WARN(dev, "failed to disable GRO_HW!\n");
1617}
1618
/* Map a netdev notifier command to its "NETDEV_*" name for logging.
 * No default case: the compiler then warns if a new enum netdev_cmd
 * value is added without extending this list.
 */
const char *netdev_cmd_to_name(enum netdev_cmd cmd)
{
#define N(val) 						\
	case NETDEV_##val:				\
		return "NETDEV_" __stringify(val);
	switch (cmd) {
	N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER)
	N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE)
	N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE)
	N(POST_INIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER)
	N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO)
	N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO)
	N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
	N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
	N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
	N(PRE_CHANGEADDR)
	}
#undef N
	return "UNKNOWN_NETDEV_EVENT";
}
EXPORT_SYMBOL_GPL(netdev_cmd_to_name);
1640
/* Deliver a single event @val for @dev to one notifier block @nb,
 * wrapping the device in the netdev_notifier_info envelope.
 */
static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
				   struct net_device *dev)
{
	struct netdev_notifier_info info = {
		.dev = dev,
	};

	return nb->notifier_call(nb, val, &info);
}
1650
1651static int call_netdevice_register_notifiers(struct notifier_block *nb,
1652					     struct net_device *dev)
1653{
1654	int err;
1655
1656	err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
1657	err = notifier_to_errno(err);
1658	if (err)
1659		return err;
1660
1661	if (!(dev->flags & IFF_UP))
1662		return 0;
1663
1664	call_netdevice_notifier(nb, NETDEV_UP, dev);
1665	return 0;
1666}
1667
1668static void call_netdevice_unregister_notifiers(struct notifier_block *nb,
1669						struct net_device *dev)
1670{
1671	if (dev->flags & IFF_UP) {
1672		call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
1673					dev);
1674		call_netdevice_notifier(nb, NETDEV_DOWN, dev);
1675	}
1676	call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
1677}
1678
/* Replay register/up events for every device in @net to @nb.  On the
 * first failure, walk back over the devices already notified and send
 * them the matching unregister sequence.  Returns 0 or the first errno.
 */
static int call_netdevice_register_net_notifiers(struct notifier_block *nb,
						 struct net *net)
{
	struct net_device *dev;
	int err;

	for_each_netdev(net, dev) {
		err = call_netdevice_register_notifiers(nb, dev);
		if (err)
			goto rollback;
	}
	return 0;

rollback:
	/* Undo only the devices preceding the one that failed. */
	for_each_netdev_continue_reverse(net, dev)
		call_netdevice_unregister_notifiers(nb, dev);
	return err;
}
1697
/* Synthesize down/unregister events for every device in @net to @nb. */
static void call_netdevice_unregister_net_notifiers(struct notifier_block *nb,
						    struct net *net)
{
	struct net_device *dev;

	for_each_netdev(net, dev)
		call_netdevice_unregister_notifiers(nb, dev);
}
1706
1707static int dev_boot_phase = 1;
1708
1709/**
1710 * register_netdevice_notifier - register a network notifier block
1711 * @nb: notifier
1712 *
1713 * Register a notifier to be called when network device events occur.
1714 * The notifier passed is linked into the kernel structures and must
1715 * not be reused until it has been unregistered. A negative errno code
1716 * is returned on a failure.
1717 *
1718 * When registered all registration and up events are replayed
1719 * to the new notifier to allow device to have a race free
1720 * view of the network device list.
1721 */
1722
1723int register_netdevice_notifier(struct notifier_block *nb)
1724{
1725	struct net *net;
1726	int err;
1727
1728	/* Close race with setup_net() and cleanup_net() */
1729	down_write(&pernet_ops_rwsem);
1730	rtnl_lock();
1731	err = raw_notifier_chain_register(&netdev_chain, nb);
1732	if (err)
1733		goto unlock;
1734	if (dev_boot_phase)
1735		goto unlock;
1736	for_each_net(net) {
1737		err = call_netdevice_register_net_notifiers(nb, net);
1738		if (err)
1739			goto rollback;
1740	}
1741
1742unlock:
1743	rtnl_unlock();
1744	up_write(&pernet_ops_rwsem);
1745	return err;
1746
1747rollback:
1748	for_each_net_continue_reverse(net)
1749		call_netdevice_unregister_net_notifiers(nb, net);
1750
1751	raw_notifier_chain_unregister(&netdev_chain, nb);
1752	goto unlock;
1753}
1754EXPORT_SYMBOL(register_netdevice_notifier);
1755
1756/**
1757 * unregister_netdevice_notifier - unregister a network notifier block
1758 * @nb: notifier
1759 *
1760 * Unregister a notifier previously registered by
1761 * register_netdevice_notifier(). The notifier is unlinked into the
1762 * kernel structures and may then be reused. A negative errno code
1763 * is returned on a failure.
1764 *
1765 * After unregistering unregister and down device events are synthesized
1766 * for all devices on the device list to the removed notifier to remove
1767 * the need for special case cleanup code.
1768 */
1769
1770int unregister_netdevice_notifier(struct notifier_block *nb)
1771{
1772	struct net *net;
1773	int err;
1774
1775	/* Close race with setup_net() and cleanup_net() */
1776	down_write(&pernet_ops_rwsem);
1777	rtnl_lock();
1778	err = raw_notifier_chain_unregister(&netdev_chain, nb);
1779	if (err)
1780		goto unlock;
1781
1782	for_each_net(net)
1783		call_netdevice_unregister_net_notifiers(nb, net);
1784
1785unlock:
1786	rtnl_unlock();
1787	up_write(&pernet_ops_rwsem);
1788	return err;
1789}
1790EXPORT_SYMBOL(unregister_netdevice_notifier);
1791
1792static int __register_netdevice_notifier_net(struct net *net,
1793					     struct notifier_block *nb,
1794					     bool ignore_call_fail)
1795{
1796	int err;
1797
1798	err = raw_notifier_chain_register(&net->netdev_chain, nb);
1799	if (err)
1800		return err;
1801	if (dev_boot_phase)
1802		return 0;
1803
1804	err = call_netdevice_register_net_notifiers(nb, net);
1805	if (err && !ignore_call_fail)
1806		goto chain_unregister;
1807
1808	return 0;
1809
1810chain_unregister:
1811	raw_notifier_chain_unregister(&net->netdev_chain, nb);
1812	return err;
1813}
1814
1815static int __unregister_netdevice_notifier_net(struct net *net,
1816					       struct notifier_block *nb)
1817{
1818	int err;
1819
1820	err = raw_notifier_chain_unregister(&net->netdev_chain, nb);
1821	if (err)
1822		return err;
1823
1824	call_netdevice_unregister_net_notifiers(nb, net);
1825	return 0;
1826}
1827
1828/**
1829 * register_netdevice_notifier_net - register a per-netns network notifier block
1830 * @net: network namespace
1831 * @nb: notifier
1832 *
1833 * Register a notifier to be called when network device events occur.
1834 * The notifier passed is linked into the kernel structures and must
1835 * not be reused until it has been unregistered. A negative errno code
1836 * is returned on a failure.
1837 *
1838 * When registered all registration and up events are replayed
1839 * to the new notifier to allow device to have a race free
1840 * view of the network device list.
1841 */
1842
1843int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb)
1844{
1845	int err;
1846
1847	rtnl_lock();
1848	err = __register_netdevice_notifier_net(net, nb, false);
1849	rtnl_unlock();
1850	return err;
1851}
1852EXPORT_SYMBOL(register_netdevice_notifier_net);
1853
1854/**
1855 * unregister_netdevice_notifier_net - unregister a per-netns
1856 *                                     network notifier block
1857 * @net: network namespace
1858 * @nb: notifier
1859 *
1860 * Unregister a notifier previously registered by
1861 * register_netdevice_notifier(). The notifier is unlinked into the
1862 * kernel structures and may then be reused. A negative errno code
1863 * is returned on a failure.
1864 *
1865 * After unregistering unregister and down device events are synthesized
1866 * for all devices on the device list to the removed notifier to remove
1867 * the need for special case cleanup code.
1868 */
1869
1870int unregister_netdevice_notifier

Large files files are truncated, but you can click here to view the full file