/* net/core/dev.c — mirrored from the Linux kernel source tree. */
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * NET3 Protocol independent device support routines.
  4. *
  5. * Derived from the non IP parts of dev.c 1.0.19
  6. * Authors: Ross Biro
  7. * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  8. * Mark Evans, <evansmp@uhura.aston.ac.uk>
  9. *
  10. * Additional Authors:
  11. * Florian la Roche <rzsfl@rz.uni-sb.de>
  12. * Alan Cox <gw4pts@gw4pts.ampr.org>
  13. * David Hinds <dahinds@users.sourceforge.net>
  14. * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  15. * Adam Sulmicki <adam@cfar.umd.edu>
  16. * Pekka Riikonen <priikone@poesidon.pspt.fi>
  17. *
  18. * Changes:
  19. * D.J. Barrow : Fixed bug where dev->refcnt gets set
  20. * to 2 if register_netdev gets called
  21. * before net_dev_init & also removed a
  22. * few lines of code in the process.
  23. * Alan Cox : device private ioctl copies fields back.
  24. * Alan Cox : Transmit queue code does relevant
  25. * stunts to keep the queue safe.
  26. * Alan Cox : Fixed double lock.
  27. * Alan Cox : Fixed promisc NULL pointer trap
  28. * ???????? : Support the full private ioctl range
  29. * Alan Cox : Moved ioctl permission check into
  30. * drivers
  31. * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
  32. * Alan Cox : 100 backlog just doesn't cut it when
  33. * you start doing multicast video 8)
  34. * Alan Cox : Rewrote net_bh and list manager.
  35. * Alan Cox : Fix ETH_P_ALL echoback lengths.
  36. * Alan Cox : Took out transmit every packet pass
  37. * Saved a few bytes in the ioctl handler
  38. * Alan Cox : Network driver sets packet type before
  39. * calling netif_rx. Saves a function
  40. * call a packet.
  41. * Alan Cox : Hashed net_bh()
  42. * Richard Kooijman: Timestamp fixes.
  43. * Alan Cox : Wrong field in SIOCGIFDSTADDR
  44. * Alan Cox : Device lock protection.
  45. * Alan Cox : Fixed nasty side effect of device close
  46. * changes.
  47. * Rudi Cilibrasi : Pass the right thing to
  48. * set_mac_address()
  49. * Dave Miller : 32bit quantity for the device lock to
  50. * make it work out on a Sparc.
  51. * Bjorn Ekwall : Added KERNELD hack.
  52. * Alan Cox : Cleaned up the backlog initialise.
  53. * Craig Metz : SIOCGIFCONF fix if space for under
  54. * 1 device.
  55. * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
  56. * is no device open function.
  57. * Andi Kleen : Fix error reporting for SIOCGIFCONF
  58. * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
  59. * Cyrus Durgin : Cleaned for KMOD
  60. * Adam Sulmicki : Bug Fix : Network Device Unload
  61. * A network device unload needs to purge
  62. * the backlog queue.
  63. * Paul Rusty Russell : SIOCSIFNAME
  64. * Pekka Riikonen : Netdev boot-time settings code
  65. * Andrew Morton : Make unregister_netdevice wait
  66. * indefinitely on dev->refcnt
  67. * J Hadi Salim : - Backlog queue sampling
  68. * - netif_rx() feedback
  69. */
  70. #include <linux/uaccess.h>
  71. #include <linux/bitops.h>
  72. #include <linux/capability.h>
  73. #include <linux/cpu.h>
  74. #include <linux/types.h>
  75. #include <linux/kernel.h>
  76. #include <linux/hash.h>
  77. #include <linux/slab.h>
  78. #include <linux/sched.h>
  79. #include <linux/sched/mm.h>
  80. #include <linux/mutex.h>
  81. #include <linux/string.h>
  82. #include <linux/mm.h>
  83. #include <linux/socket.h>
  84. #include <linux/sockios.h>
  85. #include <linux/errno.h>
  86. #include <linux/interrupt.h>
  87. #include <linux/if_ether.h>
  88. #include <linux/netdevice.h>
  89. #include <linux/etherdevice.h>
  90. #include <linux/ethtool.h>
  91. #include <linux/skbuff.h>
  92. #include <linux/bpf.h>
  93. #include <linux/bpf_trace.h>
  94. #include <net/net_namespace.h>
  95. #include <net/sock.h>
  96. #include <net/busy_poll.h>
  97. #include <linux/rtnetlink.h>
  98. #include <linux/stat.h>
  99. #include <net/dst.h>
  100. #include <net/dst_metadata.h>
  101. #include <net/pkt_sched.h>
  102. #include <net/pkt_cls.h>
  103. #include <net/checksum.h>
  104. #include <net/xfrm.h>
  105. #include <linux/highmem.h>
  106. #include <linux/init.h>
  107. #include <linux/module.h>
  108. #include <linux/netpoll.h>
  109. #include <linux/rcupdate.h>
  110. #include <linux/delay.h>
  111. #include <net/iw_handler.h>
  112. #include <asm/current.h>
  113. #include <linux/audit.h>
  114. #include <linux/dmaengine.h>
  115. #include <linux/err.h>
  116. #include <linux/ctype.h>
  117. #include <linux/if_arp.h>
  118. #include <linux/if_vlan.h>
  119. #include <linux/ip.h>
  120. #include <net/ip.h>
  121. #include <net/mpls.h>
  122. #include <linux/ipv6.h>
  123. #include <linux/in.h>
  124. #include <linux/jhash.h>
  125. #include <linux/random.h>
  126. #include <trace/events/napi.h>
  127. #include <trace/events/net.h>
  128. #include <trace/events/skb.h>
  129. #include <linux/inetdevice.h>
  130. #include <linux/cpu_rmap.h>
  131. #include <linux/static_key.h>
  132. #include <linux/hashtable.h>
  133. #include <linux/vmalloc.h>
  134. #include <linux/if_macvlan.h>
  135. #include <linux/errqueue.h>
  136. #include <linux/hrtimer.h>
  137. #include <linux/netfilter_ingress.h>
  138. #include <linux/crash_dump.h>
  139. #include <linux/sctp.h>
  140. #include <net/udp_tunnel.h>
  141. #include <linux/net_namespace.h>
  142. #include <linux/indirect_call_wrapper.h>
  143. #include <net/devlink.h>
  144. #include "net-sysfs.h"
  145. #define MAX_GRO_SKBS 8
  146. /* This should be increased if a protocol with a bigger head is added. */
  147. #define GRO_MAX_HEAD (MAX_HEADER + 128)
  148. static DEFINE_SPINLOCK(ptype_lock);
  149. static DEFINE_SPINLOCK(offload_lock);
  150. struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
  151. struct list_head ptype_all __read_mostly; /* Taps */
  152. static struct list_head offload_base __read_mostly;
  153. static int netif_rx_internal(struct sk_buff *skb);
  154. static int call_netdevice_notifiers_info(unsigned long val,
  155. struct netdev_notifier_info *info);
  156. static int call_netdevice_notifiers_extack(unsigned long val,
  157. struct net_device *dev,
  158. struct netlink_ext_ack *extack);
  159. static struct napi_struct *napi_by_id(unsigned int napi_id);
  160. /*
  161. * The @dev_base_head list is protected by @dev_base_lock and the rtnl
  162. * semaphore.
  163. *
  164. * Pure readers hold dev_base_lock for reading, or rcu_read_lock()
  165. *
  166. * Writers must hold the rtnl semaphore while they loop through the
  167. * dev_base_head list, and hold dev_base_lock for writing when they do the
  168. * actual updates. This allows pure readers to access the list even
  169. * while a writer is preparing to update it.
  170. *
  171. * To put it another way, dev_base_lock is held for writing only to
  172. * protect against pure readers; the rtnl semaphore provides the
  173. * protection against other writers.
  174. *
  175. * See, for example usages, register_netdevice() and
  176. * unregister_netdevice(), which must be called with the rtnl
  177. * semaphore held.
  178. */
  179. DEFINE_RWLOCK(dev_base_lock);
  180. EXPORT_SYMBOL(dev_base_lock);
  181. static DEFINE_MUTEX(ifalias_mutex);
  182. /* protects napi_hash addition/deletion and napi_gen_id */
  183. static DEFINE_SPINLOCK(napi_hash_lock);
  184. static unsigned int napi_gen_id = NR_CPUS;
  185. static DEFINE_READ_MOSTLY_HASHTABLE(napi_hash, 8);
  186. static seqcount_t devnet_rename_seq;
  187. static inline void dev_base_seq_inc(struct net *net)
  188. {
  189. while (++net->dev_base_seq == 0)
  190. ;
  191. }
  192. static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
  193. {
  194. unsigned int hash = full_name_hash(net, name, strnlen(name, IFNAMSIZ));
  195. return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
  196. }
  197. static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
  198. {
  199. return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
  200. }
  201. static inline void rps_lock(struct softnet_data *sd)
  202. {
  203. #ifdef CONFIG_RPS
  204. spin_lock(&sd->input_pkt_queue.lock);
  205. #endif
  206. }
  207. static inline void rps_unlock(struct softnet_data *sd)
  208. {
  209. #ifdef CONFIG_RPS
  210. spin_unlock(&sd->input_pkt_queue.lock);
  211. #endif
  212. }
  213. static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
  214. const char *name)
  215. {
  216. struct netdev_name_node *name_node;
  217. name_node = kmalloc(sizeof(*name_node), GFP_KERNEL);
  218. if (!name_node)
  219. return NULL;
  220. INIT_HLIST_NODE(&name_node->hlist);
  221. name_node->dev = dev;
  222. name_node->name = name;
  223. return name_node;
  224. }
  225. static struct netdev_name_node *
  226. netdev_name_node_head_alloc(struct net_device *dev)
  227. {
  228. struct netdev_name_node *name_node;
  229. name_node = netdev_name_node_alloc(dev, dev->name);
  230. if (!name_node)
  231. return NULL;
  232. INIT_LIST_HEAD(&name_node->list);
  233. return name_node;
  234. }
  235. static void netdev_name_node_free(struct netdev_name_node *name_node)
  236. {
  237. kfree(name_node);
  238. }
  239. static void netdev_name_node_add(struct net *net,
  240. struct netdev_name_node *name_node)
  241. {
  242. hlist_add_head_rcu(&name_node->hlist,
  243. dev_name_hash(net, name_node->name));
  244. }
  245. static void netdev_name_node_del(struct netdev_name_node *name_node)
  246. {
  247. hlist_del_rcu(&name_node->hlist);
  248. }
  249. static struct netdev_name_node *netdev_name_node_lookup(struct net *net,
  250. const char *name)
  251. {
  252. struct hlist_head *head = dev_name_hash(net, name);
  253. struct netdev_name_node *name_node;
  254. hlist_for_each_entry(name_node, head, hlist)
  255. if (!strcmp(name_node->name, name))
  256. return name_node;
  257. return NULL;
  258. }
  259. static struct netdev_name_node *netdev_name_node_lookup_rcu(struct net *net,
  260. const char *name)
  261. {
  262. struct hlist_head *head = dev_name_hash(net, name);
  263. struct netdev_name_node *name_node;
  264. hlist_for_each_entry_rcu(name_node, head, hlist)
  265. if (!strcmp(name_node->name, name))
  266. return name_node;
  267. return NULL;
  268. }
  269. int netdev_name_node_alt_create(struct net_device *dev, const char *name)
  270. {
  271. struct netdev_name_node *name_node;
  272. struct net *net = dev_net(dev);
  273. name_node = netdev_name_node_lookup(net, name);
  274. if (name_node)
  275. return -EEXIST;
  276. name_node = netdev_name_node_alloc(dev, name);
  277. if (!name_node)
  278. return -ENOMEM;
  279. netdev_name_node_add(net, name_node);
  280. /* The node that holds dev->name acts as a head of per-device list. */
  281. list_add_tail(&name_node->list, &dev->name_node->list);
  282. return 0;
  283. }
  284. EXPORT_SYMBOL(netdev_name_node_alt_create);
  285. static void __netdev_name_node_alt_destroy(struct netdev_name_node *name_node)
  286. {
  287. list_del(&name_node->list);
  288. netdev_name_node_del(name_node);
  289. kfree(name_node->name);
  290. netdev_name_node_free(name_node);
  291. }
  292. int netdev_name_node_alt_destroy(struct net_device *dev, const char *name)
  293. {
  294. struct netdev_name_node *name_node;
  295. struct net *net = dev_net(dev);
  296. name_node = netdev_name_node_lookup(net, name);
  297. if (!name_node)
  298. return -ENOENT;
  299. /* lookup might have found our primary name or a name belonging
  300. * to another device.
  301. */
  302. if (name_node == dev->name_node || name_node->dev != dev)
  303. return -EINVAL;
  304. __netdev_name_node_alt_destroy(name_node);
  305. return 0;
  306. }
  307. EXPORT_SYMBOL(netdev_name_node_alt_destroy);
  308. static void netdev_name_node_alt_flush(struct net_device *dev)
  309. {
  310. struct netdev_name_node *name_node, *tmp;
  311. list_for_each_entry_safe(name_node, tmp, &dev->name_node->list, list)
  312. __netdev_name_node_alt_destroy(name_node);
  313. }
  314. /* Device list insertion */
  315. static void list_netdevice(struct net_device *dev)
  316. {
  317. struct net *net = dev_net(dev);
  318. ASSERT_RTNL();
  319. write_lock_bh(&dev_base_lock);
  320. list_add_tail_rcu(&dev->dev_list, &net->dev_base_head);
  321. netdev_name_node_add(net, dev->name_node);
  322. hlist_add_head_rcu(&dev->index_hlist,
  323. dev_index_hash(net, dev->ifindex));
  324. write_unlock_bh(&dev_base_lock);
  325. dev_base_seq_inc(net);
  326. }
  327. /* Device list removal
  328. * caller must respect a RCU grace period before freeing/reusing dev
  329. */
  330. static void unlist_netdevice(struct net_device *dev)
  331. {
  332. ASSERT_RTNL();
  333. /* Unlink dev from the device chain */
  334. write_lock_bh(&dev_base_lock);
  335. list_del_rcu(&dev->dev_list);
  336. netdev_name_node_del(dev->name_node);
  337. hlist_del_rcu(&dev->index_hlist);
  338. write_unlock_bh(&dev_base_lock);
  339. dev_base_seq_inc(dev_net(dev));
  340. }
  341. /*
  342. * Our notifier list
  343. */
  344. static RAW_NOTIFIER_HEAD(netdev_chain);
  345. /*
  346. * Device drivers call our routines to queue packets here. We empty the
  347. * queue in the local softnet handler.
  348. */
  349. DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
  350. EXPORT_PER_CPU_SYMBOL(softnet_data);
  351. /*******************************************************************************
  352. *
  353. * Protocol management and registration routines
  354. *
  355. *******************************************************************************/
  356. /*
  357. * Add a protocol ID to the list. Now that the input handler is
  358. * smarter we can dispense with all the messy stuff that used to be
  359. * here.
  360. *
  361. * BEWARE!!! Protocol handlers, mangling input packets,
  362. * MUST BE last in hash buckets and checking protocol handlers
  363. * MUST start from promiscuous ptype_all chain in net_bh.
  364. * It is true now, do not change it.
  365. * Explanation follows: if protocol handler, mangling packet, will
  366. * be the first on list, it is not able to sense, that packet
  367. * is cloned and should be copied-on-write, so that it will
  368. * change it and subsequent readers will get broken packet.
  369. * --ANK (980803)
  370. */
  371. static inline struct list_head *ptype_head(const struct packet_type *pt)
  372. {
  373. if (pt->type == htons(ETH_P_ALL))
  374. return pt->dev ? &pt->dev->ptype_all : &ptype_all;
  375. else
  376. return pt->dev ? &pt->dev->ptype_specific :
  377. &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
  378. }
  379. /**
  380. * dev_add_pack - add packet handler
  381. * @pt: packet type declaration
  382. *
  383. * Add a protocol handler to the networking stack. The passed &packet_type
  384. * is linked into kernel lists and may not be freed until it has been
  385. * removed from the kernel lists.
  386. *
  387. * This call does not sleep therefore it can not
  388. * guarantee all CPU's that are in middle of receiving packets
  389. * will see the new packet type (until the next received packet).
  390. */
  391. void dev_add_pack(struct packet_type *pt)
  392. {
  393. struct list_head *head = ptype_head(pt);
  394. spin_lock(&ptype_lock);
  395. list_add_rcu(&pt->list, head);
  396. spin_unlock(&ptype_lock);
  397. }
  398. EXPORT_SYMBOL(dev_add_pack);
  399. /**
  400. * __dev_remove_pack - remove packet handler
  401. * @pt: packet type declaration
  402. *
  403. * Remove a protocol handler that was previously added to the kernel
  404. * protocol handlers by dev_add_pack(). The passed &packet_type is removed
  405. * from the kernel lists and can be freed or reused once this function
  406. * returns.
  407. *
  408. * The packet type might still be in use by receivers
  409. * and must not be freed until after all the CPU's have gone
  410. * through a quiescent state.
  411. */
  412. void __dev_remove_pack(struct packet_type *pt)
  413. {
  414. struct list_head *head = ptype_head(pt);
  415. struct packet_type *pt1;
  416. spin_lock(&ptype_lock);
  417. list_for_each_entry(pt1, head, list) {
  418. if (pt == pt1) {
  419. list_del_rcu(&pt->list);
  420. goto out;
  421. }
  422. }
  423. pr_warn("dev_remove_pack: %p not found\n", pt);
  424. out:
  425. spin_unlock(&ptype_lock);
  426. }
  427. EXPORT_SYMBOL(__dev_remove_pack);
  428. /**
  429. * dev_remove_pack - remove packet handler
  430. * @pt: packet type declaration
  431. *
  432. * Remove a protocol handler that was previously added to the kernel
  433. * protocol handlers by dev_add_pack(). The passed &packet_type is removed
  434. * from the kernel lists and can be freed or reused once this function
  435. * returns.
  436. *
  437. * This call sleeps to guarantee that no CPU is looking at the packet
  438. * type after return.
  439. */
  440. void dev_remove_pack(struct packet_type *pt)
  441. {
  442. __dev_remove_pack(pt);
  443. synchronize_net();
  444. }
  445. EXPORT_SYMBOL(dev_remove_pack);
  446. /**
  447. * dev_add_offload - register offload handlers
  448. * @po: protocol offload declaration
  449. *
  450. * Add protocol offload handlers to the networking stack. The passed
  451. * &proto_offload is linked into kernel lists and may not be freed until
  452. * it has been removed from the kernel lists.
  453. *
  454. * This call does not sleep therefore it can not
  455. * guarantee all CPU's that are in middle of receiving packets
  456. * will see the new offload handlers (until the next received packet).
  457. */
  458. void dev_add_offload(struct packet_offload *po)
  459. {
  460. struct packet_offload *elem;
  461. spin_lock(&offload_lock);
  462. list_for_each_entry(elem, &offload_base, list) {
  463. if (po->priority < elem->priority)
  464. break;
  465. }
  466. list_add_rcu(&po->list, elem->list.prev);
  467. spin_unlock(&offload_lock);
  468. }
  469. EXPORT_SYMBOL(dev_add_offload);
  470. /**
  471. * __dev_remove_offload - remove offload handler
  472. * @po: packet offload declaration
  473. *
  474. * Remove a protocol offload handler that was previously added to the
  475. * kernel offload handlers by dev_add_offload(). The passed &offload_type
  476. * is removed from the kernel lists and can be freed or reused once this
  477. * function returns.
  478. *
  479. * The packet type might still be in use by receivers
  480. * and must not be freed until after all the CPU's have gone
  481. * through a quiescent state.
  482. */
  483. static void __dev_remove_offload(struct packet_offload *po)
  484. {
  485. struct list_head *head = &offload_base;
  486. struct packet_offload *po1;
  487. spin_lock(&offload_lock);
  488. list_for_each_entry(po1, head, list) {
  489. if (po == po1) {
  490. list_del_rcu(&po->list);
  491. goto out;
  492. }
  493. }
  494. pr_warn("dev_remove_offload: %p not found\n", po);
  495. out:
  496. spin_unlock(&offload_lock);
  497. }
  498. /**
  499. * dev_remove_offload - remove packet offload handler
  500. * @po: packet offload declaration
  501. *
  502. * Remove a packet offload handler that was previously added to the kernel
  503. * offload handlers by dev_add_offload(). The passed &offload_type is
  504. * removed from the kernel lists and can be freed or reused once this
  505. * function returns.
  506. *
  507. * This call sleeps to guarantee that no CPU is looking at the packet
  508. * type after return.
  509. */
  510. void dev_remove_offload(struct packet_offload *po)
  511. {
  512. __dev_remove_offload(po);
  513. synchronize_net();
  514. }
  515. EXPORT_SYMBOL(dev_remove_offload);
  516. /******************************************************************************
  517. *
  518. * Device Boot-time Settings Routines
  519. *
  520. ******************************************************************************/
  521. /* Boot time configuration table */
  522. static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
  523. /**
  524. * netdev_boot_setup_add - add new setup entry
  525. * @name: name of the device
  526. * @map: configured settings for the device
  527. *
  528. * Adds new setup entry to the dev_boot_setup list. The function
  529. * returns 0 on error and 1 on success. This is a generic routine to
  530. * all netdevices.
  531. */
  532. static int netdev_boot_setup_add(char *name, struct ifmap *map)
  533. {
  534. struct netdev_boot_setup *s;
  535. int i;
  536. s = dev_boot_setup;
  537. for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
  538. if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
  539. memset(s[i].name, 0, sizeof(s[i].name));
  540. strlcpy(s[i].name, name, IFNAMSIZ);
  541. memcpy(&s[i].map, map, sizeof(s[i].map));
  542. break;
  543. }
  544. }
  545. return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
  546. }
  547. /**
  548. * netdev_boot_setup_check - check boot time settings
  549. * @dev: the netdevice
  550. *
  551. * Check boot time settings for the device.
  552. * The found settings are set for the device to be used
  553. * later in the device probing.
  554. * Returns 0 if no settings found, 1 if they are.
  555. */
  556. int netdev_boot_setup_check(struct net_device *dev)
  557. {
  558. struct netdev_boot_setup *s = dev_boot_setup;
  559. int i;
  560. for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
  561. if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
  562. !strcmp(dev->name, s[i].name)) {
  563. dev->irq = s[i].map.irq;
  564. dev->base_addr = s[i].map.base_addr;
  565. dev->mem_start = s[i].map.mem_start;
  566. dev->mem_end = s[i].map.mem_end;
  567. return 1;
  568. }
  569. }
  570. return 0;
  571. }
  572. EXPORT_SYMBOL(netdev_boot_setup_check);
  573. /**
  574. * netdev_boot_base - get address from boot time settings
  575. * @prefix: prefix for network device
  576. * @unit: id for network device
  577. *
  578. * Check boot time settings for the base address of device.
  579. * The found settings are set for the device to be used
  580. * later in the device probing.
  581. * Returns 0 if no settings found.
  582. */
  583. unsigned long netdev_boot_base(const char *prefix, int unit)
  584. {
  585. const struct netdev_boot_setup *s = dev_boot_setup;
  586. char name[IFNAMSIZ];
  587. int i;
  588. sprintf(name, "%s%d", prefix, unit);
  589. /*
  590. * If device already registered then return base of 1
  591. * to indicate not to probe for this interface
  592. */
  593. if (__dev_get_by_name(&init_net, name))
  594. return 1;
  595. for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
  596. if (!strcmp(name, s[i].name))
  597. return s[i].map.base_addr;
  598. return 0;
  599. }
  600. /*
  601. * Saves at boot time configured settings for any netdevice.
  602. */
  603. int __init netdev_boot_setup(char *str)
  604. {
  605. int ints[5];
  606. struct ifmap map;
  607. str = get_options(str, ARRAY_SIZE(ints), ints);
  608. if (!str || !*str)
  609. return 0;
  610. /* Save settings */
  611. memset(&map, 0, sizeof(map));
  612. if (ints[0] > 0)
  613. map.irq = ints[1];
  614. if (ints[0] > 1)
  615. map.base_addr = ints[2];
  616. if (ints[0] > 2)
  617. map.mem_start = ints[3];
  618. if (ints[0] > 3)
  619. map.mem_end = ints[4];
  620. /* Add new entry to the list */
  621. return netdev_boot_setup_add(str, &map);
  622. }
  623. __setup("netdev=", netdev_boot_setup);
  624. /*******************************************************************************
  625. *
  626. * Device Interface Subroutines
  627. *
  628. *******************************************************************************/
  629. /**
  630. * dev_get_iflink - get 'iflink' value of a interface
  631. * @dev: targeted interface
  632. *
  633. * Indicates the ifindex the interface is linked to.
  634. * Physical interfaces have the same 'ifindex' and 'iflink' values.
  635. */
  636. int dev_get_iflink(const struct net_device *dev)
  637. {
  638. if (dev->netdev_ops && dev->netdev_ops->ndo_get_iflink)
  639. return dev->netdev_ops->ndo_get_iflink(dev);
  640. return dev->ifindex;
  641. }
  642. EXPORT_SYMBOL(dev_get_iflink);
  643. /**
  644. * dev_fill_metadata_dst - Retrieve tunnel egress information.
  645. * @dev: targeted interface
  646. * @skb: The packet.
  647. *
  648. * For better visibility of tunnel traffic OVS needs to retrieve
  649. * egress tunnel information for a packet. Following API allows
  650. * user to get this info.
  651. */
  652. int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
  653. {
  654. struct ip_tunnel_info *info;
  655. if (!dev->netdev_ops || !dev->netdev_ops->ndo_fill_metadata_dst)
  656. return -EINVAL;
  657. info = skb_tunnel_info_unclone(skb);
  658. if (!info)
  659. return -ENOMEM;
  660. if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
  661. return -EINVAL;
  662. return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
  663. }
  664. EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
  665. /**
  666. * __dev_get_by_name - find a device by its name
  667. * @net: the applicable net namespace
  668. * @name: name to find
  669. *
  670. * Find an interface by name. Must be called under RTNL semaphore
  671. * or @dev_base_lock. If the name is found a pointer to the device
  672. * is returned. If the name is not found then %NULL is returned. The
  673. * reference counters are not incremented so the caller must be
  674. * careful with locks.
  675. */
  676. struct net_device *__dev_get_by_name(struct net *net, const char *name)
  677. {
  678. struct netdev_name_node *node_name;
  679. node_name = netdev_name_node_lookup(net, name);
  680. return node_name ? node_name->dev : NULL;
  681. }
  682. EXPORT_SYMBOL(__dev_get_by_name);
  683. /**
  684. * dev_get_by_name_rcu - find a device by its name
  685. * @net: the applicable net namespace
  686. * @name: name to find
  687. *
  688. * Find an interface by name.
  689. * If the name is found a pointer to the device is returned.
  690. * If the name is not found then %NULL is returned.
  691. * The reference counters are not incremented so the caller must be
  692. * careful with locks. The caller must hold RCU lock.
  693. */
  694. struct net_device *dev_get_by_name_rcu(struct net *net, const char *name)
  695. {
  696. struct netdev_name_node *node_name;
  697. node_name = netdev_name_node_lookup_rcu(net, name);
  698. return node_name ? node_name->dev : NULL;
  699. }
  700. EXPORT_SYMBOL(dev_get_by_name_rcu);
  701. /**
  702. * dev_get_by_name - find a device by its name
  703. * @net: the applicable net namespace
  704. * @name: name to find
  705. *
  706. * Find an interface by name. This can be called from any
  707. * context and does its own locking. The returned handle has
  708. * the usage count incremented and the caller must use dev_put() to
  709. * release it when it is no longer needed. %NULL is returned if no
  710. * matching device is found.
  711. */
  712. struct net_device *dev_get_by_name(struct net *net, const char *name)
  713. {
  714. struct net_device *dev;
  715. rcu_read_lock();
  716. dev = dev_get_by_name_rcu(net, name);
  717. if (dev)
  718. dev_hold(dev);
  719. rcu_read_unlock();
  720. return dev;
  721. }
  722. EXPORT_SYMBOL(dev_get_by_name);
  723. /**
  724. * __dev_get_by_index - find a device by its ifindex
  725. * @net: the applicable net namespace
  726. * @ifindex: index of device
  727. *
  728. * Search for an interface by index. Returns %NULL if the device
  729. * is not found or a pointer to the device. The device has not
  730. * had its reference counter increased so the caller must be careful
  731. * about locking. The caller must hold either the RTNL semaphore
  732. * or @dev_base_lock.
  733. */
  734. struct net_device *__dev_get_by_index(struct net *net, int ifindex)
  735. {
  736. struct net_device *dev;
  737. struct hlist_head *head = dev_index_hash(net, ifindex);
  738. hlist_for_each_entry(dev, head, index_hlist)
  739. if (dev->ifindex == ifindex)
  740. return dev;
  741. return NULL;
  742. }
  743. EXPORT_SYMBOL(__dev_get_by_index);
  744. /**
  745. * dev_get_by_index_rcu - find a device by its ifindex
  746. * @net: the applicable net namespace
  747. * @ifindex: index of device
  748. *
  749. * Search for an interface by index. Returns %NULL if the device
  750. * is not found or a pointer to the device. The device has not
  751. * had its reference counter increased so the caller must be careful
  752. * about locking. The caller must hold RCU lock.
  753. */
  754. struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex)
  755. {
  756. struct net_device *dev;
  757. struct hlist_head *head = dev_index_hash(net, ifindex);
  758. hlist_for_each_entry_rcu(dev, head, index_hlist)
  759. if (dev->ifindex == ifindex)
  760. return dev;
  761. return NULL;
  762. }
  763. EXPORT_SYMBOL(dev_get_by_index_rcu);
  764. /**
  765. * dev_get_by_index - find a device by its ifindex
  766. * @net: the applicable net namespace
  767. * @ifindex: index of device
  768. *
  769. * Search for an interface by index. Returns NULL if the device
  770. * is not found or a pointer to the device. The device returned has
  771. * had a reference added and the pointer is safe until the user calls
  772. * dev_put to indicate they have finished with it.
  773. */
  774. struct net_device *dev_get_by_index(struct net *net, int ifindex)
  775. {
  776. struct net_device *dev;
  777. rcu_read_lock();
  778. dev = dev_get_by_index_rcu(net, ifindex);
  779. if (dev)
  780. dev_hold(dev);
  781. rcu_read_unlock();
  782. return dev;
  783. }
  784. EXPORT_SYMBOL(dev_get_by_index);
  785. /**
  786. * dev_get_by_napi_id - find a device by napi_id
  787. * @napi_id: ID of the NAPI struct
  788. *
  789. * Search for an interface by NAPI ID. Returns %NULL if the device
  790. * is not found or a pointer to the device. The device has not had
  791. * its reference counter increased so the caller must be careful
  792. * about locking. The caller must hold RCU lock.
  793. */
  794. struct net_device *dev_get_by_napi_id(unsigned int napi_id)
  795. {
  796. struct napi_struct *napi;
  797. WARN_ON_ONCE(!rcu_read_lock_held());
  798. if (napi_id < MIN_NAPI_ID)
  799. return NULL;
  800. napi = napi_by_id(napi_id);
  801. return napi ? napi->dev : NULL;
  802. }
  803. EXPORT_SYMBOL(dev_get_by_napi_id);
  804. /**
  805. * netdev_get_name - get a netdevice name, knowing its ifindex.
  806. * @net: network namespace
  807. * @name: a pointer to the buffer where the name will be stored.
  808. * @ifindex: the ifindex of the interface to get the name from.
  809. *
  810. * The use of raw_seqcount_begin() and cond_resched() before
  811. * retrying is required as we want to give the writers a chance
  812. * to complete when CONFIG_PREEMPTION is not set.
  813. */
  814. int netdev_get_name(struct net *net, char *name, int ifindex)
  815. {
  816. struct net_device *dev;
  817. unsigned int seq;
  818. retry:
  819. seq = raw_seqcount_begin(&devnet_rename_seq);
  820. rcu_read_lock();
  821. dev = dev_get_by_index_rcu(net, ifindex);
  822. if (!dev) {
  823. rcu_read_unlock();
  824. return -ENODEV;
  825. }
  826. strcpy(name, dev->name);
  827. rcu_read_unlock();
  828. if (read_seqcount_retry(&devnet_rename_seq, seq)) {
  829. cond_resched();
  830. goto retry;
  831. }
  832. return 0;
  833. }
  834. /**
  835. * dev_getbyhwaddr_rcu - find a device by its hardware address
  836. * @net: the applicable net namespace
  837. * @type: media type of device
  838. * @ha: hardware address
  839. *
  840. * Search for an interface by MAC address. Returns NULL if the device
  841. * is not found or a pointer to the device.
  842. * The caller must hold RCU or RTNL.
  843. * The returned device has not had its ref count increased
  844. * and the caller must therefore be careful about locking
  845. *
  846. */
  847. struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
  848. const char *ha)
  849. {
  850. struct net_device *dev;
  851. for_each_netdev_rcu(net, dev)
  852. if (dev->type == type &&
  853. !memcmp(dev->dev_addr, ha, dev->addr_len))
  854. return dev;
  855. return NULL;
  856. }
  857. EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
  858. struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
  859. {
  860. struct net_device *dev;
  861. ASSERT_RTNL();
  862. for_each_netdev(net, dev)
  863. if (dev->type == type)
  864. return dev;
  865. return NULL;
  866. }
  867. EXPORT_SYMBOL(__dev_getfirstbyhwtype);
  868. struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
  869. {
  870. struct net_device *dev, *ret = NULL;
  871. rcu_read_lock();
  872. for_each_netdev_rcu(net, dev)
  873. if (dev->type == type) {
  874. dev_hold(dev);
  875. ret = dev;
  876. break;
  877. }
  878. rcu_read_unlock();
  879. return ret;
  880. }
  881. EXPORT_SYMBOL(dev_getfirstbyhwtype);
  882. /**
  883. * __dev_get_by_flags - find any device with given flags
  884. * @net: the applicable net namespace
  885. * @if_flags: IFF_* values
  886. * @mask: bitmask of bits in if_flags to check
  887. *
  888. * Search for any interface with the given flags. Returns NULL if a device
  889. * is not found or a pointer to the device. Must be called inside
  890. * rtnl_lock(), and result refcount is unchanged.
  891. */
  892. struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags,
  893. unsigned short mask)
  894. {
  895. struct net_device *dev, *ret;
  896. ASSERT_RTNL();
  897. ret = NULL;
  898. for_each_netdev(net, dev) {
  899. if (((dev->flags ^ if_flags) & mask) == 0) {
  900. ret = dev;
  901. break;
  902. }
  903. }
  904. return ret;
  905. }
  906. EXPORT_SYMBOL(__dev_get_by_flags);
  907. /**
  908. * dev_valid_name - check if name is okay for network device
  909. * @name: name string
  910. *
  911. * Network device names need to be valid file names to
  912. * to allow sysfs to work. We also disallow any kind of
  913. * whitespace.
  914. */
  915. bool dev_valid_name(const char *name)
  916. {
  917. if (*name == '\0')
  918. return false;
  919. if (strnlen(name, IFNAMSIZ) == IFNAMSIZ)
  920. return false;
  921. if (!strcmp(name, ".") || !strcmp(name, ".."))
  922. return false;
  923. while (*name) {
  924. if (*name == '/' || *name == ':' || isspace(*name))
  925. return false;
  926. name++;
  927. }
  928. return true;
  929. }
  930. EXPORT_SYMBOL(dev_valid_name);
/**
 * __dev_alloc_name - allocate a name for a device
 * @net: network namespace to allocate the device name in
 * @name: name format string
 * @buf: scratch buffer and result name string (at least IFNAMSIZ bytes)
 *
 * Passed a format string - eg "lt%d" it will try and find a suitable
 * id. It scans list of devices to build up a free map, then chooses
 * the first empty slot. The caller must hold the dev_base or rtnl lock
 * while allocating the name and adding the device in order to avoid
 * duplicates.
 * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 * Returns the number of the unit assigned or a negative errno code.
 */
static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;	/* one page used as a bitmap */
	unsigned long *inuse;
	struct net_device *d;

	if (!dev_valid_name(name))
		return -EINVAL;

	p = strchr(name, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user. There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		/* Mark each unit number already taken by an existing
		 * device whose name the pattern can produce.
		 */
		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		/* First clear bit is the lowest free unit number. */
		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	snprintf(buf, IFNAMSIZ, name, i);
	/* Final collision check: covers both the literal-name case (no
	 * '%' in @name, i stayed 0) and a pattern whose expansion was
	 * truncated by IFNAMSIZ into an existing name.
	 */
	if (!__dev_get_by_name(net, buf))
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}
  989. static int dev_alloc_name_ns(struct net *net,
  990. struct net_device *dev,
  991. const char *name)
  992. {
  993. char buf[IFNAMSIZ];
  994. int ret;
  995. BUG_ON(!net);
  996. ret = __dev_alloc_name(net, name, buf);
  997. if (ret >= 0)
  998. strlcpy(dev->name, buf, IFNAMSIZ);
  999. return ret;
  1000. }
  1001. /**
  1002. * dev_alloc_name - allocate a name for a device
  1003. * @dev: device
  1004. * @name: name format string
  1005. *
  1006. * Passed a format string - eg "lt%d" it will try and find a suitable
  1007. * id. It scans list of devices to build up a free map, then chooses
  1008. * the first empty slot. The caller must hold the dev_base or rtnl lock
  1009. * while allocating the name and adding the device in order to avoid
  1010. * duplicates.
  1011. * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
  1012. * Returns the number of the unit assigned or a negative errno code.
  1013. */
  1014. int dev_alloc_name(struct net_device *dev, const char *name)
  1015. {
  1016. return dev_alloc_name_ns(dev_net(dev), dev, name);
  1017. }
  1018. EXPORT_SYMBOL(dev_alloc_name);
  1019. static int dev_get_valid_name(struct net *net, struct net_device *dev,
  1020. const char *name)
  1021. {
  1022. BUG_ON(!net);
  1023. if (!dev_valid_name(name))
  1024. return -EINVAL;
  1025. if (strchr(name, '%'))
  1026. return dev_alloc_name_ns(net, dev, name);
  1027. else if (__dev_get_by_name(net, name))
  1028. return -EEXIST;
  1029. else if (dev->name != name)
  1030. strlcpy(dev->name, name, IFNAMSIZ);
  1031. return 0;
  1032. }
/**
 * dev_change_name - change name of a device
 * @dev: device
 * @newname: name (or format string) must be at least IFNAMSIZ
 *
 * Change name of a device, can pass format strings "eth%d".
 * for wildcarding.
 *
 * Holds devnet_rename_seq across the rename so readers such as
 * netdev_get_name() can detect a rename in flight.  Returns 0 on
 * success (which includes renaming to the current name) or a
 * negative errno code.
 */
int dev_change_name(struct net_device *dev, const char *newname)
{
	unsigned char old_assign_type;
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;
	struct net *net;

	ASSERT_RTNL();
	BUG_ON(!dev_net(dev));

	net = dev_net(dev);

	/* Some auto-enslaved devices e.g. failover slaves are
	 * special, as userspace might rename the device after
	 * the interface had been brought up and running since
	 * the point kernel initiated auto-enslavement. Allow
	 * live name change even when these slave devices are
	 * up and running.
	 *
	 * Typically, users of these auto-enslaving devices
	 * don't actually care about slave name change, as
	 * they are supposed to operate on master interface
	 * directly.
	 */
	if (dev->flags & IFF_UP &&
	    likely(!(dev->priv_flags & IFF_LIVE_RENAME_OK)))
		return -EBUSY;

	write_seqcount_begin(&devnet_rename_seq);

	/* Renaming to the current name is a successful no-op. */
	if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
		write_seqcount_end(&devnet_rename_seq);
		return 0;
	}

	memcpy(oldname, dev->name, IFNAMSIZ);

	/* Validates @newname and writes the (possibly %d-expanded)
	 * result into dev->name.
	 */
	err = dev_get_valid_name(net, dev, newname);
	if (err < 0) {
		write_seqcount_end(&devnet_rename_seq);
		return err;
	}

	if (oldname[0] && !strchr(oldname, '%'))
		netdev_info(dev, "renamed from %s\n", oldname);

	old_assign_type = dev->name_assign_type;
	dev->name_assign_type = NET_NAME_RENAMED;

rollback:
	ret = device_rename(&dev->dev, dev->name);
	if (ret) {
		/* sysfs rename failed: restore the previous name/type
		 * and bail out with the sysfs error.
		 */
		memcpy(dev->name, oldname, IFNAMSIZ);
		dev->name_assign_type = old_assign_type;
		write_seqcount_end(&devnet_rename_seq);
		return ret;
	}

	write_seqcount_end(&devnet_rename_seq);

	netdev_adjacent_rename_links(dev, oldname);

	/* Re-hash the device under its new name.  The synchronize_rcu()
	 * between the del and the add lets RCU lookups of the old name
	 * drain before the new entry becomes visible.
	 */
	write_lock_bh(&dev_base_lock);
	netdev_name_node_del(dev->name_node);
	write_unlock_bh(&dev_base_lock);

	synchronize_rcu();

	write_lock_bh(&dev_base_lock);
	netdev_name_node_add(net, dev->name_node);
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		/* err >= 0 after dev_alloc_name() or stores the first errno */
		if (err >= 0) {
			/* First notifier failure: remember the errno,
			 * swap old/new names and rename back through the
			 * same rollback path.
			 */
			err = ret;
			write_seqcount_begin(&devnet_rename_seq);
			memcpy(dev->name, oldname, IFNAMSIZ);
			memcpy(oldname, newname, IFNAMSIZ);
			dev->name_assign_type = old_assign_type;
			old_assign_type = NET_NAME_RENAMED;
			goto rollback;
		} else {
			/* The rollback itself was refused: keep the new
			 * name and warn loudly.
			 */
			pr_err("%s: name change rollback failed: %d\n",
			       dev->name, ret);
		}
	}

	return err;
}
  1117. /**
  1118. * dev_set_alias - change ifalias of a device
  1119. * @dev: device
  1120. * @alias: name up to IFALIASZ
  1121. * @len: limit of bytes to copy from info
  1122. *
  1123. * Set ifalias for a device,
  1124. */
  1125. int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
  1126. {
  1127. struct dev_ifalias *new_alias = NULL;
  1128. if (len >= IFALIASZ)
  1129. return -EINVAL;
  1130. if (len) {
  1131. new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL);
  1132. if (!new_alias)
  1133. return -ENOMEM;
  1134. memcpy(new_alias->ifalias, alias, len);
  1135. new_alias->ifalias[len] = 0;
  1136. }
  1137. mutex_lock(&ifalias_mutex);
  1138. new_alias = rcu_replace_pointer(dev->ifalias, new_alias,
  1139. mutex_is_locked(&ifalias_mutex));
  1140. mutex_unlock(&ifalias_mutex);
  1141. if (new_alias)
  1142. kfree_rcu(new_alias, rcuhead);
  1143. return len;
  1144. }
  1145. EXPORT_SYMBOL(dev_set_alias);
  1146. /**
  1147. * dev_get_alias - get ifalias of a device
  1148. * @dev: device
  1149. * @name: buffer to store name of ifalias
  1150. * @len: size of buffer
  1151. *
  1152. * get ifalias for a device. Caller must make sure dev cannot go
  1153. * away, e.g. rcu read lock or own a reference count to device.
  1154. */
  1155. int dev_get_alias(const struct net_device *dev, char *name, size_t len)
  1156. {
  1157. const struct dev_ifalias *alias;
  1158. int ret = 0;
  1159. rcu_read_lock();
  1160. alias = rcu_dereference(dev->ifalias);
  1161. if (alias)
  1162. ret = snprintf(name, len, "%s", alias->ifalias);
  1163. rcu_read_unlock();
  1164. return ret;
  1165. }
/**
 * netdev_features_change - device changes features
 * @dev: device to cause notification
 *
 * Called to indicate a device has changed features.
 * Broadcasts %NETDEV_FEAT_CHANGE on the netdev notifier chain.
 */
void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);
  1177. /**
  1178. * netdev_state_change - device changes state
  1179. * @dev: device to cause notification
  1180. *
  1181. * Called to indicate a device has changed state. This function calls
  1182. * the notifier chains for netdev_chain and sends a NEWLINK message
  1183. * to the routing socket.
  1184. */
  1185. void netdev_state_change(struct net_device *dev)
  1186. {
  1187. if (dev->flags & IFF_UP) {
  1188. struct netdev_notifier_change_info change_info = {
  1189. .info.dev = dev,
  1190. };
  1191. call_netdevice_notifiers_info(NETDEV_CHANGE,
  1192. &change_info.info);
  1193. rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL);
  1194. }
  1195. }
  1196. EXPORT_SYMBOL(netdev_state_change);
/**
 * netdev_notify_peers - notify network peers about existence of @dev
 * @dev: network device
 *
 * Generate traffic such that interested network peers are aware of
 * @dev, such as by generating a gratuitous ARP. This may be used when
 * a device wants to inform the rest of the network about some sort of
 * reconfiguration such as a failover event or virtual machine
 * migration.
 *
 * Takes RTNL itself, so it must not be called with RTNL already held.
 */
void netdev_notify_peers(struct net_device *dev)
{
	rtnl_lock();
	call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
	/* Also re-announce multicast group memberships. */
	call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev);
	rtnl_unlock();
}
EXPORT_SYMBOL(netdev_notify_peers);
/* Bring @dev from down to up: run the NETDEV_PRE_UP notifiers,
 * validate the hardware address, call the driver's ndo_open() and
 * activate the device (rx mode, qdiscs).  Caller holds RTNL.
 * Returns 0 or a negative errno.
 */
static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret;

	ASSERT_RTNL();

	if (!netif_device_present(dev))
		return -ENODEV;

	/* Block netpoll from trying to do any rx path servicing.
	 * If we don't do this there is a chance ndo_poll_controller
	 * or ndo_poll may be running while we open the device
	 */
	netpoll_poll_disable(dev);

	ret = call_netdevice_notifiers_extack(NETDEV_PRE_UP, dev, extack);
	ret = notifier_to_errno(ret);
	/* NOTE(review): this early return leaves netpoll polling
	 * disabled (no matching netpoll_poll_enable()) -- confirm
	 * whether that asymmetry is intentional.
	 */
	if (ret)
		return ret;

	set_bit(__LINK_STATE_START, &dev->state);

	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);

	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);

	netpoll_poll_enable(dev);

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
		dev->flags |= IFF_UP;
		dev_set_rx_mode(dev);
		dev_activate(dev);
		/* Feed the (possibly freshly assigned) MAC into the
		 * entropy pool.
		 */
		add_device_randomness(dev->dev_addr, dev->addr_len);
	}

	return ret;
}
  1247. /**
  1248. * dev_open - prepare an interface for use.
  1249. * @dev: device to open
  1250. * @extack: netlink extended ack
  1251. *
  1252. * Takes a device from down to up state. The device's private open
  1253. * function is invoked and then the multicast lists are loaded. Finally
  1254. * the device is moved into the up state and a %NETDEV_UP message is
  1255. * sent to the netdev notifier chain.
  1256. *
  1257. * Calling this function on an active interface is a nop. On a failure
  1258. * a negative errno code is returned.
  1259. */
  1260. int dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
  1261. {
  1262. int ret;
  1263. if (dev->flags & IFF_UP)
  1264. return 0;
  1265. ret = __dev_open(dev, extack);
  1266. if (ret < 0)
  1267. return ret;
  1268. rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
  1269. call_netdevice_notifiers(NETDEV_UP, dev);
  1270. return ret;
  1271. }
  1272. EXPORT_SYMBOL(dev_open);
/* Take every device on @head (linked via ->close_list) down.
 * Two-phase shutdown: first quiesce all devices (netpoll off,
 * GOING_DOWN notifier, clear __LINK_STATE_START, deactivate qdiscs),
 * then call each driver's ndo_stop().  Caller holds RTNL.
 */
static void __dev_close_many(struct list_head *head)
{
	struct net_device *dev;

	ASSERT_RTNL();
	might_sleep();

	list_for_each_entry(dev, head, close_list) {
		/* Temporarily disable netpoll until the interface is down */
		netpoll_poll_disable(dev);

		call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

		clear_bit(__LINK_STATE_START, &dev->state);

		/* Synchronize to scheduled poll. We cannot touch poll list, it
		 * can be even on different cpu. So just clear netif_running().
		 *
		 * dev->stop() will invoke napi_disable() on all of it's
		 * napi_struct instances on this device.
		 */
		smp_mb__after_atomic(); /* Commit netif_running(). */
	}

	dev_deactivate_many(head);

	list_for_each_entry(dev, head, close_list) {
		const struct net_device_ops *ops = dev->netdev_ops;

		/*
		 * Call the device specific close. This cannot fail.
		 * Only if device is UP
		 *
		 * We allow it to be called even after a DETACH hot-plug
		 * event.
		 */
		if (ops->ndo_stop)
			ops->ndo_stop(dev);

		dev->flags &= ~IFF_UP;
		netpoll_poll_enable(dev);
	}
}
/* Close a single device by wrapping it in a one-entry list and
 * reusing the batched __dev_close_many() path.  Caller holds RTNL.
 */
static void __dev_close(struct net_device *dev)
{
	LIST_HEAD(single);

	list_add(&dev->close_list, &single);
	__dev_close_many(&single);
	list_del(&single);
}
/**
 * dev_close_many - shut down multiple devices
 * @head: list of devices linked via ->close_list
 * @unlink: if true, remove each device from @head when done
 *
 * Devices on @head that are already down are dropped from the list
 * first; the rest are closed and a %NETDEV_DOWN notifier plus an
 * rtnetlink message is emitted for each.  Caller must hold RTNL.
 */
void dev_close_many(struct list_head *head, bool unlink)
{
	struct net_device *dev, *tmp;

	/* Remove the devices that don't need to be closed */
	list_for_each_entry_safe(dev, tmp, head, close_list)
		if (!(dev->flags & IFF_UP))
			list_del_init(&dev->close_list);

	__dev_close_many(head);

	list_for_each_entry_safe(dev, tmp, head, close_list) {
		rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING, GFP_KERNEL);
		call_netdevice_notifiers(NETDEV_DOWN, dev);
		if (unlink)
			list_del_init(&dev->close_list);
	}
}
EXPORT_SYMBOL(dev_close_many);
  1330. /**
  1331. * dev_close - shutdown an interface.
  1332. * @dev: device to shutdown
  1333. *
  1334. * This function moves an active device into down state. A
  1335. * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
  1336. * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
  1337. * chain.
  1338. */
  1339. void dev_close(struct net_device *dev)
  1340. {
  1341. if (dev->flags & IFF_UP) {
  1342. LIST_HEAD(single);
  1343. list_add(&dev->close_list, &single);
  1344. dev_close_many(&single, true);
  1345. list_del(&single);
  1346. }
  1347. }
  1348. EXPORT_SYMBOL(dev_close);
/**
 * dev_disable_lro - disable Large Receive Offload on a device
 * @dev: device
 *
 * Disable Large Receive Offload (LRO) on a net device. Must be
 * called under RTNL. This is needed if received packets may be
 * forwarded to another interface.
 */
void dev_disable_lro(struct net_device *dev)
{
	struct net_device *lower_dev;
	struct list_head *iter;

	dev->wanted_features &= ~NETIF_F_LRO;
	netdev_update_features(dev);

	/* Feature negotiation may refuse to drop LRO; warn loudly. */
	if (unlikely(dev->features & NETIF_F_LRO))
		netdev_WARN(dev, "failed to disable LRO!\n");

	/* Recurse into stacked lower devices so the whole chain has
	 * LRO turned off.
	 */
	netdev_for_each_lower_dev(dev, lower_dev, iter)
		dev_disable_lro(lower_dev);
}
EXPORT_SYMBOL(dev_disable_lro);
/**
 * dev_disable_gro_hw - disable HW Generic Receive Offload on a device
 * @dev: device
 *
 * Disable HW Generic Receive Offload (GRO_HW) on a net device. Must be
 * called under RTNL. This is needed if Generic XDP is installed on
 * the device.
 */
static void dev_disable_gro_hw(struct net_device *dev)
{
	dev->wanted_features &= ~NETIF_F_GRO_HW;
	netdev_update_features(dev);

	/* Feature negotiation may refuse to drop GRO_HW; warn loudly. */
	if (unlikely(dev->features & NETIF_F_GRO_HW))
		netdev_WARN(dev, "failed to disable GRO_HW!\n");
}
/* Map a netdev notifier event (enum netdev_cmd) to its symbolic name,
 * e.g. NETDEV_UP -> "NETDEV_UP", for diagnostics and tracing.
 * Returns "UNKNOWN_NETDEV_EVENT" for values not listed.
 * No default: case, presumably so the compiler can warn when a new
 * enum value is missing here.
 */
const char *netdev_cmd_to_name(enum netdev_cmd cmd)
{
#define N(val) 						\
	case NETDEV_##val:				\
		return "NETDEV_" __stringify(val);
	switch (cmd) {
	N(UP) N(DOWN) N(REBOOT) N(CHANGE) N(REGISTER) N(UNREGISTER)
	N(CHANGEMTU) N(CHANGEADDR) N(GOING_DOWN) N(CHANGENAME) N(FEAT_CHANGE)
	N(BONDING_FAILOVER) N(PRE_UP) N(PRE_TYPE_CHANGE) N(POST_TYPE_CHANGE)
	N(POST_INIT) N(RELEASE) N(NOTIFY_PEERS) N(JOIN) N(CHANGEUPPER)
	N(RESEND_IGMP) N(PRECHANGEMTU) N(CHANGEINFODATA) N(BONDING_INFO)
	N(PRECHANGEUPPER) N(CHANGELOWERSTATE) N(UDP_TUNNEL_PUSH_INFO)
	N(UDP_TUNNEL_DROP_INFO) N(CHANGE_TX_QUEUE_LEN)
	N(CVLAN_FILTER_PUSH_INFO) N(CVLAN_FILTER_DROP_INFO)
	N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
	N(PRE_CHANGEADDR)
	}
#undef N
	return "UNKNOWN_NETDEV_EVENT";
}
EXPORT_SYMBOL_GPL(netdev_cmd_to_name);
  1405. static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val,
  1406. struct net_device *dev)
  1407. {
  1408. struct netdev_notifier_info info = {
  1409. .dev = dev,
  1410. };
  1411. return nb->notifier_call(nb, val, &info);
  1412. }
  1413. static int call_netdevice_register_notifiers(struct notifier_block *nb,
  1414. struct net_device *dev)
  1415. {
  1416. int err;
  1417. err = call_netdevice_notifier(nb, NETDEV_REGISTER, dev);
  1418. err = notifier_to_errno(err);
  1419. if (err)
  1420. return err;
  1421. if (!(dev->flags & IFF_UP))
  1422. return 0;
  1423. call_netdevice_notifier(nb, NETDEV_UP, dev);
  1424. return 0;
  1425. }
/* Synthesize shutdown events for @dev towards a notifier being torn
 * down: GOING_DOWN/DOWN when the device is up, then UNREGISTER, so
 * the notifier needs no special-case cleanup.
 */
static void call_netdevice_unregister_notifiers(struct notifier_block *nb,
						struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		call_netdevice_notifier(nb, NETDEV_GOING_DOWN,
					dev);
		call_netdevice_notifier(nb, NETDEV_DOWN, dev);
	}
	call_netdevice_notifier(nb, NETDEV_UNREGISTER, dev);
}
/* Replay REGISTER (and UP where applicable) events to @nb for every
 * device in @net.  On failure, unwinds by synthesizing unregister
 * events for the devices already notified and returns the errno.
 */
static int call_netdevice_register_net_notifiers(struct notifier_block *nb,
						 struct net *net)
{
	struct net_device *dev;
	int err;

	for_each_netdev(net, dev) {
		err = call_netdevice_register_notifiers(nb, dev);
		if (err)
			goto rollback;
	}
	return 0;

rollback:
	/* Walk backwards over the devices that were already notified. */
	for_each_netdev_continue_reverse(net, dev)
		call_netdevice_unregister_notifiers(nb, dev);
	return err;
}
/* Synthesize shutdown/unregister events to @nb for every device in
 * @net (see call_netdevice_unregister_notifiers()).
 */
static void call_netdevice_unregister_net_notifiers(struct notifier_block *nb,
						    struct net *net)
{
	struct net_device *dev;

	for_each_netdev(net, dev)
		call_netdevice_unregister_notifiers(nb, dev);
}
/* Non-zero while the networking core is still booting; notifier
 * registration then skips replaying REGISTER/UP events (see
 * register_netdevice_notifier()).  Presumably cleared by the core
 * init path later in this file -- not visible in this chunk.
 */
static int dev_boot_phase = 1;
/**
 * register_netdevice_notifier - register a network notifier block
 * @nb: notifier
 *
 * Register a notifier to be called when network device events occur.
 * The notifier passed is linked into the kernel structures and must
 * not be reused until it has been unregistered. A negative errno code
 * is returned on a failure.
 *
 * When registered all registration and up events are replayed
 * to the new notifier to allow device to have a race free
 * view of the network device list.
 */
int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net *net;
	int err;

	/* Close race with setup_net() and cleanup_net() */
	down_write(&pernet_ops_rwsem);
	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
	/* During early boot there are no devices to replay events for. */
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		err = call_netdevice_register_net_notifiers(nb, net);
		if (err)
			goto rollback;
	}

unlock:
	rtnl_unlock();
	up_write(&pernet_ops_rwsem);
	return err;

rollback:
	/* Undo the replay for namespaces already processed, then drop
	 * the notifier from the chain again.
	 */
	for_each_net_continue_reverse(net)
		call_netdevice_unregister_net_notifiers(nb, net);
	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}
EXPORT_SYMBOL(register_netdevice_notifier);
/**
 * unregister_netdevice_notifier - unregister a network notifier block
 * @nb: notifier
 *
 * Unregister a notifier previously registered by
 * register_netdevice_notifier(). The notifier is unlinked from the
 * kernel structures and may then be reused. A negative errno code
 * is returned on a failure.
 *
 * After unregistering unregister and down device events are synthesized
 * for all devices on the device list to the removed notifier to remove
 * the need for special case cleanup code.
 */
int unregister_netdevice_notifier(struct notifier_block *nb)
{
	struct net *net;
	int err;

	/* Close race with setup_net() and cleanup_net() */
	down_write(&pernet_ops_rwsem);
	rtnl_lock();
	err = raw_notifier_chain_unregister(&netdev_chain, nb);
	if (err)
		goto unlock;

	/* Synthesize DOWN/UNREGISTER events for every namespace so the
	 * departing notifier sees a clean teardown.
	 */
	for_each_net(net)
		call_netdevice_unregister_net_notifiers(nb, net);

unlock:
	rtnl_unlock();
	up_write(&pernet_ops_rwsem);
	return err;
}
EXPORT_SYMBOL(unregister_netdevice_notifier);
  1532. static int __register_netdevice_notifier_net(struct net *net,
  1533. struct notifier_block *nb,
  1534. bool ignore_call_fail)
  1535. {
  1536. int err;
  1537. err = raw_notifier_chain_register(&net->netdev_chain, nb);
  1538. if (err)
  1539. return err;
  1540. if (dev_boot_phase)
  1541. return 0;
  1542. err = call_netdevice_register_net_notifiers(nb, net);
  1543. if (err && !ignore_call_fail)
  1544. goto chain_unregister;
  1545. return 0;
  1546. chain_unregister:
  1547. raw_notifier_chain_unregister(&net->netdev_chain, nb);
  1548. return err;
  1549. }
  1550. static int __unregister_netdevice_notifier_net(struct net *net,
  1551. struct notifier_block *nb)
  1552. {
  1553. int err;
  1554. err = raw_notifier_chain_unregister(&net->netdev_chain, nb);
  1555. if (err)
  1556. return err;
  1557. call_netdevice_unregister_net_notifiers(nb, net);
  1558. return 0;
  1559. }
  1560. /**
  1561. * register_netdevice_notifier_net - register a per-netns network notifier block
  1562. * @net: network namespace
  1563. * @nb: notifier
  1564. *
  1565. * Register a notifier to be called when network device events occur.
  1566. * The notifier passed is linked into the kernel structures and must
  1567. * not be reused until it has been unregistered. A negative errno code
  1568. * is returned on a failure.
  1569. *
  1570. * When registered all registration and up events are replayed
  1571. * to the new notifier to allow device to have a race free
  1572. * view of the network device list.
  1573. */
  1574. int register_netdevice_notifier_net(struct net *net, struct notifier_block *nb)
  1575. {
  1576. int err;
  1577. rtnl_lock();
  1578. err = __register_netdevice_notifier_net(net, nb, false);
  1579. rtnl_unlock();
  1580. return err;
  1581. }
  1582. EXPORT_SYMBOL(register_netdevice_notifier_net);
/**
 * unregister_netdevice_notifier_net - unregister a per-netns
 *                                     network notifier block
 * @net: network namespace
 * @nb: notifier
 *
 * Unregister a notifier previously registered by
 * register_netdevice_notifier_net(). The notifier is unlinked from the
 * kernel structures and may then be reused. A negative errno code
 * is returned on a failure.
 *
 * After unregistering unregister and down device events are synthesized
 * for all devices on the device list to the removed notifier to remove
 * the need for special case cleanup code.
 */
  1598. int unregister_netdevice_notifier