/net/bridge/br_if.c

Source: http://github.com/mirrors/linux — C, 759 lines (519 code, 153 blank, 87 comment), cyclomatic complexity 97, MD5 2d937c153531ad225c85580a591463f4 (raw file).

  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Userspace interface
  4. * Linux ethernet bridge
  5. *
  6. * Authors:
  7. * Lennert Buytenhek <buytenh@gnu.org>
  8. */
  9. #include <linux/kernel.h>
  10. #include <linux/netdevice.h>
  11. #include <linux/etherdevice.h>
  12. #include <linux/netpoll.h>
  13. #include <linux/ethtool.h>
  14. #include <linux/if_arp.h>
  15. #include <linux/module.h>
  16. #include <linux/init.h>
  17. #include <linux/rtnetlink.h>
  18. #include <linux/if_ether.h>
  19. #include <linux/slab.h>
  20. #include <net/dsa.h>
  21. #include <net/sock.h>
  22. #include <linux/if_vlan.h>
  23. #include <net/switchdev.h>
  24. #include <net/net_namespace.h>
  25. #include "br_private.h"
  26. /*
  27. * Determine initial path cost based on speed.
  28. * using recommendations from 802.1d standard
  29. *
  30. * Since driver might sleep need to not be holding any locks.
  31. */
  32. static int port_cost(struct net_device *dev)
  33. {
  34. struct ethtool_link_ksettings ecmd;
  35. if (!__ethtool_get_link_ksettings(dev, &ecmd)) {
  36. switch (ecmd.base.speed) {
  37. case SPEED_10000:
  38. return 2;
  39. case SPEED_1000:
  40. return 4;
  41. case SPEED_100:
  42. return 19;
  43. case SPEED_10:
  44. return 100;
  45. }
  46. }
  47. /* Old silly heuristics based on name */
  48. if (!strncmp(dev->name, "lec", 3))
  49. return 7;
  50. if (!strncmp(dev->name, "plip", 4))
  51. return 2500;
  52. return 100; /* assume old 10Mbps */
  53. }
/* Check for port carrier transitions.
 *
 * @p:        port whose carrier state may have changed
 * @notified: set to true iff the STP port state was changed here
 */
void br_port_carrier_check(struct net_bridge_port *p, bool *notified)
{
	struct net_device *dev = p->dev;
	struct net_bridge *br = p->br;

	/* Refresh path cost from the current link speed unless the admin
	 * pinned it (BR_ADMIN_COST).  port_cost() may sleep, so do this
	 * before taking br->lock.
	 */
	if (!(p->flags & BR_ADMIN_COST) &&
	    netif_running(dev) && netif_oper_up(dev))
		p->path_cost = port_cost(dev);

	*notified = false;
	if (!netif_running(br->dev))
		return;

	spin_lock_bh(&br->lock);
	if (netif_running(dev) && netif_oper_up(dev)) {
		/* link came up: enable the port if it was disabled */
		if (p->state == BR_STATE_DISABLED) {
			br_stp_enable_port(p);
			*notified = true;
		}
	} else {
		/* link went down: disable the port if it was active */
		if (p->state != BR_STATE_DISABLED) {
			br_stp_disable_port(p);
			*notified = true;
		}
	}
	spin_unlock_bh(&br->lock);
}
  79. static void br_port_set_promisc(struct net_bridge_port *p)
  80. {
  81. int err = 0;
  82. if (br_promisc_port(p))
  83. return;
  84. err = dev_set_promiscuity(p->dev, 1);
  85. if (err)
  86. return;
  87. br_fdb_unsync_static(p->br, p);
  88. p->flags |= BR_PROMISC;
  89. }
  90. static void br_port_clear_promisc(struct net_bridge_port *p)
  91. {
  92. int err;
  93. /* Check if the port is already non-promisc or if it doesn't
  94. * support UNICAST filtering. Without unicast filtering support
  95. * we'll end up re-enabling promisc mode anyway, so just check for
  96. * it here.
  97. */
  98. if (!br_promisc_port(p) || !(p->dev->priv_flags & IFF_UNICAST_FLT))
  99. return;
  100. /* Since we'll be clearing the promisc mode, program the port
  101. * first so that we don't have interruption in traffic.
  102. */
  103. err = br_fdb_sync_static(p->br, p);
  104. if (err)
  105. return;
  106. dev_set_promiscuity(p->dev, -1);
  107. p->flags &= ~BR_PROMISC;
  108. }
/* When a port is added or removed or when certain port flags
 * change, this function is called to automatically manage
 * promiscuity setting of all the bridge ports.  We are always called
 * under RTNL so can skip using rcu primitives.
 */
void br_manage_promisc(struct net_bridge *br)
{
	struct net_bridge_port *p;
	bool set_all = false;

	/* If vlan filtering is disabled or bridge interface is placed
	 * into promiscuous mode, place all ports in promiscuous mode.
	 */
	if ((br->dev->flags & IFF_PROMISC) || !br_vlan_enabled(br->dev))
		set_all = true;

	/* Apply the decision to every port on the bridge */
	list_for_each_entry(p, &br->port_list, list) {
		if (set_all) {
			br_port_set_promisc(p);
		} else {
			/* If the number of auto-ports is <= 1, then all other
			 * ports will have their output configuration
			 * statically specified through fdbs. Since ingress
			 * on the auto-port becomes forwarding/egress to other
			 * ports and egress configuration is statically known,
			 * we can say that ingress configuration of the
			 * auto-port is also statically known.
			 * This lets us disable promiscuous mode and write
			 * this config to hw.
			 */
			if (br->auto_cnt == 0 ||
			    (br->auto_cnt == 1 && br_auto_port(p)))
				br_port_clear_promisc(p);
			else
				br_port_set_promisc(p);
		}
	}
}
/* Point @p's backup port at @backup_dev's bridge port (or clear it when
 * @backup_dev is NULL), keeping backup_redirected_cnt in sync on both
 * the old and new backup ports.
 *
 * Returns 0 on success, -ENOENT if @backup_dev is not a bridge port,
 * -EINVAL if it belongs to a different bridge or would back itself up.
 * Must be called with RTNL held.
 */
int nbp_backup_change(struct net_bridge_port *p,
		      struct net_device *backup_dev)
{
	struct net_bridge_port *old_backup = rtnl_dereference(p->backup_port);
	struct net_bridge_port *backup_p = NULL;

	ASSERT_RTNL();

	if (backup_dev) {
		if (!netif_is_bridge_port(backup_dev))
			return -ENOENT;

		/* backup port must live on the same bridge */
		backup_p = br_port_get_rtnl(backup_dev);
		if (backup_p->br != p->br)
			return -EINVAL;
	}

	/* a port cannot be its own backup */
	if (p == backup_p)
		return -EINVAL;

	/* no change requested */
	if (old_backup == backup_p)
		return 0;

	/* if the backup link is already set, clear it */
	if (old_backup)
		old_backup->backup_redirected_cnt--;
	if (backup_p)
		backup_p->backup_redirected_cnt++;

	/* publish for RCU readers of p->backup_port */
	rcu_assign_pointer(p->backup_port, backup_p);

	return 0;
}
/* Drop @p's own backup link and detach @p as the backup of any other
 * port on the bridge.  Called under RTNL (required by
 * nbp_backup_change()) when the port is being removed.
 */
static void nbp_backup_clear(struct net_bridge_port *p)
{
	nbp_backup_change(p, NULL);
	if (p->backup_redirected_cnt) {
		struct net_bridge_port *cur_p;

		/* walk all ports and clear any backup pointer aimed at us */
		list_for_each_entry(cur_p, &p->br->port_list, list) {
			struct net_bridge_port *backup_p;

			backup_p = rtnl_dereference(cur_p->backup_port);
			if (backup_p == p)
				nbp_backup_change(cur_p, NULL);
		}
	}

	/* both must be zero now, otherwise the accounting above is broken */
	WARN_ON(rcu_access_pointer(p->backup_port) || p->backup_redirected_cnt);
}
  184. static void nbp_update_port_count(struct net_bridge *br)
  185. {
  186. struct net_bridge_port *p;
  187. u32 cnt = 0;
  188. list_for_each_entry(p, &br->port_list, list) {
  189. if (br_auto_port(p))
  190. cnt++;
  191. }
  192. if (br->auto_cnt != cnt) {
  193. br->auto_cnt = cnt;
  194. br_manage_promisc(br);
  195. }
  196. }
  197. static void nbp_delete_promisc(struct net_bridge_port *p)
  198. {
  199. /* If port is currently promiscuous, unset promiscuity.
  200. * Otherwise, it is a static port so remove all addresses
  201. * from it.
  202. */
  203. dev_set_allmulti(p->dev, -1);
  204. if (br_promisc_port(p))
  205. dev_set_promiscuity(p->dev, -1);
  206. else
  207. br_fdb_unsync_static(p->br, p);
  208. }
  209. static void release_nbp(struct kobject *kobj)
  210. {
  211. struct net_bridge_port *p
  212. = container_of(kobj, struct net_bridge_port, kobj);
  213. kfree(p);
  214. }
  215. static void brport_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid)
  216. {
  217. struct net_bridge_port *p = kobj_to_brport(kobj);
  218. net_ns_get_ownership(dev_net(p->dev), uid, gid);
  219. }
/* kobject type for bridge ports: wires up sysfs attribute access and
 * ownership, and frees the port on the final kobject_put().
 */
static struct kobj_type brport_ktype = {
#ifdef CONFIG_SYSFS
	.sysfs_ops = &brport_sysfs_ops,
#endif
	.release = release_nbp,
	.get_ownership = brport_get_ownership,
};
  227. static void destroy_nbp(struct net_bridge_port *p)
  228. {
  229. struct net_device *dev = p->dev;
  230. p->br = NULL;
  231. p->dev = NULL;
  232. dev_put(dev);
  233. kobject_put(&p->kobj);
  234. }
  235. static void destroy_nbp_rcu(struct rcu_head *head)
  236. {
  237. struct net_bridge_port *p =
  238. container_of(head, struct net_bridge_port, rcu);
  239. destroy_nbp(p);
  240. }
  241. static unsigned get_max_headroom(struct net_bridge *br)
  242. {
  243. unsigned max_headroom = 0;
  244. struct net_bridge_port *p;
  245. list_for_each_entry(p, &br->port_list, list) {
  246. unsigned dev_headroom = netdev_get_fwd_headroom(p->dev);
  247. if (dev_headroom > max_headroom)
  248. max_headroom = dev_headroom;
  249. }
  250. return max_headroom;
  251. }
/* Propagate a new rx headroom to every port device and record it as
 * the bridge device's own needed_headroom.
 */
static void update_headroom(struct net_bridge *br, int new_hr)
{
	struct net_bridge_port *p;

	list_for_each_entry(p, &br->port_list, list)
		netdev_set_rx_headroom(p->dev, new_hr);

	br->dev->needed_headroom = new_hr;
}
/* Delete port(interface) from bridge is done in two steps.
 * via RCU. First step, marks device as down. That deletes
 * all the timers and stops new packets from flowing through.
 *
 * Final cleanup doesn't occur until after all CPU's finished
 * processing packets.
 *
 * Protected from multiple admin operations by RTNL mutex
 */
static void del_nbp(struct net_bridge_port *p)
{
	struct net_bridge *br = p->br;
	struct net_device *dev = p->dev;

	sysfs_remove_link(br->ifobj, p->dev->name);

	/* drop the promiscuity/allmulti references taken at add time */
	nbp_delete_promisc(p);

	spin_lock_bh(&br->lock);
	br_stp_disable_port(p);
	spin_unlock_bh(&br->lock);

	br_ifinfo_notify(RTM_DELLINK, NULL, p);

	/* unhook from the port list; RCU readers may still traverse it
	 * until the grace period triggered by call_rcu() below
	 */
	list_del_rcu(&p->list);

	/* if this port dictated the bridge headroom, recompute it */
	if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom)
		update_headroom(br, get_max_headroom(br));
	netdev_reset_rx_headroom(dev);

	nbp_vlan_flush(p);
	br_fdb_delete_by_port(br, p, 0, 1);
	switchdev_deferred_process();
	nbp_backup_clear(p);
	nbp_update_port_count(br);

	netdev_upper_dev_unlink(dev, br->dev);

	dev->priv_flags &= ~IFF_BRIDGE_PORT;

	netdev_rx_handler_unregister(dev);

	br_multicast_del_port(p);

	kobject_uevent(&p->kobj, KOBJ_REMOVE);
	kobject_del(&p->kobj);

	br_netpoll_disable(p);

	/* free only after the RCU grace period (see list_del_rcu above) */
	call_rcu(&p->rcu, destroy_nbp_rcu);
}
/* Delete bridge device */
void br_dev_delete(struct net_device *dev, struct list_head *head)
{
	struct net_bridge *br = netdev_priv(dev);
	struct net_bridge_port *p, *n;

	/* tear down every remaining port first */
	list_for_each_entry_safe(p, n, &br->port_list, list) {
		del_nbp(p);
	}

	br_recalculate_neigh_suppress_enabled(br);

	/* bridge-wide fdb flush (port == NULL) */
	br_fdb_delete_by_port(br, NULL, 0, 1);

	cancel_delayed_work_sync(&br->gc_work);

	br_sysfs_delbr(br->dev);
	unregister_netdevice_queue(br->dev, head);
}
  310. /* find an available port number */
  311. static int find_portno(struct net_bridge *br)
  312. {
  313. int index;
  314. struct net_bridge_port *p;
  315. unsigned long *inuse;
  316. inuse = bitmap_zalloc(BR_MAX_PORTS, GFP_KERNEL);
  317. if (!inuse)
  318. return -ENOMEM;
  319. set_bit(0, inuse); /* zero is reserved */
  320. list_for_each_entry(p, &br->port_list, list) {
  321. set_bit(p->port_no, inuse);
  322. }
  323. index = find_first_zero_bit(inuse, BR_MAX_PORTS);
  324. bitmap_free(inuse);
  325. return (index >= BR_MAX_PORTS) ? -EXFULL : index;
  326. }
/* called with RTNL but without bridge lock.
 * Allocate and initialize a new port for @dev on @br.  Returns the
 * port or an ERR_PTR on failure.
 */
static struct net_bridge_port *new_nbp(struct net_bridge *br,
				       struct net_device *dev)
{
	struct net_bridge_port *p;
	int index, err;

	/* claim a free port number (may return -EXFULL/-ENOMEM) */
	index = find_portno(br);
	if (index < 0)
		return ERR_PTR(index);

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (p == NULL)
		return ERR_PTR(-ENOMEM);

	p->br = br;
	dev_hold(dev);		/* released in destroy_nbp() */
	p->dev = dev;
	p->path_cost = port_cost(dev);	/* may sleep; no locks held here */
	p->priority = 0x8000 >> BR_PORT_BITS;
	p->port_no = index;
	p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD | BR_BCAST_FLOOD;
	br_init_port(p);
	br_set_state(p, BR_STATE_DISABLED);
	br_stp_port_timer_init(p);
	err = br_multicast_add_port(p);
	if (err) {
		/* kobject not initialized yet, so free manually */
		dev_put(dev);
		kfree(p);
		p = ERR_PTR(err);
	}

	return p;
}
  357. int br_add_bridge(struct net *net, const char *name)
  358. {
  359. struct net_device *dev;
  360. int res;
  361. dev = alloc_netdev(sizeof(struct net_bridge), name, NET_NAME_UNKNOWN,
  362. br_dev_setup);
  363. if (!dev)
  364. return -ENOMEM;
  365. dev_net_set(dev, net);
  366. dev->rtnl_link_ops = &br_link_ops;
  367. res = register_netdev(dev);
  368. if (res)
  369. free_netdev(dev);
  370. return res;
  371. }
  372. int br_del_bridge(struct net *net, const char *name)
  373. {
  374. struct net_device *dev;
  375. int ret = 0;
  376. rtnl_lock();
  377. dev = __dev_get_by_name(net, name);
  378. if (dev == NULL)
  379. ret = -ENXIO; /* Could not find device */
  380. else if (!(dev->priv_flags & IFF_EBRIDGE)) {
  381. /* Attempt to delete non bridge device! */
  382. ret = -EPERM;
  383. }
  384. else if (dev->flags & IFF_UP) {
  385. /* Not shutdown yet. */
  386. ret = -EBUSY;
  387. }
  388. else
  389. br_dev_delete(dev, NULL);
  390. rtnl_unlock();
  391. return ret;
  392. }
  393. /* MTU of the bridge pseudo-device: ETH_DATA_LEN or the minimum of the ports */
  394. static int br_mtu_min(const struct net_bridge *br)
  395. {
  396. const struct net_bridge_port *p;
  397. int ret_mtu = 0;
  398. list_for_each_entry(p, &br->port_list, list)
  399. if (!ret_mtu || ret_mtu > p->dev->mtu)
  400. ret_mtu = p->dev->mtu;
  401. return ret_mtu ? ret_mtu : ETH_DATA_LEN;
  402. }
  403. void br_mtu_auto_adjust(struct net_bridge *br)
  404. {
  405. ASSERT_RTNL();
  406. /* if the bridge MTU was manually configured don't mess with it */
  407. if (br_opt_get(br, BROPT_MTU_SET_BY_USER))
  408. return;
  409. /* change to the minimum MTU and clear the flag which was set by
  410. * the bridge ndo_change_mtu callback
  411. */
  412. dev_set_mtu(br->dev, br_mtu_min(br));
  413. br_opt_toggle(br, BROPT_MTU_SET_BY_USER, false);
  414. }
/* Set the bridge device's GSO limits to the minimum across all ports
 * (the GSO_MAX_* defaults apply when the port list is empty).
 */
static void br_set_gso_limits(struct net_bridge *br)
{
	unsigned int gso_max_size = GSO_MAX_SIZE;
	u16 gso_max_segs = GSO_MAX_SEGS;
	const struct net_bridge_port *p;

	list_for_each_entry(p, &br->port_list, list) {
		gso_max_size = min(gso_max_size, p->dev->gso_max_size);
		gso_max_segs = min(gso_max_segs, p->dev->gso_max_segs);
	}
	br->dev->gso_max_size = gso_max_size;
	br->dev->gso_max_segs = gso_max_segs;
}
/*
 * Recomputes features using slave's features
 */
netdev_features_t br_features_recompute(struct net_bridge *br,
					netdev_features_t features)
{
	struct net_bridge_port *p;
	netdev_features_t mask;

	/* no ports: return the caller's feature set unchanged */
	if (list_empty(&br->port_list))
		return features;

	mask = features;
	features &= ~NETIF_F_ONE_FOR_ALL;

	/* fold in each port's features against the original mask */
	list_for_each_entry(p, &br->port_list, list) {
		features = netdev_increment_features(features,
						     p->dev->features, mask);
	}
	features = netdev_add_tso_features(features, mask);

	return features;
}
/* called with RTNL.
 * Enslave @dev as a new port of @br: validate the device, allocate and
 * wire up the port, then roll everything back through the err1..err7
 * labels on failure.  Returns 0 or a negative errno.
 */
int br_add_if(struct net_bridge *br, struct net_device *dev,
	      struct netlink_ext_ack *extack)
{
	struct net_bridge_port *p;
	int err = 0;
	unsigned br_hr, dev_hr;
	bool changed_addr;

	/* Don't allow bridging non-ethernet like devices, or DSA-enabled
	 * master network devices since the bridge layer rx_handler prevents
	 * the DSA fake ethertype handler to be invoked, so we do not strip off
	 * the DSA switch tag protocol header and the bridge layer just return
	 * RX_HANDLER_CONSUMED, stopping RX processing for these frames.
	 */
	if ((dev->flags & IFF_LOOPBACK) ||
	    dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN ||
	    !is_valid_ether_addr(dev->dev_addr) ||
	    netdev_uses_dsa(dev))
		return -EINVAL;

	/* No bridging of bridges */
	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) {
		NL_SET_ERR_MSG(extack,
			       "Can not enslave a bridge to a bridge");
		return -ELOOP;
	}

	/* Device has master upper dev */
	if (netdev_master_upper_dev_get(dev))
		return -EBUSY;

	/* No bridging devices that dislike that (e.g. wireless) */
	if (dev->priv_flags & IFF_DONT_BRIDGE) {
		NL_SET_ERR_MSG(extack,
			       "Device does not allow enslaving to a bridge");
		return -EOPNOTSUPP;
	}

	p = new_nbp(br, dev);
	if (IS_ERR(p))
		return PTR_ERR(p);

	call_netdevice_notifiers(NETDEV_JOIN, dev);

	err = dev_set_allmulti(dev, 1);
	if (err) {
		kfree(p);	/* kobject not yet init'd, manually free */
		goto err1;
	}

	err = kobject_init_and_add(&p->kobj, &brport_ktype, &(dev->dev.kobj),
				   SYSFS_BRIDGE_PORT_ATTR);
	if (err)
		goto err2;

	err = br_sysfs_addif(p);
	if (err)
		goto err2;

	err = br_netpoll_enable(p);
	if (err)
		goto err3;

	err = netdev_rx_handler_register(dev, br_handle_frame, p);
	if (err)
		goto err4;

	dev->priv_flags |= IFF_BRIDGE_PORT;

	err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, extack);
	if (err)
		goto err5;

	err = nbp_switchdev_mark_set(p);
	if (err)
		goto err6;

	dev_disable_lro(dev);

	/* port becomes visible to the data path from here on */
	list_add_rcu(&p->list, &br->port_list);

	nbp_update_port_count(br);

	netdev_update_features(br->dev);

	/* grow the bridge headroom if this port needs more */
	br_hr = br->dev->needed_headroom;
	dev_hr = netdev_get_fwd_headroom(dev);
	if (br_hr < dev_hr)
		update_headroom(br, dev_hr);
	else
		netdev_set_rx_headroom(dev, br_hr);

	if (br_fdb_insert(br, p, dev->dev_addr, 0))
		netdev_err(dev, "failed insert local address bridge forwarding table\n");

	if (br->dev->addr_assign_type != NET_ADDR_SET) {
		/* Ask for permission to use this MAC address now, even if we
		 * don't end up choosing it below.
		 */
		err = dev_pre_changeaddr_notify(br->dev, dev->dev_addr, extack);
		if (err)
			goto err7;
	}

	err = nbp_vlan_init(p, extack);
	if (err) {
		netdev_err(dev, "failed to initialize vlan filtering on this port\n");
		goto err7;
	}

	spin_lock_bh(&br->lock);
	changed_addr = br_stp_recalculate_bridge_id(br);

	if (netif_running(dev) && netif_oper_up(dev) &&
	    (br->dev->flags & IFF_UP))
		br_stp_enable_port(p);
	spin_unlock_bh(&br->lock);

	br_ifinfo_notify(RTM_NEWLINK, NULL, p);

	if (changed_addr)
		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);

	br_mtu_auto_adjust(br);
	br_set_gso_limits(br);

	kobject_uevent(&p->kobj, KOBJ_ADD);

	return 0;

	/* unwind in reverse order of the setup above */
err7:
	list_del_rcu(&p->list);
	br_fdb_delete_by_port(br, p, 0, 1);
	nbp_update_port_count(br);
err6:
	netdev_upper_dev_unlink(dev, br->dev);
err5:
	dev->priv_flags &= ~IFF_BRIDGE_PORT;
	netdev_rx_handler_unregister(dev);
err4:
	br_netpoll_disable(p);
err3:
	sysfs_remove_link(br->ifobj, p->dev->name);
err2:
	kobject_put(&p->kobj);
	dev_set_allmulti(dev, -1);
err1:
	dev_put(dev);
	return err;
}
/* called with RTNL.
 * Detach @dev from @br; returns -EINVAL if @dev is not a port of this
 * bridge, 0 otherwise.
 */
int br_del_if(struct net_bridge *br, struct net_device *dev)
{
	struct net_bridge_port *p;
	bool changed_addr;

	p = br_port_get_rtnl(dev);
	if (!p || p->br != br)
		return -EINVAL;

	/* Since more than one interface can be attached to a bridge,
	 * there still maybe an alternate path for netconsole to use;
	 * therefore there is no reason for a NETDEV_RELEASE event.
	 */
	del_nbp(p);

	/* recompute bridge-wide values the removed port influenced */
	br_mtu_auto_adjust(br);
	br_set_gso_limits(br);

	spin_lock_bh(&br->lock);
	changed_addr = br_stp_recalculate_bridge_id(br);
	spin_unlock_bh(&br->lock);

	if (changed_addr)
		call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev);

	netdev_update_features(br->dev);

	return 0;
}
  590. void br_port_flags_change(struct net_bridge_port *p, unsigned long mask)
  591. {
  592. struct net_bridge *br = p->br;
  593. if (mask & BR_AUTO_MASK)
  594. nbp_update_port_count(br);
  595. if (mask & BR_NEIGH_SUPPRESS)
  596. br_recalculate_neigh_suppress_enabled(br);
  597. }
  598. bool br_port_flag_is_set(const struct net_device *dev, unsigned long flag)
  599. {
  600. struct net_bridge_port *p;
  601. p = br_port_get_rtnl_rcu(dev);
  602. if (!p)
  603. return false;
  604. return p->flags & flag;
  605. }
  606. EXPORT_SYMBOL_GPL(br_port_flag_is_set);