/drivers/net/bonding/bond_alb.c

http://github.com/mirrors/linux · C · 1811 lines · 1225 code · 297 blank · 289 comment · 247 complexity · 76521cf87b1f42de51bd4870fa6e3b2c MD5 · raw file

  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Copyright(c) 1999 - 2004 Intel Corporation. All rights reserved.
  4. */
  5. #include <linux/skbuff.h>
  6. #include <linux/netdevice.h>
  7. #include <linux/etherdevice.h>
  8. #include <linux/pkt_sched.h>
  9. #include <linux/spinlock.h>
  10. #include <linux/slab.h>
  11. #include <linux/timer.h>
  12. #include <linux/ip.h>
  13. #include <linux/ipv6.h>
  14. #include <linux/if_arp.h>
  15. #include <linux/if_ether.h>
  16. #include <linux/if_bonding.h>
  17. #include <linux/if_vlan.h>
  18. #include <linux/in.h>
  19. #include <net/ipx.h>
  20. #include <net/arp.h>
  21. #include <net/ipv6.h>
  22. #include <asm/byteorder.h>
  23. #include <net/bonding.h>
  24. #include <net/bond_alb.h>
/* 33:33:00:00:00:01 is the Ethernet mapping of the IPv6 all-nodes
 * link-local multicast group.  Sized ETH_ALEN + 2 and __long_aligned
 * so the 64-bit Ethernet-address helpers may read past the 6th byte.
 */
static const u8 mac_v6_allmcast[ETH_ALEN + 2] __long_aligned = {
	0x33, 0x33, 0x00, 0x00, 0x00, 0x01
};

/* Length of one ALB monitor tick in jiffies */
static const int alb_delta_in_ticks = HZ / ALB_TIMER_TICKS_PER_SEC;
/* The structures below mirror on-the-wire frame layouts, so they are
 * packed to prevent the compiler from inserting padding.
 */
#pragma pack(1)
/* Dummy Ethernet frame whose source MAC teaches switches which port a
 * given address lives behind; padded up to the minimum frame payload.
 */
struct learning_pkt {
	u8 mac_dst[ETH_ALEN];
	u8 mac_src[ETH_ALEN];
	__be16 type;
	u8 padding[ETH_ZLEN - ETH_HLEN];
};

/* Wire format of an Ethernet/IPv4 ARP payload (RFC 826) */
struct arp_pkt {
	__be16 hw_addr_space;
	__be16 prot_addr_space;
	u8 hw_addr_len;
	u8 prot_addr_len;
	__be16 op_code;
	u8 mac_src[ETH_ALEN];	/* sender hardware address */
	__be32 ip_src;		/* sender IP address */
	u8 mac_dst[ETH_ALEN];	/* target hardware address */
	__be32 ip_dst;		/* target IP address */
};
#pragma pack()
  48. /* Forward declaration */
  49. static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[],
  50. bool strict_match);
  51. static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp);
  52. static void rlb_src_unlink(struct bonding *bond, u32 index);
  53. static void rlb_src_link(struct bonding *bond, u32 ip_src_hash,
  54. u32 ip_dst_hash);
  55. static inline u8 _simple_hash(const u8 *hash_start, int hash_size)
  56. {
  57. int i;
  58. u8 hash = 0;
  59. for (i = 0; i < hash_size; i++)
  60. hash ^= hash_start[i];
  61. return hash;
  62. }
  63. /*********************** tlb specific functions ***************************/
  64. static inline void tlb_init_table_entry(struct tlb_client_info *entry, int save_load)
  65. {
  66. if (save_load) {
  67. entry->load_history = 1 + entry->tx_bytes /
  68. BOND_TLB_REBALANCE_INTERVAL;
  69. entry->tx_bytes = 0;
  70. }
  71. entry->tx_slave = NULL;
  72. entry->next = TLB_NULL_INDEX;
  73. entry->prev = TLB_NULL_INDEX;
  74. }
  75. static inline void tlb_init_slave(struct slave *slave)
  76. {
  77. SLAVE_TLB_INFO(slave).load = 0;
  78. SLAVE_TLB_INFO(slave).head = TLB_NULL_INDEX;
  79. }
  80. static void __tlb_clear_slave(struct bonding *bond, struct slave *slave,
  81. int save_load)
  82. {
  83. struct tlb_client_info *tx_hash_table;
  84. u32 index;
  85. /* clear slave from tx_hashtbl */
  86. tx_hash_table = BOND_ALB_INFO(bond).tx_hashtbl;
  87. /* skip this if we've already freed the tx hash table */
  88. if (tx_hash_table) {
  89. index = SLAVE_TLB_INFO(slave).head;
  90. while (index != TLB_NULL_INDEX) {
  91. u32 next_index = tx_hash_table[index].next;
  92. tlb_init_table_entry(&tx_hash_table[index], save_load);
  93. index = next_index;
  94. }
  95. }
  96. tlb_init_slave(slave);
  97. }
/* Locked wrapper for __tlb_clear_slave(): takes mode_lock with bottom
 * halves disabled while @slave's hash entries are reset.
 */
static void tlb_clear_slave(struct bonding *bond, struct slave *slave,
			    int save_load)
{
	spin_lock_bh(&bond->mode_lock);
	__tlb_clear_slave(bond, slave, save_load);
	spin_unlock_bh(&bond->mode_lock);
}
  105. /* Must be called before starting the monitor timer */
  106. static int tlb_initialize(struct bonding *bond)
  107. {
  108. struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
  109. int size = TLB_HASH_TABLE_SIZE * sizeof(struct tlb_client_info);
  110. struct tlb_client_info *new_hashtbl;
  111. int i;
  112. new_hashtbl = kzalloc(size, GFP_KERNEL);
  113. if (!new_hashtbl)
  114. return -ENOMEM;
  115. spin_lock_bh(&bond->mode_lock);
  116. bond_info->tx_hashtbl = new_hashtbl;
  117. for (i = 0; i < TLB_HASH_TABLE_SIZE; i++)
  118. tlb_init_table_entry(&bond_info->tx_hashtbl[i], 0);
  119. spin_unlock_bh(&bond->mode_lock);
  120. return 0;
  121. }
/* Free the tlb tx hash table.
 * Must be called only after all slaves have been released; the NULL
 * store under mode_lock lets concurrent readers detect the teardown.
 */
static void tlb_deinitialize(struct bonding *bond)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));

	spin_lock_bh(&bond->mode_lock);
	kfree(bond_info->tx_hashtbl);
	bond_info->tx_hashtbl = NULL;
	spin_unlock_bh(&bond->mode_lock);
}
  131. static long long compute_gap(struct slave *slave)
  132. {
  133. return (s64) (slave->speed << 20) - /* Convert to Megabit per sec */
  134. (s64) (SLAVE_TLB_INFO(slave).load << 3); /* Bytes to bits */
  135. }
  136. static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
  137. {
  138. struct slave *slave, *least_loaded;
  139. struct list_head *iter;
  140. long long max_gap;
  141. least_loaded = NULL;
  142. max_gap = LLONG_MIN;
  143. /* Find the slave with the largest gap */
  144. bond_for_each_slave_rcu(bond, slave, iter) {
  145. if (bond_slave_can_tx(slave)) {
  146. long long gap = compute_gap(slave);
  147. if (max_gap < gap) {
  148. least_loaded = slave;
  149. max_gap = gap;
  150. }
  151. }
  152. }
  153. return least_loaded;
  154. }
/* Look up (or establish) the tx slave for @hash_index and account
 * @skb_len bytes against the entry.  An unassigned entry is bound to
 * the least loaded slave and pushed onto the head of that slave's
 * doubly-linked entry chain.  Caller must hold mode_lock.
 * Returns the chosen slave, or NULL if none is available.
 */
static struct slave *__tlb_choose_channel(struct bonding *bond, u32 hash_index,
					  u32 skb_len)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct tlb_client_info *hash_table;
	struct slave *assigned_slave;

	hash_table = bond_info->tx_hashtbl;
	assigned_slave = hash_table[hash_index].tx_slave;
	if (!assigned_slave) {
		assigned_slave = tlb_get_least_loaded_slave(bond);
		if (assigned_slave) {
			struct tlb_slave_info *slave_info =
				&(SLAVE_TLB_INFO(assigned_slave));
			u32 next_index = slave_info->head;

			/* link the entry at the head of the slave's chain */
			hash_table[hash_index].tx_slave = assigned_slave;
			hash_table[hash_index].next = next_index;
			hash_table[hash_index].prev = TLB_NULL_INDEX;
			if (next_index != TLB_NULL_INDEX)
				hash_table[next_index].prev = hash_index;
			slave_info->head = hash_index;
			/* carry the flow's historical load over to the
			 * newly chosen slave
			 */
			slave_info->load +=
				hash_table[hash_index].load_history;
		}
	}

	if (assigned_slave)
		hash_table[hash_index].tx_bytes += skb_len;

	return assigned_slave;
}
/* Locked wrapper for __tlb_choose_channel() */
static struct slave *tlb_choose_channel(struct bonding *bond, u32 hash_index,
					u32 skb_len)
{
	struct slave *tx_slave;

	/* We don't need to disable softirq here, because
	 * tlb_choose_channel() is only called by bond_alb_xmit()
	 * which already has softirq disabled.
	 */
	spin_lock(&bond->mode_lock);
	tx_slave = __tlb_choose_channel(bond, hash_index, skb_len);
	spin_unlock(&bond->mode_lock);

	return tx_slave;
}
  196. /*********************** rlb specific functions ***************************/
  197. /* when an ARP REPLY is received from a client update its info
  198. * in the rx_hashtbl
  199. */
  200. static void rlb_update_entry_from_arp(struct bonding *bond, struct arp_pkt *arp)
  201. {
  202. struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
  203. struct rlb_client_info *client_info;
  204. u32 hash_index;
  205. spin_lock_bh(&bond->mode_lock);
  206. hash_index = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src));
  207. client_info = &(bond_info->rx_hashtbl[hash_index]);
  208. if ((client_info->assigned) &&
  209. (client_info->ip_src == arp->ip_dst) &&
  210. (client_info->ip_dst == arp->ip_src) &&
  211. (!ether_addr_equal_64bits(client_info->mac_dst, arp->mac_src))) {
  212. /* update the clients MAC address */
  213. ether_addr_copy(client_info->mac_dst, arp->mac_src);
  214. client_info->ntt = 1;
  215. bond_info->rx_ntt = 1;
  216. }
  217. spin_unlock_bh(&bond->mode_lock);
  218. }
  219. static int rlb_arp_recv(const struct sk_buff *skb, struct bonding *bond,
  220. struct slave *slave)
  221. {
  222. struct arp_pkt *arp, _arp;
  223. if (skb->protocol != cpu_to_be16(ETH_P_ARP))
  224. goto out;
  225. arp = skb_header_pointer(skb, 0, sizeof(_arp), &_arp);
  226. if (!arp)
  227. goto out;
  228. /* We received an ARP from arp->ip_src.
  229. * We might have used this IP address previously (on the bonding host
  230. * itself or on a system that is bridged together with the bond).
  231. * However, if arp->mac_src is different than what is stored in
  232. * rx_hashtbl, some other host is now using the IP and we must prevent
  233. * sending out client updates with this IP address and the old MAC
  234. * address.
  235. * Clean up all hash table entries that have this address as ip_src but
  236. * have a different mac_src.
  237. */
  238. rlb_purge_src_ip(bond, arp);
  239. if (arp->op_code == htons(ARPOP_REPLY)) {
  240. /* update rx hash table for this ARP */
  241. rlb_update_entry_from_arp(bond, arp);
  242. slave_dbg(bond->dev, slave->dev, "Server received an ARP Reply from client\n");
  243. }
  244. out:
  245. return RX_HANDLER_ANOTHER;
  246. }
/* Round-robin style pick of the next rx slave, preferring faster links.
 * One pass over the slave list tracks two candidates: the best slave
 * seen before (and including) the current rx_slave ("before") and the
 * best seen after it ("rx_slave").  The one after wins unless the one
 * before is strictly faster, which rotates assignments across slaves
 * of equal speed.  Updates bond_info->rx_slave with the choice.
 * Caller must hold rcu_read_lock().
 */
static struct slave *__rlb_next_rx_slave(struct bonding *bond)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct slave *before = NULL, *rx_slave = NULL, *slave;
	struct list_head *iter;
	bool found = false;

	bond_for_each_slave_rcu(bond, slave, iter) {
		if (!bond_slave_can_tx(slave))
			continue;
		if (!found) {
			if (!before || before->speed < slave->speed)
				before = slave;
		} else {
			if (!rx_slave || rx_slave->speed < slave->speed)
				rx_slave = slave;
		}
		/* flip phases once we pass the currently assigned slave */
		if (slave == bond_info->rx_slave)
			found = true;
	}
	/* we didn't find anything after the current or we have something
	 * better before and up to the current slave
	 */
	if (!rx_slave || (before && rx_slave->speed < before->speed))
		rx_slave = before;

	if (rx_slave)
		bond_info->rx_slave = rx_slave;

	return rx_slave;
}
/* Caller must hold RTNL, rcu_read_lock is obtained only to silence checkers */
static struct slave *rlb_next_rx_slave(struct bonding *bond)
{
	struct slave *rx_slave;

	ASSERT_RTNL();

	rcu_read_lock();
	rx_slave = __rlb_next_rx_slave(bond);
	rcu_read_unlock();

	return rx_slave;
}
  286. /* teach the switch the mac of a disabled slave
  287. * on the primary for fault tolerance
  288. *
  289. * Caller must hold RTNL
  290. */
  291. static void rlb_teach_disabled_mac_on_primary(struct bonding *bond, u8 addr[])
  292. {
  293. struct slave *curr_active = rtnl_dereference(bond->curr_active_slave);
  294. if (!curr_active)
  295. return;
  296. if (!bond->alb_info.primary_is_promisc) {
  297. if (!dev_set_promiscuity(curr_active->dev, 1))
  298. bond->alb_info.primary_is_promisc = 1;
  299. else
  300. bond->alb_info.primary_is_promisc = 0;
  301. }
  302. bond->alb_info.rlb_promisc_timeout_counter = 0;
  303. alb_send_learning_packets(curr_active, addr, true);
  304. }
/* Detach @slave from the rlb rx hash table, re-homing each of its
 * entries onto the next rx slave (or NULL if none remains).  Finally
 * teach the switch the slave's MAC via the primary, unless the slave
 * being removed IS the current active slave.
 *
 * The slave being removed should not be active at this point.
 *
 * Caller must hold rtnl.
 */
static void rlb_clear_slave(struct bonding *bond, struct slave *slave)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct rlb_client_info *rx_hash_table;
	u32 index, next_index;

	/* clear slave from rx_hashtbl */
	spin_lock_bh(&bond->mode_lock);

	rx_hash_table = bond_info->rx_hashtbl;
	index = bond_info->rx_hashtbl_used_head;
	for (; index != RLB_NULL_INDEX; index = next_index) {
		next_index = rx_hash_table[index].used_next;
		if (rx_hash_table[index].slave == slave) {
			struct slave *assigned_slave = rlb_next_rx_slave(bond);

			if (assigned_slave) {
				rx_hash_table[index].slave = assigned_slave;
				if (is_valid_ether_addr(rx_hash_table[index].mac_dst)) {
					bond_info->rx_hashtbl[index].ntt = 1;
					bond_info->rx_ntt = 1;
					/* A slave has been removed from the
					 * table because it is either disabled
					 * or being released. We must retry the
					 * update to avoid clients from not
					 * being updated & disconnecting when
					 * there is stress
					 */
					bond_info->rlb_update_retry_counter =
						RLB_UPDATE_RETRY;
				}
			} else {  /* there is no active slave */
				rx_hash_table[index].slave = NULL;
			}
		}
	}

	spin_unlock_bh(&bond->mode_lock);

	if (slave != rtnl_dereference(bond->curr_active_slave))
		rlb_teach_disabled_mac_on_primary(bond, slave->dev->dev_addr);
}
/* Send a burst of unsolicited ARP REPLIES to one client, telling it to
 * reach ip_src via the assigned slave's MAC.  Skipped when the entry
 * has no slave or the client's MAC is not yet a valid unicast address.
 * Note the argument order of arp_create(): the client's MAC is used
 * both as destination hardware address and as the ARP target field.
 */
static void rlb_update_client(struct rlb_client_info *client_info)
{
	int i;

	if (!client_info->slave || !is_valid_ether_addr(client_info->mac_dst))
		return;

	for (i = 0; i < RLB_ARP_BURST_SIZE; i++) {
		struct sk_buff *skb;

		skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
				 client_info->ip_dst,
				 client_info->slave->dev,
				 client_info->ip_src,
				 client_info->mac_dst,
				 client_info->slave->dev->dev_addr,
				 client_info->mac_dst);
		if (!skb) {
			slave_err(client_info->slave->bond->dev,
				  client_info->slave->dev,
				  "failed to create an ARP packet\n");
			continue;
		}

		skb->dev = client_info->slave->dev;

		/* tag the reply when the client sits behind a VLAN */
		if (client_info->vlan_id) {
			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
					       client_info->vlan_id);
		}

		arp_xmit(skb);
	}
}
/* sends ARP REPLIES that update the clients that need updating (ntt set) */
static void rlb_update_rx_clients(struct bonding *bond)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct rlb_client_info *client_info;
	u32 hash_index;

	spin_lock_bh(&bond->mode_lock);

	/* walk the in-use list of rx hash entries */
	hash_index = bond_info->rx_hashtbl_used_head;
	for (; hash_index != RLB_NULL_INDEX;
	     hash_index = client_info->used_next) {
		client_info = &(bond_info->rx_hashtbl[hash_index]);
		if (client_info->ntt) {
			rlb_update_client(client_info);
			/* keep ntt set while retries are pending so the
			 * client is updated again on the next pass
			 */
			if (bond_info->rlb_update_retry_counter == 0)
				client_info->ntt = 0;
		}
	}

	/* do not update the entries again until this counter is zero so that
	 * not to confuse the clients.
	 */
	bond_info->rlb_update_delay_counter = RLB_UPDATE_DELAY;

	spin_unlock_bh(&bond->mode_lock);
}
/* The slave was assigned a new mac address - update the clients.
 * Marks ntt on every rx hash entry served by @slave that already has a
 * valid client MAC; the actual ARP updates are sent later by
 * rlb_update_rx_clients().
 */
static void rlb_req_update_slave_clients(struct bonding *bond, struct slave *slave)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct rlb_client_info *client_info;
	int ntt = 0;
	u32 hash_index;

	spin_lock_bh(&bond->mode_lock);

	hash_index = bond_info->rx_hashtbl_used_head;
	for (; hash_index != RLB_NULL_INDEX;
	     hash_index = client_info->used_next) {
		client_info = &(bond_info->rx_hashtbl[hash_index]);
		if ((client_info->slave == slave) &&
		    is_valid_ether_addr(client_info->mac_dst)) {
			client_info->ntt = 1;
			ntt = 1;
		}
	}

	/* update the team's flag only after the whole iteration */
	if (ntt) {
		bond_info->rx_ntt = 1;
		/* fasten the change */
		bond_info->rlb_update_retry_counter = RLB_UPDATE_RETRY;
	}

	spin_unlock_bh(&bond->mode_lock);
}
/* mark all clients using src_ip to be updated.
 * NOTE(review): takes mode_lock without _bh — presumably only reached
 * from the xmit path (rlb_arp_xmit) where softirqs are already
 * disabled, like tlb_choose_channel(); confirm against callers.
 */
static void rlb_req_update_subnet_clients(struct bonding *bond, __be32 src_ip)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct rlb_client_info *client_info;
	u32 hash_index;

	spin_lock(&bond->mode_lock);

	hash_index = bond_info->rx_hashtbl_used_head;
	for (; hash_index != RLB_NULL_INDEX;
	     hash_index = client_info->used_next) {
		client_info = &(bond_info->rx_hashtbl[hash_index]);
		if (!client_info->slave) {
			netdev_err(bond->dev, "found a client with no channel in the client's hash table\n");
			continue;
		}
		/* update all clients using this src_ip, that are not assigned
		 * to the team's address (curr_active_slave) and have a known
		 * unicast mac address.
		 */
		if ((client_info->ip_src == src_ip) &&
		    !ether_addr_equal_64bits(client_info->slave->dev->dev_addr,
					     bond->dev->dev_addr) &&
		    is_valid_ether_addr(client_info->mac_dst)) {
			client_info->ntt = 1;
			bond_info->rx_ntt = 1;
		}
	}

	spin_unlock(&bond->mode_lock);
}
/* Pick (and record) the rx slave that will serve the client addressed
 * by @arp->ip_dst.  Reuses the existing hash entry when it already
 * belongs to this (ip_src, ip_dst) pair; otherwise (re)assigns the
 * entry to the next rx slave and links it into the used list and the
 * per-ip_src list.  Returns the chosen slave or NULL.
 * Called from the xmit path with softirqs disabled (plain spin_lock).
 */
static struct slave *rlb_choose_channel(struct sk_buff *skb,
					struct bonding *bond,
					const struct arp_pkt *arp)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct slave *assigned_slave, *curr_active_slave;
	struct rlb_client_info *client_info;
	u32 hash_index = 0;

	spin_lock(&bond->mode_lock);

	curr_active_slave = rcu_dereference(bond->curr_active_slave);

	hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst));
	client_info = &(bond_info->rx_hashtbl[hash_index]);

	if (client_info->assigned) {
		if ((client_info->ip_src == arp->ip_src) &&
		    (client_info->ip_dst == arp->ip_dst)) {
			/* the entry is already assigned to this client */
			if (!is_broadcast_ether_addr(arp->mac_dst)) {
				/* update mac address from arp */
				ether_addr_copy(client_info->mac_dst, arp->mac_dst);
			}
			ether_addr_copy(client_info->mac_src, arp->mac_src);

			assigned_slave = client_info->slave;
			if (assigned_slave) {
				spin_unlock(&bond->mode_lock);
				return assigned_slave;
			}
		} else {
			/* the entry is already assigned to some other client,
			 * move the old client to primary (curr_active_slave) so
			 * that the new client can be assigned to this entry.
			 */
			if (curr_active_slave &&
			    client_info->slave != curr_active_slave) {
				client_info->slave = curr_active_slave;
				rlb_update_client(client_info);
			}
		}
	}
	/* assign a new slave */
	assigned_slave = __rlb_next_rx_slave(bond);

	if (assigned_slave) {
		if (!(client_info->assigned &&
		      client_info->ip_src == arp->ip_src)) {
			/* ip_src is going to be updated,
			 * fix the src hash list
			 */
			u32 hash_src = _simple_hash((u8 *)&arp->ip_src,
						    sizeof(arp->ip_src));
			rlb_src_unlink(bond, hash_index);
			rlb_src_link(bond, hash_src, hash_index);
		}

		client_info->ip_src = arp->ip_src;
		client_info->ip_dst = arp->ip_dst;
		/* arp->mac_dst is broadcast for arp requests.
		 * will be updated with clients actual unicast mac address
		 * upon receiving an arp reply.
		 */
		ether_addr_copy(client_info->mac_dst, arp->mac_dst);
		ether_addr_copy(client_info->mac_src, arp->mac_src);
		client_info->slave = assigned_slave;

		if (is_valid_ether_addr(client_info->mac_dst)) {
			client_info->ntt = 1;
			bond->alb_info.rx_ntt = 1;
		} else {
			client_info->ntt = 0;
		}

		if (vlan_get_tag(skb, &client_info->vlan_id))
			client_info->vlan_id = 0;

		/* first use of this entry: push it onto the used list */
		if (!client_info->assigned) {
			u32 prev_tbl_head = bond_info->rx_hashtbl_used_head;

			bond_info->rx_hashtbl_used_head = hash_index;
			client_info->used_next = prev_tbl_head;
			if (prev_tbl_head != RLB_NULL_INDEX) {
				bond_info->rx_hashtbl[prev_tbl_head].used_prev =
					hash_index;
			}
			client_info->assigned = 1;
		}
	}

	spin_unlock(&bond->mode_lock);

	return assigned_slave;
}
/* chooses (and returns) transmit channel for arp reply
 * does not choose channel for other arp types since they are
 * sent on the curr_active_slave.
 * For replies, also rewrites arp->mac_src to the chosen slave's MAC so
 * the client learns that slave as its gateway to this host.
 */
static struct slave *rlb_arp_xmit(struct sk_buff *skb, struct bonding *bond)
{
	struct slave *tx_slave = NULL;
	struct arp_pkt *arp;

	if (!pskb_network_may_pull(skb, sizeof(*arp)))
		return NULL;
	arp = (struct arp_pkt *)skb_network_header(skb);

	/* Don't modify or load balance ARPs that do not originate locally
	 * (e.g.,arrive via a bridge).
	 */
	if (!bond_slave_has_mac_rx(bond, arp->mac_src))
		return NULL;

	if (arp->op_code == htons(ARPOP_REPLY)) {
		/* the arp must be sent on the selected rx channel */
		tx_slave = rlb_choose_channel(skb, bond, arp);
		if (tx_slave)
			bond_hw_addr_copy(arp->mac_src, tx_slave->dev->dev_addr,
					  tx_slave->dev->addr_len);
		netdev_dbg(bond->dev, "(slave %s): Server sent ARP Reply packet\n",
			   tx_slave ? tx_slave->dev->name : "NULL");
	} else if (arp->op_code == htons(ARPOP_REQUEST)) {
		/* Create an entry in the rx_hashtbl for this client as a
		 * place holder.
		 * When the arp reply is received the entry will be updated
		 * with the correct unicast address of the client.
		 */
		tx_slave = rlb_choose_channel(skb, bond, arp);

		/* The ARP reply packets must be delayed so that
		 * they can cancel out the influence of the ARP request.
		 */
		bond->alb_info.rlb_update_delay_counter = RLB_UPDATE_DELAY;

		/* arp requests are broadcast and are sent on the primary
		 * the arp request will collapse all clients on the subnet to
		 * the primary slave. We must register these clients to be
		 * updated with their assigned mac.
		 */
		rlb_req_update_subnet_clients(bond, arp->ip_src);
		netdev_dbg(bond->dev, "(slave %s): Server sent ARP Request packet\n",
			   tx_slave ? tx_slave->dev->name : "NULL");
	}

	return tx_slave;
}
/* Redistribute all in-use rx hash entries across the usable slaves,
 * marking ntt on every entry whose slave actually changed so the
 * affected clients get updated.
 */
static void rlb_rebalance(struct bonding *bond)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct slave *assigned_slave;
	struct rlb_client_info *client_info;
	int ntt;
	u32 hash_index;

	spin_lock_bh(&bond->mode_lock);

	ntt = 0;
	hash_index = bond_info->rx_hashtbl_used_head;
	for (; hash_index != RLB_NULL_INDEX;
	     hash_index = client_info->used_next) {
		client_info = &(bond_info->rx_hashtbl[hash_index]);
		assigned_slave = __rlb_next_rx_slave(bond);
		if (assigned_slave && (client_info->slave != assigned_slave)) {
			client_info->slave = assigned_slave;
			if (!is_zero_ether_addr(client_info->mac_dst)) {
				client_info->ntt = 1;
				ntt = 1;
			}
		}
	}

	/* update the team's flag only after the whole iteration */
	if (ntt)
		bond_info->rx_ntt = 1;
	spin_unlock_bh(&bond->mode_lock);
}
/* Caller must hold mode_lock */
/* Reset the destination-side fields of an rx hash entry: unlink it
 * from the used list and mark it unassigned.
 */
static void rlb_init_table_entry_dst(struct rlb_client_info *entry)
{
	entry->used_next = RLB_NULL_INDEX;
	entry->used_prev = RLB_NULL_INDEX;
	entry->assigned = 0;
	entry->slave = NULL;
	entry->vlan_id = 0;
}
/* Reset the source-side (per-ip_src list) links of an rx hash entry */
static void rlb_init_table_entry_src(struct rlb_client_info *entry)
{
	entry->src_first = RLB_NULL_INDEX;
	entry->src_prev = RLB_NULL_INDEX;
	entry->src_next = RLB_NULL_INDEX;
}
/* Fully reset an rx hash entry: zero it, then reinitialise both the
 * destination-side and source-side link fields to RLB_NULL_INDEX.
 */
static void rlb_init_table_entry(struct rlb_client_info *entry)
{
	memset(entry, 0, sizeof(struct rlb_client_info));
	rlb_init_table_entry_dst(entry);
	rlb_init_table_entry_src(entry);
}
/* Unlink rx_hashtbl[@index] from the doubly-linked used list,
 * updating the list head when the entry was at the front.
 * Caller must hold mode_lock.
 */
static void rlb_delete_table_entry_dst(struct bonding *bond, u32 index)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	u32 next_index = bond_info->rx_hashtbl[index].used_next;
	u32 prev_index = bond_info->rx_hashtbl[index].used_prev;

	if (index == bond_info->rx_hashtbl_used_head)
		bond_info->rx_hashtbl_used_head = next_index;

	if (prev_index != RLB_NULL_INDEX)
		bond_info->rx_hashtbl[prev_index].used_next = next_index;

	if (next_index != RLB_NULL_INDEX)
		bond_info->rx_hashtbl[next_index].used_prev = prev_index;
}
/* unlink a rlb hash table entry from the src list.
 * The src list is anchored in the src_first field of the entry whose
 * hash matches ip_src, so the predecessor may point at us via either
 * src_first (we were the list head) or src_next.
 */
static void rlb_src_unlink(struct bonding *bond, u32 index)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	u32 next_index = bond_info->rx_hashtbl[index].src_next;
	u32 prev_index = bond_info->rx_hashtbl[index].src_prev;

	bond_info->rx_hashtbl[index].src_next = RLB_NULL_INDEX;
	bond_info->rx_hashtbl[index].src_prev = RLB_NULL_INDEX;

	if (next_index != RLB_NULL_INDEX)
		bond_info->rx_hashtbl[next_index].src_prev = prev_index;

	if (prev_index == RLB_NULL_INDEX)
		return;

	/* is prev_index pointing to the head of this list? */
	if (bond_info->rx_hashtbl[prev_index].src_first == index)
		bond_info->rx_hashtbl[prev_index].src_first = next_index;
	else
		bond_info->rx_hashtbl[prev_index].src_next = next_index;
}
/* Remove rx_hashtbl[@index] from both the used list and its src list
 * and reset its destination-side fields.  Caller must hold mode_lock.
 */
static void rlb_delete_table_entry(struct bonding *bond, u32 index)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);

	rlb_delete_table_entry_dst(bond, index);
	rlb_init_table_entry_dst(entry);

	rlb_src_unlink(bond, index);
}
/* add the rx_hashtbl[ip_dst_hash] entry to the list
 * of entries with identical ip_src_hash.
 * The new entry is pushed at the head: the anchor's src_first points
 * to it and the previous head (if any) becomes its successor.
 */
static void rlb_src_link(struct bonding *bond, u32 ip_src_hash, u32 ip_dst_hash)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	u32 next;

	bond_info->rx_hashtbl[ip_dst_hash].src_prev = ip_src_hash;
	next = bond_info->rx_hashtbl[ip_src_hash].src_first;
	bond_info->rx_hashtbl[ip_dst_hash].src_next = next;
	if (next != RLB_NULL_INDEX)
		bond_info->rx_hashtbl[next].src_prev = ip_dst_hash;
	bond_info->rx_hashtbl[ip_src_hash].src_first = ip_dst_hash;
}
/* deletes all rx_hashtbl entries with arp->ip_src if their mac_src does
 * not match arp->mac_src
 */
static void rlb_purge_src_ip(struct bonding *bond, struct arp_pkt *arp)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	u32 ip_src_hash = _simple_hash((u8 *)&(arp->ip_src), sizeof(arp->ip_src));
	u32 index;

	spin_lock_bh(&bond->mode_lock);

	index = bond_info->rx_hashtbl[ip_src_hash].src_first;
	while (index != RLB_NULL_INDEX) {
		struct rlb_client_info *entry = &(bond_info->rx_hashtbl[index]);
		/* save the follower before a delete unlinks this entry */
		u32 next_index = entry->src_next;

		if (entry->ip_src == arp->ip_src &&
		    !ether_addr_equal_64bits(arp->mac_src, entry->mac_src))
			rlb_delete_table_entry(bond, index);
		index = next_index;
	}
	spin_unlock_bh(&bond->mode_lock);
}
  700. static int rlb_initialize(struct bonding *bond)
  701. {
  702. struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
  703. struct rlb_client_info *new_hashtbl;
  704. int size = RLB_HASH_TABLE_SIZE * sizeof(struct rlb_client_info);
  705. int i;
  706. new_hashtbl = kmalloc(size, GFP_KERNEL);
  707. if (!new_hashtbl)
  708. return -1;
  709. spin_lock_bh(&bond->mode_lock);
  710. bond_info->rx_hashtbl = new_hashtbl;
  711. bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
  712. for (i = 0; i < RLB_HASH_TABLE_SIZE; i++)
  713. rlb_init_table_entry(bond_info->rx_hashtbl + i);
  714. spin_unlock_bh(&bond->mode_lock);
  715. /* register to receive ARPs */
  716. bond->recv_probe = rlb_arp_recv;
  717. return 0;
  718. }
/* Free the rlb rx hash table and reset the used-list head; the NULL
 * store under mode_lock lets concurrent walkers detect the teardown.
 */
static void rlb_deinitialize(struct bonding *bond)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));

	spin_lock_bh(&bond->mode_lock);
	kfree(bond_info->rx_hashtbl);
	bond_info->rx_hashtbl = NULL;
	bond_info->rx_hashtbl_used_head = RLB_NULL_INDEX;
	spin_unlock_bh(&bond->mode_lock);
}
  728. static void rlb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
  729. {
  730. struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
  731. u32 curr_index;
  732. spin_lock_bh(&bond->mode_lock);
  733. curr_index = bond_info->rx_hashtbl_used_head;
  734. while (curr_index != RLB_NULL_INDEX) {
  735. struct rlb_client_info *curr = &(bond_info->rx_hashtbl[curr_index]);
  736. u32 next_index = bond_info->rx_hashtbl[curr_index].used_next;
  737. if (curr->vlan_id == vlan_id)
  738. rlb_delete_table_entry(bond, curr_index);
  739. curr_index = next_index;
  740. }
  741. spin_unlock_bh(&bond->mode_lock);
  742. }
  743. /*********************** tlb/rlb shared functions *********************/
/* Build and transmit one ETH_P_LOOPBACK learning frame on @slave with
 * @mac_addr as both source and destination, optionally VLAN-tagged.
 * Silently drops the attempt if the skb cannot be allocated.
 */
static void alb_send_lp_vid(struct slave *slave, u8 mac_addr[],
			    __be16 vlan_proto, u16 vid)
{
	struct learning_pkt pkt;
	struct sk_buff *skb;
	int size = sizeof(struct learning_pkt);

	memset(&pkt, 0, size);
	ether_addr_copy(pkt.mac_dst, mac_addr);
	ether_addr_copy(pkt.mac_src, mac_addr);
	pkt.type = cpu_to_be16(ETH_P_LOOPBACK);

	skb = dev_alloc_skb(size);
	if (!skb)
		return;

	skb_put_data(skb, &pkt, size);

	/* mark the header offsets so the tagged frame is well formed */
	skb_reset_mac_header(skb);
	skb->network_header = skb->mac_header + ETH_HLEN;
	skb->protocol = pkt.type;
	skb->priority = TC_PRIO_CONTROL;
	skb->dev = slave->dev;

	slave_dbg(slave->bond->dev, slave->dev,
		  "Send learning packet: mac %pM vlan %d\n", mac_addr, vid);

	if (vid)
		__vlan_hwaccel_put_tag(skb, vlan_proto, vid);

	dev_queue_xmit(skb);
}
/* Context handed to alb_upper_dev_walk() via
 * netdev_walk_all_upper_dev_rcu()
 */
struct alb_walk_data {
	struct bonding *bond;
	struct slave *slave;	/* slave the learning packets go out on */
	u8 *mac_addr;		/* MAC address being advertised */
	bool strict_match;
};
/* Callback run for every upper device of the bond: send VLAN-tagged
 * learning packets for directly-stacked VLAN devices and (when strict
 * matching is off) for macvlan uppers.  Always returns 0 so the walk
 * continues over all upper devices.
 */
static int alb_upper_dev_walk(struct net_device *upper, void *_data)
{
	struct alb_walk_data *data = _data;
	bool strict_match = data->strict_match;
	struct bonding *bond = data->bond;
	struct slave *slave = data->slave;
	u8 *mac_addr = data->mac_addr;
	struct bond_vlan_tag *tags;

	/* only VLANs stacked directly on top of the bond itself */
	if (is_vlan_dev(upper) &&
	    bond->dev->lower_level == upper->lower_level - 1) {
		if (upper->addr_assign_type == NET_ADDR_STOLEN) {
			alb_send_lp_vid(slave, mac_addr,
					vlan_dev_vlan_proto(upper),
					vlan_dev_vlan_id(upper));
		} else {
			/* VLAN has its own MAC - advertise that instead */
			alb_send_lp_vid(slave, upper->dev_addr,
					vlan_dev_vlan_proto(upper),
					vlan_dev_vlan_id(upper));
		}
	}

	/* If this is a macvlan device, then only send updates
	 * when strict_match is turned off.
	 */
	if (netif_is_macvlan(upper) && !strict_match) {
		tags = bond_verify_device_path(bond->dev, upper, 0);
		/* the upper was found by walking up from bond->dev, so a
		 * path must exist; a failure here is a driver bug
		 */
		if (IS_ERR_OR_NULL(tags))
			BUG();
		alb_send_lp_vid(slave, upper->dev_addr,
				tags[0].vlan_proto, tags[0].vlan_id);
		kfree(tags);
	}

	return 0;
}
/* Advertise @mac_addr out of @slave: one untagged learning packet plus
 * one per relevant upper (VLAN/macvlan) device, via alb_upper_dev_walk().
 */
static void alb_send_learning_packets(struct slave *slave, u8 mac_addr[],
				      bool strict_match)
{
	struct bonding *bond = bond_get_bond_by_slave(slave);
	struct alb_walk_data data = {
		.strict_match = strict_match,
		.mac_addr = mac_addr,
		.slave = slave,
		.bond = bond,
	};

	/* send untagged */
	alb_send_lp_vid(slave, mac_addr, 0, 0);

	/* loop through all devices and see if we need to send a packet
	 * for that device.
	 */
	rcu_read_lock();
	netdev_walk_all_upper_dev_rcu(bond->dev, alb_upper_dev_walk, &data);
	rcu_read_unlock();
}
  827. static int alb_set_slave_mac_addr(struct slave *slave, u8 addr[],
  828. unsigned int len)
  829. {
  830. struct net_device *dev = slave->dev;
  831. struct sockaddr_storage ss;
  832. if (BOND_MODE(slave->bond) == BOND_MODE_TLB) {
  833. memcpy(dev->dev_addr, addr, len);
  834. return 0;
  835. }
  836. /* for rlb each slave must have a unique hw mac addresses so that
  837. * each slave will receive packets destined to a different mac
  838. */
  839. memcpy(ss.__data, addr, len);
  840. ss.ss_family = dev->type;
  841. if (dev_set_mac_address(dev, (struct sockaddr *)&ss, NULL)) {
  842. slave_err(slave->bond->dev, dev, "dev_set_mac_address on slave failed! ALB mode requires that the base driver support setting the hw address also when the network device's interface is open\n");
  843. return -EOPNOTSUPP;
  844. }
  845. return 0;
  846. }
  847. /* Swap MAC addresses between two slaves.
  848. *
  849. * Called with RTNL held, and no other locks.
  850. */
  851. static void alb_swap_mac_addr(struct slave *slave1, struct slave *slave2)
  852. {
  853. u8 tmp_mac_addr[MAX_ADDR_LEN];
  854. bond_hw_addr_copy(tmp_mac_addr, slave1->dev->dev_addr,
  855. slave1->dev->addr_len);
  856. alb_set_slave_mac_addr(slave1, slave2->dev->dev_addr,
  857. slave2->dev->addr_len);
  858. alb_set_slave_mac_addr(slave2, tmp_mac_addr,
  859. slave1->dev->addr_len);
  860. }
/* Send learning packets after MAC address swap.
 *
 * Called with RTNL and no other locks
 *
 * For each slave that can still tx, teach the switch the new address and
 * (in RLB mode) notify the clients.  If exactly one of the two differs in
 * tx state, the disabled slave's new address is taught via the primary.
 */
static void alb_fasten_mac_swap(struct bonding *bond, struct slave *slave1,
				struct slave *slave2)
{
	/* true when exactly one of the two slaves can currently tx */
	int slaves_state_differ = (bond_slave_can_tx(slave1) != bond_slave_can_tx(slave2));
	struct slave *disabled_slave = NULL;

	ASSERT_RTNL();

	/* fasten the change in the switch */
	if (bond_slave_can_tx(slave1)) {
		alb_send_learning_packets(slave1, slave1->dev->dev_addr, false);
		if (bond->alb_info.rlb_enabled) {
			/* inform the clients that the mac address
			 * has changed
			 */
			rlb_req_update_slave_clients(bond, slave1);
		}
	} else {
		disabled_slave = slave1;
	}

	if (bond_slave_can_tx(slave2)) {
		alb_send_learning_packets(slave2, slave2->dev->dev_addr, false);
		if (bond->alb_info.rlb_enabled) {
			/* inform the clients that the mac address
			 * has changed
			 */
			rlb_req_update_slave_clients(bond, slave2);
		}
	} else {
		disabled_slave = slave2;
	}

	if (bond->alb_info.rlb_enabled && slaves_state_differ) {
		/* A disabled slave was assigned an active mac addr */
		rlb_teach_disabled_mac_on_primary(bond,
						  disabled_slave->dev->dev_addr);
	}
}
  900. /**
  901. * alb_change_hw_addr_on_detach
  902. * @bond: bonding we're working on
  903. * @slave: the slave that was just detached
  904. *
  905. * We assume that @slave was already detached from the slave list.
  906. *
  907. * If @slave's permanent hw address is different both from its current
  908. * address and from @bond's address, then somewhere in the bond there's
  909. * a slave that has @slave's permanet address as its current address.
  910. * We'll make sure that that slave no longer uses @slave's permanent address.
  911. *
  912. * Caller must hold RTNL and no other locks
  913. */
  914. static void alb_change_hw_addr_on_detach(struct bonding *bond, struct slave *slave)
  915. {
  916. int perm_curr_diff;
  917. int perm_bond_diff;
  918. struct slave *found_slave;
  919. perm_curr_diff = !ether_addr_equal_64bits(slave->perm_hwaddr,
  920. slave->dev->dev_addr);
  921. perm_bond_diff = !ether_addr_equal_64bits(slave->perm_hwaddr,
  922. bond->dev->dev_addr);
  923. if (perm_curr_diff && perm_bond_diff) {
  924. found_slave = bond_slave_has_mac(bond, slave->perm_hwaddr);
  925. if (found_slave) {
  926. alb_swap_mac_addr(slave, found_slave);
  927. alb_fasten_mac_swap(bond, slave, found_slave);
  928. }
  929. }
  930. }
/**
 * alb_handle_addr_collision_on_attach
 * @bond: bonding we're working on
 * @slave: the slave that was just attached
 *
 * checks uniqueness of slave's mac address and handles the case the
 * new slave uses the bonds mac address.
 *
 * If the permanent hw address of @slave is @bond's hw address, we need to
 * find a different hw address to give @slave, that isn't in use by any other
 * slave in the bond. This address must be, of course, one of the permanent
 * addresses of the other slaves.
 *
 * We go over the slave list, and for each slave there we compare its
 * permanent hw address with the current address of all the other slaves.
 * If no match was found, then we've found a slave with a permanent address
 * that isn't used by any other slave in the bond, so we can assign it to
 * @slave.
 *
 * assumption: this function is called before @slave is attached to the
 * bond slave list.
 *
 * Return: 0 on success, -EFAULT if no spare address could be found while
 * the bond's address is already in use by a slave.
 */
static int alb_handle_addr_collision_on_attach(struct bonding *bond, struct slave *slave)
{
	struct slave *has_bond_addr = rcu_access_pointer(bond->curr_active_slave);
	struct slave *tmp_slave1, *free_mac_slave = NULL;
	struct list_head *iter;

	if (!bond_has_slaves(bond)) {
		/* this is the first slave */
		return 0;
	}

	/* if slave's mac address differs from bond's mac address
	 * check uniqueness of slave's mac address against the other
	 * slaves in the bond.
	 */
	if (!ether_addr_equal_64bits(slave->perm_hwaddr, bond->dev->dev_addr)) {
		/* unique among current addresses: nothing to resolve */
		if (!bond_slave_has_mac(bond, slave->dev->dev_addr))
			return 0;

		/* Try setting slave mac to bond address and fall-through
		 * to code handling that situation below...
		 */
		alb_set_slave_mac_addr(slave, bond->dev->dev_addr,
				       bond->dev->addr_len);
	}

	/* The slave's address is equal to the address of the bond.
	 * Search for a spare address in the bond for this slave.
	 */
	bond_for_each_slave(bond, tmp_slave1, iter) {
		if (!bond_slave_has_mac(bond, tmp_slave1->perm_hwaddr)) {
			/* no slave has tmp_slave1's perm addr
			 * as its curr addr
			 */
			free_mac_slave = tmp_slave1;
			break;
		}

		if (!has_bond_addr) {
			/* remember whether some slave already carries the
			 * bond's address
			 */
			if (ether_addr_equal_64bits(tmp_slave1->dev->dev_addr,
						    bond->dev->dev_addr)) {
				has_bond_addr = tmp_slave1;
			}
		}
	}

	if (free_mac_slave) {
		alb_set_slave_mac_addr(slave, free_mac_slave->perm_hwaddr,
				       free_mac_slave->dev->addr_len);
		slave_warn(bond->dev, slave->dev, "the slave hw address is in use by the bond; giving it the hw address of %s\n",
			   free_mac_slave->dev->name);
	} else if (has_bond_addr) {
		slave_err(bond->dev, slave->dev, "the slave hw address is in use by the bond; couldn't find a slave with a free hw address to give it (this should not have happened)\n");
		return -EFAULT;
	}

	return 0;
}
/**
 * alb_set_mac_address
 * @bond: bonding we're working on
 * @addr: new address (struct sockaddr *) to program into every slave
 *
 * In TLB mode all slaves are configured to the bond's hw address, but set
 * their dev_addr field to different addresses (based on their permanent hw
 * addresses).
 *
 * For each slave, this function sets the interface to the new address and then
 * changes its dev_addr field to its previous value.
 *
 * Unwinding assumes bond's mac address has not yet changed.
 */
static int alb_set_mac_address(struct bonding *bond, void *addr)
{
	struct slave *slave, *rollback_slave;
	struct list_head *iter;
	struct sockaddr_storage ss;
	char tmp_addr[MAX_ADDR_LEN];
	int res;

	/* in RLB mode slaves keep unique hw addresses; nothing to program */
	if (bond->alb_info.rlb_enabled)
		return 0;

	bond_for_each_slave(bond, slave, iter) {
		/* save net_device's current hw address */
		bond_hw_addr_copy(tmp_addr, slave->dev->dev_addr,
				  slave->dev->addr_len);

		res = dev_set_mac_address(slave->dev, addr, NULL);

		/* restore net_device's hw address */
		bond_hw_addr_copy(slave->dev->dev_addr, tmp_addr,
				  slave->dev->addr_len);

		if (res)
			goto unwind;
	}

	return 0;

unwind:
	/* re-program the slaves already done back to the (old) bond address */
	memcpy(ss.__data, bond->dev->dev_addr, bond->dev->addr_len);
	ss.ss_family = bond->dev->type;

	/* unwind from head to the slave that failed */
	bond_for_each_slave(bond, rollback_slave, iter) {
		if (rollback_slave == slave)
			break;

		bond_hw_addr_copy(tmp_addr, rollback_slave->dev->dev_addr,
				  rollback_slave->dev->addr_len);
		dev_set_mac_address(rollback_slave->dev,
				    (struct sockaddr *)&ss, NULL);
		bond_hw_addr_copy(rollback_slave->dev->dev_addr, tmp_addr,
				  rollback_slave->dev->addr_len);
	}

	return res;
}
  1055. /************************ exported alb funcions ************************/
  1056. int bond_alb_initialize(struct bonding *bond, int rlb_enabled)
  1057. {
  1058. int res;
  1059. res = tlb_initialize(bond);
  1060. if (res)
  1061. return res;
  1062. if (rlb_enabled) {
  1063. bond->alb_info.rlb_enabled = 1;
  1064. res = rlb_initialize(bond);
  1065. if (res) {
  1066. tlb_deinitialize(bond);
  1067. return res;
  1068. }
  1069. } else {
  1070. bond->alb_info.rlb_enabled = 0;
  1071. }
  1072. return 0;
  1073. }
  1074. void bond_alb_deinitialize(struct bonding *bond)
  1075. {
  1076. struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
  1077. tlb_deinitialize(bond);
  1078. if (bond_info->rlb_enabled)
  1079. rlb_deinitialize(bond);
  1080. }
/* Common tail of the TLB/ALB transmit paths: send @skb on @tx_slave,
 * fall back to the current active slave when none was chosen, and drop
 * the frame when no usable slave exists.
 * Always returns NETDEV_TX_OK — the skb is either queued or freed here.
 */
static netdev_tx_t bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
				    struct slave *tx_slave)
{
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct ethhdr *eth_data = eth_hdr(skb);

	if (!tx_slave) {
		/* unbalanced or unassigned, send through primary */
		tx_slave = rcu_dereference(bond->curr_active_slave);
		/* account this traffic so the next rebalance can spread it */
		if (bond->params.tlb_dynamic_lb)
			bond_info->unbalanced_load += skb->len;
	}

	if (tx_slave && bond_slave_can_tx(tx_slave)) {
		if (tx_slave != rcu_access_pointer(bond->curr_active_slave)) {
			/* rewrite the source MAC so the switch learns the
			 * chosen slave's address for this flow
			 */
			ether_addr_copy(eth_data->h_source,
					tx_slave->dev->dev_addr);
		}

		bond_dev_queue_xmit(bond, skb, tx_slave->dev);
		goto out;
	}

	/* chosen slave cannot tx: drop its hash entries so new flows are
	 * reassigned elsewhere
	 */
	if (tx_slave && bond->params.tlb_dynamic_lb) {
		spin_lock(&bond->mode_lock);
		__tlb_clear_slave(bond, tx_slave, 0);
		spin_unlock(&bond->mode_lock);
	}

	/* no suitable interface, frame not sent */
	bond_tx_drop(bond->dev, skb);

out:
	return NETDEV_TX_OK;
}
  1110. netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
  1111. {
  1112. struct bonding *bond = netdev_priv(bond_dev);
  1113. struct ethhdr *eth_data;
  1114. struct slave *tx_slave = NULL;
  1115. u32 hash_index;
  1116. skb_reset_mac_header(skb);
  1117. eth_data = eth_hdr(skb);
  1118. /* Do not TX balance any multicast or broadcast */
  1119. if (!is_multicast_ether_addr(eth_data->h_dest)) {
  1120. switch (skb->protocol) {
  1121. case htons(ETH_P_IP):
  1122. case htons(ETH_P_IPX):
  1123. /* In case of IPX, it will falback to L2 hash */
  1124. case htons(ETH_P_IPV6):
  1125. hash_index = bond_xmit_hash(bond, skb);
  1126. if (bond->params.tlb_dynamic_lb) {
  1127. tx_slave = tlb_choose_channel(bond,
  1128. hash_index & 0xFF,
  1129. skb->len);
  1130. } else {
  1131. struct bond_up_slave *slaves;
  1132. unsigned int count;
  1133. slaves = rcu_dereference(bond->slave_arr);
  1134. count = slaves ? READ_ONCE(slaves->count) : 0;
  1135. if (likely(count))
  1136. tx_slave = slaves->arr[hash_index %
  1137. count];
  1138. }
  1139. break;
  1140. }
  1141. }
  1142. return bond_do_alb_xmit(skb, bond, tx_slave);
  1143. }
  1144. netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
  1145. {
  1146. struct bonding *bond = netdev_priv(bond_dev);
  1147. struct ethhdr *eth_data;
  1148. struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
  1149. struct slave *tx_slave = NULL;
  1150. static const __be32 ip_bcast = htonl(0xffffffff);
  1151. int hash_size = 0;
  1152. bool do_tx_balance = true;
  1153. u32 hash_index = 0;
  1154. const u8 *hash_start = NULL;
  1155. skb_reset_mac_header(skb);
  1156. eth_data = eth_hdr(skb);
  1157. switch (ntohs(skb->protocol)) {
  1158. case ETH_P_IP: {
  1159. const struct iphdr *iph;
  1160. if (is_broadcast_ether_addr(eth_data->h_dest) ||
  1161. !pskb_network_may_pull(skb, sizeof(*iph))) {
  1162. do_tx_balance = false;
  1163. break;
  1164. }
  1165. iph = ip_hdr(skb);
  1166. if (iph->daddr == ip_bcast || iph->protocol == IPPROTO_IGMP) {
  1167. do_tx_balance = false;
  1168. break;
  1169. }
  1170. hash_start = (char *)&(iph->daddr);
  1171. hash_size = sizeof(iph->daddr);
  1172. break;
  1173. }
  1174. case ETH_P_IPV6: {
  1175. const struct ipv6hdr *ip6hdr;
  1176. /* IPv6 doesn't really use broadcast mac address, but leave
  1177. * that here just in case.
  1178. */
  1179. if (is_broadcast_ether_addr(eth_data->h_dest)) {
  1180. do_tx_balance = false;
  1181. break;
  1182. }
  1183. /* IPv6 uses all-nodes multicast as an equivalent to
  1184. * broadcasts in IPv4.
  1185. */
  1186. if (ether_addr_equal_64bits(eth_data->h_dest, mac_v6_allmcast)) {
  1187. do_tx_balance = false;
  1188. break;
  1189. }
  1190. if (!pskb_network_may_pull(skb, sizeof(*ip6hdr))) {
  1191. do_tx_balance = false;
  1192. break;
  1193. }
  1194. /* Additionally, DAD probes should not be tx-balanced as that
  1195. * will lead to false positives for duplicate addresses and
  1196. * prevent address configuration from working.
  1197. */
  1198. ip6hdr = ipv6_hdr(skb);
  1199. if (ipv6_addr_any(&ip6hdr->saddr)) {
  1200. do_tx_balance = false;
  1201. break;
  1202. }
  1203. hash_start = (char *)&ip6hdr->daddr;
  1204. hash_size = sizeof(ip6hdr->daddr);
  1205. break;
  1206. }
  1207. case ETH_P_IPX: {
  1208. const struct ipxhdr *ipxhdr;
  1209. if (pskb_network_may_pull(skb, sizeof(*ipxhdr))) {
  1210. do_tx_balance = false;
  1211. break;
  1212. }
  1213. ipxhdr = (struct ipxhdr *)skb_network_header(skb);
  1214. if (ipxhdr->ipx_checksum != IPX_NO_CHECKSUM) {
  1215. /* something is wrong with this packet */
  1216. do_tx_balance = false;
  1217. break;
  1218. }
  1219. if (ipxhdr->ipx_type != IPX_TYPE_NCP) {
  1220. /* The only protocol worth balancing in
  1221. * this family since it has an "ARP" like
  1222. * mechanism
  1223. */
  1224. do_tx_balance = false;
  1225. break;
  1226. }
  1227. eth_data = eth_hdr(skb);
  1228. hash_start = (char *)eth_data->h_dest;
  1229. hash_size = ETH_ALEN;
  1230. break;
  1231. }
  1232. case ETH_P_ARP:
  1233. do_tx_balance = false;
  1234. if (bond_info->rlb_enabled)
  1235. tx_slave = rlb_arp_xmit(skb, bond);
  1236. break;
  1237. default:
  1238. do_tx_balance = false;
  1239. break;
  1240. }
  1241. if (do_tx_balance) {
  1242. if (bond->params.tlb_dynamic_lb) {
  1243. hash_index = _simple_hash(hash_start, hash_size);
  1244. tx_slave = tlb_choose_channel(bond, hash_index, skb->len);
  1245. } else {
  1246. /*
  1247. * do_tx_balance means we are free to select the tx_slave
  1248. * So we do exactly what tlb would do for hash selection
  1249. */
  1250. struct bond_up_slave *slaves;
  1251. unsigned int count;
  1252. slaves = rcu_dereference(bond->slave_arr);
  1253. count = slaves ? READ_ONCE(slaves->count) : 0;
  1254. if (likely(count))
  1255. tx_slave = slaves->arr[bond_xmit_hash(bond, skb) %
  1256. count];
  1257. }
  1258. }
  1259. return bond_do_alb_xmit(skb, bond, tx_slave);
  1260. }
/* Periodic ALB worker (runs every alb_delta_in_ticks): sends learning
 * packets, rebalances tx traffic, and drives RLB housekeeping
 * (promiscuity timeout, rebalance, client updates).  Always re-arms
 * itself at the end.
 */
void bond_alb_monitor(struct work_struct *work)
{
	struct bonding *bond = container_of(work, struct bonding,
					    alb_work.work);
	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
	struct list_head *iter;
	struct slave *slave;

	if (!bond_has_slaves(bond)) {
		/* no slaves: reset counters so a fresh slave starts clean */
		bond_info->tx_rebalance_counter = 0;
		bond_info->lp_counter = 0;
		goto re_arm;
	}

	rcu_read_lock();

	bond_info->tx_rebalance_counter++;
	bond_info->lp_counter++;

	/* send learning packets */
	if (bond_info->lp_counter >= BOND_ALB_LP_TICKS(bond)) {
		bool strict_match;

		bond_for_each_slave_rcu(bond, slave, iter) {
			/* If updating current_active, use all currently
			 * used mac addresses (!strict_match). Otherwise, only
			 * use mac of the slave device.
			 * In RLB mode, we always use strict matches.
			 */
			strict_match = (slave != rcu_access_pointer(bond->curr_active_slave) ||
					bond_info->rlb_enabled);
			alb_send_learning_packets(slave, slave->dev->dev_addr,
						  strict_match);
		}
		bond_info->lp_counter = 0;
	}

	/* rebalance tx traffic */
	if (bond_info->tx_rebalance_counter >= BOND_TLB_REBALANCE_TICKS) {
		bond_for_each_slave_rcu(bond, slave, iter) {
			tlb_clear_slave(bond, slave, 1);
			if (slave == rcu_access_pointer(bond->curr_active_slave)) {
				/* fold unbalanced traffic into the active
				 * slave's load so it is accounted for
				 */
				SLAVE_TLB_INFO(slave).load =
					bond_info->unbalanced_load /
					BOND_TLB_REBALANCE_INTERVAL;
				bond_info->unbalanced_load = 0;
			}
		}
		bond_info->tx_rebalance_counter = 0;
	}

	if (bond_info->rlb_enabled) {
		if (bond_info->primary_is_promisc &&
		    (++bond_info->rlb_promisc_timeout_counter >= RLB_PROMISC_TIMEOUT)) {
			/* dev_set_promiscuity requires rtnl and
			 * nothing else. Avoid race with bond_close.
			 */
			rcu_read_unlock();
			if (!rtnl_trylock())
				goto re_arm;

			bond_info->rlb_promisc_timeout_counter = 0;

			/* If the primary was set to promiscuous mode
			 * because a slave was disabled then
			 * it can now leave promiscuous mode.
			 */
			dev_set_promiscuity(rtnl_dereference(bond->curr_active_slave)->dev,
					    -1);
			bond_info->primary_is_promisc = 0;

			rtnl_unlock();
			rcu_read_lock();
		}

		if (bond_info->rlb_rebalance) {
			bond_info->rlb_rebalance = 0;
			rlb_rebalance(bond);
		}

		/* check if clients need updating */
		if (bond_info->rx_ntt) {
			if (bond_info->rlb_update_delay_counter) {
				--bond_info->rlb_update_delay_counter;
			} else {
				rlb_update_rx_clients(bond);
				if (bond_info->rlb_update_retry_counter)
					--bond_info->rlb_update_retry_counter;
				else
					bond_info->rx_ntt = 0;
			}
		}
	}
	rcu_read_unlock();
re_arm:
	queue_delayed_work(bond->wq, &bond->alb_work, alb_delta_in_ticks);
}
  1346. /* assumption: called before the slave is attached to the bond
  1347. * and not locked by the bond lock
  1348. */
  1349. int bond_alb_init_slave(struct bonding *bond, struct slave *slave)
  1350. {
  1351. int res;
  1352. res = alb_set_slave_mac_addr(slave, slave->perm_hwaddr,
  1353. slave->dev->addr_len);
  1354. if (res)
  1355. return res;
  1356. res = alb_handle_addr_collision_on_attach(bond, slave);
  1357. if (res)
  1358. return res;
  1359. tlb_init_slave(slave);
  1360. /* order a rebalance ASAP */
  1361. bond->alb_info.tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS;
  1362. if (bond->alb_info.rlb_enabled)
  1363. bond->alb_info.rlb_rebalance = 1;
  1364. return 0;
  1365. }
  1366. /* Remove slave from tlb and rlb hash tables, and fix up MAC addresses
  1367. * if necessary.
  1368. *
  1369. * Caller must hold RTNL and no other locks
  1370. */
  1371. void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave)
  1372. {
  1373. if (bond_has_slaves(bond))
  1374. alb_change_hw_addr_on_detach(bond, slave);
  1375. tlb_clear_slave(bond, slave, 0);
  1376. if (bond->alb_info.rlb_enabled) {
  1377. bond->alb_info.rx_slave = NULL;
  1378. rlb_clear_slave(bond, slave);
  1379. }
  1380. }
  1381. void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link)
  1382. {
  1383. struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
  1384. if (link == BOND_LINK_DOWN) {
  1385. tlb_clear_slave(bond, slave, 0);
  1386. if (bond->alb_info.rlb_enabled)
  1387. rlb_clear_slave(bond, slave);
  1388. } else if (link == BOND_LINK_UP) {
  1389. /* order a rebalance ASAP */
  1390. bond_info->tx_rebalance_counter = BOND_TLB_REBALANCE_TICKS;
  1391. if (bond->alb_info.rlb_enabled) {
  1392. bond->alb_info.rlb_rebalance = 1;
  1393. /* If the updelay module parameter is smaller than the
  1394. * forwarding delay of the switch the rebalance will
  1395. * not work because the rebalance arp replies will
  1396. * not be forwarded to the clients..
  1397. */
  1398. }
  1399. }
  1400. if (bond_is_nondyn_tlb(bond)) {
  1401. if (bond_update_slave_arr(bond, NULL))
  1402. pr_err("Failed to build slave-array for TLB mode.\n");
  1403. }
  1404. }
/**
 * bond_alb_handle_active_change - assign new curr_active_slave
 * @bond: our bonding struct
 * @new_slave: new slave to assign
 *
 * Set the bond->curr_active_slave to @new_slave and handle
 * mac address swapping and promiscuity changes as needed.
 *
 * Caller must hold RTNL
 */
void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave)
{
	struct slave *swap_slave;
	struct slave *curr_active;

	curr_active = rtnl_dereference(bond->curr_active_slave);
	if (curr_active == new_slave)
		return;

	/* the old active no longer needs to stay promiscuous */
	if (curr_active && bond->alb_info.primary_is_promisc) {
		dev_set_promiscuity(curr_active->dev, -1);
		bond->alb_info.primary_is_promisc = 0;
		bond->alb_info.rlb_promisc_timeout_counter = 0;
	}

	swap_slave = curr_active;
	rcu_assign_pointer(bond->curr_active_slave, new_slave);

	if (!new_slave || !bond_has_slaves(bond))
		return;

	/* set the new curr_active_slave to the bonds mac address
	 * i.e. swap mac addresses of old curr_active_slave and new curr_active_slave
	 */
	if (!swap_slave)
		swap_slave = bond_slave_has_mac(bond, bond->dev->dev_addr);

	/* Arrange for swap_slave and new_slave to temporarily be
	 * ignored so we can mess with their MAC addresses without
	 * fear of interference from transmit activity.
	 */
	if (swap_slave)
		tlb_clear_slave(bond, swap_slave, 1);
	tlb_clear_slave(bond, new_slave, 1);

	/* in TLB mode, the slave might flip down/up with the old dev_addr,
	 * and thus filter bond->dev_addr's packets, so force bond's mac
	 */
	if (BOND_MODE(bond) == BOND_MODE_TLB) {
		struct sockaddr_storage ss;
		u8 tmp_addr[MAX_ADDR_LEN];

		bond_hw_addr_copy(tmp_addr, new_slave->dev->dev_addr,
				  new_slave->dev->addr_len);

		bond_hw_addr_copy(ss.__data, bond->dev->dev_addr,
				  bond->dev->addr_len);
		ss.ss_family = bond->dev->type;
		/* we don't care if it can't change its mac, best effort */
		dev_set_mac_address(new_slave->dev, (struct sockaddr *)&ss,
				    NULL);

		/* restore the software dev_addr; only the hw was forced */
		bond_hw_addr_copy(new_slave->dev->dev_addr, tmp_addr,
				  new_slave->dev->addr_len);
	}

	/* curr_active_slave must be set before calling alb_swap_mac_addr */
	if (swap_slave) {
		/* swap mac address */
		alb_swap_mac_addr(swap_slave, new_slave);
		alb_fasten_mac_swap(bond, swap_slave, new_slave);
	} else {
		/* set the new_slave to the bond mac address */
		alb_set_slave_mac_addr(new_slave, bond->dev->dev_addr,
				       bond->dev->addr_len);
		alb_send_learning_packets(new_slave, bond->dev->dev_addr,
					  false);
	}
}
/* Called with RTNL.
 * ndo_set_mac_address handler for ALB/TLB bonds: push the new address to
 * every slave via alb_set_mac_address(), then resolve any duplication
 * with the current active slave.
 */
int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr)
{
	struct bonding *bond = netdev_priv(bond_dev);
	struct sockaddr_storage *ss = addr;
	struct slave *curr_active;
	struct slave *swap_slave;
	int res;

	if (!is_valid_ether_addr(ss->__data))
		return -EADDRNOTAVAIL;

	res = alb_set_mac_address(bond, addr);
	if (res)
		return res;

	bond_hw_addr_copy(bond_dev->dev_addr, ss->__data, bond_dev->addr_len);

	/* If there is no curr_active_slave there is nothing else to do.
	 * Otherwise we'll need to pass the new address to it and handle
	 * duplications.
	 */
	curr_active = rtnl_dereference(bond->curr_active_slave);
	if (!curr_active)
		return 0;

	/* if another slave already carries the new bond address, swap
	 * with the active slave; otherwise assign the address directly
	 */
	swap_slave = bond_slave_has_mac(bond, bond_dev->dev_addr);

	if (swap_slave) {
		alb_swap_mac_addr(swap_slave, curr_active);
		alb_fasten_mac_swap(bond, swap_slave, curr_active);
	} else {
		alb_set_slave_mac_addr(curr_active, bond_dev->dev_addr,
				       bond_dev->addr_len);

		alb_send_learning_packets(curr_active,
					  bond_dev->dev_addr, false);
		if (bond->alb_info.rlb_enabled) {
			/* inform clients mac address has changed */
			rlb_req_update_slave_clients(bond, curr_active);
		}
	}

	return 0;
}
  1510. void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id)
  1511. {
  1512. if (bond->alb_info.rlb_enabled)
  1513. rlb_clear_vlan(bond, vlan_id);
  1514. }