/drivers/net/ethernet/intel/i40e/i40e_txrx.c
- // SPDX-License-Identifier: GPL-2.0
- /* Copyright(c) 2013 - 2018 Intel Corporation. */
- #include <linux/prefetch.h>
- #include <linux/bpf_trace.h>
- #include <net/xdp.h>
- #include "i40e.h"
- #include "i40e_trace.h"
- #include "i40e_prototype.h"
- #include "i40e_txrx_common.h"
- #include "i40e_xsk.h"
- #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
- /**
- * i40e_fdir - Generate a Flow Director descriptor based on fdata
- * @tx_ring: Tx ring to send buffer on
- * @fdata: Flow director filter data
- * @add: Indicate if we are adding a rule or deleting one
- *
- **/
- static void i40e_fdir(struct i40e_ring *tx_ring,
- struct i40e_fdir_filter *fdata, bool add)
- {
- struct i40e_filter_program_desc *fdir_desc;
- struct i40e_pf *pf = tx_ring->vsi->back;
- u32 flex_ptype, dtype_cmd;
- u16 i;
- /* grab the next descriptor */
- i = tx_ring->next_to_use;
- fdir_desc = I40E_TX_FDIRDESC(tx_ring, i);
- i++;
- tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
- flex_ptype = I40E_TXD_FLTR_QW0_QINDEX_MASK &
- (fdata->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT);
- flex_ptype |= I40E_TXD_FLTR_QW0_FLEXOFF_MASK &
- (fdata->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT);
- flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK &
- (fdata->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
- /* Use LAN VSI Id if not programmed by user */
- flex_ptype |= I40E_TXD_FLTR_QW0_DEST_VSI_MASK &
- ((u32)(fdata->dest_vsi ? : pf->vsi[pf->lan_vsi]->id) <<
- I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT);
- dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
- dtype_cmd |= add ?
- I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
- I40E_TXD_FLTR_QW1_PCMD_SHIFT :
- I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
- I40E_TXD_FLTR_QW1_PCMD_SHIFT;
- dtype_cmd |= I40E_TXD_FLTR_QW1_DEST_MASK &
- (fdata->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT);
- dtype_cmd |= I40E_TXD_FLTR_QW1_FD_STATUS_MASK &
- (fdata->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT);
- if (fdata->cnt_index) {
- dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK;
- dtype_cmd |= I40E_TXD_FLTR_QW1_CNTINDEX_MASK &
- ((u32)fdata->cnt_index <<
- I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT);
- }
- fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
- fdir_desc->rsvd = cpu_to_le32(0);
- fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
- fdir_desc->fd_id = cpu_to_le32(fdata->fd_id);
- }
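- /* [Editorial note] The qword assembly above follows one pattern used
-  * throughout this file: shift the field value into position, clip it
-  * with the field mask, then OR it into the accumulator. A minimal
-  * standalone sketch of that pattern, with a made-up FIELD layout
-  * (not the real i40e descriptor bits):
-  */
- #if 0
- #include <stdint.h>
-
- #define FIELD_SHIFT 11
- #define FIELD_MASK  (0x7ffU << FIELD_SHIFT)
-
- static uint32_t pack_field(uint32_t qword, uint32_t val)
- {
- 	/* clip the shifted value to the field, then merge it in */
- 	return qword | (FIELD_MASK & (val << FIELD_SHIFT));
- }
- #endif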
- #define I40E_FD_CLEAN_DELAY 10
- /**
- * i40e_program_fdir_filter - Program a Flow Director filter
- * @fdir_data: Packet data that will be filter parameters
- * @raw_packet: the pre-allocated packet buffer for FDir
- * @pf: The PF pointer
- * @add: True for add/update, False for remove
- **/
- static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data,
- u8 *raw_packet, struct i40e_pf *pf,
- bool add)
- {
- struct i40e_tx_buffer *tx_buf, *first;
- struct i40e_tx_desc *tx_desc;
- struct i40e_ring *tx_ring;
- struct i40e_vsi *vsi;
- struct device *dev;
- dma_addr_t dma;
- u32 td_cmd = 0;
- u16 i;
- /* find existing FDIR VSI */
- vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
- if (!vsi)
- return -ENOENT;
- tx_ring = vsi->tx_rings[0];
- dev = tx_ring->dev;
- /* we need two descriptors to add/del a filter and we can wait */
- for (i = I40E_FD_CLEAN_DELAY; I40E_DESC_UNUSED(tx_ring) < 2; i--) {
- if (!i)
- return -EAGAIN;
- msleep_interruptible(1);
- }
- dma = dma_map_single(dev, raw_packet,
- I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
- if (dma_mapping_error(dev, dma))
- goto dma_fail;
- /* grab the next descriptor */
- i = tx_ring->next_to_use;
- first = &tx_ring->tx_bi[i];
- i40e_fdir(tx_ring, fdir_data, add);
- /* Now program a dummy descriptor */
- i = tx_ring->next_to_use;
- tx_desc = I40E_TX_DESC(tx_ring, i);
- tx_buf = &tx_ring->tx_bi[i];
- tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0;
- memset(tx_buf, 0, sizeof(struct i40e_tx_buffer));
- /* record length, and DMA address */
- dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
- dma_unmap_addr_set(tx_buf, dma, dma);
- tx_desc->buffer_addr = cpu_to_le64(dma);
- td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
- tx_buf->tx_flags = I40E_TX_FLAGS_FD_SB;
- tx_buf->raw_buf = (void *)raw_packet;
- tx_desc->cmd_type_offset_bsz =
- build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
- /* Force memory writes to complete before letting h/w
- * know there are new descriptors to fetch.
- */
- wmb();
- /* Mark the data descriptor to be watched */
- first->next_to_watch = tx_desc;
- writel(tx_ring->next_to_use, tx_ring->tail);
- return 0;
- dma_fail:
- return -1;
- }
- #define IP_HEADER_OFFSET 14
- #define I40E_UDPIP_DUMMY_PACKET_LEN 42
- /**
- * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
- * @vsi: pointer to the targeted VSI
- * @fd_data: the flow director data required for the FDir descriptor
- * @add: true adds a filter, false removes it
- *
- * Returns 0 if the filters were successfully added or removed
- **/
- static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
- struct i40e_fdir_filter *fd_data,
- bool add)
- {
- struct i40e_pf *pf = vsi->back;
- struct udphdr *udp;
- struct iphdr *ip;
- u8 *raw_packet;
- int ret;
- static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
- 0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
- raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
- if (!raw_packet)
- return -ENOMEM;
- memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
- ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
- udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
- + sizeof(struct iphdr));
- ip->daddr = fd_data->dst_ip;
- udp->dest = fd_data->dst_port;
- ip->saddr = fd_data->src_ip;
- udp->source = fd_data->src_port;
- if (fd_data->flex_filter) {
- u8 *payload = raw_packet + I40E_UDPIP_DUMMY_PACKET_LEN;
- __be16 pattern = fd_data->flex_word;
- u16 off = fd_data->flex_offset;
- *((__force __be16 *)(payload + off)) = pattern;
- }
- fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP;
- ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
- if (ret) {
- dev_info(&pf->pdev->dev,
- "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
- fd_data->pctype, fd_data->fd_id, ret);
- /* Free the packet buffer since it wasn't added to the ring */
- kfree(raw_packet);
- return -EOPNOTSUPP;
- } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
- if (add)
- dev_info(&pf->pdev->dev,
- "Filter OK for PCTYPE %d loc = %d\n",
- fd_data->pctype, fd_data->fd_id);
- else
- dev_info(&pf->pdev->dev,
- "Filter deleted for PCTYPE %d loc = %d\n",
- fd_data->pctype, fd_data->fd_id);
- }
- if (add)
- pf->fd_udp4_filter_cnt++;
- else
- pf->fd_udp4_filter_cnt--;
- return 0;
- }
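- /* [Editorial note] The UDP/TCP/SCTP helpers in this file all follow the
-  * same recipe: copy a canned Ethernet/IPv4/L4 template into a raw
-  * buffer, then overlay header structs at fixed offsets to patch in the
-  * flow tuple. A hypothetical userspace sketch of that overlay (offsets
-  * match the code above; patch_tuple() is not a driver function):
-  */
- #if 0
- #include <stdint.h>
- #include <netinet/ip.h>
- #include <netinet/udp.h>
-
- static void patch_tuple(unsigned char *raw,	/* template already copied in */
- 			uint32_t saddr, uint32_t daddr,	/* network byte order */
- 			uint16_t sport, uint16_t dport)	/* network byte order */
- {
- 	struct iphdr *ip = (struct iphdr *)(raw + 14);	/* skip Ethernet */
- 	struct udphdr *udp = (struct udphdr *)(raw + 14 + sizeof(*ip));
-
- 	ip->saddr = saddr;
- 	ip->daddr = daddr;
- 	udp->source = sport;
- 	udp->dest = dport;
- }
- #endif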
- #define I40E_TCPIP_DUMMY_PACKET_LEN 54
- /**
- * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
- * @vsi: pointer to the targeted VSI
- * @fd_data: the flow director data required for the FDir descriptor
- * @add: true adds a filter, false removes it
- *
- * Returns 0 if the filters were successfully added or removed
- **/
- static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
- struct i40e_fdir_filter *fd_data,
- bool add)
- {
- struct i40e_pf *pf = vsi->back;
- struct tcphdr *tcp;
- struct iphdr *ip;
- u8 *raw_packet;
- int ret;
- /* Dummy packet */
- static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
- 0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
- 0x0, 0x72, 0, 0, 0, 0};
- raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
- if (!raw_packet)
- return -ENOMEM;
- memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
- ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
- tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
- + sizeof(struct iphdr));
- ip->daddr = fd_data->dst_ip;
- tcp->dest = fd_data->dst_port;
- ip->saddr = fd_data->src_ip;
- tcp->source = fd_data->src_port;
- if (fd_data->flex_filter) {
- u8 *payload = raw_packet + I40E_TCPIP_DUMMY_PACKET_LEN;
- __be16 pattern = fd_data->flex_word;
- u16 off = fd_data->flex_offset;
- *((__force __be16 *)(payload + off)) = pattern;
- }
- fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
- ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
- if (ret) {
- dev_info(&pf->pdev->dev,
- "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
- fd_data->pctype, fd_data->fd_id, ret);
- /* Free the packet buffer since it wasn't added to the ring */
- kfree(raw_packet);
- return -EOPNOTSUPP;
- } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
- if (add)
- dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d loc = %d)\n",
- fd_data->pctype, fd_data->fd_id);
- else
- dev_info(&pf->pdev->dev,
- "Filter deleted for PCTYPE %d loc = %d\n",
- fd_data->pctype, fd_data->fd_id);
- }
- if (add) {
- pf->fd_tcp4_filter_cnt++;
- if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) &&
- I40E_DEBUG_FD & pf->hw.debug_mask)
- dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
- set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
- } else {
- pf->fd_tcp4_filter_cnt--;
- }
- return 0;
- }
- #define I40E_SCTPIP_DUMMY_PACKET_LEN 46
- /**
- * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
- * a specific flow spec
- * @vsi: pointer to the targeted VSI
- * @fd_data: the flow director data required for the FDir descriptor
- * @add: true adds a filter, false removes it
- *
- * Returns 0 if the filters were successfully added or removed
- **/
- static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
- struct i40e_fdir_filter *fd_data,
- bool add)
- {
- struct i40e_pf *pf = vsi->back;
- struct sctphdr *sctp;
- struct iphdr *ip;
- u8 *raw_packet;
- int ret;
- /* Dummy packet */
- static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
- 0x45, 0, 0, 0x20, 0, 0, 0x40, 0, 0x40, 0x84, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
- raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
- if (!raw_packet)
- return -ENOMEM;
- memcpy(raw_packet, packet, I40E_SCTPIP_DUMMY_PACKET_LEN);
- ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
- sctp = (struct sctphdr *)(raw_packet + IP_HEADER_OFFSET
- + sizeof(struct iphdr));
- ip->daddr = fd_data->dst_ip;
- sctp->dest = fd_data->dst_port;
- ip->saddr = fd_data->src_ip;
- sctp->source = fd_data->src_port;
- if (fd_data->flex_filter) {
- u8 *payload = raw_packet + I40E_SCTPIP_DUMMY_PACKET_LEN;
- __be16 pattern = fd_data->flex_word;
- u16 off = fd_data->flex_offset;
- *((__force __be16 *)(payload + off)) = pattern;
- }
- fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_SCTP;
- ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
- if (ret) {
- dev_info(&pf->pdev->dev,
- "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
- fd_data->pctype, fd_data->fd_id, ret);
- /* Free the packet buffer since it wasn't added to the ring */
- kfree(raw_packet);
- return -EOPNOTSUPP;
- } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
- if (add)
- dev_info(&pf->pdev->dev,
- "Filter OK for PCTYPE %d loc = %d\n",
- fd_data->pctype, fd_data->fd_id);
- else
- dev_info(&pf->pdev->dev,
- "Filter deleted for PCTYPE %d loc = %d\n",
- fd_data->pctype, fd_data->fd_id);
- }
- if (add)
- pf->fd_sctp4_filter_cnt++;
- else
- pf->fd_sctp4_filter_cnt--;
- return 0;
- }
- #define I40E_IP_DUMMY_PACKET_LEN 34
- /**
- * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
- * a specific flow spec
- * @vsi: pointer to the targeted VSI
- * @fd_data: the flow director data required for the FDir descriptor
- * @add: true adds a filter, false removes it
- *
- * Returns 0 if the filters were successfully added or removed
- **/
- static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
- struct i40e_fdir_filter *fd_data,
- bool add)
- {
- struct i40e_pf *pf = vsi->back;
- struct iphdr *ip;
- u8 *raw_packet;
- int ret;
- int i;
- static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
- 0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0};
- for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
- i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
- raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
- if (!raw_packet)
- return -ENOMEM;
- memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
- ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
- ip->saddr = fd_data->src_ip;
- ip->daddr = fd_data->dst_ip;
- ip->protocol = 0;
- if (fd_data->flex_filter) {
- u8 *payload = raw_packet + I40E_IP_DUMMY_PACKET_LEN;
- __be16 pattern = fd_data->flex_word;
- u16 off = fd_data->flex_offset;
- *((__force __be16 *)(payload + off)) = pattern;
- }
- fd_data->pctype = i;
- ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
- if (ret) {
- dev_info(&pf->pdev->dev,
- "PCTYPE:%d, Filter command send failed for fd_id:%d (ret = %d)\n",
- fd_data->pctype, fd_data->fd_id, ret);
- /* The packet buffer wasn't added to the ring so we
- * need to free it now.
- */
- kfree(raw_packet);
- return -EOPNOTSUPP;
- } else if (I40E_DEBUG_FD & pf->hw.debug_mask) {
- if (add)
- dev_info(&pf->pdev->dev,
- "Filter OK for PCTYPE %d loc = %d\n",
- fd_data->pctype, fd_data->fd_id);
- else
- dev_info(&pf->pdev->dev,
- "Filter deleted for PCTYPE %d loc = %d\n",
- fd_data->pctype, fd_data->fd_id);
- }
- }
- if (add)
- pf->fd_ip4_filter_cnt++;
- else
- pf->fd_ip4_filter_cnt--;
- return 0;
- }
- /**
- * i40e_add_del_fdir - Build raw packets to add/del fdir filter
- * @vsi: pointer to the targeted VSI
- * @input: filter to add or delete
- * @add: true adds a filter, false removes it
- *
- **/
- int i40e_add_del_fdir(struct i40e_vsi *vsi,
- struct i40e_fdir_filter *input, bool add)
- {
- struct i40e_pf *pf = vsi->back;
- int ret;
- switch (input->flow_type & ~FLOW_EXT) {
- case TCP_V4_FLOW:
- ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
- break;
- case UDP_V4_FLOW:
- ret = i40e_add_del_fdir_udpv4(vsi, input, add);
- break;
- case SCTP_V4_FLOW:
- ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
- break;
- case IP_USER_FLOW:
- switch (input->ip4_proto) {
- case IPPROTO_TCP:
- ret = i40e_add_del_fdir_tcpv4(vsi, input, add);
- break;
- case IPPROTO_UDP:
- ret = i40e_add_del_fdir_udpv4(vsi, input, add);
- break;
- case IPPROTO_SCTP:
- ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
- break;
- case IPPROTO_IP:
- ret = i40e_add_del_fdir_ipv4(vsi, input, add);
- break;
- default:
- /* We cannot support masking based on protocol */
- dev_info(&pf->pdev->dev, "Unsupported IPv4 protocol 0x%02x\n",
- input->ip4_proto);
- return -EINVAL;
- }
- break;
- default:
- dev_info(&pf->pdev->dev, "Unsupported flow type 0x%02x\n",
- input->flow_type);
- return -EINVAL;
- }
- /* The buffer allocated here will normally be freed by
- * i40e_clean_fdir_tx_irq() as it reclaims resources after transmit
- * completion. In the event of an error adding the buffer to the FDIR
- * ring, it will immediately be freed. It may also be freed by
- * i40e_clean_tx_ring() when closing the VSI.
- */
- return ret;
- }
- /**
- * i40e_fd_handle_status - check the Programming Status for FD
- * @rx_ring: the Rx ring for this descriptor
- * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
- * @prog_id: the id originally used for programming
- *
- * This is used to verify whether the FD programming or invalidation
- * requested by SW of the HW was successful, and to take action accordingly.
- **/
- void i40e_fd_handle_status(struct i40e_ring *rx_ring,
- union i40e_rx_desc *rx_desc, u8 prog_id)
- {
- struct i40e_pf *pf = rx_ring->vsi->back;
- struct pci_dev *pdev = pf->pdev;
- u32 fcnt_prog, fcnt_avail;
- u32 error;
- u64 qw;
- qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
- error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
- I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
- if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
- pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
- if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
- (I40E_DEBUG_FD & pf->hw.debug_mask))
- dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
- pf->fd_inv);
- /* Check if the programming error is for ATR.
- * If so, auto disable ATR and set a state for
- * flush in progress. Next time we come here if flush is in
- * progress do nothing, once flush is complete the state will
- * be cleared.
- */
- if (test_bit(__I40E_FD_FLUSH_REQUESTED, pf->state))
- return;
- pf->fd_add_err++;
- /* store the current atr filter count */
- pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
- if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
- test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) {
- /* These set_bit() calls aren't atomic with the
- * test_bit() here, but worst case we potentially
- * disable ATR and queue a flush right after SB
- * support is re-enabled. That shouldn't cause an
- * issue in practice
- */
- set_bit(__I40E_FD_ATR_AUTO_DISABLED, pf->state);
- set_bit(__I40E_FD_FLUSH_REQUESTED, pf->state);
- }
- /* filter programming failed most likely due to table full */
- fcnt_prog = i40e_get_global_fd_count(pf);
- fcnt_avail = pf->fdir_pf_filter_count;
- /* If ATR is running fcnt_prog can quickly change,
- * if we are very close to full, it makes sense to disable
- * FD ATR/SB and then re-enable it when there is room.
- */
- if (fcnt_prog >= (fcnt_avail - I40E_FDIR_BUFFER_FULL_MARGIN)) {
- if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) &&
- !test_and_set_bit(__I40E_FD_SB_AUTO_DISABLED,
- pf->state))
- if (I40E_DEBUG_FD & pf->hw.debug_mask)
- dev_warn(&pdev->dev, "FD filter space full, new ntuple rules will not be added\n");
- }
- } else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
- if (I40E_DEBUG_FD & pf->hw.debug_mask)
- dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
- rx_desc->wb.qword0.hi_dword.fd_id);
- }
- }
- /**
- * i40e_unmap_and_free_tx_resource - Release a Tx buffer
- * @ring: the ring that owns the buffer
- * @tx_buffer: the buffer to free
- **/
- static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring,
- struct i40e_tx_buffer *tx_buffer)
- {
- if (tx_buffer->skb) {
- if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB)
- kfree(tx_buffer->raw_buf);
- else if (ring_is_xdp(ring))
- xdp_return_frame(tx_buffer->xdpf);
- else
- dev_kfree_skb_any(tx_buffer->skb);
- if (dma_unmap_len(tx_buffer, len))
- dma_unmap_single(ring->dev,
- dma_unmap_addr(tx_buffer, dma),
- dma_unmap_len(tx_buffer, len),
- DMA_TO_DEVICE);
- } else if (dma_unmap_len(tx_buffer, len)) {
- dma_unmap_page(ring->dev,
- dma_unmap_addr(tx_buffer, dma),
- dma_unmap_len(tx_buffer, len),
- DMA_TO_DEVICE);
- }
- tx_buffer->next_to_watch = NULL;
- tx_buffer->skb = NULL;
- dma_unmap_len_set(tx_buffer, len, 0);
- /* tx_buffer must be completely set up in the transmit path */
- }
- /**
- * i40e_clean_tx_ring - Free any empty Tx buffers
- * @tx_ring: ring to be cleaned
- **/
- void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
- {
- unsigned long bi_size;
- u16 i;
- if (ring_is_xdp(tx_ring) && tx_ring->xsk_umem) {
- i40e_xsk_clean_tx_ring(tx_ring);
- } else {
- /* ring already cleared, nothing to do */
- if (!tx_ring->tx_bi)
- return;
- /* Free all the Tx ring sk_buffs */
- for (i = 0; i < tx_ring->count; i++)
- i40e_unmap_and_free_tx_resource(tx_ring,
- &tx_ring->tx_bi[i]);
- }
- bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
- memset(tx_ring->tx_bi, 0, bi_size);
- /* Zero out the descriptor ring */
- memset(tx_ring->desc, 0, tx_ring->size);
- tx_ring->next_to_use = 0;
- tx_ring->next_to_clean = 0;
- if (!tx_ring->netdev)
- return;
- /* cleanup Tx queue statistics */
- netdev_tx_reset_queue(txring_txq(tx_ring));
- }
- /**
- * i40e_free_tx_resources - Free Tx resources per queue
- * @tx_ring: Tx descriptor ring for a specific queue
- *
- * Free all transmit software resources
- **/
- void i40e_free_tx_resources(struct i40e_ring *tx_ring)
- {
- i40e_clean_tx_ring(tx_ring);
- kfree(tx_ring->tx_bi);
- tx_ring->tx_bi = NULL;
- if (tx_ring->desc) {
- dma_free_coherent(tx_ring->dev, tx_ring->size,
- tx_ring->desc, tx_ring->dma);
- tx_ring->desc = NULL;
- }
- }
- /**
- * i40e_get_tx_pending - how many tx descriptors not processed
- * @ring: the ring of descriptors
- * @in_sw: use SW variables
- *
- * Since there is no access to the ring head register
- * in XL710, we need to use our local copies
- **/
- u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
- {
- u32 head, tail;
- if (!in_sw) {
- head = i40e_get_head(ring);
- tail = readl(ring->tail);
- } else {
- head = ring->next_to_clean;
- tail = ring->next_to_use;
- }
- if (head != tail)
- return (head < tail) ?
- tail - head : (tail + ring->count - head);
- return 0;
- }
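- /* [Editorial note] The head/tail arithmetic above is plain modular ring
-  * distance. For example, with count = 512, head = 500 and tail = 3 the
-  * pending count is 3 + 512 - 500 = 15. A standalone restatement:
-  */
- #if 0
- static unsigned int ring_pending(unsigned int head, unsigned int tail,
- 				 unsigned int count)
- {
- 	if (head == tail)
- 		return 0;
- 	return (head < tail) ? tail - head : tail + count - head;
- }
- #endif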
- /**
- * i40e_detect_recover_hung - Function to detect and recover hung queues
- * @vsi: pointer to vsi struct with tx queues
- *
- * VSI has netdev and netdev has TX queues. This function checks each of
- * those TX queues and triggers recovery with a SW interrupt for any that
- * appear hung.
- **/
- void i40e_detect_recover_hung(struct i40e_vsi *vsi)
- {
- struct i40e_ring *tx_ring = NULL;
- struct net_device *netdev;
- unsigned int i;
- int packets;
- if (!vsi)
- return;
- if (test_bit(__I40E_VSI_DOWN, vsi->state))
- return;
- netdev = vsi->netdev;
- if (!netdev)
- return;
- if (!netif_carrier_ok(netdev))
- return;
- for (i = 0; i < vsi->num_queue_pairs; i++) {
- tx_ring = vsi->tx_rings[i];
- if (tx_ring && tx_ring->desc) {
- /* If packet counter has not changed the queue is
- * likely stalled, so force an interrupt for this
- * queue.
- *
- * prev_pkt_ctr would be negative if there was no
- * pending work.
- */
- packets = tx_ring->stats.packets & INT_MAX;
- if (tx_ring->tx_stats.prev_pkt_ctr == packets) {
- i40e_force_wb(vsi, tx_ring->q_vector);
- continue;
- }
- /* Memory barrier between read of packet count and call
- * to i40e_get_tx_pending()
- */
- smp_rmb();
- tx_ring->tx_stats.prev_pkt_ctr =
- i40e_get_tx_pending(tx_ring, true) ? packets : -1;
- }
- }
- }
- /**
- * i40e_clean_tx_irq - Reclaim resources after transmit completes
- * @vsi: the VSI we care about
- * @tx_ring: Tx ring to clean
- * @napi_budget: Used to determine if we are in netpoll
- *
- * Returns true if there's any budget left (i.e. the clean is finished)
- **/
- static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
- struct i40e_ring *tx_ring, int napi_budget)
- {
- int i = tx_ring->next_to_clean;
- struct i40e_tx_buffer *tx_buf;
- struct i40e_tx_desc *tx_head;
- struct i40e_tx_desc *tx_desc;
- unsigned int total_bytes = 0, total_packets = 0;
- unsigned int budget = vsi->work_limit;
- tx_buf = &tx_ring->tx_bi[i];
- tx_desc = I40E_TX_DESC(tx_ring, i);
- i -= tx_ring->count;
- tx_head = I40E_TX_DESC(tx_ring, i40e_get_head(tx_ring));
- do {
- struct i40e_tx_desc *eop_desc = tx_buf->next_to_watch;
- /* if next_to_watch is not set then there is no work pending */
- if (!eop_desc)
- break;
- /* prevent any other reads prior to eop_desc */
- smp_rmb();
- i40e_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf);
- /* we have caught up to head, no work left to do */
- if (tx_head == tx_desc)
- break;
- /* clear next_to_watch to prevent false hangs */
- tx_buf->next_to_watch = NULL;
- /* update the statistics for this packet */
- total_bytes += tx_buf->bytecount;
- total_packets += tx_buf->gso_segs;
- /* free the skb/XDP data */
- if (ring_is_xdp(tx_ring))
- xdp_return_frame(tx_buf->xdpf);
- else
- napi_consume_skb(tx_buf->skb, napi_budget);
- /* unmap skb header data */
- dma_unmap_single(tx_ring->dev,
- dma_unmap_addr(tx_buf, dma),
- dma_unmap_len(tx_buf, len),
- DMA_TO_DEVICE);
- /* clear tx_buffer data */
- tx_buf->skb = NULL;
- dma_unmap_len_set(tx_buf, len, 0);
- /* unmap remaining buffers */
- while (tx_desc != eop_desc) {
- i40e_trace(clean_tx_irq_unmap,
- tx_ring, tx_desc, tx_buf);
- tx_buf++;
- tx_desc++;
- i++;
- if (unlikely(!i)) {
- i -= tx_ring->count;
- tx_buf = tx_ring->tx_bi;
- tx_desc = I40E_TX_DESC(tx_ring, 0);
- }
- /* unmap any remaining paged data */
- if (dma_unmap_len(tx_buf, len)) {
- dma_unmap_page(tx_ring->dev,
- dma_unmap_addr(tx_buf, dma),
- dma_unmap_len(tx_buf, len),
- DMA_TO_DEVICE);
- dma_unmap_len_set(tx_buf, len, 0);
- }
- }
- /* move us one more past the eop_desc for start of next pkt */
- tx_buf++;
- tx_desc++;
- i++;
- if (unlikely(!i)) {
- i -= tx_ring->count;
- tx_buf = tx_ring->tx_bi;
- tx_desc = I40E_TX_DESC(tx_ring, 0);
- }
- prefetch(tx_desc);
- /* update budget accounting */
- budget--;
- } while (likely(budget));
- i += tx_ring->count;
- tx_ring->next_to_clean = i;
- i40e_update_tx_stats(tx_ring, total_packets, total_bytes);
- i40e_arm_wb(tx_ring, vsi, budget);
- if (ring_is_xdp(tx_ring))
- return !!budget;
- /* notify netdev of completed buffers */
- netdev_tx_completed_queue(txring_txq(tx_ring),
- total_packets, total_bytes);
- #define TX_WAKE_THRESHOLD ((s16)(DESC_NEEDED * 2))
- if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
- (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
- /* Make sure that anybody stopping the queue after this
- * sees the new next_to_clean.
- */
- smp_mb();
- if (__netif_subqueue_stopped(tx_ring->netdev,
- tx_ring->queue_index) &&
- !test_bit(__I40E_VSI_DOWN, vsi->state)) {
- netif_wake_subqueue(tx_ring->netdev,
- tx_ring->queue_index);
- ++tx_ring->tx_stats.restart_queue;
- }
- }
- return !!budget;
- }
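- /* [Editorial note] i40e_clean_tx_irq() walks the ring with a negative
-  * index (note the "i -= tx_ring->count" before the loop): the real slot
-  * is i + count, and the wrap check reduces to a cheap test for zero
-  * instead of a compare against count. A stripped-down sketch of just
-  * that iteration trick:
-  */
- #if 0
- static void walk_ring(int start, int count, int steps)
- {
- 	int i = start - count;		/* i stays in [-count, 0) */
-
- 	while (steps--) {
- 		i++;
- 		if (!i)			/* walked past the last slot */
- 			i -= count;	/* wrap back to slot 0 */
- 		/* current slot index is i + count */
- 	}
- }
- #endif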
- /**
- * i40e_enable_wb_on_itr - Arm hardware to do a wb, interrupts are not enabled
- * @vsi: the VSI we care about
- * @q_vector: the vector on which to enable writeback
- *
- **/
- static void i40e_enable_wb_on_itr(struct i40e_vsi *vsi,
- struct i40e_q_vector *q_vector)
- {
- u16 flags = q_vector->tx.ring[0].flags;
- u32 val;
- if (!(flags & I40E_TXR_FLAGS_WB_ON_ITR))
- return;
- if (q_vector->arm_wb_state)
- return;
- if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
- val = I40E_PFINT_DYN_CTLN_WB_ON_ITR_MASK |
- I40E_PFINT_DYN_CTLN_ITR_INDX_MASK; /* set noitr */
- wr32(&vsi->back->hw,
- I40E_PFINT_DYN_CTLN(q_vector->reg_idx),
- val);
- } else {
- val = I40E_PFINT_DYN_CTL0_WB_ON_ITR_MASK |
- I40E_PFINT_DYN_CTL0_ITR_INDX_MASK; /* set noitr */
- wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
- }
- q_vector->arm_wb_state = true;
- }
- /**
- * i40e_force_wb - Issue SW Interrupt so HW does a wb
- * @vsi: the VSI we care about
- * @q_vector: the vector on which to force writeback
- *
- **/
- void i40e_force_wb(struct i40e_vsi *vsi, struct i40e_q_vector *q_vector)
- {
- if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
- u32 val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
- I40E_PFINT_DYN_CTLN_ITR_INDX_MASK | /* set noitr */
- I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
- I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK;
- /* allow 00 to be written to the index */
- wr32(&vsi->back->hw,
- I40E_PFINT_DYN_CTLN(q_vector->reg_idx), val);
- } else {
- u32 val = I40E_PFINT_DYN_CTL0_INTENA_MASK |
- I40E_PFINT_DYN_CTL0_ITR_INDX_MASK | /* set noitr */
- I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK |
- I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK;
- /* allow 00 to be written to the index */
- wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0, val);
- }
- }
- static inline bool i40e_container_is_rx(struct i40e_q_vector *q_vector,
- struct i40e_ring_container *rc)
- {
- return &q_vector->rx == rc;
- }
- static inline unsigned int i40e_itr_divisor(struct i40e_q_vector *q_vector)
- {
- unsigned int divisor;
- switch (q_vector->vsi->back->hw.phy.link_info.link_speed) {
- case I40E_LINK_SPEED_40GB:
- divisor = I40E_ITR_ADAPTIVE_MIN_INC * 1024;
- break;
- case I40E_LINK_SPEED_25GB:
- case I40E_LINK_SPEED_20GB:
- divisor = I40E_ITR_ADAPTIVE_MIN_INC * 512;
- break;
- default:
- case I40E_LINK_SPEED_10GB:
- divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256;
- break;
- case I40E_LINK_SPEED_1GB:
- case I40E_LINK_SPEED_100MB:
- divisor = I40E_ITR_ADAPTIVE_MIN_INC * 32;
- break;
- }
- return divisor;
- }
- /**
- * i40e_update_itr - update the dynamic ITR value based on statistics
- * @q_vector: structure containing interrupt and ring information
- * @rc: structure containing ring performance data
- *
- * Stores a new ITR value based on packets and byte
- * counts during the last interrupt. The advantage of per interrupt
- * computation is faster updates and more accurate ITR for the current
- * traffic pattern. Constants in this function were computed
- * based on theoretical maximum wire speed and thresholds were set based
- * on testing data as well as attempting to minimize response time
- * while increasing bulk throughput.
- **/
- static void i40e_update_itr(struct i40e_q_vector *q_vector,
- struct i40e_ring_container *rc)
- {
- unsigned int avg_wire_size, packets, bytes, itr;
- unsigned long next_update = jiffies;
- /* If we don't have any rings just leave ourselves set for maximum
- * possible latency so we take ourselves out of the equation.
- */
- if (!rc->ring || !ITR_IS_DYNAMIC(rc->ring->itr_setting))
- return;
- /* For Rx we want to push the delay up and default to low latency.
- * for Tx we want to pull the delay down and default to high latency.
- */
- itr = i40e_container_is_rx(q_vector, rc) ?
- I40E_ITR_ADAPTIVE_MIN_USECS | I40E_ITR_ADAPTIVE_LATENCY :
- I40E_ITR_ADAPTIVE_MAX_USECS | I40E_ITR_ADAPTIVE_LATENCY;
- /* If we didn't update within up to 1 - 2 jiffies we can assume
- * that either packets are coming in so slow there hasn't been
- * any work, or that there is so much work that NAPI is dealing
- * with interrupt moderation and we don't need to do anything.
- */
- if (time_after(next_update, rc->next_update))
- goto clear_counts;
- /* If itr_countdown is set it means we programmed an ITR within
- * the last 4 interrupt cycles. This has a side effect of us
- * potentially firing an early interrupt. In order to work around
- * this we need to throw out any data received for a few
- * interrupts following the update.
- */
- if (q_vector->itr_countdown) {
- itr = rc->target_itr;
- goto clear_counts;
- }
- packets = rc->total_packets;
- bytes = rc->total_bytes;
- if (i40e_container_is_rx(q_vector, rc)) {
- /* On Rx, if there are 1 to 4 packets and fewer than 9000 bytes,
- * assume there is insufficient data to use the bulk rate limiting
- * approach unless Tx is already in bulk rate limiting. We are
- * likely latency driven.
- */
- if (packets && packets < 4 && bytes < 9000 &&
- (q_vector->tx.target_itr & I40E_ITR_ADAPTIVE_LATENCY)) {
- itr = I40E_ITR_ADAPTIVE_LATENCY;
- goto adjust_by_size;
- }
- } else if (packets < 4) {
- /* If we have Tx and Rx ITR maxed and Tx ITR is running in
- * bulk mode and we are receiving 4 or fewer packets just
- * reset the ITR_ADAPTIVE_LATENCY bit for latency mode so
- * that the Rx can relax.
- */
- if (rc->target_itr == I40E_ITR_ADAPTIVE_MAX_USECS &&
- (q_vector->rx.target_itr & I40E_ITR_MASK) ==
- I40E_ITR_ADAPTIVE_MAX_USECS)
- goto clear_counts;
- } else if (packets > 32) {
- /* If we have processed over 32 packets in a single interrupt
- * for Tx assume we need to switch over to "bulk" mode.
- */
- rc->target_itr &= ~I40E_ITR_ADAPTIVE_LATENCY;
- }
- /* We have no packets to actually measure against. This means
- * either one of the other queues on this vector is active or
- * we are a Tx queue doing TSO with too high of an interrupt rate.
- *
- * Between 4 and 56 we can assume that our current interrupt delay
- * is only slightly too low. As such we should increase it by a small
- * fixed amount.
- */
- if (packets < 56) {
- itr = rc->target_itr + I40E_ITR_ADAPTIVE_MIN_INC;
- if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
- itr &= I40E_ITR_ADAPTIVE_LATENCY;
- itr += I40E_ITR_ADAPTIVE_MAX_USECS;
- }
- goto clear_counts;
- }
- if (packets <= 256) {
- itr = min(q_vector->tx.current_itr, q_vector->rx.current_itr);
- itr &= I40E_ITR_MASK;
- /* Between 56 and 112 is our "goldilocks" zone where we are
- * working out "just right". Just report that our current
- * ITR is good for us.
- */
- if (packets <= 112)
- goto clear_counts;
- /* If packet count is 128 or greater we are likely looking
- * at a slight overrun of the delay we want. Try halving
- * our delay to see if that will cut the number of packets
- * in half per interrupt.
- */
- itr /= 2;
- itr &= I40E_ITR_MASK;
- if (itr < I40E_ITR_ADAPTIVE_MIN_USECS)
- itr = I40E_ITR_ADAPTIVE_MIN_USECS;
- goto clear_counts;
- }
- /* The paths below assume we are dealing with a bulk ITR since
- * number of packets is greater than 256. We are just going to have
- * to compute a value and try to bring the count under control,
- * though for smaller packet sizes there isn't much we can do as
- * NAPI polling will likely be kicking in sooner rather than later.
- */
- itr = I40E_ITR_ADAPTIVE_BULK;
- adjust_by_size:
- /* If packet counts are 256 or greater we can assume we have a gross
- * overestimation of what the rate should be. Instead of trying to fine
- * tune it just use the formula below to try and dial in an exact value
- * give the current packet size of the frame.
- */
- avg_wire_size = bytes / packets;
- /* The following is a crude approximation of:
- * wmem_default / (size + overhead) = desired_pkts_per_int
- * rate / bits_per_byte / (size + ethernet overhead) = pkt_rate
- * (desired_pkt_rate / pkt_rate) * usecs_per_sec = ITR value
- *
- * Assuming wmem_default is 212992 and overhead is 640 bytes per
- * packet, (256 skb, 64 headroom, 320 shared info), we can reduce the
- * formula down to
- *
- * (170 * (size + 24)) / (size + 640) = ITR
- *
- * We first do some math on the packet size and then finally bitshift
- * by 8 after rounding up. We also have to account for PCIe link speed
- * difference as ITR scales based on this.
- */
- if (avg_wire_size <= 60) {
- /* Start at 250k ints/sec */
- avg_wire_size = 4096;
- } else if (avg_wire_size <= 380) {
- /* 250K ints/sec to 60K ints/sec */
- avg_wire_size *= 40;
- avg_wire_size += 1696;
- } else if (avg_wire_size <= 1084) {
- /* 60K ints/sec to 36K ints/sec */
- avg_wire_size *= 15;
- avg_wire_size += 11452;
- } else if (avg_wire_size <= 1980) {
- /* 36K ints/sec to 30K ints/sec */
- avg_wire_size *= 5;
- avg_wire_size += 22420;
- } else {
- /* plateau at a limit of 30K ints/sec */
- avg_wire_size = 32256;
- }
- /* If we are in low latency mode halve our delay which doubles the
- * rate to somewhere between 100K and 16K ints/sec
- */
- if (itr & I40E_ITR_ADAPTIVE_LATENCY)
- avg_wire_size /= 2;
- /* Resultant value is 256 times larger than it needs to be. This
- * gives us room to adjust the value as needed to either increase
- * or decrease the value based on link speeds of 10G, 2.5G, 1G, etc.
- *
- * Use addition as we have already recorded the new latency flag
- * for the ITR value.
- */
- itr += DIV_ROUND_UP(avg_wire_size, i40e_itr_divisor(q_vector)) *
- I40E_ITR_ADAPTIVE_MIN_INC;
- if ((itr & I40E_ITR_MASK) > I40E_ITR_ADAPTIVE_MAX_USECS) {
- itr &= I40E_ITR_ADAPTIVE_LATENCY;
- itr += I40E_ITR_ADAPTIVE_MAX_USECS;
- }
- clear_counts:
- /* write back value */
- rc->target_itr = itr;
- /* next update should occur within next jiffy */
- rc->next_update = next_update + 1;
- rc->total_bytes = 0;
- rc->total_packets = 0;
- }
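- /* [Editorial note] Worked example for the piecewise approximation
-  * above: a 1500-byte average frame falls in the "<= 1980" bucket, so
-  * avg_wire_size = 1500 * 5 + 22420 = 29920. The result is ~256x the
-  * target, so at 10G (divisor = I40E_ITR_ADAPTIVE_MIN_INC * 256) the
-  * increment works out to roughly 29920 / 256 ~= 117 usecs. As a check:
-  */
- #if 0
- static unsigned int approx_itr_scaled(unsigned int avg_wire_size)
- {
- 	if (avg_wire_size <= 60)
- 		return 4096;			/* ~250K ints/sec */
- 	if (avg_wire_size <= 380)
- 		return avg_wire_size * 40 + 1696;
- 	if (avg_wire_size <= 1084)
- 		return avg_wire_size * 15 + 11452;
- 	if (avg_wire_size <= 1980)
- 		return avg_wire_size * 5 + 22420;
- 	return 32256;				/* plateau, ~30K ints/sec */
- }
- #endif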
- /**
- * i40e_reuse_rx_page - page flip buffer and store it back on the ring
- * @rx_ring: rx descriptor ring to store buffers on
- * @old_buff: donor buffer to have page reused
- *
- * Synchronizes page for reuse by the adapter
- **/
- static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
- struct i40e_rx_buffer *old_buff)
- {
- struct i40e_rx_buffer *new_buff;
- u16 nta = rx_ring->next_to_alloc;
- new_buff = &rx_ring->rx_bi[nta];
- /* update, and store next to alloc */
- nta++;
- rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
- /* transfer page from old buffer to new buffer */
- new_buff->dma = old_buff->dma;
- new_buff->page = old_buff->page;
- new_buff->page_offset = old_buff->page_offset;
- new_buff->pagecnt_bias = old_buff->pagecnt_bias;
- rx_ring->rx_stats.page_reuse_count++;
- /* clear contents of buffer_info */
- old_buff->page = NULL;
- }
- /**
- * i40e_rx_is_programming_status - check for programming status descriptor
- * @qw: qword representing status_error_len in CPU ordering
- *
- * The value in the descriptor length field indicates whether this is
- * a programming status descriptor for flow director or FCoE, signalled
- * by the value I40E_RX_PROG_STATUS_DESC_LENGTH; otherwise it is a
- * packet descriptor.
- **/
- static inline bool i40e_rx_is_programming_status(u64 qw)
- {
- /* The Rx filter programming status and SPH bit occupy the same
- * spot in the descriptor. Since we don't support packet split we
- * can just reuse the bit as an indication that this is a
- * programming status descriptor.
- */
- return qw & I40E_RXD_QW1_LENGTH_SPH_MASK;
- }
- /**
- * i40e_clean_programming_status - try to clean the programming status descriptor
- * @rx_ring: the rx ring that has this descriptor
- * @rx_desc: the rx descriptor written back by HW
- * @qw: qword representing status_error_len in CPU ordering
- *
- * Flow director should handle FD_FILTER_STATUS to check whether its filter
- * programming was successful and take action accordingly. FCoE should
- * handle its context/filter programming/invalidation status and take actions.
- *
- * Returns an i40e_rx_buffer to reuse if the cleanup occurred, otherwise NULL.
- **/
- struct i40e_rx_buffer *i40e_clean_programming_status(
- struct i40e_ring *rx_ring,
- union i40e_rx_desc *rx_desc,
- u64 qw)
- {
- struct i40e_rx_buffer *rx_buffer;
- u32 ntc;
- u8 id;
- if (!i40e_rx_is_programming_status(qw))
- return NULL;
- ntc = rx_ring->next_to_clean;
- /* fetch, update, and store next to clean */
- rx_buffer = &rx_ring->rx_bi[ntc++];
- ntc = (ntc < rx_ring->count) ? ntc : 0;
- rx_ring->next_to_clean = ntc;
- prefetch(I40E_RX_DESC(rx_ring, ntc));
- id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
- I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
- if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
- i40e_fd_handle_status(rx_ring, rx_desc, id);
- return rx_buffer;
- }
- /**
- * i40e_setup_tx_descriptors - Allocate the Tx descriptors
- * @tx_ring: the tx ring to set up
- *
- * Return 0 on success, negative on error
- **/
- int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
- {
- struct device *dev = tx_ring->dev;
- int bi_size;
- if (!dev)
- return -ENOMEM;
- /* warn if we are about to overwrite the pointer */
- WARN_ON(tx_ring->tx_bi);
- bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
- tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
- if (!tx_ring->tx_bi)
- goto err;
- u64_stats_init(&tx_ring->syncp);
- /* round up to nearest 4K */
- tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
- /* add u32 for head writeback, align after this takes care of
- * guaranteeing this is at least one cache line in size
- */
- tx_ring->size += sizeof(u32);
- tx_ring->size = ALIGN(tx_ring->size, 4096);
- tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
- &tx_ring->dma, GFP_KERNEL);
- if (!tx_ring->desc) {
- dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
- tx_ring->size);
- goto err;
- }
- tx_ring->next_to_use = 0;
- tx_ring->next_to_clean = 0;
- tx_ring->tx_stats.prev_pkt_ctr = -1;
- return 0;
- err:
- kfree(tx_ring->tx_bi);
- tx_ring->tx_bi = NULL;
- return -ENOMEM;
- }
- /**
- * i40e_clean_rx_ring - Free Rx buffers
- * @rx_ring: ring to be cleaned
- **/
- void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
- {
- unsigned long bi_size;
- u16 i;
- /* ring already cleared, nothing to do */
- if (!rx_ring->rx_bi)
- return;
- if (rx_ring->skb) {
- dev_kfree_skb(rx_ring->skb);
- rx_ring->skb = NULL;
- }
- if (rx_ring->xsk_umem) {
- i40e_xsk_clean_rx_ring(rx_ring);
- goto skip_free;
- }
- /* Free all the Rx ring sk_buffs */
- for (i = 0; i < rx_ring->count; i++) {
- struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
- if (!rx_bi->page)
- continue;
- /* Invalidate cache lines that may have been written to by
- * device so that we avoid corrupting memory.
- */
- dma_sync_single_range_for_cpu(rx_ring->dev,
- rx_bi->dma,
- rx_bi->page_offset,
- rx_ring->rx_buf_len,
- DMA_FROM_DEVICE);
- /* free resources associated with mapping */
- dma_unmap_page_attrs(rx_ring->dev, rx_bi->dma,
- i40e_rx_pg_size(rx_ring),
- DMA_FROM_DEVICE,
- I40E_RX_DMA_ATTR);
- __page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias);
- rx_bi->page = NULL;
- rx_bi->page_offset = 0;
- }
- skip_free:
- bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
- memset(rx_ring->rx_bi, 0, bi_size);
- /* Zero out the descriptor ring */
- memset(rx_ring->desc, 0, rx_ring->size);
- rx_ring->next_to_alloc = 0;
- rx_ring->next_to_clean = 0;
- rx_ring->next_to_use = 0;
- }
- /**
- * i40e_free_rx_resources - Free Rx resources
- * @rx_ring: ring to clean the resources from
- *
- * Free all receive software resources
- **/
- void i40e_free_rx_resources(struct i40e_ring *rx_ring)
- {
- i40e_clean_rx_ring(rx_ring);
- if (rx_ring->vsi->type == I40E_VSI_MAIN)
- xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
- rx_ring->xdp_prog = NULL;
- kfree(rx_ring->rx_bi);
- rx_ring->rx_bi = NULL;
- if (rx_ring->desc) {
- dma_free_coherent(rx_ring->dev, rx_ring->size,
- rx_ring->desc, rx_ring->dma);
- rx_ring->desc = NULL;
- }
- }
- /**
- * i40e_setup_rx_descriptors - Allocate Rx descriptors
- * @rx_ring: Rx descriptor ring (for a specific queue) to setup
- *
- * Returns 0 on success, negative on failure
- **/
- int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
- {
- struct device *dev = rx_ring->dev;
- int err = -ENOMEM;
- int bi_size;
- /* warn if we are about to overwrite the pointer */
- WARN_ON(rx_ring->rx_bi);
- bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
- rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
- if (!rx_ring->rx_bi)
- goto err;
- u64_stats_init(&rx_ring->syncp);
- /* Round up to nearest 4K */
- rx_ring->size = rx_ring->count * sizeof(union i40e_32byte_rx_desc);
- rx_ring->size = ALIGN(rx_ring->size, 4096);
- rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
- &rx_ring->dma, GFP_KERNEL);
- if (!rx_ring->desc) {
- dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
- rx_ring->size);
- goto err;
- }
- rx_ring->next_to_alloc = 0;
- rx_ring->next_to_clean = 0;
- rx_ring->next_to_use = 0;
- /* XDP RX-queue info only needed for RX rings exposed to XDP */
- if (rx_ring->vsi->type == I40E_VSI_MAIN) {
- err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
- rx_ring->queue_index);
- if (err < 0)
- goto err;
- }
- rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
- return 0;
- err:
- kfree(rx_ring->rx_bi);
- rx_ring->rx_bi = NULL;
- return err;
- }
- /**
- * i40e_release_rx_desc - Store the new tail and head values
- * @rx_ring: ring to bump
- * @val: new head index
- **/
- void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
- {
- rx_ring->next_to_use = val;
- /* update next to alloc since we have filled the ring */
- rx_ring->next_to_alloc = val;
- /* Force memory writes to complete before letting h/w
- * know there are new descriptors to fetch. (Only
- * applicable for weak-ordered memory model archs,
- * such as IA-64).
- */
- wmb();
- writel(val, rx_ring->tail);
- }
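- /* [Editorial note] The wmb()/writel() pair above is the standard
-  * producer-side ordering rule: all descriptor stores must be globally
-  * visible before the tail write tells hardware to fetch them. In shape
-  * (ring_doorbell() and struct ring are hypothetical, for illustration):
-  */
- #if 0
- static void ring_doorbell(struct ring *r, u32 ntu)
- {
- 	/* descriptor stores for slots up to ntu happen before this */
- 	wmb();
- 	writel(ntu, r->tail);
- }
- #endif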
- /**
- * i40e_rx_offset - Return expected offset into page to access data
- * @rx_ring: Ring we are requesting offset of
- *
- * Returns the offset value for ring into the data buffer.
- */
- static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
- {
- return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
- }
- /**
- * i40e_alloc_mapped_page - recycle or make a new page
- * @rx_ring: ring to use
- * @bi: rx_buffer struct to modify
- *
- * Returns true if the page was successfully allocated or
- * reused.
- **/
- static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
- struct i40e_rx_buffer *bi)
- {
- struct page *page = bi->page;
- dma_addr_t dma;
- /* since we are recycling buffers we should seldom need to alloc */
- if (likely(page)) {
- rx_ring->rx_stats.page_reuse_count++;
- return true;
- }
- /* alloc new page for storage */
- page = dev_alloc_pages(i40e_rx_pg_order(rx_ring));
- if (unlikely(!page)) {
- rx_ring->rx_stats.alloc_page_failed++;
- return false;
- }
- /* map page for use */
- dma = dma_map_page_attrs(rx_ring->dev, page, 0,
- i40e_rx_pg_size(rx_ring),
- DMA_FROM_DEVICE,
- I40E_RX_DMA_ATTR);
- /* if mapping failed free memory back to system since
- * there isn't much point in holding memory we can't use
- */
- if (dma_mapping_error(rx_ring->dev, dma)) {
- __free_pages(page, i40e_rx_pg_order(rx_ring));
- rx_ring->rx_stats.alloc_page_failed++;
- return false;
- }
- bi->dma = dma;
- bi->page = page;
- bi->page_offset = i40e_rx_offset(rx_ring);
- page_ref_add(page, USHRT_MAX - 1);
- bi->pagecnt_bias = USHRT_MAX;
- return true;
- }
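- /* [Editorial note] The page_ref_add(page, USHRT_MAX - 1) plus
-  * pagecnt_bias = USHRT_MAX pairing above front-loads the page refcount
-  * so the hot path can hand out references by decrementing the local
-  * bias with no atomic ops. A sketch of the reuse test implied by that
-  * scheme (can_recycle() is illustrative, not the driver's helper):
-  */
- #if 0
- static bool can_recycle(struct page *page, unsigned short pagecnt_bias)
- {
- 	/* the page is exclusively ours again when the live refcount
- 	 * minus the unspent bias is exactly our one reference
- 	 */
- 	return page_ref_count(page) - pagecnt_bias == 1;
- }
- #endif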
- /**
- * i40e_alloc_rx_buffers - Replace used receive buffers
- * @rx_ring: ring to place buffers on
- * @cleaned_count: number of buffers to replace
- *
- * Returns false if all allocations were successful, true if any fail
- **/
- bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
- {
- u16 ntu = rx_ring->next_to_use;
- union i40e_rx_desc *rx_desc;
- struct i40e_rx_buffer *bi;
- /* do nothing if no valid netdev defined */
- if (!rx_ring->netdev || !cleaned_count)
- return false;
- rx_desc = I40E_RX_DESC(rx_ring, ntu);
- bi = &rx_ring->rx_bi[ntu];
- do {
- if (!i40e_alloc_mapped_page(rx_ring, bi))
- goto no_buffers;
- /* sync the buffer for use by the device */
- dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
- bi->page_offset,
- rx_ring->rx_buf_len,
- DMA_FROM_DEVICE);
- /* Refresh the desc even if buffer_addrs didn't change
- * because each write-back erases this info.
- */
- rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
- rx_desc++;
- bi++;
- ntu++;
- if (unlikely(ntu == rx_ring->count)) {
- rx_desc = I40E_RX_DESC(rx_ring, 0);
- bi = rx_ring->rx_bi;
- ntu = 0;
- }
- /* clear the status bits for the next_to_use descriptor */
- rx_desc->wb.qword1.status_error_len = 0;
- cleaned_count--;
- } while (cleaned_count);
- if (rx_ring->next_to_use != ntu)
- i40e_release_rx_desc(rx_ring, ntu);
- return false;
- no_buffers:
- if (rx_ring->next_to_use != ntu)
- i40e_release_rx_desc(rx_ring, ntu);
- /* make sure to come back via polling to try again after
- * allocation failure
- */
- return true;
- }
- /**
- * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
- * @vsi: the VSI we care about
- * @skb: skb currently being received and modified
- * @rx_desc: the receive descriptor
- **/
- static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
- struct sk_buff *skb,
- union i40e_rx_desc *rx_desc)
- {
- struct i40e_rx_ptype_decoded decoded;
- u32 rx_error, rx_status;
- bool ipv4, ipv6;
- u8 ptype;
- u64 qword;
- qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
- ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >> I40E_RXD_QW1_PTYPE_SHIFT;
- rx_error = (qword & I40E_RXD_QW1_ERROR_MASK) >>
- I40E_RXD_QW1_ERROR_SHIFT;
- rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
- I40E_RXD_QW1_STATUS_SHIFT;
- decoded = decode_rx_desc_ptype(ptype);
- skb->ip_summed = CHECKSUM_NONE;
- skb_checksum_none_assert(skb);
- /* Rx csum enabled and ip headers found? */
- if (!(vsi->netdev->features & NETIF_F_RXCSUM))
- return;
- /* did the hardware decode the packet and checksum? */
- if (!(rx_status & BIT(I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
- return;
- /* both known and outer_ip must be set for the below code to work */
- if (!(decoded.known && decoded.outer_ip))
- return;
- ipv4 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
- (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4);
- ipv6 = (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP) &&
- (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6);
- if (ipv4 &&
- (rx_error & (BIT(I40E_RX_DESC_ERROR_IPE_SHIFT) |
- BIT(I40E_RX_DESC_ERROR_EIPE_SHIFT))))
- goto checksum_fail;
- /* likely incorrect csum if alternate IP extension headers found */
- if (ipv6 &&
- rx_status & BIT(I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT))
- /* don't increment checksum err here, non-fatal err */
- return;
- /* there was some L4 error, count error and punt packet to the stack */
- if (rx_error & BIT(I40E_RX_DESC_ERROR_L4E_SHIFT))
- goto checksum_fail;
- /* handle packets that were not able to be checksummed due
- * to arrival speed, in this case the stack can compute
- * the csum.
- */
- if (rx_error & BIT(I40E_RX_DESC_ERROR_PPRS_SHIFT))
- return;
- /* If there is an outer header present that might contain a checksum
- * we need to bump the checksum level by 1 to reflect the fact that
- * we are indicating we validated the inner checksum.
- */
- if (decoded.tunnel_type >= I40E_RX_PTYPE_TUNNEL_IP_GRENAT)
- skb->csum_level = 1;
- /* Only report checksum unnecessary for TCP, UDP, or SCTP */
- switch (decoded.inner_prot) {
- case I40E_RX_PTYPE_INNER_PROT_TCP:
- case I40E_RX_PTYPE_INNER_PROT_UDP:
- case I40E_RX_PTYPE_INNER_PROT_SCTP:
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- /* fall through */
- default:
- break;
- }
- return;
- checksum_fail:
- vsi->back->hw_csum_rx_error++;
- }
- /**
- * i40e_ptype_to_htype - get a hash type
- * @ptype: the ptype value from the descriptor
- *
- * Returns a hash type to be used by skb_set_hash
- **/
- static inline int i40e_ptype_to_htype(u8 ptype)
- {
- struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
- if (!decoded.known)
- return PKT_HASH_TYPE_NONE;
- if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
- decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
- return PKT_HASH_TYPE_L4;
- else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
- decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
- return PKT_HASH_TYPE_L3;
- else
- return PKT_HASH_TYPE_L2;
- }
- /**
- * i40e_rx_hash - set the hash value in the skb
- * @ring: descriptor ring
- * @rx_desc: specific descriptor
- * @skb: skb currently being received and modified
- * @rx_ptype: Rx packet type
- **/
- static inline void i40e_rx_hash(struct i40e_ring *ring,
- union i40e_rx_d…