diff options
author | Manish Chopra <manish.chopra@cavium.com> | 2018-05-17 22:05:00 +0300 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-05-18 00:06:53 +0300 |
commit | 8a8633978b842c88fbcfe00d4e5dde96048f630e (patch) | |
tree | e88013d9c363d612ce5f77d7ec5ada6115921003 /drivers/net/ethernet/qlogic/qede/qede_main.c | |
parent | 56a9a9e73783dcf48fa1bcbec56c643c36648eb6 (diff) | |
download | linux-8a8633978b842c88fbcfe00d4e5dde96048f630e.tar.xz |
qede: Add build_skb() support.
This patch makes use of build_skb() throughout the driver's receive
data path [HW gro flow and non HW gro flow]. With this, the driver can
build an skb directly from the page segments which are already mapped
to the hardware, instead of allocating a new skb via netdev_alloc_skb()
and memcpy'ing the data into it, which is quite costly.
This really improves performance in terms of CPU utilization (while
keeping the same, or slightly better, rx throughput). CPU utilization
is significantly reduced [almost half] in the non HW gro flow, where
for every incoming MTU sized packet the driver had to allocate an skb,
memcpy headers etc. Additionally, in that flow it also gets rid of a
bunch of additional overheads [eth_get_headlen() etc.] needed to split
headers and data in the skb.
Tested with:
system: 2 sockets, 4 cores per socket, hyperthreading, 2x4x2=16 cores
iperf [server]: iperf -s
iperf [client]: iperf -c <server_ip> -t 500 -i 10 -P 32
HW GRO off - w/o build_skb(), throughput: 36.8 Gbits/sec
Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle
Average: all 0.59 0.00 32.93 0.00 0.00 43.07 0.00 0.00 23.42
HW GRO off - with build_skb(), throughput: 36.9 Gbits/sec
Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle
Average: all 0.70 0.00 31.70 0.00 0.00 25.68 0.00 0.00 41.92
HW GRO on - w/o build_skb(), throughput: 36.9 Gbits/sec
Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle
Average: all 0.86 0.00 24.14 0.00 0.00 6.59 0.00 0.00 68.41
HW GRO on - with build_skb(), throughput: 37.5 Gbits/sec
Average: CPU %usr %nice %sys %iowait %irq %soft %steal %guest %idle
Average: all 0.87 0.00 23.75 0.00 0.00 6.19 0.00 0.00 69.19
Signed-off-by: Ariel Elior <ariel.elior@cavium.com>
Signed-off-by: Manish Chopra <manish.chopra@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/ethernet/qlogic/qede/qede_main.c')
-rw-r--r-- | drivers/net/ethernet/qlogic/qede/qede_main.c | 76 |
1 files changed, 15 insertions, 61 deletions
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 89c581c3c21a..40e2b923af39 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1197,30 +1197,8 @@ static void qede_free_rx_buffers(struct qede_dev *edev, } } -static void qede_free_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq) -{ - int i; - - if (edev->gro_disable) - return; - - for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) { - struct qede_agg_info *tpa_info = &rxq->tpa_info[i]; - struct sw_rx_data *replace_buf = &tpa_info->buffer; - - if (replace_buf->data) { - dma_unmap_page(&edev->pdev->dev, - replace_buf->mapping, - PAGE_SIZE, DMA_FROM_DEVICE); - __free_page(replace_buf->data); - } - } -} - static void qede_free_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq) { - qede_free_sge_mem(edev, rxq); - /* Free rx buffers */ qede_free_rx_buffers(edev, rxq); @@ -1232,45 +1210,15 @@ static void qede_free_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq) edev->ops->common->chain_free(edev->cdev, &rxq->rx_comp_ring); } -static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq) +static void qede_set_tpa_param(struct qede_rx_queue *rxq) { - dma_addr_t mapping; int i; - if (edev->gro_disable) - return 0; - for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) { struct qede_agg_info *tpa_info = &rxq->tpa_info[i]; - struct sw_rx_data *replace_buf = &tpa_info->buffer; - - replace_buf->data = alloc_pages(GFP_ATOMIC, 0); - if (unlikely(!replace_buf->data)) { - DP_NOTICE(edev, - "Failed to allocate TPA skb pool [replacement buffer]\n"); - goto err; - } - - mapping = dma_map_page(&edev->pdev->dev, replace_buf->data, 0, - PAGE_SIZE, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) { - DP_NOTICE(edev, - "Failed to map TPA replacement buffer\n"); - goto err; - } - replace_buf->mapping = mapping; - tpa_info->buffer.page_offset = 0; - 
tpa_info->buffer_mapping = mapping; tpa_info->state = QEDE_AGG_STATE_NONE; } - - return 0; -err: - qede_free_sge_mem(edev, rxq); - edev->gro_disable = 1; - edev->ndev->features &= ~NETIF_F_GRO_HW; - return -ENOMEM; } /* This function allocates all memory needed per Rx queue */ @@ -1281,19 +1229,24 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq) rxq->num_rx_buffers = edev->q_num_rx_buffers; rxq->rx_buf_size = NET_IP_ALIGN + ETH_OVERHEAD + edev->ndev->mtu; - rxq->rx_headroom = edev->xdp_prog ? XDP_PACKET_HEADROOM : 0; + + rxq->rx_headroom = edev->xdp_prog ? XDP_PACKET_HEADROOM : NET_SKB_PAD; + size = rxq->rx_headroom + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); /* Make sure that the headroom and payload fit in a single page */ - if (rxq->rx_buf_size + rxq->rx_headroom > PAGE_SIZE) - rxq->rx_buf_size = PAGE_SIZE - rxq->rx_headroom; + if (rxq->rx_buf_size + size > PAGE_SIZE) + rxq->rx_buf_size = PAGE_SIZE - size; - /* Segment size to spilt a page in multiple equal parts, + /* Segment size to spilt a page in multiple equal parts , * unless XDP is used in which case we'd use the entire page. */ - if (!edev->xdp_prog) - rxq->rx_buf_seg_size = roundup_pow_of_two(rxq->rx_buf_size); - else + if (!edev->xdp_prog) { + size = size + rxq->rx_buf_size; + rxq->rx_buf_seg_size = roundup_pow_of_two(size); + } else { rxq->rx_buf_seg_size = PAGE_SIZE; + } /* Allocate the parallel driver ring for Rx buffers */ size = sizeof(*rxq->sw_rx_ring) * RX_RING_SIZE; @@ -1337,7 +1290,8 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq) } } - rc = qede_alloc_sge_mem(edev, rxq); + if (!edev->gro_disable) + qede_set_tpa_param(rxq); err: return rc; } |