diff options
author | Shay Agroskin <shayag@mellanox.com> | 2019-03-14 15:54:07 +0300 |
---|---|---|
committer | Saeed Mahameed <saeedm@mellanox.com> | 2019-04-23 22:09:20 +0300 |
commit | c2273219baa5097a4d7c1c162b992623534f34c1 (patch) | |
tree | 921448bbf8fa8fb8b73662fbab4ed5f8d89d9ffe /drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h | |
parent | 73cab880e7664702681e6d5115225e4fff1d6d98 (diff) | |
download | linux-c2273219baa5097a4d7c1c162b992623534f34c1.tar.xz |
net/mlx5e: XDP, Inline small packets into the TX MPWQE in XDP xmit flow
Upon high packet rate with multiple CPUs TX workloads, much of the HCA's
resources are spent on prefetching TX descriptors, thus affecting
transmission rates.
This patch comes to mitigate this problem by moving some workload to the
CPU and reducing the HW data prefetch overhead for small packets (<= 256B).
When forwarding packets with XDP, a packet that is smaller
than a certain size (set to ~256 bytes) would be sent inline within
its WQE TX descrptor (mem-copied), when the hardware tx queue is congested
beyond a pre-defined water-mark.
This is added to better utilize the HW resources (which now makes
one less packet data prefetch) and allow better scalability, on the
account of CPU usage (which now 'memcpy's the packet into the WQE).
To load balance between HW and CPU and get max packet rate, we use
watermarks to detect how much the HW is congested and move the work
loads back and forth between HW and CPU.
Performance:
Tested packet rate for UDP 64Byte multi-stream
over two dual port ConnectX-5 100Gbps NICs.
CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz
* Tested with hyper-threading disabled
XDP_TX:
| | before | after | |
| 24 rings | 51Mpps | 116Mpps | +126% |
| 1 ring | 12Mpps | 12Mpps | same |
XDP_REDIRECT:
** Below is the transmit rate, not the redirection rate
which might be larger, and is not affected by this patch.
| | before | after | |
| 32 rings | 64Mpps | 92Mpps | +43% |
| 1 ring | 6.4Mpps | 6.4Mpps | same |
As we can see, feature significantly improves scaling, without
hurting single ring performance.
Signed-off-by: Shay Agroskin <shayag@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h')
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h | 57 |
1 files changed, 54 insertions, 3 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h index ee27a7c8cd87..858e7a2a13ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h @@ -75,16 +75,68 @@ static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_xdpsq *sq) } } +/* Enable inline WQEs to shift some load from a congested HCA (HW) to + * a less congested cpu (SW). + */ +static inline void mlx5e_xdp_update_inline_state(struct mlx5e_xdpsq *sq) +{ + u16 outstanding = sq->xdpi_fifo_pc - sq->xdpi_fifo_cc; + struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + +#define MLX5E_XDP_INLINE_WATERMARK_LOW 10 +#define MLX5E_XDP_INLINE_WATERMARK_HIGH 128 + + if (session->inline_on) { + if (outstanding <= MLX5E_XDP_INLINE_WATERMARK_LOW) + session->inline_on = 0; + return; + } + + /* inline is false */ + if (outstanding >= MLX5E_XDP_INLINE_WATERMARK_HIGH) + session->inline_on = 1; +} + static inline void -mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, dma_addr_t dma_addr, u16 dma_len) +mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq, struct mlx5e_xdp_info *xdpi, + struct mlx5e_xdpsq_stats *stats) { struct mlx5e_xdp_mpwqe *session = &sq->mpwqe; + dma_addr_t dma_addr = xdpi->dma_addr; + struct xdp_frame *xdpf = xdpi->xdpf; struct mlx5_wqe_data_seg *dseg = - (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count++; + (struct mlx5_wqe_data_seg *)session->wqe + session->ds_count; + u16 dma_len = xdpf->len; + session->pkt_count++; + +#define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg)) + + if (session->inline_on && dma_len <= MLX5E_XDP_INLINE_WQE_SZ_THRSD) { + struct mlx5_wqe_inline_seg *inline_dseg = + (struct mlx5_wqe_inline_seg *)dseg; + u16 ds_len = sizeof(*inline_dseg) + dma_len; + u16 ds_cnt = DIV_ROUND_UP(ds_len, MLX5_SEND_WQE_DS); + + if (unlikely(session->ds_count + ds_cnt > session->max_ds_count)) { + /* Not enough space for inline wqe, send with memory pointer */ + session->complete = true; + goto no_inline; + } + + inline_dseg->byte_count = cpu_to_be32(dma_len | MLX5_INLINE_SEG); + memcpy(inline_dseg->data, xdpf->data, dma_len); + + session->ds_count += ds_cnt; + stats->inlnw++; + return; + } + +no_inline: dseg->addr = cpu_to_be64(dma_addr); dseg->byte_count = cpu_to_be32(dma_len); dseg->lkey = sq->mkey_be; + session->ds_count++; } static inline void mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq, @@ -111,5 +163,4 @@ mlx5e_xdpi_fifo_pop(struct mlx5e_xdp_info_fifo *fifo) { return fifo->xi[(*fifo->cc)++ & fifo->mask]; } - #endif |