summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
diff options
context:
space:
mode:
authorDragos Tatulea <dtatulea@nvidia.com>2023-02-21 22:05:07 +0300
committerSaeed Mahameed <saeedm@nvidia.com>2023-03-28 23:43:59 +0300
commitcd640b050368d5be6bccf1edb51b1e4c553555e6 (patch)
tree4911c91ea21916632daf68dd26846b8ed72771b3 /drivers/net/ethernet/mellanox/mlx5/core/en/params.c
parent4ba2b4988c98ce9b56b77a1610c3a7b70ee30b57 (diff)
downloadlinux-cd640b050368d5be6bccf1edb51b1e4c553555e6.tar.xz
net/mlx5e: RX, Break the wqe bulk refill in smaller chunks
To avoid overflowing the page pool's cache, don't release the whole bulk,
which is usually larger than the cache refill size. Instead, group
release+alloc into cache refill units that allow releasing to the cache
and then allocating from the cache.

A refill_unit variable is added as an iteration unit over the wqe_bulk
when doing release+alloc.

For a single ring, single core, default MTU (1500) TCP stream test, the
number of pages allocated from the cache directly (rx_pp_recycle_cached)
increases from 0% to 52%:

+---------------------------------------------+
| Page Pool stats (/sec)  |  Before |   After |
+-------------------------+---------+---------+
|rx_pp_alloc_fast         | 2145422 | 2193802 |
|rx_pp_alloc_slow         |       2 |       0 |
|rx_pp_alloc_empty        |       2 |       0 |
|rx_pp_alloc_refill       |   34059 |   16634 |
|rx_pp_alloc_waive        |       0 |       0 |
|rx_pp_recycle_cached     |       0 | 1145818 |
|rx_pp_recycle_cache_full |       0 |       0 |
|rx_pp_recycle_ring       | 2179361 | 1064616 |
|rx_pp_recycle_ring_full  |     121 |       0 |
+---------------------------------------------+

With this patch, the performance for legacy rq for the above test is
back to baseline.

Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en/params.c')
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/params.c | 8
1 files changed, 7 insertions, 1 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index 40218d77ef34..31f3c6e51d9e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -674,6 +674,7 @@ static void mlx5e_rx_compute_wqe_bulk_params(struct mlx5e_params *params,
u32 bulk_bound_rq_size_in_bytes;
u32 sum_frag_strides = 0;
u32 wqe_bulk_in_bytes;
+ u16 split_factor;
u32 wqe_bulk;
int i;
@@ -702,6 +703,10 @@ static void mlx5e_rx_compute_wqe_bulk_params(struct mlx5e_params *params,
* by older WQEs.
*/
info->wqe_bulk = max_t(u16, info->wqe_index_mask + 1, wqe_bulk);
+
+ split_factor = DIV_ROUND_UP(MAX_WQE_BULK_BYTES(params->xdp_prog),
+ PP_ALLOC_CACHE_REFILL * PAGE_SIZE);
+ info->refill_unit = DIV_ROUND_UP(info->wqe_bulk, split_factor);
}
#define DEFAULT_FRAG_SIZE (2048)
@@ -817,7 +822,8 @@ out:
*/
mlx5e_rx_compute_wqe_bulk_params(params, info);
- mlx5_core_dbg(mdev, "%s: wqe_bulk = %u\n", __func__, info->wqe_bulk);
+ mlx5_core_dbg(mdev, "%s: wqe_bulk = %u, wqe_bulk_refill_unit = %u\n",
+ __func__, info->wqe_bulk, info->refill_unit);
info->log_num_frags = order_base_2(info->num_frags);