summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox/mlx5/core/wq.h
diff options
context:
space:
mode:
authorOfer Levi <oferle@nvidia.com>2021-02-09 18:48:11 +0300
committerSaeed Mahameed <saeedm@nvidia.com>2022-11-12 13:20:19 +0300
commit2c925db0a7d69b404d6bfe4c037935c2d367913d (patch)
tree2240599dab471a8040075f255584d6ec774f4366 /drivers/net/ethernet/mellanox/mlx5/core/wq.h
parent9458108040b3c0980a02308ddf30568b9823349c (diff)
downloadlinux-2c925db0a7d69b404d6bfe4c037935c2d367913d.tar.xz
net/mlx5e: Support enhanced CQE compression
The CQE compression feature improves performance by reducing the PCI bandwidth bottleneck on CQE writes. Enhanced CQE compression was introduced in ConnectX-6 and aims to reduce the CPU utilization of SW-side packet decompression by eliminating the need to rewrite the ownership bit, which is likely to cost a cache miss; the ownership bit is replaced by a validity byte handled solely by HW. Another advantage of the enhanced feature is that session packets are available to SW as soon as a single CQE slot is filled, instead of waiting for the session to close; this improves packet latency from NIC to host. Performance: The following are the tested scenarios and results comparing basic and enhanced CQE compression. Setup: IXIA 100GbE connected directly to port 0 and port 1 of a ConnectX-6 Dx 100GbE dual port. Case #1 RX only, single flow goes to single queue: IRQ rate reduced by ~ 30%, CPU utilization improved by 2%. Case #2 IP forwarding from port 1 to port 0, single flow goes to single queue: Avg latency improved from 60us to 21us, frame loss improved from 0.5% to 0.0%. Case #3 IP forwarding from port 1 to port 0, Max Throughput, IXIA sends 100%, 8192 UDP flows, goes to 24 queues: Enhanced is equal to or slightly better than basic. Testing the basic compression feature with this patch shows there is no performance degradation of the basic compression feature. Signed-off-by: Ofer Levi <oferle@nvidia.com> Reviewed-by: Tariq Toukan <tariqt@nvidia.com> Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/wq.h')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/wq.h17
1 files changed, 17 insertions, 0 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index 4d629e5ddbc7..e4ef1d24a3ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -243,6 +243,23 @@ static inline struct mlx5_cqe64 *mlx5_cqwq_get_cqe(struct mlx5_cqwq *wq)
return cqe;
}
+static inline
+struct mlx5_cqe64 *mlx5_cqwq_get_cqe_enahnced_comp(struct mlx5_cqwq *wq)
+{
+ u8 sw_validity_iteration_count = mlx5_cqwq_get_wrap_cnt(wq) & 0xff;
+ u32 ci = mlx5_cqwq_get_ci(wq);
+ struct mlx5_cqe64 *cqe;
+
+ cqe = mlx5_cqwq_get_wqe(wq, ci);
+ if (cqe->validity_iteration_count != sw_validity_iteration_count)
+ return NULL;
+
+ /* ensure cqe content is read after cqe ownership bit/validity byte */
+ dma_rmb();
+
+ return cqe;
+}
+
static inline u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq)
{
return (u32)wq->fbc.sz_m1 + 1;