summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/dev.c14
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/devlink.h11
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/eq.c220
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c42
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c17
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c248
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h4
-rw-r--r--include/linux/cpu_rmap.h4
-rw-r--r--include/linux/mlx5/driver.h6
-rw-r--r--lib/cpu_rmap.c57
11 files changed, 398 insertions, 235 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index 445fe30c3d0b..e7739acc926e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -35,6 +35,7 @@
#include <linux/mlx5/mlx5_ifc_vdpa.h>
#include <linux/mlx5/vport.h>
#include "mlx5_core.h"
+#include "devlink.h"
/* intf dev list mutex */
static DEFINE_MUTEX(mlx5_intf_mutex);
@@ -109,17 +110,6 @@ bool mlx5_eth_supported(struct mlx5_core_dev *dev)
return true;
}
-static bool is_eth_enabled(struct mlx5_core_dev *dev)
-{
- union devlink_param_value val;
- int err;
-
- err = devl_param_driverinit_value_get(priv_to_devlink(dev),
- DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
- &val);
- return err ? false : val.vbool;
-}
-
bool mlx5_vnet_supported(struct mlx5_core_dev *dev)
{
if (!IS_ENABLED(CONFIG_MLX5_VDPA_NET))
@@ -251,7 +241,7 @@ static const struct mlx5_adev_device {
.is_enabled = &is_ib_enabled },
[MLX5_INTERFACE_PROTOCOL_ETH] = { .suffix = "eth",
.is_supported = &mlx5_eth_supported,
- .is_enabled = &is_eth_enabled },
+ .is_enabled = &mlx5_core_is_eth_enabled },
[MLX5_INTERFACE_PROTOCOL_ETH_REP] = { .suffix = "eth-rep",
.is_supported = &is_eth_rep_supported },
[MLX5_INTERFACE_PROTOCOL_IB_REP] = { .suffix = "rdma-rep",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
index 5dcfb4d86d8a..defba5bd91d9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h
@@ -46,4 +46,15 @@ void mlx5_devlink_free(struct devlink *devlink);
int mlx5_devlink_params_register(struct devlink *devlink);
void mlx5_devlink_params_unregister(struct devlink *devlink);
+static inline bool mlx5_core_is_eth_enabled(struct mlx5_core_dev *dev)
+{
+ union devlink_param_value val;
+ int err;
+
+ err = devl_param_driverinit_value_get(priv_to_devlink(dev),
+ DEVLINK_PARAM_GENERIC_ID_ENABLE_ETH,
+ &val);
+ return err ? false : val.vbool;
+}
+
#endif /* __MLX5_DEVLINK_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 38b32e98f3bd..eb41f0abf798 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -18,6 +18,7 @@
#include "lib/clock.h"
#include "diag/fw_tracer.h"
#include "mlx5_irq.h"
+#include "pci_irq.h"
#include "devlink.h"
#include "en_accel/ipsec.h"
@@ -61,9 +62,7 @@ struct mlx5_eq_table {
struct mlx5_irq_table *irq_table;
struct mlx5_irq **comp_irqs;
struct mlx5_irq *ctrl_irq;
-#ifdef CONFIG_RFS_ACCEL
struct cpu_rmap *rmap;
-#endif
};
#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
@@ -637,6 +636,7 @@ static u16 async_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
mlx5_core_dbg(dev, "Failed to get param. using default. err = %d\n", err);
return MLX5_NUM_ASYNC_EQE;
}
+
static int create_async_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
@@ -803,44 +803,28 @@ void mlx5_eq_update_ci(struct mlx5_eq *eq, u32 cc, bool arm)
}
EXPORT_SYMBOL(mlx5_eq_update_ci);
-static void comp_irqs_release(struct mlx5_core_dev *dev)
+static void comp_irqs_release_pci(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
- if (mlx5_core_is_sf(dev))
- mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs);
- else
- mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs);
- kfree(table->comp_irqs);
+ mlx5_irqs_release_vectors(table->comp_irqs, table->num_comp_eqs);
}
-static int comp_irqs_request(struct mlx5_core_dev *dev)
+static int comp_irqs_request_pci(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
const struct cpumask *prev = cpu_none_mask;
const struct cpumask *mask;
- int ncomp_eqs = table->num_comp_eqs;
+ int ncomp_eqs;
u16 *cpus;
int ret;
int cpu;
int i;
ncomp_eqs = table->num_comp_eqs;
- table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
- if (!table->comp_irqs)
- return -ENOMEM;
- if (mlx5_core_is_sf(dev)) {
- ret = mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs);
- if (ret < 0)
- goto free_irqs;
- return ret;
- }
-
cpus = kcalloc(ncomp_eqs, sizeof(*cpus), GFP_KERNEL);
- if (!cpus) {
+ if (!cpus)
ret = -ENOMEM;
- goto free_irqs;
- }
i = 0;
rcu_read_lock();
@@ -854,17 +838,89 @@ static int comp_irqs_request(struct mlx5_core_dev *dev)
}
spread_done:
rcu_read_unlock();
- ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs);
+ ret = mlx5_irqs_request_vectors(dev, cpus, ncomp_eqs, table->comp_irqs, &table->rmap);
kfree(cpus);
- if (ret < 0)
- goto free_irqs;
return ret;
+}
+
+static void comp_irqs_release_sf(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+
+ mlx5_irq_affinity_irqs_release(dev, table->comp_irqs, table->num_comp_eqs);
+}
+
+static int comp_irqs_request_sf(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ int ncomp_eqs = table->num_comp_eqs;
+
+ return mlx5_irq_affinity_irqs_request_auto(dev, ncomp_eqs, table->comp_irqs);
+}
+
+static void comp_irqs_release(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+
+ mlx5_core_is_sf(dev) ? comp_irqs_release_sf(dev) :
+ comp_irqs_release_pci(dev);
-free_irqs:
kfree(table->comp_irqs);
+}
+
+static int comp_irqs_request(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *table = dev->priv.eq_table;
+ int ncomp_eqs;
+ int ret;
+
+ ncomp_eqs = table->num_comp_eqs;
+ table->comp_irqs = kcalloc(ncomp_eqs, sizeof(*table->comp_irqs), GFP_KERNEL);
+ if (!table->comp_irqs)
+ return -ENOMEM;
+
+ ret = mlx5_core_is_sf(dev) ? comp_irqs_request_sf(dev) :
+ comp_irqs_request_pci(dev);
+ if (ret < 0)
+ kfree(table->comp_irqs);
+
return ret;
}
+#ifdef CONFIG_RFS_ACCEL
+static int alloc_rmap(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
+
+ /* rmap is a mapping between irq number and queue number.
+ * Each irq can be assigned only to a single rmap.
+ * Since SFs share IRQs, rmap mapping cannot function correctly
+ * for irqs that are shared between different core/netdev RX rings.
+ * Hence we don't allow netdev rmap for SFs.
+ */
+ if (mlx5_core_is_sf(mdev))
+ return 0;
+
+ eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs);
+ if (!eq_table->rmap)
+ return -ENOMEM;
+ return 0;
+}
+
+static void free_rmap(struct mlx5_core_dev *mdev)
+{
+ struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
+
+ if (eq_table->rmap) {
+ free_irq_cpu_rmap(eq_table->rmap);
+ eq_table->rmap = NULL;
+ }
+}
+#else
+static int alloc_rmap(struct mlx5_core_dev *mdev) { return 0; }
+static void free_rmap(struct mlx5_core_dev *mdev) {}
+#endif
+
static void destroy_comp_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
@@ -880,6 +936,7 @@ static void destroy_comp_eqs(struct mlx5_core_dev *dev)
kfree(eq);
}
comp_irqs_release(dev);
+ free_rmap(dev);
}
static u16 comp_eq_depth_devlink_param_get(struct mlx5_core_dev *dev)
@@ -906,9 +963,16 @@ static int create_comp_eqs(struct mlx5_core_dev *dev)
int err;
int i;
+ err = alloc_rmap(dev);
+ if (err)
+ return err;
+
ncomp_eqs = comp_irqs_request(dev);
- if (ncomp_eqs < 0)
- return ncomp_eqs;
+ if (ncomp_eqs < 0) {
+ err = ncomp_eqs;
+ goto err_irqs_req;
+ }
+
INIT_LIST_HEAD(&table->comp_eqs_list);
nent = comp_eq_depth_devlink_param_get(dev);
@@ -953,6 +1017,8 @@ clean_eq:
kfree(eq);
clean:
destroy_comp_eqs(dev);
+err_irqs_req:
+ free_rmap(dev);
return err;
}
@@ -1031,55 +1097,12 @@ struct mlx5_eq_comp *mlx5_eqn2comp_eq(struct mlx5_core_dev *dev, int eqn)
return ERR_PTR(-ENOENT);
}
-static void clear_rmap(struct mlx5_core_dev *dev)
-{
-#ifdef CONFIG_RFS_ACCEL
- struct mlx5_eq_table *eq_table = dev->priv.eq_table;
-
- free_irq_cpu_rmap(eq_table->rmap);
-#endif
-}
-
-static int set_rmap(struct mlx5_core_dev *mdev)
-{
- int err = 0;
-#ifdef CONFIG_RFS_ACCEL
- struct mlx5_eq_table *eq_table = mdev->priv.eq_table;
- int vecidx;
-
- eq_table->rmap = alloc_irq_cpu_rmap(eq_table->num_comp_eqs);
- if (!eq_table->rmap) {
- err = -ENOMEM;
- mlx5_core_err(mdev, "Failed to allocate cpu_rmap. err %d", err);
- goto err_out;
- }
-
- for (vecidx = 0; vecidx < eq_table->num_comp_eqs; vecidx++) {
- err = irq_cpu_rmap_add(eq_table->rmap,
- pci_irq_vector(mdev->pdev, vecidx));
- if (err) {
- mlx5_core_err(mdev, "irq_cpu_rmap_add failed. err %d",
- err);
- goto err_irq_cpu_rmap_add;
- }
- }
- return 0;
-
-err_irq_cpu_rmap_add:
- clear_rmap(mdev);
-err_out:
-#endif
- return err;
-}
-
/* This function should only be called after mlx5_cmd_force_teardown_hca */
void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *table = dev->priv.eq_table;
mutex_lock(&table->lock); /* sync with create/destroy_async_eq */
- if (!mlx5_core_is_sf(dev))
- clear_rmap(dev);
mlx5_irq_table_destroy(dev);
mutex_unlock(&table->lock);
}
@@ -1090,44 +1113,47 @@ void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
#define MLX5_MAX_ASYNC_EQS 3
#endif
-int mlx5_eq_table_create(struct mlx5_core_dev *dev)
+static int get_num_eqs(struct mlx5_core_dev *dev)
{
struct mlx5_eq_table *eq_table = dev->priv.eq_table;
- int num_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
+ int max_dev_eqs;
+ int max_eqs_sf;
+ int num_eqs;
+
+ /* If ethernet is disabled we use just a single completion vector to
+ * have the other vectors available for other drivers using mlx5_core. For
+ * example, mlx5_vdpa
+ */
+ if (!mlx5_core_is_eth_enabled(dev) && mlx5_eth_supported(dev))
+ return 1;
+
+ max_dev_eqs = MLX5_CAP_GEN(dev, max_num_eqs) ?
MLX5_CAP_GEN(dev, max_num_eqs) :
1 << MLX5_CAP_GEN(dev, log_max_eq);
- int max_eqs_sf;
- int err;
- eq_table->num_comp_eqs =
- min_t(int,
- mlx5_irq_table_get_num_comp(eq_table->irq_table),
- num_eqs - MLX5_MAX_ASYNC_EQS);
+ num_eqs = min_t(int, mlx5_irq_table_get_num_comp(eq_table->irq_table),
+ max_dev_eqs - MLX5_MAX_ASYNC_EQS);
if (mlx5_core_is_sf(dev)) {
max_eqs_sf = min_t(int, MLX5_COMP_EQS_PER_SF,
mlx5_irq_table_get_sfs_vec(eq_table->irq_table));
- eq_table->num_comp_eqs = min_t(int, eq_table->num_comp_eqs,
- max_eqs_sf);
+ num_eqs = min_t(int, num_eqs, max_eqs_sf);
}
+ return num_eqs;
+}
+
+int mlx5_eq_table_create(struct mlx5_core_dev *dev)
+{
+ struct mlx5_eq_table *eq_table = dev->priv.eq_table;
+ int err;
+
+ eq_table->num_comp_eqs = get_num_eqs(dev);
err = create_async_eqs(dev);
if (err) {
mlx5_core_err(dev, "Failed to create async EQs\n");
goto err_async_eqs;
}
- if (!mlx5_core_is_sf(dev)) {
- /* rmap is a mapping between irq number and queue number.
- * each irq can be assign only to a single rmap.
- * since SFs share IRQs, rmap mapping cannot function correctly
- * for irqs that are shared for different core/netdev RX rings.
- * Hence we don't allow netdev rmap for SFs
- */
- err = set_rmap(dev);
- if (err)
- goto err_rmap;
- }
-
err = create_comp_eqs(dev);
if (err) {
mlx5_core_err(dev, "Failed to create completion EQs\n");
@@ -1135,10 +1161,8 @@ int mlx5_eq_table_create(struct mlx5_core_dev *dev)
}
return 0;
+
err_comp_eqs:
- if (!mlx5_core_is_sf(dev))
- clear_rmap(dev);
-err_rmap:
destroy_async_eqs(dev);
err_async_eqs:
return err;
@@ -1146,8 +1170,6 @@ err_async_eqs:
void mlx5_eq_table_destroy(struct mlx5_core_dev *dev)
{
- if (!mlx5_core_is_sf(dev))
- clear_rmap(dev);
destroy_comp_eqs(dev);
destroy_async_eqs(dev);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
index 380a208ab137..fa467335526e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c
@@ -45,30 +45,28 @@ static int cpu_get_least_loaded(struct mlx5_irq_pool *pool,
/* Creating an IRQ from irq_pool */
static struct mlx5_irq *
-irq_pool_request_irq(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc)
{
- cpumask_var_t auto_mask;
- struct mlx5_irq *irq;
+ struct irq_affinity_desc auto_desc = {};
u32 irq_index;
int err;
- if (!zalloc_cpumask_var(&auto_mask, GFP_KERNEL))
- return ERR_PTR(-ENOMEM);
err = xa_alloc(&pool->irqs, &irq_index, NULL, pool->xa_num_irqs, GFP_KERNEL);
if (err)
return ERR_PTR(err);
if (pool->irqs_per_cpu) {
- if (cpumask_weight(req_mask) > 1)
+ if (cpumask_weight(&af_desc->mask) > 1)
/* if req_mask contain more then one CPU, set the least loadad CPU
* of req_mask
*/
- cpumask_set_cpu(cpu_get_least_loaded(pool, req_mask), auto_mask);
+ cpumask_set_cpu(cpu_get_least_loaded(pool, &af_desc->mask),
+ &auto_desc.mask);
else
- cpu_get(pool, cpumask_first(req_mask));
+ cpu_get(pool, cpumask_first(&af_desc->mask));
}
- irq = mlx5_irq_alloc(pool, irq_index, cpumask_empty(auto_mask) ? req_mask : auto_mask);
- free_cpumask_var(auto_mask);
- return irq;
+ return mlx5_irq_alloc(pool, irq_index,
+ cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc,
+ NULL);
}
/* Looking for the IRQ with the smallest refcount that fits req_mask.
@@ -115,22 +113,22 @@ irq_pool_find_least_loaded(struct mlx5_irq_pool *pool, const struct cpumask *req
/**
* mlx5_irq_affinity_request - request an IRQ according to the given mask.
* @pool: IRQ pool to request from.
- * @req_mask: cpumask requested for this IRQ.
+ * @af_desc: affinity descriptor for this IRQ.
*
* This function returns a pointer to IRQ, or ERR_PTR in case of error.
*/
struct mlx5_irq *
-mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc)
{
struct mlx5_irq *least_loaded_irq, *new_irq;
mutex_lock(&pool->lock);
- least_loaded_irq = irq_pool_find_least_loaded(pool, req_mask);
+ least_loaded_irq = irq_pool_find_least_loaded(pool, &af_desc->mask);
if (least_loaded_irq &&
mlx5_irq_read_locked(least_loaded_irq) < pool->min_threshold)
goto out;
/* We didn't find an IRQ with less than min_thres, try to allocate a new IRQ */
- new_irq = irq_pool_request_irq(pool, req_mask);
+ new_irq = irq_pool_request_irq(pool, af_desc);
if (IS_ERR(new_irq)) {
if (!least_loaded_irq) {
/* We failed to create an IRQ and we didn't find an IRQ */
@@ -194,32 +192,30 @@ int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
struct mlx5_irq **irqs)
{
struct mlx5_irq_pool *pool = mlx5_irq_pool_get(dev);
- cpumask_var_t req_mask;
+ struct irq_affinity_desc af_desc = {};
struct mlx5_irq *irq;
int i = 0;
- if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
- return -ENOMEM;
- cpumask_copy(req_mask, cpu_online_mask);
+ af_desc.is_managed = 1;
+ cpumask_copy(&af_desc.mask, cpu_online_mask);
for (i = 0; i < nirqs; i++) {
if (mlx5_irq_pool_is_sf_pool(pool))
- irq = mlx5_irq_affinity_request(pool, req_mask);
+ irq = mlx5_irq_affinity_request(pool, &af_desc);
else
/* In case SF pool doesn't exists, fallback to the PF IRQs.
* The PF IRQs are already allocated and binded to CPU
* at this point. Hence, only an index is needed.
*/
- irq = mlx5_irq_request(dev, i, NULL);
+ irq = mlx5_irq_request(dev, i, NULL, NULL);
if (IS_ERR(irq))
break;
irqs[i] = irq;
- cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), req_mask);
+ cpumask_clear_cpu(cpumask_first(mlx5_irq_get_affinity_mask(irq)), &af_desc.mask);
mlx5_core_dbg(pool->dev, "IRQ %u mapped to cpu %*pbl, %u EQs on this irq\n",
pci_irq_vector(dev->pdev, mlx5_irq_get_index(irq)),
cpumask_pr_args(mlx5_irq_get_affinity_mask(irq)),
mlx5_irq_read_locked(irq) / MLX5_EQ_REFS_PER_IRQ);
}
- free_cpumask_var(req_mask);
if (!i)
return PTR_ERR(irq);
return i;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 597174ceadc9..f95df73d1089 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -46,9 +46,6 @@
#include <linux/kmod.h>
#include <linux/mlx5/mlx5_ifc.h>
#include <linux/mlx5/vport.h>
-#ifdef CONFIG_RFS_ACCEL
-#include <linux/cpu_rmap.h>
-#endif
#include <linux/version.h>
#include <net/devlink.h>
#include "mlx5_core.h"
@@ -1401,16 +1398,16 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
goto function_teardown;
}
+ err = mlx5_devlink_params_register(priv_to_devlink(dev));
+ if (err)
+ goto err_devlink_params_reg;
+
err = mlx5_load(dev);
if (err)
goto err_load;
set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
- err = mlx5_devlink_params_register(priv_to_devlink(dev));
- if (err)
- goto err_devlink_params_reg;
-
err = mlx5_register_device(dev);
if (err)
goto err_register;
@@ -1420,11 +1417,11 @@ int mlx5_init_one(struct mlx5_core_dev *dev)
return 0;
err_register:
- mlx5_devlink_params_unregister(priv_to_devlink(dev));
-err_devlink_params_reg:
clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
mlx5_unload(dev);
err_load:
+ mlx5_devlink_params_unregister(priv_to_devlink(dev));
+err_devlink_params_reg:
mlx5_cleanup_once(dev);
function_teardown:
mlx5_function_teardown(dev, true);
@@ -1443,7 +1440,6 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
mutex_lock(&dev->intf_state_mutex);
mlx5_unregister_device(dev);
- mlx5_devlink_params_unregister(priv_to_devlink(dev));
if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
mlx5_core_warn(dev, "%s: interface is down, NOP\n",
@@ -1454,6 +1450,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
clear_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
mlx5_unload(dev);
+ mlx5_devlink_params_unregister(priv_to_devlink(dev));
mlx5_cleanup_once(dev);
mlx5_function_teardown(dev, true);
out:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
index 23cb63fa4588..efd0c299c5c7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_irq.h
@@ -9,6 +9,7 @@
#define MLX5_COMP_EQS_PER_SF 8
struct mlx5_irq;
+struct cpu_rmap;
int mlx5_irq_table_init(struct mlx5_core_dev *dev);
void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev);
@@ -25,9 +26,10 @@ int mlx5_get_default_msix_vec_count(struct mlx5_core_dev *dev, int num_vfs);
struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev);
void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq);
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
- struct cpumask *affinity);
+ struct irq_affinity_desc *af_desc,
+ struct cpu_rmap **rmap);
int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
- struct mlx5_irq **irqs);
+ struct mlx5_irq **irqs, struct cpu_rmap **rmap);
void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs);
int mlx5_irq_attach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
int mlx5_irq_detach_nb(struct mlx5_irq *irq, struct notifier_block *nb);
@@ -39,7 +41,7 @@ struct mlx5_irq_pool;
int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev, int nirqs,
struct mlx5_irq **irqs);
struct mlx5_irq *mlx5_irq_affinity_request(struct mlx5_irq_pool *pool,
- const struct cpumask *req_mask);
+ struct irq_affinity_desc *af_desc);
void mlx5_irq_affinity_irqs_release(struct mlx5_core_dev *dev, struct mlx5_irq **irqs,
int num_irqs);
#else
@@ -50,7 +52,7 @@ static inline int mlx5_irq_affinity_irqs_request_auto(struct mlx5_core_dev *dev,
}
static inline struct mlx5_irq *
-mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, const struct cpumask *req_mask)
+mlx5_irq_affinity_request(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc)
{
return ERR_PTR(-EOPNOTSUPP);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
index 6bde18bcd42f..e12e528c09f5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c
@@ -9,6 +9,7 @@
#include "mlx5_irq.h"
#include "pci_irq.h"
#include "lib/sf.h"
+#include "lib/eq.h"
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
@@ -29,12 +30,11 @@ struct mlx5_irq {
char name[MLX5_MAX_IRQ_NAME];
struct mlx5_irq_pool *pool;
int refcount;
- u32 index;
- int irqn;
+ struct msi_map map;
};
struct mlx5_irq_table {
- struct mlx5_irq_pool *pf_pool;
+ struct mlx5_irq_pool *pcif_pool;
struct mlx5_irq_pool *sf_ctrl_pool;
struct mlx5_irq_pool *sf_comp_pool;
};
@@ -127,15 +127,26 @@ out:
static void irq_release(struct mlx5_irq *irq)
{
struct mlx5_irq_pool *pool = irq->pool;
+#ifdef CONFIG_RFS_ACCEL
+ struct cpu_rmap *rmap;
+#endif
- xa_erase(&pool->irqs, irq->index);
- /* free_irq requires that affinity_hint and rmap will be cleared
- * before calling it. This is why there is asymmetry with set_rmap
- * which should be called after alloc_irq but before request_irq.
+ xa_erase(&pool->irqs, irq->map.index);
+ /* free_irq requires that affinity_hint and rmap will be cleared before
+ * calling it. To satisfy this requirement, we call
+ * irq_cpu_rmap_remove() to remove the notifier
*/
- irq_update_affinity_hint(irq->irqn, NULL);
+ irq_update_affinity_hint(irq->map.virq, NULL);
+#ifdef CONFIG_RFS_ACCEL
+ rmap = mlx5_eq_table_get_rmap(pool->dev);
+ if (rmap && irq->map.index)
+ irq_cpu_rmap_remove(rmap, irq->map.virq);
+#endif
+
free_cpumask_var(irq->mask);
- free_irq(irq->irqn, &irq->nh);
+ free_irq(irq->map.virq, &irq->nh);
+ if (irq->map.index && pci_msix_can_alloc_dyn(pool->dev->pdev))
+ pci_msix_free_irq(pool->dev->pdev, irq->map);
kfree(irq);
}
@@ -198,7 +209,7 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
return;
}
- if (vecidx == pool->xa_num_irqs.max) {
+ if (!vecidx) {
snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_async%d", vecidx);
return;
}
@@ -207,7 +218,8 @@ static void irq_set_name(struct mlx5_irq_pool *pool, char *name, int vecidx)
}
struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
- const struct cpumask *affinity)
+ struct irq_affinity_desc *af_desc,
+ struct cpu_rmap **rmap)
{
struct mlx5_core_dev *dev = pool->dev;
char name[MLX5_MAX_IRQ_NAME];
@@ -217,7 +229,28 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
irq = kzalloc(sizeof(*irq), GFP_KERNEL);
if (!irq)
return ERR_PTR(-ENOMEM);
- irq->irqn = pci_irq_vector(dev->pdev, i);
+ if (!i || !pci_msix_can_alloc_dyn(dev->pdev)) {
+ /* The vector at index 0 was already allocated.
+ * Just get the irq number. If dynamic irq is not supported
+ * vectors have also been allocated.
+ */
+ irq->map.virq = pci_irq_vector(dev->pdev, i);
+ irq->map.index = 0;
+ } else {
+ irq->map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, af_desc);
+ if (!irq->map.virq) {
+ err = irq->map.index;
+ goto err_alloc_irq;
+ }
+ }
+
+ if (i && rmap && *rmap) {
+#ifdef CONFIG_RFS_ACCEL
+ err = irq_cpu_rmap_add(*rmap, irq->map.virq);
+ if (err)
+ goto err_irq_rmap;
+#endif
+ }
if (!mlx5_irq_pool_is_sf_pool(pool))
irq_set_name(pool, name, i);
else
@@ -225,7 +258,7 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
ATOMIC_INIT_NOTIFIER_HEAD(&irq->nh);
snprintf(irq->name, MLX5_MAX_IRQ_NAME,
"%s@pci:%s", name, pci_name(dev->pdev));
- err = request_irq(irq->irqn, irq_int_handler, 0, irq->name,
+ err = request_irq(irq->map.virq, irq_int_handler, 0, irq->name,
&irq->nh);
if (err) {
mlx5_core_err(dev, "Failed to request irq. err = %d\n", err);
@@ -236,26 +269,37 @@ struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
err = -ENOMEM;
goto err_cpumask;
}
- if (affinity) {
- cpumask_copy(irq->mask, affinity);
- irq_set_affinity_and_hint(irq->irqn, irq->mask);
+ if (af_desc) {
+ cpumask_copy(irq->mask, &af_desc->mask);
+ irq_set_affinity_and_hint(irq->map.virq, irq->mask);
}
irq->pool = pool;
irq->refcount = 1;
- irq->index = i;
- err = xa_err(xa_store(&pool->irqs, irq->index, irq, GFP_KERNEL));
+ irq->map.index = i;
+ err = xa_err(xa_store(&pool->irqs, irq->map.index, irq, GFP_KERNEL));
if (err) {
mlx5_core_err(dev, "Failed to alloc xa entry for irq(%u). err = %d\n",
- irq->index, err);
+ irq->map.index, err);
goto err_xa;
}
return irq;
err_xa:
- irq_update_affinity_hint(irq->irqn, NULL);
+ if (af_desc)
+ irq_update_affinity_hint(irq->map.virq, NULL);
free_cpumask_var(irq->mask);
err_cpumask:
- free_irq(irq->irqn, &irq->nh);
+ free_irq(irq->map.virq, &irq->nh);
err_req_irq:
+#ifdef CONFIG_RFS_ACCEL
+ if (i && rmap && *rmap) {
+ free_irq_cpu_rmap(*rmap);
+ *rmap = NULL;
+ }
+err_irq_rmap:
+#endif
+ if (i && pci_msix_can_alloc_dyn(dev->pdev))
+ pci_msix_free_irq(dev->pdev, irq->map);
+err_alloc_irq:
kfree(irq);
return ERR_PTR(err);
}
@@ -292,7 +336,7 @@ struct cpumask *mlx5_irq_get_affinity_mask(struct mlx5_irq *irq)
int mlx5_irq_get_index(struct mlx5_irq *irq)
{
- return irq->index;
+ return irq->map.index;
}
/* irq_pool API */
@@ -300,7 +344,8 @@ int mlx5_irq_get_index(struct mlx5_irq *irq)
/* requesting an irq from a given pool according to given index */
static struct mlx5_irq *
irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
- struct cpumask *affinity)
+ struct irq_affinity_desc *af_desc,
+ struct cpu_rmap **rmap)
{
struct mlx5_irq *irq;
@@ -310,7 +355,7 @@ irq_pool_request_vector(struct mlx5_irq_pool *pool, int vecidx,
mlx5_irq_get_locked(irq);
goto unlock;
}
- irq = mlx5_irq_alloc(pool, vecidx, affinity);
+ irq = mlx5_irq_alloc(pool, vecidx, af_desc, rmap);
unlock:
mutex_unlock(&pool->lock);
return irq;
@@ -337,7 +382,7 @@ struct mlx5_irq_pool *mlx5_irq_pool_get(struct mlx5_core_dev *dev)
/* In some configs, there won't be a pool of SFs IRQs. Hence, returning
* the PF IRQs pool in case the SF pool doesn't exist.
*/
- return pool ? pool : irq_table->pf_pool;
+ return pool ? pool : irq_table->pcif_pool;
}
static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
@@ -351,7 +396,7 @@ static struct mlx5_irq_pool *ctrl_irq_pool_get(struct mlx5_core_dev *dev)
/* In some configs, there won't be a pool of SFs IRQs. Hence, returning
* the PF IRQs pool in case the SF pool doesn't exist.
*/
- return pool ? pool : irq_table->pf_pool;
+ return pool ? pool : irq_table->pcif_pool;
}
/**
@@ -364,7 +409,7 @@ static void mlx5_irqs_release(struct mlx5_irq **irqs, int nirqs)
int i;
for (i = 0; i < nirqs; i++) {
- synchronize_irq(irqs[i]->irqn);
+ synchronize_irq(irqs[i]->map.virq);
mlx5_irq_put(irqs[i]);
}
}
@@ -387,26 +432,26 @@ void mlx5_ctrl_irq_release(struct mlx5_irq *ctrl_irq)
struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
{
struct mlx5_irq_pool *pool = ctrl_irq_pool_get(dev);
- cpumask_var_t req_mask;
+ struct irq_affinity_desc af_desc;
struct mlx5_irq *irq;
- if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
- return ERR_PTR(-ENOMEM);
- cpumask_copy(req_mask, cpu_online_mask);
+ cpumask_copy(&af_desc.mask, cpu_online_mask);
+ af_desc.is_managed = false;
if (!mlx5_irq_pool_is_sf_pool(pool)) {
- /* In case we are allocating a control IRQ for PF/VF */
+ /* In case we are allocating a control IRQ from a pci device's pool.
+ * This can happen also for a SF if the SFs pool is empty.
+ */
if (!pool->xa_num_irqs.max) {
- cpumask_clear(req_mask);
+ cpumask_clear(&af_desc.mask);
/* In case we only have a single IRQ for PF/VF */
- cpumask_set_cpu(cpumask_first(cpu_online_mask), req_mask);
+ cpumask_set_cpu(cpumask_first(cpu_online_mask), &af_desc.mask);
}
- /* Allocate the IRQ in the last index of the pool */
- irq = irq_pool_request_vector(pool, pool->xa_num_irqs.max, req_mask);
+ /* Allocate the IRQ in index 0. The vector was already allocated */
+ irq = irq_pool_request_vector(pool, 0, &af_desc, NULL);
} else {
- irq = mlx5_irq_affinity_request(pool, req_mask);
+ irq = mlx5_irq_affinity_request(pool, &af_desc);
}
- free_cpumask_var(req_mask);
return irq;
}
@@ -415,28 +460,82 @@ struct mlx5_irq *mlx5_ctrl_irq_request(struct mlx5_core_dev *dev)
* @dev: mlx5 device that requesting the IRQ.
* @vecidx: vector index of the IRQ. This argument is ignore if affinity is
* provided.
- * @affinity: cpumask requested for this IRQ.
+ * @af_desc: affinity descriptor for this IRQ.
+ * @rmap: pointer to reverse map pointer for completion interrupts
*
* This function returns a pointer to IRQ, or ERR_PTR in case of error.
*/
struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx,
- struct cpumask *affinity)
+ struct irq_affinity_desc *af_desc,
+ struct cpu_rmap **rmap)
{
struct mlx5_irq_table *irq_table = mlx5_irq_table_get(dev);
struct mlx5_irq_pool *pool;
struct mlx5_irq *irq;
- pool = irq_table->pf_pool;
- irq = irq_pool_request_vector(pool, vecidx, affinity);
+ pool = irq_table->pcif_pool;
+ irq = irq_pool_request_vector(pool, vecidx, af_desc, rmap);
if (IS_ERR(irq))
return irq;
mlx5_core_dbg(dev, "irq %u mapped to cpu %*pbl, %u EQs on this irq\n",
- irq->irqn, cpumask_pr_args(affinity),
+ irq->map.virq, cpumask_pr_args(&af_desc->mask),
irq->refcount / MLX5_EQ_REFS_PER_IRQ);
return irq;
}
/**
+ * mlx5_msix_alloc - allocate msix interrupt
+ * @dev: mlx5 device from which to request
+ * @handler: interrupt handler
+ * @affdesc: affinity descriptor
+ * @name: interrupt name
+ *
+ * Returns: struct msi_map with result encoded.
+ * Note: the caller must make sure to release the irq by calling
+ * mlx5_msix_free() if shutdown was initiated.
+ */
+struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev,
+ irqreturn_t (*handler)(int, void *),
+ const struct irq_affinity_desc *affdesc,
+ const char *name)
+{
+ struct msi_map map;
+ int err;
+
+ if (!dev->pdev) {
+ map.virq = 0;
+ map.index = -EINVAL;
+ return map;
+ }
+
+ map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, affdesc);
+ if (!map.virq)
+ return map;
+
+ err = request_irq(map.virq, handler, 0, name, NULL);
+ if (err) {
+ mlx5_core_warn(dev, "err %d\n", err);
+ pci_msix_free_irq(dev->pdev, map);
+ map.virq = 0;
+ map.index = -ENOMEM;
+ }
+ return map;
+}
+EXPORT_SYMBOL(mlx5_msix_alloc);
+
+/**
+ * mlx5_msix_free - free a previously allocated msix interrupt
+ * @dev: mlx5 device associated with interrupt
+ * @map: map previously returned by mlx5_msix_alloc()
+ */
+void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map)
+{
+ free_irq(map.virq, NULL);
+ pci_msix_free_irq(dev->pdev, map);
+}
+EXPORT_SYMBOL(mlx5_msix_free);
+
+/**
* mlx5_irqs_release_vectors - release one or more IRQs back to the system.
* @irqs: IRQs to be released.
* @nirqs: number of IRQs to be released.
@@ -452,6 +551,7 @@ void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
* @cpus: CPUs array for binding the IRQs
* @nirqs: number of IRQs to request.
* @irqs: an output array of IRQs pointers.
+ * @rmap: pointer to reverse map pointer for completion interrupts
*
* Each IRQ is bound to at most 1 CPU.
* This function is requests nirqs IRQs, starting from @vecidx.
@@ -460,24 +560,22 @@ void mlx5_irqs_release_vectors(struct mlx5_irq **irqs, int nirqs)
* @nirqs), if successful, or a negative error code in case of an error.
*/
int mlx5_irqs_request_vectors(struct mlx5_core_dev *dev, u16 *cpus, int nirqs,
- struct mlx5_irq **irqs)
+ struct mlx5_irq **irqs, struct cpu_rmap **rmap)
{
- cpumask_var_t req_mask;
+ struct irq_affinity_desc af_desc;
struct mlx5_irq *irq;
int i;
- if (!zalloc_cpumask_var(&req_mask, GFP_KERNEL))
- return -ENOMEM;
+ af_desc.is_managed = 1;
for (i = 0; i < nirqs; i++) {
- cpumask_set_cpu(cpus[i], req_mask);
- irq = mlx5_irq_request(dev, i, req_mask);
+ cpumask_set_cpu(cpus[i], &af_desc.mask);
+ irq = mlx5_irq_request(dev, i + 1, &af_desc, rmap);
if (IS_ERR(irq))
break;
- cpumask_clear(req_mask);
+ cpumask_clear(&af_desc.mask);
irqs[i] = irq;
}
- free_cpumask_var(req_mask);
return i ? i : PTR_ERR(irq);
}
@@ -521,7 +619,7 @@ static void irq_pool_free(struct mlx5_irq_pool *pool)
kvfree(pool);
}
-static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
+static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pcif_vec)
{
struct mlx5_irq_table *table = dev->priv.irq_table;
int num_sf_ctrl_by_msix;
@@ -529,12 +627,12 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
int num_sf_ctrl;
int err;
- /* init pf_pool */
- table->pf_pool = irq_pool_alloc(dev, 0, pf_vec, NULL,
- MLX5_EQ_SHARE_IRQ_MIN_COMP,
- MLX5_EQ_SHARE_IRQ_MAX_COMP);
- if (IS_ERR(table->pf_pool))
- return PTR_ERR(table->pf_pool);
+ /* init pcif_pool */
+ table->pcif_pool = irq_pool_alloc(dev, 0, pcif_vec, NULL,
+ MLX5_EQ_SHARE_IRQ_MIN_COMP,
+ MLX5_EQ_SHARE_IRQ_MAX_COMP);
+ if (IS_ERR(table->pcif_pool))
+ return PTR_ERR(table->pcif_pool);
if (!mlx5_sf_max_functions(dev))
return 0;
if (sf_vec < MLX5_IRQ_VEC_COMP_BASE_SF) {
@@ -548,7 +646,7 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
MLX5_SFS_PER_CTRL_IRQ);
num_sf_ctrl = min_t(int, num_sf_ctrl_by_msix, num_sf_ctrl_by_sfs);
num_sf_ctrl = min_t(int, MLX5_IRQ_CTRL_SF_MAX, num_sf_ctrl);
- table->sf_ctrl_pool = irq_pool_alloc(dev, pf_vec, num_sf_ctrl,
+ table->sf_ctrl_pool = irq_pool_alloc(dev, pcif_vec, num_sf_ctrl,
"mlx5_sf_ctrl",
MLX5_EQ_SHARE_IRQ_MIN_CTRL,
MLX5_EQ_SHARE_IRQ_MAX_CTRL);
@@ -557,7 +655,7 @@ static int irq_pools_init(struct mlx5_core_dev *dev, int sf_vec, int pf_vec)
goto err_pf;
}
/* init sf_comp_pool */
- table->sf_comp_pool = irq_pool_alloc(dev, pf_vec + num_sf_ctrl,
+ table->sf_comp_pool = irq_pool_alloc(dev, pcif_vec + num_sf_ctrl,
sf_vec - num_sf_ctrl, "mlx5_sf_comp",
MLX5_EQ_SHARE_IRQ_MIN_COMP,
MLX5_EQ_SHARE_IRQ_MAX_COMP);
@@ -579,7 +677,7 @@ err_irqs_per_cpu:
err_sf_ctrl:
irq_pool_free(table->sf_ctrl_pool);
err_pf:
- irq_pool_free(table->pf_pool);
+ irq_pool_free(table->pcif_pool);
return err;
}
@@ -589,7 +687,7 @@ static void irq_pools_destroy(struct mlx5_irq_table *table)
irq_pool_free(table->sf_comp_pool);
irq_pool_free(table->sf_ctrl_pool);
}
- irq_pool_free(table->pf_pool);
+ irq_pool_free(table->pcif_pool);
}
/* irq_table API */
@@ -620,9 +718,9 @@ void mlx5_irq_table_cleanup(struct mlx5_core_dev *dev)
int mlx5_irq_table_get_num_comp(struct mlx5_irq_table *table)
{
- if (!table->pf_pool->xa_num_irqs.max)
+ if (!table->pcif_pool->xa_num_irqs.max)
return 1;
- return table->pf_pool->xa_num_irqs.max - table->pf_pool->xa_num_irqs.min;
+ return table->pcif_pool->xa_num_irqs.max - table->pcif_pool->xa_num_irqs.min;
}
int mlx5_irq_table_create(struct mlx5_core_dev *dev)
@@ -631,26 +729,30 @@ int mlx5_irq_table_create(struct mlx5_core_dev *dev)
MLX5_CAP_GEN(dev, max_num_eqs) :
1 << MLX5_CAP_GEN(dev, log_max_eq);
int total_vec;
- int pf_vec;
+ int pcif_vec;
+ int req_vec;
int err;
+ int n;
if (mlx5_core_is_sf(dev))
return 0;
- pf_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
- pf_vec = min_t(int, pf_vec, num_eqs);
+ pcif_vec = MLX5_CAP_GEN(dev, num_ports) * num_online_cpus() + 1;
+ pcif_vec = min_t(int, pcif_vec, num_eqs);
- total_vec = pf_vec;
+ total_vec = pcif_vec;
if (mlx5_sf_max_functions(dev))
total_vec += MLX5_IRQ_CTRL_SF_MAX +
MLX5_COMP_EQS_PER_SF * mlx5_sf_max_functions(dev);
+ total_vec = min_t(int, total_vec, pci_msix_vec_count(dev->pdev));
+ pcif_vec = min_t(int, pcif_vec, pci_msix_vec_count(dev->pdev));
- total_vec = pci_alloc_irq_vectors(dev->pdev, 1, total_vec, PCI_IRQ_MSIX);
- if (total_vec < 0)
- return total_vec;
- pf_vec = min(pf_vec, total_vec);
+ req_vec = pci_msix_can_alloc_dyn(dev->pdev) ? 1 : total_vec;
+ n = pci_alloc_irq_vectors(dev->pdev, 1, req_vec, PCI_IRQ_MSIX);
+ if (n < 0)
+ return n;
- err = irq_pools_init(dev, total_vec - pf_vec, pf_vec);
+ err = irq_pools_init(dev, total_vec - pcif_vec, pcif_vec);
if (err)
pci_free_irq_vectors(dev->pdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
index 5c7e68bee43a..d3a77a0ab848 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.h
@@ -12,6 +12,7 @@
#define MLX5_EQ_REFS_PER_IRQ (2)
struct mlx5_irq;
+struct cpu_rmap;
struct mlx5_irq_pool {
char name[MLX5_MAX_IRQ_NAME - MLX5_MAX_IRQ_IDX_CHARS];
@@ -31,7 +32,8 @@ static inline bool mlx5_irq_pool_is_sf_pool(struct mlx5_irq_pool *pool)
}
struct mlx5_irq *mlx5_irq_alloc(struct mlx5_irq_pool *pool, int i,
- const struct cpumask *affinity);
+ struct irq_affinity_desc *af_desc,
+ struct cpu_rmap **rmap);
int mlx5_irq_get_locked(struct mlx5_irq *irq);
int mlx5_irq_read_locked(struct mlx5_irq *irq);
int mlx5_irq_put(struct mlx5_irq *irq);
diff --git a/include/linux/cpu_rmap.h b/include/linux/cpu_rmap.h
index be8aea04d023..cae324d10965 100644
--- a/include/linux/cpu_rmap.h
+++ b/include/linux/cpu_rmap.h
@@ -16,14 +16,13 @@
* struct cpu_rmap - CPU affinity reverse-map
* @refcount: kref for object
* @size: Number of objects to be reverse-mapped
- * @used: Number of objects added
* @obj: Pointer to array of object pointers
* @near: For each CPU, the index and distance to the nearest object,
* based on affinity masks
*/
struct cpu_rmap {
struct kref refcount;
- u16 size, used;
+ u16 size;
void **obj;
struct {
u16 index;
@@ -61,6 +60,7 @@ static inline struct cpu_rmap *alloc_irq_cpu_rmap(unsigned int size)
}
extern void free_irq_cpu_rmap(struct cpu_rmap *rmap);
+int irq_cpu_rmap_remove(struct cpu_rmap *rmap, int irq);
extern int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq);
#endif /* __LINUX_CPU_RMAP_H */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 7a898113b6b7..f243bd10a5e1 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1311,4 +1311,10 @@ enum {
MLX5_OCTWORD = 16,
};
+struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev,
+ irqreturn_t (*handler)(int, void *),
+ const struct irq_affinity_desc *affdesc,
+ const char *name);
+void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map);
+
#endif /* MLX5_DRIVER_H */
diff --git a/lib/cpu_rmap.c b/lib/cpu_rmap.c
index f08d9c56f712..73c1636b927b 100644
--- a/lib/cpu_rmap.c
+++ b/lib/cpu_rmap.c
@@ -128,19 +128,31 @@ debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
}
#endif
+static int get_free_index(struct cpu_rmap *rmap)
+{
+ int i;
+
+ for (i = 0; i < rmap->size; i++)
+ if (!rmap->obj[i])
+ return i;
+
+ return -ENOSPC;
+}
+
/**
* cpu_rmap_add - add object to a rmap
* @rmap: CPU rmap allocated with alloc_cpu_rmap()
* @obj: Object to add to rmap
*
- * Return index of object.
+ * Return index of object or -ENOSPC if no free entry was found
*/
int cpu_rmap_add(struct cpu_rmap *rmap, void *obj)
{
- u16 index;
+ int index = get_free_index(rmap);
+
+ if (index < 0)
+ return index;
- BUG_ON(rmap->used >= rmap->size);
- index = rmap->used++;
rmap->obj[index] = obj;
return index;
}
@@ -230,9 +242,10 @@ void free_irq_cpu_rmap(struct cpu_rmap *rmap)
if (!rmap)
return;
- for (index = 0; index < rmap->used; index++) {
+ for (index = 0; index < rmap->size; index++) {
glue = rmap->obj[index];
- irq_set_affinity_notifier(glue->notify.irq, NULL);
+ if (glue)
+ irq_set_affinity_notifier(glue->notify.irq, NULL);
}
cpu_rmap_put(rmap);
@@ -268,10 +281,22 @@ static void irq_cpu_rmap_release(struct kref *ref)
container_of(ref, struct irq_glue, notify.kref);
cpu_rmap_put(glue->rmap);
+ glue->rmap->obj[glue->index] = NULL;
kfree(glue);
}
/**
+ * irq_cpu_rmap_remove - remove an IRQ from a CPU affinity reverse-map
+ * @rmap: The reverse-map
+ * @irq: The IRQ number
+ */
+int irq_cpu_rmap_remove(struct cpu_rmap *rmap, int irq)
+{
+ return irq_set_affinity_notifier(irq, NULL);
+}
+EXPORT_SYMBOL(irq_cpu_rmap_remove);
+
+/**
* irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
* @rmap: The reverse-map
* @irq: The IRQ number
@@ -293,12 +318,22 @@ int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
glue->notify.release = irq_cpu_rmap_release;
glue->rmap = rmap;
cpu_rmap_get(rmap);
- glue->index = cpu_rmap_add(rmap, glue);
+ rc = cpu_rmap_add(rmap, glue);
+ if (rc < 0)
+ goto err_add;
+
+ glue->index = rc;
rc = irq_set_affinity_notifier(irq, &glue->notify);
- if (rc) {
- cpu_rmap_put(glue->rmap);
- kfree(glue);
- }
+ if (rc)
+ goto err_set;
+
+ return rc;
+
+err_set:
+ rmap->obj[glue->index] = NULL;
+err_add:
+ cpu_rmap_put(glue->rmap);
+ kfree(glue);
return rc;
}
EXPORT_SYMBOL(irq_cpu_rmap_add);