summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/amd/pds_core/core.c
diff options
context:
space:
mode:
authorBrett Creeley <brett.creeley@amd.com>2024-01-30 02:40:33 +0300
committerJakub Kicinski <kuba@kernel.org>2024-02-01 05:26:59 +0300
commit7e82a8745b951b1e794cc780d46f3fbee5e93447 (patch)
tree1cc9ad756363508d011296f838171514f0e69385 /drivers/net/ethernet/amd/pds_core/core.c
parent951705151e50f9022bc96ec8b3fd5697380b1df6 (diff)
downloadlinux-7e82a8745b951b1e794cc780d46f3fbee5e93447.tar.xz
pds_core: Prevent race issues involving the adminq
There are multiple paths that can result in using the pdsc's adminq. [1] pdsc_adminq_isr and the resulting work from queue_work(), i.e. pdsc_work_thread()->pdsc_process_adminq() [2] pdsc_adminq_post() When the device goes through reset via PCIe reset and/or a fw_down/fw_up cycle due to bad PCIe state or bad device state the adminq is destroyed and recreated. A NULL pointer dereference can happen if [1] or [2] happens after the adminq is already destroyed. In order to fix this, add some further state checks and implement reference counting for adminq uses. Reference counting was used because multiple threads can attempt to access the adminq at the same time via [1] or [2]. Additionally, multiple clients (i.e. pds-vfio-pci) can be using [2] at the same time. The adminq_refcnt is initialized to 1 when the adminq has been allocated and is ready to use. Users/clients of the adminq (i.e. [1] and [2]) will increment the refcnt when they are using the adminq. When the driver goes into a fw_down cycle it will set the PDSC_S_FW_DEAD bit and then wait for the adminq_refcnt to hit 1. Setting the PDSC_S_FW_DEAD before waiting will prevent any further adminq_refcnt increments. Waiting for the adminq_refcnt to hit 1 allows for any current users of the adminq to finish before the driver frees the adminq. Once the adminq_refcnt hits 1 the driver clears the refcnt to signify that the adminq is deleted and cannot be used. On the fw_up cycle the driver will once again initialize the adminq_refcnt to 1 allowing the adminq to be used again. Fixes: 01ba61b55b20 ("pds_core: Add adminq processing and commands") Signed-off-by: Brett Creeley <brett.creeley@amd.com> Reviewed-by: Shannon Nelson <shannon.nelson@amd.com> Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com> Link: https://lore.kernel.org/r/20240129234035.69802-5-brett.creeley@amd.com Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'drivers/net/ethernet/amd/pds_core/core.c')
-rw-r--r--drivers/net/ethernet/amd/pds_core/core.c21
1 files changed, 21 insertions, 0 deletions
diff --git a/drivers/net/ethernet/amd/pds_core/core.c b/drivers/net/ethernet/amd/pds_core/core.c
index 0356e56a6e99..f44333bd1256 100644
--- a/drivers/net/ethernet/amd/pds_core/core.c
+++ b/drivers/net/ethernet/amd/pds_core/core.c
@@ -450,6 +450,7 @@ int pdsc_setup(struct pdsc *pdsc, bool init)
pdsc_debugfs_add_viftype(pdsc);
}
+ refcount_set(&pdsc->adminq_refcnt, 1);
clear_bit(PDSC_S_FW_DEAD, &pdsc->state);
return 0;
@@ -514,6 +515,24 @@ void pdsc_stop(struct pdsc *pdsc)
PDS_CORE_INTR_MASK_SET);
}
+static void pdsc_adminq_wait_and_dec_once_unused(struct pdsc *pdsc)
+{
+ /* The driver initializes the adminq_refcnt to 1 when the adminq is
+ * allocated and ready for use. Other users/requesters will increment
+ * the refcnt while in use. If the refcnt is down to 1 then the adminq
+ * is not in use and the refcnt can be cleared and adminq freed. Before
+ * calling this function the driver will set PDSC_S_FW_DEAD, which
+ * prevent subsequent attempts to use the adminq and increment the
+ * refcnt to fail. This guarantees that this function will eventually
+ * exit.
+ */
+ while (!refcount_dec_if_one(&pdsc->adminq_refcnt)) {
+ dev_dbg_ratelimited(pdsc->dev, "%s: adminq in use\n",
+ __func__);
+ cpu_relax();
+ }
+}
+
void pdsc_fw_down(struct pdsc *pdsc)
{
union pds_core_notifyq_comp reset_event = {
@@ -529,6 +548,8 @@ void pdsc_fw_down(struct pdsc *pdsc)
if (pdsc->pdev->is_virtfn)
return;
+ pdsc_adminq_wait_and_dec_once_unused(pdsc);
+
/* Notify clients of fw_down */
if (pdsc->fw_reporter)
devlink_health_report(pdsc->fw_reporter, "FW down reported", pdsc);