From 5cd903bce9ddd234d76e67d0dfaf0aab0f11a2e0 Mon Sep 17 00:00:00 2001
From: Alex Williamson
Date: Thu, 3 Aug 2023 11:12:32 -0600
Subject: PCI/VPD: Add runtime power management to sysfs interface

Unlike default access to config space through sysfs, the VPD read and
write functions don't actively manage the runtime power management state
of the device during access. Since commit 7ab5e10eda02 ("vfio/pci: Move
the unused device into low power state with runtime PM"), the vfio-pci
driver will use runtime power management and release unused devices to
make use of low power states. Attempting to access VPD information in
D3cold can result in incorrect information or kernel crashes depending
on the system behavior.

Wrap the VPD read/write bin attribute handlers in runtime PM and take
into account the potential quirk to select the correct device to wake.

Link: https://lore.kernel.org/r/20230803171233.3810944-2-alex.williamson@redhat.com
Signed-off-by: Alex Williamson
[bhelgaas: tweak pci_dev_put() test to match the pci_get_func0_dev() test]
Signed-off-by: Bjorn Helgaas
---
 drivers/pci/vpd.c | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

(limited to 'drivers/pci')

diff --git a/drivers/pci/vpd.c b/drivers/pci/vpd.c
index a4fc4d0690fe..485a642b9304 100644
--- a/drivers/pci/vpd.c
+++ b/drivers/pci/vpd.c
@@ -275,8 +275,23 @@ static ssize_t vpd_read(struct file *filp, struct kobject *kobj,
 			size_t count)
 {
 	struct pci_dev *dev = to_pci_dev(kobj_to_dev(kobj));
+	struct pci_dev *vpd_dev = dev;
+	ssize_t ret;
+
+	if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) {
+		vpd_dev = pci_get_func0_dev(dev);
+		if (!vpd_dev)
+			return -ENODEV;
+	}
+
+	pci_config_pm_runtime_get(vpd_dev);
+	ret = pci_read_vpd(vpd_dev, off, count, buf);
+	pci_config_pm_runtime_put(vpd_dev);
+
+	if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0)
+		pci_dev_put(vpd_dev);
 
-	return pci_read_vpd(dev, off, count, buf);
+	return ret;
 }
 
 static ssize_t vpd_write(struct file *filp, struct kobject *kobj,
@@ -284,8 +299,23 @@ static ssize_t vpd_write(struct file *filp, struct kobject *kobj,
 			 size_t count)
 {
 	struct pci_dev *dev = to_pci_dev(kobj_to_dev(kobj));
+	struct pci_dev *vpd_dev = dev;
+	ssize_t ret;
+
+	if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0) {
+		vpd_dev = pci_get_func0_dev(dev);
+		if (!vpd_dev)
+			return -ENODEV;
+	}
+
+	pci_config_pm_runtime_get(vpd_dev);
+	ret = pci_write_vpd(vpd_dev, off, count, buf);
+	pci_config_pm_runtime_put(vpd_dev);
+
+	if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0)
+		pci_dev_put(vpd_dev);
 
-	return pci_write_vpd(dev, off, count, buf);
+	return ret;
 }
 
 static BIN_ATTR(vpd, 0600, vpd_read, vpd_write, 0);
--
cgit v1.2.3

From d3fcd7360338358aa0036bec6d2cf0e37a0ca624 Mon Sep 17 00:00:00 2001
From: Alex Williamson
Date: Thu, 3 Aug 2023 11:12:33 -0600
Subject: PCI: Fix runtime PM race with PME polling

Testing that a device is not currently in a low power state provides no
guarantees that the device is not imminently transitioning to such a
state. Increment the PM usage counter before accessing the device. Since
we don't wish to wake the device for PME polling, do so only if the
device is already active by using pm_runtime_get_if_active().

Link: https://lore.kernel.org/r/20230803171233.3810944-3-alex.williamson@redhat.com
Signed-off-by: Alex Williamson
Signed-off-by: Bjorn Helgaas
---
 drivers/pci/pci.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

(limited to 'drivers/pci')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 60230da957e0..bc266f290b2c 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -2415,10 +2415,13 @@ static void pci_pme_list_scan(struct work_struct *work)
 
 	mutex_lock(&pci_pme_list_mutex);
 	list_for_each_entry_safe(pme_dev, n, &pci_pme_list, list) {
-		if (pme_dev->dev->pme_poll) {
-			struct pci_dev *bridge;
+		struct pci_dev *pdev = pme_dev->dev;
+
+		if (pdev->pme_poll) {
+			struct pci_dev *bridge = pdev->bus->self;
+			struct device *dev = &pdev->dev;
+			int pm_status;
 
-			bridge = pme_dev->dev->bus->self;
 			/*
 			 * If bridge is in low power state, the
 			 * configuration space of subordinate devices
@@ -2426,14 +2429,20 @@ static void pci_pme_list_scan(struct work_struct *work)
 			 */
 			if (bridge && bridge->current_state != PCI_D0)
 				continue;
+
 			/*
-			 * If the device is in D3cold it should not be
-			 * polled either.
+			 * If the device is in a low power state it
+			 * should not be polled either.
 			 */
-			if (pme_dev->dev->current_state == PCI_D3cold)
+			pm_status = pm_runtime_get_if_active(dev, true);
+			if (!pm_status)
 				continue;
 
-			pci_pme_wakeup(pme_dev->dev, NULL);
+			if (pdev->current_state != PCI_D3cold)
+				pci_pme_wakeup(pdev, NULL);
+
+			if (pm_status > 0)
+				pm_runtime_put(dev);
 		} else {
 			list_del(&pme_dev->list);
 			kfree(pme_dev);
--
cgit v1.2.3