summaryrefslogtreecommitdiff
path: root/drivers/vfio/pci
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/vfio/pci')
-rw-r--r--drivers/vfio/pci/Kconfig8
-rw-r--r--drivers/vfio/pci/hisilicon/Kconfig4
-rw-r--r--drivers/vfio/pci/mlx5/Kconfig2
-rw-r--r--drivers/vfio/pci/vfio_pci_config.c8
-rw-r--r--drivers/vfio/pci/vfio_pci_core.c46
-rw-r--r--drivers/vfio/pci/vfio_pci_intrs.c305
6 files changed, 264 insertions, 109 deletions
diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig
index f9d0c908e738..86bb7835cf3c 100644
--- a/drivers/vfio/pci/Kconfig
+++ b/drivers/vfio/pci/Kconfig
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
-if PCI && MMU
+menu "VFIO support for PCI devices"
+ depends on PCI && MMU
+
config VFIO_PCI_CORE
tristate
select VFIO_VIRQFD
@@ -7,9 +9,11 @@ config VFIO_PCI_CORE
config VFIO_PCI_MMAP
def_bool y if !S390
+ depends on VFIO_PCI_CORE
config VFIO_PCI_INTX
def_bool y if !S390
+ depends on VFIO_PCI_CORE
config VFIO_PCI
tristate "Generic VFIO support for any PCI device"
@@ -59,4 +63,4 @@ source "drivers/vfio/pci/mlx5/Kconfig"
source "drivers/vfio/pci/hisilicon/Kconfig"
-endif
+endmenu
diff --git a/drivers/vfio/pci/hisilicon/Kconfig b/drivers/vfio/pci/hisilicon/Kconfig
index 5daa0f45d2f9..cbf1c32f6ebf 100644
--- a/drivers/vfio/pci/hisilicon/Kconfig
+++ b/drivers/vfio/pci/hisilicon/Kconfig
@@ -1,13 +1,13 @@
# SPDX-License-Identifier: GPL-2.0-only
config HISI_ACC_VFIO_PCI
- tristate "VFIO PCI support for HiSilicon ACC devices"
+ tristate "VFIO support for HiSilicon ACC PCI devices"
depends on ARM64 || (COMPILE_TEST && 64BIT)
- depends on VFIO_PCI_CORE
depends on PCI_MSI
depends on CRYPTO_DEV_HISI_QM
depends on CRYPTO_DEV_HISI_HPRE
depends on CRYPTO_DEV_HISI_SEC2
depends on CRYPTO_DEV_HISI_ZIP
+ select VFIO_PCI_CORE
help
This provides generic PCI support for HiSilicon ACC devices
using the VFIO framework.
diff --git a/drivers/vfio/pci/mlx5/Kconfig b/drivers/vfio/pci/mlx5/Kconfig
index 29ba9c504a75..7088edc4fb28 100644
--- a/drivers/vfio/pci/mlx5/Kconfig
+++ b/drivers/vfio/pci/mlx5/Kconfig
@@ -2,7 +2,7 @@
config MLX5_VFIO_PCI
tristate "VFIO support for MLX5 PCI devices"
depends on MLX5_CORE
- depends on VFIO_PCI_CORE
+ select VFIO_PCI_CORE
help
This provides migration support for MLX5 devices using the VFIO
framework.
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 948cdd464f4e..7e2e62ab0869 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -1566,8 +1566,8 @@ static int vfio_cap_init(struct vfio_pci_core_device *vdev)
}
if (!len) {
- pci_info(pdev, "%s: hiding cap %#x@%#x\n", __func__,
- cap, pos);
+ pci_dbg(pdev, "%s: hiding cap %#x@%#x\n", __func__,
+ cap, pos);
*prev = next;
pos = next;
continue;
@@ -1643,8 +1643,8 @@ static int vfio_ecap_init(struct vfio_pci_core_device *vdev)
}
if (!len) {
- pci_info(pdev, "%s: hiding ecap %#x@%#x\n",
- __func__, ecap, epos);
+ pci_dbg(pdev, "%s: hiding ecap %#x@%#x\n",
+ __func__, ecap, epos);
/* If not the first in the chain, we can skip over it */
if (prev) {
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index a5ab416cf476..20d7b69ea6ff 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -530,8 +530,11 @@ int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
vdev->msix_bar = table & PCI_MSIX_TABLE_BIR;
vdev->msix_offset = table & PCI_MSIX_TABLE_OFFSET;
vdev->msix_size = ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) * 16;
- } else
+ vdev->has_dyn_msix = pci_msix_can_alloc_dyn(pdev);
+ } else {
vdev->msix_bar = 0xFF;
+ vdev->has_dyn_msix = false;
+ }
if (!vfio_vga_disabled() && vfio_pci_is_vga(pdev))
vdev->has_vga = true;
@@ -882,6 +885,37 @@ int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
}
EXPORT_SYMBOL_GPL(vfio_pci_core_register_dev_region);
+static int vfio_pci_info_atomic_cap(struct vfio_pci_core_device *vdev,
+ struct vfio_info_cap *caps)
+{
+ struct vfio_device_info_cap_pci_atomic_comp cap = {
+ .header.id = VFIO_DEVICE_INFO_CAP_PCI_ATOMIC_COMP,
+ .header.version = 1
+ };
+ struct pci_dev *pdev = pci_physfn(vdev->pdev);
+ u32 devcap2;
+
+ pcie_capability_read_dword(pdev, PCI_EXP_DEVCAP2, &devcap2);
+
+ if ((devcap2 & PCI_EXP_DEVCAP2_ATOMIC_COMP32) &&
+ !pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP32))
+ cap.flags |= VFIO_PCI_ATOMIC_COMP32;
+
+ if ((devcap2 & PCI_EXP_DEVCAP2_ATOMIC_COMP64) &&
+ !pci_enable_atomic_ops_to_root(pdev, PCI_EXP_DEVCAP2_ATOMIC_COMP64))
+ cap.flags |= VFIO_PCI_ATOMIC_COMP64;
+
+ if ((devcap2 & PCI_EXP_DEVCAP2_ATOMIC_COMP128) &&
+ !pci_enable_atomic_ops_to_root(pdev,
+ PCI_EXP_DEVCAP2_ATOMIC_COMP128))
+ cap.flags |= VFIO_PCI_ATOMIC_COMP128;
+
+ if (!cap.flags)
+ return -ENODEV;
+
+ return vfio_info_add_capability(caps, &cap.header, sizeof(cap));
+}
+
static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
struct vfio_device_info __user *arg)
{
@@ -920,6 +954,13 @@ static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
return ret;
}
+ ret = vfio_pci_info_atomic_cap(vdev, &caps);
+ if (ret && ret != -ENODEV) {
+ pci_warn(vdev->pdev,
+ "Failed to setup AtomicOps info capability\n");
+ return ret;
+ }
+
if (caps.size) {
info.flags |= VFIO_DEVICE_FLAGS_CAPS;
if (info.argsz < sizeof(info) + caps.size) {
@@ -1111,7 +1152,7 @@ static int vfio_pci_ioctl_get_irq_info(struct vfio_pci_core_device *vdev,
if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
info.flags |=
(VFIO_IRQ_INFO_MASKABLE | VFIO_IRQ_INFO_AUTOMASKED);
- else
+ else if (info.index != VFIO_PCI_MSIX_IRQ_INDEX || !vdev->has_dyn_msix)
info.flags |= VFIO_IRQ_INFO_NORESIZE;
return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
@@ -2102,6 +2143,7 @@ int vfio_pci_core_init_dev(struct vfio_device *core_vdev)
INIT_LIST_HEAD(&vdev->vma_list);
INIT_LIST_HEAD(&vdev->sriov_pfs_item);
init_rwsem(&vdev->memory_lock);
+ xa_init(&vdev->ctx);
return 0;
}
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index bffb0741518b..cbb4bcbfbf83 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -48,6 +48,39 @@ static bool is_irq_none(struct vfio_pci_core_device *vdev)
vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX);
}
+static
+struct vfio_pci_irq_ctx *vfio_irq_ctx_get(struct vfio_pci_core_device *vdev,
+ unsigned long index)
+{
+ return xa_load(&vdev->ctx, index);
+}
+
+static void vfio_irq_ctx_free(struct vfio_pci_core_device *vdev,
+ struct vfio_pci_irq_ctx *ctx, unsigned long index)
+{
+ xa_erase(&vdev->ctx, index);
+ kfree(ctx);
+}
+
+static struct vfio_pci_irq_ctx *
+vfio_irq_ctx_alloc(struct vfio_pci_core_device *vdev, unsigned long index)
+{
+ struct vfio_pci_irq_ctx *ctx;
+ int ret;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
+ if (!ctx)
+ return NULL;
+
+ ret = xa_insert(&vdev->ctx, index, ctx, GFP_KERNEL_ACCOUNT);
+ if (ret) {
+ kfree(ctx);
+ return NULL;
+ }
+
+ return ctx;
+}
+
/*
* INTx
*/
@@ -55,14 +88,21 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused)
{
struct vfio_pci_core_device *vdev = opaque;
- if (likely(is_intx(vdev) && !vdev->virq_disabled))
- eventfd_signal(vdev->ctx[0].trigger, 1);
+ if (likely(is_intx(vdev) && !vdev->virq_disabled)) {
+ struct vfio_pci_irq_ctx *ctx;
+
+ ctx = vfio_irq_ctx_get(vdev, 0);
+ if (WARN_ON_ONCE(!ctx))
+ return;
+ eventfd_signal(ctx->trigger, 1);
+ }
}
/* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */
bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
{
struct pci_dev *pdev = vdev->pdev;
+ struct vfio_pci_irq_ctx *ctx;
unsigned long flags;
bool masked_changed = false;
@@ -77,7 +117,14 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
if (unlikely(!is_intx(vdev))) {
if (vdev->pci_2_3)
pci_intx(pdev, 0);
- } else if (!vdev->ctx[0].masked) {
+ goto out_unlock;
+ }
+
+ ctx = vfio_irq_ctx_get(vdev, 0);
+ if (WARN_ON_ONCE(!ctx))
+ goto out_unlock;
+
+ if (!ctx->masked) {
/*
* Can't use check_and_mask here because we always want to
* mask, not just when something is pending.
@@ -87,10 +134,11 @@ bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
else
disable_irq_nosync(pdev->irq);
- vdev->ctx[0].masked = true;
+ ctx->masked = true;
masked_changed = true;
}
+out_unlock:
spin_unlock_irqrestore(&vdev->irqlock, flags);
return masked_changed;
}
@@ -105,6 +153,7 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
{
struct vfio_pci_core_device *vdev = opaque;
struct pci_dev *pdev = vdev->pdev;
+ struct vfio_pci_irq_ctx *ctx;
unsigned long flags;
int ret = 0;
@@ -117,7 +166,14 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
if (unlikely(!is_intx(vdev))) {
if (vdev->pci_2_3)
pci_intx(pdev, 1);
- } else if (vdev->ctx[0].masked && !vdev->virq_disabled) {
+ goto out_unlock;
+ }
+
+ ctx = vfio_irq_ctx_get(vdev, 0);
+ if (WARN_ON_ONCE(!ctx))
+ goto out_unlock;
+
+ if (ctx->masked && !vdev->virq_disabled) {
/*
* A pending interrupt here would immediately trigger,
* but we can avoid that overhead by just re-sending
@@ -129,9 +185,10 @@ static int vfio_pci_intx_unmask_handler(void *opaque, void *unused)
} else
enable_irq(pdev->irq);
- vdev->ctx[0].masked = (ret > 0);
+ ctx->masked = (ret > 0);
}
+out_unlock:
spin_unlock_irqrestore(&vdev->irqlock, flags);
return ret;
@@ -146,18 +203,23 @@ void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev)
static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
{
struct vfio_pci_core_device *vdev = dev_id;
+ struct vfio_pci_irq_ctx *ctx;
unsigned long flags;
int ret = IRQ_NONE;
+ ctx = vfio_irq_ctx_get(vdev, 0);
+ if (WARN_ON_ONCE(!ctx))
+ return ret;
+
spin_lock_irqsave(&vdev->irqlock, flags);
if (!vdev->pci_2_3) {
disable_irq_nosync(vdev->pdev->irq);
- vdev->ctx[0].masked = true;
+ ctx->masked = true;
ret = IRQ_HANDLED;
- } else if (!vdev->ctx[0].masked && /* may be shared */
+ } else if (!ctx->masked && /* may be shared */
pci_check_and_mask_intx(vdev->pdev)) {
- vdev->ctx[0].masked = true;
+ ctx->masked = true;
ret = IRQ_HANDLED;
}
@@ -171,27 +233,27 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
static int vfio_intx_enable(struct vfio_pci_core_device *vdev)
{
+ struct vfio_pci_irq_ctx *ctx;
+
if (!is_irq_none(vdev))
return -EINVAL;
if (!vdev->pdev->irq)
return -ENODEV;
- vdev->ctx = kzalloc(sizeof(struct vfio_pci_irq_ctx), GFP_KERNEL_ACCOUNT);
- if (!vdev->ctx)
+ ctx = vfio_irq_ctx_alloc(vdev, 0);
+ if (!ctx)
return -ENOMEM;
- vdev->num_ctx = 1;
-
/*
* If the virtual interrupt is masked, restore it. Devices
* supporting DisINTx can be masked at the hardware level
* here, non-PCI-2.3 devices will have to wait until the
* interrupt is enabled.
*/
- vdev->ctx[0].masked = vdev->virq_disabled;
+ ctx->masked = vdev->virq_disabled;
if (vdev->pci_2_3)
- pci_intx(vdev->pdev, !vdev->ctx[0].masked);
+ pci_intx(vdev->pdev, !ctx->masked);
vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
@@ -202,41 +264,46 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
{
struct pci_dev *pdev = vdev->pdev;
unsigned long irqflags = IRQF_SHARED;
+ struct vfio_pci_irq_ctx *ctx;
struct eventfd_ctx *trigger;
unsigned long flags;
int ret;
- if (vdev->ctx[0].trigger) {
+ ctx = vfio_irq_ctx_get(vdev, 0);
+ if (WARN_ON_ONCE(!ctx))
+ return -EINVAL;
+
+ if (ctx->trigger) {
free_irq(pdev->irq, vdev);
- kfree(vdev->ctx[0].name);
- eventfd_ctx_put(vdev->ctx[0].trigger);
- vdev->ctx[0].trigger = NULL;
+ kfree(ctx->name);
+ eventfd_ctx_put(ctx->trigger);
+ ctx->trigger = NULL;
}
if (fd < 0) /* Disable only */
return 0;
- vdev->ctx[0].name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)",
- pci_name(pdev));
- if (!vdev->ctx[0].name)
+ ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)",
+ pci_name(pdev));
+ if (!ctx->name)
return -ENOMEM;
trigger = eventfd_ctx_fdget(fd);
if (IS_ERR(trigger)) {
- kfree(vdev->ctx[0].name);
+ kfree(ctx->name);
return PTR_ERR(trigger);
}
- vdev->ctx[0].trigger = trigger;
+ ctx->trigger = trigger;
if (!vdev->pci_2_3)
irqflags = 0;
ret = request_irq(pdev->irq, vfio_intx_handler,
- irqflags, vdev->ctx[0].name, vdev);
+ irqflags, ctx->name, vdev);
if (ret) {
- vdev->ctx[0].trigger = NULL;
- kfree(vdev->ctx[0].name);
+ ctx->trigger = NULL;
+ kfree(ctx->name);
eventfd_ctx_put(trigger);
return ret;
}
@@ -246,7 +313,7 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
* disable_irq won't.
*/
spin_lock_irqsave(&vdev->irqlock, flags);
- if (!vdev->pci_2_3 && vdev->ctx[0].masked)
+ if (!vdev->pci_2_3 && ctx->masked)
disable_irq_nosync(pdev->irq);
spin_unlock_irqrestore(&vdev->irqlock, flags);
@@ -255,12 +322,17 @@ static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
{
- vfio_virqfd_disable(&vdev->ctx[0].unmask);
- vfio_virqfd_disable(&vdev->ctx[0].mask);
+ struct vfio_pci_irq_ctx *ctx;
+
+ ctx = vfio_irq_ctx_get(vdev, 0);
+ WARN_ON_ONCE(!ctx);
+ if (ctx) {
+ vfio_virqfd_disable(&ctx->unmask);
+ vfio_virqfd_disable(&ctx->mask);
+ }
vfio_intx_set_signal(vdev, -1);
vdev->irq_type = VFIO_PCI_NUM_IRQS;
- vdev->num_ctx = 0;
- kfree(vdev->ctx);
+ vfio_irq_ctx_free(vdev, ctx, 0);
}
/*
@@ -284,11 +356,6 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
if (!is_irq_none(vdev))
return -EINVAL;
- vdev->ctx = kcalloc(nvec, sizeof(struct vfio_pci_irq_ctx),
- GFP_KERNEL_ACCOUNT);
- if (!vdev->ctx)
- return -ENOMEM;
-
/* return the number of supported vectors if we can't get all: */
cmd = vfio_pci_memory_lock_and_enable(vdev);
ret = pci_alloc_irq_vectors(pdev, 1, nvec, flag);
@@ -296,12 +363,10 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
if (ret > 0)
pci_free_irq_vectors(pdev);
vfio_pci_memory_unlock_and_restore(vdev, cmd);
- kfree(vdev->ctx);
return ret;
}
vfio_pci_memory_unlock_and_restore(vdev, cmd);
- vdev->num_ctx = nvec;
vdev->irq_type = msix ? VFIO_PCI_MSIX_IRQ_INDEX :
VFIO_PCI_MSI_IRQ_INDEX;
@@ -316,53 +381,91 @@ static int vfio_msi_enable(struct vfio_pci_core_device *vdev, int nvec, bool msi
return 0;
}
-static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
- int vector, int fd, bool msix)
+/*
+ * vfio_msi_alloc_irq() returns the Linux IRQ number of an MSI or MSI-X device
+ * interrupt vector. If a Linux IRQ number is not available then a new
+ * interrupt is allocated if dynamic MSI-X is supported.
+ *
+ * Where is vfio_msi_free_irq()? Allocated interrupts are maintained,
+ * essentially forming a cache that subsequent allocations can draw from.
+ * Interrupts are freed using pci_free_irq_vectors() when MSI/MSI-X is
+ * disabled.
+ */
+static int vfio_msi_alloc_irq(struct vfio_pci_core_device *vdev,
+ unsigned int vector, bool msix)
{
struct pci_dev *pdev = vdev->pdev;
- struct eventfd_ctx *trigger;
- int irq, ret;
+ struct msi_map map;
+ int irq;
u16 cmd;
- if (vector < 0 || vector >= vdev->num_ctx)
+ irq = pci_irq_vector(pdev, vector);
+ if (WARN_ON_ONCE(irq == 0))
return -EINVAL;
+ if (irq > 0 || !msix || !vdev->has_dyn_msix)
+ return irq;
- irq = pci_irq_vector(pdev, vector);
+ cmd = vfio_pci_memory_lock_and_enable(vdev);
+ map = pci_msix_alloc_irq_at(pdev, vector, NULL);
+ vfio_pci_memory_unlock_and_restore(vdev, cmd);
- if (vdev->ctx[vector].trigger) {
- irq_bypass_unregister_producer(&vdev->ctx[vector].producer);
+ return map.index < 0 ? map.index : map.virq;
+}
+static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
+ unsigned int vector, int fd, bool msix)
+{
+ struct pci_dev *pdev = vdev->pdev;
+ struct vfio_pci_irq_ctx *ctx;
+ struct eventfd_ctx *trigger;
+ int irq = -EINVAL, ret;
+ u16 cmd;
+
+ ctx = vfio_irq_ctx_get(vdev, vector);
+
+ if (ctx) {
+ irq_bypass_unregister_producer(&ctx->producer);
+ irq = pci_irq_vector(pdev, vector);
cmd = vfio_pci_memory_lock_and_enable(vdev);
- free_irq(irq, vdev->ctx[vector].trigger);
+ free_irq(irq, ctx->trigger);
vfio_pci_memory_unlock_and_restore(vdev, cmd);
-
- kfree(vdev->ctx[vector].name);
- eventfd_ctx_put(vdev->ctx[vector].trigger);
- vdev->ctx[vector].trigger = NULL;
+ /* Interrupt stays allocated, will be freed at MSI-X disable. */
+ kfree(ctx->name);
+ eventfd_ctx_put(ctx->trigger);
+ vfio_irq_ctx_free(vdev, ctx, vector);
}
if (fd < 0)
return 0;
- vdev->ctx[vector].name = kasprintf(GFP_KERNEL_ACCOUNT,
- "vfio-msi%s[%d](%s)",
- msix ? "x" : "", vector,
- pci_name(pdev));
- if (!vdev->ctx[vector].name)
+ if (irq == -EINVAL) {
+ /* Interrupt stays allocated, will be freed at MSI-X disable. */
+ irq = vfio_msi_alloc_irq(vdev, vector, msix);
+ if (irq < 0)
+ return irq;
+ }
+
+ ctx = vfio_irq_ctx_alloc(vdev, vector);
+ if (!ctx)
return -ENOMEM;
+ ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-msi%s[%d](%s)",
+ msix ? "x" : "", vector, pci_name(pdev));
+ if (!ctx->name) {
+ ret = -ENOMEM;
+ goto out_free_ctx;
+ }
+
trigger = eventfd_ctx_fdget(fd);
if (IS_ERR(trigger)) {
- kfree(vdev->ctx[vector].name);
- return PTR_ERR(trigger);
+ ret = PTR_ERR(trigger);
+ goto out_free_name;
}
/*
- * The MSIx vector table resides in device memory which may be cleared
- * via backdoor resets. We don't allow direct access to the vector
- * table so even if a userspace driver attempts to save/restore around
- * such a reset it would be unsuccessful. To avoid this, restore the
- * cached value of the message prior to enabling.
+ * If the vector was previously allocated, refresh the on-device
+ * message data before enabling in case it had been cleared or
+ * corrupted (e.g. due to backdoor resets) since writing.
*/
cmd = vfio_pci_memory_lock_and_enable(vdev);
if (msix) {
@@ -372,37 +475,39 @@ static int vfio_msi_set_vector_signal(struct vfio_pci_core_device *vdev,
pci_write_msi_msg(irq, &msg);
}
- ret = request_irq(irq, vfio_msihandler, 0,
- vdev->ctx[vector].name, trigger);
+ ret = request_irq(irq, vfio_msihandler, 0, ctx->name, trigger);
vfio_pci_memory_unlock_and_restore(vdev, cmd);
- if (ret) {
- kfree(vdev->ctx[vector].name);
- eventfd_ctx_put(trigger);
- return ret;
- }
+ if (ret)
+ goto out_put_eventfd_ctx;
- vdev->ctx[vector].producer.token = trigger;
- vdev->ctx[vector].producer.irq = irq;
- ret = irq_bypass_register_producer(&vdev->ctx[vector].producer);
+ ctx->producer.token = trigger;
+ ctx->producer.irq = irq;
+ ret = irq_bypass_register_producer(&ctx->producer);
if (unlikely(ret)) {
dev_info(&pdev->dev,
"irq bypass producer (token %p) registration fails: %d\n",
- vdev->ctx[vector].producer.token, ret);
+ ctx->producer.token, ret);
- vdev->ctx[vector].producer.token = NULL;
+ ctx->producer.token = NULL;
}
- vdev->ctx[vector].trigger = trigger;
+ ctx->trigger = trigger;
return 0;
+
+out_put_eventfd_ctx:
+ eventfd_ctx_put(trigger);
+out_free_name:
+ kfree(ctx->name);
+out_free_ctx:
+ vfio_irq_ctx_free(vdev, ctx, vector);
+ return ret;
}
static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
unsigned count, int32_t *fds, bool msix)
{
- int i, j, ret = 0;
-
- if (start >= vdev->num_ctx || start + count > vdev->num_ctx)
- return -EINVAL;
+ unsigned int i, j;
+ int ret = 0;
for (i = 0, j = start; i < count && !ret; i++, j++) {
int fd = fds ? fds[i] : -1;
@@ -410,8 +515,8 @@ static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
}
if (ret) {
- for (--j; j >= (int)start; j--)
- vfio_msi_set_vector_signal(vdev, j, -1, msix);
+ for (i = start; i < j; i++)
+ vfio_msi_set_vector_signal(vdev, i, -1, msix);
}
return ret;
@@ -420,16 +525,16 @@ static int vfio_msi_set_block(struct vfio_pci_core_device *vdev, unsigned start,
static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix)
{
struct pci_dev *pdev = vdev->pdev;
- int i;
+ struct vfio_pci_irq_ctx *ctx;
+ unsigned long i;
u16 cmd;
- for (i = 0; i < vdev->num_ctx; i++) {
- vfio_virqfd_disable(&vdev->ctx[i].unmask);
- vfio_virqfd_disable(&vdev->ctx[i].mask);
+ xa_for_each(&vdev->ctx, i, ctx) {
+ vfio_virqfd_disable(&ctx->unmask);
+ vfio_virqfd_disable(&ctx->mask);
+ vfio_msi_set_vector_signal(vdev, i, -1, msix);
}
- vfio_msi_set_block(vdev, 0, vdev->num_ctx, NULL, msix);
-
cmd = vfio_pci_memory_lock_and_enable(vdev);
pci_free_irq_vectors(pdev);
vfio_pci_memory_unlock_and_restore(vdev, cmd);
@@ -442,8 +547,6 @@ static void vfio_msi_disable(struct vfio_pci_core_device *vdev, bool msix)
pci_intx(pdev, 0);
vdev->irq_type = VFIO_PCI_NUM_IRQS;
- vdev->num_ctx = 0;
- kfree(vdev->ctx);
}
/*
@@ -463,14 +566,18 @@ static int vfio_pci_set_intx_unmask(struct vfio_pci_core_device *vdev,
if (unmask)
vfio_pci_intx_unmask(vdev);
} else if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
+ struct vfio_pci_irq_ctx *ctx = vfio_irq_ctx_get(vdev, 0);
int32_t fd = *(int32_t *)data;
+
+ if (WARN_ON_ONCE(!ctx))
+ return -EINVAL;
if (fd >= 0)
return vfio_virqfd_enable((void *) vdev,
vfio_pci_intx_unmask_handler,
vfio_send_intx_eventfd, NULL,
- &vdev->ctx[0].unmask, fd);
+ &ctx->unmask, fd);
- vfio_virqfd_disable(&vdev->ctx[0].unmask);
+ vfio_virqfd_disable(&ctx->unmask);
}
return 0;
@@ -543,7 +650,8 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
unsigned index, unsigned start,
unsigned count, uint32_t flags, void *data)
{
- int i;
+ struct vfio_pci_irq_ctx *ctx;
+ unsigned int i;
bool msix = (index == VFIO_PCI_MSIX_IRQ_INDEX) ? true : false;
if (irq_is(vdev, index) && !count && (flags & VFIO_IRQ_SET_DATA_NONE)) {
@@ -573,18 +681,19 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_core_device *vdev,
return ret;
}
- if (!irq_is(vdev, index) || start + count > vdev->num_ctx)
+ if (!irq_is(vdev, index))
return -EINVAL;
for (i = start; i < start + count; i++) {
- if (!vdev->ctx[i].trigger)
+ ctx = vfio_irq_ctx_get(vdev, i);
+ if (!ctx)
continue;
if (flags & VFIO_IRQ_SET_DATA_NONE) {
- eventfd_signal(vdev->ctx[i].trigger, 1);
+ eventfd_signal(ctx->trigger, 1);
} else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
uint8_t *bools = data;
if (bools[i - start])
- eventfd_signal(vdev->ctx[i].trigger, 1);
+ eventfd_signal(ctx->trigger, 1);
}
}
return 0;