Diffstat (limited to 'drivers')
-rw-r--r--  drivers/base/platform-msi.c | 1
-rw-r--r--  drivers/bcma/driver_gpio.c | 2
-rw-r--r--  drivers/gpio/gpio-mlxbf2.c | 6
-rw-r--r--  drivers/gpu/drm/i915/gvt/aperture_gm.c | 20
-rw-r--r--  drivers/gpu/drm/i915/gvt/gvt.h | 46
-rw-r--r--  drivers/gpu/drm/i915/gvt/kvmgt.c | 205
-rw-r--r--  drivers/gpu/drm/i915/gvt/vgpu.c | 212
-rw-r--r--  drivers/irqchip/Kconfig | 17
-rw-r--r--  drivers/irqchip/Makefile | 1
-rw-r--r--  drivers/irqchip/irq-gic-v3.c | 2
-rw-r--r--  drivers/irqchip/irq-imx-mu-msi.c | 453
-rw-r--r--  drivers/irqchip/irq-ls-extirq.c | 87
-rw-r--r--  drivers/irqchip/irq-realtek-rtl.c | 134
-rw-r--r--  drivers/pci/xen-pcifront.c | 161
-rw-r--r--  drivers/pinctrl/pinctrl-amd.c | 2
-rw-r--r--  drivers/platform/Kconfig | 2
-rw-r--r--  drivers/platform/Makefile | 1
-rw-r--r--  drivers/platform/loongarch/Kconfig | 31
-rw-r--r--  drivers/platform/loongarch/Makefile | 1
-rw-r--r--  drivers/platform/loongarch/loongson-laptop.c | 624
-rw-r--r--  drivers/platform/x86/intel/int0002_vgpio.c | 3
-rw-r--r--  drivers/s390/cio/vfio_ccw_drv.c | 12
-rw-r--r--  drivers/s390/cio/vfio_ccw_ops.c | 92
-rw-r--r--  drivers/s390/cio/vfio_ccw_private.h | 11
-rw-r--r--  drivers/s390/crypto/vfio_ap_ops.c | 109
-rw-r--r--  drivers/s390/crypto/vfio_ap_private.h | 6
-rw-r--r--  drivers/ssb/driver_gpio.c | 6
-rw-r--r--  drivers/vfio/Kconfig | 1
-rw-r--r--  drivers/vfio/Makefile | 7
-rw-r--r--  drivers/vfio/container.c | 680
-rw-r--r--  drivers/vfio/fsl-mc/vfio_fsl_mc.c | 89
-rw-r--r--  drivers/vfio/iova_bitmap.c | 422
-rw-r--r--  drivers/vfio/mdev/mdev_core.c | 195
-rw-r--r--  drivers/vfio/mdev/mdev_driver.c | 7
-rw-r--r--  drivers/vfio/mdev/mdev_private.h | 32
-rw-r--r--  drivers/vfio/mdev/mdev_sysfs.c | 190
-rw-r--r--  drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 136
-rw-r--r--  drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h | 7
-rw-r--r--  drivers/vfio/pci/mlx5/cmd.c | 995
-rw-r--r--  drivers/vfio/pci/mlx5/cmd.h | 63
-rw-r--r--  drivers/vfio/pci/mlx5/main.c | 55
-rw-r--r--  drivers/vfio/pci/vfio_pci.c | 22
-rw-r--r--  drivers/vfio/pci/vfio_pci_config.c | 4
-rw-r--r--  drivers/vfio/pci/vfio_pci_core.c | 1105
-rw-r--r--  drivers/vfio/pci/vfio_pci_igd.c | 8
-rw-r--r--  drivers/vfio/pci/vfio_pci_intrs.c | 34
-rw-r--r--  drivers/vfio/pci/vfio_pci_priv.h | 104
-rw-r--r--  drivers/vfio/pci/vfio_pci_rdwr.c | 6
-rw-r--r--  drivers/vfio/pci/vfio_pci_zdev.c | 2
-rw-r--r--  drivers/vfio/platform/vfio_amba.c | 72
-rw-r--r--  drivers/vfio/platform/vfio_platform.c | 66
-rw-r--r--  drivers/vfio/platform/vfio_platform_common.c | 71
-rw-r--r--  drivers/vfio/platform/vfio_platform_private.h | 18
-rw-r--r--  drivers/vfio/vfio.h | 62
-rw-r--r--  drivers/vfio/vfio_main.c | 1392
-rw-r--r--  drivers/watchdog/Kconfig | 15
-rw-r--r--  drivers/watchdog/Makefile | 1
-rw-r--r--  drivers/watchdog/armada_37xx_wdt.c | 2
-rw-r--r--  drivers/watchdog/aspeed_wdt.c | 12
-rw-r--r--  drivers/watchdog/bd9576_wdt.c | 51
-rw-r--r--  drivers/watchdog/eurotechwdt.c | 2
-rw-r--r--  drivers/watchdog/exar_wdt.c | 427
-rw-r--r--  drivers/watchdog/ftwdt010_wdt.c | 25
-rw-r--r--  drivers/watchdog/hpwdt.c | 2
-rw-r--r--  drivers/watchdog/imx7ulp_wdt.c | 212
-rw-r--r--  drivers/watchdog/meson_gxbb_wdt.c | 24
-rw-r--r--  drivers/watchdog/npcm_wdt.c | 16
-rw-r--r--  drivers/watchdog/rti_wdt.c | 3
-rw-r--r--  drivers/watchdog/rzg2l_wdt.c | 39
-rw-r--r--  drivers/watchdog/s3c2410_wdt.c | 41
-rw-r--r--  drivers/watchdog/sa1100_wdt.c | 2
-rw-r--r--  drivers/watchdog/sp5100_tco.c | 13
-rw-r--r--  drivers/watchdog/twl4030_wdt.c | 1
-rw-r--r--  drivers/watchdog/w83627hf_wdt.c | 12
-rw-r--r--  drivers/watchdog/w83977f_wdt.c | 2
-rw-r--r--  drivers/watchdog/watchdog_dev.c | 6
-rw-r--r--  drivers/watchdog/wdat_wdt.c | 5
-rw-r--r--  drivers/xen/Kconfig | 2
-rw-r--r--  drivers/xen/gntdev-common.h | 3
-rw-r--r--  drivers/xen/gntdev.c | 80
-rw-r--r--  drivers/xen/grant-dma-ops.c | 112
-rw-r--r--  drivers/xen/xen-pciback/xenbus.c | 2
82 files changed, 6597 insertions, 2607 deletions
diff --git a/drivers/base/platform-msi.c b/drivers/base/platform-msi.c
index 296ea673d661..12b044151298 100644
--- a/drivers/base/platform-msi.c
+++ b/drivers/base/platform-msi.c
@@ -138,6 +138,7 @@ struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode,
return domain;
}
+EXPORT_SYMBOL_GPL(platform_msi_create_irq_domain);
static int platform_msi_alloc_priv_data(struct device *dev, unsigned int nvec,
irq_write_msi_msg_t write_msi_msg)
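The only functional change in this file is the new EXPORT_SYMBOL_GPL(), which allows modular MSI providers (such as the i.MX MU MSI driver added later in this series) to call platform_msi_create_irq_domain() when built as modules. A hedged sketch of what such a modular provider could look like; all "my_*" names are illustrative and not part of this patch:

#include <linux/irqdomain.h>
#include <linux/msi.h>

static struct irq_chip my_msi_irq_chip = {
	.name = "my-msi",
};

static struct msi_domain_ops my_msi_domain_ops = {
};

static struct msi_domain_info my_msi_domain_info = {
	.flags = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS,
	.ops   = &my_msi_domain_ops,
	.chip  = &my_msi_irq_chip,
};

static int my_msi_init(struct fwnode_handle *fwnode, struct irq_domain *parent)
{
	struct irq_domain *d;

	/* Only possible from module code once the symbol is exported. */
	d = platform_msi_create_irq_domain(fwnode, &my_msi_domain_info, parent);
	return d ? 0 : -ENOMEM;
}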
diff --git a/drivers/bcma/driver_gpio.c b/drivers/bcma/driver_gpio.c
index fac8ff983aec..65fb9bad1577 100644
--- a/drivers/bcma/driver_gpio.c
+++ b/drivers/bcma/driver_gpio.c
@@ -115,7 +115,7 @@ static irqreturn_t bcma_gpio_irq_handler(int irq, void *dev_id)
return IRQ_NONE;
for_each_set_bit(gpio, &irqs, gc->ngpio)
- generic_handle_irq(irq_find_mapping(gc->irq.domain, gpio));
+ generic_handle_domain_irq_safe(gc->irq.domain, gpio);
bcma_chipco_gpio_polarity(cc, irqs, val & irqs);
return IRQ_HANDLED;
diff --git a/drivers/gpio/gpio-mlxbf2.c b/drivers/gpio/gpio-mlxbf2.c
index 64cb060d9d75..77a41151c921 100644
--- a/drivers/gpio/gpio-mlxbf2.c
+++ b/drivers/gpio/gpio-mlxbf2.c
@@ -273,10 +273,8 @@ static irqreturn_t mlxbf2_gpio_irq_handler(int irq, void *ptr)
pending = readl(gs->gpio_io + YU_GPIO_CAUSE_OR_CAUSE_EVTEN0);
writel(pending, gs->gpio_io + YU_GPIO_CAUSE_OR_CLRCAUSE);
- for_each_set_bit(level, &pending, gc->ngpio) {
- int gpio_irq = irq_find_mapping(gc->irq.domain, level);
- generic_handle_irq(gpio_irq);
- }
+ for_each_set_bit(level, &pending, gc->ngpio)
+ generic_handle_domain_irq_safe(gc->irq.domain, level);
return IRQ_RETVAL(pending);
}
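Both GPIO conversions above follow the same pattern: instead of resolving the hwirq to a Linux IRQ with irq_find_mapping() and calling generic_handle_irq(), the handler hands the domain and hwirq to generic_handle_domain_irq_safe(), which does the lookup internally and disables local interrupts around the handling. A hedged, driver-agnostic sketch of the resulting handler shape; the "my_*" helpers are hypothetical:

#include <linux/gpio/driver.h>
#include <linux/interrupt.h>
#include <linux/irqdomain.h>

static irqreturn_t my_gpio_irq_handler(int irq, void *dev_id)
{
	struct gpio_chip *gc = dev_id;
	unsigned long pending = my_read_and_clear_pending(gc); /* hypothetical */
	unsigned int offset;

	for_each_set_bit(offset, &pending, gc->ngpio)
		/*
		 * Looks up the Linux IRQ for this hwirq in the domain and
		 * handles it with local interrupts disabled, so the call is
		 * safe even when this handler runs with IRQs enabled.
		 */
		generic_handle_domain_irq_safe(gc->irq.domain, offset);

	return pending ? IRQ_HANDLED : IRQ_NONE;
}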
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 3b81a6d35a7b..076c779f776a 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -240,13 +240,13 @@ static void free_resource(struct intel_vgpu *vgpu)
}
static int alloc_resource(struct intel_vgpu *vgpu,
- struct intel_vgpu_creation_params *param)
+ const struct intel_vgpu_config *conf)
{
struct intel_gvt *gvt = vgpu->gvt;
unsigned long request, avail, max, taken;
const char *item;
- if (!param->low_gm_sz || !param->high_gm_sz || !param->fence_sz) {
+ if (!conf->low_mm || !conf->high_mm || !conf->fence) {
gvt_vgpu_err("Invalid vGPU creation params\n");
return -EINVAL;
}
@@ -255,7 +255,7 @@ static int alloc_resource(struct intel_vgpu *vgpu,
max = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE;
taken = gvt->gm.vgpu_allocated_low_gm_size;
avail = max - taken;
- request = MB_TO_BYTES(param->low_gm_sz);
+ request = conf->low_mm;
if (request > avail)
goto no_enough_resource;
@@ -266,7 +266,7 @@ static int alloc_resource(struct intel_vgpu *vgpu,
max = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE;
taken = gvt->gm.vgpu_allocated_high_gm_size;
avail = max - taken;
- request = MB_TO_BYTES(param->high_gm_sz);
+ request = conf->high_mm;
if (request > avail)
goto no_enough_resource;
@@ -277,16 +277,16 @@ static int alloc_resource(struct intel_vgpu *vgpu,
max = gvt_fence_sz(gvt) - HOST_FENCE;
taken = gvt->fence.vgpu_allocated_fence_num;
avail = max - taken;
- request = param->fence_sz;
+ request = conf->fence;
if (request > avail)
goto no_enough_resource;
vgpu_fence_sz(vgpu) = request;
- gvt->gm.vgpu_allocated_low_gm_size += MB_TO_BYTES(param->low_gm_sz);
- gvt->gm.vgpu_allocated_high_gm_size += MB_TO_BYTES(param->high_gm_sz);
- gvt->fence.vgpu_allocated_fence_num += param->fence_sz;
+ gvt->gm.vgpu_allocated_low_gm_size += conf->low_mm;
+ gvt->gm.vgpu_allocated_high_gm_size += conf->high_mm;
+ gvt->fence.vgpu_allocated_fence_num += conf->fence;
return 0;
no_enough_resource:
@@ -340,11 +340,11 @@ void intel_vgpu_reset_resource(struct intel_vgpu *vgpu)
*
*/
int intel_vgpu_alloc_resource(struct intel_vgpu *vgpu,
- struct intel_vgpu_creation_params *param)
+ const struct intel_vgpu_config *conf)
{
int ret;
- ret = alloc_resource(vgpu, param);
+ ret = alloc_resource(vgpu, conf);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 705689e64011..dbf8d7470b2c 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -36,6 +36,7 @@
#include <uapi/linux/pci_regs.h>
#include <linux/kvm_host.h>
#include <linux/vfio.h>
+#include <linux/mdev.h>
#include "i915_drv.h"
#include "intel_gvt.h"
@@ -172,6 +173,7 @@ struct intel_vgpu_submission {
#define KVMGT_DEBUGFS_FILENAME "kvmgt_nr_cache_entries"
struct intel_vgpu {
+ struct vfio_device vfio_device;
struct intel_gvt *gvt;
struct mutex vgpu_lock;
int id;
@@ -211,7 +213,6 @@ struct intel_vgpu {
u32 scan_nonprivbb;
- struct vfio_device vfio_device;
struct vfio_region *region;
int num_regions;
struct eventfd_ctx *intx_trigger;
@@ -294,15 +295,25 @@ struct intel_gvt_firmware {
bool firmware_loaded;
};
-#define NR_MAX_INTEL_VGPU_TYPES 20
-struct intel_vgpu_type {
- char name[16];
- unsigned int avail_instance;
- unsigned int low_gm_size;
- unsigned int high_gm_size;
+struct intel_vgpu_config {
+ unsigned int low_mm;
+ unsigned int high_mm;
unsigned int fence;
+
+ /*
+ * A vGPU with a weight of 8 will get twice as much GPU time as a vGPU with
+ * a weight of 4 on a contended host; different vGPU types have different
+ * weights. Legal weights range from 1 to 16.
+ */
unsigned int weight;
- enum intel_vgpu_edid resolution;
+ enum intel_vgpu_edid edid;
+ const char *name;
+};
+
+struct intel_vgpu_type {
+ struct mdev_type type;
+ char name[16];
+ const struct intel_vgpu_config *conf;
};
struct intel_gvt {
@@ -326,6 +337,8 @@ struct intel_gvt {
struct intel_gvt_workload_scheduler scheduler;
struct notifier_block shadow_ctx_notifier_block[I915_NUM_ENGINES];
DECLARE_HASHTABLE(cmd_table, GVT_CMD_HASH_BITS);
+ struct mdev_parent parent;
+ struct mdev_type **mdev_types;
struct intel_vgpu_type *types;
unsigned int num_types;
struct intel_vgpu *idle_vgpu;
@@ -436,19 +449,8 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt);
/* ring context size i.e. the first 0x50 dwords*/
#define RING_CTX_SIZE 320
-struct intel_vgpu_creation_params {
- __u64 low_gm_sz; /* in MB */
- __u64 high_gm_sz; /* in MB */
- __u64 fence_sz;
- __u64 resolution;
- __s32 primary;
- __u64 vgpu_id;
-
- __u32 weight;
-};
-
int intel_vgpu_alloc_resource(struct intel_vgpu *vgpu,
- struct intel_vgpu_creation_params *param);
+ const struct intel_vgpu_config *conf);
void intel_vgpu_reset_resource(struct intel_vgpu *vgpu);
void intel_vgpu_free_resource(struct intel_vgpu *vgpu);
void intel_vgpu_write_fence(struct intel_vgpu *vgpu,
@@ -494,8 +496,8 @@ void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt);
struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt);
void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu);
-struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
- struct intel_vgpu_type *type);
+int intel_gvt_create_vgpu(struct intel_vgpu *vgpu,
+ const struct intel_vgpu_config *conf);
void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu);
void intel_gvt_release_vgpu(struct intel_vgpu *vgpu);
void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
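The header change above embeds a struct mdev_type in each intel_vgpu_type and keeps an array of pointers to them in struct intel_gvt, which is what the reworked mdev core API expects. Callbacks that receive a struct mdev_type * can then recover the driver-specific type with container_of() instead of the old type-group-id lookup. A minimal sketch of that pattern, assuming the layout introduced here:

/* Sketch: recovering driver data from the embedded mdev_type. */
static inline struct intel_vgpu_type *
to_intel_vgpu_type(struct mdev_type *mtype)
{
	return container_of(mtype, struct intel_vgpu_type, type);
}

/*
 * Registration side (see the vgpu.c and kvmgt.c hunks below): each
 * gvt->mdev_types[i] points at &gvt->types[i].type, and the array is
 * handed to mdev_register_parent() together with the mdev_driver.
 */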
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index e3cd58946477..7a45e5360caf 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -34,7 +34,6 @@
*/
#include <linux/init.h>
-#include <linux/device.h>
#include <linux/mm.h>
#include <linux/kthread.h>
#include <linux/sched/mm.h>
@@ -43,7 +42,6 @@
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/eventfd.h>
-#include <linux/uuid.h>
#include <linux/mdev.h>
#include <linux/debugfs.h>
@@ -115,117 +113,18 @@ static void kvmgt_page_track_flush_slot(struct kvm *kvm,
struct kvm_memory_slot *slot,
struct kvm_page_track_notifier_node *node);
-static ssize_t available_instances_show(struct mdev_type *mtype,
- struct mdev_type_attribute *attr,
- char *buf)
+static ssize_t intel_vgpu_show_description(struct mdev_type *mtype, char *buf)
{
- struct intel_vgpu_type *type;
- unsigned int num = 0;
- struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;
-
- type = &gvt->types[mtype_get_type_group_id(mtype)];
- if (!type)
- num = 0;
- else
- num = type->avail_instance;
-
- return sprintf(buf, "%u\n", num);
-}
-
-static ssize_t device_api_show(struct mdev_type *mtype,
- struct mdev_type_attribute *attr, char *buf)
-{
- return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING);
-}
-
-static ssize_t description_show(struct mdev_type *mtype,
- struct mdev_type_attribute *attr, char *buf)
-{
- struct intel_vgpu_type *type;
- struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;
-
- type = &gvt->types[mtype_get_type_group_id(mtype)];
- if (!type)
- return 0;
+ struct intel_vgpu_type *type =
+ container_of(mtype, struct intel_vgpu_type, type);
return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
"fence: %d\nresolution: %s\n"
"weight: %d\n",
- BYTES_TO_MB(type->low_gm_size),
- BYTES_TO_MB(type->high_gm_size),
- type->fence, vgpu_edid_str(type->resolution),
- type->weight);
-}
-
-static ssize_t name_show(struct mdev_type *mtype,
- struct mdev_type_attribute *attr, char *buf)
-{
- struct intel_vgpu_type *type;
- struct intel_gvt *gvt = kdev_to_i915(mtype_get_parent_dev(mtype))->gvt;
-
- type = &gvt->types[mtype_get_type_group_id(mtype)];
- if (!type)
- return 0;
-
- return sprintf(buf, "%s\n", type->name);
-}
-
-static MDEV_TYPE_ATTR_RO(available_instances);
-static MDEV_TYPE_ATTR_RO(device_api);
-static MDEV_TYPE_ATTR_RO(description);
-static MDEV_TYPE_ATTR_RO(name);
-
-static struct attribute *gvt_type_attrs[] = {
- &mdev_type_attr_available_instances.attr,
- &mdev_type_attr_device_api.attr,
- &mdev_type_attr_description.attr,
- &mdev_type_attr_name.attr,
- NULL,
-};
-
-static struct attribute_group *gvt_vgpu_type_groups[] = {
- [0 ... NR_MAX_INTEL_VGPU_TYPES - 1] = NULL,
-};
-
-static int intel_gvt_init_vgpu_type_groups(struct intel_gvt *gvt)
-{
- int i, j;
- struct intel_vgpu_type *type;
- struct attribute_group *group;
-
- for (i = 0; i < gvt->num_types; i++) {
- type = &gvt->types[i];
-
- group = kzalloc(sizeof(struct attribute_group), GFP_KERNEL);
- if (!group)
- goto unwind;
-
- group->name = type->name;
- group->attrs = gvt_type_attrs;
- gvt_vgpu_type_groups[i] = group;
- }
-
- return 0;
-
-unwind:
- for (j = 0; j < i; j++) {
- group = gvt_vgpu_type_groups[j];
- kfree(group);
- }
-
- return -ENOMEM;
-}
-
-static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
-{
- int i;
- struct attribute_group *group;
-
- for (i = 0; i < gvt->num_types; i++) {
- group = gvt_vgpu_type_groups[i];
- gvt_vgpu_type_groups[i] = NULL;
- kfree(group);
- }
+ BYTES_TO_MB(type->conf->low_mm),
+ BYTES_TO_MB(type->conf->high_mm),
+ type->conf->fence, vgpu_edid_str(type->conf->edid),
+ type->conf->weight);
}
static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
@@ -1546,7 +1445,28 @@ static const struct attribute_group *intel_vgpu_groups[] = {
NULL,
};
+static int intel_vgpu_init_dev(struct vfio_device *vfio_dev)
+{
+ struct mdev_device *mdev = to_mdev_device(vfio_dev->dev);
+ struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
+ struct intel_vgpu_type *type =
+ container_of(mdev->type, struct intel_vgpu_type, type);
+
+ vgpu->gvt = kdev_to_i915(mdev->type->parent->dev)->gvt;
+ return intel_gvt_create_vgpu(vgpu, type->conf);
+}
+
+static void intel_vgpu_release_dev(struct vfio_device *vfio_dev)
+{
+ struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
+
+ intel_gvt_destroy_vgpu(vgpu);
+ vfio_free_device(vfio_dev);
+}
+
static const struct vfio_device_ops intel_vgpu_dev_ops = {
+ .init = intel_vgpu_init_dev,
+ .release = intel_vgpu_release_dev,
.open_device = intel_vgpu_open_device,
.close_device = intel_vgpu_close_device,
.read = intel_vgpu_read,
@@ -1558,35 +1478,28 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = {
static int intel_vgpu_probe(struct mdev_device *mdev)
{
- struct device *pdev = mdev_parent_dev(mdev);
- struct intel_gvt *gvt = kdev_to_i915(pdev)->gvt;
- struct intel_vgpu_type *type;
struct intel_vgpu *vgpu;
int ret;
- type = &gvt->types[mdev_get_type_group_id(mdev)];
- if (!type)
- return -EINVAL;
-
- vgpu = intel_gvt_create_vgpu(gvt, type);
+ vgpu = vfio_alloc_device(intel_vgpu, vfio_device, &mdev->dev,
+ &intel_vgpu_dev_ops);
if (IS_ERR(vgpu)) {
gvt_err("failed to create intel vgpu: %ld\n", PTR_ERR(vgpu));
return PTR_ERR(vgpu);
}
- vfio_init_group_dev(&vgpu->vfio_device, &mdev->dev,
- &intel_vgpu_dev_ops);
-
dev_set_drvdata(&mdev->dev, vgpu);
ret = vfio_register_emulated_iommu_dev(&vgpu->vfio_device);
- if (ret) {
- intel_gvt_destroy_vgpu(vgpu);
- return ret;
- }
+ if (ret)
+ goto out_put_vdev;
gvt_dbg_core("intel_vgpu_create succeeded for mdev: %s\n",
dev_name(mdev_dev(mdev)));
return 0;
+
+out_put_vdev:
+ vfio_put_device(&vgpu->vfio_device);
+ return ret;
}
static void intel_vgpu_remove(struct mdev_device *mdev)
@@ -1595,18 +1508,43 @@ static void intel_vgpu_remove(struct mdev_device *mdev)
if (WARN_ON_ONCE(vgpu->attached))
return;
- intel_gvt_destroy_vgpu(vgpu);
+
+ vfio_unregister_group_dev(&vgpu->vfio_device);
+ vfio_put_device(&vgpu->vfio_device);
+}
+
+static unsigned int intel_vgpu_get_available(struct mdev_type *mtype)
+{
+ struct intel_vgpu_type *type =
+ container_of(mtype, struct intel_vgpu_type, type);
+ struct intel_gvt *gvt = kdev_to_i915(mtype->parent->dev)->gvt;
+ unsigned int low_gm_avail, high_gm_avail, fence_avail;
+
+ mutex_lock(&gvt->lock);
+ low_gm_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE -
+ gvt->gm.vgpu_allocated_low_gm_size;
+ high_gm_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE -
+ gvt->gm.vgpu_allocated_high_gm_size;
+ fence_avail = gvt_fence_sz(gvt) - HOST_FENCE -
+ gvt->fence.vgpu_allocated_fence_num;
+ mutex_unlock(&gvt->lock);
+
+ return min3(low_gm_avail / type->conf->low_mm,
+ high_gm_avail / type->conf->high_mm,
+ fence_avail / type->conf->fence);
}
static struct mdev_driver intel_vgpu_mdev_driver = {
+ .device_api = VFIO_DEVICE_API_PCI_STRING,
.driver = {
.name = "intel_vgpu_mdev",
.owner = THIS_MODULE,
.dev_groups = intel_vgpu_groups,
},
- .probe = intel_vgpu_probe,
- .remove = intel_vgpu_remove,
- .supported_type_groups = gvt_vgpu_type_groups,
+ .probe = intel_vgpu_probe,
+ .remove = intel_vgpu_remove,
+ .get_available = intel_vgpu_get_available,
+ .show_description = intel_vgpu_show_description,
};
int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
@@ -1904,8 +1842,7 @@ static void intel_gvt_clean_device(struct drm_i915_private *i915)
if (drm_WARN_ON(&i915->drm, !gvt))
return;
- mdev_unregister_device(i915->drm.dev);
- intel_gvt_cleanup_vgpu_type_groups(gvt);
+ mdev_unregister_parent(&gvt->parent);
intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
intel_gvt_clean_vgpu_types(gvt);
@@ -2005,19 +1942,15 @@ static int intel_gvt_init_device(struct drm_i915_private *i915)
intel_gvt_debugfs_init(gvt);
- ret = intel_gvt_init_vgpu_type_groups(gvt);
+ ret = mdev_register_parent(&gvt->parent, i915->drm.dev,
+ &intel_vgpu_mdev_driver,
+ gvt->mdev_types, gvt->num_types);
if (ret)
goto out_destroy_idle_vgpu;
- ret = mdev_register_device(i915->drm.dev, &intel_vgpu_mdev_driver);
- if (ret)
- goto out_cleanup_vgpu_type_groups;
-
gvt_dbg_core("gvt device initialization is done\n");
return 0;
-out_cleanup_vgpu_type_groups:
- intel_gvt_cleanup_vgpu_type_groups(gvt);
out_destroy_idle_vgpu:
intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
intel_gvt_debugfs_clean(gvt);
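The kvmgt changes above adopt the vfio core's allocate/init/register lifecycle: vfio_alloc_device() allocates the containing structure and invokes .init, registration makes the device visible, and the final vfio_put_device() ends up in .release (which calls vfio_free_device()). A condensed, hedged sketch of that flow for an emulated (mdev) device; struct my_dev and my_vfio_ops are hypothetical names, with my_vfio_ops assumed to provide .init and .release:

struct my_dev {
	struct vfio_device vdev;
	/* driver-private state ... */
};

static int my_mdev_probe(struct mdev_device *mdev)
{
	struct my_dev *md;
	int ret;

	/* Allocates struct my_dev and calls my_vfio_ops.init(). */
	md = vfio_alloc_device(my_dev, vdev, &mdev->dev, &my_vfio_ops);
	if (IS_ERR(md))
		return PTR_ERR(md);

	dev_set_drvdata(&mdev->dev, md);
	ret = vfio_register_emulated_iommu_dev(&md->vdev);
	if (ret)
		vfio_put_device(&md->vdev);	/* drops the ref, .release runs */
	return ret;
}

static void my_mdev_remove(struct mdev_device *mdev)
{
	struct my_dev *md = dev_get_drvdata(&mdev->dev);

	vfio_unregister_group_dev(&md->vdev);
	vfio_put_device(&md->vdev);
}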
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 46da19b3225d..56c71474008a 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -73,24 +73,21 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu)
drm_WARN_ON(&i915->drm, sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
}
+/*
+ * vGPU type name is defined as GVTg_Vx_y which contains the physical GPU
+ * generation type (e.g V4 as BDW server, V5 as SKL server).
+ *
+ * Depending on the physical SKU resource, we might see vGPU types like
+ * GVTg_V4_8, GVTg_V4_4, GVTg_V4_2, etc. We can create different types of
+ * vGPU on the same physical GPU depending on the available resources. Each
+ * vGPU type will have a different avail_instance to indicate how many
+ * vGPU instances can be created for this type.
+ */
#define VGPU_MAX_WEIGHT 16
#define VGPU_WEIGHT(vgpu_num) \
(VGPU_MAX_WEIGHT / (vgpu_num))
-static const struct {
- unsigned int low_mm;
- unsigned int high_mm;
- unsigned int fence;
-
- /* A vGPU with a weight of 8 will get twice as much GPU as a vGPU
- * with a weight of 4 on a contended host, different vGPU type has
- * different weight set. Legal weights range from 1 to 16.
- */
- unsigned int weight;
- enum intel_vgpu_edid edid;
- const char *name;
-} vgpu_types[] = {
-/* Fixed vGPU type table */
+static const struct intel_vgpu_config intel_vgpu_configs[] = {
{ MB_TO_BYTES(64), MB_TO_BYTES(384), 4, VGPU_WEIGHT(8), GVT_EDID_1024_768, "8" },
{ MB_TO_BYTES(128), MB_TO_BYTES(512), 4, VGPU_WEIGHT(4), GVT_EDID_1920_1200, "4" },
{ MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, VGPU_WEIGHT(2), GVT_EDID_1920_1200, "2" },
@@ -106,102 +103,58 @@ static const struct {
*/
int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
{
- unsigned int num_types;
- unsigned int i, low_avail, high_avail;
- unsigned int min_low;
-
- /* vGPU type name is defined as GVTg_Vx_y which contains
- * physical GPU generation type (e.g V4 as BDW server, V5 as
- * SKL server).
- *
- * Depend on physical SKU resource, might see vGPU types like
- * GVTg_V4_8, GVTg_V4_4, GVTg_V4_2, etc. We can create
- * different types of vGPU on same physical GPU depending on
- * available resource. Each vGPU type will have "avail_instance"
- * to indicate how many vGPU instance can be created for this
- * type.
- *
- */
- low_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE;
- high_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE;
- num_types = ARRAY_SIZE(vgpu_types);
+ unsigned int low_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE;
+ unsigned int high_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE;
+ unsigned int num_types = ARRAY_SIZE(intel_vgpu_configs);
+ unsigned int i;
gvt->types = kcalloc(num_types, sizeof(struct intel_vgpu_type),
GFP_KERNEL);
if (!gvt->types)
return -ENOMEM;
- min_low = MB_TO_BYTES(32);
- for (i = 0; i < num_types; ++i) {
- if (low_avail / vgpu_types[i].low_mm == 0)
- break;
-
- gvt->types[i].low_gm_size = vgpu_types[i].low_mm;
- gvt->types[i].high_gm_size = vgpu_types[i].high_mm;
- gvt->types[i].fence = vgpu_types[i].fence;
+ gvt->mdev_types = kcalloc(num_types, sizeof(*gvt->mdev_types),
+ GFP_KERNEL);
+ if (!gvt->mdev_types)
+ goto out_free_types;
- if (vgpu_types[i].weight < 1 ||
- vgpu_types[i].weight > VGPU_MAX_WEIGHT)
- return -EINVAL;
+ for (i = 0; i < num_types; ++i) {
+ const struct intel_vgpu_config *conf = &intel_vgpu_configs[i];
- gvt->types[i].weight = vgpu_types[i].weight;
- gvt->types[i].resolution = vgpu_types[i].edid;
- gvt->types[i].avail_instance = min(low_avail / vgpu_types[i].low_mm,
- high_avail / vgpu_types[i].high_mm);
+ if (low_avail / conf->low_mm == 0)
+ break;
+ if (conf->weight < 1 || conf->weight > VGPU_MAX_WEIGHT)
+ goto out_free_mdev_types;
- if (GRAPHICS_VER(gvt->gt->i915) == 8)
- sprintf(gvt->types[i].name, "GVTg_V4_%s",
- vgpu_types[i].name);
- else if (GRAPHICS_VER(gvt->gt->i915) == 9)
- sprintf(gvt->types[i].name, "GVTg_V5_%s",
- vgpu_types[i].name);
+ sprintf(gvt->types[i].name, "GVTg_V%u_%s",
+ GRAPHICS_VER(gvt->gt->i915) == 8 ? 4 : 5, conf->name);
+ gvt->types[i].conf = conf;
gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u weight %u res %s\n",
i, gvt->types[i].name,
- gvt->types[i].avail_instance,
- gvt->types[i].low_gm_size,
- gvt->types[i].high_gm_size, gvt->types[i].fence,
- gvt->types[i].weight,
- vgpu_edid_str(gvt->types[i].resolution));
+ min(low_avail / conf->low_mm,
+ high_avail / conf->high_mm),
+ conf->low_mm, conf->high_mm, conf->fence,
+ conf->weight, vgpu_edid_str(conf->edid));
+
+ gvt->mdev_types[i] = &gvt->types[i].type;
+ gvt->mdev_types[i]->sysfs_name = gvt->types[i].name;
}
gvt->num_types = i;
return 0;
-}
-void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt)
-{
+out_free_mdev_types:
+ kfree(gvt->mdev_types);
+out_free_types:
kfree(gvt->types);
+ return -EINVAL;
}
-static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt)
+void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt)
{
- int i;
- unsigned int low_gm_avail, high_gm_avail, fence_avail;
- unsigned int low_gm_min, high_gm_min, fence_min;
-
- /* Need to depend on maxium hw resource size but keep on
- * static config for now.
- */
- low_gm_avail = gvt_aperture_sz(gvt) - HOST_LOW_GM_SIZE -
- gvt->gm.vgpu_allocated_low_gm_size;
- high_gm_avail = gvt_hidden_sz(gvt) - HOST_HIGH_GM_SIZE -
- gvt->gm.vgpu_allocated_high_gm_size;
- fence_avail = gvt_fence_sz(gvt) - HOST_FENCE -
- gvt->fence.vgpu_allocated_fence_num;
-
- for (i = 0; i < gvt->num_types; i++) {
- low_gm_min = low_gm_avail / gvt->types[i].low_gm_size;
- high_gm_min = high_gm_avail / gvt->types[i].high_gm_size;
- fence_min = fence_avail / gvt->types[i].fence;
- gvt->types[i].avail_instance = min(min(low_gm_min, high_gm_min),
- fence_min);
-
- gvt_dbg_core("update type[%d]: %s avail %u low %u high %u fence %u\n",
- i, gvt->types[i].name,
- gvt->types[i].avail_instance, gvt->types[i].low_gm_size,
- gvt->types[i].high_gm_size, gvt->types[i].fence);
- }
+ kfree(gvt->mdev_types);
+ kfree(gvt->types);
}
/**
@@ -298,12 +251,6 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
intel_vgpu_clean_mmio(vgpu);
intel_vgpu_dmabuf_cleanup(vgpu);
mutex_unlock(&vgpu->vgpu_lock);
-
- mutex_lock(&gvt->lock);
- intel_gvt_update_vgpu_types(gvt);
- mutex_unlock(&gvt->lock);
-
- vfree(vgpu);
}
#define IDLE_VGPU_IDR 0
@@ -363,42 +310,38 @@ void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu)
vfree(vgpu);
}
-static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
- struct intel_vgpu_creation_params *param)
+int intel_gvt_create_vgpu(struct intel_vgpu *vgpu,
+ const struct intel_vgpu_config *conf)
{
+ struct intel_gvt *gvt = vgpu->gvt;
struct drm_i915_private *dev_priv = gvt->gt->i915;
- struct intel_vgpu *vgpu;
int ret;
- gvt_dbg_core("low %llu MB high %llu MB fence %llu\n",
- param->low_gm_sz, param->high_gm_sz,
- param->fence_sz);
-
- vgpu = vzalloc(sizeof(*vgpu));
- if (!vgpu)
- return ERR_PTR(-ENOMEM);
+ gvt_dbg_core("low %u MB high %u MB fence %u\n",
+ BYTES_TO_MB(conf->low_mm), BYTES_TO_MB(conf->high_mm),
+ conf->fence);
+ mutex_lock(&gvt->lock);
ret = idr_alloc(&gvt->vgpu_idr, vgpu, IDLE_VGPU_IDR + 1, GVT_MAX_VGPU,
GFP_KERNEL);
if (ret < 0)
- goto out_free_vgpu;
+ goto out_unlock;
vgpu->id = ret;
- vgpu->gvt = gvt;
- vgpu->sched_ctl.weight = param->weight;
+ vgpu->sched_ctl.weight = conf->weight;
mutex_init(&vgpu->vgpu_lock);
mutex_init(&vgpu->dmabuf_lock);
INIT_LIST_HEAD(&vgpu->dmabuf_obj_list_head);
INIT_RADIX_TREE(&vgpu->page_track_tree, GFP_KERNEL);
idr_init_base(&vgpu->object_idr, 1);
- intel_vgpu_init_cfg_space(vgpu, param->primary);
+ intel_vgpu_init_cfg_space(vgpu, 1);
vgpu->d3_entered = false;
ret = intel_vgpu_init_mmio(vgpu);
if (ret)
goto out_clean_idr;
- ret = intel_vgpu_alloc_resource(vgpu, param);
+ ret = intel_vgpu_alloc_resource(vgpu, conf);
if (ret)
goto out_clean_vgpu_mmio;
@@ -412,7 +355,7 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
if (ret)
goto out_clean_gtt;
- ret = intel_vgpu_init_display(vgpu, param->resolution);
+ ret = intel_vgpu_init_display(vgpu, conf->edid);
if (ret)
goto out_clean_opregion;
@@ -437,7 +380,9 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
if (ret)
goto out_clean_sched_policy;
- return vgpu;
+ intel_gvt_update_reg_whitelist(vgpu);
+ mutex_unlock(&gvt->lock);
+ return 0;
out_clean_sched_policy:
intel_vgpu_clean_sched_policy(vgpu);
@@ -455,48 +400,9 @@ out_clean_vgpu_mmio:
intel_vgpu_clean_mmio(vgpu);
out_clean_idr:
idr_remove(&gvt->vgpu_idr, vgpu->id);
-out_free_vgpu:
- vfree(vgpu);
- return ERR_PTR(ret);
-}
-
-/**
- * intel_gvt_create_vgpu - create a virtual GPU
- * @gvt: GVT device
- * @type: type of the vGPU to create
- *
- * This function is called when user wants to create a virtual GPU.
- *
- * Returns:
- * pointer to intel_vgpu, error pointer if failed.
- */
-struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
- struct intel_vgpu_type *type)
-{
- struct intel_vgpu_creation_params param;
- struct intel_vgpu *vgpu;
-
- param.primary = 1;
- param.low_gm_sz = type->low_gm_size;
- param.high_gm_sz = type->high_gm_size;
- param.fence_sz = type->fence;
- param.weight = type->weight;
- param.resolution = type->resolution;
-
- /* XXX current param based on MB */
- param.low_gm_sz = BYTES_TO_MB(param.low_gm_sz);
- param.high_gm_sz = BYTES_TO_MB(param.high_gm_sz);
-
- mutex_lock(&gvt->lock);
- vgpu = __intel_gvt_create_vgpu(gvt, &param);
- if (!IS_ERR(vgpu)) {
- /* calculate left instance change for types */
- intel_gvt_update_vgpu_types(gvt);
- intel_gvt_update_reg_whitelist(vgpu);
- }
+out_unlock:
mutex_unlock(&gvt->lock);
-
- return vgpu;
+ return ret;
}
/**
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index eb5ea5b69cfa..7ef9f5e696d3 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -3,7 +3,7 @@ menu "IRQ chip support"
config IRQCHIP
def_bool y
- depends on OF_IRQ
+ depends on (OF_IRQ || ACPI_GENERIC_GSI)
config ARM_GIC
bool
@@ -481,6 +481,21 @@ config IMX_INTMUX
help
Support for the i.MX INTMUX interrupt multiplexer.
+config IMX_MU_MSI
+ tristate "i.MX MU used as MSI controller"
+ depends on OF && HAS_IOMEM
+ depends on ARCH_MXC || COMPILE_TEST
+ default m if ARCH_MXC
+ select IRQ_DOMAIN
+ select IRQ_DOMAIN_HIERARCHY
+ select GENERIC_MSI_IRQ_DOMAIN
+ help
+ Provide a driver for the i.MX Messaging Unit block used as a
+ CPU-to-CPU MSI controller. This requires a specially crafted DT
+ to make use of this driver.
+
+ If unsure, say N
+
config LS1X_IRQ
bool "Loongson-1 Interrupt Controller"
depends on MACH_LOONGSON32
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index b6acbca2248b..87b49a10962c 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -99,6 +99,7 @@ obj-$(CONFIG_RISCV_INTC) += irq-riscv-intc.o
obj-$(CONFIG_SIFIVE_PLIC) += irq-sifive-plic.o
obj-$(CONFIG_IMX_IRQSTEER) += irq-imx-irqsteer.o
obj-$(CONFIG_IMX_INTMUX) += irq-imx-intmux.o
+obj-$(CONFIG_IMX_MU_MSI) += irq-imx-mu-msi.o
obj-$(CONFIG_MADERA_IRQ) += irq-madera.o
obj-$(CONFIG_LS1X_IRQ) += irq-ls1x.o
obj-$(CONFIG_TI_SCI_INTR_IRQCHIP) += irq-ti-sci-intr.o
diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 262658fd5f9e..34d58567b78d 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -978,7 +978,7 @@ static int __gic_update_rdist_properties(struct redist_region *region,
u64 typer = gic_read_typer(ptr + GICR_TYPER);
u32 ctlr = readl_relaxed(ptr + GICR_CTLR);
- /* Boot-time cleanip */
+ /* Boot-time cleanup */
if ((typer & GICR_TYPER_VLPIS) && (typer & GICR_TYPER_RVPEID)) {
u64 val;
diff --git a/drivers/irqchip/irq-imx-mu-msi.c b/drivers/irqchip/irq-imx-mu-msi.c
new file mode 100644
index 000000000000..229039eda1b1
--- /dev/null
+++ b/drivers/irqchip/irq-imx-mu-msi.c
@@ -0,0 +1,453 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Freescale MU used as MSI controller
+ *
+ * Copyright (c) 2018 Pengutronix, Oleksij Rempel <o.rempel@pengutronix.de>
+ * Copyright 2022 NXP
+ * Frank Li <Frank.Li@nxp.com>
+ * Peng Fan <peng.fan@nxp.com>
+ *
+ * Based on drivers/mailbox/imx-mailbox.c
+ */
+
+#include <linux/clk.h>
+#include <linux/irq.h>
+#include <linux/irqchip.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/pm_runtime.h>
+#include <linux/pm_domain.h>
+#include <linux/spinlock.h>
+
+#define IMX_MU_CHANS 4
+
+enum imx_mu_xcr {
+ IMX_MU_GIER,
+ IMX_MU_GCR,
+ IMX_MU_TCR,
+ IMX_MU_RCR,
+ IMX_MU_xCR_MAX,
+};
+
+enum imx_mu_xsr {
+ IMX_MU_SR,
+ IMX_MU_GSR,
+ IMX_MU_TSR,
+ IMX_MU_RSR,
+ IMX_MU_xSR_MAX
+};
+
+enum imx_mu_type {
+ IMX_MU_V2 = BIT(1),
+};
+
+/* Receive Interrupt Enable */
+#define IMX_MU_xCR_RIEn(data, x) ((data->cfg->type) & IMX_MU_V2 ? BIT(x) : BIT(24 + (3 - (x))))
+#define IMX_MU_xSR_RFn(data, x) ((data->cfg->type) & IMX_MU_V2 ? BIT(x) : BIT(24 + (3 - (x))))
+
+struct imx_mu_dcfg {
+ enum imx_mu_type type;
+ u32 xTR; /* Transmit Register0 */
+ u32 xRR; /* Receive Register0 */
+ u32 xSR[IMX_MU_xSR_MAX]; /* Status Registers */
+ u32 xCR[IMX_MU_xCR_MAX]; /* Control Registers */
+};
+
+struct imx_mu_msi {
+ raw_spinlock_t lock;
+ struct irq_domain *msi_domain;
+ void __iomem *regs;
+ phys_addr_t msiir_addr;
+ const struct imx_mu_dcfg *cfg;
+ unsigned long used;
+ struct clk *clk;
+};
+
+static void imx_mu_write(struct imx_mu_msi *msi_data, u32 val, u32 offs)
+{
+ iowrite32(val, msi_data->regs + offs);
+}
+
+static u32 imx_mu_read(struct imx_mu_msi *msi_data, u32 offs)
+{
+ return ioread32(msi_data->regs + offs);
+}
+
+static u32 imx_mu_xcr_rmw(struct imx_mu_msi *msi_data, enum imx_mu_xcr type, u32 set, u32 clr)
+{
+ unsigned long flags;
+ u32 val;
+
+ raw_spin_lock_irqsave(&msi_data->lock, flags);
+ val = imx_mu_read(msi_data, msi_data->cfg->xCR[type]);
+ val &= ~clr;
+ val |= set;
+ imx_mu_write(msi_data, val, msi_data->cfg->xCR[type]);
+ raw_spin_unlock_irqrestore(&msi_data->lock, flags);
+
+ return val;
+}
+
+static void imx_mu_msi_parent_mask_irq(struct irq_data *data)
+{
+ struct imx_mu_msi *msi_data = irq_data_get_irq_chip_data(data);
+
+ imx_mu_xcr_rmw(msi_data, IMX_MU_RCR, 0, IMX_MU_xCR_RIEn(msi_data, data->hwirq));
+}
+
+static void imx_mu_msi_parent_unmask_irq(struct irq_data *data)
+{
+ struct imx_mu_msi *msi_data = irq_data_get_irq_chip_data(data);
+
+ imx_mu_xcr_rmw(msi_data, IMX_MU_RCR, IMX_MU_xCR_RIEn(msi_data, data->hwirq), 0);
+}
+
+static void imx_mu_msi_parent_ack_irq(struct irq_data *data)
+{
+ struct imx_mu_msi *msi_data = irq_data_get_irq_chip_data(data);
+
+ imx_mu_read(msi_data, msi_data->cfg->xRR + data->hwirq * 4);
+}
+
+static struct irq_chip imx_mu_msi_irq_chip = {
+ .name = "MU-MSI",
+ .irq_ack = irq_chip_ack_parent,
+};
+
+static struct msi_domain_ops imx_mu_msi_irq_ops = {
+};
+
+static struct msi_domain_info imx_mu_msi_domain_info = {
+ .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS),
+ .ops = &imx_mu_msi_irq_ops,
+ .chip = &imx_mu_msi_irq_chip,
+};
+
+static void imx_mu_msi_parent_compose_msg(struct irq_data *data,
+ struct msi_msg *msg)
+{
+ struct imx_mu_msi *msi_data = irq_data_get_irq_chip_data(data);
+ u64 addr = msi_data->msiir_addr + 4 * data->hwirq;
+
+ msg->address_hi = upper_32_bits(addr);
+ msg->address_lo = lower_32_bits(addr);
+ msg->data = data->hwirq;
+}
+
+static int imx_mu_msi_parent_set_affinity(struct irq_data *irq_data,
+ const struct cpumask *mask, bool force)
+{
+ return -EINVAL;
+}
+
+static struct irq_chip imx_mu_msi_parent_chip = {
+ .name = "MU",
+ .irq_mask = imx_mu_msi_parent_mask_irq,
+ .irq_unmask = imx_mu_msi_parent_unmask_irq,
+ .irq_ack = imx_mu_msi_parent_ack_irq,
+ .irq_compose_msi_msg = imx_mu_msi_parent_compose_msg,
+ .irq_set_affinity = imx_mu_msi_parent_set_affinity,
+};
+
+static int imx_mu_msi_domain_irq_alloc(struct irq_domain *domain,
+ unsigned int virq,
+ unsigned int nr_irqs,
+ void *args)
+{
+ struct imx_mu_msi *msi_data = domain->host_data;
+ unsigned long flags;
+ int pos, err = 0;
+
+ WARN_ON(nr_irqs != 1);
+
+ raw_spin_lock_irqsave(&msi_data->lock, flags);
+ pos = find_first_zero_bit(&msi_data->used, IMX_MU_CHANS);
+ if (pos < IMX_MU_CHANS)
+ __set_bit(pos, &msi_data->used);
+ else
+ err = -ENOSPC;
+ raw_spin_unlock_irqrestore(&msi_data->lock, flags);
+
+ if (err)
+ return err;
+
+ irq_domain_set_info(domain, virq, pos,
+ &imx_mu_msi_parent_chip, msi_data,
+ handle_edge_irq, NULL, NULL);
+ return 0;
+}
+
+static void imx_mu_msi_domain_irq_free(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs)
+{
+ struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+ struct imx_mu_msi *msi_data = irq_data_get_irq_chip_data(d);
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&msi_data->lock, flags);
+ __clear_bit(d->hwirq, &msi_data->used);
+ raw_spin_unlock_irqrestore(&msi_data->lock, flags);
+}
+
+static const struct irq_domain_ops imx_mu_msi_domain_ops = {
+ .alloc = imx_mu_msi_domain_irq_alloc,
+ .free = imx_mu_msi_domain_irq_free,
+};
+
+static void imx_mu_msi_irq_handler(struct irq_desc *desc)
+{
+ struct imx_mu_msi *msi_data = irq_desc_get_handler_data(desc);
+ struct irq_chip *chip = irq_desc_get_chip(desc);
+ u32 status;
+ int i;
+
+ status = imx_mu_read(msi_data, msi_data->cfg->xSR[IMX_MU_RSR]);
+
+ chained_irq_enter(chip, desc);
+ for (i = 0; i < IMX_MU_CHANS; i++) {
+ if (status & IMX_MU_xSR_RFn(msi_data, i))
+ generic_handle_domain_irq(msi_data->msi_domain, i);
+ }
+ chained_irq_exit(chip, desc);
+}
+
+static int imx_mu_msi_domains_init(struct imx_mu_msi *msi_data, struct device *dev)
+{
+ struct fwnode_handle *fwnodes = dev_fwnode(dev);
+ struct irq_domain *parent;
+
+ /* Initialize MSI domain parent */
+ parent = irq_domain_create_linear(fwnodes,
+ IMX_MU_CHANS,
+ &imx_mu_msi_domain_ops,
+ msi_data);
+ if (!parent) {
+ dev_err(dev, "failed to create IRQ domain\n");
+ return -ENOMEM;
+ }
+
+ irq_domain_update_bus_token(parent, DOMAIN_BUS_NEXUS);
+
+ msi_data->msi_domain = platform_msi_create_irq_domain(fwnodes,
+ &imx_mu_msi_domain_info,
+ parent);
+
+ if (!msi_data->msi_domain) {
+ dev_err(dev, "failed to create MSI domain\n");
+ irq_domain_remove(parent);
+ return -ENOMEM;
+ }
+
+ irq_domain_set_pm_device(msi_data->msi_domain, dev);
+
+ return 0;
+}
+
+/* Register offset of different version MU IP */
+static const struct imx_mu_dcfg imx_mu_cfg_imx6sx = {
+ .type = 0,
+ .xTR = 0x0,
+ .xRR = 0x10,
+ .xSR = {
+ [IMX_MU_SR] = 0x20,
+ [IMX_MU_GSR] = 0x20,
+ [IMX_MU_TSR] = 0x20,
+ [IMX_MU_RSR] = 0x20,
+ },
+ .xCR = {
+ [IMX_MU_GIER] = 0x24,
+ [IMX_MU_GCR] = 0x24,
+ [IMX_MU_TCR] = 0x24,
+ [IMX_MU_RCR] = 0x24,
+ },
+};
+
+static const struct imx_mu_dcfg imx_mu_cfg_imx7ulp = {
+ .type = 0,
+ .xTR = 0x20,
+ .xRR = 0x40,
+ .xSR = {
+ [IMX_MU_SR] = 0x60,
+ [IMX_MU_GSR] = 0x60,
+ [IMX_MU_TSR] = 0x60,
+ [IMX_MU_RSR] = 0x60,
+ },
+ .xCR = {
+ [IMX_MU_GIER] = 0x64,
+ [IMX_MU_GCR] = 0x64,
+ [IMX_MU_TCR] = 0x64,
+ [IMX_MU_RCR] = 0x64,
+ },
+};
+
+static const struct imx_mu_dcfg imx_mu_cfg_imx8ulp = {
+ .type = IMX_MU_V2,
+ .xTR = 0x200,
+ .xRR = 0x280,
+ .xSR = {
+ [IMX_MU_SR] = 0xC,
+ [IMX_MU_GSR] = 0x118,
+ [IMX_MU_TSR] = 0x124,
+ [IMX_MU_RSR] = 0x12C,
+ },
+ .xCR = {
+ [IMX_MU_GIER] = 0x110,
+ [IMX_MU_GCR] = 0x114,
+ [IMX_MU_TCR] = 0x120,
+ [IMX_MU_RCR] = 0x128
+ },
+};
+
+static int __init imx_mu_of_init(struct device_node *dn,
+ struct device_node *parent,
+ const struct imx_mu_dcfg *cfg)
+{
+ struct platform_device *pdev = of_find_device_by_node(dn);
+ struct device_link *pd_link_a;
+ struct device_link *pd_link_b;
+ struct imx_mu_msi *msi_data;
+ struct resource *res;
+ struct device *pd_a;
+ struct device *pd_b;
+ struct device *dev;
+ int ret;
+ int irq;
+
+ dev = &pdev->dev;
+
+ msi_data = devm_kzalloc(&pdev->dev, sizeof(*msi_data), GFP_KERNEL);
+ if (!msi_data)
+ return -ENOMEM;
+
+ msi_data->cfg = cfg;
+
+ msi_data->regs = devm_platform_ioremap_resource_byname(pdev, "processor-a-side");
+ if (IS_ERR(msi_data->regs)) {
+ dev_err(&pdev->dev, "failed to initialize 'regs'\n");
+ return PTR_ERR(msi_data->regs);
+ }
+
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "processor-b-side");
+ if (!res)
+ return -EIO;
+
+ msi_data->msiir_addr = res->start + msi_data->cfg->xTR;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq <= 0)
+ return -ENODEV;
+
+ platform_set_drvdata(pdev, msi_data);
+
+ msi_data->clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(msi_data->clk))
+ return PTR_ERR(msi_data->clk);
+
+ pd_a = dev_pm_domain_attach_by_name(dev, "processor-a-side");
+ if (IS_ERR(pd_a))
+ return PTR_ERR(pd_a);
+
+ pd_b = dev_pm_domain_attach_by_name(dev, "processor-b-side");
+ if (IS_ERR(pd_b))
+ return PTR_ERR(pd_b);
+
+ pd_link_a = device_link_add(dev, pd_a,
+ DL_FLAG_STATELESS |
+ DL_FLAG_PM_RUNTIME |
+ DL_FLAG_RPM_ACTIVE);
+
+ if (!pd_link_a) {
+ dev_err(dev, "Failed to add device_link to mu a.\n");
+ goto err_pd_a;
+ }
+
+ pd_link_b = device_link_add(dev, pd_b,
+ DL_FLAG_STATELESS |
+ DL_FLAG_PM_RUNTIME |
+ DL_FLAG_RPM_ACTIVE);
+
+
+ if (!pd_link_b) {
+ dev_err(dev, "Failed to add device_link to mu a.\n");
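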
+ goto err_pd_b;
+ }
+
+ ret = imx_mu_msi_domains_init(msi_data, dev);
+ if (ret)
+ goto err_dm_init;
+
+ pm_runtime_enable(dev);
+
+ irq_set_chained_handler_and_data(irq,
+ imx_mu_msi_irq_handler,
+ msi_data);
+
+ return 0;
+
+err_dm_init:
+ device_link_remove(dev, pd_b);
+err_pd_b:
+ device_link_remove(dev, pd_a);
+err_pd_a:
+ return -EINVAL;
+}
+
+static int __maybe_unused imx_mu_runtime_suspend(struct device *dev)
+{
+ struct imx_mu_msi *priv = dev_get_drvdata(dev);
+
+ clk_disable_unprepare(priv->clk);
+
+ return 0;
+}
+
+static int __maybe_unused imx_mu_runtime_resume(struct device *dev)
+{
+ struct imx_mu_msi *priv = dev_get_drvdata(dev);
+ int ret;
+
+ ret = clk_prepare_enable(priv->clk);
+ if (ret)
+ dev_err(dev, "failed to enable clock\n");
+
+ return ret;
+}
+
+static const struct dev_pm_ops imx_mu_pm_ops = {
+ SET_RUNTIME_PM_OPS(imx_mu_runtime_suspend,
+ imx_mu_runtime_resume, NULL)
+};
+
+static int __init imx_mu_imx7ulp_of_init(struct device_node *dn,
+ struct device_node *parent)
+{
+ return imx_mu_of_init(dn, parent, &imx_mu_cfg_imx7ulp);
+}
+
+static int __init imx_mu_imx6sx_of_init(struct device_node *dn,
+ struct device_node *parent)
+{
+ return imx_mu_of_init(dn, parent, &imx_mu_cfg_imx6sx);
+}
+
+static int __init imx_mu_imx8ulp_of_init(struct device_node *dn,
+ struct device_node *parent)
+{
+ return imx_mu_of_init(dn, parent, &imx_mu_cfg_imx8ulp);
+}
+
+IRQCHIP_PLATFORM_DRIVER_BEGIN(imx_mu_msi)
+IRQCHIP_MATCH("fsl,imx7ulp-mu-msi", imx_mu_imx7ulp_of_init)
+IRQCHIP_MATCH("fsl,imx6sx-mu-msi", imx_mu_imx6sx_of_init)
+IRQCHIP_MATCH("fsl,imx8ulp-mu-msi", imx_mu_imx8ulp_of_init)
+IRQCHIP_PLATFORM_DRIVER_END(imx_mu_msi, .pm = &imx_mu_pm_ops)
+
+
+MODULE_AUTHOR("Frank Li <Frank.Li@nxp.com>");
+MODULE_DESCRIPTION("Freescale MU MSI controller driver");
+MODULE_LICENSE("GPL");
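The driver above exposes the MU receive channels as a platform MSI domain: a write from the other processor to the paired transmit register raises channel interrupt n, which the chained handler demultiplexes via generic_handle_domain_irq(). A device that lists this controller as its msi-parent could allocate and use such an MSI roughly as follows (hedged sketch; the callback body and names are illustrative and not part of this driver):

#include <linux/msi.h>
#include <linux/platform_device.h>

static void my_write_msi_msg(struct msi_desc *desc, struct msi_msg *msg)
{
	/*
	 * msg->address_hi/lo is the physical address of one MU transmit
	 * register in the "processor-b-side" region and msg->data is the
	 * channel number; hand these to the remote firmware so it knows
	 * where to write its doorbell.
	 */
}

static int my_client_probe(struct platform_device *pdev)
{
	int ret;

	ret = platform_msi_domain_alloc_irqs(&pdev->dev, 1, my_write_msi_msg);
	if (ret)
		return ret;

	/* request_irq() on the allocated descriptor(s), then use as usual. */
	return 0;
}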
diff --git a/drivers/irqchip/irq-ls-extirq.c b/drivers/irqchip/irq-ls-extirq.c
index 853b3972dbe7..d8d48b1f7c29 100644
--- a/drivers/irqchip/irq-ls-extirq.c
+++ b/drivers/irqchip/irq-ls-extirq.c
@@ -6,8 +6,7 @@
#include <linux/irqchip.h>
#include <linux/irqdomain.h>
#include <linux/of.h>
-#include <linux/mfd/syscon.h>
-#include <linux/regmap.h>
+#include <linux/of_address.h>
#include <linux/slab.h>
#include <dt-bindings/interrupt-controller/arm-gic.h>
@@ -16,13 +15,41 @@
#define LS1021A_SCFGREVCR 0x200
struct ls_extirq_data {
- struct regmap *syscon;
- u32 intpcr;
+ void __iomem *intpcr;
+ raw_spinlock_t lock;
+ bool big_endian;
bool is_ls1021a_or_ls1043a;
u32 nirq;
struct irq_fwspec map[MAXIRQ];
};
+static void ls_extirq_intpcr_rmw(struct ls_extirq_data *priv, u32 mask,
+ u32 value)
+{
+ u32 intpcr;
+
+ /*
+ * Serialize concurrent calls to ls_extirq_set_type() from multiple
+ * IRQ descriptors, making sure the read-modify-write is atomic.
+ */
+ raw_spin_lock(&priv->lock);
+
+ if (priv->big_endian)
+ intpcr = ioread32be(priv->intpcr);
+ else
+ intpcr = ioread32(priv->intpcr);
+
+ intpcr &= ~mask;
+ intpcr |= value;
+
+ if (priv->big_endian)
+ iowrite32be(intpcr, priv->intpcr);
+ else
+ iowrite32(intpcr, priv->intpcr);
+
+ raw_spin_unlock(&priv->lock);
+}
+
static int
ls_extirq_set_type(struct irq_data *data, unsigned int type)
{
@@ -51,7 +78,8 @@ ls_extirq_set_type(struct irq_data *data, unsigned int type)
default:
return -EINVAL;
}
- regmap_update_bits(priv->syscon, priv->intpcr, mask, value);
+
+ ls_extirq_intpcr_rmw(priv, mask, value);
return irq_chip_set_type_parent(data, type);
}
@@ -143,7 +171,6 @@ ls_extirq_parse_map(struct ls_extirq_data *priv, struct device_node *node)
static int __init
ls_extirq_of_init(struct device_node *node, struct device_node *parent)
{
-
struct irq_domain *domain, *parent_domain;
struct ls_extirq_data *priv;
int ret;
@@ -151,40 +178,52 @@ ls_extirq_of_init(struct device_node *node, struct device_node *parent)
parent_domain = irq_find_host(parent);
if (!parent_domain) {
pr_err("Cannot find parent domain\n");
- return -ENODEV;
+ ret = -ENODEV;
+ goto err_irq_find_host;
}
priv = kzalloc(sizeof(*priv), GFP_KERNEL);
- if (!priv)
- return -ENOMEM;
-
- priv->syscon = syscon_node_to_regmap(node->parent);
- if (IS_ERR(priv->syscon)) {
- ret = PTR_ERR(priv->syscon);
- pr_err("Failed to lookup parent regmap\n");
- goto out;
+ if (!priv) {
+ ret = -ENOMEM;
+ goto err_alloc_priv;
}
- ret = of_property_read_u32(node, "reg", &priv->intpcr);
- if (ret) {
- pr_err("Missing INTPCR offset value\n");
- goto out;
+
+ /*
+ * All extirq OF nodes are under a scfg/syscon node with
+ * the 'ranges' property
+ */
+ priv->intpcr = of_iomap(node, 0);
+ if (!priv->intpcr) {
+ pr_err("Cannot ioremap OF node %pOF\n", node);
+ ret = -ENOMEM;
+ goto err_iomap;
}
ret = ls_extirq_parse_map(priv, node);
if (ret)
- goto out;
+ goto err_parse_map;
+ priv->big_endian = of_device_is_big_endian(parent);
priv->is_ls1021a_or_ls1043a = of_device_is_compatible(node, "fsl,ls1021a-extirq") ||
of_device_is_compatible(node, "fsl,ls1043a-extirq");
+ raw_spin_lock_init(&priv->lock);
domain = irq_domain_add_hierarchy(parent_domain, 0, priv->nirq, node,
&extirq_domain_ops, priv);
- if (!domain)
+ if (!domain) {
ret = -ENOMEM;
+ goto err_add_hierarchy;
+ }
-out:
- if (ret)
- kfree(priv);
+ return 0;
+
+err_add_hierarchy:
+err_parse_map:
+ iounmap(priv->intpcr);
+err_iomap:
+ kfree(priv);
+err_alloc_priv:
+err_irq_find_host:
return ret;
}
diff --git a/drivers/irqchip/irq-realtek-rtl.c b/drivers/irqchip/irq-realtek-rtl.c
index 56bf502d9c67..2a349082af81 100644
--- a/drivers/irqchip/irq-realtek-rtl.c
+++ b/drivers/irqchip/irq-realtek-rtl.c
@@ -21,11 +21,33 @@
#define RTL_ICTL_IRR2 0x10
#define RTL_ICTL_IRR3 0x14
+#define RTL_ICTL_NUM_INPUTS 32
+
#define REG(x) (realtek_ictl_base + x)
static DEFINE_RAW_SPINLOCK(irq_lock);
static void __iomem *realtek_ictl_base;
+/*
+ * IRR0-IRR3 store 4 bits per interrupt, but Realtek uses inverted numbering,
+ * placing IRQ 31 in the first four bits. A routing value of '0' means the
+ * interrupt is left disconnected. Routing values {1..15} connect to output
+ * lines {0..14}.
+ */
+#define IRR_OFFSET(idx) (4 * (3 - (idx * 4) / 32))
+#define IRR_SHIFT(idx) ((idx * 4) % 32)
+
+static void write_irr(void __iomem *irr0, int idx, u32 value)
+{
+ unsigned int offset = IRR_OFFSET(idx);
+ unsigned int shift = IRR_SHIFT(idx);
+ u32 irr;
+
+ irr = readl(irr0 + offset) & ~(0xf << shift);
+ irr |= (value & 0xf) << shift;
+ writel(irr, irr0 + offset);
+}
+
static void realtek_ictl_unmask_irq(struct irq_data *i)
{
unsigned long flags;
@@ -62,8 +84,14 @@ static struct irq_chip realtek_ictl_irq = {
static int intc_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
{
+ unsigned long flags;
+
irq_set_chip_and_handler(irq, &realtek_ictl_irq, handle_level_irq);
+ raw_spin_lock_irqsave(&irq_lock, flags);
+ write_irr(REG(RTL_ICTL_IRR0), hw, 1);
+ raw_spin_unlock_irqrestore(&irq_lock, flags);
+
return 0;
}
@@ -95,90 +123,50 @@ out:
chained_irq_exit(chip, desc);
}
-/*
- * SoC interrupts are cascaded to MIPS CPU interrupts according to the
- * interrupt-map in the device tree. Each SoC interrupt gets 4 bits for
- * the CPU interrupt in an Interrupt Routing Register. Max 32 SoC interrupts
- * thus go into 4 IRRs. A routing value of '0' means the interrupt is left
- * disconnected. Routing values {1..15} connect to output lines {0..14}.
- */
-static int __init map_interrupts(struct device_node *node, struct irq_domain *domain)
-{
- struct device_node *cpu_ictl;
- const __be32 *imap;
- u32 imaplen, soc_int, cpu_int, tmp, regs[4];
- int ret, i, irr_regs[] = {
- RTL_ICTL_IRR3,
- RTL_ICTL_IRR2,
- RTL_ICTL_IRR1,
- RTL_ICTL_IRR0,
- };
- u8 mips_irqs_set;
-
- ret = of_property_read_u32(node, "#address-cells", &tmp);
- if (ret || tmp)
- return -EINVAL;
-
- imap = of_get_property(node, "interrupt-map", &imaplen);
- if (!imap || imaplen % 3)
- return -EINVAL;
-
- mips_irqs_set = 0;
- memset(regs, 0, sizeof(regs));
- for (i = 0; i < imaplen; i += 3 * sizeof(u32)) {
- soc_int = be32_to_cpup(imap);
- if (soc_int > 31)
- return -EINVAL;
-
- cpu_ictl = of_find_node_by_phandle(be32_to_cpup(imap + 1));
- if (!cpu_ictl)
- return -EINVAL;
- ret = of_property_read_u32(cpu_ictl, "#interrupt-cells", &tmp);
- of_node_put(cpu_ictl);
- if (ret || tmp != 1)
- return -EINVAL;
-
- cpu_int = be32_to_cpup(imap + 2);
- if (cpu_int > 7 || cpu_int < 2)
- return -EINVAL;
-
- if (!(mips_irqs_set & BIT(cpu_int))) {
- irq_set_chained_handler_and_data(cpu_int, realtek_irq_dispatch,
- domain);
- mips_irqs_set |= BIT(cpu_int);
- }
-
- /* Use routing values (1..6) for CPU interrupts (2..7) */
- regs[(soc_int * 4) / 32] |= (cpu_int - 1) << (soc_int * 4) % 32;
- imap += 3;
- }
-
- for (i = 0; i < 4; i++)
- writel(regs[i], REG(irr_regs[i]));
-
- return 0;
-}
-
static int __init realtek_rtl_of_init(struct device_node *node, struct device_node *parent)
{
+ struct of_phandle_args oirq;
struct irq_domain *domain;
- int ret;
+ unsigned int soc_irq;
+ int parent_irq;
realtek_ictl_base = of_iomap(node, 0);
if (!realtek_ictl_base)
return -ENXIO;
- /* Disable all cascaded interrupts */
+ /* Disable all cascaded interrupts and clear routing */
writel(0, REG(RTL_ICTL_GIMR));
+ for (soc_irq = 0; soc_irq < RTL_ICTL_NUM_INPUTS; soc_irq++)
+ write_irr(REG(RTL_ICTL_IRR0), soc_irq, 0);
+
+ if (WARN_ON(!of_irq_count(node))) {
+ /*
+ * If DT contains no parent interrupts, assume MIPS CPU IRQ 2
+ * (HW0) is connected to the first output. This is the case for
+ * all known hardware anyway. "interrupt-map" is deprecated, so
+ * don't bother trying to parse that.
+ */
+ oirq.np = of_find_compatible_node(NULL, NULL, "mti,cpu-interrupt-controller");
+ oirq.args_count = 1;
+ oirq.args[0] = 2;
+
+ parent_irq = irq_create_of_mapping(&oirq);
+
+ of_node_put(oirq.np);
+ } else {
+ parent_irq = of_irq_get(node, 0);
+ }
- domain = irq_domain_add_simple(node, 32, 0,
- &irq_domain_ops, NULL);
+ if (parent_irq < 0)
+ return parent_irq;
+ else if (!parent_irq)
+ return -ENODEV;
- ret = map_interrupts(node, domain);
- if (ret) {
- pr_err("invalid interrupt map\n");
- return ret;
- }
+ domain = irq_domain_add_linear(node, RTL_ICTL_NUM_INPUTS, &irq_domain_ops, NULL);
+ if (!domain)
+ return -ENOMEM;
+
+ irq_set_chained_handler_and_data(parent_irq, realtek_irq_dispatch, domain);
return 0;
}
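write_irr() above packs one 4-bit routing value per SoC input across IRR0..IRR3 using IRR_OFFSET()/IRR_SHIFT(). A few worked evaluations of those macros, following directly from the definitions above (byte offsets are relative to RTL_ICTL_IRR0):

/*
 *   idx 31: IRR_OFFSET == 0  (IRR0), IRR_SHIFT == 28  -> bits 31:28 of IRR0
 *   idx 24: IRR_OFFSET == 0  (IRR0), IRR_SHIFT == 0   -> bits  3:0 of IRR0
 *   idx  7: IRR_OFFSET == 12 (IRR3), IRR_SHIFT == 28  -> bits 31:28 of IRR3
 *   idx  0: IRR_OFFSET == 12 (IRR3), IRR_SHIFT == 0   -> bits  3:0 of IRR3
 */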
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 689271c4245c..7378e2f3e525 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -521,24 +521,14 @@ static int pcifront_rescan_root(struct pcifront_device *pdev,
int err;
struct pci_bus *b;
-#ifndef CONFIG_PCI_DOMAINS
- if (domain != 0) {
- dev_err(&pdev->xdev->dev,
- "PCI Root in non-zero PCI Domain! domain=%d\n", domain);
- dev_err(&pdev->xdev->dev,
- "Please compile with CONFIG_PCI_DOMAINS\n");
- return -EINVAL;
- }
-#endif
-
- dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n",
- domain, bus);
-
b = pci_find_bus(domain, bus);
if (!b)
/* If the bus is unknown, create it. */
return pcifront_scan_root(pdev, domain, bus);
+ dev_info(&pdev->xdev->dev, "Rescanning PCI Frontend Bus %04x:%02x\n",
+ domain, bus);
+
err = pcifront_scan_bus(pdev, domain, bus, b);
/* Claim resources before going "live" with our devices */
@@ -819,76 +809,73 @@ out:
return err;
}
-static int pcifront_try_connect(struct pcifront_device *pdev)
+static void pcifront_connect(struct pcifront_device *pdev)
{
- int err = -EFAULT;
+ int err;
int i, num_roots, len;
char str[64];
unsigned int domain, bus;
-
- /* Only connect once */
- if (xenbus_read_driver_state(pdev->xdev->nodename) !=
- XenbusStateInitialised)
- goto out;
-
- err = pcifront_connect_and_init_dma(pdev);
- if (err && err != -EEXIST) {
- xenbus_dev_fatal(pdev->xdev, err,
- "Error setting up PCI Frontend");
- goto out;
- }
-
err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
"root_num", "%d", &num_roots);
if (err == -ENOENT) {
xenbus_dev_error(pdev->xdev, err,
"No PCI Roots found, trying 0000:00");
- err = pcifront_scan_root(pdev, 0, 0);
+ err = pcifront_rescan_root(pdev, 0, 0);
if (err) {
xenbus_dev_fatal(pdev->xdev, err,
"Error scanning PCI root 0000:00");
- goto out;
+ return;
}
num_roots = 0;
} else if (err != 1) {
- if (err == 0)
- err = -EINVAL;
- xenbus_dev_fatal(pdev->xdev, err,
+ xenbus_dev_fatal(pdev->xdev, err >= 0 ? -EINVAL : err,
"Error reading number of PCI roots");
- goto out;
+ return;
}
for (i = 0; i < num_roots; i++) {
len = snprintf(str, sizeof(str), "root-%d", i);
- if (unlikely(len >= (sizeof(str) - 1))) {
- err = -ENOMEM;
- goto out;
- }
+ if (unlikely(len >= (sizeof(str) - 1)))
+ return;
err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
"%x:%x", &domain, &bus);
if (err != 2) {
- if (err >= 0)
- err = -EINVAL;
- xenbus_dev_fatal(pdev->xdev, err,
+ xenbus_dev_fatal(pdev->xdev, err >= 0 ? -EINVAL : err,
"Error reading PCI root %d", i);
- goto out;
+ return;
}
- err = pcifront_scan_root(pdev, domain, bus);
+ err = pcifront_rescan_root(pdev, domain, bus);
if (err) {
xenbus_dev_fatal(pdev->xdev, err,
"Error scanning PCI root %04x:%02x",
domain, bus);
- goto out;
+ return;
}
}
- err = xenbus_switch_state(pdev->xdev, XenbusStateConnected);
+ xenbus_switch_state(pdev->xdev, XenbusStateConnected);
+}
-out:
- return err;
+static void pcifront_try_connect(struct pcifront_device *pdev)
+{
+ int err;
+
+ /* Only connect once */
+ if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+ XenbusStateInitialised)
+ return;
+
+ err = pcifront_connect_and_init_dma(pdev);
+ if (err && err != -EEXIST) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error setting up PCI Frontend");
+ return;
+ }
+
+ pcifront_connect(pdev);
}
static int pcifront_try_disconnect(struct pcifront_device *pdev)
@@ -914,80 +901,37 @@ out:
return err;
}
-static int pcifront_attach_devices(struct pcifront_device *pdev)
+static void pcifront_attach_devices(struct pcifront_device *pdev)
{
- int err = -EFAULT;
- int i, num_roots, len;
- unsigned int domain, bus;
- char str[64];
-
- if (xenbus_read_driver_state(pdev->xdev->nodename) !=
+ if (xenbus_read_driver_state(pdev->xdev->nodename) ==
XenbusStateReconfiguring)
- goto out;
-
- err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend,
- "root_num", "%d", &num_roots);
- if (err == -ENOENT) {
- xenbus_dev_error(pdev->xdev, err,
- "No PCI Roots found, trying 0000:00");
- err = pcifront_rescan_root(pdev, 0, 0);
- if (err) {
- xenbus_dev_fatal(pdev->xdev, err,
- "Error scanning PCI root 0000:00");
- goto out;
- }
- num_roots = 0;
- } else if (err != 1) {
- if (err == 0)
- err = -EINVAL;
- xenbus_dev_fatal(pdev->xdev, err,
- "Error reading number of PCI roots");
- goto out;
- }
-
- for (i = 0; i < num_roots; i++) {
- len = snprintf(str, sizeof(str), "root-%d", i);
- if (unlikely(len >= (sizeof(str) - 1))) {
- err = -ENOMEM;
- goto out;
- }
-
- err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str,
- "%x:%x", &domain, &bus);
- if (err != 2) {
- if (err >= 0)
- err = -EINVAL;
- xenbus_dev_fatal(pdev->xdev, err,
- "Error reading PCI root %d", i);
- goto out;
- }
-
- err = pcifront_rescan_root(pdev, domain, bus);
- if (err) {
- xenbus_dev_fatal(pdev->xdev, err,
- "Error scanning PCI root %04x:%02x",
- domain, bus);
- goto out;
- }
- }
-
- xenbus_switch_state(pdev->xdev, XenbusStateConnected);
-
-out:
- return err;
+ pcifront_connect(pdev);
}
static int pcifront_detach_devices(struct pcifront_device *pdev)
{
int err = 0;
int i, num_devs;
+ enum xenbus_state state;
unsigned int domain, bus, slot, func;
struct pci_dev *pci_dev;
char str[64];
- if (xenbus_read_driver_state(pdev->xdev->nodename) !=
- XenbusStateConnected)
+ state = xenbus_read_driver_state(pdev->xdev->nodename);
+ if (state == XenbusStateInitialised) {
+ dev_dbg(&pdev->xdev->dev, "Handle skipped connect.\n");
+ /* We missed Connected and need to initialize. */
+ err = pcifront_connect_and_init_dma(pdev);
+ if (err && err != -EEXIST) {
+ xenbus_dev_fatal(pdev->xdev, err,
+ "Error setting up PCI Frontend");
+ goto out;
+ }
+
+ goto out_switch_state;
+ } else if (state != XenbusStateConnected) {
goto out;
+ }
err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, "num_devs", "%d",
&num_devs);
@@ -1048,6 +992,7 @@ static int pcifront_detach_devices(struct pcifront_device *pdev)
domain, bus, slot, func);
}
+ out_switch_state:
err = xenbus_switch_state(pdev->xdev, XenbusStateReconfiguring);
out:
diff --git a/drivers/pinctrl/pinctrl-amd.c b/drivers/pinctrl/pinctrl-amd.c
index ecf65237b263..6be896871718 100644
--- a/drivers/pinctrl/pinctrl-amd.c
+++ b/drivers/pinctrl/pinctrl-amd.c
@@ -639,7 +639,7 @@ static bool do_amd_gpio_irq_handler(int irq, void *dev_id)
if (!(regval & PIN_IRQ_PENDING) ||
!(regval & BIT(INTERRUPT_MASK_OFF)))
continue;
- generic_handle_domain_irq(gc->irq.domain, irqnr + i);
+ generic_handle_domain_irq_safe(gc->irq.domain, irqnr + i);
/* Clear interrupt.
* We must read the pin register again, in case the
diff --git a/drivers/platform/Kconfig b/drivers/platform/Kconfig
index b437847b6237..dbd327712205 100644
--- a/drivers/platform/Kconfig
+++ b/drivers/platform/Kconfig
@@ -3,6 +3,8 @@ if MIPS
source "drivers/platform/mips/Kconfig"
endif
+source "drivers/platform/loongarch/Kconfig"
+
source "drivers/platform/goldfish/Kconfig"
source "drivers/platform/chrome/Kconfig"
diff --git a/drivers/platform/Makefile b/drivers/platform/Makefile
index 4de08ef4ec9d..41640172975a 100644
--- a/drivers/platform/Makefile
+++ b/drivers/platform/Makefile
@@ -4,6 +4,7 @@
#
obj-$(CONFIG_X86) += x86/
+obj-$(CONFIG_LOONGARCH) += loongarch/
obj-$(CONFIG_MELLANOX_PLATFORM) += mellanox/
obj-$(CONFIG_MIPS) += mips/
obj-$(CONFIG_OLPC_EC) += olpc/
diff --git a/drivers/platform/loongarch/Kconfig b/drivers/platform/loongarch/Kconfig
new file mode 100644
index 000000000000..5633e4d73991
--- /dev/null
+++ b/drivers/platform/loongarch/Kconfig
@@ -0,0 +1,31 @@
+#
+# LoongArch Platform Specific Drivers
+#
+
+menuconfig LOONGARCH_PLATFORM_DEVICES
+ bool "LoongArch Platform Specific Device Drivers"
+ default y
+ depends on LOONGARCH
+ help
+ Say Y here to get to see options for device drivers of various
+ LoongArch platforms, including vendor-specific laptop/desktop
+ extensions and hardware monitor drivers. This option itself does
+ not add any kernel code.
+
+ If you say N, all options in this submenu will be skipped and disabled.
+
+if LOONGARCH_PLATFORM_DEVICES
+
+config LOONGSON_LAPTOP
+ tristate "Generic Loongson-3 Laptop Driver"
+ depends on ACPI
+ depends on BACKLIGHT_CLASS_DEVICE
+ depends on INPUT
+ depends on MACH_LOONGSON64
+ select ACPI_VIDEO
+ select INPUT_SPARSEKMAP
+ default y
+ help
+ Generic driver for ACPI-based Loongson-3 family laptops.
+
+endif # LOONGARCH_PLATFORM_DEVICES
diff --git a/drivers/platform/loongarch/Makefile b/drivers/platform/loongarch/Makefile
new file mode 100644
index 000000000000..f43ab03db1a2
--- /dev/null
+++ b/drivers/platform/loongarch/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_LOONGSON_LAPTOP) += loongson-laptop.o
diff --git a/drivers/platform/loongarch/loongson-laptop.c b/drivers/platform/loongarch/loongson-laptop.c
new file mode 100644
index 000000000000..f0166ad5d2c2
--- /dev/null
+++ b/drivers/platform/loongarch/loongson-laptop.c
@@ -0,0 +1,624 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Generic Loongson processor based LAPTOP/ALL-IN-ONE driver
+ *
+ * Jianmin Lv <lvjianmin@loongson.cn>
+ * Huacai Chen <chenhuacai@loongson.cn>
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/acpi.h>
+#include <linux/backlight.h>
+#include <linux/device.h>
+#include <linux/input.h>
+#include <linux/input/sparse-keymap.h>
+#include <linux/platform_device.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <acpi/video.h>
+
+/* 1. Driver-wide structs and misc. variables */
+
+/* ACPI HIDs */
+#define LOONGSON_ACPI_EC_HID "PNP0C09"
+#define LOONGSON_ACPI_HKEY_HID "LOON0000"
+
+#define ACPI_LAPTOP_NAME "loongson-laptop"
+#define ACPI_LAPTOP_ACPI_EVENT_PREFIX "loongson"
+
+#define MAX_ACPI_ARGS 3
+#define GENERIC_HOTKEY_MAP_MAX 64
+
+#define GENERIC_EVENT_TYPE_OFF 12
+#define GENERIC_EVENT_TYPE_MASK 0xF000
+#define GENERIC_EVENT_CODE_MASK 0x0FFF
+
+struct generic_sub_driver {
+ u32 type;
+ char *name;
+ acpi_handle *handle;
+ struct acpi_device *device;
+ struct platform_driver *driver;
+ int (*init)(struct generic_sub_driver *sub_driver);
+ void (*notify)(struct generic_sub_driver *sub_driver, u32 event);
+ u8 acpi_notify_installed;
+};
+
+static u32 input_device_registered;
+static struct input_dev *generic_inputdev;
+
+static acpi_handle hotkey_handle;
+static struct key_entry hotkey_keycode_map[GENERIC_HOTKEY_MAP_MAX];
+
+int loongson_laptop_turn_on_backlight(void);
+int loongson_laptop_turn_off_backlight(void);
+static int loongson_laptop_backlight_update(struct backlight_device *bd);
+
+/* 2. ACPI Helpers and device model */
+
+static int acpi_evalf(acpi_handle handle, int *res, char *method, char *fmt, ...)
+{
+ char res_type;
+ char *fmt0 = fmt;
+ va_list ap;
+ int success, quiet;
+ acpi_status status;
+ struct acpi_object_list params;
+ struct acpi_buffer result, *resultp;
+ union acpi_object in_objs[MAX_ACPI_ARGS], out_obj;
+
+ if (!*fmt) {
+ pr_err("acpi_evalf() called with empty format\n");
+ return 0;
+ }
+
+ if (*fmt == 'q') {
+ quiet = 1;
+ fmt++;
+ } else
+ quiet = 0;
+
+ res_type = *(fmt++);
+
+ params.count = 0;
+ params.pointer = &in_objs[0];
+
+ va_start(ap, fmt);
+ while (*fmt) {
+ char c = *(fmt++);
+ switch (c) {
+ case 'd': /* int */
+ in_objs[params.count].integer.value = va_arg(ap, int);
+ in_objs[params.count++].type = ACPI_TYPE_INTEGER;
+ break;
+ /* add more types as needed */
+ default:
+ pr_err("acpi_evalf() called with invalid format character '%c'\n", c);
+ va_end(ap);
+ return 0;
+ }
+ }
+ va_end(ap);
+
+ if (res_type != 'v') {
+ result.length = sizeof(out_obj);
+ result.pointer = &out_obj;
+ resultp = &result;
+ } else
+ resultp = NULL;
+
+ status = acpi_evaluate_object(handle, method, &params, resultp);
+
+ switch (res_type) {
+ case 'd': /* int */
+ success = (status == AE_OK && out_obj.type == ACPI_TYPE_INTEGER);
+ if (success && res)
+ *res = out_obj.integer.value;
+ break;
+ case 'v': /* void */
+ success = status == AE_OK;
+ break;
+ /* add more types as needed */
+ default:
+ pr_err("acpi_evalf() called with invalid format character '%c'\n", res_type);
+ return 0;
+ }
+
+ if (!success && !quiet)
+ pr_err("acpi_evalf(%s, %s, ...) failed: %s\n",
+ method, fmt0, acpi_format_exception(status));
+
+ return success;
+}
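The format string convention above is compact: an optional leading 'q' suppresses error logging, the next character names the result type ('d' integer, 'v' void), and any remaining characters describe the input arguments ('d' integer). A minimal sketch of how the helper is invoked, mirroring the calls made later in this file:

	int level;

	/* "d": evaluate ECBG and read back one integer result */
	if (!acpi_evalf(hotkey_handle, &level, "ECBG", "d"))
		return -EIO;

	/* "vd": evaluate ECBS with one integer argument, discard the result */
	if (!acpi_evalf(hotkey_handle, NULL, "ECBS", "vd", level))
		return -EIO;

	/* "qd": as "d", but stay quiet if the method is missing or fails */
	acpi_evalf(hotkey_handle, &level, "ECBG", "qd");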
+
+static int hotkey_status_get(int *status)
+{
+ if (!acpi_evalf(hotkey_handle, status, "GSWS", "d"))
+ return -EIO;
+
+ return 0;
+}
+
+static void dispatch_acpi_notify(acpi_handle handle, u32 event, void *data)
+{
+ struct generic_sub_driver *sub_driver = data;
+
+ if (!sub_driver || !sub_driver->notify)
+ return;
+ sub_driver->notify(sub_driver, event);
+}
+
+static int __init setup_acpi_notify(struct generic_sub_driver *sub_driver)
+{
+ acpi_status status;
+
+ if (!*sub_driver->handle)
+ return 0;
+
+ sub_driver->device = acpi_fetch_acpi_dev(*sub_driver->handle);
+ if (!sub_driver->device) {
+ pr_err("acpi_fetch_acpi_dev(%s) failed\n", sub_driver->name);
+ return -ENODEV;
+ }
+
+ sub_driver->device->driver_data = sub_driver;
+ sprintf(acpi_device_class(sub_driver->device), "%s/%s",
+ ACPI_LAPTOP_ACPI_EVENT_PREFIX, sub_driver->name);
+
+ status = acpi_install_notify_handler(*sub_driver->handle,
+ sub_driver->type, dispatch_acpi_notify, sub_driver);
+ if (ACPI_FAILURE(status)) {
+ if (status == AE_ALREADY_EXISTS) {
+ pr_notice("Another device driver is already "
+ "handling %s events\n", sub_driver->name);
+ } else {
+ pr_err("acpi_install_notify_handler(%s) failed: %s\n",
+ sub_driver->name, acpi_format_exception(status));
+ }
+ return -ENODEV;
+ }
+ sub_driver->acpi_notify_installed = 1;
+
+ return 0;
+}
+
+static int loongson_hotkey_suspend(struct device *dev)
+{
+ return 0;
+}
+
+static int loongson_hotkey_resume(struct device *dev)
+{
+ int status = 0;
+ struct key_entry ke;
+ struct backlight_device *bd;
+
+ /*
+ * We can only handle the lid event if the firmware supports the SW_LID
+ * event model. This check accounts for development boards without an EC.
+ */
+ if (test_bit(SW_LID, generic_inputdev->swbit)) {
+ if (hotkey_status_get(&status) < 0)
+ return -EIO;
+ /*
+ * The input device's sw element records the last lid status.
+ * When the system is woken by another wake-up source, a lid
+ * event is reported as well; checking the SW_LID bit in the
+ * sw element lets us ignore that case.
+ *
+ * The input core drops a lid event whose value matches the
+ * last recorded lid status, so this driver does not report
+ * repeated events.
+ *
+ * Lid status is normally 0, but hardware faults are taken
+ * into account, hence the extra lid status confirmation.
+ */
+ if (test_bit(SW_LID, generic_inputdev->sw) && !(status & (1 << SW_LID))) {
+ ke.type = KE_SW;
+ ke.sw.value = (u8)status;
+ ke.sw.code = SW_LID;
+ sparse_keymap_report_entry(generic_inputdev, &ke, 1, true);
+ }
+ }
+
+ bd = backlight_device_get_by_type(BACKLIGHT_PLATFORM);
+ if (bd) {
+ loongson_laptop_backlight_update(bd) ?
+ pr_warn("Loongson_backlight: resume brightness failed\n") :
+ pr_info("Loongson_backlight: resume brightness %d\n", bd->props.brightness);
+ }
+
+ return 0;
+}
+
+static DEFINE_SIMPLE_DEV_PM_OPS(loongson_hotkey_pm,
+ loongson_hotkey_suspend, loongson_hotkey_resume);
+
+static int loongson_hotkey_probe(struct platform_device *pdev)
+{
+ hotkey_handle = ACPI_HANDLE(&pdev->dev);
+
+ if (!hotkey_handle)
+ return -ENODEV;
+
+ return 0;
+}
+
+static const struct acpi_device_id loongson_device_ids[] = {
+ {LOONGSON_ACPI_HKEY_HID, 0},
+ {"", 0},
+};
+MODULE_DEVICE_TABLE(acpi, loongson_device_ids);
+
+static struct platform_driver loongson_hotkey_driver = {
+ .probe = loongson_hotkey_probe,
+ .driver = {
+ .name = "loongson-hotkey",
+ .owner = THIS_MODULE,
+ .pm = pm_ptr(&loongson_hotkey_pm),
+ .acpi_match_table = loongson_device_ids,
+ },
+};
+
+static int hotkey_map(void)
+{
+ u32 index;
+ acpi_status status;
+ struct acpi_buffer buf;
+ union acpi_object *pack;
+
+ buf.length = ACPI_ALLOCATE_BUFFER;
+ status = acpi_evaluate_object_typed(hotkey_handle, "KMAP", NULL, &buf, ACPI_TYPE_PACKAGE);
+ if (status != AE_OK) {
+ pr_err("ACPI exception: %s\n", acpi_format_exception(status));
+ return -1;
+ }
+ pack = buf.pointer;
+ for (index = 0; index < pack->package.count; index++) {
+ union acpi_object *element, *sub_pack;
+
+ sub_pack = &pack->package.elements[index];
+
+ element = &sub_pack->package.elements[0];
+ hotkey_keycode_map[index].type = element->integer.value;
+ element = &sub_pack->package.elements[1];
+ hotkey_keycode_map[index].code = element->integer.value;
+ element = &sub_pack->package.elements[2];
+ hotkey_keycode_map[index].keycode = element->integer.value;
+ }
+
+ return 0;
+}
+
+static int hotkey_backlight_set(bool enable)
+{
+ if (!acpi_evalf(hotkey_handle, NULL, "VCBL", "vd", enable ? 1 : 0))
+ return -EIO;
+
+ return 0;
+}
+
+static int ec_get_brightness(void)
+{
+ int status = 0;
+
+ if (!hotkey_handle)
+ return -ENXIO;
+
+ if (!acpi_evalf(hotkey_handle, &status, "ECBG", "d"))
+ return -EIO;
+
+ return status;
+}
+
+static int ec_set_brightness(int level)
+{
+ int ret = 0;
+
+ if (!hotkey_handle)
+ return -ENXIO;
+
+ if (!acpi_evalf(hotkey_handle, NULL, "ECBS", "vd", level))
+ ret = -EIO;
+
+ return ret;
+}
+
+static int ec_backlight_level(u8 level)
+{
+ int status = 0;
+
+ if (!hotkey_handle)
+ return -ENXIO;
+
+ if (!acpi_evalf(hotkey_handle, &status, "ECLL", "d"))
+ return -EIO;
+
+ if ((status < 0) || (level > status))
+ return status;
+
+ if (!acpi_evalf(hotkey_handle, &status, "ECSL", "d"))
+ return -EIO;
+
+ if ((status < 0) || (level < status))
+ return status;
+
+ return level;
+}
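In other words, ec_backlight_level() clamps the requested level to the range the EC reports: ECLL gives the maximum and ECSL the minimum. A hedged illustration, assuming the EC reports ECLL = 10 and ECSL = 1:

	/*
	 *   ec_backlight_level(15) -> 10   (above ECLL, clamped to the maximum)
	 *   ec_backlight_level(0)  ->  1   (below ECSL, clamped to the minimum)
	 *   ec_backlight_level(5)  ->  5   (already within range)
	 */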
+
+static int loongson_laptop_backlight_update(struct backlight_device *bd)
+{
+ int lvl = ec_backlight_level(bd->props.brightness);
+
+ if (lvl < 0)
+ return -EIO;
+ if (ec_set_brightness(lvl))
+ return -EIO;
+
+ return 0;
+}
+
+static int loongson_laptop_get_brightness(struct backlight_device *bd)
+{
+ int level;
+
+ level = ec_get_brightness();
+ if (level < 0)
+ return -EIO;
+
+ return level;
+}
+
+static const struct backlight_ops backlight_laptop_ops = {
+ .update_status = loongson_laptop_backlight_update,
+ .get_brightness = loongson_laptop_get_brightness,
+};
+
+static int laptop_backlight_register(void)
+{
+ int status = 0;
+ struct backlight_properties props;
+
+ memset(&props, 0, sizeof(props));
+
+ if (!acpi_evalf(hotkey_handle, &status, "ECLL", "d"))
+ return -EIO;
+
+ props.brightness = 1;
+ props.max_brightness = status;
+ props.type = BACKLIGHT_PLATFORM;
+
+ backlight_device_register("loongson_laptop",
+ NULL, NULL, &backlight_laptop_ops, &props);
+
+ return 0;
+}
+
+int loongson_laptop_turn_on_backlight(void)
+{
+ int status;
+ union acpi_object arg0 = { ACPI_TYPE_INTEGER };
+ struct acpi_object_list args = { 1, &arg0 };
+
+ arg0.integer.value = 1;
+ status = acpi_evaluate_object(NULL, "\\BLSW", &args, NULL);
+ if (ACPI_FAILURE(status)) {
+ pr_info("Loongson lvds error: 0x%x\n", status);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+int loongson_laptop_turn_off_backlight(void)
+{
+ int status;
+ union acpi_object arg0 = { ACPI_TYPE_INTEGER };
+ struct acpi_object_list args = { 1, &arg0 };
+
+ arg0.integer.value = 0;
+ status = acpi_evaluate_object(NULL, "\\BLSW", &args, NULL);
+ if (ACPI_FAILURE(status)) {
+ pr_info("Loongson lvds error: 0x%x\n", status);
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+static int __init event_init(struct generic_sub_driver *sub_driver)
+{
+ int ret;
+
+ ret = hotkey_map();
+ if (ret < 0) {
+ pr_err("Failed to parse keymap from DSDT\n");
+ return ret;
+ }
+
+ ret = sparse_keymap_setup(generic_inputdev, hotkey_keycode_map, NULL);
+ if (ret < 0) {
+ pr_err("Failed to setup input device keymap\n");
+ input_free_device(generic_inputdev);
+
+ return ret;
+ }
+
+ /*
+ * This hotkey driver handles backlight events only when
+ * acpi_video_get_backlight_type() returns acpi_backlight_vendor.
+ */
+ if (acpi_video_get_backlight_type() == acpi_backlight_vendor)
+ hotkey_backlight_set(true);
+ else
+ hotkey_backlight_set(false);
+
+ pr_info("ACPI: enabling firmware HKEY event interface...\n");
+
+ return ret;
+}
+
+static void event_notify(struct generic_sub_driver *sub_driver, u32 event)
+{
+ int type, scan_code;
+ struct key_entry *ke = NULL;
+
+ scan_code = event & GENERIC_EVENT_CODE_MASK;
+ type = (event & GENERIC_EVENT_TYPE_MASK) >> GENERIC_EVENT_TYPE_OFF;
+ ke = sparse_keymap_entry_from_scancode(generic_inputdev, scan_code);
+ if (ke) {
+ if (type == KE_SW) {
+ int status = 0;
+
+ if (hotkey_status_get(&status) < 0)
+ return;
+
+ ke->sw.value = !!(status & (1 << ke->sw.code));
+ }
+ sparse_keymap_report_entry(generic_inputdev, ke, 1, true);
+ }
+}
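For reference, the event word is split with the masks defined at the top of the file: bits 12-15 carry the key_entry type and bits 0-11 the scan code. An illustrative decode, assuming the firmware encodes types with the sparse-keymap values (where KE_SW is 2):

	/*
	 *   event = 0x2001
	 *   type      = (0x2001 & 0xF000) >> 12 = 0x2  -> KE_SW
	 *   scan_code =  0x2001 & 0x0FFF        = 0x001
	 */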
+
+/* 3. Infrastructure */
+
+static void generic_subdriver_exit(struct generic_sub_driver *sub_driver);
+
+static int __init generic_subdriver_init(struct generic_sub_driver *sub_driver)
+{
+ int ret;
+
+ if (!sub_driver || !sub_driver->driver)
+ return -EINVAL;
+
+ ret = platform_driver_register(sub_driver->driver);
+ if (ret)
+ return -EINVAL;
+
+ if (sub_driver->init)
+ sub_driver->init(sub_driver);
+
+ if (sub_driver->notify) {
+ ret = setup_acpi_notify(sub_driver);
+ if (ret == -ENODEV) {
+ ret = 0;
+ goto err_out;
+ }
+ if (ret < 0)
+ goto err_out;
+ }
+
+ return 0;
+
+err_out:
+ generic_subdriver_exit(sub_driver);
+ return (ret < 0) ? ret : 0;
+}
+
+static void generic_subdriver_exit(struct generic_sub_driver *sub_driver)
+{
+ if (sub_driver->acpi_notify_installed) {
+ acpi_remove_notify_handler(*sub_driver->handle,
+ sub_driver->type, dispatch_acpi_notify);
+ sub_driver->acpi_notify_installed = 0;
+ }
+ platform_driver_unregister(sub_driver->driver);
+}
+
+static struct generic_sub_driver generic_sub_drivers[] __refdata = {
+ {
+ .name = "hotkey",
+ .init = event_init,
+ .notify = event_notify,
+ .handle = &hotkey_handle,
+ .type = ACPI_DEVICE_NOTIFY,
+ .driver = &loongson_hotkey_driver,
+ },
+};
+
+static int __init generic_acpi_laptop_init(void)
+{
+ bool ec_found;
+ int i, ret, status;
+
+ if (acpi_disabled)
+ return -ENODEV;
+
+ /* The EC device is required */
+ ec_found = acpi_dev_found(LOONGSON_ACPI_EC_HID);
+ if (!ec_found)
+ return -ENODEV;
+
+ /* Enable SCI for EC */
+ acpi_write_bit_register(ACPI_BITREG_SCI_ENABLE, 1);
+
+ generic_inputdev = input_allocate_device();
+ if (!generic_inputdev) {
+ pr_err("Unable to allocate input device\n");
+ return -ENOMEM;
+ }
+
+ /* Prepare input device, but don't register */
+ generic_inputdev->name =
+ "Loongson Generic Laptop/All-in-One Extra Buttons";
+ generic_inputdev->phys = ACPI_LAPTOP_NAME "/input0";
+ generic_inputdev->id.bustype = BUS_HOST;
+ generic_inputdev->dev.parent = NULL;
+
+ /* Init subdrivers */
+ for (i = 0; i < ARRAY_SIZE(generic_sub_drivers); i++) {
+ ret = generic_subdriver_init(&generic_sub_drivers[i]);
+ if (ret < 0) {
+ input_free_device(generic_inputdev);
+ while (--i >= 0)
+ generic_subdriver_exit(&generic_sub_drivers[i]);
+ return ret;
+ }
+ }
+
+ ret = input_register_device(generic_inputdev);
+ if (ret < 0) {
+ input_free_device(generic_inputdev);
+ while (--i >= 0)
+ generic_subdriver_exit(&generic_sub_drivers[i]);
+ pr_err("Unable to register input device\n");
+ return ret;
+ }
+
+ input_device_registered = 1;
+
+ if (acpi_evalf(hotkey_handle, &status, "ECBG", "d")) {
+ pr_info("Loongson laptop detected, initial brightness is 0x%x\n", status);
+ ret = laptop_backlight_register();
+ if (ret < 0)
+ pr_err("Loongson Laptop: laptop-backlight device register failed\n");
+ }
+
+ return 0;
+}
+
+static void __exit generic_acpi_laptop_exit(void)
+{
+ if (generic_inputdev) {
+ if (input_device_registered)
+ input_unregister_device(generic_inputdev);
+ else
+ input_free_device(generic_inputdev);
+ }
+}
+
+module_init(generic_acpi_laptop_init);
+module_exit(generic_acpi_laptop_exit);
+
+MODULE_AUTHOR("Jianmin Lv <lvjianmin@loongson.cn>");
+MODULE_AUTHOR("Huacai Chen <chenhuacai@loongson.cn>");
+MODULE_DESCRIPTION("Loongson Laptop/All-in-One ACPI Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/platform/x86/intel/int0002_vgpio.c b/drivers/platform/x86/intel/int0002_vgpio.c
index 617dbf98980e..97cfbc520a02 100644
--- a/drivers/platform/x86/intel/int0002_vgpio.c
+++ b/drivers/platform/x86/intel/int0002_vgpio.c
@@ -125,8 +125,7 @@ static irqreturn_t int0002_irq(int irq, void *data)
if (!(gpe_sts_reg & GPE0A_PME_B0_STS_BIT))
return IRQ_NONE;
- generic_handle_irq(irq_find_mapping(chip->irq.domain,
- GPE0A_PME_B0_VIRT_GPIO_PIN));
+ generic_handle_domain_irq_safe(chip->irq.domain, GPE0A_PME_B0_VIRT_GPIO_PIN);
pm_wakeup_hard_event(chip->parent);
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index 86d9e428357b..7f5402fe857a 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -12,7 +12,6 @@
#include <linux/module.h>
#include <linux/init.h>
-#include <linux/device.h>
#include <linux/slab.h>
#include <linux/mdev.h>
@@ -142,7 +141,6 @@ static struct vfio_ccw_private *vfio_ccw_alloc_private(struct subchannel *sch)
INIT_LIST_HEAD(&private->crw);
INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
INIT_WORK(&private->crw_work, vfio_ccw_crw_todo);
- atomic_set(&private->avail, 1);
private->cp.guest_cp = kcalloc(CCWCHAIN_LEN_MAX, sizeof(struct ccw1),
GFP_KERNEL);
@@ -203,7 +201,6 @@ static void vfio_ccw_free_private(struct vfio_ccw_private *private)
mutex_destroy(&private->io_mutex);
kfree(private);
}
-
static int vfio_ccw_sch_probe(struct subchannel *sch)
{
struct pmcw *pmcw = &sch->schib.pmcw;
@@ -222,7 +219,12 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
dev_set_drvdata(&sch->dev, private);
- ret = mdev_register_device(&sch->dev, &vfio_ccw_mdev_driver);
+ private->mdev_type.sysfs_name = "io";
+ private->mdev_type.pretty_name = "I/O subchannel (Non-QDIO)";
+ private->mdev_types[0] = &private->mdev_type;
+ ret = mdev_register_parent(&private->parent, &sch->dev,
+ &vfio_ccw_mdev_driver,
+ private->mdev_types, 1);
if (ret)
goto out_free;
@@ -241,7 +243,7 @@ static void vfio_ccw_sch_remove(struct subchannel *sch)
{
struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
- mdev_unregister_device(&sch->dev);
+ mdev_unregister_parent(&private->parent);
dev_set_drvdata(&sch->dev, NULL);
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index 4a806a2273b5..6ae4d012d800 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -11,7 +11,6 @@
*/
#include <linux/vfio.h>
-#include <linux/mdev.h>
#include <linux/nospec.h>
#include <linux/slab.h>
@@ -45,47 +44,14 @@ static void vfio_ccw_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length)
vfio_ccw_mdev_reset(private);
}
-static ssize_t name_show(struct mdev_type *mtype,
- struct mdev_type_attribute *attr, char *buf)
-{
- return sprintf(buf, "I/O subchannel (Non-QDIO)\n");
-}
-static MDEV_TYPE_ATTR_RO(name);
-
-static ssize_t device_api_show(struct mdev_type *mtype,
- struct mdev_type_attribute *attr, char *buf)
-{
- return sprintf(buf, "%s\n", VFIO_DEVICE_API_CCW_STRING);
-}
-static MDEV_TYPE_ATTR_RO(device_api);
-
-static ssize_t available_instances_show(struct mdev_type *mtype,
- struct mdev_type_attribute *attr,
- char *buf)
+static int vfio_ccw_mdev_init_dev(struct vfio_device *vdev)
{
struct vfio_ccw_private *private =
- dev_get_drvdata(mtype_get_parent_dev(mtype));
+ container_of(vdev, struct vfio_ccw_private, vdev);
- return sprintf(buf, "%d\n", atomic_read(&private->avail));
+ init_completion(&private->release_comp);
+ return 0;
}
-static MDEV_TYPE_ATTR_RO(available_instances);
-
-static struct attribute *mdev_types_attrs[] = {
- &mdev_type_attr_name.attr,
- &mdev_type_attr_device_api.attr,
- &mdev_type_attr_available_instances.attr,
- NULL,
-};
-
-static struct attribute_group mdev_type_group = {
- .name = "io",
- .attrs = mdev_types_attrs,
-};
-
-static struct attribute_group *mdev_type_groups[] = {
- &mdev_type_group,
- NULL,
-};
static int vfio_ccw_mdev_probe(struct mdev_device *mdev)
{
@@ -95,12 +61,9 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev)
if (private->state == VFIO_CCW_STATE_NOT_OPER)
return -ENODEV;
- if (atomic_dec_if_positive(&private->avail) < 0)
- return -EPERM;
-
- memset(&private->vdev, 0, sizeof(private->vdev));
- vfio_init_group_dev(&private->vdev, &mdev->dev,
- &vfio_ccw_dev_ops);
+ ret = vfio_init_device(&private->vdev, &mdev->dev, &vfio_ccw_dev_ops);
+ if (ret)
+ return ret;
VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: create\n",
private->sch->schid.cssid,
@@ -109,16 +72,32 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev)
ret = vfio_register_emulated_iommu_dev(&private->vdev);
if (ret)
- goto err_atomic;
+ goto err_put_vdev;
dev_set_drvdata(&mdev->dev, private);
return 0;
-err_atomic:
- vfio_uninit_group_dev(&private->vdev);
- atomic_inc(&private->avail);
+err_put_vdev:
+ vfio_put_device(&private->vdev);
return ret;
}
+static void vfio_ccw_mdev_release_dev(struct vfio_device *vdev)
+{
+ struct vfio_ccw_private *private =
+ container_of(vdev, struct vfio_ccw_private, vdev);
+
+ /*
+ * We cannot free vfio_ccw_private here because it includes
+ * parent info which must be freed by the css driver.
+ *
+ * Work around this by memset'ing the core device part and
+ * then notifying the remove path that all active references
+ * to this device have been released.
+ */
+ memset(vdev, 0, sizeof(*vdev));
+ complete(&private->release_comp);
+}
+
static void vfio_ccw_mdev_remove(struct mdev_device *mdev)
{
struct vfio_ccw_private *private = dev_get_drvdata(mdev->dev.parent);
@@ -130,8 +109,16 @@ static void vfio_ccw_mdev_remove(struct mdev_device *mdev)
vfio_unregister_group_dev(&private->vdev);
- vfio_uninit_group_dev(&private->vdev);
- atomic_inc(&private->avail);
+ vfio_put_device(&private->vdev);
+ /*
+ * Wait until all active references to the mdev have been
+ * released so it is safe to defer kfree() to a later point.
+ *
+ * TODO: the clean fix is to split the parent/mdev info from
+ * the ccw private structure so each can be managed in its own
+ * life cycle.
+ */
+ wait_for_completion(&private->release_comp);
}
static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
@@ -592,6 +579,8 @@ static void vfio_ccw_mdev_request(struct vfio_device *vdev, unsigned int count)
}
static const struct vfio_device_ops vfio_ccw_dev_ops = {
+ .init = vfio_ccw_mdev_init_dev,
+ .release = vfio_ccw_mdev_release_dev,
.open_device = vfio_ccw_mdev_open_device,
.close_device = vfio_ccw_mdev_close_device,
.read = vfio_ccw_mdev_read,
@@ -602,6 +591,8 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = {
};
struct mdev_driver vfio_ccw_mdev_driver = {
+ .device_api = VFIO_DEVICE_API_CCW_STRING,
+ .max_instances = 1,
.driver = {
.name = "vfio_ccw_mdev",
.owner = THIS_MODULE,
@@ -609,5 +600,4 @@ struct mdev_driver vfio_ccw_mdev_driver = {
},
.probe = vfio_ccw_mdev_probe,
.remove = vfio_ccw_mdev_remove,
- .supported_type_groups = mdev_type_groups,
};
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index cd24b7fada91..bd5fb81456af 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -18,6 +18,7 @@
#include <linux/workqueue.h>
#include <linux/vfio_ccw.h>
#include <linux/vfio.h>
+#include <linux/mdev.h>
#include <asm/crw.h>
#include <asm/debug.h>
@@ -72,7 +73,6 @@ struct vfio_ccw_crw {
* @sch: pointer to the subchannel
* @state: internal state of the device
* @completion: synchronization helper of the I/O completion
- * @avail: available for creating a mediated device
* @io_region: MMIO region to input/output I/O arguments/results
* @io_mutex: protect against concurrent update of I/O regions
* @region: additional regions for other subchannel operations
@@ -88,13 +88,14 @@ struct vfio_ccw_crw {
* @req_trigger: eventfd ctx for signaling userspace to return device
* @io_work: work for deferral process of I/O handling
* @crw_work: work for deferral process of CRW handling
+ * @release_comp: synchronization helper for vfio device release
+ * @parent: parent data structures for mdevs created
*/
struct vfio_ccw_private {
struct vfio_device vdev;
struct subchannel *sch;
int state;
struct completion *completion;
- atomic_t avail;
struct ccw_io_region *io_region;
struct mutex io_mutex;
struct vfio_ccw_region *region;
@@ -113,6 +114,12 @@ struct vfio_ccw_private {
struct eventfd_ctx *req_trigger;
struct work_struct io_work;
struct work_struct crw_work;
+
+ struct completion release_comp;
+
+ struct mdev_parent parent;
+ struct mdev_type mdev_type;
+ struct mdev_type *mdev_types[1];
} __aligned(8);
int vfio_ccw_sch_quiesce(struct subchannel *sch);
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index ee82207b4e60..0b4cc8c597ae 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -684,42 +684,41 @@ static bool vfio_ap_mdev_filter_matrix(unsigned long *apm, unsigned long *aqm,
AP_DOMAINS);
}
-static int vfio_ap_mdev_probe(struct mdev_device *mdev)
+static int vfio_ap_mdev_init_dev(struct vfio_device *vdev)
{
- struct ap_matrix_mdev *matrix_mdev;
- int ret;
-
- if ((atomic_dec_if_positive(&matrix_dev->available_instances) < 0))
- return -EPERM;
-
- matrix_mdev = kzalloc(sizeof(*matrix_mdev), GFP_KERNEL);
- if (!matrix_mdev) {
- ret = -ENOMEM;
- goto err_dec_available;
- }
- vfio_init_group_dev(&matrix_mdev->vdev, &mdev->dev,
- &vfio_ap_matrix_dev_ops);
+ struct ap_matrix_mdev *matrix_mdev =
+ container_of(vdev, struct ap_matrix_mdev, vdev);
- matrix_mdev->mdev = mdev;
+ matrix_mdev->mdev = to_mdev_device(vdev->dev);
vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
matrix_mdev->pqap_hook = handle_pqap;
vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->shadow_apcb);
hash_init(matrix_mdev->qtable.queues);
+ return 0;
+}
+
+static int vfio_ap_mdev_probe(struct mdev_device *mdev)
+{
+ struct ap_matrix_mdev *matrix_mdev;
+ int ret;
+
+ matrix_mdev = vfio_alloc_device(ap_matrix_mdev, vdev, &mdev->dev,
+ &vfio_ap_matrix_dev_ops);
+ if (IS_ERR(matrix_mdev))
+ return PTR_ERR(matrix_mdev);
+
ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev);
if (ret)
- goto err_list;
+ goto err_put_vdev;
dev_set_drvdata(&mdev->dev, matrix_mdev);
mutex_lock(&matrix_dev->mdevs_lock);
list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
mutex_unlock(&matrix_dev->mdevs_lock);
return 0;
-err_list:
- vfio_uninit_group_dev(&matrix_mdev->vdev);
- kfree(matrix_mdev);
-err_dec_available:
- atomic_inc(&matrix_dev->available_instances);
+err_put_vdev:
+ vfio_put_device(&matrix_mdev->vdev);
return ret;
}
@@ -766,6 +765,11 @@ static void vfio_ap_mdev_unlink_fr_queues(struct ap_matrix_mdev *matrix_mdev)
}
}
+static void vfio_ap_mdev_release_dev(struct vfio_device *vdev)
+{
+ vfio_free_device(vdev);
+}
+
static void vfio_ap_mdev_remove(struct mdev_device *mdev)
{
struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev);
@@ -779,54 +783,9 @@ static void vfio_ap_mdev_remove(struct mdev_device *mdev)
list_del(&matrix_mdev->node);
mutex_unlock(&matrix_dev->mdevs_lock);
mutex_unlock(&matrix_dev->guests_lock);
- vfio_uninit_group_dev(&matrix_mdev->vdev);
- kfree(matrix_mdev);
- atomic_inc(&matrix_dev->available_instances);
+ vfio_put_device(&matrix_mdev->vdev);
}
-static ssize_t name_show(struct mdev_type *mtype,
- struct mdev_type_attribute *attr, char *buf)
-{
- return sprintf(buf, "%s\n", VFIO_AP_MDEV_NAME_HWVIRT);
-}
-
-static MDEV_TYPE_ATTR_RO(name);
-
-static ssize_t available_instances_show(struct mdev_type *mtype,
- struct mdev_type_attribute *attr,
- char *buf)
-{
- return sprintf(buf, "%d\n",
- atomic_read(&matrix_dev->available_instances));
-}
-
-static MDEV_TYPE_ATTR_RO(available_instances);
-
-static ssize_t device_api_show(struct mdev_type *mtype,
- struct mdev_type_attribute *attr, char *buf)
-{
- return sprintf(buf, "%s\n", VFIO_DEVICE_API_AP_STRING);
-}
-
-static MDEV_TYPE_ATTR_RO(device_api);
-
-static struct attribute *vfio_ap_mdev_type_attrs[] = {
- &mdev_type_attr_name.attr,
- &mdev_type_attr_device_api.attr,
- &mdev_type_attr_available_instances.attr,
- NULL,
-};
-
-static struct attribute_group vfio_ap_mdev_hwvirt_type_group = {
- .name = VFIO_AP_MDEV_TYPE_HWVIRT,
- .attrs = vfio_ap_mdev_type_attrs,
-};
-
-static struct attribute_group *vfio_ap_mdev_type_groups[] = {
- &vfio_ap_mdev_hwvirt_type_group,
- NULL,
-};
-
#define MDEV_SHARING_ERR "Userspace may not re-assign queue %02lx.%04lx " \
"already assigned to %s"
@@ -1824,6 +1783,8 @@ static const struct attribute_group vfio_queue_attr_group = {
};
static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
+ .init = vfio_ap_mdev_init_dev,
+ .release = vfio_ap_mdev_release_dev,
.open_device = vfio_ap_mdev_open_device,
.close_device = vfio_ap_mdev_close_device,
.ioctl = vfio_ap_mdev_ioctl,
@@ -1831,6 +1792,8 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
};
static struct mdev_driver vfio_ap_matrix_driver = {
+ .device_api = VFIO_DEVICE_API_AP_STRING,
+ .max_instances = MAX_ZDEV_ENTRIES_EXT,
.driver = {
.name = "vfio_ap_mdev",
.owner = THIS_MODULE,
@@ -1839,20 +1802,22 @@ static struct mdev_driver vfio_ap_matrix_driver = {
},
.probe = vfio_ap_mdev_probe,
.remove = vfio_ap_mdev_remove,
- .supported_type_groups = vfio_ap_mdev_type_groups,
};
int vfio_ap_mdev_register(void)
{
int ret;
- atomic_set(&matrix_dev->available_instances, MAX_ZDEV_ENTRIES_EXT);
-
ret = mdev_register_driver(&vfio_ap_matrix_driver);
if (ret)
return ret;
- ret = mdev_register_device(&matrix_dev->device, &vfio_ap_matrix_driver);
+ matrix_dev->mdev_type.sysfs_name = VFIO_AP_MDEV_TYPE_HWVIRT;
+ matrix_dev->mdev_type.pretty_name = VFIO_AP_MDEV_NAME_HWVIRT;
+ matrix_dev->mdev_types[0] = &matrix_dev->mdev_type;
+ ret = mdev_register_parent(&matrix_dev->parent, &matrix_dev->device,
+ &vfio_ap_matrix_driver,
+ matrix_dev->mdev_types, 1);
if (ret)
goto err_driver;
return 0;
@@ -1864,7 +1829,7 @@ err_driver:
void vfio_ap_mdev_unregister(void)
{
- mdev_unregister_device(&matrix_dev->device);
+ mdev_unregister_parent(&matrix_dev->parent);
mdev_unregister_driver(&vfio_ap_matrix_driver);
}
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index d782cf463eab..2eddd5f34ed3 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -13,7 +13,6 @@
#define _VFIO_AP_PRIVATE_H_
#include <linux/types.h>
-#include <linux/device.h>
#include <linux/mdev.h>
#include <linux/delay.h>
#include <linux/mutex.h>
@@ -30,7 +29,6 @@
* struct ap_matrix_dev - Contains the data for the matrix device.
*
* @device: generic device structure associated with the AP matrix device
- * @available_instances: number of mediated matrix devices that can be created
* @info: the struct containing the output from the PQAP(QCI) instruction
* @mdev_list: the list of mediated matrix devices created
* @mdevs_lock: mutex for locking the AP matrix device. This lock will be
@@ -47,12 +45,14 @@
*/
struct ap_matrix_dev {
struct device device;
- atomic_t available_instances;
struct ap_config_info info;
struct list_head mdev_list;
struct mutex mdevs_lock; /* serializes access to each ap_matrix_mdev */
struct ap_driver *vfio_ap_drv;
struct mutex guests_lock; /* serializes access to each KVM guest */
+ struct mdev_parent parent;
+ struct mdev_type mdev_type;
+ struct mdev_type *mdev_types[];
};
extern struct ap_matrix_dev *matrix_dev;
diff --git a/drivers/ssb/driver_gpio.c b/drivers/ssb/driver_gpio.c
index 2de3896489c8..897cb8db5084 100644
--- a/drivers/ssb/driver_gpio.c
+++ b/drivers/ssb/driver_gpio.c
@@ -132,7 +132,8 @@ static irqreturn_t ssb_gpio_irq_chipco_handler(int irq, void *dev_id)
return IRQ_NONE;
for_each_set_bit(gpio, &irqs, bus->gpio.ngpio)
- generic_handle_irq(ssb_gpio_to_irq(&bus->gpio, gpio));
+ generic_handle_domain_irq_safe(bus->irq_domain, gpio);
+
ssb_chipco_gpio_polarity(chipco, irqs, val & irqs);
return IRQ_HANDLED;
@@ -330,7 +331,8 @@ static irqreturn_t ssb_gpio_irq_extif_handler(int irq, void *dev_id)
return IRQ_NONE;
for_each_set_bit(gpio, &irqs, bus->gpio.ngpio)
- generic_handle_irq(ssb_gpio_to_irq(&bus->gpio, gpio));
+ generic_handle_domain_irq_safe(bus->irq_domain, gpio);
+
ssb_extif_gpio_polarity(extif, irqs, val & irqs);
return IRQ_HANDLED;
diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 6130d00252ed..86c381ceb9a1 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -3,6 +3,7 @@ menuconfig VFIO
tristate "VFIO Non-Privileged userspace driver framework"
select IOMMU_API
select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
+ select INTERVAL_TREE
help
VFIO provides a framework for secure userspace device drivers.
See Documentation/driver-api/vfio.rst for more details.
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 1a32357592e3..b693a1169286 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,9 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
vfio_virqfd-y := virqfd.o
-vfio-y += vfio_main.o
-
obj-$(CONFIG_VFIO) += vfio.o
+
+vfio-y += vfio_main.o \
+ iova_bitmap.o \
+ container.o
+
obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o
obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
new file mode 100644
index 000000000000..d74164abbf40
--- /dev/null
+++ b/drivers/vfio/container.c
@@ -0,0 +1,680 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
+ *
+ * VFIO container (/dev/vfio/vfio)
+ */
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/capability.h>
+#include <linux/iommu.h>
+#include <linux/miscdevice.h>
+#include <linux/vfio.h>
+#include <uapi/linux/vfio.h>
+
+#include "vfio.h"
+
+struct vfio_container {
+ struct kref kref;
+ struct list_head group_list;
+ struct rw_semaphore group_lock;
+ struct vfio_iommu_driver *iommu_driver;
+ void *iommu_data;
+ bool noiommu;
+};
+
+static struct vfio {
+ struct list_head iommu_drivers_list;
+ struct mutex iommu_drivers_lock;
+} vfio;
+
+#ifdef CONFIG_VFIO_NOIOMMU
+bool vfio_noiommu __read_mostly;
+module_param_named(enable_unsafe_noiommu_mode,
+ vfio_noiommu, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
+#endif
+
+static void *vfio_noiommu_open(unsigned long arg)
+{
+ if (arg != VFIO_NOIOMMU_IOMMU)
+ return ERR_PTR(-EINVAL);
+ if (!capable(CAP_SYS_RAWIO))
+ return ERR_PTR(-EPERM);
+
+ return NULL;
+}
+
+static void vfio_noiommu_release(void *iommu_data)
+{
+}
+
+static long vfio_noiommu_ioctl(void *iommu_data,
+ unsigned int cmd, unsigned long arg)
+{
+ if (cmd == VFIO_CHECK_EXTENSION)
+ return vfio_noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
+
+ return -ENOTTY;
+}
+
+static int vfio_noiommu_attach_group(void *iommu_data,
+ struct iommu_group *iommu_group, enum vfio_group_type type)
+{
+ return 0;
+}
+
+static void vfio_noiommu_detach_group(void *iommu_data,
+ struct iommu_group *iommu_group)
+{
+}
+
+static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
+ .name = "vfio-noiommu",
+ .owner = THIS_MODULE,
+ .open = vfio_noiommu_open,
+ .release = vfio_noiommu_release,
+ .ioctl = vfio_noiommu_ioctl,
+ .attach_group = vfio_noiommu_attach_group,
+ .detach_group = vfio_noiommu_detach_group,
+};
+
+/*
+ * Only noiommu containers can use vfio-noiommu and noiommu containers can only
+ * use vfio-noiommu.
+ */
+static bool vfio_iommu_driver_allowed(struct vfio_container *container,
+ const struct vfio_iommu_driver *driver)
+{
+ if (!IS_ENABLED(CONFIG_VFIO_NOIOMMU))
+ return true;
+ return container->noiommu == (driver->ops == &vfio_noiommu_ops);
+}
+
+/*
+ * IOMMU driver registration
+ */
+int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
+{
+ struct vfio_iommu_driver *driver, *tmp;
+
+ if (WARN_ON(!ops->register_device != !ops->unregister_device))
+ return -EINVAL;
+
+ driver = kzalloc(sizeof(*driver), GFP_KERNEL);
+ if (!driver)
+ return -ENOMEM;
+
+ driver->ops = ops;
+
+ mutex_lock(&vfio.iommu_drivers_lock);
+
+ /* Check for duplicates */
+ list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
+ if (tmp->ops == ops) {
+ mutex_unlock(&vfio.iommu_drivers_lock);
+ kfree(driver);
+ return -EINVAL;
+ }
+ }
+
+ list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
+
+ mutex_unlock(&vfio.iommu_drivers_lock);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
+
+void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
+{
+ struct vfio_iommu_driver *driver;
+
+ mutex_lock(&vfio.iommu_drivers_lock);
+ list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
+ if (driver->ops == ops) {
+ list_del(&driver->vfio_next);
+ mutex_unlock(&vfio.iommu_drivers_lock);
+ kfree(driver);
+ return;
+ }
+ }
+ mutex_unlock(&vfio.iommu_drivers_lock);
+}
+EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
+
+/*
+ * Container objects - containers are created when /dev/vfio/vfio is
+ * opened, but their lifecycle extends until the last user is done, so
+ * they are freed via kref. Must support container/group/device being
+ * closed in any order.
+ */
+static void vfio_container_release(struct kref *kref)
+{
+ struct vfio_container *container;
+ container = container_of(kref, struct vfio_container, kref);
+
+ kfree(container);
+}
+
+static void vfio_container_get(struct vfio_container *container)
+{
+ kref_get(&container->kref);
+}
+
+static void vfio_container_put(struct vfio_container *container)
+{
+ kref_put(&container->kref, vfio_container_release);
+}
+
+void vfio_device_container_register(struct vfio_device *device)
+{
+ struct vfio_iommu_driver *iommu_driver =
+ device->group->container->iommu_driver;
+
+ if (iommu_driver && iommu_driver->ops->register_device)
+ iommu_driver->ops->register_device(
+ device->group->container->iommu_data, device);
+}
+
+void vfio_device_container_unregister(struct vfio_device *device)
+{
+ struct vfio_iommu_driver *iommu_driver =
+ device->group->container->iommu_driver;
+
+ if (iommu_driver && iommu_driver->ops->unregister_device)
+ iommu_driver->ops->unregister_device(
+ device->group->container->iommu_data, device);
+}
+
+long vfio_container_ioctl_check_extension(struct vfio_container *container,
+ unsigned long arg)
+{
+ struct vfio_iommu_driver *driver;
+ long ret = 0;
+
+ down_read(&container->group_lock);
+
+ driver = container->iommu_driver;
+
+ switch (arg) {
+ /* No base extensions yet */
+ default:
+ /*
+ * If no driver is set, poll all registered drivers for
+ * extensions and return the first positive result. If
+ * a driver is already set, further queries will be passed
+ * only to that driver.
+ */
+ if (!driver) {
+ mutex_lock(&vfio.iommu_drivers_lock);
+ list_for_each_entry(driver, &vfio.iommu_drivers_list,
+ vfio_next) {
+
+ if (!list_empty(&container->group_list) &&
+ !vfio_iommu_driver_allowed(container,
+ driver))
+ continue;
+ if (!try_module_get(driver->ops->owner))
+ continue;
+
+ ret = driver->ops->ioctl(NULL,
+ VFIO_CHECK_EXTENSION,
+ arg);
+ module_put(driver->ops->owner);
+ if (ret > 0)
+ break;
+ }
+ mutex_unlock(&vfio.iommu_drivers_lock);
+ } else
+ ret = driver->ops->ioctl(container->iommu_data,
+ VFIO_CHECK_EXTENSION, arg);
+ }
+
+ up_read(&container->group_lock);
+
+ return ret;
+}
+
+/* hold write lock on container->group_lock */
+static int __vfio_container_attach_groups(struct vfio_container *container,
+ struct vfio_iommu_driver *driver,
+ void *data)
+{
+ struct vfio_group *group;
+ int ret = -ENODEV;
+
+ list_for_each_entry(group, &container->group_list, container_next) {
+ ret = driver->ops->attach_group(data, group->iommu_group,
+ group->type);
+ if (ret)
+ goto unwind;
+ }
+
+ return ret;
+
+unwind:
+ list_for_each_entry_continue_reverse(group, &container->group_list,
+ container_next) {
+ driver->ops->detach_group(data, group->iommu_group);
+ }
+
+ return ret;
+}
+
+static long vfio_ioctl_set_iommu(struct vfio_container *container,
+ unsigned long arg)
+{
+ struct vfio_iommu_driver *driver;
+ long ret = -ENODEV;
+
+ down_write(&container->group_lock);
+
+ /*
+ * The container is designed to be an unprivileged interface while
+ * the group can be assigned to specific users. Therefore, only by
+ * adding a group to a container does the user get the privilege of
+ * enabling the iommu, which may allocate finite resources. There
+ * is no unset_iommu, but by removing all the groups from a container,
+ * the container is deprivileged and returns to an unset state.
+ */
+ if (list_empty(&container->group_list) || container->iommu_driver) {
+ up_write(&container->group_lock);
+ return -EINVAL;
+ }
+
+ mutex_lock(&vfio.iommu_drivers_lock);
+ list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
+ void *data;
+
+ if (!vfio_iommu_driver_allowed(container, driver))
+ continue;
+ if (!try_module_get(driver->ops->owner))
+ continue;
+
+ /*
+ * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
+ * so test which iommu driver reported support for this
+ * extension and call open on them. We also pass them the
+ * magic, allowing a single driver to support multiple
+ * interfaces if they'd like.
+ */
+ if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
+ module_put(driver->ops->owner);
+ continue;
+ }
+
+ data = driver->ops->open(arg);
+ if (IS_ERR(data)) {
+ ret = PTR_ERR(data);
+ module_put(driver->ops->owner);
+ continue;
+ }
+
+ ret = __vfio_container_attach_groups(container, driver, data);
+ if (ret) {
+ driver->ops->release(data);
+ module_put(driver->ops->owner);
+ continue;
+ }
+
+ container->iommu_driver = driver;
+ container->iommu_data = data;
+ break;
+ }
+
+ mutex_unlock(&vfio.iommu_drivers_lock);
+ up_write(&container->group_lock);
+
+ return ret;
+}
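For context, the SET_IOMMU path is only reachable once a group has been added to the container; otherwise the list_empty() check above fails with -EINVAL. A minimal userspace sketch of the legacy container flow (standard VFIO uAPI, not part of this patch; the group number is a placeholder and error handling is omitted):

#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

int main(void)
{
	int container = open("/dev/vfio/vfio", O_RDWR);
	int group = open("/dev/vfio/26", O_RDWR);	/* "26" is just an example */

	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
		return 1;

	/* Add the group first; only then does VFIO_SET_IOMMU succeed. */
	ioctl(group, VFIO_GROUP_SET_CONTAINER, &container);
	ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1v2_IOMMU);

	return 0;
}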
+
+static long vfio_fops_unl_ioctl(struct file *filep,
+ unsigned int cmd, unsigned long arg)
+{
+ struct vfio_container *container = filep->private_data;
+ struct vfio_iommu_driver *driver;
+ void *data;
+ long ret = -EINVAL;
+
+ if (!container)
+ return ret;
+
+ switch (cmd) {
+ case VFIO_GET_API_VERSION:
+ ret = VFIO_API_VERSION;
+ break;
+ case VFIO_CHECK_EXTENSION:
+ ret = vfio_container_ioctl_check_extension(container, arg);
+ break;
+ case VFIO_SET_IOMMU:
+ ret = vfio_ioctl_set_iommu(container, arg);
+ break;
+ default:
+ driver = container->iommu_driver;
+ data = container->iommu_data;
+
+ if (driver) /* passthrough all unrecognized ioctls */
+ ret = driver->ops->ioctl(data, cmd, arg);
+ }
+
+ return ret;
+}
+
+static int vfio_fops_open(struct inode *inode, struct file *filep)
+{
+ struct vfio_container *container;
+
+ container = kzalloc(sizeof(*container), GFP_KERNEL);
+ if (!container)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&container->group_list);
+ init_rwsem(&container->group_lock);
+ kref_init(&container->kref);
+
+ filep->private_data = container;
+
+ return 0;
+}
+
+static int vfio_fops_release(struct inode *inode, struct file *filep)
+{
+ struct vfio_container *container = filep->private_data;
+ struct vfio_iommu_driver *driver = container->iommu_driver;
+
+ if (driver && driver->ops->notify)
+ driver->ops->notify(container->iommu_data,
+ VFIO_IOMMU_CONTAINER_CLOSE);
+
+ filep->private_data = NULL;
+
+ vfio_container_put(container);
+
+ return 0;
+}
+
+static const struct file_operations vfio_fops = {
+ .owner = THIS_MODULE,
+ .open = vfio_fops_open,
+ .release = vfio_fops_release,
+ .unlocked_ioctl = vfio_fops_unl_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
+};
+
+struct vfio_container *vfio_container_from_file(struct file *file)
+{
+ struct vfio_container *container;
+
+ /* Sanity check, is this really our fd? */
+ if (file->f_op != &vfio_fops)
+ return NULL;
+
+ container = file->private_data;
+ WARN_ON(!container); /* fget ensures we don't race vfio_release */
+ return container;
+}
+
+static struct miscdevice vfio_dev = {
+ .minor = VFIO_MINOR,
+ .name = "vfio",
+ .fops = &vfio_fops,
+ .nodename = "vfio/vfio",
+ .mode = S_IRUGO | S_IWUGO,
+};
+
+int vfio_container_attach_group(struct vfio_container *container,
+ struct vfio_group *group)
+{
+ struct vfio_iommu_driver *driver;
+ int ret = 0;
+
+ lockdep_assert_held(&group->group_lock);
+
+ if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ down_write(&container->group_lock);
+
+ /* Real groups and fake groups cannot mix */
+ if (!list_empty(&container->group_list) &&
+ container->noiommu != (group->type == VFIO_NO_IOMMU)) {
+ ret = -EPERM;
+ goto out_unlock_container;
+ }
+
+ if (group->type == VFIO_IOMMU) {
+ ret = iommu_group_claim_dma_owner(group->iommu_group, group);
+ if (ret)
+ goto out_unlock_container;
+ }
+
+ driver = container->iommu_driver;
+ if (driver) {
+ ret = driver->ops->attach_group(container->iommu_data,
+ group->iommu_group,
+ group->type);
+ if (ret) {
+ if (group->type == VFIO_IOMMU)
+ iommu_group_release_dma_owner(
+ group->iommu_group);
+ goto out_unlock_container;
+ }
+ }
+
+ group->container = container;
+ group->container_users = 1;
+ container->noiommu = (group->type == VFIO_NO_IOMMU);
+ list_add(&group->container_next, &container->group_list);
+
+ /* Get a reference on the container and mark a user within the group */
+ vfio_container_get(container);
+
+out_unlock_container:
+ up_write(&container->group_lock);
+ return ret;
+}
+
+void vfio_group_detach_container(struct vfio_group *group)
+{
+ struct vfio_container *container = group->container;
+ struct vfio_iommu_driver *driver;
+
+ lockdep_assert_held(&group->group_lock);
+ WARN_ON(group->container_users != 1);
+
+ down_write(&container->group_lock);
+
+ driver = container->iommu_driver;
+ if (driver)
+ driver->ops->detach_group(container->iommu_data,
+ group->iommu_group);
+
+ if (group->type == VFIO_IOMMU)
+ iommu_group_release_dma_owner(group->iommu_group);
+
+ group->container = NULL;
+ group->container_users = 0;
+ list_del(&group->container_next);
+
+ /* Detaching the last group deprivileges a container, remove iommu */
+ if (driver && list_empty(&container->group_list)) {
+ driver->ops->release(container->iommu_data);
+ module_put(driver->ops->owner);
+ container->iommu_driver = NULL;
+ container->iommu_data = NULL;
+ }
+
+ up_write(&container->group_lock);
+
+ vfio_container_put(container);
+}
+
+int vfio_device_assign_container(struct vfio_device *device)
+{
+ struct vfio_group *group = device->group;
+
+ lockdep_assert_held(&group->group_lock);
+
+ if (!group->container || !group->container->iommu_driver ||
+ WARN_ON(!group->container_users))
+ return -EINVAL;
+
+ if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
+ return -EPERM;
+
+ get_file(group->opened_file);
+ group->container_users++;
+ return 0;
+}
+
+void vfio_device_unassign_container(struct vfio_device *device)
+{
+ mutex_lock(&device->group->group_lock);
+ WARN_ON(device->group->container_users <= 1);
+ device->group->container_users--;
+ fput(device->group->opened_file);
+ mutex_unlock(&device->group->group_lock);
+}
+
+/*
+ * Pin contiguous user pages and return their associated host pages for local
+ * domain only.
+ * @device [in] : device
+ * @iova [in] : starting IOVA of user pages to be pinned.
+ * @npage [in] : count of pages to be pinned. This count should not
+ * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
+ * @prot [in] : protection flags
+ * @pages[out] : array of host pages
+ * Return error or number of pages pinned.
+ *
+ * A driver may only call this function if the vfio_device was created
+ * by vfio_register_emulated_iommu_dev().
+ */
+int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
+ int npage, int prot, struct page **pages)
+{
+ struct vfio_container *container;
+ struct vfio_group *group = device->group;
+ struct vfio_iommu_driver *driver;
+ int ret;
+
+ if (!pages || !npage || !vfio_assert_device_open(device))
+ return -EINVAL;
+
+ if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
+ return -E2BIG;
+
+ /* group->container cannot change while a vfio device is open */
+ container = group->container;
+ driver = container->iommu_driver;
+ if (likely(driver && driver->ops->pin_pages))
+ ret = driver->ops->pin_pages(container->iommu_data,
+ group->iommu_group, iova,
+ npage, prot, pages);
+ else
+ ret = -ENOTTY;
+
+ return ret;
+}
+EXPORT_SYMBOL(vfio_pin_pages);
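A hedged sketch of a caller, not taken from this patch: an emulated-IOMMU (mdev) driver pinning a single page of guest memory for CPU access. The names vdev and gpa are placeholders; prot uses the IOMMU_READ/IOMMU_WRITE flags from <linux/iommu.h>:

	struct page *page;
	int ret;

	ret = vfio_pin_pages(vdev, gpa & PAGE_MASK, 1,
			     IOMMU_READ | IOMMU_WRITE, &page);
	if (ret != 1)
		return ret < 0 ? ret : -EFAULT;

	/* ... access the page, e.g. via kmap_local_page(page) ... */

	vfio_unpin_pages(vdev, gpa & PAGE_MASK, 1);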
+
+/*
+ * Unpin contiguous host pages for local domain only.
+ * @device [in] : device
+ * @iova [in] : starting address of user pages to be unpinned.
+ * @npage [in] : count of pages to be unpinned. This count should not
+ * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
+ */
+void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
+{
+ struct vfio_container *container;
+ struct vfio_iommu_driver *driver;
+
+ if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
+ return;
+
+ if (WARN_ON(!vfio_assert_device_open(device)))
+ return;
+
+ /* group->container cannot change while a vfio device is open */
+ container = device->group->container;
+ driver = container->iommu_driver;
+
+ driver->ops->unpin_pages(container->iommu_data, iova, npage);
+}
+EXPORT_SYMBOL(vfio_unpin_pages);
+
+/*
+ * This interface allows the CPUs to perform some sort of virtual DMA on
+ * behalf of the device.
+ *
+ * CPUs read/write from/into a range of IOVAs pointing to user space memory
+ * into/from a kernel buffer.
+ *
+ * As the read/write of user space memory is conducted via the CPUs and is
+ * not a real device DMA, it is not necessary to pin the user space memory.
+ *
+ * @device [in] : VFIO device
+ * @iova [in] : base IOVA of a user space buffer
+ * @data [in] : pointer to kernel buffer
+ * @len [in] : kernel buffer length
+ * @write : indicate read or write
+ * Return error code on failure or 0 on success.
+ */
+int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
+ size_t len, bool write)
+{
+ struct vfio_container *container;
+ struct vfio_iommu_driver *driver;
+ int ret = 0;
+
+ if (!data || len <= 0 || !vfio_assert_device_open(device))
+ return -EINVAL;
+
+ /* group->container cannot change while a vfio device is open */
+ container = device->group->container;
+ driver = container->iommu_driver;
+
+ if (likely(driver && driver->ops->dma_rw))
+ ret = driver->ops->dma_rw(container->iommu_data,
+ iova, data, len, write);
+ else
+ ret = -ENOTTY;
+ return ret;
+}
+EXPORT_SYMBOL(vfio_dma_rw);
+
+int __init vfio_container_init(void)
+{
+ int ret;
+
+ mutex_init(&vfio.iommu_drivers_lock);
+ INIT_LIST_HEAD(&vfio.iommu_drivers_list);
+
+ ret = misc_register(&vfio_dev);
+ if (ret) {
+ pr_err("vfio: misc device register failed\n");
+ return ret;
+ }
+
+ if (IS_ENABLED(CONFIG_VFIO_NOIOMMU)) {
+ ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
+ if (ret)
+ goto err_misc;
+ }
+ return 0;
+
+err_misc:
+ misc_deregister(&vfio_dev);
+ return ret;
+}
+
+void vfio_container_cleanup(void)
+{
+ if (IS_ENABLED(CONFIG_VFIO_NOIOMMU))
+ vfio_unregister_iommu_driver(&vfio_noiommu_ops);
+ misc_deregister(&vfio_dev);
+ mutex_destroy(&vfio.iommu_drivers_lock);
+}
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index 3feff729f3ce..b16874e913e4 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -108,9 +108,9 @@ static void vfio_fsl_mc_close_device(struct vfio_device *core_vdev)
/* reset the device before cleaning up the interrupts */
ret = vfio_fsl_mc_reset_device(vdev);
- if (WARN_ON(ret))
+ if (ret)
dev_warn(&mc_cont->dev,
- "VFIO_FLS_MC: reset device has failed (%d)\n", ret);
+ "VFIO_FSL_MC: reset device has failed (%d)\n", ret);
vfio_fsl_mc_irqs_cleanup(vdev);
@@ -418,16 +418,7 @@ static int vfio_fsl_mc_mmap(struct vfio_device *core_vdev,
return vfio_fsl_mc_mmap_mmio(vdev->regions[index], vma);
}
-static const struct vfio_device_ops vfio_fsl_mc_ops = {
- .name = "vfio-fsl-mc",
- .open_device = vfio_fsl_mc_open_device,
- .close_device = vfio_fsl_mc_close_device,
- .ioctl = vfio_fsl_mc_ioctl,
- .read = vfio_fsl_mc_read,
- .write = vfio_fsl_mc_write,
- .mmap = vfio_fsl_mc_mmap,
-};
-
+static const struct vfio_device_ops vfio_fsl_mc_ops;
static int vfio_fsl_mc_bus_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
@@ -518,35 +509,43 @@ static void vfio_fsl_uninit_device(struct vfio_fsl_mc_device *vdev)
bus_unregister_notifier(&fsl_mc_bus_type, &vdev->nb);
}
-static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
+static int vfio_fsl_mc_init_dev(struct vfio_device *core_vdev)
{
- struct vfio_fsl_mc_device *vdev;
- struct device *dev = &mc_dev->dev;
+ struct vfio_fsl_mc_device *vdev =
+ container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
+ struct fsl_mc_device *mc_dev = to_fsl_mc_device(core_vdev->dev);
int ret;
- vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
- if (!vdev)
- return -ENOMEM;
-
- vfio_init_group_dev(&vdev->vdev, dev, &vfio_fsl_mc_ops);
vdev->mc_dev = mc_dev;
mutex_init(&vdev->igate);
if (is_fsl_mc_bus_dprc(mc_dev))
- ret = vfio_assign_device_set(&vdev->vdev, &mc_dev->dev);
+ ret = vfio_assign_device_set(core_vdev, &mc_dev->dev);
else
- ret = vfio_assign_device_set(&vdev->vdev, mc_dev->dev.parent);
- if (ret)
- goto out_uninit;
+ ret = vfio_assign_device_set(core_vdev, mc_dev->dev.parent);
- ret = vfio_fsl_mc_init_device(vdev);
if (ret)
- goto out_uninit;
+ return ret;
+
+ /* device_set is released by vfio core if @init fails */
+ return vfio_fsl_mc_init_device(vdev);
+}
+
+static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
+{
+ struct vfio_fsl_mc_device *vdev;
+ struct device *dev = &mc_dev->dev;
+ int ret;
+
+ vdev = vfio_alloc_device(vfio_fsl_mc_device, vdev, dev,
+ &vfio_fsl_mc_ops);
+ if (IS_ERR(vdev))
+ return PTR_ERR(vdev);
ret = vfio_register_group_dev(&vdev->vdev);
if (ret) {
dev_err(dev, "VFIO_FSL_MC: Failed to add to vfio group\n");
- goto out_device;
+ goto out_put_vdev;
}
ret = vfio_fsl_mc_scan_container(mc_dev);
@@ -557,30 +556,44 @@ static int vfio_fsl_mc_probe(struct fsl_mc_device *mc_dev)
out_group_dev:
vfio_unregister_group_dev(&vdev->vdev);
-out_device:
- vfio_fsl_uninit_device(vdev);
-out_uninit:
- vfio_uninit_group_dev(&vdev->vdev);
- kfree(vdev);
+out_put_vdev:
+ vfio_put_device(&vdev->vdev);
return ret;
}
+static void vfio_fsl_mc_release_dev(struct vfio_device *core_vdev)
+{
+ struct vfio_fsl_mc_device *vdev =
+ container_of(core_vdev, struct vfio_fsl_mc_device, vdev);
+
+ vfio_fsl_uninit_device(vdev);
+ mutex_destroy(&vdev->igate);
+ vfio_free_device(core_vdev);
+}
+
static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev)
{
struct device *dev = &mc_dev->dev;
struct vfio_fsl_mc_device *vdev = dev_get_drvdata(dev);
vfio_unregister_group_dev(&vdev->vdev);
- mutex_destroy(&vdev->igate);
-
dprc_remove_devices(mc_dev, NULL, 0);
- vfio_fsl_uninit_device(vdev);
-
- vfio_uninit_group_dev(&vdev->vdev);
- kfree(vdev);
+ vfio_put_device(&vdev->vdev);
return 0;
}
+static const struct vfio_device_ops vfio_fsl_mc_ops = {
+ .name = "vfio-fsl-mc",
+ .init = vfio_fsl_mc_init_dev,
+ .release = vfio_fsl_mc_release_dev,
+ .open_device = vfio_fsl_mc_open_device,
+ .close_device = vfio_fsl_mc_close_device,
+ .ioctl = vfio_fsl_mc_ioctl,
+ .read = vfio_fsl_mc_read,
+ .write = vfio_fsl_mc_write,
+ .mmap = vfio_fsl_mc_mmap,
+};
+
static struct fsl_mc_driver vfio_fsl_mc_driver = {
.probe = vfio_fsl_mc_probe,
.remove = vfio_fsl_mc_remove,
diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c
new file mode 100644
index 000000000000..6631e8befe1b
--- /dev/null
+++ b/drivers/vfio/iova_bitmap.c
@@ -0,0 +1,422 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022, Oracle and/or its affiliates.
+ * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved
+ */
+#include <linux/iova_bitmap.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
+
+#define BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE)
+
+/*
+ * struct iova_bitmap_map - A bitmap representing an IOVA range
+ *
+ * Main data structure for tracking mapped user pages of bitmap data.
+ *
+ * For example, something recording dirty IOVAs is provided a struct
+ * iova_bitmap as the general structure for iterating over the total IOVA
+ * range. The struct iova_bitmap_map, though, represents the subset of said
+ * IOVA space that is currently pinned by its parent structure (struct
+ * iova_bitmap).
+ *
+ * The user does not need to know the exact location of the bits in the
+ * bitmap. From the user's perspective the only API available is
+ * iova_bitmap_set(), which records an IOVA *range* in the bitmap by setting
+ * the corresponding bits.
+ *
+ * The bitmap is an array of u64 where each bit represents an IOVA range of
+ * (1 << pgshift). Thus the formula for the bitmap data to be set is:
+ *
+ *   data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64))
+ */
+struct iova_bitmap_map {
+ /* base IOVA representing bit 0 of the first page */
+ unsigned long iova;
+
+	/* page size order that each bit represents */
+ unsigned long pgshift;
+
+ /* page offset of the first user page pinned */
+ unsigned long pgoff;
+
+ /* number of pages pinned */
+ unsigned long npages;
+
+ /* pinned pages representing the bitmap data */
+ struct page **pages;
+};
+
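+/*
+ * Illustrative example only, not used by the code below: with
+ * page_size == 4K (pgshift == 12), an IOVA 0x801000 above the bitmap base
+ * maps to bit number 0x801000 / 4096 == 2049, i.e. word
+ * data[2049 / 64] == data[32], bit 2049 % 64 == 1.
+ */
+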
+/*
+ * struct iova_bitmap - The IOVA bitmap object
+ *
+ * Main data structure for iterating over the bitmap data.
+ *
+ * Abstracts the pinning work and iterates in IOVA ranges.
+ * It uses a windowing scheme and pins the bitmap in relatively
+ * big ranges.
+ *
+ * The bitmap object uses one base page to store all the pinned page
+ * pointers related to the bitmap. For sizeof(struct page *) == 8 it stores
+ * 512 struct page pointers which, if the base page size is 4K, means that
+ * 2M of bitmap data is pinned at a time. If the iova_bitmap page size is
+ * also 4K then the range window to iterate is 64G.
+ *
+ * For example iterating on a total IOVA range of 4G..128G, it will walk
+ * through this set of ranges:
+ *
+ * 4G - 68G-1 (64G)
+ * 68G - 128G-1 (64G)
+ *
+ * An example of the APIs on how to use/iterate over the IOVA bitmap:
+ *
+ * bitmap = iova_bitmap_alloc(iova, length, page_size, data);
+ * if (IS_ERR(bitmap))
+ * return PTR_ERR(bitmap);
+ *
+ * ret = iova_bitmap_for_each(bitmap, arg, dirty_reporter_fn);
+ *
+ * iova_bitmap_free(bitmap);
+ *
+ * On each iteration, @dirty_reporter_fn is called with a unique @iova
+ * and @length argument, indicating the current range available through the
+ * iova_bitmap. The @dirty_reporter_fn uses iova_bitmap_set() to mark dirty
+ * areas (@iova_length) within that provided range, as follows:
+ *
+ * iova_bitmap_set(bitmap, iova, iova_length);
+ *
+ * Internally, the object uses an index @mapped_base_index that tracks
+ * which u64 word of the bitmap is currently mapped, up to
+ * @mapped_total_index. The index keeps being incremented until
+ * @mapped_total_index is reached, while mapping at most
+ * PAGE_SIZE / sizeof(struct page *) pages at a time.
+ *
+ * The IOVA bitmap usually lives alongside whatever tracks the DMA mapped
+ * ranges, or some other form of IOVA range tracking that correlates with
+ * the user-passed bitmap.
+ */
+struct iova_bitmap {
+ /* IOVA range representing the currently mapped bitmap data */
+ struct iova_bitmap_map mapped;
+
+ /* userspace address of the bitmap */
+ u64 __user *bitmap;
+
+ /* u64 index that @mapped points to */
+ unsigned long mapped_base_index;
+
+ /* how many u64 can we walk in total */
+ unsigned long mapped_total_index;
+
+ /* base IOVA of the whole bitmap */
+ unsigned long iova;
+
+ /* length of the IOVA range for the whole bitmap */
+ size_t length;
+};
+
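+/*
+ * A minimal sketch (illustrative only) of a reporter callback as passed to
+ * iova_bitmap_for_each(); struct my_tracker and query_next_dirty() are
+ * hypothetical and only show the expected calling convention:
+ *
+ *	static int dirty_reporter_fn(struct iova_bitmap *bitmap,
+ *				     unsigned long iova, size_t length,
+ *				     void *opaque)
+ *	{
+ *		struct my_tracker *trk = opaque;
+ *		unsigned long diova;
+ *		size_t dlen;
+ *
+ *		while (query_next_dirty(trk, iova, length, &diova, &dlen))
+ *			iova_bitmap_set(bitmap, diova, dlen);
+ *		return 0;
+ *	}
+ */
+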
+/*
+ * Converts a relative IOVA to a bitmap index.
+ * This function provides the index into the u64 array (bitmap::bitmap)
+ * for a given IOVA offset.
+ * Relative IOVA means relative to the bitmap::mapped base IOVA
+ * (stored in mapped::iova). All computations in this file are done using
+ * relative IOVAs and thus avoid an extra subtraction against mapped::iova.
+ * The user API iova_bitmap_set() always uses regular absolute IOVAs.
+ */
+static unsigned long iova_bitmap_offset_to_index(struct iova_bitmap *bitmap,
+ unsigned long iova)
+{
+ unsigned long pgsize = 1 << bitmap->mapped.pgshift;
+
+ return iova / (BITS_PER_TYPE(*bitmap->bitmap) * pgsize);
+}
+
+/*
+ * Converts a bitmap index to a *relative* IOVA.
+ */
+static unsigned long iova_bitmap_index_to_offset(struct iova_bitmap *bitmap,
+ unsigned long index)
+{
+ unsigned long pgshift = bitmap->mapped.pgshift;
+
+ return (index * BITS_PER_TYPE(*bitmap->bitmap)) << pgshift;
+}
+
+/*
+ * Returns the base IOVA of the mapped range.
+ */
+static unsigned long iova_bitmap_mapped_iova(struct iova_bitmap *bitmap)
+{
+ unsigned long skip = bitmap->mapped_base_index;
+
+ return bitmap->iova + iova_bitmap_index_to_offset(bitmap, skip);
+}
+
+/*
+ * Pins the bitmap user pages for the current range window.
+ * This is internal to IOVA bitmap and called when advancing the
+ * index (@mapped_base_index) or allocating the bitmap.
+ */
+static int iova_bitmap_get(struct iova_bitmap *bitmap)
+{
+ struct iova_bitmap_map *mapped = &bitmap->mapped;
+ unsigned long npages;
+ u64 __user *addr;
+ long ret;
+
+ /*
+	 * @mapped_base_index is the index of the currently mapped u64 words
+	 * that we have access to. Anything before @mapped_base_index is not
+ * mapped. The range @mapped_base_index .. @mapped_total_index-1 is
+ * mapped but capped at a maximum number of pages.
+ */
+ npages = DIV_ROUND_UP((bitmap->mapped_total_index -
+ bitmap->mapped_base_index) *
+ sizeof(*bitmap->bitmap), PAGE_SIZE);
+
+ /*
+ * We always cap at max number of 'struct page' a base page can fit.
+	 * On x86, for example, this means at most 2M of bitmap data.
+ */
+ npages = min(npages, PAGE_SIZE / sizeof(struct page *));
+
+ /*
+ * Bitmap address to be pinned is calculated via pointer arithmetic
+ * with bitmap u64 word index.
+ */
+ addr = bitmap->bitmap + bitmap->mapped_base_index;
+
+ ret = pin_user_pages_fast((unsigned long)addr, npages,
+ FOLL_WRITE, mapped->pages);
+ if (ret <= 0)
+ return -EFAULT;
+
+ mapped->npages = (unsigned long)ret;
+ /* Base IOVA where @pages point to i.e. bit 0 of the first page */
+ mapped->iova = iova_bitmap_mapped_iova(bitmap);
+
+ /*
+	 * Offset within the first pinned page where bit 0 is located.
+ * This handles the case where the bitmap is not PAGE_SIZE
+ * aligned.
+ */
+ mapped->pgoff = offset_in_page(addr);
+ return 0;
+}
+
+/*
+ * Unpins the bitmap user pages and clears @npages.
+ * (Un)pinning is abstracted from the API user and is done when advancing
+ * the index or freeing the bitmap.
+ */
+static void iova_bitmap_put(struct iova_bitmap *bitmap)
+{
+ struct iova_bitmap_map *mapped = &bitmap->mapped;
+
+ if (mapped->npages) {
+ unpin_user_pages(mapped->pages, mapped->npages);
+ mapped->npages = 0;
+ }
+}
+
+/**
+ * iova_bitmap_alloc() - Allocates an IOVA bitmap object
+ * @iova: Start address of the IOVA range
+ * @length: Length of the IOVA range
+ * @page_size: Page size of the IOVA bitmap. It defines the granularity
+ *             that each bit represents
+ * @data: Userspace address of the bitmap
+ *
+ * Allocates an IOVA bitmap object and initializes all its fields, including
+ * pinning the first user pages of @data.
+ *
+ * Return: A pointer to a newly allocated struct iova_bitmap
+ * or ERR_PTR() on error.
+ */
+struct iova_bitmap *iova_bitmap_alloc(unsigned long iova, size_t length,
+ unsigned long page_size, u64 __user *data)
+{
+ struct iova_bitmap_map *mapped;
+ struct iova_bitmap *bitmap;
+ int rc;
+
+ bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL);
+ if (!bitmap)
+ return ERR_PTR(-ENOMEM);
+
+ mapped = &bitmap->mapped;
+ mapped->pgshift = __ffs(page_size);
+ bitmap->bitmap = data;
+ bitmap->mapped_total_index =
+ iova_bitmap_offset_to_index(bitmap, length - 1) + 1;
+ bitmap->iova = iova;
+ bitmap->length = length;
+ mapped->iova = iova;
+ mapped->pages = (struct page **)__get_free_page(GFP_KERNEL);
+ if (!mapped->pages) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ rc = iova_bitmap_get(bitmap);
+ if (rc)
+ goto err;
+ return bitmap;
+
+err:
+ iova_bitmap_free(bitmap);
+ return ERR_PTR(rc);
+}
+
+/**
+ * iova_bitmap_free() - Frees an IOVA bitmap object
+ * @bitmap: IOVA bitmap to free
+ *
+ * It unpins and releases pages array memory and clears any leftover
+ * state.
+ */
+void iova_bitmap_free(struct iova_bitmap *bitmap)
+{
+ struct iova_bitmap_map *mapped = &bitmap->mapped;
+
+ iova_bitmap_put(bitmap);
+
+ if (mapped->pages) {
+ free_page((unsigned long)mapped->pages);
+ mapped->pages = NULL;
+ }
+
+ kfree(bitmap);
+}
+
+/*
+ * Returns how many bitmap indexes (out of mapped_total_index) are covered by
+ * the currently pinned bitmap pages and still left to process.
+ */
+static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap)
+{
+ unsigned long remaining;
+
+ remaining = bitmap->mapped_total_index - bitmap->mapped_base_index;
+ remaining = min_t(unsigned long, remaining,
+ (bitmap->mapped.npages << PAGE_SHIFT) / sizeof(*bitmap->bitmap));
+
+ return remaining;
+}
+
+/*
+ * Returns the length of the mapped IOVA range.
+ */
+static unsigned long iova_bitmap_mapped_length(struct iova_bitmap *bitmap)
+{
+ unsigned long max_iova = bitmap->iova + bitmap->length - 1;
+ unsigned long iova = iova_bitmap_mapped_iova(bitmap);
+ unsigned long remaining;
+
+ /*
+	 * iova_bitmap_mapped_remaining() returns a number of indexes which,
+	 * when converted to IOVA, gives us the max length that the pinned
+	 * bitmap data can cover. That is then capped to only cover the IOVA
+	 * range in @bitmap::iova .. @bitmap::length.
+ */
+ remaining = iova_bitmap_index_to_offset(bitmap,
+ iova_bitmap_mapped_remaining(bitmap));
+
+ if (iova + remaining - 1 > max_iova)
+ remaining -= ((iova + remaining - 1) - max_iova);
+
+ return remaining;
+}
+
+/*
+ * Returns true if there's no more data to iterate over.
+ */
+static bool iova_bitmap_done(struct iova_bitmap *bitmap)
+{
+ return bitmap->mapped_base_index >= bitmap->mapped_total_index;
+}
+
+/*
+ * Advances to the next range, releases the current pinned
+ * pages and pins the next set of bitmap pages.
+ * Returns 0 on success or otherwise errno.
+ */
+static int iova_bitmap_advance(struct iova_bitmap *bitmap)
+{
+ unsigned long iova = iova_bitmap_mapped_length(bitmap) - 1;
+ unsigned long count = iova_bitmap_offset_to_index(bitmap, iova) + 1;
+
+ bitmap->mapped_base_index += count;
+
+ iova_bitmap_put(bitmap);
+ if (iova_bitmap_done(bitmap))
+ return 0;
+
+ /* When advancing the index we pin the next set of bitmap pages */
+ return iova_bitmap_get(bitmap);
+}
+
+/**
+ * iova_bitmap_for_each() - Iterates over the bitmap
+ * @bitmap: IOVA bitmap to iterate
+ * @opaque: Additional argument to pass to the callback
+ * @fn: Function that gets called for each IOVA range
+ *
+ * Helper function to iterate over bitmap data representing a portion of IOVA
+ * space. It hides the complexity of iterating bitmaps and translating the
+ * mapped bitmap user pages into IOVA ranges to process.
+ *
+ * Return: 0 on success, and an error on failure either upon
+ * iteration or when the callback returns an error.
+ */
+int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque,
+ iova_bitmap_fn_t fn)
+{
+ int ret = 0;
+
+ for (; !iova_bitmap_done(bitmap) && !ret;
+ ret = iova_bitmap_advance(bitmap)) {
+ ret = fn(bitmap, iova_bitmap_mapped_iova(bitmap),
+ iova_bitmap_mapped_length(bitmap), opaque);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+/**
+ * iova_bitmap_set() - Records an IOVA range in bitmap
+ * @bitmap: IOVA bitmap
+ * @iova: IOVA to start
+ * @length: IOVA range length
+ *
+ * Set the bits corresponding to the range [iova .. iova+length-1] in
+ * the user bitmap.
+ */
+void iova_bitmap_set(struct iova_bitmap *bitmap,
+ unsigned long iova, size_t length)
+{
+ struct iova_bitmap_map *mapped = &bitmap->mapped;
+ unsigned long offset = (iova - mapped->iova) >> mapped->pgshift;
+ unsigned long nbits = max_t(unsigned long, 1, length >> mapped->pgshift);
+ unsigned long page_idx = offset / BITS_PER_PAGE;
+ unsigned long page_offset = mapped->pgoff;
+ void *kaddr;
+
+ offset = offset % BITS_PER_PAGE;
+
+ do {
+ unsigned long size = min(BITS_PER_PAGE - offset, nbits);
+
+ kaddr = kmap_local_page(mapped->pages[page_idx]);
+ bitmap_set(kaddr + page_offset, offset, size);
+ kunmap_local(kaddr);
+ page_offset = offset = 0;
+ nbits -= size;
+ page_idx++;
+ } while (nbits > 0);
+}
+EXPORT_SYMBOL_GPL(iova_bitmap_set);
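+
+/*
+ * Illustrative example only: with 4K bitmap pages, each pinned page carries
+ * BITS_PER_PAGE == 32768 bits, i.e. 128M worth of IOVA when page_size is
+ * also 4K. A call such as
+ *
+ *	iova_bitmap_set(bitmap, mapped->iova + SZ_128M - SZ_4K, SZ_8K);
+ *
+ * thus sets the last bit of one pinned page and the first bit of the next,
+ * which is what the loop in iova_bitmap_set() above handles.
+ */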
diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index b8b9e7911e55..58f91b3bd670 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -8,9 +8,7 @@
*/
#include <linux/module.h>
-#include <linux/device.h>
#include <linux/slab.h>
-#include <linux/uuid.h>
#include <linux/sysfs.h>
#include <linux/mdev.h>
@@ -20,71 +18,11 @@
#define DRIVER_AUTHOR "NVIDIA Corporation"
#define DRIVER_DESC "Mediated device Core Driver"
-static LIST_HEAD(parent_list);
-static DEFINE_MUTEX(parent_list_lock);
static struct class_compat *mdev_bus_compat_class;
static LIST_HEAD(mdev_list);
static DEFINE_MUTEX(mdev_list_lock);
-struct device *mdev_parent_dev(struct mdev_device *mdev)
-{
- return mdev->type->parent->dev;
-}
-EXPORT_SYMBOL(mdev_parent_dev);
-
-/*
- * Return the index in supported_type_groups that this mdev_device was created
- * from.
- */
-unsigned int mdev_get_type_group_id(struct mdev_device *mdev)
-{
- return mdev->type->type_group_id;
-}
-EXPORT_SYMBOL(mdev_get_type_group_id);
-
-/*
- * Used in mdev_type_attribute sysfs functions to return the index in the
- * supported_type_groups that the sysfs is called from.
- */
-unsigned int mtype_get_type_group_id(struct mdev_type *mtype)
-{
- return mtype->type_group_id;
-}
-EXPORT_SYMBOL(mtype_get_type_group_id);
-
-/*
- * Used in mdev_type_attribute sysfs functions to return the parent struct
- * device
- */
-struct device *mtype_get_parent_dev(struct mdev_type *mtype)
-{
- return mtype->parent->dev;
-}
-EXPORT_SYMBOL(mtype_get_parent_dev);
-
-/* Should be called holding parent_list_lock */
-static struct mdev_parent *__find_parent_device(struct device *dev)
-{
- struct mdev_parent *parent;
-
- list_for_each_entry(parent, &parent_list, next) {
- if (parent->dev == dev)
- return parent;
- }
- return NULL;
-}
-
-void mdev_release_parent(struct kref *kref)
-{
- struct mdev_parent *parent = container_of(kref, struct mdev_parent,
- ref);
- struct device *dev = parent->dev;
-
- kfree(parent);
- put_device(dev);
-}
-
/* Caller must hold parent unreg_sem read or write lock */
static void mdev_device_remove_common(struct mdev_device *mdev)
{
@@ -99,145 +37,96 @@ static void mdev_device_remove_common(struct mdev_device *mdev)
static int mdev_device_remove_cb(struct device *dev, void *data)
{
- struct mdev_device *mdev = mdev_from_dev(dev);
-
- if (mdev)
- mdev_device_remove_common(mdev);
+ if (dev->bus == &mdev_bus_type)
+ mdev_device_remove_common(to_mdev_device(dev));
return 0;
}
/*
- * mdev_register_device : Register a device
+ * mdev_register_parent: Register a device as parent for mdevs
+ * @parent: parent structure to register
* @dev: device structure representing parent device.
* @mdev_driver: Device driver to bind to the newly created mdev
+ * @types: Array of supported mdev types
+ * @nr_types: Number of entries in @types
+ *
+ * Registers the @parent structure as a parent for mdev types and thus mdev
+ * devices. The caller needs to hold a reference on @dev that must not be
+ * released until after the call to mdev_unregister_parent().
*
- * Add device to list of registered parent devices.
* Returns a negative value on error, otherwise 0.
*/
-int mdev_register_device(struct device *dev, struct mdev_driver *mdev_driver)
+int mdev_register_parent(struct mdev_parent *parent, struct device *dev,
+ struct mdev_driver *mdev_driver, struct mdev_type **types,
+ unsigned int nr_types)
{
- int ret;
- struct mdev_parent *parent;
char *env_string = "MDEV_STATE=registered";
char *envp[] = { env_string, NULL };
+ int ret;
- /* check for mandatory ops */
- if (!mdev_driver->supported_type_groups)
- return -EINVAL;
-
- dev = get_device(dev);
- if (!dev)
- return -EINVAL;
-
- mutex_lock(&parent_list_lock);
-
- /* Check for duplicate */
- parent = __find_parent_device(dev);
- if (parent) {
- parent = NULL;
- ret = -EEXIST;
- goto add_dev_err;
- }
-
- parent = kzalloc(sizeof(*parent), GFP_KERNEL);
- if (!parent) {
- ret = -ENOMEM;
- goto add_dev_err;
- }
-
- kref_init(&parent->ref);
+ memset(parent, 0, sizeof(*parent));
init_rwsem(&parent->unreg_sem);
-
parent->dev = dev;
parent->mdev_driver = mdev_driver;
+ parent->types = types;
+ parent->nr_types = nr_types;
+ atomic_set(&parent->available_instances, mdev_driver->max_instances);
if (!mdev_bus_compat_class) {
mdev_bus_compat_class = class_compat_register("mdev_bus");
- if (!mdev_bus_compat_class) {
- ret = -ENOMEM;
- goto add_dev_err;
- }
+ if (!mdev_bus_compat_class)
+ return -ENOMEM;
}
ret = parent_create_sysfs_files(parent);
if (ret)
- goto add_dev_err;
+ return ret;
ret = class_compat_create_link(mdev_bus_compat_class, dev, NULL);
if (ret)
dev_warn(dev, "Failed to create compatibility class link\n");
- list_add(&parent->next, &parent_list);
- mutex_unlock(&parent_list_lock);
-
dev_info(dev, "MDEV: Registered\n");
kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp);
-
return 0;
-
-add_dev_err:
- mutex_unlock(&parent_list_lock);
- if (parent)
- mdev_put_parent(parent);
- else
- put_device(dev);
- return ret;
}
-EXPORT_SYMBOL(mdev_register_device);
+EXPORT_SYMBOL(mdev_register_parent);
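+
+/*
+ * A minimal sketch (illustrative only) of how a driver is expected to use
+ * this interface; my_parent, my_types and my_mdev_driver are hypothetical:
+ *
+ *	static struct mdev_type *my_types[] = { &type_a, &type_b };
+ *
+ *	ret = mdev_register_parent(&my_parent, dev, &my_mdev_driver,
+ *				   my_types, ARRAY_SIZE(my_types));
+ */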
/*
- * mdev_unregister_device : Unregister a parent device
- * @dev: device structure representing parent device.
- *
- * Remove device from list of registered parent devices. Give a chance to free
- * existing mediated devices for given device.
+ * mdev_unregister_parent: Unregister a parent device
+ * @parent: parent structure to unregister
*/
-
-void mdev_unregister_device(struct device *dev)
+void mdev_unregister_parent(struct mdev_parent *parent)
{
- struct mdev_parent *parent;
char *env_string = "MDEV_STATE=unregistered";
char *envp[] = { env_string, NULL };
- mutex_lock(&parent_list_lock);
- parent = __find_parent_device(dev);
-
- if (!parent) {
- mutex_unlock(&parent_list_lock);
- return;
- }
- dev_info(dev, "MDEV: Unregistering\n");
-
- list_del(&parent->next);
- mutex_unlock(&parent_list_lock);
+ dev_info(parent->dev, "MDEV: Unregistering\n");
down_write(&parent->unreg_sem);
-
- class_compat_remove_link(mdev_bus_compat_class, dev, NULL);
-
- device_for_each_child(dev, NULL, mdev_device_remove_cb);
-
+ class_compat_remove_link(mdev_bus_compat_class, parent->dev, NULL);
+ device_for_each_child(parent->dev, NULL, mdev_device_remove_cb);
parent_remove_sysfs_files(parent);
up_write(&parent->unreg_sem);
- mdev_put_parent(parent);
-
- /* We still have the caller's reference to use for the uevent */
- kobject_uevent_env(&dev->kobj, KOBJ_CHANGE, envp);
+ kobject_uevent_env(&parent->dev->kobj, KOBJ_CHANGE, envp);
}
-EXPORT_SYMBOL(mdev_unregister_device);
+EXPORT_SYMBOL(mdev_unregister_parent);
static void mdev_device_release(struct device *dev)
{
struct mdev_device *mdev = to_mdev_device(dev);
-
- /* Pairs with the get in mdev_device_create() */
- kobject_put(&mdev->type->kobj);
+ struct mdev_parent *parent = mdev->type->parent;
mutex_lock(&mdev_list_lock);
list_del(&mdev->next);
+ if (!parent->mdev_driver->get_available)
+ atomic_inc(&parent->available_instances);
mutex_unlock(&mdev_list_lock);
+ /* Pairs with the get in mdev_device_create() */
+ kobject_put(&mdev->type->kobj);
+
dev_dbg(&mdev->dev, "MDEV: destroying\n");
kfree(mdev);
}
@@ -259,6 +148,18 @@ int mdev_device_create(struct mdev_type *type, const guid_t *uuid)
}
}
+ if (!drv->get_available) {
+ /*
+		 * Note that the non-atomic read and dec are fine here because
+		 * all modifications are under mdev_list_lock.
+ */
+ if (!atomic_read(&parent->available_instances)) {
+ mutex_unlock(&mdev_list_lock);
+ return -EUSERS;
+ }
+ atomic_dec(&parent->available_instances);
+ }
+
mdev = kzalloc(sizeof(*mdev), GFP_KERNEL);
if (!mdev) {
mutex_unlock(&mdev_list_lock);
diff --git a/drivers/vfio/mdev/mdev_driver.c b/drivers/vfio/mdev/mdev_driver.c
index 9c2af59809e2..7825d83a55f8 100644
--- a/drivers/vfio/mdev/mdev_driver.c
+++ b/drivers/vfio/mdev/mdev_driver.c
@@ -7,7 +7,6 @@
* Kirti Wankhede <kwankhede@nvidia.com>
*/
-#include <linux/device.h>
#include <linux/iommu.h>
#include <linux/mdev.h>
@@ -47,7 +46,6 @@ struct bus_type mdev_bus_type = {
.remove = mdev_remove,
.match = mdev_match,
};
-EXPORT_SYMBOL_GPL(mdev_bus_type);
/**
* mdev_register_driver - register a new MDEV driver
@@ -57,10 +55,11 @@ EXPORT_SYMBOL_GPL(mdev_bus_type);
**/
int mdev_register_driver(struct mdev_driver *drv)
{
+ if (!drv->device_api)
+ return -EINVAL;
+
/* initialize common driver fields */
drv->driver.bus = &mdev_bus_type;
-
- /* register with core */
return driver_register(&drv->driver);
}
EXPORT_SYMBOL(mdev_register_driver);
diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
index 7c9fc79f3d83..af457b27f607 100644
--- a/drivers/vfio/mdev/mdev_private.h
+++ b/drivers/vfio/mdev/mdev_private.h
@@ -13,25 +13,7 @@
int mdev_bus_register(void);
void mdev_bus_unregister(void);
-struct mdev_parent {
- struct device *dev;
- struct mdev_driver *mdev_driver;
- struct kref ref;
- struct list_head next;
- struct kset *mdev_types_kset;
- struct list_head type_list;
- /* Synchronize device creation/removal with parent unregistration */
- struct rw_semaphore unreg_sem;
-};
-
-struct mdev_type {
- struct kobject kobj;
- struct kobject *devices_kobj;
- struct mdev_parent *parent;
- struct list_head next;
- unsigned int type_group_id;
-};
-
+extern struct bus_type mdev_bus_type;
extern const struct attribute_group *mdev_device_groups[];
#define to_mdev_type_attr(_attr) \
@@ -48,16 +30,4 @@ void mdev_remove_sysfs_files(struct mdev_device *mdev);
int mdev_device_create(struct mdev_type *kobj, const guid_t *uuid);
int mdev_device_remove(struct mdev_device *dev);
-void mdev_release_parent(struct kref *kref);
-
-static inline void mdev_get_parent(struct mdev_parent *parent)
-{
- kref_get(&parent->ref);
-}
-
-static inline void mdev_put_parent(struct mdev_parent *parent)
-{
- kref_put(&parent->ref, mdev_release_parent);
-}
-
#endif /* MDEV_PRIVATE_H */
diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c
index 0ccfeb3dda24..abe3359dd477 100644
--- a/drivers/vfio/mdev/mdev_sysfs.c
+++ b/drivers/vfio/mdev/mdev_sysfs.c
@@ -9,14 +9,24 @@
#include <linux/sysfs.h>
#include <linux/ctype.h>
-#include <linux/device.h>
#include <linux/slab.h>
-#include <linux/uuid.h>
#include <linux/mdev.h>
#include "mdev_private.h"
-/* Static functions */
+struct mdev_type_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct mdev_type *mtype,
+ struct mdev_type_attribute *attr, char *buf);
+ ssize_t (*store)(struct mdev_type *mtype,
+ struct mdev_type_attribute *attr, const char *buf,
+ size_t count);
+};
+
+#define MDEV_TYPE_ATTR_RO(_name) \
+ struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_RO(_name)
+#define MDEV_TYPE_ATTR_WO(_name) \
+ struct mdev_type_attribute mdev_type_attr_##_name = __ATTR_WO(_name)
static ssize_t mdev_type_attr_show(struct kobject *kobj,
struct attribute *__attr, char *buf)
@@ -74,152 +84,156 @@ static ssize_t create_store(struct mdev_type *mtype,
return count;
}
-
static MDEV_TYPE_ATTR_WO(create);
+static ssize_t device_api_show(struct mdev_type *mtype,
+ struct mdev_type_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "%s\n", mtype->parent->mdev_driver->device_api);
+}
+static MDEV_TYPE_ATTR_RO(device_api);
+
+static ssize_t name_show(struct mdev_type *mtype,
+ struct mdev_type_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%s\n",
+ mtype->pretty_name ? mtype->pretty_name : mtype->sysfs_name);
+}
+
+static MDEV_TYPE_ATTR_RO(name);
+
+static ssize_t available_instances_show(struct mdev_type *mtype,
+ struct mdev_type_attribute *attr,
+ char *buf)
+{
+ struct mdev_driver *drv = mtype->parent->mdev_driver;
+
+ if (drv->get_available)
+ return sysfs_emit(buf, "%u\n", drv->get_available(mtype));
+ return sysfs_emit(buf, "%u\n",
+ atomic_read(&mtype->parent->available_instances));
+}
+static MDEV_TYPE_ATTR_RO(available_instances);
+
+static ssize_t description_show(struct mdev_type *mtype,
+ struct mdev_type_attribute *attr,
+ char *buf)
+{
+ return mtype->parent->mdev_driver->show_description(mtype, buf);
+}
+static MDEV_TYPE_ATTR_RO(description);
+
+static struct attribute *mdev_types_core_attrs[] = {
+ &mdev_type_attr_create.attr,
+ &mdev_type_attr_device_api.attr,
+ &mdev_type_attr_name.attr,
+ &mdev_type_attr_available_instances.attr,
+ &mdev_type_attr_description.attr,
+ NULL,
+};
+
+static umode_t mdev_types_core_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ if (attr == &mdev_type_attr_description.attr &&
+ !to_mdev_type(kobj)->parent->mdev_driver->show_description)
+ return 0;
+ return attr->mode;
+}
+
+static struct attribute_group mdev_type_core_group = {
+ .attrs = mdev_types_core_attrs,
+ .is_visible = mdev_types_core_is_visible,
+};
+
+static const struct attribute_group *mdev_type_groups[] = {
+ &mdev_type_core_group,
+ NULL,
+};
+
static void mdev_type_release(struct kobject *kobj)
{
struct mdev_type *type = to_mdev_type(kobj);
pr_debug("Releasing group %s\n", kobj->name);
/* Pairs with the get in add_mdev_supported_type() */
- mdev_put_parent(type->parent);
- kfree(type);
+ put_device(type->parent->dev);
}
static struct kobj_type mdev_type_ktype = {
- .sysfs_ops = &mdev_type_sysfs_ops,
- .release = mdev_type_release,
+ .sysfs_ops = &mdev_type_sysfs_ops,
+ .release = mdev_type_release,
+ .default_groups = mdev_type_groups,
};
-static struct mdev_type *add_mdev_supported_type(struct mdev_parent *parent,
- unsigned int type_group_id)
+static int mdev_type_add(struct mdev_parent *parent, struct mdev_type *type)
{
- struct mdev_type *type;
- struct attribute_group *group =
- parent->mdev_driver->supported_type_groups[type_group_id];
int ret;
- if (!group->name) {
- pr_err("%s: Type name empty!\n", __func__);
- return ERR_PTR(-EINVAL);
- }
-
- type = kzalloc(sizeof(*type), GFP_KERNEL);
- if (!type)
- return ERR_PTR(-ENOMEM);
-
type->kobj.kset = parent->mdev_types_kset;
type->parent = parent;
/* Pairs with the put in mdev_type_release() */
- mdev_get_parent(parent);
- type->type_group_id = type_group_id;
+ get_device(parent->dev);
ret = kobject_init_and_add(&type->kobj, &mdev_type_ktype, NULL,
"%s-%s", dev_driver_string(parent->dev),
- group->name);
+ type->sysfs_name);
if (ret) {
kobject_put(&type->kobj);
- return ERR_PTR(ret);
+ return ret;
}
- ret = sysfs_create_file(&type->kobj, &mdev_type_attr_create.attr);
- if (ret)
- goto attr_create_failed;
-
type->devices_kobj = kobject_create_and_add("devices", &type->kobj);
if (!type->devices_kobj) {
ret = -ENOMEM;
goto attr_devices_failed;
}
- ret = sysfs_create_files(&type->kobj,
- (const struct attribute **)group->attrs);
- if (ret) {
- ret = -ENOMEM;
- goto attrs_failed;
- }
- return type;
+ return 0;
-attrs_failed:
- kobject_put(type->devices_kobj);
attr_devices_failed:
- sysfs_remove_file(&type->kobj, &mdev_type_attr_create.attr);
-attr_create_failed:
kobject_del(&type->kobj);
kobject_put(&type->kobj);
- return ERR_PTR(ret);
+ return ret;
}
-static void remove_mdev_supported_type(struct mdev_type *type)
+static void mdev_type_remove(struct mdev_type *type)
{
- struct attribute_group *group =
- type->parent->mdev_driver->supported_type_groups[type->type_group_id];
-
- sysfs_remove_files(&type->kobj,
- (const struct attribute **)group->attrs);
kobject_put(type->devices_kobj);
- sysfs_remove_file(&type->kobj, &mdev_type_attr_create.attr);
kobject_del(&type->kobj);
kobject_put(&type->kobj);
}
-static int add_mdev_supported_type_groups(struct mdev_parent *parent)
-{
- int i;
-
- for (i = 0; parent->mdev_driver->supported_type_groups[i]; i++) {
- struct mdev_type *type;
-
- type = add_mdev_supported_type(parent, i);
- if (IS_ERR(type)) {
- struct mdev_type *ltype, *tmp;
-
- list_for_each_entry_safe(ltype, tmp, &parent->type_list,
- next) {
- list_del(&ltype->next);
- remove_mdev_supported_type(ltype);
- }
- return PTR_ERR(type);
- }
- list_add(&type->next, &parent->type_list);
- }
- return 0;
-}
-
/* mdev sysfs functions */
void parent_remove_sysfs_files(struct mdev_parent *parent)
{
- struct mdev_type *type, *tmp;
-
- list_for_each_entry_safe(type, tmp, &parent->type_list, next) {
- list_del(&type->next);
- remove_mdev_supported_type(type);
- }
+	int i;
+
+ for (i = 0; i < parent->nr_types; i++)
+ mdev_type_remove(parent->types[i]);
kset_unregister(parent->mdev_types_kset);
}
int parent_create_sysfs_files(struct mdev_parent *parent)
{
- int ret;
+ int ret, i;
parent->mdev_types_kset = kset_create_and_add("mdev_supported_types",
NULL, &parent->dev->kobj);
-
if (!parent->mdev_types_kset)
return -ENOMEM;
- INIT_LIST_HEAD(&parent->type_list);
-
- ret = add_mdev_supported_type_groups(parent);
- if (ret)
- goto create_err;
+ for (i = 0; i < parent->nr_types; i++) {
+ ret = mdev_type_add(parent, parent->types[i]);
+ if (ret)
+ goto out_err;
+ }
return 0;
-create_err:
- kset_unregister(parent->mdev_types_kset);
- return ret;
+out_err:
+ while (--i >= 0)
+ mdev_type_remove(parent->types[i]);
+	kset_unregister(parent->mdev_types_kset);
+	return ret;
}
static ssize_t remove_store(struct device *dev, struct device_attribute *attr,
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index ea762e28c1cc..39eeca18a0f7 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -16,7 +16,7 @@
#include "hisi_acc_vfio_pci.h"
-/* return 0 on VM acc device ready, -ETIMEDOUT hardware timeout */
+/* Return 0 on VM acc device ready, -ETIMEDOUT hardware timeout */
static int qm_wait_dev_not_ready(struct hisi_qm *qm)
{
u32 val;
@@ -189,7 +189,7 @@ static int qm_set_regs(struct hisi_qm *qm, struct acc_vf_data *vf_data)
struct device *dev = &qm->pdev->dev;
int ret;
- /* check VF state */
+ /* Check VF state */
if (unlikely(hisi_qm_wait_mb_ready(qm))) {
dev_err(&qm->pdev->dev, "QM device is not ready to write\n");
return -EBUSY;
@@ -337,16 +337,7 @@ static int vf_qm_cache_wb(struct hisi_qm *qm)
return 0;
}
-static struct hisi_acc_vf_core_device *hssi_acc_drvdata(struct pci_dev *pdev)
-{
- struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev);
-
- return container_of(core_device, struct hisi_acc_vf_core_device,
- core_device);
-}
-
-static void vf_qm_fun_reset(struct hisi_acc_vf_core_device *hisi_acc_vdev,
- struct hisi_qm *qm)
+static void vf_qm_fun_reset(struct hisi_qm *qm)
{
int i;
@@ -382,7 +373,7 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
return -EINVAL;
}
- /* vf qp num check */
+ /* VF qp num check */
ret = qm_get_vft(vf_qm, &vf_qm->qp_base);
if (ret <= 0) {
dev_err(dev, "failed to get vft qp nums\n");
@@ -396,7 +387,7 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
vf_qm->qp_num = ret;
- /* vf isolation state check */
+ /* VF isolation state check */
ret = qm_read_regs(pf_qm, QM_QUE_ISO_CFG_V, &que_iso_state, 1);
if (ret) {
dev_err(dev, "failed to read QM_QUE_ISO_CFG_V\n");
@@ -405,7 +396,7 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
if (vf_data->que_iso_cfg != que_iso_state) {
dev_err(dev, "failed to match isolation state\n");
- return ret;
+ return -EINVAL;
}
ret = qm_write_regs(vf_qm, QM_VF_STATE, &vf_data->vf_qm_state, 1);
@@ -427,10 +418,10 @@ static int vf_qm_get_match_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
int ret;
vf_data->acc_magic = ACC_DEV_MAGIC;
- /* save device id */
+ /* Save device id */
vf_data->dev_id = hisi_acc_vdev->vf_dev->device;
- /* vf qp num save from PF */
+ /* VF qp num save from PF */
ret = pf_qm_get_qp_num(pf_qm, vf_id, &vf_data->qp_base);
if (ret <= 0) {
dev_err(dev, "failed to get vft qp nums!\n");
@@ -474,19 +465,19 @@ static int vf_qm_load_data(struct hisi_acc_vf_core_device *hisi_acc_vdev,
ret = qm_set_regs(qm, vf_data);
if (ret) {
- dev_err(dev, "Set VF regs failed\n");
+ dev_err(dev, "set VF regs failed\n");
return ret;
}
ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_BT, qm->sqc_dma, 0, 0);
if (ret) {
- dev_err(dev, "Set sqc failed\n");
+ dev_err(dev, "set sqc failed\n");
return ret;
}
ret = hisi_qm_mb(qm, QM_MB_CMD_CQC_BT, qm->cqc_dma, 0, 0);
if (ret) {
- dev_err(dev, "Set cqc failed\n");
+ dev_err(dev, "set cqc failed\n");
return ret;
}
@@ -528,12 +519,12 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
return -EINVAL;
/* Every reg is 32 bit, the dma address is 64 bit. */
- vf_data->eqe_dma = vf_data->qm_eqc_dw[2];
+ vf_data->eqe_dma = vf_data->qm_eqc_dw[1];
vf_data->eqe_dma <<= QM_XQC_ADDR_OFFSET;
- vf_data->eqe_dma |= vf_data->qm_eqc_dw[1];
- vf_data->aeqe_dma = vf_data->qm_aeqc_dw[2];
+ vf_data->eqe_dma |= vf_data->qm_eqc_dw[0];
+ vf_data->aeqe_dma = vf_data->qm_aeqc_dw[1];
vf_data->aeqe_dma <<= QM_XQC_ADDR_OFFSET;
- vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[1];
+ vf_data->aeqe_dma |= vf_data->qm_aeqc_dw[0];
/* Through SQC_BT/CQC_BT to get sqc and cqc address */
ret = qm_get_sqc(vf_qm, &vf_data->sqc_dma);
@@ -552,6 +543,14 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
return 0;
}
+static struct hisi_acc_vf_core_device *hisi_acc_drvdata(struct pci_dev *pdev)
+{
+ struct vfio_pci_core_device *core_device = dev_get_drvdata(&pdev->dev);
+
+ return container_of(core_device, struct hisi_acc_vf_core_device,
+ core_device);
+}
+
/* Check the PF's RAS state and Function INT state */
static int
hisi_acc_check_int_state(struct hisi_acc_vf_core_device *hisi_acc_vdev)
@@ -662,7 +661,10 @@ static void hisi_acc_vf_start_device(struct hisi_acc_vf_core_device *hisi_acc_vd
if (hisi_acc_vdev->vf_qm_state != QM_READY)
return;
- vf_qm_fun_reset(hisi_acc_vdev, vf_qm);
+ /* Make sure the device is enabled */
+ qm_dev_cmd_init(vf_qm);
+
+ vf_qm_fun_reset(vf_qm);
}
static int hisi_acc_vf_load_state(struct hisi_acc_vf_core_device *hisi_acc_vdev)
@@ -970,7 +972,7 @@ hisi_acc_vfio_pci_get_device_state(struct vfio_device *vdev,
static void hisi_acc_vf_pci_aer_reset_done(struct pci_dev *pdev)
{
- struct hisi_acc_vf_core_device *hisi_acc_vdev = hssi_acc_drvdata(pdev);
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev);
if (hisi_acc_vdev->core_device.vdev.migration_flags !=
VFIO_MIGRATION_STOP_COPY)
@@ -1213,8 +1215,28 @@ static const struct vfio_migration_ops hisi_acc_vfio_pci_migrn_state_ops = {
.migration_get_state = hisi_acc_vfio_pci_get_device_state,
};
+static int hisi_acc_vfio_pci_migrn_init_dev(struct vfio_device *core_vdev)
+{
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = container_of(core_vdev,
+ struct hisi_acc_vf_core_device, core_device.vdev);
+ struct pci_dev *pdev = to_pci_dev(core_vdev->dev);
+ struct hisi_qm *pf_qm = hisi_acc_get_pf_qm(pdev);
+
+ hisi_acc_vdev->vf_id = pci_iov_vf_id(pdev) + 1;
+ hisi_acc_vdev->pf_qm = pf_qm;
+ hisi_acc_vdev->vf_dev = pdev;
+ mutex_init(&hisi_acc_vdev->state_mutex);
+
+ core_vdev->migration_flags = VFIO_MIGRATION_STOP_COPY;
+ core_vdev->mig_ops = &hisi_acc_vfio_pci_migrn_state_ops;
+
+ return vfio_pci_core_init_dev(core_vdev);
+}
+
static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = {
.name = "hisi-acc-vfio-pci-migration",
+ .init = hisi_acc_vfio_pci_migrn_init_dev,
+ .release = vfio_pci_core_release_dev,
.open_device = hisi_acc_vfio_pci_open_device,
.close_device = hisi_acc_vfio_pci_close_device,
.ioctl = hisi_acc_vfio_pci_ioctl,
@@ -1228,6 +1250,8 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = {
static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
.name = "hisi-acc-vfio-pci",
+ .init = vfio_pci_core_init_dev,
+ .release = vfio_pci_core_release_dev,
.open_device = hisi_acc_vfio_pci_open_device,
.close_device = vfio_pci_core_close_device,
.ioctl = vfio_pci_core_ioctl,
@@ -1239,73 +1263,45 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
.match = vfio_pci_core_match,
};
-static int
-hisi_acc_vfio_pci_migrn_init(struct hisi_acc_vf_core_device *hisi_acc_vdev,
- struct pci_dev *pdev, struct hisi_qm *pf_qm)
-{
- int vf_id;
-
- vf_id = pci_iov_vf_id(pdev);
- if (vf_id < 0)
- return vf_id;
-
- hisi_acc_vdev->vf_id = vf_id + 1;
- hisi_acc_vdev->core_device.vdev.migration_flags =
- VFIO_MIGRATION_STOP_COPY;
- hisi_acc_vdev->pf_qm = pf_qm;
- hisi_acc_vdev->vf_dev = pdev;
- mutex_init(&hisi_acc_vdev->state_mutex);
-
- return 0;
-}
-
static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct hisi_acc_vf_core_device *hisi_acc_vdev;
+ const struct vfio_device_ops *ops = &hisi_acc_vfio_pci_ops;
struct hisi_qm *pf_qm;
+ int vf_id;
int ret;
- hisi_acc_vdev = kzalloc(sizeof(*hisi_acc_vdev), GFP_KERNEL);
- if (!hisi_acc_vdev)
- return -ENOMEM;
-
pf_qm = hisi_acc_get_pf_qm(pdev);
if (pf_qm && pf_qm->ver >= QM_HW_V3) {
- ret = hisi_acc_vfio_pci_migrn_init(hisi_acc_vdev, pdev, pf_qm);
- if (!ret) {
- vfio_pci_core_init_device(&hisi_acc_vdev->core_device, pdev,
- &hisi_acc_vfio_pci_migrn_ops);
- hisi_acc_vdev->core_device.vdev.mig_ops =
- &hisi_acc_vfio_pci_migrn_state_ops;
- } else {
+ vf_id = pci_iov_vf_id(pdev);
+ if (vf_id >= 0)
+ ops = &hisi_acc_vfio_pci_migrn_ops;
+ else
pci_warn(pdev, "migration support failed, continue with generic interface\n");
- vfio_pci_core_init_device(&hisi_acc_vdev->core_device, pdev,
- &hisi_acc_vfio_pci_ops);
- }
- } else {
- vfio_pci_core_init_device(&hisi_acc_vdev->core_device, pdev,
- &hisi_acc_vfio_pci_ops);
}
+ hisi_acc_vdev = vfio_alloc_device(hisi_acc_vf_core_device,
+ core_device.vdev, &pdev->dev, ops);
+ if (IS_ERR(hisi_acc_vdev))
+ return PTR_ERR(hisi_acc_vdev);
+
dev_set_drvdata(&pdev->dev, &hisi_acc_vdev->core_device);
ret = vfio_pci_core_register_device(&hisi_acc_vdev->core_device);
if (ret)
- goto out_free;
+ goto out_put_vdev;
return 0;
-out_free:
- vfio_pci_core_uninit_device(&hisi_acc_vdev->core_device);
- kfree(hisi_acc_vdev);
+out_put_vdev:
+ vfio_put_device(&hisi_acc_vdev->core_device.vdev);
return ret;
}
static void hisi_acc_vfio_pci_remove(struct pci_dev *pdev)
{
- struct hisi_acc_vf_core_device *hisi_acc_vdev = hssi_acc_drvdata(pdev);
+ struct hisi_acc_vf_core_device *hisi_acc_vdev = hisi_acc_drvdata(pdev);
vfio_pci_core_unregister_device(&hisi_acc_vdev->core_device);
- vfio_pci_core_uninit_device(&hisi_acc_vdev->core_device);
- kfree(hisi_acc_vdev);
+ vfio_put_device(&hisi_acc_vdev->core_device.vdev);
}
static const struct pci_device_id hisi_acc_vfio_pci_table[] = {
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
index 5494f4983bbe..67343325b320 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
@@ -16,7 +16,6 @@
#define SEC_CORE_INT_STATUS 0x301008
#define HPRE_HAC_INT_STATUS 0x301800
#define HZIP_CORE_INT_STATUS 0x3010AC
-#define QM_QUE_ISO_CFG 0x301154
#define QM_VFT_CFG_RDY 0x10006c
#define QM_VFT_CFG_OP_WR 0x100058
@@ -80,7 +79,7 @@ struct acc_vf_data {
/* QM reserved 5 regs */
u32 qm_rsv_regs[5];
u32 padding;
- /* qm memory init information */
+ /* QM memory init information */
u64 eqe_dma;
u64 aeqe_dma;
u64 sqc_dma;
@@ -99,7 +98,7 @@ struct hisi_acc_vf_migration_file {
struct hisi_acc_vf_core_device {
struct vfio_pci_core_device core_device;
u8 deferred_reset:1;
- /* for migration state */
+ /* For migration state */
struct mutex state_mutex;
enum vfio_device_mig_state mig_state;
struct pci_dev *pf_dev;
@@ -108,7 +107,7 @@ struct hisi_acc_vf_core_device {
struct hisi_qm vf_qm;
u32 vf_qm_state;
int vf_id;
- /* for reset handler */
+ /* For reset handler */
spinlock_t reset_lock;
struct hisi_acc_vf_migration_file *resuming_migf;
struct hisi_acc_vf_migration_file *saving_migf;
diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index dd5d7bfe0a49..c604b70437a5 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -5,8 +5,12 @@
#include "cmd.h"
+enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
+
static int mlx5vf_cmd_get_vhca_id(struct mlx5_core_dev *mdev, u16 function_id,
u16 *vhca_id);
+static void
+_mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev);
int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
{
@@ -66,25 +70,35 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
return 0;
}
+static void set_tracker_error(struct mlx5vf_pci_core_device *mvdev)
+{
+	/* Mark the tracker as being in error and wake it up if it's running */
+ mvdev->tracker.is_err = true;
+ complete(&mvdev->tracker_comp);
+}
+
static int mlx5fv_vf_event(struct notifier_block *nb,
unsigned long event, void *data)
{
struct mlx5vf_pci_core_device *mvdev =
container_of(nb, struct mlx5vf_pci_core_device, nb);
- mutex_lock(&mvdev->state_mutex);
switch (event) {
case MLX5_PF_NOTIFY_ENABLE_VF:
+ mutex_lock(&mvdev->state_mutex);
mvdev->mdev_detach = false;
+ mlx5vf_state_mutex_unlock(mvdev);
break;
case MLX5_PF_NOTIFY_DISABLE_VF:
- mlx5vf_disable_fds(mvdev);
+ mlx5vf_cmd_close_migratable(mvdev);
+ mutex_lock(&mvdev->state_mutex);
mvdev->mdev_detach = true;
+ mlx5vf_state_mutex_unlock(mvdev);
break;
default:
break;
}
- mlx5vf_state_mutex_unlock(mvdev);
+
return 0;
}
@@ -93,8 +107,11 @@ void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev)
if (!mvdev->migrate_cap)
return;
+ /* Must be done outside the lock to let it progress */
+ set_tracker_error(mvdev);
mutex_lock(&mvdev->state_mutex);
mlx5vf_disable_fds(mvdev);
+ _mlx5vf_free_page_tracker_resources(mvdev);
mlx5vf_state_mutex_unlock(mvdev);
}
@@ -109,7 +126,8 @@ void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev)
}
void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
- const struct vfio_migration_ops *mig_ops)
+ const struct vfio_migration_ops *mig_ops,
+ const struct vfio_log_ops *log_ops)
{
struct pci_dev *pdev = mvdev->core_device.pdev;
int ret;
@@ -151,6 +169,9 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
VFIO_MIGRATION_STOP_COPY |
VFIO_MIGRATION_P2P;
mvdev->core_device.vdev.mig_ops = mig_ops;
+ init_completion(&mvdev->tracker_comp);
+ if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization))
+ mvdev->core_device.vdev.log_ops = log_ops;
end:
mlx5_vf_put_core_dev(mvdev->mdev);
@@ -188,11 +209,13 @@ err_exec:
return ret;
}
-static int _create_state_mkey(struct mlx5_core_dev *mdev, u32 pdn,
- struct mlx5_vf_migration_file *migf, u32 *mkey)
+static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
+ struct mlx5_vf_migration_file *migf,
+ struct mlx5_vhca_recv_buf *recv_buf,
+ u32 *mkey)
{
- size_t npages = DIV_ROUND_UP(migf->total_length, PAGE_SIZE);
- struct sg_dma_page_iter dma_iter;
+ size_t npages = migf ? DIV_ROUND_UP(migf->total_length, PAGE_SIZE) :
+ recv_buf->npages;
int err = 0, inlen;
__be64 *mtt;
void *mkc;
@@ -209,8 +232,17 @@ static int _create_state_mkey(struct mlx5_core_dev *mdev, u32 pdn,
DIV_ROUND_UP(npages, 2));
mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
- for_each_sgtable_dma_page(&migf->table.sgt, &dma_iter, 0)
- *mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
+ if (migf) {
+ struct sg_dma_page_iter dma_iter;
+
+ for_each_sgtable_dma_page(&migf->table.sgt, &dma_iter, 0)
+ *mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
+ } else {
+ int i;
+
+ for (i = 0; i < npages; i++)
+ *mtt++ = cpu_to_be64(recv_buf->dma_addrs[i]);
+ }
mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
@@ -223,7 +255,8 @@ static int _create_state_mkey(struct mlx5_core_dev *mdev, u32 pdn,
MLX5_SET(mkc, mkc, qpn, 0xffffff);
MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2));
- MLX5_SET64(mkc, mkc, len, migf->total_length);
+ MLX5_SET64(mkc, mkc, len,
+ migf ? migf->total_length : (npages * PAGE_SIZE));
err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
kvfree(in);
return err;
@@ -297,7 +330,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
if (err)
goto err_dma_map;
- err = _create_state_mkey(mdev, pdn, migf, &mkey);
+ err = _create_mkey(mdev, pdn, migf, NULL, &mkey);
if (err)
goto err_create_mkey;
@@ -369,7 +402,7 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
if (err)
goto err_reg;
- err = _create_state_mkey(mdev, pdn, migf, &mkey);
+ err = _create_mkey(mdev, pdn, migf, NULL, &mkey);
if (err)
goto err_mkey;
@@ -391,3 +424,939 @@ end:
mutex_unlock(&migf->lock);
return err;
}
+
+static void combine_ranges(struct rb_root_cached *root, u32 cur_nodes,
+ u32 req_nodes)
+{
+ struct interval_tree_node *prev, *curr, *comb_start, *comb_end;
+ unsigned long min_gap;
+ unsigned long curr_gap;
+
+ /* Special shortcut when a single range is required */
+ if (req_nodes == 1) {
+ unsigned long last;
+
+ curr = comb_start = interval_tree_iter_first(root, 0, ULONG_MAX);
+ while (curr) {
+ last = curr->last;
+ prev = curr;
+ curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
+ if (prev != comb_start)
+ interval_tree_remove(prev, root);
+ }
+ comb_start->last = last;
+ return;
+ }
+
+ /* Combine ranges which have the smallest gap */
+ while (cur_nodes > req_nodes) {
+ prev = NULL;
+ min_gap = ULONG_MAX;
+ curr = interval_tree_iter_first(root, 0, ULONG_MAX);
+ while (curr) {
+ if (prev) {
+ curr_gap = curr->start - prev->last;
+ if (curr_gap < min_gap) {
+ min_gap = curr_gap;
+ comb_start = prev;
+ comb_end = curr;
+ }
+ }
+ prev = curr;
+ curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
+ }
+ comb_start->last = comb_end->last;
+ interval_tree_remove(comb_end, root);
+ cur_nodes--;
+ }
+}
+
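+/*
+ * Illustrative example only: given the ranges [0, 0x1fff], [0x5000, 0x5fff]
+ * and [0x10000, 0x10fff] with req_nodes == 2, the pair with the smallest
+ * gap ([0, 0x1fff] and [0x5000, 0x5fff]) is merged into [0, 0x5fff],
+ * leaving the two ranges requested.
+ */
+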
+static int mlx5vf_create_tracker(struct mlx5_core_dev *mdev,
+ struct mlx5vf_pci_core_device *mvdev,
+ struct rb_root_cached *ranges, u32 nnodes)
+{
+ int max_num_range =
+ MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_max_num_range);
+ struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
+ int record_size = MLX5_ST_SZ_BYTES(page_track_range);
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+ struct interval_tree_node *node = NULL;
+ u64 total_ranges_len = 0;
+ u32 num_ranges = nnodes;
+ u8 log_addr_space_size;
+ void *range_list_ptr;
+ void *obj_context;
+ void *cmd_hdr;
+ int inlen;
+ void *in;
+ int err;
+ int i;
+
+ if (num_ranges > max_num_range) {
+ combine_ranges(ranges, nnodes, max_num_range);
+ num_ranges = max_num_range;
+ }
+
+ inlen = MLX5_ST_SZ_BYTES(create_page_track_obj_in) +
+ record_size * num_ranges;
+ in = kzalloc(inlen, GFP_KERNEL);
+ if (!in)
+ return -ENOMEM;
+
+ cmd_hdr = MLX5_ADDR_OF(create_page_track_obj_in, in,
+ general_obj_in_cmd_hdr);
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode,
+ MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type,
+ MLX5_OBJ_TYPE_PAGE_TRACK);
+ obj_context = MLX5_ADDR_OF(create_page_track_obj_in, in, obj_context);
+ MLX5_SET(page_track, obj_context, vhca_id, mvdev->vhca_id);
+ MLX5_SET(page_track, obj_context, track_type, 1);
+ MLX5_SET(page_track, obj_context, log_page_size,
+ ilog2(tracker->host_qp->tracked_page_size));
+ MLX5_SET(page_track, obj_context, log_msg_size,
+ ilog2(tracker->host_qp->max_msg_size));
+ MLX5_SET(page_track, obj_context, reporting_qpn, tracker->fw_qp->qpn);
+ MLX5_SET(page_track, obj_context, num_ranges, num_ranges);
+
+ range_list_ptr = MLX5_ADDR_OF(page_track, obj_context, track_range);
+ node = interval_tree_iter_first(ranges, 0, ULONG_MAX);
+ for (i = 0; i < num_ranges; i++) {
+ void *addr_range_i_base = range_list_ptr + record_size * i;
+ unsigned long length = node->last - node->start;
+
+ MLX5_SET64(page_track_range, addr_range_i_base, start_address,
+ node->start);
+ MLX5_SET64(page_track_range, addr_range_i_base, length, length);
+ total_ranges_len += length;
+ node = interval_tree_iter_next(node, 0, ULONG_MAX);
+ }
+
+ WARN_ON(node);
+ log_addr_space_size = ilog2(total_ranges_len);
+ if (log_addr_space_size <
+ (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_min_addr_space)) ||
+ log_addr_space_size >
+ (MLX5_CAP_ADV_VIRTUALIZATION(mdev, pg_track_log_max_addr_space))) {
+ err = -EOPNOTSUPP;
+ goto out;
+ }
+
+ MLX5_SET(page_track, obj_context, log_addr_space_size,
+ log_addr_space_size);
+ err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+ if (err)
+ goto out;
+
+ tracker->id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
+out:
+ kfree(in);
+ return err;
+}
+
+static int mlx5vf_cmd_destroy_tracker(struct mlx5_core_dev *mdev,
+ u32 tracker_id)
+{
+ u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+
+ MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK);
+ MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, tracker_id);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int mlx5vf_cmd_modify_tracker(struct mlx5_core_dev *mdev,
+ u32 tracker_id, unsigned long iova,
+ unsigned long length, u32 tracker_state)
+{
+ u32 in[MLX5_ST_SZ_DW(modify_page_track_obj_in)] = {};
+ u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
+ void *obj_context;
+ void *cmd_hdr;
+
+ cmd_hdr = MLX5_ADDR_OF(modify_page_track_obj_in, in, general_obj_in_cmd_hdr);
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_PAGE_TRACK);
+ MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, tracker_id);
+
+ obj_context = MLX5_ADDR_OF(modify_page_track_obj_in, in, obj_context);
+ MLX5_SET64(page_track, obj_context, modify_field_select, 0x3);
+ MLX5_SET64(page_track, obj_context, range_start_address, iova);
+ MLX5_SET64(page_track, obj_context, length, length);
+ MLX5_SET(page_track, obj_context, state, tracker_state);
+
+ return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+}
+
+static int alloc_cq_frag_buf(struct mlx5_core_dev *mdev,
+ struct mlx5_vhca_cq_buf *buf, int nent,
+ int cqe_size)
+{
+ struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
+ u8 log_wq_stride = 6 + (cqe_size == 128 ? 1 : 0);
+ u8 log_wq_sz = ilog2(cqe_size);
+ int err;
+
+ err = mlx5_frag_buf_alloc_node(mdev, nent * cqe_size, frag_buf,
+ mdev->priv.numa_node);
+ if (err)
+ return err;
+
+ mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
+ buf->cqe_size = cqe_size;
+ buf->nent = nent;
+ return 0;
+}
+
+static void init_cq_frag_buf(struct mlx5_vhca_cq_buf *buf)
+{
+ struct mlx5_cqe64 *cqe64;
+ void *cqe;
+ int i;
+
+ for (i = 0; i < buf->nent; i++) {
+ cqe = mlx5_frag_buf_get_wqe(&buf->fbc, i);
+ cqe64 = buf->cqe_size == 64 ? cqe : cqe + 64;
+ cqe64->op_own = MLX5_CQE_INVALID << 4;
+ }
+}
+
+static void mlx5vf_destroy_cq(struct mlx5_core_dev *mdev,
+ struct mlx5_vhca_cq *cq)
+{
+ mlx5_core_destroy_cq(mdev, &cq->mcq);
+ mlx5_frag_buf_free(mdev, &cq->buf.frag_buf);
+ mlx5_db_free(mdev, &cq->db);
+}
+
+static void mlx5vf_cq_event(struct mlx5_core_cq *mcq, enum mlx5_event type)
+{
+ if (type != MLX5_EVENT_TYPE_CQ_ERROR)
+ return;
+
+ set_tracker_error(container_of(mcq, struct mlx5vf_pci_core_device,
+ tracker.cq.mcq));
+}
+
+static int mlx5vf_event_notifier(struct notifier_block *nb, unsigned long type,
+ void *data)
+{
+ struct mlx5_vhca_page_tracker *tracker =
+ mlx5_nb_cof(nb, struct mlx5_vhca_page_tracker, nb);
+ struct mlx5vf_pci_core_device *mvdev = container_of(
+ tracker, struct mlx5vf_pci_core_device, tracker);
+ struct mlx5_eqe *eqe = data;
+ u8 event_type = (u8)type;
+ u8 queue_type;
+ int qp_num;
+
+ switch (event_type) {
+ case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+ case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+ case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+ queue_type = eqe->data.qp_srq.type;
+ if (queue_type != MLX5_EVENT_QUEUE_TYPE_QP)
+ break;
+ qp_num = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+ if (qp_num != tracker->host_qp->qpn &&
+ qp_num != tracker->fw_qp->qpn)
+ break;
+ set_tracker_error(mvdev);
+ break;
+ default:
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static void mlx5vf_cq_complete(struct mlx5_core_cq *mcq,
+ struct mlx5_eqe *eqe)
+{
+ struct mlx5vf_pci_core_device *mvdev =
+ container_of(mcq, struct mlx5vf_pci_core_device,
+ tracker.cq.mcq);
+
+ complete(&mvdev->tracker_comp);
+}
+
+static int mlx5vf_create_cq(struct mlx5_core_dev *mdev,
+ struct mlx5_vhca_page_tracker *tracker,
+ size_t ncqe)
+{
+ int cqe_size = cache_line_size() == 128 ? 128 : 64;
+ u32 out[MLX5_ST_SZ_DW(create_cq_out)];
+ struct mlx5_vhca_cq *cq;
+ int inlen, err, eqn;
+ void *cqc, *in;
+ __be64 *pas;
+ int vector;
+
+ cq = &tracker->cq;
+ ncqe = roundup_pow_of_two(ncqe);
+ err = mlx5_db_alloc_node(mdev, &cq->db, mdev->priv.numa_node);
+ if (err)
+ return err;
+
+ cq->ncqe = ncqe;
+ cq->mcq.set_ci_db = cq->db.db;
+ cq->mcq.arm_db = cq->db.db + 1;
+ cq->mcq.cqe_sz = cqe_size;
+ err = alloc_cq_frag_buf(mdev, &cq->buf, ncqe, cqe_size);
+ if (err)
+ goto err_db_free;
+
+ init_cq_frag_buf(&cq->buf);
+ inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
+ MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) *
+ cq->buf.frag_buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_buff;
+ }
+
+ vector = raw_smp_processor_id() % mlx5_comp_vectors_count(mdev);
+ err = mlx5_vector2eqn(mdev, vector, &eqn);
+ if (err)
+ goto err_vec;
+
+ cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
+ MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
+ MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
+ MLX5_SET(cqc, cqc, uar_page, tracker->uar->index);
+ MLX5_SET(cqc, cqc, log_page_size, cq->buf.frag_buf.page_shift -
+ MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma);
+ pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
+ mlx5_fill_page_frag_array(&cq->buf.frag_buf, pas);
+ cq->mcq.comp = mlx5vf_cq_complete;
+ cq->mcq.event = mlx5vf_cq_event;
+ err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
+ if (err)
+ goto err_vec;
+
+ mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, tracker->uar->map,
+ cq->mcq.cons_index);
+ kvfree(in);
+ return 0;
+
+err_vec:
+ kvfree(in);
+err_buff:
+ mlx5_frag_buf_free(mdev, &cq->buf.frag_buf);
+err_db_free:
+ mlx5_db_free(mdev, &cq->db);
+ return err;
+}
+
+static struct mlx5_vhca_qp *
+mlx5vf_create_rc_qp(struct mlx5_core_dev *mdev,
+ struct mlx5_vhca_page_tracker *tracker, u32 max_recv_wr)
+{
+ u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
+ struct mlx5_vhca_qp *qp;
+ u8 log_rq_stride;
+ u8 log_rq_sz;
+ void *qpc;
+ int inlen;
+ void *in;
+ int err;
+
+ qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+ if (!qp)
+ return ERR_PTR(-ENOMEM);
+
+ qp->rq.wqe_cnt = roundup_pow_of_two(max_recv_wr);
+ log_rq_stride = ilog2(MLX5_SEND_WQE_DS);
+ log_rq_sz = ilog2(qp->rq.wqe_cnt);
+ err = mlx5_db_alloc_node(mdev, &qp->db, mdev->priv.numa_node);
+ if (err)
+ goto err_free;
+
+ if (max_recv_wr) {
+ err = mlx5_frag_buf_alloc_node(mdev,
+ wq_get_byte_sz(log_rq_sz, log_rq_stride),
+ &qp->buf, mdev->priv.numa_node);
+ if (err)
+ goto err_db_free;
+ mlx5_init_fbc(qp->buf.frags, log_rq_stride, log_rq_sz, &qp->rq.fbc);
+ }
+
+ qp->rq.db = &qp->db.db[MLX5_RCV_DBR];
+ inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
+ MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
+ qp->buf.npages;
+ in = kvzalloc(inlen, GFP_KERNEL);
+ if (!in) {
+ err = -ENOMEM;
+ goto err_in;
+ }
+
+ qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+ MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+ MLX5_SET(qpc, qpc, pd, tracker->pdn);
+ MLX5_SET(qpc, qpc, uar_page, tracker->uar->index);
+ MLX5_SET(qpc, qpc, log_page_size,
+ qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
+ MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
+ if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
+ MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
+ MLX5_SET(qpc, qpc, no_sq, 1);
+ if (max_recv_wr) {
+ MLX5_SET(qpc, qpc, cqn_rcv, tracker->cq.mcq.cqn);
+ MLX5_SET(qpc, qpc, log_rq_stride, log_rq_stride - 4);
+ MLX5_SET(qpc, qpc, log_rq_size, log_rq_sz);
+ MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
+ MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma);
+ mlx5_fill_page_frag_array(&qp->buf,
+ (__be64 *)MLX5_ADDR_OF(create_qp_in,
+ in, pas));
+ } else {
+ MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
+ }
+
+ MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
+ err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+ kvfree(in);
+ if (err)
+ goto err_in;
+
+ qp->qpn = MLX5_GET(create_qp_out, out, qpn);
+ return qp;
+
+err_in:
+ if (max_recv_wr)
+ mlx5_frag_buf_free(mdev, &qp->buf);
+err_db_free:
+ mlx5_db_free(mdev, &qp->db);
+err_free:
+ kfree(qp);
+ return ERR_PTR(err);
+}
+
+static void mlx5vf_post_recv(struct mlx5_vhca_qp *qp)
+{
+ struct mlx5_wqe_data_seg *data;
+ unsigned int ix;
+
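+ /* Build a single-SGE receive WQE pointing at the next chunk of the receive buffer */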
+ WARN_ON(qp->rq.pc - qp->rq.cc >= qp->rq.wqe_cnt);
+ ix = qp->rq.pc & (qp->rq.wqe_cnt - 1);
+ data = mlx5_frag_buf_get_wqe(&qp->rq.fbc, ix);
+ data->byte_count = cpu_to_be32(qp->max_msg_size);
+ data->lkey = cpu_to_be32(qp->recv_buf.mkey);
+ data->addr = cpu_to_be64(qp->recv_buf.next_rq_offset);
+ qp->rq.pc++;
+ /* Make sure that descriptors are written before doorbell record. */
+ dma_wmb();
+ *qp->rq.db = cpu_to_be32(qp->rq.pc & 0xffff);
+}
+
+static int mlx5vf_activate_qp(struct mlx5_core_dev *mdev,
+ struct mlx5_vhca_qp *qp, u32 remote_qpn,
+ bool host_qp)
+{
+ u32 init_in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
+ u32 rtr_in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
+ u32 rts_in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
+ void *qpc;
+ int ret;
+
+ /* Init */
+ qpc = MLX5_ADDR_OF(rst2init_qp_in, init_in, qpc);
+ MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1);
+ MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
+ MLX5_SET(qpc, qpc, rre, 1);
+ MLX5_SET(qpc, qpc, rwe, 1);
+ MLX5_SET(rst2init_qp_in, init_in, opcode, MLX5_CMD_OP_RST2INIT_QP);
+ MLX5_SET(rst2init_qp_in, init_in, qpn, qp->qpn);
+ ret = mlx5_cmd_exec_in(mdev, rst2init_qp, init_in);
+ if (ret)
+ return ret;
+
+ if (host_qp) {
+ struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
+ int i;
+
+ for (i = 0; i < qp->rq.wqe_cnt; i++) {
+ mlx5vf_post_recv(qp);
+ recv_buf->next_rq_offset += qp->max_msg_size;
+ }
+ }
+
+ /* RTR */
+ qpc = MLX5_ADDR_OF(init2rtr_qp_in, rtr_in, qpc);
+ MLX5_SET(init2rtr_qp_in, rtr_in, qpn, qp->qpn);
+ MLX5_SET(qpc, qpc, mtu, IB_MTU_4096);
+ MLX5_SET(qpc, qpc, log_msg_max, MLX5_CAP_GEN(mdev, log_max_msg));
+ MLX5_SET(qpc, qpc, remote_qpn, remote_qpn);
+ MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1);
+ MLX5_SET(qpc, qpc, primary_address_path.fl, 1);
+ MLX5_SET(qpc, qpc, min_rnr_nak, 1);
+ MLX5_SET(init2rtr_qp_in, rtr_in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
+ MLX5_SET(init2rtr_qp_in, rtr_in, qpn, qp->qpn);
+ ret = mlx5_cmd_exec_in(mdev, init2rtr_qp, rtr_in);
+ if (ret || host_qp)
+ return ret;
+
+ /* RTS */
+ qpc = MLX5_ADDR_OF(rtr2rts_qp_in, rts_in, qpc);
+ MLX5_SET(rtr2rts_qp_in, rts_in, qpn, qp->qpn);
+ MLX5_SET(qpc, qpc, retry_count, 7);
+ MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */
+ MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */
+ MLX5_SET(rtr2rts_qp_in, rts_in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
+ MLX5_SET(rtr2rts_qp_in, rts_in, qpn, qp->qpn);
+
+ return mlx5_cmd_exec_in(mdev, rtr2rts_qp, rts_in);
+}
+
+static void mlx5vf_destroy_qp(struct mlx5_core_dev *mdev,
+ struct mlx5_vhca_qp *qp)
+{
+ u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
+
+ MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+ MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+ mlx5_cmd_exec_in(mdev, destroy_qp, in);
+
+ mlx5_frag_buf_free(mdev, &qp->buf);
+ mlx5_db_free(mdev, &qp->db);
+ kfree(qp);
+}
+
+static void free_recv_pages(struct mlx5_vhca_recv_buf *recv_buf)
+{
+ int i;
+
+ /* Undo alloc_pages_bulk_array() */
+ for (i = 0; i < recv_buf->npages; i++)
+ __free_page(recv_buf->page_list[i]);
+
+ kvfree(recv_buf->page_list);
+}
+
+static int alloc_recv_pages(struct mlx5_vhca_recv_buf *recv_buf,
+ unsigned int npages)
+{
+ unsigned int filled = 0, done = 0;
+ int i;
+
+ recv_buf->page_list = kvcalloc(npages, sizeof(*recv_buf->page_list),
+ GFP_KERNEL);
+ if (!recv_buf->page_list)
+ return -ENOMEM;
+
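+ /* alloc_pages_bulk_array() may fill only part of the array; retry until all npages entries are populated */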
+ for (;;) {
+ filled = alloc_pages_bulk_array(GFP_KERNEL, npages - done,
+ recv_buf->page_list + done);
+ if (!filled)
+ goto err;
+
+ done += filled;
+ if (done == npages)
+ break;
+ }
+
+ recv_buf->npages = npages;
+ return 0;
+
+err:
+ for (i = 0; i < npages; i++) {
+ if (recv_buf->page_list[i])
+ __free_page(recv_buf->page_list[i]);
+ }
+
+ kvfree(recv_buf->page_list);
+ return -ENOMEM;
+}
+
+static int register_dma_recv_pages(struct mlx5_core_dev *mdev,
+ struct mlx5_vhca_recv_buf *recv_buf)
+{
+ int i, j;
+
+ recv_buf->dma_addrs = kvcalloc(recv_buf->npages,
+ sizeof(*recv_buf->dma_addrs),
+ GFP_KERNEL);
+ if (!recv_buf->dma_addrs)
+ return -ENOMEM;
+
+ for (i = 0; i < recv_buf->npages; i++) {
+ recv_buf->dma_addrs[i] = dma_map_page(mdev->device,
+ recv_buf->page_list[i],
+ 0, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(mdev->device, recv_buf->dma_addrs[i]))
+ goto error;
+ }
+ return 0;
+
+error:
+ for (j = 0; j < i; j++)
+ dma_unmap_single(mdev->device, recv_buf->dma_addrs[j],
+ PAGE_SIZE, DMA_FROM_DEVICE);
+
+ kvfree(recv_buf->dma_addrs);
+ return -ENOMEM;
+}
+
+static void unregister_dma_recv_pages(struct mlx5_core_dev *mdev,
+ struct mlx5_vhca_recv_buf *recv_buf)
+{
+ int i;
+
+ for (i = 0; i < recv_buf->npages; i++)
+ dma_unmap_single(mdev->device, recv_buf->dma_addrs[i],
+ PAGE_SIZE, DMA_FROM_DEVICE);
+
+ kvfree(recv_buf->dma_addrs);
+}
+
+static void mlx5vf_free_qp_recv_resources(struct mlx5_core_dev *mdev,
+ struct mlx5_vhca_qp *qp)
+{
+ struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
+
+ mlx5_core_destroy_mkey(mdev, recv_buf->mkey);
+ unregister_dma_recv_pages(mdev, recv_buf);
+ free_recv_pages(&qp->recv_buf);
+}
+
+static int mlx5vf_alloc_qp_recv_resources(struct mlx5_core_dev *mdev,
+ struct mlx5_vhca_qp *qp, u32 pdn,
+ u64 rq_size)
+{
+ unsigned int npages = DIV_ROUND_UP_ULL(rq_size, PAGE_SIZE);
+ struct mlx5_vhca_recv_buf *recv_buf = &qp->recv_buf;
+ int err;
+
+ err = alloc_recv_pages(recv_buf, npages);
+ if (err < 0)
+ return err;
+
+ err = register_dma_recv_pages(mdev, recv_buf);
+ if (err)
+ goto end;
+
+ err = _create_mkey(mdev, pdn, NULL, recv_buf, &recv_buf->mkey);
+ if (err)
+ goto err_create_mkey;
+
+ return 0;
+
+err_create_mkey:
+ unregister_dma_recv_pages(mdev, recv_buf);
+end:
+ free_recv_pages(recv_buf);
+ return err;
+}
+
+static void
+_mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev)
+{
+ struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
+ struct mlx5_core_dev *mdev = mvdev->mdev;
+
+ lockdep_assert_held(&mvdev->state_mutex);
+
+ if (!mvdev->log_active)
+ return;
+
+ WARN_ON(mvdev->mdev_detach);
+
+ mlx5_eq_notifier_unregister(mdev, &tracker->nb);
+ mlx5vf_cmd_destroy_tracker(mdev, tracker->id);
+ mlx5vf_destroy_qp(mdev, tracker->fw_qp);
+ mlx5vf_free_qp_recv_resources(mdev, tracker->host_qp);
+ mlx5vf_destroy_qp(mdev, tracker->host_qp);
+ mlx5vf_destroy_cq(mdev, &tracker->cq);
+ mlx5_core_dealloc_pd(mdev, tracker->pdn);
+ mlx5_put_uars_page(mdev, tracker->uar);
+ mvdev->log_active = false;
+}
+
+int mlx5vf_stop_page_tracker(struct vfio_device *vdev)
+{
+ struct mlx5vf_pci_core_device *mvdev = container_of(
+ vdev, struct mlx5vf_pci_core_device, core_device.vdev);
+
+ mutex_lock(&mvdev->state_mutex);
+ if (!mvdev->log_active)
+ goto end;
+
+ _mlx5vf_free_page_tracker_resources(mvdev);
+ mvdev->log_active = false;
+end:
+ mlx5vf_state_mutex_unlock(mvdev);
+ return 0;
+}
+
+int mlx5vf_start_page_tracker(struct vfio_device *vdev,
+ struct rb_root_cached *ranges, u32 nnodes,
+ u64 *page_size)
+{
+ struct mlx5vf_pci_core_device *mvdev = container_of(
+ vdev, struct mlx5vf_pci_core_device, core_device.vdev);
+ struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
+ u8 log_tracked_page = ilog2(*page_size);
+ struct mlx5_vhca_qp *host_qp;
+ struct mlx5_vhca_qp *fw_qp;
+ struct mlx5_core_dev *mdev;
+ u32 max_msg_size = PAGE_SIZE;
+ u64 rq_size = SZ_2M;
+ u32 max_recv_wr;
+ int err;
+
+ mutex_lock(&mvdev->state_mutex);
+ if (mvdev->mdev_detach) {
+ err = -ENOTCONN;
+ goto end;
+ }
+
+ if (mvdev->log_active) {
+ err = -EINVAL;
+ goto end;
+ }
+
+ mdev = mvdev->mdev;
+ memset(tracker, 0, sizeof(*tracker));
+ tracker->uar = mlx5_get_uars_page(mdev);
+ if (IS_ERR(tracker->uar)) {
+ err = PTR_ERR(tracker->uar);
+ goto end;
+ }
+
+ err = mlx5_core_alloc_pd(mdev, &tracker->pdn);
+ if (err)
+ goto err_uar;
+
+ max_recv_wr = DIV_ROUND_UP_ULL(rq_size, max_msg_size);
+ err = mlx5vf_create_cq(mdev, tracker, max_recv_wr);
+ if (err)
+ goto err_dealloc_pd;
+
+ host_qp = mlx5vf_create_rc_qp(mdev, tracker, max_recv_wr);
+ if (IS_ERR(host_qp)) {
+ err = PTR_ERR(host_qp);
+ goto err_cq;
+ }
+
+ host_qp->max_msg_size = max_msg_size;
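+ /* Clamp the requested tracking page size to the range supported by the device */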
+ if (log_tracked_page < MLX5_CAP_ADV_VIRTUALIZATION(mdev,
+ pg_track_log_min_page_size)) {
+ log_tracked_page = MLX5_CAP_ADV_VIRTUALIZATION(mdev,
+ pg_track_log_min_page_size);
+ } else if (log_tracked_page > MLX5_CAP_ADV_VIRTUALIZATION(mdev,
+ pg_track_log_max_page_size)) {
+ log_tracked_page = MLX5_CAP_ADV_VIRTUALIZATION(mdev,
+ pg_track_log_max_page_size);
+ }
+
+ host_qp->tracked_page_size = (1ULL << log_tracked_page);
+ err = mlx5vf_alloc_qp_recv_resources(mdev, host_qp, tracker->pdn,
+ rq_size);
+ if (err)
+ goto err_host_qp;
+
+ fw_qp = mlx5vf_create_rc_qp(mdev, tracker, 0);
+ if (IS_ERR(fw_qp)) {
+ err = PTR_ERR(fw_qp);
+ goto err_recv_resources;
+ }
+
+ err = mlx5vf_activate_qp(mdev, host_qp, fw_qp->qpn, true);
+ if (err)
+ goto err_activate;
+
+ err = mlx5vf_activate_qp(mdev, fw_qp, host_qp->qpn, false);
+ if (err)
+ goto err_activate;
+
+ tracker->host_qp = host_qp;
+ tracker->fw_qp = fw_qp;
+ err = mlx5vf_create_tracker(mdev, mvdev, ranges, nnodes);
+ if (err)
+ goto err_activate;
+
+ MLX5_NB_INIT(&tracker->nb, mlx5vf_event_notifier, NOTIFY_ANY);
+ mlx5_eq_notifier_register(mdev, &tracker->nb);
+ *page_size = host_qp->tracked_page_size;
+ mvdev->log_active = true;
+ mlx5vf_state_mutex_unlock(mvdev);
+ return 0;
+
+err_activate:
+ mlx5vf_destroy_qp(mdev, fw_qp);
+err_recv_resources:
+ mlx5vf_free_qp_recv_resources(mdev, host_qp);
+err_host_qp:
+ mlx5vf_destroy_qp(mdev, host_qp);
+err_cq:
+ mlx5vf_destroy_cq(mdev, &tracker->cq);
+err_dealloc_pd:
+ mlx5_core_dealloc_pd(mdev, tracker->pdn);
+err_uar:
+ mlx5_put_uars_page(mdev, tracker->uar);
+end:
+ mlx5vf_state_mutex_unlock(mvdev);
+ return err;
+}
+
+static void
+set_report_output(u32 size, int index, struct mlx5_vhca_qp *qp,
+ struct iova_bitmap *dirty)
+{
+ u32 entry_size = MLX5_ST_SZ_BYTES(page_track_report_entry);
+ u32 nent = size / entry_size;
+ struct page *page;
+ u64 addr;
+ u64 *buf;
+ int i;
+
+ if (WARN_ON(index >= qp->recv_buf.npages ||
+ (nent > qp->max_msg_size / entry_size)))
+ return;
+
+ page = qp->recv_buf.page_list[index];
+ buf = kmap_local_page(page);
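+ /* Each report entry encodes one dirty IOVA split into 32-bit high/low halves */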
+ for (i = 0; i < nent; i++) {
+ addr = MLX5_GET(page_track_report_entry, buf + i,
+ dirty_address_low);
+ addr |= (u64)MLX5_GET(page_track_report_entry, buf + i,
+ dirty_address_high) << 32;
+ iova_bitmap_set(dirty, addr, qp->tracked_page_size);
+ }
+ kunmap_local(buf);
+}
+
+static void
+mlx5vf_rq_cqe(struct mlx5_vhca_qp *qp, struct mlx5_cqe64 *cqe,
+ struct iova_bitmap *dirty, int *tracker_status)
+{
+ u32 size;
+ int ix;
+
+ qp->rq.cc++;
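+ /* The top 4 bits of the immediate data carry the tracker reporting state */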
+ *tracker_status = be32_to_cpu(cqe->immediate) >> 28;
+ size = be32_to_cpu(cqe->byte_cnt);
+ ix = be16_to_cpu(cqe->wqe_counter) & (qp->rq.wqe_cnt - 1);
+
+ /* zero length CQE, no data */
+ WARN_ON(!size && *tracker_status == MLX5_PAGE_TRACK_STATE_REPORTING);
+ if (size)
+ set_report_output(size, ix, qp, dirty);
+
+ qp->recv_buf.next_rq_offset = ix * qp->max_msg_size;
+ mlx5vf_post_recv(qp);
+}
+
+static void *get_cqe(struct mlx5_vhca_cq *cq, int n)
+{
+ return mlx5_frag_buf_get_wqe(&cq->buf.fbc, n);
+}
+
+static struct mlx5_cqe64 *get_sw_cqe(struct mlx5_vhca_cq *cq, int n)
+{
+ void *cqe = get_cqe(cq, n & (cq->ncqe - 1));
+ struct mlx5_cqe64 *cqe64;
+
+ cqe64 = (cq->mcq.cqe_sz == 64) ? cqe : cqe + 64;
+
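+ /*
+ * A CQE is consumable when its opcode is valid and its ownership bit
+ * matches the wrap parity of the requested consumer index.
+ */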
+ if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
+ !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & (cq->ncqe)))) {
+ return cqe64;
+ } else {
+ return NULL;
+ }
+}
+
+static int
+mlx5vf_cq_poll_one(struct mlx5_vhca_cq *cq, struct mlx5_vhca_qp *qp,
+ struct iova_bitmap *dirty, int *tracker_status)
+{
+ struct mlx5_cqe64 *cqe;
+ u8 opcode;
+
+ cqe = get_sw_cqe(cq, cq->mcq.cons_index);
+ if (!cqe)
+ return CQ_EMPTY;
+
+ ++cq->mcq.cons_index;
+ /*
+ * Make sure we read CQ entry contents after we've checked the
+ * ownership bit.
+ */
+ rmb();
+ opcode = get_cqe_opcode(cqe);
+ switch (opcode) {
+ case MLX5_CQE_RESP_SEND_IMM:
+ mlx5vf_rq_cqe(qp, cqe, dirty, tracker_status);
+ return CQ_OK;
+ default:
+ return CQ_POLL_ERR;
+ }
+}
+
+int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova,
+ unsigned long length,
+ struct iova_bitmap *dirty)
+{
+ struct mlx5vf_pci_core_device *mvdev = container_of(
+ vdev, struct mlx5vf_pci_core_device, core_device.vdev);
+ struct mlx5_vhca_page_tracker *tracker = &mvdev->tracker;
+ struct mlx5_vhca_cq *cq = &tracker->cq;
+ struct mlx5_core_dev *mdev;
+ int poll_err, err;
+
+ mutex_lock(&mvdev->state_mutex);
+ if (!mvdev->log_active) {
+ err = -EINVAL;
+ goto end;
+ }
+
+ if (mvdev->mdev_detach) {
+ err = -ENOTCONN;
+ goto end;
+ }
+
+ mdev = mvdev->mdev;
+ err = mlx5vf_cmd_modify_tracker(mdev, tracker->id, iova, length,
+ MLX5_PAGE_TRACK_STATE_REPORTING);
+ if (err)
+ goto end;
+
+ tracker->status = MLX5_PAGE_TRACK_STATE_REPORTING;
+ while (tracker->status == MLX5_PAGE_TRACK_STATE_REPORTING &&
+ !tracker->is_err) {
+ poll_err = mlx5vf_cq_poll_one(cq, tracker->host_qp, dirty,
+ &tracker->status);
+ if (poll_err == CQ_EMPTY) {
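+ /*
+ * Arm the CQ and poll once more to catch a completion that arrived
+ * before the arm took effect; only then wait for the next event.
+ */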
+ mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, tracker->uar->map,
+ cq->mcq.cons_index);
+ poll_err = mlx5vf_cq_poll_one(cq, tracker->host_qp,
+ dirty, &tracker->status);
+ if (poll_err == CQ_EMPTY) {
+ wait_for_completion(&mvdev->tracker_comp);
+ continue;
+ }
+ }
+ if (poll_err == CQ_POLL_ERR) {
+ err = -EIO;
+ goto end;
+ }
+ mlx5_cq_set_ci(&cq->mcq);
+ }
+
+ if (tracker->status == MLX5_PAGE_TRACK_STATE_ERROR)
+ tracker->is_err = true;
+
+ if (tracker->is_err)
+ err = -EIO;
+end:
+ mlx5vf_state_mutex_unlock(mvdev);
+ return err;
+}
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 8208f4701a90..921d5720a1e5 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -9,6 +9,8 @@
#include <linux/kernel.h>
#include <linux/vfio_pci_core.h>
#include <linux/mlx5/driver.h>
+#include <linux/mlx5/cq.h>
+#include <linux/mlx5/qp.h>
struct mlx5vf_async_data {
struct mlx5_async_work cb_work;
@@ -39,6 +41,56 @@ struct mlx5_vf_migration_file {
struct mlx5vf_async_data async_data;
};
+struct mlx5_vhca_cq_buf {
+ struct mlx5_frag_buf_ctrl fbc;
+ struct mlx5_frag_buf frag_buf;
+ int cqe_size;
+ int nent;
+};
+
+struct mlx5_vhca_cq {
+ struct mlx5_vhca_cq_buf buf;
+ struct mlx5_db db;
+ struct mlx5_core_cq mcq;
+ size_t ncqe;
+};
+
+struct mlx5_vhca_recv_buf {
+ u32 npages;
+ struct page **page_list;
+ dma_addr_t *dma_addrs;
+ u32 next_rq_offset;
+ u32 mkey;
+};
+
+struct mlx5_vhca_qp {
+ struct mlx5_frag_buf buf;
+ struct mlx5_db db;
+ struct mlx5_vhca_recv_buf recv_buf;
+ u32 tracked_page_size;
+ u32 max_msg_size;
+ u32 qpn;
+ struct {
+ unsigned int pc;
+ unsigned int cc;
+ unsigned int wqe_cnt;
+ __be32 *db;
+ struct mlx5_frag_buf_ctrl fbc;
+ } rq;
+};
+
+struct mlx5_vhca_page_tracker {
+ u32 id;
+ u32 pdn;
+ u8 is_err:1;
+ struct mlx5_uars_page *uar;
+ struct mlx5_vhca_cq cq;
+ struct mlx5_vhca_qp *host_qp;
+ struct mlx5_vhca_qp *fw_qp;
+ struct mlx5_nb nb;
+ int status;
+};
+
struct mlx5vf_pci_core_device {
struct vfio_pci_core_device core_device;
int vf_id;
@@ -46,6 +98,8 @@ struct mlx5vf_pci_core_device {
u8 migrate_cap:1;
u8 deferred_reset:1;
u8 mdev_detach:1;
+ u8 log_active:1;
+ struct completion tracker_comp;
/* protect migration state */
struct mutex state_mutex;
enum vfio_device_mig_state mig_state;
@@ -53,6 +107,7 @@ struct mlx5vf_pci_core_device {
spinlock_t reset_lock;
struct mlx5_vf_migration_file *resuming_migf;
struct mlx5_vf_migration_file *saving_migf;
+ struct mlx5_vhca_page_tracker tracker;
struct workqueue_struct *cb_wq;
struct notifier_block nb;
struct mlx5_core_dev *mdev;
@@ -63,7 +118,8 @@ int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
size_t *state_size);
void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
- const struct vfio_migration_ops *mig_ops);
+ const struct vfio_migration_ops *mig_ops,
+ const struct vfio_log_ops *log_ops);
void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev);
void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev);
int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
@@ -73,4 +129,9 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev);
void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work);
+int mlx5vf_start_page_tracker(struct vfio_device *vdev,
+ struct rb_root_cached *ranges, u32 nnodes, u64 *page_size);
+int mlx5vf_stop_page_tracker(struct vfio_device *vdev);
+int mlx5vf_tracker_read_and_clear(struct vfio_device *vdev, unsigned long iova,
+ unsigned long length, struct iova_bitmap *dirty);
#endif /* MLX5_VFIO_CMD_H */
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index a9b63d15c5d3..fd6ccb8454a2 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -579,8 +579,41 @@ static const struct vfio_migration_ops mlx5vf_pci_mig_ops = {
.migration_get_state = mlx5vf_pci_get_device_state,
};
+static const struct vfio_log_ops mlx5vf_pci_log_ops = {
+ .log_start = mlx5vf_start_page_tracker,
+ .log_stop = mlx5vf_stop_page_tracker,
+ .log_read_and_clear = mlx5vf_tracker_read_and_clear,
+};
+
+static int mlx5vf_pci_init_dev(struct vfio_device *core_vdev)
+{
+ struct mlx5vf_pci_core_device *mvdev = container_of(core_vdev,
+ struct mlx5vf_pci_core_device, core_device.vdev);
+ int ret;
+
+ ret = vfio_pci_core_init_dev(core_vdev);
+ if (ret)
+ return ret;
+
+ mlx5vf_cmd_set_migratable(mvdev, &mlx5vf_pci_mig_ops,
+ &mlx5vf_pci_log_ops);
+
+ return 0;
+}
+
+static void mlx5vf_pci_release_dev(struct vfio_device *core_vdev)
+{
+ struct mlx5vf_pci_core_device *mvdev = container_of(core_vdev,
+ struct mlx5vf_pci_core_device, core_device.vdev);
+
+ mlx5vf_cmd_remove_migratable(mvdev);
+ vfio_pci_core_release_dev(core_vdev);
+}
+
static const struct vfio_device_ops mlx5vf_pci_ops = {
.name = "mlx5-vfio-pci",
+ .init = mlx5vf_pci_init_dev,
+ .release = mlx5vf_pci_release_dev,
.open_device = mlx5vf_pci_open_device,
.close_device = mlx5vf_pci_close_device,
.ioctl = vfio_pci_core_ioctl,
@@ -598,21 +631,19 @@ static int mlx5vf_pci_probe(struct pci_dev *pdev,
struct mlx5vf_pci_core_device *mvdev;
int ret;
- mvdev = kzalloc(sizeof(*mvdev), GFP_KERNEL);
- if (!mvdev)
- return -ENOMEM;
- vfio_pci_core_init_device(&mvdev->core_device, pdev, &mlx5vf_pci_ops);
- mlx5vf_cmd_set_migratable(mvdev, &mlx5vf_pci_mig_ops);
+ mvdev = vfio_alloc_device(mlx5vf_pci_core_device, core_device.vdev,
+ &pdev->dev, &mlx5vf_pci_ops);
+ if (IS_ERR(mvdev))
+ return PTR_ERR(mvdev);
+
dev_set_drvdata(&pdev->dev, &mvdev->core_device);
ret = vfio_pci_core_register_device(&mvdev->core_device);
if (ret)
- goto out_free;
+ goto out_put_vdev;
return 0;
-out_free:
- mlx5vf_cmd_remove_migratable(mvdev);
- vfio_pci_core_uninit_device(&mvdev->core_device);
- kfree(mvdev);
+out_put_vdev:
+ vfio_put_device(&mvdev->core_device.vdev);
return ret;
}
@@ -621,9 +652,7 @@ static void mlx5vf_pci_remove(struct pci_dev *pdev)
struct mlx5vf_pci_core_device *mvdev = mlx5vf_drvdata(pdev);
vfio_pci_core_unregister_device(&mvdev->core_device);
- mlx5vf_cmd_remove_migratable(mvdev);
- vfio_pci_core_uninit_device(&mvdev->core_device);
- kfree(mvdev);
+ vfio_put_device(&mvdev->core_device.vdev);
}
static const struct pci_device_id mlx5vf_pci_table[] = {
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 4d1a97415a27..1d4919edfbde 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -25,7 +25,7 @@
#include <linux/types.h>
#include <linux/uaccess.h>
-#include <linux/vfio_pci_core.h>
+#include "vfio_pci_priv.h"
#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC "VFIO PCI - User Level meta-driver"
@@ -127,6 +127,8 @@ static int vfio_pci_open_device(struct vfio_device *core_vdev)
static const struct vfio_device_ops vfio_pci_ops = {
.name = "vfio-pci",
+ .init = vfio_pci_core_init_dev,
+ .release = vfio_pci_core_release_dev,
.open_device = vfio_pci_open_device,
.close_device = vfio_pci_core_close_device,
.ioctl = vfio_pci_core_ioctl,
@@ -146,20 +148,19 @@ static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (vfio_pci_is_denylisted(pdev))
return -EINVAL;
- vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
- if (!vdev)
- return -ENOMEM;
- vfio_pci_core_init_device(vdev, pdev, &vfio_pci_ops);
+ vdev = vfio_alloc_device(vfio_pci_core_device, vdev, &pdev->dev,
+ &vfio_pci_ops);
+ if (IS_ERR(vdev))
+ return PTR_ERR(vdev);
dev_set_drvdata(&pdev->dev, vdev);
ret = vfio_pci_core_register_device(vdev);
if (ret)
- goto out_free;
+ goto out_put_vdev;
return 0;
-out_free:
- vfio_pci_core_uninit_device(vdev);
- kfree(vdev);
+out_put_vdev:
+ vfio_put_device(&vdev->vdev);
return ret;
}
@@ -168,8 +169,7 @@ static void vfio_pci_remove(struct pci_dev *pdev)
struct vfio_pci_core_device *vdev = dev_get_drvdata(&pdev->dev);
vfio_pci_core_unregister_device(vdev);
- vfio_pci_core_uninit_device(vdev);
- kfree(vdev);
+ vfio_put_device(&vdev->vdev);
}
static int vfio_pci_sriov_configure(struct pci_dev *pdev, int nr_virtfn)
diff --git a/drivers/vfio/pci/vfio_pci_config.c b/drivers/vfio/pci/vfio_pci_config.c
index 442d3ba4122b..4a350421c5f6 100644
--- a/drivers/vfio/pci/vfio_pci_config.c
+++ b/drivers/vfio/pci/vfio_pci_config.c
@@ -26,7 +26,7 @@
#include <linux/vfio.h>
#include <linux/slab.h>
-#include <linux/vfio_pci_core.h>
+#include "vfio_pci_priv.h"
/* Fake capability ID for standard config space */
#define PCI_CAP_ID_BASIC 0
@@ -1166,7 +1166,7 @@ static int vfio_msi_config_write(struct vfio_pci_core_device *vdev, int pos,
flags = le16_to_cpu(*pflags);
/* MSI is enabled via ioctl */
- if (!is_msi(vdev))
+ if (vdev->irq_type != VFIO_PCI_MSI_IRQ_INDEX)
flags &= ~PCI_MSI_FLAGS_ENABLE;
/* Check queue size */
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index c8d3b0450fb3..badc9d828cac 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -28,7 +28,7 @@
#include <linux/nospec.h>
#include <linux/sched/mm.h>
-#include <linux/vfio_pci_core.h>
+#include "vfio_pci_priv.h"
#define DRIVER_AUTHOR "Alex Williamson <alex.williamson@redhat.com>"
#define DRIVER_DESC "core driver for VFIO based PCI devices"
@@ -41,6 +41,23 @@ static bool disable_idle_d3;
static DEFINE_MUTEX(vfio_pci_sriov_pfs_mutex);
static LIST_HEAD(vfio_pci_sriov_pfs);
+struct vfio_pci_dummy_resource {
+ struct resource resource;
+ int index;
+ struct list_head res_next;
+};
+
+struct vfio_pci_vf_token {
+ struct mutex lock;
+ uuid_t uuid;
+ int users;
+};
+
+struct vfio_pci_mmap_vma {
+ struct vm_area_struct *vma;
+ struct list_head vma_next;
+};
+
static inline bool vfio_vga_disabled(void)
{
#ifdef CONFIG_VFIO_PCI_VGA
@@ -260,16 +277,189 @@ int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev, pci_power_t stat
return ret;
}
+static int vfio_pci_runtime_pm_entry(struct vfio_pci_core_device *vdev,
+ struct eventfd_ctx *efdctx)
+{
+ /*
+ * The vdev power-related flags are protected by the 'memory_lock'
+ * semaphore.
+ */
+ vfio_pci_zap_and_down_write_memory_lock(vdev);
+ if (vdev->pm_runtime_engaged) {
+ up_write(&vdev->memory_lock);
+ return -EINVAL;
+ }
+
+ vdev->pm_runtime_engaged = true;
+ vdev->pm_wake_eventfd_ctx = efdctx;
+ pm_runtime_put_noidle(&vdev->pdev->dev);
+ up_write(&vdev->memory_lock);
+
+ return 0;
+}
+
+static int vfio_pci_core_pm_entry(struct vfio_device *device, u32 flags,
+ void __user *arg, size_t argsz)
+{
+ struct vfio_pci_core_device *vdev =
+ container_of(device, struct vfio_pci_core_device, vdev);
+ int ret;
+
+ ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET, 0);
+ if (ret != 1)
+ return ret;
+
+ /*
+ * Inside vfio_pci_runtime_pm_entry(), only the runtime PM usage count
+ * will be decremented. The pm_runtime_put() will be invoked again
+ * while returning from the ioctl and then the device can go into
+ * runtime suspended state.
+ */
+ return vfio_pci_runtime_pm_entry(vdev, NULL);
+}
+
+static int vfio_pci_core_pm_entry_with_wakeup(
+ struct vfio_device *device, u32 flags,
+ struct vfio_device_low_power_entry_with_wakeup __user *arg,
+ size_t argsz)
+{
+ struct vfio_pci_core_device *vdev =
+ container_of(device, struct vfio_pci_core_device, vdev);
+ struct vfio_device_low_power_entry_with_wakeup entry;
+ struct eventfd_ctx *efdctx;
+ int ret;
+
+ ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET,
+ sizeof(entry));
+ if (ret != 1)
+ return ret;
+
+ if (copy_from_user(&entry, arg, sizeof(entry)))
+ return -EFAULT;
+
+ if (entry.wakeup_eventfd < 0)
+ return -EINVAL;
+
+ efdctx = eventfd_ctx_fdget(entry.wakeup_eventfd);
+ if (IS_ERR(efdctx))
+ return PTR_ERR(efdctx);
+
+ ret = vfio_pci_runtime_pm_entry(vdev, efdctx);
+ if (ret)
+ eventfd_ctx_put(efdctx);
+
+ return ret;
+}
+
+static void __vfio_pci_runtime_pm_exit(struct vfio_pci_core_device *vdev)
+{
+ if (vdev->pm_runtime_engaged) {
+ vdev->pm_runtime_engaged = false;
+ pm_runtime_get_noresume(&vdev->pdev->dev);
+
+ if (vdev->pm_wake_eventfd_ctx) {
+ eventfd_ctx_put(vdev->pm_wake_eventfd_ctx);
+ vdev->pm_wake_eventfd_ctx = NULL;
+ }
+ }
+}
+
+static void vfio_pci_runtime_pm_exit(struct vfio_pci_core_device *vdev)
+{
+ /*
+ * The vdev power-related flags are protected by the 'memory_lock'
+ * semaphore.
+ */
+ down_write(&vdev->memory_lock);
+ __vfio_pci_runtime_pm_exit(vdev);
+ up_write(&vdev->memory_lock);
+}
+
+static int vfio_pci_core_pm_exit(struct vfio_device *device, u32 flags,
+ void __user *arg, size_t argsz)
+{
+ struct vfio_pci_core_device *vdev =
+ container_of(device, struct vfio_pci_core_device, vdev);
+ int ret;
+
+ ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_SET, 0);
+ if (ret != 1)
+ return ret;
+
+ /*
+ * The device is always in the active state here due to pm wrappers
+ * around ioctls. If the device had entered a low power state and
+ * pm_wake_eventfd_ctx is valid, vfio_pci_core_runtime_resume() has
+ * already signaled the eventfd and exited low power mode itself.
+ * pm_runtime_engaged protects the redundant call here.
+ */
+ vfio_pci_runtime_pm_exit(vdev);
+ return 0;
+}
+
+#ifdef CONFIG_PM
+static int vfio_pci_core_runtime_suspend(struct device *dev)
+{
+ struct vfio_pci_core_device *vdev = dev_get_drvdata(dev);
+
+ down_write(&vdev->memory_lock);
+ /*
+ * The user can move the device into D3hot state before invoking
+ * power management IOCTL. Move the device into D0 state here and then
+ * the pci-driver core runtime PM suspend function will move the device
+ * into the low power state. Also, for the devices which have
+ * NoSoftRst-, it will help in restoring the original state
+ * (saved locally in 'vdev->pm_save').
+ */
+ vfio_pci_set_power_state(vdev, PCI_D0);
+ up_write(&vdev->memory_lock);
+
+ /*
+ * If INTx is enabled, then mask INTx before going into the runtime
+ * suspended state and unmask the same in the runtime resume.
+ * If INTx has already been masked by the user, then
+ * vfio_pci_intx_mask() will return false and in that case, INTx
+ * should not be unmasked in the runtime resume.
+ */
+ vdev->pm_intx_masked = ((vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX) &&
+ vfio_pci_intx_mask(vdev));
+
+ return 0;
+}
+
+static int vfio_pci_core_runtime_resume(struct device *dev)
+{
+ struct vfio_pci_core_device *vdev = dev_get_drvdata(dev);
+
+ /*
+ * Resuming with a valid pm_wake_eventfd_ctx signals the eventfd and
+ * exits low power mode.
+ */
+ down_write(&vdev->memory_lock);
+ if (vdev->pm_wake_eventfd_ctx) {
+ eventfd_signal(vdev->pm_wake_eventfd_ctx, 1);
+ __vfio_pci_runtime_pm_exit(vdev);
+ }
+ up_write(&vdev->memory_lock);
+
+ if (vdev->pm_intx_masked)
+ vfio_pci_intx_unmask(vdev);
+
+ return 0;
+}
+#endif /* CONFIG_PM */
+
/*
- * The dev_pm_ops needs to be provided to make pci-driver runtime PM working,
- * so use structure without any callbacks.
- *
* The pci-driver core runtime PM routines always save the device state
* before going into suspended state. If the device is going into low power
* state with only runtime PM ops, then no explicit handling is needed
* for the devices which have NoSoftRst-.
*/
-static const struct dev_pm_ops vfio_pci_core_pm_ops = { };
+static const struct dev_pm_ops vfio_pci_core_pm_ops = {
+ SET_RUNTIME_PM_OPS(vfio_pci_core_runtime_suspend,
+ vfio_pci_core_runtime_resume,
+ NULL)
+};
int vfio_pci_core_enable(struct vfio_pci_core_device *vdev)
{
@@ -371,6 +561,18 @@ void vfio_pci_core_disable(struct vfio_pci_core_device *vdev)
/*
* This function can be invoked while the power state is non-D0.
+ * This non-D0 power state can be with or without runtime PM.
+ * vfio_pci_runtime_pm_exit() will internally increment the usage
+ * count corresponding to pm_runtime_put() called during low power
+ * feature entry and then pm_runtime_resume() will wake up the device,
+ * if the device has already gone into the suspended state. Otherwise,
+ * the vfio_pci_set_power_state() will change the device power state
+ * to D0.
+ */
+ vfio_pci_runtime_pm_exit(vdev);
+ pm_runtime_resume(&pdev->dev);
+
+ /*
* This function calls __pci_reset_function_locked() which internally
* can use pci_pm_reset() for the function reset. pci_pm_reset() will
* fail if the power state is non-D0. Also, for the devices which
@@ -645,10 +847,10 @@ static int msix_mmappable_cap(struct vfio_pci_core_device *vdev,
return vfio_info_add_capability(caps, &header, sizeof(header));
}
-int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev,
- unsigned int type, unsigned int subtype,
- const struct vfio_pci_regops *ops,
- size_t size, u32 flags, void *data)
+int vfio_pci_core_register_dev_region(struct vfio_pci_core_device *vdev,
+ unsigned int type, unsigned int subtype,
+ const struct vfio_pci_regops *ops,
+ size_t size, u32 flags, void *data)
{
struct vfio_pci_region *region;
@@ -670,508 +872,532 @@ int vfio_pci_register_dev_region(struct vfio_pci_core_device *vdev,
return 0;
}
-EXPORT_SYMBOL_GPL(vfio_pci_register_dev_region);
+EXPORT_SYMBOL_GPL(vfio_pci_core_register_dev_region);
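+ /*
+ * Receives may be posted once the QP is in INIT: fill the whole RQ,
+ * one max_msg_size chunk of the receive buffer per WQE.
+ */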
-long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
- unsigned long arg)
+static int vfio_pci_ioctl_get_info(struct vfio_pci_core_device *vdev,
+ struct vfio_device_info __user *arg)
{
- struct vfio_pci_core_device *vdev =
- container_of(core_vdev, struct vfio_pci_core_device, vdev);
- unsigned long minsz;
-
- if (cmd == VFIO_DEVICE_GET_INFO) {
- struct vfio_device_info info;
- struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
- unsigned long capsz;
- int ret;
-
- minsz = offsetofend(struct vfio_device_info, num_irqs);
+ unsigned long minsz = offsetofend(struct vfio_device_info, num_irqs);
+ struct vfio_device_info info;
+ struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+ unsigned long capsz;
+ int ret;
- /* For backward compatibility, cannot require this */
- capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
+ /* For backward compatibility, cannot require this */
+ capsz = offsetofend(struct vfio_iommu_type1_info, cap_offset);
- if (copy_from_user(&info, (void __user *)arg, minsz))
- return -EFAULT;
+ if (copy_from_user(&info, arg, minsz))
+ return -EFAULT;
- if (info.argsz < minsz)
- return -EINVAL;
+ if (info.argsz < minsz)
+ return -EINVAL;
- if (info.argsz >= capsz) {
- minsz = capsz;
- info.cap_offset = 0;
- }
+ if (info.argsz >= capsz) {
+ minsz = capsz;
+ info.cap_offset = 0;
+ }
- info.flags = VFIO_DEVICE_FLAGS_PCI;
+ info.flags = VFIO_DEVICE_FLAGS_PCI;
- if (vdev->reset_works)
- info.flags |= VFIO_DEVICE_FLAGS_RESET;
+ if (vdev->reset_works)
+ info.flags |= VFIO_DEVICE_FLAGS_RESET;
- info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
- info.num_irqs = VFIO_PCI_NUM_IRQS;
+ info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
+ info.num_irqs = VFIO_PCI_NUM_IRQS;
- ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
- if (ret && ret != -ENODEV) {
- pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n");
- return ret;
- }
+ ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
+ if (ret && ret != -ENODEV) {
+ pci_warn(vdev->pdev,
+ "Failed to setup zPCI info capabilities\n");
+ return ret;
+ }
- if (caps.size) {
- info.flags |= VFIO_DEVICE_FLAGS_CAPS;
- if (info.argsz < sizeof(info) + caps.size) {
- info.argsz = sizeof(info) + caps.size;
- } else {
- vfio_info_cap_shift(&caps, sizeof(info));
- if (copy_to_user((void __user *)arg +
- sizeof(info), caps.buf,
- caps.size)) {
- kfree(caps.buf);
- return -EFAULT;
- }
- info.cap_offset = sizeof(info);
+ if (caps.size) {
+ info.flags |= VFIO_DEVICE_FLAGS_CAPS;
+ if (info.argsz < sizeof(info) + caps.size) {
+ info.argsz = sizeof(info) + caps.size;
+ } else {
+ vfio_info_cap_shift(&caps, sizeof(info));
+ if (copy_to_user(arg + 1, caps.buf, caps.size)) {
+ kfree(caps.buf);
+ return -EFAULT;
}
-
- kfree(caps.buf);
+ info.cap_offset = sizeof(*arg);
}
- return copy_to_user((void __user *)arg, &info, minsz) ?
- -EFAULT : 0;
+ kfree(caps.buf);
+ }
- } else if (cmd == VFIO_DEVICE_GET_REGION_INFO) {
- struct pci_dev *pdev = vdev->pdev;
- struct vfio_region_info info;
- struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
- int i, ret;
+ return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
+}
- minsz = offsetofend(struct vfio_region_info, offset);
+static int vfio_pci_ioctl_get_region_info(struct vfio_pci_core_device *vdev,
+ struct vfio_region_info __user *arg)
+{
+ unsigned long minsz = offsetofend(struct vfio_region_info, offset);
+ struct pci_dev *pdev = vdev->pdev;
+ struct vfio_region_info info;
+ struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
+ int i, ret;
- if (copy_from_user(&info, (void __user *)arg, minsz))
- return -EFAULT;
+ if (copy_from_user(&info, arg, minsz))
+ return -EFAULT;
- if (info.argsz < minsz)
- return -EINVAL;
+ if (info.argsz < minsz)
+ return -EINVAL;
- switch (info.index) {
- case VFIO_PCI_CONFIG_REGION_INDEX:
- info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
- info.size = pdev->cfg_size;
- info.flags = VFIO_REGION_INFO_FLAG_READ |
- VFIO_REGION_INFO_FLAG_WRITE;
+ switch (info.index) {
+ case VFIO_PCI_CONFIG_REGION_INDEX:
+ info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+ info.size = pdev->cfg_size;
+ info.flags = VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE;
+ break;
+ case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
+ info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+ info.size = pci_resource_len(pdev, info.index);
+ if (!info.size) {
+ info.flags = 0;
break;
- case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
- info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
- info.size = pci_resource_len(pdev, info.index);
- if (!info.size) {
- info.flags = 0;
- break;
- }
+ }
- info.flags = VFIO_REGION_INFO_FLAG_READ |
- VFIO_REGION_INFO_FLAG_WRITE;
- if (vdev->bar_mmap_supported[info.index]) {
- info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
- if (info.index == vdev->msix_bar) {
- ret = msix_mmappable_cap(vdev, &caps);
- if (ret)
- return ret;
- }
+ info.flags = VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE;
+ if (vdev->bar_mmap_supported[info.index]) {
+ info.flags |= VFIO_REGION_INFO_FLAG_MMAP;
+ if (info.index == vdev->msix_bar) {
+ ret = msix_mmappable_cap(vdev, &caps);
+ if (ret)
+ return ret;
}
+ }
- break;
- case VFIO_PCI_ROM_REGION_INDEX:
- {
- void __iomem *io;
- size_t size;
- u16 cmd;
-
- info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
- info.flags = 0;
+ break;
+ case VFIO_PCI_ROM_REGION_INDEX: {
+ void __iomem *io;
+ size_t size;
+ u16 cmd;
+
+ info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+ info.flags = 0;
+
+ /* Report the BAR size, not the ROM size */
+ info.size = pci_resource_len(pdev, info.index);
+ if (!info.size) {
+ /* Shadow ROMs appear as PCI option ROMs */
+ if (pdev->resource[PCI_ROM_RESOURCE].flags &
+ IORESOURCE_ROM_SHADOW)
+ info.size = 0x20000;
+ else
+ break;
+ }
- /* Report the BAR size, not the ROM size */
- info.size = pci_resource_len(pdev, info.index);
- if (!info.size) {
- /* Shadow ROMs appear as PCI option ROMs */
- if (pdev->resource[PCI_ROM_RESOURCE].flags &
- IORESOURCE_ROM_SHADOW)
- info.size = 0x20000;
- else
- break;
- }
+ /*
+ * Is it really there? Enable memory decode for implicit access
+ * in pci_map_rom().
+ */
+ cmd = vfio_pci_memory_lock_and_enable(vdev);
+ io = pci_map_rom(pdev, &size);
+ if (io) {
+ info.flags = VFIO_REGION_INFO_FLAG_READ;
+ pci_unmap_rom(pdev, io);
+ } else {
+ info.size = 0;
+ }
+ vfio_pci_memory_unlock_and_restore(vdev, cmd);
- /*
- * Is it really there? Enable memory decode for
- * implicit access in pci_map_rom().
- */
- cmd = vfio_pci_memory_lock_and_enable(vdev);
- io = pci_map_rom(pdev, &size);
- if (io) {
- info.flags = VFIO_REGION_INFO_FLAG_READ;
- pci_unmap_rom(pdev, io);
- } else {
- info.size = 0;
- }
- vfio_pci_memory_unlock_and_restore(vdev, cmd);
+ break;
+ }
+ case VFIO_PCI_VGA_REGION_INDEX:
+ if (!vdev->has_vga)
+ return -EINVAL;
- break;
- }
- case VFIO_PCI_VGA_REGION_INDEX:
- if (!vdev->has_vga)
- return -EINVAL;
+ info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+ info.size = 0xc0000;
+ info.flags = VFIO_REGION_INFO_FLAG_READ |
+ VFIO_REGION_INFO_FLAG_WRITE;
- info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
- info.size = 0xc0000;
- info.flags = VFIO_REGION_INFO_FLAG_READ |
- VFIO_REGION_INFO_FLAG_WRITE;
+ break;
+ default: {
+ struct vfio_region_info_cap_type cap_type = {
+ .header.id = VFIO_REGION_INFO_CAP_TYPE,
+ .header.version = 1
+ };
- break;
- default:
- {
- struct vfio_region_info_cap_type cap_type = {
- .header.id = VFIO_REGION_INFO_CAP_TYPE,
- .header.version = 1 };
+ if (info.index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
+ return -EINVAL;
+ info.index = array_index_nospec(
+ info.index, VFIO_PCI_NUM_REGIONS + vdev->num_regions);
- if (info.index >=
- VFIO_PCI_NUM_REGIONS + vdev->num_regions)
- return -EINVAL;
- info.index = array_index_nospec(info.index,
- VFIO_PCI_NUM_REGIONS +
- vdev->num_regions);
+ i = info.index - VFIO_PCI_NUM_REGIONS;
- i = info.index - VFIO_PCI_NUM_REGIONS;
+ info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
+ info.size = vdev->region[i].size;
+ info.flags = vdev->region[i].flags;
- info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index);
- info.size = vdev->region[i].size;
- info.flags = vdev->region[i].flags;
+ cap_type.type = vdev->region[i].type;
+ cap_type.subtype = vdev->region[i].subtype;
- cap_type.type = vdev->region[i].type;
- cap_type.subtype = vdev->region[i].subtype;
+ ret = vfio_info_add_capability(&caps, &cap_type.header,
+ sizeof(cap_type));
+ if (ret)
+ return ret;
- ret = vfio_info_add_capability(&caps, &cap_type.header,
- sizeof(cap_type));
+ if (vdev->region[i].ops->add_capability) {
+ ret = vdev->region[i].ops->add_capability(
+ vdev, &vdev->region[i], &caps);
if (ret)
return ret;
-
- if (vdev->region[i].ops->add_capability) {
- ret = vdev->region[i].ops->add_capability(vdev,
- &vdev->region[i], &caps);
- if (ret)
- return ret;
- }
- }
}
+ }
+ }
- if (caps.size) {
- info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
- if (info.argsz < sizeof(info) + caps.size) {
- info.argsz = sizeof(info) + caps.size;
- info.cap_offset = 0;
- } else {
- vfio_info_cap_shift(&caps, sizeof(info));
- if (copy_to_user((void __user *)arg +
- sizeof(info), caps.buf,
- caps.size)) {
- kfree(caps.buf);
- return -EFAULT;
- }
- info.cap_offset = sizeof(info);
+ if (caps.size) {
+ info.flags |= VFIO_REGION_INFO_FLAG_CAPS;
+ if (info.argsz < sizeof(info) + caps.size) {
+ info.argsz = sizeof(info) + caps.size;
+ info.cap_offset = 0;
+ } else {
+ vfio_info_cap_shift(&caps, sizeof(info));
+ if (copy_to_user(arg + 1, caps.buf, caps.size)) {
+ kfree(caps.buf);
+ return -EFAULT;
}
-
- kfree(caps.buf);
+ info.cap_offset = sizeof(*arg);
}
- return copy_to_user((void __user *)arg, &info, minsz) ?
- -EFAULT : 0;
+ kfree(caps.buf);
+ }
- } else if (cmd == VFIO_DEVICE_GET_IRQ_INFO) {
- struct vfio_irq_info info;
+ return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
+}
- minsz = offsetofend(struct vfio_irq_info, count);
+static int vfio_pci_ioctl_get_irq_info(struct vfio_pci_core_device *vdev,
+ struct vfio_irq_info __user *arg)
+{
+ unsigned long minsz = offsetofend(struct vfio_irq_info, count);
+ struct vfio_irq_info info;
- if (copy_from_user(&info, (void __user *)arg, minsz))
- return -EFAULT;
+ if (copy_from_user(&info, arg, minsz))
+ return -EFAULT;
- if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
- return -EINVAL;
+ if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
+ return -EINVAL;
- switch (info.index) {
- case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
- case VFIO_PCI_REQ_IRQ_INDEX:
+ switch (info.index) {
+ case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
+ case VFIO_PCI_REQ_IRQ_INDEX:
+ break;
+ case VFIO_PCI_ERR_IRQ_INDEX:
+ if (pci_is_pcie(vdev->pdev))
break;
- case VFIO_PCI_ERR_IRQ_INDEX:
- if (pci_is_pcie(vdev->pdev))
- break;
- fallthrough;
- default:
- return -EINVAL;
- }
-
- info.flags = VFIO_IRQ_INFO_EVENTFD;
-
- info.count = vfio_pci_get_irq_count(vdev, info.index);
+ fallthrough;
+ default:
+ return -EINVAL;
+ }
- if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
- info.flags |= (VFIO_IRQ_INFO_MASKABLE |
- VFIO_IRQ_INFO_AUTOMASKED);
- else
- info.flags |= VFIO_IRQ_INFO_NORESIZE;
+ info.flags = VFIO_IRQ_INFO_EVENTFD;
- return copy_to_user((void __user *)arg, &info, minsz) ?
- -EFAULT : 0;
+ info.count = vfio_pci_get_irq_count(vdev, info.index);
- } else if (cmd == VFIO_DEVICE_SET_IRQS) {
- struct vfio_irq_set hdr;
- u8 *data = NULL;
- int max, ret = 0;
- size_t data_size = 0;
+ if (info.index == VFIO_PCI_INTX_IRQ_INDEX)
+ info.flags |=
+ (VFIO_IRQ_INFO_MASKABLE | VFIO_IRQ_INFO_AUTOMASKED);
+ else
+ info.flags |= VFIO_IRQ_INFO_NORESIZE;
- minsz = offsetofend(struct vfio_irq_set, count);
+ return copy_to_user(arg, &info, minsz) ? -EFAULT : 0;
+}
- if (copy_from_user(&hdr, (void __user *)arg, minsz))
- return -EFAULT;
+static int vfio_pci_ioctl_set_irqs(struct vfio_pci_core_device *vdev,
+ struct vfio_irq_set __user *arg)
+{
+ unsigned long minsz = offsetofend(struct vfio_irq_set, count);
+ struct vfio_irq_set hdr;
+ u8 *data = NULL;
+ int max, ret = 0;
+ size_t data_size = 0;
- max = vfio_pci_get_irq_count(vdev, hdr.index);
+ if (copy_from_user(&hdr, arg, minsz))
+ return -EFAULT;
- ret = vfio_set_irqs_validate_and_prepare(&hdr, max,
- VFIO_PCI_NUM_IRQS, &data_size);
- if (ret)
- return ret;
+ max = vfio_pci_get_irq_count(vdev, hdr.index);
- if (data_size) {
- data = memdup_user((void __user *)(arg + minsz),
- data_size);
- if (IS_ERR(data))
- return PTR_ERR(data);
- }
+ ret = vfio_set_irqs_validate_and_prepare(&hdr, max, VFIO_PCI_NUM_IRQS,
+ &data_size);
+ if (ret)
+ return ret;
- mutex_lock(&vdev->igate);
+ if (data_size) {
+ data = memdup_user(&arg->data, data_size);
+ if (IS_ERR(data))
+ return PTR_ERR(data);
+ }
- ret = vfio_pci_set_irqs_ioctl(vdev, hdr.flags, hdr.index,
- hdr.start, hdr.count, data);
+ mutex_lock(&vdev->igate);
- mutex_unlock(&vdev->igate);
- kfree(data);
+ ret = vfio_pci_set_irqs_ioctl(vdev, hdr.flags, hdr.index, hdr.start,
+ hdr.count, data);
- return ret;
+ mutex_unlock(&vdev->igate);
+ kfree(data);
- } else if (cmd == VFIO_DEVICE_RESET) {
- int ret;
+ return ret;
+}
- if (!vdev->reset_works)
- return -EINVAL;
+static int vfio_pci_ioctl_reset(struct vfio_pci_core_device *vdev,
+ void __user *arg)
+{
+ int ret;
- vfio_pci_zap_and_down_write_memory_lock(vdev);
+ if (!vdev->reset_works)
+ return -EINVAL;
- /*
- * This function can be invoked while the power state is non-D0.
- * If pci_try_reset_function() has been called while the power
- * state is non-D0, then pci_try_reset_function() will
- * internally set the power state to D0 without vfio driver
- * involvement. For the devices which have NoSoftRst-, the
- * reset function can cause the PCI config space reset without
- * restoring the original state (saved locally in
- * 'vdev->pm_save').
- */
- vfio_pci_set_power_state(vdev, PCI_D0);
+ vfio_pci_zap_and_down_write_memory_lock(vdev);
- ret = pci_try_reset_function(vdev->pdev);
- up_write(&vdev->memory_lock);
+ /*
+ * This function can be invoked while the power state is non-D0. If
+ * pci_try_reset_function() has been called while the power state is
+ * non-D0, then pci_try_reset_function() will internally set the power
+ * state to D0 without vfio driver involvement. For the devices which
+ * have NoSoftRst-, the reset function can cause the PCI config space
+ * reset without restoring the original state (saved locally in
+ * 'vdev->pm_save').
+ */
+ vfio_pci_set_power_state(vdev, PCI_D0);
- return ret;
+ ret = pci_try_reset_function(vdev->pdev);
+ up_write(&vdev->memory_lock);
- } else if (cmd == VFIO_DEVICE_GET_PCI_HOT_RESET_INFO) {
- struct vfio_pci_hot_reset_info hdr;
- struct vfio_pci_fill_info fill = { 0 };
- struct vfio_pci_dependent_device *devices = NULL;
- bool slot = false;
- int ret = 0;
+ return ret;
+}
- minsz = offsetofend(struct vfio_pci_hot_reset_info, count);
+static int vfio_pci_ioctl_get_pci_hot_reset_info(
+ struct vfio_pci_core_device *vdev,
+ struct vfio_pci_hot_reset_info __user *arg)
+{
+ unsigned long minsz =
+ offsetofend(struct vfio_pci_hot_reset_info, count);
+ struct vfio_pci_hot_reset_info hdr;
+ struct vfio_pci_fill_info fill = { 0 };
+ struct vfio_pci_dependent_device *devices = NULL;
+ bool slot = false;
+ int ret = 0;
- if (copy_from_user(&hdr, (void __user *)arg, minsz))
- return -EFAULT;
+ if (copy_from_user(&hdr, arg, minsz))
+ return -EFAULT;
- if (hdr.argsz < minsz)
- return -EINVAL;
+ if (hdr.argsz < minsz)
+ return -EINVAL;
- hdr.flags = 0;
+ hdr.flags = 0;
- /* Can we do a slot or bus reset or neither? */
- if (!pci_probe_reset_slot(vdev->pdev->slot))
- slot = true;
- else if (pci_probe_reset_bus(vdev->pdev->bus))
- return -ENODEV;
+ /* Can we do a slot or bus reset or neither? */
+ if (!pci_probe_reset_slot(vdev->pdev->slot))
+ slot = true;
+ else if (pci_probe_reset_bus(vdev->pdev->bus))
+ return -ENODEV;
- /* How many devices are affected? */
- ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
- vfio_pci_count_devs,
- &fill.max, slot);
- if (ret)
- return ret;
+ /* How many devices are affected? */
+ ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs,
+ &fill.max, slot);
+ if (ret)
+ return ret;
- WARN_ON(!fill.max); /* Should always be at least one */
+ WARN_ON(!fill.max); /* Should always be at least one */
- /*
- * If there's enough space, fill it now, otherwise return
- * -ENOSPC and the number of devices affected.
- */
- if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) {
- ret = -ENOSPC;
- hdr.count = fill.max;
- goto reset_info_exit;
- }
+ /*
+ * If there's enough space, fill it now, otherwise return -ENOSPC and
+ * the number of devices affected.
+ */
+ if (hdr.argsz < sizeof(hdr) + (fill.max * sizeof(*devices))) {
+ ret = -ENOSPC;
+ hdr.count = fill.max;
+ goto reset_info_exit;
+ }
- devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL);
- if (!devices)
- return -ENOMEM;
+ devices = kcalloc(fill.max, sizeof(*devices), GFP_KERNEL);
+ if (!devices)
+ return -ENOMEM;
- fill.devices = devices;
+ fill.devices = devices;
- ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
- vfio_pci_fill_devs,
- &fill, slot);
+ ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_fill_devs,
+ &fill, slot);
- /*
- * If a device was removed between counting and filling,
- * we may come up short of fill.max. If a device was
- * added, we'll have a return of -EAGAIN above.
- */
- if (!ret)
- hdr.count = fill.cur;
+ /*
+ * If a device was removed between counting and filling, we may come up
+ * short of fill.max. If a device was added, we'll have a return of
+ * -EAGAIN above.
+ */
+ if (!ret)
+ hdr.count = fill.cur;
reset_info_exit:
- if (copy_to_user((void __user *)arg, &hdr, minsz))
+ if (copy_to_user(arg, &hdr, minsz))
+ ret = -EFAULT;
+
+ if (!ret) {
+ if (copy_to_user(&arg->devices, devices,
+ hdr.count * sizeof(*devices)))
ret = -EFAULT;
+ }
- if (!ret) {
- if (copy_to_user((void __user *)(arg + minsz), devices,
- hdr.count * sizeof(*devices)))
- ret = -EFAULT;
- }
+ kfree(devices);
+ return ret;
+}
- kfree(devices);
- return ret;
+static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
+ struct vfio_pci_hot_reset __user *arg)
+{
+ unsigned long minsz = offsetofend(struct vfio_pci_hot_reset, count);
+ struct vfio_pci_hot_reset hdr;
+ int32_t *group_fds;
+ struct file **files;
+ struct vfio_pci_group_info info;
+ bool slot = false;
+ int file_idx, count = 0, ret = 0;
- } else if (cmd == VFIO_DEVICE_PCI_HOT_RESET) {
- struct vfio_pci_hot_reset hdr;
- int32_t *group_fds;
- struct file **files;
- struct vfio_pci_group_info info;
- bool slot = false;
- int file_idx, count = 0, ret = 0;
+ if (copy_from_user(&hdr, arg, minsz))
+ return -EFAULT;
- minsz = offsetofend(struct vfio_pci_hot_reset, count);
+ if (hdr.argsz < minsz || hdr.flags)
+ return -EINVAL;
- if (copy_from_user(&hdr, (void __user *)arg, minsz))
- return -EFAULT;
+ /* Can we do a slot or bus reset or neither? */
+ if (!pci_probe_reset_slot(vdev->pdev->slot))
+ slot = true;
+ else if (pci_probe_reset_bus(vdev->pdev->bus))
+ return -ENODEV;
- if (hdr.argsz < minsz || hdr.flags)
- return -EINVAL;
+ /*
+ * We can't let userspace give us an arbitrarily large buffer to copy,
+ * so verify how many we think there could be. Note groups can have
+ * multiple devices so one group per device is the max.
+ */
+ ret = vfio_pci_for_each_slot_or_bus(vdev->pdev, vfio_pci_count_devs,
+ &count, slot);
+ if (ret)
+ return ret;
- /* Can we do a slot or bus reset or neither? */
- if (!pci_probe_reset_slot(vdev->pdev->slot))
- slot = true;
- else if (pci_probe_reset_bus(vdev->pdev->bus))
- return -ENODEV;
+ /* Somewhere between 1 and count is OK */
+ if (!hdr.count || hdr.count > count)
+ return -EINVAL;
- /*
- * We can't let userspace give us an arbitrarily large
- * buffer to copy, so verify how many we think there
- * could be. Note groups can have multiple devices so
- * one group per device is the max.
- */
- ret = vfio_pci_for_each_slot_or_bus(vdev->pdev,
- vfio_pci_count_devs,
- &count, slot);
- if (ret)
- return ret;
+ group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL);
+ files = kcalloc(hdr.count, sizeof(*files), GFP_KERNEL);
+ if (!group_fds || !files) {
+ kfree(group_fds);
+ kfree(files);
+ return -ENOMEM;
+ }
- /* Somewhere between 1 and count is OK */
- if (!hdr.count || hdr.count > count)
- return -EINVAL;
+ if (copy_from_user(group_fds, arg->group_fds,
+ hdr.count * sizeof(*group_fds))) {
+ kfree(group_fds);
+ kfree(files);
+ return -EFAULT;
+ }
- group_fds = kcalloc(hdr.count, sizeof(*group_fds), GFP_KERNEL);
- files = kcalloc(hdr.count, sizeof(*files), GFP_KERNEL);
- if (!group_fds || !files) {
- kfree(group_fds);
- kfree(files);
- return -ENOMEM;
- }
+ /*
+ * For each group_fd, get the group through the vfio external user
+ * interface and store the group and iommu ID. This ensures the group
+ * is held across the reset.
+ */
+ for (file_idx = 0; file_idx < hdr.count; file_idx++) {
+ struct file *file = fget(group_fds[file_idx]);
- if (copy_from_user(group_fds, (void __user *)(arg + minsz),
- hdr.count * sizeof(*group_fds))) {
- kfree(group_fds);
- kfree(files);
- return -EFAULT;
+ if (!file) {
+ ret = -EBADF;
+ break;
}
- /*
- * For each group_fd, get the group through the vfio external
- * user interface and store the group and iommu ID. This
- * ensures the group is held across the reset.
- */
- for (file_idx = 0; file_idx < hdr.count; file_idx++) {
- struct file *file = fget(group_fds[file_idx]);
-
- if (!file) {
- ret = -EBADF;
- break;
- }
-
- /* Ensure the FD is a vfio group FD.*/
- if (!vfio_file_iommu_group(file)) {
- fput(file);
- ret = -EINVAL;
- break;
- }
-
- files[file_idx] = file;
+ /* Ensure the FD is a vfio group FD.*/
+ if (!vfio_file_is_group(file)) {
+ fput(file);
+ ret = -EINVAL;
+ break;
}
- kfree(group_fds);
+ files[file_idx] = file;
+ }
- /* release reference to groups on error */
- if (ret)
- goto hot_reset_release;
+ kfree(group_fds);
+
+ /* release reference to groups on error */
+ if (ret)
+ goto hot_reset_release;
- info.count = hdr.count;
- info.files = files;
+ info.count = hdr.count;
+ info.files = files;
- ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);
+ ret = vfio_pci_dev_set_hot_reset(vdev->vdev.dev_set, &info);
hot_reset_release:
- for (file_idx--; file_idx >= 0; file_idx--)
- fput(files[file_idx]);
+ for (file_idx--; file_idx >= 0; file_idx--)
+ fput(files[file_idx]);
- kfree(files);
- return ret;
- } else if (cmd == VFIO_DEVICE_IOEVENTFD) {
- struct vfio_device_ioeventfd ioeventfd;
- int count;
+ kfree(files);
+ return ret;
+}
- minsz = offsetofend(struct vfio_device_ioeventfd, fd);
+static int vfio_pci_ioctl_ioeventfd(struct vfio_pci_core_device *vdev,
+ struct vfio_device_ioeventfd __user *arg)
+{
+ unsigned long minsz = offsetofend(struct vfio_device_ioeventfd, fd);
+ struct vfio_device_ioeventfd ioeventfd;
+ int count;
- if (copy_from_user(&ioeventfd, (void __user *)arg, minsz))
- return -EFAULT;
+ if (copy_from_user(&ioeventfd, arg, minsz))
+ return -EFAULT;
- if (ioeventfd.argsz < minsz)
- return -EINVAL;
+ if (ioeventfd.argsz < minsz)
+ return -EINVAL;
- if (ioeventfd.flags & ~VFIO_DEVICE_IOEVENTFD_SIZE_MASK)
- return -EINVAL;
+ if (ioeventfd.flags & ~VFIO_DEVICE_IOEVENTFD_SIZE_MASK)
+ return -EINVAL;
- count = ioeventfd.flags & VFIO_DEVICE_IOEVENTFD_SIZE_MASK;
+ count = ioeventfd.flags & VFIO_DEVICE_IOEVENTFD_SIZE_MASK;
- if (hweight8(count) != 1 || ioeventfd.fd < -1)
- return -EINVAL;
+ if (hweight8(count) != 1 || ioeventfd.fd < -1)
+ return -EINVAL;
- return vfio_pci_ioeventfd(vdev, ioeventfd.offset,
- ioeventfd.data, count, ioeventfd.fd);
+ return vfio_pci_ioeventfd(vdev, ioeventfd.offset, ioeventfd.data, count,
+ ioeventfd.fd);
+}
+
+long vfio_pci_core_ioctl(struct vfio_device *core_vdev, unsigned int cmd,
+ unsigned long arg)
+{
+ struct vfio_pci_core_device *vdev =
+ container_of(core_vdev, struct vfio_pci_core_device, vdev);
+ void __user *uarg = (void __user *)arg;
+
+ switch (cmd) {
+ case VFIO_DEVICE_GET_INFO:
+ return vfio_pci_ioctl_get_info(vdev, uarg);
+ case VFIO_DEVICE_GET_IRQ_INFO:
+ return vfio_pci_ioctl_get_irq_info(vdev, uarg);
+ case VFIO_DEVICE_GET_PCI_HOT_RESET_INFO:
+ return vfio_pci_ioctl_get_pci_hot_reset_info(vdev, uarg);
+ case VFIO_DEVICE_GET_REGION_INFO:
+ return vfio_pci_ioctl_get_region_info(vdev, uarg);
+ case VFIO_DEVICE_IOEVENTFD:
+ return vfio_pci_ioctl_ioeventfd(vdev, uarg);
+ case VFIO_DEVICE_PCI_HOT_RESET:
+ return vfio_pci_ioctl_pci_hot_reset(vdev, uarg);
+ case VFIO_DEVICE_RESET:
+ return vfio_pci_ioctl_reset(vdev, uarg);
+ case VFIO_DEVICE_SET_IRQS:
+ return vfio_pci_ioctl_set_irqs(vdev, uarg);
+ default:
+ return -ENOTTY;
}
- return -ENOTTY;
}
EXPORT_SYMBOL_GPL(vfio_pci_core_ioctl);
static int vfio_pci_core_feature_token(struct vfio_device *device, u32 flags,
- void __user *arg, size_t argsz)
+ uuid_t __user *arg, size_t argsz)
{
struct vfio_pci_core_device *vdev =
container_of(device, struct vfio_pci_core_device, vdev);
@@ -1202,6 +1428,13 @@ int vfio_pci_core_ioctl_feature(struct vfio_device *device, u32 flags,
void __user *arg, size_t argsz)
{
switch (flags & VFIO_DEVICE_FEATURE_MASK) {
+ case VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY:
+ return vfio_pci_core_pm_entry(device, flags, arg, argsz);
+ case VFIO_DEVICE_FEATURE_LOW_POWER_ENTRY_WITH_WAKEUP:
+ return vfio_pci_core_pm_entry_with_wakeup(device, flags,
+ arg, argsz);
+ case VFIO_DEVICE_FEATURE_LOW_POWER_EXIT:
+ return vfio_pci_core_pm_exit(device, flags, arg, argsz);
case VFIO_DEVICE_FEATURE_PCI_VF_TOKEN:
return vfio_pci_core_feature_token(device, flags, arg, argsz);
default:
@@ -1214,31 +1447,47 @@ static ssize_t vfio_pci_rw(struct vfio_pci_core_device *vdev, char __user *buf,
size_t count, loff_t *ppos, bool iswrite)
{
unsigned int index = VFIO_PCI_OFFSET_TO_INDEX(*ppos);
+ int ret;
if (index >= VFIO_PCI_NUM_REGIONS + vdev->num_regions)
return -EINVAL;
+ ret = pm_runtime_resume_and_get(&vdev->pdev->dev);
+ if (ret) {
+ pci_info_ratelimited(vdev->pdev, "runtime resume failed %d\n",
+ ret);
+ return -EIO;
+ }
+
switch (index) {
case VFIO_PCI_CONFIG_REGION_INDEX:
- return vfio_pci_config_rw(vdev, buf, count, ppos, iswrite);
+ ret = vfio_pci_config_rw(vdev, buf, count, ppos, iswrite);
+ break;
case VFIO_PCI_ROM_REGION_INDEX:
if (iswrite)
- return -EINVAL;
- return vfio_pci_bar_rw(vdev, buf, count, ppos, false);
+ ret = -EINVAL;
+ else
+ ret = vfio_pci_bar_rw(vdev, buf, count, ppos, false);
+ break;
case VFIO_PCI_BAR0_REGION_INDEX ... VFIO_PCI_BAR5_REGION_INDEX:
- return vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite);
+ ret = vfio_pci_bar_rw(vdev, buf, count, ppos, iswrite);
+ break;
case VFIO_PCI_VGA_REGION_INDEX:
- return vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite);
+ ret = vfio_pci_vga_rw(vdev, buf, count, ppos, iswrite);
+ break;
+
default:
index -= VFIO_PCI_NUM_REGIONS;
- return vdev->region[index].ops->rw(vdev, buf,
+ ret = vdev->region[index].ops->rw(vdev, buf,
count, ppos, iswrite);
+ break;
}
- return -EINVAL;
+ pm_runtime_put(&vdev->pdev->dev);
+ return ret;
}
ssize_t vfio_pci_core_read(struct vfio_device *core_vdev, char __user *buf,
@@ -1433,7 +1682,11 @@ static vm_fault_t vfio_pci_mmap_fault(struct vm_fault *vmf)
mutex_lock(&vdev->vma_lock);
down_read(&vdev->memory_lock);
- if (!__vfio_pci_memory_enabled(vdev)) {
+ /*
+ * Memory region cannot be accessed if the low power feature is engaged
+ * or memory access is disabled.
+ */
+ if (vdev->pm_runtime_engaged || !__vfio_pci_memory_enabled(vdev)) {
ret = VM_FAULT_SIGBUS;
goto up_out;
}
@@ -1825,12 +2078,12 @@ static void vfio_pci_vga_uninit(struct vfio_pci_core_device *vdev)
VGA_RSRC_LEGACY_MEM);
}
-void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev,
- struct pci_dev *pdev,
- const struct vfio_device_ops *vfio_pci_ops)
+int vfio_pci_core_init_dev(struct vfio_device *core_vdev)
{
- vfio_init_group_dev(&vdev->vdev, &pdev->dev, vfio_pci_ops);
- vdev->pdev = pdev;
+ struct vfio_pci_core_device *vdev =
+ container_of(core_vdev, struct vfio_pci_core_device, vdev);
+
+ vdev->pdev = to_pci_dev(core_vdev->dev);
vdev->irq_type = VFIO_PCI_NUM_IRQS;
mutex_init(&vdev->igate);
spin_lock_init(&vdev->irqlock);
@@ -1841,19 +2094,24 @@ void vfio_pci_core_init_device(struct vfio_pci_core_device *vdev,
INIT_LIST_HEAD(&vdev->vma_list);
INIT_LIST_HEAD(&vdev->sriov_pfs_item);
init_rwsem(&vdev->memory_lock);
+
+ return 0;
}
-EXPORT_SYMBOL_GPL(vfio_pci_core_init_device);
+EXPORT_SYMBOL_GPL(vfio_pci_core_init_dev);
-void vfio_pci_core_uninit_device(struct vfio_pci_core_device *vdev)
+void vfio_pci_core_release_dev(struct vfio_device *core_vdev)
{
+ struct vfio_pci_core_device *vdev =
+ container_of(core_vdev, struct vfio_pci_core_device, vdev);
+
mutex_destroy(&vdev->igate);
mutex_destroy(&vdev->ioeventfds_lock);
mutex_destroy(&vdev->vma_lock);
- vfio_uninit_group_dev(&vdev->vdev);
kfree(vdev->region);
kfree(vdev->pm_save);
+ vfio_free_device(core_vdev);
}
-EXPORT_SYMBOL_GPL(vfio_pci_core_uninit_device);
+EXPORT_SYMBOL_GPL(vfio_pci_core_release_dev);
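
With the old init/uninit helpers replaced by .init/.release callbacks, a variant driver now wires vfio_pci_core_init_dev() and vfio_pci_core_release_dev() into its vfio_device_ops and allocates the device with vfio_alloc_device(). A minimal sketch of that wiring follows; the my_* names are hypothetical, while the vfio_pci_core_* helpers referenced (enable/finish_enable/close_device and the read/write/mmap/ioctl exports) are existing core entry points, not something introduced by this hunk.

    #include <linux/pci.h>
    #include <linux/vfio_pci_core.h>

    static int my_vfio_pci_open_device(struct vfio_device *core_vdev)
    {
    	struct vfio_pci_core_device *vdev =
    		container_of(core_vdev, struct vfio_pci_core_device, vdev);
    	int ret;

    	ret = vfio_pci_core_enable(vdev);
    	if (ret)
    		return ret;
    	vfio_pci_core_finish_enable(vdev);
    	return 0;
    }

    static const struct vfio_device_ops my_vfio_pci_ops = {
    	.name		= "my-vfio-pci",
    	.init		= vfio_pci_core_init_dev,
    	.release	= vfio_pci_core_release_dev,
    	.open_device	= my_vfio_pci_open_device,
    	.close_device	= vfio_pci_core_close_device,
    	.ioctl		= vfio_pci_core_ioctl,
    	.device_feature	= vfio_pci_core_ioctl_feature,
    	.read		= vfio_pci_core_read,
    	.write		= vfio_pci_core_write,
    	.mmap		= vfio_pci_core_mmap,
    };

    static int my_vfio_pci_probe(struct pci_dev *pdev,
    			     const struct pci_device_id *id)
    {
    	struct vfio_pci_core_device *vdev;
    	int ret;

    	vdev = vfio_alloc_device(vfio_pci_core_device, vdev, &pdev->dev,
    				 &my_vfio_pci_ops);
    	if (IS_ERR(vdev))
    		return PTR_ERR(vdev);

    	dev_set_drvdata(&pdev->dev, vdev);
    	ret = vfio_pci_core_register_device(vdev);
    	if (ret) {
    		vfio_put_device(&vdev->vdev);
    		return ret;
    	}
    	return 0;
    }
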
int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
{
@@ -1875,6 +2133,11 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
return -EINVAL;
}
+ if (vdev->vdev.log_ops && !(vdev->vdev.log_ops->log_start &&
+ vdev->vdev.log_ops->log_stop &&
+ vdev->vdev.log_ops->log_read_and_clear))
+ return -EINVAL;
+
/*
* Prevent binding to PFs with VFs enabled, the VFs might be in use
* by the host or other users. We cannot capture the VFs if they
@@ -2148,6 +2411,15 @@ static int vfio_pci_dev_set_hot_reset(struct vfio_device_set *dev_set,
goto err_unlock;
}
+ /*
+ * Some of the devices in the dev_set can be in the runtime suspended
+ * state. Increment the usage count for all the devices in the dev_set
+ * before reset and decrement the same after reset.
+ */
+ ret = vfio_pci_dev_set_pm_runtime_get(dev_set);
+ if (ret)
+ goto err_unlock;
+
list_for_each_entry(cur_vma, &dev_set->device_list, vdev.dev_set_list) {
/*
* Test whether all the affected devices are contained by the
@@ -2203,6 +2475,9 @@ err_undo:
else
mutex_unlock(&cur->vma_lock);
}
+
+ list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list)
+ pm_runtime_put(&cur->pdev->dev);
err_unlock:
mutex_unlock(&dev_set->lock);
return ret;
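
vfio_pci_dev_set_pm_runtime_get() itself is outside this hunk; judging from the call site and the pm_runtime_put() unwind loop above, its job is to runtime-resume every device in the dev_set and roll back on failure. A plausible shape, as it might live in vfio_pci_core.c (an assumption for illustration, not the body carried by this patch):

    static int vfio_pci_dev_set_pm_runtime_get(struct vfio_device_set *dev_set)
    {
    	struct vfio_pci_core_device *cur;
    	int ret;

    	list_for_each_entry(cur, &dev_set->device_list, vdev.dev_set_list) {
    		ret = pm_runtime_resume_and_get(&cur->pdev->dev);
    		if (ret)
    			goto unwind;
    	}
    	return 0;

    unwind:
    	list_for_each_entry_continue_reverse(cur, &dev_set->device_list,
    					     vdev.dev_set_list)
    		pm_runtime_put(&cur->pdev->dev);
    	return ret;
    }
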
diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c
index 352c725ccf18..5e6ca5926954 100644
--- a/drivers/vfio/pci/vfio_pci_igd.c
+++ b/drivers/vfio/pci/vfio_pci_igd.c
@@ -15,7 +15,7 @@
#include <linux/uaccess.h>
#include <linux/vfio.h>
-#include <linux/vfio_pci_core.h>
+#include "vfio_pci_priv.h"
#define OPREGION_SIGNATURE "IntelGraphicsMem"
#define OPREGION_SIZE (8 * 1024)
@@ -257,7 +257,7 @@ static int vfio_pci_igd_opregion_init(struct vfio_pci_core_device *vdev)
}
}
- ret = vfio_pci_register_dev_region(vdev,
+ ret = vfio_pci_core_register_dev_region(vdev,
PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &vfio_pci_igd_regops,
size, VFIO_REGION_INFO_FLAG_READ, opregionvbt);
@@ -402,7 +402,7 @@ static int vfio_pci_igd_cfg_init(struct vfio_pci_core_device *vdev)
return -EINVAL;
}
- ret = vfio_pci_register_dev_region(vdev,
+ ret = vfio_pci_core_register_dev_region(vdev,
PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG,
&vfio_pci_igd_cfg_regops, host_bridge->cfg_size,
@@ -422,7 +422,7 @@ static int vfio_pci_igd_cfg_init(struct vfio_pci_core_device *vdev)
return -EINVAL;
}
- ret = vfio_pci_register_dev_region(vdev,
+ ret = vfio_pci_core_register_dev_region(vdev,
PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE,
VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG,
&vfio_pci_igd_cfg_regops, lpc_bridge->cfg_size,
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 6069a11fb51a..40c3d7cf163f 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -20,7 +20,33 @@
#include <linux/wait.h>
#include <linux/slab.h>
-#include <linux/vfio_pci_core.h>
+#include "vfio_pci_priv.h"
+
+struct vfio_pci_irq_ctx {
+ struct eventfd_ctx *trigger;
+ struct virqfd *unmask;
+ struct virqfd *mask;
+ char *name;
+ bool masked;
+ struct irq_bypass_producer producer;
+};
+
+static bool irq_is(struct vfio_pci_core_device *vdev, int type)
+{
+ return vdev->irq_type == type;
+}
+
+static bool is_intx(struct vfio_pci_core_device *vdev)
+{
+ return vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX;
+}
+
+static bool is_irq_none(struct vfio_pci_core_device *vdev)
+{
+ return !(vdev->irq_type == VFIO_PCI_INTX_IRQ_INDEX ||
+ vdev->irq_type == VFIO_PCI_MSI_IRQ_INDEX ||
+ vdev->irq_type == VFIO_PCI_MSIX_IRQ_INDEX);
+}
/*
* INTx
@@ -33,10 +59,12 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused)
eventfd_signal(vdev->ctx[0].trigger, 1);
}
-void vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
+/* Returns true if the INTx vfio_pci_irq_ctx.masked value is changed. */
+bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
{
struct pci_dev *pdev = vdev->pdev;
unsigned long flags;
+ bool masked_changed = false;
spin_lock_irqsave(&vdev->irqlock, flags);
@@ -60,9 +88,11 @@ void vfio_pci_intx_mask(struct vfio_pci_core_device *vdev)
disable_irq_nosync(pdev->irq);
vdev->ctx[0].masked = true;
+ masked_changed = true;
}
spin_unlock_irqrestore(&vdev->irqlock, flags);
+ return masked_changed;
}
/*
diff --git a/drivers/vfio/pci/vfio_pci_priv.h b/drivers/vfio/pci/vfio_pci_priv.h
new file mode 100644
index 000000000000..5e4fa69aee16
--- /dev/null
+++ b/drivers/vfio/pci/vfio_pci_priv.h
@@ -0,0 +1,104 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef VFIO_PCI_PRIV_H
+#define VFIO_PCI_PRIV_H
+
+#include <linux/vfio_pci_core.h>
+
+/* Special capability IDs with predefined access */
+#define PCI_CAP_ID_INVALID 0xFF /* default raw access */
+#define PCI_CAP_ID_INVALID_VIRT 0xFE /* default virt access */
+
+/* Cap maximum number of ioeventfds per device (arbitrary) */
+#define VFIO_PCI_IOEVENTFD_MAX 1000
+
+struct vfio_pci_ioeventfd {
+ struct list_head next;
+ struct vfio_pci_core_device *vdev;
+ struct virqfd *virqfd;
+ void __iomem *addr;
+ uint64_t data;
+ loff_t pos;
+ int bar;
+ int count;
+ bool test_mem;
+};
+
+bool vfio_pci_intx_mask(struct vfio_pci_core_device *vdev);
+void vfio_pci_intx_unmask(struct vfio_pci_core_device *vdev);
+
+int vfio_pci_set_irqs_ioctl(struct vfio_pci_core_device *vdev, uint32_t flags,
+ unsigned index, unsigned start, unsigned count,
+ void *data);
+
+ssize_t vfio_pci_config_rw(struct vfio_pci_core_device *vdev, char __user *buf,
+ size_t count, loff_t *ppos, bool iswrite);
+
+ssize_t vfio_pci_bar_rw(struct vfio_pci_core_device *vdev, char __user *buf,
+ size_t count, loff_t *ppos, bool iswrite);
+
+#ifdef CONFIG_VFIO_PCI_VGA
+ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev, char __user *buf,
+ size_t count, loff_t *ppos, bool iswrite);
+#else
+static inline ssize_t vfio_pci_vga_rw(struct vfio_pci_core_device *vdev,
+ char __user *buf, size_t count,
+ loff_t *ppos, bool iswrite)
+{
+ return -EINVAL;
+}
+#endif
+
+int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
+ uint64_t data, int count, int fd);
+
+int vfio_pci_init_perm_bits(void);
+void vfio_pci_uninit_perm_bits(void);
+
+int vfio_config_init(struct vfio_pci_core_device *vdev);
+void vfio_config_free(struct vfio_pci_core_device *vdev);
+
+int vfio_pci_set_power_state(struct vfio_pci_core_device *vdev,
+ pci_power_t state);
+
+bool __vfio_pci_memory_enabled(struct vfio_pci_core_device *vdev);
+void vfio_pci_zap_and_down_write_memory_lock(struct vfio_pci_core_device *vdev);
+u16 vfio_pci_memory_lock_and_enable(struct vfio_pci_core_device *vdev);
+void vfio_pci_memory_unlock_and_restore(struct vfio_pci_core_device *vdev,
+ u16 cmd);
+
+#ifdef CONFIG_VFIO_PCI_IGD
+int vfio_pci_igd_init(struct vfio_pci_core_device *vdev);
+#else
+static inline int vfio_pci_igd_init(struct vfio_pci_core_device *vdev)
+{
+ return -ENODEV;
+}
+#endif
+
+#ifdef CONFIG_VFIO_PCI_ZDEV_KVM
+int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
+ struct vfio_info_cap *caps);
+int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev);
+void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev);
+#else
+static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_core_device *vdev,
+ struct vfio_info_cap *caps)
+{
+ return -ENODEV;
+}
+
+static inline int vfio_pci_zdev_open_device(struct vfio_pci_core_device *vdev)
+{
+ return 0;
+}
+
+static inline void vfio_pci_zdev_close_device(struct vfio_pci_core_device *vdev)
+{}
+#endif
+
+static inline bool vfio_pci_is_vga(struct pci_dev *pdev)
+{
+ return (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
+}
+
+#endif
diff --git a/drivers/vfio/pci/vfio_pci_rdwr.c b/drivers/vfio/pci/vfio_pci_rdwr.c
index 82ac1569deb0..e352a033b4ae 100644
--- a/drivers/vfio/pci/vfio_pci_rdwr.c
+++ b/drivers/vfio/pci/vfio_pci_rdwr.c
@@ -17,7 +17,7 @@
#include <linux/vfio.h>
#include <linux/vgaarb.h>
-#include <linux/vfio_pci_core.h>
+#include "vfio_pci_priv.h"
#ifdef __LITTLE_ENDIAN
#define vfio_ioread64 ioread64
@@ -412,8 +412,8 @@ static void vfio_pci_ioeventfd_thread(void *opaque, void *unused)
vfio_pci_ioeventfd_do_write(ioeventfd, ioeventfd->test_mem);
}
-long vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
- uint64_t data, int count, int fd)
+int vfio_pci_ioeventfd(struct vfio_pci_core_device *vdev, loff_t offset,
+ uint64_t data, int count, int fd)
{
struct pci_dev *pdev = vdev->pdev;
loff_t pos = offset & VFIO_PCI_OFFSET_MASK;
diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c
index 0cbdcd14f1c8..0990fdb146b7 100644
--- a/drivers/vfio/pci/vfio_pci_zdev.c
+++ b/drivers/vfio/pci/vfio_pci_zdev.c
@@ -15,7 +15,7 @@
#include <asm/pci_clp.h>
#include <asm/pci_io.h>
-#include <linux/vfio_pci_core.h>
+#include "vfio_pci_priv.h"
/*
* Add the Base PCI Function information to the device info region.
diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c
index 1aaa4f721bd2..eaea63e5294c 100644
--- a/drivers/vfio/platform/vfio_amba.c
+++ b/drivers/vfio/platform/vfio_amba.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vfio.h>
+#include <linux/pm_runtime.h>
#include <linux/amba/bus.h>
#include "vfio_platform_private.h"
@@ -40,20 +41,16 @@ static int get_amba_irq(struct vfio_platform_device *vdev, int i)
return ret ? ret : -ENXIO;
}
-static int vfio_amba_probe(struct amba_device *adev, const struct amba_id *id)
+static int vfio_amba_init_dev(struct vfio_device *core_vdev)
{
- struct vfio_platform_device *vdev;
+ struct vfio_platform_device *vdev =
+ container_of(core_vdev, struct vfio_platform_device, vdev);
+ struct amba_device *adev = to_amba_device(core_vdev->dev);
int ret;
- vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
- if (!vdev)
- return -ENOMEM;
-
vdev->name = kasprintf(GFP_KERNEL, "vfio-amba-%08x", adev->periphid);
- if (!vdev->name) {
- kfree(vdev);
+ if (!vdev->name)
return -ENOMEM;
- }
vdev->opaque = (void *) adev;
vdev->flags = VFIO_DEVICE_FLAGS_AMBA;
@@ -61,26 +58,67 @@ static int vfio_amba_probe(struct amba_device *adev, const struct amba_id *id)
vdev->get_irq = get_amba_irq;
vdev->reset_required = false;
- ret = vfio_platform_probe_common(vdev, &adev->dev);
- if (ret) {
+ ret = vfio_platform_init_common(vdev);
+ if (ret)
kfree(vdev->name);
- kfree(vdev);
- return ret;
- }
+ return ret;
+}
+
+static const struct vfio_device_ops vfio_amba_ops;
+static int vfio_amba_probe(struct amba_device *adev, const struct amba_id *id)
+{
+ struct vfio_platform_device *vdev;
+ int ret;
+
+ vdev = vfio_alloc_device(vfio_platform_device, vdev, &adev->dev,
+ &vfio_amba_ops);
+ if (IS_ERR(vdev))
+ return PTR_ERR(vdev);
+ ret = vfio_register_group_dev(&vdev->vdev);
+ if (ret)
+ goto out_put_vdev;
+
+ pm_runtime_enable(&adev->dev);
dev_set_drvdata(&adev->dev, vdev);
return 0;
+
+out_put_vdev:
+ vfio_put_device(&vdev->vdev);
+ return ret;
+}
+
+static void vfio_amba_release_dev(struct vfio_device *core_vdev)
+{
+ struct vfio_platform_device *vdev =
+ container_of(core_vdev, struct vfio_platform_device, vdev);
+
+ vfio_platform_release_common(vdev);
+ kfree(vdev->name);
+ vfio_free_device(core_vdev);
}
static void vfio_amba_remove(struct amba_device *adev)
{
struct vfio_platform_device *vdev = dev_get_drvdata(&adev->dev);
- vfio_platform_remove_common(vdev);
- kfree(vdev->name);
- kfree(vdev);
+ vfio_unregister_group_dev(&vdev->vdev);
+ pm_runtime_disable(vdev->device);
+ vfio_put_device(&vdev->vdev);
}
+static const struct vfio_device_ops vfio_amba_ops = {
+ .name = "vfio-amba",
+ .init = vfio_amba_init_dev,
+ .release = vfio_amba_release_dev,
+ .open_device = vfio_platform_open_device,
+ .close_device = vfio_platform_close_device,
+ .ioctl = vfio_platform_ioctl,
+ .read = vfio_platform_read,
+ .write = vfio_platform_write,
+ .mmap = vfio_platform_mmap,
+};
+
static const struct amba_id pl330_ids[] = {
{ 0, 0 },
};
diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c
index 04f40c5acfd6..82cedcebfd90 100644
--- a/drivers/vfio/platform/vfio_platform.c
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/vfio.h>
+#include <linux/pm_runtime.h>
#include <linux/platform_device.h>
#include "vfio_platform_private.h"
@@ -36,14 +37,11 @@ static int get_platform_irq(struct vfio_platform_device *vdev, int i)
return platform_get_irq_optional(pdev, i);
}
-static int vfio_platform_probe(struct platform_device *pdev)
+static int vfio_platform_init_dev(struct vfio_device *core_vdev)
{
- struct vfio_platform_device *vdev;
- int ret;
-
- vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
- if (!vdev)
- return -ENOMEM;
+ struct vfio_platform_device *vdev =
+ container_of(core_vdev, struct vfio_platform_device, vdev);
+ struct platform_device *pdev = to_platform_device(core_vdev->dev);
vdev->opaque = (void *) pdev;
vdev->name = pdev->name;
@@ -52,24 +50,64 @@ static int vfio_platform_probe(struct platform_device *pdev)
vdev->get_irq = get_platform_irq;
vdev->reset_required = reset_required;
- ret = vfio_platform_probe_common(vdev, &pdev->dev);
- if (ret) {
- kfree(vdev);
- return ret;
- }
+ return vfio_platform_init_common(vdev);
+}
+
+static const struct vfio_device_ops vfio_platform_ops;
+static int vfio_platform_probe(struct platform_device *pdev)
+{
+ struct vfio_platform_device *vdev;
+ int ret;
+
+ vdev = vfio_alloc_device(vfio_platform_device, vdev, &pdev->dev,
+ &vfio_platform_ops);
+ if (IS_ERR(vdev))
+ return PTR_ERR(vdev);
+
+ ret = vfio_register_group_dev(&vdev->vdev);
+ if (ret)
+ goto out_put_vdev;
+
+ pm_runtime_enable(&pdev->dev);
dev_set_drvdata(&pdev->dev, vdev);
return 0;
+
+out_put_vdev:
+ vfio_put_device(&vdev->vdev);
+ return ret;
+}
+
+static void vfio_platform_release_dev(struct vfio_device *core_vdev)
+{
+ struct vfio_platform_device *vdev =
+ container_of(core_vdev, struct vfio_platform_device, vdev);
+
+ vfio_platform_release_common(vdev);
+ vfio_free_device(core_vdev);
}
static int vfio_platform_remove(struct platform_device *pdev)
{
struct vfio_platform_device *vdev = dev_get_drvdata(&pdev->dev);
- vfio_platform_remove_common(vdev);
- kfree(vdev);
+ vfio_unregister_group_dev(&vdev->vdev);
+ pm_runtime_disable(vdev->device);
+ vfio_put_device(&vdev->vdev);
return 0;
}
+static const struct vfio_device_ops vfio_platform_ops = {
+ .name = "vfio-platform",
+ .init = vfio_platform_init_dev,
+ .release = vfio_platform_release_dev,
+ .open_device = vfio_platform_open_device,
+ .close_device = vfio_platform_close_device,
+ .ioctl = vfio_platform_ioctl,
+ .read = vfio_platform_read,
+ .write = vfio_platform_write,
+ .mmap = vfio_platform_mmap,
+};
+
static struct platform_driver vfio_platform_driver = {
.probe = vfio_platform_probe,
.remove = vfio_platform_remove,
diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c
index 256f55b84e70..55dc4f43c31e 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -218,7 +218,7 @@ static int vfio_platform_call_reset(struct vfio_platform_device *vdev,
return -EINVAL;
}
-static void vfio_platform_close_device(struct vfio_device *core_vdev)
+void vfio_platform_close_device(struct vfio_device *core_vdev)
{
struct vfio_platform_device *vdev =
container_of(core_vdev, struct vfio_platform_device, vdev);
@@ -236,8 +236,9 @@ static void vfio_platform_close_device(struct vfio_device *core_vdev)
vfio_platform_regions_cleanup(vdev);
vfio_platform_irq_cleanup(vdev);
}
+EXPORT_SYMBOL_GPL(vfio_platform_close_device);
-static int vfio_platform_open_device(struct vfio_device *core_vdev)
+int vfio_platform_open_device(struct vfio_device *core_vdev)
{
struct vfio_platform_device *vdev =
container_of(core_vdev, struct vfio_platform_device, vdev);
@@ -273,9 +274,10 @@ err_irq:
vfio_platform_regions_cleanup(vdev);
return ret;
}
+EXPORT_SYMBOL_GPL(vfio_platform_open_device);
-static long vfio_platform_ioctl(struct vfio_device *core_vdev,
- unsigned int cmd, unsigned long arg)
+long vfio_platform_ioctl(struct vfio_device *core_vdev,
+ unsigned int cmd, unsigned long arg)
{
struct vfio_platform_device *vdev =
container_of(core_vdev, struct vfio_platform_device, vdev);
@@ -382,6 +384,7 @@ static long vfio_platform_ioctl(struct vfio_device *core_vdev,
return -ENOTTY;
}
+EXPORT_SYMBOL_GPL(vfio_platform_ioctl);
static ssize_t vfio_platform_read_mmio(struct vfio_platform_region *reg,
char __user *buf, size_t count,
@@ -438,8 +441,8 @@ err:
return -EFAULT;
}
-static ssize_t vfio_platform_read(struct vfio_device *core_vdev,
- char __user *buf, size_t count, loff_t *ppos)
+ssize_t vfio_platform_read(struct vfio_device *core_vdev,
+ char __user *buf, size_t count, loff_t *ppos)
{
struct vfio_platform_device *vdev =
container_of(core_vdev, struct vfio_platform_device, vdev);
@@ -460,6 +463,7 @@ static ssize_t vfio_platform_read(struct vfio_device *core_vdev,
return -EINVAL;
}
+EXPORT_SYMBOL_GPL(vfio_platform_read);
static ssize_t vfio_platform_write_mmio(struct vfio_platform_region *reg,
const char __user *buf, size_t count,
@@ -515,8 +519,8 @@ err:
return -EFAULT;
}
-static ssize_t vfio_platform_write(struct vfio_device *core_vdev, const char __user *buf,
- size_t count, loff_t *ppos)
+ssize_t vfio_platform_write(struct vfio_device *core_vdev, const char __user *buf,
+ size_t count, loff_t *ppos)
{
struct vfio_platform_device *vdev =
container_of(core_vdev, struct vfio_platform_device, vdev);
@@ -537,6 +541,7 @@ static ssize_t vfio_platform_write(struct vfio_device *core_vdev, const char __u
return -EINVAL;
}
+EXPORT_SYMBOL_GPL(vfio_platform_write);
static int vfio_platform_mmap_mmio(struct vfio_platform_region region,
struct vm_area_struct *vma)
@@ -558,7 +563,7 @@ static int vfio_platform_mmap_mmio(struct vfio_platform_region region,
req_len, vma->vm_page_prot);
}
-static int vfio_platform_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
+int vfio_platform_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma)
{
struct vfio_platform_device *vdev =
container_of(core_vdev, struct vfio_platform_device, vdev);
@@ -598,16 +603,7 @@ static int vfio_platform_mmap(struct vfio_device *core_vdev, struct vm_area_stru
return -EINVAL;
}
-
-static const struct vfio_device_ops vfio_platform_ops = {
- .name = "vfio-platform",
- .open_device = vfio_platform_open_device,
- .close_device = vfio_platform_close_device,
- .ioctl = vfio_platform_ioctl,
- .read = vfio_platform_read,
- .write = vfio_platform_write,
- .mmap = vfio_platform_mmap,
-};
+EXPORT_SYMBOL_GPL(vfio_platform_mmap);
static int vfio_platform_of_probe(struct vfio_platform_device *vdev,
struct device *dev)
@@ -639,55 +635,34 @@ static int vfio_platform_of_probe(struct vfio_platform_device *vdev,
* If the firmware is ACPI type, then acpi_disabled is 0. All other checks are
* valid checks. We cannot claim that this system is DT.
*/
-int vfio_platform_probe_common(struct vfio_platform_device *vdev,
- struct device *dev)
+int vfio_platform_init_common(struct vfio_platform_device *vdev)
{
int ret;
-
- vfio_init_group_dev(&vdev->vdev, dev, &vfio_platform_ops);
+ struct device *dev = vdev->vdev.dev;
ret = vfio_platform_acpi_probe(vdev, dev);
if (ret)
ret = vfio_platform_of_probe(vdev, dev);
if (ret)
- goto out_uninit;
+ return ret;
vdev->device = dev;
+ mutex_init(&vdev->igate);
ret = vfio_platform_get_reset(vdev);
- if (ret && vdev->reset_required) {
+ if (ret && vdev->reset_required)
dev_err(dev, "No reset function found for device %s\n",
vdev->name);
- goto out_uninit;
- }
-
- ret = vfio_register_group_dev(&vdev->vdev);
- if (ret)
- goto put_reset;
-
- mutex_init(&vdev->igate);
-
- pm_runtime_enable(dev);
- return 0;
-
-put_reset:
- vfio_platform_put_reset(vdev);
-out_uninit:
- vfio_uninit_group_dev(&vdev->vdev);
return ret;
}
-EXPORT_SYMBOL_GPL(vfio_platform_probe_common);
+EXPORT_SYMBOL_GPL(vfio_platform_init_common);
-void vfio_platform_remove_common(struct vfio_platform_device *vdev)
+void vfio_platform_release_common(struct vfio_platform_device *vdev)
{
- vfio_unregister_group_dev(&vdev->vdev);
-
- pm_runtime_disable(vdev->device);
vfio_platform_put_reset(vdev);
- vfio_uninit_group_dev(&vdev->vdev);
}
-EXPORT_SYMBOL_GPL(vfio_platform_remove_common);
+EXPORT_SYMBOL_GPL(vfio_platform_release_common);
void __vfio_platform_register_reset(struct vfio_platform_reset_node *node)
{
diff --git a/drivers/vfio/platform/vfio_platform_private.h b/drivers/vfio/platform/vfio_platform_private.h
index 691b43f4b2b2..8d8fab516849 100644
--- a/drivers/vfio/platform/vfio_platform_private.h
+++ b/drivers/vfio/platform/vfio_platform_private.h
@@ -78,9 +78,21 @@ struct vfio_platform_reset_node {
vfio_platform_reset_fn_t of_reset;
};
-int vfio_platform_probe_common(struct vfio_platform_device *vdev,
- struct device *dev);
-void vfio_platform_remove_common(struct vfio_platform_device *vdev);
+int vfio_platform_init_common(struct vfio_platform_device *vdev);
+void vfio_platform_release_common(struct vfio_platform_device *vdev);
+
+int vfio_platform_open_device(struct vfio_device *core_vdev);
+void vfio_platform_close_device(struct vfio_device *core_vdev);
+long vfio_platform_ioctl(struct vfio_device *core_vdev,
+ unsigned int cmd, unsigned long arg);
+ssize_t vfio_platform_read(struct vfio_device *core_vdev,
+ char __user *buf, size_t count,
+ loff_t *ppos);
+ssize_t vfio_platform_write(struct vfio_device *core_vdev,
+ const char __user *buf,
+ size_t count, loff_t *ppos);
+int vfio_platform_mmap(struct vfio_device *core_vdev,
+ struct vm_area_struct *vma);
int vfio_platform_irq_init(struct vfio_platform_device *vdev);
void vfio_platform_irq_cleanup(struct vfio_platform_device *vdev);
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index 503bea6c843d..bcad54bbab08 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -3,6 +3,16 @@
* Copyright (C) 2012 Red Hat, Inc. All rights reserved.
* Author: Alex Williamson <alex.williamson@redhat.com>
*/
+#ifndef __VFIO_VFIO_H__
+#define __VFIO_VFIO_H__
+
+#include <linux/device.h>
+#include <linux/cdev.h>
+#include <linux/module.h>
+
+struct iommu_group;
+struct vfio_device;
+struct vfio_container;
enum vfio_group_type {
/*
@@ -28,6 +38,30 @@ enum vfio_group_type {
VFIO_NO_IOMMU,
};
+struct vfio_group {
+ struct device dev;
+ struct cdev cdev;
+ /*
+ * When drivers is non-zero a driver is attached to the struct device
+ * that provided the iommu_group and thus the iommu_group is a valid
+	 * pointer. When drivers is 0 the driver is being detached. Once drivers
+	 * reaches 0 the iommu_group is invalid.
+ */
+ refcount_t drivers;
+ unsigned int container_users;
+ struct iommu_group *iommu_group;
+ struct vfio_container *container;
+ struct list_head device_list;
+ struct mutex device_lock;
+ struct list_head vfio_next;
+ struct list_head container_next;
+ enum vfio_group_type type;
+ struct mutex group_lock;
+ struct kvm *kvm;
+ struct file *opened_file;
+ struct blocking_notifier_head notifier;
+};
+
/* events for the backend driver notify callback */
enum vfio_iommu_notify_type {
VFIO_IOMMU_CONTAINER_CLOSE = 0,
@@ -67,5 +101,33 @@ struct vfio_iommu_driver_ops {
enum vfio_iommu_notify_type event);
};
+struct vfio_iommu_driver {
+ const struct vfio_iommu_driver_ops *ops;
+ struct list_head vfio_next;
+};
+
int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops);
+
+bool vfio_assert_device_open(struct vfio_device *device);
+
+struct vfio_container *vfio_container_from_file(struct file *filep);
+int vfio_device_assign_container(struct vfio_device *device);
+void vfio_device_unassign_container(struct vfio_device *device);
+int vfio_container_attach_group(struct vfio_container *container,
+ struct vfio_group *group);
+void vfio_group_detach_container(struct vfio_group *group);
+void vfio_device_container_register(struct vfio_device *device);
+void vfio_device_container_unregister(struct vfio_device *device);
+long vfio_container_ioctl_check_extension(struct vfio_container *container,
+ unsigned long arg);
+int __init vfio_container_init(void);
+void vfio_container_cleanup(void);
+
+#ifdef CONFIG_VFIO_NOIOMMU
+extern bool vfio_noiommu __read_mostly;
+#else
+enum { vfio_noiommu = false };
+#endif
+
+#endif
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 7cb56c382c97..2d168793d4e1 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -32,6 +32,9 @@
#include <linux/vfio.h>
#include <linux/wait.h>
#include <linux/sched/signal.h>
+#include <linux/pm_runtime.h>
+#include <linux/interval_tree.h>
+#include <linux/iova_bitmap.h>
#include "vfio.h"
#define DRIVER_VERSION "0.3"
@@ -40,54 +43,14 @@
static struct vfio {
struct class *class;
- struct list_head iommu_drivers_list;
- struct mutex iommu_drivers_lock;
struct list_head group_list;
struct mutex group_lock; /* locks group_list */
struct ida group_ida;
dev_t group_devt;
+ struct class *device_class;
+ struct ida device_ida;
} vfio;
-struct vfio_iommu_driver {
- const struct vfio_iommu_driver_ops *ops;
- struct list_head vfio_next;
-};
-
-struct vfio_container {
- struct kref kref;
- struct list_head group_list;
- struct rw_semaphore group_lock;
- struct vfio_iommu_driver *iommu_driver;
- void *iommu_data;
- bool noiommu;
-};
-
-struct vfio_group {
- struct device dev;
- struct cdev cdev;
- refcount_t users;
- unsigned int container_users;
- struct iommu_group *iommu_group;
- struct vfio_container *container;
- struct list_head device_list;
- struct mutex device_lock;
- struct list_head vfio_next;
- struct list_head container_next;
- enum vfio_group_type type;
- unsigned int dev_counter;
- struct rw_semaphore group_rwsem;
- struct kvm *kvm;
- struct file *opened_file;
- struct blocking_notifier_head notifier;
-};
-
-#ifdef CONFIG_VFIO_NOIOMMU
-static bool noiommu __read_mostly;
-module_param_named(enable_unsafe_noiommu_mode,
- noiommu, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(enable_unsafe_noiommu_mode, "Enable UNSAFE, no-IOMMU mode. This mode provides no device isolation, no DMA translation, no host kernel protection, cannot be used for device assignment to virtual machines, requires RAWIO permissions, and will taint the kernel. If you do not know what this is for, step away. (default: false)");
-#endif
-
static DEFINE_XARRAY(vfio_device_set_xa);
static const struct file_operations vfio_group_fops;
@@ -162,146 +125,6 @@ static void vfio_release_device_set(struct vfio_device *device)
xa_unlock(&vfio_device_set_xa);
}
-#ifdef CONFIG_VFIO_NOIOMMU
-static void *vfio_noiommu_open(unsigned long arg)
-{
- if (arg != VFIO_NOIOMMU_IOMMU)
- return ERR_PTR(-EINVAL);
- if (!capable(CAP_SYS_RAWIO))
- return ERR_PTR(-EPERM);
-
- return NULL;
-}
-
-static void vfio_noiommu_release(void *iommu_data)
-{
-}
-
-static long vfio_noiommu_ioctl(void *iommu_data,
- unsigned int cmd, unsigned long arg)
-{
- if (cmd == VFIO_CHECK_EXTENSION)
- return noiommu && (arg == VFIO_NOIOMMU_IOMMU) ? 1 : 0;
-
- return -ENOTTY;
-}
-
-static int vfio_noiommu_attach_group(void *iommu_data,
- struct iommu_group *iommu_group, enum vfio_group_type type)
-{
- return 0;
-}
-
-static void vfio_noiommu_detach_group(void *iommu_data,
- struct iommu_group *iommu_group)
-{
-}
-
-static const struct vfio_iommu_driver_ops vfio_noiommu_ops = {
- .name = "vfio-noiommu",
- .owner = THIS_MODULE,
- .open = vfio_noiommu_open,
- .release = vfio_noiommu_release,
- .ioctl = vfio_noiommu_ioctl,
- .attach_group = vfio_noiommu_attach_group,
- .detach_group = vfio_noiommu_detach_group,
-};
-
-/*
- * Only noiommu containers can use vfio-noiommu and noiommu containers can only
- * use vfio-noiommu.
- */
-static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
- const struct vfio_iommu_driver *driver)
-{
- return container->noiommu == (driver->ops == &vfio_noiommu_ops);
-}
-#else
-static inline bool vfio_iommu_driver_allowed(struct vfio_container *container,
- const struct vfio_iommu_driver *driver)
-{
- return true;
-}
-#endif /* CONFIG_VFIO_NOIOMMU */
-
-/*
- * IOMMU driver registration
- */
-int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops)
-{
- struct vfio_iommu_driver *driver, *tmp;
-
- if (WARN_ON(!ops->register_device != !ops->unregister_device))
- return -EINVAL;
-
- driver = kzalloc(sizeof(*driver), GFP_KERNEL);
- if (!driver)
- return -ENOMEM;
-
- driver->ops = ops;
-
- mutex_lock(&vfio.iommu_drivers_lock);
-
- /* Check for duplicates */
- list_for_each_entry(tmp, &vfio.iommu_drivers_list, vfio_next) {
- if (tmp->ops == ops) {
- mutex_unlock(&vfio.iommu_drivers_lock);
- kfree(driver);
- return -EINVAL;
- }
- }
-
- list_add(&driver->vfio_next, &vfio.iommu_drivers_list);
-
- mutex_unlock(&vfio.iommu_drivers_lock);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(vfio_register_iommu_driver);
-
-void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops)
-{
- struct vfio_iommu_driver *driver;
-
- mutex_lock(&vfio.iommu_drivers_lock);
- list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
- if (driver->ops == ops) {
- list_del(&driver->vfio_next);
- mutex_unlock(&vfio.iommu_drivers_lock);
- kfree(driver);
- return;
- }
- }
- mutex_unlock(&vfio.iommu_drivers_lock);
-}
-EXPORT_SYMBOL_GPL(vfio_unregister_iommu_driver);
-
-static void vfio_group_get(struct vfio_group *group);
-
-/*
- * Container objects - containers are created when /dev/vfio/vfio is
- * opened, but their lifecycle extends until the last user is done, so
- * it's freed via kref. Must support container/group/device being
- * closed in any order.
- */
-static void vfio_container_get(struct vfio_container *container)
-{
- kref_get(&container->kref);
-}
-
-static void vfio_container_release(struct kref *kref)
-{
- struct vfio_container *container;
- container = container_of(kref, struct vfio_container, kref);
-
- kfree(container);
-}
-
-static void vfio_container_put(struct vfio_container *container)
-{
- kref_put(&container->kref, vfio_container_release);
-}
-
/*
* Group objects - create, release, get, put, search
*/
@@ -310,9 +133,13 @@ __vfio_group_get_from_iommu(struct iommu_group *iommu_group)
{
struct vfio_group *group;
+ /*
+ * group->iommu_group from the vfio.group_list cannot be NULL
+ * under the vfio.group_lock.
+ */
list_for_each_entry(group, &vfio.group_list, vfio_next) {
if (group->iommu_group == iommu_group) {
- vfio_group_get(group);
+ refcount_inc(&group->drivers);
return group;
}
}
@@ -335,7 +162,8 @@ static void vfio_group_release(struct device *dev)
struct vfio_group *group = container_of(dev, struct vfio_group, dev);
mutex_destroy(&group->device_lock);
- iommu_group_put(group->iommu_group);
+ mutex_destroy(&group->group_lock);
+ WARN_ON(group->iommu_group);
ida_free(&vfio.group_ida, MINOR(group->dev.devt));
kfree(group);
}
@@ -363,8 +191,8 @@ static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
cdev_init(&group->cdev, &vfio_group_fops);
group->cdev.owner = THIS_MODULE;
- refcount_set(&group->users, 1);
- init_rwsem(&group->group_rwsem);
+ refcount_set(&group->drivers, 1);
+ mutex_init(&group->group_lock);
INIT_LIST_HEAD(&group->device_list);
mutex_init(&group->device_lock);
group->iommu_group = iommu_group;
@@ -420,44 +248,64 @@ err_put:
return ret;
}
-static void vfio_group_put(struct vfio_group *group)
+static void vfio_device_remove_group(struct vfio_device *device)
{
- if (!refcount_dec_and_mutex_lock(&group->users, &vfio.group_lock))
+ struct vfio_group *group = device->group;
+ struct iommu_group *iommu_group;
+
+ if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
+ iommu_group_remove_device(device->dev);
+
+ /* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */
+ if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock))
return;
+ list_del(&group->vfio_next);
/*
+ * We could concurrently probe another driver in the group that might
+ * race vfio_device_remove_group() with vfio_get_group(), so we have to
+ * ensure that the sysfs is all cleaned up under lock otherwise the
+	 * cdev_device_add() will fail due to the name already existing.
+ */
+ cdev_device_del(&group->cdev, &group->dev);
+
+ mutex_lock(&group->group_lock);
+ /*
* These data structures all have paired operations that can only be
- * undone when the caller holds a live reference on the group. Since all
- * pairs must be undone these WARN_ON's indicate some caller did not
+ * undone when the caller holds a live reference on the device. Since
+ * all pairs must be undone these WARN_ON's indicate some caller did not
* properly hold the group reference.
*/
WARN_ON(!list_empty(&group->device_list));
- WARN_ON(group->container || group->container_users);
WARN_ON(group->notifier.head);
- list_del(&group->vfio_next);
- cdev_device_del(&group->cdev, &group->dev);
+ /*
+ * Revoke all users of group->iommu_group. At this point we know there
+ * are no devices active because we are unplugging the last one. Setting
+ * iommu_group to NULL blocks all new users.
+ */
+ if (group->container)
+ vfio_group_detach_container(group);
+ iommu_group = group->iommu_group;
+ group->iommu_group = NULL;
+ mutex_unlock(&group->group_lock);
mutex_unlock(&vfio.group_lock);
+ iommu_group_put(iommu_group);
put_device(&group->dev);
}
-static void vfio_group_get(struct vfio_group *group)
-{
- refcount_inc(&group->users);
-}
-
/*
* Device objects - create, release, get, put, search
*/
/* Device reference always implies a group reference */
-static void vfio_device_put(struct vfio_device *device)
+static void vfio_device_put_registration(struct vfio_device *device)
{
if (refcount_dec_and_test(&device->refcount))
complete(&device->comp);
}
-static bool vfio_device_try_get(struct vfio_device *device)
+static bool vfio_device_try_get_registration(struct vfio_device *device)
{
return refcount_inc_not_zero(&device->refcount);
}
@@ -469,7 +317,8 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
mutex_lock(&group->device_lock);
list_for_each_entry(device, &group->device_list, group_next) {
- if (device->dev == dev && vfio_device_try_get(device)) {
+ if (device->dev == dev &&
+ vfio_device_try_get_registration(device)) {
mutex_unlock(&group->device_lock);
return device;
}
@@ -481,20 +330,110 @@ static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
/*
* VFIO driver API
*/
-void vfio_init_group_dev(struct vfio_device *device, struct device *dev,
- const struct vfio_device_ops *ops)
+/* Release helper called by vfio_put_device() */
+static void vfio_device_release(struct device *dev)
+{
+ struct vfio_device *device =
+ container_of(dev, struct vfio_device, device);
+
+ vfio_release_device_set(device);
+ ida_free(&vfio.device_ida, device->index);
+
+ /*
+ * kvfree() cannot be done here due to a life cycle mess in
+ * vfio-ccw. Before the ccw part is fixed all drivers are
+ * required to support @release and call vfio_free_device()
+ * from there.
+ */
+ device->ops->release(device);
+}
+
+/*
+ * Allocate and initialize vfio_device so it can be registered to vfio
+ * core.
+ *
+ * Drivers should use the wrapper vfio_alloc_device() for allocation.
+ * @size is the size of the structure to be allocated, including any
+ * private data used by the driver.
+ *
+ * A driver may provide an @init callback to initialize its device
+ * private data.
+ *
+ * Use vfio_put_device() to release the structure after a successful return.
+ */
+struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
+ const struct vfio_device_ops *ops)
{
+ struct vfio_device *device;
+ int ret;
+
+ if (WARN_ON(size < sizeof(struct vfio_device)))
+ return ERR_PTR(-EINVAL);
+
+ device = kvzalloc(size, GFP_KERNEL);
+ if (!device)
+ return ERR_PTR(-ENOMEM);
+
+ ret = vfio_init_device(device, dev, ops);
+ if (ret)
+ goto out_free;
+ return device;
+
+out_free:
+ kvfree(device);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(_vfio_alloc_device);
+
+/*
+ * Initialize a vfio_device so it can be registered to vfio core.
+ *
+ * Only vfio-ccw driver should call this interface.
+ */
+int vfio_init_device(struct vfio_device *device, struct device *dev,
+ const struct vfio_device_ops *ops)
+{
+ int ret;
+
+ ret = ida_alloc_max(&vfio.device_ida, MINORMASK, GFP_KERNEL);
+ if (ret < 0) {
+		dev_dbg(dev, "Failed to allocate index\n");
+ return ret;
+ }
+
+ device->index = ret;
init_completion(&device->comp);
device->dev = dev;
device->ops = ops;
+
+ if (ops->init) {
+ ret = ops->init(device);
+ if (ret)
+ goto out_uninit;
+ }
+
+ device_initialize(&device->device);
+ device->device.release = vfio_device_release;
+ device->device.class = vfio.device_class;
+ device->device.parent = device->dev;
+ return 0;
+
+out_uninit:
+ vfio_release_device_set(device);
+ ida_free(&vfio.device_ida, device->index);
+ return ret;
}
-EXPORT_SYMBOL_GPL(vfio_init_group_dev);
+EXPORT_SYMBOL_GPL(vfio_init_device);
-void vfio_uninit_group_dev(struct vfio_device *device)
+/*
+ * Helper called from a driver's @release callback to free the device
+ * structure. Drivers that have no private data to clean up can simply
+ * use this helper as their @release.
+ */
+void vfio_free_device(struct vfio_device *device)
{
- vfio_release_device_set(device);
+ kvfree(device);
}
-EXPORT_SYMBOL_GPL(vfio_uninit_group_dev);
+EXPORT_SYMBOL_GPL(vfio_free_device);
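
Taken together, the comments above describe a symmetric life cycle: vfio_alloc_device() sizes the embedding structure and runs @init, vfio_register_group_dev()/vfio_unregister_group_dev() bracket the registered phase, and the final vfio_put_device() drops the struct device reference so that @release (and vfio_free_device()) frees the memory. A condensed sketch for a hypothetical driver, mirroring the vfio-platform/vfio-amba conversions in this patch; all my_* names are illustrative and the bus glue is omitted:

    #include <linux/device.h>
    #include <linux/vfio.h>

    struct my_vfio_device {
    	struct vfio_device vdev;	/* embedded first, as in the in-tree conversions */
    	void *priv;			/* driver private state */
    };

    static int my_init(struct vfio_device *core_vdev)
    {
    	struct my_vfio_device *mydev =
    		container_of(core_vdev, struct my_vfio_device, vdev);

    	mydev->priv = NULL;		/* set up private data here */
    	return 0;
    }

    static void my_release(struct vfio_device *core_vdev)
    {
    	/* no private teardown needed, so just free the structure */
    	vfio_free_device(core_vdev);
    }

    static const struct vfio_device_ops my_ops = {
    	.name = "my-vfio-driver",
    	.init = my_init,
    	.release = my_release,
    };

    static int my_probe(struct device *dev)
    {
    	struct my_vfio_device *mydev;
    	int ret;

    	mydev = vfio_alloc_device(my_vfio_device, vdev, dev, &my_ops);
    	if (IS_ERR(mydev))
    		return PTR_ERR(mydev);

    	ret = vfio_register_group_dev(&mydev->vdev);
    	if (ret) {
    		vfio_put_device(&mydev->vdev);
    		return ret;
    	}
    	dev_set_drvdata(dev, mydev);
    	return 0;
    }

    static void my_remove(struct device *dev)
    {
    	struct my_vfio_device *mydev = dev_get_drvdata(dev);

    	vfio_unregister_group_dev(&mydev->vdev);
    	vfio_put_device(&mydev->vdev);
    }
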
static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
enum vfio_group_type type)
@@ -535,8 +474,7 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
struct vfio_group *group;
iommu_group = iommu_group_get(dev);
-#ifdef CONFIG_VFIO_NOIOMMU
- if (!iommu_group && noiommu) {
+ if (!iommu_group && vfio_noiommu) {
/*
* With noiommu enabled, create an IOMMU group for devices that
* don't already have one, implying no IOMMU hardware/driver
@@ -550,7 +488,7 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
}
return group;
}
-#endif
+
if (!iommu_group)
return ERR_PTR(-EINVAL);
@@ -577,7 +515,12 @@ static int __vfio_register_dev(struct vfio_device *device,
struct vfio_group *group)
{
struct vfio_device *existing_device;
+ int ret;
+ /*
+ * In all cases group is the output of one of the group allocation
+ * functions and we have group->drivers incremented for us.
+ */
if (IS_ERR(group))
return PTR_ERR(group);
@@ -590,28 +533,39 @@ static int __vfio_register_dev(struct vfio_device *device,
existing_device = vfio_group_get_device(group, device->dev);
if (existing_device) {
+ /*
+ * group->iommu_group is non-NULL because we hold the drivers
+ * refcount.
+ */
dev_WARN(device->dev, "Device already exists on group %d\n",
iommu_group_id(group->iommu_group));
- vfio_device_put(existing_device);
- if (group->type == VFIO_NO_IOMMU ||
- group->type == VFIO_EMULATED_IOMMU)
- iommu_group_remove_device(device->dev);
- vfio_group_put(group);
- return -EBUSY;
+ vfio_device_put_registration(existing_device);
+ ret = -EBUSY;
+ goto err_out;
}
/* Our reference on group is moved to the device */
device->group = group;
+ ret = dev_set_name(&device->device, "vfio%d", device->index);
+ if (ret)
+ goto err_out;
+
+ ret = device_add(&device->device);
+ if (ret)
+ goto err_out;
+
/* Refcounting can't start until the driver calls register */
refcount_set(&device->refcount, 1);
mutex_lock(&group->device_lock);
list_add(&device->group_next, &group->device_list);
- group->dev_counter++;
mutex_unlock(&group->device_lock);
return 0;
+err_out:
+ vfio_device_remove_group(device);
+ return ret;
}
int vfio_register_group_dev(struct vfio_device *device)
@@ -651,7 +605,7 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
ret = !strcmp(dev_name(it->dev), buf);
}
- if (ret && vfio_device_try_get(it)) {
+ if (ret && vfio_device_try_get_registration(it)) {
device = it;
break;
}
@@ -671,7 +625,7 @@ void vfio_unregister_group_dev(struct vfio_device *device)
bool interrupted = false;
long rc;
- vfio_device_put(device);
+ vfio_device_put_registration(device);
rc = try_wait_for_completion(&device->comp);
while (rc <= 0) {
if (device->ops->request)
@@ -696,356 +650,78 @@ void vfio_unregister_group_dev(struct vfio_device *device)
mutex_lock(&group->device_lock);
list_del(&device->group_next);
- group->dev_counter--;
mutex_unlock(&group->device_lock);
- if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
- iommu_group_remove_device(device->dev);
+ /* Balances device_add in register path */
+ device_del(&device->device);
- /* Matches the get in vfio_register_group_dev() */
- vfio_group_put(group);
+ vfio_device_remove_group(device);
}
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
/*
- * VFIO base fd, /dev/vfio/vfio
- */
-static long vfio_ioctl_check_extension(struct vfio_container *container,
- unsigned long arg)
-{
- struct vfio_iommu_driver *driver;
- long ret = 0;
-
- down_read(&container->group_lock);
-
- driver = container->iommu_driver;
-
- switch (arg) {
- /* No base extensions yet */
- default:
- /*
- * If no driver is set, poll all registered drivers for
- * extensions and return the first positive result. If
- * a driver is already set, further queries will be passed
- * only to that driver.
- */
- if (!driver) {
- mutex_lock(&vfio.iommu_drivers_lock);
- list_for_each_entry(driver, &vfio.iommu_drivers_list,
- vfio_next) {
-
- if (!list_empty(&container->group_list) &&
- !vfio_iommu_driver_allowed(container,
- driver))
- continue;
- if (!try_module_get(driver->ops->owner))
- continue;
-
- ret = driver->ops->ioctl(NULL,
- VFIO_CHECK_EXTENSION,
- arg);
- module_put(driver->ops->owner);
- if (ret > 0)
- break;
- }
- mutex_unlock(&vfio.iommu_drivers_lock);
- } else
- ret = driver->ops->ioctl(container->iommu_data,
- VFIO_CHECK_EXTENSION, arg);
- }
-
- up_read(&container->group_lock);
-
- return ret;
-}
-
-/* hold write lock on container->group_lock */
-static int __vfio_container_attach_groups(struct vfio_container *container,
- struct vfio_iommu_driver *driver,
- void *data)
-{
- struct vfio_group *group;
- int ret = -ENODEV;
-
- list_for_each_entry(group, &container->group_list, container_next) {
- ret = driver->ops->attach_group(data, group->iommu_group,
- group->type);
- if (ret)
- goto unwind;
- }
-
- return ret;
-
-unwind:
- list_for_each_entry_continue_reverse(group, &container->group_list,
- container_next) {
- driver->ops->detach_group(data, group->iommu_group);
- }
-
- return ret;
-}
-
-static long vfio_ioctl_set_iommu(struct vfio_container *container,
- unsigned long arg)
-{
- struct vfio_iommu_driver *driver;
- long ret = -ENODEV;
-
- down_write(&container->group_lock);
-
- /*
- * The container is designed to be an unprivileged interface while
- * the group can be assigned to specific users. Therefore, only by
- * adding a group to a container does the user get the privilege of
- * enabling the iommu, which may allocate finite resources. There
- * is no unset_iommu, but by removing all the groups from a container,
- * the container is deprivileged and returns to an unset state.
- */
- if (list_empty(&container->group_list) || container->iommu_driver) {
- up_write(&container->group_lock);
- return -EINVAL;
- }
-
- mutex_lock(&vfio.iommu_drivers_lock);
- list_for_each_entry(driver, &vfio.iommu_drivers_list, vfio_next) {
- void *data;
-
- if (!vfio_iommu_driver_allowed(container, driver))
- continue;
- if (!try_module_get(driver->ops->owner))
- continue;
-
- /*
- * The arg magic for SET_IOMMU is the same as CHECK_EXTENSION,
- * so test which iommu driver reported support for this
- * extension and call open on them. We also pass them the
- * magic, allowing a single driver to support multiple
- * interfaces if they'd like.
- */
- if (driver->ops->ioctl(NULL, VFIO_CHECK_EXTENSION, arg) <= 0) {
- module_put(driver->ops->owner);
- continue;
- }
-
- data = driver->ops->open(arg);
- if (IS_ERR(data)) {
- ret = PTR_ERR(data);
- module_put(driver->ops->owner);
- continue;
- }
-
- ret = __vfio_container_attach_groups(container, driver, data);
- if (ret) {
- driver->ops->release(data);
- module_put(driver->ops->owner);
- continue;
- }
-
- container->iommu_driver = driver;
- container->iommu_data = data;
- break;
- }
-
- mutex_unlock(&vfio.iommu_drivers_lock);
- up_write(&container->group_lock);
-
- return ret;
-}
-
-static long vfio_fops_unl_ioctl(struct file *filep,
- unsigned int cmd, unsigned long arg)
-{
- struct vfio_container *container = filep->private_data;
- struct vfio_iommu_driver *driver;
- void *data;
- long ret = -EINVAL;
-
- if (!container)
- return ret;
-
- switch (cmd) {
- case VFIO_GET_API_VERSION:
- ret = VFIO_API_VERSION;
- break;
- case VFIO_CHECK_EXTENSION:
- ret = vfio_ioctl_check_extension(container, arg);
- break;
- case VFIO_SET_IOMMU:
- ret = vfio_ioctl_set_iommu(container, arg);
- break;
- default:
- driver = container->iommu_driver;
- data = container->iommu_data;
-
- if (driver) /* passthrough all unrecognized ioctls */
- ret = driver->ops->ioctl(data, cmd, arg);
- }
-
- return ret;
-}
-
-static int vfio_fops_open(struct inode *inode, struct file *filep)
-{
- struct vfio_container *container;
-
- container = kzalloc(sizeof(*container), GFP_KERNEL);
- if (!container)
- return -ENOMEM;
-
- INIT_LIST_HEAD(&container->group_list);
- init_rwsem(&container->group_lock);
- kref_init(&container->kref);
-
- filep->private_data = container;
-
- return 0;
-}
-
-static int vfio_fops_release(struct inode *inode, struct file *filep)
-{
- struct vfio_container *container = filep->private_data;
- struct vfio_iommu_driver *driver = container->iommu_driver;
-
- if (driver && driver->ops->notify)
- driver->ops->notify(container->iommu_data,
- VFIO_IOMMU_CONTAINER_CLOSE);
-
- filep->private_data = NULL;
-
- vfio_container_put(container);
-
- return 0;
-}
-
-static const struct file_operations vfio_fops = {
- .owner = THIS_MODULE,
- .open = vfio_fops_open,
- .release = vfio_fops_release,
- .unlocked_ioctl = vfio_fops_unl_ioctl,
- .compat_ioctl = compat_ptr_ioctl,
-};
-
-/*
* VFIO Group fd, /dev/vfio/$GROUP
*/
-static void __vfio_group_unset_container(struct vfio_group *group)
-{
- struct vfio_container *container = group->container;
- struct vfio_iommu_driver *driver;
-
- lockdep_assert_held_write(&group->group_rwsem);
-
- down_write(&container->group_lock);
-
- driver = container->iommu_driver;
- if (driver)
- driver->ops->detach_group(container->iommu_data,
- group->iommu_group);
-
- if (group->type == VFIO_IOMMU)
- iommu_group_release_dma_owner(group->iommu_group);
-
- group->container = NULL;
- group->container_users = 0;
- list_del(&group->container_next);
-
- /* Detaching the last group deprivileges a container, remove iommu */
- if (driver && list_empty(&container->group_list)) {
- driver->ops->release(container->iommu_data);
- module_put(driver->ops->owner);
- container->iommu_driver = NULL;
- container->iommu_data = NULL;
- }
-
- up_write(&container->group_lock);
-
- vfio_container_put(container);
-}
-
/*
* VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
* if there was no container to unset. Since the ioctl is called on
* the group, we know that still exists, therefore the only valid
* transition here is 1->0.
*/
-static int vfio_group_unset_container(struct vfio_group *group)
+static int vfio_group_ioctl_unset_container(struct vfio_group *group)
{
- lockdep_assert_held_write(&group->group_rwsem);
+ int ret = 0;
- if (!group->container)
- return -EINVAL;
- if (group->container_users != 1)
- return -EBUSY;
- __vfio_group_unset_container(group);
- return 0;
+ mutex_lock(&group->group_lock);
+ if (!group->container) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+ if (group->container_users != 1) {
+ ret = -EBUSY;
+ goto out_unlock;
+ }
+ vfio_group_detach_container(group);
+
+out_unlock:
+ mutex_unlock(&group->group_lock);
+ return ret;
}
-static int vfio_group_set_container(struct vfio_group *group, int container_fd)
+static int vfio_group_ioctl_set_container(struct vfio_group *group,
+ int __user *arg)
{
- struct fd f;
struct vfio_container *container;
- struct vfio_iommu_driver *driver;
- int ret = 0;
-
- lockdep_assert_held_write(&group->group_rwsem);
-
- if (group->container || WARN_ON(group->container_users))
- return -EINVAL;
+ struct fd f;
+ int ret;
+ int fd;
- if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
- return -EPERM;
+ if (get_user(fd, arg))
+ return -EFAULT;
- f = fdget(container_fd);
+ f = fdget(fd);
if (!f.file)
return -EBADF;
- /* Sanity check, is this really our fd? */
- if (f.file->f_op != &vfio_fops) {
- fdput(f);
- return -EINVAL;
+ mutex_lock(&group->group_lock);
+ if (group->container || WARN_ON(group->container_users)) {
+ ret = -EINVAL;
+ goto out_unlock;
}
-
- container = f.file->private_data;
- WARN_ON(!container); /* fget ensures we don't race vfio_release */
-
- down_write(&container->group_lock);
-
- /* Real groups and fake groups cannot mix */
- if (!list_empty(&container->group_list) &&
- container->noiommu != (group->type == VFIO_NO_IOMMU)) {
- ret = -EPERM;
- goto unlock_out;
- }
-
- if (group->type == VFIO_IOMMU) {
- ret = iommu_group_claim_dma_owner(group->iommu_group, f.file);
- if (ret)
- goto unlock_out;
+ if (!group->iommu_group) {
+ ret = -ENODEV;
+ goto out_unlock;
}
- driver = container->iommu_driver;
- if (driver) {
- ret = driver->ops->attach_group(container->iommu_data,
- group->iommu_group,
- group->type);
- if (ret) {
- if (group->type == VFIO_IOMMU)
- iommu_group_release_dma_owner(
- group->iommu_group);
- goto unlock_out;
- }
+ container = vfio_container_from_file(f.file);
+ ret = -EINVAL;
+ if (container) {
+ ret = vfio_container_attach_group(container, group);
+ goto out_unlock;
}
- group->container = container;
- group->container_users = 1;
- container->noiommu = (group->type == VFIO_NO_IOMMU);
- list_add(&group->container_next, &container->group_list);
-
- /* Get a reference on the container and mark a user within the group */
- vfio_container_get(container);
-
-unlock_out:
- up_write(&container->group_lock);
+out_unlock:
+ mutex_unlock(&group->group_lock);
fdput(f);
return ret;
}
@@ -1053,47 +729,19 @@ unlock_out:
static const struct file_operations vfio_device_fops;
/* true if the vfio_device has open_device() called but not close_device() */
-static bool vfio_assert_device_open(struct vfio_device *device)
+bool vfio_assert_device_open(struct vfio_device *device)
{
return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}
-static int vfio_device_assign_container(struct vfio_device *device)
-{
- struct vfio_group *group = device->group;
-
- lockdep_assert_held_write(&group->group_rwsem);
-
- if (!group->container || !group->container->iommu_driver ||
- WARN_ON(!group->container_users))
- return -EINVAL;
-
- if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
- return -EPERM;
-
- get_file(group->opened_file);
- group->container_users++;
- return 0;
-}
-
-static void vfio_device_unassign_container(struct vfio_device *device)
-{
- down_write(&device->group->group_rwsem);
- WARN_ON(device->group->container_users <= 1);
- device->group->container_users--;
- fput(device->group->opened_file);
- up_write(&device->group->group_rwsem);
-}
-
static struct file *vfio_device_open(struct vfio_device *device)
{
- struct vfio_iommu_driver *iommu_driver;
struct file *filep;
int ret;
- down_write(&device->group->group_rwsem);
+ mutex_lock(&device->group->group_lock);
ret = vfio_device_assign_container(device);
- up_write(&device->group->group_rwsem);
+ mutex_unlock(&device->group->group_lock);
if (ret)
return ERR_PTR(ret);
@@ -1110,7 +758,7 @@ static struct file *vfio_device_open(struct vfio_device *device)
* lock. If the device driver will use it, it must obtain a
* reference and release it during close_device.
*/
- down_read(&device->group->group_rwsem);
+ mutex_lock(&device->group->group_lock);
device->kvm = device->group->kvm;
if (device->ops->open_device) {
@@ -1118,13 +766,8 @@ static struct file *vfio_device_open(struct vfio_device *device)
if (ret)
goto err_undo_count;
}
-
- iommu_driver = device->group->container->iommu_driver;
- if (iommu_driver && iommu_driver->ops->register_device)
- iommu_driver->ops->register_device(
- device->group->container->iommu_data, device);
-
- up_read(&device->group->group_rwsem);
+ vfio_device_container_register(device);
+ mutex_unlock(&device->group->group_lock);
}
mutex_unlock(&device->dev_set->lock);
@@ -1157,17 +800,14 @@ static struct file *vfio_device_open(struct vfio_device *device)
err_close_device:
mutex_lock(&device->dev_set->lock);
- down_read(&device->group->group_rwsem);
+ mutex_lock(&device->group->group_lock);
if (device->open_count == 1 && device->ops->close_device) {
device->ops->close_device(device);
- iommu_driver = device->group->container->iommu_driver;
- if (iommu_driver && iommu_driver->ops->unregister_device)
- iommu_driver->ops->unregister_device(
- device->group->container->iommu_data, device);
+ vfio_device_container_unregister(device);
}
err_undo_count:
- up_read(&device->group->group_rwsem);
+ mutex_unlock(&device->group->group_lock);
device->open_count--;
if (device->open_count == 0 && device->kvm)
device->kvm = NULL;
@@ -1178,14 +818,21 @@ err_unassign_container:
return ERR_PTR(ret);
}
-static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
+static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
+ char __user *arg)
{
struct vfio_device *device;
struct file *filep;
+ char *buf;
int fdno;
int ret;
+ buf = strndup_user(arg, PAGE_SIZE);
+ if (IS_ERR(buf))
+ return PTR_ERR(buf);
+
device = vfio_device_get_from_name(group, buf);
+ kfree(buf);
if (IS_ERR(device))
return PTR_ERR(device);
@@ -1207,81 +854,60 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf)
err_put_fdno:
put_unused_fd(fdno);
err_put_device:
- vfio_device_put(device);
+ vfio_device_put_registration(device);
return ret;
}
-static long vfio_group_fops_unl_ioctl(struct file *filep,
- unsigned int cmd, unsigned long arg)
+static int vfio_group_ioctl_get_status(struct vfio_group *group,
+ struct vfio_group_status __user *arg)
{
- struct vfio_group *group = filep->private_data;
- long ret = -ENOTTY;
+ unsigned long minsz = offsetofend(struct vfio_group_status, flags);
+ struct vfio_group_status status;
- switch (cmd) {
- case VFIO_GROUP_GET_STATUS:
- {
- struct vfio_group_status status;
- unsigned long minsz;
+ if (copy_from_user(&status, arg, minsz))
+ return -EFAULT;
- minsz = offsetofend(struct vfio_group_status, flags);
+ if (status.argsz < minsz)
+ return -EINVAL;
- if (copy_from_user(&status, (void __user *)arg, minsz))
- return -EFAULT;
+ status.flags = 0;
- if (status.argsz < minsz)
- return -EINVAL;
+ mutex_lock(&group->group_lock);
+ if (!group->iommu_group) {
+ mutex_unlock(&group->group_lock);
+ return -ENODEV;
+ }
- status.flags = 0;
+ if (group->container)
+ status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
+ VFIO_GROUP_FLAGS_VIABLE;
+ else if (!iommu_group_dma_owner_claimed(group->iommu_group))
+ status.flags |= VFIO_GROUP_FLAGS_VIABLE;
+ mutex_unlock(&group->group_lock);
- down_read(&group->group_rwsem);
- if (group->container)
- status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
- VFIO_GROUP_FLAGS_VIABLE;
- else if (!iommu_group_dma_owner_claimed(group->iommu_group))
- status.flags |= VFIO_GROUP_FLAGS_VIABLE;
- up_read(&group->group_rwsem);
+ if (copy_to_user(arg, &status, minsz))
+ return -EFAULT;
+ return 0;
+}
- if (copy_to_user((void __user *)arg, &status, minsz))
- return -EFAULT;
+static long vfio_group_fops_unl_ioctl(struct file *filep,
+ unsigned int cmd, unsigned long arg)
+{
+ struct vfio_group *group = filep->private_data;
+ void __user *uarg = (void __user *)arg;
- ret = 0;
- break;
- }
+ switch (cmd) {
+ case VFIO_GROUP_GET_DEVICE_FD:
+ return vfio_group_ioctl_get_device_fd(group, uarg);
+ case VFIO_GROUP_GET_STATUS:
+ return vfio_group_ioctl_get_status(group, uarg);
case VFIO_GROUP_SET_CONTAINER:
- {
- int fd;
-
- if (get_user(fd, (int __user *)arg))
- return -EFAULT;
-
- if (fd < 0)
- return -EINVAL;
-
- down_write(&group->group_rwsem);
- ret = vfio_group_set_container(group, fd);
- up_write(&group->group_rwsem);
- break;
- }
+ return vfio_group_ioctl_set_container(group, uarg);
case VFIO_GROUP_UNSET_CONTAINER:
- down_write(&group->group_rwsem);
- ret = vfio_group_unset_container(group);
- up_write(&group->group_rwsem);
- break;
- case VFIO_GROUP_GET_DEVICE_FD:
- {
- char *buf;
-
- buf = strndup_user((const char __user *)arg, PAGE_SIZE);
- if (IS_ERR(buf))
- return PTR_ERR(buf);
-
- ret = vfio_group_get_device_fd(group, buf);
- kfree(buf);
- break;
- }
+ return vfio_group_ioctl_unset_container(group);
+ default:
+ return -ENOTTY;
}
-
- return ret;
}
static int vfio_group_fops_open(struct inode *inode, struct file *filep)
@@ -1290,17 +916,20 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep)
container_of(inode->i_cdev, struct vfio_group, cdev);
int ret;
- down_write(&group->group_rwsem);
+ mutex_lock(&group->group_lock);
- /* users can be zero if this races with vfio_group_put() */
- if (!refcount_inc_not_zero(&group->users)) {
+ /*
+ * drivers can be zero if this races with vfio_device_remove_group();
+ * it will be stable at 0 under the group_lock
+ */
+ if (refcount_read(&group->drivers) == 0) {
ret = -ENODEV;
- goto err_unlock;
+ goto out_unlock;
}
if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
ret = -EPERM;
- goto err_put;
+ goto out_unlock;
}
/*
@@ -1308,17 +937,13 @@ static int vfio_group_fops_open(struct inode *inode, struct file *filep)
*/
if (group->opened_file) {
ret = -EBUSY;
- goto err_put;
+ goto out_unlock;
}
group->opened_file = filep;
filep->private_data = group;
-
- up_write(&group->group_rwsem);
- return 0;
-err_put:
- vfio_group_put(group);
-err_unlock:
- up_write(&group->group_rwsem);
+ ret = 0;
+out_unlock:
+ mutex_unlock(&group->group_lock);
return ret;
}
@@ -1328,21 +953,16 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep)
filep->private_data = NULL;
- down_write(&group->group_rwsem);
+ mutex_lock(&group->group_lock);
/*
* Device FDs hold a group file reference, therefore the group release
* is only called when there are no open devices.
*/
WARN_ON(group->notifier.head);
- if (group->container) {
- WARN_ON(group->container_users != 1);
- __vfio_group_unset_container(group);
- }
+ if (group->container)
+ vfio_group_detach_container(group);
group->opened_file = NULL;
- up_write(&group->group_rwsem);
-
- vfio_group_put(group);
-
+ mutex_unlock(&group->group_lock);
return 0;
}
@@ -1355,24 +975,53 @@ static const struct file_operations vfio_group_fops = {
};
/*
+ * Wrapper around pm_runtime_resume_and_get().
+ * Return error code on failure or 0 on success.
+ */
+static inline int vfio_device_pm_runtime_get(struct vfio_device *device)
+{
+ struct device *dev = device->dev;
+
+ if (dev->driver && dev->driver->pm) {
+ int ret;
+
+ ret = pm_runtime_resume_and_get(dev);
+ if (ret) {
+ dev_info_ratelimited(dev,
+ "vfio: runtime resume failed %d\n", ret);
+ return -EIO;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Wrapper around pm_runtime_put().
+ */
+static inline void vfio_device_pm_runtime_put(struct vfio_device *device)
+{
+ struct device *dev = device->dev;
+
+ if (dev->driver && dev->driver->pm)
+ pm_runtime_put(dev);
+}
+
+/*
* VFIO Device fd
*/
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
struct vfio_device *device = filep->private_data;
- struct vfio_iommu_driver *iommu_driver;
mutex_lock(&device->dev_set->lock);
vfio_assert_device_open(device);
- down_read(&device->group->group_rwsem);
+ mutex_lock(&device->group->group_lock);
if (device->open_count == 1 && device->ops->close_device)
device->ops->close_device(device);
- iommu_driver = device->group->container->iommu_driver;
- if (iommu_driver && iommu_driver->ops->unregister_device)
- iommu_driver->ops->unregister_device(
- device->group->container->iommu_data, device);
- up_read(&device->group->group_rwsem);
+ vfio_device_container_unregister(device);
+ mutex_unlock(&device->group->group_lock);
device->open_count--;
if (device->open_count == 0)
device->kvm = NULL;
@@ -1382,7 +1031,7 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
vfio_device_unassign_container(device);
- vfio_device_put(device);
+ vfio_device_put_registration(device);
return 0;
}
@@ -1628,6 +1277,167 @@ static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
return 0;
}
+/* Ranges should fit into a single kernel page */
+#define LOG_MAX_RANGES \
+ (PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range))
+
+static int
+vfio_ioctl_device_feature_logging_start(struct vfio_device *device,
+ u32 flags, void __user *arg,
+ size_t argsz)
+{
+ size_t minsz =
+ offsetofend(struct vfio_device_feature_dma_logging_control,
+ ranges);
+ struct vfio_device_feature_dma_logging_range __user *ranges;
+ struct vfio_device_feature_dma_logging_control control;
+ struct vfio_device_feature_dma_logging_range range;
+ struct rb_root_cached root = RB_ROOT_CACHED;
+ struct interval_tree_node *nodes;
+ u64 iova_end;
+ u32 nnodes;
+ int i, ret;
+
+ if (!device->log_ops)
+ return -ENOTTY;
+
+ ret = vfio_check_feature(flags, argsz,
+ VFIO_DEVICE_FEATURE_SET,
+ sizeof(control));
+ if (ret != 1)
+ return ret;
+
+ if (copy_from_user(&control, arg, minsz))
+ return -EFAULT;
+
+ nnodes = control.num_ranges;
+ if (!nnodes)
+ return -EINVAL;
+
+ if (nnodes > LOG_MAX_RANGES)
+ return -E2BIG;
+
+ ranges = u64_to_user_ptr(control.ranges);
+ nodes = kmalloc_array(nnodes, sizeof(struct interval_tree_node),
+ GFP_KERNEL);
+ if (!nodes)
+ return -ENOMEM;
+
+ for (i = 0; i < nnodes; i++) {
+ if (copy_from_user(&range, &ranges[i], sizeof(range))) {
+ ret = -EFAULT;
+ goto end;
+ }
+ if (!IS_ALIGNED(range.iova, control.page_size) ||
+ !IS_ALIGNED(range.length, control.page_size)) {
+ ret = -EINVAL;
+ goto end;
+ }
+
+ if (check_add_overflow(range.iova, range.length, &iova_end) ||
+ iova_end > ULONG_MAX) {
+ ret = -EOVERFLOW;
+ goto end;
+ }
+
+ nodes[i].start = range.iova;
+ nodes[i].last = range.iova + range.length - 1;
+ if (interval_tree_iter_first(&root, nodes[i].start,
+ nodes[i].last)) {
+ /* Range overlapping */
+ ret = -EINVAL;
+ goto end;
+ }
+ interval_tree_insert(nodes + i, &root);
+ }
+
+ ret = device->log_ops->log_start(device, &root, nnodes,
+ &control.page_size);
+ if (ret)
+ goto end;
+
+ if (copy_to_user(arg, &control, sizeof(control))) {
+ ret = -EFAULT;
+ device->log_ops->log_stop(device);
+ }
+
+end:
+ kfree(nodes);
+ return ret;
+}
+
+static int
+vfio_ioctl_device_feature_logging_stop(struct vfio_device *device,
+ u32 flags, void __user *arg,
+ size_t argsz)
+{
+ int ret;
+
+ if (!device->log_ops)
+ return -ENOTTY;
+
+ ret = vfio_check_feature(flags, argsz,
+ VFIO_DEVICE_FEATURE_SET, 0);
+ if (ret != 1)
+ return ret;
+
+ return device->log_ops->log_stop(device);
+}
+
+static int vfio_device_log_read_and_clear(struct iova_bitmap *iter,
+ unsigned long iova, size_t length,
+ void *opaque)
+{
+ struct vfio_device *device = opaque;
+
+ return device->log_ops->log_read_and_clear(device, iova, length, iter);
+}
+
+static int
+vfio_ioctl_device_feature_logging_report(struct vfio_device *device,
+ u32 flags, void __user *arg,
+ size_t argsz)
+{
+ size_t minsz =
+ offsetofend(struct vfio_device_feature_dma_logging_report,
+ bitmap);
+ struct vfio_device_feature_dma_logging_report report;
+ struct iova_bitmap *iter;
+ u64 iova_end;
+ int ret;
+
+ if (!device->log_ops)
+ return -ENOTTY;
+
+ ret = vfio_check_feature(flags, argsz,
+ VFIO_DEVICE_FEATURE_GET,
+ sizeof(report));
+ if (ret != 1)
+ return ret;
+
+ if (copy_from_user(&report, arg, minsz))
+ return -EFAULT;
+
+ if (report.page_size < SZ_4K || !is_power_of_2(report.page_size))
+ return -EINVAL;
+
+ if (check_add_overflow(report.iova, report.length, &iova_end) ||
+ iova_end > ULONG_MAX)
+ return -EOVERFLOW;
+
+ iter = iova_bitmap_alloc(report.iova, report.length,
+ report.page_size,
+ u64_to_user_ptr(report.bitmap));
+ if (IS_ERR(iter))
+ return PTR_ERR(iter);
+
+ ret = iova_bitmap_for_each(iter, device,
+ vfio_device_log_read_and_clear);
+
+ iova_bitmap_free(iter);
+ return ret;
+}
+
static int vfio_ioctl_device_feature(struct vfio_device *device,
struct vfio_device_feature __user *arg)
{
@@ -1661,6 +1471,18 @@ static int vfio_ioctl_device_feature(struct vfio_device *device,
return vfio_ioctl_device_feature_mig_device_state(
device, feature.flags, arg->data,
feature.argsz - minsz);
+ case VFIO_DEVICE_FEATURE_DMA_LOGGING_START:
+ return vfio_ioctl_device_feature_logging_start(
+ device, feature.flags, arg->data,
+ feature.argsz - minsz);
+ case VFIO_DEVICE_FEATURE_DMA_LOGGING_STOP:
+ return vfio_ioctl_device_feature_logging_stop(
+ device, feature.flags, arg->data,
+ feature.argsz - minsz);
+ case VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT:
+ return vfio_ioctl_device_feature_logging_report(
+ device, feature.flags, arg->data,
+ feature.argsz - minsz);
default:
if (unlikely(!device->ops->device_feature))
return -EINVAL;
@@ -1674,15 +1496,27 @@ static long vfio_device_fops_unl_ioctl(struct file *filep,
unsigned int cmd, unsigned long arg)
{
struct vfio_device *device = filep->private_data;
+ int ret;
+
+ ret = vfio_device_pm_runtime_get(device);
+ if (ret)
+ return ret;
switch (cmd) {
case VFIO_DEVICE_FEATURE:
- return vfio_ioctl_device_feature(device, (void __user *)arg);
+ ret = vfio_ioctl_device_feature(device, (void __user *)arg);
+ break;
+
default:
if (unlikely(!device->ops->ioctl))
- return -EINVAL;
- return device->ops->ioctl(device, cmd, arg);
+ ret = -EINVAL;
+ else
+ ret = device->ops->ioctl(device, cmd, arg);
+ break;
}
+
+ vfio_device_pm_runtime_put(device);
+ return ret;
}
static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
@@ -1732,19 +1566,42 @@ static const struct file_operations vfio_device_fops = {
* vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
* @file: VFIO group file
*
- * The returned iommu_group is valid as long as a ref is held on the file.
+ * The returned iommu_group is valid as long as a ref is held on the file. This
+ * returns a reference on the group. This function is deprecated; only the
+ * SPAPR path in KVM should call it.
*/
struct iommu_group *vfio_file_iommu_group(struct file *file)
{
struct vfio_group *group = file->private_data;
+ struct iommu_group *iommu_group = NULL;
- if (file->f_op != &vfio_group_fops)
+ if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
return NULL;
- return group->iommu_group;
+
+ if (!vfio_file_is_group(file))
+ return NULL;
+
+ mutex_lock(&group->group_lock);
+ if (group->iommu_group) {
+ iommu_group = group->iommu_group;
+ iommu_group_ref_get(iommu_group);
+ }
+ mutex_unlock(&group->group_lock);
+ return iommu_group;
}
EXPORT_SYMBOL_GPL(vfio_file_iommu_group);
/**
 * vfio_file_is_group - True if the file is usable with VFIO APIs
+ * @file: VFIO group file
+ */
+bool vfio_file_is_group(struct file *file)
+{
+ return file->f_op == &vfio_group_fops;
+}
+EXPORT_SYMBOL_GPL(vfio_file_is_group);
+
+/**
* vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
* is always CPU cache coherent
* @file: VFIO group file
@@ -1758,13 +1615,13 @@ bool vfio_file_enforced_coherent(struct file *file)
struct vfio_group *group = file->private_data;
bool ret;
- if (file->f_op != &vfio_group_fops)
+ if (!vfio_file_is_group(file))
return true;
- down_read(&group->group_rwsem);
+ mutex_lock(&group->group_lock);
if (group->container) {
- ret = vfio_ioctl_check_extension(group->container,
- VFIO_DMA_CC_IOMMU);
+ ret = vfio_container_ioctl_check_extension(group->container,
+ VFIO_DMA_CC_IOMMU);
} else {
/*
* Since the coherency state is determined only once a container
@@ -1773,7 +1630,7 @@ bool vfio_file_enforced_coherent(struct file *file)
*/
ret = true;
}
- up_read(&group->group_rwsem);
+ mutex_unlock(&group->group_lock);
return ret;
}
EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
@@ -1790,12 +1647,12 @@ void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
{
struct vfio_group *group = file->private_data;
- if (file->f_op != &vfio_group_fops)
+ if (!vfio_file_is_group(file))
return;
- down_write(&group->group_rwsem);
+ mutex_lock(&group->group_lock);
group->kvm = kvm;
- up_write(&group->group_rwsem);
+ mutex_unlock(&group->group_lock);
}
EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
@@ -1810,7 +1667,7 @@ bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
{
struct vfio_group *group = file->private_data;
- if (file->f_op != &vfio_group_fops)
+ if (!vfio_file_is_group(file))
return false;
return group == device->group;
@@ -1937,114 +1794,6 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
/*
- * Pin contiguous user pages and return their associated host pages for local
- * domain only.
- * @device [in] : device
- * @iova [in] : starting IOVA of user pages to be pinned.
- * @npage [in] : count of pages to be pinned. This count should not
- * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
- * @prot [in] : protection flags
- * @pages[out] : array of host pages
- * Return error or number of pages pinned.
- */
-int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
- int npage, int prot, struct page **pages)
-{
- struct vfio_container *container;
- struct vfio_group *group = device->group;
- struct vfio_iommu_driver *driver;
- int ret;
-
- if (!pages || !npage || !vfio_assert_device_open(device))
- return -EINVAL;
-
- if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
- return -E2BIG;
-
- if (group->dev_counter > 1)
- return -EINVAL;
-
- /* group->container cannot change while a vfio device is open */
- container = group->container;
- driver = container->iommu_driver;
- if (likely(driver && driver->ops->pin_pages))
- ret = driver->ops->pin_pages(container->iommu_data,
- group->iommu_group, iova,
- npage, prot, pages);
- else
- ret = -ENOTTY;
-
- return ret;
-}
-EXPORT_SYMBOL(vfio_pin_pages);
-
-/*
- * Unpin contiguous host pages for local domain only.
- * @device [in] : device
- * @iova [in] : starting address of user pages to be unpinned.
- * @npage [in] : count of pages to be unpinned. This count should not
- * be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
- */
-void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
-{
- struct vfio_container *container;
- struct vfio_iommu_driver *driver;
-
- if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
- return;
-
- if (WARN_ON(!vfio_assert_device_open(device)))
- return;
-
- /* group->container cannot change while a vfio device is open */
- container = device->group->container;
- driver = container->iommu_driver;
-
- driver->ops->unpin_pages(container->iommu_data, iova, npage);
-}
-EXPORT_SYMBOL(vfio_unpin_pages);
-
-/*
- * This interface allows the CPUs to perform some sort of virtual DMA on
- * behalf of the device.
- *
- * CPUs read/write from/into a range of IOVAs pointing to user space memory
- * into/from a kernel buffer.
- *
- * As the read/write of user space memory is conducted via the CPUs and is
- * not a real device DMA, it is not necessary to pin the user space memory.
- *
- * @device [in] : VFIO device
- * @iova [in] : base IOVA of a user space buffer
- * @data [in] : pointer to kernel buffer
- * @len [in] : kernel buffer length
- * @write : indicate read or write
- * Return error code on failure or 0 on success.
- */
-int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
- size_t len, bool write)
-{
- struct vfio_container *container;
- struct vfio_iommu_driver *driver;
- int ret = 0;
-
- if (!data || len <= 0 || !vfio_assert_device_open(device))
- return -EINVAL;
-
- /* group->container cannot change while a vfio device is open */
- container = device->group->container;
- driver = container->iommu_driver;
-
- if (likely(driver && driver->ops->dma_rw))
- ret = driver->ops->dma_rw(container->iommu_data,
- iova, data, len, write);
- else
- ret = -ENOTTY;
- return ret;
-}
-EXPORT_SYMBOL(vfio_dma_rw);
-
-/*
* Module/class support
*/
static char *vfio_devnode(struct device *dev, umode_t *mode)
@@ -2052,59 +1801,50 @@ static char *vfio_devnode(struct device *dev, umode_t *mode)
return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
}
-static struct miscdevice vfio_dev = {
- .minor = VFIO_MINOR,
- .name = "vfio",
- .fops = &vfio_fops,
- .nodename = "vfio/vfio",
- .mode = S_IRUGO | S_IWUGO,
-};
-
static int __init vfio_init(void)
{
int ret;
ida_init(&vfio.group_ida);
+ ida_init(&vfio.device_ida);
mutex_init(&vfio.group_lock);
- mutex_init(&vfio.iommu_drivers_lock);
INIT_LIST_HEAD(&vfio.group_list);
- INIT_LIST_HEAD(&vfio.iommu_drivers_list);
- ret = misc_register(&vfio_dev);
- if (ret) {
- pr_err("vfio: misc device register failed\n");
+ ret = vfio_container_init();
+ if (ret)
return ret;
- }
/* /dev/vfio/$GROUP */
vfio.class = class_create(THIS_MODULE, "vfio");
if (IS_ERR(vfio.class)) {
ret = PTR_ERR(vfio.class);
- goto err_class;
+ goto err_group_class;
}
vfio.class->devnode = vfio_devnode;
+ /* /sys/class/vfio-dev/vfioX */
+ vfio.device_class = class_create(THIS_MODULE, "vfio-dev");
+ if (IS_ERR(vfio.device_class)) {
+ ret = PTR_ERR(vfio.device_class);
+ goto err_dev_class;
+ }
+
ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
if (ret)
goto err_alloc_chrdev;
-#ifdef CONFIG_VFIO_NOIOMMU
- ret = vfio_register_iommu_driver(&vfio_noiommu_ops);
-#endif
- if (ret)
- goto err_driver_register;
-
pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
return 0;
-err_driver_register:
- unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
err_alloc_chrdev:
+ class_destroy(vfio.device_class);
+ vfio.device_class = NULL;
+err_dev_class:
class_destroy(vfio.class);
vfio.class = NULL;
-err_class:
- misc_deregister(&vfio_dev);
+err_group_class:
+ vfio_container_cleanup();
return ret;
}
@@ -2112,14 +1852,14 @@ static void __exit vfio_cleanup(void)
{
WARN_ON(!list_empty(&vfio.group_list));
-#ifdef CONFIG_VFIO_NOIOMMU
- vfio_unregister_iommu_driver(&vfio_noiommu_ops);
-#endif
+ ida_destroy(&vfio.device_ida);
ida_destroy(&vfio.group_ida);
unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
+ class_destroy(vfio.device_class);
+ vfio.device_class = NULL;
class_destroy(vfio.class);
+ vfio_container_cleanup();
vfio.class = NULL;
- misc_deregister(&vfio_dev);
xa_destroy(&vfio_device_set_xa);
}
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index 76c3500b21c7..b64bc49c7f30 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -1089,6 +1089,17 @@ config EBC_C384_WDT
WinSystems EBC-C384 motherboard. The timeout may be configured via
the timeout module parameter.
+config EXAR_WDT
+ tristate "Exar Watchdog Timer"
+ depends on X86
+ select WATCHDOG_CORE
+ help
 Enables support for the watchdog timer present in some
 Exar/MaxLinear UART chips like the XR28V38x.
+
+ To compile this driver as a module, choose M here: the
+ module will be called exar_wdt.
+
config F71808E_WDT
tristate "Fintek F718xx, F818xx Super I/O Watchdog"
depends on X86
@@ -1315,7 +1326,7 @@ config IT87_WDT
config HP_WATCHDOG
tristate "HP ProLiant iLO2+ Hardware Watchdog Timer"
select WATCHDOG_CORE
- depends on X86 && PCI
+ depends on (ARM64 || X86) && PCI
help
A software monitoring watchdog and NMI handling driver. This driver
will detect lockups and provide a stack trace. This is a driver that
@@ -1325,7 +1336,7 @@ config HP_WATCHDOG
config HPWDT_NMI_DECODING
bool "NMI support for the HP ProLiant iLO2+ Hardware Watchdog Timer"
- depends on HP_WATCHDOG
+ depends on X86 && HP_WATCHDOG
default y
help
Enables the NMI handler for the watchdog pretimeout NMI and the iLO
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index cdeb119e6e61..d41e5f830ae7 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -105,6 +105,7 @@ obj-$(CONFIG_ADVANTECH_WDT) += advantechwdt.o
obj-$(CONFIG_ALIM1535_WDT) += alim1535_wdt.o
obj-$(CONFIG_ALIM7101_WDT) += alim7101_wdt.o
obj-$(CONFIG_EBC_C384_WDT) += ebc-c384_wdt.o
+obj-$(CONFIG_EXAR_WDT) += exar_wdt.o
obj-$(CONFIG_F71808E_WDT) += f71808e_wdt.o
obj-$(CONFIG_SP5100_TCO) += sp5100_tco.o
obj-$(CONFIG_GEODE_WDT) += geodewdt.o
diff --git a/drivers/watchdog/armada_37xx_wdt.c b/drivers/watchdog/armada_37xx_wdt.c
index 854b1cc723cb..ac9fed1ef681 100644
--- a/drivers/watchdog/armada_37xx_wdt.c
+++ b/drivers/watchdog/armada_37xx_wdt.c
@@ -179,6 +179,8 @@ static int armada_37xx_wdt_set_timeout(struct watchdog_device *wdt,
dev->timeout = (u64)dev->clk_rate * timeout;
do_div(dev->timeout, CNTR_CTRL_PRESCALE_MIN);
+ set_counter_value(dev, CNTR_ID_WDOG, dev->timeout);
+
return 0;
}
diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c
index bd06622813eb..0cff2adfbfc9 100644
--- a/drivers/watchdog/aspeed_wdt.c
+++ b/drivers/watchdog/aspeed_wdt.c
@@ -332,18 +332,18 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
u32 reg = readl(wdt->base + WDT_RESET_WIDTH);
reg &= config->ext_pulse_width_mask;
- if (of_property_read_bool(np, "aspeed,ext-push-pull"))
- reg |= WDT_PUSH_PULL_MAGIC;
+ if (of_property_read_bool(np, "aspeed,ext-active-high"))
+ reg |= WDT_ACTIVE_HIGH_MAGIC;
else
- reg |= WDT_OPEN_DRAIN_MAGIC;
+ reg |= WDT_ACTIVE_LOW_MAGIC;
writel(reg, wdt->base + WDT_RESET_WIDTH);
reg &= config->ext_pulse_width_mask;
- if (of_property_read_bool(np, "aspeed,ext-active-high"))
- reg |= WDT_ACTIVE_HIGH_MAGIC;
+ if (of_property_read_bool(np, "aspeed,ext-push-pull"))
+ reg |= WDT_PUSH_PULL_MAGIC;
else
- reg |= WDT_ACTIVE_LOW_MAGIC;
+ reg |= WDT_OPEN_DRAIN_MAGIC;
writel(reg, wdt->base + WDT_RESET_WIDTH);
}
diff --git a/drivers/watchdog/bd9576_wdt.c b/drivers/watchdog/bd9576_wdt.c
index 0b6999f3b6e8..4a20e07fbb69 100644
--- a/drivers/watchdog/bd9576_wdt.c
+++ b/drivers/watchdog/bd9576_wdt.c
@@ -9,8 +9,8 @@
#include <linux/gpio/consumer.h>
#include <linux/mfd/rohm-bd957x.h>
#include <linux/module.h>
-#include <linux/of.h>
#include <linux/platform_device.h>
+#include <linux/property.h>
#include <linux/regmap.h>
#include <linux/watchdog.h>
@@ -202,10 +202,10 @@ static int bd957x_set_wdt_mode(struct bd9576_wdt_priv *priv, int hw_margin,
static int bd9576_wdt_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
- struct device_node *np = dev->parent->of_node;
struct bd9576_wdt_priv *priv;
u32 hw_margin[2];
u32 hw_margin_max = BD957X_WDT_DEFAULT_MARGIN, hw_margin_min = 0;
+ int count;
int ret;
priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
@@ -221,40 +221,51 @@ static int bd9576_wdt_probe(struct platform_device *pdev)
return -ENODEV;
}
- priv->gpiod_en = devm_gpiod_get_from_of_node(dev, dev->parent->of_node,
- "rohm,watchdog-enable-gpios",
- 0, GPIOD_OUT_LOW,
- "watchdog-enable");
+ priv->gpiod_en = devm_fwnode_gpiod_get(dev, dev_fwnode(dev->parent),
+ "rohm,watchdog-enable",
+ GPIOD_OUT_LOW,
+ "watchdog-enable");
if (IS_ERR(priv->gpiod_en))
return dev_err_probe(dev, PTR_ERR(priv->gpiod_en),
"getting watchdog-enable GPIO failed\n");
- priv->gpiod_ping = devm_gpiod_get_from_of_node(dev, dev->parent->of_node,
- "rohm,watchdog-ping-gpios",
- 0, GPIOD_OUT_LOW,
- "watchdog-ping");
+ priv->gpiod_ping = devm_fwnode_gpiod_get(dev, dev_fwnode(dev->parent),
+ "rohm,watchdog-ping",
+ GPIOD_OUT_LOW,
+ "watchdog-ping");
if (IS_ERR(priv->gpiod_ping))
return dev_err_probe(dev, PTR_ERR(priv->gpiod_ping),
"getting watchdog-ping GPIO failed\n");
- ret = of_property_read_variable_u32_array(np, "rohm,hw-timeout-ms",
- &hw_margin[0], 1, 2);
- if (ret < 0 && ret != -EINVAL)
- return ret;
+ count = device_property_count_u32(dev->parent, "rohm,hw-timeout-ms");
+ if (count < 0 && count != -EINVAL)
+ return count;
+
+ if (count > 0) {
+ if (count > ARRAY_SIZE(hw_margin))
+ return -EINVAL;
- if (ret == 1)
- hw_margin_max = hw_margin[0];
+ ret = device_property_read_u32_array(dev->parent,
+ "rohm,hw-timeout-ms",
+ hw_margin, count);
+ if (ret < 0)
+ return ret;
- if (ret == 2) {
- hw_margin_max = hw_margin[1];
- hw_margin_min = hw_margin[0];
+ if (count == 1)
+ hw_margin_max = hw_margin[0];
+
+ if (count == 2) {
+ hw_margin_max = hw_margin[1];
+ hw_margin_min = hw_margin[0];
+ }
}
ret = bd957x_set_wdt_mode(priv, hw_margin_max, hw_margin_min);
if (ret)
return ret;
- priv->always_running = of_property_read_bool(np, "always-running");
+ priv->always_running = device_property_read_bool(dev->parent,
+ "always-running");
watchdog_set_drvdata(&priv->wdd, priv);
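The bd9576 conversion replaces the OF-only helpers with the firmware-node property API so the same lookups work however the parent device was described. A condensed sketch of the count-then-read pattern for an optional one- or two-cell u32 property follows; the "vendor,range-ms" property name and read_range_ms() helper are hypothetical, not part of the driver.

/* Sketch: read an optional property holding either {max} or {min, max};
 * property name and helper are hypothetical. */
#include <linux/device.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/property.h>

static int read_range_ms(struct device *dev, u32 *min, u32 *max)
{
	u32 val[2];
	int count;

	count = device_property_count_u32(dev, "vendor,range-ms");
	if (count == -EINVAL)		/* property absent: keep defaults */
		return 0;
	if (count < 0)
		return count;
	if (count > ARRAY_SIZE(val))
		return -EINVAL;

	if (device_property_read_u32_array(dev, "vendor,range-ms", val, count))
		return -EINVAL;

	*max = val[count - 1];		/* last cell is always the maximum */
	if (count == 2)
		*min = val[0];
	return 0;
}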
diff --git a/drivers/watchdog/eurotechwdt.c b/drivers/watchdog/eurotechwdt.c
index ce682942662c..e26609ad4c17 100644
--- a/drivers/watchdog/eurotechwdt.c
+++ b/drivers/watchdog/eurotechwdt.c
@@ -192,7 +192,7 @@ static void eurwdt_ping(void)
* @ppos: pointer to the position to write. No seeks allowed
*
* A write to a watchdog device is defined as a keepalive signal. Any
- * write of data will do, as we we don't define content meaning.
+ * write of data will do, as we don't define content meaning.
*/
static ssize_t eurwdt_write(struct file *file, const char __user *buf,
diff --git a/drivers/watchdog/exar_wdt.c b/drivers/watchdog/exar_wdt.c
new file mode 100644
index 000000000000..35058d8b21bc
--- /dev/null
+++ b/drivers/watchdog/exar_wdt.c
@@ -0,0 +1,427 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * exar_wdt.c - Driver for the watchdog present in some
+ * Exar/MaxLinear UART chips like the XR28V38x.
+ *
+ * (c) Copyright 2022 D. Müller <d.mueller@elsoft.ch>.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/io.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/watchdog.h>
+
+#define DRV_NAME "exar_wdt"
+
+static const unsigned short sio_config_ports[] = { 0x2e, 0x4e };
+static const unsigned char sio_enter_keys[] = { 0x67, 0x77, 0x87, 0xA0 };
+#define EXAR_EXIT_KEY 0xAA
+
+#define EXAR_LDN 0x07
+#define EXAR_DID 0x20
+#define EXAR_VID 0x23
+#define EXAR_WDT 0x26
+#define EXAR_ACT 0x30
+#define EXAR_RTBASE 0x60
+
+#define EXAR_WDT_LDEV 0x08
+
+#define EXAR_VEN_ID 0x13A8
+#define EXAR_DEV_382 0x0382
+#define EXAR_DEV_384 0x0384
+
+/* WDT runtime registers */
+#define WDT_CTRL 0x00
+#define WDT_VAL 0x01
+
+#define WDT_UNITS_10MS 0x0 /* the 10 millisec unit of the HW is not used */
+#define WDT_UNITS_SEC 0x2
+#define WDT_UNITS_MIN 0x4
+
+/* default WDT control for WDTOUT signal active / rearm by read */
+#define EXAR_WDT_DEF_CONF 0
+
+struct wdt_pdev_node {
+ struct list_head list;
+ struct platform_device *pdev;
+ const char name[16];
+};
+
+struct wdt_priv {
+ /* the lock for WDT io operations */
+ spinlock_t io_lock;
+ struct resource wdt_res;
+ struct watchdog_device wdt_dev;
+ unsigned short did;
+ unsigned short config_port;
+ unsigned char enter_key;
+ unsigned char unit;
+ unsigned char timeout;
+};
+
+#define WATCHDOG_TIMEOUT 60
+
+static int timeout = WATCHDOG_TIMEOUT;
+module_param(timeout, int, 0);
+MODULE_PARM_DESC(timeout,
+ "Watchdog timeout in seconds. 1<=timeout<=15300, default="
+ __MODULE_STRING(WATCHDOG_TIMEOUT) ".");
+
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, 0);
+MODULE_PARM_DESC(nowayout,
+ "Watchdog cannot be stopped once started (default="
+ __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+static int exar_sio_enter(const unsigned short config_port,
+ const unsigned char key)
+{
+ if (!request_muxed_region(config_port, 2, DRV_NAME))
+ return -EBUSY;
+
+ /* write the ENTER-KEY twice */
+ outb(key, config_port);
+ outb(key, config_port);
+
+ return 0;
+}
+
+static void exar_sio_exit(const unsigned short config_port)
+{
+ outb(EXAR_EXIT_KEY, config_port);
+ release_region(config_port, 2);
+}
+
+static unsigned char exar_sio_read(const unsigned short config_port,
+ const unsigned char reg)
+{
+ outb(reg, config_port);
+ return inb(config_port + 1);
+}
+
+static void exar_sio_write(const unsigned short config_port,
+ const unsigned char reg, const unsigned char val)
+{
+ outb(reg, config_port);
+ outb(val, config_port + 1);
+}
+
+static unsigned short exar_sio_read16(const unsigned short config_port,
+ const unsigned char reg)
+{
+ unsigned char msb, lsb;
+
+ msb = exar_sio_read(config_port, reg);
+ lsb = exar_sio_read(config_port, reg + 1);
+
+ return (msb << 8) | lsb;
+}
+
+static void exar_sio_select_wdt(const unsigned short config_port)
+{
+ exar_sio_write(config_port, EXAR_LDN, EXAR_WDT_LDEV);
+}
+
+static void exar_wdt_arm(const struct wdt_priv *priv)
+{
+ unsigned short rt_base = priv->wdt_res.start;
+
+ /* write timeout value twice to arm watchdog */
+ outb(priv->timeout, rt_base + WDT_VAL);
+ outb(priv->timeout, rt_base + WDT_VAL);
+}
+
+static void exar_wdt_disarm(const struct wdt_priv *priv)
+{
+ unsigned short rt_base = priv->wdt_res.start;
+
+ /*
+ * use two accesses with different values to make sure
+ * that a combination of a previous single access and
+ * the ones below with the same value are not falsely
+ * interpreted as "arm watchdog"
+ */
+ outb(0xFF, rt_base + WDT_VAL);
+ outb(0, rt_base + WDT_VAL);
+}
+
+static int exar_wdt_start(struct watchdog_device *wdog)
+{
+ struct wdt_priv *priv = watchdog_get_drvdata(wdog);
+ unsigned short rt_base = priv->wdt_res.start;
+
+ spin_lock(&priv->io_lock);
+
+ exar_wdt_disarm(priv);
+ outb(priv->unit, rt_base + WDT_CTRL);
+ exar_wdt_arm(priv);
+
+ spin_unlock(&priv->io_lock);
+ return 0;
+}
+
+static int exar_wdt_stop(struct watchdog_device *wdog)
+{
+ struct wdt_priv *priv = watchdog_get_drvdata(wdog);
+
+ spin_lock(&priv->io_lock);
+
+ exar_wdt_disarm(priv);
+
+ spin_unlock(&priv->io_lock);
+ return 0;
+}
+
+static int exar_wdt_keepalive(struct watchdog_device *wdog)
+{
+ struct wdt_priv *priv = watchdog_get_drvdata(wdog);
+ unsigned short rt_base = priv->wdt_res.start;
+
+ spin_lock(&priv->io_lock);
+
+ /* reading the WDT_VAL reg will feed the watchdog */
+ inb(rt_base + WDT_VAL);
+
+ spin_unlock(&priv->io_lock);
+ return 0;
+}
+
+static int exar_wdt_set_timeout(struct watchdog_device *wdog, unsigned int t)
+{
+ struct wdt_priv *priv = watchdog_get_drvdata(wdog);
+ bool unit_min = false;
+
+ /*
+ * if new timeout is bigger than 255 seconds, change the
+ * unit to minutes and round the timeout up to the next whole minute
+ */
+ if (t > 255) {
+ unit_min = true;
+ t = DIV_ROUND_UP(t, 60);
+ }
+
+ /* save for later use in exar_wdt_start() */
+ priv->unit = unit_min ? WDT_UNITS_MIN : WDT_UNITS_SEC;
+ priv->timeout = t;
+
+ wdog->timeout = unit_min ? t * 60 : t;
+
+ if (watchdog_hw_running(wdog))
+ exar_wdt_start(wdog);
+
+ return 0;
+}
+
+static const struct watchdog_info exar_wdt_info = {
+ .options = WDIOF_KEEPALIVEPING |
+ WDIOF_SETTIMEOUT |
+ WDIOF_MAGICCLOSE,
+ .identity = "Exar/MaxLinear XR28V38x Watchdog",
+};
+
+static const struct watchdog_ops exar_wdt_ops = {
+ .owner = THIS_MODULE,
+ .start = exar_wdt_start,
+ .stop = exar_wdt_stop,
+ .ping = exar_wdt_keepalive,
+ .set_timeout = exar_wdt_set_timeout,
+};
+
+static int exar_wdt_config(struct watchdog_device *wdog,
+ const unsigned char conf)
+{
+ struct wdt_priv *priv = watchdog_get_drvdata(wdog);
+ int ret;
+
+ ret = exar_sio_enter(priv->config_port, priv->enter_key);
+ if (ret)
+ return ret;
+
+ exar_sio_select_wdt(priv->config_port);
+ exar_sio_write(priv->config_port, EXAR_WDT, conf);
+
+ exar_sio_exit(priv->config_port);
+
+ return 0;
+}
+
+static int __init exar_wdt_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct wdt_priv *priv = dev->platform_data;
+ struct watchdog_device *wdt_dev = &priv->wdt_dev;
+ struct resource *res;
+ int ret;
+
+ res = platform_get_resource(pdev, IORESOURCE_IO, 0);
+ if (!res)
+ return -ENXIO;
+
+ spin_lock_init(&priv->io_lock);
+
+ wdt_dev->info = &exar_wdt_info;
+ wdt_dev->ops = &exar_wdt_ops;
+ wdt_dev->min_timeout = 1;
+ wdt_dev->max_timeout = 255 * 60;
+
+ watchdog_init_timeout(wdt_dev, timeout, NULL);
+ watchdog_set_nowayout(wdt_dev, nowayout);
+ watchdog_stop_on_reboot(wdt_dev);
+ watchdog_stop_on_unregister(wdt_dev);
+ watchdog_set_drvdata(wdt_dev, priv);
+
+ ret = exar_wdt_config(wdt_dev, EXAR_WDT_DEF_CONF);
+ if (ret)
+ return ret;
+
+ exar_wdt_set_timeout(wdt_dev, timeout);
+ /* Make sure that the watchdog is not running */
+ exar_wdt_stop(wdt_dev);
+
+ ret = devm_watchdog_register_device(dev, wdt_dev);
+ if (ret)
+ return ret;
+
+ dev_info(dev, "XR28V%X WDT initialized. timeout=%d sec (nowayout=%d)\n",
+ priv->did, timeout, nowayout);
+
+ return 0;
+}
+
+static unsigned short __init exar_detect(const unsigned short config_port,
+ const unsigned char key,
+ unsigned short *rt_base)
+{
+ int ret;
+ unsigned short base = 0;
+ unsigned short vid, did;
+
+ ret = exar_sio_enter(config_port, key);
+ if (ret)
+ return 0;
+
+ vid = exar_sio_read16(config_port, EXAR_VID);
+ did = exar_sio_read16(config_port, EXAR_DID);
+
+ /* check for the vendor and device IDs we currently know about */
+ if (vid == EXAR_VEN_ID &&
+ (did == EXAR_DEV_382 ||
+ did == EXAR_DEV_384)) {
+ exar_sio_select_wdt(config_port);
+ /* is device active? */
+ if (exar_sio_read(config_port, EXAR_ACT) == 0x01)
+ base = exar_sio_read16(config_port, EXAR_RTBASE);
+ }
+
+ exar_sio_exit(config_port);
+
+ if (base) {
+ pr_debug("Found a XR28V%X WDT (conf: 0x%x / rt: 0x%04x)\n",
+ did, config_port, base);
+ *rt_base = base;
+ return did;
+ }
+
+ return 0;
+}
+
+static struct platform_driver exar_wdt_driver = {
+ .driver = {
+ .name = DRV_NAME,
+ },
+};
+
+static LIST_HEAD(pdev_list);
+
+static int __init exar_wdt_register(struct wdt_priv *priv, const int idx)
+{
+ struct wdt_pdev_node *n;
+
+ n = kzalloc(sizeof(*n), GFP_KERNEL);
+ if (!n)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&n->list);
+
+ scnprintf((char *)n->name, sizeof(n->name), DRV_NAME ".%d", idx);
+ priv->wdt_res.name = n->name;
+
+ n->pdev = platform_device_register_resndata(NULL, DRV_NAME, idx,
+ &priv->wdt_res, 1,
+ priv, sizeof(*priv));
+ if (IS_ERR(n->pdev)) {
+ kfree(n);
+ return PTR_ERR(n->pdev);
+ }
+
+ list_add_tail(&n->list, &pdev_list);
+
+ return 0;
+}
+
+static void exar_wdt_unregister(void)
+{
+ struct wdt_pdev_node *n, *t;
+
+ list_for_each_entry_safe(n, t, &pdev_list, list) {
+ platform_device_unregister(n->pdev);
+ list_del(&n->list);
+ kfree(n);
+ }
+}
+
+static int __init exar_wdt_init(void)
+{
+ int ret, i, j, idx = 0;
+
+ /* search for active Exar watchdogs at all possible locations */
+ for (i = 0; i < ARRAY_SIZE(sio_config_ports); i++) {
+ for (j = 0; j < ARRAY_SIZE(sio_enter_keys); j++) {
+ unsigned short did, rt_base = 0;
+
+ did = exar_detect(sio_config_ports[i],
+ sio_enter_keys[j],
+ &rt_base);
+
+ if (did) {
+ struct wdt_priv priv = {
+ .wdt_res = DEFINE_RES_IO(rt_base, 2),
+ .did = did,
+ .config_port = sio_config_ports[i],
+ .enter_key = sio_enter_keys[j],
+ };
+
+ ret = exar_wdt_register(&priv, idx);
+ if (!ret)
+ idx++;
+ }
+ }
+ }
+
+ if (!idx)
+ return -ENODEV;
+
+ ret = platform_driver_probe(&exar_wdt_driver, exar_wdt_probe);
+ if (ret)
+ exar_wdt_unregister();
+
+ return ret;
+}
+
+static void __exit exar_wdt_exit(void)
+{
+ exar_wdt_unregister();
+ platform_driver_unregister(&exar_wdt_driver);
+}
+
+module_init(exar_wdt_init);
+module_exit(exar_wdt_exit);
+
+MODULE_AUTHOR("David Müller <d.mueller@elsoft.ch>");
+MODULE_DESCRIPTION("Exar/MaxLinear Watchdog Driver");
+MODULE_LICENSE("GPL");
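Once exar_wdt binds, the watchdog appears as a standard /dev/watchdogN character device. The sketch below shows the usual feed-and-magic-close sequence against it; the /dev/watchdog0 path is an assumption for whichever node the core assigned.

/* Userspace sketch: set the timeout, feed the dog a few times, then do a
 * magic close (honoured because the driver advertises WDIOF_MAGICCLOSE). */
#include <fcntl.h>
#include <linux/watchdog.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int timeout = 120;	/* >255 s makes the driver switch to minute units */
	int fd = open("/dev/watchdog0", O_WRONLY);

	if (fd < 0)
		return 1;

	ioctl(fd, WDIOC_SETTIMEOUT, &timeout);	/* exar_wdt_set_timeout() */

	for (int i = 0; i < 10; i++) {
		ioctl(fd, WDIOC_KEEPALIVE, 0);	/* exar_wdt_keepalive() */
		sleep(timeout / 2);
	}

	write(fd, "V", 1);	/* magic close so the stop is not treated as a crash */
	close(fd);
	return 0;
}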
diff --git a/drivers/watchdog/ftwdt010_wdt.c b/drivers/watchdog/ftwdt010_wdt.c
index 21dcc7765688..442c5bf63ff4 100644
--- a/drivers/watchdog/ftwdt010_wdt.c
+++ b/drivers/watchdog/ftwdt010_wdt.c
@@ -47,21 +47,28 @@ struct ftwdt010_wdt *to_ftwdt010_wdt(struct watchdog_device *wdd)
return container_of(wdd, struct ftwdt010_wdt, wdd);
}
-static int ftwdt010_wdt_start(struct watchdog_device *wdd)
+static void ftwdt010_enable(struct ftwdt010_wdt *gwdt,
+ unsigned int timeout,
+ bool need_irq)
{
- struct ftwdt010_wdt *gwdt = to_ftwdt010_wdt(wdd);
u32 enable;
- writel(wdd->timeout * WDT_CLOCK, gwdt->base + FTWDT010_WDLOAD);
+ writel(timeout * WDT_CLOCK, gwdt->base + FTWDT010_WDLOAD);
writel(WDRESTART_MAGIC, gwdt->base + FTWDT010_WDRESTART);
/* set clock before enabling */
enable = WDCR_CLOCK_5MHZ | WDCR_SYS_RST;
writel(enable, gwdt->base + FTWDT010_WDCR);
- if (gwdt->has_irq)
+ if (need_irq)
enable |= WDCR_WDINTR;
enable |= WDCR_ENABLE;
writel(enable, gwdt->base + FTWDT010_WDCR);
+}
+static int ftwdt010_wdt_start(struct watchdog_device *wdd)
+{
+ struct ftwdt010_wdt *gwdt = to_ftwdt010_wdt(wdd);
+
+ ftwdt010_enable(gwdt, wdd->timeout, gwdt->has_irq);
return 0;
}
@@ -93,6 +100,13 @@ static int ftwdt010_wdt_set_timeout(struct watchdog_device *wdd,
return 0;
}
+static int ftwdt010_wdt_restart(struct watchdog_device *wdd,
+ unsigned long action, void *data)
+{
+ ftwdt010_enable(to_ftwdt010_wdt(wdd), 0, false);
+ return 0;
+}
+
static irqreturn_t ftwdt010_wdt_interrupt(int irq, void *data)
{
struct ftwdt010_wdt *gwdt = data;
@@ -107,6 +121,7 @@ static const struct watchdog_ops ftwdt010_wdt_ops = {
.stop = ftwdt010_wdt_stop,
.ping = ftwdt010_wdt_ping,
.set_timeout = ftwdt010_wdt_set_timeout,
+ .restart = ftwdt010_wdt_restart,
.owner = THIS_MODULE,
};
@@ -156,7 +171,7 @@ static int ftwdt010_wdt_probe(struct platform_device *pdev)
}
irq = platform_get_irq(pdev, 0);
- if (irq) {
+ if (irq > 0) {
ret = devm_request_irq(dev, irq, ftwdt010_wdt_interrupt, 0,
"watchdog bark", gwdt);
if (ret)
diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c
index a5006a58e0db..f79f932bca14 100644
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -20,7 +20,9 @@
#include <linux/pci_ids.h>
#include <linux/types.h>
#include <linux/watchdog.h>
+#ifdef CONFIG_HPWDT_NMI_DECODING
#include <asm/nmi.h>
+#endif
#include <linux/crash_dump.h>
#define HPWDT_VERSION "2.0.4"
diff --git a/drivers/watchdog/imx7ulp_wdt.c b/drivers/watchdog/imx7ulp_wdt.c
index 922b60374295..2897902090b3 100644
--- a/drivers/watchdog/imx7ulp_wdt.c
+++ b/drivers/watchdog/imx7ulp_wdt.c
@@ -9,12 +9,15 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/reboot.h>
#include <linux/watchdog.h>
#define WDOG_CS 0x0
+#define WDOG_CS_FLG BIT(14)
#define WDOG_CS_CMD32EN BIT(13)
+#define WDOG_CS_PRES BIT(12)
#define WDOG_CS_ULK BIT(11)
#define WDOG_CS_RCS BIT(10)
#define LPO_CLK 0x1
@@ -39,60 +42,105 @@
#define DEFAULT_TIMEOUT 60
#define MAX_TIMEOUT 128
#define WDOG_CLOCK_RATE 1000
-#define WDOG_WAIT_TIMEOUT 20
+#define WDOG_ULK_WAIT_TIMEOUT 1000
+#define WDOG_RCS_WAIT_TIMEOUT 10000
+#define WDOG_RCS_POST_WAIT 3000
+
+#define RETRY_MAX 5
static bool nowayout = WATCHDOG_NOWAYOUT;
module_param(nowayout, bool, 0000);
MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+struct imx_wdt_hw_feature {
+ bool prescaler_enable;
+ u32 wdog_clock_rate;
+};
+
struct imx7ulp_wdt_device {
struct watchdog_device wdd;
void __iomem *base;
struct clk *clk;
+ bool post_rcs_wait;
+ const struct imx_wdt_hw_feature *hw;
};
-static int imx7ulp_wdt_wait(void __iomem *base, u32 mask)
+static int imx7ulp_wdt_wait_ulk(void __iomem *base)
{
u32 val = readl(base + WDOG_CS);
- if (!(val & mask) && readl_poll_timeout_atomic(base + WDOG_CS, val,
- val & mask, 0,
- WDOG_WAIT_TIMEOUT))
+ if (!(val & WDOG_CS_ULK) &&
+ readl_poll_timeout_atomic(base + WDOG_CS, val,
+ val & WDOG_CS_ULK, 0,
+ WDOG_ULK_WAIT_TIMEOUT))
return -ETIMEDOUT;
return 0;
}
-static int imx7ulp_wdt_enable(struct watchdog_device *wdog, bool enable)
+static int imx7ulp_wdt_wait_rcs(struct imx7ulp_wdt_device *wdt)
{
- struct imx7ulp_wdt_device *wdt = watchdog_get_drvdata(wdog);
+ int ret = 0;
+ u32 val = readl(wdt->base + WDOG_CS);
+ u64 timeout = (val & WDOG_CS_PRES) ?
+ WDOG_RCS_WAIT_TIMEOUT * 256 : WDOG_RCS_WAIT_TIMEOUT;
+ unsigned long wait_min = (val & WDOG_CS_PRES) ?
+ WDOG_RCS_POST_WAIT * 256 : WDOG_RCS_POST_WAIT;
+
+ if (!(val & WDOG_CS_RCS) &&
+ readl_poll_timeout(wdt->base + WDOG_CS, val, val & WDOG_CS_RCS, 100,
+ timeout))
+ ret = -ETIMEDOUT;
+
+ /* Wait 2.5 clocks after RCS done */
+ if (wdt->post_rcs_wait)
+ usleep_range(wait_min, wait_min + 2000);
+
+ return ret;
+}
+static int _imx7ulp_wdt_enable(struct imx7ulp_wdt_device *wdt, bool enable)
+{
u32 val = readl(wdt->base + WDOG_CS);
int ret;
local_irq_disable();
writel(UNLOCK, wdt->base + WDOG_CNT);
- ret = imx7ulp_wdt_wait(wdt->base, WDOG_CS_ULK);
+ ret = imx7ulp_wdt_wait_ulk(wdt->base);
if (ret)
goto enable_out;
if (enable)
writel(val | WDOG_CS_EN, wdt->base + WDOG_CS);
else
writel(val & ~WDOG_CS_EN, wdt->base + WDOG_CS);
- imx7ulp_wdt_wait(wdt->base, WDOG_CS_RCS);
-enable_out:
local_irq_enable();
+ ret = imx7ulp_wdt_wait_rcs(wdt);
return ret;
+
+enable_out:
+ local_irq_enable();
+ return ret;
}
-static bool imx7ulp_wdt_is_enabled(void __iomem *base)
+static int imx7ulp_wdt_enable(struct watchdog_device *wdog, bool enable)
{
- u32 val = readl(base + WDOG_CS);
+ struct imx7ulp_wdt_device *wdt = watchdog_get_drvdata(wdog);
+ int ret;
+ u32 val;
+ u32 loop = RETRY_MAX;
+
+ do {
+ ret = _imx7ulp_wdt_enable(wdt, enable);
+ val = readl(wdt->base + WDOG_CS);
+ } while (--loop > 0 && ((!!(val & WDOG_CS_EN)) != enable || ret));
- return val & WDOG_CS_EN;
+ if (loop == 0)
+ return -EBUSY;
+
+ return ret;
}
static int imx7ulp_wdt_ping(struct watchdog_device *wdog)
@@ -114,26 +162,44 @@ static int imx7ulp_wdt_stop(struct watchdog_device *wdog)
return imx7ulp_wdt_enable(wdog, false);
}
-static int imx7ulp_wdt_set_timeout(struct watchdog_device *wdog,
- unsigned int timeout)
+static int _imx7ulp_wdt_set_timeout(struct imx7ulp_wdt_device *wdt,
+ unsigned int toval)
{
- struct imx7ulp_wdt_device *wdt = watchdog_get_drvdata(wdog);
- u32 val = WDOG_CLOCK_RATE * timeout;
int ret;
local_irq_disable();
writel(UNLOCK, wdt->base + WDOG_CNT);
- ret = imx7ulp_wdt_wait(wdt->base, WDOG_CS_ULK);
+ ret = imx7ulp_wdt_wait_ulk(wdt->base);
if (ret)
goto timeout_out;
- writel(val, wdt->base + WDOG_TOVAL);
- imx7ulp_wdt_wait(wdt->base, WDOG_CS_RCS);
-
- wdog->timeout = timeout;
+ writel(toval, wdt->base + WDOG_TOVAL);
+ local_irq_enable();
+ ret = imx7ulp_wdt_wait_rcs(wdt);
+ return ret;
timeout_out:
local_irq_enable();
+ return ret;
+}
+static int imx7ulp_wdt_set_timeout(struct watchdog_device *wdog,
+ unsigned int timeout)
+{
+ struct imx7ulp_wdt_device *wdt = watchdog_get_drvdata(wdog);
+ u32 toval = wdt->hw->wdog_clock_rate * timeout;
+ u32 val;
+ int ret;
+ u32 loop = RETRY_MAX;
+
+ do {
+ ret = _imx7ulp_wdt_set_timeout(wdt, toval);
+ val = readl(wdt->base + WDOG_TOVAL);
+ } while (--loop > 0 && (val != toval || ret));
+
+ if (loop == 0)
+ return -EBUSY;
+
+ wdog->timeout = timeout;
return ret;
}
@@ -173,29 +239,62 @@ static const struct watchdog_info imx7ulp_wdt_info = {
WDIOF_MAGICCLOSE,
};
-static int imx7ulp_wdt_init(void __iomem *base, unsigned int timeout)
+static int _imx7ulp_wdt_init(struct imx7ulp_wdt_device *wdt, unsigned int timeout, unsigned int cs)
{
u32 val;
int ret;
local_irq_disable();
- /* unlock the wdog for reconfiguration */
- writel_relaxed(UNLOCK_SEQ0, base + WDOG_CNT);
- writel_relaxed(UNLOCK_SEQ1, base + WDOG_CNT);
- ret = imx7ulp_wdt_wait(base, WDOG_CS_ULK);
+
+ val = readl(wdt->base + WDOG_CS);
+ if (val & WDOG_CS_CMD32EN) {
+ writel(UNLOCK, wdt->base + WDOG_CNT);
+ } else {
+ mb();
+ /* unlock the wdog for reconfiguration */
+ writel_relaxed(UNLOCK_SEQ0, wdt->base + WDOG_CNT);
+ writel_relaxed(UNLOCK_SEQ1, wdt->base + WDOG_CNT);
+ mb();
+ }
+
+ ret = imx7ulp_wdt_wait_ulk(wdt->base);
if (ret)
goto init_out;
/* set an initial timeout value in TOVAL */
- writel(timeout, base + WDOG_TOVAL);
- /* enable 32bit command sequence and reconfigure */
- val = WDOG_CS_CMD32EN | WDOG_CS_CLK | WDOG_CS_UPDATE |
- WDOG_CS_WAIT | WDOG_CS_STOP;
- writel(val, base + WDOG_CS);
- imx7ulp_wdt_wait(base, WDOG_CS_RCS);
+ writel(timeout, wdt->base + WDOG_TOVAL);
+ writel(cs, wdt->base + WDOG_CS);
+ local_irq_enable();
+ ret = imx7ulp_wdt_wait_rcs(wdt);
+
+ return ret;
init_out:
local_irq_enable();
+ return ret;
+}
+
+static int imx7ulp_wdt_init(struct imx7ulp_wdt_device *wdt, unsigned int timeout)
+{
+ /* enable 32bit command sequence and reconfigure */
+ u32 val = WDOG_CS_CMD32EN | WDOG_CS_CLK | WDOG_CS_UPDATE |
+ WDOG_CS_WAIT | WDOG_CS_STOP;
+ u32 cs, toval;
+ int ret;
+ u32 loop = RETRY_MAX;
+
+ if (wdt->hw->prescaler_enable)
+ val |= WDOG_CS_PRES;
+
+ do {
+ ret = _imx7ulp_wdt_init(wdt, timeout, val);
+ toval = readl(wdt->base + WDOG_TOVAL);
+ cs = readl(wdt->base + WDOG_CS);
+ cs &= ~(WDOG_CS_FLG | WDOG_CS_ULK | WDOG_CS_RCS);
+ } while (--loop > 0 && (cs != val || toval != timeout || ret));
+
+ if (loop == 0)
+ return -EBUSY;
return ret;
}
@@ -228,6 +327,15 @@ static int imx7ulp_wdt_probe(struct platform_device *pdev)
return PTR_ERR(imx7ulp_wdt->clk);
}
+ imx7ulp_wdt->post_rcs_wait = true;
+ if (of_device_is_compatible(dev->of_node,
+ "fsl,imx8ulp-wdt")) {
+ dev_info(dev, "imx8ulp wdt probe\n");
+ imx7ulp_wdt->post_rcs_wait = false;
+ } else {
+ dev_info(dev, "imx7ulp wdt probe\n");
+ }
+
ret = clk_prepare_enable(imx7ulp_wdt->clk);
if (ret)
return ret;
@@ -248,14 +356,16 @@ static int imx7ulp_wdt_probe(struct platform_device *pdev)
watchdog_stop_on_reboot(wdog);
watchdog_stop_on_unregister(wdog);
watchdog_set_drvdata(wdog, imx7ulp_wdt);
- ret = imx7ulp_wdt_init(imx7ulp_wdt->base, wdog->timeout * WDOG_CLOCK_RATE);
+
+ imx7ulp_wdt->hw = of_device_get_match_data(dev);
+ ret = imx7ulp_wdt_init(imx7ulp_wdt, wdog->timeout * imx7ulp_wdt->hw->wdog_clock_rate);
if (ret)
return ret;
return devm_watchdog_register_device(dev, wdog);
}
-static int __maybe_unused imx7ulp_wdt_suspend(struct device *dev)
+static int __maybe_unused imx7ulp_wdt_suspend_noirq(struct device *dev)
{
struct imx7ulp_wdt_device *imx7ulp_wdt = dev_get_drvdata(dev);
@@ -267,30 +377,44 @@ static int __maybe_unused imx7ulp_wdt_suspend(struct device *dev)
return 0;
}
-static int __maybe_unused imx7ulp_wdt_resume(struct device *dev)
+static int __maybe_unused imx7ulp_wdt_resume_noirq(struct device *dev)
{
struct imx7ulp_wdt_device *imx7ulp_wdt = dev_get_drvdata(dev);
- u32 timeout = imx7ulp_wdt->wdd.timeout * WDOG_CLOCK_RATE;
+ u32 timeout = imx7ulp_wdt->wdd.timeout * imx7ulp_wdt->hw->wdog_clock_rate;
int ret;
ret = clk_prepare_enable(imx7ulp_wdt->clk);
if (ret)
return ret;
- if (imx7ulp_wdt_is_enabled(imx7ulp_wdt->base))
- imx7ulp_wdt_init(imx7ulp_wdt->base, timeout);
-
- if (watchdog_active(&imx7ulp_wdt->wdd))
+ if (watchdog_active(&imx7ulp_wdt->wdd)) {
+ imx7ulp_wdt_init(imx7ulp_wdt, timeout);
imx7ulp_wdt_start(&imx7ulp_wdt->wdd);
+ imx7ulp_wdt_ping(&imx7ulp_wdt->wdd);
+ }
return 0;
}
-static SIMPLE_DEV_PM_OPS(imx7ulp_wdt_pm_ops, imx7ulp_wdt_suspend,
- imx7ulp_wdt_resume);
+static const struct dev_pm_ops imx7ulp_wdt_pm_ops = {
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(imx7ulp_wdt_suspend_noirq,
+ imx7ulp_wdt_resume_noirq)
+};
+
+static const struct imx_wdt_hw_feature imx7ulp_wdt_hw = {
+ .prescaler_enable = false,
+ .wdog_clock_rate = 1000,
+};
+
+static const struct imx_wdt_hw_feature imx93_wdt_hw = {
+ .prescaler_enable = true,
+ .wdog_clock_rate = 125,
+};
static const struct of_device_id imx7ulp_wdt_dt_ids[] = {
- { .compatible = "fsl,imx7ulp-wdt", },
+ { .compatible = "fsl,imx8ulp-wdt", .data = &imx7ulp_wdt_hw, },
+ { .compatible = "fsl,imx7ulp-wdt", .data = &imx7ulp_wdt_hw, },
+ { .compatible = "fsl,imx93-wdt", .data = &imx93_wdt_hw, },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, imx7ulp_wdt_dt_ids);
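The imx7ulp rework wraps every unlock/reconfigure sequence in a bounded write-and-verify retry (RETRY_MAX), because the hardware can silently ignore an update if the short reconfiguration window after unlock is missed. A generic sketch of that pattern follows; struct my_wdt, wdt_try_update() and wdt_read_back() are hypothetical stand-ins for the driver's state and register accessors.

/* Sketch of the bounded write-and-verify retry used by the imx7ulp rework. */
#include <linux/errno.h>
#include <linux/types.h>

struct my_wdt;						/* hypothetical driver state */
static int wdt_try_update(struct my_wdt *wdt, u32 val);	/* hypothetical */
static u32 wdt_read_back(struct my_wdt *wdt);		/* hypothetical */

static int wdt_update_verified(struct my_wdt *wdt, u32 want)
{
	u32 loop = 5;	/* RETRY_MAX */
	u32 got;
	int ret;

	do {
		ret = wdt_try_update(wdt, want);	/* may fail or be ignored */
		got = wdt_read_back(wdt);
	} while (--loop > 0 && (got != want || ret));

	if (loop == 0)
		return -EBUSY;	/* hardware never accepted the value */

	return ret;
}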
diff --git a/drivers/watchdog/meson_gxbb_wdt.c b/drivers/watchdog/meson_gxbb_wdt.c
index d3c9e2f6e63b..981a2f7c3bec 100644
--- a/drivers/watchdog/meson_gxbb_wdt.c
+++ b/drivers/watchdog/meson_gxbb_wdt.c
@@ -156,6 +156,7 @@ static int meson_gxbb_wdt_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
struct meson_gxbb_wdt *data;
int ret;
+ u32 ctrl_reg;
data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
if (!data)
@@ -189,13 +190,26 @@ static int meson_gxbb_wdt_probe(struct platform_device *pdev)
watchdog_set_nowayout(&data->wdt_dev, nowayout);
watchdog_set_drvdata(&data->wdt_dev, data);
+ ctrl_reg = readl(data->reg_base + GXBB_WDT_CTRL_REG) &
+ GXBB_WDT_CTRL_EN;
+
+ if (ctrl_reg) {
+ /* Watchdog is running - keep it running but extend timeout
+ * to the maximum while setting the timebase
+ */
+ set_bit(WDOG_HW_RUNNING, &data->wdt_dev.status);
+ meson_gxbb_wdt_set_timeout(&data->wdt_dev,
+ GXBB_WDT_TCNT_SETUP_MASK / 1000);
+ }
+
/* Setup with 1ms timebase */
- writel(((clk_get_rate(data->clk) / 1000) & GXBB_WDT_CTRL_DIV_MASK) |
- GXBB_WDT_CTRL_EE_RESET |
- GXBB_WDT_CTRL_CLK_EN |
- GXBB_WDT_CTRL_CLKDIV_EN,
- data->reg_base + GXBB_WDT_CTRL_REG);
+ ctrl_reg |= ((clk_get_rate(data->clk) / 1000) &
+ GXBB_WDT_CTRL_DIV_MASK) |
+ GXBB_WDT_CTRL_EE_RESET |
+ GXBB_WDT_CTRL_CLK_EN |
+ GXBB_WDT_CTRL_CLKDIV_EN;
+ writel(ctrl_reg, data->reg_base + GXBB_WDT_CTRL_REG);
meson_gxbb_wdt_set_timeout(&data->wdt_dev, data->wdt_dev.timeout);
return devm_watchdog_register_device(dev, &data->wdt_dev);
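The meson probe change is the usual bootloader hand-off: if the counter was left enabled, the driver keeps it ticking and sets WDOG_HW_RUNNING so the core pings it until userspace opens the node. A condensed sketch of the pattern, with my_wdt_hw_enabled() as a hypothetical stand-in for reading the IP's enable bit:

/* Sketch of the bootloader hand-off pattern used above. */
#include <linux/bitops.h>
#include <linux/watchdog.h>

static bool my_wdt_hw_enabled(struct watchdog_device *wdd);	/* hypothetical */

static void my_wdt_probe_handoff(struct watchdog_device *wdd)
{
	if (my_wdt_hw_enabled(wdd)) {
		/*
		 * Tell the watchdog core the hardware is already running so
		 * it keeps feeding it until userspace takes over, instead of
		 * letting the board reset before /dev/watchdog is opened.
		 */
		set_bit(WDOG_HW_RUNNING, &wdd->status);
	}
}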
diff --git a/drivers/watchdog/npcm_wdt.c b/drivers/watchdog/npcm_wdt.c
index 28a24caa2627..a5dd1c230137 100644
--- a/drivers/watchdog/npcm_wdt.c
+++ b/drivers/watchdog/npcm_wdt.c
@@ -3,6 +3,7 @@
// Copyright (c) 2018 IBM Corp.
#include <linux/bitops.h>
+#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
@@ -43,6 +44,7 @@
struct npcm_wdt {
struct watchdog_device wdd;
void __iomem *reg;
+ struct clk *clk;
};
static inline struct npcm_wdt *to_npcm_wdt(struct watchdog_device *wdd)
@@ -66,6 +68,9 @@ static int npcm_wdt_start(struct watchdog_device *wdd)
struct npcm_wdt *wdt = to_npcm_wdt(wdd);
u32 val;
+ if (wdt->clk)
+ clk_prepare_enable(wdt->clk);
+
if (wdd->timeout < 2)
val = 0x800;
else if (wdd->timeout < 3)
@@ -100,6 +105,9 @@ static int npcm_wdt_stop(struct watchdog_device *wdd)
writel(0, wdt->reg);
+ if (wdt->clk)
+ clk_disable_unprepare(wdt->clk);
+
return 0;
}
@@ -147,6 +155,10 @@ static int npcm_wdt_restart(struct watchdog_device *wdd,
{
struct npcm_wdt *wdt = to_npcm_wdt(wdd);
+ /* For reset, we start the WDT clock and leave it running. */
+ if (wdt->clk)
+ clk_prepare_enable(wdt->clk);
+
writel(NPCM_WTR | NPCM_WTRE | NPCM_WTE, wdt->reg);
udelay(1000);
@@ -191,6 +203,10 @@ static int npcm_wdt_probe(struct platform_device *pdev)
if (IS_ERR(wdt->reg))
return PTR_ERR(wdt->reg);
+ wdt->clk = devm_clk_get_optional(&pdev->dev, NULL);
+ if (IS_ERR(wdt->clk))
+ return PTR_ERR(wdt->clk);
+
irq = platform_get_irq(pdev, 0);
if (irq < 0)
return irq;
diff --git a/drivers/watchdog/rti_wdt.c b/drivers/watchdog/rti_wdt.c
index 053ef3bde12d..6e9253761fc1 100644
--- a/drivers/watchdog/rti_wdt.c
+++ b/drivers/watchdog/rti_wdt.c
@@ -225,9 +225,8 @@ static int rti_wdt_probe(struct platform_device *pdev)
wdt->freq = wdt->freq * 9 / 10;
pm_runtime_enable(dev);
- ret = pm_runtime_get_sync(dev);
+ ret = pm_runtime_resume_and_get(dev);
if (ret < 0) {
- pm_runtime_put_noidle(dev);
pm_runtime_disable(&pdev->dev);
return dev_err_probe(dev, ret, "runtime pm failed\n");
}
diff --git a/drivers/watchdog/rzg2l_wdt.c b/drivers/watchdog/rzg2l_wdt.c
index 6eea0ee4af49..974a4194a8fd 100644
--- a/drivers/watchdog/rzg2l_wdt.c
+++ b/drivers/watchdog/rzg2l_wdt.c
@@ -10,7 +10,7 @@
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/of.h>
+#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/reset.h>
@@ -40,6 +40,11 @@ module_param(nowayout, bool, 0);
MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+enum rz_wdt_type {
+ WDT_RZG2L,
+ WDT_RZV2M,
+};
+
struct rzg2l_wdt_priv {
void __iomem *base;
struct watchdog_device wdev;
@@ -48,6 +53,7 @@ struct rzg2l_wdt_priv {
unsigned long delay;
struct clk *pclk;
struct clk *osc_clk;
+ enum rz_wdt_type devtype;
};
static void rzg2l_wdt_wait_delay(struct rzg2l_wdt_priv *priv)
@@ -142,11 +148,29 @@ static int rzg2l_wdt_restart(struct watchdog_device *wdev,
clk_prepare_enable(priv->pclk);
clk_prepare_enable(priv->osc_clk);
- /* Generate Reset (WDTRSTB) Signal on parity error */
- rzg2l_wdt_write(priv, 0, PECR);
+ if (priv->devtype == WDT_RZG2L) {
+ /* Generate Reset (WDTRSTB) Signal on parity error */
+ rzg2l_wdt_write(priv, 0, PECR);
+
+ /* Force parity error */
+ rzg2l_wdt_write(priv, PEEN_FORCE, PEEN);
+ } else {
+ /* RZ/V2M doesn't have parity error registers */
+
+ wdev->timeout = 0;
+
+ /* Initialize timeout */
+ rzg2l_wdt_init_timeout(wdev);
- /* Force parity error */
- rzg2l_wdt_write(priv, PEEN_FORCE, PEEN);
+ /* Initialize watchdog counter register */
+ rzg2l_wdt_write(priv, 0, WDTTIM);
+
+ /* Enable watchdog timer */
+ rzg2l_wdt_write(priv, WDTCNT_WDTEN, WDTCNT);
+
+ /* Wait 2 consecutive overflow cycles for reset */
+ mdelay(DIV_ROUND_UP(2 * 0xFFFFF * 1000, priv->osc_clk_rate));
+ }
return 0;
}
@@ -227,6 +251,8 @@ static int rzg2l_wdt_probe(struct platform_device *pdev)
if (ret)
return dev_err_probe(dev, ret, "failed to deassert");
+ priv->devtype = (uintptr_t)of_device_get_match_data(dev);
+
pm_runtime_enable(&pdev->dev);
priv->wdev.info = &rzg2l_wdt_ident;
@@ -255,7 +281,8 @@ static int rzg2l_wdt_probe(struct platform_device *pdev)
}
static const struct of_device_id rzg2l_wdt_ids[] = {
- { .compatible = "renesas,rzg2l-wdt", },
+ { .compatible = "renesas,rzg2l-wdt", .data = (void *)WDT_RZG2L },
+ { .compatible = "renesas,rzv2m-wdt", .data = (void *)WDT_RZV2M },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, rzg2l_wdt_ids);
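The RZ/V2M support keys off a per-compatible enum carried in the OF match data, recovered in probe with of_device_get_match_data(). A condensed sketch of that pattern; the "vendor,*-wdt" compatibles and my_wdt_type enum are hypothetical.

/* Sketch of the per-compatible match-data pattern used above. */
#include <linux/of_device.h>
#include <linux/platform_device.h>

enum my_wdt_type { WDT_FOO, WDT_BAR };

static const struct of_device_id my_wdt_ids[] = {
	{ .compatible = "vendor,foo-wdt", .data = (void *)WDT_FOO },
	{ .compatible = "vendor,bar-wdt", .data = (void *)WDT_BAR },
	{ /* sentinel */ }
};

static int my_wdt_probe(struct platform_device *pdev)
{
	/* Recover the enum stashed in .data for the matched compatible. */
	enum my_wdt_type type =
		(enum my_wdt_type)(uintptr_t)of_device_get_match_data(&pdev->dev);

	if (type == WDT_BAR) {
		/* variant-specific setup would go here */
	}
	return 0;
}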
diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c
index 95919392927f..d3fc8ed886ff 100644
--- a/drivers/watchdog/s3c2410_wdt.c
+++ b/drivers/watchdog/s3c2410_wdt.c
@@ -60,9 +60,13 @@
#define EXYNOS850_CLUSTER0_NONCPU_INT_EN 0x1244
#define EXYNOS850_CLUSTER1_NONCPU_OUT 0x1620
#define EXYNOS850_CLUSTER1_NONCPU_INT_EN 0x1644
+#define EXYNOSAUTOV9_CLUSTER1_NONCPU_OUT 0x1520
+#define EXYNOSAUTOV9_CLUSTER1_NONCPU_INT_EN 0x1544
#define EXYNOS850_CLUSTER0_WDTRESET_BIT 24
#define EXYNOS850_CLUSTER1_WDTRESET_BIT 23
+#define EXYNOSAUTOV9_CLUSTER0_WDTRESET_BIT 25
+#define EXYNOSAUTOV9_CLUSTER1_WDTRESET_BIT 24
/**
* DOC: Quirk flags for different Samsung watchdog IP-cores
@@ -236,6 +240,30 @@ static const struct s3c2410_wdt_variant drv_data_exynos850_cl1 = {
QUIRK_HAS_PMU_RST_STAT | QUIRK_HAS_PMU_CNT_EN,
};
+static const struct s3c2410_wdt_variant drv_data_exynosautov9_cl0 = {
+ .mask_reset_reg = EXYNOS850_CLUSTER0_NONCPU_INT_EN,
+ .mask_bit = 2,
+ .mask_reset_inv = true,
+ .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET,
+ .rst_stat_bit = EXYNOSAUTOV9_CLUSTER0_WDTRESET_BIT,
+ .cnt_en_reg = EXYNOS850_CLUSTER0_NONCPU_OUT,
+ .cnt_en_bit = 7,
+ .quirks = QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_MASK_RESET |
+ QUIRK_HAS_PMU_RST_STAT | QUIRK_HAS_PMU_CNT_EN,
+};
+
+static const struct s3c2410_wdt_variant drv_data_exynosautov9_cl1 = {
+ .mask_reset_reg = EXYNOSAUTOV9_CLUSTER1_NONCPU_INT_EN,
+ .mask_bit = 2,
+ .mask_reset_inv = true,
+ .rst_stat_reg = EXYNOS5_RST_STAT_REG_OFFSET,
+ .rst_stat_bit = EXYNOSAUTOV9_CLUSTER1_WDTRESET_BIT,
+ .cnt_en_reg = EXYNOSAUTOV9_CLUSTER1_NONCPU_OUT,
+ .cnt_en_bit = 7,
+ .quirks = QUIRK_HAS_WTCLRINT_REG | QUIRK_HAS_PMU_MASK_RESET |
+ QUIRK_HAS_PMU_RST_STAT | QUIRK_HAS_PMU_CNT_EN,
+};
+
static const struct of_device_id s3c2410_wdt_match[] = {
{ .compatible = "samsung,s3c2410-wdt",
.data = &drv_data_s3c2410 },
@@ -249,6 +277,8 @@ static const struct of_device_id s3c2410_wdt_match[] = {
.data = &drv_data_exynos7 },
{ .compatible = "samsung,exynos850-wdt",
.data = &drv_data_exynos850_cl0 },
+ { .compatible = "samsung,exynosautov9-wdt",
+ .data = &drv_data_exynosautov9_cl0 },
{},
};
MODULE_DEVICE_TABLE(of, s3c2410_wdt_match);
@@ -630,8 +660,9 @@ s3c2410_get_wdt_drv_data(struct platform_device *pdev)
}
#ifdef CONFIG_OF
- /* Choose Exynos850 driver data w.r.t. cluster index */
- if (variant == &drv_data_exynos850_cl0) {
+ /* Choose Exynos850/ExynosAutov9 driver data w.r.t. cluster index */
+ if (variant == &drv_data_exynos850_cl0 ||
+ variant == &drv_data_exynosautov9_cl0) {
u32 index;
int err;
@@ -644,9 +675,11 @@ s3c2410_get_wdt_drv_data(struct platform_device *pdev)
switch (index) {
case 0:
- return &drv_data_exynos850_cl0;
+ return variant;
case 1:
- return &drv_data_exynos850_cl1;
+ return (variant == &drv_data_exynos850_cl0) ?
+ &drv_data_exynos850_cl1 :
+ &drv_data_exynosautov9_cl1;
default:
dev_err(dev, "wrong cluster index: %u\n", index);
return NULL;
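
The cl0/cl1 selection above keys off a per-cluster index read from the device tree; the property name is not visible in this hunk, so "samsung,cluster-index" is assumed in the sketch below, which condenses the variant lookup into one helper:

#include <linux/of.h>

/*
 * Illustrative only: pick the per-cluster variant from a DT-provided index.
 * The property name is an assumption, not taken from this hunk.
 */
static const void *pick_cluster_variant(struct device_node *np,
					const void *cl0, const void *cl1)
{
	u32 index;

	if (of_property_read_u32(np, "samsung,cluster-index", &index))
		return NULL;

	switch (index) {
	case 0:
		return cl0;
	case 1:
		return cl1;
	default:
		return NULL;
	}
}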
diff --git a/drivers/watchdog/sa1100_wdt.c b/drivers/watchdog/sa1100_wdt.c
index 2d0a06a158a8..82ac5d19f519 100644
--- a/drivers/watchdog/sa1100_wdt.c
+++ b/drivers/watchdog/sa1100_wdt.c
@@ -238,7 +238,7 @@ static int sa1100dog_remove(struct platform_device *pdev)
return 0;
}
-struct platform_driver sa1100dog_driver = {
+static struct platform_driver sa1100dog_driver = {
.driver.name = "sa1100_wdt",
.probe = sa1100dog_probe,
.remove = sa1100dog_remove,
diff --git a/drivers/watchdog/sp5100_tco.c b/drivers/watchdog/sp5100_tco.c
index ae54dd33e233..fb426b7d81da 100644
--- a/drivers/watchdog/sp5100_tco.c
+++ b/drivers/watchdog/sp5100_tco.c
@@ -65,6 +65,12 @@ static struct pci_dev *sp5100_tco_pci;
/* module parameters */
+#define WATCHDOG_ACTION 0
+static bool action = WATCHDOG_ACTION;
+module_param(action, bool, 0);
+MODULE_PARM_DESC(action, "Action taken when watchdog expires, 0 to reset, 1 to poweroff (default="
+ __MODULE_STRING(WATCHDOG_ACTION) ")");
+
#define WATCHDOG_HEARTBEAT 60 /* 60 sec default heartbeat. */
static int heartbeat = WATCHDOG_HEARTBEAT; /* in seconds */
module_param(heartbeat, int, 0);
@@ -297,8 +303,11 @@ static int sp5100_tco_timer_init(struct sp5100_tco *tco)
if (val & SP5100_WDT_FIRED)
wdd->bootstatus = WDIOF_CARDRESET;
- /* Set watchdog action to reset the system */
- val &= ~SP5100_WDT_ACTION_RESET;
+ /* Set watchdog action */
+ if (action)
+ val |= SP5100_WDT_ACTION_RESET;
+ else
+ val &= ~SP5100_WDT_ACTION_RESET;
writel(val, SP5100_WDT_CONTROL(tco->tcobase));
/* Set a reasonable heartbeat before we stop the timer */
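
The new "action" knob is a plain boolean module parameter that selects the expiry behaviour by toggling one bit in the previously read control word (per the parameter description, 0 reboots the machine, 1 powers it off). A minimal sketch of the same pattern; the bit name is kept from the hunk, the helper is invented for illustration:

#include <linux/module.h>
#include <linux/types.h>

static bool action; /* false = reset (default), true = power off */
module_param(action, bool, 0);
MODULE_PARM_DESC(action, "0 to reset, 1 to power off on watchdog expiry");

/* Fold the parameter into a control-register value read earlier */
static u32 apply_wdt_action(u32 val, u32 action_bit)
{
	return action ? (val | action_bit) : (val & ~action_bit);
}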
diff --git a/drivers/watchdog/twl4030_wdt.c b/drivers/watchdog/twl4030_wdt.c
index 355e428c0b99..36b4a660928d 100644
--- a/drivers/watchdog/twl4030_wdt.c
+++ b/drivers/watchdog/twl4030_wdt.c
@@ -9,6 +9,7 @@
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
#include <linux/watchdog.h>
#include <linux/platform_device.h>
#include <linux/mfd/twl.h>
diff --git a/drivers/watchdog/w83627hf_wdt.c b/drivers/watchdog/w83627hf_wdt.c
index 56a4a4030ca9..bc33b63c5a5d 100644
--- a/drivers/watchdog/w83627hf_wdt.c
+++ b/drivers/watchdog/w83627hf_wdt.c
@@ -113,6 +113,10 @@ MODULE_PARM_DESC(early_disable, "Disable watchdog at boot time (default=0)");
#define W836X7HF_WDT_CSR 0xf7
#define NCT6102D_WDT_CSR 0xf2
+#define WDT_CSR_STATUS 0x10
+#define WDT_CSR_KBD 0x40
+#define WDT_CSR_MOUSE 0x80
+
static void superio_outb(int reg, int val)
{
outb(reg, WDT_EFER);
@@ -244,8 +248,12 @@ static int w83627hf_init(struct watchdog_device *wdog, enum chips chip)
t = superio_inb(cr_wdt_control) & ~0x0C;
superio_outb(cr_wdt_control, t);
- /* reset trigger, disable keyboard & mouse turning off watchdog */
- t = superio_inb(cr_wdt_csr) & ~0xD0;
+ t = superio_inb(cr_wdt_csr);
+ if (t & WDT_CSR_STATUS)
+ wdog->bootstatus |= WDIOF_CARDRESET;
+
+ /* reset status, disable keyboard & mouse turning off watchdog */
+ t &= ~(WDT_CSR_STATUS | WDT_CSR_KBD | WDT_CSR_MOUSE);
superio_outb(cr_wdt_csr, t);
superio_exit();
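
The hunk above replaces the opaque 0xD0 mask with named CSR bits and, before clearing them, latches the "watchdog fired" flag into WDIOF_CARDRESET so user space can tell that the last reboot was a watchdog reset. A condensed sketch of that read-report-clear sequence, reusing the driver's superio accessors and bit names (the helper itself is illustrative):

/* Report, then clear, the latched timeout status bit */
static void example_handle_csr(struct watchdog_device *wdog, int cr_wdt_csr)
{
	unsigned char t = superio_inb(cr_wdt_csr);

	if (t & WDT_CSR_STATUS)		/* previous reset caused by the watchdog */
		wdog->bootstatus |= WDIOF_CARDRESET;

	/* clear status; keep keyboard/mouse from disabling the watchdog */
	t &= ~(WDT_CSR_STATUS | WDT_CSR_KBD | WDT_CSR_MOUSE);
	superio_outb(cr_wdt_csr, t);
}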
diff --git a/drivers/watchdog/w83977f_wdt.c b/drivers/watchdog/w83977f_wdt.c
index fd64ae77780a..31bf21ceaf48 100644
--- a/drivers/watchdog/w83977f_wdt.c
+++ b/drivers/watchdog/w83977f_wdt.c
@@ -321,7 +321,7 @@ static int wdt_release(struct inode *inode, struct file *file)
* @ppos: pointer to the position to write. No seeks allowed
*
* A write to a watchdog device is defined as a keepalive signal. Any
- * write of data will do, as we we don't define content meaning.
+ * write of data will do, as we don't define content meaning.
*/
static ssize_t wdt_write(struct file *file, const char __user *buf,
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index 54903f3c851e..744b2ab75288 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -1015,7 +1015,11 @@ static int watchdog_cdev_register(struct watchdog_device *wdd)
wd_data->dev.groups = wdd->groups;
wd_data->dev.release = watchdog_core_data_release;
dev_set_drvdata(&wd_data->dev, wdd);
- dev_set_name(&wd_data->dev, "watchdog%d", wdd->id);
+ err = dev_set_name(&wd_data->dev, "watchdog%d", wdd->id);
+ if (err) {
+ put_device(&wd_data->dev);
+ return err;
+ }
kthread_init_work(&wd_data->work, watchdog_ping_work);
hrtimer_init(&wd_data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
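
dev_set_name() allocates the device name and can fail; because the struct device has already been initialized at this point, the error path must drop the reference with put_device() so the release callback frees the data, never kfree() directly. A minimal sketch of that rule with an illustrative release function:

#include <linux/device.h>
#include <linux/slab.h>

struct example_data {
	struct device dev;
};

static void example_release(struct device *dev)
{
	kfree(container_of(dev, struct example_data, dev));
}

static int example_register(int id)
{
	struct example_data *d = kzalloc(sizeof(*d), GFP_KERNEL);
	int err;

	if (!d)
		return -ENOMEM;

	device_initialize(&d->dev);
	d->dev.release = example_release;

	err = dev_set_name(&d->dev, "example%d", id);
	if (err) {
		/* after device_initialize(), only put_device() may free this */
		put_device(&d->dev);
		return err;
	}

	return device_add(&d->dev);
}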
diff --git a/drivers/watchdog/wdat_wdt.c b/drivers/watchdog/wdat_wdt.c
index aeadaa07c891..ce7a4a9e4b03 100644
--- a/drivers/watchdog/wdat_wdt.c
+++ b/drivers/watchdog/wdat_wdt.c
@@ -342,9 +342,8 @@ static int wdat_wdt_probe(struct platform_device *pdev)
return -EINVAL;
wdat->period = tbl->timer_period;
- wdat->wdd.min_hw_heartbeat_ms = wdat->period * tbl->min_count;
- wdat->wdd.max_hw_heartbeat_ms = wdat->period * tbl->max_count;
- wdat->wdd.min_timeout = 1;
+ wdat->wdd.min_timeout = DIV_ROUND_UP(wdat->period * tbl->min_count, 1000);
+ wdat->wdd.max_timeout = wdat->period * tbl->max_count / 1000;
wdat->stopped_in_sleep = tbl->flags & ACPI_WDAT_STOPPED;
wdat->wdd.info = &wdat_wdt_info;
wdat->wdd.ops = &wdat_wdt_ops;
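
The WDAT table expresses its limits as a count of timer periods in milliseconds, while the watchdog core's min/max_timeout are in seconds, so the hunk converts units: the minimum rounds up (never below the hardware floor) and the maximum truncates. For example, period = 100 ms, min_count = 5, max_count = 65535 gives min_timeout = DIV_ROUND_UP(500, 1000) = 1 s and max_timeout = 6553500 / 1000 = 6553 s. A sketch of the conversion with illustrative parameter names:

#include <linux/math.h>
#include <linux/watchdog.h>

/* Convert WDAT period (ms) times counts into whole seconds */
static void example_set_timeouts(struct watchdog_device *wdd,
				 unsigned int period_ms,
				 unsigned int min_count,
				 unsigned int max_count)
{
	wdd->min_timeout = DIV_ROUND_UP(period_ms * min_count, 1000);
	wdd->max_timeout = period_ms * max_count / 1000;
}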
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index a65bd92121a5..d5d7c402b651 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -56,7 +56,7 @@ config XEN_MEMORY_HOTPLUG_LIMIT
depends on XEN_HAVE_PVMMU
depends on MEMORY_HOTPLUG
help
- Maxmium amount of memory (in GiB) that a PV guest can be
+ Maximum amount of memory (in GiB) that a PV guest can be
expanded to when using memory hotplug.
A PV guest can have more memory than this limit if is
diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
index 40ef379c28ab..9c286b2a1900 100644
--- a/drivers/xen/gntdev-common.h
+++ b/drivers/xen/gntdev-common.h
@@ -44,9 +44,10 @@ struct gntdev_unmap_notify {
};
struct gntdev_grant_map {
+ atomic_t in_use;
struct mmu_interval_notifier notifier;
+ bool notifier_init;
struct list_head next;
- struct vm_area_struct *vma;
int index;
int count;
int flags;
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 84b143eef395..4d9a3050de6a 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -286,6 +286,9 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
*/
}
+ if (use_ptemod && map->notifier_init)
+ mmu_interval_notifier_remove(&map->notifier);
+
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
evtchn_put(map->notify.event);
@@ -298,7 +301,7 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
{
struct gntdev_grant_map *map = data;
- unsigned int pgnr = (addr - map->vma->vm_start) >> PAGE_SHIFT;
+ unsigned int pgnr = (addr - map->pages_vm_start) >> PAGE_SHIFT;
int flags = map->flags | GNTMAP_application_map | GNTMAP_contains_pte |
(1 << _GNTMAP_guest_avail0);
u64 pte_maddr;
@@ -367,8 +370,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
for (i = 0; i < map->count; i++) {
if (map->map_ops[i].status == GNTST_okay) {
map->unmap_ops[i].handle = map->map_ops[i].handle;
- if (!use_ptemod)
- alloced++;
+ alloced++;
} else if (!err)
err = -EINVAL;
@@ -377,8 +379,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
if (use_ptemod) {
if (map->kmap_ops[i].status == GNTST_okay) {
- if (map->map_ops[i].status == GNTST_okay)
- alloced++;
+ alloced++;
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
} else if (!err)
err = -EINVAL;
@@ -394,8 +395,14 @@ static void __unmap_grant_pages_done(int result,
unsigned int i;
struct gntdev_grant_map *map = data->data;
unsigned int offset = data->unmap_ops - map->unmap_ops;
+ int successful_unmaps = 0;
+ int live_grants;
for (i = 0; i < data->count; i++) {
+ if (map->unmap_ops[offset + i].status == GNTST_okay &&
+ map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE)
+ successful_unmaps++;
+
WARN_ON(map->unmap_ops[offset + i].status != GNTST_okay &&
map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
pr_debug("unmap handle=%d st=%d\n",
@@ -403,6 +410,10 @@ static void __unmap_grant_pages_done(int result,
map->unmap_ops[offset+i].status);
map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
if (use_ptemod) {
+ if (map->kunmap_ops[offset + i].status == GNTST_okay &&
+ map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE)
+ successful_unmaps++;
+
WARN_ON(map->kunmap_ops[offset + i].status != GNTST_okay &&
map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
pr_debug("kunmap handle=%u st=%d\n",
@@ -411,11 +422,15 @@ static void __unmap_grant_pages_done(int result,
map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
}
}
+
/*
* Decrease the live-grant counter. This must happen after the loop to
* prevent premature reuse of the grants by gnttab_mmap().
*/
- atomic_sub(data->count, &map->live_grants);
+ live_grants = atomic_sub_return(successful_unmaps, &map->live_grants);
+ if (WARN_ON(live_grants < 0))
+ pr_err("%s: live_grants became negative (%d) after unmapping %d pages!\n",
+ __func__, live_grants, successful_unmaps);
/* Release reference taken by __unmap_grant_pages */
gntdev_put_map(NULL, map);
@@ -496,11 +511,7 @@ static void gntdev_vma_close(struct vm_area_struct *vma)
struct gntdev_priv *priv = file->private_data;
pr_debug("gntdev_vma_close %p\n", vma);
- if (use_ptemod) {
- WARN_ON(map->vma != vma);
- mmu_interval_notifier_remove(&map->notifier);
- map->vma = NULL;
- }
+
vma->vm_private_data = NULL;
gntdev_put_map(priv, map);
}
@@ -528,29 +539,30 @@ static bool gntdev_invalidate(struct mmu_interval_notifier *mn,
struct gntdev_grant_map *map =
container_of(mn, struct gntdev_grant_map, notifier);
unsigned long mstart, mend;
+ unsigned long map_start, map_end;
if (!mmu_notifier_range_blockable(range))
return false;
+ map_start = map->pages_vm_start;
+ map_end = map->pages_vm_start + (map->count << PAGE_SHIFT);
+
/*
* If the VMA is split or otherwise changed the notifier is not
* updated, but we don't want to process VA's outside the modified
* VMA. FIXME: It would be much more understandable to just prevent
* modifying the VMA in the first place.
*/
- if (map->vma->vm_start >= range->end ||
- map->vma->vm_end <= range->start)
+ if (map_start >= range->end || map_end <= range->start)
return true;
- mstart = max(range->start, map->vma->vm_start);
- mend = min(range->end, map->vma->vm_end);
+ mstart = max(range->start, map_start);
+ mend = min(range->end, map_end);
pr_debug("map %d+%d (%lx %lx), range %lx %lx, mrange %lx %lx\n",
- map->index, map->count,
- map->vma->vm_start, map->vma->vm_end,
- range->start, range->end, mstart, mend);
- unmap_grant_pages(map,
- (mstart - map->vma->vm_start) >> PAGE_SHIFT,
- (mend - mstart) >> PAGE_SHIFT);
+ map->index, map->count, map_start, map_end,
+ range->start, range->end, mstart, mend);
+ unmap_grant_pages(map, (mstart - map_start) >> PAGE_SHIFT,
+ (mend - mstart) >> PAGE_SHIFT);
return true;
}
@@ -1030,18 +1042,15 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
return -EINVAL;
pr_debug("map %d+%d at %lx (pgoff %lx)\n",
- index, count, vma->vm_start, vma->vm_pgoff);
+ index, count, vma->vm_start, vma->vm_pgoff);
mutex_lock(&priv->lock);
map = gntdev_find_map_index(priv, index, count);
if (!map)
goto unlock_out;
- if (use_ptemod && map->vma)
+ if (!atomic_add_unless(&map->in_use, 1, 1))
goto unlock_out;
- if (atomic_read(&map->live_grants)) {
- err = -EAGAIN;
- goto unlock_out;
- }
+
refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
@@ -1062,15 +1071,16 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
map->flags |= GNTMAP_readonly;
}
+ map->pages_vm_start = vma->vm_start;
+
if (use_ptemod) {
- map->vma = vma;
err = mmu_interval_notifier_insert_locked(
&map->notifier, vma->vm_mm, vma->vm_start,
vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
- if (err) {
- map->vma = NULL;
+ if (err)
goto out_unlock_put;
- }
+
+ map->notifier_init = true;
}
mutex_unlock(&priv->lock);
@@ -1087,7 +1097,6 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
*/
mmu_interval_read_begin(&map->notifier);
- map->pages_vm_start = vma->vm_start;
err = apply_to_page_range(vma->vm_mm, vma->vm_start,
vma->vm_end - vma->vm_start,
find_grant_ptes, map);
@@ -1116,13 +1125,8 @@ unlock_out:
out_unlock_put:
mutex_unlock(&priv->lock);
out_put_map:
- if (use_ptemod) {
+ if (use_ptemod)
unmap_grant_pages(map, 0, map->count);
- if (map->vma) {
- mmu_interval_notifier_remove(&map->notifier);
- map->vma = NULL;
- }
- }
gntdev_put_map(priv, map);
return err;
}
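
With map->vma gone, gntdev_mmap() guards against a second mapping of the same grant map with atomic_add_unless() on the new in_use counter: the increment succeeds only while the counter is still 0, so exactly one mmap() call can claim the map and every later caller bails out. A minimal sketch of that "claim once" idiom (names illustrative):

#include <linux/atomic.h>
#include <linux/types.h>

struct example_map {
	atomic_t in_use;	/* 0 = free, 1 = already claimed by an mmap() */
};

/* Returns true for exactly one caller; all others see the map as busy. */
static bool example_claim(struct example_map *map)
{
	return atomic_add_unless(&map->in_use, 1, 1);
}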
diff --git a/drivers/xen/grant-dma-ops.c b/drivers/xen/grant-dma-ops.c
index 8973fc1e9ccc..860f37c93af4 100644
--- a/drivers/xen/grant-dma-ops.c
+++ b/drivers/xen/grant-dma-ops.c
@@ -25,7 +25,7 @@ struct xen_grant_dma_data {
bool broken;
};
-static DEFINE_XARRAY(xen_grant_dma_devices);
+static DEFINE_XARRAY_FLAGS(xen_grant_dma_devices, XA_FLAGS_LOCK_IRQ);
#define XEN_GRANT_DMA_ADDR_OFF (1ULL << 63)
@@ -42,14 +42,29 @@ static inline grant_ref_t dma_to_grant(dma_addr_t dma)
static struct xen_grant_dma_data *find_xen_grant_dma_data(struct device *dev)
{
struct xen_grant_dma_data *data;
+ unsigned long flags;
- xa_lock(&xen_grant_dma_devices);
+ xa_lock_irqsave(&xen_grant_dma_devices, flags);
data = xa_load(&xen_grant_dma_devices, (unsigned long)dev);
- xa_unlock(&xen_grant_dma_devices);
+ xa_unlock_irqrestore(&xen_grant_dma_devices, flags);
return data;
}
+static int store_xen_grant_dma_data(struct device *dev,
+ struct xen_grant_dma_data *data)
+{
+ unsigned long flags;
+ int ret;
+
+ xa_lock_irqsave(&xen_grant_dma_devices, flags);
+ ret = xa_err(__xa_store(&xen_grant_dma_devices, (unsigned long)dev, data,
+ GFP_ATOMIC));
+ xa_unlock_irqrestore(&xen_grant_dma_devices, flags);
+
+ return ret;
+}
+
/*
* DMA ops for Xen frontends (e.g. virtio).
*
@@ -153,7 +168,7 @@ static dma_addr_t xen_grant_dma_map_page(struct device *dev, struct page *page,
unsigned long attrs)
{
struct xen_grant_dma_data *data;
- unsigned int i, n_pages = PFN_UP(size);
+ unsigned int i, n_pages = PFN_UP(offset + size);
grant_ref_t grant;
dma_addr_t dma_handle;
@@ -185,7 +200,8 @@ static void xen_grant_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
unsigned long attrs)
{
struct xen_grant_dma_data *data;
- unsigned int i, n_pages = PFN_UP(size);
+ unsigned long offset = dma_handle & (PAGE_SIZE - 1);
+ unsigned int i, n_pages = PFN_UP(offset + size);
grant_ref_t grant;
if (WARN_ON(dir == DMA_NONE))
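
The change from PFN_UP(size) to PFN_UP(offset + size) matters whenever the buffer does not start on a page boundary: a 0x300-byte mapping that begins at page offset 0xe00 touches two pages even though its size is well below PAGE_SIZE. A tiny illustration of the corrected page count (helper name is illustrative):

#include <linux/pfn.h>

/*
 * With 4 KiB pages, offset 0xe00 + size 0x300 = 0x1100 spans two pages;
 * PFN_UP(size) alone would have returned 1.
 */
static unsigned int example_n_pages(unsigned long offset, size_t size)
{
	return PFN_UP(offset + size);
}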
@@ -273,72 +289,91 @@ static const struct dma_map_ops xen_grant_dma_ops = {
.dma_supported = xen_grant_dma_supported,
};
-bool xen_is_grant_dma_device(struct device *dev)
+static bool xen_is_dt_grant_dma_device(struct device *dev)
{
struct device_node *iommu_np;
bool has_iommu;
- /* XXX Handle only DT devices for now */
- if (!dev->of_node)
- return false;
-
iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
- has_iommu = iommu_np && of_device_is_compatible(iommu_np, "xen,grant-dma");
+ has_iommu = iommu_np &&
+ of_device_is_compatible(iommu_np, "xen,grant-dma");
of_node_put(iommu_np);
return has_iommu;
}
+bool xen_is_grant_dma_device(struct device *dev)
+{
+ /* XXX Handle only DT devices for now */
+ if (dev->of_node)
+ return xen_is_dt_grant_dma_device(dev);
+
+ return false;
+}
+
bool xen_virtio_mem_acc(struct virtio_device *dev)
{
- if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT))
+ if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT) || xen_pv_domain())
return true;
return xen_is_grant_dma_device(dev->dev.parent);
}
-void xen_grant_setup_dma_ops(struct device *dev)
+static int xen_dt_grant_init_backend_domid(struct device *dev,
+ struct xen_grant_dma_data *data)
{
- struct xen_grant_dma_data *data;
struct of_phandle_args iommu_spec;
- data = find_xen_grant_dma_data(dev);
- if (data) {
- dev_err(dev, "Xen grant DMA data is already created\n");
- return;
- }
-
- /* XXX ACPI device unsupported for now */
- if (!dev->of_node)
- goto err;
-
if (of_parse_phandle_with_args(dev->of_node, "iommus", "#iommu-cells",
0, &iommu_spec)) {
dev_err(dev, "Cannot parse iommus property\n");
- goto err;
+ return -ESRCH;
}
if (!of_device_is_compatible(iommu_spec.np, "xen,grant-dma") ||
iommu_spec.args_count != 1) {
dev_err(dev, "Incompatible IOMMU node\n");
of_node_put(iommu_spec.np);
- goto err;
+ return -ESRCH;
}
of_node_put(iommu_spec.np);
+ /*
+ * The endpoint ID here means the ID of the domain where the
+ * corresponding backend is running
+ */
+ data->backend_domid = iommu_spec.args[0];
+
+ return 0;
+}
+
+void xen_grant_setup_dma_ops(struct device *dev)
+{
+ struct xen_grant_dma_data *data;
+
+ data = find_xen_grant_dma_data(dev);
+ if (data) {
+ dev_err(dev, "Xen grant DMA data is already created\n");
+ return;
+ }
+
data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
if (!data)
goto err;
- /*
- * The endpoint ID here means the ID of the domain where the corresponding
- * backend is running
- */
- data->backend_domid = iommu_spec.args[0];
+ if (dev->of_node) {
+ if (xen_dt_grant_init_backend_domid(dev, data))
+ goto err;
+ } else if (IS_ENABLED(CONFIG_XEN_VIRTIO_FORCE_GRANT)) {
+ dev_info(dev, "Using dom0 as backend\n");
+ data->backend_domid = 0;
+ } else {
+ /* XXX ACPI device unsupported for now */
+ goto err;
+ }
- if (xa_err(xa_store(&xen_grant_dma_devices, (unsigned long)dev, data,
- GFP_KERNEL))) {
+ if (store_xen_grant_dma_data(dev, data)) {
dev_err(dev, "Cannot store Xen grant DMA data\n");
goto err;
}
@@ -348,9 +383,20 @@ void xen_grant_setup_dma_ops(struct device *dev)
return;
err:
+ devm_kfree(dev, data);
dev_err(dev, "Cannot set up Xen grant DMA ops, retain platform DMA ops\n");
}
+bool xen_virtio_restricted_mem_acc(struct virtio_device *dev)
+{
+ bool ret = xen_virtio_mem_acc(dev);
+
+ if (ret)
+ xen_grant_setup_dma_ops(dev->dev.parent);
+
+ return ret;
+}
+
MODULE_DESCRIPTION("Xen grant DMA-mapping layer");
MODULE_AUTHOR("Juergen Gross <jgross@suse.com>");
MODULE_LICENSE("GPL");
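
Since the grant-DMA device table can be reached from atomic context by the DMA ops, the patch declares the xarray with XA_FLAGS_LOCK_IRQ and performs the store under xa_lock_irqsave() with __xa_store() and GFP_ATOMIC, matching the find/store helpers added above. A condensed sketch of that pairing (table name and key are illustrative):

#include <linux/xarray.h>

static DEFINE_XARRAY_FLAGS(example_devices, XA_FLAGS_LOCK_IRQ);

/* Store an entry while holding the xa_lock with interrupts masked */
static int example_store(unsigned long key, void *entry)
{
	unsigned long flags;
	int ret;

	xa_lock_irqsave(&example_devices, flags);
	/* __xa_store() requires the caller to hold xa_lock; GFP_ATOMIC since we cannot sleep */
	ret = xa_err(__xa_store(&example_devices, key, entry, GFP_ATOMIC));
	xa_unlock_irqrestore(&example_devices, flags);

	return ret;
}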
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index bde63ef677b8..d171091eec12 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -31,7 +31,7 @@ MODULE_PARM_DESC(passthrough,
" frontend (for example, a device at 06:01.b will still appear at\n"\
" 06:01.b to the frontend). This is similar to how Xen 2.0.x\n"\
" exposed PCI devices to its driver domains. This may be required\n"\
- " for drivers which depend on finding their hardward in certain\n"\
+ " for drivers which depend on finding their hardware in certain\n"\
" bus/slot locations.");
static struct xen_pcibk_device *alloc_pdev(struct xenbus_device *xdev)