summary | refs | log | tree | commit | diff
path: root/drivers/dma/idxd
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2023-05-03 21:11:56 +0300
committer Linus Torvalds <torvalds@linux-foundation.org> 2023-05-03 21:11:56 +0300
commit 7994beabfbb9a15c069eba7833a00f5ff4da1172 (patch)
tree d4c6a163f968c99ac4987cc99a93231a7ee51dc3 /drivers/dma/idxd
parent 29ee463d6fe45adde02098b3aa1166cd65fd2739 (diff)
parent f2dc327131b5cbb2cbb467cec23836f2e9d4cf46 (diff)
download linux-7994beabfbb9a15c069eba7833a00f5ff4da1172.tar.xz
Merge tag 'dmaengine-6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine
Pull dmaengine updates from Vinod Koul:
 "New support:
   - Apple admac t8112 device support
   - StarFive JH7110 DMA controller

  Updates:
   - Big pile of idxd updates to support IAA 2.0 device capabilities,
     DSA 2.0 Event Log and completion record faulting features and new
     DSA operations
   - at_xdmac suspend & resume updates and driver code cleanup
   - k3-udma suspend & resume support
   - k3-psil thread support for J784s4"

* tag 'dmaengine-6.4-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine: (57 commits)
  dmaengine: idxd: add per wq PRS disable
  dmaengine: idxd: add pid to exported sysfs attribute for opened file
  dmaengine: idxd: expose fault counters to sysfs
  dmaengine: idxd: add a device to represent the file opened
  dmaengine: idxd: add per file user counters for completion record faults
  dmaengine: idxd: process batch descriptor completion record faults
  dmaengine: idxd: add descs_completed field for completion record
  dmaengine: idxd: process user page faults for completion record
  dmaengine: idxd: add idxd_copy_cr() to copy user completion record during page fault handling
  dmaengine: idxd: create kmem cache for event log fault items
  dmaengine: idxd: add per DSA wq workqueue for processing cr faults
  dmanegine: idxd: add debugfs for event log dump
  dmaengine: idxd: add interrupt handling for event log
  dmaengine: idxd: setup event log configuration
  dmaengine: idxd: add event log size sysfs attribute
  dmaengine: idxd: make misc interrupt one shot
  dt-bindings: dma: snps,dw-axi-dmac: constrain the items of resets for JH7110 dma
  dt-bindings: dma: Drop unneeded quotes
  dmaengine: at_xdmac: align declaration of ret with the rest of variables
  dmaengine: at_xdmac: add a warning message regarding for unpaused channels
  ...
Diffstat (limited to 'drivers/dma/idxd')
-rw-r--r-- drivers/dma/idxd/Makefile 2
-rw-r--r-- drivers/dma/idxd/cdev.c 334
-rw-r--r-- drivers/dma/idxd/debugfs.c 138
-rw-r--r-- drivers/dma/idxd/device.c 113
-rw-r--r-- drivers/dma/idxd/idxd.h 67
-rw-r--r-- drivers/dma/idxd/init.c 60
-rw-r--r-- drivers/dma/idxd/irq.c 210
-rw-r--r-- drivers/dma/idxd/registers.h 126
-rw-r--r-- drivers/dma/idxd/sysfs.c 146
9 files changed, 1127 insertions, 69 deletions
diff --git a/drivers/dma/idxd/Makefile b/drivers/dma/idxd/Makefile
index a1e9f2b3a37c..dc096839ac63 100644
--- a/drivers/dma/idxd/Makefile
+++ b/drivers/dma/idxd/Makefile
@@ -1,7 +1,7 @@
ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=IDXD
obj-$(CONFIG_INTEL_IDXD) += idxd.o
-idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o
+idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o debugfs.o
idxd-$(CONFIG_INTEL_IDXD_PERFMON) += perfmon.o
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index 674bfefca088..ecbf67c2ad2b 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -11,7 +11,9 @@
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/iommu.h>
+#include <linux/highmem.h>
#include <uapi/linux/idxd.h>
+#include <linux/xarray.h>
#include "registers.h"
#include "idxd.h"
@@ -22,6 +24,13 @@ struct idxd_cdev_context {
};
/*
+ * Since user file names are global in DSA devices, define their ida's as
+ * global to avoid conflict file names.
+ */
+static DEFINE_IDA(file_ida);
+static DEFINE_MUTEX(ida_lock);
+
+/*
* ictx is an array based off of accelerator types. enum idxd_type
* is used as index
*/
@@ -34,8 +43,119 @@ struct idxd_user_context {
struct idxd_wq *wq;
struct task_struct *task;
unsigned int pasid;
+ struct mm_struct *mm;
unsigned int flags;
struct iommu_sva *sva;
+ struct idxd_dev idxd_dev;
+ u64 counters[COUNTER_MAX];
+ int id;
+ pid_t pid;
+};
+
+static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid);
+static void idxd_xa_pasid_remove(struct idxd_user_context *ctx);
+
+static inline struct idxd_user_context *dev_to_uctx(struct device *dev)
+{
+ struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+ return container_of(idxd_dev, struct idxd_user_context, idxd_dev);
+}
+
+static ssize_t cr_faults_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct idxd_user_context *ctx = dev_to_uctx(dev);
+
+ return sysfs_emit(buf, "%llu\n", ctx->counters[COUNTER_FAULTS]);
+}
+static DEVICE_ATTR_RO(cr_faults);
+
+static ssize_t cr_fault_failures_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct idxd_user_context *ctx = dev_to_uctx(dev);
+
+ return sysfs_emit(buf, "%llu\n", ctx->counters[COUNTER_FAULT_FAILS]);
+}
+static DEVICE_ATTR_RO(cr_fault_failures);
+
+static ssize_t pid_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct idxd_user_context *ctx = dev_to_uctx(dev);
+
+ return sysfs_emit(buf, "%u\n", ctx->pid);
+}
+static DEVICE_ATTR_RO(pid);
+
+static struct attribute *cdev_file_attributes[] = {
+ &dev_attr_cr_faults.attr,
+ &dev_attr_cr_fault_failures.attr,
+ &dev_attr_pid.attr,
+ NULL
+};
+
+static umode_t cdev_file_attr_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+ struct device *dev = container_of(kobj, typeof(*dev), kobj);
+ struct idxd_user_context *ctx = dev_to_uctx(dev);
+ struct idxd_wq *wq = ctx->wq;
+
+ if (!wq_pasid_enabled(wq))
+ return 0;
+
+ return a->mode;
+}
+
+static const struct attribute_group cdev_file_attribute_group = {
+ .attrs = cdev_file_attributes,
+ .is_visible = cdev_file_attr_visible,
+};
+
+static const struct attribute_group *cdev_file_attribute_groups[] = {
+ &cdev_file_attribute_group,
+ NULL
+};
+
+static void idxd_file_dev_release(struct device *dev)
+{
+ struct idxd_user_context *ctx = dev_to_uctx(dev);
+ struct idxd_wq *wq = ctx->wq;
+ struct idxd_device *idxd = wq->idxd;
+ int rc;
+
+ mutex_lock(&ida_lock);
+ ida_free(&file_ida, ctx->id);
+ mutex_unlock(&ida_lock);
+
+ /* Wait for in-flight operations to complete. */
+ if (wq_shared(wq)) {
+ idxd_device_drain_pasid(idxd, ctx->pasid);
+ } else {
+ if (device_user_pasid_enabled(idxd)) {
+ /* The wq disable in the disable pasid function will drain the wq */
+ rc = idxd_wq_disable_pasid(wq);
+ if (rc < 0)
+ dev_err(dev, "wq disable pasid failed.\n");
+ } else {
+ idxd_wq_drain(wq);
+ }
+ }
+
+ if (ctx->sva) {
+ idxd_cdev_evl_drain_pasid(wq, ctx->pasid);
+ iommu_sva_unbind_device(ctx->sva);
+ idxd_xa_pasid_remove(ctx);
+ }
+ kfree(ctx);
+ mutex_lock(&wq->wq_lock);
+ idxd_wq_put(wq);
+ mutex_unlock(&wq->wq_lock);
+}
+
+static struct device_type idxd_cdev_file_type = {
+ .name = "idxd_file",
+ .release = idxd_file_dev_release,
+ .groups = cdev_file_attribute_groups,
};
static void idxd_cdev_dev_release(struct device *dev)
@@ -68,15 +188,46 @@ static inline struct idxd_wq *inode_wq(struct inode *inode)
return idxd_cdev->wq;
}
+static void idxd_xa_pasid_remove(struct idxd_user_context *ctx)
+{
+ struct idxd_wq *wq = ctx->wq;
+ void *ptr;
+
+ mutex_lock(&wq->uc_lock);
+ ptr = xa_cmpxchg(&wq->upasid_xa, ctx->pasid, ctx, NULL, GFP_KERNEL);
+ if (ptr != (void *)ctx)
+ dev_warn(&wq->idxd->pdev->dev, "xarray cmpxchg failed for pasid %u\n",
+ ctx->pasid);
+ mutex_unlock(&wq->uc_lock);
+}
+
+void idxd_user_counter_increment(struct idxd_wq *wq, u32 pasid, int index)
+{
+ struct idxd_user_context *ctx;
+
+ if (index >= COUNTER_MAX)
+ return;
+
+ mutex_lock(&wq->uc_lock);
+ ctx = xa_load(&wq->upasid_xa, pasid);
+ if (!ctx) {
+ mutex_unlock(&wq->uc_lock);
+ return;
+ }
+ ctx->counters[index]++;
+ mutex_unlock(&wq->uc_lock);
+}
+
static int idxd_cdev_open(struct inode *inode, struct file *filp)
{
struct idxd_user_context *ctx;
struct idxd_device *idxd;
struct idxd_wq *wq;
- struct device *dev;
+ struct device *dev, *fdev;
int rc = 0;
struct iommu_sva *sva;
unsigned int pasid;
+ struct idxd_cdev *idxd_cdev;
wq = inode_wq(inode);
idxd = wq->idxd;
@@ -97,6 +248,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
ctx->wq = wq;
filp->private_data = ctx;
+ ctx->pid = current->pid;
if (device_user_pasid_enabled(idxd)) {
sva = iommu_sva_bind_device(dev, current->mm);
@@ -108,65 +260,118 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
pasid = iommu_sva_get_pasid(sva);
if (pasid == IOMMU_PASID_INVALID) {
- iommu_sva_unbind_device(sva);
rc = -EINVAL;
- goto failed;
+ goto failed_get_pasid;
}
ctx->sva = sva;
ctx->pasid = pasid;
+ ctx->mm = current->mm;
+
+ mutex_lock(&wq->uc_lock);
+ rc = xa_insert(&wq->upasid_xa, pasid, ctx, GFP_KERNEL);
+ mutex_unlock(&wq->uc_lock);
+ if (rc < 0)
+ dev_warn(dev, "PASID entry already exist in xarray.\n");
if (wq_dedicated(wq)) {
rc = idxd_wq_set_pasid(wq, pasid);
if (rc < 0) {
iommu_sva_unbind_device(sva);
dev_err(dev, "wq set pasid failed: %d\n", rc);
- goto failed;
+ goto failed_set_pasid;
}
}
}
+ idxd_cdev = wq->idxd_cdev;
+ mutex_lock(&ida_lock);
+ ctx->id = ida_alloc(&file_ida, GFP_KERNEL);
+ mutex_unlock(&ida_lock);
+ if (ctx->id < 0) {
+ dev_warn(dev, "ida alloc failure\n");
+ goto failed_ida;
+ }
+ ctx->idxd_dev.type = IDXD_DEV_CDEV_FILE;
+ fdev = user_ctx_dev(ctx);
+ device_initialize(fdev);
+ fdev->parent = cdev_dev(idxd_cdev);
+ fdev->bus = &dsa_bus_type;
+ fdev->type = &idxd_cdev_file_type;
+
+ rc = dev_set_name(fdev, "file%d", ctx->id);
+ if (rc < 0) {
+ dev_warn(dev, "set name failure\n");
+ goto failed_dev_name;
+ }
+
+ rc = device_add(fdev);
+ if (rc < 0) {
+ dev_warn(dev, "file device add failure\n");
+ goto failed_dev_add;
+ }
+
idxd_wq_get(wq);
mutex_unlock(&wq->wq_lock);
return 0;
- failed:
+failed_dev_add:
+failed_dev_name:
+ put_device(fdev);
+failed_ida:
+failed_set_pasid:
+ if (device_user_pasid_enabled(idxd))
+ idxd_xa_pasid_remove(ctx);
+failed_get_pasid:
+ if (device_user_pasid_enabled(idxd))
+ iommu_sva_unbind_device(sva);
+failed:
mutex_unlock(&wq->wq_lock);
kfree(ctx);
return rc;
}
+static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct idxd_evl *evl = idxd->evl;
+ union evl_status_reg status;
+ u16 h, t, size;
+ int ent_size = evl_ent_size(idxd);
+ struct __evl_entry *entry_head;
+
+ if (!evl)
+ return;
+
+ spin_lock(&evl->lock);
+ status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
+ t = status.tail;
+ h = evl->head;
+ size = evl->size;
+
+ while (h != t) {
+ entry_head = (struct __evl_entry *)(evl->log + (h * ent_size));
+ if (entry_head->pasid == pasid && entry_head->wq_idx == wq->id)
+ set_bit(h, evl->bmap);
+ h = (h + 1) % size;
+ }
+ spin_unlock(&evl->lock);
+
+ drain_workqueue(wq->wq);
+}
+
static int idxd_cdev_release(struct inode *node, struct file *filep)
{
struct idxd_user_context *ctx = filep->private_data;
struct idxd_wq *wq = ctx->wq;
struct idxd_device *idxd = wq->idxd;
struct device *dev = &idxd->pdev->dev;
- int rc;
dev_dbg(dev, "%s called\n", __func__);
filep->private_data = NULL;
- /* Wait for in-flight operations to complete. */
- if (wq_shared(wq)) {
- idxd_device_drain_pasid(idxd, ctx->pasid);
- } else {
- if (device_user_pasid_enabled(idxd)) {
- /* The wq disable in the disable pasid function will drain the wq */
- rc = idxd_wq_disable_pasid(wq);
- if (rc < 0)
- dev_err(dev, "wq disable pasid failed.\n");
- } else {
- idxd_wq_drain(wq);
- }
- }
+ device_unregister(user_ctx_dev(ctx));
- if (ctx->sva)
- iommu_sva_unbind_device(ctx->sva);
- kfree(ctx);
- mutex_lock(&wq->wq_lock);
- idxd_wq_put(wq);
- mutex_unlock(&wq->wq_lock);
return 0;
}
@@ -297,6 +502,7 @@ void idxd_wq_del_cdev(struct idxd_wq *wq)
struct idxd_cdev *idxd_cdev;
idxd_cdev = wq->idxd_cdev;
+ ida_destroy(&file_ida);
wq->idxd_cdev = NULL;
cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev));
put_device(cdev_dev(idxd_cdev));
@@ -330,6 +536,13 @@ static int idxd_user_drv_probe(struct idxd_dev *idxd_dev)
}
mutex_lock(&wq->wq_lock);
+
+ wq->wq = create_workqueue(dev_name(wq_confdev(wq)));
+ if (!wq->wq) {
+ rc = -ENOMEM;
+ goto wq_err;
+ }
+
wq->type = IDXD_WQT_USER;
rc = drv_enable_wq(wq);
if (rc < 0)
@@ -348,7 +561,9 @@ static int idxd_user_drv_probe(struct idxd_dev *idxd_dev)
err_cdev:
drv_disable_wq(wq);
err:
+ destroy_workqueue(wq->wq);
wq->type = IDXD_WQT_NONE;
+wq_err:
mutex_unlock(&wq->wq_lock);
return rc;
}
@@ -361,6 +576,8 @@ static void idxd_user_drv_remove(struct idxd_dev *idxd_dev)
idxd_wq_del_cdev(wq);
drv_disable_wq(wq);
wq->type = IDXD_WQT_NONE;
+ destroy_workqueue(wq->wq);
+ wq->wq = NULL;
mutex_unlock(&wq->wq_lock);
}
@@ -407,3 +624,70 @@ void idxd_cdev_remove(void)
ida_destroy(&ictx[i].minor_ida);
}
}
+
+/**
+ * idxd_copy_cr - copy completion record to user address space found by wq and
+ * PASID
+ * @wq: work queue
+ * @pasid: PASID
+ * @addr: user fault address to write
+ * @cr: completion record
+ * @len: number of bytes to copy
+ *
+ * This is called by a work that handles completion record fault.
+ *
+ * Return: number of bytes copied.
+ */
+int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr,
+ void *cr, int len)
+{
+ struct device *dev = &wq->idxd->pdev->dev;
+ int left = len, status_size = 1;
+ struct idxd_user_context *ctx;
+ struct mm_struct *mm;
+
+ mutex_lock(&wq->uc_lock);
+
+ ctx = xa_load(&wq->upasid_xa, pasid);
+ if (!ctx) {
+ dev_warn(dev, "No user context\n");
+ goto out;
+ }
+
+ mm = ctx->mm;
+ /*
+ * The completion record fault handling work is running in kernel
+ * thread context. It temporarily switches to the mm to copy cr
+ * to addr in the mm.
+ */
+ kthread_use_mm(mm);
+ left = copy_to_user((void __user *)addr + status_size, cr + status_size,
+ len - status_size);
+ /*
+ * Copy status only after the rest of completion record is copied
+ * successfully so that the user gets the complete completion record
+ * when a non-zero status is polled.
+ */
+ if (!left) {
+ u8 status;
+
+ /*
+ * Ensure that the completion record's status field is written
+ * after the rest of the completion record has been written.
+ * This ensures that the user receives the correct completion
+ * record information once polling for a non-zero status.
+ */
+ wmb();
+ status = *(u8 *)cr;
+ if (put_user(status, (u8 __user *)addr))
+ left += status_size;
+ } else {
+ left += status_size;
+ }
+ kthread_unuse_mm(mm);
+
+out:
+ mutex_unlock(&wq->uc_lock);
+
+ return len - left;
+}
diff --git a/drivers/dma/idxd/debugfs.c b/drivers/dma/idxd/debugfs.c
new file mode 100644
index 000000000000..9cfbd9b14c4c
--- /dev/null
+++ b/drivers/dma/idxd/debugfs.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/debugfs.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <uapi/linux/idxd.h>
+#include "idxd.h"
+#include "registers.h"
+
+static struct dentry *idxd_debugfs_dir;
+
+static void dump_event_entry(struct idxd_device *idxd, struct seq_file *s,
+ u16 index, int *count, bool processed)
+{
+ struct idxd_evl *evl = idxd->evl;
+ struct dsa_evl_entry *entry;
+ struct dsa_completion_record *cr;
+ u64 *raw;
+ int i;
+ int evl_strides = evl_ent_size(idxd) / sizeof(u64);
+
+ entry = (struct dsa_evl_entry *)evl->log + index;
+
+ if (!entry->e.desc_valid)
+ return;
+
+ seq_printf(s, "Event Log entry %d (real index %u) processed: %u\n",
+ *count, index, processed);
+
+ seq_printf(s, "desc valid %u wq idx valid %u\n"
+ "batch %u fault rw %u priv %u error 0x%x\n"
+ "wq idx %u op %#x pasid %u batch idx %u\n"
+ "fault addr %#llx\n",
+ entry->e.desc_valid, entry->e.wq_idx_valid,
+ entry->e.batch, entry->e.fault_rw, entry->e.priv,
+ entry->e.error, entry->e.wq_idx, entry->e.operation,
+ entry->e.pasid, entry->e.batch_idx, entry->e.fault_addr);
+
+ cr = &entry->cr;
+ seq_printf(s, "status %#x result %#x fault_info %#x bytes_completed %u\n"
+ "fault addr %#llx inv flags %#x\n\n",
+ cr->status, cr->result, cr->fault_info, cr->bytes_completed,
+ cr->fault_addr, cr->invalid_flags);
+
+ raw = (u64 *)entry;
+
+ for (i = 0; i < evl_strides; i++)
+ seq_printf(s, "entry[%d] = %#llx\n", i, raw[i]);
+
+ seq_puts(s, "\n");
+ *count += 1;
+}
+
+static int debugfs_evl_show(struct seq_file *s, void *d)
+{
+ struct idxd_device *idxd = s->private;
+ struct idxd_evl *evl = idxd->evl;
+ union evl_status_reg evl_status;
+ u16 h, t, evl_size, i;
+ int count = 0;
+ bool processed = true;
+
+ if (!evl || !evl->log)
+ return 0;
+
+ spin_lock(&evl->lock);
+
+ h = evl->head;
+ evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
+ t = evl_status.tail;
+ evl_size = evl->size;
+
+ seq_printf(s, "Event Log head %u tail %u interrupt pending %u\n\n",
+ evl_status.head, evl_status.tail, evl_status.int_pending);
+
+ i = t;
+ while (1) {
+ i = (i + 1) % evl_size;
+ if (i == t)
+ break;
+
+ if (processed && i == h)
+ processed = false;
+ dump_event_entry(idxd, s, i, &count, processed);
+ }
+
+ spin_unlock(&evl->lock);
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(debugfs_evl);
+
+int idxd_device_init_debugfs(struct idxd_device *idxd)
+{
+ if (IS_ERR_OR_NULL(idxd_debugfs_dir))
+ return 0;
+
+ idxd->dbgfs_dir = debugfs_create_dir(dev_name(idxd_confdev(idxd)), idxd_debugfs_dir);
+ if (IS_ERR(idxd->dbgfs_dir))
+ return PTR_ERR(idxd->dbgfs_dir);
+
+ if (idxd->evl) {
+ idxd->dbgfs_evl_file = debugfs_create_file("event_log", 0400,
+ idxd->dbgfs_dir, idxd,
+ &debugfs_evl_fops);
+ if (IS_ERR(idxd->dbgfs_evl_file)) {
+ debugfs_remove_recursive(idxd->dbgfs_dir);
+ idxd->dbgfs_dir = NULL;
+ return PTR_ERR(idxd->dbgfs_evl_file);
+ }
+ }
+
+ return 0;
+}
+
+void idxd_device_remove_debugfs(struct idxd_device *idxd)
+{
+ debugfs_remove_recursive(idxd->dbgfs_dir);
+}
+
+int idxd_init_debugfs(void)
+{
+ if (!debugfs_initialized())
+ return 0;
+
+ idxd_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+ if (IS_ERR(idxd_debugfs_dir))
+ return PTR_ERR(idxd_debugfs_dir);
+ return 0;
+}
+
+void idxd_remove_debugfs(void)
+{
+ debugfs_remove_recursive(idxd_debugfs_dir);
+}
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 6fca8fa8d3a8..5abbcc61c528 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -752,6 +752,101 @@ void idxd_device_clear_state(struct idxd_device *idxd)
spin_unlock(&idxd->dev_lock);
}
+static int idxd_device_evl_setup(struct idxd_device *idxd)
+{
+ union gencfg_reg gencfg;
+ union evlcfg_reg evlcfg;
+ union genctrl_reg genctrl;
+ struct device *dev = &idxd->pdev->dev;
+ void *addr;
+ dma_addr_t dma_addr;
+ int size;
+ struct idxd_evl *evl = idxd->evl;
+ unsigned long *bmap;
+ int rc;
+
+ if (!evl)
+ return 0;
+
+ size = evl_size(idxd);
+
+ bmap = bitmap_zalloc(size, GFP_KERNEL);
+ if (!bmap) {
+ rc = -ENOMEM;
+ goto err_bmap;
+ }
+
+ /*
+ * Address needs to be page aligned. However, dma_alloc_coherent() provides
+ * at minimal page size aligned address. No manual alignment required.
+ */
+ addr = dma_alloc_coherent(dev, size, &dma_addr, GFP_KERNEL);
+ if (!addr) {
+ rc = -ENOMEM;
+ goto err_alloc;
+ }
+
+ memset(addr, 0, size);
+
+ spin_lock(&evl->lock);
+ evl->log = addr;
+ evl->dma = dma_addr;
+ evl->log_size = size;
+ evl->bmap = bmap;
+
+ memset(&evlcfg, 0, sizeof(evlcfg));
+ evlcfg.bits[0] = dma_addr & GENMASK(63, 12);
+ evlcfg.size = evl->size;
+
+ iowrite64(evlcfg.bits[0], idxd->reg_base + IDXD_EVLCFG_OFFSET);
+ iowrite64(evlcfg.bits[1], idxd->reg_base + IDXD_EVLCFG_OFFSET + 8);
+
+ genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET);
+ genctrl.evl_int_en = 1;
+ iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET);
+
+ gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
+ gencfg.evl_en = 1;
+ iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
+
+ spin_unlock(&evl->lock);
+ return 0;
+
+err_alloc:
+ bitmap_free(bmap);
+err_bmap:
+ return rc;
+}
+
+static void idxd_device_evl_free(struct idxd_device *idxd)
+{
+ union gencfg_reg gencfg;
+ union genctrl_reg genctrl;
+ struct device *dev = &idxd->pdev->dev;
+ struct idxd_evl *evl = idxd->evl;
+
+ gencfg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
+ if (!gencfg.evl_en)
+ return;
+
+ spin_lock(&evl->lock);
+ gencfg.evl_en = 0;
+ iowrite32(gencfg.bits, idxd->reg_base + IDXD_GENCFG_OFFSET);
+
+ genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET);
+ genctrl.evl_int_en = 0;
+ iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET);
+
+ iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET);
+ iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET + 8);
+
+ dma_free_coherent(dev, evl->log_size, evl->log, evl->dma);
+ bitmap_free(evl->bmap);
+ evl->log = NULL;
+ evl->size = IDXD_EVL_SIZE_MIN;
+ spin_unlock(&evl->lock);
+}
+
static void idxd_group_config_write(struct idxd_group *group)
{
struct idxd_device *idxd = group->idxd;
@@ -872,12 +967,16 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
wq->wqcfg->priority = wq->priority;
if (idxd->hw.gen_cap.block_on_fault &&
- test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags))
+ test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags) &&
+ !test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags))
wq->wqcfg->bof = 1;
if (idxd->hw.wq_cap.wq_ats_support)
wq->wqcfg->wq_ats_disable = test_bit(WQ_FLAG_ATS_DISABLE, &wq->flags);
+ if (idxd->hw.wq_cap.wq_prs_support)
+ wq->wqcfg->wq_prs_disable = test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags);
+
/* bytes 12-15 */
wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes);
idxd_wqcfg_set_max_batch_shift(idxd->data->type, wq->wqcfg, ilog2(wq->max_batch_size));
@@ -1451,15 +1550,24 @@ int idxd_device_drv_probe(struct idxd_dev *idxd_dev)
if (rc < 0)
return -ENXIO;
+ rc = idxd_device_evl_setup(idxd);
+ if (rc < 0) {
+ idxd->cmd_status = IDXD_SCMD_DEV_EVL_ERR;
+ return rc;
+ }
+
/* Start device */
rc = idxd_device_enable(idxd);
- if (rc < 0)
+ if (rc < 0) {
+ idxd_device_evl_free(idxd);
return rc;
+ }
/* Setup DMA device without channels */
rc = idxd_register_dma_device(idxd);
if (rc < 0) {
idxd_device_disable(idxd);
+ idxd_device_evl_free(idxd);
idxd->cmd_status = IDXD_SCMD_DEV_DMA_ERR;
return rc;
}
@@ -1488,6 +1596,7 @@ void idxd_device_drv_remove(struct idxd_dev *idxd_dev)
idxd_device_disable(idxd);
if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
idxd_device_reset(idxd);
+ idxd_device_evl_free(idxd);
}
static enum idxd_dev_type dev_types[] = {
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index dd2a6ed8949b..5428a2e1b1ec 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -32,6 +32,7 @@ enum idxd_dev_type {
IDXD_DEV_GROUP,
IDXD_DEV_ENGINE,
IDXD_DEV_CDEV,
+ IDXD_DEV_CDEV_FILE,
IDXD_DEV_MAX_TYPE,
};
@@ -127,6 +128,12 @@ struct idxd_pmu {
#define IDXD_MAX_PRIORITY 0xf
+enum {
+ COUNTER_FAULTS = 0,
+ COUNTER_FAULT_FAILS,
+ COUNTER_MAX
+};
+
enum idxd_wq_state {
IDXD_WQ_DISABLED = 0,
IDXD_WQ_ENABLED,
@@ -136,6 +143,7 @@ enum idxd_wq_flag {
WQ_FLAG_DEDICATED = 0,
WQ_FLAG_BLOCK_ON_FAULT,
WQ_FLAG_ATS_DISABLE,
+ WQ_FLAG_PRS_DISABLE,
};
enum idxd_wq_type {
@@ -185,6 +193,7 @@ struct idxd_wq {
struct idxd_dev idxd_dev;
struct idxd_cdev *idxd_cdev;
struct wait_queue_head err_queue;
+ struct workqueue_struct *wq;
struct idxd_device *idxd;
int id;
struct idxd_irq_entry ie;
@@ -214,6 +223,10 @@ struct idxd_wq {
char name[WQ_NAME_SIZE + 1];
u64 max_xfer_bytes;
u32 max_batch_size;
+
+ /* Lock to protect upasid_xa access. */
+ struct mutex uc_lock;
+ struct xarray upasid_xa;
};
struct idxd_engine {
@@ -232,6 +245,7 @@ struct idxd_hw {
union engine_cap_reg engine_cap;
struct opcap opcap;
u32 cmd_cap;
+ union iaa_cap_reg iaa_cap;
};
enum idxd_device_state {
@@ -258,6 +272,32 @@ struct idxd_driver_data {
struct device_type *dev_type;
int compl_size;
int align;
+ int evl_cr_off;
+ int cr_status_off;
+ int cr_result_off;
+};
+
+struct idxd_evl {
+ /* Lock to protect event log access. */
+ spinlock_t lock;
+ void *log;
+ dma_addr_t dma;
+ /* Total size of event log = number of entries * entry size. */
+ unsigned int log_size;
+ /* The number of entries in the event log. */
+ u16 size;
+ u16 head;
+ unsigned long *bmap;
+ bool batch_fail[IDXD_MAX_BATCH_IDENT];
+};
+
+struct idxd_evl_fault {
+ struct work_struct work;
+ struct idxd_wq *wq;
+ u8 status;
+
+ /* make this last member always */
+ struct __evl_entry entry[];
};
struct idxd_device {
@@ -316,8 +356,24 @@ struct idxd_device {
struct idxd_pmu *idxd_pmu;
unsigned long *opcap_bmap;
+ struct idxd_evl *evl;
+ struct kmem_cache *evl_cache;
+
+ struct dentry *dbgfs_dir;
+ struct dentry *dbgfs_evl_file;
};
+static inline unsigned int evl_ent_size(struct idxd_device *idxd)
+{
+ return idxd->hw.gen_cap.evl_support ?
+ (32 * (1 << idxd->hw.gen_cap.evl_support)) : 0;
+}
+
+static inline unsigned int evl_size(struct idxd_device *idxd)
+{
+ return idxd->evl->size * evl_ent_size(idxd);
+}
+
/* IDXD software descriptor */
struct idxd_desc {
union {
@@ -351,6 +407,7 @@ enum idxd_completion_status {
#define engine_confdev(engine) &engine->idxd_dev.conf_dev
#define group_confdev(group) &group->idxd_dev.conf_dev
#define cdev_dev(cdev) &cdev->idxd_dev.conf_dev
+#define user_ctx_dev(ctx) (&(ctx)->idxd_dev.conf_dev)
#define confdev_to_idxd_dev(dev) container_of(dev, struct idxd_dev, conf_dev)
#define idxd_dev_to_idxd(idxd_dev) container_of(idxd_dev, struct idxd_device, idxd_dev)
@@ -598,6 +655,7 @@ int idxd_register_driver(void);
void idxd_unregister_driver(void);
void idxd_wqs_quiesce(struct idxd_device *idxd);
bool idxd_queue_int_handle_resubmit(struct idxd_desc *desc);
+void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count);
/* device interrupt control */
irqreturn_t idxd_misc_thread(int vec, void *data);
@@ -662,6 +720,9 @@ void idxd_cdev_remove(void);
int idxd_cdev_get_major(struct idxd_device *idxd);
int idxd_wq_add_cdev(struct idxd_wq *wq);
void idxd_wq_del_cdev(struct idxd_wq *wq);
+int idxd_copy_cr(struct idxd_wq *wq, ioasid_t pasid, unsigned long addr,
+ void *buf, int len);
+void idxd_user_counter_increment(struct idxd_wq *wq, u32 pasid, int index);
/* perfmon */
#if IS_ENABLED(CONFIG_INTEL_IDXD_PERFMON)
@@ -678,4 +739,10 @@ static inline void perfmon_init(void) {}
static inline void perfmon_exit(void) {}
#endif
+/* debugfs */
+int idxd_device_init_debugfs(struct idxd_device *idxd);
+void idxd_device_remove_debugfs(struct idxd_device *idxd);
+int idxd_init_debugfs(void);
+void idxd_remove_debugfs(void);
+
#endif
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 99985123001b..1aa823974cda 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -9,7 +9,6 @@
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/workqueue.h>
-#include <linux/aer.h>
#include <linux/fs.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/device.h>
@@ -47,6 +46,9 @@ static struct idxd_driver_data idxd_driver_data[] = {
.compl_size = sizeof(struct dsa_completion_record),
.align = 32,
.dev_type = &dsa_device_type,
+ .evl_cr_off = offsetof(struct dsa_evl_entry, cr),
+ .cr_status_off = offsetof(struct dsa_completion_record, status),
+ .cr_result_off = offsetof(struct dsa_completion_record, result),
},
[IDXD_TYPE_IAX] = {
.name_prefix = "iax",
@@ -54,6 +56,9 @@ static struct idxd_driver_data idxd_driver_data[] = {
.compl_size = sizeof(struct iax_completion_record),
.align = 64,
.dev_type = &iax_device_type,
+ .evl_cr_off = offsetof(struct iax_evl_entry, cr),
+ .cr_status_off = offsetof(struct iax_completion_record, status),
+ .cr_result_off = offsetof(struct iax_completion_record, error_code),
},
};
@@ -200,6 +205,8 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
}
bitmap_copy(wq->opcap_bmap, idxd->opcap_bmap, IDXD_MAX_OPCAP_BITS);
}
+ mutex_init(&wq->uc_lock);
+ xa_init(&wq->upasid_xa);
idxd->wqs[i] = wq;
}
@@ -332,6 +339,33 @@ static void idxd_cleanup_internals(struct idxd_device *idxd)
destroy_workqueue(idxd->wq);
}
+static int idxd_init_evl(struct idxd_device *idxd)
+{
+ struct device *dev = &idxd->pdev->dev;
+ struct idxd_evl *evl;
+
+ if (idxd->hw.gen_cap.evl_support == 0)
+ return 0;
+
+ evl = kzalloc_node(sizeof(*evl), GFP_KERNEL, dev_to_node(dev));
+ if (!evl)
+ return -ENOMEM;
+
+ spin_lock_init(&evl->lock);
+ evl->size = IDXD_EVL_SIZE_MIN;
+
+ idxd->evl_cache = kmem_cache_create(dev_name(idxd_confdev(idxd)),
+ sizeof(struct idxd_evl_fault) + evl_ent_size(idxd),
+ 0, 0, NULL);
+ if (!idxd->evl_cache) {
+ kfree(evl);
+ return -ENOMEM;
+ }
+
+ idxd->evl = evl;
+ return 0;
+}
+
static int idxd_setup_internals(struct idxd_device *idxd)
{
struct device *dev = &idxd->pdev->dev;
@@ -357,8 +391,14 @@ static int idxd_setup_internals(struct idxd_device *idxd)
goto err_wkq_create;
}
+ rc = idxd_init_evl(idxd);
+ if (rc < 0)
+ goto err_evl;
+
return 0;
+ err_evl:
+ destroy_workqueue(idxd->wq);
err_wkq_create:
for (i = 0; i < idxd->max_groups; i++)
put_device(group_confdev(idxd->groups[i]));
@@ -389,7 +429,7 @@ static void idxd_read_table_offsets(struct idxd_device *idxd)
dev_dbg(dev, "IDXD Perfmon Offset: %#x\n", idxd->perfmon_offset);
}
-static void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count)
+void multi_u64_to_bmap(unsigned long *bmap, u64 *val, int count)
{
int i, j, nr;
@@ -461,6 +501,10 @@ static void idxd_read_caps(struct idxd_device *idxd)
dev_dbg(dev, "opcap[%d]: %#llx\n", i, idxd->hw.opcap.bits[i]);
}
multi_u64_to_bmap(idxd->opcap_bmap, &idxd->hw.opcap.bits[0], 4);
+
+ /* read iaa cap */
+ if (idxd->data->type == IDXD_TYPE_IAX && idxd->hw.version >= DEVICE_VERSION_2)
+ idxd->hw.iaa_cap.bits = ioread64(idxd->reg_base + IDXD_IAACAP_OFFSET);
}
static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data)
@@ -661,6 +705,10 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
goto err_dev_register;
}
+ rc = idxd_device_init_debugfs(idxd);
+ if (rc)
+ dev_warn(dev, "IDXD debugfs failed to setup\n");
+
dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n",
idxd->hw.version);
@@ -723,6 +771,7 @@ static void idxd_remove(struct pci_dev *pdev)
idxd_shutdown(pdev);
if (device_pasid_enabled(idxd))
idxd_disable_system_pasid(idxd);
+ idxd_device_remove_debugfs(idxd);
irq_entry = idxd_get_ie(idxd, 0);
free_irq(irq_entry->vector, irq_entry);
@@ -780,6 +829,10 @@ static int __init idxd_init_module(void)
if (err)
goto err_cdev_register;
+ err = idxd_init_debugfs();
+ if (err)
+ goto err_debugfs;
+
err = pci_register_driver(&idxd_pci_driver);
if (err)
goto err_pci_register;
@@ -787,6 +840,8 @@ static int __init idxd_init_module(void)
return 0;
err_pci_register:
+ idxd_remove_debugfs();
+err_debugfs:
idxd_cdev_remove();
err_cdev_register:
idxd_driver_unregister(&idxd_user_drv);
@@ -807,5 +862,6 @@ static void __exit idxd_exit_module(void)
pci_unregister_driver(&idxd_pci_driver);
idxd_cdev_remove();
perfmon_exit();
+ idxd_remove_debugfs();
}
module_exit(idxd_exit_module);
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index 242f1f0b9f09..b501320a9c7a 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -7,6 +7,8 @@
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
+#include <linux/iommu.h>
+#include <linux/sched/mm.h>
#include <uapi/linux/idxd.h>
#include "../dmaengine.h"
#include "idxd.h"
@@ -217,13 +219,187 @@ static void idxd_int_handle_revoke(struct work_struct *work)
kfree(revoke);
}
-static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
+/*
+ * Workqueue handler that services one queued event log fault.
+ *
+ * @work: the work_struct embedded in a struct idxd_evl_fault that was
+ *        allocated from idxd->evl_cache by process_evl_entry().
+ *
+ * Depending on the fault status, the completion record that follows the
+ * event log entry header is copied to the faulting user address with
+ * idxd_copy_cr(), and the per-file fault counters are updated.
+ *
+ * The fault item is always returned to idxd->evl_cache before returning,
+ * including when there is nothing to copy.
+ */
+static void idxd_evl_fault_work(struct work_struct *work)
{
+	struct idxd_evl_fault *fault = container_of(work, struct idxd_evl_fault, work);
+	struct idxd_wq *wq = fault->wq;
+	struct idxd_device *idxd = wq->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	struct idxd_evl *evl = idxd->evl;
+	struct __evl_entry *entry_head = fault->entry;
+	/* The completion record sits evl_cr_off bytes after the entry header. */
+	void *cr = (void *)entry_head + idxd->data->evl_cr_off;
+	int cr_size = idxd->data->compl_size;
+	u8 *status = (u8 *)cr + idxd->data->cr_status_off;
+	u8 *result = (u8 *)cr + idxd->data->cr_result_off;
+	int copied, copy_size;
+	bool *bf;
+
+	switch (fault->status) {
+	case DSA_COMP_CRA_XLAT:
+		/* First error of a batch: start with a clean batch_fail flag. */
+		if (entry_head->batch && entry_head->first_err_in_batch)
+			evl->batch_fail[entry_head->batch_id] = false;
+
+		copy_size = cr_size;
+		idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULTS);
+		break;
+	case DSA_COMP_BATCH_EVL_ERR:
+		bf = &evl->batch_fail[entry_head->batch_id];
+
+		copy_size = entry_head->rcr || *bf ? cr_size : 0;
+		if (*bf) {
+			/*
+			 * An earlier completion record write for this batch
+			 * failed; reflect that in the batch completion record
+			 * and consume the flag.
+			 */
+			if (*status == DSA_COMP_SUCCESS)
+				*status = DSA_COMP_BATCH_FAIL;
+			*result = 1;
+			*bf = false;
+		}
+		idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULTS);
+		break;
+	case DSA_COMP_DRAIN_EVL:
+		copy_size = cr_size;
+		break;
+	default:
+		copy_size = 0;
+		dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n", fault->status);
+		break;
+	}
+
+	/* Nothing to copy, but the fault item must still be released. */
+	if (copy_size == 0)
+		goto out;
+
+	/*
+	 * Copy completion record to fault_addr in user address space
+	 * that is found by wq and PASID.
+	 */
+	copied = idxd_copy_cr(wq, entry_head->pasid, entry_head->fault_addr,
+			      cr, copy_size);
+	/*
+	 * The task that triggered the page fault is unknown currently
+	 * because multiple threads may share the user address
+	 * space or the task exits already before this fault.
+	 * So if the copy fails, SIGSEGV can not be sent to the task.
+	 * Just print an error for the failure. The user application
+	 * waiting for the completion record will time out on this
+	 * failure.
+	 */
+	switch (fault->status) {
+	case DSA_COMP_CRA_XLAT:
+		if (copied != copy_size) {
+			idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULT_FAILS);
+			dev_dbg_ratelimited(dev, "Failed to write to completion record: (%d:%d)\n",
+					    copy_size, copied);
+			/* Remember the failure for the batch completion record. */
+			if (entry_head->batch)
+				evl->batch_fail[entry_head->batch_id] = true;
+		}
+		break;
+	case DSA_COMP_BATCH_EVL_ERR:
+		if (copied != copy_size) {
+			idxd_user_counter_increment(wq, entry_head->pasid, COUNTER_FAULT_FAILS);
+			dev_dbg_ratelimited(dev, "Failed to write to batch completion record: (%d:%d)\n",
+					    copy_size, copied);
+		}
+		break;
+	case DSA_COMP_DRAIN_EVL:
+		if (copied != copy_size)
+			dev_dbg_ratelimited(dev, "Failed to write to drain completion record: (%d:%d)\n",
+					    copy_size, copied);
+		break;
+	}
+
+out:
+	kmem_cache_free(idxd->evl_cache, fault);
+}
+
+/*
+ * Handle a single event log entry.
+ *
+ * @idxd: device owning the event log
+ * @entry_head: header of the entry at position @index in the log
+ * @index: position of the entry in the log
+ *
+ * If the bit for @index is set in evl->bmap, it is cleared and the entry
+ * gets no further handling. Otherwise completion record faults are queued
+ * to the wq's workqueue for idxd_evl_fault_work(), and any other status is
+ * reported as a device error.
+ */
+static void process_evl_entry(struct idxd_device *idxd,
+			      struct __evl_entry *entry_head, unsigned int index)
+{
+	struct device *dev = &idxd->pdev->dev;
+	struct idxd_evl *evl = idxd->evl;
+	struct idxd_evl_fault *fault;
+	struct idxd_wq *wq;
+	int ent_size;
+	u8 status;
+
+	if (test_bit(index, evl->bmap)) {
+		clear_bit(index, evl->bmap);
+		return;
+	}
+
+	status = DSA_COMP_STATUS(entry_head->error);
+
+	switch (status) {
+	case DSA_COMP_CRA_XLAT:
+	case DSA_COMP_DRAIN_EVL:
+	case DSA_COMP_BATCH_EVL_ERR:
+		break;
+	default:
+		dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n",
+				     status, entry_head->operation,
+				     entry_head->fault_addr);
+		return;
+	}
+
+	ent_size = evl_ent_size(idxd);
+
+	if (entry_head->rci)
+		dev_dbg(dev, "Completion Int Req set, ignoring!\n");
+
+	/* A drain entry without rcr set has nothing to deliver. */
+	if (status == DSA_COMP_DRAIN_EVL && !entry_head->rcr)
+		return;
+
+	fault = kmem_cache_alloc(idxd->evl_cache, GFP_ATOMIC);
+	if (!fault) {
+		dev_warn(dev, "Failed to service fault work.\n");
+		return;
+	}
+
+	wq = idxd->wqs[entry_head->wq_idx];
+
+	/* Snapshot the entry so the worker does not read the live log. */
+	fault->wq = wq;
+	fault->status = status;
+	memcpy(&fault->entry, entry_head, ent_size);
+	INIT_WORK(&fault->work, idxd_evl_fault_work);
+	queue_work(wq->wq, &fault->work);
+}
+
+/*
+ * Walk the event log from the driver's cached head up to the tail read
+ * from the EVLSTATUS register, handing every entry to process_evl_entry().
+ * The new head is then stored in evl->head and written to the lower half
+ * of EVLSTATUS. The whole walk runs under evl->lock.
+ */
+static void process_evl_entries(struct idxd_device *idxd)
+{
+	struct idxd_evl *evl = idxd->evl;
+	unsigned int ent_size = evl_ent_size(idxd);
+	union evl_status_reg evl_status;
+	struct __evl_entry *entry_head;
+	unsigned int cur, tail;
+	u32 nr_entries;
+
+	/* Status value with only int_pending set. */
+	evl_status.bits = 0;
+	evl_status.int_pending = 1;
+
+	spin_lock(&evl->lock);
+	/* Clear interrupt pending bit */
+	iowrite32(evl_status.bits_upper32,
+		  idxd->reg_base + IDXD_EVLSTATUS_OFFSET + sizeof(u32));
+	cur = evl->head;
+	evl_status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
+	tail = evl_status.tail;
+	nr_entries = idxd->evl->size;
+
+	/* The log is circular: the index wraps at the configured size. */
+	for (; cur != tail; cur = (cur + 1) % nr_entries) {
+		entry_head = (struct __evl_entry *)(evl->log + (cur * ent_size));
+		process_evl_entry(idxd, entry_head, cur);
+	}
+
+	evl->head = cur;
+	evl_status.head = cur;
+	iowrite32(evl_status.bits_lower32, idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
+	spin_unlock(&evl->lock);
+}
+
+irqreturn_t idxd_misc_thread(int vec, void *data)
+{
+ struct idxd_irq_entry *irq_entry = data;
+ struct idxd_device *idxd = ie_to_idxd(irq_entry);
struct device *dev = &idxd->pdev->dev;
union gensts_reg gensts;
u32 val = 0;
int i;
bool err = false;
+ u32 cause;
+
+ cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET);
+ if (!cause)
+ return IRQ_NONE;
+
+ iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET);
if (cause & IDXD_INTC_HALT_STATE)
goto halt;
@@ -295,13 +471,18 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
perfmon_counter_overflow(idxd);
}
+ if (cause & IDXD_INTC_EVL) {
+ val |= IDXD_INTC_EVL;
+ process_evl_entries(idxd);
+ }
+
val ^= cause;
if (val)
dev_warn_once(dev, "Unexpected interrupt cause bits set: %#x\n",
val);
if (!err)
- return 0;
+ goto out;
halt:
gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET);
@@ -324,33 +505,10 @@ halt:
"idxd halted, need %s.\n",
gensts.reset_type == IDXD_DEVICE_RESET_FLR ?
"FLR" : "system reset");
- return -ENXIO;
}
}
- return 0;
-}
-
-irqreturn_t idxd_misc_thread(int vec, void *data)
-{
- struct idxd_irq_entry *irq_entry = data;
- struct idxd_device *idxd = ie_to_idxd(irq_entry);
- int rc;
- u32 cause;
-
- cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET);
- if (cause)
- iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET);
-
- while (cause) {
- rc = process_misc_interrupts(idxd, cause);
- if (rc < 0)
- break;
- cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET);
- if (cause)
- iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET);
- }
-
+out:
return IRQ_HANDLED;
}
diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h
index fe3b8d04f9db..7b54a3939ea1 100644
--- a/drivers/dma/idxd/registers.h
+++ b/drivers/dma/idxd/registers.h
@@ -3,6 +3,8 @@
#ifndef _IDXD_REGISTERS_H_
#define _IDXD_REGISTERS_H_
+#include <uapi/linux/idxd.h>
+
/* PCI Config */
#define PCI_DEVICE_ID_INTEL_DSA_SPR0 0x0b25
#define PCI_DEVICE_ID_INTEL_IAX_SPR0 0x0cfe
@@ -31,7 +33,9 @@ union gen_cap_reg {
u64 rsvd:3;
u64 dest_readback:1;
u64 drain_readback:1;
- u64 rsvd2:6;
+ u64 rsvd2:3;
+ u64 evl_support:2;
+ u64 batch_continuation:1;
u64 max_xfer_shift:5;
u64 max_batch_shift:4;
u64 max_ims_mult:6;
@@ -55,7 +59,8 @@ union wq_cap_reg {
u64 occupancy:1;
u64 occupancy_int:1;
u64 op_config:1;
- u64 rsvd3:9;
+ u64 wq_prs_support:1;
+ u64 rsvd4:8;
};
u64 bits;
} __packed;
@@ -117,7 +122,8 @@ union gencfg_reg {
u32 rdbuf_limit:8;
u32 rsvd:4;
u32 user_int_en:1;
- u32 rsvd2:19;
+ u32 evl_en:1;
+ u32 rsvd2:18;
};
u32 bits;
} __packed;
@@ -127,7 +133,8 @@ union genctrl_reg {
struct {
u32 softerr_int_en:1;
u32 halt_int_en:1;
- u32 rsvd:30;
+ u32 evl_int_en:1;
+ u32 rsvd:29;
};
u32 bits;
} __packed;
@@ -162,6 +169,7 @@ enum idxd_device_reset_type {
#define IDXD_INTC_OCCUPY 0x04
#define IDXD_INTC_PERFMON_OVFL 0x08
#define IDXD_INTC_HALT_STATE 0x10
+#define IDXD_INTC_EVL 0x20
#define IDXD_INTC_INT_HANDLE_REVOKED 0x80000000
#define IDXD_CMD_OFFSET 0xa0
@@ -276,6 +284,45 @@ union sw_err_reg {
u64 bits[4];
} __packed;
+union iaa_cap_reg { /* 64-bit IAA capability register; read with ioread64() at IDXD_IAACAP_OFFSET for IAX devices with version >= DEVICE_VERSION_2 */
+ struct { /* one single-bit flag per capability; exact semantics are not visible here -- TODO confirm against the IAA spec */
+ u64 dec_aecs_format_ver:1;
+ u64 drop_init_bits:1;
+ u64 chaining:1;
+ u64 force_array_output_mod:1;
+ u64 load_part_aecs:1;
+ u64 comp_early_abort:1;
+ u64 nested_comp:1;
+ u64 diction_comp:1;
+ u64 header_gen:1;
+ u64 crypto_gcm:1;
+ u64 crypto_cfb:1;
+ u64 crypto_xts:1;
+ u64 rsvd:52; /* pads the 12 flag bits above to 64 bits */
+ };
+ u64 bits; /* raw register value; this is what the iaa_cap sysfs attribute prints */
+} __packed;
+
+#define IDXD_IAACAP_OFFSET 0x180
+
+#define IDXD_EVLCFG_OFFSET 0xe0
+union evlcfg_reg { /* event log configuration register at IDXD_EVLCFG_OFFSET, laid out as two 64-bit words */
+ struct {
+ u64 pasid_en:1; /* first 64-bit word: 1 + 1 + 10 + 52 bits */
+ u64 priv:1;
+ u64 rsvd:10;
+ u64 base_addr:52; /* presumably the log base address with its low 12 bits dropped -- TODO confirm */
+
+ u64 size:16; /* second 64-bit word: 16 + 20 + 28 bits; 16 bits matches IDXD_EVL_SIZE_MAX (0xffff) */
+ u64 pasid:20;
+ u64 rsvd2:28;
+ };
+ u64 bits[2]; /* raw view of the two words */
+} __packed;
+
+#define IDXD_EVL_SIZE_MIN 0x0040
+#define IDXD_EVL_SIZE_MAX 0xffff
+
union msix_perm {
struct {
u32 rsvd:2;
@@ -325,7 +372,7 @@ union wqcfg {
u32 mode:1; /* shared or dedicated */
u32 bof:1; /* block on fault */
u32 wq_ats_disable:1;
- u32 rsvd2:1;
+ u32 wq_prs_disable:1;
u32 priority:4;
u32 pasid:20;
u32 pasid_en:1;
@@ -513,4 +560,73 @@ union filter_cfg {
u64 val;
} __packed;
+#define IDXD_EVLSTATUS_OFFSET 0xf0
+
+union evl_status_reg { /* event log status register at IDXD_EVLSTATUS_OFFSET; accessible as bitfields, two 32-bit halves, or one 64-bit value */
+ struct {
+ u32 head:16; /* software writes the consumed position here, see process_evl_entries() */
+ u32 rsvd:16;
+ u32 tail:16; /* read by process_evl_entries() as the end of the entries to process */
+ u32 rsvd2:14;
+ u32 int_pending:1; /* process_evl_entries() writes this as 1 to clear the pending interrupt */
+ u32 rsvd3:1;
+ };
+ struct {
+ u32 bits_lower32; /* overlays head/rsvd, assuming the usual low-to-high bitfield packing -- TODO confirm */
+ u32 bits_upper32; /* overlays tail/rsvd2/int_pending/rsvd3 under the same assumption */
+ };
+ u64 bits; /* whole register, as read with ioread64() */
+} __packed;
+
+#define IDXD_MAX_BATCH_IDENT 256
+
+struct __evl_entry { /* common header of an event log entry; first member of both dsa_evl_entry and iax_evl_entry */
+ u64 rsvd:2;
+ u64 desc_valid:1;
+ u64 wq_idx_valid:1;
+ u64 batch:1; /* idxd_evl_fault_work() tracks evl->batch_fail[] only when this is set */
+ u64 fault_rw:1;
+ u64 priv:1;
+ u64 err_info_valid:1;
+ u64 error:8; /* process_evl_entry() extracts the status from this with DSA_COMP_STATUS() */
+ u64 wq_idx:8; /* used as an index into idxd->wqs[] */
+ u64 batch_id:8; /* used as an index into evl->batch_fail[] */
+ u64 operation:8; /* printed in the "Device error" warning */
+ u64 pasid:20; /* passed to idxd_copy_cr() and idxd_user_counter_increment() */
+ u64 rsvd2:4;
+
+ u16 batch_idx;
+ u16 rsvd3;
+ union { /* interpretation depends on the error code, as listed in the comments below */
+ /* Invalid Flags 0x11 */
+ u32 invalid_flags;
+ /* Invalid Int Handle 0x19 */
+ /* Page fault 0x1a */
+ /* Page fault 0x06, 0x1f, only operand_id */
+ /* Page fault before drain or in batch, 0x26, 0x27 */
+ struct {
+ u16 int_handle;
+ u16 rci:1; /* when set, process_evl_entry() only logs that the completion interrupt request is ignored */
+ u16 ims:1;
+ u16 rcr:1; /* gates the completion record copy for drain and batch entries */
+ u16 first_err_in_batch:1; /* with batch set, resets evl->batch_fail[batch_id] in idxd_evl_fault_work() */
+ u16 rsvd4_2:9;
+ u16 operand_id:3;
+ };
+ };
+ u64 fault_addr; /* user address that idxd_copy_cr() writes the completion record to */
+ u64 rsvd5;
+} __packed;
+
+struct dsa_evl_entry { /* DSA event log entry: common header immediately followed by a DSA completion record */
+ struct __evl_entry e;
+ struct dsa_completion_record cr; /* presumably located at idxd->data->evl_cr_off for DSA devices -- TODO confirm */
+} __packed;
+
+struct iax_evl_entry { /* IAX event log entry: common header, 32 reserved bytes, then an IAX completion record */
+ struct __evl_entry e;
+ u64 rsvd[4]; /* padding between the header and the completion record */
+ struct iax_completion_record cr; /* presumably located at idxd->data->evl_cr_off for IAX devices -- TODO confirm */
+} __packed;
+
#endif
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index 18cd8151dee0..293739ac5596 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -822,10 +822,14 @@ static ssize_t wq_block_on_fault_store(struct device *dev,
if (rc < 0)
return rc;
- if (bof)
+ if (bof) {
+ if (test_bit(WQ_FLAG_PRS_DISABLE, &wq->flags))
+ return -EOPNOTSUPP;
+
set_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
- else
+ } else {
clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
+ }
return count;
}
@@ -1109,6 +1113,44 @@ static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute
static struct device_attribute dev_attr_wq_ats_disable =
__ATTR(ats_disable, 0644, wq_ats_disable_show, wq_ats_disable_store);
+/* sysfs read of 'prs_disable': emits the WQ_FLAG_PRS_DISABLE bit of the wq flags. */
+static ssize_t wq_prs_disable_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	const unsigned long *wq_flags = &confdev_to_wq(dev)->flags;
+
+	return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_PRS_DISABLE, wq_flags));
+}
+
+/*
+ * sysfs write of 'prs_disable'.
+ *
+ * Returns -EPERM unless the wq is in IDXD_WQ_DISABLED state, -EOPNOTSUPP
+ * when the device does not report wq_prs_support, the kstrtobool() error
+ * for unparsable input, and @count on success.
+ */
+static ssize_t wq_prs_disable_store(struct device *dev, struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct idxd_wq *wq = confdev_to_wq(dev);
+	bool disable;
+	int err;
+
+	if (wq->state != IDXD_WQ_DISABLED)
+		return -EPERM;
+
+	if (!wq->idxd->hw.wq_cap.wq_prs_support)
+		return -EOPNOTSUPP;
+
+	err = kstrtobool(buf, &disable);
+	if (err < 0)
+		return err;
+
+	if (!disable) {
+		clear_bit(WQ_FLAG_PRS_DISABLE, &wq->flags);
+		return count;
+	}
+
+	set_bit(WQ_FLAG_PRS_DISABLE, &wq->flags);
+	/* when PRS is disabled, BOF needs to be off as well */
+	clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
+	return count;
+}
+
+static struct device_attribute dev_attr_wq_prs_disable =
+		__ATTR(prs_disable, 0644, wq_prs_disable_show, wq_prs_disable_store);
+
static ssize_t wq_occupancy_show(struct device *dev, struct device_attribute *attr, char *buf)
{
struct idxd_wq *wq = confdev_to_wq(dev);
@@ -1239,6 +1281,7 @@ static struct attribute *idxd_wq_attributes[] = {
&dev_attr_wq_max_transfer_size.attr,
&dev_attr_wq_max_batch_size.attr,
&dev_attr_wq_ats_disable.attr,
+ &dev_attr_wq_prs_disable.attr,
&dev_attr_wq_occupancy.attr,
&dev_attr_wq_enqcmds_retries.attr,
&dev_attr_wq_op_config.attr,
@@ -1260,6 +1303,13 @@ static bool idxd_wq_attr_max_batch_size_invisible(struct attribute *attr,
idxd->data->type == IDXD_TYPE_IAX;
}
+/* True when @attr is the prs_disable attribute and the device lacks wq_prs_support. */
+static bool idxd_wq_attr_wq_prs_disable_invisible(struct attribute *attr,
+						  struct idxd_device *idxd)
+{
+	if (attr != &dev_attr_wq_prs_disable.attr)
+		return false;
+
+	return !idxd->hw.wq_cap.wq_prs_support;
+}
+
static umode_t idxd_wq_attr_visible(struct kobject *kobj,
struct attribute *attr, int n)
{
@@ -1273,6 +1323,9 @@ static umode_t idxd_wq_attr_visible(struct kobject *kobj,
if (idxd_wq_attr_max_batch_size_invisible(attr, idxd))
return 0;
+ if (idxd_wq_attr_wq_prs_disable_invisible(attr, idxd))
+ return 0;
+
return attr->mode;
}
@@ -1292,6 +1345,7 @@ static void idxd_conf_wq_release(struct device *dev)
bitmap_free(wq->opcap_bmap);
kfree(wq->wqcfg);
+ xa_destroy(&wq->upasid_xa);
kfree(wq);
}
@@ -1452,15 +1506,13 @@ static ssize_t errors_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct idxd_device *idxd = confdev_to_idxd(dev);
- int i, out = 0;
+ DECLARE_BITMAP(swerr_bmap, 256);
+ bitmap_zero(swerr_bmap, 256);
spin_lock(&idxd->dev_lock);
- for (i = 0; i < 4; i++)
- out += sysfs_emit_at(buf, out, "%#018llx ", idxd->sw_err.bits[i]);
+ multi_u64_to_bmap(swerr_bmap, &idxd->sw_err.bits[0], 4);
spin_unlock(&idxd->dev_lock);
- out--;
- out += sysfs_emit_at(buf, out, "\n");
- return out;
+ return sysfs_emit(buf, "%*pb\n", 256, swerr_bmap);
}
static DEVICE_ATTR_RO(errors);
@@ -1563,6 +1615,59 @@ static ssize_t cmd_status_store(struct device *dev, struct device_attribute *att
}
static DEVICE_ATTR_RW(cmd_status);
+/*
+ * sysfs read of 'iaa_cap': prints the cached IAA capability register in
+ * hex, or returns -EOPNOTSUPP for devices older than DEVICE_VERSION_2.
+ */
+static ssize_t iaa_cap_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd = confdev_to_idxd(dev);
+
+	if (idxd->hw.version >= DEVICE_VERSION_2)
+		return sysfs_emit(buf, "%#llx\n", idxd->hw.iaa_cap.bits);
+
+	return -EOPNOTSUPP;
+}
+static DEVICE_ATTR_RO(iaa_cap);
+
+/*
+ * sysfs read of 'event_log_size': prints idxd->evl->size, or returns
+ * -EOPNOTSUPP when the device has no event log.
+ */
+static ssize_t event_log_size_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct idxd_device *idxd = confdev_to_idxd(dev);
+
+	if (idxd->evl)
+		return sysfs_emit(buf, "%u\n", idxd->evl->size);
+
+	return -EOPNOTSUPP;
+}
+
+/*
+ * sysfs write of 'event_log_size'.
+ *
+ * Returns -EOPNOTSUPP when the device has no event log, -EINVAL for
+ * unparsable or out-of-range input, -EPERM when the device is enabled or
+ * not configurable, and @count once idxd->evl->size has been updated.
+ */
+static ssize_t event_log_size_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct idxd_device *idxd = confdev_to_idxd(dev);
+	unsigned long nr_entries;
+
+	if (!idxd->evl)
+		return -EOPNOTSUPP;
+
+	if (kstrtoul(buf, 10, &nr_entries) < 0)
+		return -EINVAL;
+
+	if (idxd->state == IDXD_DEV_ENABLED ||
+	    !test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+		return -EPERM;
+
+	if (nr_entries < IDXD_EVL_SIZE_MIN || nr_entries > IDXD_EVL_SIZE_MAX)
+		return -EINVAL;
+
+	/* Reject sizes whose byte length cannot be added to evl->dma without overflow. */
+	if (nr_entries * evl_ent_size(idxd) > ULONG_MAX - idxd->evl->dma)
+		return -EINVAL;
+
+	idxd->evl->size = nr_entries;
+	return count;
+}
+static DEVICE_ATTR_RW(event_log_size);
+
static bool idxd_device_attr_max_batch_size_invisible(struct attribute *attr,
struct idxd_device *idxd)
{
@@ -1585,6 +1690,21 @@ static bool idxd_device_attr_read_buffers_invisible(struct attribute *attr,
idxd->data->type == IDXD_TYPE_IAX;
}
+/*
+ * True when @attr is the iaa_cap attribute and the device is either not
+ * an IAX device or older than DEVICE_VERSION_2.
+ */
+static bool idxd_device_attr_iaa_cap_invisible(struct attribute *attr,
+					       struct idxd_device *idxd)
+{
+	if (attr != &dev_attr_iaa_cap.attr)
+		return false;
+
+	return idxd->data->type != IDXD_TYPE_IAX ||
+	       idxd->hw.version < DEVICE_VERSION_2;
+}
+
+/* True when @attr is the event_log_size attribute and gen_cap reports no evl_support. */
+static bool idxd_device_attr_event_log_size_invisible(struct attribute *attr,
+						      struct idxd_device *idxd)
+{
+	if (attr != &dev_attr_event_log_size.attr)
+		return false;
+
+	return !idxd->hw.gen_cap.evl_support;
+}
+
static umode_t idxd_device_attr_visible(struct kobject *kobj,
struct attribute *attr, int n)
{
@@ -1597,6 +1717,12 @@ static umode_t idxd_device_attr_visible(struct kobject *kobj,
if (idxd_device_attr_read_buffers_invisible(attr, idxd))
return 0;
+ if (idxd_device_attr_iaa_cap_invisible(attr, idxd))
+ return 0;
+
+ if (idxd_device_attr_event_log_size_invisible(attr, idxd))
+ return 0;
+
return attr->mode;
}
@@ -1622,6 +1748,8 @@ static struct attribute *idxd_device_attributes[] = {
&dev_attr_read_buffer_limit.attr,
&dev_attr_cdev_major.attr,
&dev_attr_cmd_status.attr,
+ &dev_attr_iaa_cap.attr,
+ &dev_attr_event_log_size.attr,
NULL,
};
@@ -1643,6 +1771,8 @@ static void idxd_conf_device_release(struct device *dev)
bitmap_free(idxd->wq_enable_map);
kfree(idxd->wqs);
kfree(idxd->engines);
+ kfree(idxd->evl);
+ kmem_cache_destroy(idxd->evl_cache);
ida_free(&idxd_ida, idxd->id);
bitmap_free(idxd->opcap_bmap);
kfree(idxd);