summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-12-14 20:15:43 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2022-12-14 20:15:43 +0300
commit08cdc2157966c07d3f986a097ddaa74cee312751 (patch)
treedad2562768b49876c642c2505813e90a467ae40a /lib
parentaa5ad10f6cca6d42f3fef6cb862e03b220ea19a6 (diff)
parentd6c55c0a20e5059abdde81713ddf6324a946eb3c (diff)
downloadlinux-08cdc2157966c07d3f986a097ddaa74cee312751.tar.xz
Merge tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd
Pull iommufd implementation from Jason Gunthorpe: "iommufd is the user API to control the IOMMU subsystem as it relates to managing IO page tables that point at user space memory. It takes over from drivers/vfio/vfio_iommu_type1.c (aka the VFIO container) which is the VFIO specific interface for a similar idea. We see a broad need for extended features, some being highly IOMMU device specific: - Binding iommu_domain's to PASID/SSID - Userspace IO page tables, for ARM, x86 and S390 - Kernel bypassed invalidation of user page tables - Re-use of the KVM page table in the IOMMU - Dirty page tracking in the IOMMU - Runtime Increase/Decrease of IOPTE size - PRI support with faults resolved in userspace Many of these HW features exist to support VM use cases - for instance the combination of PASID, PRI and Userspace IO Page Tables allows an implementation of DMA Shared Virtual Addressing (vSVA) within a guest. Dirty tracking enables VM live migration with SRIOV devices and PASID support allow creating "scalable IOV" devices, among other things. As these features are fundamental to a VM platform they need to be uniformly exposed to all the driver families that do DMA into VMs, which is currently VFIO and VDPA" For more background, see the extended explanations in Jason's pull request: https://lore.kernel.org/lkml/Y5dzTU8dlmXTbzoJ@nvidia.com/ * tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd: (62 commits) iommufd: Change the order of MSI setup iommufd: Improve a few unclear bits of code iommufd: Fix comment typos vfio: Move vfio group specific code into group.c vfio: Refactor dma APIs for emulated devices vfio: Wrap vfio group module init/clean code into helpers vfio: Refactor vfio_device open and close vfio: Make vfio_device_open() truly device specific vfio: Swap order of vfio_device_container_register() and open_device() vfio: Set device->group in helper function vfio: Create wrappers for group register/unregister vfio: Move the sanity check of the group to vfio_create_group() vfio: Simplify vfio_create_group() iommufd: Allow iommufd to supply /dev/vfio/vfio vfio: Make vfio_container optionally compiled vfio: Move container related MODULE_ALIAS statements into container.c vfio-iommufd: Support iommufd for emulated VFIO devices vfio-iommufd: Support iommufd for physical VFIO devices vfio-iommufd: Allow iommufd to be used in place of a container fd vfio: Use IOMMU_CAP_ENFORCE_CACHE_COHERENCY for vfio_file_enforced_coherent() ...
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig4
-rw-r--r--lib/interval_tree.c132
2 files changed, 136 insertions, 0 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 48e021846472..ce2abffb9ed8 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -480,6 +480,10 @@ config INTERVAL_TREE
for more information.
+config INTERVAL_TREE_SPAN_ITER
+ bool
+ depends on INTERVAL_TREE
+
config XARRAY_MULTI
bool
help
diff --git a/lib/interval_tree.c b/lib/interval_tree.c
index 593ce56ece50..3412737ff365 100644
--- a/lib/interval_tree.c
+++ b/lib/interval_tree.c
@@ -15,3 +15,135 @@ EXPORT_SYMBOL_GPL(interval_tree_insert);
EXPORT_SYMBOL_GPL(interval_tree_remove);
EXPORT_SYMBOL_GPL(interval_tree_iter_first);
EXPORT_SYMBOL_GPL(interval_tree_iter_next);
+
+#ifdef CONFIG_INTERVAL_TREE_SPAN_ITER
+/*
+ * Roll nodes[1] into nodes[0] by advancing nodes[1] to the end of a contiguous
+ * span of nodes. This makes nodes[0]->last the end of that contiguous used span
+ * indexes that started at the original nodes[1]->start. nodes[1] is now the
+ * first node starting the next used span. A hole span is between nodes[0]->last
+ * and nodes[1]->start. nodes[1] must be !NULL.
+ */
+static void
+interval_tree_span_iter_next_gap(struct interval_tree_span_iter *state)
+{
+ struct interval_tree_node *cur = state->nodes[1];
+
+ state->nodes[0] = cur;
+ do {
+ if (cur->last > state->nodes[0]->last)
+ state->nodes[0] = cur;
+ cur = interval_tree_iter_next(cur, state->first_index,
+ state->last_index);
+ } while (cur && (state->nodes[0]->last >= cur->start ||
+ state->nodes[0]->last + 1 == cur->start));
+ state->nodes[1] = cur;
+}
+
+void interval_tree_span_iter_first(struct interval_tree_span_iter *iter,
+ struct rb_root_cached *itree,
+ unsigned long first_index,
+ unsigned long last_index)
+{
+ iter->first_index = first_index;
+ iter->last_index = last_index;
+ iter->nodes[0] = NULL;
+ iter->nodes[1] =
+ interval_tree_iter_first(itree, first_index, last_index);
+ if (!iter->nodes[1]) {
+ /* No nodes intersect the span, whole span is hole */
+ iter->start_hole = first_index;
+ iter->last_hole = last_index;
+ iter->is_hole = 1;
+ return;
+ }
+ if (iter->nodes[1]->start > first_index) {
+ /* Leading hole on first iteration */
+ iter->start_hole = first_index;
+ iter->last_hole = iter->nodes[1]->start - 1;
+ iter->is_hole = 1;
+ interval_tree_span_iter_next_gap(iter);
+ return;
+ }
+
+ /* Starting inside a used */
+ iter->start_used = first_index;
+ iter->is_hole = 0;
+ interval_tree_span_iter_next_gap(iter);
+ iter->last_used = iter->nodes[0]->last;
+ if (iter->last_used >= last_index) {
+ iter->last_used = last_index;
+ iter->nodes[0] = NULL;
+ iter->nodes[1] = NULL;
+ }
+}
+EXPORT_SYMBOL_GPL(interval_tree_span_iter_first);
+
+void interval_tree_span_iter_next(struct interval_tree_span_iter *iter)
+{
+ if (!iter->nodes[0] && !iter->nodes[1]) {
+ iter->is_hole = -1;
+ return;
+ }
+
+ if (iter->is_hole) {
+ iter->start_used = iter->last_hole + 1;
+ iter->last_used = iter->nodes[0]->last;
+ if (iter->last_used >= iter->last_index) {
+ iter->last_used = iter->last_index;
+ iter->nodes[0] = NULL;
+ iter->nodes[1] = NULL;
+ }
+ iter->is_hole = 0;
+ return;
+ }
+
+ if (!iter->nodes[1]) {
+ /* Trailing hole */
+ iter->start_hole = iter->nodes[0]->last + 1;
+ iter->last_hole = iter->last_index;
+ iter->nodes[0] = NULL;
+ iter->is_hole = 1;
+ return;
+ }
+
+ /* must have both nodes[0] and [1], interior hole */
+ iter->start_hole = iter->nodes[0]->last + 1;
+ iter->last_hole = iter->nodes[1]->start - 1;
+ iter->is_hole = 1;
+ interval_tree_span_iter_next_gap(iter);
+}
+EXPORT_SYMBOL_GPL(interval_tree_span_iter_next);
+
+/*
+ * Advance the iterator index to a specific position. The returned used/hole is
+ * updated to start at new_index. This is faster than calling
+ * interval_tree_span_iter_first() as it can avoid full searches in several
+ * cases where the iterator is already set.
+ */
+void interval_tree_span_iter_advance(struct interval_tree_span_iter *iter,
+ struct rb_root_cached *itree,
+ unsigned long new_index)
+{
+ if (iter->is_hole == -1)
+ return;
+
+ iter->first_index = new_index;
+ if (new_index > iter->last_index) {
+ iter->is_hole = -1;
+ return;
+ }
+
+ /* Rely on the union aliasing hole/used */
+ if (iter->start_hole <= new_index && new_index <= iter->last_hole) {
+ iter->start_hole = new_index;
+ return;
+ }
+ if (new_index == iter->last_hole + 1)
+ interval_tree_span_iter_next(iter);
+ else
+ interval_tree_span_iter_first(iter, itree, new_index,
+ iter->last_index);
+}
+EXPORT_SYMBOL_GPL(interval_tree_span_iter_advance);
+#endif