From 413e8f014c8b848e4ce939156f210df59fbd1c24 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Sat, 20 Apr 2024 03:50:06 +0100
Subject: fuse: Convert fuse_readpages_end() to use folio_end_read()

Nobody checks the error flag on fuse folios, so stop setting it.
Optimise the (optional) setting of the uptodate flag and clearing of the
lock flag by using folio_end_read().

Signed-off-by: Matthew Wilcox (Oracle)
Signed-off-by: Miklos Szeredi
---
 fs/fuse/file.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b57ce4157640..f39456c65ed7 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -935,14 +935,10 @@ static void fuse_readpages_end(struct fuse_mount *fm, struct fuse_args *args,
 	}
 
 	for (i = 0; i < ap->num_pages; i++) {
-		struct page *page = ap->pages[i];
+		struct folio *folio = page_folio(ap->pages[i]);
 
-		if (!err)
-			SetPageUptodate(page);
-		else
-			SetPageError(page);
-		unlock_page(page);
-		put_page(page);
+		folio_end_read(folio, !err);
+		folio_put(folio);
 	}
 	if (ia->ff)
 		fuse_file_put(ia->ff, false);
-- 
cgit v1.2.3

From 9fe2a036a23ceeac402c4fde8ec37c02ab25f133 Mon Sep 17 00:00:00 2001
From: Richard Fung
Date: Tue, 16 Apr 2024 00:16:39 +0000
Subject: fuse: Add initial support for fs-verity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds support for the FS_IOC_ENABLE_VERITY and FS_IOC_MEASURE_VERITY
ioctls. The FS_IOC_READ_VERITY_METADATA ioctl is not implemented, but as
the documentation notes, "This is a fairly specialized use case, and most
fs-verity users won’t need this ioctl."

Signed-off-by: Richard Fung
Acked-by: Eric Biggers
Signed-off-by: Miklos Szeredi
---
 fs/fuse/ioctl.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

(limited to 'fs')

diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c
index 726640fa439e..572ce8a82ceb 100644
--- a/fs/fuse/ioctl.c
+++ b/fs/fuse/ioctl.c
@@ -8,6 +8,7 @@
 #include <linux/uio.h>
 #include <linux/compat.h>
 #include <linux/fileattr.h>
+#include <linux/fsverity.h>
 
 static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args,
 			       struct fuse_ioctl_out *outarg)
@@ -117,6 +118,53 @@ static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst,
 	return 0;
 }
 
+/* For fs-verity, determine iov lengths from input */
+static int fuse_setup_measure_verity(unsigned long arg, struct iovec *iov)
+{
+	__u16 digest_size;
+	struct fsverity_digest __user *uarg = (void __user *)arg;
+
+	if (copy_from_user(&digest_size, &uarg->digest_size, sizeof(digest_size)))
+		return -EFAULT;
+
+	if (digest_size > SIZE_MAX - sizeof(struct fsverity_digest))
+		return -EINVAL;
+
+	iov->iov_len = sizeof(struct fsverity_digest) + digest_size;
+
+	return 0;
+}
+
+static int fuse_setup_enable_verity(unsigned long arg, struct iovec *iov,
+				    unsigned int *in_iovs)
+{
+	struct fsverity_enable_arg enable;
+	struct fsverity_enable_arg __user *uarg = (void __user *)arg;
+	const __u32 max_buffer_len = FUSE_MAX_MAX_PAGES * PAGE_SIZE;
+
+	if (copy_from_user(&enable, uarg, sizeof(enable)))
+		return -EFAULT;
+
+	if (enable.salt_size > max_buffer_len || enable.sig_size > max_buffer_len)
+		return -ENOMEM;
+
+	if (enable.salt_size > 0) {
+		iov++;
+		(*in_iovs)++;
+
+		iov->iov_base = u64_to_user_ptr(enable.salt_ptr);
+		iov->iov_len = enable.salt_size;
+	}
+
+	if (enable.sig_size > 0) {
+		iov++;
+		(*in_iovs)++;
+
+		iov->iov_base = u64_to_user_ptr(enable.sig_ptr);
+		iov->iov_len = enable.sig_size;
+	}
+	return 0;
+}
+
 /*
  * For ioctls, there is no generic way to determine how much memory
@@ -227,6 +275,18 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 			out_iov = iov;
 			out_iovs = 1;
 		}
+
+		err = 0;
+		switch (cmd) {
+		case FS_IOC_MEASURE_VERITY:
+			err = fuse_setup_measure_verity(arg, iov);
+			break;
+		case FS_IOC_ENABLE_VERITY:
+			err = fuse_setup_enable_verity(arg, iov, &in_iovs);
+			break;
+		}
+		if (err)
+			goto out;
 	}
 
  retry:
-- 
cgit v1.2.3
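For context — not part of the patch — here is a minimal sketch of how user
space drives these two ioctls. It assumes a kernel with this patch, a FUSE
server that actually implements the ioctls, SHA-256, and a 4096-byte Merkle
tree block size; build against the <linux/fsverity.h> UAPI header.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fsverity.h>

int main(int argc, char **argv)
{
	if (argc != 2) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	/* FS_IOC_ENABLE_VERITY requires a read-only file descriptor and a
	 * file that is not open for writing anywhere else. */
	int fd = open(argv[1], O_RDONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	struct fsverity_enable_arg enable = {
		.version = 1,
		.hash_algorithm = FS_VERITY_HASH_ALG_SHA256,
		.block_size = 4096,
		/* salt_size/sig_size left zero: with no salt and no
		 * signature, fuse_setup_enable_verity() adds no extra
		 * input iovecs. */
	};
	if (ioctl(fd, FS_IOC_ENABLE_VERITY, &enable) != 0) {
		perror("FS_IOC_ENABLE_VERITY");
		return 1;
	}

	/* digest_size is an in/out field: set it to the buffer capacity that
	 * follows the header; this is the value fuse_setup_measure_verity()
	 * reads to size the output iovec. */
	struct fsverity_digest *d = malloc(sizeof(*d) + 64);
	if (!d)
		return 1;
	d->digest_size = 64;
	if (ioctl(fd, FS_IOC_MEASURE_VERITY, d) != 0) {
		perror("FS_IOC_MEASURE_VERITY");
		return 1;
	}

	for (int i = 0; i < d->digest_size; i++)
		printf("%02x", d->digest[i]);
	putchar('\n');

	free(d);
	close(fd);
	return 0;
}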
From 42815f8ac54c5113bf450ec4b7ccc5b62af0f6a7 Mon Sep 17 00:00:00 2001
From: Hou Tao
Date: Thu, 9 May 2024 20:21:53 +0800
Subject: fuse: set FR_PENDING atomically in fuse_resend()

When fuse_resend() moves requests from the processing lists back to the
pending list, it uses __set_bit() to set the FR_PENDING bit in req->flags.
Using __set_bit() is not safe, because other functions may update
req->flags concurrently (e.g., request_wait_answer() may call
set_bit(FR_INTERRUPTED, &flags)).

Fix it by using set_bit() instead.

Fixes: 760eac73f9f6 ("fuse: Introduce a new notification type for resend pending requests")
Signed-off-by: Hou Tao
Signed-off-by: Miklos Szeredi
---
 fs/fuse/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 3ec8bb5e68ff..8eb2ce7c0b01 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1813,7 +1813,7 @@ static void fuse_resend(struct fuse_conn *fc)
 	spin_unlock(&fc->lock);
 
 	list_for_each_entry_safe(req, next, &to_queue, list) {
-		__set_bit(FR_PENDING, &req->flags);
+		set_bit(FR_PENDING, &req->flags);
 		/* mark the request as resend request */
 		req->in.h.unique |= FUSE_UNIQUE_RESEND;
 	}
-- 
cgit v1.2.3
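To see why the non-atomic variant is unsafe here, consider the following
userspace analogue — an illustrative sketch, not FUSE code, with invented
stand-in flag names. A plain load/OR/store (what __set_bit() does) can race
with a concurrent atomic read-modify-write and silently drop the other
writer's bit; the lost update may take many iterations to observe.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define FR_PENDING	(1UL << 0)	/* invented stand-ins for req->flags bits */
#define FR_INTERRUPTED	(1UL << 1)

static _Atomic unsigned long flags;

static void *set_pending_nonatomic(void *arg)
{
	/* like __set_bit(): load, OR, store as three separate steps */
	unsigned long v = atomic_load_explicit(&flags, memory_order_relaxed);
	atomic_store_explicit(&flags, v | FR_PENDING, memory_order_relaxed);
	return NULL;
}

static void *set_interrupted_atomic(void *arg)
{
	/* like set_bit(): one indivisible read-modify-write */
	atomic_fetch_or(&flags, FR_INTERRUPTED);
	return NULL;
}

int main(void)
{
	/* Run the two writers repeatedly; a missing FR_INTERRUPTED bit means
	 * the non-atomic writer overwrote the atomic writer's update. */
	for (int i = 0; i < 100000; i++) {
		pthread_t a, b;

		atomic_store(&flags, 0);
		pthread_create(&a, NULL, set_pending_nonatomic, NULL);
		pthread_create(&b, NULL, set_interrupted_atomic, NULL);
		pthread_join(a, NULL);
		pthread_join(b, NULL);
		if (atomic_load(&flags) != (FR_PENDING | FR_INTERRUPTED))
			printf("iteration %d: lost update, flags=%#lx\n",
			       i, atomic_load(&flags));
	}
	return 0;
}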
From 246014876d782bbf2e652267482cd2e799fb5fcd Mon Sep 17 00:00:00 2001
From: Hou Tao
Date: Thu, 9 May 2024 20:21:54 +0800
Subject: fuse: clear FR_SENT when re-adding requests into pending list

The following warning was reported by lee bruce:

------------[ cut here ]------------
WARNING: CPU: 0 PID: 8264 at fs/fuse/dev.c:300 fuse_request_end+0x685/0x7e0 fs/fuse/dev.c:300
Modules linked in:
CPU: 0 PID: 8264 Comm: ab2 Not tainted 6.9.0-rc7
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
RIP: 0010:fuse_request_end+0x685/0x7e0 fs/fuse/dev.c:300
......
Call Trace:
 fuse_dev_do_read.constprop.0+0xd36/0x1dd0 fs/fuse/dev.c:1334
 fuse_dev_read+0x166/0x200 fs/fuse/dev.c:1367
 call_read_iter include/linux/fs.h:2104 [inline]
 new_sync_read fs/read_write.c:395 [inline]
 vfs_read+0x85b/0xba0 fs/read_write.c:476
 ksys_read+0x12f/0x260 fs/read_write.c:619
 do_syscall_x64 arch/x86/entry/common.c:52 [inline]
 do_syscall_64+0xce/0x260 arch/x86/entry/common.c:83
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
......

The warning is due to the FUSE_NOTIFY_RESEND notify sent by the write()
syscall in the reproducer program, and it happens as follows:

(1) calls fuse_dev_read() to read the INIT request
    The read succeeds. During the read, the FR_SENT bit is set on the
    request.
(2) calls fuse_dev_write() to send a FUSE_NOTIFY_RESEND notify
    The resend notify re-queues all processing requests, so the INIT
    request is moved from the processing list back to the pending list.
(3) calls fuse_dev_read() with an invalid output address
    fuse_dev_read() tries to copy the same INIT request to the output
    address, but fails due to the invalid address, so the INIT request
    is ended and triggers the warning in fuse_request_end().

Fix it by clearing FR_SENT when re-adding requests into the pending list.

Acked-by: Miklos Szeredi
Reported-by: xingwei lee
Reported-by: yue sun
Closes: https://lore.kernel.org/linux-fsdevel/58f13e47-4765-fce4-daf4-dffcc5ae2330@huaweicloud.com/T/#m091614e5ea2af403b259e7cea6a49e51b9ee07a7
Fixes: 760eac73f9f6 ("fuse: Introduce a new notification type for resend pending requests")
Signed-off-by: Hou Tao
Signed-off-by: Miklos Szeredi
---
 fs/fuse/dev.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8eb2ce7c0b01..9eb191b5c4de 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1814,6 +1814,7 @@ static void fuse_resend(struct fuse_conn *fc)
 
 	list_for_each_entry_safe(req, next, &to_queue, list) {
 		set_bit(FR_PENDING, &req->flags);
+		clear_bit(FR_SENT, &req->flags);
 		/* mark the request as resend request */
 		req->in.h.unique |= FUSE_UNIQUE_RESEND;
 	}
-- 
cgit v1.2.3
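For reference, step (2) above corresponds to the FUSE server writing a
FUSE_NOTIFY_RESEND notification to /dev/fuse. A hedged sketch of just that
write is below; it assumes fd is the /dev/fuse descriptor of an established
session and relies on the convention that a notification is a
fuse_out_header with unique == 0 and the notification code in the error
field.

#include <string.h>
#include <unistd.h>
#include <linux/fuse.h>

/* Sketch: ask the kernel to re-queue all in-flight requests. Notifications
 * travel server -> kernel as a bare fuse_out_header whose unique field is 0
 * and whose error field carries the notification code. */
static int send_resend_notify(int fd)
{
	struct fuse_out_header oh;

	memset(&oh, 0, sizeof(oh));
	oh.len = sizeof(oh);		/* header only, no payload */
	oh.error = FUSE_NOTIFY_RESEND;	/* notification code, added in Linux 6.9 */
	oh.unique = 0;			/* unique == 0 marks a notification */

	if (write(fd, &oh, sizeof(oh)) != (ssize_t)sizeof(oh))
		return -1;
	return 0;
}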
From 103c2de111bf32f7c36a0ce8f638b114a37e0b76 Mon Sep 17 00:00:00 2001
From: Peter-Jan Gootzen
Date: Wed, 1 May 2024 17:38:16 +0200
Subject: virtio-fs: limit number of request queues

Virtio-fs devices may allocate significant resources to virtio queues,
such as CPU cores that busy-poll on the queue. The device indicates how
many request queues it can support, and the driver should initialize only
the number of queues that it actually wants to utilize.

In this patch we limit the number of initialized request queues to the
number of CPUs, to bound resource consumption on the device side and to
prepare for the upcoming multi-queue patch.

Signed-off-by: Peter-Jan Gootzen
Signed-off-by: Yoray Zack
Suggested-by: Max Gurtovoy
Reviewed-by: Max Gurtovoy
Reviewed-by: Stefan Hajnoczi
Acked-by: Michael S. Tsirkin
Signed-off-by: Miklos Szeredi
---
 fs/fuse/virtio_fs.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index bb3e941b9503..fa4c16abfe10 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -751,6 +751,9 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
 	if (fs->num_request_queues == 0)
 		return -EINVAL;
 
+	/* Truncate nr of request queues to nr_cpu_ids */
+	fs->num_request_queues = min_t(unsigned int, fs->num_request_queues,
+				       nr_cpu_ids);
 	fs->nvqs = VQ_REQUEST + fs->num_request_queues;
 	fs->vqs = kcalloc(fs->nvqs, sizeof(fs->vqs[VQ_HIPRIO]), GFP_KERNEL);
 	if (!fs->vqs)
-- 
cgit v1.2.3

From 529395d2ae6456c556405016ea0c43081fe607f3 Mon Sep 17 00:00:00 2001
From: Peter-Jan Gootzen
Date: Wed, 1 May 2024 17:38:17 +0200
Subject: virtio-fs: add multi-queue support

This commit creates a multi-queue mapping at device bring-up. The driver
first attempts to use the existing MSI-X interrupt affinities (previously
disabled), and if those are not present, distributes the request queues
evenly over the CPUs. If the latter fails as well, all CPUs are mapped to
request queue zero.

When a request is handed from FUSE to the virtio-fs device driver, the
driver uses the current CPU to index into the multi-queue mapping and
determine the optimal request queue to use.

We measured the performance of this patch with the fio benchmarking tool;
increasing the number of queues results in a significant speedup for both
read and write operations, demonstrating the effectiveness of multi-queue
support.

Host:
- Dell PowerEdge R760
- CPU: Intel(R) Xeon(R) Gold 6438M, 128 cores
- VM: KVM with 32 cores

Virtio-fs device:
- BlueField-3 DPU
- CPU: ARM Cortex-A78AE, 16 cores
- One thread per queue, each busy polling on one request queue
- Each queue is 1024 descriptors deep

Workload:
- fio, sequential read or write, ioengine=libaio, numjobs=32,
  4GiB file per job, iodepth=8, bs=256KiB, runtime=30s

Performance Results:
+============================+==========+===========+
| Number of queues           | Fio read | Fio write |
+============================+==========+===========+
| 1 request queue (GiB/s)    | 6.1      | 4.6       |
+----------------------------+----------+-----------+
| 8 request queues (GiB/s)   | 25.8     | 10.3      |
+----------------------------+----------+-----------+
| 16 request queues (GiB/s)  | 30.9     | 19.5      |
+----------------------------+----------+-----------+
| 32 request queues (GiB/s)  | 33.2     | 22.6      |
+----------------------------+----------+-----------+
| Speedup                    | 5.5x     | 5x        |
+----------------------------+----------+-----------+

Signed-off-by: Peter-Jan Gootzen
Signed-off-by: Yoray Zack
Signed-off-by: Max Gurtovoy
Reviewed-by: Stefan Hajnoczi
Acked-by: Michael S. Tsirkin
Signed-off-by: Miklos Szeredi
---
 fs/fuse/virtio_fs.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 62 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index fa4c16abfe10..8ffa4f063a37 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -7,6 +7,8 @@
 #include <linux/fs.h>
 #include <linux/dax.h>
 #include <linux/pci.h>
+#include <linux/group_cpus.h>
+#include <linux/interrupt.h>
 #include <linux/pfn_t.h>
 #include <linux/memremap.h>
 #include <linux/module.h>
@@ -67,6 +69,8 @@ struct virtio_fs {
 	unsigned int num_request_queues; /* number of request queues */
 	struct dax_device *dax_dev;
 
+	unsigned int *mq_map; /* index = cpu id, value = request vq id */
+
 	/* DAX memory window where file contents are mapped */
 	void *window_kaddr;
 	phys_addr_t window_phys_addr;
@@ -185,6 +189,7 @@ static void virtio_fs_ktype_release(struct kobject *kobj)
 {
 	struct virtio_fs *vfs = container_of(kobj, struct virtio_fs, kobj);
 
+	kfree(vfs->mq_map);
 	kfree(vfs->vqs);
 	kfree(vfs);
 }
@@ -706,6 +711,44 @@ static void virtio_fs_requests_done_work(struct work_struct *work)
 	}
 }
 
+static void virtio_fs_map_queues(struct virtio_device *vdev, struct virtio_fs *fs)
+{
+	const struct cpumask *mask, *masks;
+	unsigned int q, cpu;
+
+	/* First attempt to map using existing transport layer affinities
+	 * e.g. PCIe MSI-X
+	 */
+	if (!vdev->config->get_vq_affinity)
+		goto fallback;
+
+	for (q = 0; q < fs->num_request_queues; q++) {
+		mask = vdev->config->get_vq_affinity(vdev, VQ_REQUEST + q);
+		if (!mask)
+			goto fallback;
+
+		for_each_cpu(cpu, mask)
+			fs->mq_map[cpu] = q;
+	}
+
+	return;
+fallback:
+	/* Attempt to map evenly in groups over the CPUs */
+	masks = group_cpus_evenly(fs->num_request_queues);
+	/* If even this fails we default to all CPUs use queue zero */
+	if (!masks) {
+		for_each_possible_cpu(cpu)
+			fs->mq_map[cpu] = 0;
+		return;
+	}
+
+	for (q = 0; q < fs->num_request_queues; q++) {
+		for_each_cpu(cpu, &masks[q])
+			fs->mq_map[cpu] = q;
+	}
+	kfree(masks);
+}
+
 /* Virtqueue interrupt handler */
 static void virtio_fs_vq_done(struct virtqueue *vq)
 {
@@ -742,6 +785,11 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
 {
 	struct virtqueue **vqs;
 	vq_callback_t **callbacks;
+	/* Specify pre_vectors to ensure that the queues before the
+	 * request queues (e.g. hiprio) don't claim any of the CPUs in
+	 * the multi-queue mapping and interrupt affinities
+	 */
+	struct irq_affinity desc = { .pre_vectors = VQ_REQUEST };
 	const char **names;
 	unsigned int i;
 	int ret = 0;
@@ -763,7 +811,9 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
 	callbacks = kmalloc_array(fs->nvqs, sizeof(callbacks[VQ_HIPRIO]),
 					GFP_KERNEL);
 	names = kmalloc_array(fs->nvqs, sizeof(names[VQ_HIPRIO]), GFP_KERNEL);
-	if (!vqs || !callbacks || !names) {
+	fs->mq_map = kcalloc_node(nr_cpu_ids, sizeof(*fs->mq_map), GFP_KERNEL,
+					dev_to_node(&vdev->dev));
+	if (!vqs || !callbacks || !names || !fs->mq_map) {
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -783,7 +833,7 @@ static int virtio_fs_setup_vqs(struct virtio_device *vdev,
 		names[i] = fs->vqs[i].name;
 	}
 
-	ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, NULL);
+	ret = virtio_find_vqs(vdev, fs->nvqs, vqs, callbacks, names, &desc);
 	if (ret < 0)
 		goto out;
 
@@ -795,8 +845,10 @@ out:
 	kfree(names);
 	kfree(callbacks);
 	kfree(vqs);
-	if (ret)
+	if (ret) {
 		kfree(fs->vqs);
+		kfree(fs->mq_map);
+	}
 	return ret;
 }
@@ -942,7 +994,7 @@ static int virtio_fs_probe(struct virtio_device *vdev)
 	if (ret < 0)
 		goto out;
 
-	/* TODO vq affinity */
+	virtio_fs_map_queues(vdev, fs);
 
 	ret = virtio_fs_setup_dax(vdev, fs);
 	if (ret < 0)
@@ -1291,7 +1343,7 @@ out:
 static void virtio_fs_wake_pending_and_unlock(struct fuse_iqueue *fiq)
 __releases(fiq->lock)
 {
-	unsigned int queue_id = VQ_REQUEST; /* TODO multiqueue */
+	unsigned int queue_id;
 	struct virtio_fs *fs;
 	struct fuse_req *req;
 	struct virtio_fs_vq *fsvq;
@@ -1305,11 +1357,13 @@ __releases(fiq->lock)
 	spin_unlock(&fiq->lock);
 
 	fs = fiq->priv;
+	queue_id = VQ_REQUEST + fs->mq_map[raw_smp_processor_id()];
 
-	pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u\n",
-		 __func__, req->in.h.opcode, req->in.h.unique,
+	pr_debug("%s: opcode %u unique %#llx nodeid %#llx in.len %u out.len %u queue_id %u\n",
+		 __func__, req->in.h.opcode, req->in.h.unique,
 		 req->in.h.nodeid, req->in.h.len,
-		 fuse_len_args(req->args->out_numargs, req->args->out_args));
+		 fuse_len_args(req->args->out_numargs, req->args->out_args),
+		 queue_id);
 
 	fsvq = &fs->vqs[queue_id];
 	ret = virtio_fs_enqueue_req(fsvq, req, false);
-- 
cgit v1.2.3
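To make the fallback mapping policy concrete, here is a small userspace
sketch — illustrative only, with the kernel's group_cpus_evenly()
approximated by a contiguous split of CPUs into near-equal groups. The
driver's hot path then reduces to a single array lookup:
queue_id = VQ_REQUEST + mq_map[raw_smp_processor_id()].

#include <stdio.h>

/* Sketch of the fallback CPU -> request-queue mapping: split num_cpus CPUs
 * into num_queues contiguous groups, mimicking the result the driver gets
 * from group_cpus_evenly(). mq_map[cpu] then names the request queue that
 * the submitting CPU should use. */
static void map_queues_evenly(unsigned int *mq_map, unsigned int num_cpus,
			      unsigned int num_queues)
{
	unsigned int per_queue = num_cpus / num_queues;
	unsigned int extra = num_cpus % num_queues; /* first groups get one more */
	unsigned int cpu = 0;

	for (unsigned int q = 0; q < num_queues; q++) {
		unsigned int size = per_queue + (q < extra ? 1 : 0);

		for (unsigned int i = 0; i < size && cpu < num_cpus; i++, cpu++)
			mq_map[cpu] = q;
	}
}

int main(void)
{
	unsigned int mq_map[32];

	map_queues_evenly(mq_map, 32, 8); /* e.g. 32 vCPUs, 8 request queues */
	for (unsigned int cpu = 0; cpu < 32; cpu++)
		printf("cpu %2u -> request queue %u\n", cpu, mq_map[cpu]);
	return 0;
}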