From db5247d9bf5c6ade9fd70b4e4897441e0269b233 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Fri, 15 Mar 2024 19:47:06 -0500 Subject: vhost_task: Handle SIGKILL by flushing work and exiting Instead of lingering until the device is closed, this has us handle SIGKILL by: 1. marking the worker as killed so we no longer try to use it with new virtqueues and new flush operations. 2. setting the virtqueue to worker mapping so no new works are queued. 3. running all the exiting works. Suggested-by: Edward Adam Davis Reported-and-tested-by: syzbot+98edc2df894917b3431f@syzkaller.appspotmail.com Message-Id: Signed-off-by: Mike Christie Message-Id: <20240316004707.45557-9-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin --- include/linux/sched/vhost_task.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched/vhost_task.h b/include/linux/sched/vhost_task.h index bc60243d43b3..25446c5d3508 100644 --- a/include/linux/sched/vhost_task.h +++ b/include/linux/sched/vhost_task.h @@ -4,7 +4,8 @@ struct vhost_task; -struct vhost_task *vhost_task_create(bool (*fn)(void *), void *arg, +struct vhost_task *vhost_task_create(bool (*fn)(void *), + void (*handle_kill)(void *), void *arg, const char *name); void vhost_task_start(struct vhost_task *vtsk); void vhost_task_stop(struct vhost_task *vtsk); -- cgit v1.2.3 From e4544c550eb1857845bb7114bac53edd04dc078f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 18 Mar 2024 13:06:45 +0100 Subject: virtio-mem: support suspend+resume With virtio-mem, primarily hibernation is problematic: as the machine shuts down, the virtio-mem device loses its state. Powering the machine back up is like losing a bunch of DIMMs. While there would be ways to add limited support, suspend+resume is more commonly used for VMs and "easier" to support cleanly. s2idle can be supported without any device dependencies. Similarly, one would expect suspend-to-ram (i.e., S3) to work out of the box. However, QEMU currently unplugs all device memory when resuming the VM, using a cold reset on the "wakeup" path. In order to support S3, we need a feature flag for the device to tell us if memory remains plugged when waking up. In the future, QEMU will implement this feature. So let's always support s2idle and support S3 with plugged memory only if the device indicates support. Block hibernation early using the PM notifier. Trying to hibernate now fails early: # echo disk > /sys/power/state [ 26.455369] PM: hibernation: hibernation entry [ 26.458271] virtio_mem virtio0: hibernation is not supported. [ 26.462498] PM: hibernation: hibernation exit -bash: echo: write error: Operation not permitted s2idle works even without the new feature bit: # echo s2idle > /sys/power/mem_sleep # echo mem > /sys/power/state [ 52.083725] PM: suspend entry (s2idle) [ 52.095950] Filesystems sync: 0.010 seconds [ 52.101493] Freezing user space processes [ 52.104213] Freezing user space processes completed (elapsed 0.001 seconds) [ 52.106520] OOM killer disabled. [ 52.107655] Freezing remaining freezable tasks [ 52.110880] Freezing remaining freezable tasks completed (elapsed 0.001 seconds) [ 52.113296] printk: Suspending console(s) (use no_console_suspend to debug) S3 does not work without the feature bit when memory is plugged: # echo deep > /sys/power/mem_sleep # echo mem > /sys/power/state [ 32.788281] PM: suspend entry (deep) [ 32.816630] Filesystems sync: 0.027 seconds [ 32.820029] Freezing user space processes [ 32.823870] Freezing user space processes completed (elapsed 0.001 seconds) [ 32.827756] OOM killer disabled. [ 32.829608] Freezing remaining freezable tasks [ 32.833842] Freezing remaining freezable tasks completed (elapsed 0.001 seconds) [ 32.837953] printk: Suspending console(s) (use no_console_suspend to debug) [ 32.916172] virtio_mem virtio0: suspend+resume with plugged memory is not supported [ 32.916181] virtio-pci 0000:00:02.0: PM: pci_pm_suspend(): virtio_pci_freeze+0x0/0x50 returns -1 [ 32.916197] virtio-pci 0000:00:02.0: PM: dpm_run_callback(): pci_pm_suspend+0x0/0x170 returns -1 [ 32.916210] virtio-pci 0000:00:02.0: PM: failed to suspend async: error -1 But S3 works with the new feature bit when memory is plugged (patched QEMU): # echo deep > /sys/power/mem_sleep # echo mem > /sys/power/state [ 33.983694] PM: suspend entry (deep) [ 34.009828] Filesystems sync: 0.024 seconds [ 34.013589] Freezing user space processes [ 34.016722] Freezing user space processes completed (elapsed 0.001 seconds) [ 34.019092] OOM killer disabled. [ 34.020291] Freezing remaining freezable tasks [ 34.023549] Freezing remaining freezable tasks completed (elapsed 0.001 seconds) [ 34.026090] printk: Suspending console(s) (use no_console_suspend to debug) Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Xuan Zhuo Signed-off-by: David Hildenbrand Message-Id: <20240318120645.105664-1-david@redhat.com> Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 68 +++++++++++++++++++++++++++++++++++++---- include/uapi/linux/virtio_mem.h | 2 ++ 2 files changed, 64 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 8e3223294442..51088d02de32 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -252,6 +253,9 @@ struct virtio_mem { /* Memory notifier (online/offline events). */ struct notifier_block memory_notifier; + /* Notifier to block hibernation image storing/reloading. */ + struct notifier_block pm_notifier; + #ifdef CONFIG_PROC_VMCORE /* vmcore callback for /proc/vmcore handling in kdump mode */ struct vmcore_cb vmcore_cb; @@ -1111,6 +1115,25 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, return rc; } +static int virtio_mem_pm_notifier_cb(struct notifier_block *nb, + unsigned long action, void *arg) +{ + struct virtio_mem *vm = container_of(nb, struct virtio_mem, + pm_notifier); + switch (action) { + case PM_HIBERNATION_PREPARE: + case PM_RESTORE_PREPARE: + /* + * When restarting the VM, all memory is unplugged. Don't + * allow to hibernate and restore from an image. + */ + dev_err(&vm->vdev->dev, "hibernation is not supported.\n"); + return NOTIFY_BAD; + default: + return NOTIFY_OK; + } +} + /* * Set a range of pages PG_offline. Remember pages that were never onlined * (via generic_online_page()) using PageDirty(). @@ -2615,11 +2638,19 @@ static int virtio_mem_init_hotplug(struct virtio_mem *vm) rc = register_memory_notifier(&vm->memory_notifier); if (rc) goto out_unreg_group; - rc = register_virtio_mem_device(vm); + /* Block hibernation as early as possible. */ + vm->pm_notifier.priority = INT_MAX; + vm->pm_notifier.notifier_call = virtio_mem_pm_notifier_cb; + rc = register_pm_notifier(&vm->pm_notifier); if (rc) goto out_unreg_mem; + rc = register_virtio_mem_device(vm); + if (rc) + goto out_unreg_pm; return 0; +out_unreg_pm: + unregister_pm_notifier(&vm->pm_notifier); out_unreg_mem: unregister_memory_notifier(&vm->memory_notifier); out_unreg_group: @@ -2897,6 +2928,7 @@ static void virtio_mem_deinit_hotplug(struct virtio_mem *vm) /* unregister callbacks */ unregister_virtio_mem_device(vm); + unregister_pm_notifier(&vm->pm_notifier); unregister_memory_notifier(&vm->memory_notifier); /* @@ -2960,17 +2992,40 @@ static void virtio_mem_config_changed(struct virtio_device *vdev) #ifdef CONFIG_PM_SLEEP static int virtio_mem_freeze(struct virtio_device *vdev) { + struct virtio_mem *vm = vdev->priv; + /* - * When restarting the VM, all memory is usually unplugged. Don't - * allow to suspend/hibernate. + * We block hibernation using the PM notifier completely. The workqueue + * is already frozen by the PM core at this point, so we simply + * reset the device and cleanup the queues. */ - dev_err(&vdev->dev, "save/restore not supported.\n"); - return -EPERM; + if (pm_suspend_target_state != PM_SUSPEND_TO_IDLE && + vm->plugged_size && + !virtio_has_feature(vm->vdev, VIRTIO_MEM_F_PERSISTENT_SUSPEND)) { + dev_err(&vm->vdev->dev, + "suspending with plugged memory is not supported\n"); + return -EPERM; + } + + virtio_reset_device(vdev); + vdev->config->del_vqs(vdev); + vm->vq = NULL; + return 0; } static int virtio_mem_restore(struct virtio_device *vdev) { - return -EPERM; + struct virtio_mem *vm = vdev->priv; + int ret; + + ret = virtio_mem_init_vq(vm); + if (ret) + return ret; + virtio_device_ready(vdev); + + /* Let's check if anything changed. */ + virtio_mem_config_changed(vdev); + return 0; } #endif @@ -2979,6 +3034,7 @@ static unsigned int virtio_mem_features[] = { VIRTIO_MEM_F_ACPI_PXM, #endif VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE, + VIRTIO_MEM_F_PERSISTENT_SUSPEND, }; static const struct virtio_device_id virtio_mem_id_table[] = { diff --git a/include/uapi/linux/virtio_mem.h b/include/uapi/linux/virtio_mem.h index e9122f1d0e0c..6e4b2cf6b7f1 100644 --- a/include/uapi/linux/virtio_mem.h +++ b/include/uapi/linux/virtio_mem.h @@ -90,6 +90,8 @@ #define VIRTIO_MEM_F_ACPI_PXM 0 /* unplugged memory must not be accessed */ #define VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE 1 +/* plugged memory will remain plugged when suspending+resuming */ +#define VIRTIO_MEM_F_PERSISTENT_SUSPEND 2 /* --- virtio-mem: guest -> host requests --- */ -- cgit v1.2.3