From 74cf5b169c4c197a8c66feaf1a98bb009fa306f8 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 16 Aug 2018 15:50:56 +0800 Subject: virtio-balloon: remove BUG() in init_vqs It's a bit overkill to use BUG when failing to add an entry to the stats_vq in init_vqs. So remove it and just return the error to the caller to bail out nicely. Signed-off-by: Wei Wang Cc: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/virtio') diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 3988c0914322..8100e77b1b0b 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -455,9 +455,13 @@ static int init_vqs(struct virtio_balloon *vb) num_stats = update_balloon_stats(vb); sg_init_one(&sg, vb->stats, sizeof(vb->stats[0]) * num_stats); - if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL) - < 0) - BUG(); + err = virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, + GFP_KERNEL); + if (err) { + dev_warn(&vb->vdev->dev, "%s: add stat_vq failed\n", + __func__); + return err; + } virtqueue_kick(vb->stats_vq); } return 0; -- cgit v1.2.3 From c51d8fca0ce368c707fd090e0324f3ba40931d0a Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 16 Aug 2018 15:50:57 +0800 Subject: virtio-balloon: kzalloc the vb struct Zero all the vb fields at alloaction, so that we don't need to zero-initialize each field one by one later. Signed-off-by: Wei Wang Cc: Michael S. Tsirkin Cc: Tetsuo Handa Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/virtio') diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 8100e77b1b0b..d97d73c73303 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -561,7 +561,7 @@ static int virtballoon_probe(struct virtio_device *vdev) return -EINVAL; } - vdev->priv = vb = kmalloc(sizeof(*vb), GFP_KERNEL); + vdev->priv = vb = kzalloc(sizeof(*vb), GFP_KERNEL); if (!vb) { err = -ENOMEM; goto out; @@ -570,8 +570,6 @@ static int virtballoon_probe(struct virtio_device *vdev) INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func); INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func); spin_lock_init(&vb->stop_update_lock); - vb->stop_update = false; - vb->num_pages = 0; mutex_init(&vb->balloon_lock); init_waitqueue_head(&vb->acked); vb->vdev = vdev; @@ -602,7 +600,6 @@ static int virtballoon_probe(struct virtio_device *vdev) err = PTR_ERR(vb->vb_dev_info.inode); kern_unmount(balloon_mnt); unregister_oom_notifier(&vb->nb); - vb->vb_dev_info.inode = NULL; goto out_del_vqs; } vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; -- cgit v1.2.3 From 71994620bb25a8b109388fefa9e99a28e355255a Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 16 Aug 2018 15:50:58 +0800 Subject: virtio_balloon: replace oom notifier with shrinker The OOM notifier is getting deprecated to use for the reasons: - As a callout from the oom context, it is too subtle and easy to generate bugs and corner cases which are hard to track; - It is called too late (after the reclaiming has been performed). Drivers with large amuont of reclaimable memory is expected to release them at an early stage of memory pressure; - The notifier callback isn't aware of oom contrains; Link: https://lkml.org/lkml/2018/7/12/314 This patch replaces the virtio-balloon oom notifier with a shrinker to release balloon pages on memory pressure. The balloon pages are given back to mm adaptively by returning the number of pages that the reclaimer is asking for (i.e. sc->nr_to_scan). Currently the max possible value of sc->nr_to_scan passed to the balloon shrinker is SHRINK_BATCH, which is 128. This is smaller than the limitation that only VIRTIO_BALLOON_ARRAY_PFNS_MAX (256) pages can be returned via one invocation of leak_balloon. But this patch still considers the case that SHRINK_BATCH or shrinker->batch could be changed to a value larger than VIRTIO_BALLOON_ARRAY_PFNS_MAX, which will need to do multiple invocations of leak_balloon. Historically, the feature VIRTIO_BALLOON_F_DEFLATE_ON_OOM has been used to release balloon pages on OOM. We continue to use this feature bit for the shrinker, so the shrinker is only registered when this feature bit has been negotiated with host. Signed-off-by: Wei Wang Cc: Michael S. Tsirkin Cc: Michal Hocko Cc: Andrew Morton Cc: Tetsuo Handa Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_balloon.c | 110 +++++++++++++++++++++------------------- 1 file changed, 59 insertions(+), 51 deletions(-) (limited to 'drivers/virtio') diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index d97d73c73303..d1c1f6283729 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include @@ -40,13 +39,8 @@ */ #define VIRTIO_BALLOON_PAGES_PER_PAGE (unsigned)(PAGE_SIZE >> VIRTIO_BALLOON_PFN_SHIFT) #define VIRTIO_BALLOON_ARRAY_PFNS_MAX 256 -#define OOM_VBALLOON_DEFAULT_PAGES 256 #define VIRTBALLOON_OOM_NOTIFY_PRIORITY 80 -static int oom_pages = OOM_VBALLOON_DEFAULT_PAGES; -module_param(oom_pages, int, S_IRUSR | S_IWUSR); -MODULE_PARM_DESC(oom_pages, "pages to free on OOM"); - #ifdef CONFIG_BALLOON_COMPACTION static struct vfsmount *balloon_mnt; #endif @@ -86,8 +80,8 @@ struct virtio_balloon { /* Memory statistics */ struct virtio_balloon_stat stats[VIRTIO_BALLOON_S_NR]; - /* To register callback in oom notifier call chain */ - struct notifier_block nb; + /* To register a shrinker to shrink memory upon memory pressure */ + struct shrinker shrinker; }; static struct virtio_device_id id_table[] = { @@ -365,38 +359,6 @@ static void update_balloon_size(struct virtio_balloon *vb) &actual); } -/* - * virtballoon_oom_notify - release pages when system is under severe - * memory pressure (called from out_of_memory()) - * @self : notifier block struct - * @dummy: not used - * @parm : returned - number of freed pages - * - * The balancing of memory by use of the virtio balloon should not cause - * the termination of processes while there are pages in the balloon. - * If virtio balloon manages to release some memory, it will make the - * system return and retry the allocation that forced the OOM killer - * to run. - */ -static int virtballoon_oom_notify(struct notifier_block *self, - unsigned long dummy, void *parm) -{ - struct virtio_balloon *vb; - unsigned long *freed; - unsigned num_freed_pages; - - vb = container_of(self, struct virtio_balloon, nb); - if (!virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) - return NOTIFY_OK; - - freed = parm; - num_freed_pages = leak_balloon(vb, oom_pages); - update_balloon_size(vb); - *freed += num_freed_pages; - - return NOTIFY_OK; -} - static void update_balloon_stats_func(struct work_struct *work) { struct virtio_balloon *vb; @@ -550,6 +512,52 @@ static struct file_system_type balloon_fs = { #endif /* CONFIG_BALLOON_COMPACTION */ +static unsigned long virtio_balloon_shrinker_scan(struct shrinker *shrinker, + struct shrink_control *sc) +{ + unsigned long pages_to_free, pages_freed = 0; + struct virtio_balloon *vb = container_of(shrinker, + struct virtio_balloon, shrinker); + + pages_to_free = sc->nr_to_scan * VIRTIO_BALLOON_PAGES_PER_PAGE; + + /* + * One invocation of leak_balloon can deflate at most + * VIRTIO_BALLOON_ARRAY_PFNS_MAX balloon pages, so we call it + * multiple times to deflate pages till reaching pages_to_free. + */ + while (vb->num_pages && pages_to_free) { + pages_to_free -= pages_freed; + pages_freed += leak_balloon(vb, pages_to_free); + } + update_balloon_size(vb); + + return pages_freed / VIRTIO_BALLOON_PAGES_PER_PAGE; +} + +static unsigned long virtio_balloon_shrinker_count(struct shrinker *shrinker, + struct shrink_control *sc) +{ + struct virtio_balloon *vb = container_of(shrinker, + struct virtio_balloon, shrinker); + + return vb->num_pages / VIRTIO_BALLOON_PAGES_PER_PAGE; +} + +static void virtio_balloon_unregister_shrinker(struct virtio_balloon *vb) +{ + unregister_shrinker(&vb->shrinker); +} + +static int virtio_balloon_register_shrinker(struct virtio_balloon *vb) +{ + vb->shrinker.scan_objects = virtio_balloon_shrinker_scan; + vb->shrinker.count_objects = virtio_balloon_shrinker_count; + vb->shrinker.seeks = DEFAULT_SEEKS; + + return register_shrinker(&vb->shrinker); +} + static int virtballoon_probe(struct virtio_device *vdev) { struct virtio_balloon *vb; @@ -580,17 +588,10 @@ static int virtballoon_probe(struct virtio_device *vdev) if (err) goto out_free_vb; - vb->nb.notifier_call = virtballoon_oom_notify; - vb->nb.priority = VIRTBALLOON_OOM_NOTIFY_PRIORITY; - err = register_oom_notifier(&vb->nb); - if (err < 0) - goto out_del_vqs; - #ifdef CONFIG_BALLOON_COMPACTION balloon_mnt = kern_mount(&balloon_fs); if (IS_ERR(balloon_mnt)) { err = PTR_ERR(balloon_mnt); - unregister_oom_notifier(&vb->nb); goto out_del_vqs; } @@ -599,12 +600,19 @@ static int virtballoon_probe(struct virtio_device *vdev) if (IS_ERR(vb->vb_dev_info.inode)) { err = PTR_ERR(vb->vb_dev_info.inode); kern_unmount(balloon_mnt); - unregister_oom_notifier(&vb->nb); goto out_del_vqs; } vb->vb_dev_info.inode->i_mapping->a_ops = &balloon_aops; #endif - + /* + * We continue to use VIRTIO_BALLOON_F_DEFLATE_ON_OOM to decide if a + * shrinker needs to be registered to relieve memory pressure. + */ + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) { + err = virtio_balloon_register_shrinker(vb); + if (err) + goto out_del_vqs; + } virtio_device_ready(vdev); if (towards_target(vb)) @@ -636,8 +644,8 @@ static void virtballoon_remove(struct virtio_device *vdev) { struct virtio_balloon *vb = vdev->priv; - unregister_oom_notifier(&vb->nb); - + if (virtio_has_feature(vb->vdev, VIRTIO_BALLOON_F_DEFLATE_ON_OOM)) + virtio_balloon_unregister_shrinker(vb); spin_lock_irq(&vb->stop_update_lock); vb->stop_update = true; spin_unlock_irq(&vb->stop_update_lock); -- cgit v1.2.3 From 3fc92a96c2b7efdb7fc4f856713e2ab7439757f6 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 18 Jul 2018 10:18:44 +0100 Subject: virtio: mmio-v1: Validate queue PFN virtio-mmio with virtio-v1 uses a 32bit PFN for the queue. If the queue pfn is too large to fit in 32bits, which we could hit on arm64 systems with 52bit physical addresses (even with 64K page size), we simply miss out a proper link to the other side of the queue. Add a check to validate the PFN, rather than silently breaking the devices. Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Marc Zyngier Cc: Christoffer Dall Cc: Peter Maydel Cc: Jean-Philippe Brucker Signed-off-by: Suzuki K Poulose Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mmio.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'drivers/virtio') diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c index 67763d3c7abf..4cd9ea5c75be 100644 --- a/drivers/virtio/virtio_mmio.c +++ b/drivers/virtio/virtio_mmio.c @@ -397,9 +397,23 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, /* Activate the queue */ writel(virtqueue_get_vring_size(vq), vm_dev->base + VIRTIO_MMIO_QUEUE_NUM); if (vm_dev->version == 1) { + u64 q_pfn = virtqueue_get_desc_addr(vq) >> PAGE_SHIFT; + + /* + * virtio-mmio v1 uses a 32bit QUEUE PFN. If we have something + * that doesn't fit in 32bit, fail the setup rather than + * pretending to be successful. + */ + if (q_pfn >> 32) { + dev_err(&vdev->dev, + "platform bug: legacy virtio-mmio must not be used with RAM above 0x%llxGB\n", + 0x1ULL << (32 + PAGE_SHIFT - 30)); + err = -E2BIG; + goto error_bad_pfn; + } + writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_QUEUE_ALIGN); - writel(virtqueue_get_desc_addr(vq) >> PAGE_SHIFT, - vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); + writel(q_pfn, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); } else { u64 addr; @@ -430,6 +444,8 @@ static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index, return vq; +error_bad_pfn: + vring_del_virtqueue(vq); error_new_virtqueue: if (vm_dev->version == 1) { writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN); -- cgit v1.2.3 From 69599206ea9a3f8f2e94d46580579cbf9d08ad6c Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Wed, 18 Jul 2018 10:18:45 +0100 Subject: virtio: pci-legacy: Validate queue pfn Legacy PCI over virtio uses a 32bit PFN for the queue. If the queue pfn is too large to fit in 32bits, which we could hit on arm64 systems with 52bit physical addresses (even with 64K page size), we simply miss out a proper link to the other side of the queue. Add a check to validate the PFN, rather than silently breaking the devices. Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Marc Zyngier Cc: Christoffer Dall Cc: Peter Maydel Cc: Jean-Philippe Brucker Signed-off-by: Suzuki K Poulose Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_pci_legacy.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'drivers/virtio') diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c index 2780886e8ba3..de062fb201bc 100644 --- a/drivers/virtio/virtio_pci_legacy.c +++ b/drivers/virtio/virtio_pci_legacy.c @@ -122,6 +122,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, struct virtqueue *vq; u16 num; int err; + u64 q_pfn; /* Select the queue we're interested in */ iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL); @@ -141,9 +142,17 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, if (!vq) return ERR_PTR(-ENOMEM); + q_pfn = virtqueue_get_desc_addr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; + if (q_pfn >> 32) { + dev_err(&vp_dev->pci_dev->dev, + "platform bug: legacy virtio-mmio must not be used with RAM above 0x%llxGB\n", + 0x1ULL << (32 + PAGE_SHIFT - 30)); + err = -E2BIG; + goto out_del_vq; + } + /* activate the queue */ - iowrite32(virtqueue_get_desc_addr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT, - vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); + iowrite32(q_pfn, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); vq->priv = (void __force *)vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY; @@ -160,6 +169,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, out_deactivate: iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN); +out_del_vq: vring_del_virtqueue(vq); return ERR_PTR(err); } -- cgit v1.2.3