From 79208c57da5311860f165b613c89b3f647e357cd Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 9 Jan 2015 23:54:29 -0800 Subject: Drivers: hv: hv_balloon: Make adjustments in computing the floor Make adjustments in computing the balloon floor. The current computation of the balloon floor was not appropriate for virtual machines with more than 10 GB of assigned memory - we would get into situations where the host would agressively balloon down the guest and leave the guest in an unusable state. This patch fixes the issue by raising the floor. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_balloon.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/hv/hv_balloon.c') diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index b958ded8ac7e..9cbbb831778a 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -928,9 +928,8 @@ static unsigned long compute_balloon_floor(void) * 128 72 (1/2) * 512 168 (1/4) * 2048 360 (1/8) - * 8192 552 (1/32) - * 32768 1320 - * 131072 4392 + * 8192 768 (1/16) + * 32768 1536 (1/32) */ if (totalram_pages < MB2PAGES(128)) min_pages = MB2PAGES(8) + (totalram_pages >> 1); @@ -938,8 +937,10 @@ static unsigned long compute_balloon_floor(void) min_pages = MB2PAGES(40) + (totalram_pages >> 2); else if (totalram_pages < MB2PAGES(2048)) min_pages = MB2PAGES(104) + (totalram_pages >> 3); + else if (totalram_pages < MB2PAGES(8192)) + min_pages = MB2PAGES(256) + (totalram_pages >> 4); else - min_pages = MB2PAGES(296) + (totalram_pages >> 5); + min_pages = MB2PAGES(512) + (totalram_pages >> 5); #undef MB2PAGES return min_pages; } -- cgit v1.2.3 From 22f88475b62ac826acae2f77c3e1bd9543e87b2a Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 9 Jan 2015 23:54:30 -0800 Subject: Drivers: hv: hv_balloon: Fix a locking bug in the balloon driver We support memory hot-add in the Hyper-V balloon driver by hot adding an appropriately sized and aligned region and controlling the on-lining of pages within that region based on the pages that the host wants us to online. We do this because the granularity and alignment requirements in Linux are different from what Windows expects. The state to manage the onlining of pages needs to be correctly protected. Fix this bug. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_balloon.c | 68 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 5 deletions(-) (limited to 'drivers/hv/hv_balloon.c') diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 9cbbb831778a..8e30415b0eb7 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -533,6 +533,9 @@ struct hv_dynmem_device { */ struct task_struct *thread; + struct mutex ha_region_mutex; + struct completion waiter_event; + /* * A list of hot-add regions. */ @@ -549,7 +552,59 @@ struct hv_dynmem_device { static struct hv_dynmem_device dm_device; static void post_status(struct hv_dynmem_device *dm); + #ifdef CONFIG_MEMORY_HOTPLUG +static void acquire_region_mutex(bool trylock) +{ + if (trylock) { + reinit_completion(&dm_device.waiter_event); + while (!mutex_trylock(&dm_device.ha_region_mutex)) + wait_for_completion(&dm_device.waiter_event); + } else { + mutex_lock(&dm_device.ha_region_mutex); + } +} + +static void release_region_mutex(bool trylock) +{ + if (trylock) { + mutex_unlock(&dm_device.ha_region_mutex); + } else { + mutex_unlock(&dm_device.ha_region_mutex); + complete(&dm_device.waiter_event); + } +} + +static int hv_memory_notifier(struct notifier_block *nb, unsigned long val, + void *v) +{ + switch (val) { + case MEM_GOING_ONLINE: + acquire_region_mutex(true); + break; + + case MEM_ONLINE: + case MEM_CANCEL_ONLINE: + release_region_mutex(true); + if (dm_device.ha_waiting) { + dm_device.ha_waiting = false; + complete(&dm_device.ol_waitevent); + } + break; + + case MEM_GOING_OFFLINE: + case MEM_OFFLINE: + case MEM_CANCEL_OFFLINE: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block hv_memory_nb = { + .notifier_call = hv_memory_notifier, + .priority = 0 +}; + static void hv_bring_pgs_online(unsigned long start_pfn, unsigned long size) { @@ -591,6 +646,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, init_completion(&dm_device.ol_waitevent); dm_device.ha_waiting = true; + release_region_mutex(false); nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn)); ret = add_memory(nid, PFN_PHYS((start_pfn)), (HA_CHUNK << PAGE_SHIFT)); @@ -619,6 +675,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, * have not been "onlined" within the allowed time. */ wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ); + acquire_region_mutex(false); post_status(&dm_device); } @@ -632,11 +689,6 @@ static void hv_online_page(struct page *pg) unsigned long cur_start_pgp; unsigned long cur_end_pgp; - if (dm_device.ha_waiting) { - dm_device.ha_waiting = false; - complete(&dm_device.ol_waitevent); - } - list_for_each(cur, &dm_device.ha_region_list) { has = list_entry(cur, struct hv_hotadd_state, list); cur_start_pgp = (unsigned long) @@ -834,6 +886,7 @@ static void hot_add_req(struct work_struct *dummy) resp.hdr.size = sizeof(struct dm_hot_add_response); #ifdef CONFIG_MEMORY_HOTPLUG + acquire_region_mutex(false); pg_start = dm->ha_wrk.ha_page_range.finfo.start_page; pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt; @@ -865,6 +918,7 @@ static void hot_add_req(struct work_struct *dummy) if (do_hot_add) resp.page_count = process_hot_add(pg_start, pfn_cnt, rg_start, rg_sz); + release_region_mutex(false); #endif /* * The result field of the response structure has the @@ -1388,7 +1442,9 @@ static int balloon_probe(struct hv_device *dev, dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7; init_completion(&dm_device.host_event); init_completion(&dm_device.config_event); + init_completion(&dm_device.waiter_event); INIT_LIST_HEAD(&dm_device.ha_region_list); + mutex_init(&dm_device.ha_region_mutex); INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up); INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req); dm_device.host_specified_ha_region = false; @@ -1402,6 +1458,7 @@ static int balloon_probe(struct hv_device *dev, #ifdef CONFIG_MEMORY_HOTPLUG set_online_page_callback(&hv_online_page); + register_memory_notifier(&hv_memory_nb); #endif hv_set_drvdata(dev, &dm_device); @@ -1520,6 +1577,7 @@ static int balloon_remove(struct hv_device *dev) kfree(send_buffer); #ifdef CONFIG_MEMORY_HOTPLUG restore_online_page_callback(&hv_online_page); + unregister_memory_notifier(&hv_memory_nb); #endif list_for_each_safe(cur, tmp, &dm->ha_region_list) { has = list_entry(cur, struct hv_hotadd_state, list); -- cgit v1.2.3 From ab3de22bb4a3d4bda2d0ec8bebcb76a40f1cbf9b Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 9 Jan 2015 23:54:31 -0800 Subject: Drivers: hv: hv_balloon: Don't post pressure status from interrupt context We currently release memory (balloon down) in the interrupt context and we also post memory status while releasing memory. Rather than posting the status in the interrupt context, wakeup the status posting thread to post the status. This will address the inconsistent lock state that Sitsofe Wheeler reported: http://lkml.iu.edu/hypermail/linux/kernel/1411.1/00075.html Signed-off-by: K. Y. Srinivasan Reported-by: Sitsofe Wheeler Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_balloon.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'drivers/hv/hv_balloon.c') diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 8e30415b0eb7..ff169386b2c7 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1226,7 +1226,7 @@ static void balloon_down(struct hv_dynmem_device *dm, for (i = 0; i < range_count; i++) { free_balloon_pages(dm, &range_array[i]); - post_status(&dm_device); + complete(&dm_device.config_event); } if (req->more_pages == 1) @@ -1250,19 +1250,16 @@ static void balloon_onchannelcallback(void *context); static int dm_thread_func(void *dm_dev) { struct hv_dynmem_device *dm = dm_dev; - int t; while (!kthread_should_stop()) { - t = wait_for_completion_interruptible_timeout( + wait_for_completion_interruptible_timeout( &dm_device.config_event, 1*HZ); /* * The host expects us to post information on the memory * pressure every second. */ - - if (t == 0) - post_status(dm); - + reinit_completion(&dm_device.config_event); + post_status(dm); } return 0; -- cgit v1.2.3