From 9f8b577f7b43b2170628d6c537252785dcc2dcea Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Tue, 1 Mar 2022 15:11:35 +0100 Subject: Drivers: hv: vmbus: Deactivate sysctl_record_panic_msg by default in isolated guests hv_panic_page might contain guest-sensitive information, do not dump it over to Hyper-V by default in isolated guests. While at it, update some comments in hyperv_{panic,die}_event(). Reported-by: Dexuan Cui Signed-off-by: Andrea Parri (Microsoft) Reviewed-by: Dexuan Cui Link: https://lore.kernel.org/r/20220301141135.2232-1-parri.andrea@gmail.com Signed-off-by: Wei Liu --- drivers/hv/vmbus_drv.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 12a2b37e87f3..a963b970ffb2 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -77,8 +77,8 @@ static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, /* * Hyper-V should be notified only once about a panic. If we will be - * doing hyperv_report_panic_msg() later with kmsg data, don't do - * the notification here. + * doing hv_kmsg_dump() with kmsg data later, don't do the notification + * here. */ if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE && hyperv_report_reg()) { @@ -100,8 +100,8 @@ static int hyperv_die_event(struct notifier_block *nb, unsigned long val, /* * Hyper-V should be notified only once about a panic. If we will be - * doing hyperv_report_panic_msg() later with kmsg data, don't do - * the notification here. + * doing hv_kmsg_dump() with kmsg data later, don't do the notification + * here. */ if (hyperv_report_reg()) hyperv_report_panic(regs, val, true); @@ -1546,14 +1546,20 @@ static int vmbus_bus_init(void) if (ret) goto err_connect; + if (hv_is_isolation_supported()) + sysctl_record_panic_msg = 0; + /* * Only register if the crash MSRs are available */ if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { u64 hyperv_crash_ctl; /* - * Sysctl registration is not fatal, since by default - * reporting is enabled. + * Panic message recording (sysctl_record_panic_msg) + * is enabled by default in non-isolated guests and + * disabled by default in isolated guests; the panic + * message recording won't be available in isolated + * guests should the following registration fail. */ hv_ctl_table_hdr = register_sysctl_table(hv_root_table); if (!hv_ctl_table_hdr) -- cgit v1.2.3 From 3a5469582c241abca22500f36a9cb8e9331969cf Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Tue, 15 Mar 2022 15:10:53 +0100 Subject: Drivers: hv: vmbus: Fix initialization of device object in vmbus_device_register() Initialize the device's dma_{mask,parms} pointers and the device's dma_mask value before invoking device_register(). Address the following trace with 5.17-rc7: [ 49.646839] WARNING: CPU: 0 PID: 189 at include/linux/dma-mapping.h:543 netvsc_probe+0x37a/0x3a0 [hv_netvsc] [ 49.646928] Call Trace: [ 49.646930] [ 49.646935] vmbus_probe+0x40/0x60 [hv_vmbus] [ 49.646942] really_probe+0x1ce/0x3b0 [ 49.646948] __driver_probe_device+0x109/0x180 [ 49.646952] driver_probe_device+0x23/0xa0 [ 49.646955] __device_attach_driver+0x76/0xe0 [ 49.646958] ? driver_allows_async_probing+0x50/0x50 [ 49.646961] bus_for_each_drv+0x84/0xd0 [ 49.646964] __device_attach+0xed/0x170 [ 49.646967] device_initial_probe+0x13/0x20 [ 49.646970] bus_probe_device+0x8f/0xa0 [ 49.646973] device_add+0x41a/0x8e0 [ 49.646975] ? hrtimer_init+0x28/0x80 [ 49.646981] device_register+0x1b/0x20 [ 49.646983] vmbus_device_register+0x5e/0xf0 [hv_vmbus] [ 49.646991] vmbus_add_channel_work+0x12d/0x190 [hv_vmbus] [ 49.646999] process_one_work+0x21d/0x3f0 [ 49.647002] worker_thread+0x4a/0x3b0 [ 49.647005] ? process_one_work+0x3f0/0x3f0 [ 49.647007] kthread+0xff/0x130 [ 49.647011] ? kthread_complete_and_exit+0x20/0x20 [ 49.647015] ret_from_fork+0x22/0x30 [ 49.647020] [ 49.647021] ---[ end trace 0000000000000000 ]--- Fixes: 743b237c3a7b0 ("scsi: storvsc: Add Isolation VM support for storvsc driver") Signed-off-by: Andrea Parri (Microsoft) Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20220315141053.3223-1-parri.andrea@gmail.com Signed-off-by: Wei Liu --- drivers/hv/vmbus_drv.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index a963b970ffb2..df22fbdc2ae2 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2103,6 +2103,10 @@ int vmbus_device_register(struct hv_device *child_device_obj) child_device_obj->device.parent = &hv_acpi_dev->dev; child_device_obj->device.release = vmbus_device_release; + child_device_obj->device.dma_parms = &child_device_obj->dma_parms; + child_device_obj->device.dma_mask = &child_device_obj->dma_mask; + dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64)); + /* * Register with the LDM. This will kick off the driver/device * binding...which will eventually call vmbus_match() and vmbus_probe() @@ -2128,9 +2132,6 @@ int vmbus_device_register(struct hv_device *child_device_obj) } hv_debug_add_dev_dir(child_device_obj); - child_device_obj->device.dma_parms = &child_device_obj->dma_parms; - child_device_obj->device.dma_mask = &child_device_obj->dma_mask; - dma_set_mask(&child_device_obj->device, DMA_BIT_MASK(64)); return 0; err_kset_unregister: -- cgit v1.2.3 From 792f232d57ff28bbd5f9c4abe0466b23d5879dc8 Mon Sep 17 00:00:00 2001 From: "Guilherme G. Piccoli" Date: Tue, 15 Mar 2022 17:35:35 -0300 Subject: Drivers: hv: vmbus: Fix potential crash on module unload The vmbus driver relies on the panic notifier infrastructure to perform some operations when a panic event is detected. Since vmbus can be built as module, it is required that the driver handles both registering and unregistering such panic notifier callback. After commit 74347a99e73a ("x86/Hyper-V: Unload vmbus channel in hv panic callback") though, the panic notifier registration is done unconditionally in the module initialization routine whereas the unregistering procedure is conditionally guarded and executes only if HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE capability is set. This patch fixes that by unconditionally unregistering the panic notifier in the module's exit routine as well. Fixes: 74347a99e73a ("x86/Hyper-V: Unload vmbus channel in hv panic callback") Signed-off-by: Guilherme G. Piccoli Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20220315203535.682306-1-gpiccoli@igalia.com Signed-off-by: Wei Liu --- drivers/hv/vmbus_drv.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index df22fbdc2ae2..6c057c76c2ca 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2787,10 +2787,15 @@ static void __exit vmbus_exit(void) if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { kmsg_dump_unregister(&hv_kmsg_dumper); unregister_die_notifier(&hyperv_die_block); - atomic_notifier_chain_unregister(&panic_notifier_list, - &hyperv_panic_block); } + /* + * The panic notifier is always registered, hence we should + * also unconditionally unregister it here as well. + */ + atomic_notifier_chain_unregister(&panic_notifier_list, + &hyperv_panic_block); + free_page((unsigned long)hv_panic_page); unregister_sysctl_table(hv_ctl_table_hdr); hv_ctl_table_hdr = NULL; -- cgit v1.2.3 From 37200078ed6aa2ac3c88a01a64996133dccfdd34 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Thu, 24 Mar 2022 09:14:51 -0700 Subject: Drivers: hv: vmbus: Propagate VMbus coherence to each VMbus device VMbus synthetic devices are not represented in the ACPI DSDT -- only the top level VMbus device is represented. As a result, on ARM64 coherence information in the _CCA method is not specified for synthetic devices, so they default to not hardware coherent. Drivers for some of these synthetic devices have been recently updated to use the standard DMA APIs, and they are incurring extra overhead of unneeded software coherence management. Fix this by propagating coherence information from the VMbus node in ACPI to the individual synthetic devices. There's no effect on x86/x64 where devices are always hardware coherent. Signed-off-by: Michael Kelley Acked-by: Robin Murphy Link: https://lore.kernel.org/r/1648138492-2191-2-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- drivers/hv/hv_common.c | 11 +++++++++++ drivers/hv/vmbus_drv.c | 31 +++++++++++++++++++++++++++++++ include/asm-generic/mshyperv.h | 1 + 3 files changed, 43 insertions(+) (limited to 'drivers/hv') diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 181d16bbf49d..820e81406251 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -216,6 +217,16 @@ bool hv_query_ext_cap(u64 cap_query) } EXPORT_SYMBOL_GPL(hv_query_ext_cap); +void hv_setup_dma_ops(struct device *dev, bool coherent) +{ + /* + * Hyper-V does not offer a vIOMMU in the guest + * VM, so pass 0/NULL for the IOMMU settings + */ + arch_setup_dma_ops(dev, 0, 0, NULL, coherent); +} +EXPORT_SYMBOL_GPL(hv_setup_dma_ops); + bool hv_is_hibernation_supported(void) { return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 6c057c76c2ca..3cd0d3a44fa2 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -920,6 +920,21 @@ static int vmbus_probe(struct device *child_device) return ret; } +/* + * vmbus_dma_configure -- Configure DMA coherence for VMbus device + */ +static int vmbus_dma_configure(struct device *child_device) +{ + /* + * On ARM64, propagate the DMA coherence setting from the top level + * VMbus ACPI device to the child VMbus device being added here. + * On x86/x64 coherence is assumed and these calls have no effect. + */ + hv_setup_dma_ops(child_device, + device_get_dma_attr(&hv_acpi_dev->dev) == DEV_DMA_COHERENT); + return 0; +} + /* * vmbus_remove - Remove a vmbus device */ @@ -1040,6 +1055,7 @@ static struct bus_type hv_bus = { .remove = vmbus_remove, .probe = vmbus_probe, .uevent = vmbus_uevent, + .dma_configure = vmbus_dma_configure, .dev_groups = vmbus_dev_groups, .drv_groups = vmbus_drv_groups, .bus_groups = vmbus_bus_groups, @@ -2435,6 +2451,21 @@ static int vmbus_acpi_add(struct acpi_device *device) hv_acpi_dev = device; + /* + * Older versions of Hyper-V for ARM64 fail to include the _CCA + * method on the top level VMbus device in the DSDT. But devices + * are hardware coherent in all current Hyper-V use cases, so fix + * up the ACPI device to behave as if _CCA is present and indicates + * hardware coherence. + */ + ACPI_COMPANION_SET(&device->dev, device); + if (IS_ENABLED(CONFIG_ACPI_CCA_REQUIRED) && + device_get_dma_attr(&device->dev) == DEV_DMA_NOT_SUPPORTED) { + pr_info("No ACPI _CCA found; assuming coherent device I/O\n"); + device->flags.cca_seen = true; + device->flags.coherent_dma = true; + } + result = acpi_walk_resources(device->handle, METHOD_NAME__CRS, vmbus_walk_resources, NULL); diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index c08758b6b364..c05d2ce9b6cd 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -269,6 +269,7 @@ bool hv_isolation_type_snp(void); u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size); void hyperv_cleanup(void); bool hv_query_ext_cap(u64 cap_query); +void hv_setup_dma_ops(struct device *dev, bool coherent); void *hv_map_memory(void *addr, unsigned long size); void hv_unmap_memory(void *addr); #else /* CONFIG_HYPERV */ -- cgit v1.2.3 From b6cae15b5710c8097aad26a2e5e752c323ee5348 Mon Sep 17 00:00:00 2001 From: Michael Kelley Date: Sun, 27 Mar 2022 08:25:10 -0700 Subject: Drivers: hv: vmbus: Prevent load re-ordering when reading ring buffer When reading a packet from a host-to-guest ring buffer, there is no memory barrier between reading the write index (to see if there is a packet to read) and reading the contents of the packet. The Hyper-V host uses store-release when updating the write index to ensure that writes of the packet data are completed first. On the guest side, the processor can reorder and read the packet data before the write index, and sometimes get stale packet data. Getting such stale packet data has been observed in a reproducible case in a VM on ARM64. Fix this by using virt_load_acquire() to read the write index, ensuring that reads of the packet data cannot be reordered before it. Preventing such reordering is logically correct, and with this change, getting stale data can no longer be reproduced. Signed-off-by: Michael Kelley Reviewed-by: Andrea Parri (Microsoft) Link: https://lore.kernel.org/r/1648394710-33480-1-git-send-email-mikelley@microsoft.com Signed-off-by: Wei Liu --- drivers/hv/ring_buffer.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'drivers/hv') diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 71efacb90965..3d215d9dec43 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -439,7 +439,16 @@ int hv_ringbuffer_read(struct vmbus_channel *channel, static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi) { u32 priv_read_loc = rbi->priv_read_index; - u32 write_loc = READ_ONCE(rbi->ring_buffer->write_index); + u32 write_loc; + + /* + * The Hyper-V host writes the packet data, then uses + * store_release() to update the write_index. Use load_acquire() + * here to prevent loads of the packet data from being re-ordered + * before the read of the write_index and potentially getting + * stale data. + */ + write_loc = virt_load_acquire(&rbi->ring_buffer->write_index); if (write_loc >= priv_read_loc) return write_loc - priv_read_loc; -- cgit v1.2.3 From b3d6dd09ff00fdcf4f7c0cb54700ffd5dd343502 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 25 Mar 2022 10:32:11 +0800 Subject: Drivers: hv: balloon: Support status report for larger page sizes DM_STATUS_REPORT expects the numbers of pages in the unit of 4k pages (HV_HYP_PAGE) instead of guest pages, so to make it work when guest page sizes are larger than 4k, convert the numbers of guest pages into the numbers of HV_HYP_PAGEs. Note that the numbers of guest pages are still used for tracing because tracing is internal to the guest kernel. Reported-by: Vitaly Kuznetsov Signed-off-by: Boqun Feng Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20220325023212.1570049-2-boqun.feng@gmail.com Signed-off-by: Wei Liu --- drivers/hv/hv_balloon.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index f2d05bff4245..062156b88a87 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1130,6 +1131,7 @@ static void post_status(struct hv_dynmem_device *dm) struct dm_status status; unsigned long now = jiffies; unsigned long last_post = last_post_time; + unsigned long num_pages_avail, num_pages_committed; if (pressure_report_delay > 0) { --pressure_report_delay; @@ -1154,16 +1156,21 @@ static void post_status(struct hv_dynmem_device *dm) * num_pages_onlined) as committed to the host, otherwise it can try * asking us to balloon them out. */ - status.num_avail = si_mem_available(); - status.num_committed = vm_memory_committed() + + num_pages_avail = si_mem_available(); + num_pages_committed = vm_memory_committed() + dm->num_pages_ballooned + (dm->num_pages_added > dm->num_pages_onlined ? dm->num_pages_added - dm->num_pages_onlined : 0) + compute_balloon_floor(); - trace_balloon_status(status.num_avail, status.num_committed, + trace_balloon_status(num_pages_avail, num_pages_committed, vm_memory_committed(), dm->num_pages_ballooned, dm->num_pages_added, dm->num_pages_onlined); + + /* Convert numbers of pages into numbers of HV_HYP_PAGEs. */ + status.num_avail = num_pages_avail * NR_HV_HYP_PAGES_IN_PAGE; + status.num_committed = num_pages_committed * NR_HV_HYP_PAGES_IN_PAGE; + /* * If our transaction ID is no longer current, just don't * send the status. This can happen if we were interrupted -- cgit v1.2.3 From be5802795cf8d0b881745fa9ba7790293b382280 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Fri, 25 Mar 2022 10:32:12 +0800 Subject: Drivers: hv: balloon: Disable balloon and hot-add accordingly Currently there are known potential issues for balloon and hot-add on ARM64: * Unballoon requests from Hyper-V should only unballoon ranges that are guest page size aligned, otherwise guests cannot handle because it's impossible to partially free a page. This is a problem when guest page size > 4096 bytes. * Memory hot-add requests from Hyper-V should provide the NUMA node id of the added ranges or ARM64 should have a functional memory_add_physaddr_to_nid(), otherwise the node id is missing for add_memory(). These issues require discussions on design and implementation. In the meanwhile, post_status() is working and essential to guest monitoring. Therefore instead of disabling the entire hv_balloon driver, the ballooning (when page size > 4096 bytes) and hot-add are disabled accordingly for now. Once the issues are fixed, they can be re-enable in these cases. Signed-off-by: Boqun Feng Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20220325023212.1570049-3-boqun.feng@gmail.com Signed-off-by: Wei Liu --- drivers/hv/hv_balloon.c | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 062156b88a87..eee7402cfc02 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1660,6 +1660,38 @@ static void disable_page_reporting(void) } } +static int ballooning_enabled(void) +{ + /* + * Disable ballooning if the page size is not 4k (HV_HYP_PAGE_SIZE), + * since currently it's unclear to us whether an unballoon request can + * make sure all page ranges are guest page size aligned. + */ + if (PAGE_SIZE != HV_HYP_PAGE_SIZE) { + pr_info("Ballooning disabled because page size is not 4096 bytes\n"); + return 0; + } + + return 1; +} + +static int hot_add_enabled(void) +{ + /* + * Disable hot add on ARM64, because we currently rely on + * memory_add_physaddr_to_nid() to get a node id of a hot add range, + * however ARM64's memory_add_physaddr_to_nid() always return 0 and + * DM_MEM_HOT_ADD_REQUEST doesn't have the NUMA node information for + * add_memory(). + */ + if (IS_ENABLED(CONFIG_ARM64)) { + pr_info("Memory hot add disabled on ARM64\n"); + return 0; + } + + return 1; +} + static int balloon_connect_vsp(struct hv_device *dev) { struct dm_version_request version_req; @@ -1731,8 +1763,8 @@ static int balloon_connect_vsp(struct hv_device *dev) * currently still requires the bits to be set, so we have to add code * to fail the host's hot-add and balloon up/down requests, if any. */ - cap_msg.caps.cap_bits.balloon = 1; - cap_msg.caps.cap_bits.hot_add = 1; + cap_msg.caps.cap_bits.balloon = ballooning_enabled(); + cap_msg.caps.cap_bits.hot_add = hot_add_enabled(); /* * Specify our alignment requirements as it relates -- cgit v1.2.3 From eaa03d34535872d29004cb5cf77dc9dec1ba9a25 Mon Sep 17 00:00:00 2001 From: "Andrea Parri (Microsoft)" Date: Mon, 28 Mar 2022 17:44:57 +0200 Subject: Drivers: hv: vmbus: Replace smp_store_mb() with virt_store_mb() Following the recommendation in Documentation/memory-barriers.txt for virtual machine guests. Fixes: 8b6a877c060ed ("Drivers: hv: vmbus: Replace the per-CPU channel lists with a global array of channels") Signed-off-by: Andrea Parri (Microsoft) Link: https://lore.kernel.org/r/20220328154457.100872-1-parri.andrea@gmail.com Signed-off-by: Wei Liu --- drivers/hv/channel_mgmt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 60375879612f..67be81208a2d 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -380,7 +380,7 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel) * execute: * * (a) In the "normal (i.e., not resuming from hibernation)" path, - * the full barrier in smp_store_mb() guarantees that the store + * the full barrier in virt_store_mb() guarantees that the store * is propagated to all CPUs before the add_channel_work work * is queued. In turn, add_channel_work is queued before the * channel's ring buffer is allocated/initialized and the @@ -392,14 +392,14 @@ void vmbus_channel_map_relid(struct vmbus_channel *channel) * recv_int_page before retrieving the channel pointer from the * array of channels. * - * (b) In the "resuming from hibernation" path, the smp_store_mb() + * (b) In the "resuming from hibernation" path, the virt_store_mb() * guarantees that the store is propagated to all CPUs before * the VMBus connection is marked as ready for the resume event * (cf. check_ready_for_resume_event()). The interrupt handler * of the VMBus driver and vmbus_chan_sched() can not run before * vmbus_bus_resume() has completed execution (cf. resume_noirq). */ - smp_store_mb( + virt_store_mb( vmbus_connection.channels[channel->offermsg.child_relid], channel); } -- cgit v1.2.3