From 4f74fb30ea21d8b2d515f5929d2b46ad6834edba Mon Sep 17 00:00:00 2001 From: Mitchell Levy Date: Mon, 7 Aug 2023 23:55:47 +0000 Subject: hv_balloon: Update the balloon driver to use the SBRM API This patch is intended as a proof-of-concept for the new SBRM machinery[1]. For some brief background, the idea behind SBRM is using the __cleanup__ attribute to automatically unlock locks (or otherwise release resources) when they go out of scope, similar to C++ style RAII. This promises some benefits such as making code simpler (particularly where you have lots of goto fail; type constructs) as well as reducing the surface area for certain kinds of bugs. The changes in this patch should not result in any difference in how the code actually runs (i.e., it's purely an exercise in this new syntax sugar). In one instance SBRM was not appropriate, so I left that part alone, but all other locking/unlocking is handled automatically in this patch. [1] https://lore.kernel.org/all/20230626125726.GU4253@hirez.programming.kicks-ass.net/ Suggested-by: Boqun Feng Signed-off-by: "Mitchell Levy (Microsoft)" Reviewed-by: Boqun Feng Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/20230807-sbrm-hyperv-v2-1-9d2ac15305bd@gmail.com --- drivers/hv/hv_balloon.c | 82 +++++++++++++++++++++++-------------------------- 1 file changed, 38 insertions(+), 44 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index dffcc894f117..2812601e84da 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -8,6 +8,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include #include #include #include @@ -646,7 +647,7 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val, void *v) { struct memory_notify *mem = (struct memory_notify *)v; - unsigned long flags, pfn_count; + unsigned long pfn_count; switch (val) { case MEM_ONLINE: @@ -655,21 +656,22 @@ static int hv_memory_notifier(struct notifier_block *nb, unsigned long val, break; case MEM_OFFLINE: - spin_lock_irqsave(&dm_device.ha_lock, flags); - pfn_count = hv_page_offline_check(mem->start_pfn, - mem->nr_pages); - if (pfn_count <= dm_device.num_pages_onlined) { - dm_device.num_pages_onlined -= pfn_count; - } else { - /* - * We're offlining more pages than we managed to online. - * This is unexpected. In any case don't let - * num_pages_onlined wrap around zero. - */ - WARN_ON_ONCE(1); - dm_device.num_pages_onlined = 0; + scoped_guard(spinlock_irqsave, &dm_device.ha_lock) { + pfn_count = hv_page_offline_check(mem->start_pfn, + mem->nr_pages); + if (pfn_count <= dm_device.num_pages_onlined) { + dm_device.num_pages_onlined -= pfn_count; + } else { + /* + * We're offlining more pages than we + * managed to online. This is + * unexpected. In any case don't let + * num_pages_onlined wrap around zero. + */ + WARN_ON_ONCE(1); + dm_device.num_pages_onlined = 0; + } } - spin_unlock_irqrestore(&dm_device.ha_lock, flags); break; case MEM_GOING_ONLINE: case MEM_GOING_OFFLINE: @@ -721,24 +723,23 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, unsigned long start_pfn; unsigned long processed_pfn; unsigned long total_pfn = pfn_count; - unsigned long flags; for (i = 0; i < (size/HA_CHUNK); i++) { start_pfn = start + (i * HA_CHUNK); - spin_lock_irqsave(&dm_device.ha_lock, flags); - has->ha_end_pfn += HA_CHUNK; + scoped_guard(spinlock_irqsave, &dm_device.ha_lock) { + has->ha_end_pfn += HA_CHUNK; - if (total_pfn > HA_CHUNK) { - processed_pfn = HA_CHUNK; - total_pfn -= HA_CHUNK; - } else { - processed_pfn = total_pfn; - total_pfn = 0; - } + if (total_pfn > HA_CHUNK) { + processed_pfn = HA_CHUNK; + total_pfn -= HA_CHUNK; + } else { + processed_pfn = total_pfn; + total_pfn = 0; + } - has->covered_end_pfn += processed_pfn; - spin_unlock_irqrestore(&dm_device.ha_lock, flags); + has->covered_end_pfn += processed_pfn; + } reinit_completion(&dm_device.ol_waitevent); @@ -758,10 +759,10 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, */ do_hot_add = false; } - spin_lock_irqsave(&dm_device.ha_lock, flags); - has->ha_end_pfn -= HA_CHUNK; - has->covered_end_pfn -= processed_pfn; - spin_unlock_irqrestore(&dm_device.ha_lock, flags); + scoped_guard(spinlock_irqsave, &dm_device.ha_lock) { + has->ha_end_pfn -= HA_CHUNK; + has->covered_end_pfn -= processed_pfn; + } break; } @@ -781,10 +782,9 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, static void hv_online_page(struct page *pg, unsigned int order) { struct hv_hotadd_state *has; - unsigned long flags; unsigned long pfn = page_to_pfn(pg); - spin_lock_irqsave(&dm_device.ha_lock, flags); + guard(spinlock_irqsave)(&dm_device.ha_lock); list_for_each_entry(has, &dm_device.ha_region_list, list) { /* The page belongs to a different HAS. */ if ((pfn < has->start_pfn) || @@ -794,7 +794,6 @@ static void hv_online_page(struct page *pg, unsigned int order) hv_bring_pgs_online(has, pfn, 1UL << order); break; } - spin_unlock_irqrestore(&dm_device.ha_lock, flags); } static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) @@ -803,9 +802,8 @@ static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) struct hv_hotadd_gap *gap; unsigned long residual, new_inc; int ret = 0; - unsigned long flags; - spin_lock_irqsave(&dm_device.ha_lock, flags); + guard(spinlock_irqsave)(&dm_device.ha_lock); list_for_each_entry(has, &dm_device.ha_region_list, list) { /* * If the pfn range we are dealing with is not in the current @@ -852,7 +850,6 @@ static int pfn_covered(unsigned long start_pfn, unsigned long pfn_cnt) ret = 1; break; } - spin_unlock_irqrestore(&dm_device.ha_lock, flags); return ret; } @@ -947,7 +944,6 @@ static unsigned long process_hot_add(unsigned long pg_start, { struct hv_hotadd_state *ha_region = NULL; int covered; - unsigned long flags; if (pfn_cnt == 0) return 0; @@ -979,9 +975,9 @@ static unsigned long process_hot_add(unsigned long pg_start, ha_region->covered_end_pfn = pg_start; ha_region->end_pfn = rg_start + rg_size; - spin_lock_irqsave(&dm_device.ha_lock, flags); - list_add_tail(&ha_region->list, &dm_device.ha_region_list); - spin_unlock_irqrestore(&dm_device.ha_lock, flags); + scoped_guard(spinlock_irqsave, &dm_device.ha_lock) { + list_add_tail(&ha_region->list, &dm_device.ha_region_list); + } } do_pg_range: @@ -2047,7 +2043,6 @@ static void balloon_remove(struct hv_device *dev) struct hv_dynmem_device *dm = hv_get_drvdata(dev); struct hv_hotadd_state *has, *tmp; struct hv_hotadd_gap *gap, *tmp_gap; - unsigned long flags; if (dm->num_pages_ballooned != 0) pr_warn("Ballooned pages: %d\n", dm->num_pages_ballooned); @@ -2073,7 +2068,7 @@ static void balloon_remove(struct hv_device *dev) #endif } - spin_lock_irqsave(&dm_device.ha_lock, flags); + guard(spinlock_irqsave)(&dm_device.ha_lock); list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) { list_for_each_entry_safe(gap, tmp_gap, &has->gap_list, list) { list_del(&gap->list); @@ -2082,7 +2077,6 @@ static void balloon_remove(struct hv_device *dev) list_del(&has->list); kfree(has); } - spin_unlock_irqrestore(&dm_device.ha_lock, flags); } static int balloon_suspend(struct hv_device *hv_dev) -- cgit v1.2.3 From d6e2d652443751e290b2edb70173ec3c22f78fbe Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Fri, 18 Aug 2023 06:29:11 -0400 Subject: x86/hyperv: Add sev-snp enlightened guest static key Introduce static key isolation_type_en_snp for enlightened sev-snp guest check. Reviewed-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Tianyu Lan Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/20230818102919.1318039-2-ltykernel@gmail.com --- arch/x86/hyperv/ivm.c | 11 +++++++++++ arch/x86/include/asm/mshyperv.h | 2 ++ arch/x86/kernel/cpu/mshyperv.c | 9 +++++++-- drivers/hv/hv_common.c | 6 ++++++ include/asm-generic/mshyperv.h | 13 ++++++++++--- 5 files changed, 36 insertions(+), 5 deletions(-) (limited to 'drivers/hv') diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 14f46ad2ca64..b2b5cb19fac9 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -413,3 +413,14 @@ bool hv_isolation_type_snp(void) { return static_branch_unlikely(&isolation_type_snp); } + +DEFINE_STATIC_KEY_FALSE(isolation_type_en_snp); +/* + * hv_isolation_type_en_snp - Check system runs in the AMD SEV-SNP based + * isolation enlightened VM. + */ +bool hv_isolation_type_en_snp(void) +{ + return static_branch_unlikely(&isolation_type_en_snp); +} + diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 88d9ef98e087..9f11f0495950 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -26,6 +26,7 @@ union hv_ghcb; DECLARE_STATIC_KEY_FALSE(isolation_type_snp); +DECLARE_STATIC_KEY_FALSE(isolation_type_en_snp); typedef int (*hyperv_fill_flush_list_func)( struct hv_guest_mapping_flush_list *flush, @@ -239,6 +240,7 @@ static inline void hv_vtom_init(void) {} #endif extern bool hv_isolation_type_snp(void); +extern bool hv_isolation_type_en_snp(void); static inline bool hv_is_synic_reg(unsigned int reg) { diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index c7969e806c64..5398fb2f4d39 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -402,8 +402,12 @@ static void __init ms_hyperv_init_platform(void) pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n", ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b); - if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) + + if (cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) { + static_branch_enable(&isolation_type_en_snp); + } else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) { static_branch_enable(&isolation_type_snp); + } } if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) { @@ -473,7 +477,8 @@ static void __init ms_hyperv_init_platform(void) #if IS_ENABLED(CONFIG_HYPERV) if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) || - (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP)) + ((hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) && + ms_hyperv.paravisor_present)) hv_vtom_init(); /* * Setup the hook to get control post apic initialization. diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 542a1d53b303..4b4aa53c34c2 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -502,6 +502,12 @@ bool __weak hv_isolation_type_snp(void) } EXPORT_SYMBOL_GPL(hv_isolation_type_snp); +bool __weak hv_isolation_type_en_snp(void) +{ + return false; +} +EXPORT_SYMBOL_GPL(hv_isolation_type_en_snp); + void __weak hv_setup_vmbus_handler(void (*handler)(void)) { } diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 402a8c1c202d..580c766958de 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -36,15 +36,21 @@ struct ms_hyperv_info { u32 nested_features; u32 max_vp_index; u32 max_lp_index; - u32 isolation_config_a; + union { + u32 isolation_config_a; + struct { + u32 paravisor_present : 1; + u32 reserved_a1 : 31; + }; + }; union { u32 isolation_config_b; struct { u32 cvm_type : 4; - u32 reserved1 : 1; + u32 reserved_b1 : 1; u32 shared_gpa_boundary_active : 1; u32 shared_gpa_boundary_bits : 6; - u32 reserved2 : 20; + u32 reserved_b2 : 20; }; }; u64 shared_gpa_boundary; @@ -58,6 +64,7 @@ extern void * __percpu *hyperv_pcpu_output_arg; extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr); extern u64 hv_do_fast_hypercall8(u16 control, u64 input8); extern bool hv_isolation_type_snp(void); +extern bool hv_isolation_type_en_snp(void); /* Helper functions that provide a consistent pattern for checking Hyper-V hypercall status. */ static inline int hv_result(u64 status) -- cgit v1.2.3 From 8387ce06d70bbbb97a0c168a52b68268ae0da075 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Fri, 18 Aug 2023 06:29:12 -0400 Subject: x86/hyperv: Set Virtual Trust Level in VMBus init message SEV-SNP guests on Hyper-V can run at multiple Virtual Trust Levels (VTL). During boot, get the VTL at which we're running using the GET_VP_REGISTERs hypercall, and save the value for future use. Then during VMBus initialization, set the VTL with the saved value as required in the VMBus init message. Reviewed-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Tianyu Lan Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/20230818102919.1318039-3-ltykernel@gmail.com --- arch/x86/hyperv/hv_init.c | 34 ++++++++++++++++++++++++++++++++++ arch/x86/include/asm/hyperv-tlfs.h | 7 +++++++ drivers/hv/connection.c | 1 + include/asm-generic/mshyperv.h | 1 + include/linux/hyperv.h | 4 ++-- 5 files changed, 45 insertions(+), 2 deletions(-) (limited to 'drivers/hv') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 6c04b52f139b..318010eb9f9e 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -378,6 +378,36 @@ static void __init hv_get_partition_id(void) local_irq_restore(flags); } +static u8 __init get_vtl(void) +{ + u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS; + struct hv_get_vp_registers_input *input; + struct hv_get_vp_registers_output *output; + unsigned long flags; + u64 ret; + + local_irq_save(flags); + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + output = (struct hv_get_vp_registers_output *)input; + + memset(input, 0, struct_size(input, element, 1)); + input->header.partitionid = HV_PARTITION_ID_SELF; + input->header.vpindex = HV_VP_INDEX_SELF; + input->header.inputvtl = 0; + input->element[0].name0 = HV_X64_REGISTER_VSM_VP_STATUS; + + ret = hv_do_hypercall(control, input, output); + if (hv_result_success(ret)) { + ret = output->as64.low & HV_X64_VTL_MASK; + } else { + pr_err("Failed to get VTL(%lld) and set VTL to zero by default.\n", ret); + ret = 0; + } + + local_irq_restore(flags); + return ret; +} + /* * This function is to be invoked early in the boot sequence after the * hypervisor has been detected. @@ -506,6 +536,10 @@ void __init hyperv_init(void) /* Query the VMs extended capability once, so that it can be cached. */ hv_query_ext_cap(0); + /* Find the VTL */ + if (hv_isolation_type_en_snp()) + ms_hyperv.vtl = get_vtl(); + return; clean_guest_os_id: diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index cea95dcd27c2..4bf0b315b0ce 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -301,6 +301,13 @@ enum hv_isolation_type { #define HV_X64_MSR_TIME_REF_COUNT HV_REGISTER_TIME_REF_COUNT #define HV_X64_MSR_REFERENCE_TSC HV_REGISTER_REFERENCE_TSC +/* + * Registers are only accessible via HVCALL_GET_VP_REGISTERS hvcall and + * there is not associated MSR address. + */ +#define HV_X64_REGISTER_VSM_VP_STATUS 0x000D0003 +#define HV_X64_VTL_MASK GENMASK(3, 0) + /* Hyper-V memory host visibility */ enum hv_mem_host_visibility { VMBUS_PAGE_NOT_VISIBLE = 0, diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 5978e9dbc286..02b54f85dc60 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -98,6 +98,7 @@ int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version) */ if (version >= VERSION_WIN10_V5) { msg->msg_sint = VMBUS_MESSAGE_SINT; + msg->msg_vtl = ms_hyperv.vtl; vmbus_connection.msg_conn_id = VMBUS_MESSAGE_CONNECTION_ID_4; } else { msg->interrupt_page = virt_to_phys(vmbus_connection.int_page); diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 580c766958de..efd0d2aedad3 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -54,6 +54,7 @@ struct ms_hyperv_info { }; }; u64 shared_gpa_boundary; + u8 vtl; }; extern struct ms_hyperv_info ms_hyperv; extern bool hv_nested; diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index bfbc37ce223b..1f2bfec4abde 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -665,8 +665,8 @@ struct vmbus_channel_initiate_contact { u64 interrupt_page; struct { u8 msg_sint; - u8 padding1[3]; - u32 padding2; + u8 msg_vtl; + u8 reserved[6]; }; }; u64 monitor_page1; -- cgit v1.2.3 From 193061ea0a50c13f72b907e6fa7befa6e15a4302 Mon Sep 17 00:00:00 2001 From: Tianyu Lan Date: Fri, 18 Aug 2023 06:29:14 -0400 Subject: drivers: hv: Mark percpu hvcall input arg page unencrypted in SEV-SNP enlightened guest Hypervisor needs to access input arg, VMBus synic event and message pages. Mark these pages unencrypted in the SEV-SNP guest and free them only if they have been marked encrypted successfully. Reviewed-by: Dexuan Cui Reviewed-by: Michael Kelley Signed-off-by: Tianyu Lan Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/20230818102919.1318039-5-ltykernel@gmail.com --- drivers/hv/hv.c | 57 +++++++++++++++++++++++++++++++++++++++++++++++--- drivers/hv/hv_common.c | 13 ++++++++++++ 2 files changed, 67 insertions(+), 3 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index de6708dbe0df..ec6e35a0d9bf 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "hyperv_vmbus.h" /* The one and only */ @@ -78,7 +79,7 @@ int hv_post_message(union hv_connection_id connection_id, int hv_synic_alloc(void) { - int cpu; + int cpu, ret = -ENOMEM; struct hv_per_cpu_context *hv_cpu; /* @@ -123,26 +124,76 @@ int hv_synic_alloc(void) goto err; } } + + if (hv_isolation_type_en_snp()) { + ret = set_memory_decrypted((unsigned long) + hv_cpu->synic_message_page, 1); + if (ret) { + pr_err("Failed to decrypt SYNIC msg page: %d\n", ret); + hv_cpu->synic_message_page = NULL; + + /* + * Free the event page here so that hv_synic_free() + * won't later try to re-encrypt it. + */ + free_page((unsigned long)hv_cpu->synic_event_page); + hv_cpu->synic_event_page = NULL; + goto err; + } + + ret = set_memory_decrypted((unsigned long) + hv_cpu->synic_event_page, 1); + if (ret) { + pr_err("Failed to decrypt SYNIC event page: %d\n", ret); + hv_cpu->synic_event_page = NULL; + goto err; + } + + memset(hv_cpu->synic_message_page, 0, PAGE_SIZE); + memset(hv_cpu->synic_event_page, 0, PAGE_SIZE); + } } return 0; + err: /* * Any memory allocations that succeeded will be freed when * the caller cleans up by calling hv_synic_free() */ - return -ENOMEM; + return ret; } void hv_synic_free(void) { - int cpu; + int cpu, ret; for_each_present_cpu(cpu) { struct hv_per_cpu_context *hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); + /* It's better to leak the page if the encryption fails. */ + if (hv_isolation_type_en_snp()) { + if (hv_cpu->synic_message_page) { + ret = set_memory_encrypted((unsigned long) + hv_cpu->synic_message_page, 1); + if (ret) { + pr_err("Failed to encrypt SYNIC msg page: %d\n", ret); + hv_cpu->synic_message_page = NULL; + } + } + + if (hv_cpu->synic_event_page) { + ret = set_memory_encrypted((unsigned long) + hv_cpu->synic_event_page, 1); + if (ret) { + pr_err("Failed to encrypt SYNIC event page: %d\n", ret); + hv_cpu->synic_event_page = NULL; + } + } + } + free_page((unsigned long)hv_cpu->synic_event_page); free_page((unsigned long)hv_cpu->synic_message_page); } diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 4b4aa53c34c2..2d43ba2bc925 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -359,6 +360,7 @@ int hv_common_cpu_init(unsigned int cpu) u64 msr_vp_index; gfp_t flags; int pgcount = hv_root_partition ? 2 : 1; + int ret; /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */ flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL; @@ -378,6 +380,17 @@ int hv_common_cpu_init(unsigned int cpu) outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE; } + + if (hv_isolation_type_en_snp()) { + ret = set_memory_decrypted((unsigned long)*inputarg, pgcount); + if (ret) { + kfree(*inputarg); + *inputarg = NULL; + return ret; + } + + memset(*inputarg, 0x00, pgcount * PAGE_SIZE); + } } msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX); -- cgit v1.2.3 From 78e04bbff849b51b56f5925b1945db2c6e128b61 Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Wed, 9 Aug 2023 20:40:18 +0200 Subject: Drivers: hv: vmbus: Don't dereference ACPI root object handle Since the commit referenced in the Fixes: tag below the VMBus client driver is walking the ACPI namespace up from the VMBus ACPI device to the ACPI namespace root object trying to find Hyper-V MMIO ranges. However, if it is not able to find them it ends trying to walk resources of the ACPI namespace root object itself. This object has all-ones handle, which causes a NULL pointer dereference in the ACPI code (from dereferencing this pointer with an offset). This in turn causes an oops on boot with VMBus host implementations that do not provide Hyper-V MMIO ranges in their VMBus ACPI device or its ancestors. The QEMU VMBus implementation is an example of such implementation. I guess providing these ranges is optional, since all tested Windows versions seem to be able to use VMBus devices without them. Fix this by explicitly terminating the lookup at the ACPI namespace root object. Note that Linux guests under KVM/QEMU do not use the Hyper-V PV interface by default - they only do so if the KVM PV interface is missing or disabled. Example stack trace of such oops: [ 3.710827] ? __die+0x1f/0x60 [ 3.715030] ? page_fault_oops+0x159/0x460 [ 3.716008] ? exc_page_fault+0x73/0x170 [ 3.716959] ? asm_exc_page_fault+0x22/0x30 [ 3.717957] ? acpi_ns_lookup+0x7a/0x4b0 [ 3.718898] ? acpi_ns_internalize_name+0x79/0xc0 [ 3.720018] acpi_ns_get_node_unlocked+0xb5/0xe0 [ 3.721120] ? acpi_ns_check_object_type+0xfe/0x200 [ 3.722285] ? acpi_rs_convert_aml_to_resource+0x37/0x6e0 [ 3.723559] ? down_timeout+0x3a/0x60 [ 3.724455] ? acpi_ns_get_node+0x3a/0x60 [ 3.725412] acpi_ns_get_node+0x3a/0x60 [ 3.726335] acpi_ns_evaluate+0x1c3/0x2c0 [ 3.727295] acpi_ut_evaluate_object+0x64/0x1b0 [ 3.728400] acpi_rs_get_method_data+0x2b/0x70 [ 3.729476] ? vmbus_platform_driver_probe+0x1d0/0x1d0 [hv_vmbus] [ 3.730940] ? vmbus_platform_driver_probe+0x1d0/0x1d0 [hv_vmbus] [ 3.732411] acpi_walk_resources+0x78/0xd0 [ 3.733398] vmbus_platform_driver_probe+0x9f/0x1d0 [hv_vmbus] [ 3.734802] platform_probe+0x3d/0x90 [ 3.735684] really_probe+0x19b/0x400 [ 3.736570] ? __device_attach_driver+0x100/0x100 [ 3.737697] __driver_probe_device+0x78/0x160 [ 3.738746] driver_probe_device+0x1f/0x90 [ 3.739743] __driver_attach+0xc2/0x1b0 [ 3.740671] bus_for_each_dev+0x70/0xc0 [ 3.741601] bus_add_driver+0x10e/0x210 [ 3.742527] driver_register+0x55/0xf0 [ 3.744412] ? 0xffffffffc039a000 [ 3.745207] hv_acpi_init+0x3c/0x1000 [hv_vmbus] Fixes: 7f163a6fd957 ("drivers:hv: Modify hv_vmbus to search for all MMIO ranges available.") Signed-off-by: Maciej S. Szmigiero Reviewed-by: Michael Kelley Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/fd8e64ceeecfd1d95ff49021080cf699e88dbbde.1691606267.git.maciej.szmigiero@oracle.com --- drivers/hv/vmbus_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/hv') diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 67f95a29aeca..edbb38f6956b 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2287,7 +2287,8 @@ static int vmbus_acpi_add(struct platform_device *pdev) * Some ancestor of the vmbus acpi device (Gen1 or Gen2 * firmware) is the VMOD that has the mmio ranges. Get that. */ - for (ancestor = acpi_dev_parent(device); ancestor; + for (ancestor = acpi_dev_parent(device); + ancestor && ancestor->handle != ACPI_ROOT_OBJECT; ancestor = acpi_dev_parent(ancestor)) { result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS, vmbus_walk_resources, NULL); -- cgit v1.2.3 From 08e9d12077fcc7c4c4579d7dcd8093b59b01369e Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 24 Aug 2023 01:07:03 -0700 Subject: x86/hyperv: Add hv_isolation_type_tdx() to detect TDX guests No logic change to SNP/VBS guests. hv_isolation_type_tdx() will be used to instruct a TDX guest on Hyper-V to do some TDX-specific operations, e.g. for a fully enlightened TDX guest (i.e. without the paravisor), hv_do_hypercall() should use __tdx_hypercall() and such a guest on Hyper-V should handle the Hyper-V Event/Message/Monitor pages specially. Reviewed-by: Michael Kelley Reviewed-by: Tianyu Lan Signed-off-by: Dexuan Cui Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/20230824080712.30327-2-decui@microsoft.com --- arch/x86/hyperv/ivm.c | 9 +++++++++ arch/x86/include/asm/hyperv-tlfs.h | 3 ++- arch/x86/include/asm/mshyperv.h | 3 +++ arch/x86/kernel/cpu/mshyperv.c | 2 ++ drivers/hv/hv_common.c | 6 ++++++ include/asm-generic/mshyperv.h | 1 + 6 files changed, 23 insertions(+), 1 deletion(-) (limited to 'drivers/hv') diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index cbbd3af4c3da..afdae1a8a117 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -562,3 +562,12 @@ bool hv_isolation_type_en_snp(void) return static_branch_unlikely(&isolation_type_en_snp); } +DEFINE_STATIC_KEY_FALSE(isolation_type_tdx); +/* + * hv_isolation_type_tdx - Check if the system runs in an Intel TDX based + * isolated VM. + */ +bool hv_isolation_type_tdx(void) +{ + return static_branch_unlikely(&isolation_type_tdx); +} diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 4bf0b315b0ce..2ff26f53cd62 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -169,7 +169,8 @@ enum hv_isolation_type { HV_ISOLATION_TYPE_NONE = 0, HV_ISOLATION_TYPE_VBS = 1, - HV_ISOLATION_TYPE_SNP = 2 + HV_ISOLATION_TYPE_SNP = 2, + HV_ISOLATION_TYPE_TDX = 3 }; /* Hyper-V specific model specific registers (MSRs) */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index b6be267ff3d0..3feb4e36851e 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -27,6 +27,7 @@ union hv_ghcb; DECLARE_STATIC_KEY_FALSE(isolation_type_snp); DECLARE_STATIC_KEY_FALSE(isolation_type_en_snp); +DECLARE_STATIC_KEY_FALSE(isolation_type_tdx); typedef int (*hyperv_fill_flush_list_func)( struct hv_guest_mapping_flush_list *flush, @@ -49,6 +50,8 @@ extern u64 hv_current_partition_id; extern union hv_ghcb * __percpu *hv_ghcb_pg; extern bool hv_isolation_type_en_snp(void); +bool hv_isolation_type_tdx(void); + /* * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA * to start AP in enlightened SEV guest. diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index c8d3ca2b0e0e..63093870ec33 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -418,6 +418,8 @@ static void __init ms_hyperv_init_platform(void) static_branch_enable(&isolation_type_snp); else static_branch_enable(&isolation_type_en_snp); + } else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) { + static_branch_enable(&isolation_type_tdx); } } diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 2d43ba2bc925..da3307533f4d 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -521,6 +521,12 @@ bool __weak hv_isolation_type_en_snp(void) } EXPORT_SYMBOL_GPL(hv_isolation_type_en_snp); +bool __weak hv_isolation_type_tdx(void) +{ + return false; +} +EXPORT_SYMBOL_GPL(hv_isolation_type_tdx); + void __weak hv_setup_vmbus_handler(void (*handler)(void)) { } diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index efd0d2aedad3..82eba2d5fc4c 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -66,6 +66,7 @@ extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr); extern u64 hv_do_fast_hypercall8(u16 control, u64 input8); extern bool hv_isolation_type_snp(void); extern bool hv_isolation_type_en_snp(void); +bool hv_isolation_type_tdx(void); /* Helper functions that provide a consistent pattern for checking Hyper-V hypercall status. */ static inline int hv_result(u64 status) -- cgit v1.2.3 From d6e0228d265f29348a01780ff306321c399d8b95 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 24 Aug 2023 01:07:04 -0700 Subject: x86/hyperv: Support hypercalls for fully enlightened TDX guests A fully enlightened TDX guest on Hyper-V (i.e. without the paravisor) only uses the GHCI call rather than hv_hypercall_pg. Do not initialize hypercall_pg for such a guest. In hv_common_cpu_init(), the hyperv_pcpu_input_arg page needs to be decrypted in such a guest. Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Michael Kelley Reviewed-by: Tianyu Lan Signed-off-by: Dexuan Cui Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/20230824080712.30327-3-decui@microsoft.com --- arch/x86/hyperv/hv_init.c | 8 ++++++++ arch/x86/hyperv/ivm.c | 17 +++++++++++++++++ arch/x86/include/asm/mshyperv.h | 14 ++++++++++++++ drivers/hv/hv_common.c | 10 ++++++++-- include/asm-generic/mshyperv.h | 1 + 5 files changed, 48 insertions(+), 2 deletions(-) (limited to 'drivers/hv') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index bcfbcda8b050..255e02ec467e 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -476,6 +476,10 @@ void __init hyperv_init(void) /* Hyper-V requires to write guest os id via ghcb in SNP IVM. */ hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id); + /* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */ + if (hv_isolation_type_tdx()) + goto skip_hypercall_pg_init; + hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, @@ -515,6 +519,7 @@ void __init hyperv_init(void) wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); } +skip_hypercall_pg_init: /* * hyperv_init() is called before LAPIC is initialized: see * apic_intr_mode_init() -> x86_platform.apic_post_init() and @@ -642,6 +647,9 @@ bool hv_is_hyperv_initialized(void) if (x86_hyper_type != X86_HYPER_MS_HYPERV) return false; + /* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */ + if (hv_isolation_type_tdx()) + return true; /* * Verify that earlier initialization succeeded by checking * that the hypercall page is setup diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index afdae1a8a117..6c7598d9e68a 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -571,3 +571,20 @@ bool hv_isolation_type_tdx(void) { return static_branch_unlikely(&isolation_type_tdx); } + +#ifdef CONFIG_INTEL_TDX_GUEST + +u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) +{ + struct tdx_hypercall_args args = { }; + + args.r10 = control; + args.rdx = param1; + args.r8 = param2; + + (void)__tdx_hypercall_ret(&args); + + return args.r11; +} + +#endif diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 3feb4e36851e..6a9e00c4730b 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -51,6 +51,7 @@ extern union hv_ghcb * __percpu *hv_ghcb_pg; extern bool hv_isolation_type_en_snp(void); bool hv_isolation_type_tdx(void); +u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2); /* * DEFAULT INIT GPAT and SEGMENT LIMIT value in struct VMSA @@ -63,6 +64,10 @@ int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages); int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id); int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags); +/* + * If the hypercall involves no input or output parameters, the hypervisor + * ignores the corresponding GPA pointer. + */ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) { u64 input_address = input ? virt_to_phys(input) : 0; @@ -70,6 +75,9 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) u64 hv_status; #ifdef CONFIG_X86_64 + if (hv_isolation_type_tdx()) + return hv_tdx_hypercall(control, input_address, output_address); + if (hv_isolation_type_en_snp()) { __asm__ __volatile__("mov %4, %%r8\n" "vmmcall" @@ -123,6 +131,9 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) u64 hv_status; #ifdef CONFIG_X86_64 + if (hv_isolation_type_tdx()) + return hv_tdx_hypercall(control, input1, 0); + if (hv_isolation_type_en_snp()) { __asm__ __volatile__( "vmmcall" @@ -174,6 +185,9 @@ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2) u64 hv_status; #ifdef CONFIG_X86_64 + if (hv_isolation_type_tdx()) + return hv_tdx_hypercall(control, input1, input2); + if (hv_isolation_type_en_snp()) { __asm__ __volatile__("mov %4, %%r8\n" "vmmcall" diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index da3307533f4d..897bbb96f411 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -381,10 +381,10 @@ int hv_common_cpu_init(unsigned int cpu) *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE; } - if (hv_isolation_type_en_snp()) { + if (hv_isolation_type_en_snp() || hv_isolation_type_tdx()) { ret = set_memory_decrypted((unsigned long)*inputarg, pgcount); if (ret) { - kfree(*inputarg); + /* It may be unsafe to free *inputarg */ *inputarg = NULL; return ret; } @@ -567,3 +567,9 @@ u64 __weak hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_s return HV_STATUS_INVALID_PARAMETER; } EXPORT_SYMBOL_GPL(hv_ghcb_hypercall); + +u64 __weak hv_tdx_hypercall(u64 control, u64 param1, u64 param2) +{ + return HV_STATUS_INVALID_PARAMETER; +} +EXPORT_SYMBOL_GPL(hv_tdx_hypercall); diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 82eba2d5fc4c..f577eff58ea0 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -283,6 +283,7 @@ enum hv_isolation_type hv_get_isolation_type(void); bool hv_is_isolation_supported(void); bool hv_isolation_type_snp(void); u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size); +u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2); void hyperv_cleanup(void); bool hv_query_ext_cap(u64 cap_query); void hv_setup_dma_ops(struct device *dev, bool coherent); -- cgit v1.2.3 From 68f2f2bc163d4427b04f0fb6421f091f948175fe Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 24 Aug 2023 01:07:05 -0700 Subject: Drivers: hv: vmbus: Support fully enlightened TDX guests Add Hyper-V specific code so that a fully enlightened TDX guest (i.e. without the paravisor) can run on Hyper-V: Don't use hv_vp_assist_page. Use GHCI instead. Don't try to use the unsupported HV_REGISTER_CRASH_CTL. Don't trust (use) Hyper-V's TLB-flushing hypercalls. Don't use lazy EOI. Share the SynIC Event/Message pages with the hypervisor. Don't use the Hyper-V TSC page for now, because non-trivial work is required to share the page with the hypervisor. Reviewed-by: Michael Kelley Signed-off-by: Dexuan Cui Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/20230824080712.30327-4-decui@microsoft.com --- arch/x86/hyperv/hv_apic.c | 15 ++++++++++++--- arch/x86/hyperv/hv_init.c | 19 +++++++++++++++---- arch/x86/kernel/cpu/mshyperv.c | 14 ++++++++++++++ drivers/hv/hv.c | 9 +++++++-- 4 files changed, 48 insertions(+), 9 deletions(-) (limited to 'drivers/hv') diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 1fbda2f94184..cb7429046d18 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -177,8 +177,11 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector, (exclude_self && weight == 1 && cpumask_test_cpu(this_cpu, mask))) return true; - if (!hv_hypercall_pg) - return false; + /* A fully enlightened TDX VM uses GHCI rather than hv_hypercall_pg. */ + if (!hv_hypercall_pg) { + if (ms_hyperv.paravisor_present || !hv_isolation_type_tdx()) + return false; + } if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) return false; @@ -231,9 +234,15 @@ static bool __send_ipi_one(int cpu, int vector) trace_hyperv_send_ipi_one(cpu, vector); - if (!hv_hypercall_pg || (vp == VP_INVAL)) + if (vp == VP_INVAL) return false; + /* A fully enlightened TDX VM uses GHCI rather than hv_hypercall_pg. */ + if (!hv_hypercall_pg) { + if (ms_hyperv.paravisor_present || !hv_isolation_type_tdx()) + return false; + } + if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) return false; diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 255e02ec467e..c1c1b4e1502f 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -80,7 +80,7 @@ static int hyperv_init_ghcb(void) static int hv_cpu_init(unsigned int cpu) { union hv_vp_assist_msr_contents msr = { 0 }; - struct hv_vp_assist_page **hvp = &hv_vp_assist_page[cpu]; + struct hv_vp_assist_page **hvp; int ret; ret = hv_common_cpu_init(cpu); @@ -90,6 +90,7 @@ static int hv_cpu_init(unsigned int cpu) if (!hv_vp_assist_page) return 0; + hvp = &hv_vp_assist_page[cpu]; if (hv_root_partition) { /* * For root partition we get the hypervisor provided VP assist @@ -442,11 +443,21 @@ void __init hyperv_init(void) if (hv_common_init()) return; - hv_vp_assist_page = kcalloc(num_possible_cpus(), - sizeof(*hv_vp_assist_page), GFP_KERNEL); + /* + * The VP assist page is useless to a TDX guest: the only use we + * would have for it is lazy EOI, which can not be used with TDX. + */ + if (hv_isolation_type_tdx()) + hv_vp_assist_page = NULL; + else + hv_vp_assist_page = kcalloc(num_possible_cpus(), + sizeof(*hv_vp_assist_page), + GFP_KERNEL); if (!hv_vp_assist_page) { ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; - goto common_free; + + if (!hv_isolation_type_tdx()) + goto common_free; } if (hv_isolation_type_snp()) { diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 63093870ec33..ff3d9c5de19c 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -420,6 +420,20 @@ static void __init ms_hyperv_init_platform(void) static_branch_enable(&isolation_type_en_snp); } else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) { static_branch_enable(&isolation_type_tdx); + + /* A TDX VM must use x2APIC and doesn't use lazy EOI. */ + ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED; + + if (!ms_hyperv.paravisor_present) { + /* To be supported: more work is required. */ + ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE; + + /* HV_REGISTER_CRASH_CTL is unsupported. */ + ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; + + /* Don't trust Hyper-V's TLB-flushing hypercalls. */ + ms_hyperv.hints &= ~HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED; + } } } diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index ec6e35a0d9bf..d1064118a72f 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -121,11 +121,15 @@ int hv_synic_alloc(void) (void *)get_zeroed_page(GFP_ATOMIC); if (hv_cpu->synic_event_page == NULL) { pr_err("Unable to allocate SYNIC event page\n"); + + free_page((unsigned long)hv_cpu->synic_message_page); + hv_cpu->synic_message_page = NULL; goto err; } } - if (hv_isolation_type_en_snp()) { + if (!ms_hyperv.paravisor_present && + (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) { ret = set_memory_decrypted((unsigned long) hv_cpu->synic_message_page, 1); if (ret) { @@ -174,7 +178,8 @@ void hv_synic_free(void) = per_cpu_ptr(hv_context.cpu_context, cpu); /* It's better to leak the page if the encryption fails. */ - if (hv_isolation_type_en_snp()) { + if (!ms_hyperv.paravisor_present && + (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) { if (hv_cpu->synic_message_page) { ret = set_memory_encrypted((unsigned long) hv_cpu->synic_message_page, 1); -- cgit v1.2.3 From cceb4e0810b61c7f5837c17e966b9b718dd62d22 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 24 Aug 2023 01:07:07 -0700 Subject: Drivers: hv: vmbus: Support >64 VPs for a fully enlightened TDX/SNP VM Don't set *this_cpu_ptr(hyperv_pcpu_input_arg) before the function set_memory_decrypted() returns, otherwise we run into this ticky issue: For a fully enlightened TDX/SNP VM, in hv_common_cpu_init(), *this_cpu_ptr(hyperv_pcpu_input_arg) is an encrypted page before the set_memory_decrypted() returns. When such a VM has more than 64 VPs, if the hyperv_pcpu_input_arg is not NULL, hv_common_cpu_init() -> set_memory_decrypted() -> ... -> cpa_flush() -> on_each_cpu() -> ... -> hv_send_ipi_mask() -> ... -> __send_ipi_mask_ex() tries to call hv_do_rep_hypercall() with the hyperv_pcpu_input_arg as the hypercall input page, which must be a decrypted page in such a VM, but the page is still encrypted at this point, and a fatal fault is triggered. Fix the issue by setting *this_cpu_ptr(hyperv_pcpu_input_arg) after set_memory_decrypted(): if the hyperv_pcpu_input_arg is NULL, __send_ipi_mask_ex() returns HV_STATUS_INVALID_PARAMETER immediately, and hv_send_ipi_mask() falls back to orig_apic.send_IPI_mask(), which can use x2apic_send_IPI_all(), which may be slightly slower than the hypercall but still works correctly in such a VM. Reviewed-by: Michael Kelley Reviewed-by: Tianyu Lan Signed-off-by: Dexuan Cui Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/20230824080712.30327-6-decui@microsoft.com --- drivers/hv/hv_common.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 897bbb96f411..4c858e1636da 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -360,6 +360,7 @@ int hv_common_cpu_init(unsigned int cpu) u64 msr_vp_index; gfp_t flags; int pgcount = hv_root_partition ? 2 : 1; + void *mem; int ret; /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */ @@ -372,25 +373,40 @@ int hv_common_cpu_init(unsigned int cpu) * allocated if this CPU was previously online and then taken offline */ if (!*inputarg) { - *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags); - if (!(*inputarg)) + mem = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags); + if (!mem) return -ENOMEM; if (hv_root_partition) { outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); - *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE; + *outputarg = (char *)mem + HV_HYP_PAGE_SIZE; } if (hv_isolation_type_en_snp() || hv_isolation_type_tdx()) { - ret = set_memory_decrypted((unsigned long)*inputarg, pgcount); + ret = set_memory_decrypted((unsigned long)mem, pgcount); if (ret) { - /* It may be unsafe to free *inputarg */ - *inputarg = NULL; + /* It may be unsafe to free 'mem' */ return ret; } - memset(*inputarg, 0x00, pgcount * PAGE_SIZE); + memset(mem, 0x00, pgcount * HV_HYP_PAGE_SIZE); } + + /* + * In a fully enlightened TDX/SNP VM with more than 64 VPs, if + * hyperv_pcpu_input_arg is not NULL, set_memory_decrypted() -> + * ... -> cpa_flush()-> ... -> __send_ipi_mask_ex() tries to + * use hyperv_pcpu_input_arg as the hypercall input page, which + * must be a decrypted page in such a VM, but the page is still + * encrypted before set_memory_decrypted() returns. Fix this by + * setting *inputarg after the above set_memory_decrypted(): if + * hyperv_pcpu_input_arg is NULL, __send_ipi_mask_ex() returns + * HV_STATUS_INVALID_PARAMETER immediately, and the function + * hv_send_ipi_mask() falls back to orig_apic.send_IPI_mask(), + * which may be slightly slower than the hypercall, but still + * works correctly in such a VM. + */ + *inputarg = mem; } msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX); -- cgit v1.2.3 From d3a9d7e49d15316f68f4347f48adcd1665834980 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 24 Aug 2023 01:07:08 -0700 Subject: x86/hyperv: Introduce a global variable hyperv_paravisor_present The new variable hyperv_paravisor_present is set only when the VM is a SNP/TDX VM with the paravisor running: see ms_hyperv_init_platform(). We introduce hyperv_paravisor_present because we can not use ms_hyperv.paravisor_present in arch/x86/include/asm/mshyperv.h: struct ms_hyperv_info is defined in include/asm-generic/mshyperv.h, which is included at the end of arch/x86/include/asm/mshyperv.h, but at the beginning of arch/x86/include/asm/mshyperv.h, we would already need to use struct ms_hyperv_info in hv_do_hypercall(). We use hyperv_paravisor_present only in include/asm-generic/mshyperv.h, and use ms_hyperv.paravisor_present elsewhere. In the future, we'll introduce a hypercall function structure for different VM types, and at boot time, the right function pointers would be written into the structure so that runtime testing of TDX vs. SNP vs. normal will be avoided and hyperv_paravisor_present will no longer be needed. Call hv_vtom_init() when it's a VBS VM or when ms_hyperv.paravisor_present is true, i.e. the VM is a SNP VM or TDX VM with the paravisor. Enhance hv_vtom_init() for a TDX VM with the paravisor. In hv_common_cpu_init(), don't decrypt the hyperv_pcpu_input_arg for a TDX VM with the paravisor, just like we don't decrypt the page for a SNP VM with the paravisor. Signed-off-by: Dexuan Cui Reviewed-by: Tianyu Lan Reviewed-by: Michael Kelley Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/20230824080712.30327-7-decui@microsoft.com --- arch/x86/hyperv/hv_init.c | 4 ++-- arch/x86/hyperv/ivm.c | 38 +++++++++++++++++++++++++++++++++++--- arch/x86/include/asm/mshyperv.h | 15 ++++++++++----- arch/x86/kernel/cpu/mshyperv.c | 9 +++++++-- drivers/hv/connection.c | 15 +++++++++++---- drivers/hv/hv.c | 10 +++++----- drivers/hv/hv_common.c | 3 ++- 7 files changed, 72 insertions(+), 22 deletions(-) (limited to 'drivers/hv') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index c1c1b4e1502f..eca5c4b7e3b5 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -658,8 +658,8 @@ bool hv_is_hyperv_initialized(void) if (x86_hyper_type != X86_HYPER_MS_HYPERV) return false; - /* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */ - if (hv_isolation_type_tdx()) + /* A TDX VM with no paravisor uses TDX GHCI call rather than hv_hypercall_pg */ + if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present) return true; /* * Verify that earlier initialization succeeded by checking diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 6c7598d9e68a..7bd0359d5e38 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -248,6 +248,9 @@ void hv_ghcb_msr_read(u64 msr, u64 *value) } EXPORT_SYMBOL_GPL(hv_ghcb_msr_read); +#endif /* CONFIG_AMD_MEM_ENCRYPT */ + +#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) /* * hv_mark_gpa_visibility - Set pages visible to host via hvcall. * @@ -368,6 +371,10 @@ static bool hv_is_private_mmio(u64 addr) return false; } +#endif /* defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) */ + +#ifdef CONFIG_AMD_MEM_ENCRYPT + #define hv_populate_vmcb_seg(seg, gdtr_base) \ do { \ if (seg.selector) { \ @@ -495,15 +502,40 @@ int hv_snp_boot_ap(int cpu, unsigned long start_ip) return ret; } +#endif /* CONFIG_AMD_MEM_ENCRYPT */ + +#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) + void __init hv_vtom_init(void) { + enum hv_isolation_type type = hv_get_isolation_type(); + + switch (type) { + case HV_ISOLATION_TYPE_VBS: + fallthrough; /* * By design, a VM using vTOM doesn't see the SEV setting, * so SEV initialization is bypassed and sev_status isn't set. * Set it here to indicate a vTOM VM. + * + * Note: if CONFIG_AMD_MEM_ENCRYPT is not set, sev_status is + * defined as 0ULL, to which we can't assigned a value. */ - sev_status = MSR_AMD64_SNP_VTOM; - cc_vendor = CC_VENDOR_AMD; +#ifdef CONFIG_AMD_MEM_ENCRYPT + case HV_ISOLATION_TYPE_SNP: + sev_status = MSR_AMD64_SNP_VTOM; + cc_vendor = CC_VENDOR_AMD; + break; +#endif + + case HV_ISOLATION_TYPE_TDX: + cc_vendor = CC_VENDOR_INTEL; + break; + + default: + panic("hv_vtom_init: unsupported isolation type %d\n", type); + } + cc_set_mask(ms_hyperv.shared_gpa_boundary); physical_mask &= ms_hyperv.shared_gpa_boundary - 1; @@ -516,7 +548,7 @@ void __init hv_vtom_init(void) mtrr_overwrite_state(NULL, 0, MTRR_TYPE_WRBACK); } -#endif /* CONFIG_AMD_MEM_ENCRYPT */ +#endif /* defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) */ enum hv_isolation_type hv_get_isolation_type(void) { diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 6a9e00c4730b..a9f453c39371 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -42,6 +42,7 @@ static inline unsigned char hv_get_nmi_reason(void) #if IS_ENABLED(CONFIG_HYPERV) extern int hyperv_init_cpuhp; +extern bool hyperv_paravisor_present; extern void *hv_hypercall_pg; @@ -75,7 +76,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) u64 hv_status; #ifdef CONFIG_X86_64 - if (hv_isolation_type_tdx()) + if (hv_isolation_type_tdx() && !hyperv_paravisor_present) return hv_tdx_hypercall(control, input_address, output_address); if (hv_isolation_type_en_snp()) { @@ -131,7 +132,7 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) u64 hv_status; #ifdef CONFIG_X86_64 - if (hv_isolation_type_tdx()) + if (hv_isolation_type_tdx() && !hyperv_paravisor_present) return hv_tdx_hypercall(control, input1, 0); if (hv_isolation_type_en_snp()) { @@ -185,7 +186,7 @@ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2) u64 hv_status; #ifdef CONFIG_X86_64 - if (hv_isolation_type_tdx()) + if (hv_isolation_type_tdx() && !hyperv_paravisor_present) return hv_tdx_hypercall(control, input1, input2); if (hv_isolation_type_en_snp()) { @@ -278,19 +279,23 @@ void hv_ghcb_msr_write(u64 msr, u64 value); void hv_ghcb_msr_read(u64 msr, u64 *value); bool hv_ghcb_negotiate_protocol(void); void __noreturn hv_ghcb_terminate(unsigned int set, unsigned int reason); -void hv_vtom_init(void); int hv_snp_boot_ap(int cpu, unsigned long start_ip); #else static inline void hv_ghcb_msr_write(u64 msr, u64 value) {} static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} static inline bool hv_ghcb_negotiate_protocol(void) { return false; } static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} -static inline void hv_vtom_init(void) {} static inline int hv_snp_boot_ap(int cpu, unsigned long start_ip) { return 0; } #endif extern bool hv_isolation_type_snp(void); +#if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) +void hv_vtom_init(void); +#else +static inline void hv_vtom_init(void) {} +#endif + static inline bool hv_is_synic_reg(unsigned int reg) { return (reg >= HV_REGISTER_SCONTROL) && diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index fe5393d759d3..4c5a174935ca 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -40,6 +40,10 @@ bool hv_root_partition; bool hv_nested; struct ms_hyperv_info ms_hyperv; +/* Used in modules via hv_do_hypercall(): see arch/x86/include/asm/mshyperv.h */ +bool hyperv_paravisor_present __ro_after_init; +EXPORT_SYMBOL_GPL(hyperv_paravisor_present); + #if IS_ENABLED(CONFIG_HYPERV) static inline unsigned int hv_get_nested_reg(unsigned int reg) { @@ -429,6 +433,8 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.shared_gpa_boundary = BIT_ULL(ms_hyperv.shared_gpa_boundary_bits); + hyperv_paravisor_present = !!ms_hyperv.paravisor_present; + pr_info("Hyper-V: Isolation Config: Group A 0x%x, Group B 0x%x\n", ms_hyperv.isolation_config_a, ms_hyperv.isolation_config_b); @@ -526,8 +532,7 @@ static void __init ms_hyperv_init_platform(void) #if IS_ENABLED(CONFIG_HYPERV) if ((hv_get_isolation_type() == HV_ISOLATION_TYPE_VBS) || - ((hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) && - ms_hyperv.paravisor_present)) + ms_hyperv.paravisor_present) hv_vtom_init(); /* * Setup the hook to get control post apic initialization. diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 02b54f85dc60..2dd972ca85dd 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -484,10 +484,17 @@ void vmbus_set_event(struct vmbus_channel *channel) ++channel->sig_events; - if (hv_isolation_type_snp()) - hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event, - NULL, sizeof(channel->sig_event)); - else + if (ms_hyperv.paravisor_present) { + if (hv_isolation_type_snp()) + hv_ghcb_hypercall(HVCALL_SIGNAL_EVENT, &channel->sig_event, + NULL, sizeof(channel->sig_event)); + else if (hv_isolation_type_tdx()) + hv_tdx_hypercall(HVCALL_SIGNAL_EVENT | HV_HYPERCALL_FAST_BIT, + channel->sig_event, 0); + else + WARN_ON_ONCE(1); + } else { hv_do_fast_hypercall8(HVCALL_SIGNAL_EVENT, channel->sig_event); + } } EXPORT_SYMBOL_GPL(vmbus_set_event); diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index d1064118a72f..48b1623112f0 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -109,7 +109,7 @@ int hv_synic_alloc(void) * Synic message and event pages are allocated by paravisor. * Skip these pages allocation here. */ - if (!hv_isolation_type_snp() && !hv_root_partition) { + if (!ms_hyperv.paravisor_present && !hv_root_partition) { hv_cpu->synic_message_page = (void *)get_zeroed_page(GFP_ATOMIC); if (hv_cpu->synic_message_page == NULL) { @@ -226,7 +226,7 @@ void hv_synic_enable_regs(unsigned int cpu) simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP); simp.simp_enabled = 1; - if (hv_isolation_type_snp() || hv_root_partition) { + if (ms_hyperv.paravisor_present || hv_root_partition) { /* Mask out vTOM bit. ioremap_cache() maps decrypted */ u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) & ~ms_hyperv.shared_gpa_boundary; @@ -245,7 +245,7 @@ void hv_synic_enable_regs(unsigned int cpu) siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP); siefp.siefp_enabled = 1; - if (hv_isolation_type_snp() || hv_root_partition) { + if (ms_hyperv.paravisor_present || hv_root_partition) { /* Mask out vTOM bit. ioremap_cache() maps decrypted */ u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) & ~ms_hyperv.shared_gpa_boundary; @@ -328,7 +328,7 @@ void hv_synic_disable_regs(unsigned int cpu) * addresses. */ simp.simp_enabled = 0; - if (hv_isolation_type_snp() || hv_root_partition) { + if (ms_hyperv.paravisor_present || hv_root_partition) { iounmap(hv_cpu->synic_message_page); hv_cpu->synic_message_page = NULL; } else { @@ -340,7 +340,7 @@ void hv_synic_disable_regs(unsigned int cpu) siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP); siefp.siefp_enabled = 0; - if (hv_isolation_type_snp() || hv_root_partition) { + if (ms_hyperv.paravisor_present || hv_root_partition) { iounmap(hv_cpu->synic_event_page); hv_cpu->synic_event_page = NULL; } else { diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 4c858e1636da..e62d64753902 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -382,7 +382,8 @@ int hv_common_cpu_init(unsigned int cpu) *outputarg = (char *)mem + HV_HYP_PAGE_SIZE; } - if (hv_isolation_type_en_snp() || hv_isolation_type_tdx()) { + if (!ms_hyperv.paravisor_present && + (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) { ret = set_memory_decrypted((unsigned long)mem, pgcount); if (ret) { /* It may be unsafe to free 'mem' */ -- cgit v1.2.3 From 23378295042a4bcaeec350733a4771678e7a1f3a Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 24 Aug 2023 01:07:09 -0700 Subject: Drivers: hv: vmbus: Bring the post_msg_page back for TDX VMs with the paravisor The post_msg_page was removed in commit 9a6b1a170ca8 ("Drivers: hv: vmbus: Remove the per-CPU post_msg_page") However, it turns out that we need to bring it back, but only for a TDX VM with the paravisor: in such a VM, the hyperv_pcpu_input_arg is not decrypted, but the HVCALL_POST_MESSAGE in such a VM needs a decrypted page as the hypercall input page: see the comments in hyperv_init() for a detailed explanation. Except for HVCALL_POST_MESSAGE and HVCALL_SIGNAL_EVENT, the other hypercalls in a TDX VM with the paravisor still use hv_hypercall_pg and must use the hyperv_pcpu_input_arg (which is encrypted in such a VM), when a hypercall input page is used. Signed-off-by: Dexuan Cui Reviewed-by: Tianyu Lan Reviewed-by: Michael Kelley Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/20230824080712.30327-8-decui@microsoft.com --- arch/x86/hyperv/hv_init.c | 20 ++++++++++++++-- drivers/hv/hv.c | 59 ++++++++++++++++++++++++++++++++++++++++++----- drivers/hv/hyperv_vmbus.h | 11 +++++++++ 3 files changed, 82 insertions(+), 8 deletions(-) (limited to 'drivers/hv') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index eca5c4b7e3b5..3729eee21e47 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -480,6 +480,22 @@ void __init hyperv_init(void) * Setup the hypercall page and enable hypercalls. * 1. Register the guest ID * 2. Enable the hypercall and register the hypercall page + * + * A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg: + * when the hypercall input is a page, such a VM must pass a decrypted + * page to Hyper-V, e.g. hv_post_message() uses the per-CPU page + * hyperv_pcpu_input_arg, which is decrypted if no paravisor is present. + * + * A TDX VM with the paravisor uses hv_hypercall_pg for most hypercalls, + * which are handled by the paravisor and the VM must use an encrypted + * input page: in such a VM, the hyperv_pcpu_input_arg is encrypted and + * used in the hypercalls, e.g. see hv_mark_gpa_visibility() and + * hv_arch_irq_unmask(). Such a VM uses TDX GHCI for two hypercalls: + * 1. HVCALL_SIGNAL_EVENT: see vmbus_set_event() and _hv_do_fast_hypercall8(). + * 2. HVCALL_POST_MESSAGE: the input page must be a decrypted page, i.e. + * hv_post_message() in such a VM can't use the encrypted hyperv_pcpu_input_arg; + * instead, hv_post_message() uses the post_msg_page, which is decrypted + * in such a VM and is only used in such a VM. */ guest_id = hv_generate_guest_id(LINUX_VERSION_CODE); wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); @@ -487,8 +503,8 @@ void __init hyperv_init(void) /* Hyper-V requires to write guest os id via ghcb in SNP IVM. */ hv_ghcb_msr_write(HV_X64_MSR_GUEST_OS_ID, guest_id); - /* A TDX guest uses the GHCI call rather than hv_hypercall_pg. */ - if (hv_isolation_type_tdx()) + /* A TDX VM with no paravisor only uses TDX GHCI rather than hv_hypercall_pg */ + if (hv_isolation_type_tdx() && !ms_hyperv.paravisor_present) goto skip_hypercall_pg_init; hv_hypercall_pg = __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 48b1623112f0..523c5d99f375 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -57,20 +57,37 @@ int hv_post_message(union hv_connection_id connection_id, local_irq_save(flags); - aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg); + /* + * A TDX VM with the paravisor must use the decrypted post_msg_page: see + * the comment in struct hv_per_cpu_context. A SNP VM with the paravisor + * can use the encrypted hyperv_pcpu_input_arg because it copies the + * input into the GHCB page, which has been decrypted by the paravisor. + */ + if (hv_isolation_type_tdx() && ms_hyperv.paravisor_present) + aligned_msg = this_cpu_ptr(hv_context.cpu_context)->post_msg_page; + else + aligned_msg = *this_cpu_ptr(hyperv_pcpu_input_arg); + aligned_msg->connectionid = connection_id; aligned_msg->reserved = 0; aligned_msg->message_type = message_type; aligned_msg->payload_size = payload_size; memcpy((void *)aligned_msg->payload, payload, payload_size); - if (hv_isolation_type_snp()) - status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE, - (void *)aligned_msg, NULL, - sizeof(*aligned_msg)); - else + if (ms_hyperv.paravisor_present) { + if (hv_isolation_type_tdx()) + status = hv_tdx_hypercall(HVCALL_POST_MESSAGE, + virt_to_phys(aligned_msg), 0); + else if (hv_isolation_type_snp()) + status = hv_ghcb_hypercall(HVCALL_POST_MESSAGE, + aligned_msg, NULL, + sizeof(*aligned_msg)); + else + status = HV_STATUS_INVALID_PARAMETER; + } else { status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL); + } local_irq_restore(flags); @@ -105,6 +122,24 @@ int hv_synic_alloc(void) tasklet_init(&hv_cpu->msg_dpc, vmbus_on_msg_dpc, (unsigned long) hv_cpu); + if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) { + hv_cpu->post_msg_page = (void *)get_zeroed_page(GFP_ATOMIC); + if (hv_cpu->post_msg_page == NULL) { + pr_err("Unable to allocate post msg page\n"); + goto err; + } + + ret = set_memory_decrypted((unsigned long)hv_cpu->post_msg_page, 1); + if (ret) { + pr_err("Failed to decrypt post msg page: %d\n", ret); + /* Just leak the page, as it's unsafe to free the page. */ + hv_cpu->post_msg_page = NULL; + goto err; + } + + memset(hv_cpu->post_msg_page, 0, PAGE_SIZE); + } + /* * Synic message and event pages are allocated by paravisor. * Skip these pages allocation here. @@ -178,6 +213,17 @@ void hv_synic_free(void) = per_cpu_ptr(hv_context.cpu_context, cpu); /* It's better to leak the page if the encryption fails. */ + if (ms_hyperv.paravisor_present && hv_isolation_type_tdx()) { + if (hv_cpu->post_msg_page) { + ret = set_memory_encrypted((unsigned long) + hv_cpu->post_msg_page, 1); + if (ret) { + pr_err("Failed to encrypt post msg page: %d\n", ret); + hv_cpu->post_msg_page = NULL; + } + } + } + if (!ms_hyperv.paravisor_present && (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) { if (hv_cpu->synic_message_page) { @@ -199,6 +245,7 @@ void hv_synic_free(void) } } + free_page((unsigned long)hv_cpu->post_msg_page); free_page((unsigned long)hv_cpu->synic_event_page); free_page((unsigned long)hv_cpu->synic_message_page); } diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 55f2086841ae..f6b1e710f805 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -123,6 +123,17 @@ struct hv_per_cpu_context { void *synic_message_page; void *synic_event_page; + /* + * The page is only used in hv_post_message() for a TDX VM (with the + * paravisor) to post a messages to Hyper-V: when such a VM calls + * HVCALL_POST_MESSAGE, it can't use the hyperv_pcpu_input_arg (which + * is encrypted in such a VM) as the hypercall input page, because + * the input page for HVCALL_POST_MESSAGE must be decrypted in such a + * VM, so post_msg_page (which is decrypted in hv_synic_alloc()) is + * introduced for this purpose. See hyperv_init() for more comments. + */ + void *post_msg_page; + /* * Starting with win8, we can take channel interrupts on any CPU; * we will manage the tasklet that handles events messages on a per CPU -- cgit v1.2.3 From e3131f1c81448a87e08dffd21867312a5ce563d9 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 24 Aug 2023 01:07:11 -0700 Subject: x86/hyperv: Remove hv_isolation_type_en_snp In ms_hyperv_init_platform(), do not distinguish between a SNP VM with the paravisor and a SNP VM without the paravisor. Replace hv_isolation_type_en_snp() with !ms_hyperv.paravisor_present && hv_isolation_type_snp(). The hv_isolation_type_en_snp() in drivers/hv/hv.c and drivers/hv/hv_common.c can be changed to hv_isolation_type_snp() since we know !ms_hyperv.paravisor_present is true there. Signed-off-by: Dexuan Cui Reviewed-by: Michael Kelley Reviewed-by: Tianyu Lan Signed-off-by: Wei Liu Link: https://lore.kernel.org/r/20230824080712.30327-10-decui@microsoft.com --- arch/x86/hyperv/hv_init.c | 8 ++++---- arch/x86/hyperv/ivm.c | 12 +----------- arch/x86/include/asm/mshyperv.h | 11 ++++------- arch/x86/kernel/cpu/mshyperv.c | 10 ++++------ drivers/hv/hv.c | 4 ++-- drivers/hv/hv_common.c | 8 +------- include/asm-generic/mshyperv.h | 3 +-- 7 files changed, 17 insertions(+), 39 deletions(-) (limited to 'drivers/hv') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index c4cffa3b1c3c..2b0124394e24 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -52,7 +52,7 @@ static int hyperv_init_ghcb(void) void *ghcb_va; void **ghcb_base; - if (!hv_isolation_type_snp()) + if (!ms_hyperv.paravisor_present || !hv_isolation_type_snp()) return 0; if (!hv_ghcb_pg) @@ -117,7 +117,7 @@ static int hv_cpu_init(unsigned int cpu) * is blocked to run in Confidential VM. So only decrypt assist * page in non-root partition here. */ - if (*hvp && hv_isolation_type_en_snp()) { + if (*hvp && !ms_hyperv.paravisor_present && hv_isolation_type_snp()) { WARN_ON_ONCE(set_memory_decrypted((unsigned long)(*hvp), 1)); memset(*hvp, 0, PAGE_SIZE); } @@ -460,7 +460,7 @@ void __init hyperv_init(void) goto common_free; } - if (hv_isolation_type_snp()) { + if (ms_hyperv.paravisor_present && hv_isolation_type_snp()) { /* Negotiate GHCB Version. */ if (!hv_ghcb_negotiate_protocol()) hv_ghcb_terminate(SEV_TERM_SET_GEN, @@ -583,7 +583,7 @@ skip_hypercall_pg_init: hv_query_ext_cap(0); /* Find the VTL */ - if (hv_isolation_type_en_snp()) + if (!ms_hyperv.paravisor_present && hv_isolation_type_snp()) ms_hyperv.vtl = get_vtl(); return; diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index fbc07493fcb4..3d48f823582c 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -637,7 +637,7 @@ bool hv_is_isolation_supported(void) DEFINE_STATIC_KEY_FALSE(isolation_type_snp); /* - * hv_isolation_type_snp - Check system runs in the AMD SEV-SNP based + * hv_isolation_type_snp - Check if the system runs in an AMD SEV-SNP based * isolation VM. */ bool hv_isolation_type_snp(void) @@ -645,16 +645,6 @@ bool hv_isolation_type_snp(void) return static_branch_unlikely(&isolation_type_snp); } -DEFINE_STATIC_KEY_FALSE(isolation_type_en_snp); -/* - * hv_isolation_type_en_snp - Check system runs in the AMD SEV-SNP based - * isolation enlightened VM. - */ -bool hv_isolation_type_en_snp(void) -{ - return static_branch_unlikely(&isolation_type_en_snp); -} - DEFINE_STATIC_KEY_FALSE(isolation_type_tdx); /* * hv_isolation_type_tdx - Check if the system runs in an Intel TDX based diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 101f71b85cfd..66ca641a164a 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -26,7 +26,6 @@ union hv_ghcb; DECLARE_STATIC_KEY_FALSE(isolation_type_snp); -DECLARE_STATIC_KEY_FALSE(isolation_type_en_snp); DECLARE_STATIC_KEY_FALSE(isolation_type_tdx); typedef int (*hyperv_fill_flush_list_func)( @@ -50,7 +49,7 @@ extern u64 hv_current_partition_id; extern union hv_ghcb * __percpu *hv_ghcb_pg; -extern bool hv_isolation_type_en_snp(void); +bool hv_isolation_type_snp(void); bool hv_isolation_type_tdx(void); u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2); @@ -79,7 +78,7 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) if (hv_isolation_type_tdx() && !hyperv_paravisor_present) return hv_tdx_hypercall(control, input_address, output_address); - if (hv_isolation_type_en_snp()) { + if (hv_isolation_type_snp() && !hyperv_paravisor_present) { __asm__ __volatile__("mov %4, %%r8\n" "vmmcall" : "=a" (hv_status), ASM_CALL_CONSTRAINT, @@ -135,7 +134,7 @@ static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1) if (hv_isolation_type_tdx() && !hyperv_paravisor_present) return hv_tdx_hypercall(control, input1, 0); - if (hv_isolation_type_en_snp()) { + if (hv_isolation_type_snp() && !hyperv_paravisor_present) { __asm__ __volatile__( "vmmcall" : "=a" (hv_status), ASM_CALL_CONSTRAINT, @@ -189,7 +188,7 @@ static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2) if (hv_isolation_type_tdx() && !hyperv_paravisor_present) return hv_tdx_hypercall(control, input1, input2); - if (hv_isolation_type_en_snp()) { + if (hv_isolation_type_snp() && !hyperv_paravisor_present) { __asm__ __volatile__("mov %4, %%r8\n" "vmmcall" : "=a" (hv_status), ASM_CALL_CONSTRAINT, @@ -284,8 +283,6 @@ static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {} static inline int hv_snp_boot_ap(int cpu, unsigned long start_ip) { return 0; } #endif -extern bool hv_isolation_type_snp(void); - #if defined(CONFIG_AMD_MEM_ENCRYPT) || defined(CONFIG_INTEL_TDX_GUEST) void hv_vtom_init(void); void hv_ivm_msr_write(u64 msr, u64 value); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 4f51dac9eeb2..b63590ffc777 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -304,7 +304,7 @@ static void __init hv_smp_prepare_cpus(unsigned int max_cpus) * Override wakeup_secondary_cpu_64 callback for SEV-SNP * enlightened guest. */ - if (hv_isolation_type_en_snp()) { + if (!ms_hyperv.paravisor_present && hv_isolation_type_snp()) { apic->wakeup_secondary_cpu_64 = hv_snp_boot_ap; return; } @@ -440,10 +440,7 @@ static void __init ms_hyperv_init_platform(void) if (hv_get_isolation_type() == HV_ISOLATION_TYPE_SNP) { - if (ms_hyperv.paravisor_present) - static_branch_enable(&isolation_type_snp); - else - static_branch_enable(&isolation_type_en_snp); + static_branch_enable(&isolation_type_snp); } else if (hv_get_isolation_type() == HV_ISOLATION_TYPE_TDX) { static_branch_enable(&isolation_type_tdx); @@ -556,7 +553,8 @@ static void __init ms_hyperv_init_platform(void) # ifdef CONFIG_SMP smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu; - if (hv_root_partition || hv_isolation_type_en_snp()) + if (hv_root_partition || + (!ms_hyperv.paravisor_present && hv_isolation_type_snp())) smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus; # endif diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 523c5d99f375..51e5018ac9b2 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -164,7 +164,7 @@ int hv_synic_alloc(void) } if (!ms_hyperv.paravisor_present && - (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) { + (hv_isolation_type_snp() || hv_isolation_type_tdx())) { ret = set_memory_decrypted((unsigned long) hv_cpu->synic_message_page, 1); if (ret) { @@ -225,7 +225,7 @@ void hv_synic_free(void) } if (!ms_hyperv.paravisor_present && - (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) { + (hv_isolation_type_snp() || hv_isolation_type_tdx())) { if (hv_cpu->synic_message_page) { ret = set_memory_encrypted((unsigned long) hv_cpu->synic_message_page, 1); diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index e62d64753902..81aa8be3e0df 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -383,7 +383,7 @@ int hv_common_cpu_init(unsigned int cpu) } if (!ms_hyperv.paravisor_present && - (hv_isolation_type_en_snp() || hv_isolation_type_tdx())) { + (hv_isolation_type_snp() || hv_isolation_type_tdx())) { ret = set_memory_decrypted((unsigned long)mem, pgcount); if (ret) { /* It may be unsafe to free 'mem' */ @@ -532,12 +532,6 @@ bool __weak hv_isolation_type_snp(void) } EXPORT_SYMBOL_GPL(hv_isolation_type_snp); -bool __weak hv_isolation_type_en_snp(void) -{ - return false; -} -EXPORT_SYMBOL_GPL(hv_isolation_type_en_snp); - bool __weak hv_isolation_type_tdx(void) { return false; diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index f577eff58ea0..e7ecf03f675e 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -64,8 +64,7 @@ extern void * __percpu *hyperv_pcpu_output_arg; extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr); extern u64 hv_do_fast_hypercall8(u16 control, u64 input8); -extern bool hv_isolation_type_snp(void); -extern bool hv_isolation_type_en_snp(void); +bool hv_isolation_type_snp(void); bool hv_isolation_type_tdx(void); /* Helper functions that provide a consistent pattern for checking Hyper-V hypercall status. */ -- cgit v1.2.3