From 019b383d1132e4051de0d2e43254454b86538cf4 Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Thu, 10 Aug 2023 19:12:45 -0700 Subject: x86/tdx: Retry partially-completed page conversion hypercalls TDX guest memory is private by default and the VMM may not access it. However, in cases where the guest needs to share data with the VMM, the guest and the VMM can coordinate to make memory shared between them. The guest side of this protocol includes the "MapGPA" hypercall. This call takes a guest physical address range. The hypercall spec (aka. the GHCI) says that the MapGPA call is allowed to return partial progress in mapping this range and indicate that fact with a special error code. A guest that sees such partial progress is expected to retry the operation for the portion of the address range that was not completed. Hyper-V does this partial completion dance when set_memory_decrypted() is called to "decrypt" swiotlb bounce buffers that can be up to 1GB in size. It is evidently the only VMM that does this, which is why nobody noticed this until now. [ dhansen: rewrite changelog ] Signed-off-by: Dexuan Cui Signed-off-by: Dave Hansen Reviewed-by: Michael Kelley Reviewed-by: Kuppuswamy Sathyanarayanan Acked-by: Kirill A. Shutemov Link: https://lore.kernel.org/all/20230811021246.821-2-decui%40microsoft.com --- arch/x86/coco/tdx/tdx.c | 64 +++++++++++++++++++++++++++++++-------- arch/x86/include/asm/shared/tdx.h | 2 ++ 2 files changed, 54 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 1d6b863c42b0..746075d20cd2 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -703,14 +703,15 @@ static bool tdx_cache_flush_required(void) } /* - * Inform the VMM of the guest's intent for this physical page: shared with - * the VMM or private to the guest. The VMM is expected to change its mapping - * of the page in response. + * Notify the VMM about page mapping conversion. More info about ABI + * can be found in TDX Guest-Host-Communication Interface (GHCI), + * section "TDG.VP.VMCALL". */ -static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc) +static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc) { - phys_addr_t start = __pa(vaddr); - phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE); + /* Retrying the hypercall a second time should succeed; use 3 just in case */ + const int max_retries_per_page = 3; + int retry_count = 0; if (!enc) { /* Set the shared (decrypted) bits: */ @@ -718,12 +719,51 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc) end |= cc_mkdec(0); } - /* - * Notify the VMM about page mapping conversion. More info about ABI - * can be found in TDX Guest-Host-Communication Interface (GHCI), - * section "TDG.VP.VMCALL" - */ - if (_tdx_hypercall(TDVMCALL_MAP_GPA, start, end - start, 0, 0)) + while (retry_count < max_retries_per_page) { + struct tdx_hypercall_args args = { + .r10 = TDX_HYPERCALL_STANDARD, + .r11 = TDVMCALL_MAP_GPA, + .r12 = start, + .r13 = end - start }; + + u64 map_fail_paddr; + u64 ret = __tdx_hypercall_ret(&args); + + if (ret != TDVMCALL_STATUS_RETRY) + return !ret; + /* + * The guest must retry the operation for the pages in the + * region starting at the GPA specified in R11. R11 comes + * from the untrusted VMM. Sanity check it. + */ + map_fail_paddr = args.r11; + if (map_fail_paddr < start || map_fail_paddr >= end) + return false; + + /* "Consume" a retry without forward progress */ + if (map_fail_paddr == start) { + retry_count++; + continue; + } + + start = map_fail_paddr; + retry_count = 0; + } + + return false; +} + +/* + * Inform the VMM of the guest's intent for this physical page: shared with + * the VMM or private to the guest. The VMM is expected to change its mapping + * of the page in response. + */ +static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc) +{ + phys_addr_t start = __pa(vaddr); + phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE); + + if (!tdx_map_gpa(start, end, enc)) return false; /* shared->private conversion requires memory to be accepted before use */ diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index 7513b3bb69b7..22ee23a3f24a 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -24,6 +24,8 @@ #define TDVMCALL_MAP_GPA 0x10001 #define TDVMCALL_REPORT_FATAL_ERROR 0x10003 +#define TDVMCALL_STATUS_RETRY 1 + #ifndef __ASSEMBLY__ /* -- cgit v1.2.3 From 5d092b66119d774853cc9308522620299048a662 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 15 Aug 2023 23:01:55 +1200 Subject: x86/tdx: Zero out the missing RSI in TDX_HYPERCALL macro In the TDX_HYPERCALL asm, after the TDCALL instruction returns from the untrusted VMM, the registers that the TDX guest shares to the VMM need to be cleared to avoid speculative execution of VMM-provided values. RSI is specified in the bitmap of those registers, but it is missing when zeroing out those registers in the current TDX_HYPERCALL. It was there when it was originally added in commit 752d13305c78 ("x86/tdx: Expand __tdx_hypercall() to handle more arguments"), but was later removed in commit 1e70c680375a ("x86/tdx: Do not corrupt frame-pointer in __tdx_hypercall()"), which was correct because %rsi is later restored in the "pop %rsi". However a later commit 7a3a401874be ("x86/tdx: Drop flags from __tdx_hypercall()") removed that "pop %rsi" but forgot to add the "xor %rsi, %rsi" back. Fix by adding it back. Fixes: 7a3a401874be ("x86/tdx: Drop flags from __tdx_hypercall()") Signed-off-by: Kai Huang Signed-off-by: Dave Hansen Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Kirill A. Shutemov Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/e7d1157074a0b45d34564d5f17f3e0ffee8115e9.1692096753.git.kai.huang%40intel.com --- arch/x86/coco/tdx/tdcall.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S index b193c0a1d8db..2eca5f43734f 100644 --- a/arch/x86/coco/tdx/tdcall.S +++ b/arch/x86/coco/tdx/tdcall.S @@ -195,6 +195,7 @@ SYM_FUNC_END(__tdx_module_call) xor %r10d, %r10d xor %r11d, %r11d xor %rdi, %rdi + xor %rsi, %rsi xor %rdx, %rdx /* Restore callee-saved GPRs as mandated by the x86_64 ABI */ -- cgit v1.2.3 From 03a423d40cb30e0e1cb77a801acb56ddb0bf6f5e Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 15 Aug 2023 23:01:56 +1200 Subject: x86/tdx: Skip saving output regs when SEAMCALL fails with VMFailInvalid If SEAMCALL fails with VMFailInvalid, the SEAM software (e.g., the TDX module) won't have chance to set any output register. Skip saving the output registers to the structure in this case. Also, as '.Lno_output_struct' is the very last symbol before RET, rename it to '.Lout' to make it short. Opportunistically make the asm directives unindented. Suggested-by: Peter Zijlstra Signed-off-by: Kai Huang Signed-off-by: Dave Hansen Reviewed-by: Kirill A. Shutemov Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/704088f5b4d72c7e24084f7f15bd1ac5005b7213.1692096753.git.kai.huang%40intel.com --- arch/x86/coco/tdx/tdcall.S | 3 --- arch/x86/virt/vmx/tdx/tdxcall.S | 29 ++++++++++++++++++++--------- 2 files changed, 20 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S index 2eca5f43734f..e5d4b7d8ecd4 100644 --- a/arch/x86/coco/tdx/tdcall.S +++ b/arch/x86/coco/tdx/tdcall.S @@ -78,10 +78,7 @@ * Return status of TDCALL via RAX. */ SYM_FUNC_START(__tdx_module_call) - FRAME_BEGIN TDX_MODULE_CALL host=0 - FRAME_END - RET SYM_FUNC_END(__tdx_module_call) /* diff --git a/arch/x86/virt/vmx/tdx/tdxcall.S b/arch/x86/virt/vmx/tdx/tdxcall.S index 49a54356ae99..6bdf6e137953 100644 --- a/arch/x86/virt/vmx/tdx/tdxcall.S +++ b/arch/x86/virt/vmx/tdx/tdxcall.S @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include +#include #include /* @@ -18,6 +19,7 @@ * TDX module. */ .macro TDX_MODULE_CALL host:req + FRAME_BEGIN /* * R12 will be used as temporary storage for struct tdx_module_output * pointer. Since R12-R15 registers are not used by TDCALL/SEAMCALL @@ -44,7 +46,7 @@ mov %rsi, %rcx /* Leave input param 2 in RDX */ - .if \host +.if \host seamcall /* * SEAMCALL instruction is essentially a VMExit from VMX root @@ -57,13 +59,10 @@ * This value will never be used as actual SEAMCALL error code as * it is from the Reserved status code class. */ - jnc .Lno_vmfailinvalid - mov $TDX_SEAMCALL_VMFAILINVALID, %rax -.Lno_vmfailinvalid: - - .else + jc .Lseamcall_vmfailinvalid +.else tdcall - .endif +.endif /* * Fetch output pointer from stack to R12 (It is used @@ -80,7 +79,7 @@ * Other registers may contain details of the failure. */ test %r12, %r12 - jz .Lno_output_struct + jz .Lout /* Copy result registers to output struct: */ movq %rcx, TDX_MODULE_rcx(%r12) @@ -90,7 +89,19 @@ movq %r10, TDX_MODULE_r10(%r12) movq %r11, TDX_MODULE_r11(%r12) -.Lno_output_struct: +.Lout: /* Restore the state of R12 register */ pop %r12 + + FRAME_END + RET + +.if \host +.Lseamcall_vmfailinvalid: + mov $TDX_SEAMCALL_VMFAILINVALID, %rax + /* pop the unused output pointer back to %r9 */ + pop %r9 + jmp .Lout +.endif /* \host */ + .endm -- cgit v1.2.3 From f0024dbfc48d8814d915eb5bd5253496b9b8a6df Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 15 Aug 2023 23:01:57 +1200 Subject: x86/tdx: Make macros of TDCALLs consistent with the spec The TDX spec names all TDCALLs with prefix "TDG". Currently, the kernel doesn't follow such convention for the macros of those TDCALLs but uses prefix "TDX_" for all of them. Although it's arguable whether the TDX spec names those TDCALLs properly, it's better for the kernel to follow the spec when naming those macros. Change all macros of TDCALLs to make them consistent with the spec. As a bonus, they get distinguished easily from the host-side SEAMCALLs, which all have prefix "TDH". No functional change intended. Signed-off-by: Kai Huang Signed-off-by: Dave Hansen Reviewed-by: Kirill A. Shutemov Reviewed-by: Kuppuswamy Sathyanarayanan Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/516dccd0bd8fb9a0b6af30d25bb2d971aa03d598.1692096753.git.kai.huang%40intel.com --- arch/x86/coco/tdx/tdx-shared.c | 4 ++-- arch/x86/coco/tdx/tdx.c | 8 ++++---- arch/x86/include/asm/shared/tdx.h | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/coco/tdx/tdx-shared.c b/arch/x86/coco/tdx/tdx-shared.c index ef20ddc37b58..f10cd3e4a04e 100644 --- a/arch/x86/coco/tdx/tdx-shared.c +++ b/arch/x86/coco/tdx/tdx-shared.c @@ -35,7 +35,7 @@ static unsigned long try_accept_one(phys_addr_t start, unsigned long len, } tdcall_rcx = start | page_size; - if (__tdx_module_call(TDX_ACCEPT_PAGE, tdcall_rcx, 0, 0, 0, NULL)) + if (__tdx_module_call(TDG_MEM_PAGE_ACCEPT, tdcall_rcx, 0, 0, 0, NULL)) return 0; return accept_size; @@ -45,7 +45,7 @@ bool tdx_accept_memory(phys_addr_t start, phys_addr_t end) { /* * For shared->private conversion, accept the page using - * TDX_ACCEPT_PAGE TDX module call. + * TDG_MEM_PAGE_ACCEPT TDX module call. */ while (start < end) { unsigned long len = end - start; diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 746075d20cd2..dd16bf1ec67a 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -91,7 +91,7 @@ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport) { u64 ret; - ret = __tdx_module_call(TDX_GET_REPORT, virt_to_phys(tdreport), + ret = __tdx_module_call(TDG_MR_REPORT, virt_to_phys(tdreport), virt_to_phys(reportdata), TDREPORT_SUBTYPE_0, 0, NULL); if (ret) { @@ -152,7 +152,7 @@ static void tdx_parse_tdinfo(u64 *cc_mask) * Guest-Host-Communication Interface (GHCI), section 2.4.2 TDCALL * [TDG.VP.INFO]. */ - tdx_module_call(TDX_GET_INFO, 0, 0, 0, 0, &out); + tdx_module_call(TDG_VP_INFO, 0, 0, 0, 0, &out); /* * The highest bit of a guest physical address is the "sharing" bit. @@ -594,7 +594,7 @@ void tdx_get_ve_info(struct ve_info *ve) * Note, the TDX module treats virtual NMIs as inhibited if the #VE * valid flag is set. It means that NMI=>#VE will not result in a #DF. */ - tdx_module_call(TDX_GET_VEINFO, 0, 0, 0, 0, &out); + tdx_module_call(TDG_VP_VEINFO_GET, 0, 0, 0, 0, &out); /* Transfer the output parameters */ ve->exit_reason = out.rcx; @@ -814,7 +814,7 @@ void __init tdx_early_init(void) cc_set_mask(cc_mask); /* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */ - tdx_module_call(TDX_WR, 0, TDCS_NOTIFY_ENABLES, 0, -1ULL, NULL); + tdx_module_call(TDG_VM_WR, 0, TDCS_NOTIFY_ENABLES, 0, -1ULL, NULL); /* * All bits above GPA width are reserved and kernel treats shared bit diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index 22ee23a3f24a..05874a58c8b0 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -11,11 +11,11 @@ #define TDX_IDENT "IntelTDX " /* TDX module Call Leaf IDs */ -#define TDX_GET_INFO 1 -#define TDX_GET_VEINFO 3 -#define TDX_GET_REPORT 4 -#define TDX_ACCEPT_PAGE 6 -#define TDX_WR 8 +#define TDG_VP_INFO 1 +#define TDG_VP_VEINFO_GET 3 +#define TDG_MR_REPORT 4 +#define TDG_MEM_PAGE_ACCEPT 6 +#define TDG_VM_WR 8 /* TDCS fields. To be used by TDG.VM.WR and TDG.VM.RD module calls */ #define TDCS_NOTIFY_ENABLES 0x9100000000000010 -- cgit v1.2.3 From 5efb96289e581c187af1bc288ce5d26ed6181749 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 15 Aug 2023 23:01:58 +1200 Subject: x86/tdx: Rename __tdx_module_call() to __tdcall() __tdx_module_call() is only used by the TDX guest to issue TDCALL to the TDX module. Rename it to __tdcall() to match its behaviour, e.g., it cannot be used to make host-side SEAMCALL. Also rename tdx_module_call() which is a wrapper of __tdx_module_call() to tdcall(). No functional change intended. Signed-off-by: Kai Huang Signed-off-by: Dave Hansen Reviewed-by: Kirill A. Shutemov Reviewed-by: Kuppuswamy Sathyanarayanan Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/785d20d99fbcd0db8262c94da6423375422d8c75.1692096753.git.kai.huang%40intel.com --- arch/x86/coco/tdx/tdcall.S | 10 +++++----- arch/x86/coco/tdx/tdx-shared.c | 2 +- arch/x86/coco/tdx/tdx.c | 18 +++++++++--------- arch/x86/include/asm/shared/tdx.h | 4 ++-- 4 files changed, 17 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S index e5d4b7d8ecd4..6aebac08f2bf 100644 --- a/arch/x86/coco/tdx/tdcall.S +++ b/arch/x86/coco/tdx/tdcall.S @@ -40,8 +40,8 @@ .section .noinstr.text, "ax" /* - * __tdx_module_call() - Used by TDX guests to request services from - * the TDX module (does not include VMM services) using TDCALL instruction. + * __tdcall() - Used by TDX guests to request services from the TDX + * module (does not include VMM services) using TDCALL instruction. * * Transforms function call register arguments into the TDCALL register ABI. * After TDCALL operation, TDX module output is saved in @out (if it is @@ -62,7 +62,7 @@ * *------------------------------------------------------------------------- * - * __tdx_module_call() function ABI: + * __tdcall() function ABI: * * @fn (RDI) - TDCALL Leaf ID, moved to RAX * @rcx (RSI) - Input parameter 1, moved to RCX @@ -77,9 +77,9 @@ * * Return status of TDCALL via RAX. */ -SYM_FUNC_START(__tdx_module_call) +SYM_FUNC_START(__tdcall) TDX_MODULE_CALL host=0 -SYM_FUNC_END(__tdx_module_call) +SYM_FUNC_END(__tdcall) /* * TDX_HYPERCALL - Make hypercalls to a TDX VMM using TDVMCALL leaf of TDCALL diff --git a/arch/x86/coco/tdx/tdx-shared.c b/arch/x86/coco/tdx/tdx-shared.c index f10cd3e4a04e..90631abdac34 100644 --- a/arch/x86/coco/tdx/tdx-shared.c +++ b/arch/x86/coco/tdx/tdx-shared.c @@ -35,7 +35,7 @@ static unsigned long try_accept_one(phys_addr_t start, unsigned long len, } tdcall_rcx = start | page_size; - if (__tdx_module_call(TDG_MEM_PAGE_ACCEPT, tdcall_rcx, 0, 0, 0, NULL)) + if (__tdcall(TDG_MEM_PAGE_ACCEPT, tdcall_rcx, 0, 0, 0, NULL)) return 0; return accept_size; diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index dd16bf1ec67a..9833c8140fca 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -66,10 +66,10 @@ EXPORT_SYMBOL_GPL(tdx_kvm_hypercall); * should only be used for calls that have no legitimate reason to fail * or where the kernel can not survive the call failing. */ -static inline void tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9, - struct tdx_module_output *out) +static inline void tdcall(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9, + struct tdx_module_output *out) { - if (__tdx_module_call(fn, rcx, rdx, r8, r9, out)) + if (__tdcall(fn, rcx, rdx, r8, r9, out)) panic("TDCALL %lld failed (Buggy TDX module!)\n", fn); } @@ -91,9 +91,9 @@ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport) { u64 ret; - ret = __tdx_module_call(TDG_MR_REPORT, virt_to_phys(tdreport), - virt_to_phys(reportdata), TDREPORT_SUBTYPE_0, - 0, NULL); + ret = __tdcall(TDG_MR_REPORT, virt_to_phys(tdreport), + virt_to_phys(reportdata), TDREPORT_SUBTYPE_0, + 0, NULL); if (ret) { if (TDCALL_RETURN_CODE(ret) == TDCALL_INVALID_OPERAND) return -EINVAL; @@ -152,7 +152,7 @@ static void tdx_parse_tdinfo(u64 *cc_mask) * Guest-Host-Communication Interface (GHCI), section 2.4.2 TDCALL * [TDG.VP.INFO]. */ - tdx_module_call(TDG_VP_INFO, 0, 0, 0, 0, &out); + tdcall(TDG_VP_INFO, 0, 0, 0, 0, &out); /* * The highest bit of a guest physical address is the "sharing" bit. @@ -594,7 +594,7 @@ void tdx_get_ve_info(struct ve_info *ve) * Note, the TDX module treats virtual NMIs as inhibited if the #VE * valid flag is set. It means that NMI=>#VE will not result in a #DF. */ - tdx_module_call(TDG_VP_VEINFO_GET, 0, 0, 0, 0, &out); + tdcall(TDG_VP_VEINFO_GET, 0, 0, 0, 0, &out); /* Transfer the output parameters */ ve->exit_reason = out.rcx; @@ -814,7 +814,7 @@ void __init tdx_early_init(void) cc_set_mask(cc_mask); /* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */ - tdx_module_call(TDG_VM_WR, 0, TDCS_NOTIFY_ENABLES, 0, -1ULL, NULL); + tdcall(TDG_VM_WR, 0, TDCS_NOTIFY_ENABLES, 0, -1ULL, NULL); /* * All bits above GPA width are reserved and kernel treats shared bit diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index 05874a58c8b0..c11c0d930fed 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -90,8 +90,8 @@ struct tdx_module_output { }; /* Used to communicate with the TDX module */ -u64 __tdx_module_call(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9, - struct tdx_module_output *out); +u64 __tdcall(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9, + struct tdx_module_output *out); bool tdx_accept_memory(phys_addr_t start, phys_addr_t end); -- cgit v1.2.3 From 57a420bb8186d1d0178b857e5dd5026093641654 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 15 Aug 2023 23:01:59 +1200 Subject: x86/tdx: Pass TDCALL/SEAMCALL input/output registers via a structure Currently, the TDX_MODULE_CALL asm macro, which handles both TDCALL and SEAMCALL, takes one parameter for each input register and an optional 'struct tdx_module_output' (a collection of output registers) as output. This is different from the TDX_HYPERCALL macro which uses a single 'struct tdx_hypercall_args' to carry all input/output registers. The newer TDX versions introduce more TDCALLs/SEAMCALLs which use more input/output registers. Also, the TDH.VP.ENTER (which isn't covered by the current TDX_MODULE_CALL macro) basically can use all registers that the TDX_HYPERCALL does. The current TDX_MODULE_CALL macro isn't extendible to cover those cases. Similar to the TDX_HYPERCALL macro, simplify the TDX_MODULE_CALL macro to use a single structure 'struct tdx_module_args' to carry all the input/output registers. Currently, R10/R11 are only used as output register but not as input by any TDCALL/SEAMCALL. Change to also use R10/R11 as input register to make input/output registers symmetric. Currently, the TDX_MODULE_CALL macro depends on the caller to pass a non-NULL 'struct tdx_module_output' to get additional output registers. Similar to the TDX_HYPERCALL macro, change the TDX_MODULE_CALL macro to take a new 'ret' macro argument to indicate whether to save the output registers to the 'struct tdx_module_args'. Also introduce a new __tdcall_ret() for that purpose, similar to the __tdx_hypercall_ret(). Note the tdcall(), which is a wrapper of __tdcall(), is called by three callers: tdx_parse_tdinfo(), tdx_get_ve_info() and tdx_early_init(). The former two need the additional output but the last one doesn't. For simplicity, make tdcall() always call __tdcall_ret() to avoid another "_ret()" wrapper. The last caller tdx_early_init() isn't performance critical anyway. Suggested-by: Peter Zijlstra Signed-off-by: Kai Huang Signed-off-by: Dave Hansen Reviewed-by: Kirill A. Shutemov Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/483616c1762d85eb3a3c3035a7de061cfacf2f14.1692096753.git.kai.huang%40intel.com --- arch/x86/coco/tdx/tdcall.S | 47 ++++++++----------- arch/x86/coco/tdx/tdx-shared.c | 6 +-- arch/x86/coco/tdx/tdx.c | 44 ++++++++++-------- arch/x86/include/asm/shared/tdx.h | 8 ++-- arch/x86/kernel/asm-offsets.c | 12 ++--- arch/x86/virt/vmx/tdx/tdxcall.S | 95 ++++++++++++++++----------------------- 6 files changed, 95 insertions(+), 117 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S index 6aebac08f2bf..56b9cd32895e 100644 --- a/arch/x86/coco/tdx/tdcall.S +++ b/arch/x86/coco/tdx/tdcall.S @@ -43,37 +43,10 @@ * __tdcall() - Used by TDX guests to request services from the TDX * module (does not include VMM services) using TDCALL instruction. * - * Transforms function call register arguments into the TDCALL register ABI. - * After TDCALL operation, TDX module output is saved in @out (if it is - * provided by the user). - * - *------------------------------------------------------------------------- - * TDCALL ABI: - *------------------------------------------------------------------------- - * Input Registers: - * - * RAX - TDCALL Leaf number. - * RCX,RDX,R8-R9 - TDCALL Leaf specific input registers. - * - * Output Registers: - * - * RAX - TDCALL instruction error code. - * RCX,RDX,R8-R11 - TDCALL Leaf specific output registers. - * - *------------------------------------------------------------------------- - * * __tdcall() function ABI: * - * @fn (RDI) - TDCALL Leaf ID, moved to RAX - * @rcx (RSI) - Input parameter 1, moved to RCX - * @rdx (RDX) - Input parameter 2, moved to RDX - * @r8 (RCX) - Input parameter 3, moved to R8 - * @r9 (R8) - Input parameter 4, moved to R9 - * - * @out (R9) - struct tdx_module_output pointer - * stored temporarily in R12 (not - * shared with the TDX module). It - * can be NULL. + * @fn (RDI) - TDCALL Leaf ID, moved to RAX + * @args (RSI) - struct tdx_module_args for input * * Return status of TDCALL via RAX. */ @@ -81,6 +54,22 @@ SYM_FUNC_START(__tdcall) TDX_MODULE_CALL host=0 SYM_FUNC_END(__tdcall) +/* + * __tdcall_ret() - Used by TDX guests to request services from the TDX + * module (does not include VMM services) using TDCALL instruction, with + * saving output registers to the 'struct tdx_module_args' used as input. + * + * __tdcall_ret() function ABI: + * + * @fn (RDI) - TDCALL Leaf ID, moved to RAX + * @args (RSI) - struct tdx_module_args for input and output + * + * Return status of TDCALL via RAX. + */ +SYM_FUNC_START(__tdcall_ret) + TDX_MODULE_CALL host=0 ret=1 +SYM_FUNC_END(__tdcall_ret) + /* * TDX_HYPERCALL - Make hypercalls to a TDX VMM using TDVMCALL leaf of TDCALL * instruction diff --git a/arch/x86/coco/tdx/tdx-shared.c b/arch/x86/coco/tdx/tdx-shared.c index 90631abdac34..a7396d0ddef9 100644 --- a/arch/x86/coco/tdx/tdx-shared.c +++ b/arch/x86/coco/tdx/tdx-shared.c @@ -5,7 +5,7 @@ static unsigned long try_accept_one(phys_addr_t start, unsigned long len, enum pg_level pg_level) { unsigned long accept_size = page_level_size(pg_level); - u64 tdcall_rcx; + struct tdx_module_args args = {}; u8 page_size; if (!IS_ALIGNED(start, accept_size)) @@ -34,8 +34,8 @@ static unsigned long try_accept_one(phys_addr_t start, unsigned long len, return 0; } - tdcall_rcx = start | page_size; - if (__tdcall(TDG_MEM_PAGE_ACCEPT, tdcall_rcx, 0, 0, 0, NULL)) + args.rcx = start | page_size; + if (__tdcall(TDG_MEM_PAGE_ACCEPT, &args)) return 0; return accept_size; diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 9833c8140fca..0741a9dbf61f 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -66,10 +66,9 @@ EXPORT_SYMBOL_GPL(tdx_kvm_hypercall); * should only be used for calls that have no legitimate reason to fail * or where the kernel can not survive the call failing. */ -static inline void tdcall(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9, - struct tdx_module_output *out) +static inline void tdcall(u64 fn, struct tdx_module_args *args) { - if (__tdcall(fn, rcx, rdx, r8, r9, out)) + if (__tdcall_ret(fn, args)) panic("TDCALL %lld failed (Buggy TDX module!)\n", fn); } @@ -89,11 +88,14 @@ static inline void tdcall(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9, */ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport) { + struct tdx_module_args args = { + .rcx = virt_to_phys(tdreport), + .rdx = virt_to_phys(reportdata), + .r8 = TDREPORT_SUBTYPE_0, + }; u64 ret; - ret = __tdcall(TDG_MR_REPORT, virt_to_phys(tdreport), - virt_to_phys(reportdata), TDREPORT_SUBTYPE_0, - 0, NULL); + ret = __tdcall(TDG_MR_REPORT, &args); if (ret) { if (TDCALL_RETURN_CODE(ret) == TDCALL_INVALID_OPERAND) return -EINVAL; @@ -141,7 +143,7 @@ static void __noreturn tdx_panic(const char *msg) static void tdx_parse_tdinfo(u64 *cc_mask) { - struct tdx_module_output out; + struct tdx_module_args args = {}; unsigned int gpa_width; u64 td_attr; @@ -152,7 +154,7 @@ static void tdx_parse_tdinfo(u64 *cc_mask) * Guest-Host-Communication Interface (GHCI), section 2.4.2 TDCALL * [TDG.VP.INFO]. */ - tdcall(TDG_VP_INFO, 0, 0, 0, 0, &out); + tdcall(TDG_VP_INFO, &args); /* * The highest bit of a guest physical address is the "sharing" bit. @@ -161,7 +163,7 @@ static void tdx_parse_tdinfo(u64 *cc_mask) * The GPA width that comes out of this call is critical. TDX guests * can not meaningfully run without it. */ - gpa_width = out.rcx & GENMASK(5, 0); + gpa_width = args.rcx & GENMASK(5, 0); *cc_mask = BIT_ULL(gpa_width - 1); /* @@ -169,7 +171,7 @@ static void tdx_parse_tdinfo(u64 *cc_mask) * memory. Ensure that no #VE will be delivered for accesses to * TD-private memory. Only VMM-shared memory (MMIO) will #VE. */ - td_attr = out.rdx; + td_attr = args.rdx; if (!(td_attr & ATTR_SEPT_VE_DISABLE)) { const char *msg = "TD misconfiguration: SEPT_VE_DISABLE attribute must be set."; @@ -577,7 +579,7 @@ __init bool tdx_early_handle_ve(struct pt_regs *regs) void tdx_get_ve_info(struct ve_info *ve) { - struct tdx_module_output out; + struct tdx_module_args args = {}; /* * Called during #VE handling to retrieve the #VE info from the @@ -594,15 +596,15 @@ void tdx_get_ve_info(struct ve_info *ve) * Note, the TDX module treats virtual NMIs as inhibited if the #VE * valid flag is set. It means that NMI=>#VE will not result in a #DF. */ - tdcall(TDG_VP_VEINFO_GET, 0, 0, 0, 0, &out); + tdcall(TDG_VP_VEINFO_GET, &args); /* Transfer the output parameters */ - ve->exit_reason = out.rcx; - ve->exit_qual = out.rdx; - ve->gla = out.r8; - ve->gpa = out.r9; - ve->instr_len = lower_32_bits(out.r10); - ve->instr_info = upper_32_bits(out.r10); + ve->exit_reason = args.rcx; + ve->exit_qual = args.rdx; + ve->gla = args.r8; + ve->gpa = args.r9; + ve->instr_len = lower_32_bits(args.r10); + ve->instr_info = upper_32_bits(args.r10); } /* @@ -799,6 +801,10 @@ static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages, void __init tdx_early_init(void) { + struct tdx_module_args args = { + .rdx = TDCS_NOTIFY_ENABLES, + .r9 = -1ULL, + }; u64 cc_mask; u32 eax, sig[3]; @@ -814,7 +820,7 @@ void __init tdx_early_init(void) cc_set_mask(cc_mask); /* Kernel does not use NOTIFY_ENABLES and does not need random #VEs */ - tdcall(TDG_VM_WR, 0, TDCS_NOTIFY_ENABLES, 0, -1ULL, NULL); + tdcall(TDG_VM_WR, &args); /* * All bits above GPA width are reserved and kernel treats shared bit diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index c11c0d930fed..ca8a681ad497 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -76,11 +76,11 @@ static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15) void __tdx_hypercall_failed(void); /* - * Used in __tdx_module_call() to gather the output registers' values of the + * Used in __tdcall*() to gather the input/output registers' values of the * TDCALL instruction when requesting services from the TDX module. This is a * software only structure and not part of the TDX module/VMM ABI */ -struct tdx_module_output { +struct tdx_module_args { u64 rcx; u64 rdx; u64 r8; @@ -90,8 +90,8 @@ struct tdx_module_output { }; /* Used to communicate with the TDX module */ -u64 __tdcall(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9, - struct tdx_module_output *out); +u64 __tdcall(u64 fn, struct tdx_module_args *args); +u64 __tdcall_ret(u64 fn, struct tdx_module_args *args); bool tdx_accept_memory(phys_addr_t start, phys_addr_t end); diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index dc3576303f1a..50383bc46dd7 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -68,12 +68,12 @@ static void __used common(void) #endif BLANK(); - OFFSET(TDX_MODULE_rcx, tdx_module_output, rcx); - OFFSET(TDX_MODULE_rdx, tdx_module_output, rdx); - OFFSET(TDX_MODULE_r8, tdx_module_output, r8); - OFFSET(TDX_MODULE_r9, tdx_module_output, r9); - OFFSET(TDX_MODULE_r10, tdx_module_output, r10); - OFFSET(TDX_MODULE_r11, tdx_module_output, r11); + OFFSET(TDX_MODULE_rcx, tdx_module_args, rcx); + OFFSET(TDX_MODULE_rdx, tdx_module_args, rdx); + OFFSET(TDX_MODULE_r8, tdx_module_args, r8); + OFFSET(TDX_MODULE_r9, tdx_module_args, r9); + OFFSET(TDX_MODULE_r10, tdx_module_args, r10); + OFFSET(TDX_MODULE_r11, tdx_module_args, r11); BLANK(); OFFSET(TDX_HYPERCALL_r8, tdx_hypercall_args, r8); diff --git a/arch/x86/virt/vmx/tdx/tdxcall.S b/arch/x86/virt/vmx/tdx/tdxcall.S index 6bdf6e137953..e9e19e7d77f8 100644 --- a/arch/x86/virt/vmx/tdx/tdxcall.S +++ b/arch/x86/virt/vmx/tdx/tdxcall.S @@ -17,34 +17,35 @@ * TDX module and hypercalls to the VMM. * SEAMCALL - used by TDX hosts to make requests to the * TDX module. + * + *------------------------------------------------------------------------- + * TDCALL/SEAMCALL ABI: + *------------------------------------------------------------------------- + * Input Registers: + * + * RAX - TDCALL/SEAMCALL Leaf number. + * RCX,RDX,R8-R11 - TDCALL/SEAMCALL Leaf specific input registers. + * + * Output Registers: + * + * RAX - TDCALL/SEAMCALL instruction error code. + * RCX,RDX,R8-R11 - TDCALL/SEAMCALL Leaf specific output registers. + * + *------------------------------------------------------------------------- */ -.macro TDX_MODULE_CALL host:req +.macro TDX_MODULE_CALL host:req ret=0 FRAME_BEGIN - /* - * R12 will be used as temporary storage for struct tdx_module_output - * pointer. Since R12-R15 registers are not used by TDCALL/SEAMCALL - * services supported by this function, it can be reused. - */ - - /* Callee saved, so preserve it */ - push %r12 - - /* - * Push output pointer to stack. - * After the operation, it will be fetched into R12 register. - */ - push %r9 - /* Mangle function call ABI into TDCALL/SEAMCALL ABI: */ /* Move Leaf ID to RAX */ mov %rdi, %rax - /* Move input 4 to R9 */ - mov %r8, %r9 - /* Move input 3 to R8 */ - mov %rcx, %r8 - /* Move input 1 to RCX */ - mov %rsi, %rcx - /* Leave input param 2 in RDX */ + + /* Move other input regs from 'struct tdx_module_args' */ + movq TDX_MODULE_rcx(%rsi), %rcx + movq TDX_MODULE_rdx(%rsi), %rdx + movq TDX_MODULE_r8(%rsi), %r8 + movq TDX_MODULE_r9(%rsi), %r9 + movq TDX_MODULE_r10(%rsi), %r10 + movq TDX_MODULE_r11(%rsi), %r11 .if \host seamcall @@ -59,49 +60,31 @@ * This value will never be used as actual SEAMCALL error code as * it is from the Reserved status code class. */ - jc .Lseamcall_vmfailinvalid + jc .Lseamcall_vmfailinvalid\@ .else tdcall .endif - /* - * Fetch output pointer from stack to R12 (It is used - * as temporary storage) - */ - pop %r12 - - /* - * Since this macro can be invoked with NULL as an output pointer, - * check if caller provided an output struct before storing output - * registers. - * - * Update output registers, even if the call failed (RAX != 0). - * Other registers may contain details of the failure. - */ - test %r12, %r12 - jz .Lout - - /* Copy result registers to output struct: */ - movq %rcx, TDX_MODULE_rcx(%r12) - movq %rdx, TDX_MODULE_rdx(%r12) - movq %r8, TDX_MODULE_r8(%r12) - movq %r9, TDX_MODULE_r9(%r12) - movq %r10, TDX_MODULE_r10(%r12) - movq %r11, TDX_MODULE_r11(%r12) - -.Lout: - /* Restore the state of R12 register */ - pop %r12 +.if \ret + /* Copy output registers to the structure */ + movq %rcx, TDX_MODULE_rcx(%rsi) + movq %rdx, TDX_MODULE_rdx(%rsi) + movq %r8, TDX_MODULE_r8(%rsi) + movq %r9, TDX_MODULE_r9(%rsi) + movq %r10, TDX_MODULE_r10(%rsi) + movq %r11, TDX_MODULE_r11(%rsi) +.endif +.if \host +.Lout\@: +.endif FRAME_END RET .if \host -.Lseamcall_vmfailinvalid: +.Lseamcall_vmfailinvalid\@: mov $TDX_SEAMCALL_VMFAILINVALID, %rax - /* pop the unused output pointer back to %r9 */ - pop %r9 - jmp .Lout + jmp .Lout\@ .endif /* \host */ .endm -- cgit v1.2.3 From 12f34ed8622aafd3bbd9d8aa4550dcb7016ea1e6 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 15 Aug 2023 23:02:00 +1200 Subject: x86/tdx: Extend TDX_MODULE_CALL to support more TDCALL/SEAMCALL leafs The TDX guest live migration support (TDX 1.5) adds new TDCALL/SEAMCALL leaf functions. Those new TDCALLs/SEAMCALLs take additional registers for input (R10-R13) and output (R12-R13). TDG.SERVTD.RD is an example. Also, the current TDX_MODULE_CALL doesn't aim to handle TDH.VP.ENTER SEAMCALL, which monitors the TDG.VP.VMCALL in input/output registers when it returns in case of VMCALL from TDX guest. With those new TDCALLs/SEAMCALLs and the TDH.VP.ENTER covered, the TDX_MODULE_CALL macro basically needs to handle the same input/output registers as the TDX_HYPERCALL does. And as a result, they also share similar logic in the assembly, thus should be unified to use one common assembly. Extend the TDX_MODULE_CALL asm to support the new TDCALLs/SEAMCALLs and also the TDH.VP.ENTER SEAMCALL. Eventually it will be unified with the TDX_HYPERCALL. The new input/output registers fit with the "callee-saved" registers in the x86 calling convention. Add a new "saved" parameter to support those new TDCALLs/SEAMCALLs and TDH.VP.ENTER and keep the existing TDCALLs/SEAMCALLs minimally impacted. For TDH.VP.ENTER, after it returns the registers shared by the guest contain guest's values. Explicitly clear them to prevent speculative use of guest's values. Note most TDX live migration related SEAMCALLs may also clobber AVX* state ("AVX, AVX2 and AVX512 state: may be reset to the architectural INIT state" -- see TDH.EXPORT.MEM for example). And TDH.VP.ENTER also clobbers XMM0-XMM15 when the corresponding bit is set in RCX. Don't handle them in the TDX_MODULE_CALL macro but let the caller save and restore when needed. This is basically based on Peter's code. Suggested-by: Peter Zijlstra Signed-off-by: Kai Huang Signed-off-by: Dave Hansen Reviewed-by: Kirill A. Shutemov Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/d4785de7c392f7c5684407f6c24a73b92148ec49.1692096753.git.kai.huang%40intel.com --- arch/x86/coco/tdx/tdcall.S | 4 ++ arch/x86/include/asm/shared/tdx.h | 10 ++++ arch/x86/kernel/asm-offsets.c | 7 +++ arch/x86/virt/vmx/tdx/tdxcall.S | 123 ++++++++++++++++++++++++++++++++++++-- 4 files changed, 138 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S index 56b9cd32895e..faf731d2b66a 100644 --- a/arch/x86/coco/tdx/tdcall.S +++ b/arch/x86/coco/tdx/tdcall.S @@ -48,6 +48,8 @@ * @fn (RDI) - TDCALL Leaf ID, moved to RAX * @args (RSI) - struct tdx_module_args for input * + * Only RCX/RDX/R8-R11 are used as input registers. + * * Return status of TDCALL via RAX. */ SYM_FUNC_START(__tdcall) @@ -64,6 +66,8 @@ SYM_FUNC_END(__tdcall) * @fn (RDI) - TDCALL Leaf ID, moved to RAX * @args (RSI) - struct tdx_module_args for input and output * + * Only RCX/RDX/R8-R11 are used as input/output registers. + * * Return status of TDCALL via RAX. */ SYM_FUNC_START(__tdcall_ret) diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index ca8a681ad497..669749a22d8c 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -81,12 +81,22 @@ void __tdx_hypercall_failed(void); * software only structure and not part of the TDX module/VMM ABI */ struct tdx_module_args { + /* callee-clobbered */ u64 rcx; u64 rdx; u64 r8; u64 r9; + /* extra callee-clobbered */ u64 r10; u64 r11; + /* callee-saved + rdi/rsi */ + u64 r12; + u64 r13; + u64 r14; + u64 r15; + u64 rbx; + u64 rdi; + u64 rsi; }; /* Used to communicate with the TDX module */ diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 50383bc46dd7..1581564a67b7 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -74,6 +74,13 @@ static void __used common(void) OFFSET(TDX_MODULE_r9, tdx_module_args, r9); OFFSET(TDX_MODULE_r10, tdx_module_args, r10); OFFSET(TDX_MODULE_r11, tdx_module_args, r11); + OFFSET(TDX_MODULE_r12, tdx_module_args, r12); + OFFSET(TDX_MODULE_r13, tdx_module_args, r13); + OFFSET(TDX_MODULE_r14, tdx_module_args, r14); + OFFSET(TDX_MODULE_r15, tdx_module_args, r15); + OFFSET(TDX_MODULE_rbx, tdx_module_args, rbx); + OFFSET(TDX_MODULE_rdi, tdx_module_args, rdi); + OFFSET(TDX_MODULE_rsi, tdx_module_args, rsi); BLANK(); OFFSET(TDX_HYPERCALL_r8, tdx_hypercall_args, r8); diff --git a/arch/x86/virt/vmx/tdx/tdxcall.S b/arch/x86/virt/vmx/tdx/tdxcall.S index e9e19e7d77f8..c54ea009b241 100644 --- a/arch/x86/virt/vmx/tdx/tdxcall.S +++ b/arch/x86/virt/vmx/tdx/tdxcall.S @@ -23,17 +23,25 @@ *------------------------------------------------------------------------- * Input Registers: * - * RAX - TDCALL/SEAMCALL Leaf number. - * RCX,RDX,R8-R11 - TDCALL/SEAMCALL Leaf specific input registers. + * RAX - TDCALL/SEAMCALL Leaf number. + * RCX,RDX,RDI,RSI,RBX,R8-R15 - TDCALL/SEAMCALL Leaf specific input registers. * * Output Registers: * - * RAX - TDCALL/SEAMCALL instruction error code. - * RCX,RDX,R8-R11 - TDCALL/SEAMCALL Leaf specific output registers. + * RAX - TDCALL/SEAMCALL instruction error code. + * RCX,RDX,RDI,RSI,RBX,R8-R15 - TDCALL/SEAMCALL Leaf specific output registers. * *------------------------------------------------------------------------- + * + * So while the common core (RAX,RCX,RDX,R8-R11) fits nicely in the + * callee-clobbered registers and even leaves RDI,RSI free to act as a + * base pointer, some leafs (e.g., VP.ENTER) make a giant mess of things. + * + * For simplicity, assume that anything that needs the callee-saved regs + * also tramples on RDI,RSI. This isn't strictly true, see for example + * TDH.EXPORT.MEM. */ -.macro TDX_MODULE_CALL host:req ret=0 +.macro TDX_MODULE_CALL host:req ret=0 saved=0 FRAME_BEGIN /* Move Leaf ID to RAX */ @@ -47,6 +55,35 @@ movq TDX_MODULE_r10(%rsi), %r10 movq TDX_MODULE_r11(%rsi), %r11 +.if \saved + /* + * Move additional input regs from the structure. For simplicity + * assume that anything needs the callee-saved regs also tramples + * on RDI/RSI (see VP.ENTER). + */ + /* Save those callee-saved GPRs as mandated by the x86_64 ABI */ + pushq %rbx + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + + movq TDX_MODULE_r12(%rsi), %r12 + movq TDX_MODULE_r13(%rsi), %r13 + movq TDX_MODULE_r14(%rsi), %r14 + movq TDX_MODULE_r15(%rsi), %r15 + movq TDX_MODULE_rbx(%rsi), %rbx + +.if \ret + /* Save the structure pointer as RSI is about to be clobbered */ + pushq %rsi +.endif + + movq TDX_MODULE_rdi(%rsi), %rdi + /* RSI needs to be done at last */ + movq TDX_MODULE_rsi(%rsi), %rsi +.endif /* \saved */ + .if \host seamcall /* @@ -66,6 +103,36 @@ .endif .if \ret +.if \saved + /* + * Restore the structure from stack to save the output registers + * + * In case of VP.ENTER returns due to TDVMCALL, all registers are + * valid thus no register can be used as spare to restore the + * structure from the stack (see "TDH.VP.ENTER Output Operands + * Definition on TDCALL(TDG.VP.VMCALL) Following a TD Entry"). + * For this case, need to make one register as spare by saving it + * to the stack and then manually load the structure pointer to + * the spare register. + * + * Note for other TDCALLs/SEAMCALLs there are spare registers + * thus no need for such hack but just use this for all. + */ + pushq %rax /* save the TDCALL/SEAMCALL return code */ + movq 8(%rsp), %rax /* restore the structure pointer */ + movq %rsi, TDX_MODULE_rsi(%rax) /* save RSI */ + popq %rax /* restore the return code */ + popq %rsi /* pop the structure pointer */ + + /* Copy additional output regs to the structure */ + movq %r12, TDX_MODULE_r12(%rsi) + movq %r13, TDX_MODULE_r13(%rsi) + movq %r14, TDX_MODULE_r14(%rsi) + movq %r15, TDX_MODULE_r15(%rsi) + movq %rbx, TDX_MODULE_rbx(%rsi) + movq %rdi, TDX_MODULE_rdi(%rsi) +.endif /* \saved */ + /* Copy output registers to the structure */ movq %rcx, TDX_MODULE_rcx(%rsi) movq %rdx, TDX_MODULE_rdx(%rsi) @@ -73,17 +140,61 @@ movq %r9, TDX_MODULE_r9(%rsi) movq %r10, TDX_MODULE_r10(%rsi) movq %r11, TDX_MODULE_r11(%rsi) -.endif +.endif /* \ret */ + +.if \host && \saved && \ret + /* + * Clear registers shared by guest for VP.ENTER to prevent + * speculative use of guest's values, including those are + * restored from the stack. + * + * See arch/x86/kvm/vmx/vmenter.S: + * + * In theory, a L1 cache miss when restoring register from stack + * could lead to speculative execution with guest's values. + * + * Note: RBP/RSP are not used as shared register. RSI has been + * restored already. + * + * XOR is cheap, thus unconditionally do for all leafs. + */ + xorl %ecx, %ecx + xorl %edx, %edx + xorl %r8d, %r8d + xorl %r9d, %r9d + xorl %r10d, %r10d + xorl %r11d, %r11d + xorl %r12d, %r12d + xorl %r13d, %r13d + xorl %r14d, %r14d + xorl %r15d, %r15d + xorl %ebx, %ebx + xorl %edi, %edi +.endif /* \host && \ret && \host */ .if \host .Lout\@: .endif + +.if \saved + /* Restore callee-saved GPRs as mandated by the x86_64 ABI */ + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbx +.endif /* \saved */ + FRAME_END RET .if \host .Lseamcall_vmfailinvalid\@: mov $TDX_SEAMCALL_VMFAILINVALID, %rax +.if \ret && \saved + /* pop the unused structure pointer back to RSI */ + popq %rsi +.endif jmp .Lout\@ .endif /* \host */ -- cgit v1.2.3 From c641cfb5c157b6c3062a824fd8ba190bf06fb952 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 15 Aug 2023 23:02:01 +1200 Subject: x86/tdx: Make TDX_HYPERCALL asm similar to TDX_MODULE_CALL Now the 'struct tdx_hypercall_args' and 'struct tdx_module_args' are almost the same, and the TDX_HYPERCALL and TDX_MODULE_CALL asm macro share similar code pattern too. The __tdx_hypercall() and __tdcall() should be unified to use the same assembly code. As a preparation to unify them, simplify the TDX_HYPERCALL to make it more like the TDX_MODULE_CALL. The TDX_HYPERCALL takes the pointer of 'struct tdx_hypercall_args' as function call argument, and does below extra things comparing to the TDX_MODULE_CALL: 1) It sets RAX to 0 (TDG.VP.VMCALL leaf) internally; 2) It sets RCX to the (fixed) bitmap of shared registers internally; 3) It calls __tdx_hypercall_failed() internally (and panics) when the TDCALL instruction itself fails; 4) After TDCALL, it moves R10 to RAX to return the return code of the VMCALL leaf, regardless the '\ret' asm macro argument; Firstly, change the TDX_HYPERCALL to take the same function call arguments as the TDX_MODULE_CALL does: TDCALL leaf ID, and the pointer to 'struct tdx_module_args'. Then 1) and 2) can be moved to the caller: - TDG.VP.VMCALL leaf ID can be passed via the function call argument; - 'struct tdx_module_args' is 'struct tdx_hypercall_args' + RCX, thus the bitmap of shared registers can be passed via RCX in the structure. Secondly, to move 3) and 4) out of assembly, make the TDX_HYPERCALL always save output registers to the structure. The caller then can: - Call __tdx_hypercall_failed() when TDX_HYPERCALL returns error; - Return R10 in the structure as the return code of the VMCALL leaf; With above changes, change the asm function from __tdx_hypercall() to __tdcall_hypercall(), and reimplement __tdx_hypercall() as the C wrapper of it. This avoids having to add another wrapper of __tdx_hypercall() (_tdx_hypercall() is already taken). The __tdcall_hypercall() will be replaced with a __tdcall() variant using TDX_MODULE_CALL in a later commit as the final goal is to have one assembly to handle both TDCALL and TDVMCALL. Currently, the __tdx_hypercall() asm is in '.noinstr.text'. To keep this unchanged, annotate __tdx_hypercall(), which is a C function now, as 'noinstr'. Remove the __tdx_hypercall_ret() as __tdx_hypercall() already does so. Implement __tdx_hypercall() in tdx-shared.c so it can be shared with the compressed code. Opportunistically fix a checkpatch error complaining using space around parenthesis '(' and ')' while moving the bitmap of shared registers to . [ dhansen: quash new calls of __tdx_hypercall_ret() that showed up ] Suggested-by: Peter Zijlstra Signed-off-by: Kai Huang Signed-off-by: Dave Hansen Reviewed-by: Kirill A. Shutemov Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/0cbf25e7aee3256288045023a31f65f0cef90af4.1692096753.git.kai.huang%40intel.com --- arch/x86/boot/compressed/tdx.c | 2 +- arch/x86/coco/tdx/tdcall.S | 150 +++++++++++++------------------------- arch/x86/coco/tdx/tdx-shared.c | 43 +++++++++++ arch/x86/coco/tdx/tdx.c | 13 ++-- arch/x86/hyperv/ivm.c | 4 +- arch/x86/include/asm/shared/tdx.h | 86 ++++++++++++++-------- arch/x86/kernel/asm-offsets.c | 14 ---- 7 files changed, 160 insertions(+), 152 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/tdx.c b/arch/x86/boot/compressed/tdx.c index 8841b945a1e2..bc03eae2c560 100644 --- a/arch/x86/boot/compressed/tdx.c +++ b/arch/x86/boot/compressed/tdx.c @@ -26,7 +26,7 @@ static inline unsigned int tdx_io_in(int size, u16 port) .r14 = port, }; - if (__tdx_hypercall_ret(&args)) + if (__tdx_hypercall(&args)) return UINT_MAX; return args.r11; diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S index faf731d2b66a..6d76d7d49b72 100644 --- a/arch/x86/coco/tdx/tdcall.S +++ b/arch/x86/coco/tdx/tdcall.S @@ -2,41 +2,12 @@ #include #include #include -#include #include -#include #include #include "../../virt/vmx/tdx/tdxcall.S" -/* - * Bitmasks of exposed registers (with VMM). - */ -#define TDX_RDX BIT(2) -#define TDX_RBX BIT(3) -#define TDX_RSI BIT(6) -#define TDX_RDI BIT(7) -#define TDX_R8 BIT(8) -#define TDX_R9 BIT(9) -#define TDX_R10 BIT(10) -#define TDX_R11 BIT(11) -#define TDX_R12 BIT(12) -#define TDX_R13 BIT(13) -#define TDX_R14 BIT(14) -#define TDX_R15 BIT(15) - -/* - * These registers are clobbered to hold arguments for each - * TDVMCALL. They are safe to expose to the VMM. - * Each bit in this mask represents a register ID. Bit field - * details can be found in TDX GHCI specification, section - * titled "TDCALL [TDG.VP.VMCALL] leaf". - */ -#define TDVMCALL_EXPOSE_REGS_MASK \ - ( TDX_RDX | TDX_RBX | TDX_RSI | TDX_RDI | TDX_R8 | TDX_R9 | \ - TDX_R10 | TDX_R11 | TDX_R12 | TDX_R13 | TDX_R14 | TDX_R15 ) - .section .noinstr.text, "ax" /* @@ -78,10 +49,13 @@ SYM_FUNC_END(__tdcall_ret) * TDX_HYPERCALL - Make hypercalls to a TDX VMM using TDVMCALL leaf of TDCALL * instruction * - * Transforms values in function call argument struct tdx_hypercall_args @args + * Transforms values in function call argument struct tdx_module_args @args * into the TDCALL register ABI. After TDCALL operation, VMM output is saved * back in @args, if \ret is 1. * + * Depends on the caller to pass TDG.VP.VMCALL as the TDCALL leaf, and set + * @args::rcx to TDVMCALL_EXPOSE_REGS_MASK. + * *------------------------------------------------------------------------- * TD VMCALL ABI: *------------------------------------------------------------------------- @@ -106,7 +80,7 @@ SYM_FUNC_END(__tdcall_ret) * R8-R15 - Same as above. * */ -.macro TDX_HYPERCALL ret:req +.macro TDX_HYPERCALL FRAME_BEGIN /* Save callee-saved GPRs as mandated by the x86_64 ABI */ @@ -116,63 +90,52 @@ SYM_FUNC_END(__tdcall_ret) push %r12 push %rbx - /* Free RDI to be used as TDVMCALL arguments */ + /* Move Leaf ID to RAX */ movq %rdi, %rax + /* Move bitmap of shared registers to RCX */ + movq TDX_MODULE_rcx(%rsi), %rcx + /* Copy hypercall registers from arg struct: */ - movq TDX_HYPERCALL_r8(%rax), %r8 - movq TDX_HYPERCALL_r9(%rax), %r9 - movq TDX_HYPERCALL_r10(%rax), %r10 - movq TDX_HYPERCALL_r11(%rax), %r11 - movq TDX_HYPERCALL_r12(%rax), %r12 - movq TDX_HYPERCALL_r13(%rax), %r13 - movq TDX_HYPERCALL_r14(%rax), %r14 - movq TDX_HYPERCALL_r15(%rax), %r15 - movq TDX_HYPERCALL_rdi(%rax), %rdi - movq TDX_HYPERCALL_rsi(%rax), %rsi - movq TDX_HYPERCALL_rbx(%rax), %rbx - movq TDX_HYPERCALL_rdx(%rax), %rdx - - push %rax - - /* Mangle function call ABI into TDCALL ABI: */ - /* Set TDCALL leaf ID (TDVMCALL (0)) in RAX */ - xor %eax, %eax - - movl $TDVMCALL_EXPOSE_REGS_MASK, %ecx + movq TDX_MODULE_r8(%rsi), %r8 + movq TDX_MODULE_r9(%rsi), %r9 + movq TDX_MODULE_r10(%rsi), %r10 + movq TDX_MODULE_r11(%rsi), %r11 + movq TDX_MODULE_r12(%rsi), %r12 + movq TDX_MODULE_r13(%rsi), %r13 + movq TDX_MODULE_r14(%rsi), %r14 + movq TDX_MODULE_r15(%rsi), %r15 + movq TDX_MODULE_rdi(%rsi), %rdi + movq TDX_MODULE_rbx(%rsi), %rbx + movq TDX_MODULE_rdx(%rsi), %rdx + + pushq %rsi + movq TDX_MODULE_rsi(%rsi), %rsi tdcall /* - * RAX!=0 indicates a failure of the TDVMCALL mechanism itself and that - * something has gone horribly wrong with the TDX module. + * Restore the pointer of the structure to save output registers. * - * The return status of the hypercall operation is in a separate - * register (in R10). Hypercall errors are a part of normal operation - * and are handled by callers. + * RCX is used as bitmap of shared registers and doesn't hold any + * value provided by the VMM, thus it can be used as spare to + * restore the structure pointer. */ - testq %rax, %rax - jne .Lpanic\@ - - pop %rax - - .if \ret - movq %r8, TDX_HYPERCALL_r8(%rax) - movq %r9, TDX_HYPERCALL_r9(%rax) - movq %r10, TDX_HYPERCALL_r10(%rax) - movq %r11, TDX_HYPERCALL_r11(%rax) - movq %r12, TDX_HYPERCALL_r12(%rax) - movq %r13, TDX_HYPERCALL_r13(%rax) - movq %r14, TDX_HYPERCALL_r14(%rax) - movq %r15, TDX_HYPERCALL_r15(%rax) - movq %rdi, TDX_HYPERCALL_rdi(%rax) - movq %rsi, TDX_HYPERCALL_rsi(%rax) - movq %rbx, TDX_HYPERCALL_rbx(%rax) - movq %rdx, TDX_HYPERCALL_rdx(%rax) - .endif - - /* TDVMCALL leaf return code is in R10 */ - movq %r10, %rax + popq %rcx + movq %rsi, TDX_MODULE_rsi(%rcx) + movq %rcx, %rsi + + movq %r8, TDX_MODULE_r8(%rsi) + movq %r9, TDX_MODULE_r9(%rsi) + movq %r10, TDX_MODULE_r10(%rsi) + movq %r11, TDX_MODULE_r11(%rsi) + movq %r12, TDX_MODULE_r12(%rsi) + movq %r13, TDX_MODULE_r13(%rsi) + movq %r14, TDX_MODULE_r14(%rsi) + movq %r15, TDX_MODULE_r15(%rsi) + movq %rdi, TDX_MODULE_rdi(%rsi) + movq %rbx, TDX_MODULE_rbx(%rsi) + movq %rdx, TDX_MODULE_rdx(%rsi) /* * Zero out registers exposed to the VMM to avoid speculative execution @@ -198,33 +161,20 @@ SYM_FUNC_END(__tdcall_ret) FRAME_END RET -.Lpanic\@: - call __tdx_hypercall_failed - /* __tdx_hypercall_failed never returns */ - REACHABLE - jmp .Lpanic\@ .endm /* * - * __tdx_hypercall() function ABI: - * - * @args (RDI) - struct tdx_hypercall_args for input - * - * On successful completion, return the hypercall error code. - */ -SYM_FUNC_START(__tdx_hypercall) - TDX_HYPERCALL ret=0 -SYM_FUNC_END(__tdx_hypercall) - -/* + * __tdcall_hypercall() function ABI: * - * __tdx_hypercall_ret() function ABI: + * @fn (RDI) - TDCALL leaf ID, moved to RAX + * @args (RSI) - struct tdx_module_args for input/output * - * @args (RDI) - struct tdx_hypercall_args for input and output + * @fn and @args::rcx from the caller must be TDG_VP_VMCALL and + * TDVMCALL_EXPOSE_REGS_MASK respectively. * * On successful completion, return the hypercall error code. */ -SYM_FUNC_START(__tdx_hypercall_ret) - TDX_HYPERCALL ret=1 -SYM_FUNC_END(__tdx_hypercall_ret) +SYM_FUNC_START(__tdcall_hypercall) + TDX_HYPERCALL +SYM_FUNC_END(__tdcall_hypercall) diff --git a/arch/x86/coco/tdx/tdx-shared.c b/arch/x86/coco/tdx/tdx-shared.c index a7396d0ddef9..b47c8cce91b0 100644 --- a/arch/x86/coco/tdx/tdx-shared.c +++ b/arch/x86/coco/tdx/tdx-shared.c @@ -69,3 +69,46 @@ bool tdx_accept_memory(phys_addr_t start, phys_addr_t end) return true; } + +noinstr u64 __tdx_hypercall(struct tdx_hypercall_args *args) +{ + struct tdx_module_args margs = { + .rcx = TDVMCALL_EXPOSE_REGS_MASK, + .rdx = args->rdx, + .r8 = args->r8, + .r9 = args->r9, + .r10 = args->r10, + .r11 = args->r11, + .r12 = args->r12, + .r13 = args->r13, + .r14 = args->r14, + .r15 = args->r15, + .rbx = args->rbx, + .rdi = args->rdi, + .rsi = args->rsi, + }; + + /* + * Failure of __tdcall_hypercall() indicates a failure of the TDVMCALL + * mechanism itself and that something has gone horribly wrong with + * the TDX module. __tdx_hypercall_failed() never returns. + */ + if (__tdcall_hypercall(TDG_VP_VMCALL, &margs)) + __tdx_hypercall_failed(); + + args->r8 = margs.r8; + args->r9 = margs.r9; + args->r10 = margs.r10; + args->r11 = margs.r11; + args->r12 = margs.r12; + args->r13 = margs.r13; + args->r14 = margs.r14; + args->r15 = margs.r15; + args->rdi = margs.rdi; + args->rsi = margs.rsi; + args->rbx = margs.rbx; + args->rdx = margs.rdx; + + /* TDVMCALL leaf return code is in R10 */ + return args->r10; +} diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 0741a9dbf61f..26b6c3d8f0be 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -38,7 +38,7 @@ #define TDREPORT_SUBTYPE_0 0 /* Called from __tdx_hypercall() for unrecoverable failure */ -noinstr void __tdx_hypercall_failed(void) +noinstr void __noreturn __tdx_hypercall_failed(void) { instrumentation_begin(); panic("TDVMCALL failed. TDX module bug?"); @@ -285,7 +285,7 @@ static int read_msr(struct pt_regs *regs, struct ve_info *ve) * can be found in TDX Guest-Host-Communication Interface * (GHCI), section titled "TDG.VP.VMCALL". */ - if (__tdx_hypercall_ret(&args)) + if (__tdx_hypercall(&args)) return -EIO; regs->ax = lower_32_bits(args.r11); @@ -339,7 +339,7 @@ static int handle_cpuid(struct pt_regs *regs, struct ve_info *ve) * ABI can be found in TDX Guest-Host-Communication Interface * (GHCI), section titled "VP.VMCALL". */ - if (__tdx_hypercall_ret(&args)) + if (__tdx_hypercall(&args)) return -EIO; /* @@ -366,8 +366,9 @@ static bool mmio_read(int size, unsigned long addr, unsigned long *val) .r15 = *val, }; - if (__tdx_hypercall_ret(&args)) + if (__tdx_hypercall(&args)) return false; + *val = args.r11; return true; } @@ -500,7 +501,7 @@ static bool handle_in(struct pt_regs *regs, int size, int port) * in TDX Guest-Host-Communication Interface (GHCI) section titled * "TDG.VP.VMCALL". */ - success = !__tdx_hypercall_ret(&args); + success = !__tdx_hypercall(&args); /* Update part of the register affected by the emulated instruction */ regs->ax &= ~mask; @@ -729,7 +730,7 @@ static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc) .r13 = end - start }; u64 map_fail_paddr; - u64 ret = __tdx_hypercall_ret(&args); + u64 ret = __tdx_hypercall(&args); if (ret != TDVMCALL_STATUS_RETRY) return !ret; diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 8c6bf07f7d2b..e48983550743 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -404,7 +404,7 @@ static void hv_tdx_msr_read(u64 msr, u64 *val) .r12 = msr, }; - u64 ret = __tdx_hypercall_ret(&args); + u64 ret = __tdx_hypercall(&args); if (WARN_ONCE(ret, "Failed to emulate MSR read: %lld\n", ret)) *val = 0; @@ -420,7 +420,7 @@ u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) args.rdx = param1; args.r8 = param2; - (void)__tdx_hypercall_ret(&args); + (void)__tdx_hypercall(&args); return args.r11; } diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index 669749a22d8c..85493c4813cf 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -11,6 +11,7 @@ #define TDX_IDENT "IntelTDX " /* TDX module Call Leaf IDs */ +#define TDG_VP_VMCALL 0 #define TDG_VP_INFO 1 #define TDG_VP_VEINFO_GET 3 #define TDG_MR_REPORT 4 @@ -26,8 +27,63 @@ #define TDVMCALL_STATUS_RETRY 1 +/* + * Bitmasks of exposed registers (with VMM). + */ +#define TDX_RDX BIT(2) +#define TDX_RBX BIT(3) +#define TDX_RSI BIT(6) +#define TDX_RDI BIT(7) +#define TDX_R8 BIT(8) +#define TDX_R9 BIT(9) +#define TDX_R10 BIT(10) +#define TDX_R11 BIT(11) +#define TDX_R12 BIT(12) +#define TDX_R13 BIT(13) +#define TDX_R14 BIT(14) +#define TDX_R15 BIT(15) + +/* + * These registers are clobbered to hold arguments for each + * TDVMCALL. They are safe to expose to the VMM. + * Each bit in this mask represents a register ID. Bit field + * details can be found in TDX GHCI specification, section + * titled "TDCALL [TDG.VP.VMCALL] leaf". + */ +#define TDVMCALL_EXPOSE_REGS_MASK \ + (TDX_RDX | TDX_RBX | TDX_RSI | TDX_RDI | TDX_R8 | TDX_R9 | \ + TDX_R10 | TDX_R11 | TDX_R12 | TDX_R13 | TDX_R14 | TDX_R15) + #ifndef __ASSEMBLY__ +/* + * Used in __tdcall*() to gather the input/output registers' values of the + * TDCALL instruction when requesting services from the TDX module. This is a + * software only structure and not part of the TDX module/VMM ABI + */ +struct tdx_module_args { + /* callee-clobbered */ + u64 rcx; + u64 rdx; + u64 r8; + u64 r9; + /* extra callee-clobbered */ + u64 r10; + u64 r11; + /* callee-saved + rdi/rsi */ + u64 r12; + u64 r13; + u64 r14; + u64 r15; + u64 rbx; + u64 rdi; + u64 rsi; +}; + +/* Used to communicate with the TDX module */ +u64 __tdcall(u64 fn, struct tdx_module_args *args); +u64 __tdcall_ret(u64 fn, struct tdx_module_args *args); + /* * Used in __tdx_hypercall() to pass down and get back registers' values of * the TDCALL instruction when requesting services from the VMM. @@ -50,8 +106,8 @@ struct tdx_hypercall_args { }; /* Used to request services from the VMM */ +u64 __tdcall_hypercall(u64 fn, struct tdx_module_args *args); u64 __tdx_hypercall(struct tdx_hypercall_args *args); -u64 __tdx_hypercall_ret(struct tdx_hypercall_args *args); /* * Wrapper for standard use of __tdx_hypercall with no output aside from @@ -75,34 +131,6 @@ static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15) /* Called from __tdx_hypercall() for unrecoverable failure */ void __tdx_hypercall_failed(void); -/* - * Used in __tdcall*() to gather the input/output registers' values of the - * TDCALL instruction when requesting services from the TDX module. This is a - * software only structure and not part of the TDX module/VMM ABI - */ -struct tdx_module_args { - /* callee-clobbered */ - u64 rcx; - u64 rdx; - u64 r8; - u64 r9; - /* extra callee-clobbered */ - u64 r10; - u64 r11; - /* callee-saved + rdi/rsi */ - u64 r12; - u64 r13; - u64 r14; - u64 r15; - u64 rbx; - u64 rdi; - u64 rsi; -}; - -/* Used to communicate with the TDX module */ -u64 __tdcall(u64 fn, struct tdx_module_args *args); -u64 __tdcall_ret(u64 fn, struct tdx_module_args *args); - bool tdx_accept_memory(phys_addr_t start, phys_addr_t end); /* diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 1581564a67b7..6913b372ccf7 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -82,20 +82,6 @@ static void __used common(void) OFFSET(TDX_MODULE_rdi, tdx_module_args, rdi); OFFSET(TDX_MODULE_rsi, tdx_module_args, rsi); - BLANK(); - OFFSET(TDX_HYPERCALL_r8, tdx_hypercall_args, r8); - OFFSET(TDX_HYPERCALL_r9, tdx_hypercall_args, r9); - OFFSET(TDX_HYPERCALL_r10, tdx_hypercall_args, r10); - OFFSET(TDX_HYPERCALL_r11, tdx_hypercall_args, r11); - OFFSET(TDX_HYPERCALL_r12, tdx_hypercall_args, r12); - OFFSET(TDX_HYPERCALL_r13, tdx_hypercall_args, r13); - OFFSET(TDX_HYPERCALL_r14, tdx_hypercall_args, r14); - OFFSET(TDX_HYPERCALL_r15, tdx_hypercall_args, r15); - OFFSET(TDX_HYPERCALL_rdi, tdx_hypercall_args, rdi); - OFFSET(TDX_HYPERCALL_rsi, tdx_hypercall_args, rsi); - OFFSET(TDX_HYPERCALL_rbx, tdx_hypercall_args, rbx); - OFFSET(TDX_HYPERCALL_rdx, tdx_hypercall_args, rdx); - BLANK(); OFFSET(BP_scratch, boot_params, scratch); OFFSET(BP_secure_boot, boot_params, secure_boot); -- cgit v1.2.3 From 90f5ecd37faed9a59eb2788a56dac8deeee0a508 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 15 Aug 2023 23:02:02 +1200 Subject: x86/tdx: Reimplement __tdx_hypercall() using TDX_MODULE_CALL asm Now the TDX_HYPERCALL asm is basically identical to the TDX_MODULE_CALL with both '\saved' and '\ret' enabled, with two minor things though: 1) The way to restore the structure pointer is different The TDX_HYPERCALL uses RCX as spare to restore the structure pointer, but the TDX_MODULE_CALL assumes no spare register can be used. In other words, TDX_MODULE_CALL already covers what TDX_HYPERCALL does. 2) TDX_MODULE_CALL only clears shared registers for TDH.VP.ENTER For this just need to make that code available for the non-host case. Thus, remove the TDX_HYPERCALL and reimplement the __tdx_hypercall() using the TDX_MODULE_CALL. Extend the TDX_MODULE_CALL to cover "clear shared registers" for TDG.VP.VMCALL. Introduce a new __tdcall_saved_ret() to replace the temporary __tdcall_hypercall(). The __tdcall_saved_ret() can also be used for those new TDCALLs which require more input/output registers than the basic TDCALLs do. Suggested-by: Peter Zijlstra Signed-off-by: Kai Huang Signed-off-by: Dave Hansen Reviewed-by: Kirill A. Shutemov Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/e68a2473fb6f5bcd78b078cae7510e9d0753b3df.1692096753.git.kai.huang%40intel.com --- arch/x86/coco/tdx/tdcall.S | 133 +++----------------------------------- arch/x86/coco/tdx/tdx-shared.c | 4 +- arch/x86/include/asm/shared/tdx.h | 2 +- arch/x86/virt/vmx/tdx/tdxcall.S | 8 +-- 4 files changed, 15 insertions(+), 132 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/coco/tdx/tdcall.S b/arch/x86/coco/tdx/tdcall.S index 6d76d7d49b72..52d9786da308 100644 --- a/arch/x86/coco/tdx/tdcall.S +++ b/arch/x86/coco/tdx/tdcall.S @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include #include -#include #include #include @@ -46,135 +45,19 @@ SYM_FUNC_START(__tdcall_ret) SYM_FUNC_END(__tdcall_ret) /* - * TDX_HYPERCALL - Make hypercalls to a TDX VMM using TDVMCALL leaf of TDCALL - * instruction - * - * Transforms values in function call argument struct tdx_module_args @args - * into the TDCALL register ABI. After TDCALL operation, VMM output is saved - * back in @args, if \ret is 1. - * - * Depends on the caller to pass TDG.VP.VMCALL as the TDCALL leaf, and set - * @args::rcx to TDVMCALL_EXPOSE_REGS_MASK. - * - *------------------------------------------------------------------------- - * TD VMCALL ABI: - *------------------------------------------------------------------------- - * - * Input Registers: - * - * RAX - TDCALL instruction leaf number (0 - TDG.VP.VMCALL) - * RCX - BITMAP which controls which part of TD Guest GPR - * is passed as-is to the VMM and back. - * R10 - Set 0 to indicate TDCALL follows standard TDX ABI - * specification. Non zero value indicates vendor - * specific ABI. - * R11 - VMCALL sub function number - * RBX, RDX, RDI, RSI - Used to pass VMCALL sub function specific arguments. - * R8-R9, R12-R15 - Same as above. - * - * Output Registers: - * - * RAX - TDCALL instruction status (Not related to hypercall - * output). - * RBX, RDX, RDI, RSI - Hypercall sub function specific output values. - * R8-R15 - Same as above. - * - */ -.macro TDX_HYPERCALL - FRAME_BEGIN - - /* Save callee-saved GPRs as mandated by the x86_64 ABI */ - push %r15 - push %r14 - push %r13 - push %r12 - push %rbx - - /* Move Leaf ID to RAX */ - movq %rdi, %rax - - /* Move bitmap of shared registers to RCX */ - movq TDX_MODULE_rcx(%rsi), %rcx - - /* Copy hypercall registers from arg struct: */ - movq TDX_MODULE_r8(%rsi), %r8 - movq TDX_MODULE_r9(%rsi), %r9 - movq TDX_MODULE_r10(%rsi), %r10 - movq TDX_MODULE_r11(%rsi), %r11 - movq TDX_MODULE_r12(%rsi), %r12 - movq TDX_MODULE_r13(%rsi), %r13 - movq TDX_MODULE_r14(%rsi), %r14 - movq TDX_MODULE_r15(%rsi), %r15 - movq TDX_MODULE_rdi(%rsi), %rdi - movq TDX_MODULE_rbx(%rsi), %rbx - movq TDX_MODULE_rdx(%rsi), %rdx - - pushq %rsi - movq TDX_MODULE_rsi(%rsi), %rsi - - tdcall - - /* - * Restore the pointer of the structure to save output registers. - * - * RCX is used as bitmap of shared registers and doesn't hold any - * value provided by the VMM, thus it can be used as spare to - * restore the structure pointer. - */ - popq %rcx - movq %rsi, TDX_MODULE_rsi(%rcx) - movq %rcx, %rsi - - movq %r8, TDX_MODULE_r8(%rsi) - movq %r9, TDX_MODULE_r9(%rsi) - movq %r10, TDX_MODULE_r10(%rsi) - movq %r11, TDX_MODULE_r11(%rsi) - movq %r12, TDX_MODULE_r12(%rsi) - movq %r13, TDX_MODULE_r13(%rsi) - movq %r14, TDX_MODULE_r14(%rsi) - movq %r15, TDX_MODULE_r15(%rsi) - movq %rdi, TDX_MODULE_rdi(%rsi) - movq %rbx, TDX_MODULE_rbx(%rsi) - movq %rdx, TDX_MODULE_rdx(%rsi) - - /* - * Zero out registers exposed to the VMM to avoid speculative execution - * with VMM-controlled values. This needs to include all registers - * present in TDVMCALL_EXPOSE_REGS_MASK, except RBX, and R12-R15 which - * will be restored. - */ - xor %r8d, %r8d - xor %r9d, %r9d - xor %r10d, %r10d - xor %r11d, %r11d - xor %rdi, %rdi - xor %rsi, %rsi - xor %rdx, %rdx - - /* Restore callee-saved GPRs as mandated by the x86_64 ABI */ - pop %rbx - pop %r12 - pop %r13 - pop %r14 - pop %r15 - - FRAME_END - - RET -.endm - -/* + * __tdcall_saved_ret() - Used by TDX guests to request services from the + * TDX module (including VMM services) using TDCALL instruction, with + * saving output registers to the 'struct tdx_module_args' used as input. * - * __tdcall_hypercall() function ABI: + * __tdcall_saved_ret() function ABI: * * @fn (RDI) - TDCALL leaf ID, moved to RAX * @args (RSI) - struct tdx_module_args for input/output * - * @fn and @args::rcx from the caller must be TDG_VP_VMCALL and - * TDVMCALL_EXPOSE_REGS_MASK respectively. + * All registers in @args are used as input/output registers. * * On successful completion, return the hypercall error code. */ -SYM_FUNC_START(__tdcall_hypercall) - TDX_HYPERCALL -SYM_FUNC_END(__tdcall_hypercall) +SYM_FUNC_START(__tdcall_saved_ret) + TDX_MODULE_CALL host=0 ret=1 saved=1 +SYM_FUNC_END(__tdcall_saved_ret) diff --git a/arch/x86/coco/tdx/tdx-shared.c b/arch/x86/coco/tdx/tdx-shared.c index b47c8cce91b0..344b3818f4c3 100644 --- a/arch/x86/coco/tdx/tdx-shared.c +++ b/arch/x86/coco/tdx/tdx-shared.c @@ -89,11 +89,11 @@ noinstr u64 __tdx_hypercall(struct tdx_hypercall_args *args) }; /* - * Failure of __tdcall_hypercall() indicates a failure of the TDVMCALL + * Failure of __tdcall_saved_ret() indicates a failure of the TDVMCALL * mechanism itself and that something has gone horribly wrong with * the TDX module. __tdx_hypercall_failed() never returns. */ - if (__tdcall_hypercall(TDG_VP_VMCALL, &margs)) + if (__tdcall_saved_ret(TDG_VP_VMCALL, &margs)) __tdx_hypercall_failed(); args->r8 = margs.r8; diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index 85493c4813cf..f53a41948946 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -83,6 +83,7 @@ struct tdx_module_args { /* Used to communicate with the TDX module */ u64 __tdcall(u64 fn, struct tdx_module_args *args); u64 __tdcall_ret(u64 fn, struct tdx_module_args *args); +u64 __tdcall_saved_ret(u64 fn, struct tdx_module_args *args); /* * Used in __tdx_hypercall() to pass down and get back registers' values of @@ -106,7 +107,6 @@ struct tdx_hypercall_args { }; /* Used to request services from the VMM */ -u64 __tdcall_hypercall(u64 fn, struct tdx_module_args *args); u64 __tdx_hypercall(struct tdx_hypercall_args *args); /* diff --git a/arch/x86/virt/vmx/tdx/tdxcall.S b/arch/x86/virt/vmx/tdx/tdxcall.S index c54ea009b241..3f0b83a9977e 100644 --- a/arch/x86/virt/vmx/tdx/tdxcall.S +++ b/arch/x86/virt/vmx/tdx/tdxcall.S @@ -142,10 +142,10 @@ movq %r11, TDX_MODULE_r11(%rsi) .endif /* \ret */ -.if \host && \saved && \ret +.if \saved && \ret /* - * Clear registers shared by guest for VP.ENTER to prevent - * speculative use of guest's values, including those are + * Clear registers shared by guest for VP.VMCALL/VP.ENTER to prevent + * speculative use of guest's/VMM's values, including those are * restored from the stack. * * See arch/x86/kvm/vmx/vmenter.S: @@ -170,7 +170,7 @@ xorl %r15d, %r15d xorl %ebx, %ebx xorl %edi, %edi -.endif /* \host && \ret && \host */ +.endif /* \ret && \host */ .if \host .Lout\@: -- cgit v1.2.3 From 8a8544bde858e5d62d79df6baaa387e0b6587dc7 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 15 Aug 2023 23:02:03 +1200 Subject: x86/tdx: Remove 'struct tdx_hypercall_args' Now 'struct tdx_hypercall_args' is basically 'struct tdx_module_args' minus RCX. Although from __tdx_hypercall()'s perspective RCX isn't used as shared register thus not part of input/output registers, it's not worth to have a separate structure just due to one register. Remove the 'struct tdx_hypercall_args' and use 'struct tdx_module_args' instead in __tdx_hypercall() related code. This also saves the memory copy between the two structures within __tdx_hypercall(). Suggested-by: Peter Zijlstra Signed-off-by: Kai Huang Signed-off-by: Dave Hansen Reviewed-by: Kirill A. Shutemov Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/798dad5ce24e9d745cf0e16825b75ccc433ad065.1692096753.git.kai.huang%40intel.com --- arch/x86/boot/compressed/tdx.c | 4 ++-- arch/x86/coco/tdx/tdx-shared.c | 37 +++++++------------------------------ arch/x86/coco/tdx/tdx.c | 18 +++++++++--------- arch/x86/hyperv/ivm.c | 6 +++--- arch/x86/include/asm/shared/tdx.h | 25 ++----------------------- 5 files changed, 23 insertions(+), 67 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/tdx.c b/arch/x86/boot/compressed/tdx.c index bc03eae2c560..8451d6a1030c 100644 --- a/arch/x86/boot/compressed/tdx.c +++ b/arch/x86/boot/compressed/tdx.c @@ -18,7 +18,7 @@ void __tdx_hypercall_failed(void) static inline unsigned int tdx_io_in(int size, u16 port) { - struct tdx_hypercall_args args = { + struct tdx_module_args args = { .r10 = TDX_HYPERCALL_STANDARD, .r11 = hcall_func(EXIT_REASON_IO_INSTRUCTION), .r12 = size, @@ -34,7 +34,7 @@ static inline unsigned int tdx_io_in(int size, u16 port) static inline void tdx_io_out(int size, u16 port, u32 value) { - struct tdx_hypercall_args args = { + struct tdx_module_args args = { .r10 = TDX_HYPERCALL_STANDARD, .r11 = hcall_func(EXIT_REASON_IO_INSTRUCTION), .r12 = size, diff --git a/arch/x86/coco/tdx/tdx-shared.c b/arch/x86/coco/tdx/tdx-shared.c index 344b3818f4c3..78e413269791 100644 --- a/arch/x86/coco/tdx/tdx-shared.c +++ b/arch/x86/coco/tdx/tdx-shared.c @@ -70,45 +70,22 @@ bool tdx_accept_memory(phys_addr_t start, phys_addr_t end) return true; } -noinstr u64 __tdx_hypercall(struct tdx_hypercall_args *args) +noinstr u64 __tdx_hypercall(struct tdx_module_args *args) { - struct tdx_module_args margs = { - .rcx = TDVMCALL_EXPOSE_REGS_MASK, - .rdx = args->rdx, - .r8 = args->r8, - .r9 = args->r9, - .r10 = args->r10, - .r11 = args->r11, - .r12 = args->r12, - .r13 = args->r13, - .r14 = args->r14, - .r15 = args->r15, - .rbx = args->rbx, - .rdi = args->rdi, - .rsi = args->rsi, - }; + /* + * For TDVMCALL explicitly set RCX to the bitmap of shared registers. + * The caller isn't expected to set @args->rcx anyway. + */ + args->rcx = TDVMCALL_EXPOSE_REGS_MASK; /* * Failure of __tdcall_saved_ret() indicates a failure of the TDVMCALL * mechanism itself and that something has gone horribly wrong with * the TDX module. __tdx_hypercall_failed() never returns. */ - if (__tdcall_saved_ret(TDG_VP_VMCALL, &margs)) + if (__tdcall_saved_ret(TDG_VP_VMCALL, args)) __tdx_hypercall_failed(); - args->r8 = margs.r8; - args->r9 = margs.r9; - args->r10 = margs.r10; - args->r11 = margs.r11; - args->r12 = margs.r12; - args->r13 = margs.r13; - args->r14 = margs.r14; - args->r15 = margs.r15; - args->rdi = margs.rdi; - args->rsi = margs.rsi; - args->rbx = margs.rbx; - args->rdx = margs.rdx; - /* TDVMCALL leaf return code is in R10 */ return args->r10; } diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 26b6c3d8f0be..3e6dbd2199cf 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -48,7 +48,7 @@ noinstr void __noreturn __tdx_hypercall_failed(void) long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, unsigned long p2, unsigned long p3, unsigned long p4) { - struct tdx_hypercall_args args = { + struct tdx_module_args args = { .r10 = nr, .r11 = p1, .r12 = p2, @@ -108,7 +108,7 @@ EXPORT_SYMBOL_GPL(tdx_mcall_get_report0); static void __noreturn tdx_panic(const char *msg) { - struct tdx_hypercall_args args = { + struct tdx_module_args args = { .r10 = TDX_HYPERCALL_STANDARD, .r11 = TDVMCALL_REPORT_FATAL_ERROR, .r12 = 0, /* Error code: 0 is Panic */ @@ -230,7 +230,7 @@ static int ve_instr_len(struct ve_info *ve) static u64 __cpuidle __halt(const bool irq_disabled) { - struct tdx_hypercall_args args = { + struct tdx_module_args args = { .r10 = TDX_HYPERCALL_STANDARD, .r11 = hcall_func(EXIT_REASON_HLT), .r12 = irq_disabled, @@ -274,7 +274,7 @@ void __cpuidle tdx_safe_halt(void) static int read_msr(struct pt_regs *regs, struct ve_info *ve) { - struct tdx_hypercall_args args = { + struct tdx_module_args args = { .r10 = TDX_HYPERCALL_STANDARD, .r11 = hcall_func(EXIT_REASON_MSR_READ), .r12 = regs->cx, @@ -295,7 +295,7 @@ static int read_msr(struct pt_regs *regs, struct ve_info *ve) static int write_msr(struct pt_regs *regs, struct ve_info *ve) { - struct tdx_hypercall_args args = { + struct tdx_module_args args = { .r10 = TDX_HYPERCALL_STANDARD, .r11 = hcall_func(EXIT_REASON_MSR_WRITE), .r12 = regs->cx, @@ -315,7 +315,7 @@ static int write_msr(struct pt_regs *regs, struct ve_info *ve) static int handle_cpuid(struct pt_regs *regs, struct ve_info *ve) { - struct tdx_hypercall_args args = { + struct tdx_module_args args = { .r10 = TDX_HYPERCALL_STANDARD, .r11 = hcall_func(EXIT_REASON_CPUID), .r12 = regs->ax, @@ -357,7 +357,7 @@ static int handle_cpuid(struct pt_regs *regs, struct ve_info *ve) static bool mmio_read(int size, unsigned long addr, unsigned long *val) { - struct tdx_hypercall_args args = { + struct tdx_module_args args = { .r10 = TDX_HYPERCALL_STANDARD, .r11 = hcall_func(EXIT_REASON_EPT_VIOLATION), .r12 = size, @@ -486,7 +486,7 @@ static int handle_mmio(struct pt_regs *regs, struct ve_info *ve) static bool handle_in(struct pt_regs *regs, int size, int port) { - struct tdx_hypercall_args args = { + struct tdx_module_args args = { .r10 = TDX_HYPERCALL_STANDARD, .r11 = hcall_func(EXIT_REASON_IO_INSTRUCTION), .r12 = size, @@ -723,7 +723,7 @@ static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc) } while (retry_count < max_retries_per_page) { - struct tdx_hypercall_args args = { + struct tdx_module_args args = { .r10 = TDX_HYPERCALL_STANDARD, .r11 = TDVMCALL_MAP_GPA, .r12 = start, diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index e48983550743..c1088d3661d5 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -384,7 +384,7 @@ static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {} #ifdef CONFIG_INTEL_TDX_GUEST static void hv_tdx_msr_write(u64 msr, u64 val) { - struct tdx_hypercall_args args = { + struct tdx_module_args args = { .r10 = TDX_HYPERCALL_STANDARD, .r11 = EXIT_REASON_MSR_WRITE, .r12 = msr, @@ -398,7 +398,7 @@ static void hv_tdx_msr_write(u64 msr, u64 val) static void hv_tdx_msr_read(u64 msr, u64 *val) { - struct tdx_hypercall_args args = { + struct tdx_module_args args = { .r10 = TDX_HYPERCALL_STANDARD, .r11 = EXIT_REASON_MSR_READ, .r12 = msr, @@ -414,7 +414,7 @@ static void hv_tdx_msr_read(u64 msr, u64 *val) u64 hv_tdx_hypercall(u64 control, u64 param1, u64 param2) { - struct tdx_hypercall_args args = { }; + struct tdx_module_args args = { }; args.r10 = control; args.rdx = param1; diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index f53a41948946..b69886ee1c63 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -85,29 +85,8 @@ u64 __tdcall(u64 fn, struct tdx_module_args *args); u64 __tdcall_ret(u64 fn, struct tdx_module_args *args); u64 __tdcall_saved_ret(u64 fn, struct tdx_module_args *args); -/* - * Used in __tdx_hypercall() to pass down and get back registers' values of - * the TDCALL instruction when requesting services from the VMM. - * - * This is a software only structure and not part of the TDX module/VMM ABI. - */ -struct tdx_hypercall_args { - u64 r8; - u64 r9; - u64 r10; - u64 r11; - u64 r12; - u64 r13; - u64 r14; - u64 r15; - u64 rdi; - u64 rsi; - u64 rbx; - u64 rdx; -}; - /* Used to request services from the VMM */ -u64 __tdx_hypercall(struct tdx_hypercall_args *args); +u64 __tdx_hypercall(struct tdx_module_args *args); /* * Wrapper for standard use of __tdx_hypercall with no output aside from @@ -115,7 +94,7 @@ u64 __tdx_hypercall(struct tdx_hypercall_args *args); */ static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15) { - struct tdx_hypercall_args args = { + struct tdx_module_args args = { .r10 = TDX_HYPERCALL_STANDARD, .r11 = fn, .r12 = r12, -- cgit v1.2.3 From c33621b4c5ad5b6b8b245754013cc506f9ded2b8 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 15 Aug 2023 23:02:04 +1200 Subject: x86/virt/tdx: Wire up basic SEAMCALL functions Intel Trust Domain Extensions (TDX) protects guest VMs from malicious host and certain physical attacks. A CPU-attested software module called 'the TDX module' runs inside a new isolated memory range as a trusted hypervisor to manage and run protected VMs. TDX introduces a new CPU mode: Secure Arbitration Mode (SEAM). This mode runs only the TDX module itself or other code to load the TDX module. The host kernel communicates with SEAM software via a new SEAMCALL instruction. This is conceptually similar to a guest->host hypercall, except it is made from the host to SEAM software instead. The TDX module establishes a new SEAMCALL ABI which allows the host to initialize the module and to manage VMs. The SEAMCALL ABI is very similar to the TDCALL ABI and leverages much TDCALL infrastructure. Wire up basic functions to make SEAMCALLs for the basic support of running TDX guests: __seamcall(), __seamcall_ret(), and __seamcall_saved_ret() for TDH.VP.ENTER. All SEAMCALLs involved in the basic TDX support don't use "callee-saved" registers as input and output, except the TDH.VP.ENTER. To start to support TDX, create a new arch/x86/virt/vmx/tdx/tdx.c for TDX host kernel support. Add a new Kconfig option CONFIG_INTEL_TDX_HOST to opt-in TDX host kernel support (to distinguish with TDX guest kernel support). So far only KVM uses TDX. Make the new config option depend on KVM_INTEL. Signed-off-by: Kai Huang Signed-off-by: Dave Hansen Reviewed-by: Kirill A. Shutemov Acked-by: Peter Zijlstra (Intel) Tested-by: Isaku Yamahata Link: https://lore.kernel.org/all/4db7c3fc085e6af12acc2932294254ddb3d320b3.1692096753.git.kai.huang%40intel.com --- arch/x86/Kconfig | 12 ++++++++ arch/x86/Makefile | 2 ++ arch/x86/include/asm/tdx.h | 7 +++++ arch/x86/virt/Makefile | 2 ++ arch/x86/virt/vmx/Makefile | 2 ++ arch/x86/virt/vmx/tdx/Makefile | 2 ++ arch/x86/virt/vmx/tdx/seamcall.S | 61 ++++++++++++++++++++++++++++++++++++++++ 7 files changed, 88 insertions(+) create mode 100644 arch/x86/virt/Makefile create mode 100644 arch/x86/virt/vmx/Makefile create mode 100644 arch/x86/virt/vmx/tdx/Makefile create mode 100644 arch/x86/virt/vmx/tdx/seamcall.S (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 982b777eadc7..3b3594f96330 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1939,6 +1939,18 @@ config X86_USER_SHADOW_STACK If unsure, say N. +config INTEL_TDX_HOST + bool "Intel Trust Domain Extensions (TDX) host support" + depends on CPU_SUP_INTEL + depends on X86_64 + depends on KVM_INTEL + help + Intel Trust Domain Extensions (TDX) protects guest VMs from malicious + host and certain physical attacks. This option enables necessary TDX + support in the host kernel to run confidential VMs. + + If unsure, say N. + config EFI bool "EFI runtime service support" depends on ACPI diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 5bfe5caaa444..d43981a1fd1d 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -252,6 +252,8 @@ archheaders: libs-y += arch/x86/lib/ +core-y += arch/x86/virt/ + # drivers-y are linked after core-y drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ drivers-$(CONFIG_PCI) += arch/x86/pci/ diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index 603e6d1e9d4a..a69bb7d3061b 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -72,5 +72,12 @@ static inline long tdx_kvm_hypercall(unsigned int nr, unsigned long p1, return -ENODEV; } #endif /* CONFIG_INTEL_TDX_GUEST && CONFIG_KVM_GUEST */ + +#ifdef CONFIG_INTEL_TDX_HOST +u64 __seamcall(u64 fn, struct tdx_module_args *args); +u64 __seamcall_ret(u64 fn, struct tdx_module_args *args); +u64 __seamcall_saved_ret(u64 fn, struct tdx_module_args *args); +#endif /* CONFIG_INTEL_TDX_HOST */ + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_TDX_H */ diff --git a/arch/x86/virt/Makefile b/arch/x86/virt/Makefile new file mode 100644 index 000000000000..1e36502cd738 --- /dev/null +++ b/arch/x86/virt/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-y += vmx/ diff --git a/arch/x86/virt/vmx/Makefile b/arch/x86/virt/vmx/Makefile new file mode 100644 index 000000000000..feebda21d793 --- /dev/null +++ b/arch/x86/virt/vmx/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_INTEL_TDX_HOST) += tdx/ diff --git a/arch/x86/virt/vmx/tdx/Makefile b/arch/x86/virt/vmx/tdx/Makefile new file mode 100644 index 000000000000..46ef8f73aebb --- /dev/null +++ b/arch/x86/virt/vmx/tdx/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-y += seamcall.o diff --git a/arch/x86/virt/vmx/tdx/seamcall.S b/arch/x86/virt/vmx/tdx/seamcall.S new file mode 100644 index 000000000000..5b1f2286aea9 --- /dev/null +++ b/arch/x86/virt/vmx/tdx/seamcall.S @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include +#include + +#include "tdxcall.S" + +/* + * __seamcall() - Host-side interface functions to SEAM software + * (the P-SEAMLDR or the TDX module). + * + * __seamcall() function ABI: + * + * @fn (RDI) - SEAMCALL Leaf number, moved to RAX + * @args (RSI) - struct tdx_module_args for input + * + * Only RCX/RDX/R8-R11 are used as input registers. + * + * Return (via RAX) TDX_SEAMCALL_VMFAILINVALID if the SEAMCALL itself + * fails, or the completion status of the SEAMCALL leaf function. + */ +SYM_FUNC_START(__seamcall) + TDX_MODULE_CALL host=1 +SYM_FUNC_END(__seamcall) + +/* + * __seamcall_ret() - Host-side interface functions to SEAM software + * (the P-SEAMLDR or the TDX module), with saving output registers to + * the 'struct tdx_module_args' used as input. + * + * __seamcall_ret() function ABI: + * + * @fn (RDI) - SEAMCALL Leaf number, moved to RAX + * @args (RSI) - struct tdx_module_args for input and output + * + * Only RCX/RDX/R8-R11 are used as input/output registers. + * + * Return (via RAX) TDX_SEAMCALL_VMFAILINVALID if the SEAMCALL itself + * fails, or the completion status of the SEAMCALL leaf function. + */ +SYM_FUNC_START(__seamcall_ret) + TDX_MODULE_CALL host=1 ret=1 +SYM_FUNC_END(__seamcall_ret) + +/* + * __seamcall_saved_ret() - Host-side interface functions to SEAM software + * (the P-SEAMLDR or the TDX module), with saving output registers to the + * 'struct tdx_module_args' used as input. + * + * __seamcall_saved_ret() function ABI: + * + * @fn (RDI) - SEAMCALL Leaf number, moved to RAX + * @args (RSI) - struct tdx_module_args for input and output + * + * All registers in @args are used as input/output registers. + * + * Return (via RAX) TDX_SEAMCALL_VMFAILINVALID if the SEAMCALL itself + * fails, or the completion status of the SEAMCALL leaf function. + */ +SYM_FUNC_START(__seamcall_saved_ret) + TDX_MODULE_CALL host=1 ret=1 saved=1 +SYM_FUNC_END(__seamcall_saved_ret) -- cgit v1.2.3 From 7b804135d4d1f0a2b9dda69c6303d3f2dcbe9d37 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Tue, 15 Aug 2023 23:02:05 +1200 Subject: x86/virt/tdx: Make TDX_MODULE_CALL handle SEAMCALL #UD and #GP SEAMCALL instruction causes #UD if the CPU isn't in VMX operation. Currently the TDX_MODULE_CALL assembly doesn't handle #UD, thus making SEAMCALL when VMX is disabled would cause Oops. Unfortunately, there are legal cases that SEAMCALL can be made when VMX is disabled. For instance, VMX can be disabled due to emergency reboot while there are still TDX guests running. Extend the TDX_MODULE_CALL assembly to return an error code for #UD to handle this case gracefully, e.g., KVM can then quietly eat all SEAMCALL errors caused by emergency reboot. SEAMCALL instruction also causes #GP when TDX isn't enabled by the BIOS. Use _ASM_EXTABLE_FAULT() to catch both exceptions with the trap number recorded, and define two new error codes by XORing the trap number to the TDX_SW_ERROR. This opportunistically handles #GP too while using the same simple assembly code. A bonus is when kernel mistakenly calls SEAMCALL when CPU isn't in VMX operation, or when TDX isn't enabled by the BIOS, or when the BIOS is buggy, the kernel can get a nicer error code rather than a less understandable Oops. This is basically based on Peter's code. Suggested-by: Peter Zijlstra Signed-off-by: Kai Huang Signed-off-by: Dave Hansen Reviewed-by: Kirill A. Shutemov Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/de975832a367f476aab2d0eb0d9de66019a16b54.1692096753.git.kai.huang%40intel.com --- arch/x86/include/asm/tdx.h | 4 ++++ arch/x86/virt/vmx/tdx/tdxcall.S | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index a69bb7d3061b..adcbe3f1de30 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -8,6 +8,7 @@ #include #include +#include #include /* @@ -20,6 +21,9 @@ #define TDX_SW_ERROR (TDX_ERROR | GENMASK_ULL(47, 40)) #define TDX_SEAMCALL_VMFAILINVALID (TDX_SW_ERROR | _UL(0xFFFF0000)) +#define TDX_SEAMCALL_GP (TDX_SW_ERROR | X86_TRAP_GP) +#define TDX_SEAMCALL_UD (TDX_SW_ERROR | X86_TRAP_UD) + #ifndef __ASSEMBLY__ /* diff --git a/arch/x86/virt/vmx/tdx/tdxcall.S b/arch/x86/virt/vmx/tdx/tdxcall.S index 3f0b83a9977e..016a2a1ec1d6 100644 --- a/arch/x86/virt/vmx/tdx/tdxcall.S +++ b/arch/x86/virt/vmx/tdx/tdxcall.S @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include #include +#include #include /* @@ -85,6 +86,7 @@ .endif /* \saved */ .if \host +.Lseamcall\@: seamcall /* * SEAMCALL instruction is essentially a VMExit from VMX root @@ -191,11 +193,28 @@ .if \host .Lseamcall_vmfailinvalid\@: mov $TDX_SEAMCALL_VMFAILINVALID, %rax + jmp .Lseamcall_fail\@ + +.Lseamcall_trap\@: + /* + * SEAMCALL caused #GP or #UD. By reaching here RAX contains + * the trap number. Convert the trap number to the TDX error + * code by setting TDX_SW_ERROR to the high 32-bits of RAX. + * + * Note cannot OR TDX_SW_ERROR directly to RAX as OR instruction + * only accepts 32-bit immediate at most. + */ + movq $TDX_SW_ERROR, %rdi + orq %rdi, %rax + +.Lseamcall_fail\@: .if \ret && \saved /* pop the unused structure pointer back to RSI */ popq %rsi .endif jmp .Lout\@ + + _ASM_EXTABLE_FAULT(.Lseamcall\@, .Lseamcall_trap\@) .endif /* \host */ .endm -- cgit v1.2.3 From 518755a7eeae77a399430eaf211a1e71f6b87d4a Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 18 Sep 2023 16:18:58 +1200 Subject: x86/tdx: Fix __noreturn build warning around __tdx_hypercall_failed() LKP reported below build warning: vmlinux.o: warning: objtool: __tdx_hypercall+0x128: __tdx_hypercall_failed() is missing a __noreturn annotation The __tdx_hypercall_failed() function definition already has __noreturn annotation, but it turns out the __noreturn must be annotated to the function declaration. PeterZ explains: "FWIW, the reason being that... The point of noreturn is that the caller should know to stop generating code. For that the declaration needs the attribute, because call sites typically do not have access to the function definition in C." Add __noreturn annotation to the declaration of __tdx_hypercall_failed() to fix. It's not a bad idea to document the __noreturn nature at the definition site either, so keep the annotation at the definition. Note is also included by TDX related assembly files. Include only in case of !__ASSEMBLY__ otherwise compiling assembly file would trigger build error. Also, following the objtool documentation, add __tdx_hypercall_failed() to "tools/objtool/noreturns.h". Fixes: c641cfb5c157 ("x86/tdx: Make TDX_HYPERCALL asm similar to TDX_MODULE_CALL") Reported-by: kernel test robot Signed-off-by: Kai Huang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20230918041858.331234-1-kai.huang@intel.com Closes: https://lore.kernel.org/oe-kbuild-all/202309140828.9RdmlH2Z-lkp@intel.com/ --- arch/x86/include/asm/shared/tdx.h | 4 +++- tools/objtool/noreturns.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/shared/tdx.h b/arch/x86/include/asm/shared/tdx.h index b69886ee1c63..f74695dea217 100644 --- a/arch/x86/include/asm/shared/tdx.h +++ b/arch/x86/include/asm/shared/tdx.h @@ -56,6 +56,8 @@ #ifndef __ASSEMBLY__ +#include + /* * Used in __tdcall*() to gather the input/output registers' values of the * TDCALL instruction when requesting services from the TDX module. This is a @@ -108,7 +110,7 @@ static inline u64 _tdx_hypercall(u64 fn, u64 r12, u64 r13, u64 r14, u64 r15) /* Called from __tdx_hypercall() for unrecoverable failure */ -void __tdx_hypercall_failed(void); +void __noreturn __tdx_hypercall_failed(void); bool tdx_accept_memory(phys_addr_t start, phys_addr_t end); diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index e45c7cb1d5bc..875f8827aa33 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -11,6 +11,7 @@ NORETURN(__kunit_abort) NORETURN(__module_put_and_kthread_exit) NORETURN(__reiserfs_panic) NORETURN(__stack_chk_fail) +NORETURN(__tdx_hypercall_failed) NORETURN(__ubsan_handle_builtin_unreachable) NORETURN(arch_call_rest_init) NORETURN(arch_cpu_idle_dead) -- cgit v1.2.3 From 9ee4318c157b9802589b746cc340bae3142d984c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Oct 2023 17:45:49 +0300 Subject: x86/tdx: Mark TSC reliable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In x86 virtualization environments, including TDX, RDTSC instruction is handled without causing a VM exit, resulting in minimal overhead and jitters. On the other hand, other clock sources (such as HPET, ACPI timer, APIC, etc.) necessitate VM exits to implement, resulting in more fluctuating measurements compared to TSC. Thus, those clock sources are not effective for calibrating TSC. As a foundation, the host TSC is guaranteed to be invariant on any system which enumerates TDX support. TDX guests and the TDX module build on that foundation by enforcing: - Virtual TSC is monotonously incrementing for any single VCPU; - Virtual TSC values are consistent among all the TD’s VCPUs at the level supported by the CPU: + VMM is required to set the same TSC_ADJUST; + VMM must not modify from initial value of TSC_ADJUST before SEAMCALL; - The frequency is determined by TD configuration: + Virtual TSC frequency is specified by VMM on TDH.MNG.INIT; + Virtual TSC starts counting from 0 at TDH.MNG.INIT; The result is that a reliable TSC is a TDX architectural guarantee. Use the TSC as the only reliable clock source in TD guests, bypassing unstable calibration. This is similar to what the kernel already does in some VMWare and HyperV environments. [ dhansen: changelog tweaks ] Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Kuppuswamy Sathyanarayanan Reviewed-by: Erdem Aktas Reviewed-by: Isaku Yamahata Acked-by: Kai Huang Link: https://lore.kernel.org/all/20231006144549.2633-1-kirill.shutemov%40linux.intel.com --- arch/x86/coco/tdx/tdx.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 3e6dbd2199cf..2f27ae1e2c6b 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -816,6 +816,9 @@ void __init tdx_early_init(void) setup_force_cpu_cap(X86_FEATURE_TDX_GUEST); + /* TSC is the only reliable clock in TDX guest */ + setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); + cc_vendor = CC_VENDOR_INTEL; tdx_parse_tdinfo(&cc_mask); cc_set_mask(cc_mask); -- cgit v1.2.3