From f79b1c573cb4dc551919f81ed5797419f6fc1f3a Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Thu, 29 Mar 2018 20:36:55 +0530 Subject: x86/i8237: Register device based on FADT legacy boot flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit From Skylake onwards, the platform controller hub (Sunrisepoint PCH) does not support legacy DMA operations to IO ports 81h-83h, 87h, 89h-8Bh, 8Fh. Currently this driver registers as syscore ops and its resume function is called on every resume from S3. On Skylake and Kabylake, this causes a resume delay of around 100ms due to port IO operations, which is a problem. This change allows to load the driver only when the platform bios explicitly supports such devices or has a cut-off date earlier than 2017 due to the following reasons: - The platforms released before year 2017 have support for the 8237. (except Sunrisepoint PCH e.g. Skylake) - Some of the BIOS that were released for platforms (Skylake, Kabylake) during 2016-17 are buggy. These BIOS do not set/unset the ACPI_FADT_LEGACY_DEVICES field in FADT table properly based on the presence or absence of the DMA device. Very recently, open source system firmware like coreboot started unsetting ACPI_FADT_LEGACY_DEVICES field in FADT table if the 8237 DMA device is not present on the PCH. Please refer to chapter 21 of 6th Generation Intel® Core™ Processor Platform Controller Hub Family: BIOS Specification. Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Anshuman Gupta Signed-off-by: Thomas Gleixner Reviewed-by: Andy Shevchenko Cc: rjw@rjwysocki.net Cc: hpa@zytor.com Cc: Alan Cox Link: https://lkml.kernel.org/r/1522336015-22994-1-git-send-email-anshuman.gupta@intel.com --- arch/x86/include/asm/x86_init.h | 1 + arch/x86/kernel/i8237.c | 25 +++++++++++++++++++++++++ arch/x86/kernel/platform-quirks.c | 7 ++++++- 3 files changed, 32 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index ce8b4da07e35..db98e3ab3295 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -301,5 +301,6 @@ extern struct x86_apic_ops x86_apic_ops; extern void x86_early_init_platform_quirks(void); extern void x86_init_noop(void); extern void x86_init_uint_noop(unsigned int unused); +extern bool x86_pnpbios_disabled(void); #endif diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c index 8eeaa81de066..0a3e70fd00d6 100644 --- a/arch/x86/kernel/i8237.c +++ b/arch/x86/kernel/i8237.c @@ -9,10 +9,12 @@ * your option) any later version. */ +#include #include #include #include +#include /* * This module just handles suspend/resume issues with the @@ -49,6 +51,29 @@ static struct syscore_ops i8237_syscore_ops = { static int __init i8237A_init_ops(void) { + /* + * From SKL PCH onwards, the legacy DMA device is removed in which the + * I/O ports (81h-83h, 87h, 89h-8Bh, 8Fh) related to it are removed + * as well. All removed ports must return 0xff for a inb() request. + * + * Note: DMA_PAGE_2 (port 0x81) should not be checked for detecting + * the presence of DMA device since it may be used by BIOS to decode + * LPC traffic for POST codes. Original LPC only decodes one byte of + * port 0x80 but some BIOS may choose to enhance PCH LPC port 0x8x + * decoding. + */ + if (dma_inb(DMA_PAGE_0) == 0xFF) + return -ENODEV; + + /* + * It is not required to load this driver as newer SoC may not + * support 8237 DMA or bus mastering from LPC. Platform firmware + * must announce the support for such legacy devices via + * ACPI_FADT_LEGACY_DEVICES field in FADT table. + */ + if (x86_pnpbios_disabled() && dmi_get_bios_year() >= 2017) + return -ENODEV; + register_syscore_ops(&i8237_syscore_ops); return 0; } diff --git a/arch/x86/kernel/platform-quirks.c b/arch/x86/kernel/platform-quirks.c index 235fe6008ac8..b348a672f71d 100644 --- a/arch/x86/kernel/platform-quirks.c +++ b/arch/x86/kernel/platform-quirks.c @@ -33,9 +33,14 @@ void __init x86_early_init_platform_quirks(void) x86_platform.set_legacy_features(); } +bool __init x86_pnpbios_disabled(void) +{ + return x86_platform.legacy.devices.pnpbios == 0; +} + #if defined(CONFIG_PNPBIOS) bool __init arch_pnpbios_disabled(void) { - return x86_platform.legacy.devices.pnpbios == 0; + return x86_pnpbios_disabled(); } #endif -- cgit v1.2.3 From f2173356e26c4da2ddecf9bf3d58a6a2ff56b11d Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Fri, 4 May 2018 19:59:35 +0200 Subject: x86/vdso: Remove unused file commit da861e18eccc ("x86, vdso: Get rid of the fake section mechanism") left this file behind; nothing is using it anymore. Signed-off-by: Jann Horn Signed-off-by: Thomas Gleixner Cc: hpa@zytor.com Cc: luto@amacapital.net Link: https://lkml.kernel.org/r/20180504175935.104085-1-jannh@google.com --- arch/x86/entry/vdso/vdso32/vdso-fakesections.c | 1 - 1 file changed, 1 deletion(-) delete mode 100644 arch/x86/entry/vdso/vdso32/vdso-fakesections.c (limited to 'arch') diff --git a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c b/arch/x86/entry/vdso/vdso32/vdso-fakesections.c deleted file mode 100644 index 541468e25265..000000000000 --- a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c +++ /dev/null @@ -1 +0,0 @@ -#include "../vdso-fakesections.c" -- cgit v1.2.3 From 13a4db9d75ec5dbca4bd6229e149e061ef7a6bf0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 15 May 2018 20:57:59 +0300 Subject: x86/mtrr: Convert to use match_string() helper The helper returns index of the matching string in an array. Replace the open coded array lookup with match_string(). Signed-off-by: Andy Shevchenko Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/20180515175759.89315-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/if.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 558444b23923..42b4f2f3b557 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -149,17 +149,16 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) return -EINVAL; ptr = skip_spaces(ptr + 5); - for (i = 0; i < MTRR_NUM_TYPES; ++i) { - if (strcmp(ptr, mtrr_strings[i])) - continue; - base >>= PAGE_SHIFT; - size >>= PAGE_SHIFT; - err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true); - if (err < 0) - return err; - return len; - } - return -EINVAL; + i = match_string(mtrr_strings, MTRR_NUM_TYPES, ptr); + if (i < 0) + return i; + + base >>= PAGE_SHIFT; + size >>= PAGE_SHIFT; + err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true); + if (err < 0) + return err; + return len; } static long -- cgit v1.2.3 From 7f8ec5a4f01aa7d03e94a3d99d7b5f9c02d7fe5a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 15 May 2018 21:05:35 +0300 Subject: x86/mtrr: Convert to use strncpy_from_user() helper Replace the open coded string fetch from user-space with strncpy_from_user(). Signed-off-by: Andy Shevchenko Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Luis R. Rodriguez Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Link: http://lkml.kernel.org/r/20180515180535.89703-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/if.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index 42b4f2f3b557..c610f47373e4 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -106,17 +106,9 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) memset(line, 0, LINE_SIZE); - length = len; - length--; - - if (length > LINE_SIZE - 1) - length = LINE_SIZE - 1; - + length = strncpy_from_user(line, buf, LINE_SIZE - 1); if (length < 0) - return -EINVAL; - - if (copy_from_user(line, buf, length)) - return -EFAULT; + return length; linelen = strlen(line); ptr = line + linelen - 1; -- cgit v1.2.3 From ff987fcf011d20c53b3a613edf6e2055ea48e26e Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 24 May 2018 10:44:20 -0500 Subject: x86/microcode: Make the late update update_lock a raw lock for RT __reload_late() is called from stop_machine context and thus cannot acquire a non-raw spinlock on PREEMPT_RT. Signed-off-by: Scott Wood Signed-off-by: Borislav Petkov Signed-off-by: Thomas Gleixner Acked-by: Thomas Gleixner Cc: Ashok Raj Cc: Clark Williams Cc: Pei Zhang Cc: x86-ml Link: http://lkml.kernel.org/r/20180524154420.24455-1-swood@redhat.com --- arch/x86/kernel/cpu/microcode/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 77e201301528..08286269fd24 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -70,7 +70,7 @@ static DEFINE_MUTEX(microcode_mutex); /* * Serialize late loading so that CPUs get updated one-by-one. */ -static DEFINE_SPINLOCK(update_lock); +static DEFINE_RAW_SPINLOCK(update_lock); struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; @@ -560,9 +560,9 @@ static int __reload_late(void *info) if (__wait_for_cpus(&late_cpus_in, NSEC_PER_SEC)) return -1; - spin_lock(&update_lock); + raw_spin_lock(&update_lock); apply_microcode_local(&err); - spin_unlock(&update_lock); + raw_spin_unlock(&update_lock); /* siblings return UCODE_OK because their engine got updated already */ if (err > UCODE_NFOUND) { -- cgit v1.2.3 From 046c0dbec0238c25b7526c26c9a9687664229ce2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Jun 2018 13:35:15 +0200 Subject: x86: Mark native_set_p4d() as __always_inline When CONFIG_OPTIMIZE_INLINING is enabled, the function native_set_p4d() may not be fully inlined into the caller, resulting in a false-positive warning about an access to the __pgtable_l5_enabled variable from a non-__init function, despite the original caller being an __init function: WARNING: vmlinux.o(.text.unlikely+0x1429): Section mismatch in reference from the function native_set_p4d() to the variable .init.data:__pgtable_l5_enabled WARNING: vmlinux.o(.text.unlikely+0x1429): Section mismatch in reference from the function native_p4d_clear() to the variable .init.data:__pgtable_l5_enabled The function native_set_p4d() references the variable __initdata __pgtable_l5_enabled. This is often because native_set_p4d lacks a __initdata annotation or the annotation of __pgtable_l5_enabled is wrong. Marking the native_set_p4d function and its caller native_p4d_clear() avoids this problem. I did not bisect the original cause, but I assume this is related to the recent rework that turned pgtable_l5_enabled() into an inline function, which in turn caused the compiler to make different inlining decisions. Fixes: ad3fe525b950 ("x86/mm: Unify pgtable_l5_enabled usage in early boot code") Signed-off-by: Arnd Bergmann Signed-off-by: Thomas Gleixner Acked-by: Kirill A. Shutemov Cc: Greg Kroah-Hartman Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: Andrew Morton Cc: Zi Yan Cc: Naoya Horiguchi Link: https://lkml.kernel.org/r/20180605113715.1133726-1-arnd@arndb.de --- arch/x86/include/asm/pgtable_64.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 877bc27718ae..c750112cb416 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -216,7 +216,7 @@ static inline pgd_t pti_set_user_pgd(pgd_t *pgdp, pgd_t pgd) } #endif -static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) +static __always_inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { pgd_t pgd; @@ -230,7 +230,7 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) *p4dp = native_make_p4d(native_pgd_val(pgd)); } -static inline void native_p4d_clear(p4d_t *p4d) +static __always_inline void native_p4d_clear(p4d_t *p4d) { native_set_p4d(p4d, native_make_p4d(0)); } -- cgit v1.2.3 From 94d49eb30e854c84d1319095b5dd0405a7da9362 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 18 May 2018 14:30:28 +0300 Subject: x86/mm: Decouple dynamic __PHYSICAL_MASK from AMD SME AMD SME claims one bit from physical address to indicate whether the page is encrypted or not. To achieve that we clear out the bit from __PHYSICAL_MASK. The capability to adjust __PHYSICAL_MASK is required beyond AMD SME. For instance for upcoming Intel Multi-Key Total Memory Encryption. Factor it out into a separate feature with own Kconfig handle. It also helps with overhead of AMD SME. It saves more than 3k in .text on defconfig + AMD_MEM_ENCRYPT: add/remove: 3/2 grow/shrink: 5/110 up/down: 189/-3753 (-3564) We would need to return to this once we have infrastructure to patch constants in code. That's good candidate for it. Signed-off-by: Kirill A. Shutemov Signed-off-by: Thomas Gleixner Reviewed-by: Tom Lendacky Cc: linux-mm@kvack.org Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20180518113028.79825-1-kirill.shutemov@linux.intel.com --- arch/x86/Kconfig | 4 ++++ arch/x86/boot/compressed/kaslr_64.c | 5 +++++ arch/x86/include/asm/page_types.h | 8 +++++++- arch/x86/mm/mem_encrypt_identity.c | 3 +++ arch/x86/mm/pgtable.c | 5 +++++ 5 files changed, 24 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c07f492b871a..43a8fc476296 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -337,6 +337,9 @@ config ARCH_SUPPORTS_UPROBES config FIX_EARLYCON_MEM def_bool y +config DYNAMIC_PHYSICAL_MASK + bool + config PGTABLE_LEVELS int default 5 if X86_5LEVEL @@ -1508,6 +1511,7 @@ config ARCH_HAS_MEM_ENCRYPT config AMD_MEM_ENCRYPT bool "AMD Secure Memory Encryption (SME) support" depends on X86_64 && CPU_SUP_AMD + select DYNAMIC_PHYSICAL_MASK ---help--- Say yes to enable support for the encryption of system memory. This requires an AMD processor that supports Secure Memory diff --git a/arch/x86/boot/compressed/kaslr_64.c b/arch/x86/boot/compressed/kaslr_64.c index 522d11431433..748456c365f4 100644 --- a/arch/x86/boot/compressed/kaslr_64.c +++ b/arch/x86/boot/compressed/kaslr_64.c @@ -69,6 +69,8 @@ static struct alloc_pgt_data pgt_data; /* The top level page table entry pointer. */ static unsigned long top_level_pgt; +phys_addr_t physical_mask = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; + /* * Mapping information structure passed to kernel_ident_mapping_init(). * Due to relocation, pointers must be assigned at run time not build time. @@ -81,6 +83,9 @@ void initialize_identity_maps(void) /* If running as an SEV guest, the encryption mask is required. */ set_sev_encryption_mask(); + /* Exclude the encryption mask from __PHYSICAL_MASK */ + physical_mask &= ~sme_me_mask; + /* Init mapping_info with run-time function/buffer pointers. */ mapping_info.alloc_pgt_page = alloc_pgt_page; mapping_info.context = &pgt_data; diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 1e53560a84bb..c85e15010f48 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h @@ -17,7 +17,6 @@ #define PUD_PAGE_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_PAGE_MASK (~(PUD_PAGE_SIZE-1)) -#define __PHYSICAL_MASK ((phys_addr_t)(__sme_clr((1ULL << __PHYSICAL_MASK_SHIFT) - 1))) #define __VIRTUAL_MASK ((1UL << __VIRTUAL_MASK_SHIFT) - 1) /* Cast *PAGE_MASK to a signed type so that it is sign-extended if @@ -55,6 +54,13 @@ #ifndef __ASSEMBLY__ +#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK +extern phys_addr_t physical_mask; +#define __PHYSICAL_MASK physical_mask +#else +#define __PHYSICAL_MASK ((phys_addr_t)((1ULL << __PHYSICAL_MASK_SHIFT) - 1)) +#endif + extern int devmem_is_allowed(unsigned long pagenr); extern unsigned long max_low_pfn_mapped; diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 1b2197d13832..7ae36868aed2 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -527,6 +527,7 @@ void __init sme_enable(struct boot_params *bp) /* SEV state cannot be controlled by a command line option */ sme_me_mask = me_mask; sev_enabled = true; + physical_mask &= ~sme_me_mask; return; } @@ -561,4 +562,6 @@ void __init sme_enable(struct boot_params *bp) sme_me_mask = 0; else sme_me_mask = active_by_default ? me_mask : 0; + + physical_mask &= ~sme_me_mask; } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index ffc8c13c50e4..3ca59cf7a7f9 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -8,6 +8,11 @@ #include #include +#ifdef CONFIG_DYNAMIC_PHYSICAL_MASK +phys_addr_t physical_mask __ro_after_init = (1ULL << __PHYSICAL_MASK_SHIFT) - 1; +EXPORT_SYMBOL(physical_mask); +#endif + #define PGALLOC_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) #ifdef CONFIG_HIGHPTE -- cgit v1.2.3 From cce2946b9b30a9b31a18de737d5010c08076e77f Mon Sep 17 00:00:00 2001 From: Varsha Rao Date: Sun, 20 May 2018 13:30:12 +0530 Subject: x86/platform/uv: Remove extra parentheses Remove extra parentheses to fix the extraneous parentheses clang warning. Suggested-by: Lukas Bulwahn Signed-off-by: Varsha Rao Signed-off-by: Thomas Gleixner Cc: Nicholas Mc Guire Cc: "H. Peter Anvin" Link: https://lkml.kernel.org/r/20180520080012.8215-1-rvarsha016@gmail.com --- arch/x86/platform/uv/tlb_uv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index b36caae0fb2f..b96d38288c60 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -615,7 +615,7 @@ static int uv2_3_wait_completion(struct bau_desc *bau_desc, /* spin on the status MMR, waiting for it to go idle */ while (descriptor_stat != UV2H_DESC_IDLE) { - if ((descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT)) { + if (descriptor_stat == UV2H_DESC_SOURCE_TIMEOUT) { /* * A h/w bug on the destination side may * have prevented the message being marked -- cgit v1.2.3 From 336628128826a9acb045571a960e32e4414ccb61 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Wed, 23 May 2018 10:35:55 +0800 Subject: x86/idt: Simplify the idt_setup_apic_and_irq_gates() The idt_setup_apic_and_irq_gates() sets the gates from FIRST_EXTERNAL_VECTOR up to FIRST_SYSTEM_VECTOR first. then secondly, from FIRST_SYSTEM_VECTOR to NR_VECTORS, it takes both APIC=y and APIC=n into account. But for APIC=n, the FIRST_SYSTEM_VECTOR is equal to NR_VECTORS, all vectors has been set at the first step. Simplify the second step, make it just work for APIC=y. Signed-off-by: Dou Liyang Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20180523023555.2933-1-douly.fnst@cn.fujitsu.com --- arch/x86/kernel/idt.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 2c3a1b4294eb..74383a3780dc 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -317,15 +317,12 @@ void __init idt_setup_apic_and_irq_gates(void) set_intr_gate(i, entry); } - for_each_clear_bit_from(i, system_vectors, NR_VECTORS) { #ifdef CONFIG_X86_LOCAL_APIC + for_each_clear_bit_from(i, system_vectors, NR_VECTORS) { set_bit(i, system_vectors); set_intr_gate(i, spurious_interrupt); -#else - entry = irq_entries_start + 8 * (i - FIRST_EXTERNAL_VECTOR); - set_intr_gate(i, entry); -#endif } +#endif } /** -- cgit v1.2.3 From 838d76d63ec4eaeaa12bedfa50f261480f615200 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 1 Jun 2018 14:50:31 +0800 Subject: x86/vector: Fix the args of vector_alloc tracepoint The vector_alloc tracepont reversed the reserved and ret aggs, that made the trace print wrong. Exchange them. Fixes: 8d1e3dca7de6 ("x86/vector: Add tracepoints for vector management") Signed-off-by: Dou Liyang Signed-off-by: Thomas Gleixner Cc: hpa@zytor.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20180601065031.21872-1-douly.fnst@cn.fujitsu.com --- arch/x86/include/asm/trace/irq_vectors.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/trace/irq_vectors.h b/arch/x86/include/asm/trace/irq_vectors.h index 22647a642e98..0af81b590a0c 100644 --- a/arch/x86/include/asm/trace/irq_vectors.h +++ b/arch/x86/include/asm/trace/irq_vectors.h @@ -236,7 +236,7 @@ TRACE_EVENT(vector_alloc, TP_PROTO(unsigned int irq, unsigned int vector, bool reserved, int ret), - TP_ARGS(irq, vector, ret, reserved), + TP_ARGS(irq, vector, reserved, ret), TP_STRUCT__entry( __field( unsigned int, irq ) -- cgit v1.2.3 From 80ae7b1a918e78b0bae88b0c0ad413d3fdced968 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jun 2018 17:33:53 +0200 Subject: x86/apic/vector: Prevent hlist corruption and leaks Several people observed the WARN_ON() in irq_matrix_free() which triggers when the caller tries to free an vector which is not in the allocation range. Song provided the trace information which allowed to decode the root cause. The rework of the vector allocation mechanism failed to preserve a sanity check, which prevents setting a new target vector/CPU when the previous affinity change has not fully completed. As a result a half finished affinity change can be overwritten, which can cause the leak of a irq descriptor pointer on the previous target CPU and double enqueue of the hlist head into the cleanup lists of two or more CPUs. After one CPU cleaned up its vector the next CPU will invoke the cleanup handler with vector 0, which triggers the out of range warning in the matrix allocator. Prevent this by checking the apic_data of the interrupt whether the move_in_progress flag is false and the hlist node is not hashed. Return -EBUSY if not. This prevents the damage and restores the behaviour before the vector allocation rework, but due to other changes in that area it also widens the chance that user space can observe -EBUSY. In theory this should be fine, but actually not all user space tools handle -EBUSY correctly. Addressing that is not part of this fix, but will be addressed in follow up patches. Fixes: 69cde0004a4b ("x86/vector: Use matrix allocator for vector assignment") Reported-by: Dmitry Safonov <0x7f454c46@gmail.com> Reported-by: Tariq Toukan Reported-by: Song Liu Signed-off-by: Thomas Gleixner Tested-by: Song Liu Cc: Joerg Roedel Cc: Peter Zijlstra Cc: stable@vger.kernel.org Cc: Mike Travis Cc: Borislav Petkov Link: https://lkml.kernel.org/r/20180604162224.303870257@linutronix.de --- arch/x86/kernel/apic/vector.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index bb6f7a2148d7..72b575a0b662 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -235,6 +235,15 @@ static int allocate_vector(struct irq_data *irqd, const struct cpumask *dest) if (vector && cpu_online(cpu) && cpumask_test_cpu(cpu, dest)) return 0; + /* + * Careful here. @apicd might either have move_in_progress set or + * be enqueued for cleanup. Assigning a new vector would either + * leave a stale vector on some CPU around or in case of a pending + * cleanup corrupt the hlist. + */ + if (apicd->move_in_progress || !hlist_unhashed(&apicd->clist)) + return -EBUSY; + vector = irq_matrix_alloc(vector_matrix, dest, resvd, &cpu); if (vector > 0) apic_update_vector(irqd, vector, cpu); -- cgit v1.2.3 From c0255770ccdc77ef2184d2a0a2e0cde09d2b44a4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jun 2018 17:33:55 +0200 Subject: x86/apic: Provide apic_ack_irq() apic_ack_edge() is explicitely for handling interrupt affinity cleanup when interrupt remapping is not available or disable. Remapped interrupts and also some of the platform specific special interrupts, e.g. UV, invoke ack_APIC_irq() directly. To address the issue of failing an affinity update with -EBUSY the delayed affinity mechanism can be reused, but ack_APIC_irq() does not handle that. Adding this to ack_APIC_irq() is not possible, because that function is also used for exceptions and directly handled interrupts like IPIs. Create a new function, which just contains the conditional invocation of irq_move_irq() and the final ack_APIC_irq(). Reuse the new function in apic_ack_edge(). Preparatory change for the real fix. Fixes: dccfe3147b42 ("x86/vector: Simplify vector move cleanup") Signed-off-by: Thomas Gleixner Tested-by: Song Liu Cc: Joerg Roedel Cc: Peter Zijlstra Cc: Song Liu Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: stable@vger.kernel.org Cc: Mike Travis Cc: Borislav Petkov Cc: Tariq Toukan Link: https://lkml.kernel.org/r/20180604162224.471925894@linutronix.de --- arch/x86/include/asm/apic.h | 2 ++ arch/x86/kernel/apic/vector.c | 9 +++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 08acd954f00e..74a9e06b6cfd 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -436,6 +436,8 @@ static inline void apic_set_eoi_write(void (*eoi_write)(u32 reg, u32 v)) {} #endif /* CONFIG_X86_LOCAL_APIC */ +extern void apic_ack_irq(struct irq_data *data); + static inline void ack_APIC_irq(void) { /* diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 72b575a0b662..b708f597eee3 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -809,13 +809,18 @@ static int apic_retrigger_irq(struct irq_data *irqd) return 1; } -void apic_ack_edge(struct irq_data *irqd) +void apic_ack_irq(struct irq_data *irqd) { - irq_complete_move(irqd_cfg(irqd)); irq_move_irq(irqd); ack_APIC_irq(); } +void apic_ack_edge(struct irq_data *irqd) +{ + irq_complete_move(irqd_cfg(irqd)); + apic_ack_irq(irqd); +} + static struct irq_chip lapic_controller = { .name = "APIC", .irq_ack = apic_ack_edge, -- cgit v1.2.3 From 2b04e46d8d0b9b7ac08ded672e3eab823f01d77a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jun 2018 17:33:57 +0200 Subject: x86/ioapic: Use apic_ack_irq() To address the EBUSY fail of interrupt affinity settings in case that the previous setting has not been cleaned up yet, use the new apic_ack_irq() function instead of directly invoking ack_APIC_irq(). Preparatory change for the real fix Fixes: dccfe3147b42 ("x86/vector: Simplify vector move cleanup") Signed-off-by: Thomas Gleixner Tested-by: Song Liu Cc: Joerg Roedel Cc: Peter Zijlstra Cc: Song Liu Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: stable@vger.kernel.org Cc: Mike Travis Cc: Borislav Petkov Cc: Tariq Toukan Link: https://lkml.kernel.org/r/20180604162224.639011135@linutronix.de --- arch/x86/kernel/apic/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 7553819c74c3..3982f79d2377 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1851,7 +1851,7 @@ static void ioapic_ir_ack_level(struct irq_data *irq_data) * intr-remapping table entry. Hence for the io-apic * EOI we use the pin number. */ - ack_APIC_irq(); + apic_ack_irq(irq_data); eoi_ioapic_pin(data->entry.vector, data); } -- cgit v1.2.3 From 839b0f1c4ef674cd929a42304c078afca278581a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jun 2018 17:33:58 +0200 Subject: x86/platform/uv: Use apic_ack_irq() To address the EBUSY fail of interrupt affinity settings in case that the previous setting has not been cleaned up yet, use the new apic_ack_irq() function instead of the special uv_ack_apic() implementation which is merily a wrapper around ack_APIC_irq(). Preparatory change for the real fix Fixes: dccfe3147b42 ("x86/vector: Simplify vector move cleanup") Reported-by: Song Liu Signed-off-by: Thomas Gleixner Tested-by: Song Liu Cc: Joerg Roedel Cc: Peter Zijlstra Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: stable@vger.kernel.org Cc: Mike Travis Cc: Borislav Petkov Cc: Tariq Toukan Link: https://lkml.kernel.org/r/20180604162224.721691398@linutronix.de --- arch/x86/platform/uv/uv_irq.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c index e4cb9f4cde8a..fc13cbbb2dce 100644 --- a/arch/x86/platform/uv/uv_irq.c +++ b/arch/x86/platform/uv/uv_irq.c @@ -47,11 +47,6 @@ static void uv_program_mmr(struct irq_cfg *cfg, struct uv_irq_2_mmr_pnode *info) static void uv_noop(struct irq_data *data) { } -static void uv_ack_apic(struct irq_data *data) -{ - ack_APIC_irq(); -} - static int uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask, bool force) @@ -73,7 +68,7 @@ static struct irq_chip uv_irq_chip = { .name = "UV-CORE", .irq_mask = uv_noop, .irq_unmask = uv_noop, - .irq_eoi = uv_ack_apic, + .irq_eoi = apic_ack_irq, .irq_set_affinity = uv_set_irq_affinity, }; -- cgit v1.2.3 From a07771ac6a78860777a9da5d9bc38830ec993fe7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Jun 2018 17:34:00 +0200 Subject: x86/apic/vector: Print APIC control bits in debugfs Extend the debugability of the vector management by adding the state bits to the debugfs output. Signed-off-by: Thomas Gleixner Tested-by: Song Liu Cc: Joerg Roedel Cc: Peter Zijlstra Cc: Song Liu Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Mike Travis Cc: Borislav Petkov Cc: Tariq Toukan Link: https://lkml.kernel.org/r/20180604162224.908136099@linutronix.de --- arch/x86/kernel/apic/vector.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index b708f597eee3..35aaee4fc028 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -588,8 +588,7 @@ error: static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, struct irq_data *irqd, int ind) { - unsigned int cpu, vector, prev_cpu, prev_vector; - struct apic_chip_data *apicd; + struct apic_chip_data apicd; unsigned long flags; int irq; @@ -605,24 +604,26 @@ static void x86_vector_debug_show(struct seq_file *m, struct irq_domain *d, return; } - apicd = irqd->chip_data; - if (!apicd) { + if (!irqd->chip_data) { seq_printf(m, "%*sVector: Not assigned\n", ind, ""); return; } raw_spin_lock_irqsave(&vector_lock, flags); - cpu = apicd->cpu; - vector = apicd->vector; - prev_cpu = apicd->prev_cpu; - prev_vector = apicd->prev_vector; + memcpy(&apicd, irqd->chip_data, sizeof(apicd)); raw_spin_unlock_irqrestore(&vector_lock, flags); - seq_printf(m, "%*sVector: %5u\n", ind, "", vector); - seq_printf(m, "%*sTarget: %5u\n", ind, "", cpu); - if (prev_vector) { - seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", prev_vector); - seq_printf(m, "%*sPrevious target: %5u\n", ind, "", prev_cpu); + + seq_printf(m, "%*sVector: %5u\n", ind, "", apicd.vector); + seq_printf(m, "%*sTarget: %5u\n", ind, "", apicd.cpu); + if (apicd.prev_vector) { + seq_printf(m, "%*sPrevious vector: %5u\n", ind, "", apicd.prev_vector); + seq_printf(m, "%*sPrevious target: %5u\n", ind, "", apicd.prev_cpu); } + seq_printf(m, "%*smove_in_progress: %u\n", ind, "", apicd.move_in_progress ? 1 : 0); + seq_printf(m, "%*sis_managed: %u\n", ind, "", apicd.is_managed ? 1 : 0); + seq_printf(m, "%*scan_reserve: %u\n", ind, "", apicd.can_reserve ? 1 : 0); + seq_printf(m, "%*shas_reserved: %u\n", ind, "", apicd.has_reserved ? 1 : 0); + seq_printf(m, "%*scleanup_pending: %u\n", ind, "", !hlist_unhashed(&apicd.clist)); } #endif -- cgit v1.2.3 From 1d9f3e20a56d33e55748552aeec597f58542f92d Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 8 Jun 2018 09:07:32 -0700 Subject: x86/intel_rdt: Enable CMT and MBM on new Skylake stepping New stepping of Skylake has fixes for cache occupancy and memory bandwidth monitoring. Update the code to enable these by default on newer steppings. Signed-off-by: Tony Luck Signed-off-by: Thomas Gleixner Cc: Fenghua Yu Cc: stable@vger.kernel.org # v4.14 Cc: Vikas Shivappa Link: https://lkml.kernel.org/r/20180608160732.9842-1-tony.luck@intel.com --- arch/x86/kernel/cpu/intel_rdt.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 589b948e6e01..316a8875bd90 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -821,6 +821,8 @@ static __init void rdt_quirks(void) case INTEL_FAM6_SKYLAKE_X: if (boot_cpu_data.x86_stepping <= 4) set_rdt_options("!cmt,!mbmtotal,!mbmlocal,!l3cat"); + else + set_rdt_options("!l3cat"); } } -- cgit v1.2.3