diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/boot/dts/at91-sama5d2_icp.dts | 2 | ||||
-rw-r--r-- | arch/arm/boot/dts/bcm2835-rpi.dtsi | 2 | ||||
-rw-r--r-- | arch/arm/mach-imx/cpuidle-imx6q.c | 4 | ||||
-rw-r--r-- | arch/arm/xen/enlighten.c | 2 | ||||
-rw-r--r-- | arch/arm64/crypto/aes-neonbs-core.S | 4 | ||||
-rw-r--r-- | arch/arm64/kernel/acpi.c | 22 | ||||
-rw-r--r-- | arch/arm64/kvm/hyp/nvhe/tlb.c | 7 | ||||
-rw-r--r-- | arch/ia64/mm/init.c | 6 | ||||
-rw-r--r-- | arch/mips/bcm47xx/setup.c | 2 | ||||
-rw-r--r-- | arch/mips/include/asm/cpu-type.h | 1 | ||||
-rw-r--r-- | arch/mips/loongson2ef/Platform | 4 | ||||
-rw-r--r-- | arch/mips/loongson64/cop2-ex.c | 24 | ||||
-rw-r--r-- | arch/powerpc/net/bpf_jit_comp.c | 1 | ||||
-rw-r--r-- | arch/riscv/include/asm/stackprotector.h | 4 | ||||
-rw-r--r-- | arch/riscv/include/asm/timex.h | 13 | ||||
-rw-r--r-- | arch/riscv/kernel/vmlinux.lds.S | 5 | ||||
-rw-r--r-- | arch/riscv/mm/init.c | 1 | ||||
-rw-r--r-- | arch/s390/include/asm/pgtable.h | 42 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 8 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 37 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.h | 5 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 22 | ||||
-rw-r--r-- | arch/x86/lib/usercopy_64.c | 2 |
23 files changed, 151 insertions, 69 deletions
diff --git a/arch/arm/boot/dts/at91-sama5d2_icp.dts b/arch/arm/boot/dts/at91-sama5d2_icp.dts index 8d19925fc09e..6783cf16ff81 100644 --- a/arch/arm/boot/dts/at91-sama5d2_icp.dts +++ b/arch/arm/boot/dts/at91-sama5d2_icp.dts @@ -116,7 +116,6 @@ switch0: ksz8563@0 { compatible = "microchip,ksz8563"; reg = <0>; - phy-mode = "mii"; reset-gpios = <&pioA PIN_PD4 GPIO_ACTIVE_LOW>; spi-max-frequency = <500000>; @@ -140,6 +139,7 @@ reg = <2>; label = "cpu"; ethernet = <&macb0>; + phy-mode = "mii"; fixed-link { speed = <100>; full-duplex; diff --git a/arch/arm/boot/dts/bcm2835-rpi.dtsi b/arch/arm/boot/dts/bcm2835-rpi.dtsi index f7ae5a4530b8..d94357b21f7e 100644 --- a/arch/arm/boot/dts/bcm2835-rpi.dtsi +++ b/arch/arm/boot/dts/bcm2835-rpi.dtsi @@ -13,7 +13,7 @@ soc { firmware: firmware { - compatible = "raspberrypi,bcm2835-firmware", "simple-bus"; + compatible = "raspberrypi,bcm2835-firmware", "simple-mfd"; #address-cells = <1>; #size-cells = <1>; diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c index 24dd5bbe60e4..094337dc1bc7 100644 --- a/arch/arm/mach-imx/cpuidle-imx6q.c +++ b/arch/arm/mach-imx/cpuidle-imx6q.c @@ -24,7 +24,9 @@ static int imx6q_enter_wait(struct cpuidle_device *dev, imx6_set_lpm(WAIT_UNCLOCKED); raw_spin_unlock(&cpuidle_lock); + rcu_idle_enter(); cpu_do_idle(); + rcu_idle_exit(); raw_spin_lock(&cpuidle_lock); if (num_idle_cpus-- == num_online_cpus()) @@ -44,7 +46,7 @@ static struct cpuidle_driver imx6q_cpuidle_driver = { { .exit_latency = 50, .target_residency = 75, - .flags = CPUIDLE_FLAG_TIMER_STOP, + .flags = CPUIDLE_FLAG_TIMER_STOP | CPUIDLE_FLAG_RCU_IDLE, .enter = imx6q_enter_wait, .name = "WAIT", .desc = "Clock off", diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index e93145d72c26..a6ab3689b2f4 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -150,7 +150,7 @@ static int xen_starting_cpu(unsigned int cpu) pr_info("Xen: initializing cpu%d\n", cpu); vcpup = per_cpu_ptr(xen_vcpu_info, cpu); - info.mfn = virt_to_gfn(vcpup); + info.mfn = percpu_to_gfn(vcpup); info.offset = xen_offset_in_page(vcpup); err = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, xen_vcpu_nr(cpu), diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S index b357164379f6..63a52ad9a75c 100644 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -788,7 +788,7 @@ SYM_FUNC_START_LOCAL(__xts_crypt8) 0: mov bskey, x21 mov rounds, x22 - br x7 + br x16 SYM_FUNC_END(__xts_crypt8) .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 @@ -806,7 +806,7 @@ SYM_FUNC_END(__xts_crypt8) uzp1 v30.4s, v30.4s, v25.4s ld1 {v25.16b}, [x24] -99: adr x7, \do8 +99: adr x16, \do8 bl __xts_crypt8 ldp q16, q17, [sp, #.Lframe_local_offset] diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index a85174d05473..cada0b816c8a 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -298,8 +298,21 @@ void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) case EFI_BOOT_SERVICES_DATA: case EFI_CONVENTIONAL_MEMORY: case EFI_PERSISTENT_MEMORY: - pr_warn(FW_BUG "requested region covers kernel memory @ %pa\n", &phys); - return NULL; + if (memblock_is_map_memory(phys) || + !memblock_is_region_memory(phys, size)) { + pr_warn(FW_BUG "requested region covers kernel memory @ %pa\n", &phys); + return NULL; + } + /* + * Mapping kernel memory is permitted if the region in + * question is covered by a single memblock with the + * NOMAP attribute set: this enables the use of ACPI + * table overrides passed via initramfs, which are + * reserved in memory using arch_reserve_mem_area() + * below. As this particular use case only requires + * read access, fall through to the R/O mapping case. + */ + fallthrough; case EFI_RUNTIME_SERVICES_CODE: /* @@ -388,3 +401,8 @@ int apei_claim_sea(struct pt_regs *regs) return err; } + +void arch_reserve_mem_area(acpi_physical_address addr, size_t size) +{ + memblock_mark_nomap(addr, size); +} diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index 69eae608d670..b15d65a42042 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -31,7 +31,14 @@ static void __tlb_switch_to_guest(struct kvm_s2_mmu *mmu, isb(); } + /* + * __load_guest_stage2() includes an ISB only when the AT + * workaround is applied. Take care of the opposite condition, + * ensuring that we always have an ISB, but not two ISBs back + * to back. + */ __load_guest_stage2(mmu); + asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); } static void __tlb_switch_to_host(struct tlb_inv_context *cxt) diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 0b3fb4c7af29..8e7b8c6c576e 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -538,7 +538,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg) if (map_start < map_end) memmap_init_zone((unsigned long)(map_end - map_start), args->nid, args->zone, page_to_pfn(map_start), - MEMMAP_EARLY, NULL); + MEMINIT_EARLY, NULL); return 0; } @@ -547,8 +547,8 @@ memmap_init (unsigned long size, int nid, unsigned long zone, unsigned long start_pfn) { if (!vmem_map) { - memmap_init_zone(size, nid, zone, start_pfn, MEMMAP_EARLY, - NULL); + memmap_init_zone(size, nid, zone, start_pfn, + MEMINIT_EARLY, NULL); } else { struct page *start; struct memmap_init_callback_data args; diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index 82627c264964..01427bde2397 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -148,7 +148,7 @@ void __init plat_mem_setup(void) { struct cpuinfo_mips *c = ¤t_cpu_data; - if ((c->cputype == CPU_74K) || (c->cputype == CPU_1074K)) { + if (c->cputype == CPU_74K) { pr_info("Using bcma bus\n"); #ifdef CONFIG_BCM47XX_BCMA bcm47xx_bus_type = BCM47XX_BUS_TYPE_BCMA; diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h index 75a7a382da09..3288cef4b168 100644 --- a/arch/mips/include/asm/cpu-type.h +++ b/arch/mips/include/asm/cpu-type.h @@ -47,6 +47,7 @@ static inline int __pure __get_cpu_type(const int cpu_type) case CPU_34K: case CPU_1004K: case CPU_74K: + case CPU_1074K: case CPU_M14KC: case CPU_M14KEC: case CPU_INTERAPTIV: diff --git a/arch/mips/loongson2ef/Platform b/arch/mips/loongson2ef/Platform index 4ab55f1123a0..ae023b9a1c51 100644 --- a/arch/mips/loongson2ef/Platform +++ b/arch/mips/loongson2ef/Platform @@ -44,6 +44,10 @@ ifdef CONFIG_CPU_LOONGSON2F_WORKAROUNDS endif endif +# Some -march= flags enable MMI instructions, and GCC complains about that +# support being enabled alongside -msoft-float. Thus explicitly disable MMI. +cflags-y += $(call cc-option,-mno-loongson-mmi) + # # Loongson Machines' Support # diff --git a/arch/mips/loongson64/cop2-ex.c b/arch/mips/loongson64/cop2-ex.c index f130f62129b8..00055d4b6042 100644 --- a/arch/mips/loongson64/cop2-ex.c +++ b/arch/mips/loongson64/cop2-ex.c @@ -95,10 +95,8 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action, if (res) goto fault; - set_fpr64(current->thread.fpu.fpr, - insn.loongson3_lswc2_format.rt, value); - set_fpr64(current->thread.fpu.fpr, - insn.loongson3_lswc2_format.rq, value_next); + set_fpr64(¤t->thread.fpu.fpr[insn.loongson3_lswc2_format.rt], 0, value); + set_fpr64(¤t->thread.fpu.fpr[insn.loongson3_lswc2_format.rq], 0, value_next); compute_return_epc(regs); own_fpu(1); } @@ -130,15 +128,13 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action, goto sigbus; lose_fpu(1); - value_next = get_fpr64(current->thread.fpu.fpr, - insn.loongson3_lswc2_format.rq); + value_next = get_fpr64(¤t->thread.fpu.fpr[insn.loongson3_lswc2_format.rq], 0); StoreDW(addr + 8, value_next, res); if (res) goto fault; - value = get_fpr64(current->thread.fpu.fpr, - insn.loongson3_lswc2_format.rt); + value = get_fpr64(¤t->thread.fpu.fpr[insn.loongson3_lswc2_format.rt], 0); StoreDW(addr, value, res); if (res) @@ -204,8 +200,7 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action, if (res) goto fault; - set_fpr64(current->thread.fpu.fpr, - insn.loongson3_lsdc2_format.rt, value); + set_fpr64(¤t->thread.fpu.fpr[insn.loongson3_lsdc2_format.rt], 0, value); compute_return_epc(regs); own_fpu(1); @@ -221,8 +216,7 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action, if (res) goto fault; - set_fpr64(current->thread.fpu.fpr, - insn.loongson3_lsdc2_format.rt, value); + set_fpr64(¤t->thread.fpu.fpr[insn.loongson3_lsdc2_format.rt], 0, value); compute_return_epc(regs); own_fpu(1); break; @@ -286,8 +280,7 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action, goto sigbus; lose_fpu(1); - value = get_fpr64(current->thread.fpu.fpr, - insn.loongson3_lsdc2_format.rt); + value = get_fpr64(¤t->thread.fpu.fpr[insn.loongson3_lsdc2_format.rt], 0); StoreW(addr, value, res); if (res) @@ -305,8 +298,7 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action, goto sigbus; lose_fpu(1); - value = get_fpr64(current->thread.fpu.fpr, - insn.loongson3_lsdc2_format.rt); + value = get_fpr64(¤t->thread.fpu.fpr[insn.loongson3_lsdc2_format.rt], 0); StoreDW(addr, value, res); if (res) diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 78d61f97371e..e809cb5a1631 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -475,7 +475,6 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP | BPF_JSET | BPF_X: true_cond = COND_NE; - fallthrough; cond_branch: /* same targets, can avoid doing the test :) */ if (filter[i].jt == filter[i].jf) { diff --git a/arch/riscv/include/asm/stackprotector.h b/arch/riscv/include/asm/stackprotector.h index d95f7b2a7f37..5962f8891f06 100644 --- a/arch/riscv/include/asm/stackprotector.h +++ b/arch/riscv/include/asm/stackprotector.h @@ -5,7 +5,6 @@ #include <linux/random.h> #include <linux/version.h> -#include <asm/timex.h> extern unsigned long __stack_chk_guard; @@ -18,12 +17,9 @@ extern unsigned long __stack_chk_guard; static __always_inline void boot_init_stack_canary(void) { unsigned long canary; - unsigned long tsc; /* Try to get a semi random initial value. */ get_random_bytes(&canary, sizeof(canary)); - tsc = get_cycles(); - canary += tsc + (tsc << BITS_PER_LONG/2); canary ^= LINUX_VERSION_CODE; canary &= CANARY_MASK; diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index 7f659dda0032..ab104905d4db 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -33,6 +33,19 @@ static inline u32 get_cycles_hi(void) #define get_cycles_hi get_cycles_hi #endif /* CONFIG_64BIT */ +/* + * Much like MIPS, we may not have a viable counter to use at an early point + * in the boot process. Unfortunately we don't have a fallback, so instead + * we just return 0. + */ +static inline unsigned long random_get_entropy(void) +{ + if (unlikely(clint_time_val == NULL)) + return 0; + return get_cycles(); +} +#define random_get_entropy() random_get_entropy() + #else /* CONFIG_RISCV_M_MODE */ static inline cycles_t get_cycles(void) diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index f3586e31ed1e..34d00d9e6eac 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -22,13 +22,11 @@ SECTIONS /* Beginning of code and text segment */ . = LOAD_OFFSET; _start = .; - _stext = .; HEAD_TEXT_SECTION . = ALIGN(PAGE_SIZE); __init_begin = .; INIT_TEXT_SECTION(PAGE_SIZE) - INIT_DATA_SECTION(16) . = ALIGN(8); __soc_early_init_table : { __soc_early_init_table_start = .; @@ -55,6 +53,7 @@ SECTIONS . = ALIGN(SECTION_ALIGN); .text : { _text = .; + _stext = .; TEXT_TEXT SCHED_TEXT CPUIDLE_TEXT @@ -67,6 +66,8 @@ SECTIONS _etext = .; } + INIT_DATA_SECTION(16) + /* Start of data section */ _sdata = .; RO_DATA(SECTION_ALIGN) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index ca03762a3733..f750e012dbe5 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -515,6 +515,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) #else dtb_early_va = (void *)dtb_pa; #endif + dtb_early_pa = dtb_pa; } static inline void setup_vm_final(void) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7eb01a5459cd..b55561cc8786 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1260,26 +1260,44 @@ static inline pgd_t *pgd_offset_raw(pgd_t *pgd, unsigned long address) #define pgd_offset(mm, address) pgd_offset_raw(READ_ONCE((mm)->pgd), address) -static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +static inline p4d_t *p4d_offset_lockless(pgd_t *pgdp, pgd_t pgd, unsigned long address) { - if ((pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1) - return (p4d_t *) pgd_deref(*pgd) + p4d_index(address); - return (p4d_t *) pgd; + if ((pgd_val(pgd) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R1) + return (p4d_t *) pgd_deref(pgd) + p4d_index(address); + return (p4d_t *) pgdp; } +#define p4d_offset_lockless p4d_offset_lockless -static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) +static inline p4d_t *p4d_offset(pgd_t *pgdp, unsigned long address) { - if ((p4d_val(*p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2) - return (pud_t *) p4d_deref(*p4d) + pud_index(address); - return (pud_t *) p4d; + return p4d_offset_lockless(pgdp, *pgdp, address); +} + +static inline pud_t *pud_offset_lockless(p4d_t *p4dp, p4d_t p4d, unsigned long address) +{ + if ((p4d_val(p4d) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R2) + return (pud_t *) p4d_deref(p4d) + pud_index(address); + return (pud_t *) p4dp; +} +#define pud_offset_lockless pud_offset_lockless + +static inline pud_t *pud_offset(p4d_t *p4dp, unsigned long address) +{ + return pud_offset_lockless(p4dp, *p4dp, address); } #define pud_offset pud_offset -static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) +static inline pmd_t *pmd_offset_lockless(pud_t *pudp, pud_t pud, unsigned long address) +{ + if ((pud_val(pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3) + return (pmd_t *) pud_deref(pud) + pmd_index(address); + return (pmd_t *) pudp; +} +#define pmd_offset_lockless pmd_offset_lockless + +static inline pmd_t *pmd_offset(pud_t *pudp, unsigned long address) { - if ((pud_val(*pud) & _REGION_ENTRY_TYPE_MASK) >= _REGION_ENTRY_TYPE_R3) - return (pmd_t *) pud_deref(*pud) + pmd_index(address); - return (pmd_t *) pud; + return pmd_offset_lockless(pudp, *pudp, address); } #define pmd_offset pmd_offset diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index c44f3e9140d5..91ea74ae71b8 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -2183,6 +2183,12 @@ static int iret_interception(struct vcpu_svm *svm) return 1; } +static int invd_interception(struct vcpu_svm *svm) +{ + /* Treat an INVD instruction as a NOP and just skip it. */ + return kvm_skip_emulated_instruction(&svm->vcpu); +} + static int invlpg_interception(struct vcpu_svm *svm) { if (!static_cpu_has(X86_FEATURE_DECODEASSISTS)) @@ -2774,7 +2780,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_RDPMC] = rdpmc_interception, [SVM_EXIT_CPUID] = cpuid_interception, [SVM_EXIT_IRET] = iret_interception, - [SVM_EXIT_INVD] = emulate_on_interception, + [SVM_EXIT_INVD] = invd_interception, [SVM_EXIT_PAUSE] = pause_interception, [SVM_EXIT_HLT] = halt_interception, [SVM_EXIT_INVLPG] = invlpg_interception, diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 8646a797b7a8..96979c09ebd1 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -129,6 +129,9 @@ static bool __read_mostly enable_preemption_timer = 1; module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); #endif +extern bool __read_mostly allow_smaller_maxphyaddr; +module_param(allow_smaller_maxphyaddr, bool, S_IRUGO); + #define KVM_VM_CR0_ALWAYS_OFF (X86_CR0_NW | X86_CR0_CD) #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST X86_CR0_NE #define KVM_VM_CR0_ALWAYS_ON \ @@ -791,6 +794,18 @@ void update_exception_bitmap(struct kvm_vcpu *vcpu) */ if (is_guest_mode(vcpu)) eb |= get_vmcs12(vcpu)->exception_bitmap; + else { + /* + * If EPT is enabled, #PF is only trapped if MAXPHYADDR is mismatched + * between guest and host. In that case we only care about present + * faults. For vmcs02, however, PFEC_MASK and PFEC_MATCH are set in + * prepare_vmcs02_rare. + */ + bool selective_pf_trap = enable_ept && (eb & (1u << PF_VECTOR)); + int mask = selective_pf_trap ? PFERR_PRESENT_MASK : 0; + vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, mask); + vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, mask); + } vmcs_write32(EXCEPTION_BITMAP, eb); } @@ -4352,16 +4367,6 @@ static void init_vmcs(struct vcpu_vmx *vmx) vmx->pt_desc.guest.output_mask = 0x7F; vmcs_write64(GUEST_IA32_RTIT_CTL, 0); } - - /* - * If EPT is enabled, #PF is only trapped if MAXPHYADDR is mismatched - * between guest and host. In that case we only care about present - * faults. - */ - if (enable_ept) { - vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, PFERR_PRESENT_MASK); - vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, PFERR_PRESENT_MASK); - } } static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) @@ -4803,6 +4808,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) * EPT will cause page fault only if we need to * detect illegal GPAs. */ + WARN_ON_ONCE(!allow_smaller_maxphyaddr); kvm_fixup_and_inject_pf_error(vcpu, cr2, error_code); return 1; } else @@ -5331,7 +5337,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) * would also use advanced VM-exit information for EPT violations to * reconstruct the page fault error code. */ - if (unlikely(kvm_mmu_is_illegal_gpa(vcpu, gpa))) + if (unlikely(allow_smaller_maxphyaddr && kvm_mmu_is_illegal_gpa(vcpu, gpa))) return kvm_emulate_instruction(vcpu, 0); return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); @@ -8305,11 +8311,12 @@ static int __init vmx_init(void) vmx_check_vmcs12_offsets(); /* - * Intel processors don't have problems with - * GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable - * it for VMX by default + * Shadow paging doesn't have a (further) performance penalty + * from GUEST_MAXPHYADDR < HOST_MAXPHYADDR so enable it + * by default */ - allow_smaller_maxphyaddr = true; + if (!enable_ept) + allow_smaller_maxphyaddr = true; return 0; } diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index a2f82127c170..a0e47720f60c 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -552,7 +552,10 @@ static inline bool vmx_has_waitpkg(struct vcpu_vmx *vmx) static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu) { - return !enable_ept || cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits; + if (!enable_ept) + return true; + + return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits; } void dump_vmcs(void); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1994602a0851..ce856e0ece84 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -188,7 +188,7 @@ static struct kvm_shared_msrs __percpu *shared_msrs; u64 __read_mostly host_efer; EXPORT_SYMBOL_GPL(host_efer); -bool __read_mostly allow_smaller_maxphyaddr; +bool __read_mostly allow_smaller_maxphyaddr = 0; EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr); static u64 __read_mostly host_xss; @@ -976,6 +976,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) unsigned long old_cr4 = kvm_read_cr4(vcpu); unsigned long pdptr_bits = X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_SMEP; + unsigned long mmu_role_bits = pdptr_bits | X86_CR4_SMAP | X86_CR4_PKE; if (kvm_valid_cr4(vcpu, cr4)) return 1; @@ -1003,7 +1004,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) if (kvm_x86_ops.set_cr4(vcpu, cr4)) return 1; - if (((cr4 ^ old_cr4) & pdptr_bits) || + if (((cr4 ^ old_cr4) & mmu_role_bits) || (!(cr4 & X86_CR4_PCIDE) && (old_cr4 & X86_CR4_PCIDE))) kvm_mmu_reset_context(vcpu); @@ -3221,9 +3222,22 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_POWER_CTL: msr_info->data = vcpu->arch.msr_ia32_power_ctl; break; - case MSR_IA32_TSC: - msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset; + case MSR_IA32_TSC: { + /* + * Intel SDM states that MSR_IA32_TSC read adds the TSC offset + * even when not intercepted. AMD manual doesn't explicitly + * state this but appears to behave the same. + * + * On userspace reads and writes, however, we unconditionally + * operate L1's TSC value to ensure backwards-compatible + * behavior for migration. + */ + u64 tsc_offset = msr_info->host_initiated ? vcpu->arch.l1_tsc_offset : + vcpu->arch.tsc_offset; + + msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + tsc_offset; break; + } case MSR_MTRRcap: case 0x200 ... 0x2ff: return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data); diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index b0dfac3d3df7..1847e993ac63 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -120,7 +120,7 @@ long __copy_user_flushcache(void *dst, const void __user *src, unsigned size) */ if (size < 8) { if (!IS_ALIGNED(dest, 4) || size != 4) - clean_cache_range(dst, 1); + clean_cache_range(dst, size); } else { if (!IS_ALIGNED(dest, 8)) { dest = ALIGN(dest, boot_cpu_data.x86_clflush_size); |