summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2021-03-22 00:16:08 +0300
committerIngo Molnar <mingo@kernel.org>2021-03-22 00:16:08 +0300
commitca8778c45e401067cefe2abbfb8b547c30d45908 (patch)
treec10beaf40adb2bbc6a8933f87ff9da7a5366bf42 /arch
parent21d6a7dcbfba5e7b31f4e9d555a9be362578bfc3 (diff)
parent2c41fab1c60b02626c8153a1806a7a1e5d62aaf1 (diff)
downloadlinux-ca8778c45e401067cefe2abbfb8b547c30d45908.tar.xz
Merge branch 'linus' into x86/cleanups, to resolve conflict
Conflicts: arch/x86/kernel/kprobes/ftrace.c Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/csky/kernel/probes/ftrace.c2
-rw-r--r--arch/mips/kernel/vmlinux.lds.S2
-rw-r--r--arch/powerpc/include/asm/cpu_has_feature.h4
-rw-r--r--arch/powerpc/kernel/vdso32/gettimeofday.S11
-rw-r--r--arch/riscv/Kconfig4
-rw-r--r--arch/riscv/Kconfig.socs2
-rw-r--r--arch/riscv/include/asm/asm-prototypes.h16
-rw-r--r--arch/riscv/include/asm/irq.h2
-rw-r--r--arch/riscv/include/asm/processor.h1
-rw-r--r--arch/riscv/include/asm/ptrace.h5
-rw-r--r--arch/riscv/include/asm/sbi.h4
-rw-r--r--arch/riscv/include/asm/timex.h2
-rw-r--r--arch/riscv/kernel/Makefile1
-rw-r--r--arch/riscv/kernel/probes/ftrace.c18
-rw-r--r--arch/riscv/kernel/probes/kprobes.c3
-rw-r--r--arch/riscv/kernel/process.c1
-rw-r--r--arch/riscv/kernel/sbi.c2
-rw-r--r--arch/riscv/kernel/setup.c3
-rw-r--r--arch/riscv/kernel/time.c1
-rw-r--r--arch/riscv/kernel/traps.c1
-rw-r--r--arch/riscv/mm/kasan_init.c4
-rw-r--r--arch/s390/include/asm/pci.h2
-rw-r--r--arch/s390/kernel/perf_cpum_cf_diag.c3
-rw-r--r--arch/s390/kernel/vtime.c2
-rw-r--r--arch/s390/pci/pci.c28
-rw-r--r--arch/s390/pci/pci_event.c18
-rw-r--r--arch/x86/events/intel/core.c3
-rw-r--r--arch/x86/events/intel/ds.c2
-rw-r--r--arch/x86/include/asm/kvm_host.h34
-rw-r--r--arch/x86/include/asm/processor.h9
-rw-r--r--arch/x86/include/asm/thread_info.h15
-rw-r--r--arch/x86/kernel/apic/apic.c5
-rw-r--r--arch/x86/kernel/apic/io_apic.c10
-rw-r--r--arch/x86/kernel/kvm.c23
-rw-r--r--arch/x86/kernel/signal.c24
-rw-r--r--arch/x86/kvm/hyperv.c91
-rw-r--r--arch/x86/kvm/hyperv.h1
-rw-r--r--arch/x86/kvm/mmu/mmu_internal.h5
-rw-r--r--arch/x86/kvm/mmu/tdp_iter.c30
-rw-r--r--arch/x86/kvm/mmu/tdp_iter.h4
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c40
-rw-r--r--arch/x86/kvm/x86.c113
-rw-r--r--arch/x86/platform/iris/iris.c1
43 files changed, 363 insertions, 189 deletions
diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c
index ae2b1c7b3b5c..ef2bb9bd9605 100644
--- a/arch/csky/kernel/probes/ftrace.c
+++ b/arch/csky/kernel/probes/ftrace.c
@@ -9,7 +9,7 @@ int arch_check_ftrace_location(struct kprobe *p)
return 0;
}
-/* Ftrace callback handler for kprobes -- called under preepmt disabed */
+/* Ftrace callback handler for kprobes -- called under preepmt disabled */
void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index 1234834cc4c4..1f98947fe715 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -176,7 +176,7 @@ SECTIONS
.fill : {
FILL(0);
BYTE(0);
- . = ALIGN(8);
+ STRUCT_ALIGN();
}
__appended_dtb = .;
/* leave space for appended DTB */
diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h
index 7897d16e0990..727d4b321937 100644
--- a/arch/powerpc/include/asm/cpu_has_feature.h
+++ b/arch/powerpc/include/asm/cpu_has_feature.h
@@ -7,7 +7,7 @@
#include <linux/bug.h>
#include <asm/cputable.h>
-static inline bool early_cpu_has_feature(unsigned long feature)
+static __always_inline bool early_cpu_has_feature(unsigned long feature)
{
return !!((CPU_FTRS_ALWAYS & feature) ||
(CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature));
@@ -46,7 +46,7 @@ static __always_inline bool cpu_has_feature(unsigned long feature)
return static_branch_likely(&cpu_feature_keys[i]);
}
#else
-static inline bool cpu_has_feature(unsigned long feature)
+static __always_inline bool cpu_has_feature(unsigned long feature)
{
return early_cpu_has_feature(feature);
}
diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S
index a6e29f880e0e..d21d08140a5e 100644
--- a/arch/powerpc/kernel/vdso32/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso32/gettimeofday.S
@@ -65,3 +65,14 @@ V_FUNCTION_END(__kernel_clock_getres)
V_FUNCTION_BEGIN(__kernel_time)
cvdso_call_time __c_kernel_time
V_FUNCTION_END(__kernel_time)
+
+/* Routines for restoring integer registers, called by the compiler. */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the integer restore area. */
+_GLOBAL(_restgpr_31_x)
+_GLOBAL(_rest32gpr_31_x)
+ lwz r0,4(r11)
+ lwz r31,-4(r11)
+ mtlr r0
+ mr r1,r11
+ blr
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 85d626b8ce5e..87d7b52f278f 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -93,7 +93,6 @@ config RISCV
select PCI_MSI if PCI
select RISCV_INTC
select RISCV_TIMER if RISCV_SBI
- select SPARSEMEM_STATIC if 32BIT
select SPARSE_IRQ
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
@@ -154,7 +153,8 @@ config ARCH_FLATMEM_ENABLE
config ARCH_SPARSEMEM_ENABLE
def_bool y
depends on MMU
- select SPARSEMEM_VMEMMAP_ENABLE
+ select SPARSEMEM_STATIC if 32BIT && SPARSMEM
+ select SPARSEMEM_VMEMMAP_ENABLE if 64BIT
config ARCH_SELECT_MEMORY_MODEL
def_bool ARCH_SPARSEMEM_ENABLE
diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index 7efcece8896c..e1b2690b6e45 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -31,6 +31,8 @@ config SOC_CANAAN
select SIFIVE_PLIC
select ARCH_HAS_RESET_CONTROLLER
select PINCTRL
+ select COMMON_CLK
+ select COMMON_CLK_K210
help
This enables support for Canaan Kendryte K210 SoC platform hardware.
diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
index 27e005fca584..2a652b0c987d 100644
--- a/arch/riscv/include/asm/asm-prototypes.h
+++ b/arch/riscv/include/asm/asm-prototypes.h
@@ -9,4 +9,20 @@ long long __lshrti3(long long a, int b);
long long __ashrti3(long long a, int b);
long long __ashlti3(long long a, int b);
+
+#define DECLARE_DO_ERROR_INFO(name) asmlinkage void name(struct pt_regs *regs)
+
+DECLARE_DO_ERROR_INFO(do_trap_unknown);
+DECLARE_DO_ERROR_INFO(do_trap_insn_misaligned);
+DECLARE_DO_ERROR_INFO(do_trap_insn_fault);
+DECLARE_DO_ERROR_INFO(do_trap_insn_illegal);
+DECLARE_DO_ERROR_INFO(do_trap_load_fault);
+DECLARE_DO_ERROR_INFO(do_trap_load_misaligned);
+DECLARE_DO_ERROR_INFO(do_trap_store_misaligned);
+DECLARE_DO_ERROR_INFO(do_trap_store_fault);
+DECLARE_DO_ERROR_INFO(do_trap_ecall_u);
+DECLARE_DO_ERROR_INFO(do_trap_ecall_s);
+DECLARE_DO_ERROR_INFO(do_trap_ecall_m);
+DECLARE_DO_ERROR_INFO(do_trap_break);
+
#endif /* _ASM_RISCV_PROTOTYPES_H */
diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h
index 9807ad164015..e4c435509983 100644
--- a/arch/riscv/include/asm/irq.h
+++ b/arch/riscv/include/asm/irq.h
@@ -12,4 +12,6 @@
#include <asm-generic/irq.h>
+extern void __init init_IRQ(void);
+
#endif /* _ASM_RISCV_IRQ_H */
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index 3a240037bde2..021ed64ee608 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -71,6 +71,7 @@ int riscv_of_processor_hartid(struct device_node *node);
int riscv_of_parent_hartid(struct device_node *node);
extern void riscv_fill_hwcap(void);
+extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#endif /* __ASSEMBLY__ */
diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h
index cb4abb639e8d..09ad4e923510 100644
--- a/arch/riscv/include/asm/ptrace.h
+++ b/arch/riscv/include/asm/ptrace.h
@@ -119,6 +119,11 @@ extern int regs_query_register_offset(const char *name);
extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
unsigned int n);
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ unsigned long frame_pointer);
+int do_syscall_trace_enter(struct pt_regs *regs);
+void do_syscall_trace_exit(struct pt_regs *regs);
+
/**
* regs_get_register() - get register value from its offset
* @regs: pt_regs from which register value is gotten
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 99895d9c3bdd..d7027411dde8 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -51,10 +51,10 @@ enum sbi_ext_rfence_fid {
SBI_EXT_RFENCE_REMOTE_FENCE_I = 0,
SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
- SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA,
SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID,
- SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA,
+ SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA,
SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID,
+ SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA,
};
enum sbi_ext_hsm_fid {
diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
index 81de51e6aa32..507cae273bc6 100644
--- a/arch/riscv/include/asm/timex.h
+++ b/arch/riscv/include/asm/timex.h
@@ -88,4 +88,6 @@ static inline int read_current_timer(unsigned long *timer_val)
return 0;
}
+extern void time_init(void);
+
#endif /* _ASM_RISCV_TIMEX_H */
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 3dc0abde988a..647a47f5484a 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -8,6 +8,7 @@ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE)
endif
+CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,)
extra-y += head.o
extra-y += vmlinux.lds
diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c
index e6372490aa0b..17ca5e923bb0 100644
--- a/arch/riscv/kernel/probes/ftrace.c
+++ b/arch/riscv/kernel/probes/ftrace.c
@@ -2,39 +2,41 @@
#include <linux/kprobes.h>
-/* Ftrace callback handler for kprobes -- called under preepmt disabed */
+/* Ftrace callback handler for kprobes -- called under preepmt disabled */
void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
- struct ftrace_ops *ops, struct ftrace_regs *regs)
+ struct ftrace_ops *ops, struct ftrace_regs *fregs)
{
struct kprobe *p;
+ struct pt_regs *regs;
struct kprobe_ctlblk *kcb;
p = get_kprobe((kprobe_opcode_t *)ip);
if (unlikely(!p) || kprobe_disabled(p))
return;
+ regs = ftrace_get_regs(fregs);
kcb = get_kprobe_ctlblk();
if (kprobe_running()) {
kprobes_inc_nmissed_count(p);
} else {
- unsigned long orig_ip = instruction_pointer(&(regs->regs));
+ unsigned long orig_ip = instruction_pointer(regs);
- instruction_pointer_set(&(regs->regs), ip);
+ instruction_pointer_set(regs, ip);
__this_cpu_write(current_kprobe, p);
kcb->kprobe_status = KPROBE_HIT_ACTIVE;
- if (!p->pre_handler || !p->pre_handler(p, &(regs->regs))) {
+ if (!p->pre_handler || !p->pre_handler(p, regs)) {
/*
* Emulate singlestep (and also recover regs->pc)
* as if there is a nop
*/
- instruction_pointer_set(&(regs->regs),
+ instruction_pointer_set(regs,
(unsigned long)p->addr + MCOUNT_INSN_SIZE);
if (unlikely(p->post_handler)) {
kcb->kprobe_status = KPROBE_HIT_SSDONE;
- p->post_handler(p, &(regs->regs), 0);
+ p->post_handler(p, regs, 0);
}
- instruction_pointer_set(&(regs->regs), orig_ip);
+ instruction_pointer_set(regs, orig_ip);
}
/*
diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
index a2ec18662fee..7e2c78e2ca6b 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -256,8 +256,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr)
* normal page fault.
*/
regs->epc = (unsigned long) cur->addr;
- if (!instruction_pointer(regs))
- BUG();
+ BUG_ON(!instruction_pointer(regs));
if (kcb->kprobe_status == KPROBE_REENTER)
restore_previous_kprobe(kcb);
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 6f728e731bed..f9cd57c9c67d 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -10,6 +10,7 @@
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/sched/debug.h>
#include <linux/sched/task_stack.h>
#include <linux/tick.h>
#include <linux/ptrace.h>
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
index f4a7db3d309e..d3bf756321a5 100644
--- a/arch/riscv/kernel/sbi.c
+++ b/arch/riscv/kernel/sbi.c
@@ -116,7 +116,7 @@ void sbi_clear_ipi(void)
EXPORT_SYMBOL(sbi_clear_ipi);
/**
- * sbi_set_timer_v01() - Program the timer for next timer event.
+ * __sbi_set_timer_v01() - Program the timer for next timer event.
* @stime_value: The value after which next timer event should fire.
*
* Return: None
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index e85bacff1b50..f8f15332caa2 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -147,7 +147,8 @@ static void __init init_resources(void)
bss_res.end = __pa_symbol(__bss_stop) - 1;
bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
- mem_res_sz = (memblock.memory.cnt + memblock.reserved.cnt) * sizeof(*mem_res);
+ /* + 1 as memblock_alloc() might increase memblock.reserved.cnt */
+ mem_res_sz = (memblock.memory.cnt + memblock.reserved.cnt + 1) * sizeof(*mem_res);
mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES);
if (!mem_res)
panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz);
diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c
index 8a5cf99c0776..1b432264f7ef 100644
--- a/arch/riscv/kernel/time.c
+++ b/arch/riscv/kernel/time.c
@@ -9,6 +9,7 @@
#include <linux/delay.h>
#include <asm/sbi.h>
#include <asm/processor.h>
+#include <asm/timex.h>
unsigned long riscv_timebase;
EXPORT_SYMBOL_GPL(riscv_timebase);
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 3ed2c23601a0..0879b5df11b9 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -17,6 +17,7 @@
#include <linux/module.h>
#include <linux/irq.h>
+#include <asm/asm-prototypes.h>
#include <asm/bug.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index 3fc18f469efb..4f85c6d0ddf8 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -155,7 +155,7 @@ static void __init kasan_populate(void *start, void *end)
memset(start, KASAN_SHADOW_INIT, end - start);
}
-void __init kasan_shallow_populate(void *start, void *end)
+static void __init kasan_shallow_populate(void *start, void *end)
{
unsigned long vaddr = (unsigned long)start & PAGE_MASK;
unsigned long vend = PAGE_ALIGN((unsigned long)end);
@@ -187,6 +187,8 @@ void __init kasan_shallow_populate(void *start, void *end)
}
vaddr += PAGE_SIZE;
}
+
+ local_flush_tlb_all();
}
void __init kasan_init(void)
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 053fe8b8dec7..a75d94a9bcb2 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -202,7 +202,7 @@ extern unsigned int s390_pci_no_rid;
----------------------------------------------------------------------------- */
/* Base stuff */
int zpci_create_device(u32 fid, u32 fh, enum zpci_state state);
-void zpci_remove_device(struct zpci_dev *zdev);
+void zpci_remove_device(struct zpci_dev *zdev, bool set_error);
int zpci_enable_device(struct zpci_dev *);
int zpci_disable_device(struct zpci_dev *);
int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c
index bc302b86ce28..2e3e7edbe3a0 100644
--- a/arch/s390/kernel/perf_cpum_cf_diag.c
+++ b/arch/s390/kernel/perf_cpum_cf_diag.c
@@ -968,7 +968,7 @@ static int cf_diag_all_start(void)
*/
static size_t cf_diag_needspace(unsigned int sets)
{
- struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+ struct cpu_cf_events *cpuhw = get_cpu_ptr(&cpu_cf_events);
size_t bytes = 0;
int i;
@@ -984,6 +984,7 @@ static size_t cf_diag_needspace(unsigned int sets)
sizeof(((struct s390_ctrset_cpudata *)0)->no_sets));
debug_sprintf_event(cf_diag_dbg, 5, "%s bytes %ld\n", __func__,
bytes);
+ put_cpu_ptr(&cpu_cf_events);
return bytes;
}
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index 73c7afcc0527..f216a1b2f825 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -214,7 +214,7 @@ void vtime_flush(struct task_struct *tsk)
avg_steal = S390_lowcore.avg_steal_timer / 2;
if ((s64) steal > 0) {
S390_lowcore.steal_timer = 0;
- account_steal_time(steal);
+ account_steal_time(cputime_to_nsecs(steal));
avg_steal += steal;
}
S390_lowcore.avg_steal_timer = avg_steal;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 600881d894dd..91064077526d 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -682,16 +682,36 @@ int zpci_disable_device(struct zpci_dev *zdev)
}
EXPORT_SYMBOL_GPL(zpci_disable_device);
-void zpci_remove_device(struct zpci_dev *zdev)
+/* zpci_remove_device - Removes the given zdev from the PCI core
+ * @zdev: the zdev to be removed from the PCI core
+ * @set_error: if true the device's error state is set to permanent failure
+ *
+ * Sets a zPCI device to a configured but offline state; the zPCI
+ * device is still accessible through its hotplug slot and the zPCI
+ * API but is removed from the common code PCI bus, making it
+ * no longer available to drivers.
+ */
+void zpci_remove_device(struct zpci_dev *zdev, bool set_error)
{
struct zpci_bus *zbus = zdev->zbus;
struct pci_dev *pdev;
+ if (!zdev->zbus->bus)
+ return;
+
pdev = pci_get_slot(zbus->bus, zdev->devfn);
if (pdev) {
- if (pdev->is_virtfn)
- return zpci_iov_remove_virtfn(pdev, zdev->vfn);
+ if (set_error)
+ pdev->error_state = pci_channel_io_perm_failure;
+ if (pdev->is_virtfn) {
+ zpci_iov_remove_virtfn(pdev, zdev->vfn);
+ /* balance pci_get_slot */
+ pci_dev_put(pdev);
+ return;
+ }
pci_stop_and_remove_bus_device_locked(pdev);
+ /* balance pci_get_slot */
+ pci_dev_put(pdev);
}
}
@@ -765,7 +785,7 @@ void zpci_release_device(struct kref *kref)
struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
if (zdev->zbus->bus)
- zpci_remove_device(zdev);
+ zpci_remove_device(zdev, false);
switch (zdev->state) {
case ZPCI_FN_STATE_ONLINE:
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index b4162da4e8a2..ac0c65cdd69d 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -76,13 +76,10 @@ void zpci_event_error(void *data)
static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
- struct pci_dev *pdev = NULL;
enum zpci_state state;
+ struct pci_dev *pdev;
int ret;
- if (zdev && zdev->zbus->bus)
- pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
-
zpci_err("avail CCDF:\n");
zpci_err_hex(ccdf, sizeof(*ccdf));
@@ -124,8 +121,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
case 0x0303: /* Deconfiguration requested */
if (!zdev)
break;
- if (pdev)
- zpci_remove_device(zdev);
+ zpci_remove_device(zdev, false);
ret = zpci_disable_device(zdev);
if (ret)
@@ -140,12 +136,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
case 0x0304: /* Configured -> Standby|Reserved */
if (!zdev)
break;
- if (pdev) {
- /* Give the driver a hint that the function is
- * already unusable. */
- pdev->error_state = pci_channel_io_perm_failure;
- zpci_remove_device(zdev);
- }
+ /* Give the driver a hint that the function is
+ * already unusable.
+ */
+ zpci_remove_device(zdev, true);
zdev->fh = ccdf->fh;
zpci_disable_device(zdev);
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 5934d7c8fca0..8a70d4dfa16a 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3659,6 +3659,9 @@ static int intel_pmu_hw_config(struct perf_event *event)
return ret;
if (event->attr.precise_ip) {
+ if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT)
+ return -EINVAL;
+
if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
if (!(event->attr.sample_type &
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 198211c0298c..5aabb0e2964a 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -2010,7 +2010,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
*/
if (!pebs_status && cpuc->pebs_enabled &&
!(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
- pebs_status = cpuc->pebs_enabled;
+ pebs_status = p->status = cpuc->pebs_enabled;
bit = find_first_bit((unsigned long *)&pebs_status,
x86_pmu.max_pebs_events);
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index eda93e5cdf8e..10eca9e8f7f6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -884,12 +884,29 @@ struct kvm_hv_syndbg {
u64 options;
};
+/* Current state of Hyper-V TSC page clocksource */
+enum hv_tsc_page_status {
+ /* TSC page was not set up or disabled */
+ HV_TSC_PAGE_UNSET = 0,
+ /* TSC page MSR was written by the guest, update pending */
+ HV_TSC_PAGE_GUEST_CHANGED,
+ /* TSC page MSR was written by KVM userspace, update pending */
+ HV_TSC_PAGE_HOST_CHANGED,
+ /* TSC page was properly set up and is currently active */
+ HV_TSC_PAGE_SET,
+ /* TSC page is currently being updated and therefore is inactive */
+ HV_TSC_PAGE_UPDATING,
+ /* TSC page was set up with an inaccessible GPA */
+ HV_TSC_PAGE_BROKEN,
+};
+
/* Hyper-V emulation context */
struct kvm_hv {
struct mutex hv_lock;
u64 hv_guest_os_id;
u64 hv_hypercall;
u64 hv_tsc_page;
+ enum hv_tsc_page_status hv_tsc_page_status;
/* Hyper-v based guest crash (NT kernel bugcheck) parameters */
u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
@@ -931,6 +948,12 @@ enum kvm_irqchip_mode {
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
};
+struct kvm_x86_msr_filter {
+ u8 count;
+ bool default_allow:1;
+ struct msr_bitmap_range ranges[16];
+};
+
#define APICV_INHIBIT_REASON_DISABLE 0
#define APICV_INHIBIT_REASON_HYPERV 1
#define APICV_INHIBIT_REASON_NESTED 2
@@ -1025,16 +1048,11 @@ struct kvm_arch {
bool guest_can_read_msr_platform_info;
bool exception_payload_enabled;
+ bool bus_lock_detection_enabled;
+
/* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
u32 user_space_msr_mask;
-
- struct {
- u8 count;
- bool default_allow:1;
- struct msr_bitmap_range ranges[16];
- } msr_filter;
-
- bool bus_lock_detection_enabled;
+ struct kvm_x86_msr_filter __rcu *msr_filter;
struct kvm_pmu_event_filter __rcu *pmu_event_filter;
struct task_struct *nx_lpage_recovery_thread;
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 0074e903687f..8b3ed21c9191 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -551,15 +551,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
*size = fpu_kernel_xstate_size;
}
-/*
- * Thread-synchronous status.
- *
- * This is different from the flags in that nobody else
- * ever touches our thread-synchronous status, so we don't
- * have to worry about atomic accesses.
- */
-#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
-
static inline void
native_load_sp0(unsigned long sp0)
{
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 0d751d5da702..06b740bae431 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -205,10 +205,23 @@ static inline int arch_within_stack_frames(const void * const stack,
#endif
+/*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
+
+#ifndef __ASSEMBLY__
#ifdef CONFIG_COMPAT
#define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */
+
+#define arch_set_restart_data(restart) \
+ do { restart->arch_data = current_thread_info()->status; } while (0)
+
#endif
-#ifndef __ASSEMBLY__
#ifdef CONFIG_X86_32
#define in_ia32_syscall() true
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 0a56bc7626ad..4a39fb429f15 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2342,6 +2342,11 @@ static int cpuid_to_apicid[] = {
[0 ... NR_CPUS - 1] = -1,
};
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
+{
+ return phys_id == cpuid_to_apicid[cpu];
+}
+
#ifdef CONFIG_SMP
/**
* apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index e90cbd6bdb66..d5c691a3208b 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1032,6 +1032,16 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin,
if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) {
irq = mp_irqs[idx].srcbusirq;
legacy = mp_is_legacy_irq(irq);
+ /*
+ * IRQ2 is unusable for historical reasons on systems which
+ * have a legacy PIC. See the comment vs. IRQ2 further down.
+ *
+ * If this gets removed at some point then the related code
+ * in lapic_assign_system_vectors() needs to be adjusted as
+ * well.
+ */
+ if (legacy && irq == PIC_CASCADE_IR)
+ return -EINVAL;
}
mutex_lock(&ioapic_mutex);
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 5e78e01ca3b4..78bb0fae3982 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -836,28 +836,25 @@ static void kvm_kick_cpu(int cpu)
static void kvm_wait(u8 *ptr, u8 val)
{
- unsigned long flags;
-
if (in_nmi())
return;
- local_irq_save(flags);
-
- if (READ_ONCE(*ptr) != val)
- goto out;
-
/*
* halt until it's our turn and kicked. Note that we do safe halt
* for irq enabled case to avoid hang when lock info is overwritten
* in irq spinlock slowpath and no spurious interrupt occur to save us.
*/
- if (arch_irqs_disabled_flags(flags))
- halt();
- else
- safe_halt();
+ if (irqs_disabled()) {
+ if (READ_ONCE(*ptr) == val)
+ halt();
+ } else {
+ local_irq_disable();
-out:
- local_irq_restore(flags);
+ if (READ_ONCE(*ptr) == val)
+ safe_halt();
+
+ local_irq_enable();
+ }
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 4de9b1da4509..a06cb107c0e8 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -766,30 +766,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
{
- /*
- * This function is fundamentally broken as currently
- * implemented.
- *
- * The idea is that we want to trigger a call to the
- * restart_block() syscall and that we want in_ia32_syscall(),
- * in_x32_syscall(), etc. to match whatever they were in the
- * syscall being restarted. We assume that the syscall
- * instruction at (regs->ip - 2) matches whatever syscall
- * instruction we used to enter in the first place.
- *
- * The problem is that we can get here when ptrace pokes
- * syscall-like values into regs even if we're not in a syscall
- * at all.
- *
- * For now, we maintain historical behavior and guess based on
- * stored state. We could do better by saving the actual
- * syscall arch in restart_block or (with caveats on x32) by
- * checking if regs->ip points to 'int $0x80'. The current
- * behavior is incorrect if a tracer has a different bitness
- * than the tracee.
- */
#ifdef CONFIG_IA32_EMULATION
- if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
+ if (current->restart_block.arch_data & TS_COMPAT)
return __NR_ia32_restart_syscall;
#endif
#ifdef CONFIG_X86_X32_ABI
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 58fa8c029867..f98370a39936 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -520,10 +520,10 @@ static u64 get_time_ref_counter(struct kvm *kvm)
u64 tsc;
/*
- * The guest has not set up the TSC page or the clock isn't
- * stable, fall back to get_kvmclock_ns.
+ * Fall back to get_kvmclock_ns() when TSC page hasn't been set up,
+ * is broken, disabled or being updated.
*/
- if (!hv->tsc_ref.tsc_sequence)
+ if (hv->hv_tsc_page_status != HV_TSC_PAGE_SET)
return div_u64(get_kvmclock_ns(kvm), 100);
vcpu = kvm_get_vcpu(kvm, 0);
@@ -1077,6 +1077,21 @@ static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
return true;
}
+/*
+ * Don't touch TSC page values if the guest has opted for TSC emulation after
+ * migration. KVM doesn't fully support reenlightenment notifications and TSC
+ * access emulation and Hyper-V is known to expect the values in TSC page to
+ * stay constant before TSC access emulation is disabled from guest side
+ * (HV_X64_MSR_TSC_EMULATION_STATUS). KVM userspace is expected to preserve TSC
+ * frequency and guest visible TSC value across migration (and prevent it when
+ * TSC scaling is unsupported).
+ */
+static inline bool tsc_page_update_unsafe(struct kvm_hv *hv)
+{
+ return (hv->hv_tsc_page_status != HV_TSC_PAGE_GUEST_CHANGED) &&
+ hv->hv_tsc_emulation_control;
+}
+
void kvm_hv_setup_tsc_page(struct kvm *kvm,
struct pvclock_vcpu_time_info *hv_clock)
{
@@ -1087,7 +1102,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0);
- if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+ if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
+ hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET)
return;
mutex_lock(&hv->hv_lock);
@@ -1101,7 +1117,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
*/
if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
&tsc_seq, sizeof(tsc_seq))))
+ goto out_err;
+
+ if (tsc_seq && tsc_page_update_unsafe(hv)) {
+ if (kvm_read_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
+ goto out_err;
+
+ hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
goto out_unlock;
+ }
/*
* While we're computing and writing the parameters, force the
@@ -1110,15 +1134,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
hv->tsc_ref.tsc_sequence = 0;
if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
&hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
- goto out_unlock;
+ goto out_err;
if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
- goto out_unlock;
+ goto out_err;
/* Ensure sequence is zero before writing the rest of the struct. */
smp_wmb();
if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
- goto out_unlock;
+ goto out_err;
/*
* Now switch to the TSC page mechanism by writing the sequence.
@@ -1131,8 +1155,45 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
smp_wmb();
hv->tsc_ref.tsc_sequence = tsc_seq;
- kvm_write_guest(kvm, gfn_to_gpa(gfn),
- &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
+ if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
+ &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
+ goto out_err;
+
+ hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
+ goto out_unlock;
+
+out_err:
+ hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
+out_unlock:
+ mutex_unlock(&hv->hv_lock);
+}
+
+void kvm_hv_invalidate_tsc_page(struct kvm *kvm)
+{
+ struct kvm_hv *hv = to_kvm_hv(kvm);
+ u64 gfn;
+
+ if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
+ hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET ||
+ tsc_page_update_unsafe(hv))
+ return;
+
+ mutex_lock(&hv->hv_lock);
+
+ if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+ goto out_unlock;
+
+ /* Preserve HV_TSC_PAGE_GUEST_CHANGED/HV_TSC_PAGE_HOST_CHANGED states */
+ if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET)
+ hv->hv_tsc_page_status = HV_TSC_PAGE_UPDATING;
+
+ gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+
+ hv->tsc_ref.tsc_sequence = 0;
+ if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
+ &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
+ hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
+
out_unlock:
mutex_unlock(&hv->hv_lock);
}
@@ -1193,8 +1254,15 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
}
case HV_X64_MSR_REFERENCE_TSC:
hv->hv_tsc_page = data;
- if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
+ if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
+ if (!host)
+ hv->hv_tsc_page_status = HV_TSC_PAGE_GUEST_CHANGED;
+ else
+ hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED;
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
+ } else {
+ hv->hv_tsc_page_status = HV_TSC_PAGE_UNSET;
+ }
break;
case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
return kvm_hv_msr_set_crash_data(kvm,
@@ -1229,6 +1297,9 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
hv->hv_tsc_emulation_control = data;
break;
case HV_X64_MSR_TSC_EMULATION_STATUS:
+ if (data && !host)
+ return 1;
+
hv->hv_tsc_emulation_status = data;
break;
case HV_X64_MSR_TIME_REF_COUNT:
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index e951af1fcb2c..60547d5cb6d7 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -133,6 +133,7 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
void kvm_hv_setup_tsc_page(struct kvm *kvm,
struct pvclock_vcpu_time_info *hv_clock);
+void kvm_hv_invalidate_tsc_page(struct kvm *kvm);
void kvm_hv_init_vm(struct kvm *kvm);
void kvm_hv_destroy_vm(struct kvm *kvm);
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index d708f472cb0d..360983865398 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -78,6 +78,11 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
return to_shadow_page(__pa(sptep));
}
+static inline int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
+{
+ return sp->role.smm ? 1 : 0;
+}
+
static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
{
/*
diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c
index e5f148106e20..b3ed302c1a35 100644
--- a/arch/x86/kvm/mmu/tdp_iter.c
+++ b/arch/x86/kvm/mmu/tdp_iter.c
@@ -21,6 +21,21 @@ static gfn_t round_gfn_for_level(gfn_t gfn, int level)
}
/*
+ * Return the TDP iterator to the root PT and allow it to continue its
+ * traversal over the paging structure from there.
+ */
+void tdp_iter_restart(struct tdp_iter *iter)
+{
+ iter->yielded_gfn = iter->next_last_level_gfn;
+ iter->level = iter->root_level;
+
+ iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
+ tdp_iter_refresh_sptep(iter);
+
+ iter->valid = true;
+}
+
+/*
* Sets a TDP iterator to walk a pre-order traversal of the paging structure
* rooted at root_pt, starting with the walk to translate next_last_level_gfn.
*/
@@ -31,16 +46,12 @@ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
iter->next_last_level_gfn = next_last_level_gfn;
- iter->yielded_gfn = iter->next_last_level_gfn;
iter->root_level = root_level;
iter->min_level = min_level;
- iter->level = root_level;
- iter->pt_path[iter->level - 1] = (tdp_ptep_t)root_pt;
-
- iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
- tdp_iter_refresh_sptep(iter);
+ iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root_pt;
+ iter->as_id = kvm_mmu_page_as_id(sptep_to_sp(root_pt));
- iter->valid = true;
+ tdp_iter_restart(iter);
}
/*
@@ -159,8 +170,3 @@ void tdp_iter_next(struct tdp_iter *iter)
iter->valid = false;
}
-tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter)
-{
- return iter->pt_path[iter->root_level - 1];
-}
-
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
index 4cc177d75c4a..b1748b988d3a 100644
--- a/arch/x86/kvm/mmu/tdp_iter.h
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -36,6 +36,8 @@ struct tdp_iter {
int min_level;
/* The iterator's current level within the paging structure */
int level;
+ /* The address space ID, i.e. SMM vs. regular. */
+ int as_id;
/* A snapshot of the value at sptep */
u64 old_spte;
/*
@@ -62,6 +64,6 @@ tdp_ptep_t spte_to_child_pt(u64 pte, int level);
void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
int min_level, gfn_t next_last_level_gfn);
void tdp_iter_next(struct tdp_iter *iter);
-tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter);
+void tdp_iter_restart(struct tdp_iter *iter);
#endif /* __KVM_X86_MMU_TDP_ITER_H */
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index f56ddc2456a1..1436b1d1f4c0 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -203,11 +203,6 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
u64 old_spte, u64 new_spte, int level,
bool shared);
-static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
-{
- return sp->role.smm ? 1 : 0;
-}
-
static void handle_changed_spte_acc_track(u64 old_spte, u64 new_spte, int level)
{
bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
@@ -301,11 +296,16 @@ static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp,
*
* Given a page table that has been removed from the TDP paging structure,
* iterates through the page table to clear SPTEs and free child page tables.
+ *
+ * Note that pt is passed in as a tdp_ptep_t, but it does not need RCU
+ * protection. Since this thread removed it from the paging structure,
+ * this thread will be responsible for ensuring the page is freed. Hence the
+ * early rcu_dereferences in the function.
*/
-static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt,
+static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
bool shared)
{
- struct kvm_mmu_page *sp = sptep_to_sp(pt);
+ struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt));
int level = sp->role.level;
gfn_t base_gfn = sp->gfn;
u64 old_child_spte;
@@ -318,7 +318,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt,
tdp_mmu_unlink_page(kvm, sp, shared);
for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
- sptep = pt + i;
+ sptep = rcu_dereference(pt) + i;
gfn = base_gfn + (i * KVM_PAGES_PER_HPAGE(level - 1));
if (shared) {
@@ -492,10 +492,6 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
struct tdp_iter *iter,
u64 new_spte)
{
- u64 *root_pt = tdp_iter_root_pt(iter);
- struct kvm_mmu_page *root = sptep_to_sp(root_pt);
- int as_id = kvm_mmu_page_as_id(root);
-
lockdep_assert_held_read(&kvm->mmu_lock);
/*
@@ -509,8 +505,8 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
new_spte) != iter->old_spte)
return false;
- handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
- iter->level, true);
+ handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
+ new_spte, iter->level, true);
return true;
}
@@ -538,7 +534,7 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
* here since the SPTE is going from non-present
* to non-present.
*/
- WRITE_ONCE(*iter->sptep, 0);
+ WRITE_ONCE(*rcu_dereference(iter->sptep), 0);
return true;
}
@@ -564,10 +560,6 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
u64 new_spte, bool record_acc_track,
bool record_dirty_log)
{
- tdp_ptep_t root_pt = tdp_iter_root_pt(iter);
- struct kvm_mmu_page *root = sptep_to_sp(root_pt);
- int as_id = kvm_mmu_page_as_id(root);
-
lockdep_assert_held_write(&kvm->mmu_lock);
/*
@@ -581,13 +573,13 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
WRITE_ONCE(*rcu_dereference(iter->sptep), new_spte);
- __handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
- iter->level, false);
+ __handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
+ new_spte, iter->level, false);
if (record_acc_track)
handle_changed_spte_acc_track(iter->old_spte, new_spte,
iter->level);
if (record_dirty_log)
- handle_changed_spte_dirty_log(kvm, as_id, iter->gfn,
+ handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn,
iter->old_spte, new_spte,
iter->level);
}
@@ -659,9 +651,7 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
WARN_ON(iter->gfn > iter->next_last_level_gfn);
- tdp_iter_start(iter, iter->pt_path[iter->root_level - 1],
- iter->root_level, iter->min_level,
- iter->next_last_level_gfn);
+ tdp_iter_restart(iter);
return true;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f046b08a9612..cb42c201adef 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1526,35 +1526,44 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
{
+ struct kvm_x86_msr_filter *msr_filter;
+ struct msr_bitmap_range *ranges;
struct kvm *kvm = vcpu->kvm;
- struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges;
- u32 count = kvm->arch.msr_filter.count;
- u32 i;
- bool r = kvm->arch.msr_filter.default_allow;
+ bool allowed;
int idx;
+ u32 i;
- /* MSR filtering not set up or x2APIC enabled, allow everything */
- if (!count || (index >= 0x800 && index <= 0x8ff))
+ /* x2APIC MSRs do not support filtering. */
+ if (index >= 0x800 && index <= 0x8ff)
return true;
- /* Prevent collision with set_msr_filter */
idx = srcu_read_lock(&kvm->srcu);
- for (i = 0; i < count; i++) {
+ msr_filter = srcu_dereference(kvm->arch.msr_filter, &kvm->srcu);
+ if (!msr_filter) {
+ allowed = true;
+ goto out;
+ }
+
+ allowed = msr_filter->default_allow;
+ ranges = msr_filter->ranges;
+
+ for (i = 0; i < msr_filter->count; i++) {
u32 start = ranges[i].base;
u32 end = start + ranges[i].nmsrs;
u32 flags = ranges[i].flags;
unsigned long *bitmap = ranges[i].bitmap;
if ((index >= start) && (index < end) && (flags & type)) {
- r = !!test_bit(index - start, bitmap);
+ allowed = !!test_bit(index - start, bitmap);
break;
}
}
+out:
srcu_read_unlock(&kvm->srcu, idx);
- return r;
+ return allowed;
}
EXPORT_SYMBOL_GPL(kvm_msr_allowed);
@@ -2551,6 +2560,8 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
struct kvm_vcpu *vcpu;
struct kvm_arch *ka = &kvm->arch;
+ kvm_hv_invalidate_tsc_page(kvm);
+
spin_lock(&ka->pvclock_gtod_sync_lock);
kvm_make_mclock_inprogress_request(kvm);
/* no guest entries from this point */
@@ -5352,25 +5363,34 @@ split_irqchip_unlock:
return r;
}
-static void kvm_clear_msr_filter(struct kvm *kvm)
+static struct kvm_x86_msr_filter *kvm_alloc_msr_filter(bool default_allow)
+{
+ struct kvm_x86_msr_filter *msr_filter;
+
+ msr_filter = kzalloc(sizeof(*msr_filter), GFP_KERNEL_ACCOUNT);
+ if (!msr_filter)
+ return NULL;
+
+ msr_filter->default_allow = default_allow;
+ return msr_filter;
+}
+
+static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
{
u32 i;
- u32 count = kvm->arch.msr_filter.count;
- struct msr_bitmap_range ranges[16];
- mutex_lock(&kvm->lock);
- kvm->arch.msr_filter.count = 0;
- memcpy(ranges, kvm->arch.msr_filter.ranges, count * sizeof(ranges[0]));
- mutex_unlock(&kvm->lock);
- synchronize_srcu(&kvm->srcu);
+ if (!msr_filter)
+ return;
- for (i = 0; i < count; i++)
- kfree(ranges[i].bitmap);
+ for (i = 0; i < msr_filter->count; i++)
+ kfree(msr_filter->ranges[i].bitmap);
+
+ kfree(msr_filter);
}
-static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user_range)
+static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
+ struct kvm_msr_filter_range *user_range)
{
- struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges;
struct msr_bitmap_range range;
unsigned long *bitmap = NULL;
size_t bitmap_size;
@@ -5404,11 +5424,9 @@ static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user
goto err;
}
- /* Everything ok, add this range identifier to our global pool */
- ranges[kvm->arch.msr_filter.count] = range;
- /* Make sure we filled the array before we tell anyone to walk it */
- smp_wmb();
- kvm->arch.msr_filter.count++;
+ /* Everything ok, add this range identifier. */
+ msr_filter->ranges[msr_filter->count] = range;
+ msr_filter->count++;
return 0;
err:
@@ -5419,10 +5437,11 @@ err:
static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
{
struct kvm_msr_filter __user *user_msr_filter = argp;
+ struct kvm_x86_msr_filter *new_filter, *old_filter;
struct kvm_msr_filter filter;
bool default_allow;
- int r = 0;
bool empty = true;
+ int r = 0;
u32 i;
if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
@@ -5435,25 +5454,32 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
if (empty && !default_allow)
return -EINVAL;
- kvm_clear_msr_filter(kvm);
-
- kvm->arch.msr_filter.default_allow = default_allow;
+ new_filter = kvm_alloc_msr_filter(default_allow);
+ if (!new_filter)
+ return -ENOMEM;
- /*
- * Protect from concurrent calls to this function that could trigger
- * a TOCTOU violation on kvm->arch.msr_filter.count.
- */
- mutex_lock(&kvm->lock);
for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
- r = kvm_add_msr_filter(kvm, &filter.ranges[i]);
- if (r)
- break;
+ r = kvm_add_msr_filter(new_filter, &filter.ranges[i]);
+ if (r) {
+ kvm_free_msr_filter(new_filter);
+ return r;
+ }
}
+ mutex_lock(&kvm->lock);
+
+ /* The per-VM filter is protected by kvm->lock... */
+ old_filter = srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1);
+
+ rcu_assign_pointer(kvm->arch.msr_filter, new_filter);
+ synchronize_srcu(&kvm->srcu);
+
+ kvm_free_msr_filter(old_filter);
+
kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
mutex_unlock(&kvm->lock);
- return r;
+ return 0;
}
long kvm_arch_vm_ioctl(struct file *filp,
@@ -6603,7 +6629,7 @@ static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
int cpu = get_cpu();
cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
- smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
+ on_each_cpu_mask(vcpu->arch.wbinvd_dirty_mask,
wbinvd_ipi, NULL, 1);
put_cpu();
cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
@@ -10634,8 +10660,6 @@ void kvm_arch_pre_destroy_vm(struct kvm *kvm)
void kvm_arch_destroy_vm(struct kvm *kvm)
{
- u32 i;
-
if (current->mm == kvm->mm) {
/*
* Free memory regions allocated on behalf of userspace,
@@ -10651,8 +10675,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
mutex_unlock(&kvm->slots_lock);
}
static_call_cond(kvm_x86_vm_destroy)(kvm);
- for (i = 0; i < kvm->arch.msr_filter.count; i++)
- kfree(kvm->arch.msr_filter.ranges[i].bitmap);
+ kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
kvm_pic_destroy(kvm);
kvm_ioapic_destroy(kvm);
kvm_free_vcpus(kvm);
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c
index 1ac8578258af..b42bfdab01a9 100644
--- a/arch/x86/platform/iris/iris.c
+++ b/arch/x86/platform/iris/iris.c
@@ -27,7 +27,6 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Sébastien Hinderer <Sebastien.Hinderer@ens-lyon.org>");
MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille");
-MODULE_SUPPORTED_DEVICE("Eurobraille/Iris");
static bool force;