summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
authorDaniel Vetter <daniel.vetter@ffwll.ch>2020-11-10 15:58:05 +0300
committerDaniel Vetter <daniel.vetter@ffwll.ch>2020-11-10 16:36:36 +0300
commit512bce50a41c528fa15c4c014293e7bebf018658 (patch)
tree090a6989d3d12f99ed2475b9ee8069166b9cb5d3 /arch
parent5b8c596976d4338942dd889b66cd06dc766424e1 (diff)
parentf8394f232b1eab649ce2df5c5f15b0e528c92091 (diff)
downloadlinux-512bce50a41c528fa15c4c014293e7bebf018658.tar.xz
Merge v5.10-rc3 into drm-next
We need commit f8f6ae5d077a ("mm: always have io_remap_pfn_range() set pgprot_decrypted()") to be able to merge Jason's cleanup patch. Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Diffstat (limited to 'arch')
-rw-r--r--arch/arc/kernel/head.S17
-rw-r--r--arch/arc/kernel/stacktrace.c7
-rw-r--r--arch/arc/plat-hsdk/platform.c17
-rw-r--r--arch/arm/mm/init.c4
-rw-r--r--arch/arm64/Kconfig2
-rw-r--r--arch/arm64/include/asm/brk-imm.h2
-rw-r--r--arch/arm64/include/asm/debug-monitors.h1
-rw-r--r--arch/arm64/include/asm/kprobes.h2
-rw-r--r--arch/arm64/kernel/kexec_image.c41
-rw-r--r--arch/arm64/kernel/machine_kexec_file.c9
-rw-r--r--arch/arm64/kernel/probes/kprobes.c69
-rw-r--r--arch/powerpc/include/asm/nohash/32/kup-8xx.h2
-rw-r--r--arch/powerpc/include/asm/nohash/32/mmu-8xx.h47
-rw-r--r--arch/powerpc/include/asm/nohash/32/pte-8xx.h9
-rw-r--r--arch/powerpc/include/asm/topology.h12
-rw-r--r--arch/powerpc/include/asm/uaccess.h4
-rw-r--r--arch/powerpc/kernel/eeh_cache.c5
-rw-r--r--arch/powerpc/kernel/head_40x.S8
-rw-r--r--arch/powerpc/kernel/head_8xx.S46
-rw-r--r--arch/powerpc/kernel/head_book3s_32.S12
-rw-r--r--arch/powerpc/kernel/smp.c3
-rw-r--r--arch/riscv/include/asm/uaccess.h2
-rw-r--r--arch/riscv/kernel/ftrace.c2
-rw-r--r--arch/riscv/kernel/head.S5
-rw-r--r--arch/riscv/kernel/vdso/.gitignore1
-rw-r--r--arch/riscv/kernel/vdso/Makefile18
-rwxr-xr-xarch/riscv/kernel/vdso/so2s.sh6
-rw-r--r--arch/riscv/mm/fault.c4
-rw-r--r--arch/riscv/mm/init.c32
-rw-r--r--arch/s390/configs/debug_defconfig10
-rw-r--r--arch/s390/configs/defconfig9
-rw-r--r--arch/s390/configs/zfcpdump_defconfig2
-rw-r--r--arch/s390/include/asm/pgtable.h52
-rw-r--r--arch/s390/include/asm/vdso/vdso.h0
-rw-r--r--arch/s390/kernel/asm-offsets.c8
-rw-r--r--arch/s390/kernel/smp.c3
-rw-r--r--arch/s390/pci/pci_event.c4
-rw-r--r--arch/x86/boot/compressed/ident_map_64.c1
-rw-r--r--arch/x86/boot/compressed/mem_encrypt.S20
-rw-r--r--arch/x86/boot/compressed/misc.h2
-rw-r--r--arch/x86/hyperv/hv_apic.c14
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c23
-rw-r--r--arch/x86/kernel/cpu/bugs.c51
-rw-r--r--arch/x86/kernel/head_64.S16
-rw-r--r--arch/x86/kernel/sev-es-shared.c26
-rw-r--r--arch/x86/kernel/sev-es.c20
-rw-r--r--arch/x86/kernel/sev_verify_cbit.S89
-rw-r--r--arch/x86/lib/memcpy_64.S4
-rw-r--r--arch/x86/lib/memmove_64.S4
-rw-r--r--arch/x86/lib/memset_64.S4
-rw-r--r--arch/x86/mm/mem_encrypt.c1
-rw-r--r--arch/xtensa/mm/init.c4
52 files changed, 462 insertions, 294 deletions
diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S
index 17fd1ed700cc..9152782444b5 100644
--- a/arch/arc/kernel/head.S
+++ b/arch/arc/kernel/head.S
@@ -67,7 +67,22 @@
sr r5, [ARC_REG_LPB_CTRL]
1:
#endif /* CONFIG_ARC_LPB_DISABLE */
-#endif
+
+ /* On HSDK, CCMs need to remapped super early */
+#ifdef CONFIG_ARC_SOC_HSDK
+ mov r6, 0x60000000
+ lr r5, [ARC_REG_ICCM_BUILD]
+ breq r5, 0, 1f
+ sr r6, [ARC_REG_AUX_ICCM]
+1:
+ lr r5, [ARC_REG_DCCM_BUILD]
+ breq r5, 0, 2f
+ sr r6, [ARC_REG_AUX_DCCM]
+2:
+#endif /* CONFIG_ARC_SOC_HSDK */
+
+#endif /* CONFIG_ISA_ARCV2 */
+
; Config DSP_CTRL properly, so kernel may use integer multiply,
; multiply-accumulate, and divide operations
DSP_EARLY_INIT
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index feba91c9d969..b23986f98450 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -112,7 +112,7 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
int (*consumer_fn) (unsigned int, void *), void *arg)
{
#ifdef CONFIG_ARC_DW2_UNWIND
- int ret = 0;
+ int ret = 0, cnt = 0;
unsigned int address;
struct unwind_frame_info frame_info;
@@ -132,6 +132,11 @@ arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
break;
frame_info.regs.r63 = frame_info.regs.r31;
+
+ if (cnt++ > 128) {
+ printk("unwinder looping too long, aborting !\n");
+ return 0;
+ }
}
return address; /* return the last address it saw */
diff --git a/arch/arc/plat-hsdk/platform.c b/arch/arc/plat-hsdk/platform.c
index 0b63fc095b99..b3ea1fa11f87 100644
--- a/arch/arc/plat-hsdk/platform.c
+++ b/arch/arc/plat-hsdk/platform.c
@@ -17,22 +17,6 @@ int arc_hsdk_axi_dmac_coherent __section(".data") = 0;
#define ARC_CCM_UNUSED_ADDR 0x60000000
-static void __init hsdk_init_per_cpu(unsigned int cpu)
-{
- /*
- * By default ICCM is mapped to 0x7z while this area is used for
- * kernel virtual mappings, so move it to currently unused area.
- */
- if (cpuinfo_arc700[cpu].iccm.sz)
- write_aux_reg(ARC_REG_AUX_ICCM, ARC_CCM_UNUSED_ADDR);
-
- /*
- * By default DCCM is mapped to 0x8z while this area is used by kernel,
- * so move it to currently unused area.
- */
- if (cpuinfo_arc700[cpu].dccm.sz)
- write_aux_reg(ARC_REG_AUX_DCCM, ARC_CCM_UNUSED_ADDR);
-}
#define ARC_PERIPHERAL_BASE 0xf0000000
#define CREG_BASE (ARC_PERIPHERAL_BASE + 0x1000)
@@ -339,5 +323,4 @@ static const char *hsdk_compat[] __initconst = {
MACHINE_START(SIMULATION, "hsdk")
.dt_compat = hsdk_compat,
.init_early = hsdk_init_early,
- .init_per_cpu = hsdk_init_per_cpu,
MACHINE_END
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index d57112a276f5..c23dbf8bebee 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -354,8 +354,8 @@ static void __init free_highpages(void)
/* set highmem page free */
for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
&range_start, &range_end, NULL) {
- unsigned long start = PHYS_PFN(range_start);
- unsigned long end = PHYS_PFN(range_end);
+ unsigned long start = PFN_UP(range_start);
+ unsigned long end = PFN_DOWN(range_end);
/* Ignore complete lowmem entries */
if (end <= max_low)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 1d466addb078..1515f6f153a0 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1002,7 +1002,7 @@ config NUMA
config NODES_SHIFT
int "Maximum NUMA Nodes (as a power of 2)"
range 1 10
- default "2"
+ default "4"
depends on NEED_MULTIPLE_NODES
help
Specify the maximum number of NUMA Nodes available on the target
diff --git a/arch/arm64/include/asm/brk-imm.h b/arch/arm64/include/asm/brk-imm.h
index e3d47b52161d..ec7720dbe2c8 100644
--- a/arch/arm64/include/asm/brk-imm.h
+++ b/arch/arm64/include/asm/brk-imm.h
@@ -10,6 +10,7 @@
* #imm16 values used for BRK instruction generation
* 0x004: for installing kprobes
* 0x005: for installing uprobes
+ * 0x006: for kprobe software single-step
* Allowed values for kgdb are 0x400 - 0x7ff
* 0x100: for triggering a fault on purpose (reserved)
* 0x400: for dynamic BRK instruction
@@ -19,6 +20,7 @@
*/
#define KPROBES_BRK_IMM 0x004
#define UPROBES_BRK_IMM 0x005
+#define KPROBES_BRK_SS_IMM 0x006
#define FAULT_BRK_IMM 0x100
#define KGDB_DYN_DBG_BRK_IMM 0x400
#define KGDB_COMPILED_DBG_BRK_IMM 0x401
diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h
index 0b298f48f5bf..657c921fd784 100644
--- a/arch/arm64/include/asm/debug-monitors.h
+++ b/arch/arm64/include/asm/debug-monitors.h
@@ -53,6 +53,7 @@
/* kprobes BRK opcodes with ESR encoding */
#define BRK64_OPCODE_KPROBES (AARCH64_BREAK_MON | (KPROBES_BRK_IMM << 5))
+#define BRK64_OPCODE_KPROBES_SS (AARCH64_BREAK_MON | (KPROBES_BRK_SS_IMM << 5))
/* uprobes BRK opcodes with ESR encoding */
#define BRK64_OPCODE_UPROBES (AARCH64_BREAK_MON | (UPROBES_BRK_IMM << 5))
diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h
index 97e511d645a2..8699ce30f587 100644
--- a/arch/arm64/include/asm/kprobes.h
+++ b/arch/arm64/include/asm/kprobes.h
@@ -16,7 +16,7 @@
#include <linux/percpu.h>
#define __ARCH_WANT_KPROBES_INSN_SLOT
-#define MAX_INSN_SIZE 1
+#define MAX_INSN_SIZE 2
#define flush_insn_slot(p) do { } while (0)
#define kretprobe_blacklist_size 0
diff --git a/arch/arm64/kernel/kexec_image.c b/arch/arm64/kernel/kexec_image.c
index af9987c154ca..66adee8b5fc8 100644
--- a/arch/arm64/kernel/kexec_image.c
+++ b/arch/arm64/kernel/kexec_image.c
@@ -43,7 +43,7 @@ static void *image_load(struct kimage *image,
u64 flags, value;
bool be_image, be_kernel;
struct kexec_buf kbuf;
- unsigned long text_offset;
+ unsigned long text_offset, kernel_segment_number;
struct kexec_segment *kernel_segment;
int ret;
@@ -88,11 +88,37 @@ static void *image_load(struct kimage *image,
/* Adjust kernel segment with TEXT_OFFSET */
kbuf.memsz += text_offset;
- ret = kexec_add_buffer(&kbuf);
- if (ret)
+ kernel_segment_number = image->nr_segments;
+
+ /*
+ * The location of the kernel segment may make it impossible to satisfy
+ * the other segment requirements, so we try repeatedly to find a
+ * location that will work.
+ */
+ while ((ret = kexec_add_buffer(&kbuf)) == 0) {
+ /* Try to load additional data */
+ kernel_segment = &image->segment[kernel_segment_number];
+ ret = load_other_segments(image, kernel_segment->mem,
+ kernel_segment->memsz, initrd,
+ initrd_len, cmdline);
+ if (!ret)
+ break;
+
+ /*
+ * We couldn't find space for the other segments; erase the
+ * kernel segment and try the next available hole.
+ */
+ image->nr_segments -= 1;
+ kbuf.buf_min = kernel_segment->mem + kernel_segment->memsz;
+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+ }
+
+ if (ret) {
+ pr_err("Could not find any suitable kernel location!");
return ERR_PTR(ret);
+ }
- kernel_segment = &image->segment[image->nr_segments - 1];
+ kernel_segment = &image->segment[kernel_segment_number];
kernel_segment->mem += text_offset;
kernel_segment->memsz -= text_offset;
image->start = kernel_segment->mem;
@@ -101,12 +127,7 @@ static void *image_load(struct kimage *image,
kernel_segment->mem, kbuf.bufsz,
kernel_segment->memsz);
- /* Load additional data */
- ret = load_other_segments(image,
- kernel_segment->mem, kernel_segment->memsz,
- initrd, initrd_len, cmdline);
-
- return ERR_PTR(ret);
+ return 0;
}
#ifdef CONFIG_KEXEC_IMAGE_VERIFY_SIG
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index 5b0e67b93cdc..03210f644790 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -240,6 +240,11 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
return ret;
}
+/*
+ * Tries to add the initrd and DTB to the image. If it is not possible to find
+ * valid locations, this function will undo changes to the image and return non
+ * zero.
+ */
int load_other_segments(struct kimage *image,
unsigned long kernel_load_addr,
unsigned long kernel_size,
@@ -248,7 +253,8 @@ int load_other_segments(struct kimage *image,
{
struct kexec_buf kbuf;
void *headers, *dtb = NULL;
- unsigned long headers_sz, initrd_load_addr = 0, dtb_len;
+ unsigned long headers_sz, initrd_load_addr = 0, dtb_len,
+ orig_segments = image->nr_segments;
int ret = 0;
kbuf.image = image;
@@ -334,6 +340,7 @@ int load_other_segments(struct kimage *image,
return 0;
out_err:
+ image->nr_segments = orig_segments;
vfree(dtb);
return ret;
}
diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index deba738142ed..f11a1a1f7026 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -36,25 +36,16 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
static void __kprobes
post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
-static int __kprobes patch_text(kprobe_opcode_t *addr, u32 opcode)
-{
- void *addrs[1];
- u32 insns[1];
-
- addrs[0] = addr;
- insns[0] = opcode;
-
- return aarch64_insn_patch_text(addrs, insns, 1);
-}
-
static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
{
+ kprobe_opcode_t *addr = p->ainsn.api.insn;
+ void *addrs[] = {addr, addr + 1};
+ u32 insns[] = {p->opcode, BRK64_OPCODE_KPROBES_SS};
+
/* prepare insn slot */
- patch_text(p->ainsn.api.insn, p->opcode);
+ aarch64_insn_patch_text(addrs, insns, 2);
- flush_icache_range((uintptr_t) (p->ainsn.api.insn),
- (uintptr_t) (p->ainsn.api.insn) +
- MAX_INSN_SIZE * sizeof(kprobe_opcode_t));
+ flush_icache_range((uintptr_t)addr, (uintptr_t)(addr + MAX_INSN_SIZE));
/*
* Needs restoring of return address after stepping xol.
@@ -128,13 +119,18 @@ void *alloc_insn_page(void)
/* arm kprobe: install breakpoint in text */
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
- patch_text(p->addr, BRK64_OPCODE_KPROBES);
+ void *addr = p->addr;
+ u32 insn = BRK64_OPCODE_KPROBES;
+
+ aarch64_insn_patch_text(&addr, &insn, 1);
}
/* disarm kprobe: remove breakpoint from text */
void __kprobes arch_disarm_kprobe(struct kprobe *p)
{
- patch_text(p->addr, p->opcode);
+ void *addr = p->addr;
+
+ aarch64_insn_patch_text(&addr, &p->opcode, 1);
}
void __kprobes arch_remove_kprobe(struct kprobe *p)
@@ -163,20 +159,15 @@ static void __kprobes set_current_kprobe(struct kprobe *p)
}
/*
- * Interrupts need to be disabled before single-step mode is set, and not
- * reenabled until after single-step mode ends.
- * Without disabling interrupt on local CPU, there is a chance of
- * interrupt occurrence in the period of exception return and start of
- * out-of-line single-step, that result in wrongly single stepping
- * into the interrupt handler.
+ * Mask all of DAIF while executing the instruction out-of-line, to keep things
+ * simple and avoid nesting exceptions. Interrupts do have to be disabled since
+ * the kprobe state is per-CPU and doesn't get migrated.
*/
static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb,
struct pt_regs *regs)
{
kcb->saved_irqflag = regs->pstate & DAIF_MASK;
- regs->pstate |= PSR_I_BIT;
- /* Unmask PSTATE.D for enabling software step exceptions. */
- regs->pstate &= ~PSR_D_BIT;
+ regs->pstate |= DAIF_MASK;
}
static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb,
@@ -219,10 +210,7 @@ static void __kprobes setup_singlestep(struct kprobe *p,
slot = (unsigned long)p->ainsn.api.insn;
set_ss_context(kcb, slot); /* mark pending ss */
-
- /* IRQs and single stepping do not mix well. */
kprobes_save_local_irqflag(kcb, regs);
- kernel_enable_single_step(regs);
instruction_pointer_set(regs, slot);
} else {
/* insn simulation */
@@ -273,12 +261,8 @@ post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs)
}
/* call post handler */
kcb->kprobe_status = KPROBE_HIT_SSDONE;
- if (cur->post_handler) {
- /* post_handler can hit breakpoint and single step
- * again, so we enable D-flag for recursive exception.
- */
+ if (cur->post_handler)
cur->post_handler(cur, regs, 0);
- }
reset_current_kprobe();
}
@@ -302,8 +286,6 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int fsr)
if (!instruction_pointer(regs))
BUG();
- kernel_disable_single_step();
-
if (kcb->kprobe_status == KPROBE_REENTER)
restore_previous_kprobe(kcb);
else
@@ -365,10 +347,6 @@ static void __kprobes kprobe_handler(struct pt_regs *regs)
* pre-handler and it returned non-zero, it will
* modify the execution path and no need to single
* stepping. Let's just reset current kprobe and exit.
- *
- * pre_handler can hit a breakpoint and can step thru
- * before return, keep PSTATE D-flag enabled until
- * pre_handler return back.
*/
if (!p->pre_handler || !p->pre_handler(p, regs)) {
setup_singlestep(p, regs, kcb, 0);
@@ -399,7 +377,7 @@ kprobe_ss_hit(struct kprobe_ctlblk *kcb, unsigned long addr)
}
static int __kprobes
-kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr)
+kprobe_breakpoint_ss_handler(struct pt_regs *regs, unsigned int esr)
{
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
int retval;
@@ -409,16 +387,15 @@ kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr)
if (retval == DBG_HOOK_HANDLED) {
kprobes_restore_local_irqflag(kcb, regs);
- kernel_disable_single_step();
-
post_kprobe_handler(kcb, regs);
}
return retval;
}
-static struct step_hook kprobes_step_hook = {
- .fn = kprobe_single_step_handler,
+static struct break_hook kprobes_break_ss_hook = {
+ .imm = KPROBES_BRK_SS_IMM,
+ .fn = kprobe_breakpoint_ss_handler,
};
static int __kprobes
@@ -486,7 +463,7 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p)
int __init arch_init_kprobes(void)
{
register_kernel_break_hook(&kprobes_break_hook);
- register_kernel_step_hook(&kprobes_step_hook);
+ register_kernel_break_hook(&kprobes_break_ss_hook);
return 0;
}
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 85ed2390fb99..567cdc557402 100644
--- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
@@ -63,7 +63,7 @@ static inline void restore_user_access(unsigned long flags)
static inline bool
bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
{
- return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xf0000000),
+ return WARN(!((regs->kuap ^ MD_APG_KUAP) & 0xff000000),
"Bug: fault blocked by AP register !");
}
diff --git a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
index 1d9ac0f9c794..0bd1b144eb76 100644
--- a/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/mmu-8xx.h
@@ -33,19 +33,18 @@
* respectively NA for All or X for Supervisor and no access for User.
* Then we use the APG to say whether accesses are according to Page rules or
* "all Supervisor" rules (Access to all)
- * Therefore, we define 2 APG groups. lsb is _PMD_USER
- * 0 => Kernel => 01 (all accesses performed according to page definition)
- * 1 => User => 00 (all accesses performed as supervisor iaw page definition)
- * 2-15 => Not Used
- */
-#define MI_APG_INIT 0x40000000
-
-/*
- * 0 => Kernel => 01 (all accesses performed according to page definition)
- * 1 => User => 10 (all accesses performed according to swaped page definition)
- * 2-15 => Not Used
- */
-#define MI_APG_KUEP 0x60000000
+ * _PAGE_ACCESSED is also managed via APG. When _PAGE_ACCESSED is not set, say
+ * "all User" rules, that will lead to NA for all.
+ * Therefore, we define 4 APG groups. lsb is _PAGE_ACCESSED
+ * 0 => Kernel => 11 (all accesses performed according as user iaw page definition)
+ * 1 => Kernel+Accessed => 01 (all accesses performed according to page definition)
+ * 2 => User => 11 (all accesses performed according as user iaw page definition)
+ * 3 => User+Accessed => 00 (all accesses performed as supervisor iaw page definition) for INIT
+ * => 10 (all accesses performed according to swaped page definition) for KUEP
+ * 4-15 => Not Used
+ */
+#define MI_APG_INIT 0xdc000000
+#define MI_APG_KUEP 0xde000000
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MI_RPN is written, bits in
@@ -106,25 +105,9 @@
#define MD_Ks 0x80000000 /* Should not be set */
#define MD_Kp 0x40000000 /* Should always be set */
-/*
- * All pages' PP data bits are set to either 000 or 011 or 001, which means
- * respectively RW for Supervisor and no access for User, or RO for
- * Supervisor and no access for user and NA for ALL.
- * Then we use the APG to say whether accesses are according to Page rules or
- * "all Supervisor" rules (Access to all)
- * Therefore, we define 2 APG groups. lsb is _PMD_USER
- * 0 => Kernel => 01 (all accesses performed according to page definition)
- * 1 => User => 00 (all accesses performed as supervisor iaw page definition)
- * 2-15 => Not Used
- */
-#define MD_APG_INIT 0x40000000
-
-/*
- * 0 => No user => 01 (all accesses performed according to page definition)
- * 1 => User => 10 (all accesses performed according to swaped page definition)
- * 2-15 => Not Used
- */
-#define MD_APG_KUAP 0x60000000
+/* See explanation above at the definition of MI_APG_INIT */
+#define MD_APG_INIT 0xdc000000
+#define MD_APG_KUAP 0xde000000
/* The effective page number register. When read, contains the information
* about the last instruction TLB miss. When MD_RPN is written, bits in
diff --git a/arch/powerpc/include/asm/nohash/32/pte-8xx.h b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
index 66f403a7da44..1581204467e1 100644
--- a/arch/powerpc/include/asm/nohash/32/pte-8xx.h
+++ b/arch/powerpc/include/asm/nohash/32/pte-8xx.h
@@ -39,9 +39,9 @@
* into the TLB.
*/
#define _PAGE_GUARDED 0x0010 /* Copied to L1 G entry in DTLB */
-#define _PAGE_SPECIAL 0x0020 /* SW entry */
+#define _PAGE_ACCESSED 0x0020 /* Copied to L1 APG 1 entry in I/DTLB */
#define _PAGE_EXEC 0x0040 /* Copied to PP (bit 21) in ITLB */
-#define _PAGE_ACCESSED 0x0080 /* software: page referenced */
+#define _PAGE_SPECIAL 0x0080 /* SW entry */
#define _PAGE_NA 0x0200 /* Supervisor NA, User no access */
#define _PAGE_RO 0x0600 /* Supervisor RO, User no access */
@@ -59,11 +59,12 @@
#define _PMD_PRESENT 0x0001
#define _PMD_PRESENT_MASK _PMD_PRESENT
-#define _PMD_BAD 0x0fd0
+#define _PMD_BAD 0x0f90
#define _PMD_PAGE_MASK 0x000c
#define _PMD_PAGE_8M 0x000c
#define _PMD_PAGE_512K 0x0004
-#define _PMD_USER 0x0020 /* APG 1 */
+#define _PMD_ACCESSED 0x0020 /* APG 1 */
+#define _PMD_USER 0x0040 /* APG 2 */
#define _PTE_NONE_MASK 0
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 8728590f514a..3beeb030cd78 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -6,6 +6,7 @@
struct device;
struct device_node;
+struct drmem_lmb;
#ifdef CONFIG_NUMA
@@ -61,6 +62,9 @@ static inline int early_cpu_to_node(int cpu)
*/
return (nid < 0) ? 0 : nid;
}
+
+int of_drconf_to_nid_single(struct drmem_lmb *lmb);
+
#else
static inline int early_cpu_to_node(int cpu) { return 0; }
@@ -84,10 +88,12 @@ static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
return 0;
}
-#endif /* CONFIG_NUMA */
+static inline int of_drconf_to_nid_single(struct drmem_lmb *lmb)
+{
+ return first_online_node;
+}
-struct drmem_lmb;
-int of_drconf_to_nid_single(struct drmem_lmb *lmb);
+#endif /* CONFIG_NUMA */
#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
extern int find_and_online_cpu_nid(int cpu);
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index ef5bbb705c08..501c9a79038c 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -178,7 +178,7 @@ do { \
* are no aliasing issues.
*/
#define __put_user_asm_goto(x, addr, label, op) \
- asm volatile goto( \
+ asm_volatile_goto( \
"1: " op "%U1%X1 %0,%1 # put_user\n" \
EX_TABLE(1b, %l2) \
: \
@@ -191,7 +191,7 @@ do { \
__put_user_asm_goto(x, ptr, label, "std")
#else /* __powerpc64__ */
#define __put_user_asm2_goto(x, addr, label) \
- asm volatile goto( \
+ asm_volatile_goto( \
"1: stw%X1 %0, %1\n" \
"2: stw%X1 %L0, %L1\n" \
EX_TABLE(1b, %l2) \
diff --git a/arch/powerpc/kernel/eeh_cache.c b/arch/powerpc/kernel/eeh_cache.c
index 6b50bf15d8c1..bf3270426d82 100644
--- a/arch/powerpc/kernel/eeh_cache.c
+++ b/arch/powerpc/kernel/eeh_cache.c
@@ -264,8 +264,9 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v)
{
struct pci_io_addr_range *piar;
struct rb_node *n;
+ unsigned long flags;
- spin_lock(&pci_io_addr_cache_root.piar_lock);
+ spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
for (n = rb_first(&pci_io_addr_cache_root.rb_root); n; n = rb_next(n)) {
piar = rb_entry(n, struct pci_io_addr_range, rb_node);
@@ -273,7 +274,7 @@ static int eeh_addr_cache_show(struct seq_file *s, void *v)
(piar->flags & IORESOURCE_IO) ? "i/o" : "mem",
&piar->addr_lo, &piar->addr_hi, pci_name(piar->pcidev));
}
- spin_unlock(&pci_io_addr_cache_root.piar_lock);
+ spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
return 0;
}
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 44c9018aed1b..a1ae00689e0f 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -284,11 +284,7 @@ _ENTRY(saved_ksp_limit)
rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */
lwz r11, 0(r11) /* Get Linux PTE */
-#ifdef CONFIG_SWAP
li r9, _PAGE_PRESENT | _PAGE_ACCESSED
-#else
- li r9, _PAGE_PRESENT
-#endif
andc. r9, r9, r11 /* Check permission */
bne 5f
@@ -369,11 +365,7 @@ _ENTRY(saved_ksp_limit)
rlwimi r11, r10, 22, 20, 29 /* Compute PTE address */
lwz r11, 0(r11) /* Get Linux PTE */
-#ifdef CONFIG_SWAP
li r9, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
-#else
- li r9, _PAGE_PRESENT | _PAGE_EXEC
-#endif
andc. r9, r9, r11 /* Check permission */
bne 5f
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 9f359d3fba74..ee0bfebc375f 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -202,9 +202,7 @@ SystemCall:
InstructionTLBMiss:
mtspr SPRN_SPRG_SCRATCH0, r10
-#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS)
mtspr SPRN_SPRG_SCRATCH1, r11
-#endif
/* If we are faulting a kernel address, we have to use the
* kernel page tables.
@@ -224,25 +222,13 @@ InstructionTLBMiss:
3:
mtcr r11
#endif
-#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT)
lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */
mtspr SPRN_MD_TWC, r11
-#else
- lwz r10, (swapper_pg_dir-PAGE_OFFSET)@l(r10) /* Get level 1 entry */
- mtspr SPRN_MI_TWC, r10 /* Set segment attributes */
- mtspr SPRN_MD_TWC, r10
-#endif
mfspr r10, SPRN_MD_TWC
lwz r10, 0(r10) /* Get the pte */
-#if defined(CONFIG_HUGETLBFS) || !defined(CONFIG_PIN_TLB_TEXT)
+ rlwimi r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED
rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K
mtspr SPRN_MI_TWC, r11
-#endif
-#ifdef CONFIG_SWAP
- rlwinm r11, r10, 32-5, _PAGE_PRESENT
- and r11, r11, r10
- rlwimi r10, r11, 0, _PAGE_PRESENT
-#endif
/* The Linux PTE won't go exactly into the MMU TLB.
* Software indicator bits 20 and 23 must be clear.
* Software indicator bits 22, 24, 25, 26, and 27 must be
@@ -256,9 +242,7 @@ InstructionTLBMiss:
/* Restore registers */
0: mfspr r10, SPRN_SPRG_SCRATCH0
-#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP) || defined(CONFIG_HUGETLBFS)
mfspr r11, SPRN_SPRG_SCRATCH1
-#endif
rfi
patch_site 0b, patch__itlbmiss_exit_1
@@ -268,9 +252,7 @@ InstructionTLBMiss:
addi r10, r10, 1
stw r10, (itlb_miss_counter - PAGE_OFFSET)@l(0)
mfspr r10, SPRN_SPRG_SCRATCH0
-#if defined(ITLB_MISS_KERNEL) || defined(CONFIG_SWAP)
mfspr r11, SPRN_SPRG_SCRATCH1
-#endif
rfi
#endif
@@ -297,30 +279,16 @@ DataStoreTLBMiss:
mfspr r10, SPRN_MD_TWC
lwz r10, 0(r10) /* Get the pte */
- /* Insert the Guarded flag into the TWC from the Linux PTE.
+ /* Insert Guarded and Accessed flags into the TWC from the Linux PTE.
* It is bit 27 of both the Linux PTE and the TWC (at least
* I got that right :-). It will be better when we can put
* this into the Linux pgd/pmd and load it in the operation
* above.
*/
- rlwimi r11, r10, 0, _PAGE_GUARDED
+ rlwimi r11, r10, 0, _PAGE_GUARDED | _PAGE_ACCESSED
rlwimi r11, r10, 32 - 9, _PMD_PAGE_512K
mtspr SPRN_MD_TWC, r11
- /* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set.
- * We also need to know if the insn is a load/store, so:
- * Clear _PAGE_PRESENT and load that which will
- * trap into DTLB Error with store bit set accordinly.
- */
- /* PRESENT=0x1, ACCESSED=0x20
- * r11 = ((r10 & PRESENT) & ((r10 & ACCESSED) >> 5));
- * r10 = (r10 & ~PRESENT) | r11;
- */
-#ifdef CONFIG_SWAP
- rlwinm r11, r10, 32-5, _PAGE_PRESENT
- and r11, r11, r10
- rlwimi r10, r11, 0, _PAGE_PRESENT
-#endif
/* The Linux PTE won't go exactly into the MMU TLB.
* Software indicator bits 24, 25, 26, and 27 must be
* set. All other Linux PTE bits control the behavior
@@ -711,7 +679,7 @@ initial_mmu:
li r9, 4 /* up to 4 pages of 8M */
mtctr r9
lis r9, KERNELBASE@h /* Create vaddr for TLB */
- li r10, MI_PS8MEG | MI_SVALID /* Set 8M byte page */
+ li r10, MI_PS8MEG | _PMD_ACCESSED | MI_SVALID
li r11, MI_BOOTINIT /* Create RPN for address 0 */
1:
mtspr SPRN_MI_CTR, r8 /* Set instruction MMU control */
@@ -775,7 +743,7 @@ _GLOBAL(mmu_pin_tlb)
#ifdef CONFIG_PIN_TLB_TEXT
LOAD_REG_IMMEDIATE(r5, 28 << 8)
LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
- LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG)
+ LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
LOAD_REG_IMMEDIATE(r8, 0xf0 | _PAGE_RO | _PAGE_SPS | _PAGE_SH | _PAGE_PRESENT)
LOAD_REG_ADDR(r9, _sinittext)
li r0, 4
@@ -797,7 +765,7 @@ _GLOBAL(mmu_pin_tlb)
LOAD_REG_IMMEDIATE(r5, 28 << 8 | MD_TWAM)
#ifdef CONFIG_PIN_TLB_DATA
LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET)
- LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG)
+ LOAD_REG_IMMEDIATE(r7, MI_SVALID | MI_PS8MEG | _PMD_ACCESSED)
#ifdef CONFIG_PIN_TLB_IMMR
li r0, 3
#else
@@ -834,7 +802,7 @@ _GLOBAL(mmu_pin_tlb)
#endif
#ifdef CONFIG_PIN_TLB_IMMR
LOAD_REG_IMMEDIATE(r0, VIRT_IMMR_BASE | MD_EVALID)
- LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED)
+ LOAD_REG_IMMEDIATE(r7, MD_SVALID | MD_PS512K | MD_GUARDED | _PMD_ACCESSED)
mfspr r8, SPRN_IMMR
rlwinm r8, r8, 0, 0xfff80000
ori r8, r8, 0xf0 | _PAGE_DIRTY | _PAGE_SPS | _PAGE_SH | \
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 5eb9eedac920..2aa16d5368e1 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -457,11 +457,7 @@ InstructionTLBMiss:
cmplw 0,r1,r3
#endif
mfspr r2, SPRN_SPRG_PGDIR
-#ifdef CONFIG_SWAP
li r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
-#else
- li r1,_PAGE_PRESENT | _PAGE_EXEC
-#endif
#if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC)
bgt- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
@@ -523,11 +519,7 @@ DataLoadTLBMiss:
lis r1, TASK_SIZE@h /* check if kernel address */
cmplw 0,r1,r3
mfspr r2, SPRN_SPRG_PGDIR
-#ifdef CONFIG_SWAP
li r1, _PAGE_PRESENT | _PAGE_ACCESSED
-#else
- li r1, _PAGE_PRESENT
-#endif
bgt- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
@@ -603,11 +595,7 @@ DataStoreTLBMiss:
lis r1, TASK_SIZE@h /* check if kernel address */
cmplw 0,r1,r3
mfspr r2, SPRN_SPRG_PGDIR
-#ifdef CONFIG_SWAP
li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
-#else
- li r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT
-#endif
bgt- 112f
lis r2, (swapper_pg_dir - PAGE_OFFSET)@ha /* if kernel address, use */
addi r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l /* kernel page table */
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 3c6b9822f978..8c2857cbd960 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1393,13 +1393,14 @@ static void add_cpu_to_masks(int cpu)
/* Activate a secondary processor. */
void start_secondary(void *unused)
{
- unsigned int cpu = smp_processor_id();
+ unsigned int cpu = raw_smp_processor_id();
mmgrab(&init_mm);
current->active_mm = &init_mm;
smp_store_cpu_info(cpu);
set_dec(tb_ticks_per_jiffy);
+ rcu_cpu_starting(cpu);
preempt_disable();
cpu_callin_map[cpu] = 1;
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index c47e6b35c551..824b2c9da75b 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -476,7 +476,7 @@ do { \
do { \
long __kr_err; \
\
- __put_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \
+ __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \
if (unlikely(__kr_err)) \
goto err_label; \
} while (0)
diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
index 99e12faa5498..765b62434f30 100644
--- a/arch/riscv/kernel/ftrace.c
+++ b/arch/riscv/kernel/ftrace.c
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2013 Linaro Limited
* Author: AKASHI Takahiro <takahiro.akashi@linaro.org>
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 11e2a4fe66e0..7e849797c9c3 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -35,6 +35,10 @@ ENTRY(_start)
.word 0
#endif
.balign 8
+#ifdef CONFIG_RISCV_M_MODE
+ /* Image load offset (0MB) from start of RAM for M-mode */
+ .dword 0
+#else
#if __riscv_xlen == 64
/* Image load offset(2MB) from start of RAM */
.dword 0x200000
@@ -42,6 +46,7 @@ ENTRY(_start)
/* Image load offset(4MB) from start of RAM */
.dword 0x400000
#endif
+#endif
/* Effective size of kernel image */
.dword _end - _start
.dword __HEAD_FLAGS
diff --git a/arch/riscv/kernel/vdso/.gitignore b/arch/riscv/kernel/vdso/.gitignore
index 11ebee9e4c1d..3a19def868ec 100644
--- a/arch/riscv/kernel/vdso/.gitignore
+++ b/arch/riscv/kernel/vdso/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
vdso.lds
*.tmp
+vdso-syms.S
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index 7d6a94d45ec9..cb8f9e4cfcbf 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -43,19 +43,14 @@ $(obj)/vdso.o: $(obj)/vdso.so
SYSCFLAGS_vdso.so.dbg = $(c_flags)
$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE
$(call if_changed,vdsold)
+SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \
+ -Wl,--build-id -Wl,--hash-style=both
# We also create a special relocatable object that should mirror the symbol
# table and layout of the linked DSO. With ld --just-symbols we can then
# refer to these symbols in the kernel code rather than hand-coded addresses.
-
-SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \
- -Wl,--build-id=sha1 -Wl,--hash-style=both
-$(obj)/vdso-dummy.o: $(src)/vdso.lds $(obj)/rt_sigreturn.o FORCE
- $(call if_changed,vdsold)
-
-LDFLAGS_vdso-syms.o := -r --just-symbols
-$(obj)/vdso-syms.o: $(obj)/vdso-dummy.o FORCE
- $(call if_changed,ld)
+$(obj)/vdso-syms.S: $(obj)/vdso.so FORCE
+ $(call if_changed,so2s)
# strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
@@ -73,6 +68,11 @@ quiet_cmd_vdsold = VDSOLD $@
$(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \
rm $@.tmp
+# Extracts symbol offsets from the VDSO, converting them into an assembly file
+# that contains the same symbols at the same offsets.
+quiet_cmd_so2s = SO2S $@
+ cmd_so2s = $(NM) -D $< | $(srctree)/$(src)/so2s.sh > $@
+
# install commands for the unstripped file
quiet_cmd_vdso_install = INSTALL $@
cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
diff --git a/arch/riscv/kernel/vdso/so2s.sh b/arch/riscv/kernel/vdso/so2s.sh
new file mode 100755
index 000000000000..e64cb6d9440e
--- /dev/null
+++ b/arch/riscv/kernel/vdso/so2s.sh
@@ -0,0 +1,6 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+# Copyright 2020 Palmer Dabbelt <palmerdabbelt@google.com>
+
+sed 's!\([0-9a-f]*\) T \([a-z0-9_]*\)\(@@LINUX_4.15\)*!.global \2\n.set \2,0x\1!' \
+| grep '^\.'
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 1359e21c0c62..3c8b9e433c67 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -86,6 +86,7 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
pmd_t *pmd, *pmd_k;
pte_t *pte_k;
int index;
+ unsigned long pfn;
/* User mode accesses just cause a SIGSEGV */
if (user_mode(regs))
@@ -100,7 +101,8 @@ static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long a
* of a task switch.
*/
index = pgd_index(addr);
- pgd = (pgd_t *)pfn_to_virt(csr_read(CSR_SATP)) + index;
+ pfn = csr_read(CSR_SATP) & SATP_PPN;
+ pgd = (pgd_t *)pfn_to_virt(pfn) + index;
pgd_k = init_mm.pgd + index;
if (!pgd_present(*pgd_k)) {
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index ea933b789a88..8e577f14f120 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -154,9 +154,8 @@ disable:
void __init setup_bootmem(void)
{
- phys_addr_t mem_size = 0;
- phys_addr_t total_mem = 0;
- phys_addr_t mem_start, start, end = 0;
+ phys_addr_t mem_start = 0;
+ phys_addr_t start, end = 0;
phys_addr_t vmlinux_end = __pa_symbol(&_end);
phys_addr_t vmlinux_start = __pa_symbol(&_start);
u64 i;
@@ -164,21 +163,18 @@ void __init setup_bootmem(void)
/* Find the memory region containing the kernel */
for_each_mem_range(i, &start, &end) {
phys_addr_t size = end - start;
- if (!total_mem)
+ if (!mem_start)
mem_start = start;
if (start <= vmlinux_start && vmlinux_end <= end)
BUG_ON(size == 0);
- total_mem = total_mem + size;
}
/*
- * Remove memblock from the end of usable area to the
- * end of region
+ * The maximal physical memory size is -PAGE_OFFSET.
+ * Make sure that any memory beyond mem_start + (-PAGE_OFFSET) is removed
+ * as it is unusable by kernel.
*/
- mem_size = min(total_mem, (phys_addr_t)-PAGE_OFFSET);
- if (mem_start + mem_size < end)
- memblock_remove(mem_start + mem_size,
- end - mem_start - mem_size);
+ memblock_enforce_memory_limit(mem_start - PAGE_OFFSET);
/* Reserve from the start of the kernel to the end of the kernel */
memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
@@ -297,6 +293,7 @@ pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
#define NUM_EARLY_PMDS (1UL + MAX_EARLY_MAPPING_SIZE / PGDIR_SIZE)
#endif
pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE);
+pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
{
@@ -494,6 +491,18 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
load_pa + (va - PAGE_OFFSET),
map_size, PAGE_KERNEL_EXEC);
+#ifndef __PAGETABLE_PMD_FOLDED
+ /* Setup early PMD for DTB */
+ create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
+ (uintptr_t)early_dtb_pmd, PGDIR_SIZE, PAGE_TABLE);
+ /* Create two consecutive PMD mappings for FDT early scan */
+ pa = dtb_pa & ~(PMD_SIZE - 1);
+ create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
+ pa, PMD_SIZE, PAGE_KERNEL);
+ create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
+ pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
+ dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
+#else
/* Create two consecutive PGD mappings for FDT early scan */
pa = dtb_pa & ~(PGDIR_SIZE - 1);
create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
@@ -501,6 +510,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE,
pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL);
dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1));
+#endif
dtb_early_pa = dtb_pa;
/*
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 0784bf3caf43..a4d3c578fbd8 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -93,9 +93,10 @@ CONFIG_CLEANCACHE=y
CONFIG_FRONTSWAP=y
CONFIG_CMA_DEBUG=y
CONFIG_CMA_DEBUGFS=y
+CONFIG_CMA_AREAS=7
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_ZSWAP=y
-CONFIG_ZSMALLOC=m
+CONFIG_ZSMALLOC=y
CONFIG_ZSMALLOC_STAT=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
@@ -378,7 +379,6 @@ CONFIG_NETLINK_DIAG=m
CONFIG_CGROUP_NET_PRIO=y
CONFIG_BPF_JIT=y
CONFIG_NET_PKTGEN=m
-# CONFIG_NET_DROP_MONITOR is not set
CONFIG_PCI=y
# CONFIG_PCIEASPM is not set
CONFIG_PCI_DEBUG=y
@@ -386,7 +386,7 @@ CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_DEVTMPFS=y
CONFIG_CONNECTOR=y
-CONFIG_ZRAM=m
+CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_CRYPTOLOOP=m
CONFIG_BLK_DEV_DRBD=m
@@ -689,6 +689,7 @@ CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_SM2=m
CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_GCM=y
CONFIG_CRYPTO_CHACHA20POLY1305=m
@@ -709,7 +710,6 @@ CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
@@ -753,6 +753,7 @@ CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_CORDIC=m
CONFIG_CRC32_SELFTEST=y
CONFIG_CRC4=m
@@ -829,6 +830,7 @@ CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
CONFIG_FAULT_INJECTION=y
CONFIG_FAILSLAB=y
CONFIG_FAIL_PAGE_ALLOC=y
+CONFIG_FAULT_INJECTION_USERCOPY=y
CONFIG_FAIL_MAKE_REQUEST=y
CONFIG_FAIL_IO_TIMEOUT=y
CONFIG_FAIL_FUTEX=y
diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig
index 905bc8c4cfaf..17d5df2c1eff 100644
--- a/arch/s390/configs/defconfig
+++ b/arch/s390/configs/defconfig
@@ -87,9 +87,10 @@ CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CLEANCACHE=y
CONFIG_FRONTSWAP=y
+CONFIG_CMA_AREAS=7
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_ZSWAP=y
-CONFIG_ZSMALLOC=m
+CONFIG_ZSMALLOC=y
CONFIG_ZSMALLOC_STAT=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
@@ -371,7 +372,6 @@ CONFIG_NETLINK_DIAG=m
CONFIG_CGROUP_NET_PRIO=y
CONFIG_BPF_JIT=y
CONFIG_NET_PKTGEN=m
-# CONFIG_NET_DROP_MONITOR is not set
CONFIG_PCI=y
# CONFIG_PCIEASPM is not set
CONFIG_HOTPLUG_PCI=y
@@ -379,7 +379,7 @@ CONFIG_HOTPLUG_PCI_S390=y
CONFIG_UEVENT_HELPER=y
CONFIG_DEVTMPFS=y
CONFIG_CONNECTOR=y
-CONFIG_ZRAM=m
+CONFIG_ZRAM=y
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_CRYPTOLOOP=m
CONFIG_BLK_DEV_DRBD=m
@@ -680,6 +680,7 @@ CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_ECRDSA=m
+CONFIG_CRYPTO_SM2=m
CONFIG_CRYPTO_CURVE25519=m
CONFIG_CRYPTO_GCM=y
CONFIG_CRYPTO_CHACHA20POLY1305=m
@@ -701,7 +702,6 @@ CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_SM3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
@@ -745,6 +745,7 @@ CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_CRC32_S390=y
+CONFIG_CRYPTO_DEV_VIRTIO=m
CONFIG_CORDIC=m
CONFIG_PRIME_NUMBERS=m
CONFIG_CRC4=m
diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig
index 8f67c55625f9..a302630341ef 100644
--- a/arch/s390/configs/zfcpdump_defconfig
+++ b/arch/s390/configs/zfcpdump_defconfig
@@ -17,11 +17,11 @@ CONFIG_HZ_100=y
# CONFIG_CHSC_SCH is not set
# CONFIG_SCM_BUS is not set
CONFIG_CRASH_DUMP=y
-# CONFIG_SECCOMP is not set
# CONFIG_PFAULT is not set
# CONFIG_S390_HYPFS_FS is not set
# CONFIG_VIRTUALIZATION is not set
# CONFIG_S390_GUEST is not set
+# CONFIG_SECCOMP is not set
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
# CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 6b8d8c69b1a1..b5dbae78969b 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -692,16 +692,6 @@ static inline int pud_large(pud_t pud)
return !!(pud_val(pud) & _REGION3_ENTRY_LARGE);
}
-static inline unsigned long pud_pfn(pud_t pud)
-{
- unsigned long origin_mask;
-
- origin_mask = _REGION_ENTRY_ORIGIN;
- if (pud_large(pud))
- origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
- return (pud_val(pud) & origin_mask) >> PAGE_SHIFT;
-}
-
#define pmd_leaf pmd_large
static inline int pmd_large(pmd_t pmd)
{
@@ -747,16 +737,6 @@ static inline int pmd_none(pmd_t pmd)
return pmd_val(pmd) == _SEGMENT_ENTRY_EMPTY;
}
-static inline unsigned long pmd_pfn(pmd_t pmd)
-{
- unsigned long origin_mask;
-
- origin_mask = _SEGMENT_ENTRY_ORIGIN;
- if (pmd_large(pmd))
- origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
- return (pmd_val(pmd) & origin_mask) >> PAGE_SHIFT;
-}
-
#define pmd_write pmd_write
static inline int pmd_write(pmd_t pmd)
{
@@ -1238,11 +1218,39 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
-#define pmd_deref(pmd) (pmd_val(pmd) & _SEGMENT_ENTRY_ORIGIN)
-#define pud_deref(pud) (pud_val(pud) & _REGION_ENTRY_ORIGIN)
#define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN)
#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)
+static inline unsigned long pmd_deref(pmd_t pmd)
+{
+ unsigned long origin_mask;
+
+ origin_mask = _SEGMENT_ENTRY_ORIGIN;
+ if (pmd_large(pmd))
+ origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
+ return pmd_val(pmd) & origin_mask;
+}
+
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+ return pmd_deref(pmd) >> PAGE_SHIFT;
+}
+
+static inline unsigned long pud_deref(pud_t pud)
+{
+ unsigned long origin_mask;
+
+ origin_mask = _REGION_ENTRY_ORIGIN;
+ if (pud_large(pud))
+ origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
+ return pud_val(pud) & origin_mask;
+}
+
+static inline unsigned long pud_pfn(pud_t pud)
+{
+ return pud_deref(pud) >> PAGE_SHIFT;
+}
+
/*
* The pgd_offset function *always* adds the index for the top-level
* region/segment table. This is done to get a sequence like the
diff --git a/arch/s390/include/asm/vdso/vdso.h b/arch/s390/include/asm/vdso/vdso.h
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/arch/s390/include/asm/vdso/vdso.h
+++ /dev/null
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index ece58f2217cb..2012c1cf0853 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -61,14 +61,6 @@ int main(void)
BLANK();
OFFSET(__VDSO_GETCPU_VAL, vdso_per_cpu_data, getcpu_val);
BLANK();
- /* constants used by the vdso */
- DEFINE(__CLOCK_REALTIME, CLOCK_REALTIME);
- DEFINE(__CLOCK_MONOTONIC, CLOCK_MONOTONIC);
- DEFINE(__CLOCK_REALTIME_COARSE, CLOCK_REALTIME_COARSE);
- DEFINE(__CLOCK_MONOTONIC_COARSE, CLOCK_MONOTONIC_COARSE);
- DEFINE(__CLOCK_THREAD_CPUTIME_ID, CLOCK_THREAD_CPUTIME_ID);
- DEFINE(__CLOCK_COARSE_RES, LOW_RES_NSEC);
- BLANK();
/* idle data offsets */
OFFSET(__CLOCK_IDLE_ENTER, s390_idle_data, clock_idle_enter);
OFFSET(__CLOCK_IDLE_EXIT, s390_idle_data, clock_idle_exit);
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index ebfe86d097f0..390d97daa2b3 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -855,13 +855,14 @@ void __init smp_detect_cpus(void)
static void smp_init_secondary(void)
{
- int cpu = smp_processor_id();
+ int cpu = raw_smp_processor_id();
S390_lowcore.last_update_clock = get_tod_clock();
restore_access_regs(S390_lowcore.access_regs_save_area);
set_cpu_flag(CIF_ASCE_PRIMARY);
set_cpu_flag(CIF_ASCE_SECONDARY);
cpu_init();
+ rcu_cpu_starting(cpu);
preempt_disable();
init_cpu_timer();
vtime_init();
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index d33f21545dfd..9a6bae503fe6 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -101,6 +101,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
if (ret)
break;
+ /* the PCI function will be scanned once function 0 appears */
+ if (!zdev->zbus->bus)
+ break;
+
pdev = pci_scan_single_device(zdev->zbus->bus, zdev->devfn);
if (!pdev)
break;
diff --git a/arch/x86/boot/compressed/ident_map_64.c b/arch/x86/boot/compressed/ident_map_64.c
index a5e5db6ada3c..39b2eded7bc2 100644
--- a/arch/x86/boot/compressed/ident_map_64.c
+++ b/arch/x86/boot/compressed/ident_map_64.c
@@ -164,6 +164,7 @@ void initialize_identity_maps(void *rmode)
add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);
/* Load the new page-table. */
+ sev_verify_cbit(top_level_pgt);
write_cr3(top_level_pgt);
}
diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S
index dd07e7b41b11..aa561795efd1 100644
--- a/arch/x86/boot/compressed/mem_encrypt.S
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -68,6 +68,9 @@ SYM_FUNC_START(get_sev_encryption_bit)
SYM_FUNC_END(get_sev_encryption_bit)
.code64
+
+#include "../../kernel/sev_verify_cbit.S"
+
SYM_FUNC_START(set_sev_encryption_mask)
#ifdef CONFIG_AMD_MEM_ENCRYPT
push %rbp
@@ -81,6 +84,19 @@ SYM_FUNC_START(set_sev_encryption_mask)
bts %rax, sme_me_mask(%rip) /* Create the encryption mask */
+ /*
+ * Read MSR_AMD64_SEV again and store it to sev_status. Can't do this in
+ * get_sev_encryption_bit() because this function is 32-bit code and
+ * shared between 64-bit and 32-bit boot path.
+ */
+ movl $MSR_AMD64_SEV, %ecx /* Read the SEV MSR */
+ rdmsr
+
+ /* Store MSR value in sev_status */
+ shlq $32, %rdx
+ orq %rdx, %rax
+ movq %rax, sev_status(%rip)
+
.Lno_sev_mask:
movq %rbp, %rsp /* Restore original stack pointer */
@@ -96,5 +112,7 @@ SYM_FUNC_END(set_sev_encryption_mask)
#ifdef CONFIG_AMD_MEM_ENCRYPT
.balign 8
-SYM_DATA(sme_me_mask, .quad 0)
+SYM_DATA(sme_me_mask, .quad 0)
+SYM_DATA(sev_status, .quad 0)
+SYM_DATA(sev_check_data, .quad 0)
#endif
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index 6d31f1b4c4d1..d9a631c5973c 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -159,4 +159,6 @@ void boot_page_fault(void);
void boot_stage1_vc(void);
void boot_stage2_vc(void);
+unsigned long sev_verify_cbit(unsigned long cr3);
+
#endif /* BOOT_COMPRESSED_MISC_H */
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 40e0e322161d..284e73661a18 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -273,11 +273,15 @@ void __init hv_apic_init(void)
pr_info("Hyper-V: Using enlightened APIC (%s mode)",
x2apic_enabled() ? "x2apic" : "xapic");
/*
- * With x2apic, architectural x2apic MSRs are equivalent to the
- * respective synthetic MSRs, so there's no need to override
- * the apic accessors. The only exception is
- * hv_apic_eoi_write, because it benefits from lazy EOI when
- * available, but it works for both xapic and x2apic modes.
+ * When in x2apic mode, don't use the Hyper-V specific APIC
+ * accessors since the field layout in the ICR register is
+ * different in x2apic mode. Furthermore, the architectural
+ * x2apic MSRs function just as well as the Hyper-V
+ * synthetic APIC MSRs, so there's no benefit in having
+ * separate Hyper-V accessors for x2apic mode. The only
+ * exception is hv_apic_eoi_write, because it benefits from
+ * lazy EOI when available, but the same accessor works for
+ * both xapic and x2apic because the field layout is the same.
*/
apic_set_eoi_write(hv_apic_eoi_write);
if (!x2apic_enabled()) {
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 714233cee0b5..3115caa7d7d0 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -290,6 +290,9 @@ static void __init uv_stringify(int len, char *to, char *from)
{
/* Relies on 'to' being NULL chars so result will be NULL terminated */
strncpy(to, from, len-1);
+
+ /* Trim trailing spaces */
+ (void)strim(to);
}
/* Find UV arch type entry in UVsystab */
@@ -366,7 +369,7 @@ static int __init early_get_arch_type(void)
return ret;
}
-static int __init uv_set_system_type(char *_oem_id)
+static int __init uv_set_system_type(char *_oem_id, char *_oem_table_id)
{
/* Save OEM_ID passed from ACPI MADT */
uv_stringify(sizeof(oem_id), oem_id, _oem_id);
@@ -386,13 +389,23 @@ static int __init uv_set_system_type(char *_oem_id)
/* (Not hubless), not a UV */
return 0;
+ /* Is UV hubless system */
+ uv_hubless_system = 0x01;
+
+ /* UV5 Hubless */
+ if (strncmp(uv_archtype, "NSGI5", 5) == 0)
+ uv_hubless_system |= 0x20;
+
/* UV4 Hubless: CH */
- if (strncmp(uv_archtype, "NSGI4", 5) == 0)
- uv_hubless_system = 0x11;
+ else if (strncmp(uv_archtype, "NSGI4", 5) == 0)
+ uv_hubless_system |= 0x10;
/* UV3 Hubless: UV300/MC990X w/o hub */
else
- uv_hubless_system = 0x9;
+ uv_hubless_system |= 0x8;
+
+ /* Copy APIC type */
+ uv_stringify(sizeof(oem_table_id), oem_table_id, _oem_table_id);
pr_info("UV: OEM IDs %s/%s, SystemType %d, HUBLESS ID %x\n",
oem_id, oem_table_id, uv_system_type, uv_hubless_system);
@@ -456,7 +469,7 @@ static int __init uv_acpi_madt_oem_check(char *_oem_id, char *_oem_table_id)
uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0;
/* If not UV, return. */
- if (likely(uv_set_system_type(_oem_id) == 0))
+ if (uv_set_system_type(_oem_id, _oem_table_id) == 0)
return 0;
/* Save and Decode OEM Table ID */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d3f0db463f96..581fb7223ad0 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1254,6 +1254,14 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
return 0;
}
+static bool is_spec_ib_user_controlled(void)
+{
+ return spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL ||
+ spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP;
+}
+
static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
{
switch (ctrl) {
@@ -1261,16 +1269,26 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return 0;
+
/*
- * Indirect branch speculation is always disabled in strict
- * mode. It can neither be enabled if it was force-disabled
- * by a previous prctl call.
+ * With strict mode for both IBPB and STIBP, the instruction
+ * code paths avoid checking this task flag and instead,
+ * unconditionally run the instruction. However, STIBP and IBPB
+ * are independent and either can be set to conditionally
+ * enabled regardless of the mode of the other.
+ *
+ * If either is set to conditional, allow the task flag to be
+ * updated, unless it was force-disabled by a previous prctl
+ * call. Currently, this is possible on an AMD CPU which has the
+ * feature X86_FEATURE_AMD_STIBP_ALWAYS_ON. In this case, if the
+ * kernel is booted with 'spectre_v2_user=seccomp', then
+ * spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP and
+ * spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED.
*/
- if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ||
+ if (!is_spec_ib_user_controlled() ||
task_spec_ib_force_disable(task))
return -EPERM;
+
task_clear_spec_ib_disable(task);
task_update_spec_tif(task);
break;
@@ -1283,10 +1301,10 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return -EPERM;
- if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
+
+ if (!is_spec_ib_user_controlled())
return 0;
+
task_set_spec_ib_disable(task);
if (ctrl == PR_SPEC_FORCE_DISABLE)
task_set_spec_ib_force_disable(task);
@@ -1351,20 +1369,17 @@ static int ib_prctl_get(struct task_struct *task)
if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
return PR_SPEC_ENABLE;
- else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
- return PR_SPEC_DISABLE;
- else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL ||
- spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL ||
- spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) {
+ else if (is_spec_ib_user_controlled()) {
if (task_spec_ib_force_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
if (task_spec_ib_disable(task))
return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
- } else
+ } else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
+ return PR_SPEC_DISABLE;
+ else
return PR_SPEC_NOT_AFFECTED;
}
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 7eb2a1c87969..3c417734790f 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -161,6 +161,21 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
/* Setup early boot stage 4-/5-level pagetables. */
addq phys_base(%rip), %rax
+
+ /*
+ * For SEV guests: Verify that the C-bit is correct. A malicious
+ * hypervisor could lie about the C-bit position to perform a ROP
+ * attack on the guest by writing to the unencrypted stack and wait for
+ * the next RET instruction.
+ * %rsi carries pointer to realmode data and is callee-clobbered. Save
+ * and restore it.
+ */
+ pushq %rsi
+ movq %rax, %rdi
+ call sev_verify_cbit
+ popq %rsi
+
+ /* Switch to new page-table */
movq %rax, %cr3
/* Ensure I am executing from virtual addresses */
@@ -279,6 +294,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
SYM_CODE_END(secondary_startup_64)
#include "verify_cpu.S"
+#include "sev_verify_cbit.S"
#ifdef CONFIG_HOTPLUG_CPU
/*
diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c
index 5f83ccaab877..7d04b356d44d 100644
--- a/arch/x86/kernel/sev-es-shared.c
+++ b/arch/x86/kernel/sev-es-shared.c
@@ -178,6 +178,32 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
goto fail;
regs->dx = val >> 32;
+ /*
+ * This is a VC handler and the #VC is only raised when SEV-ES is
+ * active, which means SEV must be active too. Do sanity checks on the
+ * CPUID results to make sure the hypervisor does not trick the kernel
+ * into the no-sev path. This could map sensitive data unencrypted and
+ * make it accessible to the hypervisor.
+ *
+ * In particular, check for:
+ * - Hypervisor CPUID bit
+ * - Availability of CPUID leaf 0x8000001f
+ * - SEV CPUID bit.
+ *
+ * The hypervisor might still report the wrong C-bit position, but this
+ * can't be checked here.
+ */
+
+ if ((fn == 1 && !(regs->cx & BIT(31))))
+ /* Hypervisor bit */
+ goto fail;
+ else if (fn == 0x80000000 && (regs->ax < 0x8000001f))
+ /* SEV leaf check */
+ goto fail;
+ else if ((fn == 0x8000001f && !(regs->ax & BIT(1))))
+ /* SEV bit */
+ goto fail;
+
/* Skip over the CPUID two-byte opcode */
regs->ip += 2;
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c
index 4a96726fbaf8..0bd1a0fc587e 100644
--- a/arch/x86/kernel/sev-es.c
+++ b/arch/x86/kernel/sev-es.c
@@ -374,8 +374,8 @@ fault:
return ES_EXCEPTION;
}
-static bool vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
- unsigned long vaddr, phys_addr_t *paddr)
+static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
+ unsigned long vaddr, phys_addr_t *paddr)
{
unsigned long va = (unsigned long)vaddr;
unsigned int level;
@@ -394,15 +394,19 @@ static bool vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
if (user_mode(ctxt->regs))
ctxt->fi.error_code |= X86_PF_USER;
- return false;
+ return ES_EXCEPTION;
}
+ if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
+ /* Emulated MMIO to/from encrypted memory not supported */
+ return ES_UNSUPPORTED;
+
pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
pa |= va & ~page_level_mask(level);
*paddr = pa;
- return true;
+ return ES_OK;
}
/* Include code shared with pre-decompression boot stage */
@@ -731,6 +735,7 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
{
u64 exit_code, exit_info_1, exit_info_2;
unsigned long ghcb_pa = __pa(ghcb);
+ enum es_result res;
phys_addr_t paddr;
void __user *ref;
@@ -740,11 +745,12 @@ static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;
- if (!vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr)) {
- if (!read)
+ res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
+ if (res != ES_OK) {
+ if (res == ES_EXCEPTION && !read)
ctxt->fi.error_code |= X86_PF_WRITE;
- return ES_EXCEPTION;
+ return res;
}
exit_info_1 = paddr;
diff --git a/arch/x86/kernel/sev_verify_cbit.S b/arch/x86/kernel/sev_verify_cbit.S
new file mode 100644
index 000000000000..ee04941a6546
--- /dev/null
+++ b/arch/x86/kernel/sev_verify_cbit.S
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * sev_verify_cbit.S - Code for verification of the C-bit position reported
+ * by the Hypervisor when running with SEV enabled.
+ *
+ * Copyright (c) 2020 Joerg Roedel (jroedel@suse.de)
+ *
+ * sev_verify_cbit() is called before switching to a new long-mode page-table
+ * at boot.
+ *
+ * Verify that the C-bit position is correct by writing a random value to
+ * an encrypted memory location while on the current page-table. Then it
+ * switches to the new page-table to verify the memory content is still the
+ * same. After that it switches back to the current page-table and when the
+ * check succeeded it returns. If the check failed the code invalidates the
+ * stack pointer and goes into a hlt loop. The stack-pointer is invalidated to
+ * make sure no interrupt or exception can get the CPU out of the hlt loop.
+ *
+ * New page-table pointer is expected in %rdi (first parameter)
+ *
+ */
+SYM_FUNC_START(sev_verify_cbit)
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ /* First check if a C-bit was detected */
+ movq sme_me_mask(%rip), %rsi
+ testq %rsi, %rsi
+ jz 3f
+
+ /* sme_me_mask != 0 could mean SME or SEV - Check also for SEV */
+ movq sev_status(%rip), %rsi
+ testq %rsi, %rsi
+ jz 3f
+
+ /* Save CR4 in %rsi */
+ movq %cr4, %rsi
+
+ /* Disable Global Pages */
+ movq %rsi, %rdx
+ andq $(~X86_CR4_PGE), %rdx
+ movq %rdx, %cr4
+
+ /*
+ * Verified that running under SEV - now get a random value using
+ * RDRAND. This instruction is mandatory when running as an SEV guest.
+ *
+ * Don't bail out of the loop if RDRAND returns errors. It is better to
+ * prevent forward progress than to work with a non-random value here.
+ */
+1: rdrand %rdx
+ jnc 1b
+
+ /* Store value to memory and keep it in %rdx */
+ movq %rdx, sev_check_data(%rip)
+
+ /* Backup current %cr3 value to restore it later */
+ movq %cr3, %rcx
+
+ /* Switch to new %cr3 - This might unmap the stack */
+ movq %rdi, %cr3
+
+ /*
+ * Compare value in %rdx with memory location. If C-bit is incorrect
+ * this would read the encrypted data and make the check fail.
+ */
+ cmpq %rdx, sev_check_data(%rip)
+
+ /* Restore old %cr3 */
+ movq %rcx, %cr3
+
+ /* Restore previous CR4 */
+ movq %rsi, %cr4
+
+ /* Check CMPQ result */
+ je 3f
+
+ /*
+ * The check failed, prevent any forward progress to prevent ROP
+ * attacks, invalidate the stack and go into a hlt loop.
+ */
+ xorq %rsp, %rsp
+ subq $0x1000, %rsp
+2: hlt
+ jmp 2b
+3:
+#endif
+ /* Return page-table pointer */
+ movq %rdi, %rax
+ ret
+SYM_FUNC_END(sev_verify_cbit)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 037faac46b0c..1e299ac73c86 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -16,8 +16,6 @@
* to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
*/
-.weak memcpy
-
/*
* memcpy - Copy a memory block.
*
@@ -30,7 +28,7 @@
* rax original destination
*/
SYM_FUNC_START_ALIAS(__memcpy)
-SYM_FUNC_START_LOCAL(memcpy)
+SYM_FUNC_START_WEAK(memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memcpy_erms", X86_FEATURE_ERMS
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 7ff00ea64e4f..41902fe8b859 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -24,9 +24,7 @@
* Output:
* rax: dest
*/
-.weak memmove
-
-SYM_FUNC_START_ALIAS(memmove)
+SYM_FUNC_START_WEAK(memmove)
SYM_FUNC_START(__memmove)
mov %rdi, %rax
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 9ff15ee404a4..0bfd26e4ca9e 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -6,8 +6,6 @@
#include <asm/alternative-asm.h>
#include <asm/export.h>
-.weak memset
-
/*
* ISO C memset - set a memory block to a byte value. This function uses fast
* string to get better performance than the original function. The code is
@@ -19,7 +17,7 @@
*
* rax original destination
*/
-SYM_FUNC_START_ALIAS(memset)
+SYM_FUNC_START_WEAK(memset)
SYM_FUNC_START(__memset)
/*
* Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index efbb3de472df..bc0833713be9 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -39,6 +39,7 @@
*/
u64 sme_me_mask __section(".data") = 0;
u64 sev_status __section(".data") = 0;
+u64 sev_check_data __section(".data") = 0;
EXPORT_SYMBOL(sme_me_mask);
DEFINE_STATIC_KEY_FALSE(sev_enable_key);
EXPORT_SYMBOL_GPL(sev_enable_key);
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index c6fc83efee0c..8731b7ad9308 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -89,8 +89,8 @@ static void __init free_highpages(void)
/* set highmem page free */
for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
&range_start, &range_end, NULL) {
- unsigned long start = PHYS_PFN(range_start);
- unsigned long end = PHYS_PFN(range_end);
+ unsigned long start = PFN_UP(range_start);
+ unsigned long end = PFN_DOWN(range_end);
/* Ignore complete lowmem entries */
if (end <= max_low)