summaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig7
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/entry/vdso/vma.c2
-rw-r--r--arch/x86/include/asm/desc.h162
-rw-r--r--arch/x86/include/asm/elf.h28
-rw-r--r--arch/x86/include/asm/fixmap.h4
-rw-r--r--arch/x86/include/asm/pgtable-2level_types.h1
-rw-r--r--arch/x86/include/asm/pgtable-3level_types.h1
-rw-r--r--arch/x86/include/asm/pgtable.h26
-rw-r--r--arch/x86/include/asm/pgtable_64_types.h4
-rw-r--r--arch/x86/include/asm/pgtable_types.h30
-rw-r--r--arch/x86/include/asm/processor.h6
-rw-r--r--arch/x86/include/asm/stackprotector.h2
-rw-r--r--arch/x86/kernel/acpi/sleep.c2
-rw-r--r--arch/x86/kernel/apm_32.c6
-rw-r--r--arch/x86/kernel/cpu/common.c51
-rw-r--r--arch/x86/kernel/module.c1
-rw-r--r--arch/x86/kernel/setup_percpu.c2
-rw-r--r--arch/x86/kernel/smpboot.c2
-rw-r--r--arch/x86/kernel/sys_x86_64.c15
-rw-r--r--arch/x86/kernel/tboot.c6
-rw-r--r--arch/x86/kernel/vm86_32.c6
-rw-r--r--arch/x86/kvm/svm.c4
-rw-r--r--arch/x86/kvm/vmx.c12
-rw-r--r--arch/x86/mm/dump_pagetables.c1
-rw-r--r--arch/x86/mm/fault.c66
-rw-r--r--arch/x86/mm/gup.c33
-rw-r--r--arch/x86/mm/hugetlbpage.c9
-rw-r--r--arch/x86/mm/ident_map.c51
-rw-r--r--arch/x86/mm/init_32.c22
-rw-r--r--arch/x86/mm/ioremap.c3
-rw-r--r--arch/x86/mm/kasan_init_64.c1
-rw-r--r--arch/x86/mm/mmap.c125
-rw-r--r--arch/x86/mm/pgtable.c4
-rw-r--r--arch/x86/mm/pgtable_32.c8
-rw-r--r--arch/x86/platform/efi/efi_32.c4
-rw-r--r--arch/x86/platform/efi/efi_64.c13
-rw-r--r--arch/x86/power/cpu.c7
-rw-r--r--arch/x86/power/hibernate_32.c7
-rw-r--r--arch/x86/power/hibernate_64.c47
-rw-r--r--arch/x86/xen/enlighten.c5
-rw-r--r--arch/x86/xen/mmu.c1
-rw-r--r--arch/x86/xen/smp.c2
43 files changed, 600 insertions, 190 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index cd211a14a88f..c4d6833aacd9 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -700,6 +700,13 @@ config ARCH_MMAP_RND_COMPAT_BITS
This value can be changed after boot using the
/proc/sys/vm/mmap_rnd_compat_bits tunable
+config HAVE_ARCH_COMPAT_MMAP_BASES
+ bool
+ help
+ This allows 64bit applications to invoke 32-bit mmap() syscall
+ and vice-versa 32-bit applications to call 64-bit mmap().
+ Required for applications doing different bitness syscalls.
+
config HAVE_COPY_THREAD_TLS
bool
help
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index cc98d5a294ee..2bab9d093b51 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -106,6 +106,7 @@ config X86
select HAVE_ARCH_KMEMCHECK
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
+ select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 226ca70dc6bd..5c5d4d7618e6 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -354,7 +354,7 @@ static void vgetcpu_cpu_init(void *arg)
d.p = 1; /* Present */
d.d = 1; /* 32-bit */
- write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
+ write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}
static int vgetcpu_online(unsigned int cpu)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 1548ca92ad3f..ec05f9c1a62c 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -4,6 +4,7 @@
#include <asm/desc_defs.h>
#include <asm/ldt.h>
#include <asm/mmu.h>
+#include <asm/fixmap.h>
#include <linux/smp.h>
#include <linux/percpu.h>
@@ -38,6 +39,7 @@ extern struct desc_ptr idt_descr;
extern gate_desc idt_table[];
extern const struct desc_ptr debug_idt_descr;
extern gate_desc debug_idt_table[];
+extern pgprot_t pg_fixmap_gdt_flags;
struct gdt_page {
struct desc_struct gdt[GDT_ENTRIES];
@@ -45,11 +47,57 @@ struct gdt_page {
DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
-static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+/* Provide the original GDT */
+static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
{
return per_cpu(gdt_page, cpu).gdt;
}
+static inline unsigned long get_cpu_gdt_rw_vaddr(unsigned int cpu)
+{
+ return (unsigned long)get_cpu_gdt_rw(cpu);
+}
+
+/* Provide the current original GDT */
+static inline struct desc_struct *get_current_gdt_rw(void)
+{
+ return this_cpu_ptr(&gdt_page)->gdt;
+}
+
+static inline unsigned long get_current_gdt_rw_vaddr(void)
+{
+ return (unsigned long)get_current_gdt_rw();
+}
+
+/* Get the fixmap index for a specific processor */
+static inline unsigned int get_cpu_gdt_ro_index(int cpu)
+{
+ return FIX_GDT_REMAP_BEGIN + cpu;
+}
+
+/* Provide the fixmap address of the remapped GDT */
+static inline struct desc_struct *get_cpu_gdt_ro(int cpu)
+{
+ unsigned int idx = get_cpu_gdt_ro_index(cpu);
+ return (struct desc_struct *)__fix_to_virt(idx);
+}
+
+static inline unsigned long get_cpu_gdt_ro_vaddr(int cpu)
+{
+ return (unsigned long)get_cpu_gdt_ro(cpu);
+}
+
+/* Provide the current read-only GDT */
+static inline struct desc_struct *get_current_gdt_ro(void)
+{
+ return get_cpu_gdt_ro(smp_processor_id());
+}
+
+static inline unsigned long get_current_gdt_ro_vaddr(void)
+{
+ return (unsigned long)get_current_gdt_ro();
+}
+
#ifdef CONFIG_X86_64
static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
@@ -174,7 +222,7 @@ static inline void set_tssldt_descriptor(void *d, unsigned long addr, unsigned t
static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr)
{
- struct desc_struct *d = get_cpu_gdt_table(cpu);
+ struct desc_struct *d = get_cpu_gdt_rw(cpu);
tss_desc tss;
set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS,
@@ -194,22 +242,90 @@ static inline void native_set_ldt(const void *addr, unsigned int entries)
set_tssldt_descriptor(&ldt, (unsigned long)addr, DESC_LDT,
entries * LDT_ENTRY_SIZE - 1);
- write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT,
+ write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_LDT,
&ldt, DESC_LDT);
asm volatile("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
}
}
+static inline void native_load_gdt(const struct desc_ptr *dtr)
+{
+ asm volatile("lgdt %0"::"m" (*dtr));
+}
+
+static inline void native_load_idt(const struct desc_ptr *dtr)
+{
+ asm volatile("lidt %0"::"m" (*dtr));
+}
+
+static inline void native_store_gdt(struct desc_ptr *dtr)
+{
+ asm volatile("sgdt %0":"=m" (*dtr));
+}
+
+static inline void native_store_idt(struct desc_ptr *dtr)
+{
+ asm volatile("sidt %0":"=m" (*dtr));
+}
+
+/*
+ * The LTR instruction marks the TSS GDT entry as busy. On 64-bit, the GDT is
+ * a read-only remapping. To prevent a page fault, the GDT is switched to the
+ * original writeable version when needed.
+ */
+#ifdef CONFIG_X86_64
+static inline void native_load_tr_desc(void)
+{
+ struct desc_ptr gdt;
+ int cpu = raw_smp_processor_id();
+ bool restore = 0;
+ struct desc_struct *fixmap_gdt;
+
+ native_store_gdt(&gdt);
+ fixmap_gdt = get_cpu_gdt_ro(cpu);
+
+ /*
+ * If the current GDT is the read-only fixmap, swap to the original
+ * writeable version. Swap back at the end.
+ */
+ if (gdt.address == (unsigned long)fixmap_gdt) {
+ load_direct_gdt(cpu);
+ restore = 1;
+ }
+ asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
+ if (restore)
+ load_fixmap_gdt(cpu);
+}
+#else
static inline void native_load_tr_desc(void)
{
asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
}
+#endif
+
+static inline unsigned long native_store_tr(void)
+{
+ unsigned long tr;
+
+ asm volatile("str %0":"=r" (tr));
+
+ return tr;
+}
+
+static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
+{
+ struct desc_struct *gdt = get_cpu_gdt_rw(cpu);
+ unsigned int i;
+
+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
+ gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
+}
DECLARE_PER_CPU(bool, __tss_limit_invalid);
static inline void force_reload_TR(void)
{
- struct desc_struct *d = get_cpu_gdt_table(smp_processor_id());
+ struct desc_struct *d = get_current_gdt_rw();
tss_desc tss;
memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc));
@@ -257,44 +373,6 @@ static inline void invalidate_tss_limit(void)
this_cpu_write(__tss_limit_invalid, true);
}
-static inline void native_load_gdt(const struct desc_ptr *dtr)
-{
- asm volatile("lgdt %0"::"m" (*dtr));
-}
-
-static inline void native_load_idt(const struct desc_ptr *dtr)
-{
- asm volatile("lidt %0"::"m" (*dtr));
-}
-
-static inline void native_store_gdt(struct desc_ptr *dtr)
-{
- asm volatile("sgdt %0":"=m" (*dtr));
-}
-
-static inline void native_store_idt(struct desc_ptr *dtr)
-{
- asm volatile("sidt %0":"=m" (*dtr));
-}
-
-static inline unsigned long native_store_tr(void)
-{
- unsigned long tr;
-
- asm volatile("str %0":"=r" (tr));
-
- return tr;
-}
-
-static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
-{
- struct desc_struct *gdt = get_cpu_gdt_table(cpu);
- unsigned int i;
-
- for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
- gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
-}
-
/* This intentionally ignores lm, since 32-bit apps don't have that field. */
#define LDT_empty(info) \
((info)->base_addr == 0 && \
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 9d49c18b5ea9..d4d3ed456cb7 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -293,8 +293,23 @@ do { \
} \
} while (0)
+/*
+ * True on X86_32 or when emulating IA32 on X86_64
+ */
+static inline int mmap_is_ia32(void)
+{
+ return IS_ENABLED(CONFIG_X86_32) ||
+ (IS_ENABLED(CONFIG_COMPAT) &&
+ test_thread_flag(TIF_ADDR32));
+}
+
+extern unsigned long tasksize_32bit(void);
+extern unsigned long tasksize_64bit(void);
+extern unsigned long get_mmap_base(int is_legacy);
+
#ifdef CONFIG_X86_32
+#define __STACK_RND_MASK(is32bit) (0x7ff)
#define STACK_RND_MASK (0x7ff)
#define ARCH_DLINFO ARCH_DLINFO_IA32
@@ -304,7 +319,8 @@ do { \
#else /* CONFIG_X86_32 */
/* 1GB for 64bit, 8MB for 32bit */
-#define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff)
+#define __STACK_RND_MASK(is32bit) ((is32bit) ? 0x7ff : 0x3fffff)
+#define STACK_RND_MASK __STACK_RND_MASK(mmap_is_ia32())
#define ARCH_DLINFO \
do { \
@@ -348,16 +364,6 @@ extern int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp);
#define compat_arch_setup_additional_pages compat_arch_setup_additional_pages
-/*
- * True on X86_32 or when emulating IA32 on X86_64
- */
-static inline int mmap_is_ia32(void)
-{
- return IS_ENABLED(CONFIG_X86_32) ||
- (IS_ENABLED(CONFIG_COMPAT) &&
- test_thread_flag(TIF_ADDR32));
-}
-
/* Do not change the values. See get_align_mask() */
enum align_flags {
ALIGN_VA_32 = BIT(0),
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 8554f960e21b..b65155cc3760 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -100,6 +100,10 @@ enum fixed_addresses {
#ifdef CONFIG_X86_INTEL_MID
FIX_LNW_VRTC,
#endif
+ /* Fixmap entries to remap the GDTs, one per processor. */
+ FIX_GDT_REMAP_BEGIN,
+ FIX_GDT_REMAP_END = FIX_GDT_REMAP_BEGIN + NR_CPUS - 1,
+
__end_of_permanent_fixed_addresses,
/*
diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h
index 392576433e77..373ab1de909f 100644
--- a/arch/x86/include/asm/pgtable-2level_types.h
+++ b/arch/x86/include/asm/pgtable-2level_types.h
@@ -7,6 +7,7 @@
typedef unsigned long pteval_t;
typedef unsigned long pmdval_t;
typedef unsigned long pudval_t;
+typedef unsigned long p4dval_t;
typedef unsigned long pgdval_t;
typedef unsigned long pgprotval_t;
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index bcc89625ebe5..b8a4341faafa 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -7,6 +7,7 @@
typedef u64 pteval_t;
typedef u64 pmdval_t;
typedef u64 pudval_t;
+typedef u64 p4dval_t;
typedef u64 pgdval_t;
typedef u64 pgprotval_t;
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 1cfb36b8c024..6f6f351e0a81 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -179,6 +179,17 @@ static inline unsigned long pud_pfn(pud_t pud)
return (pud_val(pud) & pud_pfn_mask(pud)) >> PAGE_SHIFT;
}
+static inline unsigned long p4d_pfn(p4d_t p4d)
+{
+ return (p4d_val(p4d) & p4d_pfn_mask(p4d)) >> PAGE_SHIFT;
+}
+
+static inline int p4d_large(p4d_t p4d)
+{
+ /* No 512 GiB pages yet */
+ return 0;
+}
+
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
static inline int pmd_large(pmd_t pte)
@@ -770,6 +781,16 @@ static inline int pud_large(pud_t pud)
}
#endif /* CONFIG_PGTABLE_LEVELS > 2 */
+static inline unsigned long pud_index(unsigned long address)
+{
+ return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
+}
+
+static inline unsigned long p4d_index(unsigned long address)
+{
+ return (address >> P4D_SHIFT) & (PTRS_PER_P4D - 1);
+}
+
#if CONFIG_PGTABLE_LEVELS > 3
static inline int pgd_present(pgd_t pgd)
{
@@ -788,11 +809,6 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
/* to find an entry in a page-table-directory. */
-static inline unsigned long pud_index(unsigned long address)
-{
- return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
-}
-
static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
{
return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address);
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 3a264200c62f..516593e66bd6 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -13,6 +13,7 @@
typedef unsigned long pteval_t;
typedef unsigned long pmdval_t;
typedef unsigned long pudval_t;
+typedef unsigned long p4dval_t;
typedef unsigned long pgdval_t;
typedef unsigned long pgprotval_t;
@@ -67,7 +68,8 @@ typedef struct { pteval_t pte; } pte_t;
#endif /* CONFIG_RANDOMIZE_MEMORY */
#define VMALLOC_END (VMALLOC_START + _AC((VMALLOC_SIZE_TB << 40) - 1, UL))
#define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
-#define MODULES_END _AC(0xffffffffff000000, UL)
+/* The module sections ends with the start of the fixmap */
+#define MODULES_END __fix_to_virt(__end_of_fixed_addresses + 1)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
#define ESPFIX_PGD_ENTRY _AC(-2, UL)
#define ESPFIX_BASE_ADDR (ESPFIX_PGD_ENTRY << PGDIR_SHIFT)
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 62484333673d..df08535f774a 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -272,9 +272,20 @@ static inline pgdval_t pgd_flags(pgd_t pgd)
return native_pgd_val(pgd) & PTE_FLAGS_MASK;
}
-#if CONFIG_PGTABLE_LEVELS > 3
+#if CONFIG_PGTABLE_LEVELS > 4
+
+#error FIXME
+
+#else
#include <asm-generic/5level-fixup.h>
+static inline p4dval_t native_p4d_val(p4d_t p4d)
+{
+ return native_pgd_val(p4d);
+}
+#endif
+
+#if CONFIG_PGTABLE_LEVELS > 3
typedef struct { pudval_t pud; } pud_t;
static inline pud_t native_make_pud(pmdval_t val)
@@ -318,6 +329,22 @@ static inline pmdval_t native_pmd_val(pmd_t pmd)
}
#endif
+static inline p4dval_t p4d_pfn_mask(p4d_t p4d)
+{
+ /* No 512 GiB huge pages yet */
+ return PTE_PFN_MASK;
+}
+
+static inline p4dval_t p4d_flags_mask(p4d_t p4d)
+{
+ return ~p4d_pfn_mask(p4d);
+}
+
+static inline p4dval_t p4d_flags(p4d_t p4d)
+{
+ return native_p4d_val(p4d) & p4d_flags_mask(p4d);
+}
+
static inline pudval_t pud_pfn_mask(pud_t pud)
{
if (native_pud_val(pud) & _PAGE_PSE)
@@ -461,6 +488,7 @@ enum pg_level {
PG_LEVEL_4K,
PG_LEVEL_2M,
PG_LEVEL_1G,
+ PG_LEVEL_512G,
PG_LEVEL_NUM
};
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f385eca5407a..edf42c4ac8c8 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -716,6 +716,8 @@ extern struct desc_ptr early_gdt_descr;
extern void cpu_set_gdt(int);
extern void switch_to_new_gdt(int);
+extern void load_direct_gdt(int);
+extern void load_fixmap_gdt(int);
extern void load_percpu_segment(int);
extern void cpu_init(void);
@@ -797,6 +799,7 @@ static inline void spin_lock_prefetch(const void *x)
/*
* User space process size: 3GB (default).
*/
+#define IA32_PAGE_OFFSET PAGE_OFFSET
#define TASK_SIZE PAGE_OFFSET
#define TASK_SIZE_MAX TASK_SIZE
#define STACK_TOP TASK_SIZE
@@ -873,7 +876,8 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
* This decides where the kernel will search for a free chunk of vm
* space during mmap's.
*/
-#define TASK_UNMAPPED_BASE (PAGE_ALIGN(TASK_SIZE / 3))
+#define __TASK_UNMAPPED_BASE(task_size) (PAGE_ALIGN(task_size / 3))
+#define TASK_UNMAPPED_BASE __TASK_UNMAPPED_BASE(TASK_SIZE)
#define KSTK_EIP(task) (task_pt_regs(task)->ip)
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 58505f01962f..dcbd9bcce714 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -87,7 +87,7 @@ static inline void setup_stack_canary_segment(int cpu)
{
#ifdef CONFIG_X86_32
unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
- struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
+ struct desc_struct *gdt_table = get_cpu_gdt_rw(cpu);
struct desc_struct desc;
desc = gdt_table[GDT_ENTRY_STACK_CANARY];
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 48587335ede8..ed014814ea35 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,7 +101,7 @@ int x86_acpi_suspend_lowlevel(void)
#ifdef CONFIG_SMP
initial_stack = (unsigned long)temp_stack + sizeof(temp_stack);
early_gdt_descr.address =
- (unsigned long)get_cpu_gdt_table(smp_processor_id());
+ (unsigned long)get_cpu_gdt_rw(smp_processor_id());
initial_gs = per_cpu_offset(smp_processor_id());
#endif
initial_code = (unsigned long)wakeup_long64;
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 5a414545e8a3..446b0d3d4932 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -609,7 +609,7 @@ static long __apm_bios_call(void *_call)
cpu = get_cpu();
BUG_ON(cpu != 0);
- gdt = get_cpu_gdt_table(cpu);
+ gdt = get_cpu_gdt_rw(cpu);
save_desc_40 = gdt[0x40 / 8];
gdt[0x40 / 8] = bad_bios_desc;
@@ -685,7 +685,7 @@ static long __apm_bios_call_simple(void *_call)
cpu = get_cpu();
BUG_ON(cpu != 0);
- gdt = get_cpu_gdt_table(cpu);
+ gdt = get_cpu_gdt_rw(cpu);
save_desc_40 = gdt[0x40 / 8];
gdt[0x40 / 8] = bad_bios_desc;
@@ -2352,7 +2352,7 @@ static int __init apm_init(void)
* Note we only set APM segments on CPU zero, since we pin the APM
* code to that CPU.
*/
- gdt = get_cpu_gdt_table(0);
+ gdt = get_cpu_gdt_rw(0);
set_desc_base(&gdt[APM_CS >> 3],
(unsigned long)__va((unsigned long)apm_info.bios.cseg << 4));
set_desc_base(&gdt[APM_CS_16 >> 3],
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 58094a1f9e9d..f8e22dbad86c 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -449,18 +449,53 @@ void load_percpu_segment(int cpu)
}
/*
- * Current gdt points %fs at the "master" per-cpu area: after this,
- * it's on the real one.
+ * On 64-bit the GDT remapping is read-only.
+ * A global is used for Xen to change the default when required.
*/
-void switch_to_new_gdt(int cpu)
+#ifdef CONFIG_X86_64
+pgprot_t pg_fixmap_gdt_flags = PAGE_KERNEL_RO;
+#else
+pgprot_t pg_fixmap_gdt_flags = PAGE_KERNEL;
+#endif
+
+/* Setup the fixmap mapping only once per-processor */
+static inline void setup_fixmap_gdt(int cpu)
+{
+ __set_fixmap(get_cpu_gdt_ro_index(cpu),
+ __pa(get_cpu_gdt_rw(cpu)), pg_fixmap_gdt_flags);
+}
+
+/* Load the original GDT from the per-cpu structure */
+void load_direct_gdt(int cpu)
{
struct desc_ptr gdt_descr;
- gdt_descr.address = (long)get_cpu_gdt_table(cpu);
+ gdt_descr.address = (long)get_cpu_gdt_rw(cpu);
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
- /* Reload the per-cpu base */
+}
+EXPORT_SYMBOL_GPL(load_direct_gdt);
+
+/* Load a fixmap remapping of the per-cpu GDT */
+void load_fixmap_gdt(int cpu)
+{
+ struct desc_ptr gdt_descr;
+ gdt_descr.address = (long)get_cpu_gdt_ro(cpu);
+ gdt_descr.size = GDT_SIZE - 1;
+ load_gdt(&gdt_descr);
+}
+EXPORT_SYMBOL_GPL(load_fixmap_gdt);
+
+/*
+ * Current gdt points %fs at the "master" per-cpu area: after this,
+ * it's on the real one.
+ */
+void switch_to_new_gdt(int cpu)
+{
+ /* Load the original GDT */
+ load_direct_gdt(cpu);
+ /* Reload the per-cpu base */
load_percpu_segment(cpu);
}
@@ -1526,6 +1561,9 @@ void cpu_init(void)
if (is_uv_system())
uv_cpu_init();
+
+ setup_fixmap_gdt(cpu);
+ load_fixmap_gdt(cpu);
}
#else
@@ -1581,6 +1619,9 @@ void cpu_init(void)
dbg_restore_debug_regs();
fpu__init_cpu();
+
+ setup_fixmap_gdt(cpu);
+ load_fixmap_gdt(cpu);
}
#endif
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 477ae806c2fa..fad61caac75e 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -35,6 +35,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
+#include <asm/fixmap.h>
#if 0
#define DEBUGP(fmt, ...) \
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 9820d6d977c6..11338b0b3ad2 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -160,7 +160,7 @@ static inline void setup_percpu_segment(int cpu)
pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
0x2 | DESCTYPE_S, 0x8);
gdt.s = 1;
- write_gdt_entry(get_cpu_gdt_table(cpu),
+ write_gdt_entry(get_cpu_gdt_rw(cpu),
GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
#endif
}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index bd1f1ad35284..f04479a8f74f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -983,7 +983,7 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
unsigned long timeout;
idle->thread.sp = (unsigned long)task_pt_regs(idle);
- early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
+ early_gdt_descr.address = (unsigned long)get_cpu_gdt_rw(cpu);
initial_code = (unsigned long)start_secondary;
initial_stack = idle->thread.sp;
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index 50215a4b9347..207b8f2582c7 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -17,6 +17,8 @@
#include <linux/uaccess.h>
#include <linux/elf.h>
+#include <asm/elf.h>
+#include <asm/compat.h>
#include <asm/ia32.h>
#include <asm/syscalls.h>
@@ -101,7 +103,7 @@ out:
static void find_start_end(unsigned long flags, unsigned long *begin,
unsigned long *end)
{
- if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT)) {
+ if (!in_compat_syscall() && (flags & MAP_32BIT)) {
/* This is usually used needed to map code in small
model, so it needs to be in the first 31bit. Limit
it to that. This means we need to move the
@@ -114,10 +116,11 @@ static void find_start_end(unsigned long flags, unsigned long *begin,
if (current->flags & PF_RANDOMIZE) {
*begin = randomize_page(*begin, 0x02000000);
}
- } else {
- *begin = current->mm->mmap_legacy_base;
- *end = TASK_SIZE;
+ return;
}
+
+ *begin = get_mmap_base(1);
+ *end = in_compat_syscall() ? tasksize_32bit() : tasksize_64bit();
}
unsigned long
@@ -176,7 +179,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
return addr;
/* for MAP_32BIT mappings we force the legacy mmap base */
- if (!test_thread_flag(TIF_ADDR32) && (flags & MAP_32BIT))
+ if (!in_compat_syscall() && (flags & MAP_32BIT))
goto bottomup;
/* requesting a specific address */
@@ -191,7 +194,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = PAGE_SIZE;
- info.high_limit = mm->mmap_base;
+ info.high_limit = get_mmap_base(0);
info.align_mask = 0;
info.align_offset = pgoff << PAGE_SHIFT;
if (filp) {
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index b868fa1b812b..5db0f33cbf2c 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -118,12 +118,16 @@ static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
pgprot_t prot)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
pgd = pgd_offset(&tboot_mm, vaddr);
- pud = pud_alloc(&tboot_mm, pgd, vaddr);
+ p4d = p4d_alloc(&tboot_mm, pgd, vaddr);
+ if (!p4d)
+ return -1;
+ pud = pud_alloc(&tboot_mm, p4d, vaddr);
if (!pud)
return -1;
pmd = pmd_alloc(&tboot_mm, pud, vaddr);
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 23ee89ce59a9..62597c300d94 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -164,6 +164,7 @@ static void mark_screen_rdonly(struct mm_struct *mm)
struct vm_area_struct *vma;
spinlock_t *ptl;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -173,7 +174,10 @@ static void mark_screen_rdonly(struct mm_struct *mm)
pgd = pgd_offset(mm, 0xA0000);
if (pgd_none_or_clear_bad(pgd))
goto out;
- pud = pud_offset(pgd, 0xA0000);
+ p4d = p4d_offset(pgd, 0xA0000);
+ if (p4d_none_or_clear_bad(p4d))
+ goto out;
+ pud = pud_offset(p4d, 0xA0000);
if (pud_none_or_clear_bad(pud))
goto out;
pmd = pmd_offset(pud, 0xA0000);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d1efe2c62b3f..c02b9af2056a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -741,7 +741,6 @@ static int svm_hardware_enable(void)
struct svm_cpu_data *sd;
uint64_t efer;
- struct desc_ptr gdt_descr;
struct desc_struct *gdt;
int me = raw_smp_processor_id();
@@ -763,8 +762,7 @@ static int svm_hardware_enable(void)
sd->max_asid = cpuid_ebx(SVM_CPUID_FUNC) - 1;
sd->next_asid = sd->max_asid + 1;
- native_store_gdt(&gdt_descr);
- gdt = (struct desc_struct *)gdt_descr.address;
+ gdt = get_current_gdt_rw();
sd->tss_desc = (struct kvm_ldttss_desc *)(gdt + GDT_ENTRY_TSS);
wrmsrl(MSR_EFER, efer | EFER_SVME);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 98e82ee1e699..596a76d82b11 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -935,7 +935,6 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
* when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
*/
static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
-static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
/*
* We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we
@@ -2052,14 +2051,13 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
*/
static unsigned long segment_base(u16 selector)
{
- struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
struct desc_struct *table;
unsigned long v;
if (!(selector & ~SEGMENT_RPL_MASK))
return 0;
- table = (struct desc_struct *)gdt->address;
+ table = get_current_gdt_ro();
if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) {
u16 ldt_selector = kvm_read_ldt();
@@ -2164,7 +2162,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
#endif
if (vmx->host_state.msr_host_bndcfgs)
wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs);
- load_gdt(this_cpu_ptr(&host_gdt));
+ load_fixmap_gdt(raw_smp_processor_id());
}
static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -2266,7 +2264,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
}
if (!already_loaded) {
- struct desc_ptr *gdt = this_cpu_ptr(&host_gdt);
+ unsigned long gdt = get_current_gdt_ro_vaddr();
unsigned long sysenter_esp;
kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
@@ -2277,7 +2275,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
*/
vmcs_writel(HOST_TR_BASE,
(unsigned long)this_cpu_ptr(&cpu_tss));
- vmcs_writel(HOST_GDTR_BASE, gdt->address);
+ vmcs_writel(HOST_GDTR_BASE, gdt); /* 22.2.4 */
/*
* VM exits change the host TR limit to 0x67 after a VM
@@ -3465,8 +3463,6 @@ static int hardware_enable(void)
ept_sync_global();
}
- native_store_gdt(this_cpu_ptr(&host_gdt));
-
return 0;
}
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c
index 58b5bee7ea27..75efeecc85eb 100644
--- a/arch/x86/mm/dump_pagetables.c
+++ b/arch/x86/mm/dump_pagetables.c
@@ -20,6 +20,7 @@
#include <asm/kasan.h>
#include <asm/pgtable.h>
+#include <asm/fixmap.h>
/*
* The dumper groups pagetable entries of the same type into one, and for
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 428e31763cb9..8ad91a01cbc8 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -253,6 +253,7 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
unsigned index = pgd_index(address);
pgd_t *pgd_k;
+ p4d_t *p4d, *p4d_k;
pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
@@ -265,10 +266,15 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
/*
* set_pgd(pgd, *pgd_k); here would be useless on PAE
* and redundant with the set_pmd() on non-PAE. As would
- * set_pud.
+ * set_p4d/set_pud.
*/
- pud = pud_offset(pgd, address);
- pud_k = pud_offset(pgd_k, address);
+ p4d = p4d_offset(pgd, address);
+ p4d_k = p4d_offset(pgd_k, address);
+ if (!p4d_present(*p4d_k))
+ return NULL;
+
+ pud = pud_offset(p4d, address);
+ pud_k = pud_offset(p4d_k, address);
if (!pud_present(*pud_k))
return NULL;
@@ -384,6 +390,8 @@ static void dump_pagetable(unsigned long address)
{
pgd_t *base = __va(read_cr3());
pgd_t *pgd = &base[pgd_index(address)];
+ p4d_t *p4d;
+ pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -392,7 +400,9 @@ static void dump_pagetable(unsigned long address)
if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
goto out;
#endif
- pmd = pmd_offset(pud_offset(pgd, address), address);
+ p4d = p4d_offset(pgd, address);
+ pud = pud_offset(p4d, address);
+ pmd = pmd_offset(pud, address);
printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
/*
@@ -425,6 +435,7 @@ void vmalloc_sync_all(void)
static noinline int vmalloc_fault(unsigned long address)
{
pgd_t *pgd, *pgd_ref;
+ p4d_t *p4d, *p4d_ref;
pud_t *pud, *pud_ref;
pmd_t *pmd, *pmd_ref;
pte_t *pte, *pte_ref;
@@ -448,17 +459,37 @@ static noinline int vmalloc_fault(unsigned long address)
if (pgd_none(*pgd)) {
set_pgd(pgd, *pgd_ref);
arch_flush_lazy_mmu_mode();
- } else {
+ } else if (CONFIG_PGTABLE_LEVELS > 4) {
+ /*
+ * With folded p4d, pgd_none() is always false, so the pgd may
+ * point to an empty page table entry and pgd_page_vaddr()
+ * will return garbage.
+ *
+ * We will do the correct sanity check on the p4d level.
+ */
BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
}
+ /* With 4-level paging, copying happens on the p4d level. */
+ p4d = p4d_offset(pgd, address);
+ p4d_ref = p4d_offset(pgd_ref, address);
+ if (p4d_none(*p4d_ref))
+ return -1;
+
+ if (p4d_none(*p4d)) {
+ set_p4d(p4d, *p4d_ref);
+ arch_flush_lazy_mmu_mode();
+ } else {
+ BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_ref));
+ }
+
/*
* Below here mismatches are bugs because these lower tables
* are shared:
*/
- pud = pud_offset(pgd, address);
- pud_ref = pud_offset(pgd_ref, address);
+ pud = pud_offset(p4d, address);
+ pud_ref = pud_offset(p4d_ref, address);
if (pud_none(*pud_ref))
return -1;
@@ -526,6 +557,7 @@ static void dump_pagetable(unsigned long address)
{
pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
pgd_t *pgd = base + pgd_index(address);
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -538,7 +570,15 @@ static void dump_pagetable(unsigned long address)
if (!pgd_present(*pgd))
goto out;
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ if (bad_address(p4d))
+ goto bad;
+
+ printk("P4D %lx ", p4d_val(*p4d));
+ if (!p4d_present(*p4d) || p4d_large(*p4d))
+ goto out;
+
+ pud = pud_offset(p4d, address);
if (bad_address(pud))
goto bad;
@@ -1082,6 +1122,7 @@ static noinline int
spurious_fault(unsigned long error_code, unsigned long address)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -1104,7 +1145,14 @@ spurious_fault(unsigned long error_code, unsigned long address)
if (!pgd_present(*pgd))
return 0;
- pud = pud_offset(pgd, address);
+ p4d = p4d_offset(pgd, address);
+ if (!p4d_present(*p4d))
+ return 0;
+
+ if (p4d_large(*p4d))
+ return spurious_fault_check(error_code, (pte_t *) p4d);
+
+ pud = pud_offset(p4d, address);
if (!pud_present(*pud))
return 0;
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 1f3b6ef105cd..456dfdfd2249 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -76,9 +76,9 @@ static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
}
/*
- * 'pteval' can come from a pte, pmd or pud. We only check
+ * 'pteval' can come from a pte, pmd, pud or p4d. We only check
* _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the
- * same value on all 3 types.
+ * same value on all 4 types.
*/
static inline int pte_allows_gup(unsigned long pteval, int write)
{
@@ -295,13 +295,13 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr,
return 1;
}
-static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
+static int gup_pud_range(p4d_t p4d, unsigned long addr, unsigned long end,
int write, struct page **pages, int *nr)
{
unsigned long next;
pud_t *pudp;
- pudp = pud_offset(&pgd, addr);
+ pudp = pud_offset(&p4d, addr);
do {
pud_t pud = *pudp;
@@ -320,6 +320,27 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end,
return 1;
}
+static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ p4d_t *p4dp;
+
+ p4dp = p4d_offset(&pgd, addr);
+ do {
+ p4d_t p4d = *p4dp;
+
+ next = p4d_addr_end(addr, end);
+ if (p4d_none(p4d))
+ return 0;
+ BUILD_BUG_ON(p4d_large(p4d));
+ if (!gup_pud_range(p4d, addr, next, write, pages, nr))
+ return 0;
+ } while (p4dp++, addr = next, addr != end);
+
+ return 1;
+}
+
/*
* Like get_user_pages_fast() except its IRQ-safe in that it won't fall
* back to the regular GUP.
@@ -368,7 +389,7 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
break;
- if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+ if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
break;
} while (pgdp++, addr = next, addr != end);
local_irq_restore(flags);
@@ -440,7 +461,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
next = pgd_addr_end(addr, end);
if (pgd_none(pgd))
goto slow;
- if (!gup_pud_range(pgd, addr, next, write, pages, &nr))
+ if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
goto slow;
} while (pgdp++, addr = next, addr != end);
local_irq_enable();
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index c5066a260803..302f43fd9c28 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -12,10 +12,12 @@
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
+#include <linux/compat.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>
+#include <asm/elf.h>
#if 0 /* This is just for testing */
struct page *
@@ -82,8 +84,9 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
info.flags = 0;
info.length = len;
- info.low_limit = current->mm->mmap_legacy_base;
- info.high_limit = TASK_SIZE;
+ info.low_limit = get_mmap_base(1);
+ info.high_limit = in_compat_syscall() ?
+ tasksize_32bit() : tasksize_64bit();
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
return vm_unmapped_area(&info);
@@ -100,7 +103,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = PAGE_SIZE;
- info.high_limit = current->mm->mmap_base;
+ info.high_limit = get_mmap_base(0);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
addr = vm_unmapped_area(&info);
diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c
index 4473cb4f8b90..04210a29dd60 100644
--- a/arch/x86/mm/ident_map.c
+++ b/arch/x86/mm/ident_map.c
@@ -45,6 +45,34 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page,
return 0;
}
+static int ident_p4d_init(struct x86_mapping_info *info, p4d_t *p4d_page,
+ unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+
+ for (; addr < end; addr = next) {
+ p4d_t *p4d = p4d_page + p4d_index(addr);
+ pud_t *pud;
+
+ next = (addr & P4D_MASK) + P4D_SIZE;
+ if (next > end)
+ next = end;
+
+ if (p4d_present(*p4d)) {
+ pud = pud_offset(p4d, 0);
+ ident_pud_init(info, pud, addr, next);
+ continue;
+ }
+ pud = (pud_t *)info->alloc_pgt_page(info->context);
+ if (!pud)
+ return -ENOMEM;
+ ident_pud_init(info, pud, addr, next);
+ set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
+ }
+
+ return 0;
+}
+
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
unsigned long pstart, unsigned long pend)
{
@@ -55,27 +83,36 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
for (; addr < end; addr = next) {
pgd_t *pgd = pgd_page + pgd_index(addr);
- pud_t *pud;
+ p4d_t *p4d;
next = (addr & PGDIR_MASK) + PGDIR_SIZE;
if (next > end)
next = end;
if (pgd_present(*pgd)) {
- pud = pud_offset(pgd, 0);
- result = ident_pud_init(info, pud, addr, next);
+ p4d = p4d_offset(pgd, 0);
+ result = ident_p4d_init(info, p4d, addr, next);
if (result)
return result;
continue;
}
- pud = (pud_t *)info->alloc_pgt_page(info->context);
- if (!pud)
+ p4d = (p4d_t *)info->alloc_pgt_page(info->context);
+ if (!p4d)
return -ENOMEM;
- result = ident_pud_init(info, pud, addr, next);
+ result = ident_p4d_init(info, p4d, addr, next);
if (result)
return result;
- set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
+ } else {
+ /*
+ * With p4d folded, pgd is equal to p4d.
+ * The pgd entry has to point to the pud page table in this case.
+ */
+ pud_t *pud = pud_offset(p4d, 0);
+ set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
+ }
}
return 0;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 2b4b53e6793f..5ed3c141bbd5 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -67,6 +67,7 @@ bool __read_mostly __vmalloc_start_set = false;
*/
static pmd_t * __init one_md_table_init(pgd_t *pgd)
{
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd_table;
@@ -75,13 +76,15 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd)
pmd_table = (pmd_t *)alloc_low_page();
paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
- pud = pud_offset(pgd, 0);
+ p4d = p4d_offset(pgd, 0);
+ pud = pud_offset(p4d, 0);
BUG_ON(pmd_table != pmd_offset(pud, 0));
return pmd_table;
}
#endif
- pud = pud_offset(pgd, 0);
+ p4d = p4d_offset(pgd, 0);
+ pud = pud_offset(p4d, 0);
pmd_table = pmd_offset(pud, 0);
return pmd_table;
@@ -390,8 +393,11 @@ pte_t *kmap_pte;
static inline pte_t *kmap_get_fixmap_pte(unsigned long vaddr)
{
- return pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr),
- vaddr), vaddr), vaddr);
+ pgd_t *pgd = pgd_offset_k(vaddr);
+ p4d_t *p4d = p4d_offset(pgd, vaddr);
+ pud_t *pud = pud_offset(p4d, vaddr);
+ pmd_t *pmd = pmd_offset(pud, vaddr);
+ return pte_offset_kernel(pmd, vaddr);
}
static void __init kmap_init(void)
@@ -410,6 +416,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
{
unsigned long vaddr;
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -418,7 +425,8 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
pgd = swapper_pg_dir + pgd_index(vaddr);
- pud = pud_offset(pgd, vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ pud = pud_offset(p4d, vaddr);
pmd = pmd_offset(pud, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
pkmap_page_table = pte;
@@ -450,6 +458,7 @@ void __init native_pagetable_init(void)
{
unsigned long pfn, va;
pgd_t *pgd, *base = swapper_pg_dir;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -469,7 +478,8 @@ void __init native_pagetable_init(void)
if (!pgd_present(*pgd))
break;
- pud = pud_offset(pgd, va);
+ p4d = p4d_offset(pgd, va);
+ pud = pud_offset(p4d, va);
pmd = pmd_offset(pud, va);
if (!pmd_present(*pmd))
break;
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 7aaa2635862d..a5e1cda85974 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -425,7 +425,8 @@ static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
/* Don't assume we're using swapper_pg_dir at this point */
pgd_t *base = __va(read_cr3());
pgd_t *pgd = &base[pgd_index(addr)];
- pud_t *pud = pud_offset(pgd, addr);
+ p4d_t *p4d = p4d_offset(pgd, addr);
+ pud_t *pud = pud_offset(p4d, addr);
pmd_t *pmd = pmd_offset(pud, addr);
return pmd;
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 8d63d7a104c3..1bde19ef86bd 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -9,6 +9,7 @@
#include <asm/tlbflush.h>
#include <asm/sections.h>
+#include <asm/fixmap.h>
extern pgd_t early_level4_pgt[PTRS_PER_PGD];
extern struct range pfn_mapped[E820_X_MAX];
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 7940166c799b..19ad095b41df 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -30,30 +30,44 @@
#include <linux/limits.h>
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
+#include <linux/compat.h>
#include <asm/elf.h>
struct va_alignment __read_mostly va_align = {
.flags = -1,
};
-static unsigned long stack_maxrandom_size(void)
+unsigned long tasksize_32bit(void)
+{
+ return IA32_PAGE_OFFSET;
+}
+
+unsigned long tasksize_64bit(void)
+{
+ return TASK_SIZE_MAX;
+}
+
+static unsigned long stack_maxrandom_size(unsigned long task_size)
{
unsigned long max = 0;
if ((current->flags & PF_RANDOMIZE) &&
!(current->personality & ADDR_NO_RANDOMIZE)) {
- max = ((-1UL) & STACK_RND_MASK) << PAGE_SHIFT;
+ max = (-1UL) & __STACK_RND_MASK(task_size == tasksize_32bit());
+ max <<= PAGE_SHIFT;
}
return max;
}
-/*
- * Top of mmap area (just below the process stack).
- *
- * Leave an at least ~128 MB hole with possible stack randomization.
- */
-#define MIN_GAP (128*1024*1024UL + stack_maxrandom_size())
-#define MAX_GAP (TASK_SIZE/6*5)
+#ifdef CONFIG_COMPAT
+# define mmap32_rnd_bits mmap_rnd_compat_bits
+# define mmap64_rnd_bits mmap_rnd_bits
+#else
+# define mmap32_rnd_bits mmap_rnd_bits
+# define mmap64_rnd_bits mmap_rnd_bits
+#endif
+
+#define SIZE_128M (128 * 1024 * 1024UL)
static int mmap_is_legacy(void)
{
@@ -66,54 +80,91 @@ static int mmap_is_legacy(void)
return sysctl_legacy_va_layout;
}
-unsigned long arch_mmap_rnd(void)
+static unsigned long arch_rnd(unsigned int rndbits)
{
- unsigned long rnd;
-
- if (mmap_is_ia32())
-#ifdef CONFIG_COMPAT
- rnd = get_random_long() & ((1UL << mmap_rnd_compat_bits) - 1);
-#else
- rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
-#endif
- else
- rnd = get_random_long() & ((1UL << mmap_rnd_bits) - 1);
+ return (get_random_long() & ((1UL << rndbits) - 1)) << PAGE_SHIFT;
+}
- return rnd << PAGE_SHIFT;
+unsigned long arch_mmap_rnd(void)
+{
+ if (!(current->flags & PF_RANDOMIZE))
+ return 0;
+ return arch_rnd(mmap_is_ia32() ? mmap32_rnd_bits : mmap64_rnd_bits);
}
-static unsigned long mmap_base(unsigned long rnd)
+static unsigned long mmap_base(unsigned long rnd, unsigned long task_size)
{
unsigned long gap = rlimit(RLIMIT_STACK);
+ unsigned long gap_min, gap_max;
+
+ /*
+ * Top of mmap area (just below the process stack).
+ * Leave an at least ~128 MB hole with possible stack randomization.
+ */
+ gap_min = SIZE_128M + stack_maxrandom_size(task_size);
+ gap_max = (task_size / 6) * 5;
- if (gap < MIN_GAP)
- gap = MIN_GAP;
- else if (gap > MAX_GAP)
- gap = MAX_GAP;
+ if (gap < gap_min)
+ gap = gap_min;
+ else if (gap > gap_max)
+ gap = gap_max;
+
+ return PAGE_ALIGN(task_size - gap - rnd);
+}
- return PAGE_ALIGN(TASK_SIZE - gap - rnd);
+static unsigned long mmap_legacy_base(unsigned long rnd,
+ unsigned long task_size)
+{
+ return __TASK_UNMAPPED_BASE(task_size) + rnd;
}
/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
*/
+static void arch_pick_mmap_base(unsigned long *base, unsigned long *legacy_base,
+ unsigned long random_factor, unsigned long task_size)
+{
+ *legacy_base = mmap_legacy_base(random_factor, task_size);
+ if (mmap_is_legacy())
+ *base = *legacy_base;
+ else
+ *base = mmap_base(random_factor, task_size);
+}
+
void arch_pick_mmap_layout(struct mm_struct *mm)
{
- unsigned long random_factor = 0UL;
+ if (mmap_is_legacy())
+ mm->get_unmapped_area = arch_get_unmapped_area;
+ else
+ mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- if (current->flags & PF_RANDOMIZE)
- random_factor = arch_mmap_rnd();
+ arch_pick_mmap_base(&mm->mmap_base, &mm->mmap_legacy_base,
+ arch_rnd(mmap64_rnd_bits), tasksize_64bit());
+
+#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
+ /*
+ * The mmap syscall mapping base decision depends solely on the
+ * syscall type (64-bit or compat). This applies for 64bit
+ * applications and 32bit applications. The 64bit syscall uses
+ * mmap_base, the compat syscall uses mmap_compat_base.
+ */
+ arch_pick_mmap_base(&mm->mmap_compat_base, &mm->mmap_compat_legacy_base,
+ arch_rnd(mmap32_rnd_bits), tasksize_32bit());
+#endif
+}
- mm->mmap_legacy_base = TASK_UNMAPPED_BASE + random_factor;
+unsigned long get_mmap_base(int is_legacy)
+{
+ struct mm_struct *mm = current->mm;
- if (mmap_is_legacy()) {
- mm->mmap_base = mm->mmap_legacy_base;
- mm->get_unmapped_area = arch_get_unmapped_area;
- } else {
- mm->mmap_base = mmap_base(random_factor);
- mm->get_unmapped_area = arch_get_unmapped_area_topdown;
+#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES
+ if (in_compat_syscall()) {
+ return is_legacy ? mm->mmap_compat_legacy_base
+ : mm->mmap_compat_base;
}
+#endif
+ return is_legacy ? mm->mmap_legacy_base : mm->mmap_base;
}
const char *arch_vma_name(struct vm_area_struct *vma)
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 6cbdff26bb96..38b6daf72deb 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -261,13 +261,15 @@ static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
{
+ p4d_t *p4d;
pud_t *pud;
int i;
if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
return;
- pud = pud_offset(pgd, 0);
+ p4d = p4d_offset(pgd, 0);
+ pud = pud_offset(p4d, 0);
for (i = 0; i < PREALLOCATED_PMDS; i++, pud++) {
pmd_t *pmd = pmds[i];
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
index 9adce776852b..3d275a791c76 100644
--- a/arch/x86/mm/pgtable_32.c
+++ b/arch/x86/mm/pgtable_32.c
@@ -26,6 +26,7 @@ unsigned int __VMALLOC_RESERVE = 128 << 20;
void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
@@ -35,7 +36,12 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
BUG();
return;
}
- pud = pud_offset(pgd, vaddr);
+ p4d = p4d_offset(pgd, vaddr);
+ if (p4d_none(*p4d)) {
+ BUG();
+ return;
+ }
+ pud = pud_offset(p4d, vaddr);
if (pud_none(*pud)) {
BUG();
return;
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index cef39b097649..950071171436 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -68,7 +68,7 @@ pgd_t * __init efi_call_phys_prolog(void)
load_cr3(initial_page_table);
__flush_tlb_all();
- gdt_descr.address = __pa(get_cpu_gdt_table(0));
+ gdt_descr.address = __pa(get_cpu_gdt_rw(0));
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
@@ -79,7 +79,7 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd)
{
struct desc_ptr gdt_descr;
- gdt_descr.address = (unsigned long)get_cpu_gdt_table(0);
+ gdt_descr.address = (unsigned long)get_cpu_gdt_rw(0);
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index a4695da42d77..8544dae3d1b4 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -166,6 +166,7 @@ void efi_sync_low_kernel_mappings(void)
{
unsigned num_entries;
pgd_t *pgd_k, *pgd_efi;
+ p4d_t *p4d_k, *p4d_efi;
pud_t *pud_k, *pud_efi;
if (efi_enabled(EFI_OLD_MEMMAP))
@@ -197,16 +198,20 @@ void efi_sync_low_kernel_mappings(void)
BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0);
pgd_efi = efi_pgd + pgd_index(EFI_VA_END);
- pud_efi = pud_offset(pgd_efi, 0);
+ p4d_efi = p4d_offset(pgd_efi, 0);
+ pud_efi = pud_offset(p4d_efi, 0);
pgd_k = pgd_offset_k(EFI_VA_END);
- pud_k = pud_offset(pgd_k, 0);
+ p4d_k = p4d_offset(pgd_k, 0);
+ pud_k = pud_offset(p4d_k, 0);
num_entries = pud_index(EFI_VA_END);
memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
- pud_efi = pud_offset(pgd_efi, EFI_VA_START);
- pud_k = pud_offset(pgd_k, EFI_VA_START);
+ p4d_efi = p4d_offset(pgd_efi, EFI_VA_START);
+ pud_efi = pud_offset(p4d_efi, EFI_VA_START);
+ p4d_k = p4d_offset(pgd_k, EFI_VA_START);
+ pud_k = pud_offset(p4d_k, EFI_VA_START);
num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START);
memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries);
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 66ade16c7693..6b05a9219ea2 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -95,7 +95,7 @@ static void __save_processor_state(struct saved_context *ctxt)
* 'pmode_gdt' in wakeup_start.
*/
ctxt->gdt_desc.size = GDT_SIZE - 1;
- ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_table(smp_processor_id());
+ ctxt->gdt_desc.address = (unsigned long)get_cpu_gdt_rw(smp_processor_id());
store_tr(ctxt->tr);
@@ -162,7 +162,7 @@ static void fix_processor_context(void)
int cpu = smp_processor_id();
struct tss_struct *t = &per_cpu(cpu_tss, cpu);
#ifdef CONFIG_X86_64
- struct desc_struct *desc = get_cpu_gdt_table(cpu);
+ struct desc_struct *desc = get_cpu_gdt_rw(cpu);
tss_desc tss;
#endif
set_tss_desc(cpu, t); /*
@@ -183,6 +183,9 @@ static void fix_processor_context(void)
load_mm_ldt(current->active_mm); /* This does lldt */
fpu__resume_cpu();
+
+ /* The processor is back on the direct GDT, load back the fixmap */
+ load_fixmap_gdt(cpu);
}
/**
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index 9f14bd34581d..c35fdb585c68 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -32,6 +32,7 @@ pgd_t *resume_pg_dir;
*/
static pmd_t *resume_one_md_table_init(pgd_t *pgd)
{
+ p4d_t *p4d;
pud_t *pud;
pmd_t *pmd_table;
@@ -41,11 +42,13 @@ static pmd_t *resume_one_md_table_init(pgd_t *pgd)
return NULL;
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
- pud = pud_offset(pgd, 0);
+ p4d = p4d_offset(pgd, 0);
+ pud = pud_offset(p4d, 0);
BUG_ON(pmd_table != pmd_offset(pud, 0));
#else
- pud = pud_offset(pgd, 0);
+ p4d = p4d_offset(pgd, 0);
+ pud = pud_offset(p4d, 0);
pmd_table = pmd_offset(pud, 0);
#endif
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index ded2e8272382..2a9f993bbbf0 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -49,6 +49,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
{
pmd_t *pmd;
pud_t *pud;
+ p4d_t *p4d;
/*
* The new mapping only has to cover the page containing the image
@@ -63,6 +64,13 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
* the virtual address space after switching over to the original page
* tables used by the image kernel.
*/
+
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ p4d = (p4d_t *)get_safe_page(GFP_ATOMIC);
+ if (!p4d)
+ return -ENOMEM;
+ }
+
pud = (pud_t *)get_safe_page(GFP_ATOMIC);
if (!pud)
return -ENOMEM;
@@ -75,8 +83,13 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
__pmd((jump_address_phys & PMD_MASK) | __PAGE_KERNEL_LARGE_EXEC));
set_pud(pud + pud_index(restore_jump_address),
__pud(__pa(pmd) | _KERNPG_TABLE));
- set_pgd(pgd + pgd_index(restore_jump_address),
- __pgd(__pa(pud) | _KERNPG_TABLE));
+ if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+ set_p4d(p4d + p4d_index(restore_jump_address), __p4d(__pa(pud) | _KERNPG_TABLE));
+ set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(p4d) | _KERNPG_TABLE));
+ } else {
+ /* No p4d for 4-level paging: point the pgd to the pud page table */
+ set_pgd(pgd + pgd_index(restore_jump_address), __pgd(__pa(pud) | _KERNPG_TABLE));
+ }
return 0;
}
@@ -124,7 +137,10 @@ static int set_up_temporary_mappings(void)
static int relocate_restore_code(void)
{
pgd_t *pgd;
+ p4d_t *p4d;
pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
relocated_restore_code = get_safe_page(GFP_ATOMIC);
if (!relocated_restore_code)
@@ -134,22 +150,25 @@ static int relocate_restore_code(void)
/* Make the page containing the relocated code executable */
pgd = (pgd_t *)__va(read_cr3()) + pgd_index(relocated_restore_code);
- pud = pud_offset(pgd, relocated_restore_code);
+ p4d = p4d_offset(pgd, relocated_restore_code);
+ if (p4d_large(*p4d)) {
+ set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX));
+ goto out;
+ }
+ pud = pud_offset(p4d, relocated_restore_code);
if (pud_large(*pud)) {
set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX));
- } else {
- pmd_t *pmd = pmd_offset(pud, relocated_restore_code);
-
- if (pmd_large(*pmd)) {
- set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
- } else {
- pte_t *pte = pte_offset_kernel(pmd, relocated_restore_code);
-
- set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
- }
+ goto out;
+ }
+ pmd = pmd_offset(pud, relocated_restore_code);
+ if (pmd_large(*pmd)) {
+ set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
+ goto out;
}
+ pte = pte_offset_kernel(pmd, relocated_restore_code);
+ set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
+out:
__flush_tlb_all();
-
return 0;
}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index ec1d5c46e58f..08faa61de5f7 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -710,7 +710,7 @@ static void load_TLS_descriptor(struct thread_struct *t,
*shadow = t->tls_array[i];
- gdt = get_cpu_gdt_table(cpu);
+ gdt = get_cpu_gdt_rw(cpu);
maddr = arbitrary_virt_to_machine(&gdt[GDT_ENTRY_TLS_MIN+i]);
mc = __xen_mc_entry(0);
@@ -1545,6 +1545,9 @@ asmlinkage __visible void __init xen_start_kernel(void)
*/
xen_initial_gdt = &per_cpu(gdt_page, 0);
+ /* GDT can only be remapped RO */
+ pg_fixmap_gdt_flags = PAGE_KERNEL_RO;
+
xen_smp_init();
#ifdef CONFIG_ACPI_NUMA
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 37cb5aad71de..ebbfe00133f7 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2326,6 +2326,7 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
#endif
case FIX_TEXT_POKE0:
case FIX_TEXT_POKE1:
+ case FIX_GDT_REMAP_BEGIN ... FIX_GDT_REMAP_END:
/* All local page mappings */
pte = pfn_pte(phys, prot);
break;
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 7ff2f1bfb7ec..eaa36162ed4a 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -392,7 +392,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
if (ctxt == NULL)
return -ENOMEM;
- gdt = get_cpu_gdt_table(cpu);
+ gdt = get_cpu_gdt_rw(cpu);
#ifdef CONFIG_X86_32
ctxt->user_regs.fs = __KERNEL_PERCPU;