diff options
Diffstat (limited to 'arch')
136 files changed, 1448 insertions, 1643 deletions
diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index bf8475ce85ee..baa152b9348e 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -1,7 +1,6 @@ generic-y += clkdev.h -generic-y += cputime.h generic-y += exec.h generic-y += export.h generic-y += irq_work.h diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 54d8616644e2..9d27a7d333dc 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1145,7 +1145,7 @@ struct rusage32 { SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru) { struct rusage32 r; - cputime_t utime, stime; + u64 utime, stime; unsigned long utime_jiffies, stime_jiffies; if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN) @@ -1155,16 +1155,16 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru) switch (who) { case RUSAGE_SELF: task_cputime(current, &utime, &stime); - utime_jiffies = cputime_to_jiffies(utime); - stime_jiffies = cputime_to_jiffies(stime); + utime_jiffies = nsecs_to_jiffies(utime); + stime_jiffies = nsecs_to_jiffies(stime); jiffies_to_timeval32(utime_jiffies, &r.ru_utime); jiffies_to_timeval32(stime_jiffies, &r.ru_stime); r.ru_minflt = current->min_flt; r.ru_majflt = current->maj_flt; break; case RUSAGE_CHILDREN: - utime_jiffies = cputime_to_jiffies(current->signal->cutime); - stime_jiffies = cputime_to_jiffies(current->signal->cstime); + utime_jiffies = nsecs_to_jiffies(current->signal->cutime); + stime_jiffies = nsecs_to_jiffies(current->signal->cstime); jiffies_to_timeval32(utime_jiffies, &r.ru_utime); jiffies_to_timeval32(stime_jiffies, &r.ru_stime); r.ru_minflt = current->signal->cmin_flt; diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index c332604606dd..63a04013d05a 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += auxvec.h generic-y += bitsperlong.h generic-y += bugs.h generic-y += clkdev.h -generic-y += cputime.h generic-y += device.h generic-y += div64.h generic-y += emergency-restart.h diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 028d2b70e3b5..f57ec511e7ae 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -824,6 +824,7 @@ CONFIG_QCOM_SMSM=y CONFIG_QCOM_WCNSS_CTRL=m CONFIG_ROCKCHIP_PM_DOMAINS=y CONFIG_COMMON_CLK_QCOM=y +CONFIG_QCOM_CLK_RPM=y CONFIG_CHROME_PLATFORMS=y CONFIG_STAGING_BOARD=y CONFIG_CROS_EC_CHARDEV=m diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index efb21757d41f..b14e8c7d71bd 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += bitsperlong.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += early_ioremap.h generic-y += emergency-restart.h diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 0b06f5341b45..e4e6a9d6a825 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -55,6 +55,7 @@ void efi_virtmap_unload(void); #define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) #define __efi_call_early(f, ...) f(__VA_ARGS__) +#define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__) #define efi_is_64bit() (false) #define efi_call_proto(protocol, f, instance, ...) \ diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h index 1f59ea051bab..b7e0125c0bbf 100644 --- a/arch/arm/include/asm/uaccess.h +++ b/arch/arm/include/asm/uaccess.h @@ -478,11 +478,10 @@ extern unsigned long __must_check arm_copy_from_user(void *to, const void __user *from, unsigned long n); static inline unsigned long __must_check -__copy_from_user(void *to, const void __user *from, unsigned long n) +__arch_copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned int __ua_flags; - check_object_size(to, n, false); __ua_flags = uaccess_save_and_enable(); n = arm_copy_from_user(to, from, n); uaccess_restore(__ua_flags); @@ -495,18 +494,15 @@ extern unsigned long __must_check __copy_to_user_std(void __user *to, const void *from, unsigned long n); static inline unsigned long __must_check -__copy_to_user(void __user *to, const void *from, unsigned long n) +__arch_copy_to_user(void __user *to, const void *from, unsigned long n) { #ifndef CONFIG_UACCESS_WITH_MEMCPY unsigned int __ua_flags; - - check_object_size(from, n, true); __ua_flags = uaccess_save_and_enable(); n = arm_copy_to_user(to, from, n); uaccess_restore(__ua_flags); return n; #else - check_object_size(from, n, true); return arm_copy_to_user(to, from, n); #endif } @@ -526,25 +522,49 @@ __clear_user(void __user *addr, unsigned long n) } #else -#define __copy_from_user(to, from, n) (memcpy(to, (void __force *)from, n), 0) -#define __copy_to_user(to, from, n) (memcpy((void __force *)to, from, n), 0) +#define __arch_copy_from_user(to, from, n) \ + (memcpy(to, (void __force *)from, n), 0) +#define __arch_copy_to_user(to, from, n) \ + (memcpy((void __force *)to, from, n), 0) #define __clear_user(addr, n) (memset((void __force *)addr, 0, n), 0) #endif -static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) +static inline unsigned long __must_check +__copy_from_user(void *to, const void __user *from, unsigned long n) +{ + check_object_size(to, n, false); + return __arch_copy_from_user(to, from, n); +} + +static inline unsigned long __must_check +copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; + + check_object_size(to, n, false); + if (likely(access_ok(VERIFY_READ, from, n))) - res = __copy_from_user(to, from, n); + res = __arch_copy_from_user(to, from, n); if (unlikely(res)) memset(to + (n - res), 0, res); return res; } -static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) +static inline unsigned long __must_check +__copy_to_user(void __user *to, const void *from, unsigned long n) { + check_object_size(from, n, true); + + return __arch_copy_to_user(to, from, n); +} + +static inline unsigned long __must_check +copy_to_user(void __user *to, const void *from, unsigned long n) +{ + check_object_size(from, n, true); + if (access_ok(VERIFY_WRITE, to, n)) - n = __copy_to_user(to, from, n); + n = __arch_copy_to_user(to, from, n); return n; } diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index 8ecfd15c3a02..df73914e81c8 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -67,7 +67,7 @@ ENTRY(__get_user_4) ENDPROC(__get_user_4) ENTRY(__get_user_8) - check_uaccess r0, 8, r1, r2, __get_user_bad + check_uaccess r0, 8, r1, r2, __get_user_bad8 #ifdef CONFIG_THUMB2_KERNEL 5: TUSER(ldr) r2, [r0] 6: TUSER(ldr) r3, [r0, #4] diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index 2bb4b09f079e..ad7d604ff001 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -57,6 +57,7 @@ config ARCH_R7S72100 select PM select PM_GENERIC_DOMAINS select SYS_SUPPORTS_SH_MTU2 + select RENESAS_OSTM config ARCH_R8A73A4 bool "R-Mobile APE6 (R8A73A40)" diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 111742126897..f7dfd6d58659 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -96,7 +96,7 @@ config ARM64 select HAVE_RCU_TABLE_FREE select HAVE_SYSCALL_TRACEPOINTS select HAVE_KPROBES - select HAVE_KRETPROBES if HAVE_KPROBES + select HAVE_KRETPROBES select IOMMU_DMA if IOMMU_SUPPORT select IRQ_DOMAIN select IRQ_FORCED_THREADING diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 8365a84c2640..a12f1afc95a3 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += bugs.h generic-y += clkdev.h -generic-y += cputime.h generic-y += delay.h generic-y += div64.h generic-y += dma.h diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index eaa5bbe3fa87..b4b34004a21e 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -29,41 +29,29 @@ #include <clocksource/arm_arch_timer.h> -#if IS_ENABLED(CONFIG_FSL_ERRATUM_A008585) +#if IS_ENABLED(CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND) extern struct static_key_false arch_timer_read_ool_enabled; -#define needs_fsl_a008585_workaround() \ +#define needs_unstable_timer_counter_workaround() \ static_branch_unlikely(&arch_timer_read_ool_enabled) #else -#define needs_fsl_a008585_workaround() false +#define needs_unstable_timer_counter_workaround() false #endif -u32 __fsl_a008585_read_cntp_tval_el0(void); -u32 __fsl_a008585_read_cntv_tval_el0(void); -u64 __fsl_a008585_read_cntvct_el0(void); -/* - * The number of retries is an arbitrary value well beyond the highest number - * of iterations the loop has been observed to take. - */ -#define __fsl_a008585_read_reg(reg) ({ \ - u64 _old, _new; \ - int _retries = 200; \ - \ - do { \ - _old = read_sysreg(reg); \ - _new = read_sysreg(reg); \ - _retries--; \ - } while (unlikely(_old != _new) && _retries); \ - \ - WARN_ON_ONCE(!_retries); \ - _new; \ -}) +struct arch_timer_erratum_workaround { + const char *id; /* Indicate the Erratum ID */ + u32 (*read_cntp_tval_el0)(void); + u32 (*read_cntv_tval_el0)(void); + u64 (*read_cntvct_el0)(void); +}; + +extern const struct arch_timer_erratum_workaround *timer_unstable_counter_workaround; #define arch_timer_reg_read_stable(reg) \ ({ \ u64 _val; \ - if (needs_fsl_a008585_workaround()) \ - _val = __fsl_a008585_read_##reg(); \ + if (needs_unstable_timer_counter_workaround()) \ + _val = timer_unstable_counter_workaround->read_##reg();\ else \ _val = read_sysreg(reg); \ _val; \ diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 0b6b1633017f..e7445281e534 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -50,6 +50,7 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); #define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) #define __efi_call_early(f, ...) f(__VA_ARGS__) +#define efi_call_runtime(f, ...) sys_table_arg->runtime->f(__VA_ARGS__) #define efi_is_64bit() (true) #define efi_call_proto(protocol, f, instance, ...) \ diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index 241b9b9729d8..3d7ef2c17a7c 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += clkdev.h -generic-y += cputime.h generic-y += delay.h generic-y += device.h generic-y += div64.h diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 2fb67b59d188..d6fa60b158be 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += auxvec.h generic-y += bitsperlong.h generic-y += bugs.h -generic-y += cputime.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 64465e7e2245..4e9f57433f3a 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -5,7 +5,6 @@ generic-y += barrier.h generic-y += bitsperlong.h generic-y += bugs.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 1778805f6380..9f19e19bff9d 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -4,7 +4,6 @@ generic-y += barrier.h generic-y += bitsperlong.h generic-y += clkdev.h generic-y += cmpxchg.h -generic-y += cputime.h generic-y += device.h generic-y += div64.h generic-y += errno.h diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index 1fa084cf1a43..0f5b0d5d313c 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += clkdev.h -generic-y += cputime.h generic-y += exec.h generic-y += irq_work.h generic-y += mcs_spinlock.h diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index 373cb23301e3..5efd0c87f3c0 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -5,7 +5,6 @@ generic-y += bugs.h generic-y += cacheflush.h generic-y += checksum.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += delay.h generic-y += device.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index db8ddabc6bd2..a43a7c90e4af 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -6,7 +6,6 @@ generic-y += barrier.h generic-y += bug.h generic-y += bugs.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h index e2d3f5baf265..3d665c0627a8 100644 --- a/arch/ia64/include/asm/cputime.h +++ b/arch/ia64/include/asm/cputime.h @@ -18,11 +18,7 @@ #ifndef __IA64_CPUTIME_H #define __IA64_CPUTIME_H -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -# include <asm-generic/cputime.h> -#else -# include <asm/processor.h> -# include <asm-generic/cputime_nsecs.h> +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE extern void arch_vtime_task_switch(struct task_struct *tsk); #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index c7026429816b..8742d741d19a 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -27,6 +27,12 @@ struct thread_info { mm_segment_t addr_limit; /* user-level address space limit */ int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + __u64 utime; + __u64 stime; + __u64 gtime; + __u64 hardirq_time; + __u64 softirq_time; + __u64 idle_time; __u64 ac_stamp; __u64 ac_leave; __u64 ac_stime; diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S index c9b5e942f671..3204fddc439c 100644 --- a/arch/ia64/kernel/head.S +++ b/arch/ia64/kernel/head.S @@ -1031,7 +1031,7 @@ GLOBAL_ENTRY(ia64_native_sched_clock) END(ia64_native_sched_clock) #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -GLOBAL_ENTRY(cycle_to_cputime) +GLOBAL_ENTRY(cycle_to_nsec) alloc r16=ar.pfs,1,0,0,0 addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 ;; @@ -1047,7 +1047,7 @@ GLOBAL_ENTRY(cycle_to_cputime) ;; shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT br.ret.sptk.many rp -END(cycle_to_cputime) +END(cycle_to_nsec) #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #ifdef CONFIG_IA64_BRL_EMU diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 7ec7acc844c2..c483ece3eb84 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -619,6 +619,8 @@ setup_arch (char **cmdline_p) check_sal_cache_flush(); #endif paging_init(); + + clear_sched_clock_stable(); } /* diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 71775b95d6cc..faa116822c4c 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -21,6 +21,7 @@ #include <linux/timex.h> #include <linux/timekeeper_internal.h> #include <linux/platform_device.h> +#include <linux/cputime.h> #include <asm/machvec.h> #include <asm/delay.h> @@ -59,18 +60,43 @@ static struct clocksource *itc_clocksource; #include <linux/kernel_stat.h> -extern cputime_t cycle_to_cputime(u64 cyc); +extern u64 cycle_to_nsec(u64 cyc); -void vtime_account_user(struct task_struct *tsk) +void vtime_flush(struct task_struct *tsk) { - cputime_t delta_utime; struct thread_info *ti = task_thread_info(tsk); + u64 delta; - if (ti->ac_utime) { - delta_utime = cycle_to_cputime(ti->ac_utime); - account_user_time(tsk, delta_utime); - ti->ac_utime = 0; + if (ti->utime) + account_user_time(tsk, cycle_to_nsec(ti->utime)); + + if (ti->gtime) + account_guest_time(tsk, cycle_to_nsec(ti->gtime)); + + if (ti->idle_time) + account_idle_time(cycle_to_nsec(ti->idle_time)); + + if (ti->stime) { + delta = cycle_to_nsec(ti->stime); + account_system_index_time(tsk, delta, CPUTIME_SYSTEM); + } + + if (ti->hardirq_time) { + delta = cycle_to_nsec(ti->hardirq_time); + account_system_index_time(tsk, delta, CPUTIME_IRQ); + } + + if (ti->softirq_time) { + delta = cycle_to_nsec(ti->softirq_time)); + account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ); } + + ti->utime = 0; + ti->gtime = 0; + ti->idle_time = 0; + ti->stime = 0; + ti->hardirq_time = 0; + ti->softirq_time = 0; } /* @@ -83,7 +109,7 @@ void arch_vtime_task_switch(struct task_struct *prev) struct thread_info *pi = task_thread_info(prev); struct thread_info *ni = task_thread_info(current); - pi->ac_stamp = ni->ac_stamp; + ni->ac_stamp = pi->ac_stamp; ni->ac_stime = ni->ac_utime = 0; } @@ -91,18 +117,15 @@ void arch_vtime_task_switch(struct task_struct *prev) * Account time for a transition between system, hard irq or soft irq state. * Note that this function is called with interrupts enabled. */ -static cputime_t vtime_delta(struct task_struct *tsk) +static __u64 vtime_delta(struct task_struct *tsk) { struct thread_info *ti = task_thread_info(tsk); - cputime_t delta_stime; - __u64 now; + __u64 now, delta_stime; WARN_ON_ONCE(!irqs_disabled()); now = ia64_get_itc(); - - delta_stime = cycle_to_cputime(ti->ac_stime + (now - ti->ac_stamp)); - ti->ac_stime = 0; + delta_stime = now - ti->ac_stamp; ti->ac_stamp = now; return delta_stime; @@ -110,15 +133,25 @@ static cputime_t vtime_delta(struct task_struct *tsk) void vtime_account_system(struct task_struct *tsk) { - cputime_t delta = vtime_delta(tsk); - - account_system_time(tsk, 0, delta); + struct thread_info *ti = task_thread_info(tsk); + __u64 stime = vtime_delta(tsk); + + if ((tsk->flags & PF_VCPU) && !irq_count()) + ti->gtime += stime; + else if (hardirq_count()) + ti->hardirq_time += stime; + else if (in_serving_softirq()) + ti->softirq_time += stime; + else + ti->stime += stime; } EXPORT_SYMBOL_GPL(vtime_account_system); void vtime_account_idle(struct task_struct *tsk) { - account_idle_time(vtime_delta(tsk)); + struct thread_info *ti = task_thread_info(tsk); + + ti->idle_time += vtime_delta(tsk); } #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index 860e440611c9..652100b64a71 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += clkdev.h -generic-y += cputime.h generic-y += exec.h generic-y += irq_work.h generic-y += kvm_para.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 1f2e5d31cb24..6c76d6c24b3d 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -1,7 +1,6 @@ generic-y += barrier.h generic-y += bitsperlong.h generic-y += clkdev.h -generic-y += cputime.h generic-y += device.h generic-y += emergency-restart.h generic-y += errno.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index 167150c701d1..d3731f0db73b 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += auxvec.h generic-y += bitsperlong.h generic-y += bugs.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += device.h generic-y += dma.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index b0ae88c9fed9..6275eb051801 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -1,7 +1,6 @@ generic-y += barrier.h generic-y += clkdev.h -generic-y += cputime.h generic-y += device.h generic-y += exec.h generic-y += irq_work.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 3269b742a75e..994b1c4392be 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -1,7 +1,6 @@ # MIPS headers generic-(CONFIG_GENERIC_CSUM) += checksum.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += dma-contiguous.h generic-y += emergency-restart.h diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c index 9c7f3e136d50..4a2ff3953b99 100644 --- a/arch/mips/kernel/binfmt_elfn32.c +++ b/arch/mips/kernel/binfmt_elfn32.c @@ -99,15 +99,7 @@ jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value) #undef TASK_SIZE #define TASK_SIZE TASK_SIZE32 -#undef cputime_to_timeval -#define cputime_to_timeval cputime_to_compat_timeval -static __inline__ void -cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value) -{ - unsigned long jiffies = cputime_to_jiffies(cputime); - - value->tv_usec = (jiffies % HZ) * (1000000L / HZ); - value->tv_sec = jiffies / HZ; -} +#undef ns_to_timeval +#define ns_to_timeval ns_to_compat_timeval #include "../../../fs/binfmt_elf.c" diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c index 1ab34322dd97..3916404e7fd1 100644 --- a/arch/mips/kernel/binfmt_elfo32.c +++ b/arch/mips/kernel/binfmt_elfo32.c @@ -102,15 +102,7 @@ jiffies_to_compat_timeval(unsigned long jiffies, struct compat_timeval *value) #undef TASK_SIZE #define TASK_SIZE TASK_SIZE32 -#undef cputime_to_timeval -#define cputime_to_timeval cputime_to_compat_timeval -static __inline__ void -cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value) -{ - unsigned long jiffies = cputime_to_jiffies(cputime); - - value->tv_usec = (jiffies % HZ) * (1000000L / HZ); - value->tv_sec = jiffies / HZ; -} +#undef ns_to_timeval +#define ns_to_timeval ns_to_compat_timeval #include "../../../fs/binfmt_elf.c" diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index 1c8dd0f5cd5d..97f64c723a0c 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -1,7 +1,6 @@ generic-y += barrier.h generic-y += clkdev.h -generic-y += cputime.h generic-y += exec.h generic-y += irq_work.h generic-y += mcs_spinlock.h diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild index d63330e88379..35b0e883761a 100644 --- a/arch/nios2/include/asm/Kbuild +++ b/arch/nios2/include/asm/Kbuild @@ -6,7 +6,6 @@ generic-y += bitsperlong.h generic-y += bug.h generic-y += bugs.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 2832f031fb11..ef8d1ccc3e45 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -12,7 +12,6 @@ generic-y += checksum.h generic-y += clkdev.h generic-y += cmpxchg-local.h generic-y += cmpxchg.h -generic-y += cputime.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index 91f53c07f410..4e179d770d69 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += auxvec.h generic-y += barrier.h generic-y += clkdev.h -generic-y += cputime.h generic-y += device.h generic-y += div64.h generic-y += emergency-restart.h diff --git a/arch/parisc/kernel/binfmt_elf32.c b/arch/parisc/kernel/binfmt_elf32.c index 00dc66f9c2ba..f2adcf33f8f2 100644 --- a/arch/parisc/kernel/binfmt_elf32.c +++ b/arch/parisc/kernel/binfmt_elf32.c @@ -91,14 +91,7 @@ struct elf_prpsinfo32 current->thread.map_base = DEFAULT_MAP_BASE32; \ current->thread.task_size = DEFAULT_TASK_SIZE32 \ -#undef cputime_to_timeval -#define cputime_to_timeval cputime_to_compat_timeval -static __inline__ void -cputime_to_compat_timeval(const cputime_t cputime, struct compat_timeval *value) -{ - unsigned long jiffies = cputime_to_jiffies(cputime); - value->tv_usec = (jiffies % HZ) * (1000000L / HZ); - value->tv_sec = jiffies / HZ; -} +#undef ns_to_timeval +#define ns_to_timeval ns_to_compat_timeval #include "../../../fs/binfmt_elf.c" diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 2e66a887788e..068ed3607bac 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -36,6 +36,7 @@ #undef PCI_DEBUG #include <linux/proc_fs.h> #include <linux/export.h> +#include <linux/sched.h> #include <asm/processor.h> #include <asm/sections.h> @@ -176,6 +177,7 @@ void __init setup_arch(char **cmdline_p) conswitchp = &dummy_con; /* we use do_take_over_console() later ! */ #endif + clear_sched_clock_stable(); } /* diff --git a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi index c744569a20e1..a97296c64eb2 100644 --- a/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/t2081si-post.dtsi @@ -678,5 +678,6 @@ compatible = "fsl,t2080-l2-cache-controller"; reg = <0xc20000 0x40000>; next-level-cache = <&cpc>; + interrupts = <16 2 1 9>; }; }; diff --git a/arch/powerpc/include/asm/accounting.h b/arch/powerpc/include/asm/accounting.h index c133246df467..3abcf98ed2e0 100644 --- a/arch/powerpc/include/asm/accounting.h +++ b/arch/powerpc/include/asm/accounting.h @@ -12,9 +12,17 @@ /* Stuff for accurate time accounting */ struct cpu_accounting_data { - unsigned long user_time; /* accumulated usermode TB ticks */ - unsigned long system_time; /* accumulated system TB ticks */ - unsigned long user_time_scaled; /* accumulated usermode SPURR ticks */ + /* Accumulated cputime values to flush on ticks*/ + unsigned long utime; + unsigned long stime; + unsigned long utime_scaled; + unsigned long stime_scaled; + unsigned long gtime; + unsigned long hardirq_time; + unsigned long softirq_time; + unsigned long steal_time; + unsigned long idle_time; + /* Internal counters */ unsigned long starttime; /* TB value snapshot */ unsigned long starttime_user; /* TB value on exit to usermode */ unsigned long startspurr; /* SPURR value snapshot */ diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index aa2e6a34b872..99b541865d8d 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -16,12 +16,7 @@ #ifndef __POWERPC_CPUTIME_H #define __POWERPC_CPUTIME_H -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -#include <asm-generic/cputime.h> -#ifdef __KERNEL__ -static inline void setup_cputime_one_jiffy(void) { } -#endif -#else +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #include <linux/types.h> #include <linux/time.h> @@ -36,65 +31,6 @@ typedef u64 __nocast cputime64_t; #define cmpxchg_cputime(ptr, old, new) cmpxchg(ptr, old, new) #ifdef __KERNEL__ - -/* - * One jiffy in timebase units computed during initialization - */ -extern cputime_t cputime_one_jiffy; - -/* - * Convert cputime <-> jiffies - */ -extern u64 __cputime_jiffies_factor; - -static inline unsigned long cputime_to_jiffies(const cputime_t ct) -{ - return mulhdu((__force u64) ct, __cputime_jiffies_factor); -} - -static inline cputime_t jiffies_to_cputime(const unsigned long jif) -{ - u64 ct; - unsigned long sec; - - /* have to be a little careful about overflow */ - ct = jif % HZ; - sec = jif / HZ; - if (ct) { - ct *= tb_ticks_per_sec; - do_div(ct, HZ); - } - if (sec) - ct += (cputime_t) sec * tb_ticks_per_sec; - return (__force cputime_t) ct; -} - -static inline void setup_cputime_one_jiffy(void) -{ - cputime_one_jiffy = jiffies_to_cputime(1); -} - -static inline cputime64_t jiffies64_to_cputime64(const u64 jif) -{ - u64 ct; - u64 sec = jif; - - /* have to be a little careful about overflow */ - ct = do_div(sec, HZ); - if (ct) { - ct *= tb_ticks_per_sec; - do_div(ct, HZ); - } - if (sec) - ct += (u64) sec * tb_ticks_per_sec; - return (__force cputime64_t) ct; -} - -static inline u64 cputime64_to_jiffies64(const cputime_t ct) -{ - return mulhdu((__force u64) ct, __cputime_jiffies_factor); -} - /* * Convert cputime <-> microseconds */ @@ -105,117 +41,6 @@ static inline unsigned long cputime_to_usecs(const cputime_t ct) return mulhdu((__force u64) ct, __cputime_usec_factor); } -static inline cputime_t usecs_to_cputime(const unsigned long us) -{ - u64 ct; - unsigned long sec; - - /* have to be a little careful about overflow */ - ct = us % 1000000; - sec = us / 1000000; - if (ct) { - ct *= tb_ticks_per_sec; - do_div(ct, 1000000); - } - if (sec) - ct += (cputime_t) sec * tb_ticks_per_sec; - return (__force cputime_t) ct; -} - -#define usecs_to_cputime64(us) usecs_to_cputime(us) - -/* - * Convert cputime <-> seconds - */ -extern u64 __cputime_sec_factor; - -static inline unsigned long cputime_to_secs(const cputime_t ct) -{ - return mulhdu((__force u64) ct, __cputime_sec_factor); -} - -static inline cputime_t secs_to_cputime(const unsigned long sec) -{ - return (__force cputime_t)((u64) sec * tb_ticks_per_sec); -} - -/* - * Convert cputime <-> timespec - */ -static inline void cputime_to_timespec(const cputime_t ct, struct timespec *p) -{ - u64 x = (__force u64) ct; - unsigned int frac; - - frac = do_div(x, tb_ticks_per_sec); - p->tv_sec = x; - x = (u64) frac * 1000000000; - do_div(x, tb_ticks_per_sec); - p->tv_nsec = x; -} - -static inline cputime_t timespec_to_cputime(const struct timespec *p) -{ - u64 ct; - - ct = (u64) p->tv_nsec * tb_ticks_per_sec; - do_div(ct, 1000000000); - return (__force cputime_t)(ct + (u64) p->tv_sec * tb_ticks_per_sec); -} - -/* - * Convert cputime <-> timeval - */ -static inline void cputime_to_timeval(const cputime_t ct, struct timeval *p) -{ - u64 x = (__force u64) ct; - unsigned int frac; - - frac = do_div(x, tb_ticks_per_sec); - p->tv_sec = x; - x = (u64) frac * 1000000; - do_div(x, tb_ticks_per_sec); - p->tv_usec = x; -} - -static inline cputime_t timeval_to_cputime(const struct timeval *p) -{ - u64 ct; - - ct = (u64) p->tv_usec * tb_ticks_per_sec; - do_div(ct, 1000000); - return (__force cputime_t)(ct + (u64) p->tv_sec * tb_ticks_per_sec); -} - -/* - * Convert cputime <-> clock_t (units of 1/USER_HZ seconds) - */ -extern u64 __cputime_clockt_factor; - -static inline unsigned long cputime_to_clock_t(const cputime_t ct) -{ - return mulhdu((__force u64) ct, __cputime_clockt_factor); -} - -static inline cputime_t clock_t_to_cputime(const unsigned long clk) -{ - u64 ct; - unsigned long sec; - - /* have to be a little careful about overflow */ - ct = clk % USER_HZ; - sec = clk / USER_HZ; - if (ct) { - ct *= tb_ticks_per_sec; - do_div(ct, USER_HZ); - } - if (sec) - ct += (u64) sec * tb_ticks_per_sec; - return (__force cputime_t) ct; -} - -#define cputime64_to_clock_t(ct) cputime_to_clock_t((cputime_t)(ct)) - /* * PPC64 uses PACA which is task independent for storing accounting data while * PPC32 uses struct thread_info, therefore at task switch the accounting data diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 6a6792bb39fb..708c3e592eeb 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -187,7 +187,6 @@ struct paca_struct { /* Stuff for accurate time accounting */ struct cpu_accounting_data accounting; - u64 stolen_time; /* TB ticks taken by hypervisor */ u64 dtl_ridx; /* read index in dispatch log */ struct dtl_entry *dtl_curr; /* pointer corresponding to dtl_ridx */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 195a9fc8f81c..9e8e771f8acb 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -249,9 +249,9 @@ int main(void) DEFINE(ACCOUNT_STARTTIME_USER, offsetof(struct paca_struct, accounting.starttime_user)); DEFINE(ACCOUNT_USER_TIME, - offsetof(struct paca_struct, accounting.user_time)); + offsetof(struct paca_struct, accounting.utime)); DEFINE(ACCOUNT_SYSTEM_TIME, - offsetof(struct paca_struct, accounting.system_time)); + offsetof(struct paca_struct, accounting.stime)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost)); DEFINE(PACA_SPRG_VDSO, offsetof(struct paca_struct, sprg_vdso)); @@ -262,9 +262,9 @@ int main(void) DEFINE(ACCOUNT_STARTTIME_USER, offsetof(struct thread_info, accounting.starttime_user)); DEFINE(ACCOUNT_USER_TIME, - offsetof(struct thread_info, accounting.user_time)); + offsetof(struct thread_info, accounting.utime)); DEFINE(ACCOUNT_SYSTEM_TIME, - offsetof(struct thread_info, accounting.system_time)); + offsetof(struct thread_info, accounting.stime)); #endif #endif /* CONFIG_PPC64 */ diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index bc2e08d415fa..14e485525e31 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -57,6 +57,7 @@ #include <linux/clk-provider.h> #include <linux/suspend.h> #include <linux/rtc.h> +#include <linux/cputime.h> #include <asm/trace.h> #include <asm/io.h> @@ -72,7 +73,6 @@ #include <asm/smp.h> #include <asm/vdso_datapage.h> #include <asm/firmware.h> -#include <asm/cputime.h> #include <asm/asm-prototypes.h> /* powerpc clocksource/clockevent code */ @@ -152,20 +152,11 @@ EXPORT_SYMBOL_GPL(ppc_tb_freq); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* - * Factors for converting from cputime_t (timebase ticks) to - * jiffies, microseconds, seconds, and clock_t (1/USER_HZ seconds). - * These are all stored as 0.64 fixed-point binary fractions. + * Factor for converting from cputime_t (timebase ticks) to + * microseconds. This is stored as 0.64 fixed-point binary fraction. */ -u64 __cputime_jiffies_factor; -EXPORT_SYMBOL(__cputime_jiffies_factor); u64 __cputime_usec_factor; EXPORT_SYMBOL(__cputime_usec_factor); -u64 __cputime_sec_factor; -EXPORT_SYMBOL(__cputime_sec_factor); -u64 __cputime_clockt_factor; -EXPORT_SYMBOL(__cputime_clockt_factor); - -cputime_t cputime_one_jiffy; #ifdef CONFIG_PPC_SPLPAR void (*dtl_consumer)(struct dtl_entry *, u64); @@ -181,14 +172,8 @@ static void calc_cputime_factors(void) { struct div_result res; - div128_by_32(HZ, 0, tb_ticks_per_sec, &res); - __cputime_jiffies_factor = res.result_low; div128_by_32(1000000, 0, tb_ticks_per_sec, &res); __cputime_usec_factor = res.result_low; - div128_by_32(1, 0, tb_ticks_per_sec, &res); - __cputime_sec_factor = res.result_low; - div128_by_32(USER_HZ, 0, tb_ticks_per_sec, &res); - __cputime_clockt_factor = res.result_low; } /* @@ -271,25 +256,19 @@ void accumulate_stolen_time(void) sst = scan_dispatch_log(acct->starttime_user); ust = scan_dispatch_log(acct->starttime); - acct->system_time -= sst; - acct->user_time -= ust; - local_paca->stolen_time += ust + sst; + acct->stime -= sst; + acct->utime -= ust; + acct->steal_time += ust + sst; local_paca->soft_enabled = save_soft_enabled; } static inline u64 calculate_stolen_time(u64 stop_tb) { - u64 stolen = 0; + if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) + return scan_dispatch_log(stop_tb); - if (get_paca()->dtl_ridx != be64_to_cpu(get_lppaca()->dtl_idx)) { - stolen = scan_dispatch_log(stop_tb); - get_paca()->accounting.system_time -= stolen; - } - - stolen += get_paca()->stolen_time; - get_paca()->stolen_time = 0; - return stolen; + return 0; } #else /* CONFIG_PPC_SPLPAR */ @@ -305,28 +284,27 @@ static inline u64 calculate_stolen_time(u64 stop_tb) * or soft irq state. */ static unsigned long vtime_delta(struct task_struct *tsk, - unsigned long *sys_scaled, - unsigned long *stolen) + unsigned long *stime_scaled, + unsigned long *steal_time) { unsigned long now, nowscaled, deltascaled; - unsigned long udelta, delta, user_scaled; + unsigned long stime; + unsigned long utime, utime_scaled; struct cpu_accounting_data *acct = get_accounting(tsk); WARN_ON_ONCE(!irqs_disabled()); now = mftb(); nowscaled = read_spurr(now); - acct->system_time += now - acct->starttime; + stime = now - acct->starttime; acct->starttime = now; deltascaled = nowscaled - acct->startspurr; acct->startspurr = nowscaled; - *stolen = calculate_stolen_time(now); + *steal_time = calculate_stolen_time(now); - delta = acct->system_time; - acct->system_time = 0; - udelta = acct->user_time - acct->utime_sspurr; - acct->utime_sspurr = acct->user_time; + utime = acct->utime - acct->utime_sspurr; + acct->utime_sspurr = acct->utime; /* * Because we don't read the SPURR on every kernel entry/exit, @@ -338,62 +316,105 @@ static unsigned long vtime_delta(struct task_struct *tsk, * the user ticks get saved up in paca->user_time_scaled to be * used by account_process_tick. */ - *sys_scaled = delta; - user_scaled = udelta; - if (deltascaled != delta + udelta) { - if (udelta) { - *sys_scaled = deltascaled * delta / (delta + udelta); - user_scaled = deltascaled - *sys_scaled; + *stime_scaled = stime; + utime_scaled = utime; + if (deltascaled != stime + utime) { + if (utime) { + *stime_scaled = deltascaled * stime / (stime + utime); + utime_scaled = deltascaled - *stime_scaled; } else { - *sys_scaled = deltascaled; + *stime_scaled = deltascaled; } } - acct->user_time_scaled += user_scaled; + acct->utime_scaled += utime_scaled; - return delta; + return stime; } void vtime_account_system(struct task_struct *tsk) { - unsigned long delta, sys_scaled, stolen; + unsigned long stime, stime_scaled, steal_time; + struct cpu_accounting_data *acct = get_accounting(tsk); + + stime = vtime_delta(tsk, &stime_scaled, &steal_time); - delta = vtime_delta(tsk, &sys_scaled, &stolen); - account_system_time(tsk, 0, delta); - tsk->stimescaled += sys_scaled; - if (stolen) - account_steal_time(stolen); + stime -= min(stime, steal_time); + acct->steal_time += steal_time; + + if ((tsk->flags & PF_VCPU) && !irq_count()) { + acct->gtime += stime; + acct->utime_scaled += stime_scaled; + } else { + if (hardirq_count()) + acct->hardirq_time += stime; + else if (in_serving_softirq()) + acct->softirq_time += stime; + else + acct->stime += stime; + + acct->stime_scaled += stime_scaled; + } } EXPORT_SYMBOL_GPL(vtime_account_system); void vtime_account_idle(struct task_struct *tsk) { - unsigned long delta, sys_scaled, stolen; + unsigned long stime, stime_scaled, steal_time; + struct cpu_accounting_data *acct = get_accounting(tsk); - delta = vtime_delta(tsk, &sys_scaled, &stolen); - account_idle_time(delta + stolen); + stime = vtime_delta(tsk, &stime_scaled, &steal_time); + acct->idle_time += stime + steal_time; } /* - * Transfer the user time accumulated in the paca - * by the exception entry and exit code to the generic - * process user time records. + * Account the whole cputime accumulated in the paca * Must be called with interrupts disabled. * Assumes that vtime_account_system/idle() has been called * recently (i.e. since the last entry from usermode) so that * get_paca()->user_time_scaled is up to date. */ -void vtime_account_user(struct task_struct *tsk) +void vtime_flush(struct task_struct *tsk) { - cputime_t utime, utimescaled; struct cpu_accounting_data *acct = get_accounting(tsk); - utime = acct->user_time; - utimescaled = acct->user_time_scaled; - acct->user_time = 0; - acct->user_time_scaled = 0; + if (acct->utime) + account_user_time(tsk, cputime_to_nsecs(acct->utime)); + + if (acct->utime_scaled) + tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled); + + if (acct->gtime) + account_guest_time(tsk, cputime_to_nsecs(acct->gtime)); + + if (acct->steal_time) + account_steal_time(cputime_to_nsecs(acct->steal_time)); + + if (acct->idle_time) + account_idle_time(cputime_to_nsecs(acct->idle_time)); + + if (acct->stime) + account_system_index_time(tsk, cputime_to_nsecs(acct->stime), + CPUTIME_SYSTEM); + if (acct->stime_scaled) + tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled); + + if (acct->hardirq_time) + account_system_index_time(tsk, cputime_to_nsecs(acct->hardirq_time), + CPUTIME_IRQ); + if (acct->softirq_time) + account_system_index_time(tsk, cputime_to_nsecs(acct->softirq_time), + CPUTIME_SOFTIRQ); + + acct->utime = 0; + acct->utime_scaled = 0; acct->utime_sspurr = 0; - account_user_time(tsk, utime); - tsk->utimescaled += utimescaled; + acct->gtime = 0; + acct->steal_time = 0; + acct->idle_time = 0; + acct->stime = 0; + acct->stime_scaled = 0; + acct->hardirq_time = 0; + acct->softirq_time = 0; } #ifdef CONFIG_PPC32 @@ -407,8 +428,7 @@ void arch_vtime_task_switch(struct task_struct *prev) struct cpu_accounting_data *acct = get_accounting(current); acct->starttime = get_accounting(prev)->starttime; - acct->system_time = 0; - acct->user_time = 0; + acct->startspurr = get_accounting(prev)->startspurr; } #endif /* CONFIG_PPC32 */ @@ -1018,7 +1038,6 @@ void __init time_init(void) tb_ticks_per_sec = ppc_tb_freq; tb_ticks_per_usec = ppc_tb_freq / 1000000; calc_cputime_factors(); - setup_cputime_one_jiffy(); /* * Compute scale factor for sched_clock. diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 93abf8a9813d..8e1588021d1c 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -347,7 +347,8 @@ early_param("disable_radix", parse_disable_radix); void __init mmu_early_init_devtree(void) { /* Disable radix mode based on kernel command line. */ - if (disable_radix) + /* We don't yet have the machinery to do radix as a guest. */ + if (disable_radix || !(mfmsr() & MSR_HV)) cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; if (early_radix_enabled()) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 9c0e17cf6886..3f864c36d847 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2287,14 +2287,14 @@ static void dump_one_paca(int cpu) DUMP(p, subcore_sibling_mask, "x"); #endif - DUMP(p, accounting.user_time, "llx"); - DUMP(p, accounting.system_time, "llx"); - DUMP(p, accounting.user_time_scaled, "llx"); + DUMP(p, accounting.utime, "llx"); + DUMP(p, accounting.stime, "llx"); + DUMP(p, accounting.utime_scaled, "llx"); DUMP(p, accounting.starttime, "llx"); DUMP(p, accounting.starttime_user, "llx"); DUMP(p, accounting.startspurr, "llx"); DUMP(p, accounting.utime_sspurr, "llx"); - DUMP(p, stolen_time, "llx"); + DUMP(p, accounting.steal_time, "llx"); #undef DUMP catch_memory_errors = 0; diff --git a/arch/s390/appldata/appldata_os.c b/arch/s390/appldata/appldata_os.c index 69b23b25ac34..08b9e942a262 100644 --- a/arch/s390/appldata/appldata_os.c +++ b/arch/s390/appldata/appldata_os.c @@ -113,21 +113,21 @@ static void appldata_get_os_data(void *data) j = 0; for_each_online_cpu(i) { os_data->os_cpu[j].per_cpu_user = - cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]); + nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_USER]); os_data->os_cpu[j].per_cpu_nice = - cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]); + nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_NICE]); os_data->os_cpu[j].per_cpu_system = - cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]); + nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SYSTEM]); os_data->os_cpu[j].per_cpu_idle = - cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]); + nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IDLE]); os_data->os_cpu[j].per_cpu_irq = - cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]); + nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IRQ]); os_data->os_cpu[j].per_cpu_softirq = - cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]); + nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_SOFTIRQ]); os_data->os_cpu[j].per_cpu_iowait = - cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]); + nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_IOWAIT]); os_data->os_cpu[j].per_cpu_steal = - cputime_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]); + nsecs_to_jiffies(kcpustat_cpu(i).cpustat[CPUTIME_STEAL]); os_data->os_cpu[j].cpu_id = i; j++; } diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index 221b454c734a..d1c407ddf703 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -25,33 +25,6 @@ static inline unsigned long __div(unsigned long long n, unsigned long base) return n / base; } -#define cputime_one_jiffy jiffies_to_cputime(1) - -/* - * Convert cputime to jiffies and back. - */ -static inline unsigned long cputime_to_jiffies(const cputime_t cputime) -{ - return __div((__force unsigned long long) cputime, CPUTIME_PER_SEC / HZ); -} - -static inline cputime_t jiffies_to_cputime(const unsigned int jif) -{ - return (__force cputime_t)(jif * (CPUTIME_PER_SEC / HZ)); -} - -static inline u64 cputime64_to_jiffies64(cputime64_t cputime) -{ - unsigned long long jif = (__force unsigned long long) cputime; - do_div(jif, CPUTIME_PER_SEC / HZ); - return jif; -} - -static inline cputime64_t jiffies64_to_cputime64(const u64 jif) -{ - return (__force cputime64_t)(jif * (CPUTIME_PER_SEC / HZ)); -} - /* * Convert cputime to microseconds and back. */ @@ -60,88 +33,8 @@ static inline unsigned int cputime_to_usecs(const cputime_t cputime) return (__force unsigned long long) cputime >> 12; } -static inline cputime_t usecs_to_cputime(const unsigned int m) -{ - return (__force cputime_t)(m * CPUTIME_PER_USEC); -} - -#define usecs_to_cputime64(m) usecs_to_cputime(m) - -/* - * Convert cputime to milliseconds and back. - */ -static inline unsigned int cputime_to_secs(const cputime_t cputime) -{ - return __div((__force unsigned long long) cputime, CPUTIME_PER_SEC / 2) >> 1; -} - -static inline cputime_t secs_to_cputime(const unsigned int s) -{ - return (__force cputime_t)(s * CPUTIME_PER_SEC); -} - -/* - * Convert cputime to timespec and back. - */ -static inline cputime_t timespec_to_cputime(const struct timespec *value) -{ - unsigned long long ret = value->tv_sec * CPUTIME_PER_SEC; - return (__force cputime_t)(ret + __div(value->tv_nsec * CPUTIME_PER_USEC, NSEC_PER_USEC)); -} - -static inline void cputime_to_timespec(const cputime_t cputime, - struct timespec *value) -{ - unsigned long long __cputime = (__force unsigned long long) cputime; - value->tv_nsec = (__cputime % CPUTIME_PER_SEC) * NSEC_PER_USEC / CPUTIME_PER_USEC; - value->tv_sec = __cputime / CPUTIME_PER_SEC; -} - -/* - * Convert cputime to timeval and back. - * Since cputime and timeval have the same resolution (microseconds) - * this is easy. - */ -static inline cputime_t timeval_to_cputime(const struct timeval *value) -{ - unsigned long long ret = value->tv_sec * CPUTIME_PER_SEC; - return (__force cputime_t)(ret + value->tv_usec * CPUTIME_PER_USEC); -} - -static inline void cputime_to_timeval(const cputime_t cputime, - struct timeval *value) -{ - unsigned long long __cputime = (__force unsigned long long) cputime; - value->tv_usec = (__cputime % CPUTIME_PER_SEC) / CPUTIME_PER_USEC; - value->tv_sec = __cputime / CPUTIME_PER_SEC; -} - -/* - * Convert cputime to clock and back. - */ -static inline clock_t cputime_to_clock_t(cputime_t cputime) -{ - unsigned long long clock = (__force unsigned long long) cputime; - do_div(clock, CPUTIME_PER_SEC / USER_HZ); - return clock; -} - -static inline cputime_t clock_t_to_cputime(unsigned long x) -{ - return (__force cputime_t)(x * (CPUTIME_PER_SEC / USER_HZ)); -} - -/* - * Convert cputime64 to clock. - */ -static inline clock_t cputime64_to_clock_t(cputime64_t cputime) -{ - unsigned long long clock = (__force unsigned long long) cputime; - do_div(clock, CPUTIME_PER_SEC / USER_HZ); - return clock; -} -cputime64_t arch_cpu_idle_time(int cpu); +u64 arch_cpu_idle_time(int cpu); #define arch_idle_time(cpu) arch_cpu_idle_time(cpu) diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 9bfad2ad6312..61261e0e95c0 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -85,53 +85,56 @@ struct lowcore { __u64 mcck_enter_timer; /* 0x02c0 */ __u64 exit_timer; /* 0x02c8 */ __u64 user_timer; /* 0x02d0 */ - __u64 system_timer; /* 0x02d8 */ - __u64 steal_timer; /* 0x02e0 */ - __u64 last_update_timer; /* 0x02e8 */ - __u64 last_update_clock; /* 0x02f0 */ - __u64 int_clock; /* 0x02f8 */ - __u64 mcck_clock; /* 0x0300 */ - __u64 clock_comparator; /* 0x0308 */ + __u64 guest_timer; /* 0x02d8 */ + __u64 system_timer; /* 0x02e0 */ + __u64 hardirq_timer; /* 0x02e8 */ + __u64 softirq_timer; /* 0x02f0 */ + __u64 steal_timer; /* 0x02f8 */ + __u64 last_update_timer; /* 0x0300 */ + __u64 last_update_clock; /* 0x0308 */ + __u64 int_clock; /* 0x0310 */ + __u64 mcck_clock; /* 0x0318 */ + __u64 clock_comparator; /* 0x0320 */ /* Current process. */ - __u64 current_task; /* 0x0310 */ - __u8 pad_0x318[0x320-0x318]; /* 0x0318 */ - __u64 kernel_stack; /* 0x0320 */ + __u64 current_task; /* 0x0328 */ + __u8 pad_0x318[0x320-0x318]; /* 0x0330 */ + __u64 kernel_stack; /* 0x0338 */ /* Interrupt, panic and restart stack. */ - __u64 async_stack; /* 0x0328 */ - __u64 panic_stack; /* 0x0330 */ - __u64 restart_stack; /* 0x0338 */ + __u64 async_stack; /* 0x0340 */ + __u64 panic_stack; /* 0x0348 */ + __u64 restart_stack; /* 0x0350 */ /* Restart function and parameter. */ - __u64 restart_fn; /* 0x0340 */ - __u64 restart_data; /* 0x0348 */ - __u64 restart_source; /* 0x0350 */ + __u64 restart_fn; /* 0x0358 */ + __u64 restart_data; /* 0x0360 */ + __u64 restart_source; /* 0x0368 */ /* Address space pointer. */ - __u64 kernel_asce; /* 0x0358 */ - __u64 user_asce; /* 0x0360 */ + __u64 kernel_asce; /* 0x0370 */ + __u64 user_asce; /* 0x0378 */ /* * The lpp and current_pid fields form a * 64-bit value that is set as program * parameter with the LPP instruction. */ - __u32 lpp; /* 0x0368 */ - __u32 current_pid; /* 0x036c */ + __u32 lpp; /* 0x0380 */ + __u32 current_pid; /* 0x0384 */ /* SMP info area */ - __u32 cpu_nr; /* 0x0370 */ - __u32 softirq_pending; /* 0x0374 */ - __u64 percpu_offset; /* 0x0378 */ - __u64 vdso_per_cpu_data; /* 0x0380 */ - __u64 machine_flags; /* 0x0388 */ - __u32 preempt_count; /* 0x0390 */ - __u8 pad_0x0394[0x0398-0x0394]; /* 0x0394 */ - __u64 gmap; /* 0x0398 */ - __u32 spinlock_lockval; /* 0x03a0 */ - __u32 fpu_flags; /* 0x03a4 */ - __u8 pad_0x03a8[0x0400-0x03a8]; /* 0x03a8 */ + __u32 cpu_nr; /* 0x0388 */ + __u32 softirq_pending; /* 0x038c */ + __u64 percpu_offset; /* 0x0390 */ + __u64 vdso_per_cpu_data; /* 0x0398 */ + __u64 machine_flags; /* 0x03a0 */ + __u32 preempt_count; /* 0x03a8 */ + __u8 pad_0x03ac[0x03b0-0x03ac]; /* 0x03ac */ + __u64 gmap; /* 0x03b0 */ + __u32 spinlock_lockval; /* 0x03b8 */ + __u32 fpu_flags; /* 0x03bc */ + __u8 pad_0x03c0[0x0400-0x03c0]; /* 0x03c0 */ /* Per cpu primary space access list */ __u32 paste[16]; /* 0x0400 */ diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 6bca916a5ba0..977a5b6501b8 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -111,7 +111,10 @@ struct thread_struct { unsigned int acrs[NUM_ACRS]; unsigned long ksp; /* kernel stack pointer */ unsigned long user_timer; /* task cputime in user space */ + unsigned long guest_timer; /* task cputime in kvm guest */ unsigned long system_timer; /* task cputime in kernel space */ + unsigned long hardirq_timer; /* task cputime in hardirq context */ + unsigned long softirq_timer; /* task cputime in softirq context */ unsigned long sys_call_table; /* system call table address */ mm_segment_t mm_segment; unsigned long gmap_addr; /* address of last gmap fault. */ diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 7a55c29b0b33..d3bf69ef42cf 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -12,7 +12,7 @@ #include <linux/notifier.h> #include <linux/init.h> #include <linux/cpu.h> -#include <asm/cputime.h> +#include <linux/cputime.h> #include <asm/nmi.h> #include <asm/smp.h> #include "entry.h" @@ -43,7 +43,7 @@ void enabled_wait(void) idle->clock_idle_enter = idle->clock_idle_exit = 0ULL; idle->idle_time += idle_time; idle->idle_count++; - account_idle_time(idle_time); + account_idle_time(cputime_to_nsecs(idle_time)); write_seqcount_end(&idle->seqcount); } NOKPROBE_SYMBOL(enabled_wait); @@ -84,7 +84,7 @@ static ssize_t show_idle_time(struct device *dev, } DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL); -cputime64_t arch_cpu_idle_time(int cpu) +u64 arch_cpu_idle_time(int cpu) { struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); unsigned long long now, idle_enter, idle_exit; @@ -96,7 +96,8 @@ cputime64_t arch_cpu_idle_time(int cpu) idle_enter = ACCESS_ONCE(idle->clock_idle_enter); idle_exit = ACCESS_ONCE(idle->clock_idle_exit); } while (read_seqcount_retry(&idle->seqcount, seq)); - return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0; + + return cputime_to_nsecs(idle_enter ? ((idle_exit ?: now) - idle_enter) : 0); } void arch_cpu_idle_enter(void) diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 1b5c5ee9fc1b..b4a3e9e06ef2 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -6,13 +6,13 @@ */ #include <linux/kernel_stat.h> +#include <linux/cputime.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/timex.h> #include <linux/types.h> #include <linux/time.h> -#include <asm/cputime.h> #include <asm/vtimer.h> #include <asm/vtime.h> #include <asm/cpu_mf.h> @@ -90,14 +90,41 @@ static void update_mt_scaling(void) __this_cpu_write(mt_scaling_jiffies, jiffies_64); } +static inline u64 update_tsk_timer(unsigned long *tsk_vtime, u64 new) +{ + u64 delta; + + delta = new - *tsk_vtime; + *tsk_vtime = new; + return delta; +} + + +static inline u64 scale_vtime(u64 vtime) +{ + u64 mult = __this_cpu_read(mt_scaling_mult); + u64 div = __this_cpu_read(mt_scaling_div); + + if (smp_cpu_mtid) + return vtime * mult / div; + return vtime; +} + +static void account_system_index_scaled(struct task_struct *p, + cputime_t cputime, cputime_t scaled, + enum cpu_usage_stat index) +{ + p->stimescaled += cputime_to_nsecs(scaled); + account_system_index_time(p, cputime_to_nsecs(cputime), index); +} + /* * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ static int do_account_vtime(struct task_struct *tsk) { - u64 timer, clock, user, system, steal; - u64 user_scaled, system_scaled; + u64 timer, clock, user, guest, system, hardirq, softirq, steal; timer = S390_lowcore.last_update_timer; clock = S390_lowcore.last_update_clock; @@ -110,53 +137,76 @@ static int do_account_vtime(struct task_struct *tsk) #endif : "=m" (S390_lowcore.last_update_timer), "=m" (S390_lowcore.last_update_clock)); - S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - S390_lowcore.steal_timer += S390_lowcore.last_update_clock - clock; + clock = S390_lowcore.last_update_clock - clock; + timer -= S390_lowcore.last_update_timer; + + if (hardirq_count()) + S390_lowcore.hardirq_timer += timer; + else + S390_lowcore.system_timer += timer; /* Update MT utilization calculation */ if (smp_cpu_mtid && time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies))) update_mt_scaling(); - user = S390_lowcore.user_timer - tsk->thread.user_timer; - S390_lowcore.steal_timer -= user; - tsk->thread.user_timer = S390_lowcore.user_timer; - - system = S390_lowcore.system_timer - tsk->thread.system_timer; - S390_lowcore.steal_timer -= system; - tsk->thread.system_timer = S390_lowcore.system_timer; - - user_scaled = user; - system_scaled = system; - /* Do MT utilization scaling */ - if (smp_cpu_mtid) { - u64 mult = __this_cpu_read(mt_scaling_mult); - u64 div = __this_cpu_read(mt_scaling_div); + /* Calculate cputime delta */ + user = update_tsk_timer(&tsk->thread.user_timer, + READ_ONCE(S390_lowcore.user_timer)); + guest = update_tsk_timer(&tsk->thread.guest_timer, + READ_ONCE(S390_lowcore.guest_timer)); + system = update_tsk_timer(&tsk->thread.system_timer, + READ_ONCE(S390_lowcore.system_timer)); + hardirq = update_tsk_timer(&tsk->thread.hardirq_timer, + READ_ONCE(S390_lowcore.hardirq_timer)); + softirq = update_tsk_timer(&tsk->thread.softirq_timer, + READ_ONCE(S390_lowcore.softirq_timer)); + S390_lowcore.steal_timer += + clock - user - guest - system - hardirq - softirq; + + /* Push account value */ + if (user) { + account_user_time(tsk, cputime_to_nsecs(user)); + tsk->utimescaled += cputime_to_nsecs(scale_vtime(user)); + } - user_scaled = (user_scaled * mult) / div; - system_scaled = (system_scaled * mult) / div; + if (guest) { + account_guest_time(tsk, cputime_to_nsecs(guest)); + tsk->utimescaled += cputime_to_nsecs(scale_vtime(guest)); } - account_user_time(tsk, user); - tsk->utimescaled += user_scaled; - account_system_time(tsk, 0, system); - tsk->stimescaled += system_scaled; + + if (system) + account_system_index_scaled(tsk, system, scale_vtime(system), + CPUTIME_SYSTEM); + if (hardirq) + account_system_index_scaled(tsk, hardirq, scale_vtime(hardirq), + CPUTIME_IRQ); + if (softirq) + account_system_index_scaled(tsk, softirq, scale_vtime(softirq), + CPUTIME_SOFTIRQ); steal = S390_lowcore.steal_timer; if ((s64) steal > 0) { S390_lowcore.steal_timer = 0; - account_steal_time(steal); + account_steal_time(cputime_to_nsecs(steal)); } - return virt_timer_forward(user + system); + return virt_timer_forward(user + guest + system + hardirq + softirq); } void vtime_task_switch(struct task_struct *prev) { do_account_vtime(prev); prev->thread.user_timer = S390_lowcore.user_timer; + prev->thread.guest_timer = S390_lowcore.guest_timer; prev->thread.system_timer = S390_lowcore.system_timer; + prev->thread.hardirq_timer = S390_lowcore.hardirq_timer; + prev->thread.softirq_timer = S390_lowcore.softirq_timer; S390_lowcore.user_timer = current->thread.user_timer; + S390_lowcore.guest_timer = current->thread.guest_timer; S390_lowcore.system_timer = current->thread.system_timer; + S390_lowcore.hardirq_timer = current->thread.hardirq_timer; + S390_lowcore.softirq_timer = current->thread.softirq_timer; } /* @@ -164,7 +214,7 @@ void vtime_task_switch(struct task_struct *prev) * accounting system time in order to correctly compute * the stolen time accounting. */ -void vtime_account_user(struct task_struct *tsk) +void vtime_flush(struct task_struct *tsk) { if (do_account_vtime(tsk)) virt_timer_expire(); @@ -176,32 +226,22 @@ void vtime_account_user(struct task_struct *tsk) */ void vtime_account_irq_enter(struct task_struct *tsk) { - u64 timer, system, system_scaled; + u64 timer; timer = S390_lowcore.last_update_timer; S390_lowcore.last_update_timer = get_vtimer(); - S390_lowcore.system_timer += timer - S390_lowcore.last_update_timer; - - /* Update MT utilization calculation */ - if (smp_cpu_mtid && - time_after64(jiffies_64, this_cpu_read(mt_scaling_jiffies))) - update_mt_scaling(); - - system = S390_lowcore.system_timer - tsk->thread.system_timer; - S390_lowcore.steal_timer -= system; - tsk->thread.system_timer = S390_lowcore.system_timer; - system_scaled = system; - /* Do MT utilization scaling */ - if (smp_cpu_mtid) { - u64 mult = __this_cpu_read(mt_scaling_mult); - u64 div = __this_cpu_read(mt_scaling_div); - - system_scaled = (system_scaled * mult) / div; - } - account_system_time(tsk, 0, system); - tsk->stimescaled += system_scaled; - - virt_timer_forward(system); + timer -= S390_lowcore.last_update_timer; + + if ((tsk->flags & PF_VCPU) && (irq_count() == 0)) + S390_lowcore.guest_timer += timer; + else if (hardirq_count()) + S390_lowcore.hardirq_timer += timer; + else if (in_serving_softirq()) + S390_lowcore.softirq_timer += timer; + else + S390_lowcore.system_timer += timer; + + virt_timer_forward(timer); } EXPORT_SYMBOL_GPL(vtime_account_irq_enter); diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index a05218ff3fe4..51970bb6c4fe 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -4,7 +4,6 @@ header-y += generic-y += barrier.h generic-y += clkdev.h -generic-y += cputime.h generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 751c3373a92c..cf2a75063b53 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -1,7 +1,6 @@ generic-y += bitsperlong.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += delay.h generic-y += div64.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 0569bfac4afb..e9e837bc3158 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += clkdev.h -generic-y += cputime.h generic-y += div64.h generic-y += emergency-restart.h generic-y += exec.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 2d1f5638974c..aa48b6eaff2d 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -4,8 +4,6 @@ header-y += ../arch/ generic-y += bug.h generic-y += bugs.h generic-y += clkdev.h -generic-y += cputime.h -generic-y += div64.h generic-y += emergency-restart.h generic-y += errno.h generic-y += exec.h diff --git a/arch/tile/include/asm/div64.h b/arch/tile/include/asm/div64.h new file mode 100644 index 000000000000..9f765cdf09a5 --- /dev/null +++ b/arch/tile/include/asm/div64.h @@ -0,0 +1,16 @@ +#ifndef _ASM_TILE_DIV64_H +#define _ASM_TILE_DIV64_H + +#include <linux/types.h> + +#ifdef __tilegx__ +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + return __insn_mul_lu_lu(a, b); +} +#define mul_u32_u32 mul_u32_u32 +#endif + +#include <asm-generic/div64.h> + +#endif /* _ASM_TILE_DIV64_H */ diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index 05523f14d7b2..57f03050c850 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -76,7 +76,7 @@ static ssize_t rng_dev_read (struct file *filp, char __user *buf, size_t size, add_sigio_fd(random_fd); add_wait_queue(&host_read_wait, &wait); - set_task_state(current, TASK_INTERRUPTIBLE); + set_current_state(TASK_INTERRUPTIBLE); schedule(); remove_wait_queue(&host_read_wait, &wait); diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 052f7f6d0551..90c281cd7e1d 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -1,7 +1,6 @@ generic-y += barrier.h generic-y += bug.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += delay.h generic-y += device.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index 256c45b3ae34..5d51ade89f4c 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -4,7 +4,6 @@ generic-y += auxvec.h generic-y += bitsperlong.h generic-y += bugs.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e487493bbd47..f8fbfc5a98ba 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1070,7 +1070,7 @@ config X86_MCE_THRESHOLD def_bool y config X86_MCE_INJECT - depends on X86_MCE + depends on X86_MCE && X86_LOCAL_APIC tristate "Machine check injector support" ---help--- Provide support for injecting machine checks for testing purposes. @@ -1994,10 +1994,6 @@ config RANDOMIZE_BASE theoretically possible, but the implementations are further limited due to memory layouts. - If CONFIG_HIBERNATE is also enabled, KASLR is disabled at boot - time. To enable it, boot with "kaslr" on the kernel command - line (which will also disable hibernation). - If unsure, say N. # Relocation on x86 needs some additional build support diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index e5612f3e3b57..9b42b6d1e902 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -333,6 +333,7 @@ size_t strnlen(const char *s, size_t maxlen); unsigned int atou(const char *s); unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base); size_t strlen(const char *s); +char *strchr(const char *s, int c); /* tty.c */ void puts(const char *); diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index ff01c8fc76f7..801c7a158e55 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -32,160 +32,13 @@ static void setup_boot_services##bits(struct efi_config *c) \ \ table = (typeof(table))sys_table; \ \ + c->runtime_services = table->runtime; \ c->boot_services = table->boottime; \ c->text_output = table->con_out; \ } BOOT_SERVICES(32); BOOT_SERVICES(64); -void efi_char16_printk(efi_system_table_t *, efi_char16_t *); - -static efi_status_t -__file_size32(void *__fh, efi_char16_t *filename_16, - void **handle, u64 *file_sz) -{ - efi_file_handle_32_t *h, *fh = __fh; - efi_file_info_t *info; - efi_status_t status; - efi_guid_t info_guid = EFI_FILE_INFO_ID; - u32 info_sz; - - status = efi_early->call((unsigned long)fh->open, fh, &h, filename_16, - EFI_FILE_MODE_READ, (u64)0); - if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to open file: "); - efi_char16_printk(sys_table, filename_16); - efi_printk(sys_table, "\n"); - return status; - } - - *handle = h; - - info_sz = 0; - status = efi_early->call((unsigned long)h->get_info, h, &info_guid, - &info_sz, NULL); - if (status != EFI_BUFFER_TOO_SMALL) { - efi_printk(sys_table, "Failed to get file info size\n"); - return status; - } - -grow: - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - info_sz, (void **)&info); - if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to alloc mem for file info\n"); - return status; - } - - status = efi_early->call((unsigned long)h->get_info, h, &info_guid, - &info_sz, info); - if (status == EFI_BUFFER_TOO_SMALL) { - efi_call_early(free_pool, info); - goto grow; - } - - *file_sz = info->file_size; - efi_call_early(free_pool, info); - - if (status != EFI_SUCCESS) - efi_printk(sys_table, "Failed to get initrd info\n"); - - return status; -} - -static efi_status_t -__file_size64(void *__fh, efi_char16_t *filename_16, - void **handle, u64 *file_sz) -{ - efi_file_handle_64_t *h, *fh = __fh; - efi_file_info_t *info; - efi_status_t status; - efi_guid_t info_guid = EFI_FILE_INFO_ID; - u64 info_sz; - - status = efi_early->call((unsigned long)fh->open, fh, &h, filename_16, - EFI_FILE_MODE_READ, (u64)0); - if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to open file: "); - efi_char16_printk(sys_table, filename_16); - efi_printk(sys_table, "\n"); - return status; - } - - *handle = h; - - info_sz = 0; - status = efi_early->call((unsigned long)h->get_info, h, &info_guid, - &info_sz, NULL); - if (status != EFI_BUFFER_TOO_SMALL) { - efi_printk(sys_table, "Failed to get file info size\n"); - return status; - } - -grow: - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - info_sz, (void **)&info); - if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to alloc mem for file info\n"); - return status; - } - - status = efi_early->call((unsigned long)h->get_info, h, &info_guid, - &info_sz, info); - if (status == EFI_BUFFER_TOO_SMALL) { - efi_call_early(free_pool, info); - goto grow; - } - - *file_sz = info->file_size; - efi_call_early(free_pool, info); - - if (status != EFI_SUCCESS) - efi_printk(sys_table, "Failed to get initrd info\n"); - - return status; -} -efi_status_t -efi_file_size(efi_system_table_t *sys_table, void *__fh, - efi_char16_t *filename_16, void **handle, u64 *file_sz) -{ - if (efi_early->is64) - return __file_size64(__fh, filename_16, handle, file_sz); - - return __file_size32(__fh, filename_16, handle, file_sz); -} - -efi_status_t -efi_file_read(void *handle, unsigned long *size, void *addr) -{ - unsigned long func; - - if (efi_early->is64) { - efi_file_handle_64_t *fh = handle; - - func = (unsigned long)fh->read; - return efi_early->call(func, handle, size, addr); - } else { - efi_file_handle_32_t *fh = handle; - - func = (unsigned long)fh->read; - return efi_early->call(func, handle, size, addr); - } -} - -efi_status_t efi_file_close(void *handle) -{ - if (efi_early->is64) { - efi_file_handle_64_t *fh = handle; - - return efi_early->call((unsigned long)fh->close, handle); - } else { - efi_file_handle_32_t *fh = handle; - - return efi_early->call((unsigned long)fh->close, handle); - } -} - static inline efi_status_t __open_volume32(void *__image, void **__fh) { efi_file_io_interface_t *io; @@ -249,30 +102,8 @@ efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh) void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) { - unsigned long output_string; - size_t offset; - - if (efi_early->is64) { - struct efi_simple_text_output_protocol_64 *out; - u64 *func; - - offset = offsetof(typeof(*out), output_string); - output_string = efi_early->text_output + offset; - out = (typeof(out))(unsigned long)efi_early->text_output; - func = (u64 *)output_string; - - efi_early->call(*func, out, str); - } else { - struct efi_simple_text_output_protocol_32 *out; - u32 *func; - - offset = offsetof(typeof(*out), output_string); - output_string = efi_early->text_output + offset; - out = (typeof(out))(unsigned long)efi_early->text_output; - func = (u32 *)output_string; - - efi_early->call(*func, out, str); - } + efi_call_proto(efi_simple_text_output_protocol, output_string, + efi_early->text_output, str); } static efi_status_t @@ -1157,6 +988,13 @@ struct boot_params *efi_main(struct efi_config *c, else setup_boot_services32(efi_early); + /* + * If the boot loader gave us a value for secure_boot then we use that, + * otherwise we ask the BIOS. + */ + if (boot_params->secure_boot == efi_secureboot_mode_unset) + boot_params->secure_boot = efi_get_secureboot(sys_table); + setup_graphics(boot_params); setup_efi_pci(boot_params); diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index fd0b6a272dd5..d85b9625e836 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -82,7 +82,7 @@ ENTRY(efi_pe_entry) /* Relocate efi_config->call() */ leal efi32_config(%esi), %eax - add %esi, 32(%eax) + add %esi, 40(%eax) pushl %eax call make_boot_params @@ -108,7 +108,7 @@ ENTRY(efi32_stub_entry) /* Relocate efi_config->call() */ leal efi32_config(%esi), %eax - add %esi, 32(%eax) + add %esi, 40(%eax) pushl %eax 2: call efi_main @@ -264,7 +264,7 @@ relocated: #ifdef CONFIG_EFI_STUB .data efi32_config: - .fill 4,8,0 + .fill 5,8,0 .long efi_call_phys .long 0 .byte 0 diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 4d85e600db78..d2ae1f821e0c 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -264,7 +264,7 @@ ENTRY(efi_pe_entry) /* * Relocate efi_config->call(). */ - addq %rbp, efi64_config+32(%rip) + addq %rbp, efi64_config+40(%rip) movq %rax, %rdi call make_boot_params @@ -284,7 +284,7 @@ handover_entry: * Relocate efi_config->call(). */ movq efi_config(%rip), %rax - addq %rbp, 32(%rax) + addq %rbp, 40(%rax) 2: movq efi_config(%rip), %rdi call efi_main @@ -456,14 +456,14 @@ efi_config: #ifdef CONFIG_EFI_MIXED .global efi32_config efi32_config: - .fill 4,8,0 + .fill 5,8,0 .quad efi64_thunk .byte 0 #endif .global efi64_config efi64_config: - .fill 4,8,0 + .fill 5,8,0 .quad efi_call .byte 1 #endif /* CONFIG_EFI_STUB */ diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index a66854d99ee1..8b7c9e75edcb 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -11,6 +11,7 @@ */ #include "misc.h" #include "error.h" +#include "../boot.h" #include <generated/compile.h> #include <linux/module.h> @@ -52,15 +53,22 @@ static unsigned long get_boot_seed(void) #include "../../lib/kaslr.c" struct mem_vector { - unsigned long start; - unsigned long size; + unsigned long long start; + unsigned long long size; }; +/* Only supporting at most 4 unusable memmap regions with kaslr */ +#define MAX_MEMMAP_REGIONS 4 + +static bool memmap_too_large; + enum mem_avoid_index { MEM_AVOID_ZO_RANGE = 0, MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, MEM_AVOID_BOOTPARAMS, + MEM_AVOID_MEMMAP_BEGIN, + MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1, MEM_AVOID_MAX, }; @@ -77,6 +85,123 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) return true; } +/** + * _memparse - Parse a string with mem suffixes into a number + * @ptr: Where parse begins + * @retptr: (output) Optional pointer to next char after parse completes + * + * Parses a string into a number. The number stored at @ptr is + * potentially suffixed with K, M, G, T, P, E. + */ +static unsigned long long _memparse(const char *ptr, char **retptr) +{ + char *endptr; /* Local pointer to end of parsed string */ + + unsigned long long ret = simple_strtoull(ptr, &endptr, 0); + + switch (*endptr) { + case 'E': + case 'e': + ret <<= 10; + case 'P': + case 'p': + ret <<= 10; + case 'T': + case 't': + ret <<= 10; + case 'G': + case 'g': + ret <<= 10; + case 'M': + case 'm': + ret <<= 10; + case 'K': + case 'k': + ret <<= 10; + endptr++; + default: + break; + } + + if (retptr) + *retptr = endptr; + + return ret; +} + +static int +parse_memmap(char *p, unsigned long long *start, unsigned long long *size) +{ + char *oldp; + + if (!p) + return -EINVAL; + + /* We don't care about this option here */ + if (!strncmp(p, "exactmap", 8)) + return -EINVAL; + + oldp = p; + *size = _memparse(p, &p); + if (p == oldp) + return -EINVAL; + + switch (*p) { + case '@': + /* Skip this region, usable */ + *start = 0; + *size = 0; + return 0; + case '#': + case '$': + case '!': + *start = _memparse(p + 1, &p); + return 0; + } + + return -EINVAL; +} + +static void mem_avoid_memmap(void) +{ + char arg[128]; + int rc; + int i; + char *str; + + /* See if we have any memmap areas */ + rc = cmdline_find_option("memmap", arg, sizeof(arg)); + if (rc <= 0) + return; + + i = 0; + str = arg; + while (str && (i < MAX_MEMMAP_REGIONS)) { + int rc; + unsigned long long start, size; + char *k = strchr(str, ','); + + if (k) + *k++ = 0; + + rc = parse_memmap(str, &start, &size); + if (rc < 0) + break; + str = k; + /* A usable region that should not be skipped */ + if (size == 0) + continue; + + mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start; + mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size; + i++; + } + + /* More than 4 memmaps, fail kaslr */ + if ((i >= MAX_MEMMAP_REGIONS) && str) + memmap_too_large = true; +} + /* * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T). * The mem_avoid array is used to store the ranges that need to be avoided @@ -197,6 +322,9 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, /* We don't need to set a mapping for setup_data. */ + /* Mark the memmap regions we need to avoid */ + mem_avoid_memmap(); + #ifdef CONFIG_X86_VERBOSE_BOOTUP /* Make sure video RAM can be used. */ add_identity_map(0, PMD_SIZE); @@ -379,6 +507,12 @@ static unsigned long find_random_phys_addr(unsigned long minimum, int i; unsigned long addr; + /* Check if we had too many memmaps. */ + if (memmap_too_large) { + debug_putstr("Aborted e820 scan (more than 4 memmap= args)!\n"); + return 0; + } + /* Make sure minimum is aligned. */ minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); @@ -456,7 +590,7 @@ void choose_random_location(unsigned long input, /* Walk e820 and find a random address. */ random_addr = find_random_phys_addr(min_addr, output_size); if (!random_addr) { - warn("KASLR disabled: could not find suitable E820 region!"); + warn("Physical KASLR disabled: no suitable memory region!"); } else { /* Update the new physical address location. */ if (*output != random_addr) { diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 9e240fcba784..5457b02fc050 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -156,3 +156,16 @@ char *strstr(const char *s1, const char *s2) } return NULL; } + +/** + * strchr - Find the first occurrence of the character c in the string s. + * @s: the string to be searched + * @c: the character to search for + */ +char *strchr(const char *s, int c) +{ + while (*s != (char)c) + if (*s++ == '\0') + return NULL; + return (char *)s; +} diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile index 1d392c39fe56..b8ccdb5c9244 100644 --- a/arch/x86/events/Makefile +++ b/arch/x86/events/Makefile @@ -1,11 +1,4 @@ -obj-y += core.o - -obj-$(CONFIG_CPU_SUP_AMD) += amd/core.o amd/uncore.o -obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += amd/power.o -obj-$(CONFIG_X86_LOCAL_APIC) += amd/ibs.o msr.o -ifdef CONFIG_AMD_IOMMU -obj-$(CONFIG_CPU_SUP_AMD) += amd/iommu.o -endif - -obj-$(CONFIG_CPU_SUP_INTEL) += msr.o +obj-y += core.o +obj-y += amd/ +obj-$(CONFIG_X86_LOCAL_APIC) += msr.o obj-$(CONFIG_CPU_SUP_INTEL) += intel/ diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile new file mode 100644 index 000000000000..b1da46f396e0 --- /dev/null +++ b/arch/x86/events/amd/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_CPU_SUP_AMD) += core.o uncore.o +obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += power.o +obj-$(CONFIG_X86_LOCAL_APIC) += ibs.o +ifdef CONFIG_AMD_IOMMU +obj-$(CONFIG_CPU_SUP_AMD) += iommu.o +endif + diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index a0b1bdb3ad42..4d1f7f2d9aff 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -22,13 +22,17 @@ #define NUM_COUNTERS_NB 4 #define NUM_COUNTERS_L2 4 -#define MAX_COUNTERS NUM_COUNTERS_NB +#define NUM_COUNTERS_L3 6 +#define MAX_COUNTERS 6 #define RDPMC_BASE_NB 6 -#define RDPMC_BASE_L2 10 +#define RDPMC_BASE_LLC 10 #define COUNTER_SHIFT 16 +static int num_counters_llc; +static int num_counters_nb; + static HLIST_HEAD(uncore_unused_list); struct amd_uncore { @@ -45,30 +49,30 @@ struct amd_uncore { }; static struct amd_uncore * __percpu *amd_uncore_nb; -static struct amd_uncore * __percpu *amd_uncore_l2; +static struct amd_uncore * __percpu *amd_uncore_llc; static struct pmu amd_nb_pmu; -static struct pmu amd_l2_pmu; +static struct pmu amd_llc_pmu; static cpumask_t amd_nb_active_mask; -static cpumask_t amd_l2_active_mask; +static cpumask_t amd_llc_active_mask; static bool is_nb_event(struct perf_event *event) { return event->pmu->type == amd_nb_pmu.type; } -static bool is_l2_event(struct perf_event *event) +static bool is_llc_event(struct perf_event *event) { - return event->pmu->type == amd_l2_pmu.type; + return event->pmu->type == amd_llc_pmu.type; } static struct amd_uncore *event_to_amd_uncore(struct perf_event *event) { if (is_nb_event(event) && amd_uncore_nb) return *per_cpu_ptr(amd_uncore_nb, event->cpu); - else if (is_l2_event(event) && amd_uncore_l2) - return *per_cpu_ptr(amd_uncore_l2, event->cpu); + else if (is_llc_event(event) && amd_uncore_llc) + return *per_cpu_ptr(amd_uncore_llc, event->cpu); return NULL; } @@ -183,16 +187,16 @@ static int amd_uncore_event_init(struct perf_event *event) return -ENOENT; /* - * NB and L2 counters (MSRs) are shared across all cores that share the - * same NB / L2 cache. Interrupts can be directed to a single target - * core, however, event counts generated by processes running on other - * cores cannot be masked out. So we do not support sampling and - * per-thread events. + * NB and Last level cache counters (MSRs) are shared across all cores + * that share the same NB / Last level cache. Interrupts can be directed + * to a single target core, however, event counts generated by processes + * running on other cores cannot be masked out. So we do not support + * sampling and per-thread events. */ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) return -EINVAL; - /* NB and L2 counters do not have usr/os/guest/host bits */ + /* NB and Last level cache counters do not have usr/os/guest/host bits */ if (event->attr.exclude_user || event->attr.exclude_kernel || event->attr.exclude_host || event->attr.exclude_guest) return -EINVAL; @@ -226,8 +230,8 @@ static ssize_t amd_uncore_attr_show_cpumask(struct device *dev, if (pmu->type == amd_nb_pmu.type) active_mask = &amd_nb_active_mask; - else if (pmu->type == amd_l2_pmu.type) - active_mask = &amd_l2_active_mask; + else if (pmu->type == amd_llc_pmu.type) + active_mask = &amd_llc_active_mask; else return 0; @@ -244,30 +248,47 @@ static struct attribute_group amd_uncore_attr_group = { .attrs = amd_uncore_attrs, }; -PMU_FORMAT_ATTR(event, "config:0-7,32-35"); -PMU_FORMAT_ATTR(umask, "config:8-15"); - -static struct attribute *amd_uncore_format_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - NULL, -}; - -static struct attribute_group amd_uncore_format_group = { - .name = "format", - .attrs = amd_uncore_format_attr, +/* + * Similar to PMU_FORMAT_ATTR but allowing for format_attr to be assigned based + * on family + */ +#define AMD_FORMAT_ATTR(_dev, _name, _format) \ +static ssize_t \ +_dev##_show##_name(struct device *dev, \ + struct device_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ +static struct device_attribute format_attr_##_dev##_name = __ATTR_RO(_dev); + +/* Used for each uncore counter type */ +#define AMD_ATTRIBUTE(_name) \ +static struct attribute *amd_uncore_format_attr_##_name[] = { \ + &format_attr_event_##_name.attr, \ + &format_attr_umask.attr, \ + NULL, \ +}; \ +static struct attribute_group amd_uncore_format_group_##_name = { \ + .name = "format", \ + .attrs = amd_uncore_format_attr_##_name, \ +}; \ +static const struct attribute_group *amd_uncore_attr_groups_##_name[] = { \ + &amd_uncore_attr_group, \ + &amd_uncore_format_group_##_name, \ + NULL, \ }; -static const struct attribute_group *amd_uncore_attr_groups[] = { - &amd_uncore_attr_group, - &amd_uncore_format_group, - NULL, -}; +AMD_FORMAT_ATTR(event, , "config:0-7,32-35"); +AMD_FORMAT_ATTR(umask, , "config:8-15"); +AMD_FORMAT_ATTR(event, _df, "config:0-7,32-35,59-60"); +AMD_FORMAT_ATTR(event, _l3, "config:0-7"); +AMD_ATTRIBUTE(df); +AMD_ATTRIBUTE(l3); static struct pmu amd_nb_pmu = { .task_ctx_nr = perf_invalid_context, - .attr_groups = amd_uncore_attr_groups, - .name = "amd_nb", .event_init = amd_uncore_event_init, .add = amd_uncore_add, .del = amd_uncore_del, @@ -276,10 +297,8 @@ static struct pmu amd_nb_pmu = { .read = amd_uncore_read, }; -static struct pmu amd_l2_pmu = { +static struct pmu amd_llc_pmu = { .task_ctx_nr = perf_invalid_context, - .attr_groups = amd_uncore_attr_groups, - .name = "amd_l2", .event_init = amd_uncore_event_init, .add = amd_uncore_add, .del = amd_uncore_del, @@ -296,14 +315,14 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) static int amd_uncore_cpu_up_prepare(unsigned int cpu) { - struct amd_uncore *uncore_nb = NULL, *uncore_l2; + struct amd_uncore *uncore_nb = NULL, *uncore_llc; if (amd_uncore_nb) { uncore_nb = amd_uncore_alloc(cpu); if (!uncore_nb) goto fail; uncore_nb->cpu = cpu; - uncore_nb->num_counters = NUM_COUNTERS_NB; + uncore_nb->num_counters = num_counters_nb; uncore_nb->rdpmc_base = RDPMC_BASE_NB; uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL; uncore_nb->active_mask = &amd_nb_active_mask; @@ -312,18 +331,18 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu) *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb; } - if (amd_uncore_l2) { - uncore_l2 = amd_uncore_alloc(cpu); - if (!uncore_l2) + if (amd_uncore_llc) { + uncore_llc = amd_uncore_alloc(cpu); + if (!uncore_llc) goto fail; - uncore_l2->cpu = cpu; - uncore_l2->num_counters = NUM_COUNTERS_L2; - uncore_l2->rdpmc_base = RDPMC_BASE_L2; - uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL; - uncore_l2->active_mask = &amd_l2_active_mask; - uncore_l2->pmu = &amd_l2_pmu; - uncore_l2->id = -1; - *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2; + uncore_llc->cpu = cpu; + uncore_llc->num_counters = num_counters_llc; + uncore_llc->rdpmc_base = RDPMC_BASE_LLC; + uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL; + uncore_llc->active_mask = &amd_llc_active_mask; + uncore_llc->pmu = &amd_llc_pmu; + uncore_llc->id = -1; + *per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc; } return 0; @@ -376,17 +395,17 @@ static int amd_uncore_cpu_starting(unsigned int cpu) *per_cpu_ptr(amd_uncore_nb, cpu) = uncore; } - if (amd_uncore_l2) { + if (amd_uncore_llc) { unsigned int apicid = cpu_data(cpu).apicid; unsigned int nshared; - uncore = *per_cpu_ptr(amd_uncore_l2, cpu); + uncore = *per_cpu_ptr(amd_uncore_llc, cpu); cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx); nshared = ((eax >> 14) & 0xfff) + 1; uncore->id = apicid - (apicid % nshared); - uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2); - *per_cpu_ptr(amd_uncore_l2, cpu) = uncore; + uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc); + *per_cpu_ptr(amd_uncore_llc, cpu) = uncore; } return 0; @@ -419,8 +438,8 @@ static int amd_uncore_cpu_online(unsigned int cpu) if (amd_uncore_nb) uncore_online(cpu, amd_uncore_nb); - if (amd_uncore_l2) - uncore_online(cpu, amd_uncore_l2); + if (amd_uncore_llc) + uncore_online(cpu, amd_uncore_llc); return 0; } @@ -456,8 +475,8 @@ static int amd_uncore_cpu_down_prepare(unsigned int cpu) if (amd_uncore_nb) uncore_down_prepare(cpu, amd_uncore_nb); - if (amd_uncore_l2) - uncore_down_prepare(cpu, amd_uncore_l2); + if (amd_uncore_llc) + uncore_down_prepare(cpu, amd_uncore_llc); return 0; } @@ -479,8 +498,8 @@ static int amd_uncore_cpu_dead(unsigned int cpu) if (amd_uncore_nb) uncore_dead(cpu, amd_uncore_nb); - if (amd_uncore_l2) - uncore_dead(cpu, amd_uncore_l2); + if (amd_uncore_llc) + uncore_dead(cpu, amd_uncore_llc); return 0; } @@ -492,6 +511,47 @@ static int __init amd_uncore_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) goto fail_nodev; + switch(boot_cpu_data.x86) { + case 23: + /* Family 17h: */ + num_counters_nb = NUM_COUNTERS_NB; + num_counters_llc = NUM_COUNTERS_L3; + /* + * For Family17h, the NorthBridge counters are + * re-purposed as Data Fabric counters. Also, support is + * added for L3 counters. The pmus are exported based on + * family as either L2 or L3 and NB or DF. + */ + amd_nb_pmu.name = "amd_df"; + amd_llc_pmu.name = "amd_l3"; + format_attr_event_df.show = &event_show_df; + format_attr_event_l3.show = &event_show_l3; + break; + case 22: + /* Family 16h - may change: */ + num_counters_nb = NUM_COUNTERS_NB; + num_counters_llc = NUM_COUNTERS_L2; + amd_nb_pmu.name = "amd_nb"; + amd_llc_pmu.name = "amd_l2"; + format_attr_event_df = format_attr_event; + format_attr_event_l3 = format_attr_event; + break; + default: + /* + * All prior families have the same number of + * NorthBridge and Last Level Cache counters + */ + num_counters_nb = NUM_COUNTERS_NB; + num_counters_llc = NUM_COUNTERS_L2; + amd_nb_pmu.name = "amd_nb"; + amd_llc_pmu.name = "amd_l2"; + format_attr_event_df = format_attr_event; + format_attr_event_l3 = format_attr_event; + break; + } + amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df; + amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3; + if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) goto fail_nodev; @@ -510,16 +570,16 @@ static int __init amd_uncore_init(void) } if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) { - amd_uncore_l2 = alloc_percpu(struct amd_uncore *); - if (!amd_uncore_l2) { + amd_uncore_llc = alloc_percpu(struct amd_uncore *); + if (!amd_uncore_llc) { ret = -ENOMEM; - goto fail_l2; + goto fail_llc; } - ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1); + ret = perf_pmu_register(&amd_llc_pmu, amd_llc_pmu.name, -1); if (ret) - goto fail_l2; + goto fail_llc; - pr_info("perf: AMD L2I counters detected\n"); + pr_info("perf: AMD LLC counters detected\n"); ret = 0; } @@ -529,7 +589,7 @@ static int __init amd_uncore_init(void) if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP, "perf/x86/amd/uncore:prepare", amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead)) - goto fail_l2; + goto fail_llc; if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, "perf/x86/amd/uncore:starting", @@ -546,11 +606,11 @@ fail_start: cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); fail_prep: cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); -fail_l2: +fail_llc: if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) perf_pmu_unregister(&amd_nb_pmu); - if (amd_uncore_l2) - free_percpu(amd_uncore_l2); + if (amd_uncore_llc) + free_percpu(amd_uncore_llc); fail_nb: if (amd_uncore_nb) free_percpu(amd_uncore_nb); diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 1076c9a77292..aff4b5b69d40 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -541,6 +541,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates), X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates), X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates), { }, diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 1c1b9fe705c8..5900471ee508 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -99,18 +99,24 @@ static struct attribute_group pt_cap_group = { }; PMU_FORMAT_ATTR(cyc, "config:1" ); +PMU_FORMAT_ATTR(pwr_evt, "config:4" ); +PMU_FORMAT_ATTR(fup_on_ptw, "config:5" ); PMU_FORMAT_ATTR(mtc, "config:9" ); PMU_FORMAT_ATTR(tsc, "config:10" ); PMU_FORMAT_ATTR(noretcomp, "config:11" ); +PMU_FORMAT_ATTR(ptw, "config:12" ); PMU_FORMAT_ATTR(mtc_period, "config:14-17" ); PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" ); PMU_FORMAT_ATTR(psb_period, "config:24-27" ); static struct attribute *pt_formats_attr[] = { &format_attr_cyc.attr, + &format_attr_pwr_evt.attr, + &format_attr_fup_on_ptw.attr, &format_attr_mtc.attr, &format_attr_tsc.attr, &format_attr_noretcomp.attr, + &format_attr_ptw.attr, &format_attr_mtc_period.attr, &format_attr_cyc_thresh.attr, &format_attr_psb_period.attr, diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 22ef4f72cf32..22054ca49026 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -771,6 +771,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init), {}, }; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 1ab45976474d..758c1aa5009d 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1328,6 +1328,8 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init), {}, }; diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 2b892e2313a9..5d6a53fd7521 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -7,7 +7,6 @@ generated-y += unistd_64_x32.h generated-y += xen-hypercalls.h generic-y += clkdev.h -generic-y += cputime.h generic-y += dma-contiguous.h generic-y += early_ioremap.h generic-y += mcs_spinlock.h diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 0c5fbc68e82d..eff8e36aaf72 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -195,7 +195,7 @@ static inline void native_apic_msr_write(u32 reg, u32 v) static inline void native_apic_msr_eoi_write(u32 reg, u32 v) { - wrmsr_notrace(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0); + __wrmsr(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0); } static inline u32 native_apic_msr_read(u32 reg) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index eafee3161d1c..4e7772387c6e 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -100,7 +100,7 @@ #define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ #define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */ +#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ @@ -186,7 +186,7 @@ * * Reuse free bits when adding new feature flags! */ - +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ @@ -288,6 +288,7 @@ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ @@ -320,5 +321,4 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ - #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h index ced283ac79df..af95c47d5c9e 100644 --- a/arch/x86/include/asm/div64.h +++ b/arch/x86/include/asm/div64.h @@ -59,6 +59,17 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) } #define div_u64_rem div_u64_rem +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + u32 high, low; + + asm ("mull %[b]" : "=a" (low), "=d" (high) + : [a] "a" (a), [b] "rm" (b) ); + + return low | ((u64)high) << 32; +} +#define mul_u32_u32 mul_u32_u32 + #else # include <asm-generic/div64.h> #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index ec23d8e1297c..67313f3a9874 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -30,8 +30,6 @@ extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type, int checktype); extern void update_e820(void); extern void e820_setup_gap(void); -extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, - unsigned long start_addr, unsigned long long end_addr); struct setup_data; extern void parse_e820_ext(u64 phys_addr, u32 data_len); diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index e99675b9c861..2f77bcefe6b4 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -191,6 +191,7 @@ static inline efi_status_t efi_thunk_set_virtual_address_map( struct efi_config { u64 image_handle; u64 table; + u64 runtime_services; u64 boot_services; u64 text_output; efi_status_t (*call)(unsigned long, ...); @@ -226,6 +227,10 @@ static inline bool efi_is_64bit(void) #define __efi_call_early(f, ...) \ __efi_early()->call((unsigned long)f, __VA_ARGS__); +#define efi_call_runtime(f, ...) \ + __efi_early()->call(efi_table_attr(efi_runtime_services, f, \ + __efi_early()->runtime_services), __VA_ARGS__) + extern bool efi_reboot_required(void); #else diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index e7f155c3045e..9d49c18b5ea9 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -258,6 +258,15 @@ extern int force_personality32; #define ELF_HWCAP (boot_cpu_data.x86_capability[CPUID_1_EDX]) +extern u32 elf_hwcap2; + +/* + * HWCAP2 supplies mask with kernel enabled CPU features, so that + * the application can discover that it can safely use them. + * The bits are defined in uapi/asm/hwcap2.h. + */ +#define ELF_HWCAP2 (elf_hwcap2) + /* This yields a string that ld.so will use to load implementation specific libraries for optimization. This is more specific in intent than poking at uname or /proc/cpuinfo. diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index d4a684997497..255645f60ca2 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -87,6 +87,16 @@ extern void fpstate_init_soft(struct swregs_state *soft); #else static inline void fpstate_init_soft(struct swregs_state *soft) {} #endif + +static inline void fpstate_init_xstate(struct xregs_state *xsave) +{ + /* + * XRSTORS requires these bits set in xcomp_bv, or it will + * trigger #GP: + */ + xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask; +} + static inline void fpstate_init_fxstate(struct fxregs_state *fx) { fx->cwd = 0x37f; diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 5132f2a6c0a2..e63873683d4a 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -97,10 +97,6 @@ #define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ -/* Software defined banks */ -#define MCE_EXTENDED_BANK 128 -#define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0) - #define MCE_LOG_LEN 32 #define MCE_LOG_SIGNATURE "MACHINECHECK" @@ -193,6 +189,15 @@ extern struct mce_vendor_flags mce_flags; extern struct mca_config mca_cfg; extern struct mca_msr_regs msr_ops; + +enum mce_notifier_prios { + MCE_PRIO_SRAO = INT_MAX, + MCE_PRIO_EXTLOG = INT_MAX - 1, + MCE_PRIO_NFIT = INT_MAX - 2, + MCE_PRIO_EDAC = INT_MAX - 3, + MCE_PRIO_LOWEST = 0, +}; + extern void mce_register_decode_chain(struct notifier_block *nb); extern void mce_unregister_decode_chain(struct notifier_block *nb); @@ -306,8 +311,6 @@ extern void (*deferred_error_int_vector)(void); void intel_init_thermal(struct cpuinfo_x86 *c); -void mce_log_therm_throt_event(__u64 status); - /* Interrupt Handler for core thermal thresholds */ extern int (*platform_thermal_notify)(__u64 msr_val); @@ -362,12 +365,13 @@ struct smca_hwid { unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */ u32 hwid_mcatype; /* (hwid,mcatype) tuple */ u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */ + u8 count; /* Number of instances. */ }; struct smca_bank { struct smca_hwid *hwid; - /* Instance ID */ - u32 id; + u32 id; /* Value of MCA_IPID[InstanceId]. */ + u8 sysfs_id; /* Value used for sysfs name. */ }; extern struct smca_bank smca_banks[MAX_NR_BANKS]; diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 2266f864b747..daadeeea00b1 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -7,18 +7,17 @@ #define native_rdmsr(msr, val1, val2) \ do { \ - u64 __val = native_read_msr((msr)); \ + u64 __val = __rdmsr((msr)); \ (void)((val1) = (u32)__val); \ (void)((val2) = (u32)(__val >> 32)); \ } while (0) #define native_wrmsr(msr, low, high) \ - native_write_msr(msr, low, high) + __wrmsr(msr, low, high) #define native_wrmsrl(msr, val) \ - native_write_msr((msr), \ - (u32)((u64)(val)), \ - (u32)((u64)(val) >> 32)) + __wrmsr((msr), (u32)((u64)(val)), \ + (u32)((u64)(val) >> 32)) struct ucode_patch { struct list_head plist; diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 3e3e20be829a..3d57009e168b 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -54,6 +54,4 @@ static inline int __init save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } void reload_ucode_amd(void) {} #endif - -extern bool check_current_patch_level(u32 *rev, bool early); #endif /* _ASM_X86_MICROCODE_AMD_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 710273c617b8..00293a94ffaf 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -543,6 +543,11 @@ #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39 #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT) +/* MISC_FEATURE_ENABLES non-architectural features */ +#define MSR_MISC_FEATURE_ENABLES 0x00000140 + +#define MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT 1 + #define MSR_IA32_TSC_DEADLINE 0x000006E0 /* P4/Xeon+ specific */ diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index db0b90c3b03e..898dba2e2e2c 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -80,7 +80,14 @@ static inline void do_trace_read_msr(unsigned int msr, u64 val, int failed) {} static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {} #endif -static inline unsigned long long native_read_msr(unsigned int msr) +/* + * __rdmsr() and __wrmsr() are the two primitives which are the bare minimum MSR + * accessors and should not have any tracing or other functionality piggybacking + * on them - those are *purely* for accessing MSRs and nothing more. So don't even + * think of extending them - you will be slapped with a stinking trout or a frozen + * shark will reach you, wherever you are! You've been warned. + */ +static inline unsigned long long notrace __rdmsr(unsigned int msr) { DECLARE_ARGS(val, low, high); @@ -88,11 +95,30 @@ static inline unsigned long long native_read_msr(unsigned int msr) "2:\n" _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe) : EAX_EDX_RET(val, low, high) : "c" (msr)); - if (msr_tracepoint_active(__tracepoint_read_msr)) - do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0); + return EAX_EDX_VAL(val, low, high); } +static inline void notrace __wrmsr(unsigned int msr, u32 low, u32 high) +{ + asm volatile("1: wrmsr\n" + "2:\n" + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe) + : : "c" (msr), "a"(low), "d" (high) : "memory"); +} + +static inline unsigned long long native_read_msr(unsigned int msr) +{ + unsigned long long val; + + val = __rdmsr(msr); + + if (msr_tracepoint_active(__tracepoint_read_msr)) + do_trace_read_msr(msr, val, 0); + + return val; +} + static inline unsigned long long native_read_msr_safe(unsigned int msr, int *err) { @@ -116,29 +142,14 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, /* Can be uninlined because referenced by paravirt */ static inline void notrace -__native_write_msr_notrace(unsigned int msr, u32 low, u32 high) -{ - asm volatile("1: wrmsr\n" - "2:\n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe) - : : "c" (msr), "a"(low), "d" (high) : "memory"); -} - -/* Can be uninlined because referenced by paravirt */ -static inline void notrace native_write_msr(unsigned int msr, u32 low, u32 high) { - __native_write_msr_notrace(msr, low, high); + __wrmsr(msr, low, high); + if (msr_tracepoint_active(__tracepoint_write_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), 0); } -static inline void -wrmsr_notrace(unsigned int msr, u32 low, u32 high) -{ - __native_write_msr_notrace(msr, low, high); -} - /* Can be uninlined because referenced by paravirt */ static inline int notrace native_write_msr_safe(unsigned int msr, u32 low, u32 high) diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index b6c0b404898a..fbc73360aea0 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -27,6 +27,7 @@ struct vm_area_struct; extern pgd_t swapper_pg_dir[1024]; extern pgd_t initial_page_table[1024]; +extern pmd_t initial_pg_pmd[]; static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } @@ -75,4 +76,35 @@ do { \ #define kern_addr_valid(kaddr) (0) #endif +/* + * This is how much memory in addition to the memory covered up to + * and including _end we need mapped initially. + * We need: + * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE) + * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE) + * + * Modulo rounding, each megabyte assigned here requires a kilobyte of + * memory, which is currently unreclaimed. + * + * This should be a multiple of a page. + * + * KERNEL_IMAGE_SIZE should be greater than pa(_end) + * and small than max_low_pfn, otherwise will waste some page table entries + */ +#if PTRS_PER_PMD > 1 +#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) +#else +#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) +#endif + +/* + * Number of possible pages in the lowmem region. + * + * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a + * gas warning about overflowing shift count when gas has been compiled + * with only a host target support using a 32-bit type for internal + * representation. + */ +#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)) + #endif /* _ASM_X86_PGTABLE_32_H */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 921bea7a2708..6d391909e864 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -23,9 +23,6 @@ /* How long a lock should spin before we consider blocking */ #define SPIN_THRESHOLD (1 << 15) -extern struct static_key paravirt_ticketlocks_enabled; -static __always_inline bool static_key_false(struct static_key *key); - #include <asm/qspinlock.h> /* diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index b10bf319ed20..5138dacf8bb8 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -135,7 +135,8 @@ struct boot_params { __u8 eddbuf_entries; /* 0x1e9 */ __u8 edd_mbr_sig_buf_entries; /* 0x1ea */ __u8 kbd_status; /* 0x1eb */ - __u8 _pad5[3]; /* 0x1ec */ + __u8 secure_boot; /* 0x1ec */ + __u8 _pad5[2]; /* 0x1ed */ /* * The sentinel is set to a nonzero value (0xff) in header.S. * diff --git a/arch/x86/include/uapi/asm/hwcap2.h b/arch/x86/include/uapi/asm/hwcap2.h new file mode 100644 index 000000000000..0bd2be5c7617 --- /dev/null +++ b/arch/x86/include/uapi/asm/hwcap2.h @@ -0,0 +1,7 @@ +#ifndef _ASM_X86_HWCAP2_H +#define _ASM_X86_HWCAP2_H + +/* MONITOR/MWAIT enabled in Ring 3 */ +#define HWCAP2_RING3MWAIT (1 << 0) + +#endif diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 64422f850e95..7ff007ed899d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -35,6 +35,7 @@ #include <linux/bootmem.h> #include <linux/ioport.h> #include <linux/pci.h> +#include <linux/efi-bgrt.h> #include <asm/irqdomain.h> #include <asm/pci_x86.h> @@ -1557,6 +1558,12 @@ int __init early_acpi_boot_init(void) return 0; } +static int __init acpi_parse_bgrt(struct acpi_table_header *table) +{ + efi_bgrt_init(table); + return 0; +} + int __init acpi_boot_init(void) { /* those are executed after early-quirks are executed */ @@ -1581,6 +1588,8 @@ int __init acpi_boot_init(void) acpi_process_madt(); acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet); + if (IS_ENABLED(CONFIG_ACPI_BGRT)) + acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); if (!acpi_noirq) x86_init.pci.init = pci_acpi_init; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 5b7e43eff139..8567c851172c 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -529,18 +529,19 @@ static void lapic_timer_broadcast(const struct cpumask *mask) * The local apic timer can be used for any function which is CPU local. */ static struct clock_event_device lapic_clockevent = { - .name = "lapic", - .features = CLOCK_EVT_FEAT_PERIODIC | - CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP - | CLOCK_EVT_FEAT_DUMMY, - .shift = 32, - .set_state_shutdown = lapic_timer_shutdown, - .set_state_periodic = lapic_timer_set_periodic, - .set_state_oneshot = lapic_timer_set_oneshot, - .set_next_event = lapic_next_event, - .broadcast = lapic_timer_broadcast, - .rating = 100, - .irq = -1, + .name = "lapic", + .features = CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP + | CLOCK_EVT_FEAT_DUMMY, + .shift = 32, + .set_state_shutdown = lapic_timer_shutdown, + .set_state_periodic = lapic_timer_set_periodic, + .set_state_oneshot = lapic_timer_set_oneshot, + .set_state_oneshot_stopped = lapic_timer_shutdown, + .set_next_event = lapic_next_event, + .broadcast = lapic_timer_broadcast, + .rating = 100, + .irq = -1, }; static DEFINE_PER_CPU(struct clock_event_device, lapic_events); @@ -1245,7 +1246,7 @@ static void lapic_setup_esr(void) /** * setup_local_APIC - setup the local APIC * - * Used to setup local APIC while initializing BSP or bringin up APs. + * Used to setup local APIC while initializing BSP or bringing up APs. * Always called with preemption disabled. */ void setup_local_APIC(void) @@ -2028,8 +2029,8 @@ void disconnect_bsp_APIC(int virt_wire_setup) /* * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated * contiguously, it equals to current allocated max logical CPU ID plus 1. - * All allocated CPU ID should be in [0, nr_logical_cpuidi), so the maximum of - * nr_logical_cpuids is nr_cpu_ids. + * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range, + * so the maximum of nr_logical_cpuids is nr_cpu_ids. * * NOTE: Reserve 0 for BSP. */ @@ -2094,7 +2095,7 @@ int __generic_processor_info(int apicid, int version, bool enabled) * Since fixing handling of boot_cpu_physical_apicid requires * another discussion and tests on each platform, we leave it * for now and here we use read_apic_id() directly in this - * function, generic_processor_info(). + * function, __generic_processor_info(). */ if (disabled_cpu_apicid != BAD_APICID && disabled_cpu_apicid != read_apic_id() && diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index bd6b8c270c24..52f352b063fd 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1875,6 +1875,7 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_ack = irq_chip_ack_parent, .irq_eoi = ioapic_ack_level, .irq_set_affinity = ioapic_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -1886,6 +1887,7 @@ static struct irq_chip ioapic_ir_chip __read_mostly = { .irq_ack = irq_chip_ack_parent, .irq_eoi = ioapic_ir_ack_level, .irq_set_affinity = ioapic_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SKIP_SET_WAKE, }; diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 45d44c173cf9..4a7080c84a5a 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -905,8 +905,8 @@ static int apm_cpu_idle(struct cpuidle_device *dev, { static int use_apm_idle; /* = 0 */ static unsigned int last_jiffies; /* = 0 */ - static unsigned int last_stime; /* = 0 */ - cputime_t stime, utime; + static u64 last_stime; /* = 0 */ + u64 stime, utime; int apm_idle_done = 0; unsigned int jiffies_since_last_check = jiffies - last_jiffies; @@ -919,7 +919,7 @@ recalc: } else if (jiffies_since_last_check > idle_period) { unsigned int idle_percentage; - idle_percentage = cputime_to_jiffies(stime - last_stime); + idle_percentage = nsecs_to_jiffies(stime - last_stime); idle_percentage *= 100; idle_percentage /= jiffies_since_last_check; use_apm_idle = (idle_percentage > idle_threshold); diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index c62e015b126c..de827d6ac8c2 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -81,6 +81,7 @@ void common(void) { BLANK(); OFFSET(BP_scratch, boot_params, scratch); + OFFSET(BP_secure_boot, boot_params, secure_boot); OFFSET(BP_loadflags, boot_params, hdr.loadflags); OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); OFFSET(BP_version, boot_params, hdr.version); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 2b4cf04239b6..4e95b2e0d95f 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -555,8 +555,10 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (!check_tsc_unstable()) - set_sched_clock_stable(); + if (check_tsc_unstable()) + clear_sched_clock_stable(); + } else { + clear_sched_clock_stable(); } /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */ diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 1661d8ec9280..2c234a6d94c4 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -1,5 +1,5 @@ -#include <linux/bitops.h> -#include <linux/kernel.h> + +#include <linux/sched.h> #include <asm/cpufeature.h> #include <asm/e820.h> @@ -104,6 +104,8 @@ static void early_init_centaur(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); #endif + + clear_sched_clock_stable(); } static void init_centaur(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index ede03e849a8b..f07005e6f461 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -35,6 +35,7 @@ #include <asm/desc.h> #include <asm/fpu/internal.h> #include <asm/mtrr.h> +#include <asm/hwcap2.h> #include <linux/numa.h> #include <asm/asm.h> #include <asm/bugs.h> @@ -51,6 +52,8 @@ #include "cpu.h" +u32 elf_hwcap2 __read_mostly; + /* all of these masks are initialized in setup_cpu_local_masks() */ cpumask_var_t cpu_initialized_mask; cpumask_var_t cpu_callout_mask; @@ -83,6 +86,7 @@ static void default_init(struct cpuinfo_x86 *c) strcpy(c->x86_model_id, "386"); } #endif + clear_sched_clock_stable(); } static const struct cpu_dev default_cpu = { @@ -655,6 +659,16 @@ void cpu_detect(struct cpuinfo_x86 *c) } } +static void apply_forced_caps(struct cpuinfo_x86 *c) +{ + int i; + + for (i = 0; i < NCAPINTS; i++) { + c->x86_capability[i] &= ~cpu_caps_cleared[i]; + c->x86_capability[i] |= cpu_caps_set[i]; + } +} + void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -748,6 +762,13 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); init_scattered_cpuid_features(c); + + /* + * Clear/Set all flags overridden by options, after probe. + * This needs to happen each time we re-probe, which may happen + * several times during CPU initialization. + */ + apply_forced_caps(c); } static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) @@ -801,14 +822,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) memset(&c->x86_capability, 0, sizeof c->x86_capability); c->extended_cpuid_level = 0; - if (!have_cpuid_p()) - identify_cpu_without_cpuid(c); - /* cyrix could have cpuid enabled via c_identify()*/ if (have_cpuid_p()) { cpu_detect(c); get_cpu_vendor(c); get_cpu_cap(c); + setup_force_cpu_cap(X86_FEATURE_CPUID); if (this_cpu->c_early_init) this_cpu->c_early_init(c); @@ -818,6 +837,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_bsp_init) this_cpu->c_bsp_init(c); + } else { + identify_cpu_without_cpuid(c); + setup_clear_cpu_cap(X86_FEATURE_CPUID); } setup_force_cpu_cap(X86_FEATURE_ALWAYS); @@ -1035,10 +1057,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_identify(c); /* Clear/Set all flags overridden by options, after probe */ - for (i = 0; i < NCAPINTS; i++) { - c->x86_capability[i] &= ~cpu_caps_cleared[i]; - c->x86_capability[i] |= cpu_caps_set[i]; - } + apply_forced_caps(c); #ifdef CONFIG_X86_64 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); @@ -1056,6 +1075,8 @@ static void identify_cpu(struct cpuinfo_x86 *c) */ if (this_cpu->c_init) this_cpu->c_init(c); + else + clear_sched_clock_stable(); /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); @@ -1097,10 +1118,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) * Clear/Set all flags overridden by options, need do it * before following smp all cpus cap AND. */ - for (i = 0; i < NCAPINTS; i++) { - c->x86_capability[i] &= ~cpu_caps_cleared[i]; - c->x86_capability[i] |= cpu_caps_set[i]; - } + apply_forced_caps(c); /* * On SMP, boot_cpu_data holds the common feature set between diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index bd9dcd6b712d..47416f959a48 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -9,6 +9,7 @@ #include <asm/pci-direct.h> #include <asm/tsc.h> #include <asm/cpufeature.h> +#include <linux/sched.h> #include "cpu.h" @@ -183,6 +184,7 @@ static void early_init_cyrix(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); break; } + clear_sched_clock_stable(); } static void init_cyrix(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 203f860d2ab3..017ecd3bb553 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -15,6 +15,8 @@ #include <asm/cpu.h> #include <asm/intel-family.h> #include <asm/microcode_intel.h> +#include <asm/hwcap2.h> +#include <asm/elf.h> #ifdef CONFIG_X86_64 #include <linux/topology.h> @@ -62,6 +64,46 @@ void check_mpx_erratum(struct cpuinfo_x86 *c) } } +static bool ring3mwait_disabled __read_mostly; + +static int __init ring3mwait_disable(char *__unused) +{ + ring3mwait_disabled = true; + return 0; +} +__setup("ring3mwait=disable", ring3mwait_disable); + +static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) +{ + /* + * Ring 3 MONITOR/MWAIT feature cannot be detected without + * cpu model and family comparison. + */ + if (c->x86 != 6) + return; + switch (c->x86_model) { + case INTEL_FAM6_XEON_PHI_KNL: + case INTEL_FAM6_XEON_PHI_KNM: + break; + default: + return; + } + + if (ring3mwait_disabled) { + msr_clear_bit(MSR_MISC_FEATURE_ENABLES, + MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT); + return; + } + + msr_set_bit(MSR_MISC_FEATURE_ENABLES, + MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT); + + set_cpu_cap(c, X86_FEATURE_RING3MWAIT); + + if (c == &boot_cpu_data) + ELF_HWCAP2 |= HWCAP2_RING3MWAIT; +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; @@ -119,8 +161,10 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (!check_tsc_unstable()) - set_sched_clock_stable(); + if (check_tsc_unstable()) + clear_sched_clock_stable(); + } else { + clear_sched_clock_stable(); } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ @@ -560,6 +604,8 @@ static void init_intel(struct cpuinfo_x86 *c) detect_vmx_virtcap(c); init_intel_energy_perf(c); + + probe_xeon_phi_r3mwait(c); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index 83f1a98d37db..2eee85379689 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -52,8 +52,11 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) if (severity >= GHES_SEV_RECOVERABLE) m.status |= MCI_STATUS_UC; - if (severity >= GHES_SEV_PANIC) + + if (severity >= GHES_SEV_PANIC) { m.status |= MCI_STATUS_PCC; + m.tsc = rdtsc(); + } m.addr = mem_err->physical_addr; mce_log(&m); diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c index 93d824ec3120..1e5a50c11d3c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c +++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c @@ -72,7 +72,7 @@ struct llist_node *mce_gen_pool_prepare_records(void) return new_head.first; } -void mce_gen_pool_process(void) +void mce_gen_pool_process(struct work_struct *__unused) { struct llist_node *head; struct mce_evt_llist *node, *tmp; diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 517619ea6498..99165b206df3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -152,7 +152,6 @@ static void raise_mce(struct mce *m) if (context == MCJ_CTX_RANDOM) return; -#ifdef CONFIG_X86_LOCAL_APIC if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) { unsigned long start; int cpu; @@ -192,9 +191,7 @@ static void raise_mce(struct mce *m) raise_local(); put_cpu(); put_online_cpus(); - } else -#endif - { + } else { preempt_disable(); raise_local(); preempt_enable(); diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index cd74a3f00aea..903043e6a62b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -31,7 +31,7 @@ struct mce_evt_llist { struct mce mce; }; -void mce_gen_pool_process(void); +void mce_gen_pool_process(struct work_struct *__unused); bool mce_gen_pool_empty(void); int mce_gen_pool_add(struct mce *mce); int mce_gen_pool_init(void); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 537c6647d84c..8e9725c607ea 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -128,7 +128,6 @@ void mce_setup(struct mce *m) { memset(m, 0, sizeof(struct mce)); m->cpu = m->extcpu = smp_processor_id(); - m->tsc = rdtsc(); /* We hope get_seconds stays lockless */ m->time = get_seconds(); m->cpuvendor = boot_cpu_data.x86_vendor; @@ -217,9 +216,7 @@ void mce_register_decode_chain(struct notifier_block *nb) { atomic_inc(&num_notifiers); - /* Ensure SRAO notifier has the highest priority in the decode chain. */ - if (nb != &mce_srao_nb && nb->priority == INT_MAX) - nb->priority -= 1; + WARN_ON(nb->priority > MCE_PRIO_LOWEST && nb->priority < MCE_PRIO_EDAC); atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); } @@ -583,7 +580,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, } static struct notifier_block mce_srao_nb = { .notifier_call = srao_decode_notifier, - .priority = INT_MAX, + .priority = MCE_PRIO_SRAO, }; static int mce_default_notifier(struct notifier_block *nb, unsigned long val, @@ -609,7 +606,7 @@ static int mce_default_notifier(struct notifier_block *nb, unsigned long val, static struct notifier_block mce_default_nb = { .notifier_call = mce_default_notifier, /* lowest prio, we want it to run last. */ - .priority = 0, + .priority = MCE_PRIO_LOWEST, }; /* @@ -710,14 +707,8 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) mce_gather_info(&m, NULL); - /* - * m.tsc was set in mce_setup(). Clear it if not requested. - * - * FIXME: Propagate @flags to mce_gather_info/mce_setup() to avoid - * that dance. - */ - if (!(flags & MCP_TIMESTAMP)) - m.tsc = 0; + if (flags & MCP_TIMESTAMP) + m.tsc = rdtsc(); for (i = 0; i < mca_cfg.banks; i++) { if (!mce_banks[i].ctl || !test_bit(i, *b)) @@ -1156,6 +1147,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) goto out; mce_gather_info(&m, regs); + m.tsc = rdtsc(); final = this_cpu_ptr(&mces_seen); *final = m; @@ -1322,41 +1314,6 @@ int memory_failure(unsigned long pfn, int vector, int flags) #endif /* - * Action optional processing happens here (picking up - * from the list of faulting pages that do_machine_check() - * placed into the genpool). - */ -static void mce_process_work(struct work_struct *dummy) -{ - mce_gen_pool_process(); -} - -#ifdef CONFIG_X86_MCE_INTEL -/*** - * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog - * @cpu: The CPU on which the event occurred. - * @status: Event status information - * - * This function should be called by the thermal interrupt after the - * event has been processed and the decision was made to log the event - * further. - * - * The status parameter will be saved to the 'status' field of 'struct mce' - * and historically has been the register value of the - * MSR_IA32_THERMAL_STATUS (Intel) msr. - */ -void mce_log_therm_throt_event(__u64 status) -{ - struct mce m; - - mce_setup(&m); - m.bank = MCE_THERMAL_BANK; - m.status = status; - mce_log(&m); -} -#endif /* CONFIG_X86_MCE_INTEL */ - -/* * Periodic polling timer for "silent" machine check errors. If the * poller finds an MCE, poll 2x faster. When the poller finds no more * errors, poll 2x slower (up to check_interval seconds). @@ -2189,7 +2146,7 @@ int __init mcheck_init(void) mce_register_decode_chain(&mce_default_nb); mcheck_vendor_init_severity(); - INIT_WORK(&mce_work, mce_process_work); + INIT_WORK(&mce_work, mce_gen_pool_process); init_irq_work(&mce_irq_work, mce_irq_work_cb); return 0; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index a5fd137417a2..9e5427df3243 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -192,6 +192,7 @@ static void get_smca_bank_info(unsigned int bank) smca_banks[bank].hwid = s_hwid; smca_banks[bank].id = instance_id; + smca_banks[bank].sysfs_id = s_hwid->count++; break; } } @@ -777,7 +778,8 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) mce_setup(&m); m.status = status; - m.bank = bank; + m.bank = bank; + m.tsc = rdtsc(); if (threshold_err) m.misc = misc; @@ -1064,9 +1066,12 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return NULL; } + if (smca_banks[bank].hwid->count == 1) + return smca_get_name(bank_type); + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "%s_%x", smca_get_name(bank_type), - smca_banks[bank].id); + smca_banks[bank].sysfs_id); return buf_mcatype; } diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 465aca8be009..85469f84c921 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -6,7 +6,7 @@ * * Maintains a counter in /sys that keeps track of the number of thermal * events, such that the user knows how bad the thermal problem might be - * (since the logging to syslog and mcelog is rate limited). + * (since the logging to syslog is rate limited). * * Author: Dmitriy Zavin (dmitriyz@google.com) * @@ -141,13 +141,8 @@ static struct attribute_group thermal_attr_group = { * IRQ has been acknowledged. * * It will take care of rate limiting and printing messages to the syslog. - * - * Returns: 0 : Event should NOT be further logged, i.e. still in - * "timeout" from previous log message. - * 1 : Event should be logged further, and a message has been - * printed to the syslog. */ -static int therm_throt_process(bool new_event, int event, int level) +static void therm_throt_process(bool new_event, int event, int level) { struct _thermal_state *state; unsigned int this_cpu = smp_processor_id(); @@ -162,16 +157,16 @@ static int therm_throt_process(bool new_event, int event, int level) else if (event == POWER_LIMIT_EVENT) state = &pstate->core_power_limit; else - return 0; + return; } else if (level == PACKAGE_LEVEL) { if (event == THERMAL_THROTTLING_EVENT) state = &pstate->package_throttle; else if (event == POWER_LIMIT_EVENT) state = &pstate->package_power_limit; else - return 0; + return; } else - return 0; + return; old_event = state->new_event; state->new_event = new_event; @@ -181,7 +176,7 @@ static int therm_throt_process(bool new_event, int event, int level) if (time_before64(now, state->next_check) && state->count != state->last_count) - return 0; + return; state->next_check = now + CHECK_INTERVAL; state->last_count = state->count; @@ -193,16 +188,14 @@ static int therm_throt_process(bool new_event, int event, int level) this_cpu, level == CORE_LEVEL ? "Core" : "Package", state->count); - return 1; + return; } if (old_event) { if (event == THERMAL_THROTTLING_EVENT) pr_info("CPU%d: %s temperature/speed normal\n", this_cpu, level == CORE_LEVEL ? "Core" : "Package"); - return 1; + return; } - - return 0; } static int thresh_event_valid(int level, int event) @@ -365,10 +358,9 @@ static void intel_thermal_interrupt(void) /* Check for violation of core thermal thresholds*/ notify_thresholds(msr_val); - if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, - THERMAL_THROTTLING_EVENT, - CORE_LEVEL) != 0) - mce_log_therm_throt_event(msr_val); + therm_throt_process(msr_val & THERM_STATUS_PROCHOT, + THERMAL_THROTTLING_EVENT, + CORE_LEVEL); if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable) therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 079e81733a58..7889ae492af0 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -42,16 +42,19 @@ static struct equiv_cpu_entry *equiv_cpu_table; /* * This points to the current valid container of microcode patches which we will - * save from the initrd/builtin before jettisoning its contents. + * save from the initrd/builtin before jettisoning its contents. @mc is the + * microcode patch we found to match. */ -struct container { - u8 *data; - size_t size; -} cont; +struct cont_desc { + struct microcode_amd *mc; + u32 cpuid_1_eax; + u32 psize; + u8 *data; + size_t size; +}; static u32 ucode_new_rev; static u8 amd_ucode_patch[PATCH_MAX_SIZE]; -static u16 this_equiv_id; /* * Microcode patch container file is prepended to the initrd in cpio @@ -60,57 +63,13 @@ static u16 this_equiv_id; static const char ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; -static size_t compute_container_size(u8 *data, u32 total_size) +static u16 find_equiv_id(struct equiv_cpu_entry *equiv_table, u32 sig) { - size_t size = 0; - u32 *header = (u32 *)data; - - if (header[0] != UCODE_MAGIC || - header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ - header[2] == 0) /* size */ - return size; - - size = header[2] + CONTAINER_HDR_SZ; - total_size -= size; - data += size; - - while (total_size) { - u16 patch_size; - - header = (u32 *)data; - - if (header[0] != UCODE_UCODE_TYPE) - break; - - /* - * Sanity-check patch size. - */ - patch_size = header[1]; - if (patch_size > PATCH_MAX_SIZE) - break; - - size += patch_size + SECTION_HDR_SIZE; - data += patch_size + SECTION_HDR_SIZE; - total_size -= patch_size + SECTION_HDR_SIZE; + for (; equiv_table && equiv_table->installed_cpu; equiv_table++) { + if (sig == equiv_table->installed_cpu) + return equiv_table->equiv_cpu; } - return size; -} - -static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table, - unsigned int sig) -{ - int i = 0; - - if (!equiv_cpu_table) - return 0; - - while (equiv_cpu_table[i].installed_cpu != 0) { - if (sig == equiv_cpu_table[i].installed_cpu) - return equiv_cpu_table[i].equiv_cpu; - - i++; - } return 0; } @@ -118,91 +77,109 @@ static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table, * This scans the ucode blob for the proper container as we can have multiple * containers glued together. Returns the equivalence ID from the equivalence * table or 0 if none found. + * Returns the amount of bytes consumed while scanning. @desc contains all the + * data we're going to use in later stages of the application. */ -static u16 -find_proper_container(u8 *ucode, size_t size, struct container *ret_cont) +static ssize_t parse_container(u8 *ucode, ssize_t size, struct cont_desc *desc) { - struct container ret = { NULL, 0 }; - u32 eax, ebx, ecx, edx; struct equiv_cpu_entry *eq; - int offset, left; - u16 eq_id = 0; - u32 *header; - u8 *data; + ssize_t orig_size = size; + u32 *hdr = (u32 *)ucode; + u16 eq_id; + u8 *buf; - data = ucode; - left = size; - header = (u32 *)data; + /* Am I looking at an equivalence table header? */ + if (hdr[0] != UCODE_MAGIC || + hdr[1] != UCODE_EQUIV_CPU_TABLE_TYPE || + hdr[2] == 0) + return CONTAINER_HDR_SZ; + buf = ucode; - /* find equiv cpu table */ - if (header[0] != UCODE_MAGIC || - header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ - header[2] == 0) /* size */ - return eq_id; + eq = (struct equiv_cpu_entry *)(buf + CONTAINER_HDR_SZ); - eax = 0x00000001; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); + /* Find the equivalence ID of our CPU in this table: */ + eq_id = find_equiv_id(eq, desc->cpuid_1_eax); - while (left > 0) { - eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ); + buf += hdr[2] + CONTAINER_HDR_SZ; + size -= hdr[2] + CONTAINER_HDR_SZ; + + /* + * Scan through the rest of the container to find where it ends. We do + * some basic sanity-checking too. + */ + while (size > 0) { + struct microcode_amd *mc; + u32 patch_size; - ret.data = data; + hdr = (u32 *)buf; - /* Advance past the container header */ - offset = header[2] + CONTAINER_HDR_SZ; - data += offset; - left -= offset; + if (hdr[0] != UCODE_UCODE_TYPE) + break; - eq_id = find_equiv_id(eq, eax); - if (eq_id) { - ret.size = compute_container_size(ret.data, left + offset); + /* Sanity-check patch size. */ + patch_size = hdr[1]; + if (patch_size > PATCH_MAX_SIZE) + break; - /* - * truncate how much we need to iterate over in the - * ucode update loop below - */ - left = ret.size - offset; + /* Skip patch section header: */ + buf += SECTION_HDR_SIZE; + size -= SECTION_HDR_SIZE; - *ret_cont = ret; - return eq_id; + mc = (struct microcode_amd *)buf; + if (eq_id == mc->hdr.processor_rev_id) { + desc->psize = patch_size; + desc->mc = mc; } - /* - * support multiple container files appended together. if this - * one does not have a matching equivalent cpu entry, we fast - * forward to the next container file. - */ - while (left > 0) { - header = (u32 *)data; - - if (header[0] == UCODE_MAGIC && - header[1] == UCODE_EQUIV_CPU_TABLE_TYPE) - break; - - offset = header[1] + SECTION_HDR_SIZE; - data += offset; - left -= offset; - } + buf += patch_size; + size -= patch_size; + } - /* mark where the next microcode container file starts */ - offset = data - (u8 *)ucode; - ucode = data; + /* + * If we have found a patch (desc->mc), it means we're looking at the + * container which has a patch for this CPU so return 0 to mean, @ucode + * already points to the proper container. Otherwise, we return the size + * we scanned so that we can advance to the next container in the + * buffer. + */ + if (desc->mc) { + desc->data = ucode; + desc->size = orig_size - size; + + return 0; } - return eq_id; + return orig_size - size; } -static int __apply_microcode_amd(struct microcode_amd *mc_amd) +/* + * Scan the ucode blob for the proper container as we can have multiple + * containers glued together. + */ +static void scan_containers(u8 *ucode, size_t size, struct cont_desc *desc) +{ + ssize_t rem = size; + + while (rem >= 0) { + ssize_t s = parse_container(ucode, rem, desc); + if (!s) + return; + + ucode += s; + rem -= s; + } +} + +static int __apply_microcode_amd(struct microcode_amd *mc) { u32 rev, dummy; - native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); + native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc->hdr.data_code); /* verify patch application was successful */ native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - if (rev != mc_amd->hdr.patch_id) + if (rev != mc->hdr.patch_id) return -1; return 0; @@ -217,17 +194,16 @@ static int __apply_microcode_amd(struct microcode_amd *mc_amd) * load_microcode_amd() to save equivalent cpu table and microcode patches in * kernel heap memory. * - * Returns true if container found (sets @ret_cont), false otherwise. + * Returns true if container found (sets @desc), false otherwise. */ -static bool apply_microcode_early_amd(void *ucode, size_t size, bool save_patch, - struct container *ret_cont) +static bool +apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, bool save_patch) { + struct cont_desc desc = { 0 }; u8 (*patch)[PATCH_MAX_SIZE]; - u32 rev, *header, *new_rev; - struct container ret; - int offset, left; - u16 eq_id = 0; - u8 *data; + struct microcode_amd *mc; + u32 rev, dummy, *new_rev; + bool ret = false; #ifdef CONFIG_X86_32 new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); @@ -237,50 +213,27 @@ static bool apply_microcode_early_amd(void *ucode, size_t size, bool save_patch, patch = &amd_ucode_patch; #endif - if (check_current_patch_level(&rev, true)) - return false; - - eq_id = find_proper_container(ucode, size, &ret); - if (!eq_id) - return false; - - this_equiv_id = eq_id; - header = (u32 *)ret.data; - - /* We're pointing to an equiv table, skip over it. */ - data = ret.data + header[2] + CONTAINER_HDR_SZ; - left = ret.size - (header[2] + CONTAINER_HDR_SZ); - - while (left > 0) { - struct microcode_amd *mc; - - header = (u32 *)data; - if (header[0] != UCODE_UCODE_TYPE || /* type */ - header[1] == 0) /* size */ - break; + desc.cpuid_1_eax = cpuid_1_eax; - mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE); + scan_containers(ucode, size, &desc); - if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) { + mc = desc.mc; + if (!mc) + return ret; - if (!__apply_microcode_amd(mc)) { - rev = mc->hdr.patch_id; - *new_rev = rev; + native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); + if (rev >= mc->hdr.patch_id) + return ret; - if (save_patch) - memcpy(patch, mc, min_t(u32, header[1], PATCH_MAX_SIZE)); - } - } + if (!__apply_microcode_amd(mc)) { + *new_rev = mc->hdr.patch_id; + ret = true; - offset = header[1] + SECTION_HDR_SIZE; - data += offset; - left -= offset; + if (save_patch) + memcpy(patch, mc, min_t(u32, desc.psize, PATCH_MAX_SIZE)); } - if (ret_cont) - *ret_cont = ret; - - return true; + return ret; } static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) @@ -298,10 +251,9 @@ static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) #endif } -void __init load_ucode_amd_bsp(unsigned int family) +void __load_ucode_amd(unsigned int cpuid_1_eax, struct cpio_data *ret) { struct ucode_cpu_info *uci; - u32 eax, ebx, ecx, edx; struct cpio_data cp; const char *path; bool use_pa; @@ -316,184 +268,95 @@ void __init load_ucode_amd_bsp(unsigned int family) use_pa = false; } - if (!get_builtin_microcode(&cp, family)) + if (!get_builtin_microcode(&cp, x86_family(cpuid_1_eax))) cp = find_microcode_in_initrd(path, use_pa); - if (!(cp.data && cp.size)) - return; - - /* Get BSP's CPUID.EAX(1), needed in load_microcode_amd() */ - eax = 1; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - uci->cpu_sig.sig = eax; + /* Needed in load_microcode_amd() */ + uci->cpu_sig.sig = cpuid_1_eax; - apply_microcode_early_amd(cp.data, cp.size, true, NULL); + *ret = cp; } -#ifdef CONFIG_X86_32 -/* - * On 32-bit, since AP's early load occurs before paging is turned on, we - * cannot traverse cpu_equiv_table and microcode_cache in kernel heap memory. - * So during cold boot, AP will apply_ucode_in_initrd() just like the BSP. - * In save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch, - * which is used upon resume from suspend. - */ -void load_ucode_amd_ap(unsigned int family) +void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax) { - struct microcode_amd *mc; - struct cpio_data cp; - - mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch); - if (mc->hdr.patch_id && mc->hdr.processor_rev_id) { - __apply_microcode_amd(mc); - return; - } - - if (!get_builtin_microcode(&cp, family)) - cp = find_microcode_in_initrd((const char *)__pa_nodebug(ucode_path), true); + struct cpio_data cp = { }; + __load_ucode_amd(cpuid_1_eax, &cp); if (!(cp.data && cp.size)) return; - /* - * This would set amd_ucode_patch above so that the following APs can - * use it directly instead of going down this path again. - */ - apply_microcode_early_amd(cp.data, cp.size, true, NULL); + apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, true); } -#else -void load_ucode_amd_ap(unsigned int family) + +void load_ucode_amd_ap(unsigned int cpuid_1_eax) { - struct equiv_cpu_entry *eq; struct microcode_amd *mc; - u32 rev, eax; - u16 eq_id; - - /* 64-bit runs with paging enabled, thus early==false. */ - if (check_current_patch_level(&rev, false)) - return; - - /* First AP hasn't cached it yet, go through the blob. */ - if (!cont.data) { - struct cpio_data cp = { NULL, 0, "" }; + struct cpio_data cp; + u32 *new_rev, rev, dummy; - if (cont.size == -1) - return; + if (IS_ENABLED(CONFIG_X86_32)) { + mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch); + new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); + } else { + mc = (struct microcode_amd *)amd_ucode_patch; + new_rev = &ucode_new_rev; + } -reget: - if (!get_builtin_microcode(&cp, family)) { -#ifdef CONFIG_BLK_DEV_INITRD - if (!initrd_gone) - cp = find_cpio_data(ucode_path, (void *)initrd_start, - initrd_end - initrd_start, NULL); -#endif - if (!(cp.data && cp.size)) { - /* - * Mark it so that other APs do not scan again - * for no real reason and slow down boot - * needlessly. - */ - cont.size = -1; - return; - } - } + native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - if (!apply_microcode_early_amd(cp.data, cp.size, false, &cont)) { - cont.size = -1; + /* Check whether we have saved a new patch already: */ + if (*new_rev && rev < mc->hdr.patch_id) { + if (!__apply_microcode_amd(mc)) { + *new_rev = mc->hdr.patch_id; return; } } - eax = cpuid_eax(0x00000001); - eq = (struct equiv_cpu_entry *)(cont.data + CONTAINER_HDR_SZ); - - eq_id = find_equiv_id(eq, eax); - if (!eq_id) + __load_ucode_amd(cpuid_1_eax, &cp); + if (!(cp.data && cp.size)) return; - if (eq_id == this_equiv_id) { - mc = (struct microcode_amd *)amd_ucode_patch; - - if (mc && rev < mc->hdr.patch_id) { - if (!__apply_microcode_amd(mc)) - ucode_new_rev = mc->hdr.patch_id; - } - - } else { - - /* - * AP has a different equivalence ID than BSP, looks like - * mixed-steppings silicon so go through the ucode blob anew. - */ - goto reget; - } + apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, false); } -#endif /* CONFIG_X86_32 */ static enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size); -int __init save_microcode_in_initrd_amd(unsigned int fam) +int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) { + struct cont_desc desc = { 0 }; enum ucode_state ret; - int retval = 0; - u16 eq_id; - - if (!cont.data) { - if (IS_ENABLED(CONFIG_X86_32) && (cont.size != -1)) { - struct cpio_data cp = { NULL, 0, "" }; - -#ifdef CONFIG_BLK_DEV_INITRD - cp = find_cpio_data(ucode_path, (void *)initrd_start, - initrd_end - initrd_start, NULL); -#endif + struct cpio_data cp; - if (!(cp.data && cp.size)) { - cont.size = -1; - return -EINVAL; - } + cp = find_microcode_in_initrd(ucode_path, false); + if (!(cp.data && cp.size)) + return -EINVAL; - eq_id = find_proper_container(cp.data, cp.size, &cont); - if (!eq_id) { - cont.size = -1; - return -EINVAL; - } + desc.cpuid_1_eax = cpuid_1_eax; - } else - return -EINVAL; - } + scan_containers(cp.data, cp.size, &desc); + if (!desc.mc) + return -EINVAL; - ret = load_microcode_amd(smp_processor_id(), fam, cont.data, cont.size); + ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax), + desc.data, desc.size); if (ret != UCODE_OK) - retval = -EINVAL; - - /* - * This will be freed any msec now, stash patches for the current - * family and switch to patch cache for cpu hotplug, etc later. - */ - cont.data = NULL; - cont.size = 0; + return -EINVAL; - return retval; + return 0; } void reload_ucode_amd(void) { struct microcode_amd *mc; - u32 rev; - - /* - * early==false because this is a syscore ->resume path and by - * that time paging is long enabled. - */ - if (check_current_patch_level(&rev, false)) - return; + u32 rev, dummy; mc = (struct microcode_amd *)amd_ucode_patch; if (!mc) return; + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); + if (rev < mc->hdr.patch_id) { if (!__apply_microcode_amd(mc)) { ucode_new_rev = mc->hdr.patch_id; @@ -631,60 +494,13 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size, return patch_size; } -/* - * Those patch levels cannot be updated to newer ones and thus should be final. - */ -static u32 final_levels[] = { - 0x01000098, - 0x0100009f, - 0x010000af, - 0, /* T-101 terminator */ -}; - -/* - * Check the current patch level on this CPU. - * - * @rev: Use it to return the patch level. It is set to 0 in the case of - * error. - * - * Returns: - * - true: if update should stop - * - false: otherwise - */ -bool check_current_patch_level(u32 *rev, bool early) -{ - u32 lvl, dummy, i; - bool ret = false; - u32 *levels; - - native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy); - - if (IS_ENABLED(CONFIG_X86_32) && early) - levels = (u32 *)__pa_nodebug(&final_levels); - else - levels = final_levels; - - for (i = 0; levels[i]; i++) { - if (lvl == levels[i]) { - lvl = 0; - ret = true; - break; - } - } - - if (rev) - *rev = lvl; - - return ret; -} - static int apply_microcode_amd(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); struct microcode_amd *mc_amd; struct ucode_cpu_info *uci; struct ucode_patch *p; - u32 rev; + u32 rev, dummy; BUG_ON(raw_smp_processor_id() != cpu); @@ -697,8 +513,7 @@ static int apply_microcode_amd(int cpu) mc_amd = p->data; uci->mc = p->data; - if (check_current_patch_level(&rev, false)) - return -1; + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); /* need to apply patch? */ if (rev >= mc_amd->hdr.patch_id) { diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 73102d932760..b4a4cd39b358 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -66,19 +66,50 @@ static DEFINE_MUTEX(microcode_mutex); struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; -/* - * Operations that are run on a target cpu: - */ - struct cpu_info_ctx { struct cpu_signature *cpu_sig; int err; }; +/* + * Those patch levels cannot be updated to newer ones and thus should be final. + */ +static u32 final_levels[] = { + 0x01000098, + 0x0100009f, + 0x010000af, + 0, /* T-101 terminator */ +}; + +/* + * Check the current patch level on this CPU. + * + * Returns: + * - true: if update should stop + * - false: otherwise + */ +static bool amd_check_current_patch_level(void) +{ + u32 lvl, dummy, i; + u32 *levels; + + native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy); + + if (IS_ENABLED(CONFIG_X86_32)) + levels = (u32 *)__pa_nodebug(&final_levels); + else + levels = final_levels; + + for (i = 0; levels[i]; i++) { + if (lvl == levels[i]) + return true; + } + return false; +} + static bool __init check_loader_disabled_bsp(void) { static const char *__dis_opt_str = "dis_ucode_ldr"; - u32 a, b, c, d; #ifdef CONFIG_X86_32 const char *cmdline = (const char *)__pa_nodebug(boot_command_line); @@ -94,18 +125,19 @@ static bool __init check_loader_disabled_bsp(void) if (!have_cpuid_p()) return *res; - a = 1; - c = 0; - native_cpuid(&a, &b, &c, &d); - /* * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not * completely accurate as xen pv guests don't see that CPUID bit set but * that's good enough as they don't land on the BSP path anyway. */ - if (c & BIT(31)) + if (native_cpuid_ecx(1) & BIT(31)) return *res; + if (x86_cpuid_vendor() == X86_VENDOR_AMD) { + if (amd_check_current_patch_level()) + return *res; + } + if (cmdline_find_option_bool(cmdline, option) <= 0) *res = false; @@ -133,23 +165,21 @@ bool get_builtin_firmware(struct cpio_data *cd, const char *name) void __init load_ucode_bsp(void) { - int vendor; - unsigned int family; + unsigned int cpuid_1_eax; if (check_loader_disabled_bsp()) return; - vendor = x86_cpuid_vendor(); - family = x86_cpuid_family(); + cpuid_1_eax = native_cpuid_eax(1); - switch (vendor) { + switch (x86_cpuid_vendor()) { case X86_VENDOR_INTEL: - if (family >= 6) + if (x86_family(cpuid_1_eax) >= 6) load_ucode_intel_bsp(); break; case X86_VENDOR_AMD: - if (family >= 0x10) - load_ucode_amd_bsp(family); + if (x86_family(cpuid_1_eax) >= 0x10) + load_ucode_amd_bsp(cpuid_1_eax); break; default: break; @@ -167,22 +197,21 @@ static bool check_loader_disabled_ap(void) void load_ucode_ap(void) { - int vendor, family; + unsigned int cpuid_1_eax; if (check_loader_disabled_ap()) return; - vendor = x86_cpuid_vendor(); - family = x86_cpuid_family(); + cpuid_1_eax = native_cpuid_eax(1); - switch (vendor) { + switch (x86_cpuid_vendor()) { case X86_VENDOR_INTEL: - if (family >= 6) + if (x86_family(cpuid_1_eax) >= 6) load_ucode_intel_ap(); break; case X86_VENDOR_AMD: - if (family >= 0x10) - load_ucode_amd_ap(family); + if (x86_family(cpuid_1_eax) >= 0x10) + load_ucode_amd_ap(cpuid_1_eax); break; default: break; @@ -201,7 +230,7 @@ static int __init save_microcode_in_initrd(void) break; case X86_VENDOR_AMD: if (c->x86 >= 0x10) - ret = save_microcode_in_initrd_amd(c->x86); + return save_microcode_in_initrd_amd(cpuid_eax(1)); break; default: break; diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 34178564be2a..c1ea5b999839 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -1,4 +1,5 @@ #include <linux/kernel.h> +#include <linux/sched.h> #include <linux/mm.h> #include <asm/cpufeature.h> #include <asm/msr.h> @@ -14,6 +15,8 @@ static void early_init_transmeta(struct cpuinfo_x86 *c) if (xlvl >= 0x80860001) c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001); } + + clear_sched_clock_stable(); } static void init_transmeta(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 90e8dde3ec26..b2bbad6ebe4d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -580,24 +580,19 @@ static void __init update_e820_saved(void) } #define MAX_GAP_END 0x100000000ull /* - * Search for a gap in the e820 memory space from start_addr to end_addr. + * Search for a gap in the e820 memory space from 0 to MAX_GAP_END. */ -__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, - unsigned long start_addr, unsigned long long end_addr) +static int __init e820_search_gap(unsigned long *gapstart, + unsigned long *gapsize) { - unsigned long long last; + unsigned long long last = MAX_GAP_END; int i = e820->nr_map; int found = 0; - last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END; - while (--i >= 0) { unsigned long long start = e820->map[i].addr; unsigned long long end = start + e820->map[i].size; - if (end < start_addr) - continue; - /* * Since "last" is at most 4GB, we know we'll * fit in 32 bits if this condition is true @@ -628,18 +623,19 @@ __init void e820_setup_gap(void) unsigned long gapstart, gapsize; int found; - gapstart = 0x10000000; gapsize = 0x400000; - found = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END); + found = e820_search_gap(&gapstart, &gapsize); -#ifdef CONFIG_X86_64 if (!found) { +#ifdef CONFIG_X86_64 gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; printk(KERN_ERR "e820: cannot find a gap in the 32bit address range\n" "e820: PCI devices with unassigned 32bit BARs may break!\n"); - } +#else + gapstart = 0x10000000; #endif + } /* * e820_reserve_resources_late protect stolen RAM already diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index de7234401275..e1114f070c2d 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -9,7 +9,6 @@ #include <asm/fpu/regset.h> #include <asm/fpu/signal.h> #include <asm/fpu/types.h> -#include <asm/fpu/xstate.h> #include <asm/traps.h> #include <linux/hardirq.h> @@ -179,14 +178,8 @@ void fpstate_init(union fpregs_state *state) memset(state, 0, fpu_kernel_xstate_size); - /* - * XRSTORS requires that this bit is set in xcomp_bv, or - * it will #GP. Make sure it is replaced after the memset(). - */ if (static_cpu_has(X86_FEATURE_XSAVES)) - state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | - xfeatures_mask; - + fpstate_init_xstate(&state->xsave); if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); else diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 60dece392b3a..19bdd1bf8160 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -48,13 +48,7 @@ void fpu__init_cpu(void) fpu__init_cpu_xstate(); } -/* - * The earliest FPU detection code. - * - * Set the X86_FEATURE_FPU CPU-capability bit based on - * trying to execute an actual sequence of FPU instructions: - */ -static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) +static bool fpu__probe_without_cpuid(void) { unsigned long cr0; u16 fsw, fcw; @@ -65,18 +59,25 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) cr0 &= ~(X86_CR0_TS | X86_CR0_EM); write_cr0(cr0); - if (!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) { - asm volatile("fninit ; fnstsw %0 ; fnstcw %1" - : "+m" (fsw), "+m" (fcw)); + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" : "+m" (fsw), "+m" (fcw)); + + pr_info("x86/fpu: Probing for FPU: FSW=0x%04hx FCW=0x%04hx\n", fsw, fcw); - if (fsw == 0 && (fcw & 0x103f) == 0x003f) - set_cpu_cap(c, X86_FEATURE_FPU); + return fsw == 0 && (fcw & 0x103f) == 0x003f; +} + +static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) +{ + if (!boot_cpu_has(X86_FEATURE_CPUID) && + !test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) { + if (fpu__probe_without_cpuid()) + setup_force_cpu_cap(X86_FEATURE_FPU); else - clear_cpu_cap(c, X86_FEATURE_FPU); + setup_clear_cpu_cap(X86_FEATURE_FPU); } #ifndef CONFIG_MATH_EMULATION - if (!boot_cpu_has(X86_FEATURE_FPU)) { + if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_FPU)) { pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n"); for (;;) asm volatile("hlt"); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 1d7770447b3e..c24ac1efb12d 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -78,6 +78,7 @@ void fpu__xstate_clear_all_cpu_caps(void) setup_clear_cpu_cap(X86_FEATURE_PKU); setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW); setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS); + setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ); } /* @@ -705,8 +706,14 @@ void __init fpu__init_system_xstate(void) WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; + if (!boot_cpu_has(X86_FEATURE_FPU)) { + pr_info("x86/fpu: No FPU detected\n"); + return; + } + if (!boot_cpu_has(X86_FEATURE_XSAVE)) { - pr_info("x86/fpu: Legacy x87 FPU detected.\n"); + pr_info("x86/fpu: x87 FPU will use %s\n", + boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE"); return; } diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index f16c55bfc090..e5fb436a6548 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -49,3 +49,65 @@ asmlinkage __visible void __init i386_start_kernel(void) start_kernel(); } + +/* + * Initialize page tables. This creates a PDE and a set of page + * tables, which are located immediately beyond __brk_base. The variable + * _brk_end is set up to point to the first "safe" location. + * Mappings are created both at virtual address 0 (identity mapping) + * and PAGE_OFFSET for up to _end. + * + * In PAE mode initial_page_table is statically defined to contain + * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3 + * entries). The identity mapping is handled by pointing two PGD entries + * to the first kernel PMD. Note the upper half of each PMD or PTE are + * always zero at this stage. + */ +void __init mk_early_pgtbl_32(void) +{ +#ifdef __pa +#undef __pa +#endif +#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET) + pte_t pte, *ptep; + int i; + unsigned long *ptr; + /* Enough space to fit pagetables for the low memory linear map */ + const unsigned long limit = __pa(_end) + + (PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT); +#ifdef CONFIG_X86_PAE + pmd_t pl2, *pl2p = (pmd_t *)__pa(initial_pg_pmd); +#define SET_PL2(pl2, val) { (pl2).pmd = (val); } +#else + pgd_t pl2, *pl2p = (pgd_t *)__pa(initial_page_table); +#define SET_PL2(pl2, val) { (pl2).pgd = (val); } +#endif + + ptep = (pte_t *)__pa(__brk_base); + pte.pte = PTE_IDENT_ATTR; + + while ((pte.pte & PTE_PFN_MASK) < limit) { + + SET_PL2(pl2, (unsigned long)ptep | PDE_IDENT_ATTR); + *pl2p = pl2; +#ifndef CONFIG_X86_PAE + /* Kernel PDE entry */ + *(pl2p + ((PAGE_OFFSET >> PGDIR_SHIFT))) = pl2; +#endif + for (i = 0; i < PTRS_PER_PTE; i++) { + *ptep = pte; + pte.pte += PAGE_SIZE; + ptep++; + } + + pl2p++; + } + + ptr = (unsigned long *)__pa(&max_pfn_mapped); + /* Can't use pte_pfn() since it's a call with CONFIG_PARAVIRT */ + *ptr = (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT; + + ptr = (unsigned long *)__pa(&_brk_end); + *ptr = (unsigned long)ptep + PAGE_OFFSET; +} + diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 4e8577d03372..1f85ee8f9439 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -24,6 +24,7 @@ #include <asm/nops.h> #include <asm/bootparam.h> #include <asm/export.h> +#include <asm/pgtable_32.h> /* Physical address */ #define pa(X) ((X) - __PAGE_OFFSET) @@ -41,44 +42,10 @@ #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id -/* - * This is how much memory in addition to the memory covered up to - * and including _end we need mapped initially. - * We need: - * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE) - * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE) - * - * Modulo rounding, each megabyte assigned here requires a kilobyte of - * memory, which is currently unreclaimed. - * - * This should be a multiple of a page. - * - * KERNEL_IMAGE_SIZE should be greater than pa(_end) - * and small than max_low_pfn, otherwise will waste some page table entries - */ - -#if PTRS_PER_PMD > 1 -#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) -#else -#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) -#endif #define SIZEOF_PTREGS 17*4 /* - * Number of possible pages in the lowmem region. - * - * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a - * gas warning about overflowing shift count when gas has been compiled - * with only a host target support using a 32-bit type for internal - * representation. - */ -LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT) - -/* Enough space to fit pagetables for the low memory linear map */ -MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT - -/* * Worst-case size of the kernel mapping we need to make: * a relocatable kernel can live anywhere in lowmem, so we need to be able * to map all of lowmem. @@ -160,90 +127,15 @@ ENTRY(startup_32) call load_ucode_bsp #endif -/* - * Initialize page tables. This creates a PDE and a set of page - * tables, which are located immediately beyond __brk_base. The variable - * _brk_end is set up to point to the first "safe" location. - * Mappings are created both at virtual address 0 (identity mapping) - * and PAGE_OFFSET for up to _end. - */ -#ifdef CONFIG_X86_PAE - - /* - * In PAE mode initial_page_table is statically defined to contain - * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3 - * entries). The identity mapping is handled by pointing two PGD entries - * to the first kernel PMD. - * - * Note the upper half of each PMD or PTE are always zero at this stage. - */ - -#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ - - xorl %ebx,%ebx /* %ebx is kept at zero */ - - movl $pa(__brk_base), %edi - movl $pa(initial_pg_pmd), %edx - movl $PTE_IDENT_ATTR, %eax -10: - leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ - movl %ecx,(%edx) /* Store PMD entry */ - /* Upper half already zero */ - addl $8,%edx - movl $512,%ecx -11: - stosl - xchgl %eax,%ebx - stosl - xchgl %eax,%ebx - addl $0x1000,%eax - loop 11b - - /* - * End condition: we must map up to the end + MAPPING_BEYOND_END. - */ - movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp - cmpl %ebp,%eax - jb 10b -1: - addl $__PAGE_OFFSET, %edi - movl %edi, pa(_brk_end) - shrl $12, %eax - movl %eax, pa(max_pfn_mapped) + /* Create early pagetables. */ + call mk_early_pgtbl_32 /* Do early initialization of the fixmap area */ movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax +#ifdef CONFIG_X86_PAE +#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8) -#else /* Not PAE */ - -page_pde_offset = (__PAGE_OFFSET >> 20); - - movl $pa(__brk_base), %edi - movl $pa(initial_page_table), %edx - movl $PTE_IDENT_ATTR, %eax -10: - leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ - movl %ecx,(%edx) /* Store identity PDE entry */ - movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ - addl $4,%edx - movl $1024, %ecx -11: - stosl - addl $0x1000,%eax - loop 11b - /* - * End condition: we must map up to the end + MAPPING_BEYOND_END. - */ - movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp - cmpl %ebp,%eax - jb 10b - addl $__PAGE_OFFSET, %edi - movl %edi, pa(_brk_end) - shrl $12, %eax - movl %eax, pa(max_pfn_mapped) - - /* Do early initialization of the fixmap area */ - movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax +#else movl %eax,pa(initial_page_table+0xffc) #endif @@ -666,6 +558,7 @@ ENTRY(setup_once_ref) __PAGE_ALIGNED_BSS .align PAGE_SIZE #ifdef CONFIG_X86_PAE +.globl initial_pg_pmd initial_pg_pmd: .fill 1024*KPMDS,4,0 #else diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index cb9c1ed1d391..f73f475d0573 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -132,10 +132,8 @@ int sched_set_itmt_support(void) sysctl_sched_itmt_enabled = 1; - if (sysctl_sched_itmt_enabled) { - x86_topology_update = true; - rebuild_sched_domains(); - } + x86_topology_update = true; + rebuild_sched_domains(); mutex_unlock(&itmt_update_mutex); diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index fc25f698d792..c37bd0f39c70 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -32,8 +32,7 @@ static void bug_at(unsigned char *ip, int line) * Something went wrong. Crash the box, as something could be * corrupting the kernel. */ - pr_warning("Unexpected op at %pS [%p] (%02x %02x %02x %02x %02x) %s:%d\n", - ip, ip, ip[0], ip[1], ip[2], ip[3], ip[4], __FILE__, line); + pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph) %d\n", ip, ip, ip, line); BUG(); } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index eb3509338ae0..520b8dfe1640 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -745,7 +745,7 @@ __visible __used void *trampoline_handler(struct pt_regs *regs) * will be the real return address, and all the rest will * point to kretprobe_trampoline. */ - hlist_for_each_entry_safe(ri, tmp, head, hlist) { + hlist_for_each_entry(ri, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 36bc66416021..099fcba4981d 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -620,18 +620,4 @@ void __init kvm_spinlock_init(void) } } -static __init int kvm_spinlock_init_jump(void) -{ - if (!kvm_para_available()) - return 0; - if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) - return 0; - - static_key_slow_inc(¶virt_ticketlocks_enabled); - printk(KERN_INFO "KVM setup paravirtual spinlock\n"); - - return 0; -} -early_initcall(kvm_spinlock_init_jump); - #endif /* CONFIG_PARAVIRT_SPINLOCKS */ diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 2a5cafdf8808..542710b99f52 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -107,12 +107,12 @@ static inline void kvm_sched_clock_init(bool stable) { if (!stable) { pv_time_ops.sched_clock = kvm_clock_read; + clear_sched_clock_stable(); return; } kvm_sched_clock_offset = kvm_clock_read(); pv_time_ops.sched_clock = kvm_sched_clock_read; - set_sched_clock_stable(); printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n", kvm_sched_clock_offset); diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 6d4bf812af45..6259327f3454 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -42,6 +42,3 @@ struct pv_lock_ops pv_lock_ops = { #endif /* SMP */ }; EXPORT_SYMBOL(pv_lock_ops); - -struct static_key paravirt_ticketlocks_enabled = STATIC_KEY_INIT_FALSE; -EXPORT_SYMBOL(paravirt_ticketlocks_enabled); diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 5d400ba1349d..d47517941bbc 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -296,7 +296,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, /* were we called with bad_dma_address? */ badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE); - if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) { + if (unlikely(dma_addr < badend)) { WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " "address 0x%Lx\n", dma_addr); return; diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4cfba947d774..69780edf0dde 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1176,6 +1176,20 @@ void __init setup_arch(char **cmdline_p) /* Allocate bigger log buffer */ setup_log_buf(1); + if (efi_enabled(EFI_BOOT)) { + switch (boot_params.secure_boot) { + case efi_secureboot_mode_disabled: + pr_info("Secure boot disabled\n"); + break; + case efi_secureboot_mode_enabled: + pr_info("Secure boot enabled\n"); + break; + default: + pr_info("Secure boot could not be determined\n"); + break; + } + } + reserve_initrd(); acpi_table_upgrade(); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index bf0c6d049080..1dc86ee60a03 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -563,11 +563,9 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) * as we may switch to the interrupt stack. */ debug_stack_usage_inc(); - preempt_disable(); cond_local_irq_enable(regs); do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); cond_local_irq_disable(regs); - preempt_enable_no_resched(); debug_stack_usage_dec(); exit: ist_exit(regs); @@ -742,14 +740,12 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_inc(); /* It's safe to allow irq's after DR6 has been saved */ - preempt_disable(); cond_local_irq_enable(regs); if (v8086_mode(regs)) { handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, X86_TRAP_DB); cond_local_irq_disable(regs); - preempt_enable_no_resched(); debug_stack_usage_dec(); goto exit; } @@ -769,7 +765,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) send_sigtrap(tsk, regs, error_code, si_code); cond_local_irq_disable(regs); - preempt_enable_no_resched(); debug_stack_usage_dec(); exit: diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 37e7cf544e51..2724dc82f992 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1107,6 +1107,16 @@ static u64 read_tsc(struct clocksource *cs) return (u64)rdtsc_ordered(); } +static void tsc_cs_mark_unstable(struct clocksource *cs) +{ + if (tsc_unstable) + return; + tsc_unstable = 1; + clear_sched_clock_stable(); + disable_sched_clock_irqtime(); + pr_info("Marking TSC unstable due to clocksource watchdog\n"); +} + /* * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc() */ @@ -1119,6 +1129,7 @@ static struct clocksource clocksource_tsc = { CLOCK_SOURCE_MUST_VERIFY, .archdata = { .vclock_mode = VCLOCK_TSC }, .resume = tsc_resume, + .mark_unstable = tsc_cs_mark_unstable, }; void mark_tsc_unstable(char *reason) diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index ec5d7545e6dc..0442d98367ae 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -160,11 +160,12 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) static void mark_screen_rdonly(struct mm_struct *mm) { + struct vm_area_struct *vma; + spinlock_t *ptl; pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; - spinlock_t *ptl; int i; down_write(&mm->mmap_sem); @@ -177,7 +178,7 @@ static void mark_screen_rdonly(struct mm_struct *mm) pmd = pmd_offset(pud, 0xA0000); if (pmd_trans_huge(*pmd)) { - struct vm_area_struct *vma = find_vma(mm, 0xA0000); + vma = find_vma(mm, 0xA0000); split_huge_pmd(vma, pmd, 0xA0000); } if (pmd_none_or_clear_bad(pmd)) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 1572c35b4f1a..2ecd7dab4631 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -964,10 +964,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, /* Calculate cpu time spent by current task in 100ns units */ static u64 current_task_runtime_100ns(void) { - cputime_t utime, stime; + u64 utime, stime; task_cputime_adjusted(current, &utime, &stime); - return div_u64(cputime_to_nsecs(utime + stime), 100); + + return div_u64(utime + stime, 100); } static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 073d1f1a620b..a8e91ae89fb3 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -156,13 +156,13 @@ EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) { + unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy; int d0; xloops *= 4; asm("mull %%edx" :"=d" (xloops), "=&a" (d0) - :"1" (xloops), "0" - (this_cpu_read(cpu_info.loops_per_jiffy) * (HZ/4))); + :"1" (xloops), "0" (lpj * (HZ / 4))); __delay(++xloops); } diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index 6aad870e8962..04ca8764f0c0 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -19,8 +19,7 @@ #include <linux/efi.h> #include <linux/efi-bgrt.h> -struct acpi_table_bgrt *bgrt_tab; -void *__initdata bgrt_image; +struct acpi_table_bgrt bgrt_tab; size_t __initdata bgrt_image_size; struct bmp_header { @@ -28,66 +27,58 @@ struct bmp_header { u32 size; } __packed; -void __init efi_bgrt_init(void) +void __init efi_bgrt_init(struct acpi_table_header *table) { - acpi_status status; void *image; struct bmp_header bmp_header; + struct acpi_table_bgrt *bgrt = &bgrt_tab; if (acpi_disabled) return; - status = acpi_get_table("BGRT", 0, - (struct acpi_table_header **)&bgrt_tab); - if (ACPI_FAILURE(status)) - return; - - if (bgrt_tab->header.length < sizeof(*bgrt_tab)) { + if (table->length < sizeof(bgrt_tab)) { pr_notice("Ignoring BGRT: invalid length %u (expected %zu)\n", - bgrt_tab->header.length, sizeof(*bgrt_tab)); + table->length, sizeof(bgrt_tab)); return; } - if (bgrt_tab->version != 1) { + *bgrt = *(struct acpi_table_bgrt *)table; + if (bgrt->version != 1) { pr_notice("Ignoring BGRT: invalid version %u (expected 1)\n", - bgrt_tab->version); - return; + bgrt->version); + goto out; } - if (bgrt_tab->status & 0xfe) { + if (bgrt->status & 0xfe) { pr_notice("Ignoring BGRT: reserved status bits are non-zero %u\n", - bgrt_tab->status); - return; + bgrt->status); + goto out; } - if (bgrt_tab->image_type != 0) { + if (bgrt->image_type != 0) { pr_notice("Ignoring BGRT: invalid image type %u (expected 0)\n", - bgrt_tab->image_type); - return; + bgrt->image_type); + goto out; } - if (!bgrt_tab->image_address) { + if (!bgrt->image_address) { pr_notice("Ignoring BGRT: null image address\n"); - return; + goto out; } - image = memremap(bgrt_tab->image_address, sizeof(bmp_header), MEMREMAP_WB); + image = early_memremap(bgrt->image_address, sizeof(bmp_header)); if (!image) { pr_notice("Ignoring BGRT: failed to map image header memory\n"); - return; + goto out; } memcpy(&bmp_header, image, sizeof(bmp_header)); - memunmap(image); + early_memunmap(image, sizeof(bmp_header)); if (bmp_header.id != 0x4d42) { pr_notice("Ignoring BGRT: Incorrect BMP magic number 0x%x (expected 0x4d42)\n", bmp_header.id); - return; + goto out; } bgrt_image_size = bmp_header.size; + efi_mem_reserve(bgrt->image_address, bgrt_image_size); - bgrt_image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB); - if (!bgrt_image) { - pr_notice("Ignoring BGRT: failed to map image memory\n"); - bgrt_image = NULL; - return; - } - - efi_mem_reserve(bgrt_tab->image_address, bgrt_image_size); + return; +out: + memset(bgrt, 0, sizeof(bgrt_tab)); } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 274dfc481849..565dff3c9a12 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -542,11 +542,6 @@ void __init efi_init(void) efi_print_memmap(); } -void __init efi_late_init(void) -{ - efi_bgrt_init(); -} - void __init efi_set_executable(efi_memory_desc_t *md, bool executable) { u64 addr, npages; @@ -960,6 +955,11 @@ static void __init __efi_enter_virtual_mode(void) return; } + if (efi_enabled(EFI_DBG)) { + pr_info("EFI runtime memory map:\n"); + efi_print_memmap(); + } + BUG_ON(!efi.systab); if (efi_setup_page_tables(pa, 1 << pg_shift)) { diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 2f25a363068c..a4695da42d77 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -414,10 +414,44 @@ void __init parse_efi_setup(u64 phys_addr, u32 data_len) efi_setup = phys_addr + sizeof(struct setup_data); } -void __init efi_runtime_update_mappings(void) +static int __init efi_update_mappings(efi_memory_desc_t *md, unsigned long pf) { unsigned long pfn; pgd_t *pgd = efi_pgd; + int err1, err2; + + /* Update the 1:1 mapping */ + pfn = md->phys_addr >> PAGE_SHIFT; + err1 = kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, md->num_pages, pf); + if (err1) { + pr_err("Error while updating 1:1 mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, md->virt_addr); + } + + err2 = kernel_map_pages_in_pgd(pgd, pfn, md->virt_addr, md->num_pages, pf); + if (err2) { + pr_err("Error while updating VA mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, md->virt_addr); + } + + return err1 || err2; +} + +static int __init efi_update_mem_attr(struct mm_struct *mm, efi_memory_desc_t *md) +{ + unsigned long pf = 0; + + if (md->attribute & EFI_MEMORY_XP) + pf |= _PAGE_NX; + + if (!(md->attribute & EFI_MEMORY_RO)) + pf |= _PAGE_RW; + + return efi_update_mappings(md, pf); +} + +void __init efi_runtime_update_mappings(void) +{ efi_memory_desc_t *md; if (efi_enabled(EFI_OLD_MEMMAP)) { @@ -426,6 +460,24 @@ void __init efi_runtime_update_mappings(void) return; } + /* + * Use the EFI Memory Attribute Table for mapping permissions if it + * exists, since it is intended to supersede EFI_PROPERTIES_TABLE. + */ + if (efi_enabled(EFI_MEM_ATTR)) { + efi_memattr_apply_permissions(NULL, efi_update_mem_attr); + return; + } + + /* + * EFI_MEMORY_ATTRIBUTES_TABLE is intended to replace + * EFI_PROPERTIES_TABLE. So, use EFI_PROPERTIES_TABLE to update + * permissions only if EFI_MEMORY_ATTRIBUTES_TABLE is not + * published by the firmware. Even if we find a buggy implementation of + * EFI_MEMORY_ATTRIBUTES_TABLE, don't fall back to + * EFI_PROPERTIES_TABLE, because of the same reason. + */ + if (!efi_enabled(EFI_NX_PE_DATA)) return; @@ -446,15 +498,7 @@ void __init efi_runtime_update_mappings(void) (md->type != EFI_RUNTIME_SERVICES_CODE)) pf |= _PAGE_RW; - /* Update the 1:1 mapping */ - pfn = md->phys_addr >> PAGE_SHIFT; - if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, md->num_pages, pf)) - pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", - md->phys_addr, md->virt_addr); - - if (kernel_map_pages_in_pgd(pgd, pfn, md->virt_addr, md->num_pages, pf)) - pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", - md->phys_addr, md->virt_addr); + efi_update_mappings(md, pf); } } diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig index d957d5f21a86..0bc60a308730 100644 --- a/arch/x86/ras/Kconfig +++ b/arch/x86/ras/Kconfig @@ -1,6 +1,6 @@ config MCE_AMD_INJ tristate "Simple MCE injection interface for AMD processors" - depends on RAS && EDAC_DECODE_MCE && DEBUG_FS && AMD_NB + depends on RAS && X86_MCE && DEBUG_FS && AMD_NB default n help This is a simple debugfs interface to inject MCEs and test different diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index e8a9ea7d7a21..25a7c4302ce7 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -141,25 +141,6 @@ void __init xen_init_spinlocks(void) pv_lock_ops.vcpu_is_preempted = PV_CALLEE_SAVE(xen_vcpu_stolen); } -/* - * While the jump_label init code needs to happend _after_ the jump labels are - * enabled and before SMP is started. Hence we use pre-SMP initcall level - * init. We cannot do it in xen_init_spinlocks as that is done before - * jump labels are activated. - */ -static __init int xen_init_spinlocks_jump(void) -{ - if (!xen_pvspin) - return 0; - - if (!xen_domain()) - return 0; - - static_key_slow_inc(¶virt_ticketlocks_enabled); - return 0; -} -early_initcall(xen_init_spinlocks_jump); - static __init int xen_parse_nopvspin(char *arg) { xen_pvspin = false; diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index b7fbaa56b51a..9e9760b20be5 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -1,7 +1,6 @@ generic-y += bitsperlong.h generic-y += bug.h generic-y += clkdev.h -generic-y += cputime.h generic-y += div64.h generic-y += dma-contiguous.h generic-y += emergency-restart.h |