diff options
Diffstat (limited to 'kernel')
33 files changed, 804 insertions, 162 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 42d17f730780..5b30f8baaf02 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1668,7 +1668,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) if (err < 0) goto free_prog; - prog->aux->load_time = ktime_get_boot_ns(); + prog->aux->load_time = ktime_get_boottime_ns(); err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name); if (err) goto free_prog; diff --git a/kernel/cpu.c b/kernel/cpu.c index 077fde6fb953..e84c0873559e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -522,7 +522,7 @@ static int bringup_wait_for_ap(unsigned int cpu) /* * SMT soft disabling on X86 requires to bring the CPU out of the * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The - * CPU marked itself as booted_once in cpu_notify_starting() so the + * CPU marked itself as booted_once in notify_cpu_starting() so the * cpu_smt_allowed() check will now return false if this is not the * primary sibling. */ @@ -1221,6 +1221,13 @@ int freeze_secondary_cpus(int primary) for_each_online_cpu(cpu) { if (cpu == primary) continue; + + if (pm_wakeup_pending()) { + pr_info("Wakeup pending. Abort CPU freeze\n"); + error = -EBUSY; + break; + } + trace_suspend_resume(TPS("CPU_OFF"), cpu, true); error = _cpu_down(cpu, 1, CPUHP_OFFLINE); trace_suspend_resume(TPS("CPU_OFF"), cpu, false); @@ -1964,6 +1971,9 @@ static ssize_t write_cpuhp_fail(struct device *dev, if (ret) return ret; + if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE) + return -EINVAL; + /* * Cannot fail STARTING/DYING callbacks. */ @@ -2339,6 +2349,9 @@ static int __init mitigations_parse_cmdline(char *arg) cpu_mitigations = CPU_MITIGATIONS_AUTO; else if (!strcmp(arg, "auto,nosmt")) cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT; + else + pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n", + arg); return 0; } diff --git a/kernel/events/core.c b/kernel/events/core.c index abbd4b3b96c2..29e5f7880a4b 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5005,6 +5005,9 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg) if (perf_event_check_period(event, value)) return -EINVAL; + if (!event->attr.freq && (value & (1ULL << 63))) + return -EINVAL; + event_function_call(event, __perf_event_period, &value); return 0; @@ -5923,7 +5926,7 @@ static void perf_sample_regs_user(struct perf_regs *regs_user, if (user_mode(regs)) { regs_user->abi = perf_reg_abi(current); regs_user->regs = regs; - } else if (current->mm) { + } else if (!(current->flags & PF_KTHREAD)) { perf_get_regs_user(regs_user, regs, regs_user_copy); } else { regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE; @@ -10033,6 +10036,12 @@ void perf_pmu_unregister(struct pmu *pmu) } EXPORT_SYMBOL_GPL(perf_pmu_unregister); +static inline bool has_extended_regs(struct perf_event *event) +{ + return (event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK) || + (event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK); +} + static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) { struct perf_event_context *ctx = NULL; @@ -10064,12 +10073,16 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event) perf_event_ctx_unlock(event->group_leader, ctx); if (!ret) { + if (!(pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS) && + has_extended_regs(event)) + ret = -EOPNOTSUPP; + if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE && - event_has_any_exclude_flag(event)) { - if (event->destroy) - event->destroy(event); + event_has_any_exclude_flag(event)) ret = -EINVAL; - } + + if (ret && event->destroy) + event->destroy(event); } if (ret) @@ -10680,11 +10693,11 @@ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id) break; case CLOCK_BOOTTIME: - event->clock = &ktime_get_boot_ns; + event->clock = &ktime_get_boottime_ns; break; case CLOCK_TAI: - event->clock = &ktime_get_tai_ns; + event->clock = &ktime_get_clocktai_ns; break; default: diff --git a/kernel/fork.c b/kernel/fork.c index 399aca51ff75..a83ef7243ccc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -248,7 +248,11 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node) struct page *page = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); - return page ? page_address(page) : NULL; + if (likely(page)) { + tsk->stack = page_address(page); + return tsk->stack; + } + return NULL; #endif } @@ -2032,6 +2036,7 @@ static __latent_entropy struct task_struct *copy_process( O_RDWR | O_CLOEXEC); if (IS_ERR(pidfile)) { put_unused_fd(pidfd); + retval = PTR_ERR(pidfile); goto bad_fork_free_pid; } get_pid(pid); /* held by pidfile now */ @@ -2112,7 +2117,7 @@ static __latent_entropy struct task_struct *copy_process( */ p->start_time = ktime_get_ns(); - p->real_start_time = ktime_get_boot_ns(); + p->real_start_time = ktime_get_boottime_ns(); /* * Make it visible to the rest of the system, but dont wake it up yet. diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile index ff6e352e3a6c..b4f53717d143 100644 --- a/kernel/irq/Makefile +++ b/kernel/irq/Makefile @@ -2,6 +2,9 @@ obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o obj-$(CONFIG_IRQ_TIMINGS) += timings.o +ifeq ($(CONFIG_TEST_IRQ_TIMINGS),y) + CFLAGS_timings.o += -DDEBUG +endif obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c index f18cd5aa33e8..4352b08ae48d 100644 --- a/kernel/irq/affinity.c +++ b/kernel/irq/affinity.c @@ -94,8 +94,7 @@ static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask, return nodes; } -static int __irq_build_affinity_masks(const struct irq_affinity *affd, - unsigned int startvec, +static int __irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs, unsigned int firstvec, cpumask_var_t *node_to_cpumask, @@ -171,8 +170,7 @@ static int __irq_build_affinity_masks(const struct irq_affinity *affd, * 1) spread present CPU on these vectors * 2) spread other possible CPUs on these vectors */ -static int irq_build_affinity_masks(const struct irq_affinity *affd, - unsigned int startvec, unsigned int numvecs, +static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs, unsigned int firstvec, struct irq_affinity_desc *masks) { @@ -197,7 +195,7 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd, build_node_to_cpumask(node_to_cpumask); /* Spread on present CPUs starting from affd->pre_vectors */ - nr_present = __irq_build_affinity_masks(affd, curvec, numvecs, + nr_present = __irq_build_affinity_masks(curvec, numvecs, firstvec, node_to_cpumask, cpu_present_mask, nmsk, masks); @@ -212,7 +210,7 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd, else curvec = firstvec + nr_present; cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask); - nr_others = __irq_build_affinity_masks(affd, curvec, numvecs, + nr_others = __irq_build_affinity_masks(curvec, numvecs, firstvec, node_to_cpumask, npresmsk, nmsk, masks); put_online_cpus(); @@ -295,7 +293,7 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd) unsigned int this_vecs = affd->set_size[i]; int ret; - ret = irq_build_affinity_masks(affd, curvec, this_vecs, + ret = irq_build_affinity_masks(curvec, this_vecs, curvec, masks); if (ret) { kfree(masks); diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c index 16cbf6beb276..ae60cae24e9a 100644 --- a/kernel/irq/autoprobe.c +++ b/kernel/irq/autoprobe.c @@ -90,7 +90,7 @@ unsigned long probe_irq_on(void) /* It triggered already - consider it spurious. */ if (!(desc->istate & IRQS_WAITING)) { desc->istate &= ~IRQS_AUTODETECT; - irq_shutdown(desc); + irq_shutdown_and_deactivate(desc); } else if (i < 32) mask |= 1 << i; @@ -127,7 +127,7 @@ unsigned int probe_irq_mask(unsigned long val) mask |= 1 << i; desc->istate &= ~IRQS_AUTODETECT; - irq_shutdown(desc); + irq_shutdown_and_deactivate(desc); } raw_spin_unlock_irq(&desc->lock); } @@ -169,7 +169,7 @@ int probe_irq_off(unsigned long val) nr_of_irqs++; } desc->istate &= ~IRQS_AUTODETECT; - irq_shutdown(desc); + irq_shutdown_and_deactivate(desc); } raw_spin_unlock_irq(&desc->lock); } diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 29d6c7d070b4..b76703b2c0af 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -314,6 +314,12 @@ void irq_shutdown(struct irq_desc *desc) } irq_state_clr_started(desc); } +} + + +void irq_shutdown_and_deactivate(struct irq_desc *desc) +{ + irq_shutdown(desc); /* * This must be called even if the interrupt was never started up, * because the activation can happen before the interrupt is @@ -748,6 +754,8 @@ void handle_fasteoi_nmi(struct irq_desc *desc) unsigned int irq = irq_desc_get_irq(desc); irqreturn_t res; + __kstat_incr_irqs_this_cpu(desc); + trace_irq_handler_entry(irq, action); /* * NMIs cannot be shared, there is only one action. @@ -962,6 +970,8 @@ void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc) unsigned int irq = irq_desc_get_irq(desc); irqreturn_t res; + __kstat_incr_irqs_this_cpu(desc); + trace_irq_handler_entry(irq, action); res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id)); trace_irq_handler_exit(irq, action, res); diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c index 5b1072e394b2..6c7ca2e983a5 100644 --- a/kernel/irq/cpuhotplug.c +++ b/kernel/irq/cpuhotplug.c @@ -116,7 +116,7 @@ static bool migrate_one_irq(struct irq_desc *desc) */ if (irqd_affinity_is_managed(d)) { irqd_set_managed_shutdown(d); - irq_shutdown(desc); + irq_shutdown_and_deactivate(desc); return false; } affinity = cpu_online_mask; diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index 70c3053bc1f6..3924fbe829d4 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -82,6 +82,7 @@ extern int irq_activate_and_startup(struct irq_desc *desc, bool resend); extern int irq_startup(struct irq_desc *desc, bool resend, bool force); extern void irq_shutdown(struct irq_desc *desc); +extern void irq_shutdown_and_deactivate(struct irq_desc *desc); extern void irq_enable(struct irq_desc *desc); extern void irq_disable(struct irq_desc *desc); extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu); @@ -96,6 +97,10 @@ static inline void irq_mark_irq(unsigned int irq) { } extern void irq_mark_irq(unsigned int irq); #endif +extern int __irq_get_irqchip_state(struct irq_data *data, + enum irqchip_irq_state which, + bool *state); + extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr); irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags); @@ -354,6 +359,16 @@ static inline int irq_timing_decode(u64 value, u64 *timestamp) return value & U16_MAX; } +static __always_inline void irq_timings_push(u64 ts, int irq) +{ + struct irq_timings *timings = this_cpu_ptr(&irq_timings); + + timings->values[timings->count & IRQ_TIMINGS_MASK] = + irq_timing_encode(ts, irq); + + timings->count++; +} + /* * The function record_irq_time is only called in one place in the * interrupts handler. We want this function always inline so the code @@ -367,15 +382,8 @@ static __always_inline void record_irq_time(struct irq_desc *desc) if (!static_branch_likely(&irq_timing_enabled)) return; - if (desc->istate & IRQS_TIMINGS) { - struct irq_timings *timings = this_cpu_ptr(&irq_timings); - - timings->values[timings->count & IRQ_TIMINGS_MASK] = - irq_timing_encode(local_clock(), - irq_desc_get_irq(desc)); - - timings->count++; - } + if (desc->istate & IRQS_TIMINGS) + irq_timings_push(local_clock(), irq_desc_get_irq(desc)); } #else static inline void irq_remove_timings(struct irq_desc *desc) {} diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index c52b737ab8e3..9484e88dabc2 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -680,6 +680,8 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq, * @hwirq: The HW irq number to convert to a logical one * @regs: Register file coming from the low-level handling code * + * This function must be called from an NMI context. + * * Returns: 0 on success, or -EINVAL if conversion has failed */ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, @@ -689,7 +691,10 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, unsigned int irq; int ret = 0; - nmi_enter(); + /* + * NMI context needs to be setup earlier in order to deal with tracing. + */ + WARN_ON(!in_nmi()); irq = irq_find_mapping(domain, hwirq); @@ -702,7 +707,6 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq, else ret = -EINVAL; - nmi_exit(); set_irq_regs(old_regs); return ret; } @@ -946,6 +950,11 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu) *per_cpu_ptr(desc->kstat_irqs, cpu) : 0; } +static bool irq_is_nmi(struct irq_desc *desc) +{ + return desc->istate & IRQS_NMI; +} + /** * kstat_irqs - Get the statistics for an interrupt * @irq: The interrupt number @@ -963,7 +972,8 @@ unsigned int kstat_irqs(unsigned int irq) if (!desc || !desc->kstat_irqs) return 0; if (!irq_settings_is_per_cpu_devid(desc) && - !irq_settings_is_per_cpu(desc)) + !irq_settings_is_per_cpu(desc) && + !irq_is_nmi(desc)) return desc->tot_count; for_each_possible_cpu(cpu) diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index a453e229f99c..3078d0e48bba 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -123,7 +123,7 @@ EXPORT_SYMBOL_GPL(irq_domain_free_fwnode); * @ops: domain callbacks * @host_data: Controller private data pointer * - * Allocates and initialize and irq_domain structure. + * Allocates and initializes an irq_domain structure. * Returns pointer to IRQ domain, or NULL on failure. */ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, @@ -139,7 +139,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size), GFP_KERNEL, of_node_to_nid(of_node)); - if (WARN_ON(!domain)) + if (!domain) return NULL; if (fwnode && is_fwnode_irqchip(fwnode)) { diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 78f3ddeb7fe4..e8f7f179bf77 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/random.h> #include <linux/interrupt.h> +#include <linux/irqdomain.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/sched/rt.h> @@ -34,8 +35,9 @@ static int __init setup_forced_irqthreads(char *arg) early_param("threadirqs", setup_forced_irqthreads); #endif -static void __synchronize_hardirq(struct irq_desc *desc) +static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip) { + struct irq_data *irqd = irq_desc_get_irq_data(desc); bool inprogress; do { @@ -51,6 +53,20 @@ static void __synchronize_hardirq(struct irq_desc *desc) /* Ok, that indicated we're done: double-check carefully. */ raw_spin_lock_irqsave(&desc->lock, flags); inprogress = irqd_irq_inprogress(&desc->irq_data); + + /* + * If requested and supported, check at the chip whether it + * is in flight at the hardware level, i.e. already pending + * in a CPU and waiting for service and acknowledge. + */ + if (!inprogress && sync_chip) { + /* + * Ignore the return code. inprogress is only updated + * when the chip supports it. + */ + __irq_get_irqchip_state(irqd, IRQCHIP_STATE_ACTIVE, + &inprogress); + } raw_spin_unlock_irqrestore(&desc->lock, flags); /* Oops, that failed? */ @@ -73,13 +89,18 @@ static void __synchronize_hardirq(struct irq_desc *desc) * Returns: false if a threaded handler is active. * * This function may be called - with care - from IRQ context. + * + * It does not check whether there is an interrupt in flight at the + * hardware level, but not serviced yet, as this might deadlock when + * called with interrupts disabled and the target CPU of the interrupt + * is the current CPU. */ bool synchronize_hardirq(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); if (desc) { - __synchronize_hardirq(desc); + __synchronize_hardirq(desc, false); return !atomic_read(&desc->threads_active); } @@ -95,14 +116,19 @@ EXPORT_SYMBOL(synchronize_hardirq); * to complete before returning. If you use this function while * holding a resource the IRQ handler may need you will deadlock. * - * This function may be called - with care - from IRQ context. + * Can only be called from preemptible code as it might sleep when + * an interrupt thread is associated to @irq. + * + * It optionally makes sure (when the irq chip supports that method) + * that the interrupt is not pending in any CPU and waiting for + * service. */ void synchronize_irq(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); if (desc) { - __synchronize_hardirq(desc); + __synchronize_hardirq(desc, true); /* * We made sure that no hardirq handler is * running. Now verify that no threaded handlers are @@ -1699,6 +1725,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) /* If this was the last handler, shut down the IRQ line: */ if (!desc->action) { irq_settings_clr_disable_unlazy(desc); + /* Only shutdown. Deactivate after synchronize_hardirq() */ irq_shutdown(desc); } @@ -1727,8 +1754,12 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) unregister_handler_proc(irq, action); - /* Make sure it's not being used on another CPU: */ - synchronize_hardirq(irq); + /* + * Make sure it's not being used on another CPU and if the chip + * supports it also make sure that there is no (not yet serviced) + * interrupt in flight at the hardware level. + */ + __synchronize_hardirq(desc, true); #ifdef CONFIG_DEBUG_SHIRQ /* @@ -1768,6 +1799,14 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) * require it to deallocate resources over the slow bus. */ chip_bus_lock(desc); + /* + * There is no interrupt on the fly anymore. Deactivate it + * completely. + */ + raw_spin_lock_irqsave(&desc->lock, flags); + irq_domain_deactivate_irq(&desc->irq_data); + raw_spin_unlock_irqrestore(&desc->lock, flags); + irq_release_resources(desc); chip_bus_sync_unlock(desc); irq_remove_timings(desc); @@ -1855,7 +1894,7 @@ static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc) } irq_settings_clr_disable_unlazy(desc); - irq_shutdown(desc); + irq_shutdown_and_deactivate(desc); irq_release_resources(desc); @@ -2578,6 +2617,28 @@ out: irq_put_desc_unlock(desc, flags); } +int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which, + bool *state) +{ + struct irq_chip *chip; + int err = -EINVAL; + + do { + chip = irq_data_get_irq_chip(data); + if (chip->irq_get_irqchip_state) + break; +#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + data = data->parent_data; +#else + data = NULL; +#endif + } while (data); + + if (data) + err = chip->irq_get_irqchip_state(data, which, state); + return err; +} + /** * irq_get_irqchip_state - returns the irqchip state of a interrupt. * @irq: Interrupt line that is forwarded to a VM @@ -2596,7 +2657,6 @@ int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, { struct irq_desc *desc; struct irq_data *data; - struct irq_chip *chip; unsigned long flags; int err = -EINVAL; @@ -2606,19 +2666,7 @@ int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which, data = irq_desc_get_irq_data(desc); - do { - chip = irq_data_get_irq_chip(data); - if (chip->irq_get_irqchip_state) - break; -#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY - data = data->parent_data; -#else - data = NULL; -#endif - } while (data); - - if (data) - err = chip->irq_get_irqchip_state(data, which, state); + err = __irq_get_irqchip_state(data, which, state); irq_put_desc_busunlock(desc, flags); return err; diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c index 90c735da15d0..e960d7ce7bcc 100644 --- a/kernel/irq/timings.c +++ b/kernel/irq/timings.c @@ -1,10 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> +#define pr_fmt(fmt) "irq_timings: " fmt #include <linux/kernel.h> #include <linux/percpu.h> #include <linux/slab.h> #include <linux/static_key.h> +#include <linux/init.h> #include <linux/interrupt.h> #include <linux/idr.h> #include <linux/irq.h> @@ -261,12 +263,29 @@ void irq_timings_disable(void) #define EMA_ALPHA_VAL 64 #define EMA_ALPHA_SHIFT 7 -#define PREDICTION_PERIOD_MIN 2 +#define PREDICTION_PERIOD_MIN 3 #define PREDICTION_PERIOD_MAX 5 #define PREDICTION_FACTOR 4 #define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */ #define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */ +/* + * Number of elements in the circular buffer: If it happens it was + * flushed before, then the number of elements could be smaller than + * IRQ_TIMINGS_SIZE, so the count is used, otherwise the array size is + * used as we wrapped. The index begins from zero when we did not + * wrap. That could be done in a nicer way with the proper circular + * array structure type but with the cost of extra computation in the + * interrupt handler hot path. We choose efficiency. + */ +#define for_each_irqts(i, irqts) \ + for (i = irqts->count < IRQ_TIMINGS_SIZE ? \ + 0 : irqts->count & IRQ_TIMINGS_MASK, \ + irqts->count = min(IRQ_TIMINGS_SIZE, \ + irqts->count); \ + irqts->count > 0; irqts->count--, \ + i = (i + 1) & IRQ_TIMINGS_MASK) + struct irqt_stat { u64 last_ts; u64 ema_time[PREDICTION_BUFFER_SIZE]; @@ -297,7 +316,16 @@ static u64 irq_timings_ema_new(u64 value, u64 ema_old) static int irq_timings_next_event_index(int *buffer, size_t len, int period_max) { - int i; + int period; + + /* + * Move the beginning pointer to the end minus the max period x 3. + * We are at the point we can begin searching the pattern + */ + buffer = &buffer[len - (period_max * 3)]; + + /* Adjust the length to the maximum allowed period x 3 */ + len = period_max * 3; /* * The buffer contains the suite of intervals, in a ilog2 @@ -306,21 +334,45 @@ static int irq_timings_next_event_index(int *buffer, size_t len, int period_max) * period beginning at the end of the buffer. We do that for * each suffix. */ - for (i = period_max; i >= PREDICTION_PERIOD_MIN ; i--) { + for (period = period_max; period >= PREDICTION_PERIOD_MIN; period--) { - int *begin = &buffer[len - (i * 3)]; - int *ptr = begin; + /* + * The first comparison always succeed because the + * suffix is deduced from the first n-period bytes of + * the buffer and we compare the initial suffix with + * itself, so we can skip the first iteration. + */ + int idx = period; + size_t size = period; /* * We look if the suite with period 'i' repeat * itself. If it is truncated at the end, as it * repeats we can use the period to find out the next - * element. + * element with the modulo. */ - while (!memcmp(ptr, begin, i * sizeof(*ptr))) { - ptr += i; - if (ptr >= &buffer[len]) - return begin[((i * 3) % i)]; + while (!memcmp(buffer, &buffer[idx], size * sizeof(int))) { + + /* + * Move the index in a period basis + */ + idx += size; + + /* + * If this condition is reached, all previous + * memcmp were successful, so the period is + * found. + */ + if (idx == len) + return buffer[len % period]; + + /* + * If the remaining elements to compare are + * smaller than the period, readjust the size + * of the comparison for the last iteration. + */ + if (len - idx < period) + size = len - idx; } } @@ -380,11 +432,43 @@ static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now) return irqs->last_ts + irqs->ema_time[index]; } +static __always_inline int irq_timings_interval_index(u64 interval) +{ + /* + * The PREDICTION_FACTOR increase the interval size for the + * array of exponential average. + */ + u64 interval_us = (interval >> 10) / PREDICTION_FACTOR; + + return likely(interval_us) ? ilog2(interval_us) : 0; +} + +static __always_inline void __irq_timings_store(int irq, struct irqt_stat *irqs, + u64 interval) +{ + int index; + + /* + * Get the index in the ema table for this interrupt. + */ + index = irq_timings_interval_index(interval); + + /* + * Store the index as an element of the pattern in another + * circular array. + */ + irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index; + + irqs->ema_time[index] = irq_timings_ema_new(interval, + irqs->ema_time[index]); + + irqs->count++; +} + static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) { u64 old_ts = irqs->last_ts; u64 interval; - int index; /* * The timestamps are absolute time values, we need to compute @@ -415,24 +499,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) return; } - /* - * Get the index in the ema table for this interrupt. The - * PREDICTION_FACTOR increase the interval size for the array - * of exponential average. - */ - index = likely(interval) ? - ilog2((interval >> 10) / PREDICTION_FACTOR) : 0; - - /* - * Store the index as an element of the pattern in another - * circular array. - */ - irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index; - - irqs->ema_time[index] = irq_timings_ema_new(interval, - irqs->ema_time[index]); - - irqs->count++; + __irq_timings_store(irq, irqs, interval); } /** @@ -493,11 +560,7 @@ u64 irq_timings_next_event(u64 now) * model while decrementing the counter because we consume the * data from our circular buffer. */ - - i = (irqts->count & IRQ_TIMINGS_MASK) - 1; - irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count); - - for (; irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) { + for_each_irqts(i, irqts) { irq = irq_timing_decode(irqts->values[i], &ts); s = idr_find(&irqt_stats, irq); if (s) @@ -564,3 +627,325 @@ int irq_timings_alloc(int irq) return 0; } + +#ifdef CONFIG_TEST_IRQ_TIMINGS +struct timings_intervals { + u64 *intervals; + size_t count; +}; + +/* + * Intervals are given in nanosecond base + */ +static u64 intervals0[] __initdata = { + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, 500000, + 10000, 50000, 200000, +}; + +static u64 intervals1[] __initdata = { + 223947000, 1240000, 1384000, 1386000, 1386000, + 217416000, 1236000, 1384000, 1386000, 1387000, + 214719000, 1241000, 1386000, 1387000, 1384000, + 213696000, 1234000, 1384000, 1386000, 1388000, + 219904000, 1240000, 1385000, 1389000, 1385000, + 212240000, 1240000, 1386000, 1386000, 1386000, + 214415000, 1236000, 1384000, 1386000, 1387000, + 214276000, 1234000, +}; + +static u64 intervals2[] __initdata = { + 4000, 3000, 5000, 100000, + 3000, 3000, 5000, 117000, + 4000, 4000, 5000, 112000, + 4000, 3000, 4000, 110000, + 3000, 5000, 3000, 117000, + 4000, 4000, 5000, 112000, + 4000, 3000, 4000, 110000, + 3000, 4000, 5000, 112000, + 4000, +}; + +static u64 intervals3[] __initdata = { + 1385000, 212240000, 1240000, + 1386000, 214415000, 1236000, + 1384000, 214276000, 1234000, + 1386000, 214415000, 1236000, + 1385000, 212240000, 1240000, + 1386000, 214415000, 1236000, + 1384000, 214276000, 1234000, + 1386000, 214415000, 1236000, + 1385000, 212240000, 1240000, +}; + +static u64 intervals4[] __initdata = { + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, 50000, 10000, 50000, + 10000, +}; + +static struct timings_intervals tis[] __initdata = { + { intervals0, ARRAY_SIZE(intervals0) }, + { intervals1, ARRAY_SIZE(intervals1) }, + { intervals2, ARRAY_SIZE(intervals2) }, + { intervals3, ARRAY_SIZE(intervals3) }, + { intervals4, ARRAY_SIZE(intervals4) }, +}; + +static int __init irq_timings_test_next_index(struct timings_intervals *ti) +{ + int _buffer[IRQ_TIMINGS_SIZE]; + int buffer[IRQ_TIMINGS_SIZE]; + int index, start, i, count, period_max; + + count = ti->count - 1; + + period_max = count > (3 * PREDICTION_PERIOD_MAX) ? + PREDICTION_PERIOD_MAX : count / 3; + + /* + * Inject all values except the last one which will be used + * to compare with the next index result. + */ + pr_debug("index suite: "); + + for (i = 0; i < count; i++) { + index = irq_timings_interval_index(ti->intervals[i]); + _buffer[i & IRQ_TIMINGS_MASK] = index; + pr_cont("%d ", index); + } + + start = count < IRQ_TIMINGS_SIZE ? 0 : + count & IRQ_TIMINGS_MASK; + + count = min_t(int, count, IRQ_TIMINGS_SIZE); + + for (i = 0; i < count; i++) { + int index = (start + i) & IRQ_TIMINGS_MASK; + buffer[i] = _buffer[index]; + } + + index = irq_timings_next_event_index(buffer, count, period_max); + i = irq_timings_interval_index(ti->intervals[ti->count - 1]); + + if (index != i) { + pr_err("Expected (%d) and computed (%d) next indexes differ\n", + i, index); + return -EINVAL; + } + + return 0; +} + +static int __init irq_timings_next_index_selftest(void) +{ + int i, ret; + + for (i = 0; i < ARRAY_SIZE(tis); i++) { + + pr_info("---> Injecting intervals number #%d (count=%zd)\n", + i, tis[i].count); + + ret = irq_timings_test_next_index(&tis[i]); + if (ret) + break; + } + + return ret; +} + +static int __init irq_timings_test_irqs(struct timings_intervals *ti) +{ + struct irqt_stat __percpu *s; + struct irqt_stat *irqs; + int i, index, ret, irq = 0xACE5; + + ret = irq_timings_alloc(irq); + if (ret) { + pr_err("Failed to allocate irq timings\n"); + return ret; + } + + s = idr_find(&irqt_stats, irq); + if (!s) { + ret = -EIDRM; + goto out; + } + + irqs = this_cpu_ptr(s); + + for (i = 0; i < ti->count; i++) { + + index = irq_timings_interval_index(ti->intervals[i]); + pr_debug("%d: interval=%llu ema_index=%d\n", + i, ti->intervals[i], index); + + __irq_timings_store(irq, irqs, ti->intervals[i]); + if (irqs->circ_timings[i & IRQ_TIMINGS_MASK] != index) { + pr_err("Failed to store in the circular buffer\n"); + goto out; + } + } + + if (irqs->count != ti->count) { + pr_err("Count differs\n"); + goto out; + } + + ret = 0; +out: + irq_timings_free(irq); + + return ret; +} + +static int __init irq_timings_irqs_selftest(void) +{ + int i, ret; + + for (i = 0; i < ARRAY_SIZE(tis); i++) { + pr_info("---> Injecting intervals number #%d (count=%zd)\n", + i, tis[i].count); + ret = irq_timings_test_irqs(&tis[i]); + if (ret) + break; + } + + return ret; +} + +static int __init irq_timings_test_irqts(struct irq_timings *irqts, + unsigned count) +{ + int start = count >= IRQ_TIMINGS_SIZE ? count - IRQ_TIMINGS_SIZE : 0; + int i, irq, oirq = 0xBEEF; + u64 ots = 0xDEAD, ts; + + /* + * Fill the circular buffer by using the dedicated function. + */ + for (i = 0; i < count; i++) { + pr_debug("%d: index=%d, ts=%llX irq=%X\n", + i, i & IRQ_TIMINGS_MASK, ots + i, oirq + i); + + irq_timings_push(ots + i, oirq + i); + } + + /* + * Compute the first elements values after the index wrapped + * up or not. + */ + ots += start; + oirq += start; + + /* + * Test the circular buffer count is correct. + */ + pr_debug("---> Checking timings array count (%d) is right\n", count); + if (WARN_ON(irqts->count != count)) + return -EINVAL; + + /* + * Test the macro allowing to browse all the irqts. + */ + pr_debug("---> Checking the for_each_irqts() macro\n"); + for_each_irqts(i, irqts) { + + irq = irq_timing_decode(irqts->values[i], &ts); + + pr_debug("index=%d, ts=%llX / %llX, irq=%X / %X\n", + i, ts, ots, irq, oirq); + + if (WARN_ON(ts != ots || irq != oirq)) + return -EINVAL; + + ots++; oirq++; + } + + /* + * The circular buffer should have be flushed when browsed + * with for_each_irqts + */ + pr_debug("---> Checking timings array is empty after browsing it\n"); + if (WARN_ON(irqts->count)) + return -EINVAL; + + return 0; +} + +static int __init irq_timings_irqts_selftest(void) +{ + struct irq_timings *irqts = this_cpu_ptr(&irq_timings); + int i, ret; + + /* + * Test the circular buffer with different number of + * elements. The purpose is to test at the limits (empty, half + * full, full, wrapped with the cursor at the boundaries, + * wrapped several times, etc ... + */ + int count[] = { 0, + IRQ_TIMINGS_SIZE >> 1, + IRQ_TIMINGS_SIZE, + IRQ_TIMINGS_SIZE + (IRQ_TIMINGS_SIZE >> 1), + 2 * IRQ_TIMINGS_SIZE, + (2 * IRQ_TIMINGS_SIZE) + 3, + }; + + for (i = 0; i < ARRAY_SIZE(count); i++) { + + pr_info("---> Checking the timings with %d/%d values\n", + count[i], IRQ_TIMINGS_SIZE); + + ret = irq_timings_test_irqts(irqts, count[i]); + if (ret) + break; + } + + return ret; +} + +static int __init irq_timings_selftest(void) +{ + int ret; + + pr_info("------------------- selftest start -----------------\n"); + + /* + * At this point, we don't except any subsystem to use the irq + * timings but us, so it should not be enabled. + */ + if (static_branch_unlikely(&irq_timing_enabled)) { + pr_warn("irq timings already initialized, skipping selftest\n"); + return 0; + } + + ret = irq_timings_irqts_selftest(); + if (ret) + goto out; + + ret = irq_timings_irqs_selftest(); + if (ret) + goto out; + + ret = irq_timings_next_index_selftest(); +out: + pr_info("---------- selftest end with %s -----------\n", + ret ? "failure" : "success"); + + return ret; +} +early_initcall(irq_timings_selftest); +#endif diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 9505101ed2bc..096211299c07 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -493,6 +493,9 @@ int suspend_devices_and_enter(suspend_state_t state) pm_suspend_target_state = state; + if (state == PM_SUSPEND_TO_IDLE) + pm_set_suspend_no_platform(); + error = platform_suspend_begin(state); if (error) goto Close; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 8456b6e2205f..83a531cea2f3 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -79,9 +79,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent, */ static void ptrace_link(struct task_struct *child, struct task_struct *new_parent) { - rcu_read_lock(); - __ptrace_link(child, new_parent, __task_cred(new_parent)); - rcu_read_unlock(); + __ptrace_link(child, new_parent, current_cred()); } /** @@ -118,6 +116,9 @@ void __ptrace_unlink(struct task_struct *child) BUG_ON(!child->ptrace); clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); +#ifdef TIF_SYSCALL_EMU + clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); +#endif child->parent = child->real_parent; list_del_init(&child->ptrace_entry); diff --git a/kernel/signal.c b/kernel/signal.c index d622eac9d169..edf8915ddd54 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -2912,7 +2912,8 @@ EXPORT_SYMBOL(set_compat_user_sigmask); * This is useful for syscalls such as ppoll, pselect, io_pgetevents and * epoll_pwait where a new sigmask is passed in from userland for the syscalls. */ -void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved) +void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved, + bool interrupted) { if (!usigmask) @@ -2922,7 +2923,7 @@ void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved) * Restoring sigmask here can lead to delivering signals that the above * syscalls are intended to block because of the sigmask passed in. */ - if (signal_pending(current)) { + if (interrupted) { current->saved_sigmask = *sigsaved; set_restore_sigmask(); return; diff --git a/kernel/smp.c b/kernel/smp.c index d155374632eb..616d4d114847 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -34,7 +34,7 @@ struct call_function_data { cpumask_var_t cpumask_ipi; }; -static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data); +static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data); static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue); @@ -487,13 +487,11 @@ EXPORT_SYMBOL(smp_call_function_many); * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. */ -int smp_call_function(smp_call_func_t func, void *info, int wait) +void smp_call_function(smp_call_func_t func, void *info, int wait) { preempt_disable(); smp_call_function_many(cpu_online_mask, func, info, wait); preempt_enable(); - - return 0; } EXPORT_SYMBOL(smp_call_function); @@ -594,18 +592,16 @@ void __init smp_init(void) * early_boot_irqs_disabled is set. Use local_irq_save/restore() instead * of local_irq_disable/enable(). */ -int on_each_cpu(void (*func) (void *info), void *info, int wait) +void on_each_cpu(void (*func) (void *info), void *info, int wait) { unsigned long flags; - int ret = 0; preempt_disable(); - ret = smp_call_function(func, info, wait); + smp_call_function(func, info, wait); local_irq_save(flags); func(info); local_irq_restore(flags); preempt_enable(); - return ret; } EXPORT_SYMBOL(on_each_cpu); diff --git a/kernel/softirq.c b/kernel/softirq.c index a6b81c6b6bff..0427a86743a4 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -649,7 +649,7 @@ static int takeover_tasklets(unsigned int cpu) /* Find end, append list for that CPU. */ if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) { *__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head; - this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail); + __this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail); per_cpu(tasklet_vec, cpu).head = NULL; per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head; } diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 2b5a6754646f..b4f83f7bdf86 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -177,12 +177,18 @@ static void ack_state(struct multi_stop_data *msdata) set_state(msdata, msdata->state + 1); } +void __weak stop_machine_yield(const struct cpumask *cpumask) +{ + cpu_relax(); +} + /* This is the cpu_stop function which stops the CPU. */ static int multi_cpu_stop(void *data) { struct multi_stop_data *msdata = data; enum multi_stop_state curstate = MULTI_STOP_NONE; int cpu = smp_processor_id(), err = 0; + const struct cpumask *cpumask; unsigned long flags; bool is_active; @@ -192,15 +198,18 @@ static int multi_cpu_stop(void *data) */ local_save_flags(flags); - if (!msdata->active_cpus) - is_active = cpu == cpumask_first(cpu_online_mask); - else - is_active = cpumask_test_cpu(cpu, msdata->active_cpus); + if (!msdata->active_cpus) { + cpumask = cpu_online_mask; + is_active = cpu == cpumask_first(cpumask); + } else { + cpumask = msdata->active_cpus; + is_active = cpumask_test_cpu(cpu, cpumask); + } /* Simple state machine */ do { /* Chill out and ensure we re-read multi_stop_state. */ - cpu_relax_yield(); + stop_machine_yield(cpumask); if (msdata->state != curstate) { curstate = msdata->state; switch (curstate) { diff --git a/kernel/time/Makefile b/kernel/time/Makefile index f1e46f338a9c..1867044800bb 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -16,5 +16,6 @@ ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y) endif obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o +obj-$(CONFIG_HAVE_GENERIC_VDSO) += vsyscall.o obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o obj-$(CONFIG_TEST_UDELAY) += test_udelay.o diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 0519a8805aab..57518efc3810 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -233,7 +233,6 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining); /** * alarmtimer_suspend - Suspend time callback * @dev: unused - * @state: unused * * When we are going into suspend, we look through the bases * to see which is the soonest timer to expire. We then diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 3bcc19ceb073..fff5f64981c6 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -105,12 +105,12 @@ static DEFINE_SPINLOCK(watchdog_lock); static int watchdog_running; static atomic_t watchdog_reset_pending; -static void inline clocksource_watchdog_lock(unsigned long *flags) +static inline void clocksource_watchdog_lock(unsigned long *flags) { spin_lock_irqsave(&watchdog_lock, *flags); } -static void inline clocksource_watchdog_unlock(unsigned long *flags) +static inline void clocksource_watchdog_unlock(unsigned long *flags) { spin_unlock_irqrestore(&watchdog_lock, *flags); } diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 41dfff23c1f9..5ee77f1a8a92 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -30,7 +30,6 @@ #include <linux/syscalls.h> #include <linux/interrupt.h> #include <linux/tick.h> -#include <linux/seq_file.h> #include <linux/err.h> #include <linux/debugobjects.h> #include <linux/sched/signal.h> @@ -1115,9 +1114,10 @@ EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); * @timer: hrtimer to stop * * Returns: - * 0 when the timer was not active - * 1 when the timer was active - * -1 when the timer is currently executing the callback function and + * + * * 0 when the timer was not active + * * 1 when the timer was active + * * -1 when the timer is currently executing the callback function and * cannot be stopped */ int hrtimer_try_to_cancel(struct hrtimer *timer) diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 8de4f789dc1b..65eb796610dc 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -43,6 +43,7 @@ static u64 tick_length_base; #define MAX_TICKADJ 500LL /* usecs */ #define MAX_TICKADJ_SCALED \ (((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ) +#define MAX_TAI_OFFSET 100000 /* * phase-lock loop variables @@ -691,7 +692,8 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc, time_constant = max(time_constant, 0l); } - if (txc->modes & ADJ_TAI && txc->constant >= 0) + if (txc->modes & ADJ_TAI && + txc->constant >= 0 && txc->constant <= MAX_TAI_OFFSET) *time_tai = txc->constant; if (txc->modes & ADJ_OFFSET) diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 29176635991f..d7f2d91acdac 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -980,23 +980,16 @@ retry_delete: */ static void itimer_delete(struct k_itimer *timer) { - unsigned long flags; - retry_delete: - spin_lock_irqsave(&timer->it_lock, flags); + spin_lock_irq(&timer->it_lock); if (timer_delete_hook(timer) == TIMER_RETRY) { - unlock_timer(timer, flags); + spin_unlock_irq(&timer->it_lock); goto retry_delete; } list_del(&timer->list); - /* - * This keeps any tasks waiting on the spin lock from thinking - * they got something (see the lock code above). - */ - timer->it_signal = NULL; - unlock_timer(timer, flags); + spin_unlock_irq(&timer->it_lock); release_posix_timer(timer, IT_ID_SET); } diff --git a/kernel/time/time.c b/kernel/time/time.c index 7f7d6914ddd5..5c54ca632d08 100644 --- a/kernel/time/time.c +++ b/kernel/time/time.c @@ -251,6 +251,10 @@ COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv, if (tv) { if (compat_get_timeval(&user_tv, tv)) return -EFAULT; + + if (!timeval_valid(&user_tv)) + return -EINVAL; + new_ts.tv_sec = user_tv.tv_sec; new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC; } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 44b726bab4bd..d911c8470149 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -819,7 +819,7 @@ ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs) } while (read_seqcount_retry(&tk_core.seq, seq)); - return base + nsecs; + return ktime_add_ns(base, nsecs); } EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset); diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index 98ba50dcb1b2..acb326f5f50a 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -282,23 +282,6 @@ static inline void timer_list_header(struct seq_file *m, u64 now) SEQ_printf(m, "\n"); } -static int timer_list_show(struct seq_file *m, void *v) -{ - struct timer_list_iter *iter = v; - - if (iter->cpu == -1 && !iter->second_pass) - timer_list_header(m, iter->now); - else if (!iter->second_pass) - print_cpu(m, iter->cpu, iter->now); -#ifdef CONFIG_GENERIC_CLOCKEVENTS - else if (iter->cpu == -1 && iter->second_pass) - timer_list_show_tickdevices_header(m); - else - print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu); -#endif - return 0; -} - void sysrq_timer_list_show(void) { u64 now = ktime_to_ns(ktime_get()); @@ -317,6 +300,24 @@ void sysrq_timer_list_show(void) return; } +#ifdef CONFIG_PROC_FS +static int timer_list_show(struct seq_file *m, void *v) +{ + struct timer_list_iter *iter = v; + + if (iter->cpu == -1 && !iter->second_pass) + timer_list_header(m, iter->now); + else if (!iter->second_pass) + print_cpu(m, iter->cpu, iter->now); +#ifdef CONFIG_GENERIC_CLOCKEVENTS + else if (iter->cpu == -1 && iter->second_pass) + timer_list_show_tickdevices_header(m); + else + print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu); +#endif + return 0; +} + static void *move_iter(struct timer_list_iter *iter, loff_t offset) { for (; offset; offset--) { @@ -376,3 +377,4 @@ static int __init init_timer_list_procfs(void) return 0; } __initcall(init_timer_list_procfs); +#endif diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c new file mode 100644 index 000000000000..a80893180826 --- /dev/null +++ b/kernel/time/vsyscall.c @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2019 ARM Ltd. + * + * Generic implementation of update_vsyscall and update_vsyscall_tz. + * + * Based on the x86 specific implementation. + */ + +#include <linux/hrtimer.h> +#include <linux/timekeeper_internal.h> +#include <vdso/datapage.h> +#include <vdso/helpers.h> +#include <vdso/vsyscall.h> + +static inline void update_vdso_data(struct vdso_data *vdata, + struct timekeeper *tk) +{ + struct vdso_timestamp *vdso_ts; + u64 nsec; + + vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last; + vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask; + vdata[CS_HRES_COARSE].mult = tk->tkr_mono.mult; + vdata[CS_HRES_COARSE].shift = tk->tkr_mono.shift; + vdata[CS_RAW].cycle_last = tk->tkr_raw.cycle_last; + vdata[CS_RAW].mask = tk->tkr_raw.mask; + vdata[CS_RAW].mult = tk->tkr_raw.mult; + vdata[CS_RAW].shift = tk->tkr_raw.shift; + + /* CLOCK_REALTIME */ + vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME]; + vdso_ts->sec = tk->xtime_sec; + vdso_ts->nsec = tk->tkr_mono.xtime_nsec; + + /* CLOCK_MONOTONIC */ + vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC]; + vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; + + nsec = tk->tkr_mono.xtime_nsec; + nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift); + while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { + nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift); + vdso_ts->sec++; + } + vdso_ts->nsec = nsec; + + /* CLOCK_MONOTONIC_RAW */ + vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW]; + vdso_ts->sec = tk->raw_sec; + vdso_ts->nsec = tk->tkr_raw.xtime_nsec; + + /* CLOCK_BOOTTIME */ + vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME]; + vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; + nsec = tk->tkr_mono.xtime_nsec; + nsec += ((u64)(tk->wall_to_monotonic.tv_nsec + + ktime_to_ns(tk->offs_boot)) << tk->tkr_mono.shift); + while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { + nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift); + vdso_ts->sec++; + } + vdso_ts->nsec = nsec; + + /* CLOCK_TAI */ + vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI]; + vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset; + vdso_ts->nsec = tk->tkr_mono.xtime_nsec; + + /* + * Read without the seqlock held by clock_getres(). + * Note: No need to have a second copy. + */ + WRITE_ONCE(vdata[CS_HRES_COARSE].hrtimer_res, hrtimer_resolution); +} + +void update_vsyscall(struct timekeeper *tk) +{ + struct vdso_data *vdata = __arch_get_k_vdso_data(); + struct vdso_timestamp *vdso_ts; + u64 nsec; + + if (__arch_update_vdso_data()) { + /* + * Some architectures might want to skip the update of the + * data page. + */ + return; + } + + /* copy vsyscall data */ + vdso_write_begin(vdata); + + vdata[CS_HRES_COARSE].clock_mode = __arch_get_clock_mode(tk); + vdata[CS_RAW].clock_mode = __arch_get_clock_mode(tk); + + /* CLOCK_REALTIME_COARSE */ + vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE]; + vdso_ts->sec = tk->xtime_sec; + vdso_ts->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; + + /* CLOCK_MONOTONIC_COARSE */ + vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC_COARSE]; + vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec; + nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift; + nsec = nsec + tk->wall_to_monotonic.tv_nsec; + while (nsec >= NSEC_PER_SEC) { + nsec = nsec - NSEC_PER_SEC; + vdso_ts->sec++; + } + vdso_ts->nsec = nsec; + + if (__arch_use_vsyscall(vdata)) + update_vdso_data(vdata, tk); + + __arch_update_vsyscall(vdata, tk); + + vdso_write_end(vdata); + + __arch_sync_vdso_data(vdata); +} + +void update_vsyscall_tz(void) +{ + struct vdso_data *vdata = __arch_get_k_vdso_data(); + + if (__arch_use_vsyscall(vdata)) { + vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest; + vdata[CS_HRES_COARSE].tz_dsttime = sys_tz.tz_dsttime; + } + + __arch_sync_vdso_data(vdata); +} diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 38277af44f5c..576c41644e77 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -34,7 +34,6 @@ #include <linux/hash.h> #include <linux/rcupdate.h> #include <linux/kprobes.h> -#include <linux/memory.h> #include <trace/events/sched.h> @@ -2611,12 +2610,10 @@ static void ftrace_run_update_code(int command) { int ret; - mutex_lock(&text_mutex); - ret = ftrace_arch_code_modify_prepare(); FTRACE_WARN_ON(ret); if (ret) - goto out_unlock; + return; /* * By default we use stop_machine() to modify the code. @@ -2628,9 +2625,6 @@ static void ftrace_run_update_code(int command) ret = ftrace_arch_code_modify_post_process(); FTRACE_WARN_ON(ret); - -out_unlock: - mutex_unlock(&text_mutex); } static void ftrace_run_modify_code(struct ftrace_ops *ops, int command, @@ -5784,7 +5778,6 @@ void ftrace_module_enable(struct module *mod) struct ftrace_page *pg; mutex_lock(&ftrace_lock); - mutex_lock(&text_mutex); if (ftrace_disabled) goto out_unlock; @@ -5846,7 +5839,6 @@ void ftrace_module_enable(struct module *mod) ftrace_arch_code_modify_post_process(); out_unlock: - mutex_unlock(&text_mutex); mutex_unlock(&ftrace_lock); process_cached_mods(mod->name); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 83e08b78dbee..c3aabb576fe5 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6719,11 +6719,13 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt, break; } #endif - if (!tr->allocated_snapshot) { + if (tr->allocated_snapshot) + ret = resize_buffer_duplicate_size(&tr->max_buffer, + &tr->trace_buffer, iter->cpu_file); + else ret = tracing_alloc_snapshot_instance(tr); - if (ret < 0) - break; - } + if (ret < 0) + break; local_irq_disable(); /* Now, we're going to swap */ if (iter->cpu_file == RING_BUFFER_ALL_CPUS) @@ -7126,12 +7128,24 @@ static ssize_t tracing_err_log_write(struct file *file, return count; } +static int tracing_err_log_release(struct inode *inode, struct file *file) +{ + struct trace_array *tr = inode->i_private; + + trace_array_put(tr); + + if (file->f_mode & FMODE_READ) + seq_release(inode, file); + + return 0; +} + static const struct file_operations tracing_err_log_fops = { .open = tracing_err_log_open, .write = tracing_err_log_write, .read = seq_read, .llseek = seq_lseek, - .release = tracing_release_generic_tr, + .release = tracing_err_log_release, }; static int tracing_buffers_open(struct inode *inode, struct file *filp) diff --git a/kernel/up.c b/kernel/up.c index 483c9962c999..862b460ab97a 100644 --- a/kernel/up.c +++ b/kernel/up.c @@ -35,14 +35,13 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd) } EXPORT_SYMBOL(smp_call_function_single_async); -int on_each_cpu(smp_call_func_t func, void *info, int wait) +void on_each_cpu(smp_call_func_t func, void *info, int wait) { unsigned long flags; local_irq_save(flags); func(info); local_irq_restore(flags); - return 0; } EXPORT_SYMBOL(on_each_cpu); |