summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/bpf/syscall.c2
-rw-r--r--kernel/cpu.c15
-rw-r--r--kernel/events/core.c27
-rw-r--r--kernel/fork.c9
-rw-r--r--kernel/irq/Makefile3
-rw-r--r--kernel/irq/affinity.c12
-rw-r--r--kernel/irq/autoprobe.c6
-rw-r--r--kernel/irq/chip.c10
-rw-r--r--kernel/irq/cpuhotplug.c2
-rw-r--r--kernel/irq/internals.h26
-rw-r--r--kernel/irq/irqdesc.c16
-rw-r--r--kernel/irq/irqdomain.c4
-rw-r--r--kernel/irq/manage.c90
-rw-r--r--kernel/irq/timings.c453
-rw-r--r--kernel/power/suspend.c3
-rw-r--r--kernel/ptrace.c7
-rw-r--r--kernel/signal.c5
-rw-r--r--kernel/smp.c12
-rw-r--r--kernel/softirq.c2
-rw-r--r--kernel/stop_machine.c19
-rw-r--r--kernel/time/Makefile1
-rw-r--r--kernel/time/alarmtimer.c1
-rw-r--r--kernel/time/clocksource.c4
-rw-r--r--kernel/time/hrtimer.c8
-rw-r--r--kernel/time/ntp.c4
-rw-r--r--kernel/time/posix-timers.c13
-rw-r--r--kernel/time/time.c4
-rw-r--r--kernel/time/timekeeping.c2
-rw-r--r--kernel/time/timer_list.c36
-rw-r--r--kernel/time/vsyscall.c133
-rw-r--r--kernel/trace/ftrace.c10
-rw-r--r--kernel/trace/trace.c24
-rw-r--r--kernel/up.c3
33 files changed, 804 insertions, 162 deletions
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 42d17f730780..5b30f8baaf02 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1668,7 +1668,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
if (err < 0)
goto free_prog;
- prog->aux->load_time = ktime_get_boot_ns();
+ prog->aux->load_time = ktime_get_boottime_ns();
err = bpf_obj_name_cpy(prog->aux->name, attr->prog_name);
if (err)
goto free_prog;
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 077fde6fb953..e84c0873559e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -522,7 +522,7 @@ static int bringup_wait_for_ap(unsigned int cpu)
/*
* SMT soft disabling on X86 requires to bring the CPU out of the
* BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
- * CPU marked itself as booted_once in cpu_notify_starting() so the
+ * CPU marked itself as booted_once in notify_cpu_starting() so the
* cpu_smt_allowed() check will now return false if this is not the
* primary sibling.
*/
@@ -1221,6 +1221,13 @@ int freeze_secondary_cpus(int primary)
for_each_online_cpu(cpu) {
if (cpu == primary)
continue;
+
+ if (pm_wakeup_pending()) {
+ pr_info("Wakeup pending. Abort CPU freeze\n");
+ error = -EBUSY;
+ break;
+ }
+
trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
@@ -1964,6 +1971,9 @@ static ssize_t write_cpuhp_fail(struct device *dev,
if (ret)
return ret;
+ if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE)
+ return -EINVAL;
+
/*
* Cannot fail STARTING/DYING callbacks.
*/
@@ -2339,6 +2349,9 @@ static int __init mitigations_parse_cmdline(char *arg)
cpu_mitigations = CPU_MITIGATIONS_AUTO;
else if (!strcmp(arg, "auto,nosmt"))
cpu_mitigations = CPU_MITIGATIONS_AUTO_NOSMT;
+ else
+ pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n",
+ arg);
return 0;
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index abbd4b3b96c2..29e5f7880a4b 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5005,6 +5005,9 @@ static int perf_event_period(struct perf_event *event, u64 __user *arg)
if (perf_event_check_period(event, value))
return -EINVAL;
+ if (!event->attr.freq && (value & (1ULL << 63)))
+ return -EINVAL;
+
event_function_call(event, __perf_event_period, &value);
return 0;
@@ -5923,7 +5926,7 @@ static void perf_sample_regs_user(struct perf_regs *regs_user,
if (user_mode(regs)) {
regs_user->abi = perf_reg_abi(current);
regs_user->regs = regs;
- } else if (current->mm) {
+ } else if (!(current->flags & PF_KTHREAD)) {
perf_get_regs_user(regs_user, regs, regs_user_copy);
} else {
regs_user->abi = PERF_SAMPLE_REGS_ABI_NONE;
@@ -10033,6 +10036,12 @@ void perf_pmu_unregister(struct pmu *pmu)
}
EXPORT_SYMBOL_GPL(perf_pmu_unregister);
+/*
+ * Check whether @event requests at least one register covered by
+ * PERF_REG_EXTENDED_MASK, in either the user or the interrupt
+ * sample register set.
+ */
+static inline bool has_extended_regs(struct perf_event *event)
+{
+	if (event->attr.sample_regs_user & PERF_REG_EXTENDED_MASK)
+		return true;
+
+	return !!(event->attr.sample_regs_intr & PERF_REG_EXTENDED_MASK);
+}
+
static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
{
struct perf_event_context *ctx = NULL;
@@ -10064,12 +10073,16 @@ static int perf_try_init_event(struct pmu *pmu, struct perf_event *event)
perf_event_ctx_unlock(event->group_leader, ctx);
if (!ret) {
+ if (!(pmu->capabilities & PERF_PMU_CAP_EXTENDED_REGS) &&
+ has_extended_regs(event))
+ ret = -EOPNOTSUPP;
+
if (pmu->capabilities & PERF_PMU_CAP_NO_EXCLUDE &&
- event_has_any_exclude_flag(event)) {
- if (event->destroy)
- event->destroy(event);
+ event_has_any_exclude_flag(event))
ret = -EINVAL;
- }
+
+ if (ret && event->destroy)
+ event->destroy(event);
}
if (ret)
@@ -10680,11 +10693,11 @@ static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
break;
case CLOCK_BOOTTIME:
- event->clock = &ktime_get_boot_ns;
+ event->clock = &ktime_get_boottime_ns;
break;
case CLOCK_TAI:
- event->clock = &ktime_get_tai_ns;
+ event->clock = &ktime_get_clocktai_ns;
break;
default:
diff --git a/kernel/fork.c b/kernel/fork.c
index 399aca51ff75..a83ef7243ccc 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -248,7 +248,11 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)
struct page *page = alloc_pages_node(node, THREADINFO_GFP,
THREAD_SIZE_ORDER);
- return page ? page_address(page) : NULL;
+ if (likely(page)) {
+ tsk->stack = page_address(page);
+ return tsk->stack;
+ }
+ return NULL;
#endif
}
@@ -2032,6 +2036,7 @@ static __latent_entropy struct task_struct *copy_process(
O_RDWR | O_CLOEXEC);
if (IS_ERR(pidfile)) {
put_unused_fd(pidfd);
+ retval = PTR_ERR(pidfile);
goto bad_fork_free_pid;
}
get_pid(pid); /* held by pidfile now */
@@ -2112,7 +2117,7 @@ static __latent_entropy struct task_struct *copy_process(
*/
p->start_time = ktime_get_ns();
- p->real_start_time = ktime_get_boot_ns();
+ p->real_start_time = ktime_get_boottime_ns();
/*
* Make it visible to the rest of the system, but dont wake it up yet.
diff --git a/kernel/irq/Makefile b/kernel/irq/Makefile
index ff6e352e3a6c..b4f53717d143 100644
--- a/kernel/irq/Makefile
+++ b/kernel/irq/Makefile
@@ -2,6 +2,9 @@
obj-y := irqdesc.o handle.o manage.o spurious.o resend.o chip.o dummychip.o devres.o
obj-$(CONFIG_IRQ_TIMINGS) += timings.o
+ifeq ($(CONFIG_TEST_IRQ_TIMINGS),y)
+ CFLAGS_timings.o += -DDEBUG
+endif
obj-$(CONFIG_GENERIC_IRQ_CHIP) += generic-chip.o
obj-$(CONFIG_GENERIC_IRQ_PROBE) += autoprobe.o
obj-$(CONFIG_IRQ_DOMAIN) += irqdomain.o
diff --git a/kernel/irq/affinity.c b/kernel/irq/affinity.c
index f18cd5aa33e8..4352b08ae48d 100644
--- a/kernel/irq/affinity.c
+++ b/kernel/irq/affinity.c
@@ -94,8 +94,7 @@ static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
return nodes;
}
-static int __irq_build_affinity_masks(const struct irq_affinity *affd,
- unsigned int startvec,
+static int __irq_build_affinity_masks(unsigned int startvec,
unsigned int numvecs,
unsigned int firstvec,
cpumask_var_t *node_to_cpumask,
@@ -171,8 +170,7 @@ static int __irq_build_affinity_masks(const struct irq_affinity *affd,
* 1) spread present CPU on these vectors
* 2) spread other possible CPUs on these vectors
*/
-static int irq_build_affinity_masks(const struct irq_affinity *affd,
- unsigned int startvec, unsigned int numvecs,
+static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
unsigned int firstvec,
struct irq_affinity_desc *masks)
{
@@ -197,7 +195,7 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
build_node_to_cpumask(node_to_cpumask);
/* Spread on present CPUs starting from affd->pre_vectors */
- nr_present = __irq_build_affinity_masks(affd, curvec, numvecs,
+ nr_present = __irq_build_affinity_masks(curvec, numvecs,
firstvec, node_to_cpumask,
cpu_present_mask, nmsk, masks);
@@ -212,7 +210,7 @@ static int irq_build_affinity_masks(const struct irq_affinity *affd,
else
curvec = firstvec + nr_present;
cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
- nr_others = __irq_build_affinity_masks(affd, curvec, numvecs,
+ nr_others = __irq_build_affinity_masks(curvec, numvecs,
firstvec, node_to_cpumask,
npresmsk, nmsk, masks);
put_online_cpus();
@@ -295,7 +293,7 @@ irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
unsigned int this_vecs = affd->set_size[i];
int ret;
- ret = irq_build_affinity_masks(affd, curvec, this_vecs,
+ ret = irq_build_affinity_masks(curvec, this_vecs,
curvec, masks);
if (ret) {
kfree(masks);
diff --git a/kernel/irq/autoprobe.c b/kernel/irq/autoprobe.c
index 16cbf6beb276..ae60cae24e9a 100644
--- a/kernel/irq/autoprobe.c
+++ b/kernel/irq/autoprobe.c
@@ -90,7 +90,7 @@ unsigned long probe_irq_on(void)
/* It triggered already - consider it spurious. */
if (!(desc->istate & IRQS_WAITING)) {
desc->istate &= ~IRQS_AUTODETECT;
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
} else
if (i < 32)
mask |= 1 << i;
@@ -127,7 +127,7 @@ unsigned int probe_irq_mask(unsigned long val)
mask |= 1 << i;
desc->istate &= ~IRQS_AUTODETECT;
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
}
raw_spin_unlock_irq(&desc->lock);
}
@@ -169,7 +169,7 @@ int probe_irq_off(unsigned long val)
nr_of_irqs++;
}
desc->istate &= ~IRQS_AUTODETECT;
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
}
raw_spin_unlock_irq(&desc->lock);
}
diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
index 29d6c7d070b4..b76703b2c0af 100644
--- a/kernel/irq/chip.c
+++ b/kernel/irq/chip.c
@@ -314,6 +314,12 @@ void irq_shutdown(struct irq_desc *desc)
}
irq_state_clr_started(desc);
}
+}
+
+
+void irq_shutdown_and_deactivate(struct irq_desc *desc)
+{
+ irq_shutdown(desc);
/*
* This must be called even if the interrupt was never started up,
* because the activation can happen before the interrupt is
@@ -748,6 +754,8 @@ void handle_fasteoi_nmi(struct irq_desc *desc)
unsigned int irq = irq_desc_get_irq(desc);
irqreturn_t res;
+ __kstat_incr_irqs_this_cpu(desc);
+
trace_irq_handler_entry(irq, action);
/*
* NMIs cannot be shared, there is only one action.
@@ -962,6 +970,8 @@ void handle_percpu_devid_fasteoi_nmi(struct irq_desc *desc)
unsigned int irq = irq_desc_get_irq(desc);
irqreturn_t res;
+ __kstat_incr_irqs_this_cpu(desc);
+
trace_irq_handler_entry(irq, action);
res = action->handler(irq, raw_cpu_ptr(action->percpu_dev_id));
trace_irq_handler_exit(irq, action, res);
diff --git a/kernel/irq/cpuhotplug.c b/kernel/irq/cpuhotplug.c
index 5b1072e394b2..6c7ca2e983a5 100644
--- a/kernel/irq/cpuhotplug.c
+++ b/kernel/irq/cpuhotplug.c
@@ -116,7 +116,7 @@ static bool migrate_one_irq(struct irq_desc *desc)
*/
if (irqd_affinity_is_managed(d)) {
irqd_set_managed_shutdown(d);
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
return false;
}
affinity = cpu_online_mask;
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 70c3053bc1f6..3924fbe829d4 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -82,6 +82,7 @@ extern int irq_activate_and_startup(struct irq_desc *desc, bool resend);
extern int irq_startup(struct irq_desc *desc, bool resend, bool force);
extern void irq_shutdown(struct irq_desc *desc);
+extern void irq_shutdown_and_deactivate(struct irq_desc *desc);
extern void irq_enable(struct irq_desc *desc);
extern void irq_disable(struct irq_desc *desc);
extern void irq_percpu_enable(struct irq_desc *desc, unsigned int cpu);
@@ -96,6 +97,10 @@ static inline void irq_mark_irq(unsigned int irq) { }
extern void irq_mark_irq(unsigned int irq);
#endif
+extern int __irq_get_irqchip_state(struct irq_data *data,
+ enum irqchip_irq_state which,
+ bool *state);
+
extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);
irqreturn_t __handle_irq_event_percpu(struct irq_desc *desc, unsigned int *flags);
@@ -354,6 +359,16 @@ static inline int irq_timing_decode(u64 value, u64 *timestamp)
return value & U16_MAX;
}
+/*
+ * Append one sample, encoded with irq_timing_encode(), to this CPU's
+ * irq_timings buffer. The slot is the running count masked with
+ * IRQ_TIMINGS_MASK; the count itself keeps growing.
+ */
+static __always_inline void irq_timings_push(u64 ts, int irq)
+{
+	struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
+
+	irqts->values[irqts->count++ & IRQ_TIMINGS_MASK] =
+		irq_timing_encode(ts, irq);
+}
+
/*
* The function record_irq_time is only called in one place in the
* interrupts handler. We want this function always inline so the code
@@ -367,15 +382,8 @@ static __always_inline void record_irq_time(struct irq_desc *desc)
if (!static_branch_likely(&irq_timing_enabled))
return;
- if (desc->istate & IRQS_TIMINGS) {
- struct irq_timings *timings = this_cpu_ptr(&irq_timings);
-
- timings->values[timings->count & IRQ_TIMINGS_MASK] =
- irq_timing_encode(local_clock(),
- irq_desc_get_irq(desc));
-
- timings->count++;
- }
+ if (desc->istate & IRQS_TIMINGS)
+ irq_timings_push(local_clock(), irq_desc_get_irq(desc));
}
#else
static inline void irq_remove_timings(struct irq_desc *desc) {}
diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index c52b737ab8e3..9484e88dabc2 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c
@@ -680,6 +680,8 @@ int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq,
* @hwirq: The HW irq number to convert to a logical one
* @regs: Register file coming from the low-level handling code
*
+ * This function must be called from an NMI context.
+ *
* Returns: 0 on success, or -EINVAL if conversion has failed
*/
int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
@@ -689,7 +691,10 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
unsigned int irq;
int ret = 0;
- nmi_enter();
+ /*
+ * NMI context needs to be setup earlier in order to deal with tracing.
+ */
+ WARN_ON(!in_nmi());
irq = irq_find_mapping(domain, hwirq);
@@ -702,7 +707,6 @@ int handle_domain_nmi(struct irq_domain *domain, unsigned int hwirq,
else
ret = -EINVAL;
- nmi_exit();
set_irq_regs(old_regs);
return ret;
}
@@ -946,6 +950,11 @@ unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
*per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
}
+/* Report whether IRQS_NMI is set in the descriptor's istate. */
+static bool irq_is_nmi(struct irq_desc *desc)
+{
+	return !!(desc->istate & IRQS_NMI);
+}
+
/**
* kstat_irqs - Get the statistics for an interrupt
* @irq: The interrupt number
@@ -963,7 +972,8 @@ unsigned int kstat_irqs(unsigned int irq)
if (!desc || !desc->kstat_irqs)
return 0;
if (!irq_settings_is_per_cpu_devid(desc) &&
- !irq_settings_is_per_cpu(desc))
+ !irq_settings_is_per_cpu(desc) &&
+ !irq_is_nmi(desc))
return desc->tot_count;
for_each_possible_cpu(cpu)
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index a453e229f99c..3078d0e48bba 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -123,7 +123,7 @@ EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
* @ops: domain callbacks
* @host_data: Controller private data pointer
*
- * Allocates and initialize and irq_domain structure.
+ * Allocates and initializes an irq_domain structure.
* Returns pointer to IRQ domain, or NULL on failure.
*/
struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
@@ -139,7 +139,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
domain = kzalloc_node(sizeof(*domain) + (sizeof(unsigned int) * size),
GFP_KERNEL, of_node_to_nid(of_node));
- if (WARN_ON(!domain))
+ if (!domain)
return NULL;
if (fwnode && is_fwnode_irqchip(fwnode)) {
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 78f3ddeb7fe4..e8f7f179bf77 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/random.h>
#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
@@ -34,8 +35,9 @@ static int __init setup_forced_irqthreads(char *arg)
early_param("threadirqs", setup_forced_irqthreads);
#endif
-static void __synchronize_hardirq(struct irq_desc *desc)
+static void __synchronize_hardirq(struct irq_desc *desc, bool sync_chip)
{
+ struct irq_data *irqd = irq_desc_get_irq_data(desc);
bool inprogress;
do {
@@ -51,6 +53,20 @@ static void __synchronize_hardirq(struct irq_desc *desc)
/* Ok, that indicated we're done: double-check carefully. */
raw_spin_lock_irqsave(&desc->lock, flags);
inprogress = irqd_irq_inprogress(&desc->irq_data);
+
+ /*
+ * If requested and supported, check at the chip whether it
+ * is in flight at the hardware level, i.e. already pending
+ * in a CPU and waiting for service and acknowledge.
+ */
+ if (!inprogress && sync_chip) {
+ /*
+ * Ignore the return code. inprogress is only updated
+ * when the chip supports it.
+ */
+ __irq_get_irqchip_state(irqd, IRQCHIP_STATE_ACTIVE,
+ &inprogress);
+ }
raw_spin_unlock_irqrestore(&desc->lock, flags);
/* Oops, that failed? */
@@ -73,13 +89,18 @@ static void __synchronize_hardirq(struct irq_desc *desc)
* Returns: false if a threaded handler is active.
*
* This function may be called - with care - from IRQ context.
+ *
+ * It does not check whether there is an interrupt in flight at the
+ * hardware level, but not serviced yet, as this might deadlock when
+ * called with interrupts disabled and the target CPU of the interrupt
+ * is the current CPU.
*/
bool synchronize_hardirq(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
if (desc) {
- __synchronize_hardirq(desc);
+ __synchronize_hardirq(desc, false);
return !atomic_read(&desc->threads_active);
}
@@ -95,14 +116,19 @@ EXPORT_SYMBOL(synchronize_hardirq);
* to complete before returning. If you use this function while
* holding a resource the IRQ handler may need you will deadlock.
*
- * This function may be called - with care - from IRQ context.
+ * Can only be called from preemptible code as it might sleep when
+ * an interrupt thread is associated to @irq.
+ *
+ * It optionally makes sure (when the irq chip supports that method)
+ * that the interrupt is not pending in any CPU and waiting for
+ * service.
*/
void synchronize_irq(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
if (desc) {
- __synchronize_hardirq(desc);
+ __synchronize_hardirq(desc, true);
/*
* We made sure that no hardirq handler is
* running. Now verify that no threaded handlers are
@@ -1699,6 +1725,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
/* If this was the last handler, shut down the IRQ line: */
if (!desc->action) {
irq_settings_clr_disable_unlazy(desc);
+ /* Only shutdown. Deactivate after synchronize_hardirq() */
irq_shutdown(desc);
}
@@ -1727,8 +1754,12 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
unregister_handler_proc(irq, action);
- /* Make sure it's not being used on another CPU: */
- synchronize_hardirq(irq);
+ /*
+ * Make sure it's not being used on another CPU and if the chip
+ * supports it also make sure that there is no (not yet serviced)
+ * interrupt in flight at the hardware level.
+ */
+ __synchronize_hardirq(desc, true);
#ifdef CONFIG_DEBUG_SHIRQ
/*
@@ -1768,6 +1799,14 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
* require it to deallocate resources over the slow bus.
*/
chip_bus_lock(desc);
+ /*
+ * There is no interrupt on the fly anymore. Deactivate it
+ * completely.
+ */
+ raw_spin_lock_irqsave(&desc->lock, flags);
+ irq_domain_deactivate_irq(&desc->irq_data);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
+
irq_release_resources(desc);
chip_bus_sync_unlock(desc);
irq_remove_timings(desc);
@@ -1855,7 +1894,7 @@ static const void *__cleanup_nmi(unsigned int irq, struct irq_desc *desc)
}
irq_settings_clr_disable_unlazy(desc);
- irq_shutdown(desc);
+ irq_shutdown_and_deactivate(desc);
irq_release_resources(desc);
@@ -2578,6 +2617,28 @@ out:
irq_put_desc_unlock(desc, flags);
}
+/*
+ * Query @which state of an interrupt from the first irq chip, starting
+ * at @data, that implements irq_get_irqchip_state(). With
+ * CONFIG_IRQ_DOMAIN_HIERARCHY the search continues through
+ * data->parent_data; without it only @data itself is considered.
+ *
+ * Returns the chip callback's result, or -EINVAL if no chip in the
+ * chain provides the callback.
+ */
+int __irq_get_irqchip_state(struct irq_data *data, enum irqchip_irq_state which,
+			    bool *state)
+{
+	struct irq_chip *chip;
+
+	for (;;) {
+		chip = irq_data_get_irq_chip(data);
+		if (chip->irq_get_irqchip_state)
+			return chip->irq_get_irqchip_state(data, which, state);
+
+#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
+		data = data->parent_data;
+#else
+		data = NULL;
+#endif
+		/* Ran out of chips without finding the callback */
+		if (!data)
+			return -EINVAL;
+	}
+}
+
/**
* irq_get_irqchip_state - returns the irqchip state of a interrupt.
* @irq: Interrupt line that is forwarded to a VM
@@ -2596,7 +2657,6 @@ int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
{
struct irq_desc *desc;
struct irq_data *data;
- struct irq_chip *chip;
unsigned long flags;
int err = -EINVAL;
@@ -2606,19 +2666,7 @@ int irq_get_irqchip_state(unsigned int irq, enum irqchip_irq_state which,
data = irq_desc_get_irq_data(desc);
- do {
- chip = irq_data_get_irq_chip(data);
- if (chip->irq_get_irqchip_state)
- break;
-#ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
- data = data->parent_data;
-#else
- data = NULL;
-#endif
- } while (data);
-
- if (data)
- err = chip->irq_get_irqchip_state(data, which, state);
+ err = __irq_get_irqchip_state(data, which, state);
irq_put_desc_busunlock(desc, flags);
return err;
diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c
index 90c735da15d0..e960d7ce7bcc 100644
--- a/kernel/irq/timings.c
+++ b/kernel/irq/timings.c
@@ -1,10 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2016, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org>
+#define pr_fmt(fmt) "irq_timings: " fmt
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/static_key.h>
+#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/idr.h>
#include <linux/irq.h>
@@ -261,12 +263,29 @@ void irq_timings_disable(void)
#define EMA_ALPHA_VAL 64
#define EMA_ALPHA_SHIFT 7
-#define PREDICTION_PERIOD_MIN 2
+#define PREDICTION_PERIOD_MIN 3
#define PREDICTION_PERIOD_MAX 5
#define PREDICTION_FACTOR 4
#define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */
#define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */
+/*
+ * Iterate over the timings circular buffer, oldest entry first. When
+ * the buffer has not wrapped (count < IRQ_TIMINGS_SIZE) the walk starts
+ * at index zero and visits 'count' entries; otherwise it starts at
+ * count & IRQ_TIMINGS_MASK and visits IRQ_TIMINGS_SIZE entries. The
+ * walk consumes the buffer: irqts->count is decremented down to zero.
+ * A proper circular array type would be nicer, but would add extra
+ * computation to the interrupt handler hot path. We choose efficiency.
+ */
+#define for_each_irqts(i, irqts) \
+ for (i = irqts->count < IRQ_TIMINGS_SIZE ? \
+ 0 : irqts->count & IRQ_TIMINGS_MASK, \
+ irqts->count = min(IRQ_TIMINGS_SIZE, \
+ irqts->count); \
+ irqts->count > 0; irqts->count--, \
+ i = (i + 1) & IRQ_TIMINGS_MASK)
+
struct irqt_stat {
u64 last_ts;
u64 ema_time[PREDICTION_BUFFER_SIZE];
@@ -297,7 +316,16 @@ static u64 irq_timings_ema_new(u64 value, u64 ema_old)
static int irq_timings_next_event_index(int *buffer, size_t len, int period_max)
{
- int i;
+ int period;
+
+ /*
+ * Move the beginning pointer to the end minus the max period x 3.
+ * We are at the point we can begin searching the pattern
+ */
+ buffer = &buffer[len - (period_max * 3)];
+
+ /* Adjust the length to the maximum allowed period x 3 */
+ len = period_max * 3;
/*
* The buffer contains the suite of intervals, in a ilog2
@@ -306,21 +334,45 @@ static int irq_timings_next_event_index(int *buffer, size_t len, int period_max)
* period beginning at the end of the buffer. We do that for
* each suffix.
*/
- for (i = period_max; i >= PREDICTION_PERIOD_MIN ; i--) {
+ for (period = period_max; period >= PREDICTION_PERIOD_MIN; period--) {
- int *begin = &buffer[len - (i * 3)];
- int *ptr = begin;
+ /*
+ * The first comparison always succeeds because the
+ * pattern is taken from the first 'period' elements of
+ * the buffer and we would compare that pattern with
+ * itself, so we can skip the first iteration.
+ */
+ int idx = period;
+ size_t size = period;
/*
* We look if the suite with period 'i' repeat
* itself. If it is truncated at the end, as it
* repeats we can use the period to find out the next
- * element.
+ * element with the modulo.
*/
- while (!memcmp(ptr, begin, i * sizeof(*ptr))) {
- ptr += i;
- if (ptr >= &buffer[len])
- return begin[((i * 3) % i)];
+ while (!memcmp(buffer, &buffer[idx], size * sizeof(int))) {
+
+ /*
+ * Move the index in a period basis
+ */
+ idx += size;
+
+ /*
+ * If this condition is reached, all previous
+ * memcmp were successful, so the period is
+ * found.
+ */
+ if (idx == len)
+ return buffer[len % period];
+
+ /*
+ * If the remaining elements to compare are
+ * smaller than the period, readjust the size
+ * of the comparison for the last iteration.
+ */
+ if (len - idx < period)
+ size = len - idx;
}
}
@@ -380,11 +432,43 @@ static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now)
return irqs->last_ts + irqs->ema_time[index];
}
+/*
+ * Map an interval to its slot in the array of exponential moving
+ * averages: the interval is shifted right by 10, divided by
+ * PREDICTION_FACTOR (which widens each slot), and the ilog2() of the
+ * result is the index. A scaled interval of zero maps to index 0.
+ */
+static __always_inline int irq_timings_interval_index(u64 interval)
+{
+	u64 scaled = (interval >> 10) / PREDICTION_FACTOR;
+
+	if (unlikely(!scaled))
+		return 0;
+
+	return ilog2(scaled);
+}
+
+/*
+ * Account one @interval in the statistics @irqs: record its EMA slot
+ * as the next element of the pattern ring and fold the interval into
+ * the moving average kept for that slot.
+ */
+static __always_inline void __irq_timings_store(int irq, struct irqt_stat *irqs,
+						u64 interval)
+{
+	/* Slot of this interval in the ema table */
+	int slot = irq_timings_interval_index(interval);
+
+	/* The slot number is the next element of the pattern ring */
+	irqs->circ_timings[irqs->count++ & IRQ_TIMINGS_MASK] = slot;
+
+	/* Update the moving average of the slot with the raw interval */
+	irqs->ema_time[slot] = irq_timings_ema_new(interval,
+						   irqs->ema_time[slot]);
+}
+
static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
{
u64 old_ts = irqs->last_ts;
u64 interval;
- int index;
/*
* The timestamps are absolute time values, we need to compute
@@ -415,24 +499,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
return;
}
- /*
- * Get the index in the ema table for this interrupt. The
- * PREDICTION_FACTOR increase the interval size for the array
- * of exponential average.
- */
- index = likely(interval) ?
- ilog2((interval >> 10) / PREDICTION_FACTOR) : 0;
-
- /*
- * Store the index as an element of the pattern in another
- * circular array.
- */
- irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index;
-
- irqs->ema_time[index] = irq_timings_ema_new(interval,
- irqs->ema_time[index]);
-
- irqs->count++;
+ __irq_timings_store(irq, irqs, interval);
}
/**
@@ -493,11 +560,7 @@ u64 irq_timings_next_event(u64 now)
* model while decrementing the counter because we consume the
* data from our circular buffer.
*/
-
- i = (irqts->count & IRQ_TIMINGS_MASK) - 1;
- irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
-
- for (; irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {
+ for_each_irqts(i, irqts) {
irq = irq_timing_decode(irqts->values[i], &ts);
s = idr_find(&irqt_stats, irq);
if (s)
@@ -564,3 +627,325 @@ int irq_timings_alloc(int irq)
return 0;
}
+
+#ifdef CONFIG_TEST_IRQ_TIMINGS
+struct timings_intervals { /* one input series for the selftests */
+ u64 *intervals; /* interval values, in nanoseconds */
+ size_t count; /* number of entries in intervals[] */
+};
+
+/*
+ * Intervals are given in nanoseconds.
+ */
+static u64 intervals0[] __initdata = {
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000, 500000,
+ 10000, 50000, 200000,
+};
+
+static u64 intervals1[] __initdata = {
+ 223947000, 1240000, 1384000, 1386000, 1386000,
+ 217416000, 1236000, 1384000, 1386000, 1387000,
+ 214719000, 1241000, 1386000, 1387000, 1384000,
+ 213696000, 1234000, 1384000, 1386000, 1388000,
+ 219904000, 1240000, 1385000, 1389000, 1385000,
+ 212240000, 1240000, 1386000, 1386000, 1386000,
+ 214415000, 1236000, 1384000, 1386000, 1387000,
+ 214276000, 1234000,
+};
+
+static u64 intervals2[] __initdata = {
+ 4000, 3000, 5000, 100000,
+ 3000, 3000, 5000, 117000,
+ 4000, 4000, 5000, 112000,
+ 4000, 3000, 4000, 110000,
+ 3000, 5000, 3000, 117000,
+ 4000, 4000, 5000, 112000,
+ 4000, 3000, 4000, 110000,
+ 3000, 4000, 5000, 112000,
+ 4000,
+};
+
+static u64 intervals3[] __initdata = {
+ 1385000, 212240000, 1240000,
+ 1386000, 214415000, 1236000,
+ 1384000, 214276000, 1234000,
+ 1386000, 214415000, 1236000,
+ 1385000, 212240000, 1240000,
+ 1386000, 214415000, 1236000,
+ 1384000, 214276000, 1234000,
+ 1386000, 214415000, 1236000,
+ 1385000, 212240000, 1240000,
+};
+
+static u64 intervals4[] __initdata = {
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000, 50000, 10000, 50000,
+ 10000,
+};
+
+static struct timings_intervals tis[] __initdata = {
+ { intervals0, ARRAY_SIZE(intervals0) },
+ { intervals1, ARRAY_SIZE(intervals1) },
+ { intervals2, ARRAY_SIZE(intervals2) },
+ { intervals3, ARRAY_SIZE(intervals3) },
+ { intervals4, ARRAY_SIZE(intervals4) },
+};
+
+/*
+ * Check irq_timings_next_event_index() against one interval series:
+ * all intervals but the last are converted to EMA indexes and stored
+ * the way a circular buffer would hold them, the buffer is flattened
+ * oldest entry first, and the computed next index must equal the index
+ * of the last interval, which was held back.
+ *
+ * Returns 0 when both indexes match, -EINVAL otherwise.
+ */
+static int __init irq_timings_test_next_index(struct timings_intervals *ti)
+{
+	int ring[IRQ_TIMINGS_SIZE];
+	int linear[IRQ_TIMINGS_SIZE];
+	int nr = ti->count - 1;
+	int period_max, first, computed, expected, i;
+
+	if (nr > (3 * PREDICTION_PERIOD_MAX))
+		period_max = PREDICTION_PERIOD_MAX;
+	else
+		period_max = nr / 3;
+
+	/*
+	 * Everything but the last interval is injected; the last one
+	 * is the value the computed index is compared with.
+	 */
+	pr_debug("index suite: ");
+
+	for (i = 0; i < nr; i++) {
+		int slot = irq_timings_interval_index(ti->intervals[i]);
+
+		ring[i & IRQ_TIMINGS_MASK] = slot;
+		pr_cont("%d ", slot);
+	}
+
+	/* Oldest entry: slot zero unless the ring wrapped */
+	first = 0;
+	if (!(nr < IRQ_TIMINGS_SIZE))
+		first = nr & IRQ_TIMINGS_MASK;
+
+	nr = min_t(int, nr, IRQ_TIMINGS_SIZE);
+
+	/* Flatten the ring into an oldest-first array */
+	for (i = 0; i < nr; i++)
+		linear[i] = ring[(first + i) & IRQ_TIMINGS_MASK];
+
+	computed = irq_timings_next_event_index(linear, nr, period_max);
+	expected = irq_timings_interval_index(ti->intervals[ti->count - 1]);
+
+	if (computed == expected)
+		return 0;
+
+	pr_err("Expected (%d) and computed (%d) next indexes differ\n",
+	       expected, computed);
+
+	return -EINVAL;
+}
+
+/*
+ * Run irq_timings_test_next_index() on every series of tis[], stopping
+ * at the first failure.
+ *
+ * Returns 0 if all series pass, otherwise the error of the failing one.
+ */
+static int __init irq_timings_next_index_selftest(void)
+{
+	/* ret starts at 0 so the return value is defined for an empty table */
+	int i, ret = 0;
+
+	for (i = 0; i < ARRAY_SIZE(tis); i++) {
+
+		/* tis[i].count is a size_t, hence %zu */
+		pr_info("---> Injecting intervals number #%d (count=%zu)\n",
+			i, tis[i].count);
+
+		ret = irq_timings_test_next_index(&tis[i]);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * Feed every interval of @ti through __irq_timings_store() for a
+ * scratch irq number and verify that each EMA index lands in the
+ * expected slot of circ_timings[] and that the final count matches.
+ * The scratch irq statistics are released before returning.
+ *
+ * Returns 0 on success, the irq_timings_alloc() error if allocation
+ * fails, -EIDRM if the allocated statistics cannot be looked up, or
+ * -EINVAL if one of the checks fails.
+ */
+static int __init irq_timings_test_irqs(struct timings_intervals *ti)
+{
+	struct irqt_stat __percpu *s;
+	struct irqt_stat *irqs;
+	int i, index, ret, irq = 0xACE5;
+
+	ret = irq_timings_alloc(irq);
+	if (ret) {
+		pr_err("Failed to allocate irq timings\n");
+		return ret;
+	}
+
+	s = idr_find(&irqt_stats, irq);
+	if (!s) {
+		ret = -EIDRM;
+		goto out;
+	}
+
+	irqs = this_cpu_ptr(s);
+
+	for (i = 0; i < ti->count; i++) {
+
+		index = irq_timings_interval_index(ti->intervals[i]);
+		pr_debug("%d: interval=%llu ema_index=%d\n",
+			 i, ti->intervals[i], index);
+
+		__irq_timings_store(irq, irqs, ti->intervals[i]);
+		if (irqs->circ_timings[i & IRQ_TIMINGS_MASK] != index) {
+			/*
+			 * ret is still 0 from the successful allocation:
+			 * set an error so the failure is not reported as
+			 * a success.
+			 */
+			ret = -EINVAL;
+			pr_err("Failed to store in the circular buffer\n");
+			goto out;
+		}
+	}
+
+	if (irqs->count != ti->count) {
+		ret = -EINVAL;
+		pr_err("Count differs\n");
+		goto out;
+	}
+
+	/* All checks passed, ret is still 0 here */
+out:
+	irq_timings_free(irq);
+
+	return ret;
+}
+
+/*
+ * Run irq_timings_test_irqs() on every series of tis[], stopping at the
+ * first failure.
+ *
+ * Returns 0 if all series pass, otherwise the error of the failing one.
+ */
+static int __init irq_timings_irqs_selftest(void)
+{
+	/* ret starts at 0 so the return value is defined for an empty table */
+	int i, ret = 0;
+
+	for (i = 0; i < ARRAY_SIZE(tis); i++) {
+		/* tis[i].count is a size_t, hence %zu */
+		pr_info("---> Injecting intervals number #%d (count=%zu)\n",
+			i, tis[i].count);
+		ret = irq_timings_test_irqs(&tis[i]);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+/*
+ * Push @count samples into this CPU's timings buffer with
+ * irq_timings_push(), then check that @irqts reports that count, that
+ * for_each_irqts() yields the samples still held by the buffer in the
+ * order they were pushed, and that the walk leaves the buffer empty.
+ *
+ * Returns 0 on success, -EINVAL (after a WARN) on the first mismatch.
+ */
+static int __init irq_timings_test_irqts(struct irq_timings *irqts,
+					 unsigned count)
+{
+	int start = count >= IRQ_TIMINGS_SIZE ? count - IRQ_TIMINGS_SIZE : 0;
+	int i, irq, oirq = 0xBEEF;
+	u64 ots = 0xDEAD, ts;
+
+	/*
+	 * Fill the circular buffer by using the dedicated function.
+	 */
+	for (i = 0; i < count; i++) {
+		pr_debug("%d: index=%d, ts=%llX irq=%X\n",
+			 i, i & IRQ_TIMINGS_MASK, ots + i, oirq + i);
+
+		irq_timings_push(ots + i, oirq + i);
+	}
+
+	/*
+	 * Compute the values of the first element still in the buffer,
+	 * whether the index wrapped or not.
+	 */
+	ots += start;
+	oirq += start;
+
+	/*
+	 * Test the circular buffer count is correct.
+	 */
+	pr_debug("---> Checking timings array count (%u) is right\n", count);
+	if (WARN_ON(irqts->count != count))
+		return -EINVAL;
+
+	/*
+	 * Test the macro allowing to browse all the irqts.
+	 */
+	pr_debug("---> Checking the for_each_irqts() macro\n");
+	for_each_irqts(i, irqts) {
+
+		irq = irq_timing_decode(irqts->values[i], &ts);
+
+		pr_debug("index=%d, ts=%llX / %llX, irq=%X / %X\n",
+			 i, ts, ots, irq, oirq);
+
+		if (WARN_ON(ts != ots || irq != oirq))
+			return -EINVAL;
+
+		ots++; oirq++;
+	}
+
+	/*
+	 * The circular buffer should have been flushed when browsed
+	 * with for_each_irqts
+	 */
+	pr_debug("---> Checking timings array is empty after browsing it\n");
+	if (WARN_ON(irqts->count))
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Exercise this CPU's timings buffer through irq_timings_test_irqts()
+ * with a set of fill levels, stopping at the first failure.
+ *
+ * Returns 0 if every fill level passes, otherwise the first error.
+ */
+static int __init irq_timings_irqts_selftest(void)
+{
+	struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
+	int n, err;
+
+	/*
+	 * Fill levels chosen to hit the limits: empty, half full, full,
+	 * wrapped with the cursor at the boundaries, wrapped several
+	 * times, etc ...
+	 */
+	int count[] = { 0,
+			IRQ_TIMINGS_SIZE >> 1,
+			IRQ_TIMINGS_SIZE,
+			IRQ_TIMINGS_SIZE + (IRQ_TIMINGS_SIZE >> 1),
+			2 * IRQ_TIMINGS_SIZE,
+			(2 * IRQ_TIMINGS_SIZE) + 3,
+	};
+
+	for (n = 0; n < ARRAY_SIZE(count); n++) {
+
+		pr_info("---> Checking the timings with %d/%d values\n",
+			count[n], IRQ_TIMINGS_SIZE);
+
+		err = irq_timings_test_irqts(irqts, count[n]);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Entry point of the irq timings selftests: runs the irqts, irqs and
+ * next index selftests in that order, stops at the first failure and
+ * logs the overall outcome. Skipped (returning 0) when irq timings are
+ * already enabled.
+ */
+static int __init irq_timings_selftest(void)
+{
+	int ret;
+
+	pr_info("------------------- selftest start -----------------\n");
+
+	/*
+	 * At this point, we don't expect any subsystem to use the irq
+	 * timings but us, so it should not be enabled.
+	 */
+	if (static_branch_unlikely(&irq_timing_enabled)) {
+		pr_warn("irq timings already initialized, skipping selftest\n");
+		return 0;
+	}
+
+	/* Each stage only runs if the previous one succeeded */
+	ret = irq_timings_irqts_selftest();
+	if (!ret)
+		ret = irq_timings_irqs_selftest();
+	if (!ret)
+		ret = irq_timings_next_index_selftest();
+
+	pr_info("---------- selftest end with %s -----------\n",
+		ret ? "failure" : "success");
+
+	return ret;
+}
+early_initcall(irq_timings_selftest);
+#endif
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 9505101ed2bc..096211299c07 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -493,6 +493,9 @@ int suspend_devices_and_enter(suspend_state_t state)
pm_suspend_target_state = state;
+ if (state == PM_SUSPEND_TO_IDLE)
+ pm_set_suspend_no_platform();
+
error = platform_suspend_begin(state);
if (error)
goto Close;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 8456b6e2205f..83a531cea2f3 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -79,9 +79,7 @@ void __ptrace_link(struct task_struct *child, struct task_struct *new_parent,
*/
static void ptrace_link(struct task_struct *child, struct task_struct *new_parent)
{
- rcu_read_lock();
- __ptrace_link(child, new_parent, __task_cred(new_parent));
- rcu_read_unlock();
+ __ptrace_link(child, new_parent, current_cred());
}
/**
@@ -118,6 +116,9 @@ void __ptrace_unlink(struct task_struct *child)
BUG_ON(!child->ptrace);
clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+#ifdef TIF_SYSCALL_EMU
+ clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
+#endif
child->parent = child->real_parent;
list_del_init(&child->ptrace_entry);
diff --git a/kernel/signal.c b/kernel/signal.c
index d622eac9d169..edf8915ddd54 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2912,7 +2912,8 @@ EXPORT_SYMBOL(set_compat_user_sigmask);
* This is useful for syscalls such as ppoll, pselect, io_pgetevents and
* epoll_pwait where a new sigmask is passed in from userland for the syscalls.
*/
-void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved)
+void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved,
+ bool interrupted)
{
if (!usigmask)
@@ -2922,7 +2923,7 @@ void restore_user_sigmask(const void __user *usigmask, sigset_t *sigsaved)
* Restoring sigmask here can lead to delivering signals that the above
* syscalls are intended to block because of the sigmask passed in.
*/
- if (signal_pending(current)) {
+ if (interrupted) {
current->saved_sigmask = *sigsaved;
set_restore_sigmask();
return;
diff --git a/kernel/smp.c b/kernel/smp.c
index d155374632eb..616d4d114847 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -34,7 +34,7 @@ struct call_function_data {
cpumask_var_t cpumask_ipi;
};
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct call_function_data, cfd_data);
+static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data);
static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue);
@@ -487,13 +487,11 @@ EXPORT_SYMBOL(smp_call_function_many);
* You must not call this function with disabled interrupts or from a
* hardware interrupt handler or from a bottom half handler.
*/
-int smp_call_function(smp_call_func_t func, void *info, int wait)
+void smp_call_function(smp_call_func_t func, void *info, int wait)
{
preempt_disable();
smp_call_function_many(cpu_online_mask, func, info, wait);
preempt_enable();
-
- return 0;
}
EXPORT_SYMBOL(smp_call_function);
@@ -594,18 +592,16 @@ void __init smp_init(void)
* early_boot_irqs_disabled is set. Use local_irq_save/restore() instead
* of local_irq_disable/enable().
*/
-int on_each_cpu(void (*func) (void *info), void *info, int wait)
+void on_each_cpu(void (*func) (void *info), void *info, int wait)
{
unsigned long flags;
- int ret = 0;
preempt_disable();
- ret = smp_call_function(func, info, wait);
+ smp_call_function(func, info, wait);
local_irq_save(flags);
func(info);
local_irq_restore(flags);
preempt_enable();
- return ret;
}
EXPORT_SYMBOL(on_each_cpu);
diff --git a/kernel/softirq.c b/kernel/softirq.c
index a6b81c6b6bff..0427a86743a4 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -649,7 +649,7 @@ static int takeover_tasklets(unsigned int cpu)
/* Find end, append list for that CPU. */
if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
- this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
+ __this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
per_cpu(tasklet_vec, cpu).head = NULL;
per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
}
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 2b5a6754646f..b4f83f7bdf86 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -177,12 +177,18 @@ static void ack_state(struct multi_stop_data *msdata)
set_state(msdata, msdata->state + 1);
}
+void __weak stop_machine_yield(const struct cpumask *cpumask)
+{
+ cpu_relax();
+}
+
/* This is the cpu_stop function which stops the CPU. */
static int multi_cpu_stop(void *data)
{
struct multi_stop_data *msdata = data;
enum multi_stop_state curstate = MULTI_STOP_NONE;
int cpu = smp_processor_id(), err = 0;
+ const struct cpumask *cpumask;
unsigned long flags;
bool is_active;
@@ -192,15 +198,18 @@ static int multi_cpu_stop(void *data)
*/
local_save_flags(flags);
- if (!msdata->active_cpus)
- is_active = cpu == cpumask_first(cpu_online_mask);
- else
- is_active = cpumask_test_cpu(cpu, msdata->active_cpus);
+ if (!msdata->active_cpus) {
+ cpumask = cpu_online_mask;
+ is_active = cpu == cpumask_first(cpumask);
+ } else {
+ cpumask = msdata->active_cpus;
+ is_active = cpumask_test_cpu(cpu, cpumask);
+ }
/* Simple state machine */
do {
/* Chill out and ensure we re-read multi_stop_state. */
- cpu_relax_yield();
+ stop_machine_yield(cpumask);
if (msdata->state != curstate) {
curstate = msdata->state;
switch (curstate) {
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index f1e46f338a9c..1867044800bb 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -16,5 +16,6 @@ ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y)
endif
obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o
obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o
+obj-$(CONFIG_HAVE_GENERIC_VDSO) += vsyscall.o
obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
obj-$(CONFIG_TEST_UDELAY) += test_udelay.o
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 0519a8805aab..57518efc3810 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -233,7 +233,6 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining);
/**
* alarmtimer_suspend - Suspend time callback
* @dev: unused
- * @state: unused
*
* When we are going into suspend, we look through the bases
* to see which is the soonest timer to expire. We then
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 3bcc19ceb073..fff5f64981c6 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -105,12 +105,12 @@ static DEFINE_SPINLOCK(watchdog_lock);
static int watchdog_running;
static atomic_t watchdog_reset_pending;
-static void inline clocksource_watchdog_lock(unsigned long *flags)
+static inline void clocksource_watchdog_lock(unsigned long *flags)
{
spin_lock_irqsave(&watchdog_lock, *flags);
}
-static void inline clocksource_watchdog_unlock(unsigned long *flags)
+static inline void clocksource_watchdog_unlock(unsigned long *flags)
{
spin_unlock_irqrestore(&watchdog_lock, *flags);
}
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 41dfff23c1f9..5ee77f1a8a92 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -30,7 +30,6 @@
#include <linux/syscalls.h>
#include <linux/interrupt.h>
#include <linux/tick.h>
-#include <linux/seq_file.h>
#include <linux/err.h>
#include <linux/debugobjects.h>
#include <linux/sched/signal.h>
@@ -1115,9 +1114,10 @@ EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);
* @timer: hrtimer to stop
*
* Returns:
- * 0 when the timer was not active
- * 1 when the timer was active
- * -1 when the timer is currently executing the callback function and
+ *
+ * * 0 when the timer was not active
+ * * 1 when the timer was active
+ * * -1 when the timer is currently executing the callback function and
* cannot be stopped
*/
int hrtimer_try_to_cancel(struct hrtimer *timer)
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 8de4f789dc1b..65eb796610dc 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -43,6 +43,7 @@ static u64 tick_length_base;
#define MAX_TICKADJ 500LL /* usecs */
#define MAX_TICKADJ_SCALED \
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
+#define MAX_TAI_OFFSET 100000
/*
* phase-lock loop variables
@@ -691,7 +692,8 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc,
time_constant = max(time_constant, 0l);
}
- if (txc->modes & ADJ_TAI && txc->constant >= 0)
+ if (txc->modes & ADJ_TAI &&
+ txc->constant >= 0 && txc->constant <= MAX_TAI_OFFSET)
*time_tai = txc->constant;
if (txc->modes & ADJ_OFFSET)
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 29176635991f..d7f2d91acdac 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -980,23 +980,16 @@ retry_delete:
*/
static void itimer_delete(struct k_itimer *timer)
{
- unsigned long flags;
-
retry_delete:
- spin_lock_irqsave(&timer->it_lock, flags);
+ spin_lock_irq(&timer->it_lock);
if (timer_delete_hook(timer) == TIMER_RETRY) {
- unlock_timer(timer, flags);
+ spin_unlock_irq(&timer->it_lock);
goto retry_delete;
}
list_del(&timer->list);
- /*
- * This keeps any tasks waiting on the spin lock from thinking
- * they got something (see the lock code above).
- */
- timer->it_signal = NULL;
- unlock_timer(timer, flags);
+ spin_unlock_irq(&timer->it_lock);
release_posix_timer(timer, IT_ID_SET);
}
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 7f7d6914ddd5..5c54ca632d08 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -251,6 +251,10 @@ COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv,
if (tv) {
if (compat_get_timeval(&user_tv, tv))
return -EFAULT;
+
+ if (!timeval_valid(&user_tv))
+ return -EINVAL;
+
new_ts.tv_sec = user_tv.tv_sec;
new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC;
}
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 44b726bab4bd..d911c8470149 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -819,7 +819,7 @@ ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
} while (read_seqcount_retry(&tk_core.seq, seq));
- return base + nsecs;
+ return ktime_add_ns(base, nsecs);
}
EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 98ba50dcb1b2..acb326f5f50a 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -282,23 +282,6 @@ static inline void timer_list_header(struct seq_file *m, u64 now)
SEQ_printf(m, "\n");
}
-static int timer_list_show(struct seq_file *m, void *v)
-{
- struct timer_list_iter *iter = v;
-
- if (iter->cpu == -1 && !iter->second_pass)
- timer_list_header(m, iter->now);
- else if (!iter->second_pass)
- print_cpu(m, iter->cpu, iter->now);
-#ifdef CONFIG_GENERIC_CLOCKEVENTS
- else if (iter->cpu == -1 && iter->second_pass)
- timer_list_show_tickdevices_header(m);
- else
- print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu);
-#endif
- return 0;
-}
-
void sysrq_timer_list_show(void)
{
u64 now = ktime_to_ns(ktime_get());
@@ -317,6 +300,24 @@ void sysrq_timer_list_show(void)
return;
}
+#ifdef CONFIG_PROC_FS
+static int timer_list_show(struct seq_file *m, void *v)
+{
+ struct timer_list_iter *iter = v;
+
+ if (iter->cpu == -1 && !iter->second_pass)
+ timer_list_header(m, iter->now);
+ else if (!iter->second_pass)
+ print_cpu(m, iter->cpu, iter->now);
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+ else if (iter->cpu == -1 && iter->second_pass)
+ timer_list_show_tickdevices_header(m);
+ else
+ print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu);
+#endif
+ return 0;
+}
+
static void *move_iter(struct timer_list_iter *iter, loff_t offset)
{
for (; offset; offset--) {
@@ -376,3 +377,4 @@ static int __init init_timer_list_procfs(void)
return 0;
}
__initcall(init_timer_list_procfs);
+#endif
diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c
new file mode 100644
index 000000000000..a80893180826
--- /dev/null
+++ b/kernel/time/vsyscall.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 ARM Ltd.
+ *
+ * Generic implementation of update_vsyscall and update_vsyscall_tz.
+ *
+ * Based on the x86 specific implementation.
+ */
+
+#include <linux/hrtimer.h>
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+#include <vdso/helpers.h>
+#include <vdso/vsyscall.h>
+
+static inline void update_vdso_data(struct vdso_data *vdata,
+ struct timekeeper *tk)
+{
+ struct vdso_timestamp *vdso_ts;
+ u64 nsec;
+
+ vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last;
+ vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask;
+ vdata[CS_HRES_COARSE].mult = tk->tkr_mono.mult;
+ vdata[CS_HRES_COARSE].shift = tk->tkr_mono.shift;
+ vdata[CS_RAW].cycle_last = tk->tkr_raw.cycle_last;
+ vdata[CS_RAW].mask = tk->tkr_raw.mask;
+ vdata[CS_RAW].mult = tk->tkr_raw.mult;
+ vdata[CS_RAW].shift = tk->tkr_raw.shift;
+
+ /* CLOCK_REALTIME */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME];
+ vdso_ts->sec = tk->xtime_sec;
+ vdso_ts->nsec = tk->tkr_mono.xtime_nsec;
+
+ /* CLOCK_MONOTONIC */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC];
+ vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+
+ nsec = tk->tkr_mono.xtime_nsec;
+ nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
+ while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
+ nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
+ vdso_ts->sec++;
+ }
+ vdso_ts->nsec = nsec;
+
+ /* CLOCK_MONOTONIC_RAW */
+ vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
+ vdso_ts->sec = tk->raw_sec;
+ vdso_ts->nsec = tk->tkr_raw.xtime_nsec;
+
+ /* CLOCK_BOOTTIME */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME];
+ vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+ nsec = tk->tkr_mono.xtime_nsec;
+ nsec += ((u64)(tk->wall_to_monotonic.tv_nsec +
+ ktime_to_ns(tk->offs_boot)) << tk->tkr_mono.shift);
+ while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
+ nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
+ vdso_ts->sec++;
+ }
+ vdso_ts->nsec = nsec;
+
+ /* CLOCK_TAI */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI];
+ vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset;
+ vdso_ts->nsec = tk->tkr_mono.xtime_nsec;
+
+ /*
+ * Read without the seqlock held by clock_getres().
+ * Note: No need to have a second copy.
+ */
+ WRITE_ONCE(vdata[CS_HRES_COARSE].hrtimer_res, hrtimer_resolution);
+}
+
+void update_vsyscall(struct timekeeper *tk)
+{
+ struct vdso_data *vdata = __arch_get_k_vdso_data();
+ struct vdso_timestamp *vdso_ts;
+ u64 nsec;
+
+ if (__arch_update_vdso_data()) {
+ /*
+ * Some architectures might want to skip the update of the
+ * data page.
+ */
+ return;
+ }
+
+ /* copy vsyscall data */
+ vdso_write_begin(vdata);
+
+ vdata[CS_HRES_COARSE].clock_mode = __arch_get_clock_mode(tk);
+ vdata[CS_RAW].clock_mode = __arch_get_clock_mode(tk);
+
+ /* CLOCK_REALTIME_COARSE */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE];
+ vdso_ts->sec = tk->xtime_sec;
+ vdso_ts->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+
+ /* CLOCK_MONOTONIC_COARSE */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC_COARSE];
+ vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+ nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+ nsec = nsec + tk->wall_to_monotonic.tv_nsec;
+ while (nsec >= NSEC_PER_SEC) {
+ nsec = nsec - NSEC_PER_SEC;
+ vdso_ts->sec++;
+ }
+ vdso_ts->nsec = nsec;
+
+ if (__arch_use_vsyscall(vdata))
+ update_vdso_data(vdata, tk);
+
+ __arch_update_vsyscall(vdata, tk);
+
+ vdso_write_end(vdata);
+
+ __arch_sync_vdso_data(vdata);
+}
+
+void update_vsyscall_tz(void)
+{
+ struct vdso_data *vdata = __arch_get_k_vdso_data();
+
+ if (__arch_use_vsyscall(vdata)) {
+ vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest;
+ vdata[CS_HRES_COARSE].tz_dsttime = sys_tz.tz_dsttime;
+ }
+
+ __arch_sync_vdso_data(vdata);
+}
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 38277af44f5c..576c41644e77 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -34,7 +34,6 @@
#include <linux/hash.h>
#include <linux/rcupdate.h>
#include <linux/kprobes.h>
-#include <linux/memory.h>
#include <trace/events/sched.h>
@@ -2611,12 +2610,10 @@ static void ftrace_run_update_code(int command)
{
int ret;
- mutex_lock(&text_mutex);
-
ret = ftrace_arch_code_modify_prepare();
FTRACE_WARN_ON(ret);
if (ret)
- goto out_unlock;
+ return;
/*
* By default we use stop_machine() to modify the code.
@@ -2628,9 +2625,6 @@ static void ftrace_run_update_code(int command)
ret = ftrace_arch_code_modify_post_process();
FTRACE_WARN_ON(ret);
-
-out_unlock:
- mutex_unlock(&text_mutex);
}
static void ftrace_run_modify_code(struct ftrace_ops *ops, int command,
@@ -5784,7 +5778,6 @@ void ftrace_module_enable(struct module *mod)
struct ftrace_page *pg;
mutex_lock(&ftrace_lock);
- mutex_lock(&text_mutex);
if (ftrace_disabled)
goto out_unlock;
@@ -5846,7 +5839,6 @@ void ftrace_module_enable(struct module *mod)
ftrace_arch_code_modify_post_process();
out_unlock:
- mutex_unlock(&text_mutex);
mutex_unlock(&ftrace_lock);
process_cached_mods(mod->name);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 83e08b78dbee..c3aabb576fe5 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6719,11 +6719,13 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
break;
}
#endif
- if (!tr->allocated_snapshot) {
+ if (tr->allocated_snapshot)
+ ret = resize_buffer_duplicate_size(&tr->max_buffer,
+ &tr->trace_buffer, iter->cpu_file);
+ else
ret = tracing_alloc_snapshot_instance(tr);
- if (ret < 0)
- break;
- }
+ if (ret < 0)
+ break;
local_irq_disable();
/* Now, we're going to swap */
if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
@@ -7126,12 +7128,24 @@ static ssize_t tracing_err_log_write(struct file *file,
return count;
}
+static int tracing_err_log_release(struct inode *inode, struct file *file)
+{
+ struct trace_array *tr = inode->i_private;
+
+ trace_array_put(tr);
+
+ if (file->f_mode & FMODE_READ)
+ seq_release(inode, file);
+
+ return 0;
+}
+
static const struct file_operations tracing_err_log_fops = {
.open = tracing_err_log_open,
.write = tracing_err_log_write,
.read = seq_read,
.llseek = seq_lseek,
- .release = tracing_release_generic_tr,
+ .release = tracing_err_log_release,
};
static int tracing_buffers_open(struct inode *inode, struct file *filp)
diff --git a/kernel/up.c b/kernel/up.c
index 483c9962c999..862b460ab97a 100644
--- a/kernel/up.c
+++ b/kernel/up.c
@@ -35,14 +35,13 @@ int smp_call_function_single_async(int cpu, call_single_data_t *csd)
}
EXPORT_SYMBOL(smp_call_function_single_async);
-int on_each_cpu(smp_call_func_t func, void *info, int wait)
+void on_each_cpu(smp_call_func_t func, void *info, int wait)
{
unsigned long flags;
local_irq_save(flags);
func(info);
local_irq_restore(flags);
- return 0;
}
EXPORT_SYMBOL(on_each_cpu);