From 668802c25729a8e3423015c33c05f1c3be3858e9 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 30 Jan 2017 12:57:43 -0500 Subject: tick/broadcast: Reduce lock cacheline contention It was observed that on an Intel x86 system without the ARAT (Always running APIC timer) feature and with fairly large number of CPUs as well as CPUs coming in and out of intel_idle frequently, the lock contention on the tick_broadcast_lock can become significant. To reduce contention, the lock is put into its own cacheline and all the cpumask_var_t variables are put into the __read_mostly section. Running the SP benchmark of the NAS Parallel Benchmarks on a 4-socket 16-core 32-thread Nehalam system, the performance number improved from 3353.94 Mop/s to 3469.31 Mop/s when this patch was applied on a 4.9.6 kernel. This is a 3.4% improvement. Signed-off-by: Waiman Long Cc: "Peter Zijlstra (Intel)" Cc: Andrew Morton Link: http://lkml.kernel.org/r/1485799063-20857-1-git-send-email-longman@redhat.com Signed-off-by: Thomas Gleixner --- include/linux/cpumask.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux/cpumask.h') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index c717f5ea88cb..23c1a6d09ec5 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -649,11 +649,15 @@ static inline size_t cpumask_size(void) * used. Please use this_cpu_cpumask_var_t in those cases. The direct use * of this_cpu_ptr() or this_cpu_read() will lead to failures when the * other type of cpumask_var_t implementation is configured. + * + * Please also note that __cpumask_var_read_mostly can be used to declare + * a cpumask_var_t variable itself (not its content) as read mostly. */ #ifdef CONFIG_CPUMASK_OFFSTACK typedef struct cpumask *cpumask_var_t; -#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x) +#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x) +#define __cpumask_var_read_mostly __read_mostly bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); @@ -667,6 +671,7 @@ void free_bootmem_cpumask_var(cpumask_var_t mask); typedef struct cpumask cpumask_var_t[1]; #define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) +#define __cpumask_var_read_mostly static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { -- cgit v1.2.3