From 6802f9347993a534797847f627c27f4334067945 Mon Sep 17 00:00:00 2001
From: Kyle Meyer
Date: Wed, 10 Apr 2024 16:33:10 -0500
Subject: cpumask: Add for_each_cpu_from()

Add for_each_cpu_from() as a generic cpumask macro.

for_each_cpu_from() is the same as for_each_cpu(), except it starts at
@cpu instead of zero.

Signed-off-by: Kyle Meyer
Acked-by: Yury Norov
Signed-off-by: Yury Norov
---
 include/linux/cpumask.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include')

diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index 1c29947db848..d75060fbd058 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -368,6 +368,16 @@ unsigned int __pure cpumask_next_wrap(int n, const struct cpumask *mask, int sta
 #define for_each_cpu_or(cpu, mask1, mask2)				\
 	for_each_or_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits)
 
+/**
+ * for_each_cpu_from - iterate over CPUs present in @mask, from @cpu to the end of @mask.
+ * @cpu: the (optionally unsigned) integer iterator
+ * @mask: the cpumask pointer
+ *
+ * After the loop, cpu is >= nr_cpu_ids.
+ */
+#define for_each_cpu_from(cpu, mask)					\
+	for_each_set_bit_from(cpu, cpumask_bits(mask), small_cpumask_bits)
+
 /**
  * cpumask_any_but - return a "random" cpu in a cpumask, but not this one.
  * @mask: the cpumask to search
--
cgit v1.2.3
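A minimal usage sketch, not part of the patch: the helper name and the choice of cpu_online_mask are invented for illustration. Note that, unlike with for_each_cpu(), the iterator must be initialized before the loop, since iteration starts at its current value.

/* Hypothetical helper: count the online CPUs whose IDs are >= @start. */
static unsigned int count_online_cpus_from(unsigned int start)
{
	unsigned int cpu = start;
	unsigned int nr = 0;

	for_each_cpu_from(cpu, cpu_online_mask)
		nr++;

	/* After the loop, cpu >= nr_cpu_ids, as documented above. */
	return nr;
}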
From efe3a85eab78b6cc02bdfd16aec4410111ea69c0 Mon Sep 17 00:00:00 2001
From: Yury Norov
Date: Fri, 3 May 2024 12:12:00 -0700
Subject: Compiler Attributes: Add __always_used macro

In some cases, like performance benchmarking, we need to call a function
but don't need to read the returned value. If the compiler recognizes the
function as pure or const, it can remove the function invocation, which is
not what we want.

To prevent that, the common practice is to assign the return value to a
temporary static volatile variable. From the compiler's point of view, the
variable is unused because it is never read back after being assigned. To
make sure the variable is always emitted, we provide the __used attribute.

This works with GCC, but clang still emits -Wunused-but-set-variable. To
suppress that warning, we need to teach clang to do so with the 'unused'
attribute.

Nathan Chancellor explained that in detail: while having the used and
unused attributes together might look unusual, reading the GCC attribute
manual makes it seem like these attributes fulfill similar yet different
roles: __unused__ prevents any unused warnings, while __used__ forces the
variable to be emitted. A strict reading of that does not make it seem
like __used__ implies disabling unused warnings.

The compiler documentation makes it clear what happens behind the 'used'
and 'unused' attributes, but the chosen names may confuse readers if such
a combination catches the eye in random code.

This patch adds the __always_used macro, which combines both attributes
and comments on what happens, for those interested in the details.

Suggested-by: Nathan Chancellor
Reported-by: kernel test robot
Closes: https://lore.kernel.org/oe-kbuild-all/202405030808.UsoMKFNP-lkp@intel.com/
Reviewed-by: Nathan Chancellor
Acked-by: Miguel Ojeda
Signed-off-by: Yury Norov
---
 include/linux/compiler_attributes.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h
index 8bdf6e0918c1..32284cd26d52 100644
--- a/include/linux/compiler_attributes.h
+++ b/include/linux/compiler_attributes.h
@@ -361,6 +361,19 @@
  */
 #define __used			__attribute__((__used__))
 
+/*
+ * The __used attribute guarantees that the attributed variable will
+ * always be emitted by the compiler. It doesn't prevent the compiler
+ * from throwing 'unused' warnings when it can't detect how the variable
+ * is actually used. It's a compiler implementation detail whether to
+ * emit the warning in that case or not.
+ *
+ * The combination of the 'used' and 'unused' attributes ensures that
+ * the variable is emitted, and will not trigger 'unused' warnings.
+ * The attribute is applicable to functions and to static and global
+ * variables.
+ */
+#define __always_used		__used __maybe_unused
+
 /*
  * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-warn_005funused_005fresult-function-attribute
  * clang: https://clang.llvm.org/docs/AttributeReference.html#nodiscard-warn-unused-result
--
cgit v1.2.3
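A sketch of the benchmarking pattern the commit message describes; the function and variable names here are invented for the example, not taken from the kernel:

/* Stand-in for the pure/const function being measured. */
static __attribute_const__ unsigned long bench_op(unsigned long x)
{
	return x * 0x9e3779b97f4a7c15UL;
}

static void bench(void)
{
	/*
	 * The volatile sink keeps the stores (and thus the calls) alive;
	 * __always_used guarantees the variable is emitted and suppresses
	 * clang's -Wunused-but-set-variable, which plain __used alone
	 * does not.
	 */
	static volatile unsigned long __always_used sink;
	unsigned long i;

	for (i = 0; i < 1000000; i++)
		sink = bench_op(i);
}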
From 1c2aa5619348f7573d6f2269e04fd1dac8eddc47 Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu
Date: Thu, 2 May 2024 17:24:43 +0800
Subject: bitops: Optimize fns() for improved performance

The current fns() repeatedly uses __ffs() to find the index of the least
significant bit and then clears that bit using __clear_bit(). The least
significant bit can instead be cleared with 'word &= word - 1'; typically,
one __ffs() plus one __clear_bit() takes longer than a bitwise AND and a
subtraction.

To improve performance, the bit-clearing loop has been replaced with
'word &= word - 1', followed by a single __ffs() to obtain the answer.
This reduces the number of __ffs() calls from n to just one.

This change significantly accelerates fns() in the test_bitops benchmark,
improving its speed by approximately 7.6 times. It also improves the
performance of find_nth_bit() in the find_bit benchmark by approximately
26%.

Before:
test_bitops: fns: 58033164 ns
find_nth_bit: 4254313 ns, 16525 iterations

After:
test_bitops: fns: 7637268 ns
find_nth_bit: 3362863 ns, 16501 iterations

Cc: Andrew Morton
Cc: Rasmus Villemoes
Signed-off-by: Kuan-Wei Chiu
Signed-off-by: Yury Norov
---
 include/linux/bitops.h | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 2ba557e067fe..57ecef354f47 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -254,16 +254,10 @@ static inline unsigned long __ffs64(u64 word)
  */
 static inline unsigned long fns(unsigned long word, unsigned int n)
 {
-	unsigned int bit;
+	while (word && n--)
+		word &= word - 1;
 
-	while (word) {
-		bit = __ffs(word);
-		if (n-- == 0)
-			return bit;
-		__clear_bit(bit, &word);
-	}
-
-	return BITS_PER_LONG;
+	return word ? __ffs(word) : BITS_PER_LONG;
 }
 
 /**
--
cgit v1.2.3
From 0b2811ba11b04353033237359c9d042eb0cdc1c1 Mon Sep 17 00:00:00 2001
From: Yury Norov
Date: Thu, 2 May 2024 10:12:56 -0700
Subject: bitmap: relax find_nth_bit() limitation on return value

The function claims to return the bitmap size if the Nth bit doesn't
exist. This rule is violated in the inline case because the fns() used
there doesn't know anything about the size of the bitmap.

So, relax this requirement to '>= size', and make the outline
implementation a bit cheaper.

All in-tree kernel users of find_nth_bit() are safe against that.

Reported-by: Rasmus Villemoes
Closes: https://lore.kernel.org/all/Zi50cAgR8nZvgLa3@yury-ThinkPad/T/#m6da806a0525e74dcc91f35e5f20766ed4e853e8a
Signed-off-by: Yury Norov
---
 include/linux/find.h | 2 +-
 lib/find_bit.c       | 2 +-
 lib/test_bitmap.c    | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/find.h b/include/linux/find.h
index c69598e383c1..02751e43d448 100644
--- a/include/linux/find.h
+++ b/include/linux/find.h
@@ -220,7 +220,7 @@ unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
  *	idx = find_first_bit(addr, size);
  *
  * Returns the bit number of the N'th set bit.
- * If no such, returns @size.
+ * If no such, returns >= @size.
  */
 static inline
 unsigned long find_nth_bit(const unsigned long *addr, unsigned long size, unsigned long n)
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 32f99e9a670e..0bddfc3ff248 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -87,7 +87,7 @@ out:								\
 	if (sz % BITS_PER_LONG)					\
 		tmp = (FETCH) & BITMAP_LAST_WORD_MASK(sz);	\
 found:								\
-	sz = min(idx * BITS_PER_LONG + fns(tmp, nr), sz);	\
+	sz = idx * BITS_PER_LONG + fns(tmp, nr);		\
 out:								\
 	sz;							\
 })
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 6b2b33579f56..088838f829c9 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -244,7 +244,7 @@ static void __init test_find_nth_bit(void)
 	expect_eq_uint(60,  find_nth_bit(bmap, 64 * 3, 5));
 	expect_eq_uint(80,  find_nth_bit(bmap, 64 * 3, 6));
 	expect_eq_uint(123, find_nth_bit(bmap, 64 * 3, 7));
-	expect_eq_uint(64 * 3, find_nth_bit(bmap, 64 * 3, 8));
+	expect_eq_uint(0, !!(find_nth_bit(bmap, 64 * 3, 8) < 64 * 3));
 
 	expect_eq_uint(10,  find_nth_bit(bmap, 64 * 3 - 1, 0));
 	expect_eq_uint(20,  find_nth_bit(bmap, 64 * 3 - 1, 1));
@@ -254,7 +254,7 @@
 	expect_eq_uint(60,  find_nth_bit(bmap, 64 * 3 - 1, 5));
 	expect_eq_uint(80,  find_nth_bit(bmap, 64 * 3 - 1, 6));
 	expect_eq_uint(123, find_nth_bit(bmap, 64 * 3 - 1, 7));
-	expect_eq_uint(64 * 3 - 1, find_nth_bit(bmap, 64 * 3 - 1, 8));
+	expect_eq_uint(0, !!(find_nth_bit(bmap, 64 * 3 - 1, 8) < 64 * 3 - 1));
 
 	for_each_set_bit(bit, exp1, EXP1_IN_BITS) {
 		b = find_nth_bit(exp1, EXP1_IN_BITS, cnt++);
--
cgit v1.2.3
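Two points above are worth a compact illustration; neither block is part of the patches. First, each 'word &= word - 1' step clears the least significant set bit, so after n such steps a single __ffs() lands on the n'th (0-based) set bit. A standalone sketch, modeling __ffs() with the GCC/clang builtin so it compiles outside the kernel:

#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

static unsigned long fns(unsigned long word, unsigned int n)
{
	while (word && n--)
		word &= word - 1;	/* clear the least significant set bit */

	return word ? (unsigned long)__builtin_ctzl(word) : BITS_PER_LONG;
}

int main(void)
{
	/* The set bits of 0xb4 are at positions 2, 4, 5 and 7. */
	printf("%lu\n", fns(0xb4, 2));	/* 2nd (0-based) set bit: prints 5 */
	printf("%lu\n", fns(0xb4, 8));	/* no such bit: prints 64 on LP64 */
	return 0;
}

Second, under the relaxed contract, callers of find_nth_bit() must compare the result against the size with '>=', never '=='. A defensive pattern (addr, size, n and the error handling are placeholders) would be:

	unsigned long bit = find_nth_bit(addr, size, n);

	if (bit >= size)	/* not 'bit == size': the inline path may return more */
		return -ENOENT;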
Signed-off-by: Andy Shevchenko
Signed-off-by: Yury Norov
---
 include/linux/bitops.h   | 7 -------
 include/linux/wordpart.h | 7 +++++++
 lib/usercopy.c           | 1 +
 3 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 57ecef354f47..3313d2c04e6d 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -8,13 +8,6 @@
 #include
 
-/* Set bits in the first 'n' bytes when loaded from memory */
-#ifdef __LITTLE_ENDIAN
-# define aligned_byte_mask(n) ((1UL << 8*(n))-1)
-#else
-# define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n)))
-#endif
-
 #define BITS_PER_TYPE(type)	(sizeof(type) * BITS_PER_BYTE)
 #define BITS_TO_LONGS(nr)	__KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(long))
 #define BITS_TO_U64(nr)		__KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u64))
diff --git a/include/linux/wordpart.h b/include/linux/wordpart.h
index f6f8f83b15b0..4ca1ba66d2f0 100644
--- a/include/linux/wordpart.h
+++ b/include/linux/wordpart.h
@@ -39,4 +39,11 @@
  */
 #define REPEAT_BYTE(x)	((~0ul / 0xff) * (x))
 
+/* Set bits in the first 'n' bytes when loaded from memory */
+#ifdef __LITTLE_ENDIAN
+# define aligned_byte_mask(n) ((1UL << 8*(n))-1)
+#else
+# define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n)))
+#endif
+
 #endif // _LINUX_WORDPART_H
diff --git a/lib/usercopy.c b/lib/usercopy.c
index d29fe29c6849..4b62e6299cc8 100644
--- a/lib/usercopy.c
+++ b/lib/usercopy.c
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include <linux/wordpart.h>
 #include
 
 /* out-of-line parts */
--
cgit v1.2.3
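For reference, a sketch of what the moved macro evaluates to and the kind of check it enables. The helper below is illustrative only, not the kernel's actual user, and it assumes n < sizeof(long) so the shifts stay well defined:

/*
 * On 64-bit little-endian:
 *	aligned_byte_mask(1) == 0x00000000000000ff
 *	aligned_byte_mask(3) == 0x0000000000ffffff
 * The big-endian variant covers the same "first n bytes in memory",
 * which on that layout sit at the most significant end of the register.
 */
static bool first_n_bytes_zero(unsigned long word, unsigned int n)
{
	/* True iff the first 'n' bytes of the loaded word are all zero. */
	return (word & aligned_byte_mask(n)) == 0;
}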