From 1c2aa5619348f7573d6f2269e04fd1dac8eddc47 Mon Sep 17 00:00:00 2001
From: Kuan-Wei Chiu
Date: Thu, 2 May 2024 17:24:43 +0800
Subject: bitops: Optimize fns() for improved performance

The current fns() repeatedly uses __ffs() to find the index of the
least significant bit and then clears that bit with __clear_bit().
Clearing the least significant bit can be done more cheaply with
word &= word - 1 instead.

Typically, one __ffs() plus one __clear_bit() takes longer than a
bitwise AND and a subtraction. To improve performance, the loop that
clears the least significant bit now uses word &= word - 1, followed by
a single __ffs() to obtain the answer. This reduces the number of
__ffs() calls from n to just one.

This change significantly accelerates fns() in the test_bitops
benchmark, improving its speed by approximately 7.6 times. It also
improves find_nth_bit() in the find_bit benchmark by approximately 26%.

Before:
test_bitops: fns: 58033164 ns
find_nth_bit: 4254313 ns, 16525 iterations

After:
test_bitops: fns: 7637268 ns
find_nth_bit: 3362863 ns, 16501 iterations

CC: Andrew Morton
CC: Rasmus Villemoes
Signed-off-by: Kuan-Wei Chiu
Signed-off-by: Yury Norov
---
 include/linux/bitops.h | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

(limited to 'include/linux/bitops.h')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 2ba557e067fe..57ecef354f47 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -254,16 +254,10 @@ static inline unsigned long __ffs64(u64 word)
  */
 static inline unsigned long fns(unsigned long word, unsigned int n)
 {
-	unsigned int bit;
+	while (word && n--)
+		word &= word - 1;
 
-	while (word) {
-		bit = __ffs(word);
-		if (n-- == 0)
-			return bit;
-		__clear_bit(bit, &word);
-	}
-
-	return BITS_PER_LONG;
+	return word ? __ffs(word) : BITS_PER_LONG;
 }
 
 /**
-- 
cgit v1.2.3


From 9f2c2d6ba13da08643c65b948ce5e3d616864c47 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko
Date: Tue, 7 May 2024 23:01:31 +0300
Subject: bitops: Move aligned_byte_mask() to wordpart.h

bitops.h is for bit-related operations. aligned_byte_mask() is about
byte (or partial machine-word) operations, for which we have a separate
header; move the macro to wordpart.h to consolidate similar operations.
Signed-off-by: Andy Shevchenko
Signed-off-by: Yury Norov
---
 include/linux/bitops.h   | 7 -------
 include/linux/wordpart.h | 7 +++++++
 lib/usercopy.c           | 1 +
 3 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'include/linux/bitops.h')

diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 57ecef354f47..3313d2c04e6d 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -8,13 +8,6 @@
 
 #include <uapi/linux/bitops.h>
 
-/* Set bits in the first 'n' bytes when loaded from memory */
-#ifdef __LITTLE_ENDIAN
-# define aligned_byte_mask(n) ((1UL << 8*(n))-1)
-#else
-# define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n)))
-#endif
-
 #define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE)
 #define BITS_TO_LONGS(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(long))
 #define BITS_TO_U64(nr) __KERNEL_DIV_ROUND_UP(nr, BITS_PER_TYPE(u64))
diff --git a/include/linux/wordpart.h b/include/linux/wordpart.h
index f6f8f83b15b0..4ca1ba66d2f0 100644
--- a/include/linux/wordpart.h
+++ b/include/linux/wordpart.h
@@ -39,4 +39,11 @@
  */
 #define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
 
+/* Set bits in the first 'n' bytes when loaded from memory */
+#ifdef __LITTLE_ENDIAN
+# define aligned_byte_mask(n) ((1UL << 8*(n))-1)
+#else
+# define aligned_byte_mask(n) (~0xffUL << (BITS_PER_LONG - 8 - 8*(n)))
+#endif
+
 #endif // _LINUX_WORDPART_H
diff --git a/lib/usercopy.c b/lib/usercopy.c
index d29fe29c6849..4b62e6299cc8 100644
--- a/lib/usercopy.c
+++ b/lib/usercopy.c
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include <linux/wordpart.h>
 #include
 
 /* out-of-line parts */
-- 
cgit v1.2.3
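
The first patch above relies on the fact that word &= word - 1 clears only the
least significant set bit, so the n-th set bit can be found by dropping n bits
and then doing a single find-first-set. What follows is a minimal standalone C
sketch of that idea, not the kernel code: it assumes a 64-bit userspace build,
with __builtin_ctzl() standing in for the kernel's __ffs() and the literal 64
standing in for BITS_PER_LONG.

/*
 * Standalone sketch of the fns() rework (illustration only, not kernel code).
 * Assumptions: 64-bit build, __builtin_ctzl() in place of __ffs(), 64 in
 * place of BITS_PER_LONG.
 */
#include <assert.h>
#include <stdio.h>

/* Reference version, mirroring the removed loop: one scan + one clear per step. */
static unsigned long fns_ref(unsigned long word, unsigned int n)
{
	while (word) {
		unsigned int bit = __builtin_ctzl(word);

		if (n-- == 0)
			return bit;
		word &= ~(1UL << bit);
	}

	return 64;
}

/* Reworked version, mirroring the added code: drop n low bits, then one scan. */
static unsigned long fns_new(unsigned long word, unsigned int n)
{
	while (word && n--)
		word &= word - 1;	/* clear the least significant set bit */

	return word ? (unsigned long)__builtin_ctzl(word) : 64;
}

int main(void)
{
	unsigned long word = 0xa5a0UL;	/* set bits: 5, 7, 8, 10, 13, 15 */
	unsigned int n;

	for (n = 0; n < 8; n++)
		assert(fns_ref(word, n) == fns_new(word, n));

	printf("3rd set bit of 0x%lx is bit %lu\n", word, fns_new(word, 2));
	return 0;
}

Clearing the bit this way avoids the per-iteration bit scan of the old loop,
which is where the roughly 7.6x test_bitops improvement comes from.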
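For the second patch, the moved aligned_byte_mask() builds a mask covering the
first n bytes of a word as it sits in memory. A tiny worked example, again a
userspace sketch rather than kernel code, assuming a 64-bit little-endian
build: the macro body is copied from the wordpart.h hunk above, everything
else is illustrative. On such a build, aligned_byte_mask(3) evaluates to
0x0000000000ffffff.

/* Sketch only: 64-bit little-endian assumed; macro copied from the patch. */
#include <stdio.h>

#define aligned_byte_mask(n) ((1UL << 8*(n))-1)

int main(void)
{
	unsigned int n;

	/* Print the mask for the first 1..7 bytes of a 64-bit word. */
	for (n = 1; n < 8; n++)
		printf("aligned_byte_mask(%u) = 0x%016lx\n", n, aligned_byte_mask(n));

	return 0;
}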