diff options
author | Alexander van Heukelum <heukelum@mailshack.com> | 2008-03-11 16:17:19 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-04-26 19:21:16 +0200 |
commit | 64970b68d2b3ed32b964b0b30b1b98518fde388e (patch) | |
tree | 7d8eb5ea3ab1a841afa0f7ae1c65e7be4a9ca690 | |
parent | 60b6783a044a55273b637983f52965c2808a6b86 (diff) | |
download | kernel-crypto-64970b68d2b3ed32b964b0b30b1b98518fde388e.tar.gz kernel-crypto-64970b68d2b3ed32b964b0b30b1b98518fde388e.tar.xz kernel-crypto-64970b68d2b3ed32b964b0b30b1b98518fde388e.zip |
x86, generic: optimize find_next_(zero_)bit for small constant-size bitmaps
This moves an optimization for searching constant-sized small
bitmaps form x86_64-specific to generic code.
On an i386 defconfig (the x86#testing one), the size of vmlinux hardly
changes with this applied. I have observed only four places where this
optimization avoids a call into find_next_bit:
In the functions return_unused_surplus_pages, alloc_fresh_huge_page,
and adjust_pool_surplus, this patch avoids a call for a 1-bit bitmap.
In __next_cpu a call is avoided for a 32-bit bitmap. That's it.
On x86_64, 52 locations are optimized with a minimal increase in
code size:
Current #testing defconfig:
146 x bsf, 27 x find_next_*bit
text data bss dec hex filename
5392637 846592 724424 6963653 6a41c5 vmlinux
After removing the x86_64 specific optimization for find_next_*bit:
94 x bsf, 79 x find_next_*bit
text data bss dec hex filename
5392358 846592 724424 6963374 6a40ae vmlinux
After this patch (making the optimization generic):
146 x bsf, 27 x find_next_*bit
text data bss dec hex filename
5392396 846592 724424 6963412 6a40d4 vmlinux
[ tglx@linutronix.de: build fixes ]
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | include/asm-generic/bitops/find.h | 2 | ||||
-rw-r--r-- | include/asm-x86/bitops.h | 6 | ||||
-rw-r--r-- | include/asm-x86/bitops_64.h | 10 | ||||
-rw-r--r-- | include/linux/bitops.h | 77 | ||||
-rw-r--r-- | lib/find_next_bit.c | 25 |
5 files changed, 88 insertions, 32 deletions
diff --git a/include/asm-generic/bitops/find.h b/include/asm-generic/bitops/find.h index 72a51e5a12e..1914e974251 100644 --- a/include/asm-generic/bitops/find.h +++ b/include/asm-generic/bitops/find.h @@ -1,11 +1,13 @@ #ifndef _ASM_GENERIC_BITOPS_FIND_H_ #define _ASM_GENERIC_BITOPS_FIND_H_ +#ifndef CONFIG_GENERIC_FIND_NEXT_BIT extern unsigned long find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset); extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, unsigned long offset); +#endif #define find_first_bit(addr, size) find_next_bit((addr), (size), 0) #define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0) diff --git a/include/asm-x86/bitops.h b/include/asm-x86/bitops.h index 31e408de90c..1ae7b270a1e 100644 --- a/include/asm-x86/bitops.h +++ b/include/asm-x86/bitops.h @@ -306,12 +306,6 @@ static int test_bit(int nr, const volatile unsigned long *addr); #undef BIT_ADDR #undef ADDR -unsigned long find_next_bit(const unsigned long *addr, - unsigned long size, unsigned long offset); -unsigned long find_next_zero_bit(const unsigned long *addr, - unsigned long size, unsigned long offset); - - #ifdef CONFIG_X86_32 # include "bitops_32.h" #else diff --git a/include/asm-x86/bitops_64.h b/include/asm-x86/bitops_64.h index 65b20fb2ae7..7118ef2cc4e 100644 --- a/include/asm-x86/bitops_64.h +++ b/include/asm-x86/bitops_64.h @@ -15,16 +15,6 @@ static inline long __scanbit(unsigned long val, unsigned long max) return val; } -#define find_next_bit(addr,size,off) \ -((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \ - ((off) + (__scanbit((*(unsigned long *)addr) >> (off),(size)-(off)))) : \ - find_next_bit(addr,size,off))) - -#define find_next_zero_bit(addr,size,off) \ -((__builtin_constant_p(size) && (size) <= BITS_PER_LONG ? \ - ((off)+(__scanbit(~(((*(unsigned long *)addr)) >> (off)),(size)-(off)))) : \ - find_next_zero_bit(addr,size,off))) - #define find_first_bit(addr, size) \ ((__builtin_constant_p((size)) && (size) <= BITS_PER_LONG \ ? (__scanbit(*(unsigned long *)(addr), (size))) \ diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 40d54731de7..3865f2c93bd 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -112,4 +112,81 @@ static inline unsigned fls_long(unsigned long l) return fls64(l); } +#ifdef __KERNEL__ +#ifdef CONFIG_GENERIC_FIND_NEXT_BIT +extern unsigned long __find_next_bit(const unsigned long *addr, + unsigned long size, unsigned long offset); + +/** + * find_next_bit - find the next set bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + */ +static __always_inline unsigned long +find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + unsigned long value; + + /* Avoid a function call if the bitmap size is a constant */ + /* and not bigger than BITS_PER_LONG. */ + + /* insert a sentinel so that __ffs returns size if there */ + /* are no set bits in the bitmap */ + if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { + value = (*addr) & ((~0ul) << offset); + value |= (1ul << size); + return __ffs(value); + } + + /* the result of __ffs(0) is undefined, so it needs to be */ + /* handled separately */ + if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) { + value = (*addr) & ((~0ul) << offset); + return (value == 0) ? BITS_PER_LONG : __ffs(value); + } + + /* size is not constant or too big */ + return __find_next_bit(addr, size, offset); +} + +extern unsigned long __find_next_zero_bit(const unsigned long *addr, + unsigned long size, unsigned long offset); + +/** + * find_next_zero_bit - find the next cleared bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The bitmap size in bits + */ +static __always_inline unsigned long +find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + unsigned long value; + + /* Avoid a function call if the bitmap size is a constant */ + /* and not bigger than BITS_PER_LONG. */ + + /* insert a sentinel so that __ffs returns size if there */ + /* are no set bits in the bitmap */ + if (__builtin_constant_p(size) && (size < BITS_PER_LONG)) { + value = (~(*addr)) & ((~0ul) << offset); + value |= (1ul << size); + return __ffs(value); + } + + /* the result of __ffs(0) is undefined, so it needs to be */ + /* handled separately */ + if (__builtin_constant_p(size) && (size == BITS_PER_LONG)) { + value = (~(*addr)) & ((~0ul) << offset); + return (value == 0) ? BITS_PER_LONG : __ffs(value); + } + + /* size is not constant or too big */ + return __find_next_zero_bit(addr, size, offset); +} +#endif /* CONFIG_GENERIC_FIND_NEXT_BIT */ +#endif /* __KERNEL__ */ #endif diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c index 5820e072b89..ce94c4c92d1 100644 --- a/lib/find_next_bit.c +++ b/lib/find_next_bit.c @@ -15,17 +15,12 @@ #include <asm/byteorder.h> #define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) -#undef find_next_bit -#undef find_next_zero_bit - -/** - * find_next_bit - find the next set bit in a memory region - * @addr: The address to base the search on - * @offset: The bitnumber to start searching at - * @size: The maximum size to search + +/* + * Find the next set bit in a memory region. */ -unsigned long find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) +unsigned long __find_next_bit(const unsigned long *addr, + unsigned long size, unsigned long offset) { const unsigned long *p = addr + BITOP_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG-1); @@ -62,15 +57,14 @@ found_first: found_middle: return result + __ffs(tmp); } - -EXPORT_SYMBOL(find_next_bit); +EXPORT_SYMBOL(__find_next_bit); /* * This implementation of find_{first,next}_zero_bit was stolen from * Linus' asm-alpha/bitops.h. */ -unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) +unsigned long __find_next_zero_bit(const unsigned long *addr, + unsigned long size, unsigned long offset) { const unsigned long *p = addr + BITOP_WORD(offset); unsigned long result = offset & ~(BITS_PER_LONG-1); @@ -107,8 +101,7 @@ found_first: found_middle: return result + ffz(tmp); } - -EXPORT_SYMBOL(find_next_zero_bit); +EXPORT_SYMBOL(__find_next_zero_bit); #ifdef __BIG_ENDIAN |