diff mbox series

[04/13] lib: introduce BITS_{FIRST,LAST} macro

Message ID 20210316015424.1999082-5-yury.norov@gmail.com (mailing list archive)
State New, archived
Headers show
Series lib/find_bit: fast path for small bitmaps | expand

Commit Message

Yury Norov March 16, 2021, 1:54 a.m. UTC
BITMAP_{LAST,FIRST}_WORD_MASK() in linux/bitmap.h duplicates the
functionality of GENMASK(). The scope of BITMAP* macros is wider
than just bitmaps. This patch defines 4 new macros: BITS_FIRST(),
BITS_LAST(), BITS_FIRST_MASK() and BITS_LAST_MASK() in linux/bits.h
on top of GENMASK() and replaces BITMAP_{LAST,FIRST}_WORD_MASK()
to avoid duplication and increase the scope of the macros.

This change doesn't affect code generation. On ARM64:
scripts/bloat-o-meter vmlinux.before vmlinux
add/remove: 1/2 grow/shrink: 2/0 up/down: 17/-16 (1)
Function                                     old     new   delta
ethtool_get_drvinfo                          900     908      +8
e843419@0cf2_0001309d_7f0                      -       8      +8
vermagic                                      48      49      +1
e843419@0d45_000138bb_f68                      8       -      -8
e843419@0cc9_00012bce_198c                     8       -      -8
Total: Before=26092016, After=26092017, chg +0.00%

The compilerr is:
	aarch64-linux-gnu-gcc (Linaro GCC 7.3-2018.05) 7.3.1 20180425

Signed-off-by: Yury Norov <yury.norov@gmail.com>
---
 include/linux/bitmap.h          | 27 ++++++++++++---------------
 include/linux/bits.h            |  6 ++++++
 include/linux/cpumask.h         |  8 ++++----
 include/linux/netdev_features.h |  2 +-
 include/linux/nodemask.h        |  2 +-
 lib/bitmap.c                    | 26 +++++++++++++-------------
 lib/find_bit.c                  |  4 ++--
 lib/genalloc.c                  |  8 ++++----
 8 files changed, 43 insertions(+), 40 deletions(-)

Comments

Rasmus Villemoes March 16, 2021, 8:35 a.m. UTC | #1
On 16/03/2021 02.54, Yury Norov wrote:
> BITMAP_{LAST,FIRST}_WORD_MASK() in linux/bitmap.h duplicates the
> functionality of GENMASK(). The scope of BITMAP* macros is wider
> than just bitmaps. This patch defines 4 new macros: BITS_FIRST(),
> BITS_LAST(), BITS_FIRST_MASK() and BITS_LAST_MASK() in linux/bits.h
> on top of GENMASK() and replaces BITMAP_{LAST,FIRST}_WORD_MASK()
> to avoid duplication and increase the scope of the macros.
> 
> This change doesn't affect code generation. On ARM64:
> scripts/bloat-o-meter vmlinux.before vmlinux
> add/remove: 1/2 grow/shrink: 2/0 up/down: 17/-16 (1)
> Function                                     old     new   delta
> ethtool_get_drvinfo                          900     908      +8
> e843419@0cf2_0001309d_7f0                      -       8      +8
> vermagic                                      48      49      +1
> e843419@0d45_000138bb_f68                      8       -      -8
> e843419@0cc9_00012bce_198c                     8       -      -8

[what on earth are those weird symbols?]


> diff --git a/include/linux/bits.h b/include/linux/bits.h
> index 7f475d59a097..8c191c29506e 100644
> --- a/include/linux/bits.h
> +++ b/include/linux/bits.h
> @@ -37,6 +37,12 @@
>  #define GENMASK(h, l) \
>  	(GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
>  
> +#define BITS_FIRST(nr)		GENMASK((nr), 0)
> +#define BITS_LAST(nr)		GENMASK(BITS_PER_LONG - 1, (nr))
> +
> +#define BITS_FIRST_MASK(nr)	BITS_FIRST((nr) % BITS_PER_LONG)
> +#define BITS_LAST_MASK(nr)	BITS_LAST((nr) % BITS_PER_LONG)

I don't think it's a good idea to propagate the unusual closed-range
semantics of GENMASK to those wrappers. Almost all C and kernel code
uses the 'inclusive lower bound, exclusive upper bound', and I'd expect
BITS_FIRST(5) to result in a word with five bits set, not six. So I
think these changes as-is make the code much harder to read and understand.

Regardless, please add some comments on the valid input ranges to the
macros, whether that ends up being 0 <= nr < BITS_PER_LONG or 0 < nr <=
BITS_PER_LONG or whatnot.

It would also be much easier to review if you just redefined the
BITMAP_LAST_WORD_MASK macros etc. in terms of these new things, so you
wouldn't have to do a lot of mechanical changes at the same time as
introducing the new ones - especially when those mechanical changes
involve adding a "minus 1" everywhere.

Rasmus
Andy Shevchenko March 16, 2021, 11:42 a.m. UTC | #2
On Tue, Mar 16, 2021 at 09:35:35AM +0100, Rasmus Villemoes wrote:
> On 16/03/2021 02.54, Yury Norov wrote:
> > BITMAP_{LAST,FIRST}_WORD_MASK() in linux/bitmap.h duplicates the
> > functionality of GENMASK(). The scope of BITMAP* macros is wider
> > than just bitmaps. This patch defines 4 new macros: BITS_FIRST(),
> > BITS_LAST(), BITS_FIRST_MASK() and BITS_LAST_MASK() in linux/bits.h
> > on top of GENMASK() and replaces BITMAP_{LAST,FIRST}_WORD_MASK()
> > to avoid duplication and increase the scope of the macros.
> > 
> > This change doesn't affect code generation. On ARM64:
> > scripts/bloat-o-meter vmlinux.before vmlinux
> > add/remove: 1/2 grow/shrink: 2/0 up/down: 17/-16 (1)
> > Function                                     old     new   delta
> > ethtool_get_drvinfo                          900     908      +8
> > e843419@0cf2_0001309d_7f0                      -       8      +8
> > vermagic                                      48      49      +1
> > e843419@0d45_000138bb_f68                      8       -      -8
> > e843419@0cc9_00012bce_198c                     8       -      -8
> 
> [what on earth are those weird symbols?]
> 
> 
> > diff --git a/include/linux/bits.h b/include/linux/bits.h
> > index 7f475d59a097..8c191c29506e 100644
> > --- a/include/linux/bits.h
> > +++ b/include/linux/bits.h
> > @@ -37,6 +37,12 @@
> >  #define GENMASK(h, l) \
> >  	(GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
> >  
> > +#define BITS_FIRST(nr)		GENMASK((nr), 0)
> > +#define BITS_LAST(nr)		GENMASK(BITS_PER_LONG - 1, (nr))
> > +
> > +#define BITS_FIRST_MASK(nr)	BITS_FIRST((nr) % BITS_PER_LONG)
> > +#define BITS_LAST_MASK(nr)	BITS_LAST((nr) % BITS_PER_LONG)
> 
> I don't think it's a good idea to propagate the unusual closed-range
> semantics of GENMASK to those wrappers. Almost all C and kernel code
> uses the 'inclusive lower bound, exclusive upper bound', and I'd expect
> BITS_FIRST(5) to result in a word with five bits set, not six. So I
> think these changes as-is make the code much harder to read and understand.
> 
> Regardless, please add some comments on the valid input ranges to the
> macros, whether that ends up being 0 <= nr < BITS_PER_LONG or 0 < nr <=
> BITS_PER_LONG or whatnot.
> 
> It would also be much easier to review if you just redefined the
> BITMAP_LAST_WORD_MASK macros etc. in terms of these new things, so you
> wouldn't have to do a lot of mechanical changes at the same time as
> introducing the new ones - especially when those mechanical changes
> involve adding a "minus 1" everywhere.

I tend to agree with Rasmus here.
Yury Norov March 17, 2021, 5:40 a.m. UTC | #3
On Tue, Mar 16, 2021 at 01:42:45PM +0200, Andy Shevchenko wrote:
> On Tue, Mar 16, 2021 at 09:35:35AM +0100, Rasmus Villemoes wrote:
> > On 16/03/2021 02.54, Yury Norov wrote:
> > > BITMAP_{LAST,FIRST}_WORD_MASK() in linux/bitmap.h duplicates the
> > > functionality of GENMASK(). The scope of BITMAP* macros is wider
> > > than just bitmaps. This patch defines 4 new macros: BITS_FIRST(),
> > > BITS_LAST(), BITS_FIRST_MASK() and BITS_LAST_MASK() in linux/bits.h
> > > on top of GENMASK() and replaces BITMAP_{LAST,FIRST}_WORD_MASK()
> > > to avoid duplication and increase the scope of the macros.
> > > 
> > > This change doesn't affect code generation. On ARM64:
> > > scripts/bloat-o-meter vmlinux.before vmlinux
> > > add/remove: 1/2 grow/shrink: 2/0 up/down: 17/-16 (1)
> > > Function                                     old     new   delta
> > > ethtool_get_drvinfo                          900     908      +8
> > > e843419@0cf2_0001309d_7f0                      -       8      +8
> > > vermagic                                      48      49      +1
> > > e843419@0d45_000138bb_f68                      8       -      -8
> > > e843419@0cc9_00012bce_198c                     8       -      -8
> > 
> > [what on earth are those weird symbols?]
> > 
> > 
> > > diff --git a/include/linux/bits.h b/include/linux/bits.h
> > > index 7f475d59a097..8c191c29506e 100644
> > > --- a/include/linux/bits.h
> > > +++ b/include/linux/bits.h
> > > @@ -37,6 +37,12 @@
> > >  #define GENMASK(h, l) \
> > >  	(GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
> > >  
> > > +#define BITS_FIRST(nr)		GENMASK((nr), 0)
> > > +#define BITS_LAST(nr)		GENMASK(BITS_PER_LONG - 1, (nr))
> > > +
> > > +#define BITS_FIRST_MASK(nr)	BITS_FIRST((nr) % BITS_PER_LONG)
> > > +#define BITS_LAST_MASK(nr)	BITS_LAST((nr) % BITS_PER_LONG)
> > 
> > I don't think it's a good idea to propagate the unusual closed-range
> > semantics of GENMASK to those wrappers. Almost all C and kernel code
> > uses the 'inclusive lower bound, exclusive upper bound', and I'd expect
> > BITS_FIRST(5) to result in a word with five bits set, not six. So I
> > think these changes as-is make the code much harder to read and understand.
> > 
> > Regardless, please add some comments on the valid input ranges to the
> > macros, whether that ends up being 0 <= nr < BITS_PER_LONG or 0 < nr <=
> > BITS_PER_LONG or whatnot.
> > 
> > It would also be much easier to review if you just redefined the
> > BITMAP_LAST_WORD_MASK macros etc. in terms of these new things, so you
> > wouldn't have to do a lot of mechanical changes at the same time as
> > introducing the new ones - especially when those mechanical changes
> > involve adding a "minus 1" everywhere.
> 
> I tend to agree with Rasmus here.

OK. All this plus terrible GENMASK(high, low) design, when high goes
first, makes me feel like we need to deprecate GENMASK and propose a
new interface.

What do you think about this:
BITS_FIRST(bitnum)      -> [0, bitnum)
BITS_LAST(bitnum)       -> [bitnum, BITS_PER_LONG)
BITS_RANGE(begin, end)  -> [begin, end)

We can pick BITS_{LAST,FIRST} implementation from existing BITMAP_*_WORD_MASK
analogues, and make the BITS_RANGE like:
        #define BITS_RANGE(begin, end) BITS_FIRST(end) & BITS_LAST(begin)

Regarding BITMAP_*_WORD_MASK, I can save them in bitmap.h as aliases
to BITS_{LAST,FIRST} to avoid massive renaming. (Should I?)

Would this all work for you?
Rasmus Villemoes March 17, 2021, 7:58 p.m. UTC | #4
On 17/03/2021 06.40, Yury Norov wrote:
> On Tue, Mar 16, 2021 at 01:42:45PM +0200, Andy Shevchenko wrote:

>>> It would also be much easier to review if you just redefined the
>>> BITMAP_LAST_WORD_MASK macros etc. in terms of these new things, so you
>>> wouldn't have to do a lot of mechanical changes at the same time as
>>> introducing the new ones - especially when those mechanical changes
>>> involve adding a "minus 1" everywhere.
>>
>> I tend to agree with Rasmus here.
> 
> OK. All this plus terrible GENMASK(high, low) design, when high goes
> first, makes me feel like we need to deprecate GENMASK and propose a
> new interface.
> 
> What do you think about this:
> BITS_FIRST(bitnum)      -> [0, bitnum)
> BITS_LAST(bitnum)       -> [bitnum, BITS_PER_LONG)
> BITS_RANGE(begin, end)  -> [begin, end)

Better, though I'm not too happy about BITS_LAST(n) not producing a word
with the n highest bits set. I dunno, I don't have better names.
BITS_FROM/BITS_UPTO perhaps, but not really (and upto sounds like it is
inclusive). BITS_LOW/BITS_HIGH have the same problem as BITS_LAST.

Also, be careful to document what one can expect from the boundary
values 0/BITS_PER_LONG. Is BITS_FIRST(0) a valid invocation? Does it
yield 0UL? How about BITS_FIRST(BITS_PER_LONG), does that give ~0UL?
Note that BITMAP_{FIRST,LAST}_WORD_MASK never produce 0, they're never
used except with a word we know to be part of the bitmap.

> We can pick BITS_{LAST,FIRST} implementation from existing BITMAP_*_WORD_MASK
> analogues, and make the BITS_RANGE like:
>         #define BITS_RANGE(begin, end) BITS_FIRST(end) & BITS_LAST(begin)
> 
> Regarding BITMAP_*_WORD_MASK, I can save them in bitmap.h as aliases
> to BITS_{LAST,FIRST} to avoid massive renaming. (Should I?)

Yes, now that I read these again, I definitely think the
BITMAP_{FIRST,LAST}_WORD_MASK should stay (whether their implementation
change I don't care). Their names document what they do much better than
if you replace them with their potential new implementations:
BITMAP_FIRST_WORD_MASK(start) is obviously about having to mask off some
low bits of the first word we're looking at because we're looking at an
offset into the bitmap, and similarly BITMAP_LAST_WORD_MASK(nbits)
explains itself: nbits is such that the last word needs some masking.
But their replacements would be BITS_LAST(start) and BITS_FIRST(nbits)
respectively (possibly with those arguments reduced mod N), which is
quite confusing.

> Would this all work for you?

Maybe, I think I'd have to see the implementation and how those new
macros get used.

Thanks,
Rasmus
Yury Norov March 17, 2021, 11:33 p.m. UTC | #5
On Wed, Mar 17, 2021 at 08:58:04PM +0100, Rasmus Villemoes wrote:
> On 17/03/2021 06.40, Yury Norov wrote:
> > On Tue, Mar 16, 2021 at 01:42:45PM +0200, Andy Shevchenko wrote:
> 
> >>> It would also be much easier to review if you just redefined the
> >>> BITMAP_LAST_WORD_MASK macros etc. in terms of these new things, so you
> >>> wouldn't have to do a lot of mechanical changes at the same time as
> >>> introducing the new ones - especially when those mechanical changes
> >>> involve adding a "minus 1" everywhere.
> >>
> >> I tend to agree with Rasmus here.
> > 
> > OK. All this plus terrible GENMASK(high, low) design, when high goes
> > first, makes me feel like we need to deprecate GENMASK and propose a
> > new interface.
> > 
> > What do you think about this:
> > BITS_FIRST(bitnum)      -> [0, bitnum)
> > BITS_LAST(bitnum)       -> [bitnum, BITS_PER_LONG)
> > BITS_RANGE(begin, end)  -> [begin, end)
> 
> Better, though I'm not too happy about BITS_LAST(n) not producing a word
> with the n highest bits set. I dunno, I don't have better names.
> BITS_FROM/BITS_UPTO perhaps, but not really (and upto sounds like it is
> inclusive). BITS_LOW/BITS_HIGH have the same problem as BITS_LAST.
> 
> Also, be careful to document what one can expect from the boundary
> values 0/BITS_PER_LONG. Is BITS_FIRST(0) a valid invocation? Does it
> yield 0UL? How about BITS_FIRST(BITS_PER_LONG), does that give ~0UL?
> Note that BITMAP_{FIRST,LAST}_WORD_MASK never produce 0, they're never
> used except with a word we know to be part of the bitmap.
> 
> > We can pick BITS_{LAST,FIRST} implementation from existing BITMAP_*_WORD_MASK
> > analogues, and make the BITS_RANGE like:
> >         #define BITS_RANGE(begin, end) BITS_FIRST(end) & BITS_LAST(begin)
> > 
> > Regarding BITMAP_*_WORD_MASK, I can save them in bitmap.h as aliases
> > to BITS_{LAST,FIRST} to avoid massive renaming. (Should I?)
> 
> Yes, now that I read these again, I definitely think the
> BITMAP_{FIRST,LAST}_WORD_MASK should stay (whether their implementation
> change I don't care). Their names document what they do much better than
> if you replace them with their potential new implementations:
> BITMAP_FIRST_WORD_MASK(start) is obviously about having to mask off some
> low bits of the first word we're looking at because we're looking at an
> offset into the bitmap, and similarly BITMAP_LAST_WORD_MASK(nbits)
> explains itself: nbits is such that the last word needs some masking.
> But their replacements would be BITS_LAST(start) and BITS_FIRST(nbits)
> respectively (possibly with those arguments reduced mod N), which is
> quite confusing.
> 
> > Would this all work for you?
> 
> Maybe, I think I'd have to see the implementation and how those new
> macros get used.
> 
> Thanks,
> Rasmus

I looked at this with a fresh eye this morning. All the noise we
discuss I made to just call BITS_FIRST() 3 times in find.h. I think
that for the purpose of this series, in find.h, it's worth to use
GENMASK(size - 1, 0) where needed.

Regarding the general view on this, all the problems come from the
fact that bitmap API is split between linux/bitmap.h and
asm_generic/bitops/find.h. Find.h is naturally a part of bitmaps,
but because of the split, it's hard to use handy bitmap.h macros
in find.h.

Joining the headers is far out of the scope of this series. If no
objections, I'd prefer to drop this patch now, and later carefully
figure out how to join find.h and bitmap.h.

Yury
diff mbox series

Patch

diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h
index 70a932470b2d..adf7bd9f0467 100644
--- a/include/linux/bitmap.h
+++ b/include/linux/bitmap.h
@@ -219,9 +219,6 @@  extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int
 extern int bitmap_print_to_pagebuf(bool list, char *buf,
 				   const unsigned long *maskp, int nmaskbits);
 
-#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
-#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))
-
 /*
  * The static inlines below do not handle constant nbits==0 correctly,
  * so make such users (should any ever turn up) call the out-of-line
@@ -257,7 +254,7 @@  static inline void bitmap_copy_clear_tail(unsigned long *dst,
 {
 	bitmap_copy(dst, src, nbits);
 	if (nbits % BITS_PER_LONG)
-		dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits);
+		dst[nbits / BITS_PER_LONG] &= BITS_FIRST_MASK(nbits - 1);
 }
 
 /*
@@ -282,7 +279,7 @@  static inline int bitmap_and(unsigned long *dst, const unsigned long *src1,
 			const unsigned long *src2, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
-		return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0;
+		return (*dst = *src1 & *src2 & BITS_FIRST(nbits - 1)) != 0;
 	return __bitmap_and(dst, src1, src2, nbits);
 }
 
@@ -308,7 +305,7 @@  static inline int bitmap_andnot(unsigned long *dst, const unsigned long *src1,
 			const unsigned long *src2, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
-		return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
+		return (*dst = *src1 & ~(*src2) & BITS_FIRST(nbits - 1)) != 0;
 	return __bitmap_andnot(dst, src1, src2, nbits);
 }
 
@@ -332,7 +329,7 @@  static inline int bitmap_equal(const unsigned long *src1,
 			const unsigned long *src2, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
-		return !((*src1 ^ *src2) & BITMAP_LAST_WORD_MASK(nbits));
+		return !((*src1 ^ *src2) & BITS_FIRST(nbits - 1));
 	if (__builtin_constant_p(nbits & BITMAP_MEM_MASK) &&
 	    IS_ALIGNED(nbits, BITMAP_MEM_ALIGNMENT))
 		return !memcmp(src1, src2, nbits / 8);
@@ -356,14 +353,14 @@  static inline bool bitmap_or_equal(const unsigned long *src1,
 	if (!small_const_nbits(nbits))
 		return __bitmap_or_equal(src1, src2, src3, nbits);
 
-	return !(((*src1 | *src2) ^ *src3) & BITMAP_LAST_WORD_MASK(nbits));
+	return !(((*src1 | *src2) ^ *src3) & BITS_FIRST(nbits - 1));
 }
 
 static inline int bitmap_intersects(const unsigned long *src1,
 			const unsigned long *src2, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
-		return ((*src1 & *src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0;
+		return ((*src1 & *src2) & BITS_FIRST(nbits - 1)) != 0;
 	else
 		return __bitmap_intersects(src1, src2, nbits);
 }
@@ -372,7 +369,7 @@  static inline int bitmap_subset(const unsigned long *src1,
 			const unsigned long *src2, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
-		return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits));
+		return !((*src1 & ~(*src2)) & BITS_FIRST(nbits - 1));
 	else
 		return __bitmap_subset(src1, src2, nbits);
 }
@@ -380,7 +377,7 @@  static inline int bitmap_subset(const unsigned long *src1,
 static inline bool bitmap_empty(const unsigned long *src, unsigned nbits)
 {
 	if (small_const_nbits(nbits))
-		return ! (*src & BITMAP_LAST_WORD_MASK(nbits));
+		return !(*src & BITS_FIRST(nbits - 1));
 
 	return find_first_bit(src, nbits) == nbits;
 }
@@ -388,7 +385,7 @@  static inline bool bitmap_empty(const unsigned long *src, unsigned nbits)
 static inline bool bitmap_full(const unsigned long *src, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
-		return ! (~(*src) & BITMAP_LAST_WORD_MASK(nbits));
+		return !(~(*src) & BITS_FIRST(nbits - 1));
 
 	return find_first_zero_bit(src, nbits) == nbits;
 }
@@ -396,7 +393,7 @@  static inline bool bitmap_full(const unsigned long *src, unsigned int nbits)
 static __always_inline int bitmap_weight(const unsigned long *src, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
-		return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
+		return hweight_long(*src & BITS_FIRST(nbits - 1));
 	return __bitmap_weight(src, nbits);
 }
 
@@ -432,7 +429,7 @@  static inline void bitmap_shift_right(unsigned long *dst, const unsigned long *s
 				unsigned int shift, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
-		*dst = (*src & BITMAP_LAST_WORD_MASK(nbits)) >> shift;
+		*dst = (*src & BITS_FIRST(nbits - 1)) >> shift;
 	else
 		__bitmap_shift_right(dst, src, shift, nbits);
 }
@@ -441,7 +438,7 @@  static inline void bitmap_shift_left(unsigned long *dst, const unsigned long *sr
 				unsigned int shift, unsigned int nbits)
 {
 	if (small_const_nbits(nbits))
-		*dst = (*src << shift) & BITMAP_LAST_WORD_MASK(nbits);
+		*dst = (*src << shift) & BITS_FIRST(nbits - 1);
 	else
 		__bitmap_shift_left(dst, src, shift, nbits);
 }
diff --git a/include/linux/bits.h b/include/linux/bits.h
index 7f475d59a097..8c191c29506e 100644
--- a/include/linux/bits.h
+++ b/include/linux/bits.h
@@ -37,6 +37,12 @@ 
 #define GENMASK(h, l) \
 	(GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
 
+#define BITS_FIRST(nr)		GENMASK((nr), 0)
+#define BITS_LAST(nr)		GENMASK(BITS_PER_LONG - 1, (nr))
+
+#define BITS_FIRST_MASK(nr)	BITS_FIRST((nr) % BITS_PER_LONG)
+#define BITS_LAST_MASK(nr)	BITS_LAST((nr) % BITS_PER_LONG)
+
 #define __GENMASK_ULL(h, l) \
 	(((~ULL(0)) - (ULL(1) << (l)) + 1) & \
 	 (~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h))))
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index c53364c4296d..57d7cdc22eca 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -899,7 +899,7 @@  static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
 #if NR_CPUS <= BITS_PER_LONG
 #define CPU_BITS_ALL						\
 {								\
-	[BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS)	\
+	[BITS_TO_LONGS(NR_CPUS)-1] = BITS_FIRST_MASK(NR_CPUS - 1)	\
 }
 
 #else /* NR_CPUS > BITS_PER_LONG */
@@ -907,7 +907,7 @@  static inline const struct cpumask *get_cpu_mask(unsigned int cpu)
 #define CPU_BITS_ALL						\
 {								\
 	[0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL,		\
-	[BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS)	\
+	[BITS_TO_LONGS(NR_CPUS)-1] = BITS_FIRST_MASK(NR_CPUS - 1)	\
 }
 #endif /* NR_CPUS > BITS_PER_LONG */
 
@@ -931,13 +931,13 @@  cpumap_print_to_pagebuf(bool list, char *buf, const struct cpumask *mask)
 #if NR_CPUS <= BITS_PER_LONG
 #define CPU_MASK_ALL							\
 (cpumask_t) { {								\
-	[BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS)	\
+	[BITS_TO_LONGS(NR_CPUS)-1] = BITS_FIRST_MASK(NR_CPUS - 1)	\
 } }
 #else
 #define CPU_MASK_ALL							\
 (cpumask_t) { {								\
 	[0 ... BITS_TO_LONGS(NR_CPUS)-2] = ~0UL,			\
-	[BITS_TO_LONGS(NR_CPUS)-1] = BITMAP_LAST_WORD_MASK(NR_CPUS)	\
+	[BITS_TO_LONGS(NR_CPUS)-1] = BITS_FIRST_MASK(NR_CPUS - 1)	\
 } }
 #endif /* NR_CPUS > BITS_PER_LONG */
 
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 3de38d6a0aea..4cef7fe02f09 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -173,7 +173,7 @@  enum {
  */
 static inline int find_next_netdev_feature(u64 feature, unsigned long start)
 {
-	/* like BITMAP_LAST_WORD_MASK() for u64
+	/* like BITS_FIRST_MASK() for u64
 	 * this sets the most significant 64 - start to 0.
 	 */
 	feature &= ~0ULL >> (-start & ((sizeof(feature) * 8) - 1));
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index ac398e143c9a..2df0787c9155 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -302,7 +302,7 @@  static inline int __first_unset_node(const nodemask_t *maskp)
 			find_first_zero_bit(maskp->bits, MAX_NUMNODES));
 }
 
-#define NODE_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(MAX_NUMNODES)
+#define NODE_MASK_LAST_WORD BITS_FIRST_MASK(MAX_NUMNODES - 1)
 
 #if MAX_NUMNODES <= BITS_PER_LONG
 
diff --git a/lib/bitmap.c b/lib/bitmap.c
index 9f4626a4c95f..549b184a3bae 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -52,7 +52,7 @@  int __bitmap_equal(const unsigned long *bitmap1,
 			return 0;
 
 	if (bits % BITS_PER_LONG)
-		if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
+		if ((bitmap1[k] ^ bitmap2[k]) & BITS_FIRST_MASK(bits - 1))
 			return 0;
 
 	return 1;
@@ -76,7 +76,7 @@  bool __bitmap_or_equal(const unsigned long *bitmap1,
 		return true;
 
 	tmp = (bitmap1[k] | bitmap2[k]) ^ bitmap3[k];
-	return (tmp & BITMAP_LAST_WORD_MASK(bits)) == 0;
+	return (tmp & BITS_FIRST_MASK(bits - 1)) == 0;
 }
 
 void __bitmap_complement(unsigned long *dst, const unsigned long *src, unsigned int bits)
@@ -103,7 +103,7 @@  void __bitmap_shift_right(unsigned long *dst, const unsigned long *src,
 {
 	unsigned k, lim = BITS_TO_LONGS(nbits);
 	unsigned off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG;
-	unsigned long mask = BITMAP_LAST_WORD_MASK(nbits);
+	unsigned long mask = BITS_FIRST_MASK(nbits - 1);
 	for (k = 0; off + k < lim; ++k) {
 		unsigned long upper, lower;
 
@@ -246,7 +246,7 @@  int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
 		result |= (dst[k] = bitmap1[k] & bitmap2[k]);
 	if (bits % BITS_PER_LONG)
 		result |= (dst[k] = bitmap1[k] & bitmap2[k] &
-			   BITMAP_LAST_WORD_MASK(bits));
+			   BITS_FIRST_MASK(bits - 1));
 	return result != 0;
 }
 EXPORT_SYMBOL(__bitmap_and);
@@ -284,7 +284,7 @@  int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
 		result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
 	if (bits % BITS_PER_LONG)
 		result |= (dst[k] = bitmap1[k] & ~bitmap2[k] &
-			   BITMAP_LAST_WORD_MASK(bits));
+			   BITS_FIRST_MASK(bits - 1));
 	return result != 0;
 }
 EXPORT_SYMBOL(__bitmap_andnot);
@@ -310,7 +310,7 @@  int __bitmap_intersects(const unsigned long *bitmap1,
 			return 1;
 
 	if (bits % BITS_PER_LONG)
-		if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
+		if ((bitmap1[k] & bitmap2[k]) & BITS_FIRST_MASK(bits - 1))
 			return 1;
 	return 0;
 }
@@ -325,7 +325,7 @@  int __bitmap_subset(const unsigned long *bitmap1,
 			return 0;
 
 	if (bits % BITS_PER_LONG)
-		if ((bitmap1[k] & ~bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits))
+		if ((bitmap1[k] & ~bitmap2[k]) & BITS_FIRST_MASK(bits - 1))
 			return 0;
 	return 1;
 }
@@ -340,7 +340,7 @@  int __bitmap_weight(const unsigned long *bitmap, unsigned int bits)
 		w += hweight_long(bitmap[k]);
 
 	if (bits % BITS_PER_LONG)
-		w += hweight_long(bitmap[k] & BITMAP_LAST_WORD_MASK(bits));
+		w += hweight_long(bitmap[k] & BITS_FIRST_MASK(bits - 1));
 
 	return w;
 }
@@ -351,7 +351,7 @@  void __bitmap_set(unsigned long *map, unsigned int start, int len)
 	unsigned long *p = map + BIT_WORD(start);
 	const unsigned int size = start + len;
 	int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
-	unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+	unsigned long mask_to_set = BITS_LAST_MASK(start);
 
 	while (len - bits_to_set >= 0) {
 		*p |= mask_to_set;
@@ -361,7 +361,7 @@  void __bitmap_set(unsigned long *map, unsigned int start, int len)
 		p++;
 	}
 	if (len) {
-		mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+		mask_to_set &= BITS_FIRST_MASK(size - 1);
 		*p |= mask_to_set;
 	}
 }
@@ -372,7 +372,7 @@  void __bitmap_clear(unsigned long *map, unsigned int start, int len)
 	unsigned long *p = map + BIT_WORD(start);
 	const unsigned int size = start + len;
 	int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
-	unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+	unsigned long mask_to_clear = BITS_LAST_MASK(start);
 
 	while (len - bits_to_clear >= 0) {
 		*p &= ~mask_to_clear;
@@ -382,7 +382,7 @@  void __bitmap_clear(unsigned long *map, unsigned int start, int len)
 		p++;
 	}
 	if (len) {
-		mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+		mask_to_clear &= BITS_FIRST_MASK(size - 1);
 		*p &= ~mask_to_clear;
 	}
 }
@@ -1291,7 +1291,7 @@  void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf, unsigned int nbits
 
 	/* Clear tail bits in last word beyond nbits. */
 	if (nbits % BITS_PER_LONG)
-		bitmap[(halfwords - 1) / 2] &= BITMAP_LAST_WORD_MASK(nbits);
+		bitmap[(halfwords - 1) / 2] &= BITS_FIRST_MASK(nbits - 1);
 }
 EXPORT_SYMBOL(bitmap_from_arr32);
 
diff --git a/lib/find_bit.c b/lib/find_bit.c
index f67f86fd2f62..8c2a71a18793 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -44,7 +44,7 @@  static unsigned long _find_next_bit(const unsigned long *addr1,
 	tmp ^= invert;
 
 	/* Handle 1st word. */
-	mask = BITMAP_FIRST_WORD_MASK(start);
+	mask = BITS_LAST_MASK(start);
 	if (le)
 		mask = swab(mask);
 
@@ -141,7 +141,7 @@  EXPORT_SYMBOL(find_first_zero_bit);
 unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
 {
 	if (size) {
-		unsigned long val = BITMAP_LAST_WORD_MASK(size);
+		unsigned long val = BITS_FIRST_MASK(size - 1);
 		unsigned long idx = (size-1) / BITS_PER_LONG;
 
 		do {
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 5dcf9cdcbc46..0af7275517ff 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -87,7 +87,7 @@  bitmap_set_ll(unsigned long *map, unsigned long start, unsigned long nr)
 	unsigned long *p = map + BIT_WORD(start);
 	const unsigned long size = start + nr;
 	int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
-	unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+	unsigned long mask_to_set = BITS_LAST_MASK(start);
 
 	while (nr >= bits_to_set) {
 		if (set_bits_ll(p, mask_to_set))
@@ -98,7 +98,7 @@  bitmap_set_ll(unsigned long *map, unsigned long start, unsigned long nr)
 		p++;
 	}
 	if (nr) {
-		mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+		mask_to_set &= BITS_FIRST_MASK(size - 1);
 		if (set_bits_ll(p, mask_to_set))
 			return nr;
 	}
@@ -123,7 +123,7 @@  bitmap_clear_ll(unsigned long *map, unsigned long start, unsigned long nr)
 	unsigned long *p = map + BIT_WORD(start);
 	const unsigned long size = start + nr;
 	int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
-	unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+	unsigned long mask_to_clear = BITS_LAST_MASK(start);
 
 	while (nr >= bits_to_clear) {
 		if (clear_bits_ll(p, mask_to_clear))
@@ -134,7 +134,7 @@  bitmap_clear_ll(unsigned long *map, unsigned long start, unsigned long nr)
 		p++;
 	}
 	if (nr) {
-		mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+		mask_to_clear &= BITS_FIRST_MASK(size - 1);
 		if (clear_bits_ll(p, mask_to_clear))
 			return nr;
 	}