diff mbox

[RESEND] slab: introduce the flag SLAB_MINIMIZE_WASTE

Message ID alpine.LRH.2.02.1804251917460.2429@file01.intranet.prod.int.rdu2.redhat.com (mailing list archive)
State Not Applicable, archived
Delegated to: Mike Snitzer
Headers show

Commit Message

Mikulas Patocka April 25, 2018, 11:24 p.m. UTC
On Wed, 25 Apr 2018, Mikulas Patocka wrote:

> 
> 
> On Wed, 18 Apr 2018, Christopher Lameter wrote:
> 
> > On Tue, 17 Apr 2018, Mikulas Patocka wrote:
> > 
> > > I can make a slub-only patch with no extra flag (on a freshly booted
> > > system it increases only the order of caches "TCPv6" and "sighand_cache"
> > > by one - so it should not have unexpected effects):
> > >
> > > Doing a generic solution for slab would be more complicated because slab
> > > assumes that all slabs have the same order, so it can't fall back to
> > > lower-order allocations.
> > 
> > Well again SLAB uses compound pages and thus would be able to detect the
> > size of the page. It may be some work but it could be done.
> > 
> > >
> > > Index: linux-2.6/mm/slub.c
> > > ===================================================================
> > > --- linux-2.6.orig/mm/slub.c	2018-04-17 19:59:49.000000000 +0200
> > > +++ linux-2.6/mm/slub.c	2018-04-17 20:58:23.000000000 +0200
> > > @@ -3252,6 +3252,7 @@ static inline unsigned int slab_order(un
> > >  static inline int calculate_order(unsigned int size, unsigned int reserved)
> > >  {
> > >  	unsigned int order;
> > > +	unsigned int test_order;
> > >  	unsigned int min_objects;
> > >  	unsigned int max_objects;
> > >
> > > @@ -3277,7 +3278,7 @@ static inline int calculate_order(unsign
> > >  			order = slab_order(size, min_objects,
> > >  					slub_max_order, fraction, reserved);
> > >  			if (order <= slub_max_order)
> > > -				return order;
> > > +				goto ret_order;
> > >  			fraction /= 2;
> > >  		}
> > >  		min_objects--;
> > > @@ -3289,15 +3290,25 @@ static inline int calculate_order(unsign
> > >  	 */
> > >  	order = slab_order(size, 1, slub_max_order, 1, reserved);
> > 
> > The slab order is determined in slab_order()
> > 
> > >  	if (order <= slub_max_order)
> > > -		return order;
> > > +		goto ret_order;
> > >
> > >  	/*
> > >  	 * Doh this slab cannot be placed using slub_max_order.
> > >  	 */
> > >  	order = slab_order(size, 1, MAX_ORDER, 1, reserved);
> > > -	if (order < MAX_ORDER)
> > > -		return order;
> > > -	return -ENOSYS;
> > > +	if (order >= MAX_ORDER)
> > > +		return -ENOSYS;
> > > +
> > > +ret_order:
> > > +	for (test_order = order + 1; test_order < MAX_ORDER; test_order++) {
> > > +		unsigned long order_objects = ((PAGE_SIZE << order) - reserved) / size;
> > > +		unsigned long test_order_objects = ((PAGE_SIZE << test_order) - reserved) / size;
> > > +		if (test_order_objects > min(32, MAX_OBJS_PER_PAGE))
> > > +			break;
> > > +		if (test_order_objects > order_objects << (test_order - order))
> > > +			order = test_order;
> > > +	}
> > > +	return order;
> > 
> > Could yo move that logic into slab_order()? It does something awfully
> > similar.
> 
> But slab_order (and its caller) limits the order to "max_order" and we 
> want more.
> 
> Perhaps slab_order should be dropped and calculate_order totally 
> rewritten?
> 
> Mikulas

Do you want this? It deletes slab_order and replaces it with the 
"minimize_waste" logic directly.

The patch starts with a minimal order for a given size and increases the 
order if one of these conditions is met:
* we are below slub_min_order
* we are below min_objects and slub_max_order
* we go above slub_max_order only if it minimizes waste and if we don't 
  increase the object count above 32

It simplifies the code and it is very similar to the old algorithms, most 
slab caches have the same order, so it shouldn't cause any regressions.

This patch changes order of these slabs:
TCPv6: 3 -> 4
sighand_cache: 3 -> 4
task_struct: 3 -> 4

---
 mm/slub.c |   76 +++++++++++++++++++++-----------------------------------------
 1 file changed, 26 insertions(+), 50 deletions(-)


--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel

Comments

Christoph Lameter (Ampere) April 26, 2018, 7:01 p.m. UTC | #1
On Wed, 25 Apr 2018, Mikulas Patocka wrote:

> Do you want this? It deletes slab_order and replaces it with the
> "minimize_waste" logic directly.

Well yes that looks better. Now we need to make it easy to read and less
complicated. Maybe try to keep as much as possible of the old code
and also the names of variables to make it easier to review?

> It simplifies the code and it is very similar to the old algorithms, most
> slab caches have the same order, so it shouldn't cause any regressions.
>
> This patch changes order of these slabs:
> TCPv6: 3 -> 4
> sighand_cache: 3 -> 4
> task_struct: 3 -> 4

Hmmm... order 4 for these caches may cause some concern. These should stay
under costly order I think. Otherwise allocations are no longer
guaranteed.

> @@ -3269,35 +3245,35 @@ static inline int calculate_order(unsign
>  	max_objects = order_objects(slub_max_order, size, reserved);
>  	min_objects = min(min_objects, max_objects);
>
> -	while (min_objects > 1) {
> -		unsigned int fraction;
> +	/* Get the minimum acceptable order for one object */
> +	order = get_order(size + reserved);
> +
> +	for (test_order = order + 1; test_order < MAX_ORDER; test_order++) {
> +		unsigned order_obj = order_objects(order, size, reserved);
> +		unsigned test_order_obj = order_objects(test_order, size, reserved);
> +
> +		/* If there are too many objects, stop searching */
> +		if (test_order_obj > MAX_OBJS_PER_PAGE)
> +			break;
>
> -		fraction = 16;
> -		while (fraction >= 4) {
> -			order = slab_order(size, min_objects,
> -					slub_max_order, fraction, reserved);
> -			if (order <= slub_max_order)
> -				return order;
> -			fraction /= 2;
> -		}
> -		min_objects--;
> +		/* Always increase up to slub_min_order */
> +		if (test_order <= slub_min_order)
> +			order = test_order;

Well that is a significant change. In our current scheme the order
boundary wins.


> +
> +		/* If we are below min_objects and slub_max_order, increase order */
> +		if (order_obj < min_objects && test_order <= slub_max_order)
> +			order = test_order;
> +
> +		/* Increase order even more, but only if it reduces waste */
> +		if (test_order_obj <= 32 &&

Where does the 32 come from?

> +		    test_order_obj > order_obj << (test_order - order))

Add more () to make the condition better readable.

> +			order = test_order;

Can we just call test_order order and avoid using the long variable names
here? Variable names in functions are typically short.

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Mikulas Patocka April 26, 2018, 9:09 p.m. UTC | #2
On Thu, 26 Apr 2018, Christopher Lameter wrote:

> On Wed, 25 Apr 2018, Mikulas Patocka wrote:
> 
> > Do you want this? It deletes slab_order and replaces it with the
> > "minimize_waste" logic directly.
> 
> Well yes that looks better. Now we need to make it easy to read and less
> complicated. Maybe try to keep as much as possible of the old code
> and also the names of variables to make it easier to review?
> 
> > It simplifies the code and it is very similar to the old algorithms, most
> > slab caches have the same order, so it shouldn't cause any regressions.
> >
> > This patch changes order of these slabs:
> > TCPv6: 3 -> 4
> > sighand_cache: 3 -> 4
> > task_struct: 3 -> 4
> 
> Hmmm... order 4 for these caches may cause some concern. These should stay
> under costly order I think. Otherwise allocations are no longer
> guaranteed.

You said that slub has fallback to smaller order allocations.

The whole purpose of this "minimize waste" approach is to use higher-order 
allocations to use memory more efficiently, so it is just doing its job. 
(for these 3 caches, order-4 really wastes less memory than order-3 - on 
my system TCPv6 and sighand_cache have size 2112, task_struct 2752).

We could improve the fallback code, so that if order-4 allocation fails, 
it tries order-3 allocation, and then falls back to order-0. But I think 
that these failures are rare enough that it is not a problem.

> > @@ -3269,35 +3245,35 @@ static inline int calculate_order(unsign
> >  	max_objects = order_objects(slub_max_order, size, reserved);
> >  	min_objects = min(min_objects, max_objects);
> >
> > -	while (min_objects > 1) {
> > -		unsigned int fraction;
> > +	/* Get the minimum acceptable order for one object */
> > +	order = get_order(size + reserved);
> > +
> > +	for (test_order = order + 1; test_order < MAX_ORDER; test_order++) {
> > +		unsigned order_obj = order_objects(order, size, reserved);
> > +		unsigned test_order_obj = order_objects(test_order, size, reserved);
> > +
> > +		/* If there are too many objects, stop searching */
> > +		if (test_order_obj > MAX_OBJS_PER_PAGE)
> > +			break;
> >
> > -		fraction = 16;
> > -		while (fraction >= 4) {
> > -			order = slab_order(size, min_objects,
> > -					slub_max_order, fraction, reserved);
> > -			if (order <= slub_max_order)
> > -				return order;
> > -			fraction /= 2;
> > -		}
> > -		min_objects--;
> > +		/* Always increase up to slub_min_order */
> > +		if (test_order <= slub_min_order)
> > +			order = test_order;
> 
> Well that is a significant change. In our current scheme the order
> boundart wins.

I think it's not a change. The existing function slab_order() starts with 
min_order (unless it overshoots MAX_OBJS_PER_PAGE) and then goes upwards. 
My code does the same - my code tests for MAX_OBJS_PER_PAGE (and bails out 
if we would overshoot it) and increases the order until it reaches 
slub_min_order (and then increases it even more if it satisfies the other 
conditions).

If you believe that it behaves differently, please describe the situation 
in detail.

> > +
> > +		/* If we are below min_objects and slub_max_order, increase order */
> > +		if (order_obj < min_objects && test_order <= slub_max_order)
> > +			order = test_order;
> > +
> > +		/* Increase order even more, but only if it reduces waste */
> > +		if (test_order_obj <= 32 &&
> 
> Where does the 32 come from?

It is to avoid extremely high order for extremely small slabs.

For example, see kmalloc-96.
10922 96-byte objects would fit into 1MiB
21845 96-byte objects would fit into 2MiB

The algorithm would recognize this one more object that fits into 2MiB 
slab as "waste reduction" and increase the order to 2MiB - and we don't 
want this.

So, the general reasoning is - if we have 32 objects in a slab, then it is 
already considered that wasted space is reasonably low and we don't want 
to increase the order more.

Currently, kmalloc-96 uses order-0 - that is reasonable (we already have 
42 objects in 4k page, so we don't need to use higher order, even if it 
wastes one-less object).

> > +		    test_order_obj > order_obj << (test_order - order))
> 
> Add more () to make the condition better readable.
> 
> > +			order = test_order;
> 
> Can we just call test_order order and avoid using the long variable names
> here? Variable names in functions are typically short.

You need two variables - "order" and "test_order".

"order" is the best order found so far and "test_order" is the order that 
we are now testing. If "test_order" wastes less space than "order", we 
assign order = test_order.

Mikulas

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
Christoph Lameter (Ampere) April 27, 2018, 4:41 p.m. UTC | #3
On Thu, 26 Apr 2018, Mikulas Patocka wrote:

> > Hmmm... order 4 for these caches may cause some concern. These should stay
> > under costly order I think. Otherwise allocations are no longer
> > guaranteed.
>
> You said that slub has fallback to smaller order allocations.

Yes it does...

> The whole purpose of this "minimize waste" approach is to use higher-order
> allocations to use memory more efficiently, so it is just doing its job.
> (for these 3 caches, order-4 really wastes less memory than order-3 - on
> my system TCPv6 and sighand_cache have size 2112, task_struct 2752).

Hmmm... Ok if the others are fine with this as well. I got some pushback
there in the past.

> We could improve the fallback code, so that if order-4 allocation fails,
> it tries order-3 allocation, and then falls back to order-0. But I think
> that these failures are rare enough that it is not a problem.

I also think that would be too many fallbacks.

> > > +		/* Increase order even more, but only if it reduces waste */
> > > +		if (test_order_obj <= 32 &&
> >
> > Where does the 32 come from?
>
> It is to avoid extremely high order for extremely small slabs.
>
> For example, see kmalloc-96.
> 10922 96-byte objects would fit into 1MiB
> 21845 96-byte objects would fit into 2MiB

That is the result of considering absolute byte wastage..

> The algorithm would recognize this one more object that fits into 2MiB
> slab as "waste reduction" and increase the order to 2MiB - and we don't
> want this.
>
> So, the general reasoning is - if we have 32 objects in a slab, then it is
> already considered that wasted space is reasonably low and we don't want
> to increase the order more.
>
> Currently, kmalloc-96 uses order-0 - that is reasonable (we already have
> 42 objects in 4k page, so we don't need to use higher order, even if it
> wastes one-less object).


The old code uses the concept of a "fraction" to calculate overhead. The
code here uses absolute counts of bytes. Fraction looks better to me.

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel
diff mbox

Patch

Index: linux-2.6/mm/slub.c
===================================================================
--- linux-2.6.orig/mm/slub.c	2018-04-26 00:07:30.000000000 +0200
+++ linux-2.6/mm/slub.c	2018-04-26 00:21:37.000000000 +0200
@@ -3224,34 +3224,10 @@  static unsigned int slub_min_objects;
  * requested a higher mininum order then we start with that one instead of
  * the smallest order which will fit the object.
  */
-static inline unsigned int slab_order(unsigned int size,
-		unsigned int min_objects, unsigned int max_order,
-		unsigned int fract_leftover, unsigned int reserved)
-{
-	unsigned int min_order = slub_min_order;
-	unsigned int order;
-
-	if (order_objects(min_order, size, reserved) > MAX_OBJS_PER_PAGE)
-		return get_order(size * MAX_OBJS_PER_PAGE) - 1;
-
-	for (order = max(min_order, (unsigned int)get_order(min_objects * size + reserved));
-			order <= max_order; order++) {
-
-		unsigned int slab_size = (unsigned int)PAGE_SIZE << order;
-		unsigned int rem;
-
-		rem = (slab_size - reserved) % size;
-
-		if (rem <= slab_size / fract_leftover)
-			break;
-	}
-
-	return order;
-}
-
 static inline int calculate_order(unsigned int size, unsigned int reserved)
 {
 	unsigned int order;
+	unsigned int test_order;
 	unsigned int min_objects;
 	unsigned int max_objects;
 
@@ -3269,35 +3245,35 @@  static inline int calculate_order(unsign
 	max_objects = order_objects(slub_max_order, size, reserved);
 	min_objects = min(min_objects, max_objects);
 
-	while (min_objects > 1) {
-		unsigned int fraction;
+	/* Get the minimum acceptable order for one object */
+	order = get_order(size + reserved);
+
+	for (test_order = order + 1; test_order < MAX_ORDER; test_order++) {
+		unsigned order_obj = order_objects(order, size, reserved);
+		unsigned test_order_obj = order_objects(test_order, size, reserved);
+
+		/* If there are too many objects, stop searching */
+		if (test_order_obj > MAX_OBJS_PER_PAGE)
+			break;
 
-		fraction = 16;
-		while (fraction >= 4) {
-			order = slab_order(size, min_objects,
-					slub_max_order, fraction, reserved);
-			if (order <= slub_max_order)
-				return order;
-			fraction /= 2;
-		}
-		min_objects--;
+		/* Always increase up to slub_min_order */
+		if (test_order <= slub_min_order)
+			order = test_order;
+
+		/* If we are below min_objects and slub_max_order, increase order */
+		if (order_obj < min_objects && test_order <= slub_max_order)
+			order = test_order;
+
+		/* Increase order even more, but only if it reduces waste */
+		if (test_order_obj <= 32 &&
+		    test_order_obj > order_obj << (test_order - order))
+			order = test_order;
 	}
 
-	/*
-	 * We were unable to place multiple objects in a slab. Now
-	 * lets see if we can place a single object there.
-	 */
-	order = slab_order(size, 1, slub_max_order, 1, reserved);
-	if (order <= slub_max_order)
-		return order;
+	if (order >= MAX_ORDER)
+		return -ENOSYS;
 
-	/*
-	 * Doh this slab cannot be placed using slub_max_order.
-	 */
-	order = slab_order(size, 1, MAX_ORDER, 1, reserved);
-	if (order < MAX_ORDER)
-		return order;
-	return -ENOSYS;
+	return order;
 }
 
 static void