diff mbox series

[3/5] dm: support retrieving struct dm_target from struct dm_dev

Message ID 20240514090445.2847-4-yang.yang@vivo.com (mailing list archive)
State Superseded, archived
Delegated to: Mike Snitzer
Headers show
Series dm: empty flush optimization | expand

Commit Message

YangYang May 14, 2024, 9:04 a.m. UTC
Add a list to the struct dm_dev structure to store the associated
targets, while also allowing differentiation between different target
types.

Signed-off-by: Yang Yang <yang.yang@vivo.com>
---
 drivers/md/dm-table.c         | 36 +++++++++++++++++++++++++++++++++++
 include/linux/device-mapper.h |  3 +++
 2 files changed, 39 insertions(+)

Comments

Benjamin Marzinski May 15, 2024, 3:42 p.m. UTC | #1
On Tue, May 14, 2024 at 05:04:42PM +0800, Yang Yang wrote:
> Add a list to the struct dm_dev structure to store the associated
> targets, while also allowing differentiation between different target
> types.

I still think this is more complex than it needs to be. If devices that
support flush_pass_around can guarantee that:

1. They will send a flush bio to all of their table devices
2. They are fine with another target sending the flush bio to their
   table devices

Then I don't see why we need the table devices to keep track of all the
different target types that are using them. Am I missing something here?

If we don't need to worry about sending a flush bio to a target of each
type that is using a table device, then all we need to do is call
__send_empty_flush_bios() for enough targets to cover all the table
devices. This seems a lot easier to track. We just need another flag in
dm_target, something like sends_pass_around_flush.

When a target calls dm_get_device(), if it adds a new table device to
t->devices, then it's the first target in this table to use that device.
If flush_pass_around is set for this target, then it also sets
sends_pass_around_flush. In __send_empty_flush() if the table has
flush_pass_around set, when you iterate through the devices, you only
call __send_empty_flush_bios() for the ones with sends_pass_around_flush
set.

Or am I overlooking something?

-Ben

> 
> Signed-off-by: Yang Yang <yang.yang@vivo.com>
> ---
>  drivers/md/dm-table.c         | 36 +++++++++++++++++++++++++++++++++++
>  include/linux/device-mapper.h |  3 +++
>  2 files changed, 39 insertions(+)
> 
> diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
> index bd68af10afed..f6554590b7af 100644
> --- a/drivers/md/dm-table.c
> +++ b/drivers/md/dm-table.c
> @@ -741,6 +741,8 @@ int dm_table_add_target(struct dm_table *t, const char *type,
>  	if (ti->flush_pass_around == 0)
>  		t->flush_pass_around = 0;
>  
> +	INIT_LIST_HEAD(&ti->list);
> +
>  	return 0;
>  
>   bad:
> @@ -2134,6 +2136,25 @@ void dm_table_postsuspend_targets(struct dm_table *t)
>  	suspend_targets(t, POSTSUSPEND);
>  }
>  
> +static int dm_link_dev_to_target(struct dm_target *ti, struct dm_dev *dev,
> +		sector_t start, sector_t len, void *data)
> +{
> +	struct list_head *targets = &dev->targets;
> +	struct dm_target *pti;
> +
> +	if (!list_empty(targets)) {
> +		list_for_each_entry(pti, targets, list) {
> +			if (pti->type == ti->type)
> +				return 0;
> +		}
> +	}
> +
> +	if (list_empty(&ti->list))
> +		list_add_tail(&ti->list, targets);
> +
> +	return 0;
> +}
> +
>  int dm_table_resume_targets(struct dm_table *t)
>  {
>  	unsigned int i;
> @@ -2162,6 +2183,21 @@ int dm_table_resume_targets(struct dm_table *t)
>  			ti->type->resume(ti);
>  	}
>  
> +	if (t->flush_pass_around) {
> +		struct list_head *devices = &t->devices;
> +		struct dm_dev_internal *dd;
> +
> +		list_for_each_entry(dd, devices, list)
> +			INIT_LIST_HEAD(&dd->dm_dev->targets);
> +
> +		for (i = 0; i < t->num_targets; i++) {
> +			struct dm_target *ti = dm_table_get_target(t, i);
> +
> +			if (ti->type->iterate_devices)
> +				ti->type->iterate_devices(ti, dm_link_dev_to_target, NULL);
> +		}
> +	}
> +
>  	return 0;
>  }
>  
> diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
> index 0893ff8c01b6..19e03f9b2589 100644
> --- a/include/linux/device-mapper.h
> +++ b/include/linux/device-mapper.h
> @@ -169,6 +169,7 @@ struct dm_dev {
>  	struct dax_device *dax_dev;
>  	blk_mode_t mode;
>  	char name[16];
> +	struct list_head targets;
>  };
>  
>  /*
> @@ -298,6 +299,8 @@ struct dm_target {
>  	struct dm_table *table;
>  	struct target_type *type;
>  
> +	struct list_head list;
> +
>  	/* target limits */
>  	sector_t begin;
>  	sector_t len;
> -- 
> 2.34.1
>
Mikulas Patocka May 15, 2024, 3:53 p.m. UTC | #2
On Wed, 15 May 2024, Benjamin Marzinski wrote:

> On Tue, May 14, 2024 at 05:04:42PM +0800, Yang Yang wrote:
> > Add a list to the struct dm_dev structure to store the associated
> > targets, while also allowing differentiation between different target
> > types.
> 
> I still think this is more complex than it needs to be. If devices that
> support flush_pass_around can guarantee that:
> 
> 1. They will send a flush bio to all of their table devices
> 2. They are fine with another target sending the flush bio to their
>    table devices
> 
> Then I don't see why we need the table devices to keep track of all the
> different target types that are using them. Am I missing something here?
> 
> If we don't need to worry about sending a flush bio to a target of each
> type that is using a table device, then all we need to do is call
> __send_empty_flush_bios() for enough targets to cover all the table
> devices. This seems a lot easier to track. We just need another flag in
> dm_target, something like sends_pass_around_flush.
> 
> When a target calls dm_get_device(), if it adds a new table device to
> t->devices, then it's the first target in this table to use that device.
> If flush_pass_around is set for this target, then it also sets
> sends_pass_around_flush. In __send_empty_flush() if the table has
> flush_pass_around set, when you iterate through the devices, you only
> call __send_empty_flush_bios() for the ones with sends_pass_around_flush
> set.
> 
> Or am I overlooking something?
> 
> -Ben

Yes, I agree that it is complex.

I reworked the patch, I'm testing it now and I'll send it when it passes 
the tests.

Mikulas
Benjamin Marzinski May 15, 2024, 4 p.m. UTC | #3
On Wed, May 15, 2024 at 11:42:04AM -0400, Benjamin Marzinski wrote:
> When a target calls dm_get_device(), if it adds a new table device to
> t->devices, then it's the first target in this table to use that device.
> If flush_pass_around is set for this target, then it also sets
> sends_pass_around_flush. In __send_empty_flush() if the table has
> flush_pass_around set, when you iterate through the devices, you only

Err, "When you iterate through the *targets*, you only ..." In this
method you don't iterate through the list of devices (which is supposed
to be protected by t->devices_lock).

> call __send_empty_flush_bios() for the ones with sends_pass_around_flush
> set.
> 
> Or am I overlooking something?
> 
> -Ben
YangYang May 16, 2024, 1:55 a.m. UTC | #4
On 2024/5/15 23:42, Benjamin Marzinski wrote:
> On Tue, May 14, 2024 at 05:04:42PM +0800, Yang Yang wrote:
>> Add a list to the struct dm_dev structure to store the associated
>> targets, while also allowing differentiation between different target
>> types.
> 
> I still think this is more complex than it needs to be. If devices that
> support flush_pass_around can guarantee that:
> 
> 1. They will send a flush bio to all of their table devices
> 2. They are fine with another target sending the flush bio to their
>     table devices
> 
> Then I don't see why we need the table devices to keep track of all the
> different target types that are using them. Am I missing something here?

I attempted to enhance this solution to support additional target types,
such as those with num_flush_bios greater than 1.

> If we don't need to worry about sending a flush bio to a target of each
> type that is using a table device, then all we need to do is call
> __send_empty_flush_bios() for enough targets to cover all the table
> devices. This seems a lot easier to track. We just need another flag in
> dm_target, something like sends_pass_around_flush.
> 
> When a target calls dm_get_device(), if it adds a new table device to
> t->devices, then it's the first target in this table to use that device.
> If flush_pass_around is set for this target, then it also sets
> sends_pass_around_flush. In __send_empty_flush() if the table has
> flush_pass_around set, when you iterate through the devices, you only
> call __send_empty_flush_bios() for the ones with sends_pass_around_flush
> set.
> 
> Or am I overlooking something?

If I understand correctly, you are suggesting to iterate through all the
targets, handling those with sends_pass_around_flush set, and skipping
those where sends_pass_around_flush is not set. I believe this approach
may result in some CPU wastage.

   for i in {0..1023}; do
     echo $((8000*$i)) 8000 linear /dev/sda2 $((16384*$i))
   done | sudo dmsetup create example

In this specific scenario, a single iteration of the loop is all that
is needed.

> 
> -Ben
> 
>>
>> Signed-off-by: Yang Yang <yang.yang@vivo.com>
>> ---
>>   drivers/md/dm-table.c         | 36 +++++++++++++++++++++++++++++++++++
>>   include/linux/device-mapper.h |  3 +++
>>   2 files changed, 39 insertions(+)
>>
>> diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
>> index bd68af10afed..f6554590b7af 100644
>> --- a/drivers/md/dm-table.c
>> +++ b/drivers/md/dm-table.c
>> @@ -741,6 +741,8 @@ int dm_table_add_target(struct dm_table *t, const char *type,
>>   	if (ti->flush_pass_around == 0)
>>   		t->flush_pass_around = 0;
>>   
>> +	INIT_LIST_HEAD(&ti->list);
>> +
>>   	return 0;
>>   
>>    bad:
>> @@ -2134,6 +2136,25 @@ void dm_table_postsuspend_targets(struct dm_table *t)
>>   	suspend_targets(t, POSTSUSPEND);
>>   }
>>   
>> +static int dm_link_dev_to_target(struct dm_target *ti, struct dm_dev *dev,
>> +		sector_t start, sector_t len, void *data)
>> +{
>> +	struct list_head *targets = &dev->targets;
>> +	struct dm_target *pti;
>> +
>> +	if (!list_empty(targets)) {
>> +		list_for_each_entry(pti, targets, list) {
>> +			if (pti->type == ti->type)
>> +				return 0;
>> +		}
>> +	}
>> +
>> +	if (list_empty(&ti->list))
>> +		list_add_tail(&ti->list, targets);
>> +
>> +	return 0;
>> +}
>> +
>>   int dm_table_resume_targets(struct dm_table *t)
>>   {
>>   	unsigned int i;
>> @@ -2162,6 +2183,21 @@ int dm_table_resume_targets(struct dm_table *t)
>>   			ti->type->resume(ti);
>>   	}
>>   
>> +	if (t->flush_pass_around) {
>> +		struct list_head *devices = &t->devices;
>> +		struct dm_dev_internal *dd;
>> +
>> +		list_for_each_entry(dd, devices, list)
>> +			INIT_LIST_HEAD(&dd->dm_dev->targets);
>> +
>> +		for (i = 0; i < t->num_targets; i++) {
>> +			struct dm_target *ti = dm_table_get_target(t, i);
>> +
>> +			if (ti->type->iterate_devices)
>> +				ti->type->iterate_devices(ti, dm_link_dev_to_target, NULL);
>> +		}
>> +	}
>> +
>>   	return 0;
>>   }
>>   
>> diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
>> index 0893ff8c01b6..19e03f9b2589 100644
>> --- a/include/linux/device-mapper.h
>> +++ b/include/linux/device-mapper.h
>> @@ -169,6 +169,7 @@ struct dm_dev {
>>   	struct dax_device *dax_dev;
>>   	blk_mode_t mode;
>>   	char name[16];
>> +	struct list_head targets;
>>   };
>>   
>>   /*
>> @@ -298,6 +299,8 @@ struct dm_target {
>>   	struct dm_table *table;
>>   	struct target_type *type;
>>   
>> +	struct list_head list;
>> +
>>   	/* target limits */
>>   	sector_t begin;
>>   	sector_t len;
>> -- 
>> 2.34.1
>>
>
YangYang May 16, 2024, 2:12 a.m. UTC | #5
On 2024/5/16 0:00, Benjamin Marzinski wrote:
> On Wed, May 15, 2024 at 11:42:04AM -0400, Benjamin Marzinski wrote:
>> When a target calls dm_get_device(), if it adds a new table device to
>> t->devices, then it's the first target in this table to use that device.
>> If flush_pass_around is set for this target, then it also sets
>> sends_pass_around_flush. In __send_empty_flush() if the table has
>> flush_pass_around set, when you iterate through the devices, you only
> 
> Err, "When you iterate through the *targets*, you only ..." In this
> method you don't iterate through the list of devices (which is supposed
> to be protected by t->devices_lock).

I'm not very familiar with this area, I thought that the device list
of an active table cannot be modified, so it doesn't need to be
protected by t->devices_lock.

> 
>> call __send_empty_flush_bios() for the ones with sends_pass_around_flush
>> set.
>>
>> Or am I overlooking something?
>>
>> -Ben
>
Benjamin Marzinski May 16, 2024, 3:29 p.m. UTC | #6
On Thu, May 16, 2024 at 09:55:53AM +0800, YangYang wrote:
> On 2024/5/15 23:42, Benjamin Marzinski wrote:
> > On Tue, May 14, 2024 at 05:04:42PM +0800, Yang Yang wrote:
> > > Add a list to the struct dm_dev structure to store the associated
> > > targets, while also allowing differentiation between different target
> > > types.
> > 
> > I still think this is more complex than it needs to be. If devices that
> > support flush_pass_around can guarantee that:
> > 
> > 1. They will send a flush bio to all of their table devices
> > 2. They are fine with another target sending the flush bio to their
> >     table devices
> > 
> > Then I don't see why we need the table devices to keep track of all the
> > different target types that are using them. Am I missing something here?
> 
> I attempted to enhance this solution to support additional target types,
> such as those with num_flush_bios greater than 1.

I'm still missing why sending a flush to each target type is necessary
to handle num_flush_bios > 1. As long as the targets meet the
requirements I listed before, AFAICS it should still work with only one
of the targets mapped to each device.

Say the table devices are sda, sdb, sdc, and sdd.  If you have 4 linear
targets, each mapped to a different table device, and one stripe target
mapped to all of them.  It doesn't really matter if you don't call
__send_empty_flush_bios() for the stripe target, does it? all if its
stripe devs will still get flushes. Similarly, it's fine if one of the
linear targets doesn't get called (in fact it's fine if all the linear
targets don't get called, since the stripe target will send flushes to
all the devices). If there were only 3 linear targets, then the stripe
target would get linked to a table device, so it would get a flush sent
to it. Can you explain a situation where you need the to send a flush to
multiple targets per table device for this to work, if you assume the 2
guarantees I mentioned above (the target sends flushes to all their
devices, and they don't do something special so they need to be the one
to send the flushes).

> 
> > If we don't need to worry about sending a flush bio to a target of each
> > type that is using a table device, then all we need to do is call
> > __send_empty_flush_bios() for enough targets to cover all the table
> > devices. This seems a lot easier to track. We just need another flag in
> > dm_target, something like sends_pass_around_flush.
> > 
> > When a target calls dm_get_device(), if it adds a new table device to
> > t->devices, then it's the first target in this table to use that device.
> > If flush_pass_around is set for this target, then it also sets
> > sends_pass_around_flush. In __send_empty_flush() if the table has
> > flush_pass_around set, when you iterate through the devices, you only
> > call __send_empty_flush_bios() for the ones with sends_pass_around_flush
> > set.
> > 
> > Or am I overlooking something?
> 
> If I understand correctly, you are suggesting to iterate through all the
> targets, handling those with sends_pass_around_flush set, and skipping
> those where sends_pass_around_flush is not set. I believe this approach
> may result in some CPU wastage.
> 
>   for i in {0..1023}; do
>     echo $((8000*$i)) 8000 linear /dev/sda2 $((16384*$i))
>   done | sudo dmsetup create example
> 
> In this specific scenario, a single iteration of the loop is all that
> is needed.

It's just one iteration of the loop either way. You either loop through
the targets or the devices.  It's true that if you have lots of targets
all mapped to the same device, you would waste time looping through all
the targets instead of looping through the devices.  But if you only had
one striped target mapped to lots of devices, you would waste time
looping through all of the devices instead of looping through the
targets.  

-Ben
 
> > 
> > -Ben
> > 
> > > 
> > > Signed-off-by: Yang Yang <yang.yang@vivo.com>
> > > ---
> > >   drivers/md/dm-table.c         | 36 +++++++++++++++++++++++++++++++++++
> > >   include/linux/device-mapper.h |  3 +++
> > >   2 files changed, 39 insertions(+)
> > > 
> > > diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
> > > index bd68af10afed..f6554590b7af 100644
> > > --- a/drivers/md/dm-table.c
> > > +++ b/drivers/md/dm-table.c
> > > @@ -741,6 +741,8 @@ int dm_table_add_target(struct dm_table *t, const char *type,
> > >   	if (ti->flush_pass_around == 0)
> > >   		t->flush_pass_around = 0;
> > > +	INIT_LIST_HEAD(&ti->list);
> > > +
> > >   	return 0;
> > >    bad:
> > > @@ -2134,6 +2136,25 @@ void dm_table_postsuspend_targets(struct dm_table *t)
> > >   	suspend_targets(t, POSTSUSPEND);
> > >   }
> > > +static int dm_link_dev_to_target(struct dm_target *ti, struct dm_dev *dev,
> > > +		sector_t start, sector_t len, void *data)
> > > +{
> > > +	struct list_head *targets = &dev->targets;
> > > +	struct dm_target *pti;
> > > +
> > > +	if (!list_empty(targets)) {
> > > +		list_for_each_entry(pti, targets, list) {
> > > +			if (pti->type == ti->type)
> > > +				return 0;
> > > +		}
> > > +	}
> > > +
> > > +	if (list_empty(&ti->list))
> > > +		list_add_tail(&ti->list, targets);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > >   int dm_table_resume_targets(struct dm_table *t)
> > >   {
> > >   	unsigned int i;
> > > @@ -2162,6 +2183,21 @@ int dm_table_resume_targets(struct dm_table *t)
> > >   			ti->type->resume(ti);
> > >   	}
> > > +	if (t->flush_pass_around) {
> > > +		struct list_head *devices = &t->devices;
> > > +		struct dm_dev_internal *dd;
> > > +
> > > +		list_for_each_entry(dd, devices, list)
> > > +			INIT_LIST_HEAD(&dd->dm_dev->targets);
> > > +
> > > +		for (i = 0; i < t->num_targets; i++) {
> > > +			struct dm_target *ti = dm_table_get_target(t, i);
> > > +
> > > +			if (ti->type->iterate_devices)
> > > +				ti->type->iterate_devices(ti, dm_link_dev_to_target, NULL);
> > > +		}
> > > +	}
> > > +
> > >   	return 0;
> > >   }
> > > diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
> > > index 0893ff8c01b6..19e03f9b2589 100644
> > > --- a/include/linux/device-mapper.h
> > > +++ b/include/linux/device-mapper.h
> > > @@ -169,6 +169,7 @@ struct dm_dev {
> > >   	struct dax_device *dax_dev;
> > >   	blk_mode_t mode;
> > >   	char name[16];
> > > +	struct list_head targets;
> > >   };
> > >   /*
> > > @@ -298,6 +299,8 @@ struct dm_target {
> > >   	struct dm_table *table;
> > >   	struct target_type *type;
> > > +	struct list_head list;
> > > +
> > >   	/* target limits */
> > >   	sector_t begin;
> > >   	sector_t len;
> > > -- 
> > > 2.34.1
> > > 
> >
Benjamin Marzinski May 16, 2024, 4:39 p.m. UTC | #7
On Thu, May 16, 2024 at 10:12:25AM +0800, YangYang wrote:
> On 2024/5/16 0:00, Benjamin Marzinski wrote:
> > On Wed, May 15, 2024 at 11:42:04AM -0400, Benjamin Marzinski wrote:
> > > When a target calls dm_get_device(), if it adds a new table device to
> > > t->devices, then it's the first target in this table to use that device.
> > > If flush_pass_around is set for this target, then it also sets
> > > sends_pass_around_flush. In __send_empty_flush() if the table has
> > > flush_pass_around set, when you iterate through the devices, you only
> > 
> > Err, "When you iterate through the *targets*, you only ..." In this
> > method you don't iterate through the list of devices (which is supposed
> > to be protected by t->devices_lock).
> 
> I'm not very familiar with this area, I thought that the device list
> of an active table cannot be modified, so it doesn't need to be
> protected by t->devices_lock.

Actually, looking at this some more you're basically correct. The only
place where dm can modify the devices list of active table is in
multipath_message(), and that's bug. So you should be safe not locking
here.

I'll post a patch to fix the multipath target.

-Ben
 
> > 
> > > call __send_empty_flush_bios() for the ones with sends_pass_around_flush
> > > set.
> > > 
> > > Or am I overlooking something?
> > > 
> > > -Ben
> >
YangYang May 17, 2024, 7:48 a.m. UTC | #8
On 2024/5/16 23:29, Benjamin Marzinski wrote:
> On Thu, May 16, 2024 at 09:55:53AM +0800, YangYang wrote:
>> On 2024/5/15 23:42, Benjamin Marzinski wrote:
>>> On Tue, May 14, 2024 at 05:04:42PM +0800, Yang Yang wrote:
>>>> Add a list to the struct dm_dev structure to store the associated
>>>> targets, while also allowing differentiation between different target
>>>> types.
>>>
>>> I still think this is more complex than it needs to be. If devices that
>>> support flush_pass_around can guarantee that:
>>>
>>> 1. They will send a flush bio to all of their table devices
>>> 2. They are fine with another target sending the flush bio to their
>>>      table devices
>>>
>>> Then I don't see why we need the table devices to keep track of all the
>>> different target types that are using them. Am I missing something here?
>>
>> I attempted to enhance this solution to support additional target types,
>> such as those with num_flush_bios greater than 1.
> 
> I'm still missing why sending a flush to each target type is necessary
> to handle num_flush_bios > 1. As long as the targets meet the
> requirements I listed before, AFAICS it should still work with only one
> of the targets mapped to each device.
> 
> Say the table devices are sda, sdb, sdc, and sdd.  If you have 4 linear
> targets, each mapped to a different table device, and one stripe target
> mapped to all of them.  It doesn't really matter if you don't call
> __send_empty_flush_bios() for the stripe target, does it? all if its
> stripe devs will still get flushes. Similarly, it's fine if one of the
> linear targets doesn't get called (in fact it's fine if all the linear
> targets don't get called, since the stripe target will send flushes to
> all the devices). If there were only 3 linear targets, then the stripe
> target would get linked to a table device, so it would get a flush sent
> to it. Can you explain a situation where you need the to send a flush to
> multiple targets per table device for this to work, if you assume the 2
> guarantees I mentioned above (the target sends flushes to all their
> devices, and they don't do something special so they need to be the one
> to send the flushes).

Yes, if the targets meet the requirements you listed previously, there
is no need to send a flush to each target type.
I think I may be overthinking this. I tried to handle some targets with
num_flush_bios > 1 that don't meet the requirements.

>>
>>> If we don't need to worry about sending a flush bio to a target of each
>>> type that is using a table device, then all we need to do is call
>>> __send_empty_flush_bios() for enough targets to cover all the table
>>> devices. This seems a lot easier to track. We just need another flag in
>>> dm_target, something like sends_pass_around_flush.
>>>
>>> When a target calls dm_get_device(), if it adds a new table device to
>>> t->devices, then it's the first target in this table to use that device.
>>> If flush_pass_around is set for this target, then it also sets
>>> sends_pass_around_flush. In __send_empty_flush() if the table has
>>> flush_pass_around set, when you iterate through the devices, you only
>>> call __send_empty_flush_bios() for the ones with sends_pass_around_flush
>>> set.
>>>
>>> Or am I overlooking something?
>>
>> If I understand correctly, you are suggesting to iterate through all the
>> targets, handling those with sends_pass_around_flush set, and skipping
>> those where sends_pass_around_flush is not set. I believe this approach
>> may result in some CPU wastage.
>>
>>    for i in {0..1023}; do
>>      echo $((8000*$i)) 8000 linear /dev/sda2 $((16384*$i))
>>    done | sudo dmsetup create example
>>
>> In this specific scenario, a single iteration of the loop is all that
>> is needed.
> 
> It's just one iteration of the loop either way. You either loop through
> the targets or the devices.  It's true that if you have lots of targets
> all mapped to the same device, you would waste time looping through all
> the targets instead of looping through the devices.  But if you only had
> one striped target mapped to lots of devices, you would waste time
> looping through all of the devices instead of looping through the
> targets.

Yes, I get your point. This patchset may make things even worse for
the striped target.
I am just curious, in what scenario is the "dm-strip" target mapped to
a large number of underlying devices from the same block device.

> -Ben
>   
>>>
>>> -Ben
>>>
>>>>
>>>> Signed-off-by: Yang Yang <yang.yang@vivo.com>
>>>> ---
>>>>    drivers/md/dm-table.c         | 36 +++++++++++++++++++++++++++++++++++
>>>>    include/linux/device-mapper.h |  3 +++
>>>>    2 files changed, 39 insertions(+)
>>>>
>>>> diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
>>>> index bd68af10afed..f6554590b7af 100644
>>>> --- a/drivers/md/dm-table.c
>>>> +++ b/drivers/md/dm-table.c
>>>> @@ -741,6 +741,8 @@ int dm_table_add_target(struct dm_table *t, const char *type,
>>>>    	if (ti->flush_pass_around == 0)
>>>>    		t->flush_pass_around = 0;
>>>> +	INIT_LIST_HEAD(&ti->list);
>>>> +
>>>>    	return 0;
>>>>     bad:
>>>> @@ -2134,6 +2136,25 @@ void dm_table_postsuspend_targets(struct dm_table *t)
>>>>    	suspend_targets(t, POSTSUSPEND);
>>>>    }
>>>> +static int dm_link_dev_to_target(struct dm_target *ti, struct dm_dev *dev,
>>>> +		sector_t start, sector_t len, void *data)
>>>> +{
>>>> +	struct list_head *targets = &dev->targets;
>>>> +	struct dm_target *pti;
>>>> +
>>>> +	if (!list_empty(targets)) {
>>>> +		list_for_each_entry(pti, targets, list) {
>>>> +			if (pti->type == ti->type)
>>>> +				return 0;
>>>> +		}
>>>> +	}
>>>> +
>>>> +	if (list_empty(&ti->list))
>>>> +		list_add_tail(&ti->list, targets);
>>>> +
>>>> +	return 0;
>>>> +}
>>>> +
>>>>    int dm_table_resume_targets(struct dm_table *t)
>>>>    {
>>>>    	unsigned int i;
>>>> @@ -2162,6 +2183,21 @@ int dm_table_resume_targets(struct dm_table *t)
>>>>    			ti->type->resume(ti);
>>>>    	}
>>>> +	if (t->flush_pass_around) {
>>>> +		struct list_head *devices = &t->devices;
>>>> +		struct dm_dev_internal *dd;
>>>> +
>>>> +		list_for_each_entry(dd, devices, list)
>>>> +			INIT_LIST_HEAD(&dd->dm_dev->targets);
>>>> +
>>>> +		for (i = 0; i < t->num_targets; i++) {
>>>> +			struct dm_target *ti = dm_table_get_target(t, i);
>>>> +
>>>> +			if (ti->type->iterate_devices)
>>>> +				ti->type->iterate_devices(ti, dm_link_dev_to_target, NULL);
>>>> +		}
>>>> +	}
>>>> +
>>>>    	return 0;
>>>>    }
>>>> diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
>>>> index 0893ff8c01b6..19e03f9b2589 100644
>>>> --- a/include/linux/device-mapper.h
>>>> +++ b/include/linux/device-mapper.h
>>>> @@ -169,6 +169,7 @@ struct dm_dev {
>>>>    	struct dax_device *dax_dev;
>>>>    	blk_mode_t mode;
>>>>    	char name[16];
>>>> +	struct list_head targets;
>>>>    };
>>>>    /*
>>>> @@ -298,6 +299,8 @@ struct dm_target {
>>>>    	struct dm_table *table;
>>>>    	struct target_type *type;
>>>> +	struct list_head list;
>>>> +
>>>>    	/* target limits */
>>>>    	sector_t begin;
>>>>    	sector_t len;
>>>> -- 
>>>> 2.34.1
>>>>
>>>
>
Benjamin Marzinski May 17, 2024, 2:33 p.m. UTC | #9
On Fri, May 17, 2024 at 03:48:49PM +0800, YangYang wrote:
> On 2024/5/16 23:29, Benjamin Marzinski wrote:
> > On Thu, May 16, 2024 at 09:55:53AM +0800, YangYang wrote:
> > > On 2024/5/15 23:42, Benjamin Marzinski wrote:
> > > > On Tue, May 14, 2024 at 05:04:42PM +0800, Yang Yang wrote:

> > > 
> > > If I understand correctly, you are suggesting to iterate through all the
> > > targets, handling those with sends_pass_around_flush set, and skipping
> > > those where sends_pass_around_flush is not set. I believe this approach
> > > may result in some CPU wastage.
> > > 
> > >    for i in {0..1023}; do
> > >      echo $((8000*$i)) 8000 linear /dev/sda2 $((16384*$i))
> > >    done | sudo dmsetup create example
> > > 
> > > In this specific scenario, a single iteration of the loop is all that
> > > is needed.
> > 
> > It's just one iteration of the loop either way. You either loop through
> > the targets or the devices.  It's true that if you have lots of targets
> > all mapped to the same device, you would waste time looping through all
> > the targets instead of looping through the devices.  But if you only had
> > one striped target mapped to lots of devices, you would waste time
> > looping through all of the devices instead of looping through the
> > targets.
> 
> Yes, I get your point. This patchset may make things even worse for
> the striped target.
> I am just curious, in what scenario is the "dm-strip" target mapped to
> a large number of underlying devices from the same block device.
> 

I don't think anyone in the real world does create dm-stripe devices with a
huge number of stripe table devices. My point was that it didn't seem
obvious me that looping through the targets was a significant problem
compared to looping through the devices.

At any rate, Mikulas's patch already does this optimally, even for
targets like dm-stripe, so it doesn't really matter now.

-Ben
YangYang May 20, 2024, 3:12 a.m. UTC | #10
On 2024/5/17 22:33, Benjamin Marzinski wrote:
> On Fri, May 17, 2024 at 03:48:49PM +0800, YangYang wrote:
>> On 2024/5/16 23:29, Benjamin Marzinski wrote:
>>> On Thu, May 16, 2024 at 09:55:53AM +0800, YangYang wrote:
>>>> On 2024/5/15 23:42, Benjamin Marzinski wrote:
>>>>> On Tue, May 14, 2024 at 05:04:42PM +0800, Yang Yang wrote:
> 
>>>>
>>>> If I understand correctly, you are suggesting to iterate through all the
>>>> targets, handling those with sends_pass_around_flush set, and skipping
>>>> those where sends_pass_around_flush is not set. I believe this approach
>>>> may result in some CPU wastage.
>>>>
>>>>     for i in {0..1023}; do
>>>>       echo $((8000*$i)) 8000 linear /dev/sda2 $((16384*$i))
>>>>     done | sudo dmsetup create example
>>>>
>>>> In this specific scenario, a single iteration of the loop is all that
>>>> is needed.
>>>
>>> It's just one iteration of the loop either way. You either loop through
>>> the targets or the devices.  It's true that if you have lots of targets
>>> all mapped to the same device, you would waste time looping through all
>>> the targets instead of looping through the devices.  But if you only had
>>> one striped target mapped to lots of devices, you would waste time
>>> looping through all of the devices instead of looping through the
>>> targets.
>>
>> Yes, I get your point. This patchset may make things even worse for
>> the striped target.
>> I am just curious, in what scenario is the "dm-strip" target mapped to
>> a large number of underlying devices from the same block device.
>>
> 
> I don't think anyone in the real world does create dm-stripe devices with a
> huge number of stripe table devices. My point was that it didn't seem
> obvious me that looping through the targets was a significant problem
> compared to looping through the devices.
> 
> At any rate, Mikulas's patch already does this optimally, even for
> targets like dm-stripe, so it doesn't really matter now.
> 
> -Ben
> 

Yeah. Thanks for the explanation and your patience.
diff mbox series

Patch

diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index bd68af10afed..f6554590b7af 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -741,6 +741,8 @@  int dm_table_add_target(struct dm_table *t, const char *type,
 	if (ti->flush_pass_around == 0)
 		t->flush_pass_around = 0;
 
+	INIT_LIST_HEAD(&ti->list);
+
 	return 0;
 
  bad:
@@ -2134,6 +2136,25 @@  void dm_table_postsuspend_targets(struct dm_table *t)
 	suspend_targets(t, POSTSUSPEND);
 }
 
+static int dm_link_dev_to_target(struct dm_target *ti, struct dm_dev *dev,
+		sector_t start, sector_t len, void *data)
+{
+	struct list_head *targets = &dev->targets;
+	struct dm_target *pti;
+
+	if (!list_empty(targets)) {
+		list_for_each_entry(pti, targets, list) {
+			if (pti->type == ti->type)
+				return 0;
+		}
+	}
+
+	if (list_empty(&ti->list))
+		list_add_tail(&ti->list, targets);
+
+	return 0;
+}
+
 int dm_table_resume_targets(struct dm_table *t)
 {
 	unsigned int i;
@@ -2162,6 +2183,21 @@  int dm_table_resume_targets(struct dm_table *t)
 			ti->type->resume(ti);
 	}
 
+	if (t->flush_pass_around) {
+		struct list_head *devices = &t->devices;
+		struct dm_dev_internal *dd;
+
+		list_for_each_entry(dd, devices, list)
+			INIT_LIST_HEAD(&dd->dm_dev->targets);
+
+		for (i = 0; i < t->num_targets; i++) {
+			struct dm_target *ti = dm_table_get_target(t, i);
+
+			if (ti->type->iterate_devices)
+				ti->type->iterate_devices(ti, dm_link_dev_to_target, NULL);
+		}
+	}
+
 	return 0;
 }
 
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 0893ff8c01b6..19e03f9b2589 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -169,6 +169,7 @@  struct dm_dev {
 	struct dax_device *dax_dev;
 	blk_mode_t mode;
 	char name[16];
+	struct list_head targets;
 };
 
 /*
@@ -298,6 +299,8 @@  struct dm_target {
 	struct dm_table *table;
 	struct target_type *type;
 
+	struct list_head list;
+
 	/* target limits */
 	sector_t begin;
 	sector_t len;