diff mbox series

[08/11] target iblock: add backend plug/unplug callouts

Message ID 20210204113513.93204-9-michael.christie@oracle.com (mailing list archive)
State New, archived
Headers show
Series target: fix cmd plugging and completion | expand

Commit Message

Mike Christie Feb. 4, 2021, 11:35 a.m. UTC
This patch adds plug/unplug callouts for iblock. For initiator drivers
like iscsi which wants to pass multiple cmds to its xmit thread instead
of one cmd at a time, this increases IOPs by around 10% with vhost-scsi
(combined with the last patches we can see a total 40-50% increase). For
driver combos like tcm_loop and faster drivers like the iser initiator, we
can still see IOPs increase by 20-30% when tcm_loop's nr_hw_queues setting
is also increased.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
---
 drivers/target/target_core_iblock.c | 41 ++++++++++++++++++++++++++++-
 drivers/target/target_core_iblock.h | 10 +++++++
 2 files changed, 50 insertions(+), 1 deletion(-)

Comments

Chaitanya Kulkarni Feb. 4, 2021, 11:23 p.m. UTC | #1
On 2/4/21 03:40, Mike Christie wrote:
> This patch adds plug/unplug callouts for iblock. For initiator drivers
> like iscsi which wants to pass multiple cmds to its xmit thread instead
> of one cmd at a time, this increases IOPs by around 10% with vhost-scsi
> (combined with the last patches we can see a total 40-50% increase). For
> driver combos like tcm_loop and faster drivers like the iser initiator, we
> can still see IOPs increase by 20-30% when tcm_loop's nr_hw_queues setting
> is also increased.
>
> Signed-off-by: Mike Christie <michael.christie@oracle.com>
> ---
>  drivers/target/target_core_iblock.c | 41 ++++++++++++++++++++++++++++-
>  drivers/target/target_core_iblock.h | 10 +++++++
>  2 files changed, 50 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
> index 8ed93fd205c7..a4951e662615 100644
> --- a/drivers/target/target_core_iblock.c
> +++ b/drivers/target/target_core_iblock.c
> @@ -61,9 +61,18 @@ static struct se_device *iblock_alloc_device(struct se_hba *hba, const char *nam
>  		return NULL;
>  	}
>  
> +	ib_dev->ibd_plug = kcalloc(nr_cpu_ids, sizeof(*ib_dev->ibd_plug),
> +				   GFP_KERNEL);
> +	if (!ib_dev->ibd_plug)
> +		goto free_dev;
> +
>  	pr_debug( "IBLOCK: Allocated ib_dev for %s\n", name);
>  
>  	return &ib_dev->dev;
> +
> +free_dev:
> +	kfree(ib_dev);
> +	return NULL;
>  }
>  
>  static int iblock_configure_device(struct se_device *dev)
> @@ -171,6 +180,7 @@ static void iblock_dev_call_rcu(struct rcu_head *p)
>  	struct se_device *dev = container_of(p, struct se_device, rcu_head);
>  	struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
>  
> +	kfree(ib_dev->ibd_plug);
>  	kfree(ib_dev);
>  }
>  
> @@ -188,6 +198,30 @@ static void iblock_destroy_device(struct se_device *dev)
>  	bioset_exit(&ib_dev->ibd_bio_set);
>  }
>  
> +static struct se_dev_plug *iblock_plug_device(struct se_cmd *se_cmd)
> +{
> +	struct se_device *se_dev = se_cmd->se_dev;
> +	struct iblock_dev *ib_dev = IBLOCK_DEV(se_dev);
> +	struct iblock_dev_plug *ib_dev_plug;
> +
> +	ib_dev_plug = &ib_dev->ibd_plug[se_cmd->cpuid];
> +	if (test_and_set_bit(IBD_PLUGF_PLUGGED, &ib_dev_plug->flags))
> +		return NULL;
> +
> +	blk_start_plug(&ib_dev_plug->blk_plug);
> +	return &ib_dev_plug->se_plug;
> +}
> +
> +static void iblock_unplug_device(struct se_dev_plug *se_plug)
> +{
> +	struct iblock_dev_plug *ib_dev_plug =
> +				container_of(se_plug, struct iblock_dev_plug,
> +					     se_plug);
I think something like on the new line read much easier for me atleast :-

        ib_dev_plug = container_of(se_plug, struct iblock_dev_plug,
se_plug);
> +
> +	blk_finish_plug(&ib_dev_plug->blk_plug);
> +	clear_bit(IBD_PLUGF_PLUGGED, &ib_dev_plug->flags);
> +}
> +
>  static unsigned long long iblock_emulate_read_cap_with_block_size(
>  	struct se_device *dev,
>  	struct block_device *bd,
> @@ -337,7 +371,10 @@ static void iblock_submit_bios(struct bio_list *list)
>  {
>  	struct blk_plug plug;
>  	struct bio *bio;
> -
> +	/*
> +	 * The block layer handles nested plugs, so just plug/unplug to handle
> +	 * fabric drivers that didn't support batching and multi bio cmds.
> +	 */
>  	blk_start_plug(&plug);
>  	while ((bio = bio_list_pop(list)))
>  		submit_bio(bio);
> @@ -870,6 +907,8 @@ static const struct target_backend_ops iblock_ops = {
>  	.configure_device	= iblock_configure_device,
>  	.destroy_device		= iblock_destroy_device,
>  	.free_device		= iblock_free_device,
> +	.plug_device		= iblock_plug_device,
> +	.unplug_device		= iblock_unplug_device,
>  	.parse_cdb		= iblock_parse_cdb,
>  	.set_configfs_dev_params = iblock_set_configfs_dev_params,
>  	.show_configfs_dev_params = iblock_show_configfs_dev_params,
> diff --git a/drivers/target/target_core_iblock.h b/drivers/target/target_core_iblock.h
> index cefc641145b3..8c55375d2f75 100644
> --- a/drivers/target/target_core_iblock.h
> +++ b/drivers/target/target_core_iblock.h
> @@ -4,6 +4,7 @@
>  
>  #include <linux/atomic.h>
>  #include <linux/refcount.h>
> +#include <linux/blkdev.h>
>  #include <target/target_core_base.h>
>  
>  #define IBLOCK_VERSION		"4.0"
> @@ -17,6 +18,14 @@ struct iblock_req {
>  
>  #define IBDF_HAS_UDEV_PATH		0x01
>  
> +#define IBD_PLUGF_PLUGGED		0x01
> +
> +struct iblock_dev_plug {
> +	struct se_dev_plug se_plug;
> +	struct blk_plug blk_plug;
> +	unsigned long flags;
> +};
> +
>  struct iblock_dev {
>  	struct se_device dev;
>  	unsigned char ibd_udev_path[SE_UDEV_PATH_LEN];
> @@ -24,6 +33,7 @@ struct iblock_dev {
>  	struct bio_set	ibd_bio_set;
>  	struct block_device *ibd_bd;
>  	bool ibd_readonly;
> +	struct iblock_dev_plug *ibd_plug;
>  } ____cacheline_aligned;
>  
>  #endif /* TARGET_CORE_IBLOCK_H */
Mike Christie Feb. 5, 2021, 12:45 a.m. UTC | #2
On 2/4/21 5:23 PM, Chaitanya Kulkarni wrote:
> On 2/4/21 03:40, Mike Christie wrote:
>> This patch adds plug/unplug callouts for iblock. For initiator drivers
>> like iscsi which wants to pass multiple cmds to its xmit thread instead
>> of one cmd at a time, this increases IOPs by around 10% with vhost-scsi
>> (combined with the last patches we can see a total 40-50% increase). For
>> driver combos like tcm_loop and faster drivers like the iser initiator, we
>> can still see IOPs increase by 20-30% when tcm_loop's nr_hw_queues setting
>> is also increased.
>>
>> Signed-off-by: Mike Christie <michael.christie@oracle.com>
>> ---
>>   drivers/target/target_core_iblock.c | 41 ++++++++++++++++++++++++++++-
>>   drivers/target/target_core_iblock.h | 10 +++++++
>>   2 files changed, 50 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
>> index 8ed93fd205c7..a4951e662615 100644
>> --- a/drivers/target/target_core_iblock.c
>> +++ b/drivers/target/target_core_iblock.c
>> @@ -61,9 +61,18 @@ static struct se_device *iblock_alloc_device(struct se_hba *hba, const char *nam
>>   		return NULL;
>>   	}
>>   
>> +	ib_dev->ibd_plug = kcalloc(nr_cpu_ids, sizeof(*ib_dev->ibd_plug),
>> +				   GFP_KERNEL);
>> +	if (!ib_dev->ibd_plug)
>> +		goto free_dev;
>> +
>>   	pr_debug( "IBLOCK: Allocated ib_dev for %s\n", name);
>>   
>>   	return &ib_dev->dev;
>> +
>> +free_dev:
>> +	kfree(ib_dev);
>> +	return NULL;
>>   }
>>   
>>   static int iblock_configure_device(struct se_device *dev)
>> @@ -171,6 +180,7 @@ static void iblock_dev_call_rcu(struct rcu_head *p)
>>   	struct se_device *dev = container_of(p, struct se_device, rcu_head);
>>   	struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
>>   
>> +	kfree(ib_dev->ibd_plug);
>>   	kfree(ib_dev);
>>   }
>>   
>> @@ -188,6 +198,30 @@ static void iblock_destroy_device(struct se_device *dev)
>>   	bioset_exit(&ib_dev->ibd_bio_set);
>>   }
>>   
>> +static struct se_dev_plug *iblock_plug_device(struct se_cmd *se_cmd)
>> +{
>> +	struct se_device *se_dev = se_cmd->se_dev;
>> +	struct iblock_dev *ib_dev = IBLOCK_DEV(se_dev);
>> +	struct iblock_dev_plug *ib_dev_plug;
>> +
>> +	ib_dev_plug = &ib_dev->ibd_plug[se_cmd->cpuid];
>> +	if (test_and_set_bit(IBD_PLUGF_PLUGGED, &ib_dev_plug->flags))
>> +		return NULL;
>> +
>> +	blk_start_plug(&ib_dev_plug->blk_plug);
>> +	return &ib_dev_plug->se_plug;
>> +}
>> +
>> +static void iblock_unplug_device(struct se_dev_plug *se_plug)
>> +{
>> +	struct iblock_dev_plug *ib_dev_plug =
>> +				container_of(se_plug, struct iblock_dev_plug,
>> +					     se_plug);
> I think something like on the new line read much easier for me atleast :-
> 
>          ib_dev_plug = container_of(se_plug, struct iblock_dev_plug,
> se_plug);

Yeah nicer. Will change this and the other one.
Chaitanya Kulkarni Feb. 7, 2021, 1:06 a.m. UTC | #3
On 2/4/21 03:40, Mike Christie wrote:
>  
> +	ib_dev->ibd_plug = kcalloc(nr_cpu_ids, sizeof(*ib_dev->ibd_plug),
> +				   GFP_KERNEL);
I'd actually prefer struct xxx in sizeof, but maybe that is just my
preference.
Not sure what is the standard practice in target code.
Bart Van Assche Feb. 7, 2021, 2:21 a.m. UTC | #4
On 2/6/21 5:06 PM, Chaitanya Kulkarni wrote:
> On 2/4/21 03:40, Mike Christie wrote:
>>  
>> +	ib_dev->ibd_plug = kcalloc(nr_cpu_ids, sizeof(*ib_dev->ibd_plug),
>> +				   GFP_KERNEL);
> I'd actually prefer struct xxx in sizeof, but maybe that is just my
> preference.
> Not sure what is the standard practice in target code.

The above code is easier to verify than the suggested alternative. With
the alternative one has to look up the definition of ibd_plug to verify
correctness of the code. The above code can be verified without having
to look up the definition of the ibd_plug member.

Bart.
diff mbox series

Patch

diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 8ed93fd205c7..a4951e662615 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -61,9 +61,18 @@  static struct se_device *iblock_alloc_device(struct se_hba *hba, const char *nam
 		return NULL;
 	}
 
+	ib_dev->ibd_plug = kcalloc(nr_cpu_ids, sizeof(*ib_dev->ibd_plug),
+				   GFP_KERNEL);
+	if (!ib_dev->ibd_plug)
+		goto free_dev;
+
 	pr_debug( "IBLOCK: Allocated ib_dev for %s\n", name);
 
 	return &ib_dev->dev;
+
+free_dev:
+	kfree(ib_dev);
+	return NULL;
 }
 
 static int iblock_configure_device(struct se_device *dev)
@@ -171,6 +180,7 @@  static void iblock_dev_call_rcu(struct rcu_head *p)
 	struct se_device *dev = container_of(p, struct se_device, rcu_head);
 	struct iblock_dev *ib_dev = IBLOCK_DEV(dev);
 
+	kfree(ib_dev->ibd_plug);
 	kfree(ib_dev);
 }
 
@@ -188,6 +198,30 @@  static void iblock_destroy_device(struct se_device *dev)
 	bioset_exit(&ib_dev->ibd_bio_set);
 }
 
+static struct se_dev_plug *iblock_plug_device(struct se_cmd *se_cmd)
+{
+	struct se_device *se_dev = se_cmd->se_dev;
+	struct iblock_dev *ib_dev = IBLOCK_DEV(se_dev);
+	struct iblock_dev_plug *ib_dev_plug;
+
+	ib_dev_plug = &ib_dev->ibd_plug[se_cmd->cpuid];
+	if (test_and_set_bit(IBD_PLUGF_PLUGGED, &ib_dev_plug->flags))
+		return NULL;
+
+	blk_start_plug(&ib_dev_plug->blk_plug);
+	return &ib_dev_plug->se_plug;
+}
+
+static void iblock_unplug_device(struct se_dev_plug *se_plug)
+{
+	struct iblock_dev_plug *ib_dev_plug =
+				container_of(se_plug, struct iblock_dev_plug,
+					     se_plug);
+
+	blk_finish_plug(&ib_dev_plug->blk_plug);
+	clear_bit(IBD_PLUGF_PLUGGED, &ib_dev_plug->flags);
+}
+
 static unsigned long long iblock_emulate_read_cap_with_block_size(
 	struct se_device *dev,
 	struct block_device *bd,
@@ -337,7 +371,10 @@  static void iblock_submit_bios(struct bio_list *list)
 {
 	struct blk_plug plug;
 	struct bio *bio;
-
+	/*
+	 * The block layer handles nested plugs, so just plug/unplug to handle
+	 * fabric drivers that didn't support batching and multi bio cmds.
+	 */
 	blk_start_plug(&plug);
 	while ((bio = bio_list_pop(list)))
 		submit_bio(bio);
@@ -870,6 +907,8 @@  static const struct target_backend_ops iblock_ops = {
 	.configure_device	= iblock_configure_device,
 	.destroy_device		= iblock_destroy_device,
 	.free_device		= iblock_free_device,
+	.plug_device		= iblock_plug_device,
+	.unplug_device		= iblock_unplug_device,
 	.parse_cdb		= iblock_parse_cdb,
 	.set_configfs_dev_params = iblock_set_configfs_dev_params,
 	.show_configfs_dev_params = iblock_show_configfs_dev_params,
diff --git a/drivers/target/target_core_iblock.h b/drivers/target/target_core_iblock.h
index cefc641145b3..8c55375d2f75 100644
--- a/drivers/target/target_core_iblock.h
+++ b/drivers/target/target_core_iblock.h
@@ -4,6 +4,7 @@ 
 
 #include <linux/atomic.h>
 #include <linux/refcount.h>
+#include <linux/blkdev.h>
 #include <target/target_core_base.h>
 
 #define IBLOCK_VERSION		"4.0"
@@ -17,6 +18,14 @@  struct iblock_req {
 
 #define IBDF_HAS_UDEV_PATH		0x01
 
+#define IBD_PLUGF_PLUGGED		0x01
+
+struct iblock_dev_plug {
+	struct se_dev_plug se_plug;
+	struct blk_plug blk_plug;
+	unsigned long flags;
+};
+
 struct iblock_dev {
 	struct se_device dev;
 	unsigned char ibd_udev_path[SE_UDEV_PATH_LEN];
@@ -24,6 +33,7 @@  struct iblock_dev {
 	struct bio_set	ibd_bio_set;
 	struct block_device *ibd_bd;
 	bool ibd_readonly;
+	struct iblock_dev_plug *ibd_plug;
 } ____cacheline_aligned;
 
 #endif /* TARGET_CORE_IBLOCK_H */