diff mbox

[1/2] tcmu: add qfull timeout for ring space waits

Message ID 1502430219-7644-2-git-send-email-mchristi@redhat.com (mailing list archive)
State New, archived
Headers show

Commit Message

Mike Christie Aug. 11, 2017, 5:43 a.m. UTC
30 secs (default timeout) is too long to wait for memory.
For ESX we would have seen several aborts and linux will
just be starting to send an abort. We would like to fail
these commands with TASK SET FULL so the initiator knows
it is sending too many commands. We do not want to use
the same timeout value as cmd_time_out because we may not
want that set if userspace has its own timers for commands
that have got ring space and made it to the userspace
daemon.

This adds a new tcmu timeout for this case, qfull_time_out,
which controls how long we wait for ring space.

Signed-off-by: Mike Christie <mchristi@redhat.com>
---
 drivers/target/target_core_user.c | 42 +++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

Comments

Mike Christie Aug. 15, 2017, 4:51 p.m. UTC | #1
On 08/11/2017 12:43 AM, Mike Christie wrote:
> 30 secs (default timeout) is too long to wait for memory.
> For ESX we would have seen several aborts and linux will
> just be starting to send an abort. We would like to fail
> these commands with TASK SET FULL so the initiator knows
> it is sending too many commands. We do not want to use
> the same timeout value as cmd_time_out because we may not
> want that set if userspace has its own timers for commands
> that have got ring space and made it to the userspace
> daemon.
> 
> This adds a new tcmu timeout for this case, qfull_time_out,
> which controls how long we wait for ring space.
> 
> Signed-off-by: Mike Christie <mchristi@redhat.com>
> ---
>  drivers/target/target_core_user.c | 42 +++++++++++++++++++++++++++++++++++----
>  1 file changed, 38 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
> index fd9fcea..7d1da43 100644
> --- a/drivers/target/target_core_user.c
> +++ b/drivers/target/target_core_user.c
> @@ -145,6 +145,7 @@ struct tcmu_dev {
>  
>  	struct timer_list timeout;
>  	unsigned int cmd_time_out;
> +	unsigned int qfull_time_out;
>  
>  	spinlock_t nl_cmd_lock;
>  	struct tcmu_nl_cmd curr_nl_cmd;
> @@ -864,12 +865,15 @@ static inline size_t tcmu_cmd_get_cmd_size(struct tcmu_cmd *tcmu_cmd,
>  		if (udev->cmd_time_out)
>  			ret = schedule_timeout(
>  					msecs_to_jiffies(udev->cmd_time_out));
> +		else if (udev->qfull_time_out)
> +			ret = schedule_timeout(
> +					msecs_to_jiffies(udev->qfull_time_out));
>  		else
>  			ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT));
>  		finish_wait(&udev->wait_cmdr, &__wait);
>  		if (!ret) {
> -			pr_warn("tcmu: command timed out\n");
> -			return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
> +			pr_warn("tcmu: command timed out waiting for ring space.\n");
> +			return TCM_OUT_OF_RESOURCES;
>  		}
>  

I am self nacking this for now. I will not have internet access for a
couple weeks, so just drop this patch.

The problem with the ring space wait code still is that for fabric
modules like iscsi where we end up calling this code from the fabric's
module's receiving context, like the iscsi rx thread, it ends up
blocking the entire target. For iscsi, we cannot respond to nops, and
the initiator will drop the connection.

I think the options are:

1. Just drop all the wait code and always fail right away. The problem
would be that because the ring pool pages are shared across all devices
and there are multiple initiators, we might end up with some slow
initiators getting starved.

2. Keep the timer, add the configfs qfull timeout setting, but if space
is not available then put the cmd on a list and let the unmap thread do
the timer/wait. When space becomes available it would do
tcmu_queue_cmd_ring (we could break that function up so it could be
called by the initial submission path and the unmap thread).

3. Keep the timer, add the configfs qfull timeout setting, but just
always kick off to another thread.

4. Have the initiators always turn off nops :)

5. ?
--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index fd9fcea..7d1da43 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -145,6 +145,7 @@  struct tcmu_dev {
 
 	struct timer_list timeout;
 	unsigned int cmd_time_out;
+	unsigned int qfull_time_out;
 
 	spinlock_t nl_cmd_lock;
 	struct tcmu_nl_cmd curr_nl_cmd;
@@ -864,12 +865,15 @@  static inline size_t tcmu_cmd_get_cmd_size(struct tcmu_cmd *tcmu_cmd,
 		if (udev->cmd_time_out)
 			ret = schedule_timeout(
 					msecs_to_jiffies(udev->cmd_time_out));
+		else if (udev->qfull_time_out)
+			ret = schedule_timeout(
+					msecs_to_jiffies(udev->qfull_time_out));
 		else
 			ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT));
 		finish_wait(&udev->wait_cmdr, &__wait);
 		if (!ret) {
-			pr_warn("tcmu: command timed out\n");
-			return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+			pr_warn("tcmu: command timed out waiting for ring space.\n");
+			return TCM_OUT_OF_RESOURCES;
 		}
 
 		mutex_lock(&udev->cmdr_lock);
@@ -1816,8 +1820,8 @@  static ssize_t tcmu_cmd_time_out_show(struct config_item *item, char *page)
 	return snprintf(page, PAGE_SIZE, "%lu\n", udev->cmd_time_out / MSEC_PER_SEC);
 }
 
-static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *page,
-				       size_t count)
+static ssize_t tcmu_cmd_time_out_store(struct config_item *item,
+				       const char *page, size_t count)
 {
 	struct se_dev_attrib *da = container_of(to_config_group(item),
 					struct se_dev_attrib, da_group);
@@ -1840,6 +1844,35 @@  static ssize_t tcmu_cmd_time_out_store(struct config_item *item, const char *pag
 }
 CONFIGFS_ATTR(tcmu_, cmd_time_out);
 
+static ssize_t tcmu_qfull_time_out_show(struct config_item *item, char *page)
+{
+	struct se_dev_attrib *da = container_of(to_config_group(item),
+					struct se_dev_attrib, da_group);
+	struct tcmu_dev *udev = container_of(da->da_dev,
+					struct tcmu_dev, se_dev);
+
+	return snprintf(page, PAGE_SIZE, "%lu\n", udev->qfull_time_out / MSEC_PER_SEC);
+}
+
+static ssize_t tcmu_qfull_time_out_store(struct config_item *item,
+					 const char *page, size_t count)
+{
+	struct se_dev_attrib *da = container_of(to_config_group(item),
+					struct se_dev_attrib, da_group);
+	struct tcmu_dev *udev = container_of(da->da_dev,
+					struct tcmu_dev, se_dev);
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(page, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	udev->qfull_time_out = val * MSEC_PER_SEC;
+	return count;
+}
+CONFIGFS_ATTR(tcmu_, qfull_time_out);
+
 static ssize_t tcmu_dev_config_show(struct config_item *item, char *page)
 {
 	struct se_dev_attrib *da = container_of(to_config_group(item),
@@ -1956,6 +1989,7 @@  static ssize_t tcmu_emulate_write_cache_store(struct config_item *item,
 CONFIGFS_ATTR(tcmu_, emulate_write_cache);
 
 static struct configfs_attribute *tcmu_attrib_attrs[] = {
+	&tcmu_attr_qfull_time_out,
 	&tcmu_attr_cmd_time_out,
 	&tcmu_attr_dev_config,
 	&tcmu_attr_dev_size,