diff mbox

[v2] target/tcmu: Add a timeout for the completion of netlink command reply

Message ID 20170914090309.22297-1-nakayamakenjiro@gmail.com (mailing list archive)
State New, archived
Headers show

Commit Message

Kenjiro Nakayama Sept. 14, 2017, 9:03 a.m. UTC
This patch adds a timeout for the completion of netlink command reply.

Current code waits for the netlink reply from userspace and the status
change, but it hangs forever when userspace failed to reply. To fix
this issue, this patch replace wait_for_completion with
wait_for_completion_timeout.

Default timeout is 300 sec that gives enough time, and this is
configurable via configfs.

Changes in v2:

        Makes default timeout enough long as 300 sec.
        Makes timeout configurable via configfs.
        Suggested by Mike Christie.

Signed-off-by: Kenjiro Nakayama <nakayamakenjiro@gmail.com>
---
 drivers/target/target_core_user.c | 59 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 3 deletions(-)

Comments

Mike Christie Sept. 14, 2017, 4:25 p.m. UTC | #1
On 09/14/2017 04:03 AM, Kenjiro Nakayama wrote:
> This patch adds a timeout for the completion of netlink command reply.
> 
> Current code waits for the netlink reply from userspace and the status
> change, but it hangs forever when userspace failed to reply. To fix
> this issue, this patch replace wait_for_completion with
> wait_for_completion_timeout.
> 
> Default timeout is 300 sec that gives enough time, and this is
> configurable via configfs.
> 
> Changes in v2:
> 
>         Makes default timeout enough long as 300 sec.
>         Makes timeout configurable via configfs.
>         Suggested by Mike Christie.
> 
> Signed-off-by: Kenjiro Nakayama <nakayamakenjiro@gmail.com>
> ---
>  drivers/target/target_core_user.c | 59 +++++++++++++++++++++++++++++++++++++--
>  1 file changed, 56 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
> index 942d094269fb..ef8570cb68a8 100644
> --- a/drivers/target/target_core_user.c
> +++ b/drivers/target/target_core_user.c
> @@ -150,6 +150,9 @@ struct tcmu_dev {
>  	wait_queue_head_t nl_cmd_wq;
>  
>  	char dev_config[TCMU_CONFIG_LEN];
> +
> +	/* timeout for netlink reply from userspace */
> +	unsigned int nl_timeout;
>  };
>  
>  #define TCMU_DEV(_se_dev) container_of(_se_dev, struct tcmu_dev, se_dev)
> @@ -1332,8 +1335,14 @@ static int tcmu_wait_genl_cmd_reply(struct tcmu_dev *udev)
>  	if (!tcmu_kern_cmd_reply_supported)
>  		return 0;
>  
> -	pr_debug("sleeping for nl reply\n");
> -	wait_for_completion(&nl_cmd->complete);
> +	pr_debug("sleeping for nl reply with timeout %lu\n",
> +			msecs_to_jiffies(udev->nl_timeout));
> +	ret = wait_for_completion_timeout(&nl_cmd->complete,
> +			msecs_to_jiffies(udev->nl_timeout));
> +	if (!ret) {
> +		printk(KERN_ERR "timeout waiting for nl reply from userspace\n");
> +		return -ETIME;

If you timeout here then you cannot execute new sync commands until we
eventually get a response, so you only avoid the hang for this initial
command. New commands will still hang. You need to do the bottom half of
the function to reinit the udev->curr_nl_cmd and wake up other calls
waiting.


> +	}
>  
>  	spin_lock(&udev->nl_cmd_lock);
>  	nl_cmd->cmd = TCMU_CMD_UNSPEC;
> @@ -1506,6 +1515,10 @@ static int tcmu_configure_device(struct se_device *dev)
>  		dev->dev_attrib.emulate_write_cache = 0;
>  	dev->dev_attrib.hw_queue_depth = 128;
>  
> +	/* Set default timeout 300 sec */
> +	if (!udev->nl_timeout)
> +		udev->nl_timeout = 300000;
> +
>  	/*
>  	 * Get a ref incase userspace does a close on the uio device before
>  	 * LIO has initiated tcmu_free_device.
> @@ -1610,7 +1623,7 @@ static void tcmu_destroy_device(struct se_device *dev)
>  
>  enum {
>  	Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors,
> -	Opt_err,
> +	Opt_nl_timeout, Opt_err,
>  };
>  
>  static match_table_t tokens = {
> @@ -1618,6 +1631,7 @@ static match_table_t tokens = {
>  	{Opt_dev_size, "dev_size=%u"},
>  	{Opt_hw_block_size, "hw_block_size=%u"},
>  	{Opt_hw_max_sectors, "hw_max_sectors=%u"},
> +	{Opt_nl_timeout, "nl_timeout=%u"},
>  	{Opt_err, NULL}
>  };
>  
> @@ -1692,6 +1706,17 @@ static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
>  			ret = tcmu_set_dev_attrib(&args[0],
>  					&(dev->dev_attrib.hw_max_sectors));
>  			break;
> +		case Opt_nl_timeout:
> +			arg_p = match_strdup(&args[0]);
> +			if (!arg_p) {
> +				ret = -ENOMEM;
> +				break;
> +			}
> +			ret = kstrtou32(arg_p, 0, &udev->nl_timeout);

Why have the user pass it in as msecs?

I think just passing it in seconds is better because we do not need it
that precise and it matches the other timer's units so it is consistent
for the user.


> +			kfree(arg_p);
> +			if (ret < 0)
> +				pr_err("kstrtoul() failed for nl_timeout=\n");
> +			break;
>  		default:
>  			break;
>  		}
> @@ -1879,11 +1904,39 @@ static ssize_t tcmu_emulate_write_cache_store(struct config_item *item,
>  }
>  CONFIGFS_ATTR(tcmu_, emulate_write_cache);
>  
> +static ssize_t tcmu_nl_timeout_show(struct config_item *item, char *page)
> +{
> +	struct se_dev_attrib *da = container_of(to_config_group(item),
> +		       struct se_dev_attrib, da_group);
> +	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
> +
> +	return snprintf(page, PAGE_SIZE, "%u\n", udev->nl_timeout);
> +}
> +
> +static ssize_t tcmu_nl_timeout_store(struct config_item *item,
> +		const char *page, size_t count)
> +{
> +	struct se_dev_attrib *da = container_of(to_config_group(item),
> +			struct se_dev_attrib, da_group);
> +	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
> +	u32 val;
> +	int ret;
> +
> +	ret = kstrtou32(page, 0, &val);
> +	if (ret < 0)
> +		return ret;
> +
> +	udev->nl_timeout = val;
> +	return count;
> +}
> +CONFIGFS_ATTR(tcmu_, nl_timeout);
> +
>  static struct configfs_attribute *tcmu_attrib_attrs[] = {
>  	&tcmu_attr_cmd_time_out,
>  	&tcmu_attr_dev_config,
>  	&tcmu_attr_dev_size,
>  	&tcmu_attr_emulate_write_cache,
> +	&tcmu_attr_nl_timeout,
>  	NULL,
>  };
>  
> 

--
To unsubscribe from this list: send the line "unsubscribe target-devel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 942d094269fb..ef8570cb68a8 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -150,6 +150,9 @@  struct tcmu_dev {
 	wait_queue_head_t nl_cmd_wq;
 
 	char dev_config[TCMU_CONFIG_LEN];
+
+	/* timeout for netlink reply from userspace */
+	unsigned int nl_timeout;
 };
 
 #define TCMU_DEV(_se_dev) container_of(_se_dev, struct tcmu_dev, se_dev)
@@ -1332,8 +1335,14 @@  static int tcmu_wait_genl_cmd_reply(struct tcmu_dev *udev)
 	if (!tcmu_kern_cmd_reply_supported)
 		return 0;
 
-	pr_debug("sleeping for nl reply\n");
-	wait_for_completion(&nl_cmd->complete);
+	pr_debug("sleeping for nl reply with timeout %lu\n",
+			msecs_to_jiffies(udev->nl_timeout));
+	ret = wait_for_completion_timeout(&nl_cmd->complete,
+			msecs_to_jiffies(udev->nl_timeout));
+	if (!ret) {
+		printk(KERN_ERR "timeout waiting for nl reply from userspace\n");
+		return -ETIME;
+	}
 
 	spin_lock(&udev->nl_cmd_lock);
 	nl_cmd->cmd = TCMU_CMD_UNSPEC;
@@ -1506,6 +1515,10 @@  static int tcmu_configure_device(struct se_device *dev)
 		dev->dev_attrib.emulate_write_cache = 0;
 	dev->dev_attrib.hw_queue_depth = 128;
 
+	/* Set default timeout 300 sec */
+	if (!udev->nl_timeout)
+		udev->nl_timeout = 300000;
+
 	/*
 	 * Get a ref incase userspace does a close on the uio device before
 	 * LIO has initiated tcmu_free_device.
@@ -1610,7 +1623,7 @@  static void tcmu_destroy_device(struct se_device *dev)
 
 enum {
 	Opt_dev_config, Opt_dev_size, Opt_hw_block_size, Opt_hw_max_sectors,
-	Opt_err,
+	Opt_nl_timeout, Opt_err,
 };
 
 static match_table_t tokens = {
@@ -1618,6 +1631,7 @@  static match_table_t tokens = {
 	{Opt_dev_size, "dev_size=%u"},
 	{Opt_hw_block_size, "hw_block_size=%u"},
 	{Opt_hw_max_sectors, "hw_max_sectors=%u"},
+	{Opt_nl_timeout, "nl_timeout=%u"},
 	{Opt_err, NULL}
 };
 
@@ -1692,6 +1706,17 @@  static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
 			ret = tcmu_set_dev_attrib(&args[0],
 					&(dev->dev_attrib.hw_max_sectors));
 			break;
+		case Opt_nl_timeout:
+			arg_p = match_strdup(&args[0]);
+			if (!arg_p) {
+				ret = -ENOMEM;
+				break;
+			}
+			ret = kstrtou32(arg_p, 0, &udev->nl_timeout);
+			kfree(arg_p);
+			if (ret < 0)
+				pr_err("kstrtoul() failed for nl_timeout=\n");
+			break;
 		default:
 			break;
 		}
@@ -1879,11 +1904,39 @@  static ssize_t tcmu_emulate_write_cache_store(struct config_item *item,
 }
 CONFIGFS_ATTR(tcmu_, emulate_write_cache);
 
+static ssize_t tcmu_nl_timeout_show(struct config_item *item, char *page)
+{
+	struct se_dev_attrib *da = container_of(to_config_group(item),
+		       struct se_dev_attrib, da_group);
+	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
+
+	return snprintf(page, PAGE_SIZE, "%u\n", udev->nl_timeout);
+}
+
+static ssize_t tcmu_nl_timeout_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct se_dev_attrib *da = container_of(to_config_group(item),
+			struct se_dev_attrib, da_group);
+	struct tcmu_dev *udev = TCMU_DEV(da->da_dev);
+	u32 val;
+	int ret;
+
+	ret = kstrtou32(page, 0, &val);
+	if (ret < 0)
+		return ret;
+
+	udev->nl_timeout = val;
+	return count;
+}
+CONFIGFS_ATTR(tcmu_, nl_timeout);
+
 static struct configfs_attribute *tcmu_attrib_attrs[] = {
 	&tcmu_attr_cmd_time_out,
 	&tcmu_attr_dev_config,
 	&tcmu_attr_dev_size,
 	&tcmu_attr_emulate_write_cache,
+	&tcmu_attr_nl_timeout,
 	NULL,
 };