diff mbox series

[ndctl,1/1] daxctl: Output more information if memblock is unremovable

Message ID 20241204161457.1113419-1-ming.li@zohomail.com (mailing list archive)
State New
Headers show
Series [ndctl,1/1] daxctl: Output more information if memblock is unremovable | expand

Commit Message

Li Ming Dec. 4, 2024, 4:14 p.m. UTC
If CONFIG_MEMORY_HOTREMOVE is disabled by kernel, memblocks will not be
removed, so 'dax offline-memory all' will output below error logs:

  libdaxctl: offline_one_memblock: dax0.0: Failed to offline /sys/devices/system/node/node6/memory371/state: Invalid argument
  dax0.0: failed to offline memory: Invalid argument
  error offlining memory: Invalid argument
  offlined memory for 0 devices

The log does not clearly show why the command failed. So checking if the
target memblock is removable before offlining it by querying
'/sys/devices/system/node/nodeX/memoryY/removable', then output specific
logs if the memblock is unremovable, output will be:

  libdaxctl: offline_one_memblock: dax0.0: memory371 is unremovable
  dax0.0: failed to offline memory: Operation not supported
  error offlining memory: Operation not supported
  offlined memory for 0 devices

Besides, delay to set up string 'path' for offlining memblock operation,
because string 'path' is stored in 'mem->mem_buf' which is a shared
buffer, it will be used in memblock_is_removable().

Signed-off-by: Li Ming <ming.li@zohomail.com>
---
 daxctl/lib/libdaxctl.c | 52 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 45 insertions(+), 7 deletions(-)

Comments

Alison Schofield Dec. 6, 2024, 1:10 a.m. UTC | #1
On Thu, Dec 05, 2024 at 12:14:56AM +0800, Li Ming wrote:
> If CONFIG_MEMORY_HOTREMOVE is disabled by kernel, memblocks will not be
> removed, so 'dax offline-memory all' will output below error logs:
> 
>   libdaxctl: offline_one_memblock: dax0.0: Failed to offline /sys/devices/system/node/node6/memory371/state: Invalid argument
>   dax0.0: failed to offline memory: Invalid argument
>   error offlining memory: Invalid argument
>   offlined memory for 0 devices
> 
> The log does not clearly show why the command failed. So checking if the
> target memblock is removable before offlining it by querying
> '/sys/devices/system/node/nodeX/memoryY/removable', then output specific
> logs if the memblock is unremovable, output will be:
> 
>   libdaxctl: offline_one_memblock: dax0.0: memory371 is unremovable
>   dax0.0: failed to offline memory: Operation not supported
>   error offlining memory: Operation not supported
>   offlined memory for 0 devices
>

Hi Ming,

This led me to catch up on movable and removable in DAX context.
Not all 'Movable' DAX memory is 'Removable' right?

Would it be useful to add 'removable' to the daxctl list json:

# daxctl list
[
  {
    "chardev":"dax0.0",
    "size":536870912,
    "target_node":0,
    "align":2097152,
    "mode":"system-ram",
    "online_memblocks":4,
    "total_memblocks":4,
    "movable":true
    "removable":false  <----
  }
]

You've already added the helper to discover removable.

Otherwise, LGTM,
Reviewed-by: Alison Schofield <alison.schofield@intel.com>


> Besides, delay to set up string 'path' for offlining memblock operation,
> because string 'path' is stored in 'mem->mem_buf' which is a shared
> buffer, it will be used in memblock_is_removable().
> 
> Signed-off-by: Li Ming <ming.li@zohomail.com>
> ---
>  daxctl/lib/libdaxctl.c | 52 ++++++++++++++++++++++++++++++++++++------
>  1 file changed, 45 insertions(+), 7 deletions(-)
> 
> diff --git a/daxctl/lib/libdaxctl.c b/daxctl/lib/libdaxctl.c
> index 9fbefe2e8329..b7fa0de0b73d 100644
> --- a/daxctl/lib/libdaxctl.c
> +++ b/daxctl/lib/libdaxctl.c
> @@ -1310,6 +1310,37 @@ static int memblock_is_online(struct daxctl_memory *mem, char *memblock)
>  	return 0;
>  }
>  
> +static int memblock_is_removable(struct daxctl_memory *mem, char *memblock)
> +{
> +	struct daxctl_dev *dev = daxctl_memory_get_dev(mem);
> +	const char *devname = daxctl_dev_get_devname(dev);
> +	struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
> +	int len = mem->buf_len, rc;
> +	char buf[SYSFS_ATTR_SIZE];
> +	char *path = mem->mem_buf;
> +	const char *node_path;
> +
> +	node_path = daxctl_memory_get_node_path(mem);
> +	if (!node_path)
> +		return -ENXIO;
> +
> +	rc = snprintf(path, len, "%s/%s/removable", node_path, memblock);
> +	if (rc < 0)
> +		return -ENOMEM;
> +
> +	rc = sysfs_read_attr(ctx, path, buf);
> +	if (rc) {
> +		err(ctx, "%s: Failed to read %s: %s\n",
> +			devname, path, strerror(-rc));
> +		return rc;
> +	}
> +
> +	if (strtoul(buf, NULL, 0) == 0)
> +		return -EOPNOTSUPP;
> +
> +	return 0;
> +}
> +
>  static int online_one_memblock(struct daxctl_memory *mem, char *memblock,
>  		enum memory_zones zone, int *status)
>  {
> @@ -1362,6 +1393,20 @@ static int offline_one_memblock(struct daxctl_memory *mem, char *memblock)
>  	char *path = mem->mem_buf;
>  	const char *node_path;
>  
> +	/* if already offline, there is nothing to do */
> +	rc = memblock_is_online(mem, memblock);
> +	if (rc < 0)
> +		return rc;
> +	if (!rc)
> +		return 1;
> +
> +	rc = memblock_is_removable(mem, memblock);
> +	if (rc) {
> +		if (rc == -EOPNOTSUPP)
> +			err(ctx, "%s: %s is unremovable\n", devname, memblock);
> +		return rc;
> +	}
> +
>  	node_path = daxctl_memory_get_node_path(mem);
>  	if (!node_path)
>  		return -ENXIO;
> @@ -1370,13 +1415,6 @@ static int offline_one_memblock(struct daxctl_memory *mem, char *memblock)
>  	if (rc < 0)
>  		return -ENOMEM;
>  
> -	/* if already offline, there is nothing to do */
> -	rc = memblock_is_online(mem, memblock);
> -	if (rc < 0)
> -		return rc;
> -	if (!rc)
> -		return 1;
> -
>  	rc = sysfs_write_attr_quiet(ctx, path, mode);
>  	if (rc) {
>  		/* check if something raced us to offline (unlikely) */
> -- 
> 2.34.1
> 
>
Li Ming Dec. 6, 2024, 7:06 a.m. UTC | #2
On 12/6/2024 9:10 AM, Alison Schofield wrote:
> On Thu, Dec 05, 2024 at 12:14:56AM +0800, Li Ming wrote:
>> If CONFIG_MEMORY_HOTREMOVE is disabled by kernel, memblocks will not be
>> removed, so 'dax offline-memory all' will output below error logs:
>>
>>   libdaxctl: offline_one_memblock: dax0.0: Failed to offline /sys/devices/system/node/node6/memory371/state: Invalid argument
>>   dax0.0: failed to offline memory: Invalid argument
>>   error offlining memory: Invalid argument
>>   offlined memory for 0 devices
>>
>> The log does not clearly show why the command failed. So checking if the
>> target memblock is removable before offlining it by querying
>> '/sys/devices/system/node/nodeX/memoryY/removable', then output specific
>> logs if the memblock is unremovable, output will be:
>>
>>   libdaxctl: offline_one_memblock: dax0.0: memory371 is unremovable
>>   dax0.0: failed to offline memory: Operation not supported
>>   error offlining memory: Operation not supported
>>   offlined memory for 0 devices
>>
> Hi Ming,
>
> This led me to catch up on movable and removable in DAX context.
> Not all 'Movable' DAX memory is 'Removable' right?

Hi Alison,


After investigation, I noticed if memblock is unremovable, will not have "movable" field in the output of "daxctl list" because memblock can be only attached as ZONE_NORMAL.

If memblock is removable, "movable" will be showed up by "daxctl list", because memblock can be attached as ZONE_NORMAL or ZONE_MOVABLE. 

So seems like "movable" field in daxctl list implying that the dax device is removable or not. if there is a "movable" field in the output of "daxctl list", that means the DAX device is removable.

But I think it is a hint, user cannot know such details, adding a "removable" field in daxctl list json is worth as you mentioned.


Thanks

Ming

>
> Would it be useful to add 'removable' to the daxctl list json:
>
> # daxctl list
> [
>   {
>     "chardev":"dax0.0",
>     "size":536870912,
>     "target_node":0,
>     "align":2097152,
>     "mode":"system-ram",
>     "online_memblocks":4,
>     "total_memblocks":4,
>     "movable":true
>     "removable":false  <----
>   }
> ]
>
> You've already added the helper to discover removable.
>
> Otherwise, LGTM,
> Reviewed-by: Alison Schofield <alison.schofield@intel.com>
>
>
>> Besides, delay to set up string 'path' for offlining memblock operation,
>> because string 'path' is stored in 'mem->mem_buf' which is a shared
>> buffer, it will be used in memblock_is_removable().
>>
>> Signed-off-by: Li Ming <ming.li@zohomail.com>
>> ---
>>  daxctl/lib/libdaxctl.c | 52 ++++++++++++++++++++++++++++++++++++------
>>  1 file changed, 45 insertions(+), 7 deletions(-)
>>
>> diff --git a/daxctl/lib/libdaxctl.c b/daxctl/lib/libdaxctl.c
>> index 9fbefe2e8329..b7fa0de0b73d 100644
>> --- a/daxctl/lib/libdaxctl.c
>> +++ b/daxctl/lib/libdaxctl.c
>> @@ -1310,6 +1310,37 @@ static int memblock_is_online(struct daxctl_memory *mem, char *memblock)
>>  	return 0;
>>  }
>>  
>> +static int memblock_is_removable(struct daxctl_memory *mem, char *memblock)
>> +{
>> +	struct daxctl_dev *dev = daxctl_memory_get_dev(mem);
>> +	const char *devname = daxctl_dev_get_devname(dev);
>> +	struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
>> +	int len = mem->buf_len, rc;
>> +	char buf[SYSFS_ATTR_SIZE];
>> +	char *path = mem->mem_buf;
>> +	const char *node_path;
>> +
>> +	node_path = daxctl_memory_get_node_path(mem);
>> +	if (!node_path)
>> +		return -ENXIO;
>> +
>> +	rc = snprintf(path, len, "%s/%s/removable", node_path, memblock);
>> +	if (rc < 0)
>> +		return -ENOMEM;
>> +
>> +	rc = sysfs_read_attr(ctx, path, buf);
>> +	if (rc) {
>> +		err(ctx, "%s: Failed to read %s: %s\n",
>> +			devname, path, strerror(-rc));
>> +		return rc;
>> +	}
>> +
>> +	if (strtoul(buf, NULL, 0) == 0)
>> +		return -EOPNOTSUPP;
>> +
>> +	return 0;
>> +}
>> +
>>  static int online_one_memblock(struct daxctl_memory *mem, char *memblock,
>>  		enum memory_zones zone, int *status)
>>  {
>> @@ -1362,6 +1393,20 @@ static int offline_one_memblock(struct daxctl_memory *mem, char *memblock)
>>  	char *path = mem->mem_buf;
>>  	const char *node_path;
>>  
>> +	/* if already offline, there is nothing to do */
>> +	rc = memblock_is_online(mem, memblock);
>> +	if (rc < 0)
>> +		return rc;
>> +	if (!rc)
>> +		return 1;
>> +
>> +	rc = memblock_is_removable(mem, memblock);
>> +	if (rc) {
>> +		if (rc == -EOPNOTSUPP)
>> +			err(ctx, "%s: %s is unremovable\n", devname, memblock);
>> +		return rc;
>> +	}
>> +
>>  	node_path = daxctl_memory_get_node_path(mem);
>>  	if (!node_path)
>>  		return -ENXIO;
>> @@ -1370,13 +1415,6 @@ static int offline_one_memblock(struct daxctl_memory *mem, char *memblock)
>>  	if (rc < 0)
>>  		return -ENOMEM;
>>  
>> -	/* if already offline, there is nothing to do */
>> -	rc = memblock_is_online(mem, memblock);
>> -	if (rc < 0)
>> -		return rc;
>> -	if (!rc)
>> -		return 1;
>> -
>>  	rc = sysfs_write_attr_quiet(ctx, path, mode);
>>  	if (rc) {
>>  		/* check if something raced us to offline (unlikely) */
>> -- 
>> 2.34.1
>>
>>
Li Ming Dec. 11, 2024, 7:30 a.m. UTC | #3
On 12/6/2024 9:10 AM, Alison Schofield wrote:
> On Thu, Dec 05, 2024 at 12:14:56AM +0800, Li Ming wrote:
>> If CONFIG_MEMORY_HOTREMOVE is disabled by kernel, memblocks will not be
>> removed, so 'dax offline-memory all' will output below error logs:
>>
>>   libdaxctl: offline_one_memblock: dax0.0: Failed to offline /sys/devices/system/node/node6/memory371/state: Invalid argument
>>   dax0.0: failed to offline memory: Invalid argument
>>   error offlining memory: Invalid argument
>>   offlined memory for 0 devices
>>
>> The log does not clearly show why the command failed. So checking if the
>> target memblock is removable before offlining it by querying
>> '/sys/devices/system/node/nodeX/memoryY/removable', then output specific
>> logs if the memblock is unremovable, output will be:
>>
>>   libdaxctl: offline_one_memblock: dax0.0: memory371 is unremovable
>>   dax0.0: failed to offline memory: Operation not supported
>>   error offlining memory: Operation not supported
>>   offlined memory for 0 devices
>>
> Hi Ming,
>
> This led me to catch up on movable and removable in DAX context.
> Not all 'Movable' DAX memory is 'Removable' right?
>
> Would it be useful to add 'removable' to the daxctl list json:
>
> # daxctl list
> [
>   {
>     "chardev":"dax0.0",
>     "size":536870912,
>     "target_node":0,
>     "align":2097152,
>     "mode":"system-ram",
>     "online_memblocks":4,
>     "total_memblocks":4,
>     "movable":true
>     "removable":false  <----
>   }
> ]

Hi Alison,

After investigation, if there is no "movable" in dax list json, that means the kernel does not support MEMORY_HOTREMOVE.

if there is a "movable" in dax list json, that means the kernel supports MEMORY_HOTREMOVE and the value of "movable" decides if memblocks can be offlined. So user cannot offline the memblocks if "movable" is false and "removable" is true.

Feels like the "removable" field does not make much sense in kernel supporting MEMORY_HOTREMOVE case. user can use "movable" to check if memblocks can be offlined. do you think if it is still worth adding a "removable" in daxctl list json?


Thanks

Ming
diff mbox series

Patch

diff --git a/daxctl/lib/libdaxctl.c b/daxctl/lib/libdaxctl.c
index 9fbefe2e8329..b7fa0de0b73d 100644
--- a/daxctl/lib/libdaxctl.c
+++ b/daxctl/lib/libdaxctl.c
@@ -1310,6 +1310,37 @@  static int memblock_is_online(struct daxctl_memory *mem, char *memblock)
 	return 0;
 }
 
+static int memblock_is_removable(struct daxctl_memory *mem, char *memblock)
+{
+	struct daxctl_dev *dev = daxctl_memory_get_dev(mem);
+	const char *devname = daxctl_dev_get_devname(dev);
+	struct daxctl_ctx *ctx = daxctl_dev_get_ctx(dev);
+	int len = mem->buf_len, rc;
+	char buf[SYSFS_ATTR_SIZE];
+	char *path = mem->mem_buf;
+	const char *node_path;
+
+	node_path = daxctl_memory_get_node_path(mem);
+	if (!node_path)
+		return -ENXIO;
+
+	rc = snprintf(path, len, "%s/%s/removable", node_path, memblock);
+	if (rc < 0)
+		return -ENOMEM;
+
+	rc = sysfs_read_attr(ctx, path, buf);
+	if (rc) {
+		err(ctx, "%s: Failed to read %s: %s\n",
+			devname, path, strerror(-rc));
+		return rc;
+	}
+
+	if (strtoul(buf, NULL, 0) == 0)
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
 static int online_one_memblock(struct daxctl_memory *mem, char *memblock,
 		enum memory_zones zone, int *status)
 {
@@ -1362,6 +1393,20 @@  static int offline_one_memblock(struct daxctl_memory *mem, char *memblock)
 	char *path = mem->mem_buf;
 	const char *node_path;
 
+	/* if already offline, there is nothing to do */
+	rc = memblock_is_online(mem, memblock);
+	if (rc < 0)
+		return rc;
+	if (!rc)
+		return 1;
+
+	rc = memblock_is_removable(mem, memblock);
+	if (rc) {
+		if (rc == -EOPNOTSUPP)
+			err(ctx, "%s: %s is unremovable\n", devname, memblock);
+		return rc;
+	}
+
 	node_path = daxctl_memory_get_node_path(mem);
 	if (!node_path)
 		return -ENXIO;
@@ -1370,13 +1415,6 @@  static int offline_one_memblock(struct daxctl_memory *mem, char *memblock)
 	if (rc < 0)
 		return -ENOMEM;
 
-	/* if already offline, there is nothing to do */
-	rc = memblock_is_online(mem, memblock);
-	if (rc < 0)
-		return rc;
-	if (!rc)
-		return 1;
-
 	rc = sysfs_write_attr_quiet(ctx, path, mode);
 	if (rc) {
 		/* check if something raced us to offline (unlikely) */