[4/4] btrfs-progs: change -t option for subvolume list to print a simple space-separated table (making it machine-readable)
diff mbox

Message ID 1443804083-876-5-git-send-email-axel@tty0.ch
State New
Headers show

Commit Message

Axel Burri Oct. 2, 2015, 4:41 p.m. UTC
Old implementation used tabs "\t", and tried to work around problems
by guessing amount of tabs needed (e.g. "\t\t" after top level", with
buggy output as soon as empty uuids are printed). This will never work
correctly, as tab width is a user-defined setting in the terminal.

Keep it simple and don't reimplement the wheel, for nice tabular
output we have the "column" command from util-linux:

    btrfs subvolume list -t <path> | column -t

Signed-off-by: Axel Burri <axel@tty0.ch>
---
 btrfs-list.c | 40 ++++++++++++----------------------------
 1 file changed, 12 insertions(+), 28 deletions(-)

Comments

Goffredo Baroncelli Oct. 3, 2015, 9:56 a.m. UTC | #1
On 2015-10-02 18:41, axel@tty0.ch wrote:
> Old implementation used tabs "\t", and tried to work around problems
> by guessing amount of tabs needed (e.g. "\t\t" after top level", with
> buggy output as soon as empty uuids are printed). This will never work
> correctly, as tab width is a user-defined setting in the terminal.


Why not use string_table() and table_*() functions  ? 

> 
> Keep it simple and don't reimplement the wheel, for nice tabular
> output we have the "column" command from util-linux:
> 
>     btrfs subvolume list -t <path> | column -t
> 
> Signed-off-by: Axel Burri <axel@tty0.ch>
> ---
>  btrfs-list.c | 40 ++++++++++++----------------------------
>  1 file changed, 12 insertions(+), 28 deletions(-)
> 
> diff --git a/btrfs-list.c b/btrfs-list.c
> index f8396e7..c09257a 100644
> --- a/btrfs-list.c
> +++ b/btrfs-list.c
> @@ -45,12 +45,12 @@ struct root_lookup {
>  };
>  
>  static struct {
> -	char	*name;
> +	char	*name;  /* machine-readable column identifier: [a-z_]+ */
>  	char	*column_name;
>  	int	need_print;
>  } btrfs_list_columns[] = {
>  	{
> -		.name		= "ID",
> +		.name		= "id",
>  		.column_name	= "ID",
>  		.need_print	= 0,
>  	},
> @@ -70,7 +70,7 @@ static struct {
>  		.need_print	= 0,
>  	},
>  	{
> -		.name		= "top level",
> +		.name		= "top_level",
>  		.column_name	= "Top Level",
>  		.need_print	= 0,
>  	},
> @@ -1465,13 +1465,10 @@ static void print_single_volume_info_table(struct root_info *subv)
>  		if (!btrfs_list_columns[i].need_print)
>  			continue;
>  
> -		print_subvolume_column(subv, i);
> -
> -		if (i != BTRFS_LIST_PATH)
> -			printf("\t");
> +		if (i != 0)
> +			printf(" ");
>  
> -		if (i == BTRFS_LIST_TOP_LEVEL)
> -			printf("\t");
> +		print_subvolume_column(subv, i);
>  	}
>  	printf("\n");
>  }
> @@ -1496,30 +1493,17 @@ static void print_single_volume_info_default(struct root_info *subv)
>  static void print_all_volume_info_tab_head(void)
>  {
>  	int i;
> -	int len;
> -	char barrier[20];
> -
> -	for (i = 0; i < BTRFS_LIST_ALL; i++) {
> -		if (btrfs_list_columns[i].need_print)
> -			printf("%s\t", btrfs_list_columns[i].name);
> -
> -		if (i == BTRFS_LIST_ALL-1)
> -			printf("\n");
> -	}
>  
>  	for (i = 0; i < BTRFS_LIST_ALL; i++) {
> -		memset(barrier, 0, sizeof(barrier));
> +		if (!btrfs_list_columns[i].need_print)
> +			continue;
>  
> -		if (btrfs_list_columns[i].need_print) {
> -			len = strlen(btrfs_list_columns[i].name);
> -			while (len--)
> -				strcat(barrier, "-");
> +		if (i != 0)
> +			printf(" ");
>  
> -			printf("%s\t", barrier);
> -		}
> -		if (i == BTRFS_LIST_ALL-1)
> -			printf("\n");
> +		printf(btrfs_list_columns[i].name);
>  	}
> +	printf("\n");
>  }
>  
>  static void print_all_volume_info(struct root_lookup *sorted_tree,
>
Goffredo Baroncelli Oct. 3, 2015, 10:06 a.m. UTC | #2
On 2015-10-03 11:56, Goffredo Baroncelli wrote:
> On 2015-10-02 18:41, axel@tty0.ch wrote:
>> Old implementation used tabs "\t", and tried to work around problems
>> by guessing amount of tabs needed (e.g. "\t\t" after top level", with
>> buggy output as soon as empty uuids are printed). This will never work
>> correctly, as tab width is a user-defined setting in the terminal.
> 
> 
> Why not use string_table() and table_*() functions  ? 

Hem sorry, .... create_table() and table_*() functions (see string-table.h)

[...]
Axel Burri Oct. 3, 2015, 10:17 a.m. UTC | #3
On 2015-10-03 11:56, Goffredo Baroncelli wrote:
> On 2015-10-02 18:41, axel@tty0.ch wrote:
>> Old implementation used tabs "\t", and tried to work around problems
>> by guessing amount of tabs needed (e.g. "\t\t" after top level", with
>> buggy output as soon as empty uuids are printed). This will never work
>> correctly, as tab width is a user-defined setting in the terminal.
> 
> 
> Why not use string_table() and table_*() functions  ? 

string_table(), as well as all table functions by nature, needs to know
the maximum size of all cells in a row before printing, and therefore
buffers all the output before printing. It would eat up a lot of memory
for large tables (it is not unusual to have 1000+ subvolumes in btrfs if
you make heavy use of snapshotting). Furthermore, it would slow down
things by not printing the output linewise.

> 
>>
>> Keep it simple and don't reimplement the wheel, for nice tabular
>> output we have the "column" command from util-linux:
>>
>>     btrfs subvolume list -t <path> | column -t
>>
>> Signed-off-by: Axel Burri <axel@tty0.ch>
>> ---
>>  btrfs-list.c | 40 ++++++++++++----------------------------
>>  1 file changed, 12 insertions(+), 28 deletions(-)
>>
>> diff --git a/btrfs-list.c b/btrfs-list.c
>> index f8396e7..c09257a 100644
>> --- a/btrfs-list.c
>> +++ b/btrfs-list.c
>> @@ -45,12 +45,12 @@ struct root_lookup {
>>  };
>>  
>>  static struct {
>> -	char	*name;
>> +	char	*name;  /* machine-readable column identifier: [a-z_]+ */
>>  	char	*column_name;
>>  	int	need_print;
>>  } btrfs_list_columns[] = {
>>  	{
>> -		.name		= "ID",
>> +		.name		= "id",
>>  		.column_name	= "ID",
>>  		.need_print	= 0,
>>  	},
>> @@ -70,7 +70,7 @@ static struct {
>>  		.need_print	= 0,
>>  	},
>>  	{
>> -		.name		= "top level",
>> +		.name		= "top_level",
>>  		.column_name	= "Top Level",
>>  		.need_print	= 0,
>>  	},
>> @@ -1465,13 +1465,10 @@ static void print_single_volume_info_table(struct root_info *subv)
>>  		if (!btrfs_list_columns[i].need_print)
>>  			continue;
>>  
>> -		print_subvolume_column(subv, i);
>> -
>> -		if (i != BTRFS_LIST_PATH)
>> -			printf("\t");
>> +		if (i != 0)
>> +			printf(" ");
>>  
>> -		if (i == BTRFS_LIST_TOP_LEVEL)
>> -			printf("\t");
>> +		print_subvolume_column(subv, i);
>>  	}
>>  	printf("\n");
>>  }
>> @@ -1496,30 +1493,17 @@ static void print_single_volume_info_default(struct root_info *subv)
>>  static void print_all_volume_info_tab_head(void)
>>  {
>>  	int i;
>> -	int len;
>> -	char barrier[20];
>> -
>> -	for (i = 0; i < BTRFS_LIST_ALL; i++) {
>> -		if (btrfs_list_columns[i].need_print)
>> -			printf("%s\t", btrfs_list_columns[i].name);
>> -
>> -		if (i == BTRFS_LIST_ALL-1)
>> -			printf("\n");
>> -	}
>>  
>>  	for (i = 0; i < BTRFS_LIST_ALL; i++) {
>> -		memset(barrier, 0, sizeof(barrier));
>> +		if (!btrfs_list_columns[i].need_print)
>> +			continue;
>>  
>> -		if (btrfs_list_columns[i].need_print) {
>> -			len = strlen(btrfs_list_columns[i].name);
>> -			while (len--)
>> -				strcat(barrier, "-");
>> +		if (i != 0)
>> +			printf(" ");
>>  
>> -			printf("%s\t", barrier);
>> -		}
>> -		if (i == BTRFS_LIST_ALL-1)
>> -			printf("\n");
>> +		printf(btrfs_list_columns[i].name);
>>  	}
>> +	printf("\n");
>>  }
>>  
>>  static void print_all_volume_info(struct root_lookup *sorted_tree,
>>
> 
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Goffredo Baroncelli Oct. 3, 2015, 5:41 p.m. UTC | #4
On 2015-10-03 12:09, Axel Burri wrote:
> 
> 
> On 2015-10-03 11:56, Goffredo Baroncelli wrote:
>> On 2015-10-02 18:41, axel@tty0.ch wrote:
>>> Old implementation used tabs "\t", and tried to work around problems
>>> by guessing amount of tabs needed (e.g. "\t\t" after top level", with
>>> buggy output as soon as empty uuids are printed). This will never work
>>> correctly, as tab width is a user-defined setting in the terminal.
>>
>>
>> Why not use string_table() and table_*() functions  ? 
> 
> string_table(), as well as all table functions by nature, needs to know
> the maximum size of all cells in a row before printing, and therefore
> buffers all the output before printing. It would eat up a lot of memory
> for large tables (it is not unusual to have 1000+ subvolumes in btrfs if
> you make heavy use of snapshotting). Furthermore, it would slow down
> things by not printing the output linewise.


Assuming 200bytes per row (== subvolume) x 1000 subvolumes = 200kB... I don't think that this could be a problem, nor in terms of memory used nor in terms of speed: if you have 1000+ subvolumes the most time consuming activity is traversing the filesystem looking at the snapshot...



> 
>>
>>>
>>> Keep it simple and don't reimplement the wheel, for nice tabular
>>> output we have the "column" command from util-linux:
>>>
>>>     btrfs subvolume list -t <path> | column -t
>>>
>>> Signed-off-by: Axel Burri <axel@tty0.ch>
>>> ---
>>>  btrfs-list.c | 40 ++++++++++++----------------------------
>>>  1 file changed, 12 insertions(+), 28 deletions(-)
>>>
>>> diff --git a/btrfs-list.c b/btrfs-list.c
>>> index f8396e7..c09257a 100644
>>> --- a/btrfs-list.c
>>> +++ b/btrfs-list.c
>>> @@ -45,12 +45,12 @@ struct root_lookup {
>>>  };
>>>  
>>>  static struct {
>>> -	char	*name;
>>> +	char	*name;  /* machine-readable column identifier: [a-z_]+ */
>>>  	char	*column_name;
>>>  	int	need_print;
>>>  } btrfs_list_columns[] = {
>>>  	{
>>> -		.name		= "ID",
>>> +		.name		= "id",
>>>  		.column_name	= "ID",
>>>  		.need_print	= 0,
>>>  	},
>>> @@ -70,7 +70,7 @@ static struct {
>>>  		.need_print	= 0,
>>>  	},
>>>  	{
>>> -		.name		= "top level",
>>> +		.name		= "top_level",
>>>  		.column_name	= "Top Level",
>>>  		.need_print	= 0,
>>>  	},
>>> @@ -1465,13 +1465,10 @@ static void print_single_volume_info_table(struct root_info *subv)
>>>  		if (!btrfs_list_columns[i].need_print)
>>>  			continue;
>>>  
>>> -		print_subvolume_column(subv, i);
>>> -
>>> -		if (i != BTRFS_LIST_PATH)
>>> -			printf("\t");
>>> +		if (i != 0)
>>> +			printf(" ");
>>>  
>>> -		if (i == BTRFS_LIST_TOP_LEVEL)
>>> -			printf("\t");
>>> +		print_subvolume_column(subv, i);
>>>  	}
>>>  	printf("\n");
>>>  }
>>> @@ -1496,30 +1493,17 @@ static void print_single_volume_info_default(struct root_info *subv)
>>>  static void print_all_volume_info_tab_head(void)
>>>  {
>>>  	int i;
>>> -	int len;
>>> -	char barrier[20];
>>> -
>>> -	for (i = 0; i < BTRFS_LIST_ALL; i++) {
>>> -		if (btrfs_list_columns[i].need_print)
>>> -			printf("%s\t", btrfs_list_columns[i].name);
>>> -
>>> -		if (i == BTRFS_LIST_ALL-1)
>>> -			printf("\n");
>>> -	}
>>>  
>>>  	for (i = 0; i < BTRFS_LIST_ALL; i++) {
>>> -		memset(barrier, 0, sizeof(barrier));
>>> +		if (!btrfs_list_columns[i].need_print)
>>> +			continue;
>>>  
>>> -		if (btrfs_list_columns[i].need_print) {
>>> -			len = strlen(btrfs_list_columns[i].name);
>>> -			while (len--)
>>> -				strcat(barrier, "-");
>>> +		if (i != 0)
>>> +			printf(" ");
>>>  
>>> -			printf("%s\t", barrier);
>>> -		}
>>> -		if (i == BTRFS_LIST_ALL-1)
>>> -			printf("\n");
>>> +		printf(btrfs_list_columns[i].name);
>>>  	}
>>> +	printf("\n");
>>>  }
>>>  
>>>  static void print_all_volume_info(struct root_lookup *sorted_tree,
>>>
>>
>>
>
Duncan Oct. 4, 2015, 3:37 a.m. UTC | #5
Goffredo Baroncelli posted on Sat, 03 Oct 2015 19:41:33 +0200 as
excerpted:

> On 2015-10-03 12:09, Axel Burri wrote:
>> 
>> 
>> On 2015-10-03 11:56, Goffredo Baroncelli wrote:
>>> On 2015-10-02 18:41, axel@tty0.ch wrote:
>>>> Old implementation used tabs "\t", and tried to work around problems
>>>> by guessing amount of tabs needed (e.g. "\t\t" after top level", with
>>>> buggy output as soon as empty uuids are printed). This will never
>>>> work correctly, as tab width is a user-defined setting in the
>>>> terminal.
>>>
>>>
>>> Why not use string_table() and table_*() functions  ?
>> 
>> string_table(), as well as all table functions by nature, needs to know
>> the maximum size of all cells in a row before printing, and therefore
>> buffers all the output before printing. It would eat up a lot of memory
>> for large tables (it is not unusual to have 1000+ subvolumes in btrfs
>> if you make heavy use of snapshotting). Furthermore, it would slow down
>> things by not printing the output linewise.
> 
> 
> Assuming 200bytes per row (== subvolume) x 1000 subvolumes = 200kB... I
> don't think that this could be a problem, nor in terms of memory used
> nor in terms of speed: if you have 1000+ subvolumes the most time
> consuming activity is traversing the filesystem looking at the
> snapshot...

Perhaps unfortunately, scaling to millions of snapshots/subvolumes really 
*is* expected by some people.  You'd be surprised at the number of folks 
that setup automated per-minute snapshotting with no automated thinning, 
and expect to be able to keep several years' worth of snapshots, and then 
wonder why btrfs maintenance commands such as balance take weeks/months...

5 years * 365 days/year * 24 hours/day * 60 minutes/hour * 1 snapshot/
minute = ...

(years, days, hours, minutes cancel out, leaving snapshots... yeah, the 
math should be right)

2.6 million plus snapshots.  And that's just one subvolume.  Multiply 
that by a half dozen subvolumes...

Over 15 million snapshots.  Multiply that by 200 bytes/snapshot...

Several gigs of buffer memory, now.

Obviously btrfs doesn't scale to that level now, and if it did, someone 
making the mistake of trying to get a listing of millions of snapshots 
would very likely change their mind before even hitting 10%...

But that's why actually processing line-by-line is important, so they'll 
actually /see/ what happened and ctrl-C it, instead of the program 
aborting as it runs into (for example) the 32-bit user/kernel memory 
barrier, without printing anything useful...

The line-by-line way... "Oops, that's waayyy too many snapshots to be 
practical, maybe I should start thinning..."

The buffer-it-all-way... "Oops, there's a bug in the program; it crashed."
Goffredo Baroncelli Oct. 4, 2015, 2:34 p.m. UTC | #6
On 2015-10-04 05:37, Duncan wrote:
> Goffredo Baroncelli posted on Sat, 03 Oct 2015 19:41:33 +0200 as
> excerpted:
> 
>> On 2015-10-03 12:09, Axel Burri wrote:
>>>
>>>
>>> On 2015-10-03 11:56, Goffredo Baroncelli wrote:
>>>> On 2015-10-02 18:41, axel@tty0.ch wrote:
>>>>> Old implementation used tabs "\t", and tried to work around problems
>>>>> by guessing amount of tabs needed (e.g. "\t\t" after top level", with
>>>>> buggy output as soon as empty uuids are printed). This will never
>>>>> work correctly, as tab width is a user-defined setting in the
>>>>> terminal.
>>>>
>>>>
>>>> Why not use string_table() and table_*() functions  ?
>>>
>>> string_table(), as well as all table functions by nature, needs to know
>>> the maximum size of all cells in a row before printing, and therefore
>>> buffers all the output before printing. It would eat up a lot of memory
>>> for large tables (it is not unusual to have 1000+ subvolumes in btrfs
>>> if you make heavy use of snapshotting). Furthermore, it would slow down
>>> things by not printing the output linewise.
>>
>>
>> Assuming 200bytes per row (== subvolume) x 1000 subvolumes = 200kB... I
>> don't think that this could be a problem, nor in terms of memory used
>> nor in terms of speed: if you have 1000+ subvolumes the most time
>> consuming activity is traversing the filesystem looking at the
>> snapshot...
> 
> Perhaps unfortunately, scaling to millions of snapshots/subvolumes really 
> *is* expected by some people.  You'd be surprised at the number of folks 
> that setup automated per-minute snapshotting with no automated thinning, 
> and expect to be able to keep several years' worth of snapshots, and then 
> wonder why btrfs maintenance commands such as balance take weeks/months...
[...]
> Obviously btrfs doesn't scale to that level now, and if it did, someone 
> making the mistake of trying to get a listing of millions of snapshots 
> would very likely change their mind before even hitting 10%...
> 
> But that's why actually processing line-by-line is important, so they'll 
> actually /see/ what happened and ctrl-C it, instead of the program 
> aborting as it runs into (for example) the 32-bit user/kernel memory 
> barrier, without printing anything useful...

Please Ducan, read the code.

*TODAY* "btrfs list" does a scan of all subvolumes storing information in memory !

This is the most time consuming activity (think traversing a filesystem with millions of snapshots)

*Then* "btrfs list" print the info.

So you are already blocked at the screen until all subvolume are read. And I repeat (re)processing the information requires less time than reading the information from the disk.

[....]
Axel Burri Oct. 5, 2015, 3:08 p.m. UTC | #7
On 2015-10-04 16:34, Goffredo Baroncelli wrote:
> On 2015-10-04 05:37, Duncan wrote:
>> Goffredo Baroncelli posted on Sat, 03 Oct 2015 19:41:33 +0200 as
>> excerpted:
>>
>>> On 2015-10-03 12:09, Axel Burri wrote:
>>>>
>>>>
>>>> On 2015-10-03 11:56, Goffredo Baroncelli wrote:
>>>>> On 2015-10-02 18:41, axel@tty0.ch wrote:
>>>>>> Old implementation used tabs "\t", and tried to work around problems
>>>>>> by guessing amount of tabs needed (e.g. "\t\t" after top level", with
>>>>>> buggy output as soon as empty uuids are printed). This will never
>>>>>> work correctly, as tab width is a user-defined setting in the
>>>>>> terminal.
>>>>>
>>>>>
>>>>> Why not use string_table() and table_*() functions  ?
>>>>
>>>> string_table(), as well as all table functions by nature, needs to know
>>>> the maximum size of all cells in a row before printing, and therefore
>>>> buffers all the output before printing. It would eat up a lot of memory
>>>> for large tables (it is not unusual to have 1000+ subvolumes in btrfs
>>>> if you make heavy use of snapshotting). Furthermore, it would slow down
>>>> things by not printing the output linewise.
>>>
>>>
>>> Assuming 200bytes per row (== subvolume) x 1000 subvolumes = 200kB... I
>>> don't think that this could be a problem, nor in terms of memory used
>>> nor in terms of speed: if you have 1000+ subvolumes the most time
>>> consuming activity is traversing the filesystem looking at the
>>> snapshot...
>>
>> Perhaps unfortunately, scaling to millions of snapshots/subvolumes really 
>> *is* expected by some people.  You'd be surprised at the number of folks 
>> that setup automated per-minute snapshotting with no automated thinning, 
>> and expect to be able to keep several years' worth of snapshots, and then 
>> wonder why btrfs maintenance commands such as balance take weeks/months...
> [...]
>> Obviously btrfs doesn't scale to that level now, and if it did, someone 
>> making the mistake of trying to get a listing of millions of snapshots 
>> would very likely change their mind before even hitting 10%...
>>
>> But that's why actually processing line-by-line is important, so they'll 
>> actually /see/ what happened and ctrl-C it, instead of the program 
>> aborting as it runs into (for example) the 32-bit user/kernel memory 
>> barrier, without printing anything useful...
> 
> Please Ducan, read the code.
> 
> *TODAY* "btrfs list" does a scan of all subvolumes storing information in memory !
> 
> This is the most time consuming activity (think traversing a filesystem with millions of snapshots)
> 
> *Then* "btrfs list" print the info.
> 
> So you are already blocked at the screen until all subvolume are read. And I repeat (re)processing the information requires less time than reading the information from the disk.
> 
> [....]
> 

A quick look at the code shows me that Goffredo is right here, as
__list_subvol_search() always fetches ALL data from
BTRFS_IOC_TREE_SEARCH, putting it into a rbtree for later processing
(assemble full paths, sorting).

While there is certainly room for improvements here (assuming that
BTRFS_IOC_TREE_SEARCH returns objectid's in sorted order, it would
definitively be possible to produce line-by-line output), the code looks
pretty elegant the way it is.

I still don't think it is wise to bloat things further just for printing
nice tables. My impression is that "btrfs subvolume list" is
human-readable enough without the '-t' flag, while the output with '-t'
flag is much more machine-readable-friendly, and thus should have the
highest possible performance. e.g.:

  btrfs sub list  -t / | (read th; while read $th ; do echo $gen; done)
  btrfs sub list -t | column -t

Again, this is just my opinion, being a "unix-purist". Maybe a good
compromise would be to use a single "\t" instead of " " as column delimiter.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Goffredo Baroncelli Oct. 5, 2015, 3:42 p.m. UTC | #8
Hi Axel,

On 2015-10-05 17:04, Axel Burri wrote:
[...]
> 
> A quick look at the code shows me that Goffredo is right here, as
> __list_subvol_search() always fetches ALL data from
> BTRFS_IOC_TREE_SEARCH, putting it into a rbtree for later processing
> (assemble full paths, sorting).
> 
> While there is certainly room for improvements here (assuming that
> BTRFS_IOC_TREE_SEARCH returns objectid's in sorted order, it would
> definitively be possible to produce line-by-line output), the code looks
> pretty elegant the way it is.
> 
> I still don't think it is wise to bloat things further just for printing
> nice tables. My impression is that "btrfs subvolume list" is
> human-readable enough without the '-t' flag, while the output with '-t'
> flag is much more machine-readable-friendly, and thus should have the
> highest possible performance. e.g.:

I disagree, the "-t" mode is for the human; the non '-t' mode would be for machine (if you take the space as separator and the first word as key, with the exception of the path which starts from the "path " word and ends to the end of line); unfortunately "top level" is an exception. Anyway btrfs is not very machine-friendly (sic).


Compare the two output below:

$ sudo btrfs sub list -a /
ID 257 gen 44309 top level 5 path <FS_TREE>/debian
ID 289 gen 17415 top level 257 path debian/var/lib/machines
ID 298 gen 35434 top level 5 path <FS_TREE>/debian-i32
ID 299 gen 43961 top level 5 path <FS_TREE>/boot
ID 300 gen 39452 top level 5 path <FS_TREE>/debian-jessie

$ sudo btrfs sub list -at / 
ID	gen	top level	path	
--	---	---------	----	
257	44310	5		<FS_TREE>/debian
289	17415	257		debian/var/lib/machines
298	35434	5		<FS_TREE>/debian-i32
299	43961	5		<FS_TREE>/boot
300	39452	5		<FS_TREE>/debian-jessie


I think that is easy to declare the latter more "human" friendly. 

BTW with the use of the table_* function the output become:

$ sudo ./btrfs sub list -at / 
ID  gen   top level path                   
=== ===== ========= =======================
257 44311         5 <FS_TREE>/debian       
289 17415       257 debian/var/lib/machines
298 35434         5 <FS_TREE>/debian-i32   
299 43961         5 <FS_TREE>/boot         
300 39452         5 <FS_TREE>/debian-jessie

$ sudo ./btrfs sub list -aptcguqR / 
ID  gen   cgen  parent top level parent_uuid received_uuid uuid                                 path                   
=== ===== ===== ====== ========= =========== ============= ==================================== =======================
257 44313     7      5         5           -             - 840c86cf-e78b-d54a-ab38-66662858812d <FS_TREE>/debian       
289 17415 17415    257       257           -             - 8b857250-3a3e-754d-810e-57342bbb2f56 debian/var/lib/machines
298 35434 35399      5         5           -             - 1f38049b-b153-d741-b903-d2de6fd7b3fd <FS_TREE>/debian-i32   
299 43961 35512      5         5           -             - f9d52b6b-a6d1-8c45-a6cd-ddb68cf58062 <FS_TREE>/boot         
300 39452 37744      5         5           -             - 026e44bd-66d4-e341-9a14-0124acf79793 <FS_TREE>/debian-jessie

I right aligned each field with the exception of the path (which is left aligned).
I already prepared the patch which convert "btrfs sub list -t" to use the table_* function. If you want I can send it to you to extend/replace your patch #4.

BR
G.Baroncelli






> 
>   btrfs sub list  -t / | (read th; while read $th ; do echo $gen; done)
>   btrfs sub list -t | column -t
> 
> Again, this is just my opinion, being a "unix-purist". Maybe a good
> compromise would be to use a single "\t" instead of " " as column delimiter.




>
Axel Burri Oct. 5, 2015, 4:58 p.m. UTC | #9
On 2015-10-05 17:42, Goffredo Baroncelli wrote:
> Hi Axel,
> 
> On 2015-10-05 17:04, Axel Burri wrote:
> [...]
>> I still don't think it is wise to bloat things further just for printing
>> nice tables. My impression is that "btrfs subvolume list" is
>> human-readable enough without the '-t' flag, while the output with '-t'
>> flag is much more machine-readable-friendly, and thus should have the
>> highest possible performance. e.g.:
> 
> I disagree, the "-t" mode is for the human; the non '-t' mode would be for machine (if you take the space as separator and the first word as key, with the exception of the path which starts from the "path " word and ends to the end of line); unfortunately "top level" is an exception. Anyway btrfs is not very machine-friendly (sic).

That's what my patches are all about in the first place, to make it more
machine-friendly :)

> Compare the two output below:
> 
> $ sudo btrfs sub list -a /
> ID 257 gen 44309 top level 5 path <FS_TREE>/debian
> ID 289 gen 17415 top level 257 path debian/var/lib/machines
> ID 298 gen 35434 top level 5 path <FS_TREE>/debian-i32
> ID 299 gen 43961 top level 5 path <FS_TREE>/boot
> ID 300 gen 39452 top level 5 path <FS_TREE>/debian-jessie

"grep-friendly"

> 
> $ sudo btrfs sub list -at / 
> ID	gen	top level	path	
> --	---	---------	----	
> 257	44310	5		<FS_TREE>/debian
> 289	17415	257		debian/var/lib/machines
> 298	35434	5		<FS_TREE>/debian-i32
> 299	43961	5		<FS_TREE>/boot
> 300	39452	5		<FS_TREE>/debian-jessie
> 
> 
> I think that is easy to declare the latter more "human" friendly. 

Well yes, but in some ways the latter is also more machine-friendly, as
there is less data to pipe and a whitespace-separated list is much
easier to parse (of course the same applies to your table implementation
below).

Problems here are:

- "top level" (whitespace, addressed in [PATCH 4/4])

- timestamp (-s) "YYYY-MM-DD HH:MM:SS" (whitspace and no timezone,
addressed in [PATCH 3/4])

- path can contain whitespace (not _that_ important as it's always
printed last).

> 
> BTW with the use of the table_* function the output become:
> 
> $ sudo ./btrfs sub list -at / 
> ID  gen   top level path                   
> === ===== ========= =======================
> 257 44311         5 <FS_TREE>/debian       
> 289 17415       257 debian/var/lib/machines
> 298 35434         5 <FS_TREE>/debian-i32   
> 299 43961         5 <FS_TREE>/boot         
> 300 39452         5 <FS_TREE>/debian-jessie
> 
> $ sudo ./btrfs sub list -aptcguqR / 
> ID  gen   cgen  parent top level parent_uuid received_uuid uuid                                 path                   
> === ===== ===== ====== ========= =========== ============= ==================================== =======================
> 257 44313     7      5         5           -             - 840c86cf-e78b-d54a-ab38-66662858812d <FS_TREE>/debian       
> 289 17415 17415    257       257           -             - 8b857250-3a3e-754d-810e-57342bbb2f56 debian/var/lib/machines
> 298 35434 35399      5         5           -             - 1f38049b-b153-d741-b903-d2de6fd7b3fd <FS_TREE>/debian-i32   
> 299 43961 35512      5         5           -             - f9d52b6b-a6d1-8c45-a6cd-ddb68cf58062 <FS_TREE>/boot         
> 300 39452 37744      5         5           -             - 026e44bd-66d4-e341-9a14-0124acf79793 <FS_TREE>/debian-jessie
> 
> I right aligned each field with the exception of the path (which is left aligned).
> I already prepared the patch which convert "btrfs sub list -t" to use the table_* function. If you want I can send it to you to extend/replace your patch #4.

Looks promising, although I'd still not recommend it because of the
increased memory/cpu requirements.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Goffredo Baroncelli Oct. 5, 2015, 8:09 p.m. UTC | #10
Hi Axel,

I add CC:linux-btrfs because I think that what you are saying is important and it was discussed in ML.

[we are talking about the Axel patches to "btrfs sub list" command]
On 2015-10-05 19:27, Axel Burri wrote:
> Hi Goffredo
> 
> Don't get me wrong, I don't want to blame you (or anyone), actually I'm
> only arguing what I think is best for btrfs. I started to write those
> patches because btrfs-scriptability is pretty bad (there is also "btrfs
> sub show" which has pretty un-parseable output"), and I think this is
> very important for the success of btrfs, and no-one seemed to address this.

In the past, when I developed the "btrfs fi usage/btrfs dev usage" I put a lot of effort to have an output easily machine readable: one data per line, key without space; I remember the battle to have in the key "_" instead of space :-)
However other people changed that to a more (human) readable output.

May be that I was wrong; I don't know. 

However for sure, if I look to other projects:
- systemd: it uses some btrfs capability, but it uses the ioctl directly.
- snapper: it uses ioctl directly

So I have to conclude that it is not common to use btrfs (as progs) directly.
Few times I noticed somebodies are working on a libbtrfs. May be this is a better approach.

> 
> I know that nowadays people expect to get shiny output from command-line
> tools, but IMHO it is much more important to have consistent
> machine-readable output before even thinking of human readability (since
> there are so many unix commands to translate it, e.g. "columns"). Now
> this is sadly not the case with btrfs-progs, and it's hard to change it
> when people are used to it. Note that when writing the patches I was
> also thinking of introducing some "--machine-readable" option, but this
> seemed so wrong to me...

I think that "--machine-readable" (or similar) is a better approach: the output of the btrfs command in the past is changed even drastically (give a look to my work on the mkfs.btrfs); but we could force us to have an output "backward compatible" and machine (programmer) friendly if a switch like "--machine-readable" is used.

[...]
> 
> Regards,
> 
> - Axel

BR
Goffredo

Patch
diff mbox

diff --git a/btrfs-list.c b/btrfs-list.c
index f8396e7..c09257a 100644
--- a/btrfs-list.c
+++ b/btrfs-list.c
@@ -45,12 +45,12 @@  struct root_lookup {
 };
 
 static struct {
-	char	*name;
+	char	*name;  /* machine-readable column identifier: [a-z_]+ */
 	char	*column_name;
 	int	need_print;
 } btrfs_list_columns[] = {
 	{
-		.name		= "ID",
+		.name		= "id",
 		.column_name	= "ID",
 		.need_print	= 0,
 	},
@@ -70,7 +70,7 @@  static struct {
 		.need_print	= 0,
 	},
 	{
-		.name		= "top level",
+		.name		= "top_level",
 		.column_name	= "Top Level",
 		.need_print	= 0,
 	},
@@ -1465,13 +1465,10 @@  static void print_single_volume_info_table(struct root_info *subv)
 		if (!btrfs_list_columns[i].need_print)
 			continue;
 
-		print_subvolume_column(subv, i);
-
-		if (i != BTRFS_LIST_PATH)
-			printf("\t");
+		if (i != 0)
+			printf(" ");
 
-		if (i == BTRFS_LIST_TOP_LEVEL)
-			printf("\t");
+		print_subvolume_column(subv, i);
 	}
 	printf("\n");
 }
@@ -1496,30 +1493,17 @@  static void print_single_volume_info_default(struct root_info *subv)
 static void print_all_volume_info_tab_head(void)
 {
 	int i;
-	int len;
-	char barrier[20];
-
-	for (i = 0; i < BTRFS_LIST_ALL; i++) {
-		if (btrfs_list_columns[i].need_print)
-			printf("%s\t", btrfs_list_columns[i].name);
-
-		if (i == BTRFS_LIST_ALL-1)
-			printf("\n");
-	}
 
 	for (i = 0; i < BTRFS_LIST_ALL; i++) {
-		memset(barrier, 0, sizeof(barrier));
+		if (!btrfs_list_columns[i].need_print)
+			continue;
 
-		if (btrfs_list_columns[i].need_print) {
-			len = strlen(btrfs_list_columns[i].name);
-			while (len--)
-				strcat(barrier, "-");
+		if (i != 0)
+			printf(" ");
 
-			printf("%s\t", barrier);
-		}
-		if (i == BTRFS_LIST_ALL-1)
-			printf("\n");
+		printf(btrfs_list_columns[i].name);
 	}
+	printf("\n");
 }
 
 static void print_all_volume_info(struct root_lookup *sorted_tree,