[preview] btrfs: allow to set compression level for zlib
diff mbox

Message ID 20170724172939.24527-1-dsterba@suse.com
State New
Headers show

Commit Message

David Sterba July 24, 2017, 5:29 p.m. UTC
Preliminary support for setting compression level for zlib, the
following works:

$ mount -o compress=zlib                # default
$ mount -o compress=zlib0               # same
$ mount -o compress=zlib9               # level 9, slower sync, less data
$ mount -o compress=zlib1               # level 1, faster sync, more data
$ mount -o remount,compress=zlib3	# level set by remount

The level is visible in the same format in /proc/mounts. Level set via
file property does not work yet.

Required patch: "btrfs: prepare for extensions in compression options"

Signed-off-by: David Sterba <dsterba@suse.com>
---
 fs/btrfs/compression.c | 20 +++++++++++++++++++-
 fs/btrfs/compression.h |  6 +++++-
 fs/btrfs/ctree.h       |  1 +
 fs/btrfs/inode.c       |  5 ++++-
 fs/btrfs/lzo.c         |  5 +++++
 fs/btrfs/super.c       |  7 +++++--
 fs/btrfs/zlib.c        | 12 +++++++++++-
 7 files changed, 50 insertions(+), 6 deletions(-)

Comments

Anand Jain July 27, 2017, 7:52 a.m. UTC | #1
On 07/25/2017 01:29 AM, David Sterba wrote:
> Preliminary support for setting compression level for zlib, the
> following works:

  I got too busy with stuff outside of btrfs, though I agreed to work
  on this, sorry that I couldn't prioritize ahead of yours. Now deleted
  this from my list.

> $ mount -o compress=zlib                # default
> $ mount -o compress=zlib0               # same
> $ mount -o compress=zlib9               # level 9, slower sync, less data
> $ mount -o compress=zlib1               # level 1, faster sync, more data
> $ mount -o remount,compress=zlib3	# level set by remount

  this is much better than what I was initially thinking..

> The level is visible in the same format in /proc/mounts. Level set via
> file property does not work yet.
> 
> Required patch: "btrfs: prepare for extensions in compression options"

  one concern as mentioned in the respective thread.

> Signed-off-by: David Sterba <dsterba@suse.com>
> ---
>   fs/btrfs/compression.c | 20 +++++++++++++++++++-
>   fs/btrfs/compression.h |  6 +++++-
>   fs/btrfs/ctree.h       |  1 +
>   fs/btrfs/inode.c       |  5 ++++-
>   fs/btrfs/lzo.c         |  5 +++++
>   fs/btrfs/super.c       |  7 +++++--
>   fs/btrfs/zlib.c        | 12 +++++++++++-
>   7 files changed, 50 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
> index 8ba1b86c9b72..142206d68495 100644
> --- a/fs/btrfs/compression.c
> +++ b/fs/btrfs/compression.c
> @@ -866,6 +866,11 @@ static void free_workspaces(void)
>    * Given an address space and start and length, compress the bytes into @pages
>    * that are allocated on demand.
>    *
> + * @type_level is encoded algorithm and level, where level 0 means whatever
> + * default the algorithm chooses and is opaque here;
> + * - compression algo are 0-3
> + * - the level are bits 4-7
> + *

  nice ! ..

>    * @out_pages is an in/out parameter, holds maximum number of pages to allocate
>    * and returns number of actually allocated pages
>    *
> @@ -880,7 +885,7 @@ static void free_workspaces(void)
>    * @max_out tells us the max number of bytes that we're allowed to
>    * stuff into pages
>    */
> -int btrfs_compress_pages(int type, struct address_space *mapping,
> +int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
>   			 u64 start, struct page **pages,
>   			 unsigned long *out_pages,
>   			 unsigned long *total_in,
> @@ -888,9 +893,11 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
>   {
>   	struct list_head *workspace;
>   	int ret;
> +	int type = type_level & 0xF;
>   
>   	workspace = find_workspace(type);
>   
> +	btrfs_compress_op[type - 1]->set_level(workspace, type_level);
>   	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
>   						      start, pages,
>   						      out_pages,
> @@ -1047,3 +1054,14 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
>   
>   	return 1;
>   }
> +
> +unsigned int btrfs_compress_str2level(const char *str)
> +{
> +	if (strncmp(str, "zlib", 4) != 0)
> +		return 0;
> +
> +	if ('1' <= str[4] && str[4] <= '9' )
> +		return str[4] - '0';
> +
> +	return 0;
> +}
> diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
> index 89bcf975efb8..8a6db02d8732 100644
> --- a/fs/btrfs/compression.h
> +++ b/fs/btrfs/compression.h
> @@ -76,7 +76,7 @@ struct compressed_bio {
>   void btrfs_init_compress(void);
>   void btrfs_exit_compress(void);
>   
> -int btrfs_compress_pages(int type, struct address_space *mapping,
> +int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
>   			 u64 start, struct page **pages,
>   			 unsigned long *out_pages,
>   			 unsigned long *total_in,
> @@ -95,6 +95,8 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
>   int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
>   				 int mirror_num, unsigned long bio_flags);
>   
> +unsigned btrfs_compress_str2level(const char *str);
> +
>   enum btrfs_compression_type {
>   	BTRFS_COMPRESS_NONE  = 0,
>   	BTRFS_COMPRESS_ZLIB  = 1,
> @@ -124,6 +126,8 @@ struct btrfs_compress_op {
>   			  struct page *dest_page,
>   			  unsigned long start_byte,
>   			  size_t srclen, size_t destlen);
> +
> +	void (*set_level)(struct list_head *ws, unsigned int type);
>   };
>   
>   extern const struct btrfs_compress_op btrfs_zlib_compress;
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 5bdd36664421..3393aed07132 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -791,6 +791,7 @@ struct btrfs_fs_info {
>   	 */
>   	unsigned long pending_changes;
>   	unsigned long compress_type:4;
> +	unsigned int compress_level;
>   	int commit_interval;
>   	/*
>   	 * It is a suggestive number, the read side is safe even it gets a
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index eb495e956d53..a832bf8cebab 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -525,7 +525,10 @@ static noinline void compress_file_range(struct inode *inode,
>   		 */
>   		extent_range_clear_dirty_for_io(inode, start, end);
>   		redirty = 1;
> -		ret = btrfs_compress_pages(compress_type,
> +
> +		/* Compression level is applied here and only here */
> +		ret = btrfs_compress_pages(
> +			compress_type | (fs_info->compress_level << 4),
>   					   inode->i_mapping, start,
>   					   pages,
>   					   &nr_pages,
> diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
> index d433e75d489a..6c7f18cd3b61 100644
> --- a/fs/btrfs/lzo.c
> +++ b/fs/btrfs/lzo.c
> @@ -430,10 +430,15 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
>   	return ret;
>   }
>   
> +static void lzo_set_level(struct list_head *ws, unsigned int type)
> +{
> +}
> +
>   const struct btrfs_compress_op btrfs_lzo_compress = {
>   	.alloc_workspace	= lzo_alloc_workspace,
>   	.free_workspace		= lzo_free_workspace,
>   	.compress_pages		= lzo_compress_pages,
>   	.decompress_bio		= lzo_decompress_bio,
>   	.decompress		= lzo_decompress,
> +	.set_level		= lzo_set_level,
>   };
> diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
> index 147330454c17..25ffebb101c9 100644
> --- a/fs/btrfs/super.c
> +++ b/fs/btrfs/super.c
> @@ -501,6 +501,7 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
>   			    strncmp(args[0].from, "zlib", 4) == 0) {
>   				compress_type = "zlib";
>   				info->compress_type = BTRFS_COMPRESS_ZLIB;
> +				info->compress_level = btrfs_compress_str2level(args[0].from);
>   				btrfs_set_opt(info->mount_opt, COMPRESS);
>   				btrfs_clear_opt(info->mount_opt, NODATACOW);
>   				btrfs_clear_opt(info->mount_opt, NODATASUM);
> @@ -540,9 +541,9 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
>   			      compress_force != saved_compress_force)) ||
>   			    (!btrfs_test_opt(info, COMPRESS) &&
>   			     no_compress == 1)) {
> -				btrfs_info(info, "%s %s compression",
> +				btrfs_info(info, "%s %s compression, level %d",
>   					   (compress_force) ? "force" : "use",
> -					   compress_type);
> +					   compress_type, info->compress_level);
>   			}
>   			compress_force = false;
>   			break;
> @@ -1234,6 +1235,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
>   			seq_printf(seq, ",compress-force=%s", compress_type);
>   		else
>   			seq_printf(seq, ",compress=%s", compress_type);
> +		if (info->compress_level)
> +			seq_printf(seq, "%d", info->compress_level);
>   	}
>   	if (btrfs_test_opt(info, NOSSD))
>   		seq_puts(seq, ",nossd");
> diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
> index c248f9286366..c890032e680f 100644
> --- a/fs/btrfs/zlib.c
> +++ b/fs/btrfs/zlib.c
> @@ -37,6 +37,7 @@ struct workspace {
>   	z_stream strm;
>   	char *buf;
>   	struct list_head list;
> +	int level;
>   };

  No need to set it for the workspace. We could just redefine the
  function prototype for btrfs_compress_op->compress_pages()
  and pass as an argument.

Thanks, Anand

>   static void zlib_free_workspace(struct list_head *ws)
> @@ -96,7 +97,7 @@ static int zlib_compress_pages(struct list_head *ws,
>   	*total_out = 0;
>   	*total_in = 0;
>   
> -	if (Z_OK != zlib_deflateInit(&workspace->strm, 3)) {
> +	if (Z_OK != zlib_deflateInit(&workspace->strm, workspace->level)) {
>   		pr_warn("BTRFS: deflateInit failed\n");
>   		ret = -EIO;
>   		goto out;
> @@ -402,10 +403,19 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
>   	return ret;
>   }
>   
> +static void zlib_set_level(struct list_head *ws, unsigned int type)
> +{
> +	struct workspace *workspace = list_entry(ws, struct workspace, list);
> +	unsigned level = (type & 0xF0) >> 4;
> +
> +	workspace->level = level > 0 ? level : 3;
> +}
> +
>   const struct btrfs_compress_op btrfs_zlib_compress = {
>   	.alloc_workspace	= zlib_alloc_workspace,
>   	.free_workspace		= zlib_free_workspace,
>   	.compress_pages		= zlib_compress_pages,
>   	.decompress_bio		= zlib_decompress_bio,
>   	.decompress		= zlib_decompress,
> +	.set_level              = zlib_set_level,
>   };
> 


How about the compress-force to accept the compression levels ?
or I wonder if you are planning to deprecate it in the long run.

Thanks, Anand
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba July 27, 2017, 4:49 p.m. UTC | #2
On Thu, Jul 27, 2017 at 03:52:56PM +0800, Anand Jain wrote:
> > --- a/fs/btrfs/zlib.c
> > +++ b/fs/btrfs/zlib.c
> > @@ -37,6 +37,7 @@ struct workspace {
> >   	z_stream strm;
> >   	char *buf;
> >   	struct list_head list;
> > +	int level;
> >   };
> 
>   No need to set it for the workspace. We could just redefine the
>   function prototype for btrfs_compress_op->compress_pages()
>   and pass as an argument.

I think this comes from the early version of the patch where the
type and level were mangled in a slightly different way, so it was up to the
compression algo to set it and decipher back without the changes to any
of the interfaces.


> > +static void zlib_set_level(struct list_head *ws, unsigned int type)
> > +{
> > +	struct workspace *workspace = list_entry(ws, struct workspace, list);
> > +	unsigned level = (type & 0xF0) >> 4;
> > +
> > +	workspace->level = level > 0 ? level : 3;
> > +}
> > +
> >   const struct btrfs_compress_op btrfs_zlib_compress = {
> >   	.alloc_workspace	= zlib_alloc_workspace,
> >   	.free_workspace		= zlib_free_workspace,
> >   	.compress_pages		= zlib_compress_pages,
> >   	.decompress_bio		= zlib_decompress_bio,
> >   	.decompress		= zlib_decompress,
> > +	.set_level              = zlib_set_level,
> >   };
> > 
> 
> 
> How about the compress-force to accept the compression levels ?
> or I wonder if you are planning to deprecate it in the long run.

Although not mentioned, compress-force also accepts the level. I'll add
it to the changelog in the next version.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Adam Borowski Aug. 5, 2017, 1:27 a.m. UTC | #3
On Fri, Aug 04, 2017 at 09:51:44PM +0000, Nick Terrell wrote:
> On 07/25/2017 01:29 AM, David Sterba wrote:
> > Preliminary support for setting compression level for zlib, the
> > following works:
> 
> Thanks for working on this, I think it is a great feature.
> I have a few comments relating to how it would work with zstd.

Like, currently crashing because of ->set_level being 0? :p

> > --- a/fs/btrfs/compression.c
> > +++ b/fs/btrfs/compression.c
> > @@ -866,6 +866,11 @@ static void free_workspaces(void)
> >   * Given an address space and start and length, compress the bytes into @pages
> >   * that are allocated on demand.
> >   *
> > + * @type_level is encoded algorithm and level, where level 0 means whatever
> > + * default the algorithm chooses and is opaque here;
> > + * - compression algo are 0-3
> > + * - the level are bits 4-7
> 
> zstd has 19 levels, but we can either only allow the first 15 + default, or
> provide a mapping from zstd-level to BtrFS zstd-level.

Or give it more bits.  Issues like this are exactly why this patch is marked
"preview".

But, does zstd give any gains with high compression level but input data
capped at 128KB?  I don't see levels above 15 on your benchmark, and certain
compression algorithms give worse results at highest levels for small
blocks.

> > @@ -888,9 +893,11 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
> >  {
> >  	struct list_head *workspace;
> >  	int ret;
> > +	int type = type_level & 0xF;
> >  
> >  	workspace = find_workspace(type);
> >  
> > +	btrfs_compress_op[type - 1]->set_level(workspace, type_level);
> 
> zlib uses the same amount of memory independently of the compression level,
> but zstd uses a different amount of memory for each level. zstd will have
> to allocate memory here if it doesn't have enough (or has way to much),
> will that be okay?

We can instead store workspaces per the encoded type+level, that'd allow
having different levels on different mounts (then props, once we get there).

Depends on whether you want highest levels, though (asked above) -- the
highest ones take drastically more memory, so if they're out, blindly
reserving space for the highest supported level might be not too wasteful.

(I have only briefly looked at memory usage and set_level(), please ignore
me if I babble incoherently -- in bed on a N900 so I can't test it right
now.)


Meow!
Nick Terrell Aug. 5, 2017, 2:15 a.m. UTC | #4
On 8/4/17, 6:27 PM, "Adam Borowski" <kilobyte@angband.pl> wrote:
> On Fri, Aug 04, 2017 at 09:51:44PM +0000, Nick Terrell wrote:

> > On 07/25/2017 01:29 AM, David Sterba wrote:

> > > Preliminary support for setting compression level for zlib, the

> > > following works:

> > 

> > Thanks for working on this, I think it is a great feature.

> > I have a few comments relating to how it would work with zstd.

> 

> Like, currently crashing because of ->set_level being 0? :p

> 

> > > --- a/fs/btrfs/compression.c

> > > +++ b/fs/btrfs/compression.c

> > > @@ -866,6 +866,11 @@ static void free_workspaces(void)

> > >   * Given an address space and start and length, compress the bytes into @pages

> > >   * that are allocated on demand.

> > >   *

> > > + * @type_level is encoded algorithm and level, where level 0 means whatever

> > > + * default the algorithm chooses and is opaque here;

> > > + * - compression algo are 0-3

> > > + * - the level are bits 4-7

> > 

> > zstd has 19 levels, but we can either only allow the first 15 + default, or

> > provide a mapping from zstd-level to BtrFS zstd-level.

> 

> Or give it more bits.  Issues like this are exactly why this patch is marked

> "preview".

> 

> But, does zstd give any gains with high compression level but input data

> capped at 128KB?  I don't see levels above 15 on your benchmark, and certain

> compression algorithms give worse results at highest levels for small

> blocks.


Yeah, I stopped my benchmarks at 15, since without configurable compression
level, high levels didn't seem useful. But level 19 could be interesting if
you are building a base image that is widely distributed. When testing BtrFS
on the Silesia corpus, the compression ratio improved all the way to level
19.

> 

> > > @@ -888,9 +893,11 @@ int btrfs_compress_pages(int type, struct address_space *mapping,

> > >  {

> > >  	struct list_head *workspace;

> > >  	int ret;

> > > +	int type = type_level & 0xF;

> > >  

> > >  	workspace = find_workspace(type);

> > >  

> > > +	btrfs_compress_op[type - 1]->set_level(workspace, type_level);

> > 

> > zlib uses the same amount of memory independently of the compression level,

> > but zstd uses a different amount of memory for each level. zstd will have

> > to allocate memory here if it doesn't have enough (or has way to much),

> > will that be okay?

> 

> We can instead store workspaces per the encoded type+level, that'd allow

> having different levels on different mounts (then props, once we get there).

> 

> Depends on whether you want highest levels, though (asked above) -- the

> highest ones take drastically more memory, so if they're out, blindly

> reserving space for the highest supported level might be not too wasteful.


Looking at the memory usage of BtrFS zstd, the 128 KB window size keeps the
memory usage very reasonable up to level 19. The zstd compression levels
are computed using a tool that selects the parameters that give the best
compression ratio for a given compression speed target. Since BtrFS has a
fixed window size, the default compression levels might not be optimal. We
could compute our own compression levels for a 128 KB window size.

| Level | Memory |
|-------|--------|
| 1     | 0.8 MB |
| 2     | 1.0 MB |
| 3     | 1.3 MB |
| 4     | 0.9 MB |
| 5     | 1.4 MB |
| 6     | 1.5 MB |
| 7     | 1.4 MB |
| 8     | 1.8 MB |
| 9     | 1.8 MB |
| 10    | 1.8 MB |
| 11    | 1.8 MB |
| 12    | 1.8 MB |
| 13    | 2.4 MB |
| 14    | 2.6 MB |
| 15    | 2.6 MB |
| 16    | 3.1 MB |
| 17    | 3.1 MB |
| 18    | 3.1 MB |
| 19    | 3.1 MB |

The workspace memory usage for each compression level.

> 

> (I have only briefly looked at memory usage and set_level(), please ignore

> me if I babble incoherently -- in bed on a N900 so I can't test it right

> now.)

> 

> 

> Meow!

> -- 

> ⢀⣴⠾⠻⢶⣦⠀ What Would Jesus Do, MUD/MMORPG edition:

> ⣾⠁⢰⠒⠀⣿⡁ • multiplay with an admin char to benefit your mortal

> ⢿⡄⠘⠷⠚⠋⠀ • abuse item cloning bugs (the five fishes + two breads affair)

> ⠈⠳⣄⠀⠀⠀⠀ • use glitches to walk on water

>
David Sterba Aug. 18, 2017, 3:55 p.m. UTC | #5
On Fri, Aug 04, 2017 at 09:51:44PM +0000, Nick Terrell wrote:
> > + * @type_level is encoded algorithm and level, where level 0 means whatever
> > + * default the algorithm chooses and is opaque here;
> > + * - compression algo are 0-3
> > + * - the level are bits 4-7
> 
> zstd has 19 levels, but we can either only allow the first 15 + default, or
> provide a mapping from zstd-level to BtrFS zstd-level.

19 levels sounds too much to me, I hoped that 15 should be enough for
everybody. When I tested various zlib levels, there were only small
compression gains at a high runtime cost from levels 6-9. So some kind
of mapping would be desirable if the levels 16+ prove to be better than
< 15 under the btrfs constraints.

> >   * @out_pages is an in/out parameter, holds maximum number of pages to allocate
> >   * and returns number of actually allocated pages
> >   *
> > @@ -880,7 +885,7 @@ static void free_workspaces(void)
> >   * @max_out tells us the max number of bytes that we're allowed to
> >   * stuff into pages
> >   */
> > -int btrfs_compress_pages(int type, struct address_space *mapping,
> > +int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
> >  			 u64 start, struct page **pages,
> >  			 unsigned long *out_pages,
> >  			 unsigned long *total_in,
> > @@ -888,9 +893,11 @@ int btrfs_compress_pages(int type, struct address_space *mapping,
> >  {
> >  	struct list_head *workspace;
> >  	int ret;
> > +	int type = type_level & 0xF;
> >  
> >  	workspace = find_workspace(type);
> >  
> > +	btrfs_compress_op[type - 1]->set_level(workspace, type_level);
> 
> zlib uses the same amount of memory independently of the compression level,
> but zstd uses a different amount of memory for each level.

We could extend the code to provide 2 types of workspaces, one to cover
the "fast" levels and one for the "high compression" levels. And the
larger one can be used for 'fast' so it does not just idle around.

> zstd will have
> to allocate memory here if it doesn't have enough (or has way to much),
> will that be okay?

This would be a problem. Imagine that the system is short on free
memory, starts to flush dirty data and now some filesystem starts asking
for hundreds of kilobytes of new memory just to write the data (and free
resources and the memory in turn). That's the case we need to
preallocate when there's enough memory, at least one workspace, so
there's a guarantee of forward progress.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
David Sterba Aug. 18, 2017, 4:08 p.m. UTC | #6
On Sat, Aug 05, 2017 at 02:15:42AM +0000, Nick Terrell wrote:
> Looking at the memory usage of BtrFS zstd, the 128 KB window size keeps the
> memory usage very reasonable up to level 19. The zstd compression levels
> are computed using a tool that selects the parameters that give the best
> compression ratio for a given compression speed target. Since BtrFS has a
> fixed window size, the default compression levels might not be optimal. We
> could compute our own compression levels for a 128 KB window size.
> 
> | Level | Memory |
> |-------|--------|
> | 1     | 0.8 MB |
> | 2     | 1.0 MB |
> | 3     | 1.3 MB |
> | 4     | 0.9 MB |
> | 5     | 1.4 MB |
> | 6     | 1.5 MB |
> | 7     | 1.4 MB |
> | 8     | 1.8 MB |
> | 9     | 1.8 MB |
> | 10    | 1.8 MB |
> | 11    | 1.8 MB |
> | 12    | 1.8 MB |
> | 13    | 2.4 MB |
> | 14    | 2.6 MB |
> | 15    | 2.6 MB |
> | 16    | 3.1 MB |
> | 17    | 3.1 MB |
> | 18    | 3.1 MB |
> | 19    | 3.1 MB |
> 
> The workspace memory usage for each compression level.

That's quite a lot, in kernel. IIRC zlib and lzo use less than 200kb,
zstd wants 800kb for level 1. And this needs to be contiguous memory, so
if we're lucky and get the memory at the mount time, fine. In general
the memory can be fragmented (in the worst case, there are only 4k
chunks available), so we'd have to vmalloc and consume the virtual
mappings in great numbers.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Chris Murphy Aug. 21, 2017, 12:38 a.m. UTC | #7
On Fri, Aug 18, 2017 at 10:08 AM, David Sterba <dsterba@suse.cz> wrote:

> That's quite a lot, in kernel. IIRC zlib and lzo use less than 200kb,
> zstd wants 800kb for level 1. And this needs to be contiguous memory, so
> if we're lucky and get the memory at the mount time, fine. In general
> the memory can be fragmented (in the worst case, there are only 4k
> chunks available), so we'd have to vmalloc and consume the virtual
> mappings in great numbers.

Any thoughts on bootloader support, both in general, and as it relates
to levels of compression and memory constraints? GRUB switches to
protected mode early on but other bootloaders might have more
limitations. I guess really it's just GRUB and extlinux right now,
there were patches some time ago for Das U-Boot but they still aren't
merged.

Patch
diff mbox

diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 8ba1b86c9b72..142206d68495 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -866,6 +866,11 @@  static void free_workspaces(void)
  * Given an address space and start and length, compress the bytes into @pages
  * that are allocated on demand.
  *
+ * @type_level is encoded algorithm and level, where level 0 means whatever
+ * default the algorithm chooses and is opaque here;
+ * - compression algo are 0-3
+ * - the level are bits 4-7
+ *
  * @out_pages is an in/out parameter, holds maximum number of pages to allocate
  * and returns number of actually allocated pages
  *
@@ -880,7 +885,7 @@  static void free_workspaces(void)
  * @max_out tells us the max number of bytes that we're allowed to
  * stuff into pages
  */
-int btrfs_compress_pages(int type, struct address_space *mapping,
+int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
 			 u64 start, struct page **pages,
 			 unsigned long *out_pages,
 			 unsigned long *total_in,
@@ -888,9 +893,11 @@  int btrfs_compress_pages(int type, struct address_space *mapping,
 {
 	struct list_head *workspace;
 	int ret;
+	int type = type_level & 0xF;
 
 	workspace = find_workspace(type);
 
+	btrfs_compress_op[type - 1]->set_level(workspace, type_level);
 	ret = btrfs_compress_op[type-1]->compress_pages(workspace, mapping,
 						      start, pages,
 						      out_pages,
@@ -1047,3 +1054,14 @@  int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
 
 	return 1;
 }
+
+unsigned int btrfs_compress_str2level(const char *str)
+{
+	if (strncmp(str, "zlib", 4) != 0)
+		return 0;
+
+	if ('1' <= str[4] && str[4] <= '9' )
+		return str[4] - '0';
+
+	return 0;
+}
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 89bcf975efb8..8a6db02d8732 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -76,7 +76,7 @@  struct compressed_bio {
 void btrfs_init_compress(void);
 void btrfs_exit_compress(void);
 
-int btrfs_compress_pages(int type, struct address_space *mapping,
+int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
 			 u64 start, struct page **pages,
 			 unsigned long *out_pages,
 			 unsigned long *total_in,
@@ -95,6 +95,8 @@  int btrfs_submit_compressed_write(struct inode *inode, u64 start,
 int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
 				 int mirror_num, unsigned long bio_flags);
 
+unsigned btrfs_compress_str2level(const char *str);
+
 enum btrfs_compression_type {
 	BTRFS_COMPRESS_NONE  = 0,
 	BTRFS_COMPRESS_ZLIB  = 1,
@@ -124,6 +126,8 @@  struct btrfs_compress_op {
 			  struct page *dest_page,
 			  unsigned long start_byte,
 			  size_t srclen, size_t destlen);
+
+	void (*set_level)(struct list_head *ws, unsigned int type);
 };
 
 extern const struct btrfs_compress_op btrfs_zlib_compress;
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5bdd36664421..3393aed07132 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -791,6 +791,7 @@  struct btrfs_fs_info {
 	 */
 	unsigned long pending_changes;
 	unsigned long compress_type:4;
+	unsigned int compress_level;
 	int commit_interval;
 	/*
 	 * It is a suggestive number, the read side is safe even it gets a
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index eb495e956d53..a832bf8cebab 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -525,7 +525,10 @@  static noinline void compress_file_range(struct inode *inode,
 		 */
 		extent_range_clear_dirty_for_io(inode, start, end);
 		redirty = 1;
-		ret = btrfs_compress_pages(compress_type,
+
+		/* Compression level is applied here and only here */
+		ret = btrfs_compress_pages(
+			compress_type | (fs_info->compress_level << 4),
 					   inode->i_mapping, start,
 					   pages,
 					   &nr_pages,
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index d433e75d489a..6c7f18cd3b61 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -430,10 +430,15 @@  static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
 	return ret;
 }
 
+static void lzo_set_level(struct list_head *ws, unsigned int type)
+{
+}
+
 const struct btrfs_compress_op btrfs_lzo_compress = {
 	.alloc_workspace	= lzo_alloc_workspace,
 	.free_workspace		= lzo_free_workspace,
 	.compress_pages		= lzo_compress_pages,
 	.decompress_bio		= lzo_decompress_bio,
 	.decompress		= lzo_decompress,
+	.set_level		= lzo_set_level,
 };
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 147330454c17..25ffebb101c9 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -501,6 +501,7 @@  int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 			    strncmp(args[0].from, "zlib", 4) == 0) {
 				compress_type = "zlib";
 				info->compress_type = BTRFS_COMPRESS_ZLIB;
+				info->compress_level = btrfs_compress_str2level(args[0].from);
 				btrfs_set_opt(info->mount_opt, COMPRESS);
 				btrfs_clear_opt(info->mount_opt, NODATACOW);
 				btrfs_clear_opt(info->mount_opt, NODATASUM);
@@ -540,9 +541,9 @@  int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 			      compress_force != saved_compress_force)) ||
 			    (!btrfs_test_opt(info, COMPRESS) &&
 			     no_compress == 1)) {
-				btrfs_info(info, "%s %s compression",
+				btrfs_info(info, "%s %s compression, level %d",
 					   (compress_force) ? "force" : "use",
-					   compress_type);
+					   compress_type, info->compress_level);
 			}
 			compress_force = false;
 			break;
@@ -1234,6 +1235,8 @@  static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
 			seq_printf(seq, ",compress-force=%s", compress_type);
 		else
 			seq_printf(seq, ",compress=%s", compress_type);
+		if (info->compress_level)
+			seq_printf(seq, "%d", info->compress_level);
 	}
 	if (btrfs_test_opt(info, NOSSD))
 		seq_puts(seq, ",nossd");
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index c248f9286366..c890032e680f 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -37,6 +37,7 @@  struct workspace {
 	z_stream strm;
 	char *buf;
 	struct list_head list;
+	int level;
 };
 
 static void zlib_free_workspace(struct list_head *ws)
@@ -96,7 +97,7 @@  static int zlib_compress_pages(struct list_head *ws,
 	*total_out = 0;
 	*total_in = 0;
 
-	if (Z_OK != zlib_deflateInit(&workspace->strm, 3)) {
+	if (Z_OK != zlib_deflateInit(&workspace->strm, workspace->level)) {
 		pr_warn("BTRFS: deflateInit failed\n");
 		ret = -EIO;
 		goto out;
@@ -402,10 +403,19 @@  static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
 	return ret;
 }
 
+static void zlib_set_level(struct list_head *ws, unsigned int type)
+{
+	struct workspace *workspace = list_entry(ws, struct workspace, list);
+	unsigned level = (type & 0xF0) >> 4;
+
+	workspace->level = level > 0 ? level : 3;
+}
+
 const struct btrfs_compress_op btrfs_zlib_compress = {
 	.alloc_workspace	= zlib_alloc_workspace,
 	.free_workspace		= zlib_free_workspace,
 	.compress_pages		= zlib_compress_pages,
 	.decompress_bio		= zlib_decompress_bio,
 	.decompress		= zlib_decompress,
+	.set_level              = zlib_set_level,
 };