[3/4] btrfs: Add zstd support
diff mbox

Message ID 20170622220139.2328978-3-terrelln@fb.com
State New
Headers show

Commit Message

Nick Terrell June 22, 2017, 10:01 p.m. UTC
Add zstd compression and decompression support to BtrFS. zstd at its
fastest level compresses almost as well as zlib, while offering much
faster compression and decompression, approaching lzo speeds.

I benchmarked btrfs with zstd compression against no compression, lzo
compression, and zlib compression. I benchmarked two scenarios: copying
a set of files to btrfs and then reading the files, and copying a tarball
to btrfs, extracting it to btrfs, and then reading the extracted files.
After every operation, I call `sync` and include the sync time.
Between every pair of operations I unmount and remount the filesystem
to avoid caching. The benchmark files can be found in the upstream
zstd source repository under
`contrib/linux-kernel/{btrfs-benchmark.sh,btrfs-extract-benchmark.sh}`
[1] [2].

I ran the benchmarks on an Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM.
The VM is running on a MacBook Pro with a 3.1 GHz Intel Core i7 processor,
16 GB of RAM, and an SSD.

The first compression benchmark is copying 10 copies of the unzipped
Silesia corpus [3] into a BtrFS filesystem mounted with
`-o compress-force=Method`. The decompression benchmark times how long
it takes to `tar` all 10 copies into `/dev/null`. The compression ratio is
measured by comparing the output of `df` and `du`. See the benchmark file
[1] for details. I benchmarked multiple zstd compression levels, although
the patch uses zstd level 1.

| Method  | Ratio | Compression MB/s | Decompression speed |
|---------|-------|------------------|---------------------|
| None    |  0.99 |              504 |                 686 |
| lzo     |  1.66 |              398 |                 442 |
| zlib    |  2.58 |               65 |                 241 |
| zstd 1  |  2.57 |              260 |                 383 |
| zstd 3  |  2.71 |              174 |                 408 |
| zstd 6  |  2.87 |               70 |                 398 |
| zstd 9  |  2.92 |               43 |                 406 |
| zstd 12 |  2.93 |               21 |                 408 |
| zstd 15 |  3.01 |               11 |                 354 |

The next benchmark first copies `linux-4.11.6.tar` [4] to btrfs. Then it
measures the compression ratio, extracts the tar, and deletes the tar.
Then it measures the compression ratio again, and `tar`s the extracted
files into `/dev/null`. See the benchmark file [2] for details.

| Method | Tar Ratio | Extract Ratio | Copy (s) | Extract (s)| Read (s) |
|--------|-----------|---------------|----------|------------|----------|
| None   |      0.97 |          0.78 |    0.981 |      5.501 |    8.807 |
| lzo    |      2.06 |          1.38 |    1.631 |      8.458 |    8.585 |
| zlib   |      3.40 |          1.86 |    7.750 |     21.544 |   11.744 |
| zstd 1 |      3.57 |          1.85 |    2.579 |     11.479 |    9.389 |

[1] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/btrfs-benchmark.sh
[2] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/btrfs-extract-benchmark.sh
[3] http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia
[4] https://cdn.kernel.org/pub/linux/kernel/v4.x/linux-4.11.6.tar.xz

zstd source repository: https://github.com/facebook/zstd

Signed-off-by: Nick Terrell <terrelln@fb.com>
---
 fs/btrfs/Kconfig           |   2 +
 fs/btrfs/Makefile          |   2 +-
 fs/btrfs/compression.c     |   1 +
 fs/btrfs/compression.h     |   6 +-
 fs/btrfs/ctree.h           |   1 +
 fs/btrfs/disk-io.c         |   2 +
 fs/btrfs/ioctl.c           |   6 +-
 fs/btrfs/props.c           |   6 +
 fs/btrfs/super.c           |  12 +-
 fs/btrfs/sysfs.c           |   2 +
 fs/btrfs/zstd.c            | 433 +++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/btrfs.h |   8 +-
 12 files changed, 469 insertions(+), 12 deletions(-)
 create mode 100644 fs/btrfs/zstd.c

--
2.9.3
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

kernel test robot June 25, 2017, 3:02 p.m. UTC | #1
Hi Nick,

[auto build test ERROR on linus/master]
[also build test ERROR on v4.12-rc6 next-20170623]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Nick-Terrell/lib-Add-xxhash-module/20170625-214344
config: blackfin-allyesconfig (attached as .config)
compiler: bfin-uclinux-gcc (GCC) 6.2.0
reproduce:
        wget https://raw.githubusercontent.com/01org/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # save the attached .config to linux build tree
        make.cross ARCH=blackfin 

All error/warnings (new ones prefixed by >>):

   lib//zstd/fse_compress.c: In function 'FSE_buildCTable_wksp':
>> lib//zstd/fse_compress.c:181:1: warning: the frame size of 1036 bytes is larger than 1024 bytes [-Wframe-larger-than=]
    }
    ^
   lib//zstd/fse_compress.c: In function 'FSE_compress_wksp':
   lib//zstd/fse_compress.c:857:1: warning: the frame size of 1552 bytes is larger than 1024 bytes [-Wframe-larger-than=]
    }
    ^
--
   lib//zstd/compress.c: In function 'ZSTD_compressBlock_lazy':
>> lib//zstd/compress.c:2036:1: error: unable to find a register to spill in class 'CCREGS'
    static void ZSTD_compressBlock_lazy(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); }
    ^~~~~~
>> lib//zstd/compress.c:2036:1: error: this is the insn:
   (insn 213 11 1172 9 (set (reg:BI 1429)
           (eq:BI (reg/v:SI 62 [ mls ])
               (const_int 5 [0x5]))) lib//zstd/compress.c:1855 118 {compare_eq}
        (nil))
   lib//zstd/compress.c:2036: confused by earlier errors, bailing out
--
   lib//zstd/huf_decompress.c: In function 'HUF_readDTableX4':
>> lib//zstd/huf_decompress.c:556:1: warning: the frame size of 1636 bytes is larger than 1024 bytes [-Wframe-larger-than=]
    }
    ^

vim +/CCREGS +2036 lib//zstd/compress.c

87a5643e Nick Terrell 2017-06-22  2020  	/* Save reps for next block */
87a5643e Nick Terrell 2017-06-22  2021  	ctx->repToConfirm[0] = offset_1 ? offset_1 : savedOffset;
87a5643e Nick Terrell 2017-06-22  2022  	ctx->repToConfirm[1] = offset_2 ? offset_2 : savedOffset;
87a5643e Nick Terrell 2017-06-22  2023  
87a5643e Nick Terrell 2017-06-22  2024  	/* Last Literals */
87a5643e Nick Terrell 2017-06-22  2025  	{
87a5643e Nick Terrell 2017-06-22  2026  		size_t const lastLLSize = iend - anchor;
87a5643e Nick Terrell 2017-06-22  2027  		memcpy(seqStorePtr->lit, anchor, lastLLSize);
87a5643e Nick Terrell 2017-06-22  2028  		seqStorePtr->lit += lastLLSize;
87a5643e Nick Terrell 2017-06-22  2029  	}
87a5643e Nick Terrell 2017-06-22  2030  }
87a5643e Nick Terrell 2017-06-22  2031  
87a5643e Nick Terrell 2017-06-22  2032  static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2); }
87a5643e Nick Terrell 2017-06-22  2033  
87a5643e Nick Terrell 2017-06-22  2034  static void ZSTD_compressBlock_lazy2(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2); }
87a5643e Nick Terrell 2017-06-22  2035  
87a5643e Nick Terrell 2017-06-22 @2036  static void ZSTD_compressBlock_lazy(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1); }
87a5643e Nick Terrell 2017-06-22  2037  
87a5643e Nick Terrell 2017-06-22  2038  static void ZSTD_compressBlock_greedy(ZSTD_CCtx *ctx, const void *src, size_t srcSize) { ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0); }
87a5643e Nick Terrell 2017-06-22  2039  
87a5643e Nick Terrell 2017-06-22  2040  FORCE_INLINE
87a5643e Nick Terrell 2017-06-22  2041  void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx *ctx, const void *src, size_t srcSize, const U32 searchMethod, const U32 depth)
87a5643e Nick Terrell 2017-06-22  2042  {
87a5643e Nick Terrell 2017-06-22  2043  	seqStore_t *seqStorePtr = &(ctx->seqStore);
87a5643e Nick Terrell 2017-06-22  2044  	const BYTE *const istart = (const BYTE *)src;

:::::: The code at line 2036 was first introduced by commit
:::::: 87a5643e3b02e4cb9fb83bf8f6da13be18677883 lib: Add zstd modules

:::::: TO: Nick Terrell <terrelln@fb.com>
:::::: CC: 0day robot <fengguang.wu@intel.com>

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
kernel test robot June 25, 2017, 7:03 p.m. UTC | #2
Hi Nick,

[auto build test ERROR on linus/master]
[also build test ERROR on v4.12-rc6]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url:    https://github.com/0day-ci/linux/commits/Nick-Terrell/lib-Add-xxhash-module/20170625-214344
config: i386-allmodconfig (attached as .config)
compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
reproduce:
        # save the attached .config to linux build tree
        make ARCH=i386 

All errors (new ones prefixed by >>):

>> ERROR: "__udivdi3" [lib/zstd/zstd_compress.ko] undefined!
   ERROR: "__udivdi3" [fs/ufs/ufs.ko] undefined!

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all                   Intel Corporation
Adam Borowski June 25, 2017, 9:30 p.m. UTC | #3
On Mon, Jun 26, 2017 at 03:03:17AM +0800, kbuild test robot wrote:
> Hi Nick,
> 
> url:    https://github.com/0day-ci/linux/commits/Nick-Terrell/lib-Add-xxhash-module/20170625-214344
> config: i386-allmodconfig (attached as .config)
> compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
> reproduce:
>         # save the attached .config to linux build tree
>         make ARCH=i386 
> 
> All errors (new ones prefixed by >>):
> 
> >> ERROR: "__udivdi3" [lib/zstd/zstd_compress.ko] undefined!
>    ERROR: "__udivdi3" [fs/ufs/ufs.ko] undefined!

Just to save you time to figure it out:
for division when one or both arguments are longer than the architecture's
word, gcc uses helper functions that are included when compiling in a hosted
environment -- but not in freestanding.

Thus, you want do_div() instead of /; do check widths and signedness of
arguments.


Meow!
David Sterba June 26, 2017, 12:12 p.m. UTC | #4
On Sun, Jun 25, 2017 at 11:30:22PM +0200, Adam Borowski wrote:
> On Mon, Jun 26, 2017 at 03:03:17AM +0800, kbuild test robot wrote:
> > Hi Nick,
> > 
> > url:    https://github.com/0day-ci/linux/commits/Nick-Terrell/lib-Add-xxhash-module/20170625-214344
> > config: i386-allmodconfig (attached as .config)
> > compiler: gcc-6 (Debian 6.2.0-3) 6.2.0 20160901
> > reproduce:
> >         # save the attached .config to linux build tree
> >         make ARCH=i386 
> > 
> > All errors (new ones prefixed by >>):
> > 
> > >> ERROR: "__udivdi3" [lib/zstd/zstd_compress.ko] undefined!
> >    ERROR: "__udivdi3" [fs/ufs/ufs.ko] undefined!
> 
> Just to save you time to figure it out:
> for division when one or both arguments are longer than the architecture's
> word, gcc uses helper functions that are included when compiling in a hosted
> environment -- but not in freestanding.
> 
> Thus, you want do_div() instead of /; do check widths and signedness of
> arguments.

No do_div please, div_u64 or div64_u64.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Patch
diff mbox

diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 80e9c18..a26c63b 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -6,6 +6,8 @@  config BTRFS_FS
 	select ZLIB_DEFLATE
 	select LZO_COMPRESS
 	select LZO_DECOMPRESS
+	select ZSTD_COMPRESS
+	select ZSTD_DECOMPRESS
 	select RAID6_PQ
 	select XOR_BLOCKS
 	select SRCU
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index 128ce17..962a95a 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -6,7 +6,7 @@  btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
 	   transaction.o inode.o file.o tree-defrag.o \
 	   extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \
 	   extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
-	   export.o tree-log.o free-space-cache.o zlib.o lzo.o \
+	   export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \
 	   compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
 	   reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
 	   uuid-tree.o props.o hash.o free-space-tree.o
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 10e6b28..3beb0d0 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -761,6 +761,7 @@  static struct {
 static const struct btrfs_compress_op * const btrfs_compress_op[] = {
 	&btrfs_zlib_compress,
 	&btrfs_lzo_compress,
+	&btrfs_zstd_compress,
 };

 void __init btrfs_init_compress(void)
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 39ec43a..d99fc21 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -60,8 +60,9 @@  enum btrfs_compression_type {
 	BTRFS_COMPRESS_NONE  = 0,
 	BTRFS_COMPRESS_ZLIB  = 1,
 	BTRFS_COMPRESS_LZO   = 2,
-	BTRFS_COMPRESS_TYPES = 2,
-	BTRFS_COMPRESS_LAST  = 3,
+	BTRFS_COMPRESS_ZSTD  = 3,
+	BTRFS_COMPRESS_TYPES = 3,
+	BTRFS_COMPRESS_LAST  = 4,
 };

 struct btrfs_compress_op {
@@ -92,5 +93,6 @@  struct btrfs_compress_op {

 extern const struct btrfs_compress_op btrfs_zlib_compress;
 extern const struct btrfs_compress_op btrfs_lzo_compress;
+extern const struct btrfs_compress_op btrfs_zstd_compress;

 #endif
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 4f8f75d..61dd3dd 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -271,6 +271,7 @@  struct btrfs_super_block {
 	 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS |		\
 	 BTRFS_FEATURE_INCOMPAT_BIG_METADATA |		\
 	 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |		\
+	 BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD |		\
 	 BTRFS_FEATURE_INCOMPAT_RAID56 |		\
 	 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF |		\
 	 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA |	\
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5f678dc..49c0e91 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2831,6 +2831,8 @@  int open_ctree(struct super_block *sb,
 	features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF;
 	if (fs_info->compress_type == BTRFS_COMPRESS_LZO)
 		features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
+	else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD)
+		features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD;

 	if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
 		btrfs_info(fs_info, "has skinny extents");
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index e176375..f732cfd 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -327,8 +327,10 @@  static int btrfs_ioctl_setflags(struct file *file, void __user *arg)

 		if (fs_info->compress_type == BTRFS_COMPRESS_LZO)
 			comp = "lzo";
-		else
+		else if (fs_info->compress_type == BTRFS_COMPRESS_ZLIB)
 			comp = "zlib";
+		else
+			comp = "zstd";
 		ret = btrfs_set_prop(inode, "btrfs.compression",
 				     comp, strlen(comp), 0);
 		if (ret)
@@ -1463,6 +1465,8 @@  int btrfs_defrag_file(struct inode *inode, struct file *file,

 	if (range->compress_type == BTRFS_COMPRESS_LZO) {
 		btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
+	} else if (range->compress_type == BTRFS_COMPRESS_ZSTD) {
+		btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
 	}

 	ret = defrag_count;
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index d6cb155..162105f 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -383,6 +383,8 @@  static int prop_compression_validate(const char *value, size_t len)
 		return 0;
 	else if (!strncmp("zlib", value, len))
 		return 0;
+	else if (!strncmp("zstd", value, len))
+		return 0;

 	return -EINVAL;
 }
@@ -405,6 +407,8 @@  static int prop_compression_apply(struct inode *inode,
 		type = BTRFS_COMPRESS_LZO;
 	else if (!strncmp("zlib", value, len))
 		type = BTRFS_COMPRESS_ZLIB;
+	else if (!strncmp("zstd", value, len))
+		type = BTRFS_COMPRESS_ZSTD;
 	else
 		return -EINVAL;

@@ -422,6 +426,8 @@  static const char *prop_compression_extract(struct inode *inode)
 		return "zlib";
 	case BTRFS_COMPRESS_LZO:
 		return "lzo";
+	case BTRFS_COMPRESS_ZSTD:
+		return "zstd";
 	}

 	return NULL;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 4f1cdd5..4f792d5 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -513,6 +513,14 @@  int btrfs_parse_options(struct btrfs_fs_info *info, char *options,
 				btrfs_clear_opt(info->mount_opt, NODATASUM);
 				btrfs_set_fs_incompat(info, COMPRESS_LZO);
 				no_compress = 0;
+			} else if (strcmp(args[0].from, "zstd") == 0) {
+				compress_type = "zstd";
+				info->compress_type = BTRFS_COMPRESS_ZSTD;
+				btrfs_set_opt(info->mount_opt, COMPRESS);
+				btrfs_clear_opt(info->mount_opt, NODATACOW);
+				btrfs_clear_opt(info->mount_opt, NODATASUM);
+				btrfs_set_fs_incompat(info, COMPRESS_ZSTD);
+				no_compress = 0;
 			} else if (strncmp(args[0].from, "no", 2) == 0) {
 				compress_type = "no";
 				btrfs_clear_opt(info->mount_opt, COMPRESS);
@@ -1240,8 +1248,10 @@  static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
 	if (btrfs_test_opt(info, COMPRESS)) {
 		if (info->compress_type == BTRFS_COMPRESS_ZLIB)
 			compress_type = "zlib";
-		else
+		else if (info->compress_type == BTRFS_COMPRESS_LZO)
 			compress_type = "lzo";
+		else
+			compress_type = "zstd";
 		if (btrfs_test_opt(info, FORCE_COMPRESS))
 			seq_printf(seq, ",compress-force=%s", compress_type);
 		else
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index 1f157fb..b0dec90 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -200,6 +200,7 @@  BTRFS_FEAT_ATTR_INCOMPAT(mixed_backref, MIXED_BACKREF);
 BTRFS_FEAT_ATTR_INCOMPAT(default_subvol, DEFAULT_SUBVOL);
 BTRFS_FEAT_ATTR_INCOMPAT(mixed_groups, MIXED_GROUPS);
 BTRFS_FEAT_ATTR_INCOMPAT(compress_lzo, COMPRESS_LZO);
+BTRFS_FEAT_ATTR_INCOMPAT(compress_zstd, COMPRESS_ZSTD);
 BTRFS_FEAT_ATTR_INCOMPAT(big_metadata, BIG_METADATA);
 BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF);
 BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56);
@@ -212,6 +213,7 @@  static struct attribute *btrfs_supported_feature_attrs[] = {
 	BTRFS_FEAT_ATTR_PTR(default_subvol),
 	BTRFS_FEAT_ATTR_PTR(mixed_groups),
 	BTRFS_FEAT_ATTR_PTR(compress_lzo),
+	BTRFS_FEAT_ATTR_PTR(compress_zstd),
 	BTRFS_FEAT_ATTR_PTR(big_metadata),
 	BTRFS_FEAT_ATTR_PTR(extended_iref),
 	BTRFS_FEAT_ATTR_PTR(raid56),
diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c
new file mode 100644
index 0000000..838741b
--- /dev/null
+++ b/fs/btrfs/zstd.c
@@ -0,0 +1,433 @@ 
+/*
+ * Copyright (c) 2016-present, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/init.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/pagemap.h>
+#include <linux/bio.h>
+#include <linux/zstd.h>
+#include "compression.h"
+
+#define ZSTD_BTRFS_MAX_WINDOWLOG 17
+#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG)
+
+static ZSTD_parameters zstd_get_btrfs_parameters(size_t src_len)
+{
+	ZSTD_parameters params = ZSTD_getParams(1, src_len, 0);
+
+	if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
+		params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;
+	WARN_ON(src_len > ZSTD_BTRFS_MAX_INPUT);
+	return params;
+}
+
+struct workspace {
+	void *mem;
+	size_t size;
+	char *buf;
+	struct list_head list;
+};
+
+static void zstd_free_workspace(struct list_head *ws)
+{
+	struct workspace *workspace = list_entry(ws, struct workspace, list);
+
+	vfree(workspace->mem);
+	kfree(workspace->buf);
+	kfree(workspace);
+}
+
+static struct list_head *zstd_alloc_workspace(void)
+{
+	ZSTD_parameters params =
+			zstd_get_btrfs_parameters(ZSTD_BTRFS_MAX_INPUT);
+	struct workspace *workspace;
+
+	workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
+	if (!workspace)
+		return ERR_PTR(-ENOMEM);
+
+	workspace->size = max_t(size_t,
+			ZSTD_CStreamWorkspaceBound(params.cParams),
+			ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT));
+	workspace->mem = vmalloc(workspace->size);
+	workspace->buf = kmalloc(PAGE_SIZE, GFP_NOFS);
+	if (!workspace->mem || !workspace->buf)
+		goto fail;
+
+	INIT_LIST_HEAD(&workspace->list);
+
+	return &workspace->list;
+fail:
+	zstd_free_workspace(&workspace->list);
+	return ERR_PTR(-ENOMEM);
+}
+
+static int zstd_compress_pages(struct list_head *ws,
+		struct address_space *mapping,
+		u64 start,
+		struct page **pages,
+		unsigned long *out_pages,
+		unsigned long *total_in,
+		unsigned long *total_out)
+{
+	struct workspace *workspace = list_entry(ws, struct workspace, list);
+	ZSTD_CStream *stream;
+	int ret = 0;
+	int nr_pages = 0;
+	struct page *in_page = NULL;  /* The current page to read */
+	struct page *out_page = NULL; /* The current page to write to */
+	ZSTD_inBuffer in_buf = { NULL, 0, 0 };
+	ZSTD_outBuffer out_buf = { NULL, 0, 0 };
+	unsigned long tot_in = 0;
+	unsigned long tot_out = 0;
+	unsigned long len = *total_out;
+	const unsigned long nr_dest_pages = *out_pages;
+	unsigned long max_out = nr_dest_pages * PAGE_SIZE;
+	ZSTD_parameters params = zstd_get_btrfs_parameters(len);
+
+	*out_pages = 0;
+	*total_out = 0;
+	*total_in = 0;
+
+	/* Initialize the stream */
+	stream = ZSTD_initCStream(params, len, workspace->mem,
+			workspace->size);
+	if (!stream) {
+		pr_warn("BTRFS: ZSTD_initCStream failed\n");
+		ret = -EIO;
+		goto out;
+	}
+
+	/* map in the first page of input data */
+	in_page = find_get_page(mapping, start >> PAGE_SHIFT);
+	in_buf.src = kmap(in_page);
+	in_buf.pos = 0;
+	in_buf.size = min_t(size_t, len, PAGE_SIZE);
+
+
+	/* Allocate and map in the output buffer */
+	out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+	if (out_page == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	pages[nr_pages++] = out_page;
+	out_buf.dst = kmap(out_page);
+	out_buf.pos = 0;
+	out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
+
+	while (1) {
+		size_t ret2;
+
+		ret2 = ZSTD_compressStream(stream, &out_buf, &in_buf);
+		if (ZSTD_isError(ret2)) {
+			pr_debug("BTRFS: ZSTD_compressStream returned %d\n",
+					ZSTD_getErrorCode(ret2));
+			ret = -EIO;
+			goto out;
+		}
+
+		/* Check to see if we are making it bigger */
+		if (tot_in + in_buf.pos > 8192 &&
+				tot_in + in_buf.pos <
+				tot_out + out_buf.pos) {
+			ret = -E2BIG;
+			goto out;
+		}
+
+		/* We've reached the end of our output range */
+		if (out_buf.pos >= max_out) {
+			tot_out += out_buf.pos;
+			ret = -E2BIG;
+			goto out;
+		}
+
+		/* Check if we need more output space */
+		if (out_buf.pos == out_buf.size) {
+			tot_out += PAGE_SIZE;
+			max_out -= PAGE_SIZE;
+			kunmap(out_page);
+			if (nr_pages == nr_dest_pages) {
+				out_page = NULL;
+				ret = -E2BIG;
+				goto out;
+			}
+			out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+			if (out_page == NULL) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			pages[nr_pages++] = out_page;
+			out_buf.dst = kmap(out_page);
+			out_buf.pos = 0;
+			out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
+		}
+
+		/* We've reached the end of the input */
+		if (in_buf.pos >= len) {
+			tot_in += in_buf.pos;
+			break;
+		}
+
+		/* Check if we need more input */
+		if (in_buf.pos == in_buf.size) {
+			tot_in += PAGE_SIZE;
+			kunmap(in_page);
+			put_page(in_page);
+
+			start += PAGE_SIZE;
+			len -= PAGE_SIZE;
+			in_page = find_get_page(mapping, start >> PAGE_SHIFT);
+			in_buf.src = kmap(in_page);
+			in_buf.pos = 0;
+			in_buf.size = min_t(size_t, len, PAGE_SIZE);
+		}
+	}
+	while (1) {
+		size_t ret2;
+
+		ret2 = ZSTD_endStream(stream, &out_buf);
+		if (ZSTD_isError(ret2)) {
+			pr_debug("BTRFS: ZSTD_endStream returned %d\n",
+					ZSTD_getErrorCode(ret2));
+			ret = -EIO;
+			goto out;
+		}
+		if (ret2 == 0) {
+			tot_out += out_buf.pos;
+			break;
+		}
+		if (out_buf.pos >= max_out) {
+			tot_out += out_buf.pos;
+			ret = -E2BIG;
+			goto out;
+		}
+
+		tot_out += PAGE_SIZE;
+		max_out -= PAGE_SIZE;
+		kunmap(out_page);
+		if (nr_pages == nr_dest_pages) {
+			out_page = NULL;
+			ret = -E2BIG;
+			goto out;
+		}
+		out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
+		if (out_page == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		pages[nr_pages++] = out_page;
+		out_buf.dst = kmap(out_page);
+		out_buf.pos = 0;
+		out_buf.size = min_t(size_t, max_out, PAGE_SIZE);
+	}
+
+	if (tot_out >= tot_in) {
+		ret = -E2BIG;
+		goto out;
+	}
+
+	ret = 0;
+	*total_in = tot_in;
+	*total_out = tot_out;
+out:
+	*out_pages = nr_pages;
+	/* Cleanup */
+	if (in_page) {
+		kunmap(in_page);
+		put_page(in_page);
+	}
+	if (out_page)
+		kunmap(out_page);
+	return ret;
+}
+
+static int zstd_decompress_bio(struct list_head *ws, struct page **pages_in,
+		u64 disk_start,
+		struct bio *orig_bio,
+		size_t srclen)
+{
+	struct workspace *workspace = list_entry(ws, struct workspace, list);
+	ZSTD_DStream *stream;
+	int ret = 0;
+	unsigned long page_in_index = 0;
+	unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
+	unsigned long buf_start;
+	unsigned long total_out = 0;
+	ZSTD_inBuffer in_buf = { NULL, 0, 0 };
+	ZSTD_outBuffer out_buf = { NULL, 0, 0 };
+
+	stream = ZSTD_initDStream(
+			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
+	if (!stream) {
+		pr_debug("BTRFS: ZSTD_initDStream failed\n");
+		ret = -EIO;
+		goto done;
+	}
+
+	in_buf.src = kmap(pages_in[page_in_index]);
+	in_buf.pos = 0;
+	in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
+
+	out_buf.dst = workspace->buf;
+	out_buf.pos = 0;
+	out_buf.size = PAGE_SIZE;
+
+	while (1) {
+		size_t ret2;
+
+		ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf);
+		if (ZSTD_isError(ret2)) {
+			pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
+					ZSTD_getErrorCode(ret2));
+			ret = -EIO;
+			goto done;
+		}
+		buf_start = total_out;
+		total_out += out_buf.pos;
+		out_buf.pos = 0;
+
+		ret = btrfs_decompress_buf2page(out_buf.dst, buf_start,
+				total_out, disk_start, orig_bio);
+		if (ret == 0)
+			break;
+
+		if (in_buf.pos >= srclen)
+			break;
+
+		/* Check if we've hit the end of a frame */
+		if (ret2 == 0)
+			break;
+
+		if (in_buf.pos == in_buf.size) {
+			kunmap(pages_in[page_in_index++]);
+			if (page_in_index >= total_pages_in) {
+				in_buf.src = NULL;
+				ret = -EIO;
+				goto done;
+			}
+			srclen -= PAGE_SIZE;
+			in_buf.src = kmap(pages_in[page_in_index]);
+			in_buf.pos = 0;
+			in_buf.size = min_t(size_t, srclen, PAGE_SIZE);
+		}
+	}
+	ret = 0;
+	zero_fill_bio(orig_bio);
+done:
+	if (in_buf.src)
+		kunmap(pages_in[page_in_index]);
+	return ret;
+}
+
+static int zstd_decompress(struct list_head *ws, unsigned char *data_in,
+		struct page *dest_page,
+		unsigned long start_byte,
+		size_t srclen, size_t destlen)
+{
+	struct workspace *workspace = list_entry(ws, struct workspace, list);
+	ZSTD_DStream *stream;
+	int ret = 0;
+	size_t ret2;
+	ZSTD_inBuffer in_buf = { NULL, 0, 0 };
+	ZSTD_outBuffer out_buf = { NULL, 0, 0 };
+	unsigned long total_out = 0;
+	unsigned long pg_offset = 0;
+	char *kaddr;
+
+	stream = ZSTD_initDStream(
+			ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
+	if (!stream) {
+		pr_warn("BTRFS: ZSTD_initDStream failed\n");
+		ret = -EIO;
+		goto finish;
+	}
+
+	destlen = min_t(size_t, destlen, PAGE_SIZE);
+
+	in_buf.src = data_in;
+	in_buf.pos = 0;
+	in_buf.size = srclen;
+
+	out_buf.dst = workspace->buf;
+	out_buf.pos = 0;
+	out_buf.size = PAGE_SIZE;
+
+	ret2 = 1;
+	while (pg_offset < destlen && in_buf.pos < in_buf.size) {
+		unsigned long buf_start;
+		unsigned long buf_offset;
+		unsigned long bytes;
+
+		/* Check if the frame is over and we still need more input */
+		if (ret2 == 0) {
+			pr_debug("BTRFS: ZSTD_decompressStream ended early\n");
+			ret = -EIO;
+			goto finish;
+		}
+		ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf);
+		if (ZSTD_isError(ret2)) {
+			pr_debug("BTRFS: ZSTD_decompressStream returned %d\n",
+					ZSTD_getErrorCode(ret2));
+			ret = -EIO;
+			goto finish;
+		}
+
+		buf_start = total_out;
+		total_out += out_buf.pos;
+		out_buf.pos = 0;
+
+		if (total_out <= start_byte)
+			continue;
+
+		if (total_out > start_byte && buf_start < start_byte)
+			buf_offset = start_byte - buf_start;
+		else
+			buf_offset = 0;
+
+		bytes = min_t(unsigned long, destlen - pg_offset,
+				out_buf.size - buf_offset);
+
+		kaddr = kmap_atomic(dest_page);
+		memcpy(kaddr + pg_offset, out_buf.dst + buf_offset, bytes);
+		kunmap_atomic(kaddr);
+
+		pg_offset += bytes;
+	}
+	ret = 0;
+finish:
+	if (pg_offset < destlen) {
+		kaddr = kmap_atomic(dest_page);
+		memset(kaddr + pg_offset, 0, destlen - pg_offset);
+		kunmap_atomic(kaddr);
+	}
+	return ret;
+}
+
+const struct btrfs_compress_op btrfs_zstd_compress = {
+	.alloc_workspace = zstd_alloc_workspace,
+	.free_workspace = zstd_free_workspace,
+	.compress_pages = zstd_compress_pages,
+	.decompress_bio = zstd_decompress_bio,
+	.decompress = zstd_decompress,
+};
diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h
index a456e53..992c150 100644
--- a/include/uapi/linux/btrfs.h
+++ b/include/uapi/linux/btrfs.h
@@ -255,13 +255,7 @@  struct btrfs_ioctl_fs_info_args {
 #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL	(1ULL << 1)
 #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS	(1ULL << 2)
 #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO	(1ULL << 3)
-/*
- * some patches floated around with a second compression method
- * lets save that incompat here for when they do get in
- * Note we don't actually support it, we're just reserving the
- * number
- */
-#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2	(1ULL << 4)
+#define BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD	(1ULL << 4)

 /*
  * older kernels tried to do bigger metadata blocks, but the