| From 8a9dddfbf6551afea73911e367dd4be64d62b9fd Mon Sep 17 00:00:00 2001 |
| From: Nick Terrell <terrelln@fb.com> |
| Date: Mon, 17 Jul 2017 17:08:39 -0700 |
| Subject: [PATCH v5 3/5] btrfs: Add zstd support |
| |
| Add zstd compression and decompression support to BtrFS. zstd at its |
| fastest level compresses almost as well as zlib, while offering much |
| faster compression and decompression, approaching lzo speeds. |
| |
| I benchmarked btrfs with zstd compression against no compression, lzo |
| compression, and zlib compression. I benchmarked two scenarios: (1) copying |
| a set of files to btrfs and then reading the files, and (2) copying a tarball |
| to btrfs, extracting it to btrfs, and then reading the extracted files. |
| After every operation, I call `sync` and include the sync time. |
| Between every pair of operations I unmount and remount the filesystem |
| to avoid caching. The benchmark files can be found in the upstream |
| zstd source repository under |
| `contrib/linux-kernel/{btrfs-benchmark.sh,btrfs-extract-benchmark.sh}` |
| [1] [2]. |
| |
| I ran the benchmarks on an Ubuntu 14.04 VM with 2 cores and 4 GiB of RAM. |
| The VM is running on a MacBook Pro with a 3.1 GHz Intel Core i7 processor, |
| 16 GB of RAM, and an SSD. |
| |
| The first compression benchmark is copying 10 copies of the unzipped |
| Silesia corpus [3] into a BtrFS filesystem mounted with |
| `-o compress-force=Method`. The decompression benchmark times how long |
| it takes to `tar` all 10 copies into `/dev/null`. The compression ratio is |
| measured by comparing the output of `df` and `du`. See the benchmark file |
| [1] for details. I benchmarked multiple zstd compression levels, although |
| the patch uses zstd level 3. |
| |
| | Method | Ratio | Compression MB/s | Decompression speed | |
| |---------|-------|------------------|---------------------| |
| | None | 0.99 | 504 | 686 | |
| | lzo | 1.66 | 398 | 442 | |
| | zlib | 2.58 | 65 | 241 | |
| | zstd 1 | 2.57 | 260 | 383 | |
| | zstd 3 | 2.71 | 174 | 408 | |
| | zstd 6 | 2.87 | 70 | 398 | |
| | zstd 9 | 2.92 | 43 | 406 | |
| | zstd 12 | 2.93 | 21 | 408 | |
| | zstd 15 | 3.01 | 11 | 354 | |
| |
| The next benchmark first copies `linux-4.11.6.tar` [4] to btrfs. Then it |
| measures the compression ratio, extracts the tar, and deletes the tar. |
| Then it measures the compression ratio again, and `tar`s the extracted |
| files into `/dev/null`. See the benchmark file [2] for details. |
| |
| | Method | Tar Ratio | Extract Ratio | Copy (s) | Extract (s)| Read (s) | |
| |--------|-----------|---------------|----------|------------|----------| |
| | None | 0.97 | 0.78 | 0.981 | 5.501 | 8.807 | |
| | lzo | 2.06 | 1.38 | 1.631 | 8.458 | 8.585 | |
| | zlib | 3.40 | 1.86 | 7.750 | 21.544 | 11.744 | |
| | zstd 1 | 3.57 | 1.85 | 2.579 | 11.479 | 9.389 | |
| |
| [1] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/btrfs-benchmark.sh |
| [2] https://github.com/facebook/zstd/blob/dev/contrib/linux-kernel/btrfs-extract-benchmark.sh |
| [3] http://sun.aei.polsl.pl/~sdeor/index.php?page=silesia |
| [4] https://cdn.kernel.org/pub/linux/kernel/v4.x/linux-4.11.6.tar.xz |
| |
| zstd source repository: https://github.com/facebook/zstd |
| |
| Signed-off-by: Nick Terrell <terrelln@fb.com> |
| --- |
| v2 -> v3: |
| - Port upstream BtrFS commits e1ddce71d6, 389a6cfc2a, and 6acafd1eff |
| - Change default compression level for BtrFS to 3 |
| |
| v3 -> v4: |
| - Add missing includes, which fixes the aarch64 build |
| - Fix minor linter warnings |
| |
| fs/btrfs/Kconfig | 2 + |
| fs/btrfs/Makefile | 2 +- |
| fs/btrfs/compression.c | 1 + |
| fs/btrfs/compression.h | 6 +- |
| fs/btrfs/ctree.h | 1 + |
| fs/btrfs/disk-io.c | 2 + |
| fs/btrfs/ioctl.c | 6 +- |
| fs/btrfs/props.c | 6 + |
| fs/btrfs/super.c | 12 +- |
| fs/btrfs/sysfs.c | 2 + |
| fs/btrfs/zstd.c | 432 +++++++++++++++++++++++++++++++++++++++++++++ |
| include/uapi/linux/btrfs.h | 8 +- |
| 12 files changed, 468 insertions(+), 12 deletions(-) |
| create mode 100644 fs/btrfs/zstd.c |
| |
| diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig |
| index 80e9c18..a26c63b 100644 |
| --- a/fs/btrfs/Kconfig |
| +++ b/fs/btrfs/Kconfig |
| @@ -6,6 +6,8 @@ config BTRFS_FS |
| select ZLIB_DEFLATE |
| select LZO_COMPRESS |
| select LZO_DECOMPRESS |
| + select ZSTD_COMPRESS |
| + select ZSTD_DECOMPRESS |
| select RAID6_PQ |
| select XOR_BLOCKS |
| select SRCU |
| diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile |
| index 128ce17..962a95a 100644 |
| --- a/fs/btrfs/Makefile |
| +++ b/fs/btrfs/Makefile |
| @@ -6,7 +6,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ |
| transaction.o inode.o file.o tree-defrag.o \ |
| extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ |
| extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ |
| - export.o tree-log.o free-space-cache.o zlib.o lzo.o \ |
| + export.o tree-log.o free-space-cache.o zlib.o lzo.o zstd.o \ |
| compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \ |
| reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ |
| uuid-tree.o props.o hash.o free-space-tree.o |
| diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c |
| index d2ef9ac..4ff42d1 100644 |
| --- a/fs/btrfs/compression.c |
| +++ b/fs/btrfs/compression.c |
| @@ -704,6 +704,7 @@ static struct { |
| static const struct btrfs_compress_op * const btrfs_compress_op[] = { |
| &btrfs_zlib_compress, |
| &btrfs_lzo_compress, |
| + &btrfs_zstd_compress, |
| }; |
| |
| void __init btrfs_init_compress(void) |
| diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h |
| index 87f6d33..2269e00 100644 |
| --- a/fs/btrfs/compression.h |
| +++ b/fs/btrfs/compression.h |
| @@ -99,8 +99,9 @@ enum btrfs_compression_type { |
| BTRFS_COMPRESS_NONE = 0, |
| BTRFS_COMPRESS_ZLIB = 1, |
| BTRFS_COMPRESS_LZO = 2, |
| - BTRFS_COMPRESS_TYPES = 2, |
| - BTRFS_COMPRESS_LAST = 3, |
| + BTRFS_COMPRESS_ZSTD = 3, |
| + BTRFS_COMPRESS_TYPES = 3, |
| + BTRFS_COMPRESS_LAST = 4, |
| }; |
| |
| struct btrfs_compress_op { |
| @@ -128,5 +129,6 @@ struct btrfs_compress_op { |
| |
| extern const struct btrfs_compress_op btrfs_zlib_compress; |
| extern const struct btrfs_compress_op btrfs_lzo_compress; |
| +extern const struct btrfs_compress_op btrfs_zstd_compress; |
| |
| #endif |
| diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h |
| index 3f3eb7b..845d77c 100644 |
| --- a/fs/btrfs/ctree.h |
| +++ b/fs/btrfs/ctree.h |
| @@ -270,6 +270,7 @@ struct btrfs_super_block { |
| BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS | \ |
| BTRFS_FEATURE_INCOMPAT_BIG_METADATA | \ |
| BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO | \ |
| + BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD | \ |
| BTRFS_FEATURE_INCOMPAT_RAID56 | \ |
| BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF | \ |
| BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA | \ |
| diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c |
| index 080e2eb..04632f4 100644 |
| --- a/fs/btrfs/disk-io.c |
| +++ b/fs/btrfs/disk-io.c |
| @@ -2828,6 +2828,8 @@ int open_ctree(struct super_block *sb, |
| features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; |
| if (fs_info->compress_type == BTRFS_COMPRESS_LZO) |
| features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; |
| + else if (fs_info->compress_type == BTRFS_COMPRESS_ZSTD) |
| + features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD; |
| |
| if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA) |
| btrfs_info(fs_info, "has skinny extents"); |
| diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c |
| index fa1b78c..b9963d9 100644 |
| --- a/fs/btrfs/ioctl.c |
| +++ b/fs/btrfs/ioctl.c |
| @@ -327,8 +327,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg) |
| |
| if (fs_info->compress_type == BTRFS_COMPRESS_LZO) |
| comp = "lzo"; |
| - else |
| + else if (fs_info->compress_type == BTRFS_COMPRESS_ZLIB) |
| comp = "zlib"; |
| + else |
| + comp = "zstd"; |
| ret = btrfs_set_prop(inode, "btrfs.compression", |
| comp, strlen(comp), 0); |
| if (ret) |
| @@ -1466,6 +1468,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, |
| |
| if (range->compress_type == BTRFS_COMPRESS_LZO) { |
| btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); |
| + } else if (range->compress_type == BTRFS_COMPRESS_ZSTD) { |
| + btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD); |
| } |
| |
| ret = defrag_count; |
| diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c |
| index 4b23ae5..20631e9 100644 |
| --- a/fs/btrfs/props.c |
| +++ b/fs/btrfs/props.c |
| @@ -390,6 +390,8 @@ static int prop_compression_validate(const char *value, size_t len) |
| return 0; |
| else if (!strncmp("zlib", value, len)) |
| return 0; |
| + else if (!strncmp("zstd", value, len)) |
| + return 0; |
| |
| return -EINVAL; |
| } |
| @@ -412,6 +414,8 @@ static int prop_compression_apply(struct inode *inode, |
| type = BTRFS_COMPRESS_LZO; |
| else if (!strncmp("zlib", value, len)) |
| type = BTRFS_COMPRESS_ZLIB; |
| + else if (!strncmp("zstd", value, len)) |
| + type = BTRFS_COMPRESS_ZSTD; |
| else |
| return -EINVAL; |
| |
| @@ -429,6 +433,8 @@ static const char *prop_compression_extract(struct inode *inode) |
| return "zlib"; |
| case BTRFS_COMPRESS_LZO: |
| return "lzo"; |
| + case BTRFS_COMPRESS_ZSTD: |
| + return "zstd"; |
| } |
| |
| return NULL; |
| diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c |
| index 12540b6..c370dea 100644 |
| --- a/fs/btrfs/super.c |
| +++ b/fs/btrfs/super.c |
| @@ -513,6 +513,14 @@ int btrfs_parse_options(struct btrfs_fs_info *info, char *options, |
| btrfs_clear_opt(info->mount_opt, NODATASUM); |
| btrfs_set_fs_incompat(info, COMPRESS_LZO); |
| no_compress = 0; |
| + } else if (strcmp(args[0].from, "zstd") == 0) { |
| + compress_type = "zstd"; |
| + info->compress_type = BTRFS_COMPRESS_ZSTD; |
| + btrfs_set_opt(info->mount_opt, COMPRESS); |
| + btrfs_clear_opt(info->mount_opt, NODATACOW); |
| + btrfs_clear_opt(info->mount_opt, NODATASUM); |
| + btrfs_set_fs_incompat(info, COMPRESS_ZSTD); |
| + no_compress = 0; |
| } else if (strncmp(args[0].from, "no", 2) == 0) { |
| compress_type = "no"; |
| btrfs_clear_opt(info->mount_opt, COMPRESS); |
| @@ -1227,8 +1235,10 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) |
| if (btrfs_test_opt(info, COMPRESS)) { |
| if (info->compress_type == BTRFS_COMPRESS_ZLIB) |
| compress_type = "zlib"; |
| - else |
| + else if (info->compress_type == BTRFS_COMPRESS_LZO) |
| compress_type = "lzo"; |
| + else |
| + compress_type = "zstd"; |
| if (btrfs_test_opt(info, FORCE_COMPRESS)) |
| seq_printf(seq, ",compress-force=%s", compress_type); |
| else |
| diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c |
| index c2d5f35..2b6d37c 100644 |
| --- a/fs/btrfs/sysfs.c |
| +++ b/fs/btrfs/sysfs.c |
| @@ -200,6 +200,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(mixed_backref, MIXED_BACKREF); |
| BTRFS_FEAT_ATTR_INCOMPAT(default_subvol, DEFAULT_SUBVOL); |
| BTRFS_FEAT_ATTR_INCOMPAT(mixed_groups, MIXED_GROUPS); |
| BTRFS_FEAT_ATTR_INCOMPAT(compress_lzo, COMPRESS_LZO); |
| +BTRFS_FEAT_ATTR_INCOMPAT(compress_zstd, COMPRESS_ZSTD); |
| BTRFS_FEAT_ATTR_INCOMPAT(big_metadata, BIG_METADATA); |
| BTRFS_FEAT_ATTR_INCOMPAT(extended_iref, EXTENDED_IREF); |
| BTRFS_FEAT_ATTR_INCOMPAT(raid56, RAID56); |
| @@ -212,6 +213,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = { |
| BTRFS_FEAT_ATTR_PTR(default_subvol), |
| BTRFS_FEAT_ATTR_PTR(mixed_groups), |
| BTRFS_FEAT_ATTR_PTR(compress_lzo), |
| + BTRFS_FEAT_ATTR_PTR(compress_zstd), |
| BTRFS_FEAT_ATTR_PTR(big_metadata), |
| BTRFS_FEAT_ATTR_PTR(extended_iref), |
| BTRFS_FEAT_ATTR_PTR(raid56), |
| diff --git a/fs/btrfs/zstd.c b/fs/btrfs/zstd.c |
| new file mode 100644 |
| index 0000000..607ce47 |
| --- /dev/null |
| +++ b/fs/btrfs/zstd.c |
| @@ -0,0 +1,432 @@ |
| +/* |
| + * Copyright (c) 2016-present, Facebook, Inc. |
| + * All rights reserved. |
| + * |
| + * This program is free software; you can redistribute it and/or |
| + * modify it under the terms of the GNU General Public |
| + * License v2 as published by the Free Software Foundation. |
| + * |
| + * This program is distributed in the hope that it will be useful, |
| + * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| + * General Public License for more details. |
| + */ |
| +#include <linux/bio.h> |
| +#include <linux/err.h> |
| +#include <linux/init.h> |
| +#include <linux/kernel.h> |
| +#include <linux/mm.h> |
| +#include <linux/pagemap.h> |
| +#include <linux/refcount.h> |
| +#include <linux/sched.h> |
| +#include <linux/slab.h> |
| +#include <linux/zstd.h> |
| +#include "compression.h" |
| + |
| +#define ZSTD_BTRFS_MAX_WINDOWLOG 17 /* upper bound applied to cParams.windowLog */ |
| +#define ZSTD_BTRFS_MAX_INPUT (1 << ZSTD_BTRFS_MAX_WINDOWLOG) /* 128 KiB */ |
| +#define ZSTD_BTRFS_DEFAULT_LEVEL 3 /* level passed to ZSTD_getParams() */ |
| + |
| +static ZSTD_parameters zstd_get_btrfs_parameters(size_t src_len) |
| +{ /* Build the zstd parameters used for an input of src_len bytes. */ |
| + ZSTD_parameters params = ZSTD_getParams(ZSTD_BTRFS_DEFAULT_LEVEL, |
| + src_len, 0); |
| + |
| + if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG) |
| + params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG; /* clamp the window */ |
| + WARN_ON(src_len > ZSTD_BTRFS_MAX_INPUT); /* warns only; params are still returned */ |
| + return params; |
| +} |
| + |
| +struct workspace { |
| + void *mem; /* zstd stream workspace, allocated with kvmalloc() */ |
| + size_t size; /* size of mem in bytes */ |
| + char *buf; /* PAGE_SIZE buffer used as decompression output */ |
| + struct list_head list; /* member handed out as the workspace handle */ |
| +}; |
| + |
| +static void zstd_free_workspace(struct list_head *ws) |
| +{ /* Free a workspace and both buffers it owns; ws is its embedded list member. */ |
| + struct workspace *workspace = list_entry(ws, struct workspace, list); |
| + |
| + kvfree(workspace->mem); /* pairs with kvmalloc() in zstd_alloc_workspace() */ |
| + kfree(workspace->buf); |
| + kfree(workspace); |
| +} |
| + |
| +static struct list_head *zstd_alloc_workspace(void) |
| +{ /* Allocate a workspace; returns its list member or ERR_PTR(-ENOMEM). */ |
| + ZSTD_parameters params = |
| + zstd_get_btrfs_parameters(ZSTD_BTRFS_MAX_INPUT); |
| + struct workspace *workspace; |
| + |
| + workspace = kzalloc(sizeof(*workspace), GFP_KERNEL); /* zeroed, so mem and buf start NULL */ |
| + if (!workspace) |
| + return ERR_PTR(-ENOMEM); |
| + |
| + workspace->size = max_t(size_t, /* mem is used for both compression and decompression */ |
| + ZSTD_CStreamWorkspaceBound(params.cParams), |
| + ZSTD_DStreamWorkspaceBound(ZSTD_BTRFS_MAX_INPUT)); |
| + workspace->mem = kvmalloc(workspace->size, GFP_KERNEL); |
| + workspace->buf = kmalloc(PAGE_SIZE, GFP_KERNEL); |
| + if (!workspace->mem || !workspace->buf) /* checked together; fail: frees whichever succeeded */ |
| + goto fail; |
| + |
| + INIT_LIST_HEAD(&workspace->list); |
| + |
| + return &workspace->list; |
| +fail: |
| + zstd_free_workspace(&workspace->list); |
| + return ERR_PTR(-ENOMEM); |
| +} |
| + |
| +static int zstd_compress_pages(struct list_head *ws, /* workspace list member */ |
| + struct address_space *mapping, /* input pages are looked up here */ |
| + u64 start, /* byte offset of the input within mapping */ |
| + struct page **pages, /* out: receives the allocated output pages */ |
| + unsigned long *out_pages, /* in: page limit; out: pages stored */ |
| + unsigned long *total_in, /* out: input bytes consumed, 0 on failure */ |
| + unsigned long *total_out) /* in: input length; out: compressed size, 0 on failure */ |
| +{ /* Returns 0 on success, otherwise -EIO, -ENOMEM or -E2BIG. */ |
| + struct workspace *workspace = list_entry(ws, struct workspace, list); |
| + ZSTD_CStream *stream; |
| + int ret = 0; |
| + int nr_pages = 0; |
| + struct page *in_page = NULL; /* The current page to read */ |
| + struct page *out_page = NULL; /* The current page to write to */ |
| + ZSTD_inBuffer in_buf = { NULL, 0, 0 }; |
| + ZSTD_outBuffer out_buf = { NULL, 0, 0 }; |
| + unsigned long tot_in = 0; |
| + unsigned long tot_out = 0; |
| + unsigned long len = *total_out; /* the input length arrives in *total_out */ |
| + const unsigned long nr_dest_pages = *out_pages; |
| + unsigned long max_out = nr_dest_pages * PAGE_SIZE; /* remaining output byte budget */ |
| + ZSTD_parameters params = zstd_get_btrfs_parameters(len); |
| + |
| + *out_pages = 0; |
| + *total_out = 0; |
| + *total_in = 0; |
| + |
| + /* Initialize the stream */ |
| + stream = ZSTD_initCStream(params, len, workspace->mem, |
| + workspace->size); |
| + if (!stream) { |
| + pr_warn("BTRFS: ZSTD_initCStream failed\n"); |
| + ret = -EIO; |
| + goto out; |
| + } |
| + |
| + /* map in the first page of input data */ |
| + in_page = find_get_page(mapping, start >> PAGE_SHIFT); |
| + in_buf.src = kmap(in_page); |
| + in_buf.pos = 0; |
| + in_buf.size = min_t(size_t, len, PAGE_SIZE); |
| + |
| + |
| + /* Allocate and map in the output buffer */ |
| + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
| + if (out_page == NULL) { |
| + ret = -ENOMEM; |
| + goto out; |
| + } |
| + pages[nr_pages++] = out_page; |
| + out_buf.dst = kmap(out_page); |
| + out_buf.pos = 0; |
| + out_buf.size = min_t(size_t, max_out, PAGE_SIZE); |
| + |
| + while (1) { /* consume the input one page at a time */ |
| + size_t ret2; |
| + |
| + ret2 = ZSTD_compressStream(stream, &out_buf, &in_buf); |
| + if (ZSTD_isError(ret2)) { |
| + pr_debug("BTRFS: ZSTD_compressStream returned %d\n", |
| + ZSTD_getErrorCode(ret2)); |
| + ret = -EIO; |
| + goto out; |
| + } |
| + |
| + /* Check to see if we are making it bigger */ |
| + if (tot_in + in_buf.pos > 8192 && /* only judged after more than 8192 input bytes */ |
| + tot_in + in_buf.pos < |
| + tot_out + out_buf.pos) { |
| + ret = -E2BIG; |
| + goto out; |
| + } |
| + |
| + /* We've reached the end of our output range */ |
| + if (out_buf.pos >= max_out) { |
| + tot_out += out_buf.pos; |
| + ret = -E2BIG; |
| + goto out; |
| + } |
| + |
| + /* Check if we need more output space */ |
| + if (out_buf.pos == out_buf.size) { |
| + tot_out += PAGE_SIZE; |
| + max_out -= PAGE_SIZE; |
| + kunmap(out_page); |
| + if (nr_pages == nr_dest_pages) { |
| + out_page = NULL; /* already unmapped; out: must not kunmap it again */ |
| + ret = -E2BIG; |
| + goto out; |
| + } |
| + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
| + if (out_page == NULL) { |
| + ret = -ENOMEM; |
| + goto out; |
| + } |
| + pages[nr_pages++] = out_page; |
| + out_buf.dst = kmap(out_page); |
| + out_buf.pos = 0; |
| + out_buf.size = min_t(size_t, max_out, PAGE_SIZE); |
| + } |
| + |
| + /* We've reached the end of the input */ |
| + if (in_buf.pos >= len) { |
| + tot_in += in_buf.pos; |
| + break; |
| + } |
| + |
| + /* Check if we need more input */ |
| + if (in_buf.pos == in_buf.size) { |
| + tot_in += PAGE_SIZE; |
| + kunmap(in_page); |
| + put_page(in_page); |
| + |
| + start += PAGE_SIZE; |
| + len -= PAGE_SIZE; /* len now counts the input not yet mapped */ |
| + in_page = find_get_page(mapping, start >> PAGE_SHIFT); |
| + in_buf.src = kmap(in_page); |
| + in_buf.pos = 0; |
| + in_buf.size = min_t(size_t, len, PAGE_SIZE); |
| + } |
| + } |
| + while (1) { /* repeat until ZSTD_endStream() returns 0 */ |
| + size_t ret2; |
| + |
| + ret2 = ZSTD_endStream(stream, &out_buf); |
| + if (ZSTD_isError(ret2)) { |
| + pr_debug("BTRFS: ZSTD_endStream returned %d\n", |
| + ZSTD_getErrorCode(ret2)); |
| + ret = -EIO; |
| + goto out; |
| + } |
| + if (ret2 == 0) { |
| + tot_out += out_buf.pos; |
| + break; |
| + } |
| + if (out_buf.pos >= max_out) { |
| + tot_out += out_buf.pos; |
| + ret = -E2BIG; |
| + goto out; |
| + } |
| + |
| + tot_out += PAGE_SIZE; /* counts a whole page before moving to the next one */ |
| + max_out -= PAGE_SIZE; |
| + kunmap(out_page); |
| + if (nr_pages == nr_dest_pages) { |
| + out_page = NULL; /* already unmapped; out: must not kunmap it again */ |
| + ret = -E2BIG; |
| + goto out; |
| + } |
| + out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); |
| + if (out_page == NULL) { |
| + ret = -ENOMEM; |
| + goto out; |
| + } |
| + pages[nr_pages++] = out_page; |
| + out_buf.dst = kmap(out_page); |
| + out_buf.pos = 0; |
| + out_buf.size = min_t(size_t, max_out, PAGE_SIZE); |
| + } |
| + |
| + if (tot_out >= tot_in) { /* output is not smaller than the input */ |
| + ret = -E2BIG; |
| + goto out; |
| + } |
| + |
| + ret = 0; |
| + *total_in = tot_in; /* totals are published on success only */ |
| + *total_out = tot_out; |
| +out: |
| + *out_pages = nr_pages; /* reported on every exit path, including errors */ |
| + /* Cleanup */ |
| + if (in_page) { |
| + kunmap(in_page); |
| + put_page(in_page); |
| + } |
| + if (out_page) |
| + kunmap(out_page); |
| + return ret; |
| +} |
| + |
| +static int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) |
| +{ /* Decompress cb->compressed_pages into cb->orig_bio; returns 0 or -EIO. */ |
| + struct workspace *workspace = list_entry(ws, struct workspace, list); |
| + struct page **pages_in = cb->compressed_pages; |
| + u64 disk_start = cb->start; |
| + struct bio *orig_bio = cb->orig_bio; |
| + size_t srclen = cb->compressed_len; |
| + ZSTD_DStream *stream; |
| + int ret = 0; |
| + unsigned long page_in_index = 0; |
| + unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE); |
| + unsigned long buf_start; |
| + unsigned long total_out = 0; |
| + ZSTD_inBuffer in_buf = { NULL, 0, 0 }; |
| + ZSTD_outBuffer out_buf = { NULL, 0, 0 }; |
| + |
| + stream = ZSTD_initDStream( |
| + ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); |
| + if (!stream) { |
| + pr_debug("BTRFS: ZSTD_initDStream failed\n"); |
| + ret = -EIO; |
| + goto done; |
| + } |
| + |
| + in_buf.src = kmap(pages_in[page_in_index]); |
| + in_buf.pos = 0; |
| + in_buf.size = min_t(size_t, srclen, PAGE_SIZE); |
| + |
| + out_buf.dst = workspace->buf; /* output is staged in the workspace buffer */ |
| + out_buf.pos = 0; |
| + out_buf.size = PAGE_SIZE; |
| + |
| + while (1) { |
| + size_t ret2; |
| + |
| + ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf); |
| + if (ZSTD_isError(ret2)) { |
| + pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", |
| + ZSTD_getErrorCode(ret2)); |
| + ret = -EIO; |
| + goto done; |
| + } |
| + buf_start = total_out; /* [buf_start, total_out) is the range just produced */ |
| + total_out += out_buf.pos; |
| + out_buf.pos = 0; /* the next call writes from the start of the buffer again */ |
| + |
| + ret = btrfs_decompress_buf2page(out_buf.dst, buf_start, |
| + total_out, disk_start, orig_bio); |
| + if (ret == 0) /* NOTE(review): presumably 0 means the bio needs no more data - confirm */ |
| + break; |
| + |
| + if (in_buf.pos >= srclen) /* srclen counts what remains from the current page on */ |
| + break; |
| + |
| + /* Check if we've hit the end of a frame */ |
| + if (ret2 == 0) |
| + break; |
| + |
| + if (in_buf.pos == in_buf.size) { /* current input page is used up */ |
| + kunmap(pages_in[page_in_index++]); |
| + if (page_in_index >= total_pages_in) { |
| + in_buf.src = NULL; /* nothing is mapped; done: must skip its kunmap */ |
| + ret = -EIO; |
| + goto done; |
| + } |
| + srclen -= PAGE_SIZE; |
| + in_buf.src = kmap(pages_in[page_in_index]); |
| + in_buf.pos = 0; |
| + in_buf.size = min_t(size_t, srclen, PAGE_SIZE); |
| + } |
| + } |
| + ret = 0; /* every break above is reported as success */ |
| + zero_fill_bio(orig_bio); |
| +done: |
| + if (in_buf.src) /* NULL when no input page is currently mapped */ |
| + kunmap(pages_in[page_in_index]); |
| + return ret; |
| +} |
| + |
| +static int zstd_decompress(struct list_head *ws, unsigned char *data_in, /* workspace; compressed input */ |
| + struct page *dest_page, /* page that receives the output */ |
| + unsigned long start_byte, /* decompressed offset at which copying begins */ |
| + size_t srclen, size_t destlen) /* input size; wanted bytes, capped at PAGE_SIZE */ |
| +{ /* Returns 0 or -EIO; any unfilled part of the destination range is zeroed. */ |
| + struct workspace *workspace = list_entry(ws, struct workspace, list); |
| + ZSTD_DStream *stream; |
| + int ret = 0; |
| + size_t ret2; |
| + ZSTD_inBuffer in_buf = { NULL, 0, 0 }; |
| + ZSTD_outBuffer out_buf = { NULL, 0, 0 }; |
| + unsigned long total_out = 0; |
| + unsigned long pg_offset = 0; /* bytes written to dest_page so far */ |
| + char *kaddr; |
| + |
| + stream = ZSTD_initDStream( |
| + ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); |
| + if (!stream) { |
| + pr_warn("BTRFS: ZSTD_initDStream failed\n"); |
| + ret = -EIO; |
| + goto finish; |
| + } |
| + |
| + destlen = min_t(size_t, destlen, PAGE_SIZE); |
| + |
| + in_buf.src = data_in; |
| + in_buf.pos = 0; |
| + in_buf.size = srclen; |
| + |
| + out_buf.dst = workspace->buf; /* output is staged in the workspace buffer */ |
| + out_buf.pos = 0; |
| + out_buf.size = PAGE_SIZE; |
| + |
| + ret2 = 1; /* non-zero so the first pass does not look like an ended frame */ |
| + while (pg_offset < destlen && in_buf.pos < in_buf.size) { |
| + unsigned long buf_start; |
| + unsigned long buf_offset; |
| + unsigned long bytes; |
| + |
| + /* Check if the frame is over and we still need more input */ |
| + if (ret2 == 0) { |
| + pr_debug("BTRFS: ZSTD_decompressStream ended early\n"); |
| + ret = -EIO; |
| + goto finish; |
| + } |
| + ret2 = ZSTD_decompressStream(stream, &out_buf, &in_buf); |
| + if (ZSTD_isError(ret2)) { |
| + pr_debug("BTRFS: ZSTD_decompressStream returned %d\n", |
| + ZSTD_getErrorCode(ret2)); |
| + ret = -EIO; |
| + goto finish; |
| + } |
| + |
| + buf_start = total_out; /* [buf_start, total_out) is the range just produced */ |
| + total_out += out_buf.pos; |
| + out_buf.pos = 0; |
| + |
| + if (total_out <= start_byte) /* everything so far lies before start_byte */ |
| + continue; |
| + |
| + if (total_out > start_byte && buf_start < start_byte) |
| + buf_offset = start_byte - buf_start; /* start_byte falls inside this chunk */ |
| + else |
| + buf_offset = 0; |
| + |
| + bytes = min_t(unsigned long, destlen - pg_offset, |
| + out_buf.size - buf_offset); |
| + |
| + kaddr = kmap_atomic(dest_page); |
| + memcpy(kaddr + pg_offset, out_buf.dst + buf_offset, bytes); |
| + kunmap_atomic(kaddr); |
| + |
| + pg_offset += bytes; |
| + } |
| + ret = 0; |
| +finish: |
| + if (pg_offset < destlen) { /* also reached on the error paths above */ |
| + kaddr = kmap_atomic(dest_page); |
| + memset(kaddr + pg_offset, 0, destlen - pg_offset); |
| + kunmap_atomic(kaddr); |
| + } |
| + return ret; |
| +} |
| + |
| +const struct btrfs_compress_op btrfs_zstd_compress = { /* zstd callbacks, listed in btrfs_compress_op[] */ |
| + .alloc_workspace = zstd_alloc_workspace, |
| + .free_workspace = zstd_free_workspace, |
| + .compress_pages = zstd_compress_pages, |
| + .decompress_bio = zstd_decompress_bio, |
| + .decompress = zstd_decompress, |
| +}; |
| diff --git a/include/uapi/linux/btrfs.h b/include/uapi/linux/btrfs.h |
| index 9aa74f3..378230c 100644 |
| --- a/include/uapi/linux/btrfs.h |
| +++ b/include/uapi/linux/btrfs.h |
| @@ -255,13 +255,7 @@ struct btrfs_ioctl_fs_info_args { |
| #define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL (1ULL << 1) |
| #define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS (1ULL << 2) |
| #define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO (1ULL << 3) |
| -/* |
| - * some patches floated around with a second compression method |
| - * lets save that incompat here for when they do get in |
| - * Note we don't actually support it, we're just reserving the |
| - * number |
| - */ |
| -#define BTRFS_FEATURE_INCOMPAT_COMPRESS_LZOv2 (1ULL << 4) |
| +#define BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD (1ULL << 4) |
| |
| /* |
| * older kernels tried to do bigger metadata blocks, but the |
| -- |
| 2.9.3 |