vfs: enable remap callers that can handle short operations
Plumb in a remap flag that enables the filesystem remap handler to
shorten remapping requests for callers that can handle it. Now
copy_file_range can report partial success (in case we run up against
alignment problems, resource limits, etc.).
We also enable REMAP_FILE_CAN_SHORTEN for FIDEDUPERANGE to maintain the
existing userspace-visible behavior where xfs/btrfs shorten the dedupe
range to avoid stale post-EOF data exposure.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 1bd2919..5f71a25 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -970,7 +970,9 @@
negative error code if errors occurred before any bytes were remapped.
The remap_flags parameter accepts REMAP_FILE_* flags. If
REMAP_FILE_DEDUP is set then the implementation must only remap if the
- requested file ranges have identical contents.
+ requested file ranges have identical contents. If REMAP_FILE_CAN_SHORTEN
+ is set, the caller is ok with the implementation shortening the request
+ length to satisfy alignment or EOF requirements (or any other reason).
fadvise: possibly called by the fadvise64() system call.
diff --git a/fs/read_write.c b/fs/read_write.c
index ea30666..c0bcc1a 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1593,7 +1593,8 @@
cloned = file_in->f_op->remap_file_range(file_in, pos_in,
file_out, pos_out,
- min_t(loff_t, MAX_RW_COUNT, len), 0);
+ min_t(loff_t, MAX_RW_COUNT, len),
+ REMAP_FILE_CAN_SHORTEN);
if (cloned > 0) {
ret = cloned;
goto done;
@@ -1721,6 +1722,8 @@
* can't meaningfully compare post-EOF contents.
*
* For clone we only link a partial EOF block above the destination file's EOF.
+ *
+ * Shorten the request if the caller set REMAP_FILE_CAN_SHORTEN; else fail it.
*/
static int generic_remap_check_len(struct inode *inode_in,
struct inode *inode_out,
@@ -1729,16 +1732,24 @@
unsigned int remap_flags)
{
u64 blkmask = i_blocksize(inode_in) - 1;
+ loff_t new_len = *len;
if ((*len & blkmask) == 0)
return 0;
- if (remap_flags & REMAP_FILE_DEDUP)
- *len &= ~blkmask;
- else if (pos_out + *len < i_size_read(inode_out))
- return -EINVAL;
+ if ((remap_flags & REMAP_FILE_DEDUP) ||
+ pos_out + *len < i_size_read(inode_out))
+ new_len &= ~blkmask;
- return 0;
+ if (new_len == *len)
+ return 0;
+
+ if (remap_flags & REMAP_FILE_CAN_SHORTEN) {
+ *len = new_len;
+ return 0;
+ }
+
+ return (remap_flags & REMAP_FILE_DEDUP) ? -EBADE : -EINVAL;
}
/*
@@ -2014,7 +2025,8 @@
{
loff_t ret;
- WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP));
+ WARN_ON_ONCE(remap_flags & ~(REMAP_FILE_DEDUP |
+ REMAP_FILE_CAN_SHORTEN));
ret = mnt_want_write_file(dst_file);
if (ret)
@@ -2115,7 +2127,7 @@
deduped = vfs_dedupe_file_range_one(file, off, dst_file,
info->dest_offset, len,
- 0);
+ REMAP_FILE_CAN_SHORTEN);
if (deduped == -EBADE)
info->status = FILE_DEDUPE_RANGE_DIFFERS;
else if (deduped < 0)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 544ab50..34c22d6 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1727,8 +1727,10 @@
* See Documentation/filesystems/vfs.txt for more details about this call.
*
* REMAP_FILE_DEDUP: only remap if contents identical (i.e. deduplicate)
+ * REMAP_FILE_CAN_SHORTEN: caller can handle a shortened request
*/
#define REMAP_FILE_DEDUP (1 << 0)
+#define REMAP_FILE_CAN_SHORTEN (1 << 1)
/*
* These flags signal that the caller is ok with altering various aspects of
@@ -1736,9 +1738,8 @@
* implementation; the vfs remap helper functions can take advantage of them.
* Flags in this category exist to preserve the quirky behavior of the hoisted
* btrfs clone/dedupe ioctls.
- * There are no flags yet, but subsequent commits will add some.
*/
-#define REMAP_FILE_ADVISORY (0)
+#define REMAP_FILE_ADVISORY (REMAP_FILE_CAN_SHORTEN)
struct iov_iter;
diff --git a/mm/filemap.c b/mm/filemap.c
index e9091d7..1775d4a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -3045,8 +3045,7 @@
bcount = ALIGN(size_in, bs) - pos_in;
} else {
if (!IS_ALIGNED(count, bs))
- return -EINVAL;
-
+ count = ALIGN_DOWN(count, bs);
bcount = count;
}
@@ -3056,10 +3055,14 @@
pos_out < pos_in + bcount)
return -EINVAL;
- /* For now we don't support changing the length. */
- if (*req_count != count)
+ /*
+ * We shortened the request but the caller can't deal with that, so
+ * bounce the request back to userspace.
+ */
+ if (*req_count != count && !(remap_flags & REMAP_FILE_CAN_SHORTEN))
return -EINVAL;
+ *req_count = count;
return 0;
}