blob: f43df72b0e177e46f9ed02fcf2659a1f0b91453b [file] [log] [blame]
Christoph Hellwigf46b5a62008-06-11 21:53:53 -04001/*
2 * Copyright (C) 2007 Oracle. All rights reserved.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public
6 * License v2 as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public
14 * License along with this program; if not, write to the
15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16 * Boston, MA 02111-1307, USA.
17 */
18
19#include <linux/kernel.h>
20#include <linux/bio.h>
21#include <linux/buffer_head.h>
22#include <linux/file.h>
23#include <linux/fs.h>
Christoph Hellwigcb8e7092008-10-09 13:39:39 -040024#include <linux/fsnotify.h>
Christoph Hellwigf46b5a62008-06-11 21:53:53 -040025#include <linux/pagemap.h>
26#include <linux/highmem.h>
27#include <linux/time.h>
28#include <linux/init.h>
29#include <linux/string.h>
30#include <linux/smp_lock.h>
31#include <linux/backing-dev.h>
Christoph Hellwigcb8e7092008-10-09 13:39:39 -040032#include <linux/mount.h>
Christoph Hellwigf46b5a62008-06-11 21:53:53 -040033#include <linux/mpage.h>
Christoph Hellwigcb8e7092008-10-09 13:39:39 -040034#include <linux/namei.h>
Christoph Hellwigf46b5a62008-06-11 21:53:53 -040035#include <linux/swap.h>
36#include <linux/writeback.h>
37#include <linux/statfs.h>
38#include <linux/compat.h>
39#include <linux/bit_spinlock.h>
Christoph Hellwigcb8e7092008-10-09 13:39:39 -040040#include <linux/security.h>
Christoph Hellwigf46b5a62008-06-11 21:53:53 -040041#include <linux/version.h>
42#include <linux/xattr.h>
Yan Zheng7ea394f2008-08-05 13:05:02 -040043#include <linux/vmalloc.h>
Christoph Hellwigf46b5a62008-06-11 21:53:53 -040044#include "ctree.h"
45#include "disk-io.h"
46#include "transaction.h"
47#include "btrfs_inode.h"
48#include "ioctl.h"
49#include "print-tree.h"
50#include "volumes.h"
Chris Mason925baed2008-06-25 16:01:30 -040051#include "locking.h"
Christoph Hellwigf46b5a62008-06-11 21:53:53 -040052
53
54
Christoph Hellwigcb8e7092008-10-09 13:39:39 -040055static noinline int create_subvol(struct btrfs_root *root,
56 struct dentry *dentry,
57 char *name, int namelen)
Christoph Hellwigf46b5a62008-06-11 21:53:53 -040058{
59 struct btrfs_trans_handle *trans;
60 struct btrfs_key key;
61 struct btrfs_root_item root_item;
62 struct btrfs_inode_item *inode_item;
63 struct extent_buffer *leaf;
64 struct btrfs_root *new_root = root;
65 struct inode *dir;
66 int ret;
67 int err;
68 u64 objectid;
69 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
70 unsigned long nr = 1;
71
Christoph Hellwigf46b5a62008-06-11 21:53:53 -040072 ret = btrfs_check_free_space(root, 1, 0);
73 if (ret)
74 goto fail_commit;
75
76 trans = btrfs_start_transaction(root, 1);
77 BUG_ON(!trans);
78
79 ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
80 0, &objectid);
81 if (ret)
82 goto fail;
83
Zheng Yan31840ae2008-09-23 13:14:14 -040084 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 0,
85 objectid, trans->transid, 0, 0, 0);
Josef Bacik8e8a1e32008-07-24 12:17:14 -040086 if (IS_ERR(leaf)) {
87 ret = PTR_ERR(leaf);
88 goto fail;
89 }
Christoph Hellwigf46b5a62008-06-11 21:53:53 -040090
91 btrfs_set_header_nritems(leaf, 0);
92 btrfs_set_header_level(leaf, 0);
93 btrfs_set_header_bytenr(leaf, leaf->start);
94 btrfs_set_header_generation(leaf, trans->transid);
95 btrfs_set_header_owner(leaf, objectid);
96
97 write_extent_buffer(leaf, root->fs_info->fsid,
98 (unsigned long)btrfs_header_fsid(leaf),
99 BTRFS_FSID_SIZE);
100 btrfs_mark_buffer_dirty(leaf);
101
102 inode_item = &root_item.inode;
103 memset(inode_item, 0, sizeof(*inode_item));
104 inode_item->generation = cpu_to_le64(1);
105 inode_item->size = cpu_to_le64(3);
106 inode_item->nlink = cpu_to_le32(1);
Yan Zhenga76a3cd2008-10-09 11:46:29 -0400107 inode_item->nbytes = cpu_to_le64(root->leafsize);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400108 inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
109
110 btrfs_set_root_bytenr(&root_item, leaf->start);
Yan Zheng84234f32008-10-29 14:49:05 -0400111 btrfs_set_root_generation(&root_item, trans->transid);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400112 btrfs_set_root_level(&root_item, 0);
113 btrfs_set_root_refs(&root_item, 1);
114 btrfs_set_root_used(&root_item, 0);
Yan Zheng80ff3852008-10-30 14:20:02 -0400115 btrfs_set_root_last_snapshot(&root_item, 0);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400116
117 memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
118 root_item.drop_level = 0;
119
Chris Mason925baed2008-06-25 16:01:30 -0400120 btrfs_tree_unlock(leaf);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400121 free_extent_buffer(leaf);
122 leaf = NULL;
123
124 btrfs_set_root_dirid(&root_item, new_dirid);
125
126 key.objectid = objectid;
127 key.offset = 1;
128 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
129 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
130 &root_item);
131 if (ret)
132 goto fail;
133
134 /*
135 * insert the directory item
136 */
137 key.offset = (u64)-1;
138 dir = root->fs_info->sb->s_root->d_inode;
139 ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
140 name, namelen, dir->i_ino, &key,
Josef Bacikaec74772008-07-24 12:12:38 -0400141 BTRFS_FT_DIR, 0);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400142 if (ret)
143 goto fail;
144
145 ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
146 name, namelen, objectid,
Josef Bacikaec74772008-07-24 12:12:38 -0400147 root->fs_info->sb->s_root->d_inode->i_ino, 0);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400148 if (ret)
149 goto fail;
150
151 ret = btrfs_commit_transaction(trans, root);
152 if (ret)
153 goto fail_commit;
154
155 new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen);
156 BUG_ON(!new_root);
157
158 trans = btrfs_start_transaction(new_root, 1);
159 BUG_ON(!trans);
160
Christoph Hellwigcb8e7092008-10-09 13:39:39 -0400161 ret = btrfs_create_subvol_root(new_root, dentry, trans, new_dirid,
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400162 BTRFS_I(dir)->block_group);
163 if (ret)
164 goto fail;
165
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400166fail:
167 nr = trans->blocks_used;
168 err = btrfs_commit_transaction(trans, new_root);
169 if (err && !ret)
170 ret = err;
171fail_commit:
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400172 btrfs_btree_balance_dirty(root, nr);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400173 return ret;
174}
175
176static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
177{
178 struct btrfs_pending_snapshot *pending_snapshot;
179 struct btrfs_trans_handle *trans;
180 int ret;
181 int err;
182 unsigned long nr = 0;
183
184 if (!root->ref_cows)
185 return -EINVAL;
186
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400187 ret = btrfs_check_free_space(root, 1, 0);
188 if (ret)
189 goto fail_unlock;
190
191 pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS);
192 if (!pending_snapshot) {
193 ret = -ENOMEM;
194 goto fail_unlock;
195 }
196 pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
197 if (!pending_snapshot->name) {
198 ret = -ENOMEM;
199 kfree(pending_snapshot);
200 goto fail_unlock;
201 }
202 memcpy(pending_snapshot->name, name, namelen);
203 pending_snapshot->name[namelen] = '\0';
204 trans = btrfs_start_transaction(root, 1);
205 BUG_ON(!trans);
206 pending_snapshot->root = root;
207 list_add(&pending_snapshot->list,
208 &trans->transaction->pending_snapshots);
209 ret = btrfs_update_inode(trans, root, root->inode);
210 err = btrfs_commit_transaction(trans, root);
211
212fail_unlock:
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400213 btrfs_btree_balance_dirty(root, nr);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400214 return ret;
215}
216
Christoph Hellwigcb8e7092008-10-09 13:39:39 -0400217/* copy of may_create in fs/namei.c() */
218static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
219{
220 if (child->d_inode)
221 return -EEXIST;
222 if (IS_DEADDIR(dir))
223 return -ENOENT;
224 return inode_permission(dir, MAY_WRITE | MAY_EXEC);
225}
226
227/*
228 * Create a new subvolume below @parent. This is largely modeled after
229 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup
230 * inside this filesystem so it's quite a bit simpler.
231 */
232static noinline int btrfs_mksubvol(struct path *parent, char *name,
233 int mode, int namelen)
234{
235 struct dentry *dentry;
236 int error;
237
238 mutex_lock_nested(&parent->dentry->d_inode->i_mutex, I_MUTEX_PARENT);
239
240 dentry = lookup_one_len(name, parent->dentry, namelen);
241 error = PTR_ERR(dentry);
242 if (IS_ERR(dentry))
243 goto out_unlock;
244
245 error = -EEXIST;
246 if (dentry->d_inode)
247 goto out_dput;
248
249 if (!IS_POSIXACL(parent->dentry->d_inode))
250 mode &= ~current->fs->umask;
251 error = mnt_want_write(parent->mnt);
252 if (error)
253 goto out_dput;
254
255 error = btrfs_may_create(parent->dentry->d_inode, dentry);
256 if (error)
257 goto out_drop_write;
258
Christoph Hellwigcb8e7092008-10-09 13:39:39 -0400259 /*
260 * Actually perform the low-level subvolume creation after all
261 * this VFS fuzz.
262 *
263 * Eventually we want to pass in an inode under which we create this
264 * subvolume, but for now all are under the filesystem root.
265 *
266 * Also we should pass on the mode eventually to allow creating new
267 * subvolume with specific mode bits.
268 */
269 error = create_subvol(BTRFS_I(parent->dentry->d_inode)->root, dentry,
270 name, namelen);
271 if (error)
272 goto out_drop_write;
273
274 fsnotify_mkdir(parent->dentry->d_inode, dentry);
275out_drop_write:
276 mnt_drop_write(parent->mnt);
277out_dput:
278 dput(dentry);
279out_unlock:
280 mutex_unlock(&parent->dentry->d_inode->i_mutex);
281 return error;
282}
283
284
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400285int btrfs_defrag_file(struct file *file)
286{
287 struct inode *inode = fdentry(file)->d_inode;
288 struct btrfs_root *root = BTRFS_I(inode)->root;
289 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
Chris Mason3eaa2882008-07-24 11:57:52 -0400290 struct btrfs_ordered_extent *ordered;
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400291 struct page *page;
292 unsigned long last_index;
293 unsigned long ra_pages = root->fs_info->bdi.ra_pages;
294 unsigned long total_read = 0;
295 u64 page_start;
296 u64 page_end;
297 unsigned long i;
298 int ret;
299
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400300 ret = btrfs_check_free_space(root, inode->i_size, 0);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400301 if (ret)
302 return -ENOSPC;
303
304 mutex_lock(&inode->i_mutex);
305 last_index = inode->i_size >> PAGE_CACHE_SHIFT;
306 for (i = 0; i <= last_index; i++) {
307 if (total_read % ra_pages == 0) {
308 btrfs_force_ra(inode->i_mapping, &file->f_ra, file, i,
309 min(last_index, i + ra_pages - 1));
310 }
311 total_read++;
Chris Mason3eaa2882008-07-24 11:57:52 -0400312again:
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400313 page = grab_cache_page(inode->i_mapping, i);
314 if (!page)
315 goto out_unlock;
316 if (!PageUptodate(page)) {
317 btrfs_readpage(NULL, page);
318 lock_page(page);
319 if (!PageUptodate(page)) {
320 unlock_page(page);
321 page_cache_release(page);
322 goto out_unlock;
323 }
324 }
325
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400326 wait_on_page_writeback(page);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400327
328 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
329 page_end = page_start + PAGE_CACHE_SIZE - 1;
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400330 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
Chris Mason3eaa2882008-07-24 11:57:52 -0400331
332 ordered = btrfs_lookup_ordered_extent(inode, page_start);
333 if (ordered) {
334 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
335 unlock_page(page);
336 page_cache_release(page);
337 btrfs_start_ordered_extent(inode, ordered, 1);
338 btrfs_put_ordered_extent(ordered);
339 goto again;
340 }
341 set_page_extent_mapped(page);
342
Chris Masonf87f0572008-08-01 11:27:23 -0400343 /*
344 * this makes sure page_mkwrite is called on the
345 * page if it is dirtied again later
346 */
347 clear_page_dirty_for_io(page);
348
Chris Masonea8c2812008-08-04 23:17:27 -0400349 btrfs_set_extent_delalloc(inode, page_start, page_end);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400350
351 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
352 set_page_dirty(page);
353 unlock_page(page);
354 page_cache_release(page);
355 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
356 }
357
358out_unlock:
359 mutex_unlock(&inode->i_mutex);
360 return 0;
361}
362
363/*
364 * Called inside transaction, so use GFP_NOFS
365 */
366
367static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
368{
369 u64 new_size;
370 u64 old_size;
371 u64 devid = 1;
372 struct btrfs_ioctl_vol_args *vol_args;
373 struct btrfs_trans_handle *trans;
374 struct btrfs_device *device = NULL;
375 char *sizestr;
376 char *devstr = NULL;
377 int ret = 0;
378 int namelen;
379 int mod = 0;
380
Yan Zhengc146afa2008-11-12 14:34:12 -0500381 if (root->fs_info->sb->s_flags & MS_RDONLY)
382 return -EROFS;
383
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400384 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
385
386 if (!vol_args)
387 return -ENOMEM;
388
389 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
390 ret = -EFAULT;
391 goto out;
392 }
Mark Fasheh5516e592008-07-24 12:20:14 -0400393
394 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400395 namelen = strlen(vol_args->name);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400396
Chris Mason7d9eb122008-07-08 14:19:17 -0400397 mutex_lock(&root->fs_info->volume_mutex);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400398 sizestr = vol_args->name;
399 devstr = strchr(sizestr, ':');
400 if (devstr) {
401 char *end;
402 sizestr = devstr + 1;
403 *devstr = '\0';
404 devstr = vol_args->name;
405 devid = simple_strtoull(devstr, &end, 10);
406 printk(KERN_INFO "resizing devid %llu\n", devid);
407 }
Yan Zheng2b820322008-11-17 21:11:30 -0500408 device = btrfs_find_device(root, devid, NULL, NULL);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400409 if (!device) {
410 printk(KERN_INFO "resizer unable to find device %llu\n", devid);
411 ret = -EINVAL;
412 goto out_unlock;
413 }
414 if (!strcmp(sizestr, "max"))
415 new_size = device->bdev->bd_inode->i_size;
416 else {
417 if (sizestr[0] == '-') {
418 mod = -1;
419 sizestr++;
420 } else if (sizestr[0] == '+') {
421 mod = 1;
422 sizestr++;
423 }
424 new_size = btrfs_parse_size(sizestr);
425 if (new_size == 0) {
426 ret = -EINVAL;
427 goto out_unlock;
428 }
429 }
430
431 old_size = device->total_bytes;
432
433 if (mod < 0) {
434 if (new_size > old_size) {
435 ret = -EINVAL;
436 goto out_unlock;
437 }
438 new_size = old_size - new_size;
439 } else if (mod > 0) {
440 new_size = old_size + new_size;
441 }
442
443 if (new_size < 256 * 1024 * 1024) {
444 ret = -EINVAL;
445 goto out_unlock;
446 }
447 if (new_size > device->bdev->bd_inode->i_size) {
448 ret = -EFBIG;
449 goto out_unlock;
450 }
451
452 do_div(new_size, root->sectorsize);
453 new_size *= root->sectorsize;
454
455 printk(KERN_INFO "new size for %s is %llu\n",
456 device->name, (unsigned long long)new_size);
457
458 if (new_size > old_size) {
459 trans = btrfs_start_transaction(root, 1);
460 ret = btrfs_grow_device(trans, device, new_size);
461 btrfs_commit_transaction(trans, root);
462 } else {
463 ret = btrfs_shrink_device(device, new_size);
464 }
465
466out_unlock:
Chris Mason7d9eb122008-07-08 14:19:17 -0400467 mutex_unlock(&root->fs_info->volume_mutex);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400468out:
469 kfree(vol_args);
470 return ret;
471}
472
Christoph Hellwigcb8e7092008-10-09 13:39:39 -0400473static noinline int btrfs_ioctl_snap_create(struct file *file,
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400474 void __user *arg)
475{
Christoph Hellwigcb8e7092008-10-09 13:39:39 -0400476 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400477 struct btrfs_ioctl_vol_args *vol_args;
478 struct btrfs_dir_item *di;
479 struct btrfs_path *path;
480 u64 root_dirid;
481 int namelen;
482 int ret;
483
Yan Zhengc146afa2008-11-12 14:34:12 -0500484 if (root->fs_info->sb->s_flags & MS_RDONLY)
485 return -EROFS;
486
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400487 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
488
489 if (!vol_args)
490 return -ENOMEM;
491
492 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
493 ret = -EFAULT;
494 goto out;
495 }
496
Mark Fasheh5516e592008-07-24 12:20:14 -0400497 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400498 namelen = strlen(vol_args->name);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400499 if (strchr(vol_args->name, '/')) {
500 ret = -EINVAL;
501 goto out;
502 }
503
504 path = btrfs_alloc_path();
505 if (!path) {
506 ret = -ENOMEM;
507 goto out;
508 }
509
510 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400511 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
512 path, root_dirid,
513 vol_args->name, namelen, 0);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400514 btrfs_free_path(path);
515
516 if (di && !IS_ERR(di)) {
517 ret = -EEXIST;
518 goto out;
519 }
520
521 if (IS_ERR(di)) {
522 ret = PTR_ERR(di);
523 goto out;
524 }
525
Christoph Hellwigcb8e7092008-10-09 13:39:39 -0400526 if (root == root->fs_info->tree_root) {
527 ret = btrfs_mksubvol(&file->f_path, vol_args->name,
528 file->f_path.dentry->d_inode->i_mode,
529 namelen);
530 } else {
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400531 ret = create_snapshot(root, vol_args->name, namelen);
Christoph Hellwigcb8e7092008-10-09 13:39:39 -0400532 }
533
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400534out:
535 kfree(vol_args);
536 return ret;
537}
538
539static int btrfs_ioctl_defrag(struct file *file)
540{
541 struct inode *inode = fdentry(file)->d_inode;
542 struct btrfs_root *root = BTRFS_I(inode)->root;
Yan Zhengc146afa2008-11-12 14:34:12 -0500543 int ret;
544
545 ret = mnt_want_write(file->f_path.mnt);
546 if (ret)
547 return ret;
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400548
549 switch (inode->i_mode & S_IFMT) {
550 case S_IFDIR:
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400551 btrfs_defrag_root(root, 0);
552 btrfs_defrag_root(root->fs_info->extent_root, 0);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400553 break;
554 case S_IFREG:
555 btrfs_defrag_file(file);
556 break;
557 }
558
559 return 0;
560}
561
562long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
563{
564 struct btrfs_ioctl_vol_args *vol_args;
565 int ret;
566
567 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
568
569 if (!vol_args)
570 return -ENOMEM;
571
572 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
573 ret = -EFAULT;
574 goto out;
575 }
Mark Fasheh5516e592008-07-24 12:20:14 -0400576 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400577 ret = btrfs_init_new_device(root, vol_args->name);
578
579out:
580 kfree(vol_args);
581 return ret;
582}
583
584long btrfs_ioctl_rm_dev(struct btrfs_root *root, void __user *arg)
585{
586 struct btrfs_ioctl_vol_args *vol_args;
587 int ret;
588
Yan Zhengc146afa2008-11-12 14:34:12 -0500589 if (root->fs_info->sb->s_flags & MS_RDONLY)
590 return -EROFS;
591
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400592 vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
593
594 if (!vol_args)
595 return -ENOMEM;
596
597 if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
598 ret = -EFAULT;
599 goto out;
600 }
Mark Fasheh5516e592008-07-24 12:20:14 -0400601 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400602 ret = btrfs_rm_device(root, vol_args->name);
603
604out:
605 kfree(vol_args);
606 return ret;
607}
608
Sage Weilc5c9cd42008-11-12 14:32:25 -0500609long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, u64 off,
610 u64 olen, u64 destoff)
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400611{
612 struct inode *inode = fdentry(file)->d_inode;
613 struct btrfs_root *root = BTRFS_I(inode)->root;
614 struct file *src_file;
615 struct inode *src;
616 struct btrfs_trans_handle *trans;
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400617 struct btrfs_path *path;
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400618 struct extent_buffer *leaf;
Yan Zhengae01a0a2008-08-04 23:23:47 -0400619 char *buf;
620 struct btrfs_key key;
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400621 u32 nritems;
622 int slot;
Yan Zhengae01a0a2008-08-04 23:23:47 -0400623 int ret;
Sage Weilc5c9cd42008-11-12 14:32:25 -0500624 u64 len = olen;
625 u64 bs = root->fs_info->sb->s_blocksize;
626 u64 hint_byte;
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400627
Sage Weilc5c9cd42008-11-12 14:32:25 -0500628 /*
629 * TODO:
630 * - split compressed inline extents. annoying: we need to
631 * decompress into destination's address_space (the file offset
632 * may change, so source mapping won't do), then recompress (or
633 * otherwise reinsert) a subrange.
634 * - allow ranges within the same file to be cloned (provided
635 * they don't overlap)?
636 */
637
Yan Zhengc146afa2008-11-12 14:34:12 -0500638 ret = mnt_want_write(file->f_path.mnt);
639 if (ret)
640 return ret;
641
Sage Weilc5c9cd42008-11-12 14:32:25 -0500642 src_file = fget(srcfd);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400643 if (!src_file)
644 return -EBADF;
645 src = src_file->f_dentry->d_inode;
646
Sage Weilc5c9cd42008-11-12 14:32:25 -0500647 ret = -EINVAL;
648 if (src == inode)
649 goto out_fput;
650
Yan Zhengae01a0a2008-08-04 23:23:47 -0400651 ret = -EISDIR;
652 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400653 goto out_fput;
654
Yan Zhengae01a0a2008-08-04 23:23:47 -0400655 ret = -EXDEV;
656 if (src->i_sb != inode->i_sb || BTRFS_I(src)->root != root)
657 goto out_fput;
658
659 ret = -ENOMEM;
660 buf = vmalloc(btrfs_level_size(root, 0));
661 if (!buf)
662 goto out_fput;
663
664 path = btrfs_alloc_path();
665 if (!path) {
666 vfree(buf);
667 goto out_fput;
668 }
669 path->reada = 2;
670
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400671 if (inode < src) {
672 mutex_lock(&inode->i_mutex);
673 mutex_lock(&src->i_mutex);
674 } else {
675 mutex_lock(&src->i_mutex);
676 mutex_lock(&inode->i_mutex);
677 }
678
Sage Weilc5c9cd42008-11-12 14:32:25 -0500679 /* determine range to clone */
680 ret = -EINVAL;
681 if (off >= src->i_size || off + len > src->i_size)
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400682 goto out_unlock;
Sage Weilc5c9cd42008-11-12 14:32:25 -0500683 if (len == 0)
684 olen = len = src->i_size - off;
685 /* if we extend to eof, continue to block boundary */
686 if (off + len == src->i_size)
687 len = ((src->i_size + bs-1) & ~(bs-1))
688 - off;
689
690 /* verify the end result is block aligned */
691 if ((off & (bs-1)) ||
692 ((off + len) & (bs-1)))
693 goto out_unlock;
694
695 printk("final src extent is %llu~%llu\n", off, len);
696 printk("final dst extent is %llu~%llu\n", destoff, len);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400697
698 /* do any pending delalloc/csum calc on src, one way or
699 another, and lock file content */
700 while (1) {
Zheng Yan31840ae2008-09-23 13:14:14 -0400701 struct btrfs_ordered_extent *ordered;
Sage Weilc5c9cd42008-11-12 14:32:25 -0500702 lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
703 ordered = btrfs_lookup_first_ordered_extent(inode, off+len);
Yan Zhengae01a0a2008-08-04 23:23:47 -0400704 if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered)
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400705 break;
Sage Weilc5c9cd42008-11-12 14:32:25 -0500706 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
Yan Zhengae01a0a2008-08-04 23:23:47 -0400707 if (ordered)
708 btrfs_put_ordered_extent(ordered);
Sage Weilc5c9cd42008-11-12 14:32:25 -0500709 btrfs_wait_ordered_range(src, off, off+len);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400710 }
711
Yan Zhengae01a0a2008-08-04 23:23:47 -0400712 trans = btrfs_start_transaction(root, 1);
713 BUG_ON(!trans);
714
Sage Weilc5c9cd42008-11-12 14:32:25 -0500715 /* punch hole in destination first */
716 btrfs_drop_extents(trans, root, inode, off, off+len, 0, &hint_byte);
717
718 /* clone data */
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400719 key.objectid = src->i_ino;
Yan Zhengae01a0a2008-08-04 23:23:47 -0400720 key.type = BTRFS_EXTENT_DATA_KEY;
721 key.offset = 0;
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400722
723 while (1) {
724 /*
725 * note the key will change type as we walk through the
726 * tree.
727 */
728 ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
729 if (ret < 0)
730 goto out;
731
Yan Zhengae01a0a2008-08-04 23:23:47 -0400732 nritems = btrfs_header_nritems(path->nodes[0]);
733 if (path->slots[0] >= nritems) {
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400734 ret = btrfs_next_leaf(root, path);
735 if (ret < 0)
736 goto out;
737 if (ret > 0)
738 break;
Yan Zhengae01a0a2008-08-04 23:23:47 -0400739 nritems = btrfs_header_nritems(path->nodes[0]);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400740 }
741 leaf = path->nodes[0];
742 slot = path->slots[0];
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400743
Yan Zhengae01a0a2008-08-04 23:23:47 -0400744 btrfs_item_key_to_cpu(leaf, &key, slot);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400745 if (btrfs_key_type(&key) > BTRFS_CSUM_ITEM_KEY ||
746 key.objectid != src->i_ino)
747 break;
748
Sage Weilc5c9cd42008-11-12 14:32:25 -0500749 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
750 struct btrfs_file_extent_item *extent;
751 int type;
Zheng Yan31840ae2008-09-23 13:14:14 -0400752 u32 size;
753 struct btrfs_key new_key;
Sage Weilc5c9cd42008-11-12 14:32:25 -0500754 u64 disko = 0, diskl = 0;
755 u64 datao = 0, datal = 0;
756 u8 comp;
Zheng Yan31840ae2008-09-23 13:14:14 -0400757
758 size = btrfs_item_size_nr(leaf, slot);
759 read_extent_buffer(leaf, buf,
760 btrfs_item_ptr_offset(leaf, slot),
761 size);
Sage Weilc5c9cd42008-11-12 14:32:25 -0500762
763 extent = btrfs_item_ptr(leaf, slot,
764 struct btrfs_file_extent_item);
765 comp = btrfs_file_extent_compression(leaf, extent);
766 type = btrfs_file_extent_type(leaf, extent);
767 if (type == BTRFS_FILE_EXTENT_REG) {
768 disko = btrfs_file_extent_disk_bytenr(leaf, extent);
769 diskl = btrfs_file_extent_disk_num_bytes(leaf, extent);
770 datao = btrfs_file_extent_offset(leaf, extent);
771 datal = btrfs_file_extent_num_bytes(leaf, extent);
772 } else if (type == BTRFS_FILE_EXTENT_INLINE) {
773 /* take upper bound, may be compressed */
774 datal = btrfs_file_extent_ram_bytes(leaf,
775 extent);
776 }
Zheng Yan31840ae2008-09-23 13:14:14 -0400777 btrfs_release_path(root, path);
778
Sage Weilc5c9cd42008-11-12 14:32:25 -0500779 if (key.offset + datal < off ||
780 key.offset >= off+len)
781 goto next;
782
Zheng Yan31840ae2008-09-23 13:14:14 -0400783 memcpy(&new_key, &key, sizeof(new_key));
784 new_key.objectid = inode->i_ino;
Sage Weilc5c9cd42008-11-12 14:32:25 -0500785 new_key.offset = key.offset + destoff - off;
786
787 if (type == BTRFS_FILE_EXTENT_REG) {
788 ret = btrfs_insert_empty_item(trans, root, path,
789 &new_key, size);
790 if (ret)
791 goto out;
792
793 leaf = path->nodes[0];
794 slot = path->slots[0];
795 write_extent_buffer(leaf, buf,
796 btrfs_item_ptr_offset(leaf, slot),
797 size);
798
799 extent = btrfs_item_ptr(leaf, slot,
800 struct btrfs_file_extent_item);
801 printk(" orig disk %llu~%llu data %llu~%llu\n",
802 disko, diskl, datao, datal);
803
804 if (off > key.offset) {
805 datao += off - key.offset;
806 datal -= off - key.offset;
807 }
808 if (key.offset + datao + datal + key.offset >
809 off + len)
810 datal = off + len - key.offset - datao;
811 /* disko == 0 means it's a hole */
812 if (!disko)
813 datao = 0;
814 printk(" final disk %llu~%llu data %llu~%llu\n",
815 disko, diskl, datao, datal);
816
817 btrfs_set_file_extent_offset(leaf, extent,
818 datao);
819 btrfs_set_file_extent_num_bytes(leaf, extent,
820 datal);
821 if (disko) {
822 inode_add_bytes(inode, datal);
823 ret = btrfs_inc_extent_ref(trans, root,
824 disko, diskl, leaf->start,
825 root->root_key.objectid,
826 trans->transid,
827 inode->i_ino);
828 BUG_ON(ret);
829 }
830 } else if (type == BTRFS_FILE_EXTENT_INLINE) {
831 u64 skip = 0;
832 u64 trim = 0;
833 if (off > key.offset) {
834 skip = off - key.offset;
835 new_key.offset += skip;
836 }
837 if (key.offset + datal > off+len)
838 trim = key.offset + datal - (off+len);
839 printk("len %lld skip %lld trim %lld\n",
840 datal, skip, trim);
841 if (comp && (skip || trim)) {
842 printk("btrfs clone_range can't split compressed inline extents yet\n");
843 ret = -EINVAL;
844 goto out;
845 }
846 size -= skip + trim;
847 datal -= skip + trim;
848 ret = btrfs_insert_empty_item(trans, root, path,
849 &new_key, size);
850 if (ret)
851 goto out;
852
853 if (skip) {
854 u32 start = btrfs_file_extent_calc_inline_size(0);
855 memmove(buf+start, buf+start+skip,
856 datal);
857 }
858
859 leaf = path->nodes[0];
860 slot = path->slots[0];
861 write_extent_buffer(leaf, buf,
862 btrfs_item_ptr_offset(leaf, slot),
863 size);
864 inode_add_bytes(inode, datal);
865 }
866
867 btrfs_mark_buffer_dirty(leaf);
868 }
869
870 if (btrfs_key_type(&key) == BTRFS_CSUM_ITEM_KEY) {
871 u32 size;
872 struct btrfs_key new_key;
873 u64 coverslen;
874 int coff, clen;
875
876 size = btrfs_item_size_nr(leaf, slot);
877 coverslen = (size / BTRFS_CRC32_SIZE) <<
878 root->fs_info->sb->s_blocksize_bits;
879 printk("csums for %llu~%llu\n",
880 key.offset, coverslen);
881 if (key.offset + coverslen < off ||
882 key.offset >= off+len)
883 goto next;
884
885 read_extent_buffer(leaf, buf,
886 btrfs_item_ptr_offset(leaf, slot),
887 size);
888 btrfs_release_path(root, path);
889
890 coff = 0;
891 if (off > key.offset)
892 coff = ((off - key.offset) >>
893 root->fs_info->sb->s_blocksize_bits) *
894 BTRFS_CRC32_SIZE;
895 clen = size - coff;
896 if (key.offset + coverslen > off+len)
897 clen -= ((key.offset+coverslen-off-len) >>
898 root->fs_info->sb->s_blocksize_bits) *
899 BTRFS_CRC32_SIZE;
900 printk(" will dup %d~%d of %d\n",
901 coff, clen, size);
902
903 memcpy(&new_key, &key, sizeof(new_key));
904 new_key.objectid = inode->i_ino;
905 new_key.offset = key.offset + destoff - off;
906
Zheng Yan31840ae2008-09-23 13:14:14 -0400907 ret = btrfs_insert_empty_item(trans, root, path,
Sage Weilc5c9cd42008-11-12 14:32:25 -0500908 &new_key, clen);
Zheng Yan31840ae2008-09-23 13:14:14 -0400909 if (ret)
910 goto out;
911
912 leaf = path->nodes[0];
913 slot = path->slots[0];
Sage Weilc5c9cd42008-11-12 14:32:25 -0500914 write_extent_buffer(leaf, buf + coff,
Zheng Yan31840ae2008-09-23 13:14:14 -0400915 btrfs_item_ptr_offset(leaf, slot),
Sage Weilc5c9cd42008-11-12 14:32:25 -0500916 clen);
Zheng Yan31840ae2008-09-23 13:14:14 -0400917 btrfs_mark_buffer_dirty(leaf);
918 }
919
Sage Weilc5c9cd42008-11-12 14:32:25 -0500920 next:
Zheng Yan31840ae2008-09-23 13:14:14 -0400921 btrfs_release_path(root, path);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400922 key.offset++;
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400923 }
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400924 ret = 0;
925out:
Yan Zhengae01a0a2008-08-04 23:23:47 -0400926 btrfs_release_path(root, path);
927 if (ret == 0) {
928 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
Sage Weilc5c9cd42008-11-12 14:32:25 -0500929 if (destoff + olen > inode->i_size)
930 btrfs_i_size_write(inode, destoff + olen);
Yan Zhengae01a0a2008-08-04 23:23:47 -0400931 BTRFS_I(inode)->flags = BTRFS_I(src)->flags;
932 ret = btrfs_update_inode(trans, root, inode);
933 }
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400934 btrfs_end_transaction(trans, root);
Sage Weilc5c9cd42008-11-12 14:32:25 -0500935 unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
Yan Zhengae01a0a2008-08-04 23:23:47 -0400936 if (ret)
937 vmtruncate(inode, 0);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400938out_unlock:
939 mutex_unlock(&src->i_mutex);
940 mutex_unlock(&inode->i_mutex);
Yan Zhengae01a0a2008-08-04 23:23:47 -0400941 vfree(buf);
942 btrfs_free_path(path);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400943out_fput:
944 fput(src_file);
945 return ret;
946}
947
Sage Weilc5c9cd42008-11-12 14:32:25 -0500948long btrfs_ioctl_clone_range(struct file *file, unsigned long argptr)
949{
950 struct btrfs_ioctl_clone_range_args args;
951
952 if (copy_from_user(&args, (void *)argptr, sizeof(args)))
953 return -EFAULT;
954 return btrfs_ioctl_clone(file, args.src_fd, args.src_offset,
955 args.src_length, args.dest_offset);
956}
957
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400958/*
959 * there are many ways the trans_start and trans_end ioctls can lead
960 * to deadlocks. They should only be used by applications that
961 * basically own the machine, and have a very in depth understanding
962 * of all the possible deadlocks and enospc problems.
963 */
964long btrfs_ioctl_trans_start(struct file *file)
965{
966 struct inode *inode = fdentry(file)->d_inode;
967 struct btrfs_root *root = BTRFS_I(inode)->root;
968 struct btrfs_trans_handle *trans;
969 int ret = 0;
970
Christoph Hellwigdf5b5522008-06-11 21:53:58 -0400971 if (!capable(CAP_SYS_ADMIN))
972 return -EPERM;
973
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400974 if (file->private_data) {
975 ret = -EINPROGRESS;
976 goto out;
977 }
Sage Weil9ca9ee02008-08-04 10:41:27 -0400978
Yan Zhengc146afa2008-11-12 14:34:12 -0500979 ret = mnt_want_write(file->f_path.mnt);
980 if (ret)
981 goto out;
982
Sage Weil9ca9ee02008-08-04 10:41:27 -0400983 mutex_lock(&root->fs_info->trans_mutex);
984 root->fs_info->open_ioctl_trans++;
985 mutex_unlock(&root->fs_info->trans_mutex);
986
987 trans = btrfs_start_ioctl_transaction(root, 0);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400988 if (trans)
989 file->private_data = trans;
990 else
991 ret = -ENOMEM;
992 /*printk(KERN_INFO "btrfs_ioctl_trans_start on %p\n", file);*/
993out:
Christoph Hellwigf46b5a62008-06-11 21:53:53 -0400994 return ret;
995}
996
997/*
998 * there are many ways the trans_start and trans_end ioctls can lead
999 * to deadlocks. They should only be used by applications that
1000 * basically own the machine, and have a very in depth understanding
1001 * of all the possible deadlocks and enospc problems.
1002 */
1003long btrfs_ioctl_trans_end(struct file *file)
1004{
1005 struct inode *inode = fdentry(file)->d_inode;
1006 struct btrfs_root *root = BTRFS_I(inode)->root;
1007 struct btrfs_trans_handle *trans;
1008 int ret = 0;
1009
Christoph Hellwigf46b5a62008-06-11 21:53:53 -04001010 trans = file->private_data;
1011 if (!trans) {
1012 ret = -EINVAL;
1013 goto out;
1014 }
1015 btrfs_end_transaction(trans, root);
Christoph Hellwigb2141072008-09-05 16:43:31 -04001016 file->private_data = NULL;
Sage Weil9ca9ee02008-08-04 10:41:27 -04001017
1018 mutex_lock(&root->fs_info->trans_mutex);
1019 root->fs_info->open_ioctl_trans--;
1020 mutex_unlock(&root->fs_info->trans_mutex);
1021
Christoph Hellwigf46b5a62008-06-11 21:53:53 -04001022out:
Christoph Hellwigf46b5a62008-06-11 21:53:53 -04001023 return ret;
1024}
1025
1026long btrfs_ioctl(struct file *file, unsigned int
1027 cmd, unsigned long arg)
1028{
1029 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
1030
1031 switch (cmd) {
1032 case BTRFS_IOC_SNAP_CREATE:
Christoph Hellwigcb8e7092008-10-09 13:39:39 -04001033 return btrfs_ioctl_snap_create(file, (void __user *)arg);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -04001034 case BTRFS_IOC_DEFRAG:
1035 return btrfs_ioctl_defrag(file);
1036 case BTRFS_IOC_RESIZE:
1037 return btrfs_ioctl_resize(root, (void __user *)arg);
1038 case BTRFS_IOC_ADD_DEV:
1039 return btrfs_ioctl_add_dev(root, (void __user *)arg);
1040 case BTRFS_IOC_RM_DEV:
1041 return btrfs_ioctl_rm_dev(root, (void __user *)arg);
1042 case BTRFS_IOC_BALANCE:
1043 return btrfs_balance(root->fs_info->dev_root);
1044 case BTRFS_IOC_CLONE:
Sage Weilc5c9cd42008-11-12 14:32:25 -05001045 return btrfs_ioctl_clone(file, arg, 0, 0, 0);
1046 case BTRFS_IOC_CLONE_RANGE:
1047 return btrfs_ioctl_clone_range(file, arg);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -04001048 case BTRFS_IOC_TRANS_START:
1049 return btrfs_ioctl_trans_start(file);
1050 case BTRFS_IOC_TRANS_END:
1051 return btrfs_ioctl_trans_end(file);
1052 case BTRFS_IOC_SYNC:
Chris Masonea8c2812008-08-04 23:17:27 -04001053 btrfs_start_delalloc_inodes(root);
Christoph Hellwigf46b5a62008-06-11 21:53:53 -04001054 btrfs_sync_fs(file->f_dentry->d_sb, 1);
1055 return 0;
1056 }
1057
1058 return -ENOTTY;
1059}