Btrfs: Change the super to point to a tree of trees to enable persistent snapshots

Signed-off-by: Chris Mason <chris.mason@oracle.com>
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile
index f853220..13ed631 100644
--- a/fs/btrfs/Makefile
+++ b/fs/btrfs/Makefile
@@ -2,7 +2,8 @@
 CC=gcc
 CFLAGS = -g -Wall
 headers = radix-tree.h ctree.h disk-io.h kerncompat.h print-tree.h list.h
-objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o
+objects = ctree.o disk-io.o radix-tree.o mkfs.o extent-tree.o print-tree.o \
+	  root-tree.o
 
 # if you don't have sparse installed, use ls instead
 CHECKFLAGS=-D__linux__ -Dlinux -D__STDC__ -Dunix -D__unix__ -Wbitwise \
diff --git a/fs/btrfs/TODO b/fs/btrfs/TODO
index ad29267..ff90e5e 100644
--- a/fs/btrfs/TODO
+++ b/fs/btrfs/TODO
@@ -5,6 +5,7 @@
 * make a real mkfs and superblock
 * Do checksumming
 * Define FS objects in terms of different item types
+* add inode tree
 * Add block mapping tree (simple dm layer)
 * Add simple tree locking (semaphore per tree)
 * Make allocator smarter
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5dfcc90..0968899 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -4,8 +4,13 @@
 #include "list.h"
 #include "kerncompat.h"
 
+#define BTRFS_MAGIC "_BtRfS_M"
 #define BTRFS_BLOCKSIZE 1024
 
+#define BTRFS_ROOT_TREE_OBJECTID 1
+#define BTRFS_EXTENT_TREE_OBJECTID 2
+#define BTRFS_FS_TREE_OBJECTID 3
+
 /*
  * the key defines the order in the tree, and so it also defines (optimal)
  * block layout.  objectid corresonds to the inode number.  The flags
@@ -36,7 +41,7 @@
  * every tree block (leaf or node) starts with this header.
  */
 struct btrfs_header {
-	__le64 fsid[2]; /* FS specific uuid */
+	u8 fsid[16]; /* FS specific uuid */
 	__le64 blocknr; /* which block this node is supposed to live in */
 	__le64 parentid; /* objectid of the tree root */
 	__le32 csum;
@@ -52,6 +57,14 @@
 
 struct btrfs_buffer;
 
+struct btrfs_root_item { /* copied raw into leaf data, so never padded */
+	__le64 blocknr; /* block the tree's root node is read from */
+	__le32 flags;
+	__le64 block_limit;
+	__le64 blocks_used;
+	__le32 refs; /* mkfs writes 1 for each root it creates */
+} __attribute__ ((__packed__));
+
 /*
  * in ram representation of the tree.  extent_root is used for all allocations
  * and for the extent tree extent_root root.  current_insert is used
@@ -61,6 +74,7 @@
 	struct btrfs_buffer *node;
 	struct btrfs_buffer *commit_root;
 	struct btrfs_root *extent_root;
+	struct btrfs_root *tree_root;
 	struct btrfs_key current_insert;
 	struct btrfs_key last_insert;
 	int fp;
@@ -69,28 +83,25 @@
 	struct list_head trans;
 	struct list_head cache;
 	int cache_size;
+	int ref_cows;
+	struct btrfs_root_item root_item;
+	struct btrfs_key root_key;
 };
 
 /*
- * describes a tree on disk
- */
-struct btrfs_root_info {
-	u64 fsid[2]; /* FS specific uuid */
-	u64 blocknr; /* blocknr of this block */
-	u64 objectid; /* inode number of this root */
-	u64 tree_root; /* the tree root block */
-	u32 csum;
-	u32 ham;
-	u64 snapuuid[2]; /* root specific uuid */
-} __attribute__ ((__packed__));
-
-/*
  * the super block basically lists the main trees of the FS
  * it currently lacks any block count etc etc
  */
 struct btrfs_super_block {
-	struct btrfs_root_info root_info;
-	struct btrfs_root_info extent_info;
+	u8 fsid[16];    /* FS specific uuid */
+	__le64 blocknr; /* this block number */
+	__le32 csum;
+	__le64 magic;
+	__le16 blocksize;
+	__le64 generation;
+	__le64 root;
+	__le64 total_blocks;
+	__le64 blocks_used;
 } __attribute__ ((__packed__));
 
 /*
@@ -317,6 +328,79 @@
 	return (btrfs_header_level(&n->header) == 0);
 }
 
+static inline u64 btrfs_root_blocknr(struct btrfs_root_item *item)
+{
+	return le64_to_cpu(item->blocknr);
+}
+
+static inline void btrfs_set_root_blocknr(struct btrfs_root_item *item, u64 val)
+{
+	item->blocknr = cpu_to_le64(val);
+}
+
+static inline u32 btrfs_root_refs(struct btrfs_root_item *item)
+{
+	return le32_to_cpu(item->refs);
+}
+
+static inline void btrfs_set_root_refs(struct btrfs_root_item *item, u32 val)
+{
+	item->refs = cpu_to_le32(val);
+}
+
+static inline u64 btrfs_super_blocknr(struct btrfs_super_block *s)
+{
+	return le64_to_cpu(s->blocknr);
+}
+
+static inline void btrfs_set_super_blocknr(struct btrfs_super_block *s, u64 val)
+{
+	s->blocknr = cpu_to_le64(val);
+}
+
+static inline u64 btrfs_super_root(struct btrfs_super_block *s)
+{
+	return le64_to_cpu(s->root);
+}
+
+static inline void btrfs_set_super_root(struct btrfs_super_block *s, u64 val)
+{
+	s->root = cpu_to_le64(val);
+}
+
+static inline u64 btrfs_super_total_blocks(struct btrfs_super_block *s)
+{
+	return le64_to_cpu(s->total_blocks);
+}
+
+static inline void btrfs_set_super_total_blocks(struct btrfs_super_block *s,
+						u64 val)
+{
+	s->total_blocks = cpu_to_le64(val);
+}
+
+static inline u64 btrfs_super_blocks_used(struct btrfs_super_block *s)
+{
+	return le64_to_cpu(s->blocks_used);
+}
+
+static inline void btrfs_set_super_blocks_used(struct btrfs_super_block *s,
+						u64 val)
+{
+	s->blocks_used = cpu_to_le64(val);
+}
+
+static inline u16 btrfs_super_blocksize(struct btrfs_super_block *s)
+{
+	return le16_to_cpu(s->blocksize);
+}
+
+static inline void btrfs_set_super_blocksize(struct btrfs_super_block *s,
+						u16 val)
+{
+	s->blocksize = cpu_to_le16(val);
+}
+
 struct btrfs_buffer *btrfs_alloc_free_block(struct btrfs_root *root);
 int btrfs_inc_ref(struct btrfs_root *root, struct btrfs_buffer *buf);
 int btrfs_free_extent(struct btrfs_root *root, u64 blocknr, u64 num_blocks);
@@ -331,4 +415,11 @@
 int btrfs_leaf_free_space(struct btrfs_leaf *leaf);
 int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap);
 int btrfs_finish_extent_commit(struct btrfs_root *root);
+int btrfs_del_root(struct btrfs_root *root, struct btrfs_key *key);
+int btrfs_insert_root(struct btrfs_root *root, struct btrfs_key *key,
+		      struct btrfs_root_item *item);
+int btrfs_update_root(struct btrfs_root *root, struct btrfs_key *key,
+		      struct btrfs_root_item *item);
+int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
+			struct btrfs_root_item *item, struct btrfs_key *key);
 #endif
diff --git a/fs/btrfs/debug-tree.c b/fs/btrfs/debug-tree.c
index 6da0a7a..de45fb4 100644
--- a/fs/btrfs/debug-tree.c
+++ b/fs/btrfs/debug-tree.c
@@ -11,9 +11,11 @@
 	struct btrfs_root *root;
 	radix_tree_init();
 	root = open_ctree("dbfile", &super);
-	printf("root tree\n");
+	printf("fs tree\n");
 	btrfs_print_tree(root, root->node);
 	printf("map tree\n");
 	btrfs_print_tree(root->extent_root, root->extent_root->node);
+	printf("root tree\n");
+	btrfs_print_tree(root->tree_root, root->tree_root->node);
 	return 0;
 }
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c34c0c6..3d4bf68 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -154,41 +154,96 @@
 	return ret;
 }
 
+/* store the extent root's block in the root tree until both agree, commit */
+static int commit_extent_and_tree_roots(struct btrfs_root *tree_root,
+					struct btrfs_root *extent_root)
+{
+	int ret;
+
+	while (btrfs_root_blocknr(&extent_root->root_item) !=
+	       extent_root->node->blocknr) {
+		btrfs_set_root_blocknr(&extent_root->root_item,
+				       extent_root->node->blocknr);
+		ret = btrfs_update_root(tree_root,
+					&extent_root->root_key,
+					&extent_root->root_item);
+		BUG_ON(ret);
+	}
+	ret = __commit_transaction(extent_root);
+	BUG_ON(ret);
+	ret = __commit_transaction(tree_root);
+	BUG_ON(ret);
+	return 0;
+}
+
 int btrfs_commit_transaction(struct btrfs_root *root,
 			     struct btrfs_super_block *s)
 {
 	int ret = 0;
+	struct btrfs_buffer *snap = root->commit_root;
+	struct btrfs_key snap_key;
 
 	ret = __commit_transaction(root);
-	if (!ret && root != root->extent_root)
-		ret = __commit_transaction(root->extent_root);
 	BUG_ON(ret);
-	if (root->commit_root != root->node) {
-		struct btrfs_buffer *snap = root->commit_root;
-		root->commit_root = root->node;
-		root->node->count++;
-		ret = btrfs_drop_snapshot(root, snap);
-		BUG_ON(ret);
-		// btrfs_block_release(root, snap);
-	}
+
+	if (root->commit_root == root->node)
+		return 0;
+
+	memcpy(&snap_key, &root->root_key, sizeof(snap_key));
+	root->root_key.offset++;
+
+	btrfs_set_root_blocknr(&root->root_item, root->node->blocknr);
+	ret = btrfs_insert_root(root->tree_root, &root->root_key,
+				&root->root_item);
+	BUG_ON(ret);
+
+	ret = commit_extent_and_tree_roots(root->tree_root, root->extent_root);
+	BUG_ON(ret);
+
         write_ctree_super(root, s);
-	btrfs_finish_extent_commit(root);
+	btrfs_finish_extent_commit(root->extent_root);
+	btrfs_finish_extent_commit(root->tree_root);
+
+	root->commit_root = root->node;
+	root->node->count++;
+	ret = btrfs_drop_snapshot(root, snap);
+	BUG_ON(ret);
+
+	ret = btrfs_del_root(root->tree_root, &snap_key);
+	BUG_ON(ret);
+
 	return ret;
 }
 
-static int __setup_root(struct btrfs_root *root, struct btrfs_root *extent_root,
-			struct btrfs_root_info *info, int fp)
+static int __setup_root(struct btrfs_root *root, u64 objectid, int fp)
 {
 	INIT_LIST_HEAD(&root->trans);
 	INIT_LIST_HEAD(&root->cache);
 	root->cache_size = 0;
 	root->fp = fp;
 	root->node = NULL;
-	root->extent_root = extent_root;
 	root->commit_root = NULL;
-	root->node = read_tree_block(root, info->tree_root);
 	memset(&root->current_insert, 0, sizeof(root->current_insert));
 	memset(&root->last_insert, 0, sizeof(root->last_insert));
+	memset(&root->root_key, 0, sizeof(root->root_key));
+	memset(&root->root_item, 0, sizeof(root->root_item));
+	return 0;
+}
+
+static int find_and_setup_root(struct btrfs_root *tree_root, u64 objectid,
+			struct btrfs_root *root, int fp)
+{
+	int ret;
+
+	__setup_root(root, objectid, fp);
+	ret = btrfs_find_last_root(tree_root, objectid,
+				   &root->root_item, &root->root_key);
+	BUG_ON(ret);
+
+	root->node = read_tree_block(root,
+				     btrfs_root_blocknr(&root->root_item));
+	root->ref_cows = 0;
+	BUG_ON(!root->node);
 	return 0;
 }
 
@@ -196,9 +251,19 @@
 {
 	struct btrfs_root *root = malloc(sizeof(struct btrfs_root));
 	struct btrfs_root *extent_root = malloc(sizeof(struct btrfs_root));
+	struct btrfs_root *tree_root = malloc(sizeof(struct btrfs_root));
 	int fp;
 	int ret;
 
+	root->extent_root = extent_root;
+	root->tree_root = tree_root;
+
+	extent_root->extent_root = extent_root;
+	extent_root->tree_root = tree_root;
+
+	tree_root->extent_root = extent_root;
+	tree_root->tree_root = tree_root;
+
 	fp = open(filename, O_CREAT | O_RDWR, 0600);
 	if (fp < 0) {
 		free(root);
@@ -208,11 +273,14 @@
 	INIT_RADIX_TREE(&root->pinned_radix, GFP_KERNEL);
 	INIT_RADIX_TREE(&extent_root->pinned_radix, GFP_KERNEL);
 	INIT_RADIX_TREE(&extent_root->cache_radix, GFP_KERNEL);
+	INIT_RADIX_TREE(&tree_root->pinned_radix, GFP_KERNEL);
+	INIT_RADIX_TREE(&tree_root->cache_radix, GFP_KERNEL);
+
 	ret = pread(fp, super, sizeof(struct btrfs_super_block),
 		     BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE));
-	if (ret == 0 || super->root_info.tree_root == 0) {
+	if (ret == 0 || btrfs_super_root(super) == 0) {
 		printf("making new FS!\n");
-		ret = mkfs(fp);
+		ret = mkfs(fp, 0, BTRFS_BLOCKSIZE);
 		if (ret)
 			return NULL;
 		ret = pread(fp, super, sizeof(struct btrfs_super_block),
@@ -221,24 +289,29 @@
 			return NULL;
 	}
 	BUG_ON(ret < 0);
-	__setup_root(root, extent_root, &super->root_info, fp);
-	__setup_root(extent_root, extent_root, &super->extent_info, fp);
+
+	__setup_root(tree_root, BTRFS_ROOT_TREE_OBJECTID, fp);
+	tree_root->node = read_tree_block(tree_root, btrfs_super_root(super));
+	BUG_ON(!tree_root->node);
+
+	ret = find_and_setup_root(tree_root, BTRFS_EXTENT_TREE_OBJECTID,
+				  extent_root, fp);
+	BUG_ON(ret);
+
+	ret = find_and_setup_root(tree_root, BTRFS_FS_TREE_OBJECTID,
+				  root, fp);
+	BUG_ON(ret);
+
 	root->commit_root = root->node;
 	root->node->count++;
+	root->ref_cows = 1;
 	return root;
 }
 
-static int __update_root(struct btrfs_root *root, struct btrfs_root_info *info)
-{
-	info->tree_root = root->node->blocknr;
-	return 0;
-}
-
 int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s)
 {
 	int ret;
-	__update_root(root, &s->root_info);
-	__update_root(root->extent_root, &s->extent_info);
+	btrfs_set_super_root(s, root->tree_root->node->blocknr);
 	ret = pwrite(root->fp, s, sizeof(*s),
 		     BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE));
 	if (ret != sizeof(*s)) {
@@ -260,19 +333,25 @@
 }
 int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s)
 {
+	int ret;
 	btrfs_commit_transaction(root, s);
-	__commit_transaction(root->extent_root);
+	ret = commit_extent_and_tree_roots(root->tree_root, root->extent_root);
+	BUG_ON(ret);
 	write_ctree_super(root, s);
 	drop_cache(root->extent_root);
+	drop_cache(root->tree_root);
 	drop_cache(root);
 	BUG_ON(!list_empty(&root->trans));
 	BUG_ON(!list_empty(&root->extent_root->trans));
+	BUG_ON(!list_empty(&root->tree_root->trans));
 
 	close(root->fp);
 	if (root->node)
 		btrfs_block_release(root, root->node);
 	if (root->extent_root->node)
 		btrfs_block_release(root->extent_root, root->extent_root->node);
+	if (root->tree_root->node)
+		btrfs_block_release(root->tree_root, root->tree_root->node);
 	btrfs_block_release(root, root->commit_root);
 	free(root);
 	printf("on close %d blocks are allocated\n", allocated_blocks);
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index b391335..c22a61f 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -24,7 +24,8 @@
 int close_ctree(struct btrfs_root *root, struct btrfs_super_block *s);
 void btrfs_block_release(struct btrfs_root *root, struct btrfs_buffer *buf);
 int write_ctree_super(struct btrfs_root *root, struct btrfs_super_block *s);
-int mkfs(int fd);
+int mkfs(int fd, u64 num_blocks, u16 blocksize);
+
 
 #define BTRFS_SUPER_INFO_OFFSET(bs) (16 * (bs))
 
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a696953..21f39b40 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -82,7 +82,7 @@
 	u64 blocknr;
 	int i;
 
-	if (root == root->extent_root)
+	if (!root->ref_cows)
 		return 0;
 	if (btrfs_is_leaf(&buf->node))
 		return 0;
@@ -96,23 +96,22 @@
 
 int btrfs_finish_extent_commit(struct btrfs_root *root)
 {
-	struct btrfs_root *extent_root = root->extent_root;
 	unsigned long gang[8];
 	int ret;
 	int i;
 
 	while(1) {
-		ret = radix_tree_gang_lookup(&extent_root->pinned_radix,
+		ret = radix_tree_gang_lookup(&root->pinned_radix,
 						 (void **)gang, 0,
 						 ARRAY_SIZE(gang));
 		if (!ret)
 			break;
 		for (i = 0; i < ret; i++) {
-			radix_tree_delete(&extent_root->pinned_radix, gang[i]);
+			radix_tree_delete(&root->pinned_radix, gang[i]);
 		}
 	}
-	extent_root->last_insert.objectid = 0;
-	extent_root->last_insert.offset = 0;
+	root->last_insert.objectid = 0;
+	root->last_insert.offset = 0;
 	return 0;
 }
 
@@ -173,7 +172,7 @@
 	refs = btrfs_extent_refs(ei) - 1;
 	btrfs_set_extent_refs(ei, refs);
 	if (refs == 0) {
-		if (root == extent_root) {
+		if (!root->ref_cows) {
 			int err;
 			radix_tree_preload(GFP_KERNEL);
 			err = radix_tree_insert(&extent_root->pinned_radix,
@@ -513,7 +512,7 @@
  */
 int btrfs_drop_snapshot(struct btrfs_root *root, struct btrfs_buffer *snap)
 {
-	int ret = 0;;
+	int ret = 0;
 	int wret;
 	int level;
 	struct btrfs_path path;
diff --git a/fs/btrfs/mkfs.c b/fs/btrfs/mkfs.c
index fc19233..dd14ed4 100644
--- a/fs/btrfs/mkfs.c
+++ b/fs/btrfs/mkfs.c
@@ -10,6 +10,120 @@
 #include "ctree.h"
 #include "disk-io.h"
 
+/* write an empty FS: super in block 16, root/extent/FS leaves in 17-19 */
+int mkfs(int fd, u64 num_blocks, u16 blocksize)
+{
+	struct btrfs_super_block super;
+	struct btrfs_leaf empty_leaf;
+	struct btrfs_root_item root_item;
+	struct btrfs_item item;
+	struct btrfs_extent_item extent_item;
+	char *block;
+	int ret;
+	u16 itemoff;
+
+	memset(&super, 0, sizeof(super)); /* no stack garbage on disk */
+	btrfs_set_super_blocknr(&super, 16);
+	btrfs_set_super_root(&super, 17);
+	/* the magic is 8 bytes with no room for strcpy()'s trailing NUL */
+	memcpy(&super.magic, BTRFS_MAGIC, sizeof(super.magic));
+	btrfs_set_super_blocksize(&super, blocksize);
+	btrfs_set_super_total_blocks(&super, num_blocks);
+	btrfs_set_super_blocks_used(&super, 0);
+
+	block = calloc(1, blocksize);
+	BUG_ON(!block || sizeof(super) > blocksize);
+	memcpy(block, &super, sizeof(super));
+	ret = pwrite(fd, block, blocksize, BTRFS_SUPER_INFO_OFFSET(blocksize));
+	free(block);
+	BUG_ON(ret != blocksize);
+
+	/* create the tree of root objects */
+	memset(&empty_leaf, 0, sizeof(empty_leaf));
+	btrfs_set_header_parentid(&empty_leaf.header, BTRFS_ROOT_TREE_OBJECTID);
+	btrfs_set_header_blocknr(&empty_leaf.header, 17);
+	btrfs_set_header_nritems(&empty_leaf.header, 2);
+
+	/* create the items for the root tree */
+	memset(&root_item, 0, sizeof(root_item));
+	btrfs_set_root_blocknr(&root_item, 18);
+	btrfs_set_root_refs(&root_item, 1);
+	itemoff = LEAF_DATA_SIZE - sizeof(root_item);
+	btrfs_set_item_offset(&item, itemoff);
+	btrfs_set_item_size(&item, sizeof(root_item));
+	btrfs_set_key_objectid(&item.key, BTRFS_EXTENT_TREE_OBJECTID);
+	btrfs_set_key_offset(&item.key, 0);
+	btrfs_set_key_flags(&item.key, 0);
+	memcpy(empty_leaf.items, &item, sizeof(item));
+	memcpy(empty_leaf.data + itemoff, &root_item, sizeof(root_item));
+
+	btrfs_set_root_blocknr(&root_item, 19);
+	itemoff = itemoff - sizeof(root_item);
+	btrfs_set_item_offset(&item, itemoff);
+	btrfs_set_key_objectid(&item.key, BTRFS_FS_TREE_OBJECTID);
+	memcpy(empty_leaf.items + 1, &item, sizeof(item));
+	memcpy(empty_leaf.data + itemoff, &root_item, sizeof(root_item));
+	ret = pwrite(fd, &empty_leaf, blocksize, 17 * blocksize);
+	if (ret != sizeof(empty_leaf))
+		return -1;
+
+	/* create the items for the extent tree */
+	btrfs_set_header_parentid(&empty_leaf.header,
+				  BTRFS_EXTENT_TREE_OBJECTID);
+	btrfs_set_header_blocknr(&empty_leaf.header, 18);
+	btrfs_set_header_nritems(&empty_leaf.header, 4);
+
+	/* item1, reserve blocks 0-16 */
+	btrfs_set_key_objectid(&item.key, 0);
+	btrfs_set_key_offset(&item.key, 17);
+	btrfs_set_key_flags(&item.key, 0);
+	itemoff = LEAF_DATA_SIZE - sizeof(struct btrfs_extent_item);
+	btrfs_set_item_offset(&item, itemoff);
+	btrfs_set_item_size(&item, sizeof(struct btrfs_extent_item));
+	btrfs_set_extent_refs(&extent_item, 1);
+	btrfs_set_extent_owner(&extent_item, 0);
+	memcpy(empty_leaf.items, &item, sizeof(item));
+	memcpy(empty_leaf.data + itemoff, &extent_item, sizeof(extent_item));
+
+	/* item2, give block 17 to the root */
+	btrfs_set_key_objectid(&item.key, 17);
+	btrfs_set_key_offset(&item.key, 1);
+	itemoff = itemoff - sizeof(struct btrfs_extent_item);
+	btrfs_set_item_offset(&item, itemoff);
+	btrfs_set_extent_owner(&extent_item, BTRFS_ROOT_TREE_OBJECTID);
+	memcpy(empty_leaf.items + 1, &item, sizeof(item));
+	memcpy(empty_leaf.data + itemoff, &extent_item, sizeof(extent_item));
+
+	/* item3, give block 18 to the extent root */
+	btrfs_set_key_objectid(&item.key, 18);
+	itemoff = itemoff - sizeof(struct btrfs_extent_item);
+	btrfs_set_item_offset(&item, itemoff);
+	btrfs_set_extent_owner(&extent_item, BTRFS_EXTENT_TREE_OBJECTID);
+	memcpy(empty_leaf.items + 2, &item, sizeof(item));
+	memcpy(empty_leaf.data + itemoff, &extent_item, sizeof(extent_item));
+
+	/* item4, give block 19 to the FS root */
+	btrfs_set_key_objectid(&item.key, 19);
+	itemoff = itemoff - sizeof(struct btrfs_extent_item);
+	btrfs_set_item_offset(&item, itemoff);
+	btrfs_set_extent_owner(&extent_item, BTRFS_FS_TREE_OBJECTID);
+	memcpy(empty_leaf.items + 3, &item, sizeof(item));
+	memcpy(empty_leaf.data + itemoff, &extent_item, sizeof(extent_item));
+	ret = pwrite(fd, &empty_leaf, blocksize, 18 * blocksize);
+	if (ret != sizeof(empty_leaf))
+		return -1;
+
+	/* finally create the FS root */
+	btrfs_set_header_parentid(&empty_leaf.header, BTRFS_FS_TREE_OBJECTID);
+	btrfs_set_header_blocknr(&empty_leaf.header, 19);
+	btrfs_set_header_nritems(&empty_leaf.header, 0);
+	ret = pwrite(fd, &empty_leaf, blocksize, 19 * blocksize);
+	if (ret != sizeof(empty_leaf))
+		return -1;
+	return 0;
+}
+
+#if 0
 int mkfs(int fd)
 {
 	struct btrfs_root_info info[2];
@@ -20,13 +134,14 @@
 
 	/* setup the super block area */
 	memset(info, 0, sizeof(info));
-	info[0].blocknr = 16;
-	info[0].objectid = 1;
-	info[0].tree_root = 17;
+	btrfs_set_root_blocknr(info, 16);
+	btrfs_set_root_objectid(info, 1);
+	btrfs_set_root_tree_root(info, 17);
 
-	info[1].blocknr = 16;
-	info[1].objectid = 2;
-	info[1].tree_root = 18;
+	btrfs_set_root_blocknr(info + 1, 16);
+	btrfs_set_root_objectid(info + 1, 2);
+	btrfs_set_root_tree_root(info + 1, 18);
+
 	ret = pwrite(fd, info, sizeof(info),
 		     BTRFS_SUPER_INFO_OFFSET(BTRFS_BLOCKSIZE));
 	if (ret != sizeof(info))
@@ -81,3 +196,4 @@
 		return -1;
 	return 0;
 }
+#endif
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index c95c856..64829b6 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -11,6 +11,7 @@
 	u32 nr = btrfs_header_nritems(&l->header);
 	struct btrfs_item *item;
 	struct btrfs_extent_item *ei;
+	struct btrfs_root_item *ri;
 	printf("leaf %Lu total ptrs %d free space %d\n",
 		btrfs_header_blocknr(&l->header), nr, btrfs_leaf_free_space(l));
 	fflush(stdout);
@@ -23,13 +24,15 @@
 			btrfs_key_offset(&item->key),
 			btrfs_item_offset(item),
 			btrfs_item_size(item));
-		fflush(stdout);
 		printf("\t\titem data %.*s\n", btrfs_item_size(item),
 			l->data + btrfs_item_offset(item));
 		ei = (struct btrfs_extent_item *)(l->data +
 						  btrfs_item_offset(item));
-		printf("\t\textent data refs %u owner %Lu\n", ei->refs,
-			ei->owner);
+		printf("\t\textent data refs %u owner %Lu\n",
+			btrfs_extent_refs(ei), btrfs_extent_owner(ei));
+		ri = (struct btrfs_root_item *)ei;
+		printf("\t\troot data blocknr %Lu refs %u\n",
+			btrfs_root_blocknr(ri), btrfs_root_refs(ri));
 		fflush(stdout);
 	}
 }
@@ -71,6 +74,5 @@
 		btrfs_print_tree(root, next_buf);
 		btrfs_block_release(root, next_buf);
 	}
-
 }
 
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
new file mode 100644
index 0000000..0ab90cf
--- /dev/null
+++ b/fs/btrfs/root-tree.c
@@ -0,0 +1,88 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "kerncompat.h"
+#include "radix-tree.h"
+#include "ctree.h"
+#include "disk-io.h"
+#include "print-tree.h"
+
+/* copy the highest-keyed root item for objectid into item/key; 1 if none */
+int btrfs_find_last_root(struct btrfs_root *root, u64 objectid,
+			struct btrfs_root_item *item, struct btrfs_key *key)
+{
+	struct btrfs_path path;
+	struct btrfs_key search_key;
+	struct btrfs_leaf *l;
+	int ret;
+	int slot;
+
+	search_key.objectid = objectid;
+	search_key.flags = (u32)-1;
+	search_key.offset = (u64)-1;
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(root, &search_key, &path, 0, 0);
+	if (ret < 0)
+		goto out;
+	BUG_ON(ret == 0);
+	l = &path.nodes[0]->leaf;
+	BUG_ON(path.slots[0] == 0);
+	slot = path.slots[0] - 1;
+	ret = 1;
+	if (btrfs_key_objectid(&l->items[slot].key) != objectid)
+		goto out;
+	memcpy(item, l->data + btrfs_item_offset(l->items + slot),
+		sizeof(*item));
+	btrfs_disk_key_to_cpu(key, &l->items[slot].key);
+	ret = 0;
+out:
+	btrfs_release_path(root, &path); /* also on the early exits */
+	return ret;
+}
+
+/* overwrite the item found at key with *item; BUG()s if the search is > 0 */
+int btrfs_update_root(struct btrfs_root *root, struct btrfs_key *key,
+		      struct btrfs_root_item *item)
+{
+	struct btrfs_path path;
+	struct btrfs_leaf *leaf;
+	int ret;
+	int slot;
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(root, key, &path, 0, 1);
+	if (ret >= 0) {
+		BUG_ON(ret != 0);
+		leaf = &path.nodes[0]->leaf;
+		slot = path.slots[0];
+		memcpy(leaf->data + btrfs_item_offset(leaf->items + slot),
+		       item, sizeof(*item));
+	}
+	btrfs_release_path(root, &path);
+	return ret;
+}
+
+/* insert *item into root under key; any failure from the insert is fatal */
+int btrfs_insert_root(struct btrfs_root *root, struct btrfs_key *key,
+		      struct btrfs_root_item *item)
+{
+	int err = btrfs_insert_item(root, key, item, sizeof(*item));
+	BUG_ON(err);
+	return err;
+}
+
+/* delete the item found at key; returns the search error or delete result */
+int btrfs_del_root(struct btrfs_root *root, struct btrfs_key *key)
+{
+	struct btrfs_path path;
+	int ret;
+
+	btrfs_init_path(&path);
+	ret = btrfs_search_slot(root, key, &path, -1, 1);
+	if (ret >= 0) {
+		BUG_ON(ret != 0);
+		ret = btrfs_del_item(root, &path);
+	}
+	btrfs_release_path(root, &path);
+	return ret;
+}