fs/btrfs/zlib.c - kernel/msm - Gitiles

 /*
  * Copyright (C) 2008 Oracle.  All rights reserved.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public
  * License v2 as published by the Free Software Foundation.
  *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  *
  * You should have received a copy of the GNU General Public
  * License along with this program; if not, write to the
  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  * Boston, MA 02111-1307, USA.
  *
  * Based on jffs2 zlib code:
  * Copyright © 2001-2007 Red Hat, Inc.
  * Created by David Woodhouse <dwmw2@infradead.org>
  */

 #include <linux/kernel.h>
 #include <linux/slab.h>
 #include <linux/zlib.h>
 #include <linux/zutil.h>
 #include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/err.h>
 #include <linux/sched.h>
 #include <linux/pagemap.h>
 #include <linux/bio.h>
 #include "compression.h"

 /* Plan: call deflate() with avail_in == *sourcelen,
 	avail_out = *dstlen - 12 and flush == Z_FINISH.
 	If it doesn't manage to finish,	call it again with
 	avail_in == 0 and avail_out set to the remaining 12
 	bytes for it to clean up.
    Q: Is 12 bytes sufficient?
 */
 #define STREAM_END_SPACE 12

 /*
  * One reusable set of zlib state.  Workspaces are handed out by
  * find_zlib_workspace() and given back with free_workspace().
  */
 struct workspace {
 	/* inflate stream; its ->workspace is vmalloc'ed in find_zlib_workspace() */
 	z_stream inf_strm;
 	/* deflate stream; its ->workspace is vmalloc'ed in find_zlib_workspace() */
 	z_stream def_strm;
 	/* PAGE_CACHE_SIZE bounce buffer that inflate output is written into */
 	char *buf;
 	/* links the workspace onto idle_workspace while it is not in use */
 	struct list_head list;
 };

 /* workspaces that have been allocated but are not currently handed out */
 static LIST_HEAD(idle_workspace);
 /* taken around idle_workspace / num_workspace updates in the get/put paths */
 static DEFINE_SPINLOCK(workspace_lock);
 /* number of entries on idle_workspace */
 static unsigned long num_workspace;
 /* number of workspaces currently allocated, whether idle or in use */
 static atomic_t alloc_workspace = ATOMIC_INIT(0);
 /* tasks sleeping in find_zlib_workspace() until a workspace is returned */
 static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);

 /*
  * Get a zlib workspace for the caller's exclusive use.
  *
  * An idle workspace is reused when one is available.  Otherwise a new one
  * is allocated, unless more than num_online_cpus() workspaces already
  * exist, in which case we sleep until free_workspace() wakes us and then
  * retry from the top.
  *
  * Returns the workspace, or ERR_PTR(-ENOMEM) if an allocation fails.  NULL
  * is never returned, so callers only need IS_ERR().  Hand the workspace
  * back with free_workspace().
  */
 static struct workspace *find_zlib_workspace(void)
 {
 	struct workspace *workspace;
 	int cpus = num_online_cpus();

 again:
 	spin_lock(&workspace_lock);
 	if (!list_empty(&idle_workspace)) {
 		workspace = list_entry(idle_workspace.next, struct workspace,
 				       list);
 		list_del(&workspace->list);
 		num_workspace--;
 		spin_unlock(&workspace_lock);
 		return workspace;
 	}
 	spin_unlock(&workspace_lock);

 	if (atomic_read(&alloc_workspace) > cpus) {
 		DEFINE_WAIT(wait);

 		prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
 		/*
 		 * A workspace may have been put back on the idle list after
 		 * we dropped the lock but before we got onto the wait queue.
 		 * That does not change alloc_workspace, so look at
 		 * num_workspace as well; otherwise we would go to sleep
 		 * with a usable workspace sitting idle.
 		 */
 		if (atomic_read(&alloc_workspace) > cpus && !num_workspace)
 			schedule();
 		finish_wait(&workspace_wait, &wait);
 		goto again;
 	}

 	/* account for the new workspace before allocating it */
 	atomic_inc(&alloc_workspace);
 	workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
 	if (!workspace)
 		goto fail;

 	workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
 	if (!workspace->def_strm.workspace)
 		goto fail;

 	workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
 	if (!workspace->inf_strm.workspace)
 		goto fail_inflate;

 	workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
 	if (!workspace->buf)
 		goto fail_kmalloc;

 	return workspace;

 fail_kmalloc:
 	vfree(workspace->inf_strm.workspace);
 fail_inflate:
 	vfree(workspace->def_strm.workspace);
 fail:
 	/* kfree(NULL) is a no-op, so this also covers the kzalloc failure */
 	kfree(workspace);
 	/* undo the accounting and let a waiter try its own allocation */
 	atomic_dec(&alloc_workspace);
 	wake_up(&workspace_wait);
 	return ERR_PTR(-ENOMEM);
 }

 /*
  * Give back a workspace obtained from find_zlib_workspace().
  *
  * The workspace is parked on the idle list while fewer than
  * num_online_cpus() are idle; otherwise it is freed and alloc_workspace is
  * dropped.  Either way any task sleeping in find_zlib_workspace() is woken.
  *
  * Always returns 0.
  */
 static int free_workspace(struct workspace *workspace)
 {
 	spin_lock(&workspace_lock);
 	if (num_workspace < num_online_cpus()) {
 		list_add_tail(&workspace->list, &idle_workspace);
 		num_workspace++;
 		spin_unlock(&workspace_lock);
 		goto wake;
 	}
 	spin_unlock(&workspace_lock);

 	vfree(workspace->def_strm.workspace);
 	vfree(workspace->inf_strm.workspace);
 	kfree(workspace->buf);
 	kfree(workspace);

 	atomic_dec(&alloc_workspace);
 wake:
 	/*
 	 * waitqueue_active() is a lockless peek at the wait queue.  Without
 	 * a full barrier the peek can be ordered before the state change
 	 * above becomes visible, so a task that has just queued itself
 	 * could see the old state, go to sleep, and never be woken.
 	 */
 	smp_mb();
 	if (waitqueue_active(&workspace_wait))
 		wake_up(&workspace_wait);
 	return 0;
 }

 /*
  * Tear down every workspace still parked on the idle list.  Called from
  * btrfs_zlib_exit().
  */
 static void free_workspaces(void)
 {
 	struct workspace *ws;

 	for (;;) {
 		if (list_empty(&idle_workspace))
 			break;

 		/* detach the head entry, then release everything it owns */
 		ws = list_entry(idle_workspace.next, struct workspace, list);
 		list_del(&ws->list);

 		vfree(ws->def_strm.workspace);
 		vfree(ws->inf_strm.workspace);
 		kfree(ws->buf);
 		kfree(ws);

 		atomic_dec(&alloc_workspace);
 	}
 }

 /*
  * Given an address space and start/len, compress the bytes.
  *
  * Input pages are looked up in 'mapping' one at a time.  Output pages are
  * allocated here and stored in 'pages'; the caller owns them afterwards.
  *
  * @mapping:       address space holding the data to compress
  * @start:         byte offset of the first input byte
  * @len:           number of input bytes to compress
  * @pages:         array that receives the allocated output pages
  * @nr_dest_pages: number of slots in @pages
  * @out_pages:     returns the number of pages allocated.  There may be
  *                 pages allocated even if we return an error.
  * @total_in:      returns the number of bytes actually read.  It may be
  *                 smaller than len if we had to exit early because we
  *                 crossed the max_out threshold.  Only set on success.
  * @total_out:     returns the total number of compressed bytes.  Only set
  *                 on success.
  * @max_out:       max number of bytes that we're allowed to stuff into
  *                 pages
  *
  * Returns 0 on success and -1 on any failure, including "the data did not
  * get smaller".
  */
 int btrfs_zlib_compress_pages(struct address_space *mapping,
 			      u64 start, unsigned long len,
 			      struct page **pages,
 			      unsigned long nr_dest_pages,
 			      unsigned long *out_pages,
 			      unsigned long *total_in,
 			      unsigned long *total_out,
 			      unsigned long max_out)
 {
 	int ret;
 	struct workspace *workspace;
 	char *data_in;
 	char *cpage_out;
 	int nr_pages = 0;
 	struct page *in_page = NULL;
 	struct page *out_page = NULL;
 	unsigned long bytes_left;

 	*out_pages = 0;
 	*total_out = 0;
 	*total_in = 0;

 	workspace = find_zlib_workspace();
 	if (IS_ERR(workspace))
 		return -1;

 	if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
 		printk(KERN_WARNING "deflateInit failed\n");
 		ret = -1;
 		goto out;
 	}

 	workspace->def_strm.total_in = 0;
 	workspace->def_strm.total_out = 0;

 	in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
 	if (!in_page) {
 		/* nothing to map; kmap(NULL) would oops */
 		ret = -1;
 		goto out;
 	}
 	data_in = kmap(in_page);

 	out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
 	if (!out_page) {
 		/* nr_pages is still 0, so the caller sees no output pages */
 		ret = -1;
 		goto out;
 	}
 	cpage_out = kmap(out_page);
 	pages[0] = out_page;
 	nr_pages = 1;

 	workspace->def_strm.next_in = data_in;
 	workspace->def_strm.next_out = cpage_out;
 	workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
 	workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);

 	while (workspace->def_strm.total_in < len) {
 		ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
 		if (ret != Z_OK) {
 			printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
 			       ret);
 			zlib_deflateEnd(&workspace->def_strm);
 			ret = -1;
 			goto out;
 		}

 		/* we're making it bigger, give up */
 		if (workspace->def_strm.total_in > 8192 &&
 		    workspace->def_strm.total_in <
 		    workspace->def_strm.total_out) {
 			ret = -1;
 			goto out;
 		}
 		/* we need another page for writing out.  Test this
 		 * before the total_in so we will pull in a new page for
 		 * the stream end if required
 		 */
 		if (workspace->def_strm.avail_out == 0) {
 			kunmap(out_page);
 			if (nr_pages == nr_dest_pages) {
 				out_page = NULL;
 				ret = -1;
 				goto out;
 			}
 			out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
 			if (!out_page) {
 				/*
 				 * The previous page is already unmapped and
 				 * recorded in pages[]; out_page being NULL
 				 * keeps the exit path from unmapping it.
 				 */
 				ret = -1;
 				goto out;
 			}
 			cpage_out = kmap(out_page);
 			pages[nr_pages] = out_page;
 			nr_pages++;
 			workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
 			workspace->def_strm.next_out = cpage_out;
 		}
 		/* we're all done */
 		if (workspace->def_strm.total_in >= len)
 			break;

 		/* we've read in a full page, get a new one */
 		if (workspace->def_strm.avail_in == 0) {
 			if (workspace->def_strm.total_out > max_out)
 				break;

 			bytes_left = len - workspace->def_strm.total_in;
 			kunmap(in_page);
 			page_cache_release(in_page);

 			start += PAGE_CACHE_SIZE;
 			in_page = find_get_page(mapping,
 						start >> PAGE_CACHE_SHIFT);
 			if (!in_page) {
 				/* old page already released above */
 				ret = -1;
 				goto out;
 			}
 			data_in = kmap(in_page);
 			workspace->def_strm.avail_in = min(bytes_left,
 							   PAGE_CACHE_SIZE);
 			workspace->def_strm.next_in = data_in;
 		}
 	}
 	/* no more input; let zlib write out the end of the stream */
 	workspace->def_strm.avail_in = 0;
 	ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
 	zlib_deflateEnd(&workspace->def_strm);

 	if (ret != Z_STREAM_END) {
 		ret = -1;
 		goto out;
 	}

 	/* compression that does not save space counts as a failure */
 	if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
 		ret = -1;
 		goto out;
 	}

 	ret = 0;
 	*total_out = workspace->def_strm.total_out;
 	*total_in = workspace->def_strm.total_in;
 out:
 	*out_pages = nr_pages;
 	if (out_page)
 		kunmap(out_page);

 	if (in_page) {
 		kunmap(in_page);
 		page_cache_release(in_page);
 	}
 	free_workspace(workspace);
 	return ret;
 }

 /*
  * Decompress a compressed extent into the pages of a bio.
  *
  * @pages_in:   array of pages with compressed data
  * @disk_start: the starting logical offset of this array in the file
  * @bvec:       bio_vec of pages from the file that we want to decompress
  *              into
  * @vcnt:       the count of pages in the biovec
  * @srclen:     the number of bytes in pages_in
  *
  * The basic idea is that we have a bio that was created by readpages.
  * The pages in the bio are for the uncompressed data, and they may not
  * be contiguous.  They all correspond to the range of bytes covered by
  * the compressed extent.
  *
  * Data is inflated one PAGE_CACHE_SIZE buffer at a time into
  * workspace->buf, and the parts of each buffer that land on a bio page are
  * copied out.
  *
  * Returns 0 on success, -ENOMEM if no workspace could be obtained and -1
  * on any zlib failure.
  */
 int btrfs_zlib_decompress_biovec(struct page **pages_in,
 			      u64 disk_start,
 			      struct bio_vec *bvec,
 			      int vcnt,
 			      size_t srclen)
 {
 	int ret = 0;
 	int wbits = MAX_WBITS;
 	struct workspace *workspace;
 	/*
 	 * Unsigned so the header bytes below are not sign-extended; this
 	 * matches the parameter type used by btrfs_zlib_decompress().
 	 */
 	unsigned char *data_in;
 	size_t total_out = 0;
 	unsigned long page_bytes_left;
 	unsigned long page_in_index = 0;
 	unsigned long page_out_index = 0;
 	struct page *page_out;
 	unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
 					PAGE_CACHE_SIZE;
 	unsigned long buf_start;
 	unsigned long buf_offset;
 	unsigned long bytes;
 	unsigned long working_bytes;
 	unsigned long pg_offset;
 	unsigned long start_byte;
 	unsigned long current_buf_start;
 	char *kaddr;

 	workspace = find_zlib_workspace();
 	if (IS_ERR(workspace))
 		return -ENOMEM;

 	data_in = kmap(pages_in[page_in_index]);
 	workspace->inf_strm.next_in = data_in;
 	workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
 	workspace->inf_strm.total_in = 0;

 	workspace->inf_strm.total_out = 0;
 	workspace->inf_strm.next_out = workspace->buf;
 	workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
 	page_out = bvec[page_out_index].bv_page;
 	page_bytes_left = PAGE_CACHE_SIZE;
 	pg_offset = 0;

 	/* If it's deflate, and it's got no preset dictionary, then
 	   we can tell zlib to skip the adler32 check. */
 	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
 	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
 	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

 		/* negative wbits selects raw inflate; step over the header */
 		wbits = -((data_in[0] >> 4) + 8);
 		workspace->inf_strm.next_in += 2;
 		workspace->inf_strm.avail_in -= 2;
 	}

 	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
 		printk(KERN_WARNING "inflateInit failed\n");
 		/* the 'out' label skips the kunmap, so drop the mapping here */
 		kunmap(pages_in[page_in_index]);
 		ret = -1;
 		goto out;
 	}
 	while (workspace->inf_strm.total_in < srclen) {
 		ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
 		if (ret != Z_OK && ret != Z_STREAM_END)
 			break;
 		/*
 		 * buf_start is the offset in the uncompressed stream of the
 		 * first byte in our workspace buffer
 		 */
 		buf_start = total_out;

 		/* total_out is the offset just past the last buffered byte */
 		total_out = workspace->inf_strm.total_out;

 		working_bytes = total_out - buf_start;

 		/*
 		 * start byte is the first byte of the page we're currently
 		 * copying into relative to the start of the compressed data.
 		 */
 		start_byte = page_offset(page_out) - disk_start;

 		if (working_bytes == 0) {
 			/* we didn't make progress in this inflate
 			 * call, we're done
 			 */
 			if (ret != Z_STREAM_END)
 				ret = -1;
 			break;
 		}

 		/* we haven't yet hit data corresponding to this page */
 		if (total_out <= start_byte)
 			goto next;

 		/*
 		 * the start of the data we care about is offset into
 		 * the middle of our working buffer
 		 */
 		if (total_out > start_byte && buf_start < start_byte) {
 			buf_offset = start_byte - buf_start;
 			working_bytes -= buf_offset;
 		} else {
 			buf_offset = 0;
 		}
 		current_buf_start = buf_start;

 		/* copy bytes from the working buffer into the pages */
 		while (working_bytes > 0) {
 			bytes = min(PAGE_CACHE_SIZE - pg_offset,
 				    PAGE_CACHE_SIZE - buf_offset);
 			bytes = min(bytes, working_bytes);
 			kaddr = kmap_atomic(page_out, KM_USER0);
 			memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
 			       bytes);
 			kunmap_atomic(kaddr, KM_USER0);
 			flush_dcache_page(page_out);

 			pg_offset += bytes;
 			page_bytes_left -= bytes;
 			buf_offset += bytes;
 			working_bytes -= bytes;
 			current_buf_start += bytes;

 			/* check if we need to pick another page */
 			if (page_bytes_left == 0) {
 				page_out_index++;
 				if (page_out_index >= vcnt) {
 					/* every bio page is filled */
 					ret = 0;
 					goto done;
 				}

 				page_out = bvec[page_out_index].bv_page;
 				pg_offset = 0;
 				page_bytes_left = PAGE_CACHE_SIZE;
 				start_byte = page_offset(page_out) - disk_start;

 				/*
 				 * make sure our new page is covered by this
 				 * working buffer
 				 */
 				if (total_out <= start_byte)
 					goto next;

 				/* the next page in the biovec might not
 				 * be adjacent to the last page, but it
 				 * might still be found inside this working
 				 * buffer.  bump our offset pointer
 				 */
 				if (total_out > start_byte &&
 				    current_buf_start < start_byte) {
 					buf_offset = start_byte - buf_start;
 					working_bytes = total_out - start_byte;
 					current_buf_start = buf_start +
 						buf_offset;
 				}
 			}
 		}
 next:
 		/* hand the whole bounce buffer back to zlib */
 		workspace->inf_strm.next_out = workspace->buf;
 		workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;

 		/* current input page is used up, move to the next one */
 		if (workspace->inf_strm.avail_in == 0) {
 			unsigned long tmp;
 			kunmap(pages_in[page_in_index]);
 			page_in_index++;
 			if (page_in_index >= total_pages_in) {
 				/* tells the exit path nothing is mapped */
 				data_in = NULL;
 				break;
 			}
 			data_in = kmap(pages_in[page_in_index]);
 			workspace->inf_strm.next_in = data_in;
 			tmp = srclen - workspace->inf_strm.total_in;
 			workspace->inf_strm.avail_in = min(tmp,
 							   PAGE_CACHE_SIZE);
 		}
 	}
 	if (ret != Z_STREAM_END)
 		ret = -1;
 	else
 		ret = 0;
 done:
 	zlib_inflateEnd(&workspace->inf_strm);
 	if (data_in)
 		kunmap(pages_in[page_in_index]);
 out:
 	free_workspace(workspace);
 	return ret;
 }

 /*
  * a less complex decompression routine.  Our compressed data fits in a
  * single page, and we want to read a single page out of it.
  *
  * @data_in:    the compressed bytes
  * @dest_page:  page that receives the uncompressed bytes, filled from
  *              offset 0
  * @start_byte: the offset into the uncompressed data we're interested in
  * @srclen:     number of bytes at @data_in
  * @destlen:    number of bytes to copy into @dest_page; must not exceed
  *              PAGE_CACHE_SIZE
  *
  * Returns 0 on success, -ENOMEM if destlen is too large or no workspace
  * could be obtained, and -1 on a zlib failure.
  */
 int btrfs_zlib_decompress(unsigned char *data_in,
 			  struct page *dest_page,
 			  unsigned long start_byte,
 			  size_t srclen, size_t destlen)
 {
 	int ret = 0;
 	int wbits = MAX_WBITS;
 	struct workspace *workspace;
 	unsigned long bytes_left = destlen;
 	unsigned long total_out = 0;
 	/*
 	 * Write position in dest_page.  It must survive across loop
 	 * iterations: when the wanted range straddles two inflate calls the
 	 * second chunk has to land after the first instead of on top of it.
 	 */
 	unsigned long pg_offset = 0;
 	char *kaddr;

 	if (destlen > PAGE_CACHE_SIZE)
 		return -ENOMEM;

 	workspace = find_zlib_workspace();
 	if (IS_ERR(workspace))
 		return -ENOMEM;

 	workspace->inf_strm.next_in = data_in;
 	workspace->inf_strm.avail_in = srclen;
 	workspace->inf_strm.total_in = 0;

 	workspace->inf_strm.next_out = workspace->buf;
 	workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
 	workspace->inf_strm.total_out = 0;
 	/* If it's deflate, and it's got no preset dictionary, then
 	   we can tell zlib to skip the adler32 check. */
 	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
 	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
 	    !(((data_in[0]<<8) + data_in[1]) % 31)) {

 		/* negative wbits selects raw inflate; step over the header */
 		wbits = -((data_in[0] >> 4) + 8);
 		workspace->inf_strm.next_in += 2;
 		workspace->inf_strm.avail_in -= 2;
 	}

 	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
 		printk(KERN_WARNING "inflateInit failed\n");
 		ret = -1;
 		goto out;
 	}

 	while (bytes_left > 0) {
 		unsigned long buf_start;
 		unsigned long buf_offset;
 		unsigned long bytes;

 		ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
 		if (ret != Z_OK && ret != Z_STREAM_END)
 			break;

 		/* the buffer now holds stream bytes [buf_start, total_out) */
 		buf_start = total_out;
 		total_out = workspace->inf_strm.total_out;

 		/* no output produced: treat as an error and stop */
 		if (total_out == buf_start) {
 			ret = -1;
 			break;
 		}

 		/* we haven't reached start_byte yet, inflate some more */
 		if (total_out <= start_byte)
 			goto next;

 		/* start_byte may fall in the middle of this buffer */
 		if (total_out > start_byte && buf_start < start_byte)
 			buf_offset = start_byte - buf_start;
 		else
 			buf_offset = 0;

 		bytes = min(PAGE_CACHE_SIZE - pg_offset,
 			    PAGE_CACHE_SIZE - buf_offset);
 		bytes = min(bytes, bytes_left);

 		kaddr = kmap_atomic(dest_page, KM_USER0);
 		memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
 		kunmap_atomic(kaddr, KM_USER0);

 		pg_offset += bytes;
 		bytes_left -= bytes;
 next:
 		/* hand the whole bounce buffer back to zlib */
 		workspace->inf_strm.next_out = workspace->buf;
 		workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
 	}

 	/* success if the stream ended cleanly or we copied all of destlen */
 	if (ret != Z_STREAM_END && bytes_left != 0)
 		ret = -1;
 	else
 		ret = 0;

 	zlib_inflateEnd(&workspace->inf_strm);
 out:
 	free_workspace(workspace);
 	return ret;
 }

 /*
  * Release all idle zlib workspaces.
  */
 void btrfs_zlib_exit(void)
 {
 	free_workspaces();
 }
	/*
	* Copyright (C) 2008 Oracle. All rights reserved.
	*
	* This program is free software; you can redistribute it and/or
	* modify it under the terms of the GNU General Public
	* License v2 as published by the Free Software Foundation.
	*
	* This program is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	* General Public License for more details.
	*
	* You should have received a copy of the GNU General Public
	* License along with this program; if not, write to the
	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
	* Boston, MA 02111-1307, USA.
	*
	* Based on jffs2 zlib code:
	* Copyright © 2001-2007 Red Hat, Inc.
	* Created by David Woodhouse <dwmw2@infradead.org>
	*/

	#include <linux/kernel.h>
	#include <linux/slab.h>
	#include <linux/zlib.h>
	#include <linux/zutil.h>
	#include <linux/vmalloc.h>
	#include <linux/init.h>
	#include <linux/err.h>
	#include <linux/sched.h>
	#include <linux/pagemap.h>
	#include <linux/bio.h>
	#include "compression.h"

	/* Plan: call deflate() with avail_in == *sourcelen,
	avail_out = *dstlen - 12 and flush == Z_FINISH.
	If it doesn't manage to finish, call it again with
	avail_in == 0 and avail_out set to the remaining 12
	bytes for it to clean up.
	Q: Is 12 bytes sufficient?
	*/
	#define STREAM_END_SPACE 12

	/*
	 * One reusable set of zlib state.  Workspaces are handed out by
	 * find_zlib_workspace() and given back with free_workspace().
	 */
	struct workspace {
	/* inflate stream; its ->workspace is vmalloc'ed in find_zlib_workspace() */
	z_stream inf_strm;
	/* deflate stream; its ->workspace is vmalloc'ed in find_zlib_workspace() */
	z_stream def_strm;
	/* PAGE_CACHE_SIZE bounce buffer that inflate output is written into */
	char *buf;
	/* links the workspace onto idle_workspace while it is not in use */
	struct list_head list;
	};

	/* workspaces that have been allocated but are not currently handed out */
	static LIST_HEAD(idle_workspace);
	/* taken around idle_workspace / num_workspace updates in the get/put paths */
	static DEFINE_SPINLOCK(workspace_lock);
	/* number of entries on idle_workspace */
	static unsigned long num_workspace;
	/* number of workspaces currently allocated, whether idle or in use */
	static atomic_t alloc_workspace = ATOMIC_INIT(0);
	/* tasks sleeping in find_zlib_workspace() until a workspace is returned */
	static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);

	/*
	 * Get a zlib workspace for the caller's exclusive use.
	 *
	 * An idle workspace is reused when one is available.  Otherwise a new one
	 * is allocated, unless more than num_online_cpus() workspaces already
	 * exist, in which case we sleep until free_workspace() wakes us and then
	 * retry from the top.
	 *
	 * Returns the workspace, or ERR_PTR(-ENOMEM) if an allocation fails.  NULL
	 * is never returned, so callers only need IS_ERR().  Hand the workspace
	 * back with free_workspace().
	 */
	static struct workspace *find_zlib_workspace(void)
	{
		struct workspace *workspace;
		int cpus = num_online_cpus();

	again:
		spin_lock(&workspace_lock);
		if (!list_empty(&idle_workspace)) {
			workspace = list_entry(idle_workspace.next, struct workspace,
					       list);
			list_del(&workspace->list);
			num_workspace--;
			spin_unlock(&workspace_lock);
			return workspace;
		}
		spin_unlock(&workspace_lock);

		if (atomic_read(&alloc_workspace) > cpus) {
			DEFINE_WAIT(wait);

			prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
			/*
			 * A workspace may have been put back on the idle list after
			 * we dropped the lock but before we got onto the wait queue.
			 * That does not change alloc_workspace, so look at
			 * num_workspace as well; otherwise we would go to sleep
			 * with a usable workspace sitting idle.
			 */
			if (atomic_read(&alloc_workspace) > cpus && !num_workspace)
				schedule();
			finish_wait(&workspace_wait, &wait);
			goto again;
		}

		/* account for the new workspace before allocating it */
		atomic_inc(&alloc_workspace);
		workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
		if (!workspace)
			goto fail;

		workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
		if (!workspace->def_strm.workspace)
			goto fail;

		workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
		if (!workspace->inf_strm.workspace)
			goto fail_inflate;

		workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		if (!workspace->buf)
			goto fail_kmalloc;

		return workspace;

	fail_kmalloc:
		vfree(workspace->inf_strm.workspace);
	fail_inflate:
		vfree(workspace->def_strm.workspace);
	fail:
		/* kfree(NULL) is a no-op, so this also covers the kzalloc failure */
		kfree(workspace);
		/* undo the accounting and let a waiter try its own allocation */
		atomic_dec(&alloc_workspace);
		wake_up(&workspace_wait);
		return ERR_PTR(-ENOMEM);
	}

	/*
	 * Give back a workspace obtained from find_zlib_workspace().
	 *
	 * The workspace is parked on the idle list while fewer than
	 * num_online_cpus() are idle; otherwise it is freed and alloc_workspace is
	 * dropped.  Either way any task sleeping in find_zlib_workspace() is woken.
	 *
	 * Always returns 0.
	 */
	static int free_workspace(struct workspace *workspace)
	{
		spin_lock(&workspace_lock);
		if (num_workspace < num_online_cpus()) {
			list_add_tail(&workspace->list, &idle_workspace);
			num_workspace++;
			spin_unlock(&workspace_lock);
			goto wake;
		}
		spin_unlock(&workspace_lock);

		vfree(workspace->def_strm.workspace);
		vfree(workspace->inf_strm.workspace);
		kfree(workspace->buf);
		kfree(workspace);

		atomic_dec(&alloc_workspace);
	wake:
		/*
		 * waitqueue_active() is a lockless peek at the wait queue.  Without
		 * a full barrier the peek can be ordered before the state change
		 * above becomes visible, so a task that has just queued itself
		 * could see the old state, go to sleep, and never be woken.
		 */
		smp_mb();
		if (waitqueue_active(&workspace_wait))
			wake_up(&workspace_wait);
		return 0;
	}

	/*
	 * Tear down every workspace still parked on the idle list.  Called from
	 * btrfs_zlib_exit().
	 */
	static void free_workspaces(void)
	{
		struct workspace *ws;

		for (;;) {
			if (list_empty(&idle_workspace))
				break;

			/* detach the head entry, then release everything it owns */
			ws = list_entry(idle_workspace.next, struct workspace, list);
			list_del(&ws->list);

			vfree(ws->def_strm.workspace);
			vfree(ws->inf_strm.workspace);
			kfree(ws->buf);
			kfree(ws);

			atomic_dec(&alloc_workspace);
		}
	}

	/*
	 * Given an address space and start/len, compress the bytes.
	 *
	 * Input pages are looked up in 'mapping' one at a time.  Output pages are
	 * allocated here and stored in 'pages'; the caller owns them afterwards.
	 *
	 * @mapping:       address space holding the data to compress
	 * @start:         byte offset of the first input byte
	 * @len:           number of input bytes to compress
	 * @pages:         array that receives the allocated output pages
	 * @nr_dest_pages: number of slots in @pages
	 * @out_pages:     returns the number of pages allocated.  There may be
	 *                 pages allocated even if we return an error.
	 * @total_in:      returns the number of bytes actually read.  It may be
	 *                 smaller than len if we had to exit early because we
	 *                 crossed the max_out threshold.  Only set on success.
	 * @total_out:     returns the total number of compressed bytes.  Only set
	 *                 on success.
	 * @max_out:       max number of bytes that we're allowed to stuff into
	 *                 pages
	 *
	 * Returns 0 on success and -1 on any failure, including "the data did not
	 * get smaller".
	 */
	int btrfs_zlib_compress_pages(struct address_space *mapping,
				      u64 start, unsigned long len,
				      struct page **pages,
				      unsigned long nr_dest_pages,
				      unsigned long *out_pages,
				      unsigned long *total_in,
				      unsigned long *total_out,
				      unsigned long max_out)
	{
		int ret;
		struct workspace *workspace;
		char *data_in;
		char *cpage_out;
		int nr_pages = 0;
		struct page *in_page = NULL;
		struct page *out_page = NULL;
		unsigned long bytes_left;

		*out_pages = 0;
		*total_out = 0;
		*total_in = 0;

		workspace = find_zlib_workspace();
		if (IS_ERR(workspace))
			return -1;

		if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
			printk(KERN_WARNING "deflateInit failed\n");
			ret = -1;
			goto out;
		}

		workspace->def_strm.total_in = 0;
		workspace->def_strm.total_out = 0;

		in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
		if (!in_page) {
			/* nothing to map; kmap(NULL) would oops */
			ret = -1;
			goto out;
		}
		data_in = kmap(in_page);

		out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
		if (!out_page) {
			/* nr_pages is still 0, so the caller sees no output pages */
			ret = -1;
			goto out;
		}
		cpage_out = kmap(out_page);
		pages[0] = out_page;
		nr_pages = 1;

		workspace->def_strm.next_in = data_in;
		workspace->def_strm.next_out = cpage_out;
		workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
		workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);

		while (workspace->def_strm.total_in < len) {
			ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
			if (ret != Z_OK) {
				printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
				       ret);
				zlib_deflateEnd(&workspace->def_strm);
				ret = -1;
				goto out;
			}

			/* we're making it bigger, give up */
			if (workspace->def_strm.total_in > 8192 &&
			    workspace->def_strm.total_in <
			    workspace->def_strm.total_out) {
				ret = -1;
				goto out;
			}
			/* we need another page for writing out.  Test this
			 * before the total_in so we will pull in a new page for
			 * the stream end if required
			 */
			if (workspace->def_strm.avail_out == 0) {
				kunmap(out_page);
				if (nr_pages == nr_dest_pages) {
					out_page = NULL;
					ret = -1;
					goto out;
				}
				out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
				if (!out_page) {
					/*
					 * The previous page is already unmapped and
					 * recorded in pages[]; out_page being NULL
					 * keeps the exit path from unmapping it.
					 */
					ret = -1;
					goto out;
				}
				cpage_out = kmap(out_page);
				pages[nr_pages] = out_page;
				nr_pages++;
				workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
				workspace->def_strm.next_out = cpage_out;
			}
			/* we're all done */
			if (workspace->def_strm.total_in >= len)
				break;

			/* we've read in a full page, get a new one */
			if (workspace->def_strm.avail_in == 0) {
				if (workspace->def_strm.total_out > max_out)
					break;

				bytes_left = len - workspace->def_strm.total_in;
				kunmap(in_page);
				page_cache_release(in_page);

				start += PAGE_CACHE_SIZE;
				in_page = find_get_page(mapping,
							start >> PAGE_CACHE_SHIFT);
				if (!in_page) {
					/* old page already released above */
					ret = -1;
					goto out;
				}
				data_in = kmap(in_page);
				workspace->def_strm.avail_in = min(bytes_left,
								   PAGE_CACHE_SIZE);
				workspace->def_strm.next_in = data_in;
			}
		}
		/* no more input; let zlib write out the end of the stream */
		workspace->def_strm.avail_in = 0;
		ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
		zlib_deflateEnd(&workspace->def_strm);

		if (ret != Z_STREAM_END) {
			ret = -1;
			goto out;
		}

		/* compression that does not save space counts as a failure */
		if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
			ret = -1;
			goto out;
		}

		ret = 0;
		*total_out = workspace->def_strm.total_out;
		*total_in = workspace->def_strm.total_in;
	out:
		*out_pages = nr_pages;
		if (out_page)
			kunmap(out_page);

		if (in_page) {
			kunmap(in_page);
			page_cache_release(in_page);
		}
		free_workspace(workspace);
		return ret;
	}

	/*
	 * Decompress a compressed extent into the pages of a bio.
	 *
	 * @pages_in:   array of pages with compressed data
	 * @disk_start: the starting logical offset of this array in the file
	 * @bvec:       bio_vec of pages from the file that we want to decompress
	 *              into
	 * @vcnt:       the count of pages in the biovec
	 * @srclen:     the number of bytes in pages_in
	 *
	 * The basic idea is that we have a bio that was created by readpages.
	 * The pages in the bio are for the uncompressed data, and they may not
	 * be contiguous.  They all correspond to the range of bytes covered by
	 * the compressed extent.
	 *
	 * Data is inflated one PAGE_CACHE_SIZE buffer at a time into
	 * workspace->buf, and the parts of each buffer that land on a bio page are
	 * copied out.
	 *
	 * Returns 0 on success, -ENOMEM if no workspace could be obtained and -1
	 * on any zlib failure.
	 */
	int btrfs_zlib_decompress_biovec(struct page **pages_in,
				      u64 disk_start,
				      struct bio_vec *bvec,
				      int vcnt,
				      size_t srclen)
	{
		int ret = 0;
		int wbits = MAX_WBITS;
		struct workspace *workspace;
		/*
		 * Unsigned so the header bytes below are not sign-extended; this
		 * matches the parameter type used by btrfs_zlib_decompress().
		 */
		unsigned char *data_in;
		size_t total_out = 0;
		unsigned long page_bytes_left;
		unsigned long page_in_index = 0;
		unsigned long page_out_index = 0;
		struct page *page_out;
		unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
						PAGE_CACHE_SIZE;
		unsigned long buf_start;
		unsigned long buf_offset;
		unsigned long bytes;
		unsigned long working_bytes;
		unsigned long pg_offset;
		unsigned long start_byte;
		unsigned long current_buf_start;
		char *kaddr;

		workspace = find_zlib_workspace();
		if (IS_ERR(workspace))
			return -ENOMEM;

		data_in = kmap(pages_in[page_in_index]);
		workspace->inf_strm.next_in = data_in;
		workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
		workspace->inf_strm.total_in = 0;

		workspace->inf_strm.total_out = 0;
		workspace->inf_strm.next_out = workspace->buf;
		workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
		page_out = bvec[page_out_index].bv_page;
		page_bytes_left = PAGE_CACHE_SIZE;
		pg_offset = 0;

		/* If it's deflate, and it's got no preset dictionary, then
		   we can tell zlib to skip the adler32 check. */
		if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
		    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
		    !(((data_in[0]<<8) + data_in[1]) % 31)) {

			/* negative wbits selects raw inflate; step over the header */
			wbits = -((data_in[0] >> 4) + 8);
			workspace->inf_strm.next_in += 2;
			workspace->inf_strm.avail_in -= 2;
		}

		if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
			printk(KERN_WARNING "inflateInit failed\n");
			/* the 'out' label skips the kunmap, so drop the mapping here */
			kunmap(pages_in[page_in_index]);
			ret = -1;
			goto out;
		}
		while (workspace->inf_strm.total_in < srclen) {
			ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
			if (ret != Z_OK && ret != Z_STREAM_END)
				break;
			/*
			 * buf_start is the offset in the uncompressed stream of the
			 * first byte in our workspace buffer
			 */
			buf_start = total_out;

			/* total_out is the offset just past the last buffered byte */
			total_out = workspace->inf_strm.total_out;

			working_bytes = total_out - buf_start;

			/*
			 * start byte is the first byte of the page we're currently
			 * copying into relative to the start of the compressed data.
			 */
			start_byte = page_offset(page_out) - disk_start;

			if (working_bytes == 0) {
				/* we didn't make progress in this inflate
				 * call, we're done
				 */
				if (ret != Z_STREAM_END)
					ret = -1;
				break;
			}

			/* we haven't yet hit data corresponding to this page */
			if (total_out <= start_byte)
				goto next;

			/*
			 * the start of the data we care about is offset into
			 * the middle of our working buffer
			 */
			if (total_out > start_byte && buf_start < start_byte) {
				buf_offset = start_byte - buf_start;
				working_bytes -= buf_offset;
			} else {
				buf_offset = 0;
			}
			current_buf_start = buf_start;

			/* copy bytes from the working buffer into the pages */
			while (working_bytes > 0) {
				bytes = min(PAGE_CACHE_SIZE - pg_offset,
					    PAGE_CACHE_SIZE - buf_offset);
				bytes = min(bytes, working_bytes);
				kaddr = kmap_atomic(page_out, KM_USER0);
				memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
				       bytes);
				kunmap_atomic(kaddr, KM_USER0);
				flush_dcache_page(page_out);

				pg_offset += bytes;
				page_bytes_left -= bytes;
				buf_offset += bytes;
				working_bytes -= bytes;
				current_buf_start += bytes;

				/* check if we need to pick another page */
				if (page_bytes_left == 0) {
					page_out_index++;
					if (page_out_index >= vcnt) {
						/* every bio page is filled */
						ret = 0;
						goto done;
					}

					page_out = bvec[page_out_index].bv_page;
					pg_offset = 0;
					page_bytes_left = PAGE_CACHE_SIZE;
					start_byte = page_offset(page_out) - disk_start;

					/*
					 * make sure our new page is covered by this
					 * working buffer
					 */
					if (total_out <= start_byte)
						goto next;

					/* the next page in the biovec might not
					 * be adjacent to the last page, but it
					 * might still be found inside this working
					 * buffer.  bump our offset pointer
					 */
					if (total_out > start_byte &&
					    current_buf_start < start_byte) {
						buf_offset = start_byte - buf_start;
						working_bytes = total_out - start_byte;
						current_buf_start = buf_start +
							buf_offset;
					}
				}
			}
	next:
			/* hand the whole bounce buffer back to zlib */
			workspace->inf_strm.next_out = workspace->buf;
			workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;

			/* current input page is used up, move to the next one */
			if (workspace->inf_strm.avail_in == 0) {
				unsigned long tmp;
				kunmap(pages_in[page_in_index]);
				page_in_index++;
				if (page_in_index >= total_pages_in) {
					/* tells the exit path nothing is mapped */
					data_in = NULL;
					break;
				}
				data_in = kmap(pages_in[page_in_index]);
				workspace->inf_strm.next_in = data_in;
				tmp = srclen - workspace->inf_strm.total_in;
				workspace->inf_strm.avail_in = min(tmp,
								   PAGE_CACHE_SIZE);
			}
		}
		if (ret != Z_STREAM_END)
			ret = -1;
		else
			ret = 0;
	done:
		zlib_inflateEnd(&workspace->inf_strm);
		if (data_in)
			kunmap(pages_in[page_in_index]);
	out:
		free_workspace(workspace);
		return ret;
	}

	/*
	 * a less complex decompression routine.  Our compressed data fits in a
	 * single page, and we want to read a single page out of it.
	 *
	 * @data_in:    the compressed bytes
	 * @dest_page:  page that receives the uncompressed bytes, filled from
	 *              offset 0
	 * @start_byte: the offset into the uncompressed data we're interested in
	 * @srclen:     number of bytes at @data_in
	 * @destlen:    number of bytes to copy into @dest_page; must not exceed
	 *              PAGE_CACHE_SIZE
	 *
	 * Returns 0 on success, -ENOMEM if destlen is too large or no workspace
	 * could be obtained, and -1 on a zlib failure.
	 */
	int btrfs_zlib_decompress(unsigned char *data_in,
				  struct page *dest_page,
				  unsigned long start_byte,
				  size_t srclen, size_t destlen)
	{
		int ret = 0;
		int wbits = MAX_WBITS;
		struct workspace *workspace;
		unsigned long bytes_left = destlen;
		unsigned long total_out = 0;
		/*
		 * Write position in dest_page.  It must survive across loop
		 * iterations: when the wanted range straddles two inflate calls the
		 * second chunk has to land after the first instead of on top of it.
		 */
		unsigned long pg_offset = 0;
		char *kaddr;

		if (destlen > PAGE_CACHE_SIZE)
			return -ENOMEM;

		workspace = find_zlib_workspace();
		if (IS_ERR(workspace))
			return -ENOMEM;

		workspace->inf_strm.next_in = data_in;
		workspace->inf_strm.avail_in = srclen;
		workspace->inf_strm.total_in = 0;

		workspace->inf_strm.next_out = workspace->buf;
		workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
		workspace->inf_strm.total_out = 0;
		/* If it's deflate, and it's got no preset dictionary, then
		   we can tell zlib to skip the adler32 check. */
		if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
		    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
		    !(((data_in[0]<<8) + data_in[1]) % 31)) {

			/* negative wbits selects raw inflate; step over the header */
			wbits = -((data_in[0] >> 4) + 8);
			workspace->inf_strm.next_in += 2;
			workspace->inf_strm.avail_in -= 2;
		}

		if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
			printk(KERN_WARNING "inflateInit failed\n");
			ret = -1;
			goto out;
		}

		while (bytes_left > 0) {
			unsigned long buf_start;
			unsigned long buf_offset;
			unsigned long bytes;

			ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
			if (ret != Z_OK && ret != Z_STREAM_END)
				break;

			/* the buffer now holds stream bytes [buf_start, total_out) */
			buf_start = total_out;
			total_out = workspace->inf_strm.total_out;

			/* no output produced: treat as an error and stop */
			if (total_out == buf_start) {
				ret = -1;
				break;
			}

			/* we haven't reached start_byte yet, inflate some more */
			if (total_out <= start_byte)
				goto next;

			/* start_byte may fall in the middle of this buffer */
			if (total_out > start_byte && buf_start < start_byte)
				buf_offset = start_byte - buf_start;
			else
				buf_offset = 0;

			bytes = min(PAGE_CACHE_SIZE - pg_offset,
				    PAGE_CACHE_SIZE - buf_offset);
			bytes = min(bytes, bytes_left);

			kaddr = kmap_atomic(dest_page, KM_USER0);
			memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
			kunmap_atomic(kaddr, KM_USER0);

			pg_offset += bytes;
			bytes_left -= bytes;
	next:
			/* hand the whole bounce buffer back to zlib */
			workspace->inf_strm.next_out = workspace->buf;
			workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
		}

		/* success if the stream ended cleanly or we copied all of destlen */
		if (ret != Z_STREAM_END && bytes_left != 0)
			ret = -1;
		else
			ret = 0;

		zlib_inflateEnd(&workspace->inf_strm);
	out:
		free_workspace(workspace);
		return ret;
	}

	/*
	 * Release all idle zlib workspaces.
	 */
	void btrfs_zlib_exit(void)
	{
		free_workspaces();
	}