Blame - fs/jbd/journal.c - kernel/msm

blob: e4b516ac4989ef3ddef852f5c764ef816e0c3a56 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* linux/fs/journal.c
				3	*
				4	* Written by Stephen C. Tweedie <sct@redhat.com>, 1998
				5	*
				6	* Copyright 1998 Red Hat corp --- All Rights Reserved
				7	*
				8	* This file is part of the Linux kernel and is made available under
				9	* the terms of the GNU General Public License, version 2, or at your
				10	* option, any later version, incorporated herein by reference.
				11	*
				12	* Generic filesystem journal-writing code; part of the ext2fs
				13	* journaling system.
				14	*
				15	* This file manages journals: areas of disk reserved for logging
				16	* transactional updates. This includes the kernel journaling thread
				17	* which is responsible for scheduling updates to the log.
				18	*
				19	* We do not actually manage the physical storage of the journal in this
				20	* file: that is left to a per-journal policy function, which allows us
				21	* to store the journal within a filesystem-specified area for ext2
				22	* journaling (ext2 can use a reserved inode for storing the log).
				23	*/
				24
				25	#include <linux/module.h>
				26	#include <linux/time.h>
				27	#include <linux/fs.h>
				28	#include <linux/jbd.h>
				29	#include <linux/errno.h>
				30	#include <linux/slab.h>
				31	#include <linux/smp_lock.h>
				32	#include <linux/init.h>
				33	#include <linux/mm.h>
				34	#include <linux/suspend.h>
				35	#include <linux/pagemap.h>
				36	#include <asm/uaccess.h>
				37	#include <asm/page.h>
				38	#include <linux/proc_fs.h>
				39
				40	EXPORT_SYMBOL(journal_start);
				41	EXPORT_SYMBOL(journal_restart);
				42	EXPORT_SYMBOL(journal_extend);
				43	EXPORT_SYMBOL(journal_stop);
				44	EXPORT_SYMBOL(journal_lock_updates);
				45	EXPORT_SYMBOL(journal_unlock_updates);
				46	EXPORT_SYMBOL(journal_get_write_access);
				47	EXPORT_SYMBOL(journal_get_create_access);
				48	EXPORT_SYMBOL(journal_get_undo_access);
				49	EXPORT_SYMBOL(journal_dirty_data);
				50	EXPORT_SYMBOL(journal_dirty_metadata);
				51	EXPORT_SYMBOL(journal_release_buffer);
				52	EXPORT_SYMBOL(journal_forget);
				53	#if 0
				54	EXPORT_SYMBOL(journal_sync_buffer);
				55	#endif
				56	EXPORT_SYMBOL(journal_flush);
				57	EXPORT_SYMBOL(journal_revoke);
				58
				59	EXPORT_SYMBOL(journal_init_dev);
				60	EXPORT_SYMBOL(journal_init_inode);
				61	EXPORT_SYMBOL(journal_update_format);
				62	EXPORT_SYMBOL(journal_check_used_features);
				63	EXPORT_SYMBOL(journal_check_available_features);
				64	EXPORT_SYMBOL(journal_set_features);
				65	EXPORT_SYMBOL(journal_create);
				66	EXPORT_SYMBOL(journal_load);
				67	EXPORT_SYMBOL(journal_destroy);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	68	EXPORT_SYMBOL(journal_update_superblock);
				69	EXPORT_SYMBOL(journal_abort);
				70	EXPORT_SYMBOL(journal_errno);
				71	EXPORT_SYMBOL(journal_ack_err);
				72	EXPORT_SYMBOL(journal_clear_err);
				73	EXPORT_SYMBOL(log_wait_commit);
				74	EXPORT_SYMBOL(journal_start_commit);
				75	EXPORT_SYMBOL(journal_force_commit_nested);
				76	EXPORT_SYMBOL(journal_wipe);
				77	EXPORT_SYMBOL(journal_blocks_per_page);
				78	EXPORT_SYMBOL(journal_invalidatepage);
				79	EXPORT_SYMBOL(journal_try_to_free_buffers);
				80	EXPORT_SYMBOL(journal_force_commit);
				81
				82	static int journal_convert_superblock_v1(journal_t , journal_superblock_t );
Adrian Bunk	022a4a7	2005-09-06 15:16:41 -0700	[diff] [blame]	83	static void __journal_abort_soft (journal_t *journal, int errno);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	84
				85	/*
				86	* Helper function used to manage commit timeouts
				87	*/
				88
				89	static void commit_timeout(unsigned long __data)
				90	{
				91	struct task_struct * p = (struct task_struct *) __data;
				92
				93	wake_up_process(p);
				94	}
				95
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	96	/*
				97	* kjournald: The main thread function used to manage a logging device
				98	* journal.
				99	*
				100	* This kernel thread is responsible for two things:
				101	*
				102	* 1) COMMIT: Every so often we need to commit the current state of the
				103	* filesystem to disk. The journal thread is responsible for writing
				104	* all of the metadata buffers to disk.
				105	*
				106	* 2) CHECKPOINT: We cannot reuse a used section of the log file until all
				107	* of the data in that part of the log has been rewritten elsewhere on
				108	* the disk. Flushing these old buffers to reclaim space in the log is
				109	* known as checkpointing, and this thread is responsible for that job.
				110	*/
				111
Adrian Bunk	022a4a7	2005-09-06 15:16:41 -0700	[diff] [blame]	112	static int kjournald(void *arg)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	113	{
				114	journal_t journal = (journal_t ) arg;
				115	transaction_t *transaction;
				116	struct timer_list timer;
				117
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	118	daemonize("kjournald");
				119
				120	/* Set up an interval timer which can be used to trigger a
				121	commit wakeup after the commit interval expires */
				122	init_timer(&timer);
				123	timer.data = (unsigned long) current;
				124	timer.function = commit_timeout;
				125	journal->j_commit_timer = &timer;
				126
				127	/* Record that the journal thread is running */
				128	journal->j_task = current;
				129	wake_up(&journal->j_wait_done_commit);
				130
				131	printk(KERN_INFO "kjournald starting. Commit interval %ld seconds\n",
				132	journal->j_commit_interval / HZ);
				133
				134	/*
				135	* And now, wait forever for commit wakeup events.
				136	*/
				137	spin_lock(&journal->j_state_lock);
				138
				139	loop:
				140	if (journal->j_flags & JFS_UNMOUNT)
				141	goto end_loop;
				142
				143	jbd_debug(1, "commit_sequence=%d, commit_request=%d\n",
				144	journal->j_commit_sequence, journal->j_commit_request);
				145
				146	if (journal->j_commit_sequence != journal->j_commit_request) {
				147	jbd_debug(1, "OK, requests differ\n");
				148	spin_unlock(&journal->j_state_lock);
				149	del_timer_sync(journal->j_commit_timer);
				150	journal_commit_transaction(journal);
				151	spin_lock(&journal->j_state_lock);
				152	goto loop;
				153	}
				154
				155	wake_up(&journal->j_wait_done_commit);
Christoph Lameter	3e1d1d2	2005-06-24 23:13:50 -0700	[diff] [blame]	156	if (freezing(current)) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	157	/*
				158	* The simpler the better. Flushing journal isn't a
				159	* good idea, because that depends on threads that may
				160	* be already stopped.
				161	*/
				162	jbd_debug(1, "Now suspending kjournald\n");
				163	spin_unlock(&journal->j_state_lock);
Christoph Lameter	3e1d1d2	2005-06-24 23:13:50 -0700	[diff] [blame]	164	refrigerator();
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	165	spin_lock(&journal->j_state_lock);
				166	} else {
				167	/*
				168	* We assume on resume that commits are already there,
				169	* so we don't sleep
				170	*/
				171	DEFINE_WAIT(wait);
				172	int should_sleep = 1;
				173
				174	prepare_to_wait(&journal->j_wait_commit, &wait,
				175	TASK_INTERRUPTIBLE);
				176	if (journal->j_commit_sequence != journal->j_commit_request)
				177	should_sleep = 0;
				178	transaction = journal->j_running_transaction;
				179	if (transaction && time_after_eq(jiffies,
				180	transaction->t_expires))
				181	should_sleep = 0;
Mark Fasheh	cbf0d27	2005-09-06 15:19:08 -0700	[diff] [blame]	182	if (journal->j_flags & JFS_UNMOUNT)
				183	should_sleep = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	184	if (should_sleep) {
				185	spin_unlock(&journal->j_state_lock);
				186	schedule();
				187	spin_lock(&journal->j_state_lock);
				188	}
				189	finish_wait(&journal->j_wait_commit, &wait);
				190	}
				191
				192	jbd_debug(1, "kjournald wakes\n");
				193
				194	/*
				195	* Were we woken up by a commit wakeup event?
				196	*/
				197	transaction = journal->j_running_transaction;
				198	if (transaction && time_after_eq(jiffies, transaction->t_expires)) {
				199	journal->j_commit_request = transaction->t_tid;
				200	jbd_debug(1, "woke because of timeout\n");
				201	}
				202	goto loop;
				203
				204	end_loop:
				205	spin_unlock(&journal->j_state_lock);
				206	del_timer_sync(journal->j_commit_timer);
				207	journal->j_task = NULL;
				208	wake_up(&journal->j_wait_done_commit);
				209	jbd_debug(1, "Journal thread exiting.\n");
				210	return 0;
				211	}
				212
				213	static void journal_start_thread(journal_t *journal)
				214	{
				215	kernel_thread(kjournald, journal, CLONE_VM\|CLONE_FS\|CLONE_FILES);
				216	wait_event(journal->j_wait_done_commit, journal->j_task != 0);
				217	}
				218
				219	static void journal_kill_thread(journal_t *journal)
				220	{
				221	spin_lock(&journal->j_state_lock);
				222	journal->j_flags \|= JFS_UNMOUNT;
				223
				224	while (journal->j_task) {
				225	wake_up(&journal->j_wait_commit);
				226	spin_unlock(&journal->j_state_lock);
				227	wait_event(journal->j_wait_done_commit, journal->j_task == 0);
				228	spin_lock(&journal->j_state_lock);
				229	}
				230	spin_unlock(&journal->j_state_lock);
				231	}
				232
				233	/*
				234	* journal_write_metadata_buffer: write a metadata buffer to the journal.
				235	*
				236	* Writes a metadata buffer to a given disk block. The actual IO is not
				237	* performed but a new buffer_head is constructed which labels the data
				238	* to be written with the correct destination disk block.
				239	*
				240	* Any magic-number escaping which needs to be done will cause a
				241	* copy-out here. If the buffer happens to start with the
				242	* JFS_MAGIC_NUMBER, then we can't write it to the log directly: the
				243	* magic number is only written to the log for descriptor blocks. In
				244	* this case, we copy the data and replace the first word with 0, and we
				245	* return a result code which indicates that this buffer needs to be
				246	* marked as an escaped buffer in the corresponding log descriptor
				247	* block. The missing word can then be restored when the block is read
				248	* during recovery.
				249	*
				250	* If the source buffer has already been modified by a new transaction
				251	* since we took the last commit snapshot, we use the frozen copy of
				252	* that data for IO. If we end up using the existing buffer_head's data
				253	* for the write, then we have to lock the buffer to prevent anyone
				254	* else from using and possibly modifying it while the IO is in
				255	* progress.
				256	*
				257	* The function returns a pointer to the buffer_heads to be used for IO.
				258	*
				259	* We assume that the journal has already been locked in this function.
				260	*
				261	* Return value:
				262	* <0: Error
				263	* >=0: Finished OK
				264	*
				265	* On success:
				266	* Bit 0 set == escape performed on the data
				267	* Bit 1 set == buffer copy-out performed (kfree the data after IO)
				268	*/
				269
				270	int journal_write_metadata_buffer(transaction_t *transaction,
				271	struct journal_head *jh_in,
				272	struct journal_head **jh_out,
				273	int blocknr)
				274	{
				275	int need_copy_out = 0;
				276	int done_copy_out = 0;
				277	int do_escape = 0;
				278	char *mapped_data;
				279	struct buffer_head *new_bh;
				280	struct journal_head *new_jh;
				281	struct page *new_page;
				282	unsigned int new_offset;
				283	struct buffer_head *bh_in = jh2bh(jh_in);
				284
				285	/*
				286	* The buffer really shouldn't be locked: only the current committing
				287	* transaction is allowed to write it, so nobody else is allowed
				288	* to do any IO.
				289	*
				290	* akpm: except if we're journalling data, and write() output is
				291	* also part of a shared mapping, and another thread has
				292	* decided to launch a writepage() against this buffer.
				293	*/
				294	J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
				295
				296	new_bh = alloc_buffer_head(GFP_NOFS\|__GFP_NOFAIL);
				297
				298	/*
				299	* If a new transaction has already done a buffer copy-out, then
				300	* we use that version of the data for the commit.
				301	*/
				302	jbd_lock_bh_state(bh_in);
				303	repeat:
				304	if (jh_in->b_frozen_data) {
				305	done_copy_out = 1;
				306	new_page = virt_to_page(jh_in->b_frozen_data);
				307	new_offset = offset_in_page(jh_in->b_frozen_data);
				308	} else {
				309	new_page = jh2bh(jh_in)->b_page;
				310	new_offset = offset_in_page(jh2bh(jh_in)->b_data);
				311	}
				312
				313	mapped_data = kmap_atomic(new_page, KM_USER0);
				314	/*
				315	* Check for escaping
				316	*/
				317	if (((__be32 )(mapped_data + new_offset)) ==
				318	cpu_to_be32(JFS_MAGIC_NUMBER)) {
				319	need_copy_out = 1;
				320	do_escape = 1;
				321	}
				322	kunmap_atomic(mapped_data, KM_USER0);
				323
				324	/*
				325	* Do we need to do a data copy?
				326	*/
				327	if (need_copy_out && !done_copy_out) {
				328	char *tmp;
				329
				330	jbd_unlock_bh_state(bh_in);
				331	tmp = jbd_rep_kmalloc(bh_in->b_size, GFP_NOFS);
				332	jbd_lock_bh_state(bh_in);
				333	if (jh_in->b_frozen_data) {
				334	kfree(tmp);
				335	goto repeat;
				336	}
				337
				338	jh_in->b_frozen_data = tmp;
				339	mapped_data = kmap_atomic(new_page, KM_USER0);
				340	memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
				341	kunmap_atomic(mapped_data, KM_USER0);
				342
				343	new_page = virt_to_page(tmp);
				344	new_offset = offset_in_page(tmp);
				345	done_copy_out = 1;
				346	}
				347
				348	/*
				349	* Did we need to do an escaping? Now we've done all the
				350	* copying, we can finally do so.
				351	*/
				352	if (do_escape) {
				353	mapped_data = kmap_atomic(new_page, KM_USER0);
				354	((unsigned int )(mapped_data + new_offset)) = 0;
				355	kunmap_atomic(mapped_data, KM_USER0);
				356	}
				357
				358	/* keep subsequent assertions sane */
				359	new_bh->b_state = 0;
				360	init_buffer(new_bh, NULL, NULL);
				361	atomic_set(&new_bh->b_count, 1);
				362	jbd_unlock_bh_state(bh_in);
				363
				364	new_jh = journal_add_journal_head(new_bh); /* This sleeps */
				365
				366	set_bh_page(new_bh, new_page, new_offset);
				367	new_jh->b_transaction = NULL;
				368	new_bh->b_size = jh2bh(jh_in)->b_size;
				369	new_bh->b_bdev = transaction->t_journal->j_dev;
				370	new_bh->b_blocknr = blocknr;
				371	set_buffer_mapped(new_bh);
				372	set_buffer_dirty(new_bh);
				373
				374	*jh_out = new_jh;
				375
				376	/*
				377	* The to-be-written buffer needs to get moved to the io queue,
				378	* and the original buffer whose contents we are shadowing or
				379	* copying is moved to the transaction's shadow queue.
				380	*/
				381	JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
				382	journal_file_buffer(jh_in, transaction, BJ_Shadow);
				383	JBUFFER_TRACE(new_jh, "file as BJ_IO");
				384	journal_file_buffer(new_jh, transaction, BJ_IO);
				385
				386	return do_escape \| (done_copy_out << 1);
				387	}
				388
				389	/*
				390	* Allocation code for the journal file. Manage the space left in the
				391	* journal, so that we can begin checkpointing when appropriate.
				392	*/
				393
				394	/*
				395	* __log_space_left: Return the number of free blocks left in the journal.
				396	*
				397	* Called with the journal already locked.
				398	*
				399	* Called under j_state_lock
				400	*/
				401
				402	int __log_space_left(journal_t *journal)
				403	{
				404	int left = journal->j_free;
				405
				406	assert_spin_locked(&journal->j_state_lock);
				407
				408	/*
				409	* Be pessimistic here about the number of those free blocks which
				410	* might be required for log descriptor control blocks.
				411	*/
				412
				413	#define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */
				414
				415	left -= MIN_LOG_RESERVED_BLOCKS;
				416
				417	if (left <= 0)
				418	return 0;
				419	left -= (left >> 3);
				420	return left;
				421	}
				422
				423	/*
				424	* Called under j_state_lock. Returns true if a transaction was started.
				425	*/
				426	int __log_start_commit(journal_t *journal, tid_t target)
				427	{
				428	/*
				429	* Are we already doing a recent enough commit?
				430	*/
				431	if (!tid_geq(journal->j_commit_request, target)) {
				432	/*
				433	* We want a new commit: OK, mark the request and wakup the
				434	* commit thread. We do _not_ do the commit ourselves.
				435	*/
				436
				437	journal->j_commit_request = target;
				438	jbd_debug(1, "JBD: requesting commit %d/%d\n",
				439	journal->j_commit_request,
				440	journal->j_commit_sequence);
				441	wake_up(&journal->j_wait_commit);
				442	return 1;
				443	}
				444	return 0;
				445	}
				446
				447	int log_start_commit(journal_t *journal, tid_t tid)
				448	{
				449	int ret;
				450
				451	spin_lock(&journal->j_state_lock);
				452	ret = __log_start_commit(journal, tid);
				453	spin_unlock(&journal->j_state_lock);
				454	return ret;
				455	}
				456
				457	/*
				458	* Force and wait upon a commit if the calling process is not within
				459	* transaction. This is used for forcing out undo-protected data which contains
				460	* bitmaps, when the fs is running out of space.
				461	*
				462	* We can only force the running transaction if we don't have an active handle;
				463	* otherwise, we will deadlock.
				464	*
				465	* Returns true if a transaction was started.
				466	*/
				467	int journal_force_commit_nested(journal_t *journal)
				468	{
				469	transaction_t *transaction = NULL;
				470	tid_t tid;
				471
				472	spin_lock(&journal->j_state_lock);
				473	if (journal->j_running_transaction && !current->journal_info) {
				474	transaction = journal->j_running_transaction;
				475	__log_start_commit(journal, transaction->t_tid);
				476	} else if (journal->j_committing_transaction)
				477	transaction = journal->j_committing_transaction;
				478
				479	if (!transaction) {
				480	spin_unlock(&journal->j_state_lock);
				481	return 0; /* Nothing to retry */
				482	}
				483
				484	tid = transaction->t_tid;
				485	spin_unlock(&journal->j_state_lock);
				486	log_wait_commit(journal, tid);
				487	return 1;
				488	}
				489
				490	/*
				491	* Start a commit of the current running transaction (if any). Returns true
				492	* if a transaction was started, and fills its tid in at *ptid
				493	*/
				494	int journal_start_commit(journal_t journal, tid_t ptid)
				495	{
				496	int ret = 0;
				497
				498	spin_lock(&journal->j_state_lock);
				499	if (journal->j_running_transaction) {
				500	tid_t tid = journal->j_running_transaction->t_tid;
				501
				502	ret = __log_start_commit(journal, tid);
				503	if (ret && ptid)
				504	*ptid = tid;
				505	} else if (journal->j_committing_transaction && ptid) {
				506	/*
				507	* If ext3_write_super() recently started a commit, then we
				508	* have to wait for completion of that transaction
				509	*/
				510	*ptid = journal->j_committing_transaction->t_tid;
				511	ret = 1;
				512	}
				513	spin_unlock(&journal->j_state_lock);
				514	return ret;
				515	}
				516
				517	/*
				518	* Wait for a specified commit to complete.
				519	* The caller may not hold the journal lock.
				520	*/
				521	int log_wait_commit(journal_t *journal, tid_t tid)
				522	{
				523	int err = 0;
				524
				525	#ifdef CONFIG_JBD_DEBUG
				526	spin_lock(&journal->j_state_lock);
				527	if (!tid_geq(journal->j_commit_request, tid)) {
				528	printk(KERN_EMERG
				529	"%s: error: j_commit_request=%d, tid=%d\n",
				530	__FUNCTION__, journal->j_commit_request, tid);
				531	}
				532	spin_unlock(&journal->j_state_lock);
				533	#endif
				534	spin_lock(&journal->j_state_lock);
				535	while (tid_gt(tid, journal->j_commit_sequence)) {
				536	jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
				537	tid, journal->j_commit_sequence);
				538	wake_up(&journal->j_wait_commit);
				539	spin_unlock(&journal->j_state_lock);
				540	wait_event(journal->j_wait_done_commit,
				541	!tid_gt(tid, journal->j_commit_sequence));
				542	spin_lock(&journal->j_state_lock);
				543	}
				544	spin_unlock(&journal->j_state_lock);
				545
				546	if (unlikely(is_journal_aborted(journal))) {
				547	printk(KERN_EMERG "journal commit I/O error\n");
				548	err = -EIO;
				549	}
				550	return err;
				551	}
				552
				553	/*
				554	* Log buffer allocation routines:
				555	*/
				556
				557	int journal_next_log_block(journal_t journal, unsigned long retp)
				558	{
				559	unsigned long blocknr;
				560
				561	spin_lock(&journal->j_state_lock);
				562	J_ASSERT(journal->j_free > 1);
				563
				564	blocknr = journal->j_head;
				565	journal->j_head++;
				566	journal->j_free--;
				567	if (journal->j_head == journal->j_last)
				568	journal->j_head = journal->j_first;
				569	spin_unlock(&journal->j_state_lock);
				570	return journal_bmap(journal, blocknr, retp);
				571	}
				572
				573	/*
				574	* Conversion of logical to physical block numbers for the journal
				575	*
				576	* On external journals the journal blocks are identity-mapped, so
				577	* this is a no-op. If needed, we can use j_blk_offset - everything is
				578	* ready.
				579	*/
				580	int journal_bmap(journal_t *journal, unsigned long blocknr,
				581	unsigned long *retp)
				582	{
				583	int err = 0;
				584	unsigned long ret;
				585
				586	if (journal->j_inode) {
				587	ret = bmap(journal->j_inode, blocknr);
				588	if (ret)
				589	*retp = ret;
				590	else {
				591	char b[BDEVNAME_SIZE];
				592
				593	printk(KERN_ALERT "%s: journal block not found "
				594	"at offset %lu on %s\n",
				595	__FUNCTION__,
				596	blocknr,
				597	bdevname(journal->j_dev, b));
				598	err = -EIO;
				599	__journal_abort_soft(journal, err);
				600	}
				601	} else {
				602	retp = blocknr; / +journal->j_blk_offset */
				603	}
				604	return err;
				605	}
				606
				607	/*
				608	* We play buffer_head aliasing tricks to write data/metadata blocks to
				609	* the journal without copying their contents, but for journal
				610	* descriptor blocks we do need to generate bona fide buffers.
				611	*
				612	* After the caller of journal_get_descriptor_buffer() has finished modifying
				613	* the buffer's contents they really should run flush_dcache_page(bh->b_page).
				614	* But we don't bother doing that, so there will be coherency problems with
				615	* mmaps of blockdevs which hold live JBD-controlled filesystems.
				616	*/
				617	struct journal_head journal_get_descriptor_buffer(journal_t journal)
				618	{
				619	struct buffer_head *bh;
				620	unsigned long blocknr;
				621	int err;
				622
				623	err = journal_next_log_block(journal, &blocknr);
				624
				625	if (err)
				626	return NULL;
				627
				628	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
				629	lock_buffer(bh);
				630	memset(bh->b_data, 0, journal->j_blocksize);
				631	set_buffer_uptodate(bh);
				632	unlock_buffer(bh);
				633	BUFFER_TRACE(bh, "return this buffer");
				634	return journal_add_journal_head(bh);
				635	}
				636
				637	/*
				638	* Management for journal control blocks: functions to create and
				639	* destroy journal_t structures, and to initialise and read existing
				640	* journal blocks from disk. */
				641
				642	/* First: create and setup a journal_t object in memory. We initialise
				643	* very few fields yet: that has to wait until we have created the
				644	* journal structures from from scratch, or loaded them from disk. */
				645
				646	static journal_t * journal_init_common (void)
				647	{
				648	journal_t *journal;
				649	int err;
				650
				651	journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL);
				652	if (!journal)
				653	goto fail;
				654	memset(journal, 0, sizeof(*journal));
				655
				656	init_waitqueue_head(&journal->j_wait_transaction_locked);
				657	init_waitqueue_head(&journal->j_wait_logspace);
				658	init_waitqueue_head(&journal->j_wait_done_commit);
				659	init_waitqueue_head(&journal->j_wait_checkpoint);
				660	init_waitqueue_head(&journal->j_wait_commit);
				661	init_waitqueue_head(&journal->j_wait_updates);
				662	init_MUTEX(&journal->j_barrier);
				663	init_MUTEX(&journal->j_checkpoint_sem);
				664	spin_lock_init(&journal->j_revoke_lock);
				665	spin_lock_init(&journal->j_list_lock);
				666	spin_lock_init(&journal->j_state_lock);
				667
				668	journal->j_commit_interval = (HZ * JBD_DEFAULT_MAX_COMMIT_AGE);
				669
				670	/* The journal is marked for error until we succeed with recovery! */
				671	journal->j_flags = JFS_ABORT;
				672
				673	/* Set up a default-sized revoke table for the new mount. */
				674	err = journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
				675	if (err) {
				676	kfree(journal);
				677	goto fail;
				678	}
				679	return journal;
				680	fail:
				681	return NULL;
				682	}
				683
				684	/* journal_init_dev and journal_init_inode:
				685	*
				686	* Create a journal structure assigned some fixed set of disk blocks to
				687	* the journal. We don't actually touch those disk blocks yet, but we
				688	* need to set up all of the mapping information to tell the journaling
				689	* system where the journal blocks are.
				690	*
				691	*/
				692
				693	/**
				694	* journal_t * journal_init_dev() - creates and initialises a journal structure
				695	* @bdev: Block device on which to create the journal
				696	* @fs_dev: Device which hold journalled filesystem for this journal.
				697	* @start: Block nr Start of journal.
				698	* @len: Length of the journal in blocks.
				699	* @blocksize: blocksize of journalling device
				700	* @returns: a newly created journal_t *
				701	*
				702	* journal_init_dev creates a journal which maps a fixed contiguous
				703	* range of blocks on an arbitrary block device.
				704	*
				705	*/
				706	journal_t * journal_init_dev(struct block_device *bdev,
				707	struct block_device *fs_dev,
				708	int start, int len, int blocksize)
				709	{
				710	journal_t *journal = journal_init_common();
				711	struct buffer_head *bh;
				712	int n;
				713
				714	if (!journal)
				715	return NULL;
				716
				717	journal->j_dev = bdev;
				718	journal->j_fs_dev = fs_dev;
				719	journal->j_blk_offset = start;
				720	journal->j_maxlen = len;
				721	journal->j_blocksize = blocksize;
				722
				723	bh = __getblk(journal->j_dev, start, journal->j_blocksize);
				724	J_ASSERT(bh != NULL);
				725	journal->j_sb_buffer = bh;
				726	journal->j_superblock = (journal_superblock_t *)bh->b_data;
				727
				728	/* journal descriptor can store up to n blocks -bzzz */
				729	n = journal->j_blocksize / sizeof(journal_block_tag_t);
				730	journal->j_wbufsize = n;
				731	journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
				732	if (!journal->j_wbuf) {
				733	printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
				734	__FUNCTION__);
				735	kfree(journal);
				736	journal = NULL;
				737	}
				738
				739	return journal;
				740	}
				741
				742	/**
				743	* journal_t * journal_init_inode () - creates a journal which maps to a inode.
				744	* @inode: An inode to create the journal in
				745	*
				746	* journal_init_inode creates a journal which maps an on-disk inode as
				747	* the journal. The inode must exist already, must support bmap() and
				748	* must have all data blocks preallocated.
				749	*/
				750	journal_t * journal_init_inode (struct inode *inode)
				751	{
				752	struct buffer_head *bh;
				753	journal_t *journal = journal_init_common();
				754	int err;
				755	int n;
				756	unsigned long blocknr;
				757
				758	if (!journal)
				759	return NULL;
				760
				761	journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev;
				762	journal->j_inode = inode;
				763	jbd_debug(1,
				764	"journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
				765	journal, inode->i_sb->s_id, inode->i_ino,
				766	(long long) inode->i_size,
				767	inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
				768
				769	journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
				770	journal->j_blocksize = inode->i_sb->s_blocksize;
				771
				772	/* journal descriptor can store up to n blocks -bzzz */
				773	n = journal->j_blocksize / sizeof(journal_block_tag_t);
				774	journal->j_wbufsize = n;
				775	journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
				776	if (!journal->j_wbuf) {
				777	printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n",
				778	__FUNCTION__);
				779	kfree(journal);
				780	return NULL;
				781	}
				782
				783	err = journal_bmap(journal, 0, &blocknr);
				784	/* If that failed, give up */
				785	if (err) {
				786	printk(KERN_ERR "%s: Cannnot locate journal superblock\n",
				787	__FUNCTION__);
				788	kfree(journal);
				789	return NULL;
				790	}
				791
				792	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
				793	J_ASSERT(bh != NULL);
				794	journal->j_sb_buffer = bh;
				795	journal->j_superblock = (journal_superblock_t *)bh->b_data;
				796
				797	return journal;
				798	}
				799
				800	/*
				801	* If the journal init or create aborts, we need to mark the journal
				802	* superblock as being NULL to prevent the journal destroy from writing
				803	* back a bogus superblock.
				804	*/
				805	static void journal_fail_superblock (journal_t *journal)
				806	{
				807	struct buffer_head *bh = journal->j_sb_buffer;
				808	brelse(bh);
				809	journal->j_sb_buffer = NULL;
				810	}
				811
				812	/*
				813	* Given a journal_t structure, initialise the various fields for
				814	* startup of a new journaling session. We use this both when creating
				815	* a journal, and after recovering an old journal to reset it for
				816	* subsequent use.
				817	*/
				818
				819	static int journal_reset(journal_t *journal)
				820	{
				821	journal_superblock_t *sb = journal->j_superblock;
				822	unsigned int first, last;
				823
				824	first = be32_to_cpu(sb->s_first);
				825	last = be32_to_cpu(sb->s_maxlen);
				826
				827	journal->j_first = first;
				828	journal->j_last = last;
				829
				830	journal->j_head = first;
				831	journal->j_tail = first;
				832	journal->j_free = last - first;
				833
				834	journal->j_tail_sequence = journal->j_transaction_sequence;
				835	journal->j_commit_sequence = journal->j_transaction_sequence - 1;
				836	journal->j_commit_request = journal->j_commit_sequence;
				837
				838	journal->j_max_transaction_buffers = journal->j_maxlen / 4;
				839
				840	/* Add the dynamic fields and write it to disk. */
				841	journal_update_superblock(journal, 1);
				842	journal_start_thread(journal);
				843	return 0;
				844	}
				845
				846	/**
				847	* int journal_create() - Initialise the new journal file
				848	* @journal: Journal to create. This structure must have been initialised
				849	*
				850	* Given a journal_t structure which tells us which disk blocks we can
				851	* use, create a new journal superblock and initialise all of the
				852	* journal fields from scratch.
				853	**/
				854	int journal_create(journal_t *journal)
				855	{
				856	unsigned long blocknr;
				857	struct buffer_head *bh;
				858	journal_superblock_t *sb;
				859	int i, err;
				860
				861	if (journal->j_maxlen < JFS_MIN_JOURNAL_BLOCKS) {
				862	printk (KERN_ERR "Journal length (%d blocks) too short.\n",
				863	journal->j_maxlen);
				864	journal_fail_superblock(journal);
				865	return -EINVAL;
				866	}
				867
				868	if (journal->j_inode == NULL) {
				869	/*
				870	* We don't know what block to start at!
				871	*/
				872	printk(KERN_EMERG
				873	"%s: creation of journal on external device!\n",
				874	__FUNCTION__);
				875	BUG();
				876	}
				877
				878	/* Zero out the entire journal on disk. We cannot afford to
				879	have any blocks on disk beginning with JFS_MAGIC_NUMBER. */
				880	jbd_debug(1, "JBD: Zeroing out journal blocks...\n");
				881	for (i = 0; i < journal->j_maxlen; i++) {
				882	err = journal_bmap(journal, i, &blocknr);
				883	if (err)
				884	return err;
				885	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
				886	lock_buffer(bh);
				887	memset (bh->b_data, 0, journal->j_blocksize);
				888	BUFFER_TRACE(bh, "marking dirty");
				889	mark_buffer_dirty(bh);
				890	BUFFER_TRACE(bh, "marking uptodate");
				891	set_buffer_uptodate(bh);
				892	unlock_buffer(bh);
				893	__brelse(bh);
				894	}
				895
				896	sync_blockdev(journal->j_dev);
				897	jbd_debug(1, "JBD: journal cleared.\n");
				898
				899	/* OK, fill in the initial static fields in the new superblock */
				900	sb = journal->j_superblock;
				901
				902	sb->s_header.h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
				903	sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2);
				904
				905	sb->s_blocksize = cpu_to_be32(journal->j_blocksize);
				906	sb->s_maxlen = cpu_to_be32(journal->j_maxlen);
				907	sb->s_first = cpu_to_be32(1);
				908
				909	journal->j_transaction_sequence = 1;
				910
				911	journal->j_flags &= ~JFS_ABORT;
				912	journal->j_format_version = 2;
				913
				914	return journal_reset(journal);
				915	}
				916
				917	/**
				918	* void journal_update_superblock() - Update journal sb on disk.
				919	* @journal: The journal to update.
				920	* @wait: Set to '0' if you don't want to wait for IO completion.
				921	*
				922	* Update a journal's dynamic superblock fields and write it to disk,
				923	* optionally waiting for the IO to complete.
				924	*/
				925	void journal_update_superblock(journal_t *journal, int wait)
				926	{
				927	journal_superblock_t *sb = journal->j_superblock;
				928	struct buffer_head *bh = journal->j_sb_buffer;
				929
				930	/*
				931	* As a special case, if the on-disk copy is already marked as needing
				932	* no recovery (s_start == 0) and there are no outstanding transactions
				933	* in the filesystem, then we can safely defer the superblock update
				934	* until the next commit by setting JFS_FLUSHED. This avoids
				935	* attempting a write to a potential-readonly device.
				936	*/
				937	if (sb->s_start == 0 && journal->j_tail_sequence ==
				938	journal->j_transaction_sequence) {
				939	jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
				940	"(start %ld, seq %d, errno %d)\n",
				941	journal->j_tail, journal->j_tail_sequence,
				942	journal->j_errno);
				943	goto out;
				944	}
				945
				946	spin_lock(&journal->j_state_lock);
				947	jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
				948	journal->j_tail, journal->j_tail_sequence, journal->j_errno);
				949
				950	sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
				951	sb->s_start = cpu_to_be32(journal->j_tail);
				952	sb->s_errno = cpu_to_be32(journal->j_errno);
				953	spin_unlock(&journal->j_state_lock);
				954
				955	BUFFER_TRACE(bh, "marking dirty");
				956	mark_buffer_dirty(bh);
				957	if (wait)
				958	sync_dirty_buffer(bh);
				959	else
Jan Kara	2670769	2005-09-06 15:19:12 -0700	[diff] [blame]	960	ll_rw_block(SWRITE, 1, &bh);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	961
				962	out:
				963	/* If we have just flushed the log (by marking s_start==0), then
				964	* any future commit will have to be careful to update the
				965	* superblock again to re-record the true start of the log. */
				966
				967	spin_lock(&journal->j_state_lock);
				968	if (sb->s_start)
				969	journal->j_flags &= ~JFS_FLUSHED;
				970	else
				971	journal->j_flags \|= JFS_FLUSHED;
				972	spin_unlock(&journal->j_state_lock);
				973	}
				974
				975	/*
				976	* Read the superblock for a given journal, performing initial
				977	* validation of the format.
				978	*/
				979
				980	static int journal_get_superblock(journal_t *journal)
				981	{
				982	struct buffer_head *bh;
				983	journal_superblock_t *sb;
				984	int err = -EIO;
				985
				986	bh = journal->j_sb_buffer;
				987
				988	J_ASSERT(bh != NULL);
				989	if (!buffer_uptodate(bh)) {
				990	ll_rw_block(READ, 1, &bh);
				991	wait_on_buffer(bh);
				992	if (!buffer_uptodate(bh)) {
				993	printk (KERN_ERR
				994	"JBD: IO error reading journal superblock\n");
				995	goto out;
				996	}
				997	}
				998
				999	sb = journal->j_superblock;
				1000
				1001	err = -EINVAL;
				1002
				1003	if (sb->s_header.h_magic != cpu_to_be32(JFS_MAGIC_NUMBER) \|\|
				1004	sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
				1005	printk(KERN_WARNING "JBD: no valid journal superblock found\n");
				1006	goto out;
				1007	}
				1008
				1009	switch(be32_to_cpu(sb->s_header.h_blocktype)) {
				1010	case JFS_SUPERBLOCK_V1:
				1011	journal->j_format_version = 1;
				1012	break;
				1013	case JFS_SUPERBLOCK_V2:
				1014	journal->j_format_version = 2;
				1015	break;
				1016	default:
				1017	printk(KERN_WARNING "JBD: unrecognised superblock format ID\n");
				1018	goto out;
				1019	}
				1020
				1021	if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
				1022	journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
				1023	else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
				1024	printk (KERN_WARNING "JBD: journal file too short\n");
				1025	goto out;
				1026	}
				1027
				1028	return 0;
				1029
				1030	out:
				1031	journal_fail_superblock(journal);
				1032	return err;
				1033	}
				1034
				1035	/*
				1036	* Load the on-disk journal superblock and read the key fields into the
				1037	* journal_t.
				1038	*/
				1039
				1040	static int load_superblock(journal_t *journal)
				1041	{
				1042	int err;
				1043	journal_superblock_t *sb;
				1044
				1045	err = journal_get_superblock(journal);
				1046	if (err)
				1047	return err;
				1048
				1049	sb = journal->j_superblock;
				1050
				1051	journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
				1052	journal->j_tail = be32_to_cpu(sb->s_start);
				1053	journal->j_first = be32_to_cpu(sb->s_first);
				1054	journal->j_last = be32_to_cpu(sb->s_maxlen);
				1055	journal->j_errno = be32_to_cpu(sb->s_errno);
				1056
				1057	return 0;
				1058	}
				1059
				1060
				1061	/**
				1062	* int journal_load() - Read journal from disk.
				1063	* @journal: Journal to act on.
				1064	*
				1065	* Given a journal_t structure which tells us which disk blocks contain
				1066	* a journal, read the journal from disk to initialise the in-memory
				1067	* structures.
				1068	*/
				1069	int journal_load(journal_t *journal)
				1070	{
				1071	int err;
				1072
				1073	err = load_superblock(journal);
				1074	if (err)
				1075	return err;
				1076
				1077	/* If this is a V2 superblock, then we have to check the
				1078	* features flags on it. */
				1079
				1080	if (journal->j_format_version >= 2) {
				1081	journal_superblock_t *sb = journal->j_superblock;
				1082
				1083	if ((sb->s_feature_ro_compat &
				1084	~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) \|\|
				1085	(sb->s_feature_incompat &
				1086	~cpu_to_be32(JFS_KNOWN_INCOMPAT_FEATURES))) {
				1087	printk (KERN_WARNING
				1088	"JBD: Unrecognised features on journal\n");
				1089	return -EINVAL;
				1090	}
				1091	}
				1092
				1093	/* Let the recovery code check whether it needs to recover any
				1094	* data from the journal. */
				1095	if (journal_recover(journal))
				1096	goto recovery_error;
				1097
				1098	/* OK, we've finished with the dynamic journal bits:
				1099	* reinitialise the dynamic contents of the superblock in memory
				1100	* and reset them on disk. */
				1101	if (journal_reset(journal))
				1102	goto recovery_error;
				1103
				1104	journal->j_flags &= ~JFS_ABORT;
				1105	journal->j_flags \|= JFS_LOADED;
				1106	return 0;
				1107
				1108	recovery_error:
				1109	printk (KERN_WARNING "JBD: recovery failed\n");
				1110	return -EIO;
				1111	}
				1112
				1113	/**
				1114	* void journal_destroy() - Release a journal_t structure.
				1115	* @journal: Journal to act on.
				1116	*
				1117	* Release a journal_t structure once it is no longer in use by the
				1118	* journaled object.
				1119	*/
				1120	void journal_destroy(journal_t *journal)
				1121	{
				1122	/* Wait for the commit thread to wake up and die. */
				1123	journal_kill_thread(journal);
				1124
				1125	/* Force a final log commit */
				1126	if (journal->j_running_transaction)
				1127	journal_commit_transaction(journal);
				1128
				1129	/* Force any old transactions to disk */
				1130
				1131	/* Totally anal locking here... */
				1132	spin_lock(&journal->j_list_lock);
				1133	while (journal->j_checkpoint_transactions != NULL) {
				1134	spin_unlock(&journal->j_list_lock);
				1135	log_do_checkpoint(journal);
				1136	spin_lock(&journal->j_list_lock);
				1137	}
				1138
				1139	J_ASSERT(journal->j_running_transaction == NULL);
				1140	J_ASSERT(journal->j_committing_transaction == NULL);
				1141	J_ASSERT(journal->j_checkpoint_transactions == NULL);
				1142	spin_unlock(&journal->j_list_lock);
				1143
				1144	/* We can now mark the journal as empty. */
				1145	journal->j_tail = 0;
				1146	journal->j_tail_sequence = ++journal->j_transaction_sequence;
				1147	if (journal->j_sb_buffer) {
				1148	journal_update_superblock(journal, 1);
				1149	brelse(journal->j_sb_buffer);
				1150	}
				1151
				1152	if (journal->j_inode)
				1153	iput(journal->j_inode);
				1154	if (journal->j_revoke)
				1155	journal_destroy_revoke(journal);
				1156	kfree(journal->j_wbuf);
				1157	kfree(journal);
				1158	}
				1159
				1160
				1161	/**
				1162	*int journal_check_used_features () - Check if features specified are used.
				1163	* @journal: Journal to check.
				1164	* @compat: bitmask of compatible features
				1165	* @ro: bitmask of features that force read-only mount
				1166	* @incompat: bitmask of incompatible features
				1167	*
				1168	* Check whether the journal uses all of a given set of
				1169	* features. Return true (non-zero) if it does.
				1170	**/
				1171
				1172	int journal_check_used_features (journal_t *journal, unsigned long compat,
				1173	unsigned long ro, unsigned long incompat)
				1174	{
				1175	journal_superblock_t *sb;
				1176
				1177	if (!compat && !ro && !incompat)
				1178	return 1;
				1179	if (journal->j_format_version == 1)
				1180	return 0;
				1181
				1182	sb = journal->j_superblock;
				1183
				1184	if (((be32_to_cpu(sb->s_feature_compat) & compat) == compat) &&
				1185	((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) &&
				1186	((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat))
				1187	return 1;
				1188
				1189	return 0;
				1190	}
				1191
				1192	/**
				1193	* int journal_check_available_features() - Check feature set in journalling layer
				1194	* @journal: Journal to check.
				1195	* @compat: bitmask of compatible features
				1196	* @ro: bitmask of features that force read-only mount
				1197	* @incompat: bitmask of incompatible features
				1198	*
				1199	* Check whether the journaling code supports the use of
				1200	* all of a given set of features on this journal. Return true
				1201	* (non-zero) if it can. */
				1202
				1203	int journal_check_available_features (journal_t *journal, unsigned long compat,
				1204	unsigned long ro, unsigned long incompat)
				1205	{
				1206	journal_superblock_t *sb;
				1207
				1208	if (!compat && !ro && !incompat)
				1209	return 1;
				1210
				1211	sb = journal->j_superblock;
				1212
				1213	/* We can support any known requested features iff the
				1214	* superblock is in version 2. Otherwise we fail to support any
				1215	* extended sb features. */
				1216
				1217	if (journal->j_format_version != 2)
				1218	return 0;
				1219
				1220	if ((compat & JFS_KNOWN_COMPAT_FEATURES) == compat &&
				1221	(ro & JFS_KNOWN_ROCOMPAT_FEATURES) == ro &&
				1222	(incompat & JFS_KNOWN_INCOMPAT_FEATURES) == incompat)
				1223	return 1;
				1224
				1225	return 0;
				1226	}
				1227
				1228	/**
				1229	* int journal_set_features () - Mark a given journal feature in the superblock
				1230	* @journal: Journal to act on.
				1231	* @compat: bitmask of compatible features
				1232	* @ro: bitmask of features that force read-only mount
				1233	* @incompat: bitmask of incompatible features
				1234	*
				1235	* Mark a given journal feature as present on the
				1236	* superblock. Returns true if the requested features could be set.
				1237	*
				1238	*/
				1239
				1240	int journal_set_features (journal_t *journal, unsigned long compat,
				1241	unsigned long ro, unsigned long incompat)
				1242	{
				1243	journal_superblock_t *sb;
				1244
				1245	if (journal_check_used_features(journal, compat, ro, incompat))
				1246	return 1;
				1247
				1248	if (!journal_check_available_features(journal, compat, ro, incompat))
				1249	return 0;
				1250
				1251	jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
				1252	compat, ro, incompat);
				1253
				1254	sb = journal->j_superblock;
				1255
				1256	sb->s_feature_compat \|= cpu_to_be32(compat);
				1257	sb->s_feature_ro_compat \|= cpu_to_be32(ro);
				1258	sb->s_feature_incompat \|= cpu_to_be32(incompat);
				1259
				1260	return 1;
				1261	}
				1262
				1263
				1264	/**
				1265	* int journal_update_format () - Update on-disk journal structure.
				1266	* @journal: Journal to act on.
				1267	*
				1268	* Given an initialised but unloaded journal struct, poke about in the
				1269	* on-disk structure to update it to the most recent supported version.
				1270	*/
				1271	int journal_update_format (journal_t *journal)
				1272	{
				1273	journal_superblock_t *sb;
				1274	int err;
				1275
				1276	err = journal_get_superblock(journal);
				1277	if (err)
				1278	return err;
				1279
				1280	sb = journal->j_superblock;
				1281
				1282	switch (be32_to_cpu(sb->s_header.h_blocktype)) {
				1283	case JFS_SUPERBLOCK_V2:
				1284	return 0;
				1285	case JFS_SUPERBLOCK_V1:
				1286	return journal_convert_superblock_v1(journal, sb);
				1287	default:
				1288	break;
				1289	}
				1290	return -EINVAL;
				1291	}
				1292
				1293	static int journal_convert_superblock_v1(journal_t *journal,
				1294	journal_superblock_t *sb)
				1295	{
				1296	int offset, blocksize;
				1297	struct buffer_head *bh;
				1298
				1299	printk(KERN_WARNING
				1300	"JBD: Converting superblock from version 1 to 2.\n");
				1301
				1302	/* Pre-initialise new fields to zero */
				1303	offset = ((char ) &(sb->s_feature_compat)) - ((char ) sb);
				1304	blocksize = be32_to_cpu(sb->s_blocksize);
				1305	memset(&sb->s_feature_compat, 0, blocksize-offset);
				1306
				1307	sb->s_nr_users = cpu_to_be32(1);
				1308	sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2);
				1309	journal->j_format_version = 2;
				1310
				1311	bh = journal->j_sb_buffer;
				1312	BUFFER_TRACE(bh, "marking dirty");
				1313	mark_buffer_dirty(bh);
				1314	sync_dirty_buffer(bh);
				1315	return 0;
				1316	}
				1317
				1318
				1319	/**
				1320	* int journal_flush () - Flush journal
				1321	* @journal: Journal to act on.
				1322	*
				1323	* Flush all data for a given journal to disk and empty the journal.
				1324	* Filesystems can use this when remounting readonly to ensure that
				1325	* recovery does not need to happen on remount.
				1326	*/
				1327
				1328	int journal_flush(journal_t *journal)
				1329	{
				1330	int err = 0;
				1331	transaction_t *transaction = NULL;
				1332	unsigned long old_tail;
				1333
				1334	spin_lock(&journal->j_state_lock);
				1335
				1336	/* Force everything buffered to the log... */
				1337	if (journal->j_running_transaction) {
				1338	transaction = journal->j_running_transaction;
				1339	__log_start_commit(journal, transaction->t_tid);
				1340	} else if (journal->j_committing_transaction)
				1341	transaction = journal->j_committing_transaction;
				1342
				1343	/* Wait for the log commit to complete... */
				1344	if (transaction) {
				1345	tid_t tid = transaction->t_tid;
				1346
				1347	spin_unlock(&journal->j_state_lock);
				1348	log_wait_commit(journal, tid);
				1349	} else {
				1350	spin_unlock(&journal->j_state_lock);
				1351	}
				1352
				1353	/* ...and flush everything in the log out to disk. */
				1354	spin_lock(&journal->j_list_lock);
				1355	while (!err && journal->j_checkpoint_transactions != NULL) {
				1356	spin_unlock(&journal->j_list_lock);
				1357	err = log_do_checkpoint(journal);
				1358	spin_lock(&journal->j_list_lock);
				1359	}
				1360	spin_unlock(&journal->j_list_lock);
				1361	cleanup_journal_tail(journal);
				1362
				1363	/* Finally, mark the journal as really needing no recovery.
				1364	* This sets s_start==0 in the underlying superblock, which is
				1365	* the magic code for a fully-recovered superblock. Any future
				1366	* commits of data to the journal will restore the current
				1367	* s_start value. */
				1368	spin_lock(&journal->j_state_lock);
				1369	old_tail = journal->j_tail;
				1370	journal->j_tail = 0;
				1371	spin_unlock(&journal->j_state_lock);
				1372	journal_update_superblock(journal, 1);
				1373	spin_lock(&journal->j_state_lock);
				1374	journal->j_tail = old_tail;
				1375
				1376	J_ASSERT(!journal->j_running_transaction);
				1377	J_ASSERT(!journal->j_committing_transaction);
				1378	J_ASSERT(!journal->j_checkpoint_transactions);
				1379	J_ASSERT(journal->j_head == journal->j_tail);
				1380	J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
				1381	spin_unlock(&journal->j_state_lock);
				1382	return err;
				1383	}
				1384
				1385	/**
				1386	* int journal_wipe() - Wipe journal contents
				1387	* @journal: Journal to act on.
				1388	* @write: flag (see below)
				1389	*
				1390	* Wipe out all of the contents of a journal, safely. This will produce
				1391	* a warning if the journal contains any valid recovery information.
				1392	* Must be called between journal_init_*() and journal_load().
				1393	*
				1394	* If 'write' is non-zero, then we wipe out the journal on disk; otherwise
				1395	* we merely suppress recovery.
				1396	*/
				1397
				1398	int journal_wipe(journal_t *journal, int write)
				1399	{
				1400	journal_superblock_t *sb;
				1401	int err = 0;
				1402
				1403	J_ASSERT (!(journal->j_flags & JFS_LOADED));
				1404
				1405	err = load_superblock(journal);
				1406	if (err)
				1407	return err;
				1408
				1409	sb = journal->j_superblock;
				1410
				1411	if (!journal->j_tail)
				1412	goto no_recovery;
				1413
				1414	printk (KERN_WARNING "JBD: %s recovery information on journal\n",
				1415	write ? "Clearing" : "Ignoring");
				1416
				1417	err = journal_skip_recovery(journal);
				1418	if (write)
				1419	journal_update_superblock(journal, 1);
				1420
				1421	no_recovery:
				1422	return err;
				1423	}
				1424
				1425	/*
				1426	* journal_dev_name: format a character string to describe on what
				1427	* device this journal is present.
				1428	*/
				1429
Adrian Bunk	022a4a7	2005-09-06 15:16:41 -0700	[diff] [blame]	1430	static const char journal_dev_name(journal_t journal, char *buffer)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1431	{
				1432	struct block_device *bdev;
				1433
				1434	if (journal->j_inode)
				1435	bdev = journal->j_inode->i_sb->s_bdev;
				1436	else
				1437	bdev = journal->j_dev;
				1438
				1439	return bdevname(bdev, buffer);
				1440	}
				1441
/*
 * Journal abort has very specific semantics, which are described in the
 * documentation of journal_abort() below.
 *
 * The two internal functions which provide abort to the jbd layer
 * itself are here.
 */
				1449
				1450	/*
				1451	* Quick version for internal journal use (doesn't lock the journal).
				1452	* Aborts hard --- we mark the abort as occurred, but do _nothing_ else,
				1453	* and don't attempt to make any other journal updates.
				1454	*/
				1455	void __journal_abort_hard(journal_t *journal)
				1456	{
				1457	transaction_t *transaction;
				1458	char b[BDEVNAME_SIZE];
				1459
				1460	if (journal->j_flags & JFS_ABORT)
				1461	return;
				1462
				1463	printk(KERN_ERR "Aborting journal on device %s.\n",
				1464	journal_dev_name(journal, b));
				1465
				1466	spin_lock(&journal->j_state_lock);
				1467	journal->j_flags \|= JFS_ABORT;
				1468	transaction = journal->j_running_transaction;
				1469	if (transaction)
				1470	__log_start_commit(journal, transaction->t_tid);
				1471	spin_unlock(&journal->j_state_lock);
				1472	}
				1473
				1474	/* Soft abort: record the abort error status in the journal superblock,
				1475	* but don't do any other IO. */
Adrian Bunk	022a4a7	2005-09-06 15:16:41 -0700	[diff] [blame]	1476	static void __journal_abort_soft (journal_t *journal, int errno)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1477	{
				1478	if (journal->j_flags & JFS_ABORT)
				1479	return;
				1480
				1481	if (!journal->j_errno)
				1482	journal->j_errno = errno;
				1483
				1484	__journal_abort_hard(journal);
				1485
				1486	if (errno)
				1487	journal_update_superblock(journal, 1);
				1488	}
				1489
				1490	/**
				1491	* void journal_abort () - Shutdown the journal immediately.
				1492	* @journal: the journal to shutdown.
				1493	* @errno: an error number to record in the journal indicating
				1494	* the reason for the shutdown.
				1495	*
				1496	* Perform a complete, immediate shutdown of the ENTIRE
				1497	* journal (not of a single transaction). This operation cannot be
				1498	* undone without closing and reopening the journal.
				1499	*
				1500	* The journal_abort function is intended to support higher level error
				1501	* recovery mechanisms such as the ext2/ext3 remount-readonly error
				1502	* mode.
				1503	*
				1504	* Journal abort has very specific semantics. Any existing dirty,
				1505	* unjournaled buffers in the main filesystem will still be written to
				1506	* disk by bdflush, but the journaling mechanism will be suspended
				1507	* immediately and no further transaction commits will be honoured.
				1508	*
				1509	* Any dirty, journaled buffers will be written back to disk without
				1510	* hitting the journal. Atomicity cannot be guaranteed on an aborted
				1511	* filesystem, but we _do_ attempt to leave as much data as possible
				1512	* behind for fsck to use for cleanup.
				1513	*
				1514	* Any attempt to get a new transaction handle on a journal which is in
				1515	* ABORT state will just result in an -EROFS error return. A
				1516	* journal_stop on an existing handle will return -EIO if we have
				1517	* entered abort state during the update.
				1518	*
				1519	* Recursive transactions are not disturbed by journal abort until the
				1520	* final journal_stop, which will receive the -EIO error.
				1521	*
				1522	* Finally, the journal_abort call allows the caller to supply an errno
				1523	* which will be recorded (if possible) in the journal superblock. This
				1524	* allows a client to record failure conditions in the middle of a
				1525	* transaction without having to complete the transaction to record the
				1526	* failure to disk. ext3_error, for example, now uses this
				1527	* functionality.
				1528	*
				1529	* Errors which originate from within the journaling layer will NOT
				1530	* supply an errno; a null errno implies that absolutely no further
				1531	* writes are done to the journal (unless there are any already in
				1532	* progress).
				1533	*
				1534	*/
				1535
				1536	void journal_abort(journal_t *journal, int errno)
				1537	{
				1538	__journal_abort_soft(journal, errno);
				1539	}
				1540
				1541	/**
				1542	* int journal_errno () - returns the journal's error state.
				1543	* @journal: journal to examine.
				1544	*
				1545	* This is the errno numbet set with journal_abort(), the last
				1546	* time the journal was mounted - if the journal was stopped
				1547	* without calling abort this will be 0.
				1548	*
				1549	* If the journal has been aborted on this mount time -EROFS will
				1550	* be returned.
				1551	*/
				1552	int journal_errno(journal_t *journal)
				1553	{
				1554	int err;
				1555
				1556	spin_lock(&journal->j_state_lock);
				1557	if (journal->j_flags & JFS_ABORT)
				1558	err = -EROFS;
				1559	else
				1560	err = journal->j_errno;
				1561	spin_unlock(&journal->j_state_lock);
				1562	return err;
				1563	}
				1564
				1565	/**
				1566	* int journal_clear_err () - clears the journal's error state
				1567	* @journal: journal to act on.
				1568	*
				1569	* An error must be cleared or Acked to take a FS out of readonly
				1570	* mode.
				1571	*/
				1572	int journal_clear_err(journal_t *journal)
				1573	{
				1574	int err = 0;
				1575
				1576	spin_lock(&journal->j_state_lock);
				1577	if (journal->j_flags & JFS_ABORT)
				1578	err = -EROFS;
				1579	else
				1580	journal->j_errno = 0;
				1581	spin_unlock(&journal->j_state_lock);
				1582	return err;
				1583	}
				1584
				1585	/**
				1586	* void journal_ack_err() - Ack journal err.
				1587	* @journal: journal to act on.
				1588	*
				1589	* An error must be cleared or Acked to take a FS out of readonly
				1590	* mode.
				1591	*/
				1592	void journal_ack_err(journal_t *journal)
				1593	{
				1594	spin_lock(&journal->j_state_lock);
				1595	if (journal->j_errno)
				1596	journal->j_flags \|= JFS_ACK_ERR;
				1597	spin_unlock(&journal->j_state_lock);
				1598	}
				1599
				1600	int journal_blocks_per_page(struct inode *inode)
				1601	{
				1602	return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
				1603	}
				1604
				1605	/*
				1606	* Simple support for retrying memory allocations. Introduced to help to
				1607	* debug different VM deadlock avoidance strategies.
				1608	*/
Al Viro	27496a8	2005-10-21 03:20:48 -0400	[diff] [blame]	1609	void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1610	{
				1611	return kmalloc(size, flags \| (retry ? __GFP_NOFAIL : 0));
				1612	}
				1613
				1614	/*
				1615	* Journal_head storage management
				1616	*/
				1617	static kmem_cache_t *journal_head_cache;
				1618	#ifdef CONFIG_JBD_DEBUG
				1619	static atomic_t nr_journal_heads = ATOMIC_INIT(0);
				1620	#endif
				1621
				1622	static int journal_init_journal_head_cache(void)
				1623	{
				1624	int retval;
				1625
				1626	J_ASSERT(journal_head_cache == 0);
				1627	journal_head_cache = kmem_cache_create("journal_head",
				1628	sizeof(struct journal_head),
				1629	0, /* offset */
				1630	0, /* flags */
				1631	NULL, /* ctor */
				1632	NULL); /* dtor */
				1633	retval = 0;
				1634	if (journal_head_cache == 0) {
				1635	retval = -ENOMEM;
				1636	printk(KERN_EMERG "JBD: no memory for journal_head cache\n");
				1637	}
				1638	return retval;
				1639	}
				1640
				1641	static void journal_destroy_journal_head_cache(void)
				1642	{
				1643	J_ASSERT(journal_head_cache != NULL);
				1644	kmem_cache_destroy(journal_head_cache);
				1645	journal_head_cache = NULL;
				1646	}
				1647
				1648	/*
				1649	* journal_head splicing and dicing
				1650	*/
				1651	static struct journal_head *journal_alloc_journal_head(void)
				1652	{
				1653	struct journal_head *ret;
				1654	static unsigned long last_warning;
				1655
				1656	#ifdef CONFIG_JBD_DEBUG
				1657	atomic_inc(&nr_journal_heads);
				1658	#endif
				1659	ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
				1660	if (ret == 0) {
				1661	jbd_debug(1, "out of memory for journal_head\n");
				1662	if (time_after(jiffies, last_warning + 5*HZ)) {
				1663	printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
				1664	__FUNCTION__);
				1665	last_warning = jiffies;
				1666	}
				1667	while (ret == 0) {
				1668	yield();
				1669	ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
				1670	}
				1671	}
				1672	return ret;
				1673	}
				1674
				1675	static void journal_free_journal_head(struct journal_head *jh)
				1676	{
				1677	#ifdef CONFIG_JBD_DEBUG
				1678	atomic_dec(&nr_journal_heads);
				1679	memset(jh, 0x5b, sizeof(*jh));
				1680	#endif
				1681	kmem_cache_free(journal_head_cache, jh);
				1682	}
				1683
				1684	/*
				1685	* A journal_head is attached to a buffer_head whenever JBD has an
				1686	* interest in the buffer.
				1687	*
				1688	* Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit
				1689	* is set. This bit is tested in core kernel code where we need to take
				1690	* JBD-specific actions. Testing the zeroness of ->b_private is not reliable
				1691	* there.
				1692	*
				1693	* When a buffer has its BH_JBD bit set, its ->b_count is elevated by one.
				1694	*
				1695	* When a buffer has its BH_JBD bit set it is immune from being released by
				1696	* core kernel code, mainly via ->b_count.
				1697	*
				1698	* A journal_head may be detached from its buffer_head when the journal_head's
				1699	* b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
				1700	* Various places in JBD call journal_remove_journal_head() to indicate that the
				1701	* journal_head can be dropped if needed.
				1702	*
				1703	* Various places in the kernel want to attach a journal_head to a buffer_head
				1704	* _before_ attaching the journal_head to a transaction. To protect the
				1705	* journal_head in this situation, journal_add_journal_head elevates the
				1706	* journal_head's b_jcount refcount by one. The caller must call
				1707	* journal_put_journal_head() to undo this.
				1708	*
				1709	* So the typical usage would be:
				1710	*
				1711	* (Attach a journal_head if needed. Increments b_jcount)
				1712	* struct journal_head *jh = journal_add_journal_head(bh);
				1713	* ...
				1714	* jh->b_transaction = xxx;
				1715	* journal_put_journal_head(jh);
				1716	*
				1717	* Now, the journal_head's b_jcount is zero, but it is safe from being released
				1718	* because it has a non-zero b_transaction.
				1719	*/
				1720
				1721	/*
				1722	* Give a buffer_head a journal_head.
				1723	*
				1724	* Doesn't need the journal lock.
				1725	* May sleep.
				1726	*/
				1727	struct journal_head journal_add_journal_head(struct buffer_head bh)
				1728	{
				1729	struct journal_head *jh;
				1730	struct journal_head *new_jh = NULL;
				1731
				1732	repeat:
				1733	if (!buffer_jbd(bh)) {
				1734	new_jh = journal_alloc_journal_head();
				1735	memset(new_jh, 0, sizeof(*new_jh));
				1736	}
				1737
				1738	jbd_lock_bh_journal_head(bh);
				1739	if (buffer_jbd(bh)) {
				1740	jh = bh2jh(bh);
				1741	} else {
				1742	J_ASSERT_BH(bh,
				1743	(atomic_read(&bh->b_count) > 0) \|\|
				1744	(bh->b_page && bh->b_page->mapping));
				1745
				1746	if (!new_jh) {
				1747	jbd_unlock_bh_journal_head(bh);
				1748	goto repeat;
				1749	}
				1750
				1751	jh = new_jh;
				1752	new_jh = NULL; /* We consumed it */
				1753	set_buffer_jbd(bh);
				1754	bh->b_private = jh;
				1755	jh->b_bh = bh;
				1756	get_bh(bh);
				1757	BUFFER_TRACE(bh, "added journal_head");
				1758	}
				1759	jh->b_jcount++;
				1760	jbd_unlock_bh_journal_head(bh);
				1761	if (new_jh)
				1762	journal_free_journal_head(new_jh);
				1763	return bh->b_private;
				1764	}
				1765
				1766	/*
				1767	* Grab a ref against this buffer_head's journal_head. If it ended up not
				1768	* having a journal_head, return NULL
				1769	*/
				1770	struct journal_head journal_grab_journal_head(struct buffer_head bh)
				1771	{
				1772	struct journal_head *jh = NULL;
				1773
				1774	jbd_lock_bh_journal_head(bh);
				1775	if (buffer_jbd(bh)) {
				1776	jh = bh2jh(bh);
				1777	jh->b_jcount++;
				1778	}
				1779	jbd_unlock_bh_journal_head(bh);
				1780	return jh;
				1781	}
				1782
				1783	static void __journal_remove_journal_head(struct buffer_head *bh)
				1784	{
				1785	struct journal_head *jh = bh2jh(bh);
				1786
				1787	J_ASSERT_JH(jh, jh->b_jcount >= 0);
				1788
				1789	get_bh(bh);
				1790	if (jh->b_jcount == 0) {
				1791	if (jh->b_transaction == NULL &&
				1792	jh->b_next_transaction == NULL &&
				1793	jh->b_cp_transaction == NULL) {
				1794	J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
				1795	J_ASSERT_BH(bh, buffer_jbd(bh));
				1796	J_ASSERT_BH(bh, jh2bh(jh) == bh);
				1797	BUFFER_TRACE(bh, "remove journal_head");
				1798	if (jh->b_frozen_data) {
				1799	printk(KERN_WARNING "%s: freeing "
				1800	"b_frozen_data\n",
				1801	__FUNCTION__);
				1802	kfree(jh->b_frozen_data);
				1803	}
				1804	if (jh->b_committed_data) {
				1805	printk(KERN_WARNING "%s: freeing "
				1806	"b_committed_data\n",
				1807	__FUNCTION__);
				1808	kfree(jh->b_committed_data);
				1809	}
				1810	bh->b_private = NULL;
				1811	jh->b_bh = NULL; /* debug, really */
				1812	clear_buffer_jbd(bh);
				1813	__brelse(bh);
				1814	journal_free_journal_head(jh);
				1815	} else {
				1816	BUFFER_TRACE(bh, "journal_head was locked");
				1817	}
				1818	}
				1819	}
				1820
/*
 * journal_remove_journal_head(): if the buffer isn't attached to a transaction
 * and has a zero b_jcount then remove and release its journal_head.  When that
 * happens, the buffer reference that was taken on behalf of the journal_head
 * is dropped as well.
 *
 * Independently of whether the journal_head was removed, an additional
 * reference is taken on ->b_count as a convenience, because the caller
 * usually wants to do additional things with the bh after calling here.
 * The caller of journal_remove_journal_head() must therefore run __brelse(bh)
 * at some time.  Once the caller has run __brelse(), the buffer is eligible
 * for reaping by try_to_free_buffers().
 */
void journal_remove_journal_head(struct buffer_head *bh)
{
	/* The helper expects the bh journal_head lock to be held. */
	jbd_lock_bh_journal_head(bh);
	__journal_remove_journal_head(bh);
	jbd_unlock_bh_journal_head(bh);
}
				1840
				1841	/*
				1842	* Drop a reference on the passed journal_head. If it fell to zero then try to
				1843	* release the journal_head from the buffer_head.
				1844	*/
				1845	void journal_put_journal_head(struct journal_head *jh)
				1846	{
				1847	struct buffer_head *bh = jh2bh(jh);
				1848
				1849	jbd_lock_bh_journal_head(bh);
				1850	J_ASSERT_JH(jh, jh->b_jcount > 0);
				1851	--jh->b_jcount;
				1852	if (!jh->b_jcount && !jh->b_transaction) {
				1853	__journal_remove_journal_head(bh);
				1854	__brelse(bh);
				1855	}
				1856	jbd_unlock_bh_journal_head(bh);
				1857	}
				1858
/*
 * /proc tunables
 */
#if defined(CONFIG_JBD_DEBUG)
/*
 * Integer knob exported to other modules.  When CONFIG_PROC_FS is also
 * enabled it is read and written through the sys/fs/jbd-debug /proc entry.
 */
int journal_enable_debug;
EXPORT_SYMBOL(journal_enable_debug);
#endif
				1866
				1867	#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS)
				1868
				1869	static struct proc_dir_entry *proc_jbd_debug;
				1870
Adrian Bunk	022a4a7	2005-09-06 15:16:41 -0700	[diff] [blame]	1871	static int read_jbd_debug(char page, char *start, off_t off,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1872	int count, int eof, void data)
				1873	{
				1874	int ret;
				1875
				1876	ret = sprintf(page + off, "%d\n", journal_enable_debug);
				1877	*eof = 1;
				1878	return ret;
				1879	}
				1880
Adrian Bunk	022a4a7	2005-09-06 15:16:41 -0700	[diff] [blame]	1881	static int write_jbd_debug(struct file file, const char __user buffer,
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1882	unsigned long count, void *data)
				1883	{
				1884	char buf[32];
				1885
				1886	if (count > ARRAY_SIZE(buf) - 1)
				1887	count = ARRAY_SIZE(buf) - 1;
				1888	if (copy_from_user(buf, buffer, count))
				1889	return -EFAULT;
				1890	buf[ARRAY_SIZE(buf) - 1] = '\0';
				1891	journal_enable_debug = simple_strtoul(buf, NULL, 10);
				1892	return count;
				1893	}
				1894
				1895	#define JBD_PROC_NAME "sys/fs/jbd-debug"
				1896
				1897	static void __init create_jbd_proc_entry(void)
				1898	{
				1899	proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL);
				1900	if (proc_jbd_debug) {
				1901	/* Why is this so hard? */
				1902	proc_jbd_debug->read_proc = read_jbd_debug;
				1903	proc_jbd_debug->write_proc = write_jbd_debug;
				1904	}
				1905	}
				1906
				1907	static void __exit remove_jbd_proc_entry(void)
				1908	{
				1909	if (proc_jbd_debug)
				1910	remove_proc_entry(JBD_PROC_NAME, NULL);
				1911	}
				1912
				1913	#else
				1914
/*
 * Without both CONFIG_JBD_DEBUG and CONFIG_PROC_FS there is no /proc knob:
 * the create/remove hooks expand to empty statements.
 */
#define create_jbd_proc_entry() do {} while (0)
#define remove_jbd_proc_entry() do {} while (0)
				1917
				1918	#endif
				1919
				1920	kmem_cache_t *jbd_handle_cache;
				1921
				1922	static int __init journal_init_handle_cache(void)
				1923	{
				1924	jbd_handle_cache = kmem_cache_create("journal_handle",
				1925	sizeof(handle_t),
				1926	0, /* offset */
				1927	0, /* flags */
				1928	NULL, /* ctor */
				1929	NULL); /* dtor */
				1930	if (jbd_handle_cache == NULL) {
				1931	printk(KERN_EMERG "JBD: failed to create handle cache\n");
				1932	return -ENOMEM;
				1933	}
				1934	return 0;
				1935	}
				1936
				1937	static void journal_destroy_handle_cache(void)
				1938	{
				1939	if (jbd_handle_cache)
				1940	kmem_cache_destroy(jbd_handle_cache);
				1941	}
				1942
				1943	/*
				1944	* Module startup and shutdown
				1945	*/
				1946
				1947	static int __init journal_init_caches(void)
				1948	{
				1949	int ret;
				1950
				1951	ret = journal_init_revoke_caches();
				1952	if (ret == 0)
				1953	ret = journal_init_journal_head_cache();
				1954	if (ret == 0)
				1955	ret = journal_init_handle_cache();
				1956	return ret;
				1957	}
				1958
/*
 * Tear down the revoke, journal_head and handle caches.  journal_init()
 * also calls this after a partially failed journal_init_caches().
 */
static void journal_destroy_caches(void)
{
	journal_destroy_revoke_caches();
	journal_destroy_journal_head_cache();
	journal_destroy_handle_cache();
}
				1965
				1966	static int __init journal_init(void)
				1967	{
				1968	int ret;
				1969
Adrian Bunk	022a4a7	2005-09-06 15:16:41 -0700	[diff] [blame]	1970	/* Static check for data structure consistency. There's no code
				1971	* invoked --- we'll just get a linker failure if things aren't right.
				1972	*/
				1973	extern void journal_bad_superblock_size(void);
				1974	if (sizeof(struct journal_superblock_s) != 1024)
				1975	journal_bad_superblock_size();
				1976
				1977
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1978	ret = journal_init_caches();
				1979	if (ret != 0)
				1980	journal_destroy_caches();
				1981	create_jbd_proc_entry();
				1982	return ret;
				1983	}
				1984
				1985	static void __exit journal_exit(void)
				1986	{
				1987	#ifdef CONFIG_JBD_DEBUG
				1988	int n = atomic_read(&nr_journal_heads);
				1989	if (n)
				1990	printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n);
				1991	#endif
				1992	remove_jbd_proc_entry();
				1993	journal_destroy_caches();
				1994	}
				1995
MODULE_LICENSE("GPL");
/* Register journal_init() and journal_exit() as the module's entry and exit points. */
module_init(journal_init);
module_exit(journal_exit);
				1999