Blame - fs/jbd2/transaction.c - kernel/msm-4.9

blob: 4f925a4f3d05051ec7b1edd65c50ecdabe27000a [file] [log] [blame]

Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1	/*
Uwe Kleine-König	5886269	2007-05-09 07:51:49 +0200	[diff] [blame]	2	* linux/fs/jbd2/transaction.c
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	3	*
				4	* Written by Stephen C. Tweedie <sct@redhat.com>, 1998
				5	*
				6	* Copyright 1998 Red Hat corp --- All Rights Reserved
				7	*
				8	* This file is part of the Linux kernel and is made available under
				9	* the terms of the GNU General Public License, version 2, or at your
				10	* option, any later version, incorporated herein by reference.
				11	*
				12	* Generic filesystem transaction handling code; part of the ext2fs
				13	* journaling system.
				14	*
				15	* This file manages transactions (compound commits managed by the
				16	* journaling code) and handles (individual atomic operations by the
				17	* filesystem).
				18	*/
				19
				20	#include <linux/time.h>
				21	#include <linux/fs.h>
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	22	#include <linux/jbd2.h>
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	23	#include <linux/errno.h>
				24	#include <linux/slab.h>
				25	#include <linux/timer.h>
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	26	#include <linux/mm.h>
				27	#include <linux/highmem.h>
				28
Adrian Bunk	7ddae86	2006-12-06 20:38:27 -0800	[diff] [blame]	29	static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
				30
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	31	/*
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	32	* jbd2_get_transaction: obtain a new transaction_t object.
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	33	*
				34	* Simply allocate and initialise a new transaction. Create it in
				35	* RUNNING state and add it to the current journal (which should not
				36	* have an existing running transaction: we only make a new transaction
				37	* once we have started to commit the old one).
				38	*
				39	* Preconditions:
				40	* The journal MUST be locked. We don't perform atomic mallocs on the
				41	* new transaction and we can't block without protecting against other
				42	* processes trying to touch the journal while it is in transition.
				43	*
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	44	*/
				45
				46	static transaction_t *
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	47	jbd2_get_transaction(journal_t journal, transaction_t transaction)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	48	{
				49	transaction->t_journal = journal;
				50	transaction->t_state = T_RUNNING;
				51	transaction->t_tid = journal->j_transaction_sequence++;
				52	transaction->t_expires = jiffies + journal->j_commit_interval;
				53	spin_lock_init(&transaction->t_handle_lock);
Jan Kara	c851ed5	2008-07-11 19:27:31 -0400	[diff] [blame]	54	INIT_LIST_HEAD(&transaction->t_inode_list);
Theodore Ts'o	3e624fc	2008-10-16 20:00:24 -0400	[diff] [blame]	55	INIT_LIST_HEAD(&transaction->t_private_list);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	56
				57	/* Set up the commit timer for the new transaction. */
Mingming Cao	db857da	2008-01-28 23:58:27 -0500	[diff] [blame]	58	journal->j_commit_timer.expires = round_jiffies(transaction->t_expires);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	59	add_timer(&journal->j_commit_timer);
				60
				61	J_ASSERT(journal->j_running_transaction == NULL);
				62	journal->j_running_transaction = transaction;
Johann Lombardi	8e85fb3	2008-01-28 23:58:27 -0500	[diff] [blame]	63	transaction->t_max_wait = 0;
				64	transaction->t_start = jiffies;
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	65
				66	return transaction;
				67	}
				68
				69	/*
				70	* Handle management.
				71	*
				72	* A handle_t is an object which represents a single atomic update to a
				73	* filesystem, and which tracks all of the modifications which form part
				74	* of that one update.
				75	*/
				76
				77	/*
				78	* start_this_handle: Given a handle, deal with any locking or stalling
				79	* needed to make sure that there is enough journal space for the handle
				80	* to begin. Attach the handle to a transaction and set up the
				81	* transaction's buffer credits.
				82	*/
				83
				84	static int start_this_handle(journal_t journal, handle_t handle)
				85	{
				86	transaction_t *transaction;
				87	int needed;
				88	int nblocks = handle->h_buffer_credits;
				89	transaction_t *new_transaction = NULL;
				90	int ret = 0;
Johann Lombardi	8e85fb3	2008-01-28 23:58:27 -0500	[diff] [blame]	91	unsigned long ts = jiffies;
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	92
				93	if (nblocks > journal->j_max_transaction_buffers) {
				94	printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
				95	current->comm, nblocks,
				96	journal->j_max_transaction_buffers);
				97	ret = -ENOSPC;
				98	goto out;
				99	}
				100
				101	alloc_transaction:
				102	if (!journal->j_running_transaction) {
Mingming Cao	d802ffa	2007-10-16 18:38:25 -0400	[diff] [blame]	103	new_transaction = kzalloc(sizeof(*new_transaction),
Mingming Cao	2d917969	2007-10-16 18:38:25 -0400	[diff] [blame]	104	GFP_NOFS\|__GFP_NOFAIL);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	105	if (!new_transaction) {
				106	ret = -ENOMEM;
				107	goto out;
				108	}
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	109	}
				110
				111	jbd_debug(3, "New handle %p going live.\n", handle);
				112
				113	repeat:
				114
				115	/*
				116	* We need to hold j_state_lock until t_updates has been incremented,
				117	* for proper journal barrier handling
				118	*/
				119	spin_lock(&journal->j_state_lock);
				120	repeat_locked:
				121	if (is_journal_aborted(journal) \|\|
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	122	(journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	123	spin_unlock(&journal->j_state_lock);
				124	ret = -EROFS;
				125	goto out;
				126	}
				127
				128	/* Wait on the journal's transaction barrier if necessary */
				129	if (journal->j_barrier_count) {
				130	spin_unlock(&journal->j_state_lock);
				131	wait_event(journal->j_wait_transaction_locked,
				132	journal->j_barrier_count == 0);
				133	goto repeat;
				134	}
				135
				136	if (!journal->j_running_transaction) {
				137	if (!new_transaction) {
				138	spin_unlock(&journal->j_state_lock);
				139	goto alloc_transaction;
				140	}
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	141	jbd2_get_transaction(journal, new_transaction);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	142	new_transaction = NULL;
				143	}
				144
				145	transaction = journal->j_running_transaction;
				146
				147	/*
				148	* If the current transaction is locked down for commit, wait for the
				149	* lock to be released.
				150	*/
				151	if (transaction->t_state == T_LOCKED) {
				152	DEFINE_WAIT(wait);
				153
				154	prepare_to_wait(&journal->j_wait_transaction_locked,
				155	&wait, TASK_UNINTERRUPTIBLE);
				156	spin_unlock(&journal->j_state_lock);
				157	schedule();
				158	finish_wait(&journal->j_wait_transaction_locked, &wait);
				159	goto repeat;
				160	}
				161
				162	/*
				163	* If there is not enough space left in the log to write all potential
				164	* buffers requested by this operation, we need to stall pending a log
				165	* checkpoint to free some more log space.
				166	*/
				167	spin_lock(&transaction->t_handle_lock);
				168	needed = transaction->t_outstanding_credits + nblocks;
				169
				170	if (needed > journal->j_max_transaction_buffers) {
				171	/*
				172	* If the current transaction is already too large, then start
				173	* to commit it: we can then go back and attach this handle to
				174	* a new transaction.
				175	*/
				176	DEFINE_WAIT(wait);
				177
				178	jbd_debug(2, "Handle %p starting new commit...\n", handle);
				179	spin_unlock(&transaction->t_handle_lock);
				180	prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
				181	TASK_UNINTERRUPTIBLE);
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	182	__jbd2_log_start_commit(journal, transaction->t_tid);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	183	spin_unlock(&journal->j_state_lock);
				184	schedule();
				185	finish_wait(&journal->j_wait_transaction_locked, &wait);
				186	goto repeat;
				187	}
				188
				189	/*
				190	* The commit code assumes that it can get enough log space
				191	* without forcing a checkpoint. This is critical for
				192	* correctness: a checkpoint of a buffer which is also
				193	* associated with a committing transaction creates a deadlock,
				194	* so commit simply cannot force through checkpoints.
				195	*
				196	* We must therefore ensure the necessary space in the journal
				197	* before starting to dirty potentially checkpointed buffers
				198	* in the new transaction.
				199	*
				200	* The worst part is, any transaction currently committing can
				201	* reduce the free space arbitrarily. Be careful to account for
				202	* those buffers when checkpointing.
				203	*/
				204
				205	/*
				206	* @@@ AKPM: This seems rather over-defensive. We're giving commit
				207	* a _lot_ of headroom: 1/4 of the journal plus the size of
				208	* the committing transaction. Really, we only need to give it
				209	* committing_transaction->t_outstanding_credits plus "enough" for
				210	* the log control blocks.
				211	* Also, this test is inconsitent with the matching one in
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	212	* jbd2_journal_extend().
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	213	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	214	if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	215	jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
				216	spin_unlock(&transaction->t_handle_lock);
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	217	__jbd2_log_wait_for_space(journal);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	218	goto repeat_locked;
				219	}
				220
				221	/* OK, account for the buffers that this operation expects to
				222	* use and add the handle to the running transaction. */
				223
Johann Lombardi	8e85fb3	2008-01-28 23:58:27 -0500	[diff] [blame]	224	if (time_after(transaction->t_start, ts)) {
				225	ts = jbd2_time_diff(ts, transaction->t_start);
				226	if (ts > transaction->t_max_wait)
				227	transaction->t_max_wait = ts;
				228	}
				229
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	230	handle->h_transaction = transaction;
				231	transaction->t_outstanding_credits += nblocks;
				232	transaction->t_updates++;
				233	transaction->t_handle_count++;
				234	jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
				235	handle, nblocks, transaction->t_outstanding_credits,
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	236	__jbd2_log_space_left(journal));
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	237	spin_unlock(&transaction->t_handle_lock);
				238	spin_unlock(&journal->j_state_lock);
				239	out:
				240	if (unlikely(new_transaction)) /* It's usually NULL */
				241	kfree(new_transaction);
				242	return ret;
				243	}
				244
Mingming Cao	7b75106	2008-01-28 23:58:27 -0500	[diff] [blame]	245	static struct lock_class_key jbd2_handle_key;
				246
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	247	/* Allocate a new handle. This should probably be in a slab... */
				248	static handle_t *new_handle(int nblocks)
				249	{
Mingming Cao	af1e76d	2007-10-16 18:38:25 -0400	[diff] [blame]	250	handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	251	if (!handle)
				252	return NULL;
				253	memset(handle, 0, sizeof(*handle));
				254	handle->h_buffer_credits = nblocks;
				255	handle->h_ref = 1;
				256
Mingming Cao	7b75106	2008-01-28 23:58:27 -0500	[diff] [blame]	257	lockdep_init_map(&handle->h_lockdep_map, "jbd2_handle",
				258	&jbd2_handle_key, 0);
				259
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	260	return handle;
				261	}
				262
				263	/**
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	264	* handle_t *jbd2_journal_start() - Obtain a new handle.
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	265	* @journal: Journal to start transaction on.
				266	* @nblocks: number of block buffer we might modify
				267	*
				268	* We make sure that the transaction can guarantee at least nblocks of
				269	* modified buffers in the log. We block until the log can guarantee
				270	* that much space.
				271	*
				272	* This function is visible to journal users (like ext3fs), so is not
				273	* called with the journal already locked.
				274	*
				275	* Return a pointer to a newly allocated handle, or NULL on failure
				276	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	277	handle_t jbd2_journal_start(journal_t journal, int nblocks)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	278	{
				279	handle_t *handle = journal_current_handle();
				280	int err;
				281
				282	if (!journal)
				283	return ERR_PTR(-EROFS);
				284
				285	if (handle) {
				286	J_ASSERT(handle->h_transaction->t_journal == journal);
				287	handle->h_ref++;
				288	return handle;
				289	}
				290
				291	handle = new_handle(nblocks);
				292	if (!handle)
				293	return ERR_PTR(-ENOMEM);
				294
				295	current->journal_info = handle;
				296
				297	err = start_this_handle(journal, handle);
				298	if (err < 0) {
Mingming Cao	af1e76d	2007-10-16 18:38:25 -0400	[diff] [blame]	299	jbd2_free_handle(handle);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	300	current->journal_info = NULL;
				301	handle = ERR_PTR(err);
Mingming Cao	7b75106	2008-01-28 23:58:27 -0500	[diff] [blame]	302	goto out;
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	303	}
Mingming Cao	7b75106	2008-01-28 23:58:27 -0500	[diff] [blame]	304
Ingo Molnar	3295f0e	2008-08-11 10:30:30 +0200	[diff] [blame]	305	lock_map_acquire(&handle->h_lockdep_map);
Mingming Cao	7b75106	2008-01-28 23:58:27 -0500	[diff] [blame]	306	out:
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	307	return handle;
				308	}
				309
				310	/**
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	311	* int jbd2_journal_extend() - extend buffer credits.
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	312	* @handle: handle to 'extend'
				313	* @nblocks: nr blocks to try to extend by.
				314	*
				315	* Some transactions, such as large extends and truncates, can be done
				316	* atomically all at once or in several stages. The operation requests
				317	* a credit for a number of buffer modications in advance, but can
				318	* extend its credit if it needs more.
				319	*
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	320	* jbd2_journal_extend tries to give the running handle more buffer credits.
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	321	* It does not guarantee that allocation - this is a best-effort only.
				322	* The calling process MUST be able to deal cleanly with a failure to
				323	* extend here.
				324	*
				325	* Return 0 on success, non-zero on failure.
				326	*
				327	* return code < 0 implies an error
				328	* return code > 0 implies normal transaction-full status.
				329	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	330	int jbd2_journal_extend(handle_t *handle, int nblocks)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	331	{
				332	transaction_t *transaction = handle->h_transaction;
				333	journal_t *journal = transaction->t_journal;
				334	int result;
				335	int wanted;
				336
				337	result = -EIO;
				338	if (is_handle_aborted(handle))
				339	goto out;
				340
				341	result = 1;
				342
				343	spin_lock(&journal->j_state_lock);
				344
				345	/* Don't extend a locked-down transaction! */
				346	if (handle->h_transaction->t_state != T_RUNNING) {
				347	jbd_debug(3, "denied handle %p %d blocks: "
				348	"transaction not running\n", handle, nblocks);
				349	goto error_out;
				350	}
				351
				352	spin_lock(&transaction->t_handle_lock);
				353	wanted = transaction->t_outstanding_credits + nblocks;
				354
				355	if (wanted > journal->j_max_transaction_buffers) {
				356	jbd_debug(3, "denied handle %p %d blocks: "
				357	"transaction too large\n", handle, nblocks);
				358	goto unlock;
				359	}
				360
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	361	if (wanted > __jbd2_log_space_left(journal)) {
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	362	jbd_debug(3, "denied handle %p %d blocks: "
				363	"insufficient log space\n", handle, nblocks);
				364	goto unlock;
				365	}
				366
				367	handle->h_buffer_credits += nblocks;
				368	transaction->t_outstanding_credits += nblocks;
				369	result = 0;
				370
				371	jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
				372	unlock:
				373	spin_unlock(&transaction->t_handle_lock);
				374	error_out:
				375	spin_unlock(&journal->j_state_lock);
				376	out:
				377	return result;
				378	}
				379
				380
				381	/**
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	382	* int jbd2_journal_restart() - restart a handle .
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	383	* @handle: handle to restart
				384	* @nblocks: nr credits requested
				385	*
				386	* Restart a handle for a multi-transaction filesystem
				387	* operation.
				388	*
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	389	* If the jbd2_journal_extend() call above fails to grant new buffer credits
				390	* to a running handle, a call to jbd2_journal_restart will commit the
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	391	* handle's transaction so far and reattach the handle to a new
				392	* transaction capabable of guaranteeing the requested number of
				393	* credits.
				394	*/
				395
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	396	int jbd2_journal_restart(handle_t *handle, int nblocks)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	397	{
				398	transaction_t *transaction = handle->h_transaction;
				399	journal_t *journal = transaction->t_journal;
				400	int ret;
				401
				402	/* If we've had an abort of any type, don't even think about
				403	* actually doing the restart! */
				404	if (is_handle_aborted(handle))
				405	return 0;
				406
				407	/*
				408	* First unlink the handle from its current transaction, and start the
				409	* commit on that.
				410	*/
				411	J_ASSERT(transaction->t_updates > 0);
				412	J_ASSERT(journal_current_handle() == handle);
				413
				414	spin_lock(&journal->j_state_lock);
				415	spin_lock(&transaction->t_handle_lock);
				416	transaction->t_outstanding_credits -= handle->h_buffer_credits;
				417	transaction->t_updates--;
				418
				419	if (!transaction->t_updates)
				420	wake_up(&journal->j_wait_updates);
				421	spin_unlock(&transaction->t_handle_lock);
				422
				423	jbd_debug(2, "restarting handle %p\n", handle);
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	424	__jbd2_log_start_commit(journal, transaction->t_tid);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	425	spin_unlock(&journal->j_state_lock);
				426
				427	handle->h_buffer_credits = nblocks;
				428	ret = start_this_handle(journal, handle);
				429	return ret;
				430	}
				431
				432
				433	/**
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	434	* void jbd2_journal_lock_updates () - establish a transaction barrier.
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	435	* @journal: Journal to establish a barrier on.
				436	*
				437	* This locks out any further updates from being started, and blocks
				438	* until all existing updates have completed, returning only once the
				439	* journal is in a quiescent state with no updates running.
				440	*
				441	* The journal lock should not be held on entry.
				442	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	443	void jbd2_journal_lock_updates(journal_t *journal)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	444	{
				445	DEFINE_WAIT(wait);
				446
				447	spin_lock(&journal->j_state_lock);
				448	++journal->j_barrier_count;
				449
				450	/* Wait until there are no running updates */
				451	while (1) {
				452	transaction_t *transaction = journal->j_running_transaction;
				453
				454	if (!transaction)
				455	break;
				456
				457	spin_lock(&transaction->t_handle_lock);
				458	if (!transaction->t_updates) {
				459	spin_unlock(&transaction->t_handle_lock);
				460	break;
				461	}
				462	prepare_to_wait(&journal->j_wait_updates, &wait,
				463	TASK_UNINTERRUPTIBLE);
				464	spin_unlock(&transaction->t_handle_lock);
				465	spin_unlock(&journal->j_state_lock);
				466	schedule();
				467	finish_wait(&journal->j_wait_updates, &wait);
				468	spin_lock(&journal->j_state_lock);
				469	}
				470	spin_unlock(&journal->j_state_lock);
				471
				472	/*
				473	* We have now established a barrier against other normal updates, but
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	474	* we also need to barrier against other jbd2_journal_lock_updates() calls
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	475	* to make sure that we serialise special journal-locked operations
				476	* too.
				477	*/
				478	mutex_lock(&journal->j_barrier);
				479	}
				480
				481	/**
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	482	* void jbd2_journal_unlock_updates (journal_t* journal) - release barrier
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	483	* @journal: Journal to release the barrier on.
				484	*
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	485	* Release a transaction barrier obtained with jbd2_journal_lock_updates().
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	486	*
				487	* Should be called without the journal lock held.
				488	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	489	void jbd2_journal_unlock_updates (journal_t *journal)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	490	{
				491	J_ASSERT(journal->j_barrier_count != 0);
				492
				493	mutex_unlock(&journal->j_barrier);
				494	spin_lock(&journal->j_state_lock);
				495	--journal->j_barrier_count;
				496	spin_unlock(&journal->j_state_lock);
				497	wake_up(&journal->j_wait_transaction_locked);
				498	}
				499
				500	/*
				501	* Report any unexpected dirty buffers which turn up. Normally those
				502	* indicate an error, but they can occur if the user is running (say)
				503	* tune2fs to modify the live filesystem, so we need the option of
				504	* continuing as gracefully as possible. #
				505	*
				506	* The caller should already hold the journal lock and
				507	* j_list_lock spinlock: most callers will need those anyway
				508	* in order to probe the buffer's journaling state safely.
				509	*/
				510	static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
				511	{
				512	int jlist;
				513
				514	/* If this buffer is one which might reasonably be dirty
				515	* --- ie. data, or not part of this journal --- then
				516	* we're OK to leave it alone, but otherwise we need to
				517	* move the dirty bit to the journal's own internal
				518	* JBDDirty bit. */
				519	jlist = jh->b_jlist;
				520
				521	if (jlist == BJ_Metadata \|\| jlist == BJ_Reserved \|\|
				522	jlist == BJ_Shadow \|\| jlist == BJ_Forget) {
				523	struct buffer_head *bh = jh2bh(jh);
				524
				525	if (test_clear_buffer_dirty(bh))
				526	set_buffer_jbddirty(bh);
				527	}
				528	}
				529
				530	/*
				531	* If the buffer is already part of the current transaction, then there
				532	* is nothing we need to do. If it is already part of a prior
				533	* transaction which we are still committing to disk, then we need to
				534	* make sure that we do not overwrite the old copy: we do copy-out to
				535	* preserve the copy going to disk. We also account the buffer against
				536	* the handle's metadata buffer credits (unless the buffer is already
				537	* part of the transaction, that is).
				538	*
				539	*/
				540	static int
				541	do_get_write_access(handle_t handle, struct journal_head jh,
				542	int force_copy)
				543	{
				544	struct buffer_head *bh;
				545	transaction_t *transaction;
				546	journal_t *journal;
				547	int error;
				548	char *frozen_buffer = NULL;
				549	int need_copy = 0;
				550
				551	if (is_handle_aborted(handle))
				552	return -EROFS;
				553
				554	transaction = handle->h_transaction;
				555	journal = transaction->t_journal;
				556
				557	jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy);
				558
				559	JBUFFER_TRACE(jh, "entry");
				560	repeat:
				561	bh = jh2bh(jh);
				562
				563	/* @@@ Need to check for errors here at some point. */
				564
				565	lock_buffer(bh);
				566	jbd_lock_bh_state(bh);
				567
				568	/* We now hold the buffer lock so it is safe to query the buffer
				569	* state. Is the buffer dirty?
				570	*
				571	* If so, there are two possibilities. The buffer may be
				572	* non-journaled, and undergoing a quite legitimate writeback.
				573	* Otherwise, it is journaled, and we don't expect dirty buffers
				574	* in that state (the buffers should be marked JBD_Dirty
				575	* instead.) So either the IO is being done under our own
				576	* control and this is a bug, or it's a third party IO such as
				577	* dump(8) (which may leave the buffer scheduled for read ---
				578	* ie. locked but not dirty) or tune2fs (which may actually have
				579	* the buffer dirtied, ugh.) */
				580
				581	if (buffer_dirty(bh)) {
				582	/*
				583	* First question: is this buffer already part of the current
				584	* transaction or the existing committing transaction?
				585	*/
				586	if (jh->b_transaction) {
				587	J_ASSERT_JH(jh,
				588	jh->b_transaction == transaction \|\|
				589	jh->b_transaction ==
				590	journal->j_committing_transaction);
				591	if (jh->b_next_transaction)
				592	J_ASSERT_JH(jh, jh->b_next_transaction ==
				593	transaction);
				594	}
				595	/*
				596	* In any case we need to clean the dirty flag and we must
				597	* do it under the buffer lock to be sure we don't race
				598	* with running write-out.
				599	*/
				600	JBUFFER_TRACE(jh, "Unexpected dirty buffer");
				601	jbd_unexpected_dirty_buffer(jh);
				602	}
				603
				604	unlock_buffer(bh);
				605
				606	error = -EROFS;
				607	if (is_handle_aborted(handle)) {
				608	jbd_unlock_bh_state(bh);
				609	goto out;
				610	}
				611	error = 0;
				612
				613	/*
				614	* The buffer is already part of this transaction if b_transaction or
				615	* b_next_transaction points to it
				616	*/
				617	if (jh->b_transaction == transaction \|\|
				618	jh->b_next_transaction == transaction)
				619	goto done;
				620
				621	/*
Josef Bacik	9fc7c63	2008-04-17 10:38:59 -0400	[diff] [blame]	622	* this is the first time this transaction is touching this buffer,
				623	* reset the modified flag
				624	*/
				625	jh->b_modified = 0;
				626
				627	/*
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	628	* If there is already a copy-out version of this buffer, then we don't
				629	* need to make another one
				630	*/
				631	if (jh->b_frozen_data) {
				632	JBUFFER_TRACE(jh, "has frozen data");
				633	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
				634	jh->b_next_transaction = transaction;
				635	goto done;
				636	}
				637
				638	/* Is there data here we need to preserve? */
				639
				640	if (jh->b_transaction && jh->b_transaction != transaction) {
				641	JBUFFER_TRACE(jh, "owned by older transaction");
				642	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
				643	J_ASSERT_JH(jh, jh->b_transaction ==
				644	journal->j_committing_transaction);
				645
				646	/* There is one case we have to be very careful about.
				647	* If the committing transaction is currently writing
				648	* this buffer out to disk and has NOT made a copy-out,
				649	* then we cannot modify the buffer contents at all
				650	* right now. The essence of copy-out is that it is the
				651	* extra copy, not the primary copy, which gets
				652	* journaled. If the primary copy is already going to
				653	* disk then we cannot do copy-out here. */
				654
				655	if (jh->b_jlist == BJ_Shadow) {
				656	DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow);
				657	wait_queue_head_t *wqh;
				658
				659	wqh = bit_waitqueue(&bh->b_state, BH_Unshadow);
				660
				661	JBUFFER_TRACE(jh, "on shadow: sleep");
				662	jbd_unlock_bh_state(bh);
				663	/* commit wakes up all shadow buffers after IO */
				664	for ( ; ; ) {
				665	prepare_to_wait(wqh, &wait.wait,
				666	TASK_UNINTERRUPTIBLE);
				667	if (jh->b_jlist != BJ_Shadow)
				668	break;
				669	schedule();
				670	}
				671	finish_wait(wqh, &wait.wait);
				672	goto repeat;
				673	}
				674
				675	/* Only do the copy if the currently-owning transaction
				676	* still needs it. If it is on the Forget list, the
				677	* committing transaction is past that stage. The
				678	* buffer had better remain locked during the kmalloc,
				679	* but that should be true --- we hold the journal lock
				680	* still and the buffer is already on the BUF_JOURNAL
				681	* list so won't be flushed.
				682	*
				683	* Subtle point, though: if this is a get_undo_access,
				684	* then we will be relying on the frozen_data to contain
				685	* the new value of the committed_data record after the
				686	* transaction, so we HAVE to force the frozen_data copy
				687	* in that case. */
				688
				689	if (jh->b_jlist != BJ_Forget \|\| force_copy) {
				690	JBUFFER_TRACE(jh, "generate frozen data");
				691	if (!frozen_buffer) {
				692	JBUFFER_TRACE(jh, "allocate memory for buffer");
				693	jbd_unlock_bh_state(bh);
				694	frozen_buffer =
Mingming Cao	af1e76d	2007-10-16 18:38:25 -0400	[diff] [blame]	695	jbd2_alloc(jh2bh(jh)->b_size,
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	696	GFP_NOFS);
				697	if (!frozen_buffer) {
				698	printk(KERN_EMERG
				699	"%s: OOM for frozen_buffer\n",
Harvey Harrison	329d291	2008-04-17 10:38:59 -0400	[diff] [blame]	700	__func__);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	701	JBUFFER_TRACE(jh, "oom!");
				702	error = -ENOMEM;
				703	jbd_lock_bh_state(bh);
				704	goto done;
				705	}
				706	goto repeat;
				707	}
				708	jh->b_frozen_data = frozen_buffer;
				709	frozen_buffer = NULL;
				710	need_copy = 1;
				711	}
				712	jh->b_next_transaction = transaction;
				713	}
				714
				715
				716	/*
				717	* Finally, if the buffer is not journaled right now, we need to make
				718	* sure it doesn't get written to disk before the caller actually
				719	* commits the new data
				720	*/
				721	if (!jh->b_transaction) {
				722	JBUFFER_TRACE(jh, "no transaction");
				723	J_ASSERT_JH(jh, !jh->b_next_transaction);
				724	jh->b_transaction = transaction;
				725	JBUFFER_TRACE(jh, "file as BJ_Reserved");
				726	spin_lock(&journal->j_list_lock);
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	727	__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	728	spin_unlock(&journal->j_list_lock);
				729	}
				730
				731	done:
				732	if (need_copy) {
				733	struct page *page;
				734	int offset;
				735	char *source;
				736
				737	J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
				738	"Possible IO failure.\n");
				739	page = jh2bh(jh)->b_page;
				740	offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
				741	source = kmap_atomic(page, KM_USER0);
				742	memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
				743	kunmap_atomic(source, KM_USER0);
Joel Becker	e06c822	2008-09-11 15:35:47 -0700	[diff] [blame^]	744
				745	/*
				746	* Now that the frozen data is saved off, we need to store
				747	* any matching triggers.
				748	*/
				749	jh->b_frozen_triggers = jh->b_triggers;
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	750	}
				751	jbd_unlock_bh_state(bh);
				752
				753	/*
				754	* If we are about to journal a buffer, then any revoke pending on it is
				755	* no longer valid
				756	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	757	jbd2_journal_cancel_revoke(handle, jh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	758
				759	out:
				760	if (unlikely(frozen_buffer)) /* It's usually NULL */
Mingming Cao	af1e76d	2007-10-16 18:38:25 -0400	[diff] [blame]	761	jbd2_free(frozen_buffer, bh->b_size);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	762
				763	JBUFFER_TRACE(jh, "exit");
				764	return error;
				765	}
				766
				767	/**
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	768	* int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	769	* @handle: transaction to add buffer modifications to
				770	* @bh: bh to be used for metadata writes
				771	* @credits: variable that will receive credits for the buffer
				772	*
				773	* Returns an error code or 0 on success.
				774	*
				775	* In full data journalling mode the buffer may be of type BJ_AsyncData,
				776	* because we're write()ing a buffer which is also part of a shared mapping.
				777	*/
				778
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	779	int jbd2_journal_get_write_access(handle_t handle, struct buffer_head bh)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	780	{
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	781	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	782	int rc;
				783
				784	/* We do not want to get caught playing with fields which the
				785	* log thread also manipulates. Make sure that the buffer
				786	* completes any outstanding IO before proceeding. */
				787	rc = do_get_write_access(handle, jh, 0);
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	788	jbd2_journal_put_journal_head(jh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	789	return rc;
				790	}
				791
				792
				793	/*
				794	* When the user wants to journal a newly created buffer_head
				795	* (ie. getblk() returned a new buffer and we are going to populate it
				796	* manually rather than reading off disk), then we need to keep the
				797	* buffer_head locked until it has been completely filled with new
				798	* data. In this case, we should be able to make the assertion that
				799	* the bh is not already part of an existing transaction.
				800	*
				801	* The buffer should already be locked by the caller by this point.
				802	* There is no lock ranking violation: it was a newly created,
				803	* unlocked buffer beforehand. */
				804
				805	/**
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	806	* int jbd2_journal_get_create_access () - notify intent to use newly created bh
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	807	* @handle: transaction to new buffer to
				808	* @bh: new buffer.
				809	*
				810	* Call this if you create a new bh.
				811	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	812	int jbd2_journal_get_create_access(handle_t handle, struct buffer_head bh)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	813	{
				814	transaction_t *transaction = handle->h_transaction;
				815	journal_t *journal = transaction->t_journal;
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	816	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	817	int err;
				818
				819	jbd_debug(5, "journal_head %p\n", jh);
				820	err = -EROFS;
				821	if (is_handle_aborted(handle))
				822	goto out;
				823	err = 0;
				824
				825	JBUFFER_TRACE(jh, "entry");
				826	/*
				827	* The buffer may already belong to this transaction due to pre-zeroing
				828	* in the filesystem's new_block code. It may also be on the previous,
				829	* committing transaction's lists, but it HAS to be in Forget state in
				830	* that case: the transaction must have deleted the buffer for it to be
				831	* reused here.
				832	*/
				833	jbd_lock_bh_state(bh);
				834	spin_lock(&journal->j_list_lock);
				835	J_ASSERT_JH(jh, (jh->b_transaction == transaction \|\|
				836	jh->b_transaction == NULL \|\|
				837	(jh->b_transaction == journal->j_committing_transaction &&
				838	jh->b_jlist == BJ_Forget)));
				839
				840	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
				841	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));
				842
				843	if (jh->b_transaction == NULL) {
				844	jh->b_transaction = transaction;
Josef Bacik	9fc7c63	2008-04-17 10:38:59 -0400	[diff] [blame]	845
				846	/* first access by this transaction */
				847	jh->b_modified = 0;
				848
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	849	JBUFFER_TRACE(jh, "file as BJ_Reserved");
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	850	__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	851	} else if (jh->b_transaction == journal->j_committing_transaction) {
Josef Bacik	9fc7c63	2008-04-17 10:38:59 -0400	[diff] [blame]	852	/* first access by this transaction */
				853	jh->b_modified = 0;
				854
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	855	JBUFFER_TRACE(jh, "set next transaction");
				856	jh->b_next_transaction = transaction;
				857	}
				858	spin_unlock(&journal->j_list_lock);
				859	jbd_unlock_bh_state(bh);
				860
				861	/*
				862	* akpm: I added this. ext3_alloc_branch can pick up new indirect
				863	* blocks which contain freed but then revoked metadata. We need
				864	* to cancel the revoke in case we end up freeing it yet again
				865	* and the reallocating as data - this would cause a second revoke,
				866	* which hits an assertion error.
				867	*/
				868	JBUFFER_TRACE(jh, "cancelling revoke");
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	869	jbd2_journal_cancel_revoke(handle, jh);
				870	jbd2_journal_put_journal_head(jh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	871	out:
				872	return err;
				873	}
				874
				875	/**
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	876	* int jbd2_journal_get_undo_access() - Notify intent to modify metadata with
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	877	* non-rewindable consequences
				878	* @handle: transaction
				879	* @bh: buffer to undo
				880	* @credits: store the number of taken credits here (if not NULL)
				881	*
				882	* Sometimes there is a need to distinguish between metadata which has
				883	* been committed to disk and that which has not. The ext3fs code uses
				884	* this for freeing and allocating space, we have to make sure that we
				885	* do not reuse freed space until the deallocation has been committed,
				886	* since if we overwrote that space we would make the delete
				887	* un-rewindable in case of a crash.
				888	*
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	889	* To deal with that, jbd2_journal_get_undo_access requests write access to a
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	890	* buffer for parts of non-rewindable operations such as delete
				891	* operations on the bitmaps. The journaling code must keep a copy of
				892	* the buffer's contents prior to the undo_access call until such time
				893	* as we know that the buffer has definitely been committed to disk.
				894	*
				895	* We never need to know which transaction the committed data is part
				896	* of, buffers touched here are guaranteed to be dirtied later and so
				897	* will be committed to a new transaction in due course, at which point
				898	* we can discard the old committed data pointer.
				899	*
				900	* Returns error number or 0 on success.
				901	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	902	int jbd2_journal_get_undo_access(handle_t handle, struct buffer_head bh)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	903	{
				904	int err;
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	905	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	906	char *committed_data = NULL;
				907
				908	JBUFFER_TRACE(jh, "entry");
				909
				910	/*
				911	* Do this first --- it can drop the journal lock, so we want to
				912	* make sure that obtaining the committed_data is done
				913	* atomically wrt. completion of any outstanding commits.
				914	*/
				915	err = do_get_write_access(handle, jh, 1);
				916	if (err)
				917	goto out;
				918
				919	repeat:
				920	if (!jh->b_committed_data) {
Mingming Cao	af1e76d	2007-10-16 18:38:25 -0400	[diff] [blame]	921	committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	922	if (!committed_data) {
				923	printk(KERN_EMERG "%s: No memory for committed data\n",
Harvey Harrison	329d291	2008-04-17 10:38:59 -0400	[diff] [blame]	924	__func__);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	925	err = -ENOMEM;
				926	goto out;
				927	}
				928	}
				929
				930	jbd_lock_bh_state(bh);
				931	if (!jh->b_committed_data) {
				932	/* Copy out the current buffer contents into the
				933	* preserved, committed copy. */
				934	JBUFFER_TRACE(jh, "generate b_committed data");
				935	if (!committed_data) {
				936	jbd_unlock_bh_state(bh);
				937	goto repeat;
				938	}
				939
				940	jh->b_committed_data = committed_data;
				941	committed_data = NULL;
				942	memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
				943	}
				944	jbd_unlock_bh_state(bh);
				945	out:
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	946	jbd2_journal_put_journal_head(jh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	947	if (unlikely(committed_data))
Mingming Cao	af1e76d	2007-10-16 18:38:25 -0400	[diff] [blame]	948	jbd2_free(committed_data, bh->b_size);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	949	return err;
				950	}
				951
				952	/**
Joel Becker	e06c822	2008-09-11 15:35:47 -0700	[diff] [blame^]	953	* void jbd2_journal_set_triggers() - Add triggers for commit writeout
				954	* @bh: buffer to trigger on
				955	* @type: struct jbd2_buffer_trigger_type containing the trigger(s).
				956	*
				957	* Set any triggers on this journal_head. This is always safe, because
				958	* triggers for a committing buffer will be saved off, and triggers for
				959	* a running transaction will match the buffer in that transaction.
				960	*
				961	* Call with NULL to clear the triggers.
				962	*/
				963	void jbd2_journal_set_triggers(struct buffer_head *bh,
				964	struct jbd2_buffer_trigger_type *type)
				965	{
				966	struct journal_head *jh = bh2jh(bh);
				967
				968	jh->b_triggers = type;
				969	}
				970
				971	void jbd2_buffer_commit_trigger(struct journal_head jh, void mapped_data,
				972	struct jbd2_buffer_trigger_type *triggers)
				973	{
				974	struct buffer_head *bh = jh2bh(jh);
				975
				976	if (!triggers \|\| !triggers->t_commit)
				977	return;
				978
				979	triggers->t_commit(triggers, bh, mapped_data, bh->b_size);
				980	}
				981
				982	void jbd2_buffer_abort_trigger(struct journal_head *jh,
				983	struct jbd2_buffer_trigger_type *triggers)
				984	{
				985	if (!triggers \|\| !triggers->t_abort)
				986	return;
				987
				988	triggers->t_abort(triggers, jh2bh(jh));
				989	}
				990
				991
				992
				993	/**
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	994	* int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	995	* @handle: transaction to add buffer to.
				996	* @bh: buffer to mark
				997	*
				998	* mark dirty metadata which needs to be journaled as part of the current
				999	* transaction.
				1000	*
				1001	* The buffer is placed on the transaction's metadata list and is marked
				1002	* as belonging to the transaction.
				1003	*
				1004	* Returns error number or 0 on success.
				1005	*
				1006	* Special care needs to be taken if the buffer already belongs to the
				1007	* current committing transaction (in which case we should have frozen
				1008	* data present for that commit). In that case, we don't relink the
				1009	* buffer: that only gets done when the old transaction finally
				1010	* completes its commit.
				1011	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1012	int jbd2_journal_dirty_metadata(handle_t handle, struct buffer_head bh)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1013	{
				1014	transaction_t *transaction = handle->h_transaction;
				1015	journal_t *journal = transaction->t_journal;
				1016	struct journal_head *jh = bh2jh(bh);
				1017
				1018	jbd_debug(5, "journal_head %p\n", jh);
				1019	JBUFFER_TRACE(jh, "entry");
				1020	if (is_handle_aborted(handle))
				1021	goto out;
				1022
				1023	jbd_lock_bh_state(bh);
				1024
				1025	if (jh->b_modified == 0) {
				1026	/*
				1027	* This buffer's got modified and becoming part
				1028	* of the transaction. This needs to be done
				1029	* once a transaction -bzzz
				1030	*/
				1031	jh->b_modified = 1;
				1032	J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
				1033	handle->h_buffer_credits--;
				1034	}
				1035
				1036	/*
				1037	* fastpath, to avoid expensive locking. If this buffer is already
				1038	* on the running transaction's metadata list there is nothing to do.
				1039	* Nobody can take it off again because there is a handle open.
				1040	* I _think_ we're OK here with SMP barriers - a mistaken decision will
				1041	* result in this test being false, so we go in and take the locks.
				1042	*/
				1043	if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
				1044	JBUFFER_TRACE(jh, "fastpath");
				1045	J_ASSERT_JH(jh, jh->b_transaction ==
				1046	journal->j_running_transaction);
				1047	goto out_unlock_bh;
				1048	}
				1049
				1050	set_buffer_jbddirty(bh);
				1051
				1052	/*
				1053	* Metadata already on the current transaction list doesn't
				1054	* need to be filed. Metadata on another transaction's list must
				1055	* be committing, and will be refiled once the commit completes:
				1056	* leave it alone for now.
				1057	*/
				1058	if (jh->b_transaction != transaction) {
				1059	JBUFFER_TRACE(jh, "already on other transaction");
				1060	J_ASSERT_JH(jh, jh->b_transaction ==
				1061	journal->j_committing_transaction);
				1062	J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
				1063	/* And this case is illegal: we can't reuse another
				1064	* transaction's data buffer, ever. */
				1065	goto out_unlock_bh;
				1066	}
				1067
				1068	/* That test should have eliminated the following case: */
Mingming Cao	4019191	2008-01-28 23:58:27 -0500	[diff] [blame]	1069	J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1070
				1071	JBUFFER_TRACE(jh, "file as BJ_Metadata");
				1072	spin_lock(&journal->j_list_lock);
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1073	__jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_Metadata);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1074	spin_unlock(&journal->j_list_lock);
				1075	out_unlock_bh:
				1076	jbd_unlock_bh_state(bh);
				1077	out:
				1078	JBUFFER_TRACE(jh, "exit");
				1079	return 0;
				1080	}
				1081
				1082	/*
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1083	* jbd2_journal_release_buffer: undo a get_write_access without any buffer
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1084	* updates, if the update decided in the end that it didn't need access.
				1085	*
				1086	*/
				1087	void
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1088	jbd2_journal_release_buffer(handle_t handle, struct buffer_head bh)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1089	{
				1090	BUFFER_TRACE(bh, "entry");
				1091	}
				1092
				1093	/**
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1094	* void jbd2_journal_forget() - bforget() for potentially-journaled buffers.
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1095	* @handle: transaction handle
				1096	* @bh: bh to 'forget'
				1097	*
				1098	* We can only do the bforget if there are no commits pending against the
				1099	* buffer. If the buffer is dirty in the current running transaction we
				1100	* can safely unlink it.
				1101	*
				1102	* bh may not be a journalled buffer at all - it may be a non-JBD
				1103	* buffer which came off the hashtable. Check for this.
				1104	*
				1105	* Decrements bh->b_count by one.
				1106	*
				1107	* Allow this call even if the handle has aborted --- it may be part of
				1108	* the caller's cleanup after an abort.
				1109	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1110	int jbd2_journal_forget (handle_t handle, struct buffer_head bh)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1111	{
				1112	transaction_t *transaction = handle->h_transaction;
				1113	journal_t *journal = transaction->t_journal;
				1114	struct journal_head *jh;
				1115	int drop_reserve = 0;
				1116	int err = 0;
Josef Bacik	1dfc322	2008-04-17 10:38:59 -0400	[diff] [blame]	1117	int was_modified = 0;
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1118
				1119	BUFFER_TRACE(bh, "entry");
				1120
				1121	jbd_lock_bh_state(bh);
				1122	spin_lock(&journal->j_list_lock);
				1123
				1124	if (!buffer_jbd(bh))
				1125	goto not_jbd;
				1126	jh = bh2jh(bh);
				1127
				1128	/* Critical error: attempting to delete a bitmap buffer, maybe?
				1129	* Don't do any jbd operations, and return an error. */
				1130	if (!J_EXPECT_JH(jh, !jh->b_committed_data,
				1131	"inconsistent data on disk")) {
				1132	err = -EIO;
				1133	goto not_jbd;
				1134	}
				1135
Josef Bacik	1dfc322	2008-04-17 10:38:59 -0400	[diff] [blame]	1136	/* keep track of wether or not this transaction modified us */
				1137	was_modified = jh->b_modified;
				1138
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1139	/*
				1140	* The buffer's going from the transaction, we must drop
				1141	* all references -bzzz
				1142	*/
				1143	jh->b_modified = 0;
				1144
				1145	if (jh->b_transaction == handle->h_transaction) {
				1146	J_ASSERT_JH(jh, !jh->b_frozen_data);
				1147
				1148	/* If we are forgetting a buffer which is already part
				1149	* of this transaction, then we can just drop it from
				1150	* the transaction immediately. */
				1151	clear_buffer_dirty(bh);
				1152	clear_buffer_jbddirty(bh);
				1153
				1154	JBUFFER_TRACE(jh, "belongs to current transaction: unfile");
				1155
Josef Bacik	1dfc322	2008-04-17 10:38:59 -0400	[diff] [blame]	1156	/*
				1157	* we only want to drop a reference if this transaction
				1158	* modified the buffer
				1159	*/
				1160	if (was_modified)
				1161	drop_reserve = 1;
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1162
				1163	/*
				1164	* We are no longer going to journal this buffer.
				1165	* However, the commit of this transaction is still
				1166	* important to the buffer: the delete that we are now
				1167	* processing might obsolete an old log entry, so by
				1168	* committing, we can satisfy the buffer's checkpoint.
				1169	*
				1170	* So, if we have a checkpoint on the buffer, we should
				1171	* now refile the buffer on our BJ_Forget list so that
				1172	* we know to remove the checkpoint after we commit.
				1173	*/
				1174
				1175	if (jh->b_cp_transaction) {
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1176	__jbd2_journal_temp_unlink_buffer(jh);
				1177	__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1178	} else {
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1179	__jbd2_journal_unfile_buffer(jh);
				1180	jbd2_journal_remove_journal_head(bh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1181	__brelse(bh);
				1182	if (!buffer_jbd(bh)) {
				1183	spin_unlock(&journal->j_list_lock);
				1184	jbd_unlock_bh_state(bh);
				1185	__bforget(bh);
				1186	goto drop;
				1187	}
				1188	}
				1189	} else if (jh->b_transaction) {
				1190	J_ASSERT_JH(jh, (jh->b_transaction ==
				1191	journal->j_committing_transaction));
				1192	/* However, if the buffer is still owned by a prior
				1193	* (committing) transaction, we can't drop it yet... */
				1194	JBUFFER_TRACE(jh, "belongs to older transaction");
				1195	/* ... but we CAN drop it from the new transaction if we
				1196	* have also modified it since the original commit. */
				1197
				1198	if (jh->b_next_transaction) {
				1199	J_ASSERT(jh->b_next_transaction == transaction);
				1200	jh->b_next_transaction = NULL;
Josef Bacik	1dfc322	2008-04-17 10:38:59 -0400	[diff] [blame]	1201
				1202	/*
				1203	* only drop a reference if this transaction modified
				1204	* the buffer
				1205	*/
				1206	if (was_modified)
				1207	drop_reserve = 1;
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1208	}
				1209	}
				1210
				1211	not_jbd:
				1212	spin_unlock(&journal->j_list_lock);
				1213	jbd_unlock_bh_state(bh);
				1214	__brelse(bh);
				1215	drop:
				1216	if (drop_reserve) {
				1217	/* no need to reserve log space for this block -bzzz */
				1218	handle->h_buffer_credits++;
				1219	}
				1220	return err;
				1221	}
				1222
				1223	/**
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1224	* int jbd2_journal_stop() - complete a transaction
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1225	* @handle: tranaction to complete.
				1226	*
				1227	* All done for a particular handle.
				1228	*
				1229	* There is not much action needed here. We just return any remaining
				1230	* buffer credits to the transaction and remove the handle. The only
				1231	* complication is that we need to start a commit operation if the
				1232	* filesystem is marked for synchronous update.
				1233	*
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1234	* jbd2_journal_stop itself will not usually return an error, but it may
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1235	* do so in unusual circumstances. In particular, expect it to
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1236	* return -EIO if a jbd2_journal_abort has been executed since the
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1237	* transaction began.
				1238	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1239	int jbd2_journal_stop(handle_t *handle)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1240	{
				1241	transaction_t *transaction = handle->h_transaction;
				1242	journal_t *journal = transaction->t_journal;
				1243	int old_handle_count, err;
				1244	pid_t pid;
				1245
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1246	J_ASSERT(journal_current_handle() == handle);
				1247
				1248	if (is_handle_aborted(handle))
				1249	err = -EIO;
OGAWA Hirofumi	3e2a532	2006-10-19 23:29:11 -0700	[diff] [blame]	1250	else {
				1251	J_ASSERT(transaction->t_updates > 0);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1252	err = 0;
OGAWA Hirofumi	3e2a532	2006-10-19 23:29:11 -0700	[diff] [blame]	1253	}
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1254
				1255	if (--handle->h_ref > 0) {
				1256	jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
				1257	handle->h_ref);
				1258	return err;
				1259	}
				1260
				1261	jbd_debug(4, "Handle %p going down\n", handle);
				1262
				1263	/*
				1264	* Implement synchronous transaction batching. If the handle
				1265	* was synchronous, don't force a commit immediately. Let's
				1266	* yield and let another thread piggyback onto this transaction.
				1267	* Keep doing that while new threads continue to arrive.
				1268	* It doesn't cost much - we're about to run a commit and sleep
				1269	* on IO anyway. Speeds up many-threaded, many-dir operations
				1270	* by 30x or more...
				1271	*
				1272	* But don't do this if this process was the most recent one to
				1273	* perform a synchronous write. We do this to detect the case where a
				1274	* single process is doing a stream of sync writes. No point in waiting
				1275	* for joiners in that case.
				1276	*/
				1277	pid = current->pid;
				1278	if (handle->h_sync && journal->j_last_sync_writer != pid) {
				1279	journal->j_last_sync_writer = pid;
				1280	do {
				1281	old_handle_count = transaction->t_handle_count;
				1282	schedule_timeout_uninterruptible(1);
				1283	} while (old_handle_count != transaction->t_handle_count);
				1284	}
				1285
				1286	current->journal_info = NULL;
				1287	spin_lock(&journal->j_state_lock);
				1288	spin_lock(&transaction->t_handle_lock);
				1289	transaction->t_outstanding_credits -= handle->h_buffer_credits;
				1290	transaction->t_updates--;
				1291	if (!transaction->t_updates) {
				1292	wake_up(&journal->j_wait_updates);
				1293	if (journal->j_barrier_count)
				1294	wake_up(&journal->j_wait_transaction_locked);
				1295	}
				1296
				1297	/*
				1298	* If the handle is marked SYNC, we need to set another commit
				1299	* going! We also want to force a commit if the current
				1300	* transaction is occupying too much of the log, or if the
				1301	* transaction is too old now.
				1302	*/
				1303	if (handle->h_sync \|\|
				1304	transaction->t_outstanding_credits >
				1305	journal->j_max_transaction_buffers \|\|
				1306	time_after_eq(jiffies, transaction->t_expires)) {
				1307	/* Do this even for aborted journals: an abort still
				1308	* completes the commit thread, it just doesn't write
				1309	* anything to disk. */
				1310	tid_t tid = transaction->t_tid;
				1311
				1312	spin_unlock(&transaction->t_handle_lock);
				1313	jbd_debug(2, "transaction too old, requesting commit for "
				1314	"handle %p\n", handle);
				1315	/* This is non-blocking */
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1316	__jbd2_log_start_commit(journal, transaction->t_tid);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1317	spin_unlock(&journal->j_state_lock);
				1318
				1319	/*
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1320	* Special case: JBD2_SYNC synchronous updates require us
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1321	* to wait for the commit to complete.
				1322	*/
				1323	if (handle->h_sync && !(current->flags & PF_MEMALLOC))
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1324	err = jbd2_log_wait_commit(journal, tid);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1325	} else {
				1326	spin_unlock(&transaction->t_handle_lock);
				1327	spin_unlock(&journal->j_state_lock);
				1328	}
				1329
Ingo Molnar	3295f0e	2008-08-11 10:30:30 +0200	[diff] [blame]	1330	lock_map_release(&handle->h_lockdep_map);
Mingming Cao	7b75106	2008-01-28 23:58:27 -0500	[diff] [blame]	1331
Mingming Cao	af1e76d	2007-10-16 18:38:25 -0400	[diff] [blame]	1332	jbd2_free_handle(handle);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1333	return err;
				1334	}
				1335
Randy Dunlap	5648ba5	2008-04-17 10:38:59 -0400	[diff] [blame]	1336	/**
				1337	* int jbd2_journal_force_commit() - force any uncommitted transactions
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1338	* @journal: journal to force
				1339	*
				1340	* For synchronous operations: force any uncommitted transactions
				1341	* to disk. May seem kludgy, but it reuses all the handle batching
				1342	* code in a very simple manner.
				1343	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1344	int jbd2_journal_force_commit(journal_t *journal)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1345	{
				1346	handle_t *handle;
				1347	int ret;
				1348
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1349	handle = jbd2_journal_start(journal, 1);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1350	if (IS_ERR(handle)) {
				1351	ret = PTR_ERR(handle);
				1352	} else {
				1353	handle->h_sync = 1;
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1354	ret = jbd2_journal_stop(handle);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1355	}
				1356	return ret;
				1357	}
				1358
				1359	/*
				1360	*
				1361	* List management code snippets: various functions for manipulating the
				1362	* transaction buffer lists.
				1363	*
				1364	*/
				1365
				1366	/*
				1367	* Append a buffer to a transaction list, given the transaction's list head
				1368	* pointer.
				1369	*
				1370	* j_list_lock is held.
				1371	*
				1372	* jbd_lock_bh_state(jh2bh(jh)) is held.
				1373	*/
				1374
				1375	static inline void
				1376	__blist_add_buffer(struct journal_head *list, struct journal_head jh)
				1377	{
				1378	if (!*list) {
				1379	jh->b_tnext = jh->b_tprev = jh;
				1380	*list = jh;
				1381	} else {
				1382	/* Insert at the tail of the list to preserve order */
				1383	struct journal_head first = list, *last = first->b_tprev;
				1384	jh->b_tprev = last;
				1385	jh->b_tnext = first;
				1386	last->b_tnext = first->b_tprev = jh;
				1387	}
				1388	}
				1389
				1390	/*
				1391	* Remove a buffer from a transaction list, given the transaction's list
				1392	* head pointer.
				1393	*
				1394	* Called with j_list_lock held, and the journal may not be locked.
				1395	*
				1396	* jbd_lock_bh_state(jh2bh(jh)) is held.
				1397	*/
				1398
				1399	static inline void
				1400	__blist_del_buffer(struct journal_head *list, struct journal_head jh)
				1401	{
				1402	if (*list == jh) {
				1403	*list = jh->b_tnext;
				1404	if (*list == jh)
				1405	*list = NULL;
				1406	}
				1407	jh->b_tprev->b_tnext = jh->b_tnext;
				1408	jh->b_tnext->b_tprev = jh->b_tprev;
				1409	}
				1410
				1411	/*
				1412	* Remove a buffer from the appropriate transaction list.
				1413	*
				1414	* Note that this function can change the value of
Jan Kara	87c89c2	2008-07-11 19:27:31 -0400	[diff] [blame]	1415	* bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list,
				1416	* t_log_list or t_reserved_list. If the caller is holding onto a copy of one
				1417	* of these pointers, it could go bad. Generally the caller needs to re-read
				1418	* the pointer from the transaction_t.
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1419	*
				1420	* Called under j_list_lock. The journal may not be locked.
				1421	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1422	void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1423	{
				1424	struct journal_head **list = NULL;
				1425	transaction_t *transaction;
				1426	struct buffer_head *bh = jh2bh(jh);
				1427
				1428	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
				1429	transaction = jh->b_transaction;
				1430	if (transaction)
				1431	assert_spin_locked(&transaction->t_journal->j_list_lock);
				1432
				1433	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
				1434	if (jh->b_jlist != BJ_None)
Mingming Cao	4019191	2008-01-28 23:58:27 -0500	[diff] [blame]	1435	J_ASSERT_JH(jh, transaction != NULL);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1436
				1437	switch (jh->b_jlist) {
				1438	case BJ_None:
				1439	return;
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1440	case BJ_Metadata:
				1441	transaction->t_nr_buffers--;
				1442	J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
				1443	list = &transaction->t_buffers;
				1444	break;
				1445	case BJ_Forget:
				1446	list = &transaction->t_forget;
				1447	break;
				1448	case BJ_IO:
				1449	list = &transaction->t_iobuf_list;
				1450	break;
				1451	case BJ_Shadow:
				1452	list = &transaction->t_shadow_list;
				1453	break;
				1454	case BJ_LogCtl:
				1455	list = &transaction->t_log_list;
				1456	break;
				1457	case BJ_Reserved:
				1458	list = &transaction->t_reserved_list;
				1459	break;
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1460	}
				1461
				1462	__blist_del_buffer(list, jh);
				1463	jh->b_jlist = BJ_None;
				1464	if (test_clear_buffer_jbddirty(bh))
				1465	mark_buffer_dirty(bh); /* Expose it to the VM */
				1466	}
				1467
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1468	void __jbd2_journal_unfile_buffer(struct journal_head *jh)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1469	{
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1470	__jbd2_journal_temp_unlink_buffer(jh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1471	jh->b_transaction = NULL;
				1472	}
				1473
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1474	void jbd2_journal_unfile_buffer(journal_t journal, struct journal_head jh)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1475	{
				1476	jbd_lock_bh_state(jh2bh(jh));
				1477	spin_lock(&journal->j_list_lock);
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1478	__jbd2_journal_unfile_buffer(jh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1479	spin_unlock(&journal->j_list_lock);
				1480	jbd_unlock_bh_state(jh2bh(jh));
				1481	}
				1482
				1483	/*
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1484	* Called from jbd2_journal_try_to_free_buffers().
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1485	*
				1486	* Called under jbd_lock_bh_state(bh)
				1487	*/
				1488	static void
				1489	__journal_try_to_free_buffer(journal_t journal, struct buffer_head bh)
				1490	{
				1491	struct journal_head *jh;
				1492
				1493	jh = bh2jh(bh);
				1494
				1495	if (buffer_locked(bh) \|\| buffer_dirty(bh))
				1496	goto out;
				1497
Mingming Cao	4019191	2008-01-28 23:58:27 -0500	[diff] [blame]	1498	if (jh->b_next_transaction != NULL)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1499	goto out;
				1500
				1501	spin_lock(&journal->j_list_lock);
Jan Kara	87c89c2	2008-07-11 19:27:31 -0400	[diff] [blame]	1502	if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1503	/* written-back checkpointed metadata buffer */
				1504	if (jh->b_jlist == BJ_None) {
				1505	JBUFFER_TRACE(jh, "remove from checkpoint list");
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1506	__jbd2_journal_remove_checkpoint(jh);
				1507	jbd2_journal_remove_journal_head(bh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1508	__brelse(bh);
				1509	}
				1510	}
				1511	spin_unlock(&journal->j_list_lock);
				1512	out:
				1513	return;
				1514	}
				1515
Mingming Cao	530576b	2008-07-13 21:06:39 -0400	[diff] [blame]	1516	/*
				1517	* jbd2_journal_try_to_free_buffers() could race with
				1518	* jbd2_journal_commit_transaction(). The latter might still hold the
				1519	* reference count to the buffers when inspecting them on
				1520	* t_syncdata_list or t_locked_list.
				1521	*
				1522	* jbd2_journal_try_to_free_buffers() will call this function to
				1523	* wait for the current transaction to finish syncing data buffers, before
				1524	* try to free that buffer.
				1525	*
				1526	* Takes journal->j_state_lock internally, so the caller must not hold it.
				1527	*/
				1528	static void jbd2_journal_wait_for_transaction_sync_data(journal_t *journal)
				1529	{
				1530	transaction_t *transaction;
				1531	tid_t tid;
				1532
				1533	spin_lock(&journal->j_state_lock);
				1534	transaction = journal->j_committing_transaction;
				1535
				1536	if (!transaction) {
				1537	spin_unlock(&journal->j_state_lock);
				1538	return;
				1539	}
				1540
				1541	tid = transaction->t_tid;
				1542	spin_unlock(&journal->j_state_lock);
				1543	jbd2_log_wait_commit(journal, tid);
				1544	}
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1545
				1546	/**
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1547	* int jbd2_journal_try_to_free_buffers() - try to free page buffers.
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1548	* @journal: journal for operation
				1549	* @page: to try and free
Mingming Cao	530576b	2008-07-13 21:06:39 -0400	[diff] [blame]	1550	* @gfp_mask: we use the mask to detect how hard should we try to release
				1551	* buffers. If __GFP_WAIT and __GFP_FS is set, we wait for commit code to
				1552	* release the buffers.
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1553	*
				1554	*
				1555	* For all the buffers on this page,
				1556	* if they are fully written out ordered data, move them onto BUF_CLEAN
				1557	* so try_to_free_buffers() can reap them.
				1558	*
				1559	* This function returns non-zero if we wish try_to_free_buffers()
				1560	* to be called. We do this if the page is releasable by try_to_free_buffers().
				1561	* We also do it if the page has locked or dirty buffers and the caller wants
				1562	* us to perform sync or async writeout.
				1563	*
				1564	* This complicates JBD locking somewhat. We aren't protected by the
				1565	* BKL here. We wish to remove the buffer from its committing or
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1566	* running transaction's ->t_datalist via __jbd2_journal_unfile_buffer.
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1567	*
				1568	* This may change the value of transaction_t->t_datalist, so anyone
				1569	* who looks at t_datalist needs to lock against this function.
				1570	*
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1571	* Even worse, someone may be doing a jbd2_journal_dirty_data on this
				1572	* buffer. So we need to lock against that. jbd2_journal_dirty_data()
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1573	* will come out of the lock with the buffer dirty, which makes it
				1574	* ineligible for release here.
				1575	*
				1576	* Who else is affected by this? hmm... Really the only contender
				1577	* is do_get_write_access() - it could be looking at the buffer while
				1578	* journal_try_to_free_buffer() is changing its state. But that
				1579	* cannot happen because we never reallocate freed data as metadata
				1580	* while the data is part of a transaction. Yes?
Mingming Cao	530576b	2008-07-13 21:06:39 -0400	[diff] [blame]	1581	*
				1582	* Return 0 on failure, 1 on success
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1583	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1584	int jbd2_journal_try_to_free_buffers(journal_t *journal,
Mingming Cao	530576b	2008-07-13 21:06:39 -0400	[diff] [blame]	1585	struct page *page, gfp_t gfp_mask)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1586	{
				1587	struct buffer_head *head;
				1588	struct buffer_head *bh;
				1589	int ret = 0;
				1590
				1591	J_ASSERT(PageLocked(page));
				1592
				1593	head = page_buffers(page);
				1594	bh = head;
				1595	do {
				1596	struct journal_head *jh;
				1597
				1598	/*
				1599	* We take our own ref against the journal_head here to avoid
				1600	* having to add tons of locking around each instance of
Mingming Cao	530576b	2008-07-13 21:06:39 -0400	[diff] [blame]	1601	* jbd2_journal_remove_journal_head() and
				1602	* jbd2_journal_put_journal_head().
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1603	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1604	jh = jbd2_journal_grab_journal_head(bh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1605	if (!jh)
				1606	continue;
				1607
				1608	jbd_lock_bh_state(bh);
				1609	__journal_try_to_free_buffer(journal, bh);
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1610	jbd2_journal_put_journal_head(jh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1611	jbd_unlock_bh_state(bh);
				1612	if (buffer_jbd(bh))
				1613	goto busy;
				1614	} while ((bh = bh->b_this_page) != head);
Mingming Cao	530576b	2008-07-13 21:06:39 -0400	[diff] [blame]	1615
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1616	ret = try_to_free_buffers(page);
Mingming Cao	530576b	2008-07-13 21:06:39 -0400	[diff] [blame]	1617
				1618	/*
				1619	* There are a number of places where jbd2_journal_try_to_free_buffers()
				1620	* could race with jbd2_journal_commit_transaction(), the later still
				1621	* holds the reference to the buffers to free while processing them.
				1622	* try_to_free_buffers() failed to free those buffers. Some of the
				1623	* caller of releasepage() request page buffers to be dropped, otherwise
				1624	* treat the fail-to-free as errors (such as generic_file_direct_IO())
				1625	*
				1626	* So, if the caller of try_to_release_page() wants the synchronous
				1627	* behaviour(i.e make sure buffers are dropped upon return),
				1628	* let's wait for the current transaction to finish flush of
				1629	* dirty data buffers, then try to free those buffers again,
				1630	* with the journal locked.
				1631	*/
				1632	if (ret == 0 && (gfp_mask & __GFP_WAIT) && (gfp_mask & __GFP_FS)) {
				1633	jbd2_journal_wait_for_transaction_sync_data(journal);
				1634	ret = try_to_free_buffers(page);
				1635	}
				1636
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1637	busy:
				1638	return ret;
				1639	}
				1640
				1641	/*
				1642	* This buffer is no longer needed. If it is on an older transaction's
				1643	* checkpoint list we need to record it on this transaction's forget list
				1644	* to pin this buffer (and hence its checkpointing transaction) down until
				1645	* this transaction commits. If the buffer isn't on a checkpoint list, we
				1646	* release it.
				1647	* Returns non-zero if JBD no longer has an interest in the buffer.
				1648	*
				1649	* Called under j_list_lock.
				1650	*
				1651	* Called under jbd_lock_bh_state(bh).
				1652	*/
				1653	static int __dispose_buffer(struct journal_head jh, transaction_t transaction)
				1654	{
				1655	int may_free = 1;
				1656	struct buffer_head *bh = jh2bh(jh);
				1657
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1658	__jbd2_journal_unfile_buffer(jh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1659
				1660	if (jh->b_cp_transaction) {
				1661	JBUFFER_TRACE(jh, "on running+cp transaction");
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1662	__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1663	clear_buffer_jbddirty(bh);
				1664	may_free = 0;
				1665	} else {
				1666	JBUFFER_TRACE(jh, "on running transaction");
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1667	jbd2_journal_remove_journal_head(bh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1668	__brelse(bh);
				1669	}
				1670	return may_free;
				1671	}
				1672
				1673	/*
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1674	* jbd2_journal_invalidatepage
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1675	*
				1676	* This code is tricky. It has a number of cases to deal with.
				1677	*
				1678	* There are two invariants which this code relies on:
				1679	*
				1680	* i_size must be updated on disk before we start calling invalidatepage on the
				1681	* data.
				1682	*
				1683	* This is done in ext3 by defining an ext3_setattr method which
				1684	* updates i_size before truncate gets going. By maintaining this
				1685	* invariant, we can be sure that it is safe to throw away any buffers
				1686	* attached to the current transaction: once the transaction commits,
				1687	* we know that the data will not be needed.
				1688	*
				1689	* Note however that we can not throw away data belonging to the
				1690	* previous, committing transaction!
				1691	*
				1692	* Any disk blocks which are part of the previous, committing
				1693	* transaction (and which therefore cannot be discarded immediately) are
				1694	* not going to be reused in the new running transaction
				1695	*
				1696	* The bitmap committed_data images guarantee this: any block which is
				1697	* allocated in one transaction and removed in the next will be marked
				1698	* as in-use in the committed_data bitmap, so cannot be reused until
				1699	* the next transaction to delete the block commits. This means that
				1700	* leaving committing buffers dirty is quite safe: the disk blocks
				1701	* cannot be reallocated to a different file and so buffer aliasing is
				1702	* not possible.
				1703	*
				1704	*
				1705	* The above applies mainly to ordered data mode. In writeback mode we
				1706	* don't make guarantees about the order in which data hits disk --- in
				1707	* particular we don't guarantee that new dirty data is flushed before
				1708	* transaction commit --- so it is always safe just to discard data
				1709	* immediately in that mode. --sct
				1710	*/
				1711
				1712	/*
				1713	* The journal_unmap_buffer helper function returns zero if the buffer
				1714	* concerned remains pinned as an anonymous buffer belonging to an older
				1715	* transaction.
				1716	*
				1717	* We're outside-transaction here. Either or both of j_running_transaction
				1718	* and j_committing_transaction may be NULL.
				1719	*/
				1720	static int journal_unmap_buffer(journal_t journal, struct buffer_head bh)
				1721	{
				1722	transaction_t *transaction;
				1723	struct journal_head *jh;
				1724	int may_free = 1;
				1725	int ret;
				1726
				1727	BUFFER_TRACE(bh, "entry");
				1728
				1729	/*
				1730	* It is safe to proceed here without the j_list_lock because the
				1731	* buffers cannot be stolen by try_to_free_buffers as long as we are
				1732	* holding the page lock. --sct
				1733	*/
				1734
				1735	if (!buffer_jbd(bh))
				1736	goto zap_buffer_unlocked;
				1737
Jan Kara	87c89c2	2008-07-11 19:27:31 -0400	[diff] [blame]	1738	/* OK, we have data buffer in journaled mode */
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1739	spin_lock(&journal->j_state_lock);
				1740	jbd_lock_bh_state(bh);
				1741	spin_lock(&journal->j_list_lock);
				1742
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1743	jh = jbd2_journal_grab_journal_head(bh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1744	if (!jh)
				1745	goto zap_buffer_no_jh;
				1746
				1747	transaction = jh->b_transaction;
				1748	if (transaction == NULL) {
				1749	/* First case: not on any transaction. If it
				1750	* has no checkpoint link, then we can zap it:
				1751	* it's a writeback-mode buffer so we don't care
				1752	* if it hits disk safely. */
				1753	if (!jh->b_cp_transaction) {
				1754	JBUFFER_TRACE(jh, "not on any transaction: zap");
				1755	goto zap_buffer;
				1756	}
				1757
				1758	if (!buffer_dirty(bh)) {
				1759	/* bdflush has written it. We can drop it now */
				1760	goto zap_buffer;
				1761	}
				1762
				1763	/* OK, it must be in the journal but still not
				1764	* written fully to disk: it's metadata or
				1765	* journaled data... */
				1766
				1767	if (journal->j_running_transaction) {
				1768	/* ... and once the current transaction has
				1769	* committed, the buffer won't be needed any
				1770	* longer. */
				1771	JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
				1772	ret = __dispose_buffer(jh,
				1773	journal->j_running_transaction);
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1774	jbd2_journal_put_journal_head(jh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1775	spin_unlock(&journal->j_list_lock);
				1776	jbd_unlock_bh_state(bh);
				1777	spin_unlock(&journal->j_state_lock);
				1778	return ret;
				1779	} else {
				1780	/* There is no currently-running transaction. So the
				1781	* orphan record which we wrote for this file must have
				1782	* passed into commit. We must attach this buffer to
				1783	* the committing transaction, if it exists. */
				1784	if (journal->j_committing_transaction) {
				1785	JBUFFER_TRACE(jh, "give to committing trans");
				1786	ret = __dispose_buffer(jh,
				1787	journal->j_committing_transaction);
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1788	jbd2_journal_put_journal_head(jh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1789	spin_unlock(&journal->j_list_lock);
				1790	jbd_unlock_bh_state(bh);
				1791	spin_unlock(&journal->j_state_lock);
				1792	return ret;
				1793	} else {
				1794	/* The orphan record's transaction has
				1795	* committed. We can cleanse this buffer */
				1796	clear_buffer_jbddirty(bh);
				1797	goto zap_buffer;
				1798	}
				1799	}
				1800	} else if (transaction == journal->j_committing_transaction) {
Eric Sandeen	9b57988	2006-10-28 10:38:28 -0700	[diff] [blame]	1801	JBUFFER_TRACE(jh, "on committing transaction");
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1802	/*
				1803	* If it is committing, we simply cannot touch it. We
				1804	* can remove it's next_transaction pointer from the
				1805	* running transaction if that is set, but nothing
				1806	* else. */
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1807	set_buffer_freed(bh);
				1808	if (jh->b_next_transaction) {
				1809	J_ASSERT(jh->b_next_transaction ==
				1810	journal->j_running_transaction);
				1811	jh->b_next_transaction = NULL;
				1812	}
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1813	jbd2_journal_put_journal_head(jh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1814	spin_unlock(&journal->j_list_lock);
				1815	jbd_unlock_bh_state(bh);
				1816	spin_unlock(&journal->j_state_lock);
				1817	return 0;
				1818	} else {
				1819	/* Good, the buffer belongs to the running transaction.
				1820	* We are writing our own transaction's data, not any
				1821	* previous one's, so it is safe to throw it away
				1822	* (remember that we expect the filesystem to have set
				1823	* i_size already for this truncate so recovery will not
				1824	* expose the disk blocks we are discarding here.) */
				1825	J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
Eric Sandeen	9b57988	2006-10-28 10:38:28 -0700	[diff] [blame]	1826	JBUFFER_TRACE(jh, "on running transaction");
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1827	may_free = __dispose_buffer(jh, transaction);
				1828	}
				1829
				1830	zap_buffer:
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1831	jbd2_journal_put_journal_head(jh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1832	zap_buffer_no_jh:
				1833	spin_unlock(&journal->j_list_lock);
				1834	jbd_unlock_bh_state(bh);
				1835	spin_unlock(&journal->j_state_lock);
				1836	zap_buffer_unlocked:
				1837	clear_buffer_dirty(bh);
				1838	J_ASSERT_BH(bh, !buffer_jbddirty(bh));
				1839	clear_buffer_mapped(bh);
				1840	clear_buffer_req(bh);
				1841	clear_buffer_new(bh);
				1842	bh->b_bdev = NULL;
				1843	return may_free;
				1844	}
				1845
				1846	/**
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1847	* void jbd2_journal_invalidatepage()
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1848	* @journal: journal to use for flush...
				1849	* @page: page to flush
				1850	* @offset: length of page to invalidate.
				1851	*
				1852	* Reap page buffers containing data after offset in page.
				1853	*
				1854	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1855	void jbd2_journal_invalidatepage(journal_t *journal,
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1856	struct page *page,
				1857	unsigned long offset)
				1858	{
				1859	struct buffer_head head, bh, *next;
				1860	unsigned int curr_off = 0;
				1861	int may_free = 1;
				1862
				1863	if (!PageLocked(page))
				1864	BUG();
				1865	if (!page_has_buffers(page))
				1866	return;
				1867
				1868	/* We will potentially be playing with lists other than just the
				1869	* data lists (especially for journaled data mode), so be
				1870	* cautious in our locking. */
				1871
				1872	head = bh = page_buffers(page);
				1873	do {
				1874	unsigned int next_off = curr_off + bh->b_size;
				1875	next = bh->b_this_page;
				1876
				1877	if (offset <= curr_off) {
				1878	/* This block is wholly outside the truncation point */
				1879	lock_buffer(bh);
				1880	may_free &= journal_unmap_buffer(journal, bh);
				1881	unlock_buffer(bh);
				1882	}
				1883	curr_off = next_off;
				1884	bh = next;
				1885
				1886	} while (bh != head);
				1887
				1888	if (!offset) {
				1889	if (may_free && try_to_free_buffers(page))
				1890	J_ASSERT(!page_has_buffers(page));
				1891	}
				1892	}
				1893
				1894	/*
				1895	* File a buffer on the given transaction list.
				1896	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1897	void __jbd2_journal_file_buffer(struct journal_head *jh,
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1898	transaction_t *transaction, int jlist)
				1899	{
				1900	struct journal_head **list = NULL;
				1901	int was_dirty = 0;
				1902	struct buffer_head *bh = jh2bh(jh);
				1903
				1904	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
				1905	assert_spin_locked(&transaction->t_journal->j_list_lock);
				1906
				1907	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
				1908	J_ASSERT_JH(jh, jh->b_transaction == transaction \|\|
Mingming Cao	4019191	2008-01-28 23:58:27 -0500	[diff] [blame]	1909	jh->b_transaction == NULL);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1910
				1911	if (jh->b_transaction && jh->b_jlist == jlist)
				1912	return;
				1913
				1914	/* The following list of buffer states needs to be consistent
				1915	* with __jbd_unexpected_dirty_buffer()'s handling of dirty
				1916	* state. */
				1917
				1918	if (jlist == BJ_Metadata \|\| jlist == BJ_Reserved \|\|
				1919	jlist == BJ_Shadow \|\| jlist == BJ_Forget) {
				1920	if (test_clear_buffer_dirty(bh) \|\|
				1921	test_clear_buffer_jbddirty(bh))
				1922	was_dirty = 1;
				1923	}
				1924
				1925	if (jh->b_transaction)
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1926	__jbd2_journal_temp_unlink_buffer(jh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1927	jh->b_transaction = transaction;
				1928
				1929	switch (jlist) {
				1930	case BJ_None:
				1931	J_ASSERT_JH(jh, !jh->b_committed_data);
				1932	J_ASSERT_JH(jh, !jh->b_frozen_data);
				1933	return;
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1934	case BJ_Metadata:
				1935	transaction->t_nr_buffers++;
				1936	list = &transaction->t_buffers;
				1937	break;
				1938	case BJ_Forget:
				1939	list = &transaction->t_forget;
				1940	break;
				1941	case BJ_IO:
				1942	list = &transaction->t_iobuf_list;
				1943	break;
				1944	case BJ_Shadow:
				1945	list = &transaction->t_shadow_list;
				1946	break;
				1947	case BJ_LogCtl:
				1948	list = &transaction->t_log_list;
				1949	break;
				1950	case BJ_Reserved:
				1951	list = &transaction->t_reserved_list;
				1952	break;
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1953	}
				1954
				1955	__blist_add_buffer(list, jh);
				1956	jh->b_jlist = jlist;
				1957
				1958	if (was_dirty)
				1959	set_buffer_jbddirty(bh);
				1960	}
				1961
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1962	void jbd2_journal_file_buffer(struct journal_head *jh,
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1963	transaction_t *transaction, int jlist)
				1964	{
				1965	jbd_lock_bh_state(jh2bh(jh));
				1966	spin_lock(&transaction->t_journal->j_list_lock);
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1967	__jbd2_journal_file_buffer(jh, transaction, jlist);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1968	spin_unlock(&transaction->t_journal->j_list_lock);
				1969	jbd_unlock_bh_state(jh2bh(jh));
				1970	}
				1971
				1972	/*
				1973	* Remove a buffer from its current buffer list in preparation for
				1974	* dropping it from its current transaction entirely. If the buffer has
				1975	* already started to be used by a subsequent transaction, refile the
				1976	* buffer on that transaction's metadata list.
				1977	*
				1978	* Called under journal->j_list_lock
				1979	*
				1980	* Called under jbd_lock_bh_state(jh2bh(jh))
				1981	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1982	void __jbd2_journal_refile_buffer(struct journal_head *jh)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1983	{
				1984	int was_dirty;
				1985	struct buffer_head *bh = jh2bh(jh);
				1986
				1987	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
				1988	if (jh->b_transaction)
				1989	assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);
				1990
				1991	/* If the buffer is now unused, just drop it. */
				1992	if (jh->b_next_transaction == NULL) {
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	1993	__jbd2_journal_unfile_buffer(jh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	1994	return;
				1995	}
				1996
				1997	/*
				1998	* It has been modified by a later transaction: add it to the new
				1999	* transaction's metadata list.
				2000	*/
				2001
				2002	was_dirty = test_clear_buffer_jbddirty(bh);
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	2003	__jbd2_journal_temp_unlink_buffer(jh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	2004	jh->b_transaction = jh->b_next_transaction;
				2005	jh->b_next_transaction = NULL;
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	2006	__jbd2_journal_file_buffer(jh, jh->b_transaction,
Josef Bacik	1dfc322	2008-04-17 10:38:59 -0400	[diff] [blame]	2007	jh->b_modified ? BJ_Metadata : BJ_Reserved);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	2008	J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);
				2009
				2010	if (was_dirty)
				2011	set_buffer_jbddirty(bh);
				2012	}
				2013
				2014	/*
				2015	* For the unlocked version of this call, also make sure that any
				2016	* hanging journal_head is cleaned up if necessary.
				2017	*
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	2018	* __jbd2_journal_refile_buffer is usually called as part of a single locked
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	2019	* operation on a buffer_head, in which the caller is probably going to
				2020	* be hooking the journal_head onto other lists. In that case it is up
				2021	* to the caller to remove the journal_head if necessary. For the
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	2022	* unlocked jbd2_journal_refile_buffer call, the caller isn't going to be
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	2023	* doing anything else to the buffer so we need to do the cleanup
				2024	* ourselves to avoid a jh leak.
				2025	*
				2026	* *** The journal_head may be freed by this call! ***
				2027	*/
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	2028	void jbd2_journal_refile_buffer(journal_t journal, struct journal_head jh)
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	2029	{
				2030	struct buffer_head *bh = jh2bh(jh);
				2031
				2032	jbd_lock_bh_state(bh);
				2033	spin_lock(&journal->j_list_lock);
				2034
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	2035	__jbd2_journal_refile_buffer(jh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	2036	jbd_unlock_bh_state(bh);
Mingming Cao	f7f4bcc	2006-10-11 01:20:59 -0700	[diff] [blame]	2037	jbd2_journal_remove_journal_head(bh);
Dave Kleikamp	470decc	2006-10-11 01:20:57 -0700	[diff] [blame]	2038
				2039	spin_unlock(&journal->j_list_lock);
				2040	__brelse(bh);
				2041	}
Jan Kara	c851ed5	2008-07-11 19:27:31 -0400	[diff] [blame]	2042
				2043	/*
				2044	* File inode in the inode list of the handle's transaction
				2045	*/
				2046	int jbd2_journal_file_inode(handle_t handle, struct jbd2_inode jinode)
				2047	{
				2048	transaction_t *transaction = handle->h_transaction;
				2049	journal_t *journal = transaction->t_journal;
				2050
				2051	if (is_handle_aborted(handle))
				2052	return -EIO;
				2053
				2054	jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
				2055	transaction->t_tid);
				2056
				2057	/*
				2058	* First check whether inode isn't already on the transaction's
				2059	* lists without taking the lock. Note that this check is safe
				2060	* without the lock as we cannot race with somebody removing inode
				2061	* from the transaction. The reason is that we remove inode from the
				2062	* transaction only in journal_release_jbd_inode() and when we commit
				2063	* the transaction. We are guarded from the first case by holding
				2064	* a reference to the inode. We are safe against the second case
				2065	* because if jinode->i_transaction == transaction, commit code
				2066	* cannot touch the transaction because we hold reference to it,
				2067	* and if jinode->i_next_transaction == transaction, commit code
				2068	* will only file the inode where we want it.
				2069	*/
				2070	if (jinode->i_transaction == transaction \|\|
				2071	jinode->i_next_transaction == transaction)
				2072	return 0;
				2073
				2074	spin_lock(&journal->j_list_lock);
				2075
				2076	if (jinode->i_transaction == transaction \|\|
				2077	jinode->i_next_transaction == transaction)
				2078	goto done;
				2079
				2080	/* On some different transaction's list - should be
				2081	* the committing one */
				2082	if (jinode->i_transaction) {
				2083	J_ASSERT(jinode->i_next_transaction == NULL);
				2084	J_ASSERT(jinode->i_transaction ==
				2085	journal->j_committing_transaction);
				2086	jinode->i_next_transaction = transaction;
				2087	goto done;
				2088	}
				2089	/* Not on any transaction list... */
				2090	J_ASSERT(!jinode->i_next_transaction);
				2091	jinode->i_transaction = transaction;
				2092	list_add(&jinode->i_list, &transaction->t_inode_list);
				2093	done:
				2094	spin_unlock(&journal->j_list_lock);
				2095
				2096	return 0;
				2097	}
				2098
				2099	/*
				2100	* This function must be called when inode is journaled in ordered mode
				2101	* before truncation happens. It starts writeout of truncated part in
				2102	* case it is in the committing transaction so that we stand to ordered
				2103	* mode consistency guarantees.
				2104	*/
				2105	int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode,
				2106	loff_t new_size)
				2107	{
				2108	journal_t *journal;
				2109	transaction_t *commit_trans;
				2110	int ret = 0;
				2111
				2112	if (!inode->i_transaction && !inode->i_next_transaction)
				2113	goto out;
				2114	journal = inode->i_transaction->t_journal;
				2115	spin_lock(&journal->j_state_lock);
				2116	commit_trans = journal->j_committing_transaction;
				2117	spin_unlock(&journal->j_state_lock);
				2118	if (inode->i_transaction == commit_trans) {
				2119	ret = filemap_fdatawrite_range(inode->i_vfs_inode->i_mapping,
				2120	new_size, LLONG_MAX);
				2121	if (ret)
				2122	jbd2_journal_abort(journal, ret);
				2123	}
				2124	out:
				2125	return ret;
				2126	}