blob: 7809f41bcbfc4eb5592f6797a5b10a75e8997093 [file] [log] [blame]
Mark Fashehccd979b2005-12-15 14:31:24 -08001/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * suballoc.c
5 *
6 * metadata alloc and free
7 * Inspired by ext3 block groups.
8 *
9 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public
13 * License as published by the Free Software Foundation; either
14 * version 2 of the License, or (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public
22 * License along with this program; if not, write to the
23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 * Boston, MA 02111-1307, USA.
25 */
26
27#include <linux/fs.h>
28#include <linux/types.h>
29#include <linux/slab.h>
30#include <linux/highmem.h>
31
32#define MLOG_MASK_PREFIX ML_DISK_ALLOC
33#include <cluster/masklog.h>
34
35#include "ocfs2.h"
36
37#include "alloc.h"
Joel Beckerd6b32bb2008-10-17 14:55:01 -070038#include "blockcheck.h"
Mark Fashehccd979b2005-12-15 14:31:24 -080039#include "dlmglue.h"
40#include "inode.h"
41#include "journal.h"
42#include "localalloc.h"
43#include "suballoc.h"
44#include "super.h"
45#include "sysfile.h"
46#include "uptodate.h"
47
48#include "buffer_head_io.h"
49
Tao Maffda89a2008-03-03 17:12:09 +080050#define NOT_ALLOC_NEW_GROUP 0
Tao Ma60ca81e2009-02-25 00:53:24 +080051#define ALLOC_NEW_GROUP 0x1
52#define ALLOC_GROUPS_FROM_GLOBAL 0x2
Tao Maffda89a2008-03-03 17:12:09 +080053
Tiger Yangb89c5422010-01-25 14:11:06 +080054#define OCFS2_MAX_TO_STEAL 1024
Tao Ma4d0ddb22008-03-05 16:11:46 +080055
Joel Becker7d1fe092010-04-13 14:30:19 +080056struct ocfs2_suballoc_result {
Joel Becker2b6cb572010-03-26 10:09:15 +080057 u64 sr_bg_blkno; /* The bg we allocated from. Set
58 to 0 when a block group is
59 contiguous. */
Joel Beckerba206632010-03-26 10:08:59 +080060 u64 sr_blkno; /* The first allocated block */
Joel Becker7d1fe092010-04-13 14:30:19 +080061 unsigned int sr_bit_offset; /* The bit in the bg */
62 unsigned int sr_bits; /* How many bits we claimed */
63};
64
Mark Fashehccd979b2005-12-15 14:31:24 -080065static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
66static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
67static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
Mark Fasheh1fabe142006-10-09 18:11:45 -070068static int ocfs2_block_group_fill(handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -080069 struct inode *alloc_inode,
70 struct buffer_head *bg_bh,
71 u64 group_blkno,
Joel Becker798db352010-04-13 14:26:32 +080072 unsigned int group_clusters,
Mark Fashehccd979b2005-12-15 14:31:24 -080073 u16 my_chain,
74 struct ocfs2_chain_list *cl);
75static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
76 struct inode *alloc_inode,
Joel Becker1187c962008-09-03 20:03:39 -070077 struct buffer_head *bh,
Tao Ma60ca81e2009-02-25 00:53:24 +080078 u64 max_block,
Tao Mafeb473a2009-02-25 00:53:25 +080079 u64 *last_alloc_group,
Tao Ma60ca81e2009-02-25 00:53:24 +080080 int flags);
Mark Fashehccd979b2005-12-15 14:31:24 -080081
Mark Fashehccd979b2005-12-15 14:31:24 -080082static int ocfs2_cluster_group_search(struct inode *inode,
83 struct buffer_head *group_bh,
84 u32 bits_wanted, u32 min_bits,
Joel Becker1187c962008-09-03 20:03:39 -070085 u64 max_block,
Joel Becker7d1fe092010-04-13 14:30:19 +080086 struct ocfs2_suballoc_result *res);
Mark Fashehccd979b2005-12-15 14:31:24 -080087static int ocfs2_block_group_search(struct inode *inode,
88 struct buffer_head *group_bh,
89 u32 bits_wanted, u32 min_bits,
Joel Becker1187c962008-09-03 20:03:39 -070090 u64 max_block,
Joel Becker7d1fe092010-04-13 14:30:19 +080091 struct ocfs2_suballoc_result *res);
Joel Beckeraa8f8e92010-03-26 10:08:07 +080092static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
Mark Fasheh1fabe142006-10-09 18:11:45 -070093 handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -080094 u32 bits_wanted,
95 u32 min_bits,
Joel Becker7d1fe092010-04-13 14:30:19 +080096 struct ocfs2_suballoc_result *res);
Mark Fashehccd979b2005-12-15 14:31:24 -080097static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
98 int nr);
Mark Fasheh1fabe142006-10-09 18:11:45 -070099static inline int ocfs2_block_group_set_bits(handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -0800100 struct inode *alloc_inode,
101 struct ocfs2_group_desc *bg,
102 struct buffer_head *group_bh,
103 unsigned int bit_off,
104 unsigned int num_bits);
Mark Fasheh1fabe142006-10-09 18:11:45 -0700105static int ocfs2_relink_block_group(handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -0800106 struct inode *alloc_inode,
107 struct buffer_head *fe_bh,
108 struct buffer_head *bg_bh,
109 struct buffer_head *prev_bg_bh,
110 u16 chain);
111static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
112 u32 wanted);
Mark Fashehccd979b2005-12-15 14:31:24 -0800113static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
114 u64 bg_blkno,
115 u16 bg_bit_off);
Mark Fashehccd979b2005-12-15 14:31:24 -0800116static inline void ocfs2_block_to_cluster_group(struct inode *inode,
117 u64 data_blkno,
118 u64 *bg_blkno,
119 u16 *bg_bit_off);
Joel Becker1187c962008-09-03 20:03:39 -0700120static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
121 u32 bits_wanted, u64 max_block,
Tao Ma60ca81e2009-02-25 00:53:24 +0800122 int flags,
Joel Becker1187c962008-09-03 20:03:39 -0700123 struct ocfs2_alloc_context **ac);
Mark Fashehccd979b2005-12-15 14:31:24 -0800124
Mark Fasheh9c7af402008-07-28 18:02:53 -0700125void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac)
Mark Fashehccd979b2005-12-15 14:31:24 -0800126{
Mark Fashehda5cbf22006-10-06 18:34:35 -0700127 struct inode *inode = ac->ac_inode;
128
129 if (inode) {
130 if (ac->ac_which != OCFS2_AC_USE_LOCAL)
Mark Fashehe63aecb62007-10-18 15:30:42 -0700131 ocfs2_inode_unlock(inode, 1);
Mark Fashehda5cbf22006-10-06 18:34:35 -0700132
133 mutex_unlock(&inode->i_mutex);
134
135 iput(inode);
Tao Ma4d0ddb22008-03-05 16:11:46 +0800136 ac->ac_inode = NULL;
Mark Fashehda5cbf22006-10-06 18:34:35 -0700137 }
Mark Fasheha81cb882008-10-07 14:25:16 -0700138 brelse(ac->ac_bh);
139 ac->ac_bh = NULL;
Mark Fashehe3b4a972009-12-07 13:16:07 -0800140 ac->ac_resv = NULL;
Tao Ma4d0ddb22008-03-05 16:11:46 +0800141}
142
/* Release the resources held by @ac, then free @ac itself. */
void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
{
	ocfs2_free_ac_resource(ac);

	kfree(ac);
}
148
149static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
150{
151 return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
152}
153
/*
 * Report a group descriptor problem.  The expansion site must have
 * `resize` and `sb` in scope: when resize is non-zero the message is
 * only logged through mlog(ML_ERROR), otherwise it is passed to
 * ocfs2_error().
 */
#define do_error(fmt, ...)						\
	do {								\
		if (resize)						\
			mlog(ML_ERROR, fmt "\n", ##__VA_ARGS__);	\
		else							\
			ocfs2_error(sb, fmt, ##__VA_ARGS__);		\
	} while (0)
161
Joel Becker970e4932008-11-13 14:49:19 -0800162static int ocfs2_validate_gd_self(struct super_block *sb,
163 struct buffer_head *bh,
Tao Ma78c37eb2010-03-03 11:26:27 +0800164 int resize)
Joel Becker970e4932008-11-13 14:49:19 -0800165{
166 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
167
Mark Fasheh7bf72ed2006-05-03 17:46:50 -0700168 if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
Joel Becker68f64d42008-11-13 14:49:14 -0800169 do_error("Group descriptor #%llu has bad signature %.*s",
170 (unsigned long long)bh->b_blocknr, 7,
Joel Becker57e3e792008-11-13 14:49:13 -0800171 gd->bg_signature);
172 return -EINVAL;
Mark Fasheh7bf72ed2006-05-03 17:46:50 -0700173 }
174
Joel Becker68f64d42008-11-13 14:49:14 -0800175 if (le64_to_cpu(gd->bg_blkno) != bh->b_blocknr) {
176 do_error("Group descriptor #%llu has an invalid bg_blkno "
177 "of %llu",
178 (unsigned long long)bh->b_blocknr,
179 (unsigned long long)le64_to_cpu(gd->bg_blkno));
180 return -EINVAL;
181 }
182
183 if (le32_to_cpu(gd->bg_generation) != OCFS2_SB(sb)->fs_generation) {
184 do_error("Group descriptor #%llu has an invalid "
185 "fs_generation of #%u",
186 (unsigned long long)bh->b_blocknr,
187 le32_to_cpu(gd->bg_generation));
188 return -EINVAL;
189 }
190
Joel Becker970e4932008-11-13 14:49:19 -0800191 if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
192 do_error("Group descriptor #%llu has bit count %u but "
193 "claims that %u are free",
194 (unsigned long long)bh->b_blocknr,
195 le16_to_cpu(gd->bg_bits),
196 le16_to_cpu(gd->bg_free_bits_count));
197 return -EINVAL;
198 }
199
200 if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
201 do_error("Group descriptor #%llu has bit count %u but "
202 "max bitmap bits of %u",
203 (unsigned long long)bh->b_blocknr,
204 le16_to_cpu(gd->bg_bits),
205 8 * le16_to_cpu(gd->bg_size));
206 return -EINVAL;
207 }
208
209 return 0;
210}
211
212static int ocfs2_validate_gd_parent(struct super_block *sb,
213 struct ocfs2_dinode *di,
214 struct buffer_head *bh,
Tao Ma78c37eb2010-03-03 11:26:27 +0800215 int resize)
Joel Becker970e4932008-11-13 14:49:19 -0800216{
217 unsigned int max_bits;
218 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
219
Mark Fasheh7bf72ed2006-05-03 17:46:50 -0700220 if (di->i_blkno != gd->bg_parent_dinode) {
Joel Becker68f64d42008-11-13 14:49:14 -0800221 do_error("Group descriptor #%llu has bad parent "
Joel Becker57e3e792008-11-13 14:49:13 -0800222 "pointer (%llu, expected %llu)",
Joel Becker68f64d42008-11-13 14:49:14 -0800223 (unsigned long long)bh->b_blocknr,
Joel Becker57e3e792008-11-13 14:49:13 -0800224 (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
225 (unsigned long long)le64_to_cpu(di->i_blkno));
226 return -EINVAL;
Mark Fasheh7bf72ed2006-05-03 17:46:50 -0700227 }
228
229 max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
230 if (le16_to_cpu(gd->bg_bits) > max_bits) {
Joel Becker68f64d42008-11-13 14:49:14 -0800231 do_error("Group descriptor #%llu has bit count of %u",
232 (unsigned long long)bh->b_blocknr,
Joel Becker57e3e792008-11-13 14:49:13 -0800233 le16_to_cpu(gd->bg_bits));
234 return -EINVAL;
Mark Fasheh7bf72ed2006-05-03 17:46:50 -0700235 }
236
Tao Ma78c37eb2010-03-03 11:26:27 +0800237 /* In resize, we may meet the case bg_chain == cl_next_free_rec. */
238 if ((le16_to_cpu(gd->bg_chain) >
239 le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) ||
240 ((le16_to_cpu(gd->bg_chain) ==
241 le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) && !resize)) {
Joel Becker68f64d42008-11-13 14:49:14 -0800242 do_error("Group descriptor #%llu has bad chain %u",
243 (unsigned long long)bh->b_blocknr,
Joel Becker57e3e792008-11-13 14:49:13 -0800244 le16_to_cpu(gd->bg_chain));
245 return -EINVAL;
Mark Fasheh7bf72ed2006-05-03 17:46:50 -0700246 }
247
Joel Becker970e4932008-11-13 14:49:19 -0800248 return 0;
249}
Mark Fasheh7bf72ed2006-05-03 17:46:50 -0700250
Joel Becker57e3e792008-11-13 14:49:13 -0800251#undef do_error
Mark Fasheh7bf72ed2006-05-03 17:46:50 -0700252
Joel Becker970e4932008-11-13 14:49:19 -0800253/*
254 * This version only prints errors. It does not fail the filesystem, and
255 * exists only for resize.
256 */
257int ocfs2_check_group_descriptor(struct super_block *sb,
258 struct ocfs2_dinode *di,
259 struct buffer_head *bh)
260{
261 int rc;
Joel Beckerd6b32bb2008-10-17 14:55:01 -0700262 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
Joel Becker970e4932008-11-13 14:49:19 -0800263
Joel Beckerd6b32bb2008-10-17 14:55:01 -0700264 BUG_ON(!buffer_uptodate(bh));
265
266 /*
267 * If the ecc fails, we return the error but otherwise
268 * leave the filesystem running. We know any error is
269 * local to this block.
270 */
271 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check);
Joel Becker13723d02008-10-17 19:25:01 -0700272 if (rc) {
273 mlog(ML_ERROR,
274 "Checksum failed for group descriptor %llu\n",
275 (unsigned long long)bh->b_blocknr);
276 } else
Joel Beckerd6b32bb2008-10-17 14:55:01 -0700277 rc = ocfs2_validate_gd_self(sb, bh, 1);
Joel Becker970e4932008-11-13 14:49:19 -0800278 if (!rc)
279 rc = ocfs2_validate_gd_parent(sb, di, bh, 1);
280
281 return rc;
282}
283
284static int ocfs2_validate_group_descriptor(struct super_block *sb,
285 struct buffer_head *bh)
286{
Joel Beckerd6b32bb2008-10-17 14:55:01 -0700287 int rc;
288 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *)bh->b_data;
289
Joel Becker970e4932008-11-13 14:49:19 -0800290 mlog(0, "Validating group descriptor %llu\n",
291 (unsigned long long)bh->b_blocknr);
292
Joel Beckerd6b32bb2008-10-17 14:55:01 -0700293 BUG_ON(!buffer_uptodate(bh));
294
295 /*
296 * If the ecc fails, we return the error but otherwise
297 * leave the filesystem running. We know any error is
298 * local to this block.
299 */
300 rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &gd->bg_check);
301 if (rc)
302 return rc;
303
304 /*
305 * Errors after here are fatal.
306 */
307
Joel Becker970e4932008-11-13 14:49:19 -0800308 return ocfs2_validate_gd_self(sb, bh, 0);
Mark Fasheh7bf72ed2006-05-03 17:46:50 -0700309}
310
Joel Becker68f64d42008-11-13 14:49:14 -0800311int ocfs2_read_group_descriptor(struct inode *inode, struct ocfs2_dinode *di,
312 u64 gd_blkno, struct buffer_head **bh)
313{
314 int rc;
315 struct buffer_head *tmp = *bh;
316
Joel Becker8cb471e2009-02-10 20:00:41 -0800317 rc = ocfs2_read_block(INODE_CACHE(inode), gd_blkno, &tmp,
Joel Becker970e4932008-11-13 14:49:19 -0800318 ocfs2_validate_group_descriptor);
Joel Becker68f64d42008-11-13 14:49:14 -0800319 if (rc)
320 goto out;
321
Joel Becker970e4932008-11-13 14:49:19 -0800322 rc = ocfs2_validate_gd_parent(inode->i_sb, di, tmp, 0);
Joel Becker68f64d42008-11-13 14:49:14 -0800323 if (rc) {
324 brelse(tmp);
325 goto out;
326 }
327
328 /* If ocfs2_read_block() got us a new bh, pass it up. */
329 if (!*bh)
330 *bh = tmp;
331
332out:
333 return rc;
334}
335
Joel Becker798db352010-04-13 14:26:32 +0800336static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb,
337 struct ocfs2_group_desc *bg,
338 struct ocfs2_chain_list *cl,
339 u64 p_blkno, u32 clusters)
340{
341 struct ocfs2_extent_list *el = &bg->bg_list;
342 struct ocfs2_extent_rec *rec;
343
344 BUG_ON(!ocfs2_supports_discontig_bh(osb));
345 if (!el->l_next_free_rec)
346 el->l_count = cpu_to_le16(ocfs2_extent_recs_per_gd(osb->sb));
347 rec = &el->l_recs[le16_to_cpu(el->l_next_free_rec)];
348 rec->e_blkno = p_blkno;
349 rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) /
350 le16_to_cpu(cl->cl_bpc));
351 rec->e_leaf_clusters = cpu_to_le32(clusters);
352 le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc));
353 le16_add_cpu(&el->l_next_free_rec, 1);
354}
355
Mark Fasheh1fabe142006-10-09 18:11:45 -0700356static int ocfs2_block_group_fill(handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -0800357 struct inode *alloc_inode,
358 struct buffer_head *bg_bh,
359 u64 group_blkno,
Joel Becker798db352010-04-13 14:26:32 +0800360 unsigned int group_clusters,
Mark Fashehccd979b2005-12-15 14:31:24 -0800361 u16 my_chain,
362 struct ocfs2_chain_list *cl)
363{
364 int status = 0;
Joel Becker798db352010-04-13 14:26:32 +0800365 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
Mark Fashehccd979b2005-12-15 14:31:24 -0800366 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
367 struct super_block * sb = alloc_inode->i_sb;
368
369 mlog_entry_void();
370
371 if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) {
Mark Fashehb06970532006-03-03 10:24:33 -0800372 ocfs2_error(alloc_inode->i_sb, "group block (%llu) != "
373 "b_blocknr (%llu)",
374 (unsigned long long)group_blkno,
Mark Fashehccd979b2005-12-15 14:31:24 -0800375 (unsigned long long) bg_bh->b_blocknr);
376 status = -EIO;
377 goto bail;
378 }
379
Joel Becker13723d02008-10-17 19:25:01 -0700380 status = ocfs2_journal_access_gd(handle,
Joel Becker0cf2f762009-02-12 16:41:25 -0800381 INODE_CACHE(alloc_inode),
Joel Becker13723d02008-10-17 19:25:01 -0700382 bg_bh,
383 OCFS2_JOURNAL_ACCESS_CREATE);
Mark Fashehccd979b2005-12-15 14:31:24 -0800384 if (status < 0) {
385 mlog_errno(status);
386 goto bail;
387 }
388
389 memset(bg, 0, sb->s_blocksize);
390 strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE);
391 bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
Joel Becker4cbe4242010-04-13 14:26:12 +0800392 bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb, 1));
Mark Fashehccd979b2005-12-15 14:31:24 -0800393 bg->bg_chain = cpu_to_le16(my_chain);
394 bg->bg_next_group = cl->cl_recs[my_chain].c_blkno;
395 bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno);
396 bg->bg_blkno = cpu_to_le64(group_blkno);
Joel Becker798db352010-04-13 14:26:32 +0800397 if (group_clusters == le16_to_cpu(cl->cl_cpg))
398 bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl));
399 else
400 ocfs2_bg_discontig_add_extent(osb, bg, cl, bg->bg_blkno,
401 group_clusters);
402
Mark Fashehccd979b2005-12-15 14:31:24 -0800403 /* set the 1st bit in the bitmap to account for the descriptor block */
404 ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap);
405 bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1);
406
Joel Beckerec20cec2010-03-19 14:13:52 -0700407 ocfs2_journal_dirty(handle, bg_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -0800408
409 /* There is no need to zero out or otherwise initialize the
410 * other blocks in a group - All valid FS metadata in a block
411 * group stores the superblock fs_generation value at
412 * allocation time. */
413
414bail:
415 mlog_exit(status);
416 return status;
417}
418
419static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
420{
421 u16 curr, best;
422
423 best = curr = 0;
424 while (curr < le16_to_cpu(cl->cl_count)) {
425 if (le32_to_cpu(cl->cl_recs[best].c_total) >
426 le32_to_cpu(cl->cl_recs[curr].c_total))
427 best = curr;
428 curr++;
429 }
430 return best;
431}
432
Joel Becker798db352010-04-13 14:26:32 +0800433static struct buffer_head *
434ocfs2_block_group_alloc_contig(struct ocfs2_super *osb, handle_t *handle,
435 struct inode *alloc_inode,
436 struct ocfs2_alloc_context *ac,
437 struct ocfs2_chain_list *cl)
438{
439 int status;
440 u32 bit_off, num_bits;
441 u64 bg_blkno;
442 struct buffer_head *bg_bh;
443 unsigned int alloc_rec = ocfs2_find_smallest_chain(cl);
444
Joel Becker1ed9b772010-05-06 13:59:06 +0800445 status = ocfs2_claim_clusters(handle, ac,
Joel Becker798db352010-04-13 14:26:32 +0800446 le16_to_cpu(cl->cl_cpg), &bit_off,
447 &num_bits);
448 if (status < 0) {
449 if (status != -ENOSPC)
450 mlog_errno(status);
451 goto bail;
452 }
453
454 /* setup the group */
455 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
456 mlog(0, "new descriptor, record %u, at block %llu\n",
457 alloc_rec, (unsigned long long)bg_blkno);
458
459 bg_bh = sb_getblk(osb->sb, bg_blkno);
460 if (!bg_bh) {
461 status = -EIO;
462 mlog_errno(status);
463 goto bail;
464 }
465 ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
466
467 status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh,
468 bg_blkno, num_bits, alloc_rec, cl);
469 if (status < 0) {
470 brelse(bg_bh);
471 mlog_errno(status);
472 }
473
474bail:
475 return status ? ERR_PTR(status) : bg_bh;
476}
477
478static int ocfs2_block_group_claim_bits(struct ocfs2_super *osb,
479 handle_t *handle,
480 struct ocfs2_alloc_context *ac,
481 unsigned int min_bits,
482 u32 *bit_off, u32 *num_bits)
483{
484 int status;
485
486 while (min_bits) {
Joel Becker1ed9b772010-05-06 13:59:06 +0800487 status = ocfs2_claim_clusters(handle, ac, min_bits,
Joel Becker798db352010-04-13 14:26:32 +0800488 bit_off, num_bits);
489 if (status != -ENOSPC)
490 break;
491
492 min_bits >>= 1;
493 }
494
495 return status;
496}
497
498static int ocfs2_block_group_grow_discontig(handle_t *handle,
499 struct inode *alloc_inode,
500 struct buffer_head *bg_bh,
501 struct ocfs2_alloc_context *ac,
502 struct ocfs2_chain_list *cl,
503 unsigned int min_bits)
504{
505 int status;
506 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
507 struct ocfs2_group_desc *bg =
508 (struct ocfs2_group_desc *)bg_bh->b_data;
509 unsigned int needed =
510 ocfs2_bits_per_group(cl) - le16_to_cpu(bg->bg_bits);
511 u32 p_cpos, clusters;
512 u64 p_blkno;
513 struct ocfs2_extent_list *el = &bg->bg_list;
514
515 status = ocfs2_journal_access_gd(handle,
516 INODE_CACHE(alloc_inode),
517 bg_bh,
518 OCFS2_JOURNAL_ACCESS_CREATE);
519 if (status < 0) {
520 mlog_errno(status);
521 goto bail;
522 }
523
524 while ((needed > 0) && (le16_to_cpu(el->l_next_free_rec) <
525 le16_to_cpu(el->l_count))) {
526 status = ocfs2_extend_trans(handle, OCFS2_SUBALLOC_ALLOC);
527 if (status) {
528 mlog_errno(status);
529 goto bail;
530 }
531
532 if (min_bits > needed)
533 min_bits = needed;
534 status = ocfs2_block_group_claim_bits(osb, handle, ac,
535 min_bits, &p_cpos,
536 &clusters);
537 if (status < 0) {
538 if (status != -ENOSPC)
539 mlog_errno(status);
540 goto bail;
541 }
542 p_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cpos);
543 ocfs2_bg_discontig_add_extent(osb, bg, cl, p_blkno,
544 clusters);
545
546 min_bits = clusters;
547 needed = ocfs2_bits_per_group(cl) - le16_to_cpu(bg->bg_bits);
548 }
549
550 if (needed > 0) {
551 }
552
553 ocfs2_journal_dirty(handle, bg_bh);
554
555bail:
556 return status;
557}
558
559static void ocfs2_bg_alloc_cleanup(struct inode *alloc_inode,
560 struct buffer_head *bg_bh,
561 struct ocfs2_cached_dealloc_ctxt *dealloc)
562{
563 int i;
564 struct ocfs2_group_desc *bg;
565 struct ocfs2_extent_list *el;
566 struct ocfs2_extent_rec *rec;
567
568 if (!bg_bh)
569 return;
570
571 bg = (struct ocfs2_group_desc *)bg_bh->b_data;
572 el = &bg->bg_list;
573 for (i = 0; i < le16_to_cpu(el->l_next_free_rec); i++) {
574 rec = &el->l_recs[i];
575 ocfs2_cache_cluster_dealloc(dealloc,
576 le64_to_cpu(rec->e_blkno),
577 le32_to_cpu(rec->e_leaf_clusters));
578 }
579
580 ocfs2_remove_from_cache(INODE_CACHE(alloc_inode), bg_bh);
581 brelse(bg_bh);
582}
583
584static struct buffer_head *
585ocfs2_block_group_alloc_discontig(handle_t *handle,
586 struct inode *alloc_inode,
587 struct ocfs2_alloc_context *ac,
588 struct ocfs2_chain_list *cl,
589 struct ocfs2_cached_dealloc_ctxt *dealloc)
590{
591 int status;
592 u32 bit_off, num_bits;
593 u64 bg_blkno;
594 unsigned int min_bits = le16_to_cpu(cl->cl_cpg) >> 1;
595 struct buffer_head *bg_bh = NULL;
596 unsigned int alloc_rec = ocfs2_find_smallest_chain(cl);
597 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
598
599 if (!ocfs2_supports_discontig_bh(osb)) {
600 status = -ENOSPC;
601 goto bail;
602 }
603
604 /* Claim the first region */
605 status = ocfs2_block_group_claim_bits(osb, handle, ac, min_bits,
606 &bit_off, &num_bits);
607 if (status < 0) {
608 if (status != -ENOSPC)
609 mlog_errno(status);
610 goto bail;
611 }
612 min_bits = num_bits;
613
614 /* setup the group */
615 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
616 mlog(0, "new descriptor, record %u, at block %llu\n",
617 alloc_rec, (unsigned long long)bg_blkno);
618
619 bg_bh = sb_getblk(osb->sb, bg_blkno);
620 if (!bg_bh) {
621 status = -EIO;
622 mlog_errno(status);
623 goto bail;
624 }
625 ocfs2_set_new_buffer_uptodate(INODE_CACHE(alloc_inode), bg_bh);
626
627 status = ocfs2_block_group_fill(handle, alloc_inode, bg_bh,
628 bg_blkno, num_bits, alloc_rec, cl);
629 if (status < 0) {
630 mlog_errno(status);
631 goto bail;
632 }
633
634 status = ocfs2_block_group_grow_discontig(handle, alloc_inode,
635 bg_bh, ac, cl, min_bits);
636 if (status)
637 mlog_errno(status);
638
639bail:
640 if (status)
641 ocfs2_bg_alloc_cleanup(alloc_inode, bg_bh, dealloc);
642 return status ? ERR_PTR(status) : bg_bh;
643}
644
Mark Fashehccd979b2005-12-15 14:31:24 -0800645/*
646 * We expect the block group allocator to already be locked.
647 */
648static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
649 struct inode *alloc_inode,
Joel Becker1187c962008-09-03 20:03:39 -0700650 struct buffer_head *bh,
Tao Ma60ca81e2009-02-25 00:53:24 +0800651 u64 max_block,
Tao Mafeb473a2009-02-25 00:53:25 +0800652 u64 *last_alloc_group,
Tao Ma60ca81e2009-02-25 00:53:24 +0800653 int flags)
Mark Fashehccd979b2005-12-15 14:31:24 -0800654{
655 int status, credits;
656 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
657 struct ocfs2_chain_list *cl;
658 struct ocfs2_alloc_context *ac = NULL;
Mark Fasheh1fabe142006-10-09 18:11:45 -0700659 handle_t *handle = NULL;
Mark Fashehccd979b2005-12-15 14:31:24 -0800660 u64 bg_blkno;
661 struct buffer_head *bg_bh = NULL;
662 struct ocfs2_group_desc *bg;
Joel Becker798db352010-04-13 14:26:32 +0800663 struct ocfs2_cached_dealloc_ctxt dealloc;
Mark Fashehccd979b2005-12-15 14:31:24 -0800664
665 BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode));
666
667 mlog_entry_void();
668
Joel Becker798db352010-04-13 14:26:32 +0800669 ocfs2_init_dealloc_ctxt(&dealloc);
670
Mark Fashehccd979b2005-12-15 14:31:24 -0800671 cl = &fe->id2.i_chain;
Joel Becker1187c962008-09-03 20:03:39 -0700672 status = ocfs2_reserve_clusters_with_limit(osb,
673 le16_to_cpu(cl->cl_cpg),
Tao Ma60ca81e2009-02-25 00:53:24 +0800674 max_block, flags, &ac);
Mark Fashehccd979b2005-12-15 14:31:24 -0800675 if (status < 0) {
676 if (status != -ENOSPC)
677 mlog_errno(status);
678 goto bail;
679 }
680
681 credits = ocfs2_calc_group_alloc_credits(osb->sb,
682 le16_to_cpu(cl->cl_cpg));
Mark Fasheh65eff9c2006-10-09 17:26:22 -0700683 handle = ocfs2_start_trans(osb, credits);
Mark Fashehccd979b2005-12-15 14:31:24 -0800684 if (IS_ERR(handle)) {
685 status = PTR_ERR(handle);
686 handle = NULL;
687 mlog_errno(status);
688 goto bail;
689 }
690
Tao Mafeb473a2009-02-25 00:53:25 +0800691 if (last_alloc_group && *last_alloc_group != 0) {
692 mlog(0, "use old allocation group %llu for block group alloc\n",
693 (unsigned long long)*last_alloc_group);
694 ac->ac_last_group = *last_alloc_group;
695 }
Joel Becker798db352010-04-13 14:26:32 +0800696
697 bg_bh = ocfs2_block_group_alloc_contig(osb, handle, alloc_inode,
698 ac, cl);
699 if (IS_ERR(bg_bh) && (PTR_ERR(bg_bh) == -ENOSPC))
700 bg_bh = ocfs2_block_group_alloc_discontig(handle,
701 alloc_inode,
702 ac, cl,
703 &dealloc);
704 if (IS_ERR(bg_bh)) {
705 status = PTR_ERR(bg_bh);
706 bg_bh = NULL;
Mark Fashehccd979b2005-12-15 14:31:24 -0800707 if (status != -ENOSPC)
708 mlog_errno(status);
709 goto bail;
710 }
Mark Fashehccd979b2005-12-15 14:31:24 -0800711 bg = (struct ocfs2_group_desc *) bg_bh->b_data;
712
Joel Becker0cf2f762009-02-12 16:41:25 -0800713 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
Joel Becker13723d02008-10-17 19:25:01 -0700714 bh, OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fashehccd979b2005-12-15 14:31:24 -0800715 if (status < 0) {
716 mlog_errno(status);
717 goto bail;
718 }
719
Joel Becker798db352010-04-13 14:26:32 +0800720 le32_add_cpu(&cl->cl_recs[bg->bg_chain].c_free,
Mark Fashehccd979b2005-12-15 14:31:24 -0800721 le16_to_cpu(bg->bg_free_bits_count));
Joel Becker798db352010-04-13 14:26:32 +0800722 le32_add_cpu(&cl->cl_recs[bg->bg_chain].c_total,
723 le16_to_cpu(bg->bg_bits));
724 cl->cl_recs[bg->bg_chain].c_blkno = cpu_to_le64(bg_blkno);
Mark Fashehccd979b2005-12-15 14:31:24 -0800725 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
726 le16_add_cpu(&cl->cl_next_free_rec, 1);
727
728 le32_add_cpu(&fe->id1.bitmap1.i_used, le16_to_cpu(bg->bg_bits) -
729 le16_to_cpu(bg->bg_free_bits_count));
730 le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits));
731 le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg));
732
Joel Beckerec20cec2010-03-19 14:13:52 -0700733 ocfs2_journal_dirty(handle, bh);
Mark Fashehccd979b2005-12-15 14:31:24 -0800734
735 spin_lock(&OCFS2_I(alloc_inode)->ip_lock);
736 OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
737 fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb,
738 le32_to_cpu(fe->i_clusters)));
739 spin_unlock(&OCFS2_I(alloc_inode)->ip_lock);
740 i_size_write(alloc_inode, le64_to_cpu(fe->i_size));
Mark Fasheh8110b072007-03-22 16:53:23 -0700741 alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);
Mark Fashehccd979b2005-12-15 14:31:24 -0800742
743 status = 0;
Tao Mafeb473a2009-02-25 00:53:25 +0800744
745 /* save the new last alloc group so that the caller can cache it. */
746 if (last_alloc_group)
747 *last_alloc_group = ac->ac_last_group;
748
Mark Fashehccd979b2005-12-15 14:31:24 -0800749bail:
750 if (handle)
Mark Fasheh02dc1af2006-10-09 16:48:10 -0700751 ocfs2_commit_trans(osb, handle);
Mark Fashehccd979b2005-12-15 14:31:24 -0800752
Joel Becker798db352010-04-13 14:26:32 +0800753 if (ocfs2_dealloc_has_cluster(&dealloc)) {
754 ocfs2_schedule_truncate_log_flush(osb, 1);
755 ocfs2_run_deallocs(osb, &dealloc);
756 }
757
Mark Fashehccd979b2005-12-15 14:31:24 -0800758 if (ac)
759 ocfs2_free_alloc_context(ac);
760
Mark Fasheha81cb882008-10-07 14:25:16 -0700761 brelse(bg_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -0800762
763 mlog_exit(status);
764 return status;
765}
766
767static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
Mark Fashehda5cbf22006-10-06 18:34:35 -0700768 struct ocfs2_alloc_context *ac,
769 int type,
Tao Maffda89a2008-03-03 17:12:09 +0800770 u32 slot,
Tao Mafeb473a2009-02-25 00:53:25 +0800771 u64 *last_alloc_group,
Tao Ma60ca81e2009-02-25 00:53:24 +0800772 int flags)
Mark Fashehccd979b2005-12-15 14:31:24 -0800773{
774 int status;
775 u32 bits_wanted = ac->ac_bits_wanted;
Mark Fashehda5cbf22006-10-06 18:34:35 -0700776 struct inode *alloc_inode;
Mark Fashehccd979b2005-12-15 14:31:24 -0800777 struct buffer_head *bh = NULL;
Mark Fashehccd979b2005-12-15 14:31:24 -0800778 struct ocfs2_dinode *fe;
779 u32 free_bits;
780
781 mlog_entry_void();
782
Mark Fashehda5cbf22006-10-06 18:34:35 -0700783 alloc_inode = ocfs2_get_system_file_inode(osb, type, slot);
784 if (!alloc_inode) {
785 mlog_errno(-EINVAL);
786 return -EINVAL;
Mark Fashehccd979b2005-12-15 14:31:24 -0800787 }
788
Mark Fashehda5cbf22006-10-06 18:34:35 -0700789 mutex_lock(&alloc_inode->i_mutex);
790
Mark Fashehe63aecb62007-10-18 15:30:42 -0700791 status = ocfs2_inode_lock(alloc_inode, &bh, 1);
Mark Fashehda5cbf22006-10-06 18:34:35 -0700792 if (status < 0) {
793 mutex_unlock(&alloc_inode->i_mutex);
794 iput(alloc_inode);
795
796 mlog_errno(status);
797 return status;
798 }
799
800 ac->ac_inode = alloc_inode;
Tao Maa4a48912008-03-03 17:12:30 +0800801 ac->ac_alloc_slot = slot;
Mark Fashehda5cbf22006-10-06 18:34:35 -0700802
Mark Fashehccd979b2005-12-15 14:31:24 -0800803 fe = (struct ocfs2_dinode *) bh->b_data;
Joel Becker10995aa2008-11-13 14:49:12 -0800804
805 /* The bh was validated by the inode read inside
806 * ocfs2_inode_lock(). Any corruption is a code bug. */
807 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
808
Mark Fashehccd979b2005-12-15 14:31:24 -0800809 if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
Mark Fashehb06970532006-03-03 10:24:33 -0800810 ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
811 (unsigned long long)le64_to_cpu(fe->i_blkno));
Mark Fashehccd979b2005-12-15 14:31:24 -0800812 status = -EIO;
813 goto bail;
814 }
815
816 free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) -
817 le32_to_cpu(fe->id1.bitmap1.i_used);
818
819 if (bits_wanted > free_bits) {
820 /* cluster bitmap never grows */
821 if (ocfs2_is_cluster_bitmap(alloc_inode)) {
822 mlog(0, "Disk Full: wanted=%u, free_bits=%u\n",
823 bits_wanted, free_bits);
824 status = -ENOSPC;
825 goto bail;
826 }
827
Tao Ma60ca81e2009-02-25 00:53:24 +0800828 if (!(flags & ALLOC_NEW_GROUP)) {
Tao Maffda89a2008-03-03 17:12:09 +0800829 mlog(0, "Alloc File %u Full: wanted=%u, free_bits=%u, "
830 "and we don't alloc a new group for it.\n",
831 slot, bits_wanted, free_bits);
832 status = -ENOSPC;
833 goto bail;
834 }
835
Joel Becker1187c962008-09-03 20:03:39 -0700836 status = ocfs2_block_group_alloc(osb, alloc_inode, bh,
Tao Mafeb473a2009-02-25 00:53:25 +0800837 ac->ac_max_block,
838 last_alloc_group, flags);
Mark Fashehccd979b2005-12-15 14:31:24 -0800839 if (status < 0) {
840 if (status != -ENOSPC)
841 mlog_errno(status);
842 goto bail;
843 }
844 atomic_inc(&osb->alloc_stats.bg_extends);
845
846 /* You should never ask for this much metadata */
847 BUG_ON(bits_wanted >
848 (le32_to_cpu(fe->id1.bitmap1.i_total)
849 - le32_to_cpu(fe->id1.bitmap1.i_used)));
850 }
851
852 get_bh(bh);
853 ac->ac_bh = bh;
854bail:
Mark Fasheha81cb882008-10-07 14:25:16 -0700855 brelse(bh);
Mark Fashehccd979b2005-12-15 14:31:24 -0800856
857 mlog_exit(status);
858 return status;
859}
860
Tiger Yangb89c5422010-01-25 14:11:06 +0800861static void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb)
862{
863 spin_lock(&osb->osb_lock);
864 osb->s_inode_steal_slot = OCFS2_INVALID_SLOT;
865 spin_unlock(&osb->osb_lock);
866 atomic_set(&osb->s_num_inodes_stolen, 0);
867}
868
869static void ocfs2_init_meta_steal_slot(struct ocfs2_super *osb)
870{
871 spin_lock(&osb->osb_lock);
872 osb->s_meta_steal_slot = OCFS2_INVALID_SLOT;
873 spin_unlock(&osb->osb_lock);
874 atomic_set(&osb->s_num_meta_stolen, 0);
875}
876
/* Reset both the inode and the metadata steal state of @osb. */
void ocfs2_init_steal_slots(struct ocfs2_super *osb)
{
	ocfs2_init_inode_steal_slot(osb);
	ocfs2_init_meta_steal_slot(osb);
}
882
883static void __ocfs2_set_steal_slot(struct ocfs2_super *osb, int slot, int type)
884{
885 spin_lock(&osb->osb_lock);
886 if (type == INODE_ALLOC_SYSTEM_INODE)
887 osb->s_inode_steal_slot = slot;
888 else if (type == EXTENT_ALLOC_SYSTEM_INODE)
889 osb->s_meta_steal_slot = slot;
890 spin_unlock(&osb->osb_lock);
891}
892
893static int __ocfs2_get_steal_slot(struct ocfs2_super *osb, int type)
894{
895 int slot = OCFS2_INVALID_SLOT;
896
897 spin_lock(&osb->osb_lock);
898 if (type == INODE_ALLOC_SYSTEM_INODE)
899 slot = osb->s_inode_steal_slot;
900 else if (type == EXTENT_ALLOC_SYSTEM_INODE)
901 slot = osb->s_meta_steal_slot;
902 spin_unlock(&osb->osb_lock);
903
904 return slot;
905}
906
907static int ocfs2_get_inode_steal_slot(struct ocfs2_super *osb)
908{
909 return __ocfs2_get_steal_slot(osb, INODE_ALLOC_SYSTEM_INODE);
910}
911
912static int ocfs2_get_meta_steal_slot(struct ocfs2_super *osb)
913{
914 return __ocfs2_get_steal_slot(osb, EXTENT_ALLOC_SYSTEM_INODE);
915}
916
917static int ocfs2_steal_resource(struct ocfs2_super *osb,
918 struct ocfs2_alloc_context *ac,
919 int type)
920{
921 int i, status = -ENOSPC;
922 int slot = __ocfs2_get_steal_slot(osb, type);
923
924 /* Start to steal resource from the first slot after ours. */
925 if (slot == OCFS2_INVALID_SLOT)
926 slot = osb->slot_num + 1;
927
928 for (i = 0; i < osb->max_slots; i++, slot++) {
929 if (slot == osb->max_slots)
930 slot = 0;
931
932 if (slot == osb->slot_num)
933 continue;
934
935 status = ocfs2_reserve_suballoc_bits(osb, ac,
936 type,
937 (u32)slot, NULL,
938 NOT_ALLOC_NEW_GROUP);
939 if (status >= 0) {
940 __ocfs2_set_steal_slot(osb, slot, type);
941 break;
942 }
943
944 ocfs2_free_ac_resource(ac);
945 }
946
947 return status;
948}
949
950static int ocfs2_steal_inode(struct ocfs2_super *osb,
951 struct ocfs2_alloc_context *ac)
952{
953 return ocfs2_steal_resource(osb, ac, INODE_ALLOC_SYSTEM_INODE);
954}
955
956static int ocfs2_steal_meta(struct ocfs2_super *osb,
957 struct ocfs2_alloc_context *ac)
958{
959 return ocfs2_steal_resource(osb, ac, EXTENT_ALLOC_SYSTEM_INODE);
960}
961
Tiger Yangcf1d6c72008-08-18 17:11:00 +0800962int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb,
963 int blocks,
964 struct ocfs2_alloc_context **ac)
Mark Fashehccd979b2005-12-15 14:31:24 -0800965{
966 int status;
Tiger Yangb89c5422010-01-25 14:11:06 +0800967 int slot = ocfs2_get_meta_steal_slot(osb);
Mark Fashehccd979b2005-12-15 14:31:24 -0800968
Robert P. J. Daycd861282006-12-13 00:34:52 -0800969 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
Mark Fashehccd979b2005-12-15 14:31:24 -0800970 if (!(*ac)) {
971 status = -ENOMEM;
972 mlog_errno(status);
973 goto bail;
974 }
975
Tiger Yangcf1d6c72008-08-18 17:11:00 +0800976 (*ac)->ac_bits_wanted = blocks;
Mark Fashehccd979b2005-12-15 14:31:24 -0800977 (*ac)->ac_which = OCFS2_AC_USE_META;
Mark Fashehccd979b2005-12-15 14:31:24 -0800978 (*ac)->ac_group_search = ocfs2_block_group_search;
979
Tiger Yangb89c5422010-01-25 14:11:06 +0800980 if (slot != OCFS2_INVALID_SLOT &&
981 atomic_read(&osb->s_num_meta_stolen) < OCFS2_MAX_TO_STEAL)
982 goto extent_steal;
983
984 atomic_set(&osb->s_num_meta_stolen, 0);
Mark Fashehda5cbf22006-10-06 18:34:35 -0700985 status = ocfs2_reserve_suballoc_bits(osb, (*ac),
Tao Maffda89a2008-03-03 17:12:09 +0800986 EXTENT_ALLOC_SYSTEM_INODE,
Tiger Yangb89c5422010-01-25 14:11:06 +0800987 (u32)osb->slot_num, NULL,
Mark Fasheh33d5d382010-02-24 13:34:09 -0800988 ALLOC_GROUPS_FROM_GLOBAL|ALLOC_NEW_GROUP);
Tiger Yangb89c5422010-01-25 14:11:06 +0800989
990
991 if (status >= 0) {
992 status = 0;
993 if (slot != OCFS2_INVALID_SLOT)
994 ocfs2_init_meta_steal_slot(osb);
995 goto bail;
996 } else if (status < 0 && status != -ENOSPC) {
997 mlog_errno(status);
998 goto bail;
999 }
1000
1001 ocfs2_free_ac_resource(*ac);
1002
1003extent_steal:
1004 status = ocfs2_steal_meta(osb, *ac);
1005 atomic_inc(&osb->s_num_meta_stolen);
Mark Fashehccd979b2005-12-15 14:31:24 -08001006 if (status < 0) {
1007 if (status != -ENOSPC)
1008 mlog_errno(status);
1009 goto bail;
1010 }
1011
1012 status = 0;
1013bail:
1014 if ((status < 0) && *ac) {
1015 ocfs2_free_alloc_context(*ac);
1016 *ac = NULL;
1017 }
1018
Mark Fashehccd979b2005-12-15 14:31:24 -08001019 mlog_exit(status);
1020 return status;
1021}
1022
/*
 * Reserve the number of metadata blocks that
 * ocfs2_extend_meta_needed() reports for @root_el.  See
 * ocfs2_reserve_new_metadata_blocks() for the return convention.
 */
int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
			       struct ocfs2_extent_list *root_el,
			       struct ocfs2_alloc_context **ac)
{
	int blocks = ocfs2_extend_meta_needed(root_el);

	return ocfs2_reserve_new_metadata_blocks(osb, blocks, ac);
}
1031
Mark Fashehccd979b2005-12-15 14:31:24 -08001032int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
Mark Fashehccd979b2005-12-15 14:31:24 -08001033 struct ocfs2_alloc_context **ac)
1034{
1035 int status;
Tiger Yangb89c5422010-01-25 14:11:06 +08001036 int slot = ocfs2_get_inode_steal_slot(osb);
Tao Mafeb473a2009-02-25 00:53:25 +08001037 u64 alloc_group;
Mark Fashehccd979b2005-12-15 14:31:24 -08001038
Robert P. J. Daycd861282006-12-13 00:34:52 -08001039 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
Mark Fashehccd979b2005-12-15 14:31:24 -08001040 if (!(*ac)) {
1041 status = -ENOMEM;
1042 mlog_errno(status);
1043 goto bail;
1044 }
1045
1046 (*ac)->ac_bits_wanted = 1;
Mark Fashehccd979b2005-12-15 14:31:24 -08001047 (*ac)->ac_which = OCFS2_AC_USE_INODE;
1048
Mark Fashehccd979b2005-12-15 14:31:24 -08001049 (*ac)->ac_group_search = ocfs2_block_group_search;
1050
Tao Ma4d0ddb22008-03-05 16:11:46 +08001051 /*
Joel Becker1187c962008-09-03 20:03:39 -07001052 * stat(2) can't handle i_ino > 32bits, so we tell the
1053 * lower levels not to allocate us a block group past that
Joel Becker12462f12008-09-03 20:03:40 -07001054 * limit. The 'inode64' mount option avoids this behavior.
Joel Becker1187c962008-09-03 20:03:39 -07001055 */
Joel Becker12462f12008-09-03 20:03:40 -07001056 if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64))
1057 (*ac)->ac_max_block = (u32)~0U;
Joel Becker1187c962008-09-03 20:03:39 -07001058
1059 /*
Tao Ma4d0ddb22008-03-05 16:11:46 +08001060 * slot is set when we successfully steal inode from other nodes.
1061 * It is reset in 3 places:
1062 * 1. when we flush the truncate log
1063 * 2. when we complete local alloc recovery.
1064 * 3. when we successfully allocate from our own slot.
1065 * After it is set, we will go on stealing inodes until we find the
1066 * need to check our slots to see whether there is some space for us.
1067 */
1068 if (slot != OCFS2_INVALID_SLOT &&
Tiger Yangb89c5422010-01-25 14:11:06 +08001069 atomic_read(&osb->s_num_inodes_stolen) < OCFS2_MAX_TO_STEAL)
Tao Ma4d0ddb22008-03-05 16:11:46 +08001070 goto inode_steal;
1071
1072 atomic_set(&osb->s_num_inodes_stolen, 0);
Tao Mafeb473a2009-02-25 00:53:25 +08001073 alloc_group = osb->osb_inode_alloc_group;
Mark Fashehda5cbf22006-10-06 18:34:35 -07001074 status = ocfs2_reserve_suballoc_bits(osb, *ac,
1075 INODE_ALLOC_SYSTEM_INODE,
Tiger Yangb89c5422010-01-25 14:11:06 +08001076 (u32)osb->slot_num,
Tao Mafeb473a2009-02-25 00:53:25 +08001077 &alloc_group,
Tao Ma60ca81e2009-02-25 00:53:24 +08001078 ALLOC_NEW_GROUP |
1079 ALLOC_GROUPS_FROM_GLOBAL);
Tao Ma4d0ddb22008-03-05 16:11:46 +08001080 if (status >= 0) {
1081 status = 0;
1082
Tao Mafeb473a2009-02-25 00:53:25 +08001083 spin_lock(&osb->osb_lock);
1084 osb->osb_inode_alloc_group = alloc_group;
1085 spin_unlock(&osb->osb_lock);
1086 mlog(0, "after reservation, new allocation group is "
1087 "%llu\n", (unsigned long long)alloc_group);
1088
Tao Ma4d0ddb22008-03-05 16:11:46 +08001089 /*
1090 * Some inodes must be freed by us, so try to allocate
1091 * from our own next time.
1092 */
1093 if (slot != OCFS2_INVALID_SLOT)
1094 ocfs2_init_inode_steal_slot(osb);
1095 goto bail;
1096 } else if (status < 0 && status != -ENOSPC) {
1097 mlog_errno(status);
1098 goto bail;
1099 }
1100
1101 ocfs2_free_ac_resource(*ac);
1102
1103inode_steal:
Tiger Yangb89c5422010-01-25 14:11:06 +08001104 status = ocfs2_steal_inode(osb, *ac);
Tao Ma4d0ddb22008-03-05 16:11:46 +08001105 atomic_inc(&osb->s_num_inodes_stolen);
Mark Fashehccd979b2005-12-15 14:31:24 -08001106 if (status < 0) {
1107 if (status != -ENOSPC)
1108 mlog_errno(status);
1109 goto bail;
1110 }
1111
1112 status = 0;
1113bail:
1114 if ((status < 0) && *ac) {
1115 ocfs2_free_alloc_context(*ac);
1116 *ac = NULL;
1117 }
1118
Mark Fashehccd979b2005-12-15 14:31:24 -08001119 mlog_exit(status);
1120 return status;
1121}
1122
1123/* local alloc code has to do the same thing, so rather than do this
1124 * twice.. */
1125int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
1126 struct ocfs2_alloc_context *ac)
1127{
1128 int status;
1129
Mark Fashehccd979b2005-12-15 14:31:24 -08001130 ac->ac_which = OCFS2_AC_USE_MAIN;
1131 ac->ac_group_search = ocfs2_cluster_group_search;
1132
Mark Fashehda5cbf22006-10-06 18:34:35 -07001133 status = ocfs2_reserve_suballoc_bits(osb, ac,
1134 GLOBAL_BITMAP_SYSTEM_INODE,
Tao Mafeb473a2009-02-25 00:53:25 +08001135 OCFS2_INVALID_SLOT, NULL,
Tao Maffda89a2008-03-03 17:12:09 +08001136 ALLOC_NEW_GROUP);
Mark Fashehda5cbf22006-10-06 18:34:35 -07001137 if (status < 0 && status != -ENOSPC) {
Mark Fashehccd979b2005-12-15 14:31:24 -08001138 mlog_errno(status);
Mark Fashehda5cbf22006-10-06 18:34:35 -07001139 goto bail;
1140 }
1141
Mark Fashehccd979b2005-12-15 14:31:24 -08001142bail:
1143 return status;
1144}
1145
1146/* Callers don't need to care which bitmap (local alloc or main) to
1147 * use so we figure it out for them, but unfortunately this clutters
1148 * things a bit. */
Joel Becker1187c962008-09-03 20:03:39 -07001149static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb,
1150 u32 bits_wanted, u64 max_block,
Tao Ma60ca81e2009-02-25 00:53:24 +08001151 int flags,
Joel Becker1187c962008-09-03 20:03:39 -07001152 struct ocfs2_alloc_context **ac)
Mark Fashehccd979b2005-12-15 14:31:24 -08001153{
1154 int status;
1155
1156 mlog_entry_void();
1157
Robert P. J. Daycd861282006-12-13 00:34:52 -08001158 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
Mark Fashehccd979b2005-12-15 14:31:24 -08001159 if (!(*ac)) {
1160 status = -ENOMEM;
1161 mlog_errno(status);
1162 goto bail;
1163 }
1164
1165 (*ac)->ac_bits_wanted = bits_wanted;
Joel Becker1187c962008-09-03 20:03:39 -07001166 (*ac)->ac_max_block = max_block;
Mark Fashehccd979b2005-12-15 14:31:24 -08001167
1168 status = -ENOSPC;
Tao Ma60ca81e2009-02-25 00:53:24 +08001169 if (!(flags & ALLOC_GROUPS_FROM_GLOBAL) &&
1170 ocfs2_alloc_should_use_local(osb, bits_wanted)) {
Mark Fashehccd979b2005-12-15 14:31:24 -08001171 status = ocfs2_reserve_local_alloc_bits(osb,
Mark Fashehccd979b2005-12-15 14:31:24 -08001172 bits_wanted,
1173 *ac);
Mark Fasheha57c8fd2010-03-16 21:01:00 -07001174 if ((status < 0) && (status != -ENOSPC)) {
Mark Fashehccd979b2005-12-15 14:31:24 -08001175 mlog_errno(status);
1176 goto bail;
Mark Fashehccd979b2005-12-15 14:31:24 -08001177 }
1178 }
1179
1180 if (status == -ENOSPC) {
1181 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
1182 if (status < 0) {
1183 if (status != -ENOSPC)
1184 mlog_errno(status);
1185 goto bail;
1186 }
1187 }
1188
1189 status = 0;
1190bail:
1191 if ((status < 0) && *ac) {
1192 ocfs2_free_alloc_context(*ac);
1193 *ac = NULL;
1194 }
1195
1196 mlog_exit(status);
1197 return status;
1198}
1199
Joel Becker1187c962008-09-03 20:03:39 -07001200int ocfs2_reserve_clusters(struct ocfs2_super *osb,
1201 u32 bits_wanted,
1202 struct ocfs2_alloc_context **ac)
1203{
Tao Ma60ca81e2009-02-25 00:53:24 +08001204 return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0,
1205 ALLOC_NEW_GROUP, ac);
Joel Becker1187c962008-09-03 20:03:39 -07001206}
1207
Mark Fashehccd979b2005-12-15 14:31:24 -08001208/*
1209 * More or less lifted from ext3. I'll leave their description below:
1210 *
1211 * "For ext3 allocations, we must not reuse any blocks which are
1212 * allocated in the bitmap buffer's "last committed data" copy. This
1213 * prevents deletes from freeing up the page for reuse until we have
1214 * committed the delete transaction.
1215 *
1216 * If we didn't do this, then deleting something and reallocating it as
1217 * data would allow the old block to be overwritten before the
1218 * transaction committed (because we force data to disk before commit).
1219 * This would lead to corruption if we crashed between overwriting the
1220 * data and committing the delete.
1221 *
1222 * @@@ We may want to make this allocation behaviour conditional on
1223 * data-writes at some point, and disable it for metadata allocations or
1224 * sync-data inodes."
1225 *
1226 * Note: OCFS2 already does this differently for metadata vs data
Joe Perchesc78bad12008-02-03 17:33:42 +02001227 * allocations, as those bitmaps are separate and undo access is never
Mark Fashehccd979b2005-12-15 14:31:24 -08001228 * called on a metadata group descriptor.
1229 */
1230static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
1231 int nr)
1232{
1233 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
Sunil Mushran94e41ec2009-06-19 14:45:54 -07001234 int ret;
Mark Fashehccd979b2005-12-15 14:31:24 -08001235
1236 if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
1237 return 0;
Sunil Mushran94e41ec2009-06-19 14:45:54 -07001238
1239 if (!buffer_jbd(bg_bh))
Mark Fashehccd979b2005-12-15 14:31:24 -08001240 return 1;
1241
Sunil Mushran94e41ec2009-06-19 14:45:54 -07001242 jbd_lock_bh_state(bg_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08001243 bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
Sunil Mushran94e41ec2009-06-19 14:45:54 -07001244 if (bg)
1245 ret = !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
1246 else
1247 ret = 1;
1248 jbd_unlock_bh_state(bg_bh);
1249
1250 return ret;
Mark Fashehccd979b2005-12-15 14:31:24 -08001251}
1252
1253static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
1254 struct buffer_head *bg_bh,
1255 unsigned int bits_wanted,
Mark Fasheh7bf72ed2006-05-03 17:46:50 -07001256 unsigned int total_bits,
Joel Becker7d1fe092010-04-13 14:30:19 +08001257 struct ocfs2_suballoc_result *res)
Mark Fashehccd979b2005-12-15 14:31:24 -08001258{
1259 void *bitmap;
1260 u16 best_offset, best_size;
1261 int offset, start, found, status = 0;
1262 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
1263
Joel Becker42035302008-11-13 14:49:15 -08001264 /* Callers got this descriptor from
1265 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
1266 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
Mark Fashehccd979b2005-12-15 14:31:24 -08001267
1268 found = start = best_offset = best_size = 0;
1269 bitmap = bg->bg_bitmap;
1270
Mark Fasheh7bf72ed2006-05-03 17:46:50 -07001271 while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
1272 if (offset == total_bits)
Mark Fashehccd979b2005-12-15 14:31:24 -08001273 break;
1274
1275 if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
1276 /* We found a zero, but we can't use it as it
1277 * hasn't been put to disk yet! */
1278 found = 0;
1279 start = offset + 1;
1280 } else if (offset == start) {
1281 /* we found a zero */
1282 found++;
1283 /* move start to the next bit to test */
1284 start++;
1285 } else {
1286 /* got a zero after some ones */
1287 found = 1;
1288 start = offset + 1;
1289 }
1290 if (found > best_size) {
1291 best_size = found;
1292 best_offset = start - found;
1293 }
1294 /* we got everything we needed */
1295 if (found == bits_wanted) {
1296 /* mlog(0, "Found it all!\n"); */
1297 break;
1298 }
1299 }
1300
Joel Becker7d1fe092010-04-13 14:30:19 +08001301 if (best_size) {
1302 res->sr_bit_offset = best_offset;
1303 res->sr_bits = best_size;
Mark Fashehccd979b2005-12-15 14:31:24 -08001304 } else {
1305 status = -ENOSPC;
1306 /* No error log here -- see the comment above
1307 * ocfs2_test_bg_bit_allocatable */
1308 }
1309
1310 return status;
1311}
1312
Mark Fasheh1fabe142006-10-09 18:11:45 -07001313static inline int ocfs2_block_group_set_bits(handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08001314 struct inode *alloc_inode,
1315 struct ocfs2_group_desc *bg,
1316 struct buffer_head *group_bh,
1317 unsigned int bit_off,
1318 unsigned int num_bits)
1319{
1320 int status;
1321 void *bitmap = bg->bg_bitmap;
1322 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
1323
1324 mlog_entry_void();
1325
Joel Becker42035302008-11-13 14:49:15 -08001326 /* All callers get the descriptor via
1327 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
1328 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
Mark Fashehccd979b2005-12-15 14:31:24 -08001329 BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
1330
1331 mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
1332 num_bits);
1333
1334 if (ocfs2_is_cluster_bitmap(alloc_inode))
1335 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
1336
Joel Becker13723d02008-10-17 19:25:01 -07001337 status = ocfs2_journal_access_gd(handle,
Joel Becker0cf2f762009-02-12 16:41:25 -08001338 INODE_CACHE(alloc_inode),
Joel Becker13723d02008-10-17 19:25:01 -07001339 group_bh,
1340 journal_type);
Mark Fashehccd979b2005-12-15 14:31:24 -08001341 if (status < 0) {
1342 mlog_errno(status);
1343 goto bail;
1344 }
1345
1346 le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
Mark Fashehccd979b2005-12-15 14:31:24 -08001347 while(num_bits--)
1348 ocfs2_set_bit(bit_off++, bitmap);
1349
Joel Beckerec20cec2010-03-19 14:13:52 -07001350 ocfs2_journal_dirty(handle, group_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08001351
1352bail:
1353 mlog_exit(status);
1354 return status;
1355}
1356
1357/* find the one with the most empty bits */
1358static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl)
1359{
1360 u16 curr, best;
1361
1362 BUG_ON(!cl->cl_next_free_rec);
1363
1364 best = curr = 0;
1365 while (curr < le16_to_cpu(cl->cl_next_free_rec)) {
1366 if (le32_to_cpu(cl->cl_recs[curr].c_free) >
1367 le32_to_cpu(cl->cl_recs[best].c_free))
1368 best = curr;
1369 curr++;
1370 }
1371
1372 BUG_ON(best >= le16_to_cpu(cl->cl_next_free_rec));
1373 return best;
1374}
1375
Mark Fasheh1fabe142006-10-09 18:11:45 -07001376static int ocfs2_relink_block_group(handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08001377 struct inode *alloc_inode,
1378 struct buffer_head *fe_bh,
1379 struct buffer_head *bg_bh,
1380 struct buffer_head *prev_bg_bh,
1381 u16 chain)
1382{
1383 int status;
1384 /* there is a really tiny chance the journal calls could fail,
1385 * but we wouldn't want inconsistent blocks in *any* case. */
1386 u64 fe_ptr, bg_ptr, prev_bg_ptr;
1387 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
1388 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
1389 struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data;
1390
Joel Becker42035302008-11-13 14:49:15 -08001391 /* The caller got these descriptors from
1392 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
1393 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
1394 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(prev_bg));
Mark Fashehccd979b2005-12-15 14:31:24 -08001395
Mark Fashehb06970532006-03-03 10:24:33 -08001396 mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
Mark Fasheh1ca1a112007-04-27 16:01:25 -07001397 (unsigned long long)le64_to_cpu(fe->i_blkno), chain,
1398 (unsigned long long)le64_to_cpu(bg->bg_blkno),
1399 (unsigned long long)le64_to_cpu(prev_bg->bg_blkno));
Mark Fashehccd979b2005-12-15 14:31:24 -08001400
1401 fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno);
1402 bg_ptr = le64_to_cpu(bg->bg_next_group);
1403 prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group);
1404
Joel Becker0cf2f762009-02-12 16:41:25 -08001405 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
1406 prev_bg_bh,
Joel Becker13723d02008-10-17 19:25:01 -07001407 OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fashehccd979b2005-12-15 14:31:24 -08001408 if (status < 0) {
1409 mlog_errno(status);
1410 goto out_rollback;
1411 }
1412
1413 prev_bg->bg_next_group = bg->bg_next_group;
Joel Beckerec20cec2010-03-19 14:13:52 -07001414 ocfs2_journal_dirty(handle, prev_bg_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08001415
Joel Becker0cf2f762009-02-12 16:41:25 -08001416 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
1417 bg_bh, OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fashehccd979b2005-12-15 14:31:24 -08001418 if (status < 0) {
1419 mlog_errno(status);
1420 goto out_rollback;
1421 }
1422
1423 bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno;
Joel Beckerec20cec2010-03-19 14:13:52 -07001424 ocfs2_journal_dirty(handle, bg_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08001425
Joel Becker0cf2f762009-02-12 16:41:25 -08001426 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
1427 fe_bh, OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fashehccd979b2005-12-15 14:31:24 -08001428 if (status < 0) {
1429 mlog_errno(status);
1430 goto out_rollback;
1431 }
1432
1433 fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno;
Joel Beckerec20cec2010-03-19 14:13:52 -07001434 ocfs2_journal_dirty(handle, fe_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08001435
Mark Fashehccd979b2005-12-15 14:31:24 -08001436out_rollback:
1437 if (status < 0) {
1438 fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr);
1439 bg->bg_next_group = cpu_to_le64(bg_ptr);
1440 prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
1441 }
Joel Becker42035302008-11-13 14:49:15 -08001442
Mark Fashehccd979b2005-12-15 14:31:24 -08001443 mlog_exit(status);
1444 return status;
1445}
1446
1447static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
1448 u32 wanted)
1449{
1450 return le16_to_cpu(bg->bg_free_bits_count) > wanted;
1451}
1452
1453/* return 0 on success, -ENOSPC to keep searching and any other < 0
1454 * value on error. */
1455static int ocfs2_cluster_group_search(struct inode *inode,
1456 struct buffer_head *group_bh,
1457 u32 bits_wanted, u32 min_bits,
Joel Becker1187c962008-09-03 20:03:39 -07001458 u64 max_block,
Joel Becker7d1fe092010-04-13 14:30:19 +08001459 struct ocfs2_suballoc_result *res)
Mark Fashehccd979b2005-12-15 14:31:24 -08001460{
1461 int search = -ENOSPC;
1462 int ret;
Joel Becker1187c962008-09-03 20:03:39 -07001463 u64 blkoff;
Mark Fasheh7bf72ed2006-05-03 17:46:50 -07001464 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
Mark Fasheh9c7af402008-07-28 18:02:53 -07001465 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
Mark Fasheh7bf72ed2006-05-03 17:46:50 -07001466 unsigned int max_bits, gd_cluster_off;
Mark Fashehccd979b2005-12-15 14:31:24 -08001467
1468 BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1469
Mark Fasheh7bf72ed2006-05-03 17:46:50 -07001470 if (gd->bg_free_bits_count) {
1471 max_bits = le16_to_cpu(gd->bg_bits);
1472
1473 /* Tail groups in cluster bitmaps which aren't cpg
1474 * aligned are prone to partial extention by a failed
1475 * fs resize. If the file system resize never got to
1476 * update the dinode cluster count, then we don't want
1477 * to trust any clusters past it, regardless of what
1478 * the group descriptor says. */
1479 gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
1480 le64_to_cpu(gd->bg_blkno));
1481 if ((gd_cluster_off + max_bits) >
1482 OCFS2_I(inode)->ip_clusters) {
1483 max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
1484 mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n",
1485 (unsigned long long)le64_to_cpu(gd->bg_blkno),
1486 le16_to_cpu(gd->bg_bits),
1487 OCFS2_I(inode)->ip_clusters, max_bits);
1488 }
1489
Mark Fashehccd979b2005-12-15 14:31:24 -08001490 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
1491 group_bh, bits_wanted,
Joel Becker7d1fe092010-04-13 14:30:19 +08001492 max_bits, res);
Mark Fashehccd979b2005-12-15 14:31:24 -08001493 if (ret)
1494 return ret;
1495
Joel Becker1187c962008-09-03 20:03:39 -07001496 if (max_block) {
1497 blkoff = ocfs2_clusters_to_blocks(inode->i_sb,
1498 gd_cluster_off +
Joel Becker7d1fe092010-04-13 14:30:19 +08001499 res->sr_bit_offset +
1500 res->sr_bits);
Joel Becker1187c962008-09-03 20:03:39 -07001501 mlog(0, "Checking %llu against %llu\n",
1502 (unsigned long long)blkoff,
1503 (unsigned long long)max_block);
1504 if (blkoff > max_block)
1505 return -ENOSPC;
1506 }
1507
Mark Fashehccd979b2005-12-15 14:31:24 -08001508 /* ocfs2_block_group_find_clear_bits() might
1509 * return success, but we still want to return
1510 * -ENOSPC unless it found the minimum number
1511 * of bits. */
Joel Becker7d1fe092010-04-13 14:30:19 +08001512 if (min_bits <= res->sr_bits)
Mark Fashehccd979b2005-12-15 14:31:24 -08001513 search = 0; /* success */
Joel Becker7d1fe092010-04-13 14:30:19 +08001514 else if (res->sr_bits) {
Mark Fasheh9c7af402008-07-28 18:02:53 -07001515 /*
1516 * Don't show bits which we'll be returning
1517 * for allocation to the local alloc bitmap.
1518 */
Joel Becker7d1fe092010-04-13 14:30:19 +08001519 ocfs2_local_alloc_seen_free_bits(osb, res->sr_bits);
Mark Fashehccd979b2005-12-15 14:31:24 -08001520 }
1521 }
1522
1523 return search;
1524}
1525
1526static int ocfs2_block_group_search(struct inode *inode,
1527 struct buffer_head *group_bh,
1528 u32 bits_wanted, u32 min_bits,
Joel Becker1187c962008-09-03 20:03:39 -07001529 u64 max_block,
Joel Becker7d1fe092010-04-13 14:30:19 +08001530 struct ocfs2_suballoc_result *res)
Mark Fashehccd979b2005-12-15 14:31:24 -08001531{
1532 int ret = -ENOSPC;
Joel Becker1187c962008-09-03 20:03:39 -07001533 u64 blkoff;
Mark Fashehccd979b2005-12-15 14:31:24 -08001534 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
1535
1536 BUG_ON(min_bits != 1);
1537 BUG_ON(ocfs2_is_cluster_bitmap(inode));
1538
Joel Becker1187c962008-09-03 20:03:39 -07001539 if (bg->bg_free_bits_count) {
Mark Fashehccd979b2005-12-15 14:31:24 -08001540 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
1541 group_bh, bits_wanted,
Mark Fasheh7bf72ed2006-05-03 17:46:50 -07001542 le16_to_cpu(bg->bg_bits),
Joel Becker7d1fe092010-04-13 14:30:19 +08001543 res);
Joel Becker1187c962008-09-03 20:03:39 -07001544 if (!ret && max_block) {
Joel Becker7d1fe092010-04-13 14:30:19 +08001545 blkoff = le64_to_cpu(bg->bg_blkno) +
1546 res->sr_bit_offset + res->sr_bits;
Joel Becker1187c962008-09-03 20:03:39 -07001547 mlog(0, "Checking %llu against %llu\n",
1548 (unsigned long long)blkoff,
1549 (unsigned long long)max_block);
1550 if (blkoff > max_block)
1551 ret = -ENOSPC;
1552 }
1553 }
Mark Fashehccd979b2005-12-15 14:31:24 -08001554
1555 return ret;
1556}
1557
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001558static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
Mark Fasheh1fabe142006-10-09 18:11:45 -07001559 handle_t *handle,
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001560 struct buffer_head *di_bh,
1561 u32 num_bits,
1562 u16 chain)
1563{
1564 int ret;
1565 u32 tmp_used;
1566 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
1567 struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
1568
Joel Becker0cf2f762009-02-12 16:41:25 -08001569 ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
Joel Becker13723d02008-10-17 19:25:01 -07001570 OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001571 if (ret < 0) {
1572 mlog_errno(ret);
1573 goto out;
1574 }
1575
1576 tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
1577 di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
1578 le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
Joel Beckerec20cec2010-03-19 14:13:52 -07001579 ocfs2_journal_dirty(handle, di_bh);
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001580
1581out:
1582 return ret;
1583}
1584
Joel Beckerba206632010-03-26 10:08:59 +08001585static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res,
1586 struct ocfs2_extent_rec *rec,
1587 struct ocfs2_chain_list *cl)
Joel Becker13e434c2010-03-26 10:08:27 +08001588{
1589 unsigned int bpc = le16_to_cpu(cl->cl_bpc);
1590 unsigned int bitoff = le32_to_cpu(rec->e_cpos) * bpc;
1591 unsigned int bitcount = le32_to_cpu(rec->e_leaf_clusters) * bpc;
1592
1593 if (res->sr_bit_offset < bitoff)
1594 return 0;
1595 if (res->sr_bit_offset >= (bitoff + bitcount))
1596 return 0;
Joel Beckerba206632010-03-26 10:08:59 +08001597 res->sr_blkno = le64_to_cpu(rec->e_blkno) +
1598 (res->sr_bit_offset - bitoff);
Joel Becker13e434c2010-03-26 10:08:27 +08001599 if ((res->sr_bit_offset + res->sr_bits) > (bitoff + bitcount))
1600 res->sr_bits = (bitoff + bitcount) - res->sr_bit_offset;
1601 return 1;
1602}
1603
Joel Beckerba206632010-03-26 10:08:59 +08001604static void ocfs2_bg_discontig_fix_result(struct ocfs2_alloc_context *ac,
1605 struct ocfs2_group_desc *bg,
1606 struct ocfs2_suballoc_result *res)
Joel Becker13e434c2010-03-26 10:08:27 +08001607{
1608 int i;
Joel Becker2b6cb572010-03-26 10:09:15 +08001609 u64 bg_blkno = res->sr_bg_blkno; /* Save off */
Joel Becker13e434c2010-03-26 10:08:27 +08001610 struct ocfs2_extent_rec *rec;
1611 struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
1612 struct ocfs2_chain_list *cl = &di->id2.i_chain;
1613
Joel Beckerba206632010-03-26 10:08:59 +08001614 if (ocfs2_is_cluster_bitmap(ac->ac_inode)) {
1615 res->sr_blkno = 0;
Joel Becker13e434c2010-03-26 10:08:27 +08001616 return;
Joel Beckerba206632010-03-26 10:08:59 +08001617 }
Joel Becker13e434c2010-03-26 10:08:27 +08001618
Joel Beckerba206632010-03-26 10:08:59 +08001619 res->sr_blkno = res->sr_bg_blkno + res->sr_bit_offset;
Joel Becker2b6cb572010-03-26 10:09:15 +08001620 res->sr_bg_blkno = 0; /* Clear it for contig block groups */
Joel Beckerba206632010-03-26 10:08:59 +08001621 if (!ocfs2_supports_discontig_bh(OCFS2_SB(ac->ac_inode->i_sb)) ||
1622 !bg->bg_list.l_next_free_rec)
Joel Becker13e434c2010-03-26 10:08:27 +08001623 return;
1624
1625 for (i = 0; i < le16_to_cpu(bg->bg_list.l_next_free_rec); i++) {
1626 rec = &bg->bg_list.l_recs[i];
Joel Becker2b6cb572010-03-26 10:09:15 +08001627 if (ocfs2_bg_discontig_fix_by_rec(res, rec, cl)) {
1628 res->sr_bg_blkno = bg_blkno; /* Restore */
Joel Becker13e434c2010-03-26 10:08:27 +08001629 break;
Joel Becker2b6cb572010-03-26 10:09:15 +08001630 }
Joel Becker13e434c2010-03-26 10:08:27 +08001631 }
1632}
1633
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001634static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
Mark Fasheh1fabe142006-10-09 18:11:45 -07001635 handle_t *handle,
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001636 u32 bits_wanted,
1637 u32 min_bits,
Joel Becker7d1fe092010-04-13 14:30:19 +08001638 struct ocfs2_suballoc_result *res,
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001639 u16 *bits_left)
1640{
1641 int ret;
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001642 struct buffer_head *group_bh = NULL;
1643 struct ocfs2_group_desc *gd;
Joel Becker68f64d42008-11-13 14:49:14 -08001644 struct ocfs2_dinode *di = (struct ocfs2_dinode *)ac->ac_bh->b_data;
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001645 struct inode *alloc_inode = ac->ac_inode;
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001646
Joel Becker7d1fe092010-04-13 14:30:19 +08001647 ret = ocfs2_read_group_descriptor(alloc_inode, di,
1648 res->sr_bg_blkno, &group_bh);
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001649 if (ret < 0) {
1650 mlog_errno(ret);
1651 return ret;
1652 }
1653
1654 gd = (struct ocfs2_group_desc *) group_bh->b_data;
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001655 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
Joel Becker7d1fe092010-04-13 14:30:19 +08001656 ac->ac_max_block, res);
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001657 if (ret < 0) {
1658 if (ret != -ENOSPC)
1659 mlog_errno(ret);
1660 goto out;
1661 }
1662
Joel Becker13e434c2010-03-26 10:08:27 +08001663 if (!ret)
Joel Beckerba206632010-03-26 10:08:59 +08001664 ocfs2_bg_discontig_fix_result(ac, gd, res);
Joel Becker13e434c2010-03-26 10:08:27 +08001665
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001666 ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
Joel Becker7d1fe092010-04-13 14:30:19 +08001667 res->sr_bits,
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001668 le16_to_cpu(gd->bg_chain));
1669 if (ret < 0) {
1670 mlog_errno(ret);
1671 goto out;
1672 }
1673
1674 ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
Joel Becker7d1fe092010-04-13 14:30:19 +08001675 res->sr_bit_offset, res->sr_bits);
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001676 if (ret < 0)
1677 mlog_errno(ret);
1678
1679 *bits_left = le16_to_cpu(gd->bg_free_bits_count);
1680
1681out:
1682 brelse(group_bh);
1683
1684 return ret;
1685}
1686
Mark Fashehccd979b2005-12-15 14:31:24 -08001687static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
Mark Fasheh1fabe142006-10-09 18:11:45 -07001688 handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08001689 u32 bits_wanted,
1690 u32 min_bits,
Joel Becker7d1fe092010-04-13 14:30:19 +08001691 struct ocfs2_suballoc_result *res,
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001692 u16 *bits_left)
Mark Fashehccd979b2005-12-15 14:31:24 -08001693{
1694 int status;
Joel Becker7d1fe092010-04-13 14:30:19 +08001695 u16 chain;
Mark Fashehccd979b2005-12-15 14:31:24 -08001696 u32 tmp_used;
1697 u64 next_group;
Mark Fashehccd979b2005-12-15 14:31:24 -08001698 struct inode *alloc_inode = ac->ac_inode;
1699 struct buffer_head *group_bh = NULL;
1700 struct buffer_head *prev_group_bh = NULL;
1701 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
1702 struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
1703 struct ocfs2_group_desc *bg;
1704
1705 chain = ac->ac_chain;
Mark Fashehb06970532006-03-03 10:24:33 -08001706 mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n",
1707 bits_wanted, chain,
1708 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
Mark Fashehccd979b2005-12-15 14:31:24 -08001709
Joel Becker68f64d42008-11-13 14:49:14 -08001710 status = ocfs2_read_group_descriptor(alloc_inode, fe,
1711 le64_to_cpu(cl->cl_recs[chain].c_blkno),
1712 &group_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08001713 if (status < 0) {
1714 mlog_errno(status);
1715 goto bail;
1716 }
1717 bg = (struct ocfs2_group_desc *) group_bh->b_data;
Mark Fashehccd979b2005-12-15 14:31:24 -08001718
1719 status = -ENOSPC;
1720 /* for now, the chain search is a bit simplistic. We just use
1721 * the 1st group with any empty bits. */
Joel Becker1187c962008-09-03 20:03:39 -07001722 while ((status = ac->ac_group_search(alloc_inode, group_bh,
1723 bits_wanted, min_bits,
Joel Becker7d1fe092010-04-13 14:30:19 +08001724 ac->ac_max_block,
1725 res)) == -ENOSPC) {
Mark Fashehccd979b2005-12-15 14:31:24 -08001726 if (!bg->bg_next_group)
1727 break;
Mark Fasheha81cb882008-10-07 14:25:16 -07001728
1729 brelse(prev_group_bh);
1730 prev_group_bh = NULL;
1731
Mark Fashehccd979b2005-12-15 14:31:24 -08001732 next_group = le64_to_cpu(bg->bg_next_group);
1733 prev_group_bh = group_bh;
1734 group_bh = NULL;
Joel Becker68f64d42008-11-13 14:49:14 -08001735 status = ocfs2_read_group_descriptor(alloc_inode, fe,
1736 next_group, &group_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08001737 if (status < 0) {
1738 mlog_errno(status);
1739 goto bail;
1740 }
1741 bg = (struct ocfs2_group_desc *) group_bh->b_data;
Mark Fashehccd979b2005-12-15 14:31:24 -08001742 }
1743 if (status < 0) {
1744 if (status != -ENOSPC)
1745 mlog_errno(status);
1746 goto bail;
1747 }
1748
Mark Fashehb06970532006-03-03 10:24:33 -08001749 mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
Joel Becker7d1fe092010-04-13 14:30:19 +08001750 res->sr_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno));
Mark Fashehccd979b2005-12-15 14:31:24 -08001751
Joel Becker7d1fe092010-04-13 14:30:19 +08001752 res->sr_bg_blkno = le64_to_cpu(bg->bg_blkno);
Mark Fashehccd979b2005-12-15 14:31:24 -08001753
Joel Becker7d1fe092010-04-13 14:30:19 +08001754 BUG_ON(res->sr_bits == 0);
Joel Becker13e434c2010-03-26 10:08:27 +08001755 if (!status)
Joel Beckerba206632010-03-26 10:08:59 +08001756 ocfs2_bg_discontig_fix_result(ac, bg, res);
Joel Becker13e434c2010-03-26 10:08:27 +08001757
Mark Fashehccd979b2005-12-15 14:31:24 -08001758
1759 /*
1760 * Keep track of previous block descriptor read. When
1761 * we find a target, if we have read more than X
1762 * number of descriptors, and the target is reasonably
1763 * empty, relink him to top of his chain.
1764 *
1765 * We've read 0 extra blocks and only send one more to
1766 * the transaction, yet the next guy to search has a
1767 * much easier time.
1768 *
1769 * Do this *after* figuring out how many bits we're taking out
1770 * of our target group.
1771 */
1772 if (ac->ac_allow_chain_relink &&
1773 (prev_group_bh) &&
Joel Becker7d1fe092010-04-13 14:30:19 +08001774 (ocfs2_block_group_reasonably_empty(bg, res->sr_bits))) {
Mark Fashehccd979b2005-12-15 14:31:24 -08001775 status = ocfs2_relink_block_group(handle, alloc_inode,
1776 ac->ac_bh, group_bh,
1777 prev_group_bh, chain);
1778 if (status < 0) {
1779 mlog_errno(status);
1780 goto bail;
1781 }
1782 }
1783
1784 /* Ok, claim our bits now: set the info on dinode, chainlist
1785 * and then the group */
Joel Becker13723d02008-10-17 19:25:01 -07001786 status = ocfs2_journal_access_di(handle,
Joel Becker0cf2f762009-02-12 16:41:25 -08001787 INODE_CACHE(alloc_inode),
Joel Becker13723d02008-10-17 19:25:01 -07001788 ac->ac_bh,
1789 OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fashehccd979b2005-12-15 14:31:24 -08001790 if (status < 0) {
1791 mlog_errno(status);
1792 goto bail;
1793 }
1794
1795 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
Joel Becker7d1fe092010-04-13 14:30:19 +08001796 fe->id1.bitmap1.i_used = cpu_to_le32(res->sr_bits + tmp_used);
1797 le32_add_cpu(&cl->cl_recs[chain].c_free, -res->sr_bits);
Joel Beckerec20cec2010-03-19 14:13:52 -07001798 ocfs2_journal_dirty(handle, ac->ac_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08001799
1800 status = ocfs2_block_group_set_bits(handle,
1801 alloc_inode,
1802 bg,
1803 group_bh,
Joel Becker7d1fe092010-04-13 14:30:19 +08001804 res->sr_bit_offset,
1805 res->sr_bits);
Mark Fashehccd979b2005-12-15 14:31:24 -08001806 if (status < 0) {
1807 mlog_errno(status);
1808 goto bail;
1809 }
1810
Joel Becker7d1fe092010-04-13 14:30:19 +08001811 mlog(0, "Allocated %u bits from suballocator %llu\n", res->sr_bits,
Mark Fasheh1ca1a112007-04-27 16:01:25 -07001812 (unsigned long long)le64_to_cpu(fe->i_blkno));
Mark Fashehccd979b2005-12-15 14:31:24 -08001813
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001814 *bits_left = le16_to_cpu(bg->bg_free_bits_count);
Mark Fashehccd979b2005-12-15 14:31:24 -08001815bail:
Mark Fasheha81cb882008-10-07 14:25:16 -07001816 brelse(group_bh);
1817 brelse(prev_group_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08001818
1819 mlog_exit(status);
1820 return status;
1821}
1822
1823/* will give out up to bits_wanted contiguous bits. */
Joel Beckeraa8f8e92010-03-26 10:08:07 +08001824static int ocfs2_claim_suballoc_bits(struct ocfs2_alloc_context *ac,
Mark Fasheh1fabe142006-10-09 18:11:45 -07001825 handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08001826 u32 bits_wanted,
1827 u32 min_bits,
Joel Becker7d1fe092010-04-13 14:30:19 +08001828 struct ocfs2_suballoc_result *res)
Mark Fashehccd979b2005-12-15 14:31:24 -08001829{
1830 int status;
1831 u16 victim, i;
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001832 u16 bits_left = 0;
Mark Fashehccd979b2005-12-15 14:31:24 -08001833 struct ocfs2_chain_list *cl;
1834 struct ocfs2_dinode *fe;
1835
1836 mlog_entry_void();
1837
1838 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
1839 BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given));
1840 BUG_ON(!ac->ac_bh);
1841
1842 fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
Joel Becker10995aa2008-11-13 14:49:12 -08001843
1844 /* The bh was validated by the inode read during
1845 * ocfs2_reserve_suballoc_bits(). Any corruption is a code bug. */
1846 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
1847
Mark Fashehccd979b2005-12-15 14:31:24 -08001848 if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
1849 le32_to_cpu(fe->id1.bitmap1.i_total)) {
Joel Beckeraa8f8e92010-03-26 10:08:07 +08001850 ocfs2_error(ac->ac_inode->i_sb,
1851 "Chain allocator dinode %llu has %u used "
Mark Fashehb06970532006-03-03 10:24:33 -08001852 "bits but only %u total.",
1853 (unsigned long long)le64_to_cpu(fe->i_blkno),
Mark Fashehccd979b2005-12-15 14:31:24 -08001854 le32_to_cpu(fe->id1.bitmap1.i_used),
1855 le32_to_cpu(fe->id1.bitmap1.i_total));
1856 status = -EIO;
1857 goto bail;
1858 }
1859
Joel Becker7d1fe092010-04-13 14:30:19 +08001860 res->sr_bg_blkno = ac->ac_last_group;
1861 if (res->sr_bg_blkno) {
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001862 /* Attempt to short-circuit the usual search mechanism
1863 * by jumping straight to the most recently used
1864 * allocation group. This helps us mantain some
1865 * contiguousness across allocations. */
Mark Fashehda5cbf22006-10-06 18:34:35 -07001866 status = ocfs2_search_one_group(ac, handle, bits_wanted,
Joel Becker7d1fe092010-04-13 14:30:19 +08001867 min_bits, res, &bits_left);
1868 if (!status)
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001869 goto set_hint;
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001870 if (status < 0 && status != -ENOSPC) {
1871 mlog_errno(status);
1872 goto bail;
1873 }
1874 }
1875
Mark Fashehccd979b2005-12-15 14:31:24 -08001876 cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
1877
1878 victim = ocfs2_find_victim_chain(cl);
1879 ac->ac_chain = victim;
1880 ac->ac_allow_chain_relink = 1;
1881
Joel Becker7d1fe092010-04-13 14:30:19 +08001882 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
1883 res, &bits_left);
Mark Fashehccd979b2005-12-15 14:31:24 -08001884 if (!status)
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001885 goto set_hint;
Mark Fashehccd979b2005-12-15 14:31:24 -08001886 if (status < 0 && status != -ENOSPC) {
1887 mlog_errno(status);
1888 goto bail;
1889 }
1890
1891 mlog(0, "Search of victim chain %u came up with nothing, "
1892 "trying all chains now.\n", victim);
1893
1894 /* If we didn't pick a good victim, then just default to
1895 * searching each chain in order. Don't allow chain relinking
1896 * because we only calculate enough journal credits for one
1897 * relink per alloc. */
1898 ac->ac_allow_chain_relink = 0;
1899 for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) {
1900 if (i == victim)
1901 continue;
1902 if (!cl->cl_recs[i].c_free)
1903 continue;
1904
1905 ac->ac_chain = i;
Mark Fashehda5cbf22006-10-06 18:34:35 -07001906 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
Joel Becker7d1fe092010-04-13 14:30:19 +08001907 res, &bits_left);
Mark Fashehccd979b2005-12-15 14:31:24 -08001908 if (!status)
1909 break;
1910 if (status < 0 && status != -ENOSPC) {
1911 mlog_errno(status);
1912 goto bail;
1913 }
1914 }
Mark Fashehccd979b2005-12-15 14:31:24 -08001915
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001916set_hint:
1917 if (status != -ENOSPC) {
1918 /* If the next search of this group is not likely to
1919 * yield a suitable extent, then we reset the last
1920 * group hint so as to not waste a disk read */
1921 if (bits_left < min_bits)
1922 ac->ac_last_group = 0;
1923 else
Joel Becker7d1fe092010-04-13 14:30:19 +08001924 ac->ac_last_group = res->sr_bg_blkno;
Mark Fasheh883d4ca2006-06-05 16:41:00 -04001925 }
1926
1927bail:
Mark Fashehccd979b2005-12-15 14:31:24 -08001928 mlog_exit(status);
1929 return status;
1930}
1931
Joel Becker1ed9b772010-05-06 13:59:06 +08001932int ocfs2_claim_metadata(handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08001933 struct ocfs2_alloc_context *ac,
1934 u32 bits_wanted,
Joel Becker2b6cb572010-03-26 10:09:15 +08001935 u64 *suballoc_loc,
Mark Fashehccd979b2005-12-15 14:31:24 -08001936 u16 *suballoc_bit_start,
1937 unsigned int *num_bits,
1938 u64 *blkno_start)
1939{
1940 int status;
Joel Beckerba206632010-03-26 10:08:59 +08001941 struct ocfs2_suballoc_result res = { .sr_blkno = 0, };
Mark Fashehccd979b2005-12-15 14:31:24 -08001942
1943 BUG_ON(!ac);
1944 BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted));
1945 BUG_ON(ac->ac_which != OCFS2_AC_USE_META);
Mark Fashehccd979b2005-12-15 14:31:24 -08001946
Joel Beckeraa8f8e92010-03-26 10:08:07 +08001947 status = ocfs2_claim_suballoc_bits(ac,
Mark Fashehda5cbf22006-10-06 18:34:35 -07001948 handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08001949 bits_wanted,
1950 1,
Joel Becker7d1fe092010-04-13 14:30:19 +08001951 &res);
Mark Fashehccd979b2005-12-15 14:31:24 -08001952 if (status < 0) {
1953 mlog_errno(status);
1954 goto bail;
1955 }
Joel Becker1ed9b772010-05-06 13:59:06 +08001956 atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
Mark Fashehccd979b2005-12-15 14:31:24 -08001957
Joel Becker2b6cb572010-03-26 10:09:15 +08001958 *suballoc_loc = res.sr_bg_blkno;
Joel Becker7d1fe092010-04-13 14:30:19 +08001959 *suballoc_bit_start = res.sr_bit_offset;
Joel Beckerba206632010-03-26 10:08:59 +08001960 *blkno_start = res.sr_blkno;
Joel Becker7d1fe092010-04-13 14:30:19 +08001961 ac->ac_bits_given += res.sr_bits;
1962 *num_bits = res.sr_bits;
Mark Fashehccd979b2005-12-15 14:31:24 -08001963 status = 0;
1964bail:
1965 mlog_exit(status);
1966 return status;
1967}
1968
Tao Ma13821152009-02-25 00:53:23 +08001969static void ocfs2_init_inode_ac_group(struct inode *dir,
1970 struct buffer_head *parent_fe_bh,
1971 struct ocfs2_alloc_context *ac)
1972{
1973 struct ocfs2_dinode *fe = (struct ocfs2_dinode *)parent_fe_bh->b_data;
1974 /*
1975 * Try to allocate inodes from some specific group.
1976 *
1977 * If the parent dir has recorded the last group used in allocation,
1978 * cool, use it. Otherwise if we try to allocate new inode from the
1979 * same slot the parent dir belongs to, use the same chunk.
1980 *
1981 * We are very careful here to avoid the mistake of setting
1982 * ac_last_group to a group descriptor from a different (unlocked) slot.
1983 */
1984 if (OCFS2_I(dir)->ip_last_used_group &&
1985 OCFS2_I(dir)->ip_last_used_slot == ac->ac_alloc_slot)
1986 ac->ac_last_group = OCFS2_I(dir)->ip_last_used_group;
1987 else if (le16_to_cpu(fe->i_suballoc_slot) == ac->ac_alloc_slot)
1988 ac->ac_last_group = ocfs2_which_suballoc_group(
1989 le64_to_cpu(fe->i_blkno),
1990 le16_to_cpu(fe->i_suballoc_bit));
1991}
1992
1993static inline void ocfs2_save_inode_ac_group(struct inode *dir,
1994 struct ocfs2_alloc_context *ac)
1995{
1996 OCFS2_I(dir)->ip_last_used_group = ac->ac_last_group;
1997 OCFS2_I(dir)->ip_last_used_slot = ac->ac_alloc_slot;
1998}
1999
Joel Becker1ed9b772010-05-06 13:59:06 +08002000int ocfs2_claim_new_inode(handle_t *handle,
Tao Ma13821152009-02-25 00:53:23 +08002001 struct inode *dir,
2002 struct buffer_head *parent_fe_bh,
Mark Fashehccd979b2005-12-15 14:31:24 -08002003 struct ocfs2_alloc_context *ac,
Joel Becker2b6cb572010-03-26 10:09:15 +08002004 u64 *suballoc_loc,
Mark Fashehccd979b2005-12-15 14:31:24 -08002005 u16 *suballoc_bit,
2006 u64 *fe_blkno)
2007{
2008 int status;
Joel Becker2b6cb572010-03-26 10:09:15 +08002009 struct ocfs2_suballoc_result res;
Mark Fashehccd979b2005-12-15 14:31:24 -08002010
2011 mlog_entry_void();
2012
2013 BUG_ON(!ac);
2014 BUG_ON(ac->ac_bits_given != 0);
2015 BUG_ON(ac->ac_bits_wanted != 1);
2016 BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
Mark Fashehccd979b2005-12-15 14:31:24 -08002017
Tao Ma13821152009-02-25 00:53:23 +08002018 ocfs2_init_inode_ac_group(dir, parent_fe_bh, ac);
2019
Joel Beckeraa8f8e92010-03-26 10:08:07 +08002020 status = ocfs2_claim_suballoc_bits(ac,
Mark Fashehda5cbf22006-10-06 18:34:35 -07002021 handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08002022 1,
2023 1,
Joel Becker7d1fe092010-04-13 14:30:19 +08002024 &res);
Mark Fashehccd979b2005-12-15 14:31:24 -08002025 if (status < 0) {
2026 mlog_errno(status);
2027 goto bail;
2028 }
Joel Becker1ed9b772010-05-06 13:59:06 +08002029 atomic_inc(&OCFS2_SB(ac->ac_inode->i_sb)->alloc_stats.bg_allocs);
Mark Fashehccd979b2005-12-15 14:31:24 -08002030
Joel Becker7d1fe092010-04-13 14:30:19 +08002031 BUG_ON(res.sr_bits != 1);
Mark Fashehccd979b2005-12-15 14:31:24 -08002032
Joel Becker2b6cb572010-03-26 10:09:15 +08002033 *suballoc_loc = res.sr_bg_blkno;
Joel Becker7d1fe092010-04-13 14:30:19 +08002034 *suballoc_bit = res.sr_bit_offset;
Joel Beckerba206632010-03-26 10:08:59 +08002035 *fe_blkno = res.sr_blkno;
Mark Fashehccd979b2005-12-15 14:31:24 -08002036 ac->ac_bits_given++;
Tao Ma13821152009-02-25 00:53:23 +08002037 ocfs2_save_inode_ac_group(dir, ac);
Mark Fashehccd979b2005-12-15 14:31:24 -08002038 status = 0;
2039bail:
2040 mlog_exit(status);
2041 return status;
2042}
2043
2044/* translate a group desc. blkno and it's bitmap offset into
2045 * disk cluster offset. */
2046static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
2047 u64 bg_blkno,
2048 u16 bg_bit_off)
2049{
2050 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2051 u32 cluster = 0;
2052
2053 BUG_ON(!ocfs2_is_cluster_bitmap(inode));
2054
2055 if (bg_blkno != osb->first_cluster_group_blkno)
2056 cluster = ocfs2_blocks_to_clusters(inode->i_sb, bg_blkno);
2057 cluster += (u32) bg_bit_off;
2058 return cluster;
2059}
2060
2061/* given a cluster offset, calculate which block group it belongs to
2062 * and return that block offset. */
Tao Mad6590722007-12-18 15:47:03 +08002063u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster)
Mark Fashehccd979b2005-12-15 14:31:24 -08002064{
2065 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2066 u32 group_no;
2067
2068 BUG_ON(!ocfs2_is_cluster_bitmap(inode));
2069
2070 group_no = cluster / osb->bitmap_cpg;
2071 if (!group_no)
2072 return osb->first_cluster_group_blkno;
2073 return ocfs2_clusters_to_blocks(inode->i_sb,
2074 group_no * osb->bitmap_cpg);
2075}
2076
2077/* given the block number of a cluster start, calculate which cluster
2078 * group and descriptor bitmap offset that corresponds to. */
2079static inline void ocfs2_block_to_cluster_group(struct inode *inode,
2080 u64 data_blkno,
2081 u64 *bg_blkno,
2082 u16 *bg_bit_off)
2083{
2084 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2085 u32 data_cluster = ocfs2_blocks_to_clusters(osb->sb, data_blkno);
2086
2087 BUG_ON(!ocfs2_is_cluster_bitmap(inode));
2088
2089 *bg_blkno = ocfs2_which_cluster_group(inode,
2090 data_cluster);
2091
2092 if (*bg_blkno == osb->first_cluster_group_blkno)
2093 *bg_bit_off = (u16) data_cluster;
2094 else
2095 *bg_bit_off = (u16) ocfs2_blocks_to_clusters(osb->sb,
2096 data_blkno - *bg_blkno);
2097}
2098
2099/*
2100 * min_bits - minimum contiguous chunk from this total allocation we
2101 * can handle. set to what we asked for originally for a full
2102 * contig. allocation, set to '1' to indicate we can deal with extents
2103 * of any size.
2104 */
Joel Becker1ed9b772010-05-06 13:59:06 +08002105int __ocfs2_claim_clusters(handle_t *handle,
Mark Fasheh415cb802007-09-16 20:10:16 -07002106 struct ocfs2_alloc_context *ac,
2107 u32 min_clusters,
2108 u32 max_clusters,
2109 u32 *cluster_start,
2110 u32 *num_clusters)
Mark Fashehccd979b2005-12-15 14:31:24 -08002111{
2112 int status;
Mark Fasheh415cb802007-09-16 20:10:16 -07002113 unsigned int bits_wanted = max_clusters;
Joel Beckerba206632010-03-26 10:08:59 +08002114 struct ocfs2_suballoc_result res = { .sr_blkno = 0, };
Joel Becker1ed9b772010-05-06 13:59:06 +08002115 struct ocfs2_super *osb = OCFS2_SB(ac->ac_inode->i_sb);
Mark Fashehccd979b2005-12-15 14:31:24 -08002116
2117 mlog_entry_void();
2118
Mark Fashehccd979b2005-12-15 14:31:24 -08002119 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
2120
2121 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
2122 && ac->ac_which != OCFS2_AC_USE_MAIN);
Mark Fashehccd979b2005-12-15 14:31:24 -08002123
2124 if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
Mark Fasheh33d5d382010-02-24 13:34:09 -08002125 WARN_ON(min_clusters > 1);
2126
Mark Fashehccd979b2005-12-15 14:31:24 -08002127 status = ocfs2_claim_local_alloc_bits(osb,
2128 handle,
2129 ac,
2130 bits_wanted,
2131 cluster_start,
2132 num_clusters);
2133 if (!status)
2134 atomic_inc(&osb->alloc_stats.local_data);
2135 } else {
2136 if (min_clusters > (osb->bitmap_cpg - 1)) {
2137 /* The only paths asking for contiguousness
2138 * should know about this already. */
Sunil Mushran2fbe8d12007-12-20 14:58:11 -08002139 mlog(ML_ERROR, "minimum allocation requested %u exceeds "
2140 "group bitmap size %u!\n", min_clusters,
2141 osb->bitmap_cpg);
Mark Fashehccd979b2005-12-15 14:31:24 -08002142 status = -ENOSPC;
2143 goto bail;
2144 }
2145 /* clamp the current request down to a realistic size. */
2146 if (bits_wanted > (osb->bitmap_cpg - 1))
2147 bits_wanted = osb->bitmap_cpg - 1;
2148
Joel Beckeraa8f8e92010-03-26 10:08:07 +08002149 status = ocfs2_claim_suballoc_bits(ac,
Mark Fashehda5cbf22006-10-06 18:34:35 -07002150 handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08002151 bits_wanted,
2152 min_clusters,
Joel Becker7d1fe092010-04-13 14:30:19 +08002153 &res);
Mark Fashehccd979b2005-12-15 14:31:24 -08002154 if (!status) {
Joel Beckerba206632010-03-26 10:08:59 +08002155 BUG_ON(res.sr_blkno); /* cluster alloc can't set */
Mark Fashehccd979b2005-12-15 14:31:24 -08002156 *cluster_start =
2157 ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode,
Joel Becker7d1fe092010-04-13 14:30:19 +08002158 res.sr_bg_blkno,
2159 res.sr_bit_offset);
Mark Fashehccd979b2005-12-15 14:31:24 -08002160 atomic_inc(&osb->alloc_stats.bitmap_data);
2161 }
2162 }
2163 if (status < 0) {
2164 if (status != -ENOSPC)
2165 mlog_errno(status);
2166 goto bail;
2167 }
2168
Joel Becker7d1fe092010-04-13 14:30:19 +08002169 ac->ac_bits_given += res.sr_bits;
2170 *num_clusters = res.sr_bits;
Mark Fashehccd979b2005-12-15 14:31:24 -08002171
2172bail:
2173 mlog_exit(status);
2174 return status;
2175}
2176
Joel Becker1ed9b772010-05-06 13:59:06 +08002177int ocfs2_claim_clusters(handle_t *handle,
Mark Fasheh415cb802007-09-16 20:10:16 -07002178 struct ocfs2_alloc_context *ac,
2179 u32 min_clusters,
2180 u32 *cluster_start,
2181 u32 *num_clusters)
2182{
2183 unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
2184
Joel Becker1ed9b772010-05-06 13:59:06 +08002185 return __ocfs2_claim_clusters(handle, ac, min_clusters,
Mark Fasheh415cb802007-09-16 20:10:16 -07002186 bits_wanted, cluster_start, num_clusters);
2187}
2188
Mark Fashehb4414ee2010-03-11 18:31:09 -08002189static int ocfs2_block_group_clear_bits(handle_t *handle,
2190 struct inode *alloc_inode,
2191 struct ocfs2_group_desc *bg,
2192 struct buffer_head *group_bh,
2193 unsigned int bit_off,
2194 unsigned int num_bits,
2195 void (*undo_fn)(unsigned int bit,
2196 unsigned long *bmap))
Mark Fashehccd979b2005-12-15 14:31:24 -08002197{
2198 int status;
2199 unsigned int tmp;
Mark Fashehccd979b2005-12-15 14:31:24 -08002200 struct ocfs2_group_desc *undo_bg = NULL;
2201
2202 mlog_entry_void();
2203
Joel Becker42035302008-11-13 14:49:15 -08002204 /* The caller got this descriptor from
2205 * ocfs2_read_group_descriptor(). Any corruption is a code bug. */
2206 BUG_ON(!OCFS2_IS_VALID_GROUP_DESC(bg));
Mark Fashehccd979b2005-12-15 14:31:24 -08002207
2208 mlog(0, "off = %u, num = %u\n", bit_off, num_bits);
2209
Mark Fashehb4414ee2010-03-11 18:31:09 -08002210 BUG_ON(undo_fn && !ocfs2_is_cluster_bitmap(alloc_inode));
Joel Becker0cf2f762009-02-12 16:41:25 -08002211 status = ocfs2_journal_access_gd(handle, INODE_CACHE(alloc_inode),
Mark Fashehb4414ee2010-03-11 18:31:09 -08002212 group_bh,
2213 undo_fn ?
2214 OCFS2_JOURNAL_ACCESS_UNDO :
2215 OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fashehccd979b2005-12-15 14:31:24 -08002216 if (status < 0) {
2217 mlog_errno(status);
2218 goto bail;
2219 }
2220
Mark Fashehb4414ee2010-03-11 18:31:09 -08002221 if (undo_fn) {
Sunil Mushran94e41ec2009-06-19 14:45:54 -07002222 jbd_lock_bh_state(group_bh);
2223 undo_bg = (struct ocfs2_group_desc *)
2224 bh2jh(group_bh)->b_committed_data;
2225 BUG_ON(!undo_bg);
2226 }
Mark Fashehccd979b2005-12-15 14:31:24 -08002227
2228 tmp = num_bits;
2229 while(tmp--) {
2230 ocfs2_clear_bit((bit_off + tmp),
2231 (unsigned long *) bg->bg_bitmap);
Mark Fashehb4414ee2010-03-11 18:31:09 -08002232 if (undo_fn)
2233 undo_fn(bit_off + tmp,
2234 (unsigned long *) undo_bg->bg_bitmap);
Mark Fashehccd979b2005-12-15 14:31:24 -08002235 }
2236 le16_add_cpu(&bg->bg_free_bits_count, num_bits);
2237
Mark Fashehb4414ee2010-03-11 18:31:09 -08002238 if (undo_fn)
Sunil Mushran94e41ec2009-06-19 14:45:54 -07002239 jbd_unlock_bh_state(group_bh);
2240
Joel Beckerec20cec2010-03-19 14:13:52 -07002241 ocfs2_journal_dirty(handle, group_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08002242bail:
2243 return status;
2244}
2245
2246/*
2247 * expects the suballoc inode to already be locked.
2248 */
Mark Fashehb4414ee2010-03-11 18:31:09 -08002249static int _ocfs2_free_suballoc_bits(handle_t *handle,
2250 struct inode *alloc_inode,
2251 struct buffer_head *alloc_bh,
2252 unsigned int start_bit,
2253 u64 bg_blkno,
2254 unsigned int count,
2255 void (*undo_fn)(unsigned int bit,
2256 unsigned long *bitmap))
Mark Fashehccd979b2005-12-15 14:31:24 -08002257{
2258 int status = 0;
2259 u32 tmp_used;
Mark Fashehccd979b2005-12-15 14:31:24 -08002260 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data;
2261 struct ocfs2_chain_list *cl = &fe->id2.i_chain;
2262 struct buffer_head *group_bh = NULL;
2263 struct ocfs2_group_desc *group;
2264
2265 mlog_entry_void();
2266
Joel Becker10995aa2008-11-13 14:49:12 -08002267 /* The alloc_bh comes from ocfs2_free_dinode() or
2268 * ocfs2_free_clusters(). The callers have all locked the
2269 * allocator and gotten alloc_bh from the lock call. This
2270 * validates the dinode buffer. Any corruption that has happended
2271 * is a code bug. */
2272 BUG_ON(!OCFS2_IS_VALID_DINODE(fe));
Mark Fashehccd979b2005-12-15 14:31:24 -08002273 BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
2274
Mark Fashehb06970532006-03-03 10:24:33 -08002275 mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
2276 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
2277 (unsigned long long)bg_blkno, start_bit);
Mark Fashehccd979b2005-12-15 14:31:24 -08002278
Joel Becker68f64d42008-11-13 14:49:14 -08002279 status = ocfs2_read_group_descriptor(alloc_inode, fe, bg_blkno,
2280 &group_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08002281 if (status < 0) {
2282 mlog_errno(status);
2283 goto bail;
2284 }
Mark Fashehccd979b2005-12-15 14:31:24 -08002285 group = (struct ocfs2_group_desc *) group_bh->b_data;
Joel Becker68f64d42008-11-13 14:49:14 -08002286
Mark Fashehccd979b2005-12-15 14:31:24 -08002287 BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
2288
2289 status = ocfs2_block_group_clear_bits(handle, alloc_inode,
2290 group, group_bh,
Mark Fashehb4414ee2010-03-11 18:31:09 -08002291 start_bit, count, undo_fn);
Mark Fashehccd979b2005-12-15 14:31:24 -08002292 if (status < 0) {
2293 mlog_errno(status);
2294 goto bail;
2295 }
2296
Joel Becker0cf2f762009-02-12 16:41:25 -08002297 status = ocfs2_journal_access_di(handle, INODE_CACHE(alloc_inode),
2298 alloc_bh, OCFS2_JOURNAL_ACCESS_WRITE);
Mark Fashehccd979b2005-12-15 14:31:24 -08002299 if (status < 0) {
2300 mlog_errno(status);
2301 goto bail;
2302 }
2303
2304 le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free,
2305 count);
2306 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
2307 fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count);
Joel Beckerec20cec2010-03-19 14:13:52 -07002308 ocfs2_journal_dirty(handle, alloc_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08002309
2310bail:
Mark Fasheha81cb882008-10-07 14:25:16 -07002311 brelse(group_bh);
Mark Fashehccd979b2005-12-15 14:31:24 -08002312
2313 mlog_exit(status);
2314 return status;
2315}
2316
Mark Fashehb4414ee2010-03-11 18:31:09 -08002317int ocfs2_free_suballoc_bits(handle_t *handle,
2318 struct inode *alloc_inode,
2319 struct buffer_head *alloc_bh,
2320 unsigned int start_bit,
2321 u64 bg_blkno,
2322 unsigned int count)
2323{
2324 return _ocfs2_free_suballoc_bits(handle, alloc_inode, alloc_bh,
2325 start_bit, bg_blkno, count, NULL);
2326}
2327
Mark Fasheh1fabe142006-10-09 18:11:45 -07002328int ocfs2_free_dinode(handle_t *handle,
Mark Fashehccd979b2005-12-15 14:31:24 -08002329 struct inode *inode_alloc_inode,
2330 struct buffer_head *inode_alloc_bh,
2331 struct ocfs2_dinode *di)
2332{
2333 u64 blk = le64_to_cpu(di->i_blkno);
2334 u16 bit = le16_to_cpu(di->i_suballoc_bit);
2335 u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
2336
2337 return ocfs2_free_suballoc_bits(handle, inode_alloc_inode,
2338 inode_alloc_bh, bit, bg_blkno, 1);
2339}
2340
Mark Fashehb4414ee2010-03-11 18:31:09 -08002341static int _ocfs2_free_clusters(handle_t *handle,
2342 struct inode *bitmap_inode,
2343 struct buffer_head *bitmap_bh,
2344 u64 start_blk,
2345 unsigned int num_clusters,
2346 void (*undo_fn)(unsigned int bit,
2347 unsigned long *bitmap))
Mark Fashehccd979b2005-12-15 14:31:24 -08002348{
2349 int status;
2350 u16 bg_start_bit;
2351 u64 bg_blkno;
2352 struct ocfs2_dinode *fe;
2353
2354 /* You can't ever have a contiguous set of clusters
2355 * bigger than a block group bitmap so we never have to worry
2356 * about looping on them. */
2357
2358 mlog_entry_void();
2359
2360 /* This is expensive. We can safely remove once this stuff has
2361 * gotten tested really well. */
2362 BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk)));
2363
2364 fe = (struct ocfs2_dinode *) bitmap_bh->b_data;
2365
2366 ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno,
2367 &bg_start_bit);
2368
Mark Fashehb06970532006-03-03 10:24:33 -08002369 mlog(0, "want to free %u clusters starting at block %llu\n",
2370 num_clusters, (unsigned long long)start_blk);
2371 mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n",
2372 (unsigned long long)bg_blkno, bg_start_bit);
Mark Fashehccd979b2005-12-15 14:31:24 -08002373
Mark Fashehb4414ee2010-03-11 18:31:09 -08002374 status = _ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
2375 bg_start_bit, bg_blkno,
2376 num_clusters, undo_fn);
Mark Fasheh9c7af402008-07-28 18:02:53 -07002377 if (status < 0) {
Mark Fashehccd979b2005-12-15 14:31:24 -08002378 mlog_errno(status);
Mark Fasheh9c7af402008-07-28 18:02:53 -07002379 goto out;
2380 }
Mark Fashehccd979b2005-12-15 14:31:24 -08002381
Mark Fasheh9c7af402008-07-28 18:02:53 -07002382 ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb),
2383 num_clusters);
2384
2385out:
Mark Fashehccd979b2005-12-15 14:31:24 -08002386 mlog_exit(status);
2387 return status;
2388}
2389
Mark Fashehb4414ee2010-03-11 18:31:09 -08002390int ocfs2_free_clusters(handle_t *handle,
2391 struct inode *bitmap_inode,
2392 struct buffer_head *bitmap_bh,
2393 u64 start_blk,
2394 unsigned int num_clusters)
2395{
2396 return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh,
2397 start_blk, num_clusters,
2398 _ocfs2_set_bit);
2399}
2400
2401/*
2402 * Give never-used clusters back to the global bitmap. We don't need
2403 * to protect these bits in the undo buffer.
2404 */
2405int ocfs2_release_clusters(handle_t *handle,
2406 struct inode *bitmap_inode,
2407 struct buffer_head *bitmap_bh,
2408 u64 start_blk,
2409 unsigned int num_clusters)
2410{
2411 return _ocfs2_free_clusters(handle, bitmap_inode, bitmap_bh,
2412 start_blk, num_clusters,
2413 _ocfs2_clear_bit);
2414}
2415
Mark Fashehccd979b2005-12-15 14:31:24 -08002416static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg)
2417{
2418 printk("Block Group:\n");
2419 printk("bg_signature: %s\n", bg->bg_signature);
2420 printk("bg_size: %u\n", bg->bg_size);
2421 printk("bg_bits: %u\n", bg->bg_bits);
2422 printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count);
2423 printk("bg_chain: %u\n", bg->bg_chain);
2424 printk("bg_generation: %u\n", le32_to_cpu(bg->bg_generation));
Mark Fashehb06970532006-03-03 10:24:33 -08002425 printk("bg_next_group: %llu\n",
2426 (unsigned long long)bg->bg_next_group);
2427 printk("bg_parent_dinode: %llu\n",
2428 (unsigned long long)bg->bg_parent_dinode);
2429 printk("bg_blkno: %llu\n",
2430 (unsigned long long)bg->bg_blkno);
Mark Fashehccd979b2005-12-15 14:31:24 -08002431}
2432
2433static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
2434{
2435 int i;
2436
Mark Fashehb06970532006-03-03 10:24:33 -08002437 printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno);
Mark Fashehccd979b2005-12-15 14:31:24 -08002438 printk("i_signature: %s\n", fe->i_signature);
Mark Fashehb06970532006-03-03 10:24:33 -08002439 printk("i_size: %llu\n",
2440 (unsigned long long)fe->i_size);
Mark Fashehccd979b2005-12-15 14:31:24 -08002441 printk("i_clusters: %u\n", fe->i_clusters);
2442 printk("i_generation: %u\n",
2443 le32_to_cpu(fe->i_generation));
2444 printk("id1.bitmap1.i_used: %u\n",
2445 le32_to_cpu(fe->id1.bitmap1.i_used));
2446 printk("id1.bitmap1.i_total: %u\n",
2447 le32_to_cpu(fe->id1.bitmap1.i_total));
2448 printk("id2.i_chain.cl_cpg: %u\n", fe->id2.i_chain.cl_cpg);
2449 printk("id2.i_chain.cl_bpc: %u\n", fe->id2.i_chain.cl_bpc);
2450 printk("id2.i_chain.cl_count: %u\n", fe->id2.i_chain.cl_count);
2451 printk("id2.i_chain.cl_next_free_rec: %u\n",
2452 fe->id2.i_chain.cl_next_free_rec);
2453 for(i = 0; i < fe->id2.i_chain.cl_next_free_rec; i++) {
2454 printk("fe->id2.i_chain.cl_recs[%d].c_free: %u\n", i,
2455 fe->id2.i_chain.cl_recs[i].c_free);
2456 printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i,
2457 fe->id2.i_chain.cl_recs[i].c_total);
Mark Fashehb06970532006-03-03 10:24:33 -08002458 printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i,
2459 (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
Mark Fashehccd979b2005-12-15 14:31:24 -08002460 }
2461}
Tao Mae7d4cb62008-08-18 17:38:44 +08002462
2463/*
2464 * For a given allocation, determine which allocators will need to be
2465 * accessed, and lock them, reserving the appropriate number of bits.
2466 *
2467 * Sparse file systems call this from ocfs2_write_begin_nolock()
2468 * and ocfs2_allocate_unwritten_extents().
2469 *
2470 * File systems which don't support holes call this from
2471 * ocfs2_extend_allocation().
2472 */
Joel Beckerf99b9b72008-08-20 19:36:33 -07002473int ocfs2_lock_allocators(struct inode *inode,
2474 struct ocfs2_extent_tree *et,
Tao Mae7d4cb62008-08-18 17:38:44 +08002475 u32 clusters_to_add, u32 extents_to_split,
2476 struct ocfs2_alloc_context **data_ac,
Joel Beckerf99b9b72008-08-20 19:36:33 -07002477 struct ocfs2_alloc_context **meta_ac)
Tao Mae7d4cb62008-08-18 17:38:44 +08002478{
2479 int ret = 0, num_free_extents;
2480 unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split;
2481 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2482
2483 *meta_ac = NULL;
2484 if (data_ac)
2485 *data_ac = NULL;
2486
2487 BUG_ON(clusters_to_add != 0 && data_ac == NULL);
2488
Joel Becker3d03a302009-02-12 17:49:26 -08002489 num_free_extents = ocfs2_num_free_extents(osb, et);
Tao Mae7d4cb62008-08-18 17:38:44 +08002490 if (num_free_extents < 0) {
2491 ret = num_free_extents;
2492 mlog_errno(ret);
2493 goto out;
2494 }
2495
2496 /*
2497 * Sparse allocation file systems need to be more conservative
2498 * with reserving room for expansion - the actual allocation
2499 * happens while we've got a journal handle open so re-taking
2500 * a cluster lock (because we ran out of room for another
2501 * extent) will violate ordering rules.
2502 *
2503 * Most of the time we'll only be seeing this 1 cluster at a time
2504 * anyway.
2505 *
2506 * Always lock for any unwritten extents - we might want to
2507 * add blocks during a split.
2508 */
2509 if (!num_free_extents ||
2510 (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) {
Joel Beckerf99b9b72008-08-20 19:36:33 -07002511 ret = ocfs2_reserve_new_metadata(osb, et->et_root_el, meta_ac);
Tao Mae7d4cb62008-08-18 17:38:44 +08002512 if (ret < 0) {
2513 if (ret != -ENOSPC)
2514 mlog_errno(ret);
2515 goto out;
2516 }
2517 }
2518
2519 if (clusters_to_add == 0)
2520 goto out;
2521
2522 ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac);
2523 if (ret < 0) {
2524 if (ret != -ENOSPC)
2525 mlog_errno(ret);
2526 goto out;
2527 }
2528
2529out:
2530 if (ret) {
2531 if (*meta_ac) {
2532 ocfs2_free_alloc_context(*meta_ac);
2533 *meta_ac = NULL;
2534 }
2535
2536 /*
2537 * We cannot have an error and a non null *data_ac.
2538 */
2539 }
2540
2541 return ret;
2542}
wengang wang6ca497a2009-03-06 21:29:10 +08002543
2544/*
2545 * Read the inode specified by blkno to get suballoc_slot and
2546 * suballoc_bit.
2547 */
2548static int ocfs2_get_suballoc_slot_bit(struct ocfs2_super *osb, u64 blkno,
2549 u16 *suballoc_slot, u16 *suballoc_bit)
2550{
2551 int status;
2552 struct buffer_head *inode_bh = NULL;
2553 struct ocfs2_dinode *inode_fe;
2554
Joel Becker5b09b502009-04-21 16:31:20 -07002555 mlog_entry("blkno: %llu\n", (unsigned long long)blkno);
wengang wang6ca497a2009-03-06 21:29:10 +08002556
2557 /* dirty read disk */
2558 status = ocfs2_read_blocks_sync(osb, blkno, 1, &inode_bh);
2559 if (status < 0) {
Joel Becker5b09b502009-04-21 16:31:20 -07002560 mlog(ML_ERROR, "read block %llu failed %d\n",
2561 (unsigned long long)blkno, status);
wengang wang6ca497a2009-03-06 21:29:10 +08002562 goto bail;
2563 }
2564
2565 inode_fe = (struct ocfs2_dinode *) inode_bh->b_data;
2566 if (!OCFS2_IS_VALID_DINODE(inode_fe)) {
Joel Becker5b09b502009-04-21 16:31:20 -07002567 mlog(ML_ERROR, "invalid inode %llu requested\n",
2568 (unsigned long long)blkno);
wengang wang6ca497a2009-03-06 21:29:10 +08002569 status = -EINVAL;
2570 goto bail;
2571 }
2572
Tao Ma0fba8132009-03-19 05:08:43 +08002573 if (le16_to_cpu(inode_fe->i_suballoc_slot) != (u16)OCFS2_INVALID_SLOT &&
wengang wang6ca497a2009-03-06 21:29:10 +08002574 (u32)le16_to_cpu(inode_fe->i_suballoc_slot) > osb->max_slots - 1) {
2575 mlog(ML_ERROR, "inode %llu has invalid suballoc slot %u\n",
Joel Becker5b09b502009-04-21 16:31:20 -07002576 (unsigned long long)blkno,
2577 (u32)le16_to_cpu(inode_fe->i_suballoc_slot));
wengang wang6ca497a2009-03-06 21:29:10 +08002578 status = -EINVAL;
2579 goto bail;
2580 }
2581
2582 if (suballoc_slot)
2583 *suballoc_slot = le16_to_cpu(inode_fe->i_suballoc_slot);
2584 if (suballoc_bit)
2585 *suballoc_bit = le16_to_cpu(inode_fe->i_suballoc_bit);
2586
2587bail:
2588 brelse(inode_bh);
2589
2590 mlog_exit(status);
2591 return status;
2592}
2593
2594/*
2595 * test whether bit is SET in allocator bitmap or not. on success, 0
2596 * is returned and *res is 1 for SET; 0 otherwise. when fails, errno
2597 * is returned and *res is meaningless. Call this after you have
2598 * cluster locked against suballoc, or you may get a result based on
2599 * non-up2date contents
2600 */
2601static int ocfs2_test_suballoc_bit(struct ocfs2_super *osb,
2602 struct inode *suballoc,
2603 struct buffer_head *alloc_bh, u64 blkno,
2604 u16 bit, int *res)
2605{
2606 struct ocfs2_dinode *alloc_fe;
2607 struct ocfs2_group_desc *group;
2608 struct buffer_head *group_bh = NULL;
2609 u64 bg_blkno;
2610 int status;
2611
Joel Becker5b09b502009-04-21 16:31:20 -07002612 mlog_entry("blkno: %llu bit: %u\n", (unsigned long long)blkno,
2613 (unsigned int)bit);
wengang wang6ca497a2009-03-06 21:29:10 +08002614
2615 alloc_fe = (struct ocfs2_dinode *)alloc_bh->b_data;
2616 if ((bit + 1) > ocfs2_bits_per_group(&alloc_fe->id2.i_chain)) {
2617 mlog(ML_ERROR, "suballoc bit %u out of range of %u\n",
2618 (unsigned int)bit,
2619 ocfs2_bits_per_group(&alloc_fe->id2.i_chain));
2620 status = -EINVAL;
2621 goto bail;
2622 }
2623
2624 bg_blkno = ocfs2_which_suballoc_group(blkno, bit);
2625 status = ocfs2_read_group_descriptor(suballoc, alloc_fe, bg_blkno,
2626 &group_bh);
2627 if (status < 0) {
Joel Becker5b09b502009-04-21 16:31:20 -07002628 mlog(ML_ERROR, "read group %llu failed %d\n",
2629 (unsigned long long)bg_blkno, status);
wengang wang6ca497a2009-03-06 21:29:10 +08002630 goto bail;
2631 }
2632
2633 group = (struct ocfs2_group_desc *) group_bh->b_data;
2634 *res = ocfs2_test_bit(bit, (unsigned long *)group->bg_bitmap);
2635
2636bail:
2637 brelse(group_bh);
2638
2639 mlog_exit(status);
2640 return status;
2641}
2642
2643/*
2644 * Test if the bit representing this inode (blkno) is set in the
2645 * suballocator.
2646 *
2647 * On success, 0 is returned and *res is 1 for SET; 0 otherwise.
2648 *
2649 * In the event of failure, a negative value is returned and *res is
2650 * meaningless.
2651 *
2652 * Callers must make sure to hold nfs_sync_lock to prevent
2653 * ocfs2_delete_inode() on another node from accessing the same
2654 * suballocator concurrently.
2655 */
2656int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res)
2657{
2658 int status;
2659 u16 suballoc_bit = 0, suballoc_slot = 0;
2660 struct inode *inode_alloc_inode;
2661 struct buffer_head *alloc_bh = NULL;
2662
Joel Becker5b09b502009-04-21 16:31:20 -07002663 mlog_entry("blkno: %llu", (unsigned long long)blkno);
wengang wang6ca497a2009-03-06 21:29:10 +08002664
2665 status = ocfs2_get_suballoc_slot_bit(osb, blkno, &suballoc_slot,
2666 &suballoc_bit);
2667 if (status < 0) {
2668 mlog(ML_ERROR, "get alloc slot and bit failed %d\n", status);
2669 goto bail;
2670 }
2671
2672 inode_alloc_inode =
2673 ocfs2_get_system_file_inode(osb, INODE_ALLOC_SYSTEM_INODE,
2674 suballoc_slot);
2675 if (!inode_alloc_inode) {
2676 /* the error code could be inaccurate, but we are not able to
2677 * get the correct one. */
2678 status = -EINVAL;
2679 mlog(ML_ERROR, "unable to get alloc inode in slot %u\n",
2680 (u32)suballoc_slot);
2681 goto bail;
2682 }
2683
2684 mutex_lock(&inode_alloc_inode->i_mutex);
2685 status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0);
2686 if (status < 0) {
2687 mutex_unlock(&inode_alloc_inode->i_mutex);
2688 mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n",
2689 (u32)suballoc_slot, status);
2690 goto bail;
2691 }
2692
2693 status = ocfs2_test_suballoc_bit(osb, inode_alloc_inode, alloc_bh,
2694 blkno, suballoc_bit, res);
2695 if (status < 0)
2696 mlog(ML_ERROR, "test suballoc bit failed %d\n", status);
2697
2698 ocfs2_inode_unlock(inode_alloc_inode, 0);
2699 mutex_unlock(&inode_alloc_inode->i_mutex);
2700
2701 iput(inode_alloc_inode);
2702 brelse(alloc_bh);
2703bail:
2704 mlog_exit(status);
2705 return status;
2706}