/*
 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_types.h"
#include "xfs_bit.h"
#include "xfs_log.h"
#include "xfs_inum.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_trace.h"


#define XFS_ABSDIFF(a,b)	(((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))

#define	XFSA_FIXUP_BNO_OK	1
#define	XFSA_FIXUP_CNT_OK	2

/*
 * Prototypes for per-ag allocation routines
 */

STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
	xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);

/*
 * Internal functions.
 */

/*
 * Lookup the record equal to [bno, len] in the btree given by cur.
 */
STATIC int				/* error */
xfs_alloc_lookup_eq(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
}

/*
 * Lookup the first record greater than or equal to [bno, len]
 * in the btree given by cur.
 */
STATIC int				/* error */
xfs_alloc_lookup_ge(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
}

/*
 * Lookup the first record less than or equal to [bno, len]
 * in the btree given by cur.
 */
int					/* error */
xfs_alloc_lookup_le(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
}

/*
 * Update the record referred to by cur to the value given
 * by [bno, len].
 * This either works (return 0) or gets an EFSCORRUPTED error.
 */
STATIC int				/* error */
xfs_alloc_update(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len)	/* length of extent */
{
	union xfs_btree_rec	rec;

	rec.alloc.ar_startblock = cpu_to_be32(bno);
	rec.alloc.ar_blockcount = cpu_to_be32(len);
	return xfs_btree_update(cur, &rec);
}

/*
 * Get the data from the pointed-to record.
 */
int					/* error */
xfs_alloc_get_rec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		*bno,	/* output: starting block of extent */
	xfs_extlen_t		*len,	/* output: length of extent */
	int			*stat)	/* output: success/failure */
{
	union xfs_btree_rec	*rec;
	int			error;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (!error && *stat == 1) {
		*bno = be32_to_cpu(rec->alloc.ar_startblock);
		*len = be32_to_cpu(rec->alloc.ar_blockcount);
	}
	return error;
}
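
/*
 * Illustrative sketch (not part of the original source): the lookup and
 * get_rec helpers above are typically used as a pair.  For example, to
 * find the free extent record that would cover agbno 1000, a caller in
 * this file does roughly:
 *
 *	error = xfs_alloc_lookup_le(cur, 1000, 1, &i);
 *	if (!error && i)
 *		error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
 *
 * where fbno/flen receive the record's start block and length and i
 * reports whether a record was found.  The block number here is made up
 * purely for illustration.
 */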

/*
 * Compute aligned version of the found extent.
 * Takes alignment and min length into account.
 */
STATIC void
xfs_alloc_compute_aligned(
	xfs_alloc_arg_t	*args,		/* allocation argument structure */
	xfs_agblock_t	foundbno,	/* starting block in found extent */
	xfs_extlen_t	foundlen,	/* length in found extent */
	xfs_agblock_t	*resbno,	/* result block number */
	xfs_extlen_t	*reslen)	/* result length */
{
	xfs_agblock_t	bno;
	xfs_extlen_t	diff;
	xfs_extlen_t	len;

	if (args->alignment > 1 && foundlen >= args->minlen) {
		bno = roundup(foundbno, args->alignment);
		diff = bno - foundbno;
		len = diff >= foundlen ? 0 : foundlen - diff;
	} else {
		bno = foundbno;
		len = foundlen;
	}
	*resbno = bno;
	*reslen = len;
}
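
/*
 * Worked example (added for illustration, values are hypothetical): with
 * args->alignment = 4, args->minlen = 2, foundbno = 37 and foundlen = 10,
 * the first branch rounds bno up to 40, diff = 3, and the usable aligned
 * length becomes 10 - 3 = 7, so *resbno = 40 and *reslen = 7.
 */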

/*
 * Compute best start block and diff for "near" allocations.
 * freelen >= wantlen already checked by caller.
 */
STATIC xfs_extlen_t			/* difference value (absolute) */
xfs_alloc_compute_diff(
	xfs_agblock_t	wantbno,	/* target starting block */
	xfs_extlen_t	wantlen,	/* target length */
	xfs_extlen_t	alignment,	/* target alignment */
	xfs_agblock_t	freebno,	/* freespace's starting block */
	xfs_extlen_t	freelen,	/* freespace's length */
	xfs_agblock_t	*newbnop)	/* result: best start block from free */
{
	xfs_agblock_t	freeend;	/* end of freespace extent */
	xfs_agblock_t	newbno1;	/* return block number */
	xfs_agblock_t	newbno2;	/* other new block number */
	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
	xfs_agblock_t	wantend;	/* end of target extent */

	ASSERT(freelen >= wantlen);
	freeend = freebno + freelen;
	wantend = wantbno + wantlen;
	if (freebno >= wantbno) {
		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
			newbno1 = NULLAGBLOCK;
	} else if (freeend >= wantend && alignment > 1) {
		newbno1 = roundup(wantbno, alignment);
		newbno2 = newbno1 - alignment;
		if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
		else
			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
		if (newbno2 < freebno)
			newbno2 = NULLAGBLOCK;
		else
			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
			if (newlen1 < newlen2 ||
			    (newlen1 == newlen2 &&
			     XFS_ABSDIFF(newbno1, wantbno) >
			     XFS_ABSDIFF(newbno2, wantbno)))
				newbno1 = newbno2;
		} else if (newbno2 != NULLAGBLOCK)
			newbno1 = newbno2;
	} else if (freeend >= wantend) {
		newbno1 = wantbno;
	} else if (alignment > 1) {
		newbno1 = roundup(freeend - wantlen, alignment);
		if (newbno1 > freeend - wantlen &&
		    newbno1 - alignment >= freebno)
			newbno1 -= alignment;
		else if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
	} else
		newbno1 = freeend - wantlen;
	*newbnop = newbno1;
	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
}
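
/*
 * Worked example (added for illustration, values are hypothetical): asking
 * for wantbno = 100, wantlen = 8 with alignment = 1 from a free extent at
 * freebno = 120, freelen = 10 takes the freebno >= wantbno branch, so
 * *newbnop = 120 and the returned difference is XFS_ABSDIFF(120, 100) = 20.
 */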

/*
 * Fix up the length, based on mod and prod.
 * len should be k * prod + mod for some k.
 * If len is too small it is returned unchanged.
 * If len hits maxlen it is left alone.
 */
STATIC void
xfs_alloc_fix_len(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_extlen_t	k;
	xfs_extlen_t	rlen;

	ASSERT(args->mod < args->prod);
	rlen = args->len;
	ASSERT(rlen >= args->minlen);
	ASSERT(rlen <= args->maxlen);
	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
	    (args->mod == 0 && rlen < args->prod))
		return;
	k = rlen % args->prod;
	if (k == args->mod)
		return;
	if (k > args->mod) {
		if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen)
			return;
	} else {
		if ((int)(rlen = rlen - args->prod - (args->mod - k)) <
		    (int)args->minlen)
			return;
	}
	ASSERT(rlen >= args->minlen);
	ASSERT(rlen <= args->maxlen);
	args->len = rlen;
}
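
/*
 * Worked example (added for illustration, values are hypothetical): with
 * args->prod = 4, args->mod = 0, args->minlen = 4, args->maxlen = 16 and an
 * incoming args->len of 11, k = 11 % 4 = 3, so the k > mod branch trims the
 * length to 11 - 3 - 0 = 8, a whole multiple of prod.
 */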

/*
 * Fix up length if there is too little space left in the a.g.
 * Return 1 if ok, 0 if too little, should give up.
 */
STATIC int
xfs_alloc_fix_minleft(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_agf_t	*agf;		/* a.g. freelist header */
	int		diff;		/* free space difference */

	if (args->minleft == 0)
		return 1;
	agf = XFS_BUF_TO_AGF(args->agbp);
	diff = be32_to_cpu(agf->agf_freeblks)
		+ be32_to_cpu(agf->agf_flcount)
		- args->len - args->minleft;
	if (diff >= 0)
		return 1;
	args->len += diff;		/* shrink the allocated space */
	if (args->len >= args->minlen)
		return 1;
	args->agbno = NULLAGBLOCK;
	return 0;
}
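
/*
 * Worked example (added for illustration, values are hypothetical): if the
 * AGF shows 90 free blocks plus 10 freelist blocks, args->minleft = 32 and
 * args->len = 75, then diff = 100 - 75 - 32 = -7, so the length is shrunk
 * to 68; as long as that still satisfies args->minlen the caller proceeds.
 */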

/*
 * Update the two btrees, logically removing from freespace the extent
 * starting at rbno, rlen blocks. The extent is contained within the
 * actual (current) free extent fbno for flen blocks.
 * Flags are passed in indicating whether the cursors are set to the
 * relevant records.
 */
STATIC int				/* error code */
xfs_alloc_fixup_trees(
	xfs_btree_cur_t	*cnt_cur,	/* cursor for by-size btree */
	xfs_btree_cur_t	*bno_cur,	/* cursor for by-block btree */
	xfs_agblock_t	fbno,		/* starting block of free extent */
	xfs_extlen_t	flen,		/* length of free extent */
	xfs_agblock_t	rbno,		/* starting block of returned extent */
	xfs_extlen_t	rlen,		/* length of returned extent */
	int		flags)		/* flags, XFSA_FIXUP_... */
{
	int		error;		/* error code */
	int		i;		/* operation results */
	xfs_agblock_t	nfbno1;		/* first new free startblock */
	xfs_agblock_t	nfbno2;		/* second new free startblock */
	xfs_extlen_t	nflen1=0;	/* first new free length */
	xfs_extlen_t	nflen2=0;	/* second new free length */

	/*
	 * Look up the record in the by-size tree if necessary.
	 */
	if (flags & XFSA_FIXUP_CNT_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(
			i == 1 && nfbno1 == fbno && nflen1 == flen);
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(i == 1);
	}
	/*
	 * Look up the record in the by-block tree if necessary.
	 */
	if (flags & XFSA_FIXUP_BNO_OK) {
#ifdef DEBUG
		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(
			i == 1 && nfbno1 == fbno && nflen1 == flen);
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(i == 1);
	}

#ifdef DEBUG
	if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
		struct xfs_btree_block	*bnoblock;
		struct xfs_btree_block	*cntblock;

		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);

		XFS_WANT_CORRUPTED_RETURN(
			bnoblock->bb_numrecs == cntblock->bb_numrecs);
	}
#endif

	/*
	 * Deal with all four cases: the allocated record is contained
	 * within the freespace record, so we can have new freespace
	 * at either (or both) end, or no freespace remaining.
	 */
	if (rbno == fbno && rlen == flen)
		nfbno1 = nfbno2 = NULLAGBLOCK;
	else if (rbno == fbno) {
		nfbno1 = rbno + rlen;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else if (rbno + rlen == fbno + flen) {
		nfbno1 = fbno;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else {
		nfbno1 = fbno;
		nflen1 = rbno - fbno;
		nfbno2 = rbno + rlen;
		nflen2 = (fbno + flen) - nfbno2;
	}
	/*
	 * Delete the entry from the by-size btree.
	 */
	if ((error = xfs_btree_delete(cnt_cur, &i)))
		return error;
	XFS_WANT_CORRUPTED_RETURN(i == 1);
	/*
	 * Add new by-size btree entry(s).
	 */
	if (nfbno1 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(i == 0);
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(i == 1);
	}
	if (nfbno2 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(i == 0);
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(i == 1);
	}
	/*
	 * Fix up the by-block btree entry(s).
	 */
	if (nfbno1 == NULLAGBLOCK) {
		/*
		 * No remaining freespace, just delete the by-block tree entry.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(i == 1);
	} else {
		/*
		 * Update the by-block entry to start later|be shorter.
		 */
		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
			return error;
	}
	if (nfbno2 != NULLAGBLOCK) {
		/*
		 * 2 resulting free entries, need to add one.
		 */
		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(i == 0);
		if ((error = xfs_btree_insert(bno_cur, &i)))
			return error;
		XFS_WANT_CORRUPTED_RETURN(i == 1);
	}
	return 0;
}
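
/*
 * Worked example (added for illustration, values are hypothetical): carving
 * rbno = 104, rlen = 8 out of the free extent fbno = 100, flen = 20 hits the
 * "middle" case above, leaving two new free extents [100, 4] and [112, 8]:
 * the by-size tree deletes the old record and inserts two new ones, while
 * the by-block record is shrunk in place and a second record is inserted.
 */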

/*
 * Read in the allocation group free block array.
 */
STATIC int				/* error */
xfs_alloc_read_agfl(
	xfs_mount_t	*mp,		/* mount point structure */
	xfs_trans_t	*tp,		/* transaction pointer */
	xfs_agnumber_t	agno,		/* allocation group number */
	xfs_buf_t	**bpp)		/* buffer for the ag free block array */
{
	xfs_buf_t	*bp;		/* return value */
	int		error;

	ASSERT(agno != NULLAGNUMBER);
	error = xfs_trans_read_buf(
			mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &bp);
	if (error)
		return error;
	ASSERT(bp);
	ASSERT(!XFS_BUF_GETERROR(bp));
	XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF);
	*bpp = bp;
	return 0;
}

/*
 * Allocation group level functions.
 */

/*
 * Allocate a variable extent in the allocation group agno.
 * Type and bno are used to determine where in the allocation group the
 * extent will start.
 * Extent's length (returned in *len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent(
	xfs_alloc_arg_t	*args)	/* argument structure for allocation */
{
	int		error=0;

	ASSERT(args->minlen > 0);
	ASSERT(args->maxlen > 0);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->mod < args->prod);
	ASSERT(args->alignment > 0);
	/*
	 * Branch to correct routine based on the type.
	 */
	args->wasfromfl = 0;
	switch (args->type) {
	case XFS_ALLOCTYPE_THIS_AG:
		error = xfs_alloc_ag_vextent_size(args);
		break;
	case XFS_ALLOCTYPE_NEAR_BNO:
		error = xfs_alloc_ag_vextent_near(args);
		break;
	case XFS_ALLOCTYPE_THIS_BNO:
		error = xfs_alloc_ag_vextent_exact(args);
		break;
	default:
		ASSERT(0);
		/* NOTREACHED */
	}
	if (error)
		return error;
	/*
	 * If the allocation worked, need to change the agf structure
	 * (and log it), and the superblock.
	 */
	if (args->agbno != NULLAGBLOCK) {
		xfs_agf_t	*agf;	/* allocation group freelist header */
		long		slen = (long)args->len;

		ASSERT(args->len >= args->minlen && args->len <= args->maxlen);
		ASSERT(!(args->wasfromfl) || !args->isfl);
		ASSERT(args->agbno % args->alignment == 0);
		if (!(args->wasfromfl)) {

			agf = XFS_BUF_TO_AGF(args->agbp);
			be32_add_cpu(&agf->agf_freeblks, -(args->len));
			xfs_trans_agblocks_delta(args->tp,
						 -((long)(args->len)));
			args->pag->pagf_freeblks -= args->len;
			ASSERT(be32_to_cpu(agf->agf_freeblks) <=
				be32_to_cpu(agf->agf_length));
			xfs_alloc_log_agf(args->tp, args->agbp,
						XFS_AGF_FREEBLKS);
			/*
			 * Search the busylist for these blocks and mark the
			 * transaction as synchronous if blocks are found. This
			 * avoids the need to block due to a synchronous log
			 * force to ensure correct ordering as the synchronous
			 * transaction will guarantee that for us.
			 */
			if (xfs_alloc_busy_search(args->mp, args->agno,
						args->agbno, args->len))
				xfs_trans_set_sync(args->tp);
		}
		if (!args->isfl)
			xfs_trans_mod_sb(args->tp,
				args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
					XFS_TRANS_SB_FDBLOCKS, -slen);
		XFS_STATS_INC(xs_allocx);
		XFS_STATS_ADD(xs_allocb, args->len);
	}
	return 0;
}
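
/*
 * Illustrative sketch (not part of the original source): a caller prepares
 * an xfs_alloc_arg_t before invoking this routine, roughly along the lines
 * of the hypothetical fragment below, assuming tp, mp, agbp, agno and pag
 * have already been set up and that "want" blocks near target_agbno are
 * wanted:
 *
 *	memset(&args, 0, sizeof(args));
 *	args.tp = tp;
 *	args.mp = mp;
 *	args.agbp = agbp;
 *	args.agno = agno;
 *	args.pag = pag;
 *	args.agbno = target_agbno;
 *	args.minlen = 1;
 *	args.maxlen = want;
 *	args.prod = 1;
 *	args.mod = 0;
 *	args.alignment = 1;
 *	args.type = XFS_ALLOCTYPE_NEAR_BNO;
 *	error = xfs_alloc_ag_vextent(&args);
 *
 * On success args.agbno/args.len describe the allocated extent, or
 * args.agbno == NULLAGBLOCK if nothing suitable was found.
 */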

/*
 * Allocate a variable extent at exactly agno/bno.
 * Extent's length (returned in *len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_exact(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	xfs_btree_cur_t	*bno_cur;/* by block-number btree cursor */
	xfs_btree_cur_t	*cnt_cur;/* by count btree cursor */
	xfs_agblock_t	end;	/* end of allocated extent */
	int		error;
	xfs_agblock_t	fbno;	/* start block of found extent */
	xfs_agblock_t	fend;	/* end block of found extent */
	xfs_extlen_t	flen;	/* length of found extent */
	int		i;	/* success/failure of operation */
	xfs_agblock_t	maxend;	/* end of maximal extent */
	xfs_agblock_t	minend;	/* end of minimal extent */
	xfs_extlen_t	rlen;	/* length of returned extent */

	ASSERT(args->alignment == 1);

	/*
	 * Allocate/initialize a cursor for the by-number freespace btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->agno, XFS_BTNUM_BNO);

	/*
	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
	 * Look for the closest free block <= bno, it must contain bno
	 * if any free block does.
	 */
	error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
	if (error)
		goto error0;
	if (!i)
		goto not_found;

	/*
	 * Grab the freespace record.
	 */
	error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
	if (error)
		goto error0;
	XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
	ASSERT(fbno <= args->agbno);
	minend = args->agbno + args->minlen;
	maxend = args->agbno + args->maxlen;
	fend = fbno + flen;

	/*
	 * Give up if the freespace isn't long enough for the minimum request.
	 */
	if (fend < minend)
		goto not_found;

	/*
	 * End of extent will be smaller of the freespace end and the
	 * maximal requested end.
	 *
	 * Fix the length according to mod and prod if given.
	 */
	end = XFS_AGBLOCK_MIN(fend, maxend);
	args->len = end - args->agbno;
	xfs_alloc_fix_len(args);
	if (!xfs_alloc_fix_minleft(args))
		goto not_found;

	rlen = args->len;
	ASSERT(args->agbno + rlen <= fend);
	end = args->agbno + rlen;

	/*
	 * We are allocating agbno for rlen [agbno .. end]
	 * Allocate/initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_CNT);
	ASSERT(args->agbno + args->len <=
		be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
	error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
				      args->len, XFSA_FIXUP_BNO_OK);
	if (error) {
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
		goto error0;
	}

	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);

	args->wasfromfl = 0;
	trace_xfs_alloc_exact_done(args);
	return 0;

not_found:
	/* Didn't find it, return null. */
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	args->agbno = NULLAGBLOCK;
	trace_xfs_alloc_exact_notfound(args);
	return 0;

error0:
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	trace_xfs_alloc_exact_error(args);
	return error;
}

/*
 * Search the btree in a given direction via the search cursor and compare
 * the records found against the good extent we've already found.
 */
STATIC int
xfs_alloc_find_best_extent(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	struct xfs_btree_cur	**gcur,	/* good cursor */
	struct xfs_btree_cur	**scur,	/* searching cursor */
	xfs_agblock_t		gdiff,	/* difference for search comparison */
	xfs_agblock_t		*sbno,	/* extent found by search */
	xfs_extlen_t		*slen,
	xfs_extlen_t		*slena,	/* aligned length */
	int			dir)	/* 0 = search right, 1 = search left */
{
	xfs_agblock_t		bno;
	xfs_agblock_t		new;
	xfs_agblock_t		sdiff;
	int			error;
	int			i;

	/* The good extent is perfect, no need to search. */
	if (!gdiff)
		goto out_use_good;

	/*
	 * Look until we find a better one, run out of space or run off the end.
	 */
	do {
		error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
		if (error)
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		xfs_alloc_compute_aligned(args, *sbno, *slen, &bno, slena);

		/*
		 * The good extent is closer than this one.
		 */
		if (!dir) {
			if (bno >= args->agbno + gdiff)
				goto out_use_good;
		} else {
			if (bno <= args->agbno - gdiff)
				goto out_use_good;
		}

		/*
		 * Same distance, compare length and pick the best.
		 */
		if (*slena >= args->minlen) {
			args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
			xfs_alloc_fix_len(args);

			sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
						       args->alignment, *sbno,
						       *slen, &new);

			/*
			 * Choose closer size and invalidate other cursor.
			 */
			if (sdiff < gdiff)
				goto out_use_search;
			goto out_use_good;
		}

		if (!dir)
			error = xfs_btree_increment(*scur, 0, &i);
		else
			error = xfs_btree_decrement(*scur, 0, &i);
		if (error)
			goto error0;
	} while (i);

out_use_good:
	xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
	*scur = NULL;
	return 0;

out_use_search:
	xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
	*gcur = NULL;
	return 0;

error0:
	/* caller invalidates cursors */
	return error;
}

/*
 * Allocate a variable extent near bno in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 */
STATIC int				/* error */
xfs_alloc_ag_vextent_near(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_btree_cur_t	*bno_cur_gt;	/* cursor for bno btree, right side */
	xfs_btree_cur_t	*bno_cur_lt;	/* cursor for bno btree, left side */
	xfs_btree_cur_t	*cnt_cur;	/* cursor for count btree */
	xfs_agblock_t	gtbno;		/* start bno of right side entry */
	xfs_agblock_t	gtbnoa;		/* aligned ... */
	xfs_extlen_t	gtdiff;		/* difference to right side entry */
	xfs_extlen_t	gtlen;		/* length of right side entry */
	xfs_extlen_t	gtlena = 0;	/* aligned ... */
	xfs_agblock_t	gtnew;		/* useful start bno of right side */
	int		error;		/* error code */
	int		i;		/* result code, temporary */
	int		j;		/* result code, temporary */
	xfs_agblock_t	ltbno;		/* start bno of left side entry */
	xfs_agblock_t	ltbnoa;		/* aligned ... */
	xfs_extlen_t	ltdiff;		/* difference to left side entry */
	xfs_extlen_t	ltlen;		/* length of left side entry */
	xfs_extlen_t	ltlena = 0;	/* aligned ... */
	xfs_agblock_t	ltnew;		/* useful start bno of left side */
	xfs_extlen_t	rlen;		/* length of returned extent */
#if defined(DEBUG) && defined(__KERNEL__)
	/*
	 * Randomly don't execute the first algorithm.
	 */
	int		dofirst;	/* set to do first algorithm */

	dofirst = random32() & 1;
#endif
	/*
	 * Get a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_CNT);
	ltlen = 0;
	bno_cur_lt = bno_cur_gt = NULL;
	/*
	 * See if there are any free extents as big as maxlen.
	 */
	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
		goto error0;
	/*
	 * If none, then pick up the last entry in the tree unless the
	 * tree is empty.
	 */
	if (!i) {
		if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
				&ltlen, &i)))
			goto error0;
		if (i == 0 || ltlen == 0) {
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			return 0;
		}
		ASSERT(i == 1);
	}
	args->wasfromfl = 0;
	/*
	 * First algorithm.
	 * If the requested extent is large wrt the freespaces available
	 * in this a.g., then the cursor will be pointing to a btree entry
	 * near the right edge of the tree. If it's in the last btree leaf
	 * block, then we just examine all the entries in that block
	 * that are big enough, and pick the best one.
	 * This is written as a while loop so we can break out of it,
	 * but we never loop back to the top.
	 */
	while (xfs_btree_islastblock(cnt_cur, 0)) {
		xfs_extlen_t	bdiff;
		int		besti=0;
		xfs_extlen_t	blen=0;
		xfs_agblock_t	bnew=0;

#if defined(DEBUG) && defined(__KERNEL__)
		if (!dofirst)
			break;
#endif
		/*
		 * Start from the entry that lookup found, sequence through
		 * all larger free blocks. If we're actually pointing at a
		 * record smaller than maxlen, go to the start of this block,
		 * and skip all those smaller than minlen.
		 */
		if (ltlen || args->alignment > 1) {
			cnt_cur->bc_ptrs[0] = 1;
			do {
				if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
						&ltlen, &i)))
					goto error0;
				XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
				if (ltlen >= args->minlen)
					break;
				if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
					goto error0;
			} while (i);
			ASSERT(ltlen >= args->minlen);
			if (!i)
				break;
		}
		i = cnt_cur->bc_ptrs[0];
		for (j = 1, blen = 0, bdiff = 0;
		     !error && j && (blen < args->maxlen || bdiff > 0);
		     error = xfs_btree_increment(cnt_cur, 0, &j)) {
			/*
			 * For each entry, decide if it's better than
			 * the previous best entry.
			 */
			if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			xfs_alloc_compute_aligned(args, ltbno, ltlen,
						  &ltbnoa, &ltlena);
			if (ltlena < args->minlen)
				continue;
			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
			xfs_alloc_fix_len(args);
			ASSERT(args->len >= args->minlen);
			if (args->len < blen)
				continue;
			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
				args->alignment, ltbno, ltlen, &ltnew);
			if (ltnew != NULLAGBLOCK &&
			    (args->len > blen || ltdiff < bdiff)) {
				bdiff = ltdiff;
				bnew = ltnew;
				blen = args->len;
				besti = cnt_cur->bc_ptrs[0];
			}
		}
		/*
		 * It didn't work. We COULD be in a case where
		 * there's a good record somewhere, so try again.
		 */
		if (blen == 0)
			break;
		/*
		 * Point at the best entry, and retrieve it again.
		 */
		cnt_cur->bc_ptrs[0] = besti;
		if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
		args->len = blen;
		if (!xfs_alloc_fix_minleft(args)) {
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_near_nominleft(args);
			return 0;
		}
		blen = args->len;
		/*
		 * We are allocating starting at bnew for blen blocks.
		 */
		args->agbno = bnew;
		ASSERT(bnew >= ltbno);
		ASSERT(bnew + blen <= ltbno + ltlen);
		/*
		 * Set up a cursor for the by-bno tree.
		 */
		bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
			args->agbp, args->agno, XFS_BTNUM_BNO);
		/*
		 * Fix up the btree entries.
		 */
		if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
				ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
			goto error0;
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);

		trace_xfs_alloc_near_first(args);
		return 0;
	}
	/*
	 * Second algorithm.
	 * Search in the by-bno tree to the left and to the right
	 * simultaneously, until in each case we find a space big enough,
	 * or run into the edge of the tree. When we run into the edge,
	 * we deallocate that cursor.
	 * If both searches succeed, we compare the two spaces and pick
	 * the better one.
	 * With alignment, it's possible for both to fail; the upper
	 * level algorithm that picks allocation groups for allocations
	 * is not supposed to do this.
	 */
	/*
	 * Allocate and initialize the cursor for the leftward search.
	 */
	bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_BNO);
	/*
	 * Lookup <= bno to find the leftward search's starting point.
	 */
	if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
		goto error0;
	if (!i) {
		/*
		 * Didn't find anything; use this cursor for the rightward
		 * search.
		 */
		bno_cur_gt = bno_cur_lt;
		bno_cur_lt = NULL;
	}
	/*
	 * Found something. Duplicate the cursor for the rightward search.
	 */
	else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
		goto error0;
	/*
	 * Increment the cursor, so we will point at the entry just right
	 * of the leftward entry if any, or to the leftmost entry.
	 */
	if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
		goto error0;
	if (!i) {
		/*
		 * It failed, there are no rightward entries.
		 */
		xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
		bno_cur_gt = NULL;
	}
	/*
	 * Loop going left with the leftward cursor, right with the
	 * rightward cursor, until either both directions give up or
	 * we find an entry at least as big as minlen.
	 */
	do {
		if (bno_cur_lt) {
			if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			xfs_alloc_compute_aligned(args, ltbno, ltlen,
						  &ltbnoa, &ltlena);
			if (ltlena >= args->minlen)
				break;
			if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
				goto error0;
			if (!i) {
				xfs_btree_del_cursor(bno_cur_lt,
						     XFS_BTREE_NOERROR);
				bno_cur_lt = NULL;
			}
		}
		if (bno_cur_gt) {
			if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			xfs_alloc_compute_aligned(args, gtbno, gtlen,
						  &gtbnoa, &gtlena);
			if (gtlena >= args->minlen)
				break;
			if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
				goto error0;
			if (!i) {
				xfs_btree_del_cursor(bno_cur_gt,
						     XFS_BTREE_NOERROR);
				bno_cur_gt = NULL;
			}
		}
	} while (bno_cur_lt || bno_cur_gt);

	/*
	 * Got both cursors still active, need to find better entry.
	 */
	if (bno_cur_lt && bno_cur_gt) {
		if (ltlena >= args->minlen) {
			/*
			 * Left side is good, look for a right side entry.
			 */
			args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
			xfs_alloc_fix_len(args);
			ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
				args->alignment, ltbno, ltlen, &ltnew);

			error = xfs_alloc_find_best_extent(args,
						&bno_cur_lt, &bno_cur_gt,
						ltdiff, &gtbno, &gtlen, &gtlena,
						0 /* search right */);
		} else {
			ASSERT(gtlena >= args->minlen);

			/*
			 * Right side is good, look for a left side entry.
			 */
			args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
			xfs_alloc_fix_len(args);
			gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
				args->alignment, gtbno, gtlen, &gtnew);

			error = xfs_alloc_find_best_extent(args,
						&bno_cur_gt, &bno_cur_lt,
						gtdiff, &ltbno, &ltlen, &ltlena,
						1 /* search left */);
		}

		if (error)
			goto error0;
	}

	/*
	 * If we couldn't get anything, give up.
	 */
	if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
		trace_xfs_alloc_size_neither(args);
		args->agbno = NULLAGBLOCK;
		return 0;
	}

	/*
	 * At this point we have selected a freespace entry, either to the
	 * left or to the right. If it's on the right, copy all the
	 * useful variables to the "left" set so we only have one
	 * copy of this code.
	 */
	if (bno_cur_gt) {
		bno_cur_lt = bno_cur_gt;
		bno_cur_gt = NULL;
		ltbno = gtbno;
		ltbnoa = gtbnoa;
		ltlen = gtlen;
		ltlena = gtlena;
		j = 1;
	} else
		j = 0;

	/*
	 * Fix up the length and compute the useful address.
	 */
	args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
	xfs_alloc_fix_len(args);
	if (!xfs_alloc_fix_minleft(args)) {
		trace_xfs_alloc_near_nominleft(args);
		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
		return 0;
	}
	rlen = args->len;
	(void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
		ltlen, &ltnew);
	ASSERT(ltnew >= ltbno);
	ASSERT(ltnew + rlen <= ltbno + ltlen);
	ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
	args->agbno = ltnew;
	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
			ltnew, rlen, XFSA_FIXUP_BNO_OK)))
		goto error0;

	if (j)
		trace_xfs_alloc_near_greater(args);
	else
		trace_xfs_alloc_near_lesser(args);

	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
	return 0;

 error0:
	trace_xfs_alloc_near_error(args);
	if (cnt_cur != NULL)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	if (bno_cur_lt != NULL)
		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
	if (bno_cur_gt != NULL)
		xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
	return error;
}

/*
 * Allocate a variable extent anywhere in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 */
STATIC int				/* error */
xfs_alloc_ag_vextent_size(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_btree_cur_t	*bno_cur;	/* cursor for bno btree */
	xfs_btree_cur_t	*cnt_cur;	/* cursor for cnt btree */
	int		error;		/* error result */
	xfs_agblock_t	fbno;		/* start of found freespace */
	xfs_extlen_t	flen;		/* length of found freespace */
	int		i;		/* temp status variable */
	xfs_agblock_t	rbno;		/* returned block number */
	xfs_extlen_t	rlen;		/* length of returned extent */

	/*
	 * Allocate and initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_CNT);
	bno_cur = NULL;
	/*
	 * Look for an entry >= maxlen+alignment-1 blocks.
	 */
	if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
			args->maxlen + args->alignment - 1, &i)))
		goto error0;
	/*
	 * If none, then pick up the last entry in the tree unless the
	 * tree is empty.
	 */
	if (!i) {
		if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno,
				&flen, &i)))
			goto error0;
		if (i == 0 || flen == 0) {
			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
			trace_xfs_alloc_size_noentry(args);
			return 0;
		}
		ASSERT(i == 1);
	}
	/*
	 * There's a freespace as big as maxlen+alignment-1, get it.
	 */
	else {
		if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
	}
	/*
	 * In the first case above, we got the last entry in the
	 * by-size btree. Now we check to see if the space hits maxlen
	 * once aligned; if not, we search left for something better.
	 * This can't happen in the second case above.
	 */
	xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen);
	rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
	XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
			(rlen <= flen && rbno + rlen <= fbno + flen), error0);
	if (rlen < args->maxlen) {
		xfs_agblock_t	bestfbno;
		xfs_extlen_t	bestflen;
		xfs_agblock_t	bestrbno;
		xfs_extlen_t	bestrlen;

		bestrlen = rlen;
		bestrbno = rbno;
		bestflen = flen;
		bestfbno = fbno;
		for (;;) {
			if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
				goto error0;
			if (i == 0)
				break;
			if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
					&i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
			if (flen < bestrlen)
				break;
			xfs_alloc_compute_aligned(args, fbno, flen,
						  &rbno, &rlen);
			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
			XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
				(rlen <= flen && rbno + rlen <= fbno + flen),
				error0);
			if (rlen > bestrlen) {
				bestrlen = rlen;
				bestrbno = rbno;
				bestflen = flen;
				bestfbno = fbno;
				if (rlen == args->maxlen)
					break;
			}
		}
		if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
				&i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		rlen = bestrlen;
		rbno = bestrbno;
		flen = bestflen;
		fbno = bestfbno;
	}
	args->wasfromfl = 0;
	/*
	 * Fix up the length.
	 */
	args->len = rlen;
	xfs_alloc_fix_len(args);
	if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
		trace_xfs_alloc_size_nominleft(args);
		args->agbno = NULLAGBLOCK;
		return 0;
	}
	rlen = args->len;
	XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
	/*
	 * Allocate and initialize a cursor for the by-block tree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
		args->agno, XFS_BTNUM_BNO);
	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
			rbno, rlen, XFSA_FIXUP_CNT_OK)))
		goto error0;
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	cnt_cur = bno_cur = NULL;
	args->len = rlen;
	args->agbno = rbno;
	XFS_WANT_CORRUPTED_GOTO(
		args->agbno + args->len <=
			be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
		error0);
	trace_xfs_alloc_size_done(args);
	return 0;

error0:
	trace_xfs_alloc_size_error(args);
	if (cnt_cur)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	if (bno_cur)
		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	return error;
}

/*
 * Deal with the case where only small freespaces remain.
 * Either return the contents of the last freespace record,
 * or allocate space from the freelist if there is nothing in the tree.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_small(
	xfs_alloc_arg_t	*args,	/* allocation argument structure */
	xfs_btree_cur_t	*ccur,	/* by-size cursor */
	xfs_agblock_t	*fbnop,	/* result block number */
	xfs_extlen_t	*flenp,	/* result length */
	int		*stat)	/* status: 0-freelist, 1-normal/none */
{
	int		error;
	xfs_agblock_t	fbno;
	xfs_extlen_t	flen;
	int		i;

	if ((error = xfs_btree_decrement(ccur, 0, &i)))
		goto error0;
	if (i) {
		if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
	}
	/*
	 * Nothing in the btree, try the freelist. Make sure
	 * to respect minleft even when pulling from the
	 * freelist.
	 */
	else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
		 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
		  > args->minleft)) {
		error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
		if (error)
			goto error0;
		if (fbno != NULLAGBLOCK) {
			if (args->userdata) {
				xfs_buf_t	*bp;

				bp = xfs_btree_get_bufs(args->mp, args->tp,
					args->agno, fbno, 0);
				xfs_trans_binval(args->tp, bp);
			}
			args->len = 1;
			args->agbno = fbno;
			XFS_WANT_CORRUPTED_GOTO(
				args->agbno + args->len <=
				be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
				error0);
			args->wasfromfl = 1;
			trace_xfs_alloc_small_freelist(args);
			*stat = 0;
			return 0;
		}
		/*
		 * Nothing in the freelist.
		 */
		else
			flen = 0;
	}
	/*
	 * Can't allocate from the freelist for some reason.
	 */
	else {
		fbno = NULLAGBLOCK;
		flen = 0;
	}
	/*
	 * Can't do the allocation, give up.
	 */
	if (flen < args->minlen) {
		args->agbno = NULLAGBLOCK;
		trace_xfs_alloc_small_notenough(args);
		flen = 0;
	}
	*fbnop = fbno;
	*flenp = flen;
	*stat = 1;
	trace_xfs_alloc_small_done(args);
	return 0;

error0:
	trace_xfs_alloc_small_error(args);
	return error;
}

/*
 * Free the extent starting at agno/bno for length.
 */
STATIC int			/* error */
xfs_free_ag_extent(
	xfs_trans_t	*tp,	/* transaction pointer */
	xfs_buf_t	*agbp,	/* buffer for a.g. freelist header */
	xfs_agnumber_t	agno,	/* allocation group number */
	xfs_agblock_t	bno,	/* starting block number */
	xfs_extlen_t	len,	/* length of extent */
	int		isfl)	/* set if is freelist blocks - no sb acctg */
{
	xfs_btree_cur_t	*bno_cur;	/* cursor for by-block btree */
	xfs_btree_cur_t	*cnt_cur;	/* cursor for by-size btree */
	int		error;		/* error return value */
	xfs_agblock_t	gtbno;		/* start of right neighbor block */
	xfs_extlen_t	gtlen;		/* length of right neighbor block */
	int		haveleft;	/* have a left neighbor block */
	int		haveright;	/* have a right neighbor block */
	int		i;		/* temp, result code */
	xfs_agblock_t	ltbno;		/* start of left neighbor block */
	xfs_extlen_t	ltlen;		/* length of left neighbor block */
	xfs_mount_t	*mp;		/* mount point struct for filesystem */
	xfs_agblock_t	nbno;		/* new starting block of freespace */
	xfs_extlen_t	nlen;		/* new length of freespace */

	mp = tp->t_mountp;
	/*
	 * Allocate and initialize a cursor for the by-block btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
	cnt_cur = NULL;
	/*
	 * Look for a neighboring block on the left (lower block numbers)
	 * that is contiguous with this space.
	 */
	if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
		goto error0;
	if (haveleft) {
		/*
		 * There is a block to our left.
		 */
		if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		/*
		 * It's not contiguous, though.
		 */
		if (ltbno + ltlen < bno)
			haveleft = 0;
		else {
			/*
			 * If this failure happens the request to free this
			 * space was invalid, it's (partly) already free.
			 * Very bad.
			 */
			XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
		}
	}
	/*
	 * Look for a neighboring block on the right (higher block numbers)
	 * that is contiguous with this space.
	 */
	if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
		goto error0;
	if (haveright) {
		/*
		 * There is a block to our right.
		 */
		if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		/*
		 * It's not contiguous, though.
		 */
		if (bno + len < gtbno)
			haveright = 0;
		else {
			/*
			 * If this failure happens the request to free this
			 * space was invalid, it's (partly) already free.
			 * Very bad.
			 */
			XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
		}
	}
	/*
	 * Now allocate and initialize a cursor for the by-size tree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
	/*
	 * Have both left and right contiguous neighbors.
	 * Merge all three into a single free block.
	 */
	if (haveleft && haveright) {
		/*
		 * Delete the old by-size entry on the left.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		/*
		 * Delete the old by-size entry on the right.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		/*
		 * Delete the old by-block entry for the right block.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		/*
		 * Move the by-block cursor back to the left neighbor.
		 */
		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
#ifdef DEBUG
		/*
		 * Check that this is the right record: delete didn't
		 * mangle the cursor.
		 */
		{
			xfs_agblock_t	xxbno;
			xfs_extlen_t	xxlen;

			if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
					&i)))
				goto error0;
			XFS_WANT_CORRUPTED_GOTO(
				i == 1 && xxbno == ltbno && xxlen == ltlen,
				error0);
		}
#endif
		/*
		 * Update remaining by-block entry to the new, joined block.
		 */
		nbno = ltbno;
		nlen = len + ltlen + gtlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * Have only a left contiguous neighbor.
	 * Merge it together with the new freespace.
	 */
	else if (haveleft) {
		/*
		 * Delete the old by-size entry on the left.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		/*
		 * Back up the by-block cursor to the left neighbor, and
		 * update its length.
		 */
		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		nbno = ltbno;
		nlen = len + ltlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * Have only a right contiguous neighbor.
	 * Merge it together with the new freespace.
	 */
	else if (haveright) {
		/*
		 * Delete the old by-size entry on the right.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
		/*
		 * Update the starting block and length of the right
		 * neighbor in the by-block tree.
		 */
		nbno = bno;
		nlen = len + gtlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * No contiguous neighbors.
	 * Insert the new freespace into the by-block tree.
	 */
	else {
		nbno = bno;
		nlen = len;
		if ((error = xfs_btree_insert(bno_cur, &i)))
			goto error0;
		XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
	}
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	bno_cur = NULL;
	/*
	 * In all cases we need to insert the new freespace in the by-size tree.
	 */
	if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
		goto error0;
1580 XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
Christoph Hellwig4b22a572008-10-30 16:57:40 +11001581 if ((error = xfs_btree_insert(cnt_cur, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001582 goto error0;
1583 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1584 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1585 cnt_cur = NULL;
1586 /*
1587 * Update the freespace totals in the ag and superblock.
1588 */
1589 {
1590 xfs_agf_t *agf;
1591 xfs_perag_t *pag; /* per allocation group data */
1592
Dave Chinnera862e0f2010-01-11 11:47:41 +00001593 pag = xfs_perag_get(mp, agno);
1594 pag->pagf_freeblks += len;
1595 xfs_perag_put(pag);
1596
Linus Torvalds1da177e2005-04-16 15:20:36 -07001597 agf = XFS_BUF_TO_AGF(agbp);
Marcin Slusarz413d57c2008-02-13 15:03:29 -08001598 be32_add_cpu(&agf->agf_freeblks, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599 xfs_trans_agblocks_delta(tp, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001600 XFS_WANT_CORRUPTED_GOTO(
Christoph Hellwig16259e72005-11-02 15:11:25 +11001601 be32_to_cpu(agf->agf_freeblks) <=
1602 be32_to_cpu(agf->agf_length),
Linus Torvalds1da177e2005-04-16 15:20:36 -07001603 error0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001604 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
1605 if (!isfl)
1606 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1607 XFS_STATS_INC(xs_freex);
1608 XFS_STATS_ADD(xs_freeb, len);
1609 }
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00001610
1611 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612
1613 /*
1614 * Since blocks move to the free list without the coordination
 1615	 * used in xfs_bmap_finish, we can't allow the block to be available
 1616	 * for reallocation and non-transaction writing (user data)
 1617	 * until we know that the transaction that moved it to the free
 1618	 * list is permanently on disk. We track such blocks by declaring
 1619	 * them "busy"; the busy list is maintained on a per-ag
1620 * basis and each transaction records which entries should be removed
1621 * when the iclog commits to disk. If a busy block is allocated,
1622 * the iclog is pushed up to the LSN that freed the block.
1623 */
Dave Chinnered3b4d62010-05-21 12:07:08 +10001624 xfs_alloc_busy_insert(tp, agno, bno, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001625 return 0;
1626
1627 error0:
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00001628 trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001629 if (bno_cur)
1630 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1631 if (cnt_cur)
1632 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1633 return error;
1634}
1635
1636/*
1637 * Visible (exported) allocation/free functions.
1638 * Some of these are used just by xfs_alloc_btree.c and this file.
1639 */
1640
1641/*
1642 * Compute and fill in value of m_ag_maxlevels.
1643 */
1644void
1645xfs_alloc_compute_maxlevels(
1646 xfs_mount_t *mp) /* file system mount structure */
1647{
1648 int level;
1649 uint maxblocks;
1650 uint maxleafents;
1651 int minleafrecs;
1652 int minnoderecs;
1653
1654 maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
1655 minleafrecs = mp->m_alloc_mnr[0];
1656 minnoderecs = mp->m_alloc_mnr[1];
1657 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1658 for (level = 1; maxblocks > 1; level++)
1659 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1660 mp->m_ag_maxlevels = level;
1661}
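/*
 * Illustrative note (not part of the original code): the loop above is just
 * repeated round-up division until a single root block can index everything.
 * A minimal sketch with assumed record counts follows; the numbers
 * (524288 leaf entries, 255 minimum leaf records, 170 minimum node records)
 * are hypothetical and only demonstrate the arithmetic.
 */
#if 0	/* example only -- assumed values, never compiled */
static int
xfs_alloc_maxlevels_example(void)
{
	uint	maxblocks = (524288 + 255 - 1) / 255;	/* 2057 leaf blocks */
	int	level;

	for (level = 1; maxblocks > 1; level++)
		maxblocks = (maxblocks + 170 - 1) / 170; /* 13, then 1 */
	return level;					/* 3 levels */
}
#endif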
1662
1663/*
Dave Chinner6cc87642009-03-16 08:29:46 +01001664 * Find the length of the longest extent in an AG.
1665 */
1666xfs_extlen_t
1667xfs_alloc_longest_free_extent(
1668 struct xfs_mount *mp,
1669 struct xfs_perag *pag)
1670{
1671 xfs_extlen_t need, delta = 0;
1672
1673 need = XFS_MIN_FREELIST_PAG(pag, mp);
1674 if (need > pag->pagf_flcount)
1675 delta = need - pag->pagf_flcount;
1676
1677 if (pag->pagf_longest > delta)
1678 return pag->pagf_longest - delta;
1679 return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1680}
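/*
 * Illustrative note (not part of the original code, hypothetical numbers):
 * if XFS_MIN_FREELIST_PAG() says this AG needs 8 freelist blocks but
 * pagf_flcount is only 3, refilling the freelist will consume 5 blocks, so
 * a pagf_longest of 100 is reported as 95.  When pagf_longest does not
 * exceed that shortfall, the final expression simply returns 1 if any free
 * space is recorded at all (non-zero flcount or longest) and 0 otherwise.
 */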
1681
1682/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 * Decide whether to use this allocation group for this allocation.
1684 * If so, fix up the btree freelist's size.
1685 */
1686STATIC int /* error */
1687xfs_alloc_fix_freelist(
1688 xfs_alloc_arg_t *args, /* allocation argument structure */
1689 int flags) /* XFS_ALLOC_FLAG_... */
1690{
1691 xfs_buf_t *agbp; /* agf buffer pointer */
1692 xfs_agf_t *agf; /* a.g. freespace structure pointer */
1693 xfs_buf_t *agflbp;/* agfl buffer pointer */
1694 xfs_agblock_t bno; /* freelist block */
1695 xfs_extlen_t delta; /* new blocks needed in freelist */
1696 int error; /* error result code */
1697 xfs_extlen_t longest;/* longest extent in allocation group */
1698 xfs_mount_t *mp; /* file system mount point structure */
1699 xfs_extlen_t need; /* total blocks needed in freelist */
1700 xfs_perag_t *pag; /* per-ag information structure */
1701 xfs_alloc_arg_t targs; /* local allocation arguments */
1702 xfs_trans_t *tp; /* transaction pointer */
1703
1704 mp = args->mp;
1705
1706 pag = args->pag;
1707 tp = args->tp;
1708 if (!pag->pagf_init) {
1709 if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1710 &agbp)))
1711 return error;
1712 if (!pag->pagf_init) {
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001713 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1714 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001715 args->agbp = NULL;
1716 return 0;
1717 }
1718 } else
1719 agbp = NULL;
1720
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001721 /*
 1722	 * If this is a metadata preferred pag and we are allocating user
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723	 * data, then try somewhere else unless we are being asked to
 1724	 * try harder at this point.
1725 */
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001726 if (pag->pagf_metadata && args->userdata &&
1727 (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
1728 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001729 args->agbp = NULL;
1730 return 0;
1731 }
1732
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001733 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001734 /*
1735 * If it looks like there isn't a long enough extent, or enough
1736 * total blocks, reject it.
1737 */
Dave Chinner6cc87642009-03-16 08:29:46 +01001738 need = XFS_MIN_FREELIST_PAG(pag, mp);
1739 longest = xfs_alloc_longest_free_extent(mp, pag);
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001740 if ((args->minlen + args->alignment + args->minalignslop - 1) >
1741 longest ||
1742 ((int)(pag->pagf_freeblks + pag->pagf_flcount -
1743 need - args->total) < (int)args->minleft)) {
1744 if (agbp)
1745 xfs_trans_brelse(tp, agbp);
1746 args->agbp = NULL;
1747 return 0;
1748 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001749 }
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001750
Linus Torvalds1da177e2005-04-16 15:20:36 -07001751 /*
1752 * Get the a.g. freespace buffer.
1753 * Can fail if we're not blocking on locks, and it's held.
1754 */
1755 if (agbp == NULL) {
1756 if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1757 &agbp)))
1758 return error;
1759 if (agbp == NULL) {
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001760 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1761 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001762 args->agbp = NULL;
1763 return 0;
1764 }
1765 }
1766 /*
1767 * Figure out how many blocks we should have in the freelist.
1768 */
1769 agf = XFS_BUF_TO_AGF(agbp);
1770 need = XFS_MIN_FREELIST(agf, mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001771 /*
 1772	 * If there isn't a long enough extent or enough total blocks, reject it.
1773 */
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001774 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1775 delta = need > be32_to_cpu(agf->agf_flcount) ?
1776 (need - be32_to_cpu(agf->agf_flcount)) : 0;
1777 longest = be32_to_cpu(agf->agf_longest);
1778 longest = (longest > delta) ? (longest - delta) :
1779 (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
1780 if ((args->minlen + args->alignment + args->minalignslop - 1) >
1781 longest ||
1782 ((int)(be32_to_cpu(agf->agf_freeblks) +
1783 be32_to_cpu(agf->agf_flcount) - need - args->total) <
1784 (int)args->minleft)) {
1785 xfs_trans_brelse(tp, agbp);
1786 args->agbp = NULL;
1787 return 0;
1788 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789 }
1790 /*
1791 * Make the freelist shorter if it's too long.
1792 */
Christoph Hellwig16259e72005-11-02 15:11:25 +11001793 while (be32_to_cpu(agf->agf_flcount) > need) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001794 xfs_buf_t *bp;
1795
David Chinner92821e22007-05-24 15:26:31 +10001796 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
1797 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001798 return error;
1799 if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
1800 return error;
1801 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
1802 xfs_trans_binval(tp, bp);
1803 }
1804 /*
1805 * Initialize the args structure.
1806 */
1807 targs.tp = tp;
1808 targs.mp = mp;
1809 targs.agbp = agbp;
1810 targs.agno = args->agno;
1811 targs.mod = targs.minleft = targs.wasdel = targs.userdata =
1812 targs.minalignslop = 0;
1813 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
1814 targs.type = XFS_ALLOCTYPE_THIS_AG;
1815 targs.pag = pag;
1816 if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
1817 return error;
1818 /*
1819 * Make the freelist longer if it's too short.
1820 */
Christoph Hellwig16259e72005-11-02 15:11:25 +11001821 while (be32_to_cpu(agf->agf_flcount) < need) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001822 targs.agbno = 0;
Christoph Hellwig16259e72005-11-02 15:11:25 +11001823 targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824 /*
1825 * Allocate as many blocks as possible at once.
1826 */
Nathan Scotte63a3692006-05-08 19:51:58 +10001827 if ((error = xfs_alloc_ag_vextent(&targs))) {
1828 xfs_trans_brelse(tp, agflbp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829 return error;
Nathan Scotte63a3692006-05-08 19:51:58 +10001830 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001831 /*
1832 * Stop if we run out. Won't happen if callers are obeying
1833 * the restrictions correctly. Can happen for free calls
1834 * on a completely full ag.
1835 */
Yingping Lud210a282006-06-09 14:55:18 +10001836 if (targs.agbno == NULLAGBLOCK) {
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001837 if (flags & XFS_ALLOC_FLAG_FREEING)
1838 break;
1839 xfs_trans_brelse(tp, agflbp);
1840 args->agbp = NULL;
1841 return 0;
Yingping Lud210a282006-06-09 14:55:18 +10001842 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001843 /*
1844 * Put each allocated block on the list.
1845 */
1846 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
David Chinner92821e22007-05-24 15:26:31 +10001847 error = xfs_alloc_put_freelist(tp, agbp,
1848 agflbp, bno, 0);
1849 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001850 return error;
1851 }
1852 }
Nathan Scotte63a3692006-05-08 19:51:58 +10001853 xfs_trans_brelse(tp, agflbp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001854 args->agbp = agbp;
1855 return 0;
1856}
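/*
 * Illustrative note (not part of the original code, hypothetical numbers):
 * the accept/reject test above needs a single extent of at least
 * minlen + alignment + minalignslop - 1 blocks and enough overall space to
 * leave minleft blocks behind.  For example, with minlen = 8, alignment = 4
 * and minalignslop = 0 the AG must have an extent of at least 11 blocks;
 * with agf_freeblks = 100, agf_flcount = 4, need = 8, total = 20 and
 * minleft = 0, 100 + 4 - 8 - 20 = 76 >= 0, so the AG is accepted and the
 * freelist is then shortened or lengthened towards 'need'.
 */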
1857
1858/*
1859 * Get a block from the freelist.
1860 * Returns with the buffer for the block gotten.
1861 */
1862int /* error */
1863xfs_alloc_get_freelist(
1864 xfs_trans_t *tp, /* transaction pointer */
1865 xfs_buf_t *agbp, /* buffer containing the agf structure */
David Chinner92821e22007-05-24 15:26:31 +10001866 xfs_agblock_t *bnop, /* block address retrieved from freelist */
 1867	int		btreeblk) /* destination is an AGF btree */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001868{
1869 xfs_agf_t *agf; /* a.g. freespace structure */
1870 xfs_agfl_t *agfl; /* a.g. freelist structure */
1871 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
1872 xfs_agblock_t bno; /* block number returned */
1873 int error;
David Chinner92821e22007-05-24 15:26:31 +10001874 int logflags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001875 xfs_mount_t *mp; /* mount structure */
1876 xfs_perag_t *pag; /* per allocation group data */
1877
1878 agf = XFS_BUF_TO_AGF(agbp);
1879 /*
1880 * Freelist is empty, give up.
1881 */
1882 if (!agf->agf_flcount) {
1883 *bnop = NULLAGBLOCK;
1884 return 0;
1885 }
1886 /*
1887 * Read the array of free blocks.
1888 */
1889 mp = tp->t_mountp;
1890 if ((error = xfs_alloc_read_agfl(mp, tp,
Christoph Hellwig16259e72005-11-02 15:11:25 +11001891 be32_to_cpu(agf->agf_seqno), &agflbp)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001892 return error;
1893 agfl = XFS_BUF_TO_AGFL(agflbp);
1894 /*
1895 * Get the block number and update the data structures.
1896 */
Christoph Hellwige2101002006-09-28 10:56:51 +10001897 bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
Marcin Slusarz413d57c2008-02-13 15:03:29 -08001898 be32_add_cpu(&agf->agf_flfirst, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001899 xfs_trans_brelse(tp, agflbp);
Christoph Hellwig16259e72005-11-02 15:11:25 +11001900 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001901 agf->agf_flfirst = 0;
Dave Chinnera862e0f2010-01-11 11:47:41 +00001902
1903 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
Marcin Slusarz413d57c2008-02-13 15:03:29 -08001904 be32_add_cpu(&agf->agf_flcount, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001905 xfs_trans_agflist_delta(tp, -1);
1906 pag->pagf_flcount--;
Dave Chinnera862e0f2010-01-11 11:47:41 +00001907 xfs_perag_put(pag);
David Chinner92821e22007-05-24 15:26:31 +10001908
1909 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
1910 if (btreeblk) {
Marcin Slusarz413d57c2008-02-13 15:03:29 -08001911 be32_add_cpu(&agf->agf_btreeblks, 1);
David Chinner92821e22007-05-24 15:26:31 +10001912 pag->pagf_btreeblks++;
1913 logflags |= XFS_AGF_BTREEBLKS;
1914 }
1915
David Chinner92821e22007-05-24 15:26:31 +10001916 xfs_alloc_log_agf(tp, agbp, logflags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001917 *bnop = bno;
1918
1919 /*
Dave Chinnered3b4d62010-05-21 12:07:08 +10001920 * As blocks are freed, they are added to the per-ag busy list and
1921 * remain there until the freeing transaction is committed to disk.
1922 * Now that we have allocated blocks, this list must be searched to see
1923 * if a block is being reused. If one is, then the freeing transaction
1924 * must be pushed to disk before this transaction.
1925 *
1926 * We do this by setting the current transaction to a sync transaction
1927 * which guarantees that the freeing transaction is on disk before this
1928 * transaction. This is done instead of a synchronous log force here so
1929 * that we don't sit and wait with the AGF locked in the transaction
1930 * during the log force.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001931 */
Dave Chinnered3b4d62010-05-21 12:07:08 +10001932 if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1))
1933 xfs_trans_set_sync(tp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001934 return 0;
1935}
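/*
 * Illustrative note (not part of the original code): the busy-list check
 * above is what keeps a freshly freed block from being handed out before
 * the freeing transaction is stable.  For example (hypothetical), if block
 * 1000 of this AG was freed in a transaction that has not yet committed to
 * disk and is then pulled off the freelist here, xfs_alloc_busy_search()
 * returns non-zero and this transaction is made synchronous, so the earlier
 * free reaches the log first.
 */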
1936
1937/*
1938 * Log the given fields from the agf structure.
1939 */
1940void
1941xfs_alloc_log_agf(
1942 xfs_trans_t *tp, /* transaction pointer */
1943 xfs_buf_t *bp, /* buffer for a.g. freelist header */
1944 int fields) /* mask of fields to be logged (XFS_AGF_...) */
1945{
1946 int first; /* first byte offset */
1947 int last; /* last byte offset */
1948 static const short offsets[] = {
1949 offsetof(xfs_agf_t, agf_magicnum),
1950 offsetof(xfs_agf_t, agf_versionnum),
1951 offsetof(xfs_agf_t, agf_seqno),
1952 offsetof(xfs_agf_t, agf_length),
1953 offsetof(xfs_agf_t, agf_roots[0]),
1954 offsetof(xfs_agf_t, agf_levels[0]),
1955 offsetof(xfs_agf_t, agf_flfirst),
1956 offsetof(xfs_agf_t, agf_fllast),
1957 offsetof(xfs_agf_t, agf_flcount),
1958 offsetof(xfs_agf_t, agf_freeblks),
1959 offsetof(xfs_agf_t, agf_longest),
David Chinner92821e22007-05-24 15:26:31 +10001960 offsetof(xfs_agf_t, agf_btreeblks),
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961 sizeof(xfs_agf_t)
1962 };
1963
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00001964 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
1965
Linus Torvalds1da177e2005-04-16 15:20:36 -07001966 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
1967 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
1968}
1969
1970/*
1971 * Interface for inode allocation to force the pag data to be initialized.
1972 */
1973int /* error */
1974xfs_alloc_pagf_init(
1975 xfs_mount_t *mp, /* file system mount structure */
1976 xfs_trans_t *tp, /* transaction pointer */
1977 xfs_agnumber_t agno, /* allocation group number */
1978 int flags) /* XFS_ALLOC_FLAGS_... */
1979{
1980 xfs_buf_t *bp;
1981 int error;
1982
1983 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
1984 return error;
1985 if (bp)
1986 xfs_trans_brelse(tp, bp);
1987 return 0;
1988}
1989
1990/*
1991 * Put the block on the freelist for the allocation group.
1992 */
1993int /* error */
1994xfs_alloc_put_freelist(
1995 xfs_trans_t *tp, /* transaction pointer */
1996 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
1997 xfs_buf_t *agflbp,/* buffer for a.g. free block array */
David Chinner92821e22007-05-24 15:26:31 +10001998 xfs_agblock_t bno, /* block being freed */
 1999	int		btreeblk) /* block came from an AGF btree */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002000{
2001 xfs_agf_t *agf; /* a.g. freespace structure */
2002 xfs_agfl_t *agfl; /* a.g. free block array */
Christoph Hellwige2101002006-09-28 10:56:51 +10002003 __be32 *blockp;/* pointer to array entry */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002004 int error;
David Chinner92821e22007-05-24 15:26:31 +10002005 int logflags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006 xfs_mount_t *mp; /* mount structure */
2007 xfs_perag_t *pag; /* per allocation group data */
2008
2009 agf = XFS_BUF_TO_AGF(agbp);
2010 mp = tp->t_mountp;
2011
2012 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
Christoph Hellwig16259e72005-11-02 15:11:25 +11002013 be32_to_cpu(agf->agf_seqno), &agflbp)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014 return error;
2015 agfl = XFS_BUF_TO_AGFL(agflbp);
Marcin Slusarz413d57c2008-02-13 15:03:29 -08002016 be32_add_cpu(&agf->agf_fllast, 1);
Christoph Hellwig16259e72005-11-02 15:11:25 +11002017 if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002018 agf->agf_fllast = 0;
Dave Chinnera862e0f2010-01-11 11:47:41 +00002019
2020 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
Marcin Slusarz413d57c2008-02-13 15:03:29 -08002021 be32_add_cpu(&agf->agf_flcount, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002022 xfs_trans_agflist_delta(tp, 1);
2023 pag->pagf_flcount++;
David Chinner92821e22007-05-24 15:26:31 +10002024
2025 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2026 if (btreeblk) {
Marcin Slusarz413d57c2008-02-13 15:03:29 -08002027 be32_add_cpu(&agf->agf_btreeblks, -1);
David Chinner92821e22007-05-24 15:26:31 +10002028 pag->pagf_btreeblks--;
2029 logflags |= XFS_AGF_BTREEBLKS;
2030 }
Dave Chinnera862e0f2010-01-11 11:47:41 +00002031 xfs_perag_put(pag);
David Chinner92821e22007-05-24 15:26:31 +10002032
David Chinner92821e22007-05-24 15:26:31 +10002033 xfs_alloc_log_agf(tp, agbp, logflags);
2034
Christoph Hellwig16259e72005-11-02 15:11:25 +11002035 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2036 blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
Christoph Hellwige2101002006-09-28 10:56:51 +10002037 *blockp = cpu_to_be32(bno);
David Chinner92821e22007-05-24 15:26:31 +10002038 xfs_alloc_log_agf(tp, agbp, logflags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002039 xfs_trans_log_buf(tp, agflbp,
2040 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
2041 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
2042 sizeof(xfs_agblock_t) - 1));
2043 return 0;
2044}
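/*
 * Illustrative note (not part of the original code): the AGFL is a ring
 * buffer of XFS_AGFL_SIZE(mp) block numbers.  agf_flfirst is the head
 * (xfs_alloc_get_freelist reads slot flfirst and then advances it) and
 * agf_fllast the tail (the put above advances fllast first and then stores
 * into the new slot); both wrap to slot 0 at the end of the array.  For
 * example, with an assumed array size of 128, a put at fllast == 127
 * advances fllast to 128, wraps it to 0 and writes the freed block number
 * into slot 0.
 */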
2045
2046/*
2047 * Read in the allocation group header (free/alloc section).
2048 */
2049int /* error */
From: Christoph Hellwig48056212008-11-28 14:23:38 +11002050xfs_read_agf(
2051 struct xfs_mount *mp, /* mount point structure */
2052 struct xfs_trans *tp, /* transaction pointer */
2053 xfs_agnumber_t agno, /* allocation group number */
2054 int flags, /* XFS_BUF_ */
2055 struct xfs_buf **bpp) /* buffer for the ag freelist header */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002056{
From: Christoph Hellwig48056212008-11-28 14:23:38 +11002057 struct xfs_agf *agf; /* ag freelist header */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002058 int agf_ok; /* set if agf is consistent */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002059 int error;
2060
2061 ASSERT(agno != NULLAGNUMBER);
2062 error = xfs_trans_read_buf(
2063 mp, tp, mp->m_ddev_targp,
2064 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
From: Christoph Hellwig48056212008-11-28 14:23:38 +11002065 XFS_FSS_TO_BB(mp, 1), flags, bpp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002066 if (error)
2067 return error;
From: Christoph Hellwig48056212008-11-28 14:23:38 +11002068 if (!*bpp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002069 return 0;
From: Christoph Hellwig48056212008-11-28 14:23:38 +11002070
2071 ASSERT(!XFS_BUF_GETERROR(*bpp));
2072 agf = XFS_BUF_TO_AGF(*bpp);
2073
Linus Torvalds1da177e2005-04-16 15:20:36 -07002074 /*
2075 * Validate the magic number of the agf block.
2076 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002077 agf_ok =
Christoph Hellwig16259e72005-11-02 15:11:25 +11002078 be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC &&
2079 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2080 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2081 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2082 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
From: Christoph Hellwig48056212008-11-28 14:23:38 +11002083 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp) &&
2084 be32_to_cpu(agf->agf_seqno) == agno;
Barry Naujok89b28392008-10-30 17:05:49 +11002085 if (xfs_sb_version_haslazysbcount(&mp->m_sb))
2086 agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <=
2087 be32_to_cpu(agf->agf_length);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002088 if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
2089 XFS_RANDOM_ALLOC_READ_AGF))) {
2090 XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
2091 XFS_ERRLEVEL_LOW, mp, agf);
From: Christoph Hellwig48056212008-11-28 14:23:38 +11002092 xfs_trans_brelse(tp, *bpp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002093 return XFS_ERROR(EFSCORRUPTED);
2094 }
From: Christoph Hellwig48056212008-11-28 14:23:38 +11002095 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF);
2096 return 0;
2097}
2098
2099/*
2100 * Read in the allocation group header (free/alloc section).
2101 */
2102int /* error */
2103xfs_alloc_read_agf(
2104 struct xfs_mount *mp, /* mount point structure */
2105 struct xfs_trans *tp, /* transaction pointer */
2106 xfs_agnumber_t agno, /* allocation group number */
2107 int flags, /* XFS_ALLOC_FLAG_... */
2108 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2109{
2110 struct xfs_agf *agf; /* ag freelist header */
2111 struct xfs_perag *pag; /* per allocation group data */
2112 int error;
2113
2114 ASSERT(agno != NULLAGNUMBER);
2115
2116 error = xfs_read_agf(mp, tp, agno,
Christoph Hellwig0cadda12010-01-19 09:56:44 +00002117 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
From: Christoph Hellwig48056212008-11-28 14:23:38 +11002118 bpp);
2119 if (error)
2120 return error;
2121 if (!*bpp)
2122 return 0;
2123 ASSERT(!XFS_BUF_GETERROR(*bpp));
2124
2125 agf = XFS_BUF_TO_AGF(*bpp);
Dave Chinnera862e0f2010-01-11 11:47:41 +00002126 pag = xfs_perag_get(mp, agno);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127 if (!pag->pagf_init) {
Christoph Hellwig16259e72005-11-02 15:11:25 +11002128 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
David Chinner92821e22007-05-24 15:26:31 +10002129 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
Christoph Hellwig16259e72005-11-02 15:11:25 +11002130 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2131 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002132 pag->pagf_levels[XFS_BTNUM_BNOi] =
Christoph Hellwig16259e72005-11-02 15:11:25 +11002133 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002134 pag->pagf_levels[XFS_BTNUM_CNTi] =
Christoph Hellwig16259e72005-11-02 15:11:25 +11002135 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
Eric Sandeen007c61c2007-10-11 17:43:56 +10002136 spin_lock_init(&pag->pagb_lock);
Dave Chinnere57336f2010-01-11 11:47:49 +00002137 pag->pagb_count = 0;
Dave Chinnered3b4d62010-05-21 12:07:08 +10002138 pag->pagb_tree = RB_ROOT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002139 pag->pagf_init = 1;
2140 }
2141#ifdef DEBUG
2142 else if (!XFS_FORCED_SHUTDOWN(mp)) {
Christoph Hellwig16259e72005-11-02 15:11:25 +11002143 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
Barry Naujok89b28392008-10-30 17:05:49 +11002144 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
Christoph Hellwig16259e72005-11-02 15:11:25 +11002145 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2146 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002147 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
Christoph Hellwig16259e72005-11-02 15:11:25 +11002148 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002149 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
Christoph Hellwig16259e72005-11-02 15:11:25 +11002150 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002151 }
2152#endif
Dave Chinnera862e0f2010-01-11 11:47:41 +00002153 xfs_perag_put(pag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002154 return 0;
2155}
2156
2157/*
2158 * Allocate an extent (variable-size).
2159 * Depending on the allocation type, we either look in a single allocation
2160 * group or loop over the allocation groups to find the result.
2161 */
2162int /* error */
2163xfs_alloc_vextent(
2164 xfs_alloc_arg_t *args) /* allocation argument structure */
2165{
2166 xfs_agblock_t agsize; /* allocation group size */
2167 int error;
2168 int flags; /* XFS_ALLOC_FLAG_... locking flags */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002169 xfs_extlen_t minleft;/* minimum left value, temp copy */
2170 xfs_mount_t *mp; /* mount structure pointer */
2171 xfs_agnumber_t sagno; /* starting allocation group number */
2172 xfs_alloctype_t type; /* input allocation type */
2173 int bump_rotor = 0;
2174 int no_min = 0;
2175 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2176
2177 mp = args->mp;
2178 type = args->otype = args->type;
2179 args->agbno = NULLAGBLOCK;
2180 /*
2181 * Just fix this up, for the case where the last a.g. is shorter
2182 * (or there's only one a.g.) and the caller couldn't easily figure
2183 * that out (xfs_bmap_alloc).
2184 */
2185 agsize = mp->m_sb.sb_agblocks;
2186 if (args->maxlen > agsize)
2187 args->maxlen = agsize;
2188 if (args->alignment == 0)
2189 args->alignment = 1;
2190 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2191 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2192 ASSERT(args->minlen <= args->maxlen);
2193 ASSERT(args->minlen <= agsize);
2194 ASSERT(args->mod < args->prod);
2195 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2196 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2197 args->minlen > args->maxlen || args->minlen > agsize ||
2198 args->mod >= args->prod) {
2199 args->fsbno = NULLFSBLOCK;
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00002200 trace_xfs_alloc_vextent_badargs(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002201 return 0;
2202 }
2203 minleft = args->minleft;
2204
2205 switch (type) {
2206 case XFS_ALLOCTYPE_THIS_AG:
2207 case XFS_ALLOCTYPE_NEAR_BNO:
2208 case XFS_ALLOCTYPE_THIS_BNO:
2209 /*
2210 * These three force us into a single a.g.
2211 */
2212 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
Dave Chinnera862e0f2010-01-11 11:47:41 +00002213 args->pag = xfs_perag_get(mp, args->agno);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002214 args->minleft = 0;
2215 error = xfs_alloc_fix_freelist(args, 0);
2216 args->minleft = minleft;
2217 if (error) {
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00002218 trace_xfs_alloc_vextent_nofix(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002219 goto error0;
2220 }
2221 if (!args->agbp) {
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00002222 trace_xfs_alloc_vextent_noagbp(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002223 break;
2224 }
2225 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2226 if ((error = xfs_alloc_ag_vextent(args)))
2227 goto error0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228 break;
2229 case XFS_ALLOCTYPE_START_BNO:
2230 /*
2231 * Try near allocation first, then anywhere-in-ag after
2232 * the first a.g. fails.
2233 */
2234 if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) &&
2235 (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2236 args->fsbno = XFS_AGB_TO_FSB(mp,
2237 ((mp->m_agfrotor / rotorstep) %
2238 mp->m_sb.sb_agcount), 0);
2239 bump_rotor = 1;
2240 }
2241 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2242 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2243 /* FALLTHROUGH */
2244 case XFS_ALLOCTYPE_ANY_AG:
2245 case XFS_ALLOCTYPE_START_AG:
2246 case XFS_ALLOCTYPE_FIRST_AG:
2247 /*
2248 * Rotate through the allocation groups looking for a winner.
2249 */
2250 if (type == XFS_ALLOCTYPE_ANY_AG) {
2251 /*
2252 * Start with the last place we left off.
2253 */
2254 args->agno = sagno = (mp->m_agfrotor / rotorstep) %
2255 mp->m_sb.sb_agcount;
2256 args->type = XFS_ALLOCTYPE_THIS_AG;
2257 flags = XFS_ALLOC_FLAG_TRYLOCK;
2258 } else if (type == XFS_ALLOCTYPE_FIRST_AG) {
2259 /*
2260 * Start with allocation group given by bno.
2261 */
2262 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2263 args->type = XFS_ALLOCTYPE_THIS_AG;
2264 sagno = 0;
2265 flags = 0;
2266 } else {
2267 if (type == XFS_ALLOCTYPE_START_AG)
2268 args->type = XFS_ALLOCTYPE_THIS_AG;
2269 /*
2270 * Start with the given allocation group.
2271 */
2272 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2273 flags = XFS_ALLOC_FLAG_TRYLOCK;
2274 }
2275 /*
2276 * Loop over allocation groups twice; first time with
2277 * trylock set, second time without.
2278 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002279 for (;;) {
Dave Chinnera862e0f2010-01-11 11:47:41 +00002280 args->pag = xfs_perag_get(mp, args->agno);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002281 if (no_min) args->minleft = 0;
2282 error = xfs_alloc_fix_freelist(args, flags);
2283 args->minleft = minleft;
2284 if (error) {
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00002285 trace_xfs_alloc_vextent_nofix(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002286 goto error0;
2287 }
2288 /*
2289 * If we get a buffer back then the allocation will fly.
2290 */
2291 if (args->agbp) {
2292 if ((error = xfs_alloc_ag_vextent(args)))
2293 goto error0;
2294 break;
2295 }
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00002296
2297 trace_xfs_alloc_vextent_loopfailed(args);
2298
Linus Torvalds1da177e2005-04-16 15:20:36 -07002299 /*
2300 * Didn't work, figure out the next iteration.
2301 */
2302 if (args->agno == sagno &&
2303 type == XFS_ALLOCTYPE_START_BNO)
2304 args->type = XFS_ALLOCTYPE_THIS_AG;
Yingping Lud210a282006-06-09 14:55:18 +10002305 /*
2306 * For the first allocation, we can try any AG to get
2307 * space. However, if we already have allocated a
2308 * block, we don't want to try AGs whose number is below
2309 * sagno. Otherwise, we may end up with out-of-order
2310 * locking of AGF, which might cause deadlock.
2311 */
2312 if (++(args->agno) == mp->m_sb.sb_agcount) {
2313 if (args->firstblock != NULLFSBLOCK)
2314 args->agno = sagno;
2315 else
2316 args->agno = 0;
2317 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002318 /*
2319 * Reached the starting a.g., must either be done
2320 * or switch to non-trylock mode.
2321 */
2322 if (args->agno == sagno) {
2323 if (no_min == 1) {
2324 args->agbno = NULLAGBLOCK;
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00002325 trace_xfs_alloc_vextent_allfailed(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002326 break;
2327 }
2328 if (flags == 0) {
2329 no_min = 1;
2330 } else {
2331 flags = 0;
2332 if (type == XFS_ALLOCTYPE_START_BNO) {
2333 args->agbno = XFS_FSB_TO_AGBNO(mp,
2334 args->fsbno);
2335 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2336 }
2337 }
2338 }
Dave Chinnera862e0f2010-01-11 11:47:41 +00002339 xfs_perag_put(args->pag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002340 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002341 if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2342 if (args->agno == sagno)
2343 mp->m_agfrotor = (mp->m_agfrotor + 1) %
2344 (mp->m_sb.sb_agcount * rotorstep);
2345 else
2346 mp->m_agfrotor = (args->agno * rotorstep + 1) %
2347 (mp->m_sb.sb_agcount * rotorstep);
2348 }
2349 break;
2350 default:
2351 ASSERT(0);
2352 /* NOTREACHED */
2353 }
2354 if (args->agbno == NULLAGBLOCK)
2355 args->fsbno = NULLFSBLOCK;
2356 else {
2357 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2358#ifdef DEBUG
2359 ASSERT(args->len >= args->minlen);
2360 ASSERT(args->len <= args->maxlen);
2361 ASSERT(args->agbno % args->alignment == 0);
2362 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2363 args->len);
2364#endif
2365 }
Dave Chinnera862e0f2010-01-11 11:47:41 +00002366 xfs_perag_put(args->pag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002367 return 0;
2368error0:
Dave Chinnera862e0f2010-01-11 11:47:41 +00002369 xfs_perag_put(args->pag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002370 return error;
2371}
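/*
 * Illustrative note (not part of the original code): with a rotorstep of 1
 * (the usual default), m_agfrotor simply cycles through the AGs, so
 * successive XFS_ALLOCTYPE_ANY_AG (and 32-bit-inode START_BNO) allocations
 * start in AG 0, 1, 2, ... in turn.  With a hypothetical rotorstep of 4,
 * the rotor counts modulo 4 * sb_agcount and the starting AG is rotor / 4,
 * so four consecutive allocations begin in the same AG before moving on.
 */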
2372
2373/*
2374 * Free an extent.
2375 * Just break up the extent address and hand off to xfs_free_ag_extent
2376 * after fixing up the freelist.
2377 */
2378int /* error */
2379xfs_free_extent(
2380 xfs_trans_t *tp, /* transaction pointer */
2381 xfs_fsblock_t bno, /* starting block number of extent */
2382 xfs_extlen_t len) /* length of extent */
2383{
Nathan Scott0e1edbd2006-08-10 14:40:41 +10002384 xfs_alloc_arg_t args;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002385 int error;
2386
2387 ASSERT(len != 0);
Nathan Scott0e1edbd2006-08-10 14:40:41 +10002388 memset(&args, 0, sizeof(xfs_alloc_arg_t));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002389 args.tp = tp;
2390 args.mp = tp->t_mountp;
2391 args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2392 ASSERT(args.agno < args.mp->m_sb.sb_agcount);
2393 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
Dave Chinnera862e0f2010-01-11 11:47:41 +00002394 args.pag = xfs_perag_get(args.mp, args.agno);
Yingping Lud210a282006-06-09 14:55:18 +10002395 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002396 goto error0;
2397#ifdef DEBUG
2398 ASSERT(args.agbp != NULL);
Nathan Scott0e1edbd2006-08-10 14:40:41 +10002399 ASSERT((args.agbno + len) <=
2400 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002401#endif
Nathan Scott0e1edbd2006-08-10 14:40:41 +10002402 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002403error0:
Dave Chinnera862e0f2010-01-11 11:47:41 +00002404 xfs_perag_put(args.pag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002405 return error;
2406}
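/*
 * Usage sketch (not part of the original file): a caller that has already
 * reserved log space and holds a dirty transaction frees an extent by
 * filesystem block number; the AGF is locked and the freelist fixed up
 * internally.  'fsbno', 'blockcount' and the error label below are
 * hypothetical.
 */
#if 0	/* example only, never compiled */
	error = xfs_free_extent(tp, fsbno, blockcount);
	if (error)
		goto out_trans_cancel;	/* hypothetical error path */
#endif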
2407
2408
2409/*
2410 * AG Busy list management
2411 * The busy list contains block ranges that have been freed but whose
Nathan Scottc41564b2006-03-29 08:55:14 +10002412 * transactions have not yet hit disk. If any block listed in a busy
Linus Torvalds1da177e2005-04-16 15:20:36 -07002413 * list is reused, the transaction that freed it must be forced to disk
2414 * before continuing to use the block.
2415 *
Dave Chinnered3b4d62010-05-21 12:07:08 +10002416 * xfs_alloc_busy_insert - add to the per-ag busy list
2417 * xfs_alloc_busy_clear - remove an item from the per-ag busy list
2418 * xfs_alloc_busy_search - search for a busy extent
2419 */
2420
2421/*
2422 * Insert a new extent into the busy tree.
2423 *
2424 * The busy extent tree is indexed by the start block of the busy extent.
2425 * there can be multiple overlapping ranges in the busy extent tree but only
 2426 * There can be multiple overlapping ranges in the busy extent tree but only
2427 * multi-block extents can be freed, then smaller chunks of that extent
2428 * allocated and freed again before the first transaction commit is on disk.
2429 * If the exact same start block is freed a second time, we have to wait for
2430 * that busy extent to pass out of the tree before the new extent is inserted.
2431 * There are two main cases we have to handle here.
2432 *
2433 * The first case is a transaction that triggers a "free - allocate - free"
2434 * cycle. This can occur during btree manipulations as a btree block is freed
2435 * to the freelist, then allocated from the free list, then freed again. In
 2436 * this case, the second extent free is what triggers the duplicate and as
2437 * such the transaction IDs should match. Because the extent was allocated in
2438 * this transaction, the transaction must be marked as synchronous. This is
2439 * true for all cases where the free/alloc/free occurs in the one transaction,
2440 * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case.
2441 * This serves to catch violations of the second case quite effectively.
2442 *
2443 * The second case is where the free/alloc/free occur in different
2444 * transactions. In this case, the thread freeing the extent the second time
2445 * can't mark the extent busy immediately because it is already tracked in a
2446 * transaction that may be committing. When the log commit for the existing
2447 * busy extent completes, the busy extent will be removed from the tree. If we
2448 * allow the second busy insert to continue using that busy extent structure,
2449 * it can be freed before this transaction is safely in the log. Hence our
2450 * only option in this case is to force the log to remove the existing busy
2451 * extent from the list before we insert the new one with the current
2452 * transaction ID.
2453 *
2454 * The problem we are trying to avoid in the free-alloc-free in separate
2455 * transactions is most easily described with a timeline:
2456 *
2457 * Thread 1 Thread 2 Thread 3 xfslogd
2458 * xact alloc
2459 * free X
2460 * mark busy
2461 * commit xact
2462 * free xact
2463 * xact alloc
2464 * alloc X
2465 * busy search
2466 * mark xact sync
2467 * commit xact
2468 * free xact
2469 * force log
2470 * checkpoint starts
2471 * ....
2472 * xact alloc
2473 * free X
2474 * mark busy
2475 * finds match
2476 * *** KABOOM! ***
2477 * ....
2478 * log IO completes
2479 * unbusy X
2480 * checkpoint completes
2481 *
2482 * By issuing a log force in thread 3 @ "KABOOM", the thread will block until
2483 * the checkpoint completes, and the busy extent it matched will have been
2484 * removed from the tree when it is woken. Hence it can then continue safely.
2485 *
2486 * However, to ensure this matching process is robust, we need to use the
 2487 * transaction ID for identifying the transaction, as delayed logging results
 2488 * in the busy extent and transaction lifecycles being different; i.e. the busy
 2489 * extent is active for a lot longer than the transaction. Hence the
 2490 * transaction structure can be freed and reallocated, then used to mark the
 2491 * same extent busy again in the new transaction. In this case the new
 2492 * transaction
2492 * will have a different tid but can have the same address, and hence we need
2493 * to check against the tid.
2494 *
2495 * Future: for delayed logging, we could avoid the log force if the extent was
2496 * first freed in the current checkpoint sequence. This, however, requires the
2497 * ability to pin the current checkpoint in memory until this transaction
2498 * commits to ensure that both the original free and the current one combine
2499 * logically into the one checkpoint. If the checkpoint sequences are
2500 * different, however, we still need to wait on a log force.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002501 */
2502void
Dave Chinnered3b4d62010-05-21 12:07:08 +10002503xfs_alloc_busy_insert(
2504 struct xfs_trans *tp,
2505 xfs_agnumber_t agno,
2506 xfs_agblock_t bno,
2507 xfs_extlen_t len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002508{
Dave Chinnered3b4d62010-05-21 12:07:08 +10002509 struct xfs_busy_extent *new;
2510 struct xfs_busy_extent *busyp;
Dave Chinnera862e0f2010-01-11 11:47:41 +00002511 struct xfs_perag *pag;
Dave Chinnered3b4d62010-05-21 12:07:08 +10002512 struct rb_node **rbp;
2513 struct rb_node *parent;
2514 int match;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002515
Dave Chinnered3b4d62010-05-21 12:07:08 +10002516
2517 new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
2518 if (!new) {
2519 /*
2520 * No Memory! Since it is now not possible to track the free
 2521		 * block, make this a synchronous transaction to ensure that
2522 * the block is not reused before this transaction commits.
2523 */
2524 trace_xfs_alloc_busy(tp, agno, bno, len, 1);
2525 xfs_trans_set_sync(tp);
2526 return;
2527 }
2528
2529 new->agno = agno;
2530 new->bno = bno;
2531 new->length = len;
2532 new->tid = xfs_log_get_trans_ident(tp);
2533
2534 INIT_LIST_HEAD(&new->list);
2535
2536 /* trace before insert to be able to see failed inserts */
2537 trace_xfs_alloc_busy(tp, agno, bno, len, 0);
2538
2539 pag = xfs_perag_get(tp->t_mountp, new->agno);
2540restart:
Dave Chinnera862e0f2010-01-11 11:47:41 +00002541 spin_lock(&pag->pagb_lock);
Dave Chinnered3b4d62010-05-21 12:07:08 +10002542 rbp = &pag->pagb_tree.rb_node;
2543 parent = NULL;
2544 busyp = NULL;
2545 match = 0;
2546 while (*rbp && match >= 0) {
2547 parent = *rbp;
2548 busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002549
Dave Chinnered3b4d62010-05-21 12:07:08 +10002550 if (new->bno < busyp->bno) {
2551 /* may overlap, but exact start block is lower */
2552 rbp = &(*rbp)->rb_left;
2553 if (new->bno + new->length > busyp->bno)
2554 match = busyp->tid == new->tid ? 1 : -1;
2555 } else if (new->bno > busyp->bno) {
2556 /* may overlap, but exact start block is higher */
2557 rbp = &(*rbp)->rb_right;
2558 if (bno < busyp->bno + busyp->length)
2559 match = busyp->tid == new->tid ? 1 : -1;
2560 } else {
2561 match = busyp->tid == new->tid ? 1 : -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002562 break;
2563 }
2564 }
Dave Chinnered3b4d62010-05-21 12:07:08 +10002565 if (match < 0) {
2566 /* overlap marked busy in different transaction */
2567 spin_unlock(&pag->pagb_lock);
2568 xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
2569 goto restart;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002570 }
Dave Chinnered3b4d62010-05-21 12:07:08 +10002571 if (match > 0) {
2572 /*
2573 * overlap marked busy in same transaction. Update if exact
2574 * start block match, otherwise combine the busy extents into
2575 * a single range.
2576 */
2577 if (busyp->bno == new->bno) {
2578 busyp->length = max(busyp->length, new->length);
2579 spin_unlock(&pag->pagb_lock);
2580 ASSERT(tp->t_flags & XFS_TRANS_SYNC);
2581 xfs_perag_put(pag);
2582 kmem_free(new);
2583 return;
2584 }
2585 rb_erase(&busyp->rb_node, &pag->pagb_tree);
2586 new->length = max(busyp->bno + busyp->length,
2587 new->bno + new->length) -
2588 min(busyp->bno, new->bno);
2589 new->bno = min(busyp->bno, new->bno);
2590 } else
2591 busyp = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002592
Dave Chinnered3b4d62010-05-21 12:07:08 +10002593 rb_link_node(&new->rb_node, parent, rbp);
2594 rb_insert_color(&new->rb_node, &pag->pagb_tree);
2595
2596 list_add(&new->list, &tp->t_busy);
Dave Chinnera862e0f2010-01-11 11:47:41 +00002597 spin_unlock(&pag->pagb_lock);
2598 xfs_perag_put(pag);
Dave Chinnered3b4d62010-05-21 12:07:08 +10002599 kmem_free(busyp);
2600}
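/*
 * Illustrative note (not part of the original code, hypothetical extents):
 * suppose blocks [100, 108) of this AG are already in the busy tree from a
 * transaction that is still committing.  If this transaction now frees
 * blocks [104, 108), the walk above finds an overlap with a different tid
 * (match < 0), drops pagb_lock, forces the log so the old busy extent is
 * cleared, and retries.  If instead the same transaction had freed
 * [100, 104) again (same tid, exact start block), the existing entry would
 * simply be updated in place (its length becomes the maximum of the two)
 * rather than a new node being inserted.
 */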
2601
2602/*
2603 * Search for a busy extent within the range of the extent we are about to
 2604 * allocate. The busy extent tree lock is taken internally here, so callers
 2605 * do not need to hold it. This function returns 0 for no overlapping busy
2606 * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact
2607 * match. This is done so that a non-zero return indicates an overlap that
2608 * will require a synchronous transaction, but it can still be
 2609 * used to distinguish between a partial and an exact match.
2610 */
Christoph Hellwiga46db602011-01-07 13:02:04 +00002611int
Dave Chinnered3b4d62010-05-21 12:07:08 +10002612xfs_alloc_busy_search(
2613 struct xfs_mount *mp,
2614 xfs_agnumber_t agno,
2615 xfs_agblock_t bno,
2616 xfs_extlen_t len)
2617{
2618 struct xfs_perag *pag;
2619 struct rb_node *rbp;
2620 struct xfs_busy_extent *busyp;
2621 int match = 0;
2622
2623 pag = xfs_perag_get(mp, agno);
2624 spin_lock(&pag->pagb_lock);
2625
2626 rbp = pag->pagb_tree.rb_node;
2627
2628 /* find closest start bno overlap */
2629 while (rbp) {
2630 busyp = rb_entry(rbp, struct xfs_busy_extent, rb_node);
2631 if (bno < busyp->bno) {
2632 /* may overlap, but exact start block is lower */
2633 if (bno + len > busyp->bno)
2634 match = -1;
2635 rbp = rbp->rb_left;
2636 } else if (bno > busyp->bno) {
2637 /* may overlap, but exact start block is higher */
2638 if (bno < busyp->bno + busyp->length)
2639 match = -1;
2640 rbp = rbp->rb_right;
2641 } else {
2642 /* bno matches busyp, length determines exact match */
2643 match = (busyp->length == len) ? 1 : -1;
2644 break;
2645 }
2646 }
2647 spin_unlock(&pag->pagb_lock);
2648 trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match);
2649 xfs_perag_put(pag);
2650 return match;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002651}
2652
2653void
Dave Chinnered3b4d62010-05-21 12:07:08 +10002654xfs_alloc_busy_clear(
2655 struct xfs_mount *mp,
2656 struct xfs_busy_extent *busyp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002657{
Dave Chinnera862e0f2010-01-11 11:47:41 +00002658 struct xfs_perag *pag;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002659
Dave Chinnered3b4d62010-05-21 12:07:08 +10002660 trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno,
2661 busyp->length);
2662
2663 ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno,
2664 busyp->length) == 1);
2665
2666 list_del_init(&busyp->list);
2667
2668 pag = xfs_perag_get(mp, busyp->agno);
Dave Chinnera862e0f2010-01-11 11:47:41 +00002669 spin_lock(&pag->pagb_lock);
Dave Chinnered3b4d62010-05-21 12:07:08 +10002670 rb_erase(&busyp->rb_node, &pag->pagb_tree);
Dave Chinnera862e0f2010-01-11 11:47:41 +00002671 spin_unlock(&pag->pagb_lock);
2672 xfs_perag_put(pag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002673
Dave Chinnered3b4d62010-05-21 12:07:08 +10002674 kmem_free(busyp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002675}