blob: a7fbe8a99b12bf2c360ea19372a380ae5d27c855 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
Nathan Scott7b718762005-11-02 14:58:39 +11002 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3 * All Rights Reserved.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004 *
Nathan Scott7b718762005-11-02 14:58:39 +11005 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 * published by the Free Software Foundation.
8 *
Nathan Scott7b718762005-11-02 14:58:39 +11009 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
Linus Torvalds1da177e2005-04-16 15:20:36 -070013 *
Nathan Scott7b718762005-11-02 14:58:39 +110014 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Linus Torvalds1da177e2005-04-16 15:20:36 -070017 */
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include "xfs.h"
Nathan Scotta844f452005-11-02 14:38:42 +110019#include "xfs_fs.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include "xfs_types.h"
Nathan Scotta844f452005-11-02 14:38:42 +110021#include "xfs_bit.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include "xfs_log.h"
Nathan Scotta844f452005-11-02 14:38:42 +110023#include "xfs_inum.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
Nathan Scotta844f452005-11-02 14:38:42 +110027#include "xfs_dir2.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070028#include "xfs_dmapi.h"
29#include "xfs_mount.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include "xfs_bmap_btree.h"
Nathan Scotta844f452005-11-02 14:38:42 +110031#include "xfs_alloc_btree.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070032#include "xfs_ialloc_btree.h"
Nathan Scotta844f452005-11-02 14:38:42 +110033#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h"
36#include "xfs_inode.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070037#include "xfs_btree.h"
38#include "xfs_ialloc.h"
39#include "xfs_alloc.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070040#include "xfs_error.h"
Christoph Hellwig0b1b2132009-12-14 23:14:59 +000041#include "xfs_trace.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070042
43
44#define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
45
46#define XFSA_FIXUP_BNO_OK 1
47#define XFSA_FIXUP_CNT_OK 2
48
Dave Chinnered3b4d62010-05-21 12:07:08 +100049static int
50xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno,
51 xfs_agblock_t bno, xfs_extlen_t len);
Linus Torvalds1da177e2005-04-16 15:20:36 -070052
Linus Torvalds1da177e2005-04-16 15:20:36 -070053/*
54 * Prototypes for per-ag allocation routines
55 */
56
57STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *);
58STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *);
59STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *);
60STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *,
61 xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *);
62
63/*
64 * Internal functions.
65 */
66
67/*
Christoph Hellwigfe033cc2008-10-30 16:56:09 +110068 * Lookup the record equal to [bno, len] in the btree given by cur.
69 */
70STATIC int /* error */
71xfs_alloc_lookup_eq(
72 struct xfs_btree_cur *cur, /* btree cursor */
73 xfs_agblock_t bno, /* starting block of extent */
74 xfs_extlen_t len, /* length of extent */
75 int *stat) /* success/failure */
76{
77 cur->bc_rec.a.ar_startblock = bno;
78 cur->bc_rec.a.ar_blockcount = len;
79 return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
80}
81
82/*
83 * Lookup the first record greater than or equal to [bno, len]
84 * in the btree given by cur.
85 */
86STATIC int /* error */
87xfs_alloc_lookup_ge(
88 struct xfs_btree_cur *cur, /* btree cursor */
89 xfs_agblock_t bno, /* starting block of extent */
90 xfs_extlen_t len, /* length of extent */
91 int *stat) /* success/failure */
92{
93 cur->bc_rec.a.ar_startblock = bno;
94 cur->bc_rec.a.ar_blockcount = len;
95 return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
96}
97
98/*
99 * Lookup the first record less than or equal to [bno, len]
100 * in the btree given by cur.
101 */
102STATIC int /* error */
103xfs_alloc_lookup_le(
104 struct xfs_btree_cur *cur, /* btree cursor */
105 xfs_agblock_t bno, /* starting block of extent */
106 xfs_extlen_t len, /* length of extent */
107 int *stat) /* success/failure */
108{
109 cur->bc_rec.a.ar_startblock = bno;
110 cur->bc_rec.a.ar_blockcount = len;
111 return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
112}
113
Christoph Hellwig278d0ca2008-10-30 16:56:32 +1100114/*
115 * Update the record referred to by cur to the value given
116 * by [bno, len].
117 * This either works (return 0) or gets an EFSCORRUPTED error.
118 */
119STATIC int /* error */
120xfs_alloc_update(
121 struct xfs_btree_cur *cur, /* btree cursor */
122 xfs_agblock_t bno, /* starting block of extent */
123 xfs_extlen_t len) /* length of extent */
124{
125 union xfs_btree_rec rec;
126
127 rec.alloc.ar_startblock = cpu_to_be32(bno);
128 rec.alloc.ar_blockcount = cpu_to_be32(len);
129 return xfs_btree_update(cur, &rec);
130}
Christoph Hellwigfe033cc2008-10-30 16:56:09 +1100131
132/*
Christoph Hellwig8cc938f2008-10-30 16:58:11 +1100133 * Get the data from the pointed-to record.
134 */
135STATIC int /* error */
136xfs_alloc_get_rec(
137 struct xfs_btree_cur *cur, /* btree cursor */
138 xfs_agblock_t *bno, /* output: starting block of extent */
139 xfs_extlen_t *len, /* output: length of extent */
140 int *stat) /* output: success/failure */
141{
142 union xfs_btree_rec *rec;
143 int error;
144
145 error = xfs_btree_get_rec(cur, &rec, stat);
146 if (!error && *stat == 1) {
147 *bno = be32_to_cpu(rec->alloc.ar_startblock);
148 *len = be32_to_cpu(rec->alloc.ar_blockcount);
149 }
150 return error;
151}
152
153/*
Linus Torvalds1da177e2005-04-16 15:20:36 -0700154 * Compute aligned version of the found extent.
155 * Takes alignment and min length into account.
156 */
David Chinner12375c82008-04-10 12:21:32 +1000157STATIC void
Linus Torvalds1da177e2005-04-16 15:20:36 -0700158xfs_alloc_compute_aligned(
159 xfs_agblock_t foundbno, /* starting block in found extent */
160 xfs_extlen_t foundlen, /* length in found extent */
161 xfs_extlen_t alignment, /* alignment for allocation */
162 xfs_extlen_t minlen, /* minimum length for allocation */
163 xfs_agblock_t *resbno, /* result block number */
164 xfs_extlen_t *reslen) /* result length */
165{
166 xfs_agblock_t bno;
167 xfs_extlen_t diff;
168 xfs_extlen_t len;
169
170 if (alignment > 1 && foundlen >= minlen) {
171 bno = roundup(foundbno, alignment);
172 diff = bno - foundbno;
173 len = diff >= foundlen ? 0 : foundlen - diff;
174 } else {
175 bno = foundbno;
176 len = foundlen;
177 }
178 *resbno = bno;
179 *reslen = len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700180}
181
182/*
183 * Compute best start block and diff for "near" allocations.
184 * freelen >= wantlen already checked by caller.
185 */
186STATIC xfs_extlen_t /* difference value (absolute) */
187xfs_alloc_compute_diff(
188 xfs_agblock_t wantbno, /* target starting block */
189 xfs_extlen_t wantlen, /* target length */
190 xfs_extlen_t alignment, /* target alignment */
191 xfs_agblock_t freebno, /* freespace's starting block */
192 xfs_extlen_t freelen, /* freespace's length */
193 xfs_agblock_t *newbnop) /* result: best start block from free */
194{
195 xfs_agblock_t freeend; /* end of freespace extent */
196 xfs_agblock_t newbno1; /* return block number */
197 xfs_agblock_t newbno2; /* other new block number */
198 xfs_extlen_t newlen1=0; /* length with newbno1 */
199 xfs_extlen_t newlen2=0; /* length with newbno2 */
200 xfs_agblock_t wantend; /* end of target extent */
201
202 ASSERT(freelen >= wantlen);
203 freeend = freebno + freelen;
204 wantend = wantbno + wantlen;
205 if (freebno >= wantbno) {
206 if ((newbno1 = roundup(freebno, alignment)) >= freeend)
207 newbno1 = NULLAGBLOCK;
208 } else if (freeend >= wantend && alignment > 1) {
209 newbno1 = roundup(wantbno, alignment);
210 newbno2 = newbno1 - alignment;
211 if (newbno1 >= freeend)
212 newbno1 = NULLAGBLOCK;
213 else
214 newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
215 if (newbno2 < freebno)
216 newbno2 = NULLAGBLOCK;
217 else
218 newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
219 if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
220 if (newlen1 < newlen2 ||
221 (newlen1 == newlen2 &&
222 XFS_ABSDIFF(newbno1, wantbno) >
223 XFS_ABSDIFF(newbno2, wantbno)))
224 newbno1 = newbno2;
225 } else if (newbno2 != NULLAGBLOCK)
226 newbno1 = newbno2;
227 } else if (freeend >= wantend) {
228 newbno1 = wantbno;
229 } else if (alignment > 1) {
230 newbno1 = roundup(freeend - wantlen, alignment);
231 if (newbno1 > freeend - wantlen &&
232 newbno1 - alignment >= freebno)
233 newbno1 -= alignment;
234 else if (newbno1 >= freeend)
235 newbno1 = NULLAGBLOCK;
236 } else
237 newbno1 = freeend - wantlen;
238 *newbnop = newbno1;
239 return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
240}
241
242/*
243 * Fix up the length, based on mod and prod.
244 * len should be k * prod + mod for some k.
245 * If len is too small it is returned unchanged.
246 * If len hits maxlen it is left alone.
247 */
248STATIC void
249xfs_alloc_fix_len(
250 xfs_alloc_arg_t *args) /* allocation argument structure */
251{
252 xfs_extlen_t k;
253 xfs_extlen_t rlen;
254
255 ASSERT(args->mod < args->prod);
256 rlen = args->len;
257 ASSERT(rlen >= args->minlen);
258 ASSERT(rlen <= args->maxlen);
259 if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
260 (args->mod == 0 && rlen < args->prod))
261 return;
262 k = rlen % args->prod;
263 if (k == args->mod)
264 return;
265 if (k > args->mod) {
266 if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen)
267 return;
268 } else {
269 if ((int)(rlen = rlen - args->prod - (args->mod - k)) <
270 (int)args->minlen)
271 return;
272 }
273 ASSERT(rlen >= args->minlen);
274 ASSERT(rlen <= args->maxlen);
275 args->len = rlen;
276}
277
278/*
279 * Fix up length if there is too little space left in the a.g.
280 * Return 1 if ok, 0 if too little, should give up.
281 */
282STATIC int
283xfs_alloc_fix_minleft(
284 xfs_alloc_arg_t *args) /* allocation argument structure */
285{
286 xfs_agf_t *agf; /* a.g. freelist header */
287 int diff; /* free space difference */
288
289 if (args->minleft == 0)
290 return 1;
291 agf = XFS_BUF_TO_AGF(args->agbp);
Christoph Hellwig16259e72005-11-02 15:11:25 +1100292 diff = be32_to_cpu(agf->agf_freeblks)
293 + be32_to_cpu(agf->agf_flcount)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700294 - args->len - args->minleft;
295 if (diff >= 0)
296 return 1;
297 args->len += diff; /* shrink the allocated space */
298 if (args->len >= args->minlen)
299 return 1;
300 args->agbno = NULLAGBLOCK;
301 return 0;
302}
303
304/*
305 * Update the two btrees, logically removing from freespace the extent
306 * starting at rbno, rlen blocks. The extent is contained within the
307 * actual (current) free extent fbno for flen blocks.
308 * Flags are passed in indicating whether the cursors are set to the
309 * relevant records.
310 */
311STATIC int /* error code */
312xfs_alloc_fixup_trees(
313 xfs_btree_cur_t *cnt_cur, /* cursor for by-size btree */
314 xfs_btree_cur_t *bno_cur, /* cursor for by-block btree */
315 xfs_agblock_t fbno, /* starting block of free extent */
316 xfs_extlen_t flen, /* length of free extent */
317 xfs_agblock_t rbno, /* starting block of returned extent */
318 xfs_extlen_t rlen, /* length of returned extent */
319 int flags) /* flags, XFSA_FIXUP_... */
320{
321 int error; /* error code */
322 int i; /* operation results */
323 xfs_agblock_t nfbno1; /* first new free startblock */
324 xfs_agblock_t nfbno2; /* second new free startblock */
325 xfs_extlen_t nflen1=0; /* first new free length */
326 xfs_extlen_t nflen2=0; /* second new free length */
327
328 /*
329 * Look up the record in the by-size tree if necessary.
330 */
331 if (flags & XFSA_FIXUP_CNT_OK) {
332#ifdef DEBUG
333 if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
334 return error;
335 XFS_WANT_CORRUPTED_RETURN(
336 i == 1 && nfbno1 == fbno && nflen1 == flen);
337#endif
338 } else {
339 if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
340 return error;
341 XFS_WANT_CORRUPTED_RETURN(i == 1);
342 }
343 /*
344 * Look up the record in the by-block tree if necessary.
345 */
346 if (flags & XFSA_FIXUP_BNO_OK) {
347#ifdef DEBUG
348 if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
349 return error;
350 XFS_WANT_CORRUPTED_RETURN(
351 i == 1 && nfbno1 == fbno && nflen1 == flen);
352#endif
353 } else {
354 if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
355 return error;
356 XFS_WANT_CORRUPTED_RETURN(i == 1);
357 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358
Christoph Hellwig7cc95a82008-10-30 17:14:34 +1100359#ifdef DEBUG
360 if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
361 struct xfs_btree_block *bnoblock;
362 struct xfs_btree_block *cntblock;
363
364 bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
365 cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
366
367 XFS_WANT_CORRUPTED_RETURN(
368 bnoblock->bb_numrecs == cntblock->bb_numrecs);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700369 }
370#endif
Christoph Hellwig7cc95a82008-10-30 17:14:34 +1100371
Linus Torvalds1da177e2005-04-16 15:20:36 -0700372 /*
373 * Deal with all four cases: the allocated record is contained
374 * within the freespace record, so we can have new freespace
375 * at either (or both) end, or no freespace remaining.
376 */
377 if (rbno == fbno && rlen == flen)
378 nfbno1 = nfbno2 = NULLAGBLOCK;
379 else if (rbno == fbno) {
380 nfbno1 = rbno + rlen;
381 nflen1 = flen - rlen;
382 nfbno2 = NULLAGBLOCK;
383 } else if (rbno + rlen == fbno + flen) {
384 nfbno1 = fbno;
385 nflen1 = flen - rlen;
386 nfbno2 = NULLAGBLOCK;
387 } else {
388 nfbno1 = fbno;
389 nflen1 = rbno - fbno;
390 nfbno2 = rbno + rlen;
391 nflen2 = (fbno + flen) - nfbno2;
392 }
393 /*
394 * Delete the entry from the by-size btree.
395 */
Christoph Hellwig91cca5df2008-10-30 16:58:01 +1100396 if ((error = xfs_btree_delete(cnt_cur, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397 return error;
398 XFS_WANT_CORRUPTED_RETURN(i == 1);
399 /*
400 * Add new by-size btree entry(s).
401 */
402 if (nfbno1 != NULLAGBLOCK) {
403 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
404 return error;
405 XFS_WANT_CORRUPTED_RETURN(i == 0);
Christoph Hellwig4b22a572008-10-30 16:57:40 +1100406 if ((error = xfs_btree_insert(cnt_cur, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407 return error;
408 XFS_WANT_CORRUPTED_RETURN(i == 1);
409 }
410 if (nfbno2 != NULLAGBLOCK) {
411 if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
412 return error;
413 XFS_WANT_CORRUPTED_RETURN(i == 0);
Christoph Hellwig4b22a572008-10-30 16:57:40 +1100414 if ((error = xfs_btree_insert(cnt_cur, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415 return error;
416 XFS_WANT_CORRUPTED_RETURN(i == 1);
417 }
418 /*
419 * Fix up the by-block btree entry(s).
420 */
421 if (nfbno1 == NULLAGBLOCK) {
422 /*
423 * No remaining freespace, just delete the by-block tree entry.
424 */
Christoph Hellwig91cca5df2008-10-30 16:58:01 +1100425 if ((error = xfs_btree_delete(bno_cur, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426 return error;
427 XFS_WANT_CORRUPTED_RETURN(i == 1);
428 } else {
429 /*
430 * Update the by-block entry to start later|be shorter.
431 */
432 if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
433 return error;
434 }
435 if (nfbno2 != NULLAGBLOCK) {
436 /*
437 * 2 resulting free entries, need to add one.
438 */
439 if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
440 return error;
441 XFS_WANT_CORRUPTED_RETURN(i == 0);
Christoph Hellwig4b22a572008-10-30 16:57:40 +1100442 if ((error = xfs_btree_insert(bno_cur, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700443 return error;
444 XFS_WANT_CORRUPTED_RETURN(i == 1);
445 }
446 return 0;
447}
448
449/*
450 * Read in the allocation group free block array.
451 */
452STATIC int /* error */
453xfs_alloc_read_agfl(
454 xfs_mount_t *mp, /* mount point structure */
455 xfs_trans_t *tp, /* transaction pointer */
456 xfs_agnumber_t agno, /* allocation group number */
457 xfs_buf_t **bpp) /* buffer for the ag free block array */
458{
459 xfs_buf_t *bp; /* return value */
460 int error;
461
462 ASSERT(agno != NULLAGNUMBER);
463 error = xfs_trans_read_buf(
464 mp, tp, mp->m_ddev_targp,
465 XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
466 XFS_FSS_TO_BB(mp, 1), 0, &bp);
467 if (error)
468 return error;
469 ASSERT(bp);
470 ASSERT(!XFS_BUF_GETERROR(bp));
471 XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF);
472 *bpp = bp;
473 return 0;
474}
475
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476/*
477 * Allocation group level functions.
478 */
479
480/*
481 * Allocate a variable extent in the allocation group agno.
482 * Type and bno are used to determine where in the allocation group the
483 * extent will start.
484 * Extent's length (returned in *len) will be between minlen and maxlen,
485 * and of the form k * prod + mod unless there's nothing that large.
486 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
487 */
488STATIC int /* error */
489xfs_alloc_ag_vextent(
490 xfs_alloc_arg_t *args) /* argument structure for allocation */
491{
492 int error=0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700493
494 ASSERT(args->minlen > 0);
495 ASSERT(args->maxlen > 0);
496 ASSERT(args->minlen <= args->maxlen);
497 ASSERT(args->mod < args->prod);
498 ASSERT(args->alignment > 0);
499 /*
500 * Branch to correct routine based on the type.
501 */
502 args->wasfromfl = 0;
503 switch (args->type) {
504 case XFS_ALLOCTYPE_THIS_AG:
505 error = xfs_alloc_ag_vextent_size(args);
506 break;
507 case XFS_ALLOCTYPE_NEAR_BNO:
508 error = xfs_alloc_ag_vextent_near(args);
509 break;
510 case XFS_ALLOCTYPE_THIS_BNO:
511 error = xfs_alloc_ag_vextent_exact(args);
512 break;
513 default:
514 ASSERT(0);
515 /* NOTREACHED */
516 }
517 if (error)
518 return error;
519 /*
520 * If the allocation worked, need to change the agf structure
521 * (and log it), and the superblock.
522 */
523 if (args->agbno != NULLAGBLOCK) {
524 xfs_agf_t *agf; /* allocation group freelist header */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700525 long slen = (long)args->len;
526
527 ASSERT(args->len >= args->minlen && args->len <= args->maxlen);
528 ASSERT(!(args->wasfromfl) || !args->isfl);
529 ASSERT(args->agbno % args->alignment == 0);
530 if (!(args->wasfromfl)) {
531
532 agf = XFS_BUF_TO_AGF(args->agbp);
Marcin Slusarz413d57c2008-02-13 15:03:29 -0800533 be32_add_cpu(&agf->agf_freeblks, -(args->len));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700534 xfs_trans_agblocks_delta(args->tp,
535 -((long)(args->len)));
536 args->pag->pagf_freeblks -= args->len;
Christoph Hellwig16259e72005-11-02 15:11:25 +1100537 ASSERT(be32_to_cpu(agf->agf_freeblks) <=
538 be32_to_cpu(agf->agf_length));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700539 xfs_alloc_log_agf(args->tp, args->agbp,
540 XFS_AGF_FREEBLKS);
Dave Chinnered3b4d62010-05-21 12:07:08 +1000541 /*
542 * Search the busylist for these blocks and mark the
543 * transaction as synchronous if blocks are found. This
544 * avoids the need to block due to a synchronous log
545 * force to ensure correct ordering as the synchronous
546 * transaction will guarantee that for us.
547 */
548 if (xfs_alloc_busy_search(args->mp, args->agno,
549 args->agbno, args->len))
550 xfs_trans_set_sync(args->tp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700551 }
552 if (!args->isfl)
553 xfs_trans_mod_sb(args->tp,
554 args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS :
555 XFS_TRANS_SB_FDBLOCKS, -slen);
556 XFS_STATS_INC(xs_allocx);
557 XFS_STATS_ADD(xs_allocb, args->len);
558 }
559 return 0;
560}
561
562/*
563 * Allocate a variable extent at exactly agno/bno.
564 * Extent's length (returned in *len) will be between minlen and maxlen,
565 * and of the form k * prod + mod unless there's nothing that large.
566 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
567 */
568STATIC int /* error */
569xfs_alloc_ag_vextent_exact(
570 xfs_alloc_arg_t *args) /* allocation argument structure */
571{
572 xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */
573 xfs_btree_cur_t *cnt_cur;/* by count btree cursor */
574 xfs_agblock_t end; /* end of allocated extent */
575 int error;
576 xfs_agblock_t fbno; /* start block of found extent */
577 xfs_agblock_t fend; /* end block of found extent */
578 xfs_extlen_t flen; /* length of found extent */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579 int i; /* success/failure of operation */
580 xfs_agblock_t maxend; /* end of maximal extent */
581 xfs_agblock_t minend; /* end of minimal extent */
582 xfs_extlen_t rlen; /* length of returned extent */
583
584 ASSERT(args->alignment == 1);
585 /*
586 * Allocate/initialize a cursor for the by-number freespace btree.
587 */
Christoph Hellwig561f7d12008-10-30 16:53:59 +1100588 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
589 args->agno, XFS_BTNUM_BNO);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590 /*
591 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
592 * Look for the closest free block <= bno, it must contain bno
593 * if any free block does.
594 */
595 if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i)))
596 goto error0;
597 if (!i) {
598 /*
599 * Didn't find it, return null.
600 */
601 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
602 args->agbno = NULLAGBLOCK;
603 return 0;
604 }
605 /*
606 * Grab the freespace record.
607 */
608 if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i)))
609 goto error0;
610 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
611 ASSERT(fbno <= args->agbno);
612 minend = args->agbno + args->minlen;
613 maxend = args->agbno + args->maxlen;
614 fend = fbno + flen;
615 /*
616 * Give up if the freespace isn't long enough for the minimum request.
617 */
618 if (fend < minend) {
619 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
620 args->agbno = NULLAGBLOCK;
621 return 0;
622 }
623 /*
624 * End of extent will be smaller of the freespace end and the
625 * maximal requested end.
626 */
627 end = XFS_AGBLOCK_MIN(fend, maxend);
628 /*
629 * Fix the length according to mod and prod if given.
630 */
631 args->len = end - args->agbno;
632 xfs_alloc_fix_len(args);
633 if (!xfs_alloc_fix_minleft(args)) {
634 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
635 return 0;
636 }
637 rlen = args->len;
638 ASSERT(args->agbno + rlen <= fend);
639 end = args->agbno + rlen;
640 /*
641 * We are allocating agbno for rlen [agbno .. end]
642 * Allocate/initialize a cursor for the by-size btree.
643 */
Christoph Hellwig561f7d12008-10-30 16:53:59 +1100644 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
645 args->agno, XFS_BTNUM_CNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646 ASSERT(args->agbno + args->len <=
Christoph Hellwig16259e72005-11-02 15:11:25 +1100647 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700648 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
649 args->agbno, args->len, XFSA_FIXUP_BNO_OK))) {
650 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
651 goto error0;
652 }
653 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
654 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
Christoph Hellwig0b1b2132009-12-14 23:14:59 +0000655
656 trace_xfs_alloc_exact_done(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700657 args->wasfromfl = 0;
658 return 0;
659
660error0:
661 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
Christoph Hellwig0b1b2132009-12-14 23:14:59 +0000662 trace_xfs_alloc_exact_error(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700663 return error;
664}
665
666/*
667 * Allocate a variable extent near bno in the allocation group agno.
668 * Extent's length (returned in len) will be between minlen and maxlen,
669 * and of the form k * prod + mod unless there's nothing that large.
670 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
671 */
672STATIC int /* error */
673xfs_alloc_ag_vextent_near(
674 xfs_alloc_arg_t *args) /* allocation argument structure */
675{
676 xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */
677 xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */
678 xfs_btree_cur_t *cnt_cur; /* cursor for count btree */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700679 xfs_agblock_t gtbno; /* start bno of right side entry */
680 xfs_agblock_t gtbnoa; /* aligned ... */
681 xfs_extlen_t gtdiff; /* difference to right side entry */
682 xfs_extlen_t gtlen; /* length of right side entry */
683 xfs_extlen_t gtlena; /* aligned ... */
684 xfs_agblock_t gtnew; /* useful start bno of right side */
685 int error; /* error code */
686 int i; /* result code, temporary */
687 int j; /* result code, temporary */
688 xfs_agblock_t ltbno; /* start bno of left side entry */
689 xfs_agblock_t ltbnoa; /* aligned ... */
690 xfs_extlen_t ltdiff; /* difference to left side entry */
691 /*REFERENCED*/
692 xfs_agblock_t ltend; /* end bno of left side entry */
693 xfs_extlen_t ltlen; /* length of left side entry */
694 xfs_extlen_t ltlena; /* aligned ... */
695 xfs_agblock_t ltnew; /* useful start bno of left side */
696 xfs_extlen_t rlen; /* length of returned extent */
697#if defined(DEBUG) && defined(__KERNEL__)
698 /*
699 * Randomly don't execute the first algorithm.
700 */
701 int dofirst; /* set to do first algorithm */
702
Joe Perchese7a23a92007-05-08 13:49:03 +1000703 dofirst = random32() & 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704#endif
705 /*
706 * Get a cursor for the by-size btree.
707 */
Christoph Hellwig561f7d12008-10-30 16:53:59 +1100708 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
709 args->agno, XFS_BTNUM_CNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710 ltlen = 0;
711 bno_cur_lt = bno_cur_gt = NULL;
712 /*
713 * See if there are any free extents as big as maxlen.
714 */
715 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
716 goto error0;
717 /*
718 * If none, then pick up the last entry in the tree unless the
719 * tree is empty.
720 */
721 if (!i) {
722 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
723 &ltlen, &i)))
724 goto error0;
725 if (i == 0 || ltlen == 0) {
726 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
727 return 0;
728 }
729 ASSERT(i == 1);
730 }
731 args->wasfromfl = 0;
732 /*
733 * First algorithm.
734 * If the requested extent is large wrt the freespaces available
735 * in this a.g., then the cursor will be pointing to a btree entry
736 * near the right edge of the tree. If it's in the last btree leaf
737 * block, then we just examine all the entries in that block
738 * that are big enough, and pick the best one.
739 * This is written as a while loop so we can break out of it,
740 * but we never loop back to the top.
741 */
742 while (xfs_btree_islastblock(cnt_cur, 0)) {
743 xfs_extlen_t bdiff;
744 int besti=0;
745 xfs_extlen_t blen=0;
746 xfs_agblock_t bnew=0;
747
748#if defined(DEBUG) && defined(__KERNEL__)
749 if (!dofirst)
750 break;
751#endif
752 /*
753 * Start from the entry that lookup found, sequence through
754 * all larger free blocks. If we're actually pointing at a
755 * record smaller than maxlen, go to the start of this block,
756 * and skip all those smaller than minlen.
757 */
758 if (ltlen || args->alignment > 1) {
759 cnt_cur->bc_ptrs[0] = 1;
760 do {
761 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
762 &ltlen, &i)))
763 goto error0;
764 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
765 if (ltlen >= args->minlen)
766 break;
Christoph Hellwig637aa502008-10-30 16:55:45 +1100767 if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700768 goto error0;
769 } while (i);
770 ASSERT(ltlen >= args->minlen);
771 if (!i)
772 break;
773 }
774 i = cnt_cur->bc_ptrs[0];
775 for (j = 1, blen = 0, bdiff = 0;
776 !error && j && (blen < args->maxlen || bdiff > 0);
Christoph Hellwig637aa502008-10-30 16:55:45 +1100777 error = xfs_btree_increment(cnt_cur, 0, &j)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778 /*
779 * For each entry, decide if it's better than
780 * the previous best entry.
781 */
782 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
783 goto error0;
784 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
David Chinner12375c82008-04-10 12:21:32 +1000785 xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment,
786 args->minlen, &ltbnoa, &ltlena);
David Chinnere6430032008-04-17 16:49:49 +1000787 if (ltlena < args->minlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700788 continue;
789 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
790 xfs_alloc_fix_len(args);
791 ASSERT(args->len >= args->minlen);
792 if (args->len < blen)
793 continue;
794 ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
795 args->alignment, ltbno, ltlen, &ltnew);
796 if (ltnew != NULLAGBLOCK &&
797 (args->len > blen || ltdiff < bdiff)) {
798 bdiff = ltdiff;
799 bnew = ltnew;
800 blen = args->len;
801 besti = cnt_cur->bc_ptrs[0];
802 }
803 }
804 /*
805 * It didn't work. We COULD be in a case where
806 * there's a good record somewhere, so try again.
807 */
808 if (blen == 0)
809 break;
810 /*
811 * Point at the best entry, and retrieve it again.
812 */
813 cnt_cur->bc_ptrs[0] = besti;
814 if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
815 goto error0;
816 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
817 ltend = ltbno + ltlen;
Christoph Hellwig16259e72005-11-02 15:11:25 +1100818 ASSERT(ltend <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700819 args->len = blen;
820 if (!xfs_alloc_fix_minleft(args)) {
821 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
Christoph Hellwig0b1b2132009-12-14 23:14:59 +0000822 trace_xfs_alloc_near_nominleft(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700823 return 0;
824 }
825 blen = args->len;
826 /*
827 * We are allocating starting at bnew for blen blocks.
828 */
829 args->agbno = bnew;
830 ASSERT(bnew >= ltbno);
831 ASSERT(bnew + blen <= ltend);
832 /*
833 * Set up a cursor for the by-bno tree.
834 */
Christoph Hellwig561f7d12008-10-30 16:53:59 +1100835 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
836 args->agbp, args->agno, XFS_BTNUM_BNO);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700837 /*
838 * Fix up the btree entries.
839 */
840 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
841 ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
842 goto error0;
843 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
844 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
Christoph Hellwig0b1b2132009-12-14 23:14:59 +0000845
846 trace_xfs_alloc_near_first(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847 return 0;
848 }
849 /*
850 * Second algorithm.
851 * Search in the by-bno tree to the left and to the right
852 * simultaneously, until in each case we find a space big enough,
853 * or run into the edge of the tree. When we run into the edge,
854 * we deallocate that cursor.
855 * If both searches succeed, we compare the two spaces and pick
856 * the better one.
857 * With alignment, it's possible for both to fail; the upper
858 * level algorithm that picks allocation groups for allocations
859 * is not supposed to do this.
860 */
861 /*
862 * Allocate and initialize the cursor for the leftward search.
863 */
Christoph Hellwig561f7d12008-10-30 16:53:59 +1100864 bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
865 args->agno, XFS_BTNUM_BNO);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700866 /*
867 * Lookup <= bno to find the leftward search's starting point.
868 */
869 if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
870 goto error0;
871 if (!i) {
872 /*
873 * Didn't find anything; use this cursor for the rightward
874 * search.
875 */
876 bno_cur_gt = bno_cur_lt;
877 bno_cur_lt = NULL;
878 }
879 /*
880 * Found something. Duplicate the cursor for the rightward search.
881 */
882 else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
883 goto error0;
884 /*
885 * Increment the cursor, so we will point at the entry just right
886 * of the leftward entry if any, or to the leftmost entry.
887 */
Christoph Hellwig637aa502008-10-30 16:55:45 +1100888 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700889 goto error0;
890 if (!i) {
891 /*
892 * It failed, there are no rightward entries.
893 */
894 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
895 bno_cur_gt = NULL;
896 }
897 /*
898 * Loop going left with the leftward cursor, right with the
899 * rightward cursor, until either both directions give up or
900 * we find an entry at least as big as minlen.
901 */
902 do {
903 if (bno_cur_lt) {
904 if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
905 goto error0;
906 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
David Chinner12375c82008-04-10 12:21:32 +1000907 xfs_alloc_compute_aligned(ltbno, ltlen, args->alignment,
908 args->minlen, &ltbnoa, &ltlena);
909 if (ltlena >= args->minlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910 break;
Christoph Hellwig8df4da42008-10-30 16:55:58 +1100911 if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700912 goto error0;
913 if (!i) {
914 xfs_btree_del_cursor(bno_cur_lt,
915 XFS_BTREE_NOERROR);
916 bno_cur_lt = NULL;
917 }
918 }
919 if (bno_cur_gt) {
920 if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
921 goto error0;
922 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
David Chinner12375c82008-04-10 12:21:32 +1000923 xfs_alloc_compute_aligned(gtbno, gtlen, args->alignment,
924 args->minlen, &gtbnoa, &gtlena);
925 if (gtlena >= args->minlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926 break;
Christoph Hellwig637aa502008-10-30 16:55:45 +1100927 if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700928 goto error0;
929 if (!i) {
930 xfs_btree_del_cursor(bno_cur_gt,
931 XFS_BTREE_NOERROR);
932 bno_cur_gt = NULL;
933 }
934 }
935 } while (bno_cur_lt || bno_cur_gt);
936 /*
937 * Got both cursors still active, need to find better entry.
938 */
939 if (bno_cur_lt && bno_cur_gt) {
940 /*
941 * Left side is long enough, look for a right side entry.
942 */
943 if (ltlena >= args->minlen) {
944 /*
945 * Fix up the length.
946 */
947 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
948 xfs_alloc_fix_len(args);
949 rlen = args->len;
950 ltdiff = xfs_alloc_compute_diff(args->agbno, rlen,
951 args->alignment, ltbno, ltlen, &ltnew);
952 /*
953 * Not perfect.
954 */
955 if (ltdiff) {
956 /*
957 * Look until we find a better one, run out of
958 * space, or run off the end.
959 */
960 while (bno_cur_lt && bno_cur_gt) {
961 if ((error = xfs_alloc_get_rec(
962 bno_cur_gt, &gtbno,
963 &gtlen, &i)))
964 goto error0;
965 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
966 xfs_alloc_compute_aligned(gtbno, gtlen,
967 args->alignment, args->minlen,
968 &gtbnoa, &gtlena);
969 /*
970 * The left one is clearly better.
971 */
972 if (gtbnoa >= args->agbno + ltdiff) {
973 xfs_btree_del_cursor(
974 bno_cur_gt,
975 XFS_BTREE_NOERROR);
976 bno_cur_gt = NULL;
977 break;
978 }
979 /*
980 * If we reach a big enough entry,
981 * compare the two and pick the best.
982 */
983 if (gtlena >= args->minlen) {
984 args->len =
985 XFS_EXTLEN_MIN(gtlena,
986 args->maxlen);
987 xfs_alloc_fix_len(args);
988 rlen = args->len;
989 gtdiff = xfs_alloc_compute_diff(
990 args->agbno, rlen,
991 args->alignment,
992 gtbno, gtlen, &gtnew);
993 /*
994 * Right side is better.
995 */
996 if (gtdiff < ltdiff) {
997 xfs_btree_del_cursor(
998 bno_cur_lt,
999 XFS_BTREE_NOERROR);
1000 bno_cur_lt = NULL;
1001 }
1002 /*
1003 * Left side is better.
1004 */
1005 else {
1006 xfs_btree_del_cursor(
1007 bno_cur_gt,
1008 XFS_BTREE_NOERROR);
1009 bno_cur_gt = NULL;
1010 }
1011 break;
1012 }
1013 /*
1014 * Fell off the right end.
1015 */
Christoph Hellwig637aa502008-10-30 16:55:45 +11001016 if ((error = xfs_btree_increment(
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017 bno_cur_gt, 0, &i)))
1018 goto error0;
1019 if (!i) {
1020 xfs_btree_del_cursor(
1021 bno_cur_gt,
1022 XFS_BTREE_NOERROR);
1023 bno_cur_gt = NULL;
1024 break;
1025 }
1026 }
1027 }
1028 /*
1029 * The left side is perfect, trash the right side.
1030 */
1031 else {
1032 xfs_btree_del_cursor(bno_cur_gt,
1033 XFS_BTREE_NOERROR);
1034 bno_cur_gt = NULL;
1035 }
1036 }
1037 /*
1038 * It's the right side that was found first, look left.
1039 */
1040 else {
1041 /*
1042 * Fix up the length.
1043 */
1044 args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
1045 xfs_alloc_fix_len(args);
1046 rlen = args->len;
1047 gtdiff = xfs_alloc_compute_diff(args->agbno, rlen,
1048 args->alignment, gtbno, gtlen, &gtnew);
1049 /*
1050 * Right side entry isn't perfect.
1051 */
1052 if (gtdiff) {
1053 /*
1054 * Look until we find a better one, run out of
1055 * space, or run off the end.
1056 */
1057 while (bno_cur_lt && bno_cur_gt) {
1058 if ((error = xfs_alloc_get_rec(
1059 bno_cur_lt, &ltbno,
1060 &ltlen, &i)))
1061 goto error0;
1062 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1063 xfs_alloc_compute_aligned(ltbno, ltlen,
1064 args->alignment, args->minlen,
1065 &ltbnoa, &ltlena);
1066 /*
1067 * The right one is clearly better.
1068 */
1069 if (ltbnoa <= args->agbno - gtdiff) {
1070 xfs_btree_del_cursor(
1071 bno_cur_lt,
1072 XFS_BTREE_NOERROR);
1073 bno_cur_lt = NULL;
1074 break;
1075 }
1076 /*
1077 * If we reach a big enough entry,
1078 * compare the two and pick the best.
1079 */
1080 if (ltlena >= args->minlen) {
1081 args->len = XFS_EXTLEN_MIN(
1082 ltlena, args->maxlen);
1083 xfs_alloc_fix_len(args);
1084 rlen = args->len;
1085 ltdiff = xfs_alloc_compute_diff(
1086 args->agbno, rlen,
1087 args->alignment,
1088 ltbno, ltlen, &ltnew);
1089 /*
1090 * Left side is better.
1091 */
1092 if (ltdiff < gtdiff) {
1093 xfs_btree_del_cursor(
1094 bno_cur_gt,
1095 XFS_BTREE_NOERROR);
1096 bno_cur_gt = NULL;
1097 }
1098 /*
1099 * Right side is better.
1100 */
1101 else {
1102 xfs_btree_del_cursor(
1103 bno_cur_lt,
1104 XFS_BTREE_NOERROR);
1105 bno_cur_lt = NULL;
1106 }
1107 break;
1108 }
1109 /*
1110 * Fell off the left end.
1111 */
Christoph Hellwig8df4da42008-10-30 16:55:58 +11001112 if ((error = xfs_btree_decrement(
Linus Torvalds1da177e2005-04-16 15:20:36 -07001113 bno_cur_lt, 0, &i)))
1114 goto error0;
1115 if (!i) {
1116 xfs_btree_del_cursor(bno_cur_lt,
1117 XFS_BTREE_NOERROR);
1118 bno_cur_lt = NULL;
1119 break;
1120 }
1121 }
1122 }
1123 /*
1124 * The right side is perfect, trash the left side.
1125 */
1126 else {
1127 xfs_btree_del_cursor(bno_cur_lt,
1128 XFS_BTREE_NOERROR);
1129 bno_cur_lt = NULL;
1130 }
1131 }
1132 }
1133 /*
1134 * If we couldn't get anything, give up.
1135 */
1136 if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00001137 trace_xfs_alloc_size_neither(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001138 args->agbno = NULLAGBLOCK;
1139 return 0;
1140 }
1141 /*
1142 * At this point we have selected a freespace entry, either to the
1143 * left or to the right. If it's on the right, copy all the
1144 * useful variables to the "left" set so we only have one
1145 * copy of this code.
1146 */
1147 if (bno_cur_gt) {
1148 bno_cur_lt = bno_cur_gt;
1149 bno_cur_gt = NULL;
1150 ltbno = gtbno;
1151 ltbnoa = gtbnoa;
1152 ltlen = gtlen;
1153 ltlena = gtlena;
1154 j = 1;
1155 } else
1156 j = 0;
1157 /*
1158 * Fix up the length and compute the useful address.
1159 */
1160 ltend = ltbno + ltlen;
1161 args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
1162 xfs_alloc_fix_len(args);
1163 if (!xfs_alloc_fix_minleft(args)) {
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00001164 trace_xfs_alloc_near_nominleft(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001165 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1166 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1167 return 0;
1168 }
1169 rlen = args->len;
1170 (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno,
1171 ltlen, &ltnew);
1172 ASSERT(ltnew >= ltbno);
1173 ASSERT(ltnew + rlen <= ltend);
Christoph Hellwig16259e72005-11-02 15:11:25 +11001174 ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001175 args->agbno = ltnew;
1176 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
1177 ltnew, rlen, XFSA_FIXUP_BNO_OK)))
1178 goto error0;
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00001179
1180 if (j)
1181 trace_xfs_alloc_near_greater(args);
1182 else
1183 trace_xfs_alloc_near_lesser(args);
1184
Linus Torvalds1da177e2005-04-16 15:20:36 -07001185 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1186 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
1187 return 0;
1188
1189 error0:
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00001190 trace_xfs_alloc_near_error(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001191 if (cnt_cur != NULL)
1192 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1193 if (bno_cur_lt != NULL)
1194 xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
1195 if (bno_cur_gt != NULL)
1196 xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
1197 return error;
1198}
1199
1200/*
1201 * Allocate a variable extent anywhere in the allocation group agno.
1202 * Extent's length (returned in len) will be between minlen and maxlen,
1203 * and of the form k * prod + mod unless there's nothing that large.
1204 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1205 */
1206STATIC int /* error */
1207xfs_alloc_ag_vextent_size(
1208 xfs_alloc_arg_t *args) /* allocation argument structure */
1209{
1210 xfs_btree_cur_t *bno_cur; /* cursor for bno btree */
1211 xfs_btree_cur_t *cnt_cur; /* cursor for cnt btree */
1212 int error; /* error result */
1213 xfs_agblock_t fbno; /* start of found freespace */
1214 xfs_extlen_t flen; /* length of found freespace */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215 int i; /* temp status variable */
1216 xfs_agblock_t rbno; /* returned block number */
1217 xfs_extlen_t rlen; /* length of returned extent */
1218
1219 /*
1220 * Allocate and initialize a cursor for the by-size btree.
1221 */
Christoph Hellwig561f7d12008-10-30 16:53:59 +11001222 cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1223 args->agno, XFS_BTNUM_CNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001224 bno_cur = NULL;
1225 /*
1226 * Look for an entry >= maxlen+alignment-1 blocks.
1227 */
1228 if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1229 args->maxlen + args->alignment - 1, &i)))
1230 goto error0;
1231 /*
1232 * If none, then pick up the last entry in the tree unless the
1233 * tree is empty.
1234 */
1235 if (!i) {
1236 if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno,
1237 &flen, &i)))
1238 goto error0;
1239 if (i == 0 || flen == 0) {
1240 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00001241 trace_xfs_alloc_size_noentry(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001242 return 0;
1243 }
1244 ASSERT(i == 1);
1245 }
1246 /*
1247 * There's a freespace as big as maxlen+alignment-1, get it.
1248 */
1249 else {
1250 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i)))
1251 goto error0;
1252 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1253 }
1254 /*
1255 * In the first case above, we got the last entry in the
1256 * by-size btree. Now we check to see if the space hits maxlen
1257 * once aligned; if not, we search left for something better.
1258 * This can't happen in the second case above.
1259 */
1260 xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen,
1261 &rbno, &rlen);
1262 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1263 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1264 (rlen <= flen && rbno + rlen <= fbno + flen), error0);
1265 if (rlen < args->maxlen) {
1266 xfs_agblock_t bestfbno;
1267 xfs_extlen_t bestflen;
1268 xfs_agblock_t bestrbno;
1269 xfs_extlen_t bestrlen;
1270
1271 bestrlen = rlen;
1272 bestrbno = rbno;
1273 bestflen = flen;
1274 bestfbno = fbno;
1275 for (;;) {
Christoph Hellwig8df4da42008-10-30 16:55:58 +11001276 if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001277 goto error0;
1278 if (i == 0)
1279 break;
1280 if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1281 &i)))
1282 goto error0;
1283 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1284 if (flen < bestrlen)
1285 break;
1286 xfs_alloc_compute_aligned(fbno, flen, args->alignment,
1287 args->minlen, &rbno, &rlen);
1288 rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1289 XFS_WANT_CORRUPTED_GOTO(rlen == 0 ||
1290 (rlen <= flen && rbno + rlen <= fbno + flen),
1291 error0);
1292 if (rlen > bestrlen) {
1293 bestrlen = rlen;
1294 bestrbno = rbno;
1295 bestflen = flen;
1296 bestfbno = fbno;
1297 if (rlen == args->maxlen)
1298 break;
1299 }
1300 }
1301 if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1302 &i)))
1303 goto error0;
1304 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1305 rlen = bestrlen;
1306 rbno = bestrbno;
1307 flen = bestflen;
1308 fbno = bestfbno;
1309 }
1310 args->wasfromfl = 0;
1311 /*
1312 * Fix up the length.
1313 */
1314 args->len = rlen;
1315 xfs_alloc_fix_len(args);
1316 if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) {
1317 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00001318 trace_xfs_alloc_size_nominleft(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001319 args->agbno = NULLAGBLOCK;
1320 return 0;
1321 }
1322 rlen = args->len;
1323 XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0);
1324 /*
1325 * Allocate and initialize a cursor for the by-block tree.
1326 */
Christoph Hellwig561f7d12008-10-30 16:53:59 +11001327 bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1328 args->agno, XFS_BTNUM_BNO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001329 if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1330 rbno, rlen, XFSA_FIXUP_CNT_OK)))
1331 goto error0;
1332 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1333 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1334 cnt_cur = bno_cur = NULL;
1335 args->len = rlen;
1336 args->agbno = rbno;
1337 XFS_WANT_CORRUPTED_GOTO(
1338 args->agbno + args->len <=
Christoph Hellwig16259e72005-11-02 15:11:25 +11001339 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
Linus Torvalds1da177e2005-04-16 15:20:36 -07001340 error0);
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00001341 trace_xfs_alloc_size_done(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001342 return 0;
1343
1344error0:
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00001345 trace_xfs_alloc_size_error(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001346 if (cnt_cur)
1347 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1348 if (bno_cur)
1349 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1350 return error;
1351}
1352
1353/*
1354 * Deal with the case where only small freespaces remain.
1355 * Either return the contents of the last freespace record,
1356 * or allocate space from the freelist if there is nothing in the tree.
1357 */
1358STATIC int /* error */
1359xfs_alloc_ag_vextent_small(
1360 xfs_alloc_arg_t *args, /* allocation argument structure */
1361 xfs_btree_cur_t *ccur, /* by-size cursor */
1362 xfs_agblock_t *fbnop, /* result block number */
1363 xfs_extlen_t *flenp, /* result length */
1364 int *stat) /* status: 0-freelist, 1-normal/none */
1365{
1366 int error;
1367 xfs_agblock_t fbno;
1368 xfs_extlen_t flen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 int i;
1370
Christoph Hellwig8df4da42008-10-30 16:55:58 +11001371 if ((error = xfs_btree_decrement(ccur, 0, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001372 goto error0;
1373 if (i) {
1374 if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i)))
1375 goto error0;
1376 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1377 }
1378 /*
1379 * Nothing in the btree, try the freelist. Make sure
1380 * to respect minleft even when pulling from the
1381 * freelist.
1382 */
1383 else if (args->minlen == 1 && args->alignment == 1 && !args->isfl &&
Christoph Hellwig16259e72005-11-02 15:11:25 +11001384 (be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_flcount)
1385 > args->minleft)) {
David Chinner92821e22007-05-24 15:26:31 +10001386 error = xfs_alloc_get_freelist(args->tp, args->agbp, &fbno, 0);
1387 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001388 goto error0;
1389 if (fbno != NULLAGBLOCK) {
1390 if (args->userdata) {
1391 xfs_buf_t *bp;
1392
1393 bp = xfs_btree_get_bufs(args->mp, args->tp,
1394 args->agno, fbno, 0);
1395 xfs_trans_binval(args->tp, bp);
1396 }
1397 args->len = 1;
1398 args->agbno = fbno;
1399 XFS_WANT_CORRUPTED_GOTO(
1400 args->agbno + args->len <=
Christoph Hellwig16259e72005-11-02 15:11:25 +11001401 be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
Linus Torvalds1da177e2005-04-16 15:20:36 -07001402 error0);
1403 args->wasfromfl = 1;
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00001404 trace_xfs_alloc_small_freelist(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001405 *stat = 0;
1406 return 0;
1407 }
1408 /*
1409 * Nothing in the freelist.
1410 */
1411 else
1412 flen = 0;
1413 }
1414 /*
1415 * Can't allocate from the freelist for some reason.
1416 */
Nathan Scottd432c802006-09-28 11:03:44 +10001417 else {
1418 fbno = NULLAGBLOCK;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001419 flen = 0;
Nathan Scottd432c802006-09-28 11:03:44 +10001420 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001421 /*
1422 * Can't do the allocation, give up.
1423 */
1424 if (flen < args->minlen) {
1425 args->agbno = NULLAGBLOCK;
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00001426 trace_xfs_alloc_small_notenough(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001427 flen = 0;
1428 }
1429 *fbnop = fbno;
1430 *flenp = flen;
1431 *stat = 1;
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00001432 trace_xfs_alloc_small_done(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001433 return 0;
1434
1435error0:
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00001436 trace_xfs_alloc_small_error(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001437 return error;
1438}
1439
1440/*
1441 * Free the extent starting at agno/bno for length.
1442 */
1443STATIC int /* error */
1444xfs_free_ag_extent(
1445 xfs_trans_t *tp, /* transaction pointer */
1446 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
1447 xfs_agnumber_t agno, /* allocation group number */
1448 xfs_agblock_t bno, /* starting block number */
1449 xfs_extlen_t len, /* length of extent */
1450 int isfl) /* set if is freelist blocks - no sb acctg */
1451{
1452 xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */
1453 xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */
1454 int error; /* error return value */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001455 xfs_agblock_t gtbno; /* start of right neighbor block */
1456 xfs_extlen_t gtlen; /* length of right neighbor block */
1457 int haveleft; /* have a left neighbor block */
1458 int haveright; /* have a right neighbor block */
1459 int i; /* temp, result code */
1460 xfs_agblock_t ltbno; /* start of left neighbor block */
1461 xfs_extlen_t ltlen; /* length of left neighbor block */
1462 xfs_mount_t *mp; /* mount point struct for filesystem */
1463 xfs_agblock_t nbno; /* new starting block of freespace */
1464 xfs_extlen_t nlen; /* new length of freespace */
1465
1466 mp = tp->t_mountp;
1467 /*
1468 * Allocate and initialize a cursor for the by-block btree.
1469 */
Christoph Hellwig561f7d12008-10-30 16:53:59 +11001470 bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471 cnt_cur = NULL;
1472 /*
1473 * Look for a neighboring block on the left (lower block numbers)
1474 * that is contiguous with this space.
1475 */
1476 if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1477 goto error0;
1478 if (haveleft) {
1479 /*
1480 * There is a block to our left.
1481 */
1482 if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
1483 goto error0;
1484 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1485 /*
1486 * It's not contiguous, though.
1487 */
1488 if (ltbno + ltlen < bno)
1489 haveleft = 0;
1490 else {
1491 /*
1492 * If this failure happens the request to free this
1493 * space was invalid, it's (partly) already free.
1494 * Very bad.
1495 */
1496 XFS_WANT_CORRUPTED_GOTO(ltbno + ltlen <= bno, error0);
1497 }
1498 }
1499 /*
1500 * Look for a neighboring block on the right (higher block numbers)
1501 * that is contiguous with this space.
1502 */
Christoph Hellwig637aa502008-10-30 16:55:45 +11001503 if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001504 goto error0;
1505 if (haveright) {
1506 /*
1507 * There is a block to our right.
1508 */
1509 if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
1510 goto error0;
1511 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1512 /*
1513 * It's not contiguous, though.
1514 */
1515 if (bno + len < gtbno)
1516 haveright = 0;
1517 else {
1518 /*
1519 * If this failure happens the request to free this
1520 * space was invalid, it's (partly) already free.
1521 * Very bad.
1522 */
1523 XFS_WANT_CORRUPTED_GOTO(gtbno >= bno + len, error0);
1524 }
1525 }
1526 /*
1527 * Now allocate and initialize a cursor for the by-size tree.
1528 */
Christoph Hellwig561f7d12008-10-30 16:53:59 +11001529 cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001530 /*
1531 * Have both left and right contiguous neighbors.
1532 * Merge all three into a single free block.
1533 */
1534 if (haveleft && haveright) {
1535 /*
1536 * Delete the old by-size entry on the left.
1537 */
1538 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1539 goto error0;
1540 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11001541 if ((error = xfs_btree_delete(cnt_cur, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001542 goto error0;
1543 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1544 /*
1545 * Delete the old by-size entry on the right.
1546 */
1547 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1548 goto error0;
1549 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11001550 if ((error = xfs_btree_delete(cnt_cur, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001551 goto error0;
1552 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1553 /*
1554 * Delete the old by-block entry for the right block.
1555 */
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11001556 if ((error = xfs_btree_delete(bno_cur, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001557 goto error0;
1558 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1559 /*
1560 * Move the by-block cursor back to the left neighbor.
1561 */
Christoph Hellwig8df4da42008-10-30 16:55:58 +11001562 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001563 goto error0;
1564 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1565#ifdef DEBUG
1566 /*
1567 * Check that this is the right record: delete didn't
1568 * mangle the cursor.
1569 */
1570 {
1571 xfs_agblock_t xxbno;
1572 xfs_extlen_t xxlen;
1573
1574 if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
1575 &i)))
1576 goto error0;
1577 XFS_WANT_CORRUPTED_GOTO(
1578 i == 1 && xxbno == ltbno && xxlen == ltlen,
1579 error0);
1580 }
1581#endif
1582 /*
1583 * Update remaining by-block entry to the new, joined block.
1584 */
1585 nbno = ltbno;
1586 nlen = len + ltlen + gtlen;
1587 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1588 goto error0;
1589 }
1590 /*
1591 * Have only a left contiguous neighbor.
1592 * Merge it together with the new freespace.
1593 */
1594 else if (haveleft) {
1595 /*
1596 * Delete the old by-size entry on the left.
1597 */
1598 if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
1599 goto error0;
1600 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11001601 if ((error = xfs_btree_delete(cnt_cur, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001602 goto error0;
1603 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1604 /*
1605 * Back up the by-block cursor to the left neighbor, and
1606 * update its length.
1607 */
Christoph Hellwig8df4da42008-10-30 16:55:58 +11001608 if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 goto error0;
1610 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1611 nbno = ltbno;
1612 nlen = len + ltlen;
1613 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1614 goto error0;
1615 }
1616 /*
1617 * Have only a right contiguous neighbor.
1618 * Merge it together with the new freespace.
1619 */
1620 else if (haveright) {
1621 /*
1622 * Delete the old by-size entry on the right.
1623 */
1624 if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
1625 goto error0;
1626 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
Christoph Hellwig91cca5df2008-10-30 16:58:01 +11001627 if ((error = xfs_btree_delete(cnt_cur, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001628 goto error0;
1629 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1630 /*
1631 * Update the starting block and length of the right
1632 * neighbor in the by-block tree.
1633 */
1634 nbno = bno;
1635 nlen = len + gtlen;
1636 if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
1637 goto error0;
1638 }
1639 /*
1640 * No contiguous neighbors.
1641 * Insert the new freespace into the by-block tree.
1642 */
1643 else {
1644 nbno = bno;
1645 nlen = len;
Christoph Hellwig4b22a572008-10-30 16:57:40 +11001646 if ((error = xfs_btree_insert(bno_cur, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001647 goto error0;
1648 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1649 }
1650 xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1651 bno_cur = NULL;
1652 /*
1653 * In all cases we need to insert the new freespace in the by-size tree.
1654 */
1655 if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
1656 goto error0;
1657 XFS_WANT_CORRUPTED_GOTO(i == 0, error0);
Christoph Hellwig4b22a572008-10-30 16:57:40 +11001658 if ((error = xfs_btree_insert(cnt_cur, &i)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001659 goto error0;
1660 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
1661 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1662 cnt_cur = NULL;
1663 /*
1664 * Update the freespace totals in the ag and superblock.
1665 */
1666 {
1667 xfs_agf_t *agf;
1668 xfs_perag_t *pag; /* per allocation group data */
1669
Dave Chinnera862e0f2010-01-11 11:47:41 +00001670 pag = xfs_perag_get(mp, agno);
1671 pag->pagf_freeblks += len;
1672 xfs_perag_put(pag);
1673
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674 agf = XFS_BUF_TO_AGF(agbp);
Marcin Slusarz413d57c2008-02-13 15:03:29 -08001675 be32_add_cpu(&agf->agf_freeblks, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676 xfs_trans_agblocks_delta(tp, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001677 XFS_WANT_CORRUPTED_GOTO(
Christoph Hellwig16259e72005-11-02 15:11:25 +11001678 be32_to_cpu(agf->agf_freeblks) <=
1679 be32_to_cpu(agf->agf_length),
Linus Torvalds1da177e2005-04-16 15:20:36 -07001680 error0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001681 xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
1682 if (!isfl)
1683 xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len);
1684 XFS_STATS_INC(xs_freex);
1685 XFS_STATS_ADD(xs_freeb, len);
1686 }
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00001687
1688 trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001689
1690 /*
1691 * Since blocks move to the free list without the coordination
1692 * used in xfs_bmap_finish, we can't allow block to be available
1693 * for reallocation and non-transaction writing (user data)
1694 * until we know that the transaction that moved it to the free
1695 * list is permanently on disk. We track the blocks by declaring
1696 * these blocks as "busy"; the busy list is maintained on a per-ag
1697 * basis and each transaction records which entries should be removed
1698 * when the iclog commits to disk. If a busy block is allocated,
1699 * the iclog is pushed up to the LSN that freed the block.
1700 */
Dave Chinnered3b4d62010-05-21 12:07:08 +10001701 xfs_alloc_busy_insert(tp, agno, bno, len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001702 return 0;
1703
1704 error0:
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00001705 trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001706 if (bno_cur)
1707 xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1708 if (cnt_cur)
1709 xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1710 return error;
1711}
1712
1713/*
1714 * Visible (exported) allocation/free functions.
1715 * Some of these are used just by xfs_alloc_btree.c and this file.
1716 */
1717
1718/*
1719 * Compute and fill in value of m_ag_maxlevels.
1720 */
1721void
1722xfs_alloc_compute_maxlevels(
1723 xfs_mount_t *mp) /* file system mount structure */
1724{
1725 int level;
1726 uint maxblocks;
1727 uint maxleafents;
1728 int minleafrecs;
1729 int minnoderecs;
1730
1731 maxleafents = (mp->m_sb.sb_agblocks + 1) / 2;
1732 minleafrecs = mp->m_alloc_mnr[0];
1733 minnoderecs = mp->m_alloc_mnr[1];
1734 maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
1735 for (level = 1; maxblocks > 1; level++)
1736 maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
1737 mp->m_ag_maxlevels = level;
1738}
1739
1740/*
Dave Chinner6cc87642009-03-16 08:29:46 +01001741 * Find the length of the longest extent in an AG.
1742 */
1743xfs_extlen_t
1744xfs_alloc_longest_free_extent(
1745 struct xfs_mount *mp,
1746 struct xfs_perag *pag)
1747{
1748 xfs_extlen_t need, delta = 0;
1749
1750 need = XFS_MIN_FREELIST_PAG(pag, mp);
1751 if (need > pag->pagf_flcount)
1752 delta = need - pag->pagf_flcount;
1753
1754 if (pag->pagf_longest > delta)
1755 return pag->pagf_longest - delta;
1756 return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
1757}
1758
1759/*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001760 * Decide whether to use this allocation group for this allocation.
1761 * If so, fix up the btree freelist's size.
1762 */
1763STATIC int /* error */
1764xfs_alloc_fix_freelist(
1765 xfs_alloc_arg_t *args, /* allocation argument structure */
1766 int flags) /* XFS_ALLOC_FLAG_... */
1767{
1768 xfs_buf_t *agbp; /* agf buffer pointer */
1769 xfs_agf_t *agf; /* a.g. freespace structure pointer */
1770 xfs_buf_t *agflbp;/* agfl buffer pointer */
1771 xfs_agblock_t bno; /* freelist block */
1772 xfs_extlen_t delta; /* new blocks needed in freelist */
1773 int error; /* error result code */
1774 xfs_extlen_t longest;/* longest extent in allocation group */
1775 xfs_mount_t *mp; /* file system mount point structure */
1776 xfs_extlen_t need; /* total blocks needed in freelist */
1777 xfs_perag_t *pag; /* per-ag information structure */
1778 xfs_alloc_arg_t targs; /* local allocation arguments */
1779 xfs_trans_t *tp; /* transaction pointer */
1780
1781 mp = args->mp;
1782
1783 pag = args->pag;
1784 tp = args->tp;
1785 if (!pag->pagf_init) {
1786 if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1787 &agbp)))
1788 return error;
1789 if (!pag->pagf_init) {
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001790 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1791 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001792 args->agbp = NULL;
1793 return 0;
1794 }
1795 } else
1796 agbp = NULL;
1797
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001798 /*
1799 * If this is a metadata preferred pag and we are user data
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800 * then try somewhere else if we are not being asked to
1801 * try harder at this point
1802 */
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001803 if (pag->pagf_metadata && args->userdata &&
1804 (flags & XFS_ALLOC_FLAG_TRYLOCK)) {
1805 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001806 args->agbp = NULL;
1807 return 0;
1808 }
1809
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001810 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001811 /*
1812 * If it looks like there isn't a long enough extent, or enough
1813 * total blocks, reject it.
1814 */
Dave Chinner6cc87642009-03-16 08:29:46 +01001815 need = XFS_MIN_FREELIST_PAG(pag, mp);
1816 longest = xfs_alloc_longest_free_extent(mp, pag);
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001817 if ((args->minlen + args->alignment + args->minalignslop - 1) >
1818 longest ||
1819 ((int)(pag->pagf_freeblks + pag->pagf_flcount -
1820 need - args->total) < (int)args->minleft)) {
1821 if (agbp)
1822 xfs_trans_brelse(tp, agbp);
1823 args->agbp = NULL;
1824 return 0;
1825 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826 }
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001827
Linus Torvalds1da177e2005-04-16 15:20:36 -07001828 /*
1829 * Get the a.g. freespace buffer.
1830 * Can fail if we're not blocking on locks, and it's held.
1831 */
1832 if (agbp == NULL) {
1833 if ((error = xfs_alloc_read_agf(mp, tp, args->agno, flags,
1834 &agbp)))
1835 return error;
1836 if (agbp == NULL) {
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001837 ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
1838 ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001839 args->agbp = NULL;
1840 return 0;
1841 }
1842 }
1843 /*
1844 * Figure out how many blocks we should have in the freelist.
1845 */
1846 agf = XFS_BUF_TO_AGF(agbp);
1847 need = XFS_MIN_FREELIST(agf, mp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001848 /*
1849 * If there isn't enough total or single-extent, reject it.
1850 */
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001851 if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
1852 delta = need > be32_to_cpu(agf->agf_flcount) ?
1853 (need - be32_to_cpu(agf->agf_flcount)) : 0;
1854 longest = be32_to_cpu(agf->agf_longest);
1855 longest = (longest > delta) ? (longest - delta) :
1856 (be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
1857 if ((args->minlen + args->alignment + args->minalignslop - 1) >
1858 longest ||
1859 ((int)(be32_to_cpu(agf->agf_freeblks) +
1860 be32_to_cpu(agf->agf_flcount) - need - args->total) <
1861 (int)args->minleft)) {
1862 xfs_trans_brelse(tp, agbp);
1863 args->agbp = NULL;
1864 return 0;
1865 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866 }
1867 /*
1868 * Make the freelist shorter if it's too long.
1869 */
Christoph Hellwig16259e72005-11-02 15:11:25 +11001870 while (be32_to_cpu(agf->agf_flcount) > need) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001871 xfs_buf_t *bp;
1872
David Chinner92821e22007-05-24 15:26:31 +10001873 error = xfs_alloc_get_freelist(tp, agbp, &bno, 0);
1874 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001875 return error;
1876 if ((error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1)))
1877 return error;
1878 bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0);
1879 xfs_trans_binval(tp, bp);
1880 }
1881 /*
1882 * Initialize the args structure.
1883 */
1884 targs.tp = tp;
1885 targs.mp = mp;
1886 targs.agbp = agbp;
1887 targs.agno = args->agno;
1888 targs.mod = targs.minleft = targs.wasdel = targs.userdata =
1889 targs.minalignslop = 0;
1890 targs.alignment = targs.minlen = targs.prod = targs.isfl = 1;
1891 targs.type = XFS_ALLOCTYPE_THIS_AG;
1892 targs.pag = pag;
1893 if ((error = xfs_alloc_read_agfl(mp, tp, targs.agno, &agflbp)))
1894 return error;
1895 /*
1896 * Make the freelist longer if it's too short.
1897 */
Christoph Hellwig16259e72005-11-02 15:11:25 +11001898 while (be32_to_cpu(agf->agf_flcount) < need) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001899 targs.agbno = 0;
Christoph Hellwig16259e72005-11-02 15:11:25 +11001900 targs.maxlen = need - be32_to_cpu(agf->agf_flcount);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001901 /*
1902 * Allocate as many blocks as possible at once.
1903 */
Nathan Scotte63a3692006-05-08 19:51:58 +10001904 if ((error = xfs_alloc_ag_vextent(&targs))) {
1905 xfs_trans_brelse(tp, agflbp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001906 return error;
Nathan Scotte63a3692006-05-08 19:51:58 +10001907 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001908 /*
1909 * Stop if we run out. Won't happen if callers are obeying
1910 * the restrictions correctly. Can happen for free calls
1911 * on a completely full ag.
1912 */
Yingping Lud210a282006-06-09 14:55:18 +10001913 if (targs.agbno == NULLAGBLOCK) {
Nathan Scott0e1edbd2006-08-10 14:40:41 +10001914 if (flags & XFS_ALLOC_FLAG_FREEING)
1915 break;
1916 xfs_trans_brelse(tp, agflbp);
1917 args->agbp = NULL;
1918 return 0;
Yingping Lud210a282006-06-09 14:55:18 +10001919 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001920 /*
1921 * Put each allocated block on the list.
1922 */
1923 for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
David Chinner92821e22007-05-24 15:26:31 +10001924 error = xfs_alloc_put_freelist(tp, agbp,
1925 agflbp, bno, 0);
1926 if (error)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927 return error;
1928 }
1929 }
Nathan Scotte63a3692006-05-08 19:51:58 +10001930 xfs_trans_brelse(tp, agflbp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001931 args->agbp = agbp;
1932 return 0;
1933}
1934
1935/*
1936 * Get a block from the freelist.
1937 * Returns with the buffer for the block gotten.
1938 */
1939int /* error */
1940xfs_alloc_get_freelist(
1941 xfs_trans_t *tp, /* transaction pointer */
1942 xfs_buf_t *agbp, /* buffer containing the agf structure */
David Chinner92821e22007-05-24 15:26:31 +10001943 xfs_agblock_t *bnop, /* block address retrieved from freelist */
1944 int btreeblk) /* destination is a AGF btree */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945{
1946 xfs_agf_t *agf; /* a.g. freespace structure */
1947 xfs_agfl_t *agfl; /* a.g. freelist structure */
1948 xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */
1949 xfs_agblock_t bno; /* block number returned */
1950 int error;
David Chinner92821e22007-05-24 15:26:31 +10001951 int logflags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001952 xfs_mount_t *mp; /* mount structure */
1953 xfs_perag_t *pag; /* per allocation group data */
1954
1955 agf = XFS_BUF_TO_AGF(agbp);
1956 /*
1957 * Freelist is empty, give up.
1958 */
1959 if (!agf->agf_flcount) {
1960 *bnop = NULLAGBLOCK;
1961 return 0;
1962 }
1963 /*
1964 * Read the array of free blocks.
1965 */
1966 mp = tp->t_mountp;
1967 if ((error = xfs_alloc_read_agfl(mp, tp,
Christoph Hellwig16259e72005-11-02 15:11:25 +11001968 be32_to_cpu(agf->agf_seqno), &agflbp)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001969 return error;
1970 agfl = XFS_BUF_TO_AGFL(agflbp);
1971 /*
1972 * Get the block number and update the data structures.
1973 */
Christoph Hellwige2101002006-09-28 10:56:51 +10001974 bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
Marcin Slusarz413d57c2008-02-13 15:03:29 -08001975 be32_add_cpu(&agf->agf_flfirst, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001976 xfs_trans_brelse(tp, agflbp);
Christoph Hellwig16259e72005-11-02 15:11:25 +11001977 if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001978 agf->agf_flfirst = 0;
Dave Chinnera862e0f2010-01-11 11:47:41 +00001979
1980 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
Marcin Slusarz413d57c2008-02-13 15:03:29 -08001981 be32_add_cpu(&agf->agf_flcount, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001982 xfs_trans_agflist_delta(tp, -1);
1983 pag->pagf_flcount--;
Dave Chinnera862e0f2010-01-11 11:47:41 +00001984 xfs_perag_put(pag);
David Chinner92821e22007-05-24 15:26:31 +10001985
1986 logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
1987 if (btreeblk) {
Marcin Slusarz413d57c2008-02-13 15:03:29 -08001988 be32_add_cpu(&agf->agf_btreeblks, 1);
David Chinner92821e22007-05-24 15:26:31 +10001989 pag->pagf_btreeblks++;
1990 logflags |= XFS_AGF_BTREEBLKS;
1991 }
1992
David Chinner92821e22007-05-24 15:26:31 +10001993 xfs_alloc_log_agf(tp, agbp, logflags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001994 *bnop = bno;
1995
1996 /*
Dave Chinnered3b4d62010-05-21 12:07:08 +10001997 * As blocks are freed, they are added to the per-ag busy list and
1998 * remain there until the freeing transaction is committed to disk.
1999 * Now that we have allocated blocks, this list must be searched to see
2000 * if a block is being reused. If one is, then the freeing transaction
2001 * must be pushed to disk before this transaction.
2002 *
2003 * We do this by setting the current transaction to a sync transaction
2004 * which guarantees that the freeing transaction is on disk before this
2005 * transaction. This is done instead of a synchronous log force here so
2006 * that we don't sit and wait with the AGF locked in the transaction
2007 * during the log force.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002008 */
Dave Chinnered3b4d62010-05-21 12:07:08 +10002009 if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1))
2010 xfs_trans_set_sync(tp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002011 return 0;
2012}
2013
2014/*
2015 * Log the given fields from the agf structure.
2016 */
2017void
2018xfs_alloc_log_agf(
2019 xfs_trans_t *tp, /* transaction pointer */
2020 xfs_buf_t *bp, /* buffer for a.g. freelist header */
2021 int fields) /* mask of fields to be logged (XFS_AGF_...) */
2022{
2023 int first; /* first byte offset */
2024 int last; /* last byte offset */
2025 static const short offsets[] = {
2026 offsetof(xfs_agf_t, agf_magicnum),
2027 offsetof(xfs_agf_t, agf_versionnum),
2028 offsetof(xfs_agf_t, agf_seqno),
2029 offsetof(xfs_agf_t, agf_length),
2030 offsetof(xfs_agf_t, agf_roots[0]),
2031 offsetof(xfs_agf_t, agf_levels[0]),
2032 offsetof(xfs_agf_t, agf_flfirst),
2033 offsetof(xfs_agf_t, agf_fllast),
2034 offsetof(xfs_agf_t, agf_flcount),
2035 offsetof(xfs_agf_t, agf_freeblks),
2036 offsetof(xfs_agf_t, agf_longest),
David Chinner92821e22007-05-24 15:26:31 +10002037 offsetof(xfs_agf_t, agf_btreeblks),
Linus Torvalds1da177e2005-04-16 15:20:36 -07002038 sizeof(xfs_agf_t)
2039 };
2040
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00002041 trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_);
2042
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043 xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
2044 xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
2045}
2046
2047/*
2048 * Interface for inode allocation to force the pag data to be initialized.
2049 */
2050int /* error */
2051xfs_alloc_pagf_init(
2052 xfs_mount_t *mp, /* file system mount structure */
2053 xfs_trans_t *tp, /* transaction pointer */
2054 xfs_agnumber_t agno, /* allocation group number */
2055 int flags) /* XFS_ALLOC_FLAGS_... */
2056{
2057 xfs_buf_t *bp;
2058 int error;
2059
2060 if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
2061 return error;
2062 if (bp)
2063 xfs_trans_brelse(tp, bp);
2064 return 0;
2065}
2066
2067/*
2068 * Put the block on the freelist for the allocation group.
2069 */
2070int /* error */
2071xfs_alloc_put_freelist(
2072 xfs_trans_t *tp, /* transaction pointer */
2073 xfs_buf_t *agbp, /* buffer for a.g. freelist header */
2074 xfs_buf_t *agflbp,/* buffer for a.g. free block array */
David Chinner92821e22007-05-24 15:26:31 +10002075 xfs_agblock_t bno, /* block being freed */
2076 int btreeblk) /* block came from a AGF btree */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002077{
2078 xfs_agf_t *agf; /* a.g. freespace structure */
2079 xfs_agfl_t *agfl; /* a.g. free block array */
Christoph Hellwige2101002006-09-28 10:56:51 +10002080 __be32 *blockp;/* pointer to array entry */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002081 int error;
David Chinner92821e22007-05-24 15:26:31 +10002082 int logflags;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083 xfs_mount_t *mp; /* mount structure */
2084 xfs_perag_t *pag; /* per allocation group data */
2085
2086 agf = XFS_BUF_TO_AGF(agbp);
2087 mp = tp->t_mountp;
2088
2089 if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp,
Christoph Hellwig16259e72005-11-02 15:11:25 +11002090 be32_to_cpu(agf->agf_seqno), &agflbp)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002091 return error;
2092 agfl = XFS_BUF_TO_AGFL(agflbp);
Marcin Slusarz413d57c2008-02-13 15:03:29 -08002093 be32_add_cpu(&agf->agf_fllast, 1);
Christoph Hellwig16259e72005-11-02 15:11:25 +11002094 if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002095 agf->agf_fllast = 0;
Dave Chinnera862e0f2010-01-11 11:47:41 +00002096
2097 pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno));
Marcin Slusarz413d57c2008-02-13 15:03:29 -08002098 be32_add_cpu(&agf->agf_flcount, 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002099 xfs_trans_agflist_delta(tp, 1);
2100 pag->pagf_flcount++;
David Chinner92821e22007-05-24 15:26:31 +10002101
2102 logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
2103 if (btreeblk) {
Marcin Slusarz413d57c2008-02-13 15:03:29 -08002104 be32_add_cpu(&agf->agf_btreeblks, -1);
David Chinner92821e22007-05-24 15:26:31 +10002105 pag->pagf_btreeblks--;
2106 logflags |= XFS_AGF_BTREEBLKS;
2107 }
Dave Chinnera862e0f2010-01-11 11:47:41 +00002108 xfs_perag_put(pag);
David Chinner92821e22007-05-24 15:26:31 +10002109
David Chinner92821e22007-05-24 15:26:31 +10002110 xfs_alloc_log_agf(tp, agbp, logflags);
2111
Christoph Hellwig16259e72005-11-02 15:11:25 +11002112 ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp));
2113 blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)];
Christoph Hellwige2101002006-09-28 10:56:51 +10002114 *blockp = cpu_to_be32(bno);
David Chinner92821e22007-05-24 15:26:31 +10002115 xfs_alloc_log_agf(tp, agbp, logflags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002116 xfs_trans_log_buf(tp, agflbp,
2117 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl),
2118 (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl +
2119 sizeof(xfs_agblock_t) - 1));
2120 return 0;
2121}
2122
2123/*
2124 * Read in the allocation group header (free/alloc section).
2125 */
2126int /* error */
From: Christoph Hellwig48056212008-11-28 14:23:38 +11002127xfs_read_agf(
2128 struct xfs_mount *mp, /* mount point structure */
2129 struct xfs_trans *tp, /* transaction pointer */
2130 xfs_agnumber_t agno, /* allocation group number */
2131 int flags, /* XFS_BUF_ */
2132 struct xfs_buf **bpp) /* buffer for the ag freelist header */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002133{
From: Christoph Hellwig48056212008-11-28 14:23:38 +11002134 struct xfs_agf *agf; /* ag freelist header */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002135 int agf_ok; /* set if agf is consistent */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002136 int error;
2137
2138 ASSERT(agno != NULLAGNUMBER);
2139 error = xfs_trans_read_buf(
2140 mp, tp, mp->m_ddev_targp,
2141 XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
From: Christoph Hellwig48056212008-11-28 14:23:38 +11002142 XFS_FSS_TO_BB(mp, 1), flags, bpp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002143 if (error)
2144 return error;
From: Christoph Hellwig48056212008-11-28 14:23:38 +11002145 if (!*bpp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002146 return 0;
From: Christoph Hellwig48056212008-11-28 14:23:38 +11002147
2148 ASSERT(!XFS_BUF_GETERROR(*bpp));
2149 agf = XFS_BUF_TO_AGF(*bpp);
2150
Linus Torvalds1da177e2005-04-16 15:20:36 -07002151 /*
2152 * Validate the magic number of the agf block.
2153 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002154 agf_ok =
Christoph Hellwig16259e72005-11-02 15:11:25 +11002155 be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC &&
2156 XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) &&
2157 be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) &&
2158 be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) &&
2159 be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) &&
From: Christoph Hellwig48056212008-11-28 14:23:38 +11002160 be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp) &&
2161 be32_to_cpu(agf->agf_seqno) == agno;
Barry Naujok89b28392008-10-30 17:05:49 +11002162 if (xfs_sb_version_haslazysbcount(&mp->m_sb))
2163 agf_ok = agf_ok && be32_to_cpu(agf->agf_btreeblks) <=
2164 be32_to_cpu(agf->agf_length);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002165 if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF,
2166 XFS_RANDOM_ALLOC_READ_AGF))) {
2167 XFS_CORRUPTION_ERROR("xfs_alloc_read_agf",
2168 XFS_ERRLEVEL_LOW, mp, agf);
From: Christoph Hellwig48056212008-11-28 14:23:38 +11002169 xfs_trans_brelse(tp, *bpp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002170 return XFS_ERROR(EFSCORRUPTED);
2171 }
From: Christoph Hellwig48056212008-11-28 14:23:38 +11002172 XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_AGF, XFS_AGF_REF);
2173 return 0;
2174}
2175
2176/*
2177 * Read in the allocation group header (free/alloc section).
2178 */
2179int /* error */
2180xfs_alloc_read_agf(
2181 struct xfs_mount *mp, /* mount point structure */
2182 struct xfs_trans *tp, /* transaction pointer */
2183 xfs_agnumber_t agno, /* allocation group number */
2184 int flags, /* XFS_ALLOC_FLAG_... */
2185 struct xfs_buf **bpp) /* buffer for the ag freelist header */
2186{
2187 struct xfs_agf *agf; /* ag freelist header */
2188 struct xfs_perag *pag; /* per allocation group data */
2189 int error;
2190
2191 ASSERT(agno != NULLAGNUMBER);
2192
2193 error = xfs_read_agf(mp, tp, agno,
Christoph Hellwig0cadda12010-01-19 09:56:44 +00002194 (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
From: Christoph Hellwig48056212008-11-28 14:23:38 +11002195 bpp);
2196 if (error)
2197 return error;
2198 if (!*bpp)
2199 return 0;
2200 ASSERT(!XFS_BUF_GETERROR(*bpp));
2201
2202 agf = XFS_BUF_TO_AGF(*bpp);
Dave Chinnera862e0f2010-01-11 11:47:41 +00002203 pag = xfs_perag_get(mp, agno);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002204 if (!pag->pagf_init) {
Christoph Hellwig16259e72005-11-02 15:11:25 +11002205 pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
David Chinner92821e22007-05-24 15:26:31 +10002206 pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
Christoph Hellwig16259e72005-11-02 15:11:25 +11002207 pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
2208 pag->pagf_longest = be32_to_cpu(agf->agf_longest);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002209 pag->pagf_levels[XFS_BTNUM_BNOi] =
Christoph Hellwig16259e72005-11-02 15:11:25 +11002210 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002211 pag->pagf_levels[XFS_BTNUM_CNTi] =
Christoph Hellwig16259e72005-11-02 15:11:25 +11002212 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
Eric Sandeen007c61c2007-10-11 17:43:56 +10002213 spin_lock_init(&pag->pagb_lock);
Dave Chinnere57336f2010-01-11 11:47:49 +00002214 pag->pagb_count = 0;
Dave Chinnered3b4d62010-05-21 12:07:08 +10002215 pag->pagb_tree = RB_ROOT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002216 pag->pagf_init = 1;
2217 }
2218#ifdef DEBUG
2219 else if (!XFS_FORCED_SHUTDOWN(mp)) {
Christoph Hellwig16259e72005-11-02 15:11:25 +11002220 ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
Barry Naujok89b28392008-10-30 17:05:49 +11002221 ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
Christoph Hellwig16259e72005-11-02 15:11:25 +11002222 ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
2223 ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002224 ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
Christoph Hellwig16259e72005-11-02 15:11:25 +11002225 be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002226 ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
Christoph Hellwig16259e72005-11-02 15:11:25 +11002227 be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228 }
2229#endif
Dave Chinnera862e0f2010-01-11 11:47:41 +00002230 xfs_perag_put(pag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002231 return 0;
2232}
2233
2234/*
2235 * Allocate an extent (variable-size).
2236 * Depending on the allocation type, we either look in a single allocation
2237 * group or loop over the allocation groups to find the result.
2238 */
2239int /* error */
2240xfs_alloc_vextent(
2241 xfs_alloc_arg_t *args) /* allocation argument structure */
2242{
2243 xfs_agblock_t agsize; /* allocation group size */
2244 int error;
2245 int flags; /* XFS_ALLOC_FLAG_... locking flags */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002246 xfs_extlen_t minleft;/* minimum left value, temp copy */
2247 xfs_mount_t *mp; /* mount structure pointer */
2248 xfs_agnumber_t sagno; /* starting allocation group number */
2249 xfs_alloctype_t type; /* input allocation type */
2250 int bump_rotor = 0;
2251 int no_min = 0;
2252 xfs_agnumber_t rotorstep = xfs_rotorstep; /* inode32 agf stepper */
2253
2254 mp = args->mp;
2255 type = args->otype = args->type;
2256 args->agbno = NULLAGBLOCK;
2257 /*
2258 * Just fix this up, for the case where the last a.g. is shorter
2259 * (or there's only one a.g.) and the caller couldn't easily figure
2260 * that out (xfs_bmap_alloc).
2261 */
2262 agsize = mp->m_sb.sb_agblocks;
2263 if (args->maxlen > agsize)
2264 args->maxlen = agsize;
2265 if (args->alignment == 0)
2266 args->alignment = 1;
2267 ASSERT(XFS_FSB_TO_AGNO(mp, args->fsbno) < mp->m_sb.sb_agcount);
2268 ASSERT(XFS_FSB_TO_AGBNO(mp, args->fsbno) < agsize);
2269 ASSERT(args->minlen <= args->maxlen);
2270 ASSERT(args->minlen <= agsize);
2271 ASSERT(args->mod < args->prod);
2272 if (XFS_FSB_TO_AGNO(mp, args->fsbno) >= mp->m_sb.sb_agcount ||
2273 XFS_FSB_TO_AGBNO(mp, args->fsbno) >= agsize ||
2274 args->minlen > args->maxlen || args->minlen > agsize ||
2275 args->mod >= args->prod) {
2276 args->fsbno = NULLFSBLOCK;
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00002277 trace_xfs_alloc_vextent_badargs(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278 return 0;
2279 }
2280 minleft = args->minleft;
2281
2282 switch (type) {
2283 case XFS_ALLOCTYPE_THIS_AG:
2284 case XFS_ALLOCTYPE_NEAR_BNO:
2285 case XFS_ALLOCTYPE_THIS_BNO:
2286 /*
2287 * These three force us into a single a.g.
2288 */
2289 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
Dave Chinnera862e0f2010-01-11 11:47:41 +00002290 args->pag = xfs_perag_get(mp, args->agno);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002291 args->minleft = 0;
2292 error = xfs_alloc_fix_freelist(args, 0);
2293 args->minleft = minleft;
2294 if (error) {
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00002295 trace_xfs_alloc_vextent_nofix(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002296 goto error0;
2297 }
2298 if (!args->agbp) {
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00002299 trace_xfs_alloc_vextent_noagbp(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002300 break;
2301 }
2302 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2303 if ((error = xfs_alloc_ag_vextent(args)))
2304 goto error0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305 break;
2306 case XFS_ALLOCTYPE_START_BNO:
2307 /*
2308 * Try near allocation first, then anywhere-in-ag after
2309 * the first a.g. fails.
2310 */
2311 if ((args->userdata == XFS_ALLOC_INITIAL_USER_DATA) &&
2312 (mp->m_flags & XFS_MOUNT_32BITINODES)) {
2313 args->fsbno = XFS_AGB_TO_FSB(mp,
2314 ((mp->m_agfrotor / rotorstep) %
2315 mp->m_sb.sb_agcount), 0);
2316 bump_rotor = 1;
2317 }
2318 args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno);
2319 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2320 /* FALLTHROUGH */
2321 case XFS_ALLOCTYPE_ANY_AG:
2322 case XFS_ALLOCTYPE_START_AG:
2323 case XFS_ALLOCTYPE_FIRST_AG:
2324 /*
2325 * Rotate through the allocation groups looking for a winner.
2326 */
2327 if (type == XFS_ALLOCTYPE_ANY_AG) {
2328 /*
2329 * Start with the last place we left off.
2330 */
2331 args->agno = sagno = (mp->m_agfrotor / rotorstep) %
2332 mp->m_sb.sb_agcount;
2333 args->type = XFS_ALLOCTYPE_THIS_AG;
2334 flags = XFS_ALLOC_FLAG_TRYLOCK;
2335 } else if (type == XFS_ALLOCTYPE_FIRST_AG) {
2336 /*
2337 * Start with allocation group given by bno.
2338 */
2339 args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2340 args->type = XFS_ALLOCTYPE_THIS_AG;
2341 sagno = 0;
2342 flags = 0;
2343 } else {
2344 if (type == XFS_ALLOCTYPE_START_AG)
2345 args->type = XFS_ALLOCTYPE_THIS_AG;
2346 /*
2347 * Start with the given allocation group.
2348 */
2349 args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno);
2350 flags = XFS_ALLOC_FLAG_TRYLOCK;
2351 }
2352 /*
2353 * Loop over allocation groups twice; first time with
2354 * trylock set, second time without.
2355 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002356 for (;;) {
Dave Chinnera862e0f2010-01-11 11:47:41 +00002357 args->pag = xfs_perag_get(mp, args->agno);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002358 if (no_min) args->minleft = 0;
2359 error = xfs_alloc_fix_freelist(args, flags);
2360 args->minleft = minleft;
2361 if (error) {
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00002362 trace_xfs_alloc_vextent_nofix(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002363 goto error0;
2364 }
2365 /*
2366 * If we get a buffer back then the allocation will fly.
2367 */
2368 if (args->agbp) {
2369 if ((error = xfs_alloc_ag_vextent(args)))
2370 goto error0;
2371 break;
2372 }
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00002373
2374 trace_xfs_alloc_vextent_loopfailed(args);
2375
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376 /*
2377 * Didn't work, figure out the next iteration.
2378 */
2379 if (args->agno == sagno &&
2380 type == XFS_ALLOCTYPE_START_BNO)
2381 args->type = XFS_ALLOCTYPE_THIS_AG;
Yingping Lud210a282006-06-09 14:55:18 +10002382 /*
2383 * For the first allocation, we can try any AG to get
2384 * space. However, if we already have allocated a
2385 * block, we don't want to try AGs whose number is below
2386 * sagno. Otherwise, we may end up with out-of-order
2387 * locking of AGF, which might cause deadlock.
2388 */
2389 if (++(args->agno) == mp->m_sb.sb_agcount) {
2390 if (args->firstblock != NULLFSBLOCK)
2391 args->agno = sagno;
2392 else
2393 args->agno = 0;
2394 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002395 /*
2396 * Reached the starting a.g., must either be done
2397 * or switch to non-trylock mode.
2398 */
2399 if (args->agno == sagno) {
2400 if (no_min == 1) {
2401 args->agbno = NULLAGBLOCK;
Christoph Hellwig0b1b2132009-12-14 23:14:59 +00002402 trace_xfs_alloc_vextent_allfailed(args);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002403 break;
2404 }
2405 if (flags == 0) {
2406 no_min = 1;
2407 } else {
2408 flags = 0;
2409 if (type == XFS_ALLOCTYPE_START_BNO) {
2410 args->agbno = XFS_FSB_TO_AGBNO(mp,
2411 args->fsbno);
2412 args->type = XFS_ALLOCTYPE_NEAR_BNO;
2413 }
2414 }
2415 }
Dave Chinnera862e0f2010-01-11 11:47:41 +00002416 xfs_perag_put(args->pag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002417 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002418 if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) {
2419 if (args->agno == sagno)
2420 mp->m_agfrotor = (mp->m_agfrotor + 1) %
2421 (mp->m_sb.sb_agcount * rotorstep);
2422 else
2423 mp->m_agfrotor = (args->agno * rotorstep + 1) %
2424 (mp->m_sb.sb_agcount * rotorstep);
2425 }
2426 break;
2427 default:
2428 ASSERT(0);
2429 /* NOTREACHED */
2430 }
2431 if (args->agbno == NULLAGBLOCK)
2432 args->fsbno = NULLFSBLOCK;
2433 else {
2434 args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
2435#ifdef DEBUG
2436 ASSERT(args->len >= args->minlen);
2437 ASSERT(args->len <= args->maxlen);
2438 ASSERT(args->agbno % args->alignment == 0);
2439 XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno),
2440 args->len);
2441#endif
2442 }
Dave Chinnera862e0f2010-01-11 11:47:41 +00002443 xfs_perag_put(args->pag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002444 return 0;
2445error0:
Dave Chinnera862e0f2010-01-11 11:47:41 +00002446 xfs_perag_put(args->pag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002447 return error;
2448}
2449
2450/*
2451 * Free an extent.
2452 * Just break up the extent address and hand off to xfs_free_ag_extent
2453 * after fixing up the freelist.
2454 */
2455int /* error */
2456xfs_free_extent(
2457 xfs_trans_t *tp, /* transaction pointer */
2458 xfs_fsblock_t bno, /* starting block number of extent */
2459 xfs_extlen_t len) /* length of extent */
2460{
Nathan Scott0e1edbd2006-08-10 14:40:41 +10002461 xfs_alloc_arg_t args;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002462 int error;
2463
2464 ASSERT(len != 0);
Nathan Scott0e1edbd2006-08-10 14:40:41 +10002465 memset(&args, 0, sizeof(xfs_alloc_arg_t));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002466 args.tp = tp;
2467 args.mp = tp->t_mountp;
2468 args.agno = XFS_FSB_TO_AGNO(args.mp, bno);
2469 ASSERT(args.agno < args.mp->m_sb.sb_agcount);
2470 args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno);
Dave Chinnera862e0f2010-01-11 11:47:41 +00002471 args.pag = xfs_perag_get(args.mp, args.agno);
Yingping Lud210a282006-06-09 14:55:18 +10002472 if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002473 goto error0;
2474#ifdef DEBUG
2475 ASSERT(args.agbp != NULL);
Nathan Scott0e1edbd2006-08-10 14:40:41 +10002476 ASSERT((args.agbno + len) <=
2477 be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002478#endif
Nathan Scott0e1edbd2006-08-10 14:40:41 +10002479 error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002480error0:
Dave Chinnera862e0f2010-01-11 11:47:41 +00002481 xfs_perag_put(args.pag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002482 return error;
2483}
2484
2485
2486/*
2487 * AG Busy list management
2488 * The busy list contains block ranges that have been freed but whose
Nathan Scottc41564b2006-03-29 08:55:14 +10002489 * transactions have not yet hit disk. If any block listed in a busy
Linus Torvalds1da177e2005-04-16 15:20:36 -07002490 * list is reused, the transaction that freed it must be forced to disk
2491 * before continuing to use the block.
2492 *
Dave Chinnered3b4d62010-05-21 12:07:08 +10002493 * xfs_alloc_busy_insert - add to the per-ag busy list
2494 * xfs_alloc_busy_clear - remove an item from the per-ag busy list
2495 * xfs_alloc_busy_search - search for a busy extent
2496 */
2497
2498/*
2499 * Insert a new extent into the busy tree.
2500 *
2501 * The busy extent tree is indexed by the start block of the busy extent.
2502 * there can be multiple overlapping ranges in the busy extent tree but only
2503 * ever one entry at a given start block. The reason for this is that
2504 * multi-block extents can be freed, then smaller chunks of that extent
2505 * allocated and freed again before the first transaction commit is on disk.
2506 * If the exact same start block is freed a second time, we have to wait for
2507 * that busy extent to pass out of the tree before the new extent is inserted.
2508 * There are two main cases we have to handle here.
2509 *
2510 * The first case is a transaction that triggers a "free - allocate - free"
2511 * cycle. This can occur during btree manipulations as a btree block is freed
2512 * to the freelist, then allocated from the free list, then freed again. In
2513 * this case, the second extxpnet free is what triggers the duplicate and as
2514 * such the transaction IDs should match. Because the extent was allocated in
2515 * this transaction, the transaction must be marked as synchronous. This is
2516 * true for all cases where the free/alloc/free occurs in the one transaction,
2517 * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case.
2518 * This serves to catch violations of the second case quite effectively.
2519 *
2520 * The second case is where the free/alloc/free occur in different
2521 * transactions. In this case, the thread freeing the extent the second time
2522 * can't mark the extent busy immediately because it is already tracked in a
2523 * transaction that may be committing. When the log commit for the existing
2524 * busy extent completes, the busy extent will be removed from the tree. If we
2525 * allow the second busy insert to continue using that busy extent structure,
2526 * it can be freed before this transaction is safely in the log. Hence our
2527 * only option in this case is to force the log to remove the existing busy
2528 * extent from the list before we insert the new one with the current
2529 * transaction ID.
2530 *
2531 * The problem we are trying to avoid in the free-alloc-free in separate
2532 * transactions is most easily described with a timeline:
2533 *
2534 * Thread 1 Thread 2 Thread 3 xfslogd
2535 * xact alloc
2536 * free X
2537 * mark busy
2538 * commit xact
2539 * free xact
2540 * xact alloc
2541 * alloc X
2542 * busy search
2543 * mark xact sync
2544 * commit xact
2545 * free xact
2546 * force log
2547 * checkpoint starts
2548 * ....
2549 * xact alloc
2550 * free X
2551 * mark busy
2552 * finds match
2553 * *** KABOOM! ***
2554 * ....
2555 * log IO completes
2556 * unbusy X
2557 * checkpoint completes
2558 *
2559 * By issuing a log force in thread 3 @ "KABOOM", the thread will block until
2560 * the checkpoint completes, and the busy extent it matched will have been
2561 * removed from the tree when it is woken. Hence it can then continue safely.
2562 *
2563 * However, to ensure this matching process is robust, we need to use the
2564 * transaction ID for identifying transaction, as delayed logging results in
2565 * the busy extent and transaction lifecycles being different. i.e. the busy
2566 * extent is active for a lot longer than the transaction. Hence the
2567 * transaction structure can be freed and reallocated, then mark the same
2568 * extent busy again in the new transaction. In this case the new transaction
2569 * will have a different tid but can have the same address, and hence we need
2570 * to check against the tid.
2571 *
2572 * Future: for delayed logging, we could avoid the log force if the extent was
2573 * first freed in the current checkpoint sequence. This, however, requires the
2574 * ability to pin the current checkpoint in memory until this transaction
2575 * commits to ensure that both the original free and the current one combine
2576 * logically into the one checkpoint. If the checkpoint sequences are
2577 * different, however, we still need to wait on a log force.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002578 */
2579void
Dave Chinnered3b4d62010-05-21 12:07:08 +10002580xfs_alloc_busy_insert(
2581 struct xfs_trans *tp,
2582 xfs_agnumber_t agno,
2583 xfs_agblock_t bno,
2584 xfs_extlen_t len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002585{
Dave Chinnered3b4d62010-05-21 12:07:08 +10002586 struct xfs_busy_extent *new;
2587 struct xfs_busy_extent *busyp;
Dave Chinnera862e0f2010-01-11 11:47:41 +00002588 struct xfs_perag *pag;
Dave Chinnered3b4d62010-05-21 12:07:08 +10002589 struct rb_node **rbp;
2590 struct rb_node *parent;
2591 int match;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002592
Dave Chinnered3b4d62010-05-21 12:07:08 +10002593
2594 new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL);
2595 if (!new) {
2596 /*
2597 * No Memory! Since it is now not possible to track the free
2598 * block, make this a synchronous transaction to insure that
2599 * the block is not reused before this transaction commits.
2600 */
2601 trace_xfs_alloc_busy(tp, agno, bno, len, 1);
2602 xfs_trans_set_sync(tp);
2603 return;
2604 }
2605
2606 new->agno = agno;
2607 new->bno = bno;
2608 new->length = len;
2609 new->tid = xfs_log_get_trans_ident(tp);
2610
2611 INIT_LIST_HEAD(&new->list);
2612
2613 /* trace before insert to be able to see failed inserts */
2614 trace_xfs_alloc_busy(tp, agno, bno, len, 0);
2615
2616 pag = xfs_perag_get(tp->t_mountp, new->agno);
2617restart:
Dave Chinnera862e0f2010-01-11 11:47:41 +00002618 spin_lock(&pag->pagb_lock);
Dave Chinnered3b4d62010-05-21 12:07:08 +10002619 rbp = &pag->pagb_tree.rb_node;
2620 parent = NULL;
2621 busyp = NULL;
2622 match = 0;
2623 while (*rbp && match >= 0) {
2624 parent = *rbp;
2625 busyp = rb_entry(parent, struct xfs_busy_extent, rb_node);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002626
Dave Chinnered3b4d62010-05-21 12:07:08 +10002627 if (new->bno < busyp->bno) {
2628 /* may overlap, but exact start block is lower */
2629 rbp = &(*rbp)->rb_left;
2630 if (new->bno + new->length > busyp->bno)
2631 match = busyp->tid == new->tid ? 1 : -1;
2632 } else if (new->bno > busyp->bno) {
2633 /* may overlap, but exact start block is higher */
2634 rbp = &(*rbp)->rb_right;
2635 if (bno < busyp->bno + busyp->length)
2636 match = busyp->tid == new->tid ? 1 : -1;
2637 } else {
2638 match = busyp->tid == new->tid ? 1 : -1;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002639 break;
2640 }
2641 }
Dave Chinnered3b4d62010-05-21 12:07:08 +10002642 if (match < 0) {
2643 /* overlap marked busy in different transaction */
2644 spin_unlock(&pag->pagb_lock);
2645 xfs_log_force(tp->t_mountp, XFS_LOG_SYNC);
2646 goto restart;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002647 }
Dave Chinnered3b4d62010-05-21 12:07:08 +10002648 if (match > 0) {
2649 /*
2650 * overlap marked busy in same transaction. Update if exact
2651 * start block match, otherwise combine the busy extents into
2652 * a single range.
2653 */
2654 if (busyp->bno == new->bno) {
2655 busyp->length = max(busyp->length, new->length);
2656 spin_unlock(&pag->pagb_lock);
2657 ASSERT(tp->t_flags & XFS_TRANS_SYNC);
2658 xfs_perag_put(pag);
2659 kmem_free(new);
2660 return;
2661 }
2662 rb_erase(&busyp->rb_node, &pag->pagb_tree);
2663 new->length = max(busyp->bno + busyp->length,
2664 new->bno + new->length) -
2665 min(busyp->bno, new->bno);
2666 new->bno = min(busyp->bno, new->bno);
2667 } else
2668 busyp = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002669
Dave Chinnered3b4d62010-05-21 12:07:08 +10002670 rb_link_node(&new->rb_node, parent, rbp);
2671 rb_insert_color(&new->rb_node, &pag->pagb_tree);
2672
2673 list_add(&new->list, &tp->t_busy);
Dave Chinnera862e0f2010-01-11 11:47:41 +00002674 spin_unlock(&pag->pagb_lock);
2675 xfs_perag_put(pag);
Dave Chinnered3b4d62010-05-21 12:07:08 +10002676 kmem_free(busyp);
2677}
2678
2679/*
2680 * Search for a busy extent within the range of the extent we are about to
2681 * allocate. You need to be holding the busy extent tree lock when calling
2682 * xfs_alloc_busy_search(). This function returns 0 for no overlapping busy
2683 * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact
2684 * match. This is done so that a non-zero return indicates an overlap that
2685 * will require a synchronous transaction, but it can still be
2686 * used to distinguish between a partial or exact match.
2687 */
2688static int
2689xfs_alloc_busy_search(
2690 struct xfs_mount *mp,
2691 xfs_agnumber_t agno,
2692 xfs_agblock_t bno,
2693 xfs_extlen_t len)
2694{
2695 struct xfs_perag *pag;
2696 struct rb_node *rbp;
2697 struct xfs_busy_extent *busyp;
2698 int match = 0;
2699
2700 pag = xfs_perag_get(mp, agno);
2701 spin_lock(&pag->pagb_lock);
2702
2703 rbp = pag->pagb_tree.rb_node;
2704
2705 /* find closest start bno overlap */
2706 while (rbp) {
2707 busyp = rb_entry(rbp, struct xfs_busy_extent, rb_node);
2708 if (bno < busyp->bno) {
2709 /* may overlap, but exact start block is lower */
2710 if (bno + len > busyp->bno)
2711 match = -1;
2712 rbp = rbp->rb_left;
2713 } else if (bno > busyp->bno) {
2714 /* may overlap, but exact start block is higher */
2715 if (bno < busyp->bno + busyp->length)
2716 match = -1;
2717 rbp = rbp->rb_right;
2718 } else {
2719 /* bno matches busyp, length determines exact match */
2720 match = (busyp->length == len) ? 1 : -1;
2721 break;
2722 }
2723 }
2724 spin_unlock(&pag->pagb_lock);
2725 trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match);
2726 xfs_perag_put(pag);
2727 return match;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002728}
2729
2730void
Dave Chinnered3b4d62010-05-21 12:07:08 +10002731xfs_alloc_busy_clear(
2732 struct xfs_mount *mp,
2733 struct xfs_busy_extent *busyp)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002734{
Dave Chinnera862e0f2010-01-11 11:47:41 +00002735 struct xfs_perag *pag;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002736
Dave Chinnered3b4d62010-05-21 12:07:08 +10002737 trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno,
2738 busyp->length);
2739
2740 ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno,
2741 busyp->length) == 1);
2742
2743 list_del_init(&busyp->list);
2744
2745 pag = xfs_perag_get(mp, busyp->agno);
Dave Chinnera862e0f2010-01-11 11:47:41 +00002746 spin_lock(&pag->pagb_lock);
Dave Chinnered3b4d62010-05-21 12:07:08 +10002747 rb_erase(&busyp->rb_node, &pag->pagb_tree);
Dave Chinnera862e0f2010-01-11 11:47:41 +00002748 spin_unlock(&pag->pagb_lock);
2749 xfs_perag_put(pag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002750
Dave Chinnered3b4d62010-05-21 12:07:08 +10002751 kmem_free(busyp);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002752}