blob: 5aa07caea5f15b06b607f222be591f49e16ff7bd [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
Nathan Scott1e69dd02006-06-19 08:39:53 +10002 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
Nathan Scott7b718762005-11-02 14:58:39 +11003 * All Rights Reserved.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004 *
Nathan Scott7b718762005-11-02 14:58:39 +11005 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
Linus Torvalds1da177e2005-04-16 15:20:36 -07007 * published by the Free Software Foundation.
8 *
Nathan Scott7b718762005-11-02 14:58:39 +11009 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
Linus Torvalds1da177e2005-04-16 15:20:36 -070013 *
Nathan Scott7b718762005-11-02 14:58:39 +110014 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
Linus Torvalds1da177e2005-04-16 15:20:36 -070017 */
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include "xfs.h"
Nathan Scotta844f452005-11-02 14:38:42 +110019#include "xfs_fs.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070020#include "xfs_types.h"
Nathan Scotta844f452005-11-02 14:38:42 +110021#include "xfs_bit.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include "xfs_log.h"
Nathan Scotta844f452005-11-02 14:38:42 +110023#include "xfs_inum.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include "xfs_trans.h"
25#include "xfs_sb.h"
26#include "xfs_ag.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070027#include "xfs_dir2.h"
28#include "xfs_dmapi.h"
29#include "xfs_mount.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070030#include "xfs_bmap_btree.h"
Nathan Scotta844f452005-11-02 14:38:42 +110031#include "xfs_alloc_btree.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070032#include "xfs_ialloc_btree.h"
Nathan Scotta844f452005-11-02 14:38:42 +110033#include "xfs_dir2_sf.h"
34#include "xfs_attr_sf.h"
35#include "xfs_dinode.h"
36#include "xfs_inode.h"
37#include "xfs_inode_item.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070038#include "xfs_itable.h"
39#include "xfs_btree.h"
40#include "xfs_alloc.h"
41#include "xfs_ialloc.h"
42#include "xfs_attr.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070043#include "xfs_bmap.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070044#include "xfs_error.h"
45#include "xfs_buf_item.h"
46#include "xfs_rw.h"
Christoph Hellwig0b1b2132009-12-14 23:14:59 +000047#include "xfs_trace.h"
Linus Torvalds1da177e2005-04-16 15:20:36 -070048
49/*
50 * This is a subroutine for xfs_write() and other writers (xfs_ioctl)
51 * which clears the setuid and setgid bits when a file is written.
52 */
53int
54xfs_write_clear_setuid(
55 xfs_inode_t *ip)
56{
57 xfs_mount_t *mp;
58 xfs_trans_t *tp;
59 int error;
60
61 mp = ip->i_mount;
62 tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
63 if ((error = xfs_trans_reserve(tp, 0,
64 XFS_WRITEID_LOG_RES(mp),
65 0, 0, 0))) {
66 xfs_trans_cancel(tp, 0);
67 return error;
68 }
69 xfs_ilock(ip, XFS_ILOCK_EXCL);
70 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
71 xfs_trans_ihold(tp, ip);
72 ip->i_d.di_mode &= ~S_ISUID;
73
74 /*
75 * Note that we don't have to worry about mandatory
76 * file locking being disabled here because we only
77 * clear the S_ISGID bit if the Group execute bit is
78 * on, but if it was on then mandatory locking wouldn't
79 * have been enabled.
80 */
81 if (ip->i_d.di_mode & S_IXGRP) {
82 ip->i_d.di_mode &= ~S_ISGID;
83 }
84 xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
85 xfs_trans_set_sync(tp);
Eric Sandeen1c72bf92007-05-08 13:48:42 +100086 error = xfs_trans_commit(tp, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -070087 xfs_iunlock(ip, XFS_ILOCK_EXCL);
88 return 0;
89}
90
91/*
92 * Force a shutdown of the filesystem instantly while keeping
93 * the filesystem consistent. We don't do an unmount here; just shutdown
94 * the shop, make sure that absolutely nothing persistent happens to
95 * this filesystem after this point.
96 */
Linus Torvalds1da177e2005-04-16 15:20:36 -070097void
98xfs_do_force_shutdown(
Christoph Hellwig48c872a9f2007-08-30 17:20:31 +100099 xfs_mount_t *mp,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700100 int flags,
101 char *fname,
102 int lnnum)
103{
104 int logerror;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700105
Nathan Scott7d04a332006-06-09 14:58:38 +1000106 logerror = flags & SHUTDOWN_LOG_IO_ERROR;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107
Nathan Scott7d04a332006-06-09 14:58:38 +1000108 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
109 cmn_err(CE_NOTE, "xfs_force_shutdown(%s,0x%x) called from "
110 "line %d of file %s. Return address = 0x%p",
111 mp->m_fsname, flags, lnnum, fname, __return_address);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700112 }
113 /*
114 * No need to duplicate efforts.
115 */
116 if (XFS_FORCED_SHUTDOWN(mp) && !logerror)
117 return;
118
119 /*
120 * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't
121 * queue up anybody new on the log reservations, and wakes up
Nathan Scott7d04a332006-06-09 14:58:38 +1000122 * everybody who's sleeping on log reservations to tell them
123 * the bad news.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700124 */
125 if (xfs_log_force_umount(mp, logerror))
126 return;
127
Nathan Scott7d04a332006-06-09 14:58:38 +1000128 if (flags & SHUTDOWN_CORRUPT_INCORE) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129 xfs_cmn_err(XFS_PTAG_SHUTDOWN_CORRUPT, CE_ALERT, mp,
130 "Corruption of in-memory data detected. Shutting down filesystem: %s",
131 mp->m_fsname);
132 if (XFS_ERRLEVEL_HIGH <= xfs_error_level) {
133 xfs_stack_trace();
134 }
Nathan Scott7d04a332006-06-09 14:58:38 +1000135 } else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136 if (logerror) {
137 xfs_cmn_err(XFS_PTAG_SHUTDOWN_LOGERROR, CE_ALERT, mp,
Nathan Scott7d04a332006-06-09 14:58:38 +1000138 "Log I/O Error Detected. Shutting down filesystem: %s",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139 mp->m_fsname);
Nathan Scott7d04a332006-06-09 14:58:38 +1000140 } else if (flags & SHUTDOWN_DEVICE_REQ) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700141 xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp,
Nathan Scott7d04a332006-06-09 14:58:38 +1000142 "All device paths lost. Shutting down filesystem: %s",
143 mp->m_fsname);
144 } else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
145 xfs_cmn_err(XFS_PTAG_SHUTDOWN_IOERROR, CE_ALERT, mp,
146 "I/O Error Detected. Shutting down filesystem: %s",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147 mp->m_fsname);
148 }
149 }
Nathan Scott7d04a332006-06-09 14:58:38 +1000150 if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
151 cmn_err(CE_ALERT, "Please umount the filesystem, "
152 "and rectify the problem(s)");
Linus Torvalds1da177e2005-04-16 15:20:36 -0700153 }
154}
155
156
157/*
158 * Called when we want to stop a buffer from getting written or read.
159 * We attach the EIO error, muck with its flags, and call biodone
160 * so that the proper iodone callbacks get called.
161 */
162int
163xfs_bioerror(
164 xfs_buf_t *bp)
165{
166
167#ifdef XFSERRORDEBUG
168 ASSERT(XFS_BUF_ISREAD(bp) || bp->b_iodone);
169#endif
170
171 /*
172 * No need to wait until the buffer is unpinned.
173 * We aren't flushing it.
174 */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700175 XFS_BUF_ERROR(bp, EIO);
176 /*
177 * We're calling biodone, so delete B_DONE flag. Either way
178 * we have to call the iodone callback, and calling biodone
179 * probably is the best way since it takes care of
180 * GRIO as well.
181 */
182 XFS_BUF_UNREAD(bp);
183 XFS_BUF_UNDELAYWRITE(bp);
184 XFS_BUF_UNDONE(bp);
185 XFS_BUF_STALE(bp);
186
187 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
188 xfs_biodone(bp);
189
190 return (EIO);
191}
192
193/*
194 * Same as xfs_bioerror, except that we are releasing the buffer
195 * here ourselves, and avoiding the biodone call.
196 * This is meant for userdata errors; metadata bufs come with
197 * iodone functions attached, so that we can track down errors.
198 */
199int
200xfs_bioerror_relse(
201 xfs_buf_t *bp)
202{
203 int64_t fl;
204
205 ASSERT(XFS_BUF_IODONE_FUNC(bp) != xfs_buf_iodone_callbacks);
206 ASSERT(XFS_BUF_IODONE_FUNC(bp) != xlog_iodone);
207
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 fl = XFS_BUF_BFLAGS(bp);
209 /*
210 * No need to wait until the buffer is unpinned.
211 * We aren't flushing it.
212 *
213 * chunkhold expects B_DONE to be set, whether
214 * we actually finish the I/O or not. We don't want to
215 * change that interface.
216 */
217 XFS_BUF_UNREAD(bp);
218 XFS_BUF_UNDELAYWRITE(bp);
219 XFS_BUF_DONE(bp);
220 XFS_BUF_STALE(bp);
221 XFS_BUF_CLR_IODONE_FUNC(bp);
222 XFS_BUF_CLR_BDSTRAT_FUNC(bp);
223 if (!(fl & XFS_B_ASYNC)) {
224 /*
225 * Mark b_error and B_ERROR _both_.
226 * Lot's of chunkcache code assumes that.
227 * There's no reason to mark error for
228 * ASYNC buffers.
229 */
230 XFS_BUF_ERROR(bp, EIO);
David Chinnerb4dd3302008-08-13 16:36:11 +1000231 XFS_BUF_FINISH_IOWAIT(bp);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700232 } else {
233 xfs_buf_relse(bp);
234 }
235 return (EIO);
236}
Nathan Scottce8e9222006-01-11 15:39:08 +1100237
Linus Torvalds1da177e2005-04-16 15:20:36 -0700238/*
239 * Prints out an ALERT message about I/O error.
240 */
241void
242xfs_ioerror_alert(
243 char *func,
244 struct xfs_mount *mp,
245 xfs_buf_t *bp,
246 xfs_daddr_t blkno)
247{
248 cmn_err(CE_ALERT,
249 "I/O error in filesystem (\"%s\") meta-data dev %s block 0x%llx"
Christoph Hellwigda1650a2005-11-02 10:21:35 +1100250 " (\"%s\") error %d buf count %zd",
Linus Torvalds1da177e2005-04-16 15:20:36 -0700251 (!mp || !mp->m_fsname) ? "(fs name not set)" : mp->m_fsname,
Nathan Scottce8e9222006-01-11 15:39:08 +1100252 XFS_BUFTARG_NAME(XFS_BUF_TARGET(bp)),
253 (__uint64_t)blkno, func,
254 XFS_BUF_GETERROR(bp), XFS_BUF_COUNT(bp));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700255}
256
257/*
258 * This isn't an absolute requirement, but it is
259 * just a good idea to call xfs_read_buf instead of
260 * directly doing a read_buf call. For one, we shouldn't
261 * be doing this disk read if we are in SHUTDOWN state anyway,
262 * so this stops that from happening. Secondly, this does all
263 * the error checking stuff and the brelse if appropriate for
264 * the caller, so the code can be a little leaner.
265 */
266
267int
268xfs_read_buf(
269 struct xfs_mount *mp,
270 xfs_buftarg_t *target,
271 xfs_daddr_t blkno,
272 int len,
273 uint flags,
274 xfs_buf_t **bpp)
275{
276 xfs_buf_t *bp;
277 int error;
278
Christoph Hellwig6ad112b2009-11-24 18:02:23 +0000279 if (!flags)
280 flags = XBF_LOCK | XBF_MAPPED;
281
282 bp = xfs_buf_read(target, blkno, len, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283 if (!bp)
284 return XFS_ERROR(EIO);
285 error = XFS_BUF_GETERROR(bp);
286 if (bp && !error && !XFS_FORCED_SHUTDOWN(mp)) {
287 *bpp = bp;
288 } else {
289 *bpp = NULL;
290 if (error) {
291 xfs_ioerror_alert("xfs_read_buf", mp, bp, XFS_BUF_ADDR(bp));
292 } else {
293 error = XFS_ERROR(EIO);
294 }
295 if (bp) {
296 XFS_BUF_UNDONE(bp);
297 XFS_BUF_UNDELAYWRITE(bp);
298 XFS_BUF_STALE(bp);
299 /*
300 * brelse clears B_ERROR and b_error
301 */
302 xfs_buf_relse(bp);
303 }
304 }
305 return (error);
306}
307
308/*
309 * Wrapper around bwrite() so that we can trap
310 * write errors, and act accordingly.
311 */
312int
313xfs_bwrite(
314 struct xfs_mount *mp,
315 struct xfs_buf *bp)
316{
317 int error;
318
319 /*
320 * XXXsup how does this work for quotas.
321 */
322 XFS_BUF_SET_BDSTRAT_FUNC(bp, xfs_bdstrat_cb);
Christoph Hellwig15ac08a2008-12-09 04:47:30 -0500323 bp->b_mount = mp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700324 XFS_BUF_WRITE(bp);
325
326 if ((error = XFS_bwrite(bp))) {
327 ASSERT(mp);
328 /*
329 * Cannot put a buftrace here since if the buffer is not
330 * B_HOLD then we will brelse() the buffer before returning
331 * from bwrite and we could be tracing a buffer that has
332 * been reused.
333 */
Nathan Scott7d04a332006-06-09 14:58:38 +1000334 xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335 }
336 return (error);
337}
Christoph Hellwig5683f532009-11-14 16:17:21 +0000338
339/*
340 * helper function to extract extent size hint from inode
341 */
342xfs_extlen_t
343xfs_get_extsz_hint(
344 struct xfs_inode *ip)
345{
346 xfs_extlen_t extsz;
347
348 if (unlikely(XFS_IS_REALTIME_INODE(ip))) {
349 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
350 ? ip->i_d.di_extsize
351 : ip->i_mount->m_sb.sb_rextsize;
352 ASSERT(extsz);
353 } else {
354 extsz = (ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE)
355 ? ip->i_d.di_extsize : 0;
356 }
357
358 return extsz;
359}