blob: 974551ea45539ab760c02804ad1fe8b65966617e [file] [log] [blame]
Nikolaus Rath055f2722019-05-05 13:11:03 -04001/*
2 FUSE: Filesystem in Userspace
3 Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
4 Copyright (C) 2017 Nikolaus Rath <Nikolaus@rath.org>
5 Copyright (C) 2018 Valve, Inc
6
7 This program can be distributed under the terms of the GNU GPL.
8 See the file COPYING.
9*/
10
/** @file
 *
 * This is a "high-performance" version of passthrough_ll.c. While
 * passthrough_ll.c is designed to be as simple as possible, this
 * example is intended to be as efficient and correct as possible.
 *
 * passthrough_hp.cc mirrors a specified "source" directory under a
 * specified mountpoint with as much fidelity and performance as
 * possible.
 *
 * If --nocache is specified, the source directory may be changed
 * directly even while mounted and the filesystem will continue
 * to work correctly.
 *
 * Without --nocache, the source directory is assumed to be modified
 * only through the passthrough filesystem. This enables much better
 * performance, but if changes are made directly to the source, they
 * may not be immediately visible under the mountpoint and further
 * access to the mountpoint may result in incorrect behavior,
 * including data-loss.
 *
 * On its own, this filesystem fulfills no practical purpose. It is
 * intended as a template upon which additional functionality can be
 * built.
 *
 * Unless --nocache is specified, it is only possible to write to files
 * for which the mounting user has read permissions. This is because
 * the writeback cache requires the kernel to be able to issue read
 * requests for all files (which the passthrough filesystem cannot
 * satisfy if it can't read the file in the underlying filesystem).
 *
 * ## Source code ##
 * \include passthrough_hp.cc
 */
45
46#define FUSE_USE_VERSION 35
47
48#ifdef HAVE_CONFIG_H
Nikolaus Rath7a5e1a92019-05-12 11:00:15 +010049#include "config.h"
Nikolaus Rath055f2722019-05-05 13:11:03 -040050#endif
51
52#ifndef _GNU_SOURCE
53#define _GNU_SOURCE
54#endif
55
56// C includes
57#include <dirent.h>
58#include <err.h>
59#include <errno.h>
60#include <ftw.h>
61#include <fuse_lowlevel.h>
62#include <inttypes.h>
63#include <string.h>
64#include <sys/file.h>
65#include <sys/resource.h>
66#include <sys/xattr.h>
67#include <time.h>
68#include <unistd.h>
69#include <pthread.h>
70
71// C++ includes
72#include <cstddef>
73#include <cstdio>
74#include <cstdlib>
75#include <list>
Nikolaus Rath7a5e1a92019-05-12 11:00:15 +010076#include "cxxopts.hpp"
Nikolaus Rath055f2722019-05-05 13:11:03 -040077#include <mutex>
78#include <fstream>
79#include <thread>
80#include <iomanip>
81
82using namespace std;
83
84/* We are re-using pointers to our `struct sfs_inode` and `struct
85 sfs_dirp` elements as inodes and file handles. This means that we
86 must be able to store pointer a pointer in both a fuse_ino_t
87 variable and a uint64_t variable (used for file handles). */
88static_assert(sizeof(fuse_ino_t) >= sizeof(void*),
89 "void* must fit into fuse_ino_t");
90static_assert(sizeof(fuse_ino_t) >= sizeof(uint64_t),
91 "fuse_ino_t must be at least 64 bits");
92
93
94/* Forward declarations */
95struct Inode;
96static Inode& get_inode(fuse_ino_t ino);
97static void forget_one(fuse_ino_t ino, uint64_t n);
98
// Key that uniquely identifies a file in the source directory tree:
// the inode number paired with the device number. Since the source
// directory is required not to contain any mountpoints, ino_t alone
// would be sufficient. The pair is kept in case that constraint has
// to be reconsidered (relaxing it would however mean that inode
// numbers can no longer be re-used, so readdir() would need a full
// lookup() to report the right inode number).
using SrcId = std::pair<ino_t, dev_t>;

// Hash support so that SrcId can serve as an unordered container key
namespace std {
    template<>
    struct hash<SrcId> {
        size_t operator()(const SrcId& id) const {
            // Combine the hashes of both components with XOR
            const size_t ino_hash = hash<ino_t>{}(id.first);
            const size_t dev_hash = hash<dev_t>{}(id.second);
            return ino_hash ^ dev_hash;
        }
    };
}
117
118// Maps files in the source directory tree to inodes
119typedef std::unordered_map<SrcId, Inode> InodeMap;
120
// Userspace state kept for one file of the source directory tree.
struct Inode {
    int fd {-1};              // descriptor of the source file, -1 while unset
    bool is_symlink {false};  // true if the source file is a symbolic link
    dev_t src_dev {0};        // device number of the source file
    ino_t src_ino {0};        // inode number of the source file
    uint64_t nlookup {0};     // lookup count, see do_lookup()/forget_one()
    std::mutex m;

    // Inodes are referenced by address, so they must never be copied
    // or moved. We could implement move if we need it.
    Inode() = default;
    Inode(const Inode&) = delete;
    Inode(Inode&& inode) = delete;
    Inode& operator=(Inode&& inode) = delete;
    Inode& operator=(const Inode&) = delete;

    ~Inode() {
        // Only -1 means "no descriptor"; 0 is a valid descriptor and
        // must be released as well.
        if(fd >= 0)
            close(fd);
    }
};
142
143struct Fs {
144 // Must be acquired *after* any Inode.m locks.
145 std::mutex mutex;
146 InodeMap inodes; // protected by mutex
147 Inode root;
148 double timeout;
149 bool debug;
150 std::string source;
151 size_t blocksize;
152 dev_t src_dev;
153 bool nosplice;
154 bool nocache;
155};
156static Fs fs{};
157
158
// Copy flags for fuse_reply_data() and fuse_buf_copy(): splicing is
// ruled out when the nosplice setting is active, otherwise no flags
// are set.
#define FUSE_BUF_COPY_FLAGS                         \
    (fs.nosplice                                    \
         ? FUSE_BUF_NO_SPLICE                       \
         : static_cast<fuse_buf_copy_flags>(0))
163
164
165static Inode& get_inode(fuse_ino_t ino) {
166 if (ino == FUSE_ROOT_ID)
167 return fs.root;
168
169 Inode* inode = reinterpret_cast<Inode*>(ino);
170 if(inode->fd == -1) {
171 cerr << "INTERNAL ERROR: Unknown inode " << ino << endl;
172 abort();
173 }
174 return *inode;
175}
176
177
178static int get_fs_fd(fuse_ino_t ino) {
179 int fd = get_inode(ino).fd;
180 return fd;
181}
182
183
184static void sfs_init(void *userdata, fuse_conn_info *conn) {
185 (void)userdata;
186 if (conn->capable & FUSE_CAP_EXPORT_SUPPORT)
187 conn->want |= FUSE_CAP_EXPORT_SUPPORT;
188
189 if (fs.timeout && conn->capable & FUSE_CAP_WRITEBACK_CACHE)
190 conn->want |= FUSE_CAP_WRITEBACK_CACHE;
191
192 if (conn->capable & FUSE_CAP_FLOCK_LOCKS)
193 conn->want |= FUSE_CAP_FLOCK_LOCKS;
194
195 // Use splicing if supported. Since we are using writeback caching
196 // and readahead, individual requests should have a decent size so
197 // that splicing between fd's is well worth it.
198 if (conn->capable & FUSE_CAP_SPLICE_WRITE && !fs.nosplice)
199 conn->want |= FUSE_CAP_SPLICE_WRITE;
200 if (conn->capable & FUSE_CAP_SPLICE_READ && !fs.nosplice)
201 conn->want |= FUSE_CAP_SPLICE_READ;
202}
203
204
205static void sfs_getattr(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) {
206 (void)fi;
207 Inode& inode = get_inode(ino);
208 struct stat attr;
209 auto res = fstatat(inode.fd, "", &attr,
210 AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
211 if (res == -1) {
212 fuse_reply_err(req, errno);
213 return;
214 }
215 fuse_reply_attr(req, &attr, fs.timeout);
216}
217
218
#ifdef HAVE_UTIMENSAT
// Set the timestamps `tv` on the file behind `inode` without
// following symlinks. Returns the result of utimensat(); for a
// symlink an EINVAL failure is reported as EPERM.
static int utimensat_empty_nofollow(Inode& inode,
                                    const struct timespec *tv) {
    if (!inode.is_symlink) {
        // Regular case: address the file through its /proc entry
        char procname[64];
        sprintf(procname, "/proc/self/fd/%i", inode.fd);
        return utimensat(AT_FDCWD, procname, tv, 0);
    }

    /* Does not work on current kernels, but may in the future:
       https://marc.info/?l=linux-kernel&m=154158217810354&w=2 */
    auto rc = utimensat(inode.fd, "", tv,
                        AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
    /* Sorry, no race free way to set times on symlink. */
    if (rc == -1 && errno == EINVAL)
        errno = EPERM;
    return rc;
}
#endif
239
240
241static void do_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
242 int valid, struct fuse_file_info* fi) {
243 Inode& inode = get_inode(ino);
244 int ifd = inode.fd;
245 int res;
246
247 if (valid & FUSE_SET_ATTR_MODE) {
248 if (fi) {
249 res = fchmod(fi->fh, attr->st_mode);
250 } else {
251 char procname[64];
252 sprintf(procname, "/proc/self/fd/%i", ifd);
253 res = chmod(procname, attr->st_mode);
254 }
255 if (res == -1)
256 goto out_err;
257 }
258 if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) {
259 uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : static_cast<uid_t>(-1);
260 gid_t gid = (valid & FUSE_SET_ATTR_GID) ? attr->st_gid : static_cast<gid_t>(-1);
261
262 res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
263 if (res == -1)
264 goto out_err;
265 }
266 if (valid & FUSE_SET_ATTR_SIZE) {
267 if (fi) {
268 res = ftruncate(fi->fh, attr->st_size);
269 } else {
270 char procname[64];
271 sprintf(procname, "/proc/self/fd/%i", ifd);
272 res = truncate(procname, attr->st_size);
273 }
274 if (res == -1)
275 goto out_err;
276 }
277 if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) {
278 struct timespec tv[2];
279
280 tv[0].tv_sec = 0;
281 tv[1].tv_sec = 0;
282 tv[0].tv_nsec = UTIME_OMIT;
283 tv[1].tv_nsec = UTIME_OMIT;
284
285 if (valid & FUSE_SET_ATTR_ATIME_NOW)
286 tv[0].tv_nsec = UTIME_NOW;
287 else if (valid & FUSE_SET_ATTR_ATIME)
288 tv[0] = attr->st_atim;
289
290 if (valid & FUSE_SET_ATTR_MTIME_NOW)
291 tv[1].tv_nsec = UTIME_NOW;
292 else if (valid & FUSE_SET_ATTR_MTIME)
293 tv[1] = attr->st_mtim;
294
295 if (fi)
296 res = futimens(fi->fh, tv);
297 else {
298#ifdef HAVE_UTIMENSAT
299 res = utimensat_empty_nofollow(inode, tv);
300#else
301 res = -1;
302 errno = EOPNOTSUPP;
303#endif
304 }
305 if (res == -1)
306 goto out_err;
307 }
308 return sfs_getattr(req, ino, fi);
309
310out_err:
311 fuse_reply_err(req, errno);
312}
313
314
315static void sfs_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
316 int valid, fuse_file_info *fi) {
317 (void) ino;
318 do_setattr(req, ino, attr, valid, fi);
319}
320
321
322static int do_lookup(fuse_ino_t parent, const char *name,
323 fuse_entry_param *e) {
324 if (fs.debug)
325 cerr << "DEBUG: lookup(): name=" << name
326 << ", parent=" << parent << endl;
327 memset(e, 0, sizeof(*e));
328 e->attr_timeout = fs.timeout;
329 e->entry_timeout = fs.timeout;
330
331 auto newfd = openat(get_fs_fd(parent), name, O_PATH | O_NOFOLLOW);
332 if (newfd == -1)
333 return errno;
334
335 auto res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
336 if (res == -1) {
337 auto saveerr = errno;
338 close(newfd);
339 if (fs.debug)
340 cerr << "DEBUG: lookup(): fstatat failed" << endl;
341 return saveerr;
342 }
343
344 if (e->attr.st_dev != fs.src_dev) {
345 cerr << "WARNING: Mountpoints in the source directory tree will be hidden." << endl;
346 return ENOTSUP;
347 } else if (e->attr.st_ino == FUSE_ROOT_ID) {
348 cerr << "ERROR: Source directory tree must not include inode "
349 << FUSE_ROOT_ID << endl;
350 return EIO;
351 }
352
353 SrcId id {e->attr.st_ino, e->attr.st_dev};
354 unique_lock<mutex> fs_lock {fs.mutex};
355 Inode* inode_p;
356 try {
357 inode_p = &fs.inodes[id];
358 } catch (std::bad_alloc&) {
359 return ENOMEM;
360 }
361 e->ino = reinterpret_cast<fuse_ino_t>(inode_p);
362 Inode& inode {*inode_p};
363
364 if(inode.fd != -1) { // found existing inode
365 fs_lock.unlock();
366 if (fs.debug)
367 cerr << "DEBUG: lookup(): inode " << e->attr.st_ino
368 << " (userspace) already known." << endl;
369 lock_guard<mutex> g {inode.m};
370 inode.nlookup++;
371 close(newfd);
372 } else { // no existing inode
373 /* This is just here to make Helgrind happy. It violates the
374 lock ordering requirement (inode.m must be acquired before
375 fs.mutex), but this is of no consequence because at this
376 point no other thread has access to the inode mutex */
377 lock_guard<mutex> g {inode.m};
378 inode.src_ino = e->attr.st_ino;
379 inode.src_dev = e->attr.st_dev;
380 inode.is_symlink = S_ISLNK(e->attr.st_mode);
381 inode.nlookup = 1;
382 inode.fd = newfd;
383 fs_lock.unlock();
384
385 if (fs.debug)
386 cerr << "DEBUG: lookup(): created userspace inode " << e->attr.st_ino
387 << endl;
388 }
389
390 return 0;
391}
392
393
394static void sfs_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) {
395 fuse_entry_param e {};
396 auto err = do_lookup(parent, name, &e);
397 if (err == ENOENT) {
398 e.attr_timeout = fs.timeout;
399 e.entry_timeout = fs.timeout;
400 e.ino = e.attr.st_ino = 0;
401 fuse_reply_entry(req, &e);
402 } else if (err) {
403 if (err == ENFILE || err == EMFILE)
404 cerr << "ERROR: Reached maximum number of file descriptors." << endl;
405 fuse_reply_err(req, err);
406 } else {
407 fuse_reply_entry(req, &e);
408 }
409}
410
411
412static void mknod_symlink(fuse_req_t req, fuse_ino_t parent,
413 const char *name, mode_t mode, dev_t rdev,
414 const char *link) {
415 int res;
416 Inode& inode_p = get_inode(parent);
417 auto saverr = ENOMEM;
418
419 if (S_ISDIR(mode))
420 res = mkdirat(inode_p.fd, name, mode);
421 else if (S_ISLNK(mode))
422 res = symlinkat(link, inode_p.fd, name);
423 else
424 res = mknodat(inode_p.fd, name, mode, rdev);
425 saverr = errno;
426 if (res == -1)
427 goto out;
428
429 fuse_entry_param e;
430 saverr = do_lookup(parent, name, &e);
431 if (saverr)
432 goto out;
433
434 fuse_reply_entry(req, &e);
435 return;
436
437out:
438 if (saverr == ENFILE || saverr == EMFILE)
439 cerr << "ERROR: Reached maximum number of file descriptors." << endl;
440 fuse_reply_err(req, saverr);
441}
442
443
444static void sfs_mknod(fuse_req_t req, fuse_ino_t parent, const char *name,
445 mode_t mode, dev_t rdev) {
446 mknod_symlink(req, parent, name, mode, rdev, nullptr);
447}
448
449
450static void sfs_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name,
451 mode_t mode) {
452 mknod_symlink(req, parent, name, S_IFDIR | mode, 0, nullptr);
453}
454
455
456static void sfs_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
457 const char *name) {
458 mknod_symlink(req, parent, name, S_IFLNK, 0, link);
459}
460
461
462static int linkat_empty_nofollow(Inode& inode, int dfd, const char *name) {
463 if (inode.is_symlink) {
464 auto res = linkat(inode.fd, "", dfd, name, AT_EMPTY_PATH);
465 if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
466 /* Sorry, no race free way to hard-link a symlink. */
467 errno = EOPNOTSUPP;
468 }
469 return res;
470 }
471
472 char procname[64];
473 sprintf(procname, "/proc/self/fd/%i", inode.fd);
474 return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW);
475}
476
477
478static void sfs_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
479 const char *name) {
480 Inode& inode = get_inode(ino);
481 Inode& inode_p = get_inode(parent);
482 fuse_entry_param e {};
483
484 e.attr_timeout = fs.timeout;
485 e.entry_timeout = fs.timeout;
486
487 auto res = linkat_empty_nofollow(inode, inode_p.fd, name);
488 if (res == -1) {
489 fuse_reply_err(req, errno);
490 return;
491 }
492
493 res = fstatat(inode.fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
494 if (res == -1) {
495 fuse_reply_err(req, errno);
496 return;
497 }
498 e.ino = reinterpret_cast<fuse_ino_t>(&inode);
499 {
500 lock_guard<mutex> g {inode.m};
501 inode.nlookup++;
502 }
503
504 fuse_reply_entry(req, &e);
505 return;
506}
507
508
509static void sfs_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) {
510 Inode& inode_p = get_inode(parent);
511 lock_guard<mutex> g {inode_p.m};
512 auto res = unlinkat(inode_p.fd, name, AT_REMOVEDIR);
513 fuse_reply_err(req, res == -1 ? errno : 0);
514}
515
516
517static void sfs_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
518 fuse_ino_t newparent, const char *newname,
519 unsigned int flags) {
520 Inode& inode_p = get_inode(parent);
521 Inode& inode_np = get_inode(newparent);
522 if (flags) {
523 fuse_reply_err(req, EINVAL);
524 return;
525 }
526
527 auto res = renameat(inode_p.fd, name, inode_np.fd, newname);
528 fuse_reply_err(req, res == -1 ? errno : 0);
529}
530
531
532static void sfs_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) {
533 Inode& inode_p = get_inode(parent);
534 auto res = unlinkat(inode_p.fd, name, 0);
535 fuse_reply_err(req, res == -1 ? errno : 0);
536}
537
538
539static void forget_one(fuse_ino_t ino, uint64_t n) {
540 Inode& inode = get_inode(ino);
541 unique_lock<mutex> l {inode.m};
542
543 if(n > inode.nlookup) {
544 cerr << "INTERNAL ERROR: Negative lookup count for inode "
545 << inode.src_ino << endl;
546 abort();
547 }
548 inode.nlookup -= n;
549 if (!inode.nlookup) {
550 if (fs.debug)
551 cerr << "DEBUG: forget: cleaning up inode " << inode.src_ino << endl;
552 {
553 lock_guard<mutex> g_fs {fs.mutex};
554 l.unlock();
555 fs.inodes.erase({inode.src_ino, inode.src_dev});
556 }
557 } else if (fs.debug)
558 cerr << "DEBUG: forget: inode " << inode.src_ino
559 << " lookup count now " << inode.nlookup << endl;
560}
561
562static void sfs_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) {
563 forget_one(ino, nlookup);
564 fuse_reply_none(req);
565}
566
567
568static void sfs_forget_multi(fuse_req_t req, size_t count,
569 fuse_forget_data *forgets) {
570 for (int i = 0; i < count; i++)
571 forget_one(forgets[i].ino, forgets[i].nlookup);
572 fuse_reply_none(req);
573}
574
575
576static void sfs_readlink(fuse_req_t req, fuse_ino_t ino) {
577 Inode& inode = get_inode(ino);
578 char buf[PATH_MAX + 1];
579 auto res = readlinkat(inode.fd, "", buf, sizeof(buf));
580 if (res == -1)
581 fuse_reply_err(req, errno);
582 else if (res == sizeof(buf))
583 fuse_reply_err(req, ENAMETOOLONG);
584 else {
585 buf[res] = '\0';
586 fuse_reply_readlink(req, buf);
587 }
588}
589
590
// State of one open directory. Owns the directory stream, which is
// closed on destruction.
struct DirHandle {
    DIR *dp {nullptr};   // directory stream, nullptr while not open
    off_t offset {0};    // d_off of the entry read last, or the
                         // position last seeked to

    DirHandle() = default;
    DirHandle(const DirHandle&) = delete;
    DirHandle& operator=(const DirHandle&) = delete;

    ~DirHandle() {
        if(dp)
            closedir(dp);
    }
};
604
605
606static DirHandle *get_dir_handle(fuse_file_info *fi) {
607 return reinterpret_cast<DirHandle*>(fi->fh);
608}
609
610
611static void sfs_opendir(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) {
612 Inode& inode = get_inode(ino);
613 auto d = new (nothrow) DirHandle;
614 if (d == nullptr) {
615 fuse_reply_err(req, ENOMEM);
616 return;
617 }
618
619 // Make Helgrind happy - it can't know that there's an implicit
620 // synchronization due to the fact that other threads cannot
621 // access d until we've called fuse_reply_*.
622 lock_guard<mutex> g {inode.m};
623
624 auto fd = openat(inode.fd, ".", O_RDONLY);
625 if (fd == -1)
626 goto out_errno;
627
628 // On success, dir stream takes ownership of fd, so we
629 // do not have to close it.
630 d->dp = fdopendir(fd);
631 if(d->dp == nullptr)
632 goto out_errno;
633
634 d->offset = 0;
635
636 fi->fh = reinterpret_cast<uint64_t>(d);
637 if(fs.timeout) {
638 fi->keep_cache = 1;
639 fi->cache_readdir = 1;
640 }
641 fuse_reply_open(req, fi);
642 return;
643
644out_errno:
645 auto error = errno;
646 delete d;
647 if (error == ENFILE || error == EMFILE)
648 cerr << "ERROR: Reached maximum number of file descriptors." << endl;
649 fuse_reply_err(req, error);
650}
651
652
// True if `name` is exactly "." or "..".
static bool is_dot_or_dotdot(const char *name) {
    return strcmp(name, ".") == 0 || strcmp(name, "..") == 0;
}
657
658
659static void do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
660 off_t offset, fuse_file_info *fi, int plus) {
661 auto d = get_dir_handle(fi);
662 Inode& inode = get_inode(ino);
663 lock_guard<mutex> g {inode.m};
664 char *p;
665 auto rem = size;
666 int err = 0, count = 0;
667
668 if (fs.debug)
669 cerr << "DEBUG: readdir(): started with offset "
670 << offset << endl;
671
672 auto buf = new (nothrow) char[size];
673 if (!buf) {
674 fuse_reply_err(req, ENOMEM);
675 return;
676 }
677 p = buf;
678
679 if (offset != d->offset) {
680 if (fs.debug)
681 cerr << "DEBUG: readdir(): seeking to " << offset << endl;
682 seekdir(d->dp, offset);
683 d->offset = offset;
684 }
685
686 while (1) {
687 struct dirent *entry;
688 errno = 0;
689 entry = readdir(d->dp);
690 if (!entry) {
691 if(errno) {
692 err = errno;
693 if (fs.debug)
694 warn("DEBUG: readdir(): readdir failed with");
695 goto error;
696 }
697 break; // End of stream
698 }
699 d->offset = entry->d_off;
700 if (is_dot_or_dotdot(entry->d_name))
701 continue;
702
703 fuse_entry_param e{};
704 size_t entsize;
705 if(plus) {
706 err = do_lookup(ino, entry->d_name, &e);
707 if (err)
708 goto error;
709 entsize = fuse_add_direntry_plus(req, p, rem, entry->d_name, &e, entry->d_off);
710
711 if (entsize > rem) {
712 if (fs.debug)
713 cerr << "DEBUG: readdir(): buffer full, returning data. " << endl;
714 forget_one(e.ino, 1);
715 break;
716 }
717 } else {
718 e.attr.st_ino = entry->d_ino;
719 e.attr.st_mode = entry->d_type << 12;
720 entsize = fuse_add_direntry(req, p, rem, entry->d_name, &e.attr, entry->d_off);
721
722 if (entsize > rem) {
723 if (fs.debug)
724 cerr << "DEBUG: readdir(): buffer full, returning data. " << endl;
725 break;
726 }
727 }
728
729 p += entsize;
730 rem -= entsize;
731 count++;
732 if (fs.debug) {
733 cerr << "DEBUG: readdir(): added to buffer: " << entry->d_name
734 << ", ino " << e.attr.st_ino << ", offset " << entry->d_off << endl;
735 }
736 }
737 err = 0;
738error:
739
740 // If there's an error, we can only signal it if we haven't stored
741 // any entries yet - otherwise we'd end up with wrong lookup
742 // counts for the entries that are already in the buffer. So we
743 // return what we've collected until that point.
744 if (err && rem == size) {
745 if (err == ENFILE || err == EMFILE)
746 cerr << "ERROR: Reached maximum number of file descriptors." << endl;
747 fuse_reply_err(req, err);
748 } else {
749 if (fs.debug)
750 cerr << "DEBUG: readdir(): returning " << count
751 << " entries, curr offset " << d->offset << endl;
752 fuse_reply_buf(req, buf, size - rem);
753 }
754 delete[] buf;
755 return;
756}
757
758
759static void sfs_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
760 off_t offset, fuse_file_info *fi) {
761 // operation logging is done in readdir to reduce code duplication
762 do_readdir(req, ino, size, offset, fi, 0);
763}
764
765
766static void sfs_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size,
767 off_t offset, fuse_file_info *fi) {
768 // operation logging is done in readdir to reduce code duplication
769 do_readdir(req, ino, size, offset, fi, 1);
770}
771
772
773static void sfs_releasedir(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) {
774 (void) ino;
775 auto d = get_dir_handle(fi);
776 delete d;
777 fuse_reply_err(req, 0);
778}
779
780
781static void sfs_create(fuse_req_t req, fuse_ino_t parent, const char *name,
782 mode_t mode, fuse_file_info *fi) {
783 Inode& inode_p = get_inode(parent);
784
785 auto fd = openat(inode_p.fd, name,
786 (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode);
787 if (fd == -1) {
788 auto err = errno;
789 if (err == ENFILE || err == EMFILE)
790 cerr << "ERROR: Reached maximum number of file descriptors." << endl;
791 fuse_reply_err(req, err);
792 return;
793 }
794
795 fi->fh = fd;
796 fuse_entry_param e;
797 auto err = do_lookup(parent, name, &e);
798 if (err) {
799 if (err == ENFILE || err == EMFILE)
800 cerr << "ERROR: Reached maximum number of file descriptors." << endl;
801 fuse_reply_err(req, err);
802 } else
803 fuse_reply_create(req, &e, fi);
804}
805
806
807static void sfs_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
808 fuse_file_info *fi) {
809 (void) ino;
810 int res;
811 int fd = dirfd(get_dir_handle(fi)->dp);
812 if (datasync)
813 res = fdatasync(fd);
814 else
815 res = fsync(fd);
816 fuse_reply_err(req, res == -1 ? errno : 0);
817}
818
819
820static void sfs_open(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) {
821 Inode& inode = get_inode(ino);
822
823 /* With writeback cache, kernel may send read requests even
824 when userspace opened write-only */
825 if (fs.timeout && (fi->flags & O_ACCMODE) == O_WRONLY) {
826 fi->flags &= ~O_ACCMODE;
827 fi->flags |= O_RDWR;
828 }
829
830 /* With writeback cache, O_APPEND is handled by the kernel. This
831 breaks atomicity (since the file may change in the underlying
832 filesystem, so that the kernel's idea of the end of the file
833 isn't accurate anymore). However, no process should modify the
834 file in the underlying filesystem once it has been read, so
835 this is not a problem. */
836 if (fs.timeout && fi->flags & O_APPEND)
837 fi->flags &= ~O_APPEND;
838
839 /* Unfortunately we cannot use inode.fd, because this was opened
840 with O_PATH (so it doesn't allow read/write access). */
841 char buf[64];
842 sprintf(buf, "/proc/self/fd/%i", inode.fd);
843 auto fd = open(buf, fi->flags & ~O_NOFOLLOW);
844 if (fd == -1) {
845 auto err = errno;
846 if (err == ENFILE || err == EMFILE)
847 cerr << "ERROR: Reached maximum number of file descriptors." << endl;
848 fuse_reply_err(req, err);
849 return;
850 }
851
852 fi->keep_cache = (fs.timeout != 0);
853 fi->fh = fd;
854 fuse_reply_open(req, fi);
855}
856
857
858static void sfs_release(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) {
859 (void) ino;
860 close(fi->fh);
861 fuse_reply_err(req, 0);
862}
863
864
865static void sfs_flush(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) {
866 (void) ino;
867 auto res = close(dup(fi->fh));
868 fuse_reply_err(req, res == -1 ? errno : 0);
869}
870
871
872static void sfs_fsync(fuse_req_t req, fuse_ino_t ino, int datasync,
873 fuse_file_info *fi) {
874 (void) ino;
875 int res;
876 if (datasync)
877 res = fdatasync(fi->fh);
878 else
879 res = fsync(fi->fh);
880 fuse_reply_err(req, res == -1 ? errno : 0);
881}
882
883
884static void do_read(fuse_req_t req, size_t size, off_t off, fuse_file_info *fi) {
885
886 fuse_bufvec buf = FUSE_BUFVEC_INIT(size);
887 buf.buf[0].flags = static_cast<fuse_buf_flags>(
888 FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK);
889 buf.buf[0].fd = fi->fh;
890 buf.buf[0].pos = off;
891
892 fuse_reply_data(req, &buf, FUSE_BUF_COPY_FLAGS);
893}
894
895static void sfs_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
896 fuse_file_info *fi) {
897 (void) ino;
898 do_read(req, size, off, fi);
899}
900
901
902static void do_write_buf(fuse_req_t req, size_t size, off_t off,
903 fuse_bufvec *in_buf, fuse_file_info *fi) {
904 fuse_bufvec out_buf = FUSE_BUFVEC_INIT(size);
905 out_buf.buf[0].flags = static_cast<fuse_buf_flags>(
906 FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK);
907 out_buf.buf[0].fd = fi->fh;
908 out_buf.buf[0].pos = off;
909
910 auto res = fuse_buf_copy(&out_buf, in_buf, FUSE_BUF_COPY_FLAGS);
911 if (res < 0)
912 fuse_reply_err(req, -res);
913 else
914 fuse_reply_write(req, (size_t)res);
915}
916
917
918static void sfs_write_buf(fuse_req_t req, fuse_ino_t ino, fuse_bufvec *in_buf,
919 off_t off, fuse_file_info *fi) {
920 (void) ino;
921 auto size {fuse_buf_size(in_buf)};
922 do_write_buf(req, size, off, in_buf, fi);
923}
924
925
926static void sfs_statfs(fuse_req_t req, fuse_ino_t ino) {
927 struct statvfs stbuf;
928
929 auto res = fstatvfs(get_fs_fd(ino), &stbuf);
930 if (res == -1)
931 fuse_reply_err(req, errno);
932 else
933 fuse_reply_statfs(req, &stbuf);
934}
935
936
#ifdef HAVE_POSIX_FALLOCATE
// fallocate handler: only plain allocation (mode 0) is supported,
// any other mode is answered with EOPNOTSUPP.
static void sfs_fallocate(fuse_req_t req, fuse_ino_t ino, int mode,
                          off_t offset, off_t length, fuse_file_info *fi) {
    (void) ino;
    // posix_fallocate() returns the error number directly
    const int err = mode ? EOPNOTSUPP
                         : posix_fallocate(fi->fh, offset, length);
    fuse_reply_err(req, err);
}
#endif
950
951static void sfs_flock(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi,
952 int op) {
953 (void) ino;
954 auto res = flock(fi->fh, op);
955 fuse_reply_err(req, res == -1 ? errno : 0);
956}
957
958
959#ifdef HAVE_SETXATTR
960static void sfs_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
961 size_t size) {
962 char *value = nullptr;
963 Inode& inode = get_inode(ino);
964 ssize_t ret;
965 int saverr;
966
967 if (inode.is_symlink) {
968 /* Sorry, no race free way to getxattr on symlink. */
969 saverr = ENOTSUP;
970 goto out;
971 }
972
973 char procname[64];
974 sprintf(procname, "/proc/self/fd/%i", inode.fd);
975
976 if (size) {
977 value = new (nothrow) char[size];
978 if (value == nullptr) {
979 saverr = ENOMEM;
980 goto out;
981 }
982
983 ret = getxattr(procname, name, value, size);
984 if (ret == -1)
985 goto out_err;
986 saverr = 0;
987 if (ret == 0)
988 goto out;
989
990 fuse_reply_buf(req, value, ret);
991 } else {
992 ret = getxattr(procname, name, nullptr, 0);
993 if (ret == -1)
994 goto out_err;
995
996 fuse_reply_xattr(req, ret);
997 }
998out_free:
999 delete[] value;
1000 return;
1001
1002out_err:
1003 saverr = errno;
1004out:
1005 fuse_reply_err(req, saverr);
1006 goto out_free;
1007}
1008
1009
1010static void sfs_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) {
1011 char *value = nullptr;
1012 Inode& inode = get_inode(ino);
1013 ssize_t ret;
1014 int saverr;
1015
1016 if (inode.is_symlink) {
1017 /* Sorry, no race free way to listxattr on symlink. */
1018 saverr = ENOTSUP;
1019 goto out;
1020 }
1021
1022 char procname[64];
1023 sprintf(procname, "/proc/self/fd/%i", inode.fd);
1024
1025 if (size) {
1026 value = new (nothrow) char[size];
1027 if (value == nullptr) {
1028 saverr = ENOMEM;
1029 goto out;
1030 }
1031
1032 ret = listxattr(procname, value, size);
1033 if (ret == -1)
1034 goto out_err;
1035 saverr = 0;
1036 if (ret == 0)
1037 goto out;
1038
1039 fuse_reply_buf(req, value, ret);
1040 } else {
1041 ret = listxattr(procname, nullptr, 0);
1042 if (ret == -1)
1043 goto out_err;
1044
1045 fuse_reply_xattr(req, ret);
1046 }
1047out_free:
1048 delete[] value;
1049 return;
1050out_err:
1051 saverr = errno;
1052out:
1053 fuse_reply_err(req, saverr);
1054 goto out_free;
1055}
1056
1057
1058static void sfs_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
1059 const char *value, size_t size, int flags) {
1060 Inode& inode = get_inode(ino);
1061 ssize_t ret;
1062 int saverr;
1063
1064 if (inode.is_symlink) {
1065 /* Sorry, no race free way to setxattr on symlink. */
1066 saverr = ENOTSUP;
1067 goto out;
1068 }
1069
1070 char procname[64];
1071 sprintf(procname, "/proc/self/fd/%i", inode.fd);
1072
1073 ret = setxattr(procname, name, value, size, flags);
1074 saverr = ret == -1 ? errno : 0;
1075
1076out:
1077 fuse_reply_err(req, saverr);
1078}
1079
1080
1081static void sfs_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) {
1082 char procname[64];
1083 Inode& inode = get_inode(ino);
1084 ssize_t ret;
1085 int saverr;
1086
1087 if (inode.is_symlink) {
1088 /* Sorry, no race free way to setxattr on symlink. */
1089 saverr = ENOTSUP;
1090 goto out;
1091 }
1092
1093 sprintf(procname, "/proc/self/fd/%i", inode.fd);
1094 ret = removexattr(procname, name);
1095 saverr = ret == -1 ? errno : 0;
1096
1097out:
1098 fuse_reply_err(req, saverr);
1099}
1100#endif
1101
1102
1103static void assign_operations(fuse_lowlevel_ops &sfs_oper) {
1104 sfs_oper.init = sfs_init;
1105 sfs_oper.lookup = sfs_lookup;
1106 sfs_oper.mkdir = sfs_mkdir;
1107 sfs_oper.mknod = sfs_mknod;
1108 sfs_oper.symlink = sfs_symlink;
1109 sfs_oper.link = sfs_link;
1110 sfs_oper.unlink = sfs_unlink;
1111 sfs_oper.rmdir = sfs_rmdir;
1112 sfs_oper.rename = sfs_rename;
1113 sfs_oper.forget = sfs_forget;
1114 sfs_oper.forget_multi = sfs_forget_multi;
1115 sfs_oper.getattr = sfs_getattr;
1116 sfs_oper.setattr = sfs_setattr;
1117 sfs_oper.readlink = sfs_readlink;
1118 sfs_oper.opendir = sfs_opendir;
1119 sfs_oper.readdir = sfs_readdir;
1120 sfs_oper.readdirplus = sfs_readdirplus;
1121 sfs_oper.releasedir = sfs_releasedir;
1122 sfs_oper.fsyncdir = sfs_fsyncdir;
1123 sfs_oper.create = sfs_create;
1124 sfs_oper.open = sfs_open;
1125 sfs_oper.release = sfs_release;
1126 sfs_oper.flush = sfs_flush;
1127 sfs_oper.fsync = sfs_fsync;
1128 sfs_oper.read = sfs_read;
1129 sfs_oper.write_buf = sfs_write_buf;
1130 sfs_oper.statfs = sfs_statfs;
1131#ifdef HAVE_POSIX_FALLOCATE
1132 sfs_oper.fallocate = sfs_fallocate;
1133#endif
1134 sfs_oper.flock = sfs_flock;
1135#ifdef HAVE_SETXATTR
1136 sfs_oper.setxattr = sfs_setxattr;
1137 sfs_oper.getxattr = sfs_getxattr;
1138 sfs_oper.listxattr = sfs_listxattr;
1139 sfs_oper.removexattr = sfs_removexattr;
1140#endif
1141}
1142
1143static void print_usage(char *prog_name) {
1144 cout << "Usage: " << prog_name << " --help\n"
1145 << " " << prog_name << " [options] <source> <mountpoint>\n";
1146}
1147
1148static cxxopts::ParseResult parse_wrapper(cxxopts::Options& parser, int& argc, char**& argv) {
1149 try {
1150 return parser.parse(argc, argv);
1151 } catch (cxxopts::option_not_exists_exception& exc) {
1152 std::cout << argv[0] << ": " << exc.what() << std::endl;
1153 print_usage(argv[0]);
1154 exit(2);
1155 }
1156}
1157
1158
1159static cxxopts::ParseResult parse_options(int argc, char **argv) {
1160 cxxopts::Options opt_parser(argv[0]);
1161 opt_parser.add_options()
1162 ("debug", "Enable filesystem debug messages")
1163 ("debug-fuse", "Enable libfuse debug messages")
1164 ("help", "Print help")
1165 ("nocache", "Disable all caching")
1166 ("nosplice", "Do not use splice(2) to transfer data")
1167 ("single", "Run single-threaded");
1168
1169 // FIXME: Find a better way to limit the try clause to just
1170 // opt_parser.parse() (cf. https://github.com/jarro2783/cxxopts/issues/146)
1171 auto options = parse_wrapper(opt_parser, argc, argv);
1172
1173 if (options.count("help")) {
1174 print_usage(argv[0]);
1175 // Strip everything before the option list from the
1176 // default help string.
1177 auto help = opt_parser.help();
1178 std::cout << std::endl << "options:"
1179 << help.substr(help.find("\n\n") + 1, string::npos);
1180 exit(0);
1181
1182 } else if (argc != 3) {
1183 std::cout << argv[0] << ": invalid number of arguments\n";
1184 print_usage(argv[0]);
1185 exit(2);
1186 }
1187
1188 fs.debug = options.count("debug") != 0;
1189 fs.nosplice = options.count("nosplice") != 0;
1190 fs.source = std::string {realpath(argv[1], NULL)};
1191
1192 return options;
1193}
1194
1195
/**
 * Raise the soft limit on open file descriptors (RLIMIT_NOFILE) to
 * the current hard limit.
 *
 * Failures are not fatal: a warning is emitted through warn() and
 * the function returns without changing anything further.
 */
static void maximize_fd_limit() {
    struct rlimit fd_limit {};

    if (getrlimit(RLIMIT_NOFILE, &fd_limit) != 0) {
        warn("WARNING: getrlimit() failed with");
        return;
    }

    // Lift the soft limit all the way up to the hard limit.
    fd_limit.rlim_cur = fd_limit.rlim_max;

    if (setrlimit(RLIMIT_NOFILE, &fd_limit) != 0) {
        warn("WARNING: setrlimit() failed with");
    }
}
1208
1209
1210int main(int argc, char *argv[]) {
1211
1212 // Parse command line options
1213 auto options {parse_options(argc, argv)};
1214
1215 // We need an fd for every dentry in our the filesystem that the
1216 // kernel knows about. This is way more than most processes need,
1217 // so try to get rid of any resource softlimit.
1218 maximize_fd_limit();
1219
1220 // Initialize filesystem root
1221 fs.root.fd = -1;
1222 fs.root.nlookup = 9999;
1223 fs.root.is_symlink = false;
1224 fs.timeout = options.count("nocache") ? 0 : 86400.0;
1225
1226 struct stat stat;
1227 auto ret = lstat(fs.source.c_str(), &stat);
1228 if (ret == -1)
1229 err(1, "ERROR: failed to stat source (\"%s\")", fs.source.c_str());
1230 if (!S_ISDIR(stat.st_mode))
1231 errx(1, "ERROR: source is not a directory");
1232 fs.src_dev = stat.st_dev;
1233
1234 fs.root.fd = open(fs.source.c_str(), O_PATH);
1235 if (fs.root.fd == -1)
1236 err(1, "ERROR: open(\"%s\", O_PATH)", fs.source.c_str());
1237
1238 // Initialize fuse
1239 fuse_args args = FUSE_ARGS_INIT(0, nullptr);
1240 if (fuse_opt_add_arg(&args, argv[0]) ||
1241 fuse_opt_add_arg(&args, "-o") ||
1242 fuse_opt_add_arg(&args, "default_permissions,fsname=hpps") ||
1243 (options.count("debug-fuse") && fuse_opt_add_arg(&args, "-odebug")))
1244 errx(3, "ERROR: Out of memory");
1245
1246 fuse_lowlevel_ops sfs_oper {};
1247 assign_operations(sfs_oper);
1248 auto se = fuse_session_new(&args, &sfs_oper, sizeof(sfs_oper), &fs);
1249 if (se == nullptr)
1250 goto err_out1;
1251
1252 if (fuse_set_signal_handlers(se) != 0)
1253 goto err_out2;
1254
1255 // Don't apply umask, use modes exactly as specified
1256 umask(0);
1257
1258 // Mount and run main loop
1259 struct fuse_loop_config loop_config;
1260 loop_config.clone_fd = 0;
1261 loop_config.max_idle_threads = 10;
1262 if (fuse_session_mount(se, argv[2]) != 0)
1263 goto err_out3;
1264 if (options.count("single"))
1265 ret = fuse_session_loop(se);
1266 else
1267 ret = fuse_session_loop_mt(se, &loop_config);
1268
1269 fuse_session_unmount(se);
1270
1271err_out3:
1272 fuse_remove_signal_handlers(se);
1273err_out2:
1274 fuse_session_destroy(se);
1275err_out1:
1276 fuse_opt_free_args(&args);
1277
1278 return ret ? 1 : 0;
1279}
1280