/*
 *  inode.c - part of tracefs, a pseudo file system for activating tracing
 *
 * Based on debugfs by: Greg Kroah-Hartman <greg@kroah.com>
 *
 *  Copyright (C) 2014 Red Hat Inc, author: Steven Rostedt <srostedt@redhat.com>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License version
 *	2 as published by the Free Software Foundation.
 *
 * tracefs is the file system that is used by the tracing infrastructure.
 *
 */
15
16#include <linux/module.h>
17#include <linux/fs.h>
18#include <linux/mount.h>
19#include <linux/namei.h>
20#include <linux/tracefs.h>
21#include <linux/fsnotify.h>
22#include <linux/seq_file.h>
23#include <linux/parser.h>
24#include <linux/magic.h>
25#include <linux/slab.h>
26
/*
 * Permission bits applied to the tracefs root directory when the mount
 * options do not carry an explicit "mode=" value.
 */
#define TRACEFS_DEFAULT_MODE 0700

/* Mount handed to simple_pin_fs()/simple_release_fs(); its root is the default parent. */
static struct vfsmount *tracefs_mount;
/* Pin counter handed to simple_pin_fs()/simple_release_fs() together with tracefs_mount. */
static int tracefs_mount_count;
/* Set to true once register_filesystem() has succeeded in tracefs_init(). */
static bool tracefs_registered;
32
/*
 * Fallback ->read handler for files created without their own
 * file_operations: it never touches @buf and always returns 0, so such
 * files read as empty.
 */
static ssize_t default_read_file(struct file *file, char __user *buf,
				 size_t count, loff_t *ppos)
{
	return 0;
}
38
/*
 * Fallback ->write handler for files created without their own
 * file_operations: the data in @buf is ignored and @count is returned,
 * i.e. the whole write is reported as consumed.
 */
static ssize_t default_write_file(struct file *file, const char __user *buf,
				   size_t count, loff_t *ppos)
{
	return count;
}
44
/*
 * Default file_operations installed by tracefs_create_file() when the
 * caller passes a NULL @fops.
 */
static const struct file_operations tracefs_file_operations = {
	.read =		default_read_file,
	.write =	default_write_file,
	.open =		simple_open,
	.llseek =	noop_llseek,
};
51
52static struct inode *tracefs_get_inode(struct super_block *sb)
53{
54 struct inode *inode = new_inode(sb);
55 if (inode) {
56 inode->i_ino = get_next_ino();
57 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
58 }
59 return inode;
60}
61
/*
 * Parsed mount options. tracefs_apply_options() copies these onto the
 * inode of the superblock's root directory.
 */
struct tracefs_mount_opts {
	kuid_t uid;	/* from "uid=" */
	kgid_t gid;	/* from "gid=" */
	umode_t mode;	/* from "mode=", else TRACEFS_DEFAULT_MODE */
};
67
/* Token identifiers used in the tokens[] match table and in tracefs_parse_options(). */
enum {
	Opt_uid,
	Opt_gid,
	Opt_mode,
	Opt_err		/* terminator entry of tokens[] (NULL pattern) */
};
74
/*
 * Mount option patterns handed to match_token(): uid and gid take an
 * unsigned decimal argument, mode takes an octal one.
 */
static const match_table_t tokens = {
	{Opt_uid, "uid=%u"},
	{Opt_gid, "gid=%u"},
	{Opt_mode, "mode=%o"},
	{Opt_err, NULL}
};
81
/* Per-superblock private data, stored in sb->s_fs_info by trace_fill_super(). */
struct tracefs_fs_info {
	struct tracefs_mount_opts mount_opts;
};
85
86static int tracefs_parse_options(char *data, struct tracefs_mount_opts *opts)
87{
88 substring_t args[MAX_OPT_ARGS];
89 int option;
90 int token;
91 kuid_t uid;
92 kgid_t gid;
93 char *p;
94
95 opts->mode = TRACEFS_DEFAULT_MODE;
96
97 while ((p = strsep(&data, ",")) != NULL) {
98 if (!*p)
99 continue;
100
101 token = match_token(p, tokens, args);
102 switch (token) {
103 case Opt_uid:
104 if (match_int(&args[0], &option))
105 return -EINVAL;
106 uid = make_kuid(current_user_ns(), option);
107 if (!uid_valid(uid))
108 return -EINVAL;
109 opts->uid = uid;
110 break;
111 case Opt_gid:
112 if (match_int(&args[0], &option))
113 return -EINVAL;
114 gid = make_kgid(current_user_ns(), option);
115 if (!gid_valid(gid))
116 return -EINVAL;
117 opts->gid = gid;
118 break;
119 case Opt_mode:
120 if (match_octal(&args[0], &option))
121 return -EINVAL;
122 opts->mode = option & S_IALLUGO;
123 break;
124 /*
125 * We might like to report bad mount options here;
126 * but traditionally tracefs has ignored all mount options
127 */
128 }
129 }
130
131 return 0;
132}
133
134static int tracefs_apply_options(struct super_block *sb)
135{
136 struct tracefs_fs_info *fsi = sb->s_fs_info;
137 struct inode *inode = sb->s_root->d_inode;
138 struct tracefs_mount_opts *opts = &fsi->mount_opts;
139
140 inode->i_mode &= ~S_IALLUGO;
141 inode->i_mode |= opts->mode;
142
143 inode->i_uid = opts->uid;
144 inode->i_gid = opts->gid;
145
146 return 0;
147}
148
149static int tracefs_remount(struct super_block *sb, int *flags, char *data)
150{
151 int err;
152 struct tracefs_fs_info *fsi = sb->s_fs_info;
153
154 sync_filesystem(sb);
155 err = tracefs_parse_options(data, &fsi->mount_opts);
156 if (err)
157 goto fail;
158
159 tracefs_apply_options(sb);
160
161fail:
162 return err;
163}
164
165static int tracefs_show_options(struct seq_file *m, struct dentry *root)
166{
167 struct tracefs_fs_info *fsi = root->d_sb->s_fs_info;
168 struct tracefs_mount_opts *opts = &fsi->mount_opts;
169
170 if (!uid_eq(opts->uid, GLOBAL_ROOT_UID))
171 seq_printf(m, ",uid=%u",
172 from_kuid_munged(&init_user_ns, opts->uid));
173 if (!gid_eq(opts->gid, GLOBAL_ROOT_GID))
174 seq_printf(m, ",gid=%u",
175 from_kgid_munged(&init_user_ns, opts->gid));
176 if (opts->mode != TRACEFS_DEFAULT_MODE)
177 seq_printf(m, ",mode=%o", opts->mode);
178
179 return 0;
180}
181
/* Superblock operations installed on the tracefs superblock by trace_fill_super(). */
static const struct super_operations tracefs_super_operations = {
	.statfs		= simple_statfs,
	.remount_fs	= tracefs_remount,
	.show_options	= tracefs_show_options,
};
187
188static int trace_fill_super(struct super_block *sb, void *data, int silent)
189{
190 static struct tree_descr trace_files[] = {{""}};
191 struct tracefs_fs_info *fsi;
192 int err;
193
194 save_mount_options(sb, data);
195
196 fsi = kzalloc(sizeof(struct tracefs_fs_info), GFP_KERNEL);
197 sb->s_fs_info = fsi;
198 if (!fsi) {
199 err = -ENOMEM;
200 goto fail;
201 }
202
203 err = tracefs_parse_options(data, &fsi->mount_opts);
204 if (err)
205 goto fail;
206
207 err = simple_fill_super(sb, TRACEFS_MAGIC, trace_files);
208 if (err)
209 goto fail;
210
211 sb->s_op = &tracefs_super_operations;
212
213 tracefs_apply_options(sb);
214
215 return 0;
216
217fail:
218 kfree(fsi);
219 sb->s_fs_info = NULL;
220 return err;
221}
222
/*
 * ->mount handler: delegates to mount_single() with trace_fill_super() as
 * the superblock fill callback. @dev_name is not used.
 */
static struct dentry *trace_mount(struct file_system_type *fs_type,
			int flags, const char *dev_name,
			void *data)
{
	return mount_single(fs_type, flags, data, trace_fill_super);
}
229
/*
 * Filesystem type descriptor for "tracefs"; registered in tracefs_init()
 * and pinned through simple_pin_fs() in start_creating().
 */
static struct file_system_type trace_fs_type = {
	.owner =	THIS_MODULE,
	.name =		"tracefs",
	.mount =	trace_mount,
	.kill_sb =	kill_litter_super,
};
MODULE_ALIAS_FS("tracefs");
237
238static struct dentry *start_creating(const char *name, struct dentry *parent)
239{
240 struct dentry *dentry;
241 int error;
242
243 pr_debug("tracefs: creating file '%s'\n",name);
244
245 error = simple_pin_fs(&trace_fs_type, &tracefs_mount,
246 &tracefs_mount_count);
247 if (error)
248 return ERR_PTR(error);
249
250 /* If the parent is not specified, we create it in the root.
251 * We need the root dentry to do this, which is in the super
252 * block. A pointer to that is in the struct vfsmount that we
253 * have around.
254 */
255 if (!parent)
256 parent = tracefs_mount->mnt_root;
257
258 mutex_lock(&parent->d_inode->i_mutex);
259 dentry = lookup_one_len(name, parent, strlen(name));
260 if (!IS_ERR(dentry) && dentry->d_inode) {
261 dput(dentry);
262 dentry = ERR_PTR(-EEXIST);
263 }
264 if (IS_ERR(dentry))
265 mutex_unlock(&parent->d_inode->i_mutex);
266 return dentry;
267}
268
269static struct dentry *failed_creating(struct dentry *dentry)
270{
271 mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
272 dput(dentry);
273 simple_release_fs(&tracefs_mount, &tracefs_mount_count);
274 return NULL;
275}
276
277static struct dentry *end_creating(struct dentry *dentry)
278{
279 mutex_unlock(&dentry->d_parent->d_inode->i_mutex);
280 return dentry;
281}
282
283/**
284 * tracefs_create_file - create a file in the tracefs filesystem
285 * @name: a pointer to a string containing the name of the file to create.
286 * @mode: the permission that the file should have.
287 * @parent: a pointer to the parent dentry for this file. This should be a
288 * directory dentry if set. If this parameter is NULL, then the
289 * file will be created in the root of the tracefs filesystem.
290 * @data: a pointer to something that the caller will want to get to later
291 * on. The inode.i_private pointer will point to this value on
292 * the open() call.
293 * @fops: a pointer to a struct file_operations that should be used for
294 * this file.
295 *
296 * This is the basic "create a file" function for tracefs. It allows for a
297 * wide range of flexibility in creating a file, or a directory (if you want
298 * to create a directory, the tracefs_create_dir() function is
299 * recommended to be used instead.)
300 *
301 * This function will return a pointer to a dentry if it succeeds. This
302 * pointer must be passed to the tracefs_remove() function when the file is
303 * to be removed (no automatic cleanup happens if your module is unloaded,
304 * you are responsible here.) If an error occurs, %NULL will be returned.
305 *
306 * If tracefs is not enabled in the kernel, the value -%ENODEV will be
307 * returned.
308 */
309struct dentry *tracefs_create_file(const char *name, umode_t mode,
310 struct dentry *parent, void *data,
311 const struct file_operations *fops)
312{
313 struct dentry *dentry;
314 struct inode *inode;
315
316 if (!(mode & S_IFMT))
317 mode |= S_IFREG;
318 BUG_ON(!S_ISREG(mode));
319 dentry = start_creating(name, parent);
320
321 if (IS_ERR(dentry))
322 return NULL;
323
324 inode = tracefs_get_inode(dentry->d_sb);
325 if (unlikely(!inode))
326 return failed_creating(dentry);
327
328 inode->i_mode = mode;
329 inode->i_fop = fops ? fops : &tracefs_file_operations;
330 inode->i_private = data;
331 d_instantiate(dentry, inode);
332 fsnotify_create(dentry->d_parent->d_inode, dentry);
333 return end_creating(dentry);
334}
335
336/**
337 * tracefs_create_dir - create a directory in the tracefs filesystem
338 * @name: a pointer to a string containing the name of the directory to
339 * create.
340 * @parent: a pointer to the parent dentry for this file. This should be a
341 * directory dentry if set. If this parameter is NULL, then the
342 * directory will be created in the root of the tracefs filesystem.
343 *
344 * This function creates a directory in tracefs with the given name.
345 *
346 * This function will return a pointer to a dentry if it succeeds. This
347 * pointer must be passed to the tracefs_remove() function when the file is
348 * to be removed. If an error occurs, %NULL will be returned.
349 *
350 * If tracing is not enabled in the kernel, the value -%ENODEV will be
351 * returned.
352 */
353struct dentry *tracefs_create_dir(const char *name, struct dentry *parent)
354{
355 struct dentry *dentry = start_creating(name, parent);
356 struct inode *inode;
357
358 if (IS_ERR(dentry))
359 return NULL;
360
361 inode = tracefs_get_inode(dentry->d_sb);
362 if (unlikely(!inode))
363 return failed_creating(dentry);
364
365 inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
366 inode->i_op = &simple_dir_inode_operations;
367 inode->i_fop = &simple_dir_operations;
368
369 /* directory inodes start off with i_nlink == 2 (for "." entry) */
370 inc_nlink(inode);
371 d_instantiate(dentry, inode);
372 inc_nlink(dentry->d_parent->d_inode);
373 fsnotify_mkdir(dentry->d_parent->d_inode, dentry);
374 return end_creating(dentry);
375}
376
377static inline int tracefs_positive(struct dentry *dentry)
378{
379 return dentry->d_inode && !d_unhashed(dentry);
380}
381
382static int __tracefs_remove(struct dentry *dentry, struct dentry *parent)
383{
384 int ret = 0;
385
386 if (tracefs_positive(dentry)) {
387 if (dentry->d_inode) {
388 dget(dentry);
389 switch (dentry->d_inode->i_mode & S_IFMT) {
390 case S_IFDIR:
391 ret = simple_rmdir(parent->d_inode, dentry);
392 break;
393 default:
394 simple_unlink(parent->d_inode, dentry);
395 break;
396 }
397 if (!ret)
398 d_delete(dentry);
399 dput(dentry);
400 }
401 }
402 return ret;
403}
404
405/**
406 * tracefs_remove - removes a file or directory from the tracefs filesystem
407 * @dentry: a pointer to a the dentry of the file or directory to be
408 * removed.
409 *
410 * This function removes a file or directory in tracefs that was previously
411 * created with a call to another tracefs function (like
412 * tracefs_create_file() or variants thereof.)
413 */
414void tracefs_remove(struct dentry *dentry)
415{
416 struct dentry *parent;
417 int ret;
418
419 if (IS_ERR_OR_NULL(dentry))
420 return;
421
422 parent = dentry->d_parent;
423 if (!parent || !parent->d_inode)
424 return;
425
426 mutex_lock(&parent->d_inode->i_mutex);
427 ret = __tracefs_remove(dentry, parent);
428 mutex_unlock(&parent->d_inode->i_mutex);
429 if (!ret)
430 simple_release_fs(&tracefs_mount, &tracefs_mount_count);
431}
432
/**
 * tracefs_remove_recursive - recursively removes a directory
 * @dentry: a pointer to the dentry of the directory to be removed.
 *          NULL and error pointers are ignored.
 *
 * This function recursively removes a directory tree in tracefs that
 * was previously created with a call to another tracefs function
 * (like tracefs_create_file() or variants thereof.)
 *
 * The tree is walked iteratively: the code descends into any child that
 * still has entries on its d_subdirs list, removes the other positive
 * children in place, and climbs back up once a directory has no positive
 * children left.  The mount pin is released once for every entry whose
 * removal reported no error, including @dentry itself at the end.
 */
void tracefs_remove_recursive(struct dentry *dentry)
{
	struct dentry *child, *parent;

	if (IS_ERR_OR_NULL(dentry))
		return;

	parent = dentry->d_parent;
	if (!parent || !parent->d_inode)
		return;

	/* Start the walk inside @dentry: it is the first directory to empty. */
	parent = dentry;
 down:
	mutex_lock(&parent->d_inode->i_mutex);
 loop:
	/*
	 * The parent->d_subdirs is protected by the d_lock. Outside that
	 * lock, the child can be unlinked and set to be freed which can
	 * use the d_u.d_child as the rcu head and corrupt this list.
	 */
	spin_lock(&parent->d_lock);
	list_for_each_entry(child, &parent->d_subdirs, d_child) {
		if (!tracefs_positive(child))
			continue;

		/* perhaps simple_empty(child) makes more sense */
		if (!list_empty(&child->d_subdirs)) {
			/* Child still has entries: drop both locks and descend. */
			spin_unlock(&parent->d_lock);
			mutex_unlock(&parent->d_inode->i_mutex);
			parent = child;
			goto down;
		}

		spin_unlock(&parent->d_lock);

		if (!__tracefs_remove(child, parent))
			simple_release_fs(&tracefs_mount, &tracefs_mount_count);

		/*
		 * The parent->d_lock protects against child from unlinking
		 * from d_subdirs. When releasing the parent->d_lock we can
		 * no longer trust that the next pointer is valid.
		 * Restart the loop. We'll skip this one with the
		 * tracefs_positive() check.
		 */
		goto loop;
	}
	spin_unlock(&parent->d_lock);

	/* No positive children left here: step up one level. */
	mutex_unlock(&parent->d_inode->i_mutex);
	child = parent;
	parent = parent->d_parent;
	mutex_lock(&parent->d_inode->i_mutex);

	if (child != dentry)
		/* go up */
		goto loop;

	/* Back at the starting point: remove @dentry itself from its parent. */
	if (!__tracefs_remove(child, parent))
		simple_release_fs(&tracefs_mount, &tracefs_mount_count);
	mutex_unlock(&parent->d_inode->i_mutex);
}
503
/**
 * tracefs_initialized - Tells whether tracefs has been registered
 *
 * Returns true once tracefs_init() has successfully registered the
 * filesystem type, false otherwise.
 */
bool tracefs_initialized(void)
{
	return tracefs_registered;
}
511
512static int __init tracefs_init(void)
513{
514 int retval;
515
516 retval = register_filesystem(&trace_fs_type);
517 if (!retval)
518 tracefs_registered = true;
519
520 return retval;
521}
522core_initcall(tracefs_init);