kernel: Only expose su when daemon is running

It has been claimed that the PG implementation of 'su' has security
vulnerabilities even when disabled.  Unfortunately, the people that
find these vulnerabilities often like to keep them private so they
can profit from exploits while leaving users exposed to malicious
hackers.

In order to reduce the attack surface for vulnerabilities, it is
therefore necessary to make 'su' completely inaccessible when it
is not in use (except by the root and system users).
diff --git a/fs/exec.c b/fs/exec.c
index 48bc14f..96f5110 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1573,6 +1573,11 @@
 	if (retval < 0)
 		goto out;
 
+	if (d_is_su(file->f_dentry) && capable(CAP_SYS_ADMIN)) {
+		current->flags |= PF_SU;
+		su_exec();
+	}
+
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
diff --git a/fs/namei.c b/fs/namei.c
index f9931ca..f6a4f75 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1822,6 +1822,14 @@
 		}
 	}
 
+	if (!err) {
+		struct super_block *sb = nd->inode->i_sb;
+		if (sb->s_flags & MS_RDONLY) {
+			if (d_is_su(nd->path.dentry) && !su_visible())
+				err = -ENOENT;
+		}
+	}
+
 	if (base)
 		fput(base);
 
diff --git a/fs/readdir.c b/fs/readdir.c
index cc0a822..5360540 100644
--- a/fs/readdir.c
+++ b/fs/readdir.c
@@ -47,6 +47,14 @@
 
 EXPORT_SYMBOL(vfs_readdir);
 
+/* True when the entry is named exactly "su" and su_visible() is false. */
+static bool hide_name(const char *name, int namlen)
+{
+	bool is_su = namlen == 2 && memcmp(name, "su", 2) == 0;
+
+	return is_su && !su_visible();
+}
+
 /*
  * Traditional linux readdir() handling..
  *
@@ -68,6 +76,7 @@
 struct readdir_callback {
 	struct old_linux_dirent __user * dirent;
 	int result;
+	bool romnt;	/* directory's superblock has MS_RDONLY set */
 };
 
 static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset,
@@ -84,6 +93,8 @@
 		buf->result = -EOVERFLOW;
 		return -EOVERFLOW;
 	}
+	if (hide_name(name, namlen) && buf->romnt)
+		return 0;
 	buf->result++;
 	dirent = buf->dirent;
 	if (!access_ok(VERIFY_WRITE, dirent,
@@ -116,6 +127,7 @@
 
 	buf.result = 0;
 	buf.dirent = dirent;
+	buf.romnt = (file->f_path.dentry->d_sb->s_flags & MS_RDONLY);
 
 	error = vfs_readdir(file, fillonedir, &buf);
 	if (buf.result)
@@ -144,6 +156,7 @@
 	struct linux_dirent __user * previous;
 	int count;
 	int error;
+	bool romnt;
 };
 
 static int filldir(void * __buf, const char * name, int namlen, loff_t offset,
@@ -163,6 +176,8 @@
 		buf->error = -EOVERFLOW;
 		return -EOVERFLOW;
 	}
+	if (hide_name(name, namlen) && buf->romnt)
+		return 0;
 	dirent = buf->previous;
 	if (dirent) {
 		if (__put_user(offset, &dirent->d_off))
@@ -210,6 +225,7 @@
 	buf.previous = NULL;
 	buf.count = count;
 	buf.error = 0;
+	buf.romnt = (file->f_path.dentry->d_sb->s_flags & MS_RDONLY);
 
 	error = vfs_readdir(file, filldir, &buf);
 	if (error >= 0)
@@ -231,6 +247,7 @@
 	struct linux_dirent64 __user * previous;
 	int count;
 	int error;
+	bool romnt;
 };
 
 static int filldir64(void * __buf, const char * name, int namlen, loff_t offset,
@@ -244,6 +261,8 @@
 	buf->error = -EINVAL;	/* only used if we fail.. */
 	if (reclen > buf->count)
 		return -EINVAL;
+	if (hide_name(name, namlen) && buf->romnt)
+		return 0;
 	dirent = buf->previous;
 	if (dirent) {
 		if (__put_user(offset, &dirent->d_off))
@@ -293,6 +312,7 @@
 	buf.previous = NULL;
 	buf.count = count;
 	buf.error = 0;
+	buf.romnt = (file->f_path.dentry->d_sb->s_flags & MS_RDONLY);
 
 	error = vfs_readdir(file, filldir64, &buf);
 	if (error >= 0)
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 12efdd0..322eb85 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -416,6 +416,13 @@
 
 extern void d_clear_need_lookup(struct dentry *dentry);
 
+/* True when DENTRY is non-NULL and its name is exactly "su". */
+static inline bool d_is_su(const struct dentry *dentry)
+{
+	return dentry && dentry->d_name.len == 2 &&
+	       memcmp(dentry->d_name.name, "su", 2) == 0;
+}
+
 extern int sysctl_vfs_cache_pressure;
 
 struct name_snapshot {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e16976a..1a4422d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -93,6 +93,12 @@
 
 #include <asm/processor.h>
 
+int  su_instances(void);	/* su_exec() calls not yet matched by su_exit() */
+bool su_running(void);		/* su_instances() > 0 */
+bool su_visible(void);		/* su running, or caller is uid 0 or 1000 */
+void su_exec(void);		/* increment the su instance counter */
+void su_exit(void);		/* decrement the su instance counter */
+
 struct exec_domain;
 struct futex_pi_state;
 struct robust_list_head;
@@ -1848,6 +1854,8 @@
 #define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
 #define PF_FREEZER_SKIP	0x40000000	/* Freezer should not count it as freezable */
 
+#define PF_SU		0x80000000      /* task exec'd su with CAP_SYS_ADMIN */
+
 /*
  * Only the _current_ task can read/write to tsk->flags, but other
  * tasks can access tsk->flags in readonly mode for example
diff --git a/kernel/exit.c b/kernel/exit.c
index f38ee31..56ad8c1 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -949,6 +949,11 @@
 	}
 
 	exit_signals(tsk);  /* sets PF_EXITING */
+
+	if (tsk->flags & PF_SU) {
+		su_exit();
+	}
+
 	/*
 	 * tsk->flags are checked in the futex code to protect against
 	 * an exiting task cleaning up the robust pi futexes.
diff --git a/kernel/fork.c b/kernel/fork.c
index 4c24e3a..096b77b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -295,6 +295,8 @@
 	if (err)
 		goto out;
 
+	tsk->flags &= ~PF_SU;
+
 	tsk->stack = ti;
 #ifdef CONFIG_SECCOMP
 	/*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f8a1cbe..6947339 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -88,6 +88,38 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
 
+/* Number of su_exec() calls not yet matched by su_exit(). */
+static atomic_t __su_instances;
+
+int su_instances(void)
+{
+	return atomic_read(&__su_instances);
+}
+
+/* True while the instance counter is positive. */
+bool su_running(void)
+{
+	return su_instances() > 0;
+}
+
+/* Visible while su runs; always visible to uid 0 (root) and 1000 (system). */
+bool su_visible(void)
+{
+	uid_t uid = current_uid();
+	return su_running() || uid == 0 || uid == 1000;
+}
+
+/* NOTE(review): count is unbalanced if a PF_SU task execs su again - confirm. */
+void su_exec(void)
+{
+	atomic_inc(&__su_instances);
+}
+
+void su_exit(void)
+{
+	atomic_dec(&__su_instances);
+}
+
 ATOMIC_NOTIFIER_HEAD(migration_notifier_head);
 
 void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period)