CRED: Fix regression in cap_capable() as shown up by sys_faccessat() [ver #3]

Fix a regression in cap_capable() due to:

	commit 3b11a1decef07c19443d24ae926982bc8ec9f4c0
	Author: David Howells <dhowells@redhat.com>
	Date:   Fri Nov 14 10:39:26 2008 +1100

	    CRED: Differentiate objective and effective subjective credentials on a task

The problem is that the above patch allows a process to have two sets of
credentials, and for the most part uses the subjective credentials when
accessing current's creds.

There is, however, one exception: cap_capable(), and thus capable(), uses the
real/objective credentials of the target task, whether or not it is the current
task.

Ordinarily this doesn't matter, since usually the two cred pointers in current
point to the same set of creds.  However, sys_faccessat() makes use of this
facility to override the credentials of the calling process to make its test,
without affecting the creds as seen from other processes.

One of the things sys_faccessat() does is to make an adjustment to the
effective capabilities mask, which cap_capable(), as it stands, then ignores.

The affected capability check is in generic_permission():

	if (!(mask & MAY_EXEC) || execute_ok(inode))
		if (capable(CAP_DAC_OVERRIDE))
			return 0;

This change passes the set of credentials to be tested down into the commoncap
and SELinux code.  The security functions called by capable() and
has_capability() select the appropriate set of credentials from the process
being checked.

This can be tested by compiling the following program from the XFS testsuite:

/*
 *  t_access_root.c - trivial test program to show permission bug.
 *
 *  Written by Michael Kerrisk - copyright ownership not pursued.
 *  Sourced from: http://linux.derkeiler.com/Mailing-Lists/Kernel/2003-10/6030.html
 */
#include <limits.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/stat.h>

#define UID 500
#define GID 100
#define PERM 0
#define TESTPATH "/tmp/t_access"

static void
errExit(char *msg)
{
    perror(msg);
    exit(EXIT_FAILURE);
} /* errExit */

static void
accessTest(char *file, int mask, char *mstr)
{
    printf("access(%s, %s) returns %d\n", file, mstr, access(file, mask));
} /* accessTest */

int
main(int argc, char *argv[])
{
    int fd, perm, uid, gid;
    char *testpath;
    char cmd[PATH_MAX + 20];

    testpath = (argc > 1) ? argv[1] : TESTPATH;
    perm = (argc > 2) ? strtoul(argv[2], NULL, 8) : PERM;
    uid = (argc > 3) ? atoi(argv[3]) : UID;
    gid = (argc > 4) ? atoi(argv[4]) : GID;

    unlink(testpath);

    fd = open(testpath, O_RDWR | O_CREAT, 0);
    if (fd == -1) errExit("open");

    if (fchown(fd, uid, gid) == -1) errExit("fchown");
    if (fchmod(fd, perm) == -1) errExit("fchmod");
    close(fd);

    snprintf(cmd, sizeof(cmd), "ls -l %s", testpath);
    system(cmd);

    if (seteuid(uid) == -1) errExit("seteuid");

    accessTest(testpath, 0, "0");
    accessTest(testpath, R_OK, "R_OK");
    accessTest(testpath, W_OK, "W_OK");
    accessTest(testpath, X_OK, "X_OK");
    accessTest(testpath, R_OK | W_OK, "R_OK | W_OK");
    accessTest(testpath, R_OK | X_OK, "R_OK | X_OK");
    accessTest(testpath, W_OK | X_OK, "W_OK | X_OK");
    accessTest(testpath, R_OK | W_OK | X_OK, "R_OK | W_OK | X_OK");

    exit(EXIT_SUCCESS);
} /* main */

This can be run against an Ext3 filesystem as well as against an XFS
filesystem.  If successful, it will show:

	[root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043
	---------- 1 dhowells dhowells 0 2008-12-31 03:00 /tmp/xxx
	access(/tmp/xxx, 0) returns 0
	access(/tmp/xxx, R_OK) returns 0
	access(/tmp/xxx, W_OK) returns 0
	access(/tmp/xxx, X_OK) returns -1
	access(/tmp/xxx, R_OK | W_OK) returns 0
	access(/tmp/xxx, R_OK | X_OK) returns -1
	access(/tmp/xxx, W_OK | X_OK) returns -1
	access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1

If unsuccessful, it will show:

	[root@andromeda src]# ./t_access_root /tmp/xxx 0 4043 4043
	---------- 1 dhowells dhowells 0 2008-12-31 02:56 /tmp/xxx
	access(/tmp/xxx, 0) returns 0
	access(/tmp/xxx, R_OK) returns -1
	access(/tmp/xxx, W_OK) returns -1
	access(/tmp/xxx, X_OK) returns -1
	access(/tmp/xxx, R_OK | W_OK) returns -1
	access(/tmp/xxx, R_OK | X_OK) returns -1
	access(/tmp/xxx, W_OK | X_OK) returns -1
	access(/tmp/xxx, R_OK | W_OK | X_OK) returns -1

I've also tested the fix with the SELinux and syscalls LTP testsuites.

Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: J. Bruce Fields <bfields@citi.umich.edu>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: James Morris <jmorris@namei.org>
diff --git a/security/commoncap.c b/security/commoncap.c
index 7971354..f0e671d 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -45,26 +45,22 @@
 /**
  * cap_capable - Determine whether a task has a particular effective capability
  * @tsk: The task to query
+ * @cred: The credentials to use
  * @cap: The capability to check for
  * @audit: Whether to write an audit message or not
  *
  * Determine whether the nominated task has the specified capability amongst
  * its effective set, returning 0 if it does, -ve if it does not.
  *
- * NOTE WELL: cap_capable() cannot be used like the kernel's capable()
- * function.  That is, it has the reverse semantics: cap_capable() returns 0
- * when a task has a capability, but the kernel's capable() returns 1 for this
- * case.
+ * NOTE WELL: cap_has_capability() cannot be used like the kernel's capable()
+ * and has_capability() functions.  That is, it has the reverse semantics:
+ * cap_has_capability() returns 0 when a task has a capability, but the
+ * kernel's capable() and has_capability() returns 1 for this case.
  */
-int cap_capable(struct task_struct *tsk, int cap, int audit)
+int cap_capable(struct task_struct *tsk, const struct cred *cred, int cap,
+		int audit)
 {
-	__u32 cap_raised;
-
-	/* Derived from include/linux/sched.h:capable. */
-	rcu_read_lock();
-	cap_raised = cap_raised(__task_cred(tsk)->cap_effective, cap);
-	rcu_read_unlock();
-	return cap_raised ? 0 : -EPERM;
+	return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
 }
 
 /**
@@ -160,7 +156,8 @@
 	/* they are so limited unless the current task has the CAP_SETPCAP
 	 * capability
 	 */
-	if (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) == 0)
+	if (cap_capable(current, current_cred(), CAP_SETPCAP,
+			SECURITY_CAP_AUDIT) == 0)
 		return 0;
 #endif
 	return 1;
@@ -869,7 +866,8 @@
 		     & (new->securebits ^ arg2))			/*[1]*/
 		    || ((new->securebits & SECURE_ALL_LOCKS & ~arg2))	/*[2]*/
 		    || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))	/*[3]*/
-		    || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0) /*[4]*/
+		    || (cap_capable(current, current_cred(), CAP_SETPCAP,
+				    SECURITY_CAP_AUDIT) != 0)		/*[4]*/
 			/*
 			 * [1] no changing of bits that are locked
 			 * [2] no unlocking of locks
@@ -950,7 +948,8 @@
 {
 	int cap_sys_admin = 0;
 
-	if (cap_capable(current, CAP_SYS_ADMIN, SECURITY_CAP_NOAUDIT) == 0)
+	if (cap_capable(current, current_cred(), CAP_SYS_ADMIN,
+			SECURITY_CAP_NOAUDIT) == 0)
 		cap_sys_admin = 1;
 	return __vm_enough_memory(mm, pages, cap_sys_admin);
 }