App data directory isolation

- During Zygote fork (before setuid), Zygote mounts a tmpfs overlay (in the
app's mount namespace) on the DE and CE directories, so the app process can
no longer access the actual DE / CE directories.

- In the overlay tmpfs directory, Zygote creates the data directories of the
app itself and of whitelisted apps.

- Bind mount (in the mount namespace) the mirror data directories onto the
directories in the tmpfs overlay.

- When CE storage is ready, ask installd to prepare CE storage's data mirror.

Bug: 143937733
Test: Test app shows it cannot access other apps data directory anymore
Test: Test app shows it can access whitelisted app / same uid app data
directory.
Change-Id: I64e06c1ffd962a7134a176aad33c06b5f661f7cd
diff --git a/core/jni/com_android_internal_os_Zygote.cpp b/core/jni/com_android_internal_os_Zygote.cpp
index b32acde..a371c54 100644
--- a/core/jni/com_android_internal_os_Zygote.cpp
+++ b/core/jni/com_android_internal_os_Zygote.cpp
@@ -31,6 +31,8 @@
 // sys/mount.h has to come before linux/fs.h due to redefinition of MS_RDONLY, MS_BIND, etc
 #include <sys/mount.h>
 #include <linux/fs.h>
+#include <sys/types.h>
+#include <dirent.h>
 
 #include <array>
 #include <atomic>
@@ -40,6 +42,7 @@
 #include <sstream>
 #include <string>
 #include <string_view>
+#include <unordered_set>
 
 #include <android/fdsan.h>
 #include <arpa/inet.h>
@@ -51,6 +54,7 @@
 #include <mntent.h>
 #include <paths.h>
 #include <signal.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <sys/capability.h>
 #include <sys/cdefs.h>
@@ -158,6 +162,17 @@
  */
 static int gUsapPoolEventFD = -1;
 
+static constexpr int DEFAULT_DATA_DIR_PERMISSION = 0751;  // rwxr-x--x
+
+/**
+ * Property to control if app data isolation is enabled; read as a boolean, default false.
+ */
+static const std::string ANDROID_APP_DATA_ISOLATION_ENABLED_PROPERTY =
+    "persist.zygote.app_data_isolation";
+
+static constexpr const uint64_t UPPER_HALF_WORD_MASK = 0xFFFF'FFFF'0000'0000;  // bits 63..32
+static constexpr const uint64_t LOWER_HALF_WORD_MASK = 0x0000'0000'FFFF'FFFF;  // bits 31..0
+
 /**
  * The maximum value that the gUSAPPoolSizeMax variable may take.  This value
  * is a mirror of ZygoteServer.USAP_POOL_SIZE_MAX_LIMIT
@@ -662,7 +677,7 @@
   return 0;
 }
 
-static void CreateDir(const std::string& dir, mode_t mode, uid_t uid, gid_t gid,
+static void PrepareDir(const std::string& dir, mode_t mode, uid_t uid, gid_t gid,
                       fail_fn_t fail_fn) {
   if (fs_prepare_dir(dir.c_str(), mode, uid, gid) != 0) {
     fail_fn(CREATE_ERROR("fs_prepare_dir failed on %s: %s",
@@ -670,6 +685,16 @@
   }
 }
 
+// Prepares |dir| through PrepareDir() unless stat() can already resolve the path.
+static void PrepareDirIfNotPresent(const std::string& dir, mode_t mode, uid_t uid, gid_t gid,
+                      fail_fn_t fail_fn) {
+  struct stat st;
+  const bool present = TEMP_FAILURE_RETRY(stat(dir.c_str(), &st)) != -1;
+  if (!present) {
+    PrepareDir(dir, mode, uid, gid, fail_fn);
+  }
+}
+
 static void BindMount(const std::string& source_dir, const std::string& target_dir,
                       fail_fn_t fail_fn) {
   if (TEMP_FAILURE_RETRY(mount(source_dir.c_str(), target_dir.c_str(), nullptr,
@@ -679,6 +704,15 @@
   }
 }
 
+// Mounts a root-owned tmpfs (mode 0751, nosuid / nodev / noexec) on |target_dir|.
+static void MountAppDataTmpFs(const std::string& target_dir, fail_fn_t fail_fn) {
+  constexpr unsigned long kFlags = MS_NOSUID | MS_NODEV | MS_NOEXEC;
+  const int rc = TEMP_FAILURE_RETRY(mount("tmpfs", target_dir.c_str(), "tmpfs", kFlags,
+                                          "uid=0,gid=0,mode=0751"));
+  if (rc != -1) return;
+  fail_fn(CREATE_ERROR("Failed to mount tmpfs to %s: %s", target_dir.c_str(), strerror(errno)));
+}
+
 // Create a private mount namespace and bind mount appropriate emulated
 // storage for the given user.
 static void MountEmulatedStorage(uid_t uid, jint mount_mode,
@@ -711,7 +745,7 @@
   const std::string pass_through_source = StringPrintf("/mnt/pass_through/%d", user_id);
   bool isFuse = GetBoolProperty(kPropFuse, false);
 
-  CreateDir(user_source, 0751, AID_ROOT, AID_ROOT, fail_fn);
+  PrepareDir(user_source, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
 
   if (isFuse) {
     BindMount(mount_mode == MOUNT_EXTERNAL_PASS_THROUGH ? pass_through_source : user_source,
@@ -1007,6 +1041,231 @@
   return pid;
 }
 
+// Create an app data directory over tmpfs overlayed CE / DE storage, and bind mount it
+// from the actual app data directory in data mirror.
+// Paths are assembled as std::string so that every string_view is read by its length
+// (a view need not be NUL-terminated) and long paths are not cut off at PATH_MAX.
+static void createAndMountAppData(std::string_view package_name,
+    std::string_view mirror_pkg_dir_name, std::string_view mirror_data_path,
+    std::string_view actual_data_path, fail_fn_t fail_fn) {
+  // <mirror_data_path>/<mirror_pkg_dir_name>: source of the bind mount.
+  const std::string mirrorAppDataPath =
+      std::string(mirror_data_path).append("/").append(mirror_pkg_dir_name);
+  // <actual_data_path>/<package_name>: mount point, created first with mode 0700.
+  const std::string actualAppDataPath =
+      std::string(actual_data_path).append("/").append(package_name);
+
+  PrepareDir(actualAppDataPath, 0700, AID_ROOT, AID_ROOT, fail_fn);
+  BindMount(mirrorAppDataPath, actualAppDataPath, fail_fn);
+}
+
+// Get the directory name stored in /data/data. If device is unlocked it should be the same as
+// package name, otherwise it will be an encrypted name but with same inode number.
+// Reports through fail_fn and returns an empty string when no matching entry is found.
+static std::string getAppDataDirName(std::string_view parent_path, std::string_view package_name,
+      long long ce_data_inode, fail_fn_t fail_fn) {
+  // Own NUL-terminated copies: string_view::data() is not guaranteed to be terminated.
+  const std::string parent(parent_path);
+  const std::string pkg(package_name);
+  // Check if directory exists
+  const std::string tmpPath = parent + "/" + pkg;
+  struct stat s;
+  if (stat(tmpPath.c_str(), &s) == 0) {
+    return pkg;  // Directory exists, so its name is the package name.
+  }
+  if (errno != ENOENT) {
+    fail_fn(CREATE_ERROR("Unexpected error in getAppDataDirName: %s", strerror(errno)));
+    return std::string();  // Constructing std::string from nullptr is undefined behaviour.
+  }
+
+  // Finds the entry of |parent| with the given inode; on success stores its name in |name|.
+  auto findByInode = [&](uint64_t inode, std::string* name) -> bool {
+    DIR* dir = opendir(parent.c_str());
+    if (dir == nullptr) {
+      fail_fn(CREATE_ERROR("Failed to opendir %s", parent.c_str()));
+      return false;  // Never hand a null DIR* to readdir().
+    }
+    bool found = false;
+    struct dirent* ent;
+    while (!found && (ent = readdir(dir)) != nullptr) {
+      if (ent->d_ino == inode) {
+        // Copy before closedir(): |ent| points into storage owned by |dir|.
+        *name = ent->d_name;
+        found = true;
+      }
+    }
+    closedir(dir);
+    return found;
+  };
+
+  // Directory doesn't exist, try to search the name from inode
+  std::string dirName;
+  if (findByInode(static_cast<uint64_t>(ce_data_inode), &dirName)) {
+    return dirName;
+  }
+
+  // Fallback due to b/145989852, ce_data_inode stored in package manager may be corrupted
+  // if ino_t is 32 bits.
+  ino_t fixed_ce_data_inode = 0;
+  if ((ce_data_inode & UPPER_HALF_WORD_MASK) == UPPER_HALF_WORD_MASK) {
+    fixed_ce_data_inode = ce_data_inode & LOWER_HALF_WORD_MASK;
+  } else if ((ce_data_inode & LOWER_HALF_WORD_MASK) == LOWER_HALF_WORD_MASK) {
+    fixed_ce_data_inode = ((ce_data_inode >> 32) & LOWER_HALF_WORD_MASK);
+  }
+  if (fixed_ce_data_inode != 0 && findByInode(fixed_ce_data_inode, &dirName)) {
+    long long d_ino = fixed_ce_data_inode;
+    ALOGW("Fallback success inode %lld -> %lld", ce_data_inode, d_ino);
+    return dirName;
+  }
+  fail_fn(CREATE_ERROR("Unable to find %s:%lld in %s", pkg.c_str(), ce_data_inode, parent.c_str()));
+  return std::string();
+}
+
+// Bind mount one package's DE and CE data from the data mirror into the given
+// actualDePath / actualCePath directories.
+// Mirror paths are built as std::string so each string_view is read by its length.
+static void isolateAppDataPerPackage(int userId, std::string_view package_name,
+    std::string_view volume_uuid, long long ce_data_inode, std::string_view actualCePath,
+    std::string_view actualDePath, fail_fn_t fail_fn) {
+  // Mirror layout: /data_mirror/data_{ce,de}/<volume uuid>/<user id>
+  const std::string userSuffix = std::string(volume_uuid) + "/" + std::to_string(userId);
+  const std::string mirrorCePath = "/data_mirror/data_ce/" + userSuffix;
+  const std::string mirrorDePath = "/data_mirror/data_de/" + userSuffix;
+
+  // The DE directory is looked up by package name.
+  createAndMountAppData(package_name, package_name, mirrorDePath, actualDePath, fail_fn);
+
+  // The CE directory name is resolved through getAppDataDirName() first.
+  std::string ce_data_path = getAppDataDirName(mirrorCePath, package_name, ce_data_inode, fail_fn);
+  createAndMountAppData(package_name, ce_data_path, mirrorCePath, actualCePath, fail_fn);
+}
+
+/**
+ * Make other apps data directory not visible in CE, DE storage.
+ *
+ * Apps without app data isolation can detect if another app is installed on system,
+ * by "touching" other apps data directory like /data/data/com.whatsapp, if it returns
+ * "Permission denied" it means apps installed, otherwise it returns "File not found".
+ * Traditional file permissions or SELinux can only block accessing those directories but
+ * can't fix fingerprinting like this.
+ * We fix it by "overlaying" data directory, and only relevant app data packages exists
+ * in data directories.
+ *
+ * Steps:
+ * 1). Collect a list of all related apps (apps with same uid and whitelisted apps) data info
+ * (package name, data stored volume uuid, and inode number of its CE data directory)
+ * 2). Mount tmpfs on /data/data, /data/user(_de) and /mnt/expand, so apps no longer
+ * able to access apps data directly.
+ * 3). For each related app, create its app data directory and bind mount the actual content
+ * from apps data mirror directory. This works on both CE and DE storage, as DE storage
+ * is always available even when storage is FBE locked, while we use the inode number to find
+ * the encrypted CE directory in mirror so we can still bind mount it successfully.
+ *
+ * Example:
+ * 0). Assuming com.android.foo CE data is stored in /data/data and no shared uid
+ * 1). Mount a tmpfs on /data/data, /data/user, /data/user_de, /mnt/expand
+ * List = ["com.android.foo", "null" (volume uuid "null"=default),
+ * 123456 (inode number)]
+ * 2). On DE storage, we create a directory /data/user_de/0/com.android.foo, and bind
+ * mount (in the app's mount namespace) it from /data_mirror/data_de/null/0/com.android.foo.
+ * 3). We do similar for CE storage. But in direct boot mode, as /data_mirror/data_ce/null/0/ is
+ * encrypted, we can't find a directory with name com.android.foo on it, so we will
+ * use the inode number to find the right directory instead, whose content will
+ * be decrypted after storage is decrypted.
+ *
+ */
+static void isolateAppData(JNIEnv* env, jobjectArray pkg_data_info_list,
+    uid_t uid, const char* process_name, jstring managed_nice_name,
+    fail_fn_t fail_fn) {
+
+  const userid_t userId = multiuser_get_user_id(uid);
+
+  auto extract_fn = std::bind(ExtractJString, env, process_name, managed_nice_name, _1);
+
+  int size = (pkg_data_info_list != nullptr) ? env->GetArrayLength(pkg_data_info_list) : 0;
+  // Size should be a multiple of 3, as it contains list of <package_name, volume_uuid, inode>
+  if ((size % 3) != 0) {
+    fail_fn(CREATE_ERROR("Wrong pkg_inode_list size %d", size));
+    return;  // Never walk a malformed list: the loop below reads i + 1 and i + 2.
+  }
+
+  // Mount tmpfs on all possible data directories, so app no longer see the original apps data.
+  static constexpr const char* internalCePath = "/data/user";
+  static constexpr const char* internalLegacyCePath = "/data/data";
+  static constexpr const char* internalDePath = "/data/user_de";
+  static constexpr const char* externalPrivateMountPath = "/mnt/expand";
+
+  MountAppDataTmpFs(internalLegacyCePath, fail_fn);
+  MountAppDataTmpFs(internalCePath, fail_fn);
+  MountAppDataTmpFs(internalDePath, fail_fn);
+  MountAppDataTmpFs(externalPrivateMountPath, fail_fn);
+
+  // Each group of three elements describes one package whose data is bind mounted back.
+  for (int i = 0; i < size; i += 3) {
+    jstring package_str = (jstring) (env->GetObjectArrayElement(pkg_data_info_list, i));
+    std::string packageName = extract_fn(package_str).value();
+
+    jstring vol_str = (jstring) (env->GetObjectArrayElement(pkg_data_info_list, i + 1));
+    std::string volUuid = extract_fn(vol_str).value();
+
+    jstring inode_str = (jstring) (env->GetObjectArrayElement(pkg_data_info_list, i + 2));
+    std::string inode = extract_fn(inode_str).value();
+    long long ceDataInode = std::stoll(inode);
+
+    std::string actualCePath, actualDePath;
+    if (volUuid.compare("null") != 0) {
+      // Volume that is stored in /mnt/expand
+      char volPath[PATH_MAX];
+      char volCePath[PATH_MAX];
+      char volDePath[PATH_MAX];
+      char volCeUserPath[PATH_MAX];
+      char volDeUserPath[PATH_MAX];
+
+      snprintf(volPath, PATH_MAX, "/mnt/expand/%s", volUuid.c_str());
+      snprintf(volCePath, PATH_MAX, "%s/user", volPath);
+      snprintf(volDePath, PATH_MAX, "%s/user_de", volPath);
+      snprintf(volCeUserPath, PATH_MAX, "%s/%d", volCePath, userId);
+      snprintf(volDeUserPath, PATH_MAX, "%s/%d", volDePath, userId);
+
+      // Several packages may share a volume, so only create what is still missing.
+      PrepareDirIfNotPresent(volPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
+      PrepareDirIfNotPresent(volCePath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
+      PrepareDirIfNotPresent(volDePath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT, fail_fn);
+      PrepareDirIfNotPresent(volCeUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
+          fail_fn);
+      PrepareDirIfNotPresent(volDeUserPath, DEFAULT_DATA_DIR_PERMISSION, AID_ROOT, AID_ROOT,
+          fail_fn);
+
+      actualCePath = volCeUserPath;
+      actualDePath = volDeUserPath;
+    } else {
+      // Internal volume that stored in /data
+      char internalCeUserPath[PATH_MAX];
+      char internalDeUserPath[PATH_MAX];
+      snprintf(internalCeUserPath, PATH_MAX, "/data/user/%d", userId);
+      snprintf(internalDeUserPath, PATH_MAX, "/data/user_de/%d", userId);
+      // If it's user 0, create a symlink /data/user/0 -> /data/data,
+      // otherwise create /data/user/$USER
+      if (userId == 0) {
+        // EEXIST is expected once an earlier package has already created the link.
+        if (symlink(internalLegacyCePath, internalCeUserPath) != 0 && errno != EEXIST) {
+          fail_fn(CREATE_ERROR("Failed to symlink %s: %s", internalCeUserPath, strerror(errno)));
+        }
+        actualCePath = internalLegacyCePath;
+      } else {
+        PrepareDirIfNotPresent(internalCeUserPath, DEFAULT_DATA_DIR_PERMISSION,
+            AID_ROOT, AID_ROOT, fail_fn);
+        actualCePath = internalCeUserPath;
+      }
+      PrepareDirIfNotPresent(internalDeUserPath, DEFAULT_DATA_DIR_PERMISSION,
+          AID_ROOT, AID_ROOT, fail_fn);
+      actualDePath = internalDeUserPath;
+    }
+    isolateAppDataPerPackage(userId, packageName, volUuid, ceDataInode,
+        actualCePath, actualDePath, fail_fn);
+  }
+}
+
 // Utility routine to specialize a zygote child process.
 static void SpecializeCommon(JNIEnv* env, uid_t uid, gid_t gid, jintArray gids,
                              jint runtime_flags, jobjectArray rlimits,
@@ -1051,6 +1310,16 @@
 
   MountEmulatedStorage(uid, mount_external, use_native_bridge, fail_fn);
 
+  // System services, isolated process, webview/app zygote, old target sdk app, should
+  // give a null pkg_data_info_list so they don't need app data isolation.
+  // Isolated process / webview / app zygote should be gated by SELinux and file permission
+  // so they can't even traverse CE / DE directories.
+  if (pkg_data_info_list != nullptr
+      && GetBoolProperty(ANDROID_APP_DATA_ISOLATION_ENABLED_PROPERTY, false)) {
+    isolateAppData(env, pkg_data_info_list, uid, process_name, managed_nice_name,
+        fail_fn);
+  }
+
   // If this zygote isn't root, it won't be able to create a process group,
   // since the directory is owned by root.
   if (!is_system_server && getuid() == 0) {
diff --git a/services/core/java/com/android/server/StorageManagerService.java b/services/core/java/com/android/server/StorageManagerService.java
index 29d3a1d7..fcb1854 100644
--- a/services/core/java/com/android/server/StorageManagerService.java
+++ b/services/core/java/com/android/server/StorageManagerService.java
@@ -143,6 +143,7 @@
 import com.android.internal.util.IndentingPrintWriter;
 import com.android.internal.util.Preconditions;
 import com.android.internal.widget.LockPatternUtils;
+import com.android.server.pm.Installer;
 import com.android.server.storage.AppFuseBridge;
 import com.android.server.storage.StorageSessionController;
 import com.android.server.storage.StorageSessionController.ExternalStorageServiceException;
@@ -366,6 +367,8 @@
 
     private volatile int mCurrentUserId = UserHandle.USER_SYSTEM;
 
+    private final Installer mInstaller;
+
     /** Holding lock for AppFuse business */
     private final Object mAppFuseLock = new Object();
 
@@ -1244,6 +1247,13 @@
                     vol.state = newState;
                     onVolumeStateChangedLocked(vol, oldState, newState);
                 }
+                try {
+                    if (vol.type == VolumeInfo.TYPE_PRIVATE && state == VolumeInfo.STATE_MOUNTED) {
+                        mInstaller.onPrivateVolumeMounted(vol.getFsUuid());
+                    }
+                } catch (Installer.InstallerException e) {
+                    Slog.i(TAG, "Failed when private volume mounted " + vol, e);
+                }
             }
         }
 
@@ -1289,6 +1299,13 @@
 
             if (vol != null) {
                 mStorageSessionController.onVolumeRemove(vol);
+                try {
+                    if (vol.type == VolumeInfo.TYPE_PRIVATE) {
+                        mInstaller.onPrivateVolumeRemoved(vol.getFsUuid());
+                    }
+                } catch (Installer.InstallerException e) {
+                    Slog.i(TAG, "Failed when private volume unmounted " + vol, e);
+                }
             }
         }
     };
@@ -1600,6 +1617,9 @@
 
         mStorageSessionController = new StorageSessionController(mContext, mIsFuseEnabled);
 
+        mInstaller = new Installer(mContext);
+        mInstaller.onStart();
+
         // Initialize the last-fstrim tracking if necessary
         File dataDir = Environment.getDataDirectory();
         File systemDir = new File(dataDir, "system");
@@ -1973,6 +1993,13 @@
         try {
             mVold.unmount(vol.id);
             mStorageSessionController.onVolumeUnmount(vol);
+            try {
+                if (vol.type == VolumeInfo.TYPE_PRIVATE) {
+                    mInstaller.onPrivateVolumeRemoved(vol.getFsUuid());
+                }
+            } catch (Installer.InstallerException e) {
+                Slog.e(TAG, "Failed unmount mirror data", e);
+            }
         } catch (Exception e) {
             Slog.wtf(TAG, e);
         }
diff --git a/services/core/java/com/android/server/pm/Installer.java b/services/core/java/com/android/server/pm/Installer.java
index 26cd42d..eb4b593 100644
--- a/services/core/java/com/android/server/pm/Installer.java
+++ b/services/core/java/com/android/server/pm/Installer.java
@@ -580,6 +580,30 @@
         }
     }
 
+    /** Asks installd to bind mount the CE and DE mirror storage of a private volume;
+     *  does nothing when checkBeforeRemote() returns false. */
+    public void onPrivateVolumeMounted(String volumeUuid) throws InstallerException {
+        if (checkBeforeRemote()) {
+            try {
+                mInstalld.onPrivateVolumeMounted(volumeUuid);
+            } catch (Exception e) {
+                throw InstallerException.from(e);
+            }
+        }
+    }
+
+    /** Asks installd to unmount the CE and DE mirror storage of a private volume;
+     *  does nothing when checkBeforeRemote() returns false. */
+    public void onPrivateVolumeRemoved(String volumeUuid) throws InstallerException {
+        if (checkBeforeRemote()) {
+            try {
+                mInstalld.onPrivateVolumeRemoved(volumeUuid);
+            } catch (Exception e) {
+                throw InstallerException.from(e);
+            }
+        }
+    }
+
     public boolean prepareAppProfile(String pkg, @UserIdInt int userId, @AppIdInt int appId,
             String profileName, String codePath, String dexMetadataPath) throws InstallerException {
         if (!checkBeforeRemote()) return false;