Reland: resolve FUSE and lower FS cache inconsistencies

Rf - Read FUSE, Wf - Write FUSE
Rl - Read lower fs, Wl - Write lower fs

File reads in the following scenarios may not see the latest data
because of different VFS caches:
1. Rf - Wl -Rf
2. Rl - Wf - Rl

To fix the scenarios above we dynamically return a FUSE or lower FS fd
from the MediaProvider#open or open with or without caching during
FUSE_OPEN. Here are the conditions:

MP open:
- Return FUSE fd if file already opened with caching via FUSE or we are
  unable to SET_OFD_SETLK to F_RDLCK or F_WRLCK with fcntl(2).
- Return lower FS fd otherwise

FUSE open:
- Open with caching if F_OFD_GETLK with fcntl(2) returns F_UNLCK
- Open with direct_io otherwise

Unfortunately, we cannot place a F_RDLCK on a file opened in write
only mode and we cannot place a F_WRLCK twice. This means that if a
file is opened via MP in write only mode, we can only set a F_WRLCK
and subsequent opens will have to go through FUSE.

Pseudo code of algorithm implemented:

MP#opeFd
  lowerFsFd = open(lowerPath)
  useFuse = FuseD#shouldOpenWithFuse(lowerPath, fd)
  if useFuse
    upperFsFd = open(upperPath)
    return upperFsFd
  else
    return lowerFsFd

FuseD#open
  lowerFsFd = open(path)
  hasLock = fcntl(get_lock, lowerFsFd)
  if hasLock
    return uncachedFileInfo
  else
    return cachedFileInfo

FuseD#shouldOpenWithFuse
  if cached
    return true
  else
    res = fcntl(set_read_or_write_lock, fd)
    return res

Test: atest FuseDaemonHostTest#testVfsCacheConsistency
Test: atest FuseDaemonHostTest
Test: adb logcat -v color | grep --color=never -iE "using fuse|using lower|using direct|using cache"
Change-Id: I7726a75a51869c0e3ea3856103dd501b1aa19d14
FuseDaemon: [/storage/emulated] Using cache for /storage/emulated/0/DCIM/open_file_path_write_content_resolver.jpg
MediaProvider: Using FUSE for /storage/emulated/0/DCIM/open_file_path_write_content_resolver.jpg
FuseDaemon: [/storage/emulated] Using cache for /storage/emulated/0/DCIM/open_file_path_write_content_resolver.jpg
MediaProvider: Using lower FS for /storage/emulated/0/DCIM/open_content_resolver_write_content_resolver.jpg
FuseDaemon: [/storage/emulated] Using direct io for /storage/emulated/0/DCIM/open_content_resolver_write_content_resolver.jpg
FuseDaemon: [/storage/emulated] Using cache for /storage/emulated/0/DCIM/open_file_path_write_file_path.jpg
MediaProvider: Using FUSE for /storage/emulated/0/DCIM/open_file_path_write_file_path.jpg
FuseDaemon: [/storage/emulated] Using cache for /storage/emulated/0/DCIM/open_file_path_write_file_path.jpg
MediaProvider: Using lower FS for /storage/emulated/0/DCIM/open_content_resolver_write_file_path.jpg
FuseDaemon: [/storage/emulated] Using direct io for /storage/emulated/0/DCIM/open_content_resolver_write_file_path.jpg
MediaProvider: Using lower FS for /storage/emulated/0/DCIM/open_content_resolver_write_only.jpg
MediaProvider: Using FUSE for /storage/emulated/0/DCIM/open_content_resolver_write_only.jpg
FuseDaemon: [/storage/emulated] Using direct io for /storage/emulated/0/DCIM/open_content_resolver_write_only.jpg
MediaProvider: Using lower FS for /storage/emulated/0/DCIM/open_content_resolver_dup.jpg
FuseDaemon: [/storage/emulated] Using direct io for /storage/emulated/0/DCIM/open_content_resolver_dup.jpg
Bug: 135341433
diff --git a/jni/FuseDaemon.cpp b/jni/FuseDaemon.cpp
index f85c212..a2f5dd3 100644
--- a/jni/FuseDaemon.cpp
+++ b/jni/FuseDaemon.cpp
@@ -91,10 +91,11 @@
 
 class handle {
   public:
-    handle(const string& path) : path(path), fd(-1), ri(nullptr){};
+    handle(const string& path) : path(path), fd(-1), ri(nullptr), cached(true) {};
     string path;
     int fd;
     std::unique_ptr<RedactionInfo> ri;
+    bool cached;
 };
 
 struct dirhandle {
@@ -122,10 +123,21 @@
     struct node* next;   /* per-dir sibling list */
     struct node* child;  /* first contained file by this dir */
     struct node* parent; /* containing directory */
+    std::vector<handle*> handles; /* container for file handle pointers */
+    std::vector<dirhandle*> dirhandles; /* container for dir handle pointers */
 
     string name;
 
     bool deleted;
+
+    bool HasCachedHandle() {
+        for (const auto& handle : handles) {
+            if (handle->cached) {
+                return true;
+            }
+        }
+        return false;
+    }
 };
 
 /*
@@ -306,6 +318,57 @@
     return (__u64)(uintptr_t) ptr;
 }
 
+/*
+ * Set an F_RDLCK or F_WRLCKK on fd with fcntl(2).
+ *
+ * This is called before the MediaProvider returns fd from the lower file
+ * system to an app over the ContentResolver interface. This allows us
+ * check with is_file_locked if any reference to that fd is still open.
+ */
+static int set_file_lock(int fd, bool for_read, const std::string& path) {
+    std::string lock_str = (for_read ? "read" : "write");
+    TRACE << "Setting " << lock_str << " lock for path " << path;
+
+    struct flock fl{};
+    fl.l_type = for_read ? F_RDLCK : F_WRLCK;
+    fl.l_whence = SEEK_SET;
+
+    int res = fcntl(fd, F_OFD_SETLK, &fl);
+    if (res) {
+        PLOG(ERROR) << "Failed to set " << lock_str << " lock on path " << path;
+        return res;
+    }
+    TRACE << "Successfully set " << lock_str << " lock on path " << path;
+    return res;
+}
+
+/*
+ * Check if an F_RDLCK or F_WRLCK is set on fd with fcntl(2).
+ *
+ * This is used to determine if the MediaProvider has given an fd to the lower fs to an app over
+ * the ContentResolver interface. Before that happens, we always call set_file_lock on the file
+ * allowing us to know if any reference to that fd is still open here.
+ *
+ * Returns true if fd may have a lock, false otherwise
+ */
+static bool is_file_locked(int fd, const std::string& path) {
+    TRACE << "Checking if file is locked " << path;
+
+    struct flock fl{};
+    fl.l_type = F_WRLCK;
+    fl.l_whence = SEEK_SET;
+
+    int res = fcntl(fd, F_OFD_GETLK, &fl);
+    if (res) {
+        PLOG(ERROR) << "Failed to check lock for file " << path;
+        // Assume worst
+        return true;
+    }
+    bool locked = fl.l_type != F_UNLCK;
+    TRACE << "File " << path << " is " << (locked ? "locked" : "unlocked");
+    return locked;
+}
+
 static void acquire_node_locked(struct node* node) {
     node->refcount++;
     TRACE << "ACQUIRE " << node << " " << node->name << " rc=" << node->refcount;
@@ -429,7 +492,50 @@
             return node;
         }
     }
-    return 0;
+    return nullptr;
+}
+
+static std::vector<std::string> get_path_segments(int segment_start, const std::string& path) {
+    using namespace std;
+    vector<std::string> segments;
+    int segment_end = path.find_first_of('/', segment_start);
+
+    while (segment_end != string::npos) {
+        if (segment_end == segment_start) {
+            // First character is '/' ignore
+            segment_end = path.find_first_of('/', ++segment_start);
+            continue;
+        }
+
+        segments.push_back(path.substr(segment_start, segment_end - segment_start));
+        segment_start = segment_end + 1;
+        segment_end = path.find_first_of('/', segment_start);
+    }
+    if (segment_start < path.size()) {
+        segments.push_back(path.substr(segment_start));
+    }
+    return segments;
+}
+
+static struct node* lookup_node_by_full_path_locked(struct fuse* fuse,
+                                                    const string& absolute_path) {
+    TRACE << "Looking up node for full path " << absolute_path;
+    if (absolute_path.find(fuse->path) != 0) {
+        TRACE << "Invalid path " << absolute_path;
+        return nullptr;
+    }
+
+    node* node = &fuse->root;
+    std::vector<std::string> segments = get_path_segments(fuse->path.size(), absolute_path);
+
+    for (const string& segment : segments) {
+        node = lookup_child_by_name_locked(node, segment);
+        if (!node) {
+            TRACE << "Node not found for segment " << segment << " in path " << absolute_path;
+            return nullptr;
+        }
+    }
+    return node;
 }
 
 static struct node* acquire_or_create_child_locked(struct fuse* fuse,
@@ -966,12 +1072,31 @@
         return;
     }
 
-    // In case we have any redaction ranges, we don't want to read cached content as we might
-    // accidentally access un-redacted content.
-    if (h->ri->isRedactionNeeded()) {
+    if (h->ri->isRedactionNeeded() || is_file_locked(h->fd, path)) {
+        // We don't want to use the FUSE VFS cache in two cases:
+        // 1. When redaction is needed because app A with EXIF access might access
+        // a region that should have been redacted for app B without EXIF access, but app B on
+        // a subsequent read, will be able to see the EXIF data because the read request for that
+        // region will be served from cache and not get to the FUSE daemon
+        // 2. When the file has a read or write lock on it. This means that the MediaProvider has
+        // given an fd to the lower file system to an app. There are two cases where using the cache
+        // in this case can be a problem:
+        // a. Writing to a FUSE fd with caching enabled will use the write-back cache and a
+        // subsequent read from the lower fs fd will not see the write.
+        // b. Reading from a FUSE fd with caching enabled may not see the latest writes using the
+        // lower fs fd because those writes did not go through the FUSE layer and reads from FUSE
+        // after that write may be served from cache
+        TRACE_FUSE(fuse) << "Using direct io for " << path;
         fi->direct_io = true;
+    } else {
+        TRACE_FUSE(fuse) << "Using cache for " << path;
     }
 
+    h->cached = !fi->direct_io;
+    pthread_mutex_lock(&fuse->lock);
+    node->handles.push_back(h);
+    pthread_mutex_unlock(&fuse->lock);
+
     fi->fh = ptr_to_id(h);
     fi->keep_cache = 1;
     fuse_reply_open(req, fi);
@@ -1170,8 +1295,10 @@
                        struct fuse_file_info* fi) {
     ATRACE_CALL();
     struct fuse* fuse = get_fuse(req);
-    handle* h = reinterpret_cast<handle*>(fi->fh);
 
+    pthread_mutex_lock(&fuse->lock);
+    node* node = lookup_node_by_id_locked(fuse, ino);
+    handle* h = reinterpret_cast<handle*>(fi->fh);
     TRACE_FUSE(fuse) << "RELEASE "
                      << "0" << std::oct << fi->flags << " " << h << "(" << h->fd << ")";
 
@@ -1179,8 +1306,16 @@
     close(h->fd);
 
     // TODO(b/145737191): Figure out if we need to scan files on close, and how to do it properly
-
+    if (node) {
+        for (auto it = node->handles.begin(); it != node->handles.end(); it++) {
+            if (*it == h) {
+                node->handles.erase(it);
+                break;
+            }
+        }
+    }
     delete h;
+    pthread_mutex_unlock(&fuse->lock);
     fuse_reply_err(req, 0);
 }
 
@@ -1246,6 +1381,7 @@
         fuse_reply_err(req, errno);
         return;
     }
+    node->dirhandles.push_back(h);
     h->next_off = 0;
     fi->fh = ptr_to_id(h);
     fuse_reply_open(req, fi);
@@ -1357,11 +1493,23 @@
                           struct fuse_file_info* fi) {
     ATRACE_CALL();
     struct fuse* fuse = get_fuse(req);
-    struct dirhandle* h = reinterpret_cast<struct dirhandle*>(fi->fh);
 
+    pthread_mutex_lock(&fuse->lock);
+    node* node = lookup_node_by_id_locked(fuse, ino);
+    dirhandle* h = reinterpret_cast<struct dirhandle*>(fi->fh);
     TRACE_FUSE(fuse) << "RELEASEDIR " << h;
     closedir(h->d);
+    if (node) {
+        for (auto it = node->dirhandles.begin(); it != node->dirhandles.end(); it++) {
+            if (*it == h) {
+                node->dirhandles.erase(it);
+                break;
+            }
+        }
+    }
     delete h;
+    pthread_mutex_unlock(&fuse->lock);
+
     fuse_reply_err(req, 0);
 }
 
@@ -1459,7 +1607,9 @@
 
     fi->fh = ptr_to_id(h);
     fi->keep_cache = 1;
-    if (make_node_entry(req, parent_node, name, child_path, &e)) {
+    node* node = make_node_entry(req, parent_node, name, child_path, &e);
+    if (node) {
+        node->handles.push_back(h);
         fuse_reply_create(req, &e, fi);
     } else {
         fuse_reply_err(req, errno);
@@ -1572,7 +1722,34 @@
     __android_log_vprint(fuse_to_android_loglevel.at(level), LOG_TAG, fmt, ap);
 }
 
-FuseDaemon::FuseDaemon(JNIEnv* env, jobject mediaProvider) : mp(env, mediaProvider) {}
+bool FuseDaemon::ShouldOpenWithFuse(int fd, bool for_read, const std::string& path) {
+    TRACE << "Checking if file should be opened with FUSE " << path;
+    bool use_fuse = false;
+
+    if (active.load(std::memory_order_acquire)) {
+        pthread_mutex_lock(&fuse->lock);
+        node* node = lookup_node_by_full_path_locked(fuse, path);
+        if (node && node->HasCachedHandle()) {
+            TRACE << "Should open " << path << " with FUSE. Reason: cache";
+            use_fuse = true;
+        } else {
+            // If we are unable to set a lock, we should use fuse since we can't track
+            // when all fd references (including dups) are closed. This can happen when
+            // we try to set a write lock twice on the same file
+            use_fuse = set_file_lock(fd, for_read, path);
+            TRACE << "Should open " << path << (use_fuse ? " with" : " without")
+                  << " FUSE. Reason: lock";
+        }
+        pthread_mutex_unlock(&fuse->lock);
+    } else {
+        TRACE << "FUSE daemon is inactive. Should not open " << path << " with FUSE";
+    }
+
+    return use_fuse;
+}
+
+FuseDaemon::FuseDaemon(JNIEnv* env, jobject mediaProvider) : mp(env, mediaProvider),
+                                                             active(false), fuse(nullptr) {}
 
 void FuseDaemon::Start(const int fd, const std::string& path) {
     struct fuse_args args;
@@ -1608,6 +1785,9 @@
     fuse_default.root.name = path;
     fuse_default.path = path;
     fuse_default.mp = &mp;
+    // fuse_default is stack allocated, but it's safe to save it as an instance variable because
+    // this method blocks and FuseDaemon#active tells if we are currently blocking
+    fuse = &fuse_default;
 
     // Used by pf_read: redacted ranges are represented by zeroized ranges of bytes,
     // so we mmap the maximum length of redacted ranges in the beginning and save memory allocations
@@ -1636,6 +1816,7 @@
     // fuse_session_loop(se);
     // Multi-threaded
     LOG(INFO) << "Starting fuse...";
+    active.store(true, std::memory_order_release);
     fuse_session_loop_mt(se, &config);
     LOG(INFO) << "Ending fuse...";
 
@@ -1645,6 +1826,7 @@
 
     fuse_opt_free_args(&args);
     fuse_session_destroy(se);
+    active.store(false, std::memory_order_relaxed);
     LOG(INFO) << "Ended fuse";
     return;
 }
diff --git a/jni/FuseDaemon.h b/jni/FuseDaemon.h
index d3ff966..1233863 100644
--- a/jni/FuseDaemon.h
+++ b/jni/FuseDaemon.h
@@ -17,14 +17,15 @@
 #ifndef MEDIAPROVIDER_JNI_FUSEDAEMON_H_
 #define MEDIAPROVIDER_JNI_FUSEDAEMON_H_
 
+#include <memory>
 #include <string>
 
 #include "MediaProviderWrapper.h"
 #include "jni.h"
 
+struct fuse;
 namespace mediaprovider {
 namespace fuse {
-
 class FuseDaemon final {
   public:
     FuseDaemon(JNIEnv* env, jobject mediaProvider);
@@ -36,10 +37,17 @@
      */
     void Start(const int fd, const std::string& path);
 
+    /**
+     * Check if file should be opened with FUSE
+     */
+    bool ShouldOpenWithFuse(int fd, bool for_read, const std::string& path);
+
   private:
     FuseDaemon(const FuseDaemon&) = delete;
     void operator=(const FuseDaemon&) = delete;
     MediaProviderWrapper mp;
+    std::atomic_bool active;
+    struct ::fuse* fuse;
 };
 
 }  // namespace fuse
diff --git a/jni/com_android_providers_media_FuseDaemon.cpp b/jni/com_android_providers_media_FuseDaemon.cpp
index 1633b99..caf00b2 100644
--- a/jni/com_android_providers_media_FuseDaemon.cpp
+++ b/jni/com_android_providers_media_FuseDaemon.cpp
@@ -30,9 +30,10 @@
 constexpr const char* CLASS_NAME = "com/android/providers/media/fuse/FuseDaemon";
 static jclass gFuseDaemonClass;
 
-jlong com_android_providers_media_FuseDaemon_new(JNIEnv* env, jobject self, jobject mediaProvider) {
+jlong com_android_providers_media_FuseDaemon_new(JNIEnv* env, jobject self,
+                                                 jobject media_provider) {
     LOG(DEBUG) << "Creating the FUSE daemon...";
-    return reinterpret_cast<jlong>(new fuse::FuseDaemon(env, mediaProvider));
+    return reinterpret_cast<jlong>(new fuse::FuseDaemon(env, media_provider));
 }
 
 void com_android_providers_media_FuseDaemon_start(JNIEnv* env, jobject self, jlong java_daemon,
@@ -44,9 +45,8 @@
     if (!utf_chars_path.c_str()) {
         return;
     }
-    const std::string& string_path = std::string(utf_chars_path.c_str());
 
-    daemon->Start(fd, string_path);
+    daemon->Start(fd, utf_chars_path.c_str());
 }
 
 void com_android_providers_media_FuseDaemon_delete(JNIEnv* env, jobject self, jlong java_daemon) {
@@ -55,13 +55,34 @@
     delete daemon;
 }
 
+jboolean com_android_providers_media_FuseDaemon_should_open_with_fuse(JNIEnv* env, jobject self,
+                                                                      jlong java_daemon,
+                                                                      jstring java_path,
+                                                                      jboolean for_read, jint fd) {
+    fuse::FuseDaemon* const daemon = reinterpret_cast<fuse::FuseDaemon*>(java_daemon);
+    if (daemon) {
+        ScopedUtfChars utf_chars_path(env, java_path);
+        if (!utf_chars_path.c_str()) {
+            // TODO(b/145741852): Throw exception
+            return JNI_FALSE;
+        }
+
+        return daemon->ShouldOpenWithFuse(fd, for_read, utf_chars_path.c_str());
+    }
+    // TODO(b/145741852): Throw exception
+    return JNI_FALSE;
+}
+
 const JNINativeMethod methods[] = {
         {"native_new", "(Lcom/android/providers/media/MediaProvider;)J",
          reinterpret_cast<void*>(com_android_providers_media_FuseDaemon_new)},
         {"native_start", "(JILjava/lang/String;)V",
          reinterpret_cast<void*>(com_android_providers_media_FuseDaemon_start)},
         {"native_delete", "(J)V",
-         reinterpret_cast<void*>(com_android_providers_media_FuseDaemon_delete)}};
+         reinterpret_cast<void*>(com_android_providers_media_FuseDaemon_delete)},
+        {"native_should_open_with_fuse", "(JLjava/lang/String;ZI)Z",
+         reinterpret_cast<void*>(com_android_providers_media_FuseDaemon_should_open_with_fuse)}};
+
 }  // namespace
 
 void register_android_providers_media_FuseDaemon(JNIEnv* env) {