ulockmgr
diff --git a/lib/Makefile.am b/lib/Makefile.am
index 5206493..87e323a 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -1,7 +1,7 @@
 ## Process this file with automake to produce Makefile.in
 
 AM_CPPFLAGS = -I$(top_srcdir)/include -DFUSERMOUNT_DIR=\"$(bindir)\"
-lib_LTLIBRARIES = libfuse.la
+lib_LTLIBRARIES = libfuse.la libulockmgr.la
 
 if BSD
 mount_source = mount_bsd.c
@@ -27,4 +27,7 @@
 libfuse_la_LDFLAGS = $(libfuse_libs) -version-number 2:6:0 \
 	-Wl,--version-script,$(srcdir)/fuse_versionscript
 
+libulockmgr_la_SOURCES = ulockmgr.c
+libulockmgr_la_LDFLAGS = -version-number 1:0:0
+
 EXTRA_DIST = fuse_versionscript
diff --git a/lib/fuse.c b/lib/fuse.c
index a2f6bb1..92f21db 100644
--- a/lib/fuse.c
+++ b/lib/fuse.c
@@ -34,6 +34,7 @@
 #define FUSE_DEFAULT_INTR_SIGNAL SIGUSR1
 
 #define FUSE_UNKNOWN_INO 0xffffffff
+#define OFFSET_MAX 0x7fffffffffffffffLL  /* end offset stored for locks whose l_len is 0 */
 
 struct fuse_config {
     unsigned int uid;
@@ -76,6 +77,15 @@
     int intr_installed;
 };
 
+struct lock {              /* one byte-range lock kept on a node's lock list */
+    int type;              /* lock type, copied from flock.l_type */
+    off_t start;           /* first byte of the range */
+    off_t end;             /* last byte of the range (inclusive); OFFSET_MAX when l_len was 0 */
+    pid_t pid;             /* copied from flock.l_pid */
+    uint64_t owner;        /* owner id; entries with equal owner never conflict */
+    struct lock *next;     /* next entry of the singly linked list */
+};
+
 struct node {
     struct node *name_next;
     struct node *id_next;
@@ -91,6 +101,7 @@
     struct timespec mtime;
     off_t size;
     int cache_valid;
+    struct lock *locks;
 };
 
 struct fuse_dirhandle {
@@ -1604,43 +1615,6 @@
         reply_err(req, res);
 }
 
-static void fuse_flush(fuse_req_t req, fuse_ino_t ino,
-                       struct fuse_file_info *fi, uint64_t owner)
-{
-    struct fuse *f = req_fuse_prepare(req);
-    char *path;
-    int err;
-
-    err = -ENOENT;
-    pthread_rwlock_rdlock(&f->tree_lock);
-    path = get_path(f, ino);
-    if (path != NULL) {
-        if (f->conf.debug) {
-            printf("FLUSH[%llu]\n", (unsigned long long) fi->fh);
-            fflush(stdout);
-        }
-        err = -ENOSYS;
-        if (f->op.flush) {
-            struct fuse_intr_data d;
-            fuse_prepare_interrupt(f, req, &d);
-            err = f->op.flush(path, fi);
-            fuse_finish_interrupt(f, req, &d);
-        }
-        free(path);
-    }
-    if (f->op.lock) {
-        struct flock lock;
-        memset(&lock, 0, sizeof(lock));
-        lock.l_type = F_UNLCK;
-        lock.l_whence = SEEK_SET;
-        fuse_do_lock(f, req, path, fi, F_SETLK, &lock, owner);
-        if (err == -ENOSYS)
-            err = 0;
-    }
-    pthread_rwlock_unlock(&f->tree_lock);
-    reply_err(req, err);
-}
-
 static void fuse_release(fuse_req_t req, fuse_ino_t ino,
                          struct fuse_file_info *fi)
 {
@@ -2162,6 +2136,167 @@
     reply_err(req, err);
 }
 
+static struct lock *locks_conflict(struct node *node, const struct lock *lock)
+{   /* Return the first entry on node->locks that conflicts with lock, or NULL. */
+    struct lock *l;
+
+    for (l = node->locks; l; l = l->next)
+        if (l->owner != lock->owner &&   /* held by a different owner */
+            lock->start <= l->end && l->start <= lock->end &&   /* ranges overlap */
+            (l->type == F_WRLCK || lock->type == F_WRLCK))   /* at least one is a write lock */
+            break;
+
+    return l;   /* NULL when the loop ran off the end of the list */
+}
+
+static void delete_lock(struct lock **lockp)
+{   /* Unlink the entry *lockp points to from its list and free it. */
+    struct lock *l = *lockp;
+    *lockp = l->next;   /* the link now refers to the following entry */
+    free(l);
+}
+
+static void insert_lock(struct lock **pos, struct lock *lock)
+{   /* Link lock into the list in front of whatever *pos currently points to. */
+    lock->next = *pos;
+    *pos = lock;
+}
+
+static int locks_insert(struct node *node, struct lock *lock)
+{   /* Apply lock (an unlock when type is F_UNLCK) to node->locks; returns 0 or -ENOLCK. */
+    struct lock **lp;
+    struct lock *newl1 = NULL;   /* spare entry that will hold the copy of lock */
+    struct lock *newl2 = NULL;   /* spare entry for the tail half when an entry is split */
+
+    if (lock->type != F_UNLCK || lock->start != 0 || lock->end != OFFSET_MAX) {   /* skipped only for an unlock of [0, OFFSET_MAX] */
+        newl1 = malloc(sizeof(struct lock));   /* allocate up front so the list edit below cannot fail midway */
+        newl2 = malloc(sizeof(struct lock));
+
+        if (!newl1 || !newl2) {
+            free(newl1);
+            free(newl2);
+            return -ENOLCK;
+        }
+    }
+
+    for (lp = &node->locks; *lp;) {
+        struct lock *l = *lp;
+        if (l->owner != lock->owner)   /* entries of other owners are left untouched */
+            goto skip;
+
+        if (lock->type == l->type) {   /* same type: merge overlapping or adjacent ranges */
+            if (l->end < lock->start - 1)   /* l ends before lock and is not adjacent */
+                goto skip;
+            if (lock->end < l->start - 1)   /* l starts after lock; NOTE(review): stopping here presumably relies on sorted entries - confirm */
+                break;
+            if (l->start <= lock->start && lock->end <= l->end)   /* l already covers lock: nothing to change */
+                goto out;
+            if (l->start < lock->start)   /* widen lock to the union of both ranges ... */
+                lock->start = l->start;
+            if (lock->end < l->end)
+                lock->end = l->end;
+            goto delete;   /* ... and drop the absorbed entry */
+        } else {   /* different type: lock replaces the overlapped part of l */
+            if (l->end < lock->start)   /* l lies entirely before lock */
+                goto skip;
+            if (lock->end < l->start)   /* l lies entirely after lock */
+                break;
+            if (lock->start <= l->start && l->end <= lock->end)   /* l is fully covered */
+                goto delete;
+            if (l->end <= lock->end) {   /* lock covers the tail of l: shorten l */
+                l->end = lock->start - 1;
+                goto skip;
+            }
+            if (lock->start <= l->start) {   /* lock covers the head of l: move l's start */
+                l->start = lock->end + 1;
+                break;
+            }
+            *newl2 = *l;   /* lock lies strictly inside l: split l in two */
+            newl2->start = lock->end + 1;
+            l->end = lock->start - 1;
+            insert_lock(&l->next, newl2);
+            newl2 = NULL;   /* now linked into the list, must not be freed below */
+        }
+    skip:
+        lp = &l->next;
+        continue;
+
+    delete:
+        delete_lock(lp);   /* lp is not advanced: *lp already refers to the next entry */
+    }
+    if (lock->type != F_UNLCK) {   /* an unlock only removes ranges, nothing is inserted */
+        *newl1 = *lock;
+        insert_lock(lp, newl1);
+        newl1 = NULL;
+    }
+out:
+    free(newl1);   /* release whichever spare entries were not used */
+    free(newl2);
+    return 0;
+}
+
+static void flock_to_lock(struct flock *flock, struct lock *lock)
+{   /* Fill *lock from *flock; l_whence is not consulted, owner and next stay zero. */
+    memset(lock, 0, sizeof(struct lock));
+    lock->type = flock->l_type;
+    lock->start = flock->l_start;
+    lock->end = flock->l_len ? flock->l_start + flock->l_len - 1 : OFFSET_MAX;   /* inclusive end; l_len 0 maps to OFFSET_MAX */
+    lock->pid = flock->l_pid;
+}
+
+static void lock_to_flock(struct lock *lock, struct flock *flock)
+{   /* Copy type, range and pid of *lock into *flock; flock->l_whence is not written. */
+    flock->l_type = lock->type;
+    flock->l_start = lock->start;
+    flock->l_len = (lock->end == OFFSET_MAX) ? 0 : lock->end - lock->start + 1;   /* end OFFSET_MAX is reported as l_len 0 */
+    flock->l_pid = lock->pid;
+}
+
+static void fuse_flush(fuse_req_t req, fuse_ino_t ino,
+                       struct fuse_file_info *fi, uint64_t owner)
+{   /* FLUSH handler: call op.flush(), then release every lock held by owner. */
+    struct fuse *f = req_fuse_prepare(req);
+    char *path;
+    int err;
+
+    err = -ENOENT;   /* reported when the inode has no path */
+    pthread_rwlock_rdlock(&f->tree_lock);
+    path = get_path(f, ino);
+    if (path != NULL) {
+        if (f->conf.debug) {
+            printf("FLUSH[%llu]\n", (unsigned long long) fi->fh);
+            fflush(stdout);
+        }
+        err = -ENOSYS;
+        if (f->op.flush) {
+            struct fuse_intr_data d;
+            fuse_prepare_interrupt(f, req, &d);
+            err = f->op.flush(path, fi);
+            fuse_finish_interrupt(f, req, &d);
+        }
+    }
+    if (f->op.lock) {
+        struct flock lock;
+        struct lock l;
+        memset(&lock, 0, sizeof(lock));   /* l_start 0 and l_len 0: the whole file */
+        lock.l_type = F_UNLCK;
+        lock.l_whence = SEEK_SET;
+        fuse_do_lock(f, req, path, fi, F_SETLK, &lock, owner);   /* NOTE(review): path is NULL here when get_path() failed - confirm fuse_do_lock() copes */
+        flock_to_lock(&lock, &l);
+        l.owner = owner;
+        pthread_mutex_lock(&f->lock);
+        locks_insert(get_node(f, ino), &l);   /* result ignored: a whole-range unlock allocates nothing and returns 0 */
+        pthread_mutex_unlock(&f->lock);
+
+        /* if op.lock() is defined FLUSH is needed regardless of op.flush() */
+        if (err == -ENOSYS)
+            err = 0;
+    }
+    free(path);   /* freed only after fuse_do_lock() has used it; free(NULL) is a no-op */
+    pthread_rwlock_unlock(&f->tree_lock);
+    reply_err(req, err);
+}
+
 static int fuse_lock_common(fuse_req_t req, fuse_ino_t ino,
                             struct fuse_file_info *fi, struct flock *lock,
                             uint64_t owner, int cmd)
@@ -2174,7 +2309,7 @@
     pthread_rwlock_rdlock(&f->tree_lock);
     path = get_path(f, ino);
     if (path != NULL) {
-        fuse_do_lock(f, req, path, fi, cmd, lock, owner);
+        err = fuse_do_lock(f, req, path, fi, cmd, lock, owner);
         free(path);
     }
     pthread_rwlock_unlock(&f->tree_lock);
@@ -2185,7 +2320,23 @@
                        struct fuse_file_info *fi, struct flock *lock,
                        uint64_t owner)
 {
-    int err = fuse_lock_common(req, ino, fi, lock, owner, F_GETLK);
+    int err;
+    struct lock l;
+    struct lock *conflict;
+    struct fuse *f = req_fuse(req);
+
+    flock_to_lock(lock, &l);
+    l.owner = owner;
+    pthread_mutex_lock(&f->lock);
+    conflict = locks_conflict(get_node(f, ino), &l);
+    if (conflict)
+        lock_to_flock(conflict, lock);
+    pthread_mutex_unlock(&f->lock);
+    if (!conflict)
+        err = fuse_lock_common(req, ino, fi, lock, owner, F_GETLK);
+    else
+        err = 0;
+
     if (!err)
         fuse_reply_lock(req, lock);
     else
@@ -2196,8 +2347,18 @@
                        struct fuse_file_info *fi, struct flock *lock,
                        uint64_t owner, int sleep)
 {
-    reply_err(req, fuse_lock_common(req, ino, fi, lock, owner,
-                                    sleep ? F_SETLKW : F_SETLK));
+    int err = fuse_lock_common(req, ino, fi, lock, owner,
+                               sleep ? F_SETLKW : F_SETLK);
+    if (!err) {
+        struct fuse *f = req_fuse(req);
+        struct lock l;
+        flock_to_lock(lock, &l);
+        l.owner = owner;
+        pthread_mutex_lock(&f->lock);
+        locks_insert(get_node(f, ino), &l);
+        pthread_mutex_unlock(&f->lock);
+    }
+    reply_err(req, err);
 }
 
 static struct fuse_lowlevel_ops fuse_path_ops = {
@@ -2418,8 +2579,7 @@
 
         memset(&sa, 0, sizeof(struct sigaction));
         sa.sa_handler = fuse_intr_sighandler;
-        sigemptyset(&(sa.sa_mask));
-        sa.sa_flags = 0;
+        sigemptyset(&sa.sa_mask);
 
         if (sigaction(signum, &sa, NULL) == -1) {
             perror("fuse: cannot set interrupt signal handler");
diff --git a/lib/ulockmgr.c b/lib/ulockmgr.c
new file mode 100644
index 0000000..795fc41
--- /dev/null
+++ b/lib/ulockmgr.c
@@ -0,0 +1,402 @@
+/*
+    libulockmgr: Userspace Lock Manager Library
+    Copyright (C) 2006  Miklos Szeredi <miklos@szeredi.hu>
+
+    This program can be distributed under the terms of the GNU LGPL.
+    See the file COPYING.LIB
+*/
+
+/* #define DEBUG 1 */
+
+#include "ulockmgr.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/stat.h>
+#include <sys/socket.h>
+#include <sys/wait.h>
+
+struct message {           /* record sent to the lock server and received back as the reply */
+    int intr;              /* set to 1 when re-sent after a blocking wait was interrupted */
+    pthread_t thr;         /* never written in this file; NOTE(review): presumably filled in by the server - confirm */
+    int cmd;               /* F_GETLK, F_SETLK or F_SETLKW */
+    int fd;                /* descriptor the lock applies to */
+    struct flock lock;     /* lock description, canonicalized before sending */
+    int error;             /* reply status as a positive errno value, 0 on success */
+};
+
+struct fd_store {          /* remembers one request that was sent for an owner */
+    struct fd_store *next; /* next record on the owner's list */
+    int fd;                /* descriptor named in that request */
+    int finished;          /* set to 1 once the request's reply has been handled */
+};
+
+struct owner {             /* one lock owner, linked into the owner_list ring */
+    struct owner *next;    /* ring links */
+    struct owner *prev;
+    struct fd_store *fds;  /* requests sent on behalf of this owner */
+    void *id;              /* owner id bytes, stored directly behind this struct */
+    size_t id_len;         /* number of id bytes */
+    int cfd;               /* this owner's socket to the server */
+};
+
+static pthread_mutex_t ulockmgr_lock = PTHREAD_MUTEX_INITIALIZER;   /* taken by ulockmgr_op() around every request */
+static int ulockmgr_cfd = -1;   /* control socket to the server; -1 while no server is running */
+static struct owner owner_list = { .next = &owner_list, .prev = &owner_list };   /* ring sentinel, empty when it points at itself */
+
+#define MAX_SEND_FDS 2   /* upper bound on descriptors passed in one message */
+
+static void list_del_owner(struct owner *owner)
+{   /* Unlink owner from the ring; owner's own next/prev are left unchanged. */
+    struct owner *prev = owner->prev;
+    struct owner *next = owner->next;
+    prev->next = next;
+    next->prev = prev;
+}
+
+static void list_add_owner(struct owner *owner, struct owner *next)
+{   /* Link owner into the ring immediately before next. */
+    struct owner *prev = next->prev;
+    owner->next = next;
+    owner->prev = prev;
+    prev->next = owner;
+    next->prev = owner;
+}
+
+static int ulockmgr_send_message(int sock, void *buf, size_t buflen,
+                                 int *fdp, int numfds)
+{   /* Send buf on sock together with numfds descriptors from fdp (SCM_RIGHTS); returns 0 or -1. */
+    struct msghdr msg;
+    struct cmsghdr *p_cmsg;
+    struct iovec vec;
+    char cmsgbuf[CMSG_SPACE(sizeof(int) * MAX_SEND_FDS)];
+    int res;
+
+    assert(numfds <= MAX_SEND_FDS);   /* cmsgbuf only has room for MAX_SEND_FDS descriptors */
+    msg.msg_control = cmsgbuf;
+    msg.msg_controllen = sizeof(cmsgbuf);   /* provisional; narrowed below to the length actually used */
+    p_cmsg = CMSG_FIRSTHDR(&msg);
+    p_cmsg->cmsg_level = SOL_SOCKET;
+    p_cmsg->cmsg_type = SCM_RIGHTS;
+    p_cmsg->cmsg_len = CMSG_LEN(sizeof(int) * numfds);
+    memcpy(CMSG_DATA(p_cmsg), fdp, sizeof(int) * numfds);
+    msg.msg_controllen = p_cmsg->cmsg_len;
+    msg.msg_name = NULL;
+    msg.msg_namelen = 0;
+    msg.msg_iov = &vec;
+    msg.msg_iovlen = 1;   /* the payload is the single buffer buf */
+    msg.msg_flags = 0;
+    vec.iov_base = buf;
+    vec.iov_len = buflen;
+    res = sendmsg(sock, &msg, MSG_NOSIGNAL);
+    if (res == -1) {
+        perror("libulockmgr: sendmsg");
+        return -1;
+    }
+    if ((size_t) res != buflen) {   /* a partial send is treated as failure, not retried */
+        fprintf(stderr, "libulockmgr: sendmsg short\n");
+        return -1;
+    }
+    return 0;
+}
+
+static int ulockmgr_start_daemon(void)
+{   /* Run ulockmgr_server and keep one end of a socketpair in ulockmgr_cfd; returns 0 or -1. */
+    int sv[2];
+    int res;
+    char tmp[64];
+
+    res = socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
+    if (res == -1) {
+        perror("libulockmgr: socketpair");
+        return -1;
+    }
+    snprintf(tmp, sizeof(tmp), "exec ulockmgr_server %i", sv[0]);   /* the server gets the fd number of its end as argument */
+    res = system(tmp);   /* NOTE(review): presumably the server backgrounds itself before this returns - confirm */
+    close(sv[0]);   /* our copy of the server's end is closed whether or not the launch worked */
+    if (res == -1 || !WIFEXITED(res) || WEXITSTATUS(res) != 0) {   /* anything but a normal exit with status 0 is failure */
+        close(sv[1]);
+        return -1;
+    }
+    ulockmgr_cfd = sv[1];
+    return 0;
+}
+
+static struct owner *ulockmgr_new_owner(const void *id, size_t id_len)
+{   /* Create an owner record with its own socket to the server; returns NULL on failure. */
+    int sv[2];
+    int res;
+    char c = 'm';   /* one-byte payload sent along with the descriptor */
+    struct owner *o;
+
+    if (ulockmgr_cfd == -1 && ulockmgr_start_daemon() == -1)   /* start the server when no control socket exists */
+        return NULL;
+
+    o = calloc(1, sizeof(struct owner) + id_len);   /* room for the id bytes right behind the struct */
+    if (!o) {
+        fprintf(stderr, "libulockmgr: failed to allocate memory\n");
+        return NULL;
+    }
+    o->id = o + 1;
+    o->id_len = id_len;
+    res = socketpair(AF_UNIX, SOCK_STREAM, 0, sv);
+    if (res == -1) {
+        perror("libulockmgr: socketpair");
+        goto out_free;
+    }
+    res = ulockmgr_send_message(ulockmgr_cfd, &c, sizeof(c), &sv[0], 1);   /* pass sv[0] over the control socket */
+    close(sv[0]);
+    if (res == -1) {
+        close(ulockmgr_cfd);   /* drop the control socket so a later call starts the server again */
+        ulockmgr_cfd = -1;
+        goto out_close;
+    }
+
+    o->cfd = sv[1];
+    memcpy(o->id, id, id_len);
+    list_add_owner(o, &owner_list);   /* inserted before the sentinel */
+
+    return o;
+
+ out_close:
+    close(sv[1]);
+ out_free:
+    free(o);
+    return NULL;
+}
+
+static int ulockmgr_send_request(struct message *msg, const void *id,
+                                 size_t id_len)
+{   /* Send *msg for the owner named by id and read the reply into *msg; returns 0 or -errno. */
+    int sv[2];
+    int cfd;
+    struct owner *o;
+    struct fd_store *f;
+    int fd = msg->fd;     /* saved because the reply is received into *msg */
+    int cmd = msg->cmd;   /* saved for the same reason */
+    int res;
+    int unlockall = (cmd == F_SETLK && msg->lock.l_type == F_UNLCK &&
+                     msg->lock.l_start == 0 && msg->lock.l_len == 0);   /* unlock of the whole file */
+
+    for (o = owner_list.next; o != &owner_list; o = o->next)   /* look for an owner with identical id bytes */
+        if (o->id_len == id_len && memcmp(o->id, id, id_len) == 0)
+            break;
+
+    if (o == &owner_list)   /* reached the sentinel: not found */
+        o = NULL;
+
+    if (!o && cmd != F_GETLK && msg->lock.l_type != F_UNLCK)   /* only a locking request creates an owner */
+        o = ulockmgr_new_owner(id, id_len);
+
+    if (!o) {
+        if (cmd == F_GETLK) {   /* unknown owner: ask fcntl() on the descriptor directly */
+            res = fcntl(msg->fd, F_GETLK, &msg->lock);
+            return (res == -1) ? -errno : 0;
+        } else if (msg->lock.l_type == F_UNLCK)   /* unknown owner has nothing to unlock */
+            return 0;
+        else   /* creating the owner failed */
+            return -ENOLCK;
+    }
+
+    f = calloc(1, sizeof(struct fd_store));
+    if (!f) {
+        fprintf(stderr, "libulockmgr: failed to allocate memory\n");
+        return -ENOLCK;
+    }
+
+    res = socketpair(AF_UNIX, SOCK_STREAM, 0, sv);   /* private channel for this request's reply */
+    if (res == -1) {
+        perror("libulockmgr: socketpair");
+        free(f);
+        return -ENOLCK;
+    }
+
+    cfd = sv[1];        /* our end of the reply channel */
+    sv[1] = msg->fd;    /* sv[] now holds the two descriptors to pass: sv[0] and the file */
+    res = ulockmgr_send_message(o->cfd, msg, sizeof(struct message), sv, 2);
+    close(sv[0]);
+    if (res == -1) {
+        free(f);
+        close(cfd);
+        return -EIO;
+    }
+
+    f->fd = msg->fd;    /* record the request on the owner's list */
+    f->next = o->fds;
+    o->fds = f;
+
+    res = recv(cfd, msg, sizeof(struct message), MSG_WAITALL);   /* first reply */
+    if (res == -1) {
+        perror("libulockmgr: recv");
+        msg->error = EIO;
+    } else if (res != sizeof(struct message)) {
+        fprintf(stderr, "libulockmgr: recv short\n");
+        msg->error = EIO;
+    } else if (cmd == F_SETLKW && msg->error == EAGAIN) {   /* blocking lock not granted yet: wait for another reply */
+        pthread_mutex_unlock(&ulockmgr_lock);   /* released while waiting; o stays alive because f is unfinished and keeps o->fds non-empty */
+        while (1) {
+            sigset_t old;
+            sigset_t unblock;
+            int errno_save;
+
+            sigemptyset(&unblock);
+            sigaddset(&unblock, SIGUSR1);
+            pthread_sigmask(SIG_UNBLOCK, &unblock, &old);   /* SIGUSR1 is unblocked only around this recv() */
+            res = recv(cfd, msg, sizeof(struct message), MSG_WAITALL);
+            errno_save = errno;   /* saved before pthread_sigmask() runs */
+            pthread_sigmask(SIG_SETMASK, &old, NULL);
+            if (res == sizeof(struct message))   /* full reply received */
+                break;
+            else if (res >= 0) {
+                fprintf(stderr, "libulockmgr: recv short\n");
+                msg->error = EIO;
+                break;
+            } else if (errno_save != EINTR) {
+                errno = errno_save;   /* restored so perror() reports recv's error */
+                perror("libulockmgr: recv");
+                msg->error = EIO;
+                break;
+            }
+            msg->intr = 1;   /* recv() hit EINTR: re-send the message with intr set, then wait again */
+            res = send(o->cfd, msg, sizeof(struct message), MSG_NOSIGNAL);
+            if (res == -1) {
+                perror("libulockmgr: send");
+                msg->error = EIO;
+                break;
+            }
+            if (res != sizeof(struct message)) {
+                fprintf(stderr, "libulockmgr: send short\n");
+                msg->error = EIO;
+                break;
+            }
+        }
+        pthread_mutex_lock(&ulockmgr_lock);   /* re-taken: the caller unlocks it after we return */
+
+    }
+
+    f->finished = 1;   /* from now on the unlock-all sweep may free this record */
+    close(cfd);
+    if (unlockall) {
+        struct fd_store **fp;
+
+        for (fp = &o->fds; *fp;) {   /* drop every finished record that names this fd */
+            f = *fp;
+            if (f->fd == fd && f->finished) {
+                *fp = f->next;
+                free(f);
+            } else
+                fp = &f->next;
+        }
+        if (!o->fds) {   /* no records left: the owner itself goes away */
+            list_del_owner(o);
+            close(o->cfd);
+            free(o);
+        }
+        /* Force OK on unlock-all, since it _will_ succeed once the
+           owner is deleted */
+        msg->error = 0;
+    }
+
+    return -msg->error;   /* error holds a positive errno value */
+}
+
+#ifdef DEBUG
+static uint32_t owner_hash(const unsigned char *id, size_t id_len)
+{   /* Fold the id bytes into 32 bits; used by the DEBUG trace in ulockmgr_op(). */
+    uint32_t h = 0;
+    size_t i;
+    for (i = 0; i < id_len; i++)
+        h = ((h << 8) | (h >> 24)) ^ id[i];   /* rotate left by 8 bits, then XOR in the next byte */
+
+    return h;
+}
+#endif
+
+static int ulockmgr_canonicalize(int fd, struct flock *lock)
+{   /* Rewrite *lock relative to SEEK_SET with a non-negative l_len; returns 0 or -errno. */
+    off_t offset;
+    if (lock->l_whence == SEEK_CUR) {
+        offset = lseek(fd, 0, SEEK_CUR);   /* current file position, not moved */
+        if (offset == (off_t) -1)
+            return -errno;
+    } else if (lock->l_whence == SEEK_END) {
+        struct stat stbuf;
+        int res = fstat(fd, &stbuf);
+        if (res == -1)
+            return -errno;
+
+        offset = stbuf.st_size;   /* file size taken as the end position */
+    } else
+        offset = 0;   /* every other l_whence value is handled as SEEK_SET */
+
+    lock->l_whence = SEEK_SET;
+    lock->l_start += offset;
+
+    if (lock->l_start < 0)
+        return -EINVAL;
+
+    if (lock->l_len < 0) {   /* negative length: the range ends just before l_start */
+        lock->l_start += lock->l_len;
+        if (lock->l_start < 0)
+            return -EINVAL;
+        lock->l_len = -lock->l_len;
+    }
+    if (lock->l_len && lock->l_start + lock->l_len - 1 < 0)   /* NOTE(review): detects overflow via signed wraparound, which ISO C leaves undefined */
+        return -EINVAL;
+
+    return 0;
+}
+
+int ulockmgr_op(int fd, int cmd, struct flock *lock, const void *owner,
+                size_t owner_len)
+{   /* Run lock command cmd (F_GETLK, F_SETLK, F_SETLKW) on fd for the given owner id; returns 0 or -errno. */
+    int err;
+    struct message msg;
+    sigset_t old;
+    sigset_t block;
+
+    if (cmd != F_GETLK && cmd != F_SETLK && cmd != F_SETLKW)
+        return -EINVAL;
+
+    if (lock->l_whence != SEEK_SET && lock->l_whence != SEEK_CUR &&
+        lock->l_whence != SEEK_END)
+        return -EINVAL;
+
+#ifdef DEBUG
+    fprintf(stderr, "libulockmgr: %i %i %i %lli %lli own: 0x%08x\n",
+            cmd, lock->l_type, lock->l_whence, (long long) lock->l_start,
+            (long long) lock->l_len, (unsigned) owner_hash(owner, owner_len));
+#endif
+
+    /* Unlock should never block anyway */
+    if (cmd == F_SETLKW && lock->l_type == F_UNLCK)
+        cmd = F_SETLK;
+
+    memset(&msg, 0, sizeof(struct message));
+    msg.cmd = cmd;
+    msg.fd = fd;
+    msg.lock = *lock;   /* work on a copy; *lock is only written for F_GETLK below */
+    err = ulockmgr_canonicalize(fd, &msg.lock);
+    if (err)
+        return err;
+
+    sigemptyset(&block);
+    sigaddset(&block, SIGUSR1);
+    pthread_sigmask(SIG_BLOCK, &block, &old);   /* SIGUSR1 blocked for the request; send_request unblocks it only while waiting */
+    pthread_mutex_lock(&ulockmgr_lock);
+    err = ulockmgr_send_request(&msg, owner, owner_len);
+    pthread_mutex_unlock(&ulockmgr_lock);
+    pthread_sigmask(SIG_SETMASK, &old, NULL);   /* restore the caller's signal mask */
+    if (!err && cmd == F_GETLK) {
+        if (msg.lock.l_type == F_UNLCK)   /* no conflict: only l_type is reported back */
+            lock->l_type = F_UNLCK;
+        else   /* conflicting lock, described relative to SEEK_SET */
+            *lock = msg.lock;
+    }
+
+    return err;
+}