Add "tail_until_writer_finished" script

Copied a script from chromiumos/src/platform/dev which is useful for
tailing log files which will be rotated. This is necessary for the
apache_error_stats script.

BUG=chromium:712388
TEST=Tests pass, and are properly skipped if "inotifywait" is missing.

Change-Id: I56047cf3bd4eaa48346c51d08f7441378e251f6b
Reviewed-on: https://chromium-review.googlesource.com/486122
Commit-Ready: Paul Hobbs <phobbs@google.com>
Tested-by: Paul Hobbs <phobbs@google.com>
Reviewed-by: Shuqian Zhao <shuqianz@chromium.org>
diff --git a/site_utils/stats/tail_until_writer_finished.py b/site_utils/stats/tail_until_writer_finished.py
new file mode 100755
index 0000000..13138a9
--- /dev/null
+++ b/site_utils/stats/tail_until_writer_finished.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python2 -u
+
+# Copyright 2017 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Tails a file, and quits when inotify detects that it has been closed."""
+
+from __future__ import print_function
+
+import argparse
+import select
+import subprocess
+import sys
+import time
+import contextlib
+
+
+@contextlib.contextmanager
+def WriterClosedFile(path):
+    """Yields a non-blocking check for a writer closing the file at |path|.
+
+    @param path: the path to watch.
+    """
+    inotify_process = subprocess.Popen(
+        ['inotifywait', '-qe', 'close_write', path],
+        stdout=subprocess.PIPE)
+
+    def IsClosed():
+        """Returns True once inotifywait's stdout is readable; never blocks."""
+        read_list, _, _ = select.select([inotify_process.stdout], [], [], 0)
+        return bool(read_list)
+
+    try:
+        yield IsClosed
+    finally:
+        # Reap the child after killing it so no zombie is left behind.
+        inotify_process.kill()
+        inotify_process.wait()
+
+
+def TailFile(path, sleep_interval, chunk_size,
+             outfile=sys.stdout,
+             seek_to_end=True):
+    """Tails a file, and quits once a writer has closed the file.
+
+    @param path: The path to the file to open
+    @param sleep_interval: The number of seconds to sleep in between reads to
+                           reduce wasted IO
+    @param chunk_size: The amount of bytes to read in between print() calls
+    @param outfile: A file handle to write to.  Defaults to sys.stdout
+    @param seek_to_end: Whether to start at the end of the file at |path| when
+                        reading.
+    """
+
+    def ReadChunks(fh):
+        """Reads chunks from |fh| until EOF, printing each to |outfile|.
+
+        @param fh: The filehandle to read from.
+        """
+        for chunk in iter(lambda: fh.read(chunk_size), b''):
+            print(chunk, end='', file=outfile)
+
+    with WriterClosedFile(path) as IsClosed:
+        with open(path) as fh:
+            if seek_to_end:
+                fh.seek(0, 2)  # whence=2 seeks relative to the end of file.
+            while True:
+                ReadChunks(fh)
+                if IsClosed():
+                    # We need to read the chunks again to avoid a race condition
+                    # where the writer finishes writing some output in between
+                    # the ReadChunks() and the IsClosed() call.
+                    ReadChunks(fh)
+                    break
+
+                # Sleep a bit to limit the number of wasted reads.
+                time.sleep(sleep_interval)
+
+
+def Main():
+    """Parses the command-line flags and tails the requested file."""
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument('file', help='The file to tail')
+    parser.add_argument('--sleep_interval', default=0.1, type=float,
+                        help='Time sleeping between file reads')
+    parser.add_argument('--chunk_size', default=64 * 2**10, type=int,
+                        help='Bytes to read before yielding')
+    parser.add_argument('--from_beginning', action='store_true',
+                        help='If given, read from the beginning of the file.')
+    options = parser.parse_args()
+
+    TailFile(options.file, options.sleep_interval, options.chunk_size,
+             seek_to_end=not options.from_beginning)
+
+
+if __name__ == '__main__':
+    Main()
diff --git a/site_utils/stats/tail_until_writer_finished_unittest.py b/site_utils/stats/tail_until_writer_finished_unittest.py
new file mode 100755
index 0000000..e213517
--- /dev/null
+++ b/site_utils/stats/tail_until_writer_finished_unittest.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python2
+
+# Copyright 2016 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Unit tests for tail_until_writer_finished.py"""
+
+from __future__ import print_function
+
+import StringIO
+import subprocess
+import tempfile
+import threading
+import time
+import unittest
+
+import tail_until_writer_finished
+
+
+class TestTailUntilWriterFinished(unittest.TestCase):
+    """Tests tail_until_writer_finished."""
+
+    def SkipIfMissingInotifyTools(self):
+        """Skips the test when the 'inotifywait' binary cannot be executed."""
+        try:
+            # communicate() drains stderr so the child cannot block on it.
+            probe = subprocess.Popen(['inotifywait'], stderr=subprocess.PIPE)
+            probe.communicate()
+        except OSError:
+            raise unittest.SkipTest('inotify-tools must be installed.')
+
+    def testTail(self):
+        """Tests reading a file from the end."""
+        self.GetsEntireInput(seek_to_end=True)
+
+    def testRead(self):
+        """Tests reading a file from the beginning."""
+        self.GetsEntireInput(seek_to_end=False)
+
+    def GetsEntireInput(self, seek_to_end):
+        """Tails a temp file in a thread and checks what was read.
+
+        @param seek_to_end: Whether to .seek to the end of the file before
+            reading.
+        """
+        self.SkipIfMissingInotifyTools()
+
+        f = tempfile.NamedTemporaryFile()
+        output = StringIO.StringIO()
+
+        f.write('This line will not get read if we seek to end.\n')
+        f.flush()
+
+        def Tail():
+            """Tails the file into |output| with a 64k chunk size."""
+            tail_until_writer_finished.TailFile(
+                f.name, 0.1, 64000, outfile=output, seek_to_end=seek_to_end)
+
+        thread = threading.Thread(target=Tail)
+        thread.start()
+
+        # There is a race here: the thread must start the inotify process before
+        # we close the file. This shouldn't take long at all, so add a small
+        # sleep.
+        time.sleep(0.3)
+
+        for i in range(100):
+            f.write(str(i) + '\n')
+            f.flush()
+        f.close()
+        thread.join()
+
+        expected = ''.join([str(i) + '\n' for i in range(100)])
+        if not seek_to_end:
+            expected = ('This line will not get read if we seek to end.\n'
+                        + expected)
+        self.assertEqual(output.getvalue(), expected)
+
+
+if __name__ == '__main__':
+    unittest.main()