Add "tail_until_writer_finished" script
Copied a script from chromiumos/src/platform/dev that tails a file and
exits once the writer closes it, which is useful for log files that get
rotated. The apache_error_stats script needs this.
BUG=chromium:712388
TEST=Tests pass, and are properly skipped if "inotifywait" is missing.
Change-Id: I56047cf3bd4eaa48346c51d08f7441378e251f6b
Reviewed-on: https://chromium-review.googlesource.com/486122
Commit-Ready: Paul Hobbs <phobbs@google.com>
Tested-by: Paul Hobbs <phobbs@google.com>
Reviewed-by: Shuqian Zhao <shuqianz@chromium.org>
diff --git a/site_utils/stats/tail_until_writer_finished.py b/site_utils/stats/tail_until_writer_finished.py
new file mode 100755
index 0000000..13138a9
--- /dev/null
+++ b/site_utils/stats/tail_until_writer_finished.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python2 -u
+
+# Copyright 2017 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Tails a file, and quits when inotify detects that it has been closed."""
+
+from __future__ import print_function
+
+import argparse
+import select
+import subprocess
+import sys
+import time
+import contextlib
+
+
+@contextlib.contextmanager
+def WriterClosedFile(path):
+ """Context manager to watch whether a file is closed by a writer.
+
+ @param path: the path to watch.
+ """
+ inotify_process = subprocess.Popen(
+ ['inotifywait', '-qe', 'close_write', path],
+ stdout=subprocess.PIPE)
+
+ # stdout.read is blocking, so use select.select to detect if input is
+ # available.
+ def IsClosed():
+ """Returns whether the inotify_process.stdout file is closed."""
+ read_list, _, _ = select.select([inotify_process.stdout], [], [], 0)
+ return bool(read_list)
+
+ try:
+ yield IsClosed
+ finally:
+ inotify_process.kill()
+
+
+def TailFile(path, sleep_interval, chunk_size,
+ outfile=sys.stdout,
+ seek_to_end=True):
+ """Tails a file, and quits when there are no writers on the file.
+
+ @param path: The path to the file to open
+ @param sleep_interval: The amount to sleep in between reads to reduce
+ wasted IO
+ @param chunk_size: The amount of bytes to read in between print() calls
+ @param outfile: A file handle to write to. Defaults to sys.stdout
+ @param seek_to_end: Whether to start at the end of the file at |path| when
+ reading.
+ """
+
+ def ReadChunks(fh):
+ """Reads all chunks from a file handle, and prints them to |outfile|.
+
+ @param fh: The filehandle to read from.
+ """
+ for chunk in iter(lambda: fh.read(chunk_size), b''):
+ print(chunk, end='', file=outfile)
+
+ with WriterClosedFile(path) as IsClosed:
+ with open(path) as fh:
+ if seek_to_end == True:
+ fh.seek(0, 2)
+ while True:
+ ReadChunks(fh)
+ if IsClosed():
+ # We need to read the chunks again to avoid a race condition
+ # where the writer finishes writing some output in between
+ # the ReadChunks() and the IsClosed() call.
+ ReadChunks(fh)
+ break
+
+ # Sleep a bit to limit the number of wasted reads.
+ time.sleep(sleep_interval)
+
+
+def Main():
+ """Main entrypoint for the script."""
+ p = argparse.ArgumentParser(description=__doc__)
+ p.add_argument('file', help='The file to tail')
+ p.add_argument('--sleep_interval', type=float, default=0.1,
+ help='Time sleeping between file reads')
+ p.add_argument('--chunk_size', type=int, default=64 * 2**10,
+ help='Bytes to read before yielding')
+ p.add_argument('--from_beginning', action='store_true',
+ help='If given, read from the beginning of the file.')
+ args = p.parse_args()
+
+ TailFile(args.file, args.sleep_interval, args.chunk_size,
+ seek_to_end=not args.from_beginning)
+
+
+if __name__ == '__main__':
+ Main()
diff --git a/site_utils/stats/tail_until_writer_finished_unittest.py b/site_utils/stats/tail_until_writer_finished_unittest.py
new file mode 100755
index 0000000..e213517
--- /dev/null
+++ b/site_utils/stats/tail_until_writer_finished_unittest.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python2
+
+# Copyright 2016 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"""Unit tests for tail_until_writer_finished.py"""
+
+from __future__ import print_function
+
+import StringIO
+import subprocess
+import tempfile
+import threading
+import time
+import unittest
+
+import tail_until_writer_finished
+
+
+class TestTailUntilWriterFinished(unittest.TestCase):
+ """Tests tail_until_writer_finished."""
+
+ def SkipIfMissingInotifyTools(self):
+ """The tail_until_writer_finished module requires 'inotifywait'."""
+ try:
+ subprocess.call(['inotifywait'], stderr=subprocess.PIPE)
+ except OSError:
+ raise unittest.SkipTest('inotify-tools must be installed.')
+
+ def testTail(self):
+ """Tests reading a file from the end."""
+ self.GetsEntireInput(seek_to_end=True)
+
+ def testRead(self):
+ """Tests reading a file from the beginning."""
+ self.GetsEntireInput(seek_to_end=False)
+
+ def GetsEntireInput(self, seek_to_end):
+ """Tails a temp file in a thread.
+
+ Check that it read the file correctly.
+
+ @param seek_to_end: Whether to .seek to the end of the file before
+ reading.
+ """
+ self.SkipIfMissingInotifyTools()
+
+ f = tempfile.NamedTemporaryFile()
+ output = StringIO.StringIO()
+
+ f.write('This line will not get read if we seek to end.\n')
+ f.flush()
+
+ def Tail():
+ """Tails the file into |output| with a 64k chunk size."""
+ tail_until_writer_finished.TailFile(
+ f.name, 0.1, 64000, outfile=output, seek_to_end=seek_to_end)
+
+ thread = threading.Thread(target=Tail)
+ thread.start()
+
+ # There is a race here: the thread must start the inotify process before
+ # we close the file. This shouldn't take long at all, so add a small
+ # sleep.
+ time.sleep(0.3)
+
+ for i in range(100):
+ f.write(str(i) + '\n')
+ f.flush()
+ f.close()
+ thread.join()
+
+ expected = ''.join([str(i) + '\n' for i in range(100)])
+ if not seek_to_end:
+ expected = ('This line will not get read if we seek to end.\n'
+ + expected)
+ self.assertEqual(output.getvalue(), expected)
+
+
+if __name__ == '__main__':
+ unittest.main()