bpo-38112: Compileall improvements (GH-16012)

* Raise the limit of maximum path depth to actual  recursion limit

* Add posibilities to adjust a path compiled in .pyc  file.

Now, you can:
- Strip a part of path from a beggining of path into compiled file
   example "-s /test /test/build/real/test.py" → "build/real/test.py"
- Append some new path to a beggining of path into compiled file
   example "-p /boo real/test.py" → "/boo/real/test.py"

You can also use both options in the same time. In that case,
striping is done before appending.

* Add a possibility to specify multiple optimization levels

Each optimization level then leads to separated compiled file.
Use `action='append'` instead of `nargs='+'` for the -o option.
Instead of `-o 0 1 2`, specify `-o 0 -o 1 -o 2`. It's more to type,
but much more explicit.

* Add a symlinks limitation feature

This feature allows us to limit byte-compilation of symbolic
links if they are pointing outside specified dir (build root
for example).
diff --git a/Lib/compileall.py b/Lib/compileall.py
index 49306d9..26caf34 100644
--- a/Lib/compileall.py
+++ b/Lib/compileall.py
@@ -17,10 +17,13 @@
 import struct
 
 from functools import partial
+from pathlib import Path
+
+RECURSION_LIMIT = sys.getrecursionlimit()
 
 __all__ = ["compile_dir","compile_file","compile_path"]
 
-def _walk_dir(dir, ddir=None, maxlevels=10, quiet=0):
+def _walk_dir(dir, maxlevels=RECURSION_LIMIT, quiet=0):
     if quiet < 2 and isinstance(dir, os.PathLike):
         dir = os.fspath(dir)
     if not quiet:
@@ -36,35 +39,39 @@
         if name == '__pycache__':
             continue
         fullname = os.path.join(dir, name)
-        if ddir is not None:
-            dfile = os.path.join(ddir, name)
-        else:
-            dfile = None
         if not os.path.isdir(fullname):
             yield fullname
         elif (maxlevels > 0 and name != os.curdir and name != os.pardir and
               os.path.isdir(fullname) and not os.path.islink(fullname)):
-            yield from _walk_dir(fullname, ddir=dfile,
-                                 maxlevels=maxlevels - 1, quiet=quiet)
+            yield from _walk_dir(fullname, maxlevels=maxlevels - 1,
+                                 quiet=quiet)
 
-def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
-                quiet=0, legacy=False, optimize=-1, workers=1,
-                invalidation_mode=None):
+def compile_dir(dir, maxlevels=RECURSION_LIMIT, ddir=None, force=False,
+                rx=None, quiet=0, legacy=False, optimize=-1, workers=1,
+                invalidation_mode=None, stripdir=None,
+                prependdir=None, limit_sl_dest=None):
     """Byte-compile all modules in the given directory tree.
 
     Arguments (only dir is required):
 
     dir:       the directory to byte-compile
-    maxlevels: maximum recursion level (default 10)
+    maxlevels: maximum recursion level (default `sys.getrecursionlimit()`)
     ddir:      the directory that will be prepended to the path to the
                file as it is compiled into each byte-code file.
     force:     if True, force compilation, even if timestamps are up-to-date
     quiet:     full output with False or 0, errors only with 1,
                no output with 2
     legacy:    if True, produce legacy pyc paths instead of PEP 3147 paths
-    optimize:  optimization level or -1 for level of the interpreter
+    optimize:  int or list of optimization levels or -1 for level of
+               the interpreter. Multiple levels leads to multiple compiled
+               files each with one optimization level.
     workers:   maximum number of parallel workers
     invalidation_mode: how the up-to-dateness of the pyc will be checked
+    stripdir:  part of path to left-strip from source file path
+    prependdir: path to prepend to beggining of original file path, applied
+               after stripdir
+    limit_sl_dest: ignore symlinks if they are pointing outside of
+                   the defined path
     """
     ProcessPoolExecutor = None
     if workers < 0:
@@ -76,8 +83,7 @@
             from concurrent.futures import ProcessPoolExecutor
         except ImportError:
             workers = 1
-    files = _walk_dir(dir, quiet=quiet, maxlevels=maxlevels,
-                      ddir=ddir)
+    files = _walk_dir(dir, quiet=quiet, maxlevels=maxlevels)
     success = True
     if workers != 1 and ProcessPoolExecutor is not None:
         # If workers == 0, let ProcessPoolExecutor choose
@@ -88,19 +94,25 @@
                                            rx=rx, quiet=quiet,
                                            legacy=legacy,
                                            optimize=optimize,
-                                           invalidation_mode=invalidation_mode),
+                                           invalidation_mode=invalidation_mode,
+                                           stripdir=stripdir,
+                                           prependdir=prependdir,
+                                           limit_sl_dest=limit_sl_dest),
                                    files)
             success = min(results, default=True)
     else:
         for file in files:
             if not compile_file(file, ddir, force, rx, quiet,
-                                legacy, optimize, invalidation_mode):
+                                legacy, optimize, invalidation_mode,
+                                stripdir=stripdir, prependdir=prependdir,
+                                limit_sl_dest=limit_sl_dest):
                 success = False
     return success
 
 def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
                  legacy=False, optimize=-1,
-                 invalidation_mode=None):
+                 invalidation_mode=None, stripdir=None, prependdir=None,
+                 limit_sl_dest=None):
     """Byte-compile one file.
 
     Arguments (only fullname is required):
@@ -112,32 +124,76 @@
     quiet:     full output with False or 0, errors only with 1,
                no output with 2
     legacy:    if True, produce legacy pyc paths instead of PEP 3147 paths
-    optimize:  optimization level or -1 for level of the interpreter
+    optimize:  int or list of optimization levels or -1 for level of
+               the interpreter. Multiple levels leads to multiple compiled
+               files each with one optimization level.
     invalidation_mode: how the up-to-dateness of the pyc will be checked
+    stripdir:  part of path to left-strip from source file path
+    prependdir: path to prepend to beggining of original file path, applied
+               after stripdir
+    limit_sl_dest: ignore symlinks if they are pointing outside of
+                   the defined path.
     """
+
+    if ddir is not None and (stripdir is not None or prependdir is not None):
+        raise ValueError(("Destination dir (ddir) cannot be used "
+                          "in combination with stripdir or prependdir"))
+
     success = True
     if quiet < 2 and isinstance(fullname, os.PathLike):
         fullname = os.fspath(fullname)
     name = os.path.basename(fullname)
+
+    dfile = None
+
     if ddir is not None:
         dfile = os.path.join(ddir, name)
-    else:
-        dfile = None
+
+    if stripdir is not None:
+        fullname_parts = fullname.split(os.path.sep)
+        stripdir_parts = stripdir.split(os.path.sep)
+        ddir_parts = list(fullname_parts)
+
+        for spart, opart in zip(stripdir_parts, fullname_parts):
+            if spart == opart:
+                ddir_parts.remove(spart)
+
+        dfile = os.path.join(*ddir_parts)
+
+    if prependdir is not None:
+        if dfile is None:
+            dfile = os.path.join(prependdir, fullname)
+        else:
+            dfile = os.path.join(prependdir, dfile)
+
+    if isinstance(optimize, int):
+        optimize = [optimize]
+
     if rx is not None:
         mo = rx.search(fullname)
         if mo:
             return success
+
+    if limit_sl_dest is not None and os.path.islink(fullname):
+        if Path(limit_sl_dest).resolve() not in Path(fullname).resolve().parents:
+            return success
+
+    opt_cfiles = {}
+
     if os.path.isfile(fullname):
-        if legacy:
-            cfile = fullname + 'c'
-        else:
-            if optimize >= 0:
-                opt = optimize if optimize >= 1 else ''
-                cfile = importlib.util.cache_from_source(
-                                fullname, optimization=opt)
+        for opt_level in optimize:
+            if legacy:
+                opt_cfiles[opt_level] = fullname + 'c'
             else:
-                cfile = importlib.util.cache_from_source(fullname)
-            cache_dir = os.path.dirname(cfile)
+                if opt_level >= 0:
+                    opt = opt_level if opt_level >= 1 else ''
+                    cfile = (importlib.util.cache_from_source(
+                             fullname, optimization=opt))
+                    opt_cfiles[opt_level] = cfile
+                else:
+                    cfile = importlib.util.cache_from_source(fullname)
+                    opt_cfiles[opt_level] = cfile
+
         head, tail = name[:-3], name[-3:]
         if tail == '.py':
             if not force:
@@ -145,18 +201,22 @@
                     mtime = int(os.stat(fullname).st_mtime)
                     expect = struct.pack('<4sll', importlib.util.MAGIC_NUMBER,
                                          0, mtime)
-                    with open(cfile, 'rb') as chandle:
-                        actual = chandle.read(12)
-                    if expect == actual:
+                    for cfile in opt_cfiles.values():
+                        with open(cfile, 'rb') as chandle:
+                            actual = chandle.read(12)
+                        if expect != actual:
+                            break
+                    else:
                         return success
                 except OSError:
                     pass
             if not quiet:
                 print('Compiling {!r}...'.format(fullname))
             try:
-                ok = py_compile.compile(fullname, cfile, dfile, True,
-                                        optimize=optimize,
-                                        invalidation_mode=invalidation_mode)
+                for opt_level, cfile in opt_cfiles.items():
+                    ok = py_compile.compile(fullname, cfile, dfile, True,
+                                            optimize=opt_level,
+                                            invalidation_mode=invalidation_mode)
             except py_compile.PyCompileError as err:
                 success = False
                 if quiet >= 2:
@@ -225,7 +285,7 @@
     parser = argparse.ArgumentParser(
         description='Utilities to support installing Python libraries.')
     parser.add_argument('-l', action='store_const', const=0,
-                        default=10, dest='maxlevels',
+                        default=RECURSION_LIMIT, dest='maxlevels',
                         help="don't recurse into subdirectories")
     parser.add_argument('-r', type=int, dest='recursion',
                         help=('control the maximum recursion level. '
@@ -243,6 +303,20 @@
                               'compile-time tracebacks and in runtime '
                               'tracebacks in cases where the source file is '
                               'unavailable'))
+    parser.add_argument('-s', metavar='STRIPDIR',  dest='stripdir',
+                        default=None,
+                        help=('part of path to left-strip from path '
+                              'to source file - for example buildroot. '
+                              '`-d` and `-s` options cannot be '
+                              'specified together.'))
+    parser.add_argument('-p', metavar='PREPENDDIR',  dest='prependdir',
+                        default=None,
+                        help=('path to add as prefix to path '
+                              'to source file - for example / to make '
+                              'it absolute when some part is removed '
+                              'by `-s` option. '
+                              '`-d` and `-p` options cannot be '
+                              'specified together.'))
     parser.add_argument('-x', metavar='REGEXP', dest='rx', default=None,
                         help=('skip files matching the regular expression; '
                               'the regexp is searched for in the full path '
@@ -265,6 +339,12 @@
                               '"checked-hash" if the SOURCE_DATE_EPOCH '
                               'environment variable is set, and '
                               '"timestamp" otherwise.'))
+    parser.add_argument('-o', action='append', type=int, dest='opt_levels',
+                        help=('Optimization levels to run compilation with.'
+                              'Default is -1 which uses optimization level of'
+                              'Python interpreter itself (specified by -O).'))
+    parser.add_argument('-e', metavar='DIR', dest='limit_sl_dest',
+                        help='Ignore symlinks pointing outsite of the DIR')
 
     args = parser.parse_args()
     compile_dests = args.compile_dest
@@ -273,12 +353,22 @@
         import re
         args.rx = re.compile(args.rx)
 
+    if args.limit_sl_dest == "":
+        args.limit_sl_dest = None
 
     if args.recursion is not None:
         maxlevels = args.recursion
     else:
         maxlevels = args.maxlevels
 
+    if args.opt_levels is None:
+        args.opt_levels = [-1]
+
+    if args.ddir is not None and (
+        args.stripdir is not None or args.prependdir is not None
+    ):
+        parser.error("-d cannot be used in combination with -s or -p")
+
     # if flist is provided then load it
     if args.flist:
         try:
@@ -303,13 +393,21 @@
                 if os.path.isfile(dest):
                     if not compile_file(dest, args.ddir, args.force, args.rx,
                                         args.quiet, args.legacy,
-                                        invalidation_mode=invalidation_mode):
+                                        invalidation_mode=invalidation_mode,
+                                        stripdir=args.stripdir,
+                                        prependdir=args.prependdir,
+                                        optimize=args.opt_levels,
+                                        limit_sl_dest=args.limit_sl_dest):
                         success = False
                 else:
                     if not compile_dir(dest, maxlevels, args.ddir,
                                        args.force, args.rx, args.quiet,
                                        args.legacy, workers=args.workers,
-                                       invalidation_mode=invalidation_mode):
+                                       invalidation_mode=invalidation_mode,
+                                       stripdir=args.stripdir,
+                                       prependdir=args.prependdir,
+                                       optimize=args.opt_levels,
+                                       limit_sl_dest=args.limit_sl_dest):
                         success = False
             return success
         else:
diff --git a/Lib/test/test_compileall.py b/Lib/test/test_compileall.py
index 99d8437..af885c5 100644
--- a/Lib/test/test_compileall.py
+++ b/Lib/test/test_compileall.py
@@ -41,6 +41,16 @@
         os.mkdir(self.subdirectory)
         self.source_path3 = os.path.join(self.subdirectory, '_test3.py')
         shutil.copyfile(self.source_path, self.source_path3)
+        many_directories = [str(number) for number in range(1, 100)]
+        self.long_path = os.path.join(self.directory,
+                                      "long",
+                                      *many_directories)
+        os.makedirs(self.long_path)
+        self.source_path_long = os.path.join(self.long_path, '_test4.py')
+        shutil.copyfile(self.source_path, self.source_path_long)
+        self.bc_path_long = importlib.util.cache_from_source(
+            self.source_path_long
+        )
 
     def tearDown(self):
         shutil.rmtree(self.directory)
@@ -194,6 +204,123 @@
         compileall.compile_dir(self.directory, quiet=True, workers=5)
         self.assertTrue(compile_file_mock.called)
 
+    def text_compile_dir_maxlevels(self):
+        # Test the actual impact of maxlevels attr
+        compileall.compile_dir(os.path.join(self.directory, "long"),
+                               maxlevels=10, quiet=True)
+        self.assertFalse(os.path.isfile(self.bc_path_long))
+        compileall.compile_dir(os.path.join(self.directory, "long"),
+                               maxlevels=110, quiet=True)
+        self.assertTrue(os.path.isfile(self.bc_path_long))
+
+    def test_strip_only(self):
+        fullpath = ["test", "build", "real", "path"]
+        path = os.path.join(self.directory, *fullpath)
+        os.makedirs(path)
+        script = script_helper.make_script(path, "test", "1 / 0")
+        bc = importlib.util.cache_from_source(script)
+        stripdir = os.path.join(self.directory, *fullpath[:2])
+        compileall.compile_dir(path, quiet=True, stripdir=stripdir)
+        rc, out, err = script_helper.assert_python_failure(bc)
+        expected_in = os.path.join(*fullpath[2:])
+        self.assertIn(
+            expected_in,
+            str(err, encoding=sys.getdefaultencoding())
+        )
+        self.assertNotIn(
+            stripdir,
+            str(err, encoding=sys.getdefaultencoding())
+        )
+
+    def test_prepend_only(self):
+        fullpath = ["test", "build", "real", "path"]
+        path = os.path.join(self.directory, *fullpath)
+        os.makedirs(path)
+        script = script_helper.make_script(path, "test", "1 / 0")
+        bc = importlib.util.cache_from_source(script)
+        prependdir = "/foo"
+        compileall.compile_dir(path, quiet=True, prependdir=prependdir)
+        rc, out, err = script_helper.assert_python_failure(bc)
+        expected_in = os.path.join(prependdir, self.directory, *fullpath)
+        self.assertIn(
+            expected_in,
+            str(err, encoding=sys.getdefaultencoding())
+        )
+
+    def test_strip_and_prepend(self):
+        fullpath = ["test", "build", "real", "path"]
+        path = os.path.join(self.directory, *fullpath)
+        os.makedirs(path)
+        script = script_helper.make_script(path, "test", "1 / 0")
+        bc = importlib.util.cache_from_source(script)
+        stripdir = os.path.join(self.directory, *fullpath[:2])
+        prependdir = "/foo"
+        compileall.compile_dir(path, quiet=True,
+                               stripdir=stripdir, prependdir=prependdir)
+        rc, out, err = script_helper.assert_python_failure(bc)
+        expected_in = os.path.join(prependdir, *fullpath[2:])
+        self.assertIn(
+            expected_in,
+            str(err, encoding=sys.getdefaultencoding())
+        )
+        self.assertNotIn(
+            stripdir,
+            str(err, encoding=sys.getdefaultencoding())
+        )
+
+    def test_strip_prepend_and_ddir(self):
+        fullpath = ["test", "build", "real", "path", "ddir"]
+        path = os.path.join(self.directory, *fullpath)
+        os.makedirs(path)
+        script_helper.make_script(path, "test", "1 / 0")
+        with self.assertRaises(ValueError):
+            compileall.compile_dir(path, quiet=True, ddir="/bar",
+                                   stripdir="/foo", prependdir="/bar")
+
+    def test_multiple_optimization_levels(self):
+        script = script_helper.make_script(self.directory,
+                                           "test_optimization",
+                                           "a = 0")
+        bc = []
+        for opt_level in "", 1, 2, 3:
+            bc.append(importlib.util.cache_from_source(script,
+                                                       optimization=opt_level))
+        test_combinations = [[0, 1], [1, 2], [0, 2], [0, 1, 2]]
+        for opt_combination in test_combinations:
+            compileall.compile_file(script, quiet=True,
+                                    optimize=opt_combination)
+            for opt_level in opt_combination:
+                self.assertTrue(os.path.isfile(bc[opt_level]))
+                try:
+                    os.unlink(bc[opt_level])
+                except Exception:
+                    pass
+
+    @support.skip_unless_symlink
+    def test_ignore_symlink_destination(self):
+        # Create folders for allowed files, symlinks and prohibited area
+        allowed_path = os.path.join(self.directory, "test", "dir", "allowed")
+        symlinks_path = os.path.join(self.directory, "test", "dir", "symlinks")
+        prohibited_path = os.path.join(self.directory, "test", "dir", "prohibited")
+        os.makedirs(allowed_path)
+        os.makedirs(symlinks_path)
+        os.makedirs(prohibited_path)
+
+        # Create scripts and symlinks and remember their byte-compiled versions
+        allowed_script = script_helper.make_script(allowed_path, "test_allowed", "a = 0")
+        prohibited_script = script_helper.make_script(prohibited_path, "test_prohibited", "a = 0")
+        allowed_symlink = os.path.join(symlinks_path, "test_allowed.py")
+        prohibited_symlink = os.path.join(symlinks_path, "test_prohibited.py")
+        os.symlink(allowed_script, allowed_symlink)
+        os.symlink(prohibited_script, prohibited_symlink)
+        allowed_bc = importlib.util.cache_from_source(allowed_symlink)
+        prohibited_bc = importlib.util.cache_from_source(prohibited_symlink)
+
+        compileall.compile_dir(symlinks_path, quiet=True, limit_sl_dest=allowed_path)
+
+        self.assertTrue(os.path.isfile(allowed_bc))
+        self.assertFalse(os.path.isfile(prohibited_bc))
+
 
 class CompileallTestsWithSourceEpoch(CompileallTestsBase,
                                      unittest.TestCase,
@@ -436,6 +563,20 @@
         self.assertCompiled(spamfn)
         self.assertCompiled(eggfn)
 
+    @support.skip_unless_symlink
+    def test_symlink_loop(self):
+        # Currently, compileall ignores symlinks to directories.
+        # If that limitation is ever lifted, it should protect against
+        # recursion in symlink loops.
+        pkg = os.path.join(self.pkgdir, 'spam')
+        script_helper.make_pkg(pkg)
+        os.symlink('.', os.path.join(pkg, 'evil'))
+        os.symlink('.', os.path.join(pkg, 'evil2'))
+        self.assertRunOK('-q', self.pkgdir)
+        self.assertCompiled(os.path.join(
+            self.pkgdir, 'spam', 'evil', 'evil2', '__init__.py'
+        ))
+
     def test_quiet(self):
         noisy = self.assertRunOK(self.pkgdir)
         quiet = self.assertRunOK('-q', self.pkgdir)
@@ -577,6 +718,74 @@
             self.assertTrue(compile_dir.called)
             self.assertEqual(compile_dir.call_args[-1]['workers'], 0)
 
+    def test_strip_and_prepend(self):
+        fullpath = ["test", "build", "real", "path"]
+        path = os.path.join(self.directory, *fullpath)
+        os.makedirs(path)
+        script = script_helper.make_script(path, "test", "1 / 0")
+        bc = importlib.util.cache_from_source(script)
+        stripdir = os.path.join(self.directory, *fullpath[:2])
+        prependdir = "/foo"
+        self.assertRunOK("-s", stripdir, "-p", prependdir, path)
+        rc, out, err = script_helper.assert_python_failure(bc)
+        expected_in = os.path.join(prependdir, *fullpath[2:])
+        self.assertIn(
+            expected_in,
+            str(err, encoding=sys.getdefaultencoding())
+        )
+        self.assertNotIn(
+            stripdir,
+            str(err, encoding=sys.getdefaultencoding())
+        )
+
+    def test_multiple_optimization_levels(self):
+        path = os.path.join(self.directory, "optimizations")
+        os.makedirs(path)
+        script = script_helper.make_script(path,
+                                           "test_optimization",
+                                           "a = 0")
+        bc = []
+        for opt_level in "", 1, 2, 3:
+            bc.append(importlib.util.cache_from_source(script,
+                                                       optimization=opt_level))
+        test_combinations = [["0", "1"],
+                             ["1", "2"],
+                             ["0", "2"],
+                             ["0", "1", "2"]]
+        for opt_combination in test_combinations:
+            self.assertRunOK(path, *("-o" + str(n) for n in opt_combination))
+            for opt_level in opt_combination:
+                self.assertTrue(os.path.isfile(bc[int(opt_level)]))
+                try:
+                    os.unlink(bc[opt_level])
+                except Exception:
+                    pass
+
+    @support.skip_unless_symlink
+    def test_ignore_symlink_destination(self):
+        # Create folders for allowed files, symlinks and prohibited area
+        allowed_path = os.path.join(self.directory, "test", "dir", "allowed")
+        symlinks_path = os.path.join(self.directory, "test", "dir", "symlinks")
+        prohibited_path = os.path.join(self.directory, "test", "dir", "prohibited")
+        os.makedirs(allowed_path)
+        os.makedirs(symlinks_path)
+        os.makedirs(prohibited_path)
+
+        # Create scripts and symlinks and remember their byte-compiled versions
+        allowed_script = script_helper.make_script(allowed_path, "test_allowed", "a = 0")
+        prohibited_script = script_helper.make_script(prohibited_path, "test_prohibited", "a = 0")
+        allowed_symlink = os.path.join(symlinks_path, "test_allowed.py")
+        prohibited_symlink = os.path.join(symlinks_path, "test_prohibited.py")
+        os.symlink(allowed_script, allowed_symlink)
+        os.symlink(prohibited_script, prohibited_symlink)
+        allowed_bc = importlib.util.cache_from_source(allowed_symlink)
+        prohibited_bc = importlib.util.cache_from_source(prohibited_symlink)
+
+        self.assertRunOK(symlinks_path, "-e", allowed_path)
+
+        self.assertTrue(os.path.isfile(allowed_bc))
+        self.assertFalse(os.path.isfile(prohibited_bc))
+
 
 class CommandLineTestsWithSourceEpoch(CommandLineTestsBase,
                                        unittest.TestCase,