Refactored the module-level code in site.py into functions.  The known-path bookkeeping now uses sets instead of dicts.

Added a new regression test suite (Lib/test/test_site.py).
diff --git a/Lib/site.py b/Lib/site.py
index 682bbd5..348938b 100644
--- a/Lib/site.py
+++ b/Lib/site.py
@@ -57,64 +57,104 @@
 
 """
 
-import sys, os
+import sys
+import os
+import __builtin__
 
 
 def makepath(*paths):
     dir = os.path.abspath(os.path.join(*paths))
     return dir, os.path.normcase(dir)
 
-for m in sys.modules.values():
-    if hasattr(m, "__file__") and m.__file__:
-        m.__file__ = os.path.abspath(m.__file__)
-del m
+def abs__file__():
+    """Set every module's __file__ attribute to an absolute path"""
+    for m in sys.modules.values():
+        try:
+            m.__file__ = os.path.abspath(m.__file__)
+        except AttributeError:
+            continue
 
-# This ensures that the initial path provided by the interpreter contains
-# only absolute pathnames, even if we're running from the build directory.
-L = []
-_dirs_in_sys_path = {}
-dir = dircase = None  # sys.path may be empty at this point
-for dir in sys.path:
-    # Filter out duplicate paths (on case-insensitive file systems also
-    # if they only differ in case); turn relative paths into absolute
-    # paths.
-    dir, dircase = makepath(dir)
-    if not dircase in _dirs_in_sys_path:
-        L.append(dir)
-        _dirs_in_sys_path[dircase] = 1
-sys.path[:] = L
-del dir, dircase, L
+def removeduppaths():
+    """Remove duplicate entries from sys.path, make the remaining entries
+    absolute, and return the set of case-normalized paths that were kept"""
+    # This ensures that the initial path provided by the interpreter contains
+    # only absolute pathnames, even if we're running from the build directory.
+    L = []
+    known_paths = set()
+    for dir in sys.path:
+        # Filter out duplicate paths (on case-insensitive file systems also
+        # if they only differ in case); turn relative paths into absolute
+        # paths.
+        dir, dircase = makepath(dir)
+        if not dircase in known_paths:
+            L.append(dir)
+            known_paths.add(dircase)
+    sys.path[:] = L
+    return known_paths
 
-# Append ./build/lib.<platform> in case we're running in the build dir
-# (especially for Guido :-)
 # XXX This should not be part of site.py, since it is needed even when
 # using the -S option for Python.  See http://www.python.org/sf/586680
-if (os.name == "posix" and sys.path and
-    os.path.basename(sys.path[-1]) == "Modules"):
+def addbuilddir():
+    """Append ./build/lib.<platform> in case we're running in the build dir
+    (especially for Guido :-)"""
     from distutils.util import get_platform
     s = "build/lib.%s-%.3s" % (get_platform(), sys.version)
     s = os.path.join(os.path.dirname(sys.path[-1]), s)
     sys.path.append(s)
-    del get_platform, s
 
 def _init_pathinfo():
-    global _dirs_in_sys_path
-    _dirs_in_sys_path = d = {}
+    """Return a set containing all existing directory entries from sys.path"""
+    d = set()
     for dir in sys.path:
-        if dir and not os.path.isdir(dir):
+        try:
+            if os.path.isdir(dir):
+                dir, dircase = makepath(dir)
+                d.add(dircase)
+        except TypeError:
             continue
-        dir, dircase = makepath(dir)
-        d[dircase] = 1
+    return d
 
-def addsitedir(sitedir):
-    global _dirs_in_sys_path
-    if _dirs_in_sys_path is None:
+def addpackage(sitedir, name, known_paths):
+    """Process the .pth file 'name' in sitedir: exec lines starting with
+    'import'; add other non-comment lines (joined to sitedir) to sys.path"""
+    if known_paths is None:
         _init_pathinfo()
         reset = 1
     else:
         reset = 0
+    fullname = os.path.join(sitedir, name)
+    try:
+        f = file(fullname, "rU")
+    except IOError:
+        return
+    try:
+        for line in f:
+            if line.startswith("#"):
+                continue
+            if line.startswith("import"):
+                exec line
+                continue
+            line = line.rstrip()
+            dir, dircase = makepath(sitedir, line)
+            if not dircase in known_paths and os.path.exists(dir):
+                sys.path.append(dir)
+                known_paths.add(dircase)
+    finally:
+        f.close()
+    if reset:
+        known_paths = None
+    return known_paths
+
+def addsitedir(sitedir, known_paths):
+    """Add 'sitedir' argument to sys.path if missing and handle .pth files in
+    'sitedir'"""
+    if known_paths is None:
+        d = _init_pathinfo()
+        reset = 1
+    else:
+        reset = 0
     sitedir, sitedircase = makepath(sitedir)
-    if not sitedircase in _dirs_in_sys_path:
+    if not sitedircase in known_paths:
         sys.path.append(sitedir)        # Add path component
     try:
         names = os.listdir(sitedir)
@@ -123,82 +163,55 @@
     names.sort()
     for name in names:
         if name[-4:] == os.extsep + "pth":
-            addpackage(sitedir, name)
+            addpackage(sitedir, name, known_paths)
     if reset:
-        _dirs_in_sys_path = None
+        known_paths = None
+    return known_paths
 
-def addpackage(sitedir, name):
-    global _dirs_in_sys_path
-    if _dirs_in_sys_path is None:
-        _init_pathinfo()
-        reset = 1
-    else:
-        reset = 0
-    fullname = os.path.join(sitedir, name)
-    try:
-        f = open(fullname)
-    except IOError:
-        return
-    while 1:
-        dir = f.readline()
-        if not dir:
-            break
-        if dir[0] == '#':
-            continue
-        if dir.startswith("import"):
-            exec dir
-            continue
-        dir = dir.rstrip()
-        dir, dircase = makepath(sitedir, dir)
-        if not dircase in _dirs_in_sys_path and os.path.exists(dir):
-            sys.path.append(dir)
-            _dirs_in_sys_path[dircase] = 1
-    if reset:
-        _dirs_in_sys_path = None
-
-prefixes = [sys.prefix]
-sitedir = None # make sure sitedir is initialized because of later 'del'
-if sys.exec_prefix != sys.prefix:
-    prefixes.append(sys.exec_prefix)
-for prefix in prefixes:
-    if prefix:
-        if sys.platform in ('os2emx', 'riscos'):
-            sitedirs = [os.path.join(prefix, "Lib", "site-packages")]
-        elif os.sep == '/':
-            sitedirs = [os.path.join(prefix,
-                                     "lib",
-                                     "python" + sys.version[:3],
-                                     "site-packages"),
-                        os.path.join(prefix, "lib", "site-python")]
-        else:
-            sitedirs = [prefix, os.path.join(prefix, "lib", "site-packages")]
-        if sys.platform == 'darwin':
-            # for framework builds *only* we add the standard Apple
-            # locations. Currently only per-user, but /Library and
-            # /Network/Library could be added too
-            if 'Python.framework' in prefix:
-                home = os.environ.get('HOME')
-                if home:
-                    sitedirs.append(
-                        os.path.join(home,
-                                     'Library',
-                                     'Python',
-                                     sys.version[:3],
-                                     'site-packages'))
-        for sitedir in sitedirs:
-            if os.path.isdir(sitedir):
-                addsitedir(sitedir)
-del prefix, sitedir
-
-_dirs_in_sys_path = None
+def addsitepackages(known_paths):
+    """Add site-packages (and possibly site-python) to sys.path"""
+    prefixes = [sys.prefix]
+    if sys.exec_prefix != sys.prefix:
+        prefixes.append(sys.exec_prefix)
+    for prefix in prefixes:
+        if prefix:
+            if sys.platform in ('os2emx', 'riscos'):
+                sitedirs = [os.path.join(prefix, "Lib", "site-packages")]
+            elif os.sep == '/':
+                sitedirs = [os.path.join(prefix,
+                                         "lib",
+                                         "python" + sys.version[:3],
+                                         "site-packages"),
+                            os.path.join(prefix, "lib", "site-python")]
+            else:
+                sitedirs = [prefix, os.path.join(prefix, "lib", "site-packages")]
+            if sys.platform == 'darwin':
+                # for framework builds *only* we add the standard Apple
+                # locations. Currently only per-user, but /Library and
+                # /Network/Library could be added too
+                if 'Python.framework' in prefix:
+                    home = os.environ.get('HOME')
+                    if home:
+                        sitedirs.append(
+                            os.path.join(home,
+                                         'Library',
+                                         'Python',
+                                         sys.version[:3],
+                                         'site-packages'))
+            for sitedir in sitedirs:
+                if os.path.isdir(sitedir):
+                    addsitedir(sitedir, known_paths)
+    return None
 
 
-# the OS/2 EMX port has optional extension modules that do double duty
-# as DLLs (and must use the .DLL file extension) for other extensions.
-# The library search path needs to be amended so these will be found
-# during module import.  Use BEGINLIBPATH so that these are at the start
-# of the library search path.
-if sys.platform == 'os2emx':
+def setBEGINLIBPATH():
+    """The OS/2 EMX port has optional extension modules that do double duty
+    as DLLs (and must use the .DLL file extension) for other extensions.
+    The library search path needs to be amended so these will be found
+    during module import.  Use BEGINLIBPATH so that these are at the start
+    of the library search path.
+    
+    """
     dllpath = os.path.join(sys.prefix, "Lib", "lib-dynload")
     libpath = os.environ['BEGINLIBPATH'].split(';')
     if libpath[-1]:
@@ -208,21 +221,24 @@
     os.environ['BEGINLIBPATH'] = ';'.join(libpath)
 
 
-# Define new built-ins 'quit' and 'exit'.
-# These are simply strings that display a hint on how to exit.
-if os.sep == ':':
-    exit = 'Use Cmd-Q to quit.'
-elif os.sep == '\\':
-    exit = 'Use Ctrl-Z plus Return to exit.'
-else:
-    exit = 'Use Ctrl-D (i.e. EOF) to exit.'
-import __builtin__
-__builtin__.quit = __builtin__.exit = exit
-del exit
+def setquit():
+    """Define new built-ins 'quit' and 'exit'.
+    These are simply strings that display a hint on how to exit.
 
-# interactive prompt objects for printing the license text, a list of
-# contributors and the copyright notice.
-class _Printer:
+    """
+    if os.sep == ':':
+        exit = 'Use Cmd-Q to quit.'
+    elif os.sep == '\\':
+        exit = 'Use Ctrl-Z plus Return to exit.'
+    else:
+        exit = 'Use Ctrl-D (i.e. EOF) to exit.'
+    __builtin__.quit = __builtin__.exit = exit
+
+
+class _Printer(object):
+    """interactive prompt objects for printing the license text, a list of
+    contributors and the copyright notice."""
+
     MAXLINES = 23
 
     def __init__(self, name, data, files=(), dirs=()):
@@ -237,10 +253,10 @@
             return
         data = None
         for dir in self.__dirs:
-            for file in self.__files:
-                file = os.path.join(dir, file)
+            for filename in self.__files:
+                filename = os.path.join(dir, filename)
                 try:
-                    fp = open(file)
+                    fp = file(filename, "rU")
                     data = fp.read()
                     fp.close()
                     break
@@ -280,26 +296,30 @@
                 if key == 'q':
                     break
 
-__builtin__.copyright = _Printer("copyright", sys.copyright)
-if sys.platform[:4] == 'java':
-    __builtin__.credits = _Printer(
-        "credits",
-        "Jython is maintained by the Jython developers (www.jython.org).")
-else:
-    __builtin__.credits = _Printer("credits", """\
-Thanks to CWI, CNRI, BeOpen.com, Zope Corporation and a cast of thousands
-for supporting Python development.  See www.python.org for more information.""")
-here = os.path.dirname(os.__file__)
-__builtin__.license = _Printer(
-    "license", "See http://www.python.org/%.3s/license.html" % sys.version,
-    ["LICENSE.txt", "LICENSE"],
-    [os.path.join(here, os.pardir), here, os.curdir])
+def setcopyright():
+    """Set 'copyright' and 'credits' in __builtin__"""
+    __builtin__.copyright = _Printer("copyright", sys.copyright)
+    if sys.platform[:4] == 'java':
+        __builtin__.credits = _Printer(
+            "credits",
+            "Jython is maintained by the Jython developers (www.jython.org).")
+    else:
+        __builtin__.credits = _Printer("credits", """\
+    Thanks to CWI, CNRI, BeOpen.com, Zope Corporation and a cast of thousands
+    for supporting Python development.  See www.python.org for more information.""")
+    here = os.path.dirname(os.__file__)
+    __builtin__.license = _Printer(
+        "license", "See http://www.python.org/%.3s/license.html" % sys.version,
+        ["LICENSE.txt", "LICENSE"],
+        [os.path.join(here, os.pardir), here, os.curdir])
 
 
-# Define new built-in 'help'.
-# This is a wrapper around pydoc.help (with a twist).
+class _Helper(object):
+    """Define the built-in 'help'.
+    This is a wrapper around pydoc.help (with a twist).
 
-class _Helper:
+    """
+
     def __repr__(self):
         return "Type help() for interactive help, " \
                "or help(object) for help about object."
@@ -307,61 +327,74 @@
         import pydoc
         return pydoc.help(*args, **kwds)
 
-__builtin__.help = _Helper()
+def sethelper():
+    __builtin__.help = _Helper()
+
+def aliasmbcs():
+    """On Windows, some default encodings are not provided by Python,
+    while they are always available as "mbcs" in each locale. Make
+    them usable by aliasing to "mbcs" in such a case."""
+    if sys.platform == 'win32':
+        import locale, codecs
+        enc = locale.getdefaultlocale()[1]
+        if enc.startswith('cp'):            # "cp***" ?
+            try:
+                codecs.lookup(enc)
+            except LookupError:
+                import encodings
+                encodings._cache[enc] = encodings._unknown
+                encodings.aliases.aliases[enc] = 'mbcs'
+
+def setencoding():
+    """Set the string encoding used by the Unicode implementation.  The
+    default is 'ascii', but if you're willing to experiment, you can
+    change this."""
+    encoding = "ascii" # Default value set by _PyUnicode_Init()
+    if 0:
+        # Enable to support locale aware default string encodings.
+        import locale
+        loc = locale.getdefaultlocale()
+        if loc[1]:
+            encoding = loc[1]
+    if 0:
+        # Enable to switch off string to Unicode coercion and implicit
+        # Unicode to string conversion.
+        encoding = "undefined"
+    if encoding != "ascii":
+        # On Non-Unicode builds this will raise an AttributeError...
+        sys.setdefaultencoding(encoding) # Needs Python Unicode build !
 
 
-# On Windows, some default encodings are not provided by Python,
-# while they are always available as "mbcs" in each locale. Make
-# them usable by aliasing to "mbcs" in such a case.
+def execsitecustomize():
+    """Run custom site specific code, if available."""
+    try:
+        import sitecustomize
+    except ImportError:
+        pass
 
-if sys.platform == 'win32':
-    import locale, codecs
-    enc = locale.getdefaultlocale()[1]
-    if enc.startswith('cp'):            # "cp***" ?
-        try:
-            codecs.lookup(enc)
-        except LookupError:
-            import encodings
-            encodings._cache[enc] = encodings._unknown
-            encodings.aliases.aliases[enc] = 'mbcs'
 
-# Set the string encoding used by the Unicode implementation.  The
-# default is 'ascii', but if you're willing to experiment, you can
-# change this.
+def main():
+    abs__file__()
+    paths_in_sys = removeduppaths()
+    if (os.name == "posix" and sys.path and
+        os.path.basename(sys.path[-1]) == "Modules"):
+        addbuilddir()
+    paths_in_sys = addsitepackages(paths_in_sys)
+    if sys.platform == 'os2emx':
+        setBEGINLIBPATH()
+    setquit()
+    setcopyright()
+    sethelper()
+    aliasmbcs()
+    setencoding()
+    execsitecustomize()
+    # Remove sys.setdefaultencoding() so that users cannot change the
+    # encoding after initialization.  The test for presence is needed when
+    # this module is run as a script, because this code is executed twice.
+    if hasattr(sys, "setdefaultencoding"):
+        del sys.setdefaultencoding
 
-encoding = "ascii" # Default value set by _PyUnicode_Init()
-
-if 0:
-    # Enable to support locale aware default string encodings.
-    import locale
-    loc = locale.getdefaultlocale()
-    if loc[1]:
-        encoding = loc[1]
-
-if 0:
-    # Enable to switch off string to Unicode coercion and implicit
-    # Unicode to string conversion.
-    encoding = "undefined"
-
-if encoding != "ascii":
-    # On Non-Unicode builds this will raise an AttributeError...
-    sys.setdefaultencoding(encoding) # Needs Python Unicode build !
-
-#
-# Run custom site specific code, if available.
-#
-try:
-    import sitecustomize
-except ImportError:
-    pass
-
-#
-# Remove sys.setdefaultencoding() so that users cannot change the
-# encoding after initialization.  The test for presence is needed when
-# this module is run as a script, because this code is executed twice.
-#
-if hasattr(sys, "setdefaultencoding"):
-    del sys.setdefaultencoding
+main()
 
 def _test():
     print "sys.path = ["
diff --git a/Lib/test/test_site.py b/Lib/test/test_site.py
new file mode 100644
index 0000000..ee15b1e
--- /dev/null
+++ b/Lib/test/test_site.py
@@ -0,0 +1,199 @@
+"""Tests for 'site'.
+
+The tests assume that the entries placed in sys.path at interpreter startup
+have not been removed.
+
+"""
+import unittest
+from test.test_support import TestSkipped, run_unittest, TESTFN
+import __builtin__
+import os
+import sys
+import encodings
+import tempfile
+# Need to make sure to not import 'site' if someone specified ``-S`` at the
+# command-line.  Detect this by just making sure 'site' has not been imported
+# already.
+if "site" in sys.modules:
+    import site
+else:
+    raise TestSkipped("importation of site.py suppressed")
+
+class HelperFunctionsTests(unittest.TestCase):
+    """Tests for helper functions.
+    
+    The setting of the encoding (set using sys.setdefaultencoding) used by
+    the Unicode implementation is not tested.
+    
+    """
+
+    def setUp(self):
+        """Save a copy of sys.path"""
+        self.sys_path = sys.path[:]
+
+    def tearDown(self):
+        """Restore sys.path"""
+        sys.path = self.sys_path
+    
+    def test_makepath(self):
+        # Test that makepath() returns an absolute path as its first value
+        # and a case-normalized version of the absolute path as its
+        # second value.
+        path_parts = ("Beginning", "End")
+        original_dir = os.path.join(*path_parts)
+        abs_dir, norm_dir = site.makepath(*path_parts)
+        self.failUnlessEqual(os.path.abspath(original_dir), abs_dir)
+        if original_dir == os.path.normcase(original_dir):
+            self.failUnlessEqual(abs_dir, norm_dir)
+        else:
+            self.failUnlessEqual(os.path.normcase(abs_dir), norm_dir)
+
+    def test_init_pathinfo(self):
+        dir_set = site._init_pathinfo()
+        for entry in [site.makepath(path)[1] for path in sys.path
+                        if path and os.path.isdir(path)]:
+            self.failUnless(entry in dir_set,
+                            "%s from sys.path not found in set returned "
+                            "by _init_pathinfo(): %s" % (entry, dir_set))
+    
+    def test_addpackage(self):
+        # Make sure addpackage() executes a line that starts with 'import'
+        # and otherwise adds the directory formed by joining sitedir and
+        # the line.  It must also skip comment lines.
+        dir_path, file_name, new_dir  = createpth()
+        try:
+            site.addpackage(dir_path, file_name, set())
+            self.failUnless(site.makepath(os.path.join(dir_path, new_dir))[0] in
+                    sys.path)
+        finally:
+            cleanuppth(dir_path, file_name, new_dir)
+    
+    def test_addsitedir(self):
+        dir_path, file_name, new_dir = createpth()
+        try:
+            site.addsitedir(dir_path, set())
+            self.failUnless(site.makepath(os.path.join(dir_path, new_dir))[0] in
+            sys.path)
+        finally:
+            cleanuppth(dir_path, file_name, new_dir)
+
+def createpth():
+    """Create a temporary .pth file at the returned location and return the
+    directory where it was created, the pth file name, and the directory
+    specified in the pth file.
+
+    Make sure to delete the file when finished.
+
+    """
+    pth_dirname = "__testdir__"
+    file_name = TESTFN + ".pth"
+    full_dirname = os.path.dirname(os.path.abspath(file_name))
+    FILE = file(os.path.join(full_dirname, file_name), 'w')
+    try:
+        print>>FILE, "#import @bad module name"
+        print>>FILE, ''
+        print>>FILE, "import os"
+        print>>FILE, pth_dirname
+    finally:
+        FILE.close()
+    os.mkdir(os.path.join(full_dirname, pth_dirname))
+    return full_dirname, file_name, pth_dirname
+
+def cleanuppth(full_dirname, file_name, pth_dirname):
+    """Clean up what createpth() made"""
+    os.remove(os.path.join(full_dirname, file_name))
+    os.rmdir(os.path.join(full_dirname, pth_dirname))
+
+class ImportSideEffectTests(unittest.TestCase):
+    """Test side-effects from importing 'site'."""
+
+    def setUp(self):
+        """Make a copy of sys.path"""
+        self.sys_path = sys.path[:]
+
+    def tearDown(self):
+        """Restore sys.path"""
+        sys.path = self.sys_path
+
+    def test_abs__file__(self):
+        # Make sure all imported modules have their __file__ attribute
+        # as an absolute path.
+        # Handled by abs__file__()
+        site.abs__file__()
+        for module in sys.modules.values():
+            try:
+                self.failUnless(os.path.isabs(module.__file__))
+            except AttributeError:
+                continue
+
+    def test_no_duplicate_paths(self):
+        # No duplicate paths should exist in sys.path
+        # Handled by removeduppaths()
+        site.removeduppaths()
+        seen_paths = set()
+        for path in sys.path:
+            self.failUnless(path not in seen_paths)
+            seen_paths.add(path)
+
+    def test_add_build_dir(self):
+        # Test that the build directory's Modules directory is used when it
+        # should be.
+        # XXX: implement
+        pass
+
+    def test_sitepackages(self):
+        # There should be a path that ends in site-packages
+        for path in sys.path:
+            if path.endswith("site-packages"):
+                break
+        else:
+            self.fail("'site-packages' directory missing'")
+
+    def test_setting_quit(self):
+        # 'quit' and 'exit' should be injected into __builtin__
+        self.failUnless(hasattr(__builtin__, "quit"))
+        self.failUnless(hasattr(__builtin__, "exit"))
+
+    def test_setting_copyright(self):
+        # 'copyright' and 'credits' should be in __builtin__
+        self.failUnless(hasattr(__builtin__, "copyright"))
+        self.failUnless(hasattr(__builtin__, "credits"))
+
+    def test_setting_help(self):
+        # 'help' should be set in __builtin__
+        self.failUnless(hasattr(__builtin__, "help"))
+
+    def test_aliasing_mbcs(self):
+        if sys.platform == "win32":
+            import locale
+            if locale.getdefaultlocale()[1].startswith('cp'):
+                for value in encodings.aliases.aliases.itervalues():
+                    if value == "mbcs":
+                        break
+                else:
+                    self.fail("did not alias mbcs")
+
+    def test_setdefaultencoding_removed(self):
+        # Make sure sys.setdefaultencoding is gone
+        self.failUnless(not hasattr(sys, "setdefaultencoding"))
+
+    def test_sitecustomize_executed(self):
+        # If sitecustomize is available, it should have been imported.
+        if not sys.modules.has_key("sitecustomize"):
+            try:
+                import sitecustomize
+            except ImportError:
+                pass
+            else:
+                self.fail("sitecustomize not imported automatically")
+
+
+
+
+def test_main():
+    run_unittest(HelperFunctionsTests, ImportSideEffectTests)
+
+
+
+if __name__ == "__main__":
+    test_main()