Mark Hammond | ef8b654 | 2001-05-13 08:04:26 +0000 | [diff] [blame] | 1 | # Test some Unicode file name semantics |
| 2 | # We don't test many operations on files other than |
| 3 | # that their names can be used with Unicode characters. |
Mark Hammond | 6d45972 | 2003-12-03 01:29:56 +0000 | [diff] [blame] | 4 | import os, glob, time, shutil |
Nicholas Bastin | 6680341 | 2004-03-21 20:55:47 +0000 | [diff] [blame] | 5 | import unicodedata |
Mark Hammond | ef8b654 | 2001-05-13 08:04:26 +0000 | [diff] [blame] | 6 | |
Mark Hammond | 6d45972 | 2003-12-03 01:29:56 +0000 | [diff] [blame] | 7 | import unittest |
| 8 | from test.test_support import run_suite, TestSkipped, TESTFN_UNICODE |
| 9 | from test.test_support import TESTFN_ENCODING, TESTFN_UNICODE_UNENCODEABLE |
# Work out TESTFN_ENCODED, the byte-string spelling of TESTFN_UNICODE in
# TESTFN_ENCODING.  The whole module is skipped (TestSkipped) when no file
# name can be found that survives an encode/decode round trip.
try:
    TESTFN_ENCODED = TESTFN_UNICODE.encode(TESTFN_ENCODING)
except (UnicodeError, TypeError):
    # Either the file system encoding is None, or the file name
    # cannot be encoded in the file system encoding.
    raise TestSkipped("No Unicode filesystem semantics on this platform.")

if TESTFN_ENCODED.decode(TESTFN_ENCODING) != TESTFN_UNICODE:
    # The file system encoding does not support Latin-1
    # (which test_support assumes), so try the file system
    # encoding instead.
    import sys
    try:
        TESTFN_UNICODE = unicode("@test-\xe0\xf2", sys.getfilesystemencoding())
        TESTFN_ENCODED = TESTFN_UNICODE.encode(TESTFN_ENCODING)
        if '?' in TESTFN_ENCODED:
            # MBCS will not report the error properly: it substitutes '?'
            # instead of raising, so turn that into a real error here.
            raise UnicodeError("mbcs encoding problem")
    except (UnicodeError, TypeError):
        raise TestSkipped("Cannot find a suitable filename.")

# Final sanity check: whichever name was chosen must round-trip exactly.
if TESTFN_ENCODED.decode(TESTFN_ENCODING) != TESTFN_UNICODE:
    raise TestSkipped("Cannot find a suitable filename.")
| 33 | |
def remove_if_exists(filename):
    """Delete *filename* when os.path.exists() reports it; otherwise do nothing."""
    if not os.path.exists(filename):
        return
    os.unlink(filename)
Mark Hammond | ef8b654 | 2001-05-13 08:04:26 +0000 | [diff] [blame] | 37 | |
class TestUnicodeFiles(unittest.TestCase):
    """Check that os/glob/shutil calls accept Unicode and encoded file names."""

    # The '_do_' functions are the actual tests.  They generally assume the
    # file already exists etc.

    # Remove 'filename' and the 'filename.new' scratch copy that _do_copyish
    # creates, skipping whichever is absent.  Used from 'finally' clauses: a
    # plain os.unlink() there raises OSError when a check failed while the
    # file was renamed, which would replace the real failure and leave the
    # '.new' file behind.
    def _remove_test_files(self, filename):
        for name in (filename, filename + ".new"):
            remove_if_exists(name)

    # Do all the tests we can given only a single filename.  The file should
    # exist.
    def _do_single(self, filename):
        self.failUnless(os.path.exists(filename))
        self.failUnless(os.path.isfile(filename))
        self.failUnless(os.access(filename, os.R_OK))
        self.failUnless(os.path.exists(os.path.abspath(filename)))
        self.failUnless(os.path.isfile(os.path.abspath(filename)))
        self.failUnless(os.access(os.path.abspath(filename), os.R_OK))
        # Octal 777 (rwx for everyone), spelled via int() so that the
        # expression parses regardless of which octal literal syntax the
        # running Python accepts.
        os.chmod(filename, int("777", 8))
        os.utime(filename, None)
        os.utime(filename, (time.time(), time.time()))
        # Copy/rename etc tests using the same filename
        self._do_copyish(filename, filename)
        # Filename should appear in glob output.  Check for a match first so
        # that an empty result is reported as a failure, not an IndexError.
        matches = glob.glob(filename)
        self.failUnless(matches,
                        "glob.glob() found no match for %r" % (filename,))
        self.failUnless(
            os.path.abspath(filename) == os.path.abspath(matches[0]))
        # basename should appear in listdir.
        path, base = os.path.split(os.path.abspath(filename))
        if isinstance(base, str):
            base = base.decode(TESTFN_ENCODING)
        file_list = os.listdir(path)
        # listdir() with a unicode arg may or may not return Unicode
        # objects, depending on the platform.
        if file_list and isinstance(file_list[0], str):
            file_list = [f.decode(TESTFN_ENCODING) for f in file_list]

        # Normalize the unicode strings, as round-tripping the name via the OS
        # may return a different (but equivalent) value.
        base = unicodedata.normalize("NFD", base)
        file_list = [unicodedata.normalize("NFD", f) for f in file_list]

        self.failUnless(base in file_list)

    # Do as many 'equivalency' tests as we can - ie, check that although we
    # have different types for the filename, they refer to the same file.
    def _do_equivilent(self, filename1, filename2):
        # Note we only check "filename1 against filename2" - we don't bother
        # checking "filename2 against 1", as we assume we are called again with
        # the args reversed.
        self.failUnless(type(filename1) != type(filename2),
                    "No point checking equivalent filenames of the same type")
        # stat and lstat should return the same results.
        self.failUnlessEqual(os.stat(filename1),
                             os.stat(filename2))
        self.failUnlessEqual(os.lstat(filename1),
                             os.lstat(filename2))
        # Copy/rename etc tests using equivalent filename
        self._do_copyish(filename1, filename2)

    # Tests that copy, move, etc one file to another.  On success the file is
    # back under its original name and no '.new' copy is left.
    def _do_copyish(self, filename1, filename2):
        # Should be able to rename the file using either name.
        self.failUnless(os.path.isfile(filename1)) # must exist.
        os.rename(filename1, filename2 + ".new")
        # Deliberately checked via the *other* spelling of the name.
        self.failUnless(os.path.isfile(filename1 + ".new"))
        os.rename(filename1 + ".new", filename2)
        self.failUnless(os.path.isfile(filename2))

        # Try using shutil on the filenames.
        try:
            filename1 == filename2
        except UnicodeDecodeError:
            # these filenames can't be compared - shutil.copy tries to do
            # just that.  This is really a bug in 'shutil' - if one of shutil's
            # 2 params are Unicode and the other isn't, it should coerce the
            # string to Unicode with the filesystem encoding before comparison.
            pass
        else:
            # filenames can be compared.
            shutil.copy(filename1, filename2 + ".new")
            os.unlink(filename1 + ".new") # remove using equiv name.
            # And a couple of moves, one using each name.
            shutil.move(filename1, filename2 + ".new")
            self.failUnless(not os.path.exists(filename2))
            shutil.move(filename1 + ".new", filename2)
            self.failUnless(os.path.exists(filename1))
            # Note - due to the implementation of shutil.move,
            # it tries a rename first.  This only fails on Windows when on
            # different file systems - and this test can't ensure that.
            # So we test the shutil.copy2 function, which is the thing most
            # likely to fail.
            shutil.copy2(filename1, filename2 + ".new")
            os.unlink(filename1 + ".new")

    # Create directory 'make_name', chdir into it as 'chdir_name', and check
    # that the reported cwd ends with the directory name.  'encoded' says
    # whether 'make_name' is a byte string (compare via getcwd() + decode) or
    # a unicode string (compare via getcwdu()).
    def _do_directory(self, make_name, chdir_name, encoded):
        cwd = os.getcwd()
        if os.path.isdir(make_name):
            os.rmdir(make_name)
        os.mkdir(make_name)
        try:
            os.chdir(chdir_name)
            try:
                if not encoded:
                    cwd_result = os.getcwdu()
                    name_result = make_name
                else:
                    cwd_result = os.getcwd().decode(TESTFN_ENCODING)
                    name_result = make_name.decode(TESTFN_ENCODING)

                # Normalize both sides, as the OS may hand back a different
                # (but equivalent) form of the name.
                cwd_result = unicodedata.normalize("NFD", cwd_result)
                name_result = unicodedata.normalize("NFD", name_result)

                self.failUnlessEqual(os.path.basename(cwd_result), name_result)
            finally:
                # Always leave the new directory again so it can be removed.
                os.chdir(cwd)
        finally:
            os.rmdir(make_name)

    # The '_test' functions are 'entry points with params' - ie, what the
    # top-level 'test' functions would be if they could take params
    def _test_single(self, filename):
        remove_if_exists(filename)
        f = open(filename, "w")
        f.close()
        try:
            self._do_single(filename)
        finally:
            self._remove_test_files(filename)
        self.failUnless(not os.path.exists(filename))
        # and again with os.open.
        f = os.open(filename, os.O_CREAT)
        os.close(f)
        try:
            self._do_single(filename)
        finally:
            self._remove_test_files(filename)

    def _test_equivalent(self, filename1, filename2):
        remove_if_exists(filename1)
        # Both names denote the same file, so neither may exist now.
        self.failUnless(not os.path.exists(filename2))
        f = open(filename1, "w")
        f.close()
        try:
            self._do_equivilent(filename1, filename2)
        finally:
            self._remove_test_files(filename1)

    # The 'test' functions are unittest entry points, and simply call our
    # _test functions with each of the filename combinations we wish to test
    def test_single_files(self):
        self._test_single(TESTFN_ENCODED)
        self._test_single(TESTFN_UNICODE)
        if TESTFN_UNICODE_UNENCODEABLE is not None:
            self._test_single(TESTFN_UNICODE_UNENCODEABLE)

    def test_equivalent_files(self):
        self._test_equivalent(TESTFN_ENCODED, TESTFN_UNICODE)
        self._test_equivalent(TESTFN_UNICODE, TESTFN_ENCODED)

    def test_directories(self):
        # For all 'equivalent' combinations:
        #  Make dir with encoded, chdir with unicode, checkdir with encoded
        #  (or unicode/encoded/unicode, etc)
        ext = ".dir"
        self._do_directory(TESTFN_ENCODED+ext, TESTFN_ENCODED+ext, True)
        self._do_directory(TESTFN_ENCODED+ext, TESTFN_UNICODE+ext, True)
        self._do_directory(TESTFN_UNICODE+ext, TESTFN_ENCODED+ext, False)
        self._do_directory(TESTFN_UNICODE+ext, TESTFN_UNICODE+ext, False)
        # Our directory name that can't use a non-unicode name.
        if TESTFN_UNICODE_UNENCODEABLE is not None:
            self._do_directory(TESTFN_UNICODE_UNENCODEABLE+ext,
                               TESTFN_UNICODE_UNENCODEABLE+ext,
                               False)
Mark Hammond | ef8b654 | 2001-05-13 08:04:26 +0000 | [diff] [blame] | 206 | |
def test_main():
    """Collect the TestUnicodeFiles tests into a suite and hand it to run_suite."""
    unicode_file_tests = unittest.makeSuite(TestUnicodeFiles)
    all_tests = unittest.TestSuite()
    all_tests.addTest(unicode_file_tests)
    run_suite(all_tests)


# Allow the module to be run directly as a script.
if __name__ == "__main__":
    test_main()