blob: 6affa834669664e6c385a37ca3ea84c2d7d8286b [file] [log] [blame]
"""distutils.util

Miscellaneous utility functions -- anything that doesn't fit into
one of the other *util.py modules.
"""
6
7__revision__ = "$Id$"
8
9import sys, os, string, re
10from distutils.errors import DistutilsPlatformError
11from distutils.dep_util import newer
12from distutils.spawn import spawn
13from distutils import log
14from distutils.errors import DistutilsByteCompileError
15
def get_platform():
    """Return a string that identifies the current platform.

    The result is mainly used to tell apart platform-specific build
    directories and platform-specific built distributions.  On POSIX
    systems it is derived from 'os.uname()'; which pieces (OS name,
    release, machine) are included depends on the OS.

    Examples of returned values:
       linux-i586
       linux-alpha (?)
       solaris-2.6-sun4u
       irix-5.3
       irix64-6.2

    Windows will return one of:
       win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc)
       win-ia64 (64bit Windows on Itanium)
       win32 (all others - specifically, sys.platform is returned)

    If the '_PYTHON_HOST_PLATFORM' environment variable is set (cross
    builds) its value is returned as-is on every non-Windows platform.
    For other non-POSIX platforms, currently just returns 'sys.platform'.
    """
    if os.name == 'nt':
        # The architecture is sniffed from sys.version, which contains
        # something like "... 64 bit (AMD64)]".
        marker = " bit ("
        start = sys.version.find(marker)
        if start == -1:
            return sys.platform
        stop = sys.version.find(")", start)
        arch = sys.version[start + len(marker):stop].lower()
        windows_names = {'amd64': 'win-amd64', 'itanium': 'win-ia64'}
        return windows_names.get(arch, sys.platform)

    # Set for cross builds explicitly
    if "_PYTHON_HOST_PLATFORM" in os.environ:
        return os.environ["_PYTHON_HOST_PLATFORM"]

    if not (os.name == "posix" and hasattr(os, 'uname')):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # From here on: try to distinguish the various flavours of Unix.
    (osname, host, release, version, machine) = os.uname()

    # Lowercase the OS name and drop '/' (to accommodate BSD/OS); make the
    # machine name filename-friendly (spaces appear in "Power Macintosh").
    osname = osname.lower().replace('/', '')
    machine = machine.replace(' ', '_').replace('/', '-')

    if osname.startswith("linux"):
        # At least on Linux/Intel, 'machine' is the processor --
        # i386, etc.
        # XXX what about Alpha, SPARC, etc?
        return "%s-%s" % (osname, machine)

    if osname.startswith("irix"):           # could be "irix64"!
        return "%s-%s" % (osname, release)

    if osname.startswith("aix"):
        return "%s-%s.%s" % (osname, version, release)

    if osname.startswith("sunos"):
        if release[0] >= "5":               # SunOS 5 == Solaris 2
            osname = "solaris"
            release = "%d.%s" % (int(release[0]) - 3, release[2:])
            # "platform.architecture()[0]" cannot be used here because of
            # a bootstrap problem.  The dict lookup deliberately raises if
            # the native integer size is something unexpected.
            bitness = {2147483647: "32bit", 9223372036854775807: "64bit"}
            machine += ".%s" % bitness[sys.maxint]
        # otherwise use the standard osname-release-machine form below
    elif osname.startswith("cygwin"):
        osname = "cygwin"
        # keep only the leading numeric part of the release string
        found = re.match(r'[\d.]+', release)
        if found:
            release = found.group()
    elif osname.startswith("darwin"):
        import _osx_support, distutils.sysconfig
        osname, release, machine = _osx_support.get_platform_osx(
                                        distutils.sysconfig.get_config_vars(),
                                        osname, release, machine)

    return "%s-%s-%s" % (osname, release, machine)
106
107# get_platform ()
108
109
def convert_path(pathname):
    """Translate a '/'-separated 'pathname' into a native filesystem path.

    Filenames in the setup script are always written Unix style; this
    splits 'pathname' on '/' and re-joins the pieces with the local
    directory separator.  Where the native separator already is '/', or
    when 'pathname' is empty, it is returned unchanged.  '.' components
    are dropped, and a path made only of them becomes 'os.curdir'.

    Raises ValueError on non-Unix-ish systems if 'pathname' either starts
    or ends with a slash.
    """
    if os.sep == '/' or not pathname:
        return pathname
    if pathname[0] == '/':
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname[-1] == '/':
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    parts = [part for part in pathname.split('/') if part != '.']
    if not parts:
        return os.curdir

    # On Windows, if parts is ['C:','folder','subfolder'] then
    # os.path.join(*parts) would give 'C:folder\subfolder', which is
    # relative to the CWD on that drive.  Work around this by appending a
    # backslash to the drive component (only done for GCC-built Pythons).
    gcc_windows = sys.platform == "win32" and "GCC" in sys.version
    if gcc_windows and parts[0].endswith(':'):
        parts[0] += '\\'
    return os.path.join(*parts)
141
142# convert_path ()
143
144
def change_root(new_root, pathname):
    """Return 'pathname' with 'new_root' prepended.

    If 'pathname' is relative, this is equivalent to
    "os.path.join(new_root,pathname)".  Otherwise 'pathname' is first made
    relative (the drive, if any, and all leading separators are dropped)
    and then joined onto 'new_root'.

    Raises DistutilsPlatformError when 'os.name' is not one of 'posix',
    'nt' or 'os2'.
    """
    if os.name == 'posix':
        if not os.path.isabs(pathname):
            return os.path.join(new_root, pathname)
        # Strip *every* leading slash: a path such as '//usr/lib' would
        # otherwise stay absolute and make os.path.join() drop 'new_root'.
        return os.path.join(new_root, pathname.lstrip('/'))

    elif os.name in ('nt', 'os2'):
        (drive, path) = os.path.splitdrive(pathname)
        # lstrip() copes with an empty 'path' (e.g. pathname == 'C:'),
        # and with either kind of separator in front of the path.
        separators = os.sep + (os.altsep or '')
        return os.path.join(new_root, path.lstrip(separators))

    else:
        raise DistutilsPlatformError(
            "nothing known about platform '%s'" % os.name)
172
173
# Set to 1 once check_environ() has run, so the work is done only once.
_environ_checked = 0


def check_environ():
    """Ensure that 'os.environ' has all the environment variables we
    guarantee that users can use in config files, command-line options,
    etc.  Currently this includes:
      HOME - user's home directory (Unix only)
      PLAT - description of the current platform, including hardware
             and OS (see 'get_platform()')

    Existing values are never overwritten.  HOME is filled in on a
    best-effort basis: if the current user has no entry in the password
    database it is simply left unset.  Only the first call does any work.
    """
    global _environ_checked
    if _environ_checked:
        return

    if os.name == 'posix' and 'HOME' not in os.environ:
        try:
            import pwd
            os.environ['HOME'] = pwd.getpwuid(os.getuid())[5]
        except (ImportError, KeyError):
            # getpwuid() raises KeyError when the uid is unknown to the
            # password database (e.g. arbitrary uids in containers);
            # a missing HOME must not abort the whole build.
            pass

    if 'PLAT' not in os.environ:
        os.environ['PLAT'] = get_platform()

    _environ_checked = 1
195
196
def subst_vars(s, local_vars):
    """Perform shell/Perl-style variable substitution on the string 's'.

    Every occurrence of '$' followed by a name (a letter or underscore,
    then letters, digits or underscores) is treated as a variable and is
    replaced by its value from the 'local_vars' dictionary (converted
    with str()), or from 'os.environ' if it is not in 'local_vars'.
    'check_environ()' is called first so that 'os.environ' contains the
    values it guarantees.  Raises ValueError for any variable found in
    neither 'local_vars' nor 'os.environ'.
    """
    check_environ()

    def _lookup(match):
        # 'local_vars' takes precedence over the process environment
        name = match.group(1)
        if name in local_vars:
            return str(local_vars[name])
        return os.environ[name]

    try:
        return re.sub(r'\$([a-zA-Z_][a-zA-Z_0-9]*)', _lookup, s)
    except KeyError as var:
        raise ValueError("invalid variable '$%s'" % var)
218
219# subst_vars ()
220
221
def grok_environment_error(exc, prefix="error: "):
    """Generate a useful error message from an EnvironmentError (IOError or
    OSError) exception object.

    When 'exc' has both 'filename' and 'strerror' attributes the message is
    "<filename>: <strerror>", or just "<strerror>" if the filename is empty
    (which happens when the error is due to a two-file operation, such as
    'rename()' or 'link()').  For any other exception the last element of
    'exc.args' is used, falling back to 'str(exc)' when there are no args.

    Returns the error message as a string prefixed with 'prefix'.
    """
    if hasattr(exc, 'filename') and hasattr(exc, 'strerror'):
        if exc.filename:
            error = prefix + "%s: %s" % (exc.filename, exc.strerror)
        else:
            # two-argument functions in posix module don't
            # include the filename in the exception object!
            error = prefix + "%s" % exc.strerror
    else:
        # Go through 'args' rather than indexing the exception itself:
        # indexing fails on an exception without arguments, and exception
        # objects are not subscriptable at all on newer Pythons.
        args = getattr(exc, 'args', None)
        if args:
            error = prefix + str(args[-1])
        else:
            error = prefix + str(exc)

    return error
242
243
# Regexes needed by 'split_quoted()'; compiled lazily on first use.
_wordchars_re = _squote_re = _dquote_re = None


def _init_regex():
    """Compile the module-level regexes used by 'split_quoted()'."""
    global _wordchars_re, _squote_re, _dquote_re
    # a run of characters that are neither backslash, quote nor whitespace
    _wordchars_re = re.compile(r'[^\\\'\"%s ]*' % string.whitespace)
    # a complete single- / double-quoted string, backslash escapes allowed
    _squote_re = re.compile(r"'(?:[^'\\]|\\.)*'")
    _dquote_re = re.compile(r'"(?:[^"\\]|\\.)*"')


def split_quoted(s):
    """Split a string up according to Unix shell-like rules for quotes and
    backslashes.

    Words are delimited by whitespace that is neither escaped by a
    backslash nor inside a quoted string.  Single and double quotes are
    equivalent, and the quote characters can be backslash-escaped.  The
    backslash is stripped from any two-character escape sequence, leaving
    only the escaped character, and the quote characters are stripped from
    any quoted string.

    Returns a list of words.  Raises ValueError when a quote is opened but
    never closed.
    """
    # The string is consumed word by word without examining every
    # character: 'rest' is the still-unsplit text (the word being built
    # sits at its start) and 'pos' is where scanning resumes inside it.
    if _wordchars_re is None:
        _init_regex()

    rest = s.strip()
    words = []
    pos = 0

    while rest:
        stop = _wordchars_re.match(rest, pos).end()
        if stop == len(rest):
            # ran off the end: everything left is the final word
            words.append(rest[:stop])
            break

        char = rest[stop]
        if char in string.whitespace:
            # unescaped, unquoted whitespace: a definite word delimiter
            words.append(rest[:stop])
            rest = rest[stop:].lstrip()
            pos = 0

        elif char == '\\':
            # drop the backslash; whatever it escaped stays in the
            # current word and is skipped by the next scan
            rest = rest[:stop] + rest[stop + 1:]
            pos = stop + 1

        else:
            if char == "'":             # slurp singly-quoted string
                quoted = _squote_re.match(rest, stop)
            elif char == '"':           # slurp doubly-quoted string
                quoted = _dquote_re.match(rest, stop)
            else:
                raise RuntimeError(
                    "this can't happen (bad char '%c')" % char)

            if quoted is None:
                raise ValueError(
                    "bad string (mismatched %s quotes?)" % char)

            (beg, end) = quoted.span()
            # remove the two quote characters, keep what was between them
            rest = rest[:beg] + rest[beg + 1:end - 1] + rest[end:]
            # two characters were removed, so resume just past the
            # (now unquoted) text
            pos = end - 2

        if pos >= len(rest):
            words.append(rest)
            break

    return words
311
312# split_quoted ()
313
314
def execute(func, args, msg=None, verbose=0, dry_run=0):
    """Perform some action that affects the outside world (eg. by
    writing to the filesystem).

    Such actions are special because they are disabled by the 'dry_run'
    flag.  'func' is the function to call and 'args' the argument tuple
    for it (together they embody the "external action" being performed).
    'msg' is logged at info level before the call; when it is None a
    message of the form "func_name(args...)" is generated.  'verbose' is
    accepted but not used here.
    """
    if msg is None:
        msg = "%s%r" % (func.__name__, args)
        if msg.endswith(',)'):      # correct for singleton tuple
            msg = msg[:-2] + ')'

    log.info(msg)
    if dry_run:
        return
    func(*args)
332
333
def strtobool(val):
    """Convert a string representation of truth to true (1) or false (0).

    The comparison is case-insensitive.  True values are 'y', 'yes', 't',
    'true', 'on', and '1'; false values are 'n', 'no', 'f', 'false',
    'off', and '0'.  Raises ValueError if 'val' is anything else.
    """
    true_words = ('y', 'yes', 't', 'true', 'on', '1')
    false_words = ('n', 'no', 'f', 'false', 'off', '0')

    lowered = val.lower()
    if lowered in true_words:
        return 1
    if lowered in false_words:
        return 0
    raise ValueError("invalid truth value %r" % (lowered,))
348
349
def byte_compile(py_files,
                 optimize=0, force=0,
                 prefix=None, base_dir=None,
                 verbose=1, dry_run=0,
                 direct=None):
    """Byte-compile a collection of Python source files to either .pyc
    or .pyo files in the same directory.  'py_files' is a list of files
    to compile; any files that don't end in ".py" are silently skipped.
    'optimize' must be one of the following:
      0 - don't optimize (generate .pyc)
      1 - normal optimization (like "python -O")
      2 - extra optimization (like "python -OO")
    If 'force' is true, all files are recompiled regardless of
    timestamps.

    The source filename encoded in each bytecode file defaults to the
    filenames listed in 'py_files'; you can modify these with 'prefix' and
    'base_dir'.  'prefix' is a string that will be stripped off of each
    source filename, and 'base_dir' is a directory name that will be
    prepended (after 'prefix' is stripped).  You can supply either or both
    (or neither) of 'prefix' and 'base_dir', as you wish.

    If 'dry_run' is true, doesn't actually do anything that would
    affect the filesystem.  'verbose' is only forwarded to the script
    generated in indirect mode.

    Byte-compilation is either done directly in this interpreter process
    with the standard py_compile module, or indirectly by writing a
    temporary script and executing it.  Normally, you should let
    'byte_compile()' figure out to use direct compilation or not (see
    the source for details).  The 'direct' flag is used by the script
    generated in indirect mode; unless you know what you're doing, leave
    it set to None.

    Raises DistutilsByteCompileError if 'sys.dont_write_bytecode' is set,
    and ValueError if a file does not start with the given 'prefix'.
    """
    # nothing is done if sys.dont_write_bytecode is True
    if sys.dont_write_bytecode:
        raise DistutilsByteCompileError('byte-compiling is disabled.')

    # First, if the caller didn't force us into direct or indirect mode,
    # figure out which mode we should be in.  We take a conservative
    # approach: choose direct mode *only* if the current interpreter is
    # in debug mode and optimize is 0.  If we're not in debug mode (-O
    # or -OO), we don't know which level of optimization this
    # interpreter is running with, so we can't do direct
    # byte-compilation and be certain that it's the right thing.  Thus,
    # always compile indirectly if the current interpreter is in either
    # optimize mode, or if either optimization level was requested by
    # the caller.
    if direct is None:
        direct = (__debug__ and optimize == 0)

    # "Indirect" byte-compilation: write a temporary script and then
    # run it with the appropriate flags.
    if not direct:
        try:
            from tempfile import mkstemp
            (script_fd, script_name) = mkstemp(".py")
        except ImportError:
            # fallback for interpreters whose tempfile lacks mkstemp()
            from tempfile import mktemp
            (script_fd, script_name) = None, mktemp(".py")
        log.info("writing byte-compilation script '%s'", script_name)

        # From here on the temporary script must be cleaned up whatever
        # happens (write error, failing child process, ...).
        try:
            if not dry_run:
                if script_fd is not None:
                    script = os.fdopen(script_fd, "w")
                else:
                    script = open(script_name, "w")

                try:
                    script.write("""\
from distutils.util import byte_compile
files = [
""")

                    # XXX would be nice to write absolute filenames, just
                    # for safety's sake (script should be more robust in
                    # the face of chdir'ing before running it).  But this
                    # requires abspath'ing 'prefix' as well, and that
                    # breaks the hack in build_lib's 'byte_compile()'
                    # method that carefully tacks on a trailing slash
                    # (os.sep really) to make sure the prefix here is
                    # "just right".  This whole prefix business is rather
                    # delicate -- the problem is that it's really a
                    # directory, but it is treated as a dumb string, so
                    # trailing slashes and so forth matter.
                    script.write(",\n".join(map(repr, py_files)) + "]\n")
                    script.write("""
byte_compile(files, optimize=%r, force=%r,
             prefix=%r, base_dir=%r,
             verbose=%r, dry_run=0,
             direct=1)
""" % (optimize, force, prefix, base_dir, verbose))
                finally:
                    script.close()
            elif script_fd is not None:
                # mkstemp() has really created and opened the file even
                # though this is a dry run: release the descriptor and
                # delete the file so a dry run leaves nothing behind.
                os.close(script_fd)
                os.remove(script_name)

            cmd = [sys.executable, script_name]
            if optimize == 1:
                cmd.insert(1, "-O")
            elif optimize == 2:
                cmd.insert(1, "-OO")
            spawn(cmd, dry_run=dry_run)
        finally:
            execute(os.remove, (script_name,), "removing %s" % script_name,
                    dry_run=dry_run)

    # "Direct" byte-compilation: use the py_compile module to compile
    # right here, right now.  Note that the script generated in indirect
    # mode simply calls 'byte_compile()' in direct mode, a weird sort of
    # cross-process recursion.  Hey, it works!
    else:
        from py_compile import compile

        for py_file in py_files:
            if py_file[-3:] != ".py":
                # This lets us be lazy and not filter filenames in
                # the "install_lib" command.
                continue

            # Terminology from the py_compile module:
            #   cfile - byte-compiled file
            #   dfile - purported source filename (same as 'py_file' by
            #           default)
            cfile = py_file + ("c" if __debug__ else "o")
            dfile = py_file
            if prefix:
                if py_file[:len(prefix)] != prefix:
                    raise ValueError(
                        "invalid prefix: filename %r doesn't start with %r"
                        % (py_file, prefix))
                dfile = dfile[len(prefix):]
            if base_dir:
                dfile = os.path.join(base_dir, dfile)

            cfile_base = os.path.basename(cfile)
            if force or newer(py_file, cfile):
                log.info("byte-compiling %s to %s", py_file, cfile_base)
                if not dry_run:
                    compile(py_file, cfile, dfile)
            else:
                log.debug("skipping byte-compilation of %s to %s",
                          py_file, cfile_base)
490
491# byte_compile ()
492
def rfc822_escape(header):
    """Return a version of the string escaped for inclusion in an
    RFC-822 header, by ensuring there are 8 spaces after each newline.
    """
    continuation = '\n' + ' ' * 8
    return header.replace('\n', continuation)
499 return header