| Tor Norbye | 3a2425a | 2013-11-04 10:16:08 -0800 | [diff] [blame^] | 1 | # epydoc -- Utility functions |
| 2 | # |
| 3 | # Copyright (C) 2005 Edward Loper |
| 4 | # Author: Edward Loper <edloper@loper.org> |
| 5 | # URL: <http://epydoc.sf.net> |
| 6 | # |
| 7 | # $Id: util.py 1671 2008-01-29 02:55:49Z edloper $ |
| 8 | |
| 9 | """ |
| 10 | Miscellaneous utility functions that are used by multiple modules. |
| 11 | |
| 12 | @group Python source types: is_module_file, is_package_dir, is_pyname, |
| 13 | py_src_filename |
| 14 | @group Text processing: wordwrap, decode_with_backslashreplace, |
| 15 | plaintext_to_html |
| 16 | """ |
| 17 | __docformat__ = 'epytext en' |
| 18 | |
| 19 | import os, os.path, re |
| 20 | |
| 21 | ###################################################################### |
| 22 | ## Python Source Types |
| 23 | ###################################################################### |
| 24 | |
# File name extensions that are treated as Python source files.
PY_SRC_EXTENSIONS = ['.py', '.pyw']
# File name extensions that are treated as non-source (binary) Python
# module files.
PY_BIN_EXTENSIONS = ['.pyc', '.so', '.pyd']
| 27 | |
def is_module_file(path):
    """
    Return true if C{path} names an existing file whose base name is
    a valid Python identifier and whose extension is one of the
    known source or binary module extensions.

    @param path: The file name to check.  Any value that is not a
        string yields C{False}.
    @rtype: C{bool}
    """
    # Make sure it's a file name.
    if not isinstance(path, basestring):
        return False
    filename = os.path.basename(path)
    (basename, extension) = os.path.splitext(filename)
    # bool() keeps the result a real boolean; without it a failed
    # regexp match would leak out as None.
    return bool(os.path.isfile(path) and
                re.match(r'[a-zA-Z_]\w*$', basename) and
                extension in PY_SRC_EXTENSIONS+PY_BIN_EXTENSIONS)
| 37 | |
def is_src_filename(filename):
    """
    Return true if C{filename} is a string naming an existing path
    whose extension is one of the Python source extensions.

    @param filename: The file name to check.
    @rtype: C{bool}
    """
    if isinstance(filename, basestring) and os.path.exists(filename):
        extension = os.path.splitext(filename)[1]
        return extension in PY_SRC_EXTENSIONS
    return False
| 42 | |
def is_package_dir(dirname):
    """
    Return true if the given directory is a package directory
    (i.e., it names a directory that contains a valid __init__
    module file).  The name of the directory itself is not checked.

    @param dirname: The directory name to check.  Any value that is
        not a string yields C{False}.
    @rtype: C{bool}
    """
    # Make sure it's a directory name.
    if not isinstance(dirname, basestring):
        return False
    if not os.path.isdir(dirname):
        return False
    dirname = os.path.abspath(dirname)

    # The directory name used to be required to be a valid
    # identifier.  That constraint was removed because of sourceforge
    # bug #1787028 -- in some cases (eg eggs), it's too strict.

    for name in os.listdir(dirname):
        if (name.startswith('__init__.') and
            is_module_file(os.path.join(dirname, name))):
            return True
    return False
| 71 | |
def is_pyname(name):
    """
    Check whether C{name} is a dotted name, i.e. one or more runs of
    word characters separated by single periods.

    @param name: The string to check.
    @return: The regexp match object (which is true) if C{name} is a
        dotted name; C{None} otherwise.
    """
    dotted_name = re.compile(r"\w+(\.\w+)*$")
    return dotted_name.match(name)
| 74 | |
def py_src_filename(filename):
    """
    Find the Python source file that corresponds to C{filename}.

    @param filename: A file name.  If its extension is already a
        Python source extension, it is returned unchanged.
        Otherwise, each source extension is tried in turn in place
        of the original extension.
    @return: C{filename} itself, or the first existing file that is
        found by swapping in a source extension.
    @raise ValueError: If no corresponding source file exists.
    """
    stem, suffix = os.path.splitext(filename)
    if suffix in PY_SRC_EXTENSIONS:
        return filename

    for src_ext in PY_SRC_EXTENSIONS:
        candidate = '%s%s' % (stem, src_ext)
        if os.path.isfile(candidate):
            return candidate

    raise ValueError('Could not find a corresponding '
                     'Python source file for %r.' % filename)
| 86 | |
def munge_script_name(filename):
    """
    Build a name for a script file: the final path component of
    C{filename}, with every non-word character replaced by an
    underscore, prefixed with C{'script-'}.

    @param filename: The path of the script.
    @rtype: C{string}
    """
    tail = os.path.basename(filename)
    return 'script-' + re.sub(r'\W', '_', tail)
| 91 | |
| 92 | ###################################################################### |
| 93 | ## Text Processing |
| 94 | ###################################################################### |
| 95 | |
def decode_with_backslashreplace(s):
    r"""
    Convert the given 8-bit string into unicode, treating any
    character c such that ord(c)<128 as an ascii character, and
    converting any c such that ord(c)>=128 into a backslashed escape
    sequence.

        >>> decode_with_backslashreplace('abc\xff\xe8')
        u'abc\\xff\\xe8'

    @param s: The string to convert.  It must be a C{str}.
    @return: A string that contains only ascii characters.
    """
    # s.encode('string-escape') is not appropriate here, since it
    # also adds backslashes to some ascii chars (eg \ and ').
    assert isinstance(s, str)
    # An 8-bit str is first mapped one-to-one onto the first 256 code
    # points.  A str type that has no decode() method is already
    # text, so there is nothing to map.
    if hasattr(s, 'decode'):
        s = s.decode('latin1')
    return s.encode('ascii', 'backslashreplace').decode('ascii')
| 113 | |
def wordwrap(str, indent=0, right=75, startindex=0, splitchars=''):
    """
    Word-wrap the given string.  I.e., add newlines to the string such
    that any lines that are longer than C{right} are broken into
    shorter lines (at the first whitespace sequence that occurs before
    index C{right}).  If the given string contains newlines, they will
    I{not} be removed.  Tabs are expanded before wrapping, and the
    result always ends with exactly one newline.

    @param indent: If specified, then indent each line by this number
        of spaces.
    @type indent: C{int}
    @param right: The right margin for word wrapping.  Lines that are
        longer than C{right} will be broken at the first whitespace
        sequence before the right margin.
    @type right: C{int}
    @param startindex: If specified, then assume that the first line
        is already preceded by C{startindex} characters.
    @type startindex: C{int}
    @param splitchars: A list of non-whitespace characters which can
        be used to split a line.  (E.g., use '/\\' to allow path names
        to be split over multiple lines.)
    @rtype: C{str}
    """
    expanded = str.expandtabs()
    if splitchars:
        escaped = re.escape(splitchars)
        pattern = r'( +|\n|[^ \n%s]*[%s])' % (escaped, escaped)
    else:
        pattern = r'( +|\n)'
    pieces = re.split(pattern, expanded)

    # Only pad the first line by whatever part of the indentation is
    # not already covered by the startindex characters.
    output = [' '*(indent-startindex)]
    column = max(indent, startindex)
    for piece in pieces:
        overflows = column + len(piece) > right and column > 0
        if overflows or piece == '\n':
            # Start a new (indented) line.
            output.append('\n' + ' '*indent)
            column = indent
            # Whitespace that triggered the break is swallowed by it.
            if piece[:1] in ('\n', ' '):
                continue
        output.append(piece)
        column += len(piece)
    return ''.join(output).rstrip()+'\n'
| 157 | |
def plaintext_to_html(s):
    """
    @return: An HTML string that encodes the given plaintext string.
        In particular, special characters (such as C{'<'} and C{'&'})
        are escaped.
    @rtype: C{string}
    @param s: The plaintext string to encode.
    """
    # The ampersand must be replaced first; otherwise the ampersands
    # that are introduced by the other entities would be escaped
    # a second time.
    s = s.replace('&', '&amp;').replace('"', '&quot;')
    s = s.replace('<', '&lt;').replace('>', '&gt;')
    return s
| 168 | |
def plaintext_to_latex(str, nbsp=0, breakany=0):
    """
    @return: A LaTeX string that encodes the given plaintext string.
        In particular, special characters (such as C{'$'} and C{'_'})
        are escaped, and tabs are expanded.
    @rtype: C{string}
    @param str: The plaintext string to encode.
    @param breakany: Insert hyphenation marks, so that LaTeX can
        break the resulting string at any point.  This is useful for
        small boxes (e.g., the type box in the variable list table).
    @param nbsp: Replace every space with a non-breaking space
        (C{'~'}).
    """
    text = str

    # Mark every character with a placeholder; the placeholders are
    # turned into hyphenation points at the very end.
    if breakany:
        text = re.sub('(.)', '\\1\1', text)

    # Park backslashes behind a placeholder, so that the backslashes
    # added by the escaping below are not mistaken for input.
    text = text.replace('\\', '\0')

    text = text.expandtabs()

    # Characters that just need a leading backslash.
    text = re.sub(r'([#$&%_\${}])', r'\\\1', text)

    # Characters that are written as named macros.  The backslash
    # placeholder comes last, after all other escaping is done.
    named_macros = (
        ('|', '{\\textbar}'),
        ('<', '{\\textless}'),
        ('>', '{\\textgreater}'),
        ('^', '{\\textasciicircum}'),
        ('~', '{\\textasciitilde}'),
        ('\0', r'{\textbackslash}'),
    )
    for char, macro in named_macros:
        text = text.replace(char, macro)

    # This must follow the tilde escaping above, so that the
    # non-breaking spaces are not escaped themselves.
    if nbsp:
        text = text.replace(' ', '~')

    if breakany:
        text = text.replace('\1', r'\-')

    return text
| 208 | |
class RunSubprocessError(OSError):
    """
    The error that is raised when a command that was run in a
    subprocess is considered to have failed.

    @ivar out: The output that was captured from the command.
    @ivar err: The error output that was captured from the command.
    """
    def __init__(self, cmd, out, err):
        # cmd is a sequence; its first item names the program.
        message = '%s failed' % cmd[0]
        OSError.__init__(self, message)
        self.out, self.err = out, err
| 214 | |
def run_subprocess(cmd, data=None):
    """
    Execute the command C{cmd} in a subprocess.

    Three back-ends are tried in turn: the C{subprocess} module if it
    can be imported, then C{popen2.Popen3} if it exists, and finally
    C{os.popen3}.

    @param cmd: The command to execute, specified as a list
        of strings.  A single string is also accepted; it is split
        on whitespace.
    @param data: A string containing data to send to the
        subprocess.
    @return: A tuple C{(out, err)}.
    @raise OSError: If there is any problem executing the
        command, or if its exitval is not 0.  When the exit value is
        not available, anything written to the child's stderr counts
        as a failure.  Failures of the command itself are reported
        as L{RunSubprocessError}.
    """
    # A command string is split with str.split(), i.e. on runs of
    # whitespace; no shell-style quoting is interpreted.
    if isinstance(cmd, basestring):
        cmd = cmd.split()

    # Under Python 2.4+, use subprocess
    try:
        from subprocess import Popen, PIPE
        pipe = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        out, err = pipe.communicate(data)
        if hasattr(pipe, 'returncode'):
            if pipe.returncode == 0:
                return out, err
            else:
                raise RunSubprocessError(cmd, out, err)
        else:
            # Assume that there was an error iff anything was written
            # to the child's stderr.
            if err == '':
                return out, err
            else:
                raise RunSubprocessError(cmd, out, err)
    # Reached only when an ImportError escapes the block above; every
    # other outcome has already returned or raised.
    except ImportError:
        pass

    # Under Python 2.3 or earlier, on unix, use popen2.Popen3 so we
    # can access the return value.
    import popen2
    if hasattr(popen2, 'Popen3'):
        # The command list is joined back into one string here.
        # NOTE(review): the second argument presumably turns on
        # capturing of the child's stderr (childerr is read below) --
        # confirm against the popen2 documentation.
        pipe = popen2.Popen3(' '.join(cmd), True)
        to_child = pipe.tochild
        from_child = pipe.fromchild
        child_err = pipe.childerr
        if data:
            to_child.write(data)
        to_child.close()
        out = err = ''
        # NOTE(review): popen2.Popen3.poll() is believed to return -1
        # (not None) while the child is still running; if so, this
        # loop never runs and everything is collected by the two
        # reads after it -- confirm.
        while pipe.poll() is None:
            out += from_child.read()
            err += child_err.read()
        out += from_child.read()
        err += child_err.read()
        if pipe.wait() == 0:
            return out, err
        else:
            raise RunSubprocessError(cmd, out, err)

    # Under Python 2.3 or earlier, on non-unix, use os.popen3
    else:
        to_child, from_child, child_err = os.popen3(' '.join(cmd), 'b')
        if data:
            try:
                to_child.write(data)
            # Guard for a broken pipe error
            except IOError, e:
                # Re-raised as OSError, the exception type that this
                # function documents.
                raise OSError(e)
        to_child.close()
        out = from_child.read()
        err = child_err.read()
        # Assume that there was an error iff anything was written
        # to the child's stderr.
        if err == '':
            return out, err
        else:
            raise RunSubprocessError(cmd, out, err)