Demos for Fred's parser module

commit: 16d27e3b141d26853effc6c70214412cebebbe9f [log] [tgz]
author: Guido van Rossum <guido@python.org> Wed Aug 21 16:28:53 1996 +0000
committer: Guido van Rossum <guido@python.org> Wed Aug 21 16:28:53 1996 +0000
tree: c98f065362f2a1dfd552eff86934cb638dfaf055
parent: 6dbd190f5ec3127ad7c5ef6fc67d2f02c4cc1492 [diff]
diff --git a/Demo/parser/FILES b/Demo/parser/FILES
new file mode 100644
index 0000000..4505d3a
--- /dev/null
+++ b/Demo/parser/FILES

@@ -0,0 +1,6 @@
+Demo/parser/
+Doc/libparser.tex
+Lib/AST.py
+Lib/symbol.py
+Lib/token.py
+Modules/parsermodule.c

diff --git a/Demo/parser/Makefile b/Demo/parser/Makefile
new file mode 100644
index 0000000..648bf6e
--- /dev/null
+++ b/Demo/parser/Makefile

@@ -0,0 +1,8 @@
+parser.dvi:  parser.tex ../../Doc/libparser.tex
+	TEXINPUTS=../../Doc:: $(LATEX) parser
+
+#  Use a new name for this; the included file uses 'clean' already....
+clean-parser:
+	rm -f *.log *.aux *.dvi *.pyc
+
+include ../../Doc/Makefile

diff --git a/Demo/parser/README b/Demo/parser/README
new file mode 100644
index 0000000..03696c3
--- /dev/null
+++ b/Demo/parser/README

@@ -0,0 +1,15 @@
+These files are from the large example of using the `parser' module.  Refer
+to the Python Library Reference for more information.
+
+Files:
+------
+
+	example.py   --	module that uses the `parser' module to extract
+			information from the parse tree of Python source
+			code.
+
+	source.py    --	sample source code used to demonstrate ability to
+			handle nested constructs easily using the functions
+			and classes in example.py.
+
+Enjoy!

diff --git a/Demo/parser/docstring.py b/Demo/parser/docstring.py
new file mode 100644
index 0000000..45a261b
--- /dev/null
+++ b/Demo/parser/docstring.py

@@ -0,0 +1,2 @@
+"""Some documentation.
+"""

diff --git a/Demo/parser/example.py b/Demo/parser/example.py
new file mode 100644
index 0000000..c428aff
--- /dev/null
+++ b/Demo/parser/example.py

@@ -0,0 +1,163 @@
+"""Simple code to extract class & function docstrings from a module.
+
+
+"""
+
+import symbol
+import token
+import types
+
+
+def get_docs(fileName):
+    """Retrieve information from the parse tree of a source file.
+
+    fileName
+	Name of the file to read Python source code from.
+    """
+    source = open(fileName).read()
+    import os
+    basename = os.path.basename(os.path.splitext(fileName)[0])
+    import parser
+    ast = parser.suite(source)
+    tup = parser.ast2tuple(ast)
+    return ModuleInfo(tup, basename)
+
+
+class DefnInfo:
+    """Base record for a definition: holds its name and its docstring."""
+
+    #  Class-level defaults; they apply until an instance assigns its own
+    #  value to the attribute.
+    _docstring = ''
+    _name = ''
+
+    def __init__(self, tree):
+	#  The name is read from tree[2][1].  Presumably `tree' is the tuple
+	#  form of a funcdef or classdef node -- confirm against callers.
+	self._name = tree[2][1]
+
+    def get_docstring(self):
+	"""Return the recorded docstring ('' if none was recorded)."""
+	return self._docstring
+
+    def get_name(self):
+	"""Return the recorded name."""
+	return self._name
+
+class SuiteInfoBase(DefnInfo):
+    """Collects the classes and functions defined directly in a suite.
+
+    Results are kept in two dictionaries keyed by definition name:
+    _class_info (ClassInfo values) and _function_info (FunctionInfo
+    values).
+    """
+    def __init__(self):
+	#  Note: DefnInfo.__init__() is not called here; subclasses that
+	#  have a name set it themselves.
+	self._class_info = {}
+	self._function_info = {}
+
+    def get_class_names(self):
+	"""Return the names of the classes found in the suite."""
+	return self._class_info.keys()
+
+    def get_class_info(self, name):
+	"""Return the ClassInfo recorded under NAME (KeyError if absent)."""
+	return self._class_info[name]
+
+    def _extract_info(self, tree):
+	"""Record the docstring and the nested definitions found in TREE.
+
+	TREE is indexed as a tuple whose elements from position 1 onward
+	are child nodes; each child's first element is its node type.
+	"""
+	#  A docstring is looked for only at tree[3].
+	if len(tree) >= 4:
+	    found, vars = match(DOCSTRING_STMT_PATTERN, tree[3])
+	    if found:
+		#  The captured value is the token text; eval() turns the
+		#  literal into the string it denotes.
+		self._docstring = eval(vars['docstring'])
+	for node in tree[1:]:
+	    #  Only compound statements are inspected; of those, only
+	    #  funcdef and classdef nodes are recorded.
+	    if (node[0] == symbol.stmt
+		and node[1][0] == symbol.compound_stmt):
+		if node[1][1][0] == symbol.funcdef:
+		    name = node[1][1][2][1]
+		    self._function_info[name] = \
+					      FunctionInfo(node[1][1])
+		elif node[1][1][0] == symbol.classdef:
+		    name = node[1][1][2][1]
+		    self._class_info[name] = ClassInfo(node[1][1])
+
+
+class SuiteInfo(SuiteInfoBase):
+    """Suite information that also exposes the functions it contains."""
+    def __init__(self, tree):
+	#  Create the empty tables first, then fill them from TREE.
+	SuiteInfoBase.__init__(self)
+	self._extract_info(tree)
+
+    def get_function_names(self):
+	"""Return the names of the functions found in the suite."""
+	return self._function_info.keys()
+
+    def get_function_info(self, name):
+	"""Return the FunctionInfo recorded under NAME (KeyError if absent)."""
+	return self._function_info[name]
+
+
+class FunctionInfo(SuiteInfo):
+    def __init__(self, tree):
+	DefnInfo.__init__(self, tree)
+	suite = tree[-1]
+	if len(suite) >= 4:
+	    found, vars = match(DOCSTRING_STMT_PATTERN, suite[3])
+	    if found:
+		self._docstring = eval(vars['docstring'])
+	SuiteInfoBase.__init__(self)
+	self._extract_info(suite)
+
+
+class ClassInfo(SuiteInfoBase):
+    """Information about one class definition; its functions are exposed
+    as methods."""
+    def __init__(self, tree):
+	SuiteInfoBase.__init__(self)
+	DefnInfo.__init__(self, tree)
+	#  The last element of TREE is used as the class body.
+	self._extract_info(tree[-1])
+
+    def get_method_names(self):
+	"""Return the names of the functions defined in the class body."""
+	return self._function_info.keys()
+
+    def get_method_info(self, name):
+	"""Return the FunctionInfo recorded under NAME (KeyError if absent)."""
+	return self._function_info[name]
+
+
+class ModuleInfo(SuiteInfo):
+    def __init__(self, tree, name="<string>"):
+	self._name = name
+	SuiteInfo.__init__(self, tree)
+	found, vars = match(DOCSTRING_STMT_PATTERN, tree[1])
+	if found:
+	    self._docstring = vars["docstring"]
+
+
+from types import ListType, TupleType
+
+def match(pattern, data, vars=None):
+    """
+    """
+    if vars is None:
+	vars = {}
+    if type(pattern) is ListType:	# 'variables' are ['varname']
+	vars[pattern[0]] = data
+	return 1, vars
+    if type(pattern) is not TupleType:
+	return (pattern == data), vars
+    if len(data) != len(pattern):
+	return 0, vars
+    for pattern, data in map(None, pattern, data):
+	same, vars = match(pattern, data, vars)
+	if not same:
+	    break
+    return same, vars
+
+
+#  This pattern will match a 'stmt' node which *might* represent a docstring;
+#  docstrings require that the statement which provides the docstring be the
+#  first statement in the class or function, which this pattern does not check.
+#
+#  The pattern is meant to be passed to match().  The one-element list
+#  ['docstring'] is a variable slot: on a successful match the text of the
+#  STRING token is stored under the key 'docstring'.
+#
+DOCSTRING_STMT_PATTERN = (
+    symbol.stmt,
+    (symbol.simple_stmt,
+     (symbol.small_stmt,
+      (symbol.expr_stmt,
+       (symbol.testlist,
+	(symbol.test,
+	 (symbol.and_test,
+	  (symbol.not_test,
+	   (symbol.comparison,
+	    (symbol.expr,
+	     (symbol.xor_expr,
+	      (symbol.and_expr,
+	       (symbol.shift_expr,
+		(symbol.arith_expr,
+		 (symbol.term,
+		  (symbol.factor,
+		   (symbol.power,
+		    (symbol.atom,
+		     (token.STRING, ['docstring'])
+		     )))))))))))))))),
+     (token.NEWLINE, '')
+     ))
+
+#
+#  end of file

diff --git a/Demo/parser/parser.tex b/Demo/parser/parser.tex
new file mode 100644
index 0000000..170d9d7
--- /dev/null
+++ b/Demo/parser/parser.tex

@@ -0,0 +1,77 @@
+\documentstyle[twoside,10pt,myformat]{report}
+
+%%  This manual does not supplement the chapter from the Python
+%%  Library Reference, but only allows formatting of the parser module
+%%  component of that document as a separate document, and was created
+%%  primarily to ease review of the formatted document during authoring.
+
+\title{Python Parser Module Reference}
+\author{
+	Fred L. Drake, Jr. \\
+	Corporation for National Research Initiatives (CNRI) \\
+	1895 Preston White Drive, Reston, Va 20191, USA \\
+	E-mail: {\tt fdrake@cnri.reston.va.us}, {\tt fdrake@intr.net}
+}
+
+\date{August 20th, 1996 \\ Release 1.4}
+
+\begin{document}
+
+\pagenumbering{roman}
+
+\maketitle
+
+Copyright \copyright{} 1995-1996 by Fred L. Drake, Jr. and Virginia
+Polytechnic Institute and State University, Blacksburg, Virginia, USA.
+Portions of the software copyright 1991-1995 by Stichting Mathematisch
+Centrum, Amsterdam, The Netherlands.  Copying is permitted under the
+terms associated with the main Python distribution, with the
+additional restriction that this additional notice be included and
+maintained on all distributed copies.
+
+\begin{center}
+All Rights Reserved
+\end{center}
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted,
+provided that the above copyright notice appear in all copies and that
+both that copyright notice and this permission notice appear in
+supporting documentation, and that the names of Fred L. Drake, Jr. and
+Virginia Polytechnic Institute and State University not be used in
+advertising or publicity pertaining to distribution of the software
+without specific, written prior permission.
+
+FRED L. DRAKE, JR. AND VIRGINIA POLYTECHNIC INSTITUTE AND STATE
+UNIVERSITY DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+EVENT SHALL FRED L. DRAKE, JR. OR VIRGINIA POLYTECHNIC INSTITUTE AND
+STATE UNIVERSITY BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
+DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
+PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
+PERFORMANCE OF THIS SOFTWARE.
+
+\begin{abstract}
+
+\noindent
+The \emph{Python Parser Module Reference} describes the interfaces
+published by the optional \code{parser} module and gives examples of
+how they may be used.  It contains the same text as the chapter on the
+\code{parser} module in the \emph{Python Library Reference}, but is
+presented as a separate document.
+
+This manual assumes basic knowledge about the Python language.  For an
+informal introduction to Python, see the {\em Python Tutorial}; the
+Python Reference Manual remains the highest authority on syntactic and
+semantic questions.
+
+\end{abstract}
+
+\pagebreak
+\pagenumbering{arabic}
+
+\chapter{Parser Module Reference}
+\input{libparser}
+
+\end{document}

diff --git a/Demo/parser/pprint.py b/Demo/parser/pprint.py
new file mode 100644
index 0000000..c4b8158
--- /dev/null
+++ b/Demo/parser/pprint.py

@@ -0,0 +1,143 @@
+#  pprint.py
+#
+#  Author:	Fred L. Drake, Jr.
+#		fdrake@vt.edu
+#
+#  This is a simple little module I wrote to make life easier.  I didn't
+#  see anything quite like it in the library, though I may have overlooked
+#  something.  I wrote this when I was trying to read some heavily nested
+#  tuples with fairly non-descriptive content.  This is modelled very much
+#  after Lisp/Scheme - style pretty-printing of lists.  If you find it
+#  useful, thank small children who sleep at night.
+#
+
+"""Support to pretty-print lists, tuples, & dictionaries recursively.
+Very simple, but at least somewhat useful, especially in debugging
+data structures.
+
+INDENT_PER_LEVEL	--  Amount of indentation to use for each new
+			    recursive level.  The default is 1.  This
+			    must be a non-negative integer, and may be
+			    set by the caller before calling pprint().
+
+MAX_WIDTH		--  Maximum width of the display.  This is only
+			    used if the representation *can* be kept
+			    less than MAX_WIDTH characters wide.  May
+			    be set by the user before calling pprint().
+
+TAB_WIDTH		--  The width represented by a single tab.  This
+			    value is typically 8, but 4 is the default
+			    under MacOS.  It can be changed by the user if
+			    desired, but doing so is probably not a good idea.
+
+pprint(seq [, stream])	--  The pretty-printer.  This takes a Python
+			    object (presumably a sequence, but that
+			    doesn't matter) and an optional output
+			    stream.  See the function documentation
+			    for details.
+"""
+
+
+INDENT_PER_LEVEL = 1
+
+MAX_WIDTH = 80
+
+import os
+TAB_WIDTH = (os.name == 'mac' and 4) or 8
+del os
+
+
+
+def _indentation(cols):
+    "Create tabbed indentation string COLS columns wide."
+
+    #  This is used to reduce the byte-count for the output, allowing
+    #  files created using this module to use as little external storage
+    #  as possible.  This is primarily intended to minimize impact on
+    #  a user's quota when storing resource files, or for creating output
+    #  intended for transmission.
+
+    return ((cols / TAB_WIDTH) * '\t') + ((cols % TAB_WIDTH) * ' ')
+
+
+
+def pprint(seq, stream = None, indent = 0, allowance = 0):
+    """Pretty-print a list, tuple, or dictionary.
+
+    pprint(seq [, stream]) ==> None
+
+    If STREAM is provided, output is written to that stream, otherwise
+    sys.stdout is used.  Indentation is done according to
+    INDENT_PER_LEVEL, which may be set to any non-negative integer
+    before calling this function.  The output written on the stream is
+    a perfectly valid representation of the Python object passed in,
+    with indentation to suite human-readable interpretation.  The
+    output can be used as input without error, given readable
+    representations of all sequence elements are available via repr().
+    Output is restricted to MAX_WIDTH columns where possible.  The
+    STREAM parameter must support the write() method with a single
+    parameter, which will always be a string.  The output stream may be
+    a StringIO.StringIO object if the result is needed as a string.
+    """
+
+    if stream is None:
+	import sys
+	stream = sys.stdout
+
+    from types import DictType, ListType, TupleType
+
+    rep = `seq`
+    typ = type(seq)
+    sepLines = len(rep) > (MAX_WIDTH - 1 - indent - allowance)
+
+    if sepLines and (typ is ListType or typ is TupleType):
+	#  Pretty-print the sequence.
+	stream.write(((typ is ListType) and '[') or '(')
+
+	length = len(seq)
+	if length:
+	    indent = indent + INDENT_PER_LEVEL
+	    pprint(seq[0], stream, indent, allowance + 1)
+
+	    if len(seq) > 1:
+		for ent in seq[1:]:
+		    stream.write(',\n' + _indentation(indent))
+		    pprint(ent, stream, indent, allowance + 1)
+
+	    indent = indent - INDENT_PER_LEVEL
+
+	stream.write(((typ is ListType) and ']') or ')')
+
+    elif typ is DictType and sepLines:
+	stream.write('{')
+
+	length = len(seq)
+	if length:
+	    indent = indent + INDENT_PER_LEVEL
+	    items  = seq.items()
+	    items.sort()
+	    key, ent = items[0]
+	    rep = `key` + ': '
+	    stream.write(rep)
+	    pprint(ent, stream, indent + len(rep), allowance + 1)
+
+	    if len(items) > 1:
+		for key, ent in items[1:]:
+		    rep = `key` + ': '
+		    stream.write(',\n' + _indentation(indent) + rep)
+		    pprint(ent, stream, indent + len(rep), allowance + 1)
+
+	    indent = indent - INDENT_PER_LEVEL
+
+	stream.write('}')
+
+    else:
+	stream.write(rep)
+
+    #  Terminate the 'print' if we're not a recursive invocation.
+    if not indent:
+	stream.write('\n')
+
+
+#
+#  end of pprint.py

diff --git a/Demo/parser/source.py b/Demo/parser/source.py
new file mode 100644
index 0000000..b1690a5
--- /dev/null
+++ b/Demo/parser/source.py

@@ -0,0 +1,27 @@
+"""Example file to be parsed for the parser module example.
+
+The classes and functions in this module exist only to demonstrate the
+extraction of information from nested definitions using parse
+trees.  They shouldn't interest you otherwise!
+"""
+
+class Simple:
+    "This class does very little."
+
+    def method(self):
+	"This method does almost nothing."
+	return 1
+
+    class Nested:
+	"This is a nested class."
+
+	def nested_method(self):
+	    "Method of Nested class."
+	    def nested_function():
+		"Function in method of Nested class."
+		pass
+	    return nested_function
+
+def function():
+    "This function lives at the module level."
+    return 0

diff --git a/Demo/parser/test_parser.py b/Demo/parser/test_parser.py
new file mode 100755
index 0000000..e114d76
--- /dev/null
+++ b/Demo/parser/test_parser.py

@@ -0,0 +1,50 @@
+#! /projects/python/Python-1.4b2/python
+#  (Force the script to use the latest build.)
+#
+#  test_parser.py
+
+import parser, traceback
+
+_numFailed = 0
+
+def testChunk(t, fileName):
+    """Round-trip the source text T through the parser module.
+
+    T is parsed, converted to a tuple, converted back to an AST and
+    converted to a tuple again; the two tuples must be equal.  FILENAME
+    is used only in the messages printed.  Each failure increments the
+    module-level counter _numFailed.
+    """
+    global _numFailed
+    #  The trailing comma keeps the result on the same line as the name.
+    print '----', fileName,
+    try:
+	ast = parser.suite(t)
+	tup = parser.ast2tuple(ast)
+	# this discards the first AST; a huge memory savings when running
+	# against a large source file like Tkinter.py.
+	ast = None
+	new = parser.tuple2ast(tup)
+    except parser.ParserError, err:
+	#  The parser module rejected the source or the tuple.
+	print
+	print 'parser module raised exception on input file', fileName + ':'
+	traceback.print_exc()
+	_numFailed = _numFailed + 1
+    else:
+	#  Both conversions succeeded; the round trip must be lossless.
+	if tup != parser.ast2tuple(new):
+	    print
+	    print 'parser module failed on input file', fileName
+	    _numFailed = _numFailed + 1
+	else:
+	    print 'o.k.'
+
+def testFile(fileName):
+    t = open(fileName).read()
+    testChunk(t, fileName)
+
+def test():
+    import sys
+    args = sys.argv[1:]
+    if not args:
+	import glob
+	args = glob.glob("*.py")
+    map(testFile, args)
+    sys.exit(_numFailed != 0)
+
+if __name__ == '__main__':
+    test()
+
+#
+#  end of file
commit	16d27e3b141d26853effc6c70214412cebebbe9f	[log] [tgz]
author	Guido van Rossum <guido@python.org>	Wed Aug 21 16:28:53 1996 +0000
committer	Guido van Rossum <guido@python.org>	Wed Aug 21 16:28:53 1996 +0000
tree	c98f065362f2a1dfd552eff86934cb638dfaf055
parent	6dbd190f5ec3127ad7c5ef6fc67d2f02c4cc1492 [diff]