Blame - Mac/Tools/IDE/PyFontify.py - platform/external/python/cpython2

blob: b5d6102777138b228739da39ccc0a2338aab40c4 [file] [log] [blame]

Just van Rossum	40f9b7b	1999-01-30 22:39:17 +0000	[diff] [blame]	1	"""Module to analyze Python source code; for syntax coloring tools.
				2
				3	Interface:
				4	tags = fontify(pytext, searchfrom, searchto)
				5
				6	The 'pytext' argument is a string containing Python source code.
				7	The (optional) arguments 'searchfrom' and 'searchto' may contain a slice in pytext.
				8	The returned value is a list of tuples, formatted like this:
				9	[('keyword', 0, 6, None), ('keyword', 11, 17, None), ('comment', 23, 53, None), etc. ]
				10	The tuple contents are always like this:
				11	(tag, startindex, endindex, sublist)
				12	tag is one of 'keyword', 'string', 'comment' or 'identifier'
				13	sublist is not used, hence always None.
				14	"""
				15
				16	# Based on FontText.py by Mitchell S. Chapman,
				17	# which was modified by Zachary Roadhouse,
				18	# then un-Tk'd by Just van Rossum.
				19	# Many thanks for regular expression debugging & authoring are due to:
				20	# Tim (the-incredib-ly y'rs) Peters and Christian Tismer
				21	# So, who owns the copyright? ;-) How about this:
Just van Rossum	ed2ed94	2000-07-01 14:30:08 +0000	[diff] [blame]	22	# Copyright 1996-2000:
Just van Rossum	40f9b7b	1999-01-30 22:39:17 +0000	[diff] [blame]	23	# Mitchell S. Chapman,
				24	# Zachary Roadhouse,
				25	# Tim Peters,
				26	# Just van Rossum
				27
Just van Rossum	ed2ed94	2000-07-01 14:30:08 +0000	[diff] [blame]	28	__version__ = "0.3.3"
Just van Rossum	40f9b7b	1999-01-30 22:39:17 +0000	[diff] [blame]	29
Jack Jansen	9ad2752	2001-02-21 13:54:31 +0000	[diff] [blame]	30	import string, re
Just van Rossum	40f9b7b	1999-01-30 22:39:17 +0000	[diff] [blame]	31
				32	# First a little helper, since I don't like to repeat things. (Tismer speaking)
				33	import string
				34	def replace(where, what, with):
				35	return string.join(string.split(where, what), with)
				36
				37	# This list of keywords is taken from ref/node13.html of the
				38	# Python 1.3 HTML documentation. ("access" is intentionally omitted.)
				39	keywordsList = [
Just van Rossum	6e5f2d1	2000-04-09 19:44:13 +0000	[diff] [blame]	40	"assert", "exec",
Just van Rossum	40f9b7b	1999-01-30 22:39:17 +0000	[diff] [blame]	41	"del", "from", "lambda", "return",
				42	"and", "elif", "global", "not", "try",
				43	"break", "else", "if", "or", "while",
				44	"class", "except", "import", "pass",
				45	"continue", "finally", "in", "print",
				46	"def", "for", "is", "raise"]
				47
				48	# Build up a regular expression which will match anything
				49	# interesting, including multi-line triple-quoted strings.
				50	commentPat = "#.*"
				51
				52	pat = "q[^\q\n]\(\\\\[\000-\377][^\q\n]\)*q"
				53	quotePat = replace(pat, "q", "'") + "\\|" + replace(pat, 'q', '"')
				54
				55	# Way to go, Tim!
				56	pat = """
				57	qqq
				58	[^\\q]*
				59	\(
				60	\( \\\\[\000-\377]
				61	\\| q
				62	\( \\\\[\000-\377]
				63	\\| [^\\q]
				64	\\| q
				65	\( \\\\[\000-\377]
				66	\\| [^\\q]
				67	\)
				68	\)
				69	\)
				70	[^\\q]*
				71	\)*
				72	qqq
				73	"""
				74	pat = string.join(string.split(pat), '') # get rid of whitespace
				75	tripleQuotePat = replace(pat, "q", "'") + "\\|" + replace(pat, 'q', '"')
				76
				77	# Build up a regular expression which matches all and only
				78	# Python keywords. This will let us skip the uninteresting
				79	# identifier references.
				80	# nonKeyPat identifies characters which may legally precede
				81	# a keyword pattern.
				82	nonKeyPat = "\(^\\|[^a-zA-Z0-9_.\"']\)"
				83
				84	keyPat = nonKeyPat + "\("
				85	for keyword in keywordsList:
				86	keyPat = keyPat + keyword + "\\|"
				87	keyPat = keyPat[:-2] + "\)" + nonKeyPat
				88
Just van Rossum	ed2ed94	2000-07-01 14:30:08 +0000	[diff] [blame]	89	matchPat = commentPat + "\\|" + keyPat + "\\|" + tripleQuotePat + "\\|" + quotePat
Jack Jansen	9ad2752	2001-02-21 13:54:31 +0000	[diff] [blame]	90	matchRE = re.compile(matchPat)
Just van Rossum	40f9b7b	1999-01-30 22:39:17 +0000	[diff] [blame]	91
				92	idKeyPat = "[ \t][A-Za-z_][A-Za-z_0-9.]" # Ident w. leading whitespace.
Jack Jansen	9ad2752	2001-02-21 13:54:31 +0000	[diff] [blame]	93	idRE = re.compile(idKeyPat)
Just van Rossum	40f9b7b	1999-01-30 22:39:17 +0000	[diff] [blame]	94
				95
				96	def fontify(pytext, searchfrom = 0, searchto = None):
				97	if searchto is None:
				98	searchto = len(pytext)
				99	# Cache a few attributes for quicker reference.
				100	search = matchRE.search
Just van Rossum	40f9b7b	1999-01-30 22:39:17 +0000	[diff] [blame]	101	idSearch = idRE.search
Just van Rossum	40f9b7b	1999-01-30 22:39:17 +0000	[diff] [blame]	102
				103	tags = []
				104	tags_append = tags.append
				105	commentTag = 'comment'
				106	stringTag = 'string'
				107	keywordTag = 'keyword'
				108	identifierTag = 'identifier'
				109
				110	start = 0
				111	end = searchfrom
				112	while 1:
Jack Jansen	9ad2752	2001-02-21 13:54:31 +0000	[diff] [blame]	113	m = search(pytext, end)
				114	if not m or m.start() >= searchto:
Just van Rossum	40f9b7b	1999-01-30 22:39:17 +0000	[diff] [blame]	115	break # EXIT LOOP
Jack Jansen	9ad2752	2001-02-21 13:54:31 +0000	[diff] [blame]	116	match = m.group(0)
Just van Rossum	40f9b7b	1999-01-30 22:39:17 +0000	[diff] [blame]	117	end = start + len(match)
				118	c = match[0]
				119	if c not in "#'\"":
				120	# Must have matched a keyword.
				121	if start <> searchfrom:
				122	# there's still a redundant char before and after it, strip!
				123	match = match[1:-1]
				124	start = start + 1
				125	else:
				126	# this is the first keyword in the text.
				127	# Only a space at the end.
				128	match = match[:-1]
				129	end = end - 1
				130	tags_append((keywordTag, start, end, None))
				131	# If this was a defining keyword, look ahead to the
				132	# following identifier.
				133	if match in ["def", "class"]:
Jack Jansen	9ad2752	2001-02-21 13:54:31 +0000	[diff] [blame]	134	m = idSearch(pytext, end)
				135	if m and m.start() == end:
				136	match = m.group(0)
Just van Rossum	40f9b7b	1999-01-30 22:39:17 +0000	[diff] [blame]	137	end = start + len(match)
				138	tags_append((identifierTag, start, end, None))
				139	elif c == "#":
				140	tags_append((commentTag, start, end, None))
				141	else:
				142	tags_append((stringTag, start, end, None))
				143	return tags
				144
				145
				146	def test(path):
				147	f = open(path)
				148	text = f.read()
				149	f.close()
				150	tags = fontify(text)
				151	for tag, start, end, sublist in tags:
				152	print tag, `text[start:end]`