Blame - Mac/Tools/IDE/PyFontify.py - platform/external/python/cpython3

blob: a61de659b8f6cce1f08b44af24c8bd12b397f1d7 [file] [log] [blame]

Just van Rossum	40f9b7b	1999-01-30 22:39:17 +0000	[diff] [blame]	1	"""Module to analyze Python source code; for syntax coloring tools.
				2
				3	Interface:
				4	tags = fontify(pytext, searchfrom, searchto)
				5
				6	The 'pytext' argument is a string containing Python source code.
				7	The (optional) arguments 'searchfrom' and 'searchto' may contain a slice in pytext.
				8	The returned value is a list of tuples, formatted like this:
				9	[('keyword', 0, 6, None), ('keyword', 11, 17, None), ('comment', 23, 53, None), etc. ]
				10	The tuple contents are always like this:
				11	(tag, startindex, endindex, sublist)
				12	tag is one of 'keyword', 'string', 'comment' or 'identifier'
				13	sublist is not used, hence always None.
				14	"""
				15
				16	# Based on FontText.py by Mitchell S. Chapman,
				17	# which was modified by Zachary Roadhouse,
				18	# then un-Tk'd by Just van Rossum.
				19	# Many thanks for regular expression debugging & authoring are due to:
				20	# Tim (the-incredib-ly y'rs) Peters and Cristian Tismer
				21	# So, who owns the copyright? ;-) How about this:
				22	# Copyright 1996-1997:
				23	# Mitchell S. Chapman,
				24	# Zachary Roadhouse,
				25	# Tim Peters,
				26	# Just van Rossum
				27
				28	__version__ = "0.3.1"
				29
import re
import regex
import string
				31
				32	# First a little helper, since I don't like to repeat things. (Tismer speaking)
				33	import string
				34	def replace(where, what, with):
				35	return string.join(string.split(where, what), with)
				36
				37	# This list of keywords is taken from ref/node13.html of the
				38	# Python 1.3 HTML documentation. ("access" is intentionally omitted.)
				39	keywordsList = [
				40	"assert",
				41	"del", "from", "lambda", "return",
				42	"and", "elif", "global", "not", "try",
				43	"break", "else", "if", "or", "while",
				44	"class", "except", "import", "pass",
				45	"continue", "finally", "in", "print",
				46	"def", "for", "is", "raise"]
				47
				48	# Build up a regular expression which will match anything
				49	# interesting, including multi-line triple-quoted strings.
				50	commentPat = "#.*"
				51
				52	pat = "q[^\q\n]\(\\\\[\000-\377][^\q\n]\)*q"
				53	quotePat = replace(pat, "q", "'") + "\\|" + replace(pat, 'q', '"')
				54
				55	# Way to go, Tim!
				56	pat = """
				57	qqq
				58	[^\\q]*
				59	\(
				60	\( \\\\[\000-\377]
				61	\\| q
				62	\( \\\\[\000-\377]
				63	\\| [^\\q]
				64	\\| q
				65	\( \\\\[\000-\377]
				66	\\| [^\\q]
				67	\)
				68	\)
				69	\)
				70	[^\\q]*
				71	\)*
				72	qqq
				73	"""
				74	pat = string.join(string.split(pat), '') # get rid of whitespace
				75	tripleQuotePat = replace(pat, "q", "'") + "\\|" + replace(pat, 'q', '"')
				76
				77	# Build up a regular expression which matches all and only
				78	# Python keywords. This will let us skip the uninteresting
				79	# identifier references.
				80	# nonKeyPat identifies characters which may legally precede
				81	# a keyword pattern.
				82	nonKeyPat = "\(^\\|[^a-zA-Z0-9_.\"']\)"
				83
				84	keyPat = nonKeyPat + "\("
				85	for keyword in keywordsList:
				86	keyPat = keyPat + keyword + "\\|"
				87	keyPat = keyPat[:-2] + "\)" + nonKeyPat
				88
				89	matchPat = keyPat + "\\|" + commentPat + "\\|" + tripleQuotePat + "\\|" + quotePat
				90	matchRE = regex.compile(matchPat)
				91
				92	idKeyPat = "[ \t][A-Za-z_][A-Za-z_0-9.]" # Ident w. leading whitespace.
				93	idRE = regex.compile(idKeyPat)
				94
				95
				96	def fontify(pytext, searchfrom = 0, searchto = None):
				97	if searchto is None:
				98	searchto = len(pytext)
				99	# Cache a few attributes for quicker reference.
				100	search = matchRE.search
				101	group = matchRE.group
				102	idSearch = idRE.search
				103	idGroup = idRE.group
				104
				105	tags = []
				106	tags_append = tags.append
				107	commentTag = 'comment'
				108	stringTag = 'string'
				109	keywordTag = 'keyword'
				110	identifierTag = 'identifier'
				111
				112	start = 0
				113	end = searchfrom
				114	while 1:
				115	start = search(pytext, end)
				116	if start < 0 or start >= searchto:
				117	break # EXIT LOOP
				118	match = group(0)
				119	end = start + len(match)
				120	c = match[0]
				121	if c not in "#'\"":
				122	# Must have matched a keyword.
				123	if start <> searchfrom:
				124	# there's still a redundant char before and after it, strip!
				125	match = match[1:-1]
				126	start = start + 1
				127	else:
				128	# this is the first keyword in the text.
				129	# Only a space at the end.
				130	match = match[:-1]
				131	end = end - 1
				132	tags_append((keywordTag, start, end, None))
				133	# If this was a defining keyword, look ahead to the
				134	# following identifier.
				135	if match in ["def", "class"]:
				136	start = idSearch(pytext, end)
				137	if start == end:
				138	match = idGroup(0)
				139	end = start + len(match)
				140	tags_append((identifierTag, start, end, None))
				141	elif c == "#":
				142	tags_append((commentTag, start, end, None))
				143	else:
				144	tags_append((stringTag, start, end, None))
				145	return tags
				146
				147
				148	def test(path):
				149	f = open(path)
				150	text = f.read()
				151	f.close()
				152	tags = fontify(text)
				153	for tag, start, end, sublist in tags:
				154	print tag, `text[start:end]`