Blame - Mac/Tools/IDE/PyFontify.py - platform/external/python/cpython3

1999-01-30 22:39:17 +0000

[diff] [blame]

1

"""Module to analyze Python source code; for syntax coloring tools.

2

3

Interface:

Tim Peters

2004-07-18 06:16:08 +0000

[diff] [blame]

4

tags = fontify(pytext, searchfrom, searchto)

Just van Rossum

1999-01-30 22:39:17 +0000

[diff] [blame]

5

6

The 'pytext' argument is a string containing Python source code.

Tim Peters

2004-07-18 06:16:08 +0000

[diff] [blame]

7

The (optional) arguments 'searchfrom' and 'searchto' may specify a slice of pytext to search.

Just van Rossum

1999-01-30 22:39:17 +0000

[diff] [blame]

8

The returned value is a list of tuples, formatted like this:

Tim Peters

2004-07-18 06:16:08 +0000

[diff] [blame]

9

[('keyword', 0, 6, None), ('keyword', 11, 17, None), ('comment', 23, 53, None), etc. ]

Just van Rossum

1999-01-30 22:39:17 +0000

[diff] [blame]

10

The tuple contents are always like this:

Tim Peters

2004-07-18 06:16:08 +0000

[diff] [blame]

11

(tag, startindex, endindex, sublist)

Just van Rossum

1999-01-30 22:39:17 +0000

[diff] [blame]

12

tag is one of 'keyword', 'string', 'comment' or 'identifier'

Tim Peters

2004-07-18 06:16:08 +0000

[diff] [blame]

13

sublist is not used, hence always None.

Just van Rossum

1999-01-30 22:39:17 +0000

[diff] [blame]

14

"""

15

16

# Based on FontText.py by Mitchell S. Chapman,

17

# which was modified by Zachary Roadhouse,

18

# then un-Tk'd by Just van Rossum.

19

# Many thanks for regular expression debugging & authoring are due to:

Tim Peters

2004-07-18 06:16:08 +0000

[diff] [blame]

20

# Tim (the-incredib-ly y'rs) Peters and Christian Tismer

Just van Rossum

1999-01-30 22:39:17 +0000

[diff] [blame]

21

# So, who owns the copyright? ;-) How about this:

Tim Peters

2004-07-18 06:16:08 +0000

[diff] [blame]

22

23

# Mitchell S. Chapman,

24

# Zachary Roadhouse,

25

# Tim Peters,

26

# Just van Rossum

Just van Rossum

1999-01-30 22:39:17 +0000

[diff] [blame]

27

Just van Rossum

2001-07-10 19:25:40 +0000

[diff] [blame]

28

__version__ = "0.4"

Just van Rossum

1999-01-30 22:39:17 +0000

[diff] [blame]

29

Just van Rossum

2001-07-10 19:25:40 +0000

[diff] [blame]

30

import string

31

import re

Just van Rossum

1999-01-30 22:39:17 +0000

[diff] [blame]

32

33

# First a little helper, since I don't like to repeat things. (Tismer speaking)

34

import string

35

def replace(where, what, with_):
    """Return a copy of 'where' with each occurrence of 'what' replaced.

    where -- the string to operate on
    what  -- the substring to look for
    with_ -- the replacement text

    The third parameter was originally spelled 'with', which is a reserved
    word in current Python and makes the 'def' a syntax error; all callers
    in this module pass it positionally, so the rename is transparent.
    """
    # str.replace does what the old string.split/string.join pair did,
    # and works on every Python version that has string methods.
    return where.replace(what, with_)

Just van Rossum

1999-01-30 22:39:17 +0000

[diff] [blame]

37

38

# This list of keywords is taken from ref/node13.html of the

39

# Python 1.3 HTML documentation. ("access" is intentionally omitted.)

40

# Words that get the 'keyword' tag.  The list is joined with "|" to form
# the keyword alternation of the main regular expression.
keywordsList = [
    "assert", "exec", "del",
    "from", "lambda", "return",
    "and", "elif", "global",
    "not", "try", "break",
    "else", "if", "or",
    "while", "class", "except",
    "import", "pass", "continue",
    "finally", "in", "print",
    "def", "for", "is",
    "raise", "yield",
]

Just van Rossum

1999-01-30 22:39:17 +0000

[diff] [blame]

48

49

# Build up a regular expression which will match anything

50

# interesting, including multi-line triple-quoted strings.

Just van Rossum

2001-07-10 19:25:40 +0000

[diff] [blame]

51

# A comment: from '#' up to (not including) the end of the line.
commentPat = r"#[^\n]*"

# Template for a one-line string literal; every 'q' stands for the quote
# character.  Between the quotes: runs of ordinary characters, optionally
# interleaved with backslash-escaped characters.
pat = r"q[^\\q\n]*(\\[\000-\377][^\\q\n]*)*q"

# Instantiate the template once per quote style and accept either form.
quotePat = "|".join((pat.replace("q", "'"), pat.replace("q", '"')))

Just van Rossum

1999-01-30 22:39:17 +0000

[diff] [blame]

55

56

# Way to go, Tim!

Just van Rossum

2001-07-10 19:25:40 +0000

[diff] [blame]

57

# Template for a triple-quoted string; every 'q' stands for the quote
# character and all whitespace is stripped before use.  Inside the string
# a backslash always consumes the following character, and one or two
# consecutive quotes are accepted as long as they are followed by
# something other than a further quote.
#
# Every "ordinary character" class has to exclude the backslash (hence
# the doubled backslash in each class): otherwise a backslash following
# a lone quote could be matched as a plain character on backtracking, and
# input such as  '''x'\'''  would be taken for a complete string even
# though its final quote run starts with an escaped quote.
pat = r"""
    qqq
    [^\\q]*
    (
        (   \\[\000-\377]
        |   q
            (   \\[\000-\377]
            |   [^\\q]
            |   q
                (   \\[\000-\377]
                |   [^\\q]
                )
            )
        )
        [^\\q]*
    )*
    qqq
"""
pat = "".join(pat.split())  # get rid of whitespace
tripleQuotePat = pat.replace("q", "'") + "|" + pat.replace("q", '"')

Just van Rossum

1999-01-30 22:39:17 +0000

[diff] [blame]

77

78

# Build up a regular expression which matches all and only

79

# Python keywords. This will let us skip the uninteresting

80

# identifier references.

81

# nonKeyPat identifies characters which may legally precede

82

# a keyword pattern.

Just van Rossum

2001-07-10 19:25:40 +0000

[diff] [blame]

83

# One position or character that may legally sit next to a keyword: the
# start of the text, or any character that cannot be part of a name, an
# attribute reference or a quote.
nonKeyPat = r"(^|[^a-zA-Z0-9_.\"'])"

# A keyword wrapped between two nonKeyPat groups; a match therefore also
# covers the neighbouring characters, which fontify() strips off again.
keyPat = "%s(%s)%s" % (nonKeyPat, "|".join(keywordsList), nonKeyPat)

# Everything of interest, tried in this order at each position.
matchPat = "|".join([commentPat, keyPat, tripleQuotePat, quotePat])
matchRE = re.compile(matchPat)

idKeyPat = "[ \t]*[A-Za-z_][A-Za-z_0-9.]*"  # Ident w. leading whitespace.
idRE = re.compile(idKeyPat)

Just van Rossum

1999-01-30 22:39:17 +0000

[diff] [blame]

92

93

94

def fontify(pytext, searchfrom = 0, searchto = None):
    """Return a list of syntax-colouring tags for the source in 'pytext'.

    pytext     -- string containing Python source code
    searchfrom -- index at which scanning starts (default 0)
    searchto   -- scanning stops at the first match that starts at or
                  after this index (default: len(pytext)); a match that
                  starts before it is tagged in full even if it extends
                  past it

    The result is a list of (tag, startindex, endindex, None) tuples in
    order of appearance, where tag is 'keyword', 'string', 'comment' or
    'identifier'.  An 'identifier' tag is only produced for the name
    directly following 'def' or 'class'; its span begins right after the
    keyword and so includes the separating blanks matched by idRE.
    """
    if searchto is None:
        searchto = len(pytext)
    # Cache a few attributes for quicker reference.
    search = matchRE.search
    # Anchored match: the identifier must begin exactly where the
    # keyword ended.
    idMatch = idRE.match

    tags = []
    tags_append = tags.append
    commentTag = 'comment'
    stringTag = 'string'
    keywordTag = 'keyword'
    identifierTag = 'identifier'

    end = searchfrom
    while 1:
        m = search(pytext, end)
        if m is None:
            break   # EXIT LOOP
        start = m.start()
        if start >= searchto:
            break   # EXIT LOOP
        match = m.group(0)
        end = start + len(match)
        c = match[0]
        if c == "#":
            tags_append((commentTag, start, end, None))
        elif c in "'\"":
            tags_append((stringTag, start, end, None))
        else:
            # Must have matched a keyword.  keyPat surrounds the keyword
            # with nonKeyPat on both sides.  The trailing one always
            # consumes a character.  The leading one is empty only when
            # '^' matched, which can happen solely at index 0 of pytext
            # (not at 'searchfrom'); otherwise it is one character that
            # can never be a letter, whereas every keyword starts with a
            # lowercase letter.  So the first character of the match
            # tells the two cases apart.
            if "a" <= c <= "z":
                # Keyword at the very start of the text:
                # only a redundant char at the end.
                match = match[:-1]
            else:
                # There's a redundant char before and after it, strip!
                match = match[1:-1]
                start = start + 1
            # Give the trailing char back so the next search can use it
            # as the leading char of an adjacent keyword.
            end = end - 1
            tags_append((keywordTag, start, end, None))
            # If this was a defining keyword, look ahead to the
            # following identifier.
            if match in ("def", "class"):
                m = idMatch(pytext, end)
                if m is not None:
                    start = end
                    end = m.end()
                    tags_append((identifierTag, start, end, None))
    return tags

Just van Rossum

1999-01-30 22:39:17 +0000

[diff] [blame]

147

148

149

def test(path):

Tim Peters