blob: d376568fc98a0c63041b63baca4afd64be94ea2b [file] [log] [blame]
"""Provide advanced parsing abilities for ParenMatch and other extensions.

HyperParser uses PyParser.  PyParser mostly gives information on the
proper indentation of code.  HyperParser gives additional information on
the structure of code.
"""
7
8import string
9import keyword
Kurt B. Kaiser2d7f6a02007-08-22 23:01:33 +000010from idlelib import PyParse
Kurt B. Kaiserb1754452005-11-18 22:05:48 +000011
class HyperParser:
    """Answer structural questions about the statement around a text index.

    On construction the text preceding the index is handed to
    PyParse.Parser; the resulting "bracketing" of the last statement is
    then used to tell whether the index is inside a string, a comment or
    plain code, to locate the surrounding brackets, and to extract the
    expression that ends at the index.

    Attributes set by __init__ / set_index:
        editwin        -- the editor window object passed in
        text           -- editwin.text, the widget that is queried
        rawtext        -- the text given to the parser (without the
                          trailing ' \\n' that was appended for it)
        stopatindex    -- text index corresponding to the end of rawtext
        bracketing     -- parser.get_last_stmt_bracketing(); a sequence
                          of (position in rawtext, nesting level) pairs
        isopener       -- list of bools, parallel to bracketing; True
                          where the entry's level is above its
                          predecessor's
        indexinrawtext -- offset in rawtext of the current index
        indexbracket   -- position in bracketing of the rightmost entry
                          to which the current index belongs
    """

    def __init__(self, editwin, index):
        "To initialize, analyze the surroundings of the given index."

        self.editwin = editwin
        self.text = text = editwin.text

        parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth)

        def index2line(index):
            # "line.column" -> line number
            return int(float(index))
        lno = index2line(text.index(index))

        if not editwin.context_use_ps1:
            # Try successively larger amounts of context above the index
            # until the parser finds a good place to start parsing (or
            # the beginning of the text is reached).
            for context in editwin.num_context_lines:
                startat = max(lno - context, 1)
                startatindex = repr(startat) + ".0"
                stopatindex = "%d.end" % lno
                # We add the newline because PyParse requires a newline
                # at end. We add a space so that index won't be at end
                # of line, so that its status will be the same as the
                # char before it, if should.
                parser.set_str(text.get(startatindex, stopatindex)+' \n')
                bod = parser.find_good_parse_start(
                          editwin._build_char_in_string_func(startatindex))
                if bod is not None or startat == 1:
                    break
            parser.set_lo(bod or 0)
        else:
            # Start right after the closest preceding "console"-tagged
            # range, or at the top of the text if there is none.
            r = text.tag_prevrange("console", index)
            if r:
                startatindex = r[1]
            else:
                startatindex = "1.0"
            stopatindex = "%d.end" % lno
            # We add the newline because PyParse requires it. We add a
            # space so that index won't be at end of line, so that its
            # status will be the same as the char before it, if should.
            parser.set_str(text.get(startatindex, stopatindex)+' \n')
            parser.set_lo(0)

        # We want what the parser has, minus the last newline and space.
        self.rawtext = parser.str[:-2]
        # Parser.str apparently preserves the statement we are in, so
        # that stopatindex can be used to synchronize the string with
        # the text box indices.
        self.stopatindex = stopatindex
        self.bracketing = parser.get_last_stmt_bracketing()
        # find which pairs of bracketing are openers. These always
        # correspond to a character of rawtext.
        self.isopener = [i>0 and self.bracketing[i][1] >
                                 self.bracketing[i-1][1]
                         for i in range(len(self.bracketing))]

        self.set_index(index)

    def set_index(self, index):
        """Set the index to which the functions relate.

        The index must be in the same statement.

        Raises ValueError if the index lies before the start of the
        analyzed text.
        """
        # Offsets are measured backwards from stopatindex, which is the
        # position in the text widget matching the end of rawtext.
        indexinrawtext = (len(self.rawtext) -
                          len(self.text.get(index, self.stopatindex)))
        if indexinrawtext < 0:
            raise ValueError("Index %s precedes the analyzed statement"
                             % index)
        self.indexinrawtext = indexinrawtext
        # find the rightmost bracket to which index belongs
        self.indexbracket = 0
        while (self.indexbracket < len(self.bracketing)-1 and
               self.bracketing[self.indexbracket+1][0] < self.indexinrawtext):
            self.indexbracket += 1
        # An entry exactly at the index is also taken when it is not an
        # opener.
        if (self.indexbracket < len(self.bracketing)-1 and
            self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and
            not self.isopener[self.indexbracket+1]):
            self.indexbracket += 1

    def is_in_string(self):
        """Is the index given to the HyperParser in a string?"""
        # The bracket to which we belong should be an opener.
        # If it's an opener, it has to have a character.
        return (self.isopener[self.indexbracket] and
                self.rawtext[self.bracketing[self.indexbracket][0]]
                in ('"', "'"))

    def is_in_code(self):
        """Is the index given to the HyperParser in normal code?"""
        # Normal code means: not inside a region opened by a comment
        # marker or by a string quote.
        return (not self.isopener[self.indexbracket] or
                self.rawtext[self.bracketing[self.indexbracket][0]]
                not in ('#', '"', "'"))

    def get_surrounding_brackets(self, openers='([{', mustclose=False):
        """Return bracket indexes or None.

        If the index given to the HyperParser is surrounded by a
        bracket defined in openers (or at least has one before it),
        return the indices of the opening bracket and the closing
        bracket (or the end of line, whichever comes first).

        If it is not surrounded by brackets, or the end of line comes
        before the closing bracket and mustclose is True, returns None.
        """

        bracketinglevel = self.bracketing[self.indexbracket][1]
        # Walk backwards to the closest enclosing opener whose character
        # is one of the requested openers.
        before = self.indexbracket
        while (not self.isopener[before] or
               self.rawtext[self.bracketing[before][0]] not in openers or
               self.bracketing[before][1] > bracketinglevel):
            before -= 1
            if before < 0:
                return None
            bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
        # Walk forwards to the first entry below that nesting level.
        after = self.indexbracket + 1
        while (after < len(self.bracketing) and
               self.bracketing[after][1] >= bracketinglevel):
            after += 1

        # Translate rawtext offsets into text indices by counting
        # characters back from stopatindex.
        beforeindex = self.text.index("%s-%dc" %
            (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
        if (after >= len(self.bracketing) or
            self.bracketing[after][0] > len(self.rawtext)):
            if mustclose:
                return None
            afterindex = self.stopatindex
        else:
            # We are after a real char, so it is a ')' and we give the
            # index before it.
            afterindex = self.text.index(
                "%s-%dc" % (self.stopatindex,
                 len(self.rawtext)-(self.bracketing[after][0]-1)))

        return beforeindex, afterindex

    # Ascii chars that may be in a white space
    _whitespace_chars = " \t\n\\"
    # Ascii chars that may be in an identifier
    _id_chars = string.ascii_letters + string.digits + "_"
    # Ascii chars that may be the first char of an identifier
    _id_first_chars = string.ascii_letters + "_"

    # Given a string and pos, return the number of chars in the
    # identifier which ends at pos, or 0 if there is no such one. Saved
    # words are not identifiers.
    def _eat_identifier(self, str, limit, pos):
        """Return the length of the identifier in str that ends at pos.

        The scan never goes left of limit.  0 is returned when the
        candidate does not start with a valid first character or is a
        keyword other than None, False or True.
        """
        i = pos
        while i > limit and str[i-1] in self._id_chars:
            i -= 1
        if (i < pos and (str[i] not in self._id_first_chars or
                         (keyword.iskeyword(str[i:pos]) and
                          str[i:pos] not in {'None', 'False', 'True'}))):
            i = pos
        return pos - i

    def get_expression(self):
        """Return a string with the Python expression which ends at the
        given index, which is empty if there is no real one.

        Raises ValueError if the index is not inside normal code.
        """
        if not self.is_in_code():
            # The two literals are concatenated, so the first one must
            # end with a space.
            raise ValueError("get_expression should only be called "
                             "if index is inside a code.")

        rawtext = self.rawtext
        bracketing = self.bracketing

        brck_index = self.indexbracket
        brck_limit = bracketing[brck_index][0]
        pos = self.indexinrawtext

        last_identifier_pos = pos
        postdot_phase = True

        while True:
            # Eat whitespaces, comments, and if postdot_phase is False - a dot
            while True:
                if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars:
                    # Eat a whitespace
                    pos -= 1
                elif (not postdot_phase and
                      pos > brck_limit and rawtext[pos-1] == '.'):
                    # Eat a dot
                    pos -= 1
                    postdot_phase = True
                # The next line will fail if we are *inside* a comment,
                # but we shouldn't be.
                elif (pos == brck_limit and brck_index > 0 and
                      rawtext[bracketing[brck_index-1][0]] == '#'):
                    # Eat a comment
                    brck_index -= 2
                    brck_limit = bracketing[brck_index][0]
                    pos = bracketing[brck_index+1][0]
                else:
                    # If we didn't eat anything, quit.
                    break

            if not postdot_phase:
                # We didn't find a dot, so the expression end at the
                # last identifier pos.
                break

            ret = self._eat_identifier(rawtext, brck_limit, pos)
            if ret:
                # There is an identifier to eat
                pos = pos - ret
                last_identifier_pos = pos
                # Now, to continue the search, we must find a dot.
                postdot_phase = False
                # (the loop continues now)

            elif pos == brck_limit:
                # We are at a bracketing limit. If it is a closing
                # bracket, eat the bracket, otherwise, stop the search.
                level = bracketing[brck_index][1]
                while brck_index > 0 and bracketing[brck_index-1][1] > level:
                    brck_index -= 1
                if bracketing[brck_index][0] == brck_limit:
                    # We were not at the end of a closing bracket
                    break
                pos = bracketing[brck_index][0]
                brck_index -= 1
                brck_limit = bracketing[brck_index][0]
                last_identifier_pos = pos
                if rawtext[pos] in "([":
                    # [] and () may be used after an identifier, so we
                    # continue. postdot_phase is True, so we don't allow a dot.
                    pass
                else:
                    # We can't continue after other types of brackets
                    if rawtext[pos] in "'\"":
                        # Scan a string prefix
                        while pos > 0 and rawtext[pos - 1] in "rRbBuU":
                            pos -= 1
                        last_identifier_pos = pos
                    break

            else:
                # We've found an operator or something.
                break

        return rawtext[last_identifier_pos:self.indexinrawtext]
Terry Jan Reedy10b1c7c2014-06-16 19:01:01 -0400252
253
if __name__ == '__main__':
    # Running this file directly executes the HyperParser test module.
    import unittest
    unittest.main(module='idlelib.idle_test.test_hyperparser',
                  verbosity=2)