blob: 19e4977f7c33cd629080701b3b3e70881781fab2 [file] [log] [blame]
"""Provide advanced parsing abilities for the ParenMatch and other extensions.

HyperParser uses PyParser.  PyParser mostly gives information on the
proper indentation of code.  HyperParser gives additional information on
the structure of code.
"""
7
8import string
9import keyword
Florent Xiclunad630c042010-04-02 07:24:52 +000010from idlelib import PyParse
Kurt B. Kaiserb1754452005-11-18 22:05:48 +000011
class HyperParser:
    """Answer structural questions about the statement around a text index.

    On construction the text preceding the index is handed to a
    PyParse.Parser; the resulting statement text and bracketing table are
    stored so that later queries (is the index inside a string? which
    brackets surround it? which expression ends at it?) need no re-parsing.
    """

    def __init__(self, editwin, index):
        """To initialize, analyze the surroundings of the given index.

        editwin -- editor window object; this method reads its ``text``,
            ``indentwidth``, ``tabwidth``, ``context_use_ps1`` and
            ``num_context_lines`` attributes and calls its
            ``_build_char_in_string_func`` method.
        index -- a text widget index located in the statement to analyze.
        """
        self.editwin = editwin
        self.text = text = editwin.text

        parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth)

        def index2line(index):
            # A "line.column" index parsed as a float truncates to the line.
            return int(float(index))
        lno = index2line(text.index(index))

        if not editwin.context_use_ps1:
            # Try each candidate amount of preceding context in turn until
            # the parser finds a good place to start (or we hit line 1).
            for context in editwin.num_context_lines:
                startat = max(lno - context, 1)
                startatindex = repr(startat) + ".0"
                stopatindex = "%d.end" % lno
                # We add the newline because PyParse requires a newline
                # at end.  We add a space so that index won't be at end
                # of line, so that its status will be the same as the
                # char before it, if should.
                parser.set_str(text.get(startatindex, stopatindex) + ' \n')
                bod = parser.find_good_parse_start(
                    editwin._build_char_in_string_func(startatindex))
                if bod is not None or startat == 1:
                    break
            parser.set_lo(bod or 0)
        else:
            # Start right after the most recent "console"-tagged range
            # before index, or at the top of the text if there is none.
            console_range = text.tag_prevrange("console", index)
            if console_range:
                startatindex = console_range[1]
            else:
                startatindex = "1.0"
            stopatindex = "%d.end" % lno
            # We add the newline because PyParse requires it.  We add a
            # space so that index won't be at end of line, so that its
            # status will be the same as the char before it, if should.
            parser.set_str(text.get(startatindex, stopatindex) + ' \n')
            parser.set_lo(0)

        # We want what the parser has, minus the last newline and space.
        self.rawtext = parser.str[:-2]
        # Parser.str apparently preserves the statement we are in, so
        # that stopatindex can be used to synchronize the string with
        # the text box indices.
        self.stopatindex = stopatindex
        # Sequence of (position in rawtext, nesting level) pairs.
        self.bracketing = parser.get_last_stmt_bracketing()
        # Find which entries of bracketing are openers: those whose level
        # is higher than the previous entry's.  These always correspond to
        # a character of rawtext.  Entry 0 is never an opener.
        self.isopener = [i > 0 and
                         self.bracketing[i][1] > self.bracketing[i-1][1]
                         for i in range(len(self.bracketing))]

        self.set_index(index)

    def set_index(self, index):
        """Set the index to which the functions relate.

        The index must be in the same statement.  Sets ``indexinrawtext``
        (offset of index inside ``rawtext``) and ``indexbracket`` (the
        entry of ``bracketing`` that index belongs to).

        Raises ValueError if index lies before the analyzed text.
        """
        # rawtext ends at stopatindex, so the offset of index is the total
        # length minus the number of chars between index and stopatindex.
        indexinrawtext = (len(self.rawtext) -
                          len(self.text.get(index, self.stopatindex)))
        if indexinrawtext < 0:
            raise ValueError("Index %s precedes the analyzed statement"
                             % index)
        self.indexinrawtext = indexinrawtext
        # Find the rightmost bracket to which index belongs.
        self.indexbracket = 0
        while (self.indexbracket < len(self.bracketing)-1 and
               self.bracketing[self.indexbracket+1][0] < self.indexinrawtext):
            self.indexbracket += 1
        # An entry starting exactly at index also applies, unless it is an
        # opener (the index is then still before the opening character).
        if (self.indexbracket < len(self.bracketing)-1 and
            self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and
            not self.isopener[self.indexbracket+1]):
            self.indexbracket += 1

    def is_in_string(self):
        """Is the index given to the HyperParser in a string?"""
        # The bracket to which we belong should be an opener.
        # If it's an opener, it has to have a character.
        return (self.isopener[self.indexbracket] and
                self.rawtext[self.bracketing[self.indexbracket][0]]
                in ('"', "'"))

    def is_in_code(self):
        """Is the index given to the HyperParser in normal code?

        Normal code means neither inside a string nor inside a comment.
        """
        return (not self.isopener[self.indexbracket] or
                self.rawtext[self.bracketing[self.indexbracket][0]]
                not in ('#', '"', "'"))

    def get_surrounding_brackets(self, openers='([{', mustclose=False):
        """Return bracket indexes or None.

        If the index given to the HyperParser is surrounded by a
        bracket defined in openers (or at least has one before it),
        return the indices of the opening bracket and the closing
        bracket (or the end of line, whichever comes first).

        If it is not surrounded by brackets, or the end of line comes
        before the closing bracket and mustclose is True, returns None.
        """
        bracketinglevel = self.bracketing[self.indexbracket][1]
        # Walk backwards to the nearest enclosing opener listed in openers.
        before = self.indexbracket
        while (not self.isopener[before] or
               self.rawtext[self.bracketing[before][0]] not in openers or
               self.bracketing[before][1] > bracketinglevel):
            before -= 1
            if before < 0:
                return None
            bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
        # Walk forwards to the first entry below that opener's level.
        after = self.indexbracket + 1
        while (after < len(self.bracketing) and
               self.bracketing[after][1] >= bracketinglevel):
            after += 1

        # Convert rawtext offsets to text indices by counting characters
        # back from stopatindex.
        beforeindex = self.text.index("%s-%dc" %
            (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
        if (after >= len(self.bracketing) or
            self.bracketing[after][0] > len(self.rawtext)):
            # No closing bracket inside the analyzed text.
            if mustclose:
                return None
            afterindex = self.stopatindex
        else:
            # We are after a real char, so it is a ')' and we give the
            # index before it.
            afterindex = self.text.index(
                "%s-%dc" % (self.stopatindex,
                 len(self.rawtext)-(self.bracketing[after][0]-1)))

        return beforeindex, afterindex

    # Ascii chars that may be in a white space
    _whitespace_chars = " \t\n\\"
    # Ascii chars that may be in an identifier
    _id_chars = string.ascii_letters + string.digits + "_"
    # Ascii chars that may be the first char of an identifier
    _id_first_chars = string.ascii_letters + "_"

    def _eat_identifier(self, str, limit, pos):
        """Return the length of the identifier which ends at pos, or 0.

        str -- the string to scan.
        limit -- lowest position the scan may reach.
        pos -- position just past the last char of the candidate.

        Reserved words (keywords) are not identifiers, nor is a run of
        identifier chars that starts with a char not allowed first.
        """
        i = pos
        while i > limit and str[i-1] in self._id_chars:
            i -= 1
        if (i < pos and (str[i] not in self._id_first_chars or
                         keyword.iskeyword(str[i:pos]))):
            i = pos
        return pos - i

    def get_expression(self):
        """Return a string with the Python expression which ends at the
        given index, which is empty if there is no real one.

        Raises ValueError if the index is not in normal code.
        """
        if not self.is_in_code():
            raise ValueError("get_expression should only be called "
                             "if index is inside a code.")

        rawtext = self.rawtext
        bracketing = self.bracketing

        brck_index = self.indexbracket
        brck_limit = bracketing[brck_index][0]
        pos = self.indexinrawtext

        last_identifier_pos = pos
        # True while a dot may not be consumed next: at the start, right
        # after a dot, and right after stepping back over a bracket pair.
        postdot_phase = True

        while True:
            # Eat whitespaces, comments, and if postdot_phase is False - a dot
            while True:
                if pos > brck_limit and rawtext[pos-1] in self._whitespace_chars:
                    # Eat a whitespace
                    pos -= 1
                elif (not postdot_phase and
                      pos > brck_limit and rawtext[pos-1] == '.'):
                    # Eat a dot
                    pos -= 1
                    postdot_phase = True
                # The next line will fail if we are *inside* a comment,
                # but we shouldn't be.
                elif (pos == brck_limit and brck_index > 0 and
                      rawtext[bracketing[brck_index-1][0]] == '#'):
                    # Eat a comment
                    brck_index -= 2
                    brck_limit = bracketing[brck_index][0]
                    pos = bracketing[brck_index+1][0]
                else:
                    # If we didn't eat anything, quit.
                    break

            if not postdot_phase:
                # We didn't find a dot, so the expression end at the
                # last identifier pos.
                break

            ret = self._eat_identifier(rawtext, brck_limit, pos)
            if ret:
                # There is an identifier to eat
                pos = pos - ret
                last_identifier_pos = pos
                # Now, to continue the search, we must find a dot.
                postdot_phase = False
                # (the loop continues now)

            elif pos == brck_limit:
                # We are at a bracketing limit.  If it is a closing
                # bracket, eat the bracket, otherwise, stop the search.
                level = bracketing[brck_index][1]
                while brck_index > 0 and bracketing[brck_index-1][1] > level:
                    brck_index -= 1
                if bracketing[brck_index][0] == brck_limit:
                    # We were not at the end of a closing bracket
                    break
                pos = bracketing[brck_index][0]
                brck_index -= 1
                brck_limit = bracketing[brck_index][0]
                last_identifier_pos = pos
                if rawtext[pos] in "([":
                    # [] and () may be used after an identifier, so we
                    # continue.  postdot_phase is True, so we don't allow
                    # a dot.
                    pass
                else:
                    # We can't continue after other types of brackets
                    if rawtext[pos] in "'\"":
                        # Scan a string prefix
                        while pos > 0 and rawtext[pos - 1] in "rRbBuU":
                            pos -= 1
                        last_identifier_pos = pos
                    break

            else:
                # We've found an operator or something.
                break

        return rawtext[last_identifier_pos:self.indexinrawtext]