Kurt B. Kaiser | b175445 | 2005-11-18 22:05:48 +0000 | [diff] [blame] | 1 | """ |
| 2 | HyperParser |
| 3 | =========== |
| 4 | This module defines the HyperParser class, which provides advanced parsing |
| 5 | abilities for the ParenMatch and other extensions. |
| 6 | The HyperParser uses PyParser. PyParser is intended mostly to give information |
| 7 | on the proper indentation of code. HyperParser gives some information on the |
| 8 | structure of code, used by extensions to help the user. |
| 9 | """ |
| 10 | |
| 11 | import string |
| 12 | import keyword |
| 13 | import PyParse |
| 14 | |
class HyperParser:
    """Analyze the structure of the statement around an index of a text.

    The instance keeps the raw text handed to PyParse (``rawtext``), the
    "bracketing" of the last statement as returned by
    ``parser.get_last_stmt_bracketing()`` and the position of the index of
    interest inside both of them.

    As used throughout this class, each bracketing entry is a pair
    ``(position in rawtext, nesting level)``.  An entry whose level is
    higher than that of the previous entry is an "opener": it sits on a
    real character of rawtext (a bracket, a quote or a '#').
    """

    def __init__(self, editwin, index):
        """Initialize the HyperParser to analyze the surroundings of the given
        index.

        editwin must provide the attributes read here: text, indentwidth,
        tabwidth, context_use_ps1, num_context_lines and
        _build_char_in_string_func.
        """

        self.editwin = editwin
        self.text = text = editwin.text

        parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth)

        def index2line(index):
            # A text index looks like "line.column"; keep the line part.
            return int(float(index))
        lno = index2line(text.index(index))

        if not editwin.context_use_ps1:
            # Try increasingly large amounts of preceding context until the
            # parser reports a good place to start parsing from, or until
            # the context reaches the first line of the text.
            for context in editwin.num_context_lines:
                startat = max(lno - context, 1)
                # "%d.0" replaces the Python-2-only backtick repr; the
                # resulting index string is the same for integers.
                startatindex = "%d.0" % startat
                stopatindex = "%d.end" % lno
                # We add the newline because PyParse requires a newline at
                # end.  We add a space so that index won't be at end of
                # line, so that its status will be the same as the char
                # before it, if it should.
                parser.set_str(text.get(startatindex, stopatindex) + ' \n')
                bod = parser.find_good_parse_start(
                    editwin._build_char_in_string_func(startatindex))
                if bod is not None or startat == 1:
                    break
            parser.set_lo(bod or 0)
        else:
            # Start right after the previous range tagged "console", or at
            # the beginning of the text if there is no such range.
            r = text.tag_prevrange("console", index)
            if r:
                startatindex = r[1]
            else:
                startatindex = "1.0"
            stopatindex = "%d.end" % lno
            # We add the newline because PyParse requires a newline at end.
            # We add a space so that index won't be at end of line, so that
            # its status will be the same as the char before it, if it
            # should.
            parser.set_str(text.get(startatindex, stopatindex) + ' \n')
            parser.set_lo(0)

        # We want what the parser has, except for the last newline and space.
        self.rawtext = parser.str[:-2]
        # As far as I can see, parser.str preserves the statement we are in,
        # so that stopatindex can be used to synchronize the string with the
        # text box indices.
        self.stopatindex = stopatindex
        self.bracketing = parser.get_last_stmt_bracketing()
        # Find which entries of bracketing are openers.  These always
        # correspond to a character of rawtext.  The first entry is never
        # an opener.
        self.isopener = [i > 0 and
                         self.bracketing[i][1] > self.bracketing[i-1][1]
                         for i in range(len(self.bracketing))]

        self.set_index(index)

    def set_index(self, index):
        """Set the index to which the functions relate. Note that it must be
        in the same statement.

        Raises ValueError if the index lies before the analyzed text.
        """
        # rawtext ends at stopatindex, so the distance from index to
        # stopatindex gives the offset of index counted from the end.
        indexinrawtext = (len(self.rawtext) -
                          len(self.text.get(index, self.stopatindex)))
        if indexinrawtext < 0:
            raise ValueError("The index given is before the analyzed statement")
        self.indexinrawtext = indexinrawtext
        # Find the rightmost bracket to which index belongs.
        self.indexbracket = 0
        while (self.indexbracket < len(self.bracketing)-1 and
               self.bracketing[self.indexbracket+1][0] < self.indexinrawtext):
            self.indexbracket += 1
        # An entry located exactly at the index is entered only when it is
        # not an opener, i.e. the index is just after a closing character.
        if (self.indexbracket < len(self.bracketing)-1 and
                self.bracketing[self.indexbracket+1][0] ==
                self.indexinrawtext and
                not self.isopener[self.indexbracket+1]):
            self.indexbracket += 1

    def is_in_string(self):
        """Is the index given to the HyperParser in a string?"""
        # The bracket to which we belong should be an opener.
        # If it's an opener, it has to have a character.
        return (self.isopener[self.indexbracket] and
                self.rawtext[self.bracketing[self.indexbracket][0]]
                in ('"', "'"))

    def is_in_code(self):
        """Is the index given to the HyperParser in normal code, that is,
        neither in a string nor in a comment?
        """
        return (not self.isopener[self.indexbracket] or
                self.rawtext[self.bracketing[self.indexbracket][0]]
                not in ('#', '"', "'"))

    def get_surrounding_brackets(self, openers='([{', mustclose=False):
        """If the index given to the HyperParser is surrounded by a bracket
        defined in openers (or at least has one before it), return the
        indices of the opening bracket and the closing bracket (or the
        end of line, whichever comes first).
        If it is not surrounded by brackets, or the end of line comes before
        the closing bracket and mustclose is True, returns None.
        """
        bracketinglevel = self.bracketing[self.indexbracket][1]
        # Walk backwards to the closest opener from the requested set that
        # is not nested deeper than every entry passed on the way.
        before = self.indexbracket
        while (not self.isopener[before] or
               self.rawtext[self.bracketing[before][0]] not in openers or
               self.bracketing[before][1] > bracketinglevel):
            before -= 1
            if before < 0:
                return None
            bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
        # Walk forwards to the first entry below that level.
        after = self.indexbracket + 1
        while (after < len(self.bracketing) and
               self.bracketing[after][1] >= bracketinglevel):
            after += 1

        # Positions in rawtext are turned into text indices by counting
        # characters back from stopatindex.
        beforeindex = self.text.index(
            "%s-%dc" % (self.stopatindex,
                        len(self.rawtext) - self.bracketing[before][0]))
        if (after >= len(self.bracketing) or
                self.bracketing[after][0] > len(self.rawtext)):
            if mustclose:
                return None
            afterindex = self.stopatindex
        else:
            # We are after a real char, so it is a ')' and we give the index
            # before it.
            afterindex = self.text.index(
                "%s-%dc" % (self.stopatindex,
                            len(self.rawtext) -
                            (self.bracketing[after][0] - 1)))

        return beforeindex, afterindex

    # This string includes all chars that may be in a white space
    _whitespace_chars = " \t\n\\"
    # This string includes all chars that may be in an identifier
    _id_chars = string.ascii_letters + string.digits + "_"
    # This string includes all chars that may be the first char of an identifier
    _id_first_chars = string.ascii_letters + "_"
    # This string includes all chars that may appear in a string literal prefix
    _string_prefix_chars = "rRbBuU"

    def _eat_identifier(self, str, limit, pos):
        """Given a string and pos, return the number of chars in the
        identifier which ends at pos, or 0 if there is no such one.
        Reserved words (keywords) are not identifiers.  Characters before
        limit are never looked at.
        """
        i = pos
        while i > limit and str[i-1] in self._id_chars:
            i -= 1
        # Reject words starting with a digit, and keywords.
        if i < pos and (str[i] not in self._id_first_chars or
                        keyword.iskeyword(str[i:pos])):
            i = pos
        return pos - i

    def get_expression(self):
        """Return a string with the Python expression which ends at the given
        index, which is empty if there is no real one.

        Raises ValueError if the index is inside a string or a comment.
        """
        if not self.is_in_code():
            raise ValueError("get_expression should only be called if index "
                             "is inside a code.")

        rawtext = self.rawtext
        bracketing = self.bracketing

        brck_index = self.indexbracket
        brck_limit = bracketing[brck_index][0]
        pos = self.indexinrawtext

        last_identifier_pos = pos
        # True while an identifier (or a bracketed part) may come next when
        # scanning backwards; False while a dot is required to go on.
        postdot_phase = True

        while True:
            # Eat whitespaces, comments, and if postdot_phase is False - one dot
            while True:
                if pos > brck_limit and rawtext[pos-1] in self._whitespace_chars:
                    # Eat a whitespace
                    pos -= 1
                elif (not postdot_phase and
                      pos > brck_limit and rawtext[pos-1] == '.'):
                    # Eat a dot
                    pos -= 1
                    postdot_phase = True
                # The next line will fail if we are *inside* a comment, but we
                # shouldn't be.
                elif (pos == brck_limit and brck_index > 0 and
                      rawtext[bracketing[brck_index-1][0]] == '#'):
                    # Eat a comment
                    brck_index -= 2
                    brck_limit = bracketing[brck_index][0]
                    pos = bracketing[brck_index+1][0]
                else:
                    # If we didn't eat anything, quit.
                    break

            if not postdot_phase:
                # We didn't find a dot, so the expression ends at the last
                # identifier pos.
                break

            ret = self._eat_identifier(rawtext, brck_limit, pos)
            if ret:
                # There is an identifier to eat
                pos = pos - ret
                last_identifier_pos = pos
                # Now, in order to continue the search, we must find a dot.
                postdot_phase = False
                # (the loop continues now)

            elif pos == brck_limit:
                # We are at a bracketing limit. If it is a closing bracket,
                # eat the bracket, otherwise, stop the search.
                level = bracketing[brck_index][1]
                while brck_index > 0 and bracketing[brck_index-1][1] > level:
                    brck_index -= 1
                if bracketing[brck_index][0] == brck_limit:
                    # We were not at the end of a closing bracket
                    break
                pos = bracketing[brck_index][0]
                brck_index -= 1
                brck_limit = bracketing[brck_index][0]
                last_identifier_pos = pos
                if rawtext[pos] in "([":
                    # [] and () may be used after an identifier, so we
                    # continue. postdot_phase is True, so we don't allow a dot.
                    pass
                else:
                    # We can't continue after other types of brackets
                    if rawtext[pos] in "'\"":
                        # The expression is a string literal: include its
                        # prefix (r, u, b, ...) so that the returned text
                        # is the whole literal and not a different one.
                        while (pos > brck_limit and
                               rawtext[pos-1] in self._string_prefix_chars):
                            pos -= 1
                        last_identifier_pos = pos
                    break

            else:
                # We've found an operator or something.
                break

        return rawtext[last_identifier_pos:self.indexinrawtext]