| """Iterator based sre token scanner | 
 |  | 
 | """ | 
 |  | 
 | import re | 
 | import sre_parse | 
 | import sre_compile | 
 | import sre_constants | 
 |  | 
 | from re import VERBOSE, MULTILINE, DOTALL | 
 | from sre_constants import BRANCH, SUBPATTERN | 
 |  | 
 | __all__ = ['Scanner', 'pattern'] | 
 |  | 
 | FLAGS = (VERBOSE | MULTILINE | DOTALL) | 
 |  | 
 | class Scanner(object): | 
 |     def __init__(self, lexicon, flags=FLAGS): | 
 |         self.actions = [None] | 
 |         # Combine phrases into a compound pattern | 
 |         s = sre_parse.Pattern() | 
 |         s.flags = flags | 
 |         p = [] | 
 |         for idx, token in enumerate(lexicon): | 
 |             phrase = token.pattern | 
 |             try: | 
 |                 subpattern = sre_parse.SubPattern(s, | 
 |                     [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))]) | 
 |             except sre_constants.error: | 
 |                 raise | 
 |             p.append(subpattern) | 
 |             self.actions.append(token) | 
 |  | 
 |         s.groups = len(p) + 1 # NOTE(guido): Added to make SRE validation work | 
 |         p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) | 
 |         self.scanner = sre_compile.compile(p) | 
 |  | 
 |     def iterscan(self, string, idx=0, context=None): | 
 |         """Yield match, end_idx for each match | 
 |  | 
 |         """ | 
 |         match = self.scanner.scanner(string, idx).match | 
 |         actions = self.actions | 
 |         lastend = idx | 
 |         end = len(string) | 
 |         while True: | 
 |             m = match() | 
 |             if m is None: | 
 |                 break | 
 |             matchbegin, matchend = m.span() | 
 |             if lastend == matchend: | 
 |                 break | 
 |             action = actions[m.lastindex] | 
 |             if action is not None: | 
 |                 rval, next_pos = action(m, context) | 
 |                 if next_pos is not None and next_pos != matchend: | 
 |                     # "fast forward" the scanner | 
 |                     matchend = next_pos | 
 |                     match = self.scanner.scanner(string, matchend).match | 
 |                 yield rval, matchend | 
 |             lastend = matchend | 
 |  | 
 |  | 
 | def pattern(pattern, flags=FLAGS): | 
 |     def decorator(fn): | 
 |         fn.pattern = pattern | 
 |         fn.regex = re.compile(pattern, flags) | 
 |         return fn | 
 |     return decorator |