| # Tools for info file processing. |
| |
| # XXX Need to be more careful with reading ahead searching for nodes. |
| |
| |
| import regexp |
| import string |
| |
| |
# Exported exceptions.
# These are string exceptions.  They are raised in this file as
# 'raise NoSuchFile, file' and 'raise NoSuchNode, node', so the
# offending name is passed along as the exception's argument.
#
NoSuchFile = 'no such file' # raised by try_open()
NoSuchNode = 'no such node' # raised by find_node()
| |
| |
# The search path for info files; this is site-specific.
# try_open() concatenates each entry directly with the file name,
# so directory names should end in a pathname delimiter.
# The empty string causes the file name to be tried exactly as given.
#
#INFOPATH = ['', ':Info.Ibrowse:', ':Info:'] # Mac
INFOPATH = ['', '/usr/local/emacs/info/'] # X11 on UNIX
| |
| |
# Tunable constants.
# These are sizes used when seeking and reading in info files;
# FUZZ and CHUNKSIZE are defined as multiples of BLOCKSIZE.
#
BLOCKSIZE = 512 # Qty to align reads to, if possible
FUZZ = 2*BLOCKSIZE # Qty to back-up before searching for a node
CHUNKSIZE = 4*BLOCKSIZE # Qty to read at once when reading lots of data
| |
| |
# Regular expressions used.
# Note that it is essential that Python leaves unrecognized backslash
# escapes in a string so they can be seen by regexp.compile!
#
# Each name below is the bound match method of a compiled expression.
# NOTE(review): the callers in this file call these as matcher(string)
# or matcher(string, offset), expect a false value when nothing is
# found, and otherwise unpack the result as a sequence of (start, end)
# index pairs: first the whole match, then one pair per group.
# They also rely on the scan proceeding forward from the offset --
# confirm against the regexp module in use.
#
# Node separator: ^_ (\037), an optional ^L (\014) and a newline;
# group 1 is the line that follows (the node's header line).
findheader = regexp.compile('\037\014?\n(.*\n)').match
# A single ^_ character (used to detect where a node ends or begins).
findescape = regexp.compile('\037').match
# "Node:" field; group 1 is the node name (same pattern as findnode).
parseheader = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
# A line, including its terminating newline.
findfirstline = regexp.compile('^.*\n').match
# "Node:" field; group 1 is the node name.
findnode = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
# "Prev:" field; "[ious]*" is a character class, so "Previous:"
# (or any other run of those letters after "Prev") is accepted too.
findprev = regexp.compile('[pP]rev[ious]*:[ \t]*([^\t,\n]*)').match
# "Next:" field; group 1 is the node reference.
findnext = regexp.compile('[nN]ext:[ \t]*([^\t,\n]*)').match
# "Up:" field; group 1 is the node reference.
findup = regexp.compile('[uU]p:[ \t]*([^\t,\n]*)').match
# The "* Menu:" line that introduces a menu.
findmenu = regexp.compile('^\* [mM]enu:').match
# A menu item "* TOPIC: REF"; group 1 is the topic, group 2 is either
# a lone ':' (the "* TOPIC::" form), a "(FILE)NODE" style reference,
# or a plain node name.
findmenuitem = regexp.compile( \
    '^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match
# A cross reference "*Note TOPIC: REF"; group 1 is the topic, group 2
# is either a lone ':' (the "*Note TOPIC::" form) or the reference.
findfootnote = regexp.compile( \
    '\*[nN]ote ([^:]+):[ \t]*(:|[^:][^\t,\n.]*)').match
# A reference of the form "(FILE)NODE"; group 1 is FILE, group 2 NODE.
parsenoderef = regexp.compile('^\((.*)\)(.*)$').match
| |
| |
# Get a node and all information pertaining to it.
# This doesn't work if there is an indirect tag table,
# and in general you are better off using icache.get_node() instead.
# Functions get_whole_file() and get_file_node() provide partial
# functionality used by icache.
# Raise NoSuchFile or NoSuchNode as appropriate.
#
def get_node(curfile, ref):
    # Resolve the reference 'ref' (interpreted relative to 'curfile'
    # by parse_ref()) and fetch what it names.
    # A node name of '*' selects the whole file; any other name is
    # looked up in the file with an offset hint of zero.
    # Returns the tuple produced by get_whole_file() or
    # get_file_node(); their exceptions are passed through.
    #
    file, node = parse_ref(curfile, ref)
    if node != '*':
        return get_file_node(file, 0, node)
    return get_whole_file(file)
| # |
def get_whole_file(file):
    # Return the complete contents of an info file as one pseudo-node.
    # The result is the tuple
    #   (file, '*', (prev, next, up), menu, footnotes, text)
    # in which the three links are empty strings and the menu and
    # footnote lists are empty.
    # Raises NoSuchFile (from try_open()) if the file can't be opened.
    #
    f = try_open(file) # May raise NoSuchFile
    try:
        text = f.read()
    finally:
        # Close explicitly, also when reading fails, rather than
        # leaving it to the disposal of the file object.
        f.close()
    header, menu, footnotes = ('', '', ''), [], []
    return file, '*', header, menu, footnotes, text
| # |
def get_file_node(file, offset, node):
    # Fetch one node from an info file and analyze it.
    # 'offset' is the position hint handed to find_node() (zero if
    # there is no tags table); 'node' is the node name to look for.
    # The result is the tuple
    #   (file, node, (prev, next, up), menu, footnotes, text)
    # where 'node' is the name as found by analyze_node() in the text.
    # Raises NoSuchFile (from try_open()) or NoSuchNode (from
    # find_node()).
    #
    f = try_open(file) # May raise NoSuchFile
    try:
        text = find_node(f, offset, node) # May raise NoSuchNode
    finally:
        # Close explicitly so the file is released even when the
        # node is not found and an exception propagates.
        f.close()
    node, header, menu, footnotes = analyze_node(text)
    return file, node, header, menu, footnotes, text
| |
| |
| # Parse a node reference into a file (possibly default) and node name. |
| # Possible reference formats are: "NODE", "(FILE)", "(FILE)NODE". |
| # Default file is the curfile argument; default node is Top. |
| # A node value of '*' is a special case: the whole file should |
| # be interpreted (by the caller!) as a single node. |
| # |
def parse_ref(curfile, ref):
    # Split a node reference into (file, node).
    # A reference matching "(FILE)NODE" supplies both parts; anything
    # else is taken to be a node name in 'curfile'.
    # An empty file part defaults to 'curfile' and an empty node part
    # defaults to 'Top'.
    #
    regs = parsenoderef(ref)
    if regs:
        whole, filespan, nodespan = regs
        file = ref[filespan[0]:filespan[1]]
        node = ref[nodespan[0]:nodespan[1]]
    else:
        file = curfile
        node = ref
    if not file:
        file = curfile # (Is this necessary?)
    if not node:
        node = 'Top'
    return file, node
| |
| |
| # Extract node name, links, menu and footnotes from the node text. |
| # |
def analyze_node(text):
    # Extract node name, links, menu and footnotes from a node's text.
    # 'text' is expected to start with the node's header line.
    # Returns (node, (prev, next, up), menu, footnotes), where the
    # name and links are strings ('' when the field is absent) and
    # menu and footnotes are lists of (topic, ref) pairs.
    #
    # Get node name and links from the header line.
    # Only the header line is searched, so that text like "Up: ..."
    # in the body of the node is not mistaken for a link when the
    # header lacks that field.
    #
    match = findfirstline(text)
    if match:
        (a, b) = match[0]
        line = text[a:b]
    else:
        # No newline at all: the text is just the header line.
        line = text
    node = get_it(line, findnode)
    prev = get_it(line, findprev)
    next = get_it(line, findnext)
    up = get_it(line, findup)
    #
    # Get the menu items, if there is a menu;
    # items are looked for after the "* Menu:" line only.
    #
    menu = []
    match = findmenu(text)
    if match:
        (a, b) = match[0]
        menu = _get_refs(text, findmenuitem, b)
    #
    # Get the footnotes (cross references) from the entire text
    #
    footnotes = _get_refs(text, findfootnote, 0)
    #
    return node, (prev, next, up), menu, footnotes
#
# Collect the (topic, ref) pairs found by matcher in text from pos on
# (subroutine for analyze_node()).
#
def _get_refs(text, matcher, pos):
    refs = []
    while 1:
        match = matcher(text, pos)
        if not match:
            break
        # Continue the next search where this match ended
        (a, pos), (a1, b1), (a2, b2) = match
        topic, ref = text[a1:b1], text[a2:b2]
        if ref == ':':
            # "TOPIC::" form: the topic is also the node name
            ref = topic
        refs.append((topic, ref))
    return refs
| # |
def get_it(line, matcher):
    # Apply 'matcher' to 'line' and return the text of its group.
    # The matcher must return either a false value (no match, giving
    # '') or a pair of (start, end) index pairs: the whole match
    # followed by the group whose text is wanted.
    #
    regs = matcher(line)
    if regs:
        whole, group = regs
        start, end = group
        return line[start:end]
    return ''
| |
| |
| # Find a node in an open file. |
| # The offset (from the tags table) is a hint about the node's position. |
| # Pass zero if there is no tags table. |
| # Raise NoSuchNode if the node isn't found. |
| # NB: This seeks around in the file. |
| # |
| def find_node(f, offset, node): |
| node = string.lower(node) # Just to be sure |
| # |
| # Position a little before the given offset, |
| # so we may find the node even if it has moved around |
| # in the file a little. |
| # |
| offset = max(0, ((offset-FUZZ) / BLOCKSIZE) * BLOCKSIZE) |
| f.seek(offset) |
| # |
| # Loop, hunting for a matching node header. |
| # |
| while 1: |
| buf = f.read(CHUNKSIZE) |
| if not buf: |
| break |
| i = 0 |
| while 1: |
| match = findheader(buf, i) |
| if match: |
| (a,b), (a1,b1) = match |
| start = a1 |
| line = buf[a1:b1] |
| i = b |
| match = parseheader(line) |
| if match: |
| (a,b), (a1,b1) = match |
| key = string.lower(line[a1:b1]) |
| if key == node: |
| # Got it! Now read the rest. |
| return read_node(f, buf[start:]) |
| elif findescape(buf, i): |
| next = f.read(CHUNKSIZE) |
| if not next: |
| break |
| buf = buf + next |
| else: |
| break |
| # |
| # If we get here, we didn't find it. Too bad. |
| # |
| raise NoSuchNode, node |
| |
| |
| # Finish off getting a node (subroutine for find_node()). |
| # The node begins at the start of buf and may end in buf; |
| # if it doesn't end there, read additional data from f. |
| # |
def read_node(f, buf):
    # Return the text of the node that begins at the start of 'buf'.
    # The node extends up to the first ^_ character, or to the end of
    # the file if there is none; data is read from 'f' in chunks of
    # CHUNKSIZE until either is reached.
    # Trailing newlines are removed from the result.
    #
    scanned = 0
    regs = findescape(buf, scanned)
    while not regs:
        more = f.read(CHUNKSIZE)
        if not more:
            break
        # Only the newly added data needs to be searched
        scanned = len(buf)
        buf = buf + more
        regs = findescape(buf, scanned)
    if regs:
        # The node ends where the separator begins
        start, stop = regs[0]
        end = start
    else:
        # End of file: the node is everything that was read
        end = len(buf)
    # Strip trailing newlines
    while end > 0 and buf[end-1] == '\n':
        end = end - 1
    return buf[:end]
| |
| |
# Read backwards starting at offset until the beginning of a node is found.
# Then return a buffer containing the beginning of the node,
# with f positioned just after the buffer.
# The buffer will contain at least the full header line of the node;
# the caller should finish off with read_node() if it is the right node.
# (It is also possible that the buffer extends beyond the node!)
# Return an empty string if there is no node before the given offset.
#
def backup_node(f, offset):
    # The file is examined in windows [start, end) that move towards
    # the beginning of the file.  The first window begins about
    # CHUNKSIZE before offset, rounded to a multiple of BLOCKSIZE
    # (assuming '/' divides integers with truncation), and ends at
    # offset.
    start = max(0, ((offset-CHUNKSIZE) / BLOCKSIZE) * BLOCKSIZE)
    end = offset
    while start < end:
        f.seek(start)
        buf = f.read(end-start)
        i = 0
        # Index in buf of the last header line found so far, or -1
        hit = -1
        while 1:
            match = findheader(buf, i)
            if match:
                # Remember this header and keep looking for a later
                # one, so the header nearest to offset wins
                (a,b), (a1,b1) = match
                hit = a1
                i = b
            elif end < offset and findescape(buf, i):
                # There is a separator whose header line is not
                # completely in the window: read on, a block at a
                # time, but never past the original offset
                next = f.read(min(offset-end, BLOCKSIZE))
                if not next:
                    break
                buf = buf + next
                end = end + len(next)
            else:
                break
        if hit >= 0:
            return buf[hit:]
        # Nothing found here; move the window back by CHUNKSIZE
        end = start
        start = max(0, end - CHUNKSIZE)
    # Reached the beginning of the file without finding a header
    return ''
| |
| |
| # Make a tag table for the given file by scanning the file. |
| # The file must be open for reading, and positioned at the beginning |
| # (or wherever the hunt for tags must begin; it is read till the end). |
| # |
| def make_tags(f): |
| tags = {} |
| while 1: |
| offset = f.tell() |
| buf = f.read(CHUNKSIZE) |
| if not buf: |
| break |
| i = 0 |
| while 1: |
| match = findheader(buf, i) |
| if match: |
| (a,b), (a1,b1) = match |
| start = offset+a1 |
| line = buf[a1:b1] |
| i = b |
| match = parseheader(line) |
| if match: |
| (a,b), (a1,b1) = match |
| key = string.lower(line[a1:b1]) |
| if tags.has_key(key): |
| print 'Duplicate node:', |
| print key |
| tags[key] = '', start, line |
| elif findescape(buf, i): |
| next = f.read(CHUNKSIZE) |
| if not next: |
| break |
| buf = buf + next |
| else: |
| break |
| return tags |
| |
| |
# Try to open a file; return a file object on success.
# Raise NoSuchFile if the file can't be opened.
# XXX Absolute pathnames should be treated specially (not done yet).
#
| def try_open(file): |
| for dir in INFOPATH: |
| try: |
| return open(dir + file, 'r') |
| except IOError: |
| pass |
| raise NoSuchFile, file |
| |
| |
| # A little test for the speed of make_tags(). |
| # |
| TESTFILE = 'texinfo-1' |
| def test_make_tags(): |
| import time |
| f = try_open(TESTFILE) |
| t1 = time.time() |
| tags = make_tags(f) |
| t2 = time.time() |
| print 'Making tag table for', `TESTFILE`, 'took', t2-t1, 'sec.' |