Blame - Demo/ibrowse/ifile.py - platform/external/python/cpython3

blob: 7e66fcdf00afa077bbb525e8b8db5cb63cb9b870 [file] [log] [blame]

Guido van Rossum	5dd52d3	1995-04-10 11:47:11 +0000	[diff] [blame]	1	# Tools for info file processing.
				2
				3	# XXX Need to be more careful with reading ahead searching for nodes.
				4
				5
				6	import regexp
				7	import string
				8
				9
				10	# Exported exceptions.
				11	#
				12	NoSuchFile = 'no such file'
				13	NoSuchNode = 'no such node'
				14
				15
				16	# The search path for info files; this is site-specific.
				17	# Directory names should end in a partname delimiter,
				18	# so they can simply be concatenated to a relative pathname.
				19	#
				20	#INFOPATH = ['', ':Info.Ibrowse:', ':Info:'] # Mac
				21	INFOPATH = ['', '/usr/local/emacs/info/'] # X11 on UNIX
				22
				23
				24	# Tunable constants.
				25	#
				26	BLOCKSIZE = 512 # Qty to align reads to, if possible
				27	FUZZ = 2*BLOCKSIZE # Qty to back-up before searching for a node
				28	CHUNKSIZE = 4*BLOCKSIZE # Qty to read at once when reading lots of data
				29
				30
				31	# Regular expressions used.
				32	# Note that it is essential that Python leaves unrecognized backslash
				33	# escapes in a string so they can be seen by regexp.compile!
				34	#
				35	findheader = regexp.compile('\037\014?\n(.*\n)').match
				36	findescape = regexp.compile('\037').match
				37	parseheader = regexp.compile('[nN]ode:[ \t]([^\t,\n])').match
				38	findfirstline = regexp.compile('^.*\n').match
				39	findnode = regexp.compile('[nN]ode:[ \t]([^\t,\n])').match
				40	findprev = regexp.compile('[pP]rev[ious]:[ \t]([^\t,\n]*)').match
				41	findnext = regexp.compile('[nN]ext:[ \t]([^\t,\n])').match
				42	findup = regexp.compile('[uU]p:[ \t]([^\t,\n])').match
				43	findmenu = regexp.compile('^\* [mM]enu:').match
				44	findmenuitem = regexp.compile( \
				45	'^\* ([^:]+):[ \t](:\|$[^\t]$[^\t,\n.]\|[^:(][^\t,\n.])').match
				46	findfootnote = regexp.compile( \
				47	'\[nN]ote ([^:]+):[ \t](:\|[^:][^\t,\n.]*)').match
				48	parsenoderef = regexp.compile('^$(.)$(.)$').match
				49
				50
				51	# Get a node and all information pertaining to it.
				52	# This doesn't work if there is an indirect tag table,
				53	# and in general you are better off using icache.get_node() instead.
				54	# Functions get_whole_file() and get_file_node() provide part
				55	# functionality used by icache.
				56	# Raise NoSuchFile or NoSuchNode as appropriate.
				57	#
				58	def get_node(curfile, ref):
				59	file, node = parse_ref(curfile, ref)
				60	if node == '*':
				61	return get_whole_file(file)
				62	else:
				63	return get_file_node(file, 0, node)
				64	#
				65	def get_whole_file(file):
				66	f = try_open(file) # May raise NoSuchFile
				67	text = f.read()
				68	header, menu, footnotes = ('', '', ''), [], []
				69	return file, '*', header, menu, footnotes, text
				70	#
				71	def get_file_node(file, offset, node):
				72	f = try_open(file) # May raise NoSuchFile
				73	text = find_node(f, offset, node) # May raise NoSuchNode
				74	node, header, menu, footnotes = analyze_node(text)
				75	return file, node, header, menu, footnotes, text
				76
				77
				78	# Parse a node reference into a file (possibly default) and node name.
				79	# Possible reference formats are: "NODE", "(FILE)", "(FILE)NODE".
				80	# Default file is the curfile argument; default node is Top.
				81	# A node value of '*' is a special case: the whole file should
				82	# be interpreted (by the caller!) as a single node.
				83	#
				84	def parse_ref(curfile, ref):
				85	match = parsenoderef(ref)
				86	if not match:
				87	file, node = curfile, ref
				88	else:
				89	(a, b), (a1, b1), (a2, b2) = match
				90	file, node = ref[a1:b1], ref[a2:b2]
				91	if not file:
				92	file = curfile # (Is this necessary?)
				93	if not node:
				94	node = 'Top'
				95	return file, node
				96
				97
				98	# Extract node name, links, menu and footnotes from the node text.
				99	#
				100	def analyze_node(text):
				101	#
				102	# Get node name and links from the header line
				103	#
				104	match = findfirstline(text)
				105	if match:
				106	(a, b) = match[0]
				107	line = text[a:b]
				108	else:
				109	line = ''
				110	node = get_it(text, findnode)
				111	prev = get_it(text, findprev)
				112	next = get_it(text, findnext)
				113	up = get_it(text, findup)
				114	#
				115	# Get the menu items, if there is a menu
				116	#
				117	menu = []
				118	match = findmenu(text)
				119	if match:
				120	(a, b) = match[0]
				121	while 1:
				122	match = findmenuitem(text, b)
				123	if not match:
				124	break
				125	(a, b), (a1, b1), (a2, b2) = match
				126	topic, ref = text[a1:b1], text[a2:b2]
				127	if ref == ':':
				128	ref = topic
Guido van Rossum	9c2c1e8	1998-10-08 15:24:48 +0000	[diff] [blame]	129	menu.append((topic, ref))
Guido van Rossum	5dd52d3	1995-04-10 11:47:11 +0000	[diff] [blame]	130	#
				131	# Get the footnotes
				132	#
				133	footnotes = []
				134	b = 0
				135	while 1:
				136	match = findfootnote(text, b)
				137	if not match:
				138	break
				139	(a, b), (a1, b1), (a2, b2) = match
				140	topic, ref = text[a1:b1], text[a2:b2]
				141	if ref == ':':
				142	ref = topic
Guido van Rossum	9c2c1e8	1998-10-08 15:24:48 +0000	[diff] [blame]	143	footnotes.append((topic, ref))
Guido van Rossum	5dd52d3	1995-04-10 11:47:11 +0000	[diff] [blame]	144	#
				145	return node, (prev, next, up), menu, footnotes
				146	#
				147	def get_it(line, matcher):
				148	match = matcher(line)
				149	if not match:
				150	return ''
				151	else:
				152	(a, b), (a1, b1) = match
				153	return line[a1:b1]
				154
				155
				156	# Find a node in an open file.
				157	# The offset (from the tags table) is a hint about the node's position.
				158	# Pass zero if there is no tags table.
				159	# Raise NoSuchNode if the node isn't found.
				160	# NB: This seeks around in the file.
				161	#
				162	def find_node(f, offset, node):
				163	node = string.lower(node) # Just to be sure
				164	#
				165	# Position a little before the given offset,
				166	# so we may find the node even if it has moved around
				167	# in the file a little.
				168	#
				169	offset = max(0, ((offset-FUZZ) / BLOCKSIZE) * BLOCKSIZE)
				170	f.seek(offset)
				171	#
				172	# Loop, hunting for a matching node header.
				173	#
				174	while 1:
				175	buf = f.read(CHUNKSIZE)
				176	if not buf:
				177	break
				178	i = 0
				179	while 1:
				180	match = findheader(buf, i)
				181	if match:
				182	(a,b), (a1,b1) = match
				183	start = a1
				184	line = buf[a1:b1]
				185	i = b
				186	match = parseheader(line)
				187	if match:
				188	(a,b), (a1,b1) = match
				189	key = string.lower(line[a1:b1])
				190	if key == node:
				191	# Got it! Now read the rest.
				192	return read_node(f, buf[start:])
				193	elif findescape(buf, i):
				194	next = f.read(CHUNKSIZE)
				195	if not next:
				196	break
				197	buf = buf + next
				198	else:
				199	break
				200	#
				201	# If we get here, we didn't find it. Too bad.
				202	#
				203	raise NoSuchNode, node
				204
				205
				206	# Finish off getting a node (subroutine for find_node()).
				207	# The node begins at the start of buf and may end in buf;
				208	# if it doesn't end there, read additional data from f.
				209	#
				210	def read_node(f, buf):
				211	i = 0
				212	match = findescape(buf, i)
				213	while not match:
				214	next = f.read(CHUNKSIZE)
				215	if not next:
				216	end = len(buf)
				217	break
				218	i = len(buf)
				219	buf = buf + next
				220	match = findescape(buf, i)
				221	else:
				222	# Got a match
				223	(a, b) = match[0]
				224	end = a
				225	# Strip trailing newlines
				226	while end > 0 and buf[end-1] == '\n':
				227	end = end-1
				228	buf = buf[:end]
				229	return buf
				230
				231
				232	# Read reverse starting at offset until the beginning of a node is found.
				233	# Then return a buffer containing the beginning of the node,
				234	# with f positioned just after the buffer.
				235	# The buffer will contain at least the full header line of the node;
				236	# the caller should finish off with read_node() if it is the right node.
				237	# (It is also possible that the buffer extends beyond the node!)
				238	# Return an empty string if there is no node before the given offset.
				239	#
				240	def backup_node(f, offset):
				241	start = max(0, ((offset-CHUNKSIZE) / BLOCKSIZE) * BLOCKSIZE)
				242	end = offset
				243	while start < end:
				244	f.seek(start)
				245	buf = f.read(end-start)
				246	i = 0
				247	hit = -1
				248	while 1:
				249	match = findheader(buf, i)
				250	if match:
				251	(a,b), (a1,b1) = match
				252	hit = a1
				253	i = b
				254	elif end < offset and findescape(buf, i):
				255	next = f.read(min(offset-end, BLOCKSIZE))
				256	if not next:
				257	break
				258	buf = buf + next
				259	end = end + len(next)
				260	else:
				261	break
				262	if hit >= 0:
				263	return buf[hit:]
				264	end = start
				265	start = max(0, end - CHUNKSIZE)
				266	return ''
				267
				268
				269	# Make a tag table for the given file by scanning the file.
				270	# The file must be open for reading, and positioned at the beginning
				271	# (or wherever the hunt for tags must begin; it is read till the end).
				272	#
				273	def make_tags(f):
				274	tags = {}
				275	while 1:
				276	offset = f.tell()
				277	buf = f.read(CHUNKSIZE)
				278	if not buf:
				279	break
				280	i = 0
				281	while 1:
				282	match = findheader(buf, i)
				283	if match:
				284	(a,b), (a1,b1) = match
				285	start = offset+a1
				286	line = buf[a1:b1]
				287	i = b
				288	match = parseheader(line)
				289	if match:
				290	(a,b), (a1,b1) = match
				291	key = string.lower(line[a1:b1])
				292	if tags.has_key(key):
				293	print 'Duplicate node:',
				294	print key
				295	tags[key] = '', start, line
				296	elif findescape(buf, i):
				297	next = f.read(CHUNKSIZE)
				298	if not next:
				299	break
				300	buf = buf + next
				301	else:
				302	break
				303	return tags
				304
				305
				306	# Try to open a file, return a file object if succeeds.
				307	# Raise NoSuchFile if the file can't be opened.
				308	# Should treat absolute pathnames special.
				309	#
				310	def try_open(file):
				311	for dir in INFOPATH:
				312	try:
				313	return open(dir + file, 'r')
				314	except IOError:
				315	pass
				316	raise NoSuchFile, file
				317
				318
				319	# A little test for the speed of make_tags().
				320	#
				321	TESTFILE = 'texinfo-1'
				322	def test_make_tags():
				323	import time
				324	f = try_open(TESTFILE)
				325	t1 = time.time()
				326	tags = make_tags(f)
				327	t2 = time.time()
				328	print 'Making tag table for', `TESTFILE`, 'took', t2-t1, 'sec.'