blob: 2e034301c56eae31ad1090e4a7625df5cf3b26b9 [file] [log] [blame]
"""text_file

provides the TextFile class, which gives an interface to text files
that (optionally) takes care of stripping comments, ignoring blank
lines, and joining lines with backslashes."""
6
7# created 1999/01/12, Greg Ward
8
9__revision__ = "$Id$"
10
11from types import *
Greg Wardf6cdcd51999-01-18 17:08:16 +000012import sys, os, string, re
Greg Wardd1dc4751999-01-13 16:12:04 +000013
14
class TextFile:
    """Line-oriented reader for text files with optional clean-up.

    A TextFile wraps either a filename (which it opens itself) or an
    already-open file object, and hands lines back one at a time through
    'readline()'.  Keyword options (see 'default_options' for defaults)
    control the processing applied to each line:

      strip_comments  remove everything from an unescaped "#" to the end
                      of the line; if the first "#" on a line is escaped
                      as "\\#", every "\\#" on that line becomes "#"
      skip_blanks     skip lines that are empty (or a bare newline) after
                      comment and whitespace stripping
      join_lines      if a line ends with a backslash, join the following
                      line to it
      lstrip_ws       strip leading whitespace from each line
      rstrip_ws       strip trailing whitespace (including the newline)
      collapse_ws     collapse each internal run of whitespace to one space

    Instance attributes set by this class: 'filename', 'file',
    'current_line' (an integer, or a two-element list [first, last] after
    a joined line has been read) and 'linebuf' (lines pushed back with
    'unreadline()').
    """

    default_options = {'strip_comments': 1,
                       'skip_blanks': 1,
                       'join_lines': 0,
                       'lstrip_ws': 0,
                       'rstrip_ws': 1,
                       'collapse_ws': 0,
                       }

    def __init__(self, filename=None, file=None, **options):
        """Create a TextFile from 'filename', 'file', or both.

        If 'file' is None, 'filename' is opened for reading; otherwise
        'file' is used as-is (it is assumed to be positioned at the
        start) and 'filename' is kept only for use in warnings.

        Raises RuntimeError if neither 'filename' nor 'file' is given,
        and KeyError if 'options' contains a name that is not a key of
        'default_options'.
        """
        if filename is None and file is None:
            raise RuntimeError(
                "you must supply either or both of 'filename' and 'file'")

        # sanity check client option hash before touching any state
        for opt in options:
            if opt not in self.default_options:
                raise KeyError("invalid TextFile option '%s'" % opt)

        # set values for all options -- either from client option hash
        # or fallback to default_options
        for opt in self.default_options:
            setattr(self, opt, options.get(opt, self.default_options[opt]))

        if file is None:
            self.open(filename)
        else:
            self.filename = filename
            self.file = file
            self.current_line = 0       # assuming that file is at BOF!

        # 'linebuf' is a stack of lines that will be emptied before we
        # actually read from the file; it's only populated by an
        # 'unreadline()' operation
        self.linebuf = []

    def open(self, filename):
        """Open 'filename' for reading and reset the line counter."""
        self.filename = filename
        self.file = open(self.filename, 'r')
        self.current_line = 0

    def close(self):
        """Close the underlying file and forget filename and position."""
        self.file.close()
        self.file = None
        self.filename = None
        self.current_line = None

    def warn(self, msg, line=None):
        """Write a warning about 'msg' to stderr, tagged with its location.

        'line' may be a single line number or a two-element list/tuple
        giving a range of lines; it defaults to 'self.current_line'.  The
        filename prefix is left out when no filename is known (i.e. the
        object was built from a file object only).
        """
        if line is None:
            line = self.current_line
        if self.filename is not None:
            sys.stderr.write(self.filename + ", ")
        if isinstance(line, (list, tuple)):
            sys.stderr.write("lines %d-%d: " % tuple(line))
        else:
            sys.stderr.write("line %d: " % line)
        sys.stderr.write(str(msg) + "\n")

    def readline(self):
        """Return the next processed line, or None at end-of-file.

        Lines pushed back with 'unreadline()' are returned first (most
        recently pushed first) and are not processed again.  Otherwise a
        line is read from the file and the enabled options are applied in
        this order: comment stripping, joining with a pending continuation
        line, whitespace stripping, blank-line skipping, detection of a
        trailing backslash, and internal whitespace collapsing.

        If the file ends right after a continuation line, a warning is
        written and the text accumulated so far is returned.
        """
        # If any "unread" lines waiting in 'linebuf', return the top
        # one.  (We don't actually buffer read-ahead data -- lines only
        # get put in 'linebuf' if the client explicitly does an
        # 'unreadline()'.)
        if self.linebuf:
            return self.linebuf.pop()

        buildup_line = ''

        while 1:
            # read the line, make it None if EOF
            line = self.file.readline()
            if line == '':
                line = None

            if self.strip_comments and line:
                # Look for the first "#" in the line.  If none, never
                # mind.  If we find one and it's the first character, or
                # is not preceded by "\", then it starts a comment --
                # strip the comment and carry on.  Otherwise, it's just
                # an escaped "#", so unescape it (and any other escaped
                # "#"'s that might be lurking in there) and otherwise
                # leave the line alone.
                pos = line.find("#")
                if pos == -1:               # no "#" -- no comments
                    pass
                elif pos == 0 or line[pos - 1] != "\\":   # it's a comment
                    # Preserve the trailing newline so that a line which
                    # was all comment still looks like a blank line.
                    # (NB. if the final line is all comment and has no
                    # trailing newline, it is reduced to an empty
                    # string.)
                    has_newline = (line[-1] == '\n')
                    line = line[0:pos]
                    if has_newline:
                        line = line + '\n'
                else:                       # it's an escaped "#"
                    line = line.replace("\\#", "#")

            # did previous line end with a backslash? then accumulate
            if self.join_lines and buildup_line:
                # oops: end of file
                if line is None:
                    self.warn("continuation line immediately precedes "
                              "end-of-file")
                    return buildup_line

                line = buildup_line + line

                # careful: pay attention to line number when incrementing it
                if isinstance(self.current_line, list):
                    self.current_line[1] = self.current_line[1] + 1
                else:
                    self.current_line = [self.current_line,
                                         self.current_line + 1]

            # just an ordinary line, read it as usual
            else:
                if line is None:            # eof
                    return None

                # still have to be careful about incrementing the line number!
                if isinstance(self.current_line, list):
                    self.current_line = self.current_line[1] + 1
                else:
                    self.current_line = self.current_line + 1

            # strip whitespace however the client wants (leading and
            # trailing, or one or the other, or neither)
            if self.lstrip_ws and self.rstrip_ws:
                line = line.strip()
            elif self.lstrip_ws:
                line = line.lstrip()
            elif self.rstrip_ws:
                line = line.rstrip()

            # blank line (whether we rstrip'ed or not)? skip to next line
            # if appropriate.  The parentheses matter: 'and' binds tighter
            # than 'or', and without them an empty line would be skipped
            # even with 'skip_blanks' turned off.
            if (line == '' or line == '\n') and self.skip_blanks:
                continue

            if self.join_lines:
                # slices rather than indexing: 'line' may be empty here
                if line[-1:] == '\\':
                    buildup_line = line[:-1]
                    continue

                if line[-2:] == '\\\n':
                    buildup_line = line[0:-2] + '\n'
                    continue

            # collapse internal whitespace (*after* joining lines!).
            # Look-around assertions leave the neighbouring characters
            # unconsumed, so back-to-back runs around a one-character
            # word ("a  b  c") are all collapsed.
            if self.collapse_ws:
                line = re.sub(r'(?<=\S)\s+(?=\S)', ' ', line)

            # well, I guess there's some actual content there: return it
            return line

    def readlines(self):
        """Read to end-of-file and return all processed lines as a list."""
        lines = []
        while 1:
            line = self.readline()
            if line is None:
                return lines
            lines.append(line)

    def unreadline(self, line):
        """Push 'line' back so that the next 'readline()' returns it."""
        self.linebuf.append(line)
200
201
if __name__ == "__main__":
    # Self-test: write a small sample file, read it back through TextFile
    # with several option combinations, and print "ok"/"not ok" for each.
    test_data = """# test file

line 3 \\
continues on next line
"""

    # result 1: no fancy options
    result1 = [x + "\n" for x in test_data.split("\n")[0:-1]]

    # result 2: just strip comments
    result2 = ["\n", "\n", "line 3 \\\n", "continues on next line\n"]

    # result 3: just strip blank lines
    result3 = ["# test file\n", "line 3 \\\n", "continues on next line\n"]

    # result 4: default, strip comments, blank lines, and trailing whitespace
    result4 = ["line 3 \\", "continues on next line"]

    # result 5: full processing, strip comments and blanks, plus join lines
    result5 = ["line 3 continues on next line"]

    def test_input(count, description, file, expected_result):
        """Read everything from 'file', close it, and report the outcome.

        Prints "ok N (description)" when the lines read equal
        'expected_result'; otherwise prints "not ok" followed by the
        expected and received lists.
        """
        try:
            result = file.readlines()
        finally:
            # always release the handle so the sample file can be removed
            file.close()

        if result == expected_result:
            print("ok %d (%s)" % (count, description))
        else:
            print("not ok %d (%s):" % (count, description))
            print("** expected:")
            print(expected_result)
            print("** received:")
            print(result)

    filename = "test.txt"
    out_file = open(filename, "w")
    try:
        out_file.write(test_data)
    finally:
        out_file.close()

    try:
        in_file = TextFile(filename, strip_comments=0, skip_blanks=0,
                           lstrip_ws=0, rstrip_ws=0)
        test_input(1, "no processing", in_file, result1)

        in_file = TextFile(filename, strip_comments=1, skip_blanks=0,
                           lstrip_ws=0, rstrip_ws=0)
        test_input(2, "strip comments", in_file, result2)

        in_file = TextFile(filename, strip_comments=0, skip_blanks=1,
                           lstrip_ws=0, rstrip_ws=0)
        test_input(3, "strip blanks", in_file, result3)

        in_file = TextFile(filename)
        test_input(4, "default processing", in_file, result4)

        in_file = TextFile(filename, strip_comments=1, skip_blanks=1,
                           join_lines=1, rstrip_ws=1)
        test_input(5, "full processing", in_file, result5)
    finally:
        # remove the sample file even if one of the checks blew up
        os.remove(filename)
262