blob: 690cb80f3ccbb20480636ff9d815fa421b60e923 [file] [log] [blame]
"""text_file

provides the TextFile class, which gives an interface to text files
that (optionally) takes care of stripping comments, ignoring blank
lines, and joining lines with backslashes."""
6
Greg Wardd1dc4751999-01-13 16:12:04 +00007__revision__ = "$Id$"
8
Tarek Ziadé2b66da72009-12-21 01:22:46 +00009import sys
Greg Wardd1dc4751999-01-13 16:12:04 +000010
11
class TextFile:
    """Provides a file-like object that takes care of all the things you
    commonly want to do when processing a text file that has some
    line-by-line syntax: strip comments (as long as "#" is your
    comment character), skip blank lines, join adjacent lines by
    escaping the newline (ie. backslash at end of line), strip
    leading and/or trailing whitespace.  All of these are optional
    and independently controllable.

    Provides a 'warn()' method so you can generate warning messages that
    report physical line number, even if the logical line in question
    spans multiple physical lines.  Also provides 'unreadline()' for
    implementing line-at-a-time lookahead.

    Constructor is called as:

        TextFile (filename=None, file=None, **options)

    It bombs (RuntimeError) if both 'filename' and 'file' are None;
    'filename' should be a string, and 'file' a file object (or
    something that provides 'readline()' and 'close()' methods).  It is
    recommended that you supply at least 'filename', so that TextFile
    can include it in warning messages.  If 'file' is not supplied,
    TextFile creates its own using the 'open()' builtin.

    The options are all boolean, and affect the value returned by
    'readline()':
      strip_comments [default: true]
        strip from "#" to end-of-line -- unless the "#" is escaped by a
        backslash.  Whitespace leading up to the "#" is only removed
        when 'rstrip_ws' is also true.
      lstrip_ws [default: false]
        strip leading whitespace from each line before returning it
      rstrip_ws [default: true]
        strip trailing whitespace (including line terminator!) from
        each line before returning it
      skip_blanks [default: true]
        skip lines that are empty *after* stripping comments and
        whitespace.  (If both lstrip_ws and rstrip_ws are false,
        then some lines may consist of solely whitespace: these will
        *not* be skipped, even if 'skip_blanks' is true.)
      join_lines [default: false]
        if a backslash is the last non-newline character on a line
        after stripping comments and whitespace, join the following line
        to it to form one "logical line"; if N consecutive lines end
        with a backslash, then N+1 physical lines will be joined to
        form one logical line.
      collapse_join [default: false]
        strip leading whitespace from lines that are joined to their
        predecessor; only matters if (join_lines and not lstrip_ws)

    Note that since 'rstrip_ws' can strip the trailing newline, the
    semantics of 'readline()' must differ from those of the builtin file
    object's 'readline()' method!  In particular, 'readline()' returns
    None for end-of-file: an empty string might just be a blank line (or
    an all-whitespace line), if 'rstrip_ws' is true but 'skip_blanks' is
    not."""

    default_options = {
        'strip_comments': 1,
        'skip_blanks': 1,
        'lstrip_ws': 0,
        'rstrip_ws': 1,
        'join_lines': 0,
        'collapse_join': 0,
    }

    def __init__(self, filename=None, file=None, **options):
        """Construct a new TextFile object.  At least one of 'filename'
        (a string) and 'file' (a file-like object) must be supplied.
        The keyword argument options are described above and affect
        the values returned by 'readline()'.

        Raises RuntimeError if neither 'filename' nor 'file' is given,
        and KeyError if an unknown option name is passed."""

        if filename is None and file is None:
            raise RuntimeError(
                "you must supply either or both of 'filename' and 'file'")

        # sanity check client option hash before touching any state
        for opt in options:
            if opt not in self.default_options:
                raise KeyError("invalid TextFile option '%s'" % opt)

        # set values for all options -- either from client option hash
        # or fallback to default_options
        for opt, default in self.default_options.items():
            setattr(self, opt, options.get(opt, default))

        if file is None:
            self.open(filename)
        else:
            self.filename = filename
            self.file = file
            self.current_line = 0       # assuming that file is at BOF!

        # 'linebuf' is a stack of lines that will be emptied before we
        # actually read from the file; it's only populated by an
        # 'unreadline()' operation
        self.linebuf = []

    def open(self, filename):
        """Open a new file named 'filename'.  This overrides both the
        'filename' and 'file' arguments to the constructor."""

        self.filename = filename
        self.file = open(self.filename, 'r')
        self.current_line = 0

    def close(self):
        """Close the current file and forget everything we know about it
        (filename, current line number).  Calling 'close()' again on an
        already-closed TextFile is a no-op."""
        file = self.file
        # Reset our own state first so it is cleared even if the
        # underlying 'close()' raises.
        self.file = None
        self.filename = None
        self.current_line = None
        if file is not None:
            file.close()

    def gen_error(self, msg, line=None):
        """Return 'msg' prefixed with the file name and line number(s).

        'line' defaults to the current line number; it may be an integer
        (giving "line N: ") or a two-element list or tuple (giving
        "lines N-M: ").  The "<filename>, " prefix is left out when no
        filename is known (ie. only 'file' was given to the
        constructor)."""
        outmsg = []
        if line is None:
            line = self.current_line
        if self.filename is not None:
            outmsg.append(self.filename + ", ")
        if isinstance(line, (list, tuple)):
            outmsg.append("lines %d-%d: " % tuple(line))
        else:
            outmsg.append("line %d: " % line)
        outmsg.append(str(msg))
        return ''.join(outmsg)

    def error(self, msg, line=None):
        """Raise ValueError with 'msg' tied to the current logical line
        (or to 'line', interpreted as for 'gen_error()')."""
        raise ValueError("error: " + self.gen_error(msg, line))

    def warn(self, msg, line=None):
        """Print (to stderr) a warning message tied to the current logical
        line in the current file.  If the current logical line in the
        file spans multiple physical lines, the warning refers to the
        whole range, eg. "lines 3-5".  If 'line' supplied, it overrides
        the current line number; it may be a list or tuple to indicate a
        range of physical lines, or an integer for a single physical
        line."""
        sys.stderr.write("warning: " + self.gen_error(msg, line) + "\n")

    def _count_physical_line(self, joining):
        """Advance 'current_line' past one more physical line.

        If 'joining' is true the physical line belongs to the logical
        line being accumulated, so 'current_line' becomes (or remains) a
        two-element [first, last] list.  Otherwise the physical line
        starts a new logical line and 'current_line' becomes a plain
        integer."""
        current = self.current_line
        if joining:
            if isinstance(current, list):
                current[1] = current[1] + 1
            else:
                self.current_line = [current, current + 1]
        elif isinstance(current, list):
            self.current_line = current[1] + 1
        else:
            self.current_line = current + 1

    def readline(self):
        """Read and return a single logical line from the current file (or
        from an internal buffer if lines have previously been "unread"
        with 'unreadline()').  If the 'join_lines' option is true, this
        may involve reading multiple physical lines concatenated into a
        single string.  Updates the current line number, so calling
        'warn()' after 'readline()' emits a warning about the physical
        line(s) just read.  Returns None on end-of-file, since the empty
        string can occur if 'rstrip_ws' is true but 'skip_blanks' is
        not."""

        # If any "unread" lines waiting in 'linebuf', return the top
        # one.  (We don't actually buffer read-ahead data -- lines only
        # get put in 'linebuf' if the client explicitly does an
        # 'unreadline()'.)
        if self.linebuf:
            return self.linebuf.pop()

        buildup_line = ''

        while True:
            # read the line, make it None if EOF
            line = self.file.readline()
            if line == '':
                line = None

            # did the previous line end with a backslash?
            joining = bool(self.join_lines and buildup_line)

            if self.strip_comments and line:

                # Look for the first "#" in the line.  If none, never
                # mind.  If we find one and it's the first character, or
                # is not preceded by "\", then it starts a comment --
                # strip the comment and carry on.  Otherwise, it's just
                # an escaped "#", so unescape it (and any other escaped
                # "#"'s that might be lurking in there) and otherwise
                # leave the line alone.
                # NOTE: only the first "#" is examined, so an unescaped
                # "#" that follows an escaped one is not treated as a
                # comment.

                pos = line.find("#")
                if pos == -1:           # no "#" -- no comments
                    pass

                # It's definitely a comment -- either "#" is the first
                # character, or it's elsewhere and unescaped.
                elif pos == 0 or line[pos - 1] != "\\":
                    # Have to preserve the trailing newline, because it's
                    # the job of a later step (rstrip_ws) to remove it --
                    # and if rstrip_ws is false, we'd better preserve it!
                    eol = '\n' if line.endswith('\n') else ''
                    line = line[:pos] + eol

                    # If all that's left is whitespace, then skip line
                    # *now*, before we try to join it to 'buildup_line' --
                    # that way constructs like
                    #   hello \\
                    #   # comment that should be ignored
                    #   there
                    # result in "hello there".  The skipped line is still
                    # a physical line, so it must be counted or every
                    # later line number would be reported too low.
                    if line.strip() == "":
                        self._count_physical_line(joining)
                        continue

                else:                   # it's an escaped "#"
                    line = line.replace("\\#", "#")

            if joining:
                # oops: end of file
                if line is None:
                    self.warn("continuation line immediately precedes "
                              "end-of-file")
                    return buildup_line

                if self.collapse_join:
                    line = line.lstrip()
                line = buildup_line + line

            # just an ordinary line, read it as usual
            elif line is None:          # eof
                return None

            self._count_physical_line(joining)

            # strip whitespace however the client wants (leading and
            # trailing, or one or the other, or neither)
            if self.lstrip_ws and self.rstrip_ws:
                line = line.strip()
            elif self.lstrip_ws:
                line = line.lstrip()
            elif self.rstrip_ws:
                line = line.rstrip()

            # blank line (whether we rstrip'ed or not)?  skip to next line
            # if appropriate
            if self.skip_blanks and line in ('', '\n'):
                continue

            if self.join_lines:
                # 'endswith' rather than indexing: 'line' may be the
                # empty string here when 'skip_blanks' is false.
                if line.endswith('\\'):
                    buildup_line = line[:-1]
                    continue

                if line.endswith('\\\n'):
                    buildup_line = line[:-2] + '\n'
                    continue

            # well, I guess there's some actual content there: return it
            return line

    def readlines(self):
        """Read and return the list of all logical lines remaining in the
        current file."""

        # 'readline()' signals end-of-file with None, which is exactly
        # the sentinel form of 'iter()'.
        return list(iter(self.readline, None))

    def unreadline(self, line):
        """Push 'line' (a string) onto an internal buffer that will be
        checked by future 'readline()' calls.  Handy for implementing
        a parser with line-at-a-time lookahead."""

        self.linebuf.append(line)