blob: a43e5d44c5434c7488e5df5634afae5d81f8d607 [file] [log] [blame]
r"""File-like objects that read from or write to a string buffer.

This implements (nearly) all stdio methods.

f = StringIO()       # ready for writing
f = StringIO(buf)    # ready for reading
f.close()            # explicitly release resources held
flag = f.isatty()    # always false
pos = f.tell()       # get current position
f.seek(pos)          # set current position
f.seek(pos, mode)    # mode 0: absolute; 1: relative; 2: relative to EOF
buf = f.read()       # read until EOF
buf = f.read(n)      # read up to n bytes
buf = f.readline()   # read until end of line ('\n') or EOF
list = f.readlines() # list of f.readline() results until EOF
f.truncate([size])   # truncate file to at most size (default: current pos)
f.write(buf)         # write at current position
f.writelines(list)   # for line in list: f.write(line)
f.getvalue()         # return whole file's contents as a string

Notes:
- Using a real file is often faster (but less convenient).
- There's also a much faster implementation in C, called cStringIO, but
  it's not subclassable.
- fileno() is left unimplemented so that code which uses it triggers
  an exception early.
- Seeking far beyond EOF and then writing will insert real null
  bytes that occupy space in the buffer.
- There's a simple test set (see end of this file).
"""
Barry Warsawc7ed0e32000-12-12 23:12:23 +000031try:
Barry Warsawc1401312000-12-12 23:16:51 +000032 from errno import EINVAL
Barry Warsawc7ed0e32000-12-12 23:12:23 +000033except ImportError:
Barry Warsawc1401312000-12-12 23:16:51 +000034 EINVAL = 22
Barry Warsawc7ed0e32000-12-12 23:12:23 +000035
Skip Montanaroe99d5ea2001-01-20 19:54:20 +000036__all__ = ["StringIO"]
37
def _complain_ifclosed(closed):
    """Raise ValueError if *closed* is true; otherwise do nothing.

    Shared guard used by the StringIO methods so that operating on a
    closed object fails with a uniform error message.
    """
    if closed:
        # Call-form raise: equivalent to the old "raise E, msg" statement
        # but also accepted by newer Python versions.
        raise ValueError("I/O operation on closed file")
41
class StringIO:
    """class StringIO([buffer])

    When a StringIO object is created, it can be initialized to an existing
    string by passing the string to the constructor. If no string is given,
    the StringIO will start empty.

    The StringIO object can accept either Unicode or 8-bit strings, but
    mixing the two may take some care. If both are used, 8-bit strings that
    cannot be interpreted as 7-bit ASCII (that use the 8th bit) will cause
    a UnicodeError to be raised when getvalue() is called.
    """

    # Instance state:
    #   buf      - the consolidated contents written so far
    #   buflist  - pieces appended by write() that have not yet been joined
    #              onto buf (joined lazily by the reading/seeking methods)
    #   len      - logical length of the file (len(buf) plus pending pieces)
    #   pos      - current read/write position
    #   closed   - true once close() has been called
    #   softspace - initialised to 0 and not otherwise used by this class

    def __init__(self, buf = ''):
        """Create the object, optionally pre-loaded with *buf*.

        A non-string *buf* is converted with str().
        """
        # Force self.buf to be a string or unicode
        if not isinstance(buf, basestring):
            buf = str(buf)
        self.buf = buf
        self.len = len(buf)
        self.buflist = []
        self.pos = 0
        self.closed = False
        self.softspace = 0

    def __iter__(self):
        """Return self; the object is its own line iterator."""
        return self

    def next(self):
        """A file object is its own iterator, for example iter(f) returns f
        (unless f is closed). When a file is used as an iterator, typically
        in a for loop (for example, for line in f: print line), the next()
        method is called repeatedly. This method returns the next input line,
        or raises StopIteration when EOF is hit.

        A closed object also raises StopIteration (not ValueError).
        """
        if self.closed:
            raise StopIteration
        r = self.readline()
        if not r:
            raise StopIteration
        return r

    def close(self):
        """Free the memory buffer.

        Closing an already closed object is a no-op.
        """
        if not self.closed:
            self.closed = True
            del self.buf, self.pos

    def isatty(self):
        """Returns False because StringIO objects are not connected to a
        tty-like device.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        return False

    def seek(self, pos, mode = 0):
        """Set the file's current position.

        The mode argument is optional and defaults to 0 (absolute file
        positioning); other values are 1 (seek relative to the current
        position) and 2 (seek relative to the file's end).

        A resulting position below zero is clamped to zero.

        There is no return value.  Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        # Fold pending writes into buf before repositioning.
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        if mode == 1:
            pos += self.pos
        elif mode == 2:
            pos += self.len
        self.pos = max(0, pos)

    def tell(self):
        """Return the file's current position.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        return self.pos

    def read(self, n = -1):
        """Read at most n bytes from the file
        (less if the read hits EOF before obtaining n bytes).

        If the n argument is negative, None or omitted, read all data until
        EOF is reached. The bytes are returned as a string object. An empty
        string is returned when EOF is encountered immediately.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        # Test for None explicitly rather than relying on None < 0.
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos+n, self.len)
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readline(self, length=None):
        """Read one entire line from the file.

        A trailing newline character is kept in the string (but may be absent
        when a file ends with an incomplete line). If the length argument is
        present and non-negative, it is a maximum byte count (including the
        trailing newline) and an incomplete line may be returned. A negative
        length is treated like an omitted one.

        An empty string is returned only when EOF is encountered immediately.

        Note: Unlike stdio's fgets(), the returned string contains null
        characters ('\\0') if they occurred in the input.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        i = self.buf.find('\n', self.pos)
        if i < 0:
            newpos = self.len
        else:
            newpos = i+1
        # Only a non-negative length is a limit; a negative one would
        # otherwise move the position backwards.
        if length is not None and length >= 0:
            if self.pos + length < newpos:
                newpos = self.pos + length
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readlines(self, sizehint = 0):
        """Read until EOF using readline() and return a list containing the
        lines thus read.

        If the optional sizehint argument is present and positive, instead
        of reading up to EOF, whole lines totalling approximately sizehint
        bytes (or more to accommodate a final whole line) are read.
        """
        total = 0
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            total += len(line)
            if 0 < sizehint <= total:
                break
            line = self.readline()
        return lines

    def truncate(self, size=None):
        """Truncate the file's size.

        If the optional size argument is present, the file is truncated to
        (at most) that size. The size defaults to the current position.
        The current file position is not changed unless the position
        is beyond the new file size.

        If the specified size exceeds the file's current size, the
        file remains unchanged.

        Raises ValueError if the object is closed and IOError(EINVAL) if
        size is negative.
        """
        _complain_ifclosed(self.closed)
        if size is None:
            size = self.pos
        elif size < 0:
            raise IOError(EINVAL, "Negative size not allowed")
        elif size < self.pos:
            self.pos = size
        # getvalue() also folds any pending writes into buf.
        self.buf = self.getvalue()[:size]
        # Keep the recorded length in step with the shortened buffer so
        # that later seeks, reads and writes see the new end of file.
        self.len = len(self.buf)

    def write(self, s):
        """Write a string to the file.

        A non-string argument is converted with str(); an empty argument
        is ignored. Writing at a position beyond the current end first
        pads the gap with null bytes.

        There is no return value.  Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        if not s: return
        # Force s to be a string or unicode
        if not isinstance(s, basestring):
            s = str(s)
        spos = self.pos
        slen = self.len
        if spos == slen:
            # Fast path: plain append at the end of the file.
            self.buflist.append(s)
            self.len = self.pos = spos + len(s)
            return
        if spos > slen:
            # Position is past EOF: fill the hole with null bytes.
            self.buflist.append('\0'*(spos - slen))
            slen = spos
        newpos = spos + len(s)
        if spos < slen:
            # Overwrite inside existing data: splice s between the part
            # before the position and the part after the written span.
            if self.buflist:
                self.buf += ''.join(self.buflist)
            self.buflist = [self.buf[:spos], s, self.buf[newpos:]]
            self.buf = ''
            if newpos > slen:
                self.len = newpos
        else:
            self.buflist.append(s)
            self.len = newpos
        self.pos = newpos

    def writelines(self, iterable):
        """Write a sequence of strings to the file. The sequence can be any
        iterable object producing strings, typically a list of strings. There
        is no return value.

        (The name is intended to match readlines(); writelines() does not add
        line separators.)
        """
        write = self.write
        for line in iterable:
            write(line)

    def flush(self):
        """Flush the internal buffer

        Nothing is actually flushed; this only raises ValueError if the
        object is closed.
        """
        _complain_ifclosed(self.closed)

    def getvalue(self):
        """
        Retrieve the entire contents of the "file" at any time before
        the StringIO object's close() method is called.

        The StringIO object can accept either Unicode or 8-bit strings,
        but mixing the two may take some care. If both are used, 8-bit
        strings that cannot be interpreted as 7-bit ASCII (that use the
        8th bit) will cause a UnicodeError to be raised when getvalue()
        is called.
        """
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        return self.buf
Guido van Rossum85d89451994-06-23 11:53:27 +0000272
273
274# A little test suite
275
def test():
    """Run a small self-test of StringIO against a real text file.

    The file named by the first command-line argument (default
    '/etc/passwd') is copied into a StringIO with write() and writelines()
    and then read back with seek(), tell(), read(), readline() and
    readlines().  Progress is printed to standard output.

    The checks index the second line of the file and the last of the
    remaining lines, so the input needs to contain several lines.

    Raises RuntimeError if any of the round-trip checks fails.
    """
    import sys
    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        path = '/etc/passwd'
    # Close the input file deterministically instead of leaking the handle.
    fp = open(path, 'r')
    try:
        lines = fp.readlines()
        fp.seek(0)
        text = fp.read()
    finally:
        fp.close()
    f = StringIO()
    for line in lines[:-2]:
        f.write(line)
    f.writelines(lines[-2:])
    if f.getvalue() != text:
        raise RuntimeError('write failed')
    length = f.tell()
    print('File length = %s' % (length,))
    # Overwrite the second line with itself; the contents must not change.
    f.seek(len(lines[0]))
    f.write(lines[1])
    f.seek(0)
    print('First line = %s' % (repr(f.readline()),))
    print('Position = %s' % (f.tell(),))
    line = f.readline()
    print('Second line = %s' % (repr(line),))
    f.seek(-len(line), 1)
    line2 = f.read(len(line))
    if line != line2:
        raise RuntimeError('bad result after seek back')
    f.seek(len(line2), 1)
    remaining = f.readlines()
    line = remaining[-1]
    f.seek(f.tell() - len(line))
    line2 = f.read()
    if line != line2:
        raise RuntimeError('bad result after seek back from EOF')
    print('Read %s more lines' % (len(remaining),))
    print('File length = %s' % (f.tell(),))
    if f.tell() != length:
        raise RuntimeError('bad length')
    f.close()
Guido van Rossum85d89451994-06-23 11:53:27 +0000315
316if __name__ == '__main__':
Barry Warsawc1401312000-12-12 23:16:51 +0000317 test()