blob: 5c463fbc1c94cf09cd9f6e8e4166f928c32c2fa9 [file] [log] [blame]
r"""File-like objects that read from or write to a string buffer.

This implements (nearly) all stdio methods.

f = StringIO()      # ready for writing
f = StringIO(buf)   # ready for reading
f.close()           # explicitly release resources held
flag = f.isatty()   # always false
pos = f.tell()      # get current position
f.seek(pos)         # set current position
f.seek(pos, mode)   # mode 0: absolute; 1: relative; 2: relative to EOF
buf = f.read()      # read until EOF
buf = f.read(n)     # read up to n bytes
buf = f.readline()  # read until end of line ('\n') or EOF
list = f.readlines()# list of f.readline() results until EOF
f.truncate([size])  # truncate file to at most size (default: current pos)
f.write(buf)        # write at current position
f.writelines(list)  # for line in list: f.write(line)
f.getvalue()        # return whole file's contents as a string

Notes:
- Using a real file is often faster (but less convenient).
- There's also a much faster implementation in C, called cStringIO, but
  it's not subclassable.
- fileno() is left unimplemented so that code which uses it triggers
  an exception early.
- Seeking far beyond EOF and then writing will insert real null
  bytes that occupy space in the buffer.
- There's a simple test set (see end of this file).
"""
Barry Warsawc7ed0e32000-12-12 23:12:23 +000031try:
Barry Warsawc1401312000-12-12 23:16:51 +000032 from errno import EINVAL
Barry Warsawc7ed0e32000-12-12 23:12:23 +000033except ImportError:
Barry Warsawc1401312000-12-12 23:16:51 +000034 EINVAL = 22
Barry Warsawc7ed0e32000-12-12 23:12:23 +000035
Skip Montanaroe99d5ea2001-01-20 19:54:20 +000036__all__ = ["StringIO"]
37
Martin v. Löwis9e62ff22003-10-18 10:20:42 +000038def _complain_ifclosed(closed):
39 if closed:
40 raise ValueError, "I/O operation on closed file"
41
class StringIO:
    """class StringIO([buffer])

    When a StringIO object is created, it can be initialized to an existing
    string by passing the string to the constructor.  If no string is given,
    the StringIO will start empty.

    The StringIO object can accept either Unicode or 8-bit strings, but
    mixing the two may take some care.  If both are used, 8-bit strings that
    cannot be interpreted as 7-bit ASCII (that use the 8th bit) will cause
    a UnicodeError to be raised when getvalue() is called.
    """
    def __init__(self, buf = ''):
        # Force self.buf to be a string or unicode
        if not isinstance(buf, basestring):
            buf = str(buf)
        # Consolidated contents; pending writes live in self.buflist until
        # something needs the whole string.
        self.buf = buf
        # Logical length of the file: len(self.buf) plus all of self.buflist.
        self.len = len(buf)
        # Fragments written but not yet joined onto self.buf.
        self.buflist = []
        # Current read/write position.
        self.pos = 0
        self.closed = False
        # File-object attribute maintained for the print statement.
        self.softspace = 0

    def _flush_buflist(self):
        """Join any pending write fragments onto self.buf."""
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []

    def __iter__(self):
        return self

    def next(self):
        """A file object is its own iterator, for example iter(f) returns f
        (unless f is closed). When a file is used as an iterator, typically
        in a for loop (for example, for line in f: print line), the next()
        method is called repeatedly. This method returns the next input line,
        or raises StopIteration when EOF is hit.
        """
        if self.closed:
            raise StopIteration
        r = self.readline()
        if not r:
            raise StopIteration
        return r

    def close(self):
        """Free the memory buffer.

        Closing an already closed object is a no-op.
        """
        if not self.closed:
            self.closed = True
            del self.buf, self.pos
            # Also drop pending fragments, otherwise the written data
            # stays alive even though the object is closed.
            self.buflist = []

    def isatty(self):
        """Returns False because StringIO objects are not connected to a
        tty-like device.
        """
        _complain_ifclosed(self.closed)
        return False

    def seek(self, pos, mode = 0):
        """Set the file's current position.

        The mode argument is optional and defaults to 0 (absolute file
        positioning); other values are 1 (seek relative to the current
        position) and 2 (seek relative to the file's end).

        A resulting negative position is clamped to 0.

        There is no return value.
        """
        _complain_ifclosed(self.closed)
        self._flush_buflist()
        if mode == 1:
            pos += self.pos
        elif mode == 2:
            pos += self.len
        self.pos = max(0, pos)

    def tell(self):
        """Return the file's current position."""
        _complain_ifclosed(self.closed)
        return self.pos

    def read(self, n = -1):
        """Read at most size bytes from the file
        (less if the read hits EOF before obtaining size bytes).

        If the size argument is negative, None or omitted, read all data
        until EOF is reached.  The bytes are returned as a string object.
        An empty string is returned when EOF is encountered immediately.
        """
        _complain_ifclosed(self.closed)
        self._flush_buflist()
        # Explicit None check: do not rely on None comparing below 0.
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos+n, self.len)
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readline(self, length=None):
        """Read one entire line from the file.

        A trailing newline character is kept in the string (but may be absent
        when a file ends with an incomplete line).  If the size argument is
        present and non-negative, it is a maximum byte count (including the
        trailing newline) and an incomplete line may be returned.  A negative
        size is treated like an omitted one.

        An empty string is returned only when EOF is encountered immediately.

        Note: Unlike stdio's fgets(), the returned string contains null
        characters ('\\0') if they occurred in the input.
        """
        _complain_ifclosed(self.closed)
        self._flush_buflist()
        i = self.buf.find('\n', self.pos)
        if i < 0:
            newpos = self.len
        else:
            newpos = i+1
        # Only a non-negative length limits the read; a negative one would
        # otherwise move the position backwards.
        if length is not None and length >= 0:
            if self.pos + length < newpos:
                newpos = self.pos + length
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readlines(self, sizehint = 0):
        """Read until EOF using readline() and return a list containing the
        lines thus read.

        If the optional sizehint argument is present and positive, instead
        of reading up to EOF, whole lines totalling approximately sizehint
        bytes (or more to accommodate a final whole line) are read.
        """
        total = 0
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            total += len(line)
            if 0 < sizehint <= total:
                break
            line = self.readline()
        return lines

    def truncate(self, size=None):
        """Truncate the file's size.

        If the optional size argument is present, the file is truncated to
        (at most) that size.  The size defaults to the current position.
        The current file position is not changed unless the position
        is beyond the new file size.

        If the specified size exceeds the file's current size, the
        file remains unchanged.

        Raises IOError(EINVAL) if size is negative.
        """
        _complain_ifclosed(self.closed)
        if size is None:
            size = self.pos
        elif size < 0:
            raise IOError(EINVAL, "Negative size not allowed")
        elif size < self.pos:
            self.pos = size
        self.buf = self.getvalue()[:size]
        # Use the real buffer length: when size exceeds the content the file
        # is unchanged, so self.len must not grow past the stored data.
        self.len = len(self.buf)

    def write(self, s):
        """Write a string to the file.

        Writing at a position beyond the current end first pads the gap
        with null bytes.  Writing a false value (e.g. '') does nothing.

        There is no return value.
        """
        _complain_ifclosed(self.closed)
        if not s: return
        # Force s to be a string or unicode
        if not isinstance(s, basestring):
            s = str(s)
        spos = self.pos
        slen = self.len
        if spos == slen:
            # Fast path: plain append at EOF, defer the join.
            self.buflist.append(s)
            self.len = self.pos = spos + len(s)
            return
        if spos > slen:
            # Position is past EOF: fill the gap with real null bytes.
            self.buflist.append('\0'*(spos - slen))
            slen = spos
        newpos = spos + len(s)
        if spos < slen:
            # Overwrite in the middle: splice s between the untouched
            # head and tail of the consolidated buffer.
            self._flush_buflist()
            self.buflist = [self.buf[:spos], s, self.buf[newpos:]]
            self.buf = ''
            if newpos > slen:
                slen = newpos
        else:
            self.buflist.append(s)
            slen = newpos
        self.len = slen
        self.pos = newpos

    def writelines(self, iterable):
        """Write a sequence of strings to the file. The sequence can be any
        iterable object producing strings, typically a list of strings. There
        is no return value.

        (The name is intended to match readlines(); writelines() does not add
        line separators.)
        """
        write = self.write
        for line in iterable:
            write(line)

    def flush(self):
        """Flush the internal buffer

        Nothing is actually flushed; this only checks the object is open.
        """
        _complain_ifclosed(self.closed)

    def getvalue(self):
        """
        Retrieve the entire contents of the "file" at any time before
        the StringIO object's close() method is called.

        Raises ValueError if the object has been closed.

        The StringIO object can accept either Unicode or 8-bit strings,
        but mixing the two may take some care. If both are used, 8-bit
        strings that cannot be interpreted as 7-bit ASCII (that use the
        8th bit) will cause a UnicodeError to be raised when getvalue()
        is called.
        """
        # Fail like every other method instead of with an AttributeError
        # on the deleted self.buf.
        _complain_ifclosed(self.closed)
        self._flush_buflist()
        return self.buf
Guido van Rossum85d89451994-06-23 11:53:27 +0000274
275
276# A little test suite
277
def test():
    """Exercise StringIO against the contents of a real text file.

    The file is taken from the first command-line argument and defaults
    to /etc/passwd.  Progress is printed to stdout; a RuntimeError is
    raised as soon as a check fails.
    """
    import sys
    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        path = '/etc/passwd'
    # Read the file both as lines and as one string through a single
    # handle, and make sure that handle is closed again.
    source = open(path, 'r')
    try:
        lines = source.readlines()
        source.seek(0)
        text = source.read()
    finally:
        source.close()
    f = StringIO()
    for line in lines[:-2]:
        f.write(line)
    f.writelines(lines[-2:])
    if f.getvalue() != text:
        raise RuntimeError('write failed')
    length = f.tell()
    print('File length = %s' % length)
    # Overwrite the second line in place with itself.
    f.seek(len(lines[0]))
    f.write(lines[1])
    f.seek(0)
    print('First line = %s' % repr(f.readline()))
    print('Position = %s' % f.tell())
    line = f.readline()
    print('Second line = %s' % repr(line))
    f.seek(-len(line), 1)
    line2 = f.read(len(line))
    if line != line2:
        raise RuntimeError('bad result after seek back')
    f.seek(len(line2), 1)
    remaining = f.readlines()
    line = remaining[-1]
    f.seek(f.tell() - len(line))
    line2 = f.read()
    if line != line2:
        raise RuntimeError('bad result after seek back from EOF')
    print('Read %s more lines' % len(remaining))
    print('File length = %s' % f.tell())
    if f.tell() != length:
        raise RuntimeError('bad length')
    # Floor division keeps the size an integer.
    f.truncate(length // 2)
    f.seek(0, 2)
    print('Truncated length = %s' % f.tell())
    if f.tell() != length // 2:
        raise RuntimeError('truncate did not adjust length')
    f.close()
Guido van Rossum85d89451994-06-23 11:53:27 +0000322
# Run the self-test only when executed as a script, not when imported.
if __name__ == "__main__":
    test()