blob: 1dfc8b4d070c7734e0cfc19509f110d862a43818 [file] [log] [blame]
r"""File-like objects that read from or write to a string buffer.

This implements (nearly) all stdio methods.

f = StringIO()      # ready for writing
f = StringIO(buf)   # ready for reading
f.close()           # explicitly release resources held
flag = f.isatty()   # always false
pos = f.tell()      # get current position
f.seek(pos)         # set current position
f.seek(pos, mode)   # mode 0: absolute; 1: relative; 2: relative to EOF
buf = f.read()      # read until EOF
buf = f.read(n)     # read up to n bytes
buf = f.readline()  # read until end of line ('\n') or EOF
list = f.readlines()# list of f.readline() results until EOF
f.truncate([size])  # truncate file to at most size (default: current pos)
f.write(buf)        # write at current position
f.writelines(list)  # for line in list: f.write(line)
f.getvalue()        # return whole file's contents as a string

Notes:
- Using a real file is often faster (but less convenient).
- There's also a much faster implementation in C, called cStringIO, but
  it's not subclassable.
- fileno() is left unimplemented so that code which uses it triggers
  an exception early.
- Seeking far beyond EOF and then writing will insert real null
  bytes that occupy space in the buffer.
- There's a simple test set (see end of this file).
"""
# EINVAL is the errno value attached to the IOError raised for an invalid
# (negative) truncate size.  Fall back to the conventional numeric value
# when the errno module cannot be imported.
try:
    from errno import EINVAL
except ImportError:
    EINVAL = 22

# Only the StringIO class is part of the public API.
__all__ = ["StringIO"]
37
Martin v. Löwis9e62ff22003-10-18 10:20:42 +000038def _complain_ifclosed(closed):
39 if closed:
40 raise ValueError, "I/O operation on closed file"
41
class StringIO:
    """class StringIO([buffer])

    When a StringIO object is created, it can be initialized to an existing
    string by passing the string to the constructor. If no string is given,
    the StringIO will start empty.

    The StringIO object can accept either Unicode or 8-bit strings, but
    mixing the two may take some care. If both are used, 8-bit strings that
    cannot be interpreted as 7-bit ASCII (that use the 8th bit) will cause
    a UnicodeError to be raised when getvalue() is called.
    """
    # Internal state:
    #   buf       -- consolidated contents
    #   buflist   -- fragments written since the last consolidation; they
    #                are joined onto buf lazily, the next time the contents
    #                are needed
    #   len       -- logical length of the file (buf plus pending fragments)
    #   pos       -- current position; may lie beyond len after a seek
    #   closed    -- True once close() has been called
    #   softspace -- initialised to 0 and not used by this class itself
    #                (presumably for the print statement -- TODO confirm)

    def __init__(self, buf = ''):
        """Create the object, optionally pre-loaded with the contents *buf*.

        A *buf* that is not a string is converted with str().
        """
        # Force self.buf to be a string or unicode
        if not isinstance(buf, basestring):
            buf = str(buf)
        self.buf = buf
        self.len = len(buf)
        self.buflist = []
        self.pos = 0
        self.closed = False
        self.softspace = 0

    def __iter__(self):
        """Return self: the object is its own iterator."""
        return self

    def next(self):
        """A file object is its own iterator, for example iter(f) returns f
        (unless f is closed). When a file is used as an iterator, typically
        in a for loop (for example, for line in f: print line), the next()
        method is called repeatedly. This method returns the next input line,
        or raises StopIteration when EOF is hit.
        """
        if self.closed:
            raise StopIteration
        r = self.readline()
        if not r:
            raise StopIteration
        return r

    def close(self):
        """Free the memory buffer.

        Calling close() more than once is harmless.
        """
        if not self.closed:
            self.closed = True
            del self.buf, self.pos

    def isatty(self):
        """Returns False because StringIO objects are not connected to a
        tty-like device.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        return False

    def seek(self, pos, mode = 0):
        """Set the file's current position.

        The mode argument is optional and defaults to 0 (absolute file
        positioning); other values are 1 (seek relative to the current
        position) and 2 (seek relative to the file's end).

        A resulting negative position is clamped to 0.  Seeking beyond the
        end is allowed; the gap is filled with null bytes by a later write.

        There is no return value.  Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        if mode == 1:
            pos += self.pos
        elif mode == 2:
            pos += self.len
        self.pos = max(0, pos)

    def tell(self):
        """Return the file's current position.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        return self.pos

    def read(self, n = -1):
        """Read at most n bytes from the file
        (less if the read hits EOF before obtaining n bytes).

        If the n argument is negative, None or omitted, read all data until
        EOF is reached. The bytes are returned as a string object. An empty
        string is returned when EOF is encountered immediately.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        # None is treated like a negative count ("read everything").
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos+n, self.len)
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readline(self, length=None):
        """Read one entire line from the file.

        A trailing newline character is kept in the string (but may be absent
        when a file ends with an incomplete line). If the length argument is
        present and non-negative, it is a maximum byte count (including the
        trailing newline) and an incomplete line may be returned.  A negative
        length is ignored.

        An empty string is returned only when EOF is encountered immediately.

        Note: Unlike stdio's fgets(), the returned string contains null
        characters ('\\0') if they occurred in the input.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        i = self.buf.find('\n', self.pos)
        if i < 0:
            newpos = self.len
        else:
            newpos = i+1
        # Only a non-negative length limits the read; a negative one used to
        # move the position *backwards* and return an empty string.
        if length is not None and length >= 0:
            if self.pos + length < newpos:
                newpos = self.pos + length
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readlines(self, sizehint = 0):
        """Read until EOF using readline() and return a list containing the
        lines thus read.

        If the optional sizehint argument is present and positive, instead
        of reading up to EOF, whole lines totalling approximately sizehint
        bytes (or more to accommodate a final whole line) are read.
        """
        total = 0
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            total += len(line)
            if 0 < sizehint <= total:
                break
            line = self.readline()
        return lines

    def truncate(self, size=None):
        """Truncate the file's size.

        If the optional size argument is present, the file is truncated to
        (at most) that size. The size defaults to the current position.
        The current file position is not changed unless the position
        is beyond the new file size.

        If the specified size exceeds the file's current size, the
        file remains unchanged.

        Raises IOError (errno EINVAL) for a negative size and ValueError if
        the object is closed.
        """
        _complain_ifclosed(self.closed)
        if size is None:
            size = self.pos
        elif size < 0:
            raise IOError(EINVAL, "Negative size not allowed")
        elif size < self.pos:
            self.pos = size
        self.buf = self.getvalue()[:size]
        # Keep the recorded length in step with the buffer; otherwise a
        # later seek(0, 2) or write() works from the stale, longer length.
        self.len = len(self.buf)

    def write(self, s):
        """Write a string to the file.

        An argument that is not a string is converted with str().  Writing
        at a position beyond the end first pads the gap with null bytes.

        There is no return value.  Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        if not s: return
        # Force s to be a string or unicode
        if not isinstance(s, basestring):
            s = str(s)
        spos = self.pos
        slen = self.len
        if spos == slen:
            # Fast path: appending at EOF only queues the fragment.
            self.buflist.append(s)
            self.len = self.pos = spos + len(s)
            return
        if spos > slen:
            # Position is past EOF: pad the gap with real null bytes.
            self.buflist.append('\0'*(spos - slen))
            slen = spos
        newpos = spos + len(s)
        if spos < slen:
            # Overwriting existing data: consolidate, then splice s in.
            if self.buflist:
                self.buf += ''.join(self.buflist)
            self.buflist = [self.buf[:spos], s, self.buf[newpos:]]
            self.buf = ''
            if newpos > slen:
                slen = newpos
        else:
            self.buflist.append(s)
            slen = newpos
        self.len = slen
        self.pos = newpos

    def writelines(self, iterable):
        """Write a sequence of strings to the file. The sequence can be any
        iterable object producing strings, typically a list of strings. There
        is no return value.

        (The name is intended to match readlines(); writelines() does not add
        line separators.)
        """
        write = self.write
        for line in iterable:
            write(line)

    def flush(self):
        """Flush the internal buffer

        Nothing needs flushing; this only raises ValueError if the object
        is closed.
        """
        _complain_ifclosed(self.closed)

    def getvalue(self):
        """
        Retrieve the entire contents of the "file" at any time before
        the StringIO object's close() method is called.

        The StringIO object can accept either Unicode or 8-bit strings,
        but mixing the two may take some care. If both are used, 8-bit
        strings that cannot be interpreted as 7-bit ASCII (that use the
        8th bit) will cause a UnicodeError to be raised when getvalue()
        is called.

        Raises ValueError if the object is closed.
        """
        # close() deletes self.buf, so without this guard a closed object
        # failed with AttributeError instead of the ValueError raised by
        # every other method.
        _complain_ifclosed(self.closed)
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        return self.buf
Guido van Rossum85d89451994-06-23 11:53:27 +0000273
274
# A little test suite
276
def test():
    """Run a small self-test of StringIO against the contents of a text file.

    The file named by the first command-line argument is used, or
    '/etc/passwd' when no argument is given.  The file is copied into a
    StringIO object, and the results of writing, seeking and reading are
    compared with the data read from the real file.  Progress is printed
    on standard output.

    Raises RuntimeError when a check fails or the file is too short for
    the checks (it needs more than a couple of lines).
    """
    import sys
    # Writing through sys.stdout produces the same output as the old print
    # statements while remaining valid syntax on Python 2 and Python 3.
    say = sys.stdout.write
    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        path = '/etc/passwd'
    # Read the file once and close it deterministically instead of leaking
    # two open handles.
    source = open(path, 'r')
    try:
        lines = source.readlines()
    finally:
        source.close()
    text = ''.join(lines)
    if len(lines) < 2:
        raise RuntimeError('test file must contain at least two lines')
    f = StringIO()
    for line in lines[:-2]:
        f.write(line)
    f.writelines(lines[-2:])
    if f.getvalue() != text:
        raise RuntimeError('write failed')
    length = f.tell()
    say('File length = %s\n' % (length,))
    # Overwrite the second line with itself; the contents must not change.
    f.seek(len(lines[0]))
    f.write(lines[1])
    f.seek(0)
    say('First line = %s\n' % (repr(f.readline()),))
    say('Position = %s\n' % (f.tell(),))
    line = f.readline()
    say('Second line = %s\n' % (repr(line),))
    # Step back over the second line and read it again.
    f.seek(-len(line), 1)
    line2 = f.read(len(line))
    if line != line2:
        raise RuntimeError('bad result after seek back')
    # Skip ahead, then read everything that is left.
    f.seek(len(line2), 1)
    rest = f.readlines()
    if not rest:
        raise RuntimeError('test file is too short')
    line = rest[-1]
    f.seek(f.tell() - len(line))
    line2 = f.read()
    if line != line2:
        raise RuntimeError('bad result after seek back from EOF')
    say('Read %s more lines\n' % (len(rest),))
    say('File length = %s\n' % (f.tell(),))
    if f.tell() != length:
        raise RuntimeError('bad length')
    f.close()
Guido van Rossum85d89451994-06-23 11:53:27 +0000316
# Run the self-test when the module is executed as a script.
if __name__ == '__main__':
    test()