blob: 340fae1229031c7110d18d5d6e143f7b9d106d1a [file] [log] [blame]
r"""File-like objects that read from or write to a string buffer.

This implements (nearly) all stdio methods.

f = StringIO()       # ready for writing
f = StringIO(buf)    # ready for reading
f.close()            # explicitly release resources held
flag = f.isatty()    # always false
pos = f.tell()       # get current position
f.seek(pos)          # set current position
f.seek(pos, mode)    # mode 0: absolute; 1: relative; 2: relative to EOF
buf = f.read()       # read until EOF
buf = f.read(n)      # read up to n bytes
buf = f.readline()   # read until end of line ('\n') or EOF
list = f.readlines() # list of f.readline() results until EOF
f.truncate([size])   # truncate file to at most size (default: current pos)
f.write(buf)         # write at current position
f.writelines(list)   # for line in list: f.write(line)
f.getvalue()         # return whole file's contents as a string

Notes:
- Using a real file is often faster (but less convenient).
- There's also a much faster implementation in C, called cStringIO, but
  it's not subclassable.
- fileno() is left unimplemented so that code which uses it triggers
  an exception early.
- Seeking far beyond EOF and then writing will insert real null
  bytes that occupy space in the buffer.
- There's a simple test set (see end of this file).
"""
# EINVAL is the error code StringIO.truncate() passes to IOError when it is
# given a negative size.  If the errno module cannot be imported, fall back
# to a hard-coded number so this module still loads.
try:
    from errno import EINVAL
except ImportError:
    EINVAL = 22  # NOTE(review): presumably the usual numeric value of EINVAL -- confirm

# Names exported by "from <this module> import *".
__all__ = ["StringIO"]
37
Martin v. Löwis9e62ff22003-10-18 10:20:42 +000038def _complain_ifclosed(closed):
39 if closed:
40 raise ValueError, "I/O operation on closed file"
41
class StringIO:
    """class StringIO([buffer])

    When a StringIO object is created, it can be initialized to an existing
    string by passing the string to the constructor.  If no string is given,
    the StringIO will start empty.

    The StringIO object can accept either Unicode or 8-bit strings, but
    mixing the two may take some care.  If both are used, 8-bit strings that
    cannot be interpreted as 7-bit ASCII (that use the 8th bit) will cause
    a UnicodeError to be raised when getvalue() is called.
    """
    # Internal state:
    #   buf     -- the consolidated contents
    #   buflist -- chunks written since the last consolidation; they are
    #              joined onto buf lazily, when something needs the full text
    #   len     -- logical length of the file (buf plus pending chunks)
    #   pos     -- current file position
    #   closed  -- True once close() has been called

    def __init__(self, buf = ''):
        # Force self.buf to be a string or unicode
        if not isinstance(buf, basestring):
            buf = str(buf)
        self.buf = buf
        self.len = len(buf)
        self.buflist = []
        self.pos = 0
        self.closed = False
        self.softspace = 0      # file-object attribute; not read by this class

    def __iter__(self):
        return self

    def next(self):
        """A file object is its own iterator, for example iter(f) returns f
        (unless f is closed). When a file is used as an iterator, typically
        in a for loop (for example, for line in f: print line), the next()
        method is called repeatedly. This method returns the next input line,
        or raises StopIteration when EOF is hit.
        """
        _complain_ifclosed(self.closed)
        r = self.readline()
        if not r:
            raise StopIteration
        return r

    def close(self):
        """Free the memory buffer.

        Calling close() more than once is harmless.
        """
        if not self.closed:
            self.closed = True
            del self.buf, self.pos

    def isatty(self):
        """Returns False because StringIO objects are not connected to a
        tty-like device.
        """
        _complain_ifclosed(self.closed)
        return False

    def seek(self, pos, mode = 0):
        """Set the file's current position.

        The mode argument is optional and defaults to 0 (absolute file
        positioning); other values are 1 (seek relative to the current
        position) and 2 (seek relative to the file's end).

        A resulting position below zero is clamped to zero.

        There is no return value.
        """
        _complain_ifclosed(self.closed)
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        if mode == 1:
            pos += self.pos
        elif mode == 2:
            pos += self.len
        self.pos = max(0, pos)

    def tell(self):
        """Return the file's current position."""
        _complain_ifclosed(self.closed)
        return self.pos

    def read(self, n = -1):
        """Read at most n bytes from the file
        (less if the read hits EOF before obtaining n bytes).

        If the n argument is negative, None or omitted, read all data until
        EOF is reached.  The bytes are returned as a string object.  An
        empty string is returned when EOF is encountered immediately.
        """
        _complain_ifclosed(self.closed)
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos+n, self.len)
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readline(self, length=None):
        r"""Read one entire line from the file.

        A trailing newline character is kept in the string (but may be absent
        when a file ends with an incomplete line).  If the length argument is
        present and non-negative, it is a maximum byte count (including the
        trailing newline) and an incomplete line may be returned; a length
        of 0 therefore returns an empty string.

        An empty string is otherwise returned only when EOF is encountered
        immediately.

        Note: Unlike stdio's fgets(), the returned string contains null
        characters ('\0') if they occurred in the input.
        """
        _complain_ifclosed(self.closed)
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        i = self.buf.find('\n', self.pos)
        if i < 0:
            newpos = self.len
        else:
            newpos = i+1
        # ">= 0", not "> 0": a limit of zero is a valid non-negative limit
        # and must yield '' instead of silently reading a whole line.
        if length is not None and length >= 0:
            if self.pos + length < newpos:
                newpos = self.pos + length
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readlines(self, sizehint = 0):
        """Read until EOF using readline() and return a list containing the
        lines thus read.

        If the optional sizehint argument is present and positive, instead
        of reading up to EOF, whole lines totalling approximately sizehint
        bytes (or more, to accommodate a final whole line) are read.
        """
        total = 0
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            total += len(line)
            if 0 < sizehint <= total:
                break
            line = self.readline()
        return lines

    def truncate(self, size=None):
        """Truncate the file's size.

        If the optional size argument is present, the file is truncated to
        (at most) that size.  The size defaults to the current position.
        The current file position is not changed unless the position
        is beyond the new file size.

        If the specified size exceeds the file's current size, the
        file remains unchanged.

        Raises IOError(EINVAL) if size is negative.
        """
        _complain_ifclosed(self.closed)
        if size is None:
            size = self.pos
        elif size < 0:
            raise IOError(EINVAL, "Negative size not allowed")
        elif size < self.pos:
            self.pos = size
        self.buf = self.getvalue()[:size]
        # Record the length actually kept.  Using "size" directly would
        # overstate the length whenever size is larger than the contents,
        # leaving self.len out of sync with the buffer.
        self.len = len(self.buf)

    def write(self, s):
        """Write a string to the file.

        Writing at a position beyond the end of the file first pads the
        gap with null bytes.

        There is no return value.
        """
        _complain_ifclosed(self.closed)
        if not s:
            return
        # Force s to be a string or unicode
        if not isinstance(s, basestring):
            s = str(s)
        spos = self.pos
        slen = self.len
        if spos == slen:
            # Fast path: plain append at EOF, no consolidation needed.
            self.buflist.append(s)
            self.len = self.pos = spos + len(s)
            return
        if spos > slen:
            # Position is past EOF: fill the hole with null bytes.
            self.buflist.append('\0'*(spos - slen))
            slen = spos
        newpos = spos + len(s)
        if spos < slen:
            # Overwrite in the middle: consolidate, then splice s in between
            # the text before spos and the text after the overwritten span.
            if self.buflist:
                self.buf += ''.join(self.buflist)
            self.buflist = [self.buf[:spos], s, self.buf[newpos:]]
            self.buf = ''
            if newpos > slen:
                slen = newpos
        else:
            self.buflist.append(s)
            slen = newpos
        self.len = slen
        self.pos = newpos

    def writelines(self, iterable):
        """Write a sequence of strings to the file. The sequence can be any
        iterable object producing strings, typically a list of strings. There
        is no return value.

        (The name is intended to match readlines(); writelines() does not add
        line separators.)
        """
        write = self.write
        for line in iterable:
            write(line)

    def flush(self):
        """Flush the internal buffer.

        Nothing needs to be flushed; this only checks that the object is
        still open.
        """
        _complain_ifclosed(self.closed)

    def getvalue(self):
        """
        Retrieve the entire contents of the "file" at any time before
        the StringIO object's close() method is called.

        The StringIO object can accept either Unicode or 8-bit strings,
        but mixing the two may take some care. If both are used, 8-bit
        strings that cannot be interpreted as 7-bit ASCII (that use the
        8th bit) will cause a UnicodeError to be raised when getvalue()
        is called.

        Raises ValueError if the object has been closed.
        """
        # close() deletes self.buf, so without this check a closed object
        # would fail with an unrelated AttributeError.
        _complain_ifclosed(self.closed)
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        return self.buf
Guido van Rossum85d89451994-06-23 11:53:27 +0000273
274
275# A little test suite
276
def test():
    """Exercise StringIO against the contents of a real text file.

    The file is taken from the first command-line argument and defaults to
    '/etc/passwd'.  Progress is written to standard output; RuntimeError is
    raised as soon as a check fails.  The file is expected to contain at
    least two lines.
    """
    import sys
    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        path = '/etc/passwd'

    def report(*parts):
        # Same text the print statement would produce (items separated by
        # single spaces, newline at the end) without depending on whether
        # print is a statement or a function.
        sys.stdout.write(' '.join([str(part) for part in parts]) + '\n')

    # Read the file once and close it deterministically; the full text is
    # by definition the concatenation of its lines.
    source = open(path, 'r')
    try:
        lines = source.readlines()
    finally:
        source.close()
    text = ''.join(lines)

    f = StringIO()
    for line in lines[:-2]:
        f.write(line)
    f.writelines(lines[-2:])
    if f.getvalue() != text:
        raise RuntimeError('write failed')
    length = f.tell()
    report('File length =', length)

    # Overwrite the second line with itself, then re-read from the start.
    f.seek(len(lines[0]))
    f.write(lines[1])
    f.seek(0)
    report('First line =', repr(f.readline()))
    report('Position =', f.tell())
    line = f.readline()
    report('Second line =', repr(line))

    # Relative seek backwards must return to the start of that line.
    f.seek(-len(line), 1)
    line2 = f.read(len(line))
    if line != line2:
        raise RuntimeError('bad result after seek back')

    f.seek(len(line2), 1)
    remaining = f.readlines()
    line = remaining[-1]
    f.seek(f.tell() - len(line))
    line2 = f.read()
    if line != line2:
        raise RuntimeError('bad result after seek back from EOF')
    report('Read', len(remaining), 'more lines')
    report('File length =', f.tell())
    if f.tell() != length:
        raise RuntimeError('bad length')

    # Floor division keeps the size an integer under any division mode.
    half = length // 2
    f.truncate(half)
    f.seek(0, 2)
    report('Truncated length =', f.tell())
    if f.tell() != half:
        raise RuntimeError('truncate did not adjust length')
    f.close()

if __name__ == '__main__':
    test()