blob: 1b9fa7b3daa877620d1de5906171c04788f439c6 [file] [log] [blame]
r"""File-like objects that read from or write to a string buffer.

This implements (nearly) all stdio methods.

f = StringIO()       # ready for writing
f = StringIO(buf)    # ready for reading
f.close()            # explicitly release resources held
flag = f.isatty()    # always false
pos = f.tell()       # get current position
f.seek(pos)          # set current position
f.seek(pos, mode)    # mode 0: absolute; 1: relative; 2: relative to EOF
buf = f.read()       # read until EOF
buf = f.read(n)      # read up to n bytes
buf = f.readline()   # read until end of line ('\n') or EOF
list = f.readlines() # list of f.readline() results until EOF
f.truncate([size])   # truncate file to at most size (default: current pos)
f.write(buf)         # write at current position
f.writelines(list)   # for line in list: f.write(line)
f.getvalue()         # return whole file's contents as a string

Notes:
- Using a real file is often faster (but less convenient).
- There's also a much faster implementation in C, called cStringIO, but
  it's not subclassable.
- fileno() is left unimplemented so that code which uses it triggers
  an exception early.
- Seeking far beyond EOF and then writing will insert real null
  bytes that occupy space in the buffer.
- There's a simple test set (see end of this file).
"""
Barry Warsawc7ed0e32000-12-12 23:12:23 +000031try:
Barry Warsawc1401312000-12-12 23:16:51 +000032 from errno import EINVAL
Barry Warsawc7ed0e32000-12-12 23:12:23 +000033except ImportError:
Barry Warsawc1401312000-12-12 23:16:51 +000034 EINVAL = 22
Barry Warsawc7ed0e32000-12-12 23:12:23 +000035
Skip Montanaroe99d5ea2001-01-20 19:54:20 +000036__all__ = ["StringIO"]
37
Martin v. Löwis9e62ff22003-10-18 10:20:42 +000038def _complain_ifclosed(closed):
39 if closed:
40 raise ValueError, "I/O operation on closed file"
41
class StringIO:
    """class StringIO([buffer])

    When a StringIO object is created, it can be initialized to an existing
    string by passing the string to the constructor.  If no string is given,
    the StringIO will start empty.

    The StringIO object can accept either Unicode or 8-bit strings, but
    mixing the two may take some care.  If both are used, 8-bit strings that
    cannot be interpreted as 7-bit ASCII (that use the 8th bit) will cause
    a UnicodeError to be raised when getvalue() is called.

    Instance attributes:

    buf       -- the consolidated part of the contents
    buflist   -- chunks written since the last consolidation; the full
                 contents are always buf + ''.join(buflist)
    len       -- total length of the contents (buf plus buflist)
    pos       -- current position; seek() may leave it beyond len
    closed    -- true once close() has been called
    softspace -- initialised to 0; not otherwise used by this class
    """

    def __init__(self, buf=''):
        """Create the object, optionally pre-loaded with *buf*.

        A *buf* that is not a string or unicode object is converted
        with str().
        """
        # Force self.buf to be a string or unicode
        if not isinstance(buf, basestring):
            buf = str(buf)
        self.buf = buf
        self.len = len(buf)
        self.buflist = []
        self.pos = 0
        self.closed = False
        self.softspace = 0

    def _merge_buflist(self):
        """Fold the pending chunks in buflist into buf (private helper)."""
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []

    def __iter__(self):
        """Return self; the object is its own line iterator (see next())."""
        return self

    def next(self):
        """A file object is its own iterator, for example iter(f) returns f
        (unless f is closed). When a file is used as an iterator, typically
        in a for loop (for example, for line in f: print line), the next()
        method is called repeatedly. This method returns the next input line,
        or raises StopIteration when EOF is hit.

        A closed object also raises StopIteration rather than ValueError.
        """
        if self.closed:
            raise StopIteration
        r = self.readline()
        if not r:
            raise StopIteration
        return r

    def close(self):
        """Free the memory buffer.

        Calling close() more than once is harmless.
        """
        if not self.closed:
            self.closed = True
            del self.buf, self.pos
            # Pending chunks hold written data too; drop them so the
            # memory really is released.
            self.buflist = []

    def isatty(self):
        """Returns False because StringIO objects are not connected to a
        tty-like device.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        return False

    def seek(self, pos, mode=0):
        """Set the file's current position.

        The mode argument is optional and defaults to 0 (absolute file
        positioning); other values are 1 (seek relative to the current
        position) and 2 (seek relative to the file's end).

        A resulting position below zero is clamped to zero; a position
        beyond the end is allowed.  There is no return value.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        self._merge_buflist()
        if mode == 1:
            pos += self.pos
        elif mode == 2:
            pos += self.len
        self.pos = max(0, pos)

    def tell(self):
        """Return the file's current position.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        return self.pos

    def read(self, n=-1):
        """Read at most n bytes from the file
        (less if the read hits EOF before obtaining n bytes).

        If the n argument is negative, None or omitted, read all data until
        EOF is reached.  The bytes are returned as a string object.  An
        empty string is returned when EOF is encountered immediately.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        self._merge_buflist()
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos + n, self.len)
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readline(self, length=None):
        """Read one entire line from the file.

        A trailing newline character is kept in the string (but may be absent
        when a file ends with an incomplete line).  If the length argument is
        present and non-negative, it is a maximum byte count (including the
        trailing newline) and an incomplete line may be returned.  A negative
        length is treated like an omitted one.

        An empty string is returned only when EOF is encountered immediately.

        Note: Unlike stdio's fgets(), the returned string contains null
        characters ('\\0') if they occurred in the input.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        self._merge_buflist()
        i = self.buf.find('\n', self.pos)
        if i < 0:
            newpos = self.len
        else:
            newpos = i + 1
        # Only a non-negative length limits the read; a negative one used to
        # move the position *backwards* and return ''.
        if length is not None and length >= 0:
            if self.pos + length < newpos:
                newpos = self.pos + length
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readlines(self, sizehint=0):
        """Read until EOF using readline() and return a list containing the
        lines thus read.

        If the optional sizehint argument is present and positive, instead
        of reading up to EOF, whole lines totalling approximately sizehint
        bytes (or more to accommodate a final whole line) are read.
        """
        total = 0
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            total += len(line)
            if 0 < sizehint <= total:
                break
            line = self.readline()
        return lines

    def truncate(self, size=None):
        """Truncate the file's size.

        If the optional size argument is present, the file is truncated to
        (at most) that size.  The size defaults to the current position.
        The current file position is not changed unless the position
        is beyond the new file size.

        If the specified size exceeds the file's current size, the
        file remains unchanged.

        Raises ValueError if the object is closed and IOError(EINVAL) if
        size is negative.
        """
        _complain_ifclosed(self.closed)
        if size is None:
            size = self.pos
        elif size < 0:
            raise IOError(EINVAL, "Negative size not allowed")
        elif size < self.pos:
            self.pos = size
        self.buf = self.getvalue()[:size]
        # Keep the recorded length in step with the buffer; a stale length
        # corrupts later seek(0, 2), read() and write() calls.  Measuring the
        # buffer (rather than assigning size) leaves the length alone when
        # size exceeds the current contents.
        self.len = len(self.buf)

    def write(self, s):
        """Write a string to the file.

        A false value (such as the empty string) is ignored.  Any other
        object that is not a string or unicode object is converted with
        str().  Writing beyond the current end first pads the gap with
        null bytes.

        There is no return value.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        if not s:
            return
        # Force s to be a string or unicode
        if not isinstance(s, basestring):
            s = str(s)
        if self.pos == self.len:
            # Fast path: plain append at the end.
            self.buflist.append(s)
            self.len = self.pos = self.pos + len(s)
            return
        if self.pos > self.len:
            # Position was moved beyond the end: fill the gap with nulls.
            self.buflist.append('\0' * (self.pos - self.len))
            self.len = self.pos
        newpos = self.pos + len(s)
        if self.pos < self.len:
            # Overwrite in the middle: splice s between the untouched head
            # and whatever tail survives past the written region.
            self._merge_buflist()
            self.buflist = [self.buf[:self.pos], s, self.buf[newpos:]]
            self.buf = ''
            if newpos > self.len:
                self.len = newpos
        else:
            self.buflist.append(s)
            self.len = newpos
        self.pos = newpos

    def writelines(self, iterable):
        """Write a sequence of strings to the file. The sequence can be any
        iterable object producing strings, typically a list of strings. There
        is no return value.

        (The name is intended to match readlines(); writelines() does not add
        line separators.)
        """
        write = self.write
        for line in iterable:
            write(line)

    def flush(self):
        """Flush the internal buffer.

        Nothing needs flushing; this only raises ValueError if the object
        is closed.
        """
        _complain_ifclosed(self.closed)

    def getvalue(self):
        """
        Retrieve the entire contents of the "file" at any time before
        the StringIO object's close() method is called.

        The StringIO object can accept either Unicode or 8-bit strings,
        but mixing the two may take some care.  If both are used, 8-bit
        strings that cannot be interpreted as 7-bit ASCII (that use the
        8th bit) will cause a UnicodeError to be raised when getvalue()
        is called.

        Raises ValueError if the object is closed.
        """
        # Same guard as the other methods; without it a closed object
        # failed with an AttributeError on the deleted buffer.
        _complain_ifclosed(self.closed)
        self._merge_buflist()
        return self.buf
Guido van Rossum85d89451994-06-23 11:53:27 +0000271
272
273# A little test suite
274
def test():
    """Self-test: round-trip a text file through a StringIO object.

    The file named by the first command-line argument (default
    '/etc/passwd') is written into a StringIO, then the seek, tell, read,
    readline and readlines methods are checked against it.  Progress is
    written to standard output.

    The file must contain several lines: the second line and the last
    line read back are both indexed directly.

    Raises RuntimeError when a check fails.
    """
    import sys

    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        path = '/etc/passwd'

    # Read through a single handle and always close it.
    source = open(path, 'r')
    try:
        text = source.read()
        source.seek(0)
        lines = source.readlines()
    finally:
        source.close()

    # Same output as the former print statements, in a form that does not
    # depend on print being a statement.
    emit = sys.stdout.write

    f = StringIO()
    for line in lines[:-2]:
        f.write(line)
    f.writelines(lines[-2:])
    if f.getvalue() != text:
        raise RuntimeError('write failed')
    length = f.tell()
    emit('File length = %s\n' % (length,))

    # Overwrite the second line with itself; contents must not change.
    f.seek(len(lines[0]))
    f.write(lines[1])
    f.seek(0)
    emit('First line = %s\n' % (repr(f.readline()),))
    emit('Position = %s\n' % (f.tell(),))
    line = f.readline()
    emit('Second line = %s\n' % (repr(line),))

    # Step back over the second line and read it again.
    f.seek(-len(line), 1)
    line2 = f.read(len(line))
    if line != line2:
        raise RuntimeError('bad result after seek back')

    # Skip ahead, read the rest, then re-read the final line from its start.
    f.seek(len(line2), 1)
    remaining = f.readlines()
    line = remaining[-1]
    f.seek(f.tell() - len(line))
    line2 = f.read()
    if line != line2:
        raise RuntimeError('bad result after seek back from EOF')
    emit('Read %s more lines\n' % (len(remaining),))
    emit('File length = %s\n' % (f.tell(),))
    if f.tell() != length:
        raise RuntimeError('bad length')
    f.close()
Guido van Rossum85d89451994-06-23 11:53:27 +0000314
# Run the self-test only when this module is executed as a script.
if __name__ == "__main__":
    test()