blob: 9394360fa784195a45e466f58c0ba0c6c2655743 [file] [log] [blame]
r"""File-like objects that read from or write to a string buffer.

This implements (nearly) all stdio methods.

f = StringIO()      # ready for writing
f = StringIO(buf)   # ready for reading
f.close()           # explicitly release resources held
flag = f.isatty()   # always false
pos = f.tell()      # get current position
f.seek(pos)         # set current position
f.seek(pos, mode)   # mode 0: absolute; 1: relative; 2: relative to EOF
buf = f.read()      # read until EOF
buf = f.read(n)     # read up to n bytes
buf = f.readline()  # read until end of line ('\n') or EOF
list = f.readlines()# list of f.readline() results until EOF
f.truncate([size])  # truncate file to at most size (default: current pos)
f.write(buf)        # write at current position
f.writelines(list)  # for line in list: f.write(line)
f.getvalue()        # return whole file's contents as a string

Notes:
- Using a real file is often faster (but less convenient).
- There's also a much faster implementation in C, called cStringIO, but
  it's not subclassable.
- fileno() is left unimplemented so that code which uses it triggers
  an exception early.
- Seeking far beyond EOF and then writing will insert real null
  bytes that occupy space in the buffer.
- There's a simple test set (see end of this file).
"""
Barry Warsawc7ed0e32000-12-12 23:12:23 +000031try:
Barry Warsawc1401312000-12-12 23:16:51 +000032 from errno import EINVAL
Barry Warsawc7ed0e32000-12-12 23:12:23 +000033except ImportError:
Barry Warsawc1401312000-12-12 23:16:51 +000034 EINVAL = 22
Barry Warsawc7ed0e32000-12-12 23:12:23 +000035
Skip Montanaroe99d5ea2001-01-20 19:54:20 +000036__all__ = ["StringIO"]
37
Martin v. Löwis9e62ff22003-10-18 10:20:42 +000038def _complain_ifclosed(closed):
39 if closed:
40 raise ValueError, "I/O operation on closed file"
41
class StringIO:
    """class StringIO([buffer])

    When a StringIO object is created, it can be initialized to an existing
    string by passing the string to the constructor.  If no string is given,
    the StringIO will start empty.

    The StringIO object can accept either Unicode or 8-bit strings, but
    mixing the two may take some care.  If both are used, 8-bit strings that
    cannot be interpreted as 7-bit ASCII (that use the 8th bit) will cause
    a UnicodeError to be raised when getvalue() is called.
    """
    def __init__(self, buf = ''):
        # Force self.buf to be a string or unicode
        if not isinstance(buf, basestring):
            buf = str(buf)
        self.buf = buf          # consolidated contents
        self.len = len(buf)     # logical length, counting pending buflist chunks
        self.buflist = []       # pending chunks not yet joined onto self.buf
        self.pos = 0            # current read/write position
        self.closed = False
        self.softspace = 0      # not read anywhere in this class

    def __iter__(self):
        """Return self; the object is its own line iterator (see next())."""
        return self

    def next(self):
        """Return the next line, or raise StopIteration at EOF.

        A file object is its own iterator, so this is what a
        ``for line in f`` loop calls repeatedly.  Raises ValueError if the
        object has been closed.
        """
        _complain_ifclosed(self.closed)
        r = self.readline()
        if not r:
            raise StopIteration
        return r

    def close(self):
        """Free the memory buffer.

        Closing an already closed object is a no-op.
        """
        if not self.closed:
            self.closed = True
            del self.buf, self.pos

    def isatty(self):
        """Return False because StringIO objects are not connected to a
        tty-like device.  Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        return False

    def seek(self, pos, mode = 0):
        """Set the file's current position.

        The mode argument is optional and defaults to 0 (absolute file
        positioning); other values are 1 (seek relative to the current
        position) and 2 (seek relative to the file's end).  A resulting
        negative position is clamped to 0.

        There is no return value.
        """
        _complain_ifclosed(self.closed)
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        if mode == 1:
            pos += self.pos
        elif mode == 2:
            pos += self.len
        self.pos = max(0, pos)

    def tell(self):
        """Return the file's current position."""
        _complain_ifclosed(self.closed)
        return self.pos

    def read(self, n=None):
        """Read at most n characters from the file
        (less if the read hits EOF before obtaining n characters).

        If n is None, negative or omitted, read all data until EOF is
        reached.  The data is returned as a string object.  An empty
        string is returned when EOF is encountered immediately.
        """
        _complain_ifclosed(self.closed)
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        if n is None:
            n = -1
        if n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos+n, self.len)
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readline(self, length=None):
        r"""Read one entire line from the file.

        A trailing newline character is kept in the string (but may be absent
        when a file ends with an incomplete line).  If the length argument is
        present and non-negative, it is a maximum character count (including
        the trailing newline) and an incomplete line may be returned.  A
        negative length is treated like None, i.e. no limit.

        An empty string is returned only when EOF is encountered immediately.

        Note: Unlike stdio's fgets(), the returned string contains null
        characters ('\0') if they occurred in the input.
        """
        _complain_ifclosed(self.closed)
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        i = self.buf.find('\n', self.pos)
        if i < 0:
            newpos = self.len
        else:
            newpos = i+1
        # Only a non-negative length limits the read; a negative one used to
        # pull newpos *behind* self.pos and rewind the file position.
        if length is not None and length >= 0:
            if self.pos + length < newpos:
                newpos = self.pos + length
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readlines(self, sizehint = 0):
        """Read until EOF using readline() and return a list containing the
        lines thus read.

        If the optional sizehint argument is positive, reading stops as
        soon as the lines read so far total at least sizehint characters
        (so the total may exceed sizehint by part of the final whole line).
        """
        total = 0
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            total += len(line)
            if 0 < sizehint <= total:
                break
            line = self.readline()
        return lines

    def truncate(self, size=None):
        """Truncate the file's size.

        If the optional size argument is present, the file is truncated to
        (at most) that size.  The size defaults to the current position.
        The current file position is not changed unless the position
        is beyond the new file size.

        If the specified size exceeds the file's current size, the
        file remains unchanged.

        Raises IOError(EINVAL) for a negative size and ValueError if the
        object is closed.
        """
        _complain_ifclosed(self.closed)
        if size is None:
            size = self.pos
        elif size < 0:
            raise IOError(EINVAL, "Negative size not allowed")
        elif size < self.pos:
            self.pos = size
        self.buf = self.getvalue()[:size]
        # Use the real buffer length: when size exceeds the current length
        # the slice above changes nothing, so self.len must not grow either.
        self.len = len(self.buf)

    def write(self, s):
        """Write a string to the file at the current position.

        Existing data at that position is overwritten; writing past the end
        extends the file, and any gap left by an earlier seek beyond EOF is
        filled with null characters.  Non-string arguments are converted
        with str().  Writing an empty/false value does nothing.

        There is no return value.
        """
        _complain_ifclosed(self.closed)
        if not s: return
        # Force s to be a string or unicode
        if not isinstance(s, basestring):
            s = str(s)
        spos = self.pos
        slen = self.len
        if spos == slen:
            # Fast path: plain append at EOF, defer the join.
            self.buflist.append(s)
            self.len = self.pos = spos + len(s)
            return
        if spos > slen:
            # Position is beyond EOF: pad the gap with null characters.
            self.buflist.append('\0'*(spos - slen))
            slen = spos
        newpos = spos + len(s)
        if spos < slen:
            # Overwrite in the middle: consolidate, then splice s in.
            if self.buflist:
                self.buf += ''.join(self.buflist)
            self.buflist = [self.buf[:spos], s, self.buf[newpos:]]
            self.buf = ''
            if newpos > slen:
                slen = newpos
        else:
            self.buflist.append(s)
            slen = newpos
        self.len = slen
        self.pos = newpos

    def writelines(self, iterable):
        """Write a sequence of strings to the file.  The sequence can be any
        iterable object producing strings, typically a list of strings.  There
        is no return value.

        (The name is intended to match readlines(); writelines() does not add
        line separators.)
        """
        write = self.write
        for line in iterable:
            write(line)

    def flush(self):
        """Flush the internal buffer.

        Nothing needs flushing here; this only raises ValueError if the
        object is closed.
        """
        _complain_ifclosed(self.closed)

    def getvalue(self):
        """
        Retrieve the entire contents of the "file" at any time before
        the StringIO object's close() method is called.  Calling it after
        close() raises ValueError.

        The StringIO object can accept either Unicode or 8-bit strings,
        but mixing the two may take some care.  If both are used, 8-bit
        strings that cannot be interpreted as 7-bit ASCII (that use the
        8th bit) will cause a UnicodeError to be raised when getvalue()
        is called.
        """
        # close() deletes self.buf; fail like every other method instead of
        # leaking an AttributeError.
        _complain_ifclosed(self.closed)
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []
        return self.buf
Guido van Rossum85d89451994-06-23 11:53:27 +0000275
276
277# A little test suite
278
def test():
    """Run a small self-test of StringIO against a real text file.

    The file named by the first command-line argument (default
    '/etc/passwd') is copied into a StringIO, which is then used to
    exercise write/writelines, seek/tell, read/readline/readlines and
    truncate.  Progress is printed to stdout; RuntimeError is raised by the
    first check that fails.
    """
    import sys
    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        path = '/etc/passwd'

    # Read the file twice (as lines and as one string), closing each handle
    # explicitly instead of leaving it to the garbage collector.
    source = open(path, 'r')
    try:
        lines = source.readlines()
    finally:
        source.close()
    source = open(path, 'r')
    try:
        text = source.read()
    finally:
        source.close()

    f = StringIO()
    for line in lines[:-2]:
        f.write(line)
    f.writelines(lines[-2:])
    if f.getvalue() != text:
        raise RuntimeError('write failed')
    length = f.tell()
    print('File length = %s' % (length,))

    # Overwrite the second line with itself, then re-read from the start.
    f.seek(len(lines[0]))
    f.write(lines[1])
    f.seek(0)
    print('First line = %s' % (repr(f.readline()),))
    print('Position = %s' % (f.tell(),))
    line = f.readline()
    print('Second line = %s' % (repr(line),))

    # Relative seek back over the line just read must return the same data.
    f.seek(-len(line), 1)
    line2 = f.read(len(line))
    if line != line2:
        raise RuntimeError('bad result after seek back')

    f.seek(len(line2), 1)
    remaining = f.readlines()
    line = remaining[-1]
    f.seek(f.tell() - len(line))
    line2 = f.read()
    if line != line2:
        raise RuntimeError('bad result after seek back from EOF')
    print('Read %s more lines' % (len(remaining),))
    print('File length = %s' % (f.tell(),))
    if f.tell() != length:
        raise RuntimeError('bad length')

    # Floor division keeps the size an integer regardless of division mode.
    half = length // 2
    f.truncate(half)
    f.seek(0, 2)
    print('Truncated length = %s' % (f.tell(),))
    if f.tell() != half:
        raise RuntimeError('truncate did not adjust length')
    f.close()

if __name__ == '__main__':
    test()