# Source blob: 6422f76647cff5db08482472bf1a724256021475
r"""File-like objects that read from or write to a string buffer.

This implements (nearly) all stdio methods.

f = StringIO()       # ready for writing
f = StringIO(buf)    # ready for reading
f.close()            # explicitly release resources held
flag = f.isatty()    # always false
pos = f.tell()       # get current position
f.seek(pos)          # set current position
f.seek(pos, mode)    # mode 0: absolute; 1: relative; 2: relative to EOF
buf = f.read()       # read until EOF
buf = f.read(n)      # read up to n bytes
buf = f.readline()   # read until end of line ('\n') or EOF
list = f.readlines() # list of f.readline() results until EOF
f.truncate([size])   # truncate file to at most size (default: current pos)
f.write(buf)         # write at current position
f.writelines(list)   # for line in list: f.write(line)
f.getvalue()         # return whole file's contents as a string

Notes:
- Using a real file is often faster (but less convenient).
- There's also a much faster implementation in C, called cStringIO, but
  it's not subclassable.
- fileno() is left unimplemented so that code which uses it triggers
  an exception early.
- Seeking far beyond EOF and then writing will insert real null
  bytes that occupy space in the buffer.
- There's a simple test set (see end of this file).
"""
# EINVAL is the errno value attached to the IOError raised for an invalid
# (negative) truncate size.
try:
    from errno import EINVAL
except ImportError:
    # The errno module could not be imported; fall back to a hard-coded value.
    EINVAL = 22
Barry Warsawc7ed0e32000-12-12 23:12:23 +000035
# Public API of this module: only the StringIO class is exported by
# "from ... import *".
__all__ = ["StringIO"]
37
try:
    # Python 2: basestring covers both 8-bit str and unicode.
    _string_types = basestring
except NameError:
    # basestring is not defined on this interpreter; use str instead.
    _string_types = str


def _complain_ifclosed(closed):
    """Raise ValueError if *closed* is true; otherwise do nothing."""
    if closed:
        raise ValueError("I/O operation on closed file")


class StringIO:
    """class StringIO([buffer])

    When a StringIO object is created, it can be initialized to an existing
    string by passing the string to the constructor. If no string is given,
    the StringIO will start empty.

    The StringIO object can accept either Unicode or 8-bit strings, but
    mixing the two may take some care. If both are used, 8-bit strings that
    cannot be interpreted as 7-bit ASCII (that use the 8th bit) will cause
    a UnicodeError to be raised when getvalue() is called.

    Internal state:
      buf      -- the consolidated contents
      buflist  -- fragments written since buf was last consolidated
      len      -- total logical length (buf plus pending fragments)
      pos      -- current read/write position
      closed   -- true once close() has been called
    """

    def __init__(self, buf=''):
        # Force self.buf to be a string or unicode
        if not isinstance(buf, _string_types):
            buf = str(buf)
        self.buf = buf
        self.len = len(buf)
        self.buflist = []
        self.pos = 0
        self.closed = False
        self.softspace = 0

    def _consolidate(self):
        """Fold the pending fragments in self.buflist into self.buf."""
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at EOF or when closed."""
        if self.closed:
            raise StopIteration
        r = self.readline()
        if not r:
            raise StopIteration
        return r

    # Python 3 spells the iterator protocol method __next__.
    __next__ = next

    def close(self):
        """Free the memory buffer.
        """
        if not self.closed:
            self.closed = True
            del self.buf, self.pos

    def isatty(self):
        """Return False; raise ValueError if the object is closed."""
        _complain_ifclosed(self.closed)
        return False

    def seek(self, pos, mode=0):
        """Set the current position.

        mode 0: absolute; 1: relative to the current position;
        2: relative to EOF.  A resulting negative position is clamped to 0.
        """
        _complain_ifclosed(self.closed)
        self._consolidate()
        if mode == 1:
            pos += self.pos
        elif mode == 2:
            pos += self.len
        self.pos = max(0, pos)

    def tell(self):
        """Return the current position."""
        _complain_ifclosed(self.closed)
        return self.pos

    def read(self, n=-1):
        """Read at most n characters; a negative n or None reads to EOF."""
        _complain_ifclosed(self.closed)
        self._consolidate()
        # Test for None explicitly: comparing None with 0 only happens to
        # work on Python 2 and raises TypeError on Python 3.
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos + n, self.len)
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readline(self, length=None):
        """Read up to and including the next '\\n', or to EOF.

        If length is given and non-negative, at most that many characters
        are returned.  A negative length means "no limit".
        """
        _complain_ifclosed(self.closed)
        self._consolidate()
        i = self.buf.find('\n', self.pos)
        if i < 0:
            newpos = self.len
        else:
            newpos = i + 1
        # Ignore negative lengths; otherwise newpos would end up *before*
        # self.pos, returning '' and moving the position backwards.
        if length is not None and length >= 0:
            if self.pos + length < newpos:
                newpos = self.pos + length
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readlines(self, sizehint=0):
        """Return a list of lines read until EOF.

        If sizehint is positive, stop once the total length of the lines
        read reaches it.
        """
        total = 0
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            total += len(line)
            if 0 < sizehint <= total:
                break
            line = self.readline()
        return lines

    def truncate(self, size=None):
        """Truncate the contents to at most size (default: current position).

        Raises IOError(EINVAL) if size is negative.  The current position
        is pulled back to size if it lies beyond it.
        """
        _complain_ifclosed(self.closed)
        if size is None:
            size = self.pos
        elif size < 0:
            raise IOError(EINVAL, "Negative size not allowed")
        elif size < self.pos:
            self.pos = size
        self.buf = self.getvalue()[:size]
        # Keep the recorded length in step with the truncated buffer so
        # that later seek(…, 2), read() and write() see the new EOF.
        self.len = len(self.buf)

    def write(self, s):
        """Write s at the current position, overwriting or extending.

        Empty/false values are ignored; non-string values are converted
        with str().  Writing past EOF pads the gap with null characters.
        """
        _complain_ifclosed(self.closed)
        if not s:
            return
        # Force s to be a string or unicode
        if not isinstance(s, _string_types):
            s = str(s)
        if self.pos == self.len:
            # Fast path: plain append at EOF.
            self.buflist.append(s)
            self.len = self.pos = self.pos + len(s)
            return
        if self.pos > self.len:
            # Position is beyond EOF: fill the gap with null characters.
            self.buflist.append('\0' * (self.pos - self.len))
            self.len = self.pos
        newpos = self.pos + len(s)
        if self.pos < self.len:
            # Overwrite inside existing data: splice s between the part
            # before pos and whatever remains after newpos.
            self._consolidate()
            self.buflist = [self.buf[:self.pos], s, self.buf[newpos:]]
            self.buf = ''
            if newpos > self.len:
                self.len = newpos
        else:
            self.buflist.append(s)
            self.len = newpos
        self.pos = newpos

    def writelines(self, list):
        """Write the concatenation of the strings in list."""
        self.write(''.join(list))

    def flush(self):
        """Do nothing except raise ValueError if the object is closed."""
        _complain_ifclosed(self.closed)

    def getvalue(self):
        """
        Retrieve the entire contents of the "file" at any time before
        the StringIO object's close() method is called.

        The StringIO object can accept either Unicode or 8-bit strings,
        but mixing the two may take some care. If both are used, 8-bit
        strings that cannot be interpreted as 7-bit ASCII (that use the
        8th bit) will cause a UnicodeError to be raised when getvalue()
        is called.

        Raises ValueError if the object has been closed.
        """
        # close() deletes self.buf; report that the same way every other
        # method does instead of failing with an AttributeError.
        _complain_ifclosed(self.closed)
        self._consolidate()
        return self.buf
Guido van Rossum85d89451994-06-23 11:53:27 +0000202
203
# A little test suite
205
def test():
    """Exercise StringIO against the contents of a text file.

    The file is taken from the first command-line argument, defaulting to
    '/etc/passwd'.  Progress is printed to stdout; RuntimeError is raised
    as soon as a check fails.  The checks index lines[1] and the last
    remaining line, so the file presumably needs several lines.
    """
    import sys
    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        path = '/etc/passwd'
    # Read the file once and close it deterministically instead of leaking
    # two open handles.
    source = open(path, 'r')
    try:
        lines = source.readlines()
    finally:
        source.close()
    text = ''.join(lines)

    # Writing line by line plus writelines() must reproduce the file.
    f = StringIO()
    for line in lines[:-2]:
        f.write(line)
    f.writelines(lines[-2:])
    if f.getvalue() != text:
        raise RuntimeError('write failed')
    length = f.tell()
    print('File length = %s' % (length,))

    # Overwrite in the middle, then read back from the start.
    f.seek(len(lines[0]))
    f.write(lines[1])
    f.seek(0)
    print('First line = %r' % (f.readline(),))
    print('Position = %s' % (f.tell(),))
    line = f.readline()
    print('Second line = %r' % (line,))

    # A relative seek backwards must re-read the same data.
    f.seek(-len(line), 1)
    line2 = f.read(len(line))
    if line != line2:
        raise RuntimeError('bad result after seek back')

    # Read the rest, then seek back from EOF and re-read the last line.
    f.seek(len(line2), 1)
    remaining = f.readlines()
    line = remaining[-1]
    f.seek(f.tell() - len(line))
    line2 = f.read()
    if line != line2:
        raise RuntimeError('bad result after seek back from EOF')
    print('Read %s more lines' % (len(remaining),))
    print('File length = %s' % (f.tell(),))
    if f.tell() != length:
        raise RuntimeError('bad length')
    f.close()
Barry Warsawc1401312000-12-12 23:16:51 +0000244 f.close()
Guido van Rossum85d89451994-06-23 11:53:27 +0000245
# Run the self-test only when this file is executed as a script.
if __name__ == '__main__':
    test()