Blame - Lib/collections.py - platform/external/python/cpython3

2009-03-02 21:24:57 +0000

[diff] [blame]

2

'UserString', 'Counter', 'OrderedDict']

Guido van Rossum

cd16bf6

2007-06-13 18:07:49 +0000

[diff] [blame]

3

# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.

4

# They should however be considered an integral part of collections.py.

5

from _abcoll import *

6

import _abcoll

7

__all__ += _abcoll.__all__

8

Christian Heimes

2007-12-19 02:07:34 +0000

[diff] [blame]

9

from _collections import deque, defaultdict

10

from operator import itemgetter as _itemgetter

11

from keyword import iskeyword as _iskeyword

12

import sys as _sys

Raymond Hettinger

2009-01-14 02:20:07 +0000

[diff] [blame]

13

import heapq as _heapq

Raymond Hettinger

798ee1a

2009-03-23 18:29:11 +0000

[diff] [blame]

14

from weakref import proxy as _proxy

Raymond Hettinger

ea9f8db

2009-03-02 21:28:41 +0000

[diff] [blame]

15

from itertools import repeat as _repeat, chain as _chain, starmap as _starmap

Raymond Hettinger

2009-03-02 21:24:57 +0000

[diff] [blame]

16

17

################################################################################

18

### OrderedDict

19

################################################################################

20

Raymond Hettinger

2009-03-23 05:19:21 +0000

[diff] [blame]

21

class _Link(object):
    'Node of the circular doubly linked list that records key order.'
    # prev / next hold the neighbouring links (or weakref proxies to them) and
    # key holds the dictionary key.  '__weakref__' is listed so that weakref
    # proxies to a link can be created despite the use of __slots__.
    __slots__ = 'prev', 'next', 'key', '__weakref__'


class OrderedDict(dict, MutableMapping):
    'Dictionary that remembers insertion order'
    # An inherited dict maps keys to values.
    # The inherited dict provides __getitem__, __len__, __contains__, and get.
    # The remaining methods are order-aware.
    # Big-O running times for all methods are the same as for regular dictionaries.

    # The internal self.__map dictionary maps keys to links in a doubly linked list.
    # The circular doubly linked list starts and ends with a sentinel element.
    # The sentinel element never gets deleted (this simplifies the algorithm).
    # The prev/next links are weakref proxies (to prevent circular references).
    # Individual links are kept alive by the hard reference in self.__map.
    # Those hard references disappear when a key is deleted from an OrderedDict.

    def __init__(self, *args, **kwds):
        '''Initialize an ordered dictionary.  Signature is the same as for
        regular dictionaries, but keyword arguments are not recommended
        because their insertion order is arbitrary.

        Raises TypeError if more than one positional argument is given.

        '''
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        try:
            # Probe for the sentinel so that calling __init__ a second time
            # on an existing instance does not discard the current ordering.
            self.__root
        except AttributeError:
            self.__root = root = _Link()    # sentinel node for the doubly linked list
            root.prev = root.next = root
            self.__map = {}
        self.update(*args, **kwds)

    def clear(self):
        'od.clear() -> None.  Remove all items from od.'
        root = self.__root
        root.prev = root.next = root
        # Dropping the map releases the only hard references to the links.
        self.__map.clear()
        dict.clear(self)

    def __setitem__(self, key, value):
        'od.__setitem__(i, y) <==> od[i]=y'
        # Setting a new item creates a new link which goes at the end of the linked
        # list, and the inherited dictionary is updated with the new key/value pair.
        # Assigning to an existing key keeps its original position.
        if key not in self:
            self.__map[key] = link = _Link()
            root = self.__root
            last = root.prev
            link.prev, link.next, link.key = last, root, key
            last.next = root.prev = _proxy(link)
        dict.__setitem__(self, key, value)

    def __delitem__(self, key):
        'od.__delitem__(y) <==> del od[y]'
        # Deleting an existing item uses self.__map to find the link which is
        # then removed by updating the links in the predecessor and successor nodes.
        # dict.__delitem__ runs first so a missing key raises KeyError before
        # the linked list is touched.
        dict.__delitem__(self, key)
        link = self.__map.pop(key)
        link.prev.next = link.next
        link.next.prev = link.prev

    def __iter__(self):
        'od.__iter__() <==> iter(od)'
        # Traverse the linked list in order.
        root = self.__root
        curr = root.next
        while curr is not root:
            yield curr.key
            curr = curr.next

    def __reversed__(self):
        'od.__reversed__() <==> reversed(od)'
        # Traverse the linked list in reverse order.
        root = self.__root
        curr = root.prev
        while curr is not root:
            yield curr.key
            curr = curr.prev

    def __reduce__(self):
        '''Return state information for pickling.

        The result is (class, (items,)) or, when the instance carries extra
        attributes, (class, (items,), inst_dict).

        '''
        items = [[k, self[k]] for k in self]
        # Work on a copy of the instance dict and drop the private linked-list
        # bookkeeping from the copy.  The live instance is never modified, so
        # it stays fully usable while its state is being collected.
        inst_dict = vars(self).copy()
        inst_dict.pop('_OrderedDict__map', None)     # name-mangled self.__map
        inst_dict.pop('_OrderedDict__root', None)    # name-mangled self.__root
        if inst_dict:
            return (self.__class__, (items,), inst_dict)
        return self.__class__, (items,)

    # Use the order-aware mixin implementations instead of the dict versions.
    setdefault = MutableMapping.setdefault
    update = MutableMapping.update
    pop = MutableMapping.pop
    keys = MutableMapping.keys
    values = MutableMapping.values
    items = MutableMapping.items

    def popitem(self, last=True):
        '''od.popitem() -> (k, v), return and remove a (key, value) pair.
        Pairs are returned in LIFO order if last is true or FIFO order if false.

        Raises KeyError if the dictionary is empty.

        '''
        if not self:
            raise KeyError('dictionary is empty')
        key = next(reversed(self) if last else iter(self))
        value = self.pop(key)
        return key, value

    def __repr__(self):
        'od.__repr__() <==> repr(od)'
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, list(self.items()))

    def copy(self):
        'od.copy() -> a shallow copy of od'
        return self.__class__(self)

    @classmethod
    def fromkeys(cls, iterable, value=None):
        '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
        and values equal to v (which defaults to None).

        '''
        d = cls()
        for key in iterable:
            d[key] = value
        return d

    def __eq__(self, other):
        '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
        while comparison to a regular mapping is order-insensitive.

        '''
        if isinstance(other, OrderedDict):
            return len(self)==len(other) and \
                   all(p==q for p, q in zip(self.items(), other.items()))
        return dict.__eq__(self, other)

    def __ne__(self, other):
        '''od.__ne__(y) <==> od!=y.  Comparison to another OD is order-sensitive
        while comparison to a regular mapping is order-insensitive.

        '''
        return not self == other

166

Raymond Hettinger

2009-01-14 02:20:07 +0000

[diff] [blame]

167

Christian Heimes

2007-12-19 02:07:34 +0000

[diff] [blame]

168

Raymond Hettinger

2008-02-05 01:53:00 +0000

[diff] [blame]

169

################################################################################

170

### namedtuple

171

################################################################################

172

Benjamin Peterson

2009-02-10 02:41:10 +0000

[diff] [blame]

173

def namedtuple(typename, field_names, verbose=False, rename=False):
    """Returns a new subclass of tuple with named fields.

    typename is the name of the new class.  field_names is either a sequence
    of names or a single string with names separated by whitespace and/or
    commas.  If verbose is true, the generated class source is printed.  If
    rename is true, invalid or duplicate field names are replaced with
    positional names of the form _0, _1, ...

    Raises ValueError for an empty, non-alphanumeric, keyword, digit-leading,
    underscore-leading (unless rename is true) or duplicate name.

    >>> Point = namedtuple('Point', 'x y')
    >>> Point.__doc__                   # docstring for the new class
    'Point(x, y)'
    >>> p = Point(11, y=22)             # instantiate with positional args or keywords
    >>> p[0] + p[1]                     # indexable like a plain tuple
    33
    >>> x, y = p                        # unpack like a regular tuple
    >>> x, y
    (11, 22)
    >>> p.x + p.y                       # fields also accessible by name
    33
    >>> d = p._asdict()                 # convert to a dictionary
    >>> d['x']
    11
    >>> Point(**d)                      # convert from a dictionary
    Point(x=11, y=22)
    >>> p._replace(x=100)               # _replace() is like str.replace() but targets named fields
    Point(x=100, y=22)

    """

    # Parse and validate the field names.  Validation serves two purposes,
    # generating informative error messages and preventing template injection attacks.
    if isinstance(field_names, str):
        field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas
    field_names = tuple(map(str, field_names))
    if rename:
        names = list(field_names)
        seen = set()
        for i, name in enumerate(names):
            if (not all(c.isalnum() or c=='_' for c in name) or _iskeyword(name)
                or not name or name[0].isdigit() or name.startswith('_')
                or name in seen):
                names[i] = '_%d' % i
            seen.add(name)
        field_names = tuple(names)
    for name in (typename,) + field_names:
        # Reject empty names explicitly; otherwise name[0] below would fail
        # with an uninformative IndexError.
        if not name:
            raise ValueError('Type names and field names cannot be empty: %r' % name)
        if not all(c.isalnum() or c=='_' for c in name):
            raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
        if _iskeyword(name):
            raise ValueError('Type names and field names cannot be a keyword: %r' % name)
        if name[0].isdigit():
            raise ValueError('Type names and field names cannot start with a number: %r' % name)
    seen_names = set()
    for name in field_names:
        if name.startswith('_') and not rename:
            raise ValueError('Field names cannot start with an underscore: %r' % name)
        if name in seen_names:
            raise ValueError('Encountered duplicate field name: %r' % name)
        seen_names.add(name)

    # Create and fill-in the class template
    numfields = len(field_names)
    argtxt = repr(field_names).replace("'", "")[1:-1]   # tuple repr without parens or quotes
    reprtxt = ', '.join('%s=%%r' % name for name in field_names)
    # The template is filled from locals(); a doubled %% survives that first
    # formatting pass as a single % in the generated source.
    template = '''class %(typename)s(tuple):
        '%(typename)s(%(argtxt)s)' \n
        __slots__ = () \n
        _fields = %(field_names)r \n
        def __new__(_cls, %(argtxt)s):
            return _tuple.__new__(_cls, (%(argtxt)s)) \n
        @classmethod
        def _make(cls, iterable, new=tuple.__new__, len=len):
            'Make a new %(typename)s object from a sequence or iterable'
            result = new(cls, iterable)
            if len(result) != %(numfields)d:
                raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
            return result \n
        def __repr__(self):
            return '%(typename)s(%(reprtxt)s)' %% self \n
        def _asdict(self):
            'Return a new OrderedDict which maps field names to their values'
            return OrderedDict(zip(self._fields, self)) \n
        def _replace(_self, **kwds):
            'Return a new %(typename)s object replacing specified fields with new values'
            result = _self._make(map(kwds.pop, %(field_names)r, _self))
            if kwds:
                raise ValueError('Got unexpected field names: %%r' %% list(kwds))
            return result \n
        def __getnewargs__(self):
            return tuple(self) \n\n''' % locals()
    for i, name in enumerate(field_names):
        template += '        %s = _property(_itemgetter(%d))\n' % (name, i)
    if verbose:
        print(template)

    # Execute the template string in a temporary namespace and
    # support tracing utilities by setting a value for frame.f_globals['__name__']
    namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
                     OrderedDict=OrderedDict, _property=property, _tuple=tuple)
    try:
        exec(template, namespace)
    except SyntaxError as e:
        raise SyntaxError(e.msg + ':\n' + template) from e
    result = namespace[typename]

    # For pickling to work, the __module__ variable needs to be set to the frame
    # where the named tuple is created.  Bypass this step in environments where
    # sys._getframe is not defined (Jython for example) or sys._getframe is not
    # defined for arguments greater than 0 (IronPython).
    try:
        result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
    except (AttributeError, ValueError):
        pass

    return result

Guido van Rossum

2007-04-27 19:54:29 +0000

[diff] [blame]

282

Guido van Rossum

2007-04-27 19:54:29 +0000

[diff] [blame]

283

Raymond Hettinger

2009-01-14 02:20:07 +0000

[diff] [blame]

284

########################################################################

285

### Counter

286

########################################################################

287

288

class Counter(dict):
    '''Dict subclass for counting hashable items.  Sometimes called a bag
    or multiset.  Elements are stored as dictionary keys and their counts
    are stored as dictionary values.

    >>> c = Counter('abracadabra')      # count elements from a string

    >>> c.most_common(3)                # three most common elements
    [('a', 5), ('r', 2), ('b', 2)]
    >>> sorted(c)                       # list all unique elements
    ['a', 'b', 'c', 'd', 'r']
    >>> ''.join(sorted(c.elements()))   # list elements with repetitions
    'aaaaabbcdrr'
    >>> sum(c.values())                 # total of all counts
    11

    >>> c['a']                          # count of letter 'a'
    5
    >>> for elem in 'shazam':           # update counts from an iterable
    ...     c[elem] += 1                # by adding 1 to each element's count
    >>> c['a']                          # now there are seven 'a'
    7
    >>> del c['r']                      # remove all 'r'
    >>> c['r']                          # now there are zero 'r'
    0

    >>> d = Counter('simsalabim')       # make another counter
    >>> c.update(d)                     # add in the second counter
    >>> c['a']                          # now there are nine 'a'
    9

    >>> c.clear()                       # empty the counter
    >>> c
    Counter()

    Note:  If a count is set to zero or reduced to zero, it will remain
    in the counter until the entry is deleted or the counter is cleared:

    >>> c = Counter('aaabbc')
    >>> c['b'] -= 2                     # reduce the count of 'b' by two
    >>> c.most_common()                 # 'b' is still in, but its count is zero
    [('a', 3), ('c', 1), ('b', 0)]

    '''
    # References:
    #   http://en.wikipedia.org/wiki/Multiset
    #   http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
    #   http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
    #   http://code.activestate.com/recipes/259174/
    #   Knuth, TAOCP Vol. II section 4.6.3

    def __init__(self, iterable=None, **kwds):
        '''Create a new, empty Counter object.  And if given, count elements
        from an input iterable.  Or, initialize the count from another mapping
        of elements to their counts.

        >>> c = Counter()                           # a new, empty counter
        >>> c = Counter('gallahad')                 # a new counter from an iterable
        >>> c = Counter({'a': 4, 'b': 2})           # a new counter from a mapping
        >>> c = Counter(a=4, b=2)                   # a new counter from keyword args

        '''
        self.update(iterable, **kwds)

    def __missing__(self, key):
        'The count of elements not in the Counter is zero.'
        # Needed so that self[missing_item] does not raise KeyError
        return 0

    def most_common(self, n=None):
        '''List the n most common elements and their counts from the most
        common to the least.  If n is None, then list all element counts.

        >>> Counter('abracadabra').most_common(3)
        [('a', 5), ('r', 2), ('b', 2)]

        '''
        # Emulate Bag.sortedByCount from Smalltalk
        if n is None:
            return sorted(self.items(), key=_itemgetter(1), reverse=True)
        return _heapq.nlargest(n, self.items(), key=_itemgetter(1))

    def elements(self):
        '''Iterator over elements repeating each as many times as its count.

        >>> c = Counter('ABCABC')
        >>> sorted(c.elements())
        ['A', 'A', 'B', 'B', 'C', 'C']

        # Knuth's example for prime factors of 1836:  2**2 * 3**3 * 17**1
        >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
        >>> product = 1
        >>> for factor in prime_factors.elements():     # loop over factors
        ...     product *= factor                       # and multiply them
        >>> product
        1836

        Note, if an element's count has been set to zero or is a negative
        number, elements() will ignore it.

        '''
        # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
        return _chain.from_iterable(_starmap(_repeat, self.items()))

    # Override dict methods where necessary

    @classmethod
    def fromkeys(cls, iterable, v=None):
        'Always raises NotImplementedError; use Counter(iterable) instead.'
        # There is no equivalent method for counters because setting v=1
        # means that no element can have a count greater than one.
        raise NotImplementedError(
            'Counter.fromkeys() is undefined.  Use Counter(iterable) instead.')

    def update(self, iterable=None, **kwds):
        '''Like dict.update() but add counts instead of replacing them.

        Source can be an iterable, a dictionary, or another Counter instance.

        >>> c = Counter('which')
        >>> c.update('witch')           # add elements from another iterable
        >>> d = Counter('watch')
        >>> c.update(d)                 # add elements from another counter
        >>> c['h']                      # four 'h' in which, witch, and watch
        4

        '''
        # The regular dict.update() operation makes no sense here because the
        # replace behavior results in some of the original untouched counts
        # being mixed-in with all of the other counts for a mishmash that
        # doesn't have a straight-forward interpretation in most counting
        # contexts.  Instead, we implement straight-addition.  Both the inputs
        # and outputs are allowed to contain zero and negative counts.

        if iterable is not None:
            if isinstance(iterable, Mapping):
                if self:
                    for elem, count in iterable.items():
                        self[elem] += count
                else:
                    dict.update(self, iterable) # fast path when counter is empty
            else:
                for elem in iterable:
                    self[elem] += 1
        if kwds:
            self.update(kwds)

    def copy(self):
        'Like dict.copy() but returns an instance of the same class instead of a dict.'
        # Use the runtime class so that subclasses keep their type when copied.
        return self.__class__(self)

    def __delitem__(self, elem):
        'Like dict.__delitem__() but does not raise KeyError for missing values.'
        if elem in self:
            dict.__delitem__(self, elem)

    def __repr__(self):
        'Return ClassName({elem: count, ...}) with the most common elements first.'
        if not self:
            return '%s()' % self.__class__.__name__
        items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
        return '%s({%s})' % (self.__class__.__name__, items)

    # Multiset-style mathematical operations discussed in:
    #       Knuth TAOCP Volume II section 4.6.3 exercise 19
    #       and at http://en.wikipedia.org/wiki/Multiset
    #
    # Outputs guaranteed to only include positive counts.
    #
    # To strip negative and zero counts, add-in an empty counter:
    #       c += Counter()

    def __add__(self, other):
        '''Add counts from two counters.

        >>> Counter('abbb') + Counter('bcc')
        Counter({'b': 4, 'c': 2, 'a': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        result = Counter()
        for elem in set(self) | set(other):
            newcount = self[elem] + other[elem]
            if newcount > 0:
                result[elem] = newcount
        return result

    def __sub__(self, other):
        ''' Subtract count, but keep only results with positive counts.

        >>> Counter('abbbc') - Counter('bccd')
        Counter({'b': 2, 'a': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        result = Counter()
        for elem in set(self) | set(other):
            newcount = self[elem] - other[elem]
            if newcount > 0:
                result[elem] = newcount
        return result

    def __or__(self, other):
        '''Union is the maximum of value in either of the input counters.

        >>> Counter('abbb') | Counter('bcc')
        Counter({'b': 3, 'c': 2, 'a': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        result = Counter()
        for elem in set(self) | set(other):
            p, q = self[elem], other[elem]
            newcount = q if p < q else p
            if newcount > 0:
                result[elem] = newcount
        return result

    def __and__(self, other):
        ''' Intersection is the minimum of corresponding counts.

        >>> Counter('abbb') & Counter('bcc')
        Counter({'b': 1})

        '''
        if not isinstance(other, Counter):
            return NotImplemented
        result = Counter()
        # Iterate over the smaller counter and test membership in the larger.
        if len(self) < len(other):
            self, other = other, self
        for elem in filter(self.__contains__, other):
            p, q = self[elem], other[elem]
            newcount = p if p < q else q
            if newcount > 0:
                result[elem] = newcount
        return result

525

Guido van Rossum

2007-04-27 19:54:29 +0000

[diff] [blame]

526

Raymond Hettinger

2008-02-05 01:53:00 +0000

[diff] [blame]

527

################################################################################

528

### UserDict

529

################################################################################

Guido van Rossum

2007-04-27 19:54:29 +0000

[diff] [blame]

530

Raymond Hettinger

2008-02-05 01:53:00 +0000

[diff] [blame]

531

class UserDict(MutableMapping):
    '''Mapping wrapper that keeps its contents in a plain dict, self.data.

    Subclasses may define __missing__(key) to supply a value for absent keys.

    '''

    # Start by filling-out the abstract methods
    def __init__(self, dict=None, **kwargs):
        '''Create the wrapper, optionally filled from dict (anything accepted
        by update()) and then from the keyword arguments.

        '''
        self.data = {}
        if dict is not None:
            self.update(dict)
        if kwargs:
            self.update(kwargs)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, key):
        'Return self.data[key], deferring to the class __missing__ hook if one exists.'
        if key in self.data:
            return self.data[key]
        if hasattr(self.__class__, "__missing__"):
            return self.__class__.__missing__(self, key)
        raise KeyError(key)

    def __setitem__(self, key, item):
        self.data[key] = item

    def __delitem__(self, key):
        del self.data[key]

    def __iter__(self):
        return iter(self.data)

    # Modify __contains__ to work correctly when __missing__ is present
    def __contains__(self, key):
        return key in self.data

    # Now, add the methods in dicts but not in MutableMapping
    def __repr__(self):
        return repr(self.data)

    def copy(self):
        'Return a shallow copy that has the same class as self.'
        if self.__class__ is UserDict:
            return UserDict(self.data.copy())
        import copy
        data = self.data
        try:
            # Swap in an empty dict so copy.copy() duplicates the other
            # instance attributes without sharing the original's data dict.
            self.data = {}
            c = copy.copy(self)
        finally:
            self.data = data
        c.update(self)
        return c

    @classmethod
    def fromkeys(cls, iterable, value=None):
        'Return a new instance with keys from iterable, each mapped to value.'
        d = cls()
        for key in iterable:
            d[key] = value
        return d

Raymond Hettinger

2008-02-05 01:53:00 +0000

[diff] [blame]

577

578

579

################################################################################

Raymond Hettinger

53dbe39

2008-02-12 20:03:09 +0000

[diff] [blame]

580

### UserList

581

################################################################################

582

583

class UserList(MutableSequence):
    """A more or less complete user-defined wrapper around list objects.

    The items are held in the real list ``self.data``; every operation
    delegates to it.  Operations that build a new sequence (``+``, ``*``,
    slicing) return an instance of ``self.__class__``.
    """

    def __init__(self, initlist=None):
        """Create the list, optionally copying the items of *initlist*."""
        self.data = []
        if initlist is not None:
            # XXX should this accept an arbitrary sequence?
            # Exact-type comparison is kept on purpose: list subclasses
            # take the generic list(initlist) branch below.
            if type(initlist) == type(self.data):
                self.data[:] = initlist
            elif isinstance(initlist, UserList):
                self.data[:] = initlist.data[:]
            else:
                self.data = list(initlist)

    def __repr__(self): return repr(self.data)

    # Rich comparisons compare the underlying lists.
    def __lt__(self, other): return self.data <  self.__cast(other)
    def __le__(self, other): return self.data <= self.__cast(other)
    def __eq__(self, other): return self.data == self.__cast(other)
    def __ne__(self, other): return self.data != self.__cast(other)
    def __gt__(self, other): return self.data >  self.__cast(other)
    def __ge__(self, other): return self.data >= self.__cast(other)

    def __cast(self, other):
        # Unwrap another UserList so list-to-list comparison is used.
        return other.data if isinstance(other, UserList) else other

    def __contains__(self, item): return item in self.data

    def __len__(self): return len(self.data)

    def __getitem__(self, i):
        """Return item *i*; a slice yields a new ``self.__class__`` instance."""
        if isinstance(i, slice):
            # Keep slicing consistent with __add__/__mul__, which also
            # return the wrapper type rather than a bare list.
            return self.__class__(self.data[i])
        return self.data[i]

    def __setitem__(self, i, item): self.data[i] = item

    def __delitem__(self, i): del self.data[i]

    def __add__(self, other):
        if isinstance(other, UserList):
            return self.__class__(self.data + other.data)
        elif isinstance(other, type(self.data)):
            return self.__class__(self.data + other)
        return self.__class__(self.data + list(other))

    def __radd__(self, other):
        if isinstance(other, UserList):
            return self.__class__(other.data + self.data)
        elif isinstance(other, type(self.data)):
            return self.__class__(other + self.data)
        return self.__class__(list(other) + self.data)

    def __iadd__(self, other):
        if isinstance(other, UserList):
            self.data += other.data
        elif isinstance(other, type(self.data)):
            self.data += other
        else:
            self.data += list(other)
        return self

    def __mul__(self, n):
        return self.__class__(self.data*n)
    __rmul__ = __mul__

    def __imul__(self, n):
        self.data *= n
        return self

    # Plain delegation to the list methods of the same name.
    def append(self, item): self.data.append(item)
    def insert(self, i, item): self.data.insert(i, item)
    def pop(self, i=-1): return self.data.pop(i)
    def remove(self, item): self.data.remove(item)
    def count(self, item): return self.data.count(item)
    def index(self, item, *args): return self.data.index(item, *args)
    def reverse(self): self.data.reverse()
    def sort(self, *args, **kwds): self.data.sort(*args, **kwds)

    def extend(self, other):
        if isinstance(other, UserList):
            self.data.extend(other.data)
        else:
            self.data.extend(other)

################################################################################

Raymond Hettinger

b3a65f8

2008-02-21 22:11:37 +0000

[diff] [blame]

652

### UserString

653

################################################################################

654

655

class UserString(Sequence):
    """Wrapper around a str object held in the ``data`` attribute.

    Methods that produce text return a new ``self.__class__`` instance;
    predicates, searches, splitting helpers, ``join`` and ``format``
    return whatever the underlying str method returns, and ``encode``
    returns bytes.
    """

    def __init__(self, seq):
        """Store *seq* as text: a str as is, a UserString's data, else str(seq)."""
        if isinstance(seq, str):
            self.data = seq
        elif isinstance(seq, UserString):
            self.data = seq.data[:]
        else:
            self.data = str(seq)

    def __str__(self): return str(self.data)
    def __repr__(self): return repr(self.data)
    def __int__(self): return int(self.data)
    def __float__(self): return float(self.data)
    def __complex__(self): return complex(self.data)
    def __hash__(self): return hash(self.data)

    # Comparisons unwrap another UserString and compare the raw strings.
    def __eq__(self, string):
        if isinstance(string, UserString):
            return self.data == string.data
        return self.data == string
    def __ne__(self, string):
        if isinstance(string, UserString):
            return self.data != string.data
        return self.data != string
    def __lt__(self, string):
        if isinstance(string, UserString):
            return self.data < string.data
        return self.data < string
    def __le__(self, string):
        if isinstance(string, UserString):
            return self.data <= string.data
        return self.data <= string
    def __gt__(self, string):
        if isinstance(string, UserString):
            return self.data > string.data
        return self.data > string
    def __ge__(self, string):
        if isinstance(string, UserString):
            return self.data >= string.data
        return self.data >= string

    def __contains__(self, char):
        if isinstance(char, UserString):
            char = char.data
        return char in self.data

    def __len__(self): return len(self.data)
    def __getitem__(self, index): return self.__class__(self.data[index])
    def __add__(self, other):
        if isinstance(other, UserString):
            return self.__class__(self.data + other.data)
        elif isinstance(other, str):
            return self.__class__(self.data + other)
        return self.__class__(self.data + str(other))
    def __radd__(self, other):
        if isinstance(other, str):
            return self.__class__(other + self.data)
        return self.__class__(str(other) + self.data)
    def __mul__(self, n):
        return self.__class__(self.data*n)
    __rmul__ = __mul__
    def __mod__(self, args):
        return self.__class__(self.data % args)

    # the following methods are defined in alphabetical order:
    def capitalize(self): return self.__class__(self.data.capitalize())
    def center(self, width, *args):
        return self.__class__(self.data.center(width, *args))
    def count(self, sub, start=0, end=_sys.maxsize):
        if isinstance(sub, UserString):
            sub = sub.data
        return self.data.count(sub, start, end)
    def encode(self, encoding=None, errors=None):
        """Return the text encoded as bytes.

        A false *encoding* or *errors* (None, '') falls back to 'utf-8' and
        'strict' respectively.  The bytes result is returned as is: passing
        it to ``self.__class__`` would run str() on it and produce text
        such as "b'abc'".
        """
        return self.data.encode(encoding or 'utf-8', errors or 'strict')
    def endswith(self, suffix, start=0, end=_sys.maxsize):
        return self.data.endswith(suffix, start, end)
    def expandtabs(self, tabsize=8):
        return self.__class__(self.data.expandtabs(tabsize))
    def find(self, sub, start=0, end=_sys.maxsize):
        if isinstance(sub, UserString):
            sub = sub.data
        return self.data.find(sub, start, end)
    def format(self, *args, **kwds):
        return self.data.format(*args, **kwds)
    def index(self, sub, start=0, end=_sys.maxsize):
        # Unwrap like count()/find(): str.index rejects non-str arguments.
        if isinstance(sub, UserString):
            sub = sub.data
        return self.data.index(sub, start, end)
    def isalpha(self): return self.data.isalpha()
    def isalnum(self): return self.data.isalnum()
    def isdecimal(self): return self.data.isdecimal()
    def isdigit(self): return self.data.isdigit()
    def isidentifier(self): return self.data.isidentifier()
    def islower(self): return self.data.islower()
    def isnumeric(self): return self.data.isnumeric()
    def isspace(self): return self.data.isspace()
    def istitle(self): return self.data.istitle()
    def isupper(self): return self.data.isupper()
    def join(self, seq): return self.data.join(seq)
    def ljust(self, width, *args):
        return self.__class__(self.data.ljust(width, *args))
    def lower(self): return self.__class__(self.data.lower())
    def lstrip(self, chars=None): return self.__class__(self.data.lstrip(chars))
    def partition(self, sep):
        return self.data.partition(sep)
    def replace(self, old, new, maxsplit=-1):
        if isinstance(old, UserString):
            old = old.data
        if isinstance(new, UserString):
            new = new.data
        return self.__class__(self.data.replace(old, new, maxsplit))
    def rfind(self, sub, start=0, end=_sys.maxsize):
        # Unwrap like find().
        if isinstance(sub, UserString):
            sub = sub.data
        return self.data.rfind(sub, start, end)
    def rindex(self, sub, start=0, end=_sys.maxsize):
        # Unwrap like index().
        if isinstance(sub, UserString):
            sub = sub.data
        return self.data.rindex(sub, start, end)
    def rjust(self, width, *args):
        return self.__class__(self.data.rjust(width, *args))
    def rpartition(self, sep):
        return self.data.rpartition(sep)
    def rstrip(self, chars=None):
        return self.__class__(self.data.rstrip(chars))
    def split(self, sep=None, maxsplit=-1):
        return self.data.split(sep, maxsplit)
    def rsplit(self, sep=None, maxsplit=-1):
        return self.data.rsplit(sep, maxsplit)
    def splitlines(self, keepends=0): return self.data.splitlines(keepends)
    def startswith(self, prefix, start=0, end=_sys.maxsize):
        return self.data.startswith(prefix, start, end)
    def strip(self, chars=None): return self.__class__(self.data.strip(chars))
    def swapcase(self): return self.__class__(self.data.swapcase())
    def title(self): return self.__class__(self.data.title())
    def translate(self, *args):
        return self.__class__(self.data.translate(*args))
    def upper(self): return self.__class__(self.data.upper())
    def zfill(self, width): return self.__class__(self.data.zfill(width))

################################################################################

Raymond Hettinger

2008-02-05 01:53:00 +0000

[diff] [blame]

795

### Simple tests

796

################################################################################

Guido van Rossum

2007-04-27 19:54:29 +0000

[diff] [blame]

797

798

if __name__ == '__main__':

Thomas Wouters

2007-09-19 03:06:30 +0000

[diff] [blame]

799

# verify that instances can be pickled

Guido van Rossum

99603b0

2007-07-20 00:22:32 +0000

[diff] [blame]

800

from pickle import loads, dumps

Guido van Rossum

2007-11-01 19:42:39 +0000

[diff] [blame]

801

Point = namedtuple('Point', 'x, y', True)

Guido van Rossum

2007-04-27 19:54:29 +0000

[diff] [blame]

802

p = Point(x=10, y=20)

803

assert p == loads(dumps(p))

804

Guido van Rossum

3d392eb

2007-11-16 00:35:22 +0000

[diff] [blame]

805

# test and demonstrate ability to override methods

Christian Heimes

2008-01-07 17:19:16 +0000

[diff] [blame]

806

class Point(namedtuple('Point', 'x y')):

Christian Heimes

2008-01-11 16:17:00 +0000

[diff] [blame]

807

__slots__ = ()

Christian Heimes

2008-01-07 17:19:16 +0000

[diff] [blame]

808

@property

809

def hypot(self):

810

return (self.x ** 2 + self.y ** 2) ** 0.5

Christian Heimes

790c823

2008-01-07 21:14:23 +0000

[diff] [blame]

811

def __str__(self):

Christian Heimes

2008-01-11 16:17:00 +0000

[diff] [blame]

812

return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)

Christian Heimes

2008-01-07 17:19:16 +0000

[diff] [blame]

813

Christian Heimes

2008-01-11 16:17:00 +0000

[diff] [blame]

814

for p in Point(3, 4), Point(14, 5/7.):

Christian Heimes

790c823

2008-01-07 21:14:23 +0000

[diff] [blame]

815

print (p)

Christian Heimes

2008-01-07 17:19:16 +0000

[diff] [blame]

816

817

class Point(namedtuple('Point', 'x y')):

818

'Point class with optimized _make() and _replace() without error-checking'

Christian Heimes

2008-01-11 16:17:00 +0000

[diff] [blame]

819

__slots__ = ()

Christian Heimes

2008-01-07 17:19:16 +0000

[diff] [blame]

820

_make = classmethod(tuple.__new__)

821

def _replace(self, _map=map, **kwds):

Christian Heimes

2380ac7

2008-01-09 00:17:24 +0000

[diff] [blame]

822

return self._make(_map(kwds.get, ('x', 'y'), self))

Christian Heimes

2008-01-07 17:19:16 +0000

[diff] [blame]

823

824

print(Point(11, 22)._replace(x=100))

Guido van Rossum

3d392eb

2007-11-16 00:35:22 +0000

[diff] [blame]

825

Christian Heimes

2008-01-11 16:17:00 +0000

[diff] [blame]

826

Point3D = namedtuple('Point3D', Point._fields + ('z',))

827

print(Point3D.__doc__)

828

Guido van Rossum

2007-04-27 19:54:29 +0000

[diff] [blame]

829

import doctest

Guido van Rossum

2007-11-01 19:42:39 +0000

[diff] [blame]

830

TestResults = namedtuple('TestResults', 'failed attempted')

Guido van Rossum