blob: b61714bb689aeb0d0e3e9c6dc9c14a98a8504453 [file] [log] [blame]
Doug Zongkerfc44a512014-08-26 13:10:25 -07001from __future__ import print_function
2import heapq
3import itertools
4
5__all__ = ["RangeSet"]
6
7class RangeSet(object):
8 """A RangeSet represents a set of nonoverlapping ranges on the
9 integers (ie, a set of integers, but efficient when the set contains
10 lots of runs."""
11
12 def __init__(self, data=None):
13 if data:
14 self.data = tuple(self._remove_pairs(data))
15 else:
16 self.data = ()
17
18 def __iter__(self):
19 for i in range(0, len(self.data), 2):
20 yield self.data[i:i+2]
21
22 def __eq__(self, other):
23 return self.data == other.data
24 def __ne__(self, other):
25 return self.data != other.data
26 def __nonzero__(self):
27 return bool(self.data)
28
29 def __str__(self):
30 if not self.data:
31 return "empty"
32 else:
33 return self.to_string()
34
35 @classmethod
36 def parse(cls, text):
37 """Parse a text string consisting of a space-separated list of
38 blocks and ranges, eg "10-20 30 35-40". Ranges are interpreted to
39 include both their ends (so the above example represents 18
40 individual blocks. Returns a RangeSet object.
41
42 If the input has all its blocks in increasing order, then returned
43 RangeSet will have an extra attribute 'monotonic' that is set to
44 True. For example the input "10-20 30" is monotonic, but the input
45 "15-20 30 10-14" is not, even though they represent the same set
46 of blocks (and the two RangeSets will compare equal with ==).
47 """
48
49 data = []
50 last = -1
51 monotonic = True
52 for p in text.split():
53 if "-" in p:
54 s, e = p.split("-")
55 data.append(int(s))
56 data.append(int(e)+1)
57 if last <= s <= e:
58 last = e
59 else:
60 monotonic = False
61 else:
62 s = int(p)
63 data.append(s)
64 data.append(s+1)
65 if last <= s:
66 last = s+1
67 else:
68 monotonic = True
69 data.sort()
70 r = RangeSet(cls._remove_pairs(data))
71 r.monotonic = monotonic
72 return r
73
74 @staticmethod
75 def _remove_pairs(source):
76 last = None
77 for i in source:
78 if i == last:
79 last = None
80 else:
81 if last is not None:
82 yield last
83 last = i
84 if last is not None:
85 yield last
86
87 def to_string(self):
88 out = []
89 for i in range(0, len(self.data), 2):
90 s, e = self.data[i:i+2]
91 if e == s+1:
92 out.append(str(s))
93 else:
94 out.append(str(s) + "-" + str(e-1))
95 return " ".join(out)
96
97 def to_string_raw(self):
98 return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
99
100 def union(self, other):
101 """Return a new RangeSet representing the union of this RangeSet
102 with the argument."""
103 out = []
104 z = 0
105 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
106 zip(other.data, itertools.cycle((+1, -1)))):
107 if (z == 0 and d == 1) or (z == 1 and d == -1):
108 out.append(p)
109 z += d
110 return RangeSet(data=out)
111
112 def intersect(self, other):
113 """Return a new RangeSet representing the intersection of this
114 RangeSet with the argument."""
115 out = []
116 z = 0
117 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
118 zip(other.data, itertools.cycle((+1, -1)))):
119 if (z == 1 and d == 1) or (z == 2 and d == -1):
120 out.append(p)
121 z += d
122 return RangeSet(data=out)
123
124 def subtract(self, other):
125 """Return a new RangeSet representing subtracting the argument
126 from this RangeSet."""
127
128 out = []
129 z = 0
130 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
131 zip(other.data, itertools.cycle((-1, +1)))):
132 if (z == 0 and d == 1) or (z == 1 and d == -1):
133 out.append(p)
134 z += d
135 return RangeSet(data=out)
136
137 def overlaps(self, other):
138 """Returns true if the argument has a nonempty overlap with this
139 RangeSet."""
140
141 # This is like intersect, but we can stop as soon as we discover the
142 # output is going to be nonempty.
143 z = 0
144 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
145 zip(other.data, itertools.cycle((+1, -1)))):
146 if (z == 1 and d == 1) or (z == 2 and d == -1):
147 return True
148 z += d
149 return False
150
151 def size(self):
152 """Returns the total size of the RangeSet (ie, how many integers
153 are in the set)."""
154
155 total = 0
156 for i, p in enumerate(self.data):
157 if i % 2:
158 total += p
159 else:
160 total -= p
161 return total