blob: 373bbed1d5fcb23937c556e202fe3576082758b0 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16import heapq
17import itertools
18
19__all__ = ["RangeSet"]
20
21class RangeSet(object):
22 """A RangeSet represents a set of nonoverlapping ranges on the
23 integers (ie, a set of integers, but efficient when the set contains
24 lots of runs."""
25
26 def __init__(self, data=None):
Tao Bao937847a2015-08-25 15:10:10 -070027 # TODO(tbao): monotonic is broken when passing in a tuple.
Dan Albert8b72aef2015-03-23 19:13:21 -070028 self.monotonic = False
Doug Zongker846cb3a2014-09-09 10:55:36 -070029 if isinstance(data, str):
30 self._parse_internal(data)
31 elif data:
Doug Zongkerfc44a512014-08-26 13:10:25 -070032 self.data = tuple(self._remove_pairs(data))
33 else:
34 self.data = ()
35
36 def __iter__(self):
37 for i in range(0, len(self.data), 2):
38 yield self.data[i:i+2]
39
40 def __eq__(self, other):
41 return self.data == other.data
42 def __ne__(self, other):
43 return self.data != other.data
44 def __nonzero__(self):
45 return bool(self.data)
46
47 def __str__(self):
48 if not self.data:
49 return "empty"
50 else:
51 return self.to_string()
52
Doug Zongker846cb3a2014-09-09 10:55:36 -070053 def __repr__(self):
54 return '<RangeSet("' + self.to_string() + '")>'
55
Doug Zongkerfc44a512014-08-26 13:10:25 -070056 @classmethod
57 def parse(cls, text):
58 """Parse a text string consisting of a space-separated list of
59 blocks and ranges, eg "10-20 30 35-40". Ranges are interpreted to
60 include both their ends (so the above example represents 18
61 individual blocks. Returns a RangeSet object.
62
63 If the input has all its blocks in increasing order, then returned
64 RangeSet will have an extra attribute 'monotonic' that is set to
65 True. For example the input "10-20 30" is monotonic, but the input
66 "15-20 30 10-14" is not, even though they represent the same set
67 of blocks (and the two RangeSets will compare equal with ==).
68 """
Doug Zongker846cb3a2014-09-09 10:55:36 -070069 return cls(text)
Doug Zongkerfc44a512014-08-26 13:10:25 -070070
Doug Zongker846cb3a2014-09-09 10:55:36 -070071 def _parse_internal(self, text):
Doug Zongkerfc44a512014-08-26 13:10:25 -070072 data = []
73 last = -1
74 monotonic = True
75 for p in text.split():
76 if "-" in p:
77 s, e = p.split("-")
78 data.append(int(s))
79 data.append(int(e)+1)
80 if last <= s <= e:
81 last = e
82 else:
83 monotonic = False
84 else:
85 s = int(p)
86 data.append(s)
87 data.append(s+1)
88 if last <= s:
89 last = s+1
90 else:
91 monotonic = True
92 data.sort()
Doug Zongker846cb3a2014-09-09 10:55:36 -070093 self.data = tuple(self._remove_pairs(data))
94 self.monotonic = monotonic
Doug Zongkerfc44a512014-08-26 13:10:25 -070095
96 @staticmethod
97 def _remove_pairs(source):
98 last = None
99 for i in source:
100 if i == last:
101 last = None
102 else:
103 if last is not None:
104 yield last
105 last = i
106 if last is not None:
107 yield last
108
109 def to_string(self):
110 out = []
111 for i in range(0, len(self.data), 2):
112 s, e = self.data[i:i+2]
113 if e == s+1:
114 out.append(str(s))
115 else:
116 out.append(str(s) + "-" + str(e-1))
117 return " ".join(out)
118
119 def to_string_raw(self):
120 return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
121
122 def union(self, other):
123 """Return a new RangeSet representing the union of this RangeSet
Doug Zongker846cb3a2014-09-09 10:55:36 -0700124 with the argument.
125
126 >>> RangeSet("10-19 30-34").union(RangeSet("18-29"))
127 <RangeSet("10-34")>
128 >>> RangeSet("10-19 30-34").union(RangeSet("22 32"))
129 <RangeSet("10-19 22 30-34")>
130 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700131 out = []
132 z = 0
133 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
134 zip(other.data, itertools.cycle((+1, -1)))):
135 if (z == 0 and d == 1) or (z == 1 and d == -1):
136 out.append(p)
137 z += d
138 return RangeSet(data=out)
139
140 def intersect(self, other):
141 """Return a new RangeSet representing the intersection of this
Doug Zongker846cb3a2014-09-09 10:55:36 -0700142 RangeSet with the argument.
143
144 >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32"))
145 <RangeSet("18-19 30-32")>
146 >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28"))
147 <RangeSet("")>
148 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700149 out = []
150 z = 0
151 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
152 zip(other.data, itertools.cycle((+1, -1)))):
153 if (z == 1 and d == 1) or (z == 2 and d == -1):
154 out.append(p)
155 z += d
156 return RangeSet(data=out)
157
158 def subtract(self, other):
159 """Return a new RangeSet representing subtracting the argument
Doug Zongker846cb3a2014-09-09 10:55:36 -0700160 from this RangeSet.
161
162 >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32"))
163 <RangeSet("10-17 33-34")>
164 >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28"))
165 <RangeSet("10-19 30-34")>
166 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700167
168 out = []
169 z = 0
170 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
171 zip(other.data, itertools.cycle((-1, +1)))):
172 if (z == 0 and d == 1) or (z == 1 and d == -1):
173 out.append(p)
174 z += d
175 return RangeSet(data=out)
176
177 def overlaps(self, other):
178 """Returns true if the argument has a nonempty overlap with this
Doug Zongker846cb3a2014-09-09 10:55:36 -0700179 RangeSet.
180
181 >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32"))
182 True
183 >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28"))
184 False
185 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700186
187 # This is like intersect, but we can stop as soon as we discover the
188 # output is going to be nonempty.
189 z = 0
Dan Albert8b72aef2015-03-23 19:13:21 -0700190 for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
Doug Zongkerfc44a512014-08-26 13:10:25 -0700191 zip(other.data, itertools.cycle((+1, -1)))):
192 if (z == 1 and d == 1) or (z == 2 and d == -1):
193 return True
194 z += d
195 return False
196
197 def size(self):
198 """Returns the total size of the RangeSet (ie, how many integers
Doug Zongker846cb3a2014-09-09 10:55:36 -0700199 are in the set).
200
201 >>> RangeSet("10-19 30-34").size()
202 15
203 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700204
205 total = 0
206 for i, p in enumerate(self.data):
207 if i % 2:
208 total += p
209 else:
210 total -= p
211 return total
Doug Zongker62338182014-09-08 08:29:55 -0700212
213 def map_within(self, other):
214 """'other' should be a subset of 'self'. Returns a RangeSet
215 representing what 'other' would get translated to if the integers
216 of 'self' were translated down to be contiguous starting at zero.
217
Doug Zongker846cb3a2014-09-09 10:55:36 -0700218 >>> RangeSet("0-9").map_within(RangeSet("3-4"))
219 <RangeSet("3-4")>
220 >>> RangeSet("10-19").map_within(RangeSet("13-14"))
221 <RangeSet("3-4")>
222 >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32"))
223 <RangeSet("7-12")>
224 >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32"))
225 <RangeSet("2-3 7-12")>
Doug Zongker62338182014-09-08 08:29:55 -0700226 """
227
228 out = []
229 offset = 0
230 start = None
231 for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))),
232 zip(other.data, itertools.cycle((-1, +1)))):
233 if d == -5:
234 start = p
235 elif d == +5:
236 offset += p-start
237 start = None
238 else:
239 out.append(offset + p - start)
240 return RangeSet(data=out)
241
Tao Bao2fd2c9b2015-07-09 17:37:49 -0700242 def extend(self, n):
243 """Extend the RangeSet by 'n' blocks.
244
245 The lower bound is guaranteed to be non-negative.
246
247 >>> RangeSet("0-9").extend(1)
248 <RangeSet("0-10")>
249 >>> RangeSet("10-19").extend(15)
250 <RangeSet("0-34")>
251 >>> RangeSet("10-19 30-39").extend(4)
252 <RangeSet("6-23 26-43")>
253 >>> RangeSet("10-19 30-39").extend(10)
254 <RangeSet("0-49")>
255 """
256 out = self
257 for i in range(0, len(self.data), 2):
258 s, e = self.data[i:i+2]
259 s1 = max(0, s - n)
260 e1 = e + n
261 out = out.union(RangeSet(str(s1) + "-" + str(e1-1)))
262 return out
263
Tao Bao937847a2015-08-25 15:10:10 -0700264 def first(self, n):
265 """Return the RangeSet that contains at most the first 'n' integers.
266
267 >>> RangeSet("0-9").first(1)
268 <RangeSet("0")>
269 >>> RangeSet("10-19").first(5)
270 <RangeSet("10-14")>
271 >>> RangeSet("10-19").first(15)
272 <RangeSet("10-19")>
273 >>> RangeSet("10-19 30-39").first(3)
274 <RangeSet("10-12")>
275 >>> RangeSet("10-19 30-39").first(15)
276 <RangeSet("10-19 30-34")>
277 >>> RangeSet("10-19 30-39").first(30)
278 <RangeSet("10-19 30-39")>
279 >>> RangeSet("0-9").first(0)
280 <RangeSet("")>
281 """
282
283 if self.size() <= n:
284 return self
285
286 out = []
287 for s, e in self:
288 if e - s >= n:
289 out += (s, s+n)
290 break
291 else:
292 out += (s, e)
293 n -= e - s
294 return RangeSet(data=out)
295
Doug Zongker62338182014-09-08 08:29:55 -0700296
297if __name__ == "__main__":
298 import doctest
299 doctest.testmod()