blob: de34d172f68ace858cc231f6939f4607b8c9082f [file] [log] [blame]
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -04001/*
Behdad Esfahboddeed4a42017-10-15 16:53:09 +02002 * Copyright © 2012,2017 Google, Inc.
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -04003 *
4 * This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#ifndef HB_SET_PRIVATE_HH
28#define HB_SET_PRIVATE_HH
29
30#include "hb-private.hh"
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -040031#include "hb-object-private.hh"
32
33
Behdad Esfahbod0edd0fd2013-04-17 17:26:56 -040034/*
Behdad Esfahbod0d5798a2013-04-17 18:19:21 -040035 * hb_set_t
36 */
Behdad Esfahbodb40f2c02013-04-16 23:21:38 -040037
Behdad Esfahbod1bc1cb32012-06-16 15:21:55 -040038struct hb_set_t
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -040039{
Behdad Esfahboddeed4a42017-10-15 16:53:09 +020040 struct page_map_t
41 {
42 inline int cmp (const page_map_t *o) const { return (int) o->major - (int) major; }
43
44 uint32_t major;
45 uint32_t index;
46 };
47
48 struct page_t
49 {
50 inline void init (void) {
51 memset (&v, 0, sizeof (v));
52 }
53
54 inline unsigned int len (void) const
55 { return ARRAY_LENGTH_CONST (v); }
56
57 inline bool is_empty (void) const
58 {
59 for (unsigned int i = 0; i < len (); i++)
60 if (v[i])
61 return false;
62 return true;
63 }
64
65 inline void add (hb_codepoint_t g) { elt (g) |= mask (g); }
66 inline void del (hb_codepoint_t g) { elt (g) &= ~mask (g); }
67 inline bool has (hb_codepoint_t g) const { return !!(elt (g) & mask (g)); }
68
69 inline bool is_equal (const page_t *other) const
70 {
71 return 0 == memcmp (&v, &other->v, sizeof (v));
72 }
73
74 inline unsigned int get_population (void) const
75 {
76 unsigned int pop = 0;
77 for (unsigned int i = 0; i < len (); i++)
78 pop += _hb_popcount (v[i]);
79 return pop;
80 }
81
82 inline bool next (hb_codepoint_t *codepoint) const
83 {
84 unsigned int m = (*codepoint + 1) & MASK;
85 if (!m)
86 {
87 *codepoint = INVALID;
88 return false;
89 }
90 unsigned int i = m / ELT_BITS;
91 unsigned int j = m & ELT_MASK;
92
93 for (; j < ELT_BITS; j++)
94 if (v[i] & (elt_t (1) << j))
95 goto found;
96 for (i++; i < len (); i++)
97 if (v[i])
98 for (unsigned int j = 0; j < ELT_BITS; j++)
99 if (v[i] & (elt_t (1) << j))
100 goto found;
101
102 *codepoint = INVALID;
103 return false;
104
105 found:
106 *codepoint = i * ELT_BITS + j;
107 return true;
108 }
109 inline hb_codepoint_t get_min (void) const
110 {
111 for (unsigned int i = 0; i < len (); i++)
112 if (v[i])
113 {
114 elt_t e = v[i];
115 for (unsigned int j = 0; j < ELT_BITS; j++)
116 if (e & (elt_t (1) << j))
117 return i * ELT_BITS + j;
118 }
119 return INVALID;
120 }
121 inline hb_codepoint_t get_max (void) const
122 {
123 for (int i = len () - 1; i >= 0; i--)
124 if (v[i])
125 {
126 elt_t e = v[i];
127 for (int j = ELT_BITS - 1; j >= 0; j--)
128 if (e & (elt_t (1) << j))
129 return i * ELT_BITS + j;
130 }
131 return 0;
132 }
133
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200134 static const unsigned int PAGE_BITS = 512; /* Use to tune. */
135 static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, "");
136
Behdad Esfahbodf8a0ec52017-10-15 16:10:35 -0400137 typedef uint64_t elt_t;
138
Behdad Esfahbodbb991792017-10-15 18:20:25 -0400139#if 0 && HAVE_VECTOR_SIZE
140 /* The vectorized version does not work with clang as non-const
Behdad Esfahbodab8f3272017-10-15 18:21:35 -0400141 * elt() errs "non-const reference cannot bind to vector element". */
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200142 typedef elt_t vector_t __attribute__((vector_size (PAGE_BITS / 8)));
143#else
Behdad Esfahbod7737e872017-10-15 16:21:03 -0400144 typedef hb_vector_size_t<elt_t, PAGE_BITS / 8> vector_t;
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200145#endif
146
147 vector_t v;
148
149 static const unsigned int ELT_BITS = sizeof (elt_t) * 8;
150 static const unsigned int ELT_MASK = ELT_BITS - 1;
Behdad Esfahbodba8b5692017-10-17 11:16:36 -0700151 static const unsigned int BITS = sizeof (vector_t) * 8;
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200152 static const unsigned int MASK = BITS - 1;
153 static_assert (PAGE_BITS == BITS, "");
154
155 elt_t &elt (hb_codepoint_t g) { return v[(g & MASK) / ELT_BITS]; }
156 elt_t const &elt (hb_codepoint_t g) const { return v[(g & MASK) / ELT_BITS]; }
157 elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & ELT_MASK); }
158 };
Behdad Esfahbodd0f0ff82017-10-23 08:34:30 -0400159 static_assert (page_t::PAGE_BITS == sizeof (page_t) * 8, "");
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200160
Behdad Esfahbod6220e5f2012-06-06 03:30:09 -0400161 hb_object_header_t header;
162 ASSERT_POD ();
Behdad Esfahbod8165f272013-01-02 22:50:36 -0600163 bool in_error;
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200164 hb_prealloced_array_t<page_map_t, 8> page_map;
165 hb_prealloced_array_t<page_t, 8> pages;
166
167 inline bool resize (unsigned int count)
168 {
169 if (unlikely (in_error)) return false;
170 if (!pages.resize (count) || !page_map.resize (count))
171 {
172 pages.resize (page_map.len);
173 in_error = true;
174 return false;
175 }
176 return true;
177 }
Behdad Esfahbod6220e5f2012-06-06 03:30:09 -0400178
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400179 inline void clear (void) {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600180 if (unlikely (hb_object_is_inert (this)))
181 return;
182 in_error = false;
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200183 page_map.resize (0);
184 pages.resize (0);
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400185 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800186 inline bool is_empty (void) const {
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200187 unsigned int count = pages.len;
188 for (unsigned int i = 0; i < count; i++)
189 if (!pages[i].is_empty ())
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400190 return false;
191 return true;
192 }
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200193
Behdad Esfahbod5caece62012-04-23 23:03:12 -0400194 inline void add (hb_codepoint_t g)
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400195 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600196 if (unlikely (in_error)) return;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400197 if (unlikely (g == INVALID)) return;
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200198 page_t *page = page_for_insert (g);
199 if (!page)
200 return;
201 page->add (g);
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400202 }
Behdad Esfahbod67bb9e82012-06-09 02:02:46 -0400203 inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
204 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600205 if (unlikely (in_error)) return;
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800206 /* TODO Speedup */
Behdad Esfahbod67bb9e82012-06-09 02:02:46 -0400207 for (unsigned int i = a; i < b + 1; i++)
208 add (i);
209 }
Behdad Esfahbod5caece62012-04-23 23:03:12 -0400210 inline void del (hb_codepoint_t g)
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400211 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600212 if (unlikely (in_error)) return;
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200213 page_t *p = page_for (g);
214 if (!p)
215 return;
216 p->del (g);
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400217 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800218 inline void del_range (hb_codepoint_t a, hb_codepoint_t b)
219 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600220 if (unlikely (in_error)) return;
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800221 for (unsigned int i = a; i < b + 1; i++)
222 del (i);
223 }
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400224 inline bool has (hb_codepoint_t g) const
225 {
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200226 const page_t *p = page_for (g);
227 if (!p)
228 return false;
229 return p->has (g);
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400230 }
231 inline bool intersects (hb_codepoint_t first,
232 hb_codepoint_t last) const
233 {
Behdad Esfahbod10d43652017-10-15 16:56:05 -0400234 hb_codepoint_t c = first - 1;
235 return next (&c) && c <= last;
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400236 }
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400237 inline void set (const hb_set_t *other)
238 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600239 if (unlikely (in_error)) return;
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200240 unsigned int count = other->pages.len;
241 if (!resize (count))
242 return;
243
244 memcpy (pages.array, other->pages.array, count * sizeof (pages.array[0]));
245 memcpy (page_map.array, other->page_map.array, count * sizeof (page_map.array[0]));
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400246 }
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200247
248 inline bool is_equal (const hb_set_t *other) const
249 {
250 unsigned int na = pages.len;
251 unsigned int nb = other->pages.len;
252
253 unsigned int a = 0, b = 0;
254 for (; a < na && b < nb; )
255 {
256 if (page_at (a).is_empty ()) { a++; continue; }
257 if (other->page_at (b).is_empty ()) { b++; continue; }
258 if (page_map[a].major != other->page_map[b].major ||
259 !page_at (a).is_equal (&other->page_at (b)))
260 return false;
261 a++;
262 b++;
263 }
264 for (; a < na; a++)
265 if (!page_at (a).is_empty ()) { return false; }
266 for (; b < nb; b++)
267 if (!other->page_at (b).is_empty ()) { return false; }
268
269 return true;
270 }
271
272 template <class Op>
273 inline void process (const hb_set_t *other)
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400274 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600275 if (unlikely (in_error)) return;
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200276
277 int na = pages.len;
278 int nb = other->pages.len;
279
280 unsigned int count = 0;
281 int a = 0, b = 0;
282 for (; a < na && b < nb; )
283 {
284 if (page_map[a].major == other->page_map[b].major)
285 {
286 count++;
287 a++;
288 b++;
289 }
290 else if (page_map[a].major < other->page_map[b].major)
291 {
292 if (Op::passthru_left)
293 count++;
294 a++;
295 }
296 else
297 {
298 if (Op::passthru_right)
299 count++;
300 b++;
301 }
302 }
303 if (Op::passthru_left)
304 count += na - a;
305 if (Op::passthru_right)
306 count += nb - b;
307
308 if (!resize (count))
309 return;
310
311 /* Process in-place backward. */
312 a = na - 1, b = nb - 1;
313 for (; a >= 0 && b >= 0; )
314 {
315 if (page_map[a].major == other->page_map[b].major)
316 {
317 Op::process (page_at (--count).v, page_at (a).v, other->page_at (b).v);
318 a--;
319 b--;
320 }
Behdad Esfahboda11249e2017-10-16 01:33:32 -0400321 else if (page_map[a].major > other->page_map[b].major)
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200322 {
323 if (Op::passthru_left)
324 page_at (--count).v = page_at (a).v;
325 a--;
326 }
327 else
328 {
329 if (Op::passthru_right)
330 page_at (--count).v = other->page_at (b).v;
331 b--;
332 }
333 }
334 while (a >= 0)
335 page_at (--count).v = page_at (--a).v;
336 while (b >= 0)
337 page_at (--count).v = other->page_at (--b).v;
338 assert (!count);
339 }
340
341 inline void union_ (const hb_set_t *other)
342 {
Behdad Esfahbodf8a0ec52017-10-15 16:10:35 -0400343 process<HbOpOr> (other);
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400344 }
345 inline void intersect (const hb_set_t *other)
346 {
Behdad Esfahbodf8a0ec52017-10-15 16:10:35 -0400347 process<HbOpAnd> (other);
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400348 }
349 inline void subtract (const hb_set_t *other)
350 {
Behdad Esfahbodf8a0ec52017-10-15 16:10:35 -0400351 process<HbOpMinus> (other);
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400352 }
Behdad Esfahbod62c3e112012-05-25 13:48:00 -0400353 inline void symmetric_difference (const hb_set_t *other)
354 {
Behdad Esfahbodf8a0ec52017-10-15 16:10:35 -0400355 process<HbOpXor> (other);
Behdad Esfahbod8165f272013-01-02 22:50:36 -0600356 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800357 inline bool next (hb_codepoint_t *codepoint) const
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400358 {
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400359 if (unlikely (*codepoint == INVALID)) {
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200360 *codepoint = get_min ();
361 return *codepoint != INVALID;
362 }
363
Behdad Esfahboddd33e4e2017-10-23 08:36:40 -0400364 page_map_t map = {get_major (*codepoint), 0};
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200365 unsigned int i;
366 page_map.bfind (&map, &i);
367 if (i < page_map.len)
368 {
369 if (pages[page_map[i].index].next (codepoint))
370 {
Behdad Esfahbodd0f0ff82017-10-23 08:34:30 -0400371 *codepoint += page_map[i].major * page_t::PAGE_BITS;
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200372 return true;
373 }
374 i++;
375 }
376 for (; i < page_map.len; i++)
377 {
378 hb_codepoint_t m = pages[page_map[i].index].get_min ();
379 if (m != INVALID)
380 {
Behdad Esfahbodd0f0ff82017-10-23 08:34:30 -0400381 *codepoint = page_map[i].major * page_t::PAGE_BITS + m;
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400382 return true;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400383 }
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400384 }
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400385 *codepoint = INVALID;
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400386 return false;
387 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800388 inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
389 {
390 hb_codepoint_t i;
391
392 i = *last;
393 if (!next (&i))
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400394 {
395 *last = *first = INVALID;
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800396 return false;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400397 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800398
399 *last = *first = i;
400 while (next (&i) && i == *last + 1)
401 (*last)++;
402
403 return true;
404 }
405
406 inline unsigned int get_population (void) const
407 {
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200408 unsigned int pop = 0;
409 unsigned int count = pages.len;
410 for (unsigned int i = 0; i < count; i++)
411 pop += pages[i].get_population ();
412 return pop;
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800413 }
Behdad Esfahbodf039e792012-05-17 20:55:12 -0400414 inline hb_codepoint_t get_min (void) const
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400415 {
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200416 unsigned int count = pages.len;
417 for (unsigned int i = 0; i < count; i++)
418 if (!page_at (i).is_empty ())
Behdad Esfahbodd0f0ff82017-10-23 08:34:30 -0400419 return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_min ();
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400420 return INVALID;
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400421 }
Behdad Esfahbodf039e792012-05-17 20:55:12 -0400422 inline hb_codepoint_t get_max (void) const
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400423 {
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200424 unsigned int count = pages.len;
425 for (int i = count - 1; i >= 0; i++)
426 if (!page_at (i).is_empty ())
Behdad Esfahbodd0f0ff82017-10-23 08:34:30 -0400427 return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_max ();
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400428 return INVALID;
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400429 }
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400430
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400431 static const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400432
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200433 page_t *page_for_insert (hb_codepoint_t g)
434 {
Behdad Esfahboddd33e4e2017-10-23 08:36:40 -0400435 page_map_t map = {get_major (g), pages.len};
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200436 unsigned int i;
437 if (!page_map.bfind (&map, &i))
438 {
439 if (!resize (pages.len + 1))
440 return nullptr;
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400441
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200442 pages[map.index].init ();
443 memmove (&page_map[i + 1], &page_map[i], (page_map.len - 1 - i) * sizeof (page_map[0]));
444 page_map[i] = map;
445 }
446 return &pages[page_map[i].index];
447 }
448 page_t *page_for (hb_codepoint_t g)
449 {
Behdad Esfahboddd33e4e2017-10-23 08:36:40 -0400450 page_map_t key = {get_major (g)};
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200451 const page_map_t *found = page_map.bsearch (&key);
452 if (found)
453 return &pages[found->index];
454 return nullptr;
455 }
456 const page_t *page_for (hb_codepoint_t g) const
457 {
Behdad Esfahboddd33e4e2017-10-23 08:36:40 -0400458 page_map_t key = {get_major (g)};
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200459 const page_map_t *found = page_map.bsearch (&key);
460 if (found)
461 return &pages[found->index];
462 return nullptr;
463 }
464 page_t &page_at (unsigned int i) { return pages[page_map[i].index]; }
465 const page_t &page_at (unsigned int i) const { return pages[page_map[i].index]; }
Behdad Esfahboddd33e4e2017-10-23 08:36:40 -0400466 unsigned int get_major (hb_codepoint_t g) const { return g / page_t::PAGE_BITS; }
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400467};
468
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400469
470#endif /* HB_SET_PRIVATE_HH */