blob: 689766c130c7b428a3c4e3a7137d8026deecd43b [file] [log] [blame]
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -04001/*
Behdad Esfahboddeed4a42017-10-15 16:53:09 +02002 * Copyright © 2012,2017 Google, Inc.
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -04003 *
4 * This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#ifndef HB_SET_PRIVATE_HH
28#define HB_SET_PRIVATE_HH
29
30#include "hb-private.hh"
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -040031#include "hb-object-private.hh"
32
33
Behdad Esfahbod0edd0fd2013-04-17 17:26:56 -040034/*
Behdad Esfahbod0d5798a2013-04-17 18:19:21 -040035 * hb_set_t
36 */
Behdad Esfahbodb40f2c02013-04-16 23:21:38 -040037
Behdad Esfahbod1bc1cb32012-06-16 15:21:55 -040038struct hb_set_t
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -040039{
Behdad Esfahboddeed4a42017-10-15 16:53:09 +020040 struct page_map_t
41 {
42 inline int cmp (const page_map_t *o) const { return (int) o->major - (int) major; }
43
44 uint32_t major;
45 uint32_t index;
46 };
47
48 struct page_t
49 {
50 inline void init (void) {
51 memset (&v, 0, sizeof (v));
52 }
53
54 inline unsigned int len (void) const
55 { return ARRAY_LENGTH_CONST (v); }
56
57 inline bool is_empty (void) const
58 {
59 for (unsigned int i = 0; i < len (); i++)
60 if (v[i])
61 return false;
62 return true;
63 }
64
65 inline void add (hb_codepoint_t g) { elt (g) |= mask (g); }
66 inline void del (hb_codepoint_t g) { elt (g) &= ~mask (g); }
67 inline bool has (hb_codepoint_t g) const { return !!(elt (g) & mask (g)); }
68
69 inline bool is_equal (const page_t *other) const
70 {
71 return 0 == memcmp (&v, &other->v, sizeof (v));
72 }
73
74 inline unsigned int get_population (void) const
75 {
76 unsigned int pop = 0;
77 for (unsigned int i = 0; i < len (); i++)
78 pop += _hb_popcount (v[i]);
79 return pop;
80 }
81
82 inline bool next (hb_codepoint_t *codepoint) const
83 {
84 unsigned int m = (*codepoint + 1) & MASK;
85 if (!m)
86 {
87 *codepoint = INVALID;
88 return false;
89 }
90 unsigned int i = m / ELT_BITS;
91 unsigned int j = m & ELT_MASK;
92
93 for (; j < ELT_BITS; j++)
94 if (v[i] & (elt_t (1) << j))
95 goto found;
96 for (i++; i < len (); i++)
97 if (v[i])
98 for (unsigned int j = 0; j < ELT_BITS; j++)
99 if (v[i] & (elt_t (1) << j))
100 goto found;
101
102 *codepoint = INVALID;
103 return false;
104
105 found:
106 *codepoint = i * ELT_BITS + j;
107 return true;
108 }
109 inline hb_codepoint_t get_min (void) const
110 {
111 for (unsigned int i = 0; i < len (); i++)
112 if (v[i])
113 {
114 elt_t e = v[i];
115 for (unsigned int j = 0; j < ELT_BITS; j++)
116 if (e & (elt_t (1) << j))
117 return i * ELT_BITS + j;
118 }
119 return INVALID;
120 }
121 inline hb_codepoint_t get_max (void) const
122 {
123 for (int i = len () - 1; i >= 0; i--)
124 if (v[i])
125 {
126 elt_t e = v[i];
127 for (int j = ELT_BITS - 1; j >= 0; j--)
128 if (e & (elt_t (1) << j))
129 return i * ELT_BITS + j;
130 }
131 return 0;
132 }
133
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200134 static const unsigned int PAGE_BITS = 512; /* Use to tune. */
135 static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, "");
136
Behdad Esfahbodf8a0ec52017-10-15 16:10:35 -0400137 typedef uint64_t elt_t;
138
Behdad Esfahbodbb991792017-10-15 18:20:25 -0400139#if 0 && HAVE_VECTOR_SIZE
140 /* The vectorized version does not work with clang as non-const
Behdad Esfahbodab8f3272017-10-15 18:21:35 -0400141 * elt() errs "non-const reference cannot bind to vector element". */
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200142 typedef elt_t vector_t __attribute__((vector_size (PAGE_BITS / 8)));
143#else
Behdad Esfahbod7737e872017-10-15 16:21:03 -0400144 typedef hb_vector_size_t<elt_t, PAGE_BITS / 8> vector_t;
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200145#endif
146
147 vector_t v;
148
149 static const unsigned int ELT_BITS = sizeof (elt_t) * 8;
150 static const unsigned int ELT_MASK = ELT_BITS - 1;
Behdad Esfahbodba8b5692017-10-17 11:16:36 -0700151 static const unsigned int BITS = sizeof (vector_t) * 8;
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200152 static const unsigned int MASK = BITS - 1;
153 static_assert (PAGE_BITS == BITS, "");
154
155 elt_t &elt (hb_codepoint_t g) { return v[(g & MASK) / ELT_BITS]; }
156 elt_t const &elt (hb_codepoint_t g) const { return v[(g & MASK) / ELT_BITS]; }
157 elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & ELT_MASK); }
158 };
159
Behdad Esfahbod6220e5f2012-06-06 03:30:09 -0400160 hb_object_header_t header;
161 ASSERT_POD ();
Behdad Esfahbod8165f272013-01-02 22:50:36 -0600162 bool in_error;
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200163 hb_prealloced_array_t<page_map_t, 8> page_map;
164 hb_prealloced_array_t<page_t, 8> pages;
165
166 inline bool resize (unsigned int count)
167 {
168 if (unlikely (in_error)) return false;
169 if (!pages.resize (count) || !page_map.resize (count))
170 {
171 pages.resize (page_map.len);
172 in_error = true;
173 return false;
174 }
175 return true;
176 }
Behdad Esfahbod6220e5f2012-06-06 03:30:09 -0400177
Behdad Esfahbod1827dc22012-04-24 16:56:37 -0400178 inline void init (void) {
Behdad Esfahbodcd7ea4f2014-08-14 12:57:02 -0400179 hb_object_init (this);
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200180 page_map.init ();
181 pages.init ();
Behdad Esfahbod1827dc22012-04-24 16:56:37 -0400182 }
Behdad Esfahboda5e39fe2012-04-25 00:14:46 -0400183 inline void fini (void) {
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200184 page_map.finish ();
185 pages.finish ();
Behdad Esfahboda5e39fe2012-04-25 00:14:46 -0400186 }
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400187 inline void clear (void) {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600188 if (unlikely (hb_object_is_inert (this)))
189 return;
190 in_error = false;
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200191 page_map.resize (0);
192 pages.resize (0);
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400193 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800194 inline bool is_empty (void) const {
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200195 unsigned int count = pages.len;
196 for (unsigned int i = 0; i < count; i++)
197 if (!pages[i].is_empty ())
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400198 return false;
199 return true;
200 }
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200201
Behdad Esfahbod5caece62012-04-23 23:03:12 -0400202 inline void add (hb_codepoint_t g)
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400203 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600204 if (unlikely (in_error)) return;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400205 if (unlikely (g == INVALID)) return;
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200206 page_t *page = page_for_insert (g);
207 if (!page)
208 return;
209 page->add (g);
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400210 }
Behdad Esfahbod67bb9e82012-06-09 02:02:46 -0400211 inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
212 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600213 if (unlikely (in_error)) return;
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800214 /* TODO Speedup */
Behdad Esfahbod67bb9e82012-06-09 02:02:46 -0400215 for (unsigned int i = a; i < b + 1; i++)
216 add (i);
217 }
Behdad Esfahbod5caece62012-04-23 23:03:12 -0400218 inline void del (hb_codepoint_t g)
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400219 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600220 if (unlikely (in_error)) return;
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200221 page_t *p = page_for (g);
222 if (!p)
223 return;
224 p->del (g);
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400225 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800226 inline void del_range (hb_codepoint_t a, hb_codepoint_t b)
227 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600228 if (unlikely (in_error)) return;
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800229 for (unsigned int i = a; i < b + 1; i++)
230 del (i);
231 }
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400232 inline bool has (hb_codepoint_t g) const
233 {
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200234 const page_t *p = page_for (g);
235 if (!p)
236 return false;
237 return p->has (g);
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400238 }
239 inline bool intersects (hb_codepoint_t first,
240 hb_codepoint_t last) const
241 {
Behdad Esfahbod10d43652017-10-15 16:56:05 -0400242 hb_codepoint_t c = first - 1;
243 return next (&c) && c <= last;
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400244 }
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400245 inline void set (const hb_set_t *other)
246 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600247 if (unlikely (in_error)) return;
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200248 unsigned int count = other->pages.len;
249 if (!resize (count))
250 return;
251
252 memcpy (pages.array, other->pages.array, count * sizeof (pages.array[0]));
253 memcpy (page_map.array, other->page_map.array, count * sizeof (page_map.array[0]));
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400254 }
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200255
256 inline bool is_equal (const hb_set_t *other) const
257 {
258 unsigned int na = pages.len;
259 unsigned int nb = other->pages.len;
260
261 unsigned int a = 0, b = 0;
262 for (; a < na && b < nb; )
263 {
264 if (page_at (a).is_empty ()) { a++; continue; }
265 if (other->page_at (b).is_empty ()) { b++; continue; }
266 if (page_map[a].major != other->page_map[b].major ||
267 !page_at (a).is_equal (&other->page_at (b)))
268 return false;
269 a++;
270 b++;
271 }
272 for (; a < na; a++)
273 if (!page_at (a).is_empty ()) { return false; }
274 for (; b < nb; b++)
275 if (!other->page_at (b).is_empty ()) { return false; }
276
277 return true;
278 }
279
280 template <class Op>
281 inline void process (const hb_set_t *other)
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400282 {
Behdad Esfahbod7b1b7202013-01-02 23:02:59 -0600283 if (unlikely (in_error)) return;
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200284
285 int na = pages.len;
286 int nb = other->pages.len;
287
288 unsigned int count = 0;
289 int a = 0, b = 0;
290 for (; a < na && b < nb; )
291 {
292 if (page_map[a].major == other->page_map[b].major)
293 {
294 count++;
295 a++;
296 b++;
297 }
298 else if (page_map[a].major < other->page_map[b].major)
299 {
300 if (Op::passthru_left)
301 count++;
302 a++;
303 }
304 else
305 {
306 if (Op::passthru_right)
307 count++;
308 b++;
309 }
310 }
311 if (Op::passthru_left)
312 count += na - a;
313 if (Op::passthru_right)
314 count += nb - b;
315
316 if (!resize (count))
317 return;
318
319 /* Process in-place backward. */
320 a = na - 1, b = nb - 1;
321 for (; a >= 0 && b >= 0; )
322 {
323 if (page_map[a].major == other->page_map[b].major)
324 {
325 Op::process (page_at (--count).v, page_at (a).v, other->page_at (b).v);
326 a--;
327 b--;
328 }
Behdad Esfahboda11249e2017-10-16 01:33:32 -0400329 else if (page_map[a].major > other->page_map[b].major)
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200330 {
331 if (Op::passthru_left)
332 page_at (--count).v = page_at (a).v;
333 a--;
334 }
335 else
336 {
337 if (Op::passthru_right)
338 page_at (--count).v = other->page_at (b).v;
339 b--;
340 }
341 }
342 while (a >= 0)
343 page_at (--count).v = page_at (--a).v;
344 while (b >= 0)
345 page_at (--count).v = other->page_at (--b).v;
346 assert (!count);
347 }
348
349 inline void union_ (const hb_set_t *other)
350 {
Behdad Esfahbodf8a0ec52017-10-15 16:10:35 -0400351 process<HbOpOr> (other);
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400352 }
353 inline void intersect (const hb_set_t *other)
354 {
Behdad Esfahbodf8a0ec52017-10-15 16:10:35 -0400355 process<HbOpAnd> (other);
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400356 }
357 inline void subtract (const hb_set_t *other)
358 {
Behdad Esfahbodf8a0ec52017-10-15 16:10:35 -0400359 process<HbOpMinus> (other);
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400360 }
Behdad Esfahbod62c3e112012-05-25 13:48:00 -0400361 inline void symmetric_difference (const hb_set_t *other)
362 {
Behdad Esfahbodf8a0ec52017-10-15 16:10:35 -0400363 process<HbOpXor> (other);
Behdad Esfahbod8165f272013-01-02 22:50:36 -0600364 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800365 inline bool next (hb_codepoint_t *codepoint) const
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400366 {
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400367 if (unlikely (*codepoint == INVALID)) {
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200368 *codepoint = get_min ();
369 return *codepoint != INVALID;
370 }
371
372 page_map_t map = {major (*codepoint), 0};
373 unsigned int i;
374 page_map.bfind (&map, &i);
375 if (i < page_map.len)
376 {
377 if (pages[page_map[i].index].next (codepoint))
378 {
379 *codepoint += page_map[i].major * PAGE_SIZE;
380 return true;
381 }
382 i++;
383 }
384 for (; i < page_map.len; i++)
385 {
386 hb_codepoint_t m = pages[page_map[i].index].get_min ();
387 if (m != INVALID)
388 {
389 *codepoint = page_map[i].major * PAGE_SIZE + m;
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400390 return true;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400391 }
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400392 }
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400393 *codepoint = INVALID;
Behdad Esfahbod29ce4462012-05-25 14:17:54 -0400394 return false;
395 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800396 inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
397 {
398 hb_codepoint_t i;
399
400 i = *last;
401 if (!next (&i))
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400402 {
403 *last = *first = INVALID;
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800404 return false;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400405 }
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800406
407 *last = *first = i;
408 while (next (&i) && i == *last + 1)
409 (*last)++;
410
411 return true;
412 }
413
414 inline unsigned int get_population (void) const
415 {
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200416 unsigned int pop = 0;
417 unsigned int count = pages.len;
418 for (unsigned int i = 0; i < count; i++)
419 pop += pages[i].get_population ();
420 return pop;
Behdad Esfahbodaec89de2012-11-15 16:15:42 -0800421 }
Behdad Esfahbodf039e792012-05-17 20:55:12 -0400422 inline hb_codepoint_t get_min (void) const
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400423 {
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200424 unsigned int count = pages.len;
425 for (unsigned int i = 0; i < count; i++)
426 if (!page_at (i).is_empty ())
427 return page_map[i].major * PAGE_SIZE + page_at (i).get_min ();
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400428 return INVALID;
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400429 }
Behdad Esfahbodf039e792012-05-17 20:55:12 -0400430 inline hb_codepoint_t get_max (void) const
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400431 {
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200432 unsigned int count = pages.len;
433 for (int i = count - 1; i >= 0; i++)
434 if (!page_at (i).is_empty ())
435 return page_map[i].major * PAGE_SIZE + page_at (i).get_max ();
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400436 return INVALID;
Behdad Esfahbod6c6ccaf2012-04-24 14:21:15 -0400437 }
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400438
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200439 static const unsigned int PAGE_SIZE = sizeof (page_t) * 8;
Behdad Esfahbod20cbc1f2013-09-06 15:29:22 -0400440 static const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400441
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200442 page_t *page_for_insert (hb_codepoint_t g)
443 {
444 page_map_t map = {major (g), pages.len};
445 unsigned int i;
446 if (!page_map.bfind (&map, &i))
447 {
448 if (!resize (pages.len + 1))
449 return nullptr;
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400450
Behdad Esfahboddeed4a42017-10-15 16:53:09 +0200451 pages[map.index].init ();
452 memmove (&page_map[i + 1], &page_map[i], (page_map.len - 1 - i) * sizeof (page_map[0]));
453 page_map[i] = map;
454 }
455 return &pages[page_map[i].index];
456 }
457 page_t *page_for (hb_codepoint_t g)
458 {
459 page_map_t key = {major (g)};
460 const page_map_t *found = page_map.bsearch (&key);
461 if (found)
462 return &pages[found->index];
463 return nullptr;
464 }
465 const page_t *page_for (hb_codepoint_t g) const
466 {
467 page_map_t key = {major (g)};
468 const page_map_t *found = page_map.bsearch (&key);
469 if (found)
470 return &pages[found->index];
471 return nullptr;
472 }
473 page_t &page_at (unsigned int i) { return pages[page_map[i].index]; }
474 const page_t &page_at (unsigned int i) const { return pages[page_map[i].index]; }
475 unsigned int major (hb_codepoint_t g) const { return g / PAGE_SIZE; }
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400476};
477
Behdad Esfahbod0b08adb2012-04-23 22:41:09 -0400478
479#endif /* HB_SET_PRIVATE_HH */