/*************************************************
2* Perl-Compatible Regular Expressions *
3*************************************************/
4
5/* PCRE is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 This module by Zoltan Herczeg
10 Original API code Copyright (c) 1997-2012 University of Cambridge
11 New API code Copyright (c) 2016-2019 University of Cambridge
12
13-----------------------------------------------------------------------------
14Redistribution and use in source and binary forms, with or without
15modification, are permitted provided that the following conditions are met:
16
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
23
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
27
28THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38POSSIBILITY OF SUCH DAMAGE.
39-----------------------------------------------------------------------------
40*/
41
42#ifdef HAVE_CONFIG_H
43#include "config.h"
44#endif
45
46#include "pcre2_internal.h"
47
48#ifdef SUPPORT_JIT
49
50/* All-in-one: Since we use the JIT compiler only from here,
51we just include it. This way we don't need to touch the build
52system files. */
53
/* Configuration macros for the sljit sources that are included into this
file. They must be defined before sljitLir.c is pulled in. */
#define SLJIT_CONFIG_AUTO 1
#define SLJIT_CONFIG_STATIC 1
#define SLJIT_VERBOSE 0

/* SLJIT_DEBUG follows PCRE2_DEBUG: 1 when it is defined, 0 otherwise. */
#ifdef PCRE2_DEBUG
#define SLJIT_DEBUG 1
#else
#define SLJIT_DEBUG 0
#endif

/* Map the sljit allocation macros onto the two static helpers of this file;
allocator_data is passed through to them unchanged. */
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
66
67static void * pcre2_jit_malloc(size_t size, void *allocator_data)
68{
69pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
70return allocator->malloc(size, allocator->memory_data);
71}
72
73static void pcre2_jit_free(void *ptr, void *allocator_data)
74{
75pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76allocator->free(ptr, allocator->memory_data);
77}
78
79#include "sljit/sljitLir.c"
80
/* Stop the build when SLJIT_CONFIG_UNSUPPORTED is defined to a non-zero value. */
#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
#error Unsupported architecture
#endif

/* Defines for debugging purposes. */

/* 1 - Use unoptimized capturing brackets.
   2 - Enable capture_last_ptr (includes option 1). */
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */

/* 1 - Always have a control head. */
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */

/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is defined and non-zero. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
106
107/*
Short summary about the backtracking mechanism employed by the jit code generator:
109
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
115
116 'ab' - 'a' and 'b' regexps are concatenated
117 'a+' - 'a' is the sub-expression of the '+' operator
118
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
124
125 Greedy star operator (*) :
126 Matching path: match happens.
127 Backtrack path: match failed.
128 Non-greedy star operator (*?) :
129 Matching path: no need to perform a match.
130 Backtrack path: match is required.
131
The following example shows how the code is generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:
135
136 A(B|C)D
137
138The generated code will be the following:
139
140 A matching path
141 '(' matching path (pushing arguments to the stack)
142 B matching path
143 ')' matching path (pushing arguments to the stack)
144 D matching path
145 return with successful match
146
147 D backtrack path
148 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
149 B backtrack path
150 C expected path
151 jump to D matching path
152 C backtrack path
153 A backtrack path
154
 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
161*/
162
163/*
164Saved stack frames:
165
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
170
171The stack frames are stored in a chain list, and have the following format:
172([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
173
174Thus we can restore the private data to a particular point in the stack.
175*/
176
177typedef struct jit_arguments {
178 /* Pointers first. */
179 struct sljit_stack *stack;
180 PCRE2_SPTR str;
181 PCRE2_SPTR begin;
182 PCRE2_SPTR end;
183 pcre2_match_data *match_data;
184 PCRE2_SPTR startchar_ptr;
185 PCRE2_UCHAR *mark_ptr;
186 int (*callout)(pcre2_callout_block *, void *);
187 void *callout_data;
188 /* Everything else after. */
189 sljit_uw offset_limit;
190 sljit_u32 limit_match;
191 sljit_u32 oveccount;
192 sljit_u32 options;
193} jit_arguments;
194
195#define JIT_NUMBER_OF_COMPILE_MODES 3
196
197typedef struct executable_functions {
198 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
199 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
200 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
201 sljit_u32 top_bracket;
202 sljit_u32 limit_match;
203} executable_functions;
204
/* Node of a singly linked list of sljit jumps. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
209
/* Node of a singly linked list of stubs; each stub records a start jump
and a quit label. */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
215
/* Negative sentinel values for frame sizes. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
220
/* Entry kinds of the control chain. */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
225
/* Kinds of early fail handling. */
enum early_fail_types {
  type_skip = 0,
  type_fail = 1,
  type_fail_range = 2
};
231
232typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
233
234/* The following structure is the key data type for the recursive
235code generator. It is allocated by compile_matchingpath, and contains
236the arguments for compile_backtrackingpath. Must be the first member
237of its descendants. */
238typedef struct backtrack_common {
239 /* Concatenation stack. */
240 struct backtrack_common *prev;
241 jump_list *nextbacktracks;
242 /* Internal stack (for component operators). */
243 struct backtrack_common *top;
244 jump_list *topbacktracks;
245 /* Opcode pointer. */
246 PCRE2_SPTR cc;
247} backtrack_common;
248
249typedef struct assert_backtrack {
250 backtrack_common common;
251 jump_list *condfailed;
252 /* Less than 0 if a frame is not needed. */
253 int framesize;
254 /* Points to our private memory word on the stack. */
255 int private_data_ptr;
256 /* For iterators. */
257 struct sljit_label *matchingpath;
258} assert_backtrack;
259
260typedef struct bracket_backtrack {
261 backtrack_common common;
262 /* Where to coninue if an alternative is successfully matched. */
263 struct sljit_label *alternative_matchingpath;
264 /* For rmin and rmax iterators. */
265 struct sljit_label *recursive_matchingpath;
266 /* For greedy ? operator. */
267 struct sljit_label *zero_matchingpath;
268 /* Contains the branches of a failed condition. */
269 union {
270 /* Both for OP_COND, OP_SCOND. */
271 jump_list *condfailed;
272 assert_backtrack *assert;
273 /* For OP_ONCE. Less than 0 if not needed. */
274 int framesize;
275 /* For brackets with >3 alternatives. */
276 struct sljit_put_label *matching_put_label;
277 } u;
278 /* Points to our private memory word on the stack. */
279 int private_data_ptr;
280} bracket_backtrack;
281
282typedef struct bracketpos_backtrack {
283 backtrack_common common;
284 /* Points to our private memory word on the stack. */
285 int private_data_ptr;
286 /* Reverting stack is needed. */
287 int framesize;
288 /* Allocated stack size. */
289 int stacksize;
290} bracketpos_backtrack;
291
292typedef struct braminzero_backtrack {
293 backtrack_common common;
294 struct sljit_label *matchingpath;
295} braminzero_backtrack;
296
297typedef struct char_iterator_backtrack {
298 backtrack_common common;
299 /* Next iteration. */
300 struct sljit_label *matchingpath;
301 union {
302 jump_list *backtracks;
303 struct {
304 unsigned int othercasebit;
305 PCRE2_UCHAR chr;
306 BOOL enabled;
307 } charpos;
308 } u;
309} char_iterator_backtrack;
310
311typedef struct ref_iterator_backtrack {
312 backtrack_common common;
313 /* Next iteration. */
314 struct sljit_label *matchingpath;
315} ref_iterator_backtrack;
316
317typedef struct recurse_entry {
318 struct recurse_entry *next;
319 /* Contains the function entry label. */
320 struct sljit_label *entry_label;
321 /* Contains the function entry label. */
322 struct sljit_label *backtrack_label;
323 /* Collects the entry calls until the function is not created. */
324 jump_list *entry_calls;
325 /* Collects the backtrack calls until the function is not created. */
326 jump_list *backtrack_calls;
327 /* Points to the starting opcode. */
328 sljit_sw start;
329} recurse_entry;
330
331typedef struct recurse_backtrack {
332 backtrack_common common;
333 /* Return to the matching path. */
334 struct sljit_label *matchingpath;
335 /* Recursive pattern. */
336 recurse_entry *entry;
337 /* Pattern is inlined. */
338 BOOL inlined_pattern;
339} recurse_backtrack;
340
341#define OP_THEN_TRAP OP_TABLE_LENGTH
342
343typedef struct then_trap_backtrack {
344 backtrack_common common;
345 /* If then_trap is not NULL, this structure contains the real
346 then_trap for the backtracking path. */
347 struct then_trap_backtrack *then_trap;
348 /* Points to the starting opcode. */
349 sljit_sw start;
350 /* Exit point for the then opcodes of this alternative. */
351 jump_list *quit;
352 /* Frame size of the current alternative. */
353 int framesize;
354} then_trap_backtrack;
355
356#define MAX_N_CHARS 12
357#define MAX_DIFF_CHARS 5
358
359typedef struct fast_forward_char_data {
360 /* Number of characters in the chars array, 255 for any character. */
361 sljit_u8 count;
362 /* Number of last UTF-8 characters in the chars array. */
363 sljit_u8 last_count;
364 /* Available characters in the current position. */
365 PCRE2_UCHAR chars[MAX_DIFF_CHARS];
366} fast_forward_char_data;
367
368#define MAX_CLASS_RANGE_SIZE 4
369#define MAX_CLASS_CHARS_SIZE 3
370
371typedef struct compiler_common {
372 /* The sljit ceneric compiler. */
373 struct sljit_compiler *compiler;
374 /* Compiled regular expression. */
375 pcre2_real_code *re;
376 /* First byte code. */
377 PCRE2_SPTR start;
378 /* Maps private data offset to each opcode. */
379 sljit_s32 *private_data_ptrs;
380 /* Chain list of read-only data ptrs. */
381 void *read_only_data_head;
382 /* Tells whether the capturing bracket is optimized. */
383 sljit_u8 *optimized_cbracket;
384 /* Tells whether the starting offset is a target of then. */
385 sljit_u8 *then_offsets;
386 /* Current position where a THEN must jump. */
387 then_trap_backtrack *then_trap;
388 /* Starting offset of private data for capturing brackets. */
389 sljit_s32 cbra_ptr;
390 /* Output vector starting point. Must be divisible by 2. */
391 sljit_s32 ovector_start;
392 /* Points to the starting character of the current match. */
393 sljit_s32 start_ptr;
394 /* Last known position of the requested byte. */
395 sljit_s32 req_char_ptr;
396 /* Head of the last recursion. */
397 sljit_s32 recursive_head_ptr;
398 /* First inspected character for partial matching.
399 (Needed for avoiding zero length partial matches.) */
400 sljit_s32 start_used_ptr;
401 /* Starting pointer for partial soft matches. */
402 sljit_s32 hit_start;
403 /* Pointer of the match end position. */
404 sljit_s32 match_end_ptr;
405 /* Points to the marked string. */
406 sljit_s32 mark_ptr;
407 /* Recursive control verb management chain. */
408 sljit_s32 control_head_ptr;
409 /* Points to the last matched capture block index. */
410 sljit_s32 capture_last_ptr;
411 /* Fast forward skipping byte code pointer. */
412 PCRE2_SPTR fast_forward_bc_ptr;
413 /* Locals used by fast fail optimization. */
414 sljit_s32 early_fail_start_ptr;
415 sljit_s32 early_fail_end_ptr;
416
417 /* Flipped and lower case tables. */
418 const sljit_u8 *fcc;
419 sljit_sw lcc;
420 /* Mode can be PCRE2_JIT_COMPLETE and others. */
421 int mode;
422 /* TRUE, when empty match is accepted for partial matching. */
423 BOOL allow_empty_partial;
424 /* TRUE, when minlength is greater than 0. */
425 BOOL might_be_empty;
426 /* \K is found in the pattern. */
427 BOOL has_set_som;
428 /* (*SKIP:arg) is found in the pattern. */
429 BOOL has_skip_arg;
430 /* (*THEN) is found in the pattern. */
431 BOOL has_then;
432 /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
433 BOOL has_skip_in_assert_back;
434 /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
435 BOOL local_quit_available;
436 /* Currently in a positive assertion. */
437 BOOL in_positive_assertion;
438 /* Newline control. */
439 int nltype;
440 sljit_u32 nlmax;
441 sljit_u32 nlmin;
442 int newline;
443 int bsr_nltype;
444 sljit_u32 bsr_nlmax;
445 sljit_u32 bsr_nlmin;
446 /* Dollar endonly. */
447 int endonly;
448 /* Tables. */
449 sljit_sw ctypes;
450 /* Named capturing brackets. */
451 PCRE2_SPTR name_table;
452 sljit_sw name_count;
453 sljit_sw name_entry_size;
454
455 /* Labels and jump lists. */
456 struct sljit_label *partialmatchlabel;
457 struct sljit_label *quit_label;
458 struct sljit_label *abort_label;
459 struct sljit_label *accept_label;
460 struct sljit_label *ff_newline_shortcut;
461 stub_list *stubs;
462 recurse_entry *entries;
463 recurse_entry *currententry;
464 jump_list *partialmatch;
465 jump_list *quit;
466 jump_list *positive_assertion_quit;
467 jump_list *abort;
468 jump_list *failed_match;
469 jump_list *accept;
470 jump_list *calllimit;
471 jump_list *stackalloc;
472 jump_list *revertframes;
473 jump_list *wordboundary;
474 jump_list *anynewline;
475 jump_list *hspace;
476 jump_list *vspace;
477 jump_list *casefulcmp;
478 jump_list *caselesscmp;
479 jump_list *reset_match;
480 BOOL unset_backref;
481 BOOL alt_circumflex;
482#ifdef SUPPORT_UNICODE
483 BOOL utf;
484 BOOL invalid_utf;
485 BOOL ucp;
486 /* Points to saving area for iref. */
487 sljit_s32 iref_ptr;
488 jump_list *getucd;
489 jump_list *getucdtype;
490#if PCRE2_CODE_UNIT_WIDTH == 8
491 jump_list *utfreadchar;
492 jump_list *utfreadtype8;
493 jump_list *utfpeakcharback;
494#endif
495#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
496 jump_list *utfreadchar_invalid;
497 jump_list *utfreadnewline_invalid;
498 jump_list *utfmoveback_invalid;
499 jump_list *utfpeakcharback_invalid;
500#endif
501#endif /* SUPPORT_UNICODE */
502} compiler_common;
503
/* For byte_sequence_compare. */

/* Comparison state. The members after sourcereg exist only when
SLJIT_UNALIGNED is defined and non-zero. The unions c and oc have the same
layout: the same bytes viewed as one 32 bit value, one 16 bit value, or an
array of code units whose element type follows PCRE2_CODE_UNIT_WIDTH. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
537
/* Undefine sljit macros. CMP is redefined further down as a shortcut. */
#undef CMP

/* Used for accessing the elements of the stack: converts a slot index
into a byte offset (i machine words, as a signed int). */
#define STACK(i) ((i) * (int)sizeof(sljit_sw))
543
/* When sljit names a preferred shift register, only SLJIT_R2 and SLJIT_R3
are accepted. SHIFT_REG_IS_R3 records the R3 case so that TMP2 can be
mapped onto it below. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif

/* Symbolic names for the sljit registers used by the generated code.
TMP2 and TMP3 swap places when the preferred shift register is R3. */
#define TMP1 SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2 SLJIT_R3
#define TMP3 SLJIT_R2
#else
#define TMP2 SLJIT_R2
#define TMP3 SLJIT_R3
#endif
#define STR_PTR SLJIT_R1
#define STR_END SLJIT_S0
#define STACK_TOP SLJIT_S1
#define STACK_LIMIT SLJIT_S2
#define COUNT_MATCH SLJIT_S3
#define ARGUMENTS SLJIT_S4
#define RETURN_ADDR SLJIT_R4

/* HAS_VIRTUAL_REGISTERS is 1 only for the 32 bit x86 configuration. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define HAS_VIRTUAL_REGISTERS 1
#else
#define HAS_VIRTUAL_REGISTERS 0
#endif
575
/* Local space layout. All values are byte offsets in units of one
machine word (sljit_sw). */
/* These two locals can be used by the current opcode. */
#define LOCALS0 (0 * sizeof(sljit_sw))
#define LOCALS1 (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0 (2 * sizeof(sljit_sw))
#define POSSESSIVE1 (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet reached. */
/* The macros below expect a variable named "common" (a compiler_common
pointer) to be in scope at the point of use. */
#define OVECTOR_START (common->ovector_start)
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
/* Private data offset of the opcode at cc, indexed by its distance from
the first byte code. */
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
593
594#if PCRE2_CODE_UNIT_WIDTH == 8
595#define MOV_UCHAR SLJIT_MOV_U8
596#define IN_UCHARS(x) (x)
597#elif PCRE2_CODE_UNIT_WIDTH == 16
598#define MOV_UCHAR SLJIT_MOV_U16
599#define UCHAR_SHIFT (1)
600#define IN_UCHARS(x) ((x) * 2)
601#elif PCRE2_CODE_UNIT_WIDTH == 32
602#define MOV_UCHAR SLJIT_MOV_U32
603#define UCHAR_SHIFT (2)
604#define IN_UCHARS(x) ((x) * 4)
605#else
606#error Unsupported compiling mode
607#endif
608
/* Shortcuts. DEFINE_COMPILER declares a local "compiler" taken from
"common"; every other macro forwards its arguments to the matching
sljit_* function and passes that local "compiler" as the first argument. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP_SRC(op, src, srcw) \
  sljit_emit_op_src(compiler, (op), (src), (srcw))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emit a jump and bind it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Bind an earlier jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emit a compare-and-jump and bind it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define CMOV(type, dst_reg, src, srcw) \
  sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
638
/* Largest value of a signed 32 bit integer. */
#define READ_CHAR_MAX 0x7fffffff

#define INVALID_UTF_CHAR -1
#define UNASSIGNED_UTF_CHAR 888
643
644#if defined SUPPORT_UNICODE
645#if PCRE2_CODE_UNIT_WIDTH == 8
646
/* Decode one UTF-8 character at ptr, validating it (8 bit code units).

c               receives the decoded code point
ptr             read position; advanced past the character on success
end             one past the last readable code unit
invalid_action  statement executed when the sequence is malformed

ptr[0] is read unconditionally, so the caller must ensure ptr < end.
Continuation bytes are only read after a bounds check against end.
Overlong 3 and 4 byte forms, surrogates (0xd800-0xdfff) and values of
0x110000 or more are rejected; in those cases ptr has already been
advanced when invalid_action runs. In every other invalid case ptr is
left unchanged. The arguments are evaluated several times. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] <= 0x7f) \
    c = *ptr++; \
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
    { \
    c = ptr[1] - 0x80; \
    \
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
      { \
      c |= (ptr[0] - 0xc0) << 6; \
      ptr += 2; \
      } \
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
      { \
      c = c << 6 | (ptr[2] - 0x80); \
      \
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
        { \
        c |= (ptr[0] - 0xe0) << 12; \
        ptr += 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
        { \
        c = c << 6 | (ptr[3] - 0x80); \
        \
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
          { \
          c |= (ptr[0] - 0xf0) << 18; \
          ptr += 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
708
/* Decode the UTF-8 character that ends just before ptr, validating it
(8 bit code units).

c               receives the decoded code point
ptr             read position; moved back to the first code unit of the
                character on success
start           first readable code unit
invalid_action  statement executed when the sequence is malformed

ptr[-1] is read unconditionally, so the caller must ensure ptr > start.
Earlier code units are only read after a bounds check against start.

The character is assembled from its last byte towards its first one, so
each continuation byte found further back contributes 6 bits at a higher
position: ptr[-1] supplies bits 0-5, ptr[-2] bits 6-11 and ptr[-3] bits
12-17. (The earlier code shifted the accumulated value left instead, which
reversed the order of the continuation bytes and made the overlong and
surrogate checks below operate on a wrong value.)

Overlong 3 and 4 byte forms, surrogates (0xd800-0xdfff) and values of
0x110000 or more are rejected; in those cases ptr has already been moved
when invalid_action runs. In every other invalid case ptr is left
unchanged. The arguments are evaluated several times. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c <= 0x7f) \
    ptr--; \
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
    { \
    c -= 0x80; \
    \
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
      { \
      c |= (ptr[-2] - 0xc0) << 6; \
      ptr -= 2; \
      } \
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
      { \
      c |= (ptr[-2] - 0x80) << 6; \
      \
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
        { \
        c |= (ptr[-3] - 0xe0) << 12; \
        ptr -= 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
        { \
        c |= (ptr[-3] - 0x80) << 12; \
        \
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
          { \
          c |= (ptr[-4] - 0xf0) << 18; \
          ptr -= 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
771
772#elif PCRE2_CODE_UNIT_WIDTH == 16
773
/* Decode one UTF-16 character at ptr, validating it (16 bit code units).

c               receives the decoded code point
ptr             read position; advanced past the character on success
end             one past the last readable code unit
invalid_action  statement executed for an unpaired surrogate; ptr is
                left unchanged in that case

ptr[0] is read unconditionally, so the caller must ensure ptr < end.
The arguments are evaluated several times. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
    c = *ptr++; \
  else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
    { \
    c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
    ptr += 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
788
/* Decode the UTF-16 character that ends just before ptr, validating it
(16 bit code units).

c               receives the decoded code point
ptr             read position; moved back to the first code unit of the
                character on success
start           first readable code unit
invalid_action  statement executed for an unpaired surrogate; ptr is
                left unchanged in that case

ptr[-1] is read unconditionally, so the caller must ensure ptr > start.
The arguments are evaluated several times. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c < 0xd800 || c >= 0xe000) \
    ptr--; \
  else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
    { \
    c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \
    ptr -= 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }
804
805
806#elif PCRE2_CODE_UNIT_WIDTH == 32
807
/* Read one UTF-32 character at ptr, validating it (32 bit code units).

c               receives the code point
ptr             read position; advanced by one on success
end             not used by this variant
invalid_action  statement executed for a surrogate (0xd800-0xdfff) or a
                value of 0x110000 or more; ptr and c are left unchanged

The arguments are evaluated several times. */
#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \
    c = *ptr++; \
  else \
    { \
    invalid_action; \
    } \
  }
817
/* Read the UTF-32 character just before ptr, validating it (32 bit code
units).

c               receives ptr[-1], even when it turns out to be invalid
ptr             read position; moved back by one on success
start           not used by this variant
invalid_action  statement executed for a surrogate (0xd800-0xdfff) or a
                value of 0x110000 or more; ptr is left unchanged

The arguments are evaluated several times. */
#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \
    ptr--; \
  else \
    { \
    invalid_action; \
    } \
  }
828
829#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
830#endif /* SUPPORT_UNICODE */
831
832static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
833{
834SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
835do cc += GET(cc, 1); while (*cc == OP_ALT);
836SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
837cc += 1 + LINK_SIZE;
838return cc;
839}
840
841static int no_alternatives(PCRE2_SPTR cc)
842{
843int count = 0;
844SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
845do
846 {
847 cc += GET(cc, 1);
848 count++;
849 }
850while (*cc == OP_ALT);
851SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
852return count;
853}
854
/* Functions which might need modification for all new supported opcodes:
856 next_opcode
857 check_opcode_types
858 set_private_data_ptrs
859 get_framesize
860 init_frame
861 get_recurse_data_length
862 copy_recurse_data
863 compile_matchingpath
864 compile_backtrackingpath
865*/
866
867static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
868{
869SLJIT_UNUSED_ARG(common);
870switch(*cc)
871 {
872 case OP_SOD:
873 case OP_SOM:
874 case OP_SET_SOM:
875 case OP_NOT_WORD_BOUNDARY:
876 case OP_WORD_BOUNDARY:
877 case OP_NOT_DIGIT:
878 case OP_DIGIT:
879 case OP_NOT_WHITESPACE:
880 case OP_WHITESPACE:
881 case OP_NOT_WORDCHAR:
882 case OP_WORDCHAR:
883 case OP_ANY:
884 case OP_ALLANY:
885 case OP_NOTPROP:
886 case OP_PROP:
887 case OP_ANYNL:
888 case OP_NOT_HSPACE:
889 case OP_HSPACE:
890 case OP_NOT_VSPACE:
891 case OP_VSPACE:
892 case OP_EXTUNI:
893 case OP_EODN:
894 case OP_EOD:
895 case OP_CIRC:
896 case OP_CIRCM:
897 case OP_DOLL:
898 case OP_DOLLM:
899 case OP_CRSTAR:
900 case OP_CRMINSTAR:
901 case OP_CRPLUS:
902 case OP_CRMINPLUS:
903 case OP_CRQUERY:
904 case OP_CRMINQUERY:
905 case OP_CRRANGE:
906 case OP_CRMINRANGE:
907 case OP_CRPOSSTAR:
908 case OP_CRPOSPLUS:
909 case OP_CRPOSQUERY:
910 case OP_CRPOSRANGE:
911 case OP_CLASS:
912 case OP_NCLASS:
913 case OP_REF:
914 case OP_REFI:
915 case OP_DNREF:
916 case OP_DNREFI:
917 case OP_RECURSE:
918 case OP_CALLOUT:
919 case OP_ALT:
920 case OP_KET:
921 case OP_KETRMAX:
922 case OP_KETRMIN:
923 case OP_KETRPOS:
924 case OP_REVERSE:
925 case OP_ASSERT:
926 case OP_ASSERT_NOT:
927 case OP_ASSERTBACK:
928 case OP_ASSERTBACK_NOT:
929 case OP_ASSERT_NA:
930 case OP_ASSERTBACK_NA:
931 case OP_ONCE:
932 case OP_SCRIPT_RUN:
933 case OP_BRA:
934 case OP_BRAPOS:
935 case OP_CBRA:
936 case OP_CBRAPOS:
937 case OP_COND:
938 case OP_SBRA:
939 case OP_SBRAPOS:
940 case OP_SCBRA:
941 case OP_SCBRAPOS:
942 case OP_SCOND:
943 case OP_CREF:
944 case OP_DNCREF:
945 case OP_RREF:
946 case OP_DNRREF:
947 case OP_FALSE:
948 case OP_TRUE:
949 case OP_BRAZERO:
950 case OP_BRAMINZERO:
951 case OP_BRAPOSZERO:
952 case OP_PRUNE:
953 case OP_SKIP:
954 case OP_THEN:
955 case OP_COMMIT:
956 case OP_FAIL:
957 case OP_ACCEPT:
958 case OP_ASSERT_ACCEPT:
959 case OP_CLOSE:
960 case OP_SKIPZERO:
961 return cc + PRIV(OP_lengths)[*cc];
962
963 case OP_CHAR:
964 case OP_CHARI:
965 case OP_NOT:
966 case OP_NOTI:
967 case OP_STAR:
968 case OP_MINSTAR:
969 case OP_PLUS:
970 case OP_MINPLUS:
971 case OP_QUERY:
972 case OP_MINQUERY:
973 case OP_UPTO:
974 case OP_MINUPTO:
975 case OP_EXACT:
976 case OP_POSSTAR:
977 case OP_POSPLUS:
978 case OP_POSQUERY:
979 case OP_POSUPTO:
980 case OP_STARI:
981 case OP_MINSTARI:
982 case OP_PLUSI:
983 case OP_MINPLUSI:
984 case OP_QUERYI:
985 case OP_MINQUERYI:
986 case OP_UPTOI:
987 case OP_MINUPTOI:
988 case OP_EXACTI:
989 case OP_POSSTARI:
990 case OP_POSPLUSI:
991 case OP_POSQUERYI:
992 case OP_POSUPTOI:
993 case OP_NOTSTAR:
994 case OP_NOTMINSTAR:
995 case OP_NOTPLUS:
996 case OP_NOTMINPLUS:
997 case OP_NOTQUERY:
998 case OP_NOTMINQUERY:
999 case OP_NOTUPTO:
1000 case OP_NOTMINUPTO:
1001 case OP_NOTEXACT:
1002 case OP_NOTPOSSTAR:
1003 case OP_NOTPOSPLUS:
1004 case OP_NOTPOSQUERY:
1005 case OP_NOTPOSUPTO:
1006 case OP_NOTSTARI:
1007 case OP_NOTMINSTARI:
1008 case OP_NOTPLUSI:
1009 case OP_NOTMINPLUSI:
1010 case OP_NOTQUERYI:
1011 case OP_NOTMINQUERYI:
1012 case OP_NOTUPTOI:
1013 case OP_NOTMINUPTOI:
1014 case OP_NOTEXACTI:
1015 case OP_NOTPOSSTARI:
1016 case OP_NOTPOSPLUSI:
1017 case OP_NOTPOSQUERYI:
1018 case OP_NOTPOSUPTOI:
1019 cc += PRIV(OP_lengths)[*cc];
1020#ifdef SUPPORT_UNICODE
1021 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1022#endif
1023 return cc;
1024
1025 /* Special cases. */
1026 case OP_TYPESTAR:
1027 case OP_TYPEMINSTAR:
1028 case OP_TYPEPLUS:
1029 case OP_TYPEMINPLUS:
1030 case OP_TYPEQUERY:
1031 case OP_TYPEMINQUERY:
1032 case OP_TYPEUPTO:
1033 case OP_TYPEMINUPTO:
1034 case OP_TYPEEXACT:
1035 case OP_TYPEPOSSTAR:
1036 case OP_TYPEPOSPLUS:
1037 case OP_TYPEPOSQUERY:
1038 case OP_TYPEPOSUPTO:
1039 return cc + PRIV(OP_lengths)[*cc] - 1;
1040
1041 case OP_ANYBYTE:
1042#ifdef SUPPORT_UNICODE
1043 if (common->utf) return NULL;
1044#endif
1045 return cc + 1;
1046
1047 case OP_CALLOUT_STR:
1048 return cc + GET(cc, 1 + 2*LINK_SIZE);
1049
1050#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1051 case OP_XCLASS:
1052 return cc + GET(cc, 1);
1053#endif
1054
1055 case OP_MARK:
1056 case OP_COMMIT_ARG:
1057 case OP_PRUNE_ARG:
1058 case OP_SKIP_ARG:
1059 case OP_THEN_ARG:
1060 return cc + 1 + 2 + cc[1];
1061
1062 default:
1063 SLJIT_UNREACHABLE();
1064 return NULL;
1065 }
1066}
1067
/* Pre-scan of the compiled pattern in the range [cc, ccend).

The loop visits every opcode once and, depending on the opcode:
  - sets feature flags in common (has_set_som, might_be_empty, has_then,
    has_skip_arg, has_skip_in_assert_back, control_head_ptr);
  - reserves machine words by recording the current common->ovector_start
    in a *_ptr field and advancing ovector_start (each reservation is made
    at most once because it is guarded by a "== 0" test);
  - clears common->optimized_cbracket[] for every capture group number that
    is referenced (back references, condition references, named references,
    possessive capture brackets).

Returns FALSE when the pattern cannot be handled:
  - a conditional group whose first item is a callout,
  - one of the checked control verbs is located before assert_na_end,
  - next_opcode() returns NULL for an opcode.
Returns TRUE otherwise. */
1068static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
1069{
1070int count;
1071PCRE2_SPTR slot;
     /* Furthest bracketend() of any OP_ASSERTBACK (assert_back_end) or
        OP_ASSERT_NA / OP_ASSERTBACK_NA (assert_na_end) seen so far. Both start
        at cc - 1, so "cc < ..._end" is false until such a group is found. */
1072PCRE2_SPTR assert_back_end = cc - 1;
1073PCRE2_SPTR assert_na_end = cc - 1;
1074
1075/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
1076while (cc < ccend)
1077 {
1078 switch(*cc)
1079 {
1080 case OP_SET_SOM:
1081 common->has_set_som = TRUE;
1082 common->might_be_empty = TRUE;
1083 cc += 1;
1084 break;
1085
1086 case OP_REFI:
1087#ifdef SUPPORT_UNICODE
     /* Reserve three words, once, the first time OP_REFI is seen. */
1088 if (common->iref_ptr == 0)
1089 {
1090 common->iref_ptr = common->ovector_start;
1091 common->ovector_start += 3 * sizeof(sljit_sw);
1092 }
1093#endif /* SUPPORT_UNICODE */
1094 /* Fall through. */
1095 case OP_REF:
     /* The referenced group is marked as not optimizable. */
1096 common->optimized_cbracket[GET2(cc, 1)] = 0;
1097 cc += 1 + IMM2_SIZE;
1098 break;
1099
1100 case OP_ASSERT_NA:
1101 case OP_ASSERTBACK_NA:
     /* Remember how far this group reaches; the control verb checks below
        compare their own position against this end. */
1102 slot = bracketend(cc);
1103 if (slot > assert_na_end)
1104 assert_na_end = slot;
1105 cc += 1 + LINK_SIZE;
1106 break;
1107
1108 case OP_CBRAPOS:
1109 case OP_SCBRAPOS:
1110 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
1111 cc += 1 + LINK_SIZE + IMM2_SIZE;
1112 break;
1113
1114 case OP_COND:
1115 case OP_SCOND:
1116 /* Only AUTO_CALLOUT can insert this opcode. We do
1117 not intend to support this case. */
1118 if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
1119 return FALSE;
1120 cc += 1 + LINK_SIZE;
1121 break;
1122
1123 case OP_CREF:
1124 common->optimized_cbracket[GET2(cc, 1)] = 0;
1125 cc += 1 + IMM2_SIZE;
1126 break;
1127
1128 case OP_DNREF:
1129 case OP_DNREFI:
1130 case OP_DNCREF:
     /* The first operand is an index into the name table and the second is
        the number of consecutive entries; every group number stored at the
        start of those entries is marked as not optimizable. */
1131 count = GET2(cc, 1 + IMM2_SIZE);
1132 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
1133 while (count-- > 0)
1134 {
1135 common->optimized_cbracket[GET2(slot, 0)] = 0;
1136 slot += common->name_entry_size;
1137 }
1138 cc += 1 + 2 * IMM2_SIZE;
1139 break;
1140
1141 case OP_RECURSE:
1142 /* Set its value only once. */
1143 if (common->recursive_head_ptr == 0)
1144 {
1145 common->recursive_head_ptr = common->ovector_start;
1146 common->ovector_start += sizeof(sljit_sw);
1147 }
1148 cc += 1 + LINK_SIZE;
1149 break;
1150
1151 case OP_CALLOUT:
1152 case OP_CALLOUT_STR:
     /* Reserve one word for capture_last_ptr, once. */
1153 if (common->capture_last_ptr == 0)
1154 {
1155 common->capture_last_ptr = common->ovector_start;
1156 common->ovector_start += sizeof(sljit_sw);
1157 }
     /* OP_CALLOUT has a fixed length; OP_CALLOUT_STR stores its own length. */
1158 cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
1159 break;
1160
1161 case OP_ASSERTBACK:
1162 slot = bracketend(cc);
1163 if (slot > assert_back_end)
1164 assert_back_end = slot;
1165 cc += 1 + LINK_SIZE;
1166 break;
1167
1168 case OP_THEN_ARG:
1169 common->has_then = TRUE;
     /* NOTE(review): 1 is only a non-zero marker here; presumably a real
        offset is assigned elsewhere - confirm against the caller. */
1170 common->control_head_ptr = 1;
1171 /* Fall through. */
1172
1173 case OP_COMMIT_ARG:
1174 case OP_PRUNE_ARG:
     /* These verbs are rejected when located before assert_na_end. */
1175 if (cc < assert_na_end)
1176 return FALSE;
1177 /* Fall through */
1178 case OP_MARK:
     /* Reserve one word for mark_ptr, once. */
1179 if (common->mark_ptr == 0)
1180 {
1181 common->mark_ptr = common->ovector_start;
1182 common->ovector_start += sizeof(sljit_sw);
1183 }
     /* Opcode, length unit, cc[1] name units and one more unit. */
1184 cc += 1 + 2 + cc[1];
1185 break;
1186
1187 case OP_THEN:
1188 common->has_then = TRUE;
1189 common->control_head_ptr = 1;
1190 cc += 1;
1191 break;
1192
1193 case OP_SKIP:
1194 if (cc < assert_back_end)
1195 common->has_skip_in_assert_back = TRUE;
1196 if (cc < assert_na_end)
1197 return FALSE;
1198 cc += 1;
1199 break;
1200
1201 case OP_SKIP_ARG:
1202 common->control_head_ptr = 1;
1203 common->has_skip_arg = TRUE;
1204 if (cc < assert_back_end)
1205 common->has_skip_in_assert_back = TRUE;
1206 if (cc < assert_na_end)
1207 return FALSE;
1208 cc += 1 + 2 + cc[1];
1209 break;
1210
1211 case OP_PRUNE:
1212 case OP_COMMIT:
1213 case OP_ASSERT_ACCEPT:
1214 if (cc < assert_na_end)
1215 return FALSE;
1216 cc++;
1217 break;
1218
1219 default:
     /* Every other opcode is only skipped; NULL means next_opcode() cannot
        handle it, so the whole pattern is rejected. */
1220 cc = next_opcode(common, cc);
1221 if (cc == NULL)
1222 return FALSE;
1223 break;
1224 }
1225 }
1226return TRUE;
1227}
1228
/* Saturation value of the counter used by detect_early_fail(). The scan of
an alternative stops as soon as the counter reaches this value, and the same
value is returned when the scan has to be given up. */
1229#define EARLY_FAIL_ENHANCE_MAX (1 + 3)
1230
1231/*
1232start:
1233 0 - skip / early fail allowed
1234 1 - only early fail with range allowed
1235 >1 - (start - 1) early fail is processed
1236
1237return: current number of iterators enhanced with fast fail
1238*/
/* Scans the leading items of every alternative of the bracket at cc (which
must be OP_ONCE, OP_BRA or an optimized OP_CBRA) and tags the unbounded
single character / class iterators found there.

For a tagged iterator the entry private_data_ptrs[iterator + 1] receives
(*private_data_start << 3) | type, where type is type_skip, type_fail or
type_fail_range, and *private_data_start is advanced by one word (two words
for type_fail_range). common->early_fail_start_ptr / early_fail_end_ptr
delimit the words handed out for the fail types, and fast_forward_bc_ptr
records the iterator tagged with type_skip.

  common               compiler state, updated as described above
  cc                   the opening bracket to scan
  private_data_start   in/out: next free private data offset
  depth                nesting level; nested brackets are not entered once it
                       reaches 4
  start                initial counter value, see the table above
  fast_forward_allowed TRUE while the type_skip tagging is still permitted

The result is the largest counter value reached in any alternative, or
EARLY_FAIL_ENHANCE_MAX when an alternative was not scanned up to its closing
OP_ALT / OP_KET or the private data area became too large. */
1239static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start,
1240 sljit_s32 depth, int start, BOOL fast_forward_allowed)
1241{
1242PCRE2_SPTR begin = cc;
1243PCRE2_SPTR next_alt;
1244PCRE2_SPTR end;
1245PCRE2_SPTR accelerated_start;
1246BOOL prev_fast_forward_allowed;
1247int result = 0;
1248int count;
1249
1250SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
1251SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
1252SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);
1253
     /* One iteration per alternative; every alternative restarts from start. */
1254do
1255 {
1256 count = start;
1257 next_alt = cc + GET(cc, 1);
1258 cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1259
     /* Inside this loop "continue" moves on to the next item, while leaving
        the switch with "break" either processes accelerated_start (when it
        was set) or ends the scan of the alternative. */
1260 while (TRUE)
1261 {
1262 accelerated_start = NULL;
1263
1264 switch(*cc)
1265 {
1266 case OP_SOD:
1267 case OP_SOM:
1268 case OP_SET_SOM:
1269 case OP_NOT_WORD_BOUNDARY:
1270 case OP_WORD_BOUNDARY:
1271 case OP_EODN:
1272 case OP_EOD:
1273 case OP_CIRC:
1274 case OP_CIRCM:
1275 case OP_DOLL:
1276 case OP_DOLLM:
1277 /* Zero width assertions. */
1278 cc++;
1279 continue;
1280
     /* Single character types: skipped, but type_skip is no longer allowed
        afterwards. */
1281 case OP_NOT_DIGIT:
1282 case OP_DIGIT:
1283 case OP_NOT_WHITESPACE:
1284 case OP_WHITESPACE:
1285 case OP_NOT_WORDCHAR:
1286 case OP_WORDCHAR:
1287 case OP_ANY:
1288 case OP_ALLANY:
1289 case OP_ANYBYTE:
1290 case OP_NOT_HSPACE:
1291 case OP_HSPACE:
1292 case OP_NOT_VSPACE:
1293 case OP_VSPACE:
1294 fast_forward_allowed = FALSE;
1295 cc++;
1296 continue;
1297
     /* These two additionally force the counter to at least 1, so only
        type_fail_range can be used for later iterators. */
1298 case OP_ANYNL:
1299 case OP_EXTUNI:
1300 fast_forward_allowed = FALSE;
1301 if (count == 0)
1302 count = 1;
1303 cc++;
1304 continue;
1305
1306 case OP_NOTPROP:
1307 case OP_PROP:
1308 fast_forward_allowed = FALSE;
1309 cc += 1 + 2;
1310 continue;
1311
1312 case OP_CHAR:
1313 case OP_CHARI:
1314 case OP_NOT:
1315 case OP_NOTI:
1316 fast_forward_allowed = FALSE;
1317 cc += 2;
1318#ifdef SUPPORT_UNICODE
1319 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1320#endif
1321 continue;
1322
1323 case OP_TYPESTAR:
1324 case OP_TYPEMINSTAR:
1325 case OP_TYPEPLUS:
1326 case OP_TYPEMINPLUS:
1327 case OP_TYPEPOSSTAR:
1328 case OP_TYPEPOSPLUS:
1329 /* The type or prop opcode is skipped in the next iteration. */
1330 cc += 1;
1331
     /* An unbounded type iterator is a candidate unless its type is
        OP_ANYNL or OP_EXTUNI. */
1332 if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
1333 {
1334 accelerated_start = cc - 1;
1335 break;
1336 }
1337
1338 if (count == 0)
1339 count = 1;
1340 fast_forward_allowed = FALSE;
1341 continue;
1342
1343 case OP_TYPEUPTO:
1344 case OP_TYPEMINUPTO:
1345 case OP_TYPEEXACT:
1346 case OP_TYPEPOSUPTO:
1347 cc += IMM2_SIZE;
1348 /* Fall through */
1349
1350 case OP_TYPEQUERY:
1351 case OP_TYPEMINQUERY:
1352 case OP_TYPEPOSQUERY:
1353 /* The type or prop opcode is skipped in the next iteration. */
1354 fast_forward_allowed = FALSE;
1355 if (count == 0)
1356 count = 1;
1357 cc += 1;
1358 continue;
1359
     /* Unbounded single character iterators: always candidates. */
1360 case OP_STAR:
1361 case OP_MINSTAR:
1362 case OP_PLUS:
1363 case OP_MINPLUS:
1364 case OP_POSSTAR:
1365 case OP_POSPLUS:
1366
1367 case OP_STARI:
1368 case OP_MINSTARI:
1369 case OP_PLUSI:
1370 case OP_MINPLUSI:
1371 case OP_POSSTARI:
1372 case OP_POSPLUSI:
1373
1374 case OP_NOTSTAR:
1375 case OP_NOTMINSTAR:
1376 case OP_NOTPLUS:
1377 case OP_NOTMINPLUS:
1378 case OP_NOTPOSSTAR:
1379 case OP_NOTPOSPLUS:
1380
1381 case OP_NOTSTARI:
1382 case OP_NOTMINSTARI:
1383 case OP_NOTPLUSI:
1384 case OP_NOTMINPLUSI:
1385 case OP_NOTPOSSTARI:
1386 case OP_NOTPOSPLUSI:
1387 accelerated_start = cc;
1388 cc += 2;
1389#ifdef SUPPORT_UNICODE
1390 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1391#endif
1392 break;
1393
     /* Bounded and optional single character iterators: never candidates,
        they are only skipped. */
1394 case OP_UPTO:
1395 case OP_MINUPTO:
1396 case OP_EXACT:
1397 case OP_POSUPTO:
1398 case OP_UPTOI:
1399 case OP_MINUPTOI:
1400 case OP_EXACTI:
1401 case OP_POSUPTOI:
1402 case OP_NOTUPTO:
1403 case OP_NOTMINUPTO:
1404 case OP_NOTEXACT:
1405 case OP_NOTPOSUPTO:
1406 case OP_NOTUPTOI:
1407 case OP_NOTMINUPTOI:
1408 case OP_NOTEXACTI:
1409 case OP_NOTPOSUPTOI:
1410 cc += IMM2_SIZE;
1411 /* Fall through */
1412
1413 case OP_QUERY:
1414 case OP_MINQUERY:
1415 case OP_POSQUERY:
1416 case OP_QUERYI:
1417 case OP_MINQUERYI:
1418 case OP_POSQUERYI:
1419 case OP_NOTQUERY:
1420 case OP_NOTMINQUERY:
1421 case OP_NOTPOSQUERY:
1422 case OP_NOTQUERYI:
1423 case OP_NOTMINQUERYI:
1424 case OP_NOTPOSQUERYI:
1425 fast_forward_allowed = FALSE;
1426 if (count == 0)
1427 count = 1;
1428 cc += 2;
1429#ifdef SUPPORT_UNICODE
1430 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1431#endif
1432 continue;
1433
     /* Character classes: the class is skipped first, then the repeat
        opcode following it decides whether it is a candidate. */
1434 case OP_CLASS:
1435 case OP_NCLASS:
1436#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1437 case OP_XCLASS:
1438 accelerated_start = cc;
1439 cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
1440#else
1441 accelerated_start = cc;
1442 cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
1443#endif
1444
1445 switch (*cc)
1446 {
     /* Unbounded class repeat: keep accelerated_start. */
1447 case OP_CRSTAR:
1448 case OP_CRMINSTAR:
1449 case OP_CRPLUS:
1450 case OP_CRMINPLUS:
1451 case OP_CRPOSSTAR:
1452 case OP_CRPOSPLUS:
1453 cc++;
1454 break;
1455
1456 case OP_CRRANGE:
1457 case OP_CRMINRANGE:
1458 case OP_CRPOSRANGE:
1459 cc += 2 * IMM2_SIZE;
1460 /* Fall through */
1461 case OP_CRQUERY:
1462 case OP_CRMINQUERY:
1463 case OP_CRPOSQUERY:
1464 cc++;
1465 if (count == 0)
1466 count = 1;
1467 /* Fall through */
1468 default:
     /* Not repeated, or repeated with a bound: drop the candidate. This
        "continue" belongs to the enclosing while loop. */
1469 accelerated_start = NULL;
1470 fast_forward_allowed = FALSE;
1471 continue;
1472 }
1473 break;
1474
1475 case OP_ONCE:
1476 case OP_BRA:
1477 case OP_CBRA:
1478 end = cc + GET(cc, 1);
1479
1480 prev_fast_forward_allowed = fast_forward_allowed;
1481 fast_forward_allowed = FALSE;
     /* Nested brackets are not entered beyond this depth. */
1482 if (depth >= 4)
1483 break;
1484
     /* Only brackets which end with a plain OP_KET are entered, and a
        capturing bracket only when it is still marked as optimized. */
1485 end = bracketend(cc) - (1 + LINK_SIZE);
1486 if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
1487 break;
1488
1489 count = detect_early_fail(common, cc, private_data_start, depth + 1, count, prev_fast_forward_allowed);
1490
     /* When the nested bracket was marked, the mark is propagated to the
        bracket this call started from. */
1491 if (PRIVATE_DATA(cc) != 0)
1492 common->private_data_ptrs[begin - common->start] = 1;
1493
1494 if (count < EARLY_FAIL_ENHANCE_MAX)
1495 {
1496 cc = end + (1 + LINK_SIZE);
1497 continue;
1498 }
1499 break;
1500
1501 case OP_KET:
1502 SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
     /* The OP_KET which closes the bracket being scanned ends the scan. */
1503 if (cc >= next_alt)
1504 break;
1505 cc += 1 + LINK_SIZE;
1506 continue;
1507 }
1508
1509 if (accelerated_start != NULL)
1510 {
1511 if (count == 0)
1512 {
     /* First candidate. Together with the increment at the end of this
        branch the counter becomes 2. */
1513 count++;
1514
     /* type_skip is only used when it is still allowed and the bracket
        has a single alternative. */
1515 if (fast_forward_allowed && *next_alt == OP_KET)
1516 {
1517 common->fast_forward_bc_ptr = accelerated_start;
1518 common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
1519 *private_data_start += sizeof(sljit_sw);
1520 }
1521 else
1522 {
1523 common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;
1524
1525 if (common->early_fail_start_ptr == 0)
1526 common->early_fail_start_ptr = *private_data_start;
1527
1528 *private_data_start += sizeof(sljit_sw);
1529 common->early_fail_end_ptr = *private_data_start;
1530
1531 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1532 return EARLY_FAIL_ENHANCE_MAX;
1533 }
1534 }
1535 else
1536 {
     /* Later candidates get type_fail_range, which takes two words. */
1537 common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;
1538
1539 if (common->early_fail_start_ptr == 0)
1540 common->early_fail_start_ptr = *private_data_start;
1541
1542 *private_data_start += 2 * sizeof(sljit_sw);
1543 common->early_fail_end_ptr = *private_data_start;
1544
1545 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1546 return EARLY_FAIL_ENHANCE_MAX;
1547 }
1548
1549 /* Cannot be part of a repeat. */
1550 common->private_data_ptrs[begin - common->start] = 1;
1551 count++;
1552
1553 if (count < EARLY_FAIL_ENHANCE_MAX)
1554 continue;
1555 }
1556
1557 break;
1558 }
1559
     /* An alternative which was not scanned up to its end saturates the
        result; otherwise the largest counter is kept. */
1560 if (*cc != OP_ALT && *cc != OP_KET)
1561 result = EARLY_FAIL_ENHANCE_MAX;
1562 else if (result < count)
1563 result = count;
1564
1565 fast_forward_allowed = FALSE;
1566 cc = next_alt;
1567 }
1568while (*cc == OP_ALT);
1569
1570return result;
1571}
1572
1573static int get_class_iterator_size(PCRE2_SPTR cc)
1574{
1575sljit_u32 min;
1576sljit_u32 max;
1577switch(*cc)
1578 {
1579 case OP_CRSTAR:
1580 case OP_CRPLUS:
1581 return 2;
1582
1583 case OP_CRMINSTAR:
1584 case OP_CRMINPLUS:
1585 case OP_CRQUERY:
1586 case OP_CRMINQUERY:
1587 return 1;
1588
1589 case OP_CRRANGE:
1590 case OP_CRMINRANGE:
1591 min = GET2(cc, 1);
1592 max = GET2(cc, 1 + IMM2_SIZE);
1593 if (max == 0)
1594 return (*cc == OP_CRRANGE) ? 2 : 1;
1595 max -= min;
1596 if (max > 2)
1597 max = 2;
1598 return max;
1599
1600 default:
1601 return 0;
1602 }
1603}
1604
1605static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1606{
1607PCRE2_SPTR end = bracketend(begin);
1608PCRE2_SPTR next;
1609PCRE2_SPTR next_end;
1610PCRE2_SPTR max_end;
1611PCRE2_UCHAR type;
1612sljit_sw length = end - begin;
1613sljit_s32 min, max, i;
1614
1615/* Detect fixed iterations first. */
1616if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
1617 return FALSE;
1618
1619/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/
1620 * Skip the check of the second part. */
1621if (PRIVATE_DATA(end - LINK_SIZE) == 0)
1622 return TRUE;
1623
1624next = end;
1625min = 1;
1626while (1)
1627 {
1628 if (*next != *begin)
1629 break;
1630 next_end = bracketend(next);
1631 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1632 break;
1633 next = next_end;
1634 min++;
1635 }
1636
1637if (min == 2)
1638 return FALSE;
1639
1640max = 0;
1641max_end = next;
1642if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1643 {
1644 type = *next;
1645 while (1)
1646 {
1647 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1648 break;
1649 next_end = bracketend(next + 2 + LINK_SIZE);
1650 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1651 break;
1652 next = next_end;
1653 max++;
1654 }
1655
1656 if (next[0] == type && next[1] == *begin && max >= 1)
1657 {
1658 next_end = bracketend(next + 1);
1659 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1660 {
1661 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1662 if (*next_end != OP_KET)
1663 break;
1664
1665 if (i == max)
1666 {
1667 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1668 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1669 /* +2 the original and the last. */
1670 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1671 if (min == 1)
1672 return TRUE;
1673 min--;
1674 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1675 }
1676 }
1677 }
1678 }
1679
1680if (min >= 3)
1681 {
1682 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1683 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1684 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1685 return TRUE;
1686 }
1687
1688return FALSE;
1689}
1690
/* The macros below expand to lists of case labels, so that several switch
statements can share the same opcode groups. The words and lengths quoted
in the comments are the ones set_private_data_ptrs() uses for each group. */

/* Single character iterators which need one private word; the opcode is
followed by one character. */
1691#define CASE_ITERATOR_PRIVATE_DATA_1 \
1692 case OP_MINSTAR: \
1693 case OP_MINPLUS: \
1694 case OP_QUERY: \
1695 case OP_MINQUERY: \
1696 case OP_MINSTARI: \
1697 case OP_MINPLUSI: \
1698 case OP_QUERYI: \
1699 case OP_MINQUERYI: \
1700 case OP_NOTMINSTAR: \
1701 case OP_NOTMINPLUS: \
1702 case OP_NOTQUERY: \
1703 case OP_NOTMINQUERY: \
1704 case OP_NOTMINSTARI: \
1705 case OP_NOTMINPLUSI: \
1706 case OP_NOTQUERYI: \
1707 case OP_NOTMINQUERYI:
1708
/* Single character iterators which need two private words; the opcode is
followed by one character. */
1709#define CASE_ITERATOR_PRIVATE_DATA_2A \
1710 case OP_STAR: \
1711 case OP_PLUS: \
1712 case OP_STARI: \
1713 case OP_PLUSI: \
1714 case OP_NOTSTAR: \
1715 case OP_NOTPLUS: \
1716 case OP_NOTSTARI: \
1717 case OP_NOTPLUSI:
1718
/* Single character iterators which need two private words; the opcode is
followed by an IMM2_SIZE count and one character. */
1719#define CASE_ITERATOR_PRIVATE_DATA_2B \
1720 case OP_UPTO: \
1721 case OP_MINUPTO: \
1722 case OP_UPTOI: \
1723 case OP_MINUPTOI: \
1724 case OP_NOTUPTO: \
1725 case OP_NOTMINUPTO: \
1726 case OP_NOTUPTOI: \
1727 case OP_NOTMINUPTOI:
1728
/* Character type iterators which need one private word. */
1729#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
1730 case OP_TYPEMINSTAR: \
1731 case OP_TYPEMINPLUS: \
1732 case OP_TYPEQUERY: \
1733 case OP_TYPEMINQUERY:
1734
/* Character type iterators which need two private words, unless the
repeated type is OP_ANYNL or OP_EXTUNI. */
1735#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
1736 case OP_TYPESTAR: \
1737 case OP_TYPEPLUS:
1738
/* Bounded character type iterators. set_private_data_ptrs() lists these two
opcodes explicitly instead of using this macro. */
1739#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
1740 case OP_TYPEUPTO: \
1741 case OP_TYPEMINUPTO:
1742
/* Assigns private data offsets to the opcodes between common->start and
ccend.

The offset of an opcode is stored in common->private_data_ptrs[] at the
index of the opcode. Offsets are handed out starting from
*private_data_start, and the first unused offset is written back to
*private_data_start before returning. The scan stops early when the offset
becomes greater than SLJIT_MAX_LOCAL_SIZE.

  common              compiler state
  private_data_start  in/out: next free private data offset
  ccend               end of the byte code to process */
1743static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
1744{
1745PCRE2_SPTR cc = common->start;
1746PCRE2_SPTR alternative;
     /* While cc is before end, character iterators get no private words
        (see the "cc >= end" test after the switch). */
1747PCRE2_SPTR end = NULL;
1748int private_data_ptr = *private_data_start;
     /* space:      private words needed by a character iterator
        size:       length of the opcode; a negative value means that the
                    last code unit is a character which may be followed by
                    extra UTF code units
        bracketlen: length of a bracket opcode */
1749int space, size, bracketlen;
1750BOOL repeat_check = TRUE;
1751
1752while (cc < ccend)
1753 {
1754 space = 0;
1755 size = 0;
1756 bracketlen = 0;
1757 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1758 break;
1759
1760 /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */
1761 if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1762 {
1763 if (detect_repeat(common, cc))
1764 {
1765 /* These brackets are converted to repeats, so no global
1766 based single character repeat is allowed. */
1767 if (cc >= end)
1768 end = bracketend(cc);
1769 }
1770 }
1771 repeat_check = TRUE;
1772
1773 switch(*cc)
1774 {
1775 case OP_KET:
     /* A non-zero entry after the OP_KET was stored by detect_repeat():
        the OP_KET gets a private word and the code units covered by the
        merged copies are skipped. */
1776 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1777 {
1778 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1779 private_data_ptr += sizeof(sljit_sw);
1780 cc += common->private_data_ptrs[cc + 1 - common->start];
1781 }
1782 cc += 1 + LINK_SIZE;
1783 break;
1784
     /* Brackets which always get one private word. */
1785 case OP_ASSERT:
1786 case OP_ASSERT_NOT:
1787 case OP_ASSERTBACK:
1788 case OP_ASSERTBACK_NOT:
1789 case OP_ASSERT_NA:
1790 case OP_ASSERTBACK_NA:
1791 case OP_ONCE:
1792 case OP_SCRIPT_RUN:
1793 case OP_BRAPOS:
1794 case OP_SBRA:
1795 case OP_SBRAPOS:
1796 case OP_SCOND:
1797 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1798 private_data_ptr += sizeof(sljit_sw);
1799 bracketlen = 1 + LINK_SIZE;
1800 break;
1801
1802 case OP_CBRAPOS:
1803 case OP_SCBRAPOS:
1804 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1805 private_data_ptr += sizeof(sljit_sw);
1806 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1807 break;
1808
1809 case OP_COND:
1810 /* Might be a hidden SCOND. */
1811 common->private_data_ptrs[cc - common->start] = 0;
1812 alternative = cc + GET(cc, 1);
     /* Only a repeated conditional bracket gets a private word. */
1813 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1814 {
1815 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1816 private_data_ptr += sizeof(sljit_sw);
1817 }
1818 bracketlen = 1 + LINK_SIZE;
1819 break;
1820
1821 case OP_BRA:
1822 bracketlen = 1 + LINK_SIZE;
1823 break;
1824
1825 case OP_CBRA:
1826 case OP_SCBRA:
1827 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1828 break;
1829
1830 case OP_BRAZERO:
1831 case OP_BRAMINZERO:
1832 case OP_BRAPOSZERO:
     /* The repeat detection is not run for the bracket which follows. */
1833 size = 1;
1834 repeat_check = FALSE;
1835 break;
1836
1837 CASE_ITERATOR_PRIVATE_DATA_1
1838 size = -2;
1839 space = 1;
1840 break;
1841
1842 CASE_ITERATOR_PRIVATE_DATA_2A
1843 size = -2;
1844 space = 2;
1845 break;
1846
1847 CASE_ITERATOR_PRIVATE_DATA_2B
1848 size = -(2 + IMM2_SIZE);
1849 space = 2;
1850 break;
1851
1852 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1853 size = 1;
1854 space = 1;
1855 break;
1856
1857 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
     /* No private words when the repeated type is OP_ANYNL or OP_EXTUNI. */
1858 size = 1;
1859 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1860 space = 2;
1861 break;
1862
1863 case OP_TYPEUPTO:
1864 size = 1 + IMM2_SIZE;
1865 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1866 space = 2;
1867 break;
1868
1869 case OP_TYPEMINUPTO:
1870 size = 1 + IMM2_SIZE;
1871 space = 2;
1872 break;
1873
1874 case OP_CLASS:
1875 case OP_NCLASS:
     /* The number of words depends on the repeat opcode after the class. */
1876 size = 1 + 32 / sizeof(PCRE2_UCHAR);
1877 space = get_class_iterator_size(cc + size);
1878 break;
1879
1880#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1881 case OP_XCLASS:
1882 size = GET(cc, 1);
1883 space = get_class_iterator_size(cc + size);
1884 break;
1885#endif
1886
1887 default:
     /* cc is advanced here; space, size and bracketlen are still zero, so
        the code after the switch does nothing. */
1888 cc = next_opcode(common, cc);
1889 SLJIT_ASSERT(cc != NULL);
1890 break;
1891 }
1892
1893 /* Character iterators, which are not inside a repeated bracket,
1894 gets a private slot instead of allocating it on the stack. */
1895 if (space > 0 && cc >= end)
1896 {
1897 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1898 private_data_ptr += sizeof(sljit_sw) * space;
1899 }
1900
1901 if (size != 0)
1902 {
1903 if (size < 0)
1904 {
1905 cc += -size;
1906#ifdef SUPPORT_UNICODE
1907 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1908#endif
1909 }
1910 else
1911 cc += size;
1912 }
1913
1914 if (bracketlen > 0)
1915 {
     /* Outside of the current restricted range: the range is restarted
        for this bracket unless it is closed by a plain OP_KET. */
1916 if (cc >= end)
1917 {
1918 end = bracketend(cc);
1919 if (end[-1 - LINK_SIZE] == OP_KET)
1920 end = NULL;
1921 }
1922 cc += bracketlen;
1923 }
1924 }
1925*private_data_start = private_data_ptr;
1926}
1927
1928/* Returns with a frame_types (always < 0) if no need for frame. */
/* Computes the number of words needed for the frame of the byte code
between cc and ccend.

When ccend is NULL, cc must point to an opening bracket: the scan then covers
the inside of that bracket, and a non-recursive OP_CBRAPOS / OP_SCBRAPOS
starts with a length of 3 (5 when capture_last_ptr is in use), remembered in
"possessive".

The length grows by
  2  for the first OP_SET_SOM (or OP_RECURSE when has_set_som is set),
  2  for the first mark-setting verb (or OP_RECURSE when mark_ptr is used),
  2  for the first capturing bracket or OP_RECURSE when capture_last_ptr is
     used,
  3  for every capturing bracket.
When "recursive" is TRUE the SOM and mark items are treated as already
counted.

*needs_control_head is set to TRUE when OP_THEN or one of the mark-setting
verbs is found while common->control_head_ptr is non-zero (it starts as FALSE
unless DEBUG_FORCE_CONTROL_HEAD is enabled).

Returns length + 1 when a frame is needed. Otherwise returns no_frame when an
opcode outside the listed simple opcodes was seen (stack_restore), or
no_stack when there was none. */
1929static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
1930{
1931int length = 0;
1932int possessive = 0;
1933BOOL stack_restore = FALSE;
1934BOOL setsom_found = recursive;
1935BOOL setmark_found = recursive;
1936/* The last capture is a local variable even for recursions. */
1937BOOL capture_last_found = FALSE;
1938
1939#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1940SLJIT_ASSERT(common->control_head_ptr != 0);
1941*needs_control_head = TRUE;
1942#else
1943*needs_control_head = FALSE;
1944#endif
1945
1946if (ccend == NULL)
1947 {
1948 ccend = bracketend(cc) - (1 + LINK_SIZE);
1949 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1950 {
1951 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1952 /* This is correct regardless of common->capture_last_ptr. */
1953 capture_last_found = TRUE;
1954 }
1955 cc = next_opcode(common, cc);
1956 }
1957
1958SLJIT_ASSERT(cc != NULL);
1959while (cc < ccend)
1960 switch(*cc)
1961 {
1962 case OP_SET_SOM:
1963 SLJIT_ASSERT(common->has_set_som);
1964 stack_restore = TRUE;
1965 if (!setsom_found)
1966 {
1967 length += 2;
1968 setsom_found = TRUE;
1969 }
1970 cc += 1;
1971 break;
1972
1973 case OP_MARK:
1974 case OP_COMMIT_ARG:
1975 case OP_PRUNE_ARG:
1976 case OP_THEN_ARG:
1977 SLJIT_ASSERT(common->mark_ptr != 0);
1978 stack_restore = TRUE;
1979 if (!setmark_found)
1980 {
1981 length += 2;
1982 setmark_found = TRUE;
1983 }
1984 if (common->control_head_ptr != 0)
1985 *needs_control_head = TRUE;
1986 cc += 1 + 2 + cc[1];
1987 break;
1988
1989 case OP_RECURSE:
     /* A recursion counts every global item which is in use and has not
        been counted yet. */
1990 stack_restore = TRUE;
1991 if (common->has_set_som && !setsom_found)
1992 {
1993 length += 2;
1994 setsom_found = TRUE;
1995 }
1996 if (common->mark_ptr != 0 && !setmark_found)
1997 {
1998 length += 2;
1999 setmark_found = TRUE;
2000 }
2001 if (common->capture_last_ptr != 0 && !capture_last_found)
2002 {
2003 length += 2;
2004 capture_last_found = TRUE;
2005 }
2006 cc += 1 + LINK_SIZE;
2007 break;
2008
2009 case OP_CBRA:
2010 case OP_CBRAPOS:
2011 case OP_SCBRA:
2012 case OP_SCBRAPOS:
2013 stack_restore = TRUE;
2014 if (common->capture_last_ptr != 0 && !capture_last_found)
2015 {
2016 length += 2;
2017 capture_last_found = TRUE;
2018 }
2019 length += 3;
2020 cc += 1 + LINK_SIZE + IMM2_SIZE;
2021 break;
2022
2023 case OP_THEN:
2024 stack_restore = TRUE;
2025 if (common->control_head_ptr != 0)
2026 *needs_control_head = TRUE;
2027 cc ++;
2028 break;
2029
     /* Every opcode which is not listed anywhere in this switch sets
        stack_restore and then shares the code of the list below. The listed
        opcodes are skipped without setting stack_restore. */
2030 default:
2031 stack_restore = TRUE;
2032 /* Fall through. */
2033
2034 case OP_NOT_WORD_BOUNDARY:
2035 case OP_WORD_BOUNDARY:
2036 case OP_NOT_DIGIT:
2037 case OP_DIGIT:
2038 case OP_NOT_WHITESPACE:
2039 case OP_WHITESPACE:
2040 case OP_NOT_WORDCHAR:
2041 case OP_WORDCHAR:
2042 case OP_ANY:
2043 case OP_ALLANY:
2044 case OP_ANYBYTE:
2045 case OP_NOTPROP:
2046 case OP_PROP:
2047 case OP_ANYNL:
2048 case OP_NOT_HSPACE:
2049 case OP_HSPACE:
2050 case OP_NOT_VSPACE:
2051 case OP_VSPACE:
2052 case OP_EXTUNI:
2053 case OP_EODN:
2054 case OP_EOD:
2055 case OP_CIRC:
2056 case OP_CIRCM:
2057 case OP_DOLL:
2058 case OP_DOLLM:
2059 case OP_CHAR:
2060 case OP_CHARI:
2061 case OP_NOT:
2062 case OP_NOTI:
2063
2064 case OP_EXACT:
2065 case OP_POSSTAR:
2066 case OP_POSPLUS:
2067 case OP_POSQUERY:
2068 case OP_POSUPTO:
2069
2070 case OP_EXACTI:
2071 case OP_POSSTARI:
2072 case OP_POSPLUSI:
2073 case OP_POSQUERYI:
2074 case OP_POSUPTOI:
2075
2076 case OP_NOTEXACT:
2077 case OP_NOTPOSSTAR:
2078 case OP_NOTPOSPLUS:
2079 case OP_NOTPOSQUERY:
2080 case OP_NOTPOSUPTO:
2081
2082 case OP_NOTEXACTI:
2083 case OP_NOTPOSSTARI:
2084 case OP_NOTPOSPLUSI:
2085 case OP_NOTPOSQUERYI:
2086 case OP_NOTPOSUPTOI:
2087
2088 case OP_TYPEEXACT:
2089 case OP_TYPEPOSSTAR:
2090 case OP_TYPEPOSPLUS:
2091 case OP_TYPEPOSQUERY:
2092 case OP_TYPEPOSUPTO:
2093
2094 case OP_CLASS:
2095 case OP_NCLASS:
2096 case OP_XCLASS:
2097
2098 case OP_CALLOUT:
2099 case OP_CALLOUT_STR:
2100
2101 cc = next_opcode(common, cc);
2102 SLJIT_ASSERT(cc != NULL);
2103 break;
2104 }
2105
2106/* Possessive quantifiers can use a special case. */
     /* Nothing was added to the initial length, which includes the case
        when both values are still zero. */
2107if (SLJIT_UNLIKELY(possessive == length))
2108 return stack_restore ? no_frame : no_stack;
2109
     /* One more word than the counted items; init_frame() stores a zero word
        after the last item. */
2110if (length > 0)
2111 return length + 1;
2112return stack_restore ? no_frame : no_stack;
2113}
2114
/* Emits the machine code which fills the frame of the byte code between cc
and ccend.

The items are stored relative to STACK_TOP, starting at STACK(stackpos) and
moving down by sizeof(sljit_sw) after every store:
  - start of match, mark and last capture: the negated offset of the
    variable followed by its current value (two words);
  - capturing bracket: OVECTOR(offset) followed by the current values of
    OVECTOR(offset) and OVECTOR(offset + 1) (three words);
  - a zero word after the last item.
Each of the start of match, mark and last capture items is stored at most
once.

When ccend is NULL, cc must point to an opening bracket and the inside of the
bracket is processed; an OP_CBRAPOS / OP_SCBRAPOS opening bracket is processed
as well. The final assertion requires the emitted items to end exactly at
STACK(stacktop). */
2115static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2116{
2117DEFINE_COMPILER;
2118BOOL setsom_found = FALSE;
2119BOOL setmark_found = FALSE;
2120/* The last capture is a local variable even for recursions. */
2121BOOL capture_last_found = FALSE;
2122int offset;
2123
2124/* >= 1 + shortest item size (2) */
2125SLJIT_UNUSED_ARG(stacktop);
2126SLJIT_ASSERT(stackpos >= stacktop + 2);
2127
     /* From here on stackpos is the value produced by STACK(), not an index. */
2128stackpos = STACK(stackpos);
2129if (ccend == NULL)
2130 {
2131 ccend = bracketend(cc) - (1 + LINK_SIZE);
2132 if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2133 cc = next_opcode(common, cc);
2134 }
2135
2136SLJIT_ASSERT(cc != NULL);
2137while (cc < ccend)
2138 switch(*cc)
2139 {
2140 case OP_SET_SOM:
2141 SLJIT_ASSERT(common->has_set_som);
2142 if (!setsom_found)
2143 {
     /* Item: -OVECTOR(0), then the current value of OVECTOR(0). */
2144 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2145 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2146 stackpos -= (int)sizeof(sljit_sw);
2147 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2148 stackpos -= (int)sizeof(sljit_sw);
2149 setsom_found = TRUE;
2150 }
2151 cc += 1;
2152 break;
2153
2154 case OP_MARK:
2155 case OP_COMMIT_ARG:
2156 case OP_PRUNE_ARG:
2157 case OP_THEN_ARG:
2158 SLJIT_ASSERT(common->mark_ptr != 0);
2159 if (!setmark_found)
2160 {
     /* Item: -mark_ptr, then the current value of the mark. */
2161 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2162 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2163 stackpos -= (int)sizeof(sljit_sw);
2164 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2165 stackpos -= (int)sizeof(sljit_sw);
2166 setmark_found = TRUE;
2167 }
2168 cc += 1 + 2 + cc[1];
2169 break;
2170
2171 case OP_RECURSE:
     /* A recursion stores every global item which is in use and has not
        been stored yet. */
2172 if (common->has_set_som && !setsom_found)
2173 {
2174 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2175 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2176 stackpos -= (int)sizeof(sljit_sw);
2177 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2178 stackpos -= (int)sizeof(sljit_sw);
2179 setsom_found = TRUE;
2180 }
2181 if (common->mark_ptr != 0 && !setmark_found)
2182 {
2183 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2184 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2185 stackpos -= (int)sizeof(sljit_sw);
2186 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2187 stackpos -= (int)sizeof(sljit_sw);
2188 setmark_found = TRUE;
2189 }
2190 if (common->capture_last_ptr != 0 && !capture_last_found)
2191 {
2192 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2193 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2194 stackpos -= (int)sizeof(sljit_sw);
2195 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2196 stackpos -= (int)sizeof(sljit_sw);
2197 capture_last_found = TRUE;
2198 }
2199 cc += 1 + LINK_SIZE;
2200 break;
2201
2202 case OP_CBRA:
2203 case OP_CBRAPOS:
2204 case OP_SCBRA:
2205 case OP_SCBRAPOS:
2206 if (common->capture_last_ptr != 0 && !capture_last_found)
2207 {
2208 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2209 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2210 stackpos -= (int)sizeof(sljit_sw);
2211 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2212 stackpos -= (int)sizeof(sljit_sw);
2213 capture_last_found = TRUE;
2214 }
     /* Item: OVECTOR(offset) (not negated), then both values of the pair
        which belongs to this capture group. */
2215 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2216 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2217 stackpos -= (int)sizeof(sljit_sw);
2218 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2219 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2220 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2221 stackpos -= (int)sizeof(sljit_sw);
2222 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2223 stackpos -= (int)sizeof(sljit_sw);
2224
2225 cc += 1 + LINK_SIZE + IMM2_SIZE;
2226 break;
2227
2228 default:
2229 cc = next_opcode(common, cc);
2230 SLJIT_ASSERT(cc != NULL);
2231 break;
2232 }
2233
     /* The zero word which closes the list of items. */
2234OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2235SLJIT_ASSERT(stackpos == STACK(stacktop));
2236}
2237
/* Number of temporary registers which the delayed memory copy helpers use in
turn. The macro is undefined again after delayed_mem_copy_finish(). */
2238#define RECURSE_TMP_REG_COUNT 3
2239
/* State of a sequence of delayed memory to memory copies. A value is loaded
into a temporary register by delayed_mem_copy_move(), and it is stored only
when the same register is needed again or when delayed_mem_copy_finish() is
called. */
2240typedef struct delayed_mem_copy_status {
     /* Compiler used for emitting the moves. */
2241 struct sljit_compiler *compiler;
     /* Destination of the value held by each register: base register and
        offset. A base of -1 means that the register holds no pending value. */
2242 int store_bases[RECURSE_TMP_REG_COUNT];
2243 int store_offsets[RECURSE_TMP_REG_COUNT];
     /* Registers used for the copies; set by the caller before
        delayed_mem_copy_init(). */
2244 int tmp_regs[RECURSE_TMP_REG_COUNT];
     /* Where the original value of tmp_regs[i] is kept when
        sljit_get_register_index() is negative for this entry; otherwise the
        entry equals tmp_regs[i]. */
2245 int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
     /* Index of the register used by the next move. */
2246 int next_tmp_reg;
2247} delayed_mem_copy_status;
2248
2249static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
2250{
2251int i;
2252
2253for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
2254 {
2255 SLJIT_ASSERT(status->tmp_regs[i] >= 0);
2256 SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]);
2257
2258 status->store_bases[i] = -1;
2259 }
2260status->next_tmp_reg = 0;
2261status->compiler = common->compiler;
2262}
2263
2264static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
2265 int store_base, sljit_sw store_offset)
2266{
2267struct sljit_compiler *compiler = status->compiler;
2268int next_tmp_reg = status->next_tmp_reg;
2269int tmp_reg = status->tmp_regs[next_tmp_reg];
2270
2271SLJIT_ASSERT(load_base > 0 && store_base > 0);
2272
2273if (status->store_bases[next_tmp_reg] == -1)
2274 {
2275 /* Preserve virtual registers. */
2276 if (sljit_get_register_index(status->saved_tmp_regs[next_tmp_reg]) < 0)
2277 OP1(SLJIT_MOV, status->saved_tmp_regs[next_tmp_reg], 0, tmp_reg, 0);
2278 }
2279else
2280 OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);
2281
2282OP1(SLJIT_MOV, tmp_reg, 0, SLJIT_MEM1(load_base), load_offset);
2283status->store_bases[next_tmp_reg] = store_base;
2284status->store_offsets[next_tmp_reg] = store_offset;
2285
2286status->next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
2287}
2288
2289static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
2290{
2291struct sljit_compiler *compiler = status->compiler;
2292int next_tmp_reg = status->next_tmp_reg;
2293int tmp_reg, saved_tmp_reg, i;
2294
2295for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
2296 {
2297 if (status->store_bases[next_tmp_reg] != -1)
2298 {
2299 tmp_reg = status->tmp_regs[next_tmp_reg];
2300 saved_tmp_reg = status->saved_tmp_regs[next_tmp_reg];
2301
2302 OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);
2303
2304 /* Restore virtual registers. */
2305 if (sljit_get_register_index(saved_tmp_reg) < 0)
2306 OP1(SLJIT_MOV, tmp_reg, 0, saved_tmp_reg, 0);
2307 }
2308
2309 next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
2310 }
2311}
2312
2313#undef RECURSE_TMP_REG_COUNT
2314
2315static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2316 BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
2317{
2318int length = 1;
2319int size;
2320PCRE2_SPTR alternative;
2321BOOL quit_found = FALSE;
2322BOOL accept_found = FALSE;
2323BOOL setsom_found = FALSE;
2324BOOL setmark_found = FALSE;
2325BOOL capture_last_found = FALSE;
2326BOOL control_head_found = FALSE;
2327
2328#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2329SLJIT_ASSERT(common->control_head_ptr != 0);
2330control_head_found = TRUE;
2331#endif
2332
2333/* Calculate the sum of the private machine words. */
2334while (cc < ccend)
2335 {
2336 size = 0;
2337 switch(*cc)
2338 {
2339 case OP_SET_SOM:
2340 SLJIT_ASSERT(common->has_set_som);
2341 setsom_found = TRUE;
2342 cc += 1;
2343 break;
2344
2345 case OP_RECURSE:
2346 if (common->has_set_som)
2347 setsom_found = TRUE;
2348 if (common->mark_ptr != 0)
2349 setmark_found = TRUE;
2350 if (common->capture_last_ptr != 0)
2351 capture_last_found = TRUE;
2352 cc += 1 + LINK_SIZE;
2353 break;
2354
2355 case OP_KET:
2356 if (PRIVATE_DATA(cc) != 0)
2357 {
2358 length++;
2359 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2360 cc += PRIVATE_DATA(cc + 1);
2361 }
2362 cc += 1 + LINK_SIZE;
2363 break;
2364
2365 case OP_ASSERT:
2366 case OP_ASSERT_NOT:
2367 case OP_ASSERTBACK:
2368 case OP_ASSERTBACK_NOT:
2369 case OP_ASSERT_NA:
2370 case OP_ASSERTBACK_NA:
2371 case OP_ONCE:
2372 case OP_SCRIPT_RUN:
2373 case OP_BRAPOS:
2374 case OP_SBRA:
2375 case OP_SBRAPOS:
2376 case OP_SCOND:
2377 length++;
2378 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
2379 cc += 1 + LINK_SIZE;
2380 break;
2381
2382 case OP_CBRA:
2383 case OP_SCBRA:
2384 length += 2;
2385 if (common->capture_last_ptr != 0)
2386 capture_last_found = TRUE;
2387 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
2388 length++;
2389 cc += 1 + LINK_SIZE + IMM2_SIZE;
2390 break;
2391
2392 case OP_CBRAPOS:
2393 case OP_SCBRAPOS:
2394 length += 2 + 2;
2395 if (common->capture_last_ptr != 0)
2396 capture_last_found = TRUE;
2397 cc += 1 + LINK_SIZE + IMM2_SIZE;
2398 break;
2399
2400 case OP_COND:
2401 /* Might be a hidden SCOND. */
2402 alternative = cc + GET(cc, 1);
2403 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2404 length++;
2405 cc += 1 + LINK_SIZE;
2406 break;
2407
2408 CASE_ITERATOR_PRIVATE_DATA_1
2409 if (PRIVATE_DATA(cc) != 0)
2410 length++;
2411 cc += 2;
2412#ifdef SUPPORT_UNICODE
2413 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2414#endif
2415 break;
2416
2417 CASE_ITERATOR_PRIVATE_DATA_2A
2418 if (PRIVATE_DATA(cc) != 0)
2419 length += 2;
2420 cc += 2;
2421#ifdef SUPPORT_UNICODE
2422 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2423#endif
2424 break;
2425
2426 CASE_ITERATOR_PRIVATE_DATA_2B
2427 if (PRIVATE_DATA(cc) != 0)
2428 length += 2;
2429 cc += 2 + IMM2_SIZE;
2430#ifdef SUPPORT_UNICODE
2431 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2432#endif
2433 break;
2434
2435 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2436 if (PRIVATE_DATA(cc) != 0)
2437 length++;
2438 cc += 1;
2439 break;
2440
2441 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2442 if (PRIVATE_DATA(cc) != 0)
2443 length += 2;
2444 cc += 1;
2445 break;
2446
2447 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2448 if (PRIVATE_DATA(cc) != 0)
2449 length += 2;
2450 cc += 1 + IMM2_SIZE;
2451 break;
2452
2453 case OP_CLASS:
2454 case OP_NCLASS:
2455#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2456 case OP_XCLASS:
2457 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2458#else
2459 size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2460#endif
2461 if (PRIVATE_DATA(cc) != 0)
2462 length += get_class_iterator_size(cc + size);
2463 cc += size;
2464 break;
2465
2466 case OP_MARK:
2467 case OP_COMMIT_ARG:
2468 case OP_PRUNE_ARG:
2469 case OP_THEN_ARG:
2470 SLJIT_ASSERT(common->mark_ptr != 0);
2471 if (!setmark_found)
2472 setmark_found = TRUE;
2473 if (common->control_head_ptr != 0)
2474 control_head_found = TRUE;
2475 if (*cc != OP_MARK)
2476 quit_found = TRUE;
2477
2478 cc += 1 + 2 + cc[1];
2479 break;
2480
2481 case OP_PRUNE:
2482 case OP_SKIP:
2483 case OP_COMMIT:
2484 quit_found = TRUE;
2485 cc++;
2486 break;
2487
2488 case OP_SKIP_ARG:
2489 quit_found = TRUE;
2490 cc += 1 + 2 + cc[1];
2491 break;
2492
2493 case OP_THEN:
2494 SLJIT_ASSERT(common->control_head_ptr != 0);
2495 quit_found = TRUE;
2496 if (!control_head_found)
2497 control_head_found = TRUE;
2498 cc++;
2499 break;
2500
2501 case OP_ACCEPT:
2502 case OP_ASSERT_ACCEPT:
2503 accept_found = TRUE;
2504 cc++;
2505 break;
2506
2507 default:
2508 cc = next_opcode(common, cc);
2509 SLJIT_ASSERT(cc != NULL);
2510 break;
2511 }
2512 }
2513SLJIT_ASSERT(cc == ccend);
2514
2515if (control_head_found)
2516 length++;
2517if (capture_last_found)
2518 length++;
2519if (quit_found)
2520 {
2521 if (setsom_found)
2522 length++;
2523 if (setmark_found)
2524 length++;
2525 }
2526
2527*needs_control_head = control_head_found;
2528*has_quit = quit_found;
2529*has_accept = accept_found;
2530return length;
2531}
2532
/* Copy modes accepted by copy_recurse_data(). The function sorts each value
into one of three groups (private, shared, kept shared); the mode selects
the groups that are transferred and the direction of the transfer. The
directions below assume that delayed_mem_copy_move() takes its arguments in
(load base, load offset, store base, store offset) order - its signature is
defined earlier in the file. */
enum copy_recurse_data_types {
  /* Local frame -> stack: private, shared and kept shared values. */
  recurse_copy_from_global,
  /* Stack -> local frame: private values only. */
  recurse_copy_private_to_global,
  /* Stack -> local frame: shared and kept shared values. */
  recurse_copy_shared_to_global,
  /* Stack -> local frame: kept shared values only. */
  recurse_copy_kept_shared_to_global,
  /* Exchanges private and shared values between the local frame and the
  stack area addressed by TMP2. */
  recurse_swap_global
};
2540
2541static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
2542 int type, int stackptr, int stacktop, BOOL has_quit)
2543{
2544delayed_mem_copy_status status;
2545PCRE2_SPTR alternative;
2546sljit_sw private_srcw[2];
2547sljit_sw shared_srcw[3];
2548sljit_sw kept_shared_srcw[2];
2549int private_count, shared_count, kept_shared_count;
2550int from_sp, base_reg, offset, i;
2551BOOL setsom_found = FALSE;
2552BOOL setmark_found = FALSE;
2553BOOL capture_last_found = FALSE;
2554BOOL control_head_found = FALSE;
2555
2556#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2557SLJIT_ASSERT(common->control_head_ptr != 0);
2558control_head_found = TRUE;
2559#endif
2560
2561switch (type)
2562 {
2563 case recurse_copy_from_global:
2564 from_sp = TRUE;
2565 base_reg = STACK_TOP;
2566 break;
2567
2568 case recurse_copy_private_to_global:
2569 case recurse_copy_shared_to_global:
2570 case recurse_copy_kept_shared_to_global:
2571 from_sp = FALSE;
2572 base_reg = STACK_TOP;
2573 break;
2574
2575 default:
2576 SLJIT_ASSERT(type == recurse_swap_global);
2577 from_sp = FALSE;
2578 base_reg = TMP2;
2579 break;
2580 }
2581
2582stackptr = STACK(stackptr);
2583stacktop = STACK(stacktop);
2584
2585status.tmp_regs[0] = TMP1;
2586status.saved_tmp_regs[0] = TMP1;
2587
2588if (base_reg != TMP2)
2589 {
2590 status.tmp_regs[1] = TMP2;
2591 status.saved_tmp_regs[1] = TMP2;
2592 }
2593else
2594 {
2595 status.saved_tmp_regs[1] = RETURN_ADDR;
2596 if (HAS_VIRTUAL_REGISTERS)
2597 status.tmp_regs[1] = STR_PTR;
2598 else
2599 status.tmp_regs[1] = RETURN_ADDR;
2600 }
2601
2602status.saved_tmp_regs[2] = TMP3;
2603if (HAS_VIRTUAL_REGISTERS)
2604 status.tmp_regs[2] = STR_END;
2605else
2606 status.tmp_regs[2] = TMP3;
2607
2608delayed_mem_copy_init(&status, common);
2609
2610if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2611 {
2612 SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2613
2614 if (!from_sp)
2615 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2616
2617 if (from_sp || type == recurse_swap_global)
2618 delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2619 }
2620
2621stackptr += sizeof(sljit_sw);
2622
2623#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2624if (type != recurse_copy_shared_to_global)
2625 {
2626 if (!from_sp)
2627 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2628
2629 if (from_sp || type == recurse_swap_global)
2630 delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2631 }
2632
2633stackptr += sizeof(sljit_sw);
2634#endif
2635
2636while (cc < ccend)
2637 {
2638 private_count = 0;
2639 shared_count = 0;
2640 kept_shared_count = 0;
2641
2642 switch(*cc)
2643 {
2644 case OP_SET_SOM:
2645 SLJIT_ASSERT(common->has_set_som);
2646 if (has_quit && !setsom_found)
2647 {
2648 kept_shared_srcw[0] = OVECTOR(0);
2649 kept_shared_count = 1;
2650 setsom_found = TRUE;
2651 }
2652 cc += 1;
2653 break;
2654
2655 case OP_RECURSE:
2656 if (has_quit)
2657 {
2658 if (common->has_set_som && !setsom_found)
2659 {
2660 kept_shared_srcw[0] = OVECTOR(0);
2661 kept_shared_count = 1;
2662 setsom_found = TRUE;
2663 }
2664 if (common->mark_ptr != 0 && !setmark_found)
2665 {
2666 kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2667 kept_shared_count++;
2668 setmark_found = TRUE;
2669 }
2670 }
2671 if (common->capture_last_ptr != 0 && !capture_last_found)
2672 {
2673 shared_srcw[0] = common->capture_last_ptr;
2674 shared_count = 1;
2675 capture_last_found = TRUE;
2676 }
2677 cc += 1 + LINK_SIZE;
2678 break;
2679
2680 case OP_KET:
2681 if (PRIVATE_DATA(cc) != 0)
2682 {
2683 private_count = 1;
2684 private_srcw[0] = PRIVATE_DATA(cc);
2685 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2686 cc += PRIVATE_DATA(cc + 1);
2687 }
2688 cc += 1 + LINK_SIZE;
2689 break;
2690
2691 case OP_ASSERT:
2692 case OP_ASSERT_NOT:
2693 case OP_ASSERTBACK:
2694 case OP_ASSERTBACK_NOT:
2695 case OP_ASSERT_NA:
2696 case OP_ASSERTBACK_NA:
2697 case OP_ONCE:
2698 case OP_SCRIPT_RUN:
2699 case OP_BRAPOS:
2700 case OP_SBRA:
2701 case OP_SBRAPOS:
2702 case OP_SCOND:
2703 private_count = 1;
2704 private_srcw[0] = PRIVATE_DATA(cc);
2705 cc += 1 + LINK_SIZE;
2706 break;
2707
2708 case OP_CBRA:
2709 case OP_SCBRA:
2710 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2711 shared_srcw[0] = OVECTOR(offset);
2712 shared_srcw[1] = OVECTOR(offset + 1);
2713 shared_count = 2;
2714
2715 if (common->capture_last_ptr != 0 && !capture_last_found)
2716 {
2717 shared_srcw[2] = common->capture_last_ptr;
2718 shared_count = 3;
2719 capture_last_found = TRUE;
2720 }
2721
2722 if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
2723 {
2724 private_count = 1;
2725 private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2726 }
2727 cc += 1 + LINK_SIZE + IMM2_SIZE;
2728 break;
2729
2730 case OP_CBRAPOS:
2731 case OP_SCBRAPOS:
2732 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2733 shared_srcw[0] = OVECTOR(offset);
2734 shared_srcw[1] = OVECTOR(offset + 1);
2735 shared_count = 2;
2736
2737 if (common->capture_last_ptr != 0 && !capture_last_found)
2738 {
2739 shared_srcw[2] = common->capture_last_ptr;
2740 shared_count = 3;
2741 capture_last_found = TRUE;
2742 }
2743
2744 private_count = 2;
2745 private_srcw[0] = PRIVATE_DATA(cc);
2746 private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
2747 cc += 1 + LINK_SIZE + IMM2_SIZE;
2748 break;
2749
2750 case OP_COND:
2751 /* Might be a hidden SCOND. */
2752 alternative = cc + GET(cc, 1);
2753 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2754 {
2755 private_count = 1;
2756 private_srcw[0] = PRIVATE_DATA(cc);
2757 }
2758 cc += 1 + LINK_SIZE;
2759 break;
2760
2761 CASE_ITERATOR_PRIVATE_DATA_1
2762 if (PRIVATE_DATA(cc))
2763 {
2764 private_count = 1;
2765 private_srcw[0] = PRIVATE_DATA(cc);
2766 }
2767 cc += 2;
2768#ifdef SUPPORT_UNICODE
2769 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2770#endif
2771 break;
2772
2773 CASE_ITERATOR_PRIVATE_DATA_2A
2774 if (PRIVATE_DATA(cc))
2775 {
2776 private_count = 2;
2777 private_srcw[0] = PRIVATE_DATA(cc);
2778 private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2779 }
2780 cc += 2;
2781#ifdef SUPPORT_UNICODE
2782 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2783#endif
2784 break;
2785
2786 CASE_ITERATOR_PRIVATE_DATA_2B
2787 if (PRIVATE_DATA(cc))
2788 {
2789 private_count = 2;
2790 private_srcw[0] = PRIVATE_DATA(cc);
2791 private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
2792 }
2793 cc += 2 + IMM2_SIZE;
2794#ifdef SUPPORT_UNICODE
2795 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2796#endif
2797 break;
2798
2799 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2800 if (PRIVATE_DATA(cc))
2801 {
2802 private_count = 1;
2803 private_srcw[0] = PRIVATE_DATA(cc);
2804 }
2805 cc += 1;
2806 break;
2807
2808 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2809 if (PRIVATE_DATA(cc))
2810 {
2811 private_count = 2;
2812 private_srcw[0] = PRIVATE_DATA(cc);
2813 private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2814 }
2815 cc += 1;
2816 break;
2817
2818 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2819 if (PRIVATE_DATA(cc))
2820 {
2821 private_count = 2;
2822 private_srcw[0] = PRIVATE_DATA(cc);
2823 private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2824 }
2825 cc += 1 + IMM2_SIZE;
2826 break;
2827
2828 case OP_CLASS:
2829 case OP_NCLASS:
2830#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2831 case OP_XCLASS:
2832 i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2833#else
2834 i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2835#endif
2836 if (PRIVATE_DATA(cc) != 0)
2837 switch(get_class_iterator_size(cc + i))
2838 {
2839 case 1:
2840 private_count = 1;
2841 private_srcw[0] = PRIVATE_DATA(cc);
2842 break;
2843
2844 case 2:
2845 private_count = 2;
2846 private_srcw[0] = PRIVATE_DATA(cc);
2847 private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2848 break;
2849
2850 default:
2851 SLJIT_UNREACHABLE();
2852 break;
2853 }
2854 cc += i;
2855 break;
2856
2857 case OP_MARK:
2858 case OP_COMMIT_ARG:
2859 case OP_PRUNE_ARG:
2860 case OP_THEN_ARG:
2861 SLJIT_ASSERT(common->mark_ptr != 0);
2862 if (has_quit && !setmark_found)
2863 {
2864 kept_shared_srcw[0] = common->mark_ptr;
2865 kept_shared_count = 1;
2866 setmark_found = TRUE;
2867 }
2868 if (common->control_head_ptr != 0 && !control_head_found)
2869 {
2870 private_srcw[0] = common->control_head_ptr;
2871 private_count = 1;
2872 control_head_found = TRUE;
2873 }
2874 cc += 1 + 2 + cc[1];
2875 break;
2876
2877 case OP_THEN:
2878 SLJIT_ASSERT(common->control_head_ptr != 0);
2879 if (!control_head_found)
2880 {
2881 private_srcw[0] = common->control_head_ptr;
2882 private_count = 1;
2883 control_head_found = TRUE;
2884 }
2885 cc++;
2886 break;
2887
2888 default:
2889 cc = next_opcode(common, cc);
2890 SLJIT_ASSERT(cc != NULL);
2891 break;
2892 }
2893
2894 if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2895 {
2896 SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2897
2898 for (i = 0; i < private_count; i++)
2899 {
2900 SLJIT_ASSERT(private_srcw[i] != 0);
2901
2902 if (!from_sp)
2903 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
2904
2905 if (from_sp || type == recurse_swap_global)
2906 delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
2907
2908 stackptr += sizeof(sljit_sw);
2909 }
2910 }
2911 else
2912 stackptr += sizeof(sljit_sw) * private_count;
2913
2914 if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
2915 {
2916 SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
2917
2918 for (i = 0; i < shared_count; i++)
2919 {
2920 SLJIT_ASSERT(shared_srcw[i] != 0);
2921
2922 if (!from_sp)
2923 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
2924
2925 if (from_sp || type == recurse_swap_global)
2926 delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
2927
2928 stackptr += sizeof(sljit_sw);
2929 }
2930 }
2931 else
2932 stackptr += sizeof(sljit_sw) * shared_count;
2933
2934 if (type != recurse_copy_private_to_global && type != recurse_swap_global)
2935 {
2936 SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
2937
2938 for (i = 0; i < kept_shared_count; i++)
2939 {
2940 SLJIT_ASSERT(kept_shared_srcw[i] != 0);
2941
2942 if (!from_sp)
2943 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
2944
2945 if (from_sp || type == recurse_swap_global)
2946 delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
2947
2948 stackptr += sizeof(sljit_sw);
2949 }
2950 }
2951 else
2952 stackptr += sizeof(sljit_sw) * kept_shared_count;
2953 }
2954
2955SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
2956
2957delayed_mem_copy_finish(&status);
2958}
2959
2960static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
2961{
2962PCRE2_SPTR end = bracketend(cc);
2963BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
2964
2965/* Assert captures then. */
2966if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
2967 current_offset = NULL;
2968/* Conditional block does not. */
2969if (*cc == OP_COND || *cc == OP_SCOND)
2970 has_alternatives = FALSE;
2971
2972cc = next_opcode(common, cc);
2973if (has_alternatives)
2974 current_offset = common->then_offsets + (cc - common->start);
2975
2976while (cc < end)
2977 {
2978 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
2979 cc = set_then_offsets(common, cc, current_offset);
2980 else
2981 {
2982 if (*cc == OP_ALT && has_alternatives)
2983 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
2984 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
2985 *current_offset = 1;
2986 cc = next_opcode(common, cc);
2987 }
2988 }
2989
2990return end;
2991}
2992
2993#undef CASE_ITERATOR_PRIVATE_DATA_1
2994#undef CASE_ITERATOR_PRIVATE_DATA_2A
2995#undef CASE_ITERATOR_PRIVATE_DATA_2B
2996#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
2997#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
2998#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
2999
3000static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
3001{
3002return (value & (value - 1)) == 0;
3003}
3004
3005static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
3006{
3007while (list)
3008 {
3009 /* sljit_set_label is clever enough to do nothing
3010 if either the jump or the label is NULL. */
3011 SET_LABEL(list->jump, label);
3012 list = list->next;
3013 }
3014}
3015
3016static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
3017{
3018jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
3019if (list_item)
3020 {
3021 list_item->next = *list;
3022 list_item->jump = jump;
3023 *list = list_item;
3024 }
3025}
3026
3027static void add_stub(compiler_common *common, struct sljit_jump *start)
3028{
3029DEFINE_COMPILER;
3030stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
3031
3032if (list_item)
3033 {
3034 list_item->start = start;
3035 list_item->quit = LABEL();
3036 list_item->next = common->stubs;
3037 common->stubs = list_item;
3038 }
3039}
3040
3041static void flush_stubs(compiler_common *common)
3042{
3043DEFINE_COMPILER;
3044stub_list *list_item = common->stubs;
3045
3046while (list_item)
3047 {
3048 JUMPHERE(list_item->start);
3049 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
3050 JUMPTO(SLJIT_JUMP, list_item->quit);
3051 list_item = list_item->next;
3052 }
3053common->stubs = NULL;
3054}
3055
3056static SLJIT_INLINE void count_match(compiler_common *common)
3057{
3058DEFINE_COMPILER;
3059
3060OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
3061add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
3062}
3063
3064static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
3065{
3066/* May destroy all locals and registers except TMP2. */
3067DEFINE_COMPILER;
3068
3069SLJIT_ASSERT(size > 0);
3070OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3071#ifdef DESTROY_REGISTERS
3072OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
3073OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3074OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3075OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
3076OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
3077#endif
3078add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
3079}
3080
3081static SLJIT_INLINE void free_stack(compiler_common *common, int size)
3082{
3083DEFINE_COMPILER;
3084
3085SLJIT_ASSERT(size > 0);
3086OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3087}
3088
3089static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
3090{
3091DEFINE_COMPILER;
3092sljit_uw *result;
3093
3094if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3095 return NULL;
3096
3097result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
3098if (SLJIT_UNLIKELY(result == NULL))
3099 {
3100 sljit_set_compiler_memory_error(compiler);
3101 return NULL;
3102 }
3103
3104*(void**)result = common->read_only_data_head;
3105common->read_only_data_head = (void *)result;
3106return result + 1;
3107}
3108
3109static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3110{
3111DEFINE_COMPILER;
3112struct sljit_label *loop;
3113sljit_s32 i;
3114
3115/* At this point we can freely use all temporary registers. */
3116SLJIT_ASSERT(length > 1);
3117/* TMP1 returns with begin - 1. */
3118OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3119if (length < 8)
3120 {
3121 for (i = 1; i < length; i++)
3122 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3123 }
3124else
3125 {
3126 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3127 {
3128 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3129 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3130 loop = LABEL();
3131 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3132 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3133 JUMPTO(SLJIT_NOT_ZERO, loop);
3134 }
3135 else
3136 {
3137 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3138 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3139 loop = LABEL();
3140 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3141 OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3142 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3143 JUMPTO(SLJIT_NOT_ZERO, loop);
3144 }
3145 }
3146}
3147
3148static SLJIT_INLINE void reset_early_fail(compiler_common *common)
3149{
3150DEFINE_COMPILER;
3151sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
3152sljit_u32 uncleared_size;
3153sljit_s32 src = SLJIT_IMM;
3154sljit_s32 i;
3155struct sljit_label *loop;
3156
3157SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
3158
3159if (size == sizeof(sljit_sw))
3160 {
3161 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
3162 return;
3163 }
3164
3165if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
3166 {
3167 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
3168 src = TMP3;
3169 }
3170
3171if (size <= 6 * sizeof(sljit_sw))
3172 {
3173 for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
3174 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
3175 return;
3176 }
3177
3178GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
3179
3180uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
3181
3182OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
3183
3184loop = LABEL();
3185OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3186OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3187OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0);
3188OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0);
3189CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
3190
3191if (uncleared_size >= sizeof(sljit_sw))
3192 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3193
3194if (uncleared_size >= 2 * sizeof(sljit_sw))
3195 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
3196}
3197
3198static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
3199{
3200DEFINE_COMPILER;
3201struct sljit_label *loop;
3202int i;
3203
3204SLJIT_ASSERT(length > 1);
3205/* OVECTOR(1) contains the "string begin - 1" constant. */
3206if (length > 2)
3207 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3208if (length < 8)
3209 {
3210 for (i = 2; i < length; i++)
3211 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
3212 }
3213else
3214 {
3215 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3216 {
3217 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
3218 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3219 loop = LABEL();
3220 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
3221 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3222 JUMPTO(SLJIT_NOT_ZERO, loop);
3223 }
3224 else
3225 {
3226 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
3227 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3228 loop = LABEL();
3229 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
3230 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
3231 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3232 JUMPTO(SLJIT_NOT_ZERO, loop);
3233 }
3234 }
3235
3236if (!HAS_VIRTUAL_REGISTERS)
3237 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
3238else
3239 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
3240
3241if (common->mark_ptr != 0)
3242 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
3243if (common->control_head_ptr != 0)
3244 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
3245if (HAS_VIRTUAL_REGISTERS)
3246 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
3247
3248OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3249OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
3250}
3251
3252static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
3253{
3254while (current != NULL)
3255 {
3256 switch (current[1])
3257 {
3258 case type_then_trap:
3259 break;
3260
3261 case type_mark:
3262 if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3263 return current[3];
3264 break;
3265
3266 default:
3267 SLJIT_UNREACHABLE();
3268 break;
3269 }
3270 SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3271 current = (sljit_sw*)current[0];
3272 }
3273return 0;
3274}
3275
/* Emits the code which publishes the result of a successful match.

The emitted code:
  - stores STR_PTR into OVECTOR(1), keeping the previous content of that
    slot in SLJIT_S2;
  - copies the start_ptr local to the startchar_ptr member of jit_arguments
    and, when a mark pointer local exists, copies it to the mark_ptr member;
  - converts oveccount local ovector words to offsets relative to the begin
    member (shifted by UCHAR_SHIFT for 16 and 32 bit code units) and stores
    them as PCRE2_SIZE values in the ovector of the match data;
  - leaves the return value in SLJIT_RETURN_REG: 1 when topbracket is not
    greater than one, otherwise a count obtained by scanning the local
    ovector downwards from topbracket until a slot differs from SLJIT_S2.

topbracket is used only for the return value computation. */

static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
DEFINE_COMPILER;
struct sljit_label *loop;
BOOL has_pre;

/* At this point we can freely use all registers. */
OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

/* Both branches end with: SLJIT_R1 = oveccount, and SLJIT_R2 = address one
PCRE2_SIZE before the ovector of the match data. */
if (HAS_VIRTUAL_REGISTERS)
  {
  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
    SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
  OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
  }

/* Support query only (SLJIT_MEM_SUPP): is a pre-update load available? */
has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;

/* SLJIT_S0 walks the local ovector (one word earlier for the pre-update
form); SLJIT_R0 holds the begin pointer. */
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));

loop = LABEL();

if (has_pre)
  sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
else
  {
  OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
  OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }

/* Advance the output pointer, then turn the address into an offset. */
OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
/* Copy the integer value to the output buffer */
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);

/* SLJIT_R1 counts the remaining words. */
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);

/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
  {
  /* Scan downwards in steps of two words while the slot still equals the
  value saved in SLJIT_S2; SLJIT_R1 is decremented for every slot read. */
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
    {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
    }
  else
    {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
    }
  }
else
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
}
3372
/* Emits the code which reports a partial match and jumps to quit.

The emitted code loads PCRE2_ERROR_PARTIAL into SLJIT_RETURN_REG, stores
the start position in the startchar_ptr member of jit_arguments, and writes
two offsets, relative to the begin member, into the first two ovector
entries of the match data: the start position and STR_END. The start
position is read from the hit_start local in PCRE2_JIT_PARTIAL_SOFT mode
and from the start_ptr local otherwise. For 16 and 32 bit code units the
offsets are shifted right by UCHAR_SHIFT. */

static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
DEFINE_COMPILER;
sljit_s32 mov_opcode;
/* With virtual registers the arguments pointer is copied to SLJIT_R1. */
sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;

SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
  && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));

if (arguments_reg != ARGUMENTS)
  OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
/* SLJIT_R2 = start position of the partial match. */
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
  common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);

/* Store match begin and end. */
OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
/* This may overwrite arguments_reg (SLJIT_R1); it is not read afterwards. */
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));

/* The store width follows the size of PCRE2_SIZE. */
mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;

/* ovector[0] = start - begin, in code units. */
OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);

/* ovector[1] = STR_END - begin, in code units. STR_END is modified. */
OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);

JUMPTO(SLJIT_JUMP, quit);
}
3410
3411static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3412{
3413/* May destroy TMP1. */
3414DEFINE_COMPILER;
3415struct sljit_jump *jump;
3416
3417if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3418 {
3419 /* The value of -1 must be kept for start_used_ptr! */
3420 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3421 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3422 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3423 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3424 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3425 JUMPHERE(jump);
3426 }
3427else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3428 {
3429 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3430 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3431 JUMPHERE(jump);
3432 }
3433}
3434
3435static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3436{
3437/* Detects if the character has an othercase. */
3438unsigned int c;
3439
3440#ifdef SUPPORT_UNICODE
3441if (common->utf || common->ucp)
3442 {
3443 if (common->utf)
3444 {
3445 GETCHAR(c, cc);
3446 }
3447 else
3448 c = *cc;
3449
3450 if (c > 127)
3451 return c != UCD_OTHERCASE(c);
3452
3453 return common->fcc[c] != c;
3454 }
3455else
3456#endif
3457 c = *cc;
3458return MAX_255(c) ? common->fcc[c] != c : FALSE;
3459}
3460
3461static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3462{
3463/* Returns with the othercase. */
3464#ifdef SUPPORT_UNICODE
3465if ((common->utf || common->ucp) && c > 127)
3466 return UCD_OTHERCASE(c);
3467#endif
3468return TABLE_GET(c, common->fcc, c);
3469}
3470
3471static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
3472{
3473/* Detects if the character and its othercase has only 1 bit difference. */
3474unsigned int c, oc, bit;
3475#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3476int n;
3477#endif
3478
3479#ifdef SUPPORT_UNICODE
3480if (common->utf || common->ucp)
3481 {
3482 if (common->utf)
3483 {
3484 GETCHAR(c, cc);
3485 }
3486 else
3487 c = *cc;
3488
3489 if (c <= 127)
3490 oc = common->fcc[c];
3491 else
3492 oc = UCD_OTHERCASE(c);
3493 }
3494else
3495 {
3496 c = *cc;
3497 oc = TABLE_GET(c, common->fcc, c);
3498 }
3499#else
3500c = *cc;
3501oc = TABLE_GET(c, common->fcc, c);
3502#endif
3503
3504SLJIT_ASSERT(c != oc);
3505
3506bit = c ^ oc;
3507/* Optimized for English alphabet. */
3508if (c <= 127 && bit == 0x20)
3509 return (0 << 8) | 0x20;
3510
3511/* Since c != oc, they must have at least 1 bit difference. */
3512if (!is_powerof2(bit))
3513 return 0;
3514
3515#if PCRE2_CODE_UNIT_WIDTH == 8
3516
3517#ifdef SUPPORT_UNICODE
3518if (common->utf && c > 127)
3519 {
3520 n = GET_EXTRALEN(*cc);
3521 while ((bit & 0x3f) == 0)
3522 {
3523 n--;
3524 bit >>= 6;
3525 }
3526 return (n << 8) | bit;
3527 }
3528#endif /* SUPPORT_UNICODE */
3529return (0 << 8) | bit;
3530
3531#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3532
3533#ifdef SUPPORT_UNICODE
3534if (common->utf && c > 65535)
3535 {
3536 if (bit >= (1u << 10))
3537 bit >>= 10;
3538 else
3539 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
3540 }
3541#endif /* SUPPORT_UNICODE */
3542return (bit < 256) ? ((0u << 8) | bit) : ((1u << 8) | (bit >> 8));
3543
3544#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
3545}
3546
3547static void check_partial(compiler_common *common, BOOL force)
3548{
3549/* Checks whether a partial matching is occurred. Does not modify registers. */
3550DEFINE_COMPILER;
3551struct sljit_jump *jump = NULL;
3552
3553SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);
3554
3555if (common->mode == PCRE2_JIT_COMPLETE)
3556 return;
3557
3558if (!force && !common->allow_empty_partial)
3559 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3560else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3561 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
3562
3563if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3564 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
3565else
3566 {
3567 if (common->partialmatchlabel != NULL)
3568 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
3569 else
3570 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
3571 }
3572
3573if (jump != NULL)
3574 JUMPHERE(jump);
3575}
3576
3577static void check_str_end(compiler_common *common, jump_list **end_reached)
3578{
3579/* Does not affect registers. Usually used in a tight spot. */
3580DEFINE_COMPILER;
3581struct sljit_jump *jump;
3582
3583if (common->mode == PCRE2_JIT_COMPLETE)
3584 {
3585 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3586 return;
3587 }
3588
3589jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
3590if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3591 {
3592 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
3593 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
3594 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
3595 }
3596else
3597 {
3598 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
3599 if (common->partialmatchlabel != NULL)
3600 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
3601 else
3602 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
3603 }
3604JUMPHERE(jump);
3605}
3606
3607static void detect_partial_match(compiler_common *common, jump_list **backtracks)
3608{
3609DEFINE_COMPILER;
3610struct sljit_jump *jump;
3611
3612if (common->mode == PCRE2_JIT_COMPLETE)
3613 {
3614 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3615 return;
3616 }
3617
3618/* Partial matching mode. */
3619jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
3620if (!common->allow_empty_partial)
3621 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
3622else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3623 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
3624
3625if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3626 {
3627 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
3628 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3629 }
3630else
3631 {
3632 if (common->partialmatchlabel != NULL)
3633 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
3634 else
3635 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
3636 }
3637JUMPHERE(jump);
3638}
3639
3640static void process_partial_match(compiler_common *common)
3641{
3642DEFINE_COMPILER;
3643struct sljit_jump *jump;
3644
3645/* Partial matching mode. */
3646if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3647 {
3648 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3649 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
3650 JUMPHERE(jump);
3651 }
3652else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3653 {
3654 if (common->partialmatchlabel != NULL)
3655 CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
3656 else
3657 add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
3658 }
3659}
3660
3661static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
3662{
3663DEFINE_COMPILER;
3664
3665CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);
3666process_partial_match(common);
3667}
3668
3669static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
3670{
3671/* Reads the character into TMP1, keeps STR_PTR.
3672Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed. */
3673DEFINE_COMPILER;
3674#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3675struct sljit_jump *jump;
3676#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
3677
3678SLJIT_UNUSED_ARG(max);
3679SLJIT_UNUSED_ARG(dst);
3680SLJIT_UNUSED_ARG(dstw);
3681SLJIT_UNUSED_ARG(backtracks);
3682
3683OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3684
3685#ifdef SUPPORT_UNICODE
3686#if PCRE2_CODE_UNIT_WIDTH == 8
3687if (common->utf)
3688 {
3689 if (max < 128) return;
3690
3691 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
3692 OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
3693 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3694 add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
3695 OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
3696 if (backtracks && common->invalid_utf)
3697 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3698 JUMPHERE(jump);
3699 }
3700#elif PCRE2_CODE_UNIT_WIDTH == 16
3701if (common->utf)
3702 {
3703 if (max < 0xd800) return;
3704
3705 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3706
3707 if (common->invalid_utf)
3708 {
3709 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
3710 OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
3711 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3712 add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3713 OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
3714 if (backtracks && common->invalid_utf)
3715 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3716 }
3717 else
3718 {
3719 /* TMP2 contains the high surrogate. */
3720 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
3721 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3722 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
3723 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
3724 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3725 }
3726
3727 JUMPHERE(jump);
3728 }
3729#elif PCRE2_CODE_UNIT_WIDTH == 32
3730if (common->invalid_utf)
3731 {
3732 if (max < 0xd800) return;
3733
3734 if (backtracks != NULL)
3735 {
3736 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3737 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
3738 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
3739 }
3740 else
3741 {
3742 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3743 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
3744 CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
3745 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
3746 CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
3747 }
3748 }
3749#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
3750#endif /* SUPPORT_UNICODE */
3751}
3752
3753static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
3754{
3755/* Reads one character back without moving STR_PTR. TMP2 must
3756contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR. */
3757DEFINE_COMPILER;
3758
3759#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3760struct sljit_jump *jump;
3761#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
3762
3763SLJIT_UNUSED_ARG(max);
3764SLJIT_UNUSED_ARG(backtracks);
3765
3766OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3767
3768#ifdef SUPPORT_UNICODE
3769#if PCRE2_CODE_UNIT_WIDTH == 8
3770if (common->utf)
3771 {
3772 if (max < 128) return;
3773
3774 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
3775 if (common->invalid_utf)
3776 {
3777 add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
3778 if (backtracks != NULL)
3779 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3780 }
3781 else
3782 add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
3783 JUMPHERE(jump);
3784 }
3785#elif PCRE2_CODE_UNIT_WIDTH == 16
3786if (common->utf)
3787 {
3788 if (max < 0xd800) return;
3789
3790 if (common->invalid_utf)
3791 {
3792 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3793 add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
3794 if (backtracks != NULL)
3795 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3796 }
3797 else
3798 {
3799 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
3800 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
3801 /* TMP2 contains the low surrogate. */
3802 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3803 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
3804 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3805 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
3806 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3807 }
3808 JUMPHERE(jump);
3809 }
3810#elif PCRE2_CODE_UNIT_WIDTH == 32
3811if (common->invalid_utf)
3812 {
3813 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3814 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
3815 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
3816 }
3817#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
3818#endif /* SUPPORT_UNICODE */
3819}
3820
/* Option bits for read_char(). */

/* STR_PTR must be moved past the whole character even when the character
value itself does not need to be decoded. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
/* Use the newline specific helper when decoding in invalid UTF mode. */
#define READ_CHAR_UTF8_NEWLINE 0x2
/* Both of the above. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* Skip the invalid UTF checks even when invalid UTF is allowed. */
#define READ_CHAR_VALID_UTF 0x4
3825
3826static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
3827 jump_list **backtracks, sljit_u32 options)
3828{
3829/* Reads the precise value of a character into TMP1, if the character is
3830between min and max (c >= min && c <= max). Otherwise it returns with a value
3831outside the range. Does not check STR_END. */
3832DEFINE_COMPILER;
3833#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3834struct sljit_jump *jump;
3835#endif
3836#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3837struct sljit_jump *jump2;
3838#endif
3839
3840SLJIT_UNUSED_ARG(min);
3841SLJIT_UNUSED_ARG(max);
3842SLJIT_UNUSED_ARG(backtracks);
3843SLJIT_UNUSED_ARG(options);
3844SLJIT_ASSERT(min <= max);
3845
3846OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3847OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3848
3849#ifdef SUPPORT_UNICODE
3850#if PCRE2_CODE_UNIT_WIDTH == 8
3851if (common->utf)
3852 {
3853 if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
3854
3855 if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
3856 {
3857 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
3858
3859 if (options & READ_CHAR_UTF8_NEWLINE)
3860 add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
3861 else
3862 add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3863
3864 if (backtracks != NULL)
3865 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3866 JUMPHERE(jump);
3867 return;
3868 }
3869
3870 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3871 if (min >= 0x10000)
3872 {
3873 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
3874 if (options & READ_CHAR_UPDATE_STR_PTR)
3875 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3876 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3877 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
3878 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3879 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3880 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3881 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3882 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3883 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3884 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3885 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3886 if (!(options & READ_CHAR_UPDATE_STR_PTR))
3887 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3888 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3889 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3890 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3891 JUMPHERE(jump2);
3892 if (options & READ_CHAR_UPDATE_STR_PTR)
3893 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3894 }
3895 else if (min >= 0x800 && max <= 0xffff)
3896 {
3897 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
3898 if (options & READ_CHAR_UPDATE_STR_PTR)
3899 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3900 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3901 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
3902 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3903 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3904 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3905 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3906 if (!(options & READ_CHAR_UPDATE_STR_PTR))
3907 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3908 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3909 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3910 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3911 JUMPHERE(jump2);
3912 if (options & READ_CHAR_UPDATE_STR_PTR)
3913 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3914 }
3915 else if (max >= 0x800)
3916 {
3917 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
3918 }
3919 else if (max < 128)
3920 {
3921 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3922 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
3923 }
3924 else
3925 {
3926 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3927 if (!(options & READ_CHAR_UPDATE_STR_PTR))
3928 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3929 else
3930 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3931 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3932 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3933 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3934 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3935 if (options & READ_CHAR_UPDATE_STR_PTR)
3936 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3937 }
3938 JUMPHERE(jump);
3939 }
3940#elif PCRE2_CODE_UNIT_WIDTH == 16
3941if (common->utf)
3942 {
3943 if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
3944
3945 if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
3946 {
3947 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3948 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
3949
3950 if (options & READ_CHAR_UTF8_NEWLINE)
3951 add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
3952 else
3953 add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3954
3955 if (backtracks != NULL)
3956 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3957 JUMPHERE(jump);
3958 return;
3959 }
3960
3961 if (max >= 0x10000)
3962 {
3963 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3964 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
3965 /* TMP2 contains the high surrogate. */
3966 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3967 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
3968 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3969 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
3970 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3971 JUMPHERE(jump);
3972 return;
3973 }
3974
3975 /* Skip low surrogate if necessary. */
3976 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3977
3978 if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
3979 {
3980 if (options & READ_CHAR_UPDATE_STR_PTR)
3981 OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3982 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
3983 if (options & READ_CHAR_UPDATE_STR_PTR)
3984 CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
3985 if (max >= 0xd800)
3986 CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
3987 }
3988 else
3989 {
3990 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
3991 if (options & READ_CHAR_UPDATE_STR_PTR)
3992 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3993 if (max >= 0xd800)
3994 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
3995 JUMPHERE(jump);
3996 }
3997 }
3998#elif PCRE2_CODE_UNIT_WIDTH == 32
3999if (common->invalid_utf)
4000 {
4001 if (backtracks != NULL)
4002 {
4003 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4004 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4005 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
4006 }
4007 else
4008 {
4009 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4010 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
4011 CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4012 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4013 CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4014 }
4015 }
4016#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4017#endif /* SUPPORT_UNICODE */
4018}
4019
4020#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4021
4022static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
4023{
4024/* Tells whether the character codes below 128 are enough
4025to determine a match. */
4026const sljit_u8 value = nclass ? 0xff : 0;
4027const sljit_u8 *end = bitset + 32;
4028
4029bitset += 16;
4030do
4031 {
4032 if (*bitset++ != value)
4033 return FALSE;
4034 }
4035while (bitset < end);
4036return TRUE;
4037}
4038
4039static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
4040{
4041/* Reads the precise character type of a character into TMP1, if the character
4042is less than 128. Otherwise it returns with zero. Does not check STR_END. The
4043full_read argument tells whether characters above max are accepted or not. */
4044DEFINE_COMPILER;
4045struct sljit_jump *jump;
4046
4047SLJIT_ASSERT(common->utf);
4048
4049OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
4050OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4051
4052/* All values > 127 are zero in ctypes. */
4053OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4054
4055if (negated)
4056 {
4057 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);
4058
4059 if (common->invalid_utf)
4060 {
4061 add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4062 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4063 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4064 }
4065 else
4066 {
4067 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4068 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4069 }
4070 JUMPHERE(jump);
4071 }
4072}
4073
4074#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4075
4076static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
4077{
4078/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
4079DEFINE_COMPILER;
4080#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
4081struct sljit_jump *jump;
4082#endif
4083#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4084struct sljit_jump *jump2;
4085#endif
4086
4087SLJIT_UNUSED_ARG(backtracks);
4088SLJIT_UNUSED_ARG(negated);
4089
4090OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
4091OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4092
4093#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4094if (common->utf)
4095 {
4096 /* The result of this read may be unused, but saves an "else" part. */
4097 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4098 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);
4099
4100 if (!negated)
4101 {
4102 if (common->invalid_utf)
4103 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4104
4105 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4106 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4107 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
4108 if (common->invalid_utf)
4109 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));
4110
4111 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4112 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
4113 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
4114 if (common->invalid_utf)
4115 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40));
4116
4117 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4118 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
4119 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4120 JUMPHERE(jump2);
4121 }
4122 else if (common->invalid_utf)
4123 {
4124 add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4125 OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
4126 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4127
4128 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4129 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
4130 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4131 JUMPHERE(jump2);
4132 }
4133 else
4134 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
4135
4136 JUMPHERE(jump);
4137 return;
4138 }
4139#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4140
4141#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
4142if (common->invalid_utf && negated)
4143 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
4144#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */
4145
4146#if PCRE2_CODE_UNIT_WIDTH != 8
4147/* The ctypes array contains only 256 values. */
4148OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4149jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
4150#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
4151OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4152#if PCRE2_CODE_UNIT_WIDTH != 8
4153JUMPHERE(jump);
4154#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
4155
4156#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
4157if (common->utf && negated)
4158 {
4159 /* Skip low surrogate if necessary. */
4160 if (!common->invalid_utf)
4161 {
4162 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
4163
4164 if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
4165 {
4166 OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4167 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
4168 CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
4169 }
4170 else
4171 {
4172 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
4173 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4174 JUMPHERE(jump);
4175 }
4176 return;
4177 }
4178
4179 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
4180 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4181 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
4182 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4183
4184 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4185 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4186 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
4187 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
4188
4189 JUMPHERE(jump);
4190 return;
4191 }
4192#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
4193}
4194
4195static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
4196{
4197/* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE,
4198TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer,
4199and it is destroyed. Does not modify STR_PTR for invalid character sequences. */
4200DEFINE_COMPILER;
4201
4202SLJIT_UNUSED_ARG(backtracks);
4203SLJIT_UNUSED_ARG(must_be_valid);
4204
4205#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4206struct sljit_jump *jump;
4207#endif
4208
4209#ifdef SUPPORT_UNICODE
4210#if PCRE2_CODE_UNIT_WIDTH == 8
4211struct sljit_label *label;
4212
4213if (common->utf)
4214 {
4215 if (!must_be_valid && common->invalid_utf)
4216 {
4217 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
4218 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4219 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
4220 add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
4221 if (backtracks != NULL)
4222 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4223 JUMPHERE(jump);
4224 return;
4225 }
4226
4227 label = LABEL();
4228 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
4229 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4230 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4231 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
4232 return;
4233 }
4234#elif PCRE2_CODE_UNIT_WIDTH == 16
4235if (common->utf)
4236 {
4237 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
4238 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4239
4240 if (!must_be_valid && common->invalid_utf)
4241 {
4242 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4243 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
4244 add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
4245 if (backtracks != NULL)
4246 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4247 JUMPHERE(jump);
4248 return;
4249 }
4250
4251 /* Skip low surrogate if necessary. */
4252 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
4253 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
4254 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4255 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4256 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4257 return;
4258 }
4259#elif PCRE2_CODE_UNIT_WIDTH == 32
4260if (common->invalid_utf && !must_be_valid)
4261 {
4262 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
4263 if (backtracks != NULL)
4264 {
4265 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4266 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4267 return;
4268 }
4269
4270 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
4271 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
4272 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4273 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4274 return;
4275 }
4276#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4277#endif /* SUPPORT_UNICODE */
4278OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4279}
4280
4281static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
4282{
4283/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
4284DEFINE_COMPILER;
4285struct sljit_jump *jump;
4286
4287if (nltype == NLTYPE_ANY)
4288 {
4289 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4290 sljit_set_current_flags(compiler, SLJIT_SET_Z);
4291 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
4292 }
4293else if (nltype == NLTYPE_ANYCRLF)
4294 {
4295 if (jumpifmatch)
4296 {
4297 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
4298 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4299 }
4300 else
4301 {
4302 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4303 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4304 JUMPHERE(jump);
4305 }
4306 }
4307else
4308 {
4309 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
4310 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4311 }
4312}
4313
4314#ifdef SUPPORT_UNICODE
4315
4316#if PCRE2_CODE_UNIT_WIDTH == 8
4317static void do_utfreadchar(compiler_common *common)
4318{
4319/* Fast decoding a UTF-8 character. TMP1 contains the first byte
4320of the character (>= 0xc0). Return char value in TMP1. */
4321DEFINE_COMPILER;
4322struct sljit_jump *jump;
4323
4324sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4325OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4326OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4327OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4328OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4329
4330/* Searching for the first zero. */
4331OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
4332jump = JUMP(SLJIT_NOT_ZERO);
4333/* Two byte sequence. */
4334OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
4335OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4336OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4337
4338JUMPHERE(jump);
4339OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4340OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4341OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4342OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4343
4344OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
4345jump = JUMP(SLJIT_NOT_ZERO);
4346/* Three byte sequence. */
4347OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
4348OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4349OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4350
4351/* Four byte sequence. */
4352JUMPHERE(jump);
4353OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
4354OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
4355OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4356OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4357OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4358OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4359OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4360}
4361
4362static void do_utfreadtype8(compiler_common *common)
4363{
4364/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
4365of the character (>= 0xc0). Return value in TMP1. */
4366DEFINE_COMPILER;
4367struct sljit_jump *jump;
4368struct sljit_jump *compare;
4369
4370sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4371
4372OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
4373jump = JUMP(SLJIT_NOT_ZERO);
4374/* Two byte sequence. */
4375OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4376OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4377OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
4378/* The upper 5 bits are known at this point. */
4379compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
4380OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4381OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4382OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
4383OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4384OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4385
4386JUMPHERE(compare);
4387OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4388OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4389
4390/* We only have types for characters less than 256. */
4391JUMPHERE(jump);
4392OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4393OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4394OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4395OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4396}
4397
4398static void do_utfreadchar_invalid(compiler_common *common)
4399{
4400/* Slow decoding a UTF-8 character. TMP1 contains the first byte
4401of the character (>= 0xc0). Return char value in TMP1. STR_PTR is
4402undefined for invalid characters. */
4403DEFINE_COMPILER;
4404sljit_s32 i;
4405sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
4406struct sljit_jump *jump;
4407struct sljit_jump *buffer_end_close;
4408struct sljit_label *three_byte_entry;
4409struct sljit_label *exit_invalid_label;
4410struct sljit_jump *exit_invalid[11];
4411
4412sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4413
4414OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);
4415
4416/* Usually more than 3 characters remained in the subject buffer. */
4417OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4418
4419/* Not a valid start of a multi-byte sequence, no more bytes read. */
4420exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);
4421
4422buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
4423
4424OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4425OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4426/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4427OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4428OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4429exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4430
4431OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
4432jump = JUMP(SLJIT_NOT_ZERO);
4433
4434OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4435OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4436
4437JUMPHERE(jump);
4438
4439/* Three-byte sequence. */
4440OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4441OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4442OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4443OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4444if (has_cmov)
4445 {
4446 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
4447 CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
4448 exit_invalid[2] = NULL;
4449 }
4450else
4451 exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4452
4453OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
4454jump = JUMP(SLJIT_NOT_ZERO);
4455
4456three_byte_entry = LABEL();
4457
4458OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
4459if (has_cmov)
4460 {
4461 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
4462 CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
4463 exit_invalid[3] = NULL;
4464 }
4465else
4466 exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4467OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4468OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4469
4470if (has_cmov)
4471 {
4472 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
4473 CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4474 exit_invalid[4] = NULL;
4475 }
4476else
4477 exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4478OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4479
4480JUMPHERE(jump);
4481
4482/* Four-byte sequence. */
4483OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4484OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4485OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4486OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4487if (has_cmov)
4488 {
4489 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
4490 CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
4491 exit_invalid[5] = NULL;
4492 }
4493else
4494 exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4495
4496OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
4497if (has_cmov)
4498 {
4499 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
4500 CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
4501 exit_invalid[6] = NULL;
4502 }
4503else
4504 exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4505
4506OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
4507OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4508
4509JUMPHERE(buffer_end_close);
4510OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4511exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
4512
4513/* Two-byte sequence. */
4514OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4515OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4516/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4517OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4518OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4519exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4520
4521OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
4522jump = JUMP(SLJIT_NOT_ZERO);
4523
4524OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4525
4526/* Three-byte sequence. */
4527JUMPHERE(jump);
4528exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4529
4530OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4531OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4532OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4533OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4534if (has_cmov)
4535 {
4536 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
4537 CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4538 exit_invalid[10] = NULL;
4539 }
4540else
4541 exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4542
4543/* One will be substracted from STR_PTR later. */
4544OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4545
4546/* Four byte sequences are not possible. */
4547CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);
4548
4549exit_invalid_label = LABEL();
4550for (i = 0; i < 11; i++)
4551 sljit_set_label(exit_invalid[i], exit_invalid_label);
4552
4553OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4554OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4555}
4556
4557static void do_utfreadnewline_invalid(compiler_common *common)
4558{
4559/* Slow decoding a UTF-8 character, specialized for newlines.
4560TMP1 contains the first byte of the character (>= 0xc0). Return
4561char value in TMP1. */
4562DEFINE_COMPILER;
4563struct sljit_label *loop;
4564struct sljit_label *skip_start;
4565struct sljit_label *three_byte_exit;
4566struct sljit_jump *jump[5];
4567
4568sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4569
4570if (common->nltype != NLTYPE_ANY)
4571 {
4572 SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);
4573
4574 /* All newlines are ascii, just skip intermediate octets. */
4575 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4576 loop = LABEL();
4577 if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
4578 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4579 else
4580 {
4581 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4582 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4583 }
4584
4585 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4586 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
4587 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4588
4589 JUMPHERE(jump[0]);
4590
4591 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4592 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4593 return;
4594 }
4595
4596jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4597OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4598OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4599
4600jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
4601jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);
4602
4603skip_start = LABEL();
4604OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4605jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);
4606
4607/* Skip intermediate octets. */
4608loop = LABEL();
4609jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4610OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4611OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4612OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4613CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
4614
4615JUMPHERE(jump[3]);
4616OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4617
4618three_byte_exit = LABEL();
4619JUMPHERE(jump[0]);
4620JUMPHERE(jump[4]);
4621
4622OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4623OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4624
4625/* Two byte long newline: 0x85. */
4626JUMPHERE(jump[1]);
4627CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);
4628
4629OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
4630OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4631
4632/* Three byte long newlines: 0x2028 and 0x2029. */
4633JUMPHERE(jump[2]);
4634CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
4635CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);
4636
4637OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4638OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4639
4640OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
4641CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);
4642
4643OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
4644OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4645OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4646}
4647
4648static void do_utfmoveback_invalid(compiler_common *common)
4649{
4650/* Goes one character back. */
4651DEFINE_COMPILER;
4652sljit_s32 i;
4653struct sljit_jump *jump;
4654struct sljit_jump *buffer_start_close;
4655struct sljit_label *exit_ok_label;
4656struct sljit_label *exit_invalid_label;
4657struct sljit_jump *exit_invalid[7];
4658
4659sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4660
4661OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4662exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
4663
4664/* Two-byte sequence. */
4665buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4666
4667OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
4668
4669OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4670jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);
4671
4672OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4673OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4674OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4675
4676/* Three-byte sequence. */
4677JUMPHERE(jump);
4678exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
4679
4680OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4681
4682OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4683jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);
4684
4685OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4686OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4687OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4688
4689/* Four-byte sequence. */
4690JUMPHERE(jump);
4691OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
4692exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);
4693
4694OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4695OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
4696exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);
4697
4698exit_ok_label = LABEL();
4699OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4700OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4701
4702/* Two-byte sequence. */
4703JUMPHERE(buffer_start_close);
4704OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4705
4706exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4707
4708OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4709
4710OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4711CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);
4712
4713/* Three-byte sequence. */
4714OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4715exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
4716exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4717
4718OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4719
4720OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4721CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);
4722
4723/* Four-byte sequences are not possible. */
4724
4725exit_invalid_label = LABEL();
4726sljit_set_label(exit_invalid[5], exit_invalid_label);
4727sljit_set_label(exit_invalid[6], exit_invalid_label);
4728OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4729OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4730OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4731
4732JUMPHERE(exit_invalid[4]);
4733/* -2 + 4 = 2 */
4734OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4735
4736exit_invalid_label = LABEL();
4737for (i = 0; i < 4; i++)
4738 sljit_set_label(exit_invalid[i], exit_invalid_label);
4739OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4740OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
4741OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4742}
4743
4744static void do_utfpeakcharback(compiler_common *common)
4745{
4746/* Peak a character back. Does not modify STR_PTR. */
4747DEFINE_COMPILER;
4748struct sljit_jump *jump[2];
4749
4750sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4751
4752OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4753OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4754jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);
4755
4756OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4757OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4758jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);
4759
4760OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
4761OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
4762OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
4763OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4764OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4765
4766JUMPHERE(jump[1]);
4767OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4768OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4769OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4770OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4771
4772JUMPHERE(jump[0]);
4773OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4774OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4775OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4776OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4777
4778OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4779}
4780
4781static void do_utfpeakcharback_invalid(compiler_common *common)
4782{
4783/* Peak a character back. Does not modify STR_PTR. */
4784DEFINE_COMPILER;
4785sljit_s32 i;
4786sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
4787struct sljit_jump *jump[2];
4788struct sljit_label *two_byte_entry;
4789struct sljit_label *three_byte_entry;
4790struct sljit_label *exit_invalid_label;
4791struct sljit_jump *exit_invalid[8];
4792
4793sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4794
4795OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
4796exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
4797jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
4798
4799/* Two-byte sequence. */
4800OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4801OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
4802jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);
4803
4804two_byte_entry = LABEL();
4805OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4806/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4807OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4808OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4809
4810JUMPHERE(jump[1]);
4811OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
4812OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
4813exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4814OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4815OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4816
4817/* Three-byte sequence. */
4818OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4819OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
4820jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);
4821
4822three_byte_entry = LABEL();
4823OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
4824OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4825
4826OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4827if (has_cmov)
4828 {
4829 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
4830 CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
4831 exit_invalid[2] = NULL;
4832 }
4833else
4834 exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4835
4836OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4837if (has_cmov)
4838 {
4839 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
4840 CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4841 exit_invalid[3] = NULL;
4842 }
4843else
4844 exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4845
4846OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4847
4848JUMPHERE(jump[1]);
4849OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
4850exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4851OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
4852OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4853
4854/* Four-byte sequence. */
4855OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
4856OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
4857OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
4858OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
4859/* ADD is used instead of OR because of the SUB 0x10000 above. */
4860OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4861
4862if (has_cmov)
4863 {
4864 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
4865 CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
4866 exit_invalid[5] = NULL;
4867 }
4868else
4869 exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4870
4871OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
4872OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4873
4874JUMPHERE(jump[0]);
4875OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4876jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
4877
4878/* Two-byte sequence. */
4879OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4880OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
4881CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
4882
4883OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
4884OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
4885exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4886OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4887OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4888
4889/* Three-byte sequence. */
4890OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4891OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
4892CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);
4893
4894OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4895OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4896
4897JUMPHERE(jump[0]);
4898exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);
4899
4900/* Two-byte sequence. */
4901OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4902OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
4903CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
4904
4905exit_invalid_label = LABEL();
4906for (i = 0; i < 8; i++)
4907 sljit_set_label(exit_invalid[i], exit_invalid_label);
4908
4909OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4910OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4911}
4912
4913#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
4914
4915#if PCRE2_CODE_UNIT_WIDTH == 16
4916
4917static void do_utfreadchar_invalid(compiler_common *common)
4918{
4919/* Slow decoding a UTF-16 character. TMP1 contains the first half
4920of the character (>= 0xd800). Return char value in TMP1. STR_PTR is
4921undefined for invalid characters. */
4922DEFINE_COMPILER;
4923struct sljit_jump *exit_invalid[3];
4924
4925sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4926
4927/* TMP2 contains the high surrogate. */
4928exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
4929exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4930
4931OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4932OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
4933OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4934
4935OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
4936OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
4937exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
4938
4939OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4940OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4941
4942JUMPHERE(exit_invalid[0]);
4943JUMPHERE(exit_invalid[1]);
4944JUMPHERE(exit_invalid[2]);
4945OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4946OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4947}
4948
4949static void do_utfreadnewline_invalid(compiler_common *common)
4950{
4951/* Slow decoding a UTF-16 character, specialized for newlines.
4952TMP1 contains the first half of the character (>= 0xd800). Return
4953char value in TMP1. */
4954
4955DEFINE_COMPILER;
4956struct sljit_jump *exit_invalid[2];
4957
4958sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4959
4960/* TMP2 contains the high surrogate. */
4961exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4962
4963OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4964exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
4965
4966OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
4967OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
4968OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
4969OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
4970OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
4971OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4972
4973OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4974
4975JUMPHERE(exit_invalid[0]);
4976JUMPHERE(exit_invalid[1]);
4977OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4978OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4979}
4980
4981static void do_utfmoveback_invalid(compiler_common *common)
4982{
4983/* Goes one character back. */
4984DEFINE_COMPILER;
4985struct sljit_jump *exit_invalid[3];
4986
4987sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4988
4989exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
4990exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
4991
4992OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4993OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4994exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
4995
4996OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4997OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4998OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4999
5000JUMPHERE(exit_invalid[0]);
5001JUMPHERE(exit_invalid[1]);
5002JUMPHERE(exit_invalid[2]);
5003
5004OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5005OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
5006OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5007}
5008
5009static void do_utfpeakcharback_invalid(compiler_common *common)
5010{
5011/* Peak a character back. Does not modify STR_PTR. */
5012DEFINE_COMPILER;
5013struct sljit_jump *jump;
5014struct sljit_jump *exit_invalid[3];
5015
5016sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5017
5018jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
5019OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5020exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
5021exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5022
5023OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5024OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
5025OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
5026exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
5027OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
5028OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5029
5030JUMPHERE(jump);
5031OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5032
5033JUMPHERE(exit_invalid[0]);
5034JUMPHERE(exit_invalid[1]);
5035JUMPHERE(exit_invalid[2]);
5036
5037OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5038OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5039}
5040
5041#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5042
/* UCD_BLOCK_SIZE must be 128 (see the assert below). The mask selects the
position inside a block and the shift selects the block number. */
#define UCD_BLOCK_MASK 127
#define UCD_BLOCK_SHIFT 7
5046
5047static void do_getucd(compiler_common *common)
5048{
5049/* Search the UCD record for the character comes in TMP1.
5050Returns chartype in TMP1 and UCD offset in TMP2. */
5051DEFINE_COMPILER;
5052#if PCRE2_CODE_UNIT_WIDTH == 32
5053struct sljit_jump *jump;
5054#endif
5055
5056#if defined SLJIT_DEBUG && SLJIT_DEBUG
5057/* dummy_ucd_record */
5058const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
5059SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
5060SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
5061#endif
5062
5063SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
5064
5065sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5066
5067#if PCRE2_CODE_UNIT_WIDTH == 32
5068if (!common->utf)
5069 {
5070 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
5071 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
5072 JUMPHERE(jump);
5073 }
5074#endif
5075
5076OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5077OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5078OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5079OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5080OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5081OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5082OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5083OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5084OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5085}
5086
5087static void do_getucdtype(compiler_common *common)
5088{
5089/* Emits the code that looks up the UCD record of the character passed in TMP1.
Returns chartype in TMP1 and the UCD offset in TMP2 (as emitted below, TMP2 is
left holding the stage-2 record index multiplied by 4). */
5091DEFINE_COMPILER;
5092#if PCRE2_CODE_UNIT_WIDTH == 32
5093struct sljit_jump *jump;
5094#endif
5095
5096#if defined SLJIT_DEBUG && SLJIT_DEBUG
5097/* dummy_ucd_record */
/* Debug-only sanity check: the record selected for UNASSIGNED_UTF_CHAR (which
the 32-bit non-UTF path below substitutes for out-of-range values) must carry
the unknown script, the Cn type and no case information. */
5098const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
5099SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
5100SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
5101#endif
5102
/* The multiply-by-12 address computation at the end of this function depends
on the record size checked here. */
5103SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
5104
5105sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5106
5107#if PCRE2_CODE_UNIT_WIDTH == 32
/* In 32-bit non-UTF mode any value above MAX_UTF_CODE_POINT is replaced by
UNASSIGNED_UTF_CHAR before the tables are indexed. */
5108if (!common->utf)
5109 {
5110 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
5111 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
5112 JUMPHERE(jump);
5113 }
5114#endif
5115
/* TMP2 = ucd_stage1[TMP1 >> UCD_BLOCK_SHIFT]. The entries are loaded as 16-bit
values, so the index is shifted left by 1 to form a byte offset. */
5116OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5117OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5118OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (stage-1 value << UCD_BLOCK_SHIFT) + (TMP1 & UCD_BLOCK_MASK), which is
the index into ucd_stage2. */
5119OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5120OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5121OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1], again a 16-bit load with the index shifted by 1. */
5122OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5123OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5124
5125/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */
/* TMP1 starts as the address of the chartype field of the first record; the
two scaled copies of TMP2 are added by the ADD and by the indexed load. */
5126OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5127OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
5128OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5129OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);
5130
5131OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5132}
5133
5134#endif /* SUPPORT_UNICODE */
5135
5136static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
5137{
/* Emits the code that runs before, and at the top of, the main matching loop:
  - with PCRE2_FIRSTLINE, the end of the first line is located and stored in
    the stack slot at common->match_end_ptr;
  - otherwise, with PCRE2_USE_OFFSET_LIMIT, the offset limit is converted to
    a pointer (clamped to STR_END) and stored in the same slot;
  - then the loop head, which advances STR_PTR by one character.
The first entry jumps over the advance code: the 'start' jump emitted before
the loop head is resolved at the very end of this function. Returns the label
of the loop head. */
5138DEFINE_COMPILER;
5139struct sljit_label *mainloop;
5140struct sljit_label *newlinelabel = NULL;
5141struct sljit_jump *start;
5142struct sljit_jump *end = NULL;
5143struct sljit_jump *end2 = NULL;
5144#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5145struct sljit_label *loop;
5146struct sljit_jump *jump;
5147#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5148jump_list *newline = NULL;
5149sljit_u32 overall_options = common->re->overall_options;
5150BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
5151BOOL newlinecheck = FALSE;
5152BOOL readuchar = FALSE;
5153
/* newlinecheck enables the extra code below that steps over the second code
unit of a newline pair. It is used only when the PCRE2_HASCRORLF flag and
PCRE2_FIRSTLINE are both unset and the newline type is ANY, ANYCRLF, or a
fixed value above 255. */
5154if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
5155 && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
5156 newlinecheck = TRUE;
5157
5158SLJIT_ASSERT(common->abort_label == NULL);
5159
5160if ((overall_options & PCRE2_FIRSTLINE) != 0)
5161 {
5162 /* Search for the end of the first line. */
5163 SLJIT_ASSERT(common->match_end_ptr != 0);
 /* STR_PTR is used as the scan pointer; its value is saved in TMP3 and
 restored once the end of the line has been recorded. */
5164 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5165
5166 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5167 {
 /* Fixed newline above 255: compare two consecutive code units with the
 high and low bytes of common->newline. The stored end pointer is
 STR_PTR minus one code unit, whether the loop found the pair or ran
 into STR_END. */
5168 mainloop = LABEL();
5169 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5170 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5171 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5172 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5173 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
5174 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
5175 JUMPHERE(end);
5176 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5177 }
5178 else
5179 {
 /* Other newline types: the position before each character is stored,
 so when check_newlinechar() jumps out through the 'newline' list the
 slot holds the start of the newline. If no newline is found the slot
 is overwritten with the final STR_PTR. */
5180 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5181 mainloop = LABEL();
5182 /* Continual stores do not cause a data dependency. */
5183 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5184 read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
5185 check_newlinechar(common, common->nltype, &newline, TRUE);
5186 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
5187 JUMPHERE(end);
5188 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5189 set_jumps(newline, LABEL());
5190 }
5191
5192 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5193 }
5194else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
5195 {
5196 /* Check whether offset limit is set and valid. */
5197 SLJIT_ASSERT(common->match_end_ptr != 0);
5198
 /* TMP1 = arguments->offset_limit. With virtual registers ARGUMENTS is
 first copied into TMP1 before it is used as a base. */
5199 if (HAS_VIRTUAL_REGISTERS)
5200 {
5201 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5202 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5203 }
5204 else
5205 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5206
 /* Default limit is STR_END; it is kept when the offset limit is
 PCRE2_UNSET (the 'end' jump skips the computation below). */
5207 OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5208 end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
5209 if (HAS_VIRTUAL_REGISTERS)
5210 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5211 else
5212 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
5213
5214#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
 /* Convert the limit from code units to bytes. */
5215 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5216#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5217 if (HAS_VIRTUAL_REGISTERS)
5218 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5219
 /* TMP2 = begin + limit, clamped to STR_END. If the resulting limit is
 before STR_PTR the match is abandoned with PCRE2_ERROR_NOMATCH. */
5220 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
5221 end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5222 OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5223 JUMPHERE(end2);
5224 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
5225 add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
5226 JUMPHERE(end);
5227 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
5228 }
5229
/* First entry: skip the advance code and land at the end of this function. */
5230start = JUMP(SLJIT_JUMP);
5231
5232if (newlinecheck)
5233 {
 /* Reached from the loop head when the current code unit equals the high
 byte of common->newline. Advances by one unit and, unless the end of the
 subject is reached, by one more unit when the next code unit equals the
 low byte (the comparison result, 0 or 1, is scaled and added). */
5234 newlinelabel = LABEL();
5235 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5236 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5237 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5238 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
5239 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
5240#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5241 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5242#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5243 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5244 end2 = JUMP(SLJIT_JUMP);
5245 }
5246
/* Loop head: this is the label returned to the caller. */
5247mainloop = LABEL();
5248
5249/* Increasing the STR_PTR here requires one less jump in the most common case. */
/* The current code unit is loaded into TMP1 only when it is needed: for the
newline check, or for the valid-UTF skip code below. */
5250#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5251if (common->utf && !common->invalid_utf) readuchar = TRUE;
5252#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5253if (newlinecheck) readuchar = TRUE;
5254
5255if (readuchar)
5256 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5257
5258if (newlinecheck)
5259 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
5260
5261OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5262#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5263#if PCRE2_CODE_UNIT_WIDTH == 8
5264if (common->invalid_utf)
5265 {
5266 /* Skip continuation code units. */
 /* A unit is skipped while (unit - 0x80) compares below 0x40, i.e. it is
 in 0x80..0xbf assuming SLJIT_LESS is an unsigned comparison. The final
 SUB undoes the increment made for the first unit that fails the test. */
5267 loop = LABEL();
5268 jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5269 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5270 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5271 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5272 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
5273 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5274 JUMPHERE(jump);
5275 }
5276else if (common->utf)
5277 {
 /* For a first unit of 0xc0 or above, the value loaded from
 utf8_table4[unit - 0xc0] is added to STR_PTR. */
5278 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5279 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5280 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5281 JUMPHERE(jump);
5282 }
5283#elif PCRE2_CODE_UNIT_WIDTH == 16
5284if (common->invalid_utf)
5285 {
5286 /* Skip continuation code units. */
 /* Same scheme as the 8-bit case, with the range 0xdc00..0xdfff. */
5287 loop = LABEL();
5288 jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5289 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5290 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5291 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5292 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
5293 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5294 JUMPHERE(jump);
5295 }
5296else if (common->utf)
5297 {
 /* When (unit - 0xd800) is below 0x400, one more code unit is skipped.
 Two equivalent sequences: a conditional move of STR_PTR + 1 unit, or
 adding the scaled 0/1 comparison result. */
5298 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5299
5300 if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
5301 {
5302 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5303 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
5304 CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
5305 }
5306 else
5307 {
5308 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
5309 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
5310 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5311 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5312 }
5313 }
5314#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
5315#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
/* Target of the initial jump, and of both exits of the newline pair code. */
5316JUMPHERE(start);
5317
5318if (newlinecheck)
5319 {
5320 JUMPHERE(end);
5321 JUMPHERE(end2);
5322 }
5323
5324return mainloop;
5325}
5326
5327
5328static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
5329{
5330sljit_u32 i, count = chars->count;
5331
5332if (count == 255)
5333 return;
5334
5335if (count == 0)
5336 {
5337 chars->count = 1;
5338 chars->chars[0] = chr;
5339
5340 if (last)
5341 chars->last_count = 1;
5342 return;
5343 }
5344
5345for (i = 0; i < count; i++)
5346 if (chars->chars[i] == chr)
5347 return;
5348
5349if (count >= MAX_DIFF_CHARS)
5350 {
5351 chars->count = 255;
5352 return;
5353 }
5354
5355chars->chars[count] = chr;
5356chars->count = count + 1;
5357
5358if (last)
5359 chars->last_count++;
5360}
5361
5362static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
5363{
5364/* Recursive function, which scans prefix literals.

Walks the compiled pattern from cc and, for each leading code unit position,
either records the values that may occur there (through add_prefix_char) or
marks the position as unconstrained by setting its count to 255.

  common     compiler state; the utf and ucp flags and the fcc and ctypes
             tables are read
  cc         current position in the compiled pattern
  chars      data of the next position to fill; advanced as positions are
             consumed
  max_chars  number of positions that may still be filled
  rec_count  shared work budget: decremented on every iteration of the main
             loop, and the scan returns 0 as soon as it is exhausted

Returns the number of positions consumed by this invocation. Recursive calls
made for optional items and for alternatives store their result back into
max_chars, so a branch that consumed fewer positions limits how far the scan
continues afterwards. */
5365BOOL last, any, class, caseless;
5366int len, repeat, len_save, consumed = 0;
5367sljit_u32 chr; /* Any unicode character. */
5368sljit_u8 *bytes, *bytes_end, byte;
5369PCRE2_SPTR alternative, cc_save, oc;
/* Holds the other-case form of a caseless literal, one entry per code unit. */
5370#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5371PCRE2_UCHAR othercase[4];
5372#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
5373PCRE2_UCHAR othercase[2];
5374#else
5375PCRE2_UCHAR othercase[1];
5376#endif
5377
5378repeat = 1;
5379while (TRUE)
5380 {
5381 if (*rec_count == 0)
5382 return 0;
5383 (*rec_count)--;
5384
 /* Per-item flags, set by the switch below:
   last      stop scanning after this item (cleared by items that allow
             the scan to continue with what follows)
   any       the item matches an unconstrained code unit
   class     the item is a 32-byte bitmap class
   caseless  the literal also matches its other case */
5385 last = TRUE;
5386 any = FALSE;
5387 class = FALSE;
5388 caseless = FALSE;
5389
5390 switch (*cc)
5391 {
5392 case OP_CHARI:
5393 caseless = TRUE;
5394 /* Fall through */
5395 case OP_CHAR:
5396 last = FALSE;
5397 cc++;
5398 break;
5399
5400 case OP_SOD:
5401 case OP_SOM:
5402 case OP_SET_SOM:
5403 case OP_NOT_WORD_BOUNDARY:
5404 case OP_WORD_BOUNDARY:
5405 case OP_EODN:
5406 case OP_EOD:
5407 case OP_CIRC:
5408 case OP_CIRCM:
5409 case OP_DOLL:
5410 case OP_DOLLM:
5411 /* Zero width assertions. */
5412 cc++;
5413 continue;
5414
 /* Assertion groups are skipped as a whole. */
5415 case OP_ASSERT:
5416 case OP_ASSERT_NOT:
5417 case OP_ASSERTBACK:
5418 case OP_ASSERTBACK_NOT:
5419 case OP_ASSERT_NA:
5420 case OP_ASSERTBACK_NA:
5421 cc = bracketend(cc);
5422 continue;
5423
 /* One-or-more repeats: 'last' stays TRUE, so the character is recorded
 once and the scan stops after it. */
5424 case OP_PLUSI:
5425 case OP_MINPLUSI:
5426 case OP_POSPLUSI:
5427 caseless = TRUE;
5428 /* Fall through */
5429 case OP_PLUS:
5430 case OP_MINPLUS:
5431 case OP_POSPLUS:
5432 cc++;
5433 break;
5434
 /* Exact repeat: the character is recorded 'repeat' times and the scan
 continues with the following item. */
5435 case OP_EXACTI:
5436 caseless = TRUE;
5437 /* Fall through */
5438 case OP_EXACT:
5439 repeat = GET2(cc, 1);
5440 last = FALSE;
5441 cc += 1 + IMM2_SIZE;
5442 break;
5443
 /* Optional character: the pattern after the character is scanned first
 from the same position (the case where the character is absent), then
 the character itself is added to the same positions below. */
5444 case OP_QUERYI:
5445 case OP_MINQUERYI:
5446 case OP_POSQUERYI:
5447 caseless = TRUE;
5448 /* Fall through */
5449 case OP_QUERY:
5450 case OP_MINQUERY:
5451 case OP_POSQUERY:
5452 len = 1;
5453 cc++;
5454#ifdef SUPPORT_UNICODE
5455 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5456#endif
5457 max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
5458 if (max_chars == 0)
5459 return consumed;
5460 last = FALSE;
5461 break;
5462
5463 case OP_KET:
5464 cc += 1 + LINK_SIZE;
5465 continue;
5466
 /* Reached at the end of a branch: follow the link stored after OP_ALT. */
5467 case OP_ALT:
5468 cc += GET(cc, 1);
5469 continue;
5470
 /* Groups: every alternative after the first is scanned recursively from
 the current position; the first alternative is then scanned by this
 loop. */
5471 case OP_ONCE:
5472 case OP_BRA:
5473 case OP_BRAPOS:
5474 case OP_CBRA:
5475 case OP_CBRAPOS:
5476 alternative = cc + GET(cc, 1);
5477 while (*alternative == OP_ALT)
5478 {
5479 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
5480 if (max_chars == 0)
5481 return consumed;
5482 alternative += GET(alternative, 1);
5483 }
5484
5485 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
5486 cc += IMM2_SIZE;
5487 cc += 1 + LINK_SIZE;
5488 continue;
5489
5490 case OP_CLASS:
5491#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
 /* NOTE(review): is_char7_bitset() presumably accepts only bitmaps
 limited to 7-bit characters; confirm against its definition. */
5492 if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
5493 return consumed;
5494#endif
5495 class = TRUE;
5496 break;
5497
5498 case OP_NCLASS:
5499#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5500 if (common->utf) return consumed;
5501#endif
5502 class = TRUE;
5503 break;
5504
5505#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
5506 case OP_XCLASS:
5507#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5508 if (common->utf) return consumed;
5509#endif
5510 any = TRUE;
5511 cc += GET(cc, 1);
5512 break;
5513#endif
5514
5515 case OP_DIGIT:
5516#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5517 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
5518 return consumed;
5519#endif
5520 any = TRUE;
5521 cc++;
5522 break;
5523
5524 case OP_WHITESPACE:
5525#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5526 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
5527 return consumed;
5528#endif
5529 any = TRUE;
5530 cc++;
5531 break;
5532
5533 case OP_WORDCHAR:
5534#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5535 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
5536 return consumed;
5537#endif
5538 any = TRUE;
5539 cc++;
5540 break;
5541
 /* OP_NOT and OP_NOTI carry one literal after the opcode; the extra
 increment here skips it before the shared code below runs. */
5542 case OP_NOT:
5543 case OP_NOTI:
5544 cc++;
5545 /* Fall through. */
5546 case OP_NOT_DIGIT:
5547 case OP_NOT_WHITESPACE:
5548 case OP_NOT_WORDCHAR:
5549 case OP_ANY:
5550 case OP_ALLANY:
5551#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5552 if (common->utf) return consumed;
5553#endif
5554 any = TRUE;
5555 cc++;
5556 break;
5557
5558#ifdef SUPPORT_UNICODE
5559 case OP_NOTPROP:
5560 case OP_PROP:
5561#if PCRE2_CODE_UNIT_WIDTH != 32
5562 if (common->utf) return consumed;
5563#endif
5564 any = TRUE;
5565 cc += 1 + 2;
5566 break;
5567#endif
5568
 /* Only the repeat count is taken here; the type opcode that follows is
 handled by the next iteration, which uses 'repeat'. */
5569 case OP_TYPEEXACT:
5570 repeat = GET2(cc, 1);
5571 cc += 1 + IMM2_SIZE;
5572 continue;
5573
5574 case OP_NOTEXACT:
5575 case OP_NOTEXACTI:
5576#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5577 if (common->utf) return consumed;
5578#endif
5579 any = TRUE;
5580 repeat = GET2(cc, 1);
5581 cc += 1 + IMM2_SIZE + 1;
5582 break;
5583
 /* Anything else ends the prefix. */
5584 default:
5585 return consumed;
5586 }
5587
5588 if (any)
5589 {
 /* Mark 'repeat' positions as unconstrained. */
5590 do
5591 {
5592 chars->count = 255;
5593
5594 consumed++;
5595 if (--max_chars == 0)
5596 return consumed;
5597 chars++;
5598 }
5599 while (--repeat > 0);
5600
5601 repeat = 1;
5602 continue;
5603 }
5604
5605 if (class)
5606 {
 /* The 32-byte bitmap follows the opcode; cc is moved to the item after
 the bitmap, which may be a repeat opcode. */
5607 bytes = (sljit_u8*) (cc + 1);
5608 cc += 1 + 32 / sizeof(PCRE2_UCHAR);
5609
5610 switch (*cc)
5611 {
 /* The class may match zero times: scan what follows the repeat
 opcode from the same position first. */
5612 case OP_CRSTAR:
5613 case OP_CRMINSTAR:
5614 case OP_CRPOSSTAR:
5615 case OP_CRQUERY:
5616 case OP_CRMINQUERY:
5617 case OP_CRPOSQUERY:
5618 max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
5619 if (max_chars == 0)
5620 return consumed;
5621 break;
5622
5623 default:
5624 case OP_CRPLUS:
5625 case OP_CRMINPLUS:
5626 case OP_CRPOSPLUS:
5627 break;
5628
 /* Ranges: the minimum count gives the number of positions to fill;
 a minimum of zero ends the prefix. */
5629 case OP_CRRANGE:
5630 case OP_CRMINRANGE:
5631 case OP_CRPOSRANGE:
5632 repeat = GET2(cc, 1);
5633 if (repeat <= 0)
5634 return consumed;
5635 break;
5636 }
5637
5638 do
5639 {
 /* A class with the top bit of the last bitmap byte set (character
 255) is treated as unconstrained. Otherwise every set bit is added
 as a possible value; chr tracks the character value of the lowest
 bit of the byte being examined. */
5640 if (bytes[31] & 0x80)
5641 chars->count = 255;
5642 else if (chars->count != 255)
5643 {
5644 bytes_end = bytes + 32;
5645 chr = 0;
5646 do
5647 {
5648 byte = *bytes++;
5649 SLJIT_ASSERT((chr & 0x7) == 0);
5650 if (byte == 0)
5651 chr += 8;
5652 else
5653 {
5654 do
5655 {
5656 if ((byte & 0x1) != 0)
5657 add_prefix_char(chr, chars, TRUE);
5658 byte >>= 1;
5659 chr++;
5660 }
5661 while (byte != 0);
 /* Round up to the first character of the next byte. */
5662 chr = (chr + 7) & ~7;
5663 }
5664 }
5665 while (chars->count != 255 && bytes < bytes_end);
 /* Rewind to the start of the bitmap for the next repetition. */
5666 bytes = bytes_end - 32;
5667 }
5668
5669 consumed++;
5670 if (--max_chars == 0)
5671 return consumed;
5672 chars++;
5673 }
5674 while (--repeat > 0);
5675
 /* Decide whether the scan may continue after the class. Opcodes not
 listed here (including the plus repeats and a class without a repeat)
 leave cc unchanged and simply continue with the item at cc. */
5676 switch (*cc)
5677 {
5678 case OP_CRSTAR:
5679 case OP_CRMINSTAR:
5680 case OP_CRPOSSTAR:
5681 return consumed;
5682
5683 case OP_CRQUERY:
5684 case OP_CRMINQUERY:
5685 case OP_CRPOSQUERY:
5686 cc++;
5687 break;
5688
 /* Only a range whose minimum equals its maximum has a fixed length. */
5689 case OP_CRRANGE:
5690 case OP_CRMINRANGE:
5691 case OP_CRPOSRANGE:
5692 if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
5693 return consumed;
5694 cc += 1 + 2 * IMM2_SIZE;
5695 break;
5696 }
5697
5698 repeat = 1;
5699 continue;
5700 }
5701
 /* Literal character at cc: len is its length in code units. */
5702 len = 1;
5703#ifdef SUPPORT_UNICODE
5704 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5705#endif
5706
5707 if (caseless && char_has_othercase(common, cc))
5708 {
5709#ifdef SUPPORT_UNICODE
5710 if (common->utf)
5711 {
 /* The other case is usable only when it encodes to the same number
 of code units as the character itself. */
5712 GETCHAR(chr, cc);
5713 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
5714 return consumed;
5715 }
5716 else
5717#endif
5718 {
5719 chr = *cc;
5720#ifdef SUPPORT_UNICODE
5721 if (common->ucp && chr > 127)
5722 othercase[0] = UCD_OTHERCASE(chr);
5723 else
5724#endif
5725 othercase[0] = TABLE_GET(chr, common->fcc, chr);
5726 }
5727 }
5728 else
5729 {
5730 caseless = FALSE;
5731 othercase[0] = 0; /* Stops compiler warning - PH */
5732 }
5733
 /* Record the literal 'repeat' times, one position per code unit. cc and
 len are rewound between repetitions; after the last one cc is left
 pointing past the literal. */
5734 len_save = len;
5735 cc_save = cc;
5736 while (TRUE)
5737 {
5738 oc = othercase;
5739 do
5740 {
5741 len--;
5742 consumed++;
5743
5744 chr = *cc;
 /* The third argument is TRUE only for the final code unit of the
 character. */
5745 add_prefix_char(*cc, chars, len == 0);
5746
5747 if (caseless)
5748 add_prefix_char(*oc, chars, len == 0);
5749
5750 if (--max_chars == 0)
5751 return consumed;
5752 chars++;
5753 cc++;
5754 oc++;
5755 }
5756 while (len > 0);
5757
5758 if (--repeat == 0)
5759 break;
5760
5761 len = len_save;
5762 cc = cc_save;
5763 }
5764
5765 repeat = 1;
5766 if (last)
5767 return consumed;
5768 }
5769}
5770
5771#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5772static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
5773{
/* Emits a test on the code unit held in reg and a jump to label when the unit
is not the first unit of a character:
  8-bit:  (unit & 0xc0) == 0x80
  16-bit: (unit & 0xfc00) == 0xdc00
The masking is done in place, so the value in reg is overwritten. */
5774#if PCRE2_CODE_UNIT_WIDTH == 8
5775OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0);
5776CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0x80, label);
5777#elif PCRE2_CODE_UNIT_WIDTH == 16
5778OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00);
5779CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label);
5780#else
5781#error "Unknown code width"
5782#endif
5783}
5784#endif
5785
5786#include "pcre2_jit_simd_inc.h"
5787
5788#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
5789
5790static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
5791{
5792 sljit_s32 i, j, max_i = 0, max_j = 0;
5793 sljit_u32 max_pri = 0;
5794 PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;
5795
5796 for (i = max - 1; i >= 1; i--)
5797 {
5798 if (chars[i].last_count > 2)
5799 {
5800 a1 = chars[i].chars[0];
5801 a2 = chars[i].chars[1];
5802 a_pri = chars[i].last_count;
5803
5804 j = i - max_fast_forward_char_pair_offset();
5805 if (j < 0)
5806 j = 0;
5807
5808 while (j < i)
5809 {
5810 b_pri = chars[j].last_count;
5811 if (b_pri > 2 && a_pri + b_pri >= max_pri)
5812 {
5813 b1 = chars[j].chars[0];
5814 b2 = chars[j].chars[1];
5815
5816 if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2)
5817 {
5818 max_pri = a_pri + b_pri;
5819 max_i = i;
5820 max_j = j;
5821 }
5822 }
5823 j++;
5824 }
5825 }
5826 }
5827
5828if (max_pri == 0)
5829 return FALSE;
5830
5831fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]);
5832return TRUE;
5833}
5834
5835#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
5836
5837static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
5838{
/* Emits code that moves STR_PTR forward to the next start position whose code
unit 'offset' units further on equals char1 or char2. Passing the same value
twice searches for a single code unit.

  common  compiler state (mode, utf, match_end_ptr and failed_match are used)
  char1   first accepted code unit
  char2   second accepted code unit
  offset  distance, in code units, from the start position to the tested
          unit; must be 0 unless the mode is PCRE2_JIT_COMPLETE

As emitted here: TMP1 is scratch, and TMP3 saves STR_END while STR_END is
temporarily lowered to the limit read from the common->match_end_ptr slot.
When the (possibly lowered) end is reached, PCRE2_JIT_COMPLETE mode jumps to
common->failed_match; other modes continue after the loop without moving
STR_PTR back. */
5839DEFINE_COMPILER;
5840struct sljit_label *start;
5841struct sljit_jump *match;
5842struct sljit_jump *partial_quit;
5843PCRE2_UCHAR mask;
5844BOOL has_match_end = (common->match_end_ptr != 0);
5845
5846SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);
5847
5848if (has_match_end)
5849 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
5850
/* From here on STR_PTR points at the tested code unit, not the start. */
5851if (offset > 0)
5852 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
5853
5854if (has_match_end)
5855 {
 /* STR_END = min(STR_END, stored limit + offset + 1 code units). */
5856 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
5857
5858 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
5859 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
5860 CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
5861 }
5862
5863#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
5864
/* SIMD variant: the search itself is emitted by fast_forward_char_simd(); only
the offset adjustment and STR_END are undone here. */
5865if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
5866 {
5867 fast_forward_char_simd(common, char1, char2, offset);
5868
5869 if (offset > 0)
5870 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
5871
5872 if (has_match_end)
5873 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
5874 return;
5875 }
5876
5877#endif
5878
/* Scalar search loop. */
5879start = LABEL();
5880
5881partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5882if (common->mode == PCRE2_JIT_COMPLETE)
5883 add_jump(compiler, &common->failed_match, partial_quit);
5884
5885OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5886OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5887
5888if (char1 == char2)
5889 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start);
5890else
5891 {
 /* When the two values differ in exactly one bit, that bit is forced to 1
 and a single comparison covers both; otherwise two comparisons are used. */
5892 mask = char1 ^ char2;
5893 if (is_powerof2(mask))
5894 {
5895 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
5896 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start);
5897 }
5898 else
5899 {
5900 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
5901 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start);
5902 JUMPHERE(match);
5903 }
5904 }
5905
5906#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* In UTF mode the candidate start (offset + 1 units back, because STR_PTR was
already incremented) must be the first code unit of a character. */
5907if (common->utf && offset > 0)
5908 {
5909 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
5910 jumpto_if_not_utf_char_start(compiler, TMP1, start);
5911 }
5912#endif
5913
/* Move STR_PTR back from just past the tested unit to the start position. */
5914OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
5915
5916if (common->mode != PCRE2_JIT_COMPLETE)
5917 JUMPHERE(partial_quit);
5918
5919if (has_match_end)
5920 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
5921}
5922
5923static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
5924{
/* Analyses up to MAX_N_CHARS leading code unit positions of the pattern and
emits a fast-forward search based on them. Steps:
  1. scan_prefix() collects the possible values of each position.
  2. last_count of each position is replaced by a priority (higher is better).
  3. If available, the SIMD character pair search is tried first.
  4. The longest run of at least 4 constrained positions is located, and a
     256-entry skip table is built for it.
  5. The best remaining position ('offset') is selected for an exact test.
  6. The search loop is emitted; without a run, fast_forward_first_char2()
     is used on the selected position instead.
Returns TRUE when search code was emitted (and also when the skip table could
not be allocated), FALSE when the prefix offers nothing usable. */
5925DEFINE_COMPILER;
5926struct sljit_label *start;
5927struct sljit_jump *match;
5928fast_forward_char_data chars[MAX_N_CHARS];
5929sljit_s32 offset;
5930PCRE2_UCHAR mask;
5931PCRE2_UCHAR *char_set, *char_set_end;
5932int i, max, from;
5933int range_right = -1, range_len;
5934sljit_u8 *update_table = NULL;
5935BOOL in_range;
5936sljit_u32 rec_count;
5937
5938for (i = 0; i < MAX_N_CHARS; i++)
5939 {
5940 chars[i].count = 0;
5941 chars[i].last_count = 0;
5942 }
5943
/* Work budget shared by all recursive scan_prefix() calls. */
5944rec_count = 10000;
5945max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
5946
5947if (max < 1)
5948 return FALSE;
5949
5950/* Convert last_count to priority. */
/* Resulting priorities:
  7 / 5  one value (7 when it was counted in last_count)
  6 / 4  two values differing in a single bit
  3 / 2  two other values
  1      more than two values
  0      unconstrained position (count == 255) */
5951for (i = 0; i < max; i++)
5952 {
5953 SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);
5954
5955 if (chars[i].count == 1)
5956 {
5957 chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
5958 /* Simplifies algorithms later. */
5959 chars[i].chars[1] = chars[i].chars[0];
5960 }
5961 else if (chars[i].count == 2)
5962 {
5963 SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
5964
5965 if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
5966 chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
5967 else
5968 chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
5969 }
5970 else
5971 chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
5972 }
5973
5974#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
5975if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
5976 return TRUE;
5977#endif
5978
/* Find the longest run of consecutive constrained positions. range_len starts
at 3, so only runs of 4 or more positions are accepted; range_right is the
index of the last position of the chosen run. The loop runs one step past
the last entry so that a run ending at max - 1 is also recorded. */
5979in_range = FALSE;
5980/* Prevent compiler "uninitialized" warning */
5981from = 0;
5982range_len = 4 /* minimum length */ - 1;
5983for (i = 0; i <= max; i++)
5984 {
5985 if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
5986 {
5987 range_len = i - from;
5988 range_right = i - 1;
5989 }
5990
5991 if (i < max && chars[i].count < 255)
5992 {
5993 SLJIT_ASSERT(chars[i].count > 0);
5994 if (!in_range)
5995 {
5996 in_range = TRUE;
5997 from = i;
5998 }
5999 }
6000 else
6001 in_range = FALSE;
6002 }
6003
6004if (range_right >= 0)
6005 {
 /* Build the skip table, indexed by the low byte of a code unit. Every
 entry starts as the full run length (in bytes); a value that occurs i
 positions before the right end of the run lowers its entry to i code
 units, so values found at the right end itself get 0. */
6006 update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
 /* NOTE(review): TRUE is returned without emitting code when the
 allocation fails; presumably the failure is recorded by
 allocate_read_only_data() - confirm. */
6007 if (update_table == NULL)
6008 return TRUE;
6009 memset(update_table, IN_UCHARS(range_len), 256);
6010
6011 for (i = 0; i < range_len; i++)
6012 {
6013 SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
6014
6015 char_set = chars[range_right - i].chars;
6016 char_set_end = char_set + chars[range_right - i].count;
6017 do
6018 {
6019 if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
6020 update_table[(*char_set) & 0xff] = IN_UCHARS(i);
6021 char_set++;
6022 }
6023 while (char_set < char_set_end);
6024 }
6025 }
6026
/* Select the position for the exact test: the first position with priority 2
or more is the initial candidate, later positions replace it only with a
strictly higher priority, and range_right is never selected. offset stays
-1 when nothing qualifies. */
6027offset = -1;
6028/* Scan forward. */
6029for (i = 0; i < max; i++)
6030 {
6031 if (range_right == i)
6032 continue;
6033
6034 if (offset == -1)
6035 {
6036 if (chars[i].last_count >= 2)
6037 offset = i;
6038 }
6039 else if (chars[offset].last_count < chars[i].last_count)
6040 offset = i;
6041 }
6042
6043SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
6044
6045if (range_right < 0)
6046 {
6047 if (offset < 0)
6048 return FALSE;
6049 /* Works regardless of whether the count is 1 or 2: chars[1] was set equal
 to chars[0] above when the count is 1. */
6050 fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
6051 return TRUE;
6052 }
6053
6054SLJIT_ASSERT(range_right != offset);
6055
/* Lower STR_END by max code units for the duration of the search; if that
underflows, the match fails. With a stored match end, the original STR_END
is saved in TMP3 and the lowered value is additionally clamped to the stored
limit. */
6056if (common->match_end_ptr != 0)
6057 {
6058 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6059 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6060 OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6061 add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6062 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
6063 CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6064 }
6065else
6066 {
6067 OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6068 add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6069 }
6070
6071SLJIT_ASSERT(range_right >= 0);
6072
/* Without virtual registers the table address is kept in RETURN_ADDR for the
whole loop; otherwise it is used as an immediate displacement below. */
6073if (!HAS_VIRTUAL_REGISTERS)
6074 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
6075
/* Search loop: read the low byte of the code unit at the right end of the run,
look up its skip value, advance STR_PTR by it and repeat until the skip is 0. */
6076start = LABEL();
6077add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6078
/* The byte read is the first byte of the code unit on 8-bit or little-endian
builds and the last byte of the code unit otherwise. */
6079#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
6080OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
6081#else
6082OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
6083#endif
6084
6085if (!HAS_VIRTUAL_REGISTERS)
6086 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
6087else
6088 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
6089
6090OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6091CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
6092
6093if (offset >= 0)
6094 {
 /* Exact test of the selected position. STR_PTR is advanced by one unit
 before the comparison, so a failed test resumes the search at the next
 start position; the increment is undone further below on success. */
6095 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
6096 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6097
6098 if (chars[offset].count == 1)
6099 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
6100 else
6101 {
 /* Same one-bit-difference shortcut as in fast_forward_first_char2(). */
6102 mask = chars[offset].chars[0] ^ chars[offset].chars[1];
6103 if (is_powerof2(mask))
6104 {
6105 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6106 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
6107 }
6108 else
6109 {
6110 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
6111 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
6112 JUMPHERE(match);
6113 }
6114 }
6115 }
6116
6117#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* In UTF mode the start position must be the first code unit of a character.
The check is skipped when offset is 0. When there was no exact test
(offset < 0), STR_PTR is advanced here so that a rejected position resumes
the search one unit later, and moved back on success. */
6118if (common->utf && offset != 0)
6119 {
6120 if (offset < 0)
6121 {
6122 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6123 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6124 }
6125 else
6126 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6127
6128 jumpto_if_not_utf_char_start(compiler, TMP1, start);
6129
6130 if (offset < 0)
6131 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6132 }
6133#endif
6134
/* Undo the increment made before the exact test. */
6135if (offset >= 0)
6136 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6137
/* Restore STR_END: from TMP3 when it was saved, otherwise by adding back max. */
6138if (common->match_end_ptr != 0)
6139 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6140else
6141 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6142return TRUE;
6143}
6144
6145static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6146{
6147PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6148PCRE2_UCHAR oc;
6149
6150oc = first_char;
6151if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6152 {
6153 oc = TABLE_GET(first_char, common->fcc, first_char);
6154#if defined SUPPORT_UNICODE
6155 if (first_char > 127 && (common->utf || common->ucp))
6156 oc = UCD_OTHERCASE(first_char);
6157#endif
6158 }
6159
6160fast_forward_first_char2(common, first_char, oc, 0);
6161}
6162
6163static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
6164{
6165DEFINE_COMPILER;
6166struct sljit_label *loop;
6167struct sljit_jump *lastchar = NULL;
6168struct sljit_jump *firstchar;
6169struct sljit_jump *quit = NULL;
6170struct sljit_jump *foundcr = NULL;
6171struct sljit_jump *notfoundnl;
6172jump_list *newline = NULL;
6173
6174if (common->match_end_ptr != 0)
6175 {
6176 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6177 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6178 }
6179
6180if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6181 {
6182#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6183 if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
6184 {
6185 if (HAS_VIRTUAL_REGISTERS)
6186 {
6187 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6188 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6189 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6190 }
6191 else
6192 {
6193 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6194 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6195 }
6196 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6197
6198 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6199 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
6200 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
6201#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6202 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6203#endif
6204 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6205
6206 fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
6207 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6208 }
6209 else
6210#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6211 {
6212 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6213 if (HAS_VIRTUAL_REGISTERS)
6214 {
6215 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6216 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6217 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6218 }
6219 else
6220 {
6221 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6222 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6223 }
6224 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6225
6226 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
6227 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
6228 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
6229#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6230 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
6231#endif
6232 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6233
6234 loop = LABEL();
6235 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6236 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6237 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6238 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6239 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
6240 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
6241
6242 JUMPHERE(quit);
6243 JUMPHERE(lastchar);
6244 }
6245
6246 JUMPHERE(firstchar);
6247
6248 if (common->match_end_ptr != 0)
6249 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6250 return;
6251 }
6252
6253if (HAS_VIRTUAL_REGISTERS)
6254 {
6255 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6256 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6257 }
6258else
6259 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6260
6261/* Example: match /^/ to \r\n from offset 1. */
6262firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6263
6264if (common->nltype == NLTYPE_ANY)
6265 move_back(common, NULL, FALSE);
6266else
6267 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6268
6269loop = LABEL();
6270common->ff_newline_shortcut = loop;
6271
6272#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6273if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
6274 {
6275 if (common->nltype == NLTYPE_ANYCRLF)
6276 {
6277 fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
6278 if (common->mode != PCRE2_JIT_COMPLETE)
6279 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6280
6281 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6282 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6283 quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6284 }
6285 else
6286 {
6287 fast_forward_char_simd(common, common->newline, common->newline, 0);
6288
6289 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6290 if (common->mode != PCRE2_JIT_COMPLETE)
6291 {
6292 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
6293 CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
6294 }
6295 }
6296 }
6297else
6298#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
6299 {
6300 read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
6301 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6302 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6303 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6304 check_newlinechar(common, common->nltype, &newline, FALSE);
6305 set_jumps(newline, loop);
6306 }
6307
6308if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6309 {
6310 if (quit == NULL)
6311 {
6312 quit = JUMP(SLJIT_JUMP);
6313 JUMPHERE(foundcr);
6314 }
6315
6316 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6317 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6318 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
6319 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6320#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6321 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6322#endif
6323 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6324 JUMPHERE(notfoundnl);
6325 JUMPHERE(quit);
6326 }
6327
6328if (lastchar)
6329 JUMPHERE(lastchar);
6330JUMPHERE(firstchar);
6331
6332if (common->match_end_ptr != 0)
6333 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6334}
6335
6336static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6337
6338static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
6339{
6340DEFINE_COMPILER;
6341const sljit_u8 *start_bits = common->re->start_bitmap;
6342struct sljit_label *start;
6343struct sljit_jump *partial_quit;
6344#if PCRE2_CODE_UNIT_WIDTH != 8
6345struct sljit_jump *found = NULL;
6346#endif
6347jump_list *matches = NULL;
6348
6349if (common->match_end_ptr != 0)
6350 {
6351 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6352 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
6353 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6354 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
6355 CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6356 }
6357
6358start = LABEL();
6359
6360partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6361if (common->mode == PCRE2_JIT_COMPLETE)
6362 add_jump(compiler, &common->failed_match, partial_quit);
6363
6364OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6365OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6366
6367if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
6368 {
6369#if PCRE2_CODE_UNIT_WIDTH != 8
6370 if ((start_bits[31] & 0x80) != 0)
6371 found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
6372 else
6373 CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
6374#elif defined SUPPORT_UNICODE
6375 if (common->utf && is_char7_bitset(start_bits, FALSE))
6376 CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
6377#endif
6378 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6379 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6380 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
6381 if (!HAS_VIRTUAL_REGISTERS)
6382 {
6383 OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
6384 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0);
6385 }
6386 else
6387 {
6388 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
6389 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
6390 }
6391 JUMPTO(SLJIT_ZERO, start);
6392 }
6393else
6394 set_jumps(matches, start);
6395
6396#if PCRE2_CODE_UNIT_WIDTH != 8
6397if (found != NULL)
6398 JUMPHERE(found);
6399#endif
6400
6401OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6402
6403if (common->mode != PCRE2_JIT_COMPLETE)
6404 JUMPHERE(partial_quit);
6405
6406if (common->match_end_ptr != 0)
6407 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
6408}
6409
6410static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6411{
6412DEFINE_COMPILER;
6413struct sljit_label *loop;
6414struct sljit_jump *toolong;
6415struct sljit_jump *already_found;
6416struct sljit_jump *found;
6417struct sljit_jump *found_oc = NULL;
6418jump_list *not_found = NULL;
6419sljit_u32 oc, bit;
6420
6421SLJIT_ASSERT(common->req_char_ptr != 0);
6422OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
6423OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6424toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
6425already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
6426
6427if (has_firstchar)
6428 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6429else
6430 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6431
6432oc = req_char;
6433if (caseless)
6434 {
6435 oc = TABLE_GET(req_char, common->fcc, req_char);
6436#if defined SUPPORT_UNICODE
6437 if (req_char > 127 && (common->utf || common->ucp))
6438 oc = UCD_OTHERCASE(req_char);
6439#endif
6440 }
6441
6442#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
6443if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
6444 {
6445 not_found = fast_requested_char_simd(common, req_char, oc);
6446 }
6447else
6448#endif
6449 {
6450 loop = LABEL();
6451 add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
6452
6453 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6454
6455 if (req_char == oc)
6456 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6457 else
6458 {
6459 bit = req_char ^ oc;
6460 if (is_powerof2(bit))
6461 {
6462 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6463 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6464 }
6465 else
6466 {
6467 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6468 found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6469 }
6470 }
6471 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6472 JUMPTO(SLJIT_JUMP, loop);
6473
6474 JUMPHERE(found);
6475 if (found_oc)
6476 JUMPHERE(found_oc);
6477 }
6478
6479OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6480
6481JUMPHERE(already_found);
6482JUMPHERE(toolong);
6483return not_found;
6484}
6485
6486static void do_revertframes(compiler_common *common)
6487{
6488DEFINE_COMPILER;
6489struct sljit_jump *jump;
6490struct sljit_label *mainloop;
6491
6492sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
6493GET_LOCAL_BASE(TMP1, 0, 0);
6494
6495/* Drop frames until we reach STACK_TOP. */
6496mainloop = LABEL();
6497OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
6498jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6499
6500OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
6501if (HAS_VIRTUAL_REGISTERS)
6502 {
6503 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
6504 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
6505 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
6506 }
6507else
6508 {
6509 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
6510 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
6511 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
6512 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
6513 GET_LOCAL_BASE(TMP1, 0, 0);
6514 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
6515 }
6516JUMPTO(SLJIT_JUMP, mainloop);
6517
6518JUMPHERE(jump);
6519jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
6520/* End of reverting values. */
6521OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
6522
6523JUMPHERE(jump);
6524OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
6525OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
6526if (HAS_VIRTUAL_REGISTERS)
6527 {
6528 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
6529 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
6530 }
6531else
6532 {
6533 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
6534 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
6535 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
6536 }
6537JUMPTO(SLJIT_JUMP, mainloop);
6538}
6539
6540static void check_wordboundary(compiler_common *common)
6541{
6542DEFINE_COMPILER;
6543struct sljit_jump *skipread;
6544jump_list *skipread_list = NULL;
6545#ifdef SUPPORT_UNICODE
6546struct sljit_label *valid_utf;
6547jump_list *invalid_utf1 = NULL;
6548#endif /* SUPPORT_UNICODE */
6549jump_list *invalid_utf2 = NULL;
6550#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
6551struct sljit_jump *jump;
6552#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */
6553
6554SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
6555
6556sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6557/* Get type of the previous char, and put it to TMP3. */
6558OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6559OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6560OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
6561skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6562
6563#ifdef SUPPORT_UNICODE
6564if (common->invalid_utf)
6565 {
6566 peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);
6567
6568 if (common->mode != PCRE2_JIT_COMPLETE)
6569 {
6570 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
6571 OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
6572 move_back(common, NULL, TRUE);
6573 check_start_used_ptr(common);
6574 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
6575 OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
6576 }
6577 }
6578else
6579#endif /* SUPPORT_UNICODE */
6580 {
6581 if (common->mode == PCRE2_JIT_COMPLETE)
6582 peek_char_back(common, READ_CHAR_MAX, NULL);
6583 else
6584 {
6585 move_back(common, NULL, TRUE);
6586 check_start_used_ptr(common);
6587 read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
6588 }
6589 }
6590
6591/* Testing char type. */
6592#ifdef SUPPORT_UNICODE
6593if (common->ucp)
6594 {
6595 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
6596 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
6597 add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
6598 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
6599 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
6600 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6601 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
6602 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
6603 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6604 JUMPHERE(jump);
6605 OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
6606 }
6607else
6608#endif /* SUPPORT_UNICODE */
6609 {
6610#if PCRE2_CODE_UNIT_WIDTH != 8
6611 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6612#elif defined SUPPORT_UNICODE
6613 /* Here TMP3 has already been zeroed. */
6614 jump = NULL;
6615 if (common->utf)
6616 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6617#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6618 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
6619 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
6620 OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
6621#if PCRE2_CODE_UNIT_WIDTH != 8
6622 JUMPHERE(jump);
6623#elif defined SUPPORT_UNICODE
6624 if (jump != NULL)
6625 JUMPHERE(jump);
6626#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6627 }
6628JUMPHERE(skipread);
6629
6630OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6631check_str_end(common, &skipread_list);
6632peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);
6633
6634/* Testing char type. This is a code duplication. */
6635#ifdef SUPPORT_UNICODE
6636
6637valid_utf = LABEL();
6638
6639if (common->ucp)
6640 {
6641 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
6642 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
6643 add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
6644 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
6645 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
6646 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6647 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
6648 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
6649 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6650 JUMPHERE(jump);
6651 }
6652else
6653#endif /* SUPPORT_UNICODE */
6654 {
6655#if PCRE2_CODE_UNIT_WIDTH != 8
6656 /* TMP2 may be destroyed by peek_char. */
6657 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6658 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6659#elif defined SUPPORT_UNICODE
6660 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6661 jump = NULL;
6662 if (common->utf)
6663 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6664#endif
6665 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
6666 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
6667 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6668#if PCRE2_CODE_UNIT_WIDTH != 8
6669 JUMPHERE(jump);
6670#elif defined SUPPORT_UNICODE
6671 if (jump != NULL)
6672 JUMPHERE(jump);
6673#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6674 }
6675set_jumps(skipread_list, LABEL());
6676
6677OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6678OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
6679OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
6680
6681#ifdef SUPPORT_UNICODE
6682if (common->invalid_utf)
6683 {
6684 set_jumps(invalid_utf1, LABEL());
6685
6686 peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
6687 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);
6688
6689 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6690 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
6691 OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
6692
6693 set_jumps(invalid_utf2, LABEL());
6694 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6695 OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
6696 OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
6697 }
6698#endif /* SUPPORT_UNICODE */
6699}
6700
6701static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
6702{
6703/* May destroy TMP1. */
6704DEFINE_COMPILER;
6705int ranges[MAX_CLASS_RANGE_SIZE];
6706sljit_u8 bit, cbit, all;
6707int i, byte, length = 0;
6708
6709bit = bits[0] & 0x1;
6710/* All bits will be zero or one (since bit is zero or one). */
6711all = -bit;
6712
6713for (i = 0; i < 256; )
6714 {
6715 byte = i >> 3;
6716 if ((i & 0x7) == 0 && bits[byte] == all)
6717 i += 8;
6718 else
6719 {
6720 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
6721 if (cbit != bit)
6722 {
6723 if (length >= MAX_CLASS_RANGE_SIZE)
6724 return FALSE;
6725 ranges[length] = i;
6726 length++;
6727 bit = cbit;
6728 all = -cbit;
6729 }
6730 i++;
6731 }
6732 }
6733
6734if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
6735 {
6736 if (length >= MAX_CLASS_RANGE_SIZE)
6737 return FALSE;
6738 ranges[length] = 256;
6739 length++;
6740 }
6741
6742if (length < 0 || length > 4)
6743 return FALSE;
6744
6745bit = bits[0] & 0x1;
6746if (invert) bit ^= 0x1;
6747
6748/* No character is accepted. */
6749if (length == 0 && bit == 0)
6750 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6751
6752switch(length)
6753 {
6754 case 0:
6755 /* When bit != 0, all characters are accepted. */
6756 return TRUE;
6757
6758 case 1:
6759 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
6760 return TRUE;
6761
6762 case 2:
6763 if (ranges[0] + 1 != ranges[1])
6764 {
6765 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
6766 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
6767 }
6768 else
6769 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
6770 return TRUE;
6771
6772 case 3:
6773 if (bit != 0)
6774 {
6775 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
6776 if (ranges[0] + 1 != ranges[1])
6777 {
6778 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
6779 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
6780 }
6781 else
6782 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
6783 return TRUE;
6784 }
6785
6786 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
6787 if (ranges[1] + 1 != ranges[2])
6788 {
6789 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
6790 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
6791 }
6792 else
6793 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
6794 return TRUE;
6795
6796 case 4:
6797 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
6798 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
6799 && (ranges[1] & (ranges[2] - ranges[0])) == 0
6800 && is_powerof2(ranges[2] - ranges[0]))
6801 {
6802 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
6803 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
6804 if (ranges[2] + 1 != ranges[3])
6805 {
6806 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
6807 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
6808 }
6809 else
6810 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
6811 return TRUE;
6812 }
6813
6814 if (bit != 0)
6815 {
6816 i = 0;
6817 if (ranges[0] + 1 != ranges[1])
6818 {
6819 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
6820 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
6821 i = ranges[0];
6822 }
6823 else
6824 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
6825
6826 if (ranges[2] + 1 != ranges[3])
6827 {
6828 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
6829 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
6830 }
6831 else
6832 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
6833 return TRUE;
6834 }
6835
6836 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
6837 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
6838 if (ranges[1] + 1 != ranges[2])
6839 {
6840 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
6841 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
6842 }
6843 else
6844 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
6845 return TRUE;
6846
6847 default:
6848 SLJIT_UNREACHABLE();
6849 return FALSE;
6850 }
6851}
6852
6853static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
6854{
6855/* May destroy TMP1. */
6856DEFINE_COMPILER;
6857uint16_t char_list[MAX_CLASS_CHARS_SIZE];
6858uint8_t byte;
6859sljit_s32 type;
6860int i, j, k, len, c;
6861
6862if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
6863 return FALSE;
6864
6865len = 0;
6866
6867for (i = 0; i < 32; i++)
6868 {
6869 byte = bits[i];
6870
6871 if (nclass)
6872 byte = ~byte;
6873
6874 j = 0;
6875 while (byte != 0)
6876 {
6877 if (byte & 0x1)
6878 {
6879 c = i * 8 + j;
6880
6881 k = len;
6882
6883 if ((c & 0x20) != 0)
6884 {
6885 for (k = 0; k < len; k++)
6886 if (char_list[k] == c - 0x20)
6887 {
6888 char_list[k] |= 0x120;
6889 break;
6890 }
6891 }
6892
6893 if (k == len)
6894 {
6895 if (len >= MAX_CLASS_CHARS_SIZE)
6896 return FALSE;
6897
6898 char_list[len++] = (uint16_t) c;
6899 }
6900 }
6901
6902 byte >>= 1;
6903 j++;
6904 }
6905 }
6906
6907if (len == 0) return FALSE; /* Should never occur, but stops analyzers complaining. */
6908
6909i = 0;
6910j = 0;
6911
6912if (char_list[0] == 0)
6913 {
6914 i++;
6915 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0);
6916 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
6917 }
6918else
6919 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6920
6921while (i < len)
6922 {
6923 if ((char_list[i] & 0x100) != 0)
6924 j++;
6925 else
6926 {
6927 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i]);
6928 CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
6929 }
6930 i++;
6931 }
6932
6933if (j != 0)
6934 {
6935 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);
6936
6937 for (i = 0; i < len; i++)
6938 if ((char_list[i] & 0x100) != 0)
6939 {
6940 j--;
6941 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);
6942 CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
6943 }
6944 }
6945
6946if (invert)
6947 nclass = !nclass;
6948
6949type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
6950add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
6951return TRUE;
6952}
6953
6954static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
6955{
6956/* May destroy TMP1. */
6957if (optimize_class_ranges(common, bits, nclass, invert, backtracks))
6958 return TRUE;
6959return optimize_class_chars(common, bits, nclass, invert, backtracks);
6960}
6961
6962static void check_anynewline(compiler_common *common)
6963{
6964/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
6965DEFINE_COMPILER;
6966
6967sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
6968
6969OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
6970OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
6971OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6972OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
6973#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6974#if PCRE2_CODE_UNIT_WIDTH == 8
6975if (common->utf)
6976 {
6977#endif
6978 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
6979 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
6980 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
6981#if PCRE2_CODE_UNIT_WIDTH == 8
6982 }
6983#endif
6984#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
6985OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
6986OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
6987}
6988
6989static void check_hspace(compiler_common *common)
6990{
6991/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
6992DEFINE_COMPILER;
6993
6994sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
6995
6996OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
6997OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
6998OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
6999OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7000OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
7001#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7002#if PCRE2_CODE_UNIT_WIDTH == 8
7003if (common->utf)
7004 {
7005#endif
7006 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7007 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
7008 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7009 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
7010 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7011 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
7012 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
7013 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7014 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
7015 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7016 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
7017 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7018 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
7019#if PCRE2_CODE_UNIT_WIDTH == 8
7020 }
7021#endif
7022#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7023OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7024
7025OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7026}
7027
7028static void check_vspace(compiler_common *common)
7029{
7030/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
7031DEFINE_COMPILER;
7032
7033sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
7034
7035OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
7036OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
7037OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7038OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
7039#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7040#if PCRE2_CODE_UNIT_WIDTH == 8
7041if (common->utf)
7042 {
7043#endif
7044 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7045 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
7046 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
7047#if PCRE2_CODE_UNIT_WIDTH == 8
7048 }
7049#endif
7050#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
7051OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7052
7053OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7054}
7055
7056static void do_casefulcmp(compiler_common *common)
7057{
7058DEFINE_COMPILER;
7059struct sljit_jump *jump;
7060struct sljit_label *label;
7061int char1_reg;
7062int char2_reg;
7063
7064if (HAS_VIRTUAL_REGISTERS)
7065 {
7066 char1_reg = STR_END;
7067 char2_reg = STACK_TOP;
7068 }
7069else
7070 {
7071 char1_reg = TMP3;
7072 char2_reg = RETURN_ADDR;
7073 }
7074
7075sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7076OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
7077
7078if (char1_reg == STR_END)
7079 {
7080 OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
7081 OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
7082 }
7083
7084if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7085 {
7086 label = LABEL();
7087 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7088 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7089 jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7090 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7091 JUMPTO(SLJIT_NOT_ZERO, label);
7092
7093 JUMPHERE(jump);
7094 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7095 }
7096else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7097 {
7098 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7099 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7100
7101 label = LABEL();
7102 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7103 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7104 jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7105 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7106 JUMPTO(SLJIT_NOT_ZERO, label);
7107
7108 JUMPHERE(jump);
7109 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7110 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7111 }
7112else
7113 {
7114 label = LABEL();
7115 OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
7116 OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
7117 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7118 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7119 jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7120 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7121 JUMPTO(SLJIT_NOT_ZERO, label);
7122
7123 JUMPHERE(jump);
7124 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7125 }
7126
7127if (char1_reg == STR_END)
7128 {
7129 OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
7130 OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
7131 }
7132
7133OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7134}
7135
/* Emits the fast-call helper that compares two code unit sequences
caselessly, mapping every code unit through the common->lcc table first.

Register use, as established by the emitted code:
  TMP1     pointer to the first sequence, advanced while comparing
  STR_PTR  moved back by TMP2 bytes on entry; the second sequence is then
           read forward from that position
  TMP2     remaining length in bytes; it is decremented by IN_UCHARS(1)
           after every pair of equal units, so it reaches zero only when
           the loop ran to completion

The return address is parked in LOCALS0 and the incoming value of STR_END
(borrowed as the first character register) in LOCALS1; both are reloaded
before the fast return.
NOTE(review): how the caller evaluates the outcome (TMP2 and/or the flags of
the last subtraction) is not visible in this function - confirm at the call
sites. */
7136static void do_caselesscmp(compiler_common *common)
7137{
7138DEFINE_COMPILER;
7139struct sljit_jump *jump;
7140struct sljit_label *label;
7141int char1_reg = STR_END;
7142int char2_reg;
7143int lcc_table;
7144int opt_type = 0;
7145
/* Pick the second character register and the register that holds the
lower-case table address. */
7146if (HAS_VIRTUAL_REGISTERS)
7147 {
7148 char2_reg = STACK_TOP;
7149 lcc_table = STACK_LIMIT;
7150 }
7151else
7152 {
7153 char2_reg = RETURN_ADDR;
7154 lcc_table = TMP3;
7155 }
7156
/* Ask sljit (SLJIT_MEM_SUPP) which load-with-update form can be used:
opt_type 1 = post-update, 2 = pre-update, 0 = neither (plain load + add). */
7157if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7158 opt_type = 1;
7159else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7160 opt_type = 2;
7161
/* Routine entry: store the return address, rewind STR_PTR by the length. */
7162sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7163OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
7164
/* char1_reg is STR_END, so its value must survive the routine. */
7165OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);
7166
/* In the virtual register configuration STACK_TOP and STACK_LIMIT are
borrowed; keep their values in TMP3 and RETURN_ADDR meanwhile. */
7167if (char2_reg == STACK_TOP)
7168 {
7169 OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
7170 OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
7171 }
7172
7173OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
7174
/* Loop head: load one code unit from each sequence. */
7175if (opt_type == 1)
7176 {
7177 label = LABEL();
7178 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7179 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7180 }
7181else if (opt_type == 2)
7182 {
 /* Pre-update loads advance the pointer before reading, so both pointers
 start one code unit early; STR_PTR is corrected again after the loop. */
7183 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7184 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7185
7186 label = LABEL();
7187 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7188 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7189 }
7190else
7191 {
7192 label = LABEL();
7193 OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
7194 OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
7195 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7196 }
7197
/* Map both units through the table. When code units are wider than 8 bits,
values above 255 skip the table lookup and are compared unchanged. */
7198#if PCRE2_CODE_UNIT_WIDTH != 8
7199jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
7200#endif
7201OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
7202#if PCRE2_CODE_UNIT_WIDTH != 8
7203JUMPHERE(jump);
7204jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
7205#endif
7206OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
7207#if PCRE2_CODE_UNIT_WIDTH != 8
7208JUMPHERE(jump);
7209#endif
7210
/* Without an updating load form STR_PTR is advanced explicitly. */
7211if (opt_type == 0)
7212 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7213
/* Leave the loop on the first difference, otherwise count down TMP2 and
repeat until it reaches zero. */
7214jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7215OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7216JUMPTO(SLJIT_NOT_ZERO, label);
7217
7218JUMPHERE(jump);
/* Reload the return address saved at entry. */
7219OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7220
7221if (opt_type == 2)
7222 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7223
/* Give the borrowed registers their original values back. */
7224if (char2_reg == STACK_TOP)
7225 {
7226 OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
7227 OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
7228 }
7229
7230OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
7231OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7232}
7233
/* Emits code that compares the subject against one pattern character: a
single code unit, or all code units of the character when common->utf is set
and HAS_EXTRALEN(*cc) reports extra units.

Arguments:
  common      compiler state
  caseless    when TRUE and char_has_othercase() holds, the differing bit
              obtained from char_get_othercase_bit() is OR-ed into both
              sides of the comparison
  cc          the pattern character
  context     running compare state: the remaining length in bytes, the
              register the next load goes to (sourcereg, -1 before the first
              load), and the code units collected so far (c, oc, ucharptr);
              presumably shared by consecutive calls - confirm at the caller
  backtracks  receives the jumps taken on a mismatch

Returns:      cc advanced past the code units that were processed

The subject is always addressed as STR_PTR minus context->length. */
7234static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
7235 compare_context *context, jump_list **backtracks)
7236{
7237DEFINE_COMPILER;
7238unsigned int othercasebit = 0;
7239PCRE2_SPTR othercasechar = NULL;
7240#ifdef SUPPORT_UNICODE
7241int utflength;
7242#endif
7243
7244if (caseless && char_has_othercase(common, cc))
7245 {
7246 othercasebit = char_get_othercase_bit(common, cc);
7247 SLJIT_ASSERT(othercasebit);
7248 /* Extracting bit difference info. */
 /* The upper part of the value is the index of the code unit that carries
 the differing bit (othercasechar points at it); the low byte is the mask. */
7249#if PCRE2_CODE_UNIT_WIDTH == 8
7250 othercasechar = cc + (othercasebit >> 8);
7251 othercasebit &= 0xff;
7252#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7253 /* Note that this code only handles characters in the BMP. If there
7254 ever are characters outside the BMP whose othercase differs in only one
7255 bit from itself (there currently are none), this code will need to be
7256 revised for PCRE2_CODE_UNIT_WIDTH == 32. */
7257 othercasechar = cc + (othercasebit >> 9);
 /* Bit 0x100 selects the high byte of the code unit for the mask. */
7258 if ((othercasebit & 0x100) != 0)
7259 othercasebit = (othercasebit & 0xff) << 8;
7260 else
7261 othercasebit &= 0xff;
7262#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
7263 }
7264
/* First use of this context: load the first chunk into TMP1 and make TMP2
the target of the next load. */
7265if (context->sourcereg == -1)
7266 {
7267#if PCRE2_CODE_UNIT_WIDTH == 8
7268#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7269 if (context->length >= 4)
7270 OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7271 else if (context->length >= 2)
7272 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7273 else
7274#endif
7275 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7276#elif PCRE2_CODE_UNIT_WIDTH == 16
7277#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7278 if (context->length >= 4)
7279 OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7280 else
7281#endif
7282 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7283#elif PCRE2_CODE_UNIT_WIDTH == 32
7284 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7285#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
7286 context->sourcereg = TMP2;
7287 }
7288
/* With Unicode support the body below runs once per code unit of the
character; without it the body runs exactly once. */
7289#ifdef SUPPORT_UNICODE
7290utflength = 1;
7291if (common->utf && HAS_EXTRALEN(*cc))
7292 utflength += GET_EXTRALEN(*cc);
7293
7294do
7295 {
7296#endif
7297
7298 context->length -= IN_UCHARS(1);
7299#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7300
7301 /* Unaligned read is supported. */
 /* Collect the expected unit (c) and its case mask (oc); the comparison is
 emitted only once a whole chunk has been gathered. */
7302 if (othercasebit != 0 && othercasechar == cc)
7303 {
7304 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
7305 context->oc.asuchars[context->ucharptr] = othercasebit;
7306 }
7307 else
7308 {
7309 context->c.asuchars[context->ucharptr] = *cc;
7310 context->oc.asuchars[context->ucharptr] = 0;
7311 }
7312 context->ucharptr++;
7313
 /* Flush when the chunk is full or the remaining length leaves nothing more
 to add to it. */
7314#if PCRE2_CODE_UNIT_WIDTH == 8
7315 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
7316#else
7317 if (context->ucharptr >= 2 || context->length == 0)
7318#endif
7319 {
 /* Load the following chunk into the spare register first, then swap so
 that sourcereg names the register holding the chunk compared below. */
7320 if (context->length >= 4)
7321 OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7322 else if (context->length >= 2)
7323 OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7324#if PCRE2_CODE_UNIT_WIDTH == 8
7325 else if (context->length >= 1)
7326 OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7327#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7328 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
7329
 /* ucharptr is the number of collected code units; the case labels
 correspond to chunks of 4, 2 and (8-bit only) 1 byte. */
7330 switch(context->ucharptr)
7331 {
7332 case 4 / sizeof(PCRE2_UCHAR):
7333 if (context->oc.asint != 0)
7334 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
7335 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
7336 break;
7337
7338 case 2 / sizeof(PCRE2_UCHAR):
7339 if (context->oc.asushort != 0)
7340 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
7341 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
7342 break;
7343
7344#if PCRE2_CODE_UNIT_WIDTH == 8
7345 case 1:
7346 if (context->oc.asbyte != 0)
7347 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
7348 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
7349 break;
7350#endif
7351
7352 default:
7353 SLJIT_UNREACHABLE();
7354 break;
7355 }
7356 context->ucharptr = 0;
7357 }
7358
7359#else
7360
7361 /* Unaligned read is unsupported or in 32 bit mode. */
 /* One code unit per comparison: load the next unit into the spare register,
 swap, then compare the unit loaded previously. */
7362 if (context->length >= 1)
7363 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7364
7365 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
7366
7367 if (othercasebit != 0 && othercasechar == cc)
7368 {
7369 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
7370 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
7371 }
7372 else
7373 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
7374
7375#endif
7376
7377 cc++;
7378#ifdef SUPPORT_UNICODE
7379 utflength--;
7380 }
7381while (utflength > 0);
7382#endif
7383
7384return cc;
7385}
7386
7387#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7388
/* Re-biases the character type register. The macro emits an ADD or SUB on
typereg by the distance between the current bias (typeoffset) and the
requested one, then records the new bias in typeoffset. It relies on the
locals typereg and typeoffset of the expanding function. The expansion is an
if statement followed by an assignment, not a single statement, so it must
only be used as a statement of its own. */
7389#define SET_TYPE_OFFSET(value) \
7390 if ((value) != typeoffset) \
7391 { \
7392 if ((value) < typeoffset) \
7393 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
7394 else \
7395 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
7396 } \
7397 typeoffset = (value);
7398
/* Re-biases the character held in TMP1. The macro emits an ADD or SUB on TMP1
by the distance between the current bias (charoffset) and the requested one,
then records the new bias in charoffset. It relies on the local charoffset of
the expanding function. As the expansion consists of an if statement followed
by an assignment, it must only be used as a statement of its own. */
7399#define SET_CHAR_OFFSET(value) \
7400 if ((value) != charoffset) \
7401 { \
7402 if ((value) < charoffset) \
7403 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
7404 else \
7405 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
7406 } \
7407 charoffset = (value);
7408
/* Declared ahead of its definition because compile_xclass_matchingpath()
below calls it for the PT_ANY property. */
7409static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7410
/* Emits the matching path of an extended character class.

Arguments:
  common      compiler state
  cc          points at the flag unit of the class (XCL_NOT, XCL_MAP and
              XCL_HASPROP bits). It is followed by the 32 byte bitmap when
              XCL_MAP is set, and then by the XCL_SINGLE / XCL_RANGE /
              XCL_PROP / XCL_NOTPROP items up to XCL_END
  backtracks  receives the jumps taken when the character does not match

The function works in three steps:
  1. The item list is scanned to find the range [min, max] of characters
     which must be decoded precisely by read_char(), and which Unicode data
     (character type, script, the character itself) the items need.
  2. The character is read into TMP1, the bitmap (if any) is tested, and
     the required Unicode record fields are loaded.
  3. One test is emitted for each item. A hit jumps to 'list', which is
     'found' for a normal class and 'backtracks' for a negated (XCL_NOT)
     one. The test of the last item of a normal class is inverted, so that
     it jumps to 'backtracks' on a miss and falls through on a hit. */
7411static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
7412{
7413DEFINE_COMPILER;
7414jump_list *found = NULL;
7415jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
7416sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
7417struct sljit_jump *jump = NULL;
7418PCRE2_SPTR ccbegin;
7419int compares, invertcmp, numberofcmps;
7420#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7421BOOL utf = common->utf;
7422#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
7423
7424#ifdef SUPPORT_UNICODE
7425BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
7426BOOL charsaved = FALSE;
7427int typereg = TMP1;
7428const sljit_u32 *other_cases;
7429sljit_uw typeoffset;
7430#endif /* SUPPORT_UNICODE */
7431
7432/* Scanning the necessary info. */
7433cc++;
7434ccbegin = cc;
7435compares = 0;
7436
/* The bitmap covers the characters below 256, so they must be readable. */
7437if (cc[-1] & XCL_MAP)
7438 {
7439 min = 0;
7440 cc += 32 / sizeof(PCRE2_UCHAR);
7441 }
7442
7443while (*cc != XCL_END)
7444 {
7445 compares++;
7446 if (*cc == XCL_SINGLE)
7447 {
7448 cc ++;
7449 GETCHARINCTEST(c, cc);
7450 if (c > max) max = c;
7451 if (c < min) min = c;
7452#ifdef SUPPORT_UNICODE
7453 needschar = TRUE;
7454#endif /* SUPPORT_UNICODE */
7455 }
7456 else if (*cc == XCL_RANGE)
7457 {
7458 cc ++;
7459 GETCHARINCTEST(c, cc);
7460 if (c < min) min = c;
7461 GETCHARINCTEST(c, cc);
7462 if (c > max) max = c;
7463#ifdef SUPPORT_UNICODE
7464 needschar = TRUE;
7465#endif /* SUPPORT_UNICODE */
7466 }
7467#ifdef SUPPORT_UNICODE
7468 else
7469 {
7470 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7471 cc++;
 /* Only a positive caseless set limits the characters that can match.
 For XCL_NOTPROP every character outside the set matches, so the whole
 character range must be decoded by read_char(); narrowing min / max to
 the set would leave such characters incompletely read. */
7472 if (*cc == PT_CLIST && cc[-1] == XCL_PROP)
7473 {
7474 other_cases = PRIV(ucd_caseless_sets) + cc[1];
7475 while (*other_cases != NOTACHAR)
7476 {
7477 if (*other_cases > max) max = *other_cases;
7478 if (*other_cases < min) min = *other_cases;
7479 other_cases++;
7480 }
7481 }
7482 else
7483 {
7484 max = READ_CHAR_MAX;
7485 min = 0;
7486 }
7487
7488 switch(*cc)
7489 {
7490 case PT_ANY:
7491 /* PT_ANY either accepts everything or is ignored. */
7492 if (cc[-1] == XCL_PROP)
7493 {
7494 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7495 if (list == backtracks)
7496 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7497 return;
7498 }
7499 break;
7500
7501 case PT_LAMP:
7502 case PT_GC:
7503 case PT_PC:
7504 case PT_ALNUM:
7505 needstype = TRUE;
7506 break;
7507
7508 case PT_SC:
7509 needsscript = TRUE;
7510 break;
7511
7512 case PT_SPACE:
7513 case PT_PXSPACE:
7514 case PT_WORD:
7515 case PT_PXGRAPH:
7516 case PT_PXPRINT:
7517 case PT_PXPUNCT:
7518 needstype = TRUE;
7519 needschar = TRUE;
7520 break;
7521
7522 case PT_CLIST:
7523 case PT_UCNC:
7524 needschar = TRUE;
7525 break;
7526
7527 default:
7528 SLJIT_UNREACHABLE();
7529 break;
7530 }
7531 cc += 2;
7532 }
7533#endif /* SUPPORT_UNICODE */
7534 }
7535SLJIT_ASSERT(compares > 0);
7536
7537/* We are not necessarily in UTF mode, even in 8 bit mode. */
7538cc = ccbegin;
7539if ((cc[-1] & XCL_NOT) != 0)
7540 read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
7541else
7542 {
7543#ifdef SUPPORT_UNICODE
7544 read_char(common, min, max, (needstype || needsscript) ? backtracks : NULL, 0);
7545#else /* !SUPPORT_UNICODE */
7546 read_char(common, min, max, NULL, 0);
7547#endif /* SUPPORT_UNICODE */
7548 }
7549
/* Bitmap test, and a quick range rejection when there is neither a bitmap
nor a property item. */
7550if ((cc[-1] & XCL_HASPROP) == 0)
7551 {
7552 if ((cc[-1] & XCL_MAP) != 0)
7553 {
7554 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7555 if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
7556 {
 /* Generic bitmap lookup: byte index is c >> 3, bit is 1 << (c & 7). */
7557 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7558 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7559 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7560 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7561 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
7562 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
7563 }
7564
7565 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7566 JUMPHERE(jump);
7567
7568 cc += 32 / sizeof(PCRE2_UCHAR);
7569 }
7570 else
7571 {
7572 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
7573 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
7574 }
7575 }
7576else if ((cc[-1] & XCL_MAP) != 0)
7577 {
 /* The bitmap lookup destroys TMP1, so the character is kept in RETURN_ADDR
 and copied back afterwards. */
7578 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7579#ifdef SUPPORT_UNICODE
7580 charsaved = TRUE;
7581#endif /* SUPPORT_UNICODE */
7582 if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
7583 {
7584#if PCRE2_CODE_UNIT_WIDTH == 8
7585 jump = NULL;
7586 if (common->utf)
7587#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7588 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7589
7590 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7591 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7592 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7593 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
7594 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
7595 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
7596
7597#if PCRE2_CODE_UNIT_WIDTH == 8
7598 if (common->utf)
7599#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7600 JUMPHERE(jump);
7601 }
7602
7603 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7604 cc += 32 / sizeof(PCRE2_UCHAR);
7605 }
7606
7607#ifdef SUPPORT_UNICODE
7608if (needstype || needsscript)
7609 {
7610 if (needschar && !charsaved)
7611 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7612
7613#if PCRE2_CODE_UNIT_WIDTH == 32
7614 if (!common->utf)
7615 {
 /* Values above MAX_UTF_CODE_POINT are replaced by UNASSIGNED_UTF_CHAR
 before the table lookup. */
7616 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
7617 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
7618 JUMPHERE(jump);
7619 }
7620#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
7621
 /* Two stage table lookup: TMP2 receives the ucd_stage2 entry, which the
 code below scales to address the matching entry of ucd_records. */
7622 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7623 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7624 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
7625 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
7626 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7627 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7628 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
7629 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
7630
7631 /* Before anything else, we deal with scripts. */
7632 if (needsscript)
7633 {
 /* TMP1 = index * 8 + index * 4, i.e. the record index times 12. */
7634 OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7635 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7636 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7637
7638 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
7639
7640 ccbegin = cc;
7641
 /* Emit the script tests now; the main loop below skips PT_SC items. */
7642 while (*cc != XCL_END)
7643 {
7644 if (*cc == XCL_SINGLE)
7645 {
7646 cc ++;
7647 GETCHARINCTEST(c, cc);
7648 }
7649 else if (*cc == XCL_RANGE)
7650 {
7651 cc ++;
7652 GETCHARINCTEST(c, cc);
7653 GETCHARINCTEST(c, cc);
7654 }
7655 else
7656 {
7657 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7658 cc++;
7659 if (*cc == PT_SC)
7660 {
7661 compares--;
7662 invertcmp = (compares == 0 && list != backtracks);
7663 if (cc[-1] == XCL_NOTPROP)
7664 invertcmp ^= 0x1;
7665 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
7666 add_jump(compiler, compares > 0 ? list : backtracks, jump);
7667 }
7668 cc += 2;
7669 }
7670 }
7671
7672 cc = ccbegin;
7673
7674 if (needstype)
7675 {
7676 /* TMP2 has already been shifted by 2 */
7677 if (!needschar)
7678 {
7679 OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
7680 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7681
7682 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7683 }
7684 else
7685 {
7686 OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
7687 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7688
 /* Both are needed: the character returns to TMP1 and the type is
 kept in RETURN_ADDR. */
7689 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7690 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7691 typereg = RETURN_ADDR;
7692 }
7693 }
7694 else if (needschar)
7695 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7696 }
7697 else if (needstype)
7698 {
7699 OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7700 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7701
7702 if (!needschar)
7703 {
7704 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7705
7706 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7707 }
7708 else
7709 {
7710 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
7711
7712 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7713 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7714 typereg = RETURN_ADDR;
7715 }
7716 }
7717 else if (needschar)
7718 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7719 }
7720#endif /* SUPPORT_UNICODE */
7721
7722/* Generating code. */
/* charoffset and typeoffset record the bias currently subtracted from TMP1
and typereg; SET_CHAR_OFFSET and SET_TYPE_OFFSET keep them up to date. */
7723charoffset = 0;
7724numberofcmps = 0;
7725#ifdef SUPPORT_UNICODE
7726typeoffset = 0;
7727#endif /* SUPPORT_UNICODE */
7728
7729while (*cc != XCL_END)
7730 {
7731 compares--;
7732 invertcmp = (compares == 0 && list != backtracks);
7733 jump = NULL;
7734
 /* Consecutive XCL_SINGLE / XCL_RANGE items are merged: the results of at
 most three preceding tests are collected in TMP2 and decided by a single
 jump together with the current one. */
7735 if (*cc == XCL_SINGLE)
7736 {
7737 cc ++;
7738 GETCHARINCTEST(c, cc);
7739
7740 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
7741 {
7742 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7743 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7744 numberofcmps++;
7745 }
7746 else if (numberofcmps > 0)
7747 {
7748 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7749 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7750 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7751 numberofcmps = 0;
7752 }
7753 else
7754 {
7755 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7756 numberofcmps = 0;
7757 }
7758 }
7759 else if (*cc == XCL_RANGE)
7760 {
7761 cc ++;
7762 GETCHARINCTEST(c, cc);
 /* Bias TMP1 by the start of the range, so that one unsigned
 comparison with (end - start) tests both bounds. */
7763 SET_CHAR_OFFSET(c);
7764 GETCHARINCTEST(c, cc);
7765
7766 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
7767 {
7768 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7769 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7770 numberofcmps++;
7771 }
7772 else if (numberofcmps > 0)
7773 {
7774 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7775 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7776 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7777 numberofcmps = 0;
7778 }
7779 else
7780 {
7781 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7782 numberofcmps = 0;
7783 }
7784 }
7785#ifdef SUPPORT_UNICODE
7786 else
7787 {
7788 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7789 if (*cc == XCL_NOTPROP)
7790 invertcmp ^= 0x1;
7791 cc++;
7792 switch(*cc)
7793 {
7794 case PT_ANY:
7795 if (!invertcmp)
7796 jump = JUMP(SLJIT_JUMP);
7797 break;
7798
7799 case PT_LAMP:
7800 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
7801 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7802 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
7803 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7804 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
7805 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7806 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7807 break;
7808
7809 case PT_GC:
7810 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
7811 SET_TYPE_OFFSET(c);
7812 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
7813 break;
7814
7815 case PT_PC:
7816 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
7817 break;
7818
7819 case PT_SC:
 /* Script tests were emitted earlier and already counted there, so
 the decrement at the top of this loop is undone. */
7820 compares++;
7821 /* Do nothing. */
7822 break;
7823
7824 case PT_SPACE:
7825 case PT_PXSPACE:
7826 SET_CHAR_OFFSET(9);
7827 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
7828 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7829
7830 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
7831 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7832
7833 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
7834 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7835
7836 SET_TYPE_OFFSET(ucp_Zl);
7837 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
7838 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7839 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7840 break;
7841
7842 case PT_WORD:
7843 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
7844 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7845 /* Fall through. */
7846
7847 case PT_ALNUM:
7848 SET_TYPE_OFFSET(ucp_Ll);
7849 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
 /* For PT_WORD, TMP2 already holds the underscore test result. */
7850 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7851 SET_TYPE_OFFSET(ucp_Nd);
7852 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
7853 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7854 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7855 break;
7856
7857 case PT_CLIST:
7858 other_cases = PRIV(ucd_caseless_sets) + cc[1];
7859
7860 /* At least three characters are required.
7861 Otherwise this case would be handled by the normal code path. */
7862 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
7863 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
7864
7865 /* Optimizing character pairs, if their difference is power of 2. */
 /* Setting the single differing bit maps both members of such a pair to
 the larger one, so one comparison covers the pair. */
7866 if (is_powerof2(other_cases[1] ^ other_cases[0]))
7867 {
7868 if (charoffset == 0)
7869 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
7870 else
7871 {
7872 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
7873 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
7874 }
7875 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
7876 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7877 other_cases += 2;
7878 }
7879 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
7880 {
7881 if (charoffset == 0)
7882 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
7883 else
7884 {
7885 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
 /* The mask must be the bit that separates other_cases[1] from
 other_cases[2], as in the charoffset == 0 path above: the result
 is compared with other_cases[2] below. */
7886 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
7887 }
7888 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
7889 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7890
7891 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
7892 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
7893
7894 other_cases += 3;
7895 }
7896 else
7897 {
7898 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
7899 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7900 }
7901
 /* The remaining members are tested one by one; only the last test
 sets the zero flag consumed by the jump. */
7902 while (*other_cases != NOTACHAR)
7903 {
7904 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
7905 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
7906 }
7907 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7908 break;
7909
7910 case PT_UCNC:
7911 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
7912 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
7913 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
7914 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7915 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
7916 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7917
7918 SET_CHAR_OFFSET(0xa0);
7919 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
7920 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
7921 SET_CHAR_OFFSET(0);
7922 OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
7923 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
7924 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7925 break;
7926
7927 case PT_PXGRAPH:
7928 /* C and Z groups are the farthest two groups. */
7929 SET_TYPE_OFFSET(ucp_Ll);
7930 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
7931 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
7932
7933 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
7934
7935 /* In case of ucp_Cf, we overwrite the result. */
7936 SET_CHAR_OFFSET(0x2066);
7937 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
7938 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7939
7940 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
7941 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7942
7943 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
7944 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7945
7946 JUMPHERE(jump);
 /* TMP2 is non-zero for the excluded characters, hence the test
 for zero. */
7947 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
7948 break;
7949
7950 case PT_PXPRINT:
7951 /* C and Z groups are the farthest two groups. */
7952 SET_TYPE_OFFSET(ucp_Ll);
7953 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
7954 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
7955
7956 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
7957 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
7958
7959 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
7960
7961 /* In case of ucp_Cf, we overwrite the result. */
7962 SET_CHAR_OFFSET(0x2066);
7963 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
7964 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7965
7966 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
7967 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7968
7969 JUMPHERE(jump);
7970 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
7971 break;
7972
7973 case PT_PXPUNCT:
7974 SET_TYPE_OFFSET(ucp_Sc);
7975 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
7976 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
7977
7978 SET_CHAR_OFFSET(0);
7979 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
7980 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
7981
7982 SET_TYPE_OFFSET(ucp_Pc);
7983 OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
7984 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
7985 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7986 break;
7987
7988 default:
7989 SLJIT_UNREACHABLE();
7990 break;
7991 }
7992 cc += 2;
7993 }
7994#endif /* SUPPORT_UNICODE */
7995
 /* All items but the last jump to 'list' on a hit; the jump of the last
 item always goes to 'backtracks'. */
7996 if (jump != NULL)
7997 add_jump(compiler, compares > 0 ? list : backtracks, jump);
7998 }
7999
8000if (found != NULL)
8001 set_jumps(found, LABEL());
8002}
8003
/* The offset macros depend on the locals typereg, typeoffset and charoffset
of compile_xclass_matchingpath(), so they are removed again here. */
8004#undef SET_TYPE_OFFSET
8005#undef SET_CHAR_OFFSET
8006
8007#endif
8008
8009static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
8010{
8011DEFINE_COMPILER;
8012int length;
8013struct sljit_jump *jump[4];
8014#ifdef SUPPORT_UNICODE
8015struct sljit_label *label;
8016#endif /* SUPPORT_UNICODE */
8017
8018switch(type)
8019 {
8020 case OP_SOD:
8021 if (HAS_VIRTUAL_REGISTERS)
8022 {
8023 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8024 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8025 }
8026 else
8027 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8028 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8029 return cc;
8030
8031 case OP_SOM:
8032 if (HAS_VIRTUAL_REGISTERS)
8033 {
8034 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8035 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
8036 }
8037 else
8038 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
8039 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8040 return cc;
8041
8042 case OP_NOT_WORD_BOUNDARY:
8043 case OP_WORD_BOUNDARY:
8044 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
8045#ifdef SUPPORT_UNICODE
8046 if (common->invalid_utf)
8047 {
8048 add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8049 return cc;
8050 }
8051#endif /* SUPPORT_UNICODE */
8052 sljit_set_current_flags(compiler, SLJIT_SET_Z);
8053 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8054 return cc;
8055
8056 case OP_EODN:
8057 /* Requires rather complex checks. */
8058 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
8059 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8060 {
8061 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8062 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8063 if (common->mode == PCRE2_JIT_COMPLETE)
8064 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8065 else
8066 {
8067 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
8068 OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
8069 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
8070 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8071 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
8072 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
8073 check_partial(common, TRUE);
8074 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8075 JUMPHERE(jump[1]);
8076 }
8077 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8078 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8079 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8080 }
8081 else if (common->nltype == NLTYPE_FIXED)
8082 {
8083 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8084 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8085 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8086 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
8087 }
8088 else
8089 {
8090 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8091 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8092 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8093 OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
8094 jump[2] = JUMP(SLJIT_GREATER);
8095 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
8096 /* Equal. */
8097 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8098 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8099 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8100
8101 JUMPHERE(jump[1]);
8102 if (common->nltype == NLTYPE_ANYCRLF)
8103 {
8104 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8105 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
8106 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
8107 }
8108 else
8109 {
8110 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8111 read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8112 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
8113 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
8114 sljit_set_current_flags(compiler, SLJIT_SET_Z);
8115 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
8116 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8117 }
8118 JUMPHERE(jump[2]);
8119 JUMPHERE(jump[3]);
8120 }
8121 JUMPHERE(jump[0]);
8122 if (common->mode != PCRE2_JIT_COMPLETE)
8123 check_partial(common, TRUE);
8124 return cc;
8125
8126 case OP_EOD:
8127 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8128 if (common->mode != PCRE2_JIT_COMPLETE)
8129 check_partial(common, TRUE);
8130 return cc;
8131
8132 case OP_DOLL:
8133 if (HAS_VIRTUAL_REGISTERS)
8134 {
8135 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8136 OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8137 }
8138 else
8139 OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8140 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8141
8142 if (!common->endonly)
8143 compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
8144 else
8145 {
8146 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8147 check_partial(common, FALSE);
8148 }
8149 return cc;
8150
8151 case OP_DOLLM:
8152 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
8153 if (HAS_VIRTUAL_REGISTERS)
8154 {
8155 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8156 OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8157 }
8158 else
8159 OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
8160 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8161 check_partial(common, FALSE);
8162 jump[0] = JUMP(SLJIT_JUMP);
8163 JUMPHERE(jump[1]);
8164
8165 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8166 {
8167 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8168 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8169 if (common->mode == PCRE2_JIT_COMPLETE)
8170 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
8171 else
8172 {
8173 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
8174 /* STR_PTR = STR_END - IN_UCHARS(1) */
8175 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8176 check_partial(common, TRUE);
8177 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8178 JUMPHERE(jump[1]);
8179 }
8180
8181 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8182 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8183 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8184 }
8185 else
8186 {
8187 peek_char(common, common->nlmax, TMP3, 0, NULL);
8188 check_newlinechar(common, common->nltype, backtracks, FALSE);
8189 }
8190 JUMPHERE(jump[0]);
8191 return cc;
8192
8193 case OP_CIRC:
8194 if (HAS_VIRTUAL_REGISTERS)
8195 {
8196 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8197 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
8198 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8199 OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8200 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8201 }
8202 else
8203 {
8204 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8205 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
8206 OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8207 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8208 }
8209 return cc;
8210
8211 case OP_CIRCM:
8212 /* TMP2 might be used by peek_char_back. */
8213 if (HAS_VIRTUAL_REGISTERS)
8214 {
8215 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8216 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8217 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8218 OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8219 }
8220 else
8221 {
8222 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8223 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
8224 OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
8225 }
8226 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8227 jump[0] = JUMP(SLJIT_JUMP);
8228 JUMPHERE(jump[1]);
8229
8230 if (!common->alt_circumflex)
8231 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8232
8233 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8234 {
8235 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8236 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
8237 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
8238 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
8239 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8240 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8241 }
8242 else
8243 {
8244 peek_char_back(common, common->nlmax, backtracks);
8245 check_newlinechar(common, common->nltype, backtracks, FALSE);
8246 }
8247 JUMPHERE(jump[0]);
8248 return cc;
8249
8250 case OP_REVERSE:
8251 length = GET(cc, 0);
8252 if (length == 0)
8253 return cc + LINK_SIZE;
8254 if (HAS_VIRTUAL_REGISTERS)
8255 {
8256 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8257 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8258 }
8259 else
8260 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8261#ifdef SUPPORT_UNICODE
8262 if (common->utf)
8263 {
8264 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);
8265 label = LABEL();
8266 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
8267 move_back(common, backtracks, FALSE);
8268 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
8269 JUMPTO(SLJIT_NOT_ZERO, label);
8270 }
8271 else
8272#endif
8273 {
8274 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
8275 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
8276 }
8277 check_start_used_ptr(common);
8278 return cc + LINK_SIZE;
8279 }
8280SLJIT_UNREACHABLE();
8281return cc;
8282}
8283
8284#ifdef SUPPORT_UNICODE
8285
8286#if PCRE2_CODE_UNIT_WIDTH != 32
8287
8288static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8289{
8290PCRE2_SPTR start_subject = args->begin;
8291PCRE2_SPTR end_subject = args->end;
8292int lgb, rgb, ricount;
8293PCRE2_SPTR prevcc, endcc, bptr;
8294BOOL first = TRUE;
8295uint32_t c;
8296
8297prevcc = cc;
8298endcc = NULL;
8299do
8300 {
8301 GETCHARINC(c, cc);
8302 rgb = UCD_GRAPHBREAK(c);
8303
8304 if (first)
8305 {
8306 lgb = rgb;
8307 endcc = cc;
8308 first = FALSE;
8309 continue;
8310 }
8311
8312 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8313 break;
8314
8315 /* Not breaking between Regional Indicators is allowed only if there
8316 are an even number of preceding RIs. */
8317
8318 if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8319 {
8320 ricount = 0;
8321 bptr = prevcc;
8322
8323 /* bptr is pointing to the left-hand character */
8324 while (bptr > start_subject)
8325 {
8326 bptr--;
8327 BACKCHAR(bptr);
8328 GETCHAR(c, bptr);
8329
8330 if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8331 break;
8332
8333 ricount++;
8334 }
8335
8336 if ((ricount & 1) != 0) break; /* Grapheme break required */
8337 }
8338
8339 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8340 allows any number of them before a following Extended_Pictographic. */
8341
8342 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8343 lgb != ucp_gbExtended_Pictographic)
8344 lgb = rgb;
8345
8346 prevcc = endcc;
8347 endcc = cc;
8348 }
8349while (cc < end_subject);
8350
8351return endcc;
8352}
8353
8354#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8355
8356static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8357{
8358PCRE2_SPTR start_subject = args->begin;
8359PCRE2_SPTR end_subject = args->end;
8360int lgb, rgb, ricount;
8361PCRE2_SPTR prevcc, endcc, bptr;
8362BOOL first = TRUE;
8363uint32_t c;
8364
8365prevcc = cc;
8366endcc = NULL;
8367do
8368 {
8369 GETCHARINC_INVALID(c, cc, end_subject, break);
8370 rgb = UCD_GRAPHBREAK(c);
8371
8372 if (first)
8373 {
8374 lgb = rgb;
8375 endcc = cc;
8376 first = FALSE;
8377 continue;
8378 }
8379
8380 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8381 break;
8382
8383 /* Not breaking between Regional Indicators is allowed only if there
8384 are an even number of preceding RIs. */
8385
8386 if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8387 {
8388 ricount = 0;
8389 bptr = prevcc;
8390
8391 /* bptr is pointing to the left-hand character */
8392 while (bptr > start_subject)
8393 {
8394 GETCHARBACK_INVALID(c, bptr, start_subject, break);
8395
8396 if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
8397 break;
8398
8399 ricount++;
8400 }
8401
8402 if ((ricount & 1) != 0)
8403 break; /* Grapheme break required */
8404 }
8405
8406 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8407 allows any number of them before a following Extended_Pictographic. */
8408
8409 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8410 lgb != ucp_gbExtended_Pictographic)
8411 lgb = rgb;
8412
8413 prevcc = endcc;
8414 endcc = cc;
8415 }
8416while (cc < end_subject);
8417
8418return endcc;
8419}
8420
8421static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8422{
8423PCRE2_SPTR start_subject = args->begin;
8424PCRE2_SPTR end_subject = args->end;
8425int lgb, rgb, ricount;
8426PCRE2_SPTR bptr;
8427uint32_t c;
8428
8429/* Patch by PH */
8430/* GETCHARINC(c, cc); */
8431c = *cc++;
8432
8433#if PCRE2_CODE_UNIT_WIDTH == 32
8434if (c >= 0x110000)
8435 return NULL;
8436#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8437lgb = UCD_GRAPHBREAK(c);
8438
8439while (cc < end_subject)
8440 {
8441 c = *cc;
8442#if PCRE2_CODE_UNIT_WIDTH == 32
8443 if (c >= 0x110000)
8444 break;
8445#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8446 rgb = UCD_GRAPHBREAK(c);
8447
8448 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8449 break;
8450
8451 /* Not breaking between Regional Indicators is allowed only if there
8452 are an even number of preceding RIs. */
8453
8454 if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
8455 {
8456 ricount = 0;
8457 bptr = cc - 1;
8458
8459 /* bptr is pointing to the left-hand character */
8460 while (bptr > start_subject)
8461 {
8462 bptr--;
8463 c = *bptr;
8464#if PCRE2_CODE_UNIT_WIDTH == 32
8465 if (c >= 0x110000)
8466 break;
8467#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8468
8469 if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
8470
8471 ricount++;
8472 }
8473
8474 if ((ricount & 1) != 0)
8475 break; /* Grapheme break required */
8476 }
8477
8478 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8479 allows any number of them before a following Extended_Pictographic. */
8480
8481 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8482 lgb != ucp_gbExtended_Pictographic)
8483 lgb = rgb;
8484
8485 cc++;
8486 }
8487
8488return cc;
8489}
8490
8491#endif /* SUPPORT_UNICODE */
8492
8493static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
8494{
8495DEFINE_COMPILER;
8496int length;
8497unsigned int c, oc, bit;
8498compare_context context;
8499struct sljit_jump *jump[3];
8500jump_list *end_list;
8501#ifdef SUPPORT_UNICODE
8502PCRE2_UCHAR propdata[5];
8503#endif /* SUPPORT_UNICODE */
8504
8505switch(type)
8506 {
8507 case OP_NOT_DIGIT:
8508 case OP_DIGIT:
8509 /* Digits are usually 0-9, so it is worth to optimize them. */
8510 if (check_str_ptr)
8511 detect_partial_match(common, backtracks);
8512#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8513 if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
8514 read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
8515 else
8516#endif
8517 read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
8518 /* Flip the starting bit in the negative case. */
8519 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
8520 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8521 return cc;
8522
8523 case OP_NOT_WHITESPACE:
8524 case OP_WHITESPACE:
8525 if (check_str_ptr)
8526 detect_partial_match(common, backtracks);
8527#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8528 if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
8529 read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
8530 else
8531#endif
8532 read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
8533 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
8534 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8535 return cc;
8536
8537 case OP_NOT_WORDCHAR:
8538 case OP_WORDCHAR:
8539 if (check_str_ptr)
8540 detect_partial_match(common, backtracks);
8541#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8542 if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
8543 read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
8544 else
8545#endif
8546 read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
8547 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
8548 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8549 return cc;
8550
8551 case OP_ANY:
8552 if (check_str_ptr)
8553 detect_partial_match(common, backtracks);
8554 read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8555 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8556 {
8557 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8558 end_list = NULL;
8559 if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8560 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8561 else
8562 check_str_end(common, &end_list);
8563
8564 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8565 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
8566 set_jumps(end_list, LABEL());
8567 JUMPHERE(jump[0]);
8568 }
8569 else
8570 check_newlinechar(common, common->nltype, backtracks, TRUE);
8571 return cc;
8572
8573 case OP_ALLANY:
8574 if (check_str_ptr)
8575 detect_partial_match(common, backtracks);
8576#ifdef SUPPORT_UNICODE
8577 if (common->utf)
8578 {
8579 if (common->invalid_utf)
8580 {
8581 read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
8582 return cc;
8583 }
8584
8585#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
8586 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8587 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8588#if PCRE2_CODE_UNIT_WIDTH == 8
8589 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
8590 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
8591 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8592#elif PCRE2_CODE_UNIT_WIDTH == 16
8593 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
8594 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
8595 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
8596 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
8597 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8598 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8599#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
8600 JUMPHERE(jump[0]);
8601 return cc;
8602#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
8603 }
8604#endif /* SUPPORT_UNICODE */
8605 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8606 return cc;
8607
8608 case OP_ANYBYTE:
8609 if (check_str_ptr)
8610 detect_partial_match(common, backtracks);
8611 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8612 return cc;
8613
8614#ifdef SUPPORT_UNICODE
8615 case OP_NOTPROP:
8616 case OP_PROP:
8617 propdata[0] = XCL_HASPROP;
8618 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
8619 propdata[2] = cc[0];
8620 propdata[3] = cc[1];
8621 propdata[4] = XCL_END;
8622 if (check_str_ptr)
8623 detect_partial_match(common, backtracks);
8624 compile_xclass_matchingpath(common, propdata, backtracks);
8625 return cc + 2;
8626#endif
8627
8628 case OP_ANYNL:
8629 if (check_str_ptr)
8630 detect_partial_match(common, backtracks);
8631 read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
8632 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8633 /* We don't need to handle soft partial matching case. */
8634 end_list = NULL;
8635 if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8636 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8637 else
8638 check_str_end(common, &end_list);
8639 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8640 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8641 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8642 jump[2] = JUMP(SLJIT_JUMP);
8643 JUMPHERE(jump[0]);
8644 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
8645 set_jumps(end_list, LABEL());
8646 JUMPHERE(jump[1]);
8647 JUMPHERE(jump[2]);
8648 return cc;
8649
8650 case OP_NOT_HSPACE:
8651 case OP_HSPACE:
8652 if (check_str_ptr)
8653 detect_partial_match(common, backtracks);
8654
8655 if (type == OP_NOT_HSPACE)
8656 read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
8657 else
8658 read_char(common, 0x9, 0x3000, NULL, 0);
8659
8660 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
8661 sljit_set_current_flags(compiler, SLJIT_SET_Z);
8662 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8663 return cc;
8664
8665 case OP_NOT_VSPACE:
8666 case OP_VSPACE:
8667 if (check_str_ptr)
8668 detect_partial_match(common, backtracks);
8669
8670 if (type == OP_NOT_VSPACE)
8671 read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
8672 else
8673 read_char(common, 0xa, 0x2029, NULL, 0);
8674
8675 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
8676 sljit_set_current_flags(compiler, SLJIT_SET_Z);
8677 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8678 return cc;
8679
8680#ifdef SUPPORT_UNICODE
8681 case OP_EXTUNI:
8682 if (check_str_ptr)
8683 detect_partial_match(common, backtracks);
8684
8685 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
8686 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
8687
8688#if PCRE2_CODE_UNIT_WIDTH != 32
8689 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
8690 common->utf ? (common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_utf)) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
8691 if (common->invalid_utf)
8692 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
8693#else
8694 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
8695 common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
8696 if (!common->utf || common->invalid_utf)
8697 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
8698#endif
8699
8700 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
8701
8702 if (common->mode == PCRE2_JIT_PARTIAL_HARD)
8703 {
8704 jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
8705 /* Since we successfully read a char above, partial matching must occure. */
8706 check_partial(common, TRUE);
8707 JUMPHERE(jump[0]);
8708 }
8709 return cc;
8710#endif
8711
8712 case OP_CHAR:
8713 case OP_CHARI:
8714 length = 1;
8715#ifdef SUPPORT_UNICODE
8716 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
8717#endif
8718
8719 if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
8720 detect_partial_match(common, backtracks);
8721
8722 if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
8723 {
8724 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
8725 if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
8726 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
8727
8728 context.length = IN_UCHARS(length);
8729 context.sourcereg = -1;
8730#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
8731 context.ucharptr = 0;
8732#endif
8733 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
8734 }
8735
8736#ifdef SUPPORT_UNICODE
8737 if (common->utf)
8738 {
8739 GETCHAR(c, cc);
8740 }
8741 else
8742#endif
8743 c = *cc;
8744
8745 SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
8746
8747 if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
8748 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8749
8750 oc = char_othercase(common, c);
8751 read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
8752
8753 SLJIT_ASSERT(!is_powerof2(c ^ oc));
8754
8755 if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
8756 {
8757 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
8758 CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c);
8759 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8760 }
8761 else
8762 {
8763 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
8764 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
8765 JUMPHERE(jump[0]);
8766 }
8767 return cc + length;
8768
8769 case OP_NOT:
8770 case OP_NOTI:
8771 if (check_str_ptr)
8772 detect_partial_match(common, backtracks);
8773
8774 length = 1;
8775#ifdef SUPPORT_UNICODE
8776 if (common->utf)
8777 {
8778#if PCRE2_CODE_UNIT_WIDTH == 8
8779 c = *cc;
8780 if (c < 128 && !common->invalid_utf)
8781 {
8782 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8783 if (type == OP_NOT || !char_has_othercase(common, cc))
8784 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8785 else
8786 {
8787 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
8788 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
8789 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
8790 }
8791 /* Skip the variable-length character. */
8792 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8793 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
8794 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
8795 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8796 JUMPHERE(jump[0]);
8797 return cc + 1;
8798 }
8799 else
8800#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
8801 {
8802 GETCHARLEN(c, cc, length);
8803 }
8804 }
8805 else
8806#endif /* SUPPORT_UNICODE */
8807 c = *cc;
8808
8809 if (type == OP_NOT || !char_has_othercase(common, cc))
8810 {
8811 read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
8812 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8813 }
8814 else
8815 {
8816 oc = char_othercase(common, c);
8817 read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
8818 bit = c ^ oc;
8819 if (is_powerof2(bit))
8820 {
8821 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
8822 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
8823 }
8824 else
8825 {
8826 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8827 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
8828 }
8829 }
8830 return cc + length;
8831
8832 case OP_CLASS:
8833 case OP_NCLASS:
8834 if (check_str_ptr)
8835 detect_partial_match(common, backtracks);
8836
8837#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8838 bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
8839 if (type == OP_NCLASS)
8840 read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
8841 else
8842 read_char(common, 0, bit, NULL, 0);
8843#else
8844 if (type == OP_NCLASS)
8845 read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
8846 else
8847 read_char(common, 0, 255, NULL, 0);
8848#endif
8849
8850 if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
8851 return cc + 32 / sizeof(PCRE2_UCHAR);
8852
8853#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8854 jump[0] = NULL;
8855 if (common->utf)
8856 {
8857 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
8858 if (type == OP_CLASS)
8859 {
8860 add_jump(compiler, backtracks, jump[0]);
8861 jump[0] = NULL;
8862 }
8863 }
8864#elif PCRE2_CODE_UNIT_WIDTH != 8
8865 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
8866 if (type == OP_CLASS)
8867 {
8868 add_jump(compiler, backtracks, jump[0]);
8869 jump[0] = NULL;
8870 }
8871#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
8872
8873 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
8874 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
8875 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
8876 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
8877 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
8878 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
8879
8880#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
8881 if (jump[0] != NULL)
8882 JUMPHERE(jump[0]);
8883#endif
8884 return cc + 32 / sizeof(PCRE2_UCHAR);
8885
8886#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
8887 case OP_XCLASS:
8888 if (check_str_ptr)
8889 detect_partial_match(common, backtracks);
8890 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
8891 return cc + GET(cc, 0) - 1;
8892#endif
8893 }
8894SLJIT_UNREACHABLE();
8895return cc;
8896}
8897
8898static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
8899{
8900/* This function consumes at least one input character. */
8901/* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
8902DEFINE_COMPILER;
8903PCRE2_SPTR ccbegin = cc;
8904compare_context context;
8905int size;
8906
8907context.length = 0;
8908do
8909 {
8910 if (cc >= ccend)
8911 break;
8912
8913 if (*cc == OP_CHAR)
8914 {
8915 size = 1;
8916#ifdef SUPPORT_UNICODE
8917 if (common->utf && HAS_EXTRALEN(cc[1]))
8918 size += GET_EXTRALEN(cc[1]);
8919#endif
8920 }
8921 else if (*cc == OP_CHARI)
8922 {
8923 size = 1;
8924#ifdef SUPPORT_UNICODE
8925 if (common->utf)
8926 {
8927 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
8928 size = 0;
8929 else if (HAS_EXTRALEN(cc[1]))
8930 size += GET_EXTRALEN(cc[1]);
8931 }
8932 else
8933#endif
8934 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
8935 size = 0;
8936 }
8937 else
8938 size = 0;
8939
8940 cc += 1 + size;
8941 context.length += IN_UCHARS(size);
8942 }
8943while (size > 0 && context.length <= 128);
8944
8945cc = ccbegin;
8946if (context.length > 0)
8947 {
8948 /* We have a fixed-length byte sequence. */
8949 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
8950 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
8951
8952 context.sourcereg = -1;
8953#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
8954 context.ucharptr = 0;
8955#endif
8956 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
8957 return cc;
8958 }
8959
8960/* A non-fixed length character will be checked if length == 0. */
8961return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
8962}
8963
8964/* Forward definitions. */
8965static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
8966static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
8967
/* Allocates a zero-filled backtrack record of `size` bytes with
sljit_alloc_memory(), stores `ccstart` in its cc field and links it in as the
new top of parent's list. The variables `compiler`, `parent` and `backtrack`
must be in scope where the macro is expanded. If the compiler reports an error
after the allocation, the enclosing function returns `error`. */
#define PUSH_BACKTRACK(size, ccstart, error) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return error; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
8980
/* Same as PUSH_BACKTRACK, for use inside functions returning void: allocates a
zero-filled backtrack record of `size` bytes, stores `ccstart` in its cc field
and links it in as the new top of parent's list. The variables `compiler`,
`parent` and `backtrack` must be in scope where the macro is expanded. If the
compiler reports an error after the allocation, the enclosing function
returns immediately. */
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
  do \
    { \
    backtrack = sljit_alloc_memory(compiler, (size)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    memset(backtrack, 0, (size)); \
    backtrack->prev = parent->top; \
    backtrack->cc = (ccstart); \
    parent->top = backtrack; \
    } \
  while (0)
8993
/* Casts the `backtrack` variable that is in scope at the expansion site to a
pointer to the given backtrack structure type. */
#define BACKTRACK_AS(type) ((type *)backtrack)
8995
8996static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
8997{
8998/* The OVECTOR offset goes to TMP2. */
8999DEFINE_COMPILER;
9000int count = GET2(cc, 1 + IMM2_SIZE);
9001PCRE2_SPTR slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
9002unsigned int offset;
9003jump_list *found = NULL;
9004
9005SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
9006
9007OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
9008
9009count--;
9010while (count-- > 0)
9011 {
9012 offset = GET2(slot, 0) << 1;
9013 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
9014 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
9015 slot += common->name_entry_size;
9016 }
9017
9018offset = GET2(slot, 0) << 1;
9019GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
9020if (backtracks != NULL && !common->unset_backref)
9021 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
9022
9023set_jumps(found, LABEL());
9024}
9025
9026static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
9027{
9028DEFINE_COMPILER;
9029BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9030int offset = 0;
9031struct sljit_jump *jump = NULL;
9032struct sljit_jump *partial;
9033struct sljit_jump *nopartial;
9034#if defined SUPPORT_UNICODE
9035struct sljit_label *loop;
9036struct sljit_label *caseless_loop;
9037jump_list *no_match = NULL;
9038int source_reg = COUNT_MATCH;
9039int source_end_reg = ARGUMENTS;
9040int char1_reg = STACK_LIMIT;
9041#endif /* SUPPORT_UNICODE */
9042
9043if (ref)
9044 {
9045 offset = GET2(cc, 1) << 1;
9046 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9047 /* OVECTOR(1) contains the "string begin - 1" constant. */
9048 if (withchecks && !common->unset_backref)
9049 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9050 }
9051else
9052 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9053
9054#if defined SUPPORT_UNICODE
9055if (common->utf && *cc == OP_REFI)
9056 {
9057 SLJIT_ASSERT(common->iref_ptr != 0);
9058
9059 if (ref)
9060 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9061 else
9062 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9063
9064 if (withchecks && emptyfail)
9065 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));
9066
9067 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
9068 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
9069 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);
9070
9071 OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
9072 OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);
9073
9074 loop = LABEL();
9075 jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
9076 partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
9077
9078 /* Read original character. It must be a valid UTF character. */
9079 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
9080 OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);
9081
9082 read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);
9083
9084 OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
9085 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
9086 OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);
9087
9088 /* Read second character. */
9089 read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);
9090
9091 CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
9092
9093 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
9094
9095 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
9096
9097 OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
9098 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
9099 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
9100
9101 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));
9102
9103 OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
9104 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
9105 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
9106 CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
9107
9108 add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9109 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
9110 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));
9111
9112 caseless_loop = LABEL();
9113 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9114 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
9115 OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, char1_reg, 0);
9116 JUMPTO(SLJIT_EQUAL, loop);
9117 JUMPTO(SLJIT_LESS, caseless_loop);
9118
9119 set_jumps(no_match, LABEL());
9120 if (common->mode == PCRE2_JIT_COMPLETE)
9121 JUMPHERE(partial);
9122
9123 OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9124 OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9125 OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9126 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9127
9128 if (common->mode != PCRE2_JIT_COMPLETE)
9129 {
9130 JUMPHERE(partial);
9131 OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9132 OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9133 OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9134
9135 check_partial(common, FALSE);
9136 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9137 }
9138
9139 JUMPHERE(jump);
9140 OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9141 OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9142 OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9143 return;
9144 }
9145else
9146#endif /* SUPPORT_UNICODE */
9147 {
9148 if (ref)
9149 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
9150 else
9151 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
9152
9153 if (withchecks)
9154 jump = JUMP(SLJIT_ZERO);
9155
9156 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
9157 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
9158 if (common->mode == PCRE2_JIT_COMPLETE)
9159 add_jump(compiler, backtracks, partial);
9160
9161 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
9162 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9163
9164 if (common->mode != PCRE2_JIT_COMPLETE)
9165 {
9166 nopartial = JUMP(SLJIT_JUMP);
9167 JUMPHERE(partial);
9168 /* TMP2 -= STR_END - STR_PTR */
9169 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
9170 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
9171 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
9172 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
9173 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
9174 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9175 JUMPHERE(partial);
9176 check_partial(common, FALSE);
9177 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9178 JUMPHERE(nopartial);
9179 }
9180 }
9181
9182if (jump != NULL)
9183 {
9184 if (emptyfail)
9185 add_jump(compiler, backtracks, jump);
9186 else
9187 JUMPHERE(jump);
9188 }
9189}
9190
9191static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9192{
9193DEFINE_COMPILER;
9194BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9195backtrack_common *backtrack;
9196PCRE2_UCHAR type;
9197int offset = 0;
9198struct sljit_label *label;
9199struct sljit_jump *zerolength;
9200struct sljit_jump *jump = NULL;
9201PCRE2_SPTR ccbegin = cc;
9202int min = 0, max = 0;
9203BOOL minimize;
9204
9205PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
9206
9207if (ref)
9208 offset = GET2(cc, 1) << 1;
9209else
9210 cc += IMM2_SIZE;
9211type = cc[1 + IMM2_SIZE];
9212
9213SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
9214minimize = (type & 0x1) != 0;
9215switch(type)
9216 {
9217 case OP_CRSTAR:
9218 case OP_CRMINSTAR:
9219 min = 0;
9220 max = 0;
9221 cc += 1 + IMM2_SIZE + 1;
9222 break;
9223 case OP_CRPLUS:
9224 case OP_CRMINPLUS:
9225 min = 1;
9226 max = 0;
9227 cc += 1 + IMM2_SIZE + 1;
9228 break;
9229 case OP_CRQUERY:
9230 case OP_CRMINQUERY:
9231 min = 0;
9232 max = 1;
9233 cc += 1 + IMM2_SIZE + 1;
9234 break;
9235 case OP_CRRANGE:
9236 case OP_CRMINRANGE:
9237 min = GET2(cc, 1 + IMM2_SIZE + 1);
9238 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
9239 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
9240 break;
9241 default:
9242 SLJIT_UNREACHABLE();
9243 break;
9244 }
9245
9246if (!minimize)
9247 {
9248 if (min == 0)
9249 {
9250 allocate_stack(common, 2);
9251 if (ref)
9252 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9253 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9254 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9255 /* Temporary release of STR_PTR. */
9256 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9257 /* Handles both invalid and empty cases. Since the minimum repeat,
9258 is zero the invalid case is basically the same as an empty case. */
9259 if (ref)
9260 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9261 else
9262 {
9263 compile_dnref_search(common, ccbegin, NULL);
9264 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9265 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9266 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9267 }
9268 /* Restore if not zero length. */
9269 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9270 }
9271 else
9272 {
9273 allocate_stack(common, 1);
9274 if (ref)
9275 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9276 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9277 if (ref)
9278 {
9279 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9280 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9281 }
9282 else
9283 {
9284 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9285 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9286 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9287 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9288 }
9289 }
9290
9291 if (min > 1 || max > 1)
9292 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
9293
9294 label = LABEL();
9295 if (!ref)
9296 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9297 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
9298
9299 if (min > 1 || max > 1)
9300 {
9301 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9302 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9303 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
9304 if (min > 1)
9305 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
9306 if (max > 1)
9307 {
9308 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
9309 allocate_stack(common, 1);
9310 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9311 JUMPTO(SLJIT_JUMP, label);
9312 JUMPHERE(jump);
9313 }
9314 }
9315
9316 if (max == 0)
9317 {
9318 /* Includes min > 1 case as well. */
9319 allocate_stack(common, 1);
9320 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9321 JUMPTO(SLJIT_JUMP, label);
9322 }
9323
9324 JUMPHERE(zerolength);
9325 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9326
9327 count_match(common);
9328 return cc;
9329 }
9330
9331allocate_stack(common, ref ? 2 : 3);
9332if (ref)
9333 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9334OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9335if (type != OP_CRMINSTAR)
9336 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9337
9338if (min == 0)
9339 {
9340 /* Handles both invalid and empty cases. Since the minimum repeat,
9341 is zero the invalid case is basically the same as an empty case. */
9342 if (ref)
9343 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9344 else
9345 {
9346 compile_dnref_search(common, ccbegin, NULL);
9347 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9348 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9349 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9350 }
9351 /* Length is non-zero, we can match real repeats. */
9352 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9353 jump = JUMP(SLJIT_JUMP);
9354 }
9355else
9356 {
9357 if (ref)
9358 {
9359 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9360 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9361 }
9362 else
9363 {
9364 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9365 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9366 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9367 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9368 }
9369 }
9370
9371BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9372if (max > 0)
9373 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
9374
9375if (!ref)
9376 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9377compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
9378OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9379
9380if (min > 1)
9381 {
9382 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9383 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9384 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9385 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
9386 }
9387else if (max > 0)
9388 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
9389
9390if (jump != NULL)
9391 JUMPHERE(jump);
9392JUMPHERE(zerolength);
9393
9394count_match(common);
9395return cc;
9396}
9397
9398static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9399{
9400DEFINE_COMPILER;
9401backtrack_common *backtrack;
9402recurse_entry *entry = common->entries;
9403recurse_entry *prev = NULL;
9404sljit_sw start = GET(cc, 1);
9405PCRE2_SPTR start_cc;
9406BOOL needs_control_head;
9407
9408PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9409
9410/* Inlining simple patterns. */
9411if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9412 {
9413 start_cc = common->start + start;
9414 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9415 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9416 return cc + 1 + LINK_SIZE;
9417 }
9418
9419while (entry != NULL)
9420 {
9421 if (entry->start == start)
9422 break;
9423 prev = entry;
9424 entry = entry->next;
9425 }
9426
9427if (entry == NULL)
9428 {
9429 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9430 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9431 return NULL;
9432 entry->next = NULL;
9433 entry->entry_label = NULL;
9434 entry->backtrack_label = NULL;
9435 entry->entry_calls = NULL;
9436 entry->backtrack_calls = NULL;
9437 entry->start = start;
9438
9439 if (prev != NULL)
9440 prev->next = entry;
9441 else
9442 common->entries = entry;
9443 }
9444
9445BACKTRACK_AS(recurse_backtrack)->entry = entry;
9446
9447if (entry->entry_label == NULL)
9448 add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9449else
9450 JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9451/* Leave if the match is failed. */
9452add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9453BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9454return cc + 1 + LINK_SIZE;
9455}
9456
9457static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9458{
9459PCRE2_SPTR begin;
9460PCRE2_SIZE *ovector;
9461sljit_u32 oveccount, capture_top;
9462
9463if (arguments->callout == NULL)
9464 return 0;
9465
9466SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9467
9468begin = arguments->begin;
9469ovector = (PCRE2_SIZE*)(callout_block + 1);
9470oveccount = callout_block->capture_top;
9471
9472SLJIT_ASSERT(oveccount >= 1);
9473
9474callout_block->version = 2;
9475callout_block->callout_flags = 0;
9476
9477/* Offsets in subject. */
9478callout_block->subject_length = arguments->end - arguments->begin;
9479callout_block->start_match = jit_ovector[0] - begin;
9480callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9481callout_block->subject = begin;
9482
9483/* Convert and copy the JIT offset vector to the ovector array. */
9484callout_block->capture_top = 1;
9485callout_block->offset_vector = ovector;
9486
9487ovector[0] = PCRE2_UNSET;
9488ovector[1] = PCRE2_UNSET;
9489ovector += 2;
9490jit_ovector += 2;
9491capture_top = 1;
9492
9493/* Convert pointers to sizes. */
9494while (--oveccount != 0)
9495 {
9496 capture_top++;
9497
9498 ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9499 ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9500
9501 if (ovector[0] != PCRE2_UNSET)
9502 callout_block->capture_top = capture_top;
9503
9504 ovector += 2;
9505 jit_ovector += 2;
9506 }
9507
9508return (arguments->callout)(callout_block, arguments->callout_data);
9509}
9510
9511#define CALLOUT_ARG_OFFSET(arg) \
9512 SLJIT_OFFSETOF(pcre2_callout_block, arg)
9513
9514static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9515{
9516DEFINE_COMPILER;
9517backtrack_common *backtrack;
9518sljit_s32 mov_opcode;
9519unsigned int callout_length = (*cc == OP_CALLOUT)
9520 ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9521sljit_sw value1;
9522sljit_sw value2;
9523sljit_sw value3;
9524sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9525
9526PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9527
9528callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9529
9530allocate_stack(common, callout_arg_size);
9531
9532SLJIT_ASSERT(common->capture_last_ptr != 0);
9533OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9534OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9535value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9536OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9537OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9538OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9539
9540/* These pointer sized fields temporarly stores internal variables. */
9541OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9542
9543if (common->mark_ptr != 0)
9544 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
9545mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9546OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9547OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9548
9549if (*cc == OP_CALLOUT)
9550 {
9551 value1 = 0;
9552 value2 = 0;
9553 value3 = 0;
9554 }
9555else
9556 {
9557 value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9558 value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9559 value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9560 }
9561
9562OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
9563OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
9564OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
9565OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
9566
9567SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9568
9569/* Needed to save important temporary registers. */
9570OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
9571/* SLJIT_R0 = arguments */
9572OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
9573GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
9574sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
9575OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9576free_stack(common, callout_arg_size);
9577
9578/* Check return value. */
9579OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
9580add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
9581if (common->abort_label == NULL)
9582 add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
9583else
9584 JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
9585return cc + callout_length;
9586}
9587
9588#undef CALLOUT_ARG_SIZE
9589#undef CALLOUT_ARG_OFFSET
9590
9591static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
9592{
9593while (TRUE)
9594 {
9595 switch (*cc)
9596 {
9597 case OP_CALLOUT_STR:
9598 cc += GET(cc, 1 + 2*LINK_SIZE);
9599 break;
9600
9601 case OP_NOT_WORD_BOUNDARY:
9602 case OP_WORD_BOUNDARY:
9603 case OP_CIRC:
9604 case OP_CIRCM:
9605 case OP_DOLL:
9606 case OP_DOLLM:
9607 case OP_CALLOUT:
9608 case OP_ALT:
9609 cc += PRIV(OP_lengths)[*cc];
9610 break;
9611
9612 case OP_KET:
9613 return FALSE;
9614
9615 default:
9616 return TRUE;
9617 }
9618 }
9619}
9620
9621static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
9622{
9623DEFINE_COMPILER;
9624int framesize;
9625int extrasize;
9626BOOL local_quit_available = FALSE;
9627BOOL needs_control_head;
9628int private_data_ptr;
9629backtrack_common altbacktrack;
9630PCRE2_SPTR ccbegin;
9631PCRE2_UCHAR opcode;
9632PCRE2_UCHAR bra = OP_BRA;
9633jump_list *tmp = NULL;
9634jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
9635jump_list **found;
9636/* Saving previous accept variables. */
9637BOOL save_local_quit_available = common->local_quit_available;
9638BOOL save_in_positive_assertion = common->in_positive_assertion;
9639then_trap_backtrack *save_then_trap = common->then_trap;
9640struct sljit_label *save_quit_label = common->quit_label;
9641struct sljit_label *save_accept_label = common->accept_label;
9642jump_list *save_quit = common->quit;
9643jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
9644jump_list *save_accept = common->accept;
9645struct sljit_jump *jump;
9646struct sljit_jump *brajump = NULL;
9647
9648/* Assert captures then. */
9649common->then_trap = NULL;
9650
9651if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9652 {
9653 SLJIT_ASSERT(!conditional);
9654 bra = *cc;
9655 cc++;
9656 }
9657private_data_ptr = PRIVATE_DATA(cc);
9658SLJIT_ASSERT(private_data_ptr != 0);
9659framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
9660backtrack->framesize = framesize;
9661backtrack->private_data_ptr = private_data_ptr;
9662opcode = *cc;
9663SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
9664found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
9665ccbegin = cc;
9666cc += GET(cc, 1);
9667
9668if (bra == OP_BRAMINZERO)
9669 {
9670 /* This is a braminzero backtrack path. */
9671 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9672 free_stack(common, 1);
9673 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9674 }
9675
9676if (framesize < 0)
9677 {
9678 extrasize = 1;
9679 if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
9680 extrasize = 0;
9681
9682 if (needs_control_head)
9683 extrasize++;
9684
9685 if (framesize == no_frame)
9686 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9687
9688 if (extrasize > 0)
9689 allocate_stack(common, extrasize);
9690
9691 if (needs_control_head)
9692 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9693
9694 if (extrasize > 0)
9695 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9696
9697 if (needs_control_head)
9698 {
9699 SLJIT_ASSERT(extrasize == 2);
9700 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9701 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9702 }
9703 }
9704else
9705 {
9706 extrasize = needs_control_head ? 3 : 2;
9707 allocate_stack(common, framesize + extrasize);
9708
9709 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9710 OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
9711 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9712 if (needs_control_head)
9713 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9714 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9715
9716 if (needs_control_head)
9717 {
9718 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
9719 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9720 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9721 }
9722 else
9723 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9724
9725 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
9726 }
9727
9728memset(&altbacktrack, 0, sizeof(backtrack_common));
9729if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
9730 {
9731 /* Control verbs cannot escape from these asserts. */
9732 local_quit_available = TRUE;
9733 common->local_quit_available = TRUE;
9734 common->quit_label = NULL;
9735 common->quit = NULL;
9736 }
9737
9738common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
9739common->positive_assertion_quit = NULL;
9740
9741while (1)
9742 {
9743 common->accept_label = NULL;
9744 common->accept = NULL;
9745 altbacktrack.top = NULL;
9746 altbacktrack.topbacktracks = NULL;
9747
9748 if (*ccbegin == OP_ALT && extrasize > 0)
9749 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9750
9751 altbacktrack.cc = ccbegin;
9752 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
9753 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9754 {
9755 if (local_quit_available)
9756 {
9757 common->local_quit_available = save_local_quit_available;
9758 common->quit_label = save_quit_label;
9759 common->quit = save_quit;
9760 }
9761 common->in_positive_assertion = save_in_positive_assertion;
9762 common->then_trap = save_then_trap;
9763 common->accept_label = save_accept_label;
9764 common->positive_assertion_quit = save_positive_assertion_quit;
9765 common->accept = save_accept;
9766 return NULL;
9767 }
9768 common->accept_label = LABEL();
9769 if (common->accept != NULL)
9770 set_jumps(common->accept, common->accept_label);
9771
9772 /* Reset stack. */
9773 if (framesize < 0)
9774 {
9775 if (framesize == no_frame)
9776 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9777 else if (extrasize > 0)
9778 free_stack(common, extrasize);
9779
9780 if (needs_control_head)
9781 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9782 }
9783 else
9784 {
9785 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
9786 {
9787 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9788 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9789 if (needs_control_head)
9790 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
9791 }
9792 else
9793 {
9794 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9795 if (needs_control_head)
9796 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
9797 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9798 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9799 }
9800 }
9801
9802 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
9803 {
9804 /* We know that STR_PTR was stored on the top of the stack. */
9805 if (conditional)
9806 {
9807 if (extrasize > 0)
9808 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
9809 }
9810 else if (bra == OP_BRAZERO)
9811 {
9812 if (framesize < 0)
9813 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9814 else
9815 {
9816 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9817 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
9818 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9819 }
9820 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9821 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9822 }
9823 else if (framesize >= 0)
9824 {
9825 /* For OP_BRA and OP_BRAMINZERO. */
9826 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
9827 }
9828 }
9829 add_jump(compiler, found, JUMP(SLJIT_JUMP));
9830
9831 compile_backtrackingpath(common, altbacktrack.top);
9832 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9833 {
9834 if (local_quit_available)
9835 {
9836 common->local_quit_available = save_local_quit_available;
9837 common->quit_label = save_quit_label;
9838 common->quit = save_quit;
9839 }
9840 common->in_positive_assertion = save_in_positive_assertion;
9841 common->then_trap = save_then_trap;
9842 common->accept_label = save_accept_label;
9843 common->positive_assertion_quit = save_positive_assertion_quit;
9844 common->accept = save_accept;
9845 return NULL;
9846 }
9847 set_jumps(altbacktrack.topbacktracks, LABEL());
9848
9849 if (*cc != OP_ALT)
9850 break;
9851
9852 ccbegin = cc;
9853 cc += GET(cc, 1);
9854 }
9855
9856if (local_quit_available)
9857 {
9858 SLJIT_ASSERT(common->positive_assertion_quit == NULL);
9859 /* Makes the check less complicated below. */
9860 common->positive_assertion_quit = common->quit;
9861 }
9862
9863/* None of them matched. */
9864if (common->positive_assertion_quit != NULL)
9865 {
9866 jump = JUMP(SLJIT_JUMP);
9867 set_jumps(common->positive_assertion_quit, LABEL());
9868 SLJIT_ASSERT(framesize != no_stack);
9869 if (framesize < 0)
9870 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
9871 else
9872 {
9873 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9874 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9875 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
9876 }
9877 JUMPHERE(jump);
9878 }
9879
9880if (needs_control_head)
9881 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
9882
9883if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
9884 {
9885 /* Assert is failed. */
9886 if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
9887 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9888
9889 if (framesize < 0)
9890 {
9891 /* The topmost item should be 0. */
9892 if (bra == OP_BRAZERO)
9893 {
9894 if (extrasize == 2)
9895 free_stack(common, 1);
9896 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9897 }
9898 else if (extrasize > 0)
9899 free_stack(common, extrasize);
9900 }
9901 else
9902 {
9903 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
9904 /* The topmost item should be 0. */
9905 if (bra == OP_BRAZERO)
9906 {
9907 free_stack(common, framesize + extrasize - 1);
9908 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9909 }
9910 else
9911 free_stack(common, framesize + extrasize);
9912 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9913 }
9914 jump = JUMP(SLJIT_JUMP);
9915 if (bra != OP_BRAZERO)
9916 add_jump(compiler, target, jump);
9917
9918 /* Assert is successful. */
9919 set_jumps(tmp, LABEL());
9920 if (framesize < 0)
9921 {
9922 /* We know that STR_PTR was stored on the top of the stack. */
9923 if (extrasize > 0)
9924 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
9925
9926 /* Keep the STR_PTR on the top of the stack. */
9927 if (bra == OP_BRAZERO)
9928 {
9929 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9930 if (extrasize == 2)
9931 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9932 }
9933 else if (bra == OP_BRAMINZERO)
9934 {
9935 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9936 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9937 }
9938 }
9939 else
9940 {
9941 if (bra == OP_BRA)
9942 {
9943 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9944 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
9945 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
9946 }
9947 else
9948 {
9949 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
9950 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
9951 if (extrasize == 2)
9952 {
9953 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9954 if (bra == OP_BRAMINZERO)
9955 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9956 }
9957 else
9958 {
9959 SLJIT_ASSERT(extrasize == 3);
9960 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
9961 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
9962 }
9963 }
9964 }
9965
9966 if (bra == OP_BRAZERO)
9967 {
9968 backtrack->matchingpath = LABEL();
9969 SET_LABEL(jump, backtrack->matchingpath);
9970 }
9971 else if (bra == OP_BRAMINZERO)
9972 {
9973 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
9974 JUMPHERE(brajump);
9975 if (framesize >= 0)
9976 {
9977 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9978 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
9979 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
9980 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
9981 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
9982 }
9983 set_jumps(backtrack->common.topbacktracks, LABEL());
9984 }
9985 }
9986else
9987 {
9988 /* AssertNot is successful. */
9989 if (framesize < 0)
9990 {
9991 if (extrasize > 0)
9992 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9993
9994 if (bra != OP_BRA)
9995 {
9996 if (extrasize == 2)
9997 free_stack(common, 1);
9998 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9999 }
10000 else if (extrasize > 0)
10001 free_stack(common, extrasize);
10002 }
10003 else
10004 {
10005 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10006 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
10007 /* The topmost item should be 0. */
10008 if (bra != OP_BRA)
10009 {
10010 free_stack(common, framesize + extrasize - 1);
10011 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10012 }
10013 else
10014 free_stack(common, framesize + extrasize);
10015 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10016 }
10017
10018 if (bra == OP_BRAZERO)
10019 backtrack->matchingpath = LABEL();
10020 else if (bra == OP_BRAMINZERO)
10021 {
10022 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10023 JUMPHERE(brajump);
10024 }
10025
10026 if (bra != OP_BRA)
10027 {
10028 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
10029 set_jumps(backtrack->common.topbacktracks, LABEL());
10030 backtrack->common.topbacktracks = NULL;
10031 }
10032 }
10033
10034if (local_quit_available)
10035 {
10036 common->local_quit_available = save_local_quit_available;
10037 common->quit_label = save_quit_label;
10038 common->quit = save_quit;
10039 }
10040common->in_positive_assertion = save_in_positive_assertion;
10041common->then_trap = save_then_trap;
10042common->accept_label = save_accept_label;
10043common->positive_assertion_quit = save_positive_assertion_quit;
10044common->accept = save_accept;
10045return cc + 1 + LINK_SIZE;
10046}
10047
10048static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
10049{
10050DEFINE_COMPILER;
10051int stacksize;
10052
10053if (framesize < 0)
10054 {
10055 if (framesize == no_frame)
10056 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10057 else
10058 {
10059 stacksize = needs_control_head ? 1 : 0;
10060 if (ket != OP_KET || has_alternatives)
10061 stacksize++;
10062
10063 if (stacksize > 0)
10064 free_stack(common, stacksize);
10065 }
10066
10067 if (needs_control_head)
10068 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
10069
10070 /* TMP2 which is set here used by OP_KETRMAX below. */
10071 if (ket == OP_KETRMAX)
10072 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10073 else if (ket == OP_KETRMIN)
10074 {
10075 /* Move the STR_PTR to the private_data_ptr. */
10076 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10077 }
10078 }
10079else
10080 {
10081 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10082 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10083 if (needs_control_head)
10084 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10085
10086 if (ket == OP_KETRMAX)
10087 {
10088 /* TMP2 which is set here used by OP_KETRMAX below. */
10089 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10090 }
10091 }
10092if (needs_control_head)
10093 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10094}
10095
10096static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10097{
10098DEFINE_COMPILER;
10099
10100if (common->capture_last_ptr != 0)
10101 {
10102 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10103 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10104 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10105 stacksize++;
10106 }
10107if (common->optimized_cbracket[offset >> 1] == 0)
10108 {
10109 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10110 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10111 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10112 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10113 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10114 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10115 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10116 stacksize += 2;
10117 }
10118return stacksize;
10119}
10120
10121static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10122{
10123 if (PRIV(script_run)(ptr, endptr, FALSE))
10124 return endptr;
10125 return NULL;
10126}
10127
10128#ifdef SUPPORT_UNICODE
10129
10130static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10131{
10132 if (PRIV(script_run)(ptr, endptr, TRUE))
10133 return endptr;
10134 return NULL;
10135}
10136
10137#endif /* SUPPORT_UNICODE */
10138
10139static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
10140{
10141DEFINE_COMPILER;
10142
10143SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10144
10145OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10146#ifdef SUPPORT_UNICODE
10147sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
10148 common->utf ? SLJIT_FUNC_OFFSET(do_script_run_utf) : SLJIT_FUNC_OFFSET(do_script_run));
10149#else
10150sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_script_run));
10151#endif
10152
10153OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
10154add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
10155}
10156
10157/*
10158 Handling bracketed expressions is probably the most complex part.
10159
10160 Stack layout naming characters:
10161 S - Push the current STR_PTR
10162 0 - Push a 0 (NULL)
10163 A - Push the current STR_PTR. Needed for restoring the STR_PTR
10164 before the next alternative. Not pushed if there are no alternatives.
10165 M - Any values pushed by the current alternative. Can be empty, or anything.
10166 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10167 L - Push the previous local (pointed by localptr) to the stack
   () - optional values stored on the stack
  ()* - optional, can be stored multiple times
10170
10171 The following list shows the regular expression templates, their PCRE byte codes
10172 and stack layout supported by pcre-sljit.
10173
10174 (?:) OP_BRA | OP_KET A M
10175 () OP_CBRA | OP_KET C M
10176 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
10177 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
10178 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
10179 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
10180 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
10181 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
10182 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
10183 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
10184 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
10185 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
10186 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
10187 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
10188 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
10189 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
10190 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
10191 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
10192 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
10193 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
10194 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
10195 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
10196
10197
10198 Stack layout naming characters:
10199 A - Push the alternative index (starting from 0) on the stack.
        Not pushed if there are no alternatives.
10201 M - Any values pushed by the current alternative. Can be empty, or anything.
10202
10203 The next list shows the possible content of a bracket:
10204 (|) OP_*BRA | OP_ALT ... M A
10205 (?()|) OP_*COND | OP_ALT M A
10206 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
10207 Or nothing, if trace is unnecessary
10208*/
10209
/* Emits the matching path of one bracketed group.

   common - compiler state.
   cc     - first opcode of the group; it may be an OP_BRAZERO or
            OP_BRAMINZERO prefix, which is consumed here.
   parent - enclosing backtrack; it is accessed as a braminzero_backtrack
            when the prefix is OP_BRAMINZERO.

   The opcodes tested explicitly below are OP_CBRA / OP_SCBRA, OP_ONCE,
   OP_COND / OP_SCOND, OP_SBRA, OP_SCRIPT_RUN, OP_ASSERT_NA and
   OP_ASSERTBACK_NA; every other opcode takes the generic path.

   Only the first alternative is compiled here; the remaining alternatives
   are skipped and counted.

   Returns the code pointer after the skipped alternatives advanced by
   1 + LINK_SIZE (presumably just past the closing KET) plus repeat_length,
   or NULL when sljit reports a compiler error. */
static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
PCRE2_UCHAR opcode;
int private_data_ptr = 0;
/* Stays 0 unless the group is OP_CBRA / OP_SCBRA; then it is the OVECTOR index. */
int offset = 0;
int i, stacksize;
/* Counted-repeat data read from the private data of the closing OP_KET. */
int repeat_ptr = 0, repeat_length = 0;
int repeat_type = 0, repeat_count = 0;
PCRE2_SPTR ccbegin;
PCRE2_SPTR matchingpath;
PCRE2_SPTR slot;
/* OP_BRA means "no OP_BRAZERO / OP_BRAMINZERO prefix". */
PCRE2_UCHAR bra = OP_BRA;
PCRE2_UCHAR ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *jump;
struct sljit_jump *skip;
struct sljit_label *rmax_label = NULL;
struct sljit_jump *braminzero = NULL;

PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);

/* Remember and skip the optional zero-repeat prefix. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  /* NOTE(review): this assignment is repeated unconditionally just below. */
  opcode = *cc;
  }

opcode = *cc;
ccbegin = cc;
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
ket = *matchingpath;
/* An OP_KET with non-zero private data carries a counted repeat. OP_UPTO and
   OP_MINUPTO are handled through the OP_KETRMAX / OP_KETRMIN code paths. */
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
  {
  repeat_ptr = PRIVATE_DATA(matchingpath);
  repeat_length = PRIVATE_DATA(matchingpath + 1);
  repeat_type = PRIVATE_DATA(matchingpath + 2);
  repeat_count = PRIVATE_DATA(matchingpath + 3);
  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }

/* From here matchingpath points to the code after the opening opcode and its link. */
matchingpath = ccbegin + 1 + LINK_SIZE;
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* Follow the link of the opening opcode; the test below checks whether
   this lands on an OP_ALT. */
cc += GET(cc, 1);

has_alternatives = *cc == OP_ALT;
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
  {
  SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
    compile_time_checks_must_be_grouped_together);
  /* Conditions in the OP_RREF..OP_TRUE range and OP_FAIL are resolved
     while compiling (see below), so only one branch is ever generated. */
  has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
  }

/* A repeated OP_COND is processed as OP_SCOND. */
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;

if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Capturing brackets has a pre-allocated space. */
  offset = GET2(ccbegin, 1 + LINK_SIZE);
  if (common->optimized_cbracket[offset] == 0)
    {
    private_data_ptr = OVECTOR_PRIV(offset);
    offset <<= 1;
    }
  else
    {
    /* Optimized brackets use the OVECTOR entry itself as private data. */
    offset <<= 1;
    private_data_ptr = OVECTOR(offset);
    }
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  matchingpath += IMM2_SIZE;
  }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Other brackets simply allocate the next entry. */
  private_data_ptr = PRIVATE_DATA(ccbegin);
  SLJIT_ASSERT(private_data_ptr != 0);
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  if (opcode == OP_ONCE)
    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
  }

/* Instructions before the first alternative. */
/* First pass: count the slots, second pass: fill them. */
stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  stacksize++;
if (bra == OP_BRAZERO)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (bra == OP_BRAZERO)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (ket != OP_KETRMIN)
    {
    free_stack(common, 1);
    /* The braminzero jump is resolved near the end of this function. */
    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    }
  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
    {
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    /* Nothing stored during the first run. */
    skip = JUMP(SLJIT_JUMP);
    JUMPHERE(jump);
    /* Checking zero-length iteration. */
    if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
      {
      /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      }
    else
      {
      /* Except when the whole stack frame must be saved. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
      }
    JUMPHERE(skip);
    }
  else
    {
    /* A zero on the top of the stack: reload STR_PTR from the next slot. */
    jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    JUMPHERE(jump);
    }
  }

if (repeat_type != 0)
  {
  /* Initialize the counter of the counted repeat. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
  if (repeat_type == OP_EXACT)
    rmax_label = LABEL();
  }

if (ket == OP_KETRMIN)
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();

if (ket == OP_KETRMAX)
  {
  rmax_label = LABEL();
  if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
  }

/* Handling capturing brackets and alternatives. */
if (opcode == OP_ONCE)
  {
  stacksize = 0;
  if (needs_control_head)
    {
    /* TMP2 holds the control head until it is stored on the stack below. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    stacksize++;
    }

  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
    {
    /* Neither capturing brackets nor recursions are found in the block. */
    if (ket == OP_KETRMIN)
      {
      stacksize += 2;
      /* TMP2 is free only when it does not hold the control head. */
      if (!needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      }
    else
      {
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
      if (ket == OP_KETRMAX || has_alternatives)
        stacksize++;
      }

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    stacksize = 0;
    if (needs_control_head)
      {
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
      }

    if (ket == OP_KETRMIN)
      {
      /* The control head has been stored, so TMP2 can be reused. */
      if (needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
      }
    else if (ket == OP_KETRMAX || has_alternatives)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
    }
  else
    {
    /* A frame is required: allocate it together with the extra slots. */
    if (ket != OP_KET || has_alternatives)
      stacksize++;

    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
    allocate_stack(common, stacksize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

    /* TMP1: previous private data, TMP2: STACK_TOP before the allocation. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

    stacksize = needs_control_head ? 1 : 0;
    if (ket != OP_KET || has_alternatives)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
    }
  }
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Saving the previous values. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    /* Both OVECTOR entries are pushed; the start is overwritten at once. */
    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
    allocate_stack(common, 2);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    }
  }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Saving the previous value. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }
else if (has_alternatives)
  {
  /* Pushing the starting string pointer. */
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  }

/* Generating code for the first alternative. */
if (opcode == OP_COND || opcode == OP_SCOND)
  {
  if (*matchingpath == OP_CREF)
    {
    /* Runtime check: the condition fails when the referenced OVECTOR
       entry is equal to OVECTOR(1). */
    SLJIT_ASSERT(has_alternatives);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    matchingpath += 1 + IMM2_SIZE;
    }
  else if (*matchingpath == OP_DNCREF)
    {
    SLJIT_ASSERT(has_alternatives);

    /* i: number of name table entries to test, slot: the first entry. */
    i = GET2(matchingpath, 1 + IMM2_SIZE);
    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
    /* STR_PTR is used as a scratch register in the loop, so it is saved in TMP3. */
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
    slot += common->name_entry_size;
    i--;
    /* OR the differences together: the result is zero only when every
       tested entry is equal to OVECTOR(1). */
    while (i-- > 0)
      {
      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
      OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
      slot += common->name_entry_size;
      }
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
    matchingpath += 1 + 2 * IMM2_SIZE;
    }
  else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
    {
    /* Never has other case. */
    BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
    SLJIT_ASSERT(!has_alternatives);

    /* In this branch stacksize is reused as the result of the condition,
       which is evaluated while compiling: non-zero selects the first branch. */
    if (*matchingpath == OP_TRUE)
      {
      stacksize = 1;
      matchingpath++;
      }
    else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
      stacksize = 0;
    else if (*matchingpath == OP_RREF)
      {
      stacksize = GET2(matchingpath, 1);
      if (common->currententry == NULL)
        stacksize = 0;
      else if (stacksize == RREF_ANY)
        stacksize = 1;
      else if (common->currententry->start == 0)
        stacksize = stacksize == 0;
      else
        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);

      if (stacksize != 0)
        matchingpath += 1 + IMM2_SIZE;
      }
    else
      {
      /* The remaining opcode of the range (OP_DNRREF, see the compile
         time assertion above): search the name table entries. */
      if (common->currententry == NULL || common->currententry->start == 0)
        stacksize = 0;
      else
        {
        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
        /* stacksize reaches zero when no entry matches. */
        while (stacksize > 0)
          {
          if ((int)GET2(slot, 0) == i)
            break;
          slot += common->name_entry_size;
          stacksize--;
          }
        }

      if (stacksize != 0)
        matchingpath += 1 + 2 * IMM2_SIZE;
      }

    /* The stacksize == 0 is a common "else" case. */
    if (stacksize == 0)
      {
      if (*cc == OP_ALT)
        {
        /* Compile the second branch instead of the first one. */
        matchingpath = cc + 1 + LINK_SIZE;
        cc += GET(cc, 1);
        }
      else
        matchingpath = cc;
      }
    }
  else
    {
    /* The condition is an assertion, which is compiled by
       compile_assert_matchingpath(). */
    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
    /* Similar code as PUSH_BACKTRACK macro. */
    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
      return NULL;
    memset(assert, 0, sizeof(assert_backtrack));
    assert->common.cc = matchingpath;
    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
    }
  }

/* Compile the code between matchingpath and cc. */
compile_matchingpath(common, matchingpath, cc, backtrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

/* Non-atomic assertions: reload STR_PTR from the value saved in private_data_ptr. */
if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

if (opcode == OP_ONCE)
  match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

if (opcode == OP_SCRIPT_RUN)
  match_script_run_common(common, private_data_ptr, backtrack);

/* Count the slots needed after the alternative, then fill them in the same order. */
stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* We need to preserve the counter. TMP2 will be used below. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  stacksize++;
  }
if (ket != OP_KET || bra != OP_BRA)
  stacksize++;
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    stacksize++;
  if (common->optimized_cbracket[offset >> 1] == 0)
    stacksize += 2;
  }
if (has_alternatives && opcode != OP_ONCE)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* TMP2 was set above. */
  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
  stacksize++;
  }

if (ket != OP_KET || bra != OP_BRA)
  {
  /* Repeated groups push STR_PTR; a non-repeated group with a zero-repeat
     prefix pushes 0. */
  if (ket != OP_KET)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (offset != 0)
  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

/* Skip and count the other alternatives. */
i = 1;
while (*cc == OP_ALT)
  {
  cc += GET(cc, 1);
  i++;
  }

if (has_alternatives)
  {
  if (opcode != OP_ONCE)
    {
    /* The last slot belongs to the alternative selector: an immediate 0 is
       stored for small values of i, otherwise a put_label is emitted. */
    if (i <= 3)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
    else
      BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
    }
  if (ket != OP_KETRMAX)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
  }

/* Must be after the matchingpath label. */
if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
  {
  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (repeat_type != 0)
    {
    /* Counted greedy repeat: decrement the counter and loop while it is not zero. */
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
    /* Drop STR_PTR for greedy plus quantifier. */
    if (opcode != OP_ONCE)
      free_stack(common, 1);
    }
  else if (opcode < OP_BRA || opcode >= OP_SBRA)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();

    /* Checking zero-length iteration. */
    if (opcode != OP_ONCE)
      {
      /* This case includes opcodes such as OP_SCRIPT_RUN. */
      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
      /* Drop STR_PTR for greedy plus quantifier. */
      if (bra != OP_BRAZERO)
        free_stack(common, 1);
      }
    else
      /* TMP2 must contain the starting STR_PTR. */
      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
    }
  else
    /* No zero-length check is emitted for the remaining opcodes. */
    JUMPTO(SLJIT_JUMP, rmax_label);
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
  }

if (repeat_type == OP_EXACT)
  {
  count_match(common);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
  }
else if (repeat_type == OP_UPTO)
  {
  /* We need to preserve the counter. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }

if (bra == OP_BRAZERO)
  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
  if (braminzero != NULL)
    {
    JUMPHERE(braminzero);
    /* We need to release the end pointer to perform the
    backtrack for the zero-length iteration. When
    framesize is < 0, OP_ONCE will do the release itself. */
    if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
      }
    else if (ket == OP_KETRMIN && opcode != OP_ONCE)
      free_stack(common, 1);
    }
  /* Continue to the normal backtrack. */
  }

if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
  count_match(common);

/* Advance cc by one opcode and its link (see the header comment). */
cc += 1 + LINK_SIZE;

if (opcode == OP_ONCE)
  {
  /* We temporarily encode the needs_control_head in the lowest bit.
     Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
     the same value for small signed numbers (including negative numbers). */
  BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
  }
return cc + repeat_length;
}
10770
10771static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10772{
10773DEFINE_COMPILER;
10774backtrack_common *backtrack;
10775PCRE2_UCHAR opcode;
10776int private_data_ptr;
10777int cbraprivptr = 0;
10778BOOL needs_control_head;
10779int framesize;
10780int stacksize;
10781int offset = 0;
10782BOOL zero = FALSE;
10783PCRE2_SPTR ccbegin = NULL;
10784int stack; /* Also contains the offset of control head. */
10785struct sljit_label *loop = NULL;
10786struct jump_list *emptymatch = NULL;
10787
10788PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
10789if (*cc == OP_BRAPOSZERO)
10790 {
10791 zero = TRUE;
10792 cc++;
10793 }
10794
10795opcode = *cc;
10796private_data_ptr = PRIVATE_DATA(cc);
10797SLJIT_ASSERT(private_data_ptr != 0);
10798BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
10799switch(opcode)
10800 {
10801 case OP_BRAPOS:
10802 case OP_SBRAPOS:
10803 ccbegin = cc + 1 + LINK_SIZE;
10804 break;
10805
10806 case OP_CBRAPOS:
10807 case OP_SCBRAPOS:
10808 offset = GET2(cc, 1 + LINK_SIZE);
    /* This case cannot be optimized in the same way as
    normal capturing brackets. */
10811 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
10812 cbraprivptr = OVECTOR_PRIV(offset);
10813 offset <<= 1;
10814 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
10815 break;
10816
10817 default:
10818 SLJIT_UNREACHABLE();
10819 break;
10820 }
10821
10822framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
10823BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
10824if (framesize < 0)
10825 {
10826 if (offset != 0)
10827 {
10828 stacksize = 2;
10829 if (common->capture_last_ptr != 0)
10830 stacksize++;
10831 }
10832 else
10833 stacksize = 1;
10834
10835 if (needs_control_head)
10836 stacksize++;
10837 if (!zero)
10838 stacksize++;
10839
10840 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
10841 allocate_stack(common, stacksize);
10842 if (framesize == no_frame)
10843 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10844
10845 stack = 0;
10846 if (offset != 0)
10847 {
10848 stack = 2;
10849 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10850 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10851 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10852 if (common->capture_last_ptr != 0)
10853 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10854 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10855 if (needs_control_head)
10856 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10857 if (common->capture_last_ptr != 0)
10858 {
10859 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
10860 stack = 3;
10861 }
10862 }
10863 else
10864 {
10865 if (needs_control_head)
10866 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10867 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10868 stack = 1;
10869 }
10870
10871 if (needs_control_head)
10872 stack++;
10873 if (!zero)
10874 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
10875 if (needs_control_head)
10876 {
10877 stack--;
10878 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
10879 }
10880 }
10881else
10882 {
10883 stacksize = framesize + 1;
10884 if (!zero)
10885 stacksize++;
10886 if (needs_control_head)
10887 stacksize++;
10888 if (offset == 0)
10889 stacksize++;
10890 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
10891
10892 allocate_stack(common, stacksize);
10893 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10894 if (needs_control_head)
10895 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10896 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10897
10898 stack = 0;
10899 if (!zero)
10900 {
10901 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
10902 stack = 1;
10903 }
10904 if (needs_control_head)
10905 {
10906 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
10907 stack++;
10908 }
10909 if (offset == 0)
10910 {
10911 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
10912 stack++;
10913 }
10914 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
10915 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
10916 stack -= 1 + (offset == 0);
10917 }
10918
10919if (offset != 0)
10920 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
10921
10922loop = LABEL();
10923while (*cc != OP_KETRPOS)
10924 {
10925 backtrack->top = NULL;
10926 backtrack->topbacktracks = NULL;
10927 cc += GET(cc, 1);
10928
10929 compile_matchingpath(common, ccbegin, cc, backtrack);
10930 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10931 return NULL;
10932
10933 if (framesize < 0)
10934 {
10935 if (framesize == no_frame)
10936 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10937
10938 if (offset != 0)
10939 {
10940 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
10941 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10942 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
10943 if (common->capture_last_ptr != 0)
10944 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10945 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10946 }
10947 else
10948 {
10949 if (opcode == OP_SBRAPOS)
10950 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10951 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10952 }
10953
10954 /* Even if the match is empty, we need to reset the control head. */
10955 if (needs_control_head)
10956 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
10957
10958 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
10959 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
10960
10961 if (!zero)
10962 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
10963 }
10964 else
10965 {
10966 if (offset != 0)
10967 {
10968 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10969 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
10970 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10971 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
10972 if (common->capture_last_ptr != 0)
10973 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10974 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10975 }
10976 else
10977 {
10978 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10979 OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10980 if (opcode == OP_SBRAPOS)
10981 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
10982 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
10983 }
10984
10985 /* Even if the match is empty, we need to reset the control head. */
10986 if (needs_control_head)
10987 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
10988
10989 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
10990 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
10991
10992 if (!zero)
10993 {
10994 if (framesize < 0)
10995 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
10996 else
10997 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10998 }
10999 }
11000
11001 JUMPTO(SLJIT_JUMP, loop);
11002 flush_stubs(common);
11003
11004 compile_backtrackingpath(common, backtrack->top);
11005 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11006 return NULL;
11007 set_jumps(backtrack->topbacktracks, LABEL());
11008
11009 if (framesize < 0)
11010 {
11011 if (offset != 0)
11012 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11013 else
11014 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11015 }
11016 else
11017 {
11018 if (offset != 0)
11019 {
11020 /* Last alternative. */
11021 if (*cc == OP_KETRPOS)
11022 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11023 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11024 }
11025 else
11026 {
11027 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11028 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11029 }
11030 }
11031
11032 if (*cc == OP_KETRPOS)
11033 break;
11034 ccbegin = cc + 1 + LINK_SIZE;
11035 }
11036
11037/* We don't have to restore the control head in case of a failed match. */
11038
11039backtrack->topbacktracks = NULL;
11040if (!zero)
11041 {
11042 if (framesize < 0)
11043 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
11044 else /* TMP2 is set to [private_data_ptr] above. */
11045 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
11046 }
11047
11048/* None of them matched. */
11049set_jumps(emptymatch, LABEL());
11050count_match(common);
11051return cc + 1 + LINK_SIZE;
11052}
11053
11054static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
11055{
11056int class_len;
11057
11058*opcode = *cc;
11059*exact = 0;
11060
11061if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
11062 {
11063 cc++;
11064 *type = OP_CHAR;
11065 }
11066else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
11067 {
11068 cc++;
11069 *type = OP_CHARI;
11070 *opcode -= OP_STARI - OP_STAR;
11071 }
11072else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11073 {
11074 cc++;
11075 *type = OP_NOT;
11076 *opcode -= OP_NOTSTAR - OP_STAR;
11077 }
11078else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11079 {
11080 cc++;
11081 *type = OP_NOTI;
11082 *opcode -= OP_NOTSTARI - OP_STAR;
11083 }
11084else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11085 {
11086 cc++;
11087 *opcode -= OP_TYPESTAR - OP_STAR;
11088 *type = OP_END;
11089 }
11090else
11091 {
11092 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11093 *type = *opcode;
11094 cc++;
11095 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11096 *opcode = cc[class_len - 1];
11097
11098 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11099 {
11100 *opcode -= OP_CRSTAR - OP_STAR;
11101 *end = cc + class_len;
11102
11103 if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11104 {
11105 *exact = 1;
11106 *opcode -= OP_PLUS - OP_STAR;
11107 }
11108 }
11109 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11110 {
11111 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11112 *end = cc + class_len;
11113
11114 if (*opcode == OP_POSPLUS)
11115 {
11116 *exact = 1;
11117 *opcode = OP_POSSTAR;
11118 }
11119 }
11120 else
11121 {
11122 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11123 *max = GET2(cc, (class_len + IMM2_SIZE));
11124 *exact = GET2(cc, class_len);
11125
11126 if (*max == 0)
11127 {
11128 if (*opcode == OP_CRPOSRANGE)
11129 *opcode = OP_POSSTAR;
11130 else
11131 *opcode -= OP_CRRANGE - OP_STAR;
11132 }
11133 else
11134 {
11135 *max -= *exact;
11136 if (*max == 0)
11137 *opcode = OP_EXACT;
11138 else if (*max == 1)
11139 {
11140 if (*opcode == OP_CRPOSRANGE)
11141 *opcode = OP_POSQUERY;
11142 else
11143 *opcode -= OP_CRRANGE - OP_QUERY;
11144 }
11145 else
11146 {
11147 if (*opcode == OP_CRPOSRANGE)
11148 *opcode = OP_POSUPTO;
11149 else
11150 *opcode -= OP_CRRANGE - OP_UPTO;
11151 }
11152 }
11153 *end = cc + class_len + 2 * IMM2_SIZE;
11154 }
11155 return cc;
11156 }
11157
11158switch(*opcode)
11159 {
11160 case OP_EXACT:
11161 *exact = GET2(cc, 0);
11162 cc += IMM2_SIZE;
11163 break;
11164
11165 case OP_PLUS:
11166 case OP_MINPLUS:
11167 *exact = 1;
11168 *opcode -= OP_PLUS - OP_STAR;
11169 break;
11170
11171 case OP_POSPLUS:
11172 *exact = 1;
11173 *opcode = OP_POSSTAR;
11174 break;
11175
11176 case OP_UPTO:
11177 case OP_MINUPTO:
11178 case OP_POSUPTO:
11179 *max = GET2(cc, 0);
11180 cc += IMM2_SIZE;
11181 break;
11182 }
11183
11184if (*type == OP_END)
11185 {
11186 *type = *cc;
11187 *end = next_opcode(common, cc);
11188 cc++;
11189 return cc;
11190 }
11191
11192*end = cc + 1;
11193#ifdef SUPPORT_UNICODE
11194if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11195#endif
11196return cc;
11197}
11198
/* Emits the matching path for a single-item iterator. The iterator at cc is
   decoded by get_iterator_parameters() into a canonical repeat opcode, an
   item type and the 'exact' / 'max' counts; the mandatory part (exact) is
   emitted first, then the code selected by the repeat opcode.

   common  compiler state
   cc      points at the iterator opcode
   parent  not referenced directly in this body; presumably consumed by the
           PUSH_BACKTRACK macro - confirm against its definition

   Returns the 'end' position reported by get_iterator_parameters(), moved
   two code units further when a following OP_CHAR / OP_CHARI item is folded
   into the iterator (the "charpos" path below). */
11199static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11200{
11201DEFINE_COMPILER;
11202backtrack_common *backtrack;
11203PCRE2_UCHAR opcode;
11204PCRE2_UCHAR type;
11205sljit_u32 max = 0, exact;
11206sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
11207sljit_s32 early_fail_type;
11208BOOL charpos_enabled;
11209PCRE2_UCHAR charpos_char;
11210unsigned int charpos_othercasebit;
11211PCRE2_SPTR end;
11212jump_list *no_match = NULL;
11213jump_list *no_char1_match = NULL;
11214struct sljit_jump *jump = NULL;
11215struct sljit_label *label;
/* The iterator keeps two values (offset0 / offset1). When the opcode has no
   private data they live in STACK(0) / STACK(1) of a stack allocation made
   below; otherwise they live in two consecutive words at private_data_ptr. */
11216int private_data_ptr = PRIVATE_DATA(cc);
11217int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11218int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11219int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
11220int tmp_base, tmp_offset;
11221#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11222BOOL use_tmp;
11223#endif
11224
11225PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11226
/* PRIVATE_DATA(cc + 1) packs the early-fail type into its low three bits
   and the early-fail slot into the remaining bits. */
11227early_fail_type = (early_fail_ptr & 0x7);
11228early_fail_ptr >>= 3;
11229
11230/* During recursion, these optimizations are disabled. */
11231if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
11232 {
11233 early_fail_ptr = 0;
11234 early_fail_type = type_skip;
11235 }
11236
11237SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
11238 || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
11239
/* type_fail: backtrack at once when STR_PTR is not beyond the position
   stored in the early-fail slot. */
11240if (early_fail_type == type_fail)
11241 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
11242
11243cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11244
/* Scratch location for counters and saved positions: register TMP3, except
   for OP_EXTUNI items, which use the POSSESSIVE0 slot of the stack frame. */
11245if (type != OP_EXTUNI)
11246 {
11247 tmp_base = TMP3;
11248 tmp_offset = 0;
11249 }
11250else
11251 {
11252 tmp_base = SLJIT_MEM1(SLJIT_SP);
11253 tmp_offset = POSSESSIVE0;
11254 }
11255
11256/* Handle fixed part first. */
11257if (exact > 1)
11258 {
11259 SLJIT_ASSERT(early_fail_ptr == 0);
11260
 /* In complete-match mode without UTF, and for items other than OP_ANYNL /
 OP_EXTUNI, the subject end is checked once for all 'exact' code units
 before the counted loop; the other branch passes TRUE instead of FALSE
 to compile_char1_matchingpath (presumably a per-item end check -
 confirm against that function). */
11261 if (common->mode == PCRE2_JIT_COMPLETE
11262#ifdef SUPPORT_UNICODE
11263 && !common->utf
11264#endif
11265 && type != OP_ANYNL && type != OP_EXTUNI)
11266 {
11267 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11268 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11269 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11270 label = LABEL();
11271 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11272 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11273 JUMPTO(SLJIT_NOT_ZERO, label);
11274 }
11275 else
11276 {
11277 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11278 label = LABEL();
11279 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11280 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11281 JUMPTO(SLJIT_NOT_ZERO, label);
11282 }
11283 }
11284else if (exact == 1)
11285 {
11286 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11287
11288 if (early_fail_type == type_fail_range)
11289 {
 /* Two words are kept at early_fail_ptr. Backtrack when the distance of
 STR_PTR from the second word does not exceed the distance between the
 two words; otherwise store STR_PTR as the new second word. */
11290 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
11291 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw));
11292 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
11293 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
11294 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
11295
11296 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0);
11297 }
11298 }
11299
/* Variable part, selected by the canonical repeat opcode. */
11300switch(opcode)
11301 {
11302 case OP_STAR:
11303 case OP_UPTO:
11304 SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
11305
11306 if (type == OP_ANYNL || type == OP_EXTUNI)
11307 {
11308 SLJIT_ASSERT(private_data_ptr == 0);
11309 SLJIT_ASSERT(early_fail_ptr == 0);
11310
 /* Two slots are pushed (STR_PTR and 0); afterwards one more slot holding
 STR_PTR is pushed for every item matched by the loop. */
11311 allocate_stack(common, 2);
11312 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11313 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11314
11315 if (opcode == OP_UPTO)
11316 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11317
11318 label = LABEL();
11319 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11320 if (opcode == OP_UPTO)
11321 {
 /* Count down in POSSESSIVE0; when the counter reaches zero the loop is
 left without pushing the current position. */
11322 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11323 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11324 jump = JUMP(SLJIT_ZERO);
11325 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11326 }
11327
11328 /* We cannot use TMP3 because of allocate_stack. */
11329 allocate_stack(common, 1);
11330 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11331 JUMPTO(SLJIT_JUMP, label);
11332 if (jump != NULL)
11333 JUMPHERE(jump);
11334 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11335 break;
11336 }
11337#ifdef SUPPORT_UNICODE
11338 else if (type == OP_ALLANY && !common->invalid_utf)
11339#else
11340 else if (type == OP_ALLANY)
11341#endif
11342 {
11343 if (opcode == OP_STAR)
11344 {
11345 if (private_data_ptr == 0)
11346 allocate_stack(common, 2);
11347
 /* No per-item loop: offset0 receives STR_END, offset1 the starting
 position, and STR_PTR moves straight to STR_END. */
11348 OP1(SLJIT_MOV, base, offset0, STR_END, 0);
11349 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11350
11351 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11352 process_partial_match(common);
11353
11354 if (early_fail_ptr != 0)
11355 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11356 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11357 break;
11358 }
11359#ifdef SUPPORT_UNICODE
11360 else if (!common->utf)
11361#else
11362 else
11363#endif
11364 {
11365 if (private_data_ptr == 0)
11366 allocate_stack(common, 2);
11367
 /* OP_UPTO outside UTF mode: advance by 'max' code units in one step. In
 complete-match mode the result is clamped to STR_END with a conditional
 move; otherwise process_partial_match() is called when the new position
 lies beyond STR_END. */
11368 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11369 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11370
11371 if (common->mode == PCRE2_JIT_COMPLETE)
11372 {
11373 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
11374 CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11375 }
11376 else
11377 {
11378 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11379 process_partial_match(common);
11380 JUMPHERE(jump);
11381 }
11382
11383 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11384
11385 if (early_fail_ptr != 0)
11386 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11387 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11388 break;
11389 }
11390 }
11391
11392 charpos_enabled = FALSE;
11393 charpos_char = 0;
11394 charpos_othercasebit = 0;
11395
 /* "charpos": when the repeated item is not a literal character and the
 opcode after the iterator is OP_CHAR / OP_CHARI, that character may be
 folded into the iterator so that only positions holding it are
 recorded. */
11396 if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
11397 {
11398#ifdef SUPPORT_UNICODE
11399 charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
11400#else
11401 charpos_enabled = TRUE;
11402#endif
 /* A caseless character with another case is usable only when
 char_get_othercase_bit() returns a non-zero bit. */
11403 if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
11404 {
11405 charpos_othercasebit = char_get_othercase_bit(common, end + 1);
11406 if (charpos_othercasebit == 0)
11407 charpos_enabled = FALSE;
11408 }
11409
11410 if (charpos_enabled)
11411 {
11412 charpos_char = end[1];
11413 /* Consume the OP_CHAR opcode. */
11414 end += 2;
11415#if PCRE2_CODE_UNIT_WIDTH == 8
11416 SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
11417#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11418 SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
11419 if ((charpos_othercasebit & 0x100) != 0)
11420 charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
11421#endif
 /* The emitted comparisons OR the same bit into the subject character,
 so it is set in the reference character as well. */
11422 if (charpos_othercasebit != 0)
11423 charpos_char |= charpos_othercasebit;
11424
11425 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
11426 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
11427 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
11428 }
11429 }
11430
11431 if (charpos_enabled)
11432 {
11433 if (opcode == OP_UPTO)
11434 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
11435
11436 /* Search the first instance of charpos_char. */
 /* The initial jump skips the item match, so the character test runs at
 the starting position first. */
11437 jump = JUMP(SLJIT_JUMP);
11438 label = LABEL();
11439 if (opcode == OP_UPTO)
11440 {
11441 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11442 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
11443 }
11444 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11445 if (early_fail_ptr != 0)
11446 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11447 JUMPHERE(jump);
11448
11449 detect_partial_match(common, &backtrack->topbacktracks);
11450 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11451 if (charpos_othercasebit != 0)
11452 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11453 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11454
 /* First instance found: both saved values start out as this position. */
11455 if (private_data_ptr == 0)
11456 allocate_stack(common, 2);
11457 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11458 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11459
11460 if (opcode == OP_UPTO)
11461 {
11462 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11463 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11464 }
11465
11466 /* Search the last instance of charpos_char. */
11467 label = LABEL();
11468 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11469 if (early_fail_ptr != 0)
11470 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11471 detect_partial_match(common, &no_match);
11472 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11473 if (charpos_othercasebit != 0)
11474 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11475
 /* offset0 is updated whenever the character is seen again. OP_STAR
 loops until the item stops matching; OP_UPTO also stops when the
 counter reaches zero. */
11476 if (opcode == OP_STAR)
11477 {
11478 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11479 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11480 JUMPTO(SLJIT_JUMP, label);
11481 }
11482 else
11483 {
11484 jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
11485 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11486 JUMPHERE(jump);
11487 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11488 JUMPTO(SLJIT_NOT_ZERO, label);
11489 }
11490
 /* Continue one code unit after the last recorded instance, i.e. with the
 folded character already consumed; offset0 is updated to match. */
11491 set_jumps(no_match, LABEL());
11492 OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
11493 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11494 }
11495 else
11496 {
 /* Generic greedy loop: offset1 keeps the starting position and offset0
 ends up holding the position reached by the loop. */
11497 if (private_data_ptr == 0)
11498 allocate_stack(common, 2);
11499
11500 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11501#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
 /* In UTF mode the position after the last complete item is tracked
 separately, in TMP3 when use_tmp is set and in offset0 otherwise. */
11502 use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
11503 SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
11504
11505 if (common->utf)
11506 OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11507#endif
11508 if (opcode == OP_UPTO)
11509 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11510
11511 detect_partial_match(common, &no_match);
11512 label = LABEL();
11513 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11514#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11515 if (common->utf)
11516 OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11517#endif
11518
11519 if (opcode == OP_UPTO)
11520 {
11521 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11522 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11523 }
11524
11525 detect_partial_match_to(common, label);
 /* Outside UTF mode the two exits are balanced by the +1 here and the -1
 below: the no_char1_match exit lands between them and so is moved back
 by one code unit, the no_match exits land after the -1 and keep STR_PTR
 as it is. */
11526 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11527
11528 set_jumps(no_char1_match, LABEL());
11529#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11530 if (common->utf)
11531 {
11532 set_jumps(no_match, LABEL());
11533 if (use_tmp)
11534 {
11535 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11536 OP1(SLJIT_MOV, base, offset0, TMP3, 0);
11537 }
11538 else
11539 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11540 }
11541 else
11542#endif
11543 {
11544 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11545 set_jumps(no_match, LABEL());
11546 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11547 }
11548
11549 if (early_fail_ptr != 0)
11550 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11551 }
11552
11553 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11554 break;
11555
11556 case OP_MINSTAR:
 /* Only the current position is saved; no item is matched on this path. */
11557 if (private_data_ptr == 0)
11558 allocate_stack(common, 1);
11559 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11560 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11561 if (early_fail_ptr != 0)
11562 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11563 break;
11564
11565 case OP_MINUPTO:
 /* Saves the current position and a counter initialised to max + 1; no
 item is matched on this path. */
11566 SLJIT_ASSERT(early_fail_ptr == 0);
11567 if (private_data_ptr == 0)
11568 allocate_stack(common, 2);
11569 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11570 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11571 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11572 break;
11573
11574 case OP_QUERY:
11575 case OP_MINQUERY:
 /* Both forms save the position; only OP_QUERY matches the item on this
 path. */
11576 SLJIT_ASSERT(early_fail_ptr == 0);
11577 if (private_data_ptr == 0)
11578 allocate_stack(common, 1);
11579 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11580 if (opcode == OP_QUERY)
11581 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11582 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11583 break;
11584
11585 case OP_EXACT:
 /* Nothing left to emit: the fixed part was handled above. */
11586 break;
11587
 /* The OP_POS* cases below neither allocate stack slots nor set
 'matchingpath'. */
11588 case OP_POSSTAR:
11589#if defined SUPPORT_UNICODE
11590 if (type == OP_ALLANY && !common->invalid_utf)
11591#else
11592 if (type == OP_ALLANY)
11593#endif
11594 {
11595 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11596 process_partial_match(common);
11597 if (early_fail_ptr != 0)
11598 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11599 break;
11600 }
11601
11602#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11603 if (common->utf)
11604 {
 /* The scratch location keeps the position after the last complete item;
 STR_PTR is reloaded from it when the loop ends. */
11605 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11606 detect_partial_match(common, &no_match);
11607 label = LABEL();
11608 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11609 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11610 detect_partial_match_to(common, label);
11611
11612 set_jumps(no_match, LABEL());
11613 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11614 if (early_fail_ptr != 0)
11615 {
11616 if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
11617 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
11618 else
11619 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11620 }
11621 break;
11622 }
11623#endif
11624
 /* Same +1 / -1 exit balancing as in the generic OP_STAR loop above. */
11625 detect_partial_match(common, &no_match);
11626 label = LABEL();
11627 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11628 detect_partial_match_to(common, label);
11629 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11630
11631 set_jumps(no_char1_match, LABEL());
11632 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11633 set_jumps(no_match, LABEL());
11634 if (early_fail_ptr != 0)
11635 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11636 break;
11637
11638 case OP_POSUPTO:
11639 SLJIT_ASSERT(early_fail_ptr == 0);
11640#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11641 if (common->utf)
11642 {
 /* POSSESSIVE1 keeps the position after the last complete item while the
 scratch location counts down from max. */
11643 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11644 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11645
11646 detect_partial_match(common, &no_match);
11647 label = LABEL();
11648 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11649 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11650 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11651 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11652 detect_partial_match_to(common, label);
11653
11654 set_jumps(no_match, LABEL());
11655 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
11656 break;
11657 }
11658#endif
11659
11660 if (type == OP_ALLANY)
11661 {
 /* Advance by max code units in one step; clamped to STR_END in
 complete-match mode, as in the OP_UPTO case above. */
11662 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11663
11664 if (common->mode == PCRE2_JIT_COMPLETE)
11665 {
11666 OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
11667 CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11668 }
11669 else
11670 {
11671 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11672 process_partial_match(common);
11673 JUMPHERE(jump);
11674 }
11675 break;
11676 }
11677
11678 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11679
11680 detect_partial_match(common, &no_match);
11681 label = LABEL();
11682 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11683 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11684 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11685 detect_partial_match_to(common, label);
11686 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11687
11688 set_jumps(no_char1_match, LABEL());
11689 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11690 set_jumps(no_match, LABEL());
11691 break;
11692
11693 case OP_POSQUERY:
 /* The scratch location holds the position to continue from: the start,
 replaced by the position after the item when the item matches. */
11694 SLJIT_ASSERT(early_fail_ptr == 0);
11695 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11696 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11697 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11698 set_jumps(no_match, LABEL());
11699 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11700 break;
11701
11702 default:
11703 SLJIT_UNREACHABLE();
11704 break;
11705 }
11706
11707count_match(common);
11708return end;
11709}
11710
11711static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11712{
11713DEFINE_COMPILER;
11714backtrack_common *backtrack;
11715
11716PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11717
11718if (*cc == OP_FAIL)
11719 {
11720 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11721 return cc + 1;
11722 }
11723
11724if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
11725 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
11726
11727if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
11728 {
11729 /* No need to check notempty conditions. */
11730 if (common->accept_label == NULL)
11731 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
11732 else
11733 JUMPTO(SLJIT_JUMP, common->accept_label);
11734 return cc + 1;
11735 }
11736
11737if (common->accept_label == NULL)
11738 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
11739else
11740 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
11741
11742if (HAS_VIRTUAL_REGISTERS)
11743 {
11744 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11745 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
11746 }
11747else
11748 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
11749
11750OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
11751add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
11752OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
11753if (common->accept_label == NULL)
11754 add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
11755else
11756 JUMPTO(SLJIT_ZERO, common->accept_label);
11757
11758OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
11759if (common->accept_label == NULL)
11760 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
11761else
11762 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
11763add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11764return cc + 1;
11765}
11766
11767static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
11768{
11769DEFINE_COMPILER;
11770int offset = GET2(cc, 1);
11771BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
11772
11773/* Data will be discarded anyway... */
11774if (common->currententry != NULL)
11775 return cc + 1 + IMM2_SIZE;
11776
11777if (!optimized_cbracket)
11778 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
11779offset <<= 1;
11780OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11781if (!optimized_cbracket)
11782 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11783return cc + 1 + IMM2_SIZE;
11784}
11785
11786static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11787{
11788DEFINE_COMPILER;
11789backtrack_common *backtrack;
11790PCRE2_UCHAR opcode = *cc;
11791PCRE2_SPTR ccend = cc + 1;
11792
11793if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
11794 opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
11795 ccend += 2 + cc[1];
11796
11797PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11798
11799if (opcode == OP_SKIP)
11800 {
11801 allocate_stack(common, 1);
11802 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11803 return ccend;
11804 }
11805
11806if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
11807 {
11808 if (HAS_VIRTUAL_REGISTERS)
11809 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11810 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
11811 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
11812 OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
11813 }
11814
11815return ccend;
11816}
11817
11818static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
11819
11820static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11821{
11822DEFINE_COMPILER;
11823backtrack_common *backtrack;
11824BOOL needs_control_head;
11825int size;
11826
11827PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
11828common->then_trap = BACKTRACK_AS(then_trap_backtrack);
11829BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
11830BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
11831BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
11832
11833size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11834size = 3 + (size < 0 ? 0 : size);
11835
11836OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11837allocate_stack(common, size);
11838if (size > 3)
11839 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
11840else
11841 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
11842OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
11843OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
11844OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
11845
11846size = BACKTRACK_AS(then_trap_backtrack)->framesize;
11847if (size >= 0)
11848 init_frame(common, cc, ccend, size - 1, 0);
11849}
11850
11851static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
11852{
11853DEFINE_COMPILER;
11854backtrack_common *backtrack;
11855BOOL has_then_trap = FALSE;
11856then_trap_backtrack *save_then_trap = NULL;
11857
11858SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
11859
11860if (common->has_then && common->then_offsets[cc - common->start] != 0)
11861 {
11862 SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
11863 has_then_trap = TRUE;
11864 save_then_trap = common->then_trap;
11865 /* Tail item on backtrack. */
11866 compile_then_trap_matchingpath(common, cc, ccend, parent);
11867 }
11868
11869while (cc < ccend)
11870 {
11871 switch(*cc)
11872 {
11873 case OP_SOD:
11874 case OP_SOM:
11875 case OP_NOT_WORD_BOUNDARY:
11876 case OP_WORD_BOUNDARY:
11877 case OP_EODN:
11878 case OP_EOD:
11879 case OP_DOLL:
11880 case OP_DOLLM:
11881 case OP_CIRC:
11882 case OP_CIRCM:
11883 case OP_REVERSE:
11884 cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
11885 break;
11886
11887 case OP_NOT_DIGIT:
11888 case OP_DIGIT:
11889 case OP_NOT_WHITESPACE:
11890 case OP_WHITESPACE:
11891 case OP_NOT_WORDCHAR:
11892 case OP_WORDCHAR:
11893 case OP_ANY:
11894 case OP_ALLANY:
11895 case OP_ANYBYTE:
11896 case OP_NOTPROP:
11897 case OP_PROP:
11898 case OP_ANYNL:
11899 case OP_NOT_HSPACE:
11900 case OP_HSPACE:
11901 case OP_NOT_VSPACE:
11902 case OP_VSPACE:
11903 case OP_EXTUNI:
11904 case OP_NOT:
11905 case OP_NOTI:
11906 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
11907 break;
11908
11909 case OP_SET_SOM:
11910 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
11911 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
11912 allocate_stack(common, 1);
11913 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
11914 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
11915 cc++;
11916 break;
11917
11918 case OP_CHAR:
11919 case OP_CHARI:
11920 if (common->mode == PCRE2_JIT_COMPLETE)
11921 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
11922 else
11923 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
11924 break;
11925
11926 case OP_STAR:
11927 case OP_MINSTAR:
11928 case OP_PLUS:
11929 case OP_MINPLUS:
11930 case OP_QUERY:
11931 case OP_MINQUERY:
11932 case OP_UPTO:
11933 case OP_MINUPTO:
11934 case OP_EXACT:
11935 case OP_POSSTAR:
11936 case OP_POSPLUS:
11937 case OP_POSQUERY:
11938 case OP_POSUPTO:
11939 case OP_STARI:
11940 case OP_MINSTARI:
11941 case OP_PLUSI:
11942 case OP_MINPLUSI:
11943 case OP_QUERYI:
11944 case OP_MINQUERYI:
11945 case OP_UPTOI:
11946 case OP_MINUPTOI:
11947 case OP_EXACTI:
11948 case OP_POSSTARI:
11949 case OP_POSPLUSI:
11950 case OP_POSQUERYI:
11951 case OP_POSUPTOI:
11952 case OP_NOTSTAR:
11953 case OP_NOTMINSTAR:
11954 case OP_NOTPLUS:
11955 case OP_NOTMINPLUS:
11956 case OP_NOTQUERY:
11957 case OP_NOTMINQUERY:
11958 case OP_NOTUPTO:
11959 case OP_NOTMINUPTO:
11960 case OP_NOTEXACT:
11961 case OP_NOTPOSSTAR:
11962 case OP_NOTPOSPLUS:
11963 case OP_NOTPOSQUERY:
11964 case OP_NOTPOSUPTO:
11965 case OP_NOTSTARI:
11966 case OP_NOTMINSTARI:
11967 case OP_NOTPLUSI:
11968 case OP_NOTMINPLUSI:
11969 case OP_NOTQUERYI:
11970 case OP_NOTMINQUERYI:
11971 case OP_NOTUPTOI:
11972 case OP_NOTMINUPTOI:
11973 case OP_NOTEXACTI:
11974 case OP_NOTPOSSTARI:
11975 case OP_NOTPOSPLUSI:
11976 case OP_NOTPOSQUERYI:
11977 case OP_NOTPOSUPTOI:
11978 case OP_TYPESTAR:
11979 case OP_TYPEMINSTAR:
11980 case OP_TYPEPLUS:
11981 case OP_TYPEMINPLUS:
11982 case OP_TYPEQUERY:
11983 case OP_TYPEMINQUERY:
11984 case OP_TYPEUPTO:
11985 case OP_TYPEMINUPTO:
11986 case OP_TYPEEXACT:
11987 case OP_TYPEPOSSTAR:
11988 case OP_TYPEPOSPLUS:
11989 case OP_TYPEPOSQUERY:
11990 case OP_TYPEPOSUPTO:
11991 cc = compile_iterator_matchingpath(common, cc, parent);
11992 break;
11993
11994 case OP_CLASS:
11995 case OP_NCLASS:
11996 if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
11997 cc = compile_iterator_matchingpath(common, cc, parent);
11998 else
11999 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12000 break;
12001
12002#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
12003 case OP_XCLASS:
12004 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
12005 cc = compile_iterator_matchingpath(common, cc, parent);
12006 else
12007 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12008 break;
12009#endif
12010
12011 case OP_REF:
12012 case OP_REFI:
12013 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
12014 cc = compile_ref_iterator_matchingpath(common, cc, parent);
12015 else
12016 {
12017 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
12018 cc += 1 + IMM2_SIZE;
12019 }
12020 break;
12021
12022 case OP_DNREF:
12023 case OP_DNREFI:
12024 if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
12025 cc = compile_ref_iterator_matchingpath(common, cc, parent);
12026 else
12027 {
12028 compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
12029 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
12030 cc += 1 + 2 * IMM2_SIZE;
12031 }
12032 break;
12033
12034 case OP_RECURSE:
12035 cc = compile_recurse_matchingpath(common, cc, parent);
12036 break;
12037
12038 case OP_CALLOUT:
12039 case OP_CALLOUT_STR:
12040 cc = compile_callout_matchingpath(common, cc, parent);
12041 break;
12042
12043 case OP_ASSERT:
12044 case OP_ASSERT_NOT:
12045 case OP_ASSERTBACK:
12046 case OP_ASSERTBACK_NOT:
12047 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12048 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12049 break;
12050
12051 case OP_BRAMINZERO:
12052 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
12053 cc = bracketend(cc + 1);
12054 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
12055 {
12056 allocate_stack(common, 1);
12057 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12058 }
12059 else
12060 {
12061 allocate_stack(common, 2);
12062 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12063 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
12064 }
12065 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
12066 count_match(common);
12067 break;
12068
12069 case OP_ASSERT_NA:
12070 case OP_ASSERTBACK_NA:
12071 case OP_ONCE:
12072 case OP_SCRIPT_RUN:
12073 case OP_BRA:
12074 case OP_CBRA:
12075 case OP_COND:
12076 case OP_SBRA:
12077 case OP_SCBRA:
12078 case OP_SCOND:
12079 cc = compile_bracket_matchingpath(common, cc, parent);
12080 break;
12081
12082 case OP_BRAZERO:
12083 if (cc[1] > OP_ASSERTBACK_NOT)
12084 cc = compile_bracket_matchingpath(common, cc, parent);
12085 else
12086 {
12087 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12088 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12089 }
12090 break;
12091
12092 case OP_BRAPOS:
12093 case OP_CBRAPOS:
12094 case OP_SBRAPOS:
12095 case OP_SCBRAPOS:
12096 case OP_BRAPOSZERO:
12097 cc = compile_bracketpos_matchingpath(common, cc, parent);
12098 break;
12099
12100 case OP_MARK:
12101 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12102 SLJIT_ASSERT(common->mark_ptr != 0);
12103 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
12104 allocate_stack(common, common->has_skip_arg ? 5 : 1);
12105 if (HAS_VIRTUAL_REGISTERS)
12106 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12107 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
12108 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12109 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12110 OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12111 if (common->has_skip_arg)
12112 {
12113 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12114 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12115 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
12116 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
12117 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
12118 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
12119 }
12120 cc += 1 + 2 + cc[1];
12121 break;
12122
12123 case OP_PRUNE:
12124 case OP_PRUNE_ARG:
12125 case OP_SKIP:
12126 case OP_SKIP_ARG:
12127 case OP_THEN:
12128 case OP_THEN_ARG:
12129 case OP_COMMIT:
12130 case OP_COMMIT_ARG:
12131 cc = compile_control_verb_matchingpath(common, cc, parent);
12132 break;
12133
12134 case OP_FAIL:
12135 case OP_ACCEPT:
12136 case OP_ASSERT_ACCEPT:
12137 cc = compile_fail_accept_matchingpath(common, cc, parent);
12138 break;
12139
12140 case OP_CLOSE:
12141 cc = compile_close_matchingpath(common, cc);
12142 break;
12143
12144 case OP_SKIPZERO:
12145 cc = bracketend(cc + 1);
12146 break;
12147
12148 default:
12149 SLJIT_UNREACHABLE();
12150 return;
12151 }
12152 if (cc == NULL)
12153 return;
12154 }
12155
12156if (has_then_trap)
12157 {
12158 /* Head item on backtrack. */
12159 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12160 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12161 BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
12162 common->then_trap = save_then_trap;
12163 }
12164SLJIT_ASSERT(cc == ccend);
12165}
12166
/* The matching-path helper macros are not used past this point. */
#undef PUSH_BACKTRACK
#undef PUSH_BACKTRACK_NOVALUE
#undef BACKTRACK_AS

/* Compiles the backtracking path of `current` and returns from the calling
(void) function if the sljit compiler has recorded an error. Relies on
`common` and `compiler` being in scope at the point of use. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Views the `current` backtrack record as the given specific record type. */
#define CURRENT_AS(type) ((type *)current)
12181
12182static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12183{
12184DEFINE_COMPILER;
12185PCRE2_SPTR cc = current->cc;
12186PCRE2_UCHAR opcode;
12187PCRE2_UCHAR type;
12188sljit_u32 max = 0, exact;
12189struct sljit_label *label = NULL;
12190struct sljit_jump *jump = NULL;
12191jump_list *jumplist = NULL;
12192PCRE2_SPTR end;
12193int private_data_ptr = PRIVATE_DATA(cc);
12194int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
12195int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
12196int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
12197
12198cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
12199
12200switch(opcode)
12201 {
12202 case OP_STAR:
12203 case OP_UPTO:
12204 if (type == OP_ANYNL || type == OP_EXTUNI)
12205 {
12206 SLJIT_ASSERT(private_data_ptr == 0);
12207 set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12208 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12209 free_stack(common, 1);
12210 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12211 }
12212 else
12213 {
12214 if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
12215 {
12216 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12217 OP1(SLJIT_MOV, TMP2, 0, base, offset1);
12218 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12219
12220 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
12221 label = LABEL();
12222 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
12223 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12224 if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
12225 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
12226 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12227 move_back(common, NULL, TRUE);
12228 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
12229 }
12230 else
12231 {
12232 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12233 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
12234 move_back(common, NULL, TRUE);
12235 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12236 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12237 }
12238 JUMPHERE(jump);
12239 if (private_data_ptr == 0)
12240 free_stack(common, 2);
12241 }
12242 break;
12243
12244 case OP_MINSTAR:
12245 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12246 compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12247 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12248 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12249 set_jumps(jumplist, LABEL());
12250 if (private_data_ptr == 0)
12251 free_stack(common, 1);
12252 break;
12253
12254 case OP_MINUPTO:
12255 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
12256 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12257 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
12258 add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
12259
12260 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
12261 compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12262 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12263 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12264
12265 set_jumps(jumplist, LABEL());
12266 if (private_data_ptr == 0)
12267 free_stack(common, 2);
12268 break;
12269
12270 case OP_QUERY:
12271 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12272 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12273 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12274 jump = JUMP(SLJIT_JUMP);
12275 set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12276 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12277 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12278 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12279 JUMPHERE(jump);
12280 if (private_data_ptr == 0)
12281 free_stack(common, 1);
12282 break;
12283
12284 case OP_MINQUERY:
12285 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12286 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12287 jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12288 compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12289 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12290 set_jumps(jumplist, LABEL());
12291 JUMPHERE(jump);
12292 if (private_data_ptr == 0)
12293 free_stack(common, 1);
12294 break;
12295
12296 case OP_EXACT:
12297 case OP_POSSTAR:
12298 case OP_POSQUERY:
12299 case OP_POSUPTO:
12300 break;
12301
12302 default:
12303 SLJIT_UNREACHABLE();
12304 break;
12305 }
12306
12307set_jumps(current->topbacktracks, LABEL());
12308}
12309
12310static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12311{
12312DEFINE_COMPILER;
12313PCRE2_SPTR cc = current->cc;
12314BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12315PCRE2_UCHAR type;
12316
12317type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12318
12319if ((type & 0x1) == 0)
12320 {
12321 /* Maximize case. */
12322 set_jumps(current->topbacktracks, LABEL());
12323 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12324 free_stack(common, 1);
12325 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12326 return;
12327 }
12328
12329OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12330CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12331set_jumps(current->topbacktracks, LABEL());
12332free_stack(common, ref ? 2 : 3);
12333}
12334
12335static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12336{
12337DEFINE_COMPILER;
12338recurse_entry *entry;
12339
12340if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12341 {
12342 entry = CURRENT_AS(recurse_backtrack)->entry;
12343 if (entry->backtrack_label == NULL)
12344 add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12345 else
12346 JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12347 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12348 }
12349else
12350 compile_backtrackingpath(common, current->top);
12351
12352set_jumps(current->topbacktracks, LABEL());
12353}
12354
12355static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12356{
12357DEFINE_COMPILER;
12358PCRE2_SPTR cc = current->cc;
12359PCRE2_UCHAR bra = OP_BRA;
12360struct sljit_jump *brajump = NULL;
12361
12362SLJIT_ASSERT(*cc != OP_BRAMINZERO);
12363if (*cc == OP_BRAZERO)
12364 {
12365 bra = *cc;
12366 cc++;
12367 }
12368
12369if (bra == OP_BRAZERO)
12370 {
12371 SLJIT_ASSERT(current->topbacktracks == NULL);
12372 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12373 }
12374
12375if (CURRENT_AS(assert_backtrack)->framesize < 0)
12376 {
12377 set_jumps(current->topbacktracks, LABEL());
12378
12379 if (bra == OP_BRAZERO)
12380 {
12381 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12382 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
12383 free_stack(common, 1);
12384 }
12385 return;
12386 }
12387
12388if (bra == OP_BRAZERO)
12389 {
12390 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
12391 {
12392 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12393 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
12394 free_stack(common, 1);
12395 return;
12396 }
12397 free_stack(common, 1);
12398 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12399 }
12400
12401if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
12402 {
12403 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
12404 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12405 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12406 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));
12407 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);
12408
12409 set_jumps(current->topbacktracks, LABEL());
12410 }
12411else
12412 set_jumps(current->topbacktracks, LABEL());
12413
12414if (bra == OP_BRAZERO)
12415 {
12416 /* We know there is enough place on the stack. */
12417 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
12418 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12419 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
12420 JUMPHERE(brajump);
12421 }
12422}
12423
12424static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12425{
12426DEFINE_COMPILER;
12427int opcode, stacksize, alt_count, alt_max;
12428int offset = 0;
12429int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
12430int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
12431PCRE2_SPTR cc = current->cc;
12432PCRE2_SPTR ccbegin;
12433PCRE2_SPTR ccprev;
12434PCRE2_UCHAR bra = OP_BRA;
12435PCRE2_UCHAR ket;
12436assert_backtrack *assert;
12437BOOL has_alternatives;
12438BOOL needs_control_head = FALSE;
12439struct sljit_jump *brazero = NULL;
12440struct sljit_jump *next_alt = NULL;
12441struct sljit_jump *once = NULL;
12442struct sljit_jump *cond = NULL;
12443struct sljit_label *rmin_label = NULL;
12444struct sljit_label *exact_label = NULL;
12445struct sljit_put_label *put_label = NULL;
12446
12447if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
12448 {
12449 bra = *cc;
12450 cc++;
12451 }
12452
12453opcode = *cc;
12454ccbegin = bracketend(cc) - 1 - LINK_SIZE;
12455ket = *ccbegin;
12456if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
12457 {
12458 repeat_ptr = PRIVATE_DATA(ccbegin);
12459 repeat_type = PRIVATE_DATA(ccbegin + 2);
12460 repeat_count = PRIVATE_DATA(ccbegin + 3);
12461 SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
12462 if (repeat_type == OP_UPTO)
12463 ket = OP_KETRMAX;
12464 if (repeat_type == OP_MINUPTO)
12465 ket = OP_KETRMIN;
12466 }
12467ccbegin = cc;
12468cc += GET(cc, 1);
12469has_alternatives = *cc == OP_ALT;
12470if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12471 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
12472if (opcode == OP_CBRA || opcode == OP_SCBRA)
12473 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
12474if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
12475 opcode = OP_SCOND;
12476
12477alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
12478
12479/* Decoding the needs_control_head in framesize. */
12480if (opcode == OP_ONCE)
12481 {
12482 needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
12483 CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
12484 }
12485
12486if (ket != OP_KET && repeat_type != 0)
12487 {
12488 /* TMP1 is used in OP_KETRMIN below. */
12489 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12490 free_stack(common, 1);
12491 if (repeat_type == OP_UPTO)
12492 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
12493 else
12494 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12495 }
12496
12497if (ket == OP_KETRMAX)
12498 {
12499 if (bra == OP_BRAZERO)
12500 {
12501 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12502 free_stack(common, 1);
12503 brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12504 }
12505 }
12506else if (ket == OP_KETRMIN)
12507 {
12508 if (bra != OP_BRAMINZERO)
12509 {
12510 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12511 if (repeat_type != 0)
12512 {
12513 /* TMP1 was set a few lines above. */
12514 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12515 /* Drop STR_PTR for non-greedy plus quantifier. */
12516 if (opcode != OP_ONCE)
12517 free_stack(common, 1);
12518 }
12519 else if (opcode >= OP_SBRA || opcode == OP_ONCE)
12520 {
12521 /* Checking zero-length iteration. */
12522 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
12523 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12524 else
12525 {
12526 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12527 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12528 }
12529 /* Drop STR_PTR for non-greedy plus quantifier. */
12530 if (opcode != OP_ONCE)
12531 free_stack(common, 1);
12532 }
12533 else
12534 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12535 }
12536 rmin_label = LABEL();
12537 if (repeat_type != 0)
12538 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12539 }
12540else if (bra == OP_BRAZERO)
12541 {
12542 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12543 free_stack(common, 1);
12544 brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12545 }
12546else if (repeat_type == OP_EXACT)
12547 {
12548 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12549 exact_label = LABEL();
12550 }
12551
12552if (offset != 0)
12553 {
12554 if (common->capture_last_ptr != 0)
12555 {
12556 SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
12557 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12558 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12559 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12560 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12561 free_stack(common, 3);
12562 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
12563 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
12564 }
12565 else if (common->optimized_cbracket[offset >> 1] == 0)
12566 {
12567 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12568 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12569 free_stack(common, 2);
12570 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12571 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12572 }
12573 }
12574
12575if (SLJIT_UNLIKELY(opcode == OP_ONCE))
12576 {
12577 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12578 {
12579 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12580 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12581 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
12582 }
12583 once = JUMP(SLJIT_JUMP);
12584 }
12585else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12586 {
12587 if (has_alternatives)
12588 {
12589 /* Always exactly one alternative. */
12590 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12591 free_stack(common, 1);
12592
12593 alt_max = 2;
12594 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12595 }
12596 }
12597else if (has_alternatives)
12598 {
12599 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12600 free_stack(common, 1);
12601
12602 if (alt_max > 3)
12603 {
12604 sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
12605
12606 SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
12607 sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
12608 sljit_emit_op0(compiler, SLJIT_ENDBR);
12609 }
12610 else
12611 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12612 }
12613
12614COMPILE_BACKTRACKINGPATH(current->top);
12615if (current->topbacktracks)
12616 set_jumps(current->topbacktracks, LABEL());
12617
12618if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12619 {
12620 /* Conditional block always has at most one alternative. */
12621 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
12622 {
12623 SLJIT_ASSERT(has_alternatives);
12624 assert = CURRENT_AS(bracket_backtrack)->u.assert;
12625 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
12626 {
12627 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12628 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12629 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12630 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12631 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12632 }
12633 cond = JUMP(SLJIT_JUMP);
12634 set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
12635 }
12636 else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
12637 {
12638 SLJIT_ASSERT(has_alternatives);
12639 cond = JUMP(SLJIT_JUMP);
12640 set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
12641 }
12642 else
12643 SLJIT_ASSERT(!has_alternatives);
12644 }
12645
12646if (has_alternatives)
12647 {
12648 alt_count = 1;
12649 do
12650 {
12651 current->top = NULL;
12652 current->topbacktracks = NULL;
12653 current->nextbacktracks = NULL;
12654 /* Conditional blocks always have an additional alternative, even if it is empty. */
12655 if (*cc == OP_ALT)
12656 {
12657 ccprev = cc + 1 + LINK_SIZE;
12658 cc += GET(cc, 1);
12659 if (opcode != OP_COND && opcode != OP_SCOND)
12660 {
12661 if (opcode != OP_ONCE)
12662 {
12663 if (private_data_ptr != 0)
12664 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12665 else
12666 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12667 }
12668 else
12669 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
12670 }
12671 compile_matchingpath(common, ccprev, cc, current);
12672 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12673 return;
12674
12675 if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
12676 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12677
12678 if (opcode == OP_SCRIPT_RUN)
12679 match_script_run_common(common, private_data_ptr, current);
12680 }
12681
12682 /* Instructions after the current alternative is successfully matched. */
12683 /* There is a similar code in compile_bracket_matchingpath. */
12684 if (opcode == OP_ONCE)
12685 match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
12686
12687 stacksize = 0;
12688 if (repeat_type == OP_MINUPTO)
12689 {
12690 /* We need to preserve the counter. TMP2 will be used below. */
12691 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
12692 stacksize++;
12693 }
12694 if (ket != OP_KET || bra != OP_BRA)
12695 stacksize++;
12696 if (offset != 0)
12697 {
12698 if (common->capture_last_ptr != 0)
12699 stacksize++;
12700 if (common->optimized_cbracket[offset >> 1] == 0)
12701 stacksize += 2;
12702 }
12703 if (opcode != OP_ONCE)
12704 stacksize++;
12705
12706 if (stacksize > 0)
12707 allocate_stack(common, stacksize);
12708
12709 stacksize = 0;
12710 if (repeat_type == OP_MINUPTO)
12711 {
12712 /* TMP2 was set above. */
12713 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
12714 stacksize++;
12715 }
12716
12717 if (ket != OP_KET || bra != OP_BRA)
12718 {
12719 if (ket != OP_KET)
12720 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
12721 else
12722 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
12723 stacksize++;
12724 }
12725
12726 if (offset != 0)
12727 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
12728
12729 if (opcode != OP_ONCE)
12730 {
12731 if (alt_max <= 3)
12732 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
12733 else
12734 put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
12735 }
12736
12737 if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
12738 {
12739 /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
12740 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
12741 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12742 }
12743
12744 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
12745
12746 if (opcode != OP_ONCE)
12747 {
12748 if (alt_max <= 3)
12749 {
12750 JUMPHERE(next_alt);
12751 alt_count++;
12752 if (alt_count < alt_max)
12753 {
12754 SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
12755 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
12756 }
12757 }
12758 else
12759 {
12760 sljit_set_put_label(put_label, LABEL());
12761 sljit_emit_op0(compiler, SLJIT_ENDBR);
12762 }
12763 }
12764
12765 COMPILE_BACKTRACKINGPATH(current->top);
12766 if (current->topbacktracks)
12767 set_jumps(current->topbacktracks, LABEL());
12768 SLJIT_ASSERT(!current->nextbacktracks);
12769 }
12770 while (*cc == OP_ALT);
12771
12772 if (cond != NULL)
12773 {
12774 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
12775 assert = CURRENT_AS(bracket_backtrack)->u.assert;
12776 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
12777 {
12778 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12779 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12780 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12781 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12782 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12783 }
12784 JUMPHERE(cond);
12785 }
12786
12787 /* Free the STR_PTR. */
12788 if (private_data_ptr == 0)
12789 free_stack(common, 1);
12790 }
12791
12792if (offset != 0)
12793 {
12794 /* Using both tmp register is better for instruction scheduling. */
12795 if (common->optimized_cbracket[offset >> 1] != 0)
12796 {
12797 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12798 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12799 free_stack(common, 2);
12800 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12801 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12802 }
12803 else
12804 {
12805 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12806 free_stack(common, 1);
12807 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12808 }
12809 }
12810else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
12811 {
12812 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
12813 free_stack(common, 1);
12814 }
12815else if (opcode == OP_ONCE)
12816 {
12817 cc = ccbegin + GET(ccbegin, 1);
12818 stacksize = needs_control_head ? 1 : 0;
12819
12820 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12821 {
12822 /* Reset head and drop saved frame. */
12823 stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
12824 }
12825 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
12826 {
12827 /* The STR_PTR must be released. */
12828 stacksize++;
12829 }
12830
12831 if (stacksize > 0)
12832 free_stack(common, stacksize);
12833
12834 JUMPHERE(once);
12835 /* Restore previous private_data_ptr */
12836 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12837 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
12838 else if (ket == OP_KETRMIN)
12839 {
12840 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12841 /* See the comment below. */
12842 free_stack(common, 2);
12843 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
12844 }
12845 }
12846
12847if (repeat_type == OP_EXACT)
12848 {
12849 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12850 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12851 CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
12852 }
12853else if (ket == OP_KETRMAX)
12854 {
12855 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12856 if (bra != OP_BRAZERO)
12857 free_stack(common, 1);
12858
12859 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12860 if (bra == OP_BRAZERO)
12861 {
12862 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12863 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12864 JUMPHERE(brazero);
12865 free_stack(common, 1);
12866 }
12867 }
12868else if (ket == OP_KETRMIN)
12869 {
12870 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12871
12872 /* OP_ONCE removes everything in case of a backtrack, so we don't
12873 need to explicitly release the STR_PTR. The extra release would
12874 affect badly the free_stack(2) above. */
12875 if (opcode != OP_ONCE)
12876 free_stack(common, 1);
12877 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
12878 if (opcode == OP_ONCE)
12879 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
12880 else if (bra == OP_BRAMINZERO)
12881 free_stack(common, 1);
12882 }
12883else if (bra == OP_BRAZERO)
12884 {
12885 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12886 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
12887 JUMPHERE(brazero);
12888 }
12889}
12890
12891static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12892{
12893DEFINE_COMPILER;
12894int offset;
12895struct sljit_jump *jump;
12896
12897if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
12898 {
12899 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
12900 {
12901 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
12902 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12903 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12904 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12905 if (common->capture_last_ptr != 0)
12906 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12907 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12908 if (common->capture_last_ptr != 0)
12909 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12910 }
12911 set_jumps(current->topbacktracks, LABEL());
12912 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12913 return;
12914 }
12915
12916OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
12917add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12918OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
12919
12920if (current->topbacktracks)
12921 {
12922 jump = JUMP(SLJIT_JUMP);
12923 set_jumps(current->topbacktracks, LABEL());
12924 /* Drop the stack frame. */
12925 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
12926 JUMPHERE(jump);
12927 }
12928OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
12929}
12930
12931static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12932{
12933assert_backtrack backtrack;
12934
12935current->top = NULL;
12936current->topbacktracks = NULL;
12937current->nextbacktracks = NULL;
12938if (current->cc[1] > OP_ASSERTBACK_NOT)
12939 {
12940 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
12941 compile_bracket_matchingpath(common, current->cc, current);
12942 compile_bracket_backtrackingpath(common, current->top);
12943 }
12944else
12945 {
12946 memset(&backtrack, 0, sizeof(backtrack));
12947 backtrack.common.cc = current->cc;
12948 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
12949 /* Manual call of compile_assert_matchingpath. */
12950 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
12951 }
12952SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
12953}
12954
12955static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12956{
12957DEFINE_COMPILER;
12958PCRE2_UCHAR opcode = *current->cc;
12959struct sljit_label *loop;
12960struct sljit_jump *jump;
12961
12962if (opcode == OP_THEN || opcode == OP_THEN_ARG)
12963 {
12964 if (common->then_trap != NULL)
12965 {
12966 SLJIT_ASSERT(common->control_head_ptr != 0);
12967
12968 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12969 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
12970 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
12971 jump = JUMP(SLJIT_JUMP);
12972
12973 loop = LABEL();
12974 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12975 JUMPHERE(jump);
12976 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
12977 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
12978 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
12979 return;
12980 }
12981 else if (!common->local_quit_available && common->in_positive_assertion)
12982 {
12983 add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
12984 return;
12985 }
12986 }
12987
12988if (common->local_quit_available)
12989 {
12990 /* Abort match with a fail. */
12991 if (common->quit_label == NULL)
12992 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
12993 else
12994 JUMPTO(SLJIT_JUMP, common->quit_label);
12995 return;
12996 }
12997
12998if (opcode == OP_SKIP_ARG)
12999 {
13000 SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13001 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13002 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
13003 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
13004
13005 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
13006 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
13007 return;
13008 }
13009
13010if (opcode == OP_SKIP)
13011 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13012else
13013 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
13014add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
13015}
13016
13017static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13018{
13019DEFINE_COMPILER;
13020struct sljit_jump *jump;
13021int size;
13022
13023if (CURRENT_AS(then_trap_backtrack)->then_trap)
13024 {
13025 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
13026 return;
13027 }
13028
13029size = CURRENT_AS(then_trap_backtrack)->framesize;
13030size = 3 + (size < 0 ? 0 : size);
13031
13032OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13033free_stack(common, size);
13034jump = JUMP(SLJIT_JUMP);
13035
13036set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13037/* STACK_TOP is set by THEN. */
13038if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13039 {
13040 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13041 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13042 }
13043OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13044free_stack(common, 3);
13045
13046JUMPHERE(jump);
13047OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13048}
13049
13050static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13051{
13052DEFINE_COMPILER;
13053then_trap_backtrack *save_then_trap = common->then_trap;
13054
13055while (current)
13056 {
13057 if (current->nextbacktracks != NULL)
13058 set_jumps(current->nextbacktracks, LABEL());
13059 switch(*current->cc)
13060 {
13061 case OP_SET_SOM:
13062 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13063 free_stack(common, 1);
13064 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
13065 break;
13066
13067 case OP_STAR:
13068 case OP_MINSTAR:
13069 case OP_PLUS:
13070 case OP_MINPLUS:
13071 case OP_QUERY:
13072 case OP_MINQUERY:
13073 case OP_UPTO:
13074 case OP_MINUPTO:
13075 case OP_EXACT:
13076 case OP_POSSTAR:
13077 case OP_POSPLUS:
13078 case OP_POSQUERY:
13079 case OP_POSUPTO:
13080 case OP_STARI:
13081 case OP_MINSTARI:
13082 case OP_PLUSI:
13083 case OP_MINPLUSI:
13084 case OP_QUERYI:
13085 case OP_MINQUERYI:
13086 case OP_UPTOI:
13087 case OP_MINUPTOI:
13088 case OP_EXACTI:
13089 case OP_POSSTARI:
13090 case OP_POSPLUSI:
13091 case OP_POSQUERYI:
13092 case OP_POSUPTOI:
13093 case OP_NOTSTAR:
13094 case OP_NOTMINSTAR:
13095 case OP_NOTPLUS:
13096 case OP_NOTMINPLUS:
13097 case OP_NOTQUERY:
13098 case OP_NOTMINQUERY:
13099 case OP_NOTUPTO:
13100 case OP_NOTMINUPTO:
13101 case OP_NOTEXACT:
13102 case OP_NOTPOSSTAR:
13103 case OP_NOTPOSPLUS:
13104 case OP_NOTPOSQUERY:
13105 case OP_NOTPOSUPTO:
13106 case OP_NOTSTARI:
13107 case OP_NOTMINSTARI:
13108 case OP_NOTPLUSI:
13109 case OP_NOTMINPLUSI:
13110 case OP_NOTQUERYI:
13111 case OP_NOTMINQUERYI:
13112 case OP_NOTUPTOI:
13113 case OP_NOTMINUPTOI:
13114 case OP_NOTEXACTI:
13115 case OP_NOTPOSSTARI:
13116 case OP_NOTPOSPLUSI:
13117 case OP_NOTPOSQUERYI:
13118 case OP_NOTPOSUPTOI:
13119 case OP_TYPESTAR:
13120 case OP_TYPEMINSTAR:
13121 case OP_TYPEPLUS:
13122 case OP_TYPEMINPLUS:
13123 case OP_TYPEQUERY:
13124 case OP_TYPEMINQUERY:
13125 case OP_TYPEUPTO:
13126 case OP_TYPEMINUPTO:
13127 case OP_TYPEEXACT:
13128 case OP_TYPEPOSSTAR:
13129 case OP_TYPEPOSPLUS:
13130 case OP_TYPEPOSQUERY:
13131 case OP_TYPEPOSUPTO:
13132 case OP_CLASS:
13133 case OP_NCLASS:
13134#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
13135 case OP_XCLASS:
13136#endif
13137 compile_iterator_backtrackingpath(common, current);
13138 break;
13139
13140 case OP_REF:
13141 case OP_REFI:
13142 case OP_DNREF:
13143 case OP_DNREFI:
13144 compile_ref_iterator_backtrackingpath(common, current);
13145 break;
13146
13147 case OP_RECURSE:
13148 compile_recurse_backtrackingpath(common, current);
13149 break;
13150
13151 case OP_ASSERT:
13152 case OP_ASSERT_NOT:
13153 case OP_ASSERTBACK:
13154 case OP_ASSERTBACK_NOT:
13155 compile_assert_backtrackingpath(common, current);
13156 break;
13157
13158 case OP_ASSERT_NA:
13159 case OP_ASSERTBACK_NA:
13160 case OP_ONCE:
13161 case OP_SCRIPT_RUN:
13162 case OP_BRA:
13163 case OP_CBRA:
13164 case OP_COND:
13165 case OP_SBRA:
13166 case OP_SCBRA:
13167 case OP_SCOND:
13168 compile_bracket_backtrackingpath(common, current);
13169 break;
13170
13171 case OP_BRAZERO:
13172 if (current->cc[1] > OP_ASSERTBACK_NOT)
13173 compile_bracket_backtrackingpath(common, current);
13174 else
13175 compile_assert_backtrackingpath(common, current);
13176 break;
13177
13178 case OP_BRAPOS:
13179 case OP_CBRAPOS:
13180 case OP_SBRAPOS:
13181 case OP_SCBRAPOS:
13182 case OP_BRAPOSZERO:
13183 compile_bracketpos_backtrackingpath(common, current);
13184 break;
13185
13186 case OP_BRAMINZERO:
13187 compile_braminzero_backtrackingpath(common, current);
13188 break;
13189
13190 case OP_MARK:
13191 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
13192 if (common->has_skip_arg)
13193 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13194 free_stack(common, common->has_skip_arg ? 5 : 1);
13195 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
13196 if (common->has_skip_arg)
13197 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
13198 break;
13199
13200 case OP_THEN:
13201 case OP_THEN_ARG:
13202 case OP_PRUNE:
13203 case OP_PRUNE_ARG:
13204 case OP_SKIP:
13205 case OP_SKIP_ARG:
13206 compile_control_verb_backtrackingpath(common, current);
13207 break;
13208
13209 case OP_COMMIT:
13210 case OP_COMMIT_ARG:
13211 if (!common->local_quit_available)
13212 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13213 if (common->quit_label == NULL)
13214 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13215 else
13216 JUMPTO(SLJIT_JUMP, common->quit_label);
13217 break;
13218
13219 case OP_CALLOUT:
13220 case OP_CALLOUT_STR:
13221 case OP_FAIL:
13222 case OP_ACCEPT:
13223 case OP_ASSERT_ACCEPT:
13224 set_jumps(current->topbacktracks, LABEL());
13225 break;
13226
13227 case OP_THEN_TRAP:
13228 /* A virtual opcode for then traps. */
13229 compile_then_trap_backtrackingpath(common, current);
13230 break;
13231
13232 default:
13233 SLJIT_UNREACHABLE();
13234 break;
13235 }
13236 current = current->prev;
13237 }
13238common->then_trap = save_then_trap;
13239}
13240
13241static SLJIT_INLINE void compile_recurse(compiler_common *common)
13242{
13243DEFINE_COMPILER;
13244PCRE2_SPTR cc = common->start + common->currententry->start;
13245PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
13246PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
13247BOOL needs_control_head;
13248BOOL has_quit;
13249BOOL has_accept;
13250int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
13251int alt_count, alt_max, local_size;
13252backtrack_common altbacktrack;
13253jump_list *match = NULL;
13254struct sljit_jump *next_alt = NULL;
13255struct sljit_jump *accept_exit = NULL;
13256struct sljit_label *quit;
13257struct sljit_put_label *put_label = NULL;
13258
13259/* Recurse captures then. */
13260common->then_trap = NULL;
13261
13262SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
13263
13264alt_max = no_alternatives(cc);
13265alt_count = 0;
13266
13267/* Matching path. */
13268SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
13269common->currententry->entry_label = LABEL();
13270set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
13271
13272sljit_emit_fast_enter(compiler, TMP2, 0);
13273count_match(common);
13274
13275local_size = (alt_max > 1) ? 2 : 1;
13276
13277/* (Reversed) stack layout:
13278 [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
13279
13280allocate_stack(common, private_data_size + local_size);
13281/* Save return address. */
13282OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
13283
13284copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);
13285
13286/* This variable is saved and restored all time when we enter or exit from a recursive context. */
13287OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
13288
13289if (needs_control_head)
13290 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13291
13292if (alt_max > 1)
13293 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
13294
13295memset(&altbacktrack, 0, sizeof(backtrack_common));
13296common->quit_label = NULL;
13297common->accept_label = NULL;
13298common->quit = NULL;
13299common->accept = NULL;
13300altbacktrack.cc = ccbegin;
13301cc += GET(cc, 1);
13302while (1)
13303 {
13304 altbacktrack.top = NULL;
13305 altbacktrack.topbacktracks = NULL;
13306
13307 if (altbacktrack.cc != ccbegin)
13308 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13309
13310 compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
13311 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13312 return;
13313
13314 allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
13315 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13316
13317 if (alt_max > 1 || has_accept)
13318 {
13319 if (alt_max > 3)
13320 put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
13321 else
13322 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
13323 }
13324
13325 add_jump(compiler, &match, JUMP(SLJIT_JUMP));
13326
13327 if (alt_count == 0)
13328 {
13329 /* Backtracking path entry. */
13330 SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
13331 common->currententry->backtrack_label = LABEL();
13332 set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
13333
13334 sljit_emit_fast_enter(compiler, TMP1, 0);
13335
13336 if (has_accept)
13337 accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13338
13339 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13340 /* Save return address. */
13341 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
13342
13343 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
13344
13345 if (alt_max > 1)
13346 {
13347 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13348 free_stack(common, 2);
13349
13350 if (alt_max > 3)
13351 {
13352 sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
13353 sljit_set_put_label(put_label, LABEL());
13354 sljit_emit_op0(compiler, SLJIT_ENDBR);
13355 }
13356 else
13357 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13358 }
13359 else
13360 free_stack(common, has_accept ? 2 : 1);
13361 }
13362 else if (alt_max > 3)
13363 {
13364 sljit_set_put_label(put_label, LABEL());
13365 sljit_emit_op0(compiler, SLJIT_ENDBR);
13366 }
13367 else
13368 {
13369 JUMPHERE(next_alt);
13370 if (alt_count + 1 < alt_max)
13371 {
13372 SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
13373 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
13374 }
13375 }
13376
13377 alt_count++;
13378
13379 compile_backtrackingpath(common, altbacktrack.top);
13380 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13381 return;
13382 set_jumps(altbacktrack.topbacktracks, LABEL());
13383
13384 if (*cc != OP_ALT)
13385 break;
13386
13387 altbacktrack.cc = cc + 1 + LINK_SIZE;
13388 cc += GET(cc, 1);
13389 }
13390
13391/* No alternative is matched. */
13392
13393quit = LABEL();
13394
13395copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);
13396
13397OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13398free_stack(common, private_data_size + local_size);
13399OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13400OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13401
13402if (common->quit != NULL)
13403 {
13404 SLJIT_ASSERT(has_quit);
13405
13406 set_jumps(common->quit, LABEL());
13407 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13408 copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
13409 JUMPTO(SLJIT_JUMP, quit);
13410 }
13411
13412if (has_accept)
13413 {
13414 JUMPHERE(accept_exit);
13415 free_stack(common, 2);
13416
13417 /* Save return address. */
13418 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
13419
13420 copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);
13421
13422 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13423 free_stack(common, private_data_size + local_size);
13424 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13425 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13426 }
13427
13428if (common->accept != NULL)
13429 {
13430 SLJIT_ASSERT(has_accept);
13431
13432 set_jumps(common->accept, LABEL());
13433
13434 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13435 OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
13436
13437 allocate_stack(common, 2);
13438 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13439 }
13440
13441set_jumps(match, LABEL());
13442
13443OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
13444
13445copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
13446
13447OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
13448OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
13449OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13450}
13451
/* The helper macros of the backtracking path generators are not needed
beyond this point. */
#undef CURRENT_AS
#undef COMPILE_BACKTRACKINGPATH

/* Bits of the jit_compile() mode argument that configure the compilation
itself; jit_compile() clears them from the mode value before storing it. */
#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS (PCRE2_JIT_INVALID_UTF)
13457
13458static int jit_compile(pcre2_code *code, sljit_u32 mode)
13459{
13460pcre2_real_code *re = (pcre2_real_code *)code;
13461struct sljit_compiler *compiler;
13462backtrack_common rootbacktrack;
13463compiler_common common_data;
13464compiler_common *common = &common_data;
13465const sljit_u8 *tables = re->tables;
13466void *allocator_data = &re->memctl;
13467int private_data_size;
13468PCRE2_SPTR ccend;
13469executable_functions *functions;
13470void *executable_func;
13471sljit_uw executable_size;
13472sljit_uw total_length;
13473struct sljit_label *mainloop_label = NULL;
13474struct sljit_label *continue_match_label;
13475struct sljit_label *empty_match_found_label = NULL;
13476struct sljit_label *empty_match_backtrack_label = NULL;
13477struct sljit_label *reset_match_label;
13478struct sljit_label *quit_label;
13479struct sljit_jump *jump;
13480struct sljit_jump *minlength_check_failed = NULL;
13481struct sljit_jump *empty_match = NULL;
13482struct sljit_jump *end_anchor_failed = NULL;
13483jump_list *reqcu_not_found = NULL;
13484
13485SLJIT_ASSERT(tables);
13486
13487#if HAS_VIRTUAL_REGISTERS == 1
13488SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0);
13489#elif HAS_VIRTUAL_REGISTERS == 0
13490SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0);
13491#else
13492#error "Invalid value for HAS_VIRTUAL_REGISTERS"
13493#endif
13494
13495memset(&rootbacktrack, 0, sizeof(backtrack_common));
13496memset(common, 0, sizeof(compiler_common));
13497common->re = re;
13498common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
13499rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
13500
13501#ifdef SUPPORT_UNICODE
13502common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
13503#endif /* SUPPORT_UNICODE */
13504mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
13505
13506common->start = rootbacktrack.cc;
13507common->read_only_data_head = NULL;
13508common->fcc = tables + fcc_offset;
13509common->lcc = (sljit_sw)(tables + lcc_offset);
13510common->mode = mode;
13511common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
13512common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
13513common->nltype = NLTYPE_FIXED;
13514switch(re->newline_convention)
13515 {
13516 case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
13517 case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
13518 case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
13519 case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
13520 case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
13521 case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
13522 default: return PCRE2_ERROR_INTERNAL;
13523 }
13524common->nlmax = READ_CHAR_MAX;
13525common->nlmin = 0;
13526if (re->bsr_convention == PCRE2_BSR_UNICODE)
13527 common->bsr_nltype = NLTYPE_ANY;
13528else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
13529 common->bsr_nltype = NLTYPE_ANYCRLF;
13530else
13531 {
13532#ifdef BSR_ANYCRLF
13533 common->bsr_nltype = NLTYPE_ANYCRLF;
13534#else
13535 common->bsr_nltype = NLTYPE_ANY;
13536#endif
13537 }
13538common->bsr_nlmax = READ_CHAR_MAX;
13539common->bsr_nlmin = 0;
13540common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
13541common->ctypes = (sljit_sw)(tables + ctypes_offset);
13542common->name_count = re->name_count;
13543common->name_entry_size = re->name_entry_size;
13544common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
13545common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
13546#ifdef SUPPORT_UNICODE
13547/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
13548common->utf = (re->overall_options & PCRE2_UTF) != 0;
13549common->ucp = (re->overall_options & PCRE2_UCP) != 0;
13550if (common->utf)
13551 {
13552 if (common->nltype == NLTYPE_ANY)
13553 common->nlmax = 0x2029;
13554 else if (common->nltype == NLTYPE_ANYCRLF)
13555 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13556 else
13557 {
13558 /* We only care about the first newline character. */
13559 common->nlmax = common->newline & 0xff;
13560 }
13561
13562 if (common->nltype == NLTYPE_FIXED)
13563 common->nlmin = common->newline & 0xff;
13564 else
13565 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13566
13567 if (common->bsr_nltype == NLTYPE_ANY)
13568 common->bsr_nlmax = 0x2029;
13569 else
13570 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13571 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13572 }
13573else
13574 common->invalid_utf = FALSE;
13575#endif /* SUPPORT_UNICODE */
13576ccend = bracketend(common->start);
13577
13578/* Calculate the local space size on the stack. */
13579common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
13580common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
13581if (!common->optimized_cbracket)
13582 return PCRE2_ERROR_NOMEMORY;
13583#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
13584memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13585#else
13586memset(common->optimized_cbracket, 1, re->top_bracket + 1);
13587#endif
13588
13589SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
13590#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
13591common->capture_last_ptr = common->ovector_start;
13592common->ovector_start += sizeof(sljit_sw);
13593#endif
13594if (!check_opcode_types(common, common->start, ccend))
13595 {
13596 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13597 return PCRE2_ERROR_NOMEMORY;
13598 }
13599
13600/* Checking flags and updating ovector_start. */
13601if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13602 {
13603 common->req_char_ptr = common->ovector_start;
13604 common->ovector_start += sizeof(sljit_sw);
13605 }
13606if (mode != PCRE2_JIT_COMPLETE)
13607 {
13608 common->start_used_ptr = common->ovector_start;
13609 common->ovector_start += sizeof(sljit_sw);
13610 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13611 {
13612 common->hit_start = common->ovector_start;
13613 common->ovector_start += sizeof(sljit_sw);
13614 }
13615 }
13616if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
13617 {
13618 common->match_end_ptr = common->ovector_start;
13619 common->ovector_start += sizeof(sljit_sw);
13620 }
13621#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
13622common->control_head_ptr = 1;
13623#endif
13624if (common->control_head_ptr != 0)
13625 {
13626 common->control_head_ptr = common->ovector_start;
13627 common->ovector_start += sizeof(sljit_sw);
13628 }
13629if (common->has_set_som)
13630 {
13631 /* Saving the real start pointer is necessary. */
13632 common->start_ptr = common->ovector_start;
13633 common->ovector_start += sizeof(sljit_sw);
13634 }
13635
13636/* Aligning ovector to even number of sljit words. */
13637if ((common->ovector_start & sizeof(sljit_sw)) != 0)
13638 common->ovector_start += sizeof(sljit_sw);
13639
13640if (common->start_ptr == 0)
13641 common->start_ptr = OVECTOR(0);
13642
13643/* Capturing brackets cannot be optimized if callouts are allowed. */
13644if (common->capture_last_ptr != 0)
13645 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13646
13647SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
13648common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
13649
13650total_length = ccend - common->start;
13651common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
13652if (!common->private_data_ptrs)
13653 {
13654 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13655 return PCRE2_ERROR_NOMEMORY;
13656 }
13657memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
13658
13659private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
13660
13661if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
13662 detect_early_fail(common, common->start, &private_data_size, 0, 0, TRUE);
13663
13664set_private_data_ptrs(common, &private_data_size, ccend);
13665
13666SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
13667
13668if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
13669 {
13670 SLJIT_FREE(common->private_data_ptrs, allocator_data);
13671 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13672 return PCRE2_ERROR_NOMEMORY;
13673 }
13674
13675if (common->has_then)
13676 {
13677 common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
13678 memset(common->then_offsets, 0, total_length);
13679 set_then_offsets(common, common->start, NULL);
13680 }
13681
13682compiler = sljit_create_compiler(allocator_data, NULL);
13683if (!compiler)
13684 {
13685 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13686 SLJIT_FREE(common->private_data_ptrs, allocator_data);
13687 return PCRE2_ERROR_NOMEMORY;
13688 }
13689common->compiler = compiler;
13690
13691/* Main pcre_jit_exec entry. */
13692sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);
13693
13694/* Register init. */
13695reset_ovector(common, (re->top_bracket + 1) * 2);
13696if (common->req_char_ptr != 0)
13697 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
13698
13699OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
13700OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
13701OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13702OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
13703OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
13704OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
13705OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
13706OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
13707OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
13708OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
13709
13710if (common->early_fail_start_ptr < common->early_fail_end_ptr)
13711 reset_early_fail(common);
13712
13713if (mode == PCRE2_JIT_PARTIAL_SOFT)
13714 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13715if (common->mark_ptr != 0)
13716 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
13717if (common->control_head_ptr != 0)
13718 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13719
13720/* Main part of the matching */
13721if ((re->overall_options & PCRE2_ANCHORED) == 0)
13722 {
13723 mainloop_label = mainloop_entry(common);
13724 continue_match_label = LABEL();
13725 /* Forward search if possible. */
13726 if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13727 {
13728 if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
13729 ;
13730 else if ((re->flags & PCRE2_FIRSTSET) != 0)
13731 fast_forward_first_char(common);
13732 else if ((re->flags & PCRE2_STARTLINE) != 0)
13733 fast_forward_newline(common);
13734 else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
13735 fast_forward_start_bits(common);
13736 }
13737 }
13738else
13739 continue_match_label = LABEL();
13740
13741if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13742 {
13743 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13744 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
13745 minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
13746 }
13747if (common->req_char_ptr != 0)
13748 reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
13749
13750/* Store the current STR_PTR in OVECTOR(0). */
13751OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
13752/* Copy the limit of allowed recursions. */
13753OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
13754if (common->capture_last_ptr != 0)
13755 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
13756if (common->fast_forward_bc_ptr != NULL)
13757 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
13758
13759if (common->start_ptr != OVECTOR(0))
13760 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
13761
13762/* Copy the beginning of the string. */
13763if (mode == PCRE2_JIT_PARTIAL_SOFT)
13764 {
13765 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13766 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13767 JUMPHERE(jump);
13768 }
13769else if (mode == PCRE2_JIT_PARTIAL_HARD)
13770 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
13771
13772compile_matchingpath(common, common->start, ccend, &rootbacktrack);
13773if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13774 {
13775 sljit_free_compiler(compiler);
13776 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13777 SLJIT_FREE(common->private_data_ptrs, allocator_data);
13778 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13779 return PCRE2_ERROR_NOMEMORY;
13780 }
13781
13782if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13783 end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
13784
13785if (common->might_be_empty)
13786 {
13787 empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
13788 empty_match_found_label = LABEL();
13789 }
13790
13791common->accept_label = LABEL();
13792if (common->accept != NULL)
13793 set_jumps(common->accept, common->accept_label);
13794
13795/* This means we have a match. Update the ovector. */
13796copy_ovector(common, re->top_bracket + 1);
13797common->quit_label = common->abort_label = LABEL();
13798if (common->quit != NULL)
13799 set_jumps(common->quit, common->quit_label);
13800if (common->abort != NULL)
13801 set_jumps(common->abort, common->abort_label);
13802if (minlength_check_failed != NULL)
13803 SET_LABEL(minlength_check_failed, common->abort_label);
13804
13805sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
13806sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
13807
13808if (common->failed_match != NULL)
13809 {
13810 SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
13811 set_jumps(common->failed_match, LABEL());
13812 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13813 JUMPTO(SLJIT_JUMP, common->abort_label);
13814 }
13815
13816if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
13817 JUMPHERE(end_anchor_failed);
13818
13819if (mode != PCRE2_JIT_COMPLETE)
13820 {
13821 common->partialmatchlabel = LABEL();
13822 set_jumps(common->partialmatch, common->partialmatchlabel);
13823 return_with_partial_match(common, common->quit_label);
13824 }
13825
13826if (common->might_be_empty)
13827 empty_match_backtrack_label = LABEL();
13828compile_backtrackingpath(common, rootbacktrack.top);
13829if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13830 {
13831 sljit_free_compiler(compiler);
13832 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13833 SLJIT_FREE(common->private_data_ptrs, allocator_data);
13834 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13835 return PCRE2_ERROR_NOMEMORY;
13836 }
13837
13838SLJIT_ASSERT(rootbacktrack.prev == NULL);
13839reset_match_label = LABEL();
13840
13841if (mode == PCRE2_JIT_PARTIAL_SOFT)
13842 {
13843 /* Update hit_start only in the first time. */
13844 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
13845 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
13846 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
13847 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
13848 JUMPHERE(jump);
13849 }
13850
13851/* Check we have remaining characters. */
13852if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
13853 {
13854 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
13855 }
13856
13857OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
13858 (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
13859
13860if ((re->overall_options & PCRE2_ANCHORED) == 0)
13861 {
13862 if (common->ff_newline_shortcut != NULL)
13863 {
13864 /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
13865 if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
13866 {
13867 if (common->match_end_ptr != 0)
13868 {
13869 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
13870 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
13871 CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
13872 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
13873 }
13874 else
13875 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
13876 }
13877 }
13878 else
13879 CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
13880 }
13881
13882/* No more remaining characters. */
13883if (reqcu_not_found != NULL)
13884 set_jumps(reqcu_not_found, LABEL());
13885
13886if (mode == PCRE2_JIT_PARTIAL_SOFT)
13887 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
13888
13889OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13890JUMPTO(SLJIT_JUMP, common->quit_label);
13891
13892flush_stubs(common);
13893
13894if (common->might_be_empty)
13895 {
13896 JUMPHERE(empty_match);
13897 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
13898 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
13899 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
13900 JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
13901 OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
13902 JUMPTO(SLJIT_ZERO, empty_match_found_label);
13903 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13904 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
13905 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
13906 }
13907
13908common->fast_forward_bc_ptr = NULL;
13909common->early_fail_start_ptr = 0;
13910common->early_fail_end_ptr = 0;
13911common->currententry = common->entries;
13912common->local_quit_available = TRUE;
13913quit_label = common->quit_label;
13914while (common->currententry != NULL)
13915 {
13916 /* Might add new entries. */
13917 compile_recurse(common);
13918 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13919 {
13920 sljit_free_compiler(compiler);
13921 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13922 SLJIT_FREE(common->private_data_ptrs, allocator_data);
13923 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
13924 return PCRE2_ERROR_NOMEMORY;
13925 }
13926 flush_stubs(common);
13927 common->currententry = common->currententry->next;
13928 }
13929common->local_quit_available = FALSE;
13930common->quit_label = quit_label;
13931
13932/* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */
13933/* This is a (really) rare case. */
13934set_jumps(common->stackalloc, LABEL());
13935/* RETURN_ADDR is not a saved register. */
13936sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
13937
13938SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13939
13940OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
13941OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
13942OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
13943OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
13944OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
13945
13946sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
13947
13948jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
13949OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
13950OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
13951OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
13952OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
13953OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
13954
13955/* Allocation failed. */
13956JUMPHERE(jump);
13957/* We break the return address cache here, but this is a really rare case. */
13958OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
13959JUMPTO(SLJIT_JUMP, common->quit_label);
13960
13961/* Call limit reached. */
13962set_jumps(common->calllimit, LABEL());
13963OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
13964JUMPTO(SLJIT_JUMP, common->quit_label);
13965
13966if (common->revertframes != NULL)
13967 {
13968 set_jumps(common->revertframes, LABEL());
13969 do_revertframes(common);
13970 }
13971if (common->wordboundary != NULL)
13972 {
13973 set_jumps(common->wordboundary, LABEL());
13974 check_wordboundary(common);
13975 }
13976if (common->anynewline != NULL)
13977 {
13978 set_jumps(common->anynewline, LABEL());
13979 check_anynewline(common);
13980 }
13981if (common->hspace != NULL)
13982 {
13983 set_jumps(common->hspace, LABEL());
13984 check_hspace(common);
13985 }
13986if (common->vspace != NULL)
13987 {
13988 set_jumps(common->vspace, LABEL());
13989 check_vspace(common);
13990 }
13991if (common->casefulcmp != NULL)
13992 {
13993 set_jumps(common->casefulcmp, LABEL());
13994 do_casefulcmp(common);
13995 }
13996if (common->caselesscmp != NULL)
13997 {
13998 set_jumps(common->caselesscmp, LABEL());
13999 do_caselesscmp(common);
14000 }
14001if (common->reset_match != NULL)
14002 {
14003 set_jumps(common->reset_match, LABEL());
14004 do_reset_match(common, (re->top_bracket + 1) * 2);
14005 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14006 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14007 JUMPTO(SLJIT_JUMP, reset_match_label);
14008 }
14009#ifdef SUPPORT_UNICODE
14010#if PCRE2_CODE_UNIT_WIDTH == 8
14011if (common->utfreadchar != NULL)
14012 {
14013 set_jumps(common->utfreadchar, LABEL());
14014 do_utfreadchar(common);
14015 }
14016if (common->utfreadtype8 != NULL)
14017 {
14018 set_jumps(common->utfreadtype8, LABEL());
14019 do_utfreadtype8(common);
14020 }
14021if (common->utfpeakcharback != NULL)
14022 {
14023 set_jumps(common->utfpeakcharback, LABEL());
14024 do_utfpeakcharback(common);
14025 }
14026#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14027#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14028if (common->utfreadchar_invalid != NULL)
14029 {
14030 set_jumps(common->utfreadchar_invalid, LABEL());
14031 do_utfreadchar_invalid(common);
14032 }
14033if (common->utfreadnewline_invalid != NULL)
14034 {
14035 set_jumps(common->utfreadnewline_invalid, LABEL());
14036 do_utfreadnewline_invalid(common);
14037 }
14038if (common->utfmoveback_invalid)
14039 {
14040 set_jumps(common->utfmoveback_invalid, LABEL());
14041 do_utfmoveback_invalid(common);
14042 }
14043if (common->utfpeakcharback_invalid)
14044 {
14045 set_jumps(common->utfpeakcharback_invalid, LABEL());
14046 do_utfpeakcharback_invalid(common);
14047 }
14048#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14049if (common->getucd != NULL)
14050 {
14051 set_jumps(common->getucd, LABEL());
14052 do_getucd(common);
14053 }
14054if (common->getucdtype != NULL)
14055 {
14056 set_jumps(common->getucdtype, LABEL());
14057 do_getucdtype(common);
14058 }
14059#endif /* SUPPORT_UNICODE */
14060
14061SLJIT_FREE(common->optimized_cbracket, allocator_data);
14062SLJIT_FREE(common->private_data_ptrs, allocator_data);
14063
14064executable_func = sljit_generate_code(compiler);
14065executable_size = sljit_get_generated_code_size(compiler);
14066sljit_free_compiler(compiler);
14067
14068if (executable_func == NULL)
14069 {
14070 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14071 return PCRE2_ERROR_NOMEMORY;
14072 }
14073
14074/* Reuse the function descriptor if possible. */
14075if (re->executable_jit != NULL)
14076 functions = (executable_functions *)re->executable_jit;
14077else
14078 {
14079 functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14080 if (functions == NULL)
14081 {
14082 /* This case is highly unlikely since we just recently
14083 freed a lot of memory. Not impossible though. */
14084 sljit_free_code(executable_func, NULL);
14085 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14086 return PCRE2_ERROR_NOMEMORY;
14087 }
14088 memset(functions, 0, sizeof(executable_functions));
14089 functions->top_bracket = re->top_bracket + 1;
14090 functions->limit_match = re->limit_match;
14091 re->executable_jit = functions;
14092 }
14093
14094/* Turn mode into an index. */
14095if (mode == PCRE2_JIT_COMPLETE)
14096 mode = 0;
14097else
14098 mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14099
14100SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14101functions->executable_funcs[mode] = executable_func;
14102functions->read_only_data_heads[mode] = common->read_only_data_head;
14103functions->executable_sizes[mode] = executable_size;
14104return 0;
14105}
14106
14107#endif
14108
14109/*************************************************
14110* JIT compile a Regular Expression *
14111*************************************************/
14112
/* This function uses JIT to convert a previously-compiled pattern into machine
14114code.
14115
14116Arguments:
14117 code a compiled pattern
14118 options JIT option bits
14119
14120Returns: 0: success or (*NOJIT) was used
14121 <0: an error code
14122*/
14123
14124#define PUBLIC_JIT_COMPILE_OPTIONS \
14125 (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14126
14127PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
14128pcre2_jit_compile(pcre2_code *code, uint32_t options)
14129{
14130pcre2_real_code *re = (pcre2_real_code *)code;
14131
14132if (code == NULL)
14133 return PCRE2_ERROR_NULL;
14134
14135if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14136 return PCRE2_ERROR_JIT_BADOPTION;
14137
14138/* Support for invalid UTF was first introduced in JIT, with the option
14139PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14140compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14141preferred feature, with the earlier option deprecated. However, for backward
14142compatibility, if the earlier option is set, it forces the new option so that
14143if JIT matching falls back to the interpreter, there is still support for
14144invalid UTF. However, if this function has already been successfully called
14145without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14146non-invalid-supporting JIT code was compiled), give an error.
14147
14148If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14149actions are needed:
14150
14151 1. Remove the definition from pcre2.h.in and from the list in
14152 PUBLIC_JIT_COMPILE_OPTIONS above.
14153
14154 2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14155
14156 3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14157
14158 4. Delete the following short block of code. The setting of "re" and
14159 "functions" can be moved into the JIT-only block below, but if that is
14160 done, (void)re and (void)functions will be needed in the non-JIT case, to
14161 avoid compiler warnings.
14162*/
14163
14164#ifdef SUPPORT_JIT
14165executable_functions *functions = (executable_functions *)re->executable_jit;
14166static int executable_allocator_is_working = 0;
14167#endif
14168
14169if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14170 {
14171 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14172 {
14173#ifdef SUPPORT_JIT
14174 if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14175#endif
14176 re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14177 }
14178 }
14179
14180/* The above tests are run with and without JIT support. This means that
14181PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14182interpreter support) even in the absence of JIT. But now, if there is no JIT
14183support, give an error return. */
14184
14185#ifndef SUPPORT_JIT
14186return PCRE2_ERROR_JIT_BADOPTION;
14187#else /* SUPPORT_JIT */
14188
14189/* There is JIT support. Do the necessary. */
14190
14191if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14192
14193if (executable_allocator_is_working == 0)
14194 {
14195 /* Checks whether the executable allocator is working. This check
14196 might run multiple times in multi-threaded environments, but the
14197 result should not be affected by it. */
14198 void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14199
14200 executable_allocator_is_working = -1;
14201
14202 if (ptr != NULL)
14203 {
14204 SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14205 executable_allocator_is_working = 1;
14206 }
14207 }
14208
14209if (executable_allocator_is_working < 0)
14210 return PCRE2_ERROR_NOMEMORY;
14211
14212if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14213 options |= PCRE2_JIT_INVALID_UTF;
14214
14215if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14216 || functions->executable_funcs[0] == NULL)) {
14217 uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14218 int result = jit_compile(code, options & ~excluded_options);
14219 if (result != 0)
14220 return result;
14221 }
14222
14223if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14224 || functions->executable_funcs[1] == NULL)) {
14225 uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14226 int result = jit_compile(code, options & ~excluded_options);
14227 if (result != 0)
14228 return result;
14229 }
14230
14231if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14232 || functions->executable_funcs[2] == NULL)) {
14233 uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14234 int result = jit_compile(code, options & ~excluded_options);
14235 if (result != 0)
14236 return result;
14237 }
14238
14239return 0;
14240
14241#endif /* SUPPORT_JIT */
14242}
14243
14244/* JIT compiler uses an all-in-one approach. This improves security,
14245 since the code generator functions are not exported. */
14246
14247#define INCLUDED_FROM_PCRE2_JIT_COMPILE
14248
14249#include "pcre2_jit_match.c"
14250#include "pcre2_jit_misc.c"
14251
14252/* End of pcre2_jit_compile.c */