blob: d726c3ca0499fabbf0258e9923f69f5801572a48 [file] [log] [blame]
Elliott Hughes5b808042021-10-01 10:56:10 -07001/*************************************************
2* Perl-Compatible Regular Expressions *
3*************************************************/
4
5/* PCRE is a library of functions to support regular expressions whose syntax
6and semantics are as close as possible to those of the Perl 5 language.
7
8 Written by Philip Hazel
9 This module by Zoltan Herczeg
10 Original API code Copyright (c) 1997-2012 University of Cambridge
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070011 New API code Copyright (c) 2016-2021 University of Cambridge
Elliott Hughes5b808042021-10-01 10:56:10 -070012
13-----------------------------------------------------------------------------
14Redistribution and use in source and binary forms, with or without
15modification, are permitted provided that the following conditions are met:
16
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
19
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
23
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
27
28THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38POSSIBILITY OF SUCH DAMAGE.
39-----------------------------------------------------------------------------
40*/
41
42#ifdef HAVE_CONFIG_H
43#include "config.h"
44#endif
45
46#include "pcre2_internal.h"
47
48#ifdef SUPPORT_JIT
49
50/* All-in-one: Since we use the JIT compiler only from here,
51we just include it. This way we don't need to touch the build
52system files. */
53
/* Configuration macros that must be defined before sljitLir.c is included. */
#define SLJIT_CONFIG_AUTO 1
#define SLJIT_CONFIG_STATIC 1
#define SLJIT_VERBOSE 0

/* SLJIT_DEBUG follows PCRE2_DEBUG. */
#ifdef PCRE2_DEBUG
#define SLJIT_DEBUG 1
#else
#define SLJIT_DEBUG 0
#endif

/* Route the sljit allocation hooks to the pcre2_jit_malloc / pcre2_jit_free
wrappers of this file. */
#define SLJIT_MALLOC(size, allocator_data) pcre2_jit_malloc(size, allocator_data)
#define SLJIT_FREE(ptr, allocator_data) pcre2_jit_free(ptr, allocator_data)
66
67static void * pcre2_jit_malloc(size_t size, void *allocator_data)
68{
69pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
70return allocator->malloc(size, allocator->memory_data);
71}
72
73static void pcre2_jit_free(void *ptr, void *allocator_data)
74{
75pcre2_memctl *allocator = ((pcre2_memctl*)allocator_data);
76allocator->free(ptr, allocator->memory_data);
77}
78
79#include "sljit/sljitLir.c"
80
81#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED
82#error Unsupported architecture
83#endif
84
85/* Defines for debugging purposes. */
86
87/* 1 - Use unoptimized capturing brackets.
88 2 - Enable capture_last_ptr (includes option 1). */
89/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */
90
91/* 1 - Always have a control head. */
92/* #define DEBUG_FORCE_CONTROL_HEAD 1 */
93
/* Allocate memory for the regex stack on the real machine stack.
Fast, but limited size. */
#define MACHINE_STACK_SIZE 32768

/* Growth rate for stack allocated by the OS. Should be a multiple
of the page size. */
#define STACK_GROWTH_RATE 8192

/* Enable to check that the allocation could destroy temporaries.
Only defined when SLJIT_DEBUG is defined and non-zero. */
#if defined SLJIT_DEBUG && SLJIT_DEBUG
#define DESTROY_REGISTERS 1
#endif
106
107/*
Short summary of the backtracking mechanism employed by the JIT code generator:
109
The code generator follows the recursive nature of the PERL compatible regular
expressions. The basic blocks of regular expressions are condition checkers
which execute different commands depending on the result of the condition check.
The relationship between the operators can be horizontal (concatenation) and
vertical (sub-expression) (See struct backtrack_common for more details).
115
116 'ab' - 'a' and 'b' regexps are concatenated
117 'a+' - 'a' is the sub-expression of the '+' operator
118
The condition checkers are boolean (true/false) checkers. Machine code is generated
for the checker itself and for the actions depending on the result of the checker.
The 'true' case is called the matching path (expected path), and the other is called
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken
branches on the matching path.
124
125 Greedy star operator (*) :
126 Matching path: match happens.
127 Backtrack path: match failed.
128 Non-greedy star operator (*?) :
129 Matching path: no need to perform a match.
130 Backtrack path: match is required.
131
The following example shows the code generated for a capturing bracket
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and
we have the following regular expression:
135
136 A(B|C)D
137
138The generated code will be the following:
139
140 A matching path
141 '(' matching path (pushing arguments to the stack)
142 B matching path
143 ')' matching path (pushing arguments to the stack)
144 D matching path
145 return with successful match
146
147 D backtrack path
148 ')' backtrack path (If we arrived from "C" jump to the backtrack of "C")
149 B backtrack path
150 C expected path
151 jump to D matching path
152 C backtrack path
153 A backtrack path
154
 Notice that the order of the backtrack code paths is the opposite of the fast
 code paths. In this way the topmost value on the stack always belongs
 to the current backtrack code path. The backtrack path must check
 whether there is a next alternative. If so, it needs to jump back to
 the matching path eventually. Otherwise it needs to clear out its own stack
 frame and continue the execution on the backtrack code paths.
161*/
162
163/*
164Saved stack frames:
165
Atomic blocks and asserts require reloading the values of private data
when the backtrack mechanism is performed. Because of OP_RECURSE, the data
are not necessarily known at compile time, thus we need a dynamic restore
mechanism.
170
171The stack frames are stored in a chain list, and have the following format:
172([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ]
173
174Thus we can restore the private data to a particular point in the stack.
175*/
176
177typedef struct jit_arguments {
178 /* Pointers first. */
179 struct sljit_stack *stack;
180 PCRE2_SPTR str;
181 PCRE2_SPTR begin;
182 PCRE2_SPTR end;
183 pcre2_match_data *match_data;
184 PCRE2_SPTR startchar_ptr;
185 PCRE2_UCHAR *mark_ptr;
186 int (*callout)(pcre2_callout_block *, void *);
187 void *callout_data;
188 /* Everything else after. */
189 sljit_uw offset_limit;
190 sljit_u32 limit_match;
191 sljit_u32 oveccount;
192 sljit_u32 options;
193} jit_arguments;
194
195#define JIT_NUMBER_OF_COMPILE_MODES 3
196
197typedef struct executable_functions {
198 void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES];
199 void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES];
200 sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES];
201 sljit_u32 top_bracket;
202 sljit_u32 limit_match;
203} executable_functions;
204
/* Singly linked list node holding one sljit jump. */
typedef struct jump_list {
  struct sljit_jump *jump;
  struct jump_list *next;
} jump_list;
209
/* Singly linked list node pairing a jump (start) with a label (quit). */
typedef struct stub_list {
  struct sljit_jump *start;
  struct sljit_label *quit;
  struct stub_list *next;
} stub_list;
215
/* Negative sentinel values. NOTE(review): presumably stored in the
"framesize" members of the backtrack structures, which are documented as
"less than 0 if a frame is not needed" - confirm against the users. */
enum frame_types {
  no_frame = -1,
  no_stack = -2
};
220
/* Type tags for control entries (mark / then trap). */
enum control_types {
  type_mark = 0,
  type_then_trap = 1
};
225
/* Type tags for the early fail handling. */
enum early_fail_types {
  type_skip = 0,
  type_fail = 1,
  type_fail_range = 2
};
231
232typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args);
233
234/* The following structure is the key data type for the recursive
235code generator. It is allocated by compile_matchingpath, and contains
236the arguments for compile_backtrackingpath. Must be the first member
237of its descendants. */
238typedef struct backtrack_common {
239 /* Concatenation stack. */
240 struct backtrack_common *prev;
241 jump_list *nextbacktracks;
242 /* Internal stack (for component operators). */
243 struct backtrack_common *top;
244 jump_list *topbacktracks;
245 /* Opcode pointer. */
246 PCRE2_SPTR cc;
247} backtrack_common;
248
249typedef struct assert_backtrack {
250 backtrack_common common;
251 jump_list *condfailed;
252 /* Less than 0 if a frame is not needed. */
253 int framesize;
254 /* Points to our private memory word on the stack. */
255 int private_data_ptr;
256 /* For iterators. */
257 struct sljit_label *matchingpath;
258} assert_backtrack;
259
260typedef struct bracket_backtrack {
261 backtrack_common common;
262 /* Where to coninue if an alternative is successfully matched. */
263 struct sljit_label *alternative_matchingpath;
264 /* For rmin and rmax iterators. */
265 struct sljit_label *recursive_matchingpath;
266 /* For greedy ? operator. */
267 struct sljit_label *zero_matchingpath;
268 /* Contains the branches of a failed condition. */
269 union {
270 /* Both for OP_COND, OP_SCOND. */
271 jump_list *condfailed;
272 assert_backtrack *assert;
273 /* For OP_ONCE. Less than 0 if not needed. */
274 int framesize;
275 /* For brackets with >3 alternatives. */
276 struct sljit_put_label *matching_put_label;
277 } u;
278 /* Points to our private memory word on the stack. */
279 int private_data_ptr;
280} bracket_backtrack;
281
282typedef struct bracketpos_backtrack {
283 backtrack_common common;
284 /* Points to our private memory word on the stack. */
285 int private_data_ptr;
286 /* Reverting stack is needed. */
287 int framesize;
288 /* Allocated stack size. */
289 int stacksize;
290} bracketpos_backtrack;
291
292typedef struct braminzero_backtrack {
293 backtrack_common common;
294 struct sljit_label *matchingpath;
295} braminzero_backtrack;
296
297typedef struct char_iterator_backtrack {
298 backtrack_common common;
299 /* Next iteration. */
300 struct sljit_label *matchingpath;
301 union {
302 jump_list *backtracks;
303 struct {
304 unsigned int othercasebit;
305 PCRE2_UCHAR chr;
306 BOOL enabled;
307 } charpos;
308 } u;
309} char_iterator_backtrack;
310
311typedef struct ref_iterator_backtrack {
312 backtrack_common common;
313 /* Next iteration. */
314 struct sljit_label *matchingpath;
315} ref_iterator_backtrack;
316
317typedef struct recurse_entry {
318 struct recurse_entry *next;
319 /* Contains the function entry label. */
320 struct sljit_label *entry_label;
321 /* Contains the function entry label. */
322 struct sljit_label *backtrack_label;
323 /* Collects the entry calls until the function is not created. */
324 jump_list *entry_calls;
325 /* Collects the backtrack calls until the function is not created. */
326 jump_list *backtrack_calls;
327 /* Points to the starting opcode. */
328 sljit_sw start;
329} recurse_entry;
330
331typedef struct recurse_backtrack {
332 backtrack_common common;
333 /* Return to the matching path. */
334 struct sljit_label *matchingpath;
335 /* Recursive pattern. */
336 recurse_entry *entry;
337 /* Pattern is inlined. */
338 BOOL inlined_pattern;
339} recurse_backtrack;
340
341#define OP_THEN_TRAP OP_TABLE_LENGTH
342
343typedef struct then_trap_backtrack {
344 backtrack_common common;
345 /* If then_trap is not NULL, this structure contains the real
346 then_trap for the backtracking path. */
347 struct then_trap_backtrack *then_trap;
348 /* Points to the starting opcode. */
349 sljit_sw start;
350 /* Exit point for the then opcodes of this alternative. */
351 jump_list *quit;
352 /* Frame size of the current alternative. */
353 int framesize;
354} then_trap_backtrack;
355
356#define MAX_N_CHARS 12
357#define MAX_DIFF_CHARS 5
358
359typedef struct fast_forward_char_data {
360 /* Number of characters in the chars array, 255 for any character. */
361 sljit_u8 count;
362 /* Number of last UTF-8 characters in the chars array. */
363 sljit_u8 last_count;
364 /* Available characters in the current position. */
365 PCRE2_UCHAR chars[MAX_DIFF_CHARS];
366} fast_forward_char_data;
367
/* Size limits for character class handling. */
#define MAX_CLASS_RANGE_SIZE 4
#define MAX_CLASS_CHARS_SIZE 3
370
371typedef struct compiler_common {
372 /* The sljit ceneric compiler. */
373 struct sljit_compiler *compiler;
374 /* Compiled regular expression. */
375 pcre2_real_code *re;
376 /* First byte code. */
377 PCRE2_SPTR start;
378 /* Maps private data offset to each opcode. */
379 sljit_s32 *private_data_ptrs;
380 /* Chain list of read-only data ptrs. */
381 void *read_only_data_head;
382 /* Tells whether the capturing bracket is optimized. */
383 sljit_u8 *optimized_cbracket;
384 /* Tells whether the starting offset is a target of then. */
385 sljit_u8 *then_offsets;
386 /* Current position where a THEN must jump. */
387 then_trap_backtrack *then_trap;
388 /* Starting offset of private data for capturing brackets. */
389 sljit_s32 cbra_ptr;
390 /* Output vector starting point. Must be divisible by 2. */
391 sljit_s32 ovector_start;
392 /* Points to the starting character of the current match. */
393 sljit_s32 start_ptr;
394 /* Last known position of the requested byte. */
395 sljit_s32 req_char_ptr;
396 /* Head of the last recursion. */
397 sljit_s32 recursive_head_ptr;
398 /* First inspected character for partial matching.
399 (Needed for avoiding zero length partial matches.) */
400 sljit_s32 start_used_ptr;
401 /* Starting pointer for partial soft matches. */
402 sljit_s32 hit_start;
403 /* Pointer of the match end position. */
404 sljit_s32 match_end_ptr;
405 /* Points to the marked string. */
406 sljit_s32 mark_ptr;
407 /* Recursive control verb management chain. */
408 sljit_s32 control_head_ptr;
409 /* Points to the last matched capture block index. */
410 sljit_s32 capture_last_ptr;
411 /* Fast forward skipping byte code pointer. */
412 PCRE2_SPTR fast_forward_bc_ptr;
413 /* Locals used by fast fail optimization. */
414 sljit_s32 early_fail_start_ptr;
415 sljit_s32 early_fail_end_ptr;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -0700416 /* Variables used by recursive call generator. */
417 sljit_s32 recurse_bitset_size;
418 uint8_t *recurse_bitset;
Elliott Hughes5b808042021-10-01 10:56:10 -0700419
420 /* Flipped and lower case tables. */
421 const sljit_u8 *fcc;
422 sljit_sw lcc;
423 /* Mode can be PCRE2_JIT_COMPLETE and others. */
424 int mode;
425 /* TRUE, when empty match is accepted for partial matching. */
426 BOOL allow_empty_partial;
427 /* TRUE, when minlength is greater than 0. */
428 BOOL might_be_empty;
429 /* \K is found in the pattern. */
430 BOOL has_set_som;
431 /* (*SKIP:arg) is found in the pattern. */
432 BOOL has_skip_arg;
433 /* (*THEN) is found in the pattern. */
434 BOOL has_then;
435 /* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */
436 BOOL has_skip_in_assert_back;
437 /* Quit is redirected by recurse, negative assertion, or positive assertion in conditional block. */
438 BOOL local_quit_available;
439 /* Currently in a positive assertion. */
440 BOOL in_positive_assertion;
441 /* Newline control. */
442 int nltype;
443 sljit_u32 nlmax;
444 sljit_u32 nlmin;
445 int newline;
446 int bsr_nltype;
447 sljit_u32 bsr_nlmax;
448 sljit_u32 bsr_nlmin;
449 /* Dollar endonly. */
450 int endonly;
451 /* Tables. */
452 sljit_sw ctypes;
453 /* Named capturing brackets. */
454 PCRE2_SPTR name_table;
455 sljit_sw name_count;
456 sljit_sw name_entry_size;
457
458 /* Labels and jump lists. */
459 struct sljit_label *partialmatchlabel;
460 struct sljit_label *quit_label;
461 struct sljit_label *abort_label;
462 struct sljit_label *accept_label;
463 struct sljit_label *ff_newline_shortcut;
464 stub_list *stubs;
465 recurse_entry *entries;
466 recurse_entry *currententry;
467 jump_list *partialmatch;
468 jump_list *quit;
469 jump_list *positive_assertion_quit;
470 jump_list *abort;
471 jump_list *failed_match;
472 jump_list *accept;
473 jump_list *calllimit;
474 jump_list *stackalloc;
475 jump_list *revertframes;
476 jump_list *wordboundary;
477 jump_list *anynewline;
478 jump_list *hspace;
479 jump_list *vspace;
480 jump_list *casefulcmp;
481 jump_list *caselesscmp;
482 jump_list *reset_match;
483 BOOL unset_backref;
484 BOOL alt_circumflex;
485#ifdef SUPPORT_UNICODE
486 BOOL utf;
487 BOOL invalid_utf;
488 BOOL ucp;
489 /* Points to saving area for iref. */
490 sljit_s32 iref_ptr;
491 jump_list *getucd;
492 jump_list *getucdtype;
493#if PCRE2_CODE_UNIT_WIDTH == 8
494 jump_list *utfreadchar;
495 jump_list *utfreadtype8;
496 jump_list *utfpeakcharback;
497#endif
498#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
499 jump_list *utfreadchar_invalid;
500 jump_list *utfreadnewline_invalid;
501 jump_list *utfmoveback_invalid;
502 jump_list *utfpeakcharback_invalid;
503#endif
504#endif /* SUPPORT_UNICODE */
505} compiler_common;
506
/* For byte_sequence_compare. */

/* The character buffers (c and oc) exist only when SLJIT_UNALIGNED is
defined and non-zero; each is a union that can be viewed as one integer,
one 16 bit value, or as code units of the configured width. */
typedef struct compare_context {
  int length;
  int sourcereg;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  int ucharptr;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } c;
  union {
    sljit_s32 asint;
    sljit_u16 asushort;
#if PCRE2_CODE_UNIT_WIDTH == 8
    sljit_u8 asbyte;
    sljit_u8 asuchars[4];
#elif PCRE2_CODE_UNIT_WIDTH == 16
    sljit_u16 asuchars[2];
#elif PCRE2_CODE_UNIT_WIDTH == 32
    sljit_u32 asuchars[1];
#endif
  } oc;
#endif
} compare_context;
540
/* Undefine sljit macros. (This file defines its own CMP shortcut.) */
#undef CMP

/* Used for accessing the elements of the stack. */
#define STACK(i)      ((i) * (int)sizeof(sljit_sw))

/* When sljit states a preferred shift register it must be R2 or R3. */
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
/* Nothing. */
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3
#define SHIFT_REG_IS_R3
#else
#error "Unsupported shift register"
#endif
#endif

/* Register assignments. TMP2 and TMP3 swap places when the preferred
shift register is R3. */
#define TMP1          SLJIT_R0
#ifdef SHIFT_REG_IS_R3
#define TMP2          SLJIT_R3
#define TMP3          SLJIT_R2
#else
#define TMP2          SLJIT_R2
#define TMP3          SLJIT_R3
#endif
#define STR_PTR       SLJIT_R1
#define STR_END       SLJIT_S0
#define STACK_TOP     SLJIT_S1
#define STACK_LIMIT   SLJIT_S2
#define COUNT_MATCH   SLJIT_S3
#define ARGUMENTS     SLJIT_S4
#define RETURN_ADDR   SLJIT_R4

/* Set to 1 only for the x86-32 sljit configuration. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#define HAS_VIRTUAL_REGISTERS 1
#else
#define HAS_VIRTUAL_REGISTERS 0
#endif
578
/* Local space layout. All offsets are in bytes. */
/* These two locals can be used by the current opcode. */
#define LOCALS0          (0 * sizeof(sljit_sw))
#define LOCALS1          (1 * sizeof(sljit_sw))
/* Two local variables for possessive quantifiers (char1 cannot use them). */
#define POSSESSIVE0      (2 * sizeof(sljit_sw))
#define POSSESSIVE1      (3 * sizeof(sljit_sw))
/* Max limit of recursions. */
#define LIMIT_MATCH      (4 * sizeof(sljit_sw))
/* The output vector is stored on the stack, and contains pointers
to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet been
reached. These macros expect a variable named "common" in scope. */
#define OVECTOR_START    (common->ovector_start)
#define OVECTOR(i)       (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
#define OVECTOR_PRIV(i)  (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
596
597#if PCRE2_CODE_UNIT_WIDTH == 8
598#define MOV_UCHAR SLJIT_MOV_U8
599#define IN_UCHARS(x) (x)
600#elif PCRE2_CODE_UNIT_WIDTH == 16
601#define MOV_UCHAR SLJIT_MOV_U16
602#define UCHAR_SHIFT (1)
603#define IN_UCHARS(x) ((x) * 2)
604#elif PCRE2_CODE_UNIT_WIDTH == 32
605#define MOV_UCHAR SLJIT_MOV_U32
606#define UCHAR_SHIFT (2)
607#define IN_UCHARS(x) ((x) * 4)
608#else
609#error Unsupported compiling mode
610#endif
611
/* Shortcuts. DEFINE_COMPILER expects a variable named "common" in scope
and declares the local "compiler" that every other shortcut passes as the
first argument of the wrapped sljit call. */
#define DEFINE_COMPILER \
  struct sljit_compiler *compiler = common->compiler
#define OP1(op, dst, dstw, src, srcw) \
  sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
  sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
#define OP2U(op, src1, src1w, src2, src2w) \
  sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))
#define OP_SRC(op, src, srcw) \
  sljit_emit_op_src(compiler, (op), (src), (srcw))
#define LABEL() \
  sljit_emit_label(compiler)
#define JUMP(type) \
  sljit_emit_jump(compiler, (type))
/* Emits a jump and binds it to an existing label. */
#define JUMPTO(type, label) \
  sljit_set_label(sljit_emit_jump(compiler, (type)), (label))
/* Binds an existing jump to a label emitted at the current position. */
#define JUMPHERE(jump) \
  sljit_set_label((jump), sljit_emit_label(compiler))
#define SET_LABEL(jump, label) \
  sljit_set_label((jump), (label))
#define CMP(type, src1, src1w, src2, src2w) \
  sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w))
/* Emits a compare-and-jump and binds it to an existing label. */
#define CMPTO(type, src1, src1w, src2, src2w, label) \
  sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
  sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
#define CMOV(type, dst_reg, src, srcw) \
  sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
#define GET_LOCAL_BASE(dst, dstw, offset) \
  sljit_get_local_base(compiler, (dst), (dstw), (offset))
643
#define READ_CHAR_MAX 0x7fffffff

/* The negative value is parenthesized so that it expands safely inside
larger expressions. */
#define INVALID_UTF_CHAR (-1)
#define UNASSIGNED_UTF_CHAR 888
648
#if defined SUPPORT_UNICODE

/* Validating character readers, one pair per code unit width.

GETCHARINC_INVALID(c, ptr, end, invalid_action)
  Reads the character starting at ptr; code units at or after end are never
  read as trailing units. On success c receives the code point and ptr is
  moved past the character.

GETCHARBACK_INVALID(c, ptr, start, invalid_action)
  Reads the character ending just before ptr, not going below start for
  leading units. On success c receives the code point and ptr is moved to
  the first code unit of the character.

Whenever the sequence is not accepted, invalid_action is executed. At that
point c may already have been modified, and in the 8 bit readers ptr has
already been moved when a three or four byte sequence decodes to a rejected
value (overlong form, surrogate, or out of range).

All macro arguments are expanded several times and without parentheses, so
they must be simple lvalues / expressions without side effects. */

#if PCRE2_CODE_UNIT_WIDTH == 8

#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] <= 0x7f) \
    c = *ptr++; \
  else if (ptr + 1 < end && ptr[1] >= 0x80 && ptr[1] < 0xc0) \
    { \
    c = ptr[1] - 0x80; \
    \
    if (ptr[0] >= 0xc2 && ptr[0] <= 0xdf) \
      { \
      c |= (ptr[0] - 0xc0) << 6; \
      ptr += 2; \
      } \
    else if (ptr + 2 < end && ptr[2] >= 0x80 && ptr[2] < 0xc0) \
      { \
      c = c << 6 | (ptr[2] - 0x80); \
      \
      if (ptr[0] >= 0xe0 && ptr[0] <= 0xef) \
        { \
        c |= (ptr[0] - 0xe0) << 12; \
        ptr += 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr + 3 < end && ptr[3] >= 0x80 && ptr[3] < 0xc0) \
        { \
        c = c << 6 | (ptr[3] - 0x80); \
        \
        if (ptr[0] >= 0xf0 && ptr[0] <= 0xf4) \
          { \
          c |= (ptr[0] - 0xf0) << 18; \
          ptr += 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }

#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c <= 0x7f) \
    ptr--; \
  else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \
    { \
    c -= 0x80; \
    \
    if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \
      { \
      c |= (ptr[-2] - 0xc0) << 6; \
      ptr -= 2; \
      } \
    else if (ptr - 2 > start && ptr[-2] >= 0x80 && ptr[-2] < 0xc0) \
      { \
      c = c << 6 | (ptr[-2] - 0x80); \
      \
      if (ptr[-3] >= 0xe0 && ptr[-3] <= 0xef) \
        { \
        c |= (ptr[-3] - 0xe0) << 12; \
        ptr -= 3; \
        \
        if (c < 0x800 || (c >= 0xd800 && c < 0xe000)) \
          { \
          invalid_action; \
          } \
        } \
      else if (ptr - 3 > start && ptr[-3] >= 0x80 && ptr[-3] < 0xc0) \
        { \
        c = c << 6 | (ptr[-3] - 0x80); \
        \
        if (ptr[-4] >= 0xf0 && ptr[-4] <= 0xf4) \
          { \
          c |= (ptr[-4] - 0xf0) << 18; \
          ptr -= 4; \
          \
          if (c >= 0x110000 || c < 0x10000) \
            { \
            invalid_action; \
            } \
          } \
        else \
          { \
          invalid_action; \
          } \
        } \
      else \
        { \
        invalid_action; \
        } \
      } \
    else \
      { \
      invalid_action; \
      } \
    } \
  else \
    { \
    invalid_action; \
    } \
  }

#elif PCRE2_CODE_UNIT_WIDTH == 16

#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || ptr[0] >= 0xe000) \
    c = *ptr++; \
  else if (ptr[0] < 0xdc00 && ptr + 1 < end && ptr[1] >= 0xdc00 && ptr[1] < 0xe000) \
    { \
    c = (((ptr[0] - 0xd800) << 10) | (ptr[1] - 0xdc00)) + 0x10000; \
    ptr += 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }

#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (c < 0xd800 || c >= 0xe000) \
    ptr--; \
  else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \
    { \
    c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \
    ptr -= 2; \
    } \
  else \
    { \
    invalid_action; \
    } \
  }


#elif PCRE2_CODE_UNIT_WIDTH == 32

#define GETCHARINC_INVALID(c, ptr, end, invalid_action) \
  { \
  if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \
    c = *ptr++; \
  else \
    { \
    invalid_action; \
    } \
  }

#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \
  { \
  c = ptr[-1]; \
  if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \
    ptr--; \
  else \
    { \
    invalid_action; \
    } \
  }

#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
836
837static PCRE2_SPTR bracketend(PCRE2_SPTR cc)
838{
839SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
840do cc += GET(cc, 1); while (*cc == OP_ALT);
841SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
842cc += 1 + LINK_SIZE;
843return cc;
844}
845
846static int no_alternatives(PCRE2_SPTR cc)
847{
848int count = 0;
849SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND));
850do
851 {
852 cc += GET(cc, 1);
853 count++;
854 }
855while (*cc == OP_ALT);
856SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
857return count;
858}
859
/* Functions which might need modification for every newly supported opcode:
861 next_opcode
862 check_opcode_types
863 set_private_data_ptrs
864 get_framesize
865 init_frame
866 get_recurse_data_length
867 copy_recurse_data
868 compile_matchingpath
869 compile_backtrackingpath
870*/
871
872static PCRE2_SPTR next_opcode(compiler_common *common, PCRE2_SPTR cc)
873{
874SLJIT_UNUSED_ARG(common);
875switch(*cc)
876 {
877 case OP_SOD:
878 case OP_SOM:
879 case OP_SET_SOM:
880 case OP_NOT_WORD_BOUNDARY:
881 case OP_WORD_BOUNDARY:
882 case OP_NOT_DIGIT:
883 case OP_DIGIT:
884 case OP_NOT_WHITESPACE:
885 case OP_WHITESPACE:
886 case OP_NOT_WORDCHAR:
887 case OP_WORDCHAR:
888 case OP_ANY:
889 case OP_ALLANY:
890 case OP_NOTPROP:
891 case OP_PROP:
892 case OP_ANYNL:
893 case OP_NOT_HSPACE:
894 case OP_HSPACE:
895 case OP_NOT_VSPACE:
896 case OP_VSPACE:
897 case OP_EXTUNI:
898 case OP_EODN:
899 case OP_EOD:
900 case OP_CIRC:
901 case OP_CIRCM:
902 case OP_DOLL:
903 case OP_DOLLM:
904 case OP_CRSTAR:
905 case OP_CRMINSTAR:
906 case OP_CRPLUS:
907 case OP_CRMINPLUS:
908 case OP_CRQUERY:
909 case OP_CRMINQUERY:
910 case OP_CRRANGE:
911 case OP_CRMINRANGE:
912 case OP_CRPOSSTAR:
913 case OP_CRPOSPLUS:
914 case OP_CRPOSQUERY:
915 case OP_CRPOSRANGE:
916 case OP_CLASS:
917 case OP_NCLASS:
918 case OP_REF:
919 case OP_REFI:
920 case OP_DNREF:
921 case OP_DNREFI:
922 case OP_RECURSE:
923 case OP_CALLOUT:
924 case OP_ALT:
925 case OP_KET:
926 case OP_KETRMAX:
927 case OP_KETRMIN:
928 case OP_KETRPOS:
929 case OP_REVERSE:
930 case OP_ASSERT:
931 case OP_ASSERT_NOT:
932 case OP_ASSERTBACK:
933 case OP_ASSERTBACK_NOT:
934 case OP_ASSERT_NA:
935 case OP_ASSERTBACK_NA:
936 case OP_ONCE:
937 case OP_SCRIPT_RUN:
938 case OP_BRA:
939 case OP_BRAPOS:
940 case OP_CBRA:
941 case OP_CBRAPOS:
942 case OP_COND:
943 case OP_SBRA:
944 case OP_SBRAPOS:
945 case OP_SCBRA:
946 case OP_SCBRAPOS:
947 case OP_SCOND:
948 case OP_CREF:
949 case OP_DNCREF:
950 case OP_RREF:
951 case OP_DNRREF:
952 case OP_FALSE:
953 case OP_TRUE:
954 case OP_BRAZERO:
955 case OP_BRAMINZERO:
956 case OP_BRAPOSZERO:
957 case OP_PRUNE:
958 case OP_SKIP:
959 case OP_THEN:
960 case OP_COMMIT:
961 case OP_FAIL:
962 case OP_ACCEPT:
963 case OP_ASSERT_ACCEPT:
964 case OP_CLOSE:
965 case OP_SKIPZERO:
966 return cc + PRIV(OP_lengths)[*cc];
967
968 case OP_CHAR:
969 case OP_CHARI:
970 case OP_NOT:
971 case OP_NOTI:
972 case OP_STAR:
973 case OP_MINSTAR:
974 case OP_PLUS:
975 case OP_MINPLUS:
976 case OP_QUERY:
977 case OP_MINQUERY:
978 case OP_UPTO:
979 case OP_MINUPTO:
980 case OP_EXACT:
981 case OP_POSSTAR:
982 case OP_POSPLUS:
983 case OP_POSQUERY:
984 case OP_POSUPTO:
985 case OP_STARI:
986 case OP_MINSTARI:
987 case OP_PLUSI:
988 case OP_MINPLUSI:
989 case OP_QUERYI:
990 case OP_MINQUERYI:
991 case OP_UPTOI:
992 case OP_MINUPTOI:
993 case OP_EXACTI:
994 case OP_POSSTARI:
995 case OP_POSPLUSI:
996 case OP_POSQUERYI:
997 case OP_POSUPTOI:
998 case OP_NOTSTAR:
999 case OP_NOTMINSTAR:
1000 case OP_NOTPLUS:
1001 case OP_NOTMINPLUS:
1002 case OP_NOTQUERY:
1003 case OP_NOTMINQUERY:
1004 case OP_NOTUPTO:
1005 case OP_NOTMINUPTO:
1006 case OP_NOTEXACT:
1007 case OP_NOTPOSSTAR:
1008 case OP_NOTPOSPLUS:
1009 case OP_NOTPOSQUERY:
1010 case OP_NOTPOSUPTO:
1011 case OP_NOTSTARI:
1012 case OP_NOTMINSTARI:
1013 case OP_NOTPLUSI:
1014 case OP_NOTMINPLUSI:
1015 case OP_NOTQUERYI:
1016 case OP_NOTMINQUERYI:
1017 case OP_NOTUPTOI:
1018 case OP_NOTMINUPTOI:
1019 case OP_NOTEXACTI:
1020 case OP_NOTPOSSTARI:
1021 case OP_NOTPOSPLUSI:
1022 case OP_NOTPOSQUERYI:
1023 case OP_NOTPOSUPTOI:
1024 cc += PRIV(OP_lengths)[*cc];
1025#ifdef SUPPORT_UNICODE
1026 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1027#endif
1028 return cc;
1029
1030 /* Special cases. */
1031 case OP_TYPESTAR:
1032 case OP_TYPEMINSTAR:
1033 case OP_TYPEPLUS:
1034 case OP_TYPEMINPLUS:
1035 case OP_TYPEQUERY:
1036 case OP_TYPEMINQUERY:
1037 case OP_TYPEUPTO:
1038 case OP_TYPEMINUPTO:
1039 case OP_TYPEEXACT:
1040 case OP_TYPEPOSSTAR:
1041 case OP_TYPEPOSPLUS:
1042 case OP_TYPEPOSQUERY:
1043 case OP_TYPEPOSUPTO:
1044 return cc + PRIV(OP_lengths)[*cc] - 1;
1045
1046 case OP_ANYBYTE:
1047#ifdef SUPPORT_UNICODE
1048 if (common->utf) return NULL;
1049#endif
1050 return cc + 1;
1051
1052 case OP_CALLOUT_STR:
1053 return cc + GET(cc, 1 + 2*LINK_SIZE);
1054
1055#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1056 case OP_XCLASS:
1057 return cc + GET(cc, 1);
1058#endif
1059
1060 case OP_MARK:
1061 case OP_COMMIT_ARG:
1062 case OP_PRUNE_ARG:
1063 case OP_SKIP_ARG:
1064 case OP_THEN_ARG:
1065 return cc + 1 + 2 + cc[1];
1066
1067 default:
1068 SLJIT_UNREACHABLE();
1069 return NULL;
1070 }
1071}
1072
1073static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend)
1074{
1075int count;
1076PCRE2_SPTR slot;
1077PCRE2_SPTR assert_back_end = cc - 1;
1078PCRE2_SPTR assert_na_end = cc - 1;
1079
1080/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
1081while (cc < ccend)
1082 {
1083 switch(*cc)
1084 {
1085 case OP_SET_SOM:
1086 common->has_set_som = TRUE;
1087 common->might_be_empty = TRUE;
1088 cc += 1;
1089 break;
1090
1091 case OP_REFI:
1092#ifdef SUPPORT_UNICODE
1093 if (common->iref_ptr == 0)
1094 {
1095 common->iref_ptr = common->ovector_start;
1096 common->ovector_start += 3 * sizeof(sljit_sw);
1097 }
1098#endif /* SUPPORT_UNICODE */
1099 /* Fall through. */
1100 case OP_REF:
1101 common->optimized_cbracket[GET2(cc, 1)] = 0;
1102 cc += 1 + IMM2_SIZE;
1103 break;
1104
1105 case OP_ASSERT_NA:
1106 case OP_ASSERTBACK_NA:
1107 slot = bracketend(cc);
1108 if (slot > assert_na_end)
1109 assert_na_end = slot;
1110 cc += 1 + LINK_SIZE;
1111 break;
1112
1113 case OP_CBRAPOS:
1114 case OP_SCBRAPOS:
1115 common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
1116 cc += 1 + LINK_SIZE + IMM2_SIZE;
1117 break;
1118
1119 case OP_COND:
1120 case OP_SCOND:
1121 /* Only AUTO_CALLOUT can insert this opcode. We do
1122 not intend to support this case. */
1123 if (cc[1 + LINK_SIZE] == OP_CALLOUT || cc[1 + LINK_SIZE] == OP_CALLOUT_STR)
1124 return FALSE;
1125 cc += 1 + LINK_SIZE;
1126 break;
1127
1128 case OP_CREF:
1129 common->optimized_cbracket[GET2(cc, 1)] = 0;
1130 cc += 1 + IMM2_SIZE;
1131 break;
1132
1133 case OP_DNREF:
1134 case OP_DNREFI:
1135 case OP_DNCREF:
1136 count = GET2(cc, 1 + IMM2_SIZE);
1137 slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
1138 while (count-- > 0)
1139 {
1140 common->optimized_cbracket[GET2(slot, 0)] = 0;
1141 slot += common->name_entry_size;
1142 }
1143 cc += 1 + 2 * IMM2_SIZE;
1144 break;
1145
1146 case OP_RECURSE:
1147 /* Set its value only once. */
1148 if (common->recursive_head_ptr == 0)
1149 {
1150 common->recursive_head_ptr = common->ovector_start;
1151 common->ovector_start += sizeof(sljit_sw);
1152 }
1153 cc += 1 + LINK_SIZE;
1154 break;
1155
1156 case OP_CALLOUT:
1157 case OP_CALLOUT_STR:
1158 if (common->capture_last_ptr == 0)
1159 {
1160 common->capture_last_ptr = common->ovector_start;
1161 common->ovector_start += sizeof(sljit_sw);
1162 }
1163 cc += (*cc == OP_CALLOUT) ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2*LINK_SIZE);
1164 break;
1165
1166 case OP_ASSERTBACK:
1167 slot = bracketend(cc);
1168 if (slot > assert_back_end)
1169 assert_back_end = slot;
1170 cc += 1 + LINK_SIZE;
1171 break;
1172
1173 case OP_THEN_ARG:
1174 common->has_then = TRUE;
1175 common->control_head_ptr = 1;
1176 /* Fall through. */
1177
1178 case OP_COMMIT_ARG:
1179 case OP_PRUNE_ARG:
1180 if (cc < assert_na_end)
1181 return FALSE;
1182 /* Fall through */
1183 case OP_MARK:
1184 if (common->mark_ptr == 0)
1185 {
1186 common->mark_ptr = common->ovector_start;
1187 common->ovector_start += sizeof(sljit_sw);
1188 }
1189 cc += 1 + 2 + cc[1];
1190 break;
1191
1192 case OP_THEN:
1193 common->has_then = TRUE;
1194 common->control_head_ptr = 1;
1195 cc += 1;
1196 break;
1197
1198 case OP_SKIP:
1199 if (cc < assert_back_end)
1200 common->has_skip_in_assert_back = TRUE;
1201 if (cc < assert_na_end)
1202 return FALSE;
1203 cc += 1;
1204 break;
1205
1206 case OP_SKIP_ARG:
1207 common->control_head_ptr = 1;
1208 common->has_skip_arg = TRUE;
1209 if (cc < assert_back_end)
1210 common->has_skip_in_assert_back = TRUE;
1211 if (cc < assert_na_end)
1212 return FALSE;
1213 cc += 1 + 2 + cc[1];
1214 break;
1215
1216 case OP_PRUNE:
1217 case OP_COMMIT:
1218 case OP_ASSERT_ACCEPT:
1219 if (cc < assert_na_end)
1220 return FALSE;
1221 cc++;
1222 break;
1223
1224 default:
1225 cc = next_opcode(common, cc);
1226 if (cc == NULL)
1227 return FALSE;
1228 break;
1229 }
1230 }
1231return TRUE;
1232}
1233
1234#define EARLY_FAIL_ENHANCE_MAX (1 + 3)
1235
1236/*
1237start:
1238 0 - skip / early fail allowed
1239 1 - only early fail with range allowed
1240 >1 - (start - 1) early fail is processed
1241
1242return: current number of iterators enhanced with fast fail
1243*/
1244static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start,
1245 sljit_s32 depth, int start, BOOL fast_forward_allowed)
1246{
1247PCRE2_SPTR begin = cc;
1248PCRE2_SPTR next_alt;
1249PCRE2_SPTR end;
1250PCRE2_SPTR accelerated_start;
1251BOOL prev_fast_forward_allowed;
1252int result = 0;
1253int count;
1254
1255SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
1256SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
1257SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);
1258
Elliott Hughes16619d62021-10-29 12:10:38 -07001259next_alt = cc + GET(cc, 1);
1260if (*next_alt == OP_ALT)
1261 fast_forward_allowed = FALSE;
1262
Elliott Hughes5b808042021-10-01 10:56:10 -07001263do
1264 {
1265 count = start;
Elliott Hughes5b808042021-10-01 10:56:10 -07001266 cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);
1267
1268 while (TRUE)
1269 {
1270 accelerated_start = NULL;
1271
1272 switch(*cc)
1273 {
1274 case OP_SOD:
1275 case OP_SOM:
1276 case OP_SET_SOM:
1277 case OP_NOT_WORD_BOUNDARY:
1278 case OP_WORD_BOUNDARY:
1279 case OP_EODN:
1280 case OP_EOD:
1281 case OP_CIRC:
1282 case OP_CIRCM:
1283 case OP_DOLL:
1284 case OP_DOLLM:
1285 /* Zero width assertions. */
1286 cc++;
1287 continue;
1288
1289 case OP_NOT_DIGIT:
1290 case OP_DIGIT:
1291 case OP_NOT_WHITESPACE:
1292 case OP_WHITESPACE:
1293 case OP_NOT_WORDCHAR:
1294 case OP_WORDCHAR:
1295 case OP_ANY:
1296 case OP_ALLANY:
1297 case OP_ANYBYTE:
1298 case OP_NOT_HSPACE:
1299 case OP_HSPACE:
1300 case OP_NOT_VSPACE:
1301 case OP_VSPACE:
1302 fast_forward_allowed = FALSE;
1303 cc++;
1304 continue;
1305
1306 case OP_ANYNL:
1307 case OP_EXTUNI:
1308 fast_forward_allowed = FALSE;
1309 if (count == 0)
1310 count = 1;
1311 cc++;
1312 continue;
1313
1314 case OP_NOTPROP:
1315 case OP_PROP:
1316 fast_forward_allowed = FALSE;
1317 cc += 1 + 2;
1318 continue;
1319
1320 case OP_CHAR:
1321 case OP_CHARI:
1322 case OP_NOT:
1323 case OP_NOTI:
1324 fast_forward_allowed = FALSE;
1325 cc += 2;
1326#ifdef SUPPORT_UNICODE
1327 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1328#endif
1329 continue;
1330
1331 case OP_TYPESTAR:
1332 case OP_TYPEMINSTAR:
1333 case OP_TYPEPLUS:
1334 case OP_TYPEMINPLUS:
1335 case OP_TYPEPOSSTAR:
1336 case OP_TYPEPOSPLUS:
1337 /* The type or prop opcode is skipped in the next iteration. */
1338 cc += 1;
1339
1340 if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI)
1341 {
1342 accelerated_start = cc - 1;
1343 break;
1344 }
1345
1346 if (count == 0)
1347 count = 1;
1348 fast_forward_allowed = FALSE;
1349 continue;
1350
1351 case OP_TYPEUPTO:
1352 case OP_TYPEMINUPTO:
1353 case OP_TYPEEXACT:
1354 case OP_TYPEPOSUPTO:
1355 cc += IMM2_SIZE;
1356 /* Fall through */
1357
1358 case OP_TYPEQUERY:
1359 case OP_TYPEMINQUERY:
1360 case OP_TYPEPOSQUERY:
1361 /* The type or prop opcode is skipped in the next iteration. */
1362 fast_forward_allowed = FALSE;
1363 if (count == 0)
1364 count = 1;
1365 cc += 1;
1366 continue;
1367
1368 case OP_STAR:
1369 case OP_MINSTAR:
1370 case OP_PLUS:
1371 case OP_MINPLUS:
1372 case OP_POSSTAR:
1373 case OP_POSPLUS:
1374
1375 case OP_STARI:
1376 case OP_MINSTARI:
1377 case OP_PLUSI:
1378 case OP_MINPLUSI:
1379 case OP_POSSTARI:
1380 case OP_POSPLUSI:
1381
1382 case OP_NOTSTAR:
1383 case OP_NOTMINSTAR:
1384 case OP_NOTPLUS:
1385 case OP_NOTMINPLUS:
1386 case OP_NOTPOSSTAR:
1387 case OP_NOTPOSPLUS:
1388
1389 case OP_NOTSTARI:
1390 case OP_NOTMINSTARI:
1391 case OP_NOTPLUSI:
1392 case OP_NOTMINPLUSI:
1393 case OP_NOTPOSSTARI:
1394 case OP_NOTPOSPLUSI:
1395 accelerated_start = cc;
1396 cc += 2;
1397#ifdef SUPPORT_UNICODE
1398 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1399#endif
1400 break;
1401
1402 case OP_UPTO:
1403 case OP_MINUPTO:
1404 case OP_EXACT:
1405 case OP_POSUPTO:
1406 case OP_UPTOI:
1407 case OP_MINUPTOI:
1408 case OP_EXACTI:
1409 case OP_POSUPTOI:
1410 case OP_NOTUPTO:
1411 case OP_NOTMINUPTO:
1412 case OP_NOTEXACT:
1413 case OP_NOTPOSUPTO:
1414 case OP_NOTUPTOI:
1415 case OP_NOTMINUPTOI:
1416 case OP_NOTEXACTI:
1417 case OP_NOTPOSUPTOI:
1418 cc += IMM2_SIZE;
1419 /* Fall through */
1420
1421 case OP_QUERY:
1422 case OP_MINQUERY:
1423 case OP_POSQUERY:
1424 case OP_QUERYI:
1425 case OP_MINQUERYI:
1426 case OP_POSQUERYI:
1427 case OP_NOTQUERY:
1428 case OP_NOTMINQUERY:
1429 case OP_NOTPOSQUERY:
1430 case OP_NOTQUERYI:
1431 case OP_NOTMINQUERYI:
1432 case OP_NOTPOSQUERYI:
1433 fast_forward_allowed = FALSE;
1434 if (count == 0)
1435 count = 1;
1436 cc += 2;
1437#ifdef SUPPORT_UNICODE
1438 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1439#endif
1440 continue;
1441
1442 case OP_CLASS:
1443 case OP_NCLASS:
1444#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1445 case OP_XCLASS:
1446 accelerated_start = cc;
1447 cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR))));
1448#else
1449 accelerated_start = cc;
1450 cc += (1 + (32 / sizeof(PCRE2_UCHAR)));
1451#endif
1452
1453 switch (*cc)
1454 {
1455 case OP_CRSTAR:
1456 case OP_CRMINSTAR:
1457 case OP_CRPLUS:
1458 case OP_CRMINPLUS:
1459 case OP_CRPOSSTAR:
1460 case OP_CRPOSPLUS:
1461 cc++;
1462 break;
1463
1464 case OP_CRRANGE:
1465 case OP_CRMINRANGE:
1466 case OP_CRPOSRANGE:
1467 cc += 2 * IMM2_SIZE;
1468 /* Fall through */
1469 case OP_CRQUERY:
1470 case OP_CRMINQUERY:
1471 case OP_CRPOSQUERY:
1472 cc++;
1473 if (count == 0)
1474 count = 1;
1475 /* Fall through */
1476 default:
1477 accelerated_start = NULL;
1478 fast_forward_allowed = FALSE;
1479 continue;
1480 }
1481 break;
1482
1483 case OP_ONCE:
1484 case OP_BRA:
1485 case OP_CBRA:
1486 end = cc + GET(cc, 1);
1487
1488 prev_fast_forward_allowed = fast_forward_allowed;
1489 fast_forward_allowed = FALSE;
1490 if (depth >= 4)
1491 break;
1492
1493 end = bracketend(cc) - (1 + LINK_SIZE);
1494 if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
1495 break;
1496
1497 count = detect_early_fail(common, cc, private_data_start, depth + 1, count, prev_fast_forward_allowed);
1498
1499 if (PRIVATE_DATA(cc) != 0)
1500 common->private_data_ptrs[begin - common->start] = 1;
1501
1502 if (count < EARLY_FAIL_ENHANCE_MAX)
1503 {
1504 cc = end + (1 + LINK_SIZE);
1505 continue;
1506 }
1507 break;
1508
1509 case OP_KET:
1510 SLJIT_ASSERT(PRIVATE_DATA(cc) == 0);
1511 if (cc >= next_alt)
1512 break;
1513 cc += 1 + LINK_SIZE;
1514 continue;
1515 }
1516
1517 if (accelerated_start != NULL)
1518 {
1519 if (count == 0)
1520 {
1521 count++;
1522
Elliott Hughes16619d62021-10-29 12:10:38 -07001523 if (fast_forward_allowed)
Elliott Hughes5b808042021-10-01 10:56:10 -07001524 {
1525 common->fast_forward_bc_ptr = accelerated_start;
1526 common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
1527 *private_data_start += sizeof(sljit_sw);
1528 }
1529 else
1530 {
1531 common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;
1532
1533 if (common->early_fail_start_ptr == 0)
1534 common->early_fail_start_ptr = *private_data_start;
1535
1536 *private_data_start += sizeof(sljit_sw);
1537 common->early_fail_end_ptr = *private_data_start;
1538
1539 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1540 return EARLY_FAIL_ENHANCE_MAX;
1541 }
1542 }
1543 else
1544 {
1545 common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;
1546
1547 if (common->early_fail_start_ptr == 0)
1548 common->early_fail_start_ptr = *private_data_start;
1549
1550 *private_data_start += 2 * sizeof(sljit_sw);
1551 common->early_fail_end_ptr = *private_data_start;
1552
1553 if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
1554 return EARLY_FAIL_ENHANCE_MAX;
1555 }
1556
1557 /* Cannot be part of a repeat. */
1558 common->private_data_ptrs[begin - common->start] = 1;
1559 count++;
1560
1561 if (count < EARLY_FAIL_ENHANCE_MAX)
1562 continue;
1563 }
1564
1565 break;
1566 }
1567
1568 if (*cc != OP_ALT && *cc != OP_KET)
1569 result = EARLY_FAIL_ENHANCE_MAX;
1570 else if (result < count)
1571 result = count;
1572
Elliott Hughes5b808042021-10-01 10:56:10 -07001573 cc = next_alt;
Elliott Hughes16619d62021-10-29 12:10:38 -07001574 next_alt = cc + GET(cc, 1);
Elliott Hughes5b808042021-10-01 10:56:10 -07001575 }
1576while (*cc == OP_ALT);
1577
1578return result;
1579}
1580
1581static int get_class_iterator_size(PCRE2_SPTR cc)
1582{
1583sljit_u32 min;
1584sljit_u32 max;
1585switch(*cc)
1586 {
1587 case OP_CRSTAR:
1588 case OP_CRPLUS:
1589 return 2;
1590
1591 case OP_CRMINSTAR:
1592 case OP_CRMINPLUS:
1593 case OP_CRQUERY:
1594 case OP_CRMINQUERY:
1595 return 1;
1596
1597 case OP_CRRANGE:
1598 case OP_CRMINRANGE:
1599 min = GET2(cc, 1);
1600 max = GET2(cc, 1 + IMM2_SIZE);
1601 if (max == 0)
1602 return (*cc == OP_CRRANGE) ? 2 : 1;
1603 max -= min;
1604 if (max > 2)
1605 max = 2;
1606 return max;
1607
1608 default:
1609 return 0;
1610 }
1611}
1612
1613static BOOL detect_repeat(compiler_common *common, PCRE2_SPTR begin)
1614{
1615PCRE2_SPTR end = bracketend(begin);
1616PCRE2_SPTR next;
1617PCRE2_SPTR next_end;
1618PCRE2_SPTR max_end;
1619PCRE2_UCHAR type;
1620sljit_sw length = end - begin;
1621sljit_s32 min, max, i;
1622
1623/* Detect fixed iterations first. */
1624if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
1625 return FALSE;
1626
1627/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/
1628 * Skip the check of the second part. */
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07001629if (PRIVATE_DATA(end - LINK_SIZE) != 0)
Elliott Hughes5b808042021-10-01 10:56:10 -07001630 return TRUE;
1631
1632next = end;
1633min = 1;
1634while (1)
1635 {
1636 if (*next != *begin)
1637 break;
1638 next_end = bracketend(next);
1639 if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
1640 break;
1641 next = next_end;
1642 min++;
1643 }
1644
1645if (min == 2)
1646 return FALSE;
1647
1648max = 0;
1649max_end = next;
1650if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
1651 {
1652 type = *next;
1653 while (1)
1654 {
1655 if (next[0] != type || next[1] != OP_BRA || next[2 + LINK_SIZE] != *begin)
1656 break;
1657 next_end = bracketend(next + 2 + LINK_SIZE);
1658 if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
1659 break;
1660 next = next_end;
1661 max++;
1662 }
1663
1664 if (next[0] == type && next[1] == *begin && max >= 1)
1665 {
1666 next_end = bracketend(next + 1);
1667 if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
1668 {
1669 for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
1670 if (*next_end != OP_KET)
1671 break;
1672
1673 if (i == max)
1674 {
1675 common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
1676 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
1677 /* +2 the original and the last. */
1678 common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
1679 if (min == 1)
1680 return TRUE;
1681 min--;
1682 max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
1683 }
1684 }
1685 }
1686 }
1687
1688if (min >= 3)
1689 {
1690 common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
1691 common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
1692 common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
1693 return TRUE;
1694 }
1695
1696return FALSE;
1697}
1698
/* Case label lists for single character iterators. In set_private_data_ptrs()
the groups ending in _1 get one private data word and those ending in _2A or
_2B get two; the _2B opcodes additionally carry an IMM2_SIZE repeat count.
Labels are grouped by quantifier: plain, caseless, negated, negated caseless. */

#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: \
    case OP_MINSTARI: \
    case OP_NOTMINSTAR: \
    case OP_NOTMINSTARI: \
    case OP_MINPLUS: \
    case OP_MINPLUSI: \
    case OP_NOTMINPLUS: \
    case OP_NOTMINPLUSI: \
    case OP_QUERY: \
    case OP_QUERYI: \
    case OP_NOTQUERY: \
    case OP_NOTQUERYI: \
    case OP_MINQUERY: \
    case OP_MINQUERYI: \
    case OP_NOTMINQUERY: \
    case OP_NOTMINQUERYI:

#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: \
    case OP_STARI: \
    case OP_NOTSTAR: \
    case OP_NOTSTARI: \
    case OP_PLUS: \
    case OP_PLUSI: \
    case OP_NOTPLUS: \
    case OP_NOTPLUSI:

#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: \
    case OP_UPTOI: \
    case OP_NOTUPTO: \
    case OP_NOTUPTOI: \
    case OP_MINUPTO: \
    case OP_MINUPTOI: \
    case OP_NOTMINUPTO: \
    case OP_NOTMINUPTOI:

/* The same grouping for character type iterators. */

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: \
    case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: \
    case OP_TYPEMINQUERY:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: \
    case OP_TYPEPLUS:

#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: \
    case OP_TYPEMINUPTO:
1750
1751static void set_private_data_ptrs(compiler_common *common, int *private_data_start, PCRE2_SPTR ccend)
1752{
1753PCRE2_SPTR cc = common->start;
1754PCRE2_SPTR alternative;
1755PCRE2_SPTR end = NULL;
1756int private_data_ptr = *private_data_start;
1757int space, size, bracketlen;
1758BOOL repeat_check = TRUE;
1759
1760while (cc < ccend)
1761 {
1762 space = 0;
1763 size = 0;
1764 bracketlen = 0;
1765 if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
1766 break;
1767
1768 /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */
1769 if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
1770 {
1771 if (detect_repeat(common, cc))
1772 {
1773 /* These brackets are converted to repeats, so no global
1774 based single character repeat is allowed. */
1775 if (cc >= end)
1776 end = bracketend(cc);
1777 }
1778 }
1779 repeat_check = TRUE;
1780
1781 switch(*cc)
1782 {
1783 case OP_KET:
1784 if (common->private_data_ptrs[cc + 1 - common->start] != 0)
1785 {
1786 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1787 private_data_ptr += sizeof(sljit_sw);
1788 cc += common->private_data_ptrs[cc + 1 - common->start];
1789 }
1790 cc += 1 + LINK_SIZE;
1791 break;
1792
1793 case OP_ASSERT:
1794 case OP_ASSERT_NOT:
1795 case OP_ASSERTBACK:
1796 case OP_ASSERTBACK_NOT:
1797 case OP_ASSERT_NA:
1798 case OP_ASSERTBACK_NA:
1799 case OP_ONCE:
1800 case OP_SCRIPT_RUN:
1801 case OP_BRAPOS:
1802 case OP_SBRA:
1803 case OP_SBRAPOS:
1804 case OP_SCOND:
1805 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1806 private_data_ptr += sizeof(sljit_sw);
1807 bracketlen = 1 + LINK_SIZE;
1808 break;
1809
1810 case OP_CBRAPOS:
1811 case OP_SCBRAPOS:
1812 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1813 private_data_ptr += sizeof(sljit_sw);
1814 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1815 break;
1816
1817 case OP_COND:
1818 /* Might be a hidden SCOND. */
1819 common->private_data_ptrs[cc - common->start] = 0;
1820 alternative = cc + GET(cc, 1);
1821 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
1822 {
1823 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1824 private_data_ptr += sizeof(sljit_sw);
1825 }
1826 bracketlen = 1 + LINK_SIZE;
1827 break;
1828
1829 case OP_BRA:
1830 bracketlen = 1 + LINK_SIZE;
1831 break;
1832
1833 case OP_CBRA:
1834 case OP_SCBRA:
1835 bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
1836 break;
1837
1838 case OP_BRAZERO:
1839 case OP_BRAMINZERO:
1840 case OP_BRAPOSZERO:
1841 size = 1;
1842 repeat_check = FALSE;
1843 break;
1844
1845 CASE_ITERATOR_PRIVATE_DATA_1
1846 size = -2;
1847 space = 1;
1848 break;
1849
1850 CASE_ITERATOR_PRIVATE_DATA_2A
1851 size = -2;
1852 space = 2;
1853 break;
1854
1855 CASE_ITERATOR_PRIVATE_DATA_2B
1856 size = -(2 + IMM2_SIZE);
1857 space = 2;
1858 break;
1859
1860 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
1861 size = 1;
1862 space = 1;
1863 break;
1864
1865 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
1866 size = 1;
1867 if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
1868 space = 2;
1869 break;
1870
1871 case OP_TYPEUPTO:
1872 size = 1 + IMM2_SIZE;
1873 if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
1874 space = 2;
1875 break;
1876
1877 case OP_TYPEMINUPTO:
1878 size = 1 + IMM2_SIZE;
1879 space = 2;
1880 break;
1881
1882 case OP_CLASS:
1883 case OP_NCLASS:
1884 size = 1 + 32 / sizeof(PCRE2_UCHAR);
1885 space = get_class_iterator_size(cc + size);
1886 break;
1887
1888#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
1889 case OP_XCLASS:
1890 size = GET(cc, 1);
1891 space = get_class_iterator_size(cc + size);
1892 break;
1893#endif
1894
1895 default:
1896 cc = next_opcode(common, cc);
1897 SLJIT_ASSERT(cc != NULL);
1898 break;
1899 }
1900
1901 /* Character iterators, which are not inside a repeated bracket,
1902 gets a private slot instead of allocating it on the stack. */
1903 if (space > 0 && cc >= end)
1904 {
1905 common->private_data_ptrs[cc - common->start] = private_data_ptr;
1906 private_data_ptr += sizeof(sljit_sw) * space;
1907 }
1908
1909 if (size != 0)
1910 {
1911 if (size < 0)
1912 {
1913 cc += -size;
1914#ifdef SUPPORT_UNICODE
1915 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1916#endif
1917 }
1918 else
1919 cc += size;
1920 }
1921
1922 if (bracketlen > 0)
1923 {
1924 if (cc >= end)
1925 {
1926 end = bracketend(cc);
1927 if (end[-1 - LINK_SIZE] == OP_KET)
1928 end = NULL;
1929 }
1930 cc += bracketlen;
1931 }
1932 }
1933*private_data_start = private_data_ptr;
1934}
1935
1936/* Returns with a frame_types (always < 0) if no need for frame. */
1937static int get_framesize(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, BOOL recursive, BOOL *needs_control_head)
1938{
1939int length = 0;
1940int possessive = 0;
1941BOOL stack_restore = FALSE;
1942BOOL setsom_found = recursive;
1943BOOL setmark_found = recursive;
1944/* The last capture is a local variable even for recursions. */
1945BOOL capture_last_found = FALSE;
1946
1947#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
1948SLJIT_ASSERT(common->control_head_ptr != 0);
1949*needs_control_head = TRUE;
1950#else
1951*needs_control_head = FALSE;
1952#endif
1953
1954if (ccend == NULL)
1955 {
1956 ccend = bracketend(cc) - (1 + LINK_SIZE);
1957 if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS))
1958 {
1959 possessive = length = (common->capture_last_ptr != 0) ? 5 : 3;
1960 /* This is correct regardless of common->capture_last_ptr. */
1961 capture_last_found = TRUE;
1962 }
1963 cc = next_opcode(common, cc);
1964 }
1965
1966SLJIT_ASSERT(cc != NULL);
1967while (cc < ccend)
1968 switch(*cc)
1969 {
1970 case OP_SET_SOM:
1971 SLJIT_ASSERT(common->has_set_som);
1972 stack_restore = TRUE;
1973 if (!setsom_found)
1974 {
1975 length += 2;
1976 setsom_found = TRUE;
1977 }
1978 cc += 1;
1979 break;
1980
1981 case OP_MARK:
1982 case OP_COMMIT_ARG:
1983 case OP_PRUNE_ARG:
1984 case OP_THEN_ARG:
1985 SLJIT_ASSERT(common->mark_ptr != 0);
1986 stack_restore = TRUE;
1987 if (!setmark_found)
1988 {
1989 length += 2;
1990 setmark_found = TRUE;
1991 }
1992 if (common->control_head_ptr != 0)
1993 *needs_control_head = TRUE;
1994 cc += 1 + 2 + cc[1];
1995 break;
1996
1997 case OP_RECURSE:
1998 stack_restore = TRUE;
1999 if (common->has_set_som && !setsom_found)
2000 {
2001 length += 2;
2002 setsom_found = TRUE;
2003 }
2004 if (common->mark_ptr != 0 && !setmark_found)
2005 {
2006 length += 2;
2007 setmark_found = TRUE;
2008 }
2009 if (common->capture_last_ptr != 0 && !capture_last_found)
2010 {
2011 length += 2;
2012 capture_last_found = TRUE;
2013 }
2014 cc += 1 + LINK_SIZE;
2015 break;
2016
2017 case OP_CBRA:
2018 case OP_CBRAPOS:
2019 case OP_SCBRA:
2020 case OP_SCBRAPOS:
2021 stack_restore = TRUE;
2022 if (common->capture_last_ptr != 0 && !capture_last_found)
2023 {
2024 length += 2;
2025 capture_last_found = TRUE;
2026 }
2027 length += 3;
2028 cc += 1 + LINK_SIZE + IMM2_SIZE;
2029 break;
2030
2031 case OP_THEN:
2032 stack_restore = TRUE;
2033 if (common->control_head_ptr != 0)
2034 *needs_control_head = TRUE;
2035 cc ++;
2036 break;
2037
2038 default:
2039 stack_restore = TRUE;
2040 /* Fall through. */
2041
2042 case OP_NOT_WORD_BOUNDARY:
2043 case OP_WORD_BOUNDARY:
2044 case OP_NOT_DIGIT:
2045 case OP_DIGIT:
2046 case OP_NOT_WHITESPACE:
2047 case OP_WHITESPACE:
2048 case OP_NOT_WORDCHAR:
2049 case OP_WORDCHAR:
2050 case OP_ANY:
2051 case OP_ALLANY:
2052 case OP_ANYBYTE:
2053 case OP_NOTPROP:
2054 case OP_PROP:
2055 case OP_ANYNL:
2056 case OP_NOT_HSPACE:
2057 case OP_HSPACE:
2058 case OP_NOT_VSPACE:
2059 case OP_VSPACE:
2060 case OP_EXTUNI:
2061 case OP_EODN:
2062 case OP_EOD:
2063 case OP_CIRC:
2064 case OP_CIRCM:
2065 case OP_DOLL:
2066 case OP_DOLLM:
2067 case OP_CHAR:
2068 case OP_CHARI:
2069 case OP_NOT:
2070 case OP_NOTI:
2071
2072 case OP_EXACT:
2073 case OP_POSSTAR:
2074 case OP_POSPLUS:
2075 case OP_POSQUERY:
2076 case OP_POSUPTO:
2077
2078 case OP_EXACTI:
2079 case OP_POSSTARI:
2080 case OP_POSPLUSI:
2081 case OP_POSQUERYI:
2082 case OP_POSUPTOI:
2083
2084 case OP_NOTEXACT:
2085 case OP_NOTPOSSTAR:
2086 case OP_NOTPOSPLUS:
2087 case OP_NOTPOSQUERY:
2088 case OP_NOTPOSUPTO:
2089
2090 case OP_NOTEXACTI:
2091 case OP_NOTPOSSTARI:
2092 case OP_NOTPOSPLUSI:
2093 case OP_NOTPOSQUERYI:
2094 case OP_NOTPOSUPTOI:
2095
2096 case OP_TYPEEXACT:
2097 case OP_TYPEPOSSTAR:
2098 case OP_TYPEPOSPLUS:
2099 case OP_TYPEPOSQUERY:
2100 case OP_TYPEPOSUPTO:
2101
2102 case OP_CLASS:
2103 case OP_NCLASS:
2104 case OP_XCLASS:
2105
2106 case OP_CALLOUT:
2107 case OP_CALLOUT_STR:
2108
2109 cc = next_opcode(common, cc);
2110 SLJIT_ASSERT(cc != NULL);
2111 break;
2112 }
2113
2114/* Possessive quantifiers can use a special case. */
2115if (SLJIT_UNLIKELY(possessive == length))
2116 return stack_restore ? no_frame : no_stack;
2117
2118if (length > 0)
2119 return length + 1;
2120return stack_restore ? no_frame : no_stack;
2121}
2122
2123static void init_frame(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, int stackpos, int stacktop)
2124{
2125DEFINE_COMPILER;
2126BOOL setsom_found = FALSE;
2127BOOL setmark_found = FALSE;
2128/* The last capture is a local variable even for recursions. */
2129BOOL capture_last_found = FALSE;
2130int offset;
2131
2132/* >= 1 + shortest item size (2) */
2133SLJIT_UNUSED_ARG(stacktop);
2134SLJIT_ASSERT(stackpos >= stacktop + 2);
2135
2136stackpos = STACK(stackpos);
2137if (ccend == NULL)
2138 {
2139 ccend = bracketend(cc) - (1 + LINK_SIZE);
2140 if (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS)
2141 cc = next_opcode(common, cc);
2142 }
2143
2144SLJIT_ASSERT(cc != NULL);
2145while (cc < ccend)
2146 switch(*cc)
2147 {
2148 case OP_SET_SOM:
2149 SLJIT_ASSERT(common->has_set_som);
2150 if (!setsom_found)
2151 {
2152 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2153 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2154 stackpos -= (int)sizeof(sljit_sw);
2155 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2156 stackpos -= (int)sizeof(sljit_sw);
2157 setsom_found = TRUE;
2158 }
2159 cc += 1;
2160 break;
2161
2162 case OP_MARK:
2163 case OP_COMMIT_ARG:
2164 case OP_PRUNE_ARG:
2165 case OP_THEN_ARG:
2166 SLJIT_ASSERT(common->mark_ptr != 0);
2167 if (!setmark_found)
2168 {
2169 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2170 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2171 stackpos -= (int)sizeof(sljit_sw);
2172 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2173 stackpos -= (int)sizeof(sljit_sw);
2174 setmark_found = TRUE;
2175 }
2176 cc += 1 + 2 + cc[1];
2177 break;
2178
2179 case OP_RECURSE:
2180 if (common->has_set_som && !setsom_found)
2181 {
2182 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
2183 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
2184 stackpos -= (int)sizeof(sljit_sw);
2185 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2186 stackpos -= (int)sizeof(sljit_sw);
2187 setsom_found = TRUE;
2188 }
2189 if (common->mark_ptr != 0 && !setmark_found)
2190 {
2191 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
2192 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
2193 stackpos -= (int)sizeof(sljit_sw);
2194 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2195 stackpos -= (int)sizeof(sljit_sw);
2196 setmark_found = TRUE;
2197 }
2198 if (common->capture_last_ptr != 0 && !capture_last_found)
2199 {
2200 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2201 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2202 stackpos -= (int)sizeof(sljit_sw);
2203 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2204 stackpos -= (int)sizeof(sljit_sw);
2205 capture_last_found = TRUE;
2206 }
2207 cc += 1 + LINK_SIZE;
2208 break;
2209
2210 case OP_CBRA:
2211 case OP_CBRAPOS:
2212 case OP_SCBRA:
2213 case OP_SCBRAPOS:
2214 if (common->capture_last_ptr != 0 && !capture_last_found)
2215 {
2216 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
2217 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
2218 stackpos -= (int)sizeof(sljit_sw);
2219 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2220 stackpos -= (int)sizeof(sljit_sw);
2221 capture_last_found = TRUE;
2222 }
2223 offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
2224 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
2225 stackpos -= (int)sizeof(sljit_sw);
2226 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
2227 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
2228 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
2229 stackpos -= (int)sizeof(sljit_sw);
2230 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
2231 stackpos -= (int)sizeof(sljit_sw);
2232
2233 cc += 1 + LINK_SIZE + IMM2_SIZE;
2234 break;
2235
2236 default:
2237 cc = next_opcode(common, cc);
2238 SLJIT_ASSERT(cc != NULL);
2239 break;
2240 }
2241
2242OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
2243SLJIT_ASSERT(stackpos == STACK(stacktop));
2244}
2245
/* Number of temporary registers rotated by the delayed memory copy helpers. */
2246#define RECURSE_TMP_REG_COUNT 3
2247
/* State of a sequence of delayed memory-to-memory copies. Each copy loads its
value into one of the temporary registers; the store of that value is only
emitted when the same register is needed again, or by
delayed_mem_copy_finish(). */
2248typedef struct delayed_mem_copy_status {
/* Compiler used to emit the instructions; set by delayed_mem_copy_init(). */
2249 struct sljit_compiler *compiler;
/* Base register of the pending store of each temporary register,
   or -1 when that register holds no pending value. */
2250 int store_bases[RECURSE_TMP_REG_COUNT];
/* Memory offset of the pending store, paired with store_bases[]. */
2251 int store_offsets[RECURSE_TMP_REG_COUNT];
/* Registers used for the transfers. These (and saved_tmp_regs[]) are not
   written by delayed_mem_copy_init(), which only asserts on them. */
2252 int tmp_regs[RECURSE_TMP_REG_COUNT];
/* Where the original value of tmp_regs[i] is kept during the sequence.
   Only used when sljit_get_register_index() is negative for the entry. */
2253 int saved_tmp_regs[RECURSE_TMP_REG_COUNT];
/* Index of the temporary register used by the next copy (round robin). */
2254 int next_tmp_reg;
2255} delayed_mem_copy_status;
2256
2257static void delayed_mem_copy_init(delayed_mem_copy_status *status, compiler_common *common)
2258{
2259int i;
2260
2261for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
2262 {
2263 SLJIT_ASSERT(status->tmp_regs[i] >= 0);
2264 SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]);
2265
2266 status->store_bases[i] = -1;
2267 }
2268status->next_tmp_reg = 0;
2269status->compiler = common->compiler;
2270}
2271
2272static void delayed_mem_copy_move(delayed_mem_copy_status *status, int load_base, sljit_sw load_offset,
2273 int store_base, sljit_sw store_offset)
2274{
2275struct sljit_compiler *compiler = status->compiler;
2276int next_tmp_reg = status->next_tmp_reg;
2277int tmp_reg = status->tmp_regs[next_tmp_reg];
2278
2279SLJIT_ASSERT(load_base > 0 && store_base > 0);
2280
2281if (status->store_bases[next_tmp_reg] == -1)
2282 {
2283 /* Preserve virtual registers. */
2284 if (sljit_get_register_index(status->saved_tmp_regs[next_tmp_reg]) < 0)
2285 OP1(SLJIT_MOV, status->saved_tmp_regs[next_tmp_reg], 0, tmp_reg, 0);
2286 }
2287else
2288 OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);
2289
2290OP1(SLJIT_MOV, tmp_reg, 0, SLJIT_MEM1(load_base), load_offset);
2291status->store_bases[next_tmp_reg] = store_base;
2292status->store_offsets[next_tmp_reg] = store_offset;
2293
2294status->next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
2295}
2296
2297static void delayed_mem_copy_finish(delayed_mem_copy_status *status)
2298{
2299struct sljit_compiler *compiler = status->compiler;
2300int next_tmp_reg = status->next_tmp_reg;
2301int tmp_reg, saved_tmp_reg, i;
2302
2303for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
2304 {
2305 if (status->store_bases[next_tmp_reg] != -1)
2306 {
2307 tmp_reg = status->tmp_regs[next_tmp_reg];
2308 saved_tmp_reg = status->saved_tmp_regs[next_tmp_reg];
2309
2310 OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);
2311
2312 /* Restore virtual registers. */
2313 if (sljit_get_register_index(saved_tmp_reg) < 0)
2314 OP1(SLJIT_MOV, tmp_reg, 0, saved_tmp_reg, 0);
2315 }
2316
2317 next_tmp_reg = (next_tmp_reg + 1) % RECURSE_TMP_REG_COUNT;
2318 }
2319}
2320
2321#undef RECURSE_TMP_REG_COUNT
2322
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002323static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
2324{
2325uint8_t *byte;
2326uint8_t mask;
2327
2328SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);
2329
2330bit_index >>= SLJIT_WORD_SHIFT;
2331
2332SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size);
2333
2334mask = 1 << (bit_index & 0x7);
2335byte = common->recurse_bitset + (bit_index >> 3);
2336
2337if (*byte & mask)
2338 return FALSE;
2339
2340*byte |= mask;
2341return TRUE;
2342}
2343
/* Bit flags reported by get_recurse_data_length() and consumed by
copy_recurse_data(). */
enum get_recurse_flags {
  /* A (*COMMIT), (*PRUNE), (*SKIP) or (*THEN) style verb was found. */
  recurse_flag_quit_found = 0x01,
  /* OP_ACCEPT or OP_ASSERT_ACCEPT was found. */
  recurse_flag_accept_found = 0x02,
  /* OP_SET_SOM was found, or OP_RECURSE while has_set_som is set. */
  recurse_flag_setsom_found = 0x04,
  /* A verb with a mark argument was found, or OP_RECURSE while mark_ptr is in use. */
  recurse_flag_setmark_found = 0x08,
  /* The control head needs to be saved. */
  recurse_flag_control_head_found = 0x10
};
2351
2352static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
Elliott Hughes5b808042021-10-01 10:56:10 -07002353{
2354int length = 1;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002355int size, offset;
Elliott Hughes5b808042021-10-01 10:56:10 -07002356PCRE2_SPTR alternative;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002357uint32_t recurse_flags = 0;
2358
2359memset(common->recurse_bitset, 0, common->recurse_bitset_size);
Elliott Hughes5b808042021-10-01 10:56:10 -07002360
2361#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2362SLJIT_ASSERT(common->control_head_ptr != 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002363recurse_flags |= recurse_flag_control_head_found;
Elliott Hughes5b808042021-10-01 10:56:10 -07002364#endif
2365
2366/* Calculate the sum of the private machine words. */
2367while (cc < ccend)
2368 {
2369 size = 0;
2370 switch(*cc)
2371 {
2372 case OP_SET_SOM:
2373 SLJIT_ASSERT(common->has_set_som);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002374 recurse_flags |= recurse_flag_setsom_found;
Elliott Hughes5b808042021-10-01 10:56:10 -07002375 cc += 1;
2376 break;
2377
2378 case OP_RECURSE:
2379 if (common->has_set_som)
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002380 recurse_flags |= recurse_flag_setsom_found;
Elliott Hughes5b808042021-10-01 10:56:10 -07002381 if (common->mark_ptr != 0)
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002382 recurse_flags |= recurse_flag_setmark_found;
2383 if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2384 length++;
Elliott Hughes5b808042021-10-01 10:56:10 -07002385 cc += 1 + LINK_SIZE;
2386 break;
2387
2388 case OP_KET:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002389 offset = PRIVATE_DATA(cc);
2390 if (offset != 0)
Elliott Hughes5b808042021-10-01 10:56:10 -07002391 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002392 if (recurse_check_bit(common, offset))
2393 length++;
Elliott Hughes5b808042021-10-01 10:56:10 -07002394 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2395 cc += PRIVATE_DATA(cc + 1);
2396 }
2397 cc += 1 + LINK_SIZE;
2398 break;
2399
2400 case OP_ASSERT:
2401 case OP_ASSERT_NOT:
2402 case OP_ASSERTBACK:
2403 case OP_ASSERTBACK_NOT:
2404 case OP_ASSERT_NA:
2405 case OP_ASSERTBACK_NA:
2406 case OP_ONCE:
2407 case OP_SCRIPT_RUN:
2408 case OP_BRAPOS:
2409 case OP_SBRA:
2410 case OP_SBRAPOS:
2411 case OP_SCOND:
Elliott Hughes5b808042021-10-01 10:56:10 -07002412 SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002413 if (recurse_check_bit(common, PRIVATE_DATA(cc)))
2414 length++;
Elliott Hughes5b808042021-10-01 10:56:10 -07002415 cc += 1 + LINK_SIZE;
2416 break;
2417
2418 case OP_CBRA:
2419 case OP_SCBRA:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002420 offset = GET2(cc, 1 + LINK_SIZE);
2421 if (recurse_check_bit(common, OVECTOR(offset << 1)))
2422 {
2423 SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
2424 length += 2;
2425 }
2426 if (common->optimized_cbracket[offset] == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset)))
2427 length++;
2428 if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
Elliott Hughes5b808042021-10-01 10:56:10 -07002429 length++;
2430 cc += 1 + LINK_SIZE + IMM2_SIZE;
2431 break;
2432
2433 case OP_CBRAPOS:
2434 case OP_SCBRAPOS:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002435 offset = GET2(cc, 1 + LINK_SIZE);
2436 if (recurse_check_bit(common, OVECTOR(offset << 1)))
2437 {
2438 SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
2439 length += 2;
2440 }
2441 if (recurse_check_bit(common, OVECTOR_PRIV(offset)))
2442 length++;
2443 if (recurse_check_bit(common, PRIVATE_DATA(cc)))
2444 length++;
2445 if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2446 length++;
Elliott Hughes5b808042021-10-01 10:56:10 -07002447 cc += 1 + LINK_SIZE + IMM2_SIZE;
2448 break;
2449
2450 case OP_COND:
2451 /* Might be a hidden SCOND. */
2452 alternative = cc + GET(cc, 1);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002453 if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
Elliott Hughes5b808042021-10-01 10:56:10 -07002454 length++;
2455 cc += 1 + LINK_SIZE;
2456 break;
2457
2458 CASE_ITERATOR_PRIVATE_DATA_1
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002459 offset = PRIVATE_DATA(cc);
2460 if (offset != 0 && recurse_check_bit(common, offset))
Elliott Hughes5b808042021-10-01 10:56:10 -07002461 length++;
2462 cc += 2;
2463#ifdef SUPPORT_UNICODE
2464 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2465#endif
2466 break;
2467
2468 CASE_ITERATOR_PRIVATE_DATA_2A
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002469 offset = PRIVATE_DATA(cc);
2470 if (offset != 0 && recurse_check_bit(common, offset))
2471 {
2472 SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
Elliott Hughes5b808042021-10-01 10:56:10 -07002473 length += 2;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002474 }
Elliott Hughes5b808042021-10-01 10:56:10 -07002475 cc += 2;
2476#ifdef SUPPORT_UNICODE
2477 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2478#endif
2479 break;
2480
2481 CASE_ITERATOR_PRIVATE_DATA_2B
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002482 offset = PRIVATE_DATA(cc);
2483 if (offset != 0 && recurse_check_bit(common, offset))
2484 {
2485 SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
Elliott Hughes5b808042021-10-01 10:56:10 -07002486 length += 2;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002487 }
Elliott Hughes5b808042021-10-01 10:56:10 -07002488 cc += 2 + IMM2_SIZE;
2489#ifdef SUPPORT_UNICODE
2490 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2491#endif
2492 break;
2493
2494 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002495 offset = PRIVATE_DATA(cc);
2496 if (offset != 0 && recurse_check_bit(common, offset))
Elliott Hughes5b808042021-10-01 10:56:10 -07002497 length++;
2498 cc += 1;
2499 break;
2500
2501 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002502 offset = PRIVATE_DATA(cc);
2503 if (offset != 0 && recurse_check_bit(common, offset))
2504 {
2505 SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
Elliott Hughes5b808042021-10-01 10:56:10 -07002506 length += 2;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002507 }
Elliott Hughes5b808042021-10-01 10:56:10 -07002508 cc += 1;
2509 break;
2510
2511 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002512 offset = PRIVATE_DATA(cc);
2513 if (offset != 0 && recurse_check_bit(common, offset))
2514 {
2515 SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
Elliott Hughes5b808042021-10-01 10:56:10 -07002516 length += 2;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002517 }
Elliott Hughes5b808042021-10-01 10:56:10 -07002518 cc += 1 + IMM2_SIZE;
2519 break;
2520
2521 case OP_CLASS:
2522 case OP_NCLASS:
2523#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2524 case OP_XCLASS:
2525 size = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2526#else
2527 size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2528#endif
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002529
2530 offset = PRIVATE_DATA(cc);
2531 if (offset != 0 && recurse_check_bit(common, offset))
Elliott Hughes5b808042021-10-01 10:56:10 -07002532 length += get_class_iterator_size(cc + size);
2533 cc += size;
2534 break;
2535
2536 case OP_MARK:
2537 case OP_COMMIT_ARG:
2538 case OP_PRUNE_ARG:
2539 case OP_THEN_ARG:
2540 SLJIT_ASSERT(common->mark_ptr != 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002541 recurse_flags |= recurse_flag_setmark_found;
Elliott Hughes5b808042021-10-01 10:56:10 -07002542 if (common->control_head_ptr != 0)
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002543 recurse_flags |= recurse_flag_control_head_found;
Elliott Hughes5b808042021-10-01 10:56:10 -07002544 if (*cc != OP_MARK)
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002545 recurse_flags |= recurse_flag_quit_found;
Elliott Hughes5b808042021-10-01 10:56:10 -07002546
2547 cc += 1 + 2 + cc[1];
2548 break;
2549
2550 case OP_PRUNE:
2551 case OP_SKIP:
2552 case OP_COMMIT:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002553 recurse_flags |= recurse_flag_quit_found;
Elliott Hughes5b808042021-10-01 10:56:10 -07002554 cc++;
2555 break;
2556
2557 case OP_SKIP_ARG:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002558 recurse_flags |= recurse_flag_quit_found;
Elliott Hughes5b808042021-10-01 10:56:10 -07002559 cc += 1 + 2 + cc[1];
2560 break;
2561
2562 case OP_THEN:
2563 SLJIT_ASSERT(common->control_head_ptr != 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002564 recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
Elliott Hughes5b808042021-10-01 10:56:10 -07002565 cc++;
2566 break;
2567
2568 case OP_ACCEPT:
2569 case OP_ASSERT_ACCEPT:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002570 recurse_flags |= recurse_flag_accept_found;
Elliott Hughes5b808042021-10-01 10:56:10 -07002571 cc++;
2572 break;
2573
2574 default:
2575 cc = next_opcode(common, cc);
2576 SLJIT_ASSERT(cc != NULL);
2577 break;
2578 }
2579 }
2580SLJIT_ASSERT(cc == ccend);
2581
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002582if (recurse_flags & recurse_flag_control_head_found)
Elliott Hughes5b808042021-10-01 10:56:10 -07002583 length++;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002584if (recurse_flags & recurse_flag_quit_found)
Elliott Hughes5b808042021-10-01 10:56:10 -07002585 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002586 if (recurse_flags & recurse_flag_setsom_found)
Elliott Hughes5b808042021-10-01 10:56:10 -07002587 length++;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002588 if (recurse_flags & recurse_flag_setmark_found)
Elliott Hughes5b808042021-10-01 10:56:10 -07002589 length++;
2590 }
2591
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002592*result_flags = recurse_flags;
Elliott Hughes5b808042021-10-01 10:56:10 -07002593return length;
2594}
2595
/* Operation selector for copy_recurse_data(). The data words are split into
three groups there: private, shared and kept-shared. */
enum copy_recurse_data_types {
  /* Frame slots -> stack area; all three groups are copied. */
  recurse_copy_from_global = 0,
  /* Stack area -> frame slots; private words only. */
  recurse_copy_private_to_global = 1,
  /* Stack area -> frame slots; shared and kept-shared words. */
  recurse_copy_shared_to_global = 2,
  /* Stack area -> frame slots; kept-shared words only. */
  recurse_copy_kept_shared_to_global = 3,
  /* Exchanges private and shared words between the frame and the stack area. */
  recurse_swap_global = 4
};
2603
2604static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002605 int type, int stackptr, int stacktop, uint32_t recurse_flags)
Elliott Hughes5b808042021-10-01 10:56:10 -07002606{
2607delayed_mem_copy_status status;
2608PCRE2_SPTR alternative;
2609sljit_sw private_srcw[2];
2610sljit_sw shared_srcw[3];
2611sljit_sw kept_shared_srcw[2];
2612int private_count, shared_count, kept_shared_count;
2613int from_sp, base_reg, offset, i;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002614
2615memset(common->recurse_bitset, 0, common->recurse_bitset_size);
Elliott Hughes5b808042021-10-01 10:56:10 -07002616
2617#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2618SLJIT_ASSERT(common->control_head_ptr != 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002619recurse_check_bit(common, common->control_head_ptr);
Elliott Hughes5b808042021-10-01 10:56:10 -07002620#endif
2621
2622switch (type)
2623 {
2624 case recurse_copy_from_global:
2625 from_sp = TRUE;
2626 base_reg = STACK_TOP;
2627 break;
2628
2629 case recurse_copy_private_to_global:
2630 case recurse_copy_shared_to_global:
2631 case recurse_copy_kept_shared_to_global:
2632 from_sp = FALSE;
2633 base_reg = STACK_TOP;
2634 break;
2635
2636 default:
2637 SLJIT_ASSERT(type == recurse_swap_global);
2638 from_sp = FALSE;
2639 base_reg = TMP2;
2640 break;
2641 }
2642
2643stackptr = STACK(stackptr);
2644stacktop = STACK(stacktop);
2645
2646status.tmp_regs[0] = TMP1;
2647status.saved_tmp_regs[0] = TMP1;
2648
2649if (base_reg != TMP2)
2650 {
2651 status.tmp_regs[1] = TMP2;
2652 status.saved_tmp_regs[1] = TMP2;
2653 }
2654else
2655 {
2656 status.saved_tmp_regs[1] = RETURN_ADDR;
2657 if (HAS_VIRTUAL_REGISTERS)
2658 status.tmp_regs[1] = STR_PTR;
2659 else
2660 status.tmp_regs[1] = RETURN_ADDR;
2661 }
2662
2663status.saved_tmp_regs[2] = TMP3;
2664if (HAS_VIRTUAL_REGISTERS)
2665 status.tmp_regs[2] = STR_END;
2666else
2667 status.tmp_regs[2] = TMP3;
2668
2669delayed_mem_copy_init(&status, common);
2670
2671if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2672 {
2673 SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2674
2675 if (!from_sp)
2676 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->recursive_head_ptr);
2677
2678 if (from_sp || type == recurse_swap_global)
2679 delayed_mem_copy_move(&status, SLJIT_SP, common->recursive_head_ptr, base_reg, stackptr);
2680 }
2681
2682stackptr += sizeof(sljit_sw);
2683
2684#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
2685if (type != recurse_copy_shared_to_global)
2686 {
2687 if (!from_sp)
2688 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, common->control_head_ptr);
2689
2690 if (from_sp || type == recurse_swap_global)
2691 delayed_mem_copy_move(&status, SLJIT_SP, common->control_head_ptr, base_reg, stackptr);
2692 }
2693
2694stackptr += sizeof(sljit_sw);
2695#endif
2696
2697while (cc < ccend)
2698 {
2699 private_count = 0;
2700 shared_count = 0;
2701 kept_shared_count = 0;
2702
2703 switch(*cc)
2704 {
2705 case OP_SET_SOM:
2706 SLJIT_ASSERT(common->has_set_som);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002707 if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
Elliott Hughes5b808042021-10-01 10:56:10 -07002708 {
2709 kept_shared_srcw[0] = OVECTOR(0);
2710 kept_shared_count = 1;
Elliott Hughes5b808042021-10-01 10:56:10 -07002711 }
2712 cc += 1;
2713 break;
2714
2715 case OP_RECURSE:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002716 if (recurse_flags & recurse_flag_quit_found)
Elliott Hughes5b808042021-10-01 10:56:10 -07002717 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002718 if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
Elliott Hughes5b808042021-10-01 10:56:10 -07002719 {
2720 kept_shared_srcw[0] = OVECTOR(0);
2721 kept_shared_count = 1;
Elliott Hughes5b808042021-10-01 10:56:10 -07002722 }
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002723 if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
Elliott Hughes5b808042021-10-01 10:56:10 -07002724 {
2725 kept_shared_srcw[kept_shared_count] = common->mark_ptr;
2726 kept_shared_count++;
Elliott Hughes5b808042021-10-01 10:56:10 -07002727 }
2728 }
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002729 if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
Elliott Hughes5b808042021-10-01 10:56:10 -07002730 {
2731 shared_srcw[0] = common->capture_last_ptr;
2732 shared_count = 1;
Elliott Hughes5b808042021-10-01 10:56:10 -07002733 }
2734 cc += 1 + LINK_SIZE;
2735 break;
2736
2737 case OP_KET:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002738 private_srcw[0] = PRIVATE_DATA(cc);
2739 if (private_srcw[0] != 0)
Elliott Hughes5b808042021-10-01 10:56:10 -07002740 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002741 if (recurse_check_bit(common, private_srcw[0]))
2742 private_count = 1;
Elliott Hughes5b808042021-10-01 10:56:10 -07002743 SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
2744 cc += PRIVATE_DATA(cc + 1);
2745 }
2746 cc += 1 + LINK_SIZE;
2747 break;
2748
2749 case OP_ASSERT:
2750 case OP_ASSERT_NOT:
2751 case OP_ASSERTBACK:
2752 case OP_ASSERTBACK_NOT:
2753 case OP_ASSERT_NA:
2754 case OP_ASSERTBACK_NA:
2755 case OP_ONCE:
2756 case OP_SCRIPT_RUN:
2757 case OP_BRAPOS:
2758 case OP_SBRA:
2759 case OP_SBRAPOS:
2760 case OP_SCOND:
Elliott Hughes5b808042021-10-01 10:56:10 -07002761 private_srcw[0] = PRIVATE_DATA(cc);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002762 if (recurse_check_bit(common, private_srcw[0]))
2763 private_count = 1;
Elliott Hughes5b808042021-10-01 10:56:10 -07002764 cc += 1 + LINK_SIZE;
2765 break;
2766
2767 case OP_CBRA:
2768 case OP_SCBRA:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002769 offset = GET2(cc, 1 + LINK_SIZE);
2770 shared_srcw[0] = OVECTOR(offset << 1);
2771 if (recurse_check_bit(common, shared_srcw[0]))
Elliott Hughes5b808042021-10-01 10:56:10 -07002772 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002773 shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2774 SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2775 shared_count = 2;
Elliott Hughes5b808042021-10-01 10:56:10 -07002776 }
2777
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002778 if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
Elliott Hughes5b808042021-10-01 10:56:10 -07002779 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002780 shared_srcw[shared_count] = common->capture_last_ptr;
2781 shared_count++;
Elliott Hughes5b808042021-10-01 10:56:10 -07002782 }
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002783
2784 if (common->optimized_cbracket[offset] == 0)
2785 {
2786 private_srcw[0] = OVECTOR_PRIV(offset);
2787 if (recurse_check_bit(common, private_srcw[0]))
2788 private_count = 1;
2789 }
2790
Elliott Hughes5b808042021-10-01 10:56:10 -07002791 cc += 1 + LINK_SIZE + IMM2_SIZE;
2792 break;
2793
2794 case OP_CBRAPOS:
2795 case OP_SCBRAPOS:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002796 offset = GET2(cc, 1 + LINK_SIZE);
2797 shared_srcw[0] = OVECTOR(offset << 1);
2798 if (recurse_check_bit(common, shared_srcw[0]))
Elliott Hughes5b808042021-10-01 10:56:10 -07002799 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002800 shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
2801 SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
2802 shared_count = 2;
Elliott Hughes5b808042021-10-01 10:56:10 -07002803 }
2804
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002805 if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
2806 {
2807 shared_srcw[shared_count] = common->capture_last_ptr;
2808 shared_count++;
2809 }
2810
Elliott Hughes5b808042021-10-01 10:56:10 -07002811 private_srcw[0] = PRIVATE_DATA(cc);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002812 if (recurse_check_bit(common, private_srcw[0]))
2813 private_count = 1;
2814
2815 offset = OVECTOR_PRIV(offset);
2816 if (recurse_check_bit(common, offset))
2817 {
2818 private_srcw[private_count] = offset;
2819 private_count++;
2820 }
Elliott Hughes5b808042021-10-01 10:56:10 -07002821 cc += 1 + LINK_SIZE + IMM2_SIZE;
2822 break;
2823
2824 case OP_COND:
2825 /* Might be a hidden SCOND. */
2826 alternative = cc + GET(cc, 1);
2827 if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
2828 {
Elliott Hughes5b808042021-10-01 10:56:10 -07002829 private_srcw[0] = PRIVATE_DATA(cc);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002830 if (recurse_check_bit(common, private_srcw[0]))
2831 private_count = 1;
Elliott Hughes5b808042021-10-01 10:56:10 -07002832 }
2833 cc += 1 + LINK_SIZE;
2834 break;
2835
2836 CASE_ITERATOR_PRIVATE_DATA_1
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002837 private_srcw[0] = PRIVATE_DATA(cc);
2838 if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
Elliott Hughes5b808042021-10-01 10:56:10 -07002839 private_count = 1;
Elliott Hughes5b808042021-10-01 10:56:10 -07002840 cc += 2;
2841#ifdef SUPPORT_UNICODE
2842 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2843#endif
2844 break;
2845
2846 CASE_ITERATOR_PRIVATE_DATA_2A
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002847 private_srcw[0] = PRIVATE_DATA(cc);
2848 if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
Elliott Hughes5b808042021-10-01 10:56:10 -07002849 {
2850 private_count = 2;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002851 private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2852 SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
Elliott Hughes5b808042021-10-01 10:56:10 -07002853 }
2854 cc += 2;
2855#ifdef SUPPORT_UNICODE
2856 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2857#endif
2858 break;
2859
2860 CASE_ITERATOR_PRIVATE_DATA_2B
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002861 private_srcw[0] = PRIVATE_DATA(cc);
2862 if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
Elliott Hughes5b808042021-10-01 10:56:10 -07002863 {
2864 private_count = 2;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002865 private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2866 SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
Elliott Hughes5b808042021-10-01 10:56:10 -07002867 }
2868 cc += 2 + IMM2_SIZE;
2869#ifdef SUPPORT_UNICODE
2870 if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
2871#endif
2872 break;
2873
2874 CASE_ITERATOR_TYPE_PRIVATE_DATA_1
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002875 private_srcw[0] = PRIVATE_DATA(cc);
2876 if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
Elliott Hughes5b808042021-10-01 10:56:10 -07002877 private_count = 1;
Elliott Hughes5b808042021-10-01 10:56:10 -07002878 cc += 1;
2879 break;
2880
2881 CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002882 private_srcw[0] = PRIVATE_DATA(cc);
2883 if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
Elliott Hughes5b808042021-10-01 10:56:10 -07002884 {
2885 private_count = 2;
Elliott Hughes5b808042021-10-01 10:56:10 -07002886 private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002887 SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
Elliott Hughes5b808042021-10-01 10:56:10 -07002888 }
2889 cc += 1;
2890 break;
2891
2892 CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002893 private_srcw[0] = PRIVATE_DATA(cc);
2894 if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
Elliott Hughes5b808042021-10-01 10:56:10 -07002895 {
2896 private_count = 2;
Elliott Hughes5b808042021-10-01 10:56:10 -07002897 private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002898 SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
Elliott Hughes5b808042021-10-01 10:56:10 -07002899 }
2900 cc += 1 + IMM2_SIZE;
2901 break;
2902
2903 case OP_CLASS:
2904 case OP_NCLASS:
2905#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
2906 case OP_XCLASS:
2907 i = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2908#else
2909 i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
2910#endif
2911 if (PRIVATE_DATA(cc) != 0)
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002912 {
2913 private_count = 1;
2914 private_srcw[0] = PRIVATE_DATA(cc);
Elliott Hughes5b808042021-10-01 10:56:10 -07002915 switch(get_class_iterator_size(cc + i))
2916 {
2917 case 1:
Elliott Hughes5b808042021-10-01 10:56:10 -07002918 break;
2919
2920 case 2:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002921 if (recurse_check_bit(common, private_srcw[0]))
2922 {
2923 private_count = 2;
2924 private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
2925 SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
2926 }
Elliott Hughes5b808042021-10-01 10:56:10 -07002927 break;
2928
2929 default:
2930 SLJIT_UNREACHABLE();
2931 break;
2932 }
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002933 }
Elliott Hughes5b808042021-10-01 10:56:10 -07002934 cc += i;
2935 break;
2936
2937 case OP_MARK:
2938 case OP_COMMIT_ARG:
2939 case OP_PRUNE_ARG:
2940 case OP_THEN_ARG:
2941 SLJIT_ASSERT(common->mark_ptr != 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002942 if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
Elliott Hughes5b808042021-10-01 10:56:10 -07002943 {
2944 kept_shared_srcw[0] = common->mark_ptr;
2945 kept_shared_count = 1;
Elliott Hughes5b808042021-10-01 10:56:10 -07002946 }
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002947 if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
Elliott Hughes5b808042021-10-01 10:56:10 -07002948 {
2949 private_srcw[0] = common->control_head_ptr;
2950 private_count = 1;
Elliott Hughes5b808042021-10-01 10:56:10 -07002951 }
2952 cc += 1 + 2 + cc[1];
2953 break;
2954
2955 case OP_THEN:
2956 SLJIT_ASSERT(common->control_head_ptr != 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002957 if (recurse_check_bit(common, common->control_head_ptr))
Elliott Hughes5b808042021-10-01 10:56:10 -07002958 {
2959 private_srcw[0] = common->control_head_ptr;
2960 private_count = 1;
Elliott Hughes5b808042021-10-01 10:56:10 -07002961 }
2962 cc++;
2963 break;
2964
2965 default:
2966 cc = next_opcode(common, cc);
2967 SLJIT_ASSERT(cc != NULL);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07002968 continue;
Elliott Hughes5b808042021-10-01 10:56:10 -07002969 }
2970
2971 if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
2972 {
2973 SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_private_to_global || type == recurse_swap_global);
2974
2975 for (i = 0; i < private_count; i++)
2976 {
2977 SLJIT_ASSERT(private_srcw[i] != 0);
2978
2979 if (!from_sp)
2980 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, private_srcw[i]);
2981
2982 if (from_sp || type == recurse_swap_global)
2983 delayed_mem_copy_move(&status, SLJIT_SP, private_srcw[i], base_reg, stackptr);
2984
2985 stackptr += sizeof(sljit_sw);
2986 }
2987 }
2988 else
2989 stackptr += sizeof(sljit_sw) * private_count;
2990
2991 if (type != recurse_copy_private_to_global && type != recurse_copy_kept_shared_to_global)
2992 {
2993 SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_swap_global);
2994
2995 for (i = 0; i < shared_count; i++)
2996 {
2997 SLJIT_ASSERT(shared_srcw[i] != 0);
2998
2999 if (!from_sp)
3000 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, shared_srcw[i]);
3001
3002 if (from_sp || type == recurse_swap_global)
3003 delayed_mem_copy_move(&status, SLJIT_SP, shared_srcw[i], base_reg, stackptr);
3004
3005 stackptr += sizeof(sljit_sw);
3006 }
3007 }
3008 else
3009 stackptr += sizeof(sljit_sw) * shared_count;
3010
3011 if (type != recurse_copy_private_to_global && type != recurse_swap_global)
3012 {
3013 SLJIT_ASSERT(type == recurse_copy_from_global || type == recurse_copy_shared_to_global || type == recurse_copy_kept_shared_to_global);
3014
3015 for (i = 0; i < kept_shared_count; i++)
3016 {
3017 SLJIT_ASSERT(kept_shared_srcw[i] != 0);
3018
3019 if (!from_sp)
3020 delayed_mem_copy_move(&status, base_reg, stackptr, SLJIT_SP, kept_shared_srcw[i]);
3021
3022 if (from_sp || type == recurse_swap_global)
3023 delayed_mem_copy_move(&status, SLJIT_SP, kept_shared_srcw[i], base_reg, stackptr);
3024
3025 stackptr += sizeof(sljit_sw);
3026 }
3027 }
3028 else
3029 stackptr += sizeof(sljit_sw) * kept_shared_count;
3030 }
3031
3032SLJIT_ASSERT(cc == ccend && stackptr == stacktop);
3033
3034delayed_mem_copy_finish(&status);
3035}
3036
3037static SLJIT_INLINE PCRE2_SPTR set_then_offsets(compiler_common *common, PCRE2_SPTR cc, sljit_u8 *current_offset)
3038{
3039PCRE2_SPTR end = bracketend(cc);
3040BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
3041
3042/* Assert captures then. */
3043if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA)
3044 current_offset = NULL;
3045/* Conditional block does not. */
3046if (*cc == OP_COND || *cc == OP_SCOND)
3047 has_alternatives = FALSE;
3048
3049cc = next_opcode(common, cc);
3050if (has_alternatives)
3051 current_offset = common->then_offsets + (cc - common->start);
3052
3053while (cc < end)
3054 {
3055 if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND))
3056 cc = set_then_offsets(common, cc, current_offset);
3057 else
3058 {
3059 if (*cc == OP_ALT && has_alternatives)
3060 current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
3061 if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
3062 *current_offset = 1;
3063 cc = next_opcode(common, cc);
3064 }
3065 }
3066
3067return end;
3068}
3069
3070#undef CASE_ITERATOR_PRIVATE_DATA_1
3071#undef CASE_ITERATOR_PRIVATE_DATA_2A
3072#undef CASE_ITERATOR_PRIVATE_DATA_2B
3073#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1
3074#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
3075#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
3076
3077static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
3078{
3079return (value & (value - 1)) == 0;
3080}
3081
3082static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
3083{
3084while (list)
3085 {
3086 /* sljit_set_label is clever enough to do nothing
3087 if either the jump or the label is NULL. */
3088 SET_LABEL(list->jump, label);
3089 list = list->next;
3090 }
3091}
3092
3093static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
3094{
3095jump_list *list_item = sljit_alloc_memory(compiler, sizeof(jump_list));
3096if (list_item)
3097 {
3098 list_item->next = *list;
3099 list_item->jump = jump;
3100 *list = list_item;
3101 }
3102}
3103
3104static void add_stub(compiler_common *common, struct sljit_jump *start)
3105{
3106DEFINE_COMPILER;
3107stub_list *list_item = sljit_alloc_memory(compiler, sizeof(stub_list));
3108
3109if (list_item)
3110 {
3111 list_item->start = start;
3112 list_item->quit = LABEL();
3113 list_item->next = common->stubs;
3114 common->stubs = list_item;
3115 }
3116}
3117
3118static void flush_stubs(compiler_common *common)
3119{
3120DEFINE_COMPILER;
3121stub_list *list_item = common->stubs;
3122
3123while (list_item)
3124 {
3125 JUMPHERE(list_item->start);
3126 add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
3127 JUMPTO(SLJIT_JUMP, list_item->quit);
3128 list_item = list_item->next;
3129 }
3130common->stubs = NULL;
3131}
3132
3133static SLJIT_INLINE void count_match(compiler_common *common)
3134{
3135DEFINE_COMPILER;
3136
3137OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
3138add_jump(compiler, &common->calllimit, JUMP(SLJIT_ZERO));
3139}
3140
3141static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
3142{
3143/* May destroy all locals and registers except TMP2. */
3144DEFINE_COMPILER;
3145
3146SLJIT_ASSERT(size > 0);
3147OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3148#ifdef DESTROY_REGISTERS
3149OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
3150OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
3151OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
3152OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
3153OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
3154#endif
3155add_stub(common, CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0));
3156}
3157
3158static SLJIT_INLINE void free_stack(compiler_common *common, int size)
3159{
3160DEFINE_COMPILER;
3161
3162SLJIT_ASSERT(size > 0);
3163OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
3164}
3165
3166static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
3167{
3168DEFINE_COMPILER;
3169sljit_uw *result;
3170
3171if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
3172 return NULL;
3173
3174result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
3175if (SLJIT_UNLIKELY(result == NULL))
3176 {
3177 sljit_set_compiler_memory_error(compiler);
3178 return NULL;
3179 }
3180
3181*(void**)result = common->read_only_data_head;
3182common->read_only_data_head = (void *)result;
3183return result + 1;
3184}
3185
3186static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
3187{
3188DEFINE_COMPILER;
3189struct sljit_label *loop;
3190sljit_s32 i;
3191
3192/* At this point we can freely use all temporary registers. */
3193SLJIT_ASSERT(length > 1);
3194/* TMP1 returns with begin - 1. */
3195OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));
3196if (length < 8)
3197 {
3198 for (i = 1; i < length; i++)
3199 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), SLJIT_R0, 0);
3200 }
3201else
3202 {
3203 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3204 {
3205 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
3206 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3207 loop = LABEL();
3208 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
3209 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3210 JUMPTO(SLJIT_NOT_ZERO, loop);
3211 }
3212 else
3213 {
3214 GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
3215 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
3216 loop = LABEL();
3217 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
3218 OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
3219 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
3220 JUMPTO(SLJIT_NOT_ZERO, loop);
3221 }
3222 }
3223}
3224
3225static SLJIT_INLINE void reset_early_fail(compiler_common *common)
3226{
3227DEFINE_COMPILER;
3228sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr);
3229sljit_u32 uncleared_size;
3230sljit_s32 src = SLJIT_IMM;
3231sljit_s32 i;
3232struct sljit_label *loop;
3233
3234SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr);
3235
3236if (size == sizeof(sljit_sw))
3237 {
3238 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0);
3239 return;
3240 }
3241
3242if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
3243 {
3244 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
3245 src = TMP3;
3246 }
3247
3248if (size <= 6 * sizeof(sljit_sw))
3249 {
3250 for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw))
3251 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0);
3252 return;
3253 }
3254
3255GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr);
3256
3257uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw);
3258
3259OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
3260
3261loop = LABEL();
3262OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3263OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
3264OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0);
3265OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0);
3266CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
3267
3268if (uncleared_size >= sizeof(sljit_sw))
3269 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
3270
3271if (uncleared_size >= 2 * sizeof(sljit_sw))
3272 OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0);
3273}
3274
3275static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
3276{
3277DEFINE_COMPILER;
3278struct sljit_label *loop;
3279int i;
3280
3281SLJIT_ASSERT(length > 1);
3282/* OVECTOR(1) contains the "string begin - 1" constant. */
3283if (length > 2)
3284 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3285if (length < 8)
3286 {
3287 for (i = 2; i < length; i++)
3288 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(i), TMP1, 0);
3289 }
3290else
3291 {
3292 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
3293 {
3294 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
3295 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3296 loop = LABEL();
3297 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
3298 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3299 JUMPTO(SLJIT_NOT_ZERO, loop);
3300 }
3301 else
3302 {
3303 GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
3304 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
3305 loop = LABEL();
3306 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
3307 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
3308 OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
3309 JUMPTO(SLJIT_NOT_ZERO, loop);
3310 }
3311 }
3312
3313if (!HAS_VIRTUAL_REGISTERS)
3314 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack));
3315else
3316 OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
3317
3318if (common->mark_ptr != 0)
3319 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
3320if (common->control_head_ptr != 0)
3321 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
3322if (HAS_VIRTUAL_REGISTERS)
3323 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
3324
3325OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3326OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
3327}
3328
3329static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, PCRE2_SPTR skip_arg)
3330{
3331while (current != NULL)
3332 {
3333 switch (current[1])
3334 {
3335 case type_then_trap:
3336 break;
3337
3338 case type_mark:
3339 if (PRIV(strcmp)(skip_arg, (PCRE2_SPTR)current[2]) == 0)
3340 return current[3];
3341 break;
3342
3343 default:
3344 SLJIT_UNREACHABLE();
3345 break;
3346 }
3347 SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
3348 current = (sljit_sw*)current[0];
3349 }
3350return 0;
3351}
3352
3353static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
3354{
3355DEFINE_COMPILER;
3356struct sljit_label *loop;
3357BOOL has_pre;
3358
3359/* At this point we can freely use all registers. */
3360OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
3361OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);
3362
3363if (HAS_VIRTUAL_REGISTERS)
3364 {
3365 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
3366 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3367 if (common->mark_ptr != 0)
3368 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3369 OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount));
3370 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3371 if (common->mark_ptr != 0)
3372 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
3373 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data),
3374 SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3375 }
3376else
3377 {
3378 OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
3379 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data));
3380 if (common->mark_ptr != 0)
3381 OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
3382 OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount));
3383 OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0);
3384 if (common->mark_ptr != 0)
3385 OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0);
3386 OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
3387 }
3388
3389has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
3390
3391GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
3392OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
3393
3394loop = LABEL();
3395
3396if (has_pre)
3397 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
3398else
3399 {
3400 OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
3401 OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
3402 }
3403
3404OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(PCRE2_SIZE));
3405OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
3406/* Copy the integer value to the output buffer */
3407#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3408OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
3409#endif
3410
3411SLJIT_ASSERT(sizeof(PCRE2_SIZE) == 4 || sizeof(PCRE2_SIZE) == 8);
3412OP1(((sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV), SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
3413
3414OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3415JUMPTO(SLJIT_NOT_ZERO, loop);
3416
3417/* Calculate the return value, which is the maximum ovector value. */
3418if (topbracket > 1)
3419 {
3420 if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
3421 {
3422 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
3423 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3424
3425 /* OVECTOR(0) is never equal to SLJIT_S2. */
3426 loop = LABEL();
3427 sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
3428 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3429 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3430 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3431 }
3432 else
3433 {
3434 GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
3435 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
3436
3437 /* OVECTOR(0) is never equal to SLJIT_S2. */
3438 loop = LABEL();
3439 OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
3440 OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
3441 OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
3442 CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
3443 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
3444 }
3445 }
3446else
3447 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
3448}
3449
3450static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
3451{
3452DEFINE_COMPILER;
3453sljit_s32 mov_opcode;
3454sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1;
3455
3456SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0);
3457SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
3458 && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0));
3459
3460if (arguments_reg != ARGUMENTS)
3461 OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0);
3462OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP),
3463 common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr);
3464OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL);
3465
3466/* Store match begin and end. */
3467OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin));
3468OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0);
3469OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data));
3470
3471mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
3472
3473OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S1, 0);
3474#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3475OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
3476#endif
3477OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector), SLJIT_R2, 0);
3478
3479OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_S1, 0);
3480#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3481OP2(SLJIT_ASHR, STR_END, 0, STR_END, 0, SLJIT_IMM, UCHAR_SHIFT);
3482#endif
3483OP1(mov_opcode, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(pcre2_match_data, ovector) + sizeof(PCRE2_SIZE), STR_END, 0);
3484
3485JUMPTO(SLJIT_JUMP, quit);
3486}
3487
3488static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
3489{
3490/* May destroy TMP1. */
3491DEFINE_COMPILER;
3492struct sljit_jump *jump;
3493
3494if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3495 {
3496 /* The value of -1 must be kept for start_used_ptr! */
3497 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
3498 /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
3499 is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
3500 jump = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
3501 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3502 JUMPHERE(jump);
3503 }
3504else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3505 {
3506 jump = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3507 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3508 JUMPHERE(jump);
3509 }
3510}
3511
3512static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR cc)
3513{
3514/* Detects if the character has an othercase. */
3515unsigned int c;
3516
3517#ifdef SUPPORT_UNICODE
3518if (common->utf || common->ucp)
3519 {
3520 if (common->utf)
3521 {
3522 GETCHAR(c, cc);
3523 }
3524 else
3525 c = *cc;
3526
3527 if (c > 127)
3528 return c != UCD_OTHERCASE(c);
3529
3530 return common->fcc[c] != c;
3531 }
3532else
3533#endif
3534 c = *cc;
3535return MAX_255(c) ? common->fcc[c] != c : FALSE;
3536}
3537
3538static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
3539{
3540/* Returns with the othercase. */
3541#ifdef SUPPORT_UNICODE
3542if ((common->utf || common->ucp) && c > 127)
3543 return UCD_OTHERCASE(c);
3544#endif
3545return TABLE_GET(c, common->fcc, c);
3546}
3547
3548static unsigned int char_get_othercase_bit(compiler_common *common, PCRE2_SPTR cc)
3549{
3550/* Detects if the character and its othercase has only 1 bit difference. */
3551unsigned int c, oc, bit;
3552#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3553int n;
3554#endif
3555
3556#ifdef SUPPORT_UNICODE
3557if (common->utf || common->ucp)
3558 {
3559 if (common->utf)
3560 {
3561 GETCHAR(c, cc);
3562 }
3563 else
3564 c = *cc;
3565
3566 if (c <= 127)
3567 oc = common->fcc[c];
3568 else
3569 oc = UCD_OTHERCASE(c);
3570 }
3571else
3572 {
3573 c = *cc;
3574 oc = TABLE_GET(c, common->fcc, c);
3575 }
3576#else
3577c = *cc;
3578oc = TABLE_GET(c, common->fcc, c);
3579#endif
3580
3581SLJIT_ASSERT(c != oc);
3582
3583bit = c ^ oc;
3584/* Optimized for English alphabet. */
3585if (c <= 127 && bit == 0x20)
3586 return (0 << 8) | 0x20;
3587
3588/* Since c != oc, they must have at least 1 bit difference. */
3589if (!is_powerof2(bit))
3590 return 0;
3591
3592#if PCRE2_CODE_UNIT_WIDTH == 8
3593
3594#ifdef SUPPORT_UNICODE
3595if (common->utf && c > 127)
3596 {
3597 n = GET_EXTRALEN(*cc);
3598 while ((bit & 0x3f) == 0)
3599 {
3600 n--;
3601 bit >>= 6;
3602 }
3603 return (n << 8) | bit;
3604 }
3605#endif /* SUPPORT_UNICODE */
3606return (0 << 8) | bit;
3607
3608#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
3609
3610#ifdef SUPPORT_UNICODE
3611if (common->utf && c > 65535)
3612 {
3613 if (bit >= (1u << 10))
3614 bit >>= 10;
3615 else
3616 return (bit < 256) ? ((2 << 8) | bit) : ((3 << 8) | (bit >> 8));
3617 }
3618#endif /* SUPPORT_UNICODE */
3619return (bit < 256) ? ((0u << 8) | bit) : ((1u << 8) | (bit >> 8));
3620
3621#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
3622}
3623
3624static void check_partial(compiler_common *common, BOOL force)
3625{
3626/* Checks whether a partial matching is occurred. Does not modify registers. */
3627DEFINE_COMPILER;
3628struct sljit_jump *jump = NULL;
3629
3630SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE);
3631
3632if (common->mode == PCRE2_JIT_COMPLETE)
3633 return;
3634
3635if (!force && !common->allow_empty_partial)
3636 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3637else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3638 jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
3639
3640if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3641 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
3642else
3643 {
3644 if (common->partialmatchlabel != NULL)
3645 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
3646 else
3647 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
3648 }
3649
3650if (jump != NULL)
3651 JUMPHERE(jump);
3652}
3653
3654static void check_str_end(compiler_common *common, jump_list **end_reached)
3655{
3656/* Does not affect registers. Usually used in a tight spot. */
3657DEFINE_COMPILER;
3658struct sljit_jump *jump;
3659
3660if (common->mode == PCRE2_JIT_COMPLETE)
3661 {
3662 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3663 return;
3664 }
3665
3666jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
3667if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3668 {
3669 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
3670 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
3671 add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
3672 }
3673else
3674 {
3675 add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
3676 if (common->partialmatchlabel != NULL)
3677 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
3678 else
3679 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
3680 }
3681JUMPHERE(jump);
3682}
3683
3684static void detect_partial_match(compiler_common *common, jump_list **backtracks)
3685{
3686DEFINE_COMPILER;
3687struct sljit_jump *jump;
3688
3689if (common->mode == PCRE2_JIT_COMPLETE)
3690 {
3691 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
3692 return;
3693 }
3694
3695/* Partial matching mode. */
3696jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
3697if (!common->allow_empty_partial)
3698 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
3699else if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3700 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1));
3701
3702if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3703 {
3704 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
3705 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
3706 }
3707else
3708 {
3709 if (common->partialmatchlabel != NULL)
3710 JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
3711 else
3712 add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
3713 }
3714JUMPHERE(jump);
3715}
3716
3717static void process_partial_match(compiler_common *common)
3718{
3719DEFINE_COMPILER;
3720struct sljit_jump *jump;
3721
3722/* Partial matching mode. */
3723if (common->mode == PCRE2_JIT_PARTIAL_SOFT)
3724 {
3725 jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
3726 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
3727 JUMPHERE(jump);
3728 }
3729else if (common->mode == PCRE2_JIT_PARTIAL_HARD)
3730 {
3731 if (common->partialmatchlabel != NULL)
3732 CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel);
3733 else
3734 add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));
3735 }
3736}
3737
3738static void detect_partial_match_to(compiler_common *common, struct sljit_label *label)
3739{
3740DEFINE_COMPILER;
3741
3742CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label);
3743process_partial_match(common);
3744}
3745
3746static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks)
3747{
3748/* Reads the character into TMP1, keeps STR_PTR.
3749Does not check STR_END. TMP2, dst, RETURN_ADDR Destroyed. */
3750DEFINE_COMPILER;
3751#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3752struct sljit_jump *jump;
3753#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
3754
3755SLJIT_UNUSED_ARG(max);
3756SLJIT_UNUSED_ARG(dst);
3757SLJIT_UNUSED_ARG(dstw);
3758SLJIT_UNUSED_ARG(backtracks);
3759
3760OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3761
3762#ifdef SUPPORT_UNICODE
3763#if PCRE2_CODE_UNIT_WIDTH == 8
3764if (common->utf)
3765 {
3766 if (max < 128) return;
3767
3768 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
3769 OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
3770 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3771 add_jump(compiler, common->invalid_utf ? &common->utfreadchar_invalid : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
3772 OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
3773 if (backtracks && common->invalid_utf)
3774 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3775 JUMPHERE(jump);
3776 }
3777#elif PCRE2_CODE_UNIT_WIDTH == 16
3778if (common->utf)
3779 {
3780 if (max < 0xd800) return;
3781
3782 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3783
3784 if (common->invalid_utf)
3785 {
3786 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
3787 OP1(SLJIT_MOV, dst, dstw, STR_PTR, 0);
3788 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3789 add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3790 OP1(SLJIT_MOV, STR_PTR, 0, dst, dstw);
3791 if (backtracks && common->invalid_utf)
3792 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3793 }
3794 else
3795 {
3796 /* TMP2 contains the high surrogate. */
3797 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
3798 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3799 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
3800 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
3801 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3802 }
3803
3804 JUMPHERE(jump);
3805 }
3806#elif PCRE2_CODE_UNIT_WIDTH == 32
3807if (common->invalid_utf)
3808 {
3809 if (max < 0xd800) return;
3810
3811 if (backtracks != NULL)
3812 {
3813 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3814 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
3815 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
3816 }
3817 else
3818 {
3819 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07003820 OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
Elliott Hughes5b808042021-10-01 10:56:10 -07003821 CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07003822 OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
Elliott Hughes5b808042021-10-01 10:56:10 -07003823 CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
3824 }
3825 }
3826#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
3827#endif /* SUPPORT_UNICODE */
3828}
3829
3830static void peek_char_back(compiler_common *common, sljit_u32 max, jump_list **backtracks)
3831{
3832/* Reads one character back without moving STR_PTR. TMP2 must
3833contain the start of the subject buffer. Affects TMP1, TMP2, and RETURN_ADDR. */
3834DEFINE_COMPILER;
3835
3836#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3837struct sljit_jump *jump;
3838#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
3839
3840SLJIT_UNUSED_ARG(max);
3841SLJIT_UNUSED_ARG(backtracks);
3842
3843OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
3844
3845#ifdef SUPPORT_UNICODE
3846#if PCRE2_CODE_UNIT_WIDTH == 8
3847if (common->utf)
3848 {
3849 if (max < 128) return;
3850
3851 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
3852 if (common->invalid_utf)
3853 {
3854 add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
3855 if (backtracks != NULL)
3856 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3857 }
3858 else
3859 add_jump(compiler, &common->utfpeakcharback, JUMP(SLJIT_FAST_CALL));
3860 JUMPHERE(jump);
3861 }
3862#elif PCRE2_CODE_UNIT_WIDTH == 16
3863if (common->utf)
3864 {
3865 if (max < 0xd800) return;
3866
3867 if (common->invalid_utf)
3868 {
3869 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
3870 add_jump(compiler, &common->utfpeakcharback_invalid, JUMP(SLJIT_FAST_CALL));
3871 if (backtracks != NULL)
3872 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3873 }
3874 else
3875 {
3876 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
3877 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xdc00);
3878 /* TMP2 contains the low surrogate. */
3879 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
3880 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
3881 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3882 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 10);
3883 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
3884 }
3885 JUMPHERE(jump);
3886 }
3887#elif PCRE2_CODE_UNIT_WIDTH == 32
3888if (common->invalid_utf)
3889 {
3890 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
3891 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
3892 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
3893 }
3894#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
3895#endif /* SUPPORT_UNICODE */
3896}
3897
/* Option bits for the 'options' argument of read_char(). */

/* STR_PTR must be advanced past the character even when the character is not
decoded; without it read_char() may return early for small 'max' values. */
#define READ_CHAR_UPDATE_STR_PTR 0x1
/* In invalid UTF mode, use the newline reader (utfreadnewline_invalid)
instead of the generic one (utfreadchar_invalid). */
#define READ_CHAR_UTF8_NEWLINE 0x2
/* Both of the above. */
#define READ_CHAR_NEWLINE (READ_CHAR_UPDATE_STR_PTR | READ_CHAR_UTF8_NEWLINE)
/* Skip the invalid UTF handling even when common->invalid_utf is set. */
#define READ_CHAR_VALID_UTF 0x4
3902
3903static void read_char(compiler_common *common, sljit_u32 min, sljit_u32 max,
3904 jump_list **backtracks, sljit_u32 options)
3905{
3906/* Reads the precise value of a character into TMP1, if the character is
3907between min and max (c >= min && c <= max). Otherwise it returns with a value
3908outside the range. Does not check STR_END. */
3909DEFINE_COMPILER;
3910#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
3911struct sljit_jump *jump;
3912#endif
3913#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
3914struct sljit_jump *jump2;
3915#endif
3916
3917SLJIT_UNUSED_ARG(min);
3918SLJIT_UNUSED_ARG(max);
3919SLJIT_UNUSED_ARG(backtracks);
3920SLJIT_UNUSED_ARG(options);
3921SLJIT_ASSERT(min <= max);
3922
3923OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3924OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
3925
3926#ifdef SUPPORT_UNICODE
3927#if PCRE2_CODE_UNIT_WIDTH == 8
3928if (common->utf)
3929 {
3930 if (max < 128 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
3931
3932 if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
3933 {
3934 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
3935
3936 if (options & READ_CHAR_UTF8_NEWLINE)
3937 add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
3938 else
3939 add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
3940
3941 if (backtracks != NULL)
3942 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
3943 JUMPHERE(jump);
3944 return;
3945 }
3946
3947 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
3948 if (min >= 0x10000)
3949 {
3950 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
3951 if (options & READ_CHAR_UPDATE_STR_PTR)
3952 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3953 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3954 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
3955 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3956 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3957 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3958 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3959 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3960 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3961 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3962 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
3963 if (!(options & READ_CHAR_UPDATE_STR_PTR))
3964 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
3965 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3966 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3967 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3968 JUMPHERE(jump2);
3969 if (options & READ_CHAR_UPDATE_STR_PTR)
3970 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3971 }
3972 else if (min >= 0x800 && max <= 0xffff)
3973 {
3974 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
3975 if (options & READ_CHAR_UPDATE_STR_PTR)
3976 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3977 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
3978 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
3979 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
3980 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
3981 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3982 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
3983 if (!(options & READ_CHAR_UPDATE_STR_PTR))
3984 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
3985 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
3986 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
3987 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
3988 JUMPHERE(jump2);
3989 if (options & READ_CHAR_UPDATE_STR_PTR)
3990 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
3991 }
3992 else if (max >= 0x800)
3993 {
3994 add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
3995 }
3996 else if (max < 128)
3997 {
3998 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
3999 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4000 }
4001 else
4002 {
4003 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4004 if (!(options & READ_CHAR_UPDATE_STR_PTR))
4005 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4006 else
4007 OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4008 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4009 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4010 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4011 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4012 if (options & READ_CHAR_UPDATE_STR_PTR)
4013 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
4014 }
4015 JUMPHERE(jump);
4016 }
4017#elif PCRE2_CODE_UNIT_WIDTH == 16
4018if (common->utf)
4019 {
4020 if (max < 0xd800 && !(options & READ_CHAR_UPDATE_STR_PTR)) return;
4021
4022 if (common->invalid_utf && !(options & READ_CHAR_VALID_UTF))
4023 {
4024 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4025 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4026
4027 if (options & READ_CHAR_UTF8_NEWLINE)
4028 add_jump(compiler, &common->utfreadnewline_invalid, JUMP(SLJIT_FAST_CALL));
4029 else
4030 add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4031
4032 if (backtracks != NULL)
4033 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4034 JUMPHERE(jump);
4035 return;
4036 }
4037
4038 if (max >= 0x10000)
4039 {
4040 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4041 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800);
4042 /* TMP2 contains the high surrogate. */
4043 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4044 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
4045 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4046 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
4047 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4048 JUMPHERE(jump);
4049 return;
4050 }
4051
4052 /* Skip low surrogate if necessary. */
4053 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4054
4055 if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
4056 {
4057 if (options & READ_CHAR_UPDATE_STR_PTR)
4058 OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004059 OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
Elliott Hughes5b808042021-10-01 10:56:10 -07004060 if (options & READ_CHAR_UPDATE_STR_PTR)
4061 CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
4062 if (max >= 0xd800)
4063 CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
4064 }
4065 else
4066 {
4067 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
4068 if (options & READ_CHAR_UPDATE_STR_PTR)
4069 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4070 if (max >= 0xd800)
4071 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
4072 JUMPHERE(jump);
4073 }
4074 }
4075#elif PCRE2_CODE_UNIT_WIDTH == 32
4076if (common->invalid_utf)
4077 {
4078 if (backtracks != NULL)
4079 {
4080 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4081 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4082 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800));
4083 }
4084 else
4085 {
4086 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004087 OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
Elliott Hughes5b808042021-10-01 10:56:10 -07004088 CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004089 OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
Elliott Hughes5b808042021-10-01 10:56:10 -07004090 CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4091 }
4092 }
4093#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4094#endif /* SUPPORT_UNICODE */
4095}
4096
4097#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4098
4099static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
4100{
4101/* Tells whether the character codes below 128 are enough
4102to determine a match. */
4103const sljit_u8 value = nclass ? 0xff : 0;
4104const sljit_u8 *end = bitset + 32;
4105
4106bitset += 16;
4107do
4108 {
4109 if (*bitset++ != value)
4110 return FALSE;
4111 }
4112while (bitset < end);
4113return TRUE;
4114}
4115
4116static void read_char7_type(compiler_common *common, jump_list **backtracks, BOOL negated)
4117{
4118/* Reads the precise character type of a character into TMP1, if the character
4119is less than 128. Otherwise it returns with zero. Does not check STR_END. The
4120full_read argument tells whether characters above max are accepted or not. */
4121DEFINE_COMPILER;
4122struct sljit_jump *jump;
4123
4124SLJIT_ASSERT(common->utf);
4125
4126OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
4127OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4128
4129/* All values > 127 are zero in ctypes. */
4130OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4131
4132if (negated)
4133 {
4134 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);
4135
4136 if (common->invalid_utf)
4137 {
4138 add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4139 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4140 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4141 }
4142 else
4143 {
4144 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4145 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4146 }
4147 JUMPHERE(jump);
4148 }
4149}
4150
4151#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4152
4153static void read_char8_type(compiler_common *common, jump_list **backtracks, BOOL negated)
4154{
4155/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END. */
4156DEFINE_COMPILER;
4157#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
4158struct sljit_jump *jump;
4159#endif
4160#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4161struct sljit_jump *jump2;
4162#endif
4163
4164SLJIT_UNUSED_ARG(backtracks);
4165SLJIT_UNUSED_ARG(negated);
4166
4167OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
4168OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4169
4170#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
4171if (common->utf)
4172 {
4173 /* The result of this read may be unused, but saves an "else" part. */
4174 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4175 jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x80);
4176
4177 if (!negated)
4178 {
4179 if (common->invalid_utf)
4180 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4181
4182 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4183 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4184 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
4185 if (common->invalid_utf)
4186 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe0 - 0xc2));
4187
4188 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4189 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
4190 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
4191 if (common->invalid_utf)
4192 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40));
4193
4194 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4195 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
4196 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4197 JUMPHERE(jump2);
4198 }
4199 else if (common->invalid_utf)
4200 {
4201 add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
4202 OP1(SLJIT_MOV, TMP2, 0, TMP1, 0);
4203 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
4204
4205 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4206 jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
4207 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4208 JUMPHERE(jump2);
4209 }
4210 else
4211 add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
4212
4213 JUMPHERE(jump);
4214 return;
4215 }
4216#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
4217
4218#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32
4219if (common->invalid_utf && negated)
4220 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x110000));
4221#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 32 */
4222
4223#if PCRE2_CODE_UNIT_WIDTH != 8
4224/* The ctypes array contains only 256 values. */
4225OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4226jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
4227#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
4228OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4229#if PCRE2_CODE_UNIT_WIDTH != 8
4230JUMPHERE(jump);
4231#endif /* PCRE2_CODE_UNIT_WIDTH != 8 */
4232
4233#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
4234if (common->utf && negated)
4235 {
4236 /* Skip low surrogate if necessary. */
4237 if (!common->invalid_utf)
4238 {
4239 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
4240
4241 if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
4242 {
4243 OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004244 OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
Elliott Hughes5b808042021-10-01 10:56:10 -07004245 CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
4246 }
4247 else
4248 {
4249 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
4250 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4251 JUMPHERE(jump);
4252 }
4253 return;
4254 }
4255
4256 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
4257 jump = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
4258 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
4259 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
4260
4261 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4262 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4263 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
4264 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400));
4265
4266 JUMPHERE(jump);
4267 return;
4268 }
4269#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16 */
4270}
4271
4272static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid)
4273{
4274/* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE,
4275TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer,
4276and it is destroyed. Does not modify STR_PTR for invalid character sequences. */
4277DEFINE_COMPILER;
4278
Elliott Hughes5b808042021-10-01 10:56:10 -07004279#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
4280struct sljit_jump *jump;
4281#endif
4282
4283#ifdef SUPPORT_UNICODE
4284#if PCRE2_CODE_UNIT_WIDTH == 8
4285struct sljit_label *label;
4286
4287if (common->utf)
4288 {
4289 if (!must_be_valid && common->invalid_utf)
4290 {
4291 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
4292 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4293 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x80);
4294 add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
4295 if (backtracks != NULL)
4296 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4297 JUMPHERE(jump);
4298 return;
4299 }
4300
4301 label = LABEL();
4302 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
4303 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4304 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4305 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
4306 return;
4307 }
4308#elif PCRE2_CODE_UNIT_WIDTH == 16
4309if (common->utf)
4310 {
4311 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
4312 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4313
4314 if (!must_be_valid && common->invalid_utf)
4315 {
4316 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4317 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0xd800);
4318 add_jump(compiler, &common->utfmoveback_invalid, JUMP(SLJIT_FAST_CALL));
4319 if (backtracks != NULL)
4320 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
4321 JUMPHERE(jump);
4322 return;
4323 }
4324
4325 /* Skip low surrogate if necessary. */
4326 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004327 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);
Elliott Hughes5b808042021-10-01 10:56:10 -07004328 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
4329 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4330 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4331 return;
4332 }
4333#elif PCRE2_CODE_UNIT_WIDTH == 32
4334if (common->invalid_utf && !must_be_valid)
4335 {
4336 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
4337 if (backtracks != NULL)
4338 {
4339 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000));
4340 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4341 return;
4342 }
4343
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004344 OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);
Elliott Hughes5b808042021-10-01 10:56:10 -07004345 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
4346 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
4347 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
4348 return;
4349 }
4350#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
4351#endif /* SUPPORT_UNICODE */
Elliott Hughes16619d62021-10-29 12:10:38 -07004352
4353SLJIT_UNUSED_ARG(backtracks);
4354SLJIT_UNUSED_ARG(must_be_valid);
4355
Elliott Hughes5b808042021-10-01 10:56:10 -07004356OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4357}
4358
4359static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
4360{
4361/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed. */
4362DEFINE_COMPILER;
4363struct sljit_jump *jump;
4364
4365if (nltype == NLTYPE_ANY)
4366 {
4367 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
4368 sljit_set_current_flags(compiler, SLJIT_SET_Z);
4369 add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
4370 }
4371else if (nltype == NLTYPE_ANYCRLF)
4372 {
4373 if (jumpifmatch)
4374 {
4375 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
4376 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4377 }
4378 else
4379 {
4380 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
4381 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
4382 JUMPHERE(jump);
4383 }
4384 }
4385else
4386 {
4387 SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
4388 add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
4389 }
4390}
4391
4392#ifdef SUPPORT_UNICODE
4393
4394#if PCRE2_CODE_UNIT_WIDTH == 8
4395static void do_utfreadchar(compiler_common *common)
4396{
4397/* Fast decoding a UTF-8 character. TMP1 contains the first byte
4398of the character (>= 0xc0). Return char value in TMP1. */
4399DEFINE_COMPILER;
4400struct sljit_jump *jump;
4401
4402sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4403OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4404OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4405OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4406OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4407
4408/* Searching for the first zero. */
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004409OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
Elliott Hughes5b808042021-10-01 10:56:10 -07004410jump = JUMP(SLJIT_NOT_ZERO);
4411/* Two byte sequence. */
4412OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
4413OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4414OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4415
4416JUMPHERE(jump);
4417OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4418OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4419OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4420OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4421
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004422OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
Elliott Hughes5b808042021-10-01 10:56:10 -07004423jump = JUMP(SLJIT_NOT_ZERO);
4424/* Three byte sequence. */
4425OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
4426OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4427OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4428
4429/* Four byte sequence. */
4430JUMPHERE(jump);
4431OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
4432OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0000);
4433OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4434OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4435OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
4436OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4437OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4438}
4439
4440static void do_utfreadtype8(compiler_common *common)
4441{
4442/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
4443of the character (>= 0xc0). Return value in TMP1. */
4444DEFINE_COMPILER;
4445struct sljit_jump *jump;
4446struct sljit_jump *compare;
4447
4448sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4449
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004450OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);
Elliott Hughes5b808042021-10-01 10:56:10 -07004451jump = JUMP(SLJIT_NOT_ZERO);
4452/* Two byte sequence. */
4453OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4454OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4455OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
4456/* The upper 5 bits are known at this point. */
4457compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
4458OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4459OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
4460OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
4461OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
4462OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4463
4464JUMPHERE(compare);
4465OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4466OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4467
4468/* We only have types for characters less than 256. */
4469JUMPHERE(jump);
4470OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
4471OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4472OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
4473OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4474}
4475
4476static void do_utfreadchar_invalid(compiler_common *common)
4477{
4478/* Slow decoding a UTF-8 character. TMP1 contains the first byte
4479of the character (>= 0xc0). Return char value in TMP1. STR_PTR is
4480undefined for invalid characters. */
4481DEFINE_COMPILER;
4482sljit_s32 i;
4483sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
4484struct sljit_jump *jump;
4485struct sljit_jump *buffer_end_close;
4486struct sljit_label *three_byte_entry;
4487struct sljit_label *exit_invalid_label;
4488struct sljit_jump *exit_invalid[11];
4489
4490sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4491
4492OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);
4493
4494/* Usually more than 3 characters remained in the subject buffer. */
4495OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4496
4497/* Not a valid start of a multi-byte sequence, no more bytes read. */
4498exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xf5 - 0xc2);
4499
4500buffer_end_close = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
4501
4502OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4503OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4504/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4505OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4506OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4507exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4508
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004509OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
Elliott Hughes5b808042021-10-01 10:56:10 -07004510jump = JUMP(SLJIT_NOT_ZERO);
4511
4512OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4513OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4514
4515JUMPHERE(jump);
4516
4517/* Three-byte sequence. */
4518OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4519OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4520OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4521OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4522if (has_cmov)
4523 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004524 OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
Elliott Hughes5b808042021-10-01 10:56:10 -07004525 CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
4526 exit_invalid[2] = NULL;
4527 }
4528else
4529 exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4530
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004531OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
Elliott Hughes5b808042021-10-01 10:56:10 -07004532jump = JUMP(SLJIT_NOT_ZERO);
4533
4534three_byte_entry = LABEL();
4535
4536OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
4537if (has_cmov)
4538 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004539 OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
Elliott Hughes5b808042021-10-01 10:56:10 -07004540 CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
4541 exit_invalid[3] = NULL;
4542 }
4543else
4544 exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4545OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4546OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4547
4548if (has_cmov)
4549 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004550 OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
Elliott Hughes5b808042021-10-01 10:56:10 -07004551 CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4552 exit_invalid[4] = NULL;
4553 }
4554else
4555 exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4556OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4557
4558JUMPHERE(jump);
4559
4560/* Four-byte sequence. */
4561OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4562OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4563OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4564OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4565if (has_cmov)
4566 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004567 OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
Elliott Hughes5b808042021-10-01 10:56:10 -07004568 CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
4569 exit_invalid[5] = NULL;
4570 }
4571else
4572 exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4573
4574OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
4575if (has_cmov)
4576 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004577 OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
Elliott Hughes5b808042021-10-01 10:56:10 -07004578 CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
4579 exit_invalid[6] = NULL;
4580 }
4581else
4582 exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4583
4584OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
4585OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4586
4587JUMPHERE(buffer_end_close);
4588OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4589exit_invalid[7] = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
4590
4591/* Two-byte sequence. */
4592OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4593OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4594/* If TMP2 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4595OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4596OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4597exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4598
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004599OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
Elliott Hughes5b808042021-10-01 10:56:10 -07004600jump = JUMP(SLJIT_NOT_ZERO);
4601
4602OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4603
4604/* Three-byte sequence. */
4605JUMPHERE(jump);
4606exit_invalid[9] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4607
4608OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4609OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4610OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4611OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4612if (has_cmov)
4613 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004614 OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
Elliott Hughes5b808042021-10-01 10:56:10 -07004615 CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4616 exit_invalid[10] = NULL;
4617 }
4618else
4619 exit_invalid[10] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4620
4621/* One will be substracted from STR_PTR later. */
4622OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4623
4624/* Four byte sequences are not possible. */
4625CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x30000, three_byte_entry);
4626
4627exit_invalid_label = LABEL();
4628for (i = 0; i < 11; i++)
4629 sljit_set_label(exit_invalid[i], exit_invalid_label);
4630
4631OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4632OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4633}
4634
4635static void do_utfreadnewline_invalid(compiler_common *common)
4636{
4637/* Slow decoding a UTF-8 character, specialized for newlines.
4638TMP1 contains the first byte of the character (>= 0xc0). Return
4639char value in TMP1. */
4640DEFINE_COMPILER;
4641struct sljit_label *loop;
4642struct sljit_label *skip_start;
4643struct sljit_label *three_byte_exit;
4644struct sljit_jump *jump[5];
4645
4646sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4647
4648if (common->nltype != NLTYPE_ANY)
4649 {
4650 SLJIT_ASSERT(common->nltype != NLTYPE_FIXED || common->newline < 128);
4651
4652 /* All newlines are ascii, just skip intermediate octets. */
4653 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4654 loop = LABEL();
4655 if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
4656 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4657 else
4658 {
4659 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4660 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4661 }
4662
4663 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4664 CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
4665 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4666
4667 JUMPHERE(jump[0]);
4668
4669 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4670 OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4671 return;
4672 }
4673
4674jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4675OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4676OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4677
4678jump[1] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xc2);
4679jump[2] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xe2);
4680
4681skip_start = LABEL();
4682OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4683jump[3] = CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80);
4684
4685/* Skip intermediate octets. */
4686loop = LABEL();
4687jump[4] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
4688OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4689OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4690OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0);
4691CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop);
4692
4693JUMPHERE(jump[3]);
4694OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4695
4696three_byte_exit = LABEL();
4697JUMPHERE(jump[0]);
4698JUMPHERE(jump[4]);
4699
4700OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4701OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4702
4703/* Two byte long newline: 0x85. */
4704JUMPHERE(jump[1]);
4705CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start);
4706
4707OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85);
4708OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4709
4710/* Three byte long newlines: 0x2028 and 0x2029. */
4711JUMPHERE(jump[2]);
4712CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, skip_start);
4713CMPTO(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0, three_byte_exit);
4714
4715OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4716OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4717
4718OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 0x80);
4719CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start);
4720
4721OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000);
4722OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4723OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4724}
4725
4726static void do_utfmoveback_invalid(compiler_common *common)
4727{
4728/* Goes one character back. */
4729DEFINE_COMPILER;
4730sljit_s32 i;
4731struct sljit_jump *jump;
4732struct sljit_jump *buffer_start_close;
4733struct sljit_label *exit_ok_label;
4734struct sljit_label *exit_invalid_label;
4735struct sljit_jump *exit_invalid[7];
4736
4737sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4738
4739OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4740exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
4741
4742/* Two-byte sequence. */
4743buffer_start_close = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4744
4745OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
4746
4747OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4748jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20);
4749
4750OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4751OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4752OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4753
4754/* Three-byte sequence. */
4755JUMPHERE(jump);
4756exit_invalid[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
4757
4758OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
4759
4760OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4761jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10);
4762
4763OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4764OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4765OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4766
4767/* Four-byte sequence. */
4768JUMPHERE(jump);
4769OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
4770exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40);
4771
4772OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4773OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xf0);
4774exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05);
4775
4776exit_ok_label = LABEL();
4777OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
4778OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4779
4780/* Two-byte sequence. */
4781JUMPHERE(buffer_start_close);
4782OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4783
4784exit_invalid[4] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4785
4786OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4787
4788OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4789CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20, exit_ok_label);
4790
4791/* Three-byte sequence. */
4792OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
4793exit_invalid[5] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, -0x40);
4794exit_invalid[6] = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);
4795
4796OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
4797
4798OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4799CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10, exit_ok_label);
4800
4801/* Four-byte sequences are not possible. */
4802
4803exit_invalid_label = LABEL();
4804sljit_set_label(exit_invalid[5], exit_invalid_label);
4805sljit_set_label(exit_invalid[6], exit_invalid_label);
4806OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4807OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
4808OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4809
4810JUMPHERE(exit_invalid[4]);
4811/* -2 + 4 = 2 */
4812OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
4813
4814exit_invalid_label = LABEL();
4815for (i = 0; i < 4; i++)
4816 sljit_set_label(exit_invalid[i], exit_invalid_label);
4817OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
4818OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4));
4819OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4820}
4821
4822static void do_utfpeakcharback(compiler_common *common)
4823{
4824/* Peak a character back. Does not modify STR_PTR. */
4825DEFINE_COMPILER;
4826struct sljit_jump *jump[2];
4827
4828sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4829
4830OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4831OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
4832jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x20);
4833
4834OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4835OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0);
4836jump[1] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10);
4837
4838OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
4839OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0 - 0x80);
4840OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
4841OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4842OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4843
4844JUMPHERE(jump[1]);
4845OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4846OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4847OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4848OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4849
4850JUMPHERE(jump[0]);
4851OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
4852OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
4853OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
4854OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4855
4856OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4857}
4858
4859static void do_utfpeakcharback_invalid(compiler_common *common)
4860{
4861/* Peak a character back. Does not modify STR_PTR. */
4862DEFINE_COMPILER;
4863sljit_s32 i;
4864sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV);
4865struct sljit_jump *jump[2];
4866struct sljit_label *two_byte_entry;
4867struct sljit_label *three_byte_entry;
4868struct sljit_label *exit_invalid_label;
4869struct sljit_jump *exit_invalid[8];
4870
4871sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
4872
4873OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
4874exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
4875jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
4876
4877/* Two-byte sequence. */
4878OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4879OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
4880jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x1e);
4881
4882two_byte_entry = LABEL();
4883OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4884/* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */
4885OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4886OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4887
4888JUMPHERE(jump[1]);
4889OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
4890OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
4891exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4892OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4893OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4894
4895/* Three-byte sequence. */
4896OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4897OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
4898jump[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x10);
4899
4900three_byte_entry = LABEL();
4901OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
4902OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4903
4904OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4905if (has_cmov)
4906 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004907 OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
Elliott Hughes5b808042021-10-01 10:56:10 -07004908 CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
4909 exit_invalid[2] = NULL;
4910 }
4911else
4912 exit_invalid[2] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4913
4914OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
4915if (has_cmov)
4916 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004917 OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
Elliott Hughes5b808042021-10-01 10:56:10 -07004918 CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
4919 exit_invalid[3] = NULL;
4920 }
4921else
4922 exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800);
4923
4924OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4925
4926JUMPHERE(jump[1]);
4927OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80);
4928exit_invalid[4] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4929OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 12);
4930OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4931
4932/* Four-byte sequence. */
4933OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-4));
4934OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
4935OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf0);
4936OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 18);
4937/* ADD is used instead of OR because of the SUB 0x10000 above. */
4938OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
4939
4940if (has_cmov)
4941 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07004942 OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
Elliott Hughes5b808042021-10-01 10:56:10 -07004943 CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
4944 exit_invalid[5] = NULL;
4945 }
4946else
4947 exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
4948
4949OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
4950OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4951
4952JUMPHERE(jump[0]);
4953OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
4954jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
4955
4956/* Two-byte sequence. */
4957OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4958OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
4959CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
4960
4961OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80);
4962OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
4963exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
4964OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
4965OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
4966
4967/* Three-byte sequence. */
4968OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-3));
4969OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0);
4970CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry);
4971
4972OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4973OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4974
4975JUMPHERE(jump[0]);
4976exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0);
4977
4978/* Two-byte sequence. */
4979OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
4980OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2);
4981CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x1e, two_byte_entry);
4982
4983exit_invalid_label = LABEL();
4984for (i = 0; i < 8; i++)
4985 sljit_set_label(exit_invalid[i], exit_invalid_label);
4986
4987OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
4988OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
4989}
4990
4991#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
4992
4993#if PCRE2_CODE_UNIT_WIDTH == 16
4994
4995static void do_utfreadchar_invalid(compiler_common *common)
4996{
4997/* Slow decoding a UTF-16 character. TMP1 contains the first half
4998of the character (>= 0xd800). Return char value in TMP1. STR_PTR is
4999undefined for invalid characters. */
5000DEFINE_COMPILER;
5001struct sljit_jump *exit_invalid[3];
5002
5003sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5004
5005/* TMP2 contains the high surrogate. */
5006exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
5007exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5008
5009OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5010OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
5011OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5012
5013OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5014OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000);
5015exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
5016
5017OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5018OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5019
5020JUMPHERE(exit_invalid[0]);
5021JUMPHERE(exit_invalid[1]);
5022JUMPHERE(exit_invalid[2]);
5023OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5024OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5025}
5026
5027static void do_utfreadnewline_invalid(compiler_common *common)
5028{
5029/* Slow decoding a UTF-16 character, specialized for newlines.
5030TMP1 contains the first half of the character (>= 0xd800). Return
5031char value in TMP1. */
5032
5033DEFINE_COMPILER;
5034struct sljit_jump *exit_invalid[2];
5035
5036sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5037
5038/* TMP2 contains the high surrogate. */
5039exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5040
5041OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5042exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
5043
5044OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07005045OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
Elliott Hughes5b808042021-10-01 10:56:10 -07005046OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
5047OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
5048OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
5049OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
5050
5051OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5052
5053JUMPHERE(exit_invalid[0]);
5054JUMPHERE(exit_invalid[1]);
5055OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5056OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5057}
5058
5059static void do_utfmoveback_invalid(compiler_common *common)
5060{
5061/* Goes one character back. */
5062DEFINE_COMPILER;
5063struct sljit_jump *exit_invalid[3];
5064
5065sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5066
5067exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
5068exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5069
5070OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5071OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5072exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400);
5073
5074OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5075OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
5076OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5077
5078JUMPHERE(exit_invalid[0]);
5079JUMPHERE(exit_invalid[1]);
5080JUMPHERE(exit_invalid[2]);
5081
5082OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5083OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
5084OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5085}
5086
5087static void do_utfpeakcharback_invalid(compiler_common *common)
5088{
5089/* Peak a character back. Does not modify STR_PTR. */
5090DEFINE_COMPILER;
5091struct sljit_jump *jump;
5092struct sljit_jump *exit_invalid[3];
5093
5094sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5095
5096jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
5097OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
5098exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xdc00);
5099exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
5100
5101OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
5102OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000 - 0xdc00);
5103OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
5104exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x400);
5105OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
5106OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5107
5108JUMPHERE(jump);
5109OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5110
5111JUMPHERE(exit_invalid[0]);
5112JUMPHERE(exit_invalid[1]);
5113JUMPHERE(exit_invalid[2]);
5114
5115OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
5116OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5117}
5118
5119#endif /* PCRE2_CODE_UNIT_WIDTH == 16 */
5120
/* Mask and shift used to split a character into a UCD block number and an
offset inside that block. Both values assume that UCD_BLOCK_SIZE is 128;
do_getucd() and do_getucdtype() assert this. */
#define UCD_BLOCK_MASK 127
#define UCD_BLOCK_SHIFT 7
5124
5125static void do_getucd(compiler_common *common)
5126{
5127/* Search the UCD record for the character comes in TMP1.
5128Returns chartype in TMP1 and UCD offset in TMP2. */
5129DEFINE_COMPILER;
5130#if PCRE2_CODE_UNIT_WIDTH == 32
5131struct sljit_jump *jump;
5132#endif
5133
5134#if defined SLJIT_DEBUG && SLJIT_DEBUG
5135/* dummy_ucd_record */
5136const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
5137SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
5138SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
5139#endif
5140
5141SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
5142
5143sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5144
5145#if PCRE2_CODE_UNIT_WIDTH == 32
5146if (!common->utf)
5147 {
5148 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
5149 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
5150 JUMPHERE(jump);
5151 }
5152#endif
5153
5154OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5155OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5156OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5157OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5158OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5159OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5160OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5161OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5162OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5163}
5164
5165static void do_getucdtype(compiler_common *common)
5166{
5167/* Search the UCD record for the character comes in TMP1.
5168Returns chartype in TMP1 and UCD offset in TMP2. */
5169DEFINE_COMPILER;
5170#if PCRE2_CODE_UNIT_WIDTH == 32
5171struct sljit_jump *jump;
5172#endif
5173
5174#if defined SLJIT_DEBUG && SLJIT_DEBUG
5175/* dummy_ucd_record */
5176const ucd_record *record = GET_UCD(UNASSIGNED_UTF_CHAR);
5177SLJIT_ASSERT(record->script == ucp_Unknown && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
5178SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
5179#endif
5180
5181SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
5182
5183sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
5184
5185#if PCRE2_CODE_UNIT_WIDTH == 32
5186if (!common->utf)
5187 {
5188 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
5189 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
5190 JUMPHERE(jump);
5191 }
5192#endif
5193
5194OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5195OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
5196OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
5197OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
5198OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
5199OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5200OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
5201OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
5202
5203/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */
5204OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
5205OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
5206OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
5207OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1);
5208
5209OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
5210}
5211
5212#endif /* SUPPORT_UNICODE */
5213
5214static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common)
5215{
5216DEFINE_COMPILER;
5217struct sljit_label *mainloop;
5218struct sljit_label *newlinelabel = NULL;
5219struct sljit_jump *start;
5220struct sljit_jump *end = NULL;
5221struct sljit_jump *end2 = NULL;
5222#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5223struct sljit_label *loop;
5224struct sljit_jump *jump;
5225#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5226jump_list *newline = NULL;
5227sljit_u32 overall_options = common->re->overall_options;
5228BOOL hascrorlf = (common->re->flags & PCRE2_HASCRORLF) != 0;
5229BOOL newlinecheck = FALSE;
5230BOOL readuchar = FALSE;
5231
5232if (!(hascrorlf || (overall_options & PCRE2_FIRSTLINE) != 0)
5233 && (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255))
5234 newlinecheck = TRUE;
5235
5236SLJIT_ASSERT(common->abort_label == NULL);
5237
5238if ((overall_options & PCRE2_FIRSTLINE) != 0)
5239 {
5240 /* Search for the end of the first line. */
5241 SLJIT_ASSERT(common->match_end_ptr != 0);
5242 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
5243
5244 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
5245 {
5246 mainloop = LABEL();
5247 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5248 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5249 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
5250 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
5251 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, mainloop);
5252 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, mainloop);
5253 JUMPHERE(end);
5254 OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5255 }
5256 else
5257 {
5258 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5259 mainloop = LABEL();
5260 /* Continual stores does not cause data dependency. */
5261 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5262 read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
5263 check_newlinechar(common, common->nltype, &newline, TRUE);
5264 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, mainloop);
5265 JUMPHERE(end);
5266 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
5267 set_jumps(newline, LABEL());
5268 }
5269
5270 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
5271 }
5272else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0)
5273 {
5274 /* Check whether offset limit is set and valid. */
5275 SLJIT_ASSERT(common->match_end_ptr != 0);
5276
5277 if (HAS_VIRTUAL_REGISTERS)
5278 {
5279 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
5280 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5281 }
5282 else
5283 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit));
5284
5285 OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5286 end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET);
5287 if (HAS_VIRTUAL_REGISTERS)
5288 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
5289 else
5290 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
5291
5292#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5293 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5294#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5295 if (HAS_VIRTUAL_REGISTERS)
5296 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
5297
5298 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
5299 end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
5300 OP1(SLJIT_MOV, TMP2, 0, STR_END, 0);
5301 JUMPHERE(end2);
5302 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
5303 add_jump(compiler, &common->abort, CMP(SLJIT_LESS, TMP2, 0, STR_PTR, 0));
5304 JUMPHERE(end);
5305 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, TMP2, 0);
5306 }
5307
5308start = JUMP(SLJIT_JUMP);
5309
5310if (newlinecheck)
5311 {
5312 newlinelabel = LABEL();
5313 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5314 end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5315 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07005316 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
Elliott Hughes5b808042021-10-01 10:56:10 -07005317 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
5318#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
5319 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5320#endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */
5321 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5322 end2 = JUMP(SLJIT_JUMP);
5323 }
5324
5325mainloop = LABEL();
5326
5327/* Increasing the STR_PTR here requires one less jump in the most common case. */
5328#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5329if (common->utf && !common->invalid_utf) readuchar = TRUE;
5330#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5331if (newlinecheck) readuchar = TRUE;
5332
5333if (readuchar)
5334 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5335
5336if (newlinecheck)
5337 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, newlinelabel);
5338
5339OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5340#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5341#if PCRE2_CODE_UNIT_WIDTH == 8
5342if (common->invalid_utf)
5343 {
5344 /* Skip continuation code units. */
5345 loop = LABEL();
5346 jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5347 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5348 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5349 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x80);
5350 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x40, loop);
5351 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5352 JUMPHERE(jump);
5353 }
5354else if (common->utf)
5355 {
5356 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
5357 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
5358 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5359 JUMPHERE(jump);
5360 }
5361#elif PCRE2_CODE_UNIT_WIDTH == 16
5362if (common->invalid_utf)
5363 {
5364 /* Skip continuation code units. */
5365 loop = LABEL();
5366 jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5367 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5368 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5369 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
5370 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400, loop);
5371 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5372 JUMPHERE(jump);
5373 }
5374else if (common->utf)
5375 {
5376 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
5377
5378 if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
5379 {
5380 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07005381 OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
Elliott Hughes5b808042021-10-01 10:56:10 -07005382 CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
5383 }
5384 else
5385 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07005386 OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
Elliott Hughes5b808042021-10-01 10:56:10 -07005387 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
5388 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
5389 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
5390 }
5391 }
5392#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
5393#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */
5394JUMPHERE(start);
5395
5396if (newlinecheck)
5397 {
5398 JUMPHERE(end);
5399 JUMPHERE(end2);
5400 }
5401
5402return mainloop;
5403}
5404
5405
5406static SLJIT_INLINE void add_prefix_char(PCRE2_UCHAR chr, fast_forward_char_data *chars, BOOL last)
5407{
5408sljit_u32 i, count = chars->count;
5409
5410if (count == 255)
5411 return;
5412
5413if (count == 0)
5414 {
5415 chars->count = 1;
5416 chars->chars[0] = chr;
5417
5418 if (last)
5419 chars->last_count = 1;
5420 return;
5421 }
5422
5423for (i = 0; i < count; i++)
5424 if (chars->chars[i] == chr)
5425 return;
5426
5427if (count >= MAX_DIFF_CHARS)
5428 {
5429 chars->count = 255;
5430 return;
5431 }
5432
5433chars->chars[count] = chr;
5434chars->count = count + 1;
5435
5436if (last)
5437 chars->last_count++;
5438}
5439
5440static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
5441{
5442/* Recursive function, which scans prefix literals. */
5443BOOL last, any, class, caseless;
5444int len, repeat, len_save, consumed = 0;
5445sljit_u32 chr; /* Any unicode character. */
5446sljit_u8 *bytes, *bytes_end, byte;
5447PCRE2_SPTR alternative, cc_save, oc;
5448#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5449PCRE2_UCHAR othercase[4];
5450#elif defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 16
5451PCRE2_UCHAR othercase[2];
5452#else
5453PCRE2_UCHAR othercase[1];
5454#endif
5455
5456repeat = 1;
5457while (TRUE)
5458 {
5459 if (*rec_count == 0)
5460 return 0;
5461 (*rec_count)--;
5462
5463 last = TRUE;
5464 any = FALSE;
5465 class = FALSE;
5466 caseless = FALSE;
5467
5468 switch (*cc)
5469 {
5470 case OP_CHARI:
5471 caseless = TRUE;
5472 /* Fall through */
5473 case OP_CHAR:
5474 last = FALSE;
5475 cc++;
5476 break;
5477
5478 case OP_SOD:
5479 case OP_SOM:
5480 case OP_SET_SOM:
5481 case OP_NOT_WORD_BOUNDARY:
5482 case OP_WORD_BOUNDARY:
5483 case OP_EODN:
5484 case OP_EOD:
5485 case OP_CIRC:
5486 case OP_CIRCM:
5487 case OP_DOLL:
5488 case OP_DOLLM:
5489 /* Zero width assertions. */
5490 cc++;
5491 continue;
5492
5493 case OP_ASSERT:
5494 case OP_ASSERT_NOT:
5495 case OP_ASSERTBACK:
5496 case OP_ASSERTBACK_NOT:
5497 case OP_ASSERT_NA:
5498 case OP_ASSERTBACK_NA:
5499 cc = bracketend(cc);
5500 continue;
5501
5502 case OP_PLUSI:
5503 case OP_MINPLUSI:
5504 case OP_POSPLUSI:
5505 caseless = TRUE;
5506 /* Fall through */
5507 case OP_PLUS:
5508 case OP_MINPLUS:
5509 case OP_POSPLUS:
5510 cc++;
5511 break;
5512
5513 case OP_EXACTI:
5514 caseless = TRUE;
5515 /* Fall through */
5516 case OP_EXACT:
5517 repeat = GET2(cc, 1);
5518 last = FALSE;
5519 cc += 1 + IMM2_SIZE;
5520 break;
5521
5522 case OP_QUERYI:
5523 case OP_MINQUERYI:
5524 case OP_POSQUERYI:
5525 caseless = TRUE;
5526 /* Fall through */
5527 case OP_QUERY:
5528 case OP_MINQUERY:
5529 case OP_POSQUERY:
5530 len = 1;
5531 cc++;
5532#ifdef SUPPORT_UNICODE
5533 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5534#endif
5535 max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
5536 if (max_chars == 0)
5537 return consumed;
5538 last = FALSE;
5539 break;
5540
5541 case OP_KET:
5542 cc += 1 + LINK_SIZE;
5543 continue;
5544
5545 case OP_ALT:
5546 cc += GET(cc, 1);
5547 continue;
5548
5549 case OP_ONCE:
5550 case OP_BRA:
5551 case OP_BRAPOS:
5552 case OP_CBRA:
5553 case OP_CBRAPOS:
5554 alternative = cc + GET(cc, 1);
5555 while (*alternative == OP_ALT)
5556 {
5557 max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
5558 if (max_chars == 0)
5559 return consumed;
5560 alternative += GET(alternative, 1);
5561 }
5562
5563 if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
5564 cc += IMM2_SIZE;
5565 cc += 1 + LINK_SIZE;
5566 continue;
5567
5568 case OP_CLASS:
5569#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5570 if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
5571 return consumed;
5572#endif
5573 class = TRUE;
5574 break;
5575
5576 case OP_NCLASS:
5577#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5578 if (common->utf) return consumed;
5579#endif
5580 class = TRUE;
5581 break;
5582
5583#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
5584 case OP_XCLASS:
5585#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5586 if (common->utf) return consumed;
5587#endif
5588 any = TRUE;
5589 cc += GET(cc, 1);
5590 break;
5591#endif
5592
5593 case OP_DIGIT:
5594#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5595 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
5596 return consumed;
5597#endif
5598 any = TRUE;
5599 cc++;
5600 break;
5601
5602 case OP_WHITESPACE:
5603#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5604 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
5605 return consumed;
5606#endif
5607 any = TRUE;
5608 cc++;
5609 break;
5610
5611 case OP_WORDCHAR:
5612#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
5613 if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
5614 return consumed;
5615#endif
5616 any = TRUE;
5617 cc++;
5618 break;
5619
5620 case OP_NOT:
5621 case OP_NOTI:
5622 cc++;
5623 /* Fall through. */
5624 case OP_NOT_DIGIT:
5625 case OP_NOT_WHITESPACE:
5626 case OP_NOT_WORDCHAR:
5627 case OP_ANY:
5628 case OP_ALLANY:
5629#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5630 if (common->utf) return consumed;
5631#endif
5632 any = TRUE;
5633 cc++;
5634 break;
5635
5636#ifdef SUPPORT_UNICODE
5637 case OP_NOTPROP:
5638 case OP_PROP:
5639#if PCRE2_CODE_UNIT_WIDTH != 32
5640 if (common->utf) return consumed;
5641#endif
5642 any = TRUE;
5643 cc += 1 + 2;
5644 break;
5645#endif
5646
5647 case OP_TYPEEXACT:
5648 repeat = GET2(cc, 1);
5649 cc += 1 + IMM2_SIZE;
5650 continue;
5651
5652 case OP_NOTEXACT:
5653 case OP_NOTEXACTI:
5654#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5655 if (common->utf) return consumed;
5656#endif
5657 any = TRUE;
5658 repeat = GET2(cc, 1);
5659 cc += 1 + IMM2_SIZE + 1;
5660 break;
5661
5662 default:
5663 return consumed;
5664 }
5665
5666 if (any)
5667 {
5668 do
5669 {
5670 chars->count = 255;
5671
5672 consumed++;
5673 if (--max_chars == 0)
5674 return consumed;
5675 chars++;
5676 }
5677 while (--repeat > 0);
5678
5679 repeat = 1;
5680 continue;
5681 }
5682
5683 if (class)
5684 {
5685 bytes = (sljit_u8*) (cc + 1);
5686 cc += 1 + 32 / sizeof(PCRE2_UCHAR);
5687
5688 switch (*cc)
5689 {
5690 case OP_CRSTAR:
5691 case OP_CRMINSTAR:
5692 case OP_CRPOSSTAR:
5693 case OP_CRQUERY:
5694 case OP_CRMINQUERY:
5695 case OP_CRPOSQUERY:
5696 max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
5697 if (max_chars == 0)
5698 return consumed;
5699 break;
5700
5701 default:
5702 case OP_CRPLUS:
5703 case OP_CRMINPLUS:
5704 case OP_CRPOSPLUS:
5705 break;
5706
5707 case OP_CRRANGE:
5708 case OP_CRMINRANGE:
5709 case OP_CRPOSRANGE:
5710 repeat = GET2(cc, 1);
5711 if (repeat <= 0)
5712 return consumed;
5713 break;
5714 }
5715
5716 do
5717 {
5718 if (bytes[31] & 0x80)
5719 chars->count = 255;
5720 else if (chars->count != 255)
5721 {
5722 bytes_end = bytes + 32;
5723 chr = 0;
5724 do
5725 {
5726 byte = *bytes++;
5727 SLJIT_ASSERT((chr & 0x7) == 0);
5728 if (byte == 0)
5729 chr += 8;
5730 else
5731 {
5732 do
5733 {
5734 if ((byte & 0x1) != 0)
5735 add_prefix_char(chr, chars, TRUE);
5736 byte >>= 1;
5737 chr++;
5738 }
5739 while (byte != 0);
5740 chr = (chr + 7) & ~7;
5741 }
5742 }
5743 while (chars->count != 255 && bytes < bytes_end);
5744 bytes = bytes_end - 32;
5745 }
5746
5747 consumed++;
5748 if (--max_chars == 0)
5749 return consumed;
5750 chars++;
5751 }
5752 while (--repeat > 0);
5753
5754 switch (*cc)
5755 {
5756 case OP_CRSTAR:
5757 case OP_CRMINSTAR:
5758 case OP_CRPOSSTAR:
5759 return consumed;
5760
5761 case OP_CRQUERY:
5762 case OP_CRMINQUERY:
5763 case OP_CRPOSQUERY:
5764 cc++;
5765 break;
5766
5767 case OP_CRRANGE:
5768 case OP_CRMINRANGE:
5769 case OP_CRPOSRANGE:
5770 if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
5771 return consumed;
5772 cc += 1 + 2 * IMM2_SIZE;
5773 break;
5774 }
5775
5776 repeat = 1;
5777 continue;
5778 }
5779
5780 len = 1;
5781#ifdef SUPPORT_UNICODE
5782 if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
5783#endif
5784
5785 if (caseless && char_has_othercase(common, cc))
5786 {
5787#ifdef SUPPORT_UNICODE
5788 if (common->utf)
5789 {
5790 GETCHAR(chr, cc);
5791 if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
5792 return consumed;
5793 }
5794 else
5795#endif
5796 {
5797 chr = *cc;
5798#ifdef SUPPORT_UNICODE
5799 if (common->ucp && chr > 127)
5800 othercase[0] = UCD_OTHERCASE(chr);
5801 else
5802#endif
5803 othercase[0] = TABLE_GET(chr, common->fcc, chr);
5804 }
5805 }
5806 else
5807 {
5808 caseless = FALSE;
5809 othercase[0] = 0; /* Stops compiler warning - PH */
5810 }
5811
5812 len_save = len;
5813 cc_save = cc;
5814 while (TRUE)
5815 {
5816 oc = othercase;
5817 do
5818 {
5819 len--;
5820 consumed++;
5821
5822 chr = *cc;
5823 add_prefix_char(*cc, chars, len == 0);
5824
5825 if (caseless)
5826 add_prefix_char(*oc, chars, len == 0);
5827
5828 if (--max_chars == 0)
5829 return consumed;
5830 chars++;
5831 cc++;
5832 oc++;
5833 }
5834 while (len > 0);
5835
5836 if (--repeat == 0)
5837 break;
5838
5839 len = len_save;
5840 cc = cc_save;
5841 }
5842
5843 repeat = 1;
5844 if (last)
5845 return consumed;
5846 }
5847}
5848
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
/* Emits a test on the code unit held in "reg": the register is masked in
place and a jump to "label" is emitted for the case where the masked value
equals the tag selected for the code unit width (0x80 under mask 0xc0 for
8 bit units, 0xdc00 under mask 0xfc00 for 16 bit units). The original
content of "reg" is destroyed by the masking. */
static void jumpto_if_not_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg, struct sljit_label *label)
{
#if PCRE2_CODE_UNIT_WIDTH == 8
const sljit_sw kind_mask = 0xc0;
const sljit_sw non_start_tag = 0x80;
#elif PCRE2_CODE_UNIT_WIDTH == 16
const sljit_sw kind_mask = 0xfc00;
const sljit_sw non_start_tag = 0xdc00;
#else
#error "Unknown code width"
#endif

OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, kind_mask);
CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, non_start_tag, label);
}
#endif
5863
5864#include "pcre2_jit_simd_inc.h"
5865
#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD

/* Selects two prefix positions for the SIMD character pair search and emits
that search.

Only positions whose priority (stored in last_count) is above 2 are
considered. For every such position "hi" (scanned from max - 1 down to 1) the
positions "lo" in [hi - max_fast_forward_char_pair_offset(), hi) are tried; a
pair is accepted when none of the two characters of "hi" equals any of the
two characters of "lo" and the priority sum is not lower than the best sum
found so far (so a later pair wins a tie).

Returns FALSE without emitting anything when no pair was accepted, otherwise
calls fast_forward_char_pair_simd() and returns TRUE. */
static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max)
{
sljit_s32 hi, lo, lo_start;
sljit_s32 best_hi = 0, best_lo = 0;
sljit_u32 best_pri = 0;
PCRE2_UCHAR hi_c1, hi_c2, hi_pri, lo_c1, lo_c2, lo_pri;

for (hi = max - 1; hi >= 1; hi--)
  {
  if (chars[hi].last_count <= 2)
    continue;

  hi_c1 = chars[hi].chars[0];
  hi_c2 = chars[hi].chars[1];
  hi_pri = chars[hi].last_count;

  /* The partner may be at most the supported pair distance to the left. */
  lo_start = hi - max_fast_forward_char_pair_offset();
  if (lo_start < 0)
    lo_start = 0;

  for (lo = lo_start; lo < hi; lo++)
    {
    lo_pri = chars[lo].last_count;
    if (lo_pri <= 2 || hi_pri + lo_pri < best_pri)
      continue;

    lo_c1 = chars[lo].chars[0];
    lo_c2 = chars[lo].chars[1];

    /* The two positions must not share any character. */
    if (hi_c1 == lo_c1 || hi_c1 == lo_c2 || hi_c2 == lo_c1 || hi_c2 == lo_c2)
      continue;

    best_pri = hi_pri + lo_pri;
    best_hi = hi;
    best_lo = lo;
    }
  }

if (best_pri == 0)
  return FALSE;

fast_forward_char_pair_simd(common, best_hi, chars[best_hi].chars[0], chars[best_hi].chars[1], best_lo, chars[best_lo].chars[0], chars[best_lo].chars[1]);
return TRUE;
}

#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
5914
5915static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset)
5916{
5917DEFINE_COMPILER;
5918struct sljit_label *start;
5919struct sljit_jump *match;
5920struct sljit_jump *partial_quit;
5921PCRE2_UCHAR mask;
5922BOOL has_match_end = (common->match_end_ptr != 0);
5923
5924SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE || offset == 0);
5925
5926if (has_match_end)
5927 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
5928
5929if (offset > 0)
5930 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
5931
5932if (has_match_end)
5933 {
5934 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
5935
5936 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07005937 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07005938 CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
5939 }
5940
5941#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
5942
5943if (JIT_HAS_FAST_FORWARD_CHAR_SIMD)
5944 {
5945 fast_forward_char_simd(common, char1, char2, offset);
5946
5947 if (offset > 0)
5948 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
5949
5950 if (has_match_end)
5951 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
5952 return;
5953 }
5954
5955#endif
5956
5957start = LABEL();
5958
5959partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
5960if (common->mode == PCRE2_JIT_COMPLETE)
5961 add_jump(compiler, &common->failed_match, partial_quit);
5962
5963OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
5964OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
5965
5966if (char1 == char2)
5967 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1, start);
5968else
5969 {
5970 mask = char1 ^ char2;
5971 if (is_powerof2(mask))
5972 {
5973 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
5974 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | mask, start);
5975 }
5976 else
5977 {
5978 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
5979 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, char2, start);
5980 JUMPHERE(match);
5981 }
5982 }
5983
5984#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
5985if (common->utf && offset > 0)
5986 {
5987 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-(offset + 1)));
5988 jumpto_if_not_utf_char_start(compiler, TMP1, start);
5989 }
5990#endif
5991
5992OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
5993
5994if (common->mode != PCRE2_JIT_COMPLETE)
5995 JUMPHERE(partial_quit);
5996
5997if (has_match_end)
5998 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
5999}
6000
/* Emits a fast-forward search based on the leading positions of the pattern.

scan_prefix() fills chars[] with the code units that may occur at each of the
first (at most MAX_N_CHARS) positions. From that data this function
  - turns the last_count field of every position into a priority (0..7),
  - hands over to the SIMD character pair search when it is available and
    accepts the data,
  - otherwise looks for the longest run of at least four consecutive
    positions with count < 255 and builds a 256 byte table (update_table)
    for it,
  - and picks the highest priority position other than range_right
    ("offset") for an explicit compare.

Returns FALSE when scan_prefix() returns less than 1, or when neither a
range nor a usable position was found (nothing is emitted in these cases).
Returns TRUE otherwise; this includes the case where the table allocation
returns NULL, where no search is emitted either.
NOTE(review): presumably allocate_read_only_data() records the failure
elsewhere - confirm. */
6001static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
6002{
6003DEFINE_COMPILER;
6004struct sljit_label *start;
6005struct sljit_jump *match;
6006fast_forward_char_data chars[MAX_N_CHARS];
6007sljit_s32 offset;
6008PCRE2_UCHAR mask;
6009PCRE2_UCHAR *char_set, *char_set_end;
6010int i, max, from;
6011int range_right = -1, range_len;
6012sljit_u8 *update_table = NULL;
6013BOOL in_range;
6014sljit_u32 rec_count;
6015
6016for (i = 0; i < MAX_N_CHARS; i++)
6017 {
6018 chars[i].count = 0;
6019 chars[i].last_count = 0;
6020 }
6021
/* rec_count is passed by address to scan_prefix() as a work budget. */
6022rec_count = 10000;
6023max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
6024
6025if (max < 1)
6026 return FALSE;
6027
/* Priorities assigned below: a single character gets 7 or 5, two characters
differing in one bit get 6 or 4, two other characters get 3 or 2 (the higher
value when last_count equals count), more than two characters get 1, and
count == 255 gets 0. */
6028/* Convert last_count to priority. */
6029for (i = 0; i < max; i++)
6030 {
6031 SLJIT_ASSERT(chars[i].count > 0 && chars[i].last_count <= chars[i].count);
6032
6033 if (chars[i].count == 1)
6034 {
6035 chars[i].last_count = (chars[i].last_count == 1) ? 7 : 5;
6036 /* Simplifies algorithms later. */
6037 chars[i].chars[1] = chars[i].chars[0];
6038 }
6039 else if (chars[i].count == 2)
6040 {
6041 SLJIT_ASSERT(chars[i].chars[0] != chars[i].chars[1]);
6042
6043 if (is_powerof2(chars[i].chars[0] ^ chars[i].chars[1]))
6044 chars[i].last_count = (chars[i].last_count == 2) ? 6 : 4;
6045 else
6046 chars[i].last_count = (chars[i].last_count == 2) ? 3 : 2;
6047 }
6048 else
6049 chars[i].last_count = (chars[i].count == 255) ? 0 : 1;
6050 }
6051
6052#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6053if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max))
6054 return TRUE;
6055#endif
6056
/* Find the longest run of consecutive positions with count < 255. A run is
only recorded once it is longer than range_len, which starts at 3, so the
shortest accepted run has four positions. range_right is the index of the
last position of the recorded run. The loop runs one step past max so that
a run ending at the last position is recorded as well. */
6057in_range = FALSE;
6058/* Prevent compiler "uninitialized" warning */
6059from = 0;
6060range_len = 4 /* minimum length */ - 1;
6061for (i = 0; i <= max; i++)
6062 {
6063 if (in_range && (i - from) > range_len && (chars[i - 1].count < 255))
6064 {
6065 range_len = i - from;
6066 range_right = i - 1;
6067 }
6068
6069 if (i < max && chars[i].count < 255)
6070 {
6071 SLJIT_ASSERT(chars[i].count > 0);
6072 if (!in_range)
6073 {
6074 in_range = TRUE;
6075 from = i;
6076 }
6077 }
6078 else
6079 in_range = FALSE;
6080 }
6081
/* Build the table: every entry starts as IN_UCHARS(range_len); for each
position of the run, taken from range_right leftwards (distance i), the
entries indexed by the low 8 bits of its characters are lowered to
IN_UCHARS(i) when that is smaller. */
6082if (range_right >= 0)
6083 {
6084 update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
6085 if (update_table == NULL)
6086 return TRUE;
6087 memset(update_table, IN_UCHARS(range_len), 256);
6088
6089 for (i = 0; i < range_len; i++)
6090 {
6091 SLJIT_ASSERT(chars[range_right - i].count > 0 && chars[range_right - i].count < 255);
6092
6093 char_set = chars[range_right - i].chars;
6094 char_set_end = char_set + chars[range_right - i].count;
6095 do
6096 {
6097 if (update_table[(*char_set) & 0xff] > IN_UCHARS(i))
6098 update_table[(*char_set) & 0xff] = IN_UCHARS(i);
6099 char_set++;
6100 }
6101 while (char_set < char_set_end);
6102 }
6103 }
6104
/* Select the position for the explicit compare: range_right is skipped, the
first candidate needs a priority of at least 2, and a later position
replaces it only when its priority is strictly higher. */
6105offset = -1;
6106/* Scan forward. */
6107for (i = 0; i < max; i++)
6108 {
6109 if (range_right == i)
6110 continue;
6111
6112 if (offset == -1)
6113 {
6114 if (chars[i].last_count >= 2)
6115 offset = i;
6116 }
6117 else if (chars[offset].last_count < chars[i].last_count)
6118 offset = i;
6119 }
6120
6121SLJIT_ASSERT(offset == -1 || (chars[offset].count >= 1 && chars[offset].count <= 2));
6122
/* No range: fall back to the plain one or two character search. */
6123if (range_right < 0)
6124 {
6125 if (offset < 0)
6126 return FALSE;
6127 /* Works whether count is 1 or 2: chars[1] was set to chars[0] above for single character positions. */
6128 fast_forward_first_char2(common, chars[offset].chars[0], chars[offset].chars[1], offset);
6129 return TRUE;
6130 }
6131
6132SLJIT_ASSERT(range_right != offset);
6133
/* STR_END is moved back by max code units for the duration of the search;
the search fails at once if that subtraction reports SLJIT_LESS. With a
match end pointer, the original STR_END is kept in TMP3 and the reduced
STR_END is additionally limited to the stored match end value. */
6134if (common->match_end_ptr != 0)
6135 {
6136 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6137 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6138 OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6139 add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07006140 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07006141 CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6142 }
6143else
6144 {
6145 OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6146 add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
6147 }
6148
6149SLJIT_ASSERT(range_right >= 0);
6150
/* Without virtual registers the table address is kept in RETURN_ADDR. */
6151if (!HAS_VIRTUAL_REGISTERS)
6152 OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
6153
/* Main loop: load one byte of the code unit at position range_right, look
it up in the table, advance STR_PTR by the table value and repeat until the
value is zero. The second load variant addresses the last byte of the code
unit instead of the first one. */
6154start = LABEL();
6155add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
6156
6157#if PCRE2_CODE_UNIT_WIDTH == 8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
6158OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
6159#else
6160OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
6161#endif
6162
6163if (!HAS_VIRTUAL_REGISTERS)
6164 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
6165else
6166 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
6167
6168OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6169CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start);
6170
/* Explicit compare of the code unit at the selected position. STR_PTR is
advanced by one before the compare, so a mismatch restarts the main loop
one code unit further on. */
6171if (offset >= 0)
6172 {
6173 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
6174 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6175
6176 if (chars[offset].count == 1)
6177 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0], start);
6178 else
6179 {
6180 mask = chars[offset].chars[0] ^ chars[offset].chars[1];
6181 if (is_powerof2(mask))
6182 {
6183 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
6184 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0] | mask, start);
6185 }
6186 else
6187 {
6188 match = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[0]);
6189 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, chars[offset].chars[1], start);
6190 JUMPHERE(match);
6191 }
6192 }
6193 }
6194
/* In UTF mode the code unit at the candidate start is tested with
jumpto_if_not_utf_char_start() unless the explicit compare was done on
position 0. When no compare was emitted (offset < 0), STR_PTR is advanced by
one around the test so that a failing test restarts the loop one unit
further on, and is moved back afterwards. */
6195#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
6196if (common->utf && offset != 0)
6197 {
6198 if (offset < 0)
6199 {
6200 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6201 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6202 }
6203 else
6204 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6205
6206 jumpto_if_not_utf_char_start(compiler, TMP1, start);
6207
6208 if (offset < 0)
6209 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6210 }
6211#endif
6212
/* Undo the advance done before the explicit compare. */
6213if (offset >= 0)
6214 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6215
/* Restore STR_END: from TMP3, or by adding back the max code units. */
6216if (common->match_end_ptr != 0)
6217 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6218else
6219 OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
6220return TRUE;
6221}
6222
6223static SLJIT_INLINE void fast_forward_first_char(compiler_common *common)
6224{
6225PCRE2_UCHAR first_char = (PCRE2_UCHAR)(common->re->first_codeunit);
6226PCRE2_UCHAR oc;
6227
6228oc = first_char;
6229if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0)
6230 {
6231 oc = TABLE_GET(first_char, common->fcc, first_char);
6232#if defined SUPPORT_UNICODE
6233 if (first_char > 127 && (common->utf || common->ucp))
6234 oc = UCD_OTHERCASE(first_char);
6235#endif
6236 }
6237
6238fast_forward_first_char2(common, first_char, oc, 0);
6239}
6240
/* Emits the fast-forward search that moves STR_PTR on to the next newline.

Nothing is searched when STR_PTR is not above the "str" field of the JIT
arguments (the "firstchar" jump). With a match end pointer, STR_END is saved
in TMP3, replaced by the stored match end value for the search and restored
at the end.

Three strategies are emitted depending on the newline type:
  - a fixed newline with a value above 255 (handled as two code units): SIMD
    character pair search when available in PCRE2_JIT_COMPLETE mode,
    otherwise a simple two code unit compare loop;
  - fixed single code unit or ANYCRLF newline with SIMD support: SIMD
    character search;
  - everything else: read_char() + check_newlinechar() loop.
For the ANY and ANYCRLF types a CHAR_NL directly following a CHAR_CR is
skipped as well. The start of the generic loop is published in
common->ff_newline_shortcut. */
6241static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
6242{
6243DEFINE_COMPILER;
6244struct sljit_label *loop;
6245struct sljit_jump *lastchar = NULL;
6246struct sljit_jump *firstchar;
6247struct sljit_jump *quit = NULL;
6248struct sljit_jump *foundcr = NULL;
6249struct sljit_jump *notfoundnl;
6250jump_list *newline = NULL;
6251
6252if (common->match_end_ptr != 0)
6253 {
6254 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
6255 OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6256 }
6257
/* Fixed newline above 255: compared below as two code units, the first
being (newline >> 8) & 0xff and the second newline & 0xff. */
6258if (common->nltype == NLTYPE_FIXED && common->newline > 255)
6259 {
6260#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD
6261 if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE)
6262 {
/* TMP2 = arguments->str, TMP1 = arguments->begin. */
6263 if (HAS_VIRTUAL_REGISTERS)
6264 {
6265 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6266 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6267 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6268 }
6269 else
6270 {
6271 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6272 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6273 }
6274 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6275
/* Step back one code unit, and one more unless that position equals
"begin" (TMP1 becomes 0 or 1, scaled to the code unit size). */
6276 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07006277 OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07006278 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
6279#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6280 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6281#endif
6282 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6283
/* After the pair search STR_PTR is advanced by two code units. */
6284 fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff);
6285 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
6286 }
6287 else
6288#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */
6289 {
6290 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6291 if (HAS_VIRTUAL_REGISTERS)
6292 {
6293 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6294 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6295 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6296 }
6297 else
6298 {
6299 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6300 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
6301 }
6302 firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6303
/* Step back one code unit when STR_PTR is at least two code units past
"begin" (TMP2 becomes 0 or 1, scaled to the code unit size); the loop
below reads the units at offsets -2 and -1 after advancing by one. */
6304 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07006305 OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07006306 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
6307#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6308 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
6309#endif
6310 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
6311
6312 loop = LABEL();
6313 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6314 quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6315 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
6316 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
6317 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop);
6318 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop);
6319
6320 JUMPHERE(quit);
6321 JUMPHERE(lastchar);
6322 }
6323
6324 JUMPHERE(firstchar);
6325
6326 if (common->match_end_ptr != 0)
6327 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6328 return;
6329 }
6330
/* All other newline types. TMP2 = arguments->str. */
6331if (HAS_VIRTUAL_REGISTERS)
6332 {
6333 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6334 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
6335 }
6336else
6337 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
6338
6339/* Example: match /^/ to \r\n from offset 1. */
6340firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6341
/* Step back to the previous character: via move_back() for NLTYPE_ANY, by
one code unit otherwise. */
6342if (common->nltype == NLTYPE_ANY)
6343 move_back(common, NULL, FALSE);
6344else
6345 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6346
6347loop = LABEL();
6348common->ff_newline_shortcut = loop;
6349
6350#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
6351if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF))
6352 {
6353 if (common->nltype == NLTYPE_ANYCRLF)
6354 {
/* Search for CR or LF, then consume the found code unit; "quit" is taken
when it is not a CR, so the CR+NL handling below is skipped. */
6355 fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0);
6356 if (common->mode != PCRE2_JIT_COMPLETE)
6357 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6358
6359 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6360 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6361 quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6362 }
6363 else
6364 {
/* Search for the fixed newline code unit and step over it; outside
PCRE2_JIT_COMPLETE mode STR_PTR is clamped to STR_END afterwards. */
6365 fast_forward_char_simd(common, common->newline, common->newline, 0);
6366
6367 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6368 if (common->mode != PCRE2_JIT_COMPLETE)
6369 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07006370 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07006371 CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
6372 }
6373 }
6374 }
6375else
6376#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */
6377 {
/* Generic loop: read a character, stop at the end of the input, leave via
"foundcr" on CR (ANY / ANYCRLF types only); the jumps collected by
check_newlinechar() in "newline" are routed back to the loop start. */
6378 read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE);
6379 lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6380 if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6381 foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
6382 check_newlinechar(common, common->nltype, &newline, FALSE);
6383 set_jumps(newline, loop);
6384 }
6385
/* CR handling for the ANY and ANYCRLF types: if the code unit at STR_PTR
is CHAR_NL (and STR_PTR is below STR_END), step over it as well. TMP1
becomes 0 or 1 and is scaled to the code unit size before being added. */
6386if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
6387 {
/* quit is still NULL when the generic loop was emitted: the non-CR path
jumps over this section and the CR path (foundcr) enters it here. */
6388 if (quit == NULL)
6389 {
6390 quit = JUMP(SLJIT_JUMP);
6391 JUMPHERE(foundcr);
6392 }
6393
6394 notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6395 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07006396 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
Elliott Hughes5b808042021-10-01 10:56:10 -07006397 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
6398#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
6399 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
6400#endif
6401 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
6402 JUMPHERE(notfoundnl);
6403 JUMPHERE(quit);
6404 }
6405
/* lastchar stays NULL on the paths that did not emit an end-of-input jump. */
6406if (lastchar)
6407 JUMPHERE(lastchar);
6408JUMPHERE(firstchar);
6409
6410if (common->match_end_ptr != 0)
6411 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
6412}
6413
6414static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks);
6415
6416static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common)
6417{
6418DEFINE_COMPILER;
6419const sljit_u8 *start_bits = common->re->start_bitmap;
6420struct sljit_label *start;
6421struct sljit_jump *partial_quit;
6422#if PCRE2_CODE_UNIT_WIDTH != 8
6423struct sljit_jump *found = NULL;
6424#endif
6425jump_list *matches = NULL;
6426
6427if (common->match_end_ptr != 0)
6428 {
6429 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
6430 OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
6431 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07006432 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07006433 CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
6434 }
6435
6436start = LABEL();
6437
6438partial_quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
6439if (common->mode == PCRE2_JIT_COMPLETE)
6440 add_jump(compiler, &common->failed_match, partial_quit);
6441
6442OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
6443OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6444
6445if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &matches))
6446 {
6447#if PCRE2_CODE_UNIT_WIDTH != 8
6448 if ((start_bits[31] & 0x80) != 0)
6449 found = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255);
6450 else
6451 CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 255, start);
6452#elif defined SUPPORT_UNICODE
6453 if (common->utf && is_char7_bitset(start_bits, FALSE))
6454 CMPTO(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 127, start);
6455#endif
6456 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
6457 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
6458 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
6459 if (!HAS_VIRTUAL_REGISTERS)
6460 {
6461 OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07006462 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07006463 }
6464 else
6465 {
6466 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07006467 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07006468 }
6469 JUMPTO(SLJIT_ZERO, start);
6470 }
6471else
6472 set_jumps(matches, start);
6473
6474#if PCRE2_CODE_UNIT_WIDTH != 8
6475if (found != NULL)
6476 JUMPHERE(found);
6477#endif
6478
6479OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6480
6481if (common->mode != PCRE2_JIT_COMPLETE)
6482 JUMPHERE(partial_quit);
6483
6484if (common->match_end_ptr != 0)
6485 OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
6486}
6487
6488static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar)
6489{
6490DEFINE_COMPILER;
6491struct sljit_label *loop;
6492struct sljit_jump *toolong;
6493struct sljit_jump *already_found;
6494struct sljit_jump *found;
6495struct sljit_jump *found_oc = NULL;
6496jump_list *not_found = NULL;
6497sljit_u32 oc, bit;
6498
6499SLJIT_ASSERT(common->req_char_ptr != 0);
6500OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100);
6501OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
6502toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0);
6503already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0);
6504
6505if (has_firstchar)
6506 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
6507else
6508 OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
6509
6510oc = req_char;
6511if (caseless)
6512 {
6513 oc = TABLE_GET(req_char, common->fcc, req_char);
6514#if defined SUPPORT_UNICODE
6515 if (req_char > 127 && (common->utf || common->ucp))
6516 oc = UCD_OTHERCASE(req_char);
6517#endif
6518 }
6519
6520#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD
6521if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD)
6522 {
6523 not_found = fast_requested_char_simd(common, req_char, oc);
6524 }
6525else
6526#endif
6527 {
6528 loop = LABEL();
6529 add_jump(compiler, &not_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0));
6530
6531 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
6532
6533 if (req_char == oc)
6534 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6535 else
6536 {
6537 bit = req_char ^ oc;
6538 if (is_powerof2(bit))
6539 {
6540 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit);
6541 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit);
6542 }
6543 else
6544 {
6545 found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
6546 found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc);
6547 }
6548 }
6549 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
6550 JUMPTO(SLJIT_JUMP, loop);
6551
6552 JUMPHERE(found);
6553 if (found_oc)
6554 JUMPHERE(found_oc);
6555 }
6556
6557OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
6558
6559JUMPHERE(already_found);
6560JUMPHERE(toolong);
6561return not_found;
6562}
6563
6564static void do_revertframes(compiler_common *common)
6565{
6566DEFINE_COMPILER;
6567struct sljit_jump *jump;
6568struct sljit_label *mainloop;
6569
6570sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
6571GET_LOCAL_BASE(TMP1, 0, 0);
6572
6573/* Drop frames until we reach STACK_TOP. */
6574mainloop = LABEL();
6575OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
6576jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
6577
6578OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
6579if (HAS_VIRTUAL_REGISTERS)
6580 {
6581 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
6582 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
6583 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
6584 }
6585else
6586 {
6587 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
6588 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
6589 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
6590 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
6591 GET_LOCAL_BASE(TMP1, 0, 0);
6592 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
6593 }
6594JUMPTO(SLJIT_JUMP, mainloop);
6595
6596JUMPHERE(jump);
6597jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
6598/* End of reverting values. */
6599OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
6600
6601JUMPHERE(jump);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07006602OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07006603OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
6604if (HAS_VIRTUAL_REGISTERS)
6605 {
6606 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
6607 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
6608 }
6609else
6610 {
6611 OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
6612 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
6613 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
6614 }
6615JUMPTO(SLJIT_JUMP, mainloop);
6616}
6617
/* Emits the shared word boundary helper. It is entered as a fast call and
keeps its return address in the LOCALS0 stack slot.

The routine computes two 0/1 values:
  TMP3 - whether the character before STR_PTR is a word character (0 when
         STR_PTR is not above the "begin" field of the JIT arguments),
  TMP2 - whether the character at STR_PTR is a word character (0 when
         check_str_end() reports the end of the input),
and returns with TMP2 = TMP2 ^ TMP3 and the zero flag set from that result.

In UCP mode a character counts as a word character when it is
CHAR_UNDERSCORE or when its type, obtained through common->getucdtype, lies
in ucp_Ll..ucp_Lu or ucp_Nd..ucp_No. Otherwise bit 4 (ctype_word) of the
common->ctypes entry is used, and characters above 255 are never word
characters.

In invalid UTF mode two extra exits exist: if the previous character is
invalid and the current one reads as INVALID_UTF_CHAR, TMP2 is set to -1;
if the current character is invalid, TMP2 is set to TMP3. */
6618static void check_wordboundary(compiler_common *common)
6619{
6620DEFINE_COMPILER;
6621struct sljit_jump *skipread;
6622jump_list *skipread_list = NULL;
6623#ifdef SUPPORT_UNICODE
6624struct sljit_label *valid_utf;
6625jump_list *invalid_utf1 = NULL;
6626#endif /* SUPPORT_UNICODE */
6627jump_list *invalid_utf2 = NULL;
6628#if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE
6629struct sljit_jump *jump;
6630#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */
6631
/* The non-UCP path extracts the word flag with a shift by 4 and a mask of 1. */
6632SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
6633
6634sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6635/* Get type of the previous char, and put it to TMP3. */
6636OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
6637OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
6638OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
6639skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
6640
/* Read the previous character into TMP1. Outside PCRE2_JIT_COMPLETE mode
move_back() and check_start_used_ptr() are emitted as well; in the invalid
UTF variant TMP1 and STR_PTR are preserved around them in RETURN_ADDR and
TMP2. */
6641#ifdef SUPPORT_UNICODE
6642if (common->invalid_utf)
6643 {
6644 peek_char_back(common, READ_CHAR_MAX, &invalid_utf1);
6645
6646 if (common->mode != PCRE2_JIT_COMPLETE)
6647 {
6648 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
6649 OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0);
6650 move_back(common, NULL, TRUE);
6651 check_start_used_ptr(common);
6652 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
6653 OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0);
6654 }
6655 }
6656else
6657#endif /* SUPPORT_UNICODE */
6658 {
6659 if (common->mode == PCRE2_JIT_COMPLETE)
6660 peek_char_back(common, READ_CHAR_MAX, NULL);
6661 else
6662 {
6663 move_back(common, NULL, TRUE);
6664 check_start_used_ptr(common);
6665 read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR);
6666 }
6667 }
6668
6669/* Testing char type. */
6670#ifdef SUPPORT_UNICODE
6671if (common->ucp)
6672 {
/* TMP2 is preset to 1 for the underscore shortcut; otherwise it is built
from the two type range tests and finally copied to TMP3. */
6673 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
6674 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
6675 add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
6676 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07006677 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
Elliott Hughes5b808042021-10-01 10:56:10 -07006678 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6679 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07006680 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
Elliott Hughes5b808042021-10-01 10:56:10 -07006681 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6682 JUMPHERE(jump);
6683 OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
6684 }
6685else
6686#endif /* SUPPORT_UNICODE */
6687 {
/* Characters above 255 skip the table lookup and keep TMP3 == 0. */
6688#if PCRE2_CODE_UNIT_WIDTH != 8
6689 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6690#elif defined SUPPORT_UNICODE
6691 /* Here TMP3 has already been zeroed. */
6692 jump = NULL;
6693 if (common->utf)
6694 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6695#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6696 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
6697 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
6698 OP2(SLJIT_AND, TMP3, 0, TMP1, 0, SLJIT_IMM, 1);
6699#if PCRE2_CODE_UNIT_WIDTH != 8
6700 JUMPHERE(jump);
6701#elif defined SUPPORT_UNICODE
6702 if (jump != NULL)
6703 JUMPHERE(jump);
6704#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6705 }
6706JUMPHERE(skipread);
6707
/* Current character: TMP2 defaults to 0 for the end of input case, whose
jumps are collected in skipread_list. LOCALS1 is handed to peek_char().
NOTE(review): presumably as scratch storage - confirm. */
6708OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6709check_str_end(common, &skipread_list);
6710peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);
6711
6712/* Testing char type. This is a code duplication. */
6713#ifdef SUPPORT_UNICODE
6714
/* Re-entry point used by the invalid_utf1 path emitted at the end. */
6715valid_utf = LABEL();
6716
6717if (common->ucp)
6718 {
6719 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
6720 jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
6721 add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
6722 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07006723 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
Elliott Hughes5b808042021-10-01 10:56:10 -07006724 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
6725 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07006726 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
Elliott Hughes5b808042021-10-01 10:56:10 -07006727 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
6728 JUMPHERE(jump);
6729 }
6730else
6731#endif /* SUPPORT_UNICODE */
6732 {
6733#if PCRE2_CODE_UNIT_WIDTH != 8
6734 /* TMP2 may be destroyed by peek_char. */
6735 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6736 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6737#elif defined SUPPORT_UNICODE
6738 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6739 jump = NULL;
6740 if (common->utf)
6741 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
6742#endif
6743 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
6744 OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
6745 OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
6746#if PCRE2_CODE_UNIT_WIDTH != 8
6747 JUMPHERE(jump);
6748#elif defined SUPPORT_UNICODE
6749 if (jump != NULL)
6750 JUMPHERE(jump);
6751#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
6752 }
6753set_jumps(skipread_list, LABEL());
6754
/* Normal exit: reload the return address and combine the two flags. */
6755OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6756OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0);
6757OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
6758
6759#ifdef SUPPORT_UNICODE
6760if (common->invalid_utf)
6761 {
/* Previous character invalid: if the current one does not read as
INVALID_UTF_CHAR, continue at valid_utf; otherwise return with TMP2 = -1. */
6762 set_jumps(invalid_utf1, LABEL());
6763
6764 peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL);
6765 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf);
6766
6767 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6768 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1);
6769 OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
6770
/* Current character invalid: return with TMP2 = TMP3 (no XOR applied). */
6771 set_jumps(invalid_utf2, LABEL());
6772 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
6773 OP1(SLJIT_MOV, TMP2, 0, TMP3, 0);
6774 OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
6775 }
6776#endif /* SUPPORT_UNICODE */
6777}
6778
6779static BOOL optimize_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
6780{
6781/* May destroy TMP1. */
6782DEFINE_COMPILER;
6783int ranges[MAX_CLASS_RANGE_SIZE];
6784sljit_u8 bit, cbit, all;
6785int i, byte, length = 0;
6786
6787bit = bits[0] & 0x1;
6788/* All bits will be zero or one (since bit is zero or one). */
6789all = -bit;
6790
6791for (i = 0; i < 256; )
6792 {
6793 byte = i >> 3;
6794 if ((i & 0x7) == 0 && bits[byte] == all)
6795 i += 8;
6796 else
6797 {
6798 cbit = (bits[byte] >> (i & 0x7)) & 0x1;
6799 if (cbit != bit)
6800 {
6801 if (length >= MAX_CLASS_RANGE_SIZE)
6802 return FALSE;
6803 ranges[length] = i;
6804 length++;
6805 bit = cbit;
6806 all = -cbit;
6807 }
6808 i++;
6809 }
6810 }
6811
6812if (((bit == 0) && nclass) || ((bit == 1) && !nclass))
6813 {
6814 if (length >= MAX_CLASS_RANGE_SIZE)
6815 return FALSE;
6816 ranges[length] = 256;
6817 length++;
6818 }
6819
6820if (length < 0 || length > 4)
6821 return FALSE;
6822
6823bit = bits[0] & 0x1;
6824if (invert) bit ^= 0x1;
6825
6826/* No character is accepted. */
6827if (length == 0 && bit == 0)
6828 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
6829
6830switch(length)
6831 {
6832 case 0:
6833 /* When bit != 0, all characters are accepted. */
6834 return TRUE;
6835
6836 case 1:
6837 add_jump(compiler, backtracks, CMP(bit == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
6838 return TRUE;
6839
6840 case 2:
6841 if (ranges[0] + 1 != ranges[1])
6842 {
6843 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
6844 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
6845 }
6846 else
6847 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
6848 return TRUE;
6849
6850 case 3:
6851 if (bit != 0)
6852 {
6853 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
6854 if (ranges[0] + 1 != ranges[1])
6855 {
6856 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
6857 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
6858 }
6859 else
6860 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
6861 return TRUE;
6862 }
6863
6864 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
6865 if (ranges[1] + 1 != ranges[2])
6866 {
6867 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
6868 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
6869 }
6870 else
6871 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
6872 return TRUE;
6873
6874 case 4:
6875 if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
6876 && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
6877 && (ranges[1] & (ranges[2] - ranges[0])) == 0
6878 && is_powerof2(ranges[2] - ranges[0]))
6879 {
6880 SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
6881 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
6882 if (ranges[2] + 1 != ranges[3])
6883 {
6884 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
6885 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
6886 }
6887 else
6888 add_jump(compiler, backtracks, CMP(bit != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
6889 return TRUE;
6890 }
6891
6892 if (bit != 0)
6893 {
6894 i = 0;
6895 if (ranges[0] + 1 != ranges[1])
6896 {
6897 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
6898 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
6899 i = ranges[0];
6900 }
6901 else
6902 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
6903
6904 if (ranges[2] + 1 != ranges[3])
6905 {
6906 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - i);
6907 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
6908 }
6909 else
6910 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - i));
6911 return TRUE;
6912 }
6913
6914 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
6915 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
6916 if (ranges[1] + 1 != ranges[2])
6917 {
6918 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
6919 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
6920 }
6921 else
6922 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
6923 return TRUE;
6924
6925 default:
6926 SLJIT_UNREACHABLE();
6927 return FALSE;
6928 }
6929}
6930
6931static BOOL optimize_class_chars(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
6932{
6933/* May destroy TMP1. */
6934DEFINE_COMPILER;
6935uint16_t char_list[MAX_CLASS_CHARS_SIZE];
6936uint8_t byte;
6937sljit_s32 type;
6938int i, j, k, len, c;
6939
6940if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
6941 return FALSE;
6942
6943len = 0;
6944
6945for (i = 0; i < 32; i++)
6946 {
6947 byte = bits[i];
6948
6949 if (nclass)
6950 byte = ~byte;
6951
6952 j = 0;
6953 while (byte != 0)
6954 {
6955 if (byte & 0x1)
6956 {
6957 c = i * 8 + j;
6958
6959 k = len;
6960
6961 if ((c & 0x20) != 0)
6962 {
6963 for (k = 0; k < len; k++)
6964 if (char_list[k] == c - 0x20)
6965 {
6966 char_list[k] |= 0x120;
6967 break;
6968 }
6969 }
6970
6971 if (k == len)
6972 {
6973 if (len >= MAX_CLASS_CHARS_SIZE)
6974 return FALSE;
6975
6976 char_list[len++] = (uint16_t) c;
6977 }
6978 }
6979
6980 byte >>= 1;
6981 j++;
6982 }
6983 }
6984
6985if (len == 0) return FALSE; /* Should never occur, but stops analyzers complaining. */
6986
6987i = 0;
6988j = 0;
6989
6990if (char_list[0] == 0)
6991 {
6992 i++;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07006993 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07006994 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
6995 }
6996else
6997 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
6998
6999while (i < len)
7000 {
7001 if ((char_list[i] & 0x100) != 0)
7002 j++;
7003 else
7004 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007005 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i]);
Elliott Hughes5b808042021-10-01 10:56:10 -07007006 CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
7007 }
7008 i++;
7009 }
7010
7011if (j != 0)
7012 {
7013 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x20);
7014
7015 for (i = 0; i < len; i++)
7016 if ((char_list[i] & 0x100) != 0)
7017 {
7018 j--;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007019 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);
Elliott Hughes5b808042021-10-01 10:56:10 -07007020 CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
7021 }
7022 }
7023
7024if (invert)
7025 nclass = !nclass;
7026
7027type = nclass ? SLJIT_NOT_EQUAL : SLJIT_EQUAL;
7028add_jump(compiler, backtracks, CMP(type, TMP2, 0, SLJIT_IMM, 0));
7029return TRUE;
7030}
7031
7032static BOOL optimize_class(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
7033{
7034/* May destroy TMP1. */
7035if (optimize_class_ranges(common, bits, nclass, invert, backtracks))
7036 return TRUE;
7037return optimize_class_chars(common, bits, nclass, invert, backtracks);
7038}
7039
7040static void check_anynewline(compiler_common *common)
7041{
7042/* Check whether TMP1 contains a newline character. TMP2 destroyed.
The emitted helper is entered with sljit_emit_fast_enter and leaves through
SLJIT_FAST_RETURN, using RETURN_ADDR for the return address. It tests for
0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 when the wide character part below
is compiled in and enabled. TMP1 is modified as well. TMP2 collects the
individual comparison results and the last OP_FLAGS also sets the zero flag
from the combined value. */
7043DEFINE_COMPILER;
7044
7045sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
7046
/* TMP1 is rebased to 0x0a so that 0x0a-0x0d becomes an unsigned <= 3 test. */
7047OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007048OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
Elliott Hughes5b808042021-10-01 10:56:10 -07007049OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007050OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
Elliott Hughes5b808042021-10-01 10:56:10 -07007051#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7052#if PCRE2_CODE_UNIT_WIDTH == 8
7053if (common->utf)
7054 {
7055#endif
/* The 0x85 result is merged here; setting bit 0 of the rebased value lets a
single comparison cover both 0x2028 and 0x2029. */
7056 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7057 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007058 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
Elliott Hughes5b808042021-10-01 10:56:10 -07007059#if PCRE2_CODE_UNIT_WIDTH == 8
7060 }
7061#endif
7062#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merges the result of whichever equality comparison was emitted last. */
7063OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7064OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7065}
7066
7067static void check_hspace(compiler_common *common)
7068{
7069/* Check whether TMP1 contains a horizontal space character. TMP2 destroyed.
The emitted helper is entered with sljit_emit_fast_enter and leaves through
SLJIT_FAST_RETURN, using RETURN_ADDR for the return address. It tests for
0x09, 0x20 and 0xa0, plus 0x1680, 0x180e, 0x2000-0x200a, 0x202f, 0x205f and
0x3000 when the wide character part below is compiled in and enabled (in that
case TMP1 is also rebased by 0x2000). TMP2 collects the individual comparison
results and the last OP_FLAGS also sets the zero flag from the combined
value. */
7070DEFINE_COMPILER;
7071
7072sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
7073
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007074OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09);
Elliott Hughes5b808042021-10-01 10:56:10 -07007075OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007076OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20);
Elliott Hughes5b808042021-10-01 10:56:10 -07007077OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007078OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0);
Elliott Hughes5b808042021-10-01 10:56:10 -07007079#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7080#if PCRE2_CODE_UNIT_WIDTH == 8
7081if (common->utf)
7082 {
7083#endif
7084 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007085 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);
Elliott Hughes5b808042021-10-01 10:56:10 -07007086 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007087 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);
Elliott Hughes5b808042021-10-01 10:56:10 -07007088 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
/* TMP1 is rebased to 0x2000 so that 0x2000-0x200a becomes an unsigned <= 0x0a
test; the remaining comparisons use the same base. */
7089 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007090 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
Elliott Hughes5b808042021-10-01 10:56:10 -07007091 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007092 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
Elliott Hughes5b808042021-10-01 10:56:10 -07007093 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007094 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
Elliott Hughes5b808042021-10-01 10:56:10 -07007095 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007096 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
Elliott Hughes5b808042021-10-01 10:56:10 -07007097#if PCRE2_CODE_UNIT_WIDTH == 8
7098 }
7099#endif
7100#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merges the result of whichever equality comparison was emitted last. */
7101OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7102
7103OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7104}
7105
7106static void check_vspace(compiler_common *common)
7107{
7108/* Check whether TMP1 contains a vertical space character. TMP2 destroyed.
The emitted helper is entered with sljit_emit_fast_enter and leaves through
SLJIT_FAST_RETURN, using RETURN_ADDR for the return address. It tests for
0x0a-0x0d and 0x85, plus 0x2028 and 0x2029 when the wide character part below
is compiled in and enabled. TMP1 is modified as well. TMP2 collects the
individual comparison results and the last OP_FLAGS also sets the zero flag
from the combined value. */
7109DEFINE_COMPILER;
7110
7111sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
7112
/* TMP1 is rebased to 0x0a so that 0x0a-0x0d becomes an unsigned <= 3 test. */
7113OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007114OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
Elliott Hughes5b808042021-10-01 10:56:10 -07007115OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007116OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
Elliott Hughes5b808042021-10-01 10:56:10 -07007117#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7118#if PCRE2_CODE_UNIT_WIDTH == 8
7119if (common->utf)
7120 {
7121#endif
/* The 0x85 result is merged here; setting bit 0 of the rebased value lets a
single comparison cover both 0x2028 and 0x2029. */
7122 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7123 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007124 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
Elliott Hughes5b808042021-10-01 10:56:10 -07007125#if PCRE2_CODE_UNIT_WIDTH == 8
7126 }
7127#endif
7128#endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */
/* Merges the result of whichever equality comparison was emitted last. */
7129OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7130
7131OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
7132}
7133
7134static void do_casefulcmp(compiler_common *common)
7135{
7136DEFINE_COMPILER;
7137struct sljit_jump *jump;
7138struct sljit_label *label;
7139int char1_reg;
7140int char2_reg;
7141
7142if (HAS_VIRTUAL_REGISTERS)
7143 {
7144 char1_reg = STR_END;
7145 char2_reg = STACK_TOP;
7146 }
7147else
7148 {
7149 char1_reg = TMP3;
7150 char2_reg = RETURN_ADDR;
7151 }
7152
7153sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7154OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
7155
7156if (char1_reg == STR_END)
7157 {
7158 OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
7159 OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
7160 }
7161
7162if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7163 {
7164 label = LABEL();
7165 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7166 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7167 jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7168 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7169 JUMPTO(SLJIT_NOT_ZERO, label);
7170
7171 JUMPHERE(jump);
7172 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7173 }
7174else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7175 {
7176 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7177 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7178
7179 label = LABEL();
7180 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7181 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7182 jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7183 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7184 JUMPTO(SLJIT_NOT_ZERO, label);
7185
7186 JUMPHERE(jump);
7187 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7188 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7189 }
7190else
7191 {
7192 label = LABEL();
7193 OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
7194 OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
7195 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7196 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7197 jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7198 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7199 JUMPTO(SLJIT_NOT_ZERO, label);
7200
7201 JUMPHERE(jump);
7202 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7203 }
7204
7205if (char1_reg == STR_END)
7206 {
7207 OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
7208 OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
7209 }
7210
7211OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7212}
7213
7214static void do_caselesscmp(compiler_common *common)
7215{
7216DEFINE_COMPILER;
7217struct sljit_jump *jump;
7218struct sljit_label *label;
7219int char1_reg = STR_END;
7220int char2_reg;
7221int lcc_table;
7222int opt_type = 0;
7223
7224if (HAS_VIRTUAL_REGISTERS)
7225 {
7226 char2_reg = STACK_TOP;
7227 lcc_table = STACK_LIMIT;
7228 }
7229else
7230 {
7231 char2_reg = RETURN_ADDR;
7232 lcc_table = TMP3;
7233 }
7234
7235if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7236 opt_type = 1;
7237else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
7238 opt_type = 2;
7239
7240sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7241OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
7242
7243OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);
7244
7245if (char2_reg == STACK_TOP)
7246 {
7247 OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
7248 OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
7249 }
7250
7251OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
7252
7253if (opt_type == 1)
7254 {
7255 label = LABEL();
7256 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7257 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7258 }
7259else if (opt_type == 2)
7260 {
7261 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7262 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7263
7264 label = LABEL();
7265 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
7266 sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
7267 }
7268else
7269 {
7270 label = LABEL();
7271 OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
7272 OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
7273 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
7274 }
7275
7276#if PCRE2_CODE_UNIT_WIDTH != 8
7277jump = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
7278#endif
7279OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
7280#if PCRE2_CODE_UNIT_WIDTH != 8
7281JUMPHERE(jump);
7282jump = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
7283#endif
7284OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
7285#if PCRE2_CODE_UNIT_WIDTH != 8
7286JUMPHERE(jump);
7287#endif
7288
7289if (opt_type == 0)
7290 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7291
7292jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
7293OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
7294JUMPTO(SLJIT_NOT_ZERO, label);
7295
7296JUMPHERE(jump);
7297OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7298
7299if (opt_type == 2)
7300 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
7301
7302if (char2_reg == STACK_TOP)
7303 {
7304 OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
7305 OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
7306 }
7307
7308OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
7309OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
7310}
7311
7312static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc,
7313 compare_context *context, jump_list **backtracks)
7314{
/* Emits the comparison of one pattern character starting at cc (all of its
code units when common->utf is set and the first unit has extra units) against
the subject data addressed as STR_PTR - context->length. A jump is added to
"backtracks" for every mismatch. Returns cc advanced past the code units that
were processed.

"context" carries state between consecutive calls: length is reduced by
IN_UCHARS(1) per code unit, sourcereg names the register that receives the next
load (-1 before the first call), and, in the unaligned build, c / oc / ucharptr
collect code units so that several can be compared with one instruction.

For a caseless character that has another case, char_get_othercase_bit supplies
a bit value plus the index of the code unit it belongs to; that bit is OR-ed
into both the loaded data and the expected value before comparing. */
7315DEFINE_COMPILER;
7316unsigned int othercasebit = 0;
7317PCRE2_SPTR othercasechar = NULL;
7318#ifdef SUPPORT_UNICODE
7319int utflength;
7320#endif
7321
7322if (caseless && char_has_othercase(common, cc))
7323 {
7324 othercasebit = char_get_othercase_bit(common, cc);
7325 SLJIT_ASSERT(othercasebit);
7326 /* Extracting bit difference info. */
7327#if PCRE2_CODE_UNIT_WIDTH == 8
7328 othercasechar = cc + (othercasebit >> 8);
7329 othercasebit &= 0xff;
7330#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
7331 /* Note that this code only handles characters in the BMP. If there
7332 ever are characters outside the BMP whose othercase differs in only one
7333 bit from itself (there currently are none), this code will need to be
7334 revised for PCRE2_CODE_UNIT_WIDTH == 32. */
7335 othercasechar = cc + (othercasebit >> 9);
7336 if ((othercasebit & 0x100) != 0)
7337 othercasebit = (othercasebit & 0xff) << 8;
7338 else
7339 othercasebit &= 0xff;
7340#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
7341 }
7342
/* First call for this context: the first chunk is loaded into TMP1 and TMP2
becomes the target of the next load. */
7343if (context->sourcereg == -1)
7344 {
7345#if PCRE2_CODE_UNIT_WIDTH == 8
7346#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7347 if (context->length >= 4)
7348 OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7349 else if (context->length >= 2)
7350 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7351 else
7352#endif
7353 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7354#elif PCRE2_CODE_UNIT_WIDTH == 16
7355#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
7356 if (context->length >= 4)
7357 OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7358 else
7359#endif
7360 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7361#elif PCRE2_CODE_UNIT_WIDTH == 32
7362 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length);
7363#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
7364 context->sourcereg = TMP2;
7365 }
7366
/* With Unicode support the body below runs once per code unit of the
character; otherwise the do / while is compiled out and it runs once. */
7367#ifdef SUPPORT_UNICODE
7368utflength = 1;
7369if (common->utf && HAS_EXTRALEN(*cc))
7370 utflength += GET_EXTRALEN(*cc);
7371
7372do
7373 {
7374#endif
7375
7376 context->length -= IN_UCHARS(1);
7377#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7378
7379 /* Unaligned read is supported. */
/* The expected value (with the case bit already set) goes into context->c and
the bit to OR into the loaded data goes into context->oc. */
7380 if (othercasebit != 0 && othercasechar == cc)
7381 {
7382 context->c.asuchars[context->ucharptr] = *cc | othercasebit;
7383 context->oc.asuchars[context->ucharptr] = othercasebit;
7384 }
7385 else
7386 {
7387 context->c.asuchars[context->ucharptr] = *cc;
7388 context->oc.asuchars[context->ucharptr] = 0;
7389 }
7390 context->ucharptr++;
7391
/* A compare is emitted once the collected units match the size of the chunk
loaded earlier, or when nothing remains. */
7392#if PCRE2_CODE_UNIT_WIDTH == 8
7393 if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1))
7394#else
7395 if (context->ucharptr >= 2 || context->length == 0)
7396#endif
7397 {
/* The next chunk is loaded into sourcereg first; sourcereg is then switched to
the other temporary, which holds the chunk loaded earlier, and that one is
compared. */
7398 if (context->length >= 4)
7399 OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7400 else if (context->length >= 2)
7401 OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7402#if PCRE2_CODE_UNIT_WIDTH == 8
7403 else if (context->length >= 1)
7404 OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7405#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7406 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
7407
/* The case labels are code unit counts that correspond to 4, 2 and 1 bytes. */
7408 switch(context->ucharptr)
7409 {
7410 case 4 / sizeof(PCRE2_UCHAR):
7411 if (context->oc.asint != 0)
7412 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint);
7413 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint));
7414 break;
7415
7416 case 2 / sizeof(PCRE2_UCHAR):
7417 if (context->oc.asushort != 0)
7418 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort);
7419 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort));
7420 break;
7421
7422#if PCRE2_CODE_UNIT_WIDTH == 8
7423 case 1:
7424 if (context->oc.asbyte != 0)
7425 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte);
7426 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte));
7427 break;
7428#endif
7429
7430 default:
7431 SLJIT_UNREACHABLE();
7432 break;
7433 }
7434 context->ucharptr = 0;
7435 }
7436
7437#else
7438
7439 /* Unaligned read is unsupported or in 32 bit mode. */
/* One code unit per step: the next unit is loaded into sourcereg, then the
unit loaded on the previous step is compared in the other temporary. */
7440 if (context->length >= 1)
7441 OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length);
7442
7443 context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1;
7444
7445 if (othercasebit != 0 && othercasechar == cc)
7446 {
7447 OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit);
7448 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit));
7449 }
7450 else
7451 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc));
7452
7453#endif
7454
7455 cc++;
7456#ifdef SUPPORT_UNICODE
7457 utflength--;
7458 }
7459while (utflength > 0);
7460#endif
7461
7462return cc;
7463}
7464
7465#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
7466
/* Makes typereg hold "type - value": emits an ADD or SUB of the difference
between the requested offset and the current typeoffset (nothing when they are
equal), then records the new offset. Relies on "typereg" and "typeoffset" in
the scope of the user, and on whatever OP2 needs there.
NOTE(review): the expansion is an if statement followed by an assignment, so it
must not be used as the unbraced body of an if / else / loop. */
7467#define SET_TYPE_OFFSET(value) \
7468 if ((value) != typeoffset) \
7469 { \
7470 if ((value) < typeoffset) \
7471 OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
7472 else \
7473 OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
7474 } \
7475 typeoffset = (value);
7476
/* Same as SET_TYPE_OFFSET, but for the character in TMP1 and "charoffset".
The same NOTE(review) about unbraced use applies. */
7477#define SET_CHAR_OFFSET(value) \
7478 if ((value) != charoffset) \
7479 { \
7480 if ((value) < charoffset) \
7481 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
7482 else \
7483 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
7484 } \
7485 charoffset = (value);
7486
/* Forward declaration; compile_xclass_matchingpath below calls it. */
7487static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
7488
/* Bits of the unicode_status variable in compile_xclass_matchingpath.
SAVE_CHAR: some item needs the original character, so it has to be copied to
  RETURN_ADDR before the property lookup changes TMP1.
CHAR_SAVED: that copy has already been made.
HAS_TYPE / HAS_SCRIPT / HAS_SCRIPT_EXTENSION / HAS_BOOL / HAS_BIDICL: the class
  contains a property item of the corresponding kind.
NEEDS_UCD: any of the HAS_* bits, i.e. a character database lookup is needed.
SCRIPT_EXTENSION_NOTPROP: a script extension item appears in negated form.
The two RESTORE bits are not used in the part of the function that sets the
other flags; presumably they mark values to reload after the script extension
test - confirm at their use sites. */
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007489#ifdef SUPPORT_UNICODE
7490#define XCLASS_SAVE_CHAR 0x001
7491#define XCLASS_CHAR_SAVED 0x002
7492#define XCLASS_HAS_TYPE 0x004
7493#define XCLASS_HAS_SCRIPT 0x008
7494#define XCLASS_HAS_SCRIPT_EXTENSION 0x010
7495#define XCLASS_HAS_BOOL 0x020
7496#define XCLASS_HAS_BIDICL 0x040
7497#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL)
7498#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080
7499#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100
7500#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200
7501
7502#endif /* SUPPORT_UNICODE */
7503
Elliott Hughes5b808042021-10-01 10:56:10 -07007504static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
7505{
7506DEFINE_COMPILER;
7507jump_list *found = NULL;
7508jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
7509sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
7510struct sljit_jump *jump = NULL;
7511PCRE2_SPTR ccbegin;
7512int compares, invertcmp, numberofcmps;
7513#if defined SUPPORT_UNICODE && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
7514BOOL utf = common->utf;
7515#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
7516
7517#ifdef SUPPORT_UNICODE
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007518sljit_u32 unicode_status = 0;
Elliott Hughes5b808042021-10-01 10:56:10 -07007519int typereg = TMP1;
7520const sljit_u32 *other_cases;
7521sljit_uw typeoffset;
7522#endif /* SUPPORT_UNICODE */
7523
7524/* Scanning the necessary info. */
7525cc++;
7526ccbegin = cc;
7527compares = 0;
7528
7529if (cc[-1] & XCL_MAP)
7530 {
7531 min = 0;
7532 cc += 32 / sizeof(PCRE2_UCHAR);
7533 }
7534
7535while (*cc != XCL_END)
7536 {
7537 compares++;
7538 if (*cc == XCL_SINGLE)
7539 {
7540 cc ++;
7541 GETCHARINCTEST(c, cc);
7542 if (c > max) max = c;
7543 if (c < min) min = c;
7544#ifdef SUPPORT_UNICODE
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007545 unicode_status |= XCLASS_SAVE_CHAR;
Elliott Hughes5b808042021-10-01 10:56:10 -07007546#endif /* SUPPORT_UNICODE */
7547 }
7548 else if (*cc == XCL_RANGE)
7549 {
7550 cc ++;
7551 GETCHARINCTEST(c, cc);
7552 if (c < min) min = c;
7553 GETCHARINCTEST(c, cc);
7554 if (c > max) max = c;
7555#ifdef SUPPORT_UNICODE
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007556 unicode_status |= XCLASS_SAVE_CHAR;
Elliott Hughes5b808042021-10-01 10:56:10 -07007557#endif /* SUPPORT_UNICODE */
7558 }
7559#ifdef SUPPORT_UNICODE
7560 else
7561 {
7562 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7563 cc++;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007564 if (*cc == PT_CLIST && cc[-1] == XCL_PROP)
Elliott Hughes5b808042021-10-01 10:56:10 -07007565 {
7566 other_cases = PRIV(ucd_caseless_sets) + cc[1];
7567 while (*other_cases != NOTACHAR)
7568 {
7569 if (*other_cases > max) max = *other_cases;
7570 if (*other_cases < min) min = *other_cases;
7571 other_cases++;
7572 }
7573 }
7574 else
7575 {
7576 max = READ_CHAR_MAX;
7577 min = 0;
7578 }
7579
7580 switch(*cc)
7581 {
7582 case PT_ANY:
7583 /* Any either accepts everything or ignored. */
7584 if (cc[-1] == XCL_PROP)
7585 {
7586 compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
7587 if (list == backtracks)
7588 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7589 return;
7590 }
7591 break;
7592
7593 case PT_LAMP:
7594 case PT_GC:
7595 case PT_PC:
7596 case PT_ALNUM:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007597 unicode_status |= XCLASS_HAS_TYPE;
Elliott Hughes5b808042021-10-01 10:56:10 -07007598 break;
7599
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007600 case PT_SCX:
7601 unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION;
7602 if (cc[-1] == XCL_NOTPROP)
7603 {
7604 unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP;
7605 break;
7606 }
7607 compares++;
7608 /* Fall through */
7609
Elliott Hughes5b808042021-10-01 10:56:10 -07007610 case PT_SC:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007611 unicode_status |= XCLASS_HAS_SCRIPT;
Elliott Hughes5b808042021-10-01 10:56:10 -07007612 break;
7613
7614 case PT_SPACE:
7615 case PT_PXSPACE:
7616 case PT_WORD:
7617 case PT_PXGRAPH:
7618 case PT_PXPRINT:
7619 case PT_PXPUNCT:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007620 unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE;
Elliott Hughes5b808042021-10-01 10:56:10 -07007621 break;
7622
7623 case PT_CLIST:
7624 case PT_UCNC:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007625 unicode_status |= XCLASS_SAVE_CHAR;
7626 break;
7627
7628 case PT_BOOL:
7629 unicode_status |= XCLASS_HAS_BOOL;
7630 break;
7631
7632 case PT_BIDICL:
7633 unicode_status |= XCLASS_HAS_BIDICL;
Elliott Hughes5b808042021-10-01 10:56:10 -07007634 break;
7635
7636 default:
7637 SLJIT_UNREACHABLE();
7638 break;
7639 }
7640 cc += 2;
7641 }
7642#endif /* SUPPORT_UNICODE */
7643 }
7644SLJIT_ASSERT(compares > 0);
7645
7646/* We are not necessary in utf mode even in 8 bit mode. */
7647cc = ccbegin;
7648if ((cc[-1] & XCL_NOT) != 0)
7649 read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR);
7650else
7651 {
7652#ifdef SUPPORT_UNICODE
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007653 read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07007654#else /* !SUPPORT_UNICODE */
7655 read_char(common, min, max, NULL, 0);
7656#endif /* SUPPORT_UNICODE */
7657 }
7658
7659if ((cc[-1] & XCL_HASPROP) == 0)
7660 {
7661 if ((cc[-1] & XCL_MAP) != 0)
7662 {
7663 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7664 if (!optimize_class(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
7665 {
7666 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7667 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7668 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7669 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007670 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07007671 add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
7672 }
7673
7674 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
7675 JUMPHERE(jump);
7676
7677 cc += 32 / sizeof(PCRE2_UCHAR);
7678 }
7679 else
7680 {
7681 OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
7682 add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
7683 }
7684 }
7685else if ((cc[-1] & XCL_MAP) != 0)
7686 {
7687 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7688#ifdef SUPPORT_UNICODE
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007689 unicode_status |= XCLASS_CHAR_SAVED;
Elliott Hughes5b808042021-10-01 10:56:10 -07007690#endif /* SUPPORT_UNICODE */
7691 if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
7692 {
7693#if PCRE2_CODE_UNIT_WIDTH == 8
7694 jump = NULL;
7695 if (common->utf)
7696#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7697 jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
7698
7699 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
7700 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
7701 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
7702 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007703 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07007704 add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
7705
7706#if PCRE2_CODE_UNIT_WIDTH == 8
7707 if (common->utf)
7708#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
7709 JUMPHERE(jump);
7710 }
7711
7712 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
7713 cc += 32 / sizeof(PCRE2_UCHAR);
7714 }
7715
7716#ifdef SUPPORT_UNICODE
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007717if (unicode_status & XCLASS_NEEDS_UCD)
Elliott Hughes5b808042021-10-01 10:56:10 -07007718 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007719 if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR)
Elliott Hughes5b808042021-10-01 10:56:10 -07007720 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
7721
7722#if PCRE2_CODE_UNIT_WIDTH == 32
7723 if (!common->utf)
7724 {
7725 jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, MAX_UTF_CODE_POINT + 1);
7726 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, UNASSIGNED_UTF_CHAR);
7727 JUMPHERE(jump);
7728 }
7729#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
7730
7731 OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7732 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
7733 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
7734 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
7735 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
7736 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
7737 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
7738 OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007739 OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
7740 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
7741 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07007742
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007743 ccbegin = cc;
7744
7745 if (unicode_status & XCLASS_HAS_BIDICL)
Elliott Hughes5b808042021-10-01 10:56:10 -07007746 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007747 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
7748 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);
Elliott Hughes5b808042021-10-01 10:56:10 -07007749
7750 while (*cc != XCL_END)
7751 {
7752 if (*cc == XCL_SINGLE)
7753 {
7754 cc ++;
7755 GETCHARINCTEST(c, cc);
7756 }
7757 else if (*cc == XCL_RANGE)
7758 {
7759 cc ++;
7760 GETCHARINCTEST(c, cc);
7761 GETCHARINCTEST(c, cc);
7762 }
7763 else
7764 {
7765 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7766 cc++;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007767 if (*cc == PT_BIDICL)
Elliott Hughes5b808042021-10-01 10:56:10 -07007768 {
7769 compares--;
7770 invertcmp = (compares == 0 && list != backtracks);
7771 if (cc[-1] == XCL_NOTPROP)
7772 invertcmp ^= 0x1;
7773 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
7774 add_jump(compiler, compares > 0 ? list : backtracks, jump);
7775 }
7776 cc += 2;
7777 }
7778 }
7779
7780 cc = ccbegin;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007781 }
Elliott Hughes5b808042021-10-01 10:56:10 -07007782
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007783 if (unicode_status & XCLASS_HAS_BOOL)
7784 {
7785 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
7786 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
7787 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
7788
7789 while (*cc != XCL_END)
Elliott Hughes5b808042021-10-01 10:56:10 -07007790 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007791 if (*cc == XCL_SINGLE)
Elliott Hughes5b808042021-10-01 10:56:10 -07007792 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007793 cc ++;
7794 GETCHARINCTEST(c, cc);
7795 }
7796 else if (*cc == XCL_RANGE)
7797 {
7798 cc ++;
7799 GETCHARINCTEST(c, cc);
7800 GETCHARINCTEST(c, cc);
Elliott Hughes5b808042021-10-01 10:56:10 -07007801 }
7802 else
7803 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007804 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7805 cc++;
7806 if (*cc == PT_BOOL)
7807 {
7808 compares--;
7809 invertcmp = (compares == 0 && list != backtracks);
7810 if (cc[-1] == XCL_NOTPROP)
7811 invertcmp ^= 0x1;
Elliott Hughes5b808042021-10-01 10:56:10 -07007812
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007813 OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f));
7814 add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
7815 }
7816 cc += 2;
Elliott Hughes5b808042021-10-01 10:56:10 -07007817 }
7818 }
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007819
7820 cc = ccbegin;
Elliott Hughes5b808042021-10-01 10:56:10 -07007821 }
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007822
7823 if (unicode_status & XCLASS_HAS_SCRIPT)
Elliott Hughes5b808042021-10-01 10:56:10 -07007824 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007825 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
Elliott Hughes5b808042021-10-01 10:56:10 -07007826
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007827 while (*cc != XCL_END)
Elliott Hughes5b808042021-10-01 10:56:10 -07007828 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007829 if (*cc == XCL_SINGLE)
7830 {
7831 cc ++;
7832 GETCHARINCTEST(c, cc);
7833 }
7834 else if (*cc == XCL_RANGE)
7835 {
7836 cc ++;
7837 GETCHARINCTEST(c, cc);
7838 GETCHARINCTEST(c, cc);
7839 }
7840 else
7841 {
7842 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7843 cc++;
7844 switch (*cc)
7845 {
7846 case PT_SCX:
7847 if (cc[-1] == XCL_NOTPROP)
7848 break;
7849 /* Fall through */
Elliott Hughes5b808042021-10-01 10:56:10 -07007850
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007851 case PT_SC:
7852 compares--;
7853 invertcmp = (compares == 0 && list != backtracks);
7854 if (cc[-1] == XCL_NOTPROP)
7855 invertcmp ^= 0x1;
Elliott Hughes5b808042021-10-01 10:56:10 -07007856
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007857 add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]));
7858 }
7859 cc += 2;
7860 }
Elliott Hughes5b808042021-10-01 10:56:10 -07007861 }
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007862
7863 cc = ccbegin;
Elliott Hughes5b808042021-10-01 10:56:10 -07007864 }
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007865
7866 if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION)
7867 {
7868 OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
7869 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
7870 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
7871
7872 if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
7873 {
7874 if (unicode_status & XCLASS_HAS_TYPE)
7875 {
7876 if (unicode_status & XCLASS_SAVE_CHAR)
7877 {
7878 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0);
7879 unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0;
7880 }
7881 else
7882 {
7883 OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
7884 unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR;
7885 }
7886 }
7887 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
7888 }
7889
7890 while (*cc != XCL_END)
7891 {
7892 if (*cc == XCL_SINGLE)
7893 {
7894 cc ++;
7895 GETCHARINCTEST(c, cc);
7896 }
7897 else if (*cc == XCL_RANGE)
7898 {
7899 cc ++;
7900 GETCHARINCTEST(c, cc);
7901 GETCHARINCTEST(c, cc);
7902 }
7903 else
7904 {
7905 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
7906 cc++;
7907 if (*cc == PT_SCX)
7908 {
7909 compares--;
7910 invertcmp = (compares == 0 && list != backtracks);
7911
7912 jump = NULL;
7913 if (cc[-1] == XCL_NOTPROP)
7914 {
7915 jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]);
7916 if (invertcmp)
7917 {
7918 add_jump(compiler, backtracks, jump);
7919 jump = NULL;
7920 }
7921 invertcmp ^= 0x1;
7922 }
7923
7924 OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f));
7925 add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
7926
7927 if (jump != NULL)
7928 JUMPHERE(jump);
7929 }
7930 cc += 2;
7931 }
7932 }
7933
7934 if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0)
7935 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
7936 else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR)
7937 OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
7938 cc = ccbegin;
7939 }
7940
7941 if (unicode_status & XCLASS_SAVE_CHAR)
Elliott Hughes5b808042021-10-01 10:56:10 -07007942 OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007943
7944 if (unicode_status & XCLASS_HAS_TYPE)
7945 {
7946 if (unicode_status & XCLASS_SAVE_CHAR)
7947 typereg = RETURN_ADDR;
7948
7949 OP1(SLJIT_MOV_U8, typereg, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
7950 }
Elliott Hughes5b808042021-10-01 10:56:10 -07007951 }
7952#endif /* SUPPORT_UNICODE */
7953
7954/* Generating code. */
7955charoffset = 0;
7956numberofcmps = 0;
7957#ifdef SUPPORT_UNICODE
7958typeoffset = 0;
7959#endif /* SUPPORT_UNICODE */
7960
7961while (*cc != XCL_END)
7962 {
7963 compares--;
7964 invertcmp = (compares == 0 && list != backtracks);
7965 jump = NULL;
7966
7967 if (*cc == XCL_SINGLE)
7968 {
7969 cc ++;
7970 GETCHARINCTEST(c, cc);
7971
7972 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
7973 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007974 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
Elliott Hughes5b808042021-10-01 10:56:10 -07007975 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
7976 numberofcmps++;
7977 }
7978 else if (numberofcmps > 0)
7979 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07007980 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
Elliott Hughes5b808042021-10-01 10:56:10 -07007981 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
7982 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
7983 numberofcmps = 0;
7984 }
7985 else
7986 {
7987 jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
7988 numberofcmps = 0;
7989 }
7990 }
7991 else if (*cc == XCL_RANGE)
7992 {
7993 cc ++;
7994 GETCHARINCTEST(c, cc);
7995 SET_CHAR_OFFSET(c);
7996 GETCHARINCTEST(c, cc);
7997
7998 if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
7999 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008000 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
Elliott Hughes5b808042021-10-01 10:56:10 -07008001 OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8002 numberofcmps++;
8003 }
8004 else if (numberofcmps > 0)
8005 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008006 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
Elliott Hughes5b808042021-10-01 10:56:10 -07008007 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8008 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8009 numberofcmps = 0;
8010 }
8011 else
8012 {
8013 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
8014 numberofcmps = 0;
8015 }
8016 }
8017#ifdef SUPPORT_UNICODE
8018 else
8019 {
8020 SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
8021 if (*cc == XCL_NOTPROP)
8022 invertcmp ^= 0x1;
8023 cc++;
8024 switch(*cc)
8025 {
8026 case PT_ANY:
8027 if (!invertcmp)
8028 jump = JUMP(SLJIT_JUMP);
8029 break;
8030
8031 case PT_LAMP:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008032 OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
Elliott Hughes5b808042021-10-01 10:56:10 -07008033 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008034 OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
Elliott Hughes5b808042021-10-01 10:56:10 -07008035 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008036 OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
Elliott Hughes5b808042021-10-01 10:56:10 -07008037 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
8038 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8039 break;
8040
8041 case PT_GC:
8042 c = PRIV(ucp_typerange)[(int)cc[1] * 2];
8043 SET_TYPE_OFFSET(c);
8044 jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
8045 break;
8046
8047 case PT_PC:
8048 jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
8049 break;
8050
8051 case PT_SC:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008052 case PT_SCX:
8053 case PT_BOOL:
8054 case PT_BIDICL:
Elliott Hughes5b808042021-10-01 10:56:10 -07008055 compares++;
8056 /* Do nothing. */
8057 break;
8058
8059 case PT_SPACE:
8060 case PT_PXSPACE:
8061 SET_CHAR_OFFSET(9);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008062 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
Elliott Hughes5b808042021-10-01 10:56:10 -07008063 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8064
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008065 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
Elliott Hughes5b808042021-10-01 10:56:10 -07008066 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8067
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008068 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
Elliott Hughes5b808042021-10-01 10:56:10 -07008069 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8070
8071 SET_TYPE_OFFSET(ucp_Zl);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008072 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
Elliott Hughes5b808042021-10-01 10:56:10 -07008073 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8074 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8075 break;
8076
8077 case PT_WORD:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008078 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
Elliott Hughes5b808042021-10-01 10:56:10 -07008079 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8080 /* Fall through. */
8081
8082 case PT_ALNUM:
8083 SET_TYPE_OFFSET(ucp_Ll);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008084 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
Elliott Hughes5b808042021-10-01 10:56:10 -07008085 OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8086 SET_TYPE_OFFSET(ucp_Nd);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008087 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
Elliott Hughes5b808042021-10-01 10:56:10 -07008088 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8089 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8090 break;
8091
8092 case PT_CLIST:
8093 other_cases = PRIV(ucd_caseless_sets) + cc[1];
8094
8095 /* At least three characters are required.
8096 Otherwise this case would be handled by the normal code path. */
8097 SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
8098 SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);
8099
8100 /* Optimizing character pairs whose XOR is a power of 2 (they differ in exactly one bit). */
8101 if (is_powerof2(other_cases[1] ^ other_cases[0]))
8102 {
8103 if (charoffset == 0)
8104 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8105 else
8106 {
8107 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8108 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8109 }
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008110 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]);
Elliott Hughes5b808042021-10-01 10:56:10 -07008111 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8112 other_cases += 2;
8113 }
8114 else if (is_powerof2(other_cases[2] ^ other_cases[1]))
8115 {
8116 if (charoffset == 0)
8117 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
8118 else
8119 {
8120 OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
8121 OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
8122 }
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008123 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]);
Elliott Hughes5b808042021-10-01 10:56:10 -07008124 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8125
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008126 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
Elliott Hughes5b808042021-10-01 10:56:10 -07008127 OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8128
8129 other_cases += 3;
8130 }
8131 else
8132 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008133 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
Elliott Hughes5b808042021-10-01 10:56:10 -07008134 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
8135 }
8136
8137 while (*other_cases != NOTACHAR)
8138 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008139 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
Elliott Hughes5b808042021-10-01 10:56:10 -07008140 OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
8141 }
8142 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8143 break;
8144
8145 case PT_UCNC:
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008146 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
Elliott Hughes5b808042021-10-01 10:56:10 -07008147 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008148 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
Elliott Hughes5b808042021-10-01 10:56:10 -07008149 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008150 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
Elliott Hughes5b808042021-10-01 10:56:10 -07008151 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8152
8153 SET_CHAR_OFFSET(0xa0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008154 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
Elliott Hughes5b808042021-10-01 10:56:10 -07008155 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
8156 SET_CHAR_OFFSET(0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008157 OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07008158 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
8159 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8160 break;
8161
8162 case PT_PXGRAPH:
8163 /* C and Z groups are the farthest two groups. */
8164 SET_TYPE_OFFSET(ucp_Ll);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008165 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
Elliott Hughes5b808042021-10-01 10:56:10 -07008166 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
8167
8168 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
8169
8170 /* In case of ucp_Cf, we overwrite the result. */
8171 SET_CHAR_OFFSET(0x2066);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008172 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
Elliott Hughes5b808042021-10-01 10:56:10 -07008173 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8174
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008175 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
Elliott Hughes5b808042021-10-01 10:56:10 -07008176 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8177
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008178 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
Elliott Hughes5b808042021-10-01 10:56:10 -07008179 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8180
8181 JUMPHERE(jump);
8182 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8183 break;
8184
8185 case PT_PXPRINT:
8186 /* C and Z groups are the farthest two groups. */
8187 SET_TYPE_OFFSET(ucp_Ll);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008188 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
Elliott Hughes5b808042021-10-01 10:56:10 -07008189 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
8190
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008191 OP2U(SLJIT_SUB | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
Elliott Hughes5b808042021-10-01 10:56:10 -07008192 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
8193
8194 jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
8195
8196 /* In case of ucp_Cf, we overwrite the result. */
8197 SET_CHAR_OFFSET(0x2066);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008198 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
Elliott Hughes5b808042021-10-01 10:56:10 -07008199 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8200
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008201 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
Elliott Hughes5b808042021-10-01 10:56:10 -07008202 OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
8203
8204 JUMPHERE(jump);
8205 jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
8206 break;
8207
8208 case PT_PXPUNCT:
8209 SET_TYPE_OFFSET(ucp_Sc);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008210 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
Elliott Hughes5b808042021-10-01 10:56:10 -07008211 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
8212
8213 SET_CHAR_OFFSET(0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008214 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f);
Elliott Hughes5b808042021-10-01 10:56:10 -07008215 OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
8216
8217 SET_TYPE_OFFSET(ucp_Pc);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008218 OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
Elliott Hughes5b808042021-10-01 10:56:10 -07008219 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
8220 jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
8221 break;
8222
8223 default:
8224 SLJIT_UNREACHABLE();
8225 break;
8226 }
8227 cc += 2;
8228 }
8229#endif /* SUPPORT_UNICODE */
8230
8231 if (jump != NULL)
8232 add_jump(compiler, compares > 0 ? list : backtracks, jump);
8233 }
8234
8235if (found != NULL)
8236 set_jumps(found, LABEL());
8237}
8238
8239#undef SET_TYPE_OFFSET
8240#undef SET_CHAR_OFFSET
8241
8242#endif
8243
8244static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
8245{
8246DEFINE_COMPILER;
8247int length;
8248struct sljit_jump *jump[4];
8249#ifdef SUPPORT_UNICODE
8250struct sljit_label *label;
8251#endif /* SUPPORT_UNICODE */
8252
8253switch(type)
8254 {
8255 case OP_SOD:
8256 if (HAS_VIRTUAL_REGISTERS)
8257 {
8258 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8259 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8260 }
8261 else
8262 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8263 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8264 return cc;
8265
8266 case OP_SOM:
8267 if (HAS_VIRTUAL_REGISTERS)
8268 {
8269 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8270 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
8271 }
8272 else
8273 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
8274 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
8275 return cc;
8276
8277 case OP_NOT_WORD_BOUNDARY:
8278 case OP_WORD_BOUNDARY:
8279 add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
8280#ifdef SUPPORT_UNICODE
8281 if (common->invalid_utf)
8282 {
8283 add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
8284 return cc;
8285 }
8286#endif /* SUPPORT_UNICODE */
8287 sljit_set_current_flags(compiler, SLJIT_SET_Z);
8288 add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8289 return cc;
8290
8291 case OP_EODN:
8292 /* Requires rather complex checks. */
8293 jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
8294 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8295 {
8296 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8297 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8298 if (common->mode == PCRE2_JIT_COMPLETE)
8299 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8300 else
8301 {
8302 jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008303 OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07008304 OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008305 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
Elliott Hughes5b808042021-10-01 10:56:10 -07008306 OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
8307 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
8308 check_partial(common, TRUE);
8309 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8310 JUMPHERE(jump[1]);
8311 }
8312 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8313 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8314 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8315 }
8316 else if (common->nltype == NLTYPE_FIXED)
8317 {
8318 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8319 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8320 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
8321 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
8322 }
8323 else
8324 {
8325 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8326 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8327 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008328 OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07008329 jump[2] = JUMP(SLJIT_GREATER);
8330 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
8331 /* Equal. */
8332 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8333 jump[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8334 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8335
8336 JUMPHERE(jump[1]);
8337 if (common->nltype == NLTYPE_ANYCRLF)
8338 {
8339 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8340 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
8341 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
8342 }
8343 else
8344 {
8345 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
8346 read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8347 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
8348 add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
8349 sljit_set_current_flags(compiler, SLJIT_SET_Z);
8350 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
8351 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
8352 }
8353 JUMPHERE(jump[2]);
8354 JUMPHERE(jump[3]);
8355 }
8356 JUMPHERE(jump[0]);
8357 if (common->mode != PCRE2_JIT_COMPLETE)
8358 check_partial(common, TRUE);
8359 return cc;
8360
8361 case OP_EOD:
8362 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8363 if (common->mode != PCRE2_JIT_COMPLETE)
8364 check_partial(common, TRUE);
8365 return cc;
8366
8367 case OP_DOLL:
8368 if (HAS_VIRTUAL_REGISTERS)
8369 {
8370 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008371 OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
Elliott Hughes5b808042021-10-01 10:56:10 -07008372 }
8373 else
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008374 OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
Elliott Hughes5b808042021-10-01 10:56:10 -07008375 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8376
8377 if (!common->endonly)
8378 compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
8379 else
8380 {
8381 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
8382 check_partial(common, FALSE);
8383 }
8384 return cc;
8385
8386 case OP_DOLLM:
8387 jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
8388 if (HAS_VIRTUAL_REGISTERS)
8389 {
8390 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008391 OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
Elliott Hughes5b808042021-10-01 10:56:10 -07008392 }
8393 else
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008394 OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
Elliott Hughes5b808042021-10-01 10:56:10 -07008395 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8396 check_partial(common, FALSE);
8397 jump[0] = JUMP(SLJIT_JUMP);
8398 JUMPHERE(jump[1]);
8399
8400 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8401 {
8402 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8403 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
8404 if (common->mode == PCRE2_JIT_COMPLETE)
8405 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
8406 else
8407 {
8408 jump[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
8409 /* STR_PTR = STR_END - IN_UCHARS(1) */
8410 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8411 check_partial(common, TRUE);
8412 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
8413 JUMPHERE(jump[1]);
8414 }
8415
8416 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
8417 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8418 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8419 }
8420 else
8421 {
8422 peek_char(common, common->nlmax, TMP3, 0, NULL);
8423 check_newlinechar(common, common->nltype, backtracks, FALSE);
8424 }
8425 JUMPHERE(jump[0]);
8426 return cc;
8427
8428 case OP_CIRC:
8429 if (HAS_VIRTUAL_REGISTERS)
8430 {
8431 OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
8432 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
8433 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008434 OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
Elliott Hughes5b808042021-10-01 10:56:10 -07008435 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8436 }
8437 else
8438 {
8439 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8440 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008441 OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
Elliott Hughes5b808042021-10-01 10:56:10 -07008442 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8443 }
8444 return cc;
8445
8446 case OP_CIRCM:
8447 /* TMP2 might be used by peek_char_back. */
8448 if (HAS_VIRTUAL_REGISTERS)
8449 {
8450 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8451 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8452 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008453 OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
Elliott Hughes5b808042021-10-01 10:56:10 -07008454 }
8455 else
8456 {
8457 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8458 jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008459 OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
Elliott Hughes5b808042021-10-01 10:56:10 -07008460 }
8461 add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
8462 jump[0] = JUMP(SLJIT_JUMP);
8463 JUMPHERE(jump[1]);
8464
8465 if (!common->alt_circumflex)
8466 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8467
8468 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8469 {
8470 OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
8471 add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, TMP2, 0));
8472 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
8473 OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
8474 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff));
8475 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff));
8476 }
8477 else
8478 {
8479 peek_char_back(common, common->nlmax, backtracks);
8480 check_newlinechar(common, common->nltype, backtracks, FALSE);
8481 }
8482 JUMPHERE(jump[0]);
8483 return cc;
8484
8485 case OP_REVERSE:
8486 length = GET(cc, 0);
8487 if (length == 0)
8488 return cc + LINK_SIZE;
8489 if (HAS_VIRTUAL_REGISTERS)
8490 {
8491 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
8492 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
8493 }
8494 else
8495 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
8496#ifdef SUPPORT_UNICODE
8497 if (common->utf)
8498 {
8499 OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);
8500 label = LABEL();
8501 add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
8502 move_back(common, backtracks, FALSE);
8503 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
8504 JUMPTO(SLJIT_NOT_ZERO, label);
8505 }
8506 else
8507#endif
8508 {
8509 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
8510 add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
8511 }
8512 check_start_used_ptr(common);
8513 return cc + LINK_SIZE;
8514 }
8515SLJIT_UNREACHABLE();
8516return cc;
8517}
8518
8519#ifdef SUPPORT_UNICODE
8520
8521#if PCRE2_CODE_UNIT_WIDTH != 32
8522
8523static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc)
8524{
8525PCRE2_SPTR start_subject = args->begin;
8526PCRE2_SPTR end_subject = args->end;
8527int lgb, rgb, ricount;
8528PCRE2_SPTR prevcc, endcc, bptr;
8529BOOL first = TRUE;
8530uint32_t c;
8531
8532prevcc = cc;
8533endcc = NULL;
8534do
8535 {
8536 GETCHARINC(c, cc);
8537 rgb = UCD_GRAPHBREAK(c);
8538
8539 if (first)
8540 {
8541 lgb = rgb;
8542 endcc = cc;
8543 first = FALSE;
8544 continue;
8545 }
8546
8547 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8548 break;
8549
8550 /* Not breaking between Regional Indicators is allowed only if there
8551 are an even number of preceding RIs. */
8552
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008553 if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
Elliott Hughes5b808042021-10-01 10:56:10 -07008554 {
8555 ricount = 0;
8556 bptr = prevcc;
8557
8558 /* bptr is pointing to the left-hand character */
8559 while (bptr > start_subject)
8560 {
8561 bptr--;
8562 BACKCHAR(bptr);
8563 GETCHAR(c, bptr);
8564
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008565 if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
Elliott Hughes5b808042021-10-01 10:56:10 -07008566 break;
8567
8568 ricount++;
8569 }
8570
8571 if ((ricount & 1) != 0) break; /* Grapheme break required */
8572 }
8573
8574 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8575 allows any number of them before a following Extended_Pictographic. */
8576
8577 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8578 lgb != ucp_gbExtended_Pictographic)
8579 lgb = rgb;
8580
8581 prevcc = endcc;
8582 endcc = cc;
8583 }
8584while (cc < end_subject);
8585
8586return endcc;
8587}
8588
8589#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */
8590
8591static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc)
8592{
8593PCRE2_SPTR start_subject = args->begin;
8594PCRE2_SPTR end_subject = args->end;
8595int lgb, rgb, ricount;
8596PCRE2_SPTR prevcc, endcc, bptr;
8597BOOL first = TRUE;
8598uint32_t c;
8599
8600prevcc = cc;
8601endcc = NULL;
8602do
8603 {
8604 GETCHARINC_INVALID(c, cc, end_subject, break);
8605 rgb = UCD_GRAPHBREAK(c);
8606
8607 if (first)
8608 {
8609 lgb = rgb;
8610 endcc = cc;
8611 first = FALSE;
8612 continue;
8613 }
8614
8615 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8616 break;
8617
8618 /* Not breaking between Regional Indicators is allowed only if there
8619 are an even number of preceding RIs. */
8620
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008621 if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
Elliott Hughes5b808042021-10-01 10:56:10 -07008622 {
8623 ricount = 0;
8624 bptr = prevcc;
8625
8626 /* bptr is pointing to the left-hand character */
8627 while (bptr > start_subject)
8628 {
8629 GETCHARBACK_INVALID(c, bptr, start_subject, break);
8630
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008631 if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
Elliott Hughes5b808042021-10-01 10:56:10 -07008632 break;
8633
8634 ricount++;
8635 }
8636
8637 if ((ricount & 1) != 0)
8638 break; /* Grapheme break required */
8639 }
8640
8641 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8642 allows any number of them before a following Extended_Pictographic. */
8643
8644 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8645 lgb != ucp_gbExtended_Pictographic)
8646 lgb = rgb;
8647
8648 prevcc = endcc;
8649 endcc = cc;
8650 }
8651while (cc < end_subject);
8652
8653return endcc;
8654}
8655
8656static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc)
8657{
8658PCRE2_SPTR start_subject = args->begin;
8659PCRE2_SPTR end_subject = args->end;
8660int lgb, rgb, ricount;
8661PCRE2_SPTR bptr;
8662uint32_t c;
8663
8664/* Patch by PH */
8665/* GETCHARINC(c, cc); */
8666c = *cc++;
8667
8668#if PCRE2_CODE_UNIT_WIDTH == 32
8669if (c >= 0x110000)
8670 return NULL;
8671#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8672lgb = UCD_GRAPHBREAK(c);
8673
8674while (cc < end_subject)
8675 {
8676 c = *cc;
8677#if PCRE2_CODE_UNIT_WIDTH == 32
8678 if (c >= 0x110000)
8679 break;
8680#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8681 rgb = UCD_GRAPHBREAK(c);
8682
8683 if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0)
8684 break;
8685
8686 /* Not breaking between Regional Indicators is allowed only if there
8687 are an even number of preceding RIs. */
8688
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008689 if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
Elliott Hughes5b808042021-10-01 10:56:10 -07008690 {
8691 ricount = 0;
8692 bptr = cc - 1;
8693
8694 /* bptr is pointing to the left-hand character */
8695 while (bptr > start_subject)
8696 {
8697 bptr--;
8698 c = *bptr;
8699#if PCRE2_CODE_UNIT_WIDTH == 32
8700 if (c >= 0x110000)
8701 break;
8702#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
8703
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008704 if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
Elliott Hughes5b808042021-10-01 10:56:10 -07008705
8706 ricount++;
8707 }
8708
8709 if ((ricount & 1) != 0)
8710 break; /* Grapheme break required */
8711 }
8712
8713 /* If Extend or ZWJ follows Extended_Pictographic, do not update lgb; this
8714 allows any number of them before a following Extended_Pictographic. */
8715
8716 if ((rgb != ucp_gbExtend && rgb != ucp_gbZWJ) ||
8717 lgb != ucp_gbExtended_Pictographic)
8718 lgb = rgb;
8719
8720 cc++;
8721 }
8722
8723return cc;
8724}
8725
8726#endif /* SUPPORT_UNICODE */
8727
/* Emits the matching-path code for one single-character item.

Arguments:
  common         the compiler state
  type           the opcode to compile (selects the switch case below)
  cc             points to the opcode's arguments, if it has any
  backtracks     jump list that receives every "no match" jump
  check_str_ptr  when TRUE, detect_partial_match() is called (or, for
                 OP_CHAR/OP_CHARI in complete mode, STR_PTR is compared with
                 STR_END) before the character is read

Returns: cc advanced past the arguments consumed by the opcode (cc itself
         for argument-less opcodes). The order of the OP1/OP2/CMP/JUMP calls
         is the order of the emitted instructions, so it must not change. */
8728static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr)
8729{
8730DEFINE_COMPILER;
8731int length;
8732unsigned int c, oc, bit;
8733compare_context context;
8734struct sljit_jump *jump[3];
8735jump_list *end_list;
8736#ifdef SUPPORT_UNICODE
8737PCRE2_UCHAR propdata[5];
8738#endif /* SUPPORT_UNICODE */
8739
8740switch(type)
8741 {
8742 case OP_NOT_DIGIT:
8743 case OP_DIGIT:
8744 /* Digits are usually 0-9, so it is worth optimizing them. */
8745 if (check_str_ptr)
8746 detect_partial_match(common, backtracks);
8747#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8748 if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_digit, FALSE))
8749 read_char7_type(common, backtracks, type == OP_NOT_DIGIT);
8750 else
8751#endif
8752 read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
8753 /* Flip the starting bit in the negative case. */
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008754 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit);
Elliott Hughes5b808042021-10-01 10:56:10 -07008755 add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8756 return cc;
8757
 /* Same scheme as the digit case, testing the ctype_space bit instead. */
8758 case OP_NOT_WHITESPACE:
8759 case OP_WHITESPACE:
8760 if (check_str_ptr)
8761 detect_partial_match(common, backtracks);
8762#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8763 if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_space, FALSE))
8764 read_char7_type(common, backtracks, type == OP_NOT_WHITESPACE);
8765 else
8766#endif
8767 read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008768 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space);
Elliott Hughes5b808042021-10-01 10:56:10 -07008769 add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8770 return cc;
8771
 /* Same scheme as the digit case, testing the ctype_word bit instead. */
8772 case OP_NOT_WORDCHAR:
8773 case OP_WORDCHAR:
8774 if (check_str_ptr)
8775 detect_partial_match(common, backtracks);
8776#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
8777 if (common->utf && is_char7_bitset((const sljit_u8*)common->ctypes - cbit_length + cbit_word, FALSE))
8778 read_char7_type(common, backtracks, type == OP_NOT_WORDCHAR);
8779 else
8780#endif
8781 read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008782 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word);
Elliott Hughes5b808042021-10-01 10:56:10 -07008783 add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
8784 return cc;
8785
 /* A fixed newline with a value above 255 is compared by hand: the
 character just read against (newline >> 8) & 0xff and the next code unit
 against newline & 0xff. Every other newline type goes through
 check_newlinechar(). */
8786 case OP_ANY:
8787 if (check_str_ptr)
8788 detect_partial_match(common, backtracks);
8789 read_char(common, common->nlmin, common->nlmax, backtracks, READ_CHAR_UPDATE_STR_PTR);
8790 if (common->nltype == NLTYPE_FIXED && common->newline > 255)
8791 {
8792 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
8793 end_list = NULL;
8794 if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8795 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8796 else
8797 check_str_end(common, &end_list);
8798
8799 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8800 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
8801 set_jumps(end_list, LABEL());
8802 JUMPHERE(jump[0]);
8803 }
8804 else
8805 check_newlinechar(common, common->nltype, backtracks, TRUE);
8806 return cc;
8807
 /* Advances STR_PTR over one character without testing its value. In the
 8-bit and 16-bit UTF paths the extra step is computed from the first code
 unit (a PRIV(utf8_table4) lookup, or 2 when (unit & 0xfc00) == 0xd800). */
8808 case OP_ALLANY:
8809 if (check_str_ptr)
8810 detect_partial_match(common, backtracks);
8811#ifdef SUPPORT_UNICODE
8812 if (common->utf)
8813 {
8814 if (common->invalid_utf)
8815 {
8816 read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
8817 return cc;
8818 }
8819
8820#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
8821 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8822 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8823#if PCRE2_CODE_UNIT_WIDTH == 8
8824 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
8825 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
8826 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8827#elif PCRE2_CODE_UNIT_WIDTH == 16
8828 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
8829 OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008830 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
Elliott Hughes5b808042021-10-01 10:56:10 -07008831 OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
8832 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
8833 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
8834#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
8835 JUMPHERE(jump[0]);
8836 return cc;
8837#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
8838 }
8839#endif /* SUPPORT_UNICODE */
8840 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8841 return cc;
8842
 /* Always advances STR_PTR by exactly one code unit. */
8843 case OP_ANYBYTE:
8844 if (check_str_ptr)
8845 detect_partial_match(common, backtracks);
8846 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8847 return cc;
8848
8849#ifdef SUPPORT_UNICODE
 /* The two argument units at cc are wrapped into a temporary five-unit
 extended-class description and handed to compile_xclass_matchingpath(). */
8850 case OP_NOTPROP:
8851 case OP_PROP:
8852 propdata[0] = XCL_HASPROP;
8853 propdata[1] = type == OP_NOTPROP ? XCL_NOTPROP : XCL_PROP;
8854 propdata[2] = cc[0];
8855 propdata[3] = cc[1];
8856 propdata[4] = XCL_END;
8857 if (check_str_ptr)
8858 detect_partial_match(common, backtracks);
8859 compile_xclass_matchingpath(common, propdata, backtracks);
8860 return cc + 2;
8861#endif
8862
 /* A CR is handled first: when a NL follows it, STR_PTR is advanced over
 that NL as well. Any other character is passed to check_newlinechar()
 using the bsr_nltype setting. */
8863 case OP_ANYNL:
8864 if (check_str_ptr)
8865 detect_partial_match(common, backtracks);
8866 read_char(common, common->bsr_nlmin, common->bsr_nlmax, NULL, 0);
8867 jump[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
8868 /* We don't need to handle soft partial matching case. */
8869 end_list = NULL;
8870 if (common->mode != PCRE2_JIT_PARTIAL_HARD)
8871 add_jump(compiler, &end_list, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8872 else
8873 check_str_end(common, &end_list);
8874 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
8875 jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
8876 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
8877 jump[2] = JUMP(SLJIT_JUMP);
8878 JUMPHERE(jump[0]);
8879 check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
8880 set_jumps(end_list, LABEL());
8881 JUMPHERE(jump[1]);
8882 JUMPHERE(jump[2]);
8883 return cc;
8884
 /* The test itself is done by the shared routine reached through the
 common->hspace jump list; the zero flag state after the call decides
 between match and backtrack. */
8885 case OP_NOT_HSPACE:
8886 case OP_HSPACE:
8887 if (check_str_ptr)
8888 detect_partial_match(common, backtracks);
8889
8890 if (type == OP_NOT_HSPACE)
8891 read_char(common, 0x9, 0x3000, backtracks, READ_CHAR_UPDATE_STR_PTR);
8892 else
8893 read_char(common, 0x9, 0x3000, NULL, 0);
8894
8895 add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
8896 sljit_set_current_flags(compiler, SLJIT_SET_Z);
8897 add_jump(compiler, backtracks, JUMP(type == OP_NOT_HSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8898 return cc;
8899
 /* Same scheme as the horizontal space case, using common->vspace. */
8900 case OP_NOT_VSPACE:
8901 case OP_VSPACE:
8902 if (check_str_ptr)
8903 detect_partial_match(common, backtracks);
8904
8905 if (type == OP_NOT_VSPACE)
8906 read_char(common, 0xa, 0x2029, backtracks, READ_CHAR_UPDATE_STR_PTR);
8907 else
8908 read_char(common, 0xa, 0x2029, NULL, 0);
8909
8910 add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
8911 sljit_set_current_flags(compiler, SLJIT_SET_Z);
8912 add_jump(compiler, backtracks, JUMP(type == OP_NOT_VSPACE ? SLJIT_NOT_ZERO : SLJIT_ZERO));
8913 return cc;
8914
8915#ifdef SUPPORT_UNICODE
 /* Calls one of the do_extuni_* C helpers. ARGUMENTS is moved into R0
 (TMP1) and, per the assertion, STR_PTR already lives in R1. On the paths
 that check it, a zero return value backtracks; otherwise the returned
 pointer becomes the new STR_PTR. */
8916 case OP_EXTUNI:
8917 if (check_str_ptr)
8918 detect_partial_match(common, backtracks);
8919
8920 SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
8921 OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
8922
8923#if PCRE2_CODE_UNIT_WIDTH != 32
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008924 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
8925 common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
Elliott Hughes5b808042021-10-01 10:56:10 -07008926 if (common->invalid_utf)
8927 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
8928#else
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008929 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
8930 common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
Elliott Hughes5b808042021-10-01 10:56:10 -07008931 if (!common->utf || common->invalid_utf)
8932 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
8933#endif
8934
8935 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
8936
8937 if (common->mode == PCRE2_JIT_PARTIAL_HARD)
8938 {
8939 jump[0] = CMP(SLJIT_LESS, SLJIT_RETURN_REG, 0, STR_END, 0);
8940 /* Since we successfully read a char above, partial matching must occur. */
8941 check_partial(common, TRUE);
8942 JUMPHERE(jump[0]);
8943 }
8944 return cc;
8945#endif
8946
 /* A caseful character, or a caseless one that has no other case or whose
 two cases can be described by char_get_othercase_bit(), is compared
 through byte_sequence_compare(). Otherwise the character is read and
 compared against both c and its other case oc. */
8947 case OP_CHAR:
8948 case OP_CHARI:
8949 length = 1;
8950#ifdef SUPPORT_UNICODE
8951 if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
8952#endif
8953
8954 if (check_str_ptr && common->mode != PCRE2_JIT_COMPLETE)
8955 detect_partial_match(common, backtracks);
8956
8957 if (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0)
8958 {
8959 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
8960 if (length > 1 || (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE))
8961 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
8962
8963 context.length = IN_UCHARS(length);
8964 context.sourcereg = -1;
8965#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
8966 context.ucharptr = 0;
8967#endif
8968 return byte_sequence_compare(common, type == OP_CHARI, cc, &context, backtracks);
8969 }
8970
8971#ifdef SUPPORT_UNICODE
8972 if (common->utf)
8973 {
8974 GETCHAR(c, cc);
8975 }
8976 else
8977#endif
8978 c = *cc;
8979
8980 SLJIT_ASSERT(type == OP_CHARI && char_has_othercase(common, cc));
8981
8982 if (check_str_ptr && common->mode == PCRE2_JIT_COMPLETE)
8983 add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
8984
8985 oc = char_othercase(common, c);
8986 read_char(common, c < oc ? c : oc, c > oc ? c : oc, NULL, 0);
8987
8988 SLJIT_ASSERT(!is_powerof2(c ^ oc));
8989
8990 if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
8991 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07008992 OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc);
Elliott Hughes5b808042021-10-01 10:56:10 -07008993 CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c);
8994 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
8995 }
8996 else
8997 {
8998 jump[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
8999 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
9000 JUMPHERE(jump[0]);
9001 }
9002 return cc + length;
9003
 /* Negated single character: backtracks when the subject character equals
 c (or, for OP_NOTI, either case of c). The 8-bit UTF path for c < 128
 compares the first byte only and then skips the rest of the character. */
9004 case OP_NOT:
9005 case OP_NOTI:
9006 if (check_str_ptr)
9007 detect_partial_match(common, backtracks);
9008
9009 length = 1;
9010#ifdef SUPPORT_UNICODE
9011 if (common->utf)
9012 {
9013#if PCRE2_CODE_UNIT_WIDTH == 8
9014 c = *cc;
9015 if (c < 128 && !common->invalid_utf)
9016 {
9017 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
9018 if (type == OP_NOT || !char_has_othercase(common, cc))
9019 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9020 else
9021 {
9022 /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
9023 OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
9024 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
9025 }
9026 /* Skip the variable-length character. */
9027 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
9028 jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
9029 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
9030 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
9031 JUMPHERE(jump[0]);
9032 return cc + 1;
9033 }
9034 else
9035#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
9036 {
9037 GETCHARLEN(c, cc, length);
9038 }
9039 }
9040 else
9041#endif /* SUPPORT_UNICODE */
9042 c = *cc;
9043
9044 if (type == OP_NOT || !char_has_othercase(common, cc))
9045 {
9046 read_char(common, c, c, backtracks, READ_CHAR_UPDATE_STR_PTR);
9047 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9048 }
9049 else
9050 {
9051 oc = char_othercase(common, c);
9052 read_char(common, c < oc ? c : oc, c > oc ? c : oc, backtracks, READ_CHAR_UPDATE_STR_PTR);
9053 bit = c ^ oc;
 /* When the two cases differ in a single bit, one OR plus one compare
 covers both of them. */
9054 if (is_powerof2(bit))
9055 {
9056 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
9057 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
9058 }
9059 else
9060 {
9061 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
9062 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
9063 }
9064 }
9065 return cc + length;
9066
 /* Bitmap character class stored at cc. optimize_class() is tried first;
 if it reports success nothing more is emitted. Otherwise the byte at
 cc + (c >> 3) is loaded and tested against the bit 1 << (c & 7). */
9067 case OP_CLASS:
9068 case OP_NCLASS:
9069 if (check_str_ptr)
9070 detect_partial_match(common, backtracks);
9071
9072#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
9073 bit = (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS)) ? 127 : 255;
9074 if (type == OP_NCLASS)
9075 read_char(common, 0, bit, backtracks, READ_CHAR_UPDATE_STR_PTR);
9076 else
9077 read_char(common, 0, bit, NULL, 0);
9078#else
9079 if (type == OP_NCLASS)
9080 read_char(common, 0, 255, backtracks, READ_CHAR_UPDATE_STR_PTR);
9081 else
9082 read_char(common, 0, 255, NULL, 0);
9083#endif
9084
9085 if (optimize_class(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
9086 return cc + 32 / sizeof(PCRE2_UCHAR);
9087
 /* Characters above the bitmap range: OP_CLASS backtracks, while
 OP_NCLASS keeps the jump in jump[0] and lands it after the bitmap test. */
9088#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
9089 jump[0] = NULL;
9090 if (common->utf)
9091 {
9092 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
9093 if (type == OP_CLASS)
9094 {
9095 add_jump(compiler, backtracks, jump[0]);
9096 jump[0] = NULL;
9097 }
9098 }
9099#elif PCRE2_CODE_UNIT_WIDTH != 8
9100 jump[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
9101 if (type == OP_CLASS)
9102 {
9103 add_jump(compiler, backtracks, jump[0]);
9104 jump[0] = NULL;
9105 }
9106#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 */
9107
9108 OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
9109 OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
9110 OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
9111 OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07009112 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07009113 add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
9114
9115#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
9116 if (jump[0] != NULL)
9117 JUMPHERE(jump[0]);
9118#endif
9119 return cc + 32 / sizeof(PCRE2_UCHAR);
9120
9121#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
 /* Extended class: the data after the LINK_SIZE length field is compiled
 by compile_xclass_matchingpath(); GET(cc, 0) supplies the item length. */
9122 case OP_XCLASS:
9123 if (check_str_ptr)
9124 detect_partial_match(common, backtracks);
9125 compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
9126 return cc + GET(cc, 0) - 1;
9127#endif
9128 }
9129SLJIT_UNREACHABLE();
9130return cc;
9131}
9132
9133static SLJIT_INLINE PCRE2_SPTR compile_charn_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, jump_list **backtracks)
9134{
9135/* This function consumes at least one input character. */
9136/* To decrease the number of length checks, we try to concatenate the fixed length character sequences. */
9137DEFINE_COMPILER;
9138PCRE2_SPTR ccbegin = cc;
9139compare_context context;
9140int size;
9141
9142context.length = 0;
9143do
9144 {
9145 if (cc >= ccend)
9146 break;
9147
9148 if (*cc == OP_CHAR)
9149 {
9150 size = 1;
9151#ifdef SUPPORT_UNICODE
9152 if (common->utf && HAS_EXTRALEN(cc[1]))
9153 size += GET_EXTRALEN(cc[1]);
9154#endif
9155 }
9156 else if (*cc == OP_CHARI)
9157 {
9158 size = 1;
9159#ifdef SUPPORT_UNICODE
9160 if (common->utf)
9161 {
9162 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9163 size = 0;
9164 else if (HAS_EXTRALEN(cc[1]))
9165 size += GET_EXTRALEN(cc[1]);
9166 }
9167 else
9168#endif
9169 if (char_has_othercase(common, cc + 1) && char_get_othercase_bit(common, cc + 1) == 0)
9170 size = 0;
9171 }
9172 else
9173 size = 0;
9174
9175 cc += 1 + size;
9176 context.length += IN_UCHARS(size);
9177 }
9178while (size > 0 && context.length <= 128);
9179
9180cc = ccbegin;
9181if (context.length > 0)
9182 {
9183 /* We have a fixed-length byte sequence. */
9184 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
9185 add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));
9186
9187 context.sourcereg = -1;
9188#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
9189 context.ucharptr = 0;
9190#endif
9191 do cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks); while (context.length > 0);
9192 return cc;
9193 }
9194
9195/* A non-fixed length character will be checked if length == 0. */
9196return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
9197}
9198
9199/* Forward declarations. */
9200static void compile_matchingpath(compiler_common *, PCRE2_SPTR, PCRE2_SPTR, backtrack_common *);
9201static void compile_backtrackingpath(compiler_common *, struct backtrack_common *);
9202
/* PUSH_BACKTRACK allocates a backtrack record of "size" bytes through
sljit_alloc_memory(), clears it, stores "ccstart" in its cc field and makes it
the new top of parent's list (the old top is kept in the prev field). When
the compiler reports an error after the allocation, the enclosing function
returns "error". The macro relies on the variables "compiler", "backtrack"
and "parent" being in scope at the place of use. */
9203#define PUSH_BACKTRACK(size, ccstart, error) \
9204 do \
9205 { \
9206 backtrack = sljit_alloc_memory(compiler, (size)); \
9207 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
9208 return error; \
9209 memset(backtrack, 0, size); \
9210 backtrack->prev = parent->top; \
9211 backtrack->cc = (ccstart); \
9212 parent->top = backtrack; \
9213 } \
9214 while (0)
9215
/* Same as PUSH_BACKTRACK, for functions that return void: on a compiler
error the enclosing function returns without a value. */
9216#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \
9217 do \
9218 { \
9219 backtrack = sljit_alloc_memory(compiler, (size)); \
9220 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
9221 return; \
9222 memset(backtrack, 0, size); \
9223 backtrack->prev = parent->top; \
9224 backtrack->cc = (ccstart); \
9225 parent->top = backtrack; \
9226 } \
9227 while (0)
9228
/* Casts the in-scope "backtrack" pointer to a pointer to the given type. */
9229#define BACKTRACK_AS(type) ((type *)backtrack)
9230
9231static void compile_dnref_search(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
9232{
9233/* The OVECTOR offset goes to TMP2. */
9234DEFINE_COMPILER;
9235int count = GET2(cc, 1 + IMM2_SIZE);
9236PCRE2_SPTR slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
9237unsigned int offset;
9238jump_list *found = NULL;
9239
9240SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);
9241
9242OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
9243
9244count--;
9245while (count-- > 0)
9246 {
9247 offset = GET2(slot, 0) << 1;
9248 GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
9249 add_jump(compiler, &found, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
9250 slot += common->name_entry_size;
9251 }
9252
9253offset = GET2(slot, 0) << 1;
9254GET_LOCAL_BASE(TMP2, 0, OVECTOR(offset));
9255if (backtracks != NULL && !common->unset_backref)
9256 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0));
9257
9258set_jumps(found, LABEL());
9259}
9260
9261static void compile_ref_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
9262{
9263DEFINE_COMPILER;
9264BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9265int offset = 0;
9266struct sljit_jump *jump = NULL;
9267struct sljit_jump *partial;
9268struct sljit_jump *nopartial;
9269#if defined SUPPORT_UNICODE
9270struct sljit_label *loop;
9271struct sljit_label *caseless_loop;
9272jump_list *no_match = NULL;
9273int source_reg = COUNT_MATCH;
9274int source_end_reg = ARGUMENTS;
9275int char1_reg = STACK_LIMIT;
9276#endif /* SUPPORT_UNICODE */
9277
9278if (ref)
9279 {
9280 offset = GET2(cc, 1) << 1;
9281 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9282 /* OVECTOR(1) contains the "string begin - 1" constant. */
9283 if (withchecks && !common->unset_backref)
9284 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9285 }
9286else
9287 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9288
9289#if defined SUPPORT_UNICODE
9290if (common->utf && *cc == OP_REFI)
9291 {
9292 SLJIT_ASSERT(common->iref_ptr != 0);
9293
9294 if (ref)
9295 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9296 else
9297 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9298
9299 if (withchecks && emptyfail)
9300 add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, TMP2, 0));
9301
9302 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr, source_reg, 0);
9303 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw), source_end_reg, 0);
9304 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2, char1_reg, 0);
9305
9306 OP1(SLJIT_MOV, source_reg, 0, TMP1, 0);
9307 OP1(SLJIT_MOV, source_end_reg, 0, TMP2, 0);
9308
9309 loop = LABEL();
9310 jump = CMP(SLJIT_GREATER_EQUAL, source_reg, 0, source_end_reg, 0);
9311 partial = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
9312
9313 /* Read original character. It must be a valid UTF character. */
9314 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
9315 OP1(SLJIT_MOV, STR_PTR, 0, source_reg, 0);
9316
9317 read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR | READ_CHAR_VALID_UTF);
9318
9319 OP1(SLJIT_MOV, source_reg, 0, STR_PTR, 0);
9320 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
9321 OP1(SLJIT_MOV, char1_reg, 0, TMP1, 0);
9322
9323 /* Read second character. */
9324 read_char(common, 0, READ_CHAR_MAX, &no_match, READ_CHAR_UPDATE_STR_PTR);
9325
9326 CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
9327
9328 OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
9329
9330 add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
9331
9332 OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2);
9333 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
9334 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
9335
9336 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records));
9337
9338 OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, other_case));
9339 OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(ucd_record, caseset));
9340 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0);
9341 CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop);
9342
9343 add_jump(compiler, &no_match, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9344 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
9345 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_caseless_sets));
9346
9347 caseless_loop = LABEL();
9348 OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9349 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07009350 OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07009351 JUMPTO(SLJIT_EQUAL, loop);
9352 JUMPTO(SLJIT_LESS, caseless_loop);
9353
9354 set_jumps(no_match, LABEL());
9355 if (common->mode == PCRE2_JIT_COMPLETE)
9356 JUMPHERE(partial);
9357
9358 OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9359 OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9360 OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9361 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9362
9363 if (common->mode != PCRE2_JIT_COMPLETE)
9364 {
9365 JUMPHERE(partial);
9366 OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9367 OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9368 OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9369
9370 check_partial(common, FALSE);
9371 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9372 }
9373
9374 JUMPHERE(jump);
9375 OP1(SLJIT_MOV, source_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr);
9376 OP1(SLJIT_MOV, source_end_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw));
9377 OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), common->iref_ptr + sizeof(sljit_sw) * 2);
9378 return;
9379 }
9380else
9381#endif /* SUPPORT_UNICODE */
9382 {
9383 if (ref)
9384 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
9385 else
9386 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);
9387
9388 if (withchecks)
9389 jump = JUMP(SLJIT_ZERO);
9390
9391 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
9392 partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
9393 if (common->mode == PCRE2_JIT_COMPLETE)
9394 add_jump(compiler, backtracks, partial);
9395
9396 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
9397 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9398
9399 if (common->mode != PCRE2_JIT_COMPLETE)
9400 {
9401 nopartial = JUMP(SLJIT_JUMP);
9402 JUMPHERE(partial);
9403 /* TMP2 -= STR_END - STR_PTR */
9404 OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
9405 OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
9406 partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
9407 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
9408 add_jump(compiler, *cc == OP_REF ? &common->casefulcmp : &common->caselesscmp, JUMP(SLJIT_FAST_CALL));
9409 add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
9410 JUMPHERE(partial);
9411 check_partial(common, FALSE);
9412 add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
9413 JUMPHERE(nopartial);
9414 }
9415 }
9416
9417if (jump != NULL)
9418 {
9419 if (emptyfail)
9420 add_jump(compiler, backtracks, jump);
9421 else
9422 JUMPHERE(jump);
9423 }
9424}
9425
9426static SLJIT_INLINE PCRE2_SPTR compile_ref_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9427{
9428DEFINE_COMPILER;
9429BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
9430backtrack_common *backtrack;
9431PCRE2_UCHAR type;
9432int offset = 0;
9433struct sljit_label *label;
9434struct sljit_jump *zerolength;
9435struct sljit_jump *jump = NULL;
9436PCRE2_SPTR ccbegin = cc;
9437int min = 0, max = 0;
9438BOOL minimize;
9439
9440PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
9441
9442if (ref)
9443 offset = GET2(cc, 1) << 1;
9444else
9445 cc += IMM2_SIZE;
9446type = cc[1 + IMM2_SIZE];
9447
9448SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
9449minimize = (type & 0x1) != 0;
9450switch(type)
9451 {
9452 case OP_CRSTAR:
9453 case OP_CRMINSTAR:
9454 min = 0;
9455 max = 0;
9456 cc += 1 + IMM2_SIZE + 1;
9457 break;
9458 case OP_CRPLUS:
9459 case OP_CRMINPLUS:
9460 min = 1;
9461 max = 0;
9462 cc += 1 + IMM2_SIZE + 1;
9463 break;
9464 case OP_CRQUERY:
9465 case OP_CRMINQUERY:
9466 min = 0;
9467 max = 1;
9468 cc += 1 + IMM2_SIZE + 1;
9469 break;
9470 case OP_CRRANGE:
9471 case OP_CRMINRANGE:
9472 min = GET2(cc, 1 + IMM2_SIZE + 1);
9473 max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
9474 cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
9475 break;
9476 default:
9477 SLJIT_UNREACHABLE();
9478 break;
9479 }
9480
9481if (!minimize)
9482 {
9483 if (min == 0)
9484 {
9485 allocate_stack(common, 2);
9486 if (ref)
9487 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9488 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9489 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9490 /* Temporary release of STR_PTR. */
9491 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9492 /* Handles both invalid and empty cases. Since the minimum repeat,
9493 is zero the invalid case is basically the same as an empty case. */
9494 if (ref)
9495 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9496 else
9497 {
9498 compile_dnref_search(common, ccbegin, NULL);
9499 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9500 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9501 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9502 }
9503 /* Restore if not zero length. */
9504 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
9505 }
9506 else
9507 {
9508 allocate_stack(common, 1);
9509 if (ref)
9510 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9511 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9512 if (ref)
9513 {
9514 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9515 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9516 }
9517 else
9518 {
9519 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9520 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9521 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
9522 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9523 }
9524 }
9525
9526 if (min > 1 || max > 1)
9527 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);
9528
9529 label = LABEL();
9530 if (!ref)
9531 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
9532 compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
9533
9534 if (min > 1 || max > 1)
9535 {
9536 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
9537 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9538 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
9539 if (min > 1)
9540 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, label);
9541 if (max > 1)
9542 {
9543 jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
9544 allocate_stack(common, 1);
9545 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9546 JUMPTO(SLJIT_JUMP, label);
9547 JUMPHERE(jump);
9548 }
9549 }
9550
9551 if (max == 0)
9552 {
9553 /* Includes min > 1 case as well. */
9554 allocate_stack(common, 1);
9555 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9556 JUMPTO(SLJIT_JUMP, label);
9557 }
9558
9559 JUMPHERE(zerolength);
9560 BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9561
9562 count_match(common);
9563 return cc;
9564 }
9565
9566allocate_stack(common, ref ? 2 : 3);
9567if (ref)
9568 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
9569OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
9570if (type != OP_CRMINSTAR)
9571 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
9572
9573if (min == 0)
9574 {
9575 /* Handles both invalid and empty cases. Since the minimum repeat,
9576 is zero the invalid case is basically the same as an empty case. */
9577 if (ref)
9578 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9579 else
9580 {
9581 compile_dnref_search(common, ccbegin, NULL);
9582 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9583 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9584 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9585 }
9586 /* Length is non-zero, we can match real repeats. */
9587 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9588 jump = JUMP(SLJIT_JUMP);
9589 }
9590else
9591 {
9592 if (ref)
9593 {
9594 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
9595 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
9596 }
9597 else
9598 {
9599 compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
9600 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
9601 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
9602 zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
9603 }
9604 }
9605
9606BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
9607if (max > 0)
9608 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
9609
9610if (!ref)
9611 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
9612compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
9613OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9614
9615if (min > 1)
9616 {
9617 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
9618 OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
9619 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9620 CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, BACKTRACK_AS(ref_iterator_backtrack)->matchingpath);
9621 }
9622else if (max > 0)
9623 OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);
9624
9625if (jump != NULL)
9626 JUMPHERE(jump);
9627JUMPHERE(zerolength);
9628
9629count_match(common);
9630return cc;
9631}
9632
9633static SLJIT_INLINE PCRE2_SPTR compile_recurse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9634{
9635DEFINE_COMPILER;
9636backtrack_common *backtrack;
9637recurse_entry *entry = common->entries;
9638recurse_entry *prev = NULL;
9639sljit_sw start = GET(cc, 1);
9640PCRE2_SPTR start_cc;
9641BOOL needs_control_head;
9642
9643PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);
9644
9645/* Inlining simple patterns. */
9646if (get_framesize(common, common->start + start, NULL, TRUE, &needs_control_head) == no_stack)
9647 {
9648 start_cc = common->start + start;
9649 compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
9650 BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
9651 return cc + 1 + LINK_SIZE;
9652 }
9653
9654while (entry != NULL)
9655 {
9656 if (entry->start == start)
9657 break;
9658 prev = entry;
9659 entry = entry->next;
9660 }
9661
9662if (entry == NULL)
9663 {
9664 entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
9665 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9666 return NULL;
9667 entry->next = NULL;
9668 entry->entry_label = NULL;
9669 entry->backtrack_label = NULL;
9670 entry->entry_calls = NULL;
9671 entry->backtrack_calls = NULL;
9672 entry->start = start;
9673
9674 if (prev != NULL)
9675 prev->next = entry;
9676 else
9677 common->entries = entry;
9678 }
9679
9680BACKTRACK_AS(recurse_backtrack)->entry = entry;
9681
9682if (entry->entry_label == NULL)
9683 add_jump(compiler, &entry->entry_calls, JUMP(SLJIT_FAST_CALL));
9684else
9685 JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
9686/* Leave if the match is failed. */
9687add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
9688BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
9689return cc + 1 + LINK_SIZE;
9690}
9691
9692static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
9693{
9694PCRE2_SPTR begin;
9695PCRE2_SIZE *ovector;
9696sljit_u32 oveccount, capture_top;
9697
9698if (arguments->callout == NULL)
9699 return 0;
9700
9701SLJIT_COMPILE_ASSERT(sizeof (PCRE2_SIZE) <= sizeof (sljit_sw), pcre2_size_must_be_lower_than_sljit_sw_size);
9702
9703begin = arguments->begin;
9704ovector = (PCRE2_SIZE*)(callout_block + 1);
9705oveccount = callout_block->capture_top;
9706
9707SLJIT_ASSERT(oveccount >= 1);
9708
9709callout_block->version = 2;
9710callout_block->callout_flags = 0;
9711
9712/* Offsets in subject. */
9713callout_block->subject_length = arguments->end - arguments->begin;
9714callout_block->start_match = jit_ovector[0] - begin;
9715callout_block->current_position = (PCRE2_SPTR)callout_block->offset_vector - begin;
9716callout_block->subject = begin;
9717
9718/* Convert and copy the JIT offset vector to the ovector array. */
9719callout_block->capture_top = 1;
9720callout_block->offset_vector = ovector;
9721
9722ovector[0] = PCRE2_UNSET;
9723ovector[1] = PCRE2_UNSET;
9724ovector += 2;
9725jit_ovector += 2;
9726capture_top = 1;
9727
9728/* Convert pointers to sizes. */
9729while (--oveccount != 0)
9730 {
9731 capture_top++;
9732
9733 ovector[0] = (PCRE2_SIZE)(jit_ovector[0] - begin);
9734 ovector[1] = (PCRE2_SIZE)(jit_ovector[1] - begin);
9735
9736 if (ovector[0] != PCRE2_UNSET)
9737 callout_block->capture_top = capture_top;
9738
9739 ovector += 2;
9740 jit_ovector += 2;
9741 }
9742
9743return (arguments->callout)(callout_block, arguments->callout_data);
9744}
9745
9746#define CALLOUT_ARG_OFFSET(arg) \
9747 SLJIT_OFFSETOF(pcre2_callout_block, arg)
9748
9749static SLJIT_INLINE PCRE2_SPTR compile_callout_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
9750{
9751DEFINE_COMPILER;
9752backtrack_common *backtrack;
9753sljit_s32 mov_opcode;
9754unsigned int callout_length = (*cc == OP_CALLOUT)
9755 ? PRIV(OP_lengths)[OP_CALLOUT] : GET(cc, 1 + 2 * LINK_SIZE);
9756sljit_sw value1;
9757sljit_sw value2;
9758sljit_sw value3;
9759sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
9760
9761PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
9762
9763callout_arg_size = (sizeof(pcre2_callout_block) + callout_arg_size + sizeof(sljit_sw) - 1) / sizeof(sljit_sw);
9764
9765allocate_stack(common, callout_arg_size);
9766
9767SLJIT_ASSERT(common->capture_last_ptr != 0);
9768OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
9769OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
9770value1 = (*cc == OP_CALLOUT) ? cc[1 + 2 * LINK_SIZE] : 0;
9771OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, value1);
9772OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0);
9773OP1(SLJIT_MOV_U32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_top), SLJIT_IMM, common->re->top_bracket + 1);
9774
9775/* These pointer sized fields temporarly stores internal variables. */
9776OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0);
9777
9778if (common->mark_ptr != 0)
9779 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr));
9780mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV;
9781OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 1));
9782OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 1 + LINK_SIZE));
9783
9784if (*cc == OP_CALLOUT)
9785 {
9786 value1 = 0;
9787 value2 = 0;
9788 value3 = 0;
9789 }
9790else
9791 {
9792 value1 = (sljit_sw) (cc + (1 + 4*LINK_SIZE) + 1);
9793 value2 = (callout_length - (1 + 4*LINK_SIZE + 2));
9794 value3 = (sljit_sw) (GET(cc, 1 + 3*LINK_SIZE));
9795 }
9796
9797OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string), SLJIT_IMM, value1);
9798OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_length), SLJIT_IMM, value2);
9799OP1(mov_opcode, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_string_offset), SLJIT_IMM, value3);
9800OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0);
9801
9802SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
9803
9804/* Needed to save important temporary registers. */
9805OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
9806/* SLJIT_R0 = arguments */
9807OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
9808GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07009809sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout));
Elliott Hughes5b808042021-10-01 10:56:10 -07009810OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
9811free_stack(common, callout_arg_size);
9812
9813/* Check return value. */
Elliott Hughes4e19c8e2022-04-15 15:11:02 -07009814OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -07009815add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
9816if (common->abort_label == NULL)
9817 add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
9818else
9819 JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label);
9820return cc + callout_length;
9821}
9822
9823#undef CALLOUT_ARG_SIZE
9824#undef CALLOUT_ARG_OFFSET
9825
9826static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
9827{
9828while (TRUE)
9829 {
9830 switch (*cc)
9831 {
9832 case OP_CALLOUT_STR:
9833 cc += GET(cc, 1 + 2*LINK_SIZE);
9834 break;
9835
9836 case OP_NOT_WORD_BOUNDARY:
9837 case OP_WORD_BOUNDARY:
9838 case OP_CIRC:
9839 case OP_CIRCM:
9840 case OP_DOLL:
9841 case OP_DOLLM:
9842 case OP_CALLOUT:
9843 case OP_ALT:
9844 cc += PRIV(OP_lengths)[*cc];
9845 break;
9846
9847 case OP_KET:
9848 return FALSE;
9849
9850 default:
9851 return TRUE;
9852 }
9853 }
9854}
9855
9856static PCRE2_SPTR compile_assert_matchingpath(compiler_common *common, PCRE2_SPTR cc, assert_backtrack *backtrack, BOOL conditional)
9857{
9858DEFINE_COMPILER;
9859int framesize;
9860int extrasize;
9861BOOL local_quit_available = FALSE;
9862BOOL needs_control_head;
9863int private_data_ptr;
9864backtrack_common altbacktrack;
9865PCRE2_SPTR ccbegin;
9866PCRE2_UCHAR opcode;
9867PCRE2_UCHAR bra = OP_BRA;
9868jump_list *tmp = NULL;
9869jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
9870jump_list **found;
9871/* Saving previous accept variables. */
9872BOOL save_local_quit_available = common->local_quit_available;
9873BOOL save_in_positive_assertion = common->in_positive_assertion;
9874then_trap_backtrack *save_then_trap = common->then_trap;
9875struct sljit_label *save_quit_label = common->quit_label;
9876struct sljit_label *save_accept_label = common->accept_label;
9877jump_list *save_quit = common->quit;
9878jump_list *save_positive_assertion_quit = common->positive_assertion_quit;
9879jump_list *save_accept = common->accept;
9880struct sljit_jump *jump;
9881struct sljit_jump *brajump = NULL;
9882
9883/* Assert captures then. */
9884common->then_trap = NULL;
9885
9886if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
9887 {
9888 SLJIT_ASSERT(!conditional);
9889 bra = *cc;
9890 cc++;
9891 }
9892private_data_ptr = PRIVATE_DATA(cc);
9893SLJIT_ASSERT(private_data_ptr != 0);
9894framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
9895backtrack->framesize = framesize;
9896backtrack->private_data_ptr = private_data_ptr;
9897opcode = *cc;
9898SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
9899found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
9900ccbegin = cc;
9901cc += GET(cc, 1);
9902
9903if (bra == OP_BRAMINZERO)
9904 {
9905 /* This is a braminzero backtrack path. */
9906 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9907 free_stack(common, 1);
9908 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
9909 }
9910
9911if (framesize < 0)
9912 {
9913 extrasize = 1;
9914 if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
9915 extrasize = 0;
9916
9917 if (needs_control_head)
9918 extrasize++;
9919
9920 if (framesize == no_frame)
9921 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
9922
9923 if (extrasize > 0)
9924 allocate_stack(common, extrasize);
9925
9926 if (needs_control_head)
9927 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9928
9929 if (extrasize > 0)
9930 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9931
9932 if (needs_control_head)
9933 {
9934 SLJIT_ASSERT(extrasize == 2);
9935 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9936 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9937 }
9938 }
9939else
9940 {
9941 extrasize = needs_control_head ? 3 : 2;
9942 allocate_stack(common, framesize + extrasize);
9943
9944 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
9945 OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
9946 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
9947 if (needs_control_head)
9948 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
9949 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
9950
9951 if (needs_control_head)
9952 {
9953 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
9954 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
9955 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
9956 }
9957 else
9958 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
9959
9960 init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
9961 }
9962
9963memset(&altbacktrack, 0, sizeof(backtrack_common));
9964if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
9965 {
9966 /* Control verbs cannot escape from these asserts. */
9967 local_quit_available = TRUE;
9968 common->local_quit_available = TRUE;
9969 common->quit_label = NULL;
9970 common->quit = NULL;
9971 }
9972
9973common->in_positive_assertion = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK);
9974common->positive_assertion_quit = NULL;
9975
9976while (1)
9977 {
9978 common->accept_label = NULL;
9979 common->accept = NULL;
9980 altbacktrack.top = NULL;
9981 altbacktrack.topbacktracks = NULL;
9982
9983 if (*ccbegin == OP_ALT && extrasize > 0)
9984 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
9985
9986 altbacktrack.cc = ccbegin;
9987 compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
9988 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
9989 {
9990 if (local_quit_available)
9991 {
9992 common->local_quit_available = save_local_quit_available;
9993 common->quit_label = save_quit_label;
9994 common->quit = save_quit;
9995 }
9996 common->in_positive_assertion = save_in_positive_assertion;
9997 common->then_trap = save_then_trap;
9998 common->accept_label = save_accept_label;
9999 common->positive_assertion_quit = save_positive_assertion_quit;
10000 common->accept = save_accept;
10001 return NULL;
10002 }
10003 common->accept_label = LABEL();
10004 if (common->accept != NULL)
10005 set_jumps(common->accept, common->accept_label);
10006
10007 /* Reset stack. */
10008 if (framesize < 0)
10009 {
10010 if (framesize == no_frame)
10011 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10012 else if (extrasize > 0)
10013 free_stack(common, extrasize);
10014
10015 if (needs_control_head)
10016 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10017 }
10018 else
10019 {
10020 if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
10021 {
10022 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10023 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
10024 if (needs_control_head)
10025 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10026 }
10027 else
10028 {
10029 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10030 if (needs_control_head)
10031 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
10032 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10033 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
10034 }
10035 }
10036
10037 if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
10038 {
10039 /* We know that STR_PTR was stored on the top of the stack. */
10040 if (conditional)
10041 {
10042 if (extrasize > 0)
10043 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
10044 }
10045 else if (bra == OP_BRAZERO)
10046 {
10047 if (framesize < 0)
10048 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
10049 else
10050 {
10051 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
10052 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
10053 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10054 }
10055 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10056 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10057 }
10058 else if (framesize >= 0)
10059 {
10060 /* For OP_BRA and OP_BRAMINZERO. */
10061 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
10062 }
10063 }
10064 add_jump(compiler, found, JUMP(SLJIT_JUMP));
10065
10066 compile_backtrackingpath(common, altbacktrack.top);
10067 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10068 {
10069 if (local_quit_available)
10070 {
10071 common->local_quit_available = save_local_quit_available;
10072 common->quit_label = save_quit_label;
10073 common->quit = save_quit;
10074 }
10075 common->in_positive_assertion = save_in_positive_assertion;
10076 common->then_trap = save_then_trap;
10077 common->accept_label = save_accept_label;
10078 common->positive_assertion_quit = save_positive_assertion_quit;
10079 common->accept = save_accept;
10080 return NULL;
10081 }
10082 set_jumps(altbacktrack.topbacktracks, LABEL());
10083
10084 if (*cc != OP_ALT)
10085 break;
10086
10087 ccbegin = cc;
10088 cc += GET(cc, 1);
10089 }
10090
10091if (local_quit_available)
10092 {
10093 SLJIT_ASSERT(common->positive_assertion_quit == NULL);
10094 /* Makes the check less complicated below. */
10095 common->positive_assertion_quit = common->quit;
10096 }
10097
10098/* None of them matched. */
10099if (common->positive_assertion_quit != NULL)
10100 {
10101 jump = JUMP(SLJIT_JUMP);
10102 set_jumps(common->positive_assertion_quit, LABEL());
10103 SLJIT_ASSERT(framesize != no_stack);
10104 if (framesize < 0)
10105 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
10106 else
10107 {
10108 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10109 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10110 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (extrasize + 1) * sizeof(sljit_sw));
10111 }
10112 JUMPHERE(jump);
10113 }
10114
10115if (needs_control_head)
10116 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
10117
10118if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
10119 {
10120 /* Assert is failed. */
10121 if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
10122 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10123
10124 if (framesize < 0)
10125 {
10126 /* The topmost item should be 0. */
10127 if (bra == OP_BRAZERO)
10128 {
10129 if (extrasize == 2)
10130 free_stack(common, 1);
10131 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10132 }
10133 else if (extrasize > 0)
10134 free_stack(common, extrasize);
10135 }
10136 else
10137 {
10138 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
10139 /* The topmost item should be 0. */
10140 if (bra == OP_BRAZERO)
10141 {
10142 free_stack(common, framesize + extrasize - 1);
10143 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10144 }
10145 else
10146 free_stack(common, framesize + extrasize);
10147 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10148 }
10149 jump = JUMP(SLJIT_JUMP);
10150 if (bra != OP_BRAZERO)
10151 add_jump(compiler, target, jump);
10152
10153 /* Assert is successful. */
10154 set_jumps(tmp, LABEL());
10155 if (framesize < 0)
10156 {
10157 /* We know that STR_PTR was stored on the top of the stack. */
10158 if (extrasize > 0)
10159 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
10160
10161 /* Keep the STR_PTR on the top of the stack. */
10162 if (bra == OP_BRAZERO)
10163 {
10164 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10165 if (extrasize == 2)
10166 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10167 }
10168 else if (bra == OP_BRAMINZERO)
10169 {
10170 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
10171 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10172 }
10173 }
10174 else
10175 {
10176 if (bra == OP_BRA)
10177 {
10178 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10179 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
10180 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
10181 }
10182 else
10183 {
10184 /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
10185 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
10186 if (extrasize == 2)
10187 {
10188 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10189 if (bra == OP_BRAMINZERO)
10190 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10191 }
10192 else
10193 {
10194 SLJIT_ASSERT(extrasize == 3);
10195 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10196 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
10197 }
10198 }
10199 }
10200
10201 if (bra == OP_BRAZERO)
10202 {
10203 backtrack->matchingpath = LABEL();
10204 SET_LABEL(jump, backtrack->matchingpath);
10205 }
10206 else if (bra == OP_BRAMINZERO)
10207 {
10208 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10209 JUMPHERE(brajump);
10210 if (framesize >= 0)
10211 {
10212 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10213 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10214 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
10215 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
10216 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10217 }
10218 set_jumps(backtrack->common.topbacktracks, LABEL());
10219 }
10220 }
10221else
10222 {
10223 /* AssertNot is successful. */
10224 if (framesize < 0)
10225 {
10226 if (extrasize > 0)
10227 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10228
10229 if (bra != OP_BRA)
10230 {
10231 if (extrasize == 2)
10232 free_stack(common, 1);
10233 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10234 }
10235 else if (extrasize > 0)
10236 free_stack(common, extrasize);
10237 }
10238 else
10239 {
10240 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10241 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
10242 /* The topmost item should be 0. */
10243 if (bra != OP_BRA)
10244 {
10245 free_stack(common, framesize + extrasize - 1);
10246 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
10247 }
10248 else
10249 free_stack(common, framesize + extrasize);
10250 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
10251 }
10252
10253 if (bra == OP_BRAZERO)
10254 backtrack->matchingpath = LABEL();
10255 else if (bra == OP_BRAMINZERO)
10256 {
10257 JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
10258 JUMPHERE(brajump);
10259 }
10260
10261 if (bra != OP_BRA)
10262 {
10263 SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
10264 set_jumps(backtrack->common.topbacktracks, LABEL());
10265 backtrack->common.topbacktracks = NULL;
10266 }
10267 }
10268
10269if (local_quit_available)
10270 {
10271 common->local_quit_available = save_local_quit_available;
10272 common->quit_label = save_quit_label;
10273 common->quit = save_quit;
10274 }
10275common->in_positive_assertion = save_in_positive_assertion;
10276common->then_trap = save_then_trap;
10277common->accept_label = save_accept_label;
10278common->positive_assertion_quit = save_positive_assertion_quit;
10279common->accept = save_accept;
10280return cc + 1 + LINK_SIZE;
10281}
10282
10283static SLJIT_INLINE void match_once_common(compiler_common *common, PCRE2_UCHAR ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
10284{
10285DEFINE_COMPILER;
10286int stacksize;
10287
10288if (framesize < 0)
10289 {
10290 if (framesize == no_frame)
10291 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10292 else
10293 {
10294 stacksize = needs_control_head ? 1 : 0;
10295 if (ket != OP_KET || has_alternatives)
10296 stacksize++;
10297
10298 if (stacksize > 0)
10299 free_stack(common, stacksize);
10300 }
10301
10302 if (needs_control_head)
10303 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), (ket != OP_KET || has_alternatives) ? STACK(-2) : STACK(-1));
10304
10305 /* TMP2 which is set here used by OP_KETRMAX below. */
10306 if (ket == OP_KETRMAX)
10307 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10308 else if (ket == OP_KETRMIN)
10309 {
10310 /* Move the STR_PTR to the private_data_ptr. */
10311 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
10312 }
10313 }
10314else
10315 {
10316 stacksize = (ket != OP_KET || has_alternatives) ? 2 : 1;
10317 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + stacksize) * sizeof(sljit_sw));
10318 if (needs_control_head)
10319 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
10320
10321 if (ket == OP_KETRMAX)
10322 {
10323 /* TMP2 which is set here used by OP_KETRMAX below. */
10324 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10325 }
10326 }
10327if (needs_control_head)
10328 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
10329}
10330
10331static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
10332{
10333DEFINE_COMPILER;
10334
10335if (common->capture_last_ptr != 0)
10336 {
10337 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
10338 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
10339 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10340 stacksize++;
10341 }
10342if (common->optimized_cbracket[offset >> 1] == 0)
10343 {
10344 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
10345 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
10346 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10347 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10348 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10349 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10350 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
10351 stacksize += 2;
10352 }
10353return stacksize;
10354}
10355
10356static PCRE2_SPTR SLJIT_FUNC do_script_run(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10357{
10358 if (PRIV(script_run)(ptr, endptr, FALSE))
10359 return endptr;
10360 return NULL;
10361}
10362
10363#ifdef SUPPORT_UNICODE
10364
10365static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr)
10366{
10367 if (PRIV(script_run)(ptr, endptr, TRUE))
10368 return endptr;
10369 return NULL;
10370}
10371
10372#endif /* SUPPORT_UNICODE */
10373
10374static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
10375{
10376DEFINE_COMPILER;
10377
10378SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
10379
10380OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10381#ifdef SUPPORT_UNICODE
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070010382sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
10383 common->utf ? SLJIT_FUNC_ADDR(do_script_run_utf) : SLJIT_FUNC_ADDR(do_script_run));
Elliott Hughes5b808042021-10-01 10:56:10 -070010384#else
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070010385sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run));
Elliott Hughes5b808042021-10-01 10:56:10 -070010386#endif
10387
10388OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
10389add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
10390}
10391
10392/*
10393 Handling bracketed expressions is probably the most complex part.
10394
10395 Stack layout naming characters:
10396 S - Push the current STR_PTR
10397 0 - Push a 0 (NULL)
10398 A - Push the current STR_PTR. Needed for restoring the STR_PTR
10399 before the next alternative. Not pushed if there are no alternatives.
10400 M - Any values pushed by the current alternative. Can be empty, or anything.
10401 C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack.
10402 L - Push the previous local (pointed by localptr) to the stack
10403 () - optional values stored on the stack
10404 ()* - optional, can be stored multiple times
10405
10406 The following list shows the regular expression templates, their PCRE byte codes
10407 and stack layout supported by pcre-sljit.
10408
10409 (?:) OP_BRA | OP_KET A M
10410 () OP_CBRA | OP_KET C M
10411 (?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )*
10412 OP_SBRA | OP_KETRMAX 0 L M S ( L M S )*
10413 (?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )*
10414 OP_SBRA | OP_KETRMIN 0 L M S ( L M S )*
10415 ()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )*
10416 OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )*
10417 ()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )*
10418 OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )*
10419 (?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 )
10420 (?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 )
10421 ()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 )
10422 ()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 )
10423 (?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )*
10424 OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )*
10425 (?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )*
10426 OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )*
10427 ()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )*
10428 OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )*
10429 ()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )*
10430 OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )*
10431
10432
10433 Stack layout naming characters:
10434 A - Push the alternative index (starting from 0) on the stack.
10435 Not pushed if there are no alternatives.
10436 M - Any values pushed by the current alternative. Can be empty, or anything.
10437
10438 The next list shows the possible content of a bracket:
10439 (|) OP_*BRA | OP_ALT ... M A
10440 (?()|) OP_*COND | OP_ALT M A
10441 (?>|) OP_ONCE | OP_ALT ... [stack trace] M A
10442 Or nothing, if trace is unnecessary
10443*/
10444
/* Generates the matching path of a bracketed group. The opcodes tested in the
   body are OP_BRA, OP_CBRA, OP_SCBRA, OP_SBRA, OP_ONCE, OP_COND, OP_SCOND,
   OP_SCRIPT_RUN, OP_ASSERT_NA and OP_ASSERTBACK_NA; the group may be preceded
   by OP_BRAZERO or OP_BRAMINZERO.

   Arguments:
     common  compiler state
     cc      points to the bracket opcode, or to the OP_BRAZERO / OP_BRAMINZERO
             in front of it
     parent  parent backtrack record; when the group is preceded by
             OP_BRAMINZERO it is accessed as a braminzero_backtrack

   Returns the byte code position after the closing ket (plus repeat_length
   for bounded repeats), or NULL when the sljit compiler reports an error.
   NOTE(review): PUSH_BACKTRACK presumably allocates the bracket_backtrack
   record, assigns `backtrack`, and returns its third argument on failure -
   confirm against the macro definition. */
10445 static PCRE2_SPTR compile_bracket_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
10446 {
10447 DEFINE_COMPILER;
10448 backtrack_common *backtrack;
10449 PCRE2_UCHAR opcode;
10450 int private_data_ptr = 0;
10451 int offset = 0;
10452 int i, stacksize;
10453 int repeat_ptr = 0, repeat_length = 0;
10454 int repeat_type = 0, repeat_count = 0;
10455 PCRE2_SPTR ccbegin;
10456 PCRE2_SPTR matchingpath;
10457 PCRE2_SPTR slot;
10458 PCRE2_UCHAR bra = OP_BRA;
10459 PCRE2_UCHAR ket;
10460 assert_backtrack *assert;
10461 BOOL has_alternatives;
10462 BOOL needs_control_head = FALSE;
10463 struct sljit_jump *jump;
10464 struct sljit_jump *skip;
10465 struct sljit_label *rmax_label = NULL;
10466 struct sljit_jump *braminzero = NULL;
10467
10468 PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);
10469
/* bra keeps the zero-repeat prefix opcode; it stays OP_BRA when there is none. */
10470 if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
10471 {
10472 bra = *cc;
10473 cc++;
/* NOTE(review): redundant, opcode is assigned again right after this block. */
10474 opcode = *cc;
10475 }
10476
10477 opcode = *cc;
10478 ccbegin = cc;
/* matchingpath is used temporarily to read the closing ket opcode. */
10479 matchingpath = bracketend(cc) - 1 - LINK_SIZE;
10480 ket = *matchingpath;
/* An OP_KET with private data describes a bounded repeat: the four private
   data entries starting at the ket give the counter slot, the length added to
   the returned position, the repeat type and the repeat count. */
10481 if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
10482 {
10483 repeat_ptr = PRIVATE_DATA(matchingpath);
10484 repeat_length = PRIVATE_DATA(matchingpath + 1);
10485 repeat_type = PRIVATE_DATA(matchingpath + 2);
10486 repeat_count = PRIVATE_DATA(matchingpath + 3);
10487 SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
/* OP_UPTO / OP_MINUPTO repeats are handled like OP_KETRMAX / OP_KETRMIN from
   here on; an OP_EXACT repeat keeps ket == OP_KET. */
10488 if (repeat_type == OP_UPTO)
10489 ket = OP_KETRMAX;
10490 if (repeat_type == OP_MINUPTO)
10491 ket = OP_KETRMIN;
10492 }
10493
/* From here matchingpath points to the code following the bracket header. */
10494 matchingpath = ccbegin + 1 + LINK_SIZE;
10495 SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
10496 SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
/* Follow the link: cc now points to the opcode that ends the first
   alternative (OP_ALT when more alternatives follow). */
10497 cc += GET(cc, 1);
10498
10499 has_alternatives = *cc == OP_ALT;
/* For conditional groups has_alternatives is recomputed from the condition
   opcode: it is FALSE for OP_RREF..OP_TRUE and OP_FAIL, TRUE otherwise. */
10500 if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
10501 {
10502 SLJIT_COMPILE_ASSERT(OP_DNRREF == OP_RREF + 1 && OP_FALSE == OP_RREF + 2 && OP_TRUE == OP_RREF + 3,
10503 compile_time_checks_must_be_grouped_together);
10504 has_alternatives = ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL) ? FALSE : TRUE;
10505 }
10506
/* A repeated OP_COND is processed as OP_SCOND. */
10507 if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
10508 opcode = OP_SCOND;
10509
10510 if (opcode == OP_CBRA || opcode == OP_SCBRA)
10511 {
10512 /* Capturing brackets has a pre-allocated space. */
/* offset is read as the capture number and then doubled to address the
   ovector pair. The private slot is OVECTOR_PRIV() unless the bracket is
   flagged in optimized_cbracket, in which case the OVECTOR() slot is used. */
10513 offset = GET2(ccbegin, 1 + LINK_SIZE);
10514 if (common->optimized_cbracket[offset] == 0)
10515 {
10516 private_data_ptr = OVECTOR_PRIV(offset);
10517 offset <<= 1;
10518 }
10519 else
10520 {
10521 offset <<= 1;
10522 private_data_ptr = OVECTOR(offset);
10523 }
10524 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
/* Skip the capture number that follows the link. */
10525 matchingpath += IMM2_SIZE;
10526 }
10527 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10528 {
10529 /* Other brackets simply allocate the next entry. */
10530 private_data_ptr = PRIVATE_DATA(ccbegin);
10531 SLJIT_ASSERT(private_data_ptr != 0);
10532 BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
/* get_framesize() also sets needs_control_head through its last argument. */
10533 if (opcode == OP_ONCE)
10534 BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
10535 }
10536
10537 /* Instructions before the first alternative. */
/* First the number of slots is counted so that one allocate_stack() call is
   enough; the slots are filled afterwards in the same order. */
10538 stacksize = 0;
10539 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10540 stacksize++;
10541 if (bra == OP_BRAZERO)
10542 stacksize++;
10543
10544 if (stacksize > 0)
10545 allocate_stack(common, stacksize);
10546
10547 stacksize = 0;
10548 if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
10549 {
10550 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10551 stacksize++;
10552 }
10553
10554 if (bra == OP_BRAZERO)
10555 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10556
10557 if (bra == OP_BRAMINZERO)
10558 {
10559 /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
10560 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
10561 if (ket != OP_KETRMIN)
10562 {
10563 free_stack(common, 1);
/* The jump is taken when the value just loaded into STR_PTR is 0; it is
   bound near the end of this function. */
10564 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10565 }
10566 else if (opcode == OP_ONCE || opcode >= OP_SBRA)
10567 {
10568 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10569 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10570 /* Nothing stored during the first run. */
10571 skip = JUMP(SLJIT_JUMP);
10572 JUMPHERE(jump);
10573 /* Checking zero-length iteration. */
10574 if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10575 {
10576 /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
10577 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10578 }
10579 else
10580 {
10581 /* Except when the whole stack frame must be saved. */
10582 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10583 braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
10584 }
10585 JUMPHERE(skip);
10586 }
10587 else
10588 {
10589 jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
10590 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
10591 JUMPHERE(jump);
10592 }
10593 }
10594
/* Bounded repeat: load the counter slot. For OP_EXACT the loop label is
   placed here, after the counter initialization. */
10595 if (repeat_type != 0)
10596 {
10597 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
10598 if (repeat_type == OP_EXACT)
10599 rmax_label = LABEL();
10600 }
10601
10602 if (ket == OP_KETRMIN)
10603 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10604
10605 if (ket == OP_KETRMAX)
10606 {
10607 rmax_label = LABEL();
10608 if (has_alternatives && opcode >= OP_BRA && opcode < OP_SBRA && repeat_type == 0)
10609 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
10610 }
10611
10612 /* Handling capturing brackets and alternatives. */
10613 if (opcode == OP_ONCE)
10614 {
10615 stacksize = 0;
/* TMP2 holds the current control head until it is stored on the stack below. */
10616 if (needs_control_head)
10617 {
10618 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
10619 stacksize++;
10620 }
10621
10622 if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
10623 {
10624 /* Neither capturing brackets nor recursions are found in the block. */
10625 if (ket == OP_KETRMIN)
10626 {
10627 stacksize += 2;
/* TMP2 is free when no control head is kept in it, so the previous private
   data value can be loaded right away; otherwise it is loaded further below. */
10628 if (!needs_control_head)
10629 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10630 }
10631 else
10632 {
10633 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10634 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
10635 if (ket == OP_KETRMAX || has_alternatives)
10636 stacksize++;
10637 }
10638
10639 if (stacksize > 0)
10640 allocate_stack(common, stacksize);
10641
10642 stacksize = 0;
10643 if (needs_control_head)
10644 {
10645 stacksize++;
10646 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10647 }
10648
10649 if (ket == OP_KETRMIN)
10650 {
10651 if (needs_control_head)
10652 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10653 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10654 if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
10655 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
10656 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
10657 }
10658 else if (ket == OP_KETRMAX || has_alternatives)
10659 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10660 }
10661 else
10662 {
/* A frame is needed: stacksize already counts the control head slot (if any);
   add the optional STR_PTR slot and framesize + 1 further slots. */
10663 if (ket != OP_KET || has_alternatives)
10664 stacksize++;
10665
10666 stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
10667 allocate_stack(common, stacksize);
10668
10669 if (needs_control_head)
10670 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10671
/* TMP1 = previous private data value (stored on the stack below);
   TMP2 = STACK_TOP + stacksize words (becomes the new private data value). */
10672 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10673 OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
10674
10675 stacksize = needs_control_head ? 1 : 0;
10676 if (ket != OP_KET || has_alternatives)
10677 {
10678 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10679 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10680 stacksize++;
10681 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10682 }
10683 else
10684 {
10685 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
10686 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
10687 }
10688 init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1);
10689 }
10690 }
10691 else if (opcode == OP_CBRA || opcode == OP_SCBRA)
10692 {
10693 /* Saving the previous values. */
10694 if (common->optimized_cbracket[offset >> 1] != 0)
10695 {
/* Optimized bracket: both ovector entries are pushed and the start entry is
   overwritten with the current STR_PTR. */
10696 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
10697 allocate_stack(common, 2);
10698 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10699 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
10700 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10701 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
10702 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
10703 }
10704 else
10705 {
/* Otherwise only the private slot is pushed and replaced by STR_PTR. */
10706 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10707 allocate_stack(common, 1);
10708 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10709 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10710 }
10711 }
10712 else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
10713 {
10714 /* Saving the previous value. */
/* The private slot receives the current STR_PTR (the start of the group). */
10715 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10716 allocate_stack(common, 1);
10717 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
10718 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10719 }
10720 else if (has_alternatives)
10721 {
10722 /* Pushing the starting string pointer. */
10723 allocate_stack(common, 1);
10724 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
10725 }
10726
10727 /* Generating code for the first alternative. */
10728 if (opcode == OP_COND || opcode == OP_SCOND)
10729 {
10730 if (*matchingpath == OP_CREF)
10731 {
/* Runtime check of a numbered group: the condition fails when the group's
   start entry equals OVECTOR(1). */
10732 SLJIT_ASSERT(has_alternatives);
10733 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
10734 CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
10735 matchingpath += 1 + IMM2_SIZE;
10736 }
10737 else if (*matchingpath == OP_DNCREF)
10738 {
10739 SLJIT_ASSERT(has_alternatives);
10740
/* Runtime check over several name table entries: the differences between
   each group's start entry and OVECTOR(1) are OR-ed together in TMP2, and the
   condition fails when the result is zero. STR_PTR serves as a scratch
   register and is preserved in TMP3. */
10741 i = GET2(matchingpath, 1 + IMM2_SIZE);
10742 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10743 OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
10744 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
10745 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10746 slot += common->name_entry_size;
10747 i--;
10748 while (i-- > 0)
10749 {
10750 OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
10751 OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
10752 slot += common->name_entry_size;
10753 }
10754 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
10755 add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
10756 matchingpath += 1 + 2 * IMM2_SIZE;
10757 }
10758 else if ((*matchingpath >= OP_RREF && *matchingpath <= OP_TRUE) || *matchingpath == OP_FAIL)
10759 {
10760 /* Never has other case. */
10761 BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
10762 SLJIT_ASSERT(!has_alternatives);
10763
/* These conditions are decided at compile time. In this branch stacksize is
   reused as the outcome: non-zero selects the first alternative, zero the
   "else" part. */
10764 if (*matchingpath == OP_TRUE)
10765 {
10766 stacksize = 1;
10767 matchingpath++;
10768 }
10769 else if (*matchingpath == OP_FALSE || *matchingpath == OP_FAIL)
10770 stacksize = 0;
10771 else if (*matchingpath == OP_RREF)
10772 {
10773 stacksize = GET2(matchingpath, 1);
10774 if (common->currententry == NULL)
10775 stacksize = 0;
10776 else if (stacksize == RREF_ANY)
10777 stacksize = 1;
10778 else if (common->currententry->start == 0)
10779 stacksize = stacksize == 0;
10780 else
10781 stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10782
10783 if (stacksize != 0)
10784 matchingpath += 1 + IMM2_SIZE;
10785 }
10786 else
10787 {
/* Remaining opcode of the range (OP_DNRREF): search the name table entries
   for the number of the entry currently being compiled. */
10788 if (common->currententry == NULL || common->currententry->start == 0)
10789 stacksize = 0;
10790 else
10791 {
10792 stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
10793 slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
10794 i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
10795 while (stacksize > 0)
10796 {
10797 if ((int)GET2(slot, 0) == i)
10798 break;
10799 slot += common->name_entry_size;
10800 stacksize--;
10801 }
10802 }
10803
10804 if (stacksize != 0)
10805 matchingpath += 1 + 2 * IMM2_SIZE;
10806 }
10807
10808 /* The stacksize == 0 is a common "else" case. */
10809 if (stacksize == 0)
10810 {
10811 if (*cc == OP_ALT)
10812 {
10813 matchingpath = cc + 1 + LINK_SIZE;
10814 cc += GET(cc, 1);
10815 }
10816 else
10817 matchingpath = cc;
10818 }
10819 }
10820 else
10821 {
/* The condition is an assertion: it gets its own assert_backtrack record and
   is compiled by compile_assert_matchingpath(). */
10822 SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
10823 /* Similar code as PUSH_BACKTRACK macro. */
10824 assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
10825 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10826 return NULL;
10827 memset(assert, 0, sizeof(assert_backtrack));
10828 assert->common.cc = matchingpath;
10829 BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
10830 matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
10831 }
10832 }
10833
/* Compile the selected alternative: the byte code range [matchingpath, cc). */
10834 compile_matchingpath(common, matchingpath, cc, backtrack);
10835 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
10836 return NULL;
10837
/* Non-atomic assertions reload STR_PTR from the private slot, which was set
   to the STR_PTR at the start of the group above. */
10838 if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
10839 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10840
10841 if (opcode == OP_ONCE)
10842 match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
10843
10844 if (opcode == OP_SCRIPT_RUN)
10845 match_script_run_common(common, private_data_ptr, backtrack);
10846
/* Count the slots needed after the alternative has matched, allocate them
   with a single call, then fill them in the same order. */
10847 stacksize = 0;
10848 if (repeat_type == OP_MINUPTO)
10849 {
10850 /* We need to preserve the counter. TMP2 will be used below. */
10851 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10852 stacksize++;
10853 }
10854 if (ket != OP_KET || bra != OP_BRA)
10855 stacksize++;
10856 if (offset != 0)
10857 {
10858 if (common->capture_last_ptr != 0)
10859 stacksize++;
10860 if (common->optimized_cbracket[offset >> 1] == 0)
10861 stacksize += 2;
10862 }
10863 if (has_alternatives && opcode != OP_ONCE)
10864 stacksize++;
10865
10866 if (stacksize > 0)
10867 allocate_stack(common, stacksize);
10868
10869 stacksize = 0;
10870 if (repeat_type == OP_MINUPTO)
10871 {
10872 /* TMP2 was set above. */
10873 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
10874 stacksize++;
10875 }
10876
/* Repeated groups push the current STR_PTR; a non-repeated group with a
   BRAZERO / BRAMINZERO prefix pushes 0 instead. */
10877 if (ket != OP_KET || bra != OP_BRA)
10878 {
10879 if (ket != OP_KET)
10880 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
10881 else
10882 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10883 stacksize++;
10884 }
10885
10886 if (offset != 0)
10887 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
10888
10889 /* Skip and count the other alternatives. */
/* i starts at 1 for the alternative compiled above and is incremented for
   every OP_ALT that follows; cc ends on the closing ket. */
10890 i = 1;
10891 while (*cc == OP_ALT)
10892 {
10893 cc += GET(cc, 1);
10894 i++;
10895 }
10896
10897 if (has_alternatives)
10898 {
10899 if (opcode != OP_ONCE)
10900 {
/* The last slot identifies the alternative that matched: an immediate 0 for
   up to three alternatives, otherwise a put_label recorded in the backtrack.
   NOTE(review): presumably the backtrack path dispatches on this slot and
   patches the put_label target - confirm against the backtracking code. */
10901 if (i <= 3)
10902 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
10903 else
10904 BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
10905 }
10906 if (ket != OP_KETRMAX)
10907 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10908 }
10909
10910 /* Must be after the matchingpath label. */
10911 if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
10912 {
10913 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
10914 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
10915 }
10916
10917 if (ket == OP_KETRMAX)
10918 {
10919 if (repeat_type != 0)
10920 {
/* Bounded greedy repeat: decrement the counter and loop while it is non-zero. */
10921 if (has_alternatives)
10922 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10923 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10924 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10925 /* Drop STR_PTR for greedy plus quantifier. */
10926 if (opcode != OP_ONCE)
10927 free_stack(common, 1);
10928 }
10929 else if (opcode < OP_BRA || opcode >= OP_SBRA)
10930 {
10931 if (has_alternatives)
10932 BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
10933
10934 /* Checking zero-length iteration. */
10935 if (opcode != OP_ONCE)
10936 {
10937 /* This case includes opcodes such as OP_SCRIPT_RUN. */
10938 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
10939 /* Drop STR_PTR for greedy plus quantifier. */
10940 if (bra != OP_BRAZERO)
10941 free_stack(common, 1);
10942 }
10943 else
10944 /* TMP2 must contain the starting STR_PTR. */
10945 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
10946 }
10947 else
/* Remaining opcodes (OP_BRA up to, but not including, OP_SBRA): loop back
   unconditionally. */
10948 JUMPTO(SLJIT_JUMP, rmax_label);
10949 BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
10950 }
10951
10952 if (repeat_type == OP_EXACT)
10953 {
/* Exact repeat: loop back to rmax_label until the counter reaches zero. */
10954 count_match(common);
10955 OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
10956 JUMPTO(SLJIT_NOT_ZERO, rmax_label);
10957 }
10958 else if (repeat_type == OP_UPTO)
10959 {
10960 /* We need to preserve the counter. */
10961 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
10962 allocate_stack(common, 1);
10963 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
10964 }
10965
10966 if (bra == OP_BRAZERO)
10967 BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();
10968
10969 if (bra == OP_BRAMINZERO)
10970 {
10971 /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
10972 JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
10973 if (braminzero != NULL)
10974 {
10975 JUMPHERE(braminzero);
10976 /* We need to release the end pointer to perform the
10977 backtrack for the zero-length iteration. When
10978 framesize is < 0, OP_ONCE will do the release itself. */
10979 if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
10980 {
10981 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
10982 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
10983 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (BACKTRACK_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
10984 }
10985 else if (ket == OP_KETRMIN && opcode != OP_ONCE)
10986 free_stack(common, 1);
10987 }
10988 /* Continue to the normal backtrack. */
10989 }
10990
10991 if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
10992 count_match(common);
10993
/* Step over the closing ket and its link. */
10994 cc += 1 + LINK_SIZE;
10995
10996 if (opcode == OP_ONCE)
10997 {
10998 /* We temporarily encode the needs_control_head in the lowest bit.
10999 Note: on the target architectures of SLJIT the ((x << 1) >> 1) returns
11000 the same value for small signed numbers (including negative numbers). */
11001 BACKTRACK_AS(bracket_backtrack)->u.framesize = (int)((unsigned)BACKTRACK_AS(bracket_backtrack)->u.framesize << 1) | (needs_control_head ? 1 : 0);
11002 }
/* repeat_length is non-zero only for bounded repeats (see the top of the
   function). */
11003 return cc + repeat_length;
11004 }
11005
11006static PCRE2_SPTR compile_bracketpos_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11007{
11008DEFINE_COMPILER;
11009backtrack_common *backtrack;
11010PCRE2_UCHAR opcode;
11011int private_data_ptr;
11012int cbraprivptr = 0;
11013BOOL needs_control_head;
11014int framesize;
11015int stacksize;
11016int offset = 0;
11017BOOL zero = FALSE;
11018PCRE2_SPTR ccbegin = NULL;
11019int stack; /* Also contains the offset of control head. */
11020struct sljit_label *loop = NULL;
11021struct jump_list *emptymatch = NULL;
11022
11023PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
11024if (*cc == OP_BRAPOSZERO)
11025 {
11026 zero = TRUE;
11027 cc++;
11028 }
11029
11030opcode = *cc;
11031private_data_ptr = PRIVATE_DATA(cc);
11032SLJIT_ASSERT(private_data_ptr != 0);
11033BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
11034switch(opcode)
11035 {
11036 case OP_BRAPOS:
11037 case OP_SBRAPOS:
11038 ccbegin = cc + 1 + LINK_SIZE;
11039 break;
11040
11041 case OP_CBRAPOS:
11042 case OP_SCBRAPOS:
11043 offset = GET2(cc, 1 + LINK_SIZE);
11044 /* This case cannot be optimized in the same was as
11045 normal capturing brackets. */
11046 SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
11047 cbraprivptr = OVECTOR_PRIV(offset);
11048 offset <<= 1;
11049 ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
11050 break;
11051
11052 default:
11053 SLJIT_UNREACHABLE();
11054 break;
11055 }
11056
11057framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
11058BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
11059if (framesize < 0)
11060 {
11061 if (offset != 0)
11062 {
11063 stacksize = 2;
11064 if (common->capture_last_ptr != 0)
11065 stacksize++;
11066 }
11067 else
11068 stacksize = 1;
11069
11070 if (needs_control_head)
11071 stacksize++;
11072 if (!zero)
11073 stacksize++;
11074
11075 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11076 allocate_stack(common, stacksize);
11077 if (framesize == no_frame)
11078 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
11079
11080 stack = 0;
11081 if (offset != 0)
11082 {
11083 stack = 2;
11084 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
11085 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
11086 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
11087 if (common->capture_last_ptr != 0)
11088 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
11089 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
11090 if (needs_control_head)
11091 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11092 if (common->capture_last_ptr != 0)
11093 {
11094 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
11095 stack = 3;
11096 }
11097 }
11098 else
11099 {
11100 if (needs_control_head)
11101 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11102 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11103 stack = 1;
11104 }
11105
11106 if (needs_control_head)
11107 stack++;
11108 if (!zero)
11109 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
11110 if (needs_control_head)
11111 {
11112 stack--;
11113 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11114 }
11115 }
11116else
11117 {
11118 stacksize = framesize + 1;
11119 if (!zero)
11120 stacksize++;
11121 if (needs_control_head)
11122 stacksize++;
11123 if (offset == 0)
11124 stacksize++;
11125 BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
11126
11127 allocate_stack(common, stacksize);
11128 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11129 if (needs_control_head)
11130 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
11131 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11132
11133 stack = 0;
11134 if (!zero)
11135 {
11136 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
11137 stack = 1;
11138 }
11139 if (needs_control_head)
11140 {
11141 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
11142 stack++;
11143 }
11144 if (offset == 0)
11145 {
11146 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
11147 stack++;
11148 }
11149 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
11150 init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize);
11151 stack -= 1 + (offset == 0);
11152 }
11153
11154if (offset != 0)
11155 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11156
11157loop = LABEL();
11158while (*cc != OP_KETRPOS)
11159 {
11160 backtrack->top = NULL;
11161 backtrack->topbacktracks = NULL;
11162 cc += GET(cc, 1);
11163
11164 compile_matchingpath(common, ccbegin, cc, backtrack);
11165 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11166 return NULL;
11167
11168 if (framesize < 0)
11169 {
11170 if (framesize == no_frame)
11171 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11172
11173 if (offset != 0)
11174 {
11175 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11176 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11177 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11178 if (common->capture_last_ptr != 0)
11179 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11180 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11181 }
11182 else
11183 {
11184 if (opcode == OP_SBRAPOS)
11185 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11186 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11187 }
11188
11189 /* Even if the match is empty, we need to reset the control head. */
11190 if (needs_control_head)
11191 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11192
11193 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11194 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11195
11196 if (!zero)
11197 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11198 }
11199 else
11200 {
11201 if (offset != 0)
11202 {
11203 OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11204 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11205 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
11206 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
11207 if (common->capture_last_ptr != 0)
11208 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
11209 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
11210 }
11211 else
11212 {
11213 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11214 OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
11215 if (opcode == OP_SBRAPOS)
11216 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11217 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
11218 }
11219
11220 /* Even if the match is empty, we need to reset the control head. */
11221 if (needs_control_head)
11222 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));
11223
11224 if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
11225 add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));
11226
11227 if (!zero)
11228 {
11229 if (framesize < 0)
11230 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
11231 else
11232 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
11233 }
11234 }
11235
11236 JUMPTO(SLJIT_JUMP, loop);
11237 flush_stubs(common);
11238
11239 compile_backtrackingpath(common, backtrack->top);
11240 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
11241 return NULL;
11242 set_jumps(backtrack->topbacktracks, LABEL());
11243
11244 if (framesize < 0)
11245 {
11246 if (offset != 0)
11247 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11248 else
11249 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
11250 }
11251 else
11252 {
11253 if (offset != 0)
11254 {
11255 /* Last alternative. */
11256 if (*cc == OP_KETRPOS)
11257 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11258 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
11259 }
11260 else
11261 {
11262 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
11263 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
11264 }
11265 }
11266
11267 if (*cc == OP_KETRPOS)
11268 break;
11269 ccbegin = cc + 1 + LINK_SIZE;
11270 }
11271
11272/* We don't have to restore the control head in case of a failed match. */
11273
11274backtrack->topbacktracks = NULL;
11275if (!zero)
11276 {
11277 if (framesize < 0)
11278 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
11279 else /* TMP2 is set to [private_data_ptr] above. */
11280 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
11281 }
11282
11283/* None of them matched. */
11284set_jumps(emptymatch, LABEL());
11285count_match(common);
11286return cc + 1 + LINK_SIZE;
11287}
11288
11289static SLJIT_INLINE PCRE2_SPTR get_iterator_parameters(compiler_common *common, PCRE2_SPTR cc, PCRE2_UCHAR *opcode, PCRE2_UCHAR *type, sljit_u32 *max, sljit_u32 *exact, PCRE2_SPTR *end)
11290{
11291int class_len;
11292
11293*opcode = *cc;
11294*exact = 0;
11295
11296if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
11297 {
11298 cc++;
11299 *type = OP_CHAR;
11300 }
11301else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
11302 {
11303 cc++;
11304 *type = OP_CHARI;
11305 *opcode -= OP_STARI - OP_STAR;
11306 }
11307else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
11308 {
11309 cc++;
11310 *type = OP_NOT;
11311 *opcode -= OP_NOTSTAR - OP_STAR;
11312 }
11313else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
11314 {
11315 cc++;
11316 *type = OP_NOTI;
11317 *opcode -= OP_NOTSTARI - OP_STAR;
11318 }
11319else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
11320 {
11321 cc++;
11322 *opcode -= OP_TYPESTAR - OP_STAR;
11323 *type = OP_END;
11324 }
11325else
11326 {
11327 SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
11328 *type = *opcode;
11329 cc++;
11330 class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(PCRE2_UCHAR))) : GET(cc, 0);
11331 *opcode = cc[class_len - 1];
11332
11333 if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
11334 {
11335 *opcode -= OP_CRSTAR - OP_STAR;
11336 *end = cc + class_len;
11337
11338 if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
11339 {
11340 *exact = 1;
11341 *opcode -= OP_PLUS - OP_STAR;
11342 }
11343 }
11344 else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
11345 {
11346 *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
11347 *end = cc + class_len;
11348
11349 if (*opcode == OP_POSPLUS)
11350 {
11351 *exact = 1;
11352 *opcode = OP_POSSTAR;
11353 }
11354 }
11355 else
11356 {
11357 SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
11358 *max = GET2(cc, (class_len + IMM2_SIZE));
11359 *exact = GET2(cc, class_len);
11360
11361 if (*max == 0)
11362 {
11363 if (*opcode == OP_CRPOSRANGE)
11364 *opcode = OP_POSSTAR;
11365 else
11366 *opcode -= OP_CRRANGE - OP_STAR;
11367 }
11368 else
11369 {
11370 *max -= *exact;
11371 if (*max == 0)
11372 *opcode = OP_EXACT;
11373 else if (*max == 1)
11374 {
11375 if (*opcode == OP_CRPOSRANGE)
11376 *opcode = OP_POSQUERY;
11377 else
11378 *opcode -= OP_CRRANGE - OP_QUERY;
11379 }
11380 else
11381 {
11382 if (*opcode == OP_CRPOSRANGE)
11383 *opcode = OP_POSUPTO;
11384 else
11385 *opcode -= OP_CRRANGE - OP_UPTO;
11386 }
11387 }
11388 *end = cc + class_len + 2 * IMM2_SIZE;
11389 }
11390 return cc;
11391 }
11392
11393switch(*opcode)
11394 {
11395 case OP_EXACT:
11396 *exact = GET2(cc, 0);
11397 cc += IMM2_SIZE;
11398 break;
11399
11400 case OP_PLUS:
11401 case OP_MINPLUS:
11402 *exact = 1;
11403 *opcode -= OP_PLUS - OP_STAR;
11404 break;
11405
11406 case OP_POSPLUS:
11407 *exact = 1;
11408 *opcode = OP_POSSTAR;
11409 break;
11410
11411 case OP_UPTO:
11412 case OP_MINUPTO:
11413 case OP_POSUPTO:
11414 *max = GET2(cc, 0);
11415 cc += IMM2_SIZE;
11416 break;
11417 }
11418
11419if (*type == OP_END)
11420 {
11421 *type = *cc;
11422 *end = next_opcode(common, cc);
11423 cc++;
11424 return cc;
11425 }
11426
11427*end = cc + 1;
11428#ifdef SUPPORT_UNICODE
11429if (common->utf && HAS_EXTRALEN(*cc)) *end += GET_EXTRALEN(*cc);
11430#endif
11431return cc;
11432}
11433
11434static PCRE2_SPTR compile_iterator_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11435{
11436DEFINE_COMPILER;
11437backtrack_common *backtrack;
11438PCRE2_UCHAR opcode;
11439PCRE2_UCHAR type;
11440sljit_u32 max = 0, exact;
11441sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1);
11442sljit_s32 early_fail_type;
11443BOOL charpos_enabled;
11444PCRE2_UCHAR charpos_char;
11445unsigned int charpos_othercasebit;
11446PCRE2_SPTR end;
11447jump_list *no_match = NULL;
11448jump_list *no_char1_match = NULL;
11449struct sljit_jump *jump = NULL;
11450struct sljit_label *label;
11451int private_data_ptr = PRIVATE_DATA(cc);
11452int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
11453int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
11454int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
11455int tmp_base, tmp_offset;
11456#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11457BOOL use_tmp;
11458#endif
11459
11460PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL);
11461
11462early_fail_type = (early_fail_ptr & 0x7);
11463early_fail_ptr >>= 3;
11464
11465/* During recursion, these optimizations are disabled. */
11466if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL)
11467 {
11468 early_fail_ptr = 0;
11469 early_fail_type = type_skip;
11470 }
11471
11472SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
11473 || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
11474
11475if (early_fail_type == type_fail)
11476 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
11477
11478cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
11479
11480if (type != OP_EXTUNI)
11481 {
11482 tmp_base = TMP3;
11483 tmp_offset = 0;
11484 }
11485else
11486 {
11487 tmp_base = SLJIT_MEM1(SLJIT_SP);
11488 tmp_offset = POSSESSIVE0;
11489 }
11490
11491/* Handle fixed part first. */
11492if (exact > 1)
11493 {
11494 SLJIT_ASSERT(early_fail_ptr == 0);
11495
11496 if (common->mode == PCRE2_JIT_COMPLETE
11497#ifdef SUPPORT_UNICODE
11498 && !common->utf
11499#endif
11500 && type != OP_ANYNL && type != OP_EXTUNI)
11501 {
11502 OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
11503 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
11504 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11505 label = LABEL();
11506 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11507 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11508 JUMPTO(SLJIT_NOT_ZERO, label);
11509 }
11510 else
11511 {
11512 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
11513 label = LABEL();
11514 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11515 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11516 JUMPTO(SLJIT_NOT_ZERO, label);
11517 }
11518 }
11519else if (exact == 1)
11520 {
11521 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
11522
11523 if (early_fail_type == type_fail_range)
11524 {
11525 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
11526 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw));
11527 OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
11528 OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
11529 add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
11530
11531 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0);
11532 }
11533 }
11534
11535switch(opcode)
11536 {
11537 case OP_STAR:
11538 case OP_UPTO:
11539 SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR);
11540
11541 if (type == OP_ANYNL || type == OP_EXTUNI)
11542 {
11543 SLJIT_ASSERT(private_data_ptr == 0);
11544 SLJIT_ASSERT(early_fail_ptr == 0);
11545
11546 allocate_stack(common, 2);
11547 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11548 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
11549
11550 if (opcode == OP_UPTO)
11551 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max);
11552
11553 label = LABEL();
11554 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11555 if (opcode == OP_UPTO)
11556 {
11557 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
11558 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
11559 jump = JUMP(SLJIT_ZERO);
11560 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
11561 }
11562
11563 /* We cannot use TMP3 because of allocate_stack. */
11564 allocate_stack(common, 1);
11565 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
11566 JUMPTO(SLJIT_JUMP, label);
11567 if (jump != NULL)
11568 JUMPHERE(jump);
11569 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11570 break;
11571 }
11572#ifdef SUPPORT_UNICODE
11573 else if (type == OP_ALLANY && !common->invalid_utf)
11574#else
11575 else if (type == OP_ALLANY)
11576#endif
11577 {
11578 if (opcode == OP_STAR)
11579 {
11580 if (private_data_ptr == 0)
11581 allocate_stack(common, 2);
11582
11583 OP1(SLJIT_MOV, base, offset0, STR_END, 0);
11584 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11585
11586 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11587 process_partial_match(common);
11588
11589 if (early_fail_ptr != 0)
11590 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11591 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11592 break;
11593 }
11594#ifdef SUPPORT_UNICODE
11595 else if (!common->utf)
11596#else
11597 else
11598#endif
11599 {
11600 if (private_data_ptr == 0)
11601 allocate_stack(common, 2);
11602
11603 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11604 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11605
11606 if (common->mode == PCRE2_JIT_COMPLETE)
11607 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070011608 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -070011609 CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11610 }
11611 else
11612 {
11613 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11614 process_partial_match(common);
11615 JUMPHERE(jump);
11616 }
11617
11618 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11619
11620 if (early_fail_ptr != 0)
11621 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11622 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11623 break;
11624 }
11625 }
11626
11627 charpos_enabled = FALSE;
11628 charpos_char = 0;
11629 charpos_othercasebit = 0;
11630
11631 if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI))
11632 {
11633#ifdef SUPPORT_UNICODE
11634 charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]);
11635#else
11636 charpos_enabled = TRUE;
11637#endif
11638 if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1))
11639 {
11640 charpos_othercasebit = char_get_othercase_bit(common, end + 1);
11641 if (charpos_othercasebit == 0)
11642 charpos_enabled = FALSE;
11643 }
11644
11645 if (charpos_enabled)
11646 {
11647 charpos_char = end[1];
11648 /* Consume the OP_CHAR opcode. */
11649 end += 2;
11650#if PCRE2_CODE_UNIT_WIDTH == 8
11651 SLJIT_ASSERT((charpos_othercasebit >> 8) == 0);
11652#elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
11653 SLJIT_ASSERT((charpos_othercasebit >> 9) == 0);
11654 if ((charpos_othercasebit & 0x100) != 0)
11655 charpos_othercasebit = (charpos_othercasebit & 0xff) << 8;
11656#endif
11657 if (charpos_othercasebit != 0)
11658 charpos_char |= charpos_othercasebit;
11659
11660 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE;
11661 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char;
11662 BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit;
11663 }
11664 }
11665
11666 if (charpos_enabled)
11667 {
11668 if (opcode == OP_UPTO)
11669 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1);
11670
11671 /* Search the first instance of charpos_char. */
11672 jump = JUMP(SLJIT_JUMP);
11673 label = LABEL();
11674 if (opcode == OP_UPTO)
11675 {
11676 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11677 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
11678 }
11679 compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
11680 if (early_fail_ptr != 0)
11681 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11682 JUMPHERE(jump);
11683
11684 detect_partial_match(common, &backtrack->topbacktracks);
11685 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11686 if (charpos_othercasebit != 0)
11687 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11688 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11689
11690 if (private_data_ptr == 0)
11691 allocate_stack(common, 2);
11692 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11693 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11694
11695 if (opcode == OP_UPTO)
11696 {
11697 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11698 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11699 }
11700
11701 /* Search the last instance of charpos_char. */
11702 label = LABEL();
11703 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11704 if (early_fail_ptr != 0)
11705 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11706 detect_partial_match(common, &no_match);
11707 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
11708 if (charpos_othercasebit != 0)
11709 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
11710
11711 if (opcode == OP_STAR)
11712 {
11713 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label);
11714 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11715 JUMPTO(SLJIT_JUMP, label);
11716 }
11717 else
11718 {
11719 jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char);
11720 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11721 JUMPHERE(jump);
11722 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11723 JUMPTO(SLJIT_NOT_ZERO, label);
11724 }
11725
11726 set_jumps(no_match, LABEL());
11727 OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1));
11728 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11729 }
11730 else
11731 {
11732 if (private_data_ptr == 0)
11733 allocate_stack(common, 2);
11734
11735 OP1(SLJIT_MOV, base, offset1, STR_PTR, 0);
11736#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11737 use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR);
11738 SLJIT_ASSERT(!use_tmp || tmp_base == TMP3);
11739
11740 if (common->utf)
11741 OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11742#endif
11743 if (opcode == OP_UPTO)
11744 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11745
11746 detect_partial_match(common, &no_match);
11747 label = LABEL();
11748 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11749#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11750 if (common->utf)
11751 OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0);
11752#endif
11753
11754 if (opcode == OP_UPTO)
11755 {
11756 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11757 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11758 }
11759
11760 detect_partial_match_to(common, label);
11761 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11762
11763 set_jumps(no_char1_match, LABEL());
11764#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11765 if (common->utf)
11766 {
11767 set_jumps(no_match, LABEL());
11768 if (use_tmp)
11769 {
11770 OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
11771 OP1(SLJIT_MOV, base, offset0, TMP3, 0);
11772 }
11773 else
11774 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
11775 }
11776 else
11777#endif
11778 {
11779 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11780 set_jumps(no_match, LABEL());
11781 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11782 }
11783
11784 if (early_fail_ptr != 0)
11785 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11786 }
11787
11788 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11789 break;
11790
11791 case OP_MINSTAR:
11792 if (private_data_ptr == 0)
11793 allocate_stack(common, 1);
11794 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11795 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11796 if (early_fail_ptr != 0)
11797 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11798 break;
11799
11800 case OP_MINUPTO:
11801 SLJIT_ASSERT(early_fail_ptr == 0);
11802 if (private_data_ptr == 0)
11803 allocate_stack(common, 2);
11804 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11805 OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1);
11806 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11807 break;
11808
11809 case OP_QUERY:
11810 case OP_MINQUERY:
11811 SLJIT_ASSERT(early_fail_ptr == 0);
11812 if (private_data_ptr == 0)
11813 allocate_stack(common, 1);
11814 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
11815 if (opcode == OP_QUERY)
11816 compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE);
11817 BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL();
11818 break;
11819
11820 case OP_EXACT:
11821 break;
11822
11823 case OP_POSSTAR:
11824#if defined SUPPORT_UNICODE
11825 if (type == OP_ALLANY && !common->invalid_utf)
11826#else
11827 if (type == OP_ALLANY)
11828#endif
11829 {
11830 OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
11831 process_partial_match(common);
11832 if (early_fail_ptr != 0)
11833 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0);
11834 break;
11835 }
11836
11837#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11838 if (common->utf)
11839 {
11840 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11841 detect_partial_match(common, &no_match);
11842 label = LABEL();
11843 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11844 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11845 detect_partial_match_to(common, label);
11846
11847 set_jumps(no_match, LABEL());
11848 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11849 if (early_fail_ptr != 0)
11850 {
11851 if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3)
11852 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0);
11853 else
11854 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11855 }
11856 break;
11857 }
11858#endif
11859
11860 detect_partial_match(common, &no_match);
11861 label = LABEL();
11862 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11863 detect_partial_match_to(common, label);
11864 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11865
11866 set_jumps(no_char1_match, LABEL());
11867 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11868 set_jumps(no_match, LABEL());
11869 if (early_fail_ptr != 0)
11870 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
11871 break;
11872
11873 case OP_POSUPTO:
11874 SLJIT_ASSERT(early_fail_ptr == 0);
11875#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
11876 if (common->utf)
11877 {
11878 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11879 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11880
11881 detect_partial_match(common, &no_match);
11882 label = LABEL();
11883 compile_char1_matchingpath(common, type, cc, &no_match, FALSE);
11884 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0);
11885 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11886 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11887 detect_partial_match_to(common, label);
11888
11889 set_jumps(no_match, LABEL());
11890 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
11891 break;
11892 }
11893#endif
11894
11895 if (type == OP_ALLANY)
11896 {
11897 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max));
11898
11899 if (common->mode == PCRE2_JIT_COMPLETE)
11900 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070011901 OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
Elliott Hughes5b808042021-10-01 10:56:10 -070011902 CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
11903 }
11904 else
11905 {
11906 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0);
11907 process_partial_match(common);
11908 JUMPHERE(jump);
11909 }
11910 break;
11911 }
11912
11913 OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
11914
11915 detect_partial_match(common, &no_match);
11916 label = LABEL();
11917 compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE);
11918 OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
11919 add_jump(compiler, &no_match, JUMP(SLJIT_ZERO));
11920 detect_partial_match_to(common, label);
11921 OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11922
11923 set_jumps(no_char1_match, LABEL());
11924 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
11925 set_jumps(no_match, LABEL());
11926 break;
11927
11928 case OP_POSQUERY:
11929 SLJIT_ASSERT(early_fail_ptr == 0);
11930 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11931 compile_char1_matchingpath(common, type, cc, &no_match, TRUE);
11932 OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
11933 set_jumps(no_match, LABEL());
11934 OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
11935 break;
11936
11937 default:
11938 SLJIT_UNREACHABLE();
11939 break;
11940 }
11941
11942count_match(common);
11943return end;
11944}
11945
11946static SLJIT_INLINE PCRE2_SPTR compile_fail_accept_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
11947{
11948DEFINE_COMPILER;
11949backtrack_common *backtrack;
11950
11951PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
11952
11953if (*cc == OP_FAIL)
11954 {
11955 add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11956 return cc + 1;
11957 }
11958
11959if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
11960 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
11961
11962if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
11963 {
11964 /* No need to check notempty conditions. */
11965 if (common->accept_label == NULL)
11966 add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
11967 else
11968 JUMPTO(SLJIT_JUMP, common->accept_label);
11969 return cc + 1;
11970 }
11971
11972if (common->accept_label == NULL)
11973 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
11974else
11975 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
11976
11977if (HAS_VIRTUAL_REGISTERS)
11978 {
11979 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
11980 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
11981 }
11982else
11983 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
11984
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070011985OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
Elliott Hughes5b808042021-10-01 10:56:10 -070011986add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070011987OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
Elliott Hughes5b808042021-10-01 10:56:10 -070011988if (common->accept_label == NULL)
11989 add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
11990else
11991 JUMPTO(SLJIT_ZERO, common->accept_label);
11992
11993OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str));
11994if (common->accept_label == NULL)
11995 add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
11996else
11997 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
11998add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
11999return cc + 1;
12000}
12001
12002static SLJIT_INLINE PCRE2_SPTR compile_close_matchingpath(compiler_common *common, PCRE2_SPTR cc)
12003{
12004DEFINE_COMPILER;
12005int offset = GET2(cc, 1);
12006BOOL optimized_cbracket = common->optimized_cbracket[offset] != 0;
12007
12008/* Data will be discarded anyway... */
12009if (common->currententry != NULL)
12010 return cc + 1 + IMM2_SIZE;
12011
12012if (!optimized_cbracket)
12013 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(offset));
12014offset <<= 1;
12015OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12016if (!optimized_cbracket)
12017 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12018return cc + 1 + IMM2_SIZE;
12019}
12020
12021static SLJIT_INLINE PCRE2_SPTR compile_control_verb_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
12022{
12023DEFINE_COMPILER;
12024backtrack_common *backtrack;
12025PCRE2_UCHAR opcode = *cc;
12026PCRE2_SPTR ccend = cc + 1;
12027
12028if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG ||
12029 opcode == OP_SKIP_ARG || opcode == OP_THEN_ARG)
12030 ccend += 2 + cc[1];
12031
12032PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
12033
12034if (opcode == OP_SKIP)
12035 {
12036 allocate_stack(common, 1);
12037 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12038 return ccend;
12039 }
12040
12041if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG)
12042 {
12043 if (HAS_VIRTUAL_REGISTERS)
12044 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12045 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12046 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12047 OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12048 }
12049
12050return ccend;
12051}
12052
12053static PCRE2_UCHAR then_trap_opcode[1] = { OP_THEN_TRAP };
12054
12055static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12056{
12057DEFINE_COMPILER;
12058backtrack_common *backtrack;
12059BOOL needs_control_head;
12060int size;
12061
12062PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12063common->then_trap = BACKTRACK_AS(then_trap_backtrack);
12064BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12065BACKTRACK_AS(then_trap_backtrack)->start = (sljit_sw)(cc - common->start);
12066BACKTRACK_AS(then_trap_backtrack)->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);
12067
12068size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12069size = 3 + (size < 0 ? 0 : size);
12070
12071OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12072allocate_stack(common, size);
12073if (size > 3)
12074 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (size - 3) * sizeof(sljit_sw));
12075else
12076 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12077OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 1), SLJIT_IMM, BACKTRACK_AS(then_trap_backtrack)->start);
12078OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 2), SLJIT_IMM, type_then_trap);
12079OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(size - 3), TMP2, 0);
12080
12081size = BACKTRACK_AS(then_trap_backtrack)->framesize;
12082if (size >= 0)
12083 init_frame(common, cc, ccend, size - 1, 0);
12084}
12085
/* Emits the matching path for the opcode sequence [cc, ccend).

The function walks the opcodes one by one, dispatching on *cc to the emitter
responsible for that opcode. Every emitter returns the position of the next
opcode; if any of them returns NULL the function returns immediately.

If common->has_then is set and common->then_offsets marks the starting
position, the whole sequence is wrapped in a then-trap: a trap record is
pushed before the sequence is compiled and a second then_trap_backtrack
record is pushed after it, while common->then_trap is saved and restored.

Arguments:
  common   compiler state
  cc       first opcode to compile
  ccend    end of the sequence; must point to OP_END or to an opcode in the
           OP_ALT .. OP_KETRPOS range (asserted below)
  parent   backtrack node that receives the backtrack records
*/
12086static void compile_matchingpath(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, backtrack_common *parent)
12087{
12088DEFINE_COMPILER;
12089backtrack_common *backtrack;
12090BOOL has_then_trap = FALSE;
12091then_trap_backtrack *save_then_trap = NULL;
12092
12093SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));
12094
/* A non-zero then_offsets entry for this position requests a then-trap. */
12095if (common->has_then && common->then_offsets[cc - common->start] != 0)
12096 {
12097 SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
12098 has_then_trap = TRUE;
12099 save_then_trap = common->then_trap;
12100 /* Tail item on backtrack. */
12101 compile_then_trap_matchingpath(common, cc, ccend, parent);
12102 }
12103
12104while (cc < ccend)
12105 {
12106 switch(*cc)
12107 {
 /* Simple assertions. Failure jumps are collected on the most recent
 backtrack record (parent->top->nextbacktracks) when one exists, otherwise
 on parent->topbacktracks. The same selection is used by several cases
 below. */
12108 case OP_SOD:
12109 case OP_SOM:
12110 case OP_NOT_WORD_BOUNDARY:
12111 case OP_WORD_BOUNDARY:
12112 case OP_EODN:
12113 case OP_EOD:
12114 case OP_DOLL:
12115 case OP_DOLLM:
12116 case OP_CIRC:
12117 case OP_CIRCM:
12118 case OP_REVERSE:
12119 cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
12120 break;
12121
 /* Opcodes handled by compile_char1_matchingpath. */
12122 case OP_NOT_DIGIT:
12123 case OP_DIGIT:
12124 case OP_NOT_WHITESPACE:
12125 case OP_WHITESPACE:
12126 case OP_NOT_WORDCHAR:
12127 case OP_WORDCHAR:
12128 case OP_ANY:
12129 case OP_ALLANY:
12130 case OP_ANYBYTE:
12131 case OP_NOTPROP:
12132 case OP_PROP:
12133 case OP_ANYNL:
12134 case OP_NOT_HSPACE:
12135 case OP_HSPACE:
12136 case OP_NOT_VSPACE:
12137 case OP_VSPACE:
12138 case OP_EXTUNI:
12139 case OP_NOT:
12140 case OP_NOTI:
12141 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12142 break;
12143
 /* The previous OVECTOR(0) value is saved in a newly allocated stack slot
 and OVECTOR(0) is overwritten with the current STR_PTR. */
12144 case OP_SET_SOM:
12145 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12146 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
12147 allocate_stack(common, 1);
12148 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
12149 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
12150 cc++;
12151 break;
12152
 /* In PCRE2_JIT_COMPLETE mode compile_charn_matchingpath is used (it is
 given ccend as well); in every other mode compile_char1_matchingpath is
 used. */
12153 case OP_CHAR:
12154 case OP_CHARI:
12155 if (common->mode == PCRE2_JIT_COMPLETE)
12156 cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
12157 else
12158 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12159 break;
12160
 /* Repeat opcodes handled by compile_iterator_matchingpath. */
12161 case OP_STAR:
12162 case OP_MINSTAR:
12163 case OP_PLUS:
12164 case OP_MINPLUS:
12165 case OP_QUERY:
12166 case OP_MINQUERY:
12167 case OP_UPTO:
12168 case OP_MINUPTO:
12169 case OP_EXACT:
12170 case OP_POSSTAR:
12171 case OP_POSPLUS:
12172 case OP_POSQUERY:
12173 case OP_POSUPTO:
12174 case OP_STARI:
12175 case OP_MINSTARI:
12176 case OP_PLUSI:
12177 case OP_MINPLUSI:
12178 case OP_QUERYI:
12179 case OP_MINQUERYI:
12180 case OP_UPTOI:
12181 case OP_MINUPTOI:
12182 case OP_EXACTI:
12183 case OP_POSSTARI:
12184 case OP_POSPLUSI:
12185 case OP_POSQUERYI:
12186 case OP_POSUPTOI:
12187 case OP_NOTSTAR:
12188 case OP_NOTMINSTAR:
12189 case OP_NOTPLUS:
12190 case OP_NOTMINPLUS:
12191 case OP_NOTQUERY:
12192 case OP_NOTMINQUERY:
12193 case OP_NOTUPTO:
12194 case OP_NOTMINUPTO:
12195 case OP_NOTEXACT:
12196 case OP_NOTPOSSTAR:
12197 case OP_NOTPOSPLUS:
12198 case OP_NOTPOSQUERY:
12199 case OP_NOTPOSUPTO:
12200 case OP_NOTSTARI:
12201 case OP_NOTMINSTARI:
12202 case OP_NOTPLUSI:
12203 case OP_NOTMINPLUSI:
12204 case OP_NOTQUERYI:
12205 case OP_NOTMINQUERYI:
12206 case OP_NOTUPTOI:
12207 case OP_NOTMINUPTOI:
12208 case OP_NOTEXACTI:
12209 case OP_NOTPOSSTARI:
12210 case OP_NOTPOSPLUSI:
12211 case OP_NOTPOSQUERYI:
12212 case OP_NOTPOSUPTOI:
12213 case OP_TYPESTAR:
12214 case OP_TYPEMINSTAR:
12215 case OP_TYPEPLUS:
12216 case OP_TYPEMINPLUS:
12217 case OP_TYPEQUERY:
12218 case OP_TYPEMINQUERY:
12219 case OP_TYPEUPTO:
12220 case OP_TYPEMINUPTO:
12221 case OP_TYPEEXACT:
12222 case OP_TYPEPOSSTAR:
12223 case OP_TYPEPOSPLUS:
12224 case OP_TYPEPOSQUERY:
12225 case OP_TYPEPOSUPTO:
12226 cc = compile_iterator_matchingpath(common, cc, parent);
12227 break;
12228
 /* The code unit located 32 bytes after the opcode decides the emitter:
 a value in the OP_CRSTAR .. OP_CRPOSRANGE range selects the iterator
 emitter, anything else the single item emitter. */
12229 case OP_CLASS:
12230 case OP_NCLASS:
12231 if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
12232 cc = compile_iterator_matchingpath(common, cc, parent);
12233 else
12234 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12235 break;
12236
12237#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
 /* Same decision as above; the opcode to inspect is found at the offset
 read by GET(cc, 1). */
12238 case OP_XCLASS:
12239 if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
12240 cc = compile_iterator_matchingpath(common, cc, parent);
12241 else
12242 cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
12243 break;
12244#endif
12245
 /* A reference followed by an opcode in the OP_CRSTAR .. OP_CRPOSRANGE
 range is compiled as a reference iterator, otherwise as a single
 reference and cc is advanced past the opcode and its IMM2_SIZE operand. */
12246 case OP_REF:
12247 case OP_REFI:
12248 if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
12249 cc = compile_ref_iterator_matchingpath(common, cc, parent);
12250 else
12251 {
12252 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
12253 cc += 1 + IMM2_SIZE;
12254 }
12255 break;
12256
 /* Like OP_REF, but the opcode has two IMM2_SIZE operands and the single
 (non-iterated) form calls compile_dnref_search before the reference is
 matched. */
12257 case OP_DNREF:
12258 case OP_DNREFI:
12259 if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
12260 cc = compile_ref_iterator_matchingpath(common, cc, parent);
12261 else
12262 {
12263 compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
12264 compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
12265 cc += 1 + 2 * IMM2_SIZE;
12266 }
12267 break;
12268
12269 case OP_RECURSE:
12270 cc = compile_recurse_matchingpath(common, cc, parent);
12271 break;
12272
12273 case OP_CALLOUT:
12274 case OP_CALLOUT_STR:
12275 cc = compile_callout_matchingpath(common, cc, parent);
12276 break;
12277
 /* Assertions get their own assert_backtrack record, which is passed to
 the assertion emitter. */
12278 case OP_ASSERT:
12279 case OP_ASSERT_NOT:
12280 case OP_ASSERTBACK:
12281 case OP_ASSERTBACK_NOT:
12282 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12283 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12284 break;
12285
 /* No code is emitted for the bracket body here: cc is moved to the
 position returned by bracketend(). STR_PTR is pushed on the stack; when
 the opcode at cc - 1 - LINK_SIZE is OP_KETRMIN two slots are used and
 STACK(0) is set to zero. The label recorded in the backtrack record marks
 where the matching path continues. */
12286 case OP_BRAMINZERO:
12287 PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
12288 cc = bracketend(cc + 1);
12289 if (*(cc - 1 - LINK_SIZE) != OP_KETRMIN)
12290 {
12291 allocate_stack(common, 1);
12292 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
12293 }
12294 else
12295 {
12296 allocate_stack(common, 2);
12297 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12298 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
12299 }
12300 BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
12301 count_match(common);
12302 break;
12303
12304 case OP_ASSERT_NA:
12305 case OP_ASSERTBACK_NA:
12306 case OP_ONCE:
12307 case OP_SCRIPT_RUN:
12308 case OP_BRA:
12309 case OP_CBRA:
12310 case OP_COND:
12311 case OP_SBRA:
12312 case OP_SCBRA:
12313 case OP_SCOND:
12314 cc = compile_bracket_matchingpath(common, cc, parent);
12315 break;
12316
 /* The opcode after OP_BRAZERO selects the emitter: values above
 OP_ASSERTBACK_NOT go to the bracket emitter, the rest are treated as
 assertions. */
12317 case OP_BRAZERO:
12318 if (cc[1] > OP_ASSERTBACK_NOT)
12319 cc = compile_bracket_matchingpath(common, cc, parent);
12320 else
12321 {
12322 PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
12323 cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
12324 }
12325 break;
12326
12327 case OP_BRAPOS:
12328 case OP_CBRAPOS:
12329 case OP_SBRAPOS:
12330 case OP_SCBRAPOS:
12331 case OP_BRAPOSZERO:
12332 cc = compile_bracketpos_matchingpath(common, cc, parent);
12333 break;
12334
 /* The previous mark_ptr value is saved on the stack and cc + 2 becomes
 the new mark, stored both in the local mark_ptr slot and in
 jit_arguments.mark_ptr. When common->has_skip_arg is set, five slots are
 allocated instead of one: STACK(0) receives the previous control head,
 STACK(1) type_mark, STACK(2) cc + 2, STACK(3) STR_PTR and STACK(4) the
 previous mark; the control head is redirected to this entry. */
12335 case OP_MARK:
12336 PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
12337 SLJIT_ASSERT(common->mark_ptr != 0);
12338 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
12339 allocate_stack(common, common->has_skip_arg ? 5 : 1);
12340 if (HAS_VIRTUAL_REGISTERS)
12341 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
12342 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
12343 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
12344 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
12345 OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
12346 if (common->has_skip_arg)
12347 {
12348 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
12349 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
12350 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
12351 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
12352 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
12353 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
12354 }
 /* cc[1] is used as a length here - presumably the length of the mark
 name that starts at cc + 2 (TODO confirm against the opcode layout). */
12355 cc += 1 + 2 + cc[1];
12356 break;
12357
12358 case OP_PRUNE:
12359 case OP_PRUNE_ARG:
12360 case OP_SKIP:
12361 case OP_SKIP_ARG:
12362 case OP_THEN:
12363 case OP_THEN_ARG:
12364 case OP_COMMIT:
12365 case OP_COMMIT_ARG:
12366 cc = compile_control_verb_matchingpath(common, cc, parent);
12367 break;
12368
12369 case OP_FAIL:
12370 case OP_ACCEPT:
12371 case OP_ASSERT_ACCEPT:
12372 cc = compile_fail_accept_matchingpath(common, cc, parent);
12373 break;
12374
12375 case OP_CLOSE:
12376 cc = compile_close_matchingpath(common, cc);
12377 break;
12378
 /* Nothing is emitted: cc simply continues at the position returned by
 bracketend(). */
12379 case OP_SKIPZERO:
12380 cc = bracketend(cc + 1);
12381 break;
12382
12383 default:
12384 SLJIT_UNREACHABLE();
12385 return;
12386 }
 /* A NULL position from any emitter stops the compilation of this
 sequence. */
12387 if (cc == NULL)
12388 return;
12389 }
12390
12391if (has_then_trap)
12392 {
12393 /* Head item on backtrack. */
12394 PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
12395 BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
12396 BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
 /* Restore the trap that was active before this sequence. */
12397 common->then_trap = save_then_trap;
12398 }
12399SLJIT_ASSERT(cc == ccend);
12400}
12401
/* The helper macros used while emitting matching paths are removed here;
the functions that follow use the macros defined below instead. */
12402#undef PUSH_BACKTRACK
12403#undef PUSH_BACKTRACK_NOVALUE
12404#undef BACKTRACK_AS
12405
/* Calls compile_backtrackingpath() for the given backtrack node and returns
from the enclosing function (which must return void) when the sljit compiler
reports an error. The identifiers `common` and `compiler` must be in scope at
the point of use. */
12406#define COMPILE_BACKTRACKINGPATH(current) \
12407 do \
12408 { \
12409 compile_backtrackingpath(common, (current)); \
12410 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
12411 return; \
12412 } \
12413 while (0)
12414
/* Casts the identifier `current` (which must be in scope) to a pointer to
the given backtrack record type. */
12415#define CURRENT_AS(type) ((type *)current)
12416
/* Emits the backtracking path of a single item iterator.

The iterator is described by the opcode at current->cc, decoded with
get_iterator_parameters(). The iterator state (offset0 / offset1 below) is
kept either in the two topmost stack slots, when PRIVATE_DATA(cc) is zero,
or in the private data area addressed through SLJIT_SP. Stack slots are
released only in the former case.

Arguments:
  common    compiler state
  current   backtrack record of the iterator (a char_iterator_backtrack)
*/
12417static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12418{
12419DEFINE_COMPILER;
12420PCRE2_SPTR cc = current->cc;
12421PCRE2_UCHAR opcode;
12422PCRE2_UCHAR type;
12423sljit_u32 max = 0, exact;
12424struct sljit_label *label = NULL;
12425struct sljit_jump *jump = NULL;
12426jump_list *jumplist = NULL;
12427PCRE2_SPTR end;
12428int private_data_ptr = PRIVATE_DATA(cc);
/* Select where the iterator state lives: stack slots or private data. */
12429int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
12430int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
12431int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
12432
12433cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
12434
12435switch(opcode)
12436 {
12437 case OP_STAR:
12438 case OP_UPTO:
12439 if (type == OP_ANYNL || type == OP_EXTUNI)
12440 {
 /* For these two types the state is always on the stack (asserted).
 A saved STR_PTR is popped; a non-zero value resumes the matching
 path with that position. */
12441 SLJIT_ASSERT(private_data_ptr == 0);
12442 set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12443 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12444 free_stack(common, 1);
12445 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12446 }
12447 else
12448 {
12449 if (CURRENT_AS(char_iterator_backtrack)->u.charpos.enabled)
12450 {
 /* Scan backwards from the saved position (offset0) while STR_PTR
 stays above the bound stored in offset1. The matching path is
 resumed as soon as the character before STR_PTR, after OR-ing the
 optional othercasebit, equals charpos.chr. */
12451 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12452 OP1(SLJIT_MOV, TMP2, 0, base, offset1);
12453 OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
12454
12455 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
12456 label = LABEL();
12457 OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
12458 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12459 if (CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit != 0)
12460 OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.othercasebit);
12461 CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CURRENT_AS(char_iterator_backtrack)->u.charpos.chr, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12462 move_back(common, NULL, TRUE);
12463 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, label);
12464 }
12465 else
12466 {
 /* Give up when the saved position is not above the bound in
 offset1; otherwise step back once, store the new position and
 resume the matching path. */
12467 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12468 jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
12469 move_back(common, NULL, TRUE);
12470 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12471 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12472 }
12473 JUMPHERE(jump);
 /* Both state words are dropped when they were kept on the stack. */
12474 if (private_data_ptr == 0)
12475 free_stack(common, 2);
12476 }
12477 break;
12478
12479 case OP_MINSTAR:
 /* Try to match one more item from the saved position; on success the
 new position is stored and the matching path is resumed, on failure
 (jumplist) the iterator is abandoned. */
12480 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12481 compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12482 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12483 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12484 set_jumps(jumplist, LABEL());
12485 if (private_data_ptr == 0)
12486 free_stack(common, 1);
12487 break;
12488
12489 case OP_MINUPTO:
 /* offset1 holds a counter: it is decremented and the iterator is
 abandoned when it reaches zero. Otherwise one more item is matched as
 in the OP_MINSTAR case. */
12490 OP1(SLJIT_MOV, TMP1, 0, base, offset1);
12491 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12492 OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
12493 add_jump(compiler, &jumplist, JUMP(SLJIT_ZERO));
12494
12495 OP1(SLJIT_MOV, base, offset1, TMP1, 0);
12496 compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12497 OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
12498 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12499
12500 set_jumps(jumplist, LABEL());
12501 if (private_data_ptr == 0)
12502 free_stack(common, 2);
12503 break;
12504
12505 case OP_QUERY:
 /* The saved position is loaded and the slot is cleared; a non-zero
 position resumes the matching path. Jumps collected in u.backtracks
 land on a second copy of the same load-and-clear sequence, which
 always resumes the matching path. */
12506 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12507 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12508 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12509 jump = JUMP(SLJIT_JUMP);
12510 set_jumps(CURRENT_AS(char_iterator_backtrack)->u.backtracks, LABEL());
12511 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12512 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12513 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12514 JUMPHERE(jump);
12515 if (private_data_ptr == 0)
12516 free_stack(common, 1);
12517 break;
12518
12519 case OP_MINQUERY:
 /* The saved position is loaded and the slot is cleared. A zero
 position means there is nothing left to try; otherwise one item is
 matched and the matching path is resumed. */
12520 OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
12521 OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
12522 jump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12523 compile_char1_matchingpath(common, type, cc, &jumplist, TRUE);
12524 JUMPTO(SLJIT_JUMP, CURRENT_AS(char_iterator_backtrack)->matchingpath);
12525 set_jumps(jumplist, LABEL());
12526 JUMPHERE(jump);
12527 if (private_data_ptr == 0)
12528 free_stack(common, 1);
12529 break;
12530
 /* No backtracking code is emitted for these opcodes. */
12531 case OP_EXACT:
12532 case OP_POSSTAR:
12533 case OP_POSQUERY:
12534 case OP_POSUPTO:
12535 break;
12536
12537 default:
12538 SLJIT_UNREACHABLE();
12539 break;
12540 }
12541
/* Failure jumps of the record continue right after the code emitted above. */
12542set_jumps(current->topbacktracks, LABEL());
12543}
12544
12545static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12546{
12547DEFINE_COMPILER;
12548PCRE2_SPTR cc = current->cc;
12549BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
12550PCRE2_UCHAR type;
12551
12552type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
12553
12554if ((type & 0x1) == 0)
12555 {
12556 /* Maximize case. */
12557 set_jumps(current->topbacktracks, LABEL());
12558 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12559 free_stack(common, 1);
12560 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12561 return;
12562 }
12563
12564OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12565CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
12566set_jumps(current->topbacktracks, LABEL());
12567free_stack(common, ref ? 2 : 3);
12568}
12569
12570static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12571{
12572DEFINE_COMPILER;
12573recurse_entry *entry;
12574
12575if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
12576 {
12577 entry = CURRENT_AS(recurse_backtrack)->entry;
12578 if (entry->backtrack_label == NULL)
12579 add_jump(compiler, &entry->backtrack_calls, JUMP(SLJIT_FAST_CALL));
12580 else
12581 JUMPTO(SLJIT_FAST_CALL, entry->backtrack_label);
12582 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(recurse_backtrack)->matchingpath);
12583 }
12584else
12585 compile_backtrackingpath(common, current->top);
12586
12587set_jumps(current->topbacktracks, LABEL());
12588}
12589
/* Emits the backtracking path of an assertion (OP_ASSERT, OP_ASSERT_NOT,
OP_ASSERTBACK or OP_ASSERTBACK_NOT), optionally preceded by OP_BRAZERO.

With OP_BRAZERO a value saved in STACK(0) is loaded into STR_PTR first; a
non-zero value means the matching path can be resumed after STACK(0) has been
set to zero. When the assertion has a frame (framesize >= 0) and is a
positive one, the frame is reverted through common->revertframes before the
failure jumps of the record are bound.

Arguments:
  common    compiler state
  current   backtrack record of the assertion (an assert_backtrack)
*/
12590static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12591{
12592DEFINE_COMPILER;
12593PCRE2_SPTR cc = current->cc;
12594PCRE2_UCHAR bra = OP_BRA;
12595struct sljit_jump *brajump = NULL;
12596
12597SLJIT_ASSERT(*cc != OP_BRAMINZERO);
/* Step over an OP_BRAZERO prefix so that cc points at the assertion. */
12598if (*cc == OP_BRAZERO)
12599 {
12600 bra = *cc;
12601 cc++;
12602 }
12603
12604if (bra == OP_BRAZERO)
12605 {
12606 SLJIT_ASSERT(current->topbacktracks == NULL);
12607 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12608 }
12609
/* No frame was created for the assertion: only the OP_BRAZERO handling is
needed. */
12610if (CURRENT_AS(assert_backtrack)->framesize < 0)
12611 {
12612 set_jumps(current->topbacktracks, LABEL());
12613
12614 if (bra == OP_BRAZERO)
12615 {
12616 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12617 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
12618 free_stack(common, 1);
12619 }
12620 return;
12621 }
12622
12623if (bra == OP_BRAZERO)
12624 {
 /* Negative assertions with OP_BRAZERO are handled exactly like the
 frameless case above. */
12625 if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
12626 {
12627 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12628 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(assert_backtrack)->matchingpath);
12629 free_stack(common, 1);
12630 return;
12631 }
 /* Positive assertion: drop the slot and skip the frame handling below
 when the loaded value is zero. */
12632 free_stack(common, 1);
12633 brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
12634 }
12635
12636if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
12637 {
 /* STACK_TOP is reloaded from the private data slot, the frame is reverted,
 the private data slot is restored from STACK(-2) and STACK_TOP is advanced
 by framesize - 1 words. */
12638 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr);
12639 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12640 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12641 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));
12642 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);
12643
12644 set_jumps(current->topbacktracks, LABEL());
12645 }
12646else
12647 set_jumps(current->topbacktracks, LABEL());
12648
12649if (bra == OP_BRAZERO)
12650 {
12651 /* We know there is enough place on the stack. */
 /* The slot released above is taken back without an allocation call, set
 to zero, and the matching path is resumed. */
12652 OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
12653 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
12654 JUMPTO(SLJIT_JUMP, CURRENT_AS(assert_backtrack)->matchingpath);
12655 JUMPHERE(brajump);
12656 }
12657}
12658
12659static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current)
12660{
12661DEFINE_COMPILER;
12662int opcode, stacksize, alt_count, alt_max;
12663int offset = 0;
12664int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr;
12665int repeat_ptr = 0, repeat_type = 0, repeat_count = 0;
12666PCRE2_SPTR cc = current->cc;
12667PCRE2_SPTR ccbegin;
12668PCRE2_SPTR ccprev;
12669PCRE2_UCHAR bra = OP_BRA;
12670PCRE2_UCHAR ket;
12671assert_backtrack *assert;
12672BOOL has_alternatives;
12673BOOL needs_control_head = FALSE;
12674struct sljit_jump *brazero = NULL;
12675struct sljit_jump *next_alt = NULL;
12676struct sljit_jump *once = NULL;
12677struct sljit_jump *cond = NULL;
12678struct sljit_label *rmin_label = NULL;
12679struct sljit_label *exact_label = NULL;
12680struct sljit_put_label *put_label = NULL;
12681
12682if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
12683 {
12684 bra = *cc;
12685 cc++;
12686 }
12687
12688opcode = *cc;
12689ccbegin = bracketend(cc) - 1 - LINK_SIZE;
12690ket = *ccbegin;
12691if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0)
12692 {
12693 repeat_ptr = PRIVATE_DATA(ccbegin);
12694 repeat_type = PRIVATE_DATA(ccbegin + 2);
12695 repeat_count = PRIVATE_DATA(ccbegin + 3);
12696 SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0);
12697 if (repeat_type == OP_UPTO)
12698 ket = OP_KETRMAX;
12699 if (repeat_type == OP_MINUPTO)
12700 ket = OP_KETRMIN;
12701 }
12702ccbegin = cc;
12703cc += GET(cc, 1);
12704has_alternatives = *cc == OP_ALT;
12705if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12706 has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL;
12707if (opcode == OP_CBRA || opcode == OP_SCBRA)
12708 offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1;
12709if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
12710 opcode = OP_SCOND;
12711
12712alt_max = has_alternatives ? no_alternatives(ccbegin) : 0;
12713
12714/* Decoding the needs_control_head in framesize. */
12715if (opcode == OP_ONCE)
12716 {
12717 needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0;
12718 CURRENT_AS(bracket_backtrack)->u.framesize >>= 1;
12719 }
12720
12721if (ket != OP_KET && repeat_type != 0)
12722 {
12723 /* TMP1 is used in OP_KETRMIN below. */
12724 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12725 free_stack(common, 1);
12726 if (repeat_type == OP_UPTO)
12727 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1);
12728 else
12729 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
12730 }
12731
12732if (ket == OP_KETRMAX)
12733 {
12734 if (bra == OP_BRAZERO)
12735 {
12736 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12737 free_stack(common, 1);
12738 brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12739 }
12740 }
12741else if (ket == OP_KETRMIN)
12742 {
12743 if (bra != OP_BRAMINZERO)
12744 {
12745 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12746 if (repeat_type != 0)
12747 {
12748 /* TMP1 was set a few lines above. */
12749 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12750 /* Drop STR_PTR for non-greedy plus quantifier. */
12751 if (opcode != OP_ONCE)
12752 free_stack(common, 1);
12753 }
12754 else if (opcode >= OP_SBRA || opcode == OP_ONCE)
12755 {
12756 /* Checking zero-length iteration. */
12757 if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0)
12758 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12759 else
12760 {
12761 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12762 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12763 }
12764 /* Drop STR_PTR for non-greedy plus quantifier. */
12765 if (opcode != OP_ONCE)
12766 free_stack(common, 1);
12767 }
12768 else
12769 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
12770 }
12771 rmin_label = LABEL();
12772 if (repeat_type != 0)
12773 OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12774 }
12775else if (bra == OP_BRAZERO)
12776 {
12777 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12778 free_stack(common, 1);
12779 brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12780 }
12781else if (repeat_type == OP_EXACT)
12782 {
12783 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
12784 exact_label = LABEL();
12785 }
12786
12787if (offset != 0)
12788 {
12789 if (common->capture_last_ptr != 0)
12790 {
12791 SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0);
12792 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12793 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12794 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
12795 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
12796 free_stack(common, 3);
12797 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0);
12798 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
12799 }
12800 else if (common->optimized_cbracket[offset >> 1] == 0)
12801 {
12802 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12803 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
12804 free_stack(common, 2);
12805 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
12806 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
12807 }
12808 }
12809
12810if (SLJIT_UNLIKELY(opcode == OP_ONCE))
12811 {
12812 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
12813 {
12814 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12815 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12816 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracket_backtrack)->u.framesize - 1) * sizeof(sljit_sw));
12817 }
12818 once = JUMP(SLJIT_JUMP);
12819 }
12820else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12821 {
12822 if (has_alternatives)
12823 {
12824 /* Always exactly one alternative. */
12825 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12826 free_stack(common, 1);
12827
12828 alt_max = 2;
12829 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12830 }
12831 }
12832else if (has_alternatives)
12833 {
12834 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12835 free_stack(common, 1);
12836
12837 if (alt_max > 3)
12838 {
12839 sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
12840
12841 SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label);
12842 sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL());
12843 sljit_emit_op0(compiler, SLJIT_ENDBR);
12844 }
12845 else
12846 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
12847 }
12848
12849COMPILE_BACKTRACKINGPATH(current->top);
12850if (current->topbacktracks)
12851 set_jumps(current->topbacktracks, LABEL());
12852
12853if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
12854 {
12855 /* Conditional block always has at most one alternative. */
12856 if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT)
12857 {
12858 SLJIT_ASSERT(has_alternatives);
12859 assert = CURRENT_AS(bracket_backtrack)->u.assert;
12860 if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK))
12861 {
12862 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
12863 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
12864 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
12865 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
12866 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
12867 }
12868 cond = JUMP(SLJIT_JUMP);
12869 set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL());
12870 }
12871 else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL)
12872 {
12873 SLJIT_ASSERT(has_alternatives);
12874 cond = JUMP(SLJIT_JUMP);
12875 set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL());
12876 }
12877 else
12878 SLJIT_ASSERT(!has_alternatives);
12879 }
12880
12881if (has_alternatives)
12882 {
12883 alt_count = 1;
12884 do
12885 {
12886 current->top = NULL;
12887 current->topbacktracks = NULL;
12888 current->nextbacktracks = NULL;
12889 /* Conditional blocks always have an additional alternative, even if it is empty. */
12890 if (*cc == OP_ALT)
12891 {
12892 ccprev = cc + 1 + LINK_SIZE;
12893 cc += GET(cc, 1);
12894 if (opcode != OP_COND && opcode != OP_SCOND)
12895 {
12896 if (opcode != OP_ONCE)
12897 {
12898 if (private_data_ptr != 0)
12899 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12900 else
12901 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
12902 }
12903 else
12904 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
12905 }
12906 compile_matchingpath(common, ccprev, cc, current);
12907 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
12908 return;
12909
12910 if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
12911 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
12912
12913 if (opcode == OP_SCRIPT_RUN)
12914 match_script_run_common(common, private_data_ptr, current);
12915 }
12916
12917 /* Instructions after the current alternative is successfully matched. */
12918 /* There is a similar code in compile_bracket_matchingpath. */
12919 if (opcode == OP_ONCE)
12920 match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
12921
12922 stacksize = 0;
12923 if (repeat_type == OP_MINUPTO)
12924 {
12925 /* We need to preserve the counter. TMP2 will be used below. */
12926 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
12927 stacksize++;
12928 }
12929 if (ket != OP_KET || bra != OP_BRA)
12930 stacksize++;
12931 if (offset != 0)
12932 {
12933 if (common->capture_last_ptr != 0)
12934 stacksize++;
12935 if (common->optimized_cbracket[offset >> 1] == 0)
12936 stacksize += 2;
12937 }
12938 if (opcode != OP_ONCE)
12939 stacksize++;
12940
12941 if (stacksize > 0)
12942 allocate_stack(common, stacksize);
12943
12944 stacksize = 0;
12945 if (repeat_type == OP_MINUPTO)
12946 {
12947 /* TMP2 was set above. */
12948 OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
12949 stacksize++;
12950 }
12951
12952 if (ket != OP_KET || bra != OP_BRA)
12953 {
12954 if (ket != OP_KET)
12955 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
12956 else
12957 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
12958 stacksize++;
12959 }
12960
12961 if (offset != 0)
12962 stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);
12963
12964 if (opcode != OP_ONCE)
12965 {
12966 if (alt_max <= 3)
12967 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count);
12968 else
12969 put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize));
12970 }
12971
12972 if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0)
12973 {
12974 /* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */
12975 SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
12976 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
12977 }
12978
12979 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath);
12980
12981 if (opcode != OP_ONCE)
12982 {
12983 if (alt_max <= 3)
12984 {
12985 JUMPHERE(next_alt);
12986 alt_count++;
12987 if (alt_count < alt_max)
12988 {
12989 SLJIT_ASSERT(alt_count == 2 && alt_max == 3);
12990 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
12991 }
12992 }
12993 else
12994 {
12995 sljit_set_put_label(put_label, LABEL());
12996 sljit_emit_op0(compiler, SLJIT_ENDBR);
12997 }
12998 }
12999
13000 COMPILE_BACKTRACKINGPATH(current->top);
13001 if (current->topbacktracks)
13002 set_jumps(current->topbacktracks, LABEL());
13003 SLJIT_ASSERT(!current->nextbacktracks);
13004 }
13005 while (*cc == OP_ALT);
13006
13007 if (cond != NULL)
13008 {
13009 SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND);
13010 assert = CURRENT_AS(bracket_backtrack)->u.assert;
13011 if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0)
13012 {
13013 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr);
13014 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13015 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
13016 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (assert->framesize - 1) * sizeof(sljit_sw));
13017 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, TMP1, 0);
13018 }
13019 JUMPHERE(cond);
13020 }
13021
13022 /* Free the STR_PTR. */
13023 if (private_data_ptr == 0)
13024 free_stack(common, 1);
13025 }
13026
13027if (offset != 0)
13028 {
13029 /* Using both tmp register is better for instruction scheduling. */
13030 if (common->optimized_cbracket[offset >> 1] != 0)
13031 {
13032 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13033 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13034 free_stack(common, 2);
13035 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13036 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13037 }
13038 else
13039 {
13040 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13041 free_stack(common, 1);
13042 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13043 }
13044 }
13045else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
13046 {
13047 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
13048 free_stack(common, 1);
13049 }
13050else if (opcode == OP_ONCE)
13051 {
13052 cc = ccbegin + GET(ccbegin, 1);
13053 stacksize = needs_control_head ? 1 : 0;
13054
13055 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13056 {
13057 /* Reset head and drop saved frame. */
13058 stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1);
13059 }
13060 else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN))
13061 {
13062 /* The STR_PTR must be released. */
13063 stacksize++;
13064 }
13065
13066 if (stacksize > 0)
13067 free_stack(common, stacksize);
13068
13069 JUMPHERE(once);
13070 /* Restore previous private_data_ptr */
13071 if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0)
13072 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1));
13073 else if (ket == OP_KETRMIN)
13074 {
13075 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13076 /* See the comment below. */
13077 free_stack(common, 2);
13078 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
13079 }
13080 }
13081
13082if (repeat_type == OP_EXACT)
13083 {
13084 OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
13085 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0);
13086 CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label);
13087 }
13088else if (ket == OP_KETRMAX)
13089 {
13090 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13091 if (bra != OP_BRAZERO)
13092 free_stack(common, 1);
13093
13094 CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath);
13095 if (bra == OP_BRAZERO)
13096 {
13097 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13098 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13099 JUMPHERE(brazero);
13100 free_stack(common, 1);
13101 }
13102 }
13103else if (ket == OP_KETRMIN)
13104 {
13105 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13106
13107 /* OP_ONCE removes everything in case of a backtrack, so we don't
13108 need to explicitly release the STR_PTR. The extra release would
13109 affect badly the free_stack(2) above. */
13110 if (opcode != OP_ONCE)
13111 free_stack(common, 1);
13112 CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label);
13113 if (opcode == OP_ONCE)
13114 free_stack(common, bra == OP_BRAMINZERO ? 2 : 1);
13115 else if (bra == OP_BRAMINZERO)
13116 free_stack(common, 1);
13117 }
13118else if (bra == OP_BRAZERO)
13119 {
13120 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13121 JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath);
13122 JUMPHERE(brazero);
13123 }
13124}
13125
13126static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13127{
13128DEFINE_COMPILER;
13129int offset;
13130struct sljit_jump *jump;
13131
13132if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
13133 {
13134 if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
13135 {
13136 offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
13137 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13138 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13139 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
13140 if (common->capture_last_ptr != 0)
13141 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
13142 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
13143 if (common->capture_last_ptr != 0)
13144 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
13145 }
13146 set_jumps(current->topbacktracks, LABEL());
13147 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13148 return;
13149 }
13150
13151OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
13152add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13153OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
13154
13155if (current->topbacktracks)
13156 {
13157 jump = JUMP(SLJIT_JUMP);
13158 set_jumps(current->topbacktracks, LABEL());
13159 /* Drop the stack frame. */
13160 free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
13161 JUMPHERE(jump);
13162 }
13163OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
13164}
13165
13166static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13167{
13168assert_backtrack backtrack;
13169
13170current->top = NULL;
13171current->topbacktracks = NULL;
13172current->nextbacktracks = NULL;
13173if (current->cc[1] > OP_ASSERTBACK_NOT)
13174 {
13175 /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
13176 compile_bracket_matchingpath(common, current->cc, current);
13177 compile_bracket_backtrackingpath(common, current->top);
13178 }
13179else
13180 {
13181 memset(&backtrack, 0, sizeof(backtrack));
13182 backtrack.common.cc = current->cc;
13183 backtrack.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
13184 /* Manual call of compile_assert_matchingpath. */
13185 compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
13186 }
13187SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
13188}
13189
13190static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13191{
13192DEFINE_COMPILER;
13193PCRE2_UCHAR opcode = *current->cc;
13194struct sljit_label *loop;
13195struct sljit_jump *jump;
13196
13197if (opcode == OP_THEN || opcode == OP_THEN_ARG)
13198 {
13199 if (common->then_trap != NULL)
13200 {
13201 SLJIT_ASSERT(common->control_head_ptr != 0);
13202
13203 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13204 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
13205 OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
13206 jump = JUMP(SLJIT_JUMP);
13207
13208 loop = LABEL();
13209 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13210 JUMPHERE(jump);
13211 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, loop);
13212 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, loop);
13213 add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
13214 return;
13215 }
13216 else if (!common->local_quit_available && common->in_positive_assertion)
13217 {
13218 add_jump(compiler, &common->positive_assertion_quit, JUMP(SLJIT_JUMP));
13219 return;
13220 }
13221 }
13222
13223if (common->local_quit_available)
13224 {
13225 /* Abort match with a fail. */
13226 if (common->quit_label == NULL)
13227 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13228 else
13229 JUMPTO(SLJIT_JUMP, common->quit_label);
13230 return;
13231 }
13232
13233if (opcode == OP_SKIP_ARG)
13234 {
13235 SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
13236 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
13237 OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070013238 sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));
Elliott Hughes5b808042021-10-01 10:56:10 -070013239
13240 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
13241 add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
13242 return;
13243 }
13244
13245if (opcode == OP_SKIP)
13246 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13247else
13248 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
13249add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
13250}
13251
13252static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13253{
13254DEFINE_COMPILER;
13255struct sljit_jump *jump;
13256int size;
13257
13258if (CURRENT_AS(then_trap_backtrack)->then_trap)
13259 {
13260 common->then_trap = CURRENT_AS(then_trap_backtrack)->then_trap;
13261 return;
13262 }
13263
13264size = CURRENT_AS(then_trap_backtrack)->framesize;
13265size = 3 + (size < 0 ? 0 : size);
13266
13267OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(size - 3));
13268free_stack(common, size);
13269jump = JUMP(SLJIT_JUMP);
13270
13271set_jumps(CURRENT_AS(then_trap_backtrack)->quit, LABEL());
13272/* STACK_TOP is set by THEN. */
13273if (CURRENT_AS(then_trap_backtrack)->framesize >= 0)
13274 {
13275 add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
13276 OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(then_trap_backtrack)->framesize - 1) * sizeof(sljit_sw));
13277 }
13278OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13279free_stack(common, 3);
13280
13281JUMPHERE(jump);
13282OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
13283}
13284
13285static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
13286{
13287DEFINE_COMPILER;
13288then_trap_backtrack *save_then_trap = common->then_trap;
13289
13290while (current)
13291 {
13292 if (current->nextbacktracks != NULL)
13293 set_jumps(current->nextbacktracks, LABEL());
13294 switch(*current->cc)
13295 {
13296 case OP_SET_SOM:
13297 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13298 free_stack(common, 1);
13299 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
13300 break;
13301
13302 case OP_STAR:
13303 case OP_MINSTAR:
13304 case OP_PLUS:
13305 case OP_MINPLUS:
13306 case OP_QUERY:
13307 case OP_MINQUERY:
13308 case OP_UPTO:
13309 case OP_MINUPTO:
13310 case OP_EXACT:
13311 case OP_POSSTAR:
13312 case OP_POSPLUS:
13313 case OP_POSQUERY:
13314 case OP_POSUPTO:
13315 case OP_STARI:
13316 case OP_MINSTARI:
13317 case OP_PLUSI:
13318 case OP_MINPLUSI:
13319 case OP_QUERYI:
13320 case OP_MINQUERYI:
13321 case OP_UPTOI:
13322 case OP_MINUPTOI:
13323 case OP_EXACTI:
13324 case OP_POSSTARI:
13325 case OP_POSPLUSI:
13326 case OP_POSQUERYI:
13327 case OP_POSUPTOI:
13328 case OP_NOTSTAR:
13329 case OP_NOTMINSTAR:
13330 case OP_NOTPLUS:
13331 case OP_NOTMINPLUS:
13332 case OP_NOTQUERY:
13333 case OP_NOTMINQUERY:
13334 case OP_NOTUPTO:
13335 case OP_NOTMINUPTO:
13336 case OP_NOTEXACT:
13337 case OP_NOTPOSSTAR:
13338 case OP_NOTPOSPLUS:
13339 case OP_NOTPOSQUERY:
13340 case OP_NOTPOSUPTO:
13341 case OP_NOTSTARI:
13342 case OP_NOTMINSTARI:
13343 case OP_NOTPLUSI:
13344 case OP_NOTMINPLUSI:
13345 case OP_NOTQUERYI:
13346 case OP_NOTMINQUERYI:
13347 case OP_NOTUPTOI:
13348 case OP_NOTMINUPTOI:
13349 case OP_NOTEXACTI:
13350 case OP_NOTPOSSTARI:
13351 case OP_NOTPOSPLUSI:
13352 case OP_NOTPOSQUERYI:
13353 case OP_NOTPOSUPTOI:
13354 case OP_TYPESTAR:
13355 case OP_TYPEMINSTAR:
13356 case OP_TYPEPLUS:
13357 case OP_TYPEMINPLUS:
13358 case OP_TYPEQUERY:
13359 case OP_TYPEMINQUERY:
13360 case OP_TYPEUPTO:
13361 case OP_TYPEMINUPTO:
13362 case OP_TYPEEXACT:
13363 case OP_TYPEPOSSTAR:
13364 case OP_TYPEPOSPLUS:
13365 case OP_TYPEPOSQUERY:
13366 case OP_TYPEPOSUPTO:
13367 case OP_CLASS:
13368 case OP_NCLASS:
13369#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
13370 case OP_XCLASS:
13371#endif
13372 compile_iterator_backtrackingpath(common, current);
13373 break;
13374
13375 case OP_REF:
13376 case OP_REFI:
13377 case OP_DNREF:
13378 case OP_DNREFI:
13379 compile_ref_iterator_backtrackingpath(common, current);
13380 break;
13381
13382 case OP_RECURSE:
13383 compile_recurse_backtrackingpath(common, current);
13384 break;
13385
13386 case OP_ASSERT:
13387 case OP_ASSERT_NOT:
13388 case OP_ASSERTBACK:
13389 case OP_ASSERTBACK_NOT:
13390 compile_assert_backtrackingpath(common, current);
13391 break;
13392
13393 case OP_ASSERT_NA:
13394 case OP_ASSERTBACK_NA:
13395 case OP_ONCE:
13396 case OP_SCRIPT_RUN:
13397 case OP_BRA:
13398 case OP_CBRA:
13399 case OP_COND:
13400 case OP_SBRA:
13401 case OP_SCBRA:
13402 case OP_SCOND:
13403 compile_bracket_backtrackingpath(common, current);
13404 break;
13405
13406 case OP_BRAZERO:
13407 if (current->cc[1] > OP_ASSERTBACK_NOT)
13408 compile_bracket_backtrackingpath(common, current);
13409 else
13410 compile_assert_backtrackingpath(common, current);
13411 break;
13412
13413 case OP_BRAPOS:
13414 case OP_CBRAPOS:
13415 case OP_SBRAPOS:
13416 case OP_SCBRAPOS:
13417 case OP_BRAPOSZERO:
13418 compile_bracketpos_backtrackingpath(common, current);
13419 break;
13420
13421 case OP_BRAMINZERO:
13422 compile_braminzero_backtrackingpath(common, current);
13423 break;
13424
13425 case OP_MARK:
13426 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0));
13427 if (common->has_skip_arg)
13428 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13429 free_stack(common, common->has_skip_arg ? 5 : 1);
13430 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
13431 if (common->has_skip_arg)
13432 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
13433 break;
13434
13435 case OP_THEN:
13436 case OP_THEN_ARG:
13437 case OP_PRUNE:
13438 case OP_PRUNE_ARG:
13439 case OP_SKIP:
13440 case OP_SKIP_ARG:
13441 compile_control_verb_backtrackingpath(common, current);
13442 break;
13443
13444 case OP_COMMIT:
13445 case OP_COMMIT_ARG:
13446 if (!common->local_quit_available)
13447 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13448 if (common->quit_label == NULL)
13449 add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
13450 else
13451 JUMPTO(SLJIT_JUMP, common->quit_label);
13452 break;
13453
13454 case OP_CALLOUT:
13455 case OP_CALLOUT_STR:
13456 case OP_FAIL:
13457 case OP_ACCEPT:
13458 case OP_ASSERT_ACCEPT:
13459 set_jumps(current->topbacktracks, LABEL());
13460 break;
13461
13462 case OP_THEN_TRAP:
13463 /* A virtual opcode for then traps. */
13464 compile_then_trap_backtrackingpath(common, current);
13465 break;
13466
13467 default:
13468 SLJIT_UNREACHABLE();
13469 break;
13470 }
13471 current = current->prev;
13472 }
13473common->then_trap = save_then_trap;
13474}
13475
13476static SLJIT_INLINE void compile_recurse(compiler_common *common)
13477{
13478DEFINE_COMPILER;
13479PCRE2_SPTR cc = common->start + common->currententry->start;
13480PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
13481PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070013482uint32_t recurse_flags = 0;
13483int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);
Elliott Hughes5b808042021-10-01 10:56:10 -070013484int alt_count, alt_max, local_size;
13485backtrack_common altbacktrack;
13486jump_list *match = NULL;
13487struct sljit_jump *next_alt = NULL;
13488struct sljit_jump *accept_exit = NULL;
13489struct sljit_label *quit;
13490struct sljit_put_label *put_label = NULL;
13491
13492/* Recurse captures then. */
13493common->then_trap = NULL;
13494
13495SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
13496
13497alt_max = no_alternatives(cc);
13498alt_count = 0;
13499
13500/* Matching path. */
13501SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head_ptr != 0);
13502common->currententry->entry_label = LABEL();
13503set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
13504
13505sljit_emit_fast_enter(compiler, TMP2, 0);
13506count_match(common);
13507
13508local_size = (alt_max > 1) ? 2 : 1;
13509
13510/* (Reversed) stack layout:
13511 [private data][return address][optional: str ptr] ... [optional: alternative index][recursive_head_ptr] */
13512
13513allocate_stack(common, private_data_size + local_size);
13514/* Save return address. */
13515OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
13516
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070013517copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags);
Elliott Hughes5b808042021-10-01 10:56:10 -070013518
13519/* This variable is saved and restored all time when we enter or exit from a recursive context. */
13520OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
13521
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070013522if (recurse_flags & recurse_flag_control_head_found)
Elliott Hughes5b808042021-10-01 10:56:10 -070013523 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13524
13525if (alt_max > 1)
13526 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
13527
13528memset(&altbacktrack, 0, sizeof(backtrack_common));
13529common->quit_label = NULL;
13530common->accept_label = NULL;
13531common->quit = NULL;
13532common->accept = NULL;
13533altbacktrack.cc = ccbegin;
13534cc += GET(cc, 1);
13535while (1)
13536 {
13537 altbacktrack.top = NULL;
13538 altbacktrack.topbacktracks = NULL;
13539
13540 if (altbacktrack.cc != ccbegin)
13541 OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13542
13543 compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
13544 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13545 return;
13546
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070013547 allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1);
Elliott Hughes5b808042021-10-01 10:56:10 -070013548 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13549
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070013550 if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found))
Elliott Hughes5b808042021-10-01 10:56:10 -070013551 {
13552 if (alt_max > 3)
13553 put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
13554 else
13555 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count);
13556 }
13557
13558 add_jump(compiler, &match, JUMP(SLJIT_JUMP));
13559
13560 if (alt_count == 0)
13561 {
13562 /* Backtracking path entry. */
13563 SLJIT_ASSERT(common->currententry->backtrack_label == NULL);
13564 common->currententry->backtrack_label = LABEL();
13565 set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
13566
13567 sljit_emit_fast_enter(compiler, TMP1, 0);
13568
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070013569 if (recurse_flags & recurse_flag_accept_found)
Elliott Hughes5b808042021-10-01 10:56:10 -070013570 accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13571
13572 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
13573 /* Save return address. */
13574 OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
13575
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070013576 copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
Elliott Hughes5b808042021-10-01 10:56:10 -070013577
13578 if (alt_max > 1)
13579 {
13580 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
13581 free_stack(common, 2);
13582
13583 if (alt_max > 3)
13584 {
13585 sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0);
13586 sljit_set_put_label(put_label, LABEL());
13587 sljit_emit_op0(compiler, SLJIT_ENDBR);
13588 }
13589 else
13590 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
13591 }
13592 else
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070013593 free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1);
Elliott Hughes5b808042021-10-01 10:56:10 -070013594 }
13595 else if (alt_max > 3)
13596 {
13597 sljit_set_put_label(put_label, LABEL());
13598 sljit_emit_op0(compiler, SLJIT_ENDBR);
13599 }
13600 else
13601 {
13602 JUMPHERE(next_alt);
13603 if (alt_count + 1 < alt_max)
13604 {
13605 SLJIT_ASSERT(alt_count == 1 && alt_max == 3);
13606 next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1);
13607 }
13608 }
13609
13610 alt_count++;
13611
13612 compile_backtrackingpath(common, altbacktrack.top);
13613 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
13614 return;
13615 set_jumps(altbacktrack.topbacktracks, LABEL());
13616
13617 if (*cc != OP_ALT)
13618 break;
13619
13620 altbacktrack.cc = cc + 1 + LINK_SIZE;
13621 cc += GET(cc, 1);
13622 }
13623
13624/* No alternative is matched. */
13625
13626quit = LABEL();
13627
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070013628copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags);
Elliott Hughes5b808042021-10-01 10:56:10 -070013629
13630OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13631free_stack(common, private_data_size + local_size);
13632OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13633OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13634
13635if (common->quit != NULL)
13636 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070013637 SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);
Elliott Hughes5b808042021-10-01 10:56:10 -070013638
13639 set_jumps(common->quit, LABEL());
13640 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070013641 copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
Elliott Hughes5b808042021-10-01 10:56:10 -070013642 JUMPTO(SLJIT_JUMP, quit);
13643 }
13644
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070013645if (recurse_flags & recurse_flag_accept_found)
Elliott Hughes5b808042021-10-01 10:56:10 -070013646 {
13647 JUMPHERE(accept_exit);
13648 free_stack(common, 2);
13649
13650 /* Save return address. */
13651 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
13652
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070013653 copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
Elliott Hughes5b808042021-10-01 10:56:10 -070013654
13655 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
13656 free_stack(common, private_data_size + local_size);
13657 OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
13658 OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13659 }
13660
13661if (common->accept != NULL)
13662 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070013663 SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);
Elliott Hughes5b808042021-10-01 10:56:10 -070013664
13665 set_jumps(common->accept, LABEL());
13666
13667 OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
13668 OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
13669
13670 allocate_stack(common, 2);
13671 OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
13672 }
13673
13674set_jumps(match, LABEL());
13675
13676OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
13677
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070013678copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
Elliott Hughes5b808042021-10-01 10:56:10 -070013679
13680OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
13681OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
13682OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
13683}
13684
/* The helper macros of the backtracking path generators end here. */
#undef CURRENT_AS
#undef COMPILE_BACKTRACKINGPATH

/* Configuration bits that may be combined with the JIT compile mode. */
#define PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS (PCRE2_JIT_INVALID_UTF)
13690
13691static int jit_compile(pcre2_code *code, sljit_u32 mode)
13692{
13693pcre2_real_code *re = (pcre2_real_code *)code;
13694struct sljit_compiler *compiler;
13695backtrack_common rootbacktrack;
13696compiler_common common_data;
13697compiler_common *common = &common_data;
13698const sljit_u8 *tables = re->tables;
13699void *allocator_data = &re->memctl;
13700int private_data_size;
13701PCRE2_SPTR ccend;
13702executable_functions *functions;
13703void *executable_func;
13704sljit_uw executable_size;
13705sljit_uw total_length;
13706struct sljit_label *mainloop_label = NULL;
13707struct sljit_label *continue_match_label;
13708struct sljit_label *empty_match_found_label = NULL;
13709struct sljit_label *empty_match_backtrack_label = NULL;
13710struct sljit_label *reset_match_label;
13711struct sljit_label *quit_label;
13712struct sljit_jump *jump;
13713struct sljit_jump *minlength_check_failed = NULL;
13714struct sljit_jump *empty_match = NULL;
13715struct sljit_jump *end_anchor_failed = NULL;
13716jump_list *reqcu_not_found = NULL;
13717
13718SLJIT_ASSERT(tables);
13719
13720#if HAS_VIRTUAL_REGISTERS == 1
13721SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0);
13722#elif HAS_VIRTUAL_REGISTERS == 0
13723SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0);
13724#else
13725#error "Invalid value for HAS_VIRTUAL_REGISTERS"
13726#endif
13727
13728memset(&rootbacktrack, 0, sizeof(backtrack_common));
13729memset(common, 0, sizeof(compiler_common));
13730common->re = re;
13731common->name_table = (PCRE2_SPTR)((uint8_t *)re + sizeof(pcre2_real_code));
13732rootbacktrack.cc = common->name_table + re->name_count * re->name_entry_size;
13733
13734#ifdef SUPPORT_UNICODE
13735common->invalid_utf = (mode & PCRE2_JIT_INVALID_UTF) != 0;
13736#endif /* SUPPORT_UNICODE */
13737mode &= ~PUBLIC_JIT_COMPILE_CONFIGURATION_OPTIONS;
13738
13739common->start = rootbacktrack.cc;
13740common->read_only_data_head = NULL;
13741common->fcc = tables + fcc_offset;
13742common->lcc = (sljit_sw)(tables + lcc_offset);
13743common->mode = mode;
13744common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY);
13745common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY);
13746common->nltype = NLTYPE_FIXED;
13747switch(re->newline_convention)
13748 {
13749 case PCRE2_NEWLINE_CR: common->newline = CHAR_CR; break;
13750 case PCRE2_NEWLINE_LF: common->newline = CHAR_NL; break;
13751 case PCRE2_NEWLINE_CRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; break;
13752 case PCRE2_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break;
13753 case PCRE2_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break;
13754 case PCRE2_NEWLINE_NUL: common->newline = CHAR_NUL; break;
13755 default: return PCRE2_ERROR_INTERNAL;
13756 }
13757common->nlmax = READ_CHAR_MAX;
13758common->nlmin = 0;
13759if (re->bsr_convention == PCRE2_BSR_UNICODE)
13760 common->bsr_nltype = NLTYPE_ANY;
13761else if (re->bsr_convention == PCRE2_BSR_ANYCRLF)
13762 common->bsr_nltype = NLTYPE_ANYCRLF;
13763else
13764 {
13765#ifdef BSR_ANYCRLF
13766 common->bsr_nltype = NLTYPE_ANYCRLF;
13767#else
13768 common->bsr_nltype = NLTYPE_ANY;
13769#endif
13770 }
13771common->bsr_nlmax = READ_CHAR_MAX;
13772common->bsr_nlmin = 0;
13773common->endonly = (re->overall_options & PCRE2_DOLLAR_ENDONLY) != 0;
13774common->ctypes = (sljit_sw)(tables + ctypes_offset);
13775common->name_count = re->name_count;
13776common->name_entry_size = re->name_entry_size;
13777common->unset_backref = (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) != 0;
13778common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0;
13779#ifdef SUPPORT_UNICODE
13780/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
13781common->utf = (re->overall_options & PCRE2_UTF) != 0;
13782common->ucp = (re->overall_options & PCRE2_UCP) != 0;
13783if (common->utf)
13784 {
13785 if (common->nltype == NLTYPE_ANY)
13786 common->nlmax = 0x2029;
13787 else if (common->nltype == NLTYPE_ANYCRLF)
13788 common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13789 else
13790 {
13791 /* We only care about the first newline character. */
13792 common->nlmax = common->newline & 0xff;
13793 }
13794
13795 if (common->nltype == NLTYPE_FIXED)
13796 common->nlmin = common->newline & 0xff;
13797 else
13798 common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13799
13800 if (common->bsr_nltype == NLTYPE_ANY)
13801 common->bsr_nlmax = 0x2029;
13802 else
13803 common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL;
13804 common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL;
13805 }
13806else
13807 common->invalid_utf = FALSE;
13808#endif /* SUPPORT_UNICODE */
13809ccend = bracketend(common->start);
13810
13811/* Calculate the local space size on the stack. */
13812common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw);
13813common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, allocator_data);
13814if (!common->optimized_cbracket)
13815 return PCRE2_ERROR_NOMEMORY;
13816#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1
13817memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13818#else
13819memset(common->optimized_cbracket, 1, re->top_bracket + 1);
13820#endif
13821
13822SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET);
13823#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2
13824common->capture_last_ptr = common->ovector_start;
13825common->ovector_start += sizeof(sljit_sw);
13826#endif
13827if (!check_opcode_types(common, common->start, ccend))
13828 {
13829 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13830 return PCRE2_ERROR_NOMEMORY;
13831 }
13832
13833/* Checking flags and updating ovector_start. */
13834if (mode == PCRE2_JIT_COMPLETE && (re->flags & PCRE2_LASTSET) != 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13835 {
13836 common->req_char_ptr = common->ovector_start;
13837 common->ovector_start += sizeof(sljit_sw);
13838 }
13839if (mode != PCRE2_JIT_COMPLETE)
13840 {
13841 common->start_used_ptr = common->ovector_start;
13842 common->ovector_start += sizeof(sljit_sw);
13843 if (mode == PCRE2_JIT_PARTIAL_SOFT)
13844 {
13845 common->hit_start = common->ovector_start;
13846 common->ovector_start += sizeof(sljit_sw);
13847 }
13848 }
13849if ((re->overall_options & (PCRE2_FIRSTLINE | PCRE2_USE_OFFSET_LIMIT)) != 0)
13850 {
13851 common->match_end_ptr = common->ovector_start;
13852 common->ovector_start += sizeof(sljit_sw);
13853 }
13854#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
13855common->control_head_ptr = 1;
13856#endif
13857if (common->control_head_ptr != 0)
13858 {
13859 common->control_head_ptr = common->ovector_start;
13860 common->ovector_start += sizeof(sljit_sw);
13861 }
13862if (common->has_set_som)
13863 {
13864 /* Saving the real start pointer is necessary. */
13865 common->start_ptr = common->ovector_start;
13866 common->ovector_start += sizeof(sljit_sw);
13867 }
13868
13869/* Aligning ovector to even number of sljit words. */
13870if ((common->ovector_start & sizeof(sljit_sw)) != 0)
13871 common->ovector_start += sizeof(sljit_sw);
13872
13873if (common->start_ptr == 0)
13874 common->start_ptr = OVECTOR(0);
13875
13876/* Capturing brackets cannot be optimized if callouts are allowed. */
13877if (common->capture_last_ptr != 0)
13878 memset(common->optimized_cbracket, 0, re->top_bracket + 1);
13879
13880SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
13881common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
13882
13883total_length = ccend - common->start;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070013884common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
Elliott Hughes5b808042021-10-01 10:56:10 -070013885if (!common->private_data_ptrs)
13886 {
13887 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13888 return PCRE2_ERROR_NOMEMORY;
13889 }
13890memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
13891
13892private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
13893
13894if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
13895 detect_early_fail(common, common->start, &private_data_size, 0, 0, TRUE);
13896
13897set_private_data_ptrs(common, &private_data_size, ccend);
13898
13899SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
13900
13901if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
13902 {
13903 SLJIT_FREE(common->private_data_ptrs, allocator_data);
13904 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13905 return PCRE2_ERROR_NOMEMORY;
13906 }
13907
13908if (common->has_then)
13909 {
13910 common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length);
13911 memset(common->then_offsets, 0, total_length);
13912 set_then_offsets(common, common->start, NULL);
13913 }
13914
13915compiler = sljit_create_compiler(allocator_data, NULL);
13916if (!compiler)
13917 {
13918 SLJIT_FREE(common->optimized_cbracket, allocator_data);
13919 SLJIT_FREE(common->private_data_ptrs, allocator_data);
13920 return PCRE2_ERROR_NOMEMORY;
13921 }
13922common->compiler = compiler;
13923
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070013924/* Main pcre2_jit_exec entry. */
13925SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
13926sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, 0, 0, private_data_size);
Elliott Hughes5b808042021-10-01 10:56:10 -070013927
13928/* Register init. */
13929reset_ovector(common, (re->top_bracket + 1) * 2);
13930if (common->req_char_ptr != 0)
13931 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0);
13932
13933OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0);
13934OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0);
13935OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
13936OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end));
13937OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack));
13938OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match));
13939OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end));
13940OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start));
13941OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
13942OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0);
13943
13944if (common->early_fail_start_ptr < common->early_fail_end_ptr)
13945 reset_early_fail(common);
13946
13947if (mode == PCRE2_JIT_PARTIAL_SOFT)
13948 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
13949if (common->mark_ptr != 0)
13950 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
13951if (common->control_head_ptr != 0)
13952 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
13953
13954/* Main part of the matching */
13955if ((re->overall_options & PCRE2_ANCHORED) == 0)
13956 {
13957 mainloop_label = mainloop_entry(common);
13958 continue_match_label = LABEL();
13959 /* Forward search if possible. */
13960 if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13961 {
13962 if (mode == PCRE2_JIT_COMPLETE && fast_forward_first_n_chars(common))
13963 ;
13964 else if ((re->flags & PCRE2_FIRSTSET) != 0)
13965 fast_forward_first_char(common);
13966 else if ((re->flags & PCRE2_STARTLINE) != 0)
13967 fast_forward_newline(common);
13968 else if ((re->flags & PCRE2_FIRSTMAPSET) != 0)
13969 fast_forward_start_bits(common);
13970 }
13971 }
13972else
13973 continue_match_label = LABEL();
13974
13975if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
13976 {
13977 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
13978 OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(re->minlength));
13979 minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0);
13980 }
13981if (common->req_char_ptr != 0)
13982 reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0);
13983
13984/* Store the current STR_PTR in OVECTOR(0). */
13985OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
13986/* Copy the limit of allowed recursions. */
13987OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH);
13988if (common->capture_last_ptr != 0)
13989 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0);
13990if (common->fast_forward_bc_ptr != NULL)
13991 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0);
13992
13993if (common->start_ptr != OVECTOR(0))
13994 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0);
13995
13996/* Copy the beginning of the string. */
13997if (mode == PCRE2_JIT_PARTIAL_SOFT)
13998 {
13999 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1);
14000 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14001 JUMPHERE(jump);
14002 }
14003else if (mode == PCRE2_JIT_PARTIAL_HARD)
14004 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
14005
14006compile_matchingpath(common, common->start, ccend, &rootbacktrack);
14007if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14008 {
14009 sljit_free_compiler(compiler);
14010 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14011 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14012 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14013 return PCRE2_ERROR_NOMEMORY;
14014 }
14015
14016if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14017 end_anchor_failed = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0);
14018
14019if (common->might_be_empty)
14020 {
14021 empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
14022 empty_match_found_label = LABEL();
14023 }
14024
14025common->accept_label = LABEL();
14026if (common->accept != NULL)
14027 set_jumps(common->accept, common->accept_label);
14028
14029/* This means we have a match. Update the ovector. */
14030copy_ovector(common, re->top_bracket + 1);
14031common->quit_label = common->abort_label = LABEL();
14032if (common->quit != NULL)
14033 set_jumps(common->quit, common->quit_label);
14034if (common->abort != NULL)
14035 set_jumps(common->abort, common->abort_label);
14036if (minlength_check_failed != NULL)
14037 SET_LABEL(minlength_check_failed, common->abort_label);
14038
14039sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN);
14040sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0);
14041
14042if (common->failed_match != NULL)
14043 {
14044 SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE);
14045 set_jumps(common->failed_match, LABEL());
14046 OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14047 JUMPTO(SLJIT_JUMP, common->abort_label);
14048 }
14049
14050if ((re->overall_options & PCRE2_ENDANCHORED) != 0)
14051 JUMPHERE(end_anchor_failed);
14052
14053if (mode != PCRE2_JIT_COMPLETE)
14054 {
14055 common->partialmatchlabel = LABEL();
14056 set_jumps(common->partialmatch, common->partialmatchlabel);
14057 return_with_partial_match(common, common->quit_label);
14058 }
14059
14060if (common->might_be_empty)
14061 empty_match_backtrack_label = LABEL();
14062compile_backtrackingpath(common, rootbacktrack.top);
14063if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14064 {
14065 sljit_free_compiler(compiler);
14066 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14067 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14068 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14069 return PCRE2_ERROR_NOMEMORY;
14070 }
14071
14072SLJIT_ASSERT(rootbacktrack.prev == NULL);
14073reset_match_label = LABEL();
14074
14075if (mode == PCRE2_JIT_PARTIAL_SOFT)
14076 {
  /* Update hit_start only the first time. */
14078 jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
14079 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
14080 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
14081 OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0);
14082 JUMPHERE(jump);
14083 }
14084
14085/* Check we have remaining characters. */
14086if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0)
14087 {
14088 OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
14089 }
14090
14091OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP),
14092 (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr);
14093
14094if ((re->overall_options & PCRE2_ANCHORED) == 0)
14095 {
14096 if (common->ff_newline_shortcut != NULL)
14097 {
14098 /* There cannot be more newlines if PCRE2_FIRSTLINE is set. */
14099 if ((re->overall_options & PCRE2_FIRSTLINE) == 0)
14100 {
14101 if (common->match_end_ptr != 0)
14102 {
14103 OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
14104 OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
14105 CMPTO(SLJIT_LESS, STR_PTR, 0, TMP1, 0, common->ff_newline_shortcut);
14106 OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
14107 }
14108 else
14109 CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut);
14110 }
14111 }
14112 else
14113 CMPTO(SLJIT_LESS, STR_PTR, 0, (common->match_end_ptr == 0) ? STR_END : TMP1, 0, mainloop_label);
14114 }
14115
14116/* No more remaining characters. */
14117if (reqcu_not_found != NULL)
14118 set_jumps(reqcu_not_found, LABEL());
14119
14120if (mode == PCRE2_JIT_PARTIAL_SOFT)
14121 CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel);
14122
14123OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_NOMATCH);
14124JUMPTO(SLJIT_JUMP, common->quit_label);
14125
14126flush_stubs(common);
14127
14128if (common->might_be_empty)
14129 {
14130 JUMPHERE(empty_match);
14131 OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
14132 OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070014133 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
Elliott Hughes5b808042021-10-01 10:56:10 -070014134 JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070014135 OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
Elliott Hughes5b808042021-10-01 10:56:10 -070014136 JUMPTO(SLJIT_ZERO, empty_match_found_label);
14137 OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
14138 CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
14139 JUMPTO(SLJIT_JUMP, empty_match_backtrack_label);
14140 }
14141
14142common->fast_forward_bc_ptr = NULL;
14143common->early_fail_start_ptr = 0;
14144common->early_fail_end_ptr = 0;
14145common->currententry = common->entries;
14146common->local_quit_available = TRUE;
14147quit_label = common->quit_label;
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070014148if (common->currententry != NULL)
Elliott Hughes5b808042021-10-01 10:56:10 -070014149 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070014150 /* A free bit for each private data. */
14151 common->recurse_bitset_size = ((private_data_size / (int)sizeof(sljit_sw)) + 7) >> 3;
14152 SLJIT_ASSERT(common->recurse_bitset_size > 0);
14153 common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
14154
14155 if (common->recurse_bitset != NULL)
Elliott Hughes5b808042021-10-01 10:56:10 -070014156 {
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070014157 do
14158 {
14159 /* Might add new entries. */
14160 compile_recurse(common);
14161 if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
14162 break;
14163 flush_stubs(common);
14164 common->currententry = common->currententry->next;
14165 }
14166 while (common->currententry != NULL);
14167
14168 SLJIT_FREE(common->recurse_bitset, allocator_data);
14169 }
14170
14171 if (common->currententry != NULL)
14172 {
14173 /* The common->recurse_bitset has been freed. */
14174 SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
14175
Elliott Hughes5b808042021-10-01 10:56:10 -070014176 sljit_free_compiler(compiler);
14177 SLJIT_FREE(common->optimized_cbracket, allocator_data);
14178 SLJIT_FREE(common->private_data_ptrs, allocator_data);
14179 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14180 return PCRE2_ERROR_NOMEMORY;
14181 }
Elliott Hughes5b808042021-10-01 10:56:10 -070014182 }
14183common->local_quit_available = FALSE;
14184common->quit_label = quit_label;
14185
/* Allocating stack, returns with PCRE2_ERROR_JIT_STACKLIMIT if it fails. */
14187/* This is a (really) rare case. */
14188set_jumps(common->stackalloc, LABEL());
14189/* RETURN_ADDR is not a saved register. */
14190sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14191
14192SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
14193
14194OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
14195OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
14196OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
14197OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
14198OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
14199
Elliott Hughes4e19c8e2022-04-15 15:11:02 -070014200sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));
Elliott Hughes5b808042021-10-01 10:56:10 -070014201
14202jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
14203OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
14204OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0);
14205OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
14206OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
14207OP_SRC(SLJIT_FAST_RETURN, TMP1, 0);
14208
14209/* Allocation failed. */
14210JUMPHERE(jump);
14211/* We break the return address cache here, but this is a really rare case. */
14212OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_JIT_STACKLIMIT);
14213JUMPTO(SLJIT_JUMP, common->quit_label);
14214
14215/* Call limit reached. */
14216set_jumps(common->calllimit, LABEL());
14217OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_MATCHLIMIT);
14218JUMPTO(SLJIT_JUMP, common->quit_label);
14219
14220if (common->revertframes != NULL)
14221 {
14222 set_jumps(common->revertframes, LABEL());
14223 do_revertframes(common);
14224 }
14225if (common->wordboundary != NULL)
14226 {
14227 set_jumps(common->wordboundary, LABEL());
14228 check_wordboundary(common);
14229 }
14230if (common->anynewline != NULL)
14231 {
14232 set_jumps(common->anynewline, LABEL());
14233 check_anynewline(common);
14234 }
14235if (common->hspace != NULL)
14236 {
14237 set_jumps(common->hspace, LABEL());
14238 check_hspace(common);
14239 }
14240if (common->vspace != NULL)
14241 {
14242 set_jumps(common->vspace, LABEL());
14243 check_vspace(common);
14244 }
14245if (common->casefulcmp != NULL)
14246 {
14247 set_jumps(common->casefulcmp, LABEL());
14248 do_casefulcmp(common);
14249 }
14250if (common->caselesscmp != NULL)
14251 {
14252 set_jumps(common->caselesscmp, LABEL());
14253 do_caselesscmp(common);
14254 }
14255if (common->reset_match != NULL)
14256 {
14257 set_jumps(common->reset_match, LABEL());
14258 do_reset_match(common, (re->top_bracket + 1) * 2);
14259 CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
14260 OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
14261 JUMPTO(SLJIT_JUMP, reset_match_label);
14262 }
14263#ifdef SUPPORT_UNICODE
14264#if PCRE2_CODE_UNIT_WIDTH == 8
14265if (common->utfreadchar != NULL)
14266 {
14267 set_jumps(common->utfreadchar, LABEL());
14268 do_utfreadchar(common);
14269 }
14270if (common->utfreadtype8 != NULL)
14271 {
14272 set_jumps(common->utfreadtype8, LABEL());
14273 do_utfreadtype8(common);
14274 }
14275if (common->utfpeakcharback != NULL)
14276 {
14277 set_jumps(common->utfpeakcharback, LABEL());
14278 do_utfpeakcharback(common);
14279 }
14280#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
14281#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
14282if (common->utfreadchar_invalid != NULL)
14283 {
14284 set_jumps(common->utfreadchar_invalid, LABEL());
14285 do_utfreadchar_invalid(common);
14286 }
14287if (common->utfreadnewline_invalid != NULL)
14288 {
14289 set_jumps(common->utfreadnewline_invalid, LABEL());
14290 do_utfreadnewline_invalid(common);
14291 }
14292if (common->utfmoveback_invalid)
14293 {
14294 set_jumps(common->utfmoveback_invalid, LABEL());
14295 do_utfmoveback_invalid(common);
14296 }
14297if (common->utfpeakcharback_invalid)
14298 {
14299 set_jumps(common->utfpeakcharback_invalid, LABEL());
14300 do_utfpeakcharback_invalid(common);
14301 }
14302#endif /* PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 */
14303if (common->getucd != NULL)
14304 {
14305 set_jumps(common->getucd, LABEL());
14306 do_getucd(common);
14307 }
14308if (common->getucdtype != NULL)
14309 {
14310 set_jumps(common->getucdtype, LABEL());
14311 do_getucdtype(common);
14312 }
14313#endif /* SUPPORT_UNICODE */
14314
14315SLJIT_FREE(common->optimized_cbracket, allocator_data);
14316SLJIT_FREE(common->private_data_ptrs, allocator_data);
14317
14318executable_func = sljit_generate_code(compiler);
14319executable_size = sljit_get_generated_code_size(compiler);
14320sljit_free_compiler(compiler);
14321
14322if (executable_func == NULL)
14323 {
14324 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14325 return PCRE2_ERROR_NOMEMORY;
14326 }
14327
14328/* Reuse the function descriptor if possible. */
14329if (re->executable_jit != NULL)
14330 functions = (executable_functions *)re->executable_jit;
14331else
14332 {
14333 functions = SLJIT_MALLOC(sizeof(executable_functions), allocator_data);
14334 if (functions == NULL)
14335 {
14336 /* This case is highly unlikely since we just recently
14337 freed a lot of memory. Not impossible though. */
14338 sljit_free_code(executable_func, NULL);
14339 PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
14340 return PCRE2_ERROR_NOMEMORY;
14341 }
14342 memset(functions, 0, sizeof(executable_functions));
14343 functions->top_bracket = re->top_bracket + 1;
14344 functions->limit_match = re->limit_match;
14345 re->executable_jit = functions;
14346 }
14347
14348/* Turn mode into an index. */
14349if (mode == PCRE2_JIT_COMPLETE)
14350 mode = 0;
14351else
14352 mode = (mode == PCRE2_JIT_PARTIAL_SOFT) ? 1 : 2;
14353
14354SLJIT_ASSERT(mode < JIT_NUMBER_OF_COMPILE_MODES);
14355functions->executable_funcs[mode] = executable_func;
14356functions->read_only_data_heads[mode] = common->read_only_data_head;
14357functions->executable_sizes[mode] = executable_size;
14358return 0;
14359}
14360
14361#endif
14362
14363/*************************************************
14364* JIT compile a Regular Expression *
14365*************************************************/
14366
14367/* This function used JIT to convert a previously-compiled pattern into machine
14368code.
14369
14370Arguments:
14371 code a compiled pattern
14372 options JIT option bits
14373
14374Returns: 0: success or (*NOJIT) was used
14375 <0: an error code
14376*/
14377
14378#define PUBLIC_JIT_COMPILE_OPTIONS \
14379 (PCRE2_JIT_COMPLETE|PCRE2_JIT_PARTIAL_SOFT|PCRE2_JIT_PARTIAL_HARD|PCRE2_JIT_INVALID_UTF)
14380
14381PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION
14382pcre2_jit_compile(pcre2_code *code, uint32_t options)
14383{
14384pcre2_real_code *re = (pcre2_real_code *)code;
Elliott Hughes16619d62021-10-29 12:10:38 -070014385#ifdef SUPPORT_JIT
14386executable_functions *functions;
14387static int executable_allocator_is_working = 0;
14388#endif
Elliott Hughes5b808042021-10-01 10:56:10 -070014389
14390if (code == NULL)
14391 return PCRE2_ERROR_NULL;
14392
14393if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0)
14394 return PCRE2_ERROR_JIT_BADOPTION;
14395
14396/* Support for invalid UTF was first introduced in JIT, with the option
14397PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the
14398compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the
14399preferred feature, with the earlier option deprecated. However, for backward
14400compatibility, if the earlier option is set, it forces the new option so that
14401if JIT matching falls back to the interpreter, there is still support for
14402invalid UTF. However, if this function has already been successfully called
14403without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that
14404non-invalid-supporting JIT code was compiled), give an error.
14405
14406If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following
14407actions are needed:
14408
14409 1. Remove the definition from pcre2.h.in and from the list in
14410 PUBLIC_JIT_COMPILE_OPTIONS above.
14411
14412 2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module.
14413
14414 3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c.
14415
14416 4. Delete the following short block of code. The setting of "re" and
14417 "functions" can be moved into the JIT-only block below, but if that is
14418 done, (void)re and (void)functions will be needed in the non-JIT case, to
14419 avoid compiler warnings.
14420*/
14421
14422#ifdef SUPPORT_JIT
Elliott Hughes16619d62021-10-29 12:10:38 -070014423functions = (executable_functions *)re->executable_jit;
Elliott Hughes5b808042021-10-01 10:56:10 -070014424#endif
14425
14426if ((options & PCRE2_JIT_INVALID_UTF) != 0)
14427 {
14428 if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0)
14429 {
14430#ifdef SUPPORT_JIT
14431 if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION;
14432#endif
14433 re->overall_options |= PCRE2_MATCH_INVALID_UTF;
14434 }
14435 }
14436
14437/* The above tests are run with and without JIT support. This means that
14438PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring
14439interpreter support) even in the absence of JIT. But now, if there is no JIT
14440support, give an error return. */
14441
14442#ifndef SUPPORT_JIT
14443return PCRE2_ERROR_JIT_BADOPTION;
14444#else /* SUPPORT_JIT */
14445
14446/* There is JIT support. Do the necessary. */
14447
14448if ((re->flags & PCRE2_NOJIT) != 0) return 0;
14449
14450if (executable_allocator_is_working == 0)
14451 {
14452 /* Checks whether the executable allocator is working. This check
14453 might run multiple times in multi-threaded environments, but the
14454 result should not be affected by it. */
14455 void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
14456
14457 executable_allocator_is_working = -1;
14458
14459 if (ptr != NULL)
14460 {
14461 SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
14462 executable_allocator_is_working = 1;
14463 }
14464 }
14465
14466if (executable_allocator_is_working < 0)
14467 return PCRE2_ERROR_NOMEMORY;
14468
14469if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
14470 options |= PCRE2_JIT_INVALID_UTF;
14471
14472if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL
14473 || functions->executable_funcs[0] == NULL)) {
14474 uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
14475 int result = jit_compile(code, options & ~excluded_options);
14476 if (result != 0)
14477 return result;
14478 }
14479
14480if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL
14481 || functions->executable_funcs[1] == NULL)) {
14482 uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD);
14483 int result = jit_compile(code, options & ~excluded_options);
14484 if (result != 0)
14485 return result;
14486 }
14487
14488if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL
14489 || functions->executable_funcs[2] == NULL)) {
14490 uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT);
14491 int result = jit_compile(code, options & ~excluded_options);
14492 if (result != 0)
14493 return result;
14494 }
14495
14496return 0;
14497
14498#endif /* SUPPORT_JIT */
14499}
14500
14501/* JIT compiler uses an all-in-one approach. This improves security,
14502 since the code generator functions are not exported. */
14503
14504#define INCLUDED_FROM_PCRE2_JIT_COMPILE
14505
14506#include "pcre2_jit_match.c"
14507#include "pcre2_jit_misc.c"
14508
14509/* End of pcre2_jit_compile.c */