| /************************************************* |
| * Perl-Compatible Regular Expressions * |
| *************************************************/ |
| |
| /* PCRE is a library of functions to support regular expressions whose syntax |
| and semantics are as close as possible to those of the Perl 5 language. |
| |
| Written by Philip Hazel |
| This module by Zoltan Herczeg |
| Original API code Copyright (c) 1997-2012 University of Cambridge |
| New API code Copyright (c) 2016-2019 University of Cambridge |
| |
| ----------------------------------------------------------------------------- |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions are met: |
| |
| * Redistributions of source code must retain the above copyright notice, |
| this list of conditions and the following disclaimer. |
| |
| * Redistributions in binary form must reproduce the above copyright |
| notice, this list of conditions and the following disclaimer in the |
| documentation and/or other materials provided with the distribution. |
| |
| * Neither the name of the University of Cambridge nor the names of its |
| contributors may be used to endorse or promote products derived from |
| this software without specific prior written permission. |
| |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| POSSIBILITY OF SUCH DAMAGE. |
| ----------------------------------------------------------------------------- |
| */ |
| |
| #if !(defined SUPPORT_VALGRIND) |
| |
| #if ((defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ |
| || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)) |
| |
| typedef enum { |
| vector_compare_match1, |
| vector_compare_match1i, |
| vector_compare_match2, |
| } vector_compare_type; |
| |
| static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void) |
| { |
| #if PCRE2_CODE_UNIT_WIDTH == 8 |
| return 15; |
| #elif PCRE2_CODE_UNIT_WIDTH == 16 |
| return 7; |
| #elif PCRE2_CODE_UNIT_WIDTH == 32 |
| return 3; |
| #else |
| #error "Unsupported unit width" |
| #endif |
| } |
| |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg) |
| { |
| #if PCRE2_CODE_UNIT_WIDTH == 8 |
| OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0); |
| return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80); |
| #elif PCRE2_CODE_UNIT_WIDTH == 16 |
| OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00); |
| return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00); |
| #else |
| #error "Unknown code width" |
| #endif |
| } |
| #endif |
| |
| #endif /* SLJIT_CONFIG_X86 || SLJIT_CONFIG_S390X */ |
| |
| #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) |
| |
| static sljit_s32 character_to_int32(PCRE2_UCHAR chr) |
| { |
| sljit_u32 value = chr; |
| #if PCRE2_CODE_UNIT_WIDTH == 8 |
| #define SSE2_COMPARE_TYPE_INDEX 0 |
| return (sljit_s32)((value << 24) | (value << 16) | (value << 8) | value); |
| #elif PCRE2_CODE_UNIT_WIDTH == 16 |
| #define SSE2_COMPARE_TYPE_INDEX 1 |
| return (sljit_s32)((value << 16) | value); |
| #elif PCRE2_CODE_UNIT_WIDTH == 32 |
| #define SSE2_COMPARE_TYPE_INDEX 2 |
| return (sljit_s32)(value); |
| #else |
| #error "Unsupported unit width" |
| #endif |
| } |
| |
| static void load_from_mem_sse2(struct sljit_compiler *compiler, sljit_s32 dst_xmm_reg, sljit_s32 src_general_reg, sljit_s8 offset) |
| { |
| sljit_u8 instruction[5]; |
| |
| SLJIT_ASSERT(dst_xmm_reg < 8); |
| SLJIT_ASSERT(src_general_reg < 8); |
| |
| /* MOVDQA xmm1, xmm2/m128 */ |
| instruction[0] = ((sljit_u8)offset & 0xf) == 0 ? 0x66 : 0xf3; |
| instruction[1] = 0x0f; |
| instruction[2] = 0x6f; |
| |
| if (offset == 0) |
| { |
| instruction[3] = (dst_xmm_reg << 3) | src_general_reg; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| return; |
| } |
| |
| instruction[3] = 0x40 | (dst_xmm_reg << 3) | src_general_reg; |
| instruction[4] = (sljit_u8)offset; |
| sljit_emit_op_custom(compiler, instruction, 5); |
| } |
| |
| static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type, |
| int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) |
| { |
| sljit_u8 instruction[4]; |
| instruction[0] = 0x66; |
| instruction[1] = 0x0f; |
| |
| SLJIT_ASSERT(step >= 0 && step <= 3); |
| |
| if (compare_type != vector_compare_match2) |
| { |
| if (step == 0) |
| { |
| if (compare_type == vector_compare_match1i) |
| { |
| /* POR xmm1, xmm2/m128 */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0xeb; |
| instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| } |
| return; |
| } |
| |
| if (step != 2) |
| return; |
| |
| /* PCMPEQB/W/D xmm1, xmm2/m128 */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; |
| instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| return; |
| } |
| |
| switch (step) |
| { |
| case 0: |
| /* MOVDQA xmm1, xmm2/m128 */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0x6f; |
| instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| return; |
| |
| case 1: |
| /* PCMPEQB/W/D xmm1, xmm2/m128 */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; |
| instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| return; |
| |
| case 2: |
| /* PCMPEQB/W/D xmm1, xmm2/m128 */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; |
| instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| return; |
| |
| case 3: |
| /* POR xmm1, xmm2/m128 */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0xeb; |
| instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| return; |
| } |
| } |
| |
| #define JIT_HAS_FAST_FORWARD_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2)) |
| |
| static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) |
| { |
| DEFINE_COMPILER; |
| sljit_u8 instruction[8]; |
| struct sljit_label *start; |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| struct sljit_label *restart; |
| #endif |
| struct sljit_jump *quit; |
| struct sljit_jump *partial_quit[2]; |
| vector_compare_type compare_type = vector_compare_match1; |
| sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); |
| sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); |
| sljit_s32 data_ind = 0; |
| sljit_s32 tmp_ind = 1; |
| sljit_s32 cmp1_ind = 2; |
| sljit_s32 cmp2_ind = 3; |
| sljit_u32 bit = 0; |
| int i; |
| |
| SLJIT_UNUSED_ARG(offset); |
| |
| if (char1 != char2) |
| { |
| bit = char1 ^ char2; |
| compare_type = vector_compare_match1i; |
| |
| if (!is_powerof2(bit)) |
| { |
| bit = 0; |
| compare_type = vector_compare_match2; |
| } |
| } |
| |
| partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
| if (common->mode == PCRE2_JIT_COMPLETE) |
| add_jump(compiler, &common->failed_match, partial_quit[0]); |
| |
| /* First part (unaligned start) */ |
| |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); |
| |
| SLJIT_ASSERT(tmp1_reg_ind < 8); |
| |
| /* MOVD xmm, r/m32 */ |
| instruction[0] = 0x66; |
| instruction[1] = 0x0f; |
| instruction[2] = 0x6e; |
| instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| |
| if (char1 != char2) |
| { |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); |
| |
| /* MOVD xmm, r/m32 */ |
| instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| } |
| |
| OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); |
| |
| /* PSHUFD xmm1, xmm2/m128, imm8 */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0x70; |
| instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind; |
| instruction[4] = 0; |
| sljit_emit_op_custom(compiler, instruction, 5); |
| |
| if (char1 != char2) |
| { |
| /* PSHUFD xmm1, xmm2/m128, imm8 */ |
| instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind; |
| sljit_emit_op_custom(compiler, instruction, 5); |
| } |
| |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| restart = LABEL(); |
| #endif |
| OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); |
| OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); |
| |
| load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); |
| for (i = 0; i < 4; i++) |
| fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
| |
| /* PMOVMSKB reg, xmm */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0xd7; |
| instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
| OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); |
| |
| quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); |
| |
| OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
| |
| /* Second part (aligned) */ |
| start = LABEL(); |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); |
| |
| partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
| if (common->mode == PCRE2_JIT_COMPLETE) |
| add_jump(compiler, &common->failed_match, partial_quit[1]); |
| |
| load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); |
| for (i = 0; i < 4; i++) |
| fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
| |
| /* PMOVMSKB reg, xmm */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0xd7; |
| instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| |
| CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); |
| |
| JUMPHERE(quit); |
| |
| /* BSF r32, r/m32 */ |
| instruction[0] = 0x0f; |
| instruction[1] = 0xbc; |
| instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind; |
| sljit_emit_op_custom(compiler, instruction, 3); |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
| |
| if (common->mode != PCRE2_JIT_COMPLETE) |
| { |
| JUMPHERE(partial_quit[0]); |
| JUMPHERE(partial_quit[1]); |
| OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); |
| CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); |
| } |
| else |
| add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
| |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| if (common->utf && offset > 0) |
| { |
| SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); |
| |
| OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); |
| |
| quit = jump_if_utf_char_start(compiler, TMP1); |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
| OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); |
| JUMPTO(SLJIT_JUMP, restart); |
| |
| JUMPHERE(quit); |
| } |
| #endif |
| } |
| |
| #define JIT_HAS_FAST_REQUESTED_CHAR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2)) |
| |
| static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2) |
| { |
| DEFINE_COMPILER; |
| sljit_u8 instruction[8]; |
| struct sljit_label *start; |
| struct sljit_jump *quit; |
| jump_list *not_found = NULL; |
| vector_compare_type compare_type = vector_compare_match1; |
| sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); |
| sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); |
| sljit_s32 data_ind = 0; |
| sljit_s32 tmp_ind = 1; |
| sljit_s32 cmp1_ind = 2; |
| sljit_s32 cmp2_ind = 3; |
| sljit_u32 bit = 0; |
| int i; |
| |
| if (char1 != char2) |
| { |
| bit = char1 ^ char2; |
| compare_type = vector_compare_match1i; |
| |
| if (!is_powerof2(bit)) |
| { |
| bit = 0; |
| compare_type = vector_compare_match2; |
| } |
| } |
| |
| add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); |
| OP1(SLJIT_MOV, TMP2, 0, TMP1, 0); |
| OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0); |
| |
| /* First part (unaligned start) */ |
| |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); |
| |
| SLJIT_ASSERT(tmp1_reg_ind < 8); |
| |
| /* MOVD xmm, r/m32 */ |
| instruction[0] = 0x66; |
| instruction[1] = 0x0f; |
| instruction[2] = 0x6e; |
| instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_reg_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| |
| if (char1 != char2) |
| { |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); |
| |
| /* MOVD xmm, r/m32 */ |
| instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_reg_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| } |
| |
| OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0); |
| |
| /* PSHUFD xmm1, xmm2/m128, imm8 */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0x70; |
| instruction[3] = 0xc0 | (cmp1_ind << 3) | cmp1_ind; |
| instruction[4] = 0; |
| sljit_emit_op_custom(compiler, instruction, 5); |
| |
| if (char1 != char2) |
| { |
| /* PSHUFD xmm1, xmm2/m128, imm8 */ |
| instruction[3] = 0xc0 | (cmp2_ind << 3) | cmp2_ind; |
| sljit_emit_op_custom(compiler, instruction, 5); |
| } |
| |
| OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); |
| OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); |
| |
| load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); |
| for (i = 0; i < 4; i++) |
| fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
| |
| /* PMOVMSKB reg, xmm */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0xd7; |
| instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
| OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); |
| |
| quit = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); |
| |
| OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
| |
| /* Second part (aligned) */ |
| start = LABEL(); |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); |
| |
| add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
| |
| load_from_mem_sse2(compiler, data_ind, str_ptr_reg_ind, 0); |
| for (i = 0; i < 4; i++) |
| fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
| |
| /* PMOVMSKB reg, xmm */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0xd7; |
| instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | data_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| |
| CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); |
| |
| JUMPHERE(quit); |
| |
| /* BSF r32, r/m32 */ |
| instruction[0] = 0x0f; |
| instruction[1] = 0xbc; |
| instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind; |
| sljit_emit_op_custom(compiler, instruction, 3); |
| |
| OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, STR_PTR, 0); |
| add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); |
| |
| OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); |
| return not_found; |
| } |
| |
| #ifndef _WIN64 |
| |
| #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2)) |
| |
| static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, |
| PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) |
| { |
| DEFINE_COMPILER; |
| sljit_u8 instruction[8]; |
| vector_compare_type compare1_type = vector_compare_match1; |
| vector_compare_type compare2_type = vector_compare_match1; |
| sljit_u32 bit1 = 0; |
| sljit_u32 bit2 = 0; |
| sljit_u32 diff = IN_UCHARS(offs1 - offs2); |
| sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); |
| sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2); |
| sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); |
| sljit_s32 data1_ind = 0; |
| sljit_s32 data2_ind = 1; |
| sljit_s32 tmp1_ind = 2; |
| sljit_s32 tmp2_ind = 3; |
| sljit_s32 cmp1a_ind = 4; |
| sljit_s32 cmp1b_ind = 5; |
| sljit_s32 cmp2a_ind = 6; |
| sljit_s32 cmp2b_ind = 7; |
| struct sljit_label *start; |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| struct sljit_label *restart; |
| #endif |
| struct sljit_jump *jump[2]; |
| int i; |
| |
| SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); |
| SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset())); |
| SLJIT_ASSERT(tmp1_reg_ind < 8 && tmp2_reg_ind == 1); |
| |
| /* Initialize. */ |
| if (common->match_end_ptr != 0) |
| { |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); |
| OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); |
| OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); |
| |
| OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0); |
| CMOV(SLJIT_LESS, STR_END, TMP1, 0); |
| } |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); |
| add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
| |
| /* MOVD xmm, r/m32 */ |
| instruction[0] = 0x66; |
| instruction[1] = 0x0f; |
| instruction[2] = 0x6e; |
| |
| if (char1a == char1b) |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a)); |
| else |
| { |
| bit1 = char1a ^ char1b; |
| if (is_powerof2(bit1)) |
| { |
| compare1_type = vector_compare_match1i; |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1)); |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1)); |
| } |
| else |
| { |
| compare1_type = vector_compare_match2; |
| bit1 = 0; |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a)); |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b)); |
| } |
| } |
| |
| instruction[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_reg_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| |
| if (char1a != char1b) |
| { |
| instruction[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_reg_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| } |
| |
| if (char2a == char2b) |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a)); |
| else |
| { |
| bit2 = char2a ^ char2b; |
| if (is_powerof2(bit2)) |
| { |
| compare2_type = vector_compare_match1i; |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2)); |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2)); |
| } |
| else |
| { |
| compare2_type = vector_compare_match2; |
| bit2 = 0; |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a)); |
| OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b)); |
| } |
| } |
| |
| instruction[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_reg_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| |
| if (char2a != char2b) |
| { |
| instruction[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_reg_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| } |
| |
| /* PSHUFD xmm1, xmm2/m128, imm8 */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0x70; |
| instruction[4] = 0; |
| |
| instruction[3] = 0xc0 | (cmp1a_ind << 3) | cmp1a_ind; |
| sljit_emit_op_custom(compiler, instruction, 5); |
| |
| if (char1a != char1b) |
| { |
| instruction[3] = 0xc0 | (cmp1b_ind << 3) | cmp1b_ind; |
| sljit_emit_op_custom(compiler, instruction, 5); |
| } |
| |
| instruction[3] = 0xc0 | (cmp2a_ind << 3) | cmp2a_ind; |
| sljit_emit_op_custom(compiler, instruction, 5); |
| |
| if (char2a != char2b) |
| { |
| instruction[3] = 0xc0 | (cmp2b_ind << 3) | cmp2b_ind; |
| sljit_emit_op_custom(compiler, instruction, 5); |
| } |
| |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| restart = LABEL(); |
| #endif |
| |
| OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, diff); |
| OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); |
| OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); |
| |
| load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0); |
| |
| jump[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_PTR, 0); |
| |
| load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff); |
| jump[1] = JUMP(SLJIT_JUMP); |
| |
| JUMPHERE(jump[0]); |
| |
| /* MOVDQA xmm1, xmm2/m128 */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0x6f; |
| instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| |
| /* PSLLDQ xmm1, imm8 */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0x73; |
| instruction[3] = 0xc0 | (7 << 3) | data2_ind; |
| instruction[4] = diff; |
| sljit_emit_op_custom(compiler, instruction, 5); |
| |
| JUMPHERE(jump[1]); |
| |
| OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); |
| |
| for (i = 0; i < 4; i++) |
| { |
| fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind); |
| fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); |
| } |
| |
| /* PAND xmm1, xmm2/m128 */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0xdb; |
| instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| |
| /* PMOVMSKB reg, xmm */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0xd7; |
| instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | 0; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| |
| /* Ignore matches before the first STR_PTR. */ |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
| OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); |
| |
| jump[0] = CMP(SLJIT_NOT_ZERO, TMP1, 0, SLJIT_IMM, 0); |
| |
| OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
| |
| /* Main loop. */ |
| start = LABEL(); |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); |
| add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
| |
| load_from_mem_sse2(compiler, data1_ind, str_ptr_reg_ind, 0); |
| load_from_mem_sse2(compiler, data2_ind, str_ptr_reg_ind, -(sljit_s8)diff); |
| |
| for (i = 0; i < 4; i++) |
| { |
| fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp2_ind); |
| fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp1_ind); |
| } |
| |
| /* PAND xmm1, xmm2/m128 */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0xdb; |
| instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| |
| /* PMOVMSKB reg, xmm */ |
| /* instruction[0] = 0x66; */ |
| /* instruction[1] = 0x0f; */ |
| instruction[2] = 0xd7; |
| instruction[3] = 0xc0 | (tmp1_reg_ind << 3) | 0; |
| sljit_emit_op_custom(compiler, instruction, 4); |
| |
| CMPTO(SLJIT_ZERO, TMP1, 0, SLJIT_IMM, 0, start); |
| |
| JUMPHERE(jump[0]); |
| |
| /* BSF r32, r/m32 */ |
| instruction[0] = 0x0f; |
| instruction[1] = 0xbc; |
| instruction[2] = 0xc0 | (tmp1_reg_ind << 3) | tmp1_reg_ind; |
| sljit_emit_op_custom(compiler, instruction, 3); |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
| |
| add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
| |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| if (common->utf) |
| { |
| OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1)); |
| |
| jump[0] = jump_if_utf_char_start(compiler, TMP1); |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart); |
| |
| add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP)); |
| |
| JUMPHERE(jump[0]); |
| } |
| #endif |
| |
| OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); |
| |
| if (common->match_end_ptr != 0) |
| OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); |
| } |
| |
| #endif /* !_WIN64 */ |
| |
| #undef SSE2_COMPARE_TYPE_INDEX |
| |
| #endif /* SLJIT_CONFIG_X86 */ |
| |
| #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__)) |
| |
| #include <arm_neon.h> |
| |
| typedef union { |
| unsigned int x; |
| struct { unsigned char c1, c2, c3, c4; } c; |
| } int_char; |
| |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| static SLJIT_INLINE int utf_continue(sljit_u8 *s) |
| { |
| #if PCRE2_CODE_UNIT_WIDTH == 8 |
| return (*s & 0xc0) == 0x80; |
| #elif PCRE2_CODE_UNIT_WIDTH == 16 |
| return (*s & 0xfc00) == 0xdc00; |
| #else |
| #error "Unknown code width" |
| #endif |
| } |
| #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 */ |
| |
| #if PCRE2_CODE_UNIT_WIDTH == 8 |
| # define VECTOR_FACTOR 16 |
| # define vect_t uint8x16_t |
| # define VLD1Q(X) vld1q_u8((sljit_u8 *)(X)) |
| # define VCEQQ vceqq_u8 |
| # define VORRQ vorrq_u8 |
| # define VST1Q vst1q_u8 |
| # define VDUPQ vdupq_n_u8 |
| # define VEXTQ vextq_u8 |
| # define VANDQ vandq_u8 |
| typedef union { |
| uint8_t mem[16]; |
| uint64_t dw[2]; |
| } quad_word; |
| #elif PCRE2_CODE_UNIT_WIDTH == 16 |
| # define VECTOR_FACTOR 8 |
| # define vect_t uint16x8_t |
| # define VLD1Q(X) vld1q_u16((sljit_u16 *)(X)) |
| # define VCEQQ vceqq_u16 |
| # define VORRQ vorrq_u16 |
| # define VST1Q vst1q_u16 |
| # define VDUPQ vdupq_n_u16 |
| # define VEXTQ vextq_u16 |
| # define VANDQ vandq_u16 |
| typedef union { |
| uint16_t mem[8]; |
| uint64_t dw[2]; |
| } quad_word; |
| #else |
| # define VECTOR_FACTOR 4 |
| # define vect_t uint32x4_t |
| # define VLD1Q(X) vld1q_u32((sljit_u32 *)(X)) |
| # define VCEQQ vceqq_u32 |
| # define VORRQ vorrq_u32 |
| # define VST1Q vst1q_u32 |
| # define VDUPQ vdupq_n_u32 |
| # define VEXTQ vextq_u32 |
| # define VANDQ vandq_u32 |
| typedef union { |
| uint32_t mem[4]; |
| uint64_t dw[2]; |
| } quad_word; |
| #endif |
| |
| #define FFCS |
| #include "pcre2_jit_neon_inc.h" |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| # define FF_UTF |
| # include "pcre2_jit_neon_inc.h" |
| # undef FF_UTF |
| #endif |
| #undef FFCS |
| |
| #define FFCS_2 |
| #include "pcre2_jit_neon_inc.h" |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| # define FF_UTF |
| # include "pcre2_jit_neon_inc.h" |
| # undef FF_UTF |
| #endif |
| #undef FFCS_2 |
| |
| #define FFCS_MASK |
| #include "pcre2_jit_neon_inc.h" |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| # define FF_UTF |
| # include "pcre2_jit_neon_inc.h" |
| # undef FF_UTF |
| #endif |
| #undef FFCS_MASK |
| |
| #define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1 |
| |
| static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) |
| { |
| DEFINE_COMPILER; |
| int_char ic; |
| struct sljit_jump *partial_quit; |
| /* Save temporary registers. */ |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP3, 0); |
| |
| /* Prepare function arguments */ |
| OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0); |
| OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0); |
| OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, offset); |
| |
| if (char1 == char2) |
| { |
| ic.c.c1 = char1; |
| ic.c.c2 = char2; |
| OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x); |
| |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| if (common->utf && offset > 0) |
| sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
| SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_utf)); |
| else |
| sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
| SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs)); |
| #else |
| sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
| SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs)); |
| #endif |
| } |
| else |
| { |
| PCRE2_UCHAR mask = char1 ^ char2; |
| if (is_powerof2(mask)) |
| { |
| ic.c.c1 = char1 | mask; |
| ic.c.c2 = mask; |
| OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x); |
| |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| if (common->utf && offset > 0) |
| sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
| SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask_utf)); |
| else |
| sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
| SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask)); |
| #else |
| sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
| SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_mask)); |
| #endif |
| } |
| else |
| { |
| ic.c.c1 = char1; |
| ic.c.c2 = char2; |
| OP1(SLJIT_MOV, SLJIT_R4, 0, SLJIT_IMM, ic.x); |
| |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| if (common->utf && offset > 0) |
| sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
| SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2_utf)); |
| else |
| sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
| SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2)); |
| #else |
| sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
| SLJIT_IMM, SLJIT_FUNC_ADDR(ffcs_2)); |
| #endif |
| } |
| } |
| /* Restore registers. */ |
| OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); |
| OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); |
| |
| /* Check return value. */ |
| partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); |
| if (common->mode == PCRE2_JIT_COMPLETE) |
| add_jump(compiler, &common->failed_match, partial_quit); |
| |
| /* Fast forward STR_PTR to the result of memchr. */ |
| OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); |
| |
| if (common->mode != PCRE2_JIT_COMPLETE) |
| JUMPHERE(partial_quit); |
| } |
| |
| typedef enum { |
| compare_match1, |
| compare_match1i, |
| compare_match2, |
| } compare_type; |
| |
| static inline vect_t fast_forward_char_pair_compare(compare_type ctype, vect_t dst, vect_t cmp1, vect_t cmp2) |
| { |
| if (ctype == compare_match2) |
| { |
| vect_t tmp = dst; |
| dst = VCEQQ(dst, cmp1); |
| tmp = VCEQQ(tmp, cmp2); |
| dst = VORRQ(dst, tmp); |
| return dst; |
| } |
| |
| if (ctype == compare_match1i) |
| dst = VORRQ(dst, cmp2); |
| dst = VCEQQ(dst, cmp1); |
| return dst; |
| } |
| |
| static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void) |
| { |
| #if PCRE2_CODE_UNIT_WIDTH == 8 |
| return 15; |
| #elif PCRE2_CODE_UNIT_WIDTH == 16 |
| return 7; |
| #elif PCRE2_CODE_UNIT_WIDTH == 32 |
| return 3; |
| #else |
| #error "Unsupported unit width" |
| #endif |
| } |
| |
| /* ARM doesn't have a shift left across lanes. */ |
| static SLJIT_INLINE vect_t shift_left_n_lanes(vect_t a, sljit_u8 n) |
| { |
| vect_t zero = VDUPQ(0); |
| SLJIT_ASSERT(0 < n && n < VECTOR_FACTOR); |
| /* VEXTQ takes an immediate as last argument. */ |
| #define C(X) case X: return VEXTQ(zero, a, VECTOR_FACTOR - X); |
| switch (n) |
| { |
| C(1); C(2); C(3); |
| #if PCRE2_CODE_UNIT_WIDTH != 32 |
| C(4); C(5); C(6); C(7); |
| # if PCRE2_CODE_UNIT_WIDTH != 16 |
| C(8); C(9); C(10); C(11); C(12); C(13); C(14); C(15); |
| # endif |
| #endif |
| default: |
| /* Based on the ASSERT(0 < n && n < VECTOR_FACTOR) above, this won't |
| happen. The return is still here for compilers to not warn. */ |
| return a; |
| } |
| } |
| |
| #define FFCPS |
| #define FFCPS_DIFF1 |
| #define FFCPS_CHAR1A2A |
| |
| #define FFCPS_0 |
| #include "pcre2_jit_neon_inc.h" |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| # define FF_UTF |
| # include "pcre2_jit_neon_inc.h" |
| # undef FF_UTF |
| #endif |
| #undef FFCPS_0 |
| |
| #undef FFCPS_CHAR1A2A |
| |
| #define FFCPS_1 |
| #include "pcre2_jit_neon_inc.h" |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| # define FF_UTF |
| # include "pcre2_jit_neon_inc.h" |
| # undef FF_UTF |
| #endif |
| #undef FFCPS_1 |
| |
| #undef FFCPS_DIFF1 |
| |
| #define FFCPS_DEFAULT |
| #include "pcre2_jit_neon_inc.h" |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| # define FF_UTF |
| # include "pcre2_jit_neon_inc.h" |
| # undef FF_UTF |
| #endif |
| #undef FFCPS |
| |
| #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1 |
| |
| static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, |
| PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) |
| { |
| DEFINE_COMPILER; |
| sljit_u32 diff = IN_UCHARS(offs1 - offs2); |
| struct sljit_jump *partial_quit; |
| int_char ic; |
| SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); |
| SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_offset())); |
| SLJIT_ASSERT(compiler->scratches == 5); |
| |
| /* Save temporary register STR_PTR. */ |
| OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); |
| |
| /* Prepare arguments for the function call. */ |
| if (common->match_end_ptr == 0) |
| OP1(SLJIT_MOV, SLJIT_R0, 0, STR_END, 0); |
| else |
| { |
| OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); |
| OP2(SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); |
| |
| OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, SLJIT_R0, 0); |
| CMOV(SLJIT_LESS, SLJIT_R0, STR_END, 0); |
| } |
| |
| OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0); |
| OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_IMM, offs1); |
| OP1(SLJIT_MOV_S32, SLJIT_R3, 0, SLJIT_IMM, offs2); |
| ic.c.c1 = char1a; |
| ic.c.c2 = char1b; |
| ic.c.c3 = char2a; |
| ic.c.c4 = char2b; |
| OP1(SLJIT_MOV_U32, SLJIT_R4, 0, SLJIT_IMM, ic.x); |
| |
| if (diff == 1) { |
| if (char1a == char1b && char2a == char2b) { |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| if (common->utf) |
| sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
| SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_0_utf)); |
| else |
| #endif |
| sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
| SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_0)); |
| } else { |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| if (common->utf) |
| sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
| SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_1_utf)); |
| else |
| #endif |
| sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
| SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_1)); |
| } |
| } else { |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| if (common->utf) |
| sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
| SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_default_utf)); |
| else |
| #endif |
| sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS4(W, W, W, W, W), |
| SLJIT_IMM, SLJIT_FUNC_ADDR(ffcps_default)); |
| } |
| |
| /* Restore STR_PTR register. */ |
| OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); |
| |
| /* Check return value. */ |
| partial_quit = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); |
| add_jump(compiler, &common->failed_match, partial_quit); |
| |
| /* Fast forward STR_PTR to the result of memchr. */ |
| OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); |
| |
| JUMPHERE(partial_quit); |
| } |
| |
| #endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */ |
| |
| #if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) |
| |
| #if PCRE2_CODE_UNIT_WIDTH == 8 |
| #define VECTOR_ELEMENT_SIZE 0 |
| #elif PCRE2_CODE_UNIT_WIDTH == 16 |
| #define VECTOR_ELEMENT_SIZE 1 |
| #elif PCRE2_CODE_UNIT_WIDTH == 32 |
| #define VECTOR_ELEMENT_SIZE 2 |
| #else |
| #error "Unsupported unit width" |
| #endif |
| |
| static void load_from_mem_vector(struct sljit_compiler *compiler, BOOL vlbb, sljit_s32 dst_vreg, |
| sljit_s32 base_reg, sljit_s32 index_reg) |
| { |
| sljit_u16 instruction[3]; |
| |
| instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | index_reg); |
| instruction[1] = (sljit_u16)(base_reg << 12); |
| instruction[2] = (sljit_u16)((0x8 << 8) | (vlbb ? 0x07 : 0x06)); |
| |
| sljit_emit_op_custom(compiler, instruction, 6); |
| } |
| |
| #if PCRE2_CODE_UNIT_WIDTH == 32 |
| |
| static void replicate_imm_vector(struct sljit_compiler *compiler, int step, sljit_s32 dst_vreg, |
| PCRE2_UCHAR chr, sljit_s32 tmp_general_reg) |
| { |
| sljit_u16 instruction[3]; |
| |
| SLJIT_ASSERT(step >= 0 && step <= 1); |
| |
| if (chr < 0x7fff) |
| { |
| if (step == 1) |
| return; |
| |
| /* VREPI */ |
| instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4)); |
| instruction[1] = (sljit_u16)chr; |
| instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| return; |
| } |
| |
| if (step == 0) |
| { |
| OP1(SLJIT_MOV, tmp_general_reg, 0, SLJIT_IMM, chr); |
| |
| /* VLVG */ |
| instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | sljit_get_register_index(tmp_general_reg)); |
| instruction[1] = 0; |
| instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x22); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| return; |
| } |
| |
| /* VREP */ |
| instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | dst_vreg); |
| instruction[1] = 0; |
| instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xc << 8) | 0x4d); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| } |
| |
| #endif |
| |
| static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type, |
| int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) |
| { |
| sljit_u16 instruction[3]; |
| |
| SLJIT_ASSERT(step >= 0 && step <= 2); |
| |
| if (step == 1) |
| { |
| /* VCEQ */ |
| instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind); |
| instruction[1] = (sljit_u16)(cmp1_ind << 12); |
| instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| return; |
| } |
| |
| if (compare_type != vector_compare_match2) |
| { |
| if (step == 0 && compare_type == vector_compare_match1i) |
| { |
| /* VO */ |
| instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind); |
| instruction[1] = (sljit_u16)(cmp2_ind << 12); |
| instruction[2] = (sljit_u16)((0xe << 8) | 0x6a); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| } |
| return; |
| } |
| |
| switch (step) |
| { |
| case 0: |
| /* VCEQ */ |
| instruction[0] = (sljit_u16)(0xe700 | (tmp_ind << 4) | dst_ind); |
| instruction[1] = (sljit_u16)(cmp2_ind << 12); |
| instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| return; |
| |
| case 2: |
| /* VO */ |
| instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind); |
| instruction[1] = (sljit_u16)(tmp_ind << 12); |
| instruction[2] = (sljit_u16)((0xe << 8) | 0x6a); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| return; |
| } |
| } |
| |
| #define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1 |
| |
| static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) |
| { |
| DEFINE_COMPILER; |
| sljit_u16 instruction[3]; |
| struct sljit_label *start; |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| struct sljit_label *restart; |
| #endif |
| struct sljit_jump *quit; |
| struct sljit_jump *partial_quit[2]; |
| vector_compare_type compare_type = vector_compare_match1; |
| sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); |
| sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); |
| sljit_s32 data_ind = 0; |
| sljit_s32 tmp_ind = 1; |
| sljit_s32 cmp1_ind = 2; |
| sljit_s32 cmp2_ind = 3; |
| sljit_s32 zero_ind = 4; |
| sljit_u32 bit = 0; |
| int i; |
| |
| SLJIT_UNUSED_ARG(offset); |
| |
| if (char1 != char2) |
| { |
| bit = char1 ^ char2; |
| compare_type = vector_compare_match1i; |
| |
| if (!is_powerof2(bit)) |
| { |
| bit = 0; |
| compare_type = vector_compare_match2; |
| } |
| } |
| |
| partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
| if (common->mode == PCRE2_JIT_COMPLETE) |
| add_jump(compiler, &common->failed_match, partial_quit[0]); |
| |
| /* First part (unaligned start) */ |
| |
| OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16); |
| |
| #if PCRE2_CODE_UNIT_WIDTH != 32 |
| |
| /* VREPI */ |
| instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4)); |
| instruction[1] = (sljit_u16)(char1 | bit); |
| instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| |
| if (char1 != char2) |
| { |
| /* VREPI */ |
| instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4)); |
| instruction[1] = (sljit_u16)(bit != 0 ? bit : char2); |
| /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ |
| sljit_emit_op_custom(compiler, instruction, 6); |
| } |
| |
| #else /* PCRE2_CODE_UNIT_WIDTH == 32 */ |
| |
| for (int i = 0; i < 2; i++) |
| { |
| replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP1); |
| |
| if (char1 != char2) |
| replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP1); |
| } |
| |
| #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ |
| |
| if (compare_type == vector_compare_match2) |
| { |
| /* VREPI */ |
| instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4)); |
| instruction[1] = 0; |
| instruction[2] = (sljit_u16)((0x8 << 8) | 0x45); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| } |
| |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| restart = LABEL(); |
| #endif |
| |
| load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0); |
| OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15); |
| |
| if (compare_type != vector_compare_match2) |
| { |
| if (compare_type == vector_compare_match1i) |
| fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
| |
| /* VFEE */ |
| instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); |
| instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); |
| instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| } |
| else |
| { |
| for (i = 0; i < 3; i++) |
| fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
| |
| /* VFENE */ |
| instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); |
| instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); |
| instruction[2] = (sljit_u16)((0xe << 8) | 0x81); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| } |
| |
| /* VLGVB */ |
| instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind); |
| instruction[1] = 7; |
| instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
| quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); |
| |
| OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16); |
| |
| /* Second part (aligned) */ |
| start = LABEL(); |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); |
| |
| partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
| if (common->mode == PCRE2_JIT_COMPLETE) |
| add_jump(compiler, &common->failed_match, partial_quit[1]); |
| |
| load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0); |
| |
| if (compare_type != vector_compare_match2) |
| { |
| if (compare_type == vector_compare_match1i) |
| fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
| |
| /* VFEE */ |
| instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); |
| instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); |
| instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| } |
| else |
| { |
| for (i = 0; i < 3; i++) |
| fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
| |
| /* VFENE */ |
| instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); |
| instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); |
| instruction[2] = (sljit_u16)((0xe << 8) | 0x81); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| } |
| |
| sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW); |
| JUMPTO(SLJIT_OVERFLOW, start); |
| |
| /* VLGVB */ |
| instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind); |
| instruction[1] = 7; |
| instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
| |
| JUMPHERE(quit); |
| |
| if (common->mode != PCRE2_JIT_COMPLETE) |
| { |
| JUMPHERE(partial_quit[0]); |
| JUMPHERE(partial_quit[1]); |
| OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); |
| CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); |
| } |
| else |
| add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
| |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| if (common->utf && offset > 0) |
| { |
| SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); |
| |
| OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); |
| |
| quit = jump_if_utf_char_start(compiler, TMP1); |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
| |
| OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16); |
| JUMPTO(SLJIT_JUMP, restart); |
| |
| JUMPHERE(quit); |
| } |
| #endif |
| } |
| |
| #define JIT_HAS_FAST_REQUESTED_CHAR_SIMD 1 |
| |
| static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2) |
| { |
| DEFINE_COMPILER; |
| sljit_u16 instruction[3]; |
| struct sljit_label *start; |
| struct sljit_jump *quit; |
| jump_list *not_found = NULL; |
| vector_compare_type compare_type = vector_compare_match1; |
| sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); |
| sljit_s32 tmp3_reg_ind = sljit_get_register_index(TMP3); |
| sljit_s32 data_ind = 0; |
| sljit_s32 tmp_ind = 1; |
| sljit_s32 cmp1_ind = 2; |
| sljit_s32 cmp2_ind = 3; |
| sljit_s32 zero_ind = 4; |
| sljit_u32 bit = 0; |
| int i; |
| |
| if (char1 != char2) |
| { |
| bit = char1 ^ char2; |
| compare_type = vector_compare_match1i; |
| |
| if (!is_powerof2(bit)) |
| { |
| bit = 0; |
| compare_type = vector_compare_match2; |
| } |
| } |
| |
| add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); |
| |
| /* First part (unaligned start) */ |
| |
| OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, 16); |
| |
| #if PCRE2_CODE_UNIT_WIDTH != 32 |
| |
| /* VREPI */ |
| instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4)); |
| instruction[1] = (sljit_u16)(char1 | bit); |
| instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| |
| if (char1 != char2) |
| { |
| /* VREPI */ |
| instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4)); |
| instruction[1] = (sljit_u16)(bit != 0 ? bit : char2); |
| /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ |
| sljit_emit_op_custom(compiler, instruction, 6); |
| } |
| |
| #else /* PCRE2_CODE_UNIT_WIDTH == 32 */ |
| |
| for (int i = 0; i < 2; i++) |
| { |
| replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP3); |
| |
| if (char1 != char2) |
| replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP3); |
| } |
| |
| #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ |
| |
| if (compare_type == vector_compare_match2) |
| { |
| /* VREPI */ |
| instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4)); |
| instruction[1] = 0; |
| instruction[2] = (sljit_u16)((0x8 << 8) | 0x45); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| } |
| |
| load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0); |
| OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15); |
| |
| if (compare_type != vector_compare_match2) |
| { |
| if (compare_type == vector_compare_match1i) |
| fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
| |
| /* VFEE */ |
| instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); |
| instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); |
| instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| } |
| else |
| { |
| for (i = 0; i < 3; i++) |
| fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
| |
| /* VFENE */ |
| instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); |
| instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); |
| instruction[2] = (sljit_u16)((0xe << 8) | 0x81); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| } |
| |
| /* VLGVB */ |
| instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind); |
| instruction[1] = 7; |
| instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| |
| OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0); |
| quit = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0); |
| |
| OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 16); |
| |
| /* Second part (aligned) */ |
| start = LABEL(); |
| |
| OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 16); |
| |
| add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); |
| |
| load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0); |
| |
| if (compare_type != vector_compare_match2) |
| { |
| if (compare_type == vector_compare_match1i) |
| fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
| |
| /* VFEE */ |
| instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); |
| instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); |
| instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| } |
| else |
| { |
| for (i = 0; i < 3; i++) |
| fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); |
| |
| /* VFENE */ |
| instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); |
| instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); |
| instruction[2] = (sljit_u16)((0xe << 8) | 0x81); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| } |
| |
| sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW); |
| JUMPTO(SLJIT_OVERFLOW, start); |
| |
| /* VLGVB */ |
| instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind); |
| instruction[1] = 7; |
| instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| |
| OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0); |
| |
| JUMPHERE(quit); |
| add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); |
| |
| return not_found; |
| } |
| |
| #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1 |
| |
| static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, |
| PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) |
| { |
| DEFINE_COMPILER; |
| sljit_u16 instruction[3]; |
| struct sljit_label *start; |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| struct sljit_label *restart; |
| #endif |
| struct sljit_jump *quit; |
| struct sljit_jump *jump[2]; |
| vector_compare_type compare1_type = vector_compare_match1; |
| vector_compare_type compare2_type = vector_compare_match1; |
| sljit_u32 bit1 = 0; |
| sljit_u32 bit2 = 0; |
| sljit_s32 diff = IN_UCHARS(offs2 - offs1); |
| sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); |
| sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2); |
| sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); |
| sljit_s32 data1_ind = 0; |
| sljit_s32 data2_ind = 1; |
| sljit_s32 tmp1_ind = 2; |
| sljit_s32 tmp2_ind = 3; |
| sljit_s32 cmp1a_ind = 4; |
| sljit_s32 cmp1b_ind = 5; |
| sljit_s32 cmp2a_ind = 6; |
| sljit_s32 cmp2b_ind = 7; |
| sljit_s32 zero_ind = 8; |
| int i; |
| |
| SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); |
| SLJIT_ASSERT(-diff <= (sljit_s32)IN_UCHARS(max_fast_forward_char_pair_offset())); |
| SLJIT_ASSERT(tmp1_reg_ind != 0 && tmp2_reg_ind != 0); |
| |
| if (char1a != char1b) |
| { |
| bit1 = char1a ^ char1b; |
| compare1_type = vector_compare_match1i; |
| |
| if (!is_powerof2(bit1)) |
| { |
| bit1 = 0; |
| compare1_type = vector_compare_match2; |
| } |
| } |
| |
| if (char2a != char2b) |
| { |
| bit2 = char2a ^ char2b; |
| compare2_type = vector_compare_match1i; |
| |
| if (!is_powerof2(bit2)) |
| { |
| bit2 = 0; |
| compare2_type = vector_compare_match2; |
| } |
| } |
| |
| /* Initialize. */ |
| if (common->match_end_ptr != 0) |
| { |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); |
| OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); |
| OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); |
| |
| OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, STR_END, 0); |
| CMOV(SLJIT_LESS, STR_END, TMP1, 0); |
| } |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); |
| add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
| OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15); |
| |
| #if PCRE2_CODE_UNIT_WIDTH != 32 |
| |
| OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff); |
| |
| /* VREPI */ |
| instruction[0] = (sljit_u16)(0xe700 | (cmp1a_ind << 4)); |
| instruction[1] = (sljit_u16)(char1a | bit1); |
| instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| |
| if (char1a != char1b) |
| { |
| /* VREPI */ |
| instruction[0] = (sljit_u16)(0xe700 | (cmp1b_ind << 4)); |
| instruction[1] = (sljit_u16)(bit1 != 0 ? bit1 : char1b); |
| /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ |
| sljit_emit_op_custom(compiler, instruction, 6); |
| } |
| |
| /* VREPI */ |
| instruction[0] = (sljit_u16)(0xe700 | (cmp2a_ind << 4)); |
| instruction[1] = (sljit_u16)(char2a | bit2); |
| /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ |
| sljit_emit_op_custom(compiler, instruction, 6); |
| |
| if (char2a != char2b) |
| { |
| /* VREPI */ |
| instruction[0] = (sljit_u16)(0xe700 | (cmp2b_ind << 4)); |
| instruction[1] = (sljit_u16)(bit2 != 0 ? bit2 : char2b); |
| /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ |
| sljit_emit_op_custom(compiler, instruction, 6); |
| } |
| |
| #else /* PCRE2_CODE_UNIT_WIDTH == 32 */ |
| |
| for (int i = 0; i < 2; i++) |
| { |
| replicate_imm_vector(compiler, i, cmp1a_ind, char1a | bit1, TMP1); |
| |
| if (char1a != char1b) |
| replicate_imm_vector(compiler, i, cmp1b_ind, bit1 != 0 ? bit1 : char1b, TMP1); |
| |
| replicate_imm_vector(compiler, i, cmp2a_ind, char2a | bit2, TMP1); |
| |
| if (char2a != char2b) |
| replicate_imm_vector(compiler, i, cmp2b_ind, bit2 != 0 ? bit2 : char2b, TMP1); |
| } |
| |
| OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff); |
| |
| #endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ |
| |
| /* VREPI */ |
| instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4)); |
| instruction[1] = 0; |
| instruction[2] = (sljit_u16)((0x8 << 8) | 0x45); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| restart = LABEL(); |
| #endif |
| |
| jump[0] = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0); |
| load_from_mem_vector(compiler, TRUE, data2_ind, tmp1_reg_ind, 0); |
| jump[1] = JUMP(SLJIT_JUMP); |
| JUMPHERE(jump[0]); |
| load_from_mem_vector(compiler, FALSE, data2_ind, tmp1_reg_ind, 0); |
| JUMPHERE(jump[1]); |
| |
| load_from_mem_vector(compiler, TRUE, data1_ind, str_ptr_reg_ind, 0); |
| OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 16); |
| |
| for (i = 0; i < 3; i++) |
| { |
| fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); |
| fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind); |
| } |
| |
| /* VN */ |
| instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); |
| instruction[1] = (sljit_u16)(data2_ind << 12); |
| instruction[2] = (sljit_u16)((0xe << 8) | 0x68); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| |
| /* VFENE */ |
| instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); |
| instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); |
| instruction[2] = (sljit_u16)((0xe << 8) | 0x81); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| |
| /* VLGVB */ |
| instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data1_ind); |
| instruction[1] = 7; |
| instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
| quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); |
| |
| OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16); |
| OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, diff); |
| |
| /* Main loop. */ |
| start = LABEL(); |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); |
| add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
| |
| load_from_mem_vector(compiler, FALSE, data1_ind, str_ptr_reg_ind, 0); |
| load_from_mem_vector(compiler, FALSE, data2_ind, str_ptr_reg_ind, tmp1_reg_ind); |
| |
| for (i = 0; i < 3; i++) |
| { |
| fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); |
| fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind); |
| } |
| |
| /* VN */ |
| instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); |
| instruction[1] = (sljit_u16)(data2_ind << 12); |
| instruction[2] = (sljit_u16)((0xe << 8) | 0x68); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| |
| /* VFENE */ |
| instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); |
| instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); |
| instruction[2] = (sljit_u16)((0xe << 8) | 0x81); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| |
| sljit_set_current_flags(compiler, SLJIT_SET_OVERFLOW); |
| JUMPTO(SLJIT_OVERFLOW, start); |
| |
| /* VLGVB */ |
| instruction[0] = (sljit_u16)(0xe700 | (tmp2_reg_ind << 4) | data1_ind); |
| instruction[1] = 7; |
| instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); |
| sljit_emit_op_custom(compiler, instruction, 6); |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
| |
| JUMPHERE(quit); |
| |
| add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
| |
| #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 |
| if (common->utf) |
| { |
| SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); |
| |
| OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1)); |
| |
| quit = jump_if_utf_char_start(compiler, TMP1); |
| |
| OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
| add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); |
| |
| /* TMP1 contains diff. */ |
| OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15); |
| OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff); |
| JUMPTO(SLJIT_JUMP, restart); |
| |
| JUMPHERE(quit); |
| } |
| #endif |
| |
| OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); |
| |
| if (common->match_end_ptr != 0) |
| OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); |
| } |
| |
| #endif /* SLJIT_CONFIG_S390X */ |
| |
| #endif /* !SUPPORT_VALGRIND */ |