| # These test special UTF and UCP features of DFA matching. The output is |
| # different for the different widths. |
| |
| #subject dfa |
| |
| # ---------------------------------------------------- |
| # These are a selection of the more comprehensive tests that are run for |
| # non-DFA matching. |
| |
| /X/utf |
| XX\x{d800} |
| Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 |
| XX\x{d800}\=offset=3 |
| Error -36 (bad UTF-8 offset) |
| XX\x{d800}\=no_utf_check |
| 0: X |
| XX\x{da00} |
| Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 |
| XX\x{da00}\=no_utf_check |
| 0: X |
| XX\x{dc00} |
| Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 |
| XX\x{dc00}\=no_utf_check |
| 0: X |
| XX\x{de00} |
| Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 |
| XX\x{de00}\=no_utf_check |
| 0: X |
| XX\x{dfff} |
| Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 |
| XX\x{dfff}\=no_utf_check |
| 0: X |
| XX\x{110000} |
| Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2 |
| XX\x{d800}\x{1234} |
| Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2 |
| |
| /badutf/utf |
| X\xdf |
| Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1 |
| XX\xef |
| Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2 |
| XXX\xef\x80 |
| Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3 |
| X\xf7 |
| Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 1 |
| XX\xf7\x80 |
| Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2 |
| XXX\xf7\x80\x80 |
| Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3 |
| |
| /shortutf/utf |
| XX\xdf\=ph |
| Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2 |
| XX\xef\=ph |
| Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2 |
| XX\xef\x80\=ph |
| Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2 |
| \xf7\=ph |
| Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0 |
| \xf7\x80\=ph |
| Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0 |
| |
| # ---------------------------------------------------- |
| # UCP and casing tests - except for the first two, these will all fail in 8-bit |
| # mode because they are testing UCP without UTF and use characters > 255. |
| |
| /\x{c1}/i,no_start_optimize |
| \= Expect no match |
| \x{e1} |
| No match |
| |
| /\x{c1}+\x{e1}/iB,ucp |
| ------------------------------------------------------------------ |
| Bra |
| /i \x{c1}+ |
| /i \x{e1} |
| Ket |
| End |
| ------------------------------------------------------------------ |
| \x{c1}\x{c1}\x{c1} |
| 0: \xc1\xc1\xc1 |
| 1: \xc1\xc1 |
| \x{e1}\x{e1}\x{e1} |
| 0: \xe1\xe1\xe1 |
| 1: \xe1\xe1 |
| |
| /\x{120}\x{c1}/i,ucp,no_start_optimize |
| Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large |
| \x{121}\x{e1} |
| |
| /\x{120}\x{c1}/i,ucp |
| Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large |
| \x{121}\x{e1} |
| |
| /[^\x{120}]/i,no_start_optimize |
| Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large |
| \x{121} |
| |
| /[^\x{120}]/i,ucp,no_start_optimize |
| Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large |
| \= Expect no match |
| \x{121} |
| |
| /[^\x{120}]/i |
| Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large |
| \x{121} |
| |
| /[^\x{120}]/i,ucp |
| Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large |
| \= Expect no match |
| \x{121} |
| |
| /\x{120}{2}/i,ucp |
| Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large |
| \x{121}\x{121} |
| |
| /[^\x{120}]{2}/i,ucp |
| Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large |
| \= Expect no match |
| \x{121}\x{121} |
| |
| # ---------------------------------------------------- |
| |
| # End of testinput14 |