| # This set of tests is for the 16-bit and 32-bit libraries' basic (non-UTF) |
| # features that are not compatible with the 8-bit library, or which give |
| # different output in 16-bit or 32-bit mode. The output for the two widths is |
| # different, so they have separate output files. |
| |
| #forbid_utf |
| #newline_default LF ANY ANYCRLF |
| |
| /[^\x{c4}]/IB |
| |
| /\x{100}/I |
| |
| / (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* # optional leading comment |
| (?: (?: |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom |
| | |
| " (?: # opening quote... |
| [^\\\x80-\xff\n\015"] # Anything except backslash and quote |
| | # or |
| \\ [^\x80-\xff] # Escaped something (something != CR) |
| )* " # closing quote |
| ) # initial word |
| (?: (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* \. (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* (?: |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom |
| | |
| " (?: # opening quote... |
| [^\\\x80-\xff\n\015"] # Anything except backslash and quote |
| | # or |
| \\ [^\x80-\xff] # Escaped something (something != CR) |
| )* " # closing quote |
| ) )* # further okay, if led by a period |
| (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* @ (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* (?: |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom |
| | \[ # [ |
| (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff |
| \] # ] |
| ) # initial subdomain |
| (?: # |
| (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* \. # if led by a period... |
| (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* (?: |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom |
| | \[ # [ |
| (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff |
| \] # ] |
| ) # ...further okay |
| )* |
| # address |
| | # or |
| (?: |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom |
| | |
| " (?: # opening quote... |
| [^\\\x80-\xff\n\015"] # Anything except backslash and quote |
| | # or |
| \\ [^\x80-\xff] # Escaped something (something != CR) |
| )* " # closing quote |
| ) # one word, optionally followed by.... |
| (?: |
| [^()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037] | # atom and space parts, or... |
| \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) | # comments, or... |
| |
| " (?: # opening quote... |
| [^\\\x80-\xff\n\015"] # Anything except backslash and quote |
| | # or |
| \\ [^\x80-\xff] # Escaped something (something != CR) |
| )* " # closing quote |
| # quoted strings |
| )* |
| < (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* # leading < |
| (?: @ (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* (?: |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom |
| | \[ # [ |
| (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff |
| \] # ] |
| ) # initial subdomain |
| (?: # |
| (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* \. # if led by a period... |
| (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* (?: |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom |
| | \[ # [ |
| (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff |
| \] # ] |
| ) # ...further okay |
| )* |
| |
| (?: (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* , (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* @ (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* (?: |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom |
| | \[ # [ |
| (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff |
| \] # ] |
| ) # initial subdomain |
| (?: # |
| (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* \. # if led by a period... |
| (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* (?: |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom |
| | \[ # [ |
| (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff |
| \] # ] |
| ) # ...further okay |
| )* |
| )* # further okay, if led by comma |
| : # closing colon |
| (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* )? # optional route |
| (?: |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom |
| | |
| " (?: # opening quote... |
| [^\\\x80-\xff\n\015"] # Anything except backslash and quote |
| | # or |
| \\ [^\x80-\xff] # Escaped something (something != CR) |
| )* " # closing quote |
| ) # initial word |
| (?: (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* \. (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* (?: |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom |
| | |
| " (?: # opening quote... |
| [^\\\x80-\xff\n\015"] # Anything except backslash and quote |
| | # or |
| \\ [^\x80-\xff] # Escaped something (something != CR) |
| )* " # closing quote |
| ) )* # further okay, if led by a period |
| (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* @ (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* (?: |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom |
| | \[ # [ |
| (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff |
| \] # ] |
| ) # initial subdomain |
| (?: # |
| (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* \. # if led by a period... |
| (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* (?: |
| [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+ # some number of atom characters... |
| (?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]) # ..not followed by something that could be part of an atom |
| | \[ # [ |
| (?: [^\\\x80-\xff\n\015\[\]] | \\ [^\x80-\xff] )* # stuff |
| \] # ] |
| ) # ...further okay |
| )* |
| # address spec |
| (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* > # trailing > |
| # name and address |
| ) (?: [\040\t] | \( |
| (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] | \( (?: [^\\\x80-\xff\n\015()] | \\ [^\x80-\xff] )* \) )* |
| \) )* # optional trailing comment |
| /Ix |
| |
| /[\h]/B |
| >\x09< |
| |
| /[\h]+/B |
| >\x09\x20\xa0< |
| |
| /[\v]/B |
| |
| /[^\h]/B |
| |
| /\h+/I |
| \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} |
| \x{3001}\x{2fff}\x{200a}\xa0\x{2000} |
| |
| /[\h\x{dc00}]+/IB |
| \x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000} |
| \x{3001}\x{2fff}\x{200a}\xa0\x{2000} |
| |
| /\H+/I |
| \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} |
| \x{2000}\x{200a}\x{1fff}\x{200b} |
| \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} |
| \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001} |
| |
| /[\H\x{d800}]+/ |
| \x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f} |
| \x{2000}\x{200a}\x{1fff}\x{200b} |
| \x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060} |
| \xa0\x{3000}\x9f\xa1\x{2fff}\x{3001} |
| |
| /\v+/I |
| \x{2027}\x{2030}\x{2028}\x{2029} |
| \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d |
| |
| /[\v\x{dc00}]+/IB |
| \x{2027}\x{2030}\x{2028}\x{2029} |
| \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d |
| |
| /\V+/I |
| \x{2028}\x{2029}\x{2027}\x{2030} |
| \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86 |
| |
| /[\V\x{d800}]+/ |
| \x{2028}\x{2029}\x{2027}\x{2030} |
| \x85\x0a\x0b\x0c\x0d\x09\x0e\x84\x86 |
| |
| /\R+/I,bsr=unicode |
| \x{2027}\x{2030}\x{2028}\x{2029} |
| \x09\x0e\x84\x86\x85\x0a\x0b\x0c\x0d |
| |
| /\x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00}/I |
| \x{d800}\x{d7ff}\x{dc00}\x{dc00}\x{dcff}\x{dd00} |
| |
| /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/B |
| |
| /[^\x{80}][^\x{ff}][^\x{100}][^\x{1000}][^\x{ffff}]/Bi |
| |
| /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/B |
| |
| /[^\x{100}]*[^\x{1000}]+[^\x{ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{100}]{5,6}+/Bi |
| |
| /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF)XX/mark |
| XX |
| |
| /(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDE)XX/mark |
| XX |
| |
| /\u0100/B,alt_bsux,allow_empty_class,match_unset_backref |
| |
| /[\u0100-\u0200]/B,alt_bsux,allow_empty_class,match_unset_backref |
| |
| /\ud800/B,alt_bsux,allow_empty_class,match_unset_backref |
| |
| /^\x{ffff}+/i |
| \x{ffff} |
| |
| /^\x{ffff}?/i |
| \x{ffff} |
| |
| /^\x{ffff}*/i |
| \x{ffff} |
| |
| /^\x{ffff}{3}/i |
| \x{ffff}\x{ffff}\x{ffff} |
| |
| /^\x{ffff}{0,3}/i |
| \x{ffff} |
| |
| /[^\x00-a]{12,}[^b-\xff]*/B |
| |
| /[^\s]*\s* [^\W]+\W+ [^\d]*?\d0 [^\d\w]{4,6}?\w*A/B |
| |
| /a*[b-\x{200}]?a#a*[b-\x{200}]?b#[a-f]*[g-\x{200}]*#[g-\x{200}]*[a-c]*#[g-\x{200}]*[a-h]*/B |
| |
| /^[\x{1234}\x{4321}]{2,4}?/ |
| \x{1234}\x{1234}\x{1234} |
| |
| # Check maximum non-UTF character size for the 16-bit library. |
| |
| /\x{ffff}/ |
| A\x{ffff}B |
| |
| /\x{10000}/ |
| |
| /\o{20000}/ |
| |
| # Check maximum character size for the 32-bit library. These will all give |
| # errors in the 16-bit library. |
| |
| /\x{110000}/ |
| |
| /\x{7fffffff}/ |
| |
| /\x{80000000}/ |
| |
| /\x{ffffffff}/ |
| |
| /\x{100000000}/ |
| |
| /\o{17777777777}/ |
| |
| /\o{20000000000}/ |
| |
| /\o{37777777777}/ |
| |
| /\o{40000000000}/ |
| |
| /\x{7fffffff}\x{7fffffff}/I |
| |
| /\x{80000000}\x{80000000}/I |
| |
| /\x{ffffffff}\x{ffffffff}/I |
| |
| # Non-UTF characters |
| |
| /.{2,3}/ |
| \x{400000}\x{400001}\x{400002}\x{400003} |
| |
| /\x{400000}\x{800000}/IBi |
| |
| # Check character ranges |
| |
| /[\H]/IB |
| |
| /[\V]/IB |
| |
| /(*THEN:\[A]{65501})/expand |
| |
| # We can use pcre2test's utf8_input modifier to create wide pattern characters, |
| # even though this test is run when UTF is not supported. |
| |
| /abý¿¿¿¿¿z/utf8_input |
| abý¿¿¿¿¿z |
| ab\x{7fffffff}z |
| |
| /abÿý¿¿¿¿¿z/utf8_input |
| abÿý¿¿¿¿¿z |
| ab\x{ffffffff}z |
| |
| /abÿAz/utf8_input |
| abÿAz |
| ab\x{80000041}z |
| |
| /(?i:A{1,}\6666666666)/ |
| A\x{1b6}6666666 |
| |
| # End of testinput11 |