/-- This set of tests is for UTF-8 support, and is relevant only to the 8-bit library. --/ /X(\C{3})/8 X\x{1234} 0: X\x{1234} 1: \x{1234} /X(\C{4})/8 X\x{1234}YZ 0: X\x{1234}Y 1: \x{1234}Y /X\C*/8 XYZabcdce 0: XYZabcdce /X\C*?/8 XYZabcde 0: X /X\C{3,5}/8 Xabcdefg 0: Xabcde X\x{1234} 0: X\x{1234} X\x{1234}YZ 0: X\x{1234}YZ X\x{1234}\x{512} 0: X\x{1234}\x{512} X\x{1234}\x{512}YZ 0: X\x{1234}\x{512} /X\C{3,5}?/8 Xabcdefg 0: Xabc X\x{1234} 0: X\x{1234} X\x{1234}YZ 0: X\x{1234} X\x{1234}\x{512} 0: X\x{1234} /a\Cb/8 aXb 0: aXb a\nb 0: a\x{0a}b /a\C\Cb/8 a\x{100}b 0: a\x{100}b /ab\Cde/8 abXde 0: abXde /a\C\Cb/8 a\x{100}b 0: a\x{100}b ** Failers No match a\x{12257}b No match /[Ã]/8 Failed: invalid UTF-8 string at offset 1 /Ã/8 Failed: invalid UTF-8 string at offset 0 /ÃÃÃxxx/8 Failed: invalid UTF-8 string at offset 0 /ÃÃÃxxx/8?DZSS ------------------------------------------------------------------ Bra \X{c0}\X{c0}\X{c0}xxx Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf no_utf_check First char = \x{c3} Need char = 'x' /badutf/8 \xdf Error -10 (bad UTF-8 string) offset=0 reason=1 \xef Error -10 (bad UTF-8 string) offset=0 reason=2 \xef\x80 Error -10 (bad UTF-8 string) offset=0 reason=1 \xf7 Error -10 (bad UTF-8 string) offset=0 reason=3 \xf7\x80 Error -10 (bad UTF-8 string) offset=0 reason=2 \xf7\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=1 \xfb Error -10 (bad UTF-8 string) offset=0 reason=4 \xfb\x80 Error -10 (bad UTF-8 string) offset=0 reason=3 \xfb\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=2 \xfb\x80\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=1 \xfd Error -10 (bad UTF-8 string) offset=0 reason=5 \xfd\x80 Error -10 (bad UTF-8 string) offset=0 reason=4 \xfd\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=3 \xfd\x80\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=2 \xfd\x80\x80\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=1 \xdf\x7f Error -10 (bad UTF-8 string) offset=0 reason=6 \xef\x7f\x80 Error -10 (bad UTF-8 string) offset=0 reason=6 \xef\x80\x7f Error -10 (bad UTF-8 string) offset=0 reason=7 \xf7\x7f\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=6 \xf7\x80\x7f\x80 Error -10 (bad UTF-8 string) offset=0 reason=7 \xf7\x80\x80\x7f Error -10 (bad UTF-8 string) offset=0 reason=8 \xfb\x7f\x80\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=6 \xfb\x80\x7f\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=7 \xfb\x80\x80\x7f\x80 Error -10 (bad UTF-8 string) offset=0 reason=8 \xfb\x80\x80\x80\x7f Error -10 (bad UTF-8 string) offset=0 reason=9 \xfd\x7f\x80\x80\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=6 \xfd\x80\x7f\x80\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=7 \xfd\x80\x80\x7f\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=8 \xfd\x80\x80\x80\x7f\x80 Error -10 (bad UTF-8 string) offset=0 reason=9 \xfd\x80\x80\x80\x80\x7f Error -10 (bad UTF-8 string) offset=0 reason=10 \xed\xa0\x80 Error -10 (bad UTF-8 string) offset=0 reason=14 \xc0\x8f Error -10 (bad UTF-8 string) offset=0 reason=15 \xe0\x80\x8f Error -10 (bad UTF-8 string) offset=0 reason=16 \xf0\x80\x80\x8f Error -10 (bad UTF-8 string) offset=0 reason=17 \xf8\x80\x80\x80\x8f Error -10 (bad UTF-8 string) offset=0 reason=18 \xfc\x80\x80\x80\x80\x8f Error -10 (bad UTF-8 string) offset=0 reason=19 \x80 Error -10 (bad UTF-8 string) offset=0 reason=20 \xfe Error -10 (bad UTF-8 string) offset=0 reason=21 \xff Error -10 (bad UTF-8 string) offset=0 reason=21 \xef\xb7\x90 Error -10 (bad UTF-8 string) offset=0 reason=22 /badutf/8 \xfb\x80\x80\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=11 \xfd\x80\x80\x80\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=12 \xf7\xbf\xbf\xbf Error -10 (bad UTF-8 string) offset=0 reason=13 /shortutf/8 \P\P\xdf Error -25 (short UTF-8 string) offset=0 reason=1 \P\P\xef Error -25 (short UTF-8 string) offset=0 reason=2 \P\P\xef\x80 Error -25 (short UTF-8 string) offset=0 reason=1 \P\P\xf7 Error -25 (short UTF-8 string) offset=0 reason=3 \P\P\xf7\x80 Error -25 (short UTF-8 string) offset=0 reason=2 \P\P\xf7\x80\x80 Error -25 (short UTF-8 string) offset=0 reason=1 \P\P\xfb Error -25 (short UTF-8 string) offset=0 reason=4 \P\P\xfb\x80 Error -25 (short UTF-8 string) offset=0 reason=3 \P\P\xfb\x80\x80 Error -25 (short UTF-8 string) offset=0 reason=2 \P\P\xfb\x80\x80\x80 Error -25 (short UTF-8 string) offset=0 reason=1 \P\P\xfd Error -25 (short UTF-8 string) offset=0 reason=5 \P\P\xfd\x80 Error -25 (short UTF-8 string) offset=0 reason=4 \P\P\xfd\x80\x80 Error -25 (short UTF-8 string) offset=0 reason=3 \P\P\xfd\x80\x80\x80 Error -25 (short UTF-8 string) offset=0 reason=2 \P\P\xfd\x80\x80\x80\x80 Error -25 (short UTF-8 string) offset=0 reason=1 /anything/8 \xc0\x80 Error -10 (bad UTF-8 string) offset=0 reason=15 \xc1\x8f Error -10 (bad UTF-8 string) offset=0 reason=15 \xe0\x9f\x80 Error -10 (bad UTF-8 string) offset=0 reason=16 \xf0\x8f\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=17 \xf8\x87\x80\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=18 \xfc\x83\x80\x80\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=19 \xfe\x80\x80\x80\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=21 \xff\x80\x80\x80\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=21 \xc3\x8f No match \xe0\xaf\x80 No match \xe1\x80\x80 No match \xf0\x9f\x80\x80 No match \xf1\x8f\x80\x80 No match \xf8\x88\x80\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=11 \xf9\x87\x80\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=11 \xfc\x84\x80\x80\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=12 \xfd\x83\x80\x80\x80\x80 Error -10 (bad UTF-8 string) offset=0 reason=12 \?\xf8\x88\x80\x80\x80 No match \?\xf9\x87\x80\x80\x80 No match \?\xfc\x84\x80\x80\x80\x80 No match \?\xfd\x83\x80\x80\x80\x80 No match /noncharacter/8 \x{fffe} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{ffff} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{1fffe} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{1ffff} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{2fffe} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{2ffff} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{3fffe} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{3ffff} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{4fffe} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{4ffff} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{5fffe} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{5ffff} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{6fffe} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{6ffff} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{7fffe} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{7ffff} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{8fffe} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{8ffff} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{9fffe} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{9ffff} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{afffe} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{affff} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{bfffe} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{bffff} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{cfffe} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{cffff} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{dfffe} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{dffff} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{efffe} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{effff} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{ffffe} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fffff} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{10fffe} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{10ffff} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fdd0} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fdd1} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fdd2} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fdd3} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fdd4} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fdd5} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fdd6} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fdd7} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fdd8} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fdd9} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fdda} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fddb} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fddc} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fddd} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fdde} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fddf} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fde0} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fde1} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fde2} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fde3} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fde4} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fde5} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fde6} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fde7} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fde8} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fde9} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fdea} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fdeb} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fdec} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fded} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fdee} Error -10 (bad UTF-8 string) offset=0 reason=22 \x{fdef} Error -10 (bad UTF-8 string) offset=0 reason=22 /\x{100}/8DZ ------------------------------------------------------------------ Bra \x{100} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{c4} Need char = \x{80} /\x{1000}/8DZ ------------------------------------------------------------------ Bra \x{1000} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{e1} Need char = \x{80} /\x{10000}/8DZ ------------------------------------------------------------------ Bra \x{10000} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{f0} Need char = \x{80} /\x{100000}/8DZ ------------------------------------------------------------------ Bra \x{100000} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{f4} Need char = \x{80} /\x{10ffff}/8DZ ------------------------------------------------------------------ Bra \x{10ffff} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{f4} Need char = \x{bf} /[\x{ff}]/8DZ ------------------------------------------------------------------ Bra \x{ff} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{c3} Need char = \x{bf} /[\x{100}]/8DZ ------------------------------------------------------------------ Bra \x{100} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{c4} Need char = \x{80} /\x80/8DZ ------------------------------------------------------------------ Bra \x{80} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{c2} Need char = \x{80} /\xff/8DZ ------------------------------------------------------------------ Bra \x{ff} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{c3} Need char = \x{bf} /\x{D55c}\x{ad6d}\x{C5B4}/DZ8 ------------------------------------------------------------------ Bra \x{d55c}\x{ad6d}\x{c5b4} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{ed} Need char = \x{b4} \x{D55c}\x{ad6d}\x{C5B4} 0: \x{d55c}\x{ad6d}\x{c5b4} /\x{65e5}\x{672c}\x{8a9e}/DZ8 ------------------------------------------------------------------ Bra \x{65e5}\x{672c}\x{8a9e} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{e6} Need char = \x{9e} \x{65e5}\x{672c}\x{8a9e} 0: \x{65e5}\x{672c}\x{8a9e} /\x{80}/DZ8 ------------------------------------------------------------------ Bra \x{80} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{c2} Need char = \x{80} /\x{084}/DZ8 ------------------------------------------------------------------ Bra \x{84} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{c2} Need char = \x{84} /\x{104}/DZ8 ------------------------------------------------------------------ Bra \x{104} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{c4} Need char = \x{84} /\x{861}/DZ8 ------------------------------------------------------------------ Bra \x{861} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{e0} Need char = \x{a1} /\x{212ab}/DZ8 ------------------------------------------------------------------ Bra \x{212ab} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{f0} Need char = \x{ab} /-- This one is here not because it's different to Perl, but because the way the captured single-byte is displayed. (In Perl it becomes a character, and you can't tell the difference.) --/ /X(\C)(.*)/8 X\x{1234} 0: X\x{1234} 1: \x{e1} 2: \x{88}\x{b4} X\nabc 0: X\x{0a}abc 1: \x{0a} 2: abc /-- This one is here because Perl gives out a grumbly error message (quite correctly, but that messes up comparisons). --/ /a\Cb/8 *** Failers No match a\x{100}b No match /[^ab\xC0-\xF0]/8SDZ ------------------------------------------------------------------ Bra [\x00-`c-\xbf\xf1-\xff] (neg) Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf No first char No need char Subject length lower bound = 1 Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a \x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff \x{f1} 0: \x{f1} \x{bf} 0: \x{bf} \x{100} 0: \x{100} \x{1000} 0: \x{1000} *** Failers 0: * \x{c0} No match \x{f0} No match /Ä€{3,4}/8SDZ ------------------------------------------------------------------ Bra \x{100}{3} \x{100}? Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{c4} Need char = \x{80} Subject length lower bound = 3 No set of starting bytes \x{100}\x{100}\x{100}\x{100\x{100} 0: \x{100}\x{100}\x{100} /(\x{100}+|x)/8SDZ ------------------------------------------------------------------ Bra CBra 1 \x{100}+ Alt x Ket Ket End ------------------------------------------------------------------ Capturing subpattern count = 1 Options: utf No first char No need char Subject length lower bound = 1 Starting byte set: x \xc4 /(\x{100}*a|x)/8SDZ ------------------------------------------------------------------ Bra CBra 1 \x{100}*+ a Alt x Ket Ket End ------------------------------------------------------------------ Capturing subpattern count = 1 Options: utf No first char No need char Subject length lower bound = 1 Starting byte set: a x \xc4 /(\x{100}{0,2}a|x)/8SDZ ------------------------------------------------------------------ Bra CBra 1 \x{100}{0,2} a Alt x Ket Ket End ------------------------------------------------------------------ Capturing subpattern count = 1 Options: utf No first char No need char Subject length lower bound = 1 Starting byte set: a x \xc4 /(\x{100}{1,2}a|x)/8SDZ ------------------------------------------------------------------ Bra CBra 1 \x{100} \x{100}{0,1} a Alt x Ket Ket End ------------------------------------------------------------------ Capturing subpattern count = 1 Options: utf No first char No need char Subject length lower bound = 1 Starting byte set: x \xc4 /\x{100}/8DZ ------------------------------------------------------------------ Bra \x{100} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{c4} Need char = \x{80} /a\x{100}\x{101}*/8DZ ------------------------------------------------------------------ Bra a\x{100} \x{101}* Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = 'a' Need char = \x{80} /a\x{100}\x{101}+/8DZ ------------------------------------------------------------------ Bra a\x{100} \x{101}+ Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = 'a' Need char = \x{81} /[^\x{c4}]/DZ ------------------------------------------------------------------ Bra [^\x{c4}] Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 No options No first char No need char /[\x{100}]/8DZ ------------------------------------------------------------------ Bra \x{100} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{c4} Need char = \x{80} \x{100} 0: \x{100} Z\x{100} 0: \x{100} \x{100}Z 0: \x{100} *** Failers No match /[\xff]/DZ8 ------------------------------------------------------------------ Bra \x{ff} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{c3} Need char = \x{bf} >\x{ff}< 0: \x{ff} /[^\xff]/8DZ ------------------------------------------------------------------ Bra [^\x{ff}] Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf No first char No need char /\x{100}abc(xyz(?1))/8DZ ------------------------------------------------------------------ Bra \x{100}abc CBra 1 xyz Recurse Ket Ket End ------------------------------------------------------------------ Capturing subpattern count = 1 Options: utf First char = \x{c4} Need char = 'z' /a\x{1234}b/P8 a\x{1234}b 0: a\x{1234}b /\777/8I Capturing subpattern count = 0 Options: utf First char = \x{c7} Need char = \x{bf} \x{1ff} 0: \x{1ff} \777 0: \x{1ff} /\x{100}+\x{200}/8DZ ------------------------------------------------------------------ Bra \x{100}++ \x{200} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{c4} Need char = \x{80} /\x{100}+X/8DZ ------------------------------------------------------------------ Bra \x{100}++ X Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{c4} Need char = 'X' /^[\QÄ€\E-\QÅ\E/BZ8 Failed: missing terminating ] for character class at offset 15 /-- This tests the stricter UTF-8 check according to RFC 3629. --/ /X/8 \x{0}\x{d7ff}\x{e000}\x{10ffff} Error -10 (bad UTF-8 string) offset=7 reason=22 \x{d800} Error -10 (bad UTF-8 string) offset=0 reason=14 \x{d800}\? No match \x{da00} Error -10 (bad UTF-8 string) offset=0 reason=14 \x{da00}\? No match \x{dfff} Error -10 (bad UTF-8 string) offset=0 reason=14 \x{dfff}\? No match \x{110000} Error -10 (bad UTF-8 string) offset=0 reason=13 \x{110000}\? No match \x{2000000} Error -10 (bad UTF-8 string) offset=0 reason=11 \x{2000000}\? No match \x{7fffffff} Error -10 (bad UTF-8 string) offset=0 reason=12 \x{7fffffff}\? No match /(*UTF8)\x{1234}/ abcd\x{1234}pqr 0: \x{1234} /(*CRLF)(*UTF)(*BSR_UNICODE)a\Rb/I Capturing subpattern count = 0 Options: bsr_unicode utf Forced newline sequence: CRLF First char = 'a' Need char = 'b' /\h/SI8 Capturing subpattern count = 0 Options: utf No first char No need char Subject length lower bound = 1 Starting byte set: \x09 \x20 \xc2 \xe1 \xe2 \xe3 ABC\x{09} 0: \x{09} ABC\x{20} 0: ABC\x{a0} 0: \x{a0} ABC\x{1680} 0: \x{1680} ABC\x{180e} 0: \x{180e} ABC\x{2000} 0: \x{2000} ABC\x{202f} 0: \x{202f} ABC\x{205f} 0: \x{205f} ABC\x{3000} 0: \x{3000} /\v/SI8 Capturing subpattern count = 0 Options: utf No first char No need char Subject length lower bound = 1 Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 ABC\x{0a} 0: \x{0a} ABC\x{0b} 0: \x{0b} ABC\x{0c} 0: \x{0c} ABC\x{0d} 0: \x{0d} ABC\x{85} 0: \x{85} ABC\x{2028} 0: \x{2028} /\h*A/SI8 Capturing subpattern count = 0 Options: utf No first char Need char = 'A' Subject length lower bound = 1 Starting byte set: \x09 \x20 A \xc2 \xe1 \xe2 \xe3 CDBABC 0: A /\v+A/SI8 Capturing subpattern count = 0 Options: utf No first char Need char = 'A' Subject length lower bound = 2 Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 /\s?xxx\s/8SI Capturing subpattern count = 0 Options: utf No first char Need char = 'x' Subject length lower bound = 4 Starting byte set: \x09 \x0a \x0c \x0d \x20 x /\sxxx\s/I8ST1 Capturing subpattern count = 0 Options: utf No first char Need char = 'x' Subject length lower bound = 5 Starting byte set: \x09 \x0a \x0c \x0d \x20 \xc2 AB\x{85}xxx\x{a0}XYZ 0: \x{85}xxx\x{a0} AB\x{a0}xxx\x{85}XYZ 0: \x{a0}xxx\x{85} /\S \S/I8ST1 Capturing subpattern count = 0 Options: utf No first char Need char = ' ' Subject length lower bound = 3 Starting byte set: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0b \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e \x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff \x{a2} \x{84} 0: \x{a2} \x{84} A Z 0: A Z /a+/8 a\x{123}aa\>1 0: aa a\x{123}aa\>2 Error -11 (bad UTF-8 offset) a\x{123}aa\>3 0: aa a\x{123}aa\>4 0: a a\x{123}aa\>5 No match a\x{123}aa\>6 Error -24 (bad offset value) /\x{1234}+/iS8I Capturing subpattern count = 0 Options: caseless utf No first char No need char Subject length lower bound = 1 Starting byte set: \xe1 /\x{1234}+?/iS8I Capturing subpattern count = 0 Options: caseless utf No first char No need char Subject length lower bound = 1 Starting byte set: \xe1 /\x{1234}++/iS8I Capturing subpattern count = 0 Options: caseless utf No first char No need char Subject length lower bound = 1 Starting byte set: \xe1 /\x{1234}{2}/iS8I Capturing subpattern count = 0 Options: caseless utf No first char No need char Subject length lower bound = 2 Starting byte set: \xe1 /[^\x{c4}]/8DZ ------------------------------------------------------------------ Bra [^\x{c4}] Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf No first char No need char /X+\x{200}/8DZ ------------------------------------------------------------------ Bra X++ \x{200} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = 'X' Need char = \x{80} /\R/SI8 Capturing subpattern count = 0 Options: utf No first char No need char Subject length lower bound = 1 Starting byte set: \x0a \x0b \x0c \x0d \xc2 \xe2 /\777/8DZ ------------------------------------------------------------------ Bra \x{1ff} Ket End ------------------------------------------------------------------ Capturing subpattern count = 0 Options: utf First char = \x{c7} Need char = \x{bf} /\w+\x{C4}/8BZ ------------------------------------------------------------------ Bra \w++ \x{c4} Ket End ------------------------------------------------------------------ a\x{C4}\x{C4} 0: a\x{c4} /\w+\x{C4}/8BZT1 ------------------------------------------------------------------ Bra \w+ \x{c4} Ket End ------------------------------------------------------------------ a\x{C4}\x{C4} 0: a\x{c4}\x{c4} /\W+\x{C4}/8BZ ------------------------------------------------------------------ Bra \W+ \x{c4} Ket End ------------------------------------------------------------------ !\x{C4} 0: !\x{c4} /\W+\x{C4}/8BZT1 ------------------------------------------------------------------ Bra \W++ \x{c4} Ket End ------------------------------------------------------------------ !\x{C4} 0: !\x{c4} /\W+\x{A1}/8BZ ------------------------------------------------------------------ Bra \W+ \x{a1} Ket End ------------------------------------------------------------------ !\x{A1} 0: !\x{a1} /\W+\x{A1}/8BZT1 ------------------------------------------------------------------ Bra \W+ \x{a1} Ket End ------------------------------------------------------------------ !\x{A1} 0: !\x{a1} /X\s+\x{A0}/8BZ ------------------------------------------------------------------ Bra X \s++ \x{a0} Ket End ------------------------------------------------------------------ X\x20\x{A0}\x{A0} 0: X \x{a0} /X\s+\x{A0}/8BZT1 ------------------------------------------------------------------ Bra X \s+ \x{a0} Ket End ------------------------------------------------------------------ X\x20\x{A0}\x{A0} 0: X \x{a0}\x{a0} /\S+\x{A0}/8BZ ------------------------------------------------------------------ Bra \S+ \x{a0} Ket End ------------------------------------------------------------------ X\x{A0}\x{A0} 0: X\x{a0}\x{a0} /\S+\x{A0}/8BZT1 ------------------------------------------------------------------ Bra \S++ \x{a0} Ket End ------------------------------------------------------------------ X\x{A0}\x{A0} 0: X\x{a0} /\x{a0}+\s!/8BZ ------------------------------------------------------------------ Bra \x{a0}++ \s ! Ket End ------------------------------------------------------------------ \x{a0}\x20! 0: \x{a0} ! /\x{a0}+\s!/8BZT1 ------------------------------------------------------------------ Bra \x{a0}+ \s ! Ket End ------------------------------------------------------------------ \x{a0}\x20! 0: \x{a0} ! /A/8 \x{ff000041} ** Character \x{ff000041} is greater than 0x7fffffff and so cannot be converted to UTF-8 \x{7f000041} Error -10 (bad UTF-8 string) offset=0 reason=12 /-- End of testinput15 --/