// // Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it // under the terms of the GNU General Public License version 2 only, as // published by the Free Software Foundation. // // This code is distributed in the hope that it will be useful, but WITHOUT // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // version 2 for more details (a copy is included in the LICENSE file that // accompanied this code). // // You should have received a copy of the GNU General Public License version // 2 along with this work; if not, write to the Free Software Foundation, // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. // // Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA // or visit www.oracle.com if you need additional information or have any // questions. // // -------------------------------------------------------- // This file contains test cases with supplementary characters for regular expressions. // A test case consists of three lines: // The first line is a pattern used in the test // The second line is the input to search for the pattern in // The third line is a concatenation of the match, the number of groups, // and the contents of the first four subexpressions. // Empty lines and lines beginning with comment slashes are ignored. 
// Test unsetting of backed off groups ^(\ud800\udc61)?\ud800\udc61 \ud800\udc61 true \ud800\udc61 1 ^(\ud800\udc61\ud800)?\ud800\udc61\ud800 \ud800\udc61\ud800 true \ud800\udc61\ud800 1 ^(\ud800\udc61\ud800\udc61(\ud800\udc62\ud800\udc62)?)+$ \ud800\udc61\ud800\udc61\ud800\udc62\ud800\udc62\ud800\udc61\ud800\udc61 true \ud800\udc61\ud800\udc61\ud800\udc62\ud800\udc62\ud800\udc61\ud800\udc61 2 \ud800\udc61\ud800\udc61 \ud800\udc62\ud800\udc62 ^(\ud800\udc61\ud800\udc61\ud800(\ud800\udc62\ud800\udc62\ud800)?)+$ \ud800\udc61\ud800\udc61\ud800\ud800\udc62\ud800\udc62\ud800\ud800\udc61\ud800\udc61\ud800 true \ud800\udc61\ud800\udc61\ud800\ud800\udc62\ud800\udc62\ud800\ud800\udc61\ud800\udc61\ud800 2 \ud800\udc61\ud800\udc61\ud800 \ud800\udc62\ud800\udc62\ud800 ((\ud800\udc61|\ud800\udc62)?\ud800\udc62)+ \ud800\udc62 true \ud800\udc62 2 \ud800\udc62 ((\ud800|\ud800\udc62)?\ud800\udc62)+ \ud800\udc62 true \ud800\udc62 2 \ud800\udc62 (\ud800\udc61\ud800\udc61\ud800\udc61)?\ud800\udc61\ud800\udc61\ud800\udc61 \ud800\udc61\ud800\udc61\ud800\udc61 true \ud800\udc61\ud800\udc61\ud800\udc61 1 (\ud800\udc61\ud800\udc61\ud800\ud800\udc61)?\ud800\udc61\ud800\udc61\ud800\ud800\udc61 \ud800\udc61\ud800\udc61\ud800\ud800\udc61 true \ud800\udc61\ud800\udc61\ud800\ud800\udc61 1 ^(\ud800\udc61\ud800(\ud800\udc62\ud800)?)+$ \ud800\udc61\ud800\ud800\udc62\ud800\ud800\udc61\ud800 true \ud800\udc61\ud800\ud800\udc62\ud800\ud800\udc61\ud800 2 \ud800\udc61\ud800 \ud800\udc62\ud800 ^(\ud800\udc61(\ud800\udc62)?)+$ \ud800\udc61\ud800\udc62\ud800\udc61 true \ud800\udc61\ud800\udc62\ud800\udc61 2 \ud800\udc61 \ud800\udc62 ^(\ud800\udc61\ud800(\ud800\udc62\ud800)?)+$ \ud800\udc61\ud800\ud800\udc62\ud800\ud800\udc61\ud800 true \ud800\udc61\ud800\ud800\udc62\ud800\ud800\udc61\ud800 2 \ud800\udc61\ud800 \ud800\udc62\ud800 ^(\ud800\udc61(\ud800\udc62(\ud800\udc63)?)?)?\ud800\udc61\ud800\udc62\ud800\udc63 \ud800\udc61\ud800\udc62\ud800\udc63 true \ud800\udc61\ud800\udc62\ud800\udc63 3 
^(\ud800\udc61\ud800(\ud800\udc62(\ud800\udc63)?)?)?\ud800\udc61\ud800\ud800\udc62\ud800\udc63 \ud800\udc61\ud800\ud800\udc62\ud800\udc63 true \ud800\udc61\ud800\ud800\udc62\ud800\udc63 3 ^(\ud800\udc61(\ud800\udc02(\ud800\udc63))).* \ud800\udc61\ud800\udc02\ud800\udc63 true \ud800\udc61\ud800\udc02\ud800\udc63 3 \ud800\udc61\ud800\udc02\ud800\udc63 \ud800\udc02\ud800\udc63 \ud800\udc63 ^(\ud800\udc61(\ud800(\ud800\udc63))).* \ud800\udc61\ud800\ud800\udc63 true \ud800\udc61\ud800\ud800\udc63 3 \ud800\udc61\ud800\ud800\udc63 \ud800\ud800\udc63 \ud800\udc63 // Patterns including no surrogates (.)([^a])xyz \ud801\ud800\udc00xyz true \ud801\ud800\udc00xyz 2 \ud801 \ud800\udc00 [^a-z].. \ud801\ud800\udc00xyz true \ud801\ud800\udc00x 0 .$ \ud801\ud800\udc00 true \ud800\udc00 0 .$ \ud801\udc01\ud800\udc00 true \ud800\udc00 0 .$ \ud801\udc01\ud800\udc00\udcff true \udcff 0 [^x-\uffff][^y-\uffff] \ud800\udc00pqr true \ud800\udc00p 0 [^x-\uffff]+ \ud800\udc00pqrx true \ud800\udc00pqr 0 /// The following test cases fail due to use of Start rather than /// StartS. Disabled for now. 
///[a-\uffff] ///\ud800\udc00x ///true x 0 /// ///[a-\uffff] ///\ud800\udc00 ///false 0 // unpaired surrogate should match [\x{d800}-\x{dbff}\x{dc00}-\x{dfff}] xxx\udca9\ud83dyyy true \udca9 0 // surrogates in a supplementary character should not match [\x{d800}-\x{dbff}\x{dc00}-\x{dfff}] \ud83d\udca9 false 0 // unpaired surrogate should match [\p{InHIGH_SURROGATES}\p{InLOW_SURROGATES}] xxx\udca9\ud83dyyy true \udca9 0 // surrogates part of a supplementary character should not match [\p{InHIGH_SURROGATES}\p{InLOW_SURROGATES}] \ud83d\udca9 false 0 // low surrogate part of a supplementary character should not match [\x{dc00}-\x{dfff}] \ud83d\udca9 false 0 // use of x modifier \ud800\udc61bc(?x)bl\ud800\udc61h \ud800\udc61bcbl\ud800\udc61h true \ud800\udc61bcbl\ud800\udc61h 0 \ud800\udc61bc(?x) bl\ud800\udc61h \ud800\udc61bcbl\ud800\udc61h true \ud800\udc61bcbl\ud800\udc61h 0 \ud800\udc61bc(?x) bl\ud800\udc61h blech \ud800\udc61bcbl\ud800\udc61hblech true \ud800\udc61bcbl\ud800\udc61hblech 0 \ud800\udc61bc(?x) bl\ud800\udc61h # ignore comment \ud800\udc61bcbl\ud800\udc61h true \ud800\udc61bcbl\ud800\udc61h 0 // Simple alternation \ud800\udc61|\ud800\udc62 \ud800\udc61 true \ud800\udc61 0 \ud800\udc61|\ud800\udc62|\ud800 \ud800\udc61 true \ud800\udc61 0 \ud800\udc61|\ud800 \ud800\udc62 false 0 \ud800\udc62|\ud800 \ud800 true \ud800 0 \ud800\udc61|\ud802\udc02 z false 0 \ud800\udc61|\ud802\udc02 \ud802\udc02 true \ud802\udc02 0 \ud800\udc61|\ud802\udc02|\ud803\udc03\ud804\udc04 \ud803\udc03\ud804\udc04 true \ud803\udc03\ud804\udc04 0 \ud800\udc61|\ud800\udc61d \ud800\udc61d true \ud800\udc61 0 z(\ud800\udc61|\ud800\udc61c)\ud802\udc02 z\ud800\udc61c\ud802\udc02 true z\ud800\udc61c\ud802\udc02 1 \ud800\udc61c z(\ud800\udc61|\ud800\udc61c|\udc61c)\ud802\udc02 z\udc61c\ud802\udc02 true z\udc61c\ud802\udc02 1 \udc61c // Simple codepoint class [\ud800\udc61\ud802\udc02c]+ \ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 true 
\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 0 [\ud800\udc61\ud802\udc02c]+ \ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 true \ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 0 [\ud800\udc61\ud802\udc02c\ud800]+ \ud800\udc61\ud802\udc02\ud800\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 true \ud800\udc61\ud802\udc02\ud800\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 0 [\ud800\udc61bc]+ d\ud800\udc62fg false 0 [\ud800\udc61bc]+[\ud804\udc04ef]+[\ud807\udc07hi]+ zzz\ud800\udc61\ud800\udc61\ud804\udc04\ud804\udc04\ud807\udc07\ud807\udc07zzz true \ud800\udc61\ud800\udc61\ud804\udc04\ud804\udc04\ud807\udc07\ud807\udc07 0 // Range codepoint class [\ud801\udc01-\ud807\udc07]+ \ud8ff\udcff\ud8ff\udcff\ud8ff\udcff\ud807\udc07\ud807\udc07\ud807\udc07 true \ud807\udc07\ud807\udc07\ud807\udc07 0 [\ud801\udc01-\ud807\udc07]+ mmm false 0 [\ud800\udc61-]+ z\ud800\udc61-9z true \ud800\udc61- 0 // Negated char class [^\ud800\udc61\ud802\udc02c]+ \ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 false 0 [^\ud800\udc61\ud802\udc02\ud803\udc03]+ \ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02\ud802\udc02\ud802\udc02\ud803\udc03\ud803\udc03\ud803\udc03\ud804\udc04efg true \ud804\udc04efg 0 [^\ud800\udc61\ud802\udc02\ud803\udc03\ud800]+ \ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02\ud802\udc02\ud802\udc02\ud803\udc03\ud803\udc03\ud803\udc03\ud804\udc04efg true \ud804\udc04efg 0 // Making sure a ^ not in first position matches literal ^ [\ud801\udc01\ud802\udc02\ud803\udc03^\ud802\udc02] \ud802\udc02 true \ud802\udc02 0 [\ud801\udc01\ud802\udc02\ud803\udc03^\ud802\udc02] ^ true ^ 0 // Class union and intersection [\ud801\udc01\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]] \ud802\udc02 true \ud802\udc02 0 [\ud800\udc61\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]] \ud805\udc05 true \ud805\udc05 0 
[\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]] \ud801\udc01 true \ud801\udc01 0 [\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]] \ud80c\udc0c true \ud80c\udc0c 0 [\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]] 4 true 4 0 [\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]] \ud805\udc05 false 0 [\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]] \ud816\udc16 false 0 [[\ud801\udc01-\ud804\udc04][0-9][\ud80b\udc0b-\ud80d\udc0d]] \ud802\udc02 true \ud802\udc02 0 [[\ud801\udc01-\ud804\udc04][0-9][\ud80b\udc0b-\ud80d\udc0d]] \ud81a\udc1a false 0 [\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]] \ud801\udc01 true \ud801\udc01 0 [\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]] \ud805\udc05 true \ud805\udc05 0 [\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]] \ud808\udc08 true \ud808\udc08 0 [\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]] \ud80d\udc0d false 0 [\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]\ud80d\udc0d] \ud80d\udc0d true \ud80d\udc0d 0 [\ud801\udc01\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]\ud807\udc07\ud808\udc08\ud809\udc09] \ud801\udc01 true \ud801\udc01 0 [\ud800\udc61\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]\ud807\udc07\ud808\udc08\ud809\udc09] \ud804\udc04 true \ud804\udc04 0 [\ud800\udc61\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]\ud807\udc07\ud808\udc08\ud809\udc09] \ud808\udc08 true \ud808\udc08 0 [\ud800\udc61\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]\ud807\udc07\ud808\udc08\ud809\udc09] \ud816\udc16 false 0 [\ud801\udc01-\ud803\udc03&&[\ud804\udc04-\ud806\udc06]] \ud801\udc01 false 0 [\ud801\udc01-\ud803\udc03&&[\ud804\udc04-\ud806\udc06]] \ud805\udc05 false 0 [\ud801\udc01-\ud803\udc03&&[\ud804\udc04-\ud806\udc06]] \ud81a\udc1a false 0 
[[\ud801\udc01-\ud803\udc03]&&[\ud804\udc04-\ud806\udc06]] \ud801\udc01 false 0 [[\ud801\udc01-\ud803\udc03]&&[\ud804\udc04-\ud806\udc06]] \ud805\udc05 false 0 [[\ud801\udc01-\ud803\udc03]&&[\ud804\udc04-\ud806\udc06]] \ud81a\udc1a false 0 [\ud801\udc01-\ud803\udc03&&\ud804\udc04-\ud806\udc06] \ud801\udc01 false 0 [\ud801\udc01-\ud80d\udc0d&&\ud80d\udc0d-\ud81a\udc1a] \ud80d\udc0d true \ud80d\udc0d 0 [\ud801\udc01-\ud80d\udc0d&&\ud80d\udc0d-\ud81a\udc1a&&\ud801\udc01-\ud803\udc03] \ud80d\udc0d false 0 [\ud801\udc01-\ud80d\udc0d&&\ud80d\udc0d-\ud81a\udc1a&&\ud801\udc01-\ud81a\udc1a] \ud80d\udc0d true \ud80d\udc0d 0 [[\ud801\udc01-\ud80d\udc0d]&&[\ud80d\udc0d-\ud81a\udc1a]] \ud801\udc01 false 0 [[\ud801\udc01-\ud80d\udc0d]&&[\ud80d\udc0d-\ud81a\udc1a]] \ud80d\udc0d true \ud80d\udc0d 0 [[\ud801\udc01-\ud80d\udc0d]&&[\ud80d\udc0d-\ud81a\udc1a]] \ud81a\udc1a false 0 [[\ud801\udc01-\ud80d\udc0d]&&[^\ud801\udc01-\ud803\udc03]] \ud801\udc01 false 0 [[\ud801\udc01-\ud80d\udc0d]&&[^\ud801\udc01-\ud803\udc03]] \ud804\udc04 true \ud804\udc04 0 [\ud801\udc01-\ud80d\udc0d&&[^\ud801\udc01-\ud803\udc03]] \ud801\udc01 false 0 [\ud801\udc01-\ud80d\udc0d&&[^\ud801\udc01-\ud803\udc03]] \ud804\udc04 true \ud804\udc04 0 [\ud801\udc01-\ud803\udc03\ud804\udc04-\ud806\udc06&&[\ud804\udc04-\ud806\udc06]] \ud801\udc01 false 0 [\ud801\udc01-\ud803\udc03\ud804\udc04-\ud806\udc06&&[\ud804\udc04-\ud806\udc06]] \ud805\udc05 true \ud805\udc05 0 [[\ud801\udc01-\ud803\udc03]&&\ud804\udc04-\ud806\udc06\ud801\udc01-\ud803\udc03] \ud801\udc01 true \ud801\udc01 0 [[\ud801\udc01-\ud803\udc03]&&[\ud804\udc04-\ud806\udc06][\ud801\udc01-\ud803\udc03]] \ud801\udc01 true \ud801\udc01 0 [[\ud801\udc01-\ud803\udc03][\ud804\udc04-\ud806\udc06]&&\ud801\udc01\ud802\udc02\ud803\udc03] \ud801\udc01 true \ud801\udc01 0 [[\ud801\udc01-\ud803\udc03][\ud804\udc04-\ud806\udc06]&&\ud801\udc01\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]] \ud805\udc05 true \ud805\udc05 0 
[[\ud801\udc01-\ud803\udc03]&&[\ud802\udc02-\ud804\udc04]&&[\ud803\udc03-\ud805\udc05]] \ud801\udc01 false 0 [[\ud801\udc01-\ud803\udc03]&&[\ud802\udc02-\ud804\udc04]&&[\ud803\udc03-\ud805\udc05]] \ud803\udc03 true \ud803\udc03 0 [[\ud801\udc01-\ud803\udc03]&&[\ud802\udc02-\ud804\udc04][\ud803\udc03-\ud805\udc05]&&[\ud815\udc15-\ud81a\udc1a]] \ud803\udc03 false 0 [\ud801\udc01\ud802\udc02\ud803\udc03[^\ud802\udc02\ud803\udc03\ud804\udc04]] \ud801\udc01 true \ud801\udc01 0 [\ud800\udc61\ud802\udc02\ud803\udc03[^\ud802\udc02\ud803\udc03\ud804\udc04]] \ud804\udc04 false 0 [\ud801\udc01-\ud803\udc03&&\ud801\udc01-\ud804\udc04&&\ud801\udc01-\ud805\udc05\ud807\udc07\ud808\udc08\ud809\udc09] \ud802\udc02 true \ud802\udc02 0 [\ud801\udc01-\ud803\udc03&&\ud801\udc01-\ud804\udc04&&\ud801\udc01-\ud805\udc05\ud807\udc07\ud808\udc08\ud809\udc09] \ud807\udc07 false 0 [[\ud801\udc01[\ud802\udc02]]&&[\ud802\udc02[\ud801\udc01]]] \ud801\udc01 true \ud801\udc01 0 // Unicode isn't supported in clazz() [[\ud800\udc61]&&[b][c][\ud800\udc61]&&[^d]] \ud800\udc61 true \ud800\udc61 0 [[\ud800\udc61]&&[\ud802\udc02][\ud800][\ud800\udc61]&&[^\ud804\udc04]] \ud800\udc61 true \ud800\udc61 0 [[\ud800\udc61]&&[b][\ud800][\ud800\udc61]&&[^\ud804\udc04]] \ud804\udc04 false 0 [[\ud800\udc61]&&[b][c][\ud800\udc61]&&[^d]] d false 0 [[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]] \ud800\udc01 false 0 [[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]] \ud800\udc03 true \ud800\udc03 0 [[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]&&[\ud800\udc03]] \ud800\udc03 true \ud800\udc03 0 [[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]&&[\ud800\udc03]&&\ud800\udc03] \ud800\udc03 true \ud800\udc03 0 [[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]&&[\ud800\udc03]&&\ud800\udc03&&\ud800\udc03] \ud800\udc03 true \ud800\udc03 0 [[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]&&[\ud800\udc03]&&\ud800\udc03&&[\ud800\udc03\ud800\udc04\ud800\udc05]] 
\ud800\udc03 true \ud800\udc03 0 [z[\ud800\udc61b\ud800\udc03&&b\ud800\udc03\ud800\udc04]] \ud800\udc03 true \ud800\udc03 0 [z[\ud800\udc61b\ud800\udc03&&b\ud800\udc03\ud800\udc04]&&[u-z]] z true z 0 [x[\ud800\udc61b\ud800\udc03&&b\ud800\udc03\ud800\udc04[z]]&&[u-z]] z false 0 [x[[wz]\ud800\udc61b\ud800\udc03&&b\ud800\udc03\ud800\udc04[z]]&&[u-z]] z true z 0 [[\ud800\udc61b\ud800\udc03]&&[\ud800\udc04\ud800\udc05f]\ud800\udc61b\ud800\udc03] \ud800\udc61 true \ud800\udc61 0 [[\ud800\udc61b\ud800\udc03]&&[\ud800\udc04\ud800\udc05f]xyz[\ud800\udc61b\ud800\udc03]] \ud800\udc61 true \ud800\udc61 0 \pL \ud800\udc00 true \ud800\udc00 0 \p{IsASCII} \ud800\udc00 false 0 \pLbc \ud800\udc00bc true \ud800\udc00bc 0 \ud800\udc61[r\p{InGreek}]c \ud800\udc61\u0370c true \ud800\udc61\u0370c 0 \ud800\udc61\p{InGreek} \ud800\udc61\u0370 true \ud800\udc61\u0370 0 \ud800\udc61\P{InGreek} \ud800\udc61\u0370 false 0 \ud800\udc61\P{InGreek} \ud800\udc61b true \ud800\udc61b 0 \ud800\udc61{^InGreek} - error \ud800\udc61\p{^InGreek} - error \ud800\udc61\P{^InGreek} - error \ud800\udc61\p{InGreek} \ud800\udc61\u0370 true \ud800\udc61\u0370 0 \ud800\udc61[\p{InGreek}]c \ud800\udc61\u0370c true \ud800\udc61\u0370c 0 \ud800\udc61[\P{InGreek}]c \ud800\udc61\u0370c false 0 \ud800\udc61[\P{InGreek}]c \ud800\udc61bc true \ud800\udc61bc 0 \ud800\udc61[{^InGreek}]c \ud800\udc61nc true \ud800\udc61nc 0 \ud800\udc61[{^InGreek}]c \ud800\udc61zc false 0 \ud800\udc61[\p{^InGreek}]c - error \ud800\udc61[\P{^InGreek}]c - error \ud800\udc61[\p{InGreek}] \ud800\udc61\u0370 true \ud800\udc61\u0370 0 \ud800\udc61[r\p{InGreek}]c \ud800\udc61rc true \ud800\udc61rc 0 \ud800\udc61[\p{InGreek}r]c \ud800\udc61rc true \ud800\udc61rc 0 \ud800\udc61[r\p{InGreek}]c \ud800\udc61rc true \ud800\udc61rc 0 \ud800\udc61[^\p{InGreek}]c \ud800\udc61\u0370c false 0 \ud800\udc61[^\P{InGreek}]c \ud800\udc61\u0370c true \ud800\udc61\u0370c 0 \ud800\udc61[\p{InGreek}&&[^\u0370]]c \ud800\udc61\u0370c false 0 // Test the dot 
metacharacter \ud800\udc61.c.+ \ud800\udc61#c%& true \ud800\udc61#c%& 0 \ud800\udc61b. \ud800\udc61b\n false 0 (?s)\ud800\udc61b. \ud800\udc61b\n true \ud800\udc61b\n 0 \ud800\udc61[\p{L}&&[\P{InGreek}]]c \ud800\udc61\u6000c true \ud800\udc61\u6000c 0 \ud800\udc61[\p{L}&&[\P{InGreek}]]c \ud800\udc61rc true \ud800\udc61rc 0 \ud800\udc61[\p{L}&&[\P{InGreek}]]c \ud800\udc61\u0370c false 0 \ud800\udc61\p{InGreek}c \ud800\udc61\u0370c true \ud800\udc61\u0370c 0 \ud800\udc61\p{Sc} \ud800\udc61$ true \ud800\udc61$ 0 // Test \p{L} \p{L} \ud800\udf1e true \ud800\udf1e 0 ^a\p{L}z$ a\ud800\udf1ez true a\ud800\udf1ez 0 // Test \P{InDeseret} \ud800\udf00\p{L}{2,3}\P{L}*supp->\ud900\udc00<-\P{InDeseret} \ud800\udf00\ud800\udf1e\ud800\udf1esupp->\ud900\udc00<-\ud901\udf00 true \ud800\udf00\ud800\udf1e\ud800\udf1esupp->\ud900\udc00<-\ud901\udf00 0 \ud800\udf00\p{L}{2,3}\P{L}*supp->\ud900\udc00<-\P{InDeseret} \ud800\udf00\ud800\udf1e\ud800\udf1e\ud901\udf00supp->\ud900\udc00<-\ud901\udf00 true \ud800\udf00\ud800\udf1e\ud800\udf1e\ud901\udf00supp->\ud900\udc00<-\ud901\udf00 0 // Test \p{InDeseret} \ud800\udf00\p{L}{2,3}\P{L}*supp->\ud900\udc00<-\p{InDeseret} \ud800\udf00\ud800\udf1e\ud800\udf1e\ud901\udf00supp->\ud900\udc00<-\ud801\udc00 true \ud800\udf00\ud800\udf1e\ud800\udf1e\ud901\udf00supp->\ud900\udc00<-\ud801\udc00 0 // Test the word char escape sequence \ud800\udc61b\wc \ud800\udc61bcc true \ud800\udc61bcc 0 \ud800\udc61bc[\w] \ud800\udc61bcd true \ud800\udc61bcd 0 \ud800\udc61bc[\sdef]* \ud800\udc61bc def true \ud800\udc61bc def 0 \ud800\udc61bc[\sy-z]* \ud800\udc61bc y z true \ud800\udc61bc y z 0 \ud800\udc01bc[\ud800\udc01-\ud800\udc04\sm-p]* \ud800\udc01bc\ud800\udc01\ud800\udc01 mn p true \ud800\udc01bc\ud800\udc01\ud800\udc01 mn p 0 // Test the whitespace escape sequence \ud800\udc61b\s\ud800\udc03 \ud800\udc61b \ud800\udc03 true \ud800\udc61b \ud800\udc03 0 \s\s\s bl\ud800\udc61h err false 0 \S\S\s bl\ud800\udc61h err true \ud800\udc61h 0 // Test the digit escape 
sequence \ud800\udc61b\d\ud800\udc03 \ud800\udc61b9\ud800\udc03 true \ud800\udc61b9\ud800\udc03 0 \d\d\d bl\ud800\udc61h45 false 0 // Test the caret metacharacter ^\ud800\udc61bc \ud800\udc61bcdef true \ud800\udc61bc 0 ^\ud800\udc61bc bcd\ud800\udc61bc false 0 // Greedy ? metacharacter \ud800\udc61?\ud800\udc02 \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 true \ud800\udc61\ud800\udc02 0 \udc61?\ud800\udc02 \ud800\udc61\udc61\udc61\ud800\udc02 true \udc61\ud800\udc02 0 \ud800\udc61?\ud800\udc02 \ud800\udc02 true \ud800\udc02 0 \ud800?\ud800\udc02 \ud800\udc02 true \ud800\udc02 0 \ud800\udc61?\ud800\udc02 \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc03\ud800\udc03\ud800\udc03 false 0 .?\ud800\udc02 \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 true \ud800\udc61\ud800\udc02 0 // Reluctant ? metacharacter \ud800\udc61??\ud800\udc02 \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 true \ud800\udc61\ud800\udc02 0 \ud800??\ud800\udc02 \ud800\ud800\ud8001\ud800\ud800\udc02 true \ud800\ud800\udc02 0 \ud800\udc61??\ud800\udc02 \ud800\udc02 true \ud800\udc02 0 \ud800??\ud800\udc02 \ud800\udc02 true \ud800\udc02 0 \ud800\udc61??\ud800\udc02 \ud800\udc61\ud800\udc61\ud800\udc61ccc false 0 .??\ud800\udc02 \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 true \ud800\udc61\ud800\udc02 0 // Possessive ? 
metacharacter \ud800\udc61?+\ud800\udc02 \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 true \ud800\udc61\ud800\udc02 0 \ud800\udc61?+\ud800\udc02 \ud800\udc02 true \ud800\udc02 0 \ud800\udc61?+\ud800\udc02 \ud800\udc61\ud800\udc61\ud800\udc61ccc false 0 .?+\ud800\udc02 \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 true \ud800\udc61\ud800\udc02 0 // Greedy + metacharacter \ud800\udc61+\ud800\udc02 \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0 \udc61+\ud800\udc02 \ud800\udc61\udc61\udc61\udc61\ud800\udc02 true \udc61\udc61\udc61\ud800\udc02 0 \ud800\udc61+\ud800\udc02 \ud800\udc02 false 0 \ud800+\ud800\udc02 \ud800\udc02 false 0 \ud800\udc61+\ud800\udc02 \ud800\udc61\ud800\udc61\ud800\udc61ccc false 0 .+\ud800\udc02 \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0 .+\ud800\udc02 \ud800\udc61\udc61\udc61\udc61\ud800\udc02 true \ud800\udc61\udc61\udc61\udc61\ud800\udc02 0 // Reluctant + metacharacter \ud800\udc61+?\ud800\udc02 \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0 \udc61+?\ud800\udc02 \udc61\udc61\udc61\udc61\ud800\udc02 true \udc61\udc61\udc61\udc61\ud800\udc02 0 \ud800\udc61+?\ud800\udc02 \ud800\udc02 false 0 \ud800+?\ud800\udc02 \ud800\udc02 false 0 \ud800\udc61+?\ud800\udc02 \ud800\udc61\ud800\udc61\ud800\udc61ccc false 0 .+?\ud800\udc02 \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0 // Possessive + metacharacter \ud800\udc61++\ud800\udc02 \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0 \ud800\udc61++\ud800\udc02 \ud800\udc02 false 0 \ud800\udc61++\ud800\udc02 \ud800\udc61\ud800\udc61\ud800\udc61ccc false 0 .++\ud800\udc02 
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 false 0 // Greedy Repetition \ud800\udc61{2,3} \ud800\udc61 false 0 \ud800\udc61{2,3} \ud800\udc61\ud800\udc61 true \ud800\udc61\ud800\udc61 0 \ud800\udc61{2,3} \ud800\udc61\ud800\udc61\ud800\udc61 true \ud800\udc61\ud800\udc61\ud800\udc61 0 \ud800\udc61{2,3} \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61 true \ud800\udc61\ud800\udc61\ud800\udc61 0 \ud800\udc61{3,} zzz\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61zzz true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61 0 \ud800\udc61{3,} zzz\ud800\udc61\ud800\udc61zzz false 0 // Reluctant Repetition \ud800\udc61{2,3}? \ud800\udc61 false 0 \ud800\udc61{2,3}? \ud800\udc61\ud800\udc61 true \ud800\udc61\ud800\udc61 0 \ud800\udc61{2,3}? \ud800\udc61\ud800\udc61\ud800\udc61 true \ud800\udc61\ud800\udc61 0 \ud800\udc61{2,3}? \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61 true \ud800\udc61\ud800\udc61 0 // Zero width Positive lookahead \ud800\udc61\ud802\udc02\ud803\udc03(?=\ud804\udc04) zzz\ud800\udc61\ud802\udc02\ud803\udc03\ud804\udc04 true \ud800\udc61\ud802\udc02\ud803\udc03 0 \ud800\udc61\ud802\udc02\ud803\udc03(?=\ud804\udc04) zzz\ud800\udc61\ud802\udc02\ud803\udc03e\ud804\udc04 false 0 \ud800\udc61\ud802\udc02\ud803\udc03(?=\udcff\ud804\udc04) zzz\ud800\udc61\ud802\udc02\ud803\udc03\udcff\ud804\udc04 true \ud800\udc61\ud802\udc02\ud803\udc03 0 \ud800\udc61\ud802\udc02\ud803\udc03(?=\udcff\ud804\udc04) zzz\ud800\udc61\ud802\udc02\ud803\udc03\ud8ff\udcff\ud804\udc04 false 0 // Zero width Negative lookahead \ud800\udc61\ud802\udc02\ud803\udc03(?!\ud804\udc04) zz\ud800\udc61\ud802\udc02\ud803\udc03\ud804\udc04 false 0 a\ud802\udc02\ud803\udc03(?!\ud804\udc04) zza\ud802\udc02\ud803\udc03\udc04\ud804\udc04 true a\ud802\udc02\ud803\udc03 0 \ud800\udc61\ud802\udc02\ud803\udc03(?!\ud804\udc04\ud8ff) zz\ud800\udc61\ud802\udc02\ud803\udc03\ud804\udc04\ud8ffX false 0 a\ud802\udc02\ud803\udc03(?!\ud804\udc04\ud8ff) 
zza\ud802\udc02\ud803\udc03e\ud804\udc04\ud8ff\udcff true a\ud802\udc02\ud803\udc03 0 // Zero width Positive lookbehind (?<=\ud801\udc01\ud802\udc02)\ud803\udc03 \ud801\udc01\ud802\udc02\ud803\udc03 true \ud803\udc03 0 // Zero width Negative lookbehind (?3 // So that the BM optimization is part of test \Q***\E\ud801\udc01\ud802\udc02\ud800\udc03 ***\ud801\udc01\ud802\udc02\ud800\udc03 true ***\ud801\udc01\ud802\udc02\ud800\udc03 0 \ud802\udc02l\Q***\E\ud801\udc01\ud802\udc02\ud800\udc03 \ud802\udc02l***\ud801\udc01\ud802\udc02\ud800\udc03 true \ud802\udc02l***\ud801\udc01\ud802\udc02\ud800\udc03 0 \Q***\ud801\udc01\ud802\udc02\ud800\udc03 ***\ud801\udc01\ud802\udc02\ud800\udc03 true ***\ud801\udc01\ud802\udc02\ud800\udc03 0 \ud802\udc02l\ud801\udc01h\Q***\E\ud801\udc01\ud802\udc02\ud800\udc03 \ud802\udc02l\ud801\udc01h***\ud801\udc01\ud802\udc02\ud800\udc03 true \ud802\udc02l\ud801\udc01h***\ud801\udc01\ud802\udc02\ud800\udc03 0 \Q***\ud801\udc01\ud802\udc02\ud800\udc03 ***\ud801\udc01\ud802\udc02\ud800\udc03 true ***\ud801\udc01\ud802\udc02\ud800\udc03 0 \Q*\ud801\udc01\ud802\udc02 *\ud801\udc01\ud802\udc02 true *\ud801\udc01\ud802\udc02 0 \ud802\udc02l\ud801\udc01h\Q***\ud801\udc01\ud802\udc02\ud800\udc03 \ud802\udc02l\ud801\udc01h***\ud801\udc01\ud802\udc02\ud800\udc03 true \ud802\udc02l\ud801\udc01h***\ud801\udc01\ud802\udc02\ud800\udc03 0 \ud802\udc02l\ud801\udc01\Q***\ud801\udc01\ud802\udc02\ud800\udc03 \ud802\udc02l\ud801\udc01***\ud801\udc01\ud802\udc02\ud800\udc03 true \ud802\udc02l\ud801\udc01***\ud801\udc01\ud802\udc02\ud800\udc03 0 //Test cases below copied from i18n QE's RegexSupplementaryTests.txt \uD800\uDFFF\uD801\uDFF1\uDB00\uDC00 \uD800\uDFFF\uD801\uDFF1\uDB00\uDC00 true \uD800\uDFFF\uD801\uDFF1\uDB00\uDC00 0 \uD800\uDFFF\uD801\uDFF1\uDB00\uDC00 \u1000\uD801\uDFF1\uDB00\uDC00 false 0 \uD800\uDFFF\uD801\uDFF1\uDB00\uDC00 \uD800\uDFFF\uFFFF\uDB00\uDC00 false 0 \uD800\uDFFF\uD801\uDFF1\uDB00\uDC00 \uD800\uDFFF\uD801\uDFF1\uFFFF false 0 \u1000.\uFFFF 
\u1000\uD800\uDFFF\uFFFF true \u1000\uD800\uDFFF\uFFFF 0 //======= // Ranges //======= [a-\uD800\uDFFF] \uDFFF true \uDFFF 0 [a-\uD800\uDFFF] \uD800 true \uD800 0 [a-\uD800\uDFFF] \uD800\uDFFF true \uD800\uDFFF 0 [\uD800\uDC00-\uDBFF\uDFFF] \uDBFF false 0 [\uD800\uDC00-\uDBFF\uDFFF] \uDC00 false 0 [\uD800-\uDFFF] \uD800\uDFFF false 0 [\uD800-\uDFFF] \uDFFF\uD800 true \uDFFF 0 foo[^\uD800-\uDFFF] foo\uD800\uDFFF true foo\uD800\uDFFF 0 foo[^\uD800-\uDFFF] foo\uDFFF\uD800 false 0 //fo\uD800[\uDC00-\uDFFF] //================== // Character Classes //================== // Simple class [ab\uD800\uDFFFcd]at \uD800at false 0 [ab\uD800\uDFFFcd]at \uD800\uDFFFat true \uD800\uDFFFat 0 // Negation [^\uD800\uDFFFcd]at \uD800at true \uD800at 0 [^\uD800\uDFFFcd]at \uDFFFat true \uDFFFat 0 // Inclusive range [\u0000-\uD800\uDFFF-\uFFFF] \uD800\uDFFF true \uD800\uDFFF 0 // Unions [\u0000-\uD800[\uDFFF-\uFFFF]] \uD800\uDFFF false 0 // Intersection [\u0000-\uFFFF&&[\uD800\uDFFF]] \uD800\uDFFF false 0 [\u0000-\uFFFF&&[\uD800\uDFFF]] \uD800 false 0 [\u0000-\uFFFF&&[\uDFFF\uD800]] \uD800 true \uD800 0 [\u0000-\uFFFF&&[\uDFFF\uD800\uDC00]] \uDC00 false 0 [\u0000-\uDFFF&&[\uD800-\uFFFF]] \uD800\uDFFF false 0 [\u0000-\uDFFF&&[\uD800-\uFFFF]] \uDFFF\uD800 true \uDFFF 0 // Subtraction [\u0000-\uD800\uDFFF&&[^\uD800\uDC00]] \uD800 true \uD800 0 [\u0000-\uD800\uDFFF&&[^\uD800\uDC00]] \uDC00 true \uDC00 0 [\u0000-\uD800\uDFFF&&[^\uD800\uDC00]] \uD800\uDFFF true \uD800\uDFFF 0 [\u0000-\uD800\uDFFF&&[^\uD800\uDBFF\uDC00]] \uD800 false 0 [\u0000-\uD800\uDFFF&&[^\uDC00\uD800\uDBFF]] \uD800\uDC00 true \uD800\uDC00 0 // Quantifiers a\uD800\uDFFF? a\uD800 true a 0 a\uD800\uDFFF? a\uDFFF true a 0 a\uD800\uDFFF? a\uD800\uDFFF true a\uD800\uDFFF 0 a\uDFFF\uD800? a\uDFFF true a\uDFFF 0 a\uDFFF\uD800? a\uD800 false 0 \uD800\uDFFF\uDC00? \uD800 false 0 \uD800\uDFFF\uDC00? \uD800\uDFFF true \uD800\uDFFF 0 a\uD800\uDFFF?? 
a\uDFFF true a 0 a\uD800\uDFFF* a true a 0 a\uD800\uDFFF* a\uD800 true a 0 \uD800\uDFFF* \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0 \uD800\uDFFF* \uD800\uDFFF\uDFFF\uDFFF\uDFFF true \uD800\uDFFF 0 \uD800*\uDFFF \uD800\uDFFF false 0 a\uD800\uDFFF* a\uD800 true a 0 \uDFFF\uD800* \uDFFF true \uDFFF 0 \uDFFF\uD800* \uDFFF\uD800\uD800\uD800 true \uDFFF\uD800\uD800\uD800 0 \uD800\uDFFF+ \uD800\uDFFF\uDFFF\uDFFF true \uD800\uDFFF 0 \uD800\uDFFF+ \uD800 false 0 \uD800\uDFFF+ \uD800\uDFFF true \uD800\uDFFF 0 \uD800\uDFFF+ \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0 \uDFFF\uD800+ \uDFFF\uD800\uDFFF\uD800 false 0 \uD800+\uDFFF \uD800\uDFFF false 0 \uD800+\uDFFF \uD800 false 0 \uDFFF+\uD800 \uD800 false 0 \uDFFF+\uD800 \uDFFF\uD800 true \uDFFF\uD800 0 \uD800\uDFFF{3} \uD800\uDFFF\uDFFF\uDFFF false 0 \uD800\uDFFF{3} \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0 \uDFFF\uD800{3} \uDFFF\uD800\uDFFF\uD800\uDFFF\uD800 false 0 \uDFFF\uD800{3} \uDFFF\uD800\uD800\uD800 true \uDFFF\uD800\uD800\uD800 0 \uD800\uDFFF{2,} \uD800\uDFFF false 0 \uD800\uDFFF{2,} \uD800\uDFFF\uDFFF false 0 \uD800\uDFFF{2,} \uD800\uDFFF\uD800\uDFFF true \uD800\uDFFF\uD800\uDFFF 0 \uDFFF\uD800{2,} \uDFFF\uD800\uDFFF\uD800 false 0 \uDFFF\uD800{2,} \uDFFF\uD800\uD800\uD800 true \uDFFF\uD800\uD800\uD800 0 \uD800\uDFFF{3,4} \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0 \uD800\uDFFF{3,4} \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800 true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0 \uD800\uDFFF{3,4} \uD800\uDFFF\uD800\uD800\uDFFF\uD800\uDFFF false 0 \uDFFF\uD800{3,5} \uDFFF\uD800\uD800\uD800\uD800\uD800\uD800\uD800 true \uDFFF\uD800\uD800\uD800\uD800\uD800 0 \uD800\uDFFF{3,5} \uD800\uDFFF\uDFFF\uDFFF false 0 \uD800\uDFFF{3,5} \uD800\uDFFF\uD800\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0 // Groupings 
(\uD800(\uDFFF)) \uD800\uDFFF false 2 (\uD800(\uDC00)(\uDFFF)) \uD800\uDC00\uDFFF false 3 ((\uD800)(\uDFFF)) \uD800\uDFFF false 3 (\uD800(\uDFFF)\uDFFF) \uD800\uDFFF false 2 (\uDFFF(\uD800)(\uDBFF)) \uDFFF\uD800\uDBFF true \uDFFF\uD800\uDBFF 3 \uDFFF\uD800\uDBFF \uD800 \uDBFF (\uDFFF(\uD800)(\uDC00)) \uDFFF\uD800\uDC00 false 3 (\uDFFF\uD800(\uDC00\uDBFF)) \uDFFF\uD800\uDC00\uDBFF false 2 (\uD800\uDFFF(\uDBFF)(\uDC00)) \uD800\uDFFF\uDBFF\uDC00 false 3 (\uD800\uDFFF(\uDBFF\uDC00)) \uD800\uDFFF\uDBFF\uDC00 true \uD800\uDFFF\uDBFF\uDC00 2 \uD800\uDFFF\uDBFF\uDC00 \uDBFF\uDC00