4946a162aa
Reviewed-by: joehw
1460 lines
34 KiB
Plaintext
1460 lines
34 KiB
Plaintext
//
|
|
// Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
|
|
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
//
|
|
// This code is free software; you can redistribute it and/or modify it
|
|
// under the terms of the GNU General Public License version 2 only, as
|
|
// published by the Free Software Foundation.
|
|
//
|
|
// This code is distributed in the hope that it will be useful, but WITHOUT
|
|
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
// version 2 for more details (a copy is included in the LICENSE file that
|
|
// accompanied this code).
|
|
//
|
|
// You should have received a copy of the GNU General Public License version
|
|
// 2 along with this work; if not, write to the Free Software Foundation,
|
|
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
//
|
|
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
// or visit www.oracle.com if you need additional information or have any
|
|
// questions.
|
|
//
|
|
// --------------------------------------------------------
|
|
// This file contains test cases with supplementary characters for regular expressions.
|
|
// A test case consists of three lines:
|
|
// The first line is a pattern used in the test
|
|
// The second line is the input to search for the pattern in
|
|
// The third line is a concatenation of the match, the number of groups,
|
|
// and the contents of the first four subexpressions.
|
|
// Empty lines and lines beginning with comment slashes are ignored.
|
|
|
|
// Test unsetting of backed off groups
|
|
^(\ud800\udc61)?\ud800\udc61
|
|
\ud800\udc61
|
|
true \ud800\udc61 1
|
|
|
|
^(\ud800\udc61\ud800)?\ud800\udc61\ud800
|
|
\ud800\udc61\ud800
|
|
true \ud800\udc61\ud800 1
|
|
|
|
^(\ud800\udc61\ud800\udc61(\ud800\udc62\ud800\udc62)?)+$
|
|
\ud800\udc61\ud800\udc61\ud800\udc62\ud800\udc62\ud800\udc61\ud800\udc61
|
|
true \ud800\udc61\ud800\udc61\ud800\udc62\ud800\udc62\ud800\udc61\ud800\udc61 2 \ud800\udc61\ud800\udc61 \ud800\udc62\ud800\udc62
|
|
|
|
^(\ud800\udc61\ud800\udc61\ud800(\ud800\udc62\ud800\udc62\ud800)?)+$
|
|
\ud800\udc61\ud800\udc61\ud800\ud800\udc62\ud800\udc62\ud800\ud800\udc61\ud800\udc61\ud800
|
|
true \ud800\udc61\ud800\udc61\ud800\ud800\udc62\ud800\udc62\ud800\ud800\udc61\ud800\udc61\ud800 2 \ud800\udc61\ud800\udc61\ud800 \ud800\udc62\ud800\udc62\ud800
|
|
|
|
((\ud800\udc61|\ud800\udc62)?\ud800\udc62)+
|
|
\ud800\udc62
|
|
true \ud800\udc62 2 \ud800\udc62
|
|
|
|
((\ud800|\ud800\udc62)?\ud800\udc62)+
|
|
\ud800\udc62
|
|
true \ud800\udc62 2 \ud800\udc62
|
|
|
|
(\ud800\udc61\ud800\udc61\ud800\udc61)?\ud800\udc61\ud800\udc61\ud800\udc61
|
|
\ud800\udc61\ud800\udc61\ud800\udc61
|
|
true \ud800\udc61\ud800\udc61\ud800\udc61 1
|
|
|
|
(\ud800\udc61\ud800\udc61\ud800\ud800\udc61)?\ud800\udc61\ud800\udc61\ud800\ud800\udc61
|
|
\ud800\udc61\ud800\udc61\ud800\ud800\udc61
|
|
true \ud800\udc61\ud800\udc61\ud800\ud800\udc61 1
|
|
|
|
^(\ud800\udc61\ud800(\ud800\udc62\ud800)?)+$
|
|
\ud800\udc61\ud800\ud800\udc62\ud800\ud800\udc61\ud800
|
|
true \ud800\udc61\ud800\ud800\udc62\ud800\ud800\udc61\ud800 2 \ud800\udc61\ud800 \ud800\udc62\ud800
|
|
|
|
^(\ud800\udc61(\ud800\udc62)?)+$
|
|
\ud800\udc61\ud800\udc62\ud800\udc61
|
|
true \ud800\udc61\ud800\udc62\ud800\udc61 2 \ud800\udc61 \ud800\udc62
|
|
|
|
^(\ud800\udc61\ud800(\ud800\udc62\ud800)?)+$
|
|
\ud800\udc61\ud800\ud800\udc62\ud800\ud800\udc61\ud800
|
|
true \ud800\udc61\ud800\ud800\udc62\ud800\ud800\udc61\ud800 2 \ud800\udc61\ud800 \ud800\udc62\ud800
|
|
|
|
^(\ud800\udc61(\ud800\udc62(\ud800\udc63)?)?)?\ud800\udc61\ud800\udc62\ud800\udc63
|
|
\ud800\udc61\ud800\udc62\ud800\udc63
|
|
true \ud800\udc61\ud800\udc62\ud800\udc63 3
|
|
|
|
^(\ud800\udc61\ud800(\ud800\udc62(\ud800\udc63)?)?)?\ud800\udc61\ud800\ud800\udc62\ud800\udc63
|
|
\ud800\udc61\ud800\ud800\udc62\ud800\udc63
|
|
true \ud800\udc61\ud800\ud800\udc62\ud800\udc63 3
|
|
|
|
^(\ud800\udc61(\ud800\udc02(\ud800\udc63))).*
|
|
\ud800\udc61\ud800\udc02\ud800\udc63
|
|
true \ud800\udc61\ud800\udc02\ud800\udc63 3 \ud800\udc61\ud800\udc02\ud800\udc63 \ud800\udc02\ud800\udc63 \ud800\udc63
|
|
|
|
^(\ud800\udc61(\ud800(\ud800\udc63))).*
|
|
\ud800\udc61\ud800\ud800\udc63
|
|
true \ud800\udc61\ud800\ud800\udc63 3 \ud800\udc61\ud800\ud800\udc63 \ud800\ud800\udc63 \ud800\udc63
|
|
|
|
// Patterns including no surrogates
|
|
(.)([^a])xyz
|
|
\ud801\ud800\udc00xyz
|
|
true \ud801\ud800\udc00xyz 2 \ud801 \ud800\udc00
|
|
|
|
[^a-z]..
|
|
\ud801\ud800\udc00xyz
|
|
true \ud801\ud800\udc00x 0
|
|
|
|
.$
|
|
\ud801\ud800\udc00
|
|
true \ud800\udc00 0
|
|
|
|
.$
|
|
\ud801\udc01\ud800\udc00
|
|
true \ud800\udc00 0
|
|
|
|
.$
|
|
\ud801\udc01\ud800\udc00\udcff
|
|
true \udcff 0
|
|
|
|
[^x-\uffff][^y-\uffff]
|
|
\ud800\udc00pqr
|
|
true \ud800\udc00p 0
|
|
|
|
[^x-\uffff]+
|
|
\ud800\udc00pqrx
|
|
true \ud800\udc00pqr 0
|
|
|
|
/// The following test cases fail due to use of Start rather than
|
|
/// StartS. Disabled for now.
|
|
///[a-\uffff]
|
|
///\ud800\udc00x
|
|
///true x 0
|
|
///
|
|
///[a-\uffff]
|
|
///\ud800\udc00
|
|
///false 0
|
|
|
|
// unpaired surrogate should match
|
|
[\x{d800}-\x{dbff}\x{dc00}-\x{dfff}]
|
|
xxx\udca9\ud83dyyy
|
|
true \udca9 0
|
|
|
|
// surrogates in a supplementary character should not match
|
|
[\x{d800}-\x{dbff}\x{dc00}-\x{dfff}]
|
|
\ud83d\udca9
|
|
false 0
|
|
|
|
// unpaired surrogate should match
|
|
[\p{InHIGH_SURROGATES}\p{InLOW_SURROGATES}]
|
|
xxx\udca9\ud83dyyy
|
|
true \udca9 0
|
|
|
|
// surrogates part of a supplementary character should not match
|
|
[\p{InHIGH_SURROGATES}\p{InLOW_SURROGATES}]
|
|
\ud83d\udca9
|
|
false 0
|
|
|
|
// low surrogate part of a supplementary character should not match
|
|
[\x{dc00}-\x{dfff}]
|
|
\ud83d\udca9
|
|
false 0
|
|
|
|
// use of x modifier
|
|
\ud800\udc61bc(?x)bl\ud800\udc61h
|
|
\ud800\udc61bcbl\ud800\udc61h
|
|
true \ud800\udc61bcbl\ud800\udc61h 0
|
|
|
|
\ud800\udc61bc(?x) bl\ud800\udc61h
|
|
\ud800\udc61bcbl\ud800\udc61h
|
|
true \ud800\udc61bcbl\ud800\udc61h 0
|
|
|
|
\ud800\udc61bc(?x) bl\ud800\udc61h blech
|
|
\ud800\udc61bcbl\ud800\udc61hblech
|
|
true \ud800\udc61bcbl\ud800\udc61hblech 0
|
|
|
|
\ud800\udc61bc(?x) bl\ud800\udc61h # ignore comment
|
|
\ud800\udc61bcbl\ud800\udc61h
|
|
true \ud800\udc61bcbl\ud800\udc61h 0
|
|
|
|
// Simple alternation
|
|
\ud800\udc61|\ud800\udc62
|
|
\ud800\udc61
|
|
true \ud800\udc61 0
|
|
|
|
\ud800\udc61|\ud800\udc62|\ud800
|
|
\ud800\udc61
|
|
true \ud800\udc61 0
|
|
|
|
\ud800\udc61|\ud800
|
|
\ud800\udc62
|
|
false 0
|
|
|
|
\ud800\udc62|\ud800
|
|
\ud800
|
|
true \ud800 0
|
|
|
|
\ud800\udc61|\ud802\udc02
|
|
z
|
|
false 0
|
|
|
|
\ud800\udc61|\ud802\udc02
|
|
\ud802\udc02
|
|
true \ud802\udc02 0
|
|
|
|
\ud800\udc61|\ud802\udc02|\ud803\udc03\ud804\udc04
|
|
\ud803\udc03\ud804\udc04
|
|
true \ud803\udc03\ud804\udc04 0
|
|
|
|
\ud800\udc61|\ud800\udc61d
|
|
\ud800\udc61d
|
|
true \ud800\udc61 0
|
|
|
|
z(\ud800\udc61|\ud800\udc61c)\ud802\udc02
|
|
z\ud800\udc61c\ud802\udc02
|
|
true z\ud800\udc61c\ud802\udc02 1 \ud800\udc61c
|
|
|
|
z(\ud800\udc61|\ud800\udc61c|\udc61c)\ud802\udc02
|
|
z\udc61c\ud802\udc02
|
|
true z\udc61c\ud802\udc02 1 \udc61c
|
|
|
|
// Simple codepoint class
|
|
[\ud800\udc61\ud802\udc02c]+
|
|
\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02
|
|
true \ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 0
|
|
|
|
[\ud800\udc61\ud802\udc02c]+
|
|
\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02
|
|
true \ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 0
|
|
|
|
[\ud800\udc61\ud802\udc02c\ud800]+
|
|
\ud800\udc61\ud802\udc02\ud800\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02
|
|
true \ud800\udc61\ud802\udc02\ud800\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02 0
|
|
|
|
[\ud800\udc61bc]+
|
|
d\ud800\udc62fg
|
|
false 0
|
|
|
|
[\ud800\udc61bc]+[\ud804\udc04ef]+[\ud807\udc07hi]+
|
|
zzz\ud800\udc61\ud800\udc61\ud804\udc04\ud804\udc04\ud807\udc07\ud807\udc07zzz
|
|
true \ud800\udc61\ud800\udc61\ud804\udc04\ud804\udc04\ud807\udc07\ud807\udc07 0
|
|
|
|
// Range codepoint class
|
|
[\ud801\udc01-\ud807\udc07]+
|
|
\ud8ff\udcff\ud8ff\udcff\ud8ff\udcff\ud807\udc07\ud807\udc07\ud807\udc07
|
|
true \ud807\udc07\ud807\udc07\ud807\udc07 0
|
|
|
|
[\ud801\udc01-\ud807\udc07]+
|
|
mmm
|
|
false 0
|
|
|
|
[\ud800\udc61-]+
|
|
z\ud800\udc61-9z
|
|
true \ud800\udc61- 0
|
|
|
|
// Negated char class
|
|
[^\ud800\udc61\ud802\udc02c]+
|
|
\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02\ud800\udc61\ud802\udc02
|
|
false 0
|
|
|
|
[^\ud800\udc61\ud802\udc02\ud803\udc03]+
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02\ud802\udc02\ud802\udc02\ud803\udc03\ud803\udc03\ud803\udc03\ud804\udc04efg
|
|
true \ud804\udc04efg 0
|
|
|
|
[^\ud800\udc61\ud802\udc02\ud803\udc03\ud800]+
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02\ud802\udc02\ud802\udc02\ud803\udc03\ud803\udc03\ud803\udc03\ud804\udc04efg
|
|
true \ud804\udc04efg 0
|
|
|
|
// Making sure a ^ not in first position matches literal ^
|
|
[\ud801\udc01\ud802\udc02\ud803\udc03^\ud802\udc02]
|
|
\ud802\udc02
|
|
true \ud802\udc02 0
|
|
|
|
[\ud801\udc01\ud802\udc02\ud803\udc03^\ud802\udc02]
|
|
^
|
|
true ^ 0
|
|
|
|
// Class union and intersection
|
|
[\ud801\udc01\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]]
|
|
\ud802\udc02
|
|
true \ud802\udc02 0
|
|
|
|
[\ud800\udc61\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]]
|
|
\ud805\udc05
|
|
true \ud805\udc05 0
|
|
|
|
[\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]]
|
|
\ud801\udc01
|
|
true \ud801\udc01 0
|
|
|
|
[\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]]
|
|
\ud80c\udc0c
|
|
true \ud80c\udc0c 0
|
|
|
|
[\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]]
|
|
4
|
|
true 4 0
|
|
|
|
[\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]]
|
|
\ud805\udc05
|
|
false 0
|
|
|
|
[\ud801\udc01-\ud804\udc04[0-9][\ud80b\udc0b-\ud80d\udc0d]]
|
|
\ud816\udc16
|
|
false 0
|
|
|
|
[[\ud801\udc01-\ud804\udc04][0-9][\ud80b\udc0b-\ud80d\udc0d]]
|
|
\ud802\udc02
|
|
true \ud802\udc02 0
|
|
|
|
[[\ud801\udc01-\ud804\udc04][0-9][\ud80b\udc0b-\ud80d\udc0d]]
|
|
\ud81a\udc1a
|
|
false 0
|
|
|
|
[\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]]
|
|
\ud801\udc01
|
|
true \ud801\udc01 0
|
|
|
|
[\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]]
|
|
\ud805\udc05
|
|
true \ud805\udc05 0
|
|
|
|
[\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]]
|
|
\ud808\udc08
|
|
true \ud808\udc08 0
|
|
|
|
[\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]]
|
|
\ud80d\udc0d
|
|
false 0
|
|
|
|
[\ud801\udc01-\ud803\udc03[\ud804\udc04-\ud806\udc06[\ud807\udc07-\ud809\udc09]]\ud80d\udc0d]
|
|
\ud80d\udc0d
|
|
true \ud80d\udc0d 0
|
|
|
|
[\ud801\udc01\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]\ud807\udc07\ud808\udc08\ud809\udc09]
|
|
\ud801\udc01
|
|
true \ud801\udc01 0
|
|
|
|
[\ud800\udc61\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]\ud807\udc07\ud808\udc08\ud809\udc09]
|
|
\ud804\udc04
|
|
true \ud804\udc04 0
|
|
|
|
[\ud800\udc61\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]\ud807\udc07\ud808\udc08\ud809\udc09]
|
|
\ud808\udc08
|
|
true \ud808\udc08 0
|
|
|
|
[\ud800\udc61\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]\ud807\udc07\ud808\udc08\ud809\udc09]
|
|
\ud816\udc16
|
|
false 0
|
|
|
|
[\ud801\udc01-\ud803\udc03&&[\ud804\udc04-\ud806\udc06]]
|
|
\ud801\udc01
|
|
false 0
|
|
|
|
[\ud801\udc01-\ud803\udc03&&[\ud804\udc04-\ud806\udc06]]
|
|
\ud805\udc05
|
|
false 0
|
|
|
|
[\ud801\udc01-\ud803\udc03&&[\ud804\udc04-\ud806\udc06]]
|
|
\ud81a\udc1a
|
|
false 0
|
|
|
|
[[\ud801\udc01-\ud803\udc03]&&[\ud804\udc04-\ud806\udc06]]
|
|
\ud801\udc01
|
|
false 0
|
|
|
|
[[\ud801\udc01-\ud803\udc03]&&[\ud804\udc04-\ud806\udc06]]
|
|
\ud805\udc05
|
|
false 0
|
|
|
|
[[\ud801\udc01-\ud803\udc03]&&[\ud804\udc04-\ud806\udc06]]
|
|
\ud81a\udc1a
|
|
false 0
|
|
|
|
[\ud801\udc01-\ud803\udc03&&\ud804\udc04-\ud806\udc06]
|
|
\ud801\udc01
|
|
false 0
|
|
|
|
[\ud801\udc01-\ud80d\udc0d&&\ud80d\udc0d-\ud81a\udc1a]
|
|
\ud80d\udc0d
|
|
true \ud80d\udc0d 0
|
|
|
|
[\ud801\udc01-\ud80d\udc0d&&\ud80d\udc0d-\ud81a\udc1a&&\ud801\udc01-\ud803\udc03]
|
|
\ud80d\udc0d
|
|
false 0
|
|
|
|
[\ud801\udc01-\ud80d\udc0d&&\ud80d\udc0d-\ud81a\udc1a&&\ud801\udc01-\ud81a\udc1a]
|
|
\ud80d\udc0d
|
|
true \ud80d\udc0d 0
|
|
|
|
[[\ud801\udc01-\ud80d\udc0d]&&[\ud80d\udc0d-\ud81a\udc1a]]
|
|
\ud801\udc01
|
|
false 0
|
|
|
|
[[\ud801\udc01-\ud80d\udc0d]&&[\ud80d\udc0d-\ud81a\udc1a]]
|
|
\ud80d\udc0d
|
|
true \ud80d\udc0d 0
|
|
|
|
[[\ud801\udc01-\ud80d\udc0d]&&[\ud80d\udc0d-\ud81a\udc1a]]
|
|
\ud81a\udc1a
|
|
false 0
|
|
|
|
[[\ud801\udc01-\ud80d\udc0d]&&[^\ud801\udc01-\ud803\udc03]]
|
|
\ud801\udc01
|
|
false 0
|
|
|
|
[[\ud801\udc01-\ud80d\udc0d]&&[^\ud801\udc01-\ud803\udc03]]
|
|
\ud804\udc04
|
|
true \ud804\udc04 0
|
|
|
|
[\ud801\udc01-\ud80d\udc0d&&[^\ud801\udc01-\ud803\udc03]]
|
|
\ud801\udc01
|
|
false 0
|
|
|
|
[\ud801\udc01-\ud80d\udc0d&&[^\ud801\udc01-\ud803\udc03]]
|
|
\ud804\udc04
|
|
true \ud804\udc04 0
|
|
|
|
[\ud801\udc01-\ud803\udc03\ud804\udc04-\ud806\udc06&&[\ud804\udc04-\ud806\udc06]]
|
|
\ud801\udc01
|
|
false 0
|
|
|
|
[\ud801\udc01-\ud803\udc03\ud804\udc04-\ud806\udc06&&[\ud804\udc04-\ud806\udc06]]
|
|
\ud805\udc05
|
|
true \ud805\udc05 0
|
|
|
|
[[\ud801\udc01-\ud803\udc03]&&\ud804\udc04-\ud806\udc06\ud801\udc01-\ud803\udc03]
|
|
\ud801\udc01
|
|
true \ud801\udc01 0
|
|
|
|
[[\ud801\udc01-\ud803\udc03]&&[\ud804\udc04-\ud806\udc06][\ud801\udc01-\ud803\udc03]]
|
|
\ud801\udc01
|
|
true \ud801\udc01 0
|
|
|
|
[[\ud801\udc01-\ud803\udc03][\ud804\udc04-\ud806\udc06]&&\ud801\udc01\ud802\udc02\ud803\udc03]
|
|
\ud801\udc01
|
|
true \ud801\udc01 0
|
|
|
|
[[\ud801\udc01-\ud803\udc03][\ud804\udc04-\ud806\udc06]&&\ud801\udc01\ud802\udc02\ud803\udc03[\ud804\udc04\ud805\udc05\ud806\udc06]]
|
|
\ud805\udc05
|
|
true \ud805\udc05 0
|
|
|
|
[[\ud801\udc01-\ud803\udc03]&&[\ud802\udc02-\ud804\udc04]&&[\ud803\udc03-\ud805\udc05]]
|
|
\ud801\udc01
|
|
false 0
|
|
|
|
[[\ud801\udc01-\ud803\udc03]&&[\ud802\udc02-\ud804\udc04]&&[\ud803\udc03-\ud805\udc05]]
|
|
\ud803\udc03
|
|
true \ud803\udc03 0
|
|
|
|
[[\ud801\udc01-\ud803\udc03]&&[\ud802\udc02-\ud804\udc04][\ud803\udc03-\ud805\udc05]&&[\ud815\udc15-\ud81a\udc1a]]
|
|
\ud803\udc03
|
|
false 0
|
|
|
|
[\ud801\udc01\ud802\udc02\ud803\udc03[^\ud802\udc02\ud803\udc03\ud804\udc04]]
|
|
\ud801\udc01
|
|
true \ud801\udc01 0
|
|
|
|
[\ud800\udc61\ud802\udc02\ud803\udc03[^\ud802\udc02\ud803\udc03\ud804\udc04]]
|
|
\ud804\udc04
|
|
false 0
|
|
|
|
[\ud801\udc01-\ud803\udc03&&\ud801\udc01-\ud804\udc04&&\ud801\udc01-\ud805\udc05\ud807\udc07\ud808\udc08\ud809\udc09]
|
|
\ud802\udc02
|
|
true \ud802\udc02 0
|
|
|
|
[\ud801\udc01-\ud803\udc03&&\ud801\udc01-\ud804\udc04&&\ud801\udc01-\ud805\udc05\ud807\udc07\ud808\udc08\ud809\udc09]
|
|
\ud807\udc07
|
|
false 0
|
|
|
|
[[\ud801\udc01[\ud802\udc02]]&&[\ud802\udc02[\ud801\udc01]]]
|
|
\ud801\udc01
|
|
true \ud801\udc01 0
|
|
|
|
// Unicode isn't supported in clazz()
|
|
[[\ud800\udc61]&&[b][c][\ud800\udc61]&&[^d]]
|
|
\ud800\udc61
|
|
true \ud800\udc61 0
|
|
|
|
[[\ud800\udc61]&&[\ud802\udc02][\ud800][\ud800\udc61]&&[^\ud804\udc04]]
|
|
\ud800\udc61
|
|
true \ud800\udc61 0
|
|
|
|
[[\ud800\udc61]&&[b][\ud800][\ud800\udc61]&&[^\ud804\udc04]]
|
|
\ud804\udc04
|
|
false 0
|
|
|
|
[[\ud800\udc61]&&[b][c][\ud800\udc61]&&[^d]]
|
|
d
|
|
false 0
|
|
|
|
[[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]]
|
|
\ud800\udc01
|
|
false 0
|
|
|
|
[[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]]
|
|
\ud800\udc03
|
|
true \ud800\udc03 0
|
|
|
|
[[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]&&[\ud800\udc03]]
|
|
\ud800\udc03
|
|
true \ud800\udc03 0
|
|
|
|
[[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]&&[\ud800\udc03]&&\ud800\udc03]
|
|
\ud800\udc03
|
|
true \ud800\udc03 0
|
|
|
|
[[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]&&[\ud800\udc03]&&\ud800\udc03&&\ud800\udc03]
|
|
\ud800\udc03
|
|
true \ud800\udc03 0
|
|
|
|
[[[\ud800\udc01-\ud800\udc04]&&[\ud800\udc03-\ud800\udc06]]&&[\ud800\udc03]&&\ud800\udc03&&[\ud800\udc03\ud800\udc04\ud800\udc05]]
|
|
\ud800\udc03
|
|
true \ud800\udc03 0
|
|
|
|
[z[\ud800\udc61b\ud800\udc03&&b\ud800\udc03\ud800\udc04]]
|
|
\ud800\udc03
|
|
true \ud800\udc03 0
|
|
|
|
[z[\ud800\udc61b\ud800\udc03&&b\ud800\udc03\ud800\udc04]&&[u-z]]
|
|
z
|
|
true z 0
|
|
|
|
[x[\ud800\udc61b\ud800\udc03&&b\ud800\udc03\ud800\udc04[z]]&&[u-z]]
|
|
z
|
|
false 0
|
|
|
|
[x[[wz]\ud800\udc61b\ud800\udc03&&b\ud800\udc03\ud800\udc04[z]]&&[u-z]]
|
|
z
|
|
true z 0
|
|
|
|
[[\ud800\udc61b\ud800\udc03]&&[\ud800\udc04\ud800\udc05f]\ud800\udc61b\ud800\udc03]
|
|
\ud800\udc61
|
|
true \ud800\udc61 0
|
|
|
|
[[\ud800\udc61b\ud800\udc03]&&[\ud800\udc04\ud800\udc05f]xyz[\ud800\udc61b\ud800\udc03]]
|
|
\ud800\udc61
|
|
true \ud800\udc61 0
|
|
|
|
\pL
|
|
\ud800\udc00
|
|
true \ud800\udc00 0
|
|
|
|
\p{IsASCII}
|
|
\ud800\udc00
|
|
false 0
|
|
|
|
\pLbc
|
|
\ud800\udc00bc
|
|
true \ud800\udc00bc 0
|
|
|
|
\ud800\udc61[r\p{InGreek}]c
|
|
\ud800\udc61\u0370c
|
|
true \ud800\udc61\u0370c 0
|
|
|
|
\ud800\udc61\p{InGreek}
|
|
\ud800\udc61\u0370
|
|
true \ud800\udc61\u0370 0
|
|
|
|
\ud800\udc61\P{InGreek}
|
|
\ud800\udc61\u0370
|
|
false 0
|
|
|
|
\ud800\udc61\P{InGreek}
|
|
\ud800\udc61b
|
|
true \ud800\udc61b 0
|
|
|
|
\ud800\udc61{^InGreek}
|
|
-
|
|
error
|
|
|
|
\ud800\udc61\p{^InGreek}
|
|
-
|
|
error
|
|
|
|
\ud800\udc61\P{^InGreek}
|
|
-
|
|
error
|
|
|
|
\ud800\udc61\p{InGreek}
|
|
\ud800\udc61\u0370
|
|
true \ud800\udc61\u0370 0
|
|
|
|
\ud800\udc61[\p{InGreek}]c
|
|
\ud800\udc61\u0370c
|
|
true \ud800\udc61\u0370c 0
|
|
|
|
\ud800\udc61[\P{InGreek}]c
|
|
\ud800\udc61\u0370c
|
|
false 0
|
|
|
|
\ud800\udc61[\P{InGreek}]c
|
|
\ud800\udc61bc
|
|
true \ud800\udc61bc 0
|
|
|
|
\ud800\udc61[{^InGreek}]c
|
|
\ud800\udc61nc
|
|
true \ud800\udc61nc 0
|
|
|
|
\ud800\udc61[{^InGreek}]c
|
|
\ud800\udc61zc
|
|
false 0
|
|
|
|
\ud800\udc61[\p{^InGreek}]c
|
|
-
|
|
error
|
|
|
|
\ud800\udc61[\P{^InGreek}]c
|
|
-
|
|
error
|
|
|
|
\ud800\udc61[\p{InGreek}]
|
|
\ud800\udc61\u0370
|
|
true \ud800\udc61\u0370 0
|
|
|
|
\ud800\udc61[r\p{InGreek}]c
|
|
\ud800\udc61rc
|
|
true \ud800\udc61rc 0
|
|
|
|
\ud800\udc61[\p{InGreek}r]c
|
|
\ud800\udc61rc
|
|
true \ud800\udc61rc 0
|
|
|
|
\ud800\udc61[r\p{InGreek}]c
|
|
\ud800\udc61rc
|
|
true \ud800\udc61rc 0
|
|
|
|
\ud800\udc61[^\p{InGreek}]c
|
|
\ud800\udc61\u0370c
|
|
false 0
|
|
|
|
\ud800\udc61[^\P{InGreek}]c
|
|
\ud800\udc61\u0370c
|
|
true \ud800\udc61\u0370c 0
|
|
|
|
\ud800\udc61[\p{InGreek}&&[^\u0370]]c
|
|
\ud800\udc61\u0370c
|
|
false 0
|
|
|
|
// Test the dot metacharacter
|
|
\ud800\udc61.c.+
|
|
\ud800\udc61#c%&
|
|
true \ud800\udc61#c%& 0
|
|
|
|
\ud800\udc61b.
|
|
\ud800\udc61b\n
|
|
false 0
|
|
|
|
(?s)\ud800\udc61b.
|
|
\ud800\udc61b\n
|
|
true \ud800\udc61b\n 0
|
|
|
|
\ud800\udc61[\p{L}&&[\P{InGreek}]]c
|
|
\ud800\udc61\u6000c
|
|
true \ud800\udc61\u6000c 0
|
|
|
|
\ud800\udc61[\p{L}&&[\P{InGreek}]]c
|
|
\ud800\udc61rc
|
|
true \ud800\udc61rc 0
|
|
|
|
\ud800\udc61[\p{L}&&[\P{InGreek}]]c
|
|
\ud800\udc61\u0370c
|
|
false 0
|
|
|
|
\ud800\udc61\p{InGreek}c
|
|
\ud800\udc61\u0370c
|
|
true \ud800\udc61\u0370c 0
|
|
|
|
\ud800\udc61\p{Sc}
|
|
\ud800\udc61$
|
|
true \ud800\udc61$ 0
|
|
|
|
// Test \p{L}
|
|
\p{L}
|
|
\ud800\udf1e
|
|
true \ud800\udf1e 0
|
|
|
|
^a\p{L}z$
|
|
a\ud800\udf1ez
|
|
true a\ud800\udf1ez 0
|
|
|
|
// Test \P{InDeseret}
|
|
|
|
\ud800\udf00\p{L}{2,3}\P{L}*supp->\ud900\udc00<-\P{InDeseret}
|
|
\ud800\udf00\ud800\udf1e\ud800\udf1esupp->\ud900\udc00<-\ud901\udf00
|
|
true \ud800\udf00\ud800\udf1e\ud800\udf1esupp->\ud900\udc00<-\ud901\udf00 0
|
|
|
|
\ud800\udf00\p{L}{2,3}\P{L}*supp->\ud900\udc00<-\P{InDeseret}
|
|
\ud800\udf00\ud800\udf1e\ud800\udf1e\ud901\udf00supp->\ud900\udc00<-\ud901\udf00
|
|
true \ud800\udf00\ud800\udf1e\ud800\udf1e\ud901\udf00supp->\ud900\udc00<-\ud901\udf00 0
|
|
|
|
// Test \p{InDeseret}
|
|
\ud800\udf00\p{L}{2,3}\P{L}*supp->\ud900\udc00<-\p{InDeseret}
|
|
\ud800\udf00\ud800\udf1e\ud800\udf1e\ud901\udf00supp->\ud900\udc00<-\ud801\udc00
|
|
true \ud800\udf00\ud800\udf1e\ud800\udf1e\ud901\udf00supp->\ud900\udc00<-\ud801\udc00 0
|
|
|
|
// Test the word char escape sequence
|
|
\ud800\udc61b\wc
|
|
\ud800\udc61bcc
|
|
true \ud800\udc61bcc 0
|
|
|
|
\ud800\udc61bc[\w]
|
|
\ud800\udc61bcd
|
|
true \ud800\udc61bcd 0
|
|
|
|
\ud800\udc61bc[\sdef]*
|
|
\ud800\udc61bc def
|
|
true \ud800\udc61bc def 0
|
|
|
|
\ud800\udc61bc[\sy-z]*
|
|
\ud800\udc61bc y z
|
|
true \ud800\udc61bc y z 0
|
|
|
|
\ud800\udc01bc[\ud800\udc01-\ud800\udc04\sm-p]*
|
|
\ud800\udc01bc\ud800\udc01\ud800\udc01 mn p
|
|
true \ud800\udc01bc\ud800\udc01\ud800\udc01 mn p 0
|
|
|
|
// Test the whitespace escape sequence
|
|
\ud800\udc61b\s\ud800\udc03
|
|
\ud800\udc61b \ud800\udc03
|
|
true \ud800\udc61b \ud800\udc03 0
|
|
|
|
\s\s\s
|
|
bl\ud800\udc61h err
|
|
false 0
|
|
|
|
\S\S\s
|
|
bl\ud800\udc61h err
|
|
true \ud800\udc61h 0
|
|
|
|
// Test the digit escape sequence
|
|
\ud800\udc61b\d\ud800\udc03
|
|
\ud800\udc61b9\ud800\udc03
|
|
true \ud800\udc61b9\ud800\udc03 0
|
|
|
|
\d\d\d
|
|
bl\ud800\udc61h45
|
|
false 0
|
|
|
|
// Test the caret metacharacter
|
|
^\ud800\udc61bc
|
|
\ud800\udc61bcdef
|
|
true \ud800\udc61bc 0
|
|
|
|
^\ud800\udc61bc
|
|
bcd\ud800\udc61bc
|
|
false 0
|
|
|
|
// Greedy ? metacharacter
|
|
\ud800\udc61?\ud800\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
|
|
true \ud800\udc61\ud800\udc02 0
|
|
|
|
\udc61?\ud800\udc02
|
|
\ud800\udc61\udc61\udc61\ud800\udc02
|
|
true \udc61\ud800\udc02 0
|
|
|
|
\ud800\udc61?\ud800\udc02
|
|
\ud800\udc02
|
|
true \ud800\udc02 0
|
|
|
|
\ud800?\ud800\udc02
|
|
\ud800\udc02
|
|
true \ud800\udc02 0
|
|
|
|
\ud800\udc61?\ud800\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc03\ud800\udc03\ud800\udc03
|
|
false 0
|
|
|
|
.?\ud800\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
|
|
true \ud800\udc61\ud800\udc02 0
|
|
|
|
// Reluctant ? metacharacter
|
|
\ud800\udc61??\ud800\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
|
|
true \ud800\udc61\ud800\udc02 0
|
|
|
|
\ud800??\ud800\udc02
|
|
\ud800\ud800\ud8001\ud800\ud800\udc02
|
|
true \ud800\ud800\udc02 0
|
|
|
|
\ud800\udc61??\ud800\udc02
|
|
\ud800\udc02
|
|
true \ud800\udc02 0
|
|
|
|
\ud800??\ud800\udc02
|
|
\ud800\udc02
|
|
true \ud800\udc02 0
|
|
|
|
\ud800\udc61??\ud800\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61ccc
|
|
false 0
|
|
|
|
.??\ud800\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
|
|
true \ud800\udc61\ud800\udc02 0
|
|
|
|
// Possessive ? metacharacter
|
|
\ud800\udc61?+\ud800\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
|
|
true \ud800\udc61\ud800\udc02 0
|
|
|
|
\ud800\udc61?+\ud800\udc02
|
|
\ud800\udc02
|
|
true \ud800\udc02 0
|
|
|
|
\ud800\udc61?+\ud800\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61ccc
|
|
false 0
|
|
|
|
.?+\ud800\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
|
|
true \ud800\udc61\ud800\udc02 0
|
|
|
|
// Greedy + metacharacter
|
|
\ud800\udc61+\ud800\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
|
|
true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0
|
|
|
|
\udc61+\ud800\udc02
|
|
\ud800\udc61\udc61\udc61\udc61\ud800\udc02
|
|
true \udc61\udc61\udc61\ud800\udc02 0
|
|
|
|
\ud800\udc61+\ud800\udc02
|
|
\ud800\udc02
|
|
false 0
|
|
|
|
\ud800+\ud800\udc02
|
|
\ud800\udc02
|
|
false 0
|
|
|
|
\ud800\udc61+\ud800\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61ccc
|
|
false 0
|
|
|
|
.+\ud800\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
|
|
true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0
|
|
|
|
.+\ud800\udc02
|
|
\ud800\udc61\udc61\udc61\udc61\ud800\udc02
|
|
true \ud800\udc61\udc61\udc61\udc61\ud800\udc02 0
|
|
|
|
// Reluctant + metacharacter
|
|
\ud800\udc61+?\ud800\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
|
|
true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0
|
|
|
|
\udc61+?\ud800\udc02
|
|
\udc61\udc61\udc61\udc61\ud800\udc02
|
|
true \udc61\udc61\udc61\udc61\ud800\udc02 0
|
|
|
|
\ud800\udc61+?\ud800\udc02
|
|
\ud800\udc02
|
|
false 0
|
|
|
|
\ud800+?\ud800\udc02
|
|
\ud800\udc02
|
|
false 0
|
|
|
|
\ud800\udc61+?\ud800\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61ccc
|
|
false 0
|
|
|
|
.+?\ud800\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
|
|
true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0
|
|
|
|
// Possessive + metacharacter
|
|
\ud800\udc61++\ud800\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
|
|
true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02 0
|
|
|
|
\ud800\udc61++\ud800\udc02
|
|
\ud800\udc02
|
|
false 0
|
|
|
|
\ud800\udc61++\ud800\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61ccc
|
|
false 0
|
|
|
|
.++\ud800\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc02
|
|
false 0
|
|
|
|
// Greedy Repetition
|
|
\ud800\udc61{2,3}
|
|
\ud800\udc61
|
|
false 0
|
|
|
|
\ud800\udc61{2,3}
|
|
\ud800\udc61\ud800\udc61
|
|
true \ud800\udc61\ud800\udc61 0
|
|
|
|
\ud800\udc61{2,3}
|
|
\ud800\udc61\ud800\udc61\ud800\udc61
|
|
true \ud800\udc61\ud800\udc61\ud800\udc61 0
|
|
|
|
\ud800\udc61{2,3}
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61
|
|
true \ud800\udc61\ud800\udc61\ud800\udc61 0
|
|
|
|
\ud800\udc61{3,}
|
|
zzz\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61zzz
|
|
true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61 0
|
|
|
|
\ud800\udc61{3,}
|
|
zzz\ud800\udc61\ud800\udc61zzz
|
|
false 0
|
|
|
|
// Reluctant Repetition
|
|
\ud800\udc61{2,3}?
|
|
\ud800\udc61
|
|
false 0
|
|
|
|
\ud800\udc61{2,3}?
|
|
\ud800\udc61\ud800\udc61
|
|
true \ud800\udc61\ud800\udc61 0
|
|
|
|
\ud800\udc61{2,3}?
|
|
\ud800\udc61\ud800\udc61\ud800\udc61
|
|
true \ud800\udc61\ud800\udc61 0
|
|
|
|
\ud800\udc61{2,3}?
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61
|
|
true \ud800\udc61\ud800\udc61 0
|
|
|
|
// Zero width Positive lookahead
|
|
\ud800\udc61\ud802\udc02\ud803\udc03(?=\ud804\udc04)
|
|
zzz\ud800\udc61\ud802\udc02\ud803\udc03\ud804\udc04
|
|
true \ud800\udc61\ud802\udc02\ud803\udc03 0
|
|
|
|
\ud800\udc61\ud802\udc02\ud803\udc03(?=\ud804\udc04)
|
|
zzz\ud800\udc61\ud802\udc02\ud803\udc03e\ud804\udc04
|
|
false 0
|
|
|
|
\ud800\udc61\ud802\udc02\ud803\udc03(?=\udcff\ud804\udc04)
|
|
zzz\ud800\udc61\ud802\udc02\ud803\udc03\udcff\ud804\udc04
|
|
true \ud800\udc61\ud802\udc02\ud803\udc03 0
|
|
|
|
\ud800\udc61\ud802\udc02\ud803\udc03(?=\udcff\ud804\udc04)
|
|
zzz\ud800\udc61\ud802\udc02\ud803\udc03\ud8ff\udcff\ud804\udc04
|
|
false 0
|
|
|
|
// Zero width Negative lookahead
|
|
\ud800\udc61\ud802\udc02\ud803\udc03(?!\ud804\udc04)
|
|
zz\ud800\udc61\ud802\udc02\ud803\udc03\ud804\udc04
|
|
false 0
|
|
|
|
a\ud802\udc02\ud803\udc03(?!\ud804\udc04)
|
|
zza\ud802\udc02\ud803\udc03\udc04\ud804\udc04
|
|
true a\ud802\udc02\ud803\udc03 0
|
|
|
|
\ud800\udc61\ud802\udc02\ud803\udc03(?!\ud804\udc04\ud8ff)
|
|
zz\ud800\udc61\ud802\udc02\ud803\udc03\ud804\udc04\ud8ffX
|
|
false 0
|
|
|
|
a\ud802\udc02\ud803\udc03(?!\ud804\udc04\ud8ff)
|
|
zza\ud802\udc02\ud803\udc03e\ud804\udc04\ud8ff\udcff
|
|
true a\ud802\udc02\ud803\udc03 0
|
|
|
|
// Zero width Positive lookbehind
|
|
(?<=\ud801\udc01\ud802\udc02)\ud803\udc03
|
|
\ud801\udc01\ud802\udc02\ud803\udc03
|
|
true \ud803\udc03 0
|
|
|
|
// Zero width Negative lookbehind
|
|
(?<!\ud801\udc01)\ud802\udc02\ud803\udc03
|
|
###\ud800\udc00\ud802\udc02\ud803\udc03
|
|
true \ud802\udc02\ud803\udc03 0
|
|
|
|
(?<![\ud801\udc01\ud802\udc02])\ud803\udc03.
|
|
\ud801\udc01\ud803\udc03x\ud800\udc00\ud803\udc03y
|
|
true \ud803\udc03y 0
|
|
|
|
(?<!\ud801\udc01)\ud803\udc03
|
|
\ud801\udc01\ud803\udc03
|
|
false 0
|
|
|
|
// Nondeterministic group
|
|
(\ud800\udc61+\ud802)+
|
|
\ud800\udc61\ud802\ud800\udc61\ud802\ud800\udc61\ud802
|
|
true \ud800\udc61\ud802\ud800\udc61\ud802\ud800\udc61\ud802 1 \ud800\udc61\ud802
|
|
|
|
(\ud800\udc61|\ud802)+
|
|
\ud800\ud802\udc61\ud803\ud802\udc61
|
|
false 1
|
|
|
|
// Deterministic group
|
|
(\ud800\udc61\ud802)+
|
|
\ud800\udc61\ud802\ud800\udc61\ud802\ud800\udc61\ud802
|
|
true \ud800\udc61\ud802\ud800\udc61\ud802\ud800\udc61\ud802 1 \ud800\udc61\ud802
|
|
|
|
(\ud800\udc61\ud802)+
|
|
\ud800\udc61ccccd
|
|
false 1
|
|
|
|
(\ud800\udc61\ud802)*
|
|
\ud800\udc61\ud802\ud800\udc61\ud802\ud800\udc61\ud802
|
|
true \ud800\udc61\ud802\ud800\udc61\ud802\ud800\udc61\ud802 1 \ud800\udc61\ud802
|
|
|
|
(\ud800\udc61b)(cd*)
|
|
zzz\ud800\udc61bczzz
|
|
true \ud800\udc61bc 2 \ud800\udc61b c
|
|
|
|
\ud800\udc61bc(\ud804\udc04)*\ud800\udc61bc
|
|
\ud800\udc61bc\ud804\udc04\ud804\udc04\ud804\udc04\ud804\udc04\ud804\udc04\ud800\udc61bc
|
|
true \ud800\udc61bc\ud804\udc04\ud804\udc04\ud804\udc04\ud804\udc04\ud804\udc04\ud800\udc61bc 1 \ud804\udc04
|
|
|
|
// Back references
|
|
(\ud800\udc61*)\ud802\udc02c\1
|
|
zzz\ud800\udc61\ud800\udc61\ud802\udc02c\ud800\udc61\ud800\udc61zzz
|
|
true \ud800\udc61\ud800\udc61\ud802\udc02c\ud800\udc61\ud800\udc61 1 \ud800\udc61\ud800\udc61
|
|
|
|
(\ud800\udc61*)\ud802\udc02c\1
|
|
zzz\ud800\udc61\ud800\udc61\ud802\udc02c\ud800\udc61zzz
|
|
true \ud800\udc61\ud802\udc02c\ud800\udc61 1 \ud800\udc61
|
|
|
|
(\ud800\udc07\ud800\udc14*)(\ud804\udc04\ud804\udc04e)*(yu)\1\3(vv)
|
|
zzz\ud800\udc07\ud800\udc14\ud800\udc14\ud804\udc04\ud804\udc04e\ud804\udc04\ud804\udc04eyu\ud800\udc07\ud800\udc14\ud800\udc14yuvvzzz
|
|
true \ud800\udc07\ud800\udc14\ud800\udc14\ud804\udc04\ud804\udc04e\ud804\udc04\ud804\udc04eyu\ud800\udc07\ud800\udc14\ud800\udc14yuvv 4 \ud800\udc07\ud800\udc14\ud800\udc14 \ud804\udc04\ud804\udc04e yu vv
|
|
|
|
// Greedy * metacharacter
|
|
\ud800\udc61*\ud802\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02
|
|
true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02 0
|
|
|
|
\ud800\udc61*\ud802\udc02
|
|
\ud802\udc02
|
|
true \ud802\udc02 0
|
|
|
|
\ud800\udc61*\ud802\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61ccc
|
|
false 0
|
|
|
|
.*\ud802\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02
|
|
true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02 0
|
|
|
|
// Reluctant * metacharacter
|
|
\ud800\udc61*?\ud802\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02
|
|
true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02 0
|
|
|
|
\ud800\udc61*?\ud802\udc02
|
|
\ud802\udc02
|
|
true \ud802\udc02 0
|
|
|
|
\ud800\udc61*?\ud802\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61ccc
|
|
false 0
|
|
|
|
.*?\ud802\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02
|
|
true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02 0
|
|
|
|
// Possessive * metacharacter
|
|
\ud800\udc61*+\ud802\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02
|
|
true \ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02 0
|
|
|
|
\ud800\udc61*+\ud802\udc02
|
|
\ud802\udc02
|
|
true \ud802\udc02 0
|
|
|
|
\ud800\udc61*+\ud802\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61ccc
|
|
false 0
|
|
|
|
.*+\ud802\udc02
|
|
\ud800\udc61\ud800\udc61\ud800\udc61\ud800\udc61\ud802\udc02
|
|
false 0
|
|
|
|
// Case insensitivity
|
|
(?iu)\ud801\udc00\ud801\udc01\ud801\udc02x
|
|
\ud801\udc28\ud801\udc29\ud801\udc2aX
|
|
true \ud801\udc28\ud801\udc29\ud801\udc2aX 0
|
|
|
|
\ud801\udc00(?iu)\ud801\udc01\ud801\udc02
|
|
\ud801\udc00\ud801\udc29\ud801\udc2a
|
|
true \ud801\udc00\ud801\udc29\ud801\udc2a 0
|
|
|
|
\ud801\udc00(?iu)\ud801\udc01\ud801\udc02
|
|
\ud801\udc28\ud801\udc29\ud801\udc2a
|
|
false 0
|
|
|
|
(?iu)\ud801\udc00[\ud801\udc01\ud801\udc02]+
|
|
\ud801\udc28\ud801\udc29\ud801\udc2a
|
|
true \ud801\udc28\ud801\udc29\ud801\udc2a 0
|
|
|
|
(?iu)[\ud801\udc00-\ud801\udc02]+
|
|
\ud801\udc28\ud801\udc29\ud801\udc2a
|
|
true \ud801\udc28\ud801\udc29\ud801\udc2a 0
|
|
|
|
// Disable metacharacters - test both length <= 3 and length > 3
|
|
// so that the BM (Boyer-Moore) optimization is part of the test
|
|
\Q***\E\ud801\udc01\ud802\udc02\ud800\udc03
|
|
***\ud801\udc01\ud802\udc02\ud800\udc03
|
|
true ***\ud801\udc01\ud802\udc02\ud800\udc03 0
|
|
|
|
\ud802\udc02l\Q***\E\ud801\udc01\ud802\udc02\ud800\udc03
|
|
\ud802\udc02l***\ud801\udc01\ud802\udc02\ud800\udc03
|
|
true \ud802\udc02l***\ud801\udc01\ud802\udc02\ud800\udc03 0
|
|
|
|
\Q***\ud801\udc01\ud802\udc02\ud800\udc03
|
|
***\ud801\udc01\ud802\udc02\ud800\udc03
|
|
true ***\ud801\udc01\ud802\udc02\ud800\udc03 0
|
|
|
|
\ud802\udc02l\ud801\udc01h\Q***\E\ud801\udc01\ud802\udc02\ud800\udc03
|
|
\ud802\udc02l\ud801\udc01h***\ud801\udc01\ud802\udc02\ud800\udc03
|
|
true \ud802\udc02l\ud801\udc01h***\ud801\udc01\ud802\udc02\ud800\udc03 0
|
|
|
|
\Q***\ud801\udc01\ud802\udc02\ud800\udc03
|
|
***\ud801\udc01\ud802\udc02\ud800\udc03
|
|
true ***\ud801\udc01\ud802\udc02\ud800\udc03 0
|
|
|
|
\Q*\ud801\udc01\ud802\udc02
|
|
*\ud801\udc01\ud802\udc02
|
|
true *\ud801\udc01\ud802\udc02 0
|
|
|
|
\ud802\udc02l\ud801\udc01h\Q***\ud801\udc01\ud802\udc02\ud800\udc03
|
|
\ud802\udc02l\ud801\udc01h***\ud801\udc01\ud802\udc02\ud800\udc03
|
|
true \ud802\udc02l\ud801\udc01h***\ud801\udc01\ud802\udc02\ud800\udc03 0
|
|
|
|
\ud802\udc02l\ud801\udc01\Q***\ud801\udc01\ud802\udc02\ud800\udc03
|
|
\ud802\udc02l\ud801\udc01***\ud801\udc01\ud802\udc02\ud800\udc03
|
|
true \ud802\udc02l\ud801\udc01***\ud801\udc01\ud802\udc02\ud800\udc03 0
|
|
|
|
// Test cases below were copied from i18n QE's RegexSupplementaryTests.txt
|
|
\uD800\uDFFF\uD801\uDFF1\uDB00\uDC00
|
|
\uD800\uDFFF\uD801\uDFF1\uDB00\uDC00
|
|
true \uD800\uDFFF\uD801\uDFF1\uDB00\uDC00 0
|
|
|
|
\uD800\uDFFF\uD801\uDFF1\uDB00\uDC00
|
|
\u1000\uD801\uDFF1\uDB00\uDC00
|
|
false 0
|
|
|
|
\uD800\uDFFF\uD801\uDFF1\uDB00\uDC00
|
|
\uD800\uDFFF\uFFFF\uDB00\uDC00
|
|
false 0
|
|
|
|
\uD800\uDFFF\uD801\uDFF1\uDB00\uDC00
|
|
\uD800\uDFFF\uD801\uDFF1\uFFFF
|
|
false 0
|
|
|
|
\u1000.\uFFFF
|
|
\u1000\uD800\uDFFF\uFFFF
|
|
true \u1000\uD800\uDFFF\uFFFF 0
|
|
|
|
//=======
|
|
// Ranges
|
|
//=======
|
|
[a-\uD800\uDFFF]
|
|
\uDFFF
|
|
true \uDFFF 0
|
|
|
|
[a-\uD800\uDFFF]
|
|
\uD800
|
|
true \uD800 0
|
|
|
|
[a-\uD800\uDFFF]
|
|
\uD800\uDFFF
|
|
true \uD800\uDFFF 0
|
|
|
|
[\uD800\uDC00-\uDBFF\uDFFF]
|
|
\uDBFF
|
|
false 0
|
|
|
|
[\uD800\uDC00-\uDBFF\uDFFF]
|
|
\uDC00
|
|
false 0
|
|
|
|
[\uD800-\uDFFF]
|
|
\uD800\uDFFF
|
|
false 0
|
|
|
|
[\uD800-\uDFFF]
|
|
\uDFFF\uD800
|
|
true \uDFFF 0
|
|
|
|
foo[^\uD800-\uDFFF]
|
|
foo\uD800\uDFFF
|
|
true foo\uD800\uDFFF 0
|
|
|
|
foo[^\uD800-\uDFFF]
|
|
foo\uDFFF\uD800
|
|
false 0
|
|
|
|
//fo\uD800[\uDC00-\uDFFF]
|
|
|
|
//==================
|
|
// Character Classes
|
|
//==================
|
|
// Simple class
|
|
[ab\uD800\uDFFFcd]at
|
|
\uD800at
|
|
false 0
|
|
|
|
[ab\uD800\uDFFFcd]at
|
|
\uD800\uDFFFat
|
|
true \uD800\uDFFFat 0
|
|
|
|
// Negation
|
|
[^\uD800\uDFFFcd]at
|
|
\uD800at
|
|
true \uD800at 0
|
|
|
|
[^\uD800\uDFFFcd]at
|
|
\uDFFFat
|
|
true \uDFFFat 0
|
|
|
|
// Inclusive range
|
|
[\u0000-\uD800\uDFFF-\uFFFF]
|
|
\uD800\uDFFF
|
|
true \uD800\uDFFF 0
|
|
|
|
// Unions
|
|
[\u0000-\uD800[\uDFFF-\uFFFF]]
|
|
\uD800\uDFFF
|
|
false 0
|
|
|
|
|
|
// Intersection
|
|
[\u0000-\uFFFF&&[\uD800\uDFFF]]
|
|
\uD800\uDFFF
|
|
false 0
|
|
|
|
[\u0000-\uFFFF&&[\uD800\uDFFF]]
|
|
\uD800
|
|
false 0
|
|
|
|
[\u0000-\uFFFF&&[\uDFFF\uD800]]
|
|
\uD800
|
|
true \uD800 0
|
|
|
|
[\u0000-\uFFFF&&[\uDFFF\uD800\uDC00]]
|
|
\uDC00
|
|
false 0
|
|
|
|
[\u0000-\uDFFF&&[\uD800-\uFFFF]]
|
|
\uD800\uDFFF
|
|
false 0
|
|
|
|
[\u0000-\uDFFF&&[\uD800-\uFFFF]]
|
|
\uDFFF\uD800
|
|
true \uDFFF 0
|
|
|
|
// Subtraction
|
|
[\u0000-\uD800\uDFFF&&[^\uD800\uDC00]]
|
|
\uD800
|
|
true \uD800 0
|
|
|
|
[\u0000-\uD800\uDFFF&&[^\uD800\uDC00]]
|
|
\uDC00
|
|
true \uDC00 0
|
|
|
|
[\u0000-\uD800\uDFFF&&[^\uD800\uDC00]]
|
|
\uD800\uDFFF
|
|
true \uD800\uDFFF 0
|
|
|
|
[\u0000-\uD800\uDFFF&&[^\uD800\uDBFF\uDC00]]
|
|
\uD800
|
|
false 0
|
|
|
|
[\u0000-\uD800\uDFFF&&[^\uDC00\uD800\uDBFF]]
|
|
\uD800\uDC00
|
|
true \uD800\uDC00 0
|
|
|
|
// Quantifiers
|
|
a\uD800\uDFFF?
|
|
a\uD800
|
|
true a 0
|
|
|
|
a\uD800\uDFFF?
|
|
a\uDFFF
|
|
true a 0
|
|
|
|
a\uD800\uDFFF?
|
|
a\uD800\uDFFF
|
|
true a\uD800\uDFFF 0
|
|
|
|
a\uDFFF\uD800?
|
|
a\uDFFF
|
|
true a\uDFFF 0
|
|
|
|
a\uDFFF\uD800?
|
|
a\uD800
|
|
false 0
|
|
|
|
\uD800\uDFFF\uDC00?
|
|
\uD800
|
|
false 0
|
|
|
|
\uD800\uDFFF\uDC00?
|
|
\uD800\uDFFF
|
|
true \uD800\uDFFF 0
|
|
|
|
a\uD800\uDFFF??
|
|
a\uDFFF
|
|
true a 0
|
|
|
|
a\uD800\uDFFF*
|
|
a
|
|
true a 0
|
|
|
|
a\uD800\uDFFF*
|
|
a\uD800
|
|
true a 0
|
|
|
|
\uD800\uDFFF*
|
|
\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF
|
|
true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0
|
|
|
|
\uD800\uDFFF*
|
|
\uD800\uDFFF\uDFFF\uDFFF\uDFFF
|
|
true \uD800\uDFFF 0
|
|
|
|
\uD800*\uDFFF
|
|
\uD800\uDFFF
|
|
false 0
|
|
|
|
a\uD800\uDFFF*
|
|
a\uD800
|
|
true a 0
|
|
|
|
\uDFFF\uD800*
|
|
\uDFFF
|
|
true \uDFFF 0
|
|
|
|
\uDFFF\uD800*
|
|
\uDFFF\uD800\uD800\uD800
|
|
true \uDFFF\uD800\uD800\uD800 0
|
|
|
|
\uD800\uDFFF+
|
|
\uD800\uDFFF\uDFFF\uDFFF
|
|
true \uD800\uDFFF 0
|
|
|
|
\uD800\uDFFF+
|
|
\uD800
|
|
false 0
|
|
|
|
\uD800\uDFFF+
|
|
\uD800\uDFFF
|
|
true \uD800\uDFFF 0
|
|
|
|
\uD800\uDFFF+
|
|
\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF
|
|
true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0
|
|
|
|
\uDFFF\uD800+
|
|
\uDFFF\uD800\uDFFF\uD800
|
|
false 0
|
|
|
|
\uD800+\uDFFF
|
|
\uD800\uDFFF
|
|
false 0
|
|
|
|
\uD800+\uDFFF
|
|
\uD800
|
|
false 0
|
|
|
|
\uDFFF+\uD800
|
|
\uD800
|
|
false 0
|
|
|
|
\uDFFF+\uD800
|
|
\uDFFF\uD800
|
|
true \uDFFF\uD800 0
|
|
|
|
\uD800\uDFFF{3}
|
|
\uD800\uDFFF\uDFFF\uDFFF
|
|
false 0
|
|
|
|
\uD800\uDFFF{3}
|
|
\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF
|
|
true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0
|
|
|
|
\uDFFF\uD800{3}
|
|
\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800
|
|
false 0
|
|
|
|
\uDFFF\uD800{3}
|
|
\uDFFF\uD800\uD800\uD800
|
|
true \uDFFF\uD800\uD800\uD800 0
|
|
|
|
\uD800\uDFFF{2,}
|
|
\uD800\uDFFF
|
|
false 0
|
|
|
|
\uD800\uDFFF{2,}
|
|
\uD800\uDFFF\uDFFF
|
|
false 0
|
|
|
|
\uD800\uDFFF{2,}
|
|
\uD800\uDFFF\uD800\uDFFF
|
|
true \uD800\uDFFF\uD800\uDFFF 0
|
|
|
|
\uDFFF\uD800{2,}
|
|
\uDFFF\uD800\uDFFF\uD800
|
|
false 0
|
|
|
|
\uDFFF\uD800{2,}
|
|
\uDFFF\uD800\uD800\uD800
|
|
true \uDFFF\uD800\uD800\uD800 0
|
|
|
|
\uD800\uDFFF{3,4}
|
|
\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF
|
|
true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0
|
|
|
|
\uD800\uDFFF{3,4}
|
|
\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF\uD800
|
|
true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0
|
|
|
|
\uD800\uDFFF{3,4}
|
|
\uD800\uDFFF\uD800\uD800\uDFFF\uD800\uDFFF
|
|
false 0
|
|
|
|
\uDFFF\uD800{3,5}
|
|
\uDFFF\uD800\uD800\uD800\uD800\uD800\uD800\uD800
|
|
true \uDFFF\uD800\uD800\uD800\uD800\uD800 0
|
|
|
|
\uD800\uDFFF{3,5}
|
|
\uD800\uDFFF\uDFFF\uDFFF
|
|
false 0
|
|
|
|
\uD800\uDFFF{3,5}
|
|
\uD800\uDFFF\uD800\uD800\uDFFF\uD800\uDFFF\uD800\uDFFF
|
|
true \uD800\uDFFF\uD800\uDFFF\uD800\uDFFF 0
|
|
|
|
// Groupings
|
|
(\uD800(\uDFFF))
|
|
\uD800\uDFFF
|
|
false 2
|
|
|
|
(\uD800(\uDC00)(\uDFFF))
|
|
\uD800\uDC00\uDFFF
|
|
false 3
|
|
|
|
((\uD800)(\uDFFF))
|
|
\uD800\uDFFF
|
|
false 3
|
|
|
|
(\uD800(\uDFFF)\uDFFF)
|
|
\uD800\uDFFF
|
|
false 2
|
|
|
|
(\uDFFF(\uD800)(\uDBFF))
|
|
\uDFFF\uD800\uDBFF
|
|
true \uDFFF\uD800\uDBFF 3 \uDFFF\uD800\uDBFF \uD800 \uDBFF
|
|
|
|
(\uDFFF(\uD800)(\uDC00))
|
|
\uDFFF\uD800\uDC00
|
|
false 3
|
|
|
|
(\uDFFF\uD800(\uDC00\uDBFF))
|
|
\uDFFF\uD800\uDC00\uDBFF
|
|
false 2
|
|
|
|
(\uD800\uDFFF(\uDBFF)(\uDC00))
|
|
\uD800\uDFFF\uDBFF\uDC00
|
|
false 3
|
|
|
|
(\uD800\uDFFF(\uDBFF\uDC00))
|
|
\uD800\uDFFF\uDBFF\uDC00
|
|
true \uD800\uDFFF\uDBFF\uDC00 2 \uD800\uDFFF\uDBFF\uDC00 \uDBFF\uDC00
|