8013252: Regex Matcher .start and .end should be accessible by group name
8013254: Constructor \w need update to add the support of \p{Join_Control}. Added the requested methods and updated the \w constructor. Reviewed-by: mchung, alanb
This commit is contained in:
parent
68101a98fd
commit
722199f9b2
@ -25,6 +25,7 @@
|
|||||||
|
|
||||||
package java.util.regex;
|
package java.util.regex;
|
||||||
|
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An engine that performs match operations on a {@link java.lang.CharSequence
|
* An engine that performs match operations on a {@link java.lang.CharSequence
|
||||||
@ -370,11 +371,36 @@ public final class Matcher implements MatchResult {
|
|||||||
public int start(int group) {
|
public int start(int group) {
|
||||||
if (first < 0)
|
if (first < 0)
|
||||||
throw new IllegalStateException("No match available");
|
throw new IllegalStateException("No match available");
|
||||||
if (group > groupCount())
|
if (group < 0 || group > groupCount())
|
||||||
throw new IndexOutOfBoundsException("No group " + group);
|
throw new IndexOutOfBoundsException("No group " + group);
|
||||||
return groups[group * 2];
|
return groups[group * 2];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the start index of the subsequence captured by the given
|
||||||
|
* <a href="Pattern.html#groupname">named-capturing group</a> during the
|
||||||
|
* previous match operation.
|
||||||
|
*
|
||||||
|
* @param name
|
||||||
|
* The name of a named-capturing group in this matcher's pattern
|
||||||
|
*
|
||||||
|
* @return The index of the first character captured by the group,
|
||||||
|
* or {@code -1} if the match was successful but the group
|
||||||
|
* itself did not match anything
|
||||||
|
*
|
||||||
|
* @throws IllegalStateException
|
||||||
|
* If no match has yet been attempted,
|
||||||
|
* or if the previous match operation failed
|
||||||
|
*
|
||||||
|
* @throws IllegalArgumentException
|
||||||
|
* If there is no capturing group in the pattern
|
||||||
|
* with the given name
|
||||||
|
* @since 1.8
|
||||||
|
*/
|
||||||
|
public int start(String name) {
|
||||||
|
return groups[getMatchedGroupIndex(name) * 2];
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the offset after the last character matched. </p>
|
* Returns the offset after the last character matched. </p>
|
||||||
*
|
*
|
||||||
@ -417,11 +443,36 @@ public final class Matcher implements MatchResult {
|
|||||||
public int end(int group) {
|
public int end(int group) {
|
||||||
if (first < 0)
|
if (first < 0)
|
||||||
throw new IllegalStateException("No match available");
|
throw new IllegalStateException("No match available");
|
||||||
if (group > groupCount())
|
if (group < 0 || group > groupCount())
|
||||||
throw new IndexOutOfBoundsException("No group " + group);
|
throw new IndexOutOfBoundsException("No group " + group);
|
||||||
return groups[group * 2 + 1];
|
return groups[group * 2 + 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the offset after the last character of the subsequence
|
||||||
|
* captured by the given <a href="Pattern.html#groupname">named-capturing
|
||||||
|
* group</a> during the previous match operation.
|
||||||
|
*
|
||||||
|
* @param name
|
||||||
|
* The name of a named-capturing group in this matcher's pattern
|
||||||
|
*
|
||||||
|
* @return The offset after the last character captured by the group,
|
||||||
|
* or {@code -1} if the match was successful
|
||||||
|
* but the group itself did not match anything
|
||||||
|
*
|
||||||
|
* @throws IllegalStateException
|
||||||
|
* If no match has yet been attempted,
|
||||||
|
* or if the previous match operation failed
|
||||||
|
*
|
||||||
|
* @throws IllegalArgumentException
|
||||||
|
* If there is no capturing group in the pattern
|
||||||
|
* with the given name
|
||||||
|
* @since 1.8
|
||||||
|
*/
|
||||||
|
public int end(String name) {
|
||||||
|
return groups[getMatchedGroupIndex(name) * 2 + 1];
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the input subsequence matched by the previous match.
|
* Returns the input subsequence matched by the previous match.
|
||||||
*
|
*
|
||||||
@ -518,13 +569,7 @@ public final class Matcher implements MatchResult {
|
|||||||
* @since 1.7
|
* @since 1.7
|
||||||
*/
|
*/
|
||||||
public String group(String name) {
|
public String group(String name) {
|
||||||
if (name == null)
|
int group = getMatchedGroupIndex(name);
|
||||||
throw new NullPointerException("Null group name");
|
|
||||||
if (first < 0)
|
|
||||||
throw new IllegalStateException("No match found");
|
|
||||||
if (!parentPattern.namedGroups().containsKey(name))
|
|
||||||
throw new IllegalArgumentException("No group with name <" + name + ">");
|
|
||||||
int group = parentPattern.namedGroups().get(name);
|
|
||||||
if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
|
if ((groups[group*2] == -1) || (groups[group*2+1] == -1))
|
||||||
return null;
|
return null;
|
||||||
return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
|
return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString();
|
||||||
@ -1257,4 +1302,17 @@ public final class Matcher implements MatchResult {
|
|||||||
return text.charAt(i);
|
return text.charAt(i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the group index of the matched capturing group.
|
||||||
|
*
|
||||||
|
* @return the index of the named-capturing group
|
||||||
|
*/
|
||||||
|
int getMatchedGroupIndex(String name) {
|
||||||
|
Objects.requireNonNull(name, "Group name");
|
||||||
|
if (first < 0)
|
||||||
|
throw new IllegalStateException("No match found");
|
||||||
|
if (!parentPattern.namedGroups().containsKey(name))
|
||||||
|
throw new IllegalArgumentException("No group with name <" + name + ">");
|
||||||
|
return parentPattern.namedGroups().get(name);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -612,6 +612,7 @@ import java.util.Arrays;
|
|||||||
* <li> White_Space
|
* <li> White_Space
|
||||||
* <li> Digit
|
* <li> Digit
|
||||||
* <li> Hex_Digit
|
* <li> Hex_Digit
|
||||||
|
* <li> Join_Control
|
||||||
* <li> Noncharacter_Code_Point
|
* <li> Noncharacter_Code_Point
|
||||||
* <li> Assigned
|
* <li> Assigned
|
||||||
* </ul>
|
* </ul>
|
||||||
@ -662,7 +663,7 @@ import java.util.Arrays;
|
|||||||
* <tr><td><tt>\S</tt></td>
|
* <tr><td><tt>\S</tt></td>
|
||||||
* <td>A non-whitespace character: <tt>[^\s]</tt></td></tr>
|
* <td>A non-whitespace character: <tt>[^\s]</tt></td></tr>
|
||||||
* <tr><td><tt>\w</tt></td>
|
* <tr><td><tt>\w</tt></td>
|
||||||
* <td>A word character: <tt>[\p{Alpha}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{Digit}\p{gc=Pc}]</tt></td></tr>
|
* <td>A word character: <tt>[\p{Alpha}\p{gc=Mn}\p{gc=Me}\p{gc=Mc}\p{Digit}\p{gc=Pc}\p{IsJoin_Control}]</tt></td></tr>
|
||||||
* <tr><td><tt>\W</tt></td>
|
* <tr><td><tt>\W</tt></td>
|
||||||
* <td>A non-word character: <tt>[^\w]</tt></td></tr>
|
* <td>A non-word character: <tt>[^\w]</tt></td></tr>
|
||||||
* </table>
|
* </table>
|
||||||
|
@ -181,6 +181,7 @@ enum UnicodeProp {
|
|||||||
// \p{gc=Mark}
|
// \p{gc=Mark}
|
||||||
// \p{digit}
|
// \p{digit}
|
||||||
// \p{gc=Connector_Punctuation}
|
// \p{gc=Connector_Punctuation}
|
||||||
|
// \p{Join_Control} 200C..200D
|
||||||
|
|
||||||
public boolean is(int ch) {
|
public boolean is(int ch) {
|
||||||
return ALPHABETIC.is(ch) ||
|
return ALPHABETIC.is(ch) ||
|
||||||
@ -189,7 +190,15 @@ enum UnicodeProp {
|
|||||||
(1 << Character.COMBINING_SPACING_MARK) |
|
(1 << Character.COMBINING_SPACING_MARK) |
|
||||||
(1 << Character.DECIMAL_DIGIT_NUMBER) |
|
(1 << Character.DECIMAL_DIGIT_NUMBER) |
|
||||||
(1 << Character.CONNECTOR_PUNCTUATION)) >> Character.getType(ch)) & 1)
|
(1 << Character.CONNECTOR_PUNCTUATION)) >> Character.getType(ch)) & 1)
|
||||||
!= 0;
|
!= 0 ||
|
||||||
|
JOIN_CONTROL.is(ch);
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
|
JOIN_CONTROL {
|
||||||
|
// 200C..200D PropList.txt:Join_Control
|
||||||
|
public boolean is(int ch) {
|
||||||
|
return (ch == 0x200C || ch == 0x200D);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -212,6 +221,7 @@ enum UnicodeProp {
|
|||||||
aliases.put("WHITESPACE", "WHITE_SPACE");
|
aliases.put("WHITESPACE", "WHITE_SPACE");
|
||||||
aliases.put("HEXDIGIT","HEX_DIGIT");
|
aliases.put("HEXDIGIT","HEX_DIGIT");
|
||||||
aliases.put("NONCHARACTERCODEPOINT", "NONCHARACTER_CODE_POINT");
|
aliases.put("NONCHARACTERCODEPOINT", "NONCHARACTER_CODE_POINT");
|
||||||
|
aliases.put("JOINCONTROL", "JOIN_CONTROL");
|
||||||
}
|
}
|
||||||
|
|
||||||
public static UnicodeProp forName(String propName) {
|
public static UnicodeProp forName(String propName) {
|
||||||
|
@ -125,6 +125,10 @@ final public class POSIX_Unicode {
|
|||||||
return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
|
return (ch & 0xfffe) == 0xfffe || (ch >= 0xfdd0 && ch <= 0xfdef);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static boolean isJoinControl(int ch) {
|
||||||
|
return (ch == 0x200C || ch == 0x200D);
|
||||||
|
}
|
||||||
|
|
||||||
// \p{alpha}
|
// \p{alpha}
|
||||||
// \p{gc=Mark}
|
// \p{gc=Mark}
|
||||||
// \p{digit}
|
// \p{digit}
|
||||||
@ -136,6 +140,7 @@ final public class POSIX_Unicode {
|
|||||||
(1 << Character.COMBINING_SPACING_MARK) |
|
(1 << Character.COMBINING_SPACING_MARK) |
|
||||||
(1 << Character.CONNECTOR_PUNCTUATION)) >> Character.getType(ch)) & 1)
|
(1 << Character.CONNECTOR_PUNCTUATION)) >> Character.getType(ch)) & 1)
|
||||||
!= 0 ||
|
!= 0 ||
|
||||||
isDigit(ch);
|
isDigit(ch) ||
|
||||||
|
isJoinControl(ch);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -33,7 +33,7 @@
|
|||||||
* 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
|
* 5013885 5003322 4988891 5098443 5110268 6173522 4829857 5027748 6376940
|
||||||
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
|
* 6358731 6178785 6284152 6231989 6497148 6486934 6233084 6504326 6635133
|
||||||
* 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
|
* 6350801 6676425 6878475 6919132 6931676 6948903 6990617 7014645 7039066
|
||||||
* 7067045 7014640 7189363 8007395
|
* 7067045 7014640 7189363 8007395 8013252 8013254
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import java.util.regex.*;
|
import java.util.regex.*;
|
||||||
@ -3390,7 +3390,9 @@ public class RegExTest {
|
|||||||
private static void check(Pattern p, String s, String g, String expected) {
|
private static void check(Pattern p, String s, String g, String expected) {
|
||||||
Matcher m = p.matcher(s);
|
Matcher m = p.matcher(s);
|
||||||
m.find();
|
m.find();
|
||||||
if (!m.group(g).equals(expected))
|
if (!m.group(g).equals(expected) ||
|
||||||
|
s.charAt(m.start(g)) != expected.charAt(0) ||
|
||||||
|
s.charAt(m.end(g) - 1) != expected.charAt(expected.length() - 1))
|
||||||
failCount++;
|
failCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3420,19 +3422,42 @@ public class RegExTest {
|
|||||||
failCount++;
|
failCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void checkExpectedFail(Matcher m, String g) {
|
private static void checkExpectedIAE(Matcher m, String g) {
|
||||||
m.find();
|
m.find();
|
||||||
try {
|
try {
|
||||||
m.group(g);
|
m.group(g);
|
||||||
} catch (IllegalArgumentException iae) {
|
} catch (IllegalArgumentException x) {
|
||||||
//iae.printStackTrace();
|
//iae.printStackTrace();
|
||||||
return;
|
try {
|
||||||
} catch (NullPointerException npe) {
|
m.start(g);
|
||||||
return;
|
} catch (IllegalArgumentException xx) {
|
||||||
|
try {
|
||||||
|
m.start(g);
|
||||||
|
} catch (IllegalArgumentException xxx) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
failCount++;
|
failCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void checkExpectedNPE(Matcher m) {
|
||||||
|
m.find();
|
||||||
|
try {
|
||||||
|
m.group(null);
|
||||||
|
} catch (NullPointerException x) {
|
||||||
|
try {
|
||||||
|
m.start(null);
|
||||||
|
} catch (NullPointerException xx) {
|
||||||
|
try {
|
||||||
|
m.end(null);
|
||||||
|
} catch (NullPointerException xxx) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
failCount++;
|
||||||
|
}
|
||||||
|
|
||||||
private static void namedGroupCaptureTest() throws Exception {
|
private static void namedGroupCaptureTest() throws Exception {
|
||||||
check(Pattern.compile("x+(?<gname>y+)z+"),
|
check(Pattern.compile("x+(?<gname>y+)z+"),
|
||||||
@ -3559,10 +3584,9 @@ public class RegExTest {
|
|||||||
checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
|
checkExpectedFail("(?<6groupnamestartswithdigit>abc)(def)");
|
||||||
checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
|
checkExpectedFail("(?<gname>abc)(def)\\k<gnameX>");
|
||||||
checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
|
checkExpectedFail("(?<gname>abc)(?<gname>def)\\k<gnameX>");
|
||||||
checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
|
checkExpectedIAE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
|
||||||
"gnameX");
|
"gnameX");
|
||||||
checkExpectedFail(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"),
|
checkExpectedNPE(Pattern.compile("(?<gname>abc)(def)").matcher("abcdef"));
|
||||||
null);
|
|
||||||
report("NamedGroupCapture");
|
report("NamedGroupCapture");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3759,6 +3783,7 @@ public class RegExTest {
|
|||||||
Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
|
Matcher spaceP = Pattern.compile("\\p{IsWhiteSpace}").matcher("");
|
||||||
Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
|
Matcher definedP = Pattern.compile("\\p{IsAssigned}").matcher("");
|
||||||
Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
|
Matcher nonCCPP = Pattern.compile("\\p{IsNoncharacterCodePoint}").matcher("");
|
||||||
|
Matcher joinCrtl = Pattern.compile("\\p{IsJoinControl}").matcher("");
|
||||||
|
|
||||||
// javaMethod
|
// javaMethod
|
||||||
Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
|
Matcher lowerJ = Pattern.compile("\\p{javaLowerCase}").matcher("");
|
||||||
@ -3829,7 +3854,8 @@ public class RegExTest {
|
|||||||
Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
|
Character.isIdeographic(cp) != ideogP.reset(str).matches() ||
|
||||||
Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
|
Character.isIdeographic(cp) != ideogJ.reset(str).matches() ||
|
||||||
(Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
|
(Character.UNASSIGNED == type) == definedP.reset(str).matches() ||
|
||||||
POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches())
|
POSIX_Unicode.isNoncharacterCodePoint(cp) != nonCCPP.reset(str).matches() ||
|
||||||
|
POSIX_Unicode.isJoinControl(cp) != joinCrtl.reset(str).matches())
|
||||||
failCount++;
|
failCount++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user