8216332: Grapheme regex does not work with emoji sequences

Reviewed-by: rriggs
This commit is contained in:
Naoto Sato 2020-03-12 08:31:26 -07:00
parent add18914fb
commit eeaafbe141
3 changed files with 78 additions and 39 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -71,7 +71,7 @@ final class Grapheme {
int t1 = getGraphemeType(ch1);
if (gb11 && t0 == ZWJ && t1 == EXTENDED_PICTOGRAPHIC) {
gb11 = false;
// continue for gb11
} else if (riCount % 2 == 1 && t0 == RI && t1 == RI) {
// continue for gb12
} else if (rules[t0][t1]) {

View File

@ -0,0 +1,33 @@
#
# Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
#
# This code is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License version 2 only, as
# published by the Free Software Foundation.
#
# This code is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# version 2 for more details (a copy is included in the LICENSE file that
# accompanied this code).
#
# You should have received a copy of the GNU General Public License version
# 2 along with this work; if not, write to the Free Software Foundation,
# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
# or visit www.oracle.com if you need additional information or have any
# questions.
#
# test cases for Grapheme support. Format follows GrahemeBreakTest.txt
# from Unicode.
# https://unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.html
# bug 8216332
÷ 1F468 × 1F3FE ÷ 1F468 × 200D × 1F469 × 200D × 1F466 ÷
÷ 1F468 × 200D × 1F469 × 200D × 1F466 × 200d ÷ 0041 ÷
÷ 1F468 × 200D × 1F469 × 200D × 1F466 ÷ 0041 ÷
÷ 0041 ÷ 1F468 × 200D × 1F469 × 200D × 1F466 ÷ 0041 ÷
÷ 1F468 × 200D × 1F3FE × 200D × 1F469 × 1F3FE × 200D × 1F466 ÷ 0041 ÷

View File

@ -36,6 +36,7 @@
* 8151481 4867170 7080302 6728861 6995635 6736245 4916384 6328855 6192895
* 6345469 6988218 6693451 7006761 8140212 8143282 8158482 8176029 8184706
* 8194667 8197462 8184692 8221431 8224789 8228352 8230829 8236034 8235812
* 8216332
*
* @library /test/lib
* @library /lib/testlibrary/java/lang
@ -55,6 +56,8 @@ import java.io.ObjectOutputStream;
import java.math.BigInteger;
import java.nio.CharBuffer;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
@ -68,6 +71,8 @@ import java.util.regex.Matcher;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.stream.Stream;
import jdk.test.lib.RandomFactory;
/**
@ -4790,7 +4795,8 @@ public class RegExTest {
}
private static void grapheme() throws Exception {
Files.lines(UCDFiles.GRAPHEME_BREAK_TEST)
Stream.concat(Files.lines(UCDFiles.GRAPHEME_BREAK_TEST),
Files.lines(Paths.get(System.getProperty("test.src", "."), "GraphemeTestCases.txt")))
.filter( ln -> ln.length() != 0 && !ln.startsWith("#") )
.forEach( ln -> {
ln = ln.replaceAll("\\s+|\\([a-zA-Z]+\\)|\\[[a-zA-Z]]+\\]|#.*", "");