8264765: BreakIterator sees bogus sentence boundary in parenthesized “i.e.” phrase
Reviewed-by: joehw
This commit is contained in:
parent
ec31b3a137
commit
9ebc497b53
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1999, 2007, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -310,7 +310,7 @@ public class BreakIteratorRules extends ListResourceBundle {
|
||||
// punctuation" and quotation marks
|
||||
+ "<start-punctuation>=[:Ps::Pi:\\\"\\\'];"
|
||||
|
||||
// punctuation with may occur at the end of a sentence: "ending punctuation"
|
||||
// punctuation which may occur at the end of a sentence: "ending punctuation"
|
||||
// and quotation marks
|
||||
+ "<end>=[:Pe::Pf:\\\"\\\'];"
|
||||
|
||||
@ -323,9 +323,12 @@ public class BreakIteratorRules extends ListResourceBundle {
|
||||
// periods, which MAY signal the end of a sentence
|
||||
+ "<period>=[\\.\uff0e];"
|
||||
|
||||
// comma, which may not occur at the start of a sentence
|
||||
+ "<comma>=[\\,];"
|
||||
|
||||
// characters that may occur at the beginning of a sentence: basically anything
|
||||
// not mentioned above (letters and digits are specifically excluded)
|
||||
+ "<sent-start>=[^[:L:<space><start-punctuation><end><digit><term><period>\u2029<ignore>]];"
|
||||
+ "<sent-start>=[^[:L:<space><start-punctuation><end><digit><term><period><comma>\u2029<ignore>]];"
|
||||
|
||||
// Hindi phrase separator
|
||||
+ "<danda>=[\u0964\u0965];"
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1996, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1996, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -25,7 +25,7 @@
|
||||
* @test
|
||||
* @bug 4035266 4052418 4068133 4068137 4068139 4086052 4095322 4097779
|
||||
* 4097920 4098467 4111338 4113835 4117554 4143071 4146175 4152117
|
||||
* 4152416 4153072 4158381 4214367 4217703 4638433
|
||||
* 4152416 4153072 4158381 4214367 4217703 4638433 8264765
|
||||
* @library /java/text/testlib
|
||||
* @run main/timeout=2000 BreakIteratorTest
|
||||
* @summary test BreakIterator
|
||||
@ -746,6 +746,17 @@ public class BreakIteratorTest extends IntlTest
|
||||
generalIteratorTest(sentenceBreak, sentenceSelectionData);
|
||||
}
|
||||
|
||||
/**
 * Regression test for bug 8264765: BreakIterator reported a bogus sentence
 * boundary inside a parenthesized abbreviation phrase.
 *
 * Builds a one-element data set holding a single sentence that contains
 * "(e.g., software bug)" and passes it, together with sentenceBreak, to
 * generalIteratorTest.
 *
 * NOTE(review): presumably generalIteratorTest treats each Vector element as
 * one expected segment and checks the iterator in both directions -- confirm
 * against its definition, which is not visible here.
 */
public void TestBug8264765() {
|
||||
Vector<String> sentenceSelectionData = new Vector<String>();
|
||||
|
||||
// Comma should not be regarded as the start of a sentence,
|
||||
// otherwise the backwards rule would break the following sentence.
|
||||
sentenceSelectionData.addElement(
|
||||
"Due to a problem (e.g., software bug), the server is down. ");
|
||||
|
||||
generalIteratorTest(sentenceBreak, sentenceSelectionData);
|
||||
}
|
||||
|
||||
public void TestLineBreak() {
|
||||
Vector<String> lineSelectionData = new Vector<String>();
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user