8035133: Locale matching: Weight q=0 isn't handled correctly

Reviewed-by: okutsu, peytoia
This commit is contained in:
Nishit Jain 2016-08-03 09:37:57 +09:00
parent 6d70a9e52d
commit 55be447abd
2 changed files with 409 additions and 46 deletions

View File

@ -28,16 +28,12 @@ package sun.util.locale;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Locale.*;
import static java.util.Locale.FilteringMode.*;
import static java.util.Locale.LanguageRange.*;
import java.util.Map;
import java.util.Set;
/**
* Implementation for BCP47 Locale matching
@ -111,10 +107,22 @@ public final class LocaleMatcher {
private static List<String> filterBasic(List<LanguageRange> priorityList,
Collection<String> tags) {
int splitIndex = splitRanges(priorityList);
List<LanguageRange> nonZeroRanges;
List<LanguageRange> zeroRanges;
if (splitIndex != -1) {
nonZeroRanges = priorityList.subList(0, splitIndex);
zeroRanges = priorityList.subList(splitIndex, priorityList.size());
} else {
nonZeroRanges = priorityList;
zeroRanges = List.of();
}
List<String> list = new ArrayList<>();
for (LanguageRange lr : priorityList) {
for (LanguageRange lr : nonZeroRanges) {
String range = lr.getRange();
if (range.equals("*")) {
tags = removeTagsMatchingBasicZeroRange(zeroRanges, tags);
return new ArrayList<String>(tags);
} else {
for (String tag : tags) {
@ -122,7 +130,8 @@ public final class LocaleMatcher {
if (tag.startsWith(range)) {
int len = range.length();
if ((tag.length() == len || tag.charAt(len) == '-')
&& !list.contains(tag)) {
&& !list.contains(tag)
&& !shouldIgnoreFilterBasicMatch(zeroRanges, tag)) {
list.add(tag);
}
}
@ -133,12 +142,76 @@ public final class LocaleMatcher {
return list;
}
private static List<String> filterExtended(List<LanguageRange> priorityList,
Collection<String> tags) {
List<String> list = new ArrayList<>();
for (LanguageRange lr : priorityList) {
/**
* Removes the tag(s) which are falling in the basic exclusion range(s) i.e
* range(s) with q=0 and returns the updated collection. If the basic
* language ranges contains '*' as one of its non zero range then instead of
* returning all the tags, remove those which are matching the range with
* quality weight q=0.
*/
private static Collection<String> removeTagsMatchingBasicZeroRange(
List<LanguageRange> zeroRange, Collection<String> tags) {
if (zeroRange.isEmpty()) {
return tags;
}
List<String> matchingTags = new ArrayList<>();
for (String tag : tags) {
tag = tag.toLowerCase(Locale.ROOT);
if (!shouldIgnoreFilterBasicMatch(zeroRange, tag)) {
matchingTags.add(tag);
}
}
return matchingTags;
}
/**
* The tag which is falling in the basic exclusion range(s) should not
* be considered as the matching tag. Ignores the tag matching with the
* non-zero ranges, if the tag also matches with one of the basic exclusion
* ranges i.e. range(s) having quality weight q=0
*/
private static boolean shouldIgnoreFilterBasicMatch(
List<LanguageRange> zeroRange, String tag) {
if (zeroRange.isEmpty()) {
return false;
}
for (LanguageRange lr : zeroRange) {
String range = lr.getRange();
if (range.equals("*")) {
return true;
}
if (tag.startsWith(range)) {
int len = range.length();
if ((tag.length() == len || tag.charAt(len) == '-')) {
return true;
}
}
}
return false;
}
private static List<String> filterExtended(List<LanguageRange> priorityList,
Collection<String> tags) {
int splitIndex = splitRanges(priorityList);
List<LanguageRange> nonZeroRanges;
List<LanguageRange> zeroRanges;
if (splitIndex != -1) {
nonZeroRanges = priorityList.subList(0, splitIndex);
zeroRanges = priorityList.subList(splitIndex, priorityList.size());
} else {
nonZeroRanges = priorityList;
zeroRanges = List.of();
}
List<String> list = new ArrayList<>();
for (LanguageRange lr : nonZeroRanges) {
String range = lr.getRange();
if (range.equals("*")) {
tags = removeTagsMatchingExtendedZeroRange(zeroRanges, tags);
return new ArrayList<String>(tags);
}
String[] rangeSubtags = range.split("-");
@ -150,33 +223,101 @@ public final class LocaleMatcher {
continue;
}
int rangeIndex = 1;
int tagIndex = 1;
while (rangeIndex < rangeSubtags.length
&& tagIndex < tagSubtags.length) {
if (rangeSubtags[rangeIndex].equals("*")) {
rangeIndex++;
} else if (rangeSubtags[rangeIndex].equals(tagSubtags[tagIndex])) {
rangeIndex++;
tagIndex++;
} else if (tagSubtags[tagIndex].length() == 1
&& !tagSubtags[tagIndex].equals("*")) {
break;
} else {
tagIndex++;
}
}
if (rangeSubtags.length == rangeIndex && !list.contains(tag)) {
list.add(tag);
}
int rangeIndex = matchFilterExtendedSubtags(rangeSubtags,
tagSubtags);
if (rangeSubtags.length == rangeIndex && !list.contains(tag)
&& !shouldIgnoreFilterExtendedMatch(zeroRanges, tag)) {
list.add(tag);
}
}
}
return list;
}
/**
* Removes the tag(s) which are falling in the extended exclusion range(s)
* i.e range(s) with q=0 and returns the updated collection. If the extended
* language ranges contains '*' as one of its non zero range then instead of
* returning all the tags, remove those which are matching the range with
* quality weight q=0.
*/
private static Collection<String> removeTagsMatchingExtendedZeroRange(
List<LanguageRange> zeroRange, Collection<String> tags) {
if (zeroRange.isEmpty()) {
return tags;
}
List<String> matchingTags = new ArrayList<>();
for (String tag : tags) {
tag = tag.toLowerCase(Locale.ROOT);
if (!shouldIgnoreFilterExtendedMatch(zeroRange, tag)) {
matchingTags.add(tag);
}
}
return matchingTags;
}
/**
* The tag which is falling in the extended exclusion range(s) should
* not be considered as the matching tag. Ignores the tag matching with the
* non zero range(s), if the tag also matches with one of the extended
* exclusion range(s) i.e. range(s) having quality weight q=0
*/
private static boolean shouldIgnoreFilterExtendedMatch(
List<LanguageRange> zeroRange, String tag) {
if (zeroRange.isEmpty()) {
return false;
}
String[] tagSubtags = tag.split("-");
for (LanguageRange lr : zeroRange) {
String range = lr.getRange();
if (range.equals("*")) {
return true;
}
String[] rangeSubtags = range.split("-");
if (!rangeSubtags[0].equals(tagSubtags[0])
&& !rangeSubtags[0].equals("*")) {
continue;
}
int rangeIndex = matchFilterExtendedSubtags(rangeSubtags,
tagSubtags);
if (rangeSubtags.length == rangeIndex) {
return true;
}
}
return false;
}
private static int matchFilterExtendedSubtags(String[] rangeSubtags,
String[] tagSubtags) {
int rangeIndex = 1;
int tagIndex = 1;
while (rangeIndex < rangeSubtags.length
&& tagIndex < tagSubtags.length) {
if (rangeSubtags[rangeIndex].equals("*")) {
rangeIndex++;
} else if (rangeSubtags[rangeIndex]
.equals(tagSubtags[tagIndex])) {
rangeIndex++;
tagIndex++;
} else if (tagSubtags[tagIndex].length() == 1
&& !tagSubtags[tagIndex].equals("*")) {
break;
} else {
tagIndex++;
}
}
return rangeIndex;
}
public static Locale lookup(List<LanguageRange> priorityList,
Collection<Locale> locales) {
if (priorityList.isEmpty() || locales.isEmpty()) {
@ -205,7 +346,18 @@ public final class LocaleMatcher {
return null;
}
for (LanguageRange lr : priorityList) {
int splitIndex = splitRanges(priorityList);
List<LanguageRange> nonZeroRanges;
List<LanguageRange> zeroRanges;
if (splitIndex != -1) {
nonZeroRanges = priorityList.subList(0, splitIndex);
zeroRanges = priorityList.subList(splitIndex, priorityList.size());
} else {
nonZeroRanges = priorityList;
zeroRanges = List.of();
}
for (LanguageRange lr : nonZeroRanges) {
String range = lr.getRange();
// Special language range ("*") is ignored in lookup.
@ -217,31 +369,83 @@ public final class LocaleMatcher {
while (rangeForRegex.length() > 0) {
for (String tag : tags) {
tag = tag.toLowerCase(Locale.ROOT);
if (tag.matches(rangeForRegex)) {
if (tag.matches(rangeForRegex)
&& !shouldIgnoreLookupMatch(zeroRanges, tag)) {
return tag;
}
}
// Truncate from the end....
int index = rangeForRegex.lastIndexOf('-');
if (index >= 0) {
rangeForRegex = rangeForRegex.substring(0, index);
// if range ends with an extension key, truncate it.
index = rangeForRegex.lastIndexOf('-');
if (index >= 0 && index == rangeForRegex.length()-2) {
rangeForRegex =
rangeForRegex.substring(0, rangeForRegex.length()-2);
}
} else {
rangeForRegex = "";
}
rangeForRegex = truncateRange(rangeForRegex);
}
}
return null;
}
/**
* The tag which is falling in the exclusion range(s) should not be
* considered as the matching tag. Ignores the tag matching with the
* non zero range(s), if the tag also matches with one of the exclusion
* range(s) i.e. range(s) having quality weight q=0.
*/
private static boolean shouldIgnoreLookupMatch(List<LanguageRange> zeroRange,
String tag) {
for (LanguageRange lr : zeroRange) {
String range = lr.getRange();
// Special language range ("*") is ignored in lookup.
if (range.equals("*")) {
continue;
}
String rangeForRegex = range.replaceAll("\\x2A", "\\\\p{Alnum}*");
while (rangeForRegex.length() > 0) {
if (tag.matches(rangeForRegex)) {
return true;
}
// Truncate from the end....
rangeForRegex = truncateRange(rangeForRegex);
}
}
return false;
}
/* Truncate the range from end during the lookup match */
private static String truncateRange(String rangeForRegex) {
int index = rangeForRegex.lastIndexOf('-');
if (index >= 0) {
rangeForRegex = rangeForRegex.substring(0, index);
// if range ends with an extension key, truncate it.
index = rangeForRegex.lastIndexOf('-');
if (index >= 0 && index == rangeForRegex.length() - 2) {
rangeForRegex
= rangeForRegex.substring(0, rangeForRegex.length() - 2);
}
} else {
rangeForRegex = "";
}
return rangeForRegex;
}
/* Returns the split index of the priority list, if it contains
* language range(s) with quality weight as 0 i.e. q=0, else -1
*/
private static int splitRanges(List<LanguageRange> priorityList) {
int size = priorityList.size();
for (int index = 0; index < size; index++) {
LanguageRange range = priorityList.get(index);
if (range.getWeight() == 0) {
return index;
}
}
return -1; // no q=0 range exists
}
public static List<LanguageRange> parse(String ranges) {
ranges = ranges.replaceAll(" ", "").toLowerCase(Locale.ROOT);
if (ranges.startsWith("accept-language:")) {

View File

@ -0,0 +1,159 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8035133
* @summary Checks that the tags matching the range with quality weight q=0
* e.g. en;q=0 must be elimited and must not be the part of output
*/
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
public class Bug8035133 {
private static boolean err = false;
public static void main(String[] args) {
// checking Locale.lookup with de-ch;q=0
checkLookup("en;q=0.1, *-ch;q=0.5, de-ch;q=0",
"de-ch, en, fr-ch", "fr-CH");
/* checking Locale.lookup with *;q=0 '*' should be ignored
* in lookup
*/
checkLookup("en;q=0.1, *-ch;q=0.5, *;q=0",
"de-ch, en, fr-ch", "de-CH");
// checking Locale.filter with fr-ch;q=0 in BASIC_FILTERING
checkFilter("en;q=0.1, fr-ch;q=0.0, de-ch;q=0.5",
"de-ch, en, fr-ch", "de-CH, en");
// checking Locale.filter with *;q=0 in BASIC_FILTERING
checkFilter("de-ch;q=0.6, *;q=0", "de-ch, fr-ch", "");
// checking Locale.filter with *;q=0 in BASIC_FILTERING
checkFilter("de-ch;q=0.6, de;q=0", "de-ch", "");
// checking Locale.filter with *;q=0.6, en;q=0 in BASIC_FILTERING
checkFilter("*;q=0.6, en;q=0", "de-ch, hi-in, en", "de-CH, hi-IN");
// checking Locale.filter with de-ch;q=0 in EXTENDED_FILTERING
checkFilter("en;q=0.1, *-ch;q=0.5, de-ch;q=0",
"de-ch, en, fr-ch", "fr-CH, en");
/* checking Locale.filter with *-ch;q=0 in EXTENDED_FILTERING which
* must make filter to return "" empty or no match
*/
checkFilter("de-ch;q=0.5, *-ch;q=0", "de-ch, fr-ch", "");
/* checking Locale.filter with *;q=0 in EXTENDED_FILTERING which
* must make filter to return "" empty or no match
*/
checkFilter("*-ch;q=0.5, *;q=0", "de-ch, fr-ch", "");
/* checking Locale.filter with *;q=0.6, *-Latn;q=0 in
* EXTENDED_FILTERING
*/
checkFilter("*;q=0.6, *-Latn;q=0", "de-ch, hi-in, en-Latn",
"de-CH, hi-IN");
if (err) {
throw new RuntimeException("[LocaleMatcher method(s) failed]");
}
}
private static void checkLookup(String ranges, String tags,
String expectedLocale) {
List<Locale.LanguageRange> priorityList = Locale.LanguageRange
.parse(ranges);
List<Locale> localeList = generateLocales(tags);
Locale loc = Locale.lookup(priorityList, localeList);
String actualLocale
= loc.toLanguageTag();
if (!actualLocale.equals(expectedLocale)) {
System.err.println("Locale.lookup failed with ranges: " + ranges
+ " Expected: " + expectedLocale
+ " Actual: " + actualLocale);
err = true;
}
}
private static void checkFilter(String ranges, String tags,
String expectedLocales) {
List<Locale.LanguageRange> priorityList = Locale.LanguageRange
.parse(ranges);
List<Locale> localeList = generateLocales(tags);
String actualLocales = getLocalesAsString(
Locale.filter(priorityList, localeList));
if (!actualLocales.equals(expectedLocales)) {
System.err.println("Locale.filter failed with ranges: " + ranges
+ " Expected: " + expectedLocales
+ " Actual: " + actualLocales);
err = true;
}
}
private static List<Locale> generateLocales(String tags) {
if (tags == null) {
return null;
}
List<Locale> localeList = new ArrayList<>();
if (tags.equals("")) {
return localeList;
}
String[] t = tags.split(", ");
for (String tag : t) {
localeList.add(Locale.forLanguageTag(tag));
}
return localeList;
}
private static String getLocalesAsString(List<Locale> locales) {
StringBuilder sb = new StringBuilder();
Iterator<Locale> itr = locales.iterator();
if (itr.hasNext()) {
sb.append(itr.next().toLanguageTag());
}
while (itr.hasNext()) {
sb.append(", ");
sb.append(itr.next().toLanguageTag());
}
return sb.toString().trim();
}
}