8303056: Improve support for Unicode characters and digits in JavaDoc search
Reviewed-by: jjg
This commit is contained in:
parent
9cf12bb977
commit
52ec4bcb1b
@ -44,6 +44,9 @@ const categories = {
|
|||||||
const highlight = "<span class='result-highlight'>$&</span>";
|
const highlight = "<span class='result-highlight'>$&</span>";
|
||||||
const NO_MATCH = {};
|
const NO_MATCH = {};
|
||||||
const MAX_RESULTS = 300;
|
const MAX_RESULTS = 300;
|
||||||
|
const UNICODE_LETTER = 0;
|
||||||
|
const UNICODE_DIGIT = 1;
|
||||||
|
const UNICODE_OTHER = 2;
|
||||||
function checkUnnamed(name, separator) {
|
function checkUnnamed(name, separator) {
|
||||||
return name === "<Unnamed>" || !name ? "" : name + separator;
|
return name === "<Unnamed>" || !name ? "" : name + separator;
|
||||||
}
|
}
|
||||||
@ -127,13 +130,13 @@ function createMatcher(term, camelCase) {
|
|||||||
var pattern = "";
|
var pattern = "";
|
||||||
var upperCase = [];
|
var upperCase = [];
|
||||||
term.trim().split(/\s+/).forEach(function(w, index, array) {
|
term.trim().split(/\s+/).forEach(function(w, index, array) {
|
||||||
var tokens = w.split(/(?=[A-Z,.()<>?[\/])/);
|
var tokens = w.split(/(?=[\p{Lu},.()<>?[\/])/u);
|
||||||
for (var i = 0; i < tokens.length; i++) {
|
for (var i = 0; i < tokens.length; i++) {
|
||||||
var s = tokens[i];
|
var s = tokens[i];
|
||||||
// ',' and '?' are the only delimiters commonly followed by space in java signatures
|
// ',' and '?' are the only delimiters commonly followed by space in java signatures
|
||||||
pattern += "(" + $.ui.autocomplete.escapeRegex(s).replace(/[,?]/g, "$&\\s*?") + ")";
|
pattern += "(" + escapeUnicodeRegex(s).replace(/[,?]/g, "$&\\s*?") + ")";
|
||||||
upperCase.push(false);
|
upperCase.push(false);
|
||||||
var isWordToken = /\w$/.test(s);
|
var isWordToken = /[\p{L}\p{Nd}_]$/u.test(s);
|
||||||
if (isWordToken) {
|
if (isWordToken) {
|
||||||
if (i === tokens.length - 1 && index < array.length - 1) {
|
if (i === tokens.length - 1 && index < array.length - 1) {
|
||||||
// space in query string matches all delimiters
|
// space in query string matches all delimiters
|
||||||
@ -143,7 +146,7 @@ function createMatcher(term, camelCase) {
|
|||||||
if (!camelCase && isUpperCase(s) && s.length === 1) {
|
if (!camelCase && isUpperCase(s) && s.length === 1) {
|
||||||
pattern += "()";
|
pattern += "()";
|
||||||
} else {
|
} else {
|
||||||
pattern += "([a-z0-9$<>?[\\]]*?)";
|
pattern += "([\\p{L}\\p{Nd}\\p{Sc}<>?[\\]]*?)";
|
||||||
}
|
}
|
||||||
upperCase.push(isUpperCase(s[0]));
|
upperCase.push(isUpperCase(s[0]));
|
||||||
}
|
}
|
||||||
@ -153,10 +156,14 @@ function createMatcher(term, camelCase) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
var re = new RegExp(pattern, "gi");
|
var re = new RegExp(pattern, "gui");
|
||||||
re.upperCase = upperCase;
|
re.upperCase = upperCase;
|
||||||
return re;
|
return re;
|
||||||
}
|
}
|
||||||
|
/**
 * Escapes regular expression syntax characters in a literal string so that it
 * can be embedded in a pattern compiled with the `u` (Unicode) flag.
 *
 * Unicode regular expressions do not allow certain characters to be escaped:
 * only syntax characters may be preceded by a backslash. Escaping anything
 * else, such as whitespace, makes the RegExp constructor throw a SyntaxError.
 * Whitespace is therefore left untouched; it already matches itself literally.
 *
 * @param {string} pattern literal text that should be matched verbatim
 * @returns {string} the text with every regex syntax character backslash-escaped
 */
function escapeUnicodeRegex(pattern) {
    // Only the syntax characters [ ] { } ( ) * + ? . \ ^ $ | are escaped.
    return pattern.replace(/[\[\]{}()*+?.\\^$|]/g, '\\$&');
}
|
||||||
function findMatch(matcher, input, startOfName, endOfName) {
|
function findMatch(matcher, input, startOfName, endOfName) {
|
||||||
var from = startOfName;
|
var from = startOfName;
|
||||||
matcher.lastIndex = from;
|
matcher.lastIndex = from;
|
||||||
@ -176,20 +183,25 @@ function findMatch(matcher, input, startOfName, endOfName) {
|
|||||||
var start = match.index;
|
var start = match.index;
|
||||||
var prevEnd = -1;
|
var prevEnd = -1;
|
||||||
for (var i = 1; i < match.length; i += 2) {
|
for (var i = 1; i < match.length; i += 2) {
|
||||||
var isUpper = isUpperCase(input[start]);
|
var charType = getCharType(input[start]);
|
||||||
var isMatcherUpper = matcher.upperCase[i];
|
var isMatcherUpper = matcher.upperCase[i];
|
||||||
// capturing groups come in pairs, match and non-match
|
// capturing groups come in pairs, match and non-match
|
||||||
boundaries.push(start, start + match[i].length);
|
boundaries.push(start, start + match[i].length);
|
||||||
// make sure groups are anchored on a left word boundary
|
// make sure groups are anchored on a left word boundary
|
||||||
var prevChar = input[start - 1] || "";
|
var prevChar = input[start - 1] || "";
|
||||||
var nextChar = input[start + 1] || "";
|
var nextChar = input[start + 1] || "";
|
||||||
if (start !== 0 && !/[\W_]/.test(prevChar) && !/[\W_]/.test(input[start])) {
|
if (start !== 0) {
|
||||||
if (isUpper && (isLowerCase(prevChar) || isLowerCase(nextChar))) {
|
if (charType === UNICODE_DIGIT && getCharType(prevChar) === UNICODE_DIGIT) {
|
||||||
score -= 0.1;
|
|
||||||
} else if (isMatcherUpper && start === prevEnd) {
|
|
||||||
score -= isUpper ? 0.1 : 1.0;
|
|
||||||
} else {
|
|
||||||
return NO_MATCH;
|
return NO_MATCH;
|
||||||
|
} else if (charType === UNICODE_LETTER && getCharType(prevChar) === UNICODE_LETTER) {
|
||||||
|
var isUpper = isUpperCase(input[start]);
|
||||||
|
if (isUpper && (isLowerCase(prevChar) || isLowerCase(nextChar))) {
|
||||||
|
score -= 0.1;
|
||||||
|
} else if (isMatcherUpper && start === prevEnd) {
|
||||||
|
score -= isUpper ? 0.1 : 1.0;
|
||||||
|
} else {
|
||||||
|
return NO_MATCH;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
prevEnd = start + match[i].length;
|
prevEnd = start + match[i].length;
|
||||||
@ -214,15 +226,30 @@ function findMatch(matcher, input, startOfName, endOfName) {
|
|||||||
boundaries: boundaries
|
boundaries: boundaries
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
// Returns true if s contains at least one character in the Unicode general
// category L (Letter). The pattern is unanchored, so for a multi-character
// string a letter anywhere in it yields true.
function isLetter(s) {
|
||||||
|
return /\p{L}/u.test(s);
|
||||||
|
}
|
||||||
function isUpperCase(s) {
|
function isUpperCase(s) {
|
||||||
return s !== s.toLowerCase();
|
return /\p{Lu}/u.test(s);
|
||||||
}
|
}
|
||||||
function isLowerCase(s) {
|
function isLowerCase(s) {
|
||||||
return s !== s.toUpperCase();
|
return /\p{Ll}/u.test(s);
|
||||||
|
}
|
||||||
|
// Returns true if s contains at least one character in the Unicode general
// category Nd (decimal digit number). The pattern is unanchored, so for a
// multi-character string a digit anywhere in it yields true.
function isDigit(s) {
|
||||||
|
return /\p{Nd}/u.test(s);
|
||||||
|
}
|
||||||
|
// Classifies s as UNICODE_LETTER, UNICODE_DIGIT or UNICODE_OTHER.
// The letter check runs first, so a string containing both a letter and a
// digit is reported as UNICODE_LETTER.
function getCharType(s) {
|
||||||
|
if (isLetter(s)) {
|
||||||
|
return UNICODE_LETTER;
|
||||||
|
} else if (isDigit(s)) {
|
||||||
|
return UNICODE_DIGIT;
|
||||||
|
} else {
|
||||||
|
// neither a letter nor a decimal digit
return UNICODE_OTHER;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
function rateNoise(str) {
|
function rateNoise(str) {
|
||||||
return (str.match(/([.(])/g) || []).length / 5
|
return (str.match(/([.(])/g) || []).length / 5
|
||||||
+ (str.match(/([A-Z]+)/g) || []).length / 10
|
+ (str.match(/(\p{Lu}+)/gu) || []).length / 10
|
||||||
+ str.length / 20;
|
+ str.length / 20;
|
||||||
}
|
}
|
||||||
function doSearch(request, response) {
|
function doSearch(request, response) {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -23,7 +23,7 @@
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* @test
|
* @test
|
||||||
* @bug 8178982 8220497 8210683 8241982 8297216
|
* @bug 8178982 8220497 8210683 8241982 8297216 8303056
|
||||||
* @summary Test the search feature of javadoc.
|
* @summary Test the search feature of javadoc.
|
||||||
* @library ../../lib
|
* @library ../../lib
|
||||||
* @library /test/lib
|
* @library /test/lib
|
||||||
@ -335,6 +335,46 @@ public class TestSearchScript extends JavadocTester {
|
|||||||
checkSearch(inv, "with map", List.of(
|
checkSearch(inv, "with map", List.of(
|
||||||
"listpkg.Nolist.withTypeParams(Map<String, ? extends Collection>)"));
|
"listpkg.Nolist.withTypeParams(Map<String, ? extends Collection>)"));
|
||||||
|
|
||||||
|
// search for numeric strings
|
||||||
|
checkSearch(inv, "1", List.of("listpkg.MyList.abc123xyz()"));
|
||||||
|
checkSearch(inv, "12", List.of("listpkg.MyList.abc123xyz()"));
|
||||||
|
checkSearch(inv, "12 x", List.of("listpkg.MyList.abc123xyz()"));
|
||||||
|
checkSearch(inv, "123 x", List.of("listpkg.MyList.abc123xyz()"));
|
||||||
|
checkSearch(inv, "1 x", List.of("listpkg.MyList.abc123xyz()"));
|
||||||
|
checkSearch(inv, "2 x", List.of());
|
||||||
|
checkSearch(inv, "3", List.of("listpkg.MyList.M_3X"));
|
||||||
|
checkSearch(inv, "3x", List.of("listpkg.MyList.M_3X"));
|
||||||
|
checkSearch(inv, "_3", List.of("listpkg.MyList.M_3X"));
|
||||||
|
checkSearch(inv, "3 x", List.of("listpkg.MyList.M_3X"));
|
||||||
|
|
||||||
|
// Unicode camel-case tests
|
||||||
|
checkSearch(inv, "νέα λίστα", List.of("listpkg.ListProvider.δημιουργήστεΝέαΛίστα()"));
|
||||||
|
checkSearch(inv, "δημ νέα λίσ", List.of("listpkg.ListProvider.δημιουργήστεΝέαΛίστα()"));
|
||||||
|
checkSearch(inv, "δ ν λ", List.of("listpkg.ListProvider.δημιουργήστεΝέαΛίστα()"));
|
||||||
|
checkSearch(inv, "ν λ", List.of("listpkg.ListProvider.δημιουργήστεΝέαΛίστα()"));
|
||||||
|
checkSearch(inv, "δημιουργήστεΝέαΛίστα", List.of("listpkg.ListProvider.δημιουργήστεΝέαΛίστα()"));
|
||||||
|
checkSearch(inv, "δηΝέΛίσ", List.of("listpkg.ListProvider.δημιουργήστεΝέαΛίστα()"));
|
||||||
|
checkSearch(inv, "δΝΛ", List.of("listpkg.ListProvider.δημιουργήστεΝέαΛίστα()"));
|
||||||
|
checkSearch(inv, "ΝΛ", List.of("listpkg.ListProvider.δημιουργήστεΝέαΛίστα()"));
|
||||||
|
checkSearch(inv, "δημ λίστα", List.of("listpkg.ListProvider.δημιουργήστεΝέαΛίστα()"));
|
||||||
|
checkSearch(inv, "сделать новый список", List.of("listpkg.ListProvider.сделатьНовыйСписок()"));
|
||||||
|
checkSearch(inv, "сде нов спи", List.of("listpkg.ListProvider.сделатьНовыйСписок()"));
|
||||||
|
checkSearch(inv, "с н с", List.of("listpkg.ListProvider.сделатьНовыйСписок()"));
|
||||||
|
checkSearch(inv, "н с", List.of("listpkg.ListProvider.сделатьНовыйСписок()"));
|
||||||
|
checkSearch(inv, "сделатьНовыйСписок", List.of("listpkg.ListProvider.сделатьНовыйСписок()"));
|
||||||
|
checkSearch(inv, "сдеНовСпис", List.of("listpkg.ListProvider.сделатьНовыйСписок()"));
|
||||||
|
checkSearch(inv, "сНС", List.of("listpkg.ListProvider.сделатьНовыйСписок()"));
|
||||||
|
checkSearch(inv, "сН", List.of("listpkg.ListProvider.сделатьНовыйСписок()"));
|
||||||
|
checkSearch(inv, "сдеН Спи", List.of("listpkg.ListProvider.сделатьНовыйСписок()"));
|
||||||
|
|
||||||
|
// Negative Unicode camel-case tests
|
||||||
|
checkSearch(inv, "Νέα ίστα", List.of());
|
||||||
|
checkSearch(inv, "α λίστα", List.of());
|
||||||
|
checkSearch(inv, "ηΝΛ", List.of());
|
||||||
|
checkSearch(inv, "овый", List.of());
|
||||||
|
checkSearch(inv, "д н с", List.of());
|
||||||
|
checkSearch(inv, "пи", List.of());
|
||||||
|
checkSearch(inv, "НОВЫЙС ПИСОК", List.of());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -364,7 +404,7 @@ public class TestSearchScript extends JavadocTester {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void checkList(String query, List<?> result, List<?> expected) {
|
void checkList(String query, List<?> result, List<?> expected) {
|
||||||
checking("Checking resut for query \"" + query + "\"");
|
checking("Checking result for query \"" + query + "\"");
|
||||||
if (!expected.equals(result)) {
|
if (!expected.equals(result)) {
|
||||||
failed("Expected: " + expected + ", got: " + result);
|
failed("Expected: " + expected + ", got: " + result);
|
||||||
} else {
|
} else {
|
||||||
|
@ -26,6 +26,7 @@ package listpkg;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Example class containing "list" matching full name.
|
* Example class containing "list" matching full name.
|
||||||
|
* @param <E> type parameter
|
||||||
*/
|
*/
|
||||||
public interface List<E> {
|
public interface List<E> {
|
||||||
|
|
||||||
|
@ -27,9 +27,30 @@ package listpkg;
|
|||||||
* Example class containing "list" matching at beginning of name.
|
* Example class containing "list" matching at beginning of name.
|
||||||
*/
|
*/
|
||||||
public class ListProvider {
|
public class ListProvider {
|
||||||
|
/**
|
||||||
|
* Constructor.
|
||||||
|
*/
|
||||||
public ListProvider() {}
|
public ListProvider() {}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* English camel-case name
|
||||||
|
*/
|
||||||
public List makeNewList() {
|
public List makeNewList() {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Greek camel-case name
|
||||||
|
*/
|
||||||
|
public List δημιουργήστεΝέαΛίστα() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Russian camel-case name
|
||||||
|
*/
|
||||||
|
public List сделатьНовыйСписок() {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -23,5 +23,18 @@
|
|||||||
|
|
||||||
package listpkg;
|
package listpkg;
|
||||||
|
|
||||||
public class MyList implements List {
|
/**
|
||||||
|
* A class.
|
||||||
|
*/
|
||||||
|
public abstract class MyList implements List {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Field name containing a digit.
|
||||||
|
*/
|
||||||
|
public static final int M_3X = 2;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Method name containing digits.
|
||||||
|
*/
|
||||||
|
public void abc123xyz() {}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user