8303056: Improve support for Unicode characters and digits in JavaDoc search
Reviewed-by: jjg
This commit is contained in:
parent
9cf12bb977
commit
52ec4bcb1b
@ -44,6 +44,9 @@ const categories = {
|
||||
const highlight = "<span class='result-highlight'>$&</span>";
|
||||
const NO_MATCH = {};
|
||||
const MAX_RESULTS = 300;
|
||||
const UNICODE_LETTER = 0;
|
||||
const UNICODE_DIGIT = 1;
|
||||
const UNICODE_OTHER = 2;
|
||||
// Returns the prefix to put in front of a qualified name: `name` followed by
// `separator`, or the empty string when `name` is missing/empty or is the
// literal placeholder "<Unnamed>".
function checkUnnamed(name, separator) {
    if (!name || name === "<Unnamed>") {
        // Nothing to qualify with, so contribute no prefix and no separator.
        return "";
    }
    return name + separator;
}
|
||||
@ -127,13 +130,13 @@ function createMatcher(term, camelCase) {
|
||||
var pattern = "";
|
||||
var upperCase = [];
|
||||
term.trim().split(/\s+/).forEach(function(w, index, array) {
|
||||
var tokens = w.split(/(?=[A-Z,.()<>?[\/])/);
|
||||
var tokens = w.split(/(?=[\p{Lu},.()<>?[\/])/u);
|
||||
for (var i = 0; i < tokens.length; i++) {
|
||||
var s = tokens[i];
|
||||
// ',' and '?' are the only delimiters commonly followed by space in java signatures
|
||||
pattern += "(" + $.ui.autocomplete.escapeRegex(s).replace(/[,?]/g, "$&\\s*?") + ")";
|
||||
pattern += "(" + escapeUnicodeRegex(s).replace(/[,?]/g, "$&\\s*?") + ")";
|
||||
upperCase.push(false);
|
||||
var isWordToken = /\w$/.test(s);
|
||||
var isWordToken = /[\p{L}\p{Nd}_]$/u.test(s);
|
||||
if (isWordToken) {
|
||||
if (i === tokens.length - 1 && index < array.length - 1) {
|
||||
// space in query string matches all delimiters
|
||||
@ -143,7 +146,7 @@ function createMatcher(term, camelCase) {
|
||||
if (!camelCase && isUpperCase(s) && s.length === 1) {
|
||||
pattern += "()";
|
||||
} else {
|
||||
pattern += "([a-z0-9$<>?[\\]]*?)";
|
||||
pattern += "([\\p{L}\\p{Nd}\\p{Sc}<>?[\\]]*?)";
|
||||
}
|
||||
upperCase.push(isUpperCase(s[0]));
|
||||
}
|
||||
@ -153,10 +156,14 @@ function createMatcher(term, camelCase) {
|
||||
}
|
||||
}
|
||||
});
|
||||
var re = new RegExp(pattern, "gi");
|
||||
var re = new RegExp(pattern, "gui");
|
||||
re.upperCase = upperCase;
|
||||
return re;
|
||||
}
|
||||
// Escapes regular expression syntax characters in a literal string so that it
// can be embedded in a pattern compiled with the "u" (Unicode) flag.
//
// Unicode regular expressions do not allow certain characters to be escaped:
// only syntax characters may be preceded by a backslash, and anything else
// (for example '-', ',', '#' or a whitespace character) is a SyntaxError.
// Whitespace has no special meaning inside a pattern, so it is passed through
// unchanged instead of being turned into an invalid escape sequence.
//
// pattern: the literal text to escape.
// Returns the text with each of [ ] { } ( ) * + ? . \ ^ $ | prefixed by '\'.
function escapeUnicodeRegex(pattern) {
    return pattern.replace(/[\[\]{}()*+?.\\^$|]/g, '\\$&');
}
|
||||
function findMatch(matcher, input, startOfName, endOfName) {
|
||||
var from = startOfName;
|
||||
matcher.lastIndex = from;
|
||||
@ -176,20 +183,25 @@ function findMatch(matcher, input, startOfName, endOfName) {
|
||||
var start = match.index;
|
||||
var prevEnd = -1;
|
||||
for (var i = 1; i < match.length; i += 2) {
|
||||
var isUpper = isUpperCase(input[start]);
|
||||
var charType = getCharType(input[start]);
|
||||
var isMatcherUpper = matcher.upperCase[i];
|
||||
// capturing groups come in pairs, match and non-match
|
||||
boundaries.push(start, start + match[i].length);
|
||||
// make sure groups are anchored on a left word boundary
|
||||
var prevChar = input[start - 1] || "";
|
||||
var nextChar = input[start + 1] || "";
|
||||
if (start !== 0 && !/[\W_]/.test(prevChar) && !/[\W_]/.test(input[start])) {
|
||||
if (isUpper && (isLowerCase(prevChar) || isLowerCase(nextChar))) {
|
||||
score -= 0.1;
|
||||
} else if (isMatcherUpper && start === prevEnd) {
|
||||
score -= isUpper ? 0.1 : 1.0;
|
||||
} else {
|
||||
if (start !== 0) {
|
||||
if (charType === UNICODE_DIGIT && getCharType(prevChar) === UNICODE_DIGIT) {
|
||||
return NO_MATCH;
|
||||
} else if (charType === UNICODE_LETTER && getCharType(prevChar) === UNICODE_LETTER) {
|
||||
var isUpper = isUpperCase(input[start]);
|
||||
if (isUpper && (isLowerCase(prevChar) || isLowerCase(nextChar))) {
|
||||
score -= 0.1;
|
||||
} else if (isMatcherUpper && start === prevEnd) {
|
||||
score -= isUpper ? 0.1 : 1.0;
|
||||
} else {
|
||||
return NO_MATCH;
|
||||
}
|
||||
}
|
||||
}
|
||||
prevEnd = start + match[i].length;
|
||||
@ -214,15 +226,30 @@ function findMatch(matcher, input, startOfName, endOfName) {
|
||||
boundaries: boundaries
|
||||
};
|
||||
}
|
||||
// Tells whether the given string contains a character in the Unicode general
// category "Letter" (L). The test is unanchored, so for a multi-character
// string a single letter anywhere is enough.
function isLetter(s) {
    var letterPattern = /\p{L}/u;
    return letterPattern.test(s);
}
|
||||
// Tells whether the given string contains a character in the Unicode general
// category "Uppercase_Letter" (Lu).
//
// The category test replaces the earlier case-mapping comparison
// (s !== s.toLowerCase()), which also reported true for characters that are
// not upper-case letters but have a lower-case mapping, such as titlecase
// letters. Using \p{Lu} keeps this predicate consistent with the \p{Lu}-based
// token splitting and noise rating used elsewhere in this script.
function isUpperCase(s) {
    return /\p{Lu}/u.test(s);
}
|
||||
// Tells whether the given string contains a character in the Unicode general
// category "Lowercase_Letter" (Ll).
//
// The category test replaces the earlier case-mapping comparison
// (s !== s.toUpperCase()), which also reported true for characters that are
// not lower-case letters but have an upper-case mapping, such as titlecase
// letters. An empty string (used by callers for a missing neighbouring
// character) yields false.
function isLowerCase(s) {
    return /\p{Ll}/u.test(s);
}
|
||||
// Tells whether the given string contains a character in the Unicode general
// category "Decimal_Number" (Nd). The test is unanchored, so one decimal digit
// anywhere in the string is enough.
function isDigit(s) {
    var decimalDigitPattern = /\p{Nd}/u;
    return decimalDigitPattern.test(s);
}
|
||||
// Classifies a character as UNICODE_LETTER, UNICODE_DIGIT or UNICODE_OTHER.
// The letter check runs first; anything that is neither a letter nor a
// decimal digit is reported as UNICODE_OTHER.
function getCharType(s) {
    if (isLetter(s)) {
        return UNICODE_LETTER;
    }
    return isDigit(s) ? UNICODE_DIGIT : UNICODE_OTHER;
}
|
||||
// Computes a numeric rating for a string from three additive terms:
//   - the number of '.' and '(' characters, divided by 5
//   - the number of runs of consecutive upper-case letters (Unicode category
//     Lu), divided by 10
//   - the length of the string, divided by 20
//
// Upper-case runs are counted once, using the Unicode-aware pattern only; the
// ASCII-only [A-Z]+ count it supersedes is not added on top, since that would
// weigh ASCII upper-case runs twice as heavily as non-ASCII ones.
function rateNoise(str) {
    var delimiterCount = (str.match(/([.(])/g) || []).length;
    var upperCaseRunCount = (str.match(/(\p{Lu}+)/gu) || []).length;
    return delimiterCount / 5
            + upperCaseRunCount / 10
            + str.length / 20;
}
|
||||
function doSearch(request, response) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -23,7 +23,7 @@
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @bug 8178982 8220497 8210683 8241982 8297216
|
||||
* @bug 8178982 8220497 8210683 8241982 8297216 8303056
|
||||
* @summary Test the search feature of javadoc.
|
||||
* @library ../../lib
|
||||
* @library /test/lib
|
||||
@ -335,6 +335,46 @@ public class TestSearchScript extends JavadocTester {
|
||||
checkSearch(inv, "with map", List.of(
|
||||
"listpkg.Nolist.withTypeParams(Map<String, ? extends Collection>)"));
|
||||
|
||||
// search for numeric strings
|
||||
checkSearch(inv, "1", List.of("listpkg.MyList.abc123xyz()"));
|
||||
checkSearch(inv, "12", List.of("listpkg.MyList.abc123xyz()"));
|
||||
checkSearch(inv, "12 x", List.of("listpkg.MyList.abc123xyz()"));
|
||||
checkSearch(inv, "123 x", List.of("listpkg.MyList.abc123xyz()"));
|
||||
checkSearch(inv, "1 x", List.of("listpkg.MyList.abc123xyz()"));
|
||||
checkSearch(inv, "2 x", List.of());
|
||||
checkSearch(inv, "3", List.of("listpkg.MyList.M_3X"));
|
||||
checkSearch(inv, "3x", List.of("listpkg.MyList.M_3X"));
|
||||
checkSearch(inv, "_3", List.of("listpkg.MyList.M_3X"));
|
||||
checkSearch(inv, "3 x", List.of("listpkg.MyList.M_3X"));
|
||||
|
||||
// Unicode camel-case tests
|
||||
checkSearch(inv, "νέα λίστα", List.of("listpkg.ListProvider.δημιουργήστεΝέαΛίστα()"));
|
||||
checkSearch(inv, "δημ νέα λίσ", List.of("listpkg.ListProvider.δημιουργήστεΝέαΛίστα()"));
|
||||
checkSearch(inv, "δ ν λ", List.of("listpkg.ListProvider.δημιουργήστεΝέαΛίστα()"));
|
||||
checkSearch(inv, "ν λ", List.of("listpkg.ListProvider.δημιουργήστεΝέαΛίστα()"));
|
||||
checkSearch(inv, "δημιουργήστεΝέαΛίστα", List.of("listpkg.ListProvider.δημιουργήστεΝέαΛίστα()"));
|
||||
checkSearch(inv, "δηΝέΛίσ", List.of("listpkg.ListProvider.δημιουργήστεΝέαΛίστα()"));
|
||||
checkSearch(inv, "δΝΛ", List.of("listpkg.ListProvider.δημιουργήστεΝέαΛίστα()"));
|
||||
checkSearch(inv, "ΝΛ", List.of("listpkg.ListProvider.δημιουργήστεΝέαΛίστα()"));
|
||||
checkSearch(inv, "δημ λίστα", List.of("listpkg.ListProvider.δημιουργήστεΝέαΛίστα()"));
|
||||
checkSearch(inv, "сделать новый список", List.of("listpkg.ListProvider.сделатьНовыйСписок()"));
|
||||
checkSearch(inv, "сде нов спи", List.of("listpkg.ListProvider.сделатьНовыйСписок()"));
|
||||
checkSearch(inv, "с н с", List.of("listpkg.ListProvider.сделатьНовыйСписок()"));
|
||||
checkSearch(inv, "н с", List.of("listpkg.ListProvider.сделатьНовыйСписок()"));
|
||||
checkSearch(inv, "сделатьНовыйСписок", List.of("listpkg.ListProvider.сделатьНовыйСписок()"));
|
||||
checkSearch(inv, "сдеНовСпис", List.of("listpkg.ListProvider.сделатьНовыйСписок()"));
|
||||
checkSearch(inv, "сНС", List.of("listpkg.ListProvider.сделатьНовыйСписок()"));
|
||||
checkSearch(inv, "сН", List.of("listpkg.ListProvider.сделатьНовыйСписок()"));
|
||||
checkSearch(inv, "сдеН Спи", List.of("listpkg.ListProvider.сделатьНовыйСписок()"));
|
||||
|
||||
// Negative Unicode camel-case tests
|
||||
checkSearch(inv, "Νέα ίστα", List.of());
|
||||
checkSearch(inv, "α λίστα", List.of());
|
||||
checkSearch(inv, "ηΝΛ", List.of());
|
||||
checkSearch(inv, "овый", List.of());
|
||||
checkSearch(inv, "д н с", List.of());
|
||||
checkSearch(inv, "пи", List.of());
|
||||
checkSearch(inv, "НОВЫЙС ПИСОК", List.of());
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -364,7 +404,7 @@ public class TestSearchScript extends JavadocTester {
|
||||
}
|
||||
|
||||
void checkList(String query, List<?> result, List<?> expected) {
|
||||
checking("Checking resut for query \"" + query + "\"");
|
||||
checking("Checking result for query \"" + query + "\"");
|
||||
if (!expected.equals(result)) {
|
||||
failed("Expected: " + expected + ", got: " + result);
|
||||
} else {
|
||||
|
@ -26,6 +26,7 @@ package listpkg;
|
||||
|
||||
/**
|
||||
* Example class containing "list" matching full name.
|
||||
* @param <E> type parameter
|
||||
*/
|
||||
public interface List<E> {
|
||||
|
||||
|
@ -27,9 +27,30 @@ package listpkg;
|
||||
* Example class containing "list" matching at beginning of name.
|
||||
*/
|
||||
public class ListProvider {
|
||||
/**
|
||||
* Constructor.
|
||||
*/
|
||||
public ListProvider() {}
|
||||
|
||||
/**
|
||||
* English camel-case name
|
||||
*/
|
||||
public List makeNewList() {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Greek camel-case name
|
||||
*/
|
||||
public List δημιουργήστεΝέαΛίστα() {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Russian camel-case name
|
||||
*/
|
||||
public List сделатьНовыйСписок() {
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -23,5 +23,18 @@
|
||||
|
||||
package listpkg;
|
||||
|
||||
public class MyList implements List {
|
||||
/**
|
||||
* A class.
|
||||
*/
|
||||
public abstract class MyList implements List {
|
||||
|
||||
/**
|
||||
* Field name containing a digit.
|
||||
*/
|
||||
public static final int M_3X = 2;
|
||||
|
||||
/**
|
||||
* Method name containing digits.
|
||||
*/
|
||||
public void abc123xyz() {}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user