8141285: NameCode should pass tests from BytecodeNameTest.java
Reviewed-by: attila, mhaupt
This commit is contained in:
parent
44740462f8
commit
5532631558
101
nashorn/samples/find_underscores.js
Normal file
101
nashorn/samples/find_underscores.js
Normal file
@ -0,0 +1,101 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
*
|
||||||
|
* - Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* - Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* - Neither the name of Oracle nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
|
||||||
|
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Usage: jjs find_underscores.js -- <directory>
|
||||||
|
|
||||||
|
if (arguments.length == 0) {
|
||||||
|
print("Usage: jjs find_underscores.js -- <directory>");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Java types used
|
||||||
|
var File = Java.type("java.io.File");
|
||||||
|
var Files = Java.type("java.nio.file.Files");
|
||||||
|
var StringArray = Java.type("java.lang.String[]");
|
||||||
|
var ToolProvider = Java.type("javax.tools.ToolProvider");
|
||||||
|
var Tree = Java.type("com.sun.source.tree.Tree");
|
||||||
|
var Trees = Java.type("com.sun.source.util.Trees");
|
||||||
|
var TreeScanner = Java.type("com.sun.source.util.TreeScanner");
|
||||||
|
|
||||||
|
function findUnderscores() {
|
||||||
|
// get the system compiler tool
|
||||||
|
var compiler = ToolProvider.systemJavaCompiler;
|
||||||
|
// get standard file manager
|
||||||
|
var fileMgr = compiler.getStandardFileManager(null, null, null);
|
||||||
|
// Using Java.to convert script array (arguments) to a Java String[]
|
||||||
|
var compUnits = fileMgr.getJavaFileObjects(Java.to(arguments, StringArray));
|
||||||
|
// create a new compilation task
|
||||||
|
var task = compiler.getTask(null, fileMgr, null, null, null, compUnits);
|
||||||
|
var sourcePositions = Trees.instance(task).sourcePositions;
|
||||||
|
// subclass SimpleTreeVisitor - to find underscore variable names
|
||||||
|
var UnderscoreFinder = Java.extend(TreeScanner);
|
||||||
|
|
||||||
|
var visitor = new UnderscoreFinder() {
|
||||||
|
// override to capture information on current compilation unit
|
||||||
|
visitCompilationUnit: function(compUnit, p) {
|
||||||
|
this.compUnit = compUnit;
|
||||||
|
this.lineMap = compUnit.lineMap;
|
||||||
|
this.fileName = compUnit.sourceFile.name;
|
||||||
|
|
||||||
|
return Java.super(visitor).visitCompilationUnit(compUnit, p);
|
||||||
|
},
|
||||||
|
|
||||||
|
// override to check variable name
|
||||||
|
visitVariable: function(node, p) {
|
||||||
|
if (node.name.toString() == "_") {
|
||||||
|
var pos = sourcePositions.getStartPosition(this.compUnit, node);
|
||||||
|
var line = this.lineMap.getLineNumber(pos);
|
||||||
|
var col = this.lineMap.getColumnNumber(pos);
|
||||||
|
print(node + " @ " + this.fileName + ":" + line + ":" + col);
|
||||||
|
}
|
||||||
|
|
||||||
|
return Java.super(visitor).visitVariable(node, p);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for each (var cu in task.parse()) {
|
||||||
|
cu.accept(visitor, null);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// for each ".java" file in directory (recursively).
|
||||||
|
/**
 * Walks the given directory recursively and runs findUnderscores on every
 * file whose absolute path ends with ".java".
 *
 * @param dir java.io.File for the directory to scan
 */
function main(dir) {
    // Files.walk returns a stream backed by open directory handles;
    // it has to be closed explicitly once the traversal is done.
    var paths = Files.walk(dir.toPath());
    try {
        paths.forEach(function(p) {
            var name = p.toFile().absolutePath;
            if (name.endsWith(".java")) {
                findUnderscores(p);
            }
        });
    } finally {
        paths.close();
    }
}
|
||||||
|
|
||||||
|
main(new File(arguments[0]));
|
@ -26,44 +26,256 @@
|
|||||||
package jdk.nashorn.internal.runtime.linker;
|
package jdk.nashorn.internal.runtime.linker;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* <p>
|
||||||
* Implements the name mangling and demangling as specified by John Rose's
|
* Implements the name mangling and demangling as specified by John Rose's
|
||||||
* <a href="https://blogs.oracle.com/jrose/entry/symbolic_freedom_in_the_vm"
|
* <a href="https://blogs.oracle.com/jrose/entry/symbolic_freedom_in_the_vm"
|
||||||
* target="_blank">"Symbolic Freedom in the VM"</a> article. Normally, you would
|
* target="_blank">"Symbolic Freedom in the VM"</a> article. Normally, you would
|
||||||
* mangle the names in the call sites as you're generating bytecode, and then
|
* mangle the names in the call sites as you're generating bytecode, and then
|
||||||
* demangle them when you receive them in bootstrap methods.
|
* demangle them when you receive them in bootstrap methods.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* This code is derived from sun.invoke.util.BytecodeName. Apart from subsetting that
|
||||||
|
* class, we don't want to create dependency between non-exported package from java.base
|
||||||
|
* to nashorn module.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h3>Comment from BytecodeName class reproduced here:</h3>
|
||||||
|
*
|
||||||
|
* Includes universal mangling rules for the JVM.
|
||||||
|
*
|
||||||
|
* <h3>Avoiding Dangerous Characters </h3>
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* The JVM defines a very small set of characters which are illegal
|
||||||
|
* in name spellings. We will slightly extend and regularize this set
|
||||||
|
* into a group of <cite>dangerous characters</cite>.
|
||||||
|
* These characters will then be replaced, in mangled names, by escape sequences.
|
||||||
|
* In addition, accidental escape sequences must be further escaped.
|
||||||
|
* Finally, a special prefix will be applied if and only if
|
||||||
|
* the mangling would otherwise fail to begin with the escape character.
|
||||||
|
* This happens to cover the corner case of the null string,
|
||||||
|
* and also clearly marks symbols which need demangling.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* Dangerous characters are the union of all characters forbidden
|
||||||
|
* or otherwise restricted by the JVM specification,
|
||||||
|
* plus their mates, if they are brackets
|
||||||
|
* (<code><big><b>[</b></big></code> and <code><big><b>]</b></big></code>,
|
||||||
|
* <code><big><b>&lt;</b></big></code> and <code><big><b>&gt;</b></big></code>),
|
||||||
|
* plus, arbitrarily, the colon character <code><big><b>:</b></big></code>.
|
||||||
|
* There is no distinction between type, method, and field names.
|
||||||
|
* This makes it easier to convert between mangled names of different
|
||||||
|
* types, since they do not need to be decoded (demangled).
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* The escape character is backslash <code><big><b>\</b></big></code>
|
||||||
|
* (also known as reverse solidus).
|
||||||
|
* This character is, until now, unheard of in bytecode names,
|
||||||
|
* but traditional in the proposed role.
|
||||||
|
*
|
||||||
|
* </p>
|
||||||
|
* <h3> Replacement Characters </h3>
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* Every escape sequence is two characters
|
||||||
|
* (in fact, two UTF8 bytes) beginning with
|
||||||
|
* the escape character and followed by a
|
||||||
|
* <cite>replacement character</cite>.
|
||||||
|
* (Since the replacement character is never a backslash,
|
||||||
|
* iterated manglings do not double in size.)
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* Each dangerous character has some rough visual similarity
|
||||||
|
* to its corresponding replacement character.
|
||||||
|
* This makes mangled symbols easier to recognize by sight.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* The dangerous characters are
|
||||||
|
* <code><big><b>/</b></big></code> (forward slash, used to delimit package components),
|
||||||
|
* <code><big><b>.</b></big></code> (dot, also a package delimiter),
|
||||||
|
* <code><big><b>;</b></big></code> (semicolon, used in signatures),
|
||||||
|
* <code><big><b>$</b></big></code> (dollar, used in inner classes and synthetic members),
|
||||||
|
* <code><big><b>&lt;</b></big></code> (left angle),
|
||||||
|
* <code><big><b>&gt;</b></big></code> (right angle),
|
||||||
|
* <code><big><b>[</b></big></code> (left square bracket, used in array types),
|
||||||
|
* <code><big><b>]</b></big></code> (right square bracket, reserved in this scheme for language use),
|
||||||
|
* and <code><big><b>:</b></big></code> (colon, reserved in this scheme for language use).
|
||||||
|
* Their replacements are, respectively,
|
||||||
|
* <code><big><b>|</b></big></code> (vertical bar),
|
||||||
|
* <code><big><b>,</b></big></code> (comma),
|
||||||
|
* <code><big><b>?</b></big></code> (question mark),
|
||||||
|
* <code><big><b>%</b></big></code> (percent),
|
||||||
|
* <code><big><b>^</b></big></code> (caret),
|
||||||
|
* <code><big><b>_</b></big></code> (underscore),
|
||||||
|
* <code><big><b>{</b></big></code> (left curly bracket),
|
||||||
|
* <code><big><b>}</b></big></code> (right curly bracket), and
|
||||||
|
* <code><big><b>!</b></big></code> (exclamation mark).
|
||||||
|
* In addition, the replacement character for the escape character itself is
|
||||||
|
* <code><big><b>-</b></big></code> (hyphen),
|
||||||
|
* and the replacement character for the null prefix is
|
||||||
|
* <code><big><b>=</b></big></code> (equal sign).
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* An escape character <code><big><b>\</b></big></code>
|
||||||
|
* followed by any of these replacement characters
|
||||||
|
* is an escape sequence, and there are no other escape sequences.
|
||||||
|
* An equal sign is only part of an escape sequence
|
||||||
|
* if it is the second character in the whole string, following a backslash.
|
||||||
|
* Two consecutive backslashes do <em>not</em> form an escape sequence.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* Each escape sequence replaces a so-called <cite>original character</cite>
|
||||||
|
* which is either one of the dangerous characters or the escape character.
|
||||||
|
* A null prefix replaces an initial null string, not a character.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* All this implies that escape sequences cannot overlap and may be
|
||||||
|
* determined all at once for a whole string. Note that a spelling
|
||||||
|
* string can contain <cite>accidental escapes</cite>, apparent escape
|
||||||
|
* sequences which must not be interpreted as manglings.
|
||||||
|
* These are disabled by replacing their leading backslash with an
|
||||||
|
* escape sequence (<code><big><b>\-</b></big></code>). To mangle a string, three logical steps
|
||||||
|
* are required, though they may be carried out in one pass:
|
||||||
|
* </p>
|
||||||
|
* <ol>
|
||||||
|
* <li>In each accidental escape, replace the backslash with an escape sequence
|
||||||
|
* (<code><big><b>\-</b></big></code>).</li>
|
||||||
|
* <li>Replace each dangerous character with an escape sequence
|
||||||
|
* (<code><big><b>\|</b></big></code> for <code><big><b>/</b></big></code>, etc.).</li>
|
||||||
|
* <li>If the first two steps introduced any change, <em>and</em>
|
||||||
|
* if the string does not already begin with a backslash, prepend a null prefix (<code><big><b>\=</b></big></code>).</li>
|
||||||
|
* </ol>
|
||||||
|
*
|
||||||
|
* To demangle a mangled string that begins with an escape,
|
||||||
|
* remove any null prefix, and then replace (in parallel)
|
||||||
|
* each escape sequence by its original character.
|
||||||
|
* <p>Spelling strings which contain accidental
|
||||||
|
* escapes <em>must</em> have them replaced, even if those
|
||||||
|
* strings do not contain dangerous characters.
|
||||||
|
* This restriction means that mangling a string always
|
||||||
|
* requires a scan of the string for escapes.
|
||||||
|
* But then, a scan would be required anyway,
|
||||||
|
* to check for dangerous characters.
|
||||||
|
*
|
||||||
|
* </p>
|
||||||
|
* <h3> Nice Properties </h3>
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* If a bytecode name does not contain any escape sequence,
|
||||||
|
* demangling is a no-op: The string demangles to itself.
|
||||||
|
* Such a string is called <cite>self-mangling</cite>.
|
||||||
|
* Almost all strings are self-mangling.
|
||||||
|
* In practice, to demangle almost any name “found in nature”,
|
||||||
|
* simply verify that it does not begin with a backslash.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* Mangling is a one-to-one function, while demangling
|
||||||
|
* is a many-to-one function.
|
||||||
|
* A mangled string is defined as <cite>validly mangled</cite> if
|
||||||
|
* it is in fact the unique mangling of its spelling string.
|
||||||
|
* Three examples of invalidly mangled strings are <code><big><b>\=foo</b></big></code>,
|
||||||
|
* <code><big><b>\-bar</b></big></code>, and <code><big><b>baz\!</b></big></code>, which demangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and
|
||||||
|
* <code><big><b>baz\!</b></big></code>, but then remangle to <code><big><b>foo</b></big></code>, <code><big><b>\bar</b></big></code>, and <code><big><b>\=baz\-!</b></big></code>.
|
||||||
|
* If a language back-end or runtime is using mangled names,
|
||||||
|
* it should never present an invalidly mangled bytecode
|
||||||
|
* name to the JVM. If the runtime encounters one,
|
||||||
|
* it should also report an error, since such an occurrence
|
||||||
|
* probably indicates a bug in name encoding which
|
||||||
|
* will lead to errors in linkage.
|
||||||
|
* However, this note does not propose that the JVM verifier
|
||||||
|
* detect invalidly mangled names.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* As a result of these rules, it is a simple matter to
|
||||||
|
* compute validly mangled substrings and concatenations
|
||||||
|
* of validly mangled strings, and (with a little care)
|
||||||
|
* these correspond to corresponding operations on their
|
||||||
|
* spelling strings.
|
||||||
|
* </p>
|
||||||
|
* <ul>
|
||||||
|
* <li>Any prefix of a validly mangled string is also validly mangled,
|
||||||
|
* although a null prefix may need to be removed.</li>
|
||||||
|
* <li>Any suffix of a validly mangled string is also validly mangled,
|
||||||
|
* although a null prefix may need to be added.</li>
|
||||||
|
* <li>Two validly mangled strings, when concatenated,
|
||||||
|
* are also validly mangled, although any null prefix
|
||||||
|
* must be removed from the second string,
|
||||||
|
* and a trailing backslash on the first string may need escaping,
|
||||||
|
* if it would participate in an accidental escape when followed
|
||||||
|
* by the first character of the second string.</li>
|
||||||
|
* </ul>
|
||||||
|
* <p>If languages that include non-Java symbol spellings use this
|
||||||
|
* mangling convention, they will enjoy the following advantages:
|
||||||
|
* </p>
|
||||||
|
* <ul>
|
||||||
|
* <li>They can interoperate via symbols they share in common.</li>
|
||||||
|
* <li>Low-level tools, such as backtrace printers, will have readable displays.</li>
|
||||||
|
* <li>Future JVM and language extensions can safely use the dangerous characters
|
||||||
|
* for structuring symbols, but will never interfere with valid spellings.</li>
|
||||||
|
* <li>Runtimes and compilers can use standard libraries for mangling and demangling.</li>
|
||||||
|
* <li>Occasional transliterations and name composition will be simple and regular,
|
||||||
|
* for classes, methods, and fields.</li>
|
||||||
|
* <li>Bytecode names will continue to be compact.
|
||||||
|
* When mangled, spellings will at most double in length, either in
|
||||||
|
* UTF8 or UTF16 format, and most will not change at all.</li>
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* <h3> Suggestions for Human Readable Presentations </h3>
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* For human readable displays of symbols,
|
||||||
|
* it will be better to present a string-like quoted
|
||||||
|
* representation of the spelling, because JVM users
|
||||||
|
* are generally familiar with such tokens.
|
||||||
|
* We suggest using single or double quotes before and after
|
||||||
|
* mangled symbols which are not valid Java identifiers,
|
||||||
|
* with quotes, backslashes, and non-printing characters
|
||||||
|
* escaped as if for literals in the Java language.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* For example, an HTML-like spelling
|
||||||
|
* <code><big><b>&lt;pre&gt;</b></big></code> mangles to
|
||||||
|
* <code><big><b>\^pre\_</b></big></code> and could
|
||||||
|
* display more cleanly as
|
||||||
|
* <code><big><b>'&lt;pre&gt;'</b></big></code>,
|
||||||
|
* with the quotes included.
|
||||||
|
* Such string-like conventions are <em>not</em> suitable
|
||||||
|
* for mangled bytecode names, in part because
|
||||||
|
* dangerous characters must be eliminated, rather
|
||||||
|
* than just quoted. Otherwise internally structured
|
||||||
|
* strings like package prefixes and method signatures
|
||||||
|
* could not be reliably parsed.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* In such human-readable displays, invalidly mangled
|
||||||
|
* names should <em>not</em> be demangled and quoted,
|
||||||
|
* for this would be misleading. Likewise, JVM symbols
|
||||||
|
* which contain dangerous characters (like dots in field
|
||||||
|
* names or brackets in method names) should not be
|
||||||
|
* simply quoted. The bytecode names
|
||||||
|
* <code><big><b>\=phase\,1</b></big></code> and
|
||||||
|
* <code><big><b>phase.1</b></big></code> are distinct,
|
||||||
|
* and in demangled displays they should be presented as
|
||||||
|
* <code><big><b>'phase.1'</b></big></code> and something like
|
||||||
|
* <code><big><b>'phase'.1</b></big></code>, respectively.
|
||||||
|
* </p>
|
||||||
*/
|
*/
|
||||||
public final class NameCodec {
|
public final class NameCodec {
|
||||||
private static final char ESCAPE_CHAR = '\\';
|
private NameCodec() {
|
||||||
private static final char EMPTY_ESCAPE = '=';
|
}
|
||||||
|
|
||||||
|
private static final char ESCAPE_C = '\\';
|
||||||
|
// empty escape sequence to avoid a null name or illegal prefix
|
||||||
|
private static final char NULL_ESCAPE_C = '=';
|
||||||
|
private static final String NULL_ESCAPE = ESCAPE_C+""+NULL_ESCAPE_C;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Canonical encoding for the empty name.
|
* Canonical encoding for the empty name.
|
||||||
*/
|
*/
|
||||||
public static final String EMPTY_NAME = new String(new char[] { ESCAPE_CHAR, EMPTY_ESCAPE });
|
public static final String EMPTY_NAME = new String(new char[] { ESCAPE_C, NULL_ESCAPE_C });
|
||||||
private static final char EMPTY_CHAR = 0xFEFF;
|
|
||||||
|
|
||||||
private static final int MIN_ENCODING = '$';
|
|
||||||
private static final int MAX_ENCODING = ']';
|
|
||||||
private static final char[] ENCODING = new char[MAX_ENCODING - MIN_ENCODING + 1];
|
|
||||||
private static final int MIN_DECODING = '!';
|
|
||||||
private static final int MAX_DECODING = '}';
|
|
||||||
private static final char[] DECODING = new char[MAX_DECODING - MIN_DECODING + 1];
|
|
||||||
|
|
||||||
static {
|
|
||||||
addEncoding('/', '|');
|
|
||||||
addEncoding('.', ',');
|
|
||||||
addEncoding(';', '?');
|
|
||||||
addEncoding('$', '%');
|
|
||||||
addEncoding('<', '^');
|
|
||||||
addEncoding('>', '_');
|
|
||||||
addEncoding('[', '{');
|
|
||||||
addEncoding(']', '}');
|
|
||||||
addEncoding(':', '!');
|
|
||||||
addEncoding('\\', '-');
|
|
||||||
DECODING[EMPTY_ESCAPE - MIN_DECODING] = EMPTY_CHAR;
|
|
||||||
}
|
|
||||||
|
|
||||||
private NameCodec() {
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Encodes ("mangles") an unencoded symbolic name.
|
* Encodes ("mangles") an unencoded symbolic name.
|
||||||
@ -71,37 +283,10 @@ public final class NameCodec {
|
|||||||
* @return the mangled form of the symbolic name.
|
* @return the mangled form of the symbolic name.
|
||||||
*/
|
*/
|
||||||
public static String encode(final String name) {
|
public static String encode(final String name) {
|
||||||
final int l = name.length();
|
String bn = mangle(name);
|
||||||
if(l == 0) {
|
assert((Object)bn == name || looksMangled(bn)) : bn;
|
||||||
return EMPTY_NAME;
|
assert(name.equals(decode(bn))) : name;
|
||||||
}
|
return bn;
|
||||||
StringBuilder b = null;
|
|
||||||
int lastEscape = -1;
|
|
||||||
for(int i = 0; i < l; ++i) {
|
|
||||||
final int encodeIndex = name.charAt(i) - MIN_ENCODING;
|
|
||||||
if(encodeIndex >= 0 && encodeIndex < ENCODING.length) {
|
|
||||||
final char e = ENCODING[encodeIndex];
|
|
||||||
if(e != 0) {
|
|
||||||
if(b == null) {
|
|
||||||
b = new StringBuilder(name.length() + 3);
|
|
||||||
if(name.charAt(0) != ESCAPE_CHAR && i > 0) {
|
|
||||||
b.append(EMPTY_NAME);
|
|
||||||
}
|
|
||||||
b.append(name, 0, i);
|
|
||||||
} else {
|
|
||||||
b.append(name, lastEscape + 1, i);
|
|
||||||
}
|
|
||||||
b.append(ESCAPE_CHAR).append(e);
|
|
||||||
lastEscape = i;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if(b == null) {
|
|
||||||
return name;
|
|
||||||
}
|
|
||||||
assert lastEscape != -1;
|
|
||||||
b.append(name, lastEscape + 1, l);
|
|
||||||
return b.toString();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -110,42 +295,138 @@ public final class NameCodec {
|
|||||||
* @return the demangled form of the symbolic name.
|
* @return the demangled form of the symbolic name.
|
||||||
*/
|
*/
|
||||||
public static String decode(final String name) {
|
public static String decode(final String name) {
|
||||||
if(name.isEmpty() || name.charAt(0) != ESCAPE_CHAR) {
|
String sn = name;
|
||||||
return name;
|
if (!sn.isEmpty() && looksMangled(name)) {
|
||||||
|
sn = demangle(name);
|
||||||
|
assert(name.equals(mangle(sn))) : name+" => "+sn+" => "+mangle(sn);
|
||||||
}
|
}
|
||||||
final int l = name.length();
|
return sn;
|
||||||
if(l == 2 && name.charAt(1) == EMPTY_CHAR) {
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
final StringBuilder b = new StringBuilder(name.length());
|
|
||||||
int lastEscape = -2;
|
|
||||||
int lastBackslash = -1;
|
|
||||||
for(;;) {
|
|
||||||
final int nextBackslash = name.indexOf(ESCAPE_CHAR, lastBackslash + 1);
|
|
||||||
if(nextBackslash == -1 || nextBackslash == l - 1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
final int decodeIndex = name.charAt(nextBackslash + 1) - MIN_DECODING;
|
|
||||||
if(decodeIndex >= 0 && decodeIndex < DECODING.length) {
|
|
||||||
final char d = DECODING[decodeIndex];
|
|
||||||
if(d == EMPTY_CHAR) {
|
|
||||||
// "\=" is only valid at the beginning of a mangled string
|
|
||||||
if(nextBackslash == 0) {
|
|
||||||
lastEscape = 0;
|
|
||||||
}
|
|
||||||
} else if(d != 0) {
|
|
||||||
b.append(name, lastEscape + 2, nextBackslash).append(d);
|
|
||||||
lastEscape = nextBackslash;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
lastBackslash = nextBackslash;
|
|
||||||
}
|
|
||||||
b.append(name, lastEscape + 2, l);
|
|
||||||
return b.toString();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void addEncoding(final char from, final char to) {
|
private static boolean looksMangled(String s) {
|
||||||
ENCODING[from - MIN_ENCODING] = to;
|
return s.charAt(0) == ESCAPE_C;
|
||||||
DECODING[to - MIN_DECODING] = from;
|
}
|
||||||
|
|
||||||
|
private static String mangle(String s) {
|
||||||
|
if (s.length() == 0)
|
||||||
|
return NULL_ESCAPE;
|
||||||
|
|
||||||
|
// build this lazily, when we first need an escape:
|
||||||
|
StringBuilder sb = null;
|
||||||
|
|
||||||
|
for (int i = 0, slen = s.length(); i < slen; i++) {
|
||||||
|
char c = s.charAt(i);
|
||||||
|
|
||||||
|
boolean needEscape = false;
|
||||||
|
if (c == ESCAPE_C) {
|
||||||
|
if (i+1 < slen) {
|
||||||
|
char c1 = s.charAt(i+1);
|
||||||
|
if ((i == 0 && c1 == NULL_ESCAPE_C)
|
||||||
|
|| c1 != originalOfReplacement(c1)) {
|
||||||
|
// an accidental escape
|
||||||
|
needEscape = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
needEscape = isDangerous(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!needEscape) {
|
||||||
|
if (sb != null) sb.append(c);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// build sb if this is the first escape
|
||||||
|
if (sb == null) {
|
||||||
|
sb = new StringBuilder(s.length()+10);
|
||||||
|
// mangled names must begin with a backslash:
|
||||||
|
if (s.charAt(0) != ESCAPE_C && i > 0)
|
||||||
|
sb.append(NULL_ESCAPE);
|
||||||
|
// append the string so far, which is unremarkable:
|
||||||
|
sb.append(s, 0, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// rewrite \ to \-, / to \|, etc.
|
||||||
|
sb.append(ESCAPE_C);
|
||||||
|
sb.append(replacementOf(c));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sb != null) return sb.toString();
|
||||||
|
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static String demangle(String s) {
|
||||||
|
// build this lazily, when we first meet an escape:
|
||||||
|
StringBuilder sb = null;
|
||||||
|
|
||||||
|
int stringStart = 0;
|
||||||
|
if (s.startsWith(NULL_ESCAPE))
|
||||||
|
stringStart = 2;
|
||||||
|
|
||||||
|
for (int i = stringStart, slen = s.length(); i < slen; i++) {
|
||||||
|
char c = s.charAt(i);
|
||||||
|
|
||||||
|
if (c == ESCAPE_C && i+1 < slen) {
|
||||||
|
// might be an escape sequence
|
||||||
|
char rc = s.charAt(i+1);
|
||||||
|
char oc = originalOfReplacement(rc);
|
||||||
|
if (oc != rc) {
|
||||||
|
// build sb if this is the first escape
|
||||||
|
if (sb == null) {
|
||||||
|
sb = new StringBuilder(s.length());
|
||||||
|
// append the string so far, which is unremarkable:
|
||||||
|
sb.append(s, stringStart, i);
|
||||||
|
}
|
||||||
|
++i; // skip both characters
|
||||||
|
c = oc;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sb != null)
|
||||||
|
sb.append(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sb != null) return sb.toString();
|
||||||
|
|
||||||
|
return s.substring(stringStart);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dangerous characters and their replacements, matched up by index.
private static final String DANGEROUS_CHARS   = "\\/.;:$[]<>"; // \\ must be first
private static final String REPLACEMENT_CHARS = "-|,?!%{}^_";
private static final int DANGEROUS_CHAR_FIRST_INDEX = 1; // index after \\

// One bit per character code below 128; a set bit marks a character that
// occurs in DANGEROUS_CHARS or REPLACEMENT_CHARS.
private static final long[] SPECIAL_BITMAP = new long[2];  // 128 bits
static {
    final String special = DANGEROUS_CHARS + REPLACEMENT_CHARS;
    for (int i = 0; i < special.length(); i++) {
        final char c = special.charAt(i);
        // c >>> 6 selects the 64-bit word; the shift itself only uses the
        // low six bits of c, which selects the bit inside that word
        SPECIAL_BITMAP[c >>> 6] |= 1L << c;
    }
}
|
||||||
|
|
||||||
|
private static boolean isSpecial(char c) {
|
||||||
|
if ((c >>> 6) < SPECIAL_BITMAP.length)
|
||||||
|
return ((SPECIAL_BITMAP[c >>> 6] >> c) & 1) != 0;
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static char replacementOf(char c) {
|
||||||
|
if (!isSpecial(c)) return c;
|
||||||
|
int i = DANGEROUS_CHARS.indexOf(c);
|
||||||
|
if (i < 0) return c;
|
||||||
|
return REPLACEMENT_CHARS.charAt(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static char originalOfReplacement(char c) {
|
||||||
|
if (!isSpecial(c)) return c;
|
||||||
|
int i = REPLACEMENT_CHARS.indexOf(c);
|
||||||
|
if (i < 0) return c;
|
||||||
|
return DANGEROUS_CHARS.charAt(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isDangerous(char c) {
|
||||||
|
if (!isSpecial(c)) return false;
|
||||||
|
return (DANGEROUS_CHARS.indexOf(c) >= DANGEROUS_CHAR_FIRST_INDEX);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,112 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation. Oracle designates this
|
||||||
|
* particular file as subject to the "Classpath" exception as provided
|
||||||
|
* by Oracle in the LICENSE file that accompanied this code.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
package jdk.nashorn.internal.runtime.linker.test;
|
||||||
|
|
||||||
|
import static org.testng.Assert.assertEquals;
|
||||||
|
|
||||||
|
import jdk.nashorn.internal.runtime.linker.NameCodec;
|
||||||
|
import org.testng.annotations.Test;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test for jdk.nashorn.internal.runtime.linker.NameCodec.java. This test is
|
||||||
|
* derived from BytecodeNameTest.java from (older) mlvm code @
|
||||||
|
* http://hg.openjdk.java.net/mlvm/mlvm/file/tip/netbeans/meth/test/sun/invoke/util/BytecodeNameTest.java
|
||||||
|
*
|
||||||
|
* @bug 8141285: NameCode should pass tests from BytecodeNameTest.java
|
||||||
|
*/
|
||||||
|
public class NameCodecTest {
|
||||||
|
|
||||||
|
static String[][] SAMPLES = {
|
||||||
|
// mangled, source
|
||||||
|
{"foo", "foo"},
|
||||||
|
{"ba\\r", "ba\\r"},
|
||||||
|
{"\\=ba\\-%z", "ba\\%z"},
|
||||||
|
{"\\=ba\\--z", "ba\\-z"},
|
||||||
|
{"=\\=", "=\\="},
|
||||||
|
{"\\==\\|\\=", "=/\\="},
|
||||||
|
{"\\|\\=", "/\\="},
|
||||||
|
{"\\=ba\\!", "ba:"},
|
||||||
|
{"\\|", "/"},
|
||||||
|
{"\\", "\\"},
|
||||||
|
{"\\\\%", "\\$"},
|
||||||
|
{"\\\\", "\\\\"},
|
||||||
|
{"\\=", ""}
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
static final String DANGEROUS_CHARS = "\\/.;:$[]<>";
|
||||||
|
static final String REPLACEMENT_CHARS = "-|,?!%{}^_";
|
||||||
|
|
||||||
|
static String[][] canonicalSamples() {
|
||||||
|
int ndc = DANGEROUS_CHARS.length();
|
||||||
|
String[][] res = new String[2 * ndc][];
|
||||||
|
for (int i = 0; i < ndc; i++) {
|
||||||
|
char dc = DANGEROUS_CHARS.charAt(i);
|
||||||
|
char rc = REPLACEMENT_CHARS.charAt(i);
|
||||||
|
if (dc == '\\') {
|
||||||
|
res[2 * i + 0] = new String[]{"\\-%", "\\%"};
|
||||||
|
} else {
|
||||||
|
res[2 * i + 0] = new String[]{"\\" + rc, "" + dc};
|
||||||
|
}
|
||||||
|
res[2 * i + 1] = new String[]{"" + rc, "" + rc};
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testEncode() {
|
||||||
|
System.out.println("testEncode");
|
||||||
|
testEncode(SAMPLES);
|
||||||
|
testEncode(canonicalSamples());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testEncode(String[][] samples) {
|
||||||
|
for (String[] sample : samples) {
|
||||||
|
String s = sample[1];
|
||||||
|
String expResult = sample[0];
|
||||||
|
String result = NameCodec.encode(s);
|
||||||
|
if (!result.equals(expResult)) {
|
||||||
|
System.out.println(s + " => " + result + " != " + expResult);
|
||||||
|
}
|
||||||
|
assertEquals(expResult, result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDecode() {
|
||||||
|
System.out.println("testDecode");
|
||||||
|
testDecode(SAMPLES);
|
||||||
|
testDecode(canonicalSamples());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void testDecode(String[][] samples) {
|
||||||
|
for (String[] sample : samples) {
|
||||||
|
String s = sample[0];
|
||||||
|
String expResult = sample[1];
|
||||||
|
String result = NameCodec.decode(s);
|
||||||
|
assertEquals(expResult, result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user