8254073: Tokenizer improvements (revised)

Reviewed-by: mcimadamore
This commit is contained in:
Jim Laskey 2020-10-09 11:08:56 +00:00
parent 9cecc16747
commit 4f9a1ffcdd
19 changed files with 1928 additions and 1318 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2004, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -29,15 +29,15 @@ import com.sun.tools.javac.parser.Tokens.Comment;
import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
import com.sun.tools.javac.util.*;
import java.nio.*;
import java.nio.CharBuffer;
import java.util.Arrays;
import java.util.regex.Pattern;
import static com.sun.tools.javac.util.LayoutCharacters.*;
/** An extension to the base lexical analyzer that captures
* and processes the contents of doc comments. It does so by
* translating Unicode escape sequences and by stripping the
* leading whitespace and starts from each line of the comment.
/**
* An extension to the base lexical analyzer (JavaTokenizer) that
* captures and processes the contents of doc comments. It does
* so by stripping the leading whitespace and comment starts from
* each line of the Javadoc comment.
*
* <p><b>This is NOT part of any supported API.
* If you write code that depends on this, you do so at your own risk.
@ -45,189 +45,107 @@ import static com.sun.tools.javac.util.LayoutCharacters.*;
* deletion without notice.</b>
*/
public class JavadocTokenizer extends JavaTokenizer {
/** Create a scanner from the input buffer. buffer must implement
* array() and compact(), and remaining() must be less than limit().
/**
* The factory that created this Scanner.
*/
protected JavadocTokenizer(ScannerFactory fac, CharBuffer buffer) {
super(fac, buffer);
final ScannerFactory fac;
/**
 * Creates a tokenizer over the given character buffer. The buffer
 * content would typically be a Javadoc comment extracted by
 * JavaTokenizer.
 *
 * @param fac the factory which created this Scanner; it is also retained
 *            in the {@code fac} field of this tokenizer.
 * @param cb the input character buffer.
 */
protected JavadocTokenizer(ScannerFactory fac, CharBuffer cb) {
super(fac, cb);
// Retain the factory alongside whatever the superclass does with it.
this.fac = fac;
}
/** Create a scanner from the input array. The array must have at
* least a single character of extra space.
/**
* Create a tokenizer from the input array. The input buffer
* content would typically be a Javadoc comment extracted by
* JavaTokenizer.
*
* @param fac factory which created this Scanner
* @param array input character array.
* @param length length of the meaningful content in the array.
*/
protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) {
super(fac, input, inputLength);
protected JavadocTokenizer(ScannerFactory fac, char[] array, int length) {
// The superclass receives the array together with the length of its meaningful content.
super(fac, array, length);
// Retain the factory alongside whatever the superclass does with it.
this.fac = fac;
}
@Override
protected Comment processComment(int pos, int endPos, CommentStyle style) {
char[] buf = reader.getRawCharacters(pos, endPos);
return new JavadocComment(new DocReader(fac, buf, buf.length, pos), style);
char[] buf = getRawCharacters(pos, endPos);
return new JavadocComment(style, fac, buf, pos);
}
/**
* This is a specialized version of UnicodeReader that keeps track of the
* column position within a given character stream (used for Javadoc processing),
* and which builds a table for mapping positions in the comment string to
* positions in the source file.
* An extension of BasicComment used to extract the relevant portion
* of a Javadoc comment.
*/
static class DocReader extends UnicodeReader {
int col;
int startPos;
/**
* A buffer for building a table for mapping positions in {@link #sbuf}
* to positions in the source buffer.
*
* The array is organized as a series of pairs of integers: the first
* number in each pair specifies a position in the comment text,
* the second number in each pair specifies the corresponding position
* in the source buffer. The pairs are sorted in ascending order.
*
* Since the mapping function is generally continuous, with successive
* positions in the string corresponding to successive positions in the
* source buffer, the table only needs to record discontinuities in
* the mapping. The values of intermediate positions can be inferred.
*
* Discontinuities may occur in a number of places: when a newline
* is followed by whitespace and asterisks (which are ignored),
* when a tab is expanded into spaces, and when unicode escapes
* are used in the source buffer.
*
* Thus, to find the source position of any position, p, in the comment
* string, find the index, i, of the pair whose string offset
* ({@code pbuf[i] }) is closest to but not greater than p. Then,
* {@code sourcePos(p) = pbuf[i+1] + (p - pbuf[i]) }.
*/
int[] pbuf = new int[128];
/**
* The index of the next empty slot in the pbuf buffer.
*/
int pp = 0;
/** The buffer index of the last double backslash sequence
*/
private int doubleBackslashBp = -1;
DocReader(ScannerFactory fac, char[] input, int inputLength, int startPos) {
super(fac, input, inputLength);
this.startPos = startPos;
}
@Override
protected void convertUnicode() {
if (ch == '\\' && unicodeConversionBp != bp) {
bp++; ch = buf[bp]; col++;
if (ch == 'u') {
do {
bp++; ch = buf[bp]; col++;
} while (ch == 'u');
int limit = bp + 3;
if (limit < buflen) {
int d = digit(bp, 16);
int code = d;
while (bp < limit && d >= 0) {
bp++; ch = buf[bp]; col++;
d = digit(bp, 16);
code = (code << 4) + d;
}
if (d >= 0) {
ch = (char)code;
unicodeConversionBp = bp;
return;
}
}
// "illegal.Unicode.esc", reported by base scanner
} else {
bp--;
ch = '\\';
col--;
}
}
}
@Override
protected void scanCommentChar() {
scanChar();
if (ch == '\\') {
if (peekChar() == '\\' && !isUnicode()) {
bp++; col++;
doubleBackslashBp = bp;
} else {
convertUnicode();
}
}
}
@Override
protected void scanChar() {
bp++;
ch = buf[bp];
switch (ch) {
case '\r': // return
col = 0;
break;
case '\n': // newline
if (bp == 0 || buf[bp-1] != '\r') {
col = 0;
}
break;
case '\t': // tab
col = (col / TabInc * TabInc) + TabInc;
break;
case '\\': // possible Unicode
col++;
convertUnicode();
break;
default:
col++;
break;
}
}
@Override
public void putChar(char ch, boolean scan) {
// At this point, bp is the position of the current character in buf,
// and sp is the position in sbuf where this character will be put.
// Record a new entry in pbuf if pbuf is empty or if sp and its
// corresponding source position are not equidistant from the
// corresponding values in the latest entry in the pbuf array.
// (i.e. there is a discontinuity in the map function.)
if ((pp == 0)
|| (sp - pbuf[pp - 2] != (startPos + bp) - pbuf[pp - 1])) {
if (pp + 1 >= pbuf.length) {
int[] new_pbuf = new int[pbuf.length * 2];
System.arraycopy(pbuf, 0, new_pbuf, 0, pbuf.length);
pbuf = new_pbuf;
}
pbuf[pp] = sp;
pbuf[pp + 1] = startPos + bp;
pp += 2;
}
super.putChar(ch, scan);
}
/** Whether the ch represents a sequence of two backslashes. */
boolean isDoubleBackslash() {
return doubleBackslashBp == bp;
}
}
protected static class JavadocComment extends JavaTokenizer.BasicComment<DocReader> {
protected static class JavadocComment extends BasicComment {
/**
* Pattern used to detect a well formed @deprecated tag in a Javadoc
* comment.
*/
private static final Pattern DEPRECATED_PATTERN =
Pattern.compile("(?sm).*^\\s*@deprecated( |$).*");
/**
* Translated and stripped contents of doc comment
*/
* The relevant portion of the comment that is of interest to Javadoc.
* Produced by invoking scanDocComment.
*/
private String docComment = null;
private int[] docPosns = null;
JavadocComment(DocReader reader, CommentStyle cs) {
super(reader, cs);
/**
* StringBuilder used to extract the relevant portion of the Javadoc comment.
*/
private final StringBuilder sb;
/**
* Map used to map the extracted Javadoc comment's character positions back to
* the original source.
*/
OffsetMap offsetMap = new OffsetMap();
JavadocComment(CommentStyle cs, ScannerFactory sf, char[] array, int offset) {
super( cs, sf, array, offset);
this.sb = new StringBuilder();
}
/**
 * Appends one character to the extraction buffer. Before the append, the
 * pair (current buffer length, current offset position) is offered to the
 * offset map so the new character can be traced back to the source.
 *
 * @param ch character to add.
 */
protected void put(char ch) {
    int sbOffset = sb.length();
    int sourceOffset = offsetPosition();
    offsetMap.add(sbOffset, sourceOffset);
    sb.append(ch);
}
/**
 * Appends one code point to the extraction buffer. Before the append, the
 * pair (current buffer length, current offset position) is offered to the
 * offset map so the new code point can be traced back to the source.
 *
 * @param codePoint code point to add.
 */
protected void putCodePoint(int codePoint) {
    int sbOffset = sb.length();
    int sourceOffset = offsetPosition();
    offsetMap.add(sbOffset, sourceOffset);
    sb.appendCodePoint(codePoint);
}
/**
 * Appends whatever is currently being observed to the extraction buffer:
 * the value of {@code getCodepoint()} when {@code isSurrogate()} is true,
 * otherwise the value of {@code get()}.
 */
protected void put() {
    if (!isSurrogate()) {
        // Plain character.
        put(get());
        return;
    }
    // Surrogate: append the full code point.
    putCodePoint(getCodepoint());
}
@Override
@ -240,232 +158,292 @@ public class JavadocTokenizer extends JavaTokenizer {
@Override
public int getSourcePos(int pos) {
// Binary search to find the entry for which the string index is
// less than pos. Since docPosns is a list of pairs of integers
// we must make sure the index is always even.
// If we find an exact match for pos, the other item in the pair
// gives the source pos; otherwise, compute the source position
// relative to the best match found in the array.
if (pos == Position.NOPOS)
if (pos == Position.NOPOS) {
return Position.NOPOS;
if (pos < 0 || pos > docComment.length())
throw new StringIndexOutOfBoundsException(String.valueOf(pos));
if (docPosns == null)
return Position.NOPOS;
int start = 0;
int end = docPosns.length;
while (start < end - 2) {
// find an even index midway between start and end
int index = ((start + end) / 4) * 2;
if (docPosns[index] < pos)
start = index;
else if (docPosns[index] == pos)
return docPosns[index + 1];
else
end = index;
}
return docPosns[start + 1] + (pos - docPosns[start]);
if (pos < 0 || pos > docComment.length()) {
throw new StringIndexOutOfBoundsException(String.valueOf(pos));
}
return offsetMap.getSourcePos(pos);
}
@Override
@SuppressWarnings("fallthrough")
protected void scanDocComment() {
try {
boolean firstLine = true;
// Skip over first slash
comment_reader.scanCommentChar();
// Skip over first star
comment_reader.scanCommentChar();
// Skip over /*
accept("/*");
// consume any number of stars
while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
comment_reader.scanCommentChar();
}
// is the comment in the form /**/, /***/, /****/, etc. ?
if (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '/') {
// Consume any number of stars
skip('*');
// Is the comment in the form /**/, /***/, /****/, etc. ?
if (is('/')) {
docComment = "";
return;
}
// skip a newline on the first line of the comment.
if (comment_reader.bp < comment_reader.buflen) {
if (comment_reader.ch == LF) {
comment_reader.scanCommentChar();
firstLine = false;
} else if (comment_reader.ch == CR) {
comment_reader.scanCommentChar();
if (comment_reader.ch == LF) {
comment_reader.scanCommentChar();
firstLine = false;
}
}
// Skip line terminator on the first line of the comment.
if (isOneOf('\n', '\r')) {
accept('\r');
accept('\n');
firstLine = false;
}
outerLoop:
// The outerLoop processes the doc comment, looping once
// for each line. For each line, it first strips off
// whitespace, then it consumes any stars, then it
// puts the rest of the line into our buffer.
while (comment_reader.bp < comment_reader.buflen) {
int begin_bp = comment_reader.bp;
char begin_ch = comment_reader.ch;
// The wsLoop consumes whitespace from the beginning
// of each line.
wsLoop:
while (comment_reader.bp < comment_reader.buflen) {
switch(comment_reader.ch) {
case ' ':
comment_reader.scanCommentChar();
break;
case '\t':
comment_reader.col = ((comment_reader.col - 1) / TabInc * TabInc) + TabInc;
comment_reader.scanCommentChar();
break;
case FF:
comment_reader.col = 0;
comment_reader.scanCommentChar();
break;
// Treat newline at beginning of line (blank line, no star)
// as comment text. Old Javadoc compatibility requires this.
/*---------------------------------*
case CR: // (Spec 3.4)
doc_reader.scanCommentChar();
if (ch == LF) {
col = 0;
doc_reader.scanCommentChar();
}
break;
case LF: // (Spec 3.4)
doc_reader.scanCommentChar();
break;
*---------------------------------*/
default:
// we've seen something that isn't whitespace;
// jump out.
break wsLoop;
}
}
// puts the rest of the line into the extraction buffer.
while (isAvailable()) {
int begin_pos = position();
// Consume whitespace from the beginning of each line.
skipWhitespace();
// Are there stars here? If so, consume them all
// and check for the end of comment.
if (comment_reader.ch == '*') {
if (is('*')) {
// skip all of the stars
do {
comment_reader.scanCommentChar();
} while (comment_reader.ch == '*');
skip('*');
// check for the closing slash.
if (comment_reader.ch == '/') {
// We're done with the doc comment
// scanChar() and breakout.
if (accept('/')) {
// We're done with the Javadoc comment
break outerLoop;
}
} else if (! firstLine) {
} else if (!firstLine) {
// The current line does not begin with a '*' so we will
// treat it as comment
comment_reader.bp = begin_bp;
comment_reader.ch = begin_ch;
reset(begin_pos);
}
// The textLoop processes the rest of the characters
// on the line, adding them to our buffer.
textLoop:
while (comment_reader.bp < comment_reader.buflen) {
switch (comment_reader.ch) {
case '*':
// Is this just a star? Or is this the
// end of a comment?
comment_reader.scanCommentChar();
if (comment_reader.ch == '/') {
// This is the end of the comment,
// set ch and return our buffer.
break outerLoop;
}
// This is just an ordinary star. Add it to
// the buffer.
comment_reader.putChar('*', false);
break;
case '\\':
comment_reader.putChar('\\', false);
// If a double backslash was found, write two
if (comment_reader.isDoubleBackslash()) {
comment_reader.putChar('\\', false);
}
comment_reader.scanCommentChar();
break;
case ' ':
case '\t':
comment_reader.putChar(comment_reader.ch, false);
comment_reader.scanCommentChar();
break;
case FF:
comment_reader.scanCommentChar();
break textLoop; // treat as end of line
case CR: // (Spec 3.4)
comment_reader.scanCommentChar();
if (comment_reader.ch != LF) {
// Canonicalize CR-only line terminator to LF
comment_reader.putChar((char)LF, false);
break textLoop;
}
/* fall through to LF case */
case LF: // (Spec 3.4)
// The textLoop processes the rest of the characters
// on the line, adding them to the extraction buffer.
while (isAvailable()) {
if (accept("*/")) {
// This is the end of the comment, return
// the contents of the extraction buffer.
break outerLoop;
} else if (isOneOf('\n', '\r')) {
// We've seen a newline. Add it to our
// buffer and break out of this loop,
// starting fresh on a new line.
comment_reader.putChar(comment_reader.ch, false);
comment_reader.scanCommentChar();
put('\n');
accept('\r');
accept('\n');
break textLoop;
default:
} else if (is('\f')){
next();
break textLoop; // treat as end of line
} else {
// Add the character to our buffer.
comment_reader.putChar(comment_reader.ch, false);
comment_reader.scanCommentChar();
put();
next();
}
} // end textLoop
firstLine = false;
} // end outerLoop
if (comment_reader.sp > 0) {
int i = comment_reader.sp - 1;
trailLoop:
while (i > -1) {
switch (comment_reader.sbuf[i]) {
case '*':
i--;
break;
default:
break trailLoop;
}
// If extraction buffer is not empty.
if (sb.length() > 0) {
// Remove trailing asterisks.
int i = sb.length() - 1;
while (i > -1 && sb.charAt(i) == '*') {
i--;
}
comment_reader.sp = i + 1;
sb.setLength(i + 1) ;
// Store the text of the doc comment
docComment = comment_reader.chars();
docPosns = new int[comment_reader.pp];
System.arraycopy(comment_reader.pbuf, 0, docPosns, 0, docPosns.length);
} else {
docComment = sb.toString();
} else {
docComment = "";
}
} finally {
scanned = true;
comment_reader = null;
if (docComment != null &&
DEPRECATED_PATTERN.matcher(docComment).matches()) {
// Check if comment contains @deprecated comment.
if (docComment != null && DEPRECATED_PATTERN.matcher(docComment).matches()) {
deprecatedFlag = true;
}
}
}
//where:
private static final Pattern DEPRECATED_PATTERN =
Pattern.compile("(?sm).*^\\s*@deprecated( |$).*");
}
/**
* Build a map for translating between line numbers and positions in the input.
* Overridden to expand tabs.
*
* @return a LineMap
*/
@Override
public Position.LineMap getLineMap() {
char[] buf = reader.getRawCharacters();
char[] buf = getRawCharacters();
return Position.makeLineMap(buf, buf.length, true);
}
/**
 * Builds an int table mapping positions in the extracted Javadoc comment
 * to positions in the JavaTokenizer source buffer.
 *
 * The array is organized as a series of pairs of integers: the first
 * number in each pair specifies a position in the comment text,
 * the second number in each pair specifies the corresponding position
 * in the source buffer. The pairs are sorted in ascending order.
 *
 * Since the mapping function is generally continuous, with successive
 * positions in the string corresponding to successive positions in the
 * source buffer, the table only needs to record discontinuities in
 * the mapping. The values of intermediate positions can be inferred.
 *
 * Discontinuities may occur in a number of places: when a newline
 * is followed by whitespace and asterisks (which are ignored),
 * when a tab is expanded into spaces, and when unicode escapes
 * are used in the source buffer.
 *
 * Thus, to find the source position of any position, p, in the comment
 * string, find the index, i, of the pair whose string offset
 * ({@code map[i * NOFFSETS + SB_OFFSET] }) is closest to but not greater
 * than p. Then, {@code sourcePos(p) = map[i * NOFFSETS + POS_OFFSET] +
 * (p - map[i * NOFFSETS + SB_OFFSET]) }.
 */
static class OffsetMap {
    /**
     * Map entry offset for comment offset member of pair.
     */
    private static final int SB_OFFSET = 0;

    /**
     * Map entry offset of input offset member of pair.
     */
    private static final int POS_OFFSET = 1;

    /**
     * Number of elements in each entry.
     */
    private static final int NOFFSETS = 2;

    /**
     * Array storing entries in map.
     */
    private int[] map;

    /**
     * Number of array slots in use. Each entry occupies NOFFSETS slots, so
     * the number of valid entries is {@code size / NOFFSETS}.
     */
    private int size;

    /**
     * Constructor. Starts with room for 128 slots and no entries.
     */
    OffsetMap() {
        this.map = new int[128];
        this.size = 0;
    }

    /**
     * Returns true if it is worthwhile adding the entry pair to the map. That is
     * if there is a change in relative offset compared with the last entry.
     *
     * @param sbOffset comment offset member of pair.
     * @param posOffet input offset member of pair.
     *
     * @return true if it is worthwhile adding the entry pair.
     */
    boolean shouldAdd(int sbOffset, int posOffet) {
        return sbOffset - lastSBOffset() != posOffet - lastPosOffset();
    }

    /**
     * Adds entry pair if worthwhile. The very first pair is always recorded.
     *
     * @param sbOffset comment offset member of pair.
     * @param posOffet input offset member of pair.
     *
     * @throws IndexOutOfBoundsException if the map cannot grow any further.
     */
    void add(int sbOffset, int posOffet) {
        if (size == 0 || shouldAdd(sbOffset, posOffet)) {
            ensure(NOFFSETS);
            map[size + SB_OFFSET] = sbOffset;
            map[size + POS_OFFSET] = posOffet;
            size += NOFFSETS;
        }
    }

    /**
     * Returns the previous comment offset.
     *
     * @return the previous comment offset, or 0 if the map is empty.
     */
    private int lastSBOffset() {
        return size == 0 ? 0 : map[size - NOFFSETS + SB_OFFSET];
    }

    /**
     * Returns the previous input offset.
     *
     * @return the previous input offset, or 0 if the map is empty.
     */
    private int lastPosOffset() {
        return size == 0 ? 0 : map[size - NOFFSETS + POS_OFFSET];
    }

    /**
     * Ensures there is enough space for a new entry, doubling the array
     * until it is large enough.
     *
     * @param need number of array slots needed.
     *
     * @throws IndexOutOfBoundsException if the required capacity does not
     *         fit in an int.
     */
    private void ensure(int need) {
        int required = size + need;
        // size + need itself may wrap around.
        if (required < 0) {
            throw new IndexOutOfBoundsException();
        }
        int capacity = map.length;
        while (capacity < required) {
            capacity <<= 1;
            // Doubling past 2^30 wraps to a negative value (and then to 0);
            // detect that here, otherwise the loop would never terminate.
            if (capacity <= 0) {
                throw new IndexOutOfBoundsException();
            }
        }
        if (capacity != map.length) {
            map = Arrays.copyOf(map, capacity);
        }
    }

    /**
     * Binary search to find the entry for which the string index is less
     * than pos. Since the map is a list of pairs of integers we must make
     * sure the index is always NOFFSETS scaled. If we find an exact match
     * for pos, the other item in the pair gives the source pos; otherwise,
     * compute the source position relative to the best match found in the
     * array.
     *
     * @param pos position in the comment string.
     *
     * @return the corresponding source position, or Position.NOPOS if the
     *         map has no entries.
     */
    int getSourcePos(int pos) {
        if (size == 0) {
            return Position.NOPOS;
        }
        // Entry indices (not slot indices): search in [start, end).
        int start = 0;
        int end = size / NOFFSETS;
        while (start < end - 1) {
            // Find an index midway between start and end.
            int index = start + (end - start) / 2;
            int indexScaled = index * NOFFSETS;
            int sbOffset = map[indexScaled + SB_OFFSET];
            if (sbOffset < pos) {
                start = index;
            } else if (sbOffset == pos) {
                return map[indexScaled + POS_OFFSET];
            } else {
                end = index;
            }
        }
        // Extrapolate from the closest entry that does not exceed pos.
        int startScaled = start * NOFFSETS;
        return map[startScaled + POS_OFFSET] + (pos - map[startScaled + SB_OFFSET]);
    }
}
}

View File

@ -0,0 +1,101 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.tools.javac.parser;
import java.util.HashSet;
import java.util.Set;
/**
* Methods used to support text blocks lint.
*
* <p><b>This is NOT part of any supported API.
* If you write code that depends on this, you do so at your own risk.
* This code and its internal interfaces are subject to change or
* deletion without notice.</b>
*/
class TextBlockSupport {
    /**
     * Kinds of questionable white space usage that can be detected in
     * text block content.
     */
    enum WhitespaceChecks {
        INCONSISTENT,
        TRAILING
    }

    /**
     * Check that the use of white space in content is not problematic.
     *
     * <p>INCONSISTENT is reported when a non-blank line does not start with
     * the indentation prefix taken from the last line. TRAILING is reported
     * when a line longer than the common indentation ends in a white space
     * character. When the content ends with a line terminator the common
     * indentation is taken to be zero.
     *
     * @param string text block content to examine.
     *
     * @return set of detected problems; empty when none were found.
     */
    static Set<WhitespaceChecks> checkWhitespace(String string) {
        Set<WhitespaceChecks> found = new HashSet<>();
        // Nothing to examine in an empty string.
        if (string.isEmpty()) {
            return found;
        }
        // A trailing line terminator means the author opted out of indentation stripping.
        char finalChar = string.charAt(string.length() - 1);
        boolean optOut = finalChar == '\n' || finalChar == '\r';
        // Break content into lines; the last one is the reference line.
        String[] lines = string.split("\\R");
        String lastLine = lines.length == 0 ? "" : lines[lines.length - 1];
        // Maximum common indentation (zero when opting out.)
        int outdent = optOut ? 0 : commonIndentation(lines, lastLine);
        // Indentation every non-blank line is expected to begin with.
        String prefix = lastLine.substring(0, outdent);
        for (String line : lines) {
            boolean samePrefix = line.isBlank() || line.startsWith(prefix);
            if (!samePrefix) {
                // Mix of different white space.
                found.add(WhitespaceChecks.INCONSISTENT);
            }
            // Only lines with something left after the indent is removed.
            if (outdent < line.length()
                    && Character.isWhitespace(line.charAt(line.length() - 1))) {
                found.add(WhitespaceChecks.TRAILING);
            }
        }
        return found;
    }

    /**
     * Computes the smallest indentation over the last line and all
     * non-blank lines, stopping early once it reaches zero.
     */
    private static int commonIndentation(String[] lines, String lastLine) {
        // The last line always participates, even when blank.
        int outdent = indexOfNonWhitespace(lastLine);
        for (String line : lines) {
            if (line.isBlank()) {
                continue;
            }
            outdent = Integer.min(outdent, indexOfNonWhitespace(line));
            if (outdent == 0) {
                break;
            }
        }
        return outdent;
    }

    /**
     * Returns the number of leading white space characters in the string.
     */
    private static int indexOfNonWhitespace(String string) {
        String stripped = string.stripLeading();
        return string.length() - stripped.length();
    }
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,267 +25,503 @@
package com.sun.tools.javac.parser;
import java.nio.CharBuffer;
import java.util.Arrays;
import com.sun.tools.javac.file.JavacFileManager;
import com.sun.tools.javac.resources.CompilerProperties.Errors;
import com.sun.tools.javac.util.ArrayUtils;
import com.sun.tools.javac.util.Log;
import com.sun.tools.javac.util.Name;
import com.sun.tools.javac.util.Names;
import static com.sun.tools.javac.util.LayoutCharacters.*;
import static com.sun.tools.javac.util.LayoutCharacters.EOI;
import static com.sun.tools.javac.util.LayoutCharacters.tabulate;
/** The char reader used by the javac lexer/tokenizer. Returns the sequence of
* characters contained in the input stream, handling unicode escape accordingly.
* Additionally, it provides features for saving chars into a buffer and to retrieve
* them at a later stage.
/**
* The unicode character reader used by the javac/javadoc lexer/tokenizer, returns characters
* one by one as contained in the input stream, handling unicode escape sequences accordingly.
*
* <p><b>This is NOT part of any supported API.
* If you write code that depends on this, you do so at your own risk.
* This code and its internal interfaces are subject to change or
* deletion without notice.</b>
* deletion without notice.</b></p>
*/
public class UnicodeReader {
/** The input buffer, index of next character to be read,
* index of one past last character in buffer.
/**
* Buffer containing characters from source file. May contain extraneous characters
* beyond this.length.
*/
protected char[] buf;
protected int bp;
protected final int buflen;
/** The current character.
*/
protected char ch;
/** The buffer index of the last converted unicode character
*/
protected int unicodeConversionBp = -1;
protected Log log;
protected Names names;
/** A character buffer for saved chars.
*/
protected char[] sbuf = new char[128];
protected int realLength;
protected int sp;
private final char[] buffer;
/**
* Create a scanner from the input array. This method might
* modify the array. To avoid copying the input array, ensure
* that {@code inputLength < input.length} or
* {@code input[input.length -1]} is a white space character.
* Length of meaningful content in buffer.
*/
private final int length;
/**
* Character buffer index of character currently being observed.
*/
private int position;
/**
* Number of characters combined to provide character currently being observed. Typically
* one, but may be more when combinations of surrogate pairs and unicode escape sequences
* are read.
*/
private int width;
/**
* Character currently being observed. If a surrogate pair is read then will be the high
* member of the pair.
*/
private char character;
/**
* Codepoint of character currently being observed. Typically equivalent to the character
* but will have a value greater than 0xFFFF when a surrogate pair.
*/
private int codepoint;
/**
* true if the last character was a backslash. This is used to handle the special case
* when a backslash precedes a unicode escape. In that case, the second backslash
* is treated as a backslash and not part of a unicode escape.
*/
private boolean wasBackslash;
/**
* Log for error reporting.
*/
private final Log log;
/**
* Constructor.
*
* @param sf the factory which created this Scanner
* @param buffer the input, might be modified
* Must be positive and less than or equal to input.length.
* @param sf scan factory.
* @param array array containing contents of source.
* @param length length of meaningful content in buffer.
*/
protected UnicodeReader(ScannerFactory sf, CharBuffer buffer) {
this(sf, JavacFileManager.toArray(buffer), buffer.limit());
protected UnicodeReader(ScannerFactory sf, char[] array, int length) {
    // Collaborators and input.
    log = sf.log;
    buffer = array;
    this.length = length;
    // Reading state: at the start of the buffer, nothing observed yet.
    position = 0;
    width = 0;
    character = '\0';
    codepoint = 0;
    wasBackslash = false;
    // Prime the reader with the first code point.
    nextCodePoint();
}
protected UnicodeReader(ScannerFactory sf, char[] input, int inputLength) {
log = sf.log;
names = sf.names;
realLength = inputLength;
if (inputLength == input.length) {
if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
inputLength--;
} else {
input = Arrays.copyOf(input, inputLength + 1);
/**
 * Returns the length of the meaningful content in the buffer, which is
 * not necessarily the length of the underlying array.
 *
 * @return length of the meaningful content.
 */
protected int length() {
    return this.length;
}
/**
 * Tells whether the current position lies before the end of the
 * meaningful content of the buffer.
 *
 * @return true if the current position is less than the content length.
 */
protected boolean isAvailable() {
    return this.position < this.length;
}
/**
 * Fetches the next 16-bit character from the buffer and places it in this.character.
 * When the end of the meaningful content has been reached, EOI is stored
 * instead and the width of the current codepoint is left unchanged.
 */
private void nextCodeUnit() {
    // The unit to read sits just past those already consumed for this codepoint.
    int next = position + width;
    if (next < length) {
        character = buffer[next];
        // One more unit belongs to the current codepoint.
        width++;
    } else {
        // End of input is marked with EOI.
        character = EOI;
    }
}
/**
 * Advances to the next 16-bit character of the buffer. When that character
 * is a backslash that does not directly follow another plain backslash,
 * unicodeEscape() is given the chance to convert an escape sequence.
 */
private void nextUnicodeInputCharacter() {
    // Step over the previous codepoint and start a new, empty one.
    position += width;
    width = 0;
    nextCodeUnit();
    // Remember whether the previous character was a plain backslash, then clear the flag.
    final boolean afterBackslash = wasBackslash;
    wasBackslash = false;
    if (!afterBackslash && character == '\\') {
        // May be a unicode escape; if it is not, this is a plain backslash.
        wasBackslash = !unicodeEscape();
    }
    // Codepoint and character match if not surrogate.
    codepoint = (int)character;
}
/**
 * Fetches the next code point from the buffer. If a unicode escape is recognized
 * then it is converted to a character. If two successive characters form a
 * surrogate pair then they are combined into a single codepoint.
 */
private void nextCodePoint() {
    // Next unicode character.
    nextUnicodeInputCharacter();
    // Nothing more to do unless a non-ASCII high surrogate was read.
    if (isASCII() || !Character.isHighSurrogate(character)) {
        return;
    }
    // Remember the high surrogate and the span it occupies.
    final char high = character;
    final int highPosition = position;
    final int highWidth = width;
    // Read the candidate low surrogate.
    nextUnicodeInputCharacter();
    final char low = character;
    // In both outcomes the current codepoint starts at the high surrogate.
    position = highPosition;
    if (!Character.isLowSurrogate(low)) {
        // Not a pair: back up and treat the high surrogate as just a character.
        width = highWidth;
        character = high;
        codepoint = (int)high;
        // Could potentially report an error here (old code did not.)
        return;
    }
    // A pair: the codepoint spans both members.
    // NOTE(review): character is left holding the low surrogate here - confirm
    // this is intended.
    width += highWidth;
    codepoint = Character.toCodePoint(high, low);
}
/**
* Converts an unicode escape into a character.
*
* @return true if was an unicode escape.
*/
private boolean unicodeEscape() {
// Start of unicode escape (past backslash.)
int start = position + width;
// Default to backslash result, unless proven otherwise.
character = '\\';
width = 1;
// Skip multiple 'u'.
int index;
for (index = start; index < length; index++) {
if (buffer[index] != 'u') {
break;
}
}
buf = input;
buflen = inputLength;
buf[buflen] = EOI;
bp = -1;
scanChar();
}
/** Read next character.
*/
protected void scanChar() {
if (bp < buflen) {
ch = buf[++bp];
if (ch == '\\') {
convertUnicode();
}
}
}
/** Read next character in comment, skipping over double '\' characters.
*/
protected void scanCommentChar() {
scanChar();
if (ch == '\\') {
if (peekChar() == '\\' && !isUnicode()) {
skipChar();
} else {
convertUnicode();
}
}
}
/** Append 'ch' to sbuf (growing it if required) and, when 'scan' is set,
 *  read the next input character afterwards.
 */
protected void putChar(char ch, boolean scan) {
    sbuf = ArrayUtils.ensureCapacity(sbuf, sp);
    sbuf[sp++] = ch;
    if (scan) {
        scanChar();
    }
}

/** Append 'ch' to sbuf without reading further input.
 */
protected void putChar(char ch) {
    putChar(ch, false);
}

/** Append the current character to sbuf and, when 'scan' is set,
 *  read the next input character afterwards.
 */
protected void putChar(boolean scan) {
    putChar(ch, scan);
}
/** Read the next input character; unless 'skip' is set, the current
 *  character is first appended to sbuf.
 */
protected void nextChar(boolean skip) {
    if (skip) {
        scanChar();
        return;
    }
    sbuf = ArrayUtils.ensureCapacity(sbuf, sp);
    sbuf[sp++] = ch;
    scanChar();
}
/** Return the Name built from the first sp characters of sbuf.
 */
Name name() {
    final int start = 0;
    return names.fromChars(sbuf, start, sp);
}
/** Return the first sp characters of sbuf as a new String.
 */
String chars() {
    return String.valueOf(sbuf, 0, sp);
}
/** Append the character 'ch' to the string buffer 'count' times.
 *  Nothing is appended when 'count' is zero or negative.
 */
protected void repeat(char ch, int count) {
    int remaining = count;
    while (remaining > 0) {
        putChar(ch, false);
        remaining--;
    }
}
/** Move the scan buffer pointer back to 'pos' and re-read the character there.
 */
protected void reset(int pos) {
    // scanChar() pre-increments bp, so park it one slot before the target.
    final int beforePos = pos - 1;
    bp = beforePos;
    scanChar();
}
/** Convert unicode escape; bp points to initial '\' character
* (Spec 3.3).
*
* On a well-formed escape (backslash, one or more 'u', four hex digits)
* ch becomes the decoded character, bp is left on the last hex digit and
* that index is recorded in unicodeConversionBp. On a malformed escape an
* IllegalUnicodeEsc error is logged at the current bp. If the backslash is
* not followed by 'u', bp and ch are restored to the backslash.
*/
protected void convertUnicode() {
// Act only on a backslash at a position not already recorded as the
// end of a converted escape.
if (ch == '\\' && unicodeConversionBp != bp ) {
bp++; ch = buf[bp];
if (ch == 'u') {
// Any number of 'u' characters may follow the backslash.
do {
bp++; ch = buf[bp];
} while (ch == 'u');
// bp is now on the first candidate hex digit; limit is the index
// of the fourth one.
int limit = bp + 3;
if (limit < buflen) {
int d = digit(bp, 16);
int code = d;
// Fold in the remaining digits; stops early once a digit is invalid (d < 0).
while (bp < limit && d >= 0) {
bp++; ch = buf[bp];
d = digit(bp, 16);
code = (code << 4) + d;
}
// d >= 0 here means every digit read was valid.
if (d >= 0) {
ch = (char)code;
unicodeConversionBp = bp;
return;
}
}
// Too few characters left in the buffer, or an invalid hex digit.
log.error(bp, Errors.IllegalUnicodeEsc);
} else {
// Not an escape: step back so the backslash is the current character again.
bp--;
ch = '\\';
}
}
}
/** Are surrogates supported?
*/
final static boolean surrogatesSupported = surrogatesSupported();
private static boolean surrogatesSupported() {
try {
Character.isHighSurrogate('a');
return true;
} catch (NoSuchMethodError ex) {
// Needs to have been at least one u.
if (index == start) {
return false;
}
}
/** Scan surrogate pairs. If 'ch' is a high surrogate and
* the next character is a low surrogate, returns the code point
* constructed from these surrogates. Otherwise, returns -1.
* This method will not consume any of the characters.
*/
protected int peekSurrogates() {
if (surrogatesSupported && Character.isHighSurrogate(ch)) {
char high = ch;
int prevBP = bp;
int code = 0;
scanChar();
for (int i = 0; i < 4; i++) {
// Translate and merge digit.
int digit = index < length ? Character.digit(buffer[index], 16) : -1;
code = code << 4 | digit;
char low = ch;
ch = high;
bp = prevBP;
if (Character.isLowSurrogate(low)) {
return Character.toCodePoint(high, low);
// If invalid digit.
if (code < 0) {
break;
}
// On to next character.
index++;
}
return -1;
// Skip digits even if error.
width = index - position;
// If all digits are good.
if (code >= 0) {
character = (char)code;
} else {
log.error(position, Errors.IllegalUnicodeEsc);
}
// Return true even if error so that the invalid unicode escape is skipped.
return true;
}
/** Convert an ASCII digit from its base (8, 10, or 16)
* to its value.
/**
* Return the current position in the character buffer.
*
* @return current position in the character buffer.
*/
protected int digit(int pos, int base) {
char c = ch;
if ('0' <= c && c <= '9')
return Character.digit(c, base); //a fast common case
int codePoint = peekSurrogates();
int result = codePoint >= 0 ? Character.digit(codePoint, base) : Character.digit(c, base);
if (result >= 0 && c > 0x7f) {
log.error(pos + 1, Errors.IllegalNonasciiDigit);
if (codePoint >= 0)
scanChar();
ch = "0123456789abcdef".charAt(result);
protected int position() {
return position;
}
/**
 * Repositions the reader at the given buffer position and reads the
 * code point found there.
 * Warning: Do not use when previous character was an ASCII or unicode backslash.
 *
 * @param pos  position in the character buffer to restart from.
 */
protected void reset(int pos) {
    // Clear the per-character state before re-reading at the new position.
    wasBackslash = false;
    width = 0;
    position = pos;
    nextCodePoint();
}
/**
 * Returns the character at the current position.
 *
 * @return the current character.
 */
protected char get() {
    return this.character;
}
/**
 * Returns the codepoint at the current position.
 *
 * @return the current codepoint.
 */
protected int getCodepoint() {
    return this.codepoint;
}
/**
 * Returns true if the current codepoint lies above U+FFFF, i.e. it does
 * not fit in a single char.
 *
 * @return true if the current codepoint is greater than 0xFFFF.
 */
protected boolean isSurrogate() {
    return codepoint > 0xFFFF;
}
/**
 * Tests whether the current character is in the ASCII range (at most 0x7F).
 *
 * @return true if the current character is ASCII.
 */
protected boolean isASCII() {
    return !(character > 0x7F);
}
/**
 * Reads the next code point and makes it the current one.
 *
 * @return the character that is current after advancing.
 */
protected char next() {
    nextCodePoint();
    return this.character;
}
/**
 * Tests the current character against {@code ch} without advancing.
 *
 * @param ch  character to match.
 *
 * @return true if the current character equals {@code ch}.
 */
protected boolean is(char ch) {
    return ch == character;
}
/**
 * Tests the current character against each argument in order, without
 * advancing. Returns true on the first match.
 */
protected boolean isOneOf(char ch1, char ch2) {
    if (is(ch1)) {
        return true;
    }
    return is(ch2);
}

protected boolean isOneOf(char ch1, char ch2, char ch3) {
    if (is(ch1)) {
        return true;
    }
    if (is(ch2)) {
        return true;
    }
    return is(ch3);
}

protected boolean isOneOf(char ch1, char ch2, char ch3, char ch4, char ch5, char ch6) {
    if (is(ch1) || is(ch2) || is(ch3)) {
        return true;
    }
    return is(ch4) || is(ch5) || is(ch6);
}
/**
 * Tests whether the current character lies between {@code lo} and
 * {@code hi}, both bounds included.
 *
 * @param lo  lowest character in range.
 * @param hi  highest character in range.
 *
 * @return true if the current character is in range.
 */
protected boolean inRange(char lo, char hi) {
    return character >= lo && character <= hi;
}
/**
 * Consumes the current character if it equals {@code ch}; otherwise
 * leaves the reader where it is.
 *
 * @param ch  character to match.
 *
 * @return true if the character matched and was consumed.
 */
protected boolean accept(char ch) {
    if (!is(ch)) {
        return false;
    }
    next();
    return true;
}
/**
 * Consumes the current character if it equals any of the arguments;
 * otherwise leaves the reader where it is. Returns true if a character
 * was consumed.
 */
protected boolean acceptOneOf(char ch1, char ch2) {
    if (!isOneOf(ch1, ch2)) {
        return false;
    }
    next();
    return true;
}

protected boolean acceptOneOf(char ch1, char ch2, char ch3) {
    if (!isOneOf(ch1, ch2, ch3)) {
        return false;
    }
    next();
    return true;
}
/**
 * Skip over all consecutive occurrences of a character.
 *
 * @param ch  character to skip.
 */
protected void skip(char ch) {
    boolean consumed;
    do {
        consumed = accept(ch);
    } while (consumed);
}
/**
 * Skip over a run of space, tab and form feed characters.
 */
protected void skipWhitespace() {
    boolean consumed;
    do {
        consumed = acceptOneOf(' ', '\t', '\f');
    } while (consumed);
}
/**
 * Advance until the current character is a carriage return or line feed,
 * or until no more input is available. The line terminator itself is not
 * consumed.
 */
protected void skipToEOLN() {
    while (isAvailable() && !isOneOf('\r', '\n')) {
        next();
    }
}
/**
 * Matches the upcoming input against {@code string}, character for
 * character, consuming it on success. On a mismatch the reader is put
 * back where it started. An empty string never matches.
 * Warning: Do not use when previous character was a backslash
 * (confuses state of wasBackslash.)
 *
 * @param string  string to match character for character.
 *
 * @return true if the whole string matched and was consumed.
 */
protected boolean accept(String string) {
    final int count = string.length();

    // Cheap rejection before any state is touched.
    if (count == 0 || !is(string.charAt(0))) {
        return false;
    }

    // Remember where to return to if a later character differs.
    final int restartPosition = position;
    nextCodePoint();

    int i = 1;
    while (i < count) {
        if (!is(string.charAt(i))) {
            // Mismatch: rewind to the first character.
            reset(restartPosition);
            return false;
        }
        nextCodePoint();
        i++;
    }

    return true;
}
/**
 * Convert the current character, taken as a digit in the given radix,
 * to its value. Does not advance character.
 *
 * <p>An ASCII decimal digit is handled directly. Any other character is
 * classified with {@code Character.digit}; if it is a valid digit but not
 * ASCII, an {@code IllegalNonasciiDigit} error is logged at the current
 * position and the current character is replaced by the equivalent ASCII
 * digit.
 *
 * @param pos         starting position. Not read by this method; the error
 *                    is reported at {@code position()}.
 * @param digitRadix  base of number being converted.
 *
 * @return value of digit, or -1 if the current character is not a digit
 *         in the given radix.
 */
protected int digit(int pos, int digitRadix) {
    int result;

    // Just an ASCII digit.
    if (inRange('0', '9')) {
        // Fast common case.
        result = character - '0';

        return result < digitRadix ? result : -1;
    }

    // Handle other digits.
    result = isSurrogate() ? Character.digit(codepoint, digitRadix) :
                             Character.digit(character, digitRadix);

    if (result >= 0 && !isASCII()) {
        log.error(position(), Errors.IllegalNonasciiDigit);
        // forDigit yields the same lower-case ASCII digit as a fixed
        // "0123456789abcdef" table for values 0-15, and stays in bounds
        // for values 16-35 when the radix is above 16.
        character = Character.forDigit(result, digitRadix);
    }

    return result;
}
/** Returns true if the current buffer position is the one recorded in
 *  unicodeConversionBp.
 */
protected boolean isUnicode() {
    return bp == unicodeConversionBp;
}
/** Advance the buffer pointer by one without reading the character there.
 */
protected void skipChar() {
    bp += 1;
}
/** Return the raw buffer character following the current one, without
 *  advancing.
 */
protected char peekChar() {
    final int nextIndex = bp + 1;
    return buf[nextIndex];
}
/**
* Returns a copy of the input buffer, up to its inputLength.
* Unicode escape sequences are not translated.
* Returns the input buffer. Unicode escape sequences are not translated.
*
* @return the input buffer.
*/
public char[] getRawCharacters() {
char[] chars = new char[buflen];
System.arraycopy(buf, 0, chars, 0, buflen);
return chars;
return length == buffer.length ? buffer : Arrays.copyOf(buffer, length);
}
/**
@ -297,15 +533,83 @@ public class UnicodeReader {
* {@code String.substring(beginIndex, endIndex)}.
* Unicode escape sequences are not translated.
*
* @param beginIndex the beginning index, inclusive.
* @param endIndex the ending index, exclusive.
* @param beginIndex the beginning index, inclusive.
* @param endIndex the ending index, exclusive.
*
* @throws ArrayIndexOutOfBoundsException if either offset is outside of the
* array bounds
*/
public char[] getRawCharacters(int beginIndex, int endIndex) {
int length = endIndex - beginIndex;
char[] chars = new char[length];
System.arraycopy(buf, beginIndex, chars, 0, length);
return chars;
return Arrays.copyOfRange(buffer, beginIndex, endIndex);
}
/**
 * A UnicodeReader that additionally maintains a column counter as
 * characters are consumed. Used for Javadoc processing to build a table
 * for mapping positions in the comment string to positions in the
 * source file.
 */
static class PositionTrackingReader extends UnicodeReader {
    /**
     * Offset of this reader's buffer from the beginning of the original
     * reader buffer.
     */
    private final int offset;

    /**
     * Current column in the comment; starts at zero.
     */
    private int column = 0;

    /**
     * Constructor. The whole of {@code array} is used as the input.
     *
     * @param sf      Scan factory.
     * @param array   Array containing contents of source.
     * @param offset  Position offset in original source buffer.
     */
    protected PositionTrackingReader(ScannerFactory sf, char[] array, int offset) {
        super(sf, array, array.length);
        this.offset = offset;
    }

    /**
     * Advances to the next character and updates the column: a line feed,
     * carriage return or form feed resets it to zero, a tab moves it to
     * the next tab stop, and anything else adds one.
     *
     * @return next character.
     */
    @Override
    protected char next() {
        super.next();

        if (isOneOf('\n', '\r', '\f')) {
            column = 0;
        } else {
            column = is('\t') ? tabulate(column) : column + 1;
        }

        return get();
    }

    /**
     * Returns the current column.
     *
     * @return the current column.
     */
    protected int column() {
        return this.column;
    }

    /**
     * Returns position relative to the original source buffer.
     *
     * @return the current position plus the offset given at construction.
     */
    protected int offsetPosition() {
        return offset + position();
    }
}
}

View File

@ -1383,9 +1383,6 @@ compiler.err.unreported.exception.implicit.close=\
unreported exception {0}; must be caught or declared to be thrown\n\
exception thrown from implicit call to close() on resource variable ''{1}''
compiler.err.unsupported.cross.fp.lit=\
hexadecimal floating-point literals are not supported on this VM
compiler.err.void.not.allowed.here=\
''void'' type not allowed here

View File

@ -984,8 +984,6 @@ compiler.err.unreported.exception.default.constructor=\u30C7\u30D5\u30A9\u30EB\u
# 0: type, 1: name
compiler.err.unreported.exception.implicit.close=\u5831\u544A\u3055\u308C\u306A\u3044\u4F8B\u5916{0}\u306F\u3001\u30B9\u30ED\u30FC\u3059\u308B\u306B\u306F\u6355\u6349\u307E\u305F\u306F\u5BA3\u8A00\u3059\u308B\u5FC5\u8981\u304C\u3042\u308A\u307E\u3059\n\u30EA\u30BD\u30FC\u30B9\u5909\u6570''{1}''\u3067\u306Eclose()\u306E\u6697\u9ED9\u7684\u306A\u30B3\u30FC\u30EB\u304B\u3089\u4F8B\u5916\u304C\u30B9\u30ED\u30FC\u3055\u308C\u307E\u3057\u305F
compiler.err.unsupported.cross.fp.lit=16\u9032\u6D6E\u52D5\u5C0F\u6570\u70B9\u30EA\u30C6\u30E9\u30EB\u306F\u3053\u306EVM\u3067\u306F\u30B5\u30DD\u30FC\u30C8\u3055\u308C\u3066\u3044\u307E\u305B\u3093
compiler.err.void.not.allowed.here=\u3053\u3053\u3067''void''\u578B\u3092\u4F7F\u7528\u3059\u308B\u3053\u3068\u306F\u3067\u304D\u307E\u305B\u3093
# 0: string

View File

@ -984,8 +984,6 @@ compiler.err.unreported.exception.default.constructor=\u9ED8\u8BA4\u6784\u9020\u
# 0: type, 1: name
compiler.err.unreported.exception.implicit.close=\u672A\u62A5\u544A\u7684\u5F02\u5E38\u9519\u8BEF{0}; \u5FC5\u987B\u5BF9\u5176\u8FDB\u884C\u6355\u83B7\u6216\u58F0\u660E\u4EE5\u4FBF\u629B\u51FA\n\u5BF9\u8D44\u6E90\u53D8\u91CF ''{1}'' \u9690\u5F0F\u8C03\u7528 close() \u65F6\u629B\u51FA\u4E86\u5F02\u5E38\u9519\u8BEF
compiler.err.unsupported.cross.fp.lit=\u8BE5 VM \u4E0D\u652F\u6301\u5341\u516D\u8FDB\u5236\u6D6E\u70B9\u6587\u5B57
compiler.err.void.not.allowed.here=\u6B64\u5904\u4E0D\u5141\u8BB8\u4F7F\u7528 ''\u7A7A'' \u7C7B\u578B
# 0: string

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -96,7 +96,7 @@ public class DiagnosticSource {
return 0;
}
if (buf[bp] == '\t' && expandTabs) {
column = (column / TabInc * TabInc) + TabInc;
column = tabulate(column);
} else {
column++;
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -68,4 +68,10 @@ public interface LayoutCharacters {
* source file.
*/
final static byte EOI = 0x1A;
/** Bump column to the next tab: round 'column' down to a multiple of
 *  TabInc, then add one TabInc.
 */
static int tabulate(int column) {
    return column - (column % TabInc) + TabInc;
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -265,7 +265,7 @@ public class Position {
int column = 0;
for (int bp = lineStart; bp < pos; bp++) {
if (tabMap.get(bp))
column = (column / TabInc * TabInc) + TabInc;
column = tabulate(column);
else
column++;
}
@ -279,7 +279,7 @@ public class Position {
while (col < column) {
pos++;
if (tabMap.get(pos))
col = (col / TabInc * TabInc) + TabInc;
col = tabulate(col);
else
col++;
}

View File

@ -1,2 +1,2 @@
Digits.java:11:41: compiler.err.illegal.nonascii.digit
Digits.java:11:43: compiler.err.illegal.nonascii.digit
1 error

View File

@ -44,7 +44,6 @@ compiler.err.stack.sim.error
compiler.err.type.var.more.than.once # UNUSED
compiler.err.type.var.more.than.once.in.result # UNUSED
compiler.err.unexpected.type
compiler.err.unsupported.cross.fp.lit # Scanner: host system dependent
compiler.misc.bad.class.signature # bad class file
compiler.misc.bad.const.pool.tag # bad class file
compiler.misc.bad.const.pool.tag.at # bad class file

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -23,10 +23,10 @@
/**
* @test
* @bug 8056897
* @bug 8056897 8254073
* @modules jdk.compiler/com.sun.tools.javac.parser
* jdk.compiler/com.sun.tools.javac.util
* @summary Proper lexing of integer literals.
* @summary Proper lexing of various token kinds.
*/
import java.io.IOException;
@ -43,41 +43,130 @@ import com.sun.tools.javac.parser.Tokens.TokenKind;
import com.sun.tools.javac.util.Context;
import com.sun.tools.javac.util.Log;
import static com.sun.tools.javac.parser.Tokens.TokenKind.*;
public class JavaLexerTest {
public static void main(String... args) throws Exception {
new JavaLexerTest().run();
static final TestTuple[] PASSING_TESTS = {
new TestTuple(FLOATLITERAL, "0.0f"),
new TestTuple(FLOATLITERAL, "0.0F"),
new TestTuple(FLOATLITERAL, ".0F"),
new TestTuple(FLOATLITERAL, "0.F"),
new TestTuple(FLOATLITERAL, "0E0F"),
new TestTuple(FLOATLITERAL, "0E+0F"),
new TestTuple(FLOATLITERAL, "0E-0F"),
new TestTuple(DOUBLELITERAL, "0.0d"),
new TestTuple(DOUBLELITERAL, "0.0D"),
new TestTuple(DOUBLELITERAL, ".0D"),
new TestTuple(DOUBLELITERAL, "0.D"),
new TestTuple(DOUBLELITERAL, "0E0D"),
new TestTuple(DOUBLELITERAL, "0E+0D"),
new TestTuple(DOUBLELITERAL, "0E-0D"),
new TestTuple(DOUBLELITERAL, "0x0.0p0d"),
new TestTuple(DOUBLELITERAL, "0xff.0p8d"),
new TestTuple(STRINGLITERAL, "\"\\u2022\""),
new TestTuple(STRINGLITERAL, "\"\\b\\t\\n\\f\\r\\\'\\\"\\\\\""),
new TestTuple(CHARLITERAL, "\'\\b\'"),
new TestTuple(CHARLITERAL, "\'\\t\'"),
new TestTuple(CHARLITERAL, "\'\\n\'"),
new TestTuple(CHARLITERAL, "\'\\f\'"),
new TestTuple(CHARLITERAL, "\'\\r\'"),
new TestTuple(CHARLITERAL, "\'\\'\'"),
new TestTuple(CHARLITERAL, "\'\\\\'"),
new TestTuple(CHARLITERAL, "\'\\\'\'"),
new TestTuple(CHARLITERAL, "\'\\\"\'"),
new TestTuple(IDENTIFIER, "abc\\u0005def"),
};
static final TestTuple[] FAILING_TESTS = {
new TestTuple(LONGLITERAL, "0bL"),
new TestTuple(LONGLITERAL, "0b20L"),
new TestTuple(LONGLITERAL, "0xL"),
new TestTuple(INTLITERAL, "0xG000L", "0x"),
new TestTuple(DOUBLELITERAL, "0E*0F", "0E"),
new TestTuple(DOUBLELITERAL, "0E*0D", "0E"),
new TestTuple(INTLITERAL, "0xp8d", "0x"),
new TestTuple(DOUBLELITERAL, "0x8pd", "0x8pd"),
new TestTuple(INTLITERAL, "0xpd", "0x"),
new TestTuple(ERROR, "\"\\u20\""),
new TestTuple(ERROR, "\"\\u\""),
new TestTuple(ERROR, "\"\\uG000\""),
new TestTuple(ERROR, "\"\\u \""),
new TestTuple(ERROR, "\"\\q\""),
new TestTuple(ERROR, "\'\'"),
new TestTuple(ERROR, "\'\\q\'", "\'\\"),
};
/**
 * One lexer test case: a token kind, the input text, and the expected text.
 */
static class TestTuple {
    TokenKind kind;
    String input;
    String expected;

    /** Case whose expected text is the input itself. */
    TestTuple(TokenKind kind, String input) {
        this(kind, input, input);
    }

    /** Case with an explicitly given expected text. */
    TestTuple(TokenKind kind, String input, String expected) {
        this.kind = kind;
        this.input = input;
        this.expected = expected;
    }
}
void run() throws Exception {
void test(TestTuple test, boolean willFail) throws Exception {
Context ctx = new Context();
Log log = Log.instance(ctx);
String input = "0bL 0b20L 0xL ";
log.useSource(new SimpleJavaFileObject(new URI("mem://Test.java"), JavaFileObject.Kind.SOURCE) {
@Override
public CharSequence getCharContent(boolean ignoreEncodingErrors) throws IOException {
return input;
return test.input;
}
});
char[] inputArr = input.toCharArray();
JavaTokenizer tokenizer = new JavaTokenizer(ScannerFactory.instance(ctx), inputArr, inputArr.length) {
};
assertKind(input, tokenizer, TokenKind.LONGLITERAL, "0bL");
assertKind(input, tokenizer, TokenKind.LONGLITERAL, "0b20L");
assertKind(input, tokenizer, TokenKind.LONGLITERAL, "0xL");
char[] inputArr = test.input.toCharArray();
JavaTokenizer tokenizer = new JavaTokenizer(ScannerFactory.instance(ctx), inputArr, inputArr.length) {};
Token token = tokenizer.readToken();
boolean failed = log.nerrors != 0;
boolean normal = failed == willFail;
if (!normal) {
System.err.println("input: " + test.input);
String message = willFail ? "Expected to fail: " : "Expected to pass: ";
throw new AssertionError(message + test.input);
}
String actual = test.input.substring(token.pos, token.endPos);
if (token.kind != test.kind) {
System.err.println("input: " + test.input);
throw new AssertionError("Unexpected token kind: " + token.kind.name());
}
if (!Objects.equals(test.expected, actual)) {
System.err.println("input: " + test.input);
throw new AssertionError("Unexpected token content: " + actual);
}
}
void assertKind(String input, JavaTokenizer tokenizer, TokenKind kind, String expectedText) {
Token token = tokenizer.readToken();
if (token.kind != kind) {
throw new AssertionError("Unexpected token kind: " + token.kind);
void run() throws Exception {
for (TestTuple test : PASSING_TESTS) {
test(test, false);
}
String actualText = input.substring(token.pos, token.endPos);
if (!Objects.equals(actualText, expectedText)) {
throw new AssertionError("Unexpected token text: " + actualText);
for (TestTuple test : FAILING_TESTS) {
test(test, true);
}
}
/** Entry point: creates the test object and runs it. */
public static void main(String[] args) throws Exception {
    JavaLexerTest lexerTest = new JavaLexerTest();
    lexerTest.run();
}
}

View File

@ -1,10 +1,10 @@
NonasciiDigit.java:12:24: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:12:18: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:13:19: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:14:24: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:16:27: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:17:22: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:18:22: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:19:22: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:14:18: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:16:21: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:17:23: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:18:25: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:19:23: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:20:22: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:21:27: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:21:21: compiler.err.illegal.nonascii.digit
9 errors

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -25,24 +25,48 @@
* @test
* @bug 4330479
* @summary ASCII SUB character is rejected in multi-line comments
* @author gafter
*
* @compile SubChar.java
* @library /tools/lib
* @modules jdk.compiler/com.sun.tools.javac.api
* jdk.compiler/com.sun.tools.javac.main
* @build toolbox.ToolBox toolbox.JavacTask
* @run main SubChar
*/
import toolbox.JavacTask;
import toolbox.JavaTask;
import toolbox.Task;
import toolbox.ToolBox;
/*
Note: this source file has been crafted very carefully to end with the
unicode escape sequence for the control-Z character without a
following newline. The scanner is specified to allow control-Z there.
If you edit this source file, please make sure that your editor does
not insert a newline after that trailing line.
*/
/** \u001A */
class SubChar {
public static void main(String args[]) {
return;
}
/**
* Test driver that compiles an in-memory source (SOURCE) with JavacTask
* and prints the compiler output.
*/
public class SubChar {
// Tool box handed to the JavacTask below.
private static final ToolBox TOOLBOX = new ToolBox();
// Source text to compile. Each "\\u001A" in this text block produces the
// six characters \u001A in the compiled source, and the trailing backslash
// before the closing delimiter suppresses the final newline, so the source
// ends with a comment containing that unicode escape and no newline after it.
private static final String SOURCE = """
/*
Note: this source file has been crafted very carefully to end with the
unicode escape sequence for the control-Z character without a
following newline. The scanner is specified to allow control-Z there.
If you edit this source file, please make sure that your editor does
not insert a newline after that trailing line.
*/
/** \\u001A */
class ControlZTest {
public static void main(String args[]) {
return;
}
}
/* \\u001A */\
""";
/**
* Compiles SOURCE with "-encoding utf8" and prints the DIRECT output.
* NOTE(review): nothing here inspects the output; presumably a compilation
* failure surfaces as an exception from run() - confirm against the
* toolbox Task API.
*/
public static void main(String... args) {
String output = new JavacTask(TOOLBOX)
.sources(SOURCE)
.classpath(".")
.options("-encoding", "utf8")
.run()
.writeAll()
.getOutput(Task.OutputKind.DIRECT);
System.out.println(output);
}
}
/* \u001A */

View File

@ -1,4 +1,4 @@
SupplementaryJavaID2.java:12:14: compiler.err.illegal.char: \ud801
SupplementaryJavaID2.java:12:20: compiler.err.illegal.char: \ud801
SupplementaryJavaID2.java:12:9: compiler.err.illegal.char: \ud801
SupplementaryJavaID2.java:12:15: compiler.err.illegal.char: \ud801
SupplementaryJavaID2.java:12:24: compiler.err.expected: token.identifier
3 errors

View File

@ -1,3 +1,3 @@
SupplementaryJavaID3.java:12:17: compiler.err.illegal.char: \ud801
SupplementaryJavaID3.java:12:23: compiler.err.illegal.char: \ud801
SupplementaryJavaID3.java:12:12: compiler.err.illegal.char: \ud801
SupplementaryJavaID3.java:12:18: compiler.err.illegal.char: \ud801
2 errors

View File

@ -1,2 +1,2 @@
SupplementaryJavaID4.java:14:14: compiler.err.illegal.char: \ud834\udd7b
SupplementaryJavaID4.java:14:9: compiler.err.illegal.char: \ud834\udd7b
1 error

View File

@ -1,2 +1,2 @@
SupplementaryJavaID5.java:14:17: compiler.err.illegal.char: \ud834\udd00
SupplementaryJavaID5.java:14:12: compiler.err.illegal.char: \ud834\udd00
1 error