8254073: Tokenizer improvements (revised)

Reviewed-by: mcimadamore
This commit is contained in:
Jim Laskey 2020-10-09 11:08:56 +00:00
parent 9cecc16747
commit 4f9a1ffcdd
19 changed files with 1928 additions and 1318 deletions

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2004, 2018, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -29,15 +29,15 @@ import com.sun.tools.javac.parser.Tokens.Comment;
import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle; import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
import com.sun.tools.javac.util.*; import com.sun.tools.javac.util.*;
import java.nio.*; import java.nio.CharBuffer;
import java.util.Arrays;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import static com.sun.tools.javac.util.LayoutCharacters.*; /**
* An extension to the base lexical analyzer (JavaTokenizer) that
/** An extension to the base lexical analyzer that captures * captures and processes the contents of doc comments. It does
* and processes the contents of doc comments. It does so by * so by stripping the leading whitespace and comment starts from
* translating Unicode escape sequences and by stripping the * each line of the Javadoc comment.
* leading whitespace and starts from each line of the comment.
* *
* <p><b>This is NOT part of any supported API. * <p><b>This is NOT part of any supported API.
* If you write code that depends on this, you do so at your own risk. * If you write code that depends on this, you do so at your own risk.
@ -45,189 +45,107 @@ import static com.sun.tools.javac.util.LayoutCharacters.*;
* deletion without notice.</b> * deletion without notice.</b>
*/ */
public class JavadocTokenizer extends JavaTokenizer { public class JavadocTokenizer extends JavaTokenizer {
/**
/** Create a scanner from the input buffer. buffer must implement * The factory that created this Scanner.
* array() and compact(), and remaining() must be less than limit().
*/ */
protected JavadocTokenizer(ScannerFactory fac, CharBuffer buffer) { final ScannerFactory fac;
super(fac, buffer);
/**
* Create a tokenizer from the input character buffer. The input buffer
* content would typically be a Javadoc comment extracted by
* JavaTokenizer.
*
* @param fac the factory which created this Scanner.
* @param cb the input character buffer.
*/
protected JavadocTokenizer(ScannerFactory fac, CharBuffer cb) {
super(fac, cb);
this.fac = fac;
} }
/** Create a scanner from the input array. The array must have at /**
* least a single character of extra space. * Create a tokenizer from the input array. The input buffer
* content would typically be a Javadoc comment extracted by
* JavaTokenizer.
*
* @param fac factory which created this Scanner
* @param array input character array.
* @param length length of the meaningful content in the array.
*/ */
protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) { protected JavadocTokenizer(ScannerFactory fac, char[] array, int length) {
super(fac, input, inputLength); super(fac, array, length);
this.fac = fac;
} }
@Override @Override
protected Comment processComment(int pos, int endPos, CommentStyle style) { protected Comment processComment(int pos, int endPos, CommentStyle style) {
char[] buf = reader.getRawCharacters(pos, endPos); char[] buf = getRawCharacters(pos, endPos);
return new JavadocComment(new DocReader(fac, buf, buf.length, pos), style); return new JavadocComment(style, fac, buf, pos);
} }
/** /**
* This is a specialized version of UnicodeReader that keeps track of the * An extension of BasicComment used to extract the relevant portion
* column position within a given character stream (used for Javadoc processing), * of a Javadoc comment.
* and which builds a table for mapping positions in the comment string to
* positions in the source file.
*/ */
static class DocReader extends UnicodeReader { protected static class JavadocComment extends BasicComment {
/**
int col; * Pattern used to detect a well formed @deprecated tag in a Javadoc
int startPos; * comment.
*/
/** private static final Pattern DEPRECATED_PATTERN =
* A buffer for building a table for mapping positions in {@link #sbuf} Pattern.compile("(?sm).*^\\s*@deprecated( |$).*");
* to positions in the source buffer.
*
* The array is organized as a series of pairs of integers: the first
* number in each pair specifies a position in the comment text,
* the second number in each pair specifies the corresponding position
* in the source buffer. The pairs are sorted in ascending order.
*
* Since the mapping function is generally continuous, with successive
* positions in the string corresponding to successive positions in the
* source buffer, the table only needs to record discontinuities in
* the mapping. The values of intermediate positions can be inferred.
*
* Discontinuities may occur in a number of places: when a newline
* is followed by whitespace and asterisks (which are ignored),
* when a tab is expanded into spaces, and when unicode escapes
* are used in the source buffer.
*
* Thus, to find the source position of any position, p, in the comment
* string, find the index, i, of the pair whose string offset
* ({@code pbuf[i] }) is closest to but not greater than p. Then,
* {@code sourcePos(p) = pbuf[i+1] + (p - pbuf[i]) }.
*/
int[] pbuf = new int[128];
/**
* The index of the next empty slot in the pbuf buffer.
*/
int pp = 0;
/** The buffer index of the last double backslash sequence
*/
private int doubleBackslashBp = -1;
DocReader(ScannerFactory fac, char[] input, int inputLength, int startPos) {
super(fac, input, inputLength);
this.startPos = startPos;
}
@Override
protected void convertUnicode() {
if (ch == '\\' && unicodeConversionBp != bp) {
bp++; ch = buf[bp]; col++;
if (ch == 'u') {
do {
bp++; ch = buf[bp]; col++;
} while (ch == 'u');
int limit = bp + 3;
if (limit < buflen) {
int d = digit(bp, 16);
int code = d;
while (bp < limit && d >= 0) {
bp++; ch = buf[bp]; col++;
d = digit(bp, 16);
code = (code << 4) + d;
}
if (d >= 0) {
ch = (char)code;
unicodeConversionBp = bp;
return;
}
}
// "illegal.Unicode.esc", reported by base scanner
} else {
bp--;
ch = '\\';
col--;
}
}
}
@Override
protected void scanCommentChar() {
scanChar();
if (ch == '\\') {
if (peekChar() == '\\' && !isUnicode()) {
bp++; col++;
doubleBackslashBp = bp;
} else {
convertUnicode();
}
}
}
@Override
protected void scanChar() {
bp++;
ch = buf[bp];
switch (ch) {
case '\r': // return
col = 0;
break;
case '\n': // newline
if (bp == 0 || buf[bp-1] != '\r') {
col = 0;
}
break;
case '\t': // tab
col = (col / TabInc * TabInc) + TabInc;
break;
case '\\': // possible Unicode
col++;
convertUnicode();
break;
default:
col++;
break;
}
}
@Override
public void putChar(char ch, boolean scan) {
// At this point, bp is the position of the current character in buf,
// and sp is the position in sbuf where this character will be put.
// Record a new entry in pbuf if pbuf is empty or if sp and its
// corresponding source position are not equidistant from the
// corresponding values in the latest entry in the pbuf array.
// (i.e. there is a discontinuity in the map function.)
if ((pp == 0)
|| (sp - pbuf[pp - 2] != (startPos + bp) - pbuf[pp - 1])) {
if (pp + 1 >= pbuf.length) {
int[] new_pbuf = new int[pbuf.length * 2];
System.arraycopy(pbuf, 0, new_pbuf, 0, pbuf.length);
pbuf = new_pbuf;
}
pbuf[pp] = sp;
pbuf[pp + 1] = startPos + bp;
pp += 2;
}
super.putChar(ch, scan);
}
/** Whether the ch represents a sequence of two backslashes. */
boolean isDoubleBackslash() {
return doubleBackslashBp == bp;
}
}
protected static class JavadocComment extends JavaTokenizer.BasicComment<DocReader> {
/** /**
* Translated and stripped contents of doc comment * The relevant portion of the comment that is of interest to Javadoc.
*/ * Produced by invoking scanDocComment.
*/
private String docComment = null; private String docComment = null;
private int[] docPosns = null;
JavadocComment(DocReader reader, CommentStyle cs) { /**
super(reader, cs); * StringBuilder used to extract the relevant portion of the Javadoc comment.
*/
private final StringBuilder sb;
/**
* Map used to map the extracted Javadoc comment's character positions back to
* the original source.
*/
OffsetMap offsetMap = new OffsetMap();
JavadocComment(CommentStyle cs, ScannerFactory sf, char[] array, int offset) {
super( cs, sf, array, offset);
this.sb = new StringBuilder();
}
/**
* Add a character to the extraction buffer.
*
* @param ch character to add.
*/
protected void put(char ch) {
offsetMap.add(sb.length(), offsetPosition());
sb.append(ch);
}
/**
* Add a code point to the extraction buffer.
*
* @param codePoint code point to add.
*/
protected void putCodePoint(int codePoint) {
offsetMap.add(sb.length(), offsetPosition());
sb.appendCodePoint(codePoint);
}
/**
* Add current character or code point to the extraction buffer.
*/
protected void put() {
if (isSurrogate()) {
putCodePoint(getCodepoint());
} else {
put(get());
}
} }
@Override @Override
@ -240,232 +158,292 @@ public class JavadocTokenizer extends JavaTokenizer {
@Override @Override
public int getSourcePos(int pos) { public int getSourcePos(int pos) {
// Binary search to find the entry for which the string index is if (pos == Position.NOPOS) {
// less than pos. Since docPosns is a list of pairs of integers
// we must make sure the index is always even.
// If we find an exact match for pos, the other item in the pair
// gives the source pos; otherwise, compute the source position
// relative to the best match found in the array.
if (pos == Position.NOPOS)
return Position.NOPOS; return Position.NOPOS;
if (pos < 0 || pos > docComment.length())
throw new StringIndexOutOfBoundsException(String.valueOf(pos));
if (docPosns == null)
return Position.NOPOS;
int start = 0;
int end = docPosns.length;
while (start < end - 2) {
// find an even index midway between start and end
int index = ((start + end) / 4) * 2;
if (docPosns[index] < pos)
start = index;
else if (docPosns[index] == pos)
return docPosns[index + 1];
else
end = index;
} }
return docPosns[start + 1] + (pos - docPosns[start]);
if (pos < 0 || pos > docComment.length()) {
throw new StringIndexOutOfBoundsException(String.valueOf(pos));
}
return offsetMap.getSourcePos(pos);
} }
@Override @Override
@SuppressWarnings("fallthrough")
protected void scanDocComment() { protected void scanDocComment() {
try { try {
boolean firstLine = true; boolean firstLine = true;
// Skip over first slash // Skip over /*
comment_reader.scanCommentChar(); accept("/*");
// Skip over first star
comment_reader.scanCommentChar();
// consume any number of stars // Consume any number of stars
while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') { skip('*');
comment_reader.scanCommentChar();
} // Is the comment in the form /**/, /***/, /****/, etc. ?
// is the comment in the form /**/, /***/, /****/, etc. ? if (is('/')) {
if (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '/') {
docComment = ""; docComment = "";
return; return;
} }
// skip a newline on the first line of the comment. // Skip line terminator on the first line of the comment.
if (comment_reader.bp < comment_reader.buflen) { if (isOneOf('\n', '\r')) {
if (comment_reader.ch == LF) { accept('\r');
comment_reader.scanCommentChar(); accept('\n');
firstLine = false; firstLine = false;
} else if (comment_reader.ch == CR) {
comment_reader.scanCommentChar();
if (comment_reader.ch == LF) {
comment_reader.scanCommentChar();
firstLine = false;
}
}
} }
outerLoop: outerLoop:
// The outerLoop processes the doc comment, looping once // The outerLoop processes the doc comment, looping once
// for each line. For each line, it first strips off // for each line. For each line, it first strips off
// whitespace, then it consumes any stars, then it // whitespace, then it consumes any stars, then it
// puts the rest of the line into our buffer. // puts the rest of the line into the extraction buffer.
while (comment_reader.bp < comment_reader.buflen) { while (isAvailable()) {
int begin_bp = comment_reader.bp; int begin_pos = position();
char begin_ch = comment_reader.ch; // Consume whitespace from the beginning of each line.
// The wsLoop consumes whitespace from the beginning skipWhitespace();
// of each line.
wsLoop:
while (comment_reader.bp < comment_reader.buflen) {
switch(comment_reader.ch) {
case ' ':
comment_reader.scanCommentChar();
break;
case '\t':
comment_reader.col = ((comment_reader.col - 1) / TabInc * TabInc) + TabInc;
comment_reader.scanCommentChar();
break;
case FF:
comment_reader.col = 0;
comment_reader.scanCommentChar();
break;
// Treat newline at beginning of line (blank line, no star)
// as comment text. Old Javadoc compatibility requires this.
/*---------------------------------*
case CR: // (Spec 3.4)
doc_reader.scanCommentChar();
if (ch == LF) {
col = 0;
doc_reader.scanCommentChar();
}
break;
case LF: // (Spec 3.4)
doc_reader.scanCommentChar();
break;
*---------------------------------*/
default:
// we've seen something that isn't whitespace;
// jump out.
break wsLoop;
}
}
// Are there stars here? If so, consume them all // Are there stars here? If so, consume them all
// and check for the end of comment. // and check for the end of comment.
if (comment_reader.ch == '*') { if (is('*')) {
// skip all of the stars // skip all of the stars
do { skip('*');
comment_reader.scanCommentChar();
} while (comment_reader.ch == '*');
// check for the closing slash. // check for the closing slash.
if (comment_reader.ch == '/') { if (accept('/')) {
// We're done with the doc comment // We're done with the Javadoc comment
// scanChar() and breakout.
break outerLoop; break outerLoop;
} }
} else if (! firstLine) { } else if (!firstLine) {
// The current line does not begin with a '*' so we will // The current line does not begin with a '*' so we will
// treat it as comment // treat it as comment
comment_reader.bp = begin_bp; reset(begin_pos);
comment_reader.ch = begin_ch;
} }
// The textLoop processes the rest of the characters
// on the line, adding them to our buffer.
textLoop: textLoop:
while (comment_reader.bp < comment_reader.buflen) { // The textLoop processes the rest of the characters
switch (comment_reader.ch) { // on the line, adding them to the extraction buffer.
case '*': while (isAvailable()) {
// Is this just a star? Or is this the if (accept("*/")) {
// end of a comment? // This is the end of the comment, return
comment_reader.scanCommentChar(); // the contents of the extraction buffer.
if (comment_reader.ch == '/') { break outerLoop;
// This is the end of the comment, } else if (isOneOf('\n', '\r')) {
// set ch and return our buffer.
break outerLoop;
}
// This is just an ordinary star. Add it to
// the buffer.
comment_reader.putChar('*', false);
break;
case '\\':
comment_reader.putChar('\\', false);
// If a double backslash was found, write two
if (comment_reader.isDoubleBackslash()) {
comment_reader.putChar('\\', false);
}
comment_reader.scanCommentChar();
break;
case ' ':
case '\t':
comment_reader.putChar(comment_reader.ch, false);
comment_reader.scanCommentChar();
break;
case FF:
comment_reader.scanCommentChar();
break textLoop; // treat as end of line
case CR: // (Spec 3.4)
comment_reader.scanCommentChar();
if (comment_reader.ch != LF) {
// Canonicalize CR-only line terminator to LF
comment_reader.putChar((char)LF, false);
break textLoop;
}
/* fall through to LF case */
case LF: // (Spec 3.4)
// We've seen a newline. Add it to our // We've seen a newline. Add it to our
// buffer and break out of this loop, // buffer and break out of this loop,
// starting fresh on a new line. // starting fresh on a new line.
comment_reader.putChar(comment_reader.ch, false); put('\n');
comment_reader.scanCommentChar(); accept('\r');
accept('\n');
break textLoop; break textLoop;
default: } else if (is('\f')){
next();
break textLoop; // treat as end of line
} else {
// Add the character to our buffer. // Add the character to our buffer.
comment_reader.putChar(comment_reader.ch, false); put();
comment_reader.scanCommentChar(); next();
} }
} // end textLoop } // end textLoop
firstLine = false; firstLine = false;
} // end outerLoop } // end outerLoop
if (comment_reader.sp > 0) { // If extraction buffer is not empty.
int i = comment_reader.sp - 1; if (sb.length() > 0) {
trailLoop: // Remove trailing asterisks.
while (i > -1) { int i = sb.length() - 1;
switch (comment_reader.sbuf[i]) { while (i > -1 && sb.charAt(i) == '*') {
case '*': i--;
i--;
break;
default:
break trailLoop;
}
} }
comment_reader.sp = i + 1; sb.setLength(i + 1) ;
// Store the text of the doc comment // Store the text of the doc comment
docComment = comment_reader.chars(); docComment = sb.toString();
docPosns = new int[comment_reader.pp]; } else {
System.arraycopy(comment_reader.pbuf, 0, docPosns, 0, docPosns.length);
} else {
docComment = ""; docComment = "";
} }
} finally { } finally {
scanned = true; scanned = true;
comment_reader = null;
if (docComment != null && // Check if comment contains @deprecated comment.
DEPRECATED_PATTERN.matcher(docComment).matches()) { if (docComment != null && DEPRECATED_PATTERN.matcher(docComment).matches()) {
deprecatedFlag = true; deprecatedFlag = true;
} }
} }
} }
//where:
private static final Pattern DEPRECATED_PATTERN =
Pattern.compile("(?sm).*^\\s*@deprecated( |$).*");
} }
/**
* Build a map for translating between line numbers and positions in the input.
* Overridden to expand tabs.
*
* @return a LineMap
*/
@Override @Override
public Position.LineMap getLineMap() { public Position.LineMap getLineMap() {
char[] buf = reader.getRawCharacters(); char[] buf = getRawCharacters();
return Position.makeLineMap(buf, buf.length, true); return Position.makeLineMap(buf, buf.length, true);
} }
/**
* Build an int table mapping positions in the extracted Javadoc comment
* to positions in the JavaTokenizer source buffer.
*
* The array is organized as a series of pairs of integers: the first
* number in each pair specifies a position in the comment text,
* the second number in each pair specifies the corresponding position
* in the source buffer. The pairs are sorted in ascending order.
*
* Since the mapping function is generally continuous, with successive
* positions in the string corresponding to successive positions in the
* source buffer, the table only needs to record discontinuities in
* the mapping. The values of intermediate positions can be inferred.
*
* Discontinuities may occur in a number of places: when a newline
* is followed by whitespace and asterisks (which are ignored),
* when a tab is expanded into spaces, and when unicode escapes
* are used in the source buffer.
*
* Thus, to find the source position of any position, p, in the comment
* string, find the index, i, of the pair whose string offset
* ({@code map[i * NOFFSETS + SB_OFFSET] }) is closest to but not greater
* than p. Then, {@code sourcePos(p) = map[i * NOFFSETS + POS_OFFSET] +
* (p - map[i * NOFFSETS + SB_OFFSET]) }.
*/
static class OffsetMap {
/**
* map entry offset for comment offset member of pair.
*/
private static final int SB_OFFSET = 0;
/**
* map entry offset of input offset member of pair.
*/
private static final int POS_OFFSET = 1;
/**
* Number of elements in each entry.
*/
private static final int NOFFSETS = 2;
/**
* Array storing entries in map.
*/
private int[] map;
/**
* Logical size of map (number of valid entries.)
*/
private int size;
/**
* Constructor.
*/
OffsetMap() {
this.map = new int[128];
this.size = 0;
}
/**
* Returns true if it is worthwhile adding the entry pair to the map. That is
* if there is a change in relative offset.
*
* @param sbOffset comment offset member of pair.
* @param posOffet input offset member of pair.
*
* @return true if it is worthwhile adding the entry pair.
*/
boolean shouldAdd(int sbOffset, int posOffet) {
return sbOffset - lastSBOffset() != posOffet - lastPosOffset();
}
/**
* Adds entry pair if worthwhile.
*
* @param sbOffset comment offset member of pair.
* @param posOffet input offset member of pair.
*/
void add(int sbOffset, int posOffet) {
if (size == 0 || shouldAdd(sbOffset, posOffet)) {
ensure(NOFFSETS);
map[size + SB_OFFSET] = sbOffset;
map[size + POS_OFFSET] = posOffet;
size += NOFFSETS;
}
}
/**
* Returns the previous comment offset.
*
* @return the previous comment offset.
*/
private int lastSBOffset() {
return size == 0 ? 0 : map[size - NOFFSETS + SB_OFFSET];
}
/**
* Returns the previous input offset.
*
* @return the previous input offset.
*/
private int lastPosOffset() {
return size == 0 ? 0 : map[size - NOFFSETS + POS_OFFSET];
}
/**
* Ensures there is enough space for a new entry.
*
* @param need number of array slots needed.
*/
private void ensure(int need) {
need += size;
int grow = map.length;
while (need > grow) {
grow <<= 1;
}
// Handle overflow.
if (grow < map.length) {
throw new IndexOutOfBoundsException();
} else if (grow != map.length) {
map = Arrays.copyOf(map, grow);
}
}
/**
* Binary search to find the entry for which the string index is less
* than pos. Since the map is a list of pairs of integers we must make
* sure the index is always NOFFSETS scaled. If we find an exact match
* for pos, the other item in the pair gives the source pos; otherwise,
* compute the source position relative to the best match found in the
* array.
*/
int getSourcePos(int pos) {
if (size == 0) {
return Position.NOPOS;
}
int start = 0;
int end = size / NOFFSETS;
while (start < end - 1) {
// find an index midway between start and end
int index = (start + end) / 2;
int indexScaled = index * NOFFSETS;
if (map[indexScaled + SB_OFFSET] < pos) {
start = index;
} else if (map[indexScaled + SB_OFFSET] == pos) {
return map[indexScaled + POS_OFFSET];
} else {
end = index;
}
}
int startScaled = start * NOFFSETS;
return map[startScaled + POS_OFFSET] + (pos - map[startScaled + SB_OFFSET]);
}
}
} }

@ -0,0 +1,101 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
* particular file as subject to the "Classpath" exception as provided
* by Oracle in the LICENSE file that accompanied this code.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package com.sun.tools.javac.parser;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Set;
/**
 * Methods used to support text blocks lint.
 *
 * <p><b>This is NOT part of any supported API.
 * If you write code that depends on this, you do so at your own risk.
 * This code and its internal interfaces are subject to change or
 * deletion without notice.</b>
 */
class TextBlockSupport {
    /**
     * Kinds of problematic white space that {@link #checkWhitespace(String)}
     * can report.
     */
    enum WhitespaceChecks {
        /**
         * A non-blank line does not start with the same leading white space
         * characters as the last line (up to the common indentation).
         */
        INCONSISTENT,
        /**
         * A line longer than the common indentation ends with a white space
         * character.
         */
        TRAILING
    }

    /**
     * Check that the use of white space in content is not problematic.
     *
     * @param string the content to examine; lines are separated by any
     *               line terminator matched by the regex {@code \R}.
     *
     * @return a modifiable set holding every kind of problem found; empty
     *         when the content is empty or no problem was detected.
     */
    static Set<WhitespaceChecks> checkWhitespace(String string) {
        // Start with empty result set. EnumSet is the natural container for
        // enum constants and iterates in declaration order.
        Set<WhitespaceChecks> checks = EnumSet.noneOf(WhitespaceChecks.class);

        // No need to check empty strings.
        if (string.isEmpty()) {
            return checks;
        }

        // No need to check indentation if opting out (last line is empty.)
        char finalChar = string.charAt(string.length() - 1);
        boolean optOut = finalChar == '\n' || finalChar == '\r';

        // Split string based at line terminators. Trailing empty lines are
        // dropped by split, so the array may be empty (e.g. for "\n").
        String[] lines = string.split("\\R");
        String lastLine = lines.length == 0 ? "" : lines[lines.length - 1];

        // Maximum common indentation; stays zero when opting out.
        int outdent = 0;

        if (!optOut) {
            // Prime with the last line indentation (may be blank.)
            outdent = indexOfNonWhitespace(lastLine);

            for (String line : lines) {
                // Blanks lines have no influence (last line accounted for.)
                if (line.isBlank()) {
                    continue;
                }

                outdent = Integer.min(outdent, indexOfNonWhitespace(line));

                // Cannot get any smaller.
                if (outdent == 0) {
                    break;
                }
            }
        }

        // Last line is representative of the expected indentation.
        String indentation = lastLine.substring(0, outdent);

        for (String line : lines) {
            // Fail if a line does not have the same indentation.
            if (!line.isBlank() && !line.startsWith(indentation)) {
                // Mix of different white space
                checks.add(WhitespaceChecks.INCONSISTENT);
            }

            // Line has content even after indent is removed.
            if (outdent < line.length()
                    && Character.isWhitespace(line.charAt(line.length() - 1))) {
                // Has trailing white space.
                checks.add(WhitespaceChecks.TRAILING);
            }
        }

        return checks;
    }

    /**
     * Returns the number of leading white space characters in a string.
     *
     * @param string the string to examine.
     *
     * @return index of the first character that is not white space, or the
     *         length of the string if it contains only white space.
     */
    private static int indexOfNonWhitespace(String string) {
        return string.length() - string.stripLeading().length();
    }
}

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -25,267 +25,503 @@
package com.sun.tools.javac.parser; package com.sun.tools.javac.parser;
import java.nio.CharBuffer;
import java.util.Arrays; import java.util.Arrays;
import com.sun.tools.javac.file.JavacFileManager;
import com.sun.tools.javac.resources.CompilerProperties.Errors; import com.sun.tools.javac.resources.CompilerProperties.Errors;
import com.sun.tools.javac.util.ArrayUtils;
import com.sun.tools.javac.util.Log; import com.sun.tools.javac.util.Log;
import com.sun.tools.javac.util.Name;
import com.sun.tools.javac.util.Names;
import static com.sun.tools.javac.util.LayoutCharacters.*; import static com.sun.tools.javac.util.LayoutCharacters.EOI;
import static com.sun.tools.javac.util.LayoutCharacters.tabulate;
/** The char reader used by the javac lexer/tokenizer. Returns the sequence of /**
* characters contained in the input stream, handling unicode escape accordingly. * The unicode character reader used by the javac/javadoc lexer/tokenizer, returns characters
* Additionally, it provides features for saving chars into a buffer and to retrieve * one by one as contained in the input stream, handling unicode escape sequences accordingly.
* them at a later stage.
* *
* <p><b>This is NOT part of any supported API. * <p><b>This is NOT part of any supported API.
* If you write code that depends on this, you do so at your own risk. * If you write code that depends on this, you do so at your own risk.
* This code and its internal interfaces are subject to change or * This code and its internal interfaces are subject to change or
* deletion without notice.</b> * deletion without notice.</b></p>
*/ */
public class UnicodeReader { public class UnicodeReader {
/**
/** The input buffer, index of next character to be read, * Buffer containing characters from source file. May contain extraneous characters
* index of one past last character in buffer. * beyond this.length.
*/ */
protected char[] buf; private final char[] buffer;
protected int bp;
protected final int buflen;
/** The current character.
*/
protected char ch;
/** The buffer index of the last converted unicode character
*/
protected int unicodeConversionBp = -1;
protected Log log;
protected Names names;
/** A character buffer for saved chars.
*/
protected char[] sbuf = new char[128];
protected int realLength;
protected int sp;
/** /**
* Create a scanner from the input array. This method might * Length of meaningful content in buffer.
* modify the array. To avoid copying the input array, ensure */
* that {@code inputLength < input.length} or private final int length;
* {@code input[input.length -1]} is a white space character.
/**
* Character buffer index of character currently being observed.
*/
private int position;
/**
* Number of characters combined to provide character currently being observed. Typically
* one, but may be more when combinations of surrogate pairs and unicode escape sequences
* are read.
*/
private int width;
/**
* Character currently being observed. If a surrogate pair is read then will be the high
* member of the pair.
*/
private char character;
/**
* Codepoint of character currently being observed. Typically equivalent to the character
* but will have a value greater than 0xFFFF when a surrogate pair.
*/
private int codepoint;
/**
* true if the last character was a backslash. This is used to handle the special case
* when a backslash precedes an unicode escape. In that case, the second backslash
* is treated as a backslash and not part of an unicode escape.
*/
private boolean wasBackslash;
/**
* Log for error reporting.
*/
private final Log log;
/**
* Constructor.
* *
* @param sf the factory which created this Scanner * @param sf scan factory.
* @param buffer the input, might be modified * @param array array containing contents of source.
* Must be positive and less than or equal to input.length. * @param length length of meaningful content in buffer.
*/ */
protected UnicodeReader(ScannerFactory sf, CharBuffer buffer) { protected UnicodeReader(ScannerFactory sf, char[] array, int length) {
this(sf, JavacFileManager.toArray(buffer), buffer.limit()); this.buffer = array;
this.length = length;
this.position = 0;
this.width = 0;
this.character = '\0';
this.codepoint = 0;
this.wasBackslash = false;
this.log = sf.log;
nextCodePoint();
} }
protected UnicodeReader(ScannerFactory sf, char[] input, int inputLength) { /**
log = sf.log; * Returns the length of the buffer. This is length of meaningful content in buffer and
names = sf.names; * not the length of the buffer array.
realLength = inputLength; *
if (inputLength == input.length) { * @return length of the buffer.
if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) { */
inputLength--; protected int length() {
} else { return length;
input = Arrays.copyOf(input, inputLength + 1); }
/**
* Return true if current position is within the meaningful part of the buffer.
*
* @return true if current position is within the meaningful part of the buffer.
*/
protected boolean isAvailable() {
return position < length;
}
/**
* Fetches the next 16-bit character from the buffer and places it in this.character.
*/
private void nextCodeUnit() {
// Index of next character in buffer.
int index = position + width;
// If past end of buffer.
if (length <= index) {
// End of file is marked with EOI.
character = EOI;
} else {
// Next character in buffer.
character = buffer[index];
// Increment length of codepoint.
width++;
}
}
/**
 * Advances past the current codepoint and fetches the next 16-bit character
 * from the buffer. If a unicode escape is detected then the escape is
 * converted to the character it denotes.
 */
private void nextUnicodeInputCharacter() {
    // Step over the previous codepoint; the new one has no characters yet.
    position += width;
    width = 0;

    // Fetch next character.
    nextCodeUnit();

    if (!wasBackslash) {
        if (character == '\\') {
            // May be a unicode escape. If it is not, remember the plain
            // backslash so that a backslash following it is taken literally.
            wasBackslash = !unicodeEscape();
        }
    } else {
        // Character after a plain backslash: treat like a normal character
        // (not part of a unicode escape.)
        wasBackslash = false;
    }

    // Codepoint and character match if not surrogate.
    codepoint = character;
}
/**
 * Fetches the next code point from the buffer. If a unicode escape is recognized
 * then converts the unicode escape to a character. If two characters form a
 * surrogate pair then combines them into a single codepoint whose width covers
 * both characters.
 */
private void nextCodePoint() {
    // Next unicode character.
    nextUnicodeInputCharacter();

    // Return early if ASCII or not the start of a surrogate pair.
    if (isASCII() || !Character.isHighSurrogate(character)) {
        return;
    }

    // Capture the high surrogate and the reader state needed to back out
    // of the lookahead below.
    char hi = character;
    int savePosition = position;
    int saveWidth = width;
    boolean saveWasBackslash = wasBackslash;

    // Get potential low surrogate.
    nextUnicodeInputCharacter();
    char lo = character;

    // In both cases the codepoint starts at the high surrogate.
    position = savePosition;

    if (Character.isLowSurrogate(lo)) {
        // Codepoint spans both surrogates.
        width += saveWidth;
        // Compute codepoint.
        codepoint = Character.toCodePoint(hi, lo);
    } else {
        // Restore to treat high surrogate as just a character.
        width = saveWidth;
        character = hi;
        codepoint = (int)hi;
        // The lookahead character will be read again by the next call, so the
        // backslash state it left behind must be undone as well. Otherwise a
        // plain backslash seen during lookahead would be counted twice and
        // the backslash after it could wrongly start a unicode escape.
        wasBackslash = saveWasBackslash;
        // Could potentially report an error here (old code did not.)
    }
}
/**
* Converts a unicode escape into a character.
*
* @return true if it was a unicode escape.
*/
private boolean unicodeEscape() {
// Start of unicode escape (past backslash.)
int start = position + width;
// Default to backslash result, unless proven otherwise.
character = '\\';
width = 1;
// Skip multiple 'u'.
int index;
for (index = start; index < length; index++) {
if (buffer[index] != 'u') {
break;
} }
} }
buf = input;
buflen = inputLength;
buf[buflen] = EOI;
bp = -1;
scanChar();
}
/** Read next character. // Needs to have been at least one u.
*/ if (index == start) {
protected void scanChar() {
if (bp < buflen) {
ch = buf[++bp];
if (ch == '\\') {
convertUnicode();
}
}
}
/** Read next character in comment, skipping over double '\' characters.
*/
protected void scanCommentChar() {
scanChar();
if (ch == '\\') {
if (peekChar() == '\\' && !isUnicode()) {
skipChar();
} else {
convertUnicode();
}
}
}
/** Append a character to sbuf.
*/
protected void putChar(char ch, boolean scan) {
sbuf = ArrayUtils.ensureCapacity(sbuf, sp);
sbuf[sp++] = ch;
if (scan)
scanChar();
}
protected void putChar(char ch) {
putChar(ch, false);
}
protected void putChar(boolean scan) {
putChar(ch, scan);
}
protected void nextChar(boolean skip) {
if (!skip) {
sbuf = ArrayUtils.ensureCapacity(sbuf, sp);
sbuf[sp++] = ch;
}
scanChar();
}
Name name() {
return names.fromChars(sbuf, 0, sp);
}
String chars() {
return new String(sbuf, 0, sp);
}
/** Add 'count' copies of the character 'ch' to the string buffer.
*/
protected void repeat(char ch, int count) {
for ( ; 0 < count; count--) {
putChar(ch, false);
}
}
/** Reset the scan buffer pointer to 'pos'.
*/
protected void reset(int pos) {
bp = pos - 1;
scanChar();
}
/** Convert unicode escape; bp points to initial '\' character
* (Spec 3.3).
*/
protected void convertUnicode() {
if (ch == '\\' && unicodeConversionBp != bp ) {
bp++; ch = buf[bp];
if (ch == 'u') {
do {
bp++; ch = buf[bp];
} while (ch == 'u');
int limit = bp + 3;
if (limit < buflen) {
int d = digit(bp, 16);
int code = d;
while (bp < limit && d >= 0) {
bp++; ch = buf[bp];
d = digit(bp, 16);
code = (code << 4) + d;
}
if (d >= 0) {
ch = (char)code;
unicodeConversionBp = bp;
return;
}
}
log.error(bp, Errors.IllegalUnicodeEsc);
} else {
bp--;
ch = '\\';
}
}
}
/** Are surrogates supported?
*/
final static boolean surrogatesSupported = surrogatesSupported();
private static boolean surrogatesSupported() {
try {
Character.isHighSurrogate('a');
return true;
} catch (NoSuchMethodError ex) {
return false; return false;
} }
}
/** Scan surrogate pairs. If 'ch' is a high surrogate and int code = 0;
* the next character is a low surrogate, returns the code point
* constructed from these surrogates. Otherwise, returns -1.
* This method will not consume any of the characters.
*/
protected int peekSurrogates() {
if (surrogatesSupported && Character.isHighSurrogate(ch)) {
char high = ch;
int prevBP = bp;
scanChar(); for (int i = 0; i < 4; i++) {
// Translate and merge digit.
int digit = index < length ? Character.digit(buffer[index], 16) : -1;
code = code << 4 | digit;
char low = ch; // If invalid digit.
if (code < 0) {
ch = high; break;
bp = prevBP;
if (Character.isLowSurrogate(low)) {
return Character.toCodePoint(high, low);
} }
// On to next character.
index++;
} }
return -1; // Skip digits even if error.
width = index - position;
// If all digits are good.
if (code >= 0) {
character = (char)code;
} else {
log.error(position, Errors.IllegalUnicodeEsc);
}
// Return true even if error so that the invalid unicode escape is skipped.
return true;
} }
/** Convert an ASCII digit from its base (8, 10, or 16) /**
* to its value. * Return the current position in the character buffer.
*
* @return current position in the character buffer.
*/ */
protected int digit(int pos, int base) { protected int position() {
char c = ch; return position;
if ('0' <= c && c <= '9') }
return Character.digit(c, base); //a fast common case
int codePoint = peekSurrogates();
int result = codePoint >= 0 ? Character.digit(codePoint, base) : Character.digit(c, base); /**
if (result >= 0 && c > 0x7f) { * Reset the reader to the specified position.
log.error(pos + 1, Errors.IllegalNonasciiDigit); * Warning: Do not use when previous character was an ASCII or unicode backslash.
if (codePoint >= 0) * @param pos
scanChar(); */
ch = "0123456789abcdef".charAt(result); protected void reset(int pos) {
position = pos;
width = 0;
wasBackslash = false;
nextCodePoint();
}
/**
 * Returns the character at the current position without advancing the reader.
 *
 * @return character at the current position.
 */
protected char get() {
    return this.character;
}
/**
 * Returns the codepoint at the current position without advancing the reader.
 *
 * @return codepoint at the current position.
 */
protected int getCodepoint() {
    return this.codepoint;
}
/**
 * Tests whether the current codepoint lies above U+FFFF, that is, whether it
 * does not fit in a single 16-bit character.
 *
 * @return true if the current codepoint is greater than 0xFFFF.
 */
protected boolean isSurrogate() {
    return codepoint > 0xFFFF;
}
/**
 * Tests whether the current character is in the ASCII range (0x00 to 0x7F).
 *
 * @return true if the current character is ASCII.
 */
protected boolean isASCII() {
    return character < 0x80;
}
/**
 * Moves the reader on to the next codepoint and reports the character that
 * is then current.
 *
 * @return the character at the new position.
 */
protected char next() {
    nextCodePoint();

    return this.character;
}
/**
 * Compares the current character with the given one. Does not advance.
 *
 * @param ch character to match.
 *
 * @return true if the current character equals {@code ch}.
 */
protected boolean is(char ch) {
    return ch == character;
}
/**
 * Tests whether the current character matches either argument. Does not advance.
 *
 * @param ch1 first candidate character.
 * @param ch2 second candidate character.
 *
 * @return true if the current character matches one of the candidates.
 */
protected boolean isOneOf(char ch1, char ch2) {
    if (is(ch1)) {
        return true;
    }

    return is(ch2);
}
/**
 * Tests whether the current character matches any of the three arguments.
 * Does not advance.
 *
 * @return true if the current character matches one of the candidates.
 */
protected boolean isOneOf(char ch1, char ch2, char ch3) {
    if (is(ch1) || is(ch2)) {
        return true;
    }

    return is(ch3);
}
/**
 * Tests whether the current character matches any of the six arguments.
 * Does not advance.
 *
 * @return true if the current character matches one of the candidates.
 */
protected boolean isOneOf(char ch1, char ch2, char ch3, char ch4, char ch5, char ch6) {
    if (is(ch1) || is(ch2) || is(ch3)) {
        return true;
    }

    return is(ch4) || is(ch5) || is(ch6);
}
/**
 * Tests whether the current character falls between two characters,
 * both bounds included. Does not advance.
 *
 * @param lo lowest character in range.
 * @param hi highest character in range.
 *
 * @return true if the current character is in range.
 */
protected boolean inRange(char lo, char hi) {
    return character >= lo && hi >= character;
}
/**
* Compare character and advance if a match. Returns true if a match.
*
* @param ch character to match.
*
* @return true if a match.
*/
protected boolean accept(char ch) {
if (is(ch)) {
next();
return true;
} }
return false;
}
/**
 * Matches the current character against either argument and, on a match,
 * advances to the next character.
 *
 * @return true if a match was found (and the reader advanced).
 */
protected boolean acceptOneOf(char ch1, char ch2) {
    if (!isOneOf(ch1, ch2)) {
        return false;
    }

    next();
    return true;
}
/**
 * Matches the current character against any of the three arguments and,
 * on a match, advances to the next character.
 *
 * @return true if a match was found (and the reader advanced).
 */
protected boolean acceptOneOf(char ch1, char ch2, char ch3) {
    if (!isOneOf(ch1, ch2, ch3)) {
        return false;
    }

    next();
    return true;
}
/**
 * Skips over all consecutive occurrences of a character, starting at the
 * current position.
 *
 * @param ch character to accept.
 */
protected void skip(char ch) {
    boolean matched;

    do {
        // accept() advances on a match; stop at the first mismatch.
        matched = accept(ch);
    } while (matched);
}
/**
 * Skips over a run of space, tab and form feed characters. Line terminators
 * are not skipped.
 */
protected void skipWhitespace() {
    boolean consumed;

    do {
        // acceptOneOf() advances on a match; stop at the first other character.
        consumed = acceptOneOf(' ', '\t', '\f');
    } while (consumed);
}
/**
 * Advances until the current character is a carriage return or a line feed,
 * or until the end of the buffer is reached. The line terminator itself is
 * not consumed.
 */
protected void skipToEOLN() {
    while (isAvailable() && !isOneOf('\r', '\n')) {
        next();
    }
}
/**
 * Matches a string against the input, character for character, starting at
 * the current position. On a full match the reader is left just past the
 * matched text; otherwise it is put back where it started. An empty string
 * never matches.
 * Warning: Do not use when previous character was a backslash
 * (confuses state of wasBackslash.)
 *
 * @param string string to match character for character.
 *
 * @return true if a match.
 */
protected boolean accept(String string) {
    final int count = string.length();

    // Quick test on the first character.
    if (count == 0 || !is(string.charAt(0))) {
        return false;
    }

    // Remember where to retreat to if the rest does not match.
    final int start = position;
    nextCodePoint();

    // Check the remaining characters.
    int i = 1;
    while (i < count) {
        if (!is(string.charAt(i))) {
            // Restart if not a match.
            reset(start);
            return false;
        }

        nextCodePoint();
        i++;
    }

    return true;
}
/**
* Convert an ASCII digit from its base (8, 10, or 16) to its value. Does not
* advance character.
*
* @param pos starting position.
* @param digitRadix base of number being converted.
*
* @return value of digit.
*/
protected int digit(int pos, int digitRadix) {
int result;
// Just an ASCII digit.
if (inRange('0', '9')) {
// Fast common case.
result = character - '0';
return result < digitRadix ? result : -1;
}
// Handle other digits.
result = isSurrogate() ? Character.digit(codepoint, digitRadix) :
Character.digit(character, digitRadix);
if (result >= 0 && !isASCII()) {
log.error(position(), Errors.IllegalNonasciiDigit);
character = "0123456789abcdef".charAt(result);
}
return result; return result;
} }
protected boolean isUnicode() {
return unicodeConversionBp == bp;
}
protected void skipChar() {
bp++;
}
protected char peekChar() {
return buf[bp + 1];
}
/** /**
* Returns a copy of the input buffer, up to its inputLength. * Returns the input buffer. Unicode escape sequences are not translated.
* Unicode escape sequences are not translated. *
* @return the input buffer.
*/ */
public char[] getRawCharacters() { public char[] getRawCharacters() {
char[] chars = new char[buflen]; return length == buffer.length ? buffer : Arrays.copyOf(buffer, length);
System.arraycopy(buf, 0, chars, 0, buflen);
return chars;
} }
/** /**
@ -297,15 +533,83 @@ public class UnicodeReader {
* {@code String.substring(beginIndex, endIndex)}. * {@code String.substring(beginIndex, endIndex)}.
* Unicode escape sequences are not translated. * Unicode escape sequences are not translated.
* *
* @param beginIndex the beginning index, inclusive. * @param beginIndex the beginning index, inclusive.
* @param endIndex the ending index, exclusive. * @param endIndex the ending index, exclusive.
*
* @throws ArrayIndexOutOfBoundsException if either offset is outside of the * @throws ArrayIndexOutOfBoundsException if either offset is outside of the
* array bounds * array bounds
*/ */
public char[] getRawCharacters(int beginIndex, int endIndex) { public char[] getRawCharacters(int beginIndex, int endIndex) {
int length = endIndex - beginIndex; return Arrays.copyOfRange(buffer, beginIndex, endIndex);
char[] chars = new char[length];
System.arraycopy(buf, beginIndex, chars, 0, length);
return chars;
} }
/**
 * This is a specialized version of UnicodeReader that keeps track of the
 * column position within a given character stream. Used for Javadoc
 * processing to build a table for mapping positions in the comment string
 * to positions in the source file.
 */
static class PositionTrackingReader extends UnicodeReader {
    /**
     * Offset from the beginning of the original reader buffer.
     */
    private final int offset;

    /**
     * Current column in the comment. Updated only by {@link #next()}.
     */
    private int column;

    /**
     * Constructor. The whole of {@code array} is treated as content and the
     * column starts at zero.
     *
     * @param sf      Scan factory.
     * @param array   Array containing contents of source.
     * @param offset  Position offset in original source buffer.
     */
    protected PositionTrackingReader(ScannerFactory sf, char[] array, int offset) {
        super(sf, array, array.length);
        this.column = 0;
        this.offset = offset;
    }

    /**
     * Advances the current character to the next character and updates the
     * column according to the character arrived at: a tab moves the column
     * to the next tab stop, a line feed, carriage return or form feed
     * resets it to zero, and anything else adds one.
     *
     * @return next character.
     */
    @Override
    protected char next() {
        super.next();

        if (is('\t')) {
            column = tabulate(column);
        } else if (isOneOf('\n', '\r', '\f')) {
            column = 0;
        } else {
            column++;
        }

        return get();
    }

    /**
     * Returns the current column.
     *
     * @return the current column.
     */
    protected int column() {
        return this.column;
    }

    /**
     * Returns position relative to the original source buffer.
     *
     * @return the current position plus the offset given at construction.
     */
    protected int offsetPosition() {
        return offset + position();
    }
}
} }

@ -1383,9 +1383,6 @@ compiler.err.unreported.exception.implicit.close=\
unreported exception {0}; must be caught or declared to be thrown\n\ unreported exception {0}; must be caught or declared to be thrown\n\
exception thrown from implicit call to close() on resource variable ''{1}'' exception thrown from implicit call to close() on resource variable ''{1}''
compiler.err.unsupported.cross.fp.lit=\
hexadecimal floating-point literals are not supported on this VM
compiler.err.void.not.allowed.here=\ compiler.err.void.not.allowed.here=\
''void'' type not allowed here ''void'' type not allowed here

@ -984,8 +984,6 @@ compiler.err.unreported.exception.default.constructor=\u30C7\u30D5\u30A9\u30EB\u
# 0: type, 1: name # 0: type, 1: name
compiler.err.unreported.exception.implicit.close=\u5831\u544A\u3055\u308C\u306A\u3044\u4F8B\u5916{0}\u306F\u3001\u30B9\u30ED\u30FC\u3059\u308B\u306B\u306F\u6355\u6349\u307E\u305F\u306F\u5BA3\u8A00\u3059\u308B\u5FC5\u8981\u304C\u3042\u308A\u307E\u3059\n\u30EA\u30BD\u30FC\u30B9\u5909\u6570''{1}''\u3067\u306Eclose()\u306E\u6697\u9ED9\u7684\u306A\u30B3\u30FC\u30EB\u304B\u3089\u4F8B\u5916\u304C\u30B9\u30ED\u30FC\u3055\u308C\u307E\u3057\u305F compiler.err.unreported.exception.implicit.close=\u5831\u544A\u3055\u308C\u306A\u3044\u4F8B\u5916{0}\u306F\u3001\u30B9\u30ED\u30FC\u3059\u308B\u306B\u306F\u6355\u6349\u307E\u305F\u306F\u5BA3\u8A00\u3059\u308B\u5FC5\u8981\u304C\u3042\u308A\u307E\u3059\n\u30EA\u30BD\u30FC\u30B9\u5909\u6570''{1}''\u3067\u306Eclose()\u306E\u6697\u9ED9\u7684\u306A\u30B3\u30FC\u30EB\u304B\u3089\u4F8B\u5916\u304C\u30B9\u30ED\u30FC\u3055\u308C\u307E\u3057\u305F
compiler.err.unsupported.cross.fp.lit=16\u9032\u6D6E\u52D5\u5C0F\u6570\u70B9\u30EA\u30C6\u30E9\u30EB\u306F\u3053\u306EVM\u3067\u306F\u30B5\u30DD\u30FC\u30C8\u3055\u308C\u3066\u3044\u307E\u305B\u3093
compiler.err.void.not.allowed.here=\u3053\u3053\u3067''void''\u578B\u3092\u4F7F\u7528\u3059\u308B\u3053\u3068\u306F\u3067\u304D\u307E\u305B\u3093 compiler.err.void.not.allowed.here=\u3053\u3053\u3067''void''\u578B\u3092\u4F7F\u7528\u3059\u308B\u3053\u3068\u306F\u3067\u304D\u307E\u305B\u3093
# 0: string # 0: string

@ -984,8 +984,6 @@ compiler.err.unreported.exception.default.constructor=\u9ED8\u8BA4\u6784\u9020\u
# 0: type, 1: name # 0: type, 1: name
compiler.err.unreported.exception.implicit.close=\u672A\u62A5\u544A\u7684\u5F02\u5E38\u9519\u8BEF{0}; \u5FC5\u987B\u5BF9\u5176\u8FDB\u884C\u6355\u83B7\u6216\u58F0\u660E\u4EE5\u4FBF\u629B\u51FA\n\u5BF9\u8D44\u6E90\u53D8\u91CF ''{1}'' \u9690\u5F0F\u8C03\u7528 close() \u65F6\u629B\u51FA\u4E86\u5F02\u5E38\u9519\u8BEF compiler.err.unreported.exception.implicit.close=\u672A\u62A5\u544A\u7684\u5F02\u5E38\u9519\u8BEF{0}; \u5FC5\u987B\u5BF9\u5176\u8FDB\u884C\u6355\u83B7\u6216\u58F0\u660E\u4EE5\u4FBF\u629B\u51FA\n\u5BF9\u8D44\u6E90\u53D8\u91CF ''{1}'' \u9690\u5F0F\u8C03\u7528 close() \u65F6\u629B\u51FA\u4E86\u5F02\u5E38\u9519\u8BEF
compiler.err.unsupported.cross.fp.lit=\u8BE5 VM \u4E0D\u652F\u6301\u5341\u516D\u8FDB\u5236\u6D6E\u70B9\u6587\u5B57
compiler.err.void.not.allowed.here=\u6B64\u5904\u4E0D\u5141\u8BB8\u4F7F\u7528 ''\u7A7A'' \u7C7B\u578B compiler.err.void.not.allowed.here=\u6B64\u5904\u4E0D\u5141\u8BB8\u4F7F\u7528 ''\u7A7A'' \u7C7B\u578B
# 0: string # 0: string

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -96,7 +96,7 @@ public class DiagnosticSource {
return 0; return 0;
} }
if (buf[bp] == '\t' && expandTabs) { if (buf[bp] == '\t' && expandTabs) {
column = (column / TabInc * TabInc) + TabInc; column = tabulate(column);
} else { } else {
column++; column++;
} }

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -68,4 +68,10 @@ public interface LayoutCharacters {
* source file. * source file.
*/ */
final static byte EOI = 0x1A; final static byte EOI = 0x1A;
/** Bump column to the next tab: round the column down to a multiple
 *  of TabInc, then move one TabInc further.
 */
static int tabulate(int column) {
    int tabStart = column / TabInc * TabInc;
    return tabStart + TabInc;
}
} }

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -265,7 +265,7 @@ public class Position {
int column = 0; int column = 0;
for (int bp = lineStart; bp < pos; bp++) { for (int bp = lineStart; bp < pos; bp++) {
if (tabMap.get(bp)) if (tabMap.get(bp))
column = (column / TabInc * TabInc) + TabInc; column = tabulate(column);
else else
column++; column++;
} }
@ -279,7 +279,7 @@ public class Position {
while (col < column) { while (col < column) {
pos++; pos++;
if (tabMap.get(pos)) if (tabMap.get(pos))
col = (col / TabInc * TabInc) + TabInc; col = tabulate(col);
else else
col++; col++;
} }

@ -1,2 +1,2 @@
Digits.java:11:41: compiler.err.illegal.nonascii.digit Digits.java:11:43: compiler.err.illegal.nonascii.digit
1 error 1 error

@ -44,7 +44,6 @@ compiler.err.stack.sim.error
compiler.err.type.var.more.than.once # UNUSED compiler.err.type.var.more.than.once # UNUSED
compiler.err.type.var.more.than.once.in.result # UNUSED compiler.err.type.var.more.than.once.in.result # UNUSED
compiler.err.unexpected.type compiler.err.unexpected.type
compiler.err.unsupported.cross.fp.lit # Scanner: host system dependent
compiler.misc.bad.class.signature # bad class file compiler.misc.bad.class.signature # bad class file
compiler.misc.bad.const.pool.tag # bad class file compiler.misc.bad.const.pool.tag # bad class file
compiler.misc.bad.const.pool.tag.at # bad class file compiler.misc.bad.const.pool.tag.at # bad class file

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -23,10 +23,10 @@
/** /**
* @test * @test
* @bug 8056897 * @bug 8056897 8254073
* @modules jdk.compiler/com.sun.tools.javac.parser * @modules jdk.compiler/com.sun.tools.javac.parser
* jdk.compiler/com.sun.tools.javac.util * jdk.compiler/com.sun.tools.javac.util
* @summary Proper lexing of integer literals. * @summary Proper lexing of various token kinds.
*/ */
import java.io.IOException; import java.io.IOException;
@ -43,41 +43,130 @@ import com.sun.tools.javac.parser.Tokens.TokenKind;
import com.sun.tools.javac.util.Context; import com.sun.tools.javac.util.Context;
import com.sun.tools.javac.util.Log; import com.sun.tools.javac.util.Log;
import static com.sun.tools.javac.parser.Tokens.TokenKind.*;
public class JavaLexerTest { public class JavaLexerTest {
public static void main(String... args) throws Exception { static final TestTuple[] PASSING_TESTS = {
new JavaLexerTest().run(); new TestTuple(FLOATLITERAL, "0.0f"),
new TestTuple(FLOATLITERAL, "0.0F"),
new TestTuple(FLOATLITERAL, ".0F"),
new TestTuple(FLOATLITERAL, "0.F"),
new TestTuple(FLOATLITERAL, "0E0F"),
new TestTuple(FLOATLITERAL, "0E+0F"),
new TestTuple(FLOATLITERAL, "0E-0F"),
new TestTuple(DOUBLELITERAL, "0.0d"),
new TestTuple(DOUBLELITERAL, "0.0D"),
new TestTuple(DOUBLELITERAL, ".0D"),
new TestTuple(DOUBLELITERAL, "0.D"),
new TestTuple(DOUBLELITERAL, "0E0D"),
new TestTuple(DOUBLELITERAL, "0E+0D"),
new TestTuple(DOUBLELITERAL, "0E-0D"),
new TestTuple(DOUBLELITERAL, "0x0.0p0d"),
new TestTuple(DOUBLELITERAL, "0xff.0p8d"),
new TestTuple(STRINGLITERAL, "\"\\u2022\""),
new TestTuple(STRINGLITERAL, "\"\\b\\t\\n\\f\\r\\\'\\\"\\\\\""),
new TestTuple(CHARLITERAL, "\'\\b\'"),
new TestTuple(CHARLITERAL, "\'\\t\'"),
new TestTuple(CHARLITERAL, "\'\\n\'"),
new TestTuple(CHARLITERAL, "\'\\f\'"),
new TestTuple(CHARLITERAL, "\'\\r\'"),
new TestTuple(CHARLITERAL, "\'\\'\'"),
new TestTuple(CHARLITERAL, "\'\\\\'"),
new TestTuple(CHARLITERAL, "\'\\\'\'"),
new TestTuple(CHARLITERAL, "\'\\\"\'"),
new TestTuple(IDENTIFIER, "abc\\u0005def"),
};
static final TestTuple[] FAILING_TESTS = {
new TestTuple(LONGLITERAL, "0bL"),
new TestTuple(LONGLITERAL, "0b20L"),
new TestTuple(LONGLITERAL, "0xL"),
new TestTuple(INTLITERAL, "0xG000L", "0x"),
new TestTuple(DOUBLELITERAL, "0E*0F", "0E"),
new TestTuple(DOUBLELITERAL, "0E*0D", "0E"),
new TestTuple(INTLITERAL, "0xp8d", "0x"),
new TestTuple(DOUBLELITERAL, "0x8pd", "0x8pd"),
new TestTuple(INTLITERAL, "0xpd", "0x"),
new TestTuple(ERROR, "\"\\u20\""),
new TestTuple(ERROR, "\"\\u\""),
new TestTuple(ERROR, "\"\\uG000\""),
new TestTuple(ERROR, "\"\\u \""),
new TestTuple(ERROR, "\"\\q\""),
new TestTuple(ERROR, "\'\'"),
new TestTuple(ERROR, "\'\\q\'", "\'\\"),
};
static class TestTuple {
String input;
TokenKind kind;
String expected;
TestTuple(TokenKind kind, String input, String expected) {
this.input = input;
this.kind = kind;
this.expected = expected;
}
TestTuple(TokenKind kind, String input) {
this(kind, input, input);
}
} }
void run() throws Exception { void test(TestTuple test, boolean willFail) throws Exception {
Context ctx = new Context(); Context ctx = new Context();
Log log = Log.instance(ctx); Log log = Log.instance(ctx);
String input = "0bL 0b20L 0xL ";
log.useSource(new SimpleJavaFileObject(new URI("mem://Test.java"), JavaFileObject.Kind.SOURCE) { log.useSource(new SimpleJavaFileObject(new URI("mem://Test.java"), JavaFileObject.Kind.SOURCE) {
@Override @Override
public CharSequence getCharContent(boolean ignoreEncodingErrors) throws IOException { public CharSequence getCharContent(boolean ignoreEncodingErrors) throws IOException {
return input; return test.input;
} }
}); });
char[] inputArr = input.toCharArray();
JavaTokenizer tokenizer = new JavaTokenizer(ScannerFactory.instance(ctx), inputArr, inputArr.length) {
};
assertKind(input, tokenizer, TokenKind.LONGLITERAL, "0bL"); char[] inputArr = test.input.toCharArray();
assertKind(input, tokenizer, TokenKind.LONGLITERAL, "0b20L"); JavaTokenizer tokenizer = new JavaTokenizer(ScannerFactory.instance(ctx), inputArr, inputArr.length) {};
assertKind(input, tokenizer, TokenKind.LONGLITERAL, "0xL"); Token token = tokenizer.readToken();
boolean failed = log.nerrors != 0;
boolean normal = failed == willFail;
if (!normal) {
System.err.println("input: " + test.input);
String message = willFail ? "Expected to fail: " : "Expected to pass: ";
throw new AssertionError(message + test.input);
}
String actual = test.input.substring(token.pos, token.endPos);
if (token.kind != test.kind) {
System.err.println("input: " + test.input);
throw new AssertionError("Unexpected token kind: " + token.kind.name());
}
if (!Objects.equals(test.expected, actual)) {
System.err.println("input: " + test.input);
throw new AssertionError("Unexpected token content: " + actual);
}
} }
void assertKind(String input, JavaTokenizer tokenizer, TokenKind kind, String expectedText) { void run() throws Exception {
Token token = tokenizer.readToken(); for (TestTuple test : PASSING_TESTS) {
test(test, false);
if (token.kind != kind) {
throw new AssertionError("Unexpected token kind: " + token.kind);
} }
String actualText = input.substring(token.pos, token.endPos); for (TestTuple test : FAILING_TESTS) {
test(test, true);
if (!Objects.equals(actualText, expectedText)) {
throw new AssertionError("Unexpected token text: " + actualText);
} }
} }
public static void main(String[] args) throws Exception {
new JavaLexerTest().run();
}
} }

@ -1,10 +1,10 @@
NonasciiDigit.java:12:24: compiler.err.illegal.nonascii.digit NonasciiDigit.java:12:18: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:13:19: compiler.err.illegal.nonascii.digit NonasciiDigit.java:13:19: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:14:24: compiler.err.illegal.nonascii.digit NonasciiDigit.java:14:18: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:16:27: compiler.err.illegal.nonascii.digit NonasciiDigit.java:16:21: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:17:22: compiler.err.illegal.nonascii.digit NonasciiDigit.java:17:23: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:18:22: compiler.err.illegal.nonascii.digit NonasciiDigit.java:18:25: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:19:22: compiler.err.illegal.nonascii.digit NonasciiDigit.java:19:23: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:20:22: compiler.err.illegal.nonascii.digit NonasciiDigit.java:20:22: compiler.err.illegal.nonascii.digit
NonasciiDigit.java:21:27: compiler.err.illegal.nonascii.digit NonasciiDigit.java:21:21: compiler.err.illegal.nonascii.digit
9 errors 9 errors

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -25,24 +25,48 @@
* @test * @test
* @bug 4330479 * @bug 4330479
* @summary ASCII SUB character is rejected in multi-line comments * @summary ASCII SUB character is rejected in multi-line comments
* @author gafter * @library /tools/lib
* * @modules jdk.compiler/com.sun.tools.javac.api
* @compile SubChar.java * jdk.compiler/com.sun.tools.javac.main
* @build toolbox.ToolBox toolbox.JavacTask
* @run main SubChar
*/ */
import toolbox.JavacTask;
import toolbox.JavaTask;
import toolbox.Task;
import toolbox.ToolBox;
/*
Note: this source file has been crafted very carefully to end with the
unicode escape sequence for the control-Z character without a
following newline. The scanner is specified to allow control-Z there.
If you edit this source file, please make sure that your editor does
not insert a newline after that trailing line.
*/
/** \u001A */ public class SubChar {
class SubChar { private static final ToolBox TOOLBOX = new ToolBox();
public static void main(String args[]) {
return; private static final String SOURCE = """
} /*
Note: this source file has been crafted very carefully to end with the
unicode escape sequence for the control-Z character without a
following newline. The scanner is specified to allow control-Z there.
If you edit this source file, please make sure that your editor does
not insert a newline after that trailing line.
*/
/** \\u001A */
class ControlZTest {
public static void main(String args[]) {
return;
}
}
/* \\u001A */\
""";
public static void main(String... args) {
String output = new JavacTask(TOOLBOX)
.sources(SOURCE)
.classpath(".")
.options("-encoding", "utf8")
.run()
.writeAll()
.getOutput(Task.OutputKind.DIRECT);
System.out.println(output);
}
} }
/* \u001A */

@ -1,4 +1,4 @@
SupplementaryJavaID2.java:12:14: compiler.err.illegal.char: \ud801 SupplementaryJavaID2.java:12:9: compiler.err.illegal.char: \ud801
SupplementaryJavaID2.java:12:20: compiler.err.illegal.char: \ud801 SupplementaryJavaID2.java:12:15: compiler.err.illegal.char: \ud801
SupplementaryJavaID2.java:12:24: compiler.err.expected: token.identifier SupplementaryJavaID2.java:12:24: compiler.err.expected: token.identifier
3 errors 3 errors

@ -1,3 +1,3 @@
SupplementaryJavaID3.java:12:17: compiler.err.illegal.char: \ud801 SupplementaryJavaID3.java:12:12: compiler.err.illegal.char: \ud801
SupplementaryJavaID3.java:12:23: compiler.err.illegal.char: \ud801 SupplementaryJavaID3.java:12:18: compiler.err.illegal.char: \ud801
2 errors 2 errors

@ -1,2 +1,2 @@
SupplementaryJavaID4.java:14:14: compiler.err.illegal.char: \ud834\udd7b SupplementaryJavaID4.java:14:9: compiler.err.illegal.char: \ud834\udd7b
1 error 1 error

@ -1,2 +1,2 @@
SupplementaryJavaID5.java:14:17: compiler.err.illegal.char: \ud834\udd00 SupplementaryJavaID5.java:14:12: compiler.err.illegal.char: \ud834\udd00
1 error 1 error