8254073: Tokenizer improvements (revised)
Reviewed-by: mcimadamore
This commit is contained in:
parent
9cecc16747
commit
4f9a1ffcdd
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2004, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2004, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -29,15 +29,15 @@ import com.sun.tools.javac.parser.Tokens.Comment;
|
||||
import com.sun.tools.javac.parser.Tokens.Comment.CommentStyle;
|
||||
import com.sun.tools.javac.util.*;
|
||||
|
||||
import java.nio.*;
|
||||
import java.nio.CharBuffer;
|
||||
import java.util.Arrays;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static com.sun.tools.javac.util.LayoutCharacters.*;
|
||||
|
||||
/** An extension to the base lexical analyzer that captures
|
||||
* and processes the contents of doc comments. It does so by
|
||||
* translating Unicode escape sequences and by stripping the
|
||||
* leading whitespace and starts from each line of the comment.
|
||||
/**
|
||||
* An extension to the base lexical analyzer (JavaTokenizer) that
|
||||
* captures and processes the contents of doc comments. It does
|
||||
* so by stripping the leading whitespace and comment starts from
|
||||
* each line of the Javadoc comment.
|
||||
*
|
||||
* <p><b>This is NOT part of any supported API.
|
||||
* If you write code that depends on this, you do so at your own risk.
|
||||
@ -45,189 +45,107 @@ import static com.sun.tools.javac.util.LayoutCharacters.*;
|
||||
* deletion without notice.</b>
|
||||
*/
|
||||
public class JavadocTokenizer extends JavaTokenizer {
|
||||
|
||||
/** Create a scanner from the input buffer. buffer must implement
|
||||
* array() and compact(), and remaining() must be less than limit().
|
||||
/**
|
||||
* The factory that created this Scanner.
|
||||
*/
|
||||
protected JavadocTokenizer(ScannerFactory fac, CharBuffer buffer) {
|
||||
super(fac, buffer);
|
||||
final ScannerFactory fac;
|
||||
|
||||
/**
|
||||
* Create a tokenizer from the input character buffer. The input buffer
|
||||
* content would typically be a Javadoc comment extracted by
|
||||
* JavaTokenizer.
|
||||
*
|
||||
* @param fac the factory which created this Scanner.
|
||||
* @param cb the input character buffer.
|
||||
*/
|
||||
protected JavadocTokenizer(ScannerFactory fac, CharBuffer cb) {
|
||||
super(fac, cb);
|
||||
this.fac = fac;
|
||||
}
|
||||
|
||||
/** Create a scanner from the input array. The array must have at
|
||||
* least a single character of extra space.
|
||||
/**
|
||||
* Create a tokenizer from the input array. The input buffer
|
||||
* content would typically be a Javadoc comment extracted by
|
||||
* JavaTokenizer.
|
||||
*
|
||||
* @param fac factory which created this Scanner
|
||||
* @param array input character array.
|
||||
* @param length length of the meaningful content in the array.
|
||||
*/
|
||||
protected JavadocTokenizer(ScannerFactory fac, char[] input, int inputLength) {
|
||||
super(fac, input, inputLength);
|
||||
protected JavadocTokenizer(ScannerFactory fac, char[] array, int length) {
|
||||
super(fac, array, length);
|
||||
this.fac = fac;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Comment processComment(int pos, int endPos, CommentStyle style) {
|
||||
char[] buf = reader.getRawCharacters(pos, endPos);
|
||||
return new JavadocComment(new DocReader(fac, buf, buf.length, pos), style);
|
||||
char[] buf = getRawCharacters(pos, endPos);
|
||||
return new JavadocComment(style, fac, buf, pos);
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a specialized version of UnicodeReader that keeps track of the
|
||||
* column position within a given character stream (used for Javadoc processing),
|
||||
* and which builds a table for mapping positions in the comment string to
|
||||
* positions in the source file.
|
||||
* An extension of BasicComment used to extract the relevant portion
|
||||
* of a Javadoc comment.
|
||||
*/
|
||||
static class DocReader extends UnicodeReader {
|
||||
|
||||
int col;
|
||||
int startPos;
|
||||
|
||||
/**
|
||||
* A buffer for building a table for mapping positions in {@link #sbuf}
|
||||
* to positions in the source buffer.
|
||||
*
|
||||
* The array is organized as a series of pairs of integers: the first
|
||||
* number in each pair specifies a position in the comment text,
|
||||
* the second number in each pair specifies the corresponding position
|
||||
* in the source buffer. The pairs are sorted in ascending order.
|
||||
*
|
||||
* Since the mapping function is generally continuous, with successive
|
||||
* positions in the string corresponding to successive positions in the
|
||||
* source buffer, the table only needs to record discontinuities in
|
||||
* the mapping. The values of intermediate positions can be inferred.
|
||||
*
|
||||
* Discontinuities may occur in a number of places: when a newline
|
||||
* is followed by whitespace and asterisks (which are ignored),
|
||||
* when a tab is expanded into spaces, and when unicode escapes
|
||||
* are used in the source buffer.
|
||||
*
|
||||
* Thus, to find the source position of any position, p, in the comment
|
||||
* string, find the index, i, of the pair whose string offset
|
||||
* ({@code pbuf[i] }) is closest to but not greater than p. Then,
|
||||
* {@code sourcePos(p) = pbuf[i+1] + (p - pbuf[i]) }.
|
||||
*/
|
||||
int[] pbuf = new int[128];
|
||||
|
||||
/**
|
||||
* The index of the next empty slot in the pbuf buffer.
|
||||
*/
|
||||
int pp = 0;
|
||||
|
||||
/** The buffer index of the last double backslash sequence
|
||||
*/
|
||||
private int doubleBackslashBp = -1;
|
||||
|
||||
DocReader(ScannerFactory fac, char[] input, int inputLength, int startPos) {
|
||||
super(fac, input, inputLength);
|
||||
this.startPos = startPos;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void convertUnicode() {
|
||||
if (ch == '\\' && unicodeConversionBp != bp) {
|
||||
bp++; ch = buf[bp]; col++;
|
||||
if (ch == 'u') {
|
||||
do {
|
||||
bp++; ch = buf[bp]; col++;
|
||||
} while (ch == 'u');
|
||||
int limit = bp + 3;
|
||||
if (limit < buflen) {
|
||||
int d = digit(bp, 16);
|
||||
int code = d;
|
||||
while (bp < limit && d >= 0) {
|
||||
bp++; ch = buf[bp]; col++;
|
||||
d = digit(bp, 16);
|
||||
code = (code << 4) + d;
|
||||
}
|
||||
if (d >= 0) {
|
||||
ch = (char)code;
|
||||
unicodeConversionBp = bp;
|
||||
return;
|
||||
}
|
||||
}
|
||||
// "illegal.Unicode.esc", reported by base scanner
|
||||
} else {
|
||||
bp--;
|
||||
ch = '\\';
|
||||
col--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void scanCommentChar() {
|
||||
scanChar();
|
||||
if (ch == '\\') {
|
||||
if (peekChar() == '\\' && !isUnicode()) {
|
||||
bp++; col++;
|
||||
doubleBackslashBp = bp;
|
||||
} else {
|
||||
convertUnicode();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void scanChar() {
|
||||
bp++;
|
||||
ch = buf[bp];
|
||||
switch (ch) {
|
||||
case '\r': // return
|
||||
col = 0;
|
||||
break;
|
||||
case '\n': // newline
|
||||
if (bp == 0 || buf[bp-1] != '\r') {
|
||||
col = 0;
|
||||
}
|
||||
break;
|
||||
case '\t': // tab
|
||||
col = (col / TabInc * TabInc) + TabInc;
|
||||
break;
|
||||
case '\\': // possible Unicode
|
||||
col++;
|
||||
convertUnicode();
|
||||
break;
|
||||
default:
|
||||
col++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void putChar(char ch, boolean scan) {
|
||||
// At this point, bp is the position of the current character in buf,
|
||||
// and sp is the position in sbuf where this character will be put.
|
||||
// Record a new entry in pbuf if pbuf is empty or if sp and its
|
||||
// corresponding source position are not equidistant from the
|
||||
// corresponding values in the latest entry in the pbuf array.
|
||||
// (i.e. there is a discontinuity in the map function.)
|
||||
if ((pp == 0)
|
||||
|| (sp - pbuf[pp - 2] != (startPos + bp) - pbuf[pp - 1])) {
|
||||
if (pp + 1 >= pbuf.length) {
|
||||
int[] new_pbuf = new int[pbuf.length * 2];
|
||||
System.arraycopy(pbuf, 0, new_pbuf, 0, pbuf.length);
|
||||
pbuf = new_pbuf;
|
||||
}
|
||||
pbuf[pp] = sp;
|
||||
pbuf[pp + 1] = startPos + bp;
|
||||
pp += 2;
|
||||
}
|
||||
super.putChar(ch, scan);
|
||||
}
|
||||
|
||||
/** Whether the ch represents a sequence of two backslashes. */
|
||||
boolean isDoubleBackslash() {
|
||||
return doubleBackslashBp == bp;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
protected static class JavadocComment extends JavaTokenizer.BasicComment<DocReader> {
|
||||
protected static class JavadocComment extends BasicComment {
|
||||
/**
|
||||
* Pattern used to detect a well-formed @deprecated tag in a Javadoc
|
||||
* comment.
|
||||
*/
|
||||
private static final Pattern DEPRECATED_PATTERN =
|
||||
Pattern.compile("(?sm).*^\\s*@deprecated( |$).*");
|
||||
|
||||
/**
|
||||
* Translated and stripped contents of doc comment
|
||||
*/
|
||||
* The relevant portion of the comment that is of interest to Javadoc.
|
||||
* Produced by invoking scanDocComment.
|
||||
*/
|
||||
private String docComment = null;
|
||||
private int[] docPosns = null;
|
||||
|
||||
JavadocComment(DocReader reader, CommentStyle cs) {
|
||||
super(reader, cs);
|
||||
/**
|
||||
* StringBuilder used to extract the relevant portion of the Javadoc comment.
|
||||
*/
|
||||
private final StringBuilder sb;
|
||||
|
||||
/**
|
||||
* Map used to map the extracted Javadoc comment's character positions back to
|
||||
* the original source.
|
||||
*/
|
||||
OffsetMap offsetMap = new OffsetMap();
|
||||
|
||||
JavadocComment(CommentStyle cs, ScannerFactory sf, char[] array, int offset) {
|
||||
super( cs, sf, array, offset);
|
||||
this.sb = new StringBuilder();
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a character to the extraction buffer.
|
||||
*
|
||||
* @param ch character to add.
|
||||
*/
|
||||
protected void put(char ch) {
|
||||
offsetMap.add(sb.length(), offsetPosition());
|
||||
sb.append(ch);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a code point to the extraction buffer.
|
||||
*
|
||||
* @param codePoint code point to add.
|
||||
*/
|
||||
protected void putCodePoint(int codePoint) {
|
||||
offsetMap.add(sb.length(), offsetPosition());
|
||||
sb.appendCodePoint(codePoint);
|
||||
}
|
||||
|
||||
/**
|
||||
* Add current character or code point to the extraction buffer.
|
||||
*/
|
||||
protected void put() {
|
||||
if (isSurrogate()) {
|
||||
putCodePoint(getCodepoint());
|
||||
} else {
|
||||
put(get());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -240,232 +158,292 @@ public class JavadocTokenizer extends JavaTokenizer {
|
||||
|
||||
@Override
|
||||
public int getSourcePos(int pos) {
|
||||
// Binary search to find the entry for which the string index is
|
||||
// less than pos. Since docPosns is a list of pairs of integers
|
||||
// we must make sure the index is always even.
|
||||
// If we find an exact match for pos, the other item in the pair
|
||||
// gives the source pos; otherwise, compute the source position
|
||||
// relative to the best match found in the array.
|
||||
if (pos == Position.NOPOS)
|
||||
if (pos == Position.NOPOS) {
|
||||
return Position.NOPOS;
|
||||
if (pos < 0 || pos > docComment.length())
|
||||
throw new StringIndexOutOfBoundsException(String.valueOf(pos));
|
||||
if (docPosns == null)
|
||||
return Position.NOPOS;
|
||||
int start = 0;
|
||||
int end = docPosns.length;
|
||||
while (start < end - 2) {
|
||||
// find an even index midway between start and end
|
||||
int index = ((start + end) / 4) * 2;
|
||||
if (docPosns[index] < pos)
|
||||
start = index;
|
||||
else if (docPosns[index] == pos)
|
||||
return docPosns[index + 1];
|
||||
else
|
||||
end = index;
|
||||
}
|
||||
return docPosns[start + 1] + (pos - docPosns[start]);
|
||||
|
||||
if (pos < 0 || pos > docComment.length()) {
|
||||
throw new StringIndexOutOfBoundsException(String.valueOf(pos));
|
||||
}
|
||||
|
||||
return offsetMap.getSourcePos(pos);
|
||||
}
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("fallthrough")
|
||||
protected void scanDocComment() {
|
||||
try {
|
||||
boolean firstLine = true;
|
||||
|
||||
// Skip over first slash
|
||||
comment_reader.scanCommentChar();
|
||||
// Skip over first star
|
||||
comment_reader.scanCommentChar();
|
||||
// Skip over /*
|
||||
accept("/*");
|
||||
|
||||
// consume any number of stars
|
||||
while (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '*') {
|
||||
comment_reader.scanCommentChar();
|
||||
}
|
||||
// is the comment in the form /**/, /***/, /****/, etc. ?
|
||||
if (comment_reader.bp < comment_reader.buflen && comment_reader.ch == '/') {
|
||||
// Consume any number of stars
|
||||
skip('*');
|
||||
|
||||
// Is the comment in the form /**/, /***/, /****/, etc. ?
|
||||
if (is('/')) {
|
||||
docComment = "";
|
||||
return;
|
||||
}
|
||||
|
||||
// skip a newline on the first line of the comment.
|
||||
if (comment_reader.bp < comment_reader.buflen) {
|
||||
if (comment_reader.ch == LF) {
|
||||
comment_reader.scanCommentChar();
|
||||
firstLine = false;
|
||||
} else if (comment_reader.ch == CR) {
|
||||
comment_reader.scanCommentChar();
|
||||
if (comment_reader.ch == LF) {
|
||||
comment_reader.scanCommentChar();
|
||||
firstLine = false;
|
||||
}
|
||||
}
|
||||
// Skip line terminator on the first line of the comment.
|
||||
if (isOneOf('\n', '\r')) {
|
||||
accept('\r');
|
||||
accept('\n');
|
||||
firstLine = false;
|
||||
}
|
||||
|
||||
outerLoop:
|
||||
|
||||
// The outerLoop processes the doc comment, looping once
|
||||
// for each line. For each line, it first strips off
|
||||
// whitespace, then it consumes any stars, then it
|
||||
// puts the rest of the line into our buffer.
|
||||
while (comment_reader.bp < comment_reader.buflen) {
|
||||
int begin_bp = comment_reader.bp;
|
||||
char begin_ch = comment_reader.ch;
|
||||
// The wsLoop consumes whitespace from the beginning
|
||||
// of each line.
|
||||
wsLoop:
|
||||
|
||||
while (comment_reader.bp < comment_reader.buflen) {
|
||||
switch(comment_reader.ch) {
|
||||
case ' ':
|
||||
comment_reader.scanCommentChar();
|
||||
break;
|
||||
case '\t':
|
||||
comment_reader.col = ((comment_reader.col - 1) / TabInc * TabInc) + TabInc;
|
||||
comment_reader.scanCommentChar();
|
||||
break;
|
||||
case FF:
|
||||
comment_reader.col = 0;
|
||||
comment_reader.scanCommentChar();
|
||||
break;
|
||||
// Treat newline at beginning of line (blank line, no star)
|
||||
// as comment text. Old Javadoc compatibility requires this.
|
||||
/*---------------------------------*
|
||||
case CR: // (Spec 3.4)
|
||||
doc_reader.scanCommentChar();
|
||||
if (ch == LF) {
|
||||
col = 0;
|
||||
doc_reader.scanCommentChar();
|
||||
}
|
||||
break;
|
||||
case LF: // (Spec 3.4)
|
||||
doc_reader.scanCommentChar();
|
||||
break;
|
||||
*---------------------------------*/
|
||||
default:
|
||||
// we've seen something that isn't whitespace;
|
||||
// jump out.
|
||||
break wsLoop;
|
||||
}
|
||||
}
|
||||
|
||||
// puts the rest of the line into the extraction buffer.
|
||||
while (isAvailable()) {
|
||||
int begin_pos = position();
|
||||
// Consume whitespace from the beginning of each line.
|
||||
skipWhitespace();
|
||||
// Are there stars here? If so, consume them all
|
||||
// and check for the end of comment.
|
||||
if (comment_reader.ch == '*') {
|
||||
if (is('*')) {
|
||||
// skip all of the stars
|
||||
do {
|
||||
comment_reader.scanCommentChar();
|
||||
} while (comment_reader.ch == '*');
|
||||
skip('*');
|
||||
|
||||
// check for the closing slash.
|
||||
if (comment_reader.ch == '/') {
|
||||
// We're done with the doc comment
|
||||
// scanChar() and breakout.
|
||||
if (accept('/')) {
|
||||
// We're done with the Javadoc comment
|
||||
break outerLoop;
|
||||
}
|
||||
} else if (! firstLine) {
|
||||
} else if (!firstLine) {
|
||||
// The current line does not begin with a '*' so we will
|
||||
// treat it as comment
|
||||
comment_reader.bp = begin_bp;
|
||||
comment_reader.ch = begin_ch;
|
||||
reset(begin_pos);
|
||||
}
|
||||
// The textLoop processes the rest of the characters
|
||||
// on the line, adding them to our buffer.
|
||||
|
||||
textLoop:
|
||||
while (comment_reader.bp < comment_reader.buflen) {
|
||||
switch (comment_reader.ch) {
|
||||
case '*':
|
||||
// Is this just a star? Or is this the
|
||||
// end of a comment?
|
||||
comment_reader.scanCommentChar();
|
||||
if (comment_reader.ch == '/') {
|
||||
// This is the end of the comment,
|
||||
// set ch and return our buffer.
|
||||
break outerLoop;
|
||||
}
|
||||
// This is just an ordinary star. Add it to
|
||||
// the buffer.
|
||||
comment_reader.putChar('*', false);
|
||||
break;
|
||||
case '\\':
|
||||
comment_reader.putChar('\\', false);
|
||||
// If a double backslash was found, write two
|
||||
if (comment_reader.isDoubleBackslash()) {
|
||||
comment_reader.putChar('\\', false);
|
||||
}
|
||||
comment_reader.scanCommentChar();
|
||||
break;
|
||||
case ' ':
|
||||
case '\t':
|
||||
comment_reader.putChar(comment_reader.ch, false);
|
||||
comment_reader.scanCommentChar();
|
||||
break;
|
||||
case FF:
|
||||
comment_reader.scanCommentChar();
|
||||
break textLoop; // treat as end of line
|
||||
case CR: // (Spec 3.4)
|
||||
comment_reader.scanCommentChar();
|
||||
if (comment_reader.ch != LF) {
|
||||
// Canonicalize CR-only line terminator to LF
|
||||
comment_reader.putChar((char)LF, false);
|
||||
break textLoop;
|
||||
}
|
||||
/* fall through to LF case */
|
||||
case LF: // (Spec 3.4)
|
||||
// The textLoop processes the rest of the characters
|
||||
// on the line, adding them to the extraction buffer.
|
||||
while (isAvailable()) {
|
||||
if (accept("*/")) {
|
||||
// This is the end of the comment, return
|
||||
// the contents of the extraction buffer.
|
||||
break outerLoop;
|
||||
} else if (isOneOf('\n', '\r')) {
|
||||
// We've seen a newline. Add it to our
|
||||
// buffer and break out of this loop,
|
||||
// starting fresh on a new line.
|
||||
comment_reader.putChar(comment_reader.ch, false);
|
||||
comment_reader.scanCommentChar();
|
||||
put('\n');
|
||||
accept('\r');
|
||||
accept('\n');
|
||||
break textLoop;
|
||||
default:
|
||||
} else if (is('\f')){
|
||||
next();
|
||||
break textLoop; // treat as end of line
|
||||
|
||||
} else {
|
||||
// Add the character to our buffer.
|
||||
comment_reader.putChar(comment_reader.ch, false);
|
||||
comment_reader.scanCommentChar();
|
||||
put();
|
||||
next();
|
||||
}
|
||||
} // end textLoop
|
||||
firstLine = false;
|
||||
} // end outerLoop
|
||||
|
||||
if (comment_reader.sp > 0) {
|
||||
int i = comment_reader.sp - 1;
|
||||
trailLoop:
|
||||
while (i > -1) {
|
||||
switch (comment_reader.sbuf[i]) {
|
||||
case '*':
|
||||
i--;
|
||||
break;
|
||||
default:
|
||||
break trailLoop;
|
||||
}
|
||||
// If extraction buffer is not empty.
|
||||
if (sb.length() > 0) {
|
||||
// Remove trailing asterisks.
|
||||
int i = sb.length() - 1;
|
||||
while (i > -1 && sb.charAt(i) == '*') {
|
||||
i--;
|
||||
}
|
||||
comment_reader.sp = i + 1;
|
||||
sb.setLength(i + 1) ;
|
||||
|
||||
// Store the text of the doc comment
|
||||
docComment = comment_reader.chars();
|
||||
docPosns = new int[comment_reader.pp];
|
||||
System.arraycopy(comment_reader.pbuf, 0, docPosns, 0, docPosns.length);
|
||||
} else {
|
||||
docComment = sb.toString();
|
||||
} else {
|
||||
docComment = "";
|
||||
}
|
||||
} finally {
|
||||
scanned = true;
|
||||
comment_reader = null;
|
||||
if (docComment != null &&
|
||||
DEPRECATED_PATTERN.matcher(docComment).matches()) {
|
||||
|
||||
// Check if comment contains @deprecated comment.
|
||||
if (docComment != null && DEPRECATED_PATTERN.matcher(docComment).matches()) {
|
||||
deprecatedFlag = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
//where:
|
||||
private static final Pattern DEPRECATED_PATTERN =
|
||||
Pattern.compile("(?sm).*^\\s*@deprecated( |$).*");
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a map for translating between line numbers and positions in the input.
|
||||
* Overridden to expand tabs.
|
||||
*
|
||||
* @return a LineMap
|
||||
*/
|
||||
@Override
|
||||
public Position.LineMap getLineMap() {
|
||||
char[] buf = reader.getRawCharacters();
|
||||
char[] buf = getRawCharacters();
|
||||
return Position.makeLineMap(buf, buf.length, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds an int table mapping positions in the extracted Javadoc comment
|
||||
* to positions in the JavaTokenizer source buffer.
|
||||
*
|
||||
* The array is organized as a series of pairs of integers: the first
|
||||
* number in each pair specifies a position in the comment text,
|
||||
* the second number in each pair specifies the corresponding position
|
||||
* in the source buffer. The pairs are sorted in ascending order.
|
||||
*
|
||||
* Since the mapping function is generally continuous, with successive
|
||||
* positions in the string corresponding to successive positions in the
|
||||
* source buffer, the table only needs to record discontinuities in
|
||||
* the mapping. The values of intermediate positions can be inferred.
|
||||
*
|
||||
* Discontinuities may occur in a number of places: when a newline
|
||||
* is followed by whitespace and asterisks (which are ignored),
|
||||
* when a tab is expanded into spaces, and when unicode escapes
|
||||
* are used in the source buffer.
|
||||
*
|
||||
* Thus, to find the source position of any position, p, in the comment
|
||||
* string, find the index, i, of the pair whose string offset
|
||||
* ({@code map[i * NOFFSETS + SB_OFFSET] }) is closest to but not greater
|
||||
* than p. Then, {@code sourcePos(p) = map[i * NOFFSETS + POS_OFFSET] +
|
||||
* (p - map[i * NOFFSETS + SB_OFFSET]) }.
|
||||
*/
|
||||
static class OffsetMap {
|
||||
/**
|
||||
* map entry offset for comment offset member of pair.
|
||||
*/
|
||||
private static final int SB_OFFSET = 0;
|
||||
|
||||
/**
|
||||
* map entry offset of input offset member of pair.
|
||||
*/
|
||||
private static final int POS_OFFSET = 1;
|
||||
|
||||
/**
|
||||
* Number of elements in each entry.
|
||||
*/
|
||||
private static final int NOFFSETS = 2;
|
||||
|
||||
/**
|
||||
* Array storing entries in map.
|
||||
*/
|
||||
private int[] map;
|
||||
|
||||
/**
|
||||
* Logical size of map (number of valid entries.)
|
||||
*/
|
||||
private int size;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*/
|
||||
OffsetMap() {
|
||||
this.map = new int[128];
|
||||
this.size = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if it is worthwhile adding the entry pair to the map. That is
|
||||
* if there is a change in relative offset.
|
||||
*
|
||||
* @param sbOffset comment offset member of pair.
|
||||
* @param posOffet input offset member of pair.
|
||||
*
|
||||
* @return true if it is worthwhile adding the entry pair.
|
||||
*/
|
||||
boolean shouldAdd(int sbOffset, int posOffet) {
|
||||
return sbOffset - lastSBOffset() != posOffet - lastPosOffset();
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds entry pair if worthwhile.
|
||||
*
|
||||
* @param sbOffset comment offset member of pair.
|
||||
* @param posOffet input offset member of pair.
|
||||
*/
|
||||
void add(int sbOffset, int posOffet) {
|
||||
if (size == 0 || shouldAdd(sbOffset, posOffet)) {
|
||||
ensure(NOFFSETS);
|
||||
map[size + SB_OFFSET] = sbOffset;
|
||||
map[size + POS_OFFSET] = posOffet;
|
||||
size += NOFFSETS;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the previous comment offset.
|
||||
*
|
||||
* @return the previous comment offset.
|
||||
*/
|
||||
private int lastSBOffset() {
|
||||
return size == 0 ? 0 : map[size - NOFFSETS + SB_OFFSET];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the previous input offset.
|
||||
*
|
||||
* @return the previous input offset.
|
||||
*/
|
||||
private int lastPosOffset() {
|
||||
return size == 0 ? 0 : map[size - NOFFSETS + POS_OFFSET];
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensures there is enough space for a new entry.
|
||||
*
|
||||
* @param need number of array slots needed.
|
||||
*/
|
||||
private void ensure(int need) {
|
||||
need += size;
|
||||
int grow = map.length;
|
||||
|
||||
while (need > grow) {
|
||||
grow <<= 1;
|
||||
}
|
||||
|
||||
// Handle overflow.
|
||||
if (grow < map.length) {
|
||||
throw new IndexOutOfBoundsException();
|
||||
} else if (grow != map.length) {
|
||||
map = Arrays.copyOf(map, grow);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Binary search to find the entry for which the string index is less
|
||||
* than pos. Since the map is a list of pairs of integers we must make
|
||||
* sure the index is always NOFFSETS scaled. If we find an exact match
|
||||
* for pos, the other item in the pair gives the source pos; otherwise,
|
||||
* compute the source position relative to the best match found in the
|
||||
* array.
|
||||
*/
|
||||
int getSourcePos(int pos) {
|
||||
if (size == 0) {
|
||||
return Position.NOPOS;
|
||||
}
|
||||
|
||||
int start = 0;
|
||||
int end = size / NOFFSETS;
|
||||
|
||||
while (start < end - 1) {
|
||||
// find an index midway between start and end
|
||||
int index = (start + end) / 2;
|
||||
int indexScaled = index * NOFFSETS;
|
||||
|
||||
if (map[indexScaled + SB_OFFSET] < pos) {
|
||||
start = index;
|
||||
} else if (map[indexScaled + SB_OFFSET] == pos) {
|
||||
return map[indexScaled + POS_OFFSET];
|
||||
} else {
|
||||
end = index;
|
||||
}
|
||||
}
|
||||
|
||||
int startScaled = start * NOFFSETS;
|
||||
|
||||
return map[startScaled + POS_OFFSET] + (pos - map[startScaled + SB_OFFSET]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,101 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation. Oracle designates this
|
||||
* particular file as subject to the "Classpath" exception as provided
|
||||
* by Oracle in the LICENSE file that accompanied this code.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package com.sun.tools.javac.parser;
|
||||
|
||||
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Methods used to support text blocks lint.
|
||||
*
|
||||
* <p><b>This is NOT part of any supported API.
|
||||
* If you write code that depends on this, you do so at your own risk.
|
||||
* This code and its internal interfaces are subject to change or
|
||||
* deletion without notice.</b>
|
||||
*/
|
||||
class TextBlockSupport {
    /**
     * Kinds of white space problems that {@link #checkWhitespace(String)} can report.
     */
    enum WhitespaceChecks {
        /** A non-blank line does not start with the common indentation prefix. */
        INCONSISTENT,
        /** A line that extends beyond the common indentation ends with white space. */
        TRAILING
    }

    /**
     * Check that the use of white space in content is not problematic.
     *
     * <p>The content is split at line terminators. Unless the content ends with a
     * line terminator (last line is empty, which opts out of indentation checking),
     * the common indentation is the smallest indentation of the last line and of all
     * non-blank lines. Each line is then checked against the corresponding prefix of
     * the last line and for a trailing white space character.
     *
     * @param string content to check.
     *
     * @return the set of problems found; empty if there are none. The set iterates
     *         in declaration order of {@link WhitespaceChecks}.
     */
    static Set<WhitespaceChecks> checkWhitespace(String string) {
        // EnumSet iterates in declaration order. A HashSet of enum constants would
        // iterate in identity hash order, which can differ from run to run.
        Set<WhitespaceChecks> checks = EnumSet.noneOf(WhitespaceChecks.class);

        // No need to check empty strings.
        if (string.isEmpty()) {
            return checks;
        }

        // Maximum common indentation.
        int outdent = 0;
        // No need to check indentation if opting out (last line is empty.)
        char lastChar = string.charAt(string.length() - 1);
        boolean optOut = lastChar == '\n' || lastChar == '\r';
        // Split string at line terminators.
        String[] lines = string.split("\\R");
        int length = lines.length;
        // Extract last line.
        String lastLine = length == 0 ? "" : lines[length - 1];

        if (!optOut) {
            // Prime with the last line indentation (may be blank.)
            outdent = indexOfNonWhitespace(lastLine);

            for (String line : lines) {
                // Blank lines have no influence (last line accounted for.)
                if (!line.isBlank()) {
                    outdent = Integer.min(outdent, indexOfNonWhitespace(line));

                    // Cannot get any smaller.
                    if (outdent == 0) {
                        break;
                    }
                }
            }
        }

        // Last line is representative.
        String start = lastLine.substring(0, outdent);
        int allChecks = WhitespaceChecks.values().length;

        for (String line : lines) {
            // Fail if a line does not have the same indentation.
            if (!line.isBlank() && !line.startsWith(start)) {
                // Mix of different white space.
                checks.add(WhitespaceChecks.INCONSISTENT);
            }

            // Line has content even after indent is removed and ends in white space.
            if (outdent < line.length()
                    && Character.isWhitespace(line.charAt(line.length() - 1))) {
                checks.add(WhitespaceChecks.TRAILING);
            }

            // Nothing more can be learned once every check has been reported.
            if (checks.size() == allChecks) {
                break;
            }
        }

        return checks;
    }

    /**
     * Returns the number of leading white space characters in the string.
     *
     * @param string string to examine.
     *
     * @return index of the first non-white space character, or the string length
     *         if there is none.
     */
    private static int indexOfNonWhitespace(String string) {
        return string.length() - string.stripLeading().length();
    }
}
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2011, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -25,267 +25,503 @@
|
||||
|
||||
package com.sun.tools.javac.parser;
|
||||
|
||||
import java.nio.CharBuffer;
|
||||
import java.util.Arrays;
|
||||
|
||||
import com.sun.tools.javac.file.JavacFileManager;
|
||||
import com.sun.tools.javac.resources.CompilerProperties.Errors;
|
||||
import com.sun.tools.javac.util.ArrayUtils;
|
||||
import com.sun.tools.javac.util.Log;
|
||||
import com.sun.tools.javac.util.Name;
|
||||
import com.sun.tools.javac.util.Names;
|
||||
|
||||
import static com.sun.tools.javac.util.LayoutCharacters.*;
|
||||
import static com.sun.tools.javac.util.LayoutCharacters.EOI;
|
||||
import static com.sun.tools.javac.util.LayoutCharacters.tabulate;
|
||||
|
||||
/** The char reader used by the javac lexer/tokenizer. Returns the sequence of
|
||||
* characters contained in the input stream, handling unicode escape accordingly.
|
||||
* Additionally, it provides features for saving chars into a buffer and to retrieve
|
||||
* them at a later stage.
|
||||
/**
|
||||
* The unicode character reader used by the javac/javadoc lexer/tokenizer, returns characters
|
||||
* one by one as contained in the input stream, handling unicode escape sequences accordingly.
|
||||
*
|
||||
* <p><b>This is NOT part of any supported API.
|
||||
* If you write code that depends on this, you do so at your own risk.
|
||||
* This code and its internal interfaces are subject to change or
|
||||
* deletion without notice.</b>
|
||||
* deletion without notice.</b></p>
|
||||
*/
|
||||
public class UnicodeReader {
|
||||
|
||||
/** The input buffer, index of next character to be read,
|
||||
* index of one past last character in buffer.
|
||||
/**
|
||||
* Buffer containing characters from source file. May contain extraneous characters
|
||||
* beyond this.length.
|
||||
*/
|
||||
protected char[] buf;
|
||||
protected int bp;
|
||||
protected final int buflen;
|
||||
|
||||
/** The current character.
|
||||
*/
|
||||
protected char ch;
|
||||
|
||||
/** The buffer index of the last converted unicode character
|
||||
*/
|
||||
protected int unicodeConversionBp = -1;
|
||||
|
||||
protected Log log;
|
||||
protected Names names;
|
||||
|
||||
/** A character buffer for saved chars.
|
||||
*/
|
||||
protected char[] sbuf = new char[128];
|
||||
protected int realLength;
|
||||
protected int sp;
|
||||
private final char[] buffer;
|
||||
|
||||
/**
|
||||
* Create a scanner from the input array. This method might
|
||||
* modify the array. To avoid copying the input array, ensure
|
||||
* that {@code inputLength < input.length} or
|
||||
* {@code input[input.length -1]} is a white space character.
|
||||
* Length of meaningful content in buffer.
|
||||
*/
|
||||
private final int length;
|
||||
|
||||
/**
|
||||
* Character buffer index of character currently being observed.
|
||||
*/
|
||||
private int position;
|
||||
|
||||
/**
|
||||
* Number of characters combined to provide character currently being observed. Typically
|
||||
* one, but may be more when combinations of surrogate pairs and unicode escape sequences
|
||||
* are read.
|
||||
*/
|
||||
private int width;
|
||||
|
||||
/**
|
||||
* Character currently being observed. If a surrogate pair is read then will be the high
|
||||
* member of the pair.
|
||||
*/
|
||||
private char character;
|
||||
|
||||
/**
|
||||
* Codepoint of character currently being observed. Typically equivalent to the character
|
||||
* but will have a value greater than 0xFFFF when a surrogate pair.
|
||||
*/
|
||||
private int codepoint;
|
||||
|
||||
/**
|
||||
* true if the last character was a backslash. This is used to handle the special case
|
||||
* when a backslash precedes an unicode escape. In that case, the second backslash
|
||||
* is treated as a backslash and not part of an unicode escape.
|
||||
*/
|
||||
private boolean wasBackslash;
|
||||
|
||||
/**
|
||||
* Log for error reporting.
|
||||
*/
|
||||
private final Log log;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param sf the factory which created this Scanner
|
||||
* @param buffer the input, might be modified
|
||||
* Must be positive and less than or equal to input.length.
|
||||
* @param sf scan factory.
|
||||
* @param array array containing contents of source.
|
||||
* @param length length of meaningful content in buffer.
|
||||
*/
|
||||
protected UnicodeReader(ScannerFactory sf, CharBuffer buffer) {
|
||||
this(sf, JavacFileManager.toArray(buffer), buffer.limit());
|
||||
protected UnicodeReader(ScannerFactory sf, char[] array, int length) {
|
||||
this.buffer = array;
|
||||
this.length = length;
|
||||
this.position = 0;
|
||||
this.width = 0;
|
||||
this.character = '\0';
|
||||
this.codepoint = 0;
|
||||
this.wasBackslash = false;
|
||||
this.log = sf.log;
|
||||
|
||||
nextCodePoint();
|
||||
}
|
||||
|
||||
protected UnicodeReader(ScannerFactory sf, char[] input, int inputLength) {
|
||||
log = sf.log;
|
||||
names = sf.names;
|
||||
realLength = inputLength;
|
||||
if (inputLength == input.length) {
|
||||
if (input.length > 0 && Character.isWhitespace(input[input.length - 1])) {
|
||||
inputLength--;
|
||||
} else {
|
||||
input = Arrays.copyOf(input, inputLength + 1);
|
||||
/**
|
||||
* Returns the length of the buffer. This is length of meaningful content in buffer and
|
||||
* not the length of the buffer array.
|
||||
*
|
||||
* @return length of the buffer.
|
||||
*/
|
||||
protected int length() {
|
||||
return length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return true if current position is within the meaningful part of the buffer.
|
||||
*
|
||||
* @return true if current position is within the meaningful part of the buffer.
|
||||
*/
|
||||
protected boolean isAvailable() {
|
||||
return position < length;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches the next 16-bit character from the buffer and places it in this.character.
|
||||
*/
|
||||
private void nextCodeUnit() {
|
||||
// Index of next character in buffer.
|
||||
int index = position + width;
|
||||
|
||||
// If past end of buffer.
|
||||
if (length <= index) {
|
||||
// End of file is marked with EOI.
|
||||
character = EOI;
|
||||
} else {
|
||||
// Next character in buffer.
|
||||
character = buffer[index];
|
||||
// Increment length of codepoint.
|
||||
width++;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches the next 16-bit character from the buffer. If an unicode escape
|
||||
* is detected then converts the unicode escape to a character.
|
||||
*/
|
||||
private void nextUnicodeInputCharacter() {
|
||||
// Position to next codepoint.
|
||||
position += width;
|
||||
// Codepoint has no characters yet.
|
||||
width = 0;
|
||||
|
||||
// Fetch next character.
|
||||
nextCodeUnit();
|
||||
|
||||
// If second backslash is detected.
|
||||
if (wasBackslash) {
|
||||
// Treat like a normal character (not part of unicode escape.)
|
||||
wasBackslash = false;
|
||||
} else if (character == '\\') {
|
||||
// May be an unicode escape.
|
||||
wasBackslash = !unicodeEscape();
|
||||
}
|
||||
|
||||
// Codepoint and character match if not surrogate.
|
||||
codepoint = (int)character;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches the next code point from the buffer. If an unicode escape is recognized
|
||||
* then converts unicode escape to a character. If two characters are a surrogate pair
|
||||
* then converts to a codepoint.
|
||||
*/
|
||||
private void nextCodePoint() {
|
||||
// Next unicode character.
|
||||
nextUnicodeInputCharacter();
|
||||
|
||||
// Return early if ASCII or not a surrogate pair.
|
||||
if (isASCII() || !Character.isHighSurrogate(character)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Capture high surrogate and position.
|
||||
char hi = character;
|
||||
int savePosition = position;
|
||||
int saveWidth = width;
|
||||
|
||||
// Get potential low surrogate.
|
||||
nextUnicodeInputCharacter();
|
||||
char lo = character;
|
||||
|
||||
if (Character.isLowSurrogate(lo)) {
|
||||
// Start codepoint at start of high surrogate.
|
||||
position = savePosition;
|
||||
width += saveWidth;
|
||||
// Compute codepoint.
|
||||
codepoint = Character.toCodePoint(hi, lo);
|
||||
} else {
|
||||
// Restore to treat high surrogate as just a character.
|
||||
position = savePosition;
|
||||
width = saveWidth;
|
||||
character = hi;
|
||||
codepoint = (int)hi;
|
||||
// Could potentially report an error here (old code did not.)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts an unicode escape into a character.
|
||||
*
|
||||
* @return true if was an unicode escape.
|
||||
*/
|
||||
private boolean unicodeEscape() {
|
||||
// Start of unicode escape (past backslash.)
|
||||
int start = position + width;
|
||||
|
||||
// Default to backslash result, unless proven otherwise.
|
||||
character = '\\';
|
||||
width = 1;
|
||||
|
||||
// Skip multiple 'u'.
|
||||
int index;
|
||||
for (index = start; index < length; index++) {
|
||||
if (buffer[index] != 'u') {
|
||||
break;
|
||||
}
|
||||
}
|
||||
buf = input;
|
||||
buflen = inputLength;
|
||||
buf[buflen] = EOI;
|
||||
bp = -1;
|
||||
scanChar();
|
||||
}
|
||||
|
||||
/** Read next character.
|
||||
*/
|
||||
protected void scanChar() {
|
||||
if (bp < buflen) {
|
||||
ch = buf[++bp];
|
||||
if (ch == '\\') {
|
||||
convertUnicode();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Read next character in comment, skipping over double '\' characters.
|
||||
*/
|
||||
protected void scanCommentChar() {
|
||||
scanChar();
|
||||
if (ch == '\\') {
|
||||
if (peekChar() == '\\' && !isUnicode()) {
|
||||
skipChar();
|
||||
} else {
|
||||
convertUnicode();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Append a character to sbuf.
|
||||
*/
|
||||
protected void putChar(char ch, boolean scan) {
|
||||
sbuf = ArrayUtils.ensureCapacity(sbuf, sp);
|
||||
sbuf[sp++] = ch;
|
||||
if (scan)
|
||||
scanChar();
|
||||
}
|
||||
|
||||
protected void putChar(char ch) {
|
||||
putChar(ch, false);
|
||||
}
|
||||
|
||||
protected void putChar(boolean scan) {
|
||||
putChar(ch, scan);
|
||||
}
|
||||
|
||||
protected void nextChar(boolean skip) {
|
||||
if (!skip) {
|
||||
sbuf = ArrayUtils.ensureCapacity(sbuf, sp);
|
||||
sbuf[sp++] = ch;
|
||||
}
|
||||
|
||||
scanChar();
|
||||
}
|
||||
|
||||
Name name() {
|
||||
return names.fromChars(sbuf, 0, sp);
|
||||
}
|
||||
|
||||
String chars() {
|
||||
return new String(sbuf, 0, sp);
|
||||
}
|
||||
|
||||
/** Add 'count' copies of the character 'ch' to the string buffer.
|
||||
*/
|
||||
protected void repeat(char ch, int count) {
|
||||
for ( ; 0 < count; count--) {
|
||||
putChar(ch, false);
|
||||
}
|
||||
}
|
||||
|
||||
/** Reset the scan buffer pointer to 'pos'.
|
||||
*/
|
||||
protected void reset(int pos) {
|
||||
bp = pos - 1;
|
||||
scanChar();
|
||||
}
|
||||
|
||||
/** Convert unicode escape; bp points to initial '\' character
|
||||
* (Spec 3.3).
|
||||
*/
|
||||
protected void convertUnicode() {
|
||||
if (ch == '\\' && unicodeConversionBp != bp ) {
|
||||
bp++; ch = buf[bp];
|
||||
if (ch == 'u') {
|
||||
do {
|
||||
bp++; ch = buf[bp];
|
||||
} while (ch == 'u');
|
||||
int limit = bp + 3;
|
||||
if (limit < buflen) {
|
||||
int d = digit(bp, 16);
|
||||
int code = d;
|
||||
while (bp < limit && d >= 0) {
|
||||
bp++; ch = buf[bp];
|
||||
d = digit(bp, 16);
|
||||
code = (code << 4) + d;
|
||||
}
|
||||
if (d >= 0) {
|
||||
ch = (char)code;
|
||||
unicodeConversionBp = bp;
|
||||
return;
|
||||
}
|
||||
}
|
||||
log.error(bp, Errors.IllegalUnicodeEsc);
|
||||
} else {
|
||||
bp--;
|
||||
ch = '\\';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Are surrogates supported?
|
||||
*/
|
||||
final static boolean surrogatesSupported = surrogatesSupported();
|
||||
private static boolean surrogatesSupported() {
|
||||
try {
|
||||
Character.isHighSurrogate('a');
|
||||
return true;
|
||||
} catch (NoSuchMethodError ex) {
|
||||
// Needs to have been at least one u.
|
||||
if (index == start) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/** Scan surrogate pairs. If 'ch' is a high surrogate and
|
||||
* the next character is a low surrogate, returns the code point
|
||||
* constructed from these surrogates. Otherwise, returns -1.
|
||||
* This method will not consume any of the characters.
|
||||
*/
|
||||
protected int peekSurrogates() {
|
||||
if (surrogatesSupported && Character.isHighSurrogate(ch)) {
|
||||
char high = ch;
|
||||
int prevBP = bp;
|
||||
int code = 0;
|
||||
|
||||
scanChar();
|
||||
for (int i = 0; i < 4; i++) {
|
||||
// Translate and merge digit.
|
||||
int digit = index < length ? Character.digit(buffer[index], 16) : -1;
|
||||
code = code << 4 | digit;
|
||||
|
||||
char low = ch;
|
||||
|
||||
ch = high;
|
||||
bp = prevBP;
|
||||
|
||||
if (Character.isLowSurrogate(low)) {
|
||||
return Character.toCodePoint(high, low);
|
||||
// If invalid digit.
|
||||
if (code < 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
// On to next character.
|
||||
index++;
|
||||
}
|
||||
|
||||
return -1;
|
||||
// Skip digits even if error.
|
||||
width = index - position;
|
||||
|
||||
// If all digits are good.
|
||||
if (code >= 0) {
|
||||
character = (char)code;
|
||||
} else {
|
||||
log.error(position, Errors.IllegalUnicodeEsc);
|
||||
}
|
||||
|
||||
// Return true even if error so that the invalid unicode escape is skipped.
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Convert an ASCII digit from its base (8, 10, or 16)
|
||||
* to its value.
|
||||
/**
|
||||
* Return the current position in the character buffer.
|
||||
*
|
||||
* @return current position in the character buffer.
|
||||
*/
|
||||
protected int digit(int pos, int base) {
|
||||
char c = ch;
|
||||
if ('0' <= c && c <= '9')
|
||||
return Character.digit(c, base); //a fast common case
|
||||
int codePoint = peekSurrogates();
|
||||
int result = codePoint >= 0 ? Character.digit(codePoint, base) : Character.digit(c, base);
|
||||
if (result >= 0 && c > 0x7f) {
|
||||
log.error(pos + 1, Errors.IllegalNonasciiDigit);
|
||||
if (codePoint >= 0)
|
||||
scanChar();
|
||||
ch = "0123456789abcdef".charAt(result);
|
||||
protected int position() {
|
||||
return position;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Reset the reader to the specified position.
|
||||
* Warning: Do not use when previous character was an ASCII or unicode backslash.
|
||||
* @param pos
|
||||
*/
|
||||
protected void reset(int pos) {
|
||||
position = pos;
|
||||
width = 0;
|
||||
wasBackslash = false;
|
||||
nextCodePoint();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the character at the current position.
|
||||
*
|
||||
* @return character at the current position.
|
||||
*/
|
||||
protected char get() {
|
||||
return character;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the codepoint at the current position.
|
||||
*
|
||||
* @return codepoint at the current position.
|
||||
*/
|
||||
protected int getCodepoint() {
|
||||
return codepoint;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the current codepoint was formed from a surrogate pair (is greater than 0xFFFF).
|
||||
*
|
||||
* @return true if the current codepoint is a surrogate.
|
||||
*/
|
||||
protected boolean isSurrogate() {
|
||||
return 0xFFFF < codepoint;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the current character is ASCII.
|
||||
*
|
||||
* @return true if the current character is ASCII.
|
||||
*/
|
||||
protected boolean isASCII() {
|
||||
return character <= 0x7F;
|
||||
}
|
||||
|
||||
/**
|
||||
* Advances the current character to the next character.
|
||||
*
|
||||
* @return next character.
|
||||
*/
|
||||
protected char next() {
|
||||
nextCodePoint();
|
||||
|
||||
return character;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare character. Returns true if a match.
|
||||
*
|
||||
* @param ch character to match.
|
||||
*
|
||||
* @return true if a match.
|
||||
*/
|
||||
protected boolean is(char ch) {
|
||||
return character == ch;
|
||||
}
|
||||
|
||||
/**
|
||||
* Match one of the arguments. Returns true if a match.
|
||||
*/
|
||||
protected boolean isOneOf(char ch1, char ch2) {
|
||||
return is(ch1) || is(ch2);
|
||||
}
|
||||
protected boolean isOneOf(char ch1, char ch2, char ch3) {
|
||||
return is(ch1) || is(ch2) || is(ch3);
|
||||
}
|
||||
protected boolean isOneOf(char ch1, char ch2, char ch3, char ch4, char ch5, char ch6) {
|
||||
return is(ch1) || is(ch2) || is(ch3) || is(ch4) || is(ch5) || is(ch6);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests to see if current character is in the range of lo to hi characters (inclusive).
|
||||
*
|
||||
* @param lo lowest character in range.
|
||||
* @param hi highest character in range.
|
||||
*
|
||||
* @return true if the current character is in range.
|
||||
*/
|
||||
protected boolean inRange(char lo, char hi) {
|
||||
return lo <= character && character <= hi;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare character and advance if a match. Returns true if a match.
|
||||
*
|
||||
* @param ch character to match.
|
||||
*
|
||||
* @return true if a match.
|
||||
*/
|
||||
protected boolean accept(char ch) {
|
||||
if (is(ch)) {
|
||||
next();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Match one of the arguments and advance if a match. Returns true if a match.
|
||||
*/
|
||||
protected boolean acceptOneOf(char ch1, char ch2) {
|
||||
if (isOneOf(ch1, ch2)) {
|
||||
next();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
protected boolean acceptOneOf(char ch1, char ch2, char ch3) {
|
||||
if (isOneOf(ch1, ch2, ch3)) {
|
||||
next();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Skip over all occurrences of character.
|
||||
*
|
||||
* @param ch character to accept.
|
||||
*/
|
||||
protected void skip(char ch) {
|
||||
while (accept(ch)) {
|
||||
// next
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Skip over ASCII white space characters.
|
||||
*/
|
||||
protected void skipWhitespace() {
|
||||
while (acceptOneOf(' ', '\t', '\f')) {
|
||||
// next
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Skip to end of line.
|
||||
*/
|
||||
protected void skipToEOLN() {
|
||||
while (isAvailable()) {
|
||||
if (isOneOf('\r', '\n')) {
|
||||
break;
|
||||
}
|
||||
|
||||
next();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Compare string and advance if a match. Returns true if a match.
|
||||
* Warning: Do not use when previous character was a backslash
|
||||
* (confuses state of wasBackslash.)
|
||||
*
|
||||
* @param string string to match character for character.
|
||||
*
|
||||
* @return true if a match.
|
||||
*/
|
||||
protected boolean accept(String string) {
|
||||
// Quick test.
|
||||
if (string.length() == 0 || !is(string.charAt(0))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Be prepared to retreat if not a match.
|
||||
int savedPosition = position;
|
||||
|
||||
nextCodePoint();
|
||||
|
||||
// Check each character.
|
||||
for (int i = 1; i < string.length(); i++) {
|
||||
if (!is(string.charAt(i))) {
|
||||
// Restart if not a match.
|
||||
reset(savedPosition);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
nextCodePoint();
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert an ASCII digit from its base (8, 10, or 16) to its value. Does not
|
||||
* advance character.
|
||||
*
|
||||
* @param pos starting position.
|
||||
* @param digitRadix base of number being converted.
|
||||
*
|
||||
* @return value of digit.
|
||||
*/
|
||||
protected int digit(int pos, int digitRadix) {
|
||||
int result;
|
||||
|
||||
// Just an ASCII digit.
|
||||
if (inRange('0', '9')) {
|
||||
// Fast common case.
|
||||
result = character - '0';
|
||||
|
||||
return result < digitRadix ? result : -1;
|
||||
}
|
||||
|
||||
// Handle other digits.
|
||||
result = isSurrogate() ? Character.digit(codepoint, digitRadix) :
|
||||
Character.digit(character, digitRadix);
|
||||
|
||||
if (result >= 0 && !isASCII()) {
|
||||
log.error(position(), Errors.IllegalNonasciiDigit);
|
||||
character = "0123456789abcdef".charAt(result);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
protected boolean isUnicode() {
|
||||
return unicodeConversionBp == bp;
|
||||
}
|
||||
|
||||
protected void skipChar() {
|
||||
bp++;
|
||||
}
|
||||
|
||||
protected char peekChar() {
|
||||
return buf[bp + 1];
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a copy of the input buffer, up to its inputLength.
|
||||
* Unicode escape sequences are not translated.
|
||||
* Returns the input buffer. Unicode escape sequences are not translated.
|
||||
*
|
||||
* @return the input buffer.
|
||||
*/
|
||||
public char[] getRawCharacters() {
|
||||
char[] chars = new char[buflen];
|
||||
System.arraycopy(buf, 0, chars, 0, buflen);
|
||||
return chars;
|
||||
return length == buffer.length ? buffer : Arrays.copyOf(buffer, length);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -297,15 +533,83 @@ public class UnicodeReader {
|
||||
* {@code String.substring(beginIndex, endIndex)}.
|
||||
* Unicode escape sequences are not translated.
|
||||
*
|
||||
* @param beginIndex the beginning index, inclusive.
|
||||
* @param endIndex the ending index, exclusive.
|
||||
* @param beginIndex the beginning index, inclusive.
|
||||
* @param endIndex the ending index, exclusive.
|
||||
*
|
||||
* @throws ArrayIndexOutOfBoundsException if either offset is outside of the
|
||||
* array bounds
|
||||
*/
|
||||
public char[] getRawCharacters(int beginIndex, int endIndex) {
|
||||
int length = endIndex - beginIndex;
|
||||
char[] chars = new char[length];
|
||||
System.arraycopy(buf, beginIndex, chars, 0, length);
|
||||
return chars;
|
||||
return Arrays.copyOfRange(buffer, beginIndex, endIndex);
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a specialized version of UnicodeReader that keeps track of the
|
||||
* column position within a given character stream. Used for Javadoc
|
||||
* processing to build a table for mapping positions in the comment string
|
||||
* to positions in the source file.
|
||||
*/
|
||||
static class PositionTrackingReader extends UnicodeReader {
|
||||
/**
|
||||
* Offset from the beginning of the original reader buffer.
|
||||
*/
|
||||
private final int offset;
|
||||
|
||||
/**
|
||||
* Current column in the comment.
|
||||
*/
|
||||
private int column;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param sf Scan factory.
|
||||
* @param array Array containing contents of source.
|
||||
* @param offset Position offset in original source buffer.
|
||||
*/
|
||||
protected PositionTrackingReader(ScannerFactory sf, char[] array, int offset) {
|
||||
super(sf, array, array.length);
|
||||
this.offset = offset;
|
||||
this.column = 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Advances the current character to the next character. Tracks column.
|
||||
*
|
||||
* @return next character.
|
||||
*/
|
||||
@Override
|
||||
protected char next() {
|
||||
super.next();
|
||||
|
||||
if (isOneOf('\n', '\r', '\f')) {
|
||||
column = 0;
|
||||
} else if (is('\t')) {
|
||||
column = tabulate(column);
|
||||
} else {
|
||||
column++;
|
||||
}
|
||||
|
||||
return get();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the current column.
|
||||
*
|
||||
* @return the current column.
|
||||
*/
|
||||
protected int column() {
|
||||
return column;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns position relative to the original source buffer.
|
||||
*
|
||||
* @return position relative to the original source buffer.
|
||||
*/
|
||||
protected int offsetPosition() {
|
||||
return position() + offset;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1383,9 +1383,6 @@ compiler.err.unreported.exception.implicit.close=\
|
||||
unreported exception {0}; must be caught or declared to be thrown\n\
|
||||
exception thrown from implicit call to close() on resource variable ''{1}''
|
||||
|
||||
compiler.err.unsupported.cross.fp.lit=\
|
||||
hexadecimal floating-point literals are not supported on this VM
|
||||
|
||||
compiler.err.void.not.allowed.here=\
|
||||
''void'' type not allowed here
|
||||
|
||||
|
@ -984,8 +984,6 @@ compiler.err.unreported.exception.default.constructor=\u30C7\u30D5\u30A9\u30EB\u
|
||||
# 0: type, 1: name
|
||||
compiler.err.unreported.exception.implicit.close=\u5831\u544A\u3055\u308C\u306A\u3044\u4F8B\u5916{0}\u306F\u3001\u30B9\u30ED\u30FC\u3059\u308B\u306B\u306F\u6355\u6349\u307E\u305F\u306F\u5BA3\u8A00\u3059\u308B\u5FC5\u8981\u304C\u3042\u308A\u307E\u3059\n\u30EA\u30BD\u30FC\u30B9\u5909\u6570''{1}''\u3067\u306Eclose()\u306E\u6697\u9ED9\u7684\u306A\u30B3\u30FC\u30EB\u304B\u3089\u4F8B\u5916\u304C\u30B9\u30ED\u30FC\u3055\u308C\u307E\u3057\u305F
|
||||
|
||||
compiler.err.unsupported.cross.fp.lit=16\u9032\u6D6E\u52D5\u5C0F\u6570\u70B9\u30EA\u30C6\u30E9\u30EB\u306F\u3053\u306EVM\u3067\u306F\u30B5\u30DD\u30FC\u30C8\u3055\u308C\u3066\u3044\u307E\u305B\u3093
|
||||
|
||||
compiler.err.void.not.allowed.here=\u3053\u3053\u3067''void''\u578B\u3092\u4F7F\u7528\u3059\u308B\u3053\u3068\u306F\u3067\u304D\u307E\u305B\u3093
|
||||
|
||||
# 0: string
|
||||
|
@ -984,8 +984,6 @@ compiler.err.unreported.exception.default.constructor=\u9ED8\u8BA4\u6784\u9020\u
|
||||
# 0: type, 1: name
|
||||
compiler.err.unreported.exception.implicit.close=\u672A\u62A5\u544A\u7684\u5F02\u5E38\u9519\u8BEF{0}; \u5FC5\u987B\u5BF9\u5176\u8FDB\u884C\u6355\u83B7\u6216\u58F0\u660E\u4EE5\u4FBF\u629B\u51FA\n\u5BF9\u8D44\u6E90\u53D8\u91CF ''{1}'' \u9690\u5F0F\u8C03\u7528 close() \u65F6\u629B\u51FA\u4E86\u5F02\u5E38\u9519\u8BEF
|
||||
|
||||
compiler.err.unsupported.cross.fp.lit=\u8BE5 VM \u4E0D\u652F\u6301\u5341\u516D\u8FDB\u5236\u6D6E\u70B9\u6587\u5B57
|
||||
|
||||
compiler.err.void.not.allowed.here=\u6B64\u5904\u4E0D\u5141\u8BB8\u4F7F\u7528 ''\u7A7A'' \u7C7B\u578B
|
||||
|
||||
# 0: string
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -96,7 +96,7 @@ public class DiagnosticSource {
|
||||
return 0;
|
||||
}
|
||||
if (buf[bp] == '\t' && expandTabs) {
|
||||
column = (column / TabInc * TabInc) + TabInc;
|
||||
column = tabulate(column);
|
||||
} else {
|
||||
column++;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -68,4 +68,10 @@ public interface LayoutCharacters {
|
||||
* source file.
|
||||
*/
|
||||
final static byte EOI = 0x1A;
|
||||
|
||||
/** Bump column to the next tab.
|
||||
*/
|
||||
static int tabulate(int column) {
|
||||
return (column / TabInc * TabInc) + TabInc;
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1999, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -265,7 +265,7 @@ public class Position {
|
||||
int column = 0;
|
||||
for (int bp = lineStart; bp < pos; bp++) {
|
||||
if (tabMap.get(bp))
|
||||
column = (column / TabInc * TabInc) + TabInc;
|
||||
column = tabulate(column);
|
||||
else
|
||||
column++;
|
||||
}
|
||||
@ -279,7 +279,7 @@ public class Position {
|
||||
while (col < column) {
|
||||
pos++;
|
||||
if (tabMap.get(pos))
|
||||
col = (col / TabInc * TabInc) + TabInc;
|
||||
col = tabulate(col);
|
||||
else
|
||||
col++;
|
||||
}
|
||||
|
@ -1,2 +1,2 @@
|
||||
Digits.java:11:41: compiler.err.illegal.nonascii.digit
|
||||
Digits.java:11:43: compiler.err.illegal.nonascii.digit
|
||||
1 error
|
||||
|
@ -44,7 +44,6 @@ compiler.err.stack.sim.error
|
||||
compiler.err.type.var.more.than.once # UNUSED
|
||||
compiler.err.type.var.more.than.once.in.result # UNUSED
|
||||
compiler.err.unexpected.type
|
||||
compiler.err.unsupported.cross.fp.lit # Scanner: host system dependent
|
||||
compiler.misc.bad.class.signature # bad class file
|
||||
compiler.misc.bad.const.pool.tag # bad class file
|
||||
compiler.misc.bad.const.pool.tag.at # bad class file
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2016, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -23,10 +23,10 @@
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8056897
|
||||
* @bug 8056897 8254073
|
||||
* @modules jdk.compiler/com.sun.tools.javac.parser
|
||||
* jdk.compiler/com.sun.tools.javac.util
|
||||
* @summary Proper lexing of integer literals.
|
||||
* @summary Proper lexing of various token kinds.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
@ -43,41 +43,130 @@ import com.sun.tools.javac.parser.Tokens.TokenKind;
|
||||
import com.sun.tools.javac.util.Context;
|
||||
import com.sun.tools.javac.util.Log;
|
||||
|
||||
import static com.sun.tools.javac.parser.Tokens.TokenKind.*;
|
||||
|
||||
public class JavaLexerTest {
|
||||
public static void main(String... args) throws Exception {
|
||||
new JavaLexerTest().run();
|
||||
static final TestTuple[] PASSING_TESTS = {
|
||||
new TestTuple(FLOATLITERAL, "0.0f"),
|
||||
new TestTuple(FLOATLITERAL, "0.0F"),
|
||||
new TestTuple(FLOATLITERAL, ".0F"),
|
||||
new TestTuple(FLOATLITERAL, "0.F"),
|
||||
new TestTuple(FLOATLITERAL, "0E0F"),
|
||||
new TestTuple(FLOATLITERAL, "0E+0F"),
|
||||
new TestTuple(FLOATLITERAL, "0E-0F"),
|
||||
|
||||
new TestTuple(DOUBLELITERAL, "0.0d"),
|
||||
new TestTuple(DOUBLELITERAL, "0.0D"),
|
||||
new TestTuple(DOUBLELITERAL, ".0D"),
|
||||
new TestTuple(DOUBLELITERAL, "0.D"),
|
||||
new TestTuple(DOUBLELITERAL, "0E0D"),
|
||||
new TestTuple(DOUBLELITERAL, "0E+0D"),
|
||||
new TestTuple(DOUBLELITERAL, "0E-0D"),
|
||||
new TestTuple(DOUBLELITERAL, "0x0.0p0d"),
|
||||
new TestTuple(DOUBLELITERAL, "0xff.0p8d"),
|
||||
|
||||
new TestTuple(STRINGLITERAL, "\"\\u2022\""),
|
||||
new TestTuple(STRINGLITERAL, "\"\\b\\t\\n\\f\\r\\\'\\\"\\\\\""),
|
||||
|
||||
new TestTuple(CHARLITERAL, "\'\\b\'"),
|
||||
new TestTuple(CHARLITERAL, "\'\\t\'"),
|
||||
new TestTuple(CHARLITERAL, "\'\\n\'"),
|
||||
new TestTuple(CHARLITERAL, "\'\\f\'"),
|
||||
new TestTuple(CHARLITERAL, "\'\\r\'"),
|
||||
new TestTuple(CHARLITERAL, "\'\\'\'"),
|
||||
new TestTuple(CHARLITERAL, "\'\\\\'"),
|
||||
new TestTuple(CHARLITERAL, "\'\\\'\'"),
|
||||
new TestTuple(CHARLITERAL, "\'\\\"\'"),
|
||||
|
||||
new TestTuple(IDENTIFIER, "abc\\u0005def"),
|
||||
};
|
||||
|
||||
static final TestTuple[] FAILING_TESTS = {
|
||||
new TestTuple(LONGLITERAL, "0bL"),
|
||||
new TestTuple(LONGLITERAL, "0b20L"),
|
||||
new TestTuple(LONGLITERAL, "0xL"),
|
||||
new TestTuple(INTLITERAL, "0xG000L", "0x"),
|
||||
|
||||
new TestTuple(DOUBLELITERAL, "0E*0F", "0E"),
|
||||
|
||||
new TestTuple(DOUBLELITERAL, "0E*0D", "0E"),
|
||||
new TestTuple(INTLITERAL, "0xp8d", "0x"),
|
||||
new TestTuple(DOUBLELITERAL, "0x8pd", "0x8pd"),
|
||||
new TestTuple(INTLITERAL, "0xpd", "0x"),
|
||||
|
||||
new TestTuple(ERROR, "\"\\u20\""),
|
||||
new TestTuple(ERROR, "\"\\u\""),
|
||||
new TestTuple(ERROR, "\"\\uG000\""),
|
||||
new TestTuple(ERROR, "\"\\u \""),
|
||||
new TestTuple(ERROR, "\"\\q\""),
|
||||
|
||||
new TestTuple(ERROR, "\'\'"),
|
||||
new TestTuple(ERROR, "\'\\q\'", "\'\\"),
|
||||
};
|
||||
|
||||
static class TestTuple {
|
||||
String input;
|
||||
TokenKind kind;
|
||||
String expected;
|
||||
|
||||
TestTuple(TokenKind kind, String input, String expected) {
|
||||
this.input = input;
|
||||
this.kind = kind;
|
||||
this.expected = expected;
|
||||
}
|
||||
|
||||
TestTuple(TokenKind kind, String input) {
|
||||
this(kind, input, input);
|
||||
}
|
||||
}
|
||||
|
||||
void run() throws Exception {
|
||||
void test(TestTuple test, boolean willFail) throws Exception {
|
||||
Context ctx = new Context();
|
||||
Log log = Log.instance(ctx);
|
||||
String input = "0bL 0b20L 0xL ";
|
||||
|
||||
log.useSource(new SimpleJavaFileObject(new URI("mem://Test.java"), JavaFileObject.Kind.SOURCE) {
|
||||
@Override
|
||||
public CharSequence getCharContent(boolean ignoreEncodingErrors) throws IOException {
|
||||
return input;
|
||||
return test.input;
|
||||
}
|
||||
});
|
||||
char[] inputArr = input.toCharArray();
|
||||
JavaTokenizer tokenizer = new JavaTokenizer(ScannerFactory.instance(ctx), inputArr, inputArr.length) {
|
||||
};
|
||||
|
||||
assertKind(input, tokenizer, TokenKind.LONGLITERAL, "0bL");
|
||||
assertKind(input, tokenizer, TokenKind.LONGLITERAL, "0b20L");
|
||||
assertKind(input, tokenizer, TokenKind.LONGLITERAL, "0xL");
|
||||
char[] inputArr = test.input.toCharArray();
|
||||
JavaTokenizer tokenizer = new JavaTokenizer(ScannerFactory.instance(ctx), inputArr, inputArr.length) {};
|
||||
Token token = tokenizer.readToken();
|
||||
boolean failed = log.nerrors != 0;
|
||||
boolean normal = failed == willFail;
|
||||
|
||||
if (!normal) {
|
||||
System.err.println("input: " + test.input);
|
||||
String message = willFail ? "Expected to fail: " : "Expected to pass: ";
|
||||
throw new AssertionError(message + test.input);
|
||||
}
|
||||
|
||||
String actual = test.input.substring(token.pos, token.endPos);
|
||||
|
||||
if (token.kind != test.kind) {
|
||||
System.err.println("input: " + test.input);
|
||||
throw new AssertionError("Unexpected token kind: " + token.kind.name());
|
||||
}
|
||||
|
||||
if (!Objects.equals(test.expected, actual)) {
|
||||
System.err.println("input: " + test.input);
|
||||
throw new AssertionError("Unexpected token content: " + actual);
|
||||
}
|
||||
}
|
||||
|
||||
void assertKind(String input, JavaTokenizer tokenizer, TokenKind kind, String expectedText) {
|
||||
Token token = tokenizer.readToken();
|
||||
|
||||
if (token.kind != kind) {
|
||||
throw new AssertionError("Unexpected token kind: " + token.kind);
|
||||
void run() throws Exception {
|
||||
for (TestTuple test : PASSING_TESTS) {
|
||||
test(test, false);
|
||||
}
|
||||
|
||||
String actualText = input.substring(token.pos, token.endPos);
|
||||
|
||||
if (!Objects.equals(actualText, expectedText)) {
|
||||
throw new AssertionError("Unexpected token text: " + actualText);
|
||||
for (TestTuple test : FAILING_TESTS) {
|
||||
test(test, true);
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
new JavaLexerTest().run();
|
||||
}
|
||||
}
|
||||
|
@ -1,10 +1,10 @@
|
||||
NonasciiDigit.java:12:24: compiler.err.illegal.nonascii.digit
|
||||
NonasciiDigit.java:12:18: compiler.err.illegal.nonascii.digit
|
||||
NonasciiDigit.java:13:19: compiler.err.illegal.nonascii.digit
|
||||
NonasciiDigit.java:14:24: compiler.err.illegal.nonascii.digit
|
||||
NonasciiDigit.java:16:27: compiler.err.illegal.nonascii.digit
|
||||
NonasciiDigit.java:17:22: compiler.err.illegal.nonascii.digit
|
||||
NonasciiDigit.java:18:22: compiler.err.illegal.nonascii.digit
|
||||
NonasciiDigit.java:19:22: compiler.err.illegal.nonascii.digit
|
||||
NonasciiDigit.java:14:18: compiler.err.illegal.nonascii.digit
|
||||
NonasciiDigit.java:16:21: compiler.err.illegal.nonascii.digit
|
||||
NonasciiDigit.java:17:23: compiler.err.illegal.nonascii.digit
|
||||
NonasciiDigit.java:18:25: compiler.err.illegal.nonascii.digit
|
||||
NonasciiDigit.java:19:23: compiler.err.illegal.nonascii.digit
|
||||
NonasciiDigit.java:20:22: compiler.err.illegal.nonascii.digit
|
||||
NonasciiDigit.java:21:27: compiler.err.illegal.nonascii.digit
|
||||
NonasciiDigit.java:21:21: compiler.err.illegal.nonascii.digit
|
||||
9 errors
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2002, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -25,24 +25,48 @@
|
||||
* @test
|
||||
* @bug 4330479
|
||||
* @summary ASCII SUB character is rejected in multi-line comments
|
||||
* @author gafter
|
||||
*
|
||||
* @compile SubChar.java
|
||||
* @library /tools/lib
|
||||
* @modules jdk.compiler/com.sun.tools.javac.api
|
||||
* jdk.compiler/com.sun.tools.javac.main
|
||||
* @build toolbox.ToolBox toolbox.JavacTask
|
||||
* @run main SubChar
|
||||
*/
|
||||
|
||||
import toolbox.JavacTask;
|
||||
import toolbox.JavaTask;
|
||||
import toolbox.Task;
|
||||
import toolbox.ToolBox;
|
||||
|
||||
/*
|
||||
Note: this source file has been crafted very carefully to end with the
|
||||
unicode escape sequence for the control-Z character without a
|
||||
following newline. The scanner is specified to allow control-Z there.
|
||||
If you edit this source file, please make sure that your editor does
|
||||
not insert a newline after that trailing line.
|
||||
*/
|
||||
|
||||
/** \u001A */
|
||||
class SubChar {
|
||||
public static void main(String args[]) {
|
||||
return;
|
||||
}
|
||||
public class SubChar {
|
||||
private static final ToolBox TOOLBOX = new ToolBox();
|
||||
|
||||
private static final String SOURCE = """
|
||||
/*
|
||||
Note: this source file has been crafted very carefully to end with the
|
||||
unicode escape sequence for the control-Z character without a
|
||||
following newline. The scanner is specified to allow control-Z there.
|
||||
If you edit this source file, please make sure that your editor does
|
||||
not insert a newline after that trailing line.
|
||||
*/
|
||||
|
||||
/** \\u001A */
|
||||
class ControlZTest {
|
||||
public static void main(String args[]) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
/* \\u001A */\
|
||||
""";
|
||||
|
||||
public static void main(String... args) {
|
||||
String output = new JavacTask(TOOLBOX)
|
||||
.sources(SOURCE)
|
||||
.classpath(".")
|
||||
.options("-encoding", "utf8")
|
||||
.run()
|
||||
.writeAll()
|
||||
.getOutput(Task.OutputKind.DIRECT);
|
||||
System.out.println(output);
|
||||
}
|
||||
}
|
||||
/* \u001A */
|
||||
|
@ -1,4 +1,4 @@
|
||||
SupplementaryJavaID2.java:12:14: compiler.err.illegal.char: \ud801
|
||||
SupplementaryJavaID2.java:12:20: compiler.err.illegal.char: \ud801
|
||||
SupplementaryJavaID2.java:12:9: compiler.err.illegal.char: \ud801
|
||||
SupplementaryJavaID2.java:12:15: compiler.err.illegal.char: \ud801
|
||||
SupplementaryJavaID2.java:12:24: compiler.err.expected: token.identifier
|
||||
3 errors
|
||||
|
@ -1,3 +1,3 @@
|
||||
SupplementaryJavaID3.java:12:17: compiler.err.illegal.char: \ud801
|
||||
SupplementaryJavaID3.java:12:23: compiler.err.illegal.char: \ud801
|
||||
SupplementaryJavaID3.java:12:12: compiler.err.illegal.char: \ud801
|
||||
SupplementaryJavaID3.java:12:18: compiler.err.illegal.char: \ud801
|
||||
2 errors
|
||||
|
@ -1,2 +1,2 @@
|
||||
SupplementaryJavaID4.java:14:14: compiler.err.illegal.char: \ud834\udd7b
|
||||
SupplementaryJavaID4.java:14:9: compiler.err.illegal.char: \ud834\udd7b
|
||||
1 error
|
||||
|
@ -1,2 +1,2 @@
|
||||
SupplementaryJavaID5.java:14:17: compiler.err.illegal.char: \ud834\udd00
|
||||
SupplementaryJavaID5.java:14:12: compiler.err.illegal.char: \ud834\udd00
|
||||
1 error
|
||||
|
Loading…
x
Reference in New Issue
Block a user