8303912: Clean up JavadocTokenizer

Reviewed-by: vromero, jjg
This commit is contained in:
Jim Laskey 2023-03-28 16:36:07 +00:00
parent c1f5ca115d
commit 1fc218c58b
4 changed files with 234 additions and 257 deletions
src/jdk.compiler/share/classes/com/sun/tools/javac/parser

@ -307,15 +307,6 @@ public class JavaTokenizer extends UnicodeReader {
return false;
}
/**
* Test if the current character is a line terminator.
*
* @return true if current character is a line terminator.
*/
private boolean isEOLN() {
return isOneOf('\n', '\r');
}
/**
* Skip and process a line terminator sequence.
*/
@ -1094,7 +1085,7 @@ public class JavaTokenizer extends UnicodeReader {
if (scannerDebug) {
System.out.println("nextToken(" + pos
+ "," + endPos + ")=|" +
new String(getRawCharacters(pos, endPos))
getRawString(pos, endPos)
+ "|");
}
}
@ -1146,13 +1137,11 @@ public class JavaTokenizer extends UnicodeReader {
if (scannerDebug) {
System.out.println("processComment(" + pos
+ "," + endPos + "," + style + ")=|"
+ new String(getRawCharacters(pos, endPos))
+ getRawString(pos, endPos)
+ "|");
}
char[] buf = getRawCharacters(pos, endPos);
return new BasicComment(style, fac, buf, pos);
return new BasicComment(style,this, pos, endPos);
}
/**
@ -1167,8 +1156,8 @@ public class JavaTokenizer extends UnicodeReader {
protected void processWhiteSpace(int pos, int endPos) {
if (scannerDebug) {
System.out.println("processWhitespace(" + pos
+ "," + endPos + ")=|" +
new String(getRawCharacters(pos, endPos))
+ "," + endPos + ")=|"
+ getRawString(pos, endPos)
+ "|");
}
}
@ -1182,8 +1171,8 @@ public class JavaTokenizer extends UnicodeReader {
protected void processLineTerminator(int pos, int endPos) {
if (scannerDebug) {
System.out.println("processTerminator(" + pos
+ "," + endPos + ")=|" +
new String(getRawCharacters(pos, endPos))
+ "," + endPos + ")=|"
+ getRawString(pos, endPos)
+ "|");
}
}
@ -1206,9 +1195,6 @@ public class JavaTokenizer extends UnicodeReader {
protected static class BasicComment extends PositionTrackingReader implements Comment {
/**
* Style of comment
* LINE starting with //
* BLOCK starting with /*
* JAVADOC starting with /**
*/
CommentStyle cs;
@ -1225,13 +1211,13 @@ public class JavaTokenizer extends UnicodeReader {
/**
* Constructor.
*
* @param cs comment style
* @param sf Scan factory.
* @param array Array containing contents of source.
* @param offset Position offset in original source buffer.
* @param cs comment style
* @param reader existing reader
* @param pos start of meaningful content in buffer.
* @param endPos end of meaningful content in buffer.
*/
protected BasicComment(CommentStyle cs, ScannerFactory sf, char[] array, int offset) {
super(sf, array, offset);
protected BasicComment(CommentStyle cs, UnicodeReader reader, int pos, int endPos) {
super(reader, pos, endPos);
this.cs = cs;
}
@ -1247,8 +1233,7 @@ public class JavaTokenizer extends UnicodeReader {
/**
* Return buffer position in original buffer mapped from buffer position in comment.
*
* @param pos buffer position in comment.
*
* @param pos buffer position in comment.
* @return buffer position in original buffer.
*/
public int getSourcePos(int pos) {
@ -1257,11 +1242,8 @@ public class JavaTokenizer extends UnicodeReader {
/**
* Return style of comment.
* LINE starting with //
* BLOCK starting with /*
* JAVADOC starting with /**
*
* @return
* @return style of comment.
*/
public CommentStyle getStyle() {
return cs;
@ -1273,76 +1255,104 @@ public class JavaTokenizer extends UnicodeReader {
* @return true if comment contains @deprecated.
*/
public boolean isDeprecated() {
if (!scanned && cs == CommentStyle.JAVADOC) {
if (!scanned) {
scanDocComment();
}
return deprecatedFlag;
}
/**
 * Detect a standalone {@code @deprecated} tag at the current position of the line.
 *
 * The tag only counts when it is properly terminated: by ASCII white space,
 * by a {@code '*'}, by a line terminator, or by the end of the line's content.
 * Longer words that merely start with the tag text are therefore rejected.
 *
 * @param line  line reader; its position may be advanced by this call, so
 *              callers that need the original position must save and reset it.
 *
 * @return true if deprecated tag is present.
 */
protected boolean hasDeprecated(UnicodeReader line) {
    if (!line.accept("@deprecated")) {
        return false;
    }

    // A reader produced by lineReader() ends before the line terminator, so a
    // tag that is the last thing on its line leaves the reader exhausted. The
    // character tests below are not guaranteed to report a terminator in that
    // state, hence the explicit end-of-content check.
    return !line.isAvailable() ||
            line.isWhitespace() || line.is('*') || line.isEOLN();
}
/**
 * Strip a trailing comment terminator (one or more stars followed by a slash).
 *
 * Scans forward from the current position. At the first run of stars that is
 * immediately followed by {@code '/'}, a reader limited to the text before
 * that run is returned. If no terminator is found, the original reader is
 * reset to the position it had on entry and returned unchanged.
 *
 * @param line  line reader
 *
 * @return new line reader if a terminator was detected, otherwise the original line reader.
 */
UnicodeReader trimEndOfComment(UnicodeReader line) {
    final int start = line.position();

    while (line.isAvailable()) {
        if (!line.is('*')) {
            line.next();
            continue;
        }

        // Remember where the run of stars begins, then consume the whole run.
        final int starsAt = line.position();
        line.skip('*');

        if (line.is('/')) {
            return line.lineReader(start, starsAt);
        }
    }

    line.reset(start);
    return line;
}
/**
 * Trim the decoration from one line of a JavaDoc comment.
 *
 * Leading white space followed by one or more stars is removed. When no star
 * follows the white space, the reader is reset so the white space is kept.
 * Any closing star(s) slash is then removed via {@link #trimEndOfComment}.
 *
 * @param line  line reader
 *
 * @return modified line reader
 */
UnicodeReader trimJavadocComment(UnicodeReader line) {
    final int start = line.position();

    line.skipWhitespace();
    final int stars = line.skip('*');

    if (stars == 0) {
        // No leading stars: undo the white space skip.
        line.reset(start);
    }

    return trimEndOfComment(line);
}
/**
* Put the line into the buffer.
*
* This base implementation deliberately does nothing; a subclass that
* collects the comment text overrides it.
*
* @param line line reader
*/
protected void putLine(UnicodeReader line) {
// Intentionally empty: overridden in subclasses.
}
/**
* Scan document comment for content.
*/
protected void scanDocComment() {
try {
boolean deprecatedPrefix = false;
accept("/**");
forEachLine:
while (isAvailable()) {
// Skip optional WhiteSpace at beginning of line
skipWhitespace();
// Skip optional consecutive Stars
while (accept('*')) {
if (is('/')) {
return;
}
}
// Skip optional WhiteSpace after Stars
skipWhitespace();
// At beginning of line in the JavaDoc sense.
deprecatedPrefix = deprecatedFlag || accept("@deprecated");
if (deprecatedPrefix && isAvailable()) {
if (Character.isWhitespace(get())) {
deprecatedFlag = true;
} else if (accept('*')) {
if (is('/')) {
deprecatedFlag = true;
return;
}
}
}
// Skip rest of line
while (isAvailable()) {
switch (get()) {
case '*':
next();
if (is('/')) {
return;
}
break;
case '\r': // (Spec 3.4)
case '\n': // (Spec 3.4)
accept('\r');
accept('\n');
continue forEachLine;
default:
next();
break;
}
} // rest of line
} // forEachLine
return;
} finally {
if (!scanned) {
deprecatedFlag = false;
scanned = true;
if (!accept("/**")) {
return;
}
while (isAvailable()) {
UnicodeReader line = lineReader();
line = trimJavadocComment(line);
// If standalone @deprecated tag
int pos = line.position();
if (hasDeprecated(line)) {
deprecatedFlag = true;
}
line.reset(pos);
putLine(line);
}
}
}
}

@ -79,8 +79,7 @@ public class JavadocTokenizer extends JavaTokenizer {
@Override
protected Comment processComment(int pos, int endPos, CommentStyle style) {
char[] buf = getRawCharacters(pos, endPos);
return new JavadocComment(style, fac, buf, pos);
return new JavadocComment(style, this, pos, endPos);
}
/**
@ -88,13 +87,6 @@ public class JavadocTokenizer extends JavaTokenizer {
* of a Javadoc comment.
*/
protected static class JavadocComment extends BasicComment {
/**
* Pattern used to detect a well formed @deprecated tag in a Javadoc
* comment.
*/
private static final Pattern DEPRECATED_PATTERN =
Pattern.compile("(?sm).*^\\s*@deprecated( |$).*");
/**
* The relevant portion of the comment that is of interest to Javadoc.
* Produced by invoking scanDocComment.
@ -112,45 +104,35 @@ public class JavadocTokenizer extends JavaTokenizer {
*/
OffsetMap offsetMap = new OffsetMap();
JavadocComment(CommentStyle cs, ScannerFactory sf, char[] array, int offset) {
super( cs, sf, array, offset);
JavadocComment(CommentStyle cs, UnicodeReader reader, int pos, int endPos) {
super(cs, reader, pos, endPos);
this.sb = new StringBuilder();
}
/**
* Add a character to the extraction buffer.
* Add current character or code point from line to the extraction buffer.
*
* @param ch character to add.
* @param line line reader
*/
protected void put(char ch) {
offsetMap.add(sb.length(), offsetPosition());
sb.append(ch);
}
protected void putLine(UnicodeReader line) {
while (line.isAvailable()) {
offsetMap.add(sb.length(), line.position());
/**
* Add a code point to the extraction buffer.
*
* @param codePoint code point to add.
*/
protected void putCodePoint(int codePoint) {
offsetMap.add(sb.length(), offsetPosition());
sb.appendCodePoint(codePoint);
}
if (line.isSurrogate()) {
sb.appendCodePoint(line.getCodepoint());
} else {
sb.append(line.get());
}
/**
* Add current character or code point to the extraction buffer.
*/
protected void put() {
if (isSurrogate()) {
putCodePoint(getCodepoint());
} else {
put(get());
line.next();
}
offsetMap.add(sb.length(), line.position());
sb.append('\n');
}
@Override
public String getText() {
if (!scanned && cs == CommentStyle.JAVADOC) {
if (!scanned) {
scanDocComment();
}
return docComment;
@ -171,104 +153,10 @@ public class JavadocTokenizer extends JavaTokenizer {
@Override
protected void scanDocComment() {
try {
boolean firstLine = true;
// Skip over /*
accept("/*");
// Consume any number of stars
skip('*');
// Is the comment in the form /**/, /***/, /****/, etc. ?
if (is('/')) {
docComment = "";
return;
}
// Skip line terminator on the first line of the comment.
if (isOneOf('\n', '\r')) {
accept('\r');
accept('\n');
firstLine = false;
}
outerLoop:
// The outerLoop processes the doc comment, looping once
// for each line. For each line, it first strips off
// whitespace, then it consumes any stars, then it
// puts the rest of the line into the extraction buffer.
while (isAvailable()) {
int begin_pos = position();
// Consume whitespace from the beginning of each line.
skipWhitespace();
// Are there stars here? If so, consume them all
// and check for the end of comment.
if (is('*')) {
// skip all of the stars
skip('*');
// check for the closing slash.
if (accept('/')) {
// We're done with the Javadoc comment
break outerLoop;
}
} else if (!firstLine) {
// The current line does not begin with a '*' so we will
// treat it as comment
reset(begin_pos);
}
textLoop:
// The textLoop processes the rest of the characters
// on the line, adding them to the extraction buffer.
while (isAvailable()) {
if (accept("*/")) {
// This is the end of the comment, return
// the contents of the extraction buffer.
break outerLoop;
} else if (isOneOf('\n', '\r')) {
// We've seen a newline. Add it to our
// buffer and break out of this loop,
// starting fresh on a new line.
put('\n');
accept('\r');
accept('\n');
break textLoop;
} else if (is('\f')){
next();
break textLoop; // treat as end of line
} else {
// Add the character to our buffer.
put();
next();
}
} // end textLoop
firstLine = false;
} // end outerLoop
// If extraction buffer is not empty.
if (sb.length() > 0) {
// Remove trailing asterisks.
int i = sb.length() - 1;
while (i > -1 && sb.charAt(i) == '*') {
i--;
}
sb.setLength(i + 1) ;
// Store the text of the doc comment
docComment = sb.toString();
} else {
docComment = "";
}
try {
super.scanDocComment();
} finally {
scanned = true;
// Check if comment contains @deprecated comment.
if (docComment != null && DEPRECATED_PATTERN.matcher(docComment).matches()) {
deprecatedFlag = true;
}
docComment = sb.toString();
}
}
}

@ -273,9 +273,9 @@ public class Tokens {
public interface Comment {
enum CommentStyle {
LINE,
BLOCK,
JAVADOC,
LINE, // Starting with //
BLOCK, // starting with /*
JAVADOC, // starting with /**
}
String getText();

@ -104,15 +104,39 @@ public class UnicodeReader {
*/
@SuppressWarnings("this-escape")
protected UnicodeReader(ScannerFactory sf, char[] array, int length) {
this(sf.log, array, length);
}
/**
* Constructor.
*
* @param log Log for error reporting.
* @param array array containing contents of source.
* @param length length of meaningful content in buffer.
*/
protected UnicodeReader(Log log, char[] array, int length) {
this(log, array, 0, length);
}
/**
* Constructor.
*
* @param log Log for error reporting.
* @param array array containing contents of source.
* @param pos start of meaningful content in buffer.
* @param endPos end of meaningful content in buffer.
*/
@SuppressWarnings("this-escape")
protected UnicodeReader(Log log, char[] array, int pos, int endPos) {
this.buffer = array;
this.length = length;
this.position = 0;
this.length = endPos;
this.position = pos;
this.width = 0;
this.character = '\0';
this.codepoint = 0;
this.wasBackslash = false;
this.wasUnicodeEscape = false;
this.log = sf.log;
this.log = log;
nextCodePoint();
}
@ -426,25 +450,68 @@ public class UnicodeReader {
return false;
}
/**
 * Match the current character against three candidates and advance past it
 * on a match.
 *
 * @param ch1  first character to compare with.
 * @param ch2  second character to compare with.
 * @param ch3  third character to compare with.
 *
 * @return true if the current character matched one of the arguments.
 */
protected boolean acceptOneOf(char ch1, char ch2, char ch3) {
    final boolean matched = isOneOf(ch1, ch2, ch3);

    if (matched) {
        next();
    }

    return matched;
}
/**
 * Return a reader which is bracketed by the current position
 * and the next line terminator.
 *
 * The line terminator itself is not part of the returned reader, but this
 * reader is advanced past it.
 *
 * @return a new reader
 */
protected UnicodeReader lineReader() {
    final int start = position;
    skipToEOLN();
    final int end = position;

    // Step over the terminator ("\r", "\n" or "\r\n"); the bounds of the
    // new reader were captured before this, so it is excluded.
    accept('\r');
    accept('\n');

    return lineReader(start, end);
}
/**
* Return a reader which is bracketed by {@code pos}
* and {@code endPos}.
*
* The new reader is created over this reader's buffer and reports to this
* reader's log.
*
* @param pos initial position
* @param endPos end position
*
* @return a new reader
*/
protected UnicodeReader lineReader(int pos, int endPos) {
return new UnicodeReader(log, buffer, pos, endPos);
}
/**
* Skip over all occurrences of character.
*
* @param ch character to accept.
*
* @return number of characters skipped
*/
protected void skip(char ch) {
protected int skip(char ch) {
int count = 0;
while (accept(ch)) {
// next
count++;
}
return count;
}
/**
* Test if the current character is ASCII white space, i.e. one of
* space, horizontal tab or form feed. Line terminators are not included.
*
* @return true if the current character is an ASCII white space character
*/
protected boolean isWhitespace() {
return isOneOf(' ', '\t', '\f');
}
/**
@ -456,18 +523,26 @@ public class UnicodeReader {
}
}
/**
* Test if the current character is an ASCII line terminator, i.e.
* carriage return or line feed.
*
* @return true if the current character is an ASCII line terminator
*/
protected boolean isEOLN() {
return isOneOf('\r', '\n');
}
/**
* Skip to end of line.
*/
protected void skipToEOLN() {
while (isAvailable()) {
if (isOneOf('\r', '\n')) {
if (isEOLN()) {
break;
}
next();
}
}
/**
@ -565,6 +640,25 @@ public class UnicodeReader {
return Arrays.copyOfRange(buffer, beginIndex, endIndex);
}
/**
* Returns a string subset of the input buffer.
* The returned string begins at the {@code beginIndex} and
* extends to the character at index {@code endIndex - 1}.
* Thus the length of the substring is {@code endIndex-beginIndex}.
* This behavior is like
* {@code String.substring(beginIndex, endIndex)}.
* Unicode escape sequences are not translated.
*
* @param beginIndex the beginning index, inclusive.
* @param endIndex the ending index, exclusive.
*
* @return a new string holding the raw characters in the given range.
*
* @throws IndexOutOfBoundsException if the range is outside of the
* buffer bounds or {@code endIndex} is less than {@code beginIndex}
*/
public String getRawString(int beginIndex, int endIndex) {
return new String(buffer, beginIndex, endIndex - beginIndex);
}
/**
* This is a specialized version of UnicodeReader that keeps track of the
* column position within a given character stream. Used for Javadoc
@ -572,11 +666,6 @@ public class UnicodeReader {
* to positions in the source file.
*/
static class PositionTrackingReader extends UnicodeReader {
/**
* Offset from the beginning of the original reader buffer.
*/
private final int offset;
/**
* Current column in the comment.
*/
@ -585,13 +674,12 @@ public class UnicodeReader {
/**
* Constructor.
*
* @param sf Scan factory.
* @param array Array containing contents of source.
* @param offset Position offset in original source buffer.
* @param reader existing reader
* @param pos start of meaningful content in buffer.
* @param endPos end of meaningful content in buffer.
*/
protected PositionTrackingReader(ScannerFactory sf, char[] array, int offset) {
super(sf, array, array.length);
this.offset = offset;
protected PositionTrackingReader(UnicodeReader reader, int pos, int endPos) {
super(reader.log, reader.buffer, pos, endPos);
this.column = 0;
}
@ -623,15 +711,6 @@ public class UnicodeReader {
protected int column() {
return column;
}
/**
* Returns position relative to the original source buffer.
*
* @return
*/
protected int offsetPosition() {
return position() + offset;
}
}
}