8303912: Clean up JavadocTokenizer

Reviewed-by: vromero, jjg
2023-03-28 16:36:07 +00:00 · 2023-03-28 16:36:07 +00:00 · 1fc218c58b
commit 1fc218c58b
parent c1f5ca115d
4 changed files with 234 additions and 257 deletions
--- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java
+++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java
@ -307,15 +307,6 @@ public class JavaTokenizer extends UnicodeReader {
        return false;
    }

-    /**
-     * Test if the current character is a line terminator.
-     *
-     * @return true if current character is a line terminator.
-     */
-    private boolean isEOLN() {
-        return isOneOf('\n', '\r');
-    }
-
    /**
     * Skip and process a line terminator sequence.
     */
@ -1094,7 +1085,7 @@ public class JavaTokenizer extends UnicodeReader {
            if (scannerDebug) {
                    System.out.println("nextToken(" + pos
                                       + "," + endPos + ")=|" +
-                                       new String(getRawCharacters(pos, endPos))
+                                       getRawString(pos, endPos)
                                       + "|");
            }
        }
@ -1146,13 +1137,11 @@ public class JavaTokenizer extends UnicodeReader {
        if (scannerDebug) {
            System.out.println("processComment(" + pos
                                + "," + endPos + "," + style + ")=|"
-                                + new String(getRawCharacters(pos, endPos))
+                                + getRawString(pos, endPos)
                                + "|");
        }

-        char[] buf = getRawCharacters(pos, endPos);
-
-        return new BasicComment(style, fac, buf, pos);
+        return new BasicComment(style,this, pos, endPos);
    }

    /**
@ -1167,8 +1156,8 @@ public class JavaTokenizer extends UnicodeReader {
    protected void processWhiteSpace(int pos, int endPos) {
        if (scannerDebug) {
            System.out.println("processWhitespace(" + pos
-                                + "," + endPos + ")=|" +
-                                new String(getRawCharacters(pos, endPos))
+                                + "," + endPos + ")=|"
+                                + getRawString(pos, endPos)
                                + "|");
        }
    }
@ -1182,8 +1171,8 @@ public class JavaTokenizer extends UnicodeReader {
    protected void processLineTerminator(int pos, int endPos) {
        if (scannerDebug) {
            System.out.println("processTerminator(" + pos
-                                + "," + endPos + ")=|" +
-                                new String(getRawCharacters(pos, endPos))
+                                + "," + endPos + ")=|"
+                                + getRawString(pos, endPos)
                                + "|");
        }
    }
@ -1206,9 +1195,6 @@ public class JavaTokenizer extends UnicodeReader {
    protected static class BasicComment extends PositionTrackingReader implements Comment {
        /**
         * Style of comment
-         *   LINE starting with //
-         *   BLOCK starting with /*
-         *   JAVADOC starting with /**
         */
        CommentStyle cs;

@ -1225,13 +1211,13 @@ public class JavaTokenizer extends UnicodeReader {
        /**
         * Constructor.
         *
-         * @param cs      comment style
-         * @param sf      Scan factory.
-         * @param array   Array containing contents of source.
-         * @param offset  Position offset in original source buffer.
+         * @param cs     comment style
+         * @param reader existing reader
+         * @param pos    start of meaningful content in buffer.
+         * @param endPos end of meaningful content in buffer.
         */
-        protected BasicComment(CommentStyle cs, ScannerFactory sf, char[] array, int offset) {
-            super(sf, array, offset);
+        protected BasicComment(CommentStyle cs, UnicodeReader reader, int pos, int endPos) {
+            super(reader, pos, endPos);
            this.cs = cs;
        }

@ -1247,8 +1233,7 @@ public class JavaTokenizer extends UnicodeReader {
        /**
         * Return buffer position in original buffer mapped from buffer position in comment.
         *
-         * @param pos  buffer position in comment.
-         *
+         * @param pos buffer position in comment.
         * @return buffer position in original buffer.
         */
        public int getSourcePos(int pos) {
@ -1257,11 +1242,8 @@ public class JavaTokenizer extends UnicodeReader {

        /**
         * Return style of comment.
-         *   LINE starting with //
-         *   BLOCK starting with /*
-         *   JAVADOC starting with /**
         *
-         * @return
+         * @return style of comment.
         */
        public CommentStyle getStyle() {
            return cs;
@ -1273,76 +1255,104 @@ public class JavaTokenizer extends UnicodeReader {
         * @return true if comment contains @deprecated.
         */
        public boolean isDeprecated() {
-            if (!scanned && cs == CommentStyle.JAVADOC) {
+            if (!scanned) {
                scanDocComment();
            }
-
            return deprecatedFlag;
        }

        /**
-         * Scan JAVADOC comment for details.
+         * Detect the deprecated tag.
+         *
+         * @param line line reader
+         *
+         * @return true if deprecated tag is present.
+         */
+        protected boolean hasDeprecated(UnicodeReader line) {
+            return line.accept("@deprecated") &&
+                    (line.isWhitespace() || line.is('*') || line.isEOLN());
+        }
+
+        /**
+         * Remove closing star(s) slash from comment.
+         *
+         * @param line line reader
+         *
+         * @return new line reader if detected otherwise original line reader.
+         */
+        UnicodeReader trimEndOfComment(UnicodeReader line) {
+            int pos = line.position();
+
+            while (line.isAvailable()) {
+                if (line.is('*')) {
+                    int endPos = line.position();
+                    line.skip('*');
+
+                    if (line.is('/')) {
+                        return line.lineReader(pos, endPos);
+                    }
+                } else {
+                    line.next();
+                }
+            }
+
+            line.reset(pos);
+
+            return line;
+        }
+
+        /**
+         * Trim the first part of the JavaDoc comment.
+         *
+         * @param line line reader
+         *
+         * @return modified line reader
+         */
+        UnicodeReader trimJavadocComment(UnicodeReader line) {
+            int pos = line.position();
+            line.skipWhitespace();
+
+            if (line.skip('*') == 0) {
+                line.reset(pos);
+            }
+
+            return trimEndOfComment(line);
+        }
+
+        /**
+         * Put the line into the buffer.
+         *
+         * @param line line reader
+         */
+        protected void putLine(UnicodeReader line) {
+            // no-op here; overridden in subclass
+        }
+
+        /**
+         * Scan document comment for content.
         */
        protected void scanDocComment() {
-            try {
-                boolean deprecatedPrefix = false;
-                accept("/**");
-
-                forEachLine:
-                while (isAvailable()) {
-                    // Skip optional WhiteSpace at beginning of line
-                    skipWhitespace();
-
-                    // Skip optional consecutive Stars
-                    while (accept('*')) {
-                        if (is('/')) {
-                            return;
-                        }
-                    }
-
-                    // Skip optional WhiteSpace after Stars
-                    skipWhitespace();
-
-                    // At beginning of line in the JavaDoc sense.
-                    deprecatedPrefix = deprecatedFlag || accept("@deprecated");
-
-                    if (deprecatedPrefix && isAvailable()) {
-                        if (Character.isWhitespace(get())) {
-                            deprecatedFlag = true;
-                        } else if (accept('*')) {
-                            if (is('/')) {
-                                deprecatedFlag = true;
-                                return;
-                            }
-                        }
-                    }
-
-                    // Skip rest of line
-                    while (isAvailable()) {
-                        switch (get()) {
-                            case '*':
-                                next();
-
-                                if (is('/')) {
-                                    return;
-                                }
-
-                                break;
-                            case '\r': // (Spec 3.4)
-                            case '\n': // (Spec 3.4)
-                                accept('\r');
-                                accept('\n');
-                                continue forEachLine;
-
-                            default:
-                                next();
-                                break;
-                        }
-                    } // rest of line
-                } // forEachLine
-                return;
-            } finally {
+            if (!scanned) {
+                deprecatedFlag = false;
                scanned = true;
+
+                if (!accept("/**")) {
+                    return;
+                }
+
+                while (isAvailable()) {
+                    UnicodeReader line = lineReader();
+                    line = trimJavadocComment(line);
+
+                    // If standalone @deprecated tag
+                    int pos = line.position();
+                    if (hasDeprecated(line)) {
+                        deprecatedFlag = true;
+                    }
+
+                    line.reset(pos);
+                    putLine(line);
+                }
            }
        }
    }
--- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java
+++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavadocTokenizer.java
@ -79,8 +79,7 @@ public class JavadocTokenizer extends JavaTokenizer {

    @Override
    protected Comment processComment(int pos, int endPos, CommentStyle style) {
-        char[] buf = getRawCharacters(pos, endPos);
-        return new JavadocComment(style, fac, buf, pos);
+        return new JavadocComment(style, this, pos, endPos);
    }

    /**
@ -88,13 +87,6 @@ public class JavadocTokenizer extends JavaTokenizer {
     * of a Javadoc comment.
     */
    protected static class JavadocComment extends BasicComment {
-        /**
-         * Pattern used to detect a well formed @deprecated tag in a Javadoc
-         * comment.
-         */
-        private static final Pattern DEPRECATED_PATTERN =
-            Pattern.compile("(?sm).*^\\s*@deprecated( |$).*");
-
        /**
         * The relevant portion of the comment that is of interest to Javadoc.
         * Produced by invoking scanDocComment.
@ -112,45 +104,35 @@ public class JavadocTokenizer extends JavaTokenizer {
         */
        OffsetMap offsetMap = new OffsetMap();

-        JavadocComment(CommentStyle cs, ScannerFactory sf, char[] array, int offset) {
-            super( cs, sf, array, offset);
+        JavadocComment(CommentStyle cs, UnicodeReader reader, int pos, int endPos) {
+            super(cs, reader, pos, endPos);
            this.sb = new StringBuilder();
        }

        /**
-         * Add a character to the extraction buffer.
+         * Add the rest of the line, followed by a newline, to the extraction buffer.
         *
-         * @param ch  character to add.
+         * @param line line reader
         */
-        protected void put(char ch) {
-            offsetMap.add(sb.length(), offsetPosition());
-            sb.append(ch);
-        }
+        protected void putLine(UnicodeReader line) {
+            while (line.isAvailable()) {
+                offsetMap.add(sb.length(), line.position());

-        /**
-         * Add a code point to the extraction buffer.
-         *
-         * @param codePoint  code point to add.
-         */
-        protected void putCodePoint(int codePoint) {
-            offsetMap.add(sb.length(), offsetPosition());
-            sb.appendCodePoint(codePoint);
-        }
+                if (line.isSurrogate()) {
+                    sb.appendCodePoint(line.getCodepoint());
+                } else {
+                    sb.append(line.get());
+                }

-        /**
-         * Add current character or code point to the extraction buffer.
-         */
-        protected void put() {
-            if (isSurrogate()) {
-                putCodePoint(getCodepoint());
-            } else {
-                put(get());
+                line.next();
            }
+            offsetMap.add(sb.length(), line.position());
+            sb.append('\n');
        }

        @Override
        public String getText() {
-            if (!scanned && cs == CommentStyle.JAVADOC) {
+            if (!scanned) {
                scanDocComment();
            }
            return docComment;
@ -171,104 +153,10 @@ public class JavadocTokenizer extends JavaTokenizer {

        @Override
        protected void scanDocComment() {
-             try {
-                 boolean firstLine = true;
-
-                 // Skip over /*
-                 accept("/*");
-
-                 // Consume any number of stars
-                 skip('*');
-
-                 // Is the comment in the form /**/, /***/, /****/, etc. ?
-                 if (is('/')) {
-                     docComment = "";
-                     return;
-                 }
-
-                 // Skip line terminator on the first line of the comment.
-                 if (isOneOf('\n', '\r')) {
-                     accept('\r');
-                     accept('\n');
-                     firstLine = false;
-                 }
-
-             outerLoop:
-                 // The outerLoop processes the doc comment, looping once
-                 // for each line.  For each line, it first strips off
-                 // whitespace, then it consumes any stars, then it
-                 // puts the rest of the line into the extraction buffer.
-                 while (isAvailable()) {
-                     int begin_pos = position();
-                     // Consume  whitespace from the beginning of each line.
-                     skipWhitespace();
-                     // Are there stars here?  If so, consume them all
-                     // and check for the end of comment.
-                     if (is('*')) {
-                         // skip all of the stars
-                         skip('*');
-
-                         // check for the closing slash.
-                         if (accept('/')) {
-                             // We're done with the Javadoc comment
-                             break outerLoop;
-                         }
-                     } else if (!firstLine) {
-                         // The current line does not begin with a '*' so we will
-                         // treat it as comment
-                         reset(begin_pos);
-                     }
-
-                 textLoop:
-                     // The textLoop processes the rest of the characters
-                     // on the line, adding them to the extraction buffer.
-                     while (isAvailable()) {
-                         if (accept("*/")) {
-                             // This is the end of the comment, return
-                             // the contents of the extraction buffer.
-                             break outerLoop;
-                         } else if (isOneOf('\n', '\r')) {
-                             // We've seen a newline.  Add it to our
-                             // buffer and break out of this loop,
-                             // starting fresh on a new line.
-                             put('\n');
-                             accept('\r');
-                             accept('\n');
-                             break textLoop;
-                         } else if (is('\f')){
-                             next();
-                             break textLoop; // treat as end of line
-
-                         } else {
-                             // Add the character to our buffer.
-                             put();
-                             next();
-                         }
-                     } // end textLoop
-                     firstLine = false;
-                 } // end outerLoop
-
-                 // If extraction buffer is not empty.
-                 if (sb.length() > 0) {
-                     // Remove trailing asterisks.
-                     int i = sb.length() - 1;
-                     while (i > -1 && sb.charAt(i) == '*') {
-                         i--;
-                     }
-                     sb.setLength(i + 1) ;
-
-                     // Store the text of the doc comment
-                    docComment = sb.toString();
-                 } else {
-                    docComment = "";
-                }
+            try {
+                super.scanDocComment();
            } finally {
-                scanned = true;
-
-                // Check if comment contains @deprecated comment.
-                if (docComment != null && DEPRECATED_PATTERN.matcher(docComment).matches()) {
-                    deprecatedFlag = true;
-                }
+                docComment = sb.toString();
            }
        }
    }
--- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/Tokens.java
+++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/Tokens.java
@ -273,9 +273,9 @@ public class Tokens {
    public interface Comment {

        enum CommentStyle {
-            LINE,
-            BLOCK,
-            JAVADOC,
+            LINE,       // starting with //
+            BLOCK,      // starting with /*
+            JAVADOC,    // starting with /**
        }

        String getText();
--- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java
+++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java
@ -104,15 +104,39 @@ public class UnicodeReader {
     */
    @SuppressWarnings("this-escape")
    protected UnicodeReader(ScannerFactory sf, char[] array, int length) {
+        this(sf.log, array, length);
+    }
+
+    /**
+     * Constructor.
+     *
+     * @param log     Log for error reporting.
+     * @param array   array containing contents of source.
+     * @param length  length of meaningful content in buffer.
+     */
+    protected UnicodeReader(Log log, char[] array, int length) {
+        this(log, array, 0, length);
+    }
+
+    /**
+      * Constructor.
+      *
+      * @param log     Log for error reporting.
+      * @param array   array containing contents of source.
+      * @param pos     start of meaningful content in buffer.
+      * @param endPos  end of meaningful content in buffer.
+      */
+    @SuppressWarnings("this-escape")
+    protected UnicodeReader(Log log, char[] array, int pos, int endPos) {
        this.buffer = array;
-        this.length = length;
-        this.position = 0;
+        this.length = endPos;
+        this.position = pos;
        this.width = 0;
        this.character = '\0';
        this.codepoint = 0;
        this.wasBackslash = false;
        this.wasUnicodeEscape = false;
-        this.log = sf.log;
+        this.log = log;

        nextCodePoint();
    }
@ -426,25 +450,68 @@ public class UnicodeReader {
        return false;
    }

+    /**
+     * Match one of the arguments and advance if a match. Returns true if a match.
+     */
    protected boolean acceptOneOf(char ch1, char ch2, char ch3) {
        if (isOneOf(ch1, ch2, ch3)) {
            next();

            return true;
        }
-
        return false;
    }

+    /**
+     * Return a reader which is bracketed by the current position
+     * and the next line terminator.
+     *
+     * @return a new reader
+     */
+    protected UnicodeReader lineReader() {
+        int pos = position;
+        skipToEOLN();
+        int endPos = position;
+        accept('\r');
+        accept('\n');
+        return lineReader(pos, endPos);
+    }
+
+    /**
+     * Return a reader which is bracketed by the {@code pos}
+     * and {@code endPos}.
+     *
+     * @param pos     initial position
+     * @param endPos  end position
+     *
+     * @return a new reader
+     */
+    protected UnicodeReader lineReader(int pos, int endPos) {
+        return new UnicodeReader(log, buffer, pos, endPos);
+    }
+
    /**
     * Skip over all occurrences of character.
     *
     * @param ch character to accept.
+     *
+     * @return number of characters skipped
     */
-    protected void skip(char ch) {
+    protected int skip(char ch) {
+        int count = 0;
        while (accept(ch)) {
-            // next
+            count++;
        }
+        return count;
+    }
+
+    /**
+     * Is ASCII white space character.
+     *
+     * @return true if is ASCII white space character
+     */
+    protected boolean isWhitespace() {
+        return isOneOf(' ', '\t', '\f');
    }

    /**
@ -456,18 +523,26 @@ public class UnicodeReader {
        }
    }

+    /**
+     * Is ASCII line terminator.
+     *
+     * @return true if is ASCII line terminator
+     */
+    protected boolean isEOLN() {
+        return isOneOf('\r', '\n');
+    }
+
    /**
     * Skip to end of line.
     */
    protected void skipToEOLN() {
        while (isAvailable()) {
-            if (isOneOf('\r', '\n')) {
+            if (isEOLN()) {
                break;
            }

            next();
        }
-
    }

    /**
@ -565,6 +640,25 @@ public class UnicodeReader {
        return Arrays.copyOfRange(buffer, beginIndex, endIndex);
    }

+    /**
+     * Returns a string subset of the input buffer.
+     * The returned string begins at the {@code beginIndex} and
+     * extends to the character at index {@code endIndex - 1}.
+     * Thus the length of the substring is {@code endIndex-beginIndex}.
+     * This behavior is like
+     * {@code String.substring(beginIndex, endIndex)}.
+     * Unicode escape sequences are not translated.
+     *
+     * @param  beginIndex the beginning index, inclusive.
+     * @param  endIndex the ending index, exclusive.
+     *
+     * @throws IndexOutOfBoundsException if either offset is outside of the
+     *         array bounds
+     */
+    public String getRawString(int beginIndex, int endIndex) {
+        return new String(buffer, beginIndex, endIndex - beginIndex);
+    }
+
    /**
     * This is a specialized version of UnicodeReader that keeps track of the
     * column position within a given character stream. Used for Javadoc
@ -572,11 +666,6 @@ public class UnicodeReader {
     * to positions in the source file.
     */
    static class PositionTrackingReader extends UnicodeReader {
-        /**
-         * Offset from the beginning of the original reader buffer.
-         */
-        private final int offset;
-
        /**
         * Current column in the comment.
         */
@ -585,13 +674,12 @@ public class UnicodeReader {
        /**
         * Constructor.
         *
-         * @param sf      Scan factory.
-         * @param array   Array containing contents of source.
-         * @param offset  Position offset in original source buffer.
+         * @param reader  existing reader
+         * @param pos     start of meaningful content in buffer.
+         * @param endPos  end of meaningful content in buffer.
         */
-        protected PositionTrackingReader(ScannerFactory sf, char[] array, int offset) {
-            super(sf, array, array.length);
-            this.offset = offset;
+        protected PositionTrackingReader(UnicodeReader reader, int pos, int endPos) {
+            super(reader.log, reader.buffer, pos, endPos);
            this.column = 0;
        }

@ -623,15 +711,6 @@ public class UnicodeReader {
        protected int column() {
            return column;
        }
-
-        /**
-         * Returns position relative to the original source buffer.
-         *
-         * @return
-         */
-        protected int offsetPosition() {
-            return position() + offset;
-        }
    }

 }