8233116: Escape Sequences For Line Continuation and White Space (Preview)

Reviewed-by: vromero, jlahoda, bchristi, mcimadamore
2019-12-03 08:35:21 -04:00 · 2019-12-03 08:35:21 -04:00 · 234f326d79
commit 234f326d79
parent 7afaaf1229
6 changed files with 249 additions and 138 deletions
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@ -3062,6 +3062,11 @@ public final class String
     *     <td>{@code U+000D}</td>
     *   </tr>
     *   <tr>
+     *     <th scope="row">{@code \u005Cs}</th>
+     *     <td>space</td>
+     *     <td>{@code U+0020}</td>
+     *   </tr>
+     *   <tr>
     *     <th scope="row">{@code \u005C"}</th>
     *     <td>double quote</td>
     *     <td>{@code U+0022}</td>
@ -3081,6 +3086,11 @@ public final class String
     *     <td>octal escape</td>
     *     <td>code point equivalents</td>
     *   </tr>
+     *   <tr>
+     *     <th scope="row">{@code \u005C<line-terminator>}</th>
+     *     <td>continuation</td>
+     *     <td>discard</td>
+     *   </tr>
     *   </tbody>
     * </table>
     *
@ -3124,6 +3134,9 @@ public final class String
                case 'r':
                    ch = '\r';
                    break;
+                case 's':
+                    ch = ' ';
+                    break;
                case 't':
                    ch = '\t';
                    break;
@ -3146,6 +3159,13 @@ public final class String
                    }
                    ch = (char)code;
                    break;
+                case '\n':
+                    continue;
+                case '\r':
+                    if (from < length && chars[from] == '\n') {
+                        from++;
+                    }
+                    continue;
                default: {
                    String msg = String.format(
                        "Invalid escape sequence: \\%c \\\\u%04X",
--- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java
+++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/JavaTokenizer.java
@ -91,17 +91,13 @@ public class JavaTokenizer {
     */
    protected UnicodeReader reader;

-    /** Should the string stripped of indentation?
+    /** True if the string being scanned is a text block.
     */
-    protected boolean shouldStripIndent;
+    protected boolean isTextBlock;

-    /** Should the string's escapes be translated?
+    /** True if the string being scanned contains escape sequences.
     */
-    protected boolean shouldTranslateEscapes;
-
-    /** Has the string broken escapes?
-     */
-    protected boolean hasBrokenEscapes;
+    protected boolean hasEscapeSequences;

    protected ScannerFactory fac;

@ -181,91 +177,72 @@ public class JavaTokenizer {
    }

    /** Read next character in character or string literal and copy into sbuf.
+     *      pos - offset of the start of the literal
+     *      translateEscapesNow - true if String::translateEscapes is not available
+     *                            in the java.base libs. Occurs during bootstrapping.
+     *      multiline - true if scanning a text block. Allows newlines to be embedded
+     *                  in the result.
     */
-    private void scanLitChar(int pos) {
-        if (reader.ch == '\\') {
+    private void scanLitChar(int pos, boolean translateEscapesNow, boolean multiline) {
+         if (reader.ch == '\\') {
            if (reader.peekChar() == '\\' && !reader.isUnicode()) {
                reader.skipChar();
-                reader.putChar('\\', true);
+                if (!translateEscapesNow) {
+                    reader.putChar(false);
+                }
+                reader.putChar(true);
            } else {
-                reader.scanChar();
+                reader.nextChar(translateEscapesNow);
                switch (reader.ch) {
                case '0': case '1': case '2': case '3':
                case '4': case '5': case '6': case '7':
                    char leadch = reader.ch;
                    int oct = reader.digit(pos, 8);
-                    reader.scanChar();
+                    reader.nextChar(translateEscapesNow);
                    if ('0' <= reader.ch && reader.ch <= '7') {
                        oct = oct * 8 + reader.digit(pos, 8);
-                        reader.scanChar();
+                        reader.nextChar(translateEscapesNow);
                        if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
                            oct = oct * 8 + reader.digit(pos, 8);
-                            reader.scanChar();
+                            reader.nextChar(translateEscapesNow);
                        }
                    }
-                    reader.putChar((char)oct);
-                    break;
-                case 'b':
-                    reader.putChar('\b', true); break;
-                case 't':
-                    reader.putChar('\t', true); break;
-                case 'n':
-                    reader.putChar('\n', true); break;
-                case 'f':
-                    reader.putChar('\f', true); break;
-                case 'r':
-                    reader.putChar('\r', true); break;
-                case '\'':
-                    reader.putChar('\'', true); break;
-                case '\"':
-                    reader.putChar('\"', true); break;
-                case '\\':
-                    reader.putChar('\\', true); break;
-                default:
-                    lexError(reader.bp, Errors.IllegalEscChar);
-                }
-            }
-        } else if (reader.bp != reader.buflen) {
-            reader.putChar(true);
-        }
-    }
-
-    /** Read next character in character or string literal and copy into sbuf
-     *  without translating escapes. Used by text blocks to preflight verify
-     *  escapes sequences.
-     */
-    private void scanLitCharRaw(int pos) {
-        if (reader.ch == '\\') {
-            if (reader.peekChar() == '\\' && !reader.isUnicode()) {
-                reader.skipChar();
-                reader.putChar('\\', false);
-                reader.putChar('\\', true);
-            } else {
-                reader.putChar('\\', true);
-                switch (reader.ch) {
-                case '0': case '1': case '2': case '3':
-                case '4': case '5': case '6': case '7':
-                    char leadch = reader.ch;
-                    reader.putChar(true);
-                    if ('0' <= reader.ch && reader.ch <= '7') {
-                        reader.putChar(true);
-                        if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
-                            reader.putChar(true);
-                        }
+                    if (translateEscapesNow) {
+                        reader.putChar((char)oct);
                    }
                    break;
-                // Effectively list of valid escape sequences.
                case 'b':
+                    reader.putChar(translateEscapesNow ? '\b' : 'b', true); break;
                case 't':
+                    reader.putChar(translateEscapesNow ? '\t' : 't', true); break;
                case 'n':
+                    reader.putChar(translateEscapesNow ? '\n' : 'n', true); break;
                case 'f':
+                    reader.putChar(translateEscapesNow ? '\f' : 'f', true); break;
                case 'r':
+                    reader.putChar(translateEscapesNow ? '\r' : 'r', true); break;
                case '\'':
                case '\"':
                case '\\':
                    reader.putChar(true); break;
+                case 's':
+                    checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS);
+                    reader.putChar(translateEscapesNow ? ' ' : 's', true); break;
+                case '\n':
+                case '\r':
+                    if (!multiline) {
+                        lexError(reader.bp, Errors.IllegalEscChar);
+                    } else {
+                        int start = reader.bp;
+                        checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS);
+                        if (reader.ch == '\r' && reader.peekChar() == '\n') {
+                           reader.nextChar(translateEscapesNow);
+                        }
+                        reader.nextChar(translateEscapesNow);
+                        processLineTerminator(start, reader.bp);
+                    }
+                    break;
                default:
-                    hasBrokenEscapes = true;
                    lexError(reader.bp, Errors.IllegalEscChar);
                }
            }
@ -276,7 +253,7 @@ public class JavaTokenizer {

    /** Interim access to String methods used to support text blocks.
     *  Required to handle bootstrapping with pre-text block jdks.
-     *  Could be reworked in the 'next' jdk.
+     *  Should be replaced with direct calls in the 'next' jdk.
     */
    static class TextBlockSupport {
        /** Reflection method to remove incidental indentation.
@ -429,11 +406,8 @@ public class JavaTokenizer {
     */
    private void scanString(int pos) {
        // Clear flags.
-        shouldStripIndent = false;
-        shouldTranslateEscapes = false;
-        hasBrokenEscapes = false;
-        // Check if text block string methods are present.
-        boolean hasTextBlockSupport = TextBlockSupport.hasSupport();
+        isTextBlock = false;
+        hasEscapeSequences = false;
        // Track the end of first line for error recovery.
        int firstEOLN = -1;
        // Attempt to scan for up to 3 double quotes.
@ -449,36 +423,28 @@ public class JavaTokenizer {
        case 3: // Starting a text block.
            // Check if preview feature is enabled for text blocks.
            checkSourceLevel(pos, Feature.TEXT_BLOCKS);
-            // Only proceed if text block string methods are present.
-            if (hasTextBlockSupport) {
-                // Indicate that the final string should have incidental indentation removed.
-                shouldStripIndent = true;
-                // Verify the open delimiter sequence.
-                boolean hasOpenEOLN = false;
-                while (reader.bp < reader.buflen && Character.isWhitespace(reader.ch)) {
-                    hasOpenEOLN = isEOLN();
-                    if (hasOpenEOLN) {
-                        break;
-                    }
-                    reader.scanChar();
-                }
-                // Error if the open delimiter sequence not is """<Whitespace>*<LineTerminator>.
-                if (!hasOpenEOLN) {
-                    lexError(reader.bp, Errors.IllegalTextBlockOpen);
-                    return;
-                }
-                // Skip line terminator.
-                int start = reader.bp;
-                if (isCRLF()) {
-                    reader.scanChar();
+            isTextBlock = true;
+            // Verify the open delimiter sequence.
+            boolean hasOpenEOLN = false;
+            while (reader.bp < reader.buflen && Character.isWhitespace(reader.ch)) {
+                hasOpenEOLN = isEOLN();
+                if (hasOpenEOLN) {
+                    break;
                }
                reader.scanChar();
-                processLineTerminator(start, reader.bp);
-            } else {
-                // No text block string methods are present, so reset and treat like string literal.
-                reader.reset(pos);
-                openCount = countChar('\"', 1);
            }
+            // Error if the open delimiter sequence is not """<Whitespace>*<LineTerminator>.
+            if (!hasOpenEOLN) {
+                lexError(reader.bp, Errors.IllegalTextBlockOpen);
+                return;
+            }
+            // Skip line terminator.
+            int start = reader.bp;
+            if (isCRLF()) {
+                reader.scanChar();
+            }
+            reader.scanChar();
+            processLineTerminator(start, reader.bp);
            break;
        }
        // While characters are available.
@ -513,15 +479,11 @@ public class JavaTokenizer {
                }
            } else if (reader.ch == '\\') {
                // Handle escape sequences.
-                if (hasTextBlockSupport) {
-                    // Indicate that the final string should have escapes translated.
-                    shouldTranslateEscapes = true;
-                    // Validate escape sequence and add to string buffer.
-                    scanLitCharRaw(pos);
-                } else {
-                    // Translate escape sequence and add result to string buffer.
-                    scanLitChar(pos);
-                }
+                hasEscapeSequences = true;
+                // Translate escapes immediately if TextBlockSupport is not available
+                // during bootstrapping.
+                boolean translateEscapesNow = !TextBlockSupport.hasSupport();
+                scanLitChar(pos, translateEscapesNow, openCount != 1);
            } else {
                // Add character to string buffer.
                reader.putChar(true);
@ -961,7 +923,7 @@ public class JavaTokenizer {
                    } else {
                        if (isEOLN())
                            lexError(pos, Errors.IllegalLineEndInCharLit);
-                        scanLitChar(pos);
+                        scanLitChar(pos, true, false);
                        if (reader.ch == '\'') {
                            reader.scanChar();
                            tk = TokenKind.CHARLITERAL;
@ -1026,7 +988,7 @@ public class JavaTokenizer {
                    // Get characters from string buffer.
                    String string = reader.chars();
                    // If a text block.
-                    if (shouldStripIndent) {
+                    if (isTextBlock && TextBlockSupport.hasSupport()) {
                        // Verify that the incidental indentation is consistent.
                        if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) {
                            Set<TextBlockSupport.WhitespaceChecks> checks =
@ -1041,11 +1003,19 @@ public class JavaTokenizer {
                            }
                        }
                        // Remove incidental indentation.
-                        string = TextBlockSupport.stripIndent(string);
+                        try {
+                            string = TextBlockSupport.stripIndent(string);
+                        } catch (Exception ex) {
+                            // Error already reported, just use unstripped string.
+                        }
                    }
                    // Translate escape sequences if present.
-                    if (shouldTranslateEscapes && !hasBrokenEscapes) {
-                        string = TextBlockSupport.translateEscapes(string);
+                    if (hasEscapeSequences && TextBlockSupport.hasSupport()) {
+                        try {
+                            string = TextBlockSupport.translateEscapes(string);
+                        } catch (Exception ex) {
+                            // Error already reported, just use untranslated string.
+                        }
                    }
                    // Build string token.
                    return new StringToken(tk, pos, endPos, string, comments);
--- a/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java
+++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/parser/UnicodeReader.java
@ -146,6 +146,15 @@ public class UnicodeReader {
        putChar(ch, scan);
    }

+    protected void nextChar(boolean skip) {
+        if (!skip) {
+            sbuf = ArrayUtils.ensureCapacity(sbuf, sp);
+            sbuf[sp++] = ch;
+        }
+
+        scanChar();
+    }
+
    Name name() {
        return names.fromChars(sbuf, 0, sp);
    }
--- a/test/jdk/java/lang/String/TranslateEscapes.java
+++ b/test/jdk/java/lang/String/TranslateEscapes.java
@ -34,6 +34,7 @@ public class TranslateEscapes {
        test1();
        test2();
        test3();
+        test4();
    }

    /*
@ -44,6 +45,7 @@ public class TranslateEscapes {
        verifyEscape("f", '\f');
        verifyEscape("n", '\n');
        verifyEscape("r", '\r');
+        verifyEscape("s", '\s');
        verifyEscape("t", '\t');
        verifyEscape("'", '\'');
        verifyEscape("\"", '\"');
@ -72,7 +74,16 @@ public class TranslateEscapes {
     */
    static void test3() {
        exceptionThrown("+");
-        exceptionThrown("\n");
+        exceptionThrown("q");
+    }
+
+    /*
+     * Escape line terminator.
+     */
+    static void test4() {
+        verifyLineTerminator("\n");
+        verifyLineTerminator("\r\n");
+        verifyLineTerminator("\r");
    }

    static void verifyEscape(String string, char ch) {
@ -102,4 +113,13 @@ public class TranslateEscapes {
            // okay
        }
    }
+
+    static void verifyLineTerminator(String string) {
+        String escapes = "\\" + string;
+        if (!escapes.translateEscapes().isEmpty()) {
+            System.err.format("escape for line terminator not handled %s%n",
+                              string.replace("\n", "\\n").replace("\r", "\\r"));
+            throw new RuntimeException();
+        }
+    }
 }
--- a/test/langtools/tools/javac/TextBlockAPI.java
+++ b/test/langtools/tools/javac/TextBlockAPI.java
@ -46,7 +46,10 @@ public class TextBlockAPI {
        test2();
        test3();
        test4();
-    }
+        test5();
+        test6();
+        test7();
+   }

    /*
     * Check that correct/incorrect syntax is properly detected
@ -72,35 +75,35 @@ public class TextBlockAPI {
     * Check that use of \u0022 is properly detected
     */
    static void test2() {
-        compPass("public class UnicodeDelimiterTest {\n" +
-                "    public static void main(String... args) {\n" +
-                "        String xxx = \\u0022\\u0022\\u0022\nabc\n\\u0022\\u0022\\u0022;\n" +
-                "    }\n" +
-                "}\n");
+        compPass("public class UnicodeDelimiterTest {",
+                 "    public static void main(String... args) {",
+                 "        String xxx = \\u0022\\u0022\\u0022\nabc\n\\u0022\\u0022\\u0022;",
+                 "    }",
+                 "}");
    }

    /*
     * Check edge cases of text blocks as last token
     */
    static void test3() {
-        compFail("public class EndTest {\n" +
-                "    public static void main(String... args) {\n" +
-                "        String xxx = \"\"\"\nabc\"\"\"");
-        compFail("public class TwoQuoteClose {\n" +
-                "    public static void main(String... args) {\n" +
-                "        String xxx = \"\"\"\nabc\"\"");
-        compFail("public class OneQuoteClose {\n" +
-                "    public static void main(String... args) {\n" +
-                "        String xxx = \"\"\"\nabc\"");
-        compFail("public class NoClose {\n" +
-                "    public static void main(String... args) {\n" +
-                "        String xxx = \"\"\"\nabc");
-        compFail("public class ZeroTerminator {\n" +
-                "    public static void main(String... args) {\n" +
-                "        String xxx = \"\"\"\nabc\\u0000");
-        compFail("public class NonBreakingSpace {\n" +
-                "    public static void main(String... args) {\n" +
-                "        String xxx = \"\"\"\nabc\\u001A");
+        compFail("public class EndTest {",
+                 "    public static void main(String... args) {",
+                 "        String xxx = \"\"\"\nabc\"\"\"");
+        compFail("public class TwoQuoteClose {",
+                 "    public static void main(String... args) {",
+                 "        String xxx = \"\"\"\nabc\"\"");
+        compFail("public class OneQuoteClose {",
+                 "    public static void main(String... args) {",
+                 "        String xxx = \"\"\"\nabc\"");
+        compFail("public class NoClose {",
+                 "    public static void main(String... args) {",
+                 "        String xxx = \"\"\"\nabc");
+        compFail("public class ZeroTerminator {",
+                 "    public static void main(String... args) {",
+                 "        String xxx = \"\"\"\nabc\\u0000");
+        compFail("public class NonBreakingSpace {",
+                 "    public static void main(String... args) {",
+                 "        String xxx = \"\"\"\nabc\\u001A");
    }

    /*
@ -137,6 +140,61 @@ public class TextBlockAPI {
        }
    }

+    /*
+     * Check escape space
+     */
+    static void test5() {
+        compPass("public class EscapeSChar {",
+                 "    public static void main(String... args) {",
+                 "        char xxx = '\\s';",
+                 "    }",
+                 "}");
+        compPass("public class EscapeSString {",
+                 "    public static void main(String... args) {",
+                 "        String xxx = \"\\s\";",
+                 "    }",
+                 "}");
+        compPass("public class EscapeSTextBlock {",
+                 "    public static void main(String... args) {",
+                 "        String xxx = \"\"\"",
+                 "                     \\s",
+                 "                     \"\"\";",
+                 "    }",
+                 "}");
+    }
+
+    /*
+     * Check escape line terminator
+     */
+    static void test6() {
+        String[] terminators = new String[] { "\n", "\r\n", "\r" };
+        for (String terminator : terminators) {
+            compPass("public class EscapeLineTerminator {",
+                     "    public static void main(String... args) {",
+                     "        String xxx = \"\"\"",
+                     "                     \\" + terminator +
+                     "                     \"\"\";",
+                     "    }",
+                     "}");
+        }
+    }
+
+    /*
+     * Check incorrect escape line terminator cases
+     */
+    static void test7() {
+        compFail("public class EscapeLineTerminatorChar {",
+                 "    public static void main(String... args) {",
+                 "        char xxx = '\\\n';",
+                 "    }",
+                 "}");
+        compFail("public class EscapeLineTerminatorString {",
+                 "    public static void main(String... args) {",
+                 "        String xxx = \"\\\n\";",
+                 "    }",
+                 "}");
+    }
+
    /*
     * Test source for successful compile.
     */
@ -154,6 +212,10 @@ public class TextBlockAPI {
        }
    }

+    static void compPass(String... lines) {
+        compPass(String.join("\n", lines) + "\n");
+    }
+
    /*
     * Test source for unsuccessful compile and specific error.
     */
@ -170,4 +232,8 @@ public class TextBlockAPI {
            throw new RuntimeException("No error detected");
        }
    }
+
+    static void compFail(String... lines) {
+        compFail(String.join("\n", lines) + "\n");
+    }
 }
--- a/test/langtools/tools/javac/TextBlockLang.java
+++ b/test/langtools/tools/javac/TextBlockLang.java
@ -32,6 +32,8 @@
 public class TextBlockLang {
    public static void main(String... args) {
        test1();
+        test2();
+        test3();
    }

    /*
@ -75,6 +77,30 @@ public class TextBlockLang {
            """, 4);
    }

+    /*
+     * Test escape-S.
+     */
+    static void test2() {
+        if ('\s' != ' ') {
+            throw new RuntimeException("Failed character escape-S");
+        }
+        EQ("\s", " ");
+        EQ("""
+           \s
+           """, " \n");
+    }
+
+    /*
+     * Test escape line terminator.
+     */
+    static void test3() {
+        EQ("""
+           abc \
+           """, "abc ");
+        EQ("\\\n".translateEscapes(), "");
+        EQ("\\\r\n".translateEscapes(), "");
+        EQ("\\\r".translateEscapes(), "");
+    }

    /*
     * Raise an exception if the string is not the expected length.