8233116: Escape Sequences For Line Continuation and White Space (Preview)
Reviewed-by: vromero, jlahoda, bchristi, mcimadamore
This commit is contained in:
parent
7afaaf1229
commit
234f326d79
@ -3062,6 +3062,11 @@ public final class String
|
||||
* <td>{@code U+000D}</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <th scope="row">{@code \u005Cs}</th>
|
||||
* <td>space</td>
|
||||
* <td>{@code U+0020}</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <th scope="row">{@code \u005C"}</th>
|
||||
* <td>double quote</td>
|
||||
* <td>{@code U+0022}</td>
|
||||
@ -3081,6 +3086,11 @@ public final class String
|
||||
* <td>octal escape</td>
|
||||
* <td>code point equivalents</td>
|
||||
* </tr>
|
||||
* <tr>
|
||||
* <th scope="row">{@code \u005C<line-terminator>}</th>
|
||||
* <td>continuation</td>
|
||||
* <td>discard</td>
|
||||
* </tr>
|
||||
* </tbody>
|
||||
* </table>
|
||||
*
|
||||
@ -3124,6 +3134,9 @@ public final class String
|
||||
case 'r':
|
||||
ch = '\r';
|
||||
break;
|
||||
case 's':
|
||||
ch = ' ';
|
||||
break;
|
||||
case 't':
|
||||
ch = '\t';
|
||||
break;
|
||||
@ -3146,6 +3159,13 @@ public final class String
|
||||
}
|
||||
ch = (char)code;
|
||||
break;
|
||||
case '\n':
|
||||
continue;
|
||||
case '\r':
|
||||
if (from < length && chars[from] == '\n') {
|
||||
from++;
|
||||
}
|
||||
continue;
|
||||
default: {
|
||||
String msg = String.format(
|
||||
"Invalid escape sequence: \\%c \\\\u%04X",
|
||||
|
@ -91,17 +91,13 @@ public class JavaTokenizer {
|
||||
*/
|
||||
protected UnicodeReader reader;
|
||||
|
||||
/** Should the string be stripped of indentation?
|
||||
/** Is the literal being scanned a text block?
|
||||
*/
|
||||
protected boolean shouldStripIndent;
|
||||
protected boolean isTextBlock;
|
||||
|
||||
/** Should the string's escapes be translated?
|
||||
/** Does the literal contain escape sequences?
|
||||
*/
|
||||
protected boolean shouldTranslateEscapes;
|
||||
|
||||
/** Has the string broken escapes?
|
||||
*/
|
||||
protected boolean hasBrokenEscapes;
|
||||
protected boolean hasEscapeSequences;
|
||||
|
||||
protected ScannerFactory fac;
|
||||
|
||||
@ -181,91 +177,72 @@ public class JavaTokenizer {
|
||||
}
|
||||
|
||||
/** Read next character in character or string literal and copy into sbuf.
|
||||
* pos - start of literal offset
|
||||
* translateEscapesNow - true if String::translateEscapes is not available
|
||||
* in the java.base libs. Occurs during bootstrapping.
|
||||
* multiline - true if scanning a text block. Allows newlines to be embedded
|
||||
* in the result.
|
||||
*/
|
||||
private void scanLitChar(int pos) {
|
||||
if (reader.ch == '\\') {
|
||||
private void scanLitChar(int pos, boolean translateEscapesNow, boolean multiline) {
|
||||
if (reader.ch == '\\') {
|
||||
if (reader.peekChar() == '\\' && !reader.isUnicode()) {
|
||||
reader.skipChar();
|
||||
reader.putChar('\\', true);
|
||||
if (!translateEscapesNow) {
|
||||
reader.putChar(false);
|
||||
}
|
||||
reader.putChar(true);
|
||||
} else {
|
||||
reader.scanChar();
|
||||
reader.nextChar(translateEscapesNow);
|
||||
switch (reader.ch) {
|
||||
case '0': case '1': case '2': case '3':
|
||||
case '4': case '5': case '6': case '7':
|
||||
char leadch = reader.ch;
|
||||
int oct = reader.digit(pos, 8);
|
||||
reader.scanChar();
|
||||
reader.nextChar(translateEscapesNow);
|
||||
if ('0' <= reader.ch && reader.ch <= '7') {
|
||||
oct = oct * 8 + reader.digit(pos, 8);
|
||||
reader.scanChar();
|
||||
reader.nextChar(translateEscapesNow);
|
||||
if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
|
||||
oct = oct * 8 + reader.digit(pos, 8);
|
||||
reader.scanChar();
|
||||
reader.nextChar(translateEscapesNow);
|
||||
}
|
||||
}
|
||||
reader.putChar((char)oct);
|
||||
break;
|
||||
case 'b':
|
||||
reader.putChar('\b', true); break;
|
||||
case 't':
|
||||
reader.putChar('\t', true); break;
|
||||
case 'n':
|
||||
reader.putChar('\n', true); break;
|
||||
case 'f':
|
||||
reader.putChar('\f', true); break;
|
||||
case 'r':
|
||||
reader.putChar('\r', true); break;
|
||||
case '\'':
|
||||
reader.putChar('\'', true); break;
|
||||
case '\"':
|
||||
reader.putChar('\"', true); break;
|
||||
case '\\':
|
||||
reader.putChar('\\', true); break;
|
||||
default:
|
||||
lexError(reader.bp, Errors.IllegalEscChar);
|
||||
}
|
||||
}
|
||||
} else if (reader.bp != reader.buflen) {
|
||||
reader.putChar(true);
|
||||
}
|
||||
}
|
||||
|
||||
/** Read next character in character or string literal and copy into sbuf
|
||||
* without translating escapes. Used by text blocks to preflight verify
|
||||
* escape sequences.
|
||||
*/
|
||||
private void scanLitCharRaw(int pos) {
|
||||
if (reader.ch == '\\') {
|
||||
if (reader.peekChar() == '\\' && !reader.isUnicode()) {
|
||||
reader.skipChar();
|
||||
reader.putChar('\\', false);
|
||||
reader.putChar('\\', true);
|
||||
} else {
|
||||
reader.putChar('\\', true);
|
||||
switch (reader.ch) {
|
||||
case '0': case '1': case '2': case '3':
|
||||
case '4': case '5': case '6': case '7':
|
||||
char leadch = reader.ch;
|
||||
reader.putChar(true);
|
||||
if ('0' <= reader.ch && reader.ch <= '7') {
|
||||
reader.putChar(true);
|
||||
if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
|
||||
reader.putChar(true);
|
||||
}
|
||||
if (translateEscapesNow) {
|
||||
reader.putChar((char)oct);
|
||||
}
|
||||
break;
|
||||
// Effectively list of valid escape sequences.
|
||||
case 'b':
|
||||
reader.putChar(translateEscapesNow ? '\b' : 'b', true); break;
|
||||
case 't':
|
||||
reader.putChar(translateEscapesNow ? '\t' : 't', true); break;
|
||||
case 'n':
|
||||
reader.putChar(translateEscapesNow ? '\n' : 'n', true); break;
|
||||
case 'f':
|
||||
reader.putChar(translateEscapesNow ? '\f' : 'f', true); break;
|
||||
case 'r':
|
||||
reader.putChar(translateEscapesNow ? '\r' : 'r', true); break;
|
||||
case '\'':
|
||||
case '\"':
|
||||
case '\\':
|
||||
reader.putChar(true); break;
|
||||
case 's':
|
||||
checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS);
|
||||
reader.putChar(translateEscapesNow ? ' ' : 's', true); break;
|
||||
case '\n':
|
||||
case '\r':
|
||||
if (!multiline) {
|
||||
lexError(reader.bp, Errors.IllegalEscChar);
|
||||
} else {
|
||||
int start = reader.bp;
|
||||
checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS);
|
||||
if (reader.ch == '\r' && reader.peekChar() == '\n') {
|
||||
reader.nextChar(translateEscapesNow);
|
||||
}
|
||||
reader.nextChar(translateEscapesNow);
|
||||
processLineTerminator(start, reader.bp);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
hasBrokenEscapes = true;
|
||||
lexError(reader.bp, Errors.IllegalEscChar);
|
||||
}
|
||||
}
|
||||
@ -276,7 +253,7 @@ public class JavaTokenizer {
|
||||
|
||||
/** Interim access to String methods used to support text blocks.
|
||||
* Required to handle bootstrapping with pre-text block jdks.
|
||||
* Could be reworked in the 'next' jdk.
|
||||
* Should be replaced with direct calls in the 'next' jdk.
|
||||
*/
|
||||
static class TextBlockSupport {
|
||||
/** Reflection method to remove incidental indentation.
|
||||
@ -429,11 +406,8 @@ public class JavaTokenizer {
|
||||
*/
|
||||
private void scanString(int pos) {
|
||||
// Clear flags.
|
||||
shouldStripIndent = false;
|
||||
shouldTranslateEscapes = false;
|
||||
hasBrokenEscapes = false;
|
||||
// Check if text block string methods are present.
|
||||
boolean hasTextBlockSupport = TextBlockSupport.hasSupport();
|
||||
isTextBlock = false;
|
||||
hasEscapeSequences = false;
|
||||
// Track the end of first line for error recovery.
|
||||
int firstEOLN = -1;
|
||||
// Attempt to scan for up to 3 double quotes.
|
||||
@ -449,36 +423,28 @@ public class JavaTokenizer {
|
||||
case 3: // Starting a text block.
|
||||
// Check if preview feature is enabled for text blocks.
|
||||
checkSourceLevel(pos, Feature.TEXT_BLOCKS);
|
||||
// Only proceed if text block string methods are present.
|
||||
if (hasTextBlockSupport) {
|
||||
// Indicate that the final string should have incidental indentation removed.
|
||||
shouldStripIndent = true;
|
||||
// Verify the open delimiter sequence.
|
||||
boolean hasOpenEOLN = false;
|
||||
while (reader.bp < reader.buflen && Character.isWhitespace(reader.ch)) {
|
||||
hasOpenEOLN = isEOLN();
|
||||
if (hasOpenEOLN) {
|
||||
break;
|
||||
}
|
||||
reader.scanChar();
|
||||
}
|
||||
// Error if the open delimiter sequence is not """<Whitespace>*<LineTerminator>.
|
||||
if (!hasOpenEOLN) {
|
||||
lexError(reader.bp, Errors.IllegalTextBlockOpen);
|
||||
return;
|
||||
}
|
||||
// Skip line terminator.
|
||||
int start = reader.bp;
|
||||
if (isCRLF()) {
|
||||
reader.scanChar();
|
||||
isTextBlock = true;
|
||||
// Verify the open delimiter sequence.
|
||||
boolean hasOpenEOLN = false;
|
||||
while (reader.bp < reader.buflen && Character.isWhitespace(reader.ch)) {
|
||||
hasOpenEOLN = isEOLN();
|
||||
if (hasOpenEOLN) {
|
||||
break;
|
||||
}
|
||||
reader.scanChar();
|
||||
processLineTerminator(start, reader.bp);
|
||||
} else {
|
||||
// No text block string methods are present, so reset and treat like string literal.
|
||||
reader.reset(pos);
|
||||
openCount = countChar('\"', 1);
|
||||
}
|
||||
// Error if the open delimiter sequence is not """<Whitespace>*<LineTerminator>.
|
||||
if (!hasOpenEOLN) {
|
||||
lexError(reader.bp, Errors.IllegalTextBlockOpen);
|
||||
return;
|
||||
}
|
||||
// Skip line terminator.
|
||||
int start = reader.bp;
|
||||
if (isCRLF()) {
|
||||
reader.scanChar();
|
||||
}
|
||||
reader.scanChar();
|
||||
processLineTerminator(start, reader.bp);
|
||||
break;
|
||||
}
|
||||
// While characters are available.
|
||||
@ -513,15 +479,11 @@ public class JavaTokenizer {
|
||||
}
|
||||
} else if (reader.ch == '\\') {
|
||||
// Handle escape sequences.
|
||||
if (hasTextBlockSupport) {
|
||||
// Indicate that the final string should have escapes translated.
|
||||
shouldTranslateEscapes = true;
|
||||
// Validate escape sequence and add to string buffer.
|
||||
scanLitCharRaw(pos);
|
||||
} else {
|
||||
// Translate escape sequence and add result to string buffer.
|
||||
scanLitChar(pos);
|
||||
}
|
||||
hasEscapeSequences = true;
|
||||
// Translate escapes immediately if TextBlockSupport is not available
|
||||
// during bootstrapping.
|
||||
boolean translateEscapesNow = !TextBlockSupport.hasSupport();
|
||||
scanLitChar(pos, translateEscapesNow, openCount != 1);
|
||||
} else {
|
||||
// Add character to string buffer.
|
||||
reader.putChar(true);
|
||||
@ -961,7 +923,7 @@ public class JavaTokenizer {
|
||||
} else {
|
||||
if (isEOLN())
|
||||
lexError(pos, Errors.IllegalLineEndInCharLit);
|
||||
scanLitChar(pos);
|
||||
scanLitChar(pos, true, false);
|
||||
if (reader.ch == '\'') {
|
||||
reader.scanChar();
|
||||
tk = TokenKind.CHARLITERAL;
|
||||
@ -1026,7 +988,7 @@ public class JavaTokenizer {
|
||||
// Get characters from string buffer.
|
||||
String string = reader.chars();
|
||||
// If a text block.
|
||||
if (shouldStripIndent) {
|
||||
if (isTextBlock && TextBlockSupport.hasSupport()) {
|
||||
// Verify that the incidental indentation is consistent.
|
||||
if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) {
|
||||
Set<TextBlockSupport.WhitespaceChecks> checks =
|
||||
@ -1041,11 +1003,19 @@ public class JavaTokenizer {
|
||||
}
|
||||
}
|
||||
// Remove incidental indentation.
|
||||
string = TextBlockSupport.stripIndent(string);
|
||||
try {
|
||||
string = TextBlockSupport.stripIndent(string);
|
||||
} catch (Exception ex) {
|
||||
// Error already reported, just use unstripped string.
|
||||
}
|
||||
}
|
||||
// Translate escape sequences if present.
|
||||
if (shouldTranslateEscapes && !hasBrokenEscapes) {
|
||||
string = TextBlockSupport.translateEscapes(string);
|
||||
if (hasEscapeSequences && TextBlockSupport.hasSupport()) {
|
||||
try {
|
||||
string = TextBlockSupport.translateEscapes(string);
|
||||
} catch (Exception ex) {
|
||||
// Error already reported, just use untranslated string.
|
||||
}
|
||||
}
|
||||
// Build string token.
|
||||
return new StringToken(tk, pos, endPos, string, comments);
|
||||
|
@ -146,6 +146,15 @@ public class UnicodeReader {
|
||||
putChar(ch, scan);
|
||||
}
|
||||
|
||||
/** Advance to the next input character, optionally keeping the current one.
 *
 *  When {@code skip} is false, the current character {@code ch} is appended
 *  to {@code sbuf} at index {@code sp} (after ArrayUtils.ensureCapacity is
 *  asked to make room for that index) and {@code sp} is incremented.
 *  When {@code skip} is true, the current character is not recorded.
 *  In both cases {@code scanChar()} is then called.
 *
 *  @param skip true to drop the current character instead of copying it
 *              into the string buffer
 */
protected void nextChar(boolean skip) {
|
||||
if (!skip) {
|
||||
sbuf = ArrayUtils.ensureCapacity(sbuf, sp);
|
||||
sbuf[sp++] = ch;
|
||||
}
|
||||
|
||||
scanChar();
|
||||
}
|
||||
|
||||
Name name() {
|
||||
return names.fromChars(sbuf, 0, sp);
|
||||
}
|
||||
|
@ -34,6 +34,7 @@ public class TranslateEscapes {
|
||||
test1();
|
||||
test2();
|
||||
test3();
|
||||
test4();
|
||||
}
|
||||
|
||||
/*
|
||||
@ -44,6 +45,7 @@ public class TranslateEscapes {
|
||||
verifyEscape("f", '\f');
|
||||
verifyEscape("n", '\n');
|
||||
verifyEscape("r", '\r');
|
||||
verifyEscape("s", '\s');
|
||||
verifyEscape("t", '\t');
|
||||
verifyEscape("'", '\'');
|
||||
verifyEscape("\"", '\"');
|
||||
@ -72,7 +74,16 @@ public class TranslateEscapes {
|
||||
*/
|
||||
static void test3() {
|
||||
exceptionThrown("+");
|
||||
exceptionThrown("\n");
|
||||
exceptionThrown("q");
|
||||
}
|
||||
|
||||
/*
 * Escape line terminator.
 *
 * Runs verifyLineTerminator for each of the three line terminator forms:
 * LF ("\n"), CR LF ("\r\n") and CR ("\r").
*/
|
||||
static void test4() {
|
||||
verifyLineTerminator("\n");
|
||||
verifyLineTerminator("\r\n");
|
||||
verifyLineTerminator("\r");
|
||||
}
|
||||
|
||||
static void verifyEscape(String string, char ch) {
|
||||
@ -102,4 +113,13 @@ public class TranslateEscapes {
|
||||
// okay
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Verify that a backslash followed by the given line terminator is
 * discarded by String::translateEscapes.
 *
 * The argument is prefixed with a single backslash and translated; if the
 * result is not the empty string, a diagnostic is written to System.err
 * (with LF and CR shown as the visible text \n and \r) and a
 * RuntimeException is thrown.
 */
static void verifyLineTerminator(String string) {
|
||||
String escapes = "\\" + string;
|
||||
if (!escapes.translateEscapes().isEmpty()) {
|
||||
System.err.format("escape for line terminator not handled %s%n",
|
||||
string.replace("\n", "\\n").replace("\r", "\\r"));
|
||||
throw new RuntimeException();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -46,7 +46,10 @@ public class TextBlockAPI {
|
||||
test2();
|
||||
test3();
|
||||
test4();
|
||||
}
|
||||
test5();
|
||||
test6();
|
||||
test7();
|
||||
}
|
||||
|
||||
/*
|
||||
* Check that correct/incorrect syntax is properly detected
|
||||
@ -72,35 +75,35 @@ public class TextBlockAPI {
|
||||
* Check that use of \u0022 is properly detected
|
||||
*/
|
||||
static void test2() {
|
||||
compPass("public class UnicodeDelimiterTest {\n" +
|
||||
" public static void main(String... args) {\n" +
|
||||
" String xxx = \\u0022\\u0022\\u0022\nabc\n\\u0022\\u0022\\u0022;\n" +
|
||||
" }\n" +
|
||||
"}\n");
|
||||
compPass("public class UnicodeDelimiterTest {",
|
||||
" public static void main(String... args) {",
|
||||
" String xxx = \\u0022\\u0022\\u0022\nabc\n\\u0022\\u0022\\u0022;",
|
||||
" }",
|
||||
"}");
|
||||
}
|
||||
|
||||
/*
|
||||
* Check edge cases of text blocks as last token
|
||||
*/
|
||||
static void test3() {
|
||||
compFail("public class EndTest {\n" +
|
||||
" public static void main(String... args) {\n" +
|
||||
" String xxx = \"\"\"\nabc\"\"\"");
|
||||
compFail("public class TwoQuoteClose {\n" +
|
||||
" public static void main(String... args) {\n" +
|
||||
" String xxx = \"\"\"\nabc\"\"");
|
||||
compFail("public class OneQuoteClose {\n" +
|
||||
" public static void main(String... args) {\n" +
|
||||
" String xxx = \"\"\"\nabc\"");
|
||||
compFail("public class NoClose {\n" +
|
||||
" public static void main(String... args) {\n" +
|
||||
" String xxx = \"\"\"\nabc");
|
||||
compFail("public class ZeroTerminator {\n" +
|
||||
" public static void main(String... args) {\n" +
|
||||
" String xxx = \"\"\"\nabc\\u0000");
|
||||
compFail("public class NonBreakingSpace {\n" +
|
||||
" public static void main(String... args) {\n" +
|
||||
" String xxx = \"\"\"\nabc\\u001A");
|
||||
compFail("public class EndTest {",
|
||||
" public static void main(String... args) {",
|
||||
" String xxx = \"\"\"\nabc\"\"\"");
|
||||
compFail("public class TwoQuoteClose {",
|
||||
" public static void main(String... args) {",
|
||||
" String xxx = \"\"\"\nabc\"\"");
|
||||
compFail("public class OneQuoteClose {",
|
||||
" public static void main(String... args) {",
|
||||
" String xxx = \"\"\"\nabc\"");
|
||||
compFail("public class NoClose {",
|
||||
" public static void main(String... args) {",
|
||||
" String xxx = \"\"\"\nabc");
|
||||
compFail("public class ZeroTerminator {",
|
||||
" public static void main(String... args) {",
|
||||
" String xxx = \"\"\"\nabc\\u0000");
|
||||
compFail("public class NonBreakingSpace {",
|
||||
" public static void main(String... args) {",
|
||||
" String xxx = \"\"\"\nabc\\u001A");
|
||||
}
|
||||
|
||||
/*
|
||||
@ -137,6 +140,61 @@ public class TextBlockAPI {
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Check escape space.
 *
 * Passes three generated sources to compPass, each using the \s escape in
 * a different kind of literal: a char literal, a string literal and a
 * text block.
*/
|
||||
static void test5() {
|
||||
compPass("public class EscapeSChar {",
|
||||
" public static void main(String... args) {",
|
||||
" char xxx = '\\s';",
|
||||
" }",
|
||||
"}");
|
||||
compPass("public class EscapeSString {",
|
||||
" public static void main(String... args) {",
|
||||
" String xxx = \"\\s\";",
|
||||
" }",
|
||||
"}");
|
||||
compPass("public class EscapeSTextBlock {",
|
||||
" public static void main(String... args) {",
|
||||
" String xxx = \"\"\"",
|
||||
" \\s",
|
||||
" \"\"\";",
|
||||
" }",
|
||||
"}");
|
||||
}
|
||||
|
||||
/*
 * Check escape line terminator.
 *
 * For each of the terminators LF, CR LF and CR, passes to compPass a
 * generated source whose text block contains a backslash immediately
 * followed by that terminator.
*/
|
||||
static void test6() {
|
||||
String[] terminators = new String[] { "\n", "\r\n", "\r" };
|
||||
for (String terminator : terminators) {
|
||||
compPass("public class EscapeLineTerminator {",
|
||||
" public static void main(String... args) {",
|
||||
" String xxx = \"\"\"",
|
||||
" \\" + terminator +
|
||||
" \"\"\";",
|
||||
" }",
|
||||
"}");
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Check incorrect escape line terminator cases.
 *
 * Passes to compFail two generated sources in which a backslash is
 * followed by a line feed outside a text block: once inside a char
 * literal and once inside an ordinary string literal.
*/
|
||||
static void test7() {
|
||||
compFail("public class EscapeLineTerminatorChar {",
|
||||
" public static void main(String... args) {",
|
||||
" char xxx = '\\\n';",
|
||||
" }",
|
||||
"}");
|
||||
compFail("public class EscapeLineTerminatorString {",
|
||||
" public static void main(String... args) {",
|
||||
" String xxx = \"\\\n\";",
|
||||
" }",
|
||||
"}");
|
||||
}
|
||||
|
||||
/*
|
||||
* Test source for successful compile.
|
||||
*/
|
||||
@ -154,6 +212,10 @@ public class TextBlockAPI {
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Convenience form of compPass taking the source as separate lines.
 * The lines are joined with "\n", a trailing "\n" is appended, and the
 * resulting single string is handed to compPass.
 * NOTE(review): relies on a single-String compPass overload declared
 * elsewhere in this class - confirm.
 */
static void compPass(String... lines) {
|
||||
compPass(String.join("\n", lines) + "\n");
|
||||
}
|
||||
|
||||
/*
|
||||
* Test source for unsuccessful compile and specific error.
|
||||
*/
|
||||
@ -170,4 +232,8 @@ public class TextBlockAPI {
|
||||
throw new RuntimeException("No error detected");
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Convenience form of compFail taking the source as separate lines.
 * The lines are joined with "\n", a trailing "\n" is appended, and the
 * resulting single string is handed to compFail.
 * NOTE(review): relies on a single-String compFail overload declared
 * elsewhere in this class - confirm.
 */
static void compFail(String... lines) {
|
||||
compFail(String.join("\n", lines) + "\n");
|
||||
}
|
||||
}
|
||||
|
@ -32,6 +32,8 @@
|
||||
public class TextBlockLang {
|
||||
public static void main(String... args) {
|
||||
test1();
|
||||
test2();
|
||||
test3();
|
||||
}
|
||||
|
||||
/*
|
||||
@ -75,6 +77,30 @@ public class TextBlockLang {
|
||||
""", 4);
|
||||
}
|
||||
|
||||
/*
 * Test escape-S.
 *
 * Checks the \s escape in three forms: as a char literal compared with
 * ' ' (throwing RuntimeException on mismatch), as a string literal, and
 * inside a text block. The latter two are checked through EQ.
 * NOTE(review): EQ is declared elsewhere in this class; presumably it
 * fails when its two arguments differ - confirm.
*/
|
||||
static void test2() {
|
||||
if ('\s' != ' ') {
|
||||
throw new RuntimeException("Failed character escape-S");
|
||||
}
|
||||
EQ("\s", " ");
|
||||
EQ("""
|
||||
\s
|
||||
""", " \n");
|
||||
}
|
||||
|
||||
/*
 * Test escape line terminator.
 *
 * First passes EQ a text block whose line ends in a backslash, with an
 * expected value that contains no newline. Then passes EQ the result of
 * String::translateEscapes for a backslash followed by LF, CR LF and CR,
 * each with the empty string as the expected value.
 * NOTE(review): EQ is declared elsewhere in this class; presumably it
 * fails when its two arguments differ - confirm.
*/
|
||||
static void test3() {
|
||||
EQ("""
|
||||
abc \
|
||||
""", "abc ");
|
||||
EQ("\\\n".translateEscapes(), "");
|
||||
EQ("\\\r\n".translateEscapes(), "");
|
||||
EQ("\\\r".translateEscapes(), "");
|
||||
}
|
||||
|
||||
/*
|
||||
* Raise an exception if the string is not the expected length.
|
||||
|
Loading…
Reference in New Issue
Block a user