8233116: Escape Sequences For Line Continuation and White Space (Preview)

Reviewed-by: vromero, jlahoda, bchristi, mcimadamore
This commit is contained in:
Jim Laskey 2019-12-03 08:35:21 -04:00
parent 7afaaf1229
commit 234f326d79
6 changed files with 249 additions and 138 deletions

View File

@ -3062,6 +3062,11 @@ public final class String
* <td>{@code U+000D}</td>
* </tr>
* <tr>
* <th scope="row">{@code \u005Cs}</th>
* <td>space</td>
* <td>{@code U+0020}</td>
* </tr>
* <tr>
* <th scope="row">{@code \u005C"}</th>
* <td>double quote</td>
* <td>{@code U+0022}</td>
@ -3081,6 +3086,11 @@ public final class String
* <td>octal escape</td>
* <td>code point equivalents</td>
* </tr>
* <tr>
* <th scope="row">{@code \u005C<line-terminator>}</th>
* <td>continuation</td>
* <td>discard</td>
* </tr>
* </tbody>
* </table>
*
@ -3124,6 +3134,9 @@ public final class String
case 'r':
ch = '\r';
break;
case 's':
ch = ' ';
break;
case 't':
ch = '\t';
break;
@ -3146,6 +3159,13 @@ public final class String
}
ch = (char)code;
break;
case '\n':
continue;
case '\r':
if (from < length && chars[from] == '\n') {
from++;
}
continue;
default: {
String msg = String.format(
"Invalid escape sequence: \\%c \\\\u%04X",

View File

@ -91,17 +91,13 @@ public class JavaTokenizer {
*/
protected UnicodeReader reader;
/** Should the string stripped of indentation?
/** True if the literal being scanned is a text block.
*/
protected boolean shouldStripIndent;
protected boolean isTextBlock;
/** Should the string's escapes be translated?
/** True if the literal contains escape sequences.
*/
protected boolean shouldTranslateEscapes;
/** Has the string broken escapes?
*/
protected boolean hasBrokenEscapes;
protected boolean hasEscapeSequences;
protected ScannerFactory fac;
@ -181,91 +177,72 @@ public class JavaTokenizer {
}
/** Read next character in character or string literal and copy into sbuf.
* pos - start of literal offset
* translateEscapesNow - true if String::translateEscapes is not available
* in the java.base libs. Occurs during bootstrapping.
* multiline - true if scanning a text block. Allows newlines to be embedded
* in the result.
*/
private void scanLitChar(int pos) {
if (reader.ch == '\\') {
private void scanLitChar(int pos, boolean translateEscapesNow, boolean multiline) {
if (reader.ch == '\\') {
if (reader.peekChar() == '\\' && !reader.isUnicode()) {
reader.skipChar();
reader.putChar('\\', true);
if (!translateEscapesNow) {
reader.putChar(false);
}
reader.putChar(true);
} else {
reader.scanChar();
reader.nextChar(translateEscapesNow);
switch (reader.ch) {
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
char leadch = reader.ch;
int oct = reader.digit(pos, 8);
reader.scanChar();
reader.nextChar(translateEscapesNow);
if ('0' <= reader.ch && reader.ch <= '7') {
oct = oct * 8 + reader.digit(pos, 8);
reader.scanChar();
reader.nextChar(translateEscapesNow);
if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
oct = oct * 8 + reader.digit(pos, 8);
reader.scanChar();
reader.nextChar(translateEscapesNow);
}
}
reader.putChar((char)oct);
break;
case 'b':
reader.putChar('\b', true); break;
case 't':
reader.putChar('\t', true); break;
case 'n':
reader.putChar('\n', true); break;
case 'f':
reader.putChar('\f', true); break;
case 'r':
reader.putChar('\r', true); break;
case '\'':
reader.putChar('\'', true); break;
case '\"':
reader.putChar('\"', true); break;
case '\\':
reader.putChar('\\', true); break;
default:
lexError(reader.bp, Errors.IllegalEscChar);
}
}
} else if (reader.bp != reader.buflen) {
reader.putChar(true);
}
}
/** Read next character in character or string literal and copy into sbuf
* without translating escapes. Used by text blocks to preflight-verify
* escape sequences.
*/
private void scanLitCharRaw(int pos) {
if (reader.ch == '\\') {
if (reader.peekChar() == '\\' && !reader.isUnicode()) {
reader.skipChar();
reader.putChar('\\', false);
reader.putChar('\\', true);
} else {
reader.putChar('\\', true);
switch (reader.ch) {
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
char leadch = reader.ch;
reader.putChar(true);
if ('0' <= reader.ch && reader.ch <= '7') {
reader.putChar(true);
if (leadch <= '3' && '0' <= reader.ch && reader.ch <= '7') {
reader.putChar(true);
}
if (translateEscapesNow) {
reader.putChar((char)oct);
}
break;
// Effectively list of valid escape sequences.
case 'b':
reader.putChar(translateEscapesNow ? '\b' : 'b', true); break;
case 't':
reader.putChar(translateEscapesNow ? '\t' : 't', true); break;
case 'n':
reader.putChar(translateEscapesNow ? '\n' : 'n', true); break;
case 'f':
reader.putChar(translateEscapesNow ? '\f' : 'f', true); break;
case 'r':
reader.putChar(translateEscapesNow ? '\r' : 'r', true); break;
case '\'':
case '\"':
case '\\':
reader.putChar(true); break;
case 's':
checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS);
reader.putChar(translateEscapesNow ? ' ' : 's', true); break;
case '\n':
case '\r':
if (!multiline) {
lexError(reader.bp, Errors.IllegalEscChar);
} else {
int start = reader.bp;
checkSourceLevel(reader.bp, Feature.TEXT_BLOCKS);
if (reader.ch == '\r' && reader.peekChar() == '\n') {
reader.nextChar(translateEscapesNow);
}
reader.nextChar(translateEscapesNow);
processLineTerminator(start, reader.bp);
}
break;
default:
hasBrokenEscapes = true;
lexError(reader.bp, Errors.IllegalEscChar);
}
}
@ -276,7 +253,7 @@ public class JavaTokenizer {
/** Interim access to String methods used to support text blocks.
* Required to handle bootstrapping with pre-text block jdks.
* Could be reworked in the 'next' jdk.
* Should be replaced with direct calls in the 'next' jdk.
*/
static class TextBlockSupport {
/** Reflection method to remove incidental indentation.
@ -429,11 +406,8 @@ public class JavaTokenizer {
*/
private void scanString(int pos) {
// Clear flags.
shouldStripIndent = false;
shouldTranslateEscapes = false;
hasBrokenEscapes = false;
// Check if text block string methods are present.
boolean hasTextBlockSupport = TextBlockSupport.hasSupport();
isTextBlock = false;
hasEscapeSequences = false;
// Track the end of first line for error recovery.
int firstEOLN = -1;
// Attempt to scan for up to 3 double quotes.
@ -449,36 +423,28 @@ public class JavaTokenizer {
case 3: // Starting a text block.
// Check if preview feature is enabled for text blocks.
checkSourceLevel(pos, Feature.TEXT_BLOCKS);
// Only proceed if text block string methods are present.
if (hasTextBlockSupport) {
// Indicate that the final string should have incidental indentation removed.
shouldStripIndent = true;
// Verify the open delimiter sequence.
boolean hasOpenEOLN = false;
while (reader.bp < reader.buflen && Character.isWhitespace(reader.ch)) {
hasOpenEOLN = isEOLN();
if (hasOpenEOLN) {
break;
}
reader.scanChar();
}
// Error if the open delimiter sequence is not """<Whitespace>*<LineTerminator>.
if (!hasOpenEOLN) {
lexError(reader.bp, Errors.IllegalTextBlockOpen);
return;
}
// Skip line terminator.
int start = reader.bp;
if (isCRLF()) {
reader.scanChar();
isTextBlock = true;
// Verify the open delimiter sequence.
boolean hasOpenEOLN = false;
while (reader.bp < reader.buflen && Character.isWhitespace(reader.ch)) {
hasOpenEOLN = isEOLN();
if (hasOpenEOLN) {
break;
}
reader.scanChar();
processLineTerminator(start, reader.bp);
} else {
// No text block string methods are present, so reset and treat like string literal.
reader.reset(pos);
openCount = countChar('\"', 1);
}
// Error if the open delimiter sequence is not """<Whitespace>*<LineTerminator>.
if (!hasOpenEOLN) {
lexError(reader.bp, Errors.IllegalTextBlockOpen);
return;
}
// Skip line terminator.
int start = reader.bp;
if (isCRLF()) {
reader.scanChar();
}
reader.scanChar();
processLineTerminator(start, reader.bp);
break;
}
// While characters are available.
@ -513,15 +479,11 @@ public class JavaTokenizer {
}
} else if (reader.ch == '\\') {
// Handle escape sequences.
if (hasTextBlockSupport) {
// Indicate that the final string should have escapes translated.
shouldTranslateEscapes = true;
// Validate escape sequence and add to string buffer.
scanLitCharRaw(pos);
} else {
// Translate escape sequence and add result to string buffer.
scanLitChar(pos);
}
hasEscapeSequences = true;
// Translate escapes immediately if TextBlockSupport is not available
// during bootstrapping.
boolean translateEscapesNow = !TextBlockSupport.hasSupport();
scanLitChar(pos, translateEscapesNow, openCount != 1);
} else {
// Add character to string buffer.
reader.putChar(true);
@ -961,7 +923,7 @@ public class JavaTokenizer {
} else {
if (isEOLN())
lexError(pos, Errors.IllegalLineEndInCharLit);
scanLitChar(pos);
scanLitChar(pos, true, false);
if (reader.ch == '\'') {
reader.scanChar();
tk = TokenKind.CHARLITERAL;
@ -1026,7 +988,7 @@ public class JavaTokenizer {
// Get characters from string buffer.
String string = reader.chars();
// If a text block.
if (shouldStripIndent) {
if (isTextBlock && TextBlockSupport.hasSupport()) {
// Verify that the incidental indentation is consistent.
if (lint.isEnabled(LintCategory.TEXT_BLOCKS)) {
Set<TextBlockSupport.WhitespaceChecks> checks =
@ -1041,11 +1003,19 @@ public class JavaTokenizer {
}
}
// Remove incidental indentation.
string = TextBlockSupport.stripIndent(string);
try {
string = TextBlockSupport.stripIndent(string);
} catch (Exception ex) {
// Error already reported, just use unstripped string.
}
}
// Translate escape sequences if present.
if (shouldTranslateEscapes && !hasBrokenEscapes) {
string = TextBlockSupport.translateEscapes(string);
if (hasEscapeSequences && TextBlockSupport.hasSupport()) {
try {
string = TextBlockSupport.translateEscapes(string);
} catch (Exception ex) {
// Error already reported, just use untranslated string.
}
}
// Build string token.
return new StringToken(tk, pos, endPos, string, comments);

View File

@ -146,6 +146,15 @@ public class UnicodeReader {
putChar(ch, scan);
}
/** Move past the current character by calling {@code scanChar()}.
 *  Unless {@code skip} is true, the current character {@code ch} is first
 *  appended to {@code sbuf}.
 *
 *  @param skip true to drop the current character, false to keep it in {@code sbuf}
 */
protected void nextChar(boolean skip) {
    if (skip) {
        // Nothing to retain; just advance.
        scanChar();
        return;
    }
    sbuf = ArrayUtils.ensureCapacity(sbuf, sp);
    sbuf[sp++] = ch;
    scanChar();
}
Name name() {
return names.fromChars(sbuf, 0, sp);
}

View File

@ -34,6 +34,7 @@ public class TranslateEscapes {
test1();
test2();
test3();
test4();
}
/*
@ -44,6 +45,7 @@ public class TranslateEscapes {
verifyEscape("f", '\f');
verifyEscape("n", '\n');
verifyEscape("r", '\r');
verifyEscape("s", '\s');
verifyEscape("t", '\t');
verifyEscape("'", '\'');
verifyEscape("\"", '\"');
@ -72,7 +74,16 @@ public class TranslateEscapes {
*/
static void test3() {
exceptionThrown("+");
exceptionThrown("\n");
exceptionThrown("q");
}
/*
* Escape line terminator.
*/
static void test4() {
    // A backslash followed by LF, CRLF or a lone CR must each translate to nothing.
    for (String terminator : new String[] { "\n", "\r\n", "\r" }) {
        verifyLineTerminator(terminator);
    }
}
static void verifyEscape(String string, char ch) {
@ -102,4 +113,13 @@ public class TranslateEscapes {
// okay
}
}
/*
 * Check that a backslash followed by the given line terminator is
 * translated to the empty string. On failure, report the terminator
 * (with \n and \r made visible) on System.err and throw.
 */
static void verifyLineTerminator(String string) {
    String translated = ("\\" + string).translateEscapes();
    if (translated.isEmpty()) {
        return;
    }
    String visible = string.replace("\n", "\\n").replace("\r", "\\r");
    System.err.format("escape for line terminator not handled %s%n", visible);
    throw new RuntimeException();
}
}

View File

@ -46,7 +46,10 @@ public class TextBlockAPI {
test2();
test3();
test4();
}
test5();
test6();
test7();
}
/*
* Check that correct/incorrect syntax is properly detected
@ -72,35 +75,35 @@ public class TextBlockAPI {
* Check that use of \u0022 is properly detected
*/
static void test2() {
compPass("public class UnicodeDelimiterTest {\n" +
" public static void main(String... args) {\n" +
" String xxx = \\u0022\\u0022\\u0022\nabc\n\\u0022\\u0022\\u0022;\n" +
" }\n" +
"}\n");
compPass("public class UnicodeDelimiterTest {",
" public static void main(String... args) {",
" String xxx = \\u0022\\u0022\\u0022\nabc\n\\u0022\\u0022\\u0022;",
" }",
"}");
}
/*
* Check edge cases of text blocks as last token
*/
static void test3() {
compFail("public class EndTest {\n" +
" public static void main(String... args) {\n" +
" String xxx = \"\"\"\nabc\"\"\"");
compFail("public class TwoQuoteClose {\n" +
" public static void main(String... args) {\n" +
" String xxx = \"\"\"\nabc\"\"");
compFail("public class OneQuoteClose {\n" +
" public static void main(String... args) {\n" +
" String xxx = \"\"\"\nabc\"");
compFail("public class NoClose {\n" +
" public static void main(String... args) {\n" +
" String xxx = \"\"\"\nabc");
compFail("public class ZeroTerminator {\n" +
" public static void main(String... args) {\n" +
" String xxx = \"\"\"\nabc\\u0000");
compFail("public class NonBreakingSpace {\n" +
" public static void main(String... args) {\n" +
" String xxx = \"\"\"\nabc\\u001A");
compFail("public class EndTest {",
" public static void main(String... args) {",
" String xxx = \"\"\"\nabc\"\"\"");
compFail("public class TwoQuoteClose {",
" public static void main(String... args) {",
" String xxx = \"\"\"\nabc\"\"");
compFail("public class OneQuoteClose {",
" public static void main(String... args) {",
" String xxx = \"\"\"\nabc\"");
compFail("public class NoClose {",
" public static void main(String... args) {",
" String xxx = \"\"\"\nabc");
compFail("public class ZeroTerminator {",
" public static void main(String... args) {",
" String xxx = \"\"\"\nabc\\u0000");
compFail("public class NonBreakingSpace {",
" public static void main(String... args) {",
" String xxx = \"\"\"\nabc\\u001A");
}
/*
@ -137,6 +140,61 @@ public class TextBlockAPI {
}
}
/*
* Check escape space
*/
static void test5() {
    // \s used in a character literal.
    String[] inCharLiteral = {
        "public class EscapeSChar {",
        " public static void main(String... args) {",
        " char xxx = '\\s';",
        " }",
        "}"
    };
    compPass(inCharLiteral);

    // \s used in an ordinary string literal.
    String[] inStringLiteral = {
        "public class EscapeSString {",
        " public static void main(String... args) {",
        " String xxx = \"\\s\";",
        " }",
        "}"
    };
    compPass(inStringLiteral);

    // \s used inside a text block.
    String[] inTextBlock = {
        "public class EscapeSTextBlock {",
        " public static void main(String... args) {",
        " String xxx = \"\"\"",
        " \\s",
        " \"\"\";",
        " }",
        "}"
    };
    compPass(inTextBlock);
}
/*
* Check escape line terminator
*/
static void test6() {
    // Try every line terminator form: LF, CRLF and a lone CR.
    for (String terminator : new String[] { "\n", "\r\n", "\r" }) {
        // Backslash, then the terminator, then the closing delimiter.
        String continuedLine = " \\" + terminator + " \"\"\";";
        compPass("public class EscapeLineTerminator {",
                 " public static void main(String... args) {",
                 " String xxx = \"\"\"",
                 continuedLine,
                 " }",
                 "}");
    }
}
/*
* Check incorrect escape line terminator cases
*/
static void test7() {
    // Backslash followed by a newline inside a character literal must be rejected.
    String[] inCharLiteral = {
        "public class EscapeLineTerminatorChar {",
        " public static void main(String... args) {",
        " char xxx = '\\\n';",
        " }",
        "}"
    };
    compFail(inCharLiteral);

    // Backslash followed by a newline inside an ordinary string literal must be rejected.
    String[] inStringLiteral = {
        "public class EscapeLineTerminatorString {",
        " public static void main(String... args) {",
        " String xxx = \"\\\n\";",
        " }",
        "}"
    };
    compFail(inStringLiteral);
}
/*
* Test source for successful compile.
*/
@ -154,6 +212,10 @@ public class TextBlockAPI {
}
}
/*
 * Varargs convenience: join the lines with "\n", append a trailing "\n",
 * and hand the resulting source to compPass(String).
 */
static void compPass(String... lines) {
    String source = String.join("\n", lines) + "\n";
    compPass(source);
}
/*
* Test source for unsuccessful compile and specific error.
*/
@ -170,4 +232,8 @@ public class TextBlockAPI {
throw new RuntimeException("No error detected");
}
}
/*
 * Varargs convenience: join the lines with "\n", append a trailing "\n",
 * and hand the resulting source to compFail(String).
 */
static void compFail(String... lines) {
    String source = String.join("\n", lines) + "\n";
    compFail(source);
}
}

View File

@ -32,6 +32,8 @@
public class TextBlockLang {
public static void main(String... args) {
test1();
test2();
test3();
}
/*
@ -75,6 +77,30 @@ public class TextBlockLang {
""", 4);
}
/*
* Test escape-S.
*/
static void test2() {
// In a character literal, \s must be the space character.
if ('\s' != ' ') {
throw new RuntimeException("Failed character escape-S");
}
// In an ordinary string literal, \s is expected to yield a single space.
EQ("\s", " ");
// In a text block, \s is expected to yield a space, followed by the line's newline.
EQ("""
\s
""", " \n");
}
/*
* Test escape line terminator.
*/
static void test3() {
// A backslash at the end of a text block line: the expected value "abc "
// contains no newline, i.e. the line terminator is discarded.
EQ("""
abc \
""", "abc ");
// translateEscapes() is expected to return the empty string for a backslash
// followed by LF, CRLF, or a lone CR.
EQ("\\\n".translateEscapes(), "");
EQ("\\\r\n".translateEscapes(), "");
EQ("\\\r".translateEscapes(), "");
}
/*
* Raise an exception if the string is not the expected length.