8072081: Supplementary characters are rejected in comments

Reviewed-by: lancea
This commit is contained in:
Joe Wang 2015-12-09 21:40:44 -08:00
parent 60e4fa0770
commit bed7e98034
4 changed files with 121 additions and 43 deletions

View File

@ -1394,7 +1394,12 @@ public class XMLDocumentFragmentScannerImpl
fEmptyElement = true; fEmptyElement = true;
return true; return true;
} else if (!isValidNameStartChar(c) || !sawSpace) { } else if (!isValidNameStartChar(c) || !sawSpace) {
reportFatalError("ElementUnterminated", new Object[]{fElementQName.rawname}); // Second chance. Check if this character is a high
// surrogate of a valid name start character.
if (!isValidNameStartHighSurrogate(c) || !sawSpace) {
reportFatalError("ElementUnterminated",
new Object[]{fElementQName.rawname});
}
} }
return false; return false;
@ -2606,7 +2611,9 @@ public class XMLDocumentFragmentScannerImpl
private void startOfMarkup() throws IOException { private void startOfMarkup() throws IOException {
fMarkupDepth++; fMarkupDepth++;
final int ch = fEntityScanner.peekChar(); final int ch = fEntityScanner.peekChar();
if (isValidNameStartChar(ch) || isValidNameStartHighSurrogate(ch)) {
setScannerState(SCANNER_STATE_START_ELEMENT_TAG);
} else {
switch(ch){ switch(ch){
case '?' :{ case '?' :{
setScannerState(SCANNER_STATE_PI); setScannerState(SCANNER_STATE_PI);
@ -2635,11 +2642,7 @@ public class XMLDocumentFragmentScannerImpl
break; break;
} }
default :{ default :{
if (isValidNameStartChar(ch)) { reportFatalError("MarkupNotRecognizedInContent", null);
setScannerState(SCANNER_STATE_START_ELEMENT_TAG);
} else {
reportFatalError("MarkupNotRecognizedInContent",
null);
} }
} }
} }

View File

@ -847,9 +847,12 @@ public class XMLDocumentScannerImpl
case SCANNER_STATE_START_OF_MARKUP: { case SCANNER_STATE_START_OF_MARKUP: {
fMarkupDepth++; fMarkupDepth++;
if (isValidNameStartChar(fEntityScanner.peekChar()) ||
if (fEntityScanner.skipChar('?')) { isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
setScannerState(SCANNER_STATE_PI); setScannerState(SCANNER_STATE_ROOT_ELEMENT);
setDriver(fContentDriver);
//from now onwards this would be handled by fContentDriver,in the same next() call
return fContentDriver.next();
} else if (fEntityScanner.skipChar('!')) { } else if (fEntityScanner.skipChar('!')) {
if (fEntityScanner.skipChar('-')) { if (fEntityScanner.skipChar('-')) {
if (!fEntityScanner.skipChar('-')) { if (!fEntityScanner.skipChar('-')) {
@ -872,12 +875,8 @@ public class XMLDocumentScannerImpl
reportFatalError("MarkupNotRecognizedInProlog", reportFatalError("MarkupNotRecognizedInProlog",
null); null);
} }
} else if (XMLChar.isNameStart(fEntityScanner.peekChar())) { } else if (fEntityScanner.skipChar('?')) {
setScannerState(SCANNER_STATE_ROOT_ELEMENT); setScannerState(SCANNER_STATE_PI);
setDriver(fContentDriver);
//from now onwards this would be handled by fContentDriver,in the same next() call
return fContentDriver.next();
} else { } else {
reportFatalError("MarkupNotRecognizedInProlog", reportFatalError("MarkupNotRecognizedInProlog",
null); null);
@ -1395,7 +1394,8 @@ public class XMLDocumentScannerImpl
} else if (fEntityScanner.skipChar('/')) { } else if (fEntityScanner.skipChar('/')) {
reportFatalError("MarkupNotRecognizedInMisc", reportFatalError("MarkupNotRecognizedInMisc",
null); null);
} else if (XMLChar.isNameStart(fEntityScanner.peekChar())) { } else if (isValidNameStartChar(fEntityScanner.peekChar()) ||
isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
reportFatalError("MarkupNotRecognizedInMisc", reportFatalError("MarkupNotRecognizedInMisc",
null); null);
scanStartElement(); scanStartElement();

View File

@ -784,7 +784,7 @@ public abstract class XMLScanner
if (XMLChar.isHighSurrogate(c)) { if (XMLChar.isHighSurrogate(c)) {
scanSurrogates(text); scanSurrogates(text);
} }
if (isInvalidLiteral(c)) { else if (isInvalidLiteral(c)) {
reportFatalError("InvalidCharInComment", reportFatalError("InvalidCharInComment",
new Object[] { Integer.toHexString(c) }); new Object[] { Integer.toHexString(c) });
fEntityScanner.scanChar(); fEntityScanner.scanChar();
@ -1385,6 +1385,14 @@ public abstract class XMLScanner
return (XMLChar.isNameStart(value)); return (XMLChar.isNameStart(value));
} // isValidNameStartChar(int): boolean } // isValidNameStartChar(int): boolean
/**
 * Tells whether the given character is a valid high surrogate for a
 * name-start character with respect to the version of XML understood
 * by this scanner.
 *
 * This implementation accepts no high surrogate and always returns
 * {@code false}.
 * NOTE(review): presumably scanners for XML versions that allow
 * supplementary characters in names override this method -- confirm
 * against the subclasses.
 *
 * @param value the character to check
 * @return {@code false} in this implementation
 */
protected boolean isValidNameStartHighSurrogate(int value) {
return false;
} // isValidNameStartHighSurrogate(int): boolean
protected boolean versionSupported(String version ) { protected boolean versionSupported(String version ) {
return version.equals("1.0") || version.equals("1.1"); return version.equals("1.0") || version.equals("1.1");
} // version Supported } // version Supported

View File

@ -0,0 +1,67 @@
package parsers;
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * @bug 8072081
 * @summary verifies that supplementary characters are supported as character
 * data and in comments in xml 1.0 and xml 1.1, and also in names in xml 1.1.
 *
 * Joe Wang (huizhe.wang@oracle.com)
 */
public class SupplementaryChars {

    /**
     * Parses a document that contains a supplementary character in a
     * position where it is expected to be accepted.
     *
     * @param xml the document to parse
     * @throws Exception if the document is rejected, which fails the test
     */
    @Test(dataProvider = "supported")
    public void test(String xml) throws Exception {
        parse(xml);
    }

    /**
     * Parses a document that contains a supplementary character in a
     * position where it is expected to be rejected.
     *
     * @param xml the document to parse
     * @throws Exception the expected {@link SAXParseException}
     */
    @Test(dataProvider = "unsupported", expectedExceptions = SAXParseException.class)
    public void testInvalid(String xml) throws Exception {
        parse(xml);
    }

    /**
     * Documents using the supplementary character U+2000B (the surrogate
     * pair {@code \uD840\uDC0B}) that the parser must accept: character
     * data and comments for both versions, element and attribute names
     * for version 1.1.
     */
    @DataProvider(name = "supported")
    private Object[][] supported() {
        return new Object[][] {
            {"<?xml version=\"1.0\"?><tag>\uD840\uDC0B</tag>"},
            {"<?xml version=\"1.0\"?><!-- \uD840\uDC0B --><tag/>"},
            {"<?xml version=\"1.1\"?><tag\uD840\uDC0B>in tag name</tag\uD840\uDC0B>"},
            {"<?xml version=\"1.1\"?><tag attr\uD840\uDC0B=\"in attribute\">in attribute name</tag>"},
            {"<?xml version=\"1.1\"?><tag>\uD840\uDC0B</tag>"},
            {"<?xml version=\"1.1\"?><!-- \uD840\uDC0B --><dontCare/>"}
        };
    }

    /**
     * Version 1.0 documents using the same supplementary character in an
     * element name and in an attribute name, which the parser must reject.
     */
    @DataProvider(name = "unsupported")
    private Object[][] unsupported() {
        return new Object[][] {
            {"<?xml version=\"1.0\"?><tag\uD840\uDC0B>in tag name</tag\uD840\uDC0B>"},
            {"<?xml version=\"1.0\"?><tag attr\uD840\uDC0B=\"in attribute\">in attribute name</tag>"}
        };
    }

    /**
     * Encodes the document as UTF-8 and runs it through a SAX parser with a
     * no-op handler. The stream is closed even when parsing fails, which is
     * the expected outcome for the "unsupported" documents.
     */
    private void parse(String xml) throws Exception {
        try (ByteArrayInputStream stream =
                new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8))) {
            getParser().parse(stream, new DefaultHandler());
        }
    }

    /**
     * Creates a SAX parser from a default-configured factory.
     *
     * @return a new parser, never {@code null}
     * @throws RuntimeException wrapping the original failure as its cause
     *         if the parser cannot be created
     */
    private SAXParser getParser() {
        try {
            return SAXParserFactory.newInstance().newSAXParser();
        } catch (Exception e) {
            // Keep the original exception as the cause so the stack trace
            // of the configuration failure is not lost.
            throw new RuntimeException(e.getMessage(), e);
        }
    }
}