8072081: Supplementary characters are rejected in comments

Reviewed-by: lancea
This commit is contained in:
Joe Wang 2015-12-09 21:40:44 -08:00
parent 60e4fa0770
commit bed7e98034
4 changed files with 121 additions and 43 deletions

View File

@ -1394,7 +1394,12 @@ public class XMLDocumentFragmentScannerImpl
fEmptyElement = true;
return true;
} else if (!isValidNameStartChar(c) || !sawSpace) {
reportFatalError("ElementUnterminated", new Object[]{fElementQName.rawname});
// Second chance. Check if this character is a high
// surrogate of a valid name start character.
if (!isValidNameStartHighSurrogate(c) || !sawSpace) {
reportFatalError("ElementUnterminated",
new Object[]{fElementQName.rawname});
}
}
return false;
@ -2606,40 +2611,38 @@ public class XMLDocumentFragmentScannerImpl
private void startOfMarkup() throws IOException {
fMarkupDepth++;
final int ch = fEntityScanner.peekChar();
switch(ch){
case '?' :{
setScannerState(SCANNER_STATE_PI);
fEntityScanner.skipChar(ch);
break;
}
case '!' :{
fEntityScanner.skipChar(ch);
if (fEntityScanner.skipChar('-')) {
if (!fEntityScanner.skipChar('-')) {
reportFatalError("InvalidCommentStart",
if (isValidNameStartChar(ch) || isValidNameStartHighSurrogate(ch)) {
setScannerState(SCANNER_STATE_START_ELEMENT_TAG);
} else {
switch(ch){
case '?' :{
setScannerState(SCANNER_STATE_PI);
fEntityScanner.skipChar(ch);
break;
}
case '!' :{
fEntityScanner.skipChar(ch);
if (fEntityScanner.skipChar('-')) {
if (!fEntityScanner.skipChar('-')) {
reportFatalError("InvalidCommentStart",
null);
}
setScannerState(SCANNER_STATE_COMMENT);
} else if (fEntityScanner.skipString(cdata)) {
setScannerState(SCANNER_STATE_CDATA );
} else if (!scanForDoctypeHook()) {
reportFatalError("MarkupNotRecognizedInContent",
null);
}
setScannerState(SCANNER_STATE_COMMENT);
} else if (fEntityScanner.skipString(cdata)) {
setScannerState(SCANNER_STATE_CDATA );
} else if (!scanForDoctypeHook()) {
reportFatalError("MarkupNotRecognizedInContent",
null);
break;
}
break;
}
case '/' :{
setScannerState(SCANNER_STATE_END_ELEMENT_TAG);
fEntityScanner.skipChar(ch);
break;
}
default :{
if (isValidNameStartChar(ch)) {
setScannerState(SCANNER_STATE_START_ELEMENT_TAG);
} else {
reportFatalError("MarkupNotRecognizedInContent",
null);
case '/' :{
setScannerState(SCANNER_STATE_END_ELEMENT_TAG);
fEntityScanner.skipChar(ch);
break;
}
default :{
reportFatalError("MarkupNotRecognizedInContent", null);
}
}
}

View File

@ -847,9 +847,12 @@ public class XMLDocumentScannerImpl
case SCANNER_STATE_START_OF_MARKUP: {
fMarkupDepth++;
if (fEntityScanner.skipChar('?')) {
setScannerState(SCANNER_STATE_PI);
if (isValidNameStartChar(fEntityScanner.peekChar()) ||
isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
setScannerState(SCANNER_STATE_ROOT_ELEMENT);
setDriver(fContentDriver);
//from now onwards this would be handled by fContentDriver,in the same next() call
return fContentDriver.next();
} else if (fEntityScanner.skipChar('!')) {
if (fEntityScanner.skipChar('-')) {
if (!fEntityScanner.skipChar('-')) {
@ -872,12 +875,8 @@ public class XMLDocumentScannerImpl
reportFatalError("MarkupNotRecognizedInProlog",
null);
}
} else if (XMLChar.isNameStart(fEntityScanner.peekChar())) {
setScannerState(SCANNER_STATE_ROOT_ELEMENT);
setDriver(fContentDriver);
//from now onwards this would be handled by fContentDriver,in the same next() call
return fContentDriver.next();
} else if (fEntityScanner.skipChar('?')) {
setScannerState(SCANNER_STATE_PI);
} else {
reportFatalError("MarkupNotRecognizedInProlog",
null);
@ -1395,7 +1394,8 @@ public class XMLDocumentScannerImpl
} else if (fEntityScanner.skipChar('/')) {
reportFatalError("MarkupNotRecognizedInMisc",
null);
} else if (XMLChar.isNameStart(fEntityScanner.peekChar())) {
} else if (isValidNameStartChar(fEntityScanner.peekChar()) ||
isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
reportFatalError("MarkupNotRecognizedInMisc",
null);
scanStartElement();

View File

@ -784,7 +784,7 @@ public abstract class XMLScanner
if (XMLChar.isHighSurrogate(c)) {
scanSurrogates(text);
}
if (isInvalidLiteral(c)) {
else if (isInvalidLiteral(c)) {
reportFatalError("InvalidCharInComment",
new Object[] { Integer.toHexString(c) });
fEntityScanner.scanChar();
@ -1385,6 +1385,14 @@ public abstract class XMLScanner
return (XMLChar.isNameStart(value));
} // isValidNameStartChar(int): boolean
/**
 * Reports whether the given character is a valid high surrogate for a
 * name start character, with respect to the version of XML understood
 * by this scanner.
 *
 * This implementation unconditionally returns {@code false}.
 * NOTE(review): presumably scanners for XML versions that allow
 * supplementary characters in names override this method; confirm
 * against the subclasses.
 *
 * @param value the character (code unit) to test
 * @return {@code false} in this implementation
 */
protected boolean isValidNameStartHighSurrogate(int value) {
return false;
} // isValidNameStartHighSurrogate(int): boolean
/**
 * Tells whether the given XML version string is one this scanner accepts.
 *
 * @param version the version string to check; must not be {@code null}
 * @return {@code true} when {@code version} is exactly "1.0" or "1.1"
 */
protected boolean versionSupported(String version) {
    // Guard-clause form: accept 1.0 straight away, otherwise the answer
    // is simply whether the string names 1.1.
    if (version.equals("1.0")) {
        return true;
    }
    return version.equals("1.1");
} // versionSupported(String): boolean

View File

@ -0,0 +1,67 @@
package parsers;
import java.io.ByteArrayInputStream;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * @bug 8072081
 * @summary verifies that supplementary characters are supported as character
 * data in xml 1.0, and also names in xml 1.1.
 *
 * Joe Wang (huizhe.wang@oracle.com)
 */
public class SupplementaryChars {

    /**
     * Parses a document that contains a supplementary character in a
     * position listed by the "supported" data provider; the parse is
     * expected to complete without an exception.
     *
     * @param xml the document text to parse
     * @throws Exception if the parser rejects the document
     */
    @Test(dataProvider = "supported")
    public void test(String xml) throws Exception {
        parse(xml);
    }

    /**
     * Parses a document listed by the "unsupported" data provider; the
     * test passes only when the parse fails with a SAXParseException.
     *
     * @param xml the document text to parse
     * @throws Exception the expected SAXParseException, or any other failure
     */
    @Test(dataProvider = "unsupported", expectedExceptions = SAXParseException.class)
    public void testInvalid(String xml) throws Exception {
        parse(xml);
    }

    /**
     * Documents containing the supplementary character U+2000B (written as
     * the surrogate pair D840 DC0B) that the tests expect to parse cleanly.
     */
    @DataProvider(name = "supported")
    private Object[][] supported() {
        return new Object[][] {
            {"<?xml version=\"1.0\"?><tag>\uD840\uDC0B</tag>"},
            {"<?xml version=\"1.0\"?><!-- \uD840\uDC0B --><tag/>"},
            {"<?xml version=\"1.1\"?><tag\uD840\uDC0B>in tag name</tag\uD840\uDC0B>"},
            {"<?xml version=\"1.1\"?><tag attr\uD840\uDC0B=\"in attribute\">in attribute name</tag>"},
            {"<?xml version=\"1.1\"?><tag>\uD840\uDC0B</tag>"},
            {"<?xml version=\"1.1\"?><!-- \uD840\uDC0B --><dontCare/>"}
        };
    }

    /**
     * XML 1.0 documents using the supplementary character inside an element
     * or attribute name; the tests expect these to be rejected.
     */
    @DataProvider(name = "unsupported")
    private Object[][] unsupported() {
        return new Object[][] {
            {"<?xml version=\"1.0\"?><tag\uD840\uDC0B>in tag name</tag\uD840\uDC0B>"},
            {"<?xml version=\"1.0\"?><tag attr\uD840\uDC0B=\"in attribute\">in attribute name</tag>"}
        };
    }

    /**
     * Encodes the document as UTF-8 and runs it through a fresh SAX parser
     * with a no-op handler. The stream is closed even when parsing throws,
     * which the original hand-written close() call did not guarantee.
     *
     * @param xml the document text to parse
     * @throws Exception whatever the parser raises for this document
     */
    private void parse(String xml) throws Exception {
        try (ByteArrayInputStream stream = new ByteArrayInputStream(xml.getBytes("UTF-8"))) {
            getParser().parse(stream, new DefaultHandler());
        }
    }

    /**
     * Creates a SAX parser from the default factory.
     *
     * @return a newly created parser, never {@code null}
     * @throws RuntimeException if the factory or parser cannot be created;
     *         the original exception is attached as the cause
     */
    private SAXParser getParser() {
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            return factory.newSAXParser();
        } catch (Exception e) {
            // Keep the underlying exception as the cause so a setup failure
            // is diagnosable from the test report, not just its message.
            throw new RuntimeException(e.getMessage(), e);
        }
    }
}