diff --git a/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentFragmentScannerImpl.java b/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentFragmentScannerImpl.java index ea0f2e2f74b..1ea57877573 100644 --- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentFragmentScannerImpl.java +++ b/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentFragmentScannerImpl.java @@ -1394,7 +1394,12 @@ public class XMLDocumentFragmentScannerImpl fEmptyElement = true; return true; } else if (!isValidNameStartChar(c) || !sawSpace) { - reportFatalError("ElementUnterminated", new Object[]{fElementQName.rawname}); + // Second chance. Check if this character is a high + // surrogate of a valid name start character. + if (!isValidNameStartHighSurrogate(c) || !sawSpace) { + reportFatalError("ElementUnterminated", + new Object[]{fElementQName.rawname}); + } } return false; @@ -2606,40 +2611,38 @@ public class XMLDocumentFragmentScannerImpl private void startOfMarkup() throws IOException { fMarkupDepth++; final int ch = fEntityScanner.peekChar(); - - switch(ch){ - case '?' :{ - setScannerState(SCANNER_STATE_PI); - fEntityScanner.skipChar(ch); - break; - } - case '!' :{ - fEntityScanner.skipChar(ch); - if (fEntityScanner.skipChar('-')) { - if (!fEntityScanner.skipChar('-')) { - reportFatalError("InvalidCommentStart", + if (isValidNameStartChar(ch) || isValidNameStartHighSurrogate(ch)) { + setScannerState(SCANNER_STATE_START_ELEMENT_TAG); + } else { + switch(ch){ + case '?' :{ + setScannerState(SCANNER_STATE_PI); + fEntityScanner.skipChar(ch); + break; + } + case '!' :{ + fEntityScanner.skipChar(ch); + if (fEntityScanner.skipChar('-')) { + if (!fEntityScanner.skipChar('-')) { + reportFatalError("InvalidCommentStart", + null); + } + setScannerState(SCANNER_STATE_COMMENT); + } else if (fEntityScanner.skipString(cdata)) { + setScannerState(SCANNER_STATE_CDATA ); + } else if (!scanForDoctypeHook()) { + reportFatalError("MarkupNotRecognizedInContent", null); } - setScannerState(SCANNER_STATE_COMMENT); - } else if (fEntityScanner.skipString(cdata)) { - setScannerState(SCANNER_STATE_CDATA ); - } else if (!scanForDoctypeHook()) { - reportFatalError("MarkupNotRecognizedInContent", - null); + break; } - break; - } - case '/' :{ - setScannerState(SCANNER_STATE_END_ELEMENT_TAG); - fEntityScanner.skipChar(ch); - break; - } - default :{ - if (isValidNameStartChar(ch)) { - setScannerState(SCANNER_STATE_START_ELEMENT_TAG); - } else { - reportFatalError("MarkupNotRecognizedInContent", - null); + case '/' :{ + setScannerState(SCANNER_STATE_END_ELEMENT_TAG); + fEntityScanner.skipChar(ch); + break; + } + default :{ + reportFatalError("MarkupNotRecognizedInContent", null); } } } diff --git a/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentScannerImpl.java b/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentScannerImpl.java index e562457f85c..3d839652705 100644 --- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentScannerImpl.java +++ b/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentScannerImpl.java @@ -847,9 +847,12 @@ public class XMLDocumentScannerImpl case SCANNER_STATE_START_OF_MARKUP: { fMarkupDepth++; - - if (fEntityScanner.skipChar('?')) { - setScannerState(SCANNER_STATE_PI); + if (isValidNameStartChar(fEntityScanner.peekChar()) || + isValidNameStartHighSurrogate(fEntityScanner.peekChar())) { + setScannerState(SCANNER_STATE_ROOT_ELEMENT); + setDriver(fContentDriver); + //from now onwards this would be handled by fContentDriver,in the same next() call + return fContentDriver.next(); } else if (fEntityScanner.skipChar('!')) { if (fEntityScanner.skipChar('-')) { if (!fEntityScanner.skipChar('-')) { @@ -872,12 +875,8 @@ public class XMLDocumentScannerImpl reportFatalError("MarkupNotRecognizedInProlog", null); } - } else if (XMLChar.isNameStart(fEntityScanner.peekChar())) { - setScannerState(SCANNER_STATE_ROOT_ELEMENT); - setDriver(fContentDriver); - //from now onwards this would be handled by fContentDriver,in the same next() call - return fContentDriver.next(); - + } else if (fEntityScanner.skipChar('?')) { + setScannerState(SCANNER_STATE_PI); } else { reportFatalError("MarkupNotRecognizedInProlog", null); @@ -1395,7 +1394,8 @@ public class XMLDocumentScannerImpl } else if (fEntityScanner.skipChar('/')) { reportFatalError("MarkupNotRecognizedInMisc", null); - } else if (XMLChar.isNameStart(fEntityScanner.peekChar())) { + } else if (isValidNameStartChar(fEntityScanner.peekChar()) || + isValidNameStartHighSurrogate(fEntityScanner.peekChar())) { reportFatalError("MarkupNotRecognizedInMisc", null); scanStartElement(); diff --git a/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLScanner.java b/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLScanner.java index 6316d4a3bfb..96864c3886d 100644 --- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLScanner.java +++ b/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLScanner.java @@ -784,7 +784,7 @@ public abstract class XMLScanner if (XMLChar.isHighSurrogate(c)) { scanSurrogates(text); } - if (isInvalidLiteral(c)) { + else if (isInvalidLiteral(c)) { reportFatalError("InvalidCharInComment", new Object[] { Integer.toHexString(c) }); fEntityScanner.scanChar(); @@ -1385,6 +1385,14 @@ public abstract class XMLScanner return (XMLChar.isNameStart(value)); } // isValidNameStartChar(int): boolean + // returns true if the given character is + // a valid high surrogate for a nameStartChar + // with respect to the version of XML understood + // by this scanner. + protected boolean isValidNameStartHighSurrogate(int value) { + return false; + } // isValidNameStartHighSurrogate(int): boolean + protected boolean versionSupported(String version ) { return version.equals("1.0") || version.equals("1.1"); } // version Supported diff --git a/jaxp/test/javax/xml/jaxp/unittest/parsers/SupplementaryChars.java b/jaxp/test/javax/xml/jaxp/unittest/parsers/SupplementaryChars.java new file mode 100644 index 00000000000..7430e03f5c4 --- /dev/null +++ b/jaxp/test/javax/xml/jaxp/unittest/parsers/SupplementaryChars.java @@ -0,0 +1,67 @@ +package parsers; + +import java.io.ByteArrayInputStream; +import javax.xml.parsers.SAXParser; +import javax.xml.parsers.SAXParserFactory; + +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; +import org.xml.sax.SAXParseException; +import org.xml.sax.helpers.DefaultHandler; + +/** + * @bug 8072081 + * @summary verifies that supplementary characters are supported as character + * data in xml 1.0, and also names in xml 1.1. + * + * Joe Wang (huizhe.wang@oracle.com) + */ + +public class SupplementaryChars { + + @Test(dataProvider = "supported") + public void test(String xml) throws Exception { + ByteArrayInputStream stream = new ByteArrayInputStream(xml.getBytes("UTF-8")); + getParser().parse(stream, new DefaultHandler()); + stream.close(); + } + + @Test(dataProvider = "unsupported", expectedExceptions = SAXParseException.class) + public void testInvalid(String xml) throws Exception { + ByteArrayInputStream stream = new ByteArrayInputStream(xml.getBytes("UTF-8")); + getParser().parse(stream, new DefaultHandler()); + stream.close(); + } + + @DataProvider(name = "supported") + private Object[][] supported() { + + return new Object[][] { + {"\uD840\uDC0B"}, + {""}, + {"in tag name"}, + {"in attribute name"}, + {"\uD840\uDC0B"}, + {""} + }; + } + + @DataProvider(name = "unsupported") + private Object[][] unsupported() { + return new Object[][] { + {"in tag name"}, + {"in attribute name"} + }; + } + + private SAXParser getParser() { + SAXParser parser = null; + try { + SAXParserFactory factory = SAXParserFactory.newInstance(); + parser = factory.newSAXParser(); + } catch (Exception e) { + throw new RuntimeException(e.getMessage()); + } + return parser; + } +}