diff --git a/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentFragmentScannerImpl.java b/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentFragmentScannerImpl.java
index ea0f2e2f74b..1ea57877573 100644
--- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentFragmentScannerImpl.java
+++ b/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentFragmentScannerImpl.java
@@ -1394,7 +1394,12 @@ public class XMLDocumentFragmentScannerImpl
fEmptyElement = true;
return true;
} else if (!isValidNameStartChar(c) || !sawSpace) {
- reportFatalError("ElementUnterminated", new Object[]{fElementQName.rawname});
+ // Second chance. Check if this character is a high
+ // surrogate of a valid name start character.
+ if (!isValidNameStartHighSurrogate(c) || !sawSpace) {
+ reportFatalError("ElementUnterminated",
+ new Object[]{fElementQName.rawname});
+ }
}
return false;
@@ -2606,40 +2611,38 @@ public class XMLDocumentFragmentScannerImpl
private void startOfMarkup() throws IOException {
fMarkupDepth++;
final int ch = fEntityScanner.peekChar();
-
- switch(ch){
- case '?' :{
- setScannerState(SCANNER_STATE_PI);
- fEntityScanner.skipChar(ch);
- break;
- }
- case '!' :{
- fEntityScanner.skipChar(ch);
- if (fEntityScanner.skipChar('-')) {
- if (!fEntityScanner.skipChar('-')) {
- reportFatalError("InvalidCommentStart",
+ if (isValidNameStartChar(ch) || isValidNameStartHighSurrogate(ch)) {
+ setScannerState(SCANNER_STATE_START_ELEMENT_TAG);
+ } else {
+ switch(ch){
+ case '?' :{
+ setScannerState(SCANNER_STATE_PI);
+ fEntityScanner.skipChar(ch);
+ break;
+ }
+ case '!' :{
+ fEntityScanner.skipChar(ch);
+ if (fEntityScanner.skipChar('-')) {
+ if (!fEntityScanner.skipChar('-')) {
+ reportFatalError("InvalidCommentStart",
+ null);
+ }
+ setScannerState(SCANNER_STATE_COMMENT);
+ } else if (fEntityScanner.skipString(cdata)) {
+ setScannerState(SCANNER_STATE_CDATA );
+ } else if (!scanForDoctypeHook()) {
+ reportFatalError("MarkupNotRecognizedInContent",
null);
}
- setScannerState(SCANNER_STATE_COMMENT);
- } else if (fEntityScanner.skipString(cdata)) {
- setScannerState(SCANNER_STATE_CDATA );
- } else if (!scanForDoctypeHook()) {
- reportFatalError("MarkupNotRecognizedInContent",
- null);
+ break;
}
- break;
- }
- case '/' :{
- setScannerState(SCANNER_STATE_END_ELEMENT_TAG);
- fEntityScanner.skipChar(ch);
- break;
- }
- default :{
- if (isValidNameStartChar(ch)) {
- setScannerState(SCANNER_STATE_START_ELEMENT_TAG);
- } else {
- reportFatalError("MarkupNotRecognizedInContent",
- null);
+ case '/' :{
+ setScannerState(SCANNER_STATE_END_ELEMENT_TAG);
+ fEntityScanner.skipChar(ch);
+ break;
+ }
+ default :{
+ reportFatalError("MarkupNotRecognizedInContent", null);
}
}
}
diff --git a/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentScannerImpl.java b/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentScannerImpl.java
index e562457f85c..3d839652705 100644
--- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentScannerImpl.java
+++ b/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLDocumentScannerImpl.java
@@ -847,9 +847,12 @@ public class XMLDocumentScannerImpl
case SCANNER_STATE_START_OF_MARKUP: {
fMarkupDepth++;
-
- if (fEntityScanner.skipChar('?')) {
- setScannerState(SCANNER_STATE_PI);
+ if (isValidNameStartChar(fEntityScanner.peekChar()) ||
+ isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
+ setScannerState(SCANNER_STATE_ROOT_ELEMENT);
+ setDriver(fContentDriver);
+ //from now onwards this would be handled by fContentDriver,in the same next() call
+ return fContentDriver.next();
} else if (fEntityScanner.skipChar('!')) {
if (fEntityScanner.skipChar('-')) {
if (!fEntityScanner.skipChar('-')) {
@@ -872,12 +875,8 @@ public class XMLDocumentScannerImpl
reportFatalError("MarkupNotRecognizedInProlog",
null);
}
- } else if (XMLChar.isNameStart(fEntityScanner.peekChar())) {
- setScannerState(SCANNER_STATE_ROOT_ELEMENT);
- setDriver(fContentDriver);
- //from now onwards this would be handled by fContentDriver,in the same next() call
- return fContentDriver.next();
-
+ } else if (fEntityScanner.skipChar('?')) {
+ setScannerState(SCANNER_STATE_PI);
} else {
reportFatalError("MarkupNotRecognizedInProlog",
null);
@@ -1395,7 +1394,8 @@ public class XMLDocumentScannerImpl
} else if (fEntityScanner.skipChar('/')) {
reportFatalError("MarkupNotRecognizedInMisc",
null);
- } else if (XMLChar.isNameStart(fEntityScanner.peekChar())) {
+ } else if (isValidNameStartChar(fEntityScanner.peekChar()) ||
+ isValidNameStartHighSurrogate(fEntityScanner.peekChar())) {
reportFatalError("MarkupNotRecognizedInMisc",
null);
scanStartElement();
diff --git a/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLScanner.java b/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLScanner.java
index 6316d4a3bfb..96864c3886d 100644
--- a/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLScanner.java
+++ b/jaxp/src/java.xml/share/classes/com/sun/org/apache/xerces/internal/impl/XMLScanner.java
@@ -784,7 +784,7 @@ public abstract class XMLScanner
if (XMLChar.isHighSurrogate(c)) {
scanSurrogates(text);
}
- if (isInvalidLiteral(c)) {
+ else if (isInvalidLiteral(c)) {
reportFatalError("InvalidCharInComment",
new Object[] { Integer.toHexString(c) });
fEntityScanner.scanChar();
@@ -1385,6 +1385,14 @@ public abstract class XMLScanner
return (XMLChar.isNameStart(value));
} // isValidNameStartChar(int): boolean
+ // Returns true if the given character is a valid high surrogate
+ // for a name start character with respect to the version of XML
+ // understood by this scanner. This implementation accepts none and
+ // always returns false (version-specific scanners presumably override it).
+ protected boolean isValidNameStartHighSurrogate(int value) {
+ return false;
+ } // isValidNameStartHighSurrogate(int): boolean
+
protected boolean versionSupported(String version ) {
return version.equals("1.0") || version.equals("1.1");
} // version Supported
diff --git a/jaxp/test/javax/xml/jaxp/unittest/parsers/SupplementaryChars.java b/jaxp/test/javax/xml/jaxp/unittest/parsers/SupplementaryChars.java
new file mode 100644
index 00000000000..7430e03f5c4
--- /dev/null
+++ b/jaxp/test/javax/xml/jaxp/unittest/parsers/SupplementaryChars.java
@@ -0,0 +1,102 @@
+package parsers;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import org.xml.sax.SAXException;
+import org.xml.sax.SAXParseException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * @bug 8072081
+ * @summary verifies that supplementary characters are supported as character
+ * data and in comments in xml 1.0 and 1.1, and also in names in xml 1.1.
+ *
+ * Joe Wang (huizhe.wang@oracle.com)
+ */
+
+public class SupplementaryChars {
+
+    // NOTE(review): the markup in the test documents was reconstructed from the
+    // summary above and the surviving text; the copy under review had every tag
+    // stripped, leaving input no XML parser can accept. Confirm the literals.
+    private static final String XML_1_0 = "<?xml version=\"1.0\"?>";
+    private static final String XML_1_1 = "<?xml version=\"1.1\"?>";
+
+    /**
+     * Parses a document whose supplementary characters appear where the declared
+     * XML version allows them; any exception thrown by the parser fails the test.
+     *
+     * @param xml the complete XML document to parse
+     * @throws Exception if the parser cannot be created or rejects the document
+     */
+    @Test(dataProvider = "supported")
+    public void test(String xml) throws Exception {
+        parse(xml);
+    }
+
+    /**
+     * Parses an XML 1.0 document that uses a supplementary character in a name;
+     * the test passes only if parsing fails with a {@link SAXParseException}.
+     *
+     * @param xml the complete XML document to parse
+     * @throws Exception the expected {@link SAXParseException}, or any other failure
+     */
+    @Test(dataProvider = "unsupported", expectedExceptions = SAXParseException.class)
+    public void testInvalid(String xml) throws Exception {
+        parse(xml);
+    }
+
+    /** Documents that must parse without error. */
+    @DataProvider(name = "supported")
+    private Object[][] supported() {
+        return new Object[][] {
+            // XML 1.0: character data, then a comment.
+            {XML_1_0 + "<tag>\uD840\uDC0B</tag>"},
+            {XML_1_0 + "<!-- \uD840\uDC0B --><tag/>"},
+            // XML 1.1: element name, attribute name, character data, comment.
+            {XML_1_1 + "<tag\uD840\uDC0B>in tag name</tag\uD840\uDC0B>"},
+            {XML_1_1 + "<tag attr\uD840\uDC0B=\"in attribute\">in attribute name</tag>"},
+            {XML_1_1 + "<tag>\uD840\uDC0B</tag>"},
+            {XML_1_1 + "<!-- \uD840\uDC0B --><tag/>"}
+        };
+    }
+
+    /** XML 1.0 documents that must be rejected: supplementary characters in names. */
+    @DataProvider(name = "unsupported")
+    private Object[][] unsupported() {
+        return new Object[][] {
+            {XML_1_0 + "<tag\uD840\uDC0B>in tag name</tag\uD840\uDC0B>"},
+            {XML_1_0 + "<tag attr\uD840\uDC0B=\"in attribute\">in attribute name</tag>"}
+        };
+    }
+
+    /** Parses {@code xml}, encoded as UTF-8, with a new parser and a {@link DefaultHandler}. */
+    private void parse(String xml) throws Exception {
+        byte[] bytes = xml.getBytes(StandardCharsets.UTF_8);
+        try (InputStream stream = new ByteArrayInputStream(bytes)) {
+            getParser().parse(stream, new DefaultHandler());
+        }
+    }
+
+    /**
+     * Creates a SAX parser from the default factory.
+     *
+     * @return a new parser
+     * @throws RuntimeException wrapping the underlying failure if no parser can be created
+     */
+    private SAXParser getParser() {
+        try {
+            return SAXParserFactory.newInstance().newSAXParser();
+        } catch (ParserConfigurationException | SAXException e) {
+            // Chain the cause; the message alone hides where parser creation failed.
+            throw new RuntimeException(e.getMessage(), e);
+        }
+    }
+}