8043592: The basic XML parser based on UKit fails to read XML files encoded in UTF-16BE or LE
Reviewed-by: sherman, lancea
This commit is contained in:
parent
714a7e877e
commit
91d9371566
@ -2860,14 +2860,25 @@ public abstract class Parser {
|
|||||||
} else {
|
} else {
|
||||||
// Get encoding from BOM or the xml text decl.
|
// Get encoding from BOM or the xml text decl.
|
||||||
reader = bom(is.getByteStream(), ' ');
|
reader = bom(is.getByteStream(), ' ');
|
||||||
|
/**
|
||||||
|
* [#4.3.3] requires BOM for UTF-16, however, it's not uncommon
|
||||||
|
* that it may be missing. A mature technique exists in Xerces
|
||||||
|
* to further check for possible UTF-16 encoding
|
||||||
|
*/
|
||||||
|
if (reader == null) {
|
||||||
|
reader = utf16(is.getByteStream());
|
||||||
|
}
|
||||||
|
|
||||||
if (reader == null) {
|
if (reader == null) {
|
||||||
// Encoding is defined by the xml text decl.
|
// Encoding is defined by the xml text decl.
|
||||||
reader = enc("UTF-8", is.getByteStream());
|
reader = enc("UTF-8", is.getByteStream());
|
||||||
expenc = xml(reader);
|
expenc = xml(reader);
|
||||||
|
if (!expenc.equals("UTF-8")) {
|
||||||
if (expenc.startsWith("UTF-16")) {
|
if (expenc.startsWith("UTF-16")) {
|
||||||
panic(FAULT); // UTF-16 must have BOM [#4.3.3]
|
panic(FAULT); // UTF-16 must have BOM [#4.3.3]
|
||||||
}
|
}
|
||||||
reader = enc(expenc, is.getByteStream());
|
reader = enc(expenc, is.getByteStream());
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// Encoding is defined by the BOM.
|
// Encoding is defined by the BOM.
|
||||||
xml(reader);
|
xml(reader);
|
||||||
@ -2956,6 +2967,49 @@ public abstract class Parser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Using a mature technique from Xerces, this method checks further after
|
||||||
|
* the bom method above to see if the encoding is UTF-16
|
||||||
|
*
|
||||||
|
* @param is A byte stream of the entity.
|
||||||
|
* @return a reader, may be null
|
||||||
|
* @exception Exception is parser specific exception form panic method.
|
||||||
|
* @exception IOException
|
||||||
|
*/
|
||||||
|
private Reader utf16(InputStream is)
|
||||||
|
throws Exception {
|
||||||
|
if (mChIdx != 0) {
|
||||||
|
//The bom method has read ONE byte into the buffer.
|
||||||
|
byte b0 = (byte)mChars[0];
|
||||||
|
if (b0 == 0x00 || b0 == 0x3C) {
|
||||||
|
int b1 = is.read();
|
||||||
|
int b2 = is.read();
|
||||||
|
int b3 = is.read();
|
||||||
|
if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
|
||||||
|
// UTF-16, big-endian, no BOM
|
||||||
|
mChars[0] = (char)(b1);
|
||||||
|
mChars[mChIdx++] = (char)(b3);
|
||||||
|
return new ReaderUTF16(is, 'b');
|
||||||
|
} else if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
|
||||||
|
// UTF-16, little-endian, no BOM
|
||||||
|
mChars[0] = (char)(b0);
|
||||||
|
mChars[mChIdx++] = (char)(b2);
|
||||||
|
return new ReaderUTF16(is, 'l');
|
||||||
|
} else {
|
||||||
|
/**not every InputStream supports reset, so we have to remember
|
||||||
|
* the state for further parsing
|
||||||
|
**/
|
||||||
|
mChars[0] = (char)(b0);
|
||||||
|
mChars[mChIdx++] = (char)(b1);
|
||||||
|
mChars[mChIdx++] = (char)(b2);
|
||||||
|
mChars[mChIdx++] = (char)(b3);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Parses the xml text declaration.
|
* Parses the xml text declaration.
|
||||||
*
|
*
|
||||||
@ -2974,17 +3028,17 @@ public abstract class Parser {
|
|||||||
String enc = "UTF-8";
|
String enc = "UTF-8";
|
||||||
char ch;
|
char ch;
|
||||||
int val;
|
int val;
|
||||||
short st;
|
short st = 0;
|
||||||
// Read the xml text declaration into the buffer
|
int byteRead = mChIdx; //number of bytes read prior to entering this method
|
||||||
if (mChIdx != 0) {
|
|
||||||
// The bom method have read ONE char into the buffer.
|
|
||||||
st = (short) ((mChars[0] == '<') ? 1 : -1);
|
|
||||||
} else {
|
|
||||||
st = 0;
|
|
||||||
}
|
|
||||||
while (st >= 0 && mChIdx < mChars.length) {
|
while (st >= 0 && mChIdx < mChars.length) {
|
||||||
|
if (st < byteRead) {
|
||||||
|
ch = mChars[st];
|
||||||
|
} else {
|
||||||
ch = ((val = reader.read()) >= 0) ? (char) val : EOS;
|
ch = ((val = reader.read()) >= 0) ? (char) val : EOS;
|
||||||
mChars[mChIdx++] = ch;
|
mChars[mChIdx++] = ch;
|
||||||
|
}
|
||||||
|
|
||||||
switch (st) {
|
switch (st) {
|
||||||
case 0: // read '<' of xml declaration
|
case 0: // read '<' of xml declaration
|
||||||
switch (ch) {
|
switch (ch) {
|
||||||
|
@ -32,6 +32,7 @@ import java.io.ByteArrayOutputStream;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
import java.nio.file.DirectoryStream;
|
import java.nio.file.DirectoryStream;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
@ -47,6 +48,7 @@ import java.util.Properties;
|
|||||||
import java.util.PropertyPermission;
|
import java.util.PropertyPermission;
|
||||||
|
|
||||||
public class LoadAndStoreXML {
|
public class LoadAndStoreXML {
|
||||||
|
static final String bomChar = "\uFEFF";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Simple policy implementation that grants a set of permissions to
|
* Simple policy implementation that grants a set of permissions to
|
||||||
@ -125,13 +127,14 @@ public class LoadAndStoreXML {
|
|||||||
* Sanity test that properties saved with Properties#storeToXML can be
|
* Sanity test that properties saved with Properties#storeToXML can be
|
||||||
* read with Properties#loadFromXML.
|
* read with Properties#loadFromXML.
|
||||||
*/
|
*/
|
||||||
static void testLoadAndStore(String encoding) throws IOException {
|
static void testLoadAndStore(String encoding, boolean appendBOM) throws IOException {
|
||||||
System.out.println("testLoadAndStore, encoding=" + encoding);
|
System.out.println("testLoadAndStore, encoding=" + encoding);
|
||||||
|
|
||||||
Properties props = new Properties();
|
Properties props = new Properties();
|
||||||
|
props.put("k0", "\u6C34");
|
||||||
props.put("k1", "foo");
|
props.put("k1", "foo");
|
||||||
props.put("k2", "bar");
|
props.put("k2", "bar");
|
||||||
props.put("k3", "\\u0020\\u0391\\u0392\\u0393\\u0394\\u0395\\u0396\\u0397");
|
props.put("k3", "\u0020\u0391\u0392\u0393\u0394\u0395\u0396\u0397");
|
||||||
props.put("k4", "\u7532\u9aa8\u6587");
|
props.put("k4", "\u7532\u9aa8\u6587");
|
||||||
props.put("k5", "<java.home>/lib/jaxp.properties");
|
props.put("k5", "<java.home>/lib/jaxp.properties");
|
||||||
|
|
||||||
@ -141,7 +144,17 @@ public class LoadAndStoreXML {
|
|||||||
throw new RuntimeException("OutputStream closed by storeToXML");
|
throw new RuntimeException("OutputStream closed by storeToXML");
|
||||||
|
|
||||||
Properties p = new Properties();
|
Properties p = new Properties();
|
||||||
TestInputStream in = new TestInputStream(out.toByteArray());
|
TestInputStream in;
|
||||||
|
if (appendBOM) {
|
||||||
|
byte[] byteOrderMark = bomChar.getBytes(Charset.forName(encoding));
|
||||||
|
byte[] outArray = out.toByteArray();
|
||||||
|
byte[] inputArray = new byte[byteOrderMark.length + outArray.length];
|
||||||
|
System.arraycopy(byteOrderMark, 0, inputArray, 0, byteOrderMark.length);
|
||||||
|
System.arraycopy(outArray, 0, inputArray, byteOrderMark.length, outArray.length);
|
||||||
|
in = new TestInputStream(inputArray);
|
||||||
|
} else {
|
||||||
|
in = new TestInputStream(out.toByteArray());
|
||||||
|
}
|
||||||
p.loadFromXML(in);
|
p.loadFromXML(in);
|
||||||
if (in.isOpen())
|
if (in.isOpen())
|
||||||
throw new RuntimeException("InputStream not closed by loadFromXML");
|
throw new RuntimeException("InputStream not closed by loadFromXML");
|
||||||
@ -231,8 +244,12 @@ public class LoadAndStoreXML {
|
|||||||
|
|
||||||
public static void main(String[] args) throws IOException {
|
public static void main(String[] args) throws IOException {
|
||||||
|
|
||||||
testLoadAndStore("UTF-8");
|
testLoadAndStore("UTF-8", false);
|
||||||
testLoadAndStore("UTF-16");
|
testLoadAndStore("UTF-16", false);
|
||||||
|
testLoadAndStore("UTF-16BE", false);
|
||||||
|
testLoadAndStore("UTF-16LE", false);
|
||||||
|
testLoadAndStore("UTF-16BE", true);
|
||||||
|
testLoadAndStore("UTF-16LE", true);
|
||||||
testLoadWithoutEncoding();
|
testLoadWithoutEncoding();
|
||||||
testLoadWithBadEncoding();
|
testLoadWithBadEncoding();
|
||||||
testStoreWithBadEncoding();
|
testStoreWithBadEncoding();
|
||||||
@ -250,7 +267,7 @@ public class LoadAndStoreXML {
|
|||||||
Policy.setPolicy(p);
|
Policy.setPolicy(p);
|
||||||
System.setSecurityManager(new SecurityManager());
|
System.setSecurityManager(new SecurityManager());
|
||||||
try {
|
try {
|
||||||
testLoadAndStore("UTF-8");
|
testLoadAndStore("UTF-8", false);
|
||||||
} finally {
|
} finally {
|
||||||
// turn off security manager and restore policy
|
// turn off security manager and restore policy
|
||||||
System.setSecurityManager(null);
|
System.setSecurityManager(null);
|
||||||
|
Loading…
Reference in New Issue
Block a user