7183760: DocumentBuilder.parse(String uri) is not IPv6 enabled
Remove the hack of using escapeNonUSAscii. This is the same patch as 7166896 for 7u8. Reviewed-by: psandoz, lancea
This commit is contained in:
parent
4bdc9c4528
commit
fec1de491b
@ -602,7 +602,7 @@ protected static final String PARSER_SETTINGS =
|
||||
if (reader == null) {
|
||||
stream = xmlInputSource.getByteStream();
|
||||
if (stream == null) {
|
||||
URL location = new URL(escapeNonUSAscii(expandedSystemId));
|
||||
URL location = new URL(expandedSystemId);
|
||||
URLConnection connect = location.openConnection();
|
||||
if (!(connect instanceof HttpURLConnection)) {
|
||||
stream = connect.getInputStream();
|
||||
@ -2586,76 +2586,6 @@ protected static final String PARSER_SETTINGS =
|
||||
|
||||
} // fixURI(String):String
|
||||
|
||||
/**
|
||||
* Escape invalid URI characters.
|
||||
*
|
||||
* Passed a URI that contains invalid characters (like spaces, non-ASCII Unicode characters, and the like),
|
||||
* this function percent encodes the invalid characters per the URI specification (i.e., as a sequence of
|
||||
* %-encoded UTF-8 octets).
|
||||
*
|
||||
* N.B. There are two problems. If the URI contains a '%' character, that might be an indication that
|
||||
* the URI has already been escaped by the author, or it might be an invalid '%'. In the former case,
|
||||
* it's important not to escape it, or we'll wind up with invalid, doubly-escaped '%'s. In the latter,
|
||||
* the URI is broken if we don't encode it. Similarly, a '#' character might be the start of a fragment
|
||||
* identifier or it might be an invalid '#'.
|
||||
*
|
||||
* Given that the former is vastly more likely than the latter in each case (most users are familiar with
|
||||
* the magic status of '%' and '#' and they occur relatively infrequently in filenames, and if the user parses
|
||||
* a proper Java File, we will already have %-escaped the URI), we simply assume that %'s and #'s are legit.
|
||||
*
|
||||
* Very rarely, we may be wrong. If so, tell the user to fix the clearly broken URI.
|
||||
*/
|
||||
protected static String escapeNonUSAscii(String str) {
|
||||
if (str == null) {
|
||||
return str;
|
||||
}
|
||||
int len = str.length(), i=0, ch;
|
||||
for (; i < len; i++) {
|
||||
ch = str.charAt(i);
|
||||
// if it's not an ASCII 7 character, break here, and use UTF-8 encoding
|
||||
if (ch >= 128)
|
||||
break;
|
||||
}
|
||||
|
||||
// we saw no non-ascii-7 character
|
||||
if (i == len) {
|
||||
return str;
|
||||
}
|
||||
|
||||
// get UTF-8 bytes for the string
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
byte[] bytes = null;
|
||||
byte b;
|
||||
try {
|
||||
bytes = str.getBytes("UTF-8");
|
||||
} catch (java.io.UnsupportedEncodingException e) {
|
||||
// should never happen
|
||||
return str;
|
||||
}
|
||||
|
||||
len = bytes.length;
|
||||
|
||||
// for each byte
|
||||
for (i = 0; i < len; i++) {
|
||||
b = bytes[i];
|
||||
// for non-ascii character: make it positive, then escape
|
||||
if (b < 0) {
|
||||
ch = b + 256;
|
||||
buffer.append('%');
|
||||
buffer.append(gHexChs[ch >> 4]);
|
||||
buffer.append(gHexChs[ch & 0xf]);
|
||||
}
|
||||
else if (b != '%' && b != '#' && gNeedEscaping[b]) {
|
||||
buffer.append('%');
|
||||
buffer.append(gAfterEscaping1[b]);
|
||||
buffer.append(gAfterEscaping2[b]);
|
||||
}
|
||||
else {
|
||||
buffer.append((char)b);
|
||||
}
|
||||
}
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
//
|
||||
// Package visible methods
|
||||
|
Loading…
x
Reference in New Issue
Block a user