fe008ae27a
Reviewed-by: darcy, weijun
126 lines
4.4 KiB
Java
126 lines
4.4 KiB
Java
/*
 * Copyright (c) 2001, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */

/*
 * @test
 * @bug 4396708
 * @summary Test URL encoder and decoder on a string that contains
 *          surrogate pairs.
 *
 */

import java.io.*;
|
|
import java.net.*;
|
|
|
|
/*
 * Surrogate pairs are two-character Unicode sequences where the first
 * character lies in the range [d800, dbff] and the second character lies
 * in the range [dc00, dfff]. They are used as an escaping mechanism to add
 * 1M more characters to Unicode.
 */
/**
 * Checks that {@link URLEncoder} and {@link URLDecoder} handle strings
 * containing UTF-16 surrogate pairs when the UTF-8 encoding is requested.
 *
 * <p>Each entry of {@link #testStrings} is encoded and compared against the
 * entry at the same index in {@link #correctEncodings}; the encoded form is
 * then decoded and compared against the original string. The first mismatch
 * aborts the run with an exception.
 */
public class SurrogatePairs {

    /**
     * Input strings. They cover the four corner combinations of high
     * surrogate (D800, DBFF) and low surrogate (DC00, DFFF), plus a pair
     * preceded or followed by an unreserved character ('1'), a character
     * that must be escaped ('@'), and a non-ASCII BMP character (U+0101).
     */
    static String[] testStrings = {
        "\uD800\uDC00",
        "\uD800\uDFFF",
        "\uDBFF\uDC00",
        "\uDBFF\uDFFF",
        "1\uDBFF\uDC00",
        "@\uDBFF\uDC00",
        "\uDBFF\uDC001",
        "\uDBFF\uDC00@",
        "\u0101\uDBFF\uDC00",
        "\uDBFF\uDC00\u0101"
    };

    /**
     * Expected URL encodings, index-aligned with {@link #testStrings}.
     * Every surrogate pair is expected to become one four-byte
     * percent-escaped UTF-8 sequence.
     */
    static String[] correctEncodings = {
        "%F0%90%80%80",
        "%F0%90%8F%BF",
        "%F4%8F%B0%80",
        "%F4%8F%BF%BF",
        "1%F4%8F%B0%80",
        "%40%F4%8F%B0%80",
        "%F4%8F%B0%801",
        "%F4%8F%B0%80%40",
        "%C4%81%F4%8F%B0%80",
        "%F4%8F%B0%80%C4%81"
    };

    /**
     * Runs the encode/decode check for every test string.
     *
     * @param args ignored
     * @throws Exception if the test data is inconsistent or any string
     *         fails to encode or decode as expected
     */
    public static void main(String[] args) throws Exception {
        // The two arrays are parallel; fail loudly if they drift apart
        // instead of skipping cases or dying with an index error.
        if (testStrings.length != correctEncodings.length) {
            throw new RuntimeException("Test data mismatch: "
                    + testStrings.length + " test strings but "
                    + correctEncodings.length + " expected encodings");
        }

        for (int i = 0; i < testStrings.length; i++) {
            test(testStrings[i], correctEncodings[i]);
        }
    }

    /**
     * Encodes {@code str} as UTF-8, verifies the result, then decodes it and
     * verifies that the original string is recovered. Progress is printed to
     * standard output.
     *
     * @param str the string to encode
     * @param correctEncoding the expected result of encoding {@code str}
     * @throws Exception if the encoded or decoded value is wrong, or if the
     *         encoder/decoder itself throws
     */
    private static void test(String str, String correctEncoding)
            throws Exception {

        System.out.println("Unicode bytes of test string are: "
                + getHexBytes(str));

        String encoded = URLEncoder.encode(str, "UTF-8");
        System.out.println("URLEncoding is: " + encoded);

        if (!encoded.equals(correctEncoding)) {
            // Report the actual value too, so a failure can be diagnosed
            // from the exception alone.
            throw new RuntimeException("The encoding is incorrect!"
                    + " It should be " + correctEncoding
                    + " but was " + encoded);
        }
        System.out.println("The encoding is correct!");

        String decoded = URLDecoder.decode(encoded, "UTF-8");
        System.out.println("Unicode bytes for URLDecoding are: "
                + getHexBytes(decoded));

        if (!str.equals(decoded)) {
            throw new RuntimeException(
                    "The decoded is not equal to the original:"
                    + " expected [" + getHexBytes(str) + "]"
                    + " but got [" + getHexBytes(decoded) + "]");
        }
        System.out.println("The decoding is correct");
        System.out.println("---");
    }

    /**
     * Renders every UTF-16 code unit of {@code s} as four lowercase hex
     * digits followed by a single space, e.g. {@code "A"} becomes
     * {@code "0041 "}.
     *
     * @param s the string to dump
     * @return the hex dump; empty if {@code s} is empty
     */
    private static String getHexBytes(String s) {
        // Five output characters per code unit: four digits and a space.
        StringBuilder sb = new StringBuilder(s.length() * 5);
        for (int i = 0; i < s.length(); i++) {
            int unit = s.charAt(i);
            // Emit nibbles from most to least significant.
            for (int shift = 12; shift >= 0; shift -= 4) {
                sb.append(Character.forDigit((unit >> shift) & 0xF, 16));
            }
            sb.append(' ');
        }
        return sb.toString();
    }

}
|