6928542: Chinese characters in RTF are not decoded
Reviewed-by: prr, psadhukhan
This commit is contained in:
parent
4e8c0364a2
commit
a26f7c03c7
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -26,6 +26,10 @@ package javax.swing.text.rtf;
|
|||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.lang.*;
|
import java.lang.*;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.CharBuffer;
|
||||||
|
import java.nio.charset.CharsetDecoder;
|
||||||
|
import java.nio.charset.CoderResult;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <b>RTFParser</b> is a subclass of <b>AbstractFilter</b> which understands basic RTF syntax
|
* <b>RTFParser</b> is a subclass of <b>AbstractFilter</b> which understands basic RTF syntax
|
||||||
@ -69,6 +73,11 @@ abstract class RTFParser extends AbstractFilter
|
|||||||
|
|
||||||
private final int S_inblob = 6; // in a \bin blob
|
private final int S_inblob = 6; // in a \bin blob
|
||||||
|
|
||||||
|
// For fcharset control word
|
||||||
|
protected CharsetDecoder decoder = null;
|
||||||
|
private byte[] ba = new byte[2];
|
||||||
|
protected ByteBuffer decoderBB = ByteBuffer.wrap(ba);
|
||||||
|
|
||||||
/** Implemented by subclasses to interpret a parameter-less RTF keyword.
|
/** Implemented by subclasses to interpret a parameter-less RTF keyword.
|
||||||
* The keyword is passed without the leading '/' or any delimiting
|
* The keyword is passed without the leading '/' or any delimiting
|
||||||
* whitespace. */
|
* whitespace. */
|
||||||
@ -100,6 +109,9 @@ abstract class RTFParser extends AbstractFilter
|
|||||||
rtfSpecialsTable['\\'] = true;
|
rtfSpecialsTable['\\'] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Defined for replacement character
|
||||||
|
static final char REPLACEMENT_CHAR = '\uFFFD';
|
||||||
|
|
||||||
public RTFParser()
|
public RTFParser()
|
||||||
{
|
{
|
||||||
currentCharacters = new StringBuffer();
|
currentCharacters = new StringBuffer();
|
||||||
@ -109,6 +121,9 @@ abstract class RTFParser extends AbstractFilter
|
|||||||
//warnings = System.out;
|
//warnings = System.out;
|
||||||
|
|
||||||
specialsTable = rtfSpecialsTable;
|
specialsTable = rtfSpecialsTable;
|
||||||
|
// Initialize byte buffer for CharsetDecoder
|
||||||
|
decoderBB.clear();
|
||||||
|
decoderBB.limit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Handle wrapup at end of file correctly.
|
// TODO: Handle wrapup at end of file correctly.
|
||||||
@ -182,6 +197,9 @@ abstract class RTFParser extends AbstractFilter
|
|||||||
}
|
}
|
||||||
state = S_backslashed;
|
state = S_backslashed;
|
||||||
} else {
|
} else {
|
||||||
|
// SBCS: ASCII character
|
||||||
|
// DBCS: Non lead byte
|
||||||
|
ch = decode(ch);
|
||||||
currentCharacters.append(ch);
|
currentCharacters.append(ch);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -301,7 +319,9 @@ abstract class RTFParser extends AbstractFilter
|
|||||||
if (Character.digit(ch, 16) != -1)
|
if (Character.digit(ch, 16) != -1)
|
||||||
{
|
{
|
||||||
pendingCharacter = pendingCharacter * 16 + Character.digit(ch, 16);
|
pendingCharacter = pendingCharacter * 16 + Character.digit(ch, 16);
|
||||||
ch = translationTable[pendingCharacter];
|
// Use translationTable if decoder is not defined
|
||||||
|
ch = decoder == null ? translationTable[pendingCharacter]
|
||||||
|
: decode((char)pendingCharacter);
|
||||||
if (ch != 0)
|
if (ch != 0)
|
||||||
handleText(ch);
|
handleText(ch);
|
||||||
}
|
}
|
||||||
@ -360,4 +380,37 @@ abstract class RTFParser extends AbstractFilter
|
|||||||
super.close();
|
super.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For fcharset control word
|
||||||
|
private char[] ca = new char[1];
|
||||||
|
private CharBuffer decoderCB = CharBuffer.wrap(ca);
|
||||||
|
|
||||||
|
private char decode(char ch) {
|
||||||
|
if (decoder == null) return ch;
|
||||||
|
decoderBB.put((byte) ch);
|
||||||
|
decoderBB.rewind();
|
||||||
|
decoderCB.clear();
|
||||||
|
CoderResult cr = decoder.decode(decoderBB, decoderCB, false);
|
||||||
|
if (cr.isUnderflow()) {
|
||||||
|
if (decoderCB.position() == 1) {
|
||||||
|
// Converted to Unicode (including replacement character)
|
||||||
|
decoder.reset();
|
||||||
|
decoderBB.clear();
|
||||||
|
decoderBB.limit(1);
|
||||||
|
return ca[0];
|
||||||
|
} else {
|
||||||
|
// Detected lead byte
|
||||||
|
decoder.reset();
|
||||||
|
decoderBB.limit(2);
|
||||||
|
decoderBB.position(1);
|
||||||
|
return 0; // Skip write operation if return value is 0
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Fallback, should not be called
|
||||||
|
decoder.reset();
|
||||||
|
decoderBB.clear();
|
||||||
|
decoderBB.limit(1);
|
||||||
|
return REPLACEMENT_CHAR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -32,6 +32,11 @@ import java.io.InputStream;
|
|||||||
import java.io.InputStreamReader;
|
import java.io.InputStreamReader;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
import java.io.StreamTokenizer;
|
import java.io.StreamTokenizer;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.CharBuffer;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
|
import java.nio.charset.CharsetDecoder;
|
||||||
|
import java.nio.charset.CodingErrorAction;
|
||||||
import java.security.AccessController;
|
import java.security.AccessController;
|
||||||
import java.security.PrivilegedAction;
|
import java.security.PrivilegedAction;
|
||||||
import java.util.Dictionary;
|
import java.util.Dictionary;
|
||||||
@ -87,6 +92,10 @@ class RTFReader extends RTFParser
|
|||||||
|
|
||||||
/** This Dictionary maps Integer font numbers to String font names. */
|
/** This Dictionary maps Integer font numbers to String font names. */
|
||||||
Dictionary<Integer, String> fontTable;
|
Dictionary<Integer, String> fontTable;
|
||||||
|
/** This Dictionary maps Integer font numbers to Charset font charset. */
|
||||||
|
Dictionary<Integer, Charset> fcharsetTable;
|
||||||
|
/** This Dictionary maps String font charset to String code page. */
|
||||||
|
static Dictionary<String, String> fcharsetToCP = null;
|
||||||
/** This array maps color indices to Color objects. */
|
/** This array maps color indices to Color objects. */
|
||||||
Color[] colorTable;
|
Color[] colorTable;
|
||||||
/** This Map maps character style numbers to Style objects. */
|
/** This Map maps character style numbers to Style objects. */
|
||||||
@ -133,6 +142,7 @@ class RTFReader extends RTFParser
|
|||||||
textKeywords.put("emspace", "\u2003");
|
textKeywords.put("emspace", "\u2003");
|
||||||
textKeywords.put("endash", "\u2013");
|
textKeywords.put("endash", "\u2013");
|
||||||
textKeywords.put("enspace", "\u2002");
|
textKeywords.put("enspace", "\u2002");
|
||||||
|
textKeywords.put("line", "\n");
|
||||||
textKeywords.put("ldblquote", "\u201C");
|
textKeywords.put("ldblquote", "\u201C");
|
||||||
textKeywords.put("lquote", "\u2018");
|
textKeywords.put("lquote", "\u2018");
|
||||||
textKeywords.put("ltrmark", "\u200E");
|
textKeywords.put("ltrmark", "\u200E");
|
||||||
@ -159,7 +169,50 @@ class RTFReader extends RTFParser
|
|||||||
defineCharacterSet("ansicpg", latin1TranslationTable);
|
defineCharacterSet("ansicpg", latin1TranslationTable);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO: per-font font encodings ( \fcharset control word ) ? */
|
/**
|
||||||
|
* Windows font charset
|
||||||
|
*/
|
||||||
|
private static final int ANSI_CHARSET = 0;
// DEFAULT, SYMBOL, MAC and (further down) OEM have no entry in the code page
// table built in the static initializer.
private static final int DEFAULT_CHARSET = 1;
private static final int SYMBOL_CHARSET = 2;
private static final int MAC_CHARSET = 77;
// Double-byte East Asian charsets.
private static final int SHIFTJIS_CHARSET = 128;
private static final int HANGUL_CHARSET = 129;
private static final int JOHAB_CHARSET = 130;
private static final int GB2312_CHARSET = 134;
private static final int CHINESEBIG5_CHARSET = 136;
// Single-byte regional charsets.
private static final int GREEK_CHARSET = 161;
private static final int TURKISH_CHARSET = 162;
private static final int VIETNAMESE_CHARSET = 163;
private static final int HEBREW_CHARSET = 177;
private static final int ARABIC_CHARSET = 178;
private static final int BALTIC_CHARSET = 186;
private static final int RUSSIAN_CHARSET = 204;
private static final int THAI_CHARSET = 222;
private static final int EASTEUROPE_CHARSET = 238;
private static final int OEM_CHARSET = 255;

// Builds the lookup from an "fcharsetN" control word (keyword plus numeric
// parameter) to the name of the charset used to decode text in that font.
static {
    final String keyword = "fcharset";
    final Hashtable<String, String> codePages = new Hashtable<>();

    codePages.put(keyword + ANSI_CHARSET, "windows-1252");

    codePages.put(keyword + SHIFTJIS_CHARSET, "ms932");
    codePages.put(keyword + HANGUL_CHARSET, "ms949");
    codePages.put(keyword + JOHAB_CHARSET, "ms1361");
    codePages.put(keyword + GB2312_CHARSET, "ms936");
    codePages.put(keyword + CHINESEBIG5_CHARSET, "ms950");

    codePages.put(keyword + GREEK_CHARSET, "windows-1253");
    codePages.put(keyword + TURKISH_CHARSET, "windows-1254");
    codePages.put(keyword + VIETNAMESE_CHARSET, "windows-1258");
    codePages.put(keyword + HEBREW_CHARSET, "windows-1255");
    codePages.put(keyword + ARABIC_CHARSET, "windows-1256");
    codePages.put(keyword + BALTIC_CHARSET, "windows-1257");
    codePages.put(keyword + RUSSIAN_CHARSET, "windows-1251");
    codePages.put(keyword + THAI_CHARSET, "ms874");
    codePages.put(keyword + EASTEUROPE_CHARSET, "windows-1250");

    // Publish the table only once it is fully populated.
    fcharsetToCP = codePages;
}
|
||||||
|
|
||||||
|
// Defined for replacement character
|
||||||
|
private static final String REPLACEMENT_CHAR = "\uFFFD";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Creates a new RTFReader instance. Text will be sent to
|
* Creates a new RTFReader instance. Text will be sent to
|
||||||
@ -174,6 +227,7 @@ public RTFReader(StyledDocument destination)
|
|||||||
target = destination;
|
target = destination;
|
||||||
parserState = new Hashtable<Object, Object>();
|
parserState = new Hashtable<Object, Object>();
|
||||||
fontTable = new Hashtable<Integer, String>();
|
fontTable = new Hashtable<Integer, String>();
|
||||||
|
fcharsetTable = new Hashtable<Integer, Charset>();
|
||||||
|
|
||||||
rtfversion = -1;
|
rtfversion = -1;
|
||||||
|
|
||||||
@ -762,6 +816,25 @@ class FonttblDestination implements Destination
|
|||||||
nextFontNumber = parameter;
|
nextFontNumber = parameter;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
// For fcharset control word
|
||||||
|
if (keyword.equals("fcharset")) {
|
||||||
|
String fcharset = keyword+parameter;
|
||||||
|
String csName = fcharsetToCP.get(fcharset);
|
||||||
|
Charset cs;
|
||||||
|
if (csName != null) {
|
||||||
|
try {
|
||||||
|
cs = Charset.forName(csName);
|
||||||
|
} catch (IllegalArgumentException iae) {
|
||||||
|
// Fallback, should not be called
|
||||||
|
cs = ISO_8859_1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Fallback, fcharset control word number is not defined
|
||||||
|
cs = ISO_8859_1;
|
||||||
|
}
|
||||||
|
fcharsetTable.put(nextFontNumber, cs);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -1216,6 +1289,25 @@ abstract class AttributeTrackingDestination implements Destination
|
|||||||
|
|
||||||
if (keyword.equals("f")) {
|
if (keyword.equals("f")) {
|
||||||
parserState.put(keyword, Integer.valueOf(parameter));
|
parserState.put(keyword, Integer.valueOf(parameter));
|
||||||
|
|
||||||
|
// Check lead byte is stored or not
|
||||||
|
if (decoderBB.position() == 1) {
|
||||||
|
handleText(REPLACEMENT_CHAR);
|
||||||
|
}
|
||||||
|
// Reset decoder byte buffer
|
||||||
|
decoderBB.clear();
|
||||||
|
decoderBB.limit(1);
|
||||||
|
// Check fcharset is used or not
|
||||||
|
Charset cs = fcharsetTable.get(parameter);
|
||||||
|
if (cs != null) {
|
||||||
|
decoder = cs.newDecoder();
|
||||||
|
decoder.onMalformedInput(CodingErrorAction.REPLACE)
|
||||||
|
.onUnmappableCharacter(CodingErrorAction.REPLACE);
|
||||||
|
} else {
|
||||||
|
// fcharset is not used, use translationTable
|
||||||
|
decoder = null;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (keyword.equals("cf")) {
|
if (keyword.equals("cf")) {
|
||||||
@ -1610,6 +1702,12 @@ abstract class TextHandlingDestination
|
|||||||
|
|
||||||
if (keyword.equals("par")) {
|
if (keyword.equals("par")) {
|
||||||
// warnings.println("Ending paragraph.");
|
// warnings.println("Ending paragraph.");
|
||||||
|
// Check lead byte is stored or not
|
||||||
|
if (decoderBB.position() == 1) {
|
||||||
|
handleText(REPLACEMENT_CHAR);
|
||||||
|
decoderBB.clear();
|
||||||
|
decoderBB.limit(1);
|
||||||
|
}
|
||||||
endParagraph();
|
endParagraph();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
131
test/jdk/javax/swing/text/rtf/RTFReadFontCharsetTest.java
Normal file
131
test/jdk/javax/swing/text/rtf/RTFReadFontCharsetTest.java
Normal file
@ -0,0 +1,131 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @test
|
||||||
|
* @bug 6928542
|
||||||
|
* @summary Verify RTFEditorKit.read() with fcharset
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import javax.swing.text.Document;
|
||||||
|
import javax.swing.text.Element;
|
||||||
|
import javax.swing.text.rtf.RTFEditorKit;
|
||||||
|
|
||||||
|
import static java.nio.charset.StandardCharsets.ISO_8859_1;
|
||||||
|
|
||||||
|
/**
 * Reads an RTF document whose font table assigns a different fcharset to
 * each font and checks that the text extracted by RTFEditorKit matches the
 * expected Unicode text.
 */
public class RTFReadFontCharsetTest {

    /** RTF input; every char is written out as a single ISO-8859-1 byte. */
    private static final String RTF_INPUT =
            "{\\rtf1\\fbidis\\ansi\\ansicpg932\\deff0\\nouicomp" +
            "at\\deflang1033\\deflangfe1041{\\fonttbl{\\f0\\fni" +
            "l\\fcharset0 Segoe UI;}{\\f1\\fnil\\fcharset128 Yu" +
            " Gothic UI;}{\\f2\\fswiss\\fprq2\\fcharset129 Malg" +
            "un Gothic;}{\\f3\\fnil\\fcharset134 Microsoft YaHe" +
            "i;}{\\f4\\fnil\\fcharset136 Microsoft JhengHei;}{\\" +
            "f5\\fnil\\fcharset161 Segoe UI;}{\\f6\\fnil\\fcha" +
            "rset162 Segoe UI;}{\\f7\\fnil\\fcharset163 Segoe U" +
            "I;}{\\f8\\fnil\\fcharset177 Segoe UI;}{\\f9\\fnil\\" +
            "fcharset178 Segoe UI;}{\\f10\\fnil\\fcharset186 S" +
            "egoe UI;}{\\f11\\fnil\\fcharset204 Segoe UI;}{\\f1" +
            "2\\fnil\\fcharset222 Leelawadee UI;}{\\f13\\fnil\\" +
            "fcharset0 Leelawadee UI;}{\\f14\\fnil\\fcharset238" +
            " Segoe UI;}}\r\n{\\*\\generator Riched20 10.0.1904" +
            "1}\\viewkind4\\uc1 \r\n\\pard\\ltrpar\\nowidctlpar" +
            "\\sa200\\sl276\\slmult1\\f0\\fs22\\lang1041 Gr\\'f" +
            "cezi - Switzerland 0\\line\\f1\\'82\\'b1\\'82\\'f" +
            "1\\'82\\'c9\\'82\\'bf\\'82\\'cd - Japanese 128\\li" +
            "ne\\f2\\lang17\\'be\\'c8\\'b3\\'e7\\'c7\\'cf\\'bc\\" +
            "'bc\\'bf\\'e4\\lang1041 - Korean 129\\line\\kern" +
            "ing2\\f3\\lang1033\\'c4\\'e3\\'ba\\'c3 - China 134" +
            "\\line\\f4\\'bb\\'4f\\'c6\\'57 - Traditional Chine" +
            "se - Taiwan 136\\line\\kerning0\\f5\\lang17\\'e3\\" +
            "'e5\\'e9\\'e1 \\'f3\\'ef\\'f5 - Greek\\f0\\lang104" +
            "1 161\\line\\f6\\lang17 A\\'f0a\\'e7 - \\f0 Turki" +
            "sh (Tree) 162\\line\\f7\\'fe\\f0\\lang1041 \\lang" +
            "1033 - \\lang17 Vietnam currency\\lang1041 163\\l" +
            "ine\\f8\\rtlch\\lang17\\'f9\\'c8\\'d1\\'ec\\'e5\\'" +
            "c9\\'ed\\f0\\ltrch - Hebrew 177\\line\\f9\\rtlch\\" +
            "lang1025\\'e3\\'d1\\'cd\\'c8\\'c7\\f0\\ltrch\\lan" +
            "g17 - Arabic 178\\line\\kerning2\\f10\\lang1033 A" +
            "\\'e8i\\'fb - Lithuanian (Thank you) 186\\kerning0" +
            "\\f0\\lang1041\\line\\kerning2\\f11\\lang1049\\'c7" +
            "\\'e4\\'f0\\'e0\\'e2\\'f1\\'f2\\'e2\\'f3\\'e9\\'f2" +
            "\\'e5\\f0\\lang1033 - Russian 204\\line\\kerning0" +
            "\\f12\\lang1054\\'ca\\'c7\\'d1\\'ca\\'b4\\'d5 \\f1" +
            "3\\lang1033 - Thailand 222\\line\\kerning2\\f14 cz" +
            "e\\'9c\\'e6 - Polish 238\\par\r\n}\r\n\u0000";

    /** Text the document must contain after reading RTF_INPUT. */
    private static final String EXPECTED_TEXT =
            "Gr\u00fcezi - Switzerland 0\n" +
            "\u3053\u3093\u306b\u3061\u306f - Japanese 128\n" +
            "\uc548\ub155\ud558\uc138\uc694 - Korean 129\n" +
            "\u4f60\u597d - China 134\n" +
            "\u81fa\u7063 - Traditional Chinese - Taiwan 136\n" +
            "\u03b3\u03b5\u03b9\u03b1 \u03c3\u03bf\u03c5 - Greek 161\n" +
            "A\u011fa\u00e7 - Turkish (Tree) 162\n" +
            "\u20ab - Vietnam currency 163\n" +
            "\u05e9\u05b8\u05c1\u05dc\u05d5\u05b9\u05dd - Hebrew 177\n" +
            "\u0645\u0631\u062d\u0628\u0627 - Arabic 178\n" +
            "A\u010di\u016b - Lithuanian (Thank you) 186\n" +
            "\u0417\u0434\u0440\u0430\u0432\u0441\u0442" +
            "\u0432\u0443\u0439\u0442\u0435 - Russian 204\n" +
            "\u0e2a\u0e27\u0e31\u0e2a\u0e14\u0e35 - Thailand 222\n" +
            "cze\u015b\u0107 - Polish 238\n" +
            "\n";

    /**
     * Runs the check.
     *
     * @param args ignored
     * @throws Exception if reading fails, or a RuntimeException when the
     *         extracted text differs from the expected text
     */
    public static void main(String[] args) throws Exception {
        final String actual = readDocumentText(RTF_INPUT);
        if (EXPECTED_TEXT.equals(actual)) {
            return;
        }

        // Show both sides in an ASCII-safe form before failing.
        System.err.println("Read data");
        System.err.println("=========");
        dump(actual, System.err);
        System.err.println("Expected data");
        System.err.println("=============");
        dump(EXPECTED_TEXT, System.err);
        throw new RuntimeException("Test failed");
    }

    /**
     * Feeds the RTF source through RTFEditorKit and returns the text spanned
     * by the document's default root element.
     */
    private static String readDocumentText(String rtf) throws Exception {
        final RTFEditorKit kit = new RTFEditorKit();
        final Document doc = kit.createDefaultDocument();
        final InputStreamReader reader = new InputStreamReader(
                new ByteArrayInputStream(rtf.getBytes(ISO_8859_1)), ISO_8859_1);
        kit.read(reader, doc, 0);

        final Element root = doc.getDefaultRootElement();
        final int start = root.getStartOffset();
        final int end = root.getEndOffset();
        return doc.getText(start, end - start);
    }

    /**
     * Prints a string using printable ASCII only: a backslash is doubled, a
     * newline ends the output line, and any other character outside
     * 0x20..0x7e is written as a backslash-u escape with four hex digits.
     */
    private static void dump(String s, PrintStream ps) {
        for (int i = 0; i < s.length(); i++) {
            final char c = s.charAt(i);
            if (c == '\n') {
                ps.println();
            } else if (c == '\\') {
                ps.print("\\\\");
            } else if (c < 0x20 || c > 0x7e) {
                ps.printf("\\u%04x", (int) c);
            } else {
                ps.print(c);
            }
        }
    }

}
|
Loading…
x
Reference in New Issue
Block a user