diff --git a/src/java.desktop/share/classes/javax/swing/text/rtf/RTFParser.java b/src/java.desktop/share/classes/javax/swing/text/rtf/RTFParser.java index 259b63ed8ee..05f45cc6bb0 100644 --- a/src/java.desktop/share/classes/javax/swing/text/rtf/RTFParser.java +++ b/src/java.desktop/share/classes/javax/swing/text/rtf/RTFParser.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -26,6 +26,10 @@ package javax.swing.text.rtf; import java.io.*; import java.lang.*; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CoderResult; /** * RTFParser is a subclass of AbstractFilter which understands basic RTF syntax @@ -69,6 +73,11 @@ abstract class RTFParser extends AbstractFilter private final int S_inblob = 6; // in a \bin blob + // For fcharset control word + protected CharsetDecoder decoder = null; + private byte[] ba = new byte[2]; + protected ByteBuffer decoderBB = ByteBuffer.wrap(ba); + /** Implemented by subclasses to interpret a parameter-less RTF keyword. * The keyword is passed without the leading '/' or any delimiting * whitespace. */ @@ -100,6 +109,9 @@ abstract class RTFParser extends AbstractFilter rtfSpecialsTable['\\'] = true; } + // Defined for replacement character + static final char REPLACEMENT_CHAR = '\uFFFD'; + public RTFParser() { currentCharacters = new StringBuffer(); @@ -109,6 +121,9 @@ abstract class RTFParser extends AbstractFilter //warnings = System.out; specialsTable = rtfSpecialsTable; + // Initialize byte buffer for CharsetDecoder + decoderBB.clear(); + decoderBB.limit(1); } // TODO: Handle wrapup at end of file correctly. 
@@ -182,6 +197,13 @@ abstract class RTFParser extends AbstractFilter
                 }
                 state = S_backslashed;
             } else {
-                currentCharacters.append(ch);
+                // SBCS: ASCII character
+                // DBCS: Non lead byte
+                ch = decode(ch);
+                // decode() returns 0 while it is holding a DBCS lead byte;
+                // keep that placeholder out of the text (same as \'hh path)
+                if (ch != 0 || decoder == null) {
+                    currentCharacters.append(ch);
+                }
             }
             break;
@@ -301,7 +323,9 @@ abstract class RTFParser extends AbstractFilter
             if (Character.digit(ch, 16) != -1)
             {
                 pendingCharacter = pendingCharacter * 16 + Character.digit(ch, 16);
-                ch = translationTable[pendingCharacter];
+                // Use translationTable if decoder is not defined
+                ch = decoder == null ? translationTable[pendingCharacter]
+                                     : decode((char)pendingCharacter);
                 if (ch != 0)
                     handleText(ch);
             }
@@ -360,4 +384,51 @@ abstract class RTFParser extends AbstractFilter
         super.close();
     }
 
+    // For fcharset control word
+    private final char[] ca = new char[1];
+    private final CharBuffer decoderCB = CharBuffer.wrap(ca);
+
+    /**
+     * Decodes one byte of text with the charset of the current font.
+     * A double-byte lead byte is kept in {@code decoderBB} until the next
+     * call supplies its trail byte.
+     *
+     * @param ch the byte to decode, carried in a char
+     * @return {@code ch} unchanged if no decoder is set or {@code ch} is
+     *         not a byte value; 0 if {@code ch} was stored as a lead byte;
+     *         otherwise the decoded character or {@code REPLACEMENT_CHAR}
+     */
+    private char decode(char ch) {
+        if (decoder == null) {
+            return ch;
+        }
+        if (ch > 0xFF) {
+            // Not a charset byte: (byte) ch would silently truncate it.
+            // Pass it through and forget any stored lead byte.
+            resetDecoderBuffer();
+            return ch;
+        }
+        decoderBB.put((byte) ch);
+        decoderBB.rewind();
+        decoderCB.clear();
+        CoderResult cr = decoder.decode(decoderBB, decoderCB, false);
+        decoder.reset();
+        if (cr.isUnderflow() && decoderCB.position() == 0) {
+            // Detected lead byte: keep it at index 0, accept one more byte
+            decoderBB.limit(2);
+            decoderBB.position(1);
+            return 0; // Skip write operation if return value is 0
+        }
+        resetDecoderBuffer();
+        // Underflow with output: converted to Unicode (including
+        // replacement character).  Otherwise: fallback, should not happen.
+        return cr.isUnderflow() ? ca[0] : REPLACEMENT_CHAR;
+    }
+
+    /** Empties {@code decoderBB} so that it accepts exactly one byte. */
+    private void resetDecoderBuffer() {
+        decoderBB.clear();
+        decoderBB.limit(1);
+    }
+
 }
diff --git a/src/java.desktop/share/classes/javax/swing/text/rtf/RTFReader.java b/src/java.desktop/share/classes/javax/swing/text/rtf/RTFReader.java
index 9128a71ed01..60cfd585c73 100644
--- a/src/java.desktop/share/classes/javax/swing/text/rtf/RTFReader.java
+++ b/src/java.desktop/share/classes/javax/swing/text/rtf/RTFReader.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,6 +32,11 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.OutputStream;
 import java.io.StreamTokenizer;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
 import java.security.AccessController;
 import java.security.PrivilegedAction;
 import java.util.Dictionary;
@@ -87,6 +92,10 @@ class RTFReader extends RTFParser
 
     /** This Dictionary maps Integer font numbers to String font names. */
     Dictionary fontTable;
+    /** This Dictionary maps Integer font numbers to Charset font charset. */
+    Dictionary<Integer, Charset> fcharsetTable;
+    /** This Dictionary maps String font charset to String code page. */
+    static Dictionary<String, String> fcharsetToCP = null;
     /** This array maps color indices to Color objects. */
     Color[] colorTable;
     /** This Map maps character style numbers to Style objects. */
@@ -133,6 +142,7 @@ class RTFReader extends RTFParser
         textKeywords.put("emspace", "\u2003");
         textKeywords.put("endash", "\u2013");
         textKeywords.put("enspace", "\u2002");
+        textKeywords.put("line", "\n");
         textKeywords.put("ldblquote", "\u201C");
         textKeywords.put("lquote", "\u2018");
         textKeywords.put("ltrmark", "\u200E");
@@ -159,7 +169,50 @@ class RTFReader extends RTFParser
         defineCharacterSet("ansicpg", latin1TranslationTable);
     }
 
-/* TODO: per-font font encodings ( \fcharset control word ) ? */
+    /**
+     * Windows font charset
+     */
+    private static final int ANSI_CHARSET = 0;
+    private static final int DEFAULT_CHARSET = 1;
+    private static final int SYMBOL_CHARSET = 2;
+    private static final int MAC_CHARSET = 77;
+    private static final int SHIFTJIS_CHARSET = 128;
+    private static final int HANGUL_CHARSET = 129;
+    private static final int JOHAB_CHARSET = 130;
+    private static final int GB2312_CHARSET = 134;
+    private static final int CHINESEBIG5_CHARSET = 136;
+    private static final int GREEK_CHARSET = 161;
+    private static final int TURKISH_CHARSET = 162;
+    private static final int VIETNAMESE_CHARSET = 163;
+    private static final int HEBREW_CHARSET = 177;
+    private static final int ARABIC_CHARSET = 178;
+    private static final int BALTIC_CHARSET = 186;
+    private static final int RUSSIAN_CHARSET = 204;
+    private static final int THAI_CHARSET = 222;
+    private static final int EASTEUROPE_CHARSET = 238;
+    private static final int OEM_CHARSET = 255;
+
+    static {
+        fcharsetToCP = new Hashtable<String, String>();
+        fcharsetToCP.put("fcharset" + ANSI_CHARSET, "windows-1252");
+        fcharsetToCP.put("fcharset" + SHIFTJIS_CHARSET, "ms932");
+        fcharsetToCP.put("fcharset" + HANGUL_CHARSET, "ms949");
+        fcharsetToCP.put("fcharset" + JOHAB_CHARSET, "ms1361");
+        fcharsetToCP.put("fcharset" + GB2312_CHARSET, "ms936");
+        fcharsetToCP.put("fcharset" + CHINESEBIG5_CHARSET, "ms950");
+        fcharsetToCP.put("fcharset" + GREEK_CHARSET, "windows-1253");
+        fcharsetToCP.put("fcharset" + TURKISH_CHARSET, "windows-1254");
+        fcharsetToCP.put("fcharset" + VIETNAMESE_CHARSET, "windows-1258");
+        fcharsetToCP.put("fcharset" + HEBREW_CHARSET, "windows-1255");
+        fcharsetToCP.put("fcharset" + ARABIC_CHARSET, "windows-1256");
+        fcharsetToCP.put("fcharset" + BALTIC_CHARSET, "windows-1257");
+        fcharsetToCP.put("fcharset" + RUSSIAN_CHARSET, "windows-1251");
+        fcharsetToCP.put("fcharset" + THAI_CHARSET, "ms874");
+        fcharsetToCP.put("fcharset" + EASTEUROPE_CHARSET, "windows-1250");
+    }
+
+    // Defined for replacement character
+    private static final String REPLACEMENT_CHAR = "\uFFFD";
 
     /**
      * Creates a new RTFReader instance. Text will be sent to
@@ -174,6 +227,7 @@ public RTFReader(StyledDocument destination)
     target = destination;
     parserState = new Hashtable();
     fontTable = new Hashtable();
+    fcharsetTable = new Hashtable<Integer, Charset>();
 
     rtfversion = -1;
 
@@ -762,6 +816,25 @@ class FonttblDestination implements Destination
             nextFontNumber = parameter;
             return true;
         }
+        // For fcharset control word
+        if (keyword.equals("fcharset")) {
+            String fcharset = keyword+parameter;
+            String csName = fcharsetToCP.get(fcharset);
+            Charset cs;
+            if (csName != null) {
+                try {
+                    cs = Charset.forName(csName);
+                } catch (IllegalArgumentException iae) {
+                    // Fallback, should not be called
+                    cs = ISO_8859_1;
+                }
+            } else {
+                // Fallback, fcharset control word number is not defined
+                cs = ISO_8859_1;
+            }
+            fcharsetTable.put(nextFontNumber, cs);
+            return true;
+        }
 
         return false;
     }
@@ -1216,6 +1289,25 @@ abstract class AttributeTrackingDestination implements Destination
 
         if (keyword.equals("f")) {
             parserState.put(keyword, Integer.valueOf(parameter));
+
+            // Check lead byte is stored or not
+            if (decoderBB.position() == 1) {
+                handleText(REPLACEMENT_CHAR);
+            }
+            // Reset decoder byte buffer
+            decoderBB.clear();
+            decoderBB.limit(1);
+            // Check fcharset is used or not
+            Charset cs = fcharsetTable.get(parameter);
+            if (cs != null) {
+                decoder = cs.newDecoder();
+                decoder.onMalformedInput(CodingErrorAction.REPLACE)
+                       .onUnmappableCharacter(CodingErrorAction.REPLACE);
+            } else {
+                // fcharset is not used, use translationTable
+                decoder = null;
+            }
+
             return true;
         }
         if (keyword.equals("cf")) {
@@ -1610,6 +1702,12 @@ abstract class TextHandlingDestination
 
         if (keyword.equals("par")) {
 //          warnings.println("Ending paragraph.");
+            // Check lead byte is stored or not
+            if (decoderBB.position() == 1) {
+                handleText(REPLACEMENT_CHAR);
+                decoderBB.clear();
+                decoderBB.limit(1);
+            }
             endParagraph();
             return true;
         }
diff --git
a/test/jdk/javax/swing/text/rtf/RTFReadFontCharsetTest.java b/test/jdk/javax/swing/text/rtf/RTFReadFontCharsetTest.java
new file mode 100644
index 00000000000..629b37285a3
--- /dev/null
+++ b/test/jdk/javax/swing/text/rtf/RTFReadFontCharsetTest.java
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 6928542
+ * @summary Verify RTFEditorKit.read() with fcharset
+ */
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStreamReader;
+import java.io.PrintStream;
+import javax.swing.text.Document;
+import javax.swing.text.Element;
+import javax.swing.text.rtf.RTFEditorKit;
+
+import static java.nio.charset.StandardCharsets.ISO_8859_1;
+
+public class RTFReadFontCharsetTest {
+    public static void main(String[] args) throws Exception {
+        // RTF written by RichEdit: one font per \fcharset value.  A control
+        // word swallows the single space that follows it, so a space that
+        // must reach the document appears as two spaces after the keyword.
+        String s =
+            "{\\rtf1\\fbidis\\ansi\\ansicpg932\\deff0\\nouicomp" +
+            "at\\deflang1033\\deflangfe1041{\\fonttbl{\\f0\\fni" +
+            "l\\fcharset0 Segoe UI;}{\\f1\\fnil\\fcharset128 Yu" +
+            " Gothic UI;}{\\f2\\fswiss\\fprq2\\fcharset129 Malg" +
+            "un Gothic;}{\\f3\\fnil\\fcharset134 Microsoft YaHe" +
+            "i;}{\\f4\\fnil\\fcharset136 Microsoft JhengHei;}{\\" +
+            "f5\\fnil\\fcharset161 Segoe UI;}{\\f6\\fnil\\fcha" +
+            "rset162 Segoe UI;}{\\f7\\fnil\\fcharset163 Segoe U" +
+            "I;}{\\f8\\fnil\\fcharset177 Segoe UI;}{\\f9\\fnil\\" +
+            "fcharset178 Segoe UI;}{\\f10\\fnil\\fcharset186 S" +
+            "egoe UI;}{\\f11\\fnil\\fcharset204 Segoe UI;}{\\f1" +
+            "2\\fnil\\fcharset222 Leelawadee UI;}{\\f13\\fnil\\" +
+            "fcharset0 Leelawadee UI;}{\\f14\\fnil\\fcharset238" +
+            " Segoe UI;}}\r\n{\\*\\generator Riched20 10.0.1904" +
+            "1}\\viewkind4\\uc1 \r\n\\pard\\ltrpar\\nowidctlpar" +
+            "\\sa200\\sl276\\slmult1\\f0\\fs22\\lang1041 Gr\\'f" +
+            "cezi - Switzerland 0\\line\\f1\\'82\\'b1\\'82\\'f" +
+            "1\\'82\\'c9\\'82\\'bf\\'82\\'cd - Japanese 128\\li" +
+            "ne\\f2\\lang17\\'be\\'c8\\'b3\\'e7\\'c7\\'cf\\'bc\\" +
+            "'bc\\'bf\\'e4\\lang1041  - Korean 129\\line\\kern" +
+            "ing2\\f3\\lang1033\\'c4\\'e3\\'ba\\'c3 - China 134" +
+            "\\line\\f4\\'bb\\'4f\\'c6\\'57 - Traditional Chine" +
+            "se - Taiwan 136\\line\\kerning0\\f5\\lang17\\'e3\\" +
+            "'e5\\'e9\\'e1 \\'f3\\'ef\\'f5 - Greek\\f0\\lang104" +
+            "1  161\\line\\f6\\lang17 A\\'f0a\\'e7 - \\f0 Turki" +
+            "sh (Tree) 162\\line\\f7\\'fe\\f0\\lang1041  \\lang" +
+            "1033 - \\lang17 Vietnam currency\\lang1041  163\\l" +
+            "ine\\f8\\rtlch\\lang17\\'f9\\'c8\\'d1\\'ec\\'e5\\'" +
+            "c9\\'ed\\f0\\ltrch  - Hebrew 177\\line\\f9\\rtlch\\" +
+            "lang1025\\'e3\\'d1\\'cd\\'c8\\'c7\\f0\\ltrch\\lan" +
+            "g17  - Arabic 178\\line\\kerning2\\f10\\lang1033 A" +
+            "\\'e8i\\'fb - Lithuanian (Thank you) 186\\kerning0" +
+            "\\f0\\lang1041\\line\\kerning2\\f11\\lang1049\\'c7" +
+            "\\'e4\\'f0\\'e0\\'e2\\'f1\\'f2\\'e2\\'f3\\'e9\\'f2" +
+            "\\'e5\\f0\\lang1033  - Russian 204\\line\\kerning0" +
+            "\\f12\\lang1054\\'ca\\'c7\\'d1\\'ca\\'b4\\'d5 \\f1" +
+            "3\\lang1033 - Thailand 222\\line\\kerning2\\f14 cz" +
+            "e\\'9c\\'e6 - Polish 238\\par\r\n}\r\n\u0000";
+        String expected =
+            "Gr\u00fcezi - Switzerland 0\n" +
+            "\u3053\u3093\u306b\u3061\u306f - Japanese 128\n" +
+            "\uc548\ub155\ud558\uc138\uc694 - Korean 129\n" +
+            "\u4f60\u597d - China 134\n" +
+            "\u81fa\u7063 - Traditional Chinese - Taiwan 136\n" +
+            "\u03b3\u03b5\u03b9\u03b1 \u03c3\u03bf\u03c5 - Greek 161\n" +
+            "A\u011fa\u00e7 - Turkish (Tree) 162\n" +
+            "\u20ab - Vietnam currency 163\n" +
+            "\u05e9\u05b8\u05c1\u05dc\u05d5\u05b9\u05dd - Hebrew 177\n" +
+            "\u0645\u0631\u062d\u0628\u0627 - Arabic 178\n" +
+            "A\u010di\u016b - Lithuanian (Thank you) 186\n" +
+            "\u0417\u0434\u0440\u0430\u0432\u0441\u0442" +
+            "\u0432\u0443\u0439\u0442\u0435 - Russian 204\n" +
+            "\u0e2a\u0e27\u0e31\u0e2a\u0e14\u0e35 - Thailand 222\n" +
+            "cze\u015b\u0107 - Polish 238\n" +
+            "\n";
+        ByteArrayInputStream bais = new ByteArrayInputStream(
+                s.getBytes(ISO_8859_1));
+        InputStreamReader isr = new InputStreamReader(bais, ISO_8859_1);
+        RTFEditorKit kit = new RTFEditorKit();
+        Document doc = kit.createDefaultDocument();
+        kit.read(isr, doc, 0);
+        Element elem = doc.getDefaultRootElement();
+        int elemStart = elem.getStartOffset();
+        int elemEnd = elem.getEndOffset();
+        String text = doc.getText(elemStart, elemEnd - elemStart);
+        if (!expected.equals(text)) {
+            System.err.println("Read data");
+            System.err.println("=========");
+            dump(text, System.err);
+            System.err.println("Expected data");
+            System.err.println("=============");
+            dump(expected, System.err);
+            throw new RuntimeException("Test failed");
+        }
+    }
+
+    /**
+     * Prints {@code s} with backslashes doubled and every character outside
+     * printable ASCII (other than a newline) as a four-digit hex escape.
+     */
+    private static void dump(String s, PrintStream ps) {
+        for(char ch : s.toCharArray()) {
+            if (ch == '\\')
+                ps.print("\\\\");
+            else if (ch >= 0x20 && ch <= 0x7e)
+                ps.print(ch);
+            else if (ch == '\n')
+                ps.println();
+            else
+                ps.printf("\\u%04x", (int)ch);
+        }
+    }
+
+}