diff --git a/src/java.desktop/share/classes/javax/swing/text/rtf/RTFParser.java b/src/java.desktop/share/classes/javax/swing/text/rtf/RTFParser.java
index 259b63ed8ee..05f45cc6bb0 100644
--- a/src/java.desktop/share/classes/javax/swing/text/rtf/RTFParser.java
+++ b/src/java.desktop/share/classes/javax/swing/text/rtf/RTFParser.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,10 @@ package javax.swing.text.rtf;
import java.io.*;
import java.lang.*;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
/**
* RTFParser is a subclass of AbstractFilter which understands basic RTF syntax
@@ -69,6 +73,11 @@ abstract class RTFParser extends AbstractFilter
private final int S_inblob = 6; // in a \bin blob
+ // State for the fcharset control word: active decoder (null means none) and a 2-byte input buffer
+ protected CharsetDecoder decoder = null;
+ private byte[] ba = new byte[2];
+ protected ByteBuffer decoderBB = ByteBuffer.wrap(ba);
+
/** Implemented by subclasses to interpret a parameter-less RTF keyword.
* The keyword is passed without the leading '/' or any delimiting
* whitespace. */
@@ -100,6 +109,9 @@ abstract class RTFParser extends AbstractFilter
rtfSpecialsTable['\\'] = true;
}
+ // Unicode replacement character (U+FFFD), returned by decode() on its fallback path
+ static final char REPLACEMENT_CHAR = '\uFFFD';
+
public RTFParser()
{
currentCharacters = new StringBuffer();
@@ -109,6 +121,9 @@ abstract class RTFParser extends AbstractFilter
//warnings = System.out;
specialsTable = rtfSpecialsTable;
+ // Initialize byte buffer for CharsetDecoder
+ decoderBB.clear();
+ decoderBB.limit(1);
}
// TODO: Handle wrapup at end of file correctly.
@@ -182,6 +197,9 @@ abstract class RTFParser extends AbstractFilter
}
state = S_backslashed;
} else {
+ // SBCS: ASCII character
+ // DBCS: Non lead byte
+ ch = decode(ch);
currentCharacters.append(ch);
}
break;
@@ -301,7 +319,9 @@ abstract class RTFParser extends AbstractFilter
if (Character.digit(ch, 16) != -1)
{
pendingCharacter = pendingCharacter * 16 + Character.digit(ch, 16);
- ch = translationTable[pendingCharacter];
+ // Use translationTable if decoder is not defined
+ ch = decoder == null ? translationTable[pendingCharacter]
+ : decode((char)pendingCharacter);
if (ch != 0)
handleText(ch);
}
@@ -360,4 +380,37 @@ abstract class RTFParser extends AbstractFilter
super.close();
}
+ // Single-char output buffer for decode(); decoderCB wraps ca, so ca[0] holds the decoded character
+ private char[] ca = new char[1];
+ private CharBuffer decoderCB = CharBuffer.wrap(ca);
+
+ private char decode(char ch) { // Feeds one byte (low 8 bits of ch) to the decoder; returns the decoded char, 0, or U+FFFD
+ if (decoder == null) return ch; // No decoder set: pass the character through unchanged
+ decoderBB.put((byte) ch); // Store the byte at the buffer's current position (0, or 1 after a lead byte)
+ decoderBB.rewind(); // Read from index 0; the limit is untouched, so 1 or 2 bytes are visible
+ decoderCB.clear();
+ CoderResult cr = decoder.decode(decoderBB, decoderCB, false);
+ if (cr.isUnderflow()) {
+ if (decoderCB.position() == 1) {
+ // Converted to Unicode (including replacement character)
+ decoder.reset();
+ decoderBB.clear();
+ decoderBB.limit(1); // Back to single-byte mode for the next call
+ return ca[0]; // ca backs decoderCB, so this is the character just decoded
+ } else {
+ // Detected lead byte: nothing was produced, so keep the buffered byte and wait for one more
+ decoder.reset();
+ decoderBB.limit(2);
+ decoderBB.position(1); // The next put() lands at index 1, right after the lead byte
+ return 0; // No character yet; skip write operation if return value is 0
+ }
+ } else {
+ // Fallback, should not be reached: drop the buffered bytes and report U+FFFD
+ decoder.reset();
+ decoderBB.clear();
+ decoderBB.limit(1);
+ return REPLACEMENT_CHAR;
+ }
+ }
+
}
diff --git a/src/java.desktop/share/classes/javax/swing/text/rtf/RTFReader.java b/src/java.desktop/share/classes/javax/swing/text/rtf/RTFReader.java
index 9128a71ed01..60cfd585c73 100644
--- a/src/java.desktop/share/classes/javax/swing/text/rtf/RTFReader.java
+++ b/src/java.desktop/share/classes/javax/swing/text/rtf/RTFReader.java
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -32,6 +32,11 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.StreamTokenizer;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.Dictionary;
@@ -87,6 +92,10 @@ class RTFReader extends RTFParser
/** This Dictionary maps Integer font numbers to String font names. */
Dictionary fontTable;
+ /** This Dictionary maps Integer font numbers to Charset font charset. */
+ Dictionary fcharsetTable;
+ /** This Dictionary maps String font charset to String code page. */
+ static Dictionary fcharsetToCP = null;
/** This array maps color indices to Color objects. */
Color[] colorTable;
/** This Map maps character style numbers to Style objects. */
@@ -133,6 +142,7 @@ class RTFReader extends RTFParser
textKeywords.put("emspace", "\u2003");
textKeywords.put("endash", "\u2013");
textKeywords.put("enspace", "\u2002");
+ textKeywords.put("line", "\n");
textKeywords.put("ldblquote", "\u201C");
textKeywords.put("lquote", "\u2018");
textKeywords.put("ltrmark", "\u200E");
@@ -159,7 +169,50 @@ class RTFReader extends RTFParser
defineCharacterSet("ansicpg", latin1TranslationTable);
}
-/* TODO: per-font font encodings ( \fcharset control word ) ? */
+ /**
+ * Windows font charset identifiers; the static initializer below appends them to "fcharset" to build lookup keys.
+ */
+ private static final int ANSI_CHARSET = 0;
+ private static final int DEFAULT_CHARSET = 1;
+ private static final int SYMBOL_CHARSET = 2;
+ private static final int MAC_CHARSET = 77;
+ private static final int SHIFTJIS_CHARSET = 128;
+ private static final int HANGUL_CHARSET = 129;
+ private static final int JOHAB_CHARSET = 130;
+ private static final int GB2312_CHARSET = 134;
+ private static final int CHINESEBIG5_CHARSET = 136;
+ private static final int GREEK_CHARSET = 161;
+ private static final int TURKISH_CHARSET = 162;
+ private static final int VIETNAMESE_CHARSET = 163;
+ private static final int HEBREW_CHARSET = 177;
+ private static final int ARABIC_CHARSET = 178;
+ private static final int BALTIC_CHARSET = 186;
+ private static final int RUSSIAN_CHARSET = 204;
+ private static final int THAI_CHARSET = 222;
+ private static final int EASTEUROPE_CHARSET = 238;
+ private static final int OEM_CHARSET = 255;
+
+ static { // Fill fcharsetToCP: key is "fcharset" + charset number, value is a charset (code page) name
+ fcharsetToCP = new Hashtable();
+ fcharsetToCP.put("fcharset" + ANSI_CHARSET, "windows-1252");
+ fcharsetToCP.put("fcharset" + SHIFTJIS_CHARSET, "ms932");
+ fcharsetToCP.put("fcharset" + HANGUL_CHARSET, "ms949");
+ fcharsetToCP.put("fcharset" + JOHAB_CHARSET, "ms1361");
+ fcharsetToCP.put("fcharset" + GB2312_CHARSET, "ms936");
+ fcharsetToCP.put("fcharset" + CHINESEBIG5_CHARSET, "ms950");
+ fcharsetToCP.put("fcharset" + GREEK_CHARSET, "windows-1253");
+ fcharsetToCP.put("fcharset" + TURKISH_CHARSET, "windows-1254");
+ fcharsetToCP.put("fcharset" + VIETNAMESE_CHARSET, "windows-1258");
+ fcharsetToCP.put("fcharset" + HEBREW_CHARSET, "windows-1255");
+ fcharsetToCP.put("fcharset" + ARABIC_CHARSET, "windows-1256");
+ fcharsetToCP.put("fcharset" + BALTIC_CHARSET, "windows-1257");
+ fcharsetToCP.put("fcharset" + RUSSIAN_CHARSET, "windows-1251");
+ fcharsetToCP.put("fcharset" + THAI_CHARSET, "ms874");
+ fcharsetToCP.put("fcharset" + EASTEUROPE_CHARSET, "windows-1250");
+ } // DEFAULT_CHARSET, SYMBOL_CHARSET, MAC_CHARSET and OEM_CHARSET get no entry here
+
+ // Defined for replacement character
+ private static final String REPLACEMENT_CHAR = "\uFFFD";
/**
* Creates a new RTFReader instance. Text will be sent to
@@ -174,6 +227,7 @@ public RTFReader(StyledDocument destination)
target = destination;
parserState = new Hashtable