6860431: Character.isSurrogate(char ch)

Add new method Character.isSurrogate(char ch)

Reviewed-by: sherman, darcy, okutsu
This commit is contained in:
Martin Buchholz 2009-08-31 15:00:04 -07:00
parent 2c7126577e
commit 9ef7eb7884
22 changed files with 178 additions and 138 deletions

View File

@ -162,7 +162,7 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
* *
* @since 1.0.2 * @since 1.0.2
*/ */
public static final char MAX_VALUE = '\uffff'; public static final char MAX_VALUE = '\uFFFF';
/** /**
* The <code>Class</code> instance representing the primitive type * The <code>Class</code> instance representing the primitive type
@ -518,75 +518,91 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
/** /**
* The minimum value of a Unicode high-surrogate code unit in the * The minimum value of a
* UTF-16 encoding. A high-surrogate is also known as a * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
* <i>leading-surrogate</i>. * Unicode high-surrogate code unit</a>
* in the UTF-16 encoding, constant <code>'&#92;uD800'</code>.
* A high-surrogate is also known as a <i>leading-surrogate</i>.
* *
* @since 1.5 * @since 1.5
*/ */
public static final char MIN_HIGH_SURROGATE = '\uD800'; public static final char MIN_HIGH_SURROGATE = '\uD800';
/** /**
* The maximum value of a Unicode high-surrogate code unit in the * The maximum value of a
* UTF-16 encoding. A high-surrogate is also known as a * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
* <i>leading-surrogate</i>. * Unicode high-surrogate code unit</a>
* in the UTF-16 encoding, constant <code>'&#92;uDBFF'</code>.
* A high-surrogate is also known as a <i>leading-surrogate</i>.
* *
* @since 1.5 * @since 1.5
*/ */
public static final char MAX_HIGH_SURROGATE = '\uDBFF'; public static final char MAX_HIGH_SURROGATE = '\uDBFF';
/** /**
* The minimum value of a Unicode low-surrogate code unit in the * The minimum value of a
* UTF-16 encoding. A low-surrogate is also known as a * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
* <i>trailing-surrogate</i>. * Unicode low-surrogate code unit</a>
* in the UTF-16 encoding, constant <code>'&#92;uDC00'</code>.
* A low-surrogate is also known as a <i>trailing-surrogate</i>.
* *
* @since 1.5 * @since 1.5
*/ */
public static final char MIN_LOW_SURROGATE = '\uDC00'; public static final char MIN_LOW_SURROGATE = '\uDC00';
/** /**
* The maximum value of a Unicode low-surrogate code unit in the * The maximum value of a
* UTF-16 encoding. A low-surrogate is also known as a * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
* <i>trailing-surrogate</i>. * Unicode low-surrogate code unit</a>
* in the UTF-16 encoding, constant <code>'&#92;uDFFF'</code>.
* A low-surrogate is also known as a <i>trailing-surrogate</i>.
* *
* @since 1.5 * @since 1.5
*/ */
public static final char MAX_LOW_SURROGATE = '\uDFFF'; public static final char MAX_LOW_SURROGATE = '\uDFFF';
/** /**
* The minimum value of a Unicode surrogate code unit in the UTF-16 encoding. * The minimum value of a Unicode surrogate code unit in the
* UTF-16 encoding, constant <code>'&#92;uD800'</code>.
* *
* @since 1.5 * @since 1.5
*/ */
public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE; public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
/** /**
* The maximum value of a Unicode surrogate code unit in the UTF-16 encoding. * The maximum value of a Unicode surrogate code unit in the
* UTF-16 encoding, constant <code>'&#92;uDFFF'</code>.
* *
* @since 1.5 * @since 1.5
*/ */
public static final char MAX_SURROGATE = MAX_LOW_SURROGATE; public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
/** /**
* The minimum value of a supplementary code point. * The minimum value of a
* <a href="http://www.unicode.org/glossary/#supplementary_code_point">
* Unicode supplementary code point</a>, constant {@code U+10000}.
* *
* @since 1.5 * @since 1.5
*/ */
public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000; public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
/** /**
* The minimum value of a Unicode code point. * The minimum value of a
* <a href="http://www.unicode.org/glossary/#code_point">
* Unicode code point</a>, constant {@code U+0000}.
* *
* @since 1.5 * @since 1.5
*/ */
public static final int MIN_CODE_POINT = 0x000000; public static final int MIN_CODE_POINT = 0x000000;
/** /**
* The maximum value of a Unicode code point. * The maximum value of a
* <a href="http://www.unicode.org/glossary/#code_point">
* Unicode code point</a>, constant {@code U+10FFFF}.
* *
* @since 1.5 * @since 1.5
*/ */
public static final int MAX_CODE_POINT = 0x10ffff; public static final int MAX_CODE_POINT = 0X10FFFF;
/** /**
@ -2648,19 +2664,15 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
} }
/** /**
* Determines whether the specified code point is a valid Unicode * Determines whether the specified code point is a valid
* code point value in the range of <code>0x0000</code> to * <a href="http://www.unicode.org/glossary/#code_point">
* <code>0x10FFFF</code> inclusive. This method is equivalent to * Unicode code point value</a>.
* the expression:
*
* <blockquote><pre>
* codePoint >= 0x0000 && codePoint <= 0x10FFFF
* </pre></blockquote>
* *
* @param codePoint the Unicode code point to be tested * @param codePoint the Unicode code point to be tested
* @return <code>true</code> if the specified code point value * @return {@code true} if the specified code point value is between
* is a valid code point value; * {@link #MIN_CODE_POINT} and
* <code>false</code> otherwise. * {@link #MAX_CODE_POINT} inclusive;
* {@code false} otherwise.
* @since 1.5 * @since 1.5
*/ */
public static boolean isValidCodePoint(int codePoint) { public static boolean isValidCodePoint(int codePoint) {
@ -2669,15 +2681,13 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
/** /**
* Determines whether the specified character (Unicode code point) * Determines whether the specified character (Unicode code point)
* is in the supplementary character range. The method call is * is in the <a href="#supplementary">supplementary character</a> range.
* equivalent to the expression:
* <blockquote><pre>
* codePoint >= 0x10000 && codePoint <= 0x10FFFF
* </pre></blockquote>
* *
* @param codePoint the character (Unicode code point) to be tested * @param codePoint the character (Unicode code point) to be tested
* @return <code>true</code> if the specified character is in the Unicode * @return {@code true} if the specified code point is between
* supplementary character range; <code>false</code> otherwise. * {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
* {@link #MAX_CODE_POINT} inclusive;
* {@code false} otherwise.
* @since 1.5 * @since 1.5
*/ */
public static boolean isSupplementaryCodePoint(int codePoint) { public static boolean isSupplementaryCodePoint(int codePoint) {
@ -2686,56 +2696,83 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
} }
/** /**
* Determines if the given <code>char</code> value is a * Determines if the given {@code char} value is a
* high-surrogate code unit (also known as <i>leading-surrogate * <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
* code unit</i>). Such values do not represent characters by * Unicode high-surrogate code unit</a>
* themselves, but are used in the representation of <a * (also known as <i>leading-surrogate code unit</i>).
* href="#supplementary">supplementary characters</a> in the
* UTF-16 encoding.
* *
* <p>This method returns <code>true</code> if and only if * <p>Such values do not represent characters by themselves,
* <blockquote><pre>ch >= '&#92;uD800' && ch <= '&#92;uDBFF' * but are used in the representation of
* </pre></blockquote> * <a href="#supplementary">supplementary characters</a>
* is <code>true</code>. * in the UTF-16 encoding.
* *
* @param ch the <code>char</code> value to be tested. * @param ch the {@code char} value to be tested.
* @return <code>true</code> if the <code>char</code> value * @return {@code true} if the {@code char} value is between
* is between '&#92;uD800' and '&#92;uDBFF' inclusive; * {@link #MIN_HIGH_SURROGATE} and
* <code>false</code> otherwise. * {@link #MAX_HIGH_SURROGATE} inclusive;
* @see java.lang.Character#isLowSurrogate(char) * {@code false} otherwise.
* @see Character.UnicodeBlock#of(int) * @see #isLowSurrogate(char)
* @since 1.5 * @see Character.UnicodeBlock#of(int)
* @since 1.5
*/ */
public static boolean isHighSurrogate(char ch) { public static boolean isHighSurrogate(char ch) {
return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE; return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
} }
/** /**
* Determines if the given <code>char</code> value is a * Determines if the given {@code char} value is a
* low-surrogate code unit (also known as <i>trailing-surrogate code * <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
* unit</i>). Such values do not represent characters by themselves, * Unicode low-surrogate code unit</a>
* but are used in the representation of <a * (also known as <i>trailing-surrogate code unit</i>).
* href="#supplementary">supplementary characters</a> in the UTF-16 encoding.
* *
* <p> This method returns <code>true</code> if and only if * <p>Such values do not represent characters by themselves,
* <blockquote><pre>ch >= '&#92;uDC00' && ch <= '&#92;uDFFF' * but are used in the representation of
* </pre></blockquote> is <code>true</code>. * <a href="#supplementary">supplementary characters</a>
* in the UTF-16 encoding.
* *
* @param ch the <code>char</code> value to be tested. * @param ch the {@code char} value to be tested.
* @return <code>true</code> if the <code>char</code> value * @return {@code true} if the {@code char} value is between
* is between '&#92;uDC00' and '&#92;uDFFF' inclusive; * {@link #MIN_LOW_SURROGATE} and
* <code>false</code> otherwise. * {@link #MAX_LOW_SURROGATE} inclusive;
* @see java.lang.Character#isHighSurrogate(char) * {@code false} otherwise.
* @since 1.5 * @see #isHighSurrogate(char)
* @since 1.5
*/ */
public static boolean isLowSurrogate(char ch) { public static boolean isLowSurrogate(char ch) {
return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE; return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
} }
/**
 * Tests whether the given {@code char} value is a Unicode
 * <i>surrogate code unit</i>, that is, either a
 * {@linkplain #isHighSurrogate(char) high-surrogate code unit} or a
 * {@linkplain #isLowSurrogate(char) low-surrogate code unit}.
 *
 * <p>A surrogate code unit does not represent a character on its
 * own; in the UTF-16 encoding, surrogates are used in the
 * representation of
 * <a href="#supplementary">supplementary characters</a>.
 *
 * @param  ch the {@code char} value to be tested.
 * @return {@code true} if the {@code char} value lies in the range
 *         {@link #MIN_SURROGATE} to {@link #MAX_SURROGATE}, both
 *         ends inclusive; {@code false} otherwise.
 * @since  1.7
 */
public static boolean isSurrogate(char ch) {
    // Anything below the start of the surrogate range is rejected up front ...
    if (ch < MIN_SURROGATE) {
        return false;
    }
    // ... so only the upper bound is left to check.
    return ch <= MAX_SURROGATE;
}
/** /**
* Determines whether the specified pair of <code>char</code> * Determines whether the specified pair of <code>char</code>
* values is a valid surrogate pair. This method is equivalent to * values is a valid
* the expression: * <a href="http://www.unicode.org/glossary/#surrogate_pair">
* Unicode surrogate pair</a>.
* <p>This method is equivalent to the expression:
* <blockquote><pre> * <blockquote><pre>
* isHighSurrogate(high) && isLowSurrogate(low) * isHighSurrogate(high) && isLowSurrogate(low)
* </pre></blockquote> * </pre></blockquote>
@ -4968,7 +5005,7 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
/** /**
* The number of bits used to represent a <tt>char</tt> value in unsigned * The number of bits used to represent a <tt>char</tt> value in unsigned
* binary form. * binary form, constant {@code 16}.
* *
* @since 1.5 * @since 1.5
*/ */

View File

@ -91,7 +91,7 @@ public abstract class CharToByteDBCS_ASCII extends CharToByteConverter
} }
// Is this a high surrogate? // Is this a high surrogate?
if (Surrogate.isHigh(inputChar)) { if (Character.isHighSurrogate(inputChar)) {
// Is this the last character of the input? // Is this the last character of the input?
if (charOff + inputSize >= inEnd) { if (charOff + inputSize >= inEnd) {
highHalfZoneCode = inputChar; highHalfZoneCode = inputChar;
@ -101,7 +101,7 @@ public abstract class CharToByteDBCS_ASCII extends CharToByteConverter
// Is there a low surrogate following? // Is there a low surrogate following?
inputChar = input[charOff + inputSize]; inputChar = input[charOff + inputSize];
if (Surrogate.isLow(inputChar)) { if (Character.isLowSurrogate(inputChar)) {
// We have a valid surrogate pair. Too bad we don't do // We have a valid surrogate pair. Too bad we don't do
// surrogates. Is substitution enabled? // surrogates. Is substitution enabled?
if (subMode) { if (subMode) {
@ -125,7 +125,7 @@ public abstract class CharToByteDBCS_ASCII extends CharToByteConverter
} }
} }
// Is this an unaccompanied low surrogate? // Is this an unaccompanied low surrogate?
else if (Surrogate.isLow(inputChar)) { else if (Character.isLowSurrogate(inputChar)) {
badInputLength = 1; badInputLength = 1;
throw new MalformedInputException(); throw new MalformedInputException();
} else { } else {

View File

@ -108,7 +108,7 @@ public abstract class CharToByteDBCS_EBCDIC extends CharToByteConverter
} }
// Is this a high surrogate? // Is this a high surrogate?
if (Surrogate.isHigh(inputChar)) { if (Character.isHighSurrogate(inputChar)) {
// Is this the last character of the input? // Is this the last character of the input?
if (charOff + inputSize >= inEnd) { if (charOff + inputSize >= inEnd) {
highHalfZoneCode = inputChar; highHalfZoneCode = inputChar;
@ -118,7 +118,7 @@ public abstract class CharToByteDBCS_EBCDIC extends CharToByteConverter
// Is there a low surrogate following? // Is there a low surrogate following?
inputChar = input[charOff + inputSize]; inputChar = input[charOff + inputSize];
if (Surrogate.isLow(inputChar)) { if (Character.isLowSurrogate(inputChar)) {
// We have a valid surrogate pair. Too bad we don't do // We have a valid surrogate pair. Too bad we don't do
// surrogates. Is substitution enabled? // surrogates. Is substitution enabled?
if (subMode) { if (subMode) {
@ -142,7 +142,7 @@ public abstract class CharToByteDBCS_EBCDIC extends CharToByteConverter
} }
} }
// Is this an unaccompanied low surrogate? // Is this an unaccompanied low surrogate?
else if (Surrogate.isLow(inputChar)) { else if (Character.isLowSurrogate(inputChar)) {
badInputLength = 1; badInputLength = 1;
throw new MalformedInputException(); throw new MalformedInputException();
} else { } else {

View File

@ -228,8 +228,8 @@ class ISO_8859_1
dst[dp++] = (byte)c; dst[dp++] = (byte)c;
continue; continue;
} }
if (Surrogate.isHigh(c) && sp < sl && if (Character.isHighSurrogate(c) && sp < sl &&
Surrogate.isLow(src[sp])) { Character.isLowSurrogate(src[sp])) {
if (len > dst.length) { if (len > dst.length) {
sl++; sl++;
len--; len--;

View File

@ -171,7 +171,7 @@ public class SingleByte
char c = sa[sp]; char c = sa[sp];
int b = encode(c); int b = encode(c);
if (b == UNMAPPABLE_ENCODING) { if (b == UNMAPPABLE_ENCODING) {
if (Surrogate.is(c)) { if (Character.isSurrogate(c)) {
if (sgp == null) if (sgp == null)
sgp = new Surrogate.Parser(); sgp = new Surrogate.Parser();
if (sgp.parse(c, sa, sp, sl) < 0) if (sgp.parse(c, sa, sp, sl) < 0)
@ -194,7 +194,7 @@ public class SingleByte
char c = src.get(); char c = src.get();
int b = encode(c); int b = encode(c);
if (b == UNMAPPABLE_ENCODING) { if (b == UNMAPPABLE_ENCODING) {
if (Surrogate.is(c)) { if (Character.isSurrogate(c)) {
if (sgp == null) if (sgp == null)
sgp = new Surrogate.Parser(); sgp = new Surrogate.Parser();
if (sgp.parse(c, src) < 0) if (sgp.parse(c, src) < 0)
@ -243,8 +243,8 @@ public class SingleByte
dst[dp++] = (byte)b; dst[dp++] = (byte)b;
continue; continue;
} }
if (Surrogate.isHigh(c) && sp < sl && if (Character.isHighSurrogate(c) && sp < sl &&
Surrogate.isLow(src[sp])) { Character.isLowSurrogate(src[sp])) {
if (len > dst.length) { if (len > dst.length) {
sl++; sl++;
len--; len--;

View File

@ -84,7 +84,7 @@ public abstract class SingleByteEncoder
try { try {
while (sp < sl) { while (sp < sl) {
char c = sa[sp]; char c = sa[sp];
if (Surrogate.is(c)) { if (Character.isSurrogate(c)) {
if (sgp.parse(c, sa, sp, sl) < 0) if (sgp.parse(c, sa, sp, sl) < 0)
return sgp.error(); return sgp.error();
return sgp.unmappableResult(); return sgp.unmappableResult();
@ -117,7 +117,7 @@ public abstract class SingleByteEncoder
try { try {
while (src.hasRemaining()) { while (src.hasRemaining()) {
char c = src.get(); char c = src.get();
if (Surrogate.is(c)) { if (Character.isSurrogate(c)) {
if (sgp.parse(c, src) < 0) if (sgp.parse(c, src) < 0)
return sgp.error(); return sgp.error();
return sgp.unmappableResult(); return sgp.unmappableResult();

View File

@ -51,21 +51,24 @@ public class Surrogate {
public static final int UCS4_MAX = Character.MAX_CODE_POINT; public static final int UCS4_MAX = Character.MAX_CODE_POINT;
/** /**
* Tells whether or not the given UTF-16 value is a high surrogate. * Tells whether or not the given value is in the high surrogate range.
* Use of {@link Character#isHighSurrogate} is generally preferred.
*/ */
public static boolean isHigh(int c) { public static boolean isHigh(int c) {
return (MIN_HIGH <= c) && (c <= MAX_HIGH); return (MIN_HIGH <= c) && (c <= MAX_HIGH);
} }
/** /**
* Tells whether or not the given UTF-16 value is a low surrogate. * Tells whether or not the given value is in the low surrogate range.
* Use of {@link Character#isLowSurrogate} is generally preferred.
*/ */
public static boolean isLow(int c) { public static boolean isLow(int c) {
return (MIN_LOW <= c) && (c <= MAX_LOW); return (MIN_LOW <= c) && (c <= MAX_LOW);
} }
/** /**
* Tells whether or not the given UTF-16 value is a surrogate character, * Tells whether or not the given value is in the surrogate range.
* Use of {@link Character#isSurrogate} is generally preferred.
*/ */
public static boolean is(int c) { public static boolean is(int c) {
return (MIN <= c) && (c <= MAX); return (MIN <= c) && (c <= MAX);
@ -88,7 +91,7 @@ public class Surrogate {
} }
/** /**
* Returns the high UTF-16 surrogate for the given UCS-4 character. * Returns the high UTF-16 surrogate for the given supplementary UCS-4 character.
*/ */
public static char high(int uc) { public static char high(int uc) {
assert Character.isSupplementaryCodePoint(uc); assert Character.isSupplementaryCodePoint(uc);
@ -98,7 +101,7 @@ public class Surrogate {
} }
/** /**
* Returns the low UTF-16 surrogate for the given UCS-4 character. * Returns the low UTF-16 surrogate for the given supplementary UCS-4 character.
*/ */
public static char low(int uc) { public static char low(int uc) {
assert Character.isSupplementaryCodePoint(uc); assert Character.isSupplementaryCodePoint(uc);

View File

@ -247,8 +247,8 @@ public class US_ASCII
dst[dp++] = (byte)c; dst[dp++] = (byte)c;
continue; continue;
} }
if (Surrogate.isHigh(c) && sp < sl && if (Character.isHighSurrogate(c) && sp < sl &&
Surrogate.isLow(src[sp])) { Character.isLowSurrogate(src[sp])) {
if (len > dst.length) { if (len > dst.length) {
sl++; sl++;
len--; len--;

View File

@ -154,11 +154,11 @@ class UTF_32Coder {
try { try {
while (src.hasRemaining()) { while (src.hasRemaining()) {
char c = src.get(); char c = src.get();
if (Surrogate.isHigh(c)) { if (Character.isHighSurrogate(c)) {
if (!src.hasRemaining()) if (!src.hasRemaining())
return CoderResult.UNDERFLOW; return CoderResult.UNDERFLOW;
char low = src.get(); char low = src.get();
if (Surrogate.isLow(low)) { if (Character.isLowSurrogate(low)) {
if (dst.remaining() < 4) if (dst.remaining() < 4)
return CoderResult.OVERFLOW; return CoderResult.OVERFLOW;
mark += 2; mark += 2;
@ -166,7 +166,7 @@ class UTF_32Coder {
} else { } else {
return CoderResult.malformedForLength(1); return CoderResult.malformedForLength(1);
} }
} else if (Surrogate.isLow(c)) { } else if (Character.isLowSurrogate(c)) {
return CoderResult.malformedForLength(1); return CoderResult.malformedForLength(1);
} else { } else {
if (dst.remaining() < 4) if (dst.remaining() < 4)

View File

@ -334,7 +334,7 @@ class UTF_8 extends Unicode
} }
public boolean canEncode(char c) { public boolean canEncode(char c) {
return !Surrogate.is(c); return !Character.isSurrogate(c);
} }
public boolean isLegalReplacement(byte[] repl) { public boolean isLegalReplacement(byte[] repl) {
@ -370,7 +370,7 @@ class UTF_8 extends Unicode
while (dp < dlASCII && sa[sp] < '\u0080') while (dp < dlASCII && sa[sp] < '\u0080')
da[dp++] = (byte) sa[sp++]; da[dp++] = (byte) sa[sp++];
while (sp < sl) { while (sp < sl) {
int c = sa[sp]; char c = sa[sp];
if (c < 0x80) { if (c < 0x80) {
// Have at most seven bits // Have at most seven bits
if (dp >= dl) if (dp >= dl)
@ -382,11 +382,11 @@ class UTF_8 extends Unicode
return overflow(src, sp, dst, dp); return overflow(src, sp, dst, dp);
da[dp++] = (byte)(0xc0 | ((c >> 06))); da[dp++] = (byte)(0xc0 | ((c >> 06)));
da[dp++] = (byte)(0x80 | (c & 0x3f)); da[dp++] = (byte)(0x80 | (c & 0x3f));
} else if (Surrogate.is(c)) { } else if (Character.isSurrogate(c)) {
// Have a surrogate pair // Have a surrogate pair
if (sgp == null) if (sgp == null)
sgp = new Surrogate.Parser(); sgp = new Surrogate.Parser();
int uc = sgp.parse((char)c, sa, sp, sl); int uc = sgp.parse(c, sa, sp, sl);
if (uc < 0) { if (uc < 0) {
updatePositions(src, sp, dst, dp); updatePositions(src, sp, dst, dp);
return sgp.error(); return sgp.error();
@ -417,7 +417,7 @@ class UTF_8 extends Unicode
{ {
int mark = src.position(); int mark = src.position();
while (src.hasRemaining()) { while (src.hasRemaining()) {
int c = src.get(); char c = src.get();
if (c < 0x80) { if (c < 0x80) {
// Have at most seven bits // Have at most seven bits
if (!dst.hasRemaining()) if (!dst.hasRemaining())
@ -429,11 +429,11 @@ class UTF_8 extends Unicode
return overflow(src, mark); return overflow(src, mark);
dst.put((byte)(0xc0 | ((c >> 06)))); dst.put((byte)(0xc0 | ((c >> 06))));
dst.put((byte)(0x80 | (c & 0x3f))); dst.put((byte)(0x80 | (c & 0x3f)));
} else if (Surrogate.is(c)) { } else if (Character.isSurrogate(c)) {
// Have a surrogate pair // Have a surrogate pair
if (sgp == null) if (sgp == null)
sgp = new Surrogate.Parser(); sgp = new Surrogate.Parser();
int uc = sgp.parse((char)c, src); int uc = sgp.parse(c, src);
if (uc < 0) { if (uc < 0) {
src.position(mark); src.position(mark);
return sgp.error(); return sgp.error();

View File

@ -97,12 +97,12 @@ abstract class UnicodeDecoder extends CharsetDecoder {
} }
// Surrogates // Surrogates
if (Surrogate.is(c)) { if (Character.isSurrogate(c)) {
if (Surrogate.isHigh(c)) { if (Character.isHighSurrogate(c)) {
if (src.remaining() < 2) if (src.remaining() < 2)
return CoderResult.UNDERFLOW; return CoderResult.UNDERFLOW;
char c2 = decode(src.get() & 0xff, src.get() & 0xff); char c2 = decode(src.get() & 0xff, src.get() & 0xff);
if (!Surrogate.isLow(c2)) if (!Character.isLowSurrogate(c2))
return CoderResult.malformedForLength(4); return CoderResult.malformedForLength(4);
if (dst.remaining() < 2) if (dst.remaining() < 2)
return CoderResult.OVERFLOW; return CoderResult.OVERFLOW;

View File

@ -80,7 +80,7 @@ public abstract class UnicodeEncoder extends CharsetEncoder {
try { try {
while (src.hasRemaining()) { while (src.hasRemaining()) {
char c = src.get(); char c = src.get();
if (!Surrogate.is(c)) { if (!Character.isSurrogate(c)) {
if (dst.remaining() < 2) if (dst.remaining() < 2)
return CoderResult.OVERFLOW; return CoderResult.OVERFLOW;
mark++; mark++;
@ -107,6 +107,6 @@ public abstract class UnicodeEncoder extends CharsetEncoder {
} }
public boolean canEncode(char c) { public boolean canEncode(char c) {
return ! Surrogate.is(c); return ! Character.isSurrogate(c);
} }
} }

View File

@ -449,7 +449,7 @@ public class DoubleByte {
char c = sa[sp]; char c = sa[sp];
int bb = encodeChar(c); int bb = encodeChar(c);
if (bb == UNMAPPABLE_ENCODING) { if (bb == UNMAPPABLE_ENCODING) {
if (Surrogate.is(c)) { if (Character.isSurrogate(c)) {
if (sgp().parse(c, sa, sp, sl) < 0) if (sgp().parse(c, sa, sp, sl) < 0)
return sgp.error(); return sgp.error();
return sgp.unmappableResult(); return sgp.unmappableResult();
@ -484,7 +484,7 @@ public class DoubleByte {
char c = src.get(); char c = src.get();
int bb = encodeChar(c); int bb = encodeChar(c);
if (bb == UNMAPPABLE_ENCODING) { if (bb == UNMAPPABLE_ENCODING) {
if (Surrogate.is(c)) { if (Character.isSurrogate(c)) {
if (sgp().parse(c, src) < 0) if (sgp().parse(c, src) < 0)
return sgp.error(); return sgp.error();
return sgp.unmappableResult(); return sgp.unmappableResult();
@ -659,7 +659,7 @@ public class DoubleByte {
char c = sa[sp]; char c = sa[sp];
int bb = encodeChar(c); int bb = encodeChar(c);
if (bb == UNMAPPABLE_ENCODING) { if (bb == UNMAPPABLE_ENCODING) {
if (Surrogate.is(c)) { if (Character.isSurrogate(c)) {
if (sgp().parse(c, sa, sp, sl) < 0) if (sgp().parse(c, sa, sp, sl) < 0)
return sgp.error(); return sgp.error();
return sgp.unmappableResult(); return sgp.unmappableResult();
@ -705,7 +705,7 @@ public class DoubleByte {
char c = src.get(); char c = src.get();
int bb = encodeChar(c); int bb = encodeChar(c);
if (bb == UNMAPPABLE_ENCODING) { if (bb == UNMAPPABLE_ENCODING) {
if (Surrogate.is(c)) { if (Character.isSurrogate(c)) {
if (sgp().parse(c, src) < 0) if (sgp().parse(c, src) < 0)
return sgp.error(); return sgp.error();
return sgp.unmappableResult(); return sgp.unmappableResult();

View File

@ -95,7 +95,7 @@ public abstract class DoubleByteEncoder
try { try {
while (sp < sl) { while (sp < sl) {
char c = sa[sp]; char c = sa[sp];
if (Surrogate.is(c)) { if (Character.isSurrogate(c)) {
if (sgp.parse(c, sa, sp, sl) < 0) if (sgp.parse(c, sa, sp, sl) < 0)
return sgp.error(); return sgp.error();
if (sl - sp < 2) if (sl - sp < 2)
@ -153,7 +153,7 @@ public abstract class DoubleByteEncoder
try { try {
while (src.hasRemaining()) { while (src.hasRemaining()) {
char c = src.get(); char c = src.get();
if (Surrogate.is(c)) { if (Character.isSurrogate(c)) {
int surr; int surr;
if ((surr = sgp.parse(c, src)) < 0) if ((surr = sgp.parse(c, src)) < 0)
return sgp.error(); return sgp.error();

View File

@ -318,7 +318,7 @@ public class EUC_JP
outputByte = tmpBuf; outputByte = tmpBuf;
char c = sa[sp]; char c = sa[sp];
if (Surrogate.is(c)) { if (Character.isSurrogate(c)) {
if (sgp.parse(c, sa, sp, sl) < 0) if (sgp.parse(c, sa, sp, sl) < 0)
return sgp.error(); return sgp.error();
return sgp.unmappableResult(); return sgp.unmappableResult();
@ -372,7 +372,7 @@ public class EUC_JP
while (src.hasRemaining()) { while (src.hasRemaining()) {
outputByte = tmpBuf; outputByte = tmpBuf;
char c = src.get(); char c = src.get();
if (Surrogate.is(c)) { if (Character.isSurrogate(c)) {
if (sgp.parse(c, src) < 0) if (sgp.parse(c, src) < 0)
return sgp.error(); return sgp.error();
return sgp.unmappableResult(); return sgp.unmappableResult();

View File

@ -285,7 +285,7 @@ public class EUC_JP_LINUX
while (sp < sl) { while (sp < sl) {
char c = sa[sp]; char c = sa[sp];
if (Surrogate.is(c)) { if (Character.isSurrogate(c)) {
if (sgp.parse(c, sa, sp, sl) < 0) if (sgp.parse(c, sa, sp, sl) < 0)
return sgp.error(); return sgp.error();
return sgp.unmappableResult(); return sgp.unmappableResult();
@ -327,7 +327,7 @@ public class EUC_JP_LINUX
try { try {
while (src.hasRemaining()) { while (src.hasRemaining()) {
char c = src.get(); char c = src.get();
if (Surrogate.is(c)) { if (Character.isSurrogate(c)) {
if (sgp.parse(c, src) < 0) if (sgp.parse(c, src) < 0)
return sgp.error(); return sgp.error();
return sgp.unmappableResult(); return sgp.unmappableResult();

View File

@ -310,11 +310,11 @@ public class EUC_TW extends Charset implements HistoricallyNamedCharset
int i = 0; int i = 0;
while (i < cs.length()) { while (i < cs.length()) {
char c = cs.charAt(i++); char c = cs.charAt(i++);
if (Surrogate.isHigh(c)) { if (Character.isHighSurrogate(c)) {
if (i == cs.length()) if (i == cs.length())
return false; return false;
char low = cs.charAt(i++); char low = cs.charAt(i++);
if (!Surrogate.isLow(low) || toEUC(c, low, bb) == -1) if (!Character.isLowSurrogate(low) || toEUC(c, low, bb) == -1)
return false; return false;
} else if (!canEncode(c)) { } else if (!canEncode(c)) {
return false; return false;
@ -361,14 +361,14 @@ public class EUC_TW extends Charset implements HistoricallyNamedCharset
// supplementary character encoding. given the use // supplementary character encoding. given the use
// of supplementary characters is really rare, this // of supplementary characters is really rare, this
// is something worth doing. // is something worth doing.
if (Surrogate.isHigh(c)) { if (Character.isHighSurrogate(c)) {
if ((sp + 1) == sl) if ((sp + 1) == sl)
return CoderResult.UNDERFLOW; return CoderResult.UNDERFLOW;
if (!Surrogate.isLow(sa[sp + 1])) if (!Character.isLowSurrogate(sa[sp + 1]))
return CoderResult.malformedForLength(1); return CoderResult.malformedForLength(1);
outSize = toEUC(c, sa[sp+1], bb); outSize = toEUC(c, sa[sp+1], bb);
inSize = 2; inSize = 2;
} else if (Surrogate.isLow(c)) { } else if (Character.isLowSurrogate(c)) {
return CoderResult.malformedForLength(1); return CoderResult.malformedForLength(1);
} }
} }
@ -405,15 +405,15 @@ public class EUC_TW extends Charset implements HistoricallyNamedCharset
} else { } else {
outSize = toEUC(c, bb); outSize = toEUC(c, bb);
if (outSize == -1) { if (outSize == -1) {
if (Surrogate.isHigh(c)) { if (Character.isHighSurrogate(c)) {
if (!src.hasRemaining()) if (!src.hasRemaining())
return CoderResult.UNDERFLOW; return CoderResult.UNDERFLOW;
char c2 = src.get(); char c2 = src.get();
if (!Surrogate.isLow(c2)) if (!Character.isLowSurrogate(c2))
return CoderResult.malformedForLength(1); return CoderResult.malformedForLength(1);
outSize = toEUC(c, c2, bb); outSize = toEUC(c, c2, bb);
inSize = 2; inSize = 2;
} else if (Surrogate.isLow(c)) { } else if (Character.isLowSurrogate(c)) {
return CoderResult.malformedForLength(1); return CoderResult.malformedForLength(1);
} }
} }

View File

@ -12586,7 +12586,7 @@ public class GB18030
} }
public boolean canEncode(char c) { public boolean canEncode(char c) {
return ! Surrogate.is(c); return ! Character.isSurrogate(c);
} }
private final Surrogate.Parser sgp = new Surrogate.Parser(); private final Surrogate.Parser sgp = new Surrogate.Parser();
@ -12625,7 +12625,7 @@ public class GB18030
int inputSize = 1; int inputSize = 1;
char c = sa[sp]; char c = sa[sp];
if (Surrogate.is(c)) { if (Character.isSurrogate(c)) {
if ((condensedKey=sgp.parse(c, sa, sp, sl)) < 0) if ((condensedKey=sgp.parse(c, sa, sp, sl)) < 0)
return sgp.error(); return sgp.error();
// Surrogate.toUCS4 looks like // Surrogate.toUCS4 looks like
@ -12719,7 +12719,7 @@ public class GB18030
while (src.hasRemaining()) { while (src.hasRemaining()) {
char c = src.get(); char c = src.get();
int inputSize = 1; int inputSize = 1;
if (Surrogate.is(c)) { if (Character.isSurrogate(c)) {
if ((condensedKey = sgp.parse(c, src))<0) if ((condensedKey = sgp.parse(c, src))<0)
return sgp.error(); return sgp.error();
condensedKey += 0x1e248; condensedKey += 0x1e248;

View File

@ -885,7 +885,7 @@ public class ISCII91 extends Charset implements HistoricallyNamedCharset
index = ((int)(inputChar) - 0x0900)*2; index = ((int)(inputChar) - 0x0900)*2;
} }
if (Surrogate.is(inputChar)) { if (Character.isSurrogate(inputChar)) {
if (sgp.parse(inputChar, sa, sp, sl) < 0) if (sgp.parse(inputChar, sa, sp, sl) < 0)
return sgp.error(); return sgp.error();
return sgp.unmappableResult(); return sgp.unmappableResult();
@ -948,7 +948,7 @@ public class ISCII91 extends Charset implements HistoricallyNamedCharset
index = ((int)(inputChar) - 0x0900)*2; index = ((int)(inputChar) - 0x0900)*2;
} }
if (Surrogate.is(inputChar)) { if (Character.isSurrogate(inputChar)) {
if (sgp.parse(inputChar, src) < 0) if (sgp.parse(inputChar, src) < 0)
return sgp.error(); return sgp.error();
return sgp.unmappableResult(); return sgp.unmappableResult();

View File

@ -515,7 +515,7 @@ abstract class ISO2022
try { try {
while (sp < sl) { while (sp < sl) {
char c = sa[sp]; char c = sa[sp];
if (Surrogate.is(c)) { if (Character.isSurrogate(c)) {
if (sgp.parse(c, sa, sp, sl) < 0) if (sgp.parse(c, sa, sp, sl) < 0)
return sgp.error(); return sgp.error();
return sgp.unmappableResult(); return sgp.unmappableResult();
@ -576,7 +576,7 @@ abstract class ISO2022
try { try {
while (src.hasRemaining()) { while (src.hasRemaining()) {
char inputChar = src.get(); char inputChar = src.get();
if (Surrogate.is(inputChar)) { if (Character.isSurrogate(inputChar)) {
if (sgp.parse(inputChar, src) < 0) if (sgp.parse(inputChar, src) < 0)
return sgp.error(); return sgp.error();
return sgp.unmappableResult(); return sgp.unmappableResult();

View File

@ -637,7 +637,7 @@ public class ISO2022_JP
da[dp++] = (byte)(index >> 8); da[dp++] = (byte)(index >> 8);
da[dp++] = (byte)(index & 0xff); da[dp++] = (byte)(index & 0xff);
} else { } else {
if (Surrogate.is(c) && sgp.parse(c, sa, sp, sl) < 0) if (Character.isSurrogate(c) && sgp.parse(c, sa, sp, sl) < 0)
return sgp.error(); return sgp.error();
if (unmappableCharacterAction() if (unmappableCharacterAction()
== CodingErrorAction.REPLACE == CodingErrorAction.REPLACE
@ -655,7 +655,7 @@ public class ISO2022_JP
} }
currentMode = replaceMode; currentMode = replaceMode;
} }
if (Surrogate.is(c)) if (Character.isSurrogate(c))
return sgp.unmappableResult(); return sgp.unmappableResult();
return CoderResult.unmappableForLength(1); return CoderResult.unmappableForLength(1);
} }
@ -745,7 +745,7 @@ public class ISO2022_JP
dst.put((byte)(index >> 8)); dst.put((byte)(index >> 8));
dst.put((byte)(index & 0xff)); dst.put((byte)(index & 0xff));
} else { } else {
if (Surrogate.is(c) && sgp.parse(c, src) < 0) if (Character.isSurrogate(c) && sgp.parse(c, src) < 0)
return sgp.error(); return sgp.error();
if (unmappableCharacterAction() == CodingErrorAction.REPLACE if (unmappableCharacterAction() == CodingErrorAction.REPLACE
&& currentMode != replaceMode) { && currentMode != replaceMode) {
@ -762,7 +762,7 @@ public class ISO2022_JP
} }
currentMode = replaceMode; currentMode = replaceMode;
} }
if (Surrogate.is(c)) if (Character.isSurrogate(c))
return sgp.unmappableResult(); return sgp.unmappableResult();
return CoderResult.unmappableForLength(1); return CoderResult.unmappableForLength(1);
} }

View File

@ -114,7 +114,7 @@ public abstract class SimpleEUCEncoder
while (sp < sl) { while (sp < sl) {
boolean allZeroes = true; boolean allZeroes = true;
char inputChar = sa[sp]; char inputChar = sa[sp];
if (Surrogate.is(inputChar)) { if (Character.isSurrogate(inputChar)) {
if (sgp.parse(inputChar, sa, sp, sl) < 0) if (sgp.parse(inputChar, sa, sp, sl) < 0)
return sgp.error(); return sgp.error();
return sgp.unmappableResult(); return sgp.unmappableResult();
@ -194,7 +194,7 @@ public abstract class SimpleEUCEncoder
while (src.hasRemaining()) { while (src.hasRemaining()) {
char inputChar = src.get(); char inputChar = src.get();
boolean allZeroes = true; boolean allZeroes = true;
if (Surrogate.is(inputChar)) { if (Character.isSurrogate(inputChar)) {
if (sgp.parse(inputChar, src) < 0) if (sgp.parse(inputChar, src) < 0)
return sgp.error(); return sgp.error();
return sgp.unmappableResult(); return sgp.unmappableResult();