6860431: Character.isSurrogate(char ch)
Add new method Character.isSurrogate(char ch).
Reviewed-by: sherman, darcy, okutsu
This commit is contained in:
parent
2c7126577e
commit
9ef7eb7884
@ -162,7 +162,7 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
|
||||
*
|
||||
* @since 1.0.2
|
||||
*/
|
||||
public static final char MAX_VALUE = '\uffff';
|
||||
public static final char MAX_VALUE = '\uFFFF';
|
||||
|
||||
/**
|
||||
* The <code>Class</code> instance representing the primitive type
|
||||
@ -518,75 +518,91 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
|
||||
public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18;
|
||||
|
||||
/**
|
||||
* The minimum value of a Unicode high-surrogate code unit in the
|
||||
* UTF-16 encoding. A high-surrogate is also known as a
|
||||
* <i>leading-surrogate</i>.
|
||||
* The minimum value of a
|
||||
* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
|
||||
* Unicode high-surrogate code unit</a>
|
||||
* in the UTF-16 encoding, constant <code>'\uD800'</code>.
|
||||
* A high-surrogate is also known as a <i>leading-surrogate</i>.
|
||||
*
|
||||
* @since 1.5
|
||||
*/
|
||||
public static final char MIN_HIGH_SURROGATE = '\uD800';
|
||||
|
||||
/**
|
||||
* The maximum value of a Unicode high-surrogate code unit in the
|
||||
* UTF-16 encoding. A high-surrogate is also known as a
|
||||
* <i>leading-surrogate</i>.
|
||||
* The maximum value of a
|
||||
* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
|
||||
* Unicode high-surrogate code unit</a>
|
||||
* in the UTF-16 encoding, constant <code>'\uDBFF'</code>.
|
||||
* A high-surrogate is also known as a <i>leading-surrogate</i>.
|
||||
*
|
||||
* @since 1.5
|
||||
*/
|
||||
public static final char MAX_HIGH_SURROGATE = '\uDBFF';
|
||||
|
||||
/**
|
||||
* The minimum value of a Unicode low-surrogate code unit in the
|
||||
* UTF-16 encoding. A low-surrogate is also known as a
|
||||
* <i>trailing-surrogate</i>.
|
||||
* The minimum value of a
|
||||
* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
|
||||
* Unicode low-surrogate code unit</a>
|
||||
* in the UTF-16 encoding, constant <code>'\uDC00'</code>.
|
||||
* A low-surrogate is also known as a <i>trailing-surrogate</i>.
|
||||
*
|
||||
* @since 1.5
|
||||
*/
|
||||
public static final char MIN_LOW_SURROGATE = '\uDC00';
|
||||
|
||||
/**
|
||||
* The maximum value of a Unicode low-surrogate code unit in the
|
||||
* UTF-16 encoding. A low-surrogate is also known as a
|
||||
* <i>trailing-surrogate</i>.
|
||||
* The maximum value of a
|
||||
* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
|
||||
* Unicode low-surrogate code unit</a>
|
||||
* in the UTF-16 encoding, constant <code>'\uDFFF'</code>.
|
||||
* A low-surrogate is also known as a <i>trailing-surrogate</i>.
|
||||
*
|
||||
* @since 1.5
|
||||
*/
|
||||
public static final char MAX_LOW_SURROGATE = '\uDFFF';
|
||||
|
||||
/**
|
||||
* The minimum value of a Unicode surrogate code unit in the UTF-16 encoding.
|
||||
* The minimum value of a Unicode surrogate code unit in the
|
||||
* UTF-16 encoding, constant <code>'\uD800'</code>.
|
||||
*
|
||||
* @since 1.5
|
||||
*/
|
||||
public static final char MIN_SURROGATE = MIN_HIGH_SURROGATE;
|
||||
|
||||
/**
|
||||
* The maximum value of a Unicode surrogate code unit in the UTF-16 encoding.
|
||||
* The maximum value of a Unicode surrogate code unit in the
|
||||
* UTF-16 encoding, constant <code>'\uDFFF'</code>.
|
||||
*
|
||||
* @since 1.5
|
||||
*/
|
||||
public static final char MAX_SURROGATE = MAX_LOW_SURROGATE;
|
||||
|
||||
/**
|
||||
* The minimum value of a supplementary code point.
|
||||
* The minimum value of a
|
||||
* <a href="http://www.unicode.org/glossary/#supplementary_code_point">
|
||||
* Unicode supplementary code point</a>, constant {@code U+10000}.
|
||||
*
|
||||
* @since 1.5
|
||||
*/
|
||||
public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
|
||||
|
||||
/**
|
||||
* The minimum value of a Unicode code point.
|
||||
* The minimum value of a
|
||||
* <a href="http://www.unicode.org/glossary/#code_point">
|
||||
* Unicode code point</a>, constant {@code U+0000}.
|
||||
*
|
||||
* @since 1.5
|
||||
*/
|
||||
public static final int MIN_CODE_POINT = 0x000000;
|
||||
|
||||
/**
|
||||
* The maximum value of a Unicode code point.
|
||||
* The maximum value of a
|
||||
* <a href="http://www.unicode.org/glossary/#code_point">
|
||||
* Unicode code point</a>, constant {@code U+10FFFF}.
|
||||
*
|
||||
* @since 1.5
|
||||
*/
|
||||
public static final int MAX_CODE_POINT = 0x10ffff;
|
||||
public static final int MAX_CODE_POINT = 0X10FFFF;
|
||||
|
||||
|
||||
/**
|
||||
@ -2648,19 +2664,15 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the specified code point is a valid Unicode
|
||||
* code point value in the range of <code>0x0000</code> to
|
||||
* <code>0x10FFFF</code> inclusive. This method is equivalent to
|
||||
* the expression:
|
||||
*
|
||||
* <blockquote><pre>
|
||||
* codePoint >= 0x0000 && codePoint <= 0x10FFFF
|
||||
* </pre></blockquote>
|
||||
* Determines whether the specified code point is a valid
|
||||
* <a href="http://www.unicode.org/glossary/#code_point">
|
||||
* Unicode code point value</a>.
|
||||
*
|
||||
* @param codePoint the Unicode code point to be tested
|
||||
* @return <code>true</code> if the specified code point value
|
||||
* is a valid code point value;
|
||||
* <code>false</code> otherwise.
|
||||
* @return {@code true} if the specified code point value is between
|
||||
* {@link #MIN_CODE_POINT} and
|
||||
* {@link #MAX_CODE_POINT} inclusive;
|
||||
* {@code false} otherwise.
|
||||
* @since 1.5
|
||||
*/
|
||||
public static boolean isValidCodePoint(int codePoint) {
|
||||
@ -2669,15 +2681,13 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
|
||||
|
||||
/**
|
||||
* Determines whether the specified character (Unicode code point)
|
||||
* is in the supplementary character range. The method call is
|
||||
* equivalent to the expression:
|
||||
* <blockquote><pre>
|
||||
* codePoint >= 0x10000 && codePoint <= 0x10FFFF
|
||||
* </pre></blockquote>
|
||||
* is in the <a href="#supplementary">supplementary character</a> range.
|
||||
*
|
||||
* @param codePoint the character (Unicode code point) to be tested
|
||||
* @return <code>true</code> if the specified character is in the Unicode
|
||||
* supplementary character range; <code>false</code> otherwise.
|
||||
* @return {@code true} if the specified code point is between
|
||||
* {@link #MIN_SUPPLEMENTARY_CODE_POINT} and
|
||||
* {@link #MAX_CODE_POINT} inclusive;
|
||||
* {@code false} otherwise.
|
||||
* @since 1.5
|
||||
*/
|
||||
public static boolean isSupplementaryCodePoint(int codePoint) {
|
||||
@ -2686,56 +2696,83 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if the given <code>char</code> value is a
|
||||
* high-surrogate code unit (also known as <i>leading-surrogate
|
||||
* code unit</i>). Such values do not represent characters by
|
||||
* themselves, but are used in the representation of <a
|
||||
* href="#supplementary">supplementary characters</a> in the
|
||||
* UTF-16 encoding.
|
||||
* Determines if the given {@code char} value is a
|
||||
* <a href="http://www.unicode.org/glossary/#high_surrogate_code_unit">
|
||||
* Unicode high-surrogate code unit</a>
|
||||
* (also known as <i>leading-surrogate code unit</i>).
|
||||
*
|
||||
* <p>This method returns <code>true</code> if and only if
|
||||
* <blockquote><pre>ch >= '\uD800' && ch <= '\uDBFF'
|
||||
* </pre></blockquote>
|
||||
* is <code>true</code>.
|
||||
* <p>Such values do not represent characters by themselves,
|
||||
* but are used in the representation of
|
||||
* <a href="#supplementary">supplementary characters</a>
|
||||
* in the UTF-16 encoding.
|
||||
*
|
||||
* @param ch the <code>char</code> value to be tested.
|
||||
* @return <code>true</code> if the <code>char</code> value
|
||||
* is between '\uD800' and '\uDBFF' inclusive;
|
||||
* <code>false</code> otherwise.
|
||||
* @see java.lang.Character#isLowSurrogate(char)
|
||||
* @see Character.UnicodeBlock#of(int)
|
||||
* @since 1.5
|
||||
* @param ch the {@code char} value to be tested.
|
||||
* @return {@code true} if the {@code char} value is between
|
||||
* {@link #MIN_HIGH_SURROGATE} and
|
||||
* {@link #MAX_HIGH_SURROGATE} inclusive;
|
||||
* {@code false} otherwise.
|
||||
* @see #isLowSurrogate(char)
|
||||
* @see Character.UnicodeBlock#of(int)
|
||||
* @since 1.5
|
||||
*/
|
||||
public static boolean isHighSurrogate(char ch) {
|
||||
return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if the given <code>char</code> value is a
|
||||
* low-surrogate code unit (also known as <i>trailing-surrogate code
|
||||
* unit</i>). Such values do not represent characters by themselves,
|
||||
* but are used in the representation of <a
|
||||
* href="#supplementary">supplementary characters</a> in the UTF-16 encoding.
|
||||
* Determines if the given {@code char} value is a
|
||||
* <a href="http://www.unicode.org/glossary/#low_surrogate_code_unit">
|
||||
* Unicode low-surrogate code unit</a>
|
||||
* (also known as <i>trailing-surrogate code unit</i>).
|
||||
*
|
||||
* <p> This method returns <code>true</code> if and only if
|
||||
* <blockquote><pre>ch >= '\uDC00' && ch <= '\uDFFF'
|
||||
* </pre></blockquote> is <code>true</code>.
|
||||
* <p>Such values do not represent characters by themselves,
|
||||
* but are used in the representation of
|
||||
* <a href="#supplementary">supplementary characters</a>
|
||||
* in the UTF-16 encoding.
|
||||
*
|
||||
* @param ch the <code>char</code> value to be tested.
|
||||
* @return <code>true</code> if the <code>char</code> value
|
||||
* is between '\uDC00' and '\uDFFF' inclusive;
|
||||
* <code>false</code> otherwise.
|
||||
* @see java.lang.Character#isHighSurrogate(char)
|
||||
* @since 1.5
|
||||
* @param ch the {@code char} value to be tested.
|
||||
* @return {@code true} if the {@code char} value is between
|
||||
* {@link #MIN_LOW_SURROGATE} and
|
||||
* {@link #MAX_LOW_SURROGATE} inclusive;
|
||||
* {@code false} otherwise.
|
||||
* @see #isHighSurrogate(char)
|
||||
* @since 1.5
|
||||
*/
|
||||
public static boolean isLowSurrogate(char ch) {
|
||||
return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines if the given {@code char} value is a Unicode
|
||||
* <i>surrogate code unit</i>.
|
||||
*
|
||||
* <p>Such values do not represent characters by themselves,
|
||||
* but are used in the representation of
|
||||
* <a href="#supplementary">supplementary characters</a>
|
||||
* in the UTF-16 encoding.
|
||||
*
|
||||
* <p>A char value is a surrogate code unit if and only if it is either
|
||||
* a {@linkplain #isLowSurrogate(char) low-surrogate code unit} or
|
||||
* a {@linkplain #isHighSurrogate(char) high-surrogate code unit}.
|
||||
*
|
||||
* @param ch the {@code char} value to be tested.
|
||||
* @return {@code true} if the {@code char} value is between
|
||||
* {@link #MIN_SURROGATE} and
|
||||
* {@link #MAX_SURROGATE} inclusive;
|
||||
* {@code false} otherwise.
|
||||
* @since 1.7
|
||||
*/
|
||||
public static boolean isSurrogate(char ch) {
|
||||
return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the specified pair of <code>char</code>
|
||||
* values is a valid surrogate pair. This method is equivalent to
|
||||
* the expression:
|
||||
* values is a valid
|
||||
* <a href="http://www.unicode.org/glossary/#surrogate_pair">
|
||||
* Unicode surrogate pair</a>.
|
||||
|
||||
* <p>This method is equivalent to the expression:
|
||||
* <blockquote><pre>
|
||||
* isHighSurrogate(high) && isLowSurrogate(low)
|
||||
* </pre></blockquote>
|
||||
@ -4968,7 +5005,7 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
|
||||
|
||||
/**
|
||||
* The number of bits used to represent a <tt>char</tt> value in unsigned
|
||||
* binary form.
|
||||
* binary form, constant {@code 16}.
|
||||
*
|
||||
* @since 1.5
|
||||
*/
|
||||
|
@ -91,7 +91,7 @@ public abstract class CharToByteDBCS_ASCII extends CharToByteConverter
|
||||
}
|
||||
|
||||
// Is this a high surrogate?
|
||||
if (Surrogate.isHigh(inputChar)) {
|
||||
if (Character.isHighSurrogate(inputChar)) {
|
||||
// Is this the last character of the input?
|
||||
if (charOff + inputSize >= inEnd) {
|
||||
highHalfZoneCode = inputChar;
|
||||
@ -101,7 +101,7 @@ public abstract class CharToByteDBCS_ASCII extends CharToByteConverter
|
||||
|
||||
// Is there a low surrogate following?
|
||||
inputChar = input[charOff + inputSize];
|
||||
if (Surrogate.isLow(inputChar)) {
|
||||
if (Character.isLowSurrogate(inputChar)) {
|
||||
// We have a valid surrogate pair. Too bad we don't do
|
||||
// surrogates. Is substitution enabled?
|
||||
if (subMode) {
|
||||
@ -125,7 +125,7 @@ public abstract class CharToByteDBCS_ASCII extends CharToByteConverter
|
||||
}
|
||||
}
|
||||
// Is this an unaccompanied low surrogate?
|
||||
else if (Surrogate.isLow(inputChar)) {
|
||||
else if (Character.isLowSurrogate(inputChar)) {
|
||||
badInputLength = 1;
|
||||
throw new MalformedInputException();
|
||||
} else {
|
||||
|
@ -108,7 +108,7 @@ public abstract class CharToByteDBCS_EBCDIC extends CharToByteConverter
|
||||
}
|
||||
|
||||
// Is this a high surrogate?
|
||||
if (Surrogate.isHigh(inputChar)) {
|
||||
if (Character.isHighSurrogate(inputChar)) {
|
||||
// Is this the last character of the input?
|
||||
if (charOff + inputSize >= inEnd) {
|
||||
highHalfZoneCode = inputChar;
|
||||
@ -118,7 +118,7 @@ public abstract class CharToByteDBCS_EBCDIC extends CharToByteConverter
|
||||
|
||||
// Is there a low surrogate following?
|
||||
inputChar = input[charOff + inputSize];
|
||||
if (Surrogate.isLow(inputChar)) {
|
||||
if (Character.isLowSurrogate(inputChar)) {
|
||||
// We have a valid surrogate pair. Too bad we don't do
|
||||
// surrogates. Is substitution enabled?
|
||||
if (subMode) {
|
||||
@ -142,7 +142,7 @@ public abstract class CharToByteDBCS_EBCDIC extends CharToByteConverter
|
||||
}
|
||||
}
|
||||
// Is this an unaccompanied low surrogate?
|
||||
else if (Surrogate.isLow(inputChar)) {
|
||||
else if (Character.isLowSurrogate(inputChar)) {
|
||||
badInputLength = 1;
|
||||
throw new MalformedInputException();
|
||||
} else {
|
||||
|
@ -228,8 +228,8 @@ class ISO_8859_1
|
||||
dst[dp++] = (byte)c;
|
||||
continue;
|
||||
}
|
||||
if (Surrogate.isHigh(c) && sp < sl &&
|
||||
Surrogate.isLow(src[sp])) {
|
||||
if (Character.isHighSurrogate(c) && sp < sl &&
|
||||
Character.isLowSurrogate(src[sp])) {
|
||||
if (len > dst.length) {
|
||||
sl++;
|
||||
len--;
|
||||
|
@ -171,7 +171,7 @@ public class SingleByte
|
||||
char c = sa[sp];
|
||||
int b = encode(c);
|
||||
if (b == UNMAPPABLE_ENCODING) {
|
||||
if (Surrogate.is(c)) {
|
||||
if (Character.isSurrogate(c)) {
|
||||
if (sgp == null)
|
||||
sgp = new Surrogate.Parser();
|
||||
if (sgp.parse(c, sa, sp, sl) < 0)
|
||||
@ -194,7 +194,7 @@ public class SingleByte
|
||||
char c = src.get();
|
||||
int b = encode(c);
|
||||
if (b == UNMAPPABLE_ENCODING) {
|
||||
if (Surrogate.is(c)) {
|
||||
if (Character.isSurrogate(c)) {
|
||||
if (sgp == null)
|
||||
sgp = new Surrogate.Parser();
|
||||
if (sgp.parse(c, src) < 0)
|
||||
@ -243,8 +243,8 @@ public class SingleByte
|
||||
dst[dp++] = (byte)b;
|
||||
continue;
|
||||
}
|
||||
if (Surrogate.isHigh(c) && sp < sl &&
|
||||
Surrogate.isLow(src[sp])) {
|
||||
if (Character.isHighSurrogate(c) && sp < sl &&
|
||||
Character.isLowSurrogate(src[sp])) {
|
||||
if (len > dst.length) {
|
||||
sl++;
|
||||
len--;
|
||||
|
@ -84,7 +84,7 @@ public abstract class SingleByteEncoder
|
||||
try {
|
||||
while (sp < sl) {
|
||||
char c = sa[sp];
|
||||
if (Surrogate.is(c)) {
|
||||
if (Character.isSurrogate(c)) {
|
||||
if (sgp.parse(c, sa, sp, sl) < 0)
|
||||
return sgp.error();
|
||||
return sgp.unmappableResult();
|
||||
@ -117,7 +117,7 @@ public abstract class SingleByteEncoder
|
||||
try {
|
||||
while (src.hasRemaining()) {
|
||||
char c = src.get();
|
||||
if (Surrogate.is(c)) {
|
||||
if (Character.isSurrogate(c)) {
|
||||
if (sgp.parse(c, src) < 0)
|
||||
return sgp.error();
|
||||
return sgp.unmappableResult();
|
||||
|
@ -51,21 +51,24 @@ public class Surrogate {
|
||||
public static final int UCS4_MAX = Character.MAX_CODE_POINT;
|
||||
|
||||
/**
|
||||
* Tells whether or not the given UTF-16 value is a high surrogate.
|
||||
* Tells whether or not the given value is in the high surrogate range.
|
||||
* Use of {@link Character#isHighSurrogate} is generally preferred.
|
||||
*/
|
||||
public static boolean isHigh(int c) {
|
||||
return (MIN_HIGH <= c) && (c <= MAX_HIGH);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells whether or not the given UTF-16 value is a low surrogate.
|
||||
* Tells whether or not the given value is in the low surrogate range.
|
||||
* Use of {@link Character#isLowSurrogate} is generally preferred.
|
||||
*/
|
||||
public static boolean isLow(int c) {
|
||||
return (MIN_LOW <= c) && (c <= MAX_LOW);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells whether or not the given UTF-16 value is a surrogate character,
|
||||
* Tells whether or not the given value is in the surrogate range.
|
||||
* Use of {@link Character#isSurrogate} is generally preferred.
|
||||
*/
|
||||
public static boolean is(int c) {
|
||||
return (MIN <= c) && (c <= MAX);
|
||||
@ -88,7 +91,7 @@ public class Surrogate {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the high UTF-16 surrogate for the given UCS-4 character.
|
||||
* Returns the high UTF-16 surrogate for the given supplementary UCS-4 character.
|
||||
*/
|
||||
public static char high(int uc) {
|
||||
assert Character.isSupplementaryCodePoint(uc);
|
||||
@ -98,7 +101,7 @@ public class Surrogate {
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the low UTF-16 surrogate for the given UCS-4 character.
|
||||
* Returns the low UTF-16 surrogate for the given supplementary UCS-4 character.
|
||||
*/
|
||||
public static char low(int uc) {
|
||||
assert Character.isSupplementaryCodePoint(uc);
|
||||
|
@ -247,8 +247,8 @@ public class US_ASCII
|
||||
dst[dp++] = (byte)c;
|
||||
continue;
|
||||
}
|
||||
if (Surrogate.isHigh(c) && sp < sl &&
|
||||
Surrogate.isLow(src[sp])) {
|
||||
if (Character.isHighSurrogate(c) && sp < sl &&
|
||||
Character.isLowSurrogate(src[sp])) {
|
||||
if (len > dst.length) {
|
||||
sl++;
|
||||
len--;
|
||||
|
@ -154,11 +154,11 @@ class UTF_32Coder {
|
||||
try {
|
||||
while (src.hasRemaining()) {
|
||||
char c = src.get();
|
||||
if (Surrogate.isHigh(c)) {
|
||||
if (Character.isHighSurrogate(c)) {
|
||||
if (!src.hasRemaining())
|
||||
return CoderResult.UNDERFLOW;
|
||||
char low = src.get();
|
||||
if (Surrogate.isLow(low)) {
|
||||
if (Character.isLowSurrogate(low)) {
|
||||
if (dst.remaining() < 4)
|
||||
return CoderResult.OVERFLOW;
|
||||
mark += 2;
|
||||
@ -166,7 +166,7 @@ class UTF_32Coder {
|
||||
} else {
|
||||
return CoderResult.malformedForLength(1);
|
||||
}
|
||||
} else if (Surrogate.isLow(c)) {
|
||||
} else if (Character.isLowSurrogate(c)) {
|
||||
return CoderResult.malformedForLength(1);
|
||||
} else {
|
||||
if (dst.remaining() < 4)
|
||||
|
@ -334,7 +334,7 @@ class UTF_8 extends Unicode
|
||||
}
|
||||
|
||||
public boolean canEncode(char c) {
|
||||
return !Surrogate.is(c);
|
||||
return !Character.isSurrogate(c);
|
||||
}
|
||||
|
||||
public boolean isLegalReplacement(byte[] repl) {
|
||||
@ -370,7 +370,7 @@ class UTF_8 extends Unicode
|
||||
while (dp < dlASCII && sa[sp] < '\u0080')
|
||||
da[dp++] = (byte) sa[sp++];
|
||||
while (sp < sl) {
|
||||
int c = sa[sp];
|
||||
char c = sa[sp];
|
||||
if (c < 0x80) {
|
||||
// Have at most seven bits
|
||||
if (dp >= dl)
|
||||
@ -382,11 +382,11 @@ class UTF_8 extends Unicode
|
||||
return overflow(src, sp, dst, dp);
|
||||
da[dp++] = (byte)(0xc0 | ((c >> 06)));
|
||||
da[dp++] = (byte)(0x80 | (c & 0x3f));
|
||||
} else if (Surrogate.is(c)) {
|
||||
} else if (Character.isSurrogate(c)) {
|
||||
// Have a surrogate pair
|
||||
if (sgp == null)
|
||||
sgp = new Surrogate.Parser();
|
||||
int uc = sgp.parse((char)c, sa, sp, sl);
|
||||
int uc = sgp.parse(c, sa, sp, sl);
|
||||
if (uc < 0) {
|
||||
updatePositions(src, sp, dst, dp);
|
||||
return sgp.error();
|
||||
@ -417,7 +417,7 @@ class UTF_8 extends Unicode
|
||||
{
|
||||
int mark = src.position();
|
||||
while (src.hasRemaining()) {
|
||||
int c = src.get();
|
||||
char c = src.get();
|
||||
if (c < 0x80) {
|
||||
// Have at most seven bits
|
||||
if (!dst.hasRemaining())
|
||||
@ -429,11 +429,11 @@ class UTF_8 extends Unicode
|
||||
return overflow(src, mark);
|
||||
dst.put((byte)(0xc0 | ((c >> 06))));
|
||||
dst.put((byte)(0x80 | (c & 0x3f)));
|
||||
} else if (Surrogate.is(c)) {
|
||||
} else if (Character.isSurrogate(c)) {
|
||||
// Have a surrogate pair
|
||||
if (sgp == null)
|
||||
sgp = new Surrogate.Parser();
|
||||
int uc = sgp.parse((char)c, src);
|
||||
int uc = sgp.parse(c, src);
|
||||
if (uc < 0) {
|
||||
src.position(mark);
|
||||
return sgp.error();
|
||||
|
@ -97,12 +97,12 @@ abstract class UnicodeDecoder extends CharsetDecoder {
|
||||
}
|
||||
|
||||
// Surrogates
|
||||
if (Surrogate.is(c)) {
|
||||
if (Surrogate.isHigh(c)) {
|
||||
if (Character.isSurrogate(c)) {
|
||||
if (Character.isHighSurrogate(c)) {
|
||||
if (src.remaining() < 2)
|
||||
return CoderResult.UNDERFLOW;
|
||||
char c2 = decode(src.get() & 0xff, src.get() & 0xff);
|
||||
if (!Surrogate.isLow(c2))
|
||||
if (!Character.isLowSurrogate(c2))
|
||||
return CoderResult.malformedForLength(4);
|
||||
if (dst.remaining() < 2)
|
||||
return CoderResult.OVERFLOW;
|
||||
|
@ -80,7 +80,7 @@ public abstract class UnicodeEncoder extends CharsetEncoder {
|
||||
try {
|
||||
while (src.hasRemaining()) {
|
||||
char c = src.get();
|
||||
if (!Surrogate.is(c)) {
|
||||
if (!Character.isSurrogate(c)) {
|
||||
if (dst.remaining() < 2)
|
||||
return CoderResult.OVERFLOW;
|
||||
mark++;
|
||||
@ -107,6 +107,6 @@ public abstract class UnicodeEncoder extends CharsetEncoder {
|
||||
}
|
||||
|
||||
public boolean canEncode(char c) {
|
||||
return ! Surrogate.is(c);
|
||||
return ! Character.isSurrogate(c);
|
||||
}
|
||||
}
|
||||
|
@ -449,7 +449,7 @@ public class DoubleByte {
|
||||
char c = sa[sp];
|
||||
int bb = encodeChar(c);
|
||||
if (bb == UNMAPPABLE_ENCODING) {
|
||||
if (Surrogate.is(c)) {
|
||||
if (Character.isSurrogate(c)) {
|
||||
if (sgp().parse(c, sa, sp, sl) < 0)
|
||||
return sgp.error();
|
||||
return sgp.unmappableResult();
|
||||
@ -484,7 +484,7 @@ public class DoubleByte {
|
||||
char c = src.get();
|
||||
int bb = encodeChar(c);
|
||||
if (bb == UNMAPPABLE_ENCODING) {
|
||||
if (Surrogate.is(c)) {
|
||||
if (Character.isSurrogate(c)) {
|
||||
if (sgp().parse(c, src) < 0)
|
||||
return sgp.error();
|
||||
return sgp.unmappableResult();
|
||||
@ -659,7 +659,7 @@ public class DoubleByte {
|
||||
char c = sa[sp];
|
||||
int bb = encodeChar(c);
|
||||
if (bb == UNMAPPABLE_ENCODING) {
|
||||
if (Surrogate.is(c)) {
|
||||
if (Character.isSurrogate(c)) {
|
||||
if (sgp().parse(c, sa, sp, sl) < 0)
|
||||
return sgp.error();
|
||||
return sgp.unmappableResult();
|
||||
@ -705,7 +705,7 @@ public class DoubleByte {
|
||||
char c = src.get();
|
||||
int bb = encodeChar(c);
|
||||
if (bb == UNMAPPABLE_ENCODING) {
|
||||
if (Surrogate.is(c)) {
|
||||
if (Character.isSurrogate(c)) {
|
||||
if (sgp().parse(c, src) < 0)
|
||||
return sgp.error();
|
||||
return sgp.unmappableResult();
|
||||
|
@ -95,7 +95,7 @@ public abstract class DoubleByteEncoder
|
||||
try {
|
||||
while (sp < sl) {
|
||||
char c = sa[sp];
|
||||
if (Surrogate.is(c)) {
|
||||
if (Character.isSurrogate(c)) {
|
||||
if (sgp.parse(c, sa, sp, sl) < 0)
|
||||
return sgp.error();
|
||||
if (sl - sp < 2)
|
||||
@ -153,7 +153,7 @@ public abstract class DoubleByteEncoder
|
||||
try {
|
||||
while (src.hasRemaining()) {
|
||||
char c = src.get();
|
||||
if (Surrogate.is(c)) {
|
||||
if (Character.isSurrogate(c)) {
|
||||
int surr;
|
||||
if ((surr = sgp.parse(c, src)) < 0)
|
||||
return sgp.error();
|
||||
|
@ -318,7 +318,7 @@ public class EUC_JP
|
||||
outputByte = tmpBuf;
|
||||
char c = sa[sp];
|
||||
|
||||
if (Surrogate.is(c)) {
|
||||
if (Character.isSurrogate(c)) {
|
||||
if (sgp.parse(c, sa, sp, sl) < 0)
|
||||
return sgp.error();
|
||||
return sgp.unmappableResult();
|
||||
@ -372,7 +372,7 @@ public class EUC_JP
|
||||
while (src.hasRemaining()) {
|
||||
outputByte = tmpBuf;
|
||||
char c = src.get();
|
||||
if (Surrogate.is(c)) {
|
||||
if (Character.isSurrogate(c)) {
|
||||
if (sgp.parse(c, src) < 0)
|
||||
return sgp.error();
|
||||
return sgp.unmappableResult();
|
||||
|
@ -285,7 +285,7 @@ public class EUC_JP_LINUX
|
||||
while (sp < sl) {
|
||||
char c = sa[sp];
|
||||
|
||||
if (Surrogate.is(c)) {
|
||||
if (Character.isSurrogate(c)) {
|
||||
if (sgp.parse(c, sa, sp, sl) < 0)
|
||||
return sgp.error();
|
||||
return sgp.unmappableResult();
|
||||
@ -327,7 +327,7 @@ public class EUC_JP_LINUX
|
||||
try {
|
||||
while (src.hasRemaining()) {
|
||||
char c = src.get();
|
||||
if (Surrogate.is(c)) {
|
||||
if (Character.isSurrogate(c)) {
|
||||
if (sgp.parse(c, src) < 0)
|
||||
return sgp.error();
|
||||
return sgp.unmappableResult();
|
||||
|
@ -310,11 +310,11 @@ public class EUC_TW extends Charset implements HistoricallyNamedCharset
|
||||
int i = 0;
|
||||
while (i < cs.length()) {
|
||||
char c = cs.charAt(i++);
|
||||
if (Surrogate.isHigh(c)) {
|
||||
if (Character.isHighSurrogate(c)) {
|
||||
if (i == cs.length())
|
||||
return false;
|
||||
char low = cs.charAt(i++);
|
||||
if (!Surrogate.isLow(low) || toEUC(c, low, bb) == -1)
|
||||
if (!Character.isLowSurrogate(low) || toEUC(c, low, bb) == -1)
|
||||
return false;
|
||||
} else if (!canEncode(c)) {
|
||||
return false;
|
||||
@ -361,14 +361,14 @@ public class EUC_TW extends Charset implements HistoricallyNamedCharset
|
||||
// supplementary character encoding. Given the use
|
||||
// of supplementary characters is really rare, this
|
||||
// is something worth doing.
|
||||
if (Surrogate.isHigh(c)) {
|
||||
if (Character.isHighSurrogate(c)) {
|
||||
if ((sp + 1) == sl)
|
||||
return CoderResult.UNDERFLOW;
|
||||
if (!Surrogate.isLow(sa[sp + 1]))
|
||||
if (!Character.isLowSurrogate(sa[sp + 1]))
|
||||
return CoderResult.malformedForLength(1);
|
||||
outSize = toEUC(c, sa[sp+1], bb);
|
||||
inSize = 2;
|
||||
} else if (Surrogate.isLow(c)) {
|
||||
} else if (Character.isLowSurrogate(c)) {
|
||||
return CoderResult.malformedForLength(1);
|
||||
}
|
||||
}
|
||||
@ -405,15 +405,15 @@ public class EUC_TW extends Charset implements HistoricallyNamedCharset
|
||||
} else {
|
||||
outSize = toEUC(c, bb);
|
||||
if (outSize == -1) {
|
||||
if (Surrogate.isHigh(c)) {
|
||||
if (Character.isHighSurrogate(c)) {
|
||||
if (!src.hasRemaining())
|
||||
return CoderResult.UNDERFLOW;
|
||||
char c2 = src.get();
|
||||
if (!Surrogate.isLow(c2))
|
||||
if (!Character.isLowSurrogate(c2))
|
||||
return CoderResult.malformedForLength(1);
|
||||
outSize = toEUC(c, c2, bb);
|
||||
inSize = 2;
|
||||
} else if (Surrogate.isLow(c)) {
|
||||
} else if (Character.isLowSurrogate(c)) {
|
||||
return CoderResult.malformedForLength(1);
|
||||
}
|
||||
}
|
||||
|
@ -12586,7 +12586,7 @@ public class GB18030
|
||||
}
|
||||
|
||||
public boolean canEncode(char c) {
|
||||
return ! Surrogate.is(c);
|
||||
return ! Character.isSurrogate(c);
|
||||
}
|
||||
|
||||
private final Surrogate.Parser sgp = new Surrogate.Parser();
|
||||
@ -12625,7 +12625,7 @@ public class GB18030
|
||||
int inputSize = 1;
|
||||
char c = sa[sp];
|
||||
|
||||
if (Surrogate.is(c)) {
|
||||
if (Character.isSurrogate(c)) {
|
||||
if ((condensedKey=sgp.parse(c, sa, sp, sl)) < 0)
|
||||
return sgp.error();
|
||||
// Surrogate.toUCS4 looks like
|
||||
@ -12719,7 +12719,7 @@ public class GB18030
|
||||
while (src.hasRemaining()) {
|
||||
char c = src.get();
|
||||
int inputSize = 1;
|
||||
if (Surrogate.is(c)) {
|
||||
if (Character.isSurrogate(c)) {
|
||||
if ((condensedKey = sgp.parse(c, src))<0)
|
||||
return sgp.error();
|
||||
condensedKey += 0x1e248;
|
||||
|
@ -885,7 +885,7 @@ public class ISCII91 extends Charset implements HistoricallyNamedCharset
|
||||
index = ((int)(inputChar) - 0x0900)*2;
|
||||
}
|
||||
|
||||
if (Surrogate.is(inputChar)) {
|
||||
if (Character.isSurrogate(inputChar)) {
|
||||
if (sgp.parse(inputChar, sa, sp, sl) < 0)
|
||||
return sgp.error();
|
||||
return sgp.unmappableResult();
|
||||
@ -948,7 +948,7 @@ public class ISCII91 extends Charset implements HistoricallyNamedCharset
|
||||
index = ((int)(inputChar) - 0x0900)*2;
|
||||
}
|
||||
|
||||
if (Surrogate.is(inputChar)) {
|
||||
if (Character.isSurrogate(inputChar)) {
|
||||
if (sgp.parse(inputChar, src) < 0)
|
||||
return sgp.error();
|
||||
return sgp.unmappableResult();
|
||||
|
@ -515,7 +515,7 @@ abstract class ISO2022
|
||||
try {
|
||||
while (sp < sl) {
|
||||
char c = sa[sp];
|
||||
if (Surrogate.is(c)) {
|
||||
if (Character.isSurrogate(c)) {
|
||||
if (sgp.parse(c, sa, sp, sl) < 0)
|
||||
return sgp.error();
|
||||
return sgp.unmappableResult();
|
||||
@ -576,7 +576,7 @@ abstract class ISO2022
|
||||
try {
|
||||
while (src.hasRemaining()) {
|
||||
char inputChar = src.get();
|
||||
if (Surrogate.is(inputChar)) {
|
||||
if (Character.isSurrogate(inputChar)) {
|
||||
if (sgp.parse(inputChar, src) < 0)
|
||||
return sgp.error();
|
||||
return sgp.unmappableResult();
|
||||
|
@ -637,7 +637,7 @@ public class ISO2022_JP
|
||||
da[dp++] = (byte)(index >> 8);
|
||||
da[dp++] = (byte)(index & 0xff);
|
||||
} else {
|
||||
if (Surrogate.is(c) && sgp.parse(c, sa, sp, sl) < 0)
|
||||
if (Character.isSurrogate(c) && sgp.parse(c, sa, sp, sl) < 0)
|
||||
return sgp.error();
|
||||
if (unmappableCharacterAction()
|
||||
== CodingErrorAction.REPLACE
|
||||
@ -655,7 +655,7 @@ public class ISO2022_JP
|
||||
}
|
||||
currentMode = replaceMode;
|
||||
}
|
||||
if (Surrogate.is(c))
|
||||
if (Character.isSurrogate(c))
|
||||
return sgp.unmappableResult();
|
||||
return CoderResult.unmappableForLength(1);
|
||||
}
|
||||
@ -745,7 +745,7 @@ public class ISO2022_JP
|
||||
dst.put((byte)(index >> 8));
|
||||
dst.put((byte)(index & 0xff));
|
||||
} else {
|
||||
if (Surrogate.is(c) && sgp.parse(c, src) < 0)
|
||||
if (Character.isSurrogate(c) && sgp.parse(c, src) < 0)
|
||||
return sgp.error();
|
||||
if (unmappableCharacterAction() == CodingErrorAction.REPLACE
|
||||
&& currentMode != replaceMode) {
|
||||
@ -762,7 +762,7 @@ public class ISO2022_JP
|
||||
}
|
||||
currentMode = replaceMode;
|
||||
}
|
||||
if (Surrogate.is(c))
|
||||
if (Character.isSurrogate(c))
|
||||
return sgp.unmappableResult();
|
||||
return CoderResult.unmappableForLength(1);
|
||||
}
|
||||
|
@ -114,7 +114,7 @@ public abstract class SimpleEUCEncoder
|
||||
while (sp < sl) {
|
||||
boolean allZeroes = true;
|
||||
char inputChar = sa[sp];
|
||||
if (Surrogate.is(inputChar)) {
|
||||
if (Character.isSurrogate(inputChar)) {
|
||||
if (sgp.parse(inputChar, sa, sp, sl) < 0)
|
||||
return sgp.error();
|
||||
return sgp.unmappableResult();
|
||||
@ -194,7 +194,7 @@ public abstract class SimpleEUCEncoder
|
||||
while (src.hasRemaining()) {
|
||||
char inputChar = src.get();
|
||||
boolean allZeroes = true;
|
||||
if (Surrogate.is(inputChar)) {
|
||||
if (Character.isSurrogate(inputChar)) {
|
||||
if (sgp.parse(inputChar, src) < 0)
|
||||
return sgp.error();
|
||||
return sgp.unmappableResult();
|
||||
|
Loading…
Reference in New Issue
Block a user