8010316: Improve handling of char sequences containing surrogates
Fix and optimize codePointAt, codePointBefore and similar methods.
Reviewed-by: sherman, okutsu, ulfzibis, kizune
This commit is contained in:
parent
2a57075d85
commit
edcaf23233
@ -236,7 +236,7 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
|
|||||||
if ((index < 0) || (index >= count)) {
|
if ((index < 0) || (index >= count)) {
|
||||||
throw new StringIndexOutOfBoundsException(index);
|
throw new StringIndexOutOfBoundsException(index);
|
||||||
}
|
}
|
||||||
return Character.codePointAt(value, index);
|
return Character.codePointAtImpl(value, index, count);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -265,7 +265,7 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
|
|||||||
if ((i < 0) || (i >= count)) {
|
if ((i < 0) || (i >= count)) {
|
||||||
throw new StringIndexOutOfBoundsException(index);
|
throw new StringIndexOutOfBoundsException(index);
|
||||||
}
|
}
|
||||||
return Character.codePointBefore(value, index);
|
return Character.codePointBeforeImpl(value, index, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -1370,20 +1370,27 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
|
|||||||
* @return a reference to this object.
|
* @return a reference to this object.
|
||||||
*/
|
*/
|
||||||
public AbstractStringBuilder reverse() {
|
public AbstractStringBuilder reverse() {
|
||||||
boolean hasSurrogate = false;
|
boolean hasSurrogates = false;
|
||||||
int n = count - 1;
|
int n = count - 1;
|
||||||
for (int j = (n-1) >> 1; j >= 0; --j) {
|
for (int j = (n-1) >> 1; j >= 0; j--) {
|
||||||
char temp = value[j];
|
int k = n - j;
|
||||||
char temp2 = value[n - j];
|
char cj = value[j];
|
||||||
if (!hasSurrogate) {
|
char ck = value[k];
|
||||||
hasSurrogate = (temp >= Character.MIN_SURROGATE && temp <= Character.MAX_SURROGATE)
|
value[j] = ck;
|
||||||
|| (temp2 >= Character.MIN_SURROGATE && temp2 <= Character.MAX_SURROGATE);
|
value[k] = cj;
|
||||||
|
if (Character.isSurrogate(cj) ||
|
||||||
|
Character.isSurrogate(ck)) {
|
||||||
|
hasSurrogates = true;
|
||||||
}
|
}
|
||||||
value[j] = temp2;
|
|
||||||
value[n - j] = temp;
|
|
||||||
}
|
}
|
||||||
if (hasSurrogate) {
|
if (hasSurrogates) {
|
||||||
// Reverse back all valid surrogate pairs
|
reverseAllValidSurrogatePairs();
|
||||||
|
}
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Outlined helper method for reverse() */
|
||||||
|
private void reverseAllValidSurrogatePairs() {
|
||||||
for (int i = 0; i < count - 1; i++) {
|
for (int i = 0; i < count - 1; i++) {
|
||||||
char c2 = value[i];
|
char c2 = value[i];
|
||||||
if (Character.isLowSurrogate(c2)) {
|
if (Character.isLowSurrogate(c2)) {
|
||||||
@ -1395,8 +1402,6 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return this;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a string representing the data in this sequence.
|
* Returns a string representing the data in this sequence.
|
||||||
|
@ -4862,15 +4862,13 @@ class Character implements java.io.Serializable, Comparable<Character> {
|
|||||||
* @since 1.5
|
* @since 1.5
|
||||||
*/
|
*/
|
||||||
public static int codePointAt(CharSequence seq, int index) {
|
public static int codePointAt(CharSequence seq, int index) {
|
||||||
char c1 = seq.charAt(index++);
|
char c1 = seq.charAt(index);
|
||||||
if (isHighSurrogate(c1)) {
|
if (isHighSurrogate(c1) && ++index < seq.length()) {
|
||||||
if (index < seq.length()) {
|
|
||||||
char c2 = seq.charAt(index);
|
char c2 = seq.charAt(index);
|
||||||
if (isLowSurrogate(c2)) {
|
if (isLowSurrogate(c2)) {
|
||||||
return toCodePoint(c1, c2);
|
return toCodePoint(c1, c2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return c1;
|
return c1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4931,17 +4929,15 @@ class Character implements java.io.Serializable, Comparable<Character> {
|
|||||||
return codePointAtImpl(a, index, limit);
|
return codePointAtImpl(a, index, limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
// throws ArrayIndexOutofBoundsException if index out of bounds
|
// throws ArrayIndexOutOfBoundsException if index out of bounds
|
||||||
static int codePointAtImpl(char[] a, int index, int limit) {
|
static int codePointAtImpl(char[] a, int index, int limit) {
|
||||||
char c1 = a[index++];
|
char c1 = a[index];
|
||||||
if (isHighSurrogate(c1)) {
|
if (isHighSurrogate(c1) && ++index < limit) {
|
||||||
if (index < limit) {
|
|
||||||
char c2 = a[index];
|
char c2 = a[index];
|
||||||
if (isLowSurrogate(c2)) {
|
if (isLowSurrogate(c2)) {
|
||||||
return toCodePoint(c1, c2);
|
return toCodePoint(c1, c2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return c1;
|
return c1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4968,14 +4964,12 @@ class Character implements java.io.Serializable, Comparable<Character> {
|
|||||||
*/
|
*/
|
||||||
public static int codePointBefore(CharSequence seq, int index) {
|
public static int codePointBefore(CharSequence seq, int index) {
|
||||||
char c2 = seq.charAt(--index);
|
char c2 = seq.charAt(--index);
|
||||||
if (isLowSurrogate(c2)) {
|
if (isLowSurrogate(c2) && index > 0) {
|
||||||
if (index > 0) {
|
|
||||||
char c1 = seq.charAt(--index);
|
char c1 = seq.charAt(--index);
|
||||||
if (isHighSurrogate(c1)) {
|
if (isHighSurrogate(c1)) {
|
||||||
return toCodePoint(c1, c2);
|
return toCodePoint(c1, c2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return c2;
|
return c2;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5038,17 +5032,15 @@ class Character implements java.io.Serializable, Comparable<Character> {
|
|||||||
return codePointBeforeImpl(a, index, start);
|
return codePointBeforeImpl(a, index, start);
|
||||||
}
|
}
|
||||||
|
|
||||||
// throws ArrayIndexOutofBoundsException if index-1 out of bounds
|
// throws ArrayIndexOutOfBoundsException if index-1 out of bounds
|
||||||
static int codePointBeforeImpl(char[] a, int index, int start) {
|
static int codePointBeforeImpl(char[] a, int index, int start) {
|
||||||
char c2 = a[--index];
|
char c2 = a[--index];
|
||||||
if (isLowSurrogate(c2)) {
|
if (isLowSurrogate(c2) && index > start) {
|
||||||
if (index > start) {
|
|
||||||
char c1 = a[--index];
|
char c1 = a[--index];
|
||||||
if (isHighSurrogate(c1)) {
|
if (isHighSurrogate(c1)) {
|
||||||
return toCodePoint(c1, c2);
|
return toCodePoint(c1, c2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
return c2;
|
return c2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -37,6 +37,7 @@ public class Supplementary {
|
|||||||
test4(); // Test for appendCodePoint(int codePoint)
|
test4(); // Test for appendCodePoint(int codePoint)
|
||||||
test5(); // Test for codePointCount(int beginIndex, int endIndex)
|
test5(); // Test for codePointCount(int beginIndex, int endIndex)
|
||||||
test6(); // Test for offsetByCodePoints(int index, int offset)
|
test6(); // Test for offsetByCodePoints(int index, int offset)
|
||||||
|
testDontReadOutOfBoundsTrailingSurrogate();
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Text strings which are used as input data.
|
/* Text strings which are used as input data.
|
||||||
@ -305,6 +306,19 @@ public class Supplementary {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void testDontReadOutOfBoundsTrailingSurrogate() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
int suppl = Character.MIN_SUPPLEMENTARY_CODE_POINT;
|
||||||
|
sb.appendCodePoint(suppl);
|
||||||
|
check(sb.codePointAt(0) != (int) suppl,
|
||||||
|
"codePointAt(0)", sb.codePointAt(0), suppl);
|
||||||
|
check(sb.length() != 2, "sb.length()");
|
||||||
|
sb.setLength(1);
|
||||||
|
check(sb.length() != 1, "sb.length()");
|
||||||
|
check(sb.codePointAt(0) != Character.highSurrogate(suppl),
|
||||||
|
"codePointAt(0)",
|
||||||
|
sb.codePointAt(0), Character.highSurrogate(suppl));
|
||||||
|
}
|
||||||
|
|
||||||
static final boolean At = true, Before = false;
|
static final boolean At = true, Before = false;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user