From edcaf2323302ce255cbd6c0b2a745f65e4c85794 Mon Sep 17 00:00:00 2001 From: Martin Buchholz Date: Tue, 26 Mar 2013 13:36:51 -0700 Subject: [PATCH] 8010316: Improve handling of char sequences containing surrogates Fix and optimize codePointAt, codePointBefore and similar methods Reviewed-by: sherman, okutsu, ulfzibis, kizune --- .../java/lang/AbstractStringBuilder.java | 49 ++++++++++--------- .../share/classes/java/lang/Character.java | 48 ++++++++---------- .../lang/StringBuilder/Supplementary.java | 14 ++++++ 3 files changed, 61 insertions(+), 50 deletions(-) diff --git a/jdk/src/share/classes/java/lang/AbstractStringBuilder.java b/jdk/src/share/classes/java/lang/AbstractStringBuilder.java index 6e4881cb832..2d71795a5ca 100644 --- a/jdk/src/share/classes/java/lang/AbstractStringBuilder.java +++ b/jdk/src/share/classes/java/lang/AbstractStringBuilder.java @@ -236,7 +236,7 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence { if ((index < 0) || (index >= count)) { throw new StringIndexOutOfBoundsException(index); } - return Character.codePointAt(value, index); + return Character.codePointAtImpl(value, index, count); } /** @@ -265,7 +265,7 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence { if ((i < 0) || (i >= count)) { throw new StringIndexOutOfBoundsException(index); } - return Character.codePointBefore(value, index); + return Character.codePointBeforeImpl(value, index, 0); } /** @@ -1370,32 +1370,37 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence { * @return a reference to this object. */ public AbstractStringBuilder reverse() { - boolean hasSurrogate = false; + boolean hasSurrogates = false; int n = count - 1; - for (int j = (n-1) >> 1; j >= 0; --j) { - char temp = value[j]; - char temp2 = value[n - j]; - if (!hasSurrogate) { - hasSurrogate = (temp >= Character.MIN_SURROGATE && temp <= Character.MAX_SURROGATE) - || (temp2 >= Character.MIN_SURROGATE && temp2 <= Character.MAX_SURROGATE); + for (int j = (n-1) >> 1; j >= 0; j--) { + int k = n - j; + char cj = value[j]; + char ck = value[k]; + value[j] = ck; + value[k] = cj; + if (Character.isSurrogate(cj) || + Character.isSurrogate(ck)) { + hasSurrogates = true; } - value[j] = temp2; - value[n - j] = temp; } - if (hasSurrogate) { - // Reverse back all valid surrogate pairs - for (int i = 0; i < count - 1; i++) { - char c2 = value[i]; - if (Character.isLowSurrogate(c2)) { - char c1 = value[i + 1]; - if (Character.isHighSurrogate(c1)) { - value[i++] = c1; - value[i] = c2; - } + if (hasSurrogates) { + reverseAllValidSurrogatePairs(); + } + return this; + } + + /** Outlined helper method for reverse() */ + private void reverseAllValidSurrogatePairs() { + for (int i = 0; i < count - 1; i++) { + char c2 = value[i]; + if (Character.isLowSurrogate(c2)) { + char c1 = value[i + 1]; + if (Character.isHighSurrogate(c1)) { + value[i++] = c1; + value[i] = c2; } } } - return this; } /** diff --git a/jdk/src/share/classes/java/lang/Character.java b/jdk/src/share/classes/java/lang/Character.java index 0f440067b45..531263d3894 100644 --- a/jdk/src/share/classes/java/lang/Character.java +++ b/jdk/src/share/classes/java/lang/Character.java @@ -4862,13 +4862,11 @@ class Character implements java.io.Serializable, Comparable { * @since 1.5 */ public static int codePointAt(CharSequence seq, int index) { - char c1 = seq.charAt(index++); - if (isHighSurrogate(c1)) { - if (index < seq.length()) { - char c2 = seq.charAt(index); - if (isLowSurrogate(c2)) { - return toCodePoint(c1, c2); - } + char c1 = seq.charAt(index); + if (isHighSurrogate(c1) && ++index < seq.length()) { + char c2 = seq.charAt(index); + if (isLowSurrogate(c2)) { + return toCodePoint(c1, c2); } } return c1; @@ -4931,15 +4929,13 @@ class Character implements java.io.Serializable, Comparable { return codePointAtImpl(a, index, limit); } - // throws ArrayIndexOutofBoundsException if index out of bounds + // throws ArrayIndexOutOfBoundsException if index out of bounds static int codePointAtImpl(char[] a, int index, int limit) { - char c1 = a[index++]; - if (isHighSurrogate(c1)) { - if (index < limit) { - char c2 = a[index]; - if (isLowSurrogate(c2)) { - return toCodePoint(c1, c2); - } + char c1 = a[index]; + if (isHighSurrogate(c1) && ++index < limit) { + char c2 = a[index]; + if (isLowSurrogate(c2)) { + return toCodePoint(c1, c2); } } return c1; @@ -4968,12 +4964,10 @@ class Character implements java.io.Serializable, Comparable { */ public static int codePointBefore(CharSequence seq, int index) { char c2 = seq.charAt(--index); - if (isLowSurrogate(c2)) { - if (index > 0) { - char c1 = seq.charAt(--index); - if (isHighSurrogate(c1)) { - return toCodePoint(c1, c2); - } + if (isLowSurrogate(c2) && index > 0) { + char c1 = seq.charAt(--index); + if (isHighSurrogate(c1)) { + return toCodePoint(c1, c2); } } return c2; @@ -5038,15 +5032,13 @@ class Character implements java.io.Serializable, Comparable { return codePointBeforeImpl(a, index, start); } - // throws ArrayIndexOutofBoundsException if index-1 out of bounds + // throws ArrayIndexOutOfBoundsException if index-1 out of bounds static int codePointBeforeImpl(char[] a, int index, int start) { char c2 = a[--index]; - if (isLowSurrogate(c2)) { - if (index > start) { - char c1 = a[--index]; - if (isHighSurrogate(c1)) { - return toCodePoint(c1, c2); - } + if (isLowSurrogate(c2) && index > start) { + char c1 = a[--index]; + if (isHighSurrogate(c1)) { + return toCodePoint(c1, c2); } } return c2; diff --git a/jdk/test/java/lang/StringBuilder/Supplementary.java b/jdk/test/java/lang/StringBuilder/Supplementary.java index 30ff5e901c0..f5af9a0599e 100644 --- a/jdk/test/java/lang/StringBuilder/Supplementary.java +++ b/jdk/test/java/lang/StringBuilder/Supplementary.java @@ -37,6 +37,7 @@ public class Supplementary { test4(); // Test for appendCodePoint(int codePoint) test5(); // Test for codePointCount(int beginIndex, int endIndex) test6(); // Test for offsetByCodePoints(int index, int offset) + testDontReadOutOfBoundsTrailingSurrogate(); } /* Text strings which are used as input data. @@ -305,6 +306,19 @@ public class Supplementary { } } + static void testDontReadOutOfBoundsTrailingSurrogate() { + StringBuilder sb = new StringBuilder(); + int suppl = Character.MIN_SUPPLEMENTARY_CODE_POINT; + sb.appendCodePoint(suppl); + check(sb.codePointAt(0) != (int) suppl, + "codePointAt(0)", sb.codePointAt(0), suppl); + check(sb.length() != 2, "sb.length()"); + sb.setLength(1); + check(sb.length() != 1, "sb.length()"); + check(sb.codePointAt(0) != Character.highSurrogate(suppl), + "codePointAt(0)", + sb.codePointAt(0), Character.highSurrogate(suppl)); + } static final boolean At = true, Before = false;