8200434: String::align, String::indent

Reviewed-by: abuckley, smarks, sherman, rriggs, jrose, sundar, igerasim, briangoetz, darcy, jjg
This commit is contained in:
Jim Laskey 2018-09-12 14:19:36 -03:00
parent 2065ebd890
commit 12dad310bb
4 changed files with 541 additions and 34 deletions
src/java.base/share/classes/java/lang
test/jdk/java/lang/String

@ -40,12 +40,15 @@ import java.util.StringJoiner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import jdk.internal.HotSpotIntrinsicCandidate;
import jdk.internal.vm.annotation.Stable;
import static java.util.function.Predicate.not;
/**
* The {@code String} class represents character strings. All
* string literals in Java programs, such as {@code "abc"}, are
@ -2755,12 +2758,9 @@ public final class String
return indexOfNonWhitespace() == length();
}
private int indexOfNonWhitespace() {
if (isLatin1()) {
return StringLatin1.indexOfNonWhitespace(value);
} else {
return StringUTF16.indexOfNonWhitespace(value);
}
// Streams the lines of this string using the implementation that matches
// the internal coder. maxLeading and maxTrailing are handed through
// unchanged; the coder-specific LinesSpliterator factories use them as upper
// bounds on how many leading / trailing blank lines are dropped.
private Stream<String> lines(int maxLeading, int maxTrailing) {
    if (isLatin1()) {
        return StringLatin1.lines(value, maxLeading, maxTrailing);
    }
    return StringUTF16.lines(value, maxLeading, maxTrailing);
}
/**
@ -2794,8 +2794,181 @@ public final class String
* @since 11
*/
public Stream<String> lines() {
return isLatin1() ? StringLatin1.lines(value)
: StringUTF16.lines(value);
return lines(0, 0);
}
/**
 * Adjusts the indentation of each line of this string based on the value
 * of {@code n}, and normalizes line termination characters.
 * <p>
 * This string is conceptually separated into lines using
 * {@link String#lines()}. Every line is adjusted as described below and is
 * then suffixed with a line feed {@code "\n"} (U+000A). The adjusted lines
 * are concatenated to form the result.
 * <p>
 * If {@code n > 0} then {@code n} spaces (U+0020) are inserted at the
 * beginning of each line. {@link String#isBlank() Blank lines} are
 * unaffected.
 * <p>
 * If {@code n < 0} then up to {@code -n}
 * {@link Character#isWhitespace(int) white space characters} are removed
 * from the beginning of each line. A line with fewer leading white space
 * characters than that simply loses all of them. Each white space
 * character counts as a single character. In particular, the tab character
 * {@code "\t"} (U+0009) is considered a single character; it is not
 * expanded.
 * <p>
 * If {@code n == 0} then the content of each line is left unchanged, but
 * line terminators are still normalized.
 * <p>
 * An empty string is returned unchanged.
 *
 * @param n  number of leading
 *           {@link Character#isWhitespace(int) white space characters}
 *           to add or remove
 *
 * @return string with indentation adjusted and line endings normalized
 *
 * @see String#lines()
 * @see String#isBlank()
 * @see Character#isWhitespace(int)
 *
 * @since 12
 */
public String indent(int n) {
    if (isEmpty()) {
        return "";
    }
    return indent(n, false);
}
// Shared worker for indent(int) and align(int).
//   n > 0  : prefix every non-blank line with n spaces
//   n < 0  : drop up to -n leading white space characters from every line
//   n == 0 : leave line content alone
// When removeBlanks is true, leading and trailing blank lines are dropped
// before the adjustment. Every resulting line is terminated with "\n".
private String indent(int n, boolean removeBlanks) {
    final Stream<String> source = removeBlanks
            ? lines(Integer.MAX_VALUE, Integer.MAX_VALUE)
            : lines();
    final Stream<String> adjusted;
    if (n > 0) {
        final String padding = " ".repeat(n);
        adjusted = source.map(line -> line.isBlank() ? line : padding + line);
    } else if (n == Integer.MIN_VALUE) {
        // -n is not representable here, so strip all leading white space instead
        adjusted = source.map(String::stripLeading);
    } else if (n < 0) {
        final int limit = -n;
        adjusted = source.map(
                line -> line.substring(Math.min(limit, line.indexOfNonWhitespace())));
    } else {
        adjusted = source;
    }
    return adjusted.collect(Collectors.joining("\n", "", "\n"));
}
// Delegates to the coder-specific indexOfNonWhitespace, which scans forward
// from the start of the string until it meets a non-white-space character.
private int indexOfNonWhitespace() {
    if (isLatin1()) {
        return StringLatin1.indexOfNonWhitespace(value);
    }
    return StringUTF16.indexOfNonWhitespace(value);
}
// Delegates to the coder-specific lastIndexOfNonWhitespace, which scans
// backward from the end of the string until it meets a non-white-space
// character.
private int lastIndexOfNonWhitespace() {
    if (isLatin1()) {
        return StringLatin1.lastIndexOfNonWhitespace(value);
    }
    return StringUTF16.lastIndexOfNonWhitespace(value);
}
/**
* Removes vertical and horizontal white space margins from around the
* essential body of a multi-line string, while preserving relative
* indentation.
* <p>
* This string is first conceptually separated into lines as if by
* {@link String#lines()}.
* <p>
* Then, the <i>minimum indentation</i> (min) is determined as follows. For
* each non-blank line (as defined by {@link String#isBlank()}), the
* leading {@link Character#isWhitespace(int) white space} characters are
* counted. The <i>min</i> value is the smallest of these counts.
* <p>
* For each non-blank line, <i>min</i> leading white space characters are
* removed. Each white space character is treated as a single character. In
* particular, the tab character {@code "\t"} (U+0009) is considered a
* single character; it is not expanded.
* <p>
* Leading and trailing blank lines, if any, are removed. Trailing spaces are
* preserved.
* <p>
* Each line is suffixed with a line feed character {@code "\n"} (U+000A).
* <p>
* Finally, the lines are concatenated into a single string and returned.
*
* @apiNote
* This method's primary purpose is to shift a block of lines as far as
* possible to the left, while preserving relative indentation. Lines
* that were indented the least will thus have no leading white space.
*
* Example:
* <blockquote><pre>
* `
* This is the first line
* This is the second line
* `.align();
*
* returns
* This is the first line
* This is the second line
* </pre></blockquote>
*
* @implNote
* This method simply invokes {@code align(0)}. In this implementation,
* blank lines that remain between non-blank lines also have up to
* <i>min</i> leading white space characters removed.
*
* @return string with margins removed and line terminators normalized
*
* @see String#lines()
* @see String#isBlank()
* @see String#indent(int)
* @see Character#isWhitespace(int)
*
* @since 12
*/
public String align() {
return align(0);
}
/**
 * Removes vertical and horizontal white space margins from around the
 * essential body of a multi-line string, while preserving relative
 * indentation and with optional indentation adjustment.
 * <p>
 * Invoking this method is equivalent to:
 * <blockquote>
 * {@code this.align().indent(n)}
 * </blockquote>
 * <p>
 * The margin removal and the indentation adjustment are applied as two
 * separate steps, so {@code n} may be any {@code int} value, including
 * {@link Integer#MIN_VALUE}.
 *
 * @apiNote
 * Examples:
 * <blockquote><pre>
 * `
 * This is the first line
 * This is the second line
 * `.align(0);
 *
 * returns
 * This is the first line
 * This is the second line
 *
 *
 * `
 * This is the first line
 * This is the second line
 * `.align(4);
 * returns
 * This is the first line
 * This is the second line
 * </pre></blockquote>
 *
 * @param n  number of leading white space characters
 *           to add or remove
 *
 * @return string with margins removed, indentation adjusted and
 *         line terminators normalized
 *
 * @see String#align()
 *
 * @since 12
 */
public String align(int n) {
    if (isEmpty()) {
        return "";
    }
    // Smallest count of leading white space over all non-blank lines.
    int outdent = lines().filter(not(String::isBlank))
                         .mapToInt(String::indexOfNonWhitespace)
                         .min()
                         .orElse(0);
    // Step 1: drop the common margin and the leading/trailing blank lines.
    // outdent lies in [0, length()], so negating it cannot overflow.
    String aligned = indent(-outdent, true);
    // Step 2: apply the requested adjustment to the already aligned text.
    // Folding both steps into a single shift of (n - outdent) overflows for
    // very negative n, and keeps non-space margin characters (e.g. tabs)
    // when n > outdent; two steps match align().indent(n) by construction.
    // The aligned text is already "\n"-terminated, so n == 0 needs no pass.
    return n == 0 ? aligned : aligned.indent(n);
}
/**

@ -545,7 +545,7 @@ final class StringLatin1 {
int length = value.length;
int left = 0;
while (left < length) {
char ch = (char)(value[left] & 0xff);
char ch = getChar(value, left);
if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
break;
}
@ -558,7 +558,7 @@ final class StringLatin1 {
int length = value.length;
int right = length;
while (0 < right) {
char ch = (char)(value[right - 1] & 0xff);
char ch = getChar(value, right - 1);
if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
break;
}
@ -573,7 +573,8 @@ final class StringLatin1 {
return "";
}
int right = lastIndexOfNonWhitespace(value);
return ((left > 0) || (right < value.length)) ? newString(value, left, right - left) : null;
boolean ifChanged = (left > 0) || (right < value.length);
return ifChanged ? newString(value, left, right - left) : null;
}
public static String stripLeading(byte[] value) {
@ -597,11 +598,7 @@ final class StringLatin1 {
private int index; // current index, modified on advance/split
private final int fence; // one past last index
LinesSpliterator(byte[] value) {
this(value, 0, value.length);
}
LinesSpliterator(byte[] value, int start, int length) {
private LinesSpliterator(byte[] value, int start, int length) {
this.value = value;
this.index = start;
this.fence = start + length;
@ -609,7 +606,7 @@ final class StringLatin1 {
private int indexOfLineSeparator(int start) {
for (int current = start; current < fence; current++) {
byte ch = value[current];
char ch = getChar(value, current);
if (ch == '\n' || ch == '\r') {
return current;
}
@ -619,9 +616,9 @@ final class StringLatin1 {
private int skipLineSeparator(int start) {
if (start < fence) {
if (value[start] == '\r') {
if (getChar(value, start) == '\r') {
int next = start + 1;
if (next < fence && value[next] == '\n') {
if (next < fence && getChar(value, next) == '\n') {
return next + 1;
}
}
@ -680,10 +677,80 @@ final class StringLatin1 {
// Reports this spliterator as ORDERED, IMMUTABLE and NONNULL.
public int characteristics() {
return Spliterator.ORDERED | Spliterator.IMMUTABLE | Spliterator.NONNULL;
}
// Factory: a spliterator over every line of the Latin-1 encoded value.
static LinesSpliterator spliterator(byte[] value) {
    final int start = 0;
    final int count = value.length;
    return new LinesSpliterator(value, start, count);
}
// Factory: a spliterator over the lines of the Latin-1 encoded value that
// remain after skipping at most `leading` blank lines at the front and at
// most `trailing` blank lines at the back. Skipping on either side stops
// as soon as a non-blank line is met.
static LinesSpliterator spliterator(byte[] value, int leading, int trailing) {
    final int end = value.length;

    int begin = 0;
    for (int skipped = 0; skipped < leading; skipped++) {
        int next = skipBlankForward(value, begin, end);
        if (next == begin) {
            break;              // line at 'begin' is not blank (or nothing is left)
        }
        begin = next;
    }

    int limit = end;
    for (int skipped = 0; skipped < trailing; skipped++) {
        int previous = skipBlankBackward(value, begin, limit);
        if (previous == limit) {
            break;              // line ending at 'limit' is not blank (or nothing is left)
        }
        limit = previous;
    }

    return new LinesSpliterator(value, begin, limit - begin);
}
// Examines the line beginning at `start`. If it consists only of white
// space, returns the index just past its terminator ("\n", "\r" or "\r\n"),
// or `length` when the blank line runs to the end without a terminator.
// If a non-white-space character is found first, returns `start` unchanged.
private static int skipBlankForward(byte[] value, int start, int length) {
    for (int pos = start; pos < length; pos++) {
        char ch = getChar(value, pos);
        int after = pos + 1;
        if (ch == '\n') {
            return after;
        }
        if (ch == '\r') {
            boolean crlf = after < length && getChar(value, after) == '\n';
            return crlf ? after + 1 : after;
        }
        boolean blank = ch == ' ' || ch == '\t' || Character.isWhitespace(ch);
        if (!blank) {
            return start;
        }
    }
    return length;
}
// Examines the line ending at `fence`, looking no further back than
// `start`. If it consists only of white space, returns the index at which
// that line begins (`start` when the scan reaches it). If a
// non-white-space character is found first, returns `fence` unchanged.
private static int skipBlankBackward(byte[] value, int start, int fence) {
    int pos = fence;
    // Step back over a '\n' and then a '\r' sitting directly before the fence.
    if (pos > start && getChar(value, pos - 1) == '\n') {
        pos--;
    }
    if (pos > start && getChar(value, pos - 1) == '\r') {
        pos--;
    }
    for (int i = pos - 1; i >= start; i--) {
        char ch = getChar(value, i);
        if (ch == '\n' || ch == '\r') {
            return i + 1;       // terminator of the preceding line
        }
        boolean blank = ch == ' ' || ch == '\t' || Character.isWhitespace(ch);
        if (!blank) {
            return fence;
        }
    }
    return start;
}
}
static Stream<String> lines(byte[] value) {
return StreamSupport.stream(new LinesSpliterator(value), false);
static Stream<String> lines(byte[] value, int leading, int trailing) {
if (leading == 0 && trailing == 0) {
return StreamSupport.stream(LinesSpliterator.spliterator(value), false);
} else {
return StreamSupport.stream(LinesSpliterator.spliterator(value, leading, trailing), false);
}
}
public static void putChar(byte[] val, int index, int c) {

@ -859,7 +859,6 @@ final class StringUTF16 {
null;
}
public static int indexOfNonWhitespace(byte[] value) {
int length = value.length >> 1;
int left = 0;
@ -874,7 +873,7 @@ final class StringUTF16 {
}
public static int lastIndexOfNonWhitespace(byte[] value) {
int length = value.length >> 1;
int length = value.length >>> 1;
int right = length;
while (0 < right) {
int codepoint = codePointBefore(value, right);
@ -887,17 +886,18 @@ final class StringUTF16 {
}
public static String strip(byte[] value) {
int length = value.length >> 1;
int length = value.length >>> 1;
int left = indexOfNonWhitespace(value);
if (left == length) {
return "";
}
int right = lastIndexOfNonWhitespace(value);
return ((left > 0) || (right < length)) ? newString(value, left, right - left) : null;
boolean ifChanged = (left > 0) || (right < length);
return ifChanged ? newString(value, left, right - left) : null;
}
public static String stripLeading(byte[] value) {
int length = value.length >> 1;
int length = value.length >>> 1;
int left = indexOfNonWhitespace(value);
if (left == length) {
return "";
@ -906,7 +906,7 @@ final class StringUTF16 {
}
public static String stripTrailing(byte[] value) {
int length = value.length >> 1;
int length = value.length >>> 1;
int right = lastIndexOfNonWhitespace(value);
if (right == 0) {
return "";
@ -919,11 +919,7 @@ final class StringUTF16 {
private int index; // current index, modified on advance/split
private final int fence; // one past last index
LinesSpliterator(byte[] value) {
this(value, 0, value.length >>> 1);
}
LinesSpliterator(byte[] value, int start, int length) {
private LinesSpliterator(byte[] value, int start, int length) {
this.value = value;
this.index = start;
this.fence = start + length;
@ -1002,10 +998,80 @@ final class StringUTF16 {
// Reports this spliterator as ORDERED, IMMUTABLE and NONNULL.
public int characteristics() {
return Spliterator.ORDERED | Spliterator.IMMUTABLE | Spliterator.NONNULL;
}
// Factory: a spliterator over every line of the UTF-16 encoded value.
// Positions are counted in chars, hence half the byte length.
static LinesSpliterator spliterator(byte[] value) {
    final int start = 0;
    final int count = value.length >>> 1;
    return new LinesSpliterator(value, start, count);
}
// Factory: a spliterator over the lines of the UTF-16 encoded value that
// remain after skipping at most `leading` blank lines at the front and at
// most `trailing` blank lines at the back. Positions are counted in chars.
// Skipping on either side stops as soon as a non-blank line is met.
static LinesSpliterator spliterator(byte[] value, int leading, int trailing) {
    final int end = value.length >>> 1;

    int begin = 0;
    for (int skipped = 0; skipped < leading; skipped++) {
        int next = skipBlankForward(value, begin, end);
        if (next == begin) {
            break;              // line at 'begin' is not blank (or nothing is left)
        }
        begin = next;
    }

    int limit = end;
    for (int skipped = 0; skipped < trailing; skipped++) {
        int previous = skipBlankBackward(value, begin, limit);
        if (previous == limit) {
            break;              // line ending at 'limit' is not blank (or nothing is left)
        }
        limit = previous;
    }

    return new LinesSpliterator(value, begin, limit - begin);
}
// Examines the line beginning at char index `start`. If it consists only
// of white space, returns the index just past its terminator ("\n", "\r"
// or "\r\n"), or `length` when the blank line runs to the end without a
// terminator. If a non-white-space char is found first, returns `start`
// unchanged.
private static int skipBlankForward(byte[] value, int start, int length) {
    for (int pos = start; pos < length; pos++) {
        char ch = getChar(value, pos);
        int after = pos + 1;
        if (ch == '\n') {
            return after;
        }
        if (ch == '\r') {
            boolean crlf = after < length && getChar(value, after) == '\n';
            return crlf ? after + 1 : after;
        }
        boolean blank = ch == ' ' || ch == '\t' || Character.isWhitespace(ch);
        if (!blank) {
            return start;
        }
    }
    return length;
}
// Examines the line ending at char index `fence`, looking no further back
// than `start`. If it consists only of white space, returns the index at
// which that line begins (`start` when the scan reaches it). If a
// non-white-space char is found first, returns `fence` unchanged.
private static int skipBlankBackward(byte[] value, int start, int fence) {
    int pos = fence;
    // Step back over a '\n' and then a '\r' sitting directly before the fence.
    if (pos > start && getChar(value, pos - 1) == '\n') {
        pos--;
    }
    if (pos > start && getChar(value, pos - 1) == '\r') {
        pos--;
    }
    for (int i = pos - 1; i >= start; i--) {
        char ch = getChar(value, i);
        if (ch == '\n' || ch == '\r') {
            return i + 1;       // terminator of the preceding line
        }
        boolean blank = ch == ' ' || ch == '\t' || Character.isWhitespace(ch);
        if (!blank) {
            return fence;
        }
    }
    return start;
}
}
static Stream<String> lines(byte[] value) {
return StreamSupport.stream(new LinesSpliterator(value), false);
static Stream<String> lines(byte[] value, int leading, int trailing) {
if (leading == 0 && trailing == 0) {
return StreamSupport.stream(LinesSpliterator.spliterator(value), false);
} else {
return StreamSupport.stream(LinesSpliterator.spliterator(value, leading, trailing), false);
}
}
private static void putChars(byte[] val, int index, char[] str, int off, int end) {

@ -0,0 +1,201 @@
/*
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @summary Unit tests for String#align and String#indent
* @run main AlignIndent
*/
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
 * Exercises {@code String#align()}, {@code String#align(int)} and
 * {@code String#indent(int)} over inputs assembled from every combination
 * of {@link #ENDS} (used as prefix and as suffix) and {@link #MIDDLES}.
 * Any mismatch is dumped to {@code System.err} and fails the run with a
 * {@link RuntimeException}.
 */
public class AlignIndent {
    /** Fragments placed before and after the body of each generated input. */
    static final List<String> ENDS = List.of("", "\n", " \n", "\n\n", "\n\n\n");

    /** Lines placed between the two fixed body lines of each generated input. */
    static final List<String> MIDDLES = List.of(
        "",
        "xyz",
        " xyz",
        " xyz",
        "xyz ",
        " xyz ",
        " xyz ",
        "xyz\u2022",
        " xyz\u2022",
        "xyz\u2022 ",
        " xyz\u2022 ",
        " // comment"
    );

    /** Indentation adjustments tried by the align(int) and indent(int) tests. */
    private static final int[] ADJUSTMENTS = {-8, -7, -4, -3, -2, -1, 0, 1, 2, 3, 4, 7, 8};

    public static void main(String[] args) {
        test1();
        test2();
        test3();
    }

    /** Assembles one test input around the two fixed body lines. */
    private static String buildInput(String prefix, String middle, String suffix) {
        return prefix + " abc \n" + middle + "\n def \n" + suffix;
    }

    /*
     * Test String#align() functionality.
     */
    static void test1() {
        for (String prefix : ENDS) {
            for (String suffix : ENDS) {
                for (String middle : MIDDLES) {
                    String input = buildInput(prefix, middle, suffix);
                    String output = input.align();

                    String[] inLines = input.split("\\R");
                    String[] outLines = output.split("\\R");
                    String[] inLinesBody = getBody(inLines);

                    if (inLinesBody.length < outLines.length) {
                        report("String::align()", "Result has more lines than expected", input, output);
                    } else if (inLinesBody.length > outLines.length) {
                        report("String::align()", "Result has fewer lines than expected", input, output);
                    }

                    // Every non-blank output line must occur in its input line,
                    // and all of them must have been shifted by the same amount.
                    int indent = -1;
                    for (int i = 0; i < inLinesBody.length; i++) {
                        String in = inLinesBody[i];
                        String out = outLines[i];
                        if (!out.isBlank()) {
                            int offset = in.indexOf(out);
                            if (offset == -1) {
                                report("String::align()", "Portions of line are missing", input, output);
                            }
                            if (indent == -1) {
                                indent = offset;
                            } else if (offset != indent) {
                                report("String::align()",
                                       "Inconsistent indentation in result", input, output);
                            }
                        }
                    }
                }
            }
        }
    }

    /*
     * Test String#align(int n) functionality.
     */
    static void test2() {
        for (int adjust : ADJUSTMENTS) {
            for (String prefix : ENDS) {
                for (String suffix : ENDS) {
                    for (String middle : MIDDLES) {
                        String input = buildInput(prefix, middle, suffix);
                        String output = input.align(adjust);
                        String expected = input.align().indent(adjust);
                        if (!output.equals(expected)) {
                            // The expected text is passed in report's "input" slot.
                            report("String::align(int n)",
                                   "Result inconsistent with align().indent(n)", expected, output);
                        }
                    }
                }
            }
        }
    }

    /*
     * Test String#indent(int n) functionality.
     */
    static void test3() {
        for (int adjust : ADJUSTMENTS) {
            for (String prefix : ENDS) {
                for (String suffix : ENDS) {
                    for (String middle : MIDDLES) {
                        String input = buildInput(prefix, middle, suffix);
                        String output = input.indent(adjust);

                        // Recompute the expected text with a local model of indent(n).
                        Stream<String> stream = input.lines();
                        if (adjust > 0) {
                            final String spaces = " ".repeat(adjust);
                            stream = stream.map(s -> s.isBlank() ? s : spaces + s);
                        } else if (adjust < 0) {
                            stream = stream.map(s -> s.substring(Math.min(-adjust, indexOfNonWhitespace(s))));
                        }
                        String expected = stream.collect(Collectors.joining("\n", "", "\n"));

                        if (!output.equals(expected)) {
                            // The expected text is passed in report's "input" slot.
                            report("String::indent(int n)",
                                   "Result indentation not as expected", expected, output);
                        }
                    }
                }
            }
        }
    }

    /**
     * Returns the index of the first character of {@code s} that is not
     * white space, or {@code s.length()} if there is none.
     */
    public static int indexOfNonWhitespace(String s) {
        int left = 0;
        while (left < s.length()) {
            char ch = s.charAt(left);
            if (ch != ' ' && ch != '\t' && !Character.isWhitespace(ch)) {
                break;
            }
            left++;
        }
        return left;
    }

    /**
     * Returns the lines from the first non-blank line through the last
     * non-blank line, or an empty array when every line is blank.
     */
    private static String[] getBody(String[] inLines) {
        int from = -1, to = -1;
        for (int i = 0; i < inLines.length; i++) {
            String line = inLines[i];
            if (!line.isBlank()) {
                if (from == -1) {
                    from = i;
                }
                to = i + 1;
            }
        }
        if (from == -1) {
            // No non-blank line: copyOfRange(-1, -1) would throw.
            return new String[0];
        }
        return Arrays.copyOfRange(inLines, from, to);
    }

    /*
     * Report difference in result. Always throws, with the test name and
     * the reason in the exception message.
     */
    static void report(String test, String message, String input, String output) {
        System.err.println("Testing " + test + ": " + message);
        System.err.println();
        dump("Input", input);
        System.err.println();
        dump("Output", output);
        throw new RuntimeException(test + ": " + message);
    }

    /** Prints one labelled text to System.err with spaces shown as dots. */
    private static void dump(String label, String text) {
        String rule = "_".repeat(40);
        System.err.println(label + ": length = " + text.length());
        System.err.println(rule);
        System.err.print(text.replace(' ', '.'));
        System.err.println(rule);
    }
}