8150488: Scanner.findAll() can return infinite stream if regex matches zero chars

Reviewed-by: sherman
This commit is contained in:
Stuart Marks 2017-04-28 12:16:30 -07:00
parent b0ddb372f0
commit cb05e8ea85
2 changed files with 109 additions and 10 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -2846,6 +2846,7 @@ public final class Scanner implements Iterator<String>, Closeable {
class FindSpliterator extends Spliterators.AbstractSpliterator<MatchResult> {
final Pattern pattern;
int expectedCount = -1;
private boolean advance = false; // true if we need to auto-advance
FindSpliterator(Pattern pattern) {
super(Long.MAX_VALUE,
@ -2861,12 +2862,15 @@ public final class Scanner implements Iterator<String>, Closeable {
throw new ConcurrentModificationException();
}
} else {
// init
matchValid = false;
matcher.usePattern(pattern);
expectedCount = modCount;
}
while (true) {
// assert expectedCount == modCount
if (findPatternInBuffer(pattern, 0)) { // doesn't increment modCount
if (nextInBuffer()) { // doesn't increment modCount
cons.accept(matcher.toMatchResult());
if (expectedCount != modCount) {
throw new ConcurrentModificationException();
@ -2879,6 +2883,29 @@ public final class Scanner implements Iterator<String>, Closeable {
return false; // reached end of input
}
}
// reimplementation of findPatternInBuffer with auto-advance on zero-length matches
private boolean nextInBuffer() {
if (advance) {
if (position + 1 > buf.limit()) {
if (!sourceClosed)
needInput = true;
return false;
}
position++;
advance = false;
}
matcher.region(position, buf.limit());
if (matcher.find() && (!matcher.hitEnd() || sourceClosed)) {
// Did not hit end, or hit real end
position = matcher.end();
advance = matcher.start() == position;
return true;
}
if (!sourceClosed)
needInput = true;
return false;
}
}
/** Small LRU cache of Patterns. */

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -33,6 +33,7 @@ import java.util.List;
import java.util.Scanner;
import java.util.function.Consumer;
import java.util.function.Supplier;
import java.util.regex.Matcher;
import java.util.regex.MatchResult;
import java.util.regex.Pattern;
import java.util.stream.LambdaTestHelpers;
@ -44,7 +45,7 @@ import static org.testng.Assert.*;
/**
* @test
* @bug 8072722
* @bug 8072722 8150488
* @summary Tests of stream support in java.util.Scanner
* @library ../stream/bootlib
* @build java.base/java.util.stream.OpTestCase
@ -56,19 +57,22 @@ public class ScannerStreamTest extends OpTestCase {
static File inputFile = new File(System.getProperty("test.src", "."), "input.txt");
@DataProvider(name = "Patterns")
public static Object[][] makeStreamTestData() {
@DataProvider(name = "Tokens")
public static Object[][] makeTokensTestData() {
// each inner array is [String description, String input, String delimiter]
// delimiter may be null
List<Object[]> data = new ArrayList<>();
data.add(new Object[] { "default delimiter", "abc def ghi", null });
data.add(new Object[] { "fixed delimiter", "abc,def,,ghi", "," });
data.add(new Object[] { "regexp delimiter", "###abc##def###ghi###j", "#+" });
data.add(new Object[] { "regex delimiter", "###abc##def###ghi###j", "#+" });
return data.toArray(new Object[0][]);
}
/*
* Creates a scanner over the input, applying a delimiter if non-null.
*/
Scanner makeScanner(String input, String delimiter) {
Scanner sc = new Scanner(input);
if (delimiter != null) {
@ -77,7 +81,11 @@ public class ScannerStreamTest extends OpTestCase {
return sc;
}
@Test(dataProvider = "Patterns")
/*
* Given input and a delimiter, tests that tokens() returns the same
* results that would be provided by a Scanner hasNext/next loop.
*/
@Test(dataProvider = "Tokens")
public void tokensTest(String description, String input, String delimiter) {
// derive expected result by using conventional loop
Scanner sc = makeScanner(input, delimiter);
@ -93,6 +101,9 @@ public class ScannerStreamTest extends OpTestCase {
.exercise();
}
/*
* Creates a Scanner over the given input file.
*/
Scanner makeFileScanner(File file) {
try {
return new Scanner(file, "UTF-8");
@ -101,7 +112,12 @@ public class ScannerStreamTest extends OpTestCase {
}
}
public void findAllTest() {
/*
* Tests that the matches produced by findAll(pat) are the same
* as what are returned by findWithinHorizon(pat, 0). This tests
* a single pattern against a single input file.
*/
public void findAllFileTest() {
// derive expected result by using conventional loop
Pattern pat = Pattern.compile("[A-Z]{7,}");
List<String> expected = new ArrayList<>();
@ -116,10 +132,66 @@ public class ScannerStreamTest extends OpTestCase {
Supplier<Stream<String>> ss =
() -> makeFileScanner(inputFile).findAll(pat).map(MatchResult::group);
withData(TestData.Factory.ofSupplier("findAllTest", ss))
withData(TestData.Factory.ofSupplier("findAllFileTest", ss))
.stream(LambdaTestHelpers.identity())
.expectedResult(expected)
.exercise();
}
@DataProvider(name = "FindAllZero")
public static Object[][] makeFindAllZeroTestData() {
// each inner array is [String input, String patternString]
List<Object[]> data = new ArrayList<>();
data.add(new Object[] { "aaaaa", "a*" });
data.add(new Object[] { "aaaaab", "a*" });
data.add(new Object[] { "aaaaabb", "a*" });
data.add(new Object[] { "aaaaabbb", "a*" });
data.add(new Object[] { "aaabbaaaa", "a*" });
data.add(new Object[] { "aaabbaaaab", "a*" });
data.add(new Object[] { "aaabbaaaabb", "a*" });
data.add(new Object[] { "aaabbaaaabbb", "a*" });
data.add(new Object[] { "aaabbaaaa", "a*|b*" });
data.add(new Object[] { "aaabbaaaab", "a*|b*" });
data.add(new Object[] { "aaabbaaaabb", "a*|b*" });
data.add(new Object[] { "aaabbaaaabbb", "a*|b*" });
return data.toArray(new Object[0][]);
}
/*
* Tests findAll() using a pattern against an input string.
* The results from findAll() should equal the results obtained
* using a loop around Matcher.find().
*
* The provided regexes should allow zero-length matches.
* This primarily tests the auto-advance feature of findAll() that
* occurs if the regex match is of zero length to see if it has the
* same behavior as Matcher.find()'s auto-advance (JDK-8150488).
* Without auto-advance, findAll() would return an infinite stream
* of zero-length matches. Apply a limit to the stream so
* that an infinite stream will be truncated. The limit must be
* high enough that the resulting truncated stream won't be
* mistaken for a correct expected result.
*/
@Test(dataProvider = "FindAllZero")
public void findAllZeroTest(String input, String patternString) {
Pattern pattern = Pattern.compile(patternString);
// generate expected result using Matcher.find()
Matcher m = pattern.matcher(input);
List<String> expected = new ArrayList<>();
while (m.find()) {
expected.add(m.group());
}
Supplier<Stream<String>> ss = () -> new Scanner(input).findAll(pattern)
.limit(100)
.map(MatchResult::group);
withData(TestData.Factory.ofSupplier("findAllZeroTest", ss))
.stream(LambdaTestHelpers.identity())
.expectedResult(expected)
.exercise();
}
}