8137240: Negative lookahead in RegEx breaks backreference

Reviewed-by: mhaupt
This commit is contained in:
Hannes Wallnöfer 2016-06-24 12:39:42 +02:00
parent 803a8d9310
commit 939f1f7308
2 changed files with 58 additions and 4 deletions

View File

@ -80,8 +80,17 @@ final class RegExpScanner extends Scanner {
this.negLookaheadLevel = negLookaheadLevel;
}
boolean isContained(final int group, final int level) {
return group == this.negLookaheadGroup && level >= this.negLookaheadLevel;
/**
 * Tells whether a backreference located at the given position may refer to this Capture.
 * A capture that is not inside a negative lookahead (its level is zero) can be referenced
 * from anywhere. Otherwise the referrer must sit in the same negative lookahead group,
 * at a level that is not lower than the level of this capture.
 *
 * @param group negative lookahead group of the referring position
 * @param level negative lookahead level of the referring position
 * @return true if this capture group can be referenced from the given position
 */
boolean canBeReferencedFrom(final int group, final int level) {
    if (this.negLookaheadLevel == 0) {
        // Not within a negative lookahead: visible from every position.
        return true;
    }
    final boolean sameGroup = group == this.negLookaheadGroup;
    final boolean sameOrDeeperLevel = level >= this.negLookaheadLevel;
    return sameGroup && sameOrDeeperLevel;
}
}
@ -671,8 +680,9 @@ final class RegExpScanner extends Scanner {
} else if (decimalValue <= caps.size()) {
// Captures inside a negative lookahead are undefined when referenced from the outside.
if (!caps.get(decimalValue - 1).isContained(negLookaheadGroup, negLookaheadLevel)) {
// Reference to capture in negative lookahead, omit from output buffer.
final Capture capture = caps.get(decimalValue - 1);
if (!capture.canBeReferencedFrom(negLookaheadGroup, negLookaheadLevel)) {
// Outside reference to capture in negative lookahead, omit from output buffer.
sb.setLength(sb.length() - 1);
} else {
// Append backreference to output buffer.

View File

@ -0,0 +1,44 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
* JDK-8137240: Negative lookahead in RegEx breaks backreference
*
* @test
* @run
*/
// A backreference to a group captured before a negative lookahead must still match,
// so the whole input 'aa' is consumed and replaced.
Assert.assertEquals('aa'.replace(/(a)(?!b)\1/gm, 'c'), 'c');

// Same pattern through match(): full match plus the single capture group.
var backrefMatch = 'aa'.match(/(a)(?!b)\1/);
Assert.assertTrue(backrefMatch.length === 2);
Assert.assertTrue(backrefMatch[0] === 'aa');
Assert.assertTrue(backrefMatch[1] === 'a');

// Group 2 lives inside the negative lookahead and is referenced from outside of it:
// it stays undefined, while groups 1 and 3 capture normally.
var lookaheadGroupMatch = 'aa'.match(/(a)(?!(b))\2(a)/);
Assert.assertTrue(lookaheadGroupMatch.length === 4);
Assert.assertTrue(lookaheadGroupMatch[0] === 'aa');
Assert.assertTrue(lookaheadGroupMatch[1] === 'a');
Assert.assertTrue(lookaheadGroupMatch[2] === undefined);
Assert.assertTrue(lookaheadGroupMatch[3] === 'a');