From 79350b9b4f42e43a8ab27ea76b9115dae6f448af Mon Sep 17 00:00:00 2001
From: Volker Simonis
Date: Tue, 23 Nov 2021 09:15:20 +0000
Subject: [PATCH] 8276216: Negated character classes performance regression in Pattern

Reviewed-by: clanger
---
 .../classes/java/util/regex/Pattern.java | 65 +++++++++++------
 .../bench/java/util/regex/FindPattern.java | 71 +++++++++++++++++++
 2 files changed, 113 insertions(+), 23 deletions(-)
 create mode 100644 test/micro/org/openjdk/bench/java/util/regex/FindPattern.java

diff --git a/src/java.base/share/classes/java/util/regex/Pattern.java b/src/java.base/share/classes/java/util/regex/Pattern.java
index 5b2533111f4..d52a2b92af9 100644
--- a/src/java.base/share/classes/java/util/regex/Pattern.java
+++ b/src/java.base/share/classes/java/util/regex/Pattern.java
@@ -5605,50 +5605,69 @@ NEXT: while (i <= last) {
         }
     }
 
+    private static CharPredicate and(CharPredicate p1, CharPredicate p2,
+                                     boolean bmpChar) { // p1 AND p2; shared by CharPredicate.and and BmpCharPredicate.and
+        if (bmpChar) { // true: the lambda is typed as BmpCharPredicate
+            return (BmpCharPredicate)(ch -> p1.is(ch) && p2.is(ch));
+        } else { // false: same test, typed as plain CharPredicate
+            return (CharPredicate)(ch -> p1.is(ch) && p2.is(ch));
+        }
+    }
+
+    private static CharPredicate union(CharPredicate p1, CharPredicate p2,
+                                       boolean bmpChar) { // p1 OR p2; shared by the one-argument union defaults
+        if (bmpChar) { // true: the lambda is typed as BmpCharPredicate
+            return (BmpCharPredicate)(ch -> p1.is(ch) || p2.is(ch));
+        } else { // false: same test, typed as plain CharPredicate
+            return (CharPredicate)(ch -> p1.is(ch) || p2.is(ch));
+        }
+    }
+
+    private static CharPredicate union(CharPredicate p1, CharPredicate p2,
+                                       CharPredicate p3, boolean bmpChar) { // p1 OR p2 OR p3, tested in that order
+        if (bmpChar) { // true: the lambda is typed as BmpCharPredicate
+            return (BmpCharPredicate)(ch -> p1.is(ch) || p2.is(ch) || p3.is(ch));
+        } else { // false: same test, typed as plain CharPredicate
+            return (CharPredicate)(ch -> p1.is(ch) || p2.is(ch) || p3.is(ch));
+        }
+    }
+
+    private static CharPredicate negate(CharPredicate p1) { // complement of p1
+        return (CharPredicate)(ch -> !p1.is(ch)); // always typed as plain CharPredicate
+    }
+
     @FunctionalInterface
     static interface CharPredicate {
         boolean is(int ch);
 
         default CharPredicate and(CharPredicate p) {
-            return ch -> is(ch) && p.is(ch);
+            return Pattern.and(this, p, false); // result is never typed as BmpCharPredicate
         }
         default CharPredicate union(CharPredicate p) {
-            return ch -> is(ch) || p.is(ch);
+            return Pattern.union(this, p, false); // result is never typed as BmpCharPredicate
         }
         default CharPredicate union(CharPredicate p1,
                                     CharPredicate p2) {
-            return ch -> is(ch) || p1.is(ch) || p2.is(ch);
+            return Pattern.union(this, p1, p2, false); // result is never typed as BmpCharPredicate
         }
         default CharPredicate negate() {
-            return ch -> !is(ch);
+            return Pattern.negate(this); // delegates to the static helper
         }
     }
 
     static interface BmpCharPredicate extends CharPredicate {
 
         default CharPredicate and(CharPredicate p) {
-            if (p instanceof BmpCharPredicate)
-                return (BmpCharPredicate)(ch -> is(ch) && p.is(ch));
-            return ch -> is(ch) && p.is(ch);
+            return Pattern.and(this, p, p instanceof BmpCharPredicate); // BmpCharPredicate only when p is one as well
         }
         default CharPredicate union(CharPredicate p) {
-            if (p instanceof BmpCharPredicate)
-                return (BmpCharPredicate)(ch -> is(ch) || p.is(ch));
-            return ch -> is(ch) || p.is(ch);
+            return Pattern.union(this, p, p instanceof BmpCharPredicate); // BmpCharPredicate only when p is one as well
         }
-        static CharPredicate union(CharPredicate... predicates) {
-            CharPredicate cp = ch -> {
-                for (CharPredicate p : predicates) {
-                    if (!p.is(ch))
-                        return false;
-                }
-                return true;
-            };
-            for (CharPredicate p : predicates) {
-                if (! (p instanceof BmpCharPredicate))
-                    return cp;
-            }
-            return (BmpCharPredicate)cp;
+        default CharPredicate union(CharPredicate p1,
+                                    CharPredicate p2) { // three-way union of this, p1 and p2
+            return Pattern.union(this, p1, p2,
+                                 p1 instanceof BmpCharPredicate && // a BmpCharPredicate result requires both
+                                 p2 instanceof BmpCharPredicate);  // p1 and p2 to be BmpCharPredicate
         }
     }
 
diff --git a/test/micro/org/openjdk/bench/java/util/regex/FindPattern.java b/test/micro/org/openjdk/bench/java/util/regex/FindPattern.java
new file mode 100644
index 00000000000..907a1616889
--- /dev/null
+++ b/test/micro/org/openjdk/bench/java/util/regex/FindPattern.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package org.openjdk.bench.java.util.regex;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.util.concurrent.TimeUnit;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+
+@State(Scope.Benchmark)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@Fork(value=1, jvmArgs= {"-showversion", "-XX:+UseSerialGC"})
+@Warmup(iterations = 1, time = 10, timeUnit = TimeUnit.SECONDS)
+@Measurement(iterations = 3, time = 10, timeUnit = TimeUnit.SECONDS)
+public class FindPattern { // benchmark: counts Matcher.find() hits of one pattern over one input
+    @Param({"[^A-Za-z0-9]", "[A-Za-z0-9]"}) // negated and non-negated form of the same character class
+    static String patternString; // regex source that setupTrial() compiles
+    @Param({"abcdefghijklmnop1234567890ABCDEFGHIJKLMNOP", // input made only of ASCII letters and digits
+            ",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,"}) // input made only of commas
+    static String text; // input scanned by testFind()
+    static Pattern pattern; // assigned in setupTrial(), read in testFind()
+
+    @Setup(Level.Trial)
+    public void setupTrial() { // compiles the pattern outside the @Benchmark method
+        pattern = Pattern.compile(patternString);
+    }
+
+    @Benchmark
+    @BenchmarkMode(Mode.AverageTime)
+    public int testFind() { // returns how many matches of pattern occur in text
+        int counter = 0;
+        Matcher m = pattern.matcher(text); // fresh Matcher on every invocation
+        while (m.find()) { // advance match by match until none is left
+            counter++;
+        }
+        return counter;
+    }
+}