8276066: Reset LoopPercentProfileLimit for x86 due to suboptimal performance

Reviewed-by: thartmann, kvn
This commit is contained in:
Jie Fu 2021-11-03 22:45:50 +00:00
parent f3320d2fbd
commit 0ab910d626
2 changed files with 89 additions and 1 deletions

View File

@@ -44,7 +44,7 @@ define_pd_global(intx, OnStackReplacePercentage, 140);
define_pd_global(intx, ConditionalMoveLimit, 3);
define_pd_global(intx, FreqInlineSize, 325);
define_pd_global(intx, MinJumpTableSize, 10);
-define_pd_global(intx, LoopPercentProfileLimit, 30);
+define_pd_global(intx, LoopPercentProfileLimit, 10);
#ifdef AMD64
define_pd_global(intx, InteriorEntryAlignment, 16);
define_pd_global(size_t, NewSizeThreadIncrease, ScaleForWordSize(4*K));

View File

@@ -0,0 +1,88 @@
/*
* Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.vm.compiler;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.*;
import java.util.concurrent.TimeUnit;
/**
 * JMH throughput benchmark built around two doubly nested loops over square
 * {@code byte} matrices: an element-wise add ({@code a = b + c}) and an
 * element-wise copy ({@code a = b}).
 *
 * <p>Each benchmark method invokes its non-inlined workload 100 times per
 * benchmark call and feeds the accumulated result to a {@link Blackhole}.
 *
 * <p>NOTE(review): judging by the class name, this is presumably meant to
 * exercise C2 loop unrolling of the inner loops -- confirm against the
 * change that introduced it. The exact loop and array-access shape is likely
 * part of what is being measured, so avoid "cleaning up" the workload bodies.
 */
@Warmup(iterations = 3, time = 5, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 4, time = 5, timeUnit = TimeUnit.SECONDS)
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.SECONDS)
@State(Scope.Thread)
@Fork(value=1)
public class LoopUnroll {
// Side length of the square matrices; the benchmarks are run once for each
// listed value.
@Param({"16", "32", "64", "128", "256", "512", "1024"})
private int VECLEN;
// Destination matrix written by both workloads.
private byte[][] a;
// Source matrix read by both workloads.
private byte[][] b;
// Second source matrix, read only by workload 1.
private byte[][] c;
/**
 * Allocates the three VECLEN x VECLEN matrices. Nothing in this class fills
 * b or c afterwards, so every element keeps Java's default value of zero.
 */
@Setup
public void init() {
a = new byte[VECLEN][VECLEN];
b = new byte[VECLEN][VECLEN];
c = new byte[VECLEN][VECLEN];
}
/**
 * Workload 1: stores the element-wise sum of b and c into a.
 *
 * The inner loop bound is taken from the first row (a[0].length), so the
 * matrices are treated as rectangular with all rows as long as row 0.
 *
 * @param count index used for both dimensions of the returned element
 * @param a destination matrix, overwritten
 * @param b first source matrix
 * @param c second source matrix
 * @return a[count][count] after the update, widened to int
 */
// Kept out of line so the nested loop is compiled as a method of its own
// rather than being inlined into the caller's loop.
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
private int run_workload1(int count, byte[][] a , byte[][] b, byte[][] c) {
for(int i = 0; i < a.length; i++) {
for (int j = 0; j < a[0].length; j++) {
// The int sum is narrowed back to byte, wrapping on overflow.
a[i][j] = (byte)(b[i][j] + c[i][j]);
}
}
return a[count][count];
}
/**
 * Benchmark entry point for workload 1: runs it 100 times and consumes the
 * sum of the returned elements so the calls cannot be discarded as dead code.
 */
@Benchmark
public void workload1_caller(Blackhole bh) {
int r = 0;
for(int i = 0 ; i < 100; i++) {
// i % a.length keeps the diagonal index within the matrix bounds.
r += run_workload1(i % a.length, a, b, c);
}
bh.consume(r);
}
/**
 * Workload 2: copies b into a element by element.
 *
 * Loop bounds come from b (b.length and b[0].length); a is indexed with the
 * same i/j and must therefore be at least as large as b.
 *
 * @param count index used for both dimensions of the returned element
 * @param a destination matrix, overwritten
 * @param b source matrix
 * @return a[count][count] after the copy, widened to int
 */
// Kept out of line for the same reason as run_workload1.
@CompilerControl(CompilerControl.Mode.DONT_INLINE)
private int run_workload2(int count, byte[][] a , byte[][] b) {
for(int i = 0; i < b.length; i++) {
for (int j = 0; j < b[0].length; j++) {
a[i][j] = b[i][j];
}
}
return a[count][count];
}
/**
 * Benchmark entry point for workload 2: runs it 100 times and consumes the
 * sum of the returned elements so the calls cannot be discarded as dead code.
 */
@Benchmark
public void workload2_caller(Blackhole bh) {
int r = 0;
for(int i = 0 ; i < 100; i++) {
// i % a.length keeps the diagonal index within the matrix bounds.
r += run_workload2(i % a.length, a, b);
}
bh.consume(r);
}
}