Vladimir Kozlov 2016-06-24 15:30:50 -07:00
commit cd75cb6a26
14 changed files with 296 additions and 41 deletions

View File

@@ -118,6 +118,7 @@ define_pd_global(intx, InlineSmallCode, 1000);
// Don't attempt to use Neon on builtin sim until builtin sim supports it
#define UseCRC32 false
#define UseSIMDForMemoryOps false
#define AvoidUnalignedAccesses false
#else
#define UseBuiltinSim false
@@ -144,6 +145,8 @@ define_pd_global(intx, InlineSmallCode, 1000);
"Use CRC32 instructions for CRC32 computation") \
product(bool, UseSIMDForMemoryOps, false, \
"Use SIMD instructions in generated memory move code") \
product(bool, AvoidUnalignedAccesses, false, \
"Avoid generating unaligned memory accesses") \
product(bool, UseLSE, false, \
"Use LSE instructions") \
product(bool, UseBlockZeroing, true, \
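Taken together, the two halves of this hunk show why the new flag appears twice: on builtin-sim builds the preprocessor #define pins AvoidUnalignedAccesses to false, while hardware builds get the product() flag and its runtime default. A minimal sketch of the pattern, assuming the simulator guard macro is spelled BUILTIN_SIM (our assumption, not confirmed by this hunk):

// sketch.cpp -- illustration only, not HotSpot source
#ifdef BUILTIN_SIM
#define AvoidUnalignedAccesses false         // pins the flag; the guard folds away
#else
static bool AvoidUnalignedAccesses = false;  // stand-in for the product() flag
#endif

static void generate_copy_longs() {
  if (AvoidUnalignedAccesses) {
    // emit tbnz(d, 3, unaligned_copy_long) and the slow path (next file)
  }
}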

View File

@@ -801,6 +801,12 @@ class StubGenerator: public StubCodeGenerator {
StubCodeMark mark(this, "StubRoutines", stub_name);
__ align(CodeEntryAlignment);
__ bind(start);
Label unaligned_copy_long;
if (AvoidUnalignedAccesses) {
__ tbnz(d, 3, unaligned_copy_long);
}
if (direction == copy_forwards) {
__ sub(s, s, bias);
__ sub(d, d, bias);
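tbnz branches when the named register bit is set, so the new guard takes the unaligned_copy_long path exactly when bit 3 of the destination address is set, i.e. when d is an odd multiple of 8 and the main loop's 16-byte stp stores would straddle alignment. The same predicate in plain C++ (the helper name is ours):

#include <cstdint>

// Equivalent of tbnz(d, 3, unaligned_copy_long): true when an
// 8-byte-aligned destination is not also 16-byte aligned.
static bool needs_unaligned_path(const void* d) {
  return (reinterpret_cast<uintptr_t>(d) & 8u) != 0;  // test bit 3
}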
@@ -901,6 +907,198 @@ class StubGenerator: public StubCodeGenerator {
}
__ ret(lr);
if (AvoidUnalignedAccesses) {
Label drain, again;
// Register order for storing. Order is different for backward copy.
__ bind(unaligned_copy_long);
// source address is even aligned, target odd aligned
//
// when forward copying word pairs we read long pairs at offsets
// {0, 2, 4, 6} (in long words). when backwards copying we read
// long pairs at offsets {-2, -4, -6, -8}. We adjust the source
// address by -2 in the forwards case so we can compute the
// source offsets for both as {2, 4, 6, 8} * unit where unit = 1
// or -1.
//
// when forward copying we need to store 1 word, 3 pairs and
// then 1 word at offsets {0, 1, 3, 5, 7}. Rather than use a
// zero offset we adjust the destination by -1 which means we
// have to use offsets { 1, 2, 4, 6, 8} * unit for the stores.
//
// When backwards copying we need to store 1 word, 3 pairs and
// then 1 word at offsets {-1, -3, -5, -7, -8} i.e. we use
// offsets {1, 3, 5, 7, 8} * unit.
if (direction == copy_forwards) {
__ sub(s, s, 16);
__ sub(d, d, 8);
}
// Fill 8 registers
//
// for forwards copy s was offset by -16 from the original input
// value of s so the register contents are at these offsets
// relative to the 64 bit block addressed by that original input
// and so on for each successive 64 byte block when s is updated
//
// t0 at offset 0, t1 at offset 8
// t2 at offset 16, t3 at offset 24
// t4 at offset 32, t5 at offset 40
// t6 at offset 48, t7 at offset 56
// for backwards copy s was not offset so the register contents
// are at these offsets into the preceding 64 byte block
// relative to that original input and so on for each successive
// preceding 64 byte block when s is updated. this explains the
// slightly counter-intuitive looking pattern of register usage
// in the stp instructions for backwards copy.
//
// t0 at offset -16, t1 at offset -8
// t2 at offset -32, t3 at offset -24
// t4 at offset -48, t5 at offset -40
// t6 at offset -64, t7 at offset -56
__ ldp(t0, t1, Address(s, 2 * unit));
__ ldp(t2, t3, Address(s, 4 * unit));
__ ldp(t4, t5, Address(s, 6 * unit));
__ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
__ subs(count, count, 16);
__ br(Assembler::LO, drain);
int prefetch = PrefetchCopyIntervalInBytes;
bool use_stride = false;
if (direction == copy_backwards) {
use_stride = prefetch > 256;
prefetch = -prefetch;
if (use_stride) __ mov(stride, prefetch);
}
__ bind(again);
if (PrefetchCopyIntervalInBytes > 0)
__ prfm(use_stride ? Address(s, stride) : Address(s, prefetch), PLDL1KEEP);
if (direction == copy_forwards) {
// allowing for the offset of -8 the store instructions place
// registers into the target 64 bit block at the following
// offsets
//
// t0 at offset 0
// t1 at offset 8, t2 at offset 16
// t3 at offset 24, t4 at offset 32
// t5 at offset 40, t6 at offset 48
// t7 at offset 56
__ str(t0, Address(d, 1 * unit));
__ stp(t1, t2, Address(d, 2 * unit));
__ ldp(t0, t1, Address(s, 2 * unit));
__ stp(t3, t4, Address(d, 4 * unit));
__ ldp(t2, t3, Address(s, 4 * unit));
__ stp(t5, t6, Address(d, 6 * unit));
__ ldp(t4, t5, Address(s, 6 * unit));
__ str(t7, Address(__ pre(d, 8 * unit)));
__ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
} else {
// d was not offset when we started so the registers are
// written into the 64 bit block preceding d with the following
// offsets
//
// t1 at offset -8
// t3 at offset -24, t0 at offset -16
// t5 at offset -40, t2 at offset -32
// t7 at offset -56, t4 at offset -48
// t6 at offset -64
//
// note that this matches the offsets previously noted for the
// loads
__ str(t1, Address(d, 1 * unit));
__ stp(t3, t0, Address(d, 3 * unit));
__ ldp(t0, t1, Address(s, 2 * unit));
__ stp(t5, t2, Address(d, 5 * unit));
__ ldp(t2, t3, Address(s, 4 * unit));
__ stp(t7, t4, Address(d, 7 * unit));
__ ldp(t4, t5, Address(s, 6 * unit));
__ str(t6, Address(__ pre(d, 8 * unit)));
__ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
}
__ subs(count, count, 8);
__ br(Assembler::HS, again);
// Drain
//
// this uses the same pattern of offsets and register arguments
// as above
__ bind(drain);
if (direction == copy_forwards) {
__ str(t0, Address(d, 1 * unit));
__ stp(t1, t2, Address(d, 2 * unit));
__ stp(t3, t4, Address(d, 4 * unit));
__ stp(t5, t6, Address(d, 6 * unit));
__ str(t7, Address(__ pre(d, 8 * unit)));
} else {
__ str(t1, Address(d, 1 * unit));
__ stp(t3, t0, Address(d, 3 * unit));
__ stp(t5, t2, Address(d, 5 * unit));
__ stp(t7, t4, Address(d, 7 * unit));
__ str(t6, Address(__ pre(d, 8 * unit)));
}
// now we need to copy any remaining part block which may
// include a 4 word subblock and/or a 2 word subblock.
// bits 2 and 1 in the count are the tell-tale for whether we
// have each such subblock
{
Label L1, L2;
__ tbz(count, exact_log2(4), L1);
// this is the same as above but copying only 4 longs hence
// with only one intervening stp between the str instructions
// but note that the offsets and registers still follow the
// same pattern
__ ldp(t0, t1, Address(s, 2 * unit));
__ ldp(t2, t3, Address(__ pre(s, 4 * unit)));
if (direction == copy_forwards) {
__ str(t0, Address(d, 1 * unit));
__ stp(t1, t2, Address(d, 2 * unit));
__ str(t3, Address(__ pre(d, 4 * unit)));
} else {
__ str(t1, Address(d, 1 * unit));
__ stp(t3, t0, Address(d, 3 * unit));
__ str(t2, Address(__ pre(d, 4 * unit)));
}
__ bind(L1);
__ tbz(count, 1, L2);
// this is the same as above but copying only 2 longs hence
// there is no intervening stp between the str instructions
// but note that the offset and register patterns are still
// the same
__ ldp(t0, t1, Address(__ pre(s, 2 * unit)));
if (direction == copy_forwards) {
__ str(t0, Address(d, 1 * unit));
__ str(t1, Address(__ pre(d, 2 * unit)));
} else {
__ str(t1, Address(d, 1 * unit));
__ str(t0, Address(__ pre(d, 2 * unit)));
}
__ bind(L2);
// for forwards copy we need to re-adjust the offsets we
// applied so that s and d follow the last words written
if (direction == copy_forwards) {
__ add(s, s, 16);
__ add(d, d, 8);
}
}
__ ret(lr);
}
}
// Small copy: less than 16 bytes.
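The heart of the slow path is the word/pair/word store schedule described in the comments above: a single str realigns the odd destination, after which every stp lands on a 16-byte boundary. A compact model of the forward schedule for one 64-byte block (offsets in 64-bit words; this sketches the store pattern only, not the software-pipelined loads):

#include <cstdint>
#include <cstring>

// Store schedule when d is an odd multiple of 8: word at offset 0,
// pairs at {1, 3, 5}, word at offset 7. After the first word, &d[1]
// is 16-byte aligned, so the three pair stores (the stps) are aligned.
static void store_block_forward(uint64_t* d, const uint64_t t[8]) {
  d[0] = t[0];               // str t0
  memcpy(&d[1], &t[1], 16);  // stp t1, t2
  memcpy(&d[3], &t[3], 16);  // stp t3, t4
  memcpy(&d[5], &t[5], 16);  // stp t5, t6
  d[7] = t[7];               // str t7
}

The stride-register dance in the prefetch setup is worth a note too: apparently prfm's signed unscaled immediate reaches only -256, so when the backwards-copy prefetch distance exceeds 256 bytes the negated offset is materialized in the stride register first.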
@@ -1024,11 +1222,9 @@ class StubGenerator: public StubCodeGenerator {
// (96 bytes if SIMD because we do 32 bytes per instruction)
__ bind(copy80);
if (UseSIMDForMemoryOps) {
__ ldpq(v0, v1, Address(s, 0));
__ ldpq(v2, v3, Address(s, 32));
__ ld4(v0, v1, v2, v3, __ T16B, Address(s, 0));
__ ldpq(v4, v5, Address(send, -32));
__ stpq(v0, v1, Address(d, 0));
__ stpq(v2, v3, Address(d, 32));
__ st4(v0, v1, v2, v3, __ T16B, Address(d, 0));
__ stpq(v4, v5, Address(dend, -32));
} else {
__ ldp(t0, t1, Address(s, 0));
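In the copy80 hunk the paired ldpq/stpq at offsets 0 and 32 give way to a single ld4/st4 structure pair. ld4 de-interleaves 64 bytes into four registers and st4 re-interleaves them, so the round trip still copies the block verbatim; presumably the structure forms behave better than Q-register pair accesses on unaligned addresses on the affected parts. The same round trip with NEON intrinsics (requires an AArch64 toolchain):

#include <arm_neon.h>
#include <stdint.h>

// vld4q_u8/vst4q_u8 compile to ld4/st4 of 4 x 16B: the de-interleave
// performed by the load is exactly undone by the store.
static void copy64(uint8_t* d, const uint8_t* s) {
  uint8x16x4_t v = vld4q_u8(s);
  vst4q_u8(d, v);
}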

View File

@@ -175,7 +175,15 @@ void VM_Version::get_processor_features() {
}
// Enable vendor specific features
if (_cpu == CPU_CAVIUM && _variant == 0) _features |= CPU_DMB_ATOMICS;
if (_cpu == CPU_CAVIUM) {
if (_variant == 0) _features |= CPU_DMB_ATOMICS;
if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) {
FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true);
}
if (FLAG_IS_DEFAULT(UseSIMDForMemoryOps)) {
FLAG_SET_DEFAULT(UseSIMDForMemoryOps, (_variant > 0));
}
}
if (_cpu == CPU_ARM && (_model == 0xd03 || _model2 == 0xd03)) _features |= CPU_A53MAC;
if (_cpu == CPU_ARM && (_model == 0xd07 || _model2 == 0xd07)) _features |= CPU_STXR_PREFETCH;
// If an old style /proc/cpuinfo (cpu_lines == 1) then if _model is an A57 (0xd07)
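_cpu, _variant and _model are the implementer, variant and part fields parsed from /proc/cpuinfo, mirroring MIDR_EL1. For reference (field positions per the Arm ARM; the decoder is ours):

#include <cstdint>

struct Midr {
  unsigned implementer;  // bits [31:24], 0x43 == Cavium
  unsigned variant;      // bits [23:20], silicon revision
  unsigned part;         // bits [15:4], 0xd03/0xd07 == Cortex-A53/A57
};

static Midr decode_midr(uint32_t midr) {
  return { (midr >> 24) & 0xffu, (midr >> 20) & 0xfu, (midr >> 4) & 0xfffu };
}

So the Cavium branch above turns on AvoidUnalignedAccesses for all ThunderX parts and enables UseSIMDForMemoryOps only for variant > 0, unless either flag was set explicitly on the command line.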

View File

@@ -1668,9 +1668,13 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
__ lwz(Rscratch3, in_bytes(MethodData::backedge_mask_offset()), Rmdo);
__ addi(Rscratch2, Rscratch2, increment);
__ stw(Rscratch2, mdo_bc_offs, Rmdo);
__ and_(Rscratch3, Rscratch2, Rscratch3);
__ bne(CCR0, Lforward);
__ b(Loverflow);
if (UseOnStackReplacement) {
__ and_(Rscratch3, Rscratch2, Rscratch3);
__ bne(CCR0, Lforward);
__ b(Loverflow);
} else {
__ b(Lforward);
}
}
// If there's no MDO, increment counter in method.
@@ -1680,9 +1684,12 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
__ lwz(Rscratch3, in_bytes(MethodCounters::backedge_mask_offset()), R4_counters);
__ addi(Rscratch2, Rscratch2, increment);
__ stw(Rscratch2, mo_bc_offs, R4_counters);
__ and_(Rscratch3, Rscratch2, Rscratch3);
__ bne(CCR0, Lforward);
if (UseOnStackReplacement) {
__ and_(Rscratch3, Rscratch2, Rscratch3);
__ bne(CCR0, Lforward);
} else {
__ b(Lforward);
}
__ bind(Loverflow);
// Notify point for loop, pass branch bytecode.
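Both PPC hunks make the backedge-overflow test conditional on UseOnStackReplacement: with OSR disabled the interpreter bumps the counter and branches straight to Lforward, never requesting an OSR compile. The SPARC hunks below reach the same end by passing Assembler::always so increment_mask_and_jump unconditionally exits to Lforward, and the C1 hunk further down applies the identical policy with notify && (!backedge || UseOnStackReplacement). The counter test itself, restated as a plain function (names ours):

// Per taken backedge: bump the counter; request OSR compilation only
// when OSR is enabled and the masked counter wraps to zero.
static bool backedge_overflow(int& counter, int increment,
                              int mask, bool use_osr) {
  counter += increment;
  return use_osr && (counter & mask) == 0;
}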

View File

@@ -1636,7 +1636,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
in_bytes(InvocationCounter::counter_offset()));
Address mask(G4_scratch, in_bytes(MethodData::backedge_mask_offset()));
__ increment_mask_and_jump(mdo_backedge_counter, increment, mask, G3_scratch, O0,
Assembler::notZero, &Lforward);
(UseOnStackReplacement ? Assembler::notZero : Assembler::always), &Lforward);
__ ba_short(Loverflow);
}
@@ -1647,7 +1647,7 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
in_bytes(InvocationCounter::counter_offset()));
Address mask(G3_method_counters, in_bytes(MethodCounters::backedge_mask_offset()));
__ increment_mask_and_jump(backedge_counter, increment, mask, G4_scratch, O0,
Assembler::notZero, &Lforward);
(UseOnStackReplacement ? Assembler::notZero : Assembler::always), &Lforward);
__ bind(Loverflow);
// notify point for loop, pass branch bytecode

View File

@@ -109,13 +109,7 @@ class HotSpotMemoryAccessProviderImpl implements HotSpotMemoryAccessProvider {
}
}
private boolean verifyReadRawObject(Object expected, Constant base, long displacement, boolean compressed) {
if (compressed == runtime.getConfig().useCompressedOops) {
Object obj = asObject(base);
if (obj != null) {
assert expected == UNSAFE.getObject(obj, displacement) : "readUnsafeOop doesn't agree with unsafe.getObject";
}
}
private boolean verifyReadRawObject(Object expected, Constant base, long displacement) {
if (base instanceof HotSpotMetaspaceConstant) {
MetaspaceWrapperObject metaspaceObject = HotSpotMetaspaceConstantImpl.getMetaspaceObject(base);
if (metaspaceObject instanceof HotSpotResolvedObjectTypeImpl) {
@@ -136,11 +130,11 @@ class HotSpotMemoryAccessProviderImpl implements HotSpotMemoryAccessProvider {
assert !compressed;
displacement += asRawPointer(baseConstant);
ret = UNSAFE.getUncompressedObject(displacement);
assert verifyReadRawObject(ret, baseConstant, initialDisplacement);
} else {
assert runtime.getConfig().useCompressedOops == compressed;
ret = UNSAFE.getObject(base, displacement);
}
assert verifyReadRawObject(ret, baseConstant, initialDisplacement, compressed);
return ret;
}

View File

@@ -96,15 +96,6 @@ public interface ConstantReflectionProvider {
*/
ResolvedJavaType asJavaType(Constant constant);
/**
* Check if the constant is embeddable in the code.
*
* @param constant the constant to test
*/
default boolean isEmbeddable(Constant constant) {
return true;
}
/**
* Gets access to the internals of {@link MethodHandle}.
*/

View File

@@ -3434,7 +3434,7 @@ void LIRGenerator::increment_event_counter_impl(CodeEmitInfo* info,
__ load(counter, result);
__ add(result, LIR_OprFact::intConst(InvocationCounter::count_increment), result);
__ store(result, counter);
if (notify) {
if (notify && (!backedge || UseOnStackReplacement)) {
LIR_Opr meth = LIR_OprFact::metadataConst(method->constant_encoding());
// The bci for info can point to cmp for if's we want the if bci
CodeStub* overflow = new CounterOverflowStub(info, bci, meth);

View File

@@ -27,8 +27,8 @@ import sun.hotspot.code.BlobType;
/*
* @test PeakUsageTest
* @ignore 8151345
* @library /testlibrary /test/lib
* @ignore 8151345
* @modules java.base/jdk.internal.misc
* java.management
* @build PeakUsageTest

View File

@@ -24,10 +24,10 @@
/*
* @test
* @bug 8137167
* @ignore 8140405
* @summary Tests jcmd to be able to clear directives added via options
* @modules java.base/jdk.internal.misc
* @library /testlibrary /test/lib ../share /
* @ignore 8140405
* @build compiler.compilercontrol.jcmd.ClearDirectivesFileStackTest
* pool.sub.* pool.subpack.* sun.hotspot.WhiteBox
* compiler.testlibrary.CompilerUtils compiler.compilercontrol.share.actions.*

View File

@@ -0,0 +1,62 @@
/*
* Copyright (c) 2016 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @bug 8159620
* @summary testing that -XX:-UseOnStackReplacement works with both -XX:(+/-)TieredCompilation
* @modules java.base/jdk.internal.misc
* @library /testlibrary /test/lib /
* @build sun.hotspot.WhiteBox
* @run main ClassFileInstaller sun.hotspot.WhiteBox
* sun.hotspot.WhiteBox$WhiteBoxPermission
* @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:+PrintCompilation
* -XX:-BackgroundCompilation -XX:-TieredCompilation -XX:-UseOnStackReplacement DisableOSRTest
* @run main/othervm -Xbootclasspath/a:. -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -XX:+PrintCompilation
* -XX:-BackgroundCompilation -XX:+TieredCompilation -XX:-UseOnStackReplacement DisableOSRTest
*/
import java.lang.reflect.Method;
import java.util.Random;
import sun.hotspot.WhiteBox;
public class DisableOSRTest {
private static final WhiteBox WB = WhiteBox.getWhiteBox();
private static final Random RANDOM = new Random();
public static int foo() {
return RANDOM.nextInt();
}
public static void main(String[] args) throws Exception {
Method m = DisableOSRTest.class.getMethod("main", String[].class);
for (int i = 0; i < 100_000; i++) {
foo();
}
if (WB.isMethodCompiled(m, true /* isOsr */)) {
throw new RuntimeException("\"" + m + "\" shouldn't be OSR compiled if running with -XX:-UseOnStackReplacement!");
}
}
}

View File

@@ -24,10 +24,10 @@
/*
* @test
* @bug 8136421
* @ignore 8158860
* @requires (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "aarch64")
* @library / /testlibrary /test/lib
* @library ../common/patches
* @ignore 8158860
* @modules java.base/jdk.internal.misc
* @modules jdk.vm.ci/jdk.vm.ci.hotspot
* jdk.vm.ci/jdk.vm.ci.meta

View File

@@ -120,12 +120,6 @@ public class HotSpotConstantReflectionProviderTest {
Assert.assertEquals(actual, expected, "Unexpected result:");
}
@Test(dataProvider = "isEmbeddableDataProvider", dataProviderClass = IsEmbeddableDataProvider.class)
public void testIsEmbeddable(JavaConstant constant, boolean expected) {
boolean actual = CONSTANT_REFLECTION_PROVIDER.isEmbeddable(constant);
Assert.assertEquals(actual, expected, "Unexpected result:");
}
@Test
public void testGetMemoryAccessProvider() {
MemoryAccessProvider actual = CONSTANT_REFLECTION_PROVIDER.getMemoryAccessProvider();

View File

@@ -23,12 +23,12 @@
/*
* @test
* @ignore 8134286
* @bug 8023014
* @summary Test ensures that there is no crash if there is not enough ReservedCodeCacheSize
* to initialize all compiler threads. The option -Xcomp gives the VM more time to
* trigger the old bug.
* @library /testlibrary
* @ignore 8134286
* @modules java.base/jdk.internal.misc
* java.management
*/