8336245: AArch64: remove extra register copy when converting from long to pointer

Co-authored-by: Andrew Haley <aph@openjdk.org>
Reviewed-by: aph, adinn
This commit is contained in:
Fei Gao 2024-07-26 09:36:23 +00:00
parent 7f11935461
commit d10afa26e5
5 changed files with 317 additions and 23 deletions
src/hotspot
test/hotspot/jtreg/compiler/c2

@ -2745,10 +2745,7 @@ typedef void (MacroAssembler::* mem_vector_insn)(FloatRegister Rt,
}
if (index == -1) {
/* If we get an out-of-range offset it is a bug in the compiler,
so we assert here. */
assert(Address::offset_ok_for_immed(disp, exact_log2(size_in_memory)), "c2 compiler bug");
/* Fix up any out-of-range offsets. */
// Fix up any out-of-range offsets.
assert_different_registers(rscratch1, base);
Address addr = Address(base, disp);
addr = __ legitimize_address(addr, size_in_memory, rscratch1);
@ -3348,7 +3345,11 @@ encode %{
int scale = $mem$$scale;
int disp = $mem$$disp;
if (index == -1) {
__ prfm(Address(base, disp), PSTL1KEEP);
// Fix up any out-of-range offsets.
assert_different_registers(rscratch1, base);
Address addr = Address(base, disp);
addr = __ legitimize_address(addr, 8, rscratch1);
__ prfm(addr, PSTL1KEEP);
} else {
Register index_reg = as_Register(index);
if (disp == 0) {
@ -4229,9 +4230,9 @@ operand immIOffset16()
interface(CONST_INTER);
%}
operand immLoffset()
operand immLOffset()
%{
predicate(Address::offset_ok_for_immed(n->get_long(), 0));
predicate(n->get_long() >= -256 && n->get_long() <= 65520);
match(ConL);
op_cost(0);
@ -5341,6 +5342,34 @@ operand indOffL16(iRegP reg, immLoffset16 off)
%}
%}
// Memory operand whose base is a long register reinterpreted as a pointer.
// It matches the CastX2P node itself, so the long -> pointer conversion is
// absorbed into the addressing mode ("[$reg]") rather than requiring a
// separate instruction/register copy before the memory access.
// The register is constrained to the ptr_reg register class.
// Address shape: base = $reg, no index (0xffffffff is -1 as a 32-bit int,
// which the encoders treat as "no index register"), scale 0, displacement 0.
operand indirectX2P(iRegL reg)
%{
constraint(ALLOC_IN_RC(ptr_reg));
match(CastX2P reg);
op_cost(0);
format %{ "[$reg]\t# long -> ptr" %}
interface(MEMORY_INTER) %{
base($reg);
index(0xffffffff);
scale(0x0);
disp(0x0);
%}
%}
// Memory operand for "long register cast to pointer, plus a constant long
// offset". It matches AddP (CastX2P reg) off, so both the long -> pointer
// conversion and the constant addition are absorbed into the addressing
// mode ("[$reg, $off]").
// The register is constrained to the ptr_reg register class.
// Address shape: base = $reg, no index (0xffffffff is -1 as a 32-bit int,
// which the encoders treat as "no index register"), scale 0,
// displacement = $off.
// NOTE(review): the accepted offset range is whatever immLOffset's predicate
// allows; offsets that a given access size cannot encode are presumably
// fixed up by the encoders via legitimize_address -- confirm.
operand indOffX2P(iRegL reg, immLOffset off)
%{
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP (CastX2P reg) off);
op_cost(0);
format %{ "[$reg, $off]\t# long -> ptr" %}
interface(MEMORY_INTER) %{
base($reg);
index(0xffffffff);
scale(0x0);
disp($off);
%}
%}
operand indirectN(iRegN reg)
%{
predicate(CompressedOops::shift() == 0);
@ -5431,7 +5460,7 @@ operand indOffIN(iRegN reg, immIOffset off)
%}
%}
operand indOffLN(iRegN reg, immLoffset off)
operand indOffLN(iRegN reg, immLOffset off)
%{
predicate(CompressedOops::shift() == 0);
constraint(ALLOC_IN_RC(ptr_reg));
@ -5664,6 +5693,17 @@ operand iRegL2I(iRegL reg) %{
interface(REG_INTER)
%}
// Register operand that matches a long register wrapped in CastX2P.
// It lets a rule that expects a pointer input consume the long register
// directly (zero operand cost), instead of first materializing the cast
// result in another register. Printed as "l2p($reg)".
operand iRegL2P(iRegL reg) %{
op_cost(0);
match(CastX2P reg);
format %{ "l2p($reg)" %}
interface(REG_INTER)
%}
// Operand classes vmem2 / vmem4 / vmem8: each groups the indirect and
// indIndex operands with the matching indOffI<N> / indOffL<N>
// immediate-offset operands.
// NOTE(review): <N> presumably denotes the access size in bytes -- confirm.
opclass vmem2(indirect, indIndex, indOffI2, indOffL2);
opclass vmem4(indirect, indIndex, indOffI4, indOffL4);
opclass vmem8(indirect, indIndex, indOffI8, indOffL8);
@ -5680,21 +5720,21 @@ opclass vmem16(indirect, indIndex, indOffI16, indOffL16);
// instruction defs. we can turn a memory op into an Address
opclass memory1(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI1, indOffL1,
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indirectX2P, indOffX2P);
opclass memory2(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI2, indOffL2,
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN);
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indirectX2P, indOffX2P);
opclass memory4(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI4, indOffL4,
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN, indirectX2P, indOffX2P);
opclass memory8(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex, indOffI8, indOffL8,
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN, indirectX2P, indOffX2P);
// All of the memory operands. For the pipeline description.
opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indIndex,
indOffI1, indOffL1, indOffI2, indOffL2, indOffI4, indOffL4, indOffI8, indOffL8,
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN);
indirectN, indIndexScaledN, indIndexScaledI2LN, indIndexI2LN, indIndexN, indOffIN, indOffLN, indirectX2P, indOffX2P);
// iRegIorL2I is used for src inputs in rules for 32 bit int (I)
@ -5711,6 +5751,7 @@ opclass memory(indirect, indIndexScaled, indIndexScaledI2L, indIndexI2L, indInde
// movw is actually redundant but it's not too costly.
opclass iRegIorL2I(iRegI, iRegL2I);
// iRegPorL2P is used for pointer src inputs that may be either a real
// pointer register (iRegP) or a long register viewed as a pointer through
// CastX2P (iRegL2P), so the rule can read the long register directly.
opclass iRegPorL2P(iRegP, iRegL2P);
//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architectures pipeline.
@ -9811,7 +9852,7 @@ instruct addI_reg_imm_i2l(iRegINoSp dst, iRegL src1, immIAddSub src2) %{
%}
// Pointer Addition
instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
instruct addP_reg_reg(iRegPNoSp dst, iRegPorL2P src1, iRegL src2) %{
match(Set dst (AddP src1 src2));
ins_cost(INSN_COST);
@ -9826,7 +9867,7 @@ instruct addP_reg_reg(iRegPNoSp dst, iRegP src1, iRegL src2) %{
ins_pipe(ialu_reg_reg);
%}
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
instruct addP_reg_reg_ext(iRegPNoSp dst, iRegPorL2P src1, iRegIorL2I src2) %{
match(Set dst (AddP src1 (ConvI2L src2)));
ins_cost(1.9 * INSN_COST);
@ -9841,7 +9882,7 @@ instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
ins_pipe(ialu_reg_reg);
%}
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale) %{
instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegPorL2P src1, iRegL src2, immIScale scale) %{
match(Set dst (AddP src1 (LShiftL src2 scale)));
ins_cost(1.9 * INSN_COST);
@ -9856,7 +9897,7 @@ instruct addP_reg_reg_lsl(iRegPNoSp dst, iRegP src1, iRegL src2, immIScale scale
ins_pipe(ialu_reg_reg_shift);
%}
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegP src1, iRegIorL2I src2, immIScale scale) %{
instruct addP_reg_reg_ext_shift(iRegPNoSp dst, iRegPorL2P src1, iRegIorL2I src2, immIScale scale) %{
match(Set dst (AddP src1 (LShiftL (ConvI2L src2) scale)));
ins_cost(1.9 * INSN_COST);
@ -9889,7 +9930,7 @@ instruct lshift_ext(iRegLNoSp dst, iRegIorL2I src, immI scale, rFlagsReg cr) %{
// Pointer Immediate Addition
// n.b. this needs to be more expensive than using an indirect memory
// operand
instruct addP_reg_imm(iRegPNoSp dst, iRegP src1, immLAddSub src2) %{
instruct addP_reg_imm(iRegPNoSp dst, iRegPorL2P src1, immLAddSub src2) %{
match(Set dst (AddP src1 src2));
ins_cost(INSN_COST);

@ -1,5 +1,5 @@
//
// Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@ -62,7 +62,13 @@ instruct xLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr)
format %{ "ldr $dst, $mem" %}
ins_encode %{
const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
if (ref_addr.getMode() == Address::base_plus_offset) {
// Fix up any out-of-range offsets.
assert_different_registers(rscratch1, as_Register($mem$$base));
assert_different_registers(rscratch1, $dst$$Register);
ref_addr = __ legitimize_address(ref_addr, 8, rscratch1);
}
__ ldr($dst$$Register, ref_addr);
x_load_barrier(masm, this, ref_addr, $dst$$Register, rscratch2 /* tmp */, barrier_data());
%}

@ -1,5 +1,5 @@
//
// Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@ -111,7 +111,13 @@ instruct zLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr)
format %{ "ldr $dst, $mem" %}
ins_encode %{
const Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
Address ref_addr = mem2address($mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
if (ref_addr.getMode() == Address::base_plus_offset) {
// Fix up any out-of-range offsets.
assert_different_registers(rscratch2, as_Register($mem$$base));
assert_different_registers(rscratch2, $dst$$Register);
ref_addr = __ legitimize_address(ref_addr, 8, rscratch2);
}
__ ldr($dst$$Register, ref_addr);
z_load_barrier(masm, this, ref_addr, $dst$$Register, rscratch1);
%}

@ -395,7 +395,14 @@ const class TypePtr *MachNode::adr_type() const {
// 32-bit unscaled narrow oop can be the base of any address expression
t = t->make_ptr();
}
if (t->isa_intptr_t() && offset != 0 && offset != Type::OffsetBot) {
if (t->isa_intptr_t() &&
#if !defined(AARCH64)
// AArch64 supports the addressing mode:
// [base, 0], in which [base] is converted from a long value
offset != 0 &&
#endif
offset != Type::OffsetBot) {
// We cannot assert that the offset does not look oop-ish here.
// Depending on the heap layout the cardmark base could land
// inside some oopish region. It definitely does for Win2K.

@ -0,0 +1,234 @@
/*
* Copyright (c) 2024, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.c2;
import jdk.internal.misc.Unsafe;
import jdk.test.lib.Asserts;
/**
* @test TestCastX2P
* @summary AArch64: remove extra register copy when converting from long to pointer.
* @bug 8336245
* @library /test/lib
* @modules java.base/jdk.internal.misc
* @run main/othervm -XX:-TieredCompilation compiler.c2.TestCastX2P
*/
public class TestCastX2P {
// Regression test for raw-address (long -> pointer) memory accesses made
// through Unsafe. Each nested class allocates a native buffer, writes a seed
// value into it from its static initializer, and test() reads the values
// back and checks them.
//
// NOTE(review): the address expressions are presumably shaped on purpose:
// "address + <variable> + <constant>" / "address + <constant>" for the
// indOffX2P (base + immediate) form, and "address + <variable>" only for the
// indirectX2P (base only) form. Keep the expressions as written -- confirm
// against the aarch64.ad operands before restructuring.

// Size in bytes of every native buffer allocated below. All accesses in this
// file stay within this size.
public static final int LEN = 2040;

static final Unsafe UNSAFE = Unsafe.getUnsafe();

// Seed values written to and expected back from native memory, one per
// access width.
public static long lseed = 0xbeef;
public static int iseed = 0xbeef;
public static short sseed = (short) (0xef);
public static byte bseed = (byte) (0xe);

// Offsets kept in non-final static fields, so they are read at run time
// rather than being compile-time constants.
public static long off1 = 16;
public static long off2 = 32;
public static long off3 = 64;

// 8-byte stores at base + variable offset + constant, and base + constant.
// The fill loop writes at every byte offset in [0, LEN/2), so consecutive
// stores overlap and most are unaligned.
// NOTE(review): the 10_000 repetitions are presumably there to make the loop
// hot enough to be compiled -- confirm.
// The buffer is never freed; it lives for the duration of the test VM.
public static class TestLong {

private static long address = UNSAFE.allocateMemory(LEN);

static {
for (int k = 0; k < 10_000; k++) {
for (int i = 0; i < LEN/2; i++) {
UNSAFE.putLong(address+i, lseed);
}
}

// These three locations are the ones read back and checked in test().
UNSAFE.putLong(address + off1 + 1030, lseed);
UNSAFE.putLong(address + 1023, lseed);
UNSAFE.putLong(address + off2 + 1001, lseed);
}
}

// 8-byte stores whose final address is formed from the base and variable
// offsets only (no literal constant in the checked accesses).
public static class TestLongIndirect {

private static long address = UNSAFE.allocateMemory(LEN);

static {
for (int k = 0; k < 1000; k++) {
for (int i = 0; i < LEN/2; i++) {
UNSAFE.putLong(address+i, lseed);
}
}

// These three locations are the ones read back and checked in test().
UNSAFE.putLong(address + off1, lseed);
UNSAFE.putLong(address + off1 + off2, lseed);
UNSAFE.putLong(address + off3, lseed);
}
}

// 4-byte variant of TestLong.
public static class TestInt {

private static long address = UNSAFE.allocateMemory(LEN);

static {
for (int k = 0; k < 10_000; k++) {
for (int i = 0; i < LEN/2; i++) {
UNSAFE.putInt(address+i, iseed);
}
}

// These three locations are the ones read back and checked in test().
UNSAFE.putInt(address + off1 + 274, iseed);
UNSAFE.putInt(address + 278, iseed);
UNSAFE.putInt(address + off2 + 282, iseed);
}
}

// 4-byte variant of TestLongIndirect.
public static class TestIntIndirect {

private static long address = UNSAFE.allocateMemory(LEN);

static {
for (int k = 0; k < 1000; k++) {
for (int i = 0; i < LEN/2; i++) {
UNSAFE.putInt(address+i, iseed);
}
}

// These three locations are the ones read back and checked in test().
UNSAFE.putInt(address + off1, iseed);
UNSAFE.putInt(address + off1 + off2, iseed);
UNSAFE.putInt(address + off3, iseed);
}
}

// 2-byte variant of TestLong; the checked offsets are odd, so the accesses
// are not 2-byte aligned relative to the buffer start.
public static class TestShort {

private static long address = UNSAFE.allocateMemory(LEN);

static {
for (int k = 0; k < 10_000; k++) {
for (int i = 0; i < LEN/2; i++) {
UNSAFE.putShort(address+i, sseed);
}
}

// These three locations are the ones read back and checked in test().
UNSAFE.putShort(address + off1 + 257, sseed);
UNSAFE.putShort(address + 277, sseed);
UNSAFE.putShort(address + off2 + 283, sseed);
}
}

// 2-byte variant of TestLongIndirect.
public static class TestShortIndirect {

private static long address = UNSAFE.allocateMemory(LEN);

static {
for (int k = 0; k < 1000; k++) {
for (int i = 0; i < LEN/2; i++) {
UNSAFE.putShort(address+i, sseed);
}
}

// These three locations are the ones read back and checked in test().
UNSAFE.putShort(address + off1, sseed);
UNSAFE.putShort(address + off1 + off2, sseed);
UNSAFE.putShort(address + off3, sseed);
}
}

// 1-byte variant of TestLong.
public static class TestByte {

private static long address = UNSAFE.allocateMemory(LEN);

static {
for (int k = 0; k < 10_000; k++) {
for (int i = 0; i < LEN/2; i++) {
UNSAFE.putByte(address+i, bseed);
}
}

// These three locations are the ones read back and checked in test().
UNSAFE.putByte(address + off1 + 257, bseed);
UNSAFE.putByte(address + 277, bseed);
UNSAFE.putByte(address + off2 + 283, bseed);
}
}

// 1-byte variant of TestLongIndirect.
public static class TestByteIndirect {

private static long address = UNSAFE.allocateMemory(LEN);

static {
for (int k = 0; k < 1000; k++) {
for (int i = 0; i < LEN/2; i++) {
UNSAFE.putByte(address+i, bseed);
}
}

// These three locations are the ones read back and checked in test().
UNSAFE.putByte(address + off1, bseed);
UNSAFE.putByte(address + off1 + off2, bseed);
UNSAFE.putByte(address + off3, bseed);
}
}

// Instantiates each nested class (which runs its static initializer on first
// use) and verifies that every location written last in that initializer
// reads back as the seed value. The static "address" fields are accessed
// through the instance references. Asserts.assertEquals reports a mismatch
// with the given message.
static void test() {
TestLong t1 = new TestLong();
Asserts.assertEquals(UNSAFE.getLong(t1.address + off1 + 1030), lseed, "put long failed!");
Asserts.assertEquals(UNSAFE.getLong(t1.address + 1023), lseed, "put long failed!");
Asserts.assertEquals(UNSAFE.getLong(t1.address + off2 + 1001), lseed, "put long failed!");

TestLongIndirect t2 = new TestLongIndirect();
Asserts.assertEquals(UNSAFE.getLong(t2.address + off1), lseed, "put long failed!");
Asserts.assertEquals(UNSAFE.getLong(t2.address + off1 + off2), lseed, "put long failed!");
Asserts.assertEquals(UNSAFE.getLong(t2.address + off3), lseed, "put long failed!");

TestInt t3 = new TestInt();
Asserts.assertEquals(UNSAFE.getInt(t3.address + off1 + 274), iseed, "put int failed!");
Asserts.assertEquals(UNSAFE.getInt(t3.address + 278), iseed, "put int failed!");
Asserts.assertEquals(UNSAFE.getInt(t3.address + off2 + 282), iseed, "put int failed!");

TestIntIndirect t4 = new TestIntIndirect();
Asserts.assertEquals(UNSAFE.getInt(t4.address + off1), iseed, "put int failed!");
Asserts.assertEquals(UNSAFE.getInt(t4.address + off1 + off2), iseed, "put int failed!");
Asserts.assertEquals(UNSAFE.getInt(t4.address + off3), iseed, "put int failed!");

TestShort t5 = new TestShort();
Asserts.assertEquals(UNSAFE.getShort(t5.address + off1 + 257), sseed, "put short failed!");
Asserts.assertEquals(UNSAFE.getShort(t5.address + 277), sseed, "put short failed!");
Asserts.assertEquals(UNSAFE.getShort(t5.address + off2 + 283), sseed, "put short failed!");

TestShortIndirect t6 = new TestShortIndirect();
Asserts.assertEquals(UNSAFE.getShort(t6.address + off1), sseed, "put short failed!");
Asserts.assertEquals(UNSAFE.getShort(t6.address + off1 + off2), sseed, "put short failed!");
Asserts.assertEquals(UNSAFE.getShort(t6.address + off3), sseed, "put short failed!");

TestByte t7 = new TestByte();
Asserts.assertEquals(UNSAFE.getByte(t7.address + off1 + 257), bseed, "put byte failed!");
Asserts.assertEquals(UNSAFE.getByte(t7.address + 277), bseed, "put byte failed!");
Asserts.assertEquals(UNSAFE.getByte(t7.address + off2 + 283), bseed, "put byte failed!");

TestByteIndirect t8 = new TestByteIndirect();
Asserts.assertEquals(UNSAFE.getByte(t8.address + off1), bseed, "put byte failed!");
Asserts.assertEquals(UNSAFE.getByte(t8.address + off1 + off2), bseed, "put byte failed!");
Asserts.assertEquals(UNSAFE.getByte(t8.address + off3), bseed, "put byte failed!");
}

// Test entry point; the command-line arguments are ignored.
public static void main(String[] strArr) {
test();
}
}