8262355: Support for AVX-512 opmask register allocation.

Reviewed-by: vlivanov, njian, kvn
This commit is contained in:
Jatin Bhateja 2021-04-04 17:47:23 +00:00
parent 078066695b
commit f084bd2f61
41 changed files with 1570 additions and 295 deletions

View File

@ -467,22 +467,22 @@ reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
// ----------------------------
// SVE Predicate Registers
// ----------------------------
reg_def P0 (SOC, SOC, Op_RegVMask, 0, p0->as_VMReg());
reg_def P1 (SOC, SOC, Op_RegVMask, 1, p1->as_VMReg());
reg_def P2 (SOC, SOC, Op_RegVMask, 2, p2->as_VMReg());
reg_def P3 (SOC, SOC, Op_RegVMask, 3, p3->as_VMReg());
reg_def P4 (SOC, SOC, Op_RegVMask, 4, p4->as_VMReg());
reg_def P5 (SOC, SOC, Op_RegVMask, 5, p5->as_VMReg());
reg_def P6 (SOC, SOC, Op_RegVMask, 6, p6->as_VMReg());
reg_def P7 (SOC, SOC, Op_RegVMask, 7, p7->as_VMReg());
reg_def P8 (SOC, SOC, Op_RegVMask, 8, p8->as_VMReg());
reg_def P9 (SOC, SOC, Op_RegVMask, 9, p9->as_VMReg());
reg_def P10 (SOC, SOC, Op_RegVMask, 10, p10->as_VMReg());
reg_def P11 (SOC, SOC, Op_RegVMask, 11, p11->as_VMReg());
reg_def P12 (SOC, SOC, Op_RegVMask, 12, p12->as_VMReg());
reg_def P13 (SOC, SOC, Op_RegVMask, 13, p13->as_VMReg());
reg_def P14 (SOC, SOC, Op_RegVMask, 14, p14->as_VMReg());
reg_def P15 (SOC, SOC, Op_RegVMask, 15, p15->as_VMReg());
reg_def P0 (SOC, SOC, Op_RegVectMask, 0, p0->as_VMReg());
reg_def P1 (SOC, SOC, Op_RegVectMask, 1, p1->as_VMReg());
reg_def P2 (SOC, SOC, Op_RegVectMask, 2, p2->as_VMReg());
reg_def P3 (SOC, SOC, Op_RegVectMask, 3, p3->as_VMReg());
reg_def P4 (SOC, SOC, Op_RegVectMask, 4, p4->as_VMReg());
reg_def P5 (SOC, SOC, Op_RegVectMask, 5, p5->as_VMReg());
reg_def P6 (SOC, SOC, Op_RegVectMask, 6, p6->as_VMReg());
reg_def P7 (SOC, SOC, Op_RegVectMask, 7, p7->as_VMReg());
reg_def P8 (SOC, SOC, Op_RegVectMask, 8, p8->as_VMReg());
reg_def P9 (SOC, SOC, Op_RegVectMask, 9, p9->as_VMReg());
reg_def P10 (SOC, SOC, Op_RegVectMask, 10, p10->as_VMReg());
reg_def P11 (SOC, SOC, Op_RegVectMask, 11, p11->as_VMReg());
reg_def P12 (SOC, SOC, Op_RegVectMask, 12, p12->as_VMReg());
reg_def P13 (SOC, SOC, Op_RegVectMask, 13, p13->as_VMReg());
reg_def P14 (SOC, SOC, Op_RegVectMask, 14, p14->as_VMReg());
reg_def P15 (SOC, SOC, Op_RegVectMask, 15, p15->as_VMReg());
// ----------------------------
// Special Registers
@ -2439,6 +2439,14 @@ const bool Matcher::has_predicated_vectors(void) {
return UseSVE > 0;
}
// Returns the register mask used for predicate (vector mask) values on this
// platform, i.e. the address of _PR_REG_mask.
// NOTE(review): presumably _PR_REG_mask covers the SVE predicate registers
// declared with Op_RegVectMask in this file -- confirm against the register class.
const RegMask* Matcher::predicate_reg_mask(void) {
return &_PR_REG_mask;
}
// Creates the vector type used for predicate (mask) values: a new TypeVectMask
// built from the given element type and length.
// NOTE(review): 'length' is presumably the number of vector lanes -- confirm
// against the callers.
const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return new TypeVectMask(elemTy, length);
}
// Reports vector variable shifts as supported unconditionally on this platform.
bool Matcher::supports_vector_variable_shifts(void) {
return true;
}
@ -5601,7 +5609,7 @@ operand vRegD_V31()
operand pRegGov()
%{
constraint(ALLOC_IN_RC(gov_pr));
match(RegVMask);
match(RegVectMask);
op_cost(0);
format %{ %}
interface(REG_INTER);

View File

@ -1,5 +1,5 @@
//
// Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@ -993,6 +993,14 @@ const bool Matcher::has_predicated_vectors(void) {
return false;
}
// This platform reports has_predicated_vectors() == false, so there is no
// predicate register mask to hand out; NULL is returned.
const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}
// This platform reports has_predicated_vectors() == false, so no predicate
// vector type exists; both arguments are ignored and NULL is returned.
const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return NULL;
}
// Vector variable shifts are reported as supported exactly when
// VM_Version::has_simd() is true.
bool Matcher::supports_vector_variable_shifts(void) {
return VM_Version::has_simd();
}

View File

@ -2156,6 +2156,14 @@ const bool Matcher::has_predicated_vectors(void) {
return false;
}
// This platform reports has_predicated_vectors() == false, so there is no
// predicate register mask to hand out; NULL is returned.
const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}
// This platform reports has_predicated_vectors() == false, so no predicate
// vector type exists; both arguments are ignored and NULL is returned.
const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return NULL;
}
// Vector variable shifts are never reported as supported on this platform.
bool Matcher::supports_vector_variable_shifts(void) {
return false; // not supported
}

View File

@ -1546,6 +1546,14 @@ const bool Matcher::has_predicated_vectors(void) {
return false;
}
// This platform reports has_predicated_vectors() == false, so there is no
// predicate register mask to hand out; NULL is returned.
const RegMask* Matcher::predicate_reg_mask(void) {
return NULL;
}
// This platform reports has_predicated_vectors() == false, so no predicate
// vector type exists; both arguments are ignored and NULL is returned.
const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return NULL;
}
// Vector variable shifts are never reported as supported on this platform.
bool Matcher::supports_vector_variable_shifts(void) {
return false; // not supported
}

View File

@ -2452,6 +2452,22 @@ void Assembler::kmovwl(KRegister dst, Address src) {
emit_operand((Register)dst, src);
}
// Store form of KMOVW: emits the instruction that writes opmask register
// 'src' to the memory operand 'dst'. Only EVEX support is asserted here
// (no AVX512BW requirement).
void Assembler::kmovwl(Address dst, KRegister src) {
assert(VM_Version::supports_evex(), "");
// The instruction carries a memory operand, so mark its start.
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
// VEX prefix in the 0F opcode map with no SIMD prefix.
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
// Opcode byte 0x91, followed by the operand encoding for (src, dst).
emit_int8((unsigned char)0x91);
emit_operand((Register)src, dst);
}
// Register-to-register form of KMOVW: emits the instruction that moves the
// 16-bit opmask value in 'src' to 'dst' (opcode 0x90, VEX 0F map, no SIMD
// prefix, register-direct ModRM).
//
// KMOVW belongs to AVX512F, so only EVEX support is required. This matters
// because MacroAssembler::kmov(KRegister, KRegister) falls back to this
// overload precisely when AVX512BW is NOT available; asserting
// supports_avx512bw() here would make that fallback path always trip the
// assert. The weaker check also matches the kmovwl(Address, KRegister) form.
void Assembler::kmovwl(KRegister dst, KRegister src) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0x90, (0xC0 | encode));
}
void Assembler::kmovdl(KRegister dst, Register src) {
assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);

View File

@ -1459,6 +1459,8 @@ private:
void kmovwl(KRegister dst, Register src);
void kmovwl(KRegister dst, Address src);
void kmovwl(Register dst, KRegister src);
void kmovwl(Address dst, KRegister src);
void kmovwl(KRegister dst, KRegister src);
void kmovdl(KRegister dst, Register src);
void kmovdl(Register dst, KRegister src);
void kmovql(KRegister dst, KRegister src);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -49,18 +49,18 @@ inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vle
}
}
void C2_MacroAssembler::setvectmask(Register dst, Register src) {
void C2_MacroAssembler::setvectmask(Register dst, Register src, KRegister mask) {
guarantee(PostLoopMultiversioning, "must be");
Assembler::movl(dst, 1);
Assembler::shlxl(dst, dst, src);
Assembler::decl(dst);
Assembler::kmovdl(k1, dst);
Assembler::kmovdl(mask, dst);
Assembler::movl(dst, src);
}
void C2_MacroAssembler::restorevectmask() {
void C2_MacroAssembler::restorevectmask(KRegister mask) {
guarantee(PostLoopMultiversioning, "must be");
Assembler::knotwl(k1, k0);
Assembler::knotwl(mask, k0);
}
#if INCLUDE_RTM_OPT
@ -1893,10 +1893,11 @@ void C2_MacroAssembler::reduce8L(int opcode, Register dst, Register src1, XMMReg
reduce4L(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
}
void C2_MacroAssembler::genmask(Register dst, Register len, Register temp) {
void C2_MacroAssembler::genmask(KRegister dst, Register len, Register temp) {
assert(ArrayCopyPartialInlineSize <= 64,"");
mov64(dst, -1L);
bzhiq(dst, dst, len);
mov64(temp, -1L);
bzhiq(temp, temp, len);
kmovql(dst, temp);
}
#endif // _LP64
@ -2154,7 +2155,8 @@ void C2_MacroAssembler::evpblend(BasicType typ, XMMRegister dst, KRegister kmask
}
}
void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
XMMRegister vtmp1, XMMRegister vtmp2, KRegister mask) {
switch(vlen) {
case 4:
assert(vtmp1 != xnoreg, "required.");
@ -2192,14 +2194,13 @@ void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegist
break;
case 64:
{
KRegister ktemp = k2; // Use a hardcoded temp due to no k register allocation.
assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.");
evpcmpeqb(ktemp, src1, src2, Assembler::AVX_512bit);
evpcmpeqb(mask, src1, src2, Assembler::AVX_512bit);
if (bt == BoolTest::ne) {
ktestql(ktemp, ktemp);
ktestql(mask, mask);
} else {
assert(bt == BoolTest::overflow, "required");
kortestql(ktemp, ktemp);
kortestql(mask, mask);
}
}
break;
@ -2916,7 +2917,7 @@ void C2_MacroAssembler::load_next_elements(Register elem1, Register elem2, Regis
// Compare strings, used for char[] and byte[].
void C2_MacroAssembler::string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
XMMRegister vec1, int ae) {
XMMRegister vec1, int ae, KRegister mask) {
ShortBranchVerifier sbv(this);
Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
Label COMPARE_WIDE_VECTORS_LOOP_FAILED; // used only _LP64 && AVX3
@ -3069,12 +3070,12 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop
if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
evmovdquq(vec1, Address(str1, result, scale), Assembler::AVX_512bit);
evpcmpeqb(k7, vec1, Address(str2, result, scale), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
evpcmpeqb(mask, vec1, Address(str2, result, scale), Assembler::AVX_512bit); // mask == 11..11, if operands equal, otherwise mask has some 0
} else {
vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_512bit);
evpcmpeqb(k7, vec1, Address(str2, result, scale2), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
evpcmpeqb(mask, vec1, Address(str2, result, scale2), Assembler::AVX_512bit); // mask == 11..11, if operands equal, otherwise mask has some 0
}
kortestql(k7, k7);
kortestql(mask, mask);
jcc(Assembler::aboveEqual, COMPARE_WIDE_VECTORS_LOOP_FAILED); // miscompare
addptr(result, stride2x2); // update since we already compared at this addr
subl(cnt2, stride2x2); // and sub the size too
@ -3258,7 +3259,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
bind(COMPARE_WIDE_VECTORS_LOOP_FAILED);
kmovql(cnt1, k7);
kmovql(cnt1, mask);
notq(cnt1);
bsfq(cnt2, cnt1);
if (ae != StrIntrinsicNode::LL) {
@ -3307,7 +3308,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
// }
void C2_MacroAssembler::has_negatives(Register ary1, Register len,
Register result, Register tmp1,
XMMRegister vec1, XMMRegister vec2) {
XMMRegister vec1, XMMRegister vec2, KRegister mask1, KRegister mask2) {
// rsi: byte array
// rcx: len
// rax: result
@ -3339,8 +3340,8 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
bind(test_64_loop);
// Check whether our 64 elements of size byte contain negatives
evpcmpgtb(k2, vec2, Address(ary1, len, Address::times_1), Assembler::AVX_512bit);
kortestql(k2, k2);
evpcmpgtb(mask1, vec2, Address(ary1, len, Address::times_1), Assembler::AVX_512bit);
kortestql(mask1, mask1);
jcc(Assembler::notZero, TRUE_LABEL);
addptr(len, 64);
@ -3357,7 +3358,7 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
mov64(tmp3_aliased, 0xFFFFFFFFFFFFFFFF);
shlxq(tmp3_aliased, tmp3_aliased, tmp1);
notq(tmp3_aliased);
kmovql(k3, tmp3_aliased);
kmovql(mask2, tmp3_aliased);
#else
Label k_init;
jmp(k_init);
@ -3382,11 +3383,11 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
lea(len, InternalAddress(tmp));
// create mask to test for negative byte inside a vector
evpbroadcastb(vec1, tmp1, Assembler::AVX_512bit);
evpcmpgtb(k3, vec1, Address(len, 0), Assembler::AVX_512bit);
evpcmpgtb(mask2, vec1, Address(len, 0), Assembler::AVX_512bit);
#endif
evpcmpgtb(k2, k3, vec2, Address(ary1, 0), Assembler::AVX_512bit);
ktestq(k2, k3);
evpcmpgtb(mask1, mask2, vec2, Address(ary1, 0), Assembler::AVX_512bit);
ktestq(mask1, mask2);
jcc(Assembler::notZero, TRUE_LABEL);
jmp(FALSE_LABEL);
@ -3513,7 +3514,7 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
// Compare char[] or byte[] arrays aligned to 4 bytes or substrings.
void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register result, Register chr,
XMMRegister vec1, XMMRegister vec2, bool is_char) {
XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask) {
ShortBranchVerifier sbv(this);
Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE;
@ -3576,8 +3577,8 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop
evmovdquq(vec1, Address(ary1, limit, Address::times_1), Assembler::AVX_512bit);
evpcmpeqb(k7, vec1, Address(ary2, limit, Address::times_1), Assembler::AVX_512bit);
kortestql(k7, k7);
evpcmpeqb(mask, vec1, Address(ary2, limit, Address::times_1), Assembler::AVX_512bit);
kortestql(mask, mask);
jcc(Assembler::aboveEqual, FALSE_LABEL); // miscompare
addptr(limit, 64); // update since we already compared at this addr
cmpl(limit, -64);
@ -3594,8 +3595,8 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
//
addptr(result, -64); // it is safe, bc we just came from this area
evmovdquq(vec1, Address(ary1, result, Address::times_1), Assembler::AVX_512bit);
evpcmpeqb(k7, vec1, Address(ary2, result, Address::times_1), Assembler::AVX_512bit);
kortestql(k7, k7);
evpcmpeqb(mask, vec1, Address(ary2, result, Address::times_1), Assembler::AVX_512bit);
kortestql(mask, mask);
jcc(Assembler::aboveEqual, FALSE_LABEL); // miscompare
jmp(TRUE_LABEL);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -31,8 +31,8 @@ public:
Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);
// special instructions for EVEX
void setvectmask(Register dst, Register src);
void restorevectmask();
void setvectmask(Register dst, Register src, KRegister mask);
void restorevectmask(KRegister mask);
// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
// See full description in macroAssembler_x86.cpp.
@ -131,7 +131,7 @@ public:
// vector test
void vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg);
XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg, KRegister mask = knoreg);
// blend
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
@ -146,7 +146,7 @@ public:
void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
#ifdef _LP64
void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
void genmask(Register dst, Register len, Register temp);
void genmask(KRegister dst, Register len, Register temp);
#endif // _LP64
// dst = reduce(op, src2) using vtmp as temps
@ -244,17 +244,17 @@ public:
// Compare strings.
void string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
XMMRegister vec1, int ae);
XMMRegister vec1, int ae, KRegister mask = knoreg);
// Search for Non-ASCII character (Negative byte value) in a byte array,
// return true if it has any and false otherwise.
void has_negatives(Register ary1, Register len,
Register result, Register tmp1,
XMMRegister vec1, XMMRegister vec2);
XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg);
// Compare char[] or byte[] arrays.
void arrays_equals(bool is_array_equ, Register ary1, Register ary2,
Register limit, Register result, Register chr,
XMMRegister vec1, XMMRegister vec2, bool is_char);
XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask = knoreg);
#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP

View File

@ -30,7 +30,7 @@
// processor dependent initialization for i486
LP64_ONLY(extern void reg_mask_init();)
extern void reg_mask_init();
void Compile::pd_compiler2_init() {
guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" );
@ -61,5 +61,5 @@ void Compile::pd_compiler2_init() {
OptoReg::invalidate(i);
}
}
LP64_ONLY(reg_mask_init();)
reg_mask_init();
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -149,7 +149,6 @@ void ZBarrierSetAssembler::load_at(MacroAssembler* masm,
// Call VM
call_vm(masm, ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), dst, scratch);
// Restore registers
__ movdqu(xmm0, Address(rsp, xmm_size * 0));
__ movdqu(xmm1, Address(rsp, xmm_size * 1));
__ movdqu(xmm2, Address(rsp, xmm_size * 2));
@ -394,6 +393,7 @@ private:
MacroAssembler* const _masm;
GrowableArray<Register> _gp_registers;
GrowableArray<KRegister> _opmask_registers;
GrowableArray<XMMRegisterData> _xmm_registers;
int _spill_size;
int _spill_offset;
@ -450,11 +450,21 @@ private:
__ movq(Address(rsp, _spill_offset), reg);
}
// Spills one live opmask register: reserves the next 8-byte slot below the
// current spill offset and stores 'reg' there.
//
// Uses the width-safe MacroAssembler::kmov rather than kmovql: kmov only
// issues the 64-bit move when AVX512BW is available and otherwise falls
// back to the 16-bit move, so the spill also works on EVEX targets that
// lack AVX512BW. opmask_register_restore must use the same helper so the
// stored and reloaded widths agree.
void opmask_register_save(KRegister reg) {
  _spill_offset -= 8;
  __ kmov(Address(rsp, _spill_offset), reg);
}
// Reloads one general purpose register from the current spill slot and then
// releases that 8-byte slot by advancing the spill offset.
void gp_register_restore(Register reg) {
  const Address slot(rsp, _spill_offset);
  __ movq(reg, slot);
  _spill_offset += 8;
}
// Reloads one opmask register from the current spill slot and then releases
// that 8-byte slot by advancing the spill offset.
//
// Uses the width-safe MacroAssembler::kmov rather than kmovql: kmov only
// issues the 64-bit move when AVX512BW is available and otherwise falls
// back to the 16-bit move, so the reload also works on EVEX targets that
// lack AVX512BW. This mirrors the width used by opmask_register_save.
void opmask_register_restore(KRegister reg) {
  __ kmov(reg, Address(rsp, _spill_offset));
  _spill_offset += 8;
}
void initialize(ZLoadBarrierStubC2* stub) {
// Create mask of caller saved registers that need to
// be saved/restored if live
@ -477,6 +487,7 @@ private:
}
int gp_spill_size = 0;
int opmask_spill_size = 0;
int xmm_spill_size = 0;
// Record registers that need to be saved/restored
@ -490,6 +501,13 @@ private:
_gp_registers.append(vm_reg->as_Register());
gp_spill_size += 8;
}
} else if (vm_reg->is_KRegister()) {
// All opmask registers are caller saved, thus spill the ones
// which are live.
if (_opmask_registers.find(vm_reg->as_KRegister()) == -1) {
_opmask_registers.append(vm_reg->as_KRegister());
opmask_spill_size += 8;
}
} else if (vm_reg->is_XMMRegister()) {
// We encode in the low order 4 bits of the opto_reg, how large part of the register is live
const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~15);
@ -520,13 +538,14 @@ private:
const int arg_spill_size = frame::arg_reg_save_area_bytes;
// Stack pointer must be 16 bytes aligned for the call
_spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size + arg_spill_size, 16);
_spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size + opmask_spill_size + arg_spill_size, 16);
}
public:
ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
_masm(masm),
_gp_registers(),
_opmask_registers(),
_xmm_registers(),
_spill_size(0),
_spill_offset(0) {
@ -576,9 +595,19 @@ public:
for (int i = 0; i < _gp_registers.length(); i++) {
gp_register_save(_gp_registers.at(i));
}
// Save opmask registers
for (int i = 0; i < _opmask_registers.length(); i++) {
opmask_register_save(_opmask_registers.at(i));
}
}
~ZSaveLiveRegisters() {
// Restore opmask registers
for (int i = _opmask_registers.length() - 1; i >= 0; i--) {
opmask_register_restore(_opmask_registers.at(i));
}
// Restore general purpose registers
for (int i = _gp_registers.length() - 1; i >= 0; i--) {
gp_register_restore(_gp_registers.at(i));

View File

@ -2525,6 +2525,59 @@ void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scrat
}
}
// Width-safe opmask load from memory: a 64-bit move when AVX512BW is
// available, otherwise a 16-bit move (which requires EVEX support).
void MacroAssembler::kmov(KRegister dst, Address src) {
  if (!VM_Version::supports_avx512bw()) {
    assert(VM_Version::supports_evex(), "");
    kmovwl(dst, src);
    return;
  }
  kmovql(dst, src);
}
// Width-safe opmask store to memory: a 64-bit move when AVX512BW is
// available, otherwise a 16-bit move (which requires EVEX support).
void MacroAssembler::kmov(Address dst, KRegister src) {
  if (!VM_Version::supports_avx512bw()) {
    assert(VM_Version::supports_evex(), "");
    kmovwl(dst, src);
    return;
  }
  kmovql(dst, src);
}
// Width-safe opmask-to-opmask copy: a 64-bit move when AVX512BW is
// available, otherwise a 16-bit move (which requires EVEX support).
void MacroAssembler::kmov(KRegister dst, KRegister src) {
  if (!VM_Version::supports_avx512bw()) {
    assert(VM_Version::supports_evex(), "");
    kmovwl(dst, src);
    return;
  }
  kmovql(dst, src);
}
// Width-safe move from an opmask register to a general purpose register:
// a 64-bit move when AVX512BW is available, otherwise a 16-bit move
// (which requires EVEX support).
void MacroAssembler::kmov(Register dst, KRegister src) {
  if (!VM_Version::supports_avx512bw()) {
    assert(VM_Version::supports_evex(), "");
    kmovwl(dst, src);
    return;
  }
  kmovql(dst, src);
}
// Width-safe move from a general purpose register to an opmask register:
// a 64-bit move when AVX512BW is available, otherwise a 16-bit move
// (which requires EVEX support).
void MacroAssembler::kmov(KRegister dst, Register src) {
  if (!VM_Version::supports_avx512bw()) {
    assert(VM_Version::supports_evex(), "");
    kmovwl(dst, src);
    return;
  }
  kmovql(dst, src);
}
// Loads an opmask register from an address literal. When the literal is not
// directly reachable, its address is first materialized in 'scratch_reg' and
// the load goes through that register instead.
void MacroAssembler::kmovql(KRegister dst, AddressLiteral src, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    kmovql(dst, Address(scratch_reg, 0));
    return;
  }
  kmovql(dst, as_Address(src));
}
void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) {
if (reachable(src)) {
@ -4940,7 +4993,7 @@ void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_
#if COMPILER2_OR_JVMCI
// clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM/ZMM registers
void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp) {
void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
// cnt - number of qwords (8-byte words).
// base - start address, qword aligned.
Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
@ -4973,7 +5026,7 @@ void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, X
if (use64byteVector) {
addptr(cnt, 8);
jccb(Assembler::equal, L_end);
fill64_masked_avx(3, base, 0, xtmp, k2, cnt, rtmp, true);
fill64_masked_avx(3, base, 0, xtmp, mask, cnt, rtmp, true);
jmp(L_end);
} else {
addptr(cnt, 4);
@ -4992,7 +5045,7 @@ void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, X
addptr(cnt, 4);
jccb(Assembler::lessEqual, L_end);
if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) {
fill32_masked_avx(3, base, 0, xtmp, k2, cnt, rtmp);
fill32_masked_avx(3, base, 0, xtmp, mask, cnt, rtmp);
} else {
decrement(cnt);
@ -5006,7 +5059,7 @@ void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, X
}
// Clearing constant sized memory using YMM/ZMM registers.
void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp) {
void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
assert(UseAVX > 2 && VM_Version::supports_avx512vlbw(), "");
bool use64byteVector = MaxVectorSize > 32 && AVX3Threshold == 0;
@ -5031,8 +5084,8 @@ void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegiste
break;
case 3:
movl(rtmp, 0x7);
kmovwl(k2, rtmp);
evmovdqu(T_LONG, k2, Address(base, disp), xtmp, Assembler::AVX_256bit);
kmovwl(mask, rtmp);
evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_256bit);
break;
case 4:
evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit);
@ -5040,8 +5093,8 @@ void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegiste
case 5:
if (use64byteVector) {
movl(rtmp, 0x1F);
kmovwl(k2, rtmp);
evmovdqu(T_LONG, k2, Address(base, disp), xtmp, Assembler::AVX_512bit);
kmovwl(mask, rtmp);
evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit);
} else {
evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit);
movq(Address(base, disp + 32), xtmp);
@ -5050,8 +5103,8 @@ void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegiste
case 6:
if (use64byteVector) {
movl(rtmp, 0x3F);
kmovwl(k2, rtmp);
evmovdqu(T_LONG, k2, Address(base, disp), xtmp, Assembler::AVX_512bit);
kmovwl(mask, rtmp);
evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit);
} else {
evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit);
evmovdqu(T_LONG, k0, Address(base, disp + 32), xtmp, Assembler::AVX_128bit);
@ -5060,13 +5113,13 @@ void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegiste
case 7:
if (use64byteVector) {
movl(rtmp, 0x7F);
kmovwl(k2, rtmp);
evmovdqu(T_LONG, k2, Address(base, disp), xtmp, Assembler::AVX_512bit);
kmovwl(mask, rtmp);
evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit);
} else {
evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit);
movl(rtmp, 0x7);
kmovwl(k2, rtmp);
evmovdqu(T_LONG, k2, Address(base, disp + 32), xtmp, Assembler::AVX_256bit);
kmovwl(mask, rtmp);
evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, Assembler::AVX_256bit);
}
break;
default:
@ -5076,7 +5129,8 @@ void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegiste
}
}
void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp, bool is_large) {
void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp,
bool is_large, KRegister mask) {
// cnt - number of qwords (8-byte words).
// base - start address, qword aligned.
// is_large - if optimizers know cnt is larger than InitArrayShortSize
@ -5116,7 +5170,7 @@ void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMReg
shlptr(cnt, 3); // convert to number of bytes
rep_stosb();
} else if (UseXMMForObjInit) {
xmm_clear_mem(base, cnt, tmp, xtmp);
xmm_clear_mem(base, cnt, tmp, xtmp, mask);
} else {
NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
rep_stos();
@ -7748,7 +7802,7 @@ void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Registe
void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
XMMRegister tmp1Reg, XMMRegister tmp2Reg,
XMMRegister tmp3Reg, XMMRegister tmp4Reg,
Register tmp5, Register result) {
Register tmp5, Register result, KRegister mask1, KRegister mask2) {
Label copy_chars_loop, return_length, return_zero, done;
// rsi: src
@ -7800,14 +7854,14 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
movl(result, 0xFFFFFFFF);
shlxl(result, result, tmp5);
notl(result);
kmovdl(k3, result);
kmovdl(mask2, result);
evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(k2, k3);
evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(mask1, mask2);
jcc(Assembler::carryClear, return_zero);
evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
evpmovwb(Address(dst, 0), mask2, tmp1Reg, Assembler::AVX_512bit);
addptr(src, tmp5);
addptr(src, tmp5);
@ -7828,8 +7882,8 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
bind(copy_32_loop);
evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
kortestdl(k2, k2);
evpcmpuw(mask1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
kortestdl(mask1, mask1);
jcc(Assembler::carryClear, return_zero);
// All elements in current processed chunk are valid candidates for
@ -7850,14 +7904,14 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
shlxl(result, result, len);
notl(result);
kmovdl(k3, result);
kmovdl(mask2, result);
evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(k2, k3);
evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
evpcmpuw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
ktestd(mask1, mask2);
jcc(Assembler::carryClear, return_zero);
evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
evpmovwb(Address(dst, 0), mask2, tmp1Reg, Assembler::AVX_512bit);
jmp(return_length);
bind(below_threshold);
@ -7957,7 +8011,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
// }
// }
void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
XMMRegister tmp1, Register tmp2) {
XMMRegister tmp1, Register tmp2, KRegister mask) {
Label copy_chars_loop, done, below_threshold, avx3_threshold;
// rsi: src
// rdi: dst
@ -8010,9 +8064,9 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
movl(tmp3_aliased, -1);
shlxl(tmp3_aliased, tmp3_aliased, tmp2);
notl(tmp3_aliased);
kmovdl(k2, tmp3_aliased);
evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
evmovdquw(Address(dst, 0), k2, tmp1, /*merge*/ true, Assembler::AVX_512bit);
kmovdl(mask, tmp3_aliased);
evpmovzxbw(tmp1, mask, Address(src, 0), Assembler::AVX_512bit);
evmovdquw(Address(dst, 0), mask, tmp1, /*merge*/ true, Assembler::AVX_512bit);
jmp(done);
bind(avx3_threshold);

View File

@ -1090,6 +1090,23 @@ public:
void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); }
void kmovwl(KRegister dst, Address src) { Assembler::kmovwl(dst, src); }
void kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
void kmovwl(Address dst, KRegister src) { Assembler::kmovwl(dst, src); }
void kmovwl(KRegister dst, KRegister src) { Assembler::kmovwl(dst, src); }
void kmovql(KRegister dst, KRegister src) { Assembler::kmovql(dst, src); }
void kmovql(KRegister dst, Register src) { Assembler::kmovql(dst, src); }
void kmovql(Register dst, KRegister src) { Assembler::kmovql(dst, src); }
void kmovql(KRegister dst, Address src) { Assembler::kmovql(dst, src); }
void kmovql(Address dst, KRegister src) { Assembler::kmovql(dst, src); }
void kmovql(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
// Safe move operation, lowers down to 16bit moves for targets supporting
// AVX512F feature and 64bit moves for targets supporting AVX512BW feature.
void kmov(Address dst, KRegister src);
void kmov(KRegister dst, Address src);
void kmov(KRegister dst, KRegister src);
void kmov(Register dst, KRegister src);
void kmov(KRegister dst, Register src);
// AVX Unaligned forms
void vmovdqu(Address dst, XMMRegister src);
@ -1683,13 +1700,13 @@ public:
// clear memory of size 'cnt' qwords, starting at 'base';
// if 'is_large' is set, do not try to produce short loop
void clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, bool is_large);
void clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, bool is_large, KRegister mask=knoreg);
// clear memory initialization sequence for constant size;
void clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp);
void clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg);
// clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers
void xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp);
void xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg);
// Fill primitive arrays
void generate_fill(BasicType t, bool aligned,
@ -1802,11 +1819,12 @@ public:
// Compress char[] array to byte[].
void char_array_compress(Register src, Register dst, Register len,
XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
XMMRegister tmp4, Register tmp5, Register result);
XMMRegister tmp4, Register tmp5, Register result,
KRegister mask1 = knoreg, KRegister mask2 = knoreg);
// Inflate byte[] array to char[].
void byte_array_inflate(Register src, Register dst, Register len,
XMMRegister tmp1, Register tmp2);
XMMRegister tmp1, Register tmp2, KRegister mask = knoreg);
void fill64_masked_avx(uint shift, Register dst, int disp,
XMMRegister xmm, KRegister mask, Register length,

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -218,7 +218,9 @@ class KRegisterImpl : public AbstractRegisterImpl {
public:
enum {
number_of_registers = 8,
max_slots_per_register = 1
// Opmask registers are 64 bits wide on both 32-bit and 64-bit targets,
// so two slots are reserved per register.
max_slots_per_register = 2
};
// construction
@ -256,10 +258,14 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
// There is no requirement that any ordering here matches any ordering c2 gives
// its optoregs.
// x86_32.ad defines additional dummy FILL0-FILL7 registers. To make REG_COUNT
// (computed by ADLC from the number of reg_defs seen in the .ad files) tally
// with ConcreteRegisterImpl::number_of_registers, an additional count of 8 is
// added for the 32-bit JVM.
number_of_registers = RegisterImpl::number_of_registers * RegisterImpl::max_slots_per_register +
2 * FloatRegisterImpl::number_of_registers +
2 * FloatRegisterImpl::number_of_registers + NOT_LP64(8) LP64_ONLY(0) +
XMMRegisterImpl::max_slots_per_register * XMMRegisterImpl::number_of_registers +
KRegisterImpl::number_of_registers + // mask registers
KRegisterImpl::number_of_registers * KRegisterImpl::max_slots_per_register + // mask registers
1 // eflags
};

View File

@ -131,6 +131,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
int ymm_bytes = num_xmm_regs * 16;
int zmm_bytes = num_xmm_regs * 32;
#ifdef COMPILER2
int opmask_state_bytes = KRegisterImpl::number_of_registers * 8;
if (save_vectors) {
assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
@ -139,6 +140,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
if (UseAVX > 2) {
// Save upper half of ZMM registers as well
vect_bytes += zmm_bytes;
additional_frame_words += opmask_state_bytes / wordSize;
}
additional_frame_words += vect_bytes / wordSize;
}
@ -229,6 +231,11 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
for (int n = 0; n < num_xmm_regs; n++) {
__ vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
}
__ subptr(rsp, opmask_state_bytes);
// Save opmask registers
for (int n = 0; n < KRegisterImpl::number_of_registers; n++) {
__ kmov(Address(rsp, n*8), as_KRegister(n));
}
}
}
__ vzeroupper();
@ -251,6 +258,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
// rbp, location is known implicitly, no oopMap
map->set_callee_saved(STACK_OFFSET(rsi_off), rsi->as_VMReg());
map->set_callee_saved(STACK_OFFSET(rdi_off), rdi->as_VMReg());
// %%% This is really a waste but we'll keep things as they were for now for the upper component
off = st0_off;
delta = st1_off - off;
@ -275,11 +283,12 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
}
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
int opmask_state_bytes = 0;
int additional_frame_bytes = 0;
int num_xmm_regs = XMMRegisterImpl::number_of_registers;
int ymm_bytes = num_xmm_regs * 16;
int zmm_bytes = num_xmm_regs * 32;
// Recover XMM & FPU state
int additional_frame_bytes = 0;
#ifdef COMPILER2
if (restore_vectors) {
assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
@ -289,6 +298,8 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
if (UseAVX > 2) {
// Account for the saved upper half of the ZMM registers and the opmask state as well
additional_frame_bytes += zmm_bytes;
opmask_state_bytes = KRegisterImpl::number_of_registers * 8;
additional_frame_bytes += opmask_state_bytes;
}
}
#else
@ -322,11 +333,14 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
for (int n = 0; n < num_xmm_regs; n++) {
__ vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16+off));
}
if (UseAVX > 2) {
// Restore upper half of ZMM registers.
off = opmask_state_bytes;
for (int n = 0; n < num_xmm_regs; n++) {
__ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
__ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32+off));
}
for (int n = 0; n < KRegisterImpl::number_of_registers; n++) {
__ kmov(as_KRegister(n), Address(rsp, n*8));
}
}
__ addptr(rsp, additional_frame_bytes);

View File

@ -90,11 +90,13 @@ class RegisterSaver {
// units because compiler frame slots are jints.
#define XSAVE_AREA_BEGIN 160
#define XSAVE_AREA_YMM_BEGIN 576
#define XSAVE_AREA_OPMASK_BEGIN 1088
#define XSAVE_AREA_ZMM_BEGIN 1152
#define XSAVE_AREA_UPPERBANK 1664
#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
#define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off
#define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum)*32/BytesPerInt, zmm ## regnum ## H_off
#define DEF_OPMASK_OFFS(regnum) opmask ## regnum ## _off = opmask_off + (regnum)*8/BytesPerInt, opmask ## regnum ## H_off
#define DEF_ZMM_UPPER_OFFS(regnum) zmm ## regnum ## _off = zmm_upper_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
enum layout {
fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
@ -106,6 +108,10 @@ class RegisterSaver {
DEF_YMM_OFFS(0),
DEF_YMM_OFFS(1),
// 2..15 are implied in range usage
opmask_off = xmm_off + (XSAVE_AREA_OPMASK_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
DEF_OPMASK_OFFS(0),
DEF_OPMASK_OFFS(1),
// 2..7 are implied in range usage
zmm_off = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
DEF_ZMM_OFFS(0),
DEF_ZMM_OFFS(1),
@ -213,6 +219,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
for (int n = 16; n < num_xmm_regs; n++) {
__ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
}
#if COMPILER2_OR_JVMCI
base_addr = XSAVE_AREA_OPMASK_BEGIN;
off = 0;
for(int n = 0; n < KRegisterImpl::number_of_registers; n++) {
__ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n));
}
#endif
}
} else {
if (VM_Version::supports_evex()) {
@ -222,6 +235,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
for (int n = 16; n < num_xmm_regs; n++) {
__ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n));
}
#if COMPILER2_OR_JVMCI
base_addr = XSAVE_AREA_OPMASK_BEGIN;
off = 0;
for(int n = 0; n < KRegisterImpl::number_of_registers; n++) {
__ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n));
}
#endif
}
}
__ vzeroupper();
@ -381,6 +401,13 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
for (int n = 16; n < num_xmm_regs; n++) {
__ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
}
#if COMPILER2_OR_JVMCI
base_addr = XSAVE_AREA_OPMASK_BEGIN;
off = 0;
for (int n = 0; n < KRegisterImpl::number_of_registers; n++) {
__ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8)));
}
#endif
}
} else {
if (VM_Version::supports_evex()) {
@ -390,6 +417,13 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
for (int n = 16; n < num_xmm_regs; n++) {
__ movsd(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)));
}
#if COMPILER2_OR_JVMCI
base_addr = XSAVE_AREA_OPMASK_BEGIN;
off = 0;
for (int n = 0; n < KRegisterImpl::number_of_registers; n++) {
__ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8)));
}
#endif
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -72,6 +72,7 @@ void VMRegImpl::set_regName() {
#define X87_TYPE 2
#define STACK_TYPE 3
// TODO: Add a case for KRegisters.
VMReg VMRegImpl::vmStorageToVMReg(int type, int index) {
switch(type) {
case INTEGER_TYPE: return ::as_Register(index)->as_VMReg();

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -82,7 +82,7 @@ inline XMMRegister as_XMMRegister() {
inline KRegister as_KRegister() {
assert(is_KRegister(), "must be");
// Yuk
return ::as_KRegister((value() - ConcreteRegisterImpl::max_xmm));
return ::as_KRegister((value() - ConcreteRegisterImpl::max_xmm) >> 1);
}
inline bool is_concrete() {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -43,7 +43,7 @@ inline VMReg XMMRegisterImpl::as_VMReg() {
}
inline VMReg KRegisterImpl::as_VMReg() {
return VMRegImpl::as_VMReg(encoding() + ConcreteRegisterImpl::max_xmm);
return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_xmm);
}
#endif // CPU_X86_VMREG_X86_INLINE_HPP

View File

@ -628,6 +628,29 @@ reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
#endif // _LP64
// AVX3 Mask Registers.
// Each opmask register is described by a pair of reg_defs: Kn maps to the
// register's VMReg and Kn_H maps to the following VMReg slot
// (as_VMReg()->next()), so every opmask register occupies two slots.
// Both halves of a pair carry the same encoding value.
// NOTE(review): no reg_def is given for K0 here; presumably because k0 is
// used to mean "no masking" -- confirm.
reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());
reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());
reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());
reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());
reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());
reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());
reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
@ -664,8 +687,33 @@ alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
#endif
);
// Allocation class holding the opmask register pairs, listed from K7 down to K1.
// NOTE(review): the listing order presumably sets the allocator's selection
// priority within this chunk -- confirm against the ADLC alloc_class rules.
alloc_class chunk2(K7, K7_H,
K6, K6_H,
K5, K5_H,
K4, K4_H,
K3, K3_H,
K2, K2_H,
K1, K1_H);
// Class of all opmask registers (K1..K7), each given as a two-slot pair.
reg_class vectmask_reg(K1, K1_H,
K2, K2_H,
K3, K3_H,
K4, K4_H,
K5, K5_H,
K6, K6_H,
K7, K7_H);
// Singleton classes, one per opmask register, for operands that must be
// pinned to a specific register.
reg_class vectmask_reg_K1(K1, K1_H);
reg_class vectmask_reg_K2(K2, K2_H);
reg_class vectmask_reg_K3(K3, K3_H);
reg_class vectmask_reg_K4(K4, K4_H);
reg_class vectmask_reg_K5(K5, K5_H);
reg_class vectmask_reg_K6(K6, K6_H);
reg_class vectmask_reg_K7(K7, K7_H);
// flags allocation class should be last.
alloc_class chunk2(RFLAGS);
alloc_class chunk3(RFLAGS);
// Singleton class for condition codes
reg_class int_flags(RFLAGS);
@ -1368,6 +1416,7 @@ const bool Matcher::match_rule_supported(int opcode) {
if (!has_match_rule(opcode)) {
return false; // no match rule present
}
const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
switch (opcode) {
case Op_AbsVL:
case Op_StoreVectorScatter:
@ -1524,10 +1573,11 @@ const bool Matcher::match_rule_supported(int opcode) {
return false;
}
break;
case Op_VectorMaskGen:
case Op_LoadVectorMasked:
case Op_StoreVectorMasked:
if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) {
return false;
}
break;
@ -1558,6 +1608,7 @@ const bool Matcher::match_rule_supported(int opcode) {
// Identify extra cases that we might want to provide match rules for vector nodes and
// other intrinsics guarded with vector length (vlen) and element type (bt).
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
if (!match_rule_supported(opcode)) {
return false;
}
@ -1608,7 +1659,7 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
case Op_VectorMaskGen:
case Op_LoadVectorMasked:
case Op_StoreVectorMasked:
if (!VM_Version::supports_avx512bw()) {
if (!is_LP64 || !VM_Version::supports_avx512bw()) {
return false;
}
if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
@ -1831,6 +1882,14 @@ const bool Matcher::has_predicated_vectors(void) {
return ret_value;
}
// Returns the register mask used for predicate (vector mask) values:
// a pointer to the mask of the vectmask_reg register class.
const RegMask* Matcher::predicate_reg_mask(void) {
return &_VECTMASK_REG_mask;
}
// Returns a newly created TypeVectMask describing a predicate with 'length'
// elements. 'elemTy' is not consulted: the element type is always
// TypeInt::BOOL.
const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
return new TypeVectMask(TypeInt::BOOL, length);
}
const int Matcher::float_pressure(int default_pressure_threshold) {
int float_pressure_threshold = default_pressure_threshold;
#ifdef _LP64
@ -2552,14 +2611,18 @@ instruct ShouldNotReachHere() %{
%}
// =================================EVEX special===============================
instruct setMask(rRegI dst, rRegI src) %{
predicate(Matcher::has_predicated_vectors());
// Existing partial implementation for post-loop multi-versioning computes
// the mask corresponding to tail loop in K1 opmask register. This may then be
// used for predicating instructions in loop body during last post-loop iteration.
// TODO: Remove hard-coded K1 usage while fixing existing post-loop
// multiversioning support.
instruct setMask(rRegI dst, rRegI src, kReg_K1 mask) %{
predicate(PostLoopMultiversioning && Matcher::has_predicated_vectors());
match(Set dst (SetVectMaskI src));
effect(TEMP dst);
format %{ "setvectmask $dst, $src" %}
ins_encode %{
__ setvectmask($dst$$Register, $src$$Register);
__ setvectmask($dst$$Register, $src$$Register, $mask$$KRegister);
%}
ins_pipe(pipe_slow);
%}
@ -3552,10 +3615,10 @@ instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
ins_pipe( pipe_slow );
%}
instruct evgather(vec dst, memory mem, vec idx, rRegP tmp) %{
instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
predicate(vector_length_in_bytes(n) == 64);
match(Set dst (LoadVectorGather mem idx));
effect(TEMP dst, TEMP tmp);
effect(TEMP dst, TEMP tmp, TEMP ktmp);
format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and k2 as TEMP" %}
ins_encode %{
assert(UseAVX > 2, "sanity");
@ -3565,10 +3628,9 @@ instruct evgather(vec dst, memory mem, vec idx, rRegP tmp) %{
assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
KRegister ktmp = k2;
__ kmovwl(k2, ExternalAddress(vector_all_bits_set()), $tmp$$Register);
__ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register);
__ lea($tmp$$Register, $mem$$Address);
__ evgather(elem_bt, $dst$$XMMRegister, ktmp, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
__ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@ -3577,23 +3639,21 @@ instruct evgather(vec dst, memory mem, vec idx, rRegP tmp) %{
// Scatter INT, LONG, FLOAT, DOUBLE
instruct scatter(memory mem, vec src, vec idx, rRegP tmp) %{
instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
predicate(UseAVX > 2);
match(Set mem (StoreVectorScatter mem (Binary src idx)));
effect(TEMP tmp);
effect(TEMP tmp, TEMP ktmp);
format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
ins_encode %{
assert(UseAVX > 2, "sanity");
int vlen_enc = vector_length_encoding(this, $src);
BasicType elem_bt = vector_element_basic_type(this, $src);
assert(vector_length_in_bytes(this, $src) >= 16, "sanity");
assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
KRegister ktmp = k2;
__ kmovwl(k2, ExternalAddress(vector_all_bits_set()), $tmp$$Register);
__ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register);
__ lea($tmp$$Register, $mem$$Address);
__ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, ktmp, $src$$XMMRegister, vlen_enc);
__ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@ -5694,12 +5754,12 @@ instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, l
ins_pipe( pipe_slow );
%}
instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp) %{
instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
predicate(vector_length_in_bytes(n) == 64 &&
is_floating_point_type(vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
match(Set dst (MinV a b));
match(Set dst (MaxV a b));
effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp);
effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
ins_encode %{
assert(UseAVX > 2, "required");
@ -5708,10 +5768,9 @@ instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp) %{
int vlen_enc = vector_length_encoding(this);
BasicType elem_bt = vector_element_basic_type(this);
KRegister ktmp = k1;
__ evminmax_fp(opcode, elem_bt,
$dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
ktmp, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
$ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@ -6776,23 +6835,22 @@ instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
ins_pipe( pipe_slow );
%}
instruct evcmpFD(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch) %{
instruct evcmpFD(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{
predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
is_floating_point_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
effect(TEMP scratch);
effect(TEMP scratch, TEMP ktmp);
format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
ins_encode %{
int vlen_enc = Assembler::AVX_512bit;
Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
KRegister mask = k0; // The comparison itself is not being masked.
if (vector_element_basic_type(this, $src1) == T_FLOAT) {
__ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
__ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register);
__ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
__ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register);
} else {
__ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
__ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register);
__ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
__ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register);
}
%}
ins_pipe( pipe_slow );
@ -6814,41 +6872,40 @@ instruct vcmp(legVec dst, legVec src1, legVec src2, immI8 cond, rRegP scratch) %
ins_pipe( pipe_slow );
%}
instruct evcmp(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch) %{
instruct evcmp(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{
predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
effect(TEMP scratch);
effect(TEMP scratch, TEMP ktmp);
format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
ins_encode %{
assert(UseAVX > 2, "required");
int vlen_enc = Assembler::AVX_512bit;
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
KRegister mask = k0; // The comparison itself is not being masked.
bool merge = false;
BasicType src1_elem_bt = vector_element_basic_type(this, $src1);
switch (src1_elem_bt) {
case T_BYTE: {
__ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
__ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
__ evpcmpb($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
__ evmovdqub($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
break;
}
case T_SHORT: {
__ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
__ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
__ evpcmpw($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
__ evmovdquw($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
break;
}
case T_INT: {
__ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
__ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
__ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
__ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
break;
}
case T_LONG: {
__ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
__ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
__ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
__ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
break;
}
@ -7026,17 +7083,16 @@ instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
ins_pipe( pipe_slow );
%}
instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch) %{
instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch, kReg ktmp) %{
predicate(vector_length_in_bytes(n) == 64);
match(Set dst (VectorBlend (Binary src1 src2) mask));
format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $scratch and k2 as TEMP" %}
effect(TEMP scratch);
effect(TEMP scratch, TEMP ktmp);
ins_encode %{
int vlen_enc = Assembler::AVX_512bit;
BasicType elem_bt = vector_element_basic_type(this);
KRegister ktmp = k2;
__ evpcmp(elem_bt, ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, $scratch$$Register);
__ evpblend(elem_bt, $dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
__ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, $scratch$$Register);
__ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
%}
ins_pipe( pipe_slow );
%}
@ -7179,13 +7235,29 @@ instruct vptest_alltrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp1,
instruct vptest_alltrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{
predicate(vector_length_in_bytes(n->in(1)) >= 16 &&
vector_length_in_bytes(n->in(1)) < 64 &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
match(Set dst (VectorTest src1 src2 ));
effect(KILL cr);
format %{ "vector_test $dst,$src1, $src2\t! using $cr as TEMP" %}
ins_encode %{
int vlen = vector_length_in_bytes(this, $src1);
__ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister);
__ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg);
__ setb(Assembler::carrySet, $dst$$Register);
__ movzbl($dst$$Register, $dst$$Register);
%}
ins_pipe( pipe_slow );
%}
instruct vptest_alltrue_evex(rRegI dst, legVec src1, legVec src2, kReg ktmp, rFlagsReg cr) %{
predicate(vector_length_in_bytes(n->in(1)) == 64 &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
match(Set dst (VectorTest src1 src2 ));
effect(KILL cr, TEMP ktmp);
format %{ "vector_test $dst,$src1, $src2\t! using $cr as TEMP" %}
ins_encode %{
int vlen = vector_length_in_bytes(this, $src1);
__ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, $ktmp$$KRegister);
__ setb(Assembler::carrySet, $dst$$Register);
__ movzbl($dst$$Register, $dst$$Register);
%}
@ -7210,13 +7282,29 @@ instruct vptest_anytrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp, r
instruct vptest_anytrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{
predicate(vector_length_in_bytes(n->in(1)) >= 16 &&
vector_length_in_bytes(n->in(1)) < 64 &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
match(Set dst (VectorTest src1 src2 ));
effect(KILL cr);
format %{ "vector_test_any_true $dst,$src1,$src2\t! using $cr as TEMP" %}
ins_encode %{
int vlen = vector_length_in_bytes(this, $src1);
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister);
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg);
__ setb(Assembler::notZero, $dst$$Register);
__ movzbl($dst$$Register, $dst$$Register);
%}
ins_pipe( pipe_slow );
%}
instruct vptest_anytrue_evex(rRegI dst, legVec src1, legVec src2, kReg ktmp, rFlagsReg cr) %{
predicate(vector_length_in_bytes(n->in(1)) == 64 &&
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
match(Set dst (VectorTest src1 src2 ));
effect(KILL cr, TEMP ktmp);
format %{ "vector_test_any_true $dst,$src1,$src2\t! using $cr as TEMP" %}
ins_encode %{
int vlen = vector_length_in_bytes(this, $src1);
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, $ktmp$$KRegister);
__ setb(Assembler::notZero, $dst$$Register);
__ movzbl($dst$$Register, $dst$$Register);
%}
@ -7239,12 +7327,26 @@ instruct cmpvptest_anytrue_lt16(rFlagsReg cr, legVec src1, legVec src2, immI_0 z
instruct cmpvptest_anytrue(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero) %{
predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 16 &&
vector_length_in_bytes(n->in(1)->in(1)) < 64 &&
static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne);
match(Set cr (CmpI (VectorTest src1 src2) zero));
format %{ "cmp_vector_test_any_true $src1,$src2\t!" %}
ins_encode %{
int vlen = vector_length_in_bytes(this, $src1);
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister);
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg);
%}
ins_pipe( pipe_slow );
%}
// 64-byte vector variant of the "any true" compare: matches a CmpI of a
// VectorTest (predicate BoolTest::ne) against the zero immediate and sets cr.
// An opmask register (ktmp) is reserved as a TEMP and handed to vectortest.
instruct cmpvptest_anytrue_evex(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero, kReg ktmp) %{
predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 &&
static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne);
match(Set cr (CmpI (VectorTest src1 src2) zero));
effect(TEMP ktmp);
format %{ "cmp_vector_test_any_true $src1,$src2\t!" %}
ins_encode %{
// Vector length in bytes is taken from the first source operand.
int vlen = vector_length_in_bytes(this, $src1);
// No XMM temporaries are supplied (xnoreg); only the opmask temp is passed.
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
@ -7897,47 +7999,46 @@ instruct vprorate(vec dst, vec src, vec shift) %{
#ifdef _LP64
// ---------------------------------- Masked Block Copy ------------------------------------
instruct vmasked_load64(vec dst, memory mem, rRegL mask) %{
instruct vmasked_load64(vec dst, memory mem, kReg mask) %{
match(Set dst (LoadVectorMasked mem mask));
format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
ins_encode %{
BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
int vector_len = vector_length_encoding(this);
__ kmovql(k2, $mask$$Register);
__ evmovdqu(elmType, k2, $dst$$XMMRegister, $mem$$Address, vector_len);
__ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, vector_len);
%}
ins_pipe( pipe_slow );
%}
instruct vmask_gen(rRegL dst, rRegL len, rRegL tempLen) %{
instruct vmask_gen(kReg dst, rRegL len, rRegL temp) %{
match(Set dst (VectorMaskGen len));
effect(TEMP_DEF dst, TEMP tempLen);
format %{ "vector_mask_gen $len \t! vector mask generator" %}
effect(TEMP temp);
format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
ins_encode %{
__ genmask($dst$$Register, $len$$Register, $tempLen$$Register);
__ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
%}
ins_pipe( pipe_slow );
%}
instruct vmask_gen_imm(rRegL dst, immL len) %{
instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
match(Set dst (VectorMaskGen len));
format %{ "vector_mask_gen $len \t! vector mask generator" %}
effect(TEMP temp);
ins_encode %{
__ mov64($dst$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant)));
__ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant)));
__ kmovql($dst$$KRegister, $temp$$Register);
%}
ins_pipe( pipe_slow );
%}
instruct vmasked_store64(memory mem, vec src, rRegL mask) %{
instruct vmasked_store64(memory mem, vec src, kReg mask) %{
match(Set mem (StoreVectorMasked mem (Binary src mask)));
format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
ins_encode %{
const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
int vector_len = vector_length_encoding(src_node);
__ kmovql(k2, $mask$$Register);
__ evmovdqu(elmType, k2, $mem$$Address, $src$$XMMRegister, vector_len);
__ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, vector_len);
%}
ins_pipe( pipe_slow );
%}

View File

@ -1,5 +1,5 @@
//
// Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@ -260,6 +260,18 @@ source %{
// instructions, to allow sign-masking or sign-bit flipping. They allow
// fast versions of NegF/NegD and AbsF/AbsD.
// Adjusts the opmask register mask at initialization time. When predicated
// vectors are available and PostLoopMultiversioning is enabled, both slots of
// K1 are removed from _VECTMASK_REG_mask.
void reg_mask_init() {
if (Matcher::has_predicated_vectors()) {
// Post-loop multi-versioning expects the mask to be present in the K1 register.
// Until that is fixed, the register allocator should not allocate K1; this
// prevents any accidental corruption of the value held in K1.
if (PostLoopMultiversioning) {
// _VECTMASK_REG_mask is const, hence the const_cast; K1 spans two slots
// (as_VMReg() and as_VMReg()->next()), so both are removed.
const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
}
}
}
// Note: 'double' and 'long long' have 32-bits alignment on x86.
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
// Use the expression (adr)&(~0xF) to provide 128-bits aligned address
@ -731,7 +743,7 @@ const Pipeline * MachEpilogNode::pipeline() const {
//=============================================================================
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
static enum RC rc_class( OptoReg::Name reg ) {
if( !OptoReg::is_valid(reg) ) return rc_bad;
@ -1050,7 +1062,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
if( src_first == dst_first && src_second == dst_second )
return size; // Self copy, no move
if (bottom_type()->isa_vect() != NULL) {
if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
uint ireg = ideal_reg();
assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
@ -1103,7 +1115,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
// Check for integer load
if( dst_first_rc == rc_int && src_first_rc == rc_stack )
if( src_first_rc == rc_stack && dst_first_rc == rc_int )
size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
// Check for integer reg-xmm reg copy
@ -1192,16 +1204,16 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
// Check for xmm store
if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
}
// Check for float xmm load
if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
}
// Copy from float reg to xmm reg
if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
// copy to the top of stack from floating point reg
// and use LEA to preserve flags
if( cbuf ) {
@ -1257,6 +1269,42 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
if( dst_second_rc == rc_int && src_second_rc == rc_stack )
return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
// AVX-512 opmask specific spilling.
if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
MacroAssembler _masm(cbuf);
int offset = ra_->reg2offset(src_first);
__ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
return 0;
}
if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
MacroAssembler _masm(cbuf);
int offset = ra_->reg2offset(dst_first);
__ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
return 0;
}
if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
Unimplemented();
return 0;
}
if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
Unimplemented();
return 0;
}
if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
MacroAssembler _masm(cbuf);
__ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
return 0;
}
Unimplemented();
return 0; // Mute compiler
@ -3574,6 +3622,72 @@ operand immI_65535() %{
interface(CONST_INTER);
%}
// AVX-512 opmask register operand.
// Matches the ideal register type RegVectMask and may be assigned any
// register of the vectmask_reg allocation class.
operand kReg()
%{
constraint(ALLOC_IN_RC(vectmask_reg));
match(RegVectMask);
format %{%}
interface(REG_INTER);
%}
// Opmask operand bound to the vectmask_reg_K1 allocation class
// (presumably only K1 -- the class is defined outside this view).
// Used by the jmpLoopEnd*_and_restoreMask rules, which need a bounded mask
// operand for post-loop multiversioning.
operand kReg_K1()
%{
constraint(ALLOC_IN_RC(vectmask_reg_K1));
match(RegVectMask);
format %{%}
interface(REG_INTER);
%}
// Opmask operand bound to the vectmask_reg_K2 allocation class
// (presumably only K2 -- the class is defined outside this view).
operand kReg_K2()
%{
constraint(ALLOC_IN_RC(vectmask_reg_K2));
match(RegVectMask);
format %{%}
interface(REG_INTER);
%}
// Opmask operand bound to the vectmask_reg_K3 allocation class
// (presumably only K3 -- the class is defined outside this view).
operand kReg_K3()
%{
constraint(ALLOC_IN_RC(vectmask_reg_K3));
match(RegVectMask);
format %{%}
interface(REG_INTER);
%}
// Opmask operand bound to the vectmask_reg_K4 allocation class
// (presumably only K4 -- the class is defined outside this view).
operand kReg_K4()
%{
constraint(ALLOC_IN_RC(vectmask_reg_K4));
match(RegVectMask);
format %{%}
interface(REG_INTER);
%}
// Opmask operand bound to the vectmask_reg_K5 allocation class
// (presumably only K5 -- the class is defined outside this view).
operand kReg_K5()
%{
constraint(ALLOC_IN_RC(vectmask_reg_K5));
match(RegVectMask);
format %{%}
interface(REG_INTER);
%}
// Opmask operand bound to the vectmask_reg_K6 allocation class
// (presumably only K6 -- the class is defined outside this view).
operand kReg_K6()
%{
constraint(ALLOC_IN_RC(vectmask_reg_K6));
match(RegVectMask);
format %{%}
interface(REG_INTER);
%}
// Opmask operand bound to the vectmask_reg_K7 allocation class
// (presumably only K7 -- the class is defined outside this view).
operand kReg_K7()
%{
constraint(ALLOC_IN_RC(vectmask_reg_K7));
match(RegVectMask);
format %{%}
interface(REG_INTER);
%}
// Register Operands
// Integer Register
operand rRegI() %{
@ -11410,8 +11524,10 @@ instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
// =======================================================================
// fast clearing of an array
// Small ClearArray non-AVX512.
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2 || !VM_Version::supports_avx512vlbw() || !n->in(2)->bottom_type()->is_int()->is_con()));
predicate(!((ClearArrayNode*)n)->is_large() &&
(UseAVX <= 2 || !VM_Version::supports_avx512vlbw()));
match(Set dummy (ClearArray cnt base));
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
@ -11464,13 +11580,76 @@ instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe du
%}
ins_encode %{
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
$tmp$$XMMRegister, false);
$tmp$$XMMRegister, false, knoreg);
%}
ins_pipe( pipe_slow );
%}
// Small ClearArray AVX512 non-constant length.
//
// Selected when the ClearArray node is not large, UseAVX > 2,
// VM_Version::supports_avx512vlbw() holds, and n->in(2) is not an int
// constant (presumably the count input -- confirm against ClearArrayNode).
// Compared with rep_stos, this rule additionally reserves an opmask
// temporary (ktmp) and passes it to clear_mem() where rep_stos passes knoreg.
//
// Effects: cnt and base are consumed (USE_KILL); zero and the flags are
// killed; tmp (XMM) and ktmp (opmask) are temporaries.
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, regD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
predicate(!((ClearArrayNode*)n)->is_large() &&
UseAVX > 2 && VM_Version::supports_avx512vlbw() &&
!n->in(2)->bottom_type()->is_int()->is_con());
match(Set dummy (ClearArray cnt base));
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
// The format text only describes the emitted sequence for debug output;
// the actual code comes from clear_mem() in ins_encode below.
format %{ $$template
$$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
$$emit$$"CMP InitArrayShortSize,rcx\n\t"
$$emit$$"JG LARGE\n\t"
$$emit$$"SHL ECX, 1\n\t"
$$emit$$"DEC ECX\n\t"
$$emit$$"JS DONE\t# Zero length\n\t"
$$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
$$emit$$"DEC ECX\n\t"
$$emit$$"JGE LOOP\n\t"
$$emit$$"JMP DONE\n\t"
$$emit$$"# LARGE:\n\t"
if (UseFastStosb) {
$$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
$$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
} else if (UseXMMForObjInit) {
$$emit$$"MOV RDI,RAX\n\t"
$$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
$$emit$$"JMPQ L_zero_64_bytes\n\t"
$$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
$$emit$$"VMOVDQU YMM0,(RAX)\n\t"
$$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
$$emit$$"ADD 0x40,RAX\n\t"
$$emit$$"# L_zero_64_bytes:\n\t"
$$emit$$"SUB 0x8,RCX\n\t"
$$emit$$"JGE L_loop\n\t"
$$emit$$"ADD 0x4,RCX\n\t"
$$emit$$"JL L_tail\n\t"
$$emit$$"VMOVDQU YMM0,(RAX)\n\t"
$$emit$$"ADD 0x20,RAX\n\t"
$$emit$$"SUB 0x4,RCX\n\t"
$$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
$$emit$$"ADD 0x4,RCX\n\t"
$$emit$$"JLE L_end\n\t"
$$emit$$"DEC RCX\n\t"
$$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
$$emit$$"VMOVQ XMM0,(RAX)\n\t"
$$emit$$"ADD 0x8,RAX\n\t"
$$emit$$"DEC RCX\n\t"
$$emit$$"JGE L_sloop\n\t"
$$emit$$"# L_end:\n\t"
} else {
$$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
$$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
}
$$emit$$"# DONE"
%}
ins_encode %{
// The boolean argument is false here and true in the "large" rules
// (presumably clear_mem's is_large flag -- confirm in MacroAssembler).
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
$tmp$$XMMRegister, false, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
// Large ClearArray non-AVX512.
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
predicate(((ClearArrayNode*)n)->is_large());
predicate(UseAVX <= 2 && ((ClearArrayNode*)n)->is_large());
match(Set dummy (ClearArray cnt base));
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
format %{ $$template
@ -11513,26 +11692,79 @@ instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Unive
%}
ins_encode %{
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
$tmp$$XMMRegister, true);
$tmp$$XMMRegister, true, knoreg);
%}
ins_pipe( pipe_slow );
%}
instruct rep_stos_im(immI cnt, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2 && VM_Version::supports_avx512vlbw() && n->in(2)->bottom_type()->is_int()->is_con()));
// Large ClearArray AVX512.
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, regD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
predicate(UseAVX > 2 && ((ClearArrayNode*)n)->is_large());
match(Set dummy (ClearArray cnt base));
effect(TEMP tmp, TEMP zero, KILL cr);
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
format %{ $$template
if (UseFastStosb) {
$$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
$$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
$$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
} else if (UseXMMForObjInit) {
$$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
$$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
$$emit$$"JMPQ L_zero_64_bytes\n\t"
$$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
$$emit$$"VMOVDQU YMM0,(RAX)\n\t"
$$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
$$emit$$"ADD 0x40,RAX\n\t"
$$emit$$"# L_zero_64_bytes:\n\t"
$$emit$$"SUB 0x8,RCX\n\t"
$$emit$$"JGE L_loop\n\t"
$$emit$$"ADD 0x4,RCX\n\t"
$$emit$$"JL L_tail\n\t"
$$emit$$"VMOVDQU YMM0,(RAX)\n\t"
$$emit$$"ADD 0x20,RAX\n\t"
$$emit$$"SUB 0x4,RCX\n\t"
$$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
$$emit$$"ADD 0x4,RCX\n\t"
$$emit$$"JLE L_end\n\t"
$$emit$$"DEC RCX\n\t"
$$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
$$emit$$"VMOVQ XMM0,(RAX)\n\t"
$$emit$$"ADD 0x8,RAX\n\t"
$$emit$$"DEC RCX\n\t"
$$emit$$"JGE L_sloop\n\t"
$$emit$$"# L_end:\n\t"
} else {
$$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
$$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
$$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
}
$$emit$$"# DONE"
%}
ins_encode %{
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
$tmp$$XMMRegister, true, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
// Small ClearArray AVX512 constant length.
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
%{
predicate(!((ClearArrayNode*)n)->is_large() &&
(UseAVX > 2 && VM_Version::supports_avx512vlbw() &&
n->in(2)->bottom_type()->is_int()->is_con()));
match(Set dummy (ClearArray cnt base));
effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
format %{ "clear_mem_imm $base , $cnt \n\t" %}
ins_encode %{
__ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister);
__ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
%}
ins_pipe(pipe_slow);
%}
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
eAXRegI result, regD tmp1, eFlagsReg cr) %{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
@ -11540,14 +11772,29 @@ instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
ins_encode %{
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$XMMRegister, StrIntrinsicNode::LL);
$tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
%}
ins_pipe( pipe_slow );
%}
// StrComp for the StrIntrinsicNode::LL encoding, AVX-512 variant.
// Selected only when UseAVX > 2. Same operands as string_compareL plus an
// opmask temporary (ktmp) that is handed to string_compare(); the UseAVX <= 2
// rule passes knoreg in that position.
// Effects: str1/str2/cnt1/cnt2 are consumed (USE_KILL), flags are killed,
// tmp1 (XMM) and ktmp (opmask) are temporaries.
instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
ins_encode %{
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
eAXRegI result, regD tmp1, eFlagsReg cr) %{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
@ -11555,14 +11802,29 @@ instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
ins_encode %{
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$XMMRegister, StrIntrinsicNode::UU);
$tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
%}
ins_pipe( pipe_slow );
%}
// StrComp for the StrIntrinsicNode::UU encoding, AVX-512 variant.
// Selected only when UseAVX > 2. Same operands as string_compareU plus an
// opmask temporary (ktmp) that is handed to string_compare(); the UseAVX <= 2
// rule passes knoreg in that position.
// Effects: str1/str2/cnt1/cnt2 are consumed (USE_KILL), flags are killed,
// tmp1 (XMM) and ktmp (opmask) are temporaries.
instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
ins_encode %{
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
eAXRegI result, regD tmp1, eFlagsReg cr) %{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
@ -11570,14 +11832,29 @@ instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2
ins_encode %{
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$XMMRegister, StrIntrinsicNode::LU);
$tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
%}
ins_pipe( pipe_slow );
%}
// StrComp for the StrIntrinsicNode::LU encoding, AVX-512 variant.
// Selected only when UseAVX > 2. Same operands as string_compareLU plus an
// opmask temporary (ktmp) that is handed to string_compare(); the UseAVX <= 2
// rule passes knoreg in that position.
// Effects: str1/str2/cnt1/cnt2 are consumed (USE_KILL), flags are killed,
// tmp1 (XMM) and ktmp (opmask) are temporaries.
instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
ins_encode %{
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
eAXRegI result, regD tmp1, eFlagsReg cr) %{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
@ -11585,7 +11862,22 @@ instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2
ins_encode %{
__ string_compare($str2$$Register, $str1$$Register,
$cnt2$$Register, $cnt1$$Register, $result$$Register,
$tmp1$$XMMRegister, StrIntrinsicNode::UL);
$tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
%}
ins_pipe( pipe_slow );
%}
// StrComp for the StrIntrinsicNode::UL encoding, AVX-512 variant.
// Selected only when UseAVX > 2. Same operands as string_compareUL plus an
// opmask temporary (ktmp) that is handed to string_compare(); the UseAVX <= 2
// rule passes knoreg in that position.
// Note the register binding differs from the other encodings (str1 in ESI-
// class, str2 in EDI-class operands) and that the encoder passes the
// str2/cnt2 pair before str1/cnt1.
// Effects: str1/str2/cnt1/cnt2 are consumed (USE_KILL), flags are killed,
// tmp1 (XMM) and ktmp (opmask) are temporaries.
instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
ins_encode %{
// Operands are deliberately swapped: (str2, str1, cnt2, cnt1).
__ string_compare($str2$$Register, $str1$$Register,
$cnt2$$Register, $cnt1$$Register, $result$$Register,
$tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
@ -11593,6 +11885,7 @@ instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2
// fast string equals
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
predicate(UseAVX <= 2);
match(Set result (StrEquals (Binary str1 str2) cnt));
effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
@ -11600,12 +11893,29 @@ instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
ins_encode %{
__ arrays_equals(false, $str1$$Register, $str2$$Register,
$cnt$$Register, $result$$Register, $tmp3$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
%}
ins_pipe( pipe_slow );
%}
// StrEquals, AVX-512 variant; selected only when UseAVX > 2.
// Same operands as string_equals plus an opmask temporary (ktmp) that is
// handed to arrays_equals(); the UseAVX <= 2 rule passes knoreg there.
// Effects: str1/str2/cnt are consumed (USE_KILL); tmp3 and the flags are
// killed; tmp1/tmp2 (XMM) and ktmp (opmask) are temporaries.
instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
predicate(UseAVX > 2);
match(Set result (StrEquals (Binary str1 str2) cnt));
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
ins_encode %{
// First argument is false here and true in the array_equals* rules.
__ arrays_equals(false, $str1$$Register, $str2$$Register,
$cnt$$Register, $result$$Register, $tmp3$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
// fast search of substring with known size.
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
@ -11769,7 +12079,7 @@ instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
predicate(UseAVX <= 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
match(Set result (AryEq ary1 ary2));
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
//ins_cost(300);
@ -11778,7 +12088,24 @@ instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
ins_encode %{
__ arrays_equals(true, $ary1$$Register, $ary2$$Register,
$tmp3$$Register, $result$$Register, $tmp4$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
%}
ins_pipe( pipe_slow );
%}
// AryEq for the StrIntrinsicNode::LL encoding (byte[] per the format string),
// AVX-512 variant; selected only when UseAVX > 2.
// Same operands as array_equalsB plus an opmask temporary (ktmp) that is
// handed to arrays_equals(); the UseAVX <= 2 rule passes knoreg there.
// Effects: ary1/ary2 are consumed (USE_KILL); tmp3, tmp4 and the flags are
// killed; tmp1/tmp2 (XMM) and ktmp (opmask) are temporaries.
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
predicate(UseAVX > 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
match(Set result (AryEq ary1 ary2));
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
//ins_cost(300);
format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
ins_encode %{
__ arrays_equals(true, $ary1$$Register, $ary2$$Register,
$tmp3$$Register, $result$$Register, $tmp4$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
@ -11786,7 +12113,7 @@ instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
predicate(UseAVX <= 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (AryEq ary1 ary2));
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
//ins_cost(300);
@ -11795,7 +12122,24 @@ instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
ins_encode %{
__ arrays_equals(true, $ary1$$Register, $ary2$$Register,
$tmp3$$Register, $result$$Register, $tmp4$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
$tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
%}
ins_pipe( pipe_slow );
%}
// AryEq for the StrIntrinsicNode::UU encoding (char[] per the format string),
// AVX-512 variant; selected only when UseAVX > 2.
// Same operands as array_equalsC plus an opmask temporary (ktmp) that is
// handed to arrays_equals(); the UseAVX <= 2 rule passes knoreg there.
// Effects: ary1/ary2 are consumed (USE_KILL); tmp3, tmp4 and the flags are
// killed; tmp1/tmp2 (XMM) and ktmp (opmask) are temporaries.
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
%{
predicate(UseAVX > 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (AryEq ary1 ary2));
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
//ins_cost(300);
format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
ins_encode %{
__ arrays_equals(true, $ary1$$Register, $ary2$$Register,
$tmp3$$Register, $result$$Register, $tmp4$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
@ -11803,6 +12147,7 @@ instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
%{
predicate(UseAVX <= 2);
match(Set result (HasNegatives ary1 len));
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
@ -11810,14 +12155,32 @@ instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
ins_encode %{
__ has_negatives($ary1$$Register, $len$$Register,
$result$$Register, $tmp3$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister);
$tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
%}
ins_pipe( pipe_slow );
%}
// HasNegatives, AVX-512 variant; selected only when UseAVX > 2.
// Same operands as has_negatives plus two opmask temporaries (ktmp1, ktmp2)
// that are handed to MacroAssembler::has_negatives(); the UseAVX <= 2 rule
// passes knoreg for both.
// Effects: ary1/len are consumed (USE_KILL); tmp3 and the flags are killed;
// tmp1/tmp2 (XMM) and ktmp1/ktmp2 (opmask) are temporaries.
instruct has_negatives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
%{
predicate(UseAVX > 2);
match(Set result (HasNegatives ary1 len));
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
ins_encode %{
__ has_negatives($ary1$$Register, $len$$Register,
$result$$Register, $tmp3$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
%}
ins_pipe( pipe_slow );
%}
// fast char[] to byte[] compression
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
predicate(UseAVX <= 2);
match(Set result (StrCompressedCopy src (Binary dst len)));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
@ -11825,7 +12188,24 @@ instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD
ins_encode %{
__ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
knoreg, knoreg);
%}
ins_pipe( pipe_slow );
%}
// StrCompressedCopy (char[] to byte[] compression), AVX-512 variant;
// selected only when UseAVX > 2.
// Same operands as string_compress plus two opmask temporaries (ktmp1,
// ktmp2) that are handed to char_array_compress(); the UseAVX <= 2 rule
// passes knoreg for both.
// Effects: src/dst/len are consumed (USE_KILL); tmp5 and the flags are
// killed; tmp1..tmp4 (XMM) and ktmp1/ktmp2 (opmask) are temporaries.
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
predicate(UseAVX > 2);
match(Set result (StrCompressedCopy src (Binary dst len)));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
ins_encode %{
__ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
$ktmp1$$KRegister, $ktmp2$$KRegister);
%}
ins_pipe( pipe_slow );
%}
@ -11833,13 +12213,28 @@ instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD
// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
predicate(UseAVX <= 2);
match(Set dummy (StrInflatedCopy src (Binary dst len)));
effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
ins_encode %{
__ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
$tmp1$$XMMRegister, $tmp2$$Register);
$tmp1$$XMMRegister, $tmp2$$Register, knoreg);
%}
ins_pipe( pipe_slow );
%}
// StrInflatedCopy (byte[] to char[] inflation), AVX-512 variant;
// selected only when UseAVX > 2.
// Same operands as string_inflate plus an opmask temporary (ktmp) that is
// handed to byte_array_inflate(); the UseAVX <= 2 rule passes knoreg there.
// Effects: src/dst/len are consumed (USE_KILL); flags are killed;
// tmp1 (XMM), tmp2 and ktmp (opmask) are temporaries.
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
predicate(UseAVX > 2);
match(Set dummy (StrInflatedCopy src (Binary dst len)));
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
ins_encode %{
__ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
$tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
@ -12267,10 +12662,12 @@ instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
predicate(n->has_vector_mask_set());
// The bounded mask operand (kReg_K1) used in the following pattern is needed
// for post-loop multiversioning.
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, eFlagsReg cr, label labl) %{
predicate(PostLoopMultiversioning && n->has_vector_mask_set());
match(CountedLoopEnd cop cr);
effect(USE labl);
effect(USE labl, TEMP ktmp);
ins_cost(400);
format %{ "J$cop $labl\t# Loop end\n\t"
@ -12279,16 +12676,18 @@ instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
__ restorevectmask();
__ restorevectmask($ktmp$$KRegister);
%}
ins_pipe( pipe_jcc );
%}
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
predicate(n->has_vector_mask_set());
// The bounded mask operand (kReg_K1) used in the following pattern is needed
// for post-loop multiversioning.
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, eFlagsRegU cmp, label labl) %{
predicate(PostLoopMultiversioning && n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
effect(USE labl, TEMP ktmp);
ins_cost(400);
format %{ "J$cop,u $labl\t# Loop end\n\t"
@ -12297,15 +12696,17 @@ instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
__ restorevectmask();
__ restorevectmask($ktmp$$KRegister);
%}
ins_pipe( pipe_jcc );
%}
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
predicate(n->has_vector_mask_set());
// The bounded mask operand (kReg_K1) used in the following pattern is needed
// for post-loop multiversioning.
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, eFlagsRegUCF cmp, label labl) %{
predicate(PostLoopMultiversioning && n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
effect(USE labl, TEMP ktmp);
ins_cost(300);
format %{ "J$cop,u $labl\t# Loop end\n\t"
@ -12314,7 +12715,7 @@ instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label lab
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
__ restorevectmask();
__ restorevectmask($ktmp$$KRegister);
%}
ins_pipe( pipe_jcc );
%}

View File

@ -424,6 +424,16 @@ void reg_mask_init() {
_INT_NO_RCX_REG_mask = _INT_REG_mask;
_INT_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
if (Matcher::has_predicated_vectors()) {
// Post-loop multi-versioning expects the mask to be present in the K1 register.
// Until that is fixed, the register allocator must not allocate K1; this
// prevents any accidental corruption of the value held in K1.
if (PostLoopMultiversioning) {
const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
}
}
}
static bool generate_vzeroupper(Compile* C) {
@ -1014,6 +1024,7 @@ const Pipeline* MachEpilogNode::pipeline() const
// Register classes used to pick the spill-copy code path:
//   rc_bad   - no valid class (presumably an invalid OptoReg -- see rc_class)
//   rc_int   - general purpose register
//   rc_kreg  - AVX-512 opmask (K) register
//   rc_float - XMM register
//   rc_stack - stack slot
enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_stack };
@ -1028,6 +1039,8 @@ static enum RC rc_class(OptoReg::Name reg)
if (r->is_Register()) return rc_int;
if (r->is_KRegister()) return rc_kreg;
assert(r->is_XMMRegister(), "must be");
return rc_float;
}
@ -1141,7 +1154,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
// Self copy, no move
return 0;
}
if (bottom_type()->isa_vect() != NULL) {
if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
uint ireg = ideal_reg();
assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
@ -1271,6 +1284,24 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
st->print("movss %s, [rsp + #%d]\t# spill",
Matcher::regName[dst_first],
offset);
#endif
}
}
return 0;
} else if (dst_first_rc == rc_kreg) {
// mem -> kreg
if ((src_first & 1) == 0 && src_first + 1 == src_second &&
(dst_first & 1) == 0 && dst_first + 1 == dst_second) {
// 64-bit
int offset = ra_->reg2offset(src_first);
if (cbuf) {
MacroAssembler _masm(cbuf);
__ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
#ifndef PRODUCT
} else {
st->print("kmovq %s, [rsp + #%d]\t# spill",
Matcher::regName[dst_first],
offset);
#endif
}
}
@ -1376,6 +1407,23 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
}
}
return 0;
} else if (dst_first_rc == rc_kreg) {
if ((src_first & 1) == 0 && src_first + 1 == src_second &&
(dst_first & 1) == 0 && dst_first + 1 == dst_second) {
// 64-bit
if (cbuf) {
MacroAssembler _masm(cbuf);
__ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else {
st->print("kmovq %s, %s\t# spill",
Matcher::regName[dst_first],
Matcher::regName[src_first]);
#endif
}
}
Unimplemented();
return 0;
}
} else if (src_first_rc == rc_float) {
// xmm ->
@ -1476,6 +1524,65 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
}
}
return 0;
} else if (dst_first_rc == rc_kreg) {
assert(false, "Illegal spilling");
return 0;
}
} else if (src_first_rc == rc_kreg) {
if (dst_first_rc == rc_stack) {
// kreg -> mem
if ((src_first & 1) == 0 && src_first + 1 == src_second &&
(dst_first & 1) == 0 && dst_first + 1 == dst_second) {
// 64-bit
int offset = ra_->reg2offset(dst_first);
if (cbuf) {
MacroAssembler _masm(cbuf);
__ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else {
st->print("kmovq [rsp + #%d] , %s\t# spill",
offset,
Matcher::regName[src_first]);
#endif
}
}
return 0;
} else if (dst_first_rc == rc_int) {
if ((src_first & 1) == 0 && src_first + 1 == src_second &&
(dst_first & 1) == 0 && dst_first + 1 == dst_second) {
// 64-bit
if (cbuf) {
MacroAssembler _masm(cbuf);
__ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else {
st->print("kmovq %s, %s\t# spill",
Matcher::regName[dst_first],
Matcher::regName[src_first]);
#endif
}
}
Unimplemented();
return 0;
} else if (dst_first_rc == rc_kreg) {
if ((src_first & 1) == 0 && src_first + 1 == src_second &&
(dst_first & 1) == 0 && dst_first + 1 == dst_second) {
// 64-bit
if (cbuf) {
MacroAssembler _masm(cbuf);
__ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
#ifndef PRODUCT
} else {
st->print("kmovq %s, %s\t# spill",
Matcher::regName[dst_first],
Matcher::regName[src_first]);
#endif
}
}
return 0;
} else if (dst_first_rc == rc_float) {
assert(false, "Illegal spill");
return 0;
}
}
@ -3291,6 +3398,72 @@ operand immL_65535()
interface(CONST_INTER);
%}
// Opmask (vector mask) register operand.
// Matches the ideal RegVectMask type and is allocated from the
// vectmask_reg register class. The format string is empty.
operand kReg()
%{
constraint(ALLOC_IN_RC(vectmask_reg));
match(RegVectMask);
format %{%}
interface(REG_INTER);
%}
// Opmask register operand whose allocation is constrained to the
// vectmask_reg_K1 register class (presumably only k1; the class
// definition is not visible here -- confirm).
operand kReg_K1()
%{
constraint(ALLOC_IN_RC(vectmask_reg_K1));
match(RegVectMask);
format %{%}
interface(REG_INTER);
%}
// Opmask register operand whose allocation is constrained to the
// vectmask_reg_K2 register class (presumably only k2 -- confirm
// against the register class definition).
operand kReg_K2()
%{
constraint(ALLOC_IN_RC(vectmask_reg_K2));
match(RegVectMask);
format %{%}
interface(REG_INTER);
%}
// Opmask register operand whose allocation is constrained to the
// vectmask_reg_K3 register class (presumably only k3 -- confirm
// against the register class definition).
operand kReg_K3()
%{
constraint(ALLOC_IN_RC(vectmask_reg_K3));
match(RegVectMask);
format %{%}
interface(REG_INTER);
%}
// Opmask register operand whose allocation is constrained to the
// vectmask_reg_K4 register class (presumably only k4 -- confirm
// against the register class definition).
operand kReg_K4()
%{
constraint(ALLOC_IN_RC(vectmask_reg_K4));
match(RegVectMask);
format %{%}
interface(REG_INTER);
%}
// Opmask register operand whose allocation is constrained to the
// vectmask_reg_K5 register class (presumably only k5 -- confirm
// against the register class definition).
operand kReg_K5()
%{
constraint(ALLOC_IN_RC(vectmask_reg_K5));
match(RegVectMask);
format %{%}
interface(REG_INTER);
%}
// Opmask register operand whose allocation is constrained to the
// vectmask_reg_K6 register class (presumably only k6 -- confirm
// against the register class definition).
operand kReg_K6()
%{
constraint(ALLOC_IN_RC(vectmask_reg_K6));
match(RegVectMask);
format %{%}
interface(REG_INTER);
%}
// Opmask register operand whose allocation is constrained to the
// vectmask_reg_K7 register class (presumably only k7 -- confirm
// against the register class definition).
operand kReg_K7()
%{
constraint(ALLOC_IN_RC(vectmask_reg_K7));
match(RegVectMask);
format %{%}
interface(REG_INTER);
%}
// Register Operands
// Integer Register
operand rRegI()
@ -4701,7 +4874,6 @@ define
// name must have been defined in an 'enc_class' specification
// in the encode section of the architecture description.
//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
@ -10768,13 +10940,13 @@ instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
ins_pipe( pipe_slow );
%}
// =======================================================================
// fast clearing of an array
// Fast clearing of an array
// Small ClearArray non-AVX512.
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
Universe dummy, rFlagsReg cr)
%{
predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2 || !VM_Version::supports_avx512vlbw() || !n->in(2)->bottom_type()->is_long()->is_con()));
predicate(!((ClearArrayNode*)n)->is_large() &&
(UseAVX <= 2 || !VM_Version::supports_avx512vlbw()));
match(Set dummy (ClearArray cnt base));
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
@ -10825,15 +10997,78 @@ instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
%}
ins_encode %{
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
$tmp$$XMMRegister, false);
$tmp$$XMMRegister, false, knoreg);
%}
ins_pipe(pipe_slow);
%}
// Small ClearArray AVX512 non-constant length.
//
// Matches ClearArray when the node is not "large", UseAVX > 2, the CPU
// reports AVX512VL+BW support, and input 2 of the node is not a long
// constant. In addition to the fixed registers (rcx = cnt, rdi = base,
// rax = zero) and the XMM temp, an opmask register (ktmp) is reserved
// as a TEMP and handed to clear_mem.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, regD tmp, kReg ktmp, rax_RegI zero,
Universe dummy, rFlagsReg cr)
%{
predicate(!((ClearArrayNode*)n)->is_large() &&
UseAVX > 2 && VM_Version::supports_avx512vlbw() &&
!n->in(2)->bottom_type()->is_long()->is_con());
match(Set dummy (ClearArray cnt base));
// cnt and base are consumed and clobbered; zero and the flags are clobbered.
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
format %{ $$template
$$emit$$"xorq rax, rax\t# ClearArray:\n\t"
$$emit$$"cmp InitArrayShortSize,rcx\n\t"
$$emit$$"jg LARGE\n\t"
$$emit$$"dec rcx\n\t"
$$emit$$"js DONE\t# Zero length\n\t"
$$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
$$emit$$"dec rcx\n\t"
$$emit$$"jge LOOP\n\t"
$$emit$$"jmp DONE\n\t"
$$emit$$"# LARGE:\n\t"
if (UseFastStosb) {
$$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
$$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
} else if (UseXMMForObjInit) {
$$emit$$"mov rdi,rax\n\t"
$$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
$$emit$$"jmpq L_zero_64_bytes\n\t"
$$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
$$emit$$"vmovdqu ymm0,(rax)\n\t"
$$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
$$emit$$"add 0x40,rax\n\t"
$$emit$$"# L_zero_64_bytes:\n\t"
$$emit$$"sub 0x8,rcx\n\t"
$$emit$$"jge L_loop\n\t"
$$emit$$"add 0x4,rcx\n\t"
$$emit$$"jl L_tail\n\t"
$$emit$$"vmovdqu ymm0,(rax)\n\t"
$$emit$$"add 0x20,rax\n\t"
$$emit$$"sub 0x4,rcx\n\t"
$$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
$$emit$$"add 0x4,rcx\n\t"
$$emit$$"jle L_end\n\t"
$$emit$$"dec rcx\n\t"
$$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
$$emit$$"vmovq xmm0,(rax)\n\t"
$$emit$$"add 0x8,rax\n\t"
$$emit$$"dec rcx\n\t"
$$emit$$"jge L_sloop\n\t"
$$emit$$"# L_end:\n\t"
} else {
$$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
}
$$emit$$"# DONE"
%}
ins_encode %{
// 'false' selects the non-large path; ktmp is the opmask temp
// (the non-AVX512 variant passes knoreg in this position).
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
$tmp$$XMMRegister, false, $ktmp$$KRegister);
%}
ins_pipe(pipe_slow);
%}
// Large ClearArray non-AVX512.
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
Universe dummy, rFlagsReg cr)
%{
predicate(((ClearArrayNode*)n)->is_large());
predicate(UseAVX <=2 && ((ClearArrayNode*)n)->is_large());
match(Set dummy (ClearArray cnt base));
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
@ -10875,19 +11110,73 @@ instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
%}
ins_encode %{
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
$tmp$$XMMRegister, true);
$tmp$$XMMRegister, true, knoreg);
%}
ins_pipe(pipe_slow);
%}
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, Universe dummy, rFlagsReg cr)
// Large ClearArray AVX512.
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, regD tmp, kReg ktmp, rax_RegI zero,
Universe dummy, rFlagsReg cr)
%{
predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2 && VM_Version::supports_avx512vlbw() && n->in(2)->bottom_type()->is_long()->is_con()));
predicate(UseAVX > 2 && ((ClearArrayNode*)n)->is_large());
match(Set dummy (ClearArray cnt base));
effect(TEMP tmp, TEMP zero, KILL cr);
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
format %{ $$template
if (UseFastStosb) {
$$emit$$"xorq rax, rax\t# ClearArray:\n\t"
$$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
$$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
} else if (UseXMMForObjInit) {
$$emit$$"mov rdi,rax\t# ClearArray:\n\t"
$$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
$$emit$$"jmpq L_zero_64_bytes\n\t"
$$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
$$emit$$"vmovdqu ymm0,(rax)\n\t"
$$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
$$emit$$"add 0x40,rax\n\t"
$$emit$$"# L_zero_64_bytes:\n\t"
$$emit$$"sub 0x8,rcx\n\t"
$$emit$$"jge L_loop\n\t"
$$emit$$"add 0x4,rcx\n\t"
$$emit$$"jl L_tail\n\t"
$$emit$$"vmovdqu ymm0,(rax)\n\t"
$$emit$$"add 0x20,rax\n\t"
$$emit$$"sub 0x4,rcx\n\t"
$$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
$$emit$$"add 0x4,rcx\n\t"
$$emit$$"jle L_end\n\t"
$$emit$$"dec rcx\n\t"
$$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
$$emit$$"vmovq xmm0,(rax)\n\t"
$$emit$$"add 0x8,rax\n\t"
$$emit$$"dec rcx\n\t"
$$emit$$"jge L_sloop\n\t"
$$emit$$"# L_end:\n\t"
} else {
$$emit$$"xorq rax, rax\t# ClearArray:\n\t"
$$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
}
%}
ins_encode %{
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
$tmp$$XMMRegister, true, $ktmp$$KRegister);
%}
ins_pipe(pipe_slow);
%}
// Small ClearArray AVX512 constant length.
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
%{
predicate(!((ClearArrayNode*)n)->is_large() &&
(UseAVX > 2 && VM_Version::supports_avx512vlbw() &&
n->in(2)->bottom_type()->is_long()->is_con()));
match(Set dummy (ClearArray cnt base));
effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
format %{ "clear_mem_imm $base , $cnt \n\t" %}
ins_encode %{
__ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister);
__ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
%}
ins_pipe(pipe_slow);
%}
@ -10895,7 +11184,7 @@ instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, Universe dummy,
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
@ -10903,7 +11192,23 @@ instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI c
ins_encode %{
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$XMMRegister, StrIntrinsicNode::LL);
$tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
%}
ins_pipe( pipe_slow );
%}
// StrComp with LL encoding, selected when UseAVX > 2.
// Differs from string_compareL only in reserving an opmask temp (ktmp)
// and passing it to string_compare instead of knoreg.
instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
// Both strings and both counts are consumed and clobbered.
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
ins_encode %{
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
@ -10911,7 +11216,7 @@ instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI c
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
@ -10919,7 +11224,23 @@ instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI c
ins_encode %{
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$XMMRegister, StrIntrinsicNode::UU);
$tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
%}
ins_pipe( pipe_slow );
%}
// StrComp with UU encoding, selected when UseAVX > 2.
// Differs from string_compareU only in reserving an opmask temp (ktmp)
// and passing it to string_compare instead of knoreg.
instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
// Both strings and both counts are consumed and clobbered.
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
ins_encode %{
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
@ -10927,7 +11248,7 @@ instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI c
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
@ -10935,7 +11256,23 @@ instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI
ins_encode %{
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$XMMRegister, StrIntrinsicNode::LU);
$tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
%}
ins_pipe( pipe_slow );
%}
// StrComp with LU encoding, selected when UseAVX > 2.
// Differs from string_compareLU only in reserving an opmask temp (ktmp)
// and passing it to string_compare instead of knoreg.
// NOTE(review): the format text says "byte[]" although the encoding is
// LU -- confirm whether the debug text should be more specific.
instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
// Both strings and both counts are consumed and clobbered.
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
ins_encode %{
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
@ -10943,7 +11280,7 @@ instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
rax_RegI result, legRegD tmp1, rFlagsReg cr)
%{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
@ -10951,7 +11288,23 @@ instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI
ins_encode %{
__ string_compare($str2$$Register, $str1$$Register,
$cnt2$$Register, $cnt1$$Register, $result$$Register,
$tmp1$$XMMRegister, StrIntrinsicNode::UL);
$tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
%}
ins_pipe( pipe_slow );
%}
// StrComp with UL encoding, selected when UseAVX > 2.
// Differs from string_compareUL only in reserving an opmask temp (ktmp)
// and passing it to string_compare instead of knoreg.
// Note the register bindings (str1 in rsi, str2 in rdi) and that the
// encoder passes the second string/count first.
// NOTE(review): the format text says "byte[]" although the encoding is
// UL -- confirm whether the debug text should be more specific.
instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
// Both strings and both counts are consumed and clobbered.
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
ins_encode %{
// Operands are deliberately passed swapped: (str2, str1, cnt2, cnt1).
__ string_compare($str2$$Register, $str1$$Register,
$cnt2$$Register, $cnt1$$Register, $result$$Register,
$tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
@ -11126,6 +11479,7 @@ instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
predicate(UseAVX <= 2);
match(Set result (StrEquals (Binary str1 str2) cnt));
effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
@ -11133,7 +11487,23 @@ instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI resu
ins_encode %{
__ arrays_equals(false, $str1$$Register, $str2$$Register,
$cnt$$Register, $result$$Register, $tmp3$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
%}
ins_pipe( pipe_slow );
%}
// StrEquals, selected when UseAVX > 2.
// Differs from string_equals only in reserving an opmask temp (ktmp)
// and passing it to arrays_equals instead of knoreg.
instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
%{
predicate(UseAVX > 2);
match(Set result (StrEquals (Binary str1 str2) cnt));
// str1, str2 and cnt are consumed and clobbered; tmp3 (rbx) is killed.
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
ins_encode %{
// First argument 'false' mirrors string_equals; the array variants pass 'true'.
__ arrays_equals(false, $str1$$Register, $str2$$Register,
$cnt$$Register, $result$$Register, $tmp3$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
@ -11142,7 +11512,7 @@ instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI resu
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
predicate(UseAVX <= 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
match(Set result (AryEq ary1 ary2));
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
@ -11150,7 +11520,23 @@ instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
ins_encode %{
__ arrays_equals(true, $ary1$$Register, $ary2$$Register,
$tmp3$$Register, $result$$Register, $tmp4$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
%}
ins_pipe( pipe_slow );
%}
// AryEq with LL encoding (byte[]), selected when UseAVX > 2.
// Differs from array_equalsB only in reserving an opmask temp (ktmp)
// and passing it to arrays_equals instead of knoreg.
instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
predicate(UseAVX > 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
match(Set result (AryEq ary1 ary2));
// Both array pointers are consumed and clobbered; tmp3 (rcx) and tmp4 (rbx) are killed.
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
ins_encode %{
__ arrays_equals(true, $ary1$$Register, $ary2$$Register,
$tmp3$$Register, $result$$Register, $tmp4$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
@ -11158,7 +11544,7 @@ instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
predicate(UseAVX <= 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (AryEq ary1 ary2));
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
@ -11166,14 +11552,31 @@ instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
ins_encode %{
__ arrays_equals(true, $ary1$$Register, $ary2$$Register,
$tmp3$$Register, $result$$Register, $tmp4$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
$tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
%}
ins_pipe( pipe_slow );
%}
// AryEq with UU encoding (char[]), selected when UseAVX > 2.
// Differs from array_equalsC only in reserving an opmask temp (ktmp)
// and passing it to arrays_equals instead of knoreg.
instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
predicate(UseAVX > 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (AryEq ary1 ary2));
// Both array pointers are consumed and clobbered; tmp3 (rcx) and tmp4 (rbx) are killed.
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
ins_encode %{
__ arrays_equals(true, $ary1$$Register, $ary2$$Register,
$tmp3$$Register, $result$$Register, $tmp4$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
instruct has_negatives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr,)
%{
predicate(UseAVX <= 2);
match(Set result (HasNegatives ary1 len));
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
@ -11181,36 +11584,86 @@ instruct has_negatives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
ins_encode %{
__ has_negatives($ary1$$Register, $len$$Register,
$result$$Register, $tmp3$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister);
$tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
%}
ins_pipe( pipe_slow );
%}
// HasNegatives, selected when UseAVX > 2.
// Reserves two opmask temps (ktmp1, ktmp2) and passes them to
// has_negatives; the UseAVX <= 2 variant passes knoreg for both.
// ary1 and len are consumed and clobbered; tmp3 (rbx) is killed.
instruct has_negatives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
predicate(UseAVX > 2);
match(Set result (HasNegatives ary1 len));
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
ins_encode %{
__ has_negatives($ary1$$Register, $len$$Register,
$result$$Register, $tmp3$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
%}
ins_pipe( pipe_slow );
%}
// fast char[] to byte[] compression
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
predicate(UseAVX <= 2);
match(Set result (StrCompressedCopy src (Binary dst len)));
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
USE_KILL len, KILL tmp5, KILL cr);
format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
ins_encode %{
__ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
knoreg, knoreg);
%}
ins_pipe( pipe_slow );
%}
// StrCompressedCopy, selected when UseAVX > 2.
// Differs from string_compress only in reserving two opmask temps
// (ktmp1, ktmp2) and passing them to char_array_compress instead of knoreg.
instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
predicate(UseAVX > 2);
match(Set result (StrCompressedCopy src (Binary dst len)));
// src, dst and len are consumed and clobbered; tmp5 (rcx) is killed.
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
USE_KILL len, KILL tmp5, KILL cr);
format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
ins_encode %{
__ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
$ktmp1$$KRegister, $ktmp2$$KRegister);
%}
ins_pipe( pipe_slow );
%}
// fast byte[] to char[] inflation
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
predicate(UseAVX <= 2);
match(Set dummy (StrInflatedCopy src (Binary dst len)));
effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
ins_encode %{
__ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
$tmp1$$XMMRegister, $tmp2$$Register);
$tmp1$$XMMRegister, $tmp2$$Register, knoreg);
%}
ins_pipe( pipe_slow );
%}
// StrInflatedCopy, selected when UseAVX > 2.
// Differs from string_inflate only in reserving an opmask temp (ktmp)
// and passing it to byte_array_inflate instead of knoreg.
instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
predicate(UseAVX > 2);
match(Set dummy (StrInflatedCopy src (Binary dst len)));
// src, dst and len are consumed and clobbered.
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
ins_encode %{
__ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
$tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
%}
ins_pipe( pipe_slow );
%}
@ -12002,11 +12455,13 @@ instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, rFlagsReg cr, label labl)
// The bound mask operand used in the following pattern is needed for
// post-loop multiversioning.
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, rFlagsReg cr, label labl)
%{
predicate(n->has_vector_mask_set());
predicate(PostLoopMultiversioning && n->has_vector_mask_set());
match(CountedLoopEnd cop cr);
effect(USE labl);
effect(USE labl, TEMP ktmp);
ins_cost(400);
format %{ "j$cop $labl\t# loop end\n\t"
@ -12015,16 +12470,18 @@ instruct jmpLoopEnd_and_restoreMask(cmpOp cop, rFlagsReg cr, label labl)
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
__ restorevectmask();
__ restorevectmask($ktmp$$KRegister);
%}
ins_pipe(pipe_jcc);
%}
// Jump Direct Conditional - Label defines a relative address from Jcc+1
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, rFlagsRegU cmp, label labl) %{
predicate(n->has_vector_mask_set());
// The bound mask operand used in the following pattern is needed for
// post-loop multiversioning.
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, rFlagsRegU cmp, label labl) %{
predicate(PostLoopMultiversioning && n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
effect(USE labl, TEMP ktmp);
ins_cost(400);
format %{ "j$cop,u $labl\t# loop end\n\t"
@ -12033,15 +12490,17 @@ instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, rFlagsRegU cmp, label labl) %{
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
__ restorevectmask();
__ restorevectmask($ktmp$$KRegister);
%}
ins_pipe(pipe_jcc);
%}
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
predicate(n->has_vector_mask_set());
// The bound mask operand used in the following pattern is needed for
// post-loop multiversioning.
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, rFlagsRegUCF cmp, label labl) %{
predicate(PostLoopMultiversioning && n->has_vector_mask_set());
match(CountedLoopEnd cop cmp);
effect(USE labl);
effect(USE labl, TEMP ktmp);
ins_cost(300);
format %{ "j$cop,u $labl\t# loop end\n\t"
@ -12050,7 +12509,7 @@ instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, rFlagsRegUCF cmp, label lab
ins_encode %{
Label* L = $labl$$label;
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
__ restorevectmask();
__ restorevectmask($ktmp$$KRegister);
%}
ins_pipe(pipe_jcc);
%}

View File

@ -1,5 +1,5 @@
//
// Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@ -945,8 +945,8 @@ const char *ArchDesc::getIdealType(const char *idealOp) {
}
}
if (strncmp(idealOp, "RegVMask", 8) == 0) {
return "Type::BOTTOM";
// Vector mask registers map to TypeVect::VECTMASK.
// The compare length must cover the whole literal: 11 == strlen("RegVectMask").
// A length of 8 would only test the prefix "RegVectM".
if (strncmp(idealOp, "RegVectMask", 11) == 0) {
return "TypeVect::VECTMASK";
}
// !!!!!

View File

@ -3964,7 +3964,7 @@ bool MatchRule::is_base_register(FormDict &globals) const {
strcmp(opType,"RegL")==0 ||
strcmp(opType,"RegF")==0 ||
strcmp(opType,"RegD")==0 ||
strcmp(opType,"RegVMask")==0 ||
strcmp(opType,"RegVectMask")==0 ||
strcmp(opType,"VecA")==0 ||
strcmp(opType,"VecS")==0 ||
strcmp(opType,"VecD")==0 ||

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -2265,6 +2265,7 @@ private:
// and return the conversion function to build them from OptoReg
const char* reg_conversion(const char* rep_var) {
if (strcmp(rep_var,"$Register") == 0) return "as_Register";
if (strcmp(rep_var,"$KRegister") == 0) return "as_KRegister";
if (strcmp(rep_var,"$FloatRegister") == 0) return "as_FloatRegister";
#if defined(IA32) || defined(AMD64)
if (strcmp(rep_var,"$XMMRegister") == 0) return "as_XMMRegister";

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -824,7 +824,8 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT));
}
}
assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL,
assert(n_type->isa_vect() == NULL || lrg._is_vector ||
ireg == Op_RegD || ireg == Op_RegL || ireg == Op_RegVectMask,
"vector must be in vector registers");
// Check for bound register masks
@ -917,6 +918,10 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
lrg._is_bound = 1;
}
break;
case Op_RegVectMask:
lrg.set_num_regs(RegMask::SlotsPerRegVectMask);
lrg.set_reg_pressure(1);
break;
case Op_RegF:
case Op_RegI:
case Op_RegN:
@ -1036,8 +1041,8 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
const RegMask &lrgmask = lrg.mask();
uint kreg = n->in(k)->ideal_reg();
bool is_vect = RegMask::is_vector(kreg);
assert(n->in(k)->bottom_type()->isa_vect() == NULL ||
is_vect || kreg == Op_RegD || kreg == Op_RegL,
assert(n->in(k)->bottom_type()->isa_vect() == NULL || is_vect ||
kreg == Op_RegD || kreg == Op_RegL || kreg == Op_RegVectMask,
"vector must be in vector registers");
if (lrgmask.is_bound(kreg))
lrg._is_bound = 1;

View File

@ -163,7 +163,7 @@ public:
bool is_scalable() {
#ifdef ASSERT
if (_is_scalable) {
// Should only be a vector for now, but it could also be a RegVMask in future.
// Should only be a vector for now, but it could also be a RegVectMask in future.
assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg");
}
#endif

View File

@ -2440,7 +2440,7 @@ static bool is_vector_bitwise_op(Node* n) {
}
static bool is_vector_bitwise_cone_root(Node* n) {
if (!is_vector_bitwise_op(n)) {
if (n->bottom_type()->isa_vectmask() || !is_vector_bitwise_op(n)) {
return false;
}
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -409,7 +409,9 @@ uint PhaseChaitin::count_int_pressure(IndexSet* liveout) {
LRG& lrg = lrgs(lidx);
if (lrg.mask_is_nonempty_and_up() &&
!lrg.is_float_or_vector() &&
lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) {
(lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ||
(Matcher::has_predicated_vectors() &&
lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegVectMask])))) {
cnt += lrg.reg_pressure();
}
lidx = elements.next();
@ -445,7 +447,9 @@ void PhaseChaitin::lower_pressure(Block* b, uint location, LRG& lrg, IndexSet* l
} else {
// Do not count the SP and flag registers
const RegMask& r = lrg.mask();
if (r.overlap(*Matcher::idealreg2regmask[Op_RegI])) {
if (r.overlap(*Matcher::idealreg2regmask[Op_RegI]) ||
(Matcher::has_predicated_vectors() &&
r.overlap(*Matcher::idealreg2regmask[Op_RegVectMask]))) {
int_pressure.lower(lrg, location);
}
}
@ -500,7 +504,9 @@ void PhaseChaitin::raise_pressure(Block* b, LRG& lrg, Pressure& int_pressure, Pr
} else {
// Do not count the SP and flag registers
const RegMask& rm = lrg.mask();
if (rm.overlap(*Matcher::idealreg2regmask[Op_RegI])) {
if (rm.overlap(*Matcher::idealreg2regmask[Op_RegI]) ||
(Matcher::has_predicated_vectors() &&
rm.overlap(*Matcher::idealreg2regmask[Op_RegVectMask]))) {
int_pressure.raise(lrg);
}
}

View File

@ -103,6 +103,12 @@ public:
}
#if defined(IA32) || defined(AMD64)
KRegister as_KRegister(PhaseRegAlloc *ra_, const Node *node) const {
return ::as_KRegister(reg(ra_, node));
}
KRegister as_KRegister(PhaseRegAlloc *ra_, const Node *node, int idx) const {
return ::as_KRegister(reg(ra_, node, idx));
}
XMMRegister as_XMMRegister(PhaseRegAlloc *ra_, const Node *node) const {
return ::as_XMMRegister(reg(ra_, node));
}

View File

@ -233,7 +233,7 @@ void PhaseMacroExpand::generate_partial_inlining_block(Node** ctrl, MergeMemNode
inline_block = generate_guard(ctrl, bol_le, NULL, PROB_FAIR);
stub_block = *ctrl;
Node* mask_gen = new VectorMaskGenNode(length, TypeLong::LONG, Type::get_const_basic_type(type));
Node* mask_gen = new VectorMaskGenNode(length, TypeVect::VECTMASK, Type::get_const_basic_type(type));
transform_later(mask_gen);
unsigned vec_size = lane_count * type2aelembytes(type);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -95,6 +95,7 @@ Matcher::Matcher()
idealreg2spillmask [Op_VecY] = NULL;
idealreg2spillmask [Op_VecZ] = NULL;
idealreg2spillmask [Op_RegFlags] = NULL;
idealreg2spillmask [Op_RegVectMask] = NULL;
idealreg2debugmask [Op_RegI] = NULL;
idealreg2debugmask [Op_RegN] = NULL;
@ -109,6 +110,7 @@ Matcher::Matcher()
idealreg2debugmask [Op_VecY] = NULL;
idealreg2debugmask [Op_VecZ] = NULL;
idealreg2debugmask [Op_RegFlags] = NULL;
idealreg2debugmask [Op_RegVectMask] = NULL;
idealreg2mhdebugmask[Op_RegI] = NULL;
idealreg2mhdebugmask[Op_RegN] = NULL;
@ -123,6 +125,7 @@ Matcher::Matcher()
idealreg2mhdebugmask[Op_VecY] = NULL;
idealreg2mhdebugmask[Op_VecZ] = NULL;
idealreg2mhdebugmask[Op_RegFlags] = NULL;
idealreg2mhdebugmask[Op_RegVectMask] = NULL;
debug_only(_mem_node = NULL;) // Ideal memory node consumed by mach node
}
@ -430,7 +433,7 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
return rms;
}
#define NOF_STACK_MASKS (3*12)
#define NOF_STACK_MASKS (3*13)
// Create the initial stack mask used by values spilling to the stack.
// Disallow any debug info in outgoing argument areas by setting the
@ -487,6 +490,10 @@ void Matcher::init_first_stack_mask() {
idealreg2mhdebugmask[Op_VecY] = &rms[34];
idealreg2mhdebugmask[Op_VecZ] = &rms[35];
idealreg2spillmask [Op_RegVectMask] = &rms[36];
idealreg2debugmask [Op_RegVectMask] = &rms[37];
idealreg2mhdebugmask[Op_RegVectMask] = &rms[38];
OptoReg::Name i;
// At first, start with the empty mask
@ -531,6 +538,11 @@ void Matcher::init_first_stack_mask() {
*idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD];
idealreg2spillmask[Op_RegD]->OR(aligned_stack_mask);
if (Matcher::has_predicated_vectors()) {
*idealreg2spillmask[Op_RegVectMask] = *idealreg2regmask[Op_RegVectMask];
idealreg2spillmask[Op_RegVectMask]->OR(aligned_stack_mask);
}
if (Matcher::vector_size_supported(T_BYTE,4)) {
*idealreg2spillmask[Op_VecS] = *idealreg2regmask[Op_VecS];
idealreg2spillmask[Op_VecS]->OR(C->FIRST_STACK_mask());
@ -649,6 +661,7 @@ void Matcher::init_first_stack_mask() {
*idealreg2debugmask [Op_RegF] = *idealreg2spillmask[Op_RegF];
*idealreg2debugmask [Op_RegD] = *idealreg2spillmask[Op_RegD];
*idealreg2debugmask [Op_RegP] = *idealreg2spillmask[Op_RegP];
*idealreg2debugmask [Op_RegVectMask] = *idealreg2spillmask[Op_RegVectMask];
*idealreg2debugmask [Op_VecA] = *idealreg2spillmask[Op_VecA];
*idealreg2debugmask [Op_VecS] = *idealreg2spillmask[Op_VecS];
@ -663,6 +676,7 @@ void Matcher::init_first_stack_mask() {
*idealreg2mhdebugmask[Op_RegF] = *idealreg2spillmask[Op_RegF];
*idealreg2mhdebugmask[Op_RegD] = *idealreg2spillmask[Op_RegD];
*idealreg2mhdebugmask[Op_RegP] = *idealreg2spillmask[Op_RegP];
*idealreg2mhdebugmask[Op_RegVectMask] = *idealreg2spillmask[Op_RegVectMask];
*idealreg2mhdebugmask[Op_VecA] = *idealreg2spillmask[Op_VecA];
*idealreg2mhdebugmask[Op_VecS] = *idealreg2spillmask[Op_VecS];
@ -683,6 +697,7 @@ void Matcher::init_first_stack_mask() {
idealreg2debugmask[Op_RegF]->SUBTRACT(*caller_save_mask);
idealreg2debugmask[Op_RegD]->SUBTRACT(*caller_save_mask);
idealreg2debugmask[Op_RegP]->SUBTRACT(*caller_save_mask);
idealreg2debugmask[Op_RegVectMask]->SUBTRACT(*caller_save_mask);
idealreg2debugmask[Op_VecA]->SUBTRACT(*caller_save_mask);
idealreg2debugmask[Op_VecS]->SUBTRACT(*caller_save_mask);
@ -697,6 +712,7 @@ void Matcher::init_first_stack_mask() {
idealreg2mhdebugmask[Op_RegF]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_RegD]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_RegP]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_RegVectMask]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_VecA]->SUBTRACT(*mh_caller_save_mask);
idealreg2mhdebugmask[Op_VecS]->SUBTRACT(*mh_caller_save_mask);
@ -965,6 +981,7 @@ void Matcher::init_spill_mask( Node *ret ) {
idealreg2regmask[Op_VecX] = regmask_for_ideal_register(Op_VecX, ret);
idealreg2regmask[Op_VecY] = regmask_for_ideal_register(Op_VecY, ret);
idealreg2regmask[Op_VecZ] = regmask_for_ideal_register(Op_VecZ, ret);
idealreg2regmask[Op_RegVectMask] = regmask_for_ideal_register(Op_RegVectMask, ret);
}
#ifdef ASSERT
@ -2559,6 +2576,7 @@ const RegMask* Matcher::regmask_for_ideal_register(uint ideal_reg, Node* ret) {
case Op_VecX: // fall-through
case Op_VecY: // fall-through
case Op_VecZ: spill = new LoadVectorNode(NULL, mem, fp, atp, t->is_vect()); break;
case Op_RegVectMask: return Matcher::predicate_reg_mask();
default: ShouldNotReachHere();
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -319,6 +319,8 @@ public:
// Some microarchitectures have mask registers used on vectors
static const bool has_predicated_vectors(void);
static const RegMask* predicate_reg_mask(void);
static const TypeVect* predicate_reg_type(const Type* elemTy, int length);
// Some uarchs have different sized float register resources
static const int float_pressure(int default_pressure_threshold);

View File

@ -693,6 +693,8 @@ public:
DEFINE_CLASS_ID(EncodeNarrowPtr, Type, 6)
DEFINE_CLASS_ID(EncodeP, EncodeNarrowPtr, 0)
DEFINE_CLASS_ID(EncodePKlass, EncodeNarrowPtr, 1)
DEFINE_CLASS_ID(Vector, Type, 7)
DEFINE_CLASS_ID(VectorMaskCmp, Vector, 0)
DEFINE_CLASS_ID(Proj, Node, 3)
DEFINE_CLASS_ID(CatchProj, Proj, 0)
@ -737,8 +739,6 @@ public:
DEFINE_CLASS_ID(BoxLock, Node, 10)
DEFINE_CLASS_ID(Add, Node, 11)
DEFINE_CLASS_ID(Mul, Node, 12)
DEFINE_CLASS_ID(Vector, Node, 13)
DEFINE_CLASS_ID(VectorMaskCmp, Vector, 0)
DEFINE_CLASS_ID(ClearArray, Node, 14)
DEFINE_CLASS_ID(Halt, Node, 15)
DEFINE_CLASS_ID(Opaque1, Node, 16)

View File

@ -44,7 +44,7 @@ const char *NodeClassNames[] = {
"VecX",
"VecY",
"VecZ",
"RegVMask",
"RegVectMask",
"RegFlags",
"_last_machine_leaf",
#include "classes.hpp"

View File

@ -43,7 +43,7 @@ enum Opcodes {
macro(VecX) // Machine vectorx register
macro(VecY) // Machine vectory register
macro(VecZ) // Machine vectorz register
macro(RegVMask) // Vector mask/predicate register
macro(RegVectMask) // Vector mask/predicate register
macro(RegFlags) // Machine flags register
_last_machine_leaf, // Split between regular opcodes and machine
#include "classes.hpp"

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -36,7 +36,7 @@
// non-SSA names. A Register is represented as a number. Non-regular values
// (e.g., Control, Memory, I/O) use the Special register. The actual machine
// registers (as described in the ADL file for a machine) start at zero.
// Stack-slots (spill locations) start at the nest Chunk past the last machine
// Stack-slots (spill locations) start at the next Chunk past the last machine
// register.
//
// Note that stack spill-slots are treated as a very large register set.

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -67,6 +67,8 @@ int RegMask::num_registers(uint ireg) {
return SlotsPerVecX;
case Op_VecD:
return SlotsPerVecD;
case Op_RegVectMask:
return SlotsPerRegVectMask;
case Op_RegD:
case Op_RegL:
#ifdef _LP64

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -105,6 +105,7 @@ class RegMask {
SlotsPerVecX = 4,
SlotsPerVecY = 8,
SlotsPerVecZ = 16,
SlotsPerRegVectMask = X86_ONLY(2) NOT_X86(1)
};
// A constructor only used by the ADLC output. All mask fields are filled

View File

@ -63,6 +63,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = {
{ Bad, T_ARRAY, "array:", false, Node::NotAMachineReg, relocInfo::none }, // Array
#if defined(PPC64)
{ Bad, T_ILLEGAL, "vectormask:", false, Op_RegVectMask, relocInfo::none }, // VectorMask.
{ Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA.
{ Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS
{ Bad, T_ILLEGAL, "vectord:", false, Op_RegL, relocInfo::none }, // VectorD
@ -70,6 +71,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = {
{ Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY
{ Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ
#elif defined(S390)
{ Bad, T_ILLEGAL, "vectormask:", false, Op_RegVectMask, relocInfo::none }, // VectorMask.
{ Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA.
{ Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS
{ Bad, T_ILLEGAL, "vectord:", false, Op_RegL, relocInfo::none }, // VectorD
@ -77,6 +79,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = {
{ Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY
{ Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ
#else // all other
{ Bad, T_ILLEGAL, "vectormask:", false, Op_RegVectMask, relocInfo::none }, // VectorMask.
{ Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA.
{ Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS
{ Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD
@ -658,6 +661,9 @@ void Type::Initialize_shared(Compile* current) {
// get_zero_type() should not happen for T_CONFLICT
_zero_type[T_CONFLICT]= NULL;
TypeVect::VECTMASK = (TypeVect*)(new TypeVectMask(TypeInt::BOOL, MaxVectorSize))->hashcons();
mreg2type[Op_RegVectMask] = TypeVect::VECTMASK;
if (Matcher::supports_scalable_vector()) {
TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE));
}
@ -2376,6 +2382,7 @@ const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors
const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors
const TypeVect *TypeVect::VECTY = NULL; // 256-bit vectors
const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors
const TypeVect *TypeVect::VECTMASK = NULL; // predicate/mask vector
//------------------------------make-------------------------------------------
const TypeVect* TypeVect::make(const Type *elem, uint length) {
@ -2403,6 +2410,15 @@ const TypeVect* TypeVect::make(const Type *elem, uint length) {
return NULL;
}
// Build the type used for a vector mask with the given element type and
// length. When the platform has no predicated vectors the mask is typed as an
// ordinary vector via make(); otherwise the matcher supplies the predicate
// register type, which is hash-consed before being returned.
const TypeVect *TypeVect::makemask(const Type* elem, uint length) {
  if (!Matcher::has_predicated_vectors()) {
    return make(elem, length);
  }
  const TypeVect* mtype = Matcher::predicate_reg_type(elem, length);
  return (TypeVect*)(const_cast<TypeVect*>(mtype))->hashcons();
}
//------------------------------meet-------------------------------------------
// Compute the MEET of two types. It returns a new Type object.
const Type *TypeVect::xmeet( const Type *t ) const {
@ -2417,6 +2433,13 @@ const Type *TypeVect::xmeet( const Type *t ) const {
default: // All else is a mistake
typerr(t);
case VectorMask: {
const TypeVectMask* v = t->is_vectmask();
assert( base() == v->base(), "");
assert(length() == v->length(), "");
assert(element_basic_type() == v->element_basic_type(), "");
return TypeVect::makemask(_elem->xmeet(v->_elem), _length);
}
case VectorA:
case VectorS:
case VectorD:
@ -2484,6 +2507,8 @@ void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const {
st->print("vectory["); break;
case VectorZ:
st->print("vectorz["); break;
case VectorMask:
st->print("vectormask["); break;
default:
ShouldNotReachHere();
}
@ -2493,6 +2518,14 @@ void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const {
}
#endif
bool TypeVectMask::eq(const Type *t) const {
const TypeVectMask *v = t->is_vectmask();
return (element_type() == v->element_type()) && (length() == v->length());
}
// Compute the dual of this mask type: same length, with the element type
// replaced by its dual.
const Type *TypeVectMask::xdual() const {
  const Type* dual_elem = element_type()->dual();
  return new TypeVectMask(dual_elem, length());
}
//=============================================================================
// Convenience common pre-built types.

View File

@ -60,6 +60,7 @@ class TypeVectD;
class TypeVectX;
class TypeVectY;
class TypeVectZ;
class TypeVectMask;
class TypePtr;
class TypeRawPtr;
class TypeOopPtr;
@ -89,6 +90,8 @@ public:
Tuple, // Method signature or object layout
Array, // Array types
VectorMask, // Vector predicate/mask type
VectorA, // (Scalable) Vector types for vector length agnostic
VectorS, // 32bit Vector types
VectorD, // 64bit Vector types
@ -299,6 +302,8 @@ public:
const TypeAry *isa_ary() const; // Returns NULL if not ary
const TypeVect *is_vect() const; // Vector
const TypeVect *isa_vect() const; // Returns NULL if not a Vector
const TypeVectMask *is_vectmask() const; // Predicate/Mask Vector
const TypeVectMask *isa_vectmask() const; // Returns NULL if not a Vector Predicate/Mask
const TypePtr *is_ptr() const; // Asserts it is a ptr type
const TypePtr *isa_ptr() const; // Returns NULL if not ptr type
const TypeRawPtr *isa_rawptr() const; // NOT Java oop
@ -800,6 +805,13 @@ public:
// Used directly by Replicate nodes to construct singleton vector.
static const TypeVect *make(const Type* elem, uint length);
// Convenience overload taking a BasicType: looks up the bottom primitive
// type for elem_bt and forwards to makemask(const Type*, uint).
static const TypeVect *makemask(const BasicType elem_bt, uint length) {
  const Type* elem = get_const_basic_type(elem_bt);
  return makemask(elem, length);
}
static const TypeVect *makemask(const Type* elem, uint length);
virtual const Type *xmeet( const Type *t) const;
virtual const Type *xdual() const; // Compute dual right now.
@ -809,6 +821,7 @@ public:
static const TypeVect *VECTX;
static const TypeVect *VECTY;
static const TypeVect *VECTZ;
static const TypeVect *VECTMASK;
#ifndef PRODUCT
virtual void dump2(Dict &d, uint, outputStream *st) const; // Specialized per-Type dumping
@ -845,6 +858,14 @@ class TypeVectZ : public TypeVect {
TypeVectZ(const Type* elem, uint length) : TypeVect(VectorZ, elem, length) {}
};
// Type of a vector predicate/mask value. It is a TypeVect whose base is
// VectorMask, with its own equality and dual computation.
class TypeVectMask : public TypeVect {
public:
friend class TypeVect;
// Constructs a mask type with the given element type and length.
TypeVectMask(const Type* elem, uint length) : TypeVect(VectorMask, elem, length) {}
// Equality against another type (defined out of line).
virtual bool eq(const Type *t) const;
// Dual of this mask type (defined out of line).
virtual const Type *xdual() const;
};
//------------------------------TypePtr----------------------------------------
// Class of machine Pointer Types: raw data, instances or arrays.
// If the _base enum is AnyPtr, then this refers to all of the above.
@ -1682,13 +1703,22 @@ inline const TypeAry *Type::isa_ary() const {
return ((_base == Array) ? (TypeAry*)this : NULL);
}
// Checked downcast to TypeVectMask: asserts that _base is VectorMask and then
// casts this. Use isa_vectmask() when the type may not be a vector mask.
inline const TypeVectMask *Type::is_vectmask() const {
assert( _base == VectorMask, "Not a Vector Mask" );
return (TypeVectMask*)this;
}
// Unchecked-query downcast: yields this as a TypeVectMask when _base is
// VectorMask, and NULL for every other kind of type.
inline const TypeVectMask *Type::isa_vectmask() const {
  if (_base != VectorMask) {
    return NULL;
  }
  return (TypeVectMask*)this;
}
inline const TypeVect *Type::is_vect() const {
assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" );
assert( _base >= VectorMask && _base <= VectorZ, "Not a Vector" );
return (TypeVect*)this;
}
inline const TypeVect *Type::isa_vect() const {
return (_base >= VectorA && _base <= VectorZ) ? (TypeVect*)this : NULL;
return (_base >= VectorMask && _base <= VectorZ) ? (TypeVect*)this : NULL;
}
inline const TypePtr *Type::is_ptr() const {

View File

@ -804,7 +804,7 @@ class StoreVectorMaskedNode : public StoreVectorNode {
public:
StoreVectorMaskedNode(Node* c, Node* mem, Node* dst, Node* src, const TypePtr* at, Node* mask)
: StoreVectorNode(c, mem, dst, at, src) {
assert(mask->bottom_type()->is_long(), "sanity");
assert(mask->bottom_type()->is_vectmask(), "sanity");
init_class_id(Class_StoreVector);
set_mismatched_access();
add_req(mask);
@ -822,7 +822,7 @@ class LoadVectorMaskedNode : public LoadVectorNode {
public:
LoadVectorMaskedNode(Node* c, Node* mem, Node* src, const TypePtr* at, const TypeVect* vt, Node* mask)
: LoadVectorNode(c, mem, src, at, vt) {
assert(mask->bottom_type()->is_long(), "sanity");
assert(mask->bottom_type()->is_vectmask(), "sanity");
init_class_id(Class_LoadVector);
set_mismatched_access();
add_req(mask);
@ -845,6 +845,9 @@ class VectorMaskGenNode : public TypeNode {
virtual int Opcode() const;
const Type* get_elem_type() { return _elemType;}
virtual uint size_of() const { return sizeof(VectorMaskGenNode); }
// The generated mask is assigned the vector mask/predicate register class.
virtual uint ideal_reg() const { return Op_RegVectMask; }
private:
const Type* _elemType;