8262355: Support for AVX-512 opmask register allocation.
Reviewed-by: vlivanov, njian, kvn
This commit is contained in:
parent
078066695b
commit
f084bd2f61
@ -467,22 +467,22 @@ reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next());
|
||||
// ----------------------------
|
||||
// SVE Predicate Registers
|
||||
// ----------------------------
|
||||
reg_def P0 (SOC, SOC, Op_RegVMask, 0, p0->as_VMReg());
|
||||
reg_def P1 (SOC, SOC, Op_RegVMask, 1, p1->as_VMReg());
|
||||
reg_def P2 (SOC, SOC, Op_RegVMask, 2, p2->as_VMReg());
|
||||
reg_def P3 (SOC, SOC, Op_RegVMask, 3, p3->as_VMReg());
|
||||
reg_def P4 (SOC, SOC, Op_RegVMask, 4, p4->as_VMReg());
|
||||
reg_def P5 (SOC, SOC, Op_RegVMask, 5, p5->as_VMReg());
|
||||
reg_def P6 (SOC, SOC, Op_RegVMask, 6, p6->as_VMReg());
|
||||
reg_def P7 (SOC, SOC, Op_RegVMask, 7, p7->as_VMReg());
|
||||
reg_def P8 (SOC, SOC, Op_RegVMask, 8, p8->as_VMReg());
|
||||
reg_def P9 (SOC, SOC, Op_RegVMask, 9, p9->as_VMReg());
|
||||
reg_def P10 (SOC, SOC, Op_RegVMask, 10, p10->as_VMReg());
|
||||
reg_def P11 (SOC, SOC, Op_RegVMask, 11, p11->as_VMReg());
|
||||
reg_def P12 (SOC, SOC, Op_RegVMask, 12, p12->as_VMReg());
|
||||
reg_def P13 (SOC, SOC, Op_RegVMask, 13, p13->as_VMReg());
|
||||
reg_def P14 (SOC, SOC, Op_RegVMask, 14, p14->as_VMReg());
|
||||
reg_def P15 (SOC, SOC, Op_RegVMask, 15, p15->as_VMReg());
|
||||
reg_def P0 (SOC, SOC, Op_RegVectMask, 0, p0->as_VMReg());
|
||||
reg_def P1 (SOC, SOC, Op_RegVectMask, 1, p1->as_VMReg());
|
||||
reg_def P2 (SOC, SOC, Op_RegVectMask, 2, p2->as_VMReg());
|
||||
reg_def P3 (SOC, SOC, Op_RegVectMask, 3, p3->as_VMReg());
|
||||
reg_def P4 (SOC, SOC, Op_RegVectMask, 4, p4->as_VMReg());
|
||||
reg_def P5 (SOC, SOC, Op_RegVectMask, 5, p5->as_VMReg());
|
||||
reg_def P6 (SOC, SOC, Op_RegVectMask, 6, p6->as_VMReg());
|
||||
reg_def P7 (SOC, SOC, Op_RegVectMask, 7, p7->as_VMReg());
|
||||
reg_def P8 (SOC, SOC, Op_RegVectMask, 8, p8->as_VMReg());
|
||||
reg_def P9 (SOC, SOC, Op_RegVectMask, 9, p9->as_VMReg());
|
||||
reg_def P10 (SOC, SOC, Op_RegVectMask, 10, p10->as_VMReg());
|
||||
reg_def P11 (SOC, SOC, Op_RegVectMask, 11, p11->as_VMReg());
|
||||
reg_def P12 (SOC, SOC, Op_RegVectMask, 12, p12->as_VMReg());
|
||||
reg_def P13 (SOC, SOC, Op_RegVectMask, 13, p13->as_VMReg());
|
||||
reg_def P14 (SOC, SOC, Op_RegVectMask, 14, p14->as_VMReg());
|
||||
reg_def P15 (SOC, SOC, Op_RegVectMask, 15, p15->as_VMReg());
|
||||
|
||||
// ----------------------------
|
||||
// Special Registers
|
||||
@ -2439,6 +2439,14 @@ const bool Matcher::has_predicated_vectors(void) {
|
||||
return UseSVE > 0;
|
||||
}
|
||||
|
||||
// Returns the register mask to use for vector predicate (mask) values.
// NOTE(review): _PR_REG_mask is presumably the generated mask for the
// predicate register class of this port -- confirm against the register
// class definitions.
const RegMask* Matcher::predicate_reg_mask(void) {
  const RegMask* predicate_mask = &_PR_REG_mask;
  return predicate_mask;
}
|
||||
|
||||
// Returns the vector type used for a predicate value, built as a newly
// allocated TypeVectMask from the given element type and length.
const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
  const TypeVect* mask_type = new TypeVectMask(elemTy, length);
  return mask_type;
}
|
||||
|
||||
// Reports that vector variable shifts are supported unconditionally.
bool Matcher::supports_vector_variable_shifts(void) {
  return true;
}
|
||||
@ -5601,7 +5609,7 @@ operand vRegD_V31()
|
||||
operand pRegGov()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(gov_pr));
|
||||
match(RegVMask);
|
||||
match(RegVectMask);
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
interface(REG_INTER);
|
||||
|
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 2008, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2008, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
// This code is free software; you can redistribute it and/or modify it
|
||||
@ -993,6 +993,14 @@ const bool Matcher::has_predicated_vectors(void) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// No predicate register mask is provided here: always returns NULL.
const RegMask* Matcher::predicate_reg_mask(void) {
  return NULL;
}
|
||||
|
||||
// No predicate vector type is provided here: both arguments are ignored and
// NULL is always returned.
const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
  return NULL;
}
|
||||
|
||||
bool Matcher::supports_vector_variable_shifts(void) {
|
||||
return VM_Version::has_simd();
|
||||
}
|
||||
|
@ -2156,6 +2156,14 @@ const bool Matcher::has_predicated_vectors(void) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// No predicate register mask is provided here: always returns NULL.
const RegMask* Matcher::predicate_reg_mask(void) {
  return NULL;
}
|
||||
|
||||
// No predicate vector type is provided here: both arguments are ignored and
// NULL is always returned.
const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
  return NULL;
}
|
||||
|
||||
// Vector variable shifts are reported as unavailable.
bool Matcher::supports_vector_variable_shifts(void) {
  return false; // not supported
}
|
||||
|
@ -1546,6 +1546,14 @@ const bool Matcher::has_predicated_vectors(void) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// No predicate register mask is provided here: always returns NULL.
const RegMask* Matcher::predicate_reg_mask(void) {
  return NULL;
}
|
||||
|
||||
// No predicate vector type is provided here: both arguments are ignored and
// NULL is always returned.
const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
  return NULL;
}
|
||||
|
||||
// Vector variable shifts are reported as unavailable.
bool Matcher::supports_vector_variable_shifts(void) {
  return false; // not supported
}
|
||||
|
@ -2452,6 +2452,22 @@ void Assembler::kmovwl(KRegister dst, Address src) {
|
||||
emit_operand((Register)dst, src);
|
||||
}
|
||||
|
||||
// Stores an opmask register to memory (word-sized kmov form).
// Emits a VEX prefix in the 0F opcode map with no SIMD prefix, the opcode
// byte 0x91, and then the operand bytes with `src` as the register operand
// and `dst` as the memory operand. Requires EVEX support.
void Assembler::kmovwl(Address dst, KRegister src) {
  assert(VM_Version::supports_evex(), "");
  InstructionMark mark(this);
  InstructionAttr attrs(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attrs);
  emit_int8((unsigned char)0x91);
  // The KRegister is cast to Register so the generic operand emitter can be
  // reused for the register field.
  emit_operand((Register)src, dst);
}
|
||||
|
||||
// Copies one opmask register to another (word-sized kmov form).
// Emits a VEX prefix in the 0F opcode map with no SIMD prefix, followed by
// opcode 0x90 and a register-direct ModRM byte (0xC0 | encode).
//
// Precondition is EVEX support only: the word-sized opmask move belongs to
// the base AVX-512 feature set, and MacroAssembler::kmov(KRegister, KRegister)
// selects this overload precisely when AVX512BW is not available. Asserting
// supports_avx512bw() here would therefore always fail on that fallback path
// in debug builds. This also matches the sibling kmovwl(Address, KRegister).
void Assembler::kmovwl(KRegister dst, KRegister src) {
  assert(VM_Version::supports_evex(), "");
  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
  emit_int16((unsigned char)0x90, (0xC0 | encode));
}
|
||||
|
||||
void Assembler::kmovdl(KRegister dst, Register src) {
|
||||
assert(VM_Version::supports_avx512bw(), "");
|
||||
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
|
@ -1459,6 +1459,8 @@ private:
|
||||
void kmovwl(KRegister dst, Register src);
|
||||
void kmovwl(KRegister dst, Address src);
|
||||
void kmovwl(Register dst, KRegister src);
|
||||
void kmovwl(Address dst, KRegister src);
|
||||
void kmovwl(KRegister dst, KRegister src);
|
||||
void kmovdl(KRegister dst, Register src);
|
||||
void kmovdl(Register dst, KRegister src);
|
||||
void kmovql(KRegister dst, KRegister src);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -49,18 +49,18 @@ inline Assembler::AvxVectorLen C2_MacroAssembler::vector_length_encoding(int vle
|
||||
}
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::setvectmask(Register dst, Register src) {
|
||||
void C2_MacroAssembler::setvectmask(Register dst, Register src, KRegister mask) {
|
||||
guarantee(PostLoopMultiversioning, "must be");
|
||||
Assembler::movl(dst, 1);
|
||||
Assembler::shlxl(dst, dst, src);
|
||||
Assembler::decl(dst);
|
||||
Assembler::kmovdl(k1, dst);
|
||||
Assembler::kmovdl(mask, dst);
|
||||
Assembler::movl(dst, src);
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::restorevectmask() {
|
||||
void C2_MacroAssembler::restorevectmask(KRegister mask) {
|
||||
guarantee(PostLoopMultiversioning, "must be");
|
||||
Assembler::knotwl(k1, k0);
|
||||
Assembler::knotwl(mask, k0);
|
||||
}
|
||||
|
||||
#if INCLUDE_RTM_OPT
|
||||
@ -1893,10 +1893,11 @@ void C2_MacroAssembler::reduce8L(int opcode, Register dst, Register src1, XMMReg
|
||||
reduce4L(opcode, dst, src1, vtmp2, vtmp1, vtmp2);
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::genmask(Register dst, Register len, Register temp) {
|
||||
void C2_MacroAssembler::genmask(KRegister dst, Register len, Register temp) {
|
||||
assert(ArrayCopyPartialInlineSize <= 64,"");
|
||||
mov64(dst, -1L);
|
||||
bzhiq(dst, dst, len);
|
||||
mov64(temp, -1L);
|
||||
bzhiq(temp, temp, len);
|
||||
kmovql(dst, temp);
|
||||
}
|
||||
#endif // _LP64
|
||||
|
||||
@ -2154,7 +2155,8 @@ void C2_MacroAssembler::evpblend(BasicType typ, XMMRegister dst, KRegister kmask
|
||||
}
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2) {
|
||||
void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
|
||||
XMMRegister vtmp1, XMMRegister vtmp2, KRegister mask) {
|
||||
switch(vlen) {
|
||||
case 4:
|
||||
assert(vtmp1 != xnoreg, "required.");
|
||||
@ -2192,14 +2194,13 @@ void C2_MacroAssembler::vectortest(int bt, int vlen, XMMRegister src1, XMMRegist
|
||||
break;
|
||||
case 64:
|
||||
{
|
||||
KRegister ktemp = k2; // Use a hardcoded temp due to no k register allocation.
|
||||
assert((vtmp1 == xnoreg) && (vtmp2 == xnoreg), "required.");
|
||||
evpcmpeqb(ktemp, src1, src2, Assembler::AVX_512bit);
|
||||
evpcmpeqb(mask, src1, src2, Assembler::AVX_512bit);
|
||||
if (bt == BoolTest::ne) {
|
||||
ktestql(ktemp, ktemp);
|
||||
ktestql(mask, mask);
|
||||
} else {
|
||||
assert(bt == BoolTest::overflow, "required");
|
||||
kortestql(ktemp, ktemp);
|
||||
kortestql(mask, mask);
|
||||
}
|
||||
}
|
||||
break;
|
||||
@ -2916,7 +2917,7 @@ void C2_MacroAssembler::load_next_elements(Register elem1, Register elem2, Regis
|
||||
// Compare strings, used for char[] and byte[].
|
||||
void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2, Register result,
|
||||
XMMRegister vec1, int ae) {
|
||||
XMMRegister vec1, int ae, KRegister mask) {
|
||||
ShortBranchVerifier sbv(this);
|
||||
Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
|
||||
Label COMPARE_WIDE_VECTORS_LOOP_FAILED; // used only _LP64 && AVX3
|
||||
@ -3069,12 +3070,12 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop
|
||||
if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
|
||||
evmovdquq(vec1, Address(str1, result, scale), Assembler::AVX_512bit);
|
||||
evpcmpeqb(k7, vec1, Address(str2, result, scale), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
|
||||
evpcmpeqb(mask, vec1, Address(str2, result, scale), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
|
||||
} else {
|
||||
vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_512bit);
|
||||
evpcmpeqb(k7, vec1, Address(str2, result, scale2), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
|
||||
evpcmpeqb(mask, vec1, Address(str2, result, scale2), Assembler::AVX_512bit); // k7 == 11..11, if operands equal, otherwise k7 has some 0
|
||||
}
|
||||
kortestql(k7, k7);
|
||||
kortestql(mask, mask);
|
||||
jcc(Assembler::aboveEqual, COMPARE_WIDE_VECTORS_LOOP_FAILED); // miscompare
|
||||
addptr(result, stride2x2); // update since we already compared at this addr
|
||||
subl(cnt2, stride2x2); // and sub the size too
|
||||
@ -3258,7 +3259,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
|
||||
bind(COMPARE_WIDE_VECTORS_LOOP_FAILED);
|
||||
|
||||
kmovql(cnt1, k7);
|
||||
kmovql(cnt1, mask);
|
||||
notq(cnt1);
|
||||
bsfq(cnt2, cnt1);
|
||||
if (ae != StrIntrinsicNode::LL) {
|
||||
@ -3307,7 +3308,7 @@ void C2_MacroAssembler::string_compare(Register str1, Register str2,
|
||||
// }
|
||||
void C2_MacroAssembler::has_negatives(Register ary1, Register len,
|
||||
Register result, Register tmp1,
|
||||
XMMRegister vec1, XMMRegister vec2) {
|
||||
XMMRegister vec1, XMMRegister vec2, KRegister mask1, KRegister mask2) {
|
||||
// rsi: byte array
|
||||
// rcx: len
|
||||
// rax: result
|
||||
@ -3339,8 +3340,8 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
|
||||
|
||||
bind(test_64_loop);
|
||||
// Check whether our 64 elements of size byte contain negatives
|
||||
evpcmpgtb(k2, vec2, Address(ary1, len, Address::times_1), Assembler::AVX_512bit);
|
||||
kortestql(k2, k2);
|
||||
evpcmpgtb(mask1, vec2, Address(ary1, len, Address::times_1), Assembler::AVX_512bit);
|
||||
kortestql(mask1, mask1);
|
||||
jcc(Assembler::notZero, TRUE_LABEL);
|
||||
|
||||
addptr(len, 64);
|
||||
@ -3357,7 +3358,7 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
|
||||
mov64(tmp3_aliased, 0xFFFFFFFFFFFFFFFF);
|
||||
shlxq(tmp3_aliased, tmp3_aliased, tmp1);
|
||||
notq(tmp3_aliased);
|
||||
kmovql(k3, tmp3_aliased);
|
||||
kmovql(mask2, tmp3_aliased);
|
||||
#else
|
||||
Label k_init;
|
||||
jmp(k_init);
|
||||
@ -3382,11 +3383,11 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
|
||||
lea(len, InternalAddress(tmp));
|
||||
// create mask to test for negative byte inside a vector
|
||||
evpbroadcastb(vec1, tmp1, Assembler::AVX_512bit);
|
||||
evpcmpgtb(k3, vec1, Address(len, 0), Assembler::AVX_512bit);
|
||||
evpcmpgtb(mask2, vec1, Address(len, 0), Assembler::AVX_512bit);
|
||||
|
||||
#endif
|
||||
evpcmpgtb(k2, k3, vec2, Address(ary1, 0), Assembler::AVX_512bit);
|
||||
ktestq(k2, k3);
|
||||
evpcmpgtb(mask1, mask2, vec2, Address(ary1, 0), Assembler::AVX_512bit);
|
||||
ktestq(mask1, mask2);
|
||||
jcc(Assembler::notZero, TRUE_LABEL);
|
||||
|
||||
jmp(FALSE_LABEL);
|
||||
@ -3513,7 +3514,7 @@ void C2_MacroAssembler::has_negatives(Register ary1, Register len,
|
||||
// Compare char[] or byte[] arrays aligned to 4 bytes or substrings.
|
||||
void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2,
|
||||
Register limit, Register result, Register chr,
|
||||
XMMRegister vec1, XMMRegister vec2, bool is_char) {
|
||||
XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask) {
|
||||
ShortBranchVerifier sbv(this);
|
||||
Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE;
|
||||
|
||||
@ -3576,8 +3577,8 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
|
||||
bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop
|
||||
|
||||
evmovdquq(vec1, Address(ary1, limit, Address::times_1), Assembler::AVX_512bit);
|
||||
evpcmpeqb(k7, vec1, Address(ary2, limit, Address::times_1), Assembler::AVX_512bit);
|
||||
kortestql(k7, k7);
|
||||
evpcmpeqb(mask, vec1, Address(ary2, limit, Address::times_1), Assembler::AVX_512bit);
|
||||
kortestql(mask, mask);
|
||||
jcc(Assembler::aboveEqual, FALSE_LABEL); // miscompare
|
||||
addptr(limit, 64); // update since we already compared at this addr
|
||||
cmpl(limit, -64);
|
||||
@ -3594,8 +3595,8 @@ void C2_MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register
|
||||
//
|
||||
addptr(result, -64); // it is safe, bc we just came from this area
|
||||
evmovdquq(vec1, Address(ary1, result, Address::times_1), Assembler::AVX_512bit);
|
||||
evpcmpeqb(k7, vec1, Address(ary2, result, Address::times_1), Assembler::AVX_512bit);
|
||||
kortestql(k7, k7);
|
||||
evpcmpeqb(mask, vec1, Address(ary2, result, Address::times_1), Assembler::AVX_512bit);
|
||||
kortestql(mask, mask);
|
||||
jcc(Assembler::aboveEqual, FALSE_LABEL); // miscompare
|
||||
|
||||
jmp(TRUE_LABEL);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -31,8 +31,8 @@ public:
|
||||
Assembler::AvxVectorLen vector_length_encoding(int vlen_in_bytes);
|
||||
|
||||
// special instructions for EVEX
|
||||
void setvectmask(Register dst, Register src);
|
||||
void restorevectmask();
|
||||
void setvectmask(Register dst, Register src, KRegister mask);
|
||||
void restorevectmask(KRegister mask);
|
||||
|
||||
// Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file.
|
||||
// See full description in macroAssembler_x86.cpp.
|
||||
@ -131,7 +131,7 @@ public:
|
||||
|
||||
// vector test
|
||||
void vectortest(int bt, int vlen, XMMRegister src1, XMMRegister src2,
|
||||
XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg);
|
||||
XMMRegister vtmp1 = xnoreg, XMMRegister vtmp2 = xnoreg, KRegister mask = knoreg);
|
||||
|
||||
// blend
|
||||
void evpcmp(BasicType typ, KRegister kdmask, KRegister ksmask, XMMRegister src1, AddressLiteral adr, int comparison, int vector_len, Register scratch = rscratch1);
|
||||
@ -146,7 +146,7 @@ public:
|
||||
void reduceI(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
#ifdef _LP64
|
||||
void reduceL(int opcode, int vlen, Register dst, Register src1, XMMRegister src2, XMMRegister vtmp1, XMMRegister vtmp2);
|
||||
void genmask(Register dst, Register len, Register temp);
|
||||
void genmask(KRegister dst, Register len, Register temp);
|
||||
#endif // _LP64
|
||||
|
||||
// dst = reduce(op, src2) using vtmp as temps
|
||||
@ -244,17 +244,17 @@ public:
|
||||
// Compare strings.
|
||||
void string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2, Register result,
|
||||
XMMRegister vec1, int ae);
|
||||
XMMRegister vec1, int ae, KRegister mask = knoreg);
|
||||
|
||||
// Search for Non-ASCII character (Negative byte value) in a byte array,
|
||||
// return true if it has any and false otherwise.
|
||||
void has_negatives(Register ary1, Register len,
|
||||
Register result, Register tmp1,
|
||||
XMMRegister vec1, XMMRegister vec2);
|
||||
XMMRegister vec1, XMMRegister vec2, KRegister mask1 = knoreg, KRegister mask2 = knoreg);
|
||||
|
||||
// Compare char[] or byte[] arrays.
|
||||
void arrays_equals(bool is_array_equ, Register ary1, Register ary2,
|
||||
Register limit, Register result, Register chr,
|
||||
XMMRegister vec1, XMMRegister vec2, bool is_char);
|
||||
XMMRegister vec1, XMMRegister vec2, bool is_char, KRegister mask = knoreg);
|
||||
|
||||
#endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP
|
||||
|
@ -30,7 +30,7 @@
|
||||
|
||||
// processor dependent initialization for i486
|
||||
|
||||
LP64_ONLY(extern void reg_mask_init();)
|
||||
extern void reg_mask_init();
|
||||
|
||||
void Compile::pd_compiler2_init() {
|
||||
guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" );
|
||||
@ -61,5 +61,5 @@ void Compile::pd_compiler2_init() {
|
||||
OptoReg::invalidate(i);
|
||||
}
|
||||
}
|
||||
LP64_ONLY(reg_mask_init();)
|
||||
reg_mask_init();
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -149,7 +149,6 @@ void ZBarrierSetAssembler::load_at(MacroAssembler* masm,
|
||||
// Call VM
|
||||
call_vm(masm, ZBarrierSetRuntime::load_barrier_on_oop_field_preloaded_addr(decorators), dst, scratch);
|
||||
|
||||
// Restore registers
|
||||
__ movdqu(xmm0, Address(rsp, xmm_size * 0));
|
||||
__ movdqu(xmm1, Address(rsp, xmm_size * 1));
|
||||
__ movdqu(xmm2, Address(rsp, xmm_size * 2));
|
||||
@ -394,6 +393,7 @@ private:
|
||||
|
||||
MacroAssembler* const _masm;
|
||||
GrowableArray<Register> _gp_registers;
|
||||
GrowableArray<KRegister> _opmask_registers;
|
||||
GrowableArray<XMMRegisterData> _xmm_registers;
|
||||
int _spill_size;
|
||||
int _spill_offset;
|
||||
@ -450,11 +450,21 @@ private:
|
||||
__ movq(Address(rsp, _spill_offset), reg);
|
||||
}
|
||||
|
||||
void opmask_register_save(KRegister reg) {
|
||||
_spill_offset -= 8;
|
||||
__ kmovql(Address(rsp, _spill_offset), reg);
|
||||
}
|
||||
|
||||
void gp_register_restore(Register reg) {
|
||||
__ movq(reg, Address(rsp, _spill_offset));
|
||||
_spill_offset += 8;
|
||||
}
|
||||
|
||||
void opmask_register_restore(KRegister reg) {
|
||||
__ kmovql(reg, Address(rsp, _spill_offset));
|
||||
_spill_offset += 8;
|
||||
}
|
||||
|
||||
void initialize(ZLoadBarrierStubC2* stub) {
|
||||
// Create mask of caller saved registers that need to
|
||||
// be saved/restored if live
|
||||
@ -477,6 +487,7 @@ private:
|
||||
}
|
||||
|
||||
int gp_spill_size = 0;
|
||||
int opmask_spill_size = 0;
|
||||
int xmm_spill_size = 0;
|
||||
|
||||
// Record registers that needs to be saved/restored
|
||||
@ -490,6 +501,13 @@ private:
|
||||
_gp_registers.append(vm_reg->as_Register());
|
||||
gp_spill_size += 8;
|
||||
}
|
||||
} else if (vm_reg->is_KRegister()) {
|
||||
// All opmask registers are caller saved, thus spill the ones
|
||||
// which are live.
|
||||
if (_opmask_registers.find(vm_reg->as_KRegister()) == -1) {
|
||||
_opmask_registers.append(vm_reg->as_KRegister());
|
||||
opmask_spill_size += 8;
|
||||
}
|
||||
} else if (vm_reg->is_XMMRegister()) {
|
||||
// We encode in the low order 4 bits of the opto_reg, how large part of the register is live
|
||||
const VMReg vm_reg_base = OptoReg::as_VMReg(opto_reg & ~15);
|
||||
@ -520,13 +538,14 @@ private:
|
||||
const int arg_spill_size = frame::arg_reg_save_area_bytes;
|
||||
|
||||
// Stack pointer must be 16 bytes aligned for the call
|
||||
_spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size + arg_spill_size, 16);
|
||||
_spill_offset = _spill_size = align_up(xmm_spill_size + gp_spill_size + opmask_spill_size + arg_spill_size, 16);
|
||||
}
|
||||
|
||||
public:
|
||||
ZSaveLiveRegisters(MacroAssembler* masm, ZLoadBarrierStubC2* stub) :
|
||||
_masm(masm),
|
||||
_gp_registers(),
|
||||
_opmask_registers(),
|
||||
_xmm_registers(),
|
||||
_spill_size(0),
|
||||
_spill_offset(0) {
|
||||
@ -576,9 +595,19 @@ public:
|
||||
for (int i = 0; i < _gp_registers.length(); i++) {
|
||||
gp_register_save(_gp_registers.at(i));
|
||||
}
|
||||
|
||||
// Save opmask registers
|
||||
for (int i = 0; i < _opmask_registers.length(); i++) {
|
||||
opmask_register_save(_opmask_registers.at(i));
|
||||
}
|
||||
}
|
||||
|
||||
~ZSaveLiveRegisters() {
|
||||
// Restore opmask registers
|
||||
for (int i = _opmask_registers.length() - 1; i >= 0; i--) {
|
||||
opmask_register_restore(_opmask_registers.at(i));
|
||||
}
|
||||
|
||||
// Restore general purpose registers
|
||||
for (int i = _gp_registers.length() - 1; i >= 0; i--) {
|
||||
gp_register_restore(_gp_registers.at(i));
|
||||
|
@ -2525,6 +2525,59 @@ void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scrat
|
||||
}
|
||||
}
|
||||
|
||||
// Loads an opmask register from memory using the widest supported move:
// kmovql when AVX512BW is available, otherwise kmovwl (EVEX is then required).
void MacroAssembler::kmov(KRegister dst, Address src) {
  if (!VM_Version::supports_avx512bw()) {
    assert(VM_Version::supports_evex(), "");
    kmovwl(dst, src);
    return;
  }
  kmovql(dst, src);
}
|
||||
|
||||
// Stores an opmask register to memory using the widest supported move:
// kmovql when AVX512BW is available, otherwise kmovwl (EVEX is then required).
void MacroAssembler::kmov(Address dst, KRegister src) {
  if (!VM_Version::supports_avx512bw()) {
    assert(VM_Version::supports_evex(), "");
    kmovwl(dst, src);
    return;
  }
  kmovql(dst, src);
}
|
||||
|
||||
// Copies one opmask register to another using the widest supported move:
// kmovql when AVX512BW is available, otherwise kmovwl (EVEX is then required).
void MacroAssembler::kmov(KRegister dst, KRegister src) {
  if (!VM_Version::supports_avx512bw()) {
    assert(VM_Version::supports_evex(), "");
    kmovwl(dst, src);
    return;
  }
  kmovql(dst, src);
}
|
||||
|
||||
// Copies an opmask register into a general purpose register using the widest
// supported move: kmovql when AVX512BW is available, otherwise kmovwl (EVEX
// is then required).
void MacroAssembler::kmov(Register dst, KRegister src) {
  if (!VM_Version::supports_avx512bw()) {
    assert(VM_Version::supports_evex(), "");
    kmovwl(dst, src);
    return;
  }
  kmovql(dst, src);
}
|
||||
|
||||
// Copies a general purpose register into an opmask register using the widest
// supported move: kmovql when AVX512BW is available, otherwise kmovwl (EVEX
// is then required).
void MacroAssembler::kmov(KRegister dst, Register src) {
  if (!VM_Version::supports_avx512bw()) {
    assert(VM_Version::supports_evex(), "");
    kmovwl(dst, src);
    return;
  }
  kmovql(dst, src);
}
|
||||
|
||||
// Loads an opmask register from an address literal. When the literal is not
// reachable, its address is first materialized into scratch_reg with lea and
// the load goes through that register; otherwise the literal is used
// directly as an Address.
void MacroAssembler::kmovql(KRegister dst, AddressLiteral src, Register scratch_reg) {
  if (!reachable(src)) {
    lea(scratch_reg, src);
    kmovql(dst, Address(scratch_reg, 0));
    return;
  }
  kmovql(dst, as_Address(src));
}
|
||||
|
||||
void MacroAssembler::kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg) {
|
||||
if (reachable(src)) {
|
||||
@ -4940,7 +4993,7 @@ void MacroAssembler::verified_entry(int framesize, int stack_bang_size, bool fp_
|
||||
#if COMPILER2_OR_JVMCI
|
||||
|
||||
// clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM/ZMM registers
|
||||
void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp) {
|
||||
void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
|
||||
// cnt - number of qwords (8-byte words).
|
||||
// base - start address, qword aligned.
|
||||
Label L_zero_64_bytes, L_loop, L_sloop, L_tail, L_end;
|
||||
@ -4973,7 +5026,7 @@ void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, X
|
||||
if (use64byteVector) {
|
||||
addptr(cnt, 8);
|
||||
jccb(Assembler::equal, L_end);
|
||||
fill64_masked_avx(3, base, 0, xtmp, k2, cnt, rtmp, true);
|
||||
fill64_masked_avx(3, base, 0, xtmp, mask, cnt, rtmp, true);
|
||||
jmp(L_end);
|
||||
} else {
|
||||
addptr(cnt, 4);
|
||||
@ -4992,7 +5045,7 @@ void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, X
|
||||
addptr(cnt, 4);
|
||||
jccb(Assembler::lessEqual, L_end);
|
||||
if (UseAVX > 2 && MaxVectorSize >= 32 && VM_Version::supports_avx512vl()) {
|
||||
fill32_masked_avx(3, base, 0, xtmp, k2, cnt, rtmp);
|
||||
fill32_masked_avx(3, base, 0, xtmp, mask, cnt, rtmp);
|
||||
} else {
|
||||
decrement(cnt);
|
||||
|
||||
@ -5006,7 +5059,7 @@ void MacroAssembler::xmm_clear_mem(Register base, Register cnt, Register rtmp, X
|
||||
}
|
||||
|
||||
// Clearing constant sized memory using YMM/ZMM registers.
|
||||
void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp) {
|
||||
void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask) {
|
||||
assert(UseAVX > 2 && VM_Version::supports_avx512vlbw(), "");
|
||||
bool use64byteVector = MaxVectorSize > 32 && AVX3Threshold == 0;
|
||||
|
||||
@ -5031,8 +5084,8 @@ void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegiste
|
||||
break;
|
||||
case 3:
|
||||
movl(rtmp, 0x7);
|
||||
kmovwl(k2, rtmp);
|
||||
evmovdqu(T_LONG, k2, Address(base, disp), xtmp, Assembler::AVX_256bit);
|
||||
kmovwl(mask, rtmp);
|
||||
evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_256bit);
|
||||
break;
|
||||
case 4:
|
||||
evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit);
|
||||
@ -5040,8 +5093,8 @@ void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegiste
|
||||
case 5:
|
||||
if (use64byteVector) {
|
||||
movl(rtmp, 0x1F);
|
||||
kmovwl(k2, rtmp);
|
||||
evmovdqu(T_LONG, k2, Address(base, disp), xtmp, Assembler::AVX_512bit);
|
||||
kmovwl(mask, rtmp);
|
||||
evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit);
|
||||
} else {
|
||||
evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit);
|
||||
movq(Address(base, disp + 32), xtmp);
|
||||
@ -5050,8 +5103,8 @@ void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegiste
|
||||
case 6:
|
||||
if (use64byteVector) {
|
||||
movl(rtmp, 0x3F);
|
||||
kmovwl(k2, rtmp);
|
||||
evmovdqu(T_LONG, k2, Address(base, disp), xtmp, Assembler::AVX_512bit);
|
||||
kmovwl(mask, rtmp);
|
||||
evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit);
|
||||
} else {
|
||||
evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit);
|
||||
evmovdqu(T_LONG, k0, Address(base, disp + 32), xtmp, Assembler::AVX_128bit);
|
||||
@ -5060,13 +5113,13 @@ void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegiste
|
||||
case 7:
|
||||
if (use64byteVector) {
|
||||
movl(rtmp, 0x7F);
|
||||
kmovwl(k2, rtmp);
|
||||
evmovdqu(T_LONG, k2, Address(base, disp), xtmp, Assembler::AVX_512bit);
|
||||
kmovwl(mask, rtmp);
|
||||
evmovdqu(T_LONG, mask, Address(base, disp), xtmp, Assembler::AVX_512bit);
|
||||
} else {
|
||||
evmovdqu(T_LONG, k0, Address(base, disp), xtmp, Assembler::AVX_256bit);
|
||||
movl(rtmp, 0x7);
|
||||
kmovwl(k2, rtmp);
|
||||
evmovdqu(T_LONG, k2, Address(base, disp + 32), xtmp, Assembler::AVX_256bit);
|
||||
kmovwl(mask, rtmp);
|
||||
evmovdqu(T_LONG, mask, Address(base, disp + 32), xtmp, Assembler::AVX_256bit);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@ -5076,7 +5129,8 @@ void MacroAssembler::clear_mem(Register base, int cnt, Register rtmp, XMMRegiste
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp, bool is_large) {
|
||||
void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMRegister xtmp,
|
||||
bool is_large, KRegister mask) {
|
||||
// cnt - number of qwords (8-byte words).
|
||||
// base - start address, qword aligned.
|
||||
// is_large - if optimizers know cnt is larger than InitArrayShortSize
|
||||
@ -5116,7 +5170,7 @@ void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, XMMReg
|
||||
shlptr(cnt, 3); // convert to number of bytes
|
||||
rep_stosb();
|
||||
} else if (UseXMMForObjInit) {
|
||||
xmm_clear_mem(base, cnt, tmp, xtmp);
|
||||
xmm_clear_mem(base, cnt, tmp, xtmp, mask);
|
||||
} else {
|
||||
NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
|
||||
rep_stos();
|
||||
@ -7748,7 +7802,7 @@ void MacroAssembler::crc32c_ipl_alg2_alt2(Register in_out, Register in1, Registe
|
||||
void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
|
||||
XMMRegister tmp1Reg, XMMRegister tmp2Reg,
|
||||
XMMRegister tmp3Reg, XMMRegister tmp4Reg,
|
||||
Register tmp5, Register result) {
|
||||
Register tmp5, Register result, KRegister mask1, KRegister mask2) {
|
||||
Label copy_chars_loop, return_length, return_zero, done;
|
||||
|
||||
// rsi: src
|
||||
@ -7800,14 +7854,14 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
|
||||
movl(result, 0xFFFFFFFF);
|
||||
shlxl(result, result, tmp5);
|
||||
notl(result);
|
||||
kmovdl(k3, result);
|
||||
kmovdl(mask2, result);
|
||||
|
||||
evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
|
||||
evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
|
||||
ktestd(k2, k3);
|
||||
evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
|
||||
evpcmpuw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
|
||||
ktestd(mask1, mask2);
|
||||
jcc(Assembler::carryClear, return_zero);
|
||||
|
||||
evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
|
||||
evpmovwb(Address(dst, 0), mask2, tmp1Reg, Assembler::AVX_512bit);
|
||||
|
||||
addptr(src, tmp5);
|
||||
addptr(src, tmp5);
|
||||
@ -7828,8 +7882,8 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
|
||||
|
||||
bind(copy_32_loop);
|
||||
evmovdquw(tmp1Reg, Address(src, len, Address::times_2), /*merge*/ false, Assembler::AVX_512bit);
|
||||
evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
|
||||
kortestdl(k2, k2);
|
||||
evpcmpuw(mask1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
|
||||
kortestdl(mask1, mask1);
|
||||
jcc(Assembler::carryClear, return_zero);
|
||||
|
||||
// All elements in current processed chunk are valid candidates for
|
||||
@ -7850,14 +7904,14 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
|
||||
shlxl(result, result, len);
|
||||
notl(result);
|
||||
|
||||
kmovdl(k3, result);
|
||||
kmovdl(mask2, result);
|
||||
|
||||
evmovdquw(tmp1Reg, k3, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
|
||||
evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
|
||||
ktestd(k2, k3);
|
||||
evmovdquw(tmp1Reg, mask2, Address(src, 0), /*merge*/ false, Assembler::AVX_512bit);
|
||||
evpcmpuw(mask1, mask2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
|
||||
ktestd(mask1, mask2);
|
||||
jcc(Assembler::carryClear, return_zero);
|
||||
|
||||
evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
|
||||
evpmovwb(Address(dst, 0), mask2, tmp1Reg, Assembler::AVX_512bit);
|
||||
jmp(return_length);
|
||||
|
||||
bind(below_threshold);
|
||||
@ -7957,7 +8011,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
|
||||
// }
|
||||
// }
|
||||
void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
|
||||
XMMRegister tmp1, Register tmp2) {
|
||||
XMMRegister tmp1, Register tmp2, KRegister mask) {
|
||||
Label copy_chars_loop, done, below_threshold, avx3_threshold;
|
||||
// rsi: src
|
||||
// rdi: dst
|
||||
@ -8010,9 +8064,9 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
|
||||
movl(tmp3_aliased, -1);
|
||||
shlxl(tmp3_aliased, tmp3_aliased, tmp2);
|
||||
notl(tmp3_aliased);
|
||||
kmovdl(k2, tmp3_aliased);
|
||||
evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
|
||||
evmovdquw(Address(dst, 0), k2, tmp1, /*merge*/ true, Assembler::AVX_512bit);
|
||||
kmovdl(mask, tmp3_aliased);
|
||||
evpmovzxbw(tmp1, mask, Address(src, 0), Assembler::AVX_512bit);
|
||||
evmovdquw(Address(dst, 0), mask, tmp1, /*merge*/ true, Assembler::AVX_512bit);
|
||||
|
||||
jmp(done);
|
||||
bind(avx3_threshold);
|
||||
|
@ -1090,6 +1090,23 @@ public:
|
||||
void kmovwl(Register dst, KRegister src) { Assembler::kmovwl(dst, src); }
|
||||
void kmovwl(KRegister dst, Address src) { Assembler::kmovwl(dst, src); }
|
||||
void kmovwl(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
|
||||
void kmovwl(Address dst, KRegister src) { Assembler::kmovwl(dst, src); }
|
||||
void kmovwl(KRegister dst, KRegister src) { Assembler::kmovwl(dst, src); }
|
||||
|
||||
void kmovql(KRegister dst, KRegister src) { Assembler::kmovql(dst, src); }
|
||||
void kmovql(KRegister dst, Register src) { Assembler::kmovql(dst, src); }
|
||||
void kmovql(Register dst, KRegister src) { Assembler::kmovql(dst, src); }
|
||||
void kmovql(KRegister dst, Address src) { Assembler::kmovql(dst, src); }
|
||||
void kmovql(Address dst, KRegister src) { Assembler::kmovql(dst, src); }
|
||||
void kmovql(KRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
|
||||
|
||||
// Safe move operation, lowers down to 16bit moves for targets supporting
|
||||
// AVX512F feature and 64bit moves for targets supporting AVX512BW feature.
|
||||
void kmov(Address dst, KRegister src);
|
||||
void kmov(KRegister dst, Address src);
|
||||
void kmov(KRegister dst, KRegister src);
|
||||
void kmov(Register dst, KRegister src);
|
||||
void kmov(KRegister dst, Register src);
|
||||
|
||||
// AVX Unaligned forms
|
||||
void vmovdqu(Address dst, XMMRegister src);
|
||||
@ -1683,13 +1700,13 @@ public:
|
||||
|
||||
// clear memory of size 'cnt' qwords, starting at 'base';
|
||||
// if 'is_large' is set, do not try to produce short loop
|
||||
void clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, bool is_large);
|
||||
void clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, bool is_large, KRegister mask=knoreg);
|
||||
|
||||
// clear memory initialization sequence for constant size;
|
||||
void clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp);
|
||||
void clear_mem(Register base, int cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg);
|
||||
|
||||
// clear memory of size 'cnt' qwords, starting at 'base' using XMM/YMM registers
|
||||
void xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp);
|
||||
void xmm_clear_mem(Register base, Register cnt, Register rtmp, XMMRegister xtmp, KRegister mask=knoreg);
|
||||
|
||||
// Fill primitive arrays
|
||||
void generate_fill(BasicType t, bool aligned,
|
||||
@ -1802,11 +1819,12 @@ public:
|
||||
// Compress char[] array to byte[].
|
||||
void char_array_compress(Register src, Register dst, Register len,
|
||||
XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
|
||||
XMMRegister tmp4, Register tmp5, Register result);
|
||||
XMMRegister tmp4, Register tmp5, Register result,
|
||||
KRegister mask1 = knoreg, KRegister mask2 = knoreg);
|
||||
|
||||
// Inflate byte[] array to char[].
|
||||
void byte_array_inflate(Register src, Register dst, Register len,
|
||||
XMMRegister tmp1, Register tmp2);
|
||||
XMMRegister tmp1, Register tmp2, KRegister mask = knoreg);
|
||||
|
||||
void fill64_masked_avx(uint shift, Register dst, int disp,
|
||||
XMMRegister xmm, KRegister mask, Register length,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -218,7 +218,9 @@ class KRegisterImpl : public AbstractRegisterImpl {
|
||||
public:
|
||||
enum {
|
||||
number_of_registers = 8,
|
||||
max_slots_per_register = 1
|
||||
// Opmask registers are 64 bits wide on both 32-bit and 64-bit targets,
|
||||
// so two slots are reserved per register.
|
||||
max_slots_per_register = 2
|
||||
};
|
||||
|
||||
// construction
|
||||
@ -256,10 +258,14 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
|
||||
// There is no requirement that any ordering here matches any ordering c2 gives
|
||||
// its optoregs.
|
||||
|
||||
// x86_32.ad defines additional dummy FILL0-FILL7 registers. To make
|
||||
// REG_COUNT (computed by ADLC from the number of reg_defs seen in .ad files)
|
||||
// tally with ConcreteRegisterImpl::number_of_registers, an additional count
|
||||
// of 8 is added for the 32-bit JVM.
|
||||
number_of_registers = RegisterImpl::number_of_registers * RegisterImpl::max_slots_per_register +
|
||||
2 * FloatRegisterImpl::number_of_registers +
|
||||
2 * FloatRegisterImpl::number_of_registers + NOT_LP64(8) LP64_ONLY(0) +
|
||||
XMMRegisterImpl::max_slots_per_register * XMMRegisterImpl::number_of_registers +
|
||||
KRegisterImpl::number_of_registers + // mask registers
|
||||
KRegisterImpl::number_of_registers * KRegisterImpl::max_slots_per_register + // mask registers
|
||||
1 // eflags
|
||||
};
|
||||
|
||||
|
@ -131,6 +131,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
int ymm_bytes = num_xmm_regs * 16;
|
||||
int zmm_bytes = num_xmm_regs * 32;
|
||||
#ifdef COMPILER2
|
||||
int opmask_state_bytes = KRegisterImpl::number_of_registers * 8;
|
||||
if (save_vectors) {
|
||||
assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
|
||||
assert(MaxVectorSize <= 64, "Only up to 64 byte long vectors are supported");
|
||||
@ -139,6 +140,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
if (UseAVX > 2) {
|
||||
// Save upper half of ZMM registers as well
|
||||
vect_bytes += zmm_bytes;
|
||||
additional_frame_words += opmask_state_bytes / wordSize;
|
||||
}
|
||||
additional_frame_words += vect_bytes / wordSize;
|
||||
}
|
||||
@ -229,6 +231,11 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
|
||||
}
|
||||
__ subptr(rsp, opmask_state_bytes);
|
||||
// Save opmask registers
|
||||
for (int n = 0; n < KRegisterImpl::number_of_registers; n++) {
|
||||
__ kmov(Address(rsp, n*8), as_KRegister(n));
|
||||
}
|
||||
}
|
||||
}
|
||||
__ vzeroupper();
|
||||
@ -251,6 +258,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
// rbp, location is known implicitly, no oopMap
|
||||
map->set_callee_saved(STACK_OFFSET(rsi_off), rsi->as_VMReg());
|
||||
map->set_callee_saved(STACK_OFFSET(rdi_off), rdi->as_VMReg());
|
||||
|
||||
// %%% This is really a waste but we'll keep things as they were for now for the upper component
|
||||
off = st0_off;
|
||||
delta = st1_off - off;
|
||||
@ -275,11 +283,12 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
}
|
||||
|
||||
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
|
||||
int opmask_state_bytes = 0;
|
||||
int additional_frame_bytes = 0;
|
||||
int num_xmm_regs = XMMRegisterImpl::number_of_registers;
|
||||
int ymm_bytes = num_xmm_regs * 16;
|
||||
int zmm_bytes = num_xmm_regs * 32;
|
||||
// Recover XMM & FPU state
|
||||
int additional_frame_bytes = 0;
|
||||
#ifdef COMPILER2
|
||||
if (restore_vectors) {
|
||||
assert(UseAVX > 0, "Vectors larger than 16 byte long are supported only with AVX");
|
||||
@ -289,6 +298,8 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
|
||||
if (UseAVX > 2) {
|
||||
// Account for the saved upper half of ZMM registers as well
|
||||
additional_frame_bytes += zmm_bytes;
|
||||
opmask_state_bytes = KRegisterImpl::number_of_registers * 8;
|
||||
additional_frame_bytes += opmask_state_bytes;
|
||||
}
|
||||
}
|
||||
#else
|
||||
@ -322,11 +333,14 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16+off));
|
||||
}
|
||||
|
||||
if (UseAVX > 2) {
|
||||
// Restore upper half of ZMM registers.
|
||||
off = opmask_state_bytes;
|
||||
for (int n = 0; n < num_xmm_regs; n++) {
|
||||
__ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
|
||||
__ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32+off));
|
||||
}
|
||||
for (int n = 0; n < KRegisterImpl::number_of_registers; n++) {
|
||||
__ kmov(as_KRegister(n), Address(rsp, n*8));
|
||||
}
|
||||
}
|
||||
__ addptr(rsp, additional_frame_bytes);
|
||||
|
@ -90,11 +90,13 @@ class RegisterSaver {
|
||||
// units because compiler frame slots are jints.
|
||||
#define XSAVE_AREA_BEGIN 160
|
||||
#define XSAVE_AREA_YMM_BEGIN 576
|
||||
#define XSAVE_AREA_OPMASK_BEGIN 1088
|
||||
#define XSAVE_AREA_ZMM_BEGIN 1152
|
||||
#define XSAVE_AREA_UPPERBANK 1664
|
||||
#define DEF_XMM_OFFS(regnum) xmm ## regnum ## _off = xmm_off + (regnum)*16/BytesPerInt, xmm ## regnum ## H_off
|
||||
#define DEF_YMM_OFFS(regnum) ymm ## regnum ## _off = ymm_off + (regnum)*16/BytesPerInt, ymm ## regnum ## H_off
|
||||
#define DEF_ZMM_OFFS(regnum) zmm ## regnum ## _off = zmm_off + (regnum)*32/BytesPerInt, zmm ## regnum ## H_off
|
||||
#define DEF_OPMASK_OFFS(regnum) opmask ## regnum ## _off = opmask_off + (regnum)*8/BytesPerInt, opmask ## regnum ## H_off
|
||||
#define DEF_ZMM_UPPER_OFFS(regnum) zmm ## regnum ## _off = zmm_upper_off + (regnum-16)*64/BytesPerInt, zmm ## regnum ## H_off
|
||||
enum layout {
|
||||
fpu_state_off = frame::arg_reg_save_area_bytes/BytesPerInt, // fxsave save area
|
||||
@ -106,6 +108,10 @@ class RegisterSaver {
|
||||
DEF_YMM_OFFS(0),
|
||||
DEF_YMM_OFFS(1),
|
||||
// 2..15 are implied in range usage
|
||||
opmask_off = xmm_off + (XSAVE_AREA_OPMASK_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
|
||||
DEF_OPMASK_OFFS(0),
|
||||
DEF_OPMASK_OFFS(1),
|
||||
// 2..7 are implied in range usage
|
||||
zmm_off = xmm_off + (XSAVE_AREA_ZMM_BEGIN - XSAVE_AREA_BEGIN)/BytesPerInt,
|
||||
DEF_ZMM_OFFS(0),
|
||||
DEF_ZMM_OFFS(1),
|
||||
@ -213,6 +219,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
for (int n = 16; n < num_xmm_regs; n++) {
|
||||
__ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
|
||||
}
|
||||
#if COMPILER2_OR_JVMCI
|
||||
base_addr = XSAVE_AREA_OPMASK_BEGIN;
|
||||
off = 0;
|
||||
for(int n = 0; n < KRegisterImpl::number_of_registers; n++) {
|
||||
__ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
if (VM_Version::supports_evex()) {
|
||||
@ -222,6 +235,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
for (int n = 16; n < num_xmm_regs; n++) {
|
||||
__ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n));
|
||||
}
|
||||
#if COMPILER2_OR_JVMCI
|
||||
base_addr = XSAVE_AREA_OPMASK_BEGIN;
|
||||
off = 0;
|
||||
for(int n = 0; n < KRegisterImpl::number_of_registers; n++) {
|
||||
__ kmov(Address(rsp, base_addr+(off++*8)), as_KRegister(n));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
__ vzeroupper();
|
||||
@ -381,6 +401,13 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
|
||||
for (int n = 16; n < num_xmm_regs; n++) {
|
||||
__ evmovdqul(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)), vector_len);
|
||||
}
|
||||
#if COMPILER2_OR_JVMCI
|
||||
base_addr = XSAVE_AREA_OPMASK_BEGIN;
|
||||
off = 0;
|
||||
for (int n = 0; n < KRegisterImpl::number_of_registers; n++) {
|
||||
__ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8)));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
if (VM_Version::supports_evex()) {
|
||||
@ -390,6 +417,13 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
|
||||
for (int n = 16; n < num_xmm_regs; n++) {
|
||||
__ movsd(as_XMMRegister(n), Address(rsp, base_addr+(off++*64)));
|
||||
}
|
||||
#if COMPILER2_OR_JVMCI
|
||||
base_addr = XSAVE_AREA_OPMASK_BEGIN;
|
||||
off = 0;
|
||||
for (int n = 0; n < KRegisterImpl::number_of_registers; n++) {
|
||||
__ kmov(as_KRegister(n), Address(rsp, base_addr+(off++*8)));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -72,6 +72,7 @@ void VMRegImpl::set_regName() {
|
||||
#define X87_TYPE 2
|
||||
#define STACK_TYPE 3
|
||||
|
||||
// TODO: Add a case for KRegisters
|
||||
VMReg VMRegImpl::vmStorageToVMReg(int type, int index) {
|
||||
switch(type) {
|
||||
case INTEGER_TYPE: return ::as_Register(index)->as_VMReg();
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -82,7 +82,7 @@ inline XMMRegister as_XMMRegister() {
|
||||
inline KRegister as_KRegister() {
|
||||
assert(is_KRegister(), "must be");
|
||||
// Yuk
|
||||
return ::as_KRegister((value() - ConcreteRegisterImpl::max_xmm));
|
||||
return ::as_KRegister((value() - ConcreteRegisterImpl::max_xmm) >> 1);
|
||||
}
|
||||
|
||||
inline bool is_concrete() {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -43,7 +43,7 @@ inline VMReg XMMRegisterImpl::as_VMReg() {
|
||||
}
|
||||
|
||||
inline VMReg KRegisterImpl::as_VMReg() {
|
||||
return VMRegImpl::as_VMReg(encoding() + ConcreteRegisterImpl::max_xmm);
|
||||
return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_xmm);
|
||||
}
|
||||
|
||||
#endif // CPU_X86_VMREG_X86_INLINE_HPP
|
||||
|
@ -628,6 +628,29 @@ reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
|
||||
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
|
||||
#endif // _LP64
|
||||
|
||||
// AVX3 Mask Registers.
|
||||
reg_def K1 (SOC, SOC, Op_RegI, 1, k1->as_VMReg());
|
||||
reg_def K1_H (SOC, SOC, Op_RegI, 1, k1->as_VMReg()->next());
|
||||
|
||||
reg_def K2 (SOC, SOC, Op_RegI, 2, k2->as_VMReg());
|
||||
reg_def K2_H (SOC, SOC, Op_RegI, 2, k2->as_VMReg()->next());
|
||||
|
||||
reg_def K3 (SOC, SOC, Op_RegI, 3, k3->as_VMReg());
|
||||
reg_def K3_H (SOC, SOC, Op_RegI, 3, k3->as_VMReg()->next());
|
||||
|
||||
reg_def K4 (SOC, SOC, Op_RegI, 4, k4->as_VMReg());
|
||||
reg_def K4_H (SOC, SOC, Op_RegI, 4, k4->as_VMReg()->next());
|
||||
|
||||
reg_def K5 (SOC, SOC, Op_RegI, 5, k5->as_VMReg());
|
||||
reg_def K5_H (SOC, SOC, Op_RegI, 5, k5->as_VMReg()->next());
|
||||
|
||||
reg_def K6 (SOC, SOC, Op_RegI, 6, k6->as_VMReg());
|
||||
reg_def K6_H (SOC, SOC, Op_RegI, 6, k6->as_VMReg()->next());
|
||||
|
||||
reg_def K7 (SOC, SOC, Op_RegI, 7, k7->as_VMReg());
|
||||
reg_def K7_H (SOC, SOC, Op_RegI, 7, k7->as_VMReg()->next());
|
||||
|
||||
|
||||
alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
|
||||
XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
|
||||
XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
|
||||
@ -664,8 +687,33 @@ alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
|
||||
#endif
|
||||
);
|
||||
|
||||
alloc_class chunk2(K7, K7_H,
|
||||
K6, K6_H,
|
||||
K5, K5_H,
|
||||
K4, K4_H,
|
||||
K3, K3_H,
|
||||
K2, K2_H,
|
||||
K1, K1_H);
|
||||
|
||||
reg_class vectmask_reg(K1, K1_H,
|
||||
K2, K2_H,
|
||||
K3, K3_H,
|
||||
K4, K4_H,
|
||||
K5, K5_H,
|
||||
K6, K6_H,
|
||||
K7, K7_H);
|
||||
|
||||
reg_class vectmask_reg_K1(K1, K1_H);
|
||||
reg_class vectmask_reg_K2(K2, K2_H);
|
||||
reg_class vectmask_reg_K3(K3, K3_H);
|
||||
reg_class vectmask_reg_K4(K4, K4_H);
|
||||
reg_class vectmask_reg_K5(K5, K5_H);
|
||||
reg_class vectmask_reg_K6(K6, K6_H);
|
||||
reg_class vectmask_reg_K7(K7, K7_H);
|
||||
|
||||
// flags allocation class should be last.
|
||||
alloc_class chunk2(RFLAGS);
|
||||
alloc_class chunk3(RFLAGS);
|
||||
|
||||
|
||||
// Singleton class for condition codes
|
||||
reg_class int_flags(RFLAGS);
|
||||
@ -1368,6 +1416,7 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
if (!has_match_rule(opcode)) {
|
||||
return false; // no match rule present
|
||||
}
|
||||
const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
|
||||
switch (opcode) {
|
||||
case Op_AbsVL:
|
||||
case Op_StoreVectorScatter:
|
||||
@ -1524,10 +1573,11 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
|
||||
case Op_VectorMaskGen:
|
||||
case Op_LoadVectorMasked:
|
||||
case Op_StoreVectorMasked:
|
||||
if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
|
||||
if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) {
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
@ -1558,6 +1608,7 @@ const bool Matcher::match_rule_supported(int opcode) {
|
||||
// Identify extra cases that we might want to provide match rules for vector nodes and
|
||||
// other intrinsics guarded with vector length (vlen) and element type (bt).
|
||||
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
|
||||
const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
|
||||
if (!match_rule_supported(opcode)) {
|
||||
return false;
|
||||
}
|
||||
@ -1608,7 +1659,7 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType
|
||||
case Op_VectorMaskGen:
|
||||
case Op_LoadVectorMasked:
|
||||
case Op_StoreVectorMasked:
|
||||
if (!VM_Version::supports_avx512bw()) {
|
||||
if (!is_LP64 || !VM_Version::supports_avx512bw()) {
|
||||
return false;
|
||||
}
|
||||
if ((size_in_bits != 512) && !VM_Version::supports_avx512vl()) {
|
||||
@ -1831,6 +1882,14 @@ const bool Matcher::has_predicated_vectors(void) {
|
||||
return ret_value;
|
||||
}
|
||||
|
||||
// Returns a pointer to the register mask used for vector predicate (opmask)
// operands on x86.
// NOTE(review): _VECTMASK_REG_mask is presumably the ADLC-generated mask for
// reg_class vectmask_reg (K1..K7 with their _H halves) — confirm.
const RegMask* Matcher::predicate_reg_mask(void) {
|
||||
return &_VECTMASK_REG_mask;
|
||||
}
|
||||
|
||||
// Returns the vector type used for predicate registers: a newly created
// TypeVectMask with TypeInt::BOOL elements and the given length.
// The elemTy argument is not consulted by this implementation.
const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
|
||||
return new TypeVectMask(TypeInt::BOOL, length);
|
||||
}
|
||||
|
||||
const int Matcher::float_pressure(int default_pressure_threshold) {
|
||||
int float_pressure_threshold = default_pressure_threshold;
|
||||
#ifdef _LP64
|
||||
@ -2552,14 +2611,18 @@ instruct ShouldNotReachHere() %{
|
||||
%}
|
||||
|
||||
// =================================EVEX special===============================
|
||||
|
||||
instruct setMask(rRegI dst, rRegI src) %{
|
||||
predicate(Matcher::has_predicated_vectors());
|
||||
// The existing partial implementation of post-loop multiversioning computes
|
||||
// the mask corresponding to the tail loop in the K1 opmask register. This may then be
|
||||
// used for predicating instructions in the loop body during the last post-loop iteration.
|
||||
// TODO: Remove the hard-coded K1 usage while fixing the existing post-loop
|
||||
// multiversioning support.
|
||||
instruct setMask(rRegI dst, rRegI src, kReg_K1 mask) %{
|
||||
predicate(PostLoopMultiversioning && Matcher::has_predicated_vectors());
|
||||
match(Set dst (SetVectMaskI src));
|
||||
effect(TEMP dst);
|
||||
format %{ "setvectmask $dst, $src" %}
|
||||
ins_encode %{
|
||||
__ setvectmask($dst$$Register, $src$$Register);
|
||||
__ setvectmask($dst$$Register, $src$$Register, $mask$$KRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -3552,10 +3615,10 @@ instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct evgather(vec dst, memory mem, vec idx, rRegP tmp) %{
|
||||
instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
|
||||
predicate(vector_length_in_bytes(n) == 64);
|
||||
match(Set dst (LoadVectorGather mem idx));
|
||||
effect(TEMP dst, TEMP tmp);
|
||||
effect(TEMP dst, TEMP tmp, TEMP ktmp);
|
||||
format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and k2 as TEMP" %}
|
||||
ins_encode %{
|
||||
assert(UseAVX > 2, "sanity");
|
||||
@ -3565,10 +3628,9 @@ instruct evgather(vec dst, memory mem, vec idx, rRegP tmp) %{
|
||||
|
||||
assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
|
||||
|
||||
KRegister ktmp = k2;
|
||||
__ kmovwl(k2, ExternalAddress(vector_all_bits_set()), $tmp$$Register);
|
||||
__ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register);
|
||||
__ lea($tmp$$Register, $mem$$Address);
|
||||
__ evgather(elem_bt, $dst$$XMMRegister, ktmp, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
|
||||
__ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -3577,23 +3639,21 @@ instruct evgather(vec dst, memory mem, vec idx, rRegP tmp) %{
|
||||
|
||||
// Scatter INT, LONG, FLOAT, DOUBLE
|
||||
|
||||
instruct scatter(memory mem, vec src, vec idx, rRegP tmp) %{
|
||||
instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
|
||||
predicate(UseAVX > 2);
|
||||
match(Set mem (StoreVectorScatter mem (Binary src idx)));
|
||||
effect(TEMP tmp);
|
||||
effect(TEMP tmp, TEMP ktmp);
|
||||
format %{ "store_vector_scatter $mem, $idx, $src\t! using k2 and $tmp as TEMP" %}
|
||||
ins_encode %{
|
||||
assert(UseAVX > 2, "sanity");
|
||||
|
||||
int vlen_enc = vector_length_encoding(this, $src);
|
||||
BasicType elem_bt = vector_element_basic_type(this, $src);
|
||||
|
||||
assert(vector_length_in_bytes(this, $src) >= 16, "sanity");
|
||||
assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
|
||||
|
||||
KRegister ktmp = k2;
|
||||
__ kmovwl(k2, ExternalAddress(vector_all_bits_set()), $tmp$$Register);
|
||||
__ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register);
|
||||
__ lea($tmp$$Register, $mem$$Address);
|
||||
__ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, ktmp, $src$$XMMRegister, vlen_enc);
|
||||
__ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -5694,12 +5754,12 @@ instruct minmaxFP_reg(legVec dst, legVec a, legVec b, legVec tmp, legVec atmp, l
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp) %{
|
||||
instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp, kReg ktmp) %{
|
||||
predicate(vector_length_in_bytes(n) == 64 &&
|
||||
is_floating_point_type(vector_element_basic_type(n))); // T_FLOAT, T_DOUBLE
|
||||
match(Set dst (MinV a b));
|
||||
match(Set dst (MaxV a b));
|
||||
effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp);
|
||||
effect(TEMP dst, USE a, USE b, TEMP atmp, TEMP btmp, TEMP ktmp);
|
||||
format %{ "vector_minmaxFP $dst,$a,$b\t!using $atmp, $btmp as TEMP" %}
|
||||
ins_encode %{
|
||||
assert(UseAVX > 2, "required");
|
||||
@ -5708,10 +5768,9 @@ instruct evminmaxFP_reg_eavx(vec dst, vec a, vec b, vec atmp, vec btmp) %{
|
||||
int vlen_enc = vector_length_encoding(this);
|
||||
BasicType elem_bt = vector_element_basic_type(this);
|
||||
|
||||
KRegister ktmp = k1;
|
||||
__ evminmax_fp(opcode, elem_bt,
|
||||
$dst$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister,
|
||||
ktmp, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
|
||||
$ktmp$$KRegister, $atmp$$XMMRegister , $btmp$$XMMRegister, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -6776,23 +6835,22 @@ instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct evcmpFD(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch) %{
|
||||
instruct evcmpFD(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{
|
||||
predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
|
||||
is_floating_point_type(vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
|
||||
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
|
||||
effect(TEMP scratch);
|
||||
effect(TEMP scratch, TEMP ktmp);
|
||||
format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
|
||||
ins_encode %{
|
||||
int vlen_enc = Assembler::AVX_512bit;
|
||||
Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
|
||||
KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
|
||||
KRegister mask = k0; // The comparison itself is not being masked.
|
||||
if (vector_element_basic_type(this, $src1) == T_FLOAT) {
|
||||
__ evcmpps(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
|
||||
__ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register);
|
||||
__ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
|
||||
__ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register);
|
||||
} else {
|
||||
__ evcmppd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
|
||||
__ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register);
|
||||
__ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
|
||||
__ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register);
|
||||
}
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
@ -6814,41 +6872,40 @@ instruct vcmp(legVec dst, legVec src1, legVec src2, immI8 cond, rRegP scratch) %
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct evcmp(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch) %{
|
||||
instruct evcmp(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{
|
||||
predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
|
||||
is_integral_type(vector_element_basic_type(n->in(1)->in(1)))); // src1
|
||||
match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
|
||||
effect(TEMP scratch);
|
||||
effect(TEMP scratch, TEMP ktmp);
|
||||
format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
|
||||
ins_encode %{
|
||||
assert(UseAVX > 2, "required");
|
||||
|
||||
int vlen_enc = Assembler::AVX_512bit;
|
||||
Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
|
||||
KRegister ktmp = k2; // Use a hardcoded temp due to no k register allocation.
|
||||
KRegister mask = k0; // The comparison itself is not being masked.
|
||||
bool merge = false;
|
||||
BasicType src1_elem_bt = vector_element_basic_type(this, $src1);
|
||||
|
||||
switch (src1_elem_bt) {
|
||||
case T_BYTE: {
|
||||
__ evpcmpb(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
|
||||
__ evmovdqub($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
|
||||
__ evpcmpb($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
|
||||
__ evmovdqub($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
|
||||
break;
|
||||
}
|
||||
case T_SHORT: {
|
||||
__ evpcmpw(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
|
||||
__ evmovdquw($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
|
||||
__ evpcmpw($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
|
||||
__ evmovdquw($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
|
||||
break;
|
||||
}
|
||||
case T_INT: {
|
||||
__ evpcmpd(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
|
||||
__ evmovdqul($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
|
||||
__ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
|
||||
__ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
|
||||
break;
|
||||
}
|
||||
case T_LONG: {
|
||||
__ evpcmpq(ktmp, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
|
||||
__ evmovdquq($dst$$XMMRegister, ktmp, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
|
||||
__ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
|
||||
__ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -7026,17 +7083,16 @@ instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// 512-bit vector blend: matches VectorBlend only when the node's vector length is 64 bytes.
// The encoding compares $mask for equality against the vector_all_bits_set() constant to
// produce an opmask, then passes that opmask to evpblend over $src1/$src2.
// This span carries two revisions side by side: one hard-codes k2 as the opmask temporary,
// the other takes it from a kReg TEMP operand ($ktmp).
// NOTE(review): the format string still says "k2 as TEMP" although the $ktmp revision no
// longer uses k2 -- confirm and update the format text.
instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch) %{
|
||||
instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch, kReg ktmp) %{
|
||||
predicate(vector_length_in_bytes(n) == 64);
|
||||
match(Set dst (VectorBlend (Binary src1 src2) mask));
|
||||
format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $scratch and k2 as TEMP" %}
|
||||
effect(TEMP scratch);
|
||||
effect(TEMP scratch, TEMP ktmp);
|
||||
ins_encode %{
|
||||
int vlen_enc = Assembler::AVX_512bit;
|
||||
BasicType elem_bt = vector_element_basic_type(this);
|
||||
KRegister ktmp = k2;
|
||||
__ evpcmp(elem_bt, ktmp, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, $scratch$$Register);
|
||||
__ evpblend(elem_bt, $dst$$XMMRegister, ktmp, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
|
||||
__ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, $scratch$$Register);
|
||||
__ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -7179,13 +7235,29 @@ instruct vptest_alltrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp1,
|
||||
|
||||
// VectorTest with predicate BoolTest::overflow for inputs of at least 16 and fewer than
// 64 bytes. The encoding calls vectortest() and then turns the carry flag into a 0/1
// value in $dst (setb on carrySet followed by movzbl).
// Two vectortest() call forms appear in this span; the longer one passes xnoreg/xnoreg/knoreg
// for the trailing temporaries, i.e. no vector or opmask temp is supplied by this rule.
instruct vptest_alltrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{
|
||||
predicate(vector_length_in_bytes(n->in(1)) >= 16 &&
|
||||
vector_length_in_bytes(n->in(1)) < 64 &&
|
||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
|
||||
match(Set dst (VectorTest src1 src2 ));
|
||||
effect(KILL cr);
|
||||
format %{ "vector_test $dst,$src1, $src2\t! using $cr as TEMP" %}
|
||||
ins_encode %{
|
||||
int vlen = vector_length_in_bytes(this, $src1);
|
||||
__ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg);
|
||||
__ setb(Assembler::carrySet, $dst$$Register);
|
||||
__ movzbl($dst$$Register, $dst$$Register);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct vptest_alltrue_evex(rRegI dst, legVec src1, legVec src2, kReg ktmp, rFlagsReg cr) %{
|
||||
predicate(vector_length_in_bytes(n->in(1)) == 64 &&
|
||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
|
||||
match(Set dst (VectorTest src1 src2 ));
|
||||
effect(KILL cr, TEMP ktmp);
|
||||
format %{ "vector_test $dst,$src1, $src2\t! using $cr as TEMP" %}
|
||||
ins_encode %{
|
||||
int vlen = vector_length_in_bytes(this, $src1);
|
||||
__ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, $ktmp$$KRegister);
|
||||
__ setb(Assembler::carrySet, $dst$$Register);
|
||||
__ movzbl($dst$$Register, $dst$$Register);
|
||||
%}
|
||||
@ -7210,13 +7282,29 @@ instruct vptest_anytrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp, r
|
||||
|
||||
// VectorTest with predicate BoolTest::ne for inputs of at least 16 and fewer than 64 bytes.
// The encoding calls vectortest() and then turns the not-zero condition into a 0/1 value
// in $dst (setb on notZero followed by movzbl).
// Two vectortest() call forms appear in this span; the longer one passes xnoreg/xnoreg/knoreg
// for the trailing temporaries, i.e. no vector or opmask temp is supplied by this rule.
instruct vptest_anytrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{
|
||||
predicate(vector_length_in_bytes(n->in(1)) >= 16 &&
|
||||
vector_length_in_bytes(n->in(1)) < 64 &&
|
||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
|
||||
match(Set dst (VectorTest src1 src2 ));
|
||||
effect(KILL cr);
|
||||
format %{ "vector_test_any_true $dst,$src1,$src2\t! using $cr as TEMP" %}
|
||||
ins_encode %{
|
||||
int vlen = vector_length_in_bytes(this, $src1);
|
||||
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg);
|
||||
__ setb(Assembler::notZero, $dst$$Register);
|
||||
__ movzbl($dst$$Register, $dst$$Register);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct vptest_anytrue_evex(rRegI dst, legVec src1, legVec src2, kReg ktmp, rFlagsReg cr) %{
|
||||
predicate(vector_length_in_bytes(n->in(1)) == 64 &&
|
||||
static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
|
||||
match(Set dst (VectorTest src1 src2 ));
|
||||
effect(KILL cr, TEMP ktmp);
|
||||
format %{ "vector_test_any_true $dst,$src1,$src2\t! using $cr as TEMP" %}
|
||||
ins_encode %{
|
||||
int vlen = vector_length_in_bytes(this, $src1);
|
||||
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, $ktmp$$KRegister);
|
||||
__ setb(Assembler::notZero, $dst$$Register);
|
||||
__ movzbl($dst$$Register, $dst$$Register);
|
||||
%}
|
||||
@ -7239,12 +7327,26 @@ instruct cmpvptest_anytrue_lt16(rFlagsReg cr, legVec src1, legVec src2, immI_0 z
|
||||
|
||||
// Fused form of (CmpI (VectorTest src1 src2) zero) for BoolTest::ne with inputs of at least
// 16 and fewer than 64 bytes. The result is the flags register itself: the encoding only
// calls vectortest() and emits no setb/movzbl.
// Two vectortest() call forms appear in this span; the longer one passes xnoreg/xnoreg/knoreg
// for the trailing temporaries.
instruct cmpvptest_anytrue(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero) %{
|
||||
predicate(vector_length_in_bytes(n->in(1)->in(1)) >= 16 &&
|
||||
vector_length_in_bytes(n->in(1)->in(1)) < 64 &&
|
||||
static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne);
|
||||
match(Set cr (CmpI (VectorTest src1 src2) zero));
|
||||
format %{ "cmp_vector_test_any_true $src1,$src2\t!" %}
|
||||
ins_encode %{
|
||||
int vlen = vector_length_in_bytes(this, $src1);
|
||||
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister);
|
||||
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// 64-byte variant of the fused (CmpI (VectorTest src1 src2) zero) rule for BoolTest::ne.
// It declares a kReg TEMP and hands it to vectortest() as the opmask temporary; the
// result is left in the flags register.
instruct cmpvptest_anytrue_evex(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero, kReg ktmp) %{
|
||||
predicate(vector_length_in_bytes(n->in(1)->in(1)) == 64 &&
|
||||
static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne);
|
||||
match(Set cr (CmpI (VectorTest src1 src2) zero));
|
||||
effect(TEMP ktmp);
|
||||
format %{ "cmp_vector_test_any_true $src1,$src2\t!" %}
|
||||
ins_encode %{
|
||||
int vlen = vector_length_in_bytes(this, $src1);
|
||||
__ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, $ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -7897,47 +7999,46 @@ instruct vprorate(vec dst, vec src, vec shift) %{
|
||||
|
||||
#ifdef _LP64
|
||||
// ---------------------------------- Masked Block Copy ------------------------------------
|
||||
|
||||
// Masked vector load (LoadVectorMasked mem mask). Element type and vector length encoding
// are taken from this node's own vector type.
// This span carries two revisions side by side: with the mask in a general register
// (rRegL) it is first copied into k2 via kmovql; with the mask already in a kReg operand
// it is passed straight to evmovdqu.
instruct vmasked_load64(vec dst, memory mem, rRegL mask) %{
|
||||
instruct vmasked_load64(vec dst, memory mem, kReg mask) %{
|
||||
match(Set dst (LoadVectorMasked mem mask));
|
||||
format %{ "vector_masked_load $dst, $mem, $mask \t! vector masked copy" %}
|
||||
ins_encode %{
|
||||
BasicType elmType = this->bottom_type()->is_vect()->element_basic_type();
|
||||
int vector_len = vector_length_encoding(this);
|
||||
__ kmovql(k2, $mask$$Register);
|
||||
__ evmovdqu(elmType, k2, $dst$$XMMRegister, $mem$$Address, vector_len);
|
||||
__ evmovdqu(elmType, $mask$$KRegister, $dst$$XMMRegister, $mem$$Address, vector_len);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Mask generation from a length held in a register (VectorMaskGen len); the work is
// delegated to the genmask() assembler helper with one long register as scratch.
// This span carries two revisions side by side: one produces the mask in a general
// register (rRegL dst, TEMP_DEF dst), the other produces it directly in a kReg operand.
instruct vmask_gen(rRegL dst, rRegL len, rRegL tempLen) %{
|
||||
instruct vmask_gen(kReg dst, rRegL len, rRegL temp) %{
|
||||
match(Set dst (VectorMaskGen len));
|
||||
effect(TEMP_DEF dst, TEMP tempLen);
|
||||
format %{ "vector_mask_gen $len \t! vector mask generator" %}
|
||||
effect(TEMP temp);
|
||||
format %{ "vector_mask_gen32 $dst, $len \t! vector mask generator" %}
|
||||
ins_encode %{
|
||||
__ genmask($dst$$Register, $len$$Register, $tempLen$$Register);
|
||||
__ genmask($dst$$KRegister, $len$$Register, $temp$$Register);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Mask generation from a compile-time constant length (VectorMaskGen with an immL operand).
// The constant is all-ones shifted right by (64 - $len), i.e. the low $len bits are set.
// This span carries two revisions side by side: one writes the constant into a general
// destination register, the other builds it in a TEMP long register and moves it into the
// kReg destination with kmovql.
// NOTE(review): a $len of 0 would give a shift count of 64 in the C++ expression below;
// presumably the matcher never sees that value here -- confirm.
instruct vmask_gen_imm(rRegL dst, immL len) %{
|
||||
instruct vmask_gen_imm(kReg dst, immL len, rRegL temp) %{
|
||||
match(Set dst (VectorMaskGen len));
|
||||
format %{ "vector_mask_gen $len \t! vector mask generator" %}
|
||||
effect(TEMP temp);
|
||||
ins_encode %{
|
||||
__ mov64($dst$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant)));
|
||||
__ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 -$len$$constant)));
|
||||
__ kmovql($dst$$KRegister, $temp$$Register);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Masked vector store (StoreVectorMasked mem (Binary src mask)). Element type and vector
// length encoding are read from the node feeding the $src operand, not from this node.
// This span carries two revisions side by side: with the mask in a general register
// (rRegL) it is first copied into k2 via kmovql; with the mask already in a kReg operand
// it is passed straight to evmovdqu.
instruct vmasked_store64(memory mem, vec src, rRegL mask) %{
|
||||
instruct vmasked_store64(memory mem, vec src, kReg mask) %{
|
||||
match(Set mem (StoreVectorMasked mem (Binary src mask)));
|
||||
format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
|
||||
ins_encode %{
|
||||
const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
|
||||
BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
|
||||
int vector_len = vector_length_encoding(src_node);
|
||||
__ kmovql(k2, $mask$$Register);
|
||||
__ evmovdqu(elmType, k2, $mem$$Address, $src$$XMMRegister, vector_len);
|
||||
__ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, vector_len);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
// This code is free software; you can redistribute it and/or modify it
|
||||
@ -260,6 +260,18 @@ source %{
|
||||
// instructions, to allow sign-masking or sign-bit flipping. They allow
|
||||
// fast versions of NegF/NegD and AbsF/AbsD.
|
||||
|
||||
// Adjusts the vector-mask register mask at start-up: when the matcher reports predicated
// vector support and PostLoopMultiversioning is enabled, both OptoReg slots of K1 are
// removed from _VECTMASK_REG_mask. Otherwise this function does nothing.
void reg_mask_init() {
|
||||
if (Matcher::has_predicated_vectors()) {
|
||||
// Post-loop multi-versioning expects the mask to be present in the K1 register. Until
|
||||
// that is fixed, the register allocator should not allocate K1; this prevents any accidental
|
||||
// corruption of the value held in the K1 register.
|
||||
if (PostLoopMultiversioning) {
|
||||
const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
|
||||
const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Note: 'double' and 'long long' have 32-bits alignment on x86.
|
||||
static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
|
||||
// Use the expression (adr)&(~0xF) to provide 128-bits aligned address
|
||||
@ -731,7 +743,7 @@ const Pipeline * MachEpilogNode::pipeline() const {
|
||||
|
||||
//=============================================================================
|
||||
|
||||
// Coarse register-class tags returned by rc_class(). Two revisions of the declaration
// appear here; the second adds rc_kreg between rc_int and rc_float.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
|
||||
enum RC { rc_bad, rc_int, rc_kreg, rc_float, rc_xmm, rc_stack };
|
||||
static enum RC rc_class( OptoReg::Name reg ) {
|
||||
|
||||
if( !OptoReg::is_valid(reg) ) return rc_bad;
|
||||
@ -1050,7 +1062,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
|
||||
if( src_first == dst_first && src_second == dst_second )
|
||||
return size; // Self copy, no move
|
||||
|
||||
if (bottom_type()->isa_vect() != NULL) {
|
||||
if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
|
||||
uint ireg = ideal_reg();
|
||||
assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
|
||||
assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
|
||||
@ -1103,7 +1115,7 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
|
||||
size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
|
||||
|
||||
// Check for integer load
|
||||
if( dst_first_rc == rc_int && src_first_rc == rc_stack )
|
||||
if( src_first_rc == rc_stack && dst_first_rc == rc_int )
|
||||
size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
|
||||
|
||||
// Check for integer reg-xmm reg copy
|
||||
@ -1192,16 +1204,16 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
|
||||
|
||||
// Check for xmm store
|
||||
if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
|
||||
return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
|
||||
return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first), src_first, src_second, size, st);
|
||||
}
|
||||
|
||||
// Check for float xmm load
|
||||
if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
|
||||
if( src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
|
||||
return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
|
||||
}
|
||||
|
||||
// Copy from float reg to xmm reg
|
||||
if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
|
||||
if( src_first_rc == rc_float && dst_first_rc == rc_xmm ) {
|
||||
// copy to the top of stack from floating point reg
|
||||
// and use LEA to preserve flags
|
||||
if( cbuf ) {
|
||||
@ -1257,6 +1269,42 @@ uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bo
|
||||
if( dst_second_rc == rc_int && src_second_rc == rc_stack )
|
||||
return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
|
||||
|
||||
// AVX-512 opmask specific spilling.
|
||||
if (src_first_rc == rc_stack && dst_first_rc == rc_kreg) {
|
||||
assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
|
||||
assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
|
||||
MacroAssembler _masm(cbuf);
|
||||
int offset = ra_->reg2offset(src_first);
|
||||
__ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (src_first_rc == rc_kreg && dst_first_rc == rc_stack) {
|
||||
assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
|
||||
assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
|
||||
MacroAssembler _masm(cbuf);
|
||||
int offset = ra_->reg2offset(dst_first);
|
||||
__ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (src_first_rc == rc_kreg && dst_first_rc == rc_int) {
|
||||
Unimplemented();
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (src_first_rc == rc_int && dst_first_rc == rc_kreg) {
|
||||
Unimplemented();
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (src_first_rc == rc_kreg && dst_first_rc == rc_kreg) {
|
||||
assert((src_first & 1) == 0 && src_first + 1 == src_second, "invalid register pair");
|
||||
assert((dst_first & 1) == 0 && dst_first + 1 == dst_second, "invalid register pair");
|
||||
MacroAssembler _masm(cbuf);
|
||||
__ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
|
||||
return 0;
|
||||
}
|
||||
|
||||
Unimplemented();
|
||||
return 0; // Mute compiler
|
||||
@ -3574,6 +3622,72 @@ operand immI_65535() %{
|
||||
interface(CONST_INTER);
|
||||
%}
|
||||
|
||||
// Vector-mask register operand: matches ideal type RegVectMask and is allocated from the
// vectmask_reg register class. Rules use it for opmask inputs, results and temporaries.
operand kReg()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(vectmask_reg));
|
||||
match(RegVectMask);
|
||||
format %{%}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Vector-mask register operand restricted to the vectmask_reg_K1 register class
// (presumably just K1 -- the class definition is not shown here).
operand kReg_K1()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(vectmask_reg_K1));
|
||||
match(RegVectMask);
|
||||
format %{%}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Vector-mask register operand restricted to the vectmask_reg_K2 register class
// (presumably just K2 -- the class definition is not shown here).
operand kReg_K2()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(vectmask_reg_K2));
|
||||
match(RegVectMask);
|
||||
format %{%}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Vector-mask register operand restricted to the vectmask_reg_K3 register class
// (presumably just K3 -- the class definition is not shown here).
|
||||
operand kReg_K3()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(vectmask_reg_K3));
|
||||
match(RegVectMask);
|
||||
format %{%}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Vector-mask register operand restricted to the vectmask_reg_K4 register class
// (presumably just K4 -- the class definition is not shown here).
operand kReg_K4()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(vectmask_reg_K4));
|
||||
match(RegVectMask);
|
||||
format %{%}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Vector-mask register operand restricted to the vectmask_reg_K5 register class
// (presumably just K5 -- the class definition is not shown here).
operand kReg_K5()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(vectmask_reg_K5));
|
||||
match(RegVectMask);
|
||||
format %{%}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Vector-mask register operand restricted to the vectmask_reg_K6 register class
// (presumably just K6 -- the class definition is not shown here).
operand kReg_K6()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(vectmask_reg_K6));
|
||||
match(RegVectMask);
|
||||
format %{%}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Vector-mask register operand restricted to the vectmask_reg_K7 register class
// (presumably just K7 -- the class definition is not shown here).
|
||||
operand kReg_K7()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(vectmask_reg_K7));
|
||||
match(RegVectMask);
|
||||
format %{%}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Register Operands
|
||||
// Integer Register
|
||||
operand rRegI() %{
|
||||
@ -11410,8 +11524,10 @@ instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
|
||||
|
||||
// =======================================================================
|
||||
// fast clearing of an array
|
||||
// Small ClearArray non-AVX512.
|
||||
instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
|
||||
predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2 || !VM_Version::supports_avx512vlbw() || !n->in(2)->bottom_type()->is_int()->is_con()));
|
||||
predicate(!((ClearArrayNode*)n)->is_large() &&
|
||||
(UseAVX <= 2 || !VM_Version::supports_avx512vlbw()));
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
|
||||
|
||||
@ -11464,13 +11580,76 @@ instruct rep_stos(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe du
|
||||
%}
|
||||
ins_encode %{
|
||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
|
||||
$tmp$$XMMRegister, false);
|
||||
$tmp$$XMMRegister, false, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Small ClearArray AVX512 non-constant length.
// Selected when the ClearArray node is not large, UseAVX > 2, AVX512VL+BW is supported and
// the count input (n->in(2)) is not a compile-time constant. cnt and base are consumed and
// destroyed (USE_KILL), zero and the flags are killed, and one XMM plus one opmask register
// are reserved as temporaries.
// The format template below only produces the disassembly listing text; the code actually
// emitted comes from the clear_mem() call in ins_encode, invoked with is_large == false
// and $ktmp as the opmask temporary.
|
||||
instruct rep_stos_evex(eCXRegI cnt, eDIRegP base, regD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
|
||||
predicate(!((ClearArrayNode*)n)->is_large() &&
|
||||
UseAVX > 2 && VM_Version::supports_avx512vlbw() &&
|
||||
!n->in(2)->bottom_type()->is_int()->is_con());
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
|
||||
|
||||
format %{ $$template
|
||||
$$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
|
||||
$$emit$$"CMP InitArrayShortSize,rcx\n\t"
|
||||
$$emit$$"JG LARGE\n\t"
|
||||
$$emit$$"SHL ECX, 1\n\t"
|
||||
$$emit$$"DEC ECX\n\t"
|
||||
$$emit$$"JS DONE\t# Zero length\n\t"
|
||||
$$emit$$"MOV EAX,(EDI,ECX,4)\t# LOOP\n\t"
|
||||
$$emit$$"DEC ECX\n\t"
|
||||
$$emit$$"JGE LOOP\n\t"
|
||||
$$emit$$"JMP DONE\n\t"
|
||||
$$emit$$"# LARGE:\n\t"
|
||||
if (UseFastStosb) {
|
||||
$$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
|
||||
$$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
|
||||
} else if (UseXMMForObjInit) {
|
||||
$$emit$$"MOV RDI,RAX\n\t"
|
||||
$$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
|
||||
$$emit$$"JMPQ L_zero_64_bytes\n\t"
|
||||
$$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
|
||||
$$emit$$"VMOVDQU YMM0,(RAX)\n\t"
|
||||
$$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
|
||||
$$emit$$"ADD 0x40,RAX\n\t"
|
||||
$$emit$$"# L_zero_64_bytes:\n\t"
|
||||
$$emit$$"SUB 0x8,RCX\n\t"
|
||||
$$emit$$"JGE L_loop\n\t"
|
||||
$$emit$$"ADD 0x4,RCX\n\t"
|
||||
$$emit$$"JL L_tail\n\t"
|
||||
$$emit$$"VMOVDQU YMM0,(RAX)\n\t"
|
||||
$$emit$$"ADD 0x20,RAX\n\t"
|
||||
$$emit$$"SUB 0x4,RCX\n\t"
|
||||
$$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
|
||||
$$emit$$"ADD 0x4,RCX\n\t"
|
||||
$$emit$$"JLE L_end\n\t"
|
||||
$$emit$$"DEC RCX\n\t"
|
||||
$$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
|
||||
$$emit$$"VMOVQ XMM0,(RAX)\n\t"
|
||||
$$emit$$"ADD 0x8,RAX\n\t"
|
||||
$$emit$$"DEC RCX\n\t"
|
||||
$$emit$$"JGE L_sloop\n\t"
|
||||
$$emit$$"# L_end:\n\t"
|
||||
} else {
|
||||
$$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
|
||||
$$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
|
||||
}
|
||||
$$emit$$"# DONE"
|
||||
%}
|
||||
ins_encode %{
|
||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
|
||||
$tmp$$XMMRegister, false, $ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Large ClearArray non-AVX512.
|
||||
instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
|
||||
predicate(((ClearArrayNode*)n)->is_large());
|
||||
predicate(UseAVX <= 2 && ((ClearArrayNode*)n)->is_large());
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
|
||||
format %{ $$template
|
||||
@ -11513,26 +11692,79 @@ instruct rep_stos_large(eCXRegI cnt, eDIRegP base, regD tmp, eAXRegI zero, Unive
|
||||
%}
|
||||
ins_encode %{
|
||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
|
||||
$tmp$$XMMRegister, true);
|
||||
$tmp$$XMMRegister, true, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct rep_stos_im(immI cnt, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
|
||||
%{
|
||||
predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2 && VM_Version::supports_avx512vlbw() && n->in(2)->bottom_type()->is_int()->is_con()));
|
||||
// Large ClearArray AVX512.
// Selected when UseAVX > 2 and the ClearArray node is large. The format template below only
// produces the disassembly listing text; the emitted code comes from clear_mem(), invoked
// with is_large == true and $ktmp as the opmask temporary.
// NOTE(review): two effect() lines appear in this span. The first one names neither ktmp
// nor the USE_KILL operands and looks like residue of a different revision/rule -- confirm
// that only the second one belongs to this instruct.
|
||||
instruct rep_stos_large_evex(eCXRegI cnt, eDIRegP base, regD tmp, kReg ktmp, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
|
||||
predicate(UseAVX > 2 && ((ClearArrayNode*)n)->is_large());
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(TEMP tmp, TEMP zero, KILL cr);
|
||||
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
|
||||
format %{ $$template
|
||||
if (UseFastStosb) {
|
||||
$$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
|
||||
$$emit$$"SHL ECX,3\t# Convert doublewords to bytes\n\t"
|
||||
$$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
|
||||
} else if (UseXMMForObjInit) {
|
||||
$$emit$$"MOV RDI,RAX\t# ClearArray:\n\t"
|
||||
$$emit$$"VPXOR YMM0,YMM0,YMM0\n\t"
|
||||
$$emit$$"JMPQ L_zero_64_bytes\n\t"
|
||||
$$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
|
||||
$$emit$$"VMOVDQU YMM0,(RAX)\n\t"
|
||||
$$emit$$"VMOVDQU YMM0,0x20(RAX)\n\t"
|
||||
$$emit$$"ADD 0x40,RAX\n\t"
|
||||
$$emit$$"# L_zero_64_bytes:\n\t"
|
||||
$$emit$$"SUB 0x8,RCX\n\t"
|
||||
$$emit$$"JGE L_loop\n\t"
|
||||
$$emit$$"ADD 0x4,RCX\n\t"
|
||||
$$emit$$"JL L_tail\n\t"
|
||||
$$emit$$"VMOVDQU YMM0,(RAX)\n\t"
|
||||
$$emit$$"ADD 0x20,RAX\n\t"
|
||||
$$emit$$"SUB 0x4,RCX\n\t"
|
||||
$$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
|
||||
$$emit$$"ADD 0x4,RCX\n\t"
|
||||
$$emit$$"JLE L_end\n\t"
|
||||
$$emit$$"DEC RCX\n\t"
|
||||
$$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
|
||||
$$emit$$"VMOVQ XMM0,(RAX)\n\t"
|
||||
$$emit$$"ADD 0x8,RAX\n\t"
|
||||
$$emit$$"DEC RCX\n\t"
|
||||
$$emit$$"JGE L_sloop\n\t"
|
||||
$$emit$$"# L_end:\n\t"
|
||||
} else {
|
||||
$$emit$$"XOR EAX,EAX\t# ClearArray:\n\t"
|
||||
$$emit$$"SHL ECX,1\t# Convert doublewords to words\n\t"
|
||||
$$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
|
||||
}
|
||||
$$emit$$"# DONE"
|
||||
%}
|
||||
ins_encode %{
|
||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
|
||||
$tmp$$XMMRegister, true, $ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// Small ClearArray AVX512 constant length.
// Selected when the ClearArray node is not large, UseAVX > 2, AVX512VL+BW is supported and
// the count input (n->in(2)) is a compile-time integer constant; the count is then passed
// to clear_mem() as an immediate ($cnt$$constant).
// Two clear_mem() call forms appear in this span: one without and one with the opmask
// temporary $ktmp as the final argument.
|
||||
instruct rep_stos_im(immI cnt, kReg ktmp, eRegP base, regD tmp, rRegI zero, Universe dummy, eFlagsReg cr)
|
||||
%{
|
||||
predicate(!((ClearArrayNode*)n)->is_large() &&
|
||||
(UseAVX > 2 && VM_Version::supports_avx512vlbw() &&
|
||||
n->in(2)->bottom_type()->is_int()->is_con()));
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
|
||||
format %{ "clear_mem_imm $base , $cnt \n\t" %}
|
||||
ins_encode %{
|
||||
__ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister);
|
||||
__ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
|
||||
eAXRegI result, regD tmp1, eFlagsReg cr) %{
|
||||
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
|
||||
predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
@ -11540,14 +11772,29 @@ instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
|
||||
ins_encode %{
|
||||
__ string_compare($str1$$Register, $str2$$Register,
|
||||
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::LL);
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// StrComp rule for the LL encoding when UseAVX > 2. It reserves one XMM and one opmask
// register as temporaries and forwards $ktmp as the last argument of string_compare().
// All four string/count inputs are consumed and destroyed (USE_KILL); flags are killed.
instruct string_compareL_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
|
||||
eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
|
||||
predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
|
||||
ins_encode %{
|
||||
__ string_compare($str1$$Register, $str2$$Register,
|
||||
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
|
||||
eAXRegI result, regD tmp1, eFlagsReg cr) %{
|
||||
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
@ -11555,14 +11802,29 @@ instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
|
||||
ins_encode %{
|
||||
__ string_compare($str1$$Register, $str2$$Register,
|
||||
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::UU);
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// StrComp rule for the UU encoding when UseAVX > 2. It reserves one XMM and one opmask
// register as temporaries and forwards $ktmp as the last argument of string_compare().
// All four string/count inputs are consumed and destroyed (USE_KILL); flags are killed.
instruct string_compareU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
|
||||
eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
|
||||
predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
|
||||
ins_encode %{
|
||||
__ string_compare($str1$$Register, $str2$$Register,
|
||||
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
|
||||
eAXRegI result, regD tmp1, eFlagsReg cr) %{
|
||||
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
|
||||
predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
@ -11570,14 +11832,29 @@ instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2
|
||||
ins_encode %{
|
||||
__ string_compare($str1$$Register, $str2$$Register,
|
||||
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::LU);
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// StrComp rule for the LU encoding when UseAVX > 2. It reserves one XMM and one opmask
// register as temporaries and forwards $ktmp as the last argument of string_compare().
// All four string/count inputs are consumed and destroyed (USE_KILL); flags are killed.
instruct string_compareLU_evex(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
|
||||
eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
|
||||
predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
|
||||
ins_encode %{
|
||||
__ string_compare($str1$$Register, $str2$$Register,
|
||||
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
|
||||
eAXRegI result, regD tmp1, eFlagsReg cr) %{
|
||||
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
|
||||
predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
@ -11585,7 +11862,22 @@ instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2
|
||||
ins_encode %{
|
||||
__ string_compare($str2$$Register, $str1$$Register,
|
||||
$cnt2$$Register, $cnt1$$Register, $result$$Register,
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::UL);
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// StrComp rule for the UL encoding when UseAVX > 2. It reserves one XMM and one opmask
// register as temporaries and forwards $ktmp as the last argument of string_compare().
// Unlike the LL/UU/LU rules, the operands are handed to string_compare() swapped
// (str2/cnt2 first, then str1/cnt1), and the fixed registers in the signature are
// assigned accordingly.
instruct string_compareUL_evex(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
|
||||
eAXRegI result, regD tmp1, kReg ktmp, eFlagsReg cr) %{
|
||||
predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
|
||||
ins_encode %{
|
||||
__ string_compare($str2$$Register, $str1$$Register,
|
||||
$cnt2$$Register, $cnt1$$Register, $result$$Register,
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -11593,6 +11885,7 @@ instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2
|
||||
// fast string equals
|
||||
instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
|
||||
regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
|
||||
predicate(UseAVX <= 2);
|
||||
match(Set result (StrEquals (Binary str1 str2) cnt));
|
||||
effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
|
||||
|
||||
@ -11600,12 +11893,29 @@ instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
|
||||
ins_encode %{
|
||||
__ arrays_equals(false, $str1$$Register, $str2$$Register,
|
||||
$cnt$$Register, $result$$Register, $tmp3$$Register,
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
|
||||
%}
|
||||
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// StrEquals rule used when UseAVX > 2. It reserves two XMM registers and one opmask
// register as temporaries and forwards $ktmp as the last argument of arrays_equals(),
// which is called with `false` as its first argument.
// str1, str2 and cnt are consumed and destroyed (USE_KILL); tmp3 and flags are killed.
instruct string_equals_evex(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
|
||||
regD tmp1, regD tmp2, kReg ktmp, eBXRegI tmp3, eFlagsReg cr) %{
|
||||
predicate(UseAVX > 2);
|
||||
match(Set result (StrEquals (Binary str1 str2) cnt));
|
||||
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
|
||||
|
||||
format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
|
||||
ins_encode %{
|
||||
__ arrays_equals(false, $str1$$Register, $str2$$Register,
|
||||
$cnt$$Register, $result$$Register, $tmp3$$Register,
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
|
||||
%}
|
||||
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
|
||||
// fast search of substring with known size.
|
||||
instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
|
||||
eBXRegI result, regD vec1, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
|
||||
@ -11769,7 +12079,7 @@ instruct stringL_indexof_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
|
||||
instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
|
||||
regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
|
||||
%{
|
||||
predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
|
||||
predicate(UseAVX <= 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
|
||||
match(Set result (AryEq ary1 ary2));
|
||||
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
|
||||
//ins_cost(300);
|
||||
@ -11778,7 +12088,24 @@ instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
|
||||
ins_encode %{
|
||||
__ arrays_equals(true, $ary1$$Register, $ary2$$Register,
|
||||
$tmp3$$Register, $result$$Register, $tmp4$$Register,
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// AryEq rule selected when UseAVX > 2 and the node's encoding is
// StrIntrinsicNode::LL (format text: "byte[]").
// ary1/ary2 are USE_KILL; tmp3, tmp4 and cr are KILL; tmp1/tmp2 (regD) and
// ktmp (kReg) are TEMPs. arrays_equals() is called with `true` as its first
// argument, `false /* char */`, and the kReg temp as the last argument.
// NOTE(review): the format string does not mention $ktmp.
instruct array_equalsB_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
|
||||
regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
|
||||
%{
|
||||
predicate(UseAVX > 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
|
||||
match(Set result (AryEq ary1 ary2));
|
||||
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
|
||||
//ins_cost(300);
|
||||
|
||||
format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
|
||||
ins_encode %{
|
||||
__ arrays_equals(true, $ary1$$Register, $ary2$$Register,
|
||||
$tmp3$$Register, $result$$Register, $tmp4$$Register,
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -11786,7 +12113,7 @@ instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
|
||||
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
|
||||
regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
|
||||
%{
|
||||
predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
predicate(UseAVX <= 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
match(Set result (AryEq ary1 ary2));
|
||||
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
|
||||
//ins_cost(300);
|
||||
@ -11795,7 +12122,24 @@ instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
|
||||
ins_encode %{
|
||||
__ arrays_equals(true, $ary1$$Register, $ary2$$Register,
|
||||
$tmp3$$Register, $result$$Register, $tmp4$$Register,
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// AryEq rule selected when UseAVX > 2 and the node's encoding is
// StrIntrinsicNode::UU (format text: "char[]").
// ary1/ary2 are USE_KILL; tmp3, tmp4 and cr are KILL; tmp1/tmp2 (regD) and
// ktmp (kReg) are TEMPs. arrays_equals() is called with `true` as its first
// argument, `true /* char */`, and the kReg temp as the last argument.
// NOTE(review): the format string does not mention $ktmp.
instruct array_equalsC_evex(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
|
||||
regD tmp1, regD tmp2, kReg ktmp, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
|
||||
%{
|
||||
predicate(UseAVX > 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
match(Set result (AryEq ary1 ary2));
|
||||
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
|
||||
//ins_cost(300);
|
||||
|
||||
format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
|
||||
ins_encode %{
|
||||
__ arrays_equals(true, $ary1$$Register, $ary2$$Register,
|
||||
$tmp3$$Register, $result$$Register, $tmp4$$Register,
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -11803,6 +12147,7 @@ instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
|
||||
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
|
||||
regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
|
||||
%{
|
||||
predicate(UseAVX <= 2);
|
||||
match(Set result (HasNegatives ary1 len));
|
||||
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
|
||||
|
||||
@ -11810,14 +12155,32 @@ instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
|
||||
ins_encode %{
|
||||
__ has_negatives($ary1$$Register, $len$$Register,
|
||||
$result$$Register, $tmp3$$Register,
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister);
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// HasNegatives rule selected when UseAVX > 2.
// ary1 and len are USE_KILL; tmp3 and cr are KILL; tmp1/tmp2 (regD) and
// ktmp1/ktmp2 (kReg) are TEMPs. Both kReg temps are passed to
// has_negatives() as its two trailing arguments.
// NOTE(review): the format string does not mention $ktmp1/$ktmp2.
instruct has_negatives_evex(eSIRegP ary1, eCXRegI len, eAXRegI result,
|
||||
regD tmp1, regD tmp2, kReg ktmp1, kReg ktmp2, eBXRegI tmp3, eFlagsReg cr)
|
||||
%{
|
||||
predicate(UseAVX > 2);
|
||||
match(Set result (HasNegatives ary1 len));
|
||||
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
|
||||
|
||||
format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
|
||||
ins_encode %{
|
||||
__ has_negatives($ary1$$Register, $len$$Register,
|
||||
$result$$Register, $tmp3$$Register,
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
|
||||
// fast char[] to byte[] compression
|
||||
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
|
||||
eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
|
||||
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
|
||||
regD tmp3, regD tmp4, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
|
||||
predicate(UseAVX <= 2);
|
||||
match(Set result (StrCompressedCopy src (Binary dst len)));
|
||||
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
|
||||
|
||||
@ -11825,7 +12188,24 @@ instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD
|
||||
ins_encode %{
|
||||
__ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
|
||||
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
|
||||
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
|
||||
knoreg, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// StrCompressedCopy rule selected when UseAVX > 2.
// src, dst and len are USE_KILL; tmp5 and cr are KILL; tmp1..tmp4 (regD) and
// ktmp1/ktmp2 (kReg) are TEMPs. Both kReg temps are passed to
// char_array_compress() as its two trailing arguments.
// NOTE(review): the format text hard-codes "KILL RAX, RCX, RDX" instead of
// naming operands, and does not mention the regD or kReg temps.
instruct string_compress_evex(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2,
|
||||
regD tmp3, regD tmp4, kReg ktmp1, kReg ktmp2, eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
|
||||
predicate(UseAVX > 2);
|
||||
match(Set result (StrCompressedCopy src (Binary dst len)));
|
||||
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
|
||||
|
||||
format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
|
||||
ins_encode %{
|
||||
__ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
|
||||
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
|
||||
$ktmp1$$KRegister, $ktmp2$$KRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -11833,13 +12213,28 @@ instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD
|
||||
// fast byte[] to char[] inflation
|
||||
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
|
||||
regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
|
||||
predicate(UseAVX <= 2);
|
||||
match(Set dummy (StrInflatedCopy src (Binary dst len)));
|
||||
effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
|
||||
|
||||
format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
|
||||
ins_encode %{
|
||||
__ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
|
||||
$tmp1$$XMMRegister, $tmp2$$Register);
|
||||
$tmp1$$XMMRegister, $tmp2$$Register, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// StrInflatedCopy rule selected when UseAVX > 2. The match sets the
// `dummy` Universe operand rather than a value register.
// src, dst and len are USE_KILL; cr is KILL; tmp1 (regD), tmp2 and ktmp
// (kReg) are TEMPs. The kReg temp is the last argument to
// byte_array_inflate().
// NOTE(review): the format string does not mention $ktmp.
instruct string_inflate_evex(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
|
||||
regD tmp1, kReg ktmp, eCXRegI tmp2, eFlagsReg cr) %{
|
||||
predicate(UseAVX > 2);
|
||||
match(Set dummy (StrInflatedCopy src (Binary dst len)));
|
||||
effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
|
||||
|
||||
format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
|
||||
ins_encode %{
|
||||
__ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
|
||||
$tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -12267,10 +12662,12 @@ instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
|
||||
|
||||
// mask version
|
||||
// Jump Direct Conditional - Label defines a relative address from Jcc+1
|
||||
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
|
||||
predicate(n->has_vector_mask_set());
|
||||
// Bounded mask operand used in following pattern is needed for
|
||||
// post-loop multiversioning.
|
||||
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, eFlagsReg cr, label labl) %{
|
||||
predicate(PostLoopMultiversioning && n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cr);
|
||||
effect(USE labl);
|
||||
effect(USE labl, TEMP ktmp);
|
||||
|
||||
ins_cost(400);
|
||||
format %{ "J$cop $labl\t# Loop end\n\t"
|
||||
@ -12279,16 +12676,18 @@ instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl) %{
|
||||
ins_encode %{
|
||||
Label* L = $labl$$label;
|
||||
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
|
||||
__ restorevectmask();
|
||||
__ restorevectmask($ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe( pipe_jcc );
|
||||
%}
|
||||
|
||||
// Jump Direct Conditional - Label defines a relative address from Jcc+1
|
||||
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
|
||||
predicate(n->has_vector_mask_set());
|
||||
// Bounded mask operand used in following pattern is needed for
|
||||
// post-loop multiversioning.
|
||||
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, eFlagsRegU cmp, label labl) %{
|
||||
predicate(PostLoopMultiversioning && n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cmp);
|
||||
effect(USE labl);
|
||||
effect(USE labl, TEMP ktmp);
|
||||
|
||||
ins_cost(400);
|
||||
format %{ "J$cop,u $labl\t# Loop end\n\t"
|
||||
@ -12297,15 +12696,17 @@ instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl) %{
|
||||
ins_encode %{
|
||||
Label* L = $labl$$label;
|
||||
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
|
||||
__ restorevectmask();
|
||||
__ restorevectmask($ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe( pipe_jcc );
|
||||
%}
|
||||
|
||||
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
|
||||
predicate(n->has_vector_mask_set());
|
||||
// Bounded mask operand used in following pattern is needed for
|
||||
// post-loop multiversioning.
|
||||
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, eFlagsRegUCF cmp, label labl) %{
|
||||
predicate(PostLoopMultiversioning && n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cmp);
|
||||
effect(USE labl);
|
||||
effect(USE labl, TEMP ktmp);
|
||||
|
||||
ins_cost(300);
|
||||
format %{ "J$cop,u $labl\t# Loop end\n\t"
|
||||
@ -12314,7 +12715,7 @@ instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label lab
|
||||
ins_encode %{
|
||||
Label* L = $labl$$label;
|
||||
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
|
||||
__ restorevectmask();
|
||||
__ restorevectmask($ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe( pipe_jcc );
|
||||
%}
|
||||
|
@ -424,6 +424,16 @@ void reg_mask_init() {
|
||||
|
||||
_INT_NO_RCX_REG_mask = _INT_REG_mask;
|
||||
_INT_NO_RCX_REG_mask.Remove(OptoReg::as_OptoReg(rcx->as_VMReg()));
|
||||
|
||||
if (Matcher::has_predicated_vectors()) {
|
||||
// Post-loop multi-versioning expects the mask to be present in the K1 register. Until
|
||||
// that is fixed, the register allocator must not allocate K1; this prevents any accidental
|
||||
// corruption of the value held in the K1 register.
|
||||
if (PostLoopMultiversioning) {
|
||||
const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()));
|
||||
const_cast<RegMask*>(&_VECTMASK_REG_mask)->Remove(OptoReg::as_OptoReg(k1->as_VMReg()->next()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool generate_vzeroupper(Compile* C) {
|
||||
@ -1014,6 +1024,7 @@ const Pipeline* MachEpilogNode::pipeline() const
|
||||
// Register-class tag used by the spill-copy code. rc_class() returns
// rc_int for a Register, rc_kreg for a KRegister and rc_float for an
// XMMRegister; the spill-copy branches also compare against rc_stack for
// stack-slot sources/destinations.
enum RC {
|
||||
rc_bad,
|
||||
rc_int,
|
||||
rc_kreg,
|
||||
rc_float,
|
||||
rc_stack
|
||||
};
|
||||
@ -1028,6 +1039,8 @@ static enum RC rc_class(OptoReg::Name reg)
|
||||
|
||||
if (r->is_Register()) return rc_int;
|
||||
|
||||
if (r->is_KRegister()) return rc_kreg;
|
||||
|
||||
assert(r->is_XMMRegister(), "must be");
|
||||
return rc_float;
|
||||
}
|
||||
@ -1141,7 +1154,7 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
|
||||
// Self copy, no move
|
||||
return 0;
|
||||
}
|
||||
if (bottom_type()->isa_vect() != NULL) {
|
||||
if (bottom_type()->isa_vect() != NULL && bottom_type()->isa_vectmask() == NULL) {
|
||||
uint ireg = ideal_reg();
|
||||
assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
|
||||
assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
|
||||
@ -1271,6 +1284,24 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
|
||||
st->print("movss %s, [rsp + #%d]\t# spill",
|
||||
Matcher::regName[dst_first],
|
||||
offset);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
} else if (dst_first_rc == rc_kreg) {
|
||||
// mem -> kreg
|
||||
if ((src_first & 1) == 0 && src_first + 1 == src_second &&
|
||||
(dst_first & 1) == 0 && dst_first + 1 == dst_second) {
|
||||
// 64-bit
|
||||
int offset = ra_->reg2offset(src_first);
|
||||
if (cbuf) {
|
||||
MacroAssembler _masm(cbuf);
|
||||
__ kmov(as_KRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
|
||||
#ifndef PRODUCT
|
||||
} else {
|
||||
st->print("kmovq %s, [rsp + #%d]\t# spill",
|
||||
Matcher::regName[dst_first],
|
||||
offset);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
@ -1376,6 +1407,23 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
} else if (dst_first_rc == rc_kreg) {
|
||||
if ((src_first & 1) == 0 && src_first + 1 == src_second &&
|
||||
(dst_first & 1) == 0 && dst_first + 1 == dst_second) {
|
||||
// 64-bit
|
||||
if (cbuf) {
|
||||
MacroAssembler _masm(cbuf);
|
||||
__ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
|
||||
#ifndef PRODUCT
|
||||
} else {
|
||||
st->print("kmovq %s, %s\t# spill",
|
||||
Matcher::regName[dst_first],
|
||||
Matcher::regName[src_first]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
Unimplemented();
|
||||
return 0;
|
||||
}
|
||||
} else if (src_first_rc == rc_float) {
|
||||
// xmm ->
|
||||
@ -1476,6 +1524,65 @@ uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
} else if (dst_first_rc == rc_kreg) {
|
||||
assert(false, "Illegal spilling");
|
||||
return 0;
|
||||
}
|
||||
} else if (src_first_rc == rc_kreg) {
|
||||
if (dst_first_rc == rc_stack) {
|
||||
// kreg -> mem
|
||||
if ((src_first & 1) == 0 && src_first + 1 == src_second &&
|
||||
(dst_first & 1) == 0 && dst_first + 1 == dst_second) {
|
||||
// 64-bit
|
||||
int offset = ra_->reg2offset(dst_first);
|
||||
if (cbuf) {
|
||||
MacroAssembler _masm(cbuf);
|
||||
__ kmov(Address(rsp, offset), as_KRegister(Matcher::_regEncode[src_first]));
|
||||
#ifndef PRODUCT
|
||||
} else {
|
||||
st->print("kmovq [rsp + #%d] , %s\t# spill",
|
||||
offset,
|
||||
Matcher::regName[src_first]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
} else if (dst_first_rc == rc_int) {
|
||||
if ((src_first & 1) == 0 && src_first + 1 == src_second &&
|
||||
(dst_first & 1) == 0 && dst_first + 1 == dst_second) {
|
||||
// 64-bit
|
||||
if (cbuf) {
|
||||
MacroAssembler _masm(cbuf);
|
||||
__ kmov(as_Register(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
|
||||
#ifndef PRODUCT
|
||||
} else {
|
||||
st->print("kmovq %s, %s\t# spill",
|
||||
Matcher::regName[dst_first],
|
||||
Matcher::regName[src_first]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
Unimplemented();
|
||||
return 0;
|
||||
} else if (dst_first_rc == rc_kreg) {
|
||||
if ((src_first & 1) == 0 && src_first + 1 == src_second &&
|
||||
(dst_first & 1) == 0 && dst_first + 1 == dst_second) {
|
||||
// 64-bit
|
||||
if (cbuf) {
|
||||
MacroAssembler _masm(cbuf);
|
||||
__ kmov(as_KRegister(Matcher::_regEncode[dst_first]), as_KRegister(Matcher::_regEncode[src_first]));
|
||||
#ifndef PRODUCT
|
||||
} else {
|
||||
st->print("kmovq %s, %s\t# spill",
|
||||
Matcher::regName[dst_first],
|
||||
Matcher::regName[src_first]);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
} else if (dst_first_rc == rc_float) {
|
||||
assert(false, "Illegal spill");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -3291,6 +3398,72 @@ operand immL_65535()
|
||||
interface(CONST_INTER);
|
||||
%}
|
||||
|
||||
// Register operand matching RegVectMask, allocated from register class
// vectmask_reg. The format is intentionally empty.
operand kReg()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(vectmask_reg));
|
||||
match(RegVectMask);
|
||||
format %{%}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Register operand matching RegVectMask, restricted to register class
// vectmask_reg_K1 (presumably just K1 -- the class definition is not
// visible here; confirm).
operand kReg_K1()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(vectmask_reg_K1));
|
||||
match(RegVectMask);
|
||||
format %{%}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Register operand matching RegVectMask, restricted to register class
// vectmask_reg_K2 (presumably just K2 -- the class definition is not
// visible here; confirm).
operand kReg_K2()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(vectmask_reg_K2));
|
||||
match(RegVectMask);
|
||||
format %{%}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Special Registers
|
||||
// Register operand matching RegVectMask, restricted to register class
// vectmask_reg_K3 (presumably just K3 -- the class definition is not
// visible here; confirm).
operand kReg_K3()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(vectmask_reg_K3));
|
||||
match(RegVectMask);
|
||||
format %{%}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Register operand matching RegVectMask, restricted to register class
// vectmask_reg_K4 (presumably just K4 -- the class definition is not
// visible here; confirm).
operand kReg_K4()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(vectmask_reg_K4));
|
||||
match(RegVectMask);
|
||||
format %{%}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Register operand matching RegVectMask, restricted to register class
// vectmask_reg_K5 (presumably just K5 -- the class definition is not
// visible here; confirm).
operand kReg_K5()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(vectmask_reg_K5));
|
||||
match(RegVectMask);
|
||||
format %{%}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Register operand matching RegVectMask, restricted to register class
// vectmask_reg_K6 (presumably just K6 -- the class definition is not
// visible here; confirm).
operand kReg_K6()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(vectmask_reg_K6));
|
||||
match(RegVectMask);
|
||||
format %{%}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Special Registers
|
||||
// Register operand matching RegVectMask, restricted to register class
// vectmask_reg_K7 (presumably just K7 -- the class definition is not
// visible here; confirm).
operand kReg_K7()
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(vectmask_reg_K7));
|
||||
match(RegVectMask);
|
||||
format %{%}
|
||||
interface(REG_INTER);
|
||||
%}
|
||||
|
||||
// Register Operands
|
||||
// Integer Register
|
||||
operand rRegI()
|
||||
@ -4701,7 +4874,6 @@ define
|
||||
// name must have been defined in an 'enc_class' specification
|
||||
// in the encode section of the architecture description.
|
||||
|
||||
|
||||
//----------Load/Store/Move Instructions---------------------------------------
|
||||
//----------Load Instructions--------------------------------------------------
|
||||
|
||||
@ -10768,13 +10940,13 @@ instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
|
||||
// =======================================================================
|
||||
// fast clearing of an array
|
||||
// Fast clearing of an array
|
||||
// Small ClearArray non-AVX512.
|
||||
instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
|
||||
Universe dummy, rFlagsReg cr)
|
||||
%{
|
||||
predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX <= 2 || !VM_Version::supports_avx512vlbw() || !n->in(2)->bottom_type()->is_long()->is_con()));
|
||||
predicate(!((ClearArrayNode*)n)->is_large() &&
|
||||
(UseAVX <= 2 || !VM_Version::supports_avx512vlbw()));
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
|
||||
|
||||
@ -10825,15 +10997,78 @@ instruct rep_stos(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
|
||||
%}
|
||||
ins_encode %{
|
||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
|
||||
$tmp$$XMMRegister, false);
|
||||
$tmp$$XMMRegister, false, knoreg);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Small ClearArray AVX512 non-constant length.
|
||||
// ClearArray rule selected when all of the following hold: the node is not
// is_large(), UseAVX > 2, VM_Version::supports_avx512vlbw(), and the type of
// n->in(2) is not a long constant.
// cnt and base are USE_KILL; zero and cr are KILL; tmp (regD) and ktmp (kReg)
// are TEMPs. clear_mem() is called with `false` as its fifth argument and the
// kReg temp as the last argument.
// The format template prints one of three listings depending on
// UseFastStosb / UseXMMForObjInit; it does not mention $ktmp.
instruct rep_stos_evex(rcx_RegL cnt, rdi_RegP base, regD tmp, kReg ktmp, rax_RegI zero,
|
||||
Universe dummy, rFlagsReg cr)
|
||||
%{
|
||||
predicate(!((ClearArrayNode*)n)->is_large() &&
|
||||
UseAVX > 2 && VM_Version::supports_avx512vlbw() &&
|
||||
!n->in(2)->bottom_type()->is_long()->is_con());
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
|
||||
|
||||
format %{ $$template
|
||||
$$emit$$"xorq rax, rax\t# ClearArray:\n\t"
|
||||
$$emit$$"cmp InitArrayShortSize,rcx\n\t"
|
||||
$$emit$$"jg LARGE\n\t"
|
||||
$$emit$$"dec rcx\n\t"
|
||||
$$emit$$"js DONE\t# Zero length\n\t"
|
||||
$$emit$$"mov rax,(rdi,rcx,8)\t# LOOP\n\t"
|
||||
$$emit$$"dec rcx\n\t"
|
||||
$$emit$$"jge LOOP\n\t"
|
||||
$$emit$$"jmp DONE\n\t"
|
||||
$$emit$$"# LARGE:\n\t"
|
||||
if (UseFastStosb) {
|
||||
$$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
|
||||
$$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--\n\t"
|
||||
} else if (UseXMMForObjInit) {
|
||||
$$emit$$"mov rdi,rax\n\t"
|
||||
$$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
|
||||
$$emit$$"jmpq L_zero_64_bytes\n\t"
|
||||
$$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
|
||||
$$emit$$"vmovdqu ymm0,(rax)\n\t"
|
||||
$$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
|
||||
$$emit$$"add 0x40,rax\n\t"
|
||||
$$emit$$"# L_zero_64_bytes:\n\t"
|
||||
$$emit$$"sub 0x8,rcx\n\t"
|
||||
$$emit$$"jge L_loop\n\t"
|
||||
$$emit$$"add 0x4,rcx\n\t"
|
||||
$$emit$$"jl L_tail\n\t"
|
||||
$$emit$$"vmovdqu ymm0,(rax)\n\t"
|
||||
$$emit$$"add 0x20,rax\n\t"
|
||||
$$emit$$"sub 0x4,rcx\n\t"
|
||||
$$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
|
||||
$$emit$$"add 0x4,rcx\n\t"
|
||||
$$emit$$"jle L_end\n\t"
|
||||
$$emit$$"dec rcx\n\t"
|
||||
$$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
|
||||
$$emit$$"vmovq xmm0,(rax)\n\t"
|
||||
$$emit$$"add 0x8,rax\n\t"
|
||||
$$emit$$"dec rcx\n\t"
|
||||
$$emit$$"jge L_sloop\n\t"
|
||||
$$emit$$"# L_end:\n\t"
|
||||
} else {
|
||||
$$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--\n\t"
|
||||
}
|
||||
$$emit$$"# DONE"
|
||||
%}
|
||||
ins_encode %{
|
||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
|
||||
$tmp$$XMMRegister, false, $ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Large ClearArray non-AVX512.
|
||||
instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
|
||||
Universe dummy, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((ClearArrayNode*)n)->is_large());
|
||||
predicate(UseAVX <=2 && ((ClearArrayNode*)n)->is_large());
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, KILL zero, KILL cr);
|
||||
|
||||
@ -10875,19 +11110,73 @@ instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, regD tmp, rax_RegI zero,
|
||||
%}
|
||||
ins_encode %{
|
||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
|
||||
$tmp$$XMMRegister, true);
|
||||
$tmp$$XMMRegister, true, knoreg);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, Universe dummy, rFlagsReg cr)
|
||||
// Large ClearArray AVX512.
|
||||
instruct rep_stos_large_evex(rcx_RegL cnt, rdi_RegP base, regD tmp, kReg ktmp, rax_RegI zero,
|
||||
Universe dummy, rFlagsReg cr)
|
||||
%{
|
||||
predicate(!((ClearArrayNode*)n)->is_large() && (UseAVX > 2 && VM_Version::supports_avx512vlbw() && n->in(2)->bottom_type()->is_long()->is_con()));
|
||||
predicate(UseAVX > 2 && ((ClearArrayNode*)n)->is_large());
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(TEMP tmp, TEMP zero, KILL cr);
|
||||
effect(USE_KILL cnt, USE_KILL base, TEMP tmp, TEMP ktmp, KILL zero, KILL cr);
|
||||
|
||||
format %{ $$template
|
||||
if (UseFastStosb) {
|
||||
$$emit$$"xorq rax, rax\t# ClearArray:\n\t"
|
||||
$$emit$$"shlq rcx,3\t# Convert doublewords to bytes\n\t"
|
||||
$$emit$$"rep stosb\t# Store rax to *rdi++ while rcx--"
|
||||
} else if (UseXMMForObjInit) {
|
||||
$$emit$$"mov rdi,rax\t# ClearArray:\n\t"
|
||||
$$emit$$"vpxor ymm0,ymm0,ymm0\n\t"
|
||||
$$emit$$"jmpq L_zero_64_bytes\n\t"
|
||||
$$emit$$"# L_loop:\t# 64-byte LOOP\n\t"
|
||||
$$emit$$"vmovdqu ymm0,(rax)\n\t"
|
||||
$$emit$$"vmovdqu ymm0,0x20(rax)\n\t"
|
||||
$$emit$$"add 0x40,rax\n\t"
|
||||
$$emit$$"# L_zero_64_bytes:\n\t"
|
||||
$$emit$$"sub 0x8,rcx\n\t"
|
||||
$$emit$$"jge L_loop\n\t"
|
||||
$$emit$$"add 0x4,rcx\n\t"
|
||||
$$emit$$"jl L_tail\n\t"
|
||||
$$emit$$"vmovdqu ymm0,(rax)\n\t"
|
||||
$$emit$$"add 0x20,rax\n\t"
|
||||
$$emit$$"sub 0x4,rcx\n\t"
|
||||
$$emit$$"# L_tail:\t# Clearing tail bytes\n\t"
|
||||
$$emit$$"add 0x4,rcx\n\t"
|
||||
$$emit$$"jle L_end\n\t"
|
||||
$$emit$$"dec rcx\n\t"
|
||||
$$emit$$"# L_sloop:\t# 8-byte short loop\n\t"
|
||||
$$emit$$"vmovq xmm0,(rax)\n\t"
|
||||
$$emit$$"add 0x8,rax\n\t"
|
||||
$$emit$$"dec rcx\n\t"
|
||||
$$emit$$"jge L_sloop\n\t"
|
||||
$$emit$$"# L_end:\n\t"
|
||||
} else {
|
||||
$$emit$$"xorq rax, rax\t# ClearArray:\n\t"
|
||||
$$emit$$"rep stosq\t# Store rax to *rdi++ while rcx--"
|
||||
}
|
||||
%}
|
||||
ins_encode %{
|
||||
__ clear_mem($base$$Register, $cnt$$Register, $zero$$Register,
|
||||
$tmp$$XMMRegister, true, $ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
// Small ClearArray AVX512 constant length.
|
||||
instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, kReg ktmp, Universe dummy, rFlagsReg cr)
|
||||
%{
|
||||
predicate(!((ClearArrayNode*)n)->is_large() &&
|
||||
(UseAVX > 2 && VM_Version::supports_avx512vlbw() &&
|
||||
n->in(2)->bottom_type()->is_long()->is_con()));
|
||||
match(Set dummy (ClearArray cnt base));
|
||||
effect(TEMP tmp, TEMP zero, TEMP ktmp, KILL cr);
|
||||
format %{ "clear_mem_imm $base , $cnt \n\t" %}
|
||||
ins_encode %{
|
||||
__ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister);
|
||||
__ clear_mem($base$$Register, $cnt$$constant, $zero$$Register, $tmp$$XMMRegister, $ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
@ -10895,7 +11184,7 @@ instruct rep_stos_im(immL cnt, rRegP base, regD tmp, rRegI zero, Universe dummy,
|
||||
instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
|
||||
rax_RegI result, legRegD tmp1, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
|
||||
predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
@ -10903,7 +11192,23 @@ instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI c
|
||||
ins_encode %{
|
||||
__ string_compare($str1$$Register, $str2$$Register,
|
||||
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::LL);
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::LL, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// StrComp rule selected when UseAVX > 2 and the node's encoding is
// StrIntrinsicNode::LL.
// str1, str2, cnt1 and cnt2 are USE_KILL; cr is KILL; tmp1 (legRegD) and
// ktmp (kReg) are TEMPs. The kReg temp is the last argument to
// string_compare(), after the encoding constant.
// NOTE(review): the format string does not mention $ktmp.
instruct string_compareL_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
|
||||
rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
|
||||
%{
|
||||
predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
|
||||
ins_encode %{
|
||||
__ string_compare($str1$$Register, $str2$$Register,
|
||||
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::LL, $ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -10911,7 +11216,7 @@ instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI c
|
||||
instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
|
||||
rax_RegI result, legRegD tmp1, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
@ -10919,7 +11224,23 @@ instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI c
|
||||
ins_encode %{
|
||||
__ string_compare($str1$$Register, $str2$$Register,
|
||||
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::UU);
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::UU, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// StrComp rule selected when UseAVX > 2 and the node's encoding is
// StrIntrinsicNode::UU.
// str1, str2, cnt1 and cnt2 are USE_KILL; cr is KILL; tmp1 (legRegD) and
// ktmp (kReg) are TEMPs. The kReg temp is the last argument to
// string_compare(), after the encoding constant.
// NOTE(review): the format string does not mention $ktmp.
instruct string_compareU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
|
||||
rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
|
||||
%{
|
||||
predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
|
||||
ins_encode %{
|
||||
__ string_compare($str1$$Register, $str2$$Register,
|
||||
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::UU, $ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -10927,7 +11248,7 @@ instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI c
|
||||
instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
|
||||
rax_RegI result, legRegD tmp1, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
|
||||
predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
@ -10935,7 +11256,23 @@ instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI
|
||||
ins_encode %{
|
||||
__ string_compare($str1$$Register, $str2$$Register,
|
||||
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::LU);
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::LU, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// StrComp rule selected when UseAVX > 2 and the node's encoding is
// StrIntrinsicNode::LU.
// str1, str2, cnt1 and cnt2 are USE_KILL; cr is KILL; tmp1 (legRegD) and
// ktmp (kReg) are TEMPs. The kReg temp is the last argument to
// string_compare(), after the encoding constant.
// NOTE(review): the format string does not mention $ktmp.
instruct string_compareLU_evex(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
|
||||
rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
|
||||
%{
|
||||
predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
|
||||
ins_encode %{
|
||||
__ string_compare($str1$$Register, $str2$$Register,
|
||||
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::LU, $ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
@ -10943,7 +11280,7 @@ instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI
|
||||
instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
|
||||
rax_RegI result, legRegD tmp1, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
|
||||
predicate(UseAVX <= 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
@ -10951,7 +11288,23 @@ instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI
|
||||
ins_encode %{
|
||||
__ string_compare($str2$$Register, $str1$$Register,
|
||||
$cnt2$$Register, $cnt1$$Register, $result$$Register,
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::UL);
|
||||
$tmp1$$XMMRegister, StrIntrinsicNode::UL, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// EVEX variant of the UL string compare. Selected when UseAVX > 2 and the
// StrComp node reports the UL encoding. Reserves an opmask temporary (ktmp)
// and forwards it to string_compare.
instruct string_compareUL_evex(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
                               rax_RegI result, legRegD tmp1, kReg ktmp, rFlagsReg cr)
%{
  predicate(UseAVX > 2 && ((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  // Inputs are consumed and clobbered; tmp1/ktmp are scratch; flags are killed.
  effect(TEMP tmp1, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);

  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result // KILL $tmp1" %}
  ins_encode %{
    // Note the operand order: str2/cnt2 are passed ahead of str1/cnt1.
    __ string_compare($str2$$Register, $str1$$Register,
                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
                      $tmp1$$XMMRegister, StrIntrinsicNode::UL, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
@ -11126,6 +11479,7 @@ instruct stringL_indexof_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
|
||||
instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
|
||||
legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
|
||||
%{
|
||||
predicate(UseAVX <= 2);
|
||||
match(Set result (StrEquals (Binary str1 str2) cnt));
|
||||
effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
|
||||
|
||||
@ -11133,7 +11487,23 @@ instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI resu
|
||||
ins_encode %{
|
||||
__ arrays_equals(false, $str1$$Register, $str2$$Register,
|
||||
$cnt$$Register, $result$$Register, $tmp3$$Register,
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// EVEX variant of StrEquals, selected when UseAVX > 2. Reserves an opmask
// temporary (ktmp) in addition to the two XMM temporaries and forwards it to
// arrays_equals.
instruct string_equals_evex(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(UseAVX > 2);
  match(Set result (StrEquals (Binary str1 str2) cnt));
  // str1/str2/cnt are consumed and clobbered; tmp3 and the flags are killed.
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);

  format %{ "String Equals $str1,$str2,$cnt -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    // First argument false: the array-specific mode of arrays_equals is not used here.
    __ arrays_equals(false, $str1$$Register, $str2$$Register,
                     $cnt$$Register, $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
@ -11142,7 +11512,7 @@ instruct string_equals(rdi_RegP str1, rsi_RegP str2, rcx_RegI cnt, rax_RegI resu
|
||||
instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
|
||||
legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
|
||||
predicate(UseAVX <= 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
|
||||
match(Set result (AryEq ary1 ary2));
|
||||
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
|
||||
|
||||
@ -11150,7 +11520,23 @@ instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
|
||||
ins_encode %{
|
||||
__ arrays_equals(true, $ary1$$Register, $ary2$$Register,
|
||||
$tmp3$$Register, $result$$Register, $tmp4$$Register,
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// EVEX variant of AryEq for the LL encoding (byte arrays per the format
// string). Selected when UseAVX > 2; reserves an opmask temporary (ktmp) and
// forwards it to arrays_equals.
instruct array_equalsB_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(UseAVX > 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
  match(Set result (AryEq ary1 ary2));
  // Both array pointers are consumed and clobbered; tmp3, tmp4 and flags are killed.
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals byte[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
@ -11158,7 +11544,7 @@ instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
|
||||
instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
|
||||
legRegD tmp1, legRegD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
predicate(UseAVX <= 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
match(Set result (AryEq ary1 ary2));
|
||||
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
|
||||
|
||||
@ -11166,14 +11552,31 @@ instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
|
||||
ins_encode %{
|
||||
__ arrays_equals(true, $ary1$$Register, $ary2$$Register,
|
||||
$tmp3$$Register, $result$$Register, $tmp4$$Register,
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// EVEX variant of AryEq for the UU encoding (char arrays per the format
// string). Selected when UseAVX > 2; reserves an opmask temporary (ktmp) and
// forwards it to arrays_equals.
instruct array_equalsC_evex(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
%{
  predicate(UseAVX > 2 && ((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  match(Set result (AryEq ary1 ary2));
  // Both array pointers are consumed and clobbered; tmp3, tmp4 and flags are killed.
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);

  format %{ "Array Equals char[] $ary1,$ary2 -> $result // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
instruct has_negatives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
|
||||
legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr)
|
||||
legRegD tmp1, legRegD tmp2, rbx_RegI tmp3, rFlagsReg cr,)
|
||||
%{
|
||||
predicate(UseAVX <= 2);
|
||||
match(Set result (HasNegatives ary1 len));
|
||||
effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
|
||||
|
||||
@ -11181,36 +11584,86 @@ instruct has_negatives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
|
||||
ins_encode %{
|
||||
__ has_negatives($ary1$$Register, $len$$Register,
|
||||
$result$$Register, $tmp3$$Register,
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister);
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, knoreg, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// EVEX variant of HasNegatives, selected when UseAVX > 2. Reserves two opmask
// temporaries (ktmp1, ktmp2) in addition to the XMM temporaries and forwards
// them to has_negatives.
// The operand list previously ended with a stray trailing comma
// ("rFlagsReg cr,)"); it has been removed so the list is well-formed.
instruct has_negatives_evex(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
                            legRegD tmp1, legRegD tmp2, kReg ktmp1, kReg ktmp2, rbx_RegI tmp3, rFlagsReg cr)
%{
  predicate(UseAVX > 2);
  match(Set result (HasNegatives ary1 len));
  // ary1/len are consumed and clobbered; tmp3 and the flags are killed.
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp1, TEMP ktmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);

  format %{ "has negatives byte[] $ary1,$len -> $result // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register, $len$$Register,
                     $result$$Register, $tmp3$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
|
||||
// fast char[] to byte[] compression
|
||||
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3, legRegD tmp4,
|
||||
rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
|
||||
instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
|
||||
legRegD tmp4, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
|
||||
predicate(UseAVX <= 2);
|
||||
match(Set result (StrCompressedCopy src (Binary dst len)));
|
||||
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
|
||||
effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst,
|
||||
USE_KILL len, KILL tmp5, KILL cr);
|
||||
|
||||
format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
|
||||
ins_encode %{
|
||||
__ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
|
||||
$tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
|
||||
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
|
||||
$tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
|
||||
knoreg, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// EVEX variant of StrCompressedCopy, selected when UseAVX > 2. Reserves two
// opmask temporaries (ktmp1, ktmp2) on top of the four XMM temporaries and
// forwards them to char_array_compress.
instruct string_compress_evex(rsi_RegP src, rdi_RegP dst, rdx_RegI len, legRegD tmp1, legRegD tmp2, legRegD tmp3,
                              legRegD tmp4, kReg ktmp1, kReg ktmp2, rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
  predicate(UseAVX > 2);
  match(Set result (StrCompressedCopy src (Binary dst len)));
  // src/dst/len are consumed and clobbered; tmp5 and the flags are killed.
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ktmp1, TEMP ktmp2, USE_KILL src, USE_KILL dst,
         USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result // KILL RAX, RCX, RDX" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register,
                           $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
// fast byte[] to char[] inflation
|
||||
instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
|
||||
legRegD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
|
||||
predicate(UseAVX <= 2);
|
||||
match(Set dummy (StrInflatedCopy src (Binary dst len)));
|
||||
effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
|
||||
|
||||
format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
|
||||
ins_encode %{
|
||||
__ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
|
||||
$tmp1$$XMMRegister, $tmp2$$Register);
|
||||
$tmp1$$XMMRegister, $tmp2$$Register, knoreg);
|
||||
%}
|
||||
ins_pipe( pipe_slow );
|
||||
%}
|
||||
|
||||
// EVEX variant of StrInflatedCopy, selected when UseAVX > 2. Reserves an
// opmask temporary (ktmp) and forwards it to byte_array_inflate.
instruct string_inflate_evex(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
                             legRegD tmp1, kReg ktmp, rcx_RegI tmp2, rFlagsReg cr) %{
  predicate(UseAVX > 2);
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  // src/dst/len are consumed and clobbered; tmp1, tmp2 and ktmp are scratch; flags are killed.
  effect(TEMP tmp1, TEMP tmp2, TEMP ktmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);

  format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
                          $tmp1$$XMMRegister, $tmp2$$Register, $ktmp$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
|
||||
@ -12002,11 +12455,13 @@ instruct jmpLoopEndUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
|
||||
|
||||
// mask version
|
||||
// Jump Direct Conditional - Label defines a relative address from Jcc+1
|
||||
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, rFlagsReg cr, label labl)
|
||||
// Bounded mask operand used in the following pattern is needed for
|
||||
// post-loop multiversioning.
|
||||
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, kReg_K1 ktmp, rFlagsReg cr, label labl)
|
||||
%{
|
||||
predicate(n->has_vector_mask_set());
|
||||
predicate(PostLoopMultiversioning && n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cr);
|
||||
effect(USE labl);
|
||||
effect(USE labl, TEMP ktmp);
|
||||
|
||||
ins_cost(400);
|
||||
format %{ "j$cop $labl\t# loop end\n\t"
|
||||
@ -12015,16 +12470,18 @@ instruct jmpLoopEnd_and_restoreMask(cmpOp cop, rFlagsReg cr, label labl)
|
||||
ins_encode %{
|
||||
Label* L = $labl$$label;
|
||||
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
|
||||
__ restorevectmask();
|
||||
__ restorevectmask($ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe(pipe_jcc);
|
||||
%}
|
||||
|
||||
// Jump Direct Conditional - Label defines a relative address from Jcc+1
|
||||
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, rFlagsRegU cmp, label labl) %{
|
||||
predicate(n->has_vector_mask_set());
|
||||
// Bounded mask operand used in the following pattern is needed for
|
||||
// post-loop multiversioning.
|
||||
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, kReg_K1 ktmp, rFlagsRegU cmp, label labl) %{
|
||||
predicate(PostLoopMultiversioning && n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cmp);
|
||||
effect(USE labl);
|
||||
effect(USE labl, TEMP ktmp);
|
||||
|
||||
ins_cost(400);
|
||||
format %{ "j$cop,u $labl\t# loop end\n\t"
|
||||
@ -12033,15 +12490,17 @@ instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, rFlagsRegU cmp, label labl) %{
|
||||
ins_encode %{
|
||||
Label* L = $labl$$label;
|
||||
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
|
||||
__ restorevectmask();
|
||||
__ restorevectmask($ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe(pipe_jcc);
|
||||
%}
|
||||
|
||||
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{
|
||||
predicate(n->has_vector_mask_set());
|
||||
// Bounded mask operand used in the following pattern is needed for
|
||||
// post-loop multiversioning.
|
||||
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, kReg_K1 ktmp, rFlagsRegUCF cmp, label labl) %{
|
||||
predicate(PostLoopMultiversioning && n->has_vector_mask_set());
|
||||
match(CountedLoopEnd cop cmp);
|
||||
effect(USE labl);
|
||||
effect(USE labl, TEMP ktmp);
|
||||
|
||||
ins_cost(300);
|
||||
format %{ "j$cop,u $labl\t# loop end\n\t"
|
||||
@ -12050,7 +12509,7 @@ instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, rFlagsRegUCF cmp, label lab
|
||||
ins_encode %{
|
||||
Label* L = $labl$$label;
|
||||
__ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
|
||||
__ restorevectmask();
|
||||
__ restorevectmask($ktmp$$KRegister);
|
||||
%}
|
||||
ins_pipe(pipe_jcc);
|
||||
%}
|
||||
|
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
// This code is free software; you can redistribute it and/or modify it
|
||||
@ -945,8 +945,8 @@ const char *ArchDesc::getIdealType(const char *idealOp) {
|
||||
}
|
||||
}
|
||||
|
||||
if (strncmp(idealOp, "RegVMask", 8) == 0) {
|
||||
return "Type::BOTTOM";
|
||||
// Vector mask/predicate register operands map to the vector-mask type.
// Compare all 11 characters of "RegVectMask": the former length of 8 was
// carried over from the old name "RegVMask" and only checked "RegVectM".
if (strncmp(idealOp, "RegVectMask", 11) == 0) {
  return "TypeVect::VECTMASK";
}
|
||||
|
||||
// !!!!!
|
||||
|
@ -3964,7 +3964,7 @@ bool MatchRule::is_base_register(FormDict &globals) const {
|
||||
strcmp(opType,"RegL")==0 ||
|
||||
strcmp(opType,"RegF")==0 ||
|
||||
strcmp(opType,"RegD")==0 ||
|
||||
strcmp(opType,"RegVMask")==0 ||
|
||||
strcmp(opType,"RegVectMask")==0 ||
|
||||
strcmp(opType,"VecA")==0 ||
|
||||
strcmp(opType,"VecS")==0 ||
|
||||
strcmp(opType,"VecD")==0 ||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -2265,6 +2265,7 @@ private:
|
||||
// and return the conversion function to build them from OptoReg
|
||||
const char* reg_conversion(const char* rep_var) {
|
||||
if (strcmp(rep_var,"$Register") == 0) return "as_Register";
|
||||
if (strcmp(rep_var,"$KRegister") == 0) return "as_KRegister";
|
||||
if (strcmp(rep_var,"$FloatRegister") == 0) return "as_FloatRegister";
|
||||
#if defined(IA32) || defined(AMD64)
|
||||
if (strcmp(rep_var,"$XMMRegister") == 0) return "as_XMMRegister";
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -824,7 +824,8 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
|
||||
lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT));
|
||||
}
|
||||
}
|
||||
assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL,
|
||||
assert(n_type->isa_vect() == NULL || lrg._is_vector ||
|
||||
ireg == Op_RegD || ireg == Op_RegL || ireg == Op_RegVectMask,
|
||||
"vector must be in vector registers");
|
||||
|
||||
// Check for bound register masks
|
||||
@ -917,6 +918,10 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
|
||||
lrg._is_bound = 1;
|
||||
}
|
||||
break;
|
||||
case Op_RegVectMask:
|
||||
lrg.set_num_regs(RegMask::SlotsPerRegVectMask);
|
||||
lrg.set_reg_pressure(1);
|
||||
break;
|
||||
case Op_RegF:
|
||||
case Op_RegI:
|
||||
case Op_RegN:
|
||||
@ -1036,8 +1041,8 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
|
||||
const RegMask &lrgmask = lrg.mask();
|
||||
uint kreg = n->in(k)->ideal_reg();
|
||||
bool is_vect = RegMask::is_vector(kreg);
|
||||
assert(n->in(k)->bottom_type()->isa_vect() == NULL ||
|
||||
is_vect || kreg == Op_RegD || kreg == Op_RegL,
|
||||
assert(n->in(k)->bottom_type()->isa_vect() == NULL || is_vect ||
|
||||
kreg == Op_RegD || kreg == Op_RegL || kreg == Op_RegVectMask,
|
||||
"vector must be in vector registers");
|
||||
if (lrgmask.is_bound(kreg))
|
||||
lrg._is_bound = 1;
|
||||
|
@ -163,7 +163,7 @@ public:
|
||||
bool is_scalable() {
|
||||
#ifdef ASSERT
|
||||
if (_is_scalable) {
|
||||
// Should only be a vector for now, but it could also be a RegVMask in future.
|
||||
// Should only be a vector for now, but it could also be a RegVectMask in future.
|
||||
assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg");
|
||||
}
|
||||
#endif
|
||||
|
@ -2440,7 +2440,7 @@ static bool is_vector_bitwise_op(Node* n) {
|
||||
}
|
||||
|
||||
static bool is_vector_bitwise_cone_root(Node* n) {
|
||||
if (!is_vector_bitwise_op(n)) {
|
||||
if (n->bottom_type()->isa_vectmask() || !is_vector_bitwise_op(n)) {
|
||||
return false;
|
||||
}
|
||||
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1998, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -409,7 +409,9 @@ uint PhaseChaitin::count_int_pressure(IndexSet* liveout) {
|
||||
LRG& lrg = lrgs(lidx);
|
||||
if (lrg.mask_is_nonempty_and_up() &&
|
||||
!lrg.is_float_or_vector() &&
|
||||
lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) {
|
||||
(lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ||
|
||||
(Matcher::has_predicated_vectors() &&
|
||||
lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegVectMask])))) {
|
||||
cnt += lrg.reg_pressure();
|
||||
}
|
||||
lidx = elements.next();
|
||||
@ -445,7 +447,9 @@ void PhaseChaitin::lower_pressure(Block* b, uint location, LRG& lrg, IndexSet* l
|
||||
} else {
|
||||
// Do not count the SP and flag registers
|
||||
const RegMask& r = lrg.mask();
|
||||
if (r.overlap(*Matcher::idealreg2regmask[Op_RegI])) {
|
||||
if (r.overlap(*Matcher::idealreg2regmask[Op_RegI]) ||
|
||||
(Matcher::has_predicated_vectors() &&
|
||||
r.overlap(*Matcher::idealreg2regmask[Op_RegVectMask]))) {
|
||||
int_pressure.lower(lrg, location);
|
||||
}
|
||||
}
|
||||
@ -500,7 +504,9 @@ void PhaseChaitin::raise_pressure(Block* b, LRG& lrg, Pressure& int_pressure, Pr
|
||||
} else {
|
||||
// Do not count the SP and flag registers
|
||||
const RegMask& rm = lrg.mask();
|
||||
if (rm.overlap(*Matcher::idealreg2regmask[Op_RegI])) {
|
||||
if (rm.overlap(*Matcher::idealreg2regmask[Op_RegI]) ||
|
||||
(Matcher::has_predicated_vectors() &&
|
||||
rm.overlap(*Matcher::idealreg2regmask[Op_RegVectMask]))) {
|
||||
int_pressure.raise(lrg);
|
||||
}
|
||||
}
|
||||
|
@ -103,6 +103,12 @@ public:
|
||||
}
|
||||
|
||||
#if defined(IA32) || defined(AMD64)
|
||||
KRegister as_KRegister(PhaseRegAlloc *ra_, const Node *node) const {
|
||||
return ::as_KRegister(reg(ra_, node));
|
||||
}
|
||||
KRegister as_KRegister(PhaseRegAlloc *ra_, const Node *node, int idx) const {
|
||||
return ::as_KRegister(reg(ra_, node, idx));
|
||||
}
|
||||
XMMRegister as_XMMRegister(PhaseRegAlloc *ra_, const Node *node) const {
|
||||
return ::as_XMMRegister(reg(ra_, node));
|
||||
}
|
||||
|
@ -233,7 +233,7 @@ void PhaseMacroExpand::generate_partial_inlining_block(Node** ctrl, MergeMemNode
|
||||
inline_block = generate_guard(ctrl, bol_le, NULL, PROB_FAIR);
|
||||
stub_block = *ctrl;
|
||||
|
||||
Node* mask_gen = new VectorMaskGenNode(length, TypeLong::LONG, Type::get_const_basic_type(type));
|
||||
Node* mask_gen = new VectorMaskGenNode(length, TypeVect::VECTMASK, Type::get_const_basic_type(type));
|
||||
transform_later(mask_gen);
|
||||
|
||||
unsigned vec_size = lane_count * type2aelembytes(type);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -95,6 +95,7 @@ Matcher::Matcher()
|
||||
idealreg2spillmask [Op_VecY] = NULL;
|
||||
idealreg2spillmask [Op_VecZ] = NULL;
|
||||
idealreg2spillmask [Op_RegFlags] = NULL;
|
||||
idealreg2spillmask [Op_RegVectMask] = NULL;
|
||||
|
||||
idealreg2debugmask [Op_RegI] = NULL;
|
||||
idealreg2debugmask [Op_RegN] = NULL;
|
||||
@ -109,6 +110,7 @@ Matcher::Matcher()
|
||||
idealreg2debugmask [Op_VecY] = NULL;
|
||||
idealreg2debugmask [Op_VecZ] = NULL;
|
||||
idealreg2debugmask [Op_RegFlags] = NULL;
|
||||
idealreg2debugmask [Op_RegVectMask] = NULL;
|
||||
|
||||
idealreg2mhdebugmask[Op_RegI] = NULL;
|
||||
idealreg2mhdebugmask[Op_RegN] = NULL;
|
||||
@ -123,6 +125,7 @@ Matcher::Matcher()
|
||||
idealreg2mhdebugmask[Op_VecY] = NULL;
|
||||
idealreg2mhdebugmask[Op_VecZ] = NULL;
|
||||
idealreg2mhdebugmask[Op_RegFlags] = NULL;
|
||||
idealreg2mhdebugmask[Op_RegVectMask] = NULL;
|
||||
|
||||
debug_only(_mem_node = NULL;) // Ideal memory node consumed by mach node
|
||||
}
|
||||
@ -430,7 +433,7 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
|
||||
return rms;
|
||||
}
|
||||
|
||||
#define NOF_STACK_MASKS (3*12)
|
||||
#define NOF_STACK_MASKS (3*13)
|
||||
|
||||
// Create the initial stack mask used by values spilling to the stack.
|
||||
// Disallow any debug info in outgoing argument areas by setting the
|
||||
@ -487,6 +490,10 @@ void Matcher::init_first_stack_mask() {
|
||||
idealreg2mhdebugmask[Op_VecY] = &rms[34];
|
||||
idealreg2mhdebugmask[Op_VecZ] = &rms[35];
|
||||
|
||||
idealreg2spillmask [Op_RegVectMask] = &rms[36];
|
||||
idealreg2debugmask [Op_RegVectMask] = &rms[37];
|
||||
idealreg2mhdebugmask[Op_RegVectMask] = &rms[38];
|
||||
|
||||
OptoReg::Name i;
|
||||
|
||||
// At first, start with the empty mask
|
||||
@ -531,6 +538,11 @@ void Matcher::init_first_stack_mask() {
|
||||
*idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD];
|
||||
idealreg2spillmask[Op_RegD]->OR(aligned_stack_mask);
|
||||
|
||||
if (Matcher::has_predicated_vectors()) {
|
||||
*idealreg2spillmask[Op_RegVectMask] = *idealreg2regmask[Op_RegVectMask];
|
||||
idealreg2spillmask[Op_RegVectMask]->OR(aligned_stack_mask);
|
||||
}
|
||||
|
||||
if (Matcher::vector_size_supported(T_BYTE,4)) {
|
||||
*idealreg2spillmask[Op_VecS] = *idealreg2regmask[Op_VecS];
|
||||
idealreg2spillmask[Op_VecS]->OR(C->FIRST_STACK_mask());
|
||||
@ -649,6 +661,7 @@ void Matcher::init_first_stack_mask() {
|
||||
*idealreg2debugmask [Op_RegF] = *idealreg2spillmask[Op_RegF];
|
||||
*idealreg2debugmask [Op_RegD] = *idealreg2spillmask[Op_RegD];
|
||||
*idealreg2debugmask [Op_RegP] = *idealreg2spillmask[Op_RegP];
|
||||
*idealreg2debugmask [Op_RegVectMask] = *idealreg2spillmask[Op_RegVectMask];
|
||||
|
||||
*idealreg2debugmask [Op_VecA] = *idealreg2spillmask[Op_VecA];
|
||||
*idealreg2debugmask [Op_VecS] = *idealreg2spillmask[Op_VecS];
|
||||
@ -663,6 +676,7 @@ void Matcher::init_first_stack_mask() {
|
||||
*idealreg2mhdebugmask[Op_RegF] = *idealreg2spillmask[Op_RegF];
|
||||
*idealreg2mhdebugmask[Op_RegD] = *idealreg2spillmask[Op_RegD];
|
||||
*idealreg2mhdebugmask[Op_RegP] = *idealreg2spillmask[Op_RegP];
|
||||
*idealreg2mhdebugmask[Op_RegVectMask] = *idealreg2spillmask[Op_RegVectMask];
|
||||
|
||||
*idealreg2mhdebugmask[Op_VecA] = *idealreg2spillmask[Op_VecA];
|
||||
*idealreg2mhdebugmask[Op_VecS] = *idealreg2spillmask[Op_VecS];
|
||||
@ -683,6 +697,7 @@ void Matcher::init_first_stack_mask() {
|
||||
idealreg2debugmask[Op_RegF]->SUBTRACT(*caller_save_mask);
|
||||
idealreg2debugmask[Op_RegD]->SUBTRACT(*caller_save_mask);
|
||||
idealreg2debugmask[Op_RegP]->SUBTRACT(*caller_save_mask);
|
||||
idealreg2debugmask[Op_RegVectMask]->SUBTRACT(*caller_save_mask);
|
||||
|
||||
idealreg2debugmask[Op_VecA]->SUBTRACT(*caller_save_mask);
|
||||
idealreg2debugmask[Op_VecS]->SUBTRACT(*caller_save_mask);
|
||||
@ -697,6 +712,7 @@ void Matcher::init_first_stack_mask() {
|
||||
idealreg2mhdebugmask[Op_RegF]->SUBTRACT(*mh_caller_save_mask);
|
||||
idealreg2mhdebugmask[Op_RegD]->SUBTRACT(*mh_caller_save_mask);
|
||||
idealreg2mhdebugmask[Op_RegP]->SUBTRACT(*mh_caller_save_mask);
|
||||
idealreg2mhdebugmask[Op_RegVectMask]->SUBTRACT(*mh_caller_save_mask);
|
||||
|
||||
idealreg2mhdebugmask[Op_VecA]->SUBTRACT(*mh_caller_save_mask);
|
||||
idealreg2mhdebugmask[Op_VecS]->SUBTRACT(*mh_caller_save_mask);
|
||||
@ -965,6 +981,7 @@ void Matcher::init_spill_mask( Node *ret ) {
|
||||
idealreg2regmask[Op_VecX] = regmask_for_ideal_register(Op_VecX, ret);
|
||||
idealreg2regmask[Op_VecY] = regmask_for_ideal_register(Op_VecY, ret);
|
||||
idealreg2regmask[Op_VecZ] = regmask_for_ideal_register(Op_VecZ, ret);
|
||||
idealreg2regmask[Op_RegVectMask] = regmask_for_ideal_register(Op_RegVectMask, ret);
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
@ -2559,6 +2576,7 @@ const RegMask* Matcher::regmask_for_ideal_register(uint ideal_reg, Node* ret) {
|
||||
case Op_VecX: // fall-through
|
||||
case Op_VecY: // fall-through
|
||||
case Op_VecZ: spill = new LoadVectorNode(NULL, mem, fp, atp, t->is_vect()); break;
|
||||
case Op_RegVectMask: return Matcher::predicate_reg_mask();
|
||||
|
||||
default: ShouldNotReachHere();
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -319,6 +319,8 @@ public:
|
||||
|
||||
// Some microarchitectures have mask registers used on vectors
|
||||
static const bool has_predicated_vectors(void);
|
||||
static const RegMask* predicate_reg_mask(void);
|
||||
static const TypeVect* predicate_reg_type(const Type* elemTy, int length);
|
||||
|
||||
// Some uarchs have different sized float register resources
|
||||
static const int float_pressure(int default_pressure_threshold);
|
||||
|
@ -693,6 +693,8 @@ public:
|
||||
DEFINE_CLASS_ID(EncodeNarrowPtr, Type, 6)
|
||||
DEFINE_CLASS_ID(EncodeP, EncodeNarrowPtr, 0)
|
||||
DEFINE_CLASS_ID(EncodePKlass, EncodeNarrowPtr, 1)
|
||||
DEFINE_CLASS_ID(Vector, Type, 7)
|
||||
DEFINE_CLASS_ID(VectorMaskCmp, Vector, 0)
|
||||
|
||||
DEFINE_CLASS_ID(Proj, Node, 3)
|
||||
DEFINE_CLASS_ID(CatchProj, Proj, 0)
|
||||
@ -737,8 +739,6 @@ public:
|
||||
DEFINE_CLASS_ID(BoxLock, Node, 10)
|
||||
DEFINE_CLASS_ID(Add, Node, 11)
|
||||
DEFINE_CLASS_ID(Mul, Node, 12)
|
||||
DEFINE_CLASS_ID(Vector, Node, 13)
|
||||
DEFINE_CLASS_ID(VectorMaskCmp, Vector, 0)
|
||||
DEFINE_CLASS_ID(ClearArray, Node, 14)
|
||||
DEFINE_CLASS_ID(Halt, Node, 15)
|
||||
DEFINE_CLASS_ID(Opaque1, Node, 16)
|
||||
|
@ -44,7 +44,7 @@ const char *NodeClassNames[] = {
|
||||
"VecX",
|
||||
"VecY",
|
||||
"VecZ",
|
||||
"RegVMask",
|
||||
"RegVectMask",
|
||||
"RegFlags",
|
||||
"_last_machine_leaf",
|
||||
#include "classes.hpp"
|
||||
|
@ -43,7 +43,7 @@ enum Opcodes {
|
||||
macro(VecX) // Machine vectorx register
|
||||
macro(VecY) // Machine vectory register
|
||||
macro(VecZ) // Machine vectorz register
|
||||
macro(RegVMask) // Vector mask/predicate register
|
||||
macro(RegVectMask) // Vector mask/predicate register
|
||||
macro(RegFlags) // Machine flags register
|
||||
_last_machine_leaf, // Split between regular opcodes and machine
|
||||
#include "classes.hpp"
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2006, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -36,7 +36,7 @@
|
||||
// non-SSA names. A Register is represented as a number. Non-regular values
|
||||
// (e.g., Control, Memory, I/O) use the Special register. The actual machine
|
||||
// registers (as described in the ADL file for a machine) start at zero.
|
||||
// Stack-slots (spill locations) start at the nest Chunk past the last machine
|
||||
// Stack-slots (spill locations) start at the next Chunk past the last machine
|
||||
// register.
|
||||
//
|
||||
// Note that stack spill-slots are treated as a very large register set.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -67,6 +67,8 @@ int RegMask::num_registers(uint ireg) {
|
||||
return SlotsPerVecX;
|
||||
case Op_VecD:
|
||||
return SlotsPerVecD;
|
||||
case Op_RegVectMask:
|
||||
return SlotsPerRegVectMask;
|
||||
case Op_RegD:
|
||||
case Op_RegL:
|
||||
#ifdef _LP64
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -105,6 +105,7 @@ class RegMask {
|
||||
SlotsPerVecX = 4,
|
||||
SlotsPerVecY = 8,
|
||||
SlotsPerVecZ = 16,
|
||||
SlotsPerRegVectMask = X86_ONLY(2) NOT_X86(1)
|
||||
};
|
||||
|
||||
// A constructor only used by the ADLC output. All mask fields are filled
|
||||
|
@ -63,6 +63,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = {
|
||||
{ Bad, T_ARRAY, "array:", false, Node::NotAMachineReg, relocInfo::none }, // Array
|
||||
|
||||
#if defined(PPC64)
|
||||
{ Bad, T_ILLEGAL, "vectormask:", false, Op_RegVectMask, relocInfo::none }, // VectorMask.
|
||||
{ Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA.
|
||||
{ Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS
|
||||
{ Bad, T_ILLEGAL, "vectord:", false, Op_RegL, relocInfo::none }, // VectorD
|
||||
@ -70,6 +71,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = {
|
||||
{ Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY
|
||||
{ Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ
|
||||
#elif defined(S390)
|
||||
{ Bad, T_ILLEGAL, "vectormask:", false, Op_RegVectMask, relocInfo::none }, // VectorMask.
|
||||
{ Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA.
|
||||
{ Bad, T_ILLEGAL, "vectors:", false, 0, relocInfo::none }, // VectorS
|
||||
{ Bad, T_ILLEGAL, "vectord:", false, Op_RegL, relocInfo::none }, // VectorD
|
||||
@ -77,6 +79,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = {
|
||||
{ Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY
|
||||
{ Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ
|
||||
#else // all other
|
||||
{ Bad, T_ILLEGAL, "vectormask:", false, Op_RegVectMask, relocInfo::none }, // VectorMask.
|
||||
{ Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA.
|
||||
{ Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS
|
||||
{ Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD
|
||||
@ -658,6 +661,9 @@ void Type::Initialize_shared(Compile* current) {
|
||||
// get_zero_type() should not happen for T_CONFLICT
|
||||
_zero_type[T_CONFLICT]= NULL;
|
||||
|
||||
TypeVect::VECTMASK = (TypeVect*)(new TypeVectMask(TypeInt::BOOL, MaxVectorSize))->hashcons();
|
||||
mreg2type[Op_RegVectMask] = TypeVect::VECTMASK;
|
||||
|
||||
if (Matcher::supports_scalable_vector()) {
|
||||
TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE));
|
||||
}
|
||||
@ -2376,6 +2382,7 @@ const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors
|
||||
const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors
|
||||
const TypeVect *TypeVect::VECTY = NULL; // 256-bit vectors
|
||||
const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors
|
||||
const TypeVect *TypeVect::VECTMASK = NULL; // predicate/mask vector
|
||||
|
||||
//------------------------------make-------------------------------------------
|
||||
const TypeVect* TypeVect::make(const Type *elem, uint length) {
|
||||
@ -2403,6 +2410,15 @@ const TypeVect* TypeVect::make(const Type *elem, uint length) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Build the vector type used for a mask/predicate over 'length' elements of
// type 'elem'.
//  - If Matcher::has_predicated_vectors() is true, the type is obtained from
//    Matcher::predicate_reg_type(elem, length) and passed through hashcons()
//    before being returned.
//  - Otherwise the ordinary TypeVect::make(elem, length) result is returned.
const TypeVect *TypeVect::makemask(const Type* elem, uint length) {
|
||||
if (Matcher::has_predicated_vectors()) {
|
||||
const TypeVect* mtype = Matcher::predicate_reg_type(elem, length);
|
||||
// The matcher hands back a const pointer; constness is cast away only to call hashcons().
return (TypeVect*)(const_cast<TypeVect*>(mtype))->hashcons();
|
||||
} else {
|
||||
return make(elem, length);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------meet-------------------------------------------
|
||||
// Compute the MEET of two types. It returns a new Type object.
|
||||
const Type *TypeVect::xmeet( const Type *t ) const {
|
||||
@ -2417,6 +2433,13 @@ const Type *TypeVect::xmeet( const Type *t ) const {
|
||||
|
||||
default: // All else is a mistake
|
||||
typerr(t);
|
||||
case VectorMask: {
|
||||
const TypeVectMask* v = t->is_vectmask();
|
||||
assert( base() == v->base(), "");
|
||||
assert(length() == v->length(), "");
|
||||
assert(element_basic_type() == v->element_basic_type(), "");
|
||||
return TypeVect::makemask(_elem->xmeet(v->_elem), _length);
|
||||
}
|
||||
case VectorA:
|
||||
case VectorS:
|
||||
case VectorD:
|
||||
@ -2484,6 +2507,8 @@ void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const {
|
||||
st->print("vectory["); break;
|
||||
case VectorZ:
|
||||
st->print("vectorz["); break;
|
||||
case VectorMask:
|
||||
st->print("vectormask["); break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
@ -2493,6 +2518,14 @@ void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const {
|
||||
}
|
||||
#endif
|
||||
|
||||
// Structural equality for mask types: equal when both the element type and
// the length match. 't' is converted with is_vectmask(), which asserts that
// its base kind is VectorMask, so callers must only pass mask types.
bool TypeVectMask::eq(const Type *t) const {
|
||||
const TypeVectMask *v = t->is_vectmask();
|
||||
return (element_type() == v->element_type()) && (length() == v->length());
|
||||
}
|
||||
|
||||
// Dual of a mask type: a newly allocated TypeVectMask over the dual of the
// element type, with the same length. This function does not call hashcons()
// on the result.
const Type *TypeVectMask::xdual() const {
|
||||
return new TypeVectMask(element_type()->dual(), length());
|
||||
}
|
||||
|
||||
//=============================================================================
|
||||
// Convenience common pre-built types.
|
||||
|
@ -60,6 +60,7 @@ class TypeVectD;
|
||||
class TypeVectX;
|
||||
class TypeVectY;
|
||||
class TypeVectZ;
|
||||
class TypeVectMask;
|
||||
class TypePtr;
|
||||
class TypeRawPtr;
|
||||
class TypeOopPtr;
|
||||
@ -89,6 +90,8 @@ public:
|
||||
|
||||
Tuple, // Method signature or object layout
|
||||
Array, // Array types
|
||||
|
||||
VectorMask, // Vector predicate/mask type
|
||||
VectorA, // (Scalable) Vector types for vector length agnostic
|
||||
VectorS, // 32bit Vector types
|
||||
VectorD, // 64bit Vector types
|
||||
@ -299,6 +302,8 @@ public:
|
||||
const TypeAry *isa_ary() const; // Returns NULL if not an Array type
|
||||
const TypeVect *is_vect() const; // Vector
|
||||
const TypeVect *isa_vect() const; // Returns NULL if not a Vector
|
||||
const TypeVectMask *is_vectmask() const; // Predicate/Mask Vector
|
||||
const TypeVectMask *isa_vectmask() const; // Returns NULL if not a Vector Predicate/Mask
|
||||
const TypePtr *is_ptr() const; // Asserts it is a ptr type
|
||||
const TypePtr *isa_ptr() const; // Returns NULL if not ptr type
|
||||
const TypeRawPtr *isa_rawptr() const; // NOT Java oop
|
||||
@ -800,6 +805,13 @@ public:
|
||||
// Used directly by Replicate nodes to construct singleton vector.
|
||||
static const TypeVect *make(const Type* elem, uint length);
|
||||
|
||||
// Convenience overload taking a BasicType: translates elem_bt to a Type with
// get_const_basic_type() and forwards to makemask(const Type*, uint).
static const TypeVect *makemask(const BasicType elem_bt, uint length) {
|
||||
// Use bottom primitive type.
|
||||
return makemask(get_const_basic_type(elem_bt), length);
|
||||
}
|
||||
static const TypeVect *makemask(const Type* elem, uint length);
|
||||
|
||||
|
||||
virtual const Type *xmeet( const Type *t) const;
|
||||
virtual const Type *xdual() const; // Compute dual right now.
|
||||
|
||||
@ -809,6 +821,7 @@ public:
|
||||
static const TypeVect *VECTX;
|
||||
static const TypeVect *VECTY;
|
||||
static const TypeVect *VECTZ;
|
||||
static const TypeVect *VECTMASK;
|
||||
|
||||
#ifndef PRODUCT
|
||||
virtual void dump2(Dict &d, uint, outputStream *st) const; // Specialized per-Type dumping
|
||||
@ -845,6 +858,14 @@ class TypeVectZ : public TypeVect {
|
||||
TypeVectZ(const Type* elem, uint length) : TypeVect(VectorZ, elem, length) {}
|
||||
};
|
||||
|
||||
// Vector predicate/mask type: a TypeVect whose base kind is VectorMask.
// It overrides eq() and xdual(); everything else is inherited from TypeVect.
class TypeVectMask : public TypeVect {
|
||||
public:
|
||||
friend class TypeVect;
|
||||
// Tags the type as VectorMask and forwards element type and length to TypeVect.
TypeVectMask(const Type* elem, uint length) : TypeVect(VectorMask, elem, length) {}
|
||||
virtual bool eq(const Type *t) const;
|
||||
virtual const Type *xdual() const;
|
||||
};
|
||||
|
||||
//------------------------------TypePtr----------------------------------------
|
||||
// Class of machine Pointer Types: raw data, instances or arrays.
|
||||
// If the _base enum is AnyPtr, then this refers to all of the above.
|
||||
@ -1682,13 +1703,22 @@ inline const TypeAry *Type::isa_ary() const {
|
||||
return ((_base == Array) ? (TypeAry*)this : NULL);
|
||||
}
|
||||
|
||||
// Checked downcast: asserts that this type's base kind is VectorMask and
// returns it as a TypeVectMask. Use isa_vectmask() when the kind is unknown.
inline const TypeVectMask *Type::is_vectmask() const {
|
||||
assert( _base == VectorMask, "Not a Vector Mask" );
|
||||
return (TypeVectMask*)this;
|
||||
}
|
||||
|
||||
// Unchecked query: returns this as a TypeVectMask when the base kind is
// VectorMask, and NULL otherwise.
inline const TypeVectMask *Type::isa_vectmask() const {
|
||||
return (_base == VectorMask) ? (TypeVectMask*)this : NULL;
|
||||
}
|
||||
|
||||
// Checked downcast to TypeVect: asserts that _base lies in the vector range.
// NOTE(review): two range asserts are listed here, one bounded below by
// VectorA and one by VectorMask. VectorMask is declared just before VectorA
// in the kind list (Tuple, Array, VectorMask, VectorA, ...), so the
// VectorMask bound is the one that also admits mask types -- confirm which
// single assert the applied file keeps.
inline const TypeVect *Type::is_vect() const {
|
||||
assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" );
|
||||
assert( _base >= VectorMask && _base <= VectorZ, "Not a Vector" );
|
||||
return (TypeVect*)this;
|
||||
}
|
||||
|
||||
// Unchecked query: returns this as a TypeVect when _base lies in the vector
// range, and NULL otherwise.
// NOTE(review): two return statements are listed here, one with VectorA as
// the lower bound and one with VectorMask. As written the second can never
// execute; presumably only the VectorMask form (which also admits mask
// types) belongs in the applied file -- confirm.
inline const TypeVect *Type::isa_vect() const {
|
||||
return (_base >= VectorA && _base <= VectorZ) ? (TypeVect*)this : NULL;
|
||||
return (_base >= VectorMask && _base <= VectorZ) ? (TypeVect*)this : NULL;
|
||||
}
|
||||
|
||||
inline const TypePtr *Type::is_ptr() const {
|
||||
|
@ -804,7 +804,7 @@ class StoreVectorMaskedNode : public StoreVectorNode {
|
||||
public:
|
||||
StoreVectorMaskedNode(Node* c, Node* mem, Node* dst, Node* src, const TypePtr* at, Node* mask)
|
||||
: StoreVectorNode(c, mem, dst, at, src) {
|
||||
assert(mask->bottom_type()->is_long(), "sanity");
|
||||
assert(mask->bottom_type()->is_vectmask(), "sanity");
|
||||
init_class_id(Class_StoreVector);
|
||||
set_mismatched_access();
|
||||
add_req(mask);
|
||||
@ -822,7 +822,7 @@ class LoadVectorMaskedNode : public LoadVectorNode {
|
||||
public:
|
||||
LoadVectorMaskedNode(Node* c, Node* mem, Node* src, const TypePtr* at, const TypeVect* vt, Node* mask)
|
||||
: LoadVectorNode(c, mem, src, at, vt) {
|
||||
assert(mask->bottom_type()->is_long(), "sanity");
|
||||
assert(mask->bottom_type()->is_vectmask(), "sanity");
|
||||
init_class_id(Class_LoadVector);
|
||||
set_mismatched_access();
|
||||
add_req(mask);
|
||||
@ -845,6 +845,9 @@ class VectorMaskGenNode : public TypeNode {
|
||||
virtual int Opcode() const;
|
||||
const Type* get_elem_type() { return _elemType;}
|
||||
virtual uint size_of() const { return sizeof(VectorMaskGenNode); }
|
||||
// Reports Op_RegVectMask as this node's ideal register kind, i.e. the
// vector mask/predicate register opcode.
virtual uint ideal_reg() const {
|
||||
return Op_RegVectMask;
|
||||
}
|
||||
|
||||
private:
|
||||
const Type* _elemType;
|
||||
|
Loading…
x
Reference in New Issue
Block a user