8211251: Default mask register for avx512 instructions
Encode AVX-512 instructions as unmasked instructions where no mask register is specified.

Reviewed-by: kvn
commit b0ea3a49d2 (parent 5985805474)
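Background: in the 4-byte EVEX prefix, the final prefix byte carries a 3-bit aaa field that selects the opmask register k0-k7, and hardware treats aaa = 000 (k0) as "no masking". Before this change HotSpot emitted aaa = 001 (k1) whenever no mask was given, which forced every stub to keep k1 = all-ones; after it, an unspecified mask encodes as k0 and needs no setup. A minimal standalone C++ sketch of that prefix byte, assuming the layout documented in the Intel SDM (z | L'L | b | V' | aaa); this is illustrative only, not HotSpot's encoder:

    #include <cstdint>
    #include <cstdio>

    // Sketch of EVEX prefix byte 3 (often called P2), assuming the
    // Intel SDM layout: z | L'L | b | V' | aaa.
    // aaa selects the opmask register; 000 (k0) means "unmasked".
    static uint8_t evex_p2(bool zero_masking, int vector_len_bits,
                           bool broadcast, int opmask /* 0..7, 0 = none */) {
      uint8_t p2 = 0;
      p2 |= (zero_masking ? 1 : 0) << 7;   // z: zeroing vs. merging
      p2 |= (vector_len_bits & 0x3) << 5;  // L'L: 128/256/512-bit
      p2 |= (broadcast ? 1 : 0) << 4;      // b: embedded broadcast
      p2 |= 1 << 3;                        // V': inverted high bit of vvvv (unused here)
      p2 |= opmask & 0x7;                  // aaa: opmask register specifier
      return p2;
    }

    int main() {
      // Old behavior: "no mask given" still encoded aaa = 1 (k1).
      // New behavior: aaa = 0 (k0) encodes a genuinely unmasked instruction.
      printf("unmasked (k0): P2 = 0x%02x\n", evex_p2(false, 2, false, 0));
      printf("masked by k1 : P2 = 0x%02x\n", evex_p2(false, 2, false, 1));
    }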
@@ -871,11 +871,6 @@ private:
   void clear_managed(void) { _is_managed = false; }
   bool is_managed(void) { return _is_managed; }
 
-  // Following functions are for stub code use only
-  void set_vector_masking(void) { _vector_masking = true; }
-  void clear_vector_masking(void) { _vector_masking = false; }
-  bool is_vector_masking(void) { return _vector_masking; }
-
   void lea(Register dst, Address src);
 
   void mov(Register dst, Register src);
@@ -2210,7 +2205,7 @@ public:
     int vector_len,     // The length of vector to be applied in encoding - for both AVX and EVEX
     bool rex_vex_w,     // Width of data: if 32-bits or less, false, else if 64-bit or specially defined, true
     bool legacy_mode,   // Details if either this instruction is conditionally encoded to AVX or earlier if true else possibly EVEX
-    bool no_reg_mask,   // when true, k0 is used when EVEX encoding is chosen, else k1 is used under the same condition
+    bool no_reg_mask,   // when true, k0 is used when EVEX encoding is chosen, else embedded_opmask_register_specifier is used
     bool uses_vl)       // This instruction may have legacy constraints based on vector length for EVEX
     :
     _avx_vector_len(vector_len),
@@ -2225,7 +2220,7 @@ public:
     _evex_encoding(0),
     _is_clear_context(true),
     _is_extended_context(false),
-    _embedded_opmask_register_specifier(1), // hard code k1, it will be initialized for now
+    _embedded_opmask_register_specifier(0), // hard code k0
     _current_assembler(NULL) {
     if (UseAVX < 3) _legacy_mode = true;
   }
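Why k1 previously had to hold all-ones: a merge-masked EVEX instruction only writes lanes whose mask bit is set, so a "plain" move defaulting to k1 touched every lane only if k1 = 0xffff. A scalar model of merge-masking semantics, as a minimal illustrative sketch (not HotSpot code):

    #include <cstdint>

    // Scalar model of a merge-masked vector move, e.g.
    // evmovdquw(dst, k, src): lane i is written only if mask bit i is set.
    // With the old default mask k1, k1 had to be all-ones for an unqualified
    // move to copy every lane; with aaa = 0 (k0) the hardware performs the
    // move unmasked and no mask-register setup is needed.
    template <typename T, int N>
    void masked_move(T (&dst)[N], const T (&src)[N], uint32_t mask) {
      for (int i = 0; i < N; i++) {
        if ((mask >> i) & 1) {
          dst[i] = src[i];  // merge-masking: unset lanes keep their old value
        }
      }
    }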
@@ -3267,6 +3267,7 @@ void MacroAssembler::movq(XMMRegister dst, AddressLiteral src) {
 }
 
 void MacroAssembler::setvectmask(Register dst, Register src) {
+  guarantee(PostLoopMultiversioning == true, "must be");
   Assembler::movl(dst, 1);
   Assembler::shlxl(dst, dst, src);
   Assembler::decl(dst);
@@ -3275,6 +3276,7 @@ void MacroAssembler::setvectmask(Register dst, Register src) {
 }
 
 void MacroAssembler::restorevectmask() {
+  guarantee(PostLoopMultiversioning == true, "must be");
   Assembler::knotwl(k1, k0);
 }
 
@@ -5026,7 +5028,7 @@ void MacroAssembler::restore_cpu_control_state_after_jni() {
   // Clear upper bits of YMM registers to avoid SSE <-> AVX transition penalty.
   vzeroupper();
   // Reset k1 to 0xffff.
-  if (VM_Version::supports_evex()) {
+  if (PostLoopMultiversioning && VM_Version::supports_evex()) {
     push(rcx);
     movl(rcx, 0xffff);
     kmovwl(k1, rcx);
@@ -6681,8 +6683,6 @@ void MacroAssembler::has_negatives(Register ary1, Register len,
       VM_Version::supports_avx512vlbw() &&
       VM_Version::supports_bmi2()) {
 
-    set_vector_masking(); // opening of the stub context for programming mask registers
-
     Label test_64_loop, test_tail;
     Register tmp3_aliased = len;
 
@@ -6711,15 +6711,12 @@ void MacroAssembler::has_negatives(Register ary1, Register len,
     testl(tmp1, -1);
     jcc(Assembler::zero, FALSE_LABEL);
 
-    // Save k1
-    kmovql(k3, k1);
-
     // ~(~0 << len) applied up to two times (for 32-bit scenario)
 #ifdef _LP64
     mov64(tmp3_aliased, 0xFFFFFFFFFFFFFFFF);
     shlxq(tmp3_aliased, tmp3_aliased, tmp1);
     notq(tmp3_aliased);
-    kmovql(k1, tmp3_aliased);
+    kmovql(k3, tmp3_aliased);
 #else
     Label k_init;
     jmp(k_init);
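The ~(~0 << len) idiom above builds a mask with exactly the low len bits set, so the k register covers just the tail elements; the only change in this hunk is that the result now lands in the stub-local k3 instead of the former default mask k1, making the save/restore of k1 unnecessary. A quick check of the arithmetic (illustrative sketch; len is the tail count, 0..63, matching the shlx operand):

    #include <cassert>
    #include <cstdint>

    // mask = ~(~0 << len) sets exactly the low `len` bits, matching the
    // mov64/shlxq/notq sequence that feeds kmovql above.
    static uint64_t tail_mask(unsigned len /* 0..63 */) {
      return ~(~uint64_t(0) << len);
    }

    int main() {
      assert(tail_mask(0) == 0);        // nothing left to process
      assert(tail_mask(3) == 0x7);      // three tail elements
      assert(tail_mask(16) == 0xffff);  // sixteen tail elements
    }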
@@ -6728,7 +6725,7 @@ void MacroAssembler::has_negatives(Register ary1, Register len,
     // data required to compose 64 1's to the instruction stream
     // We emit 64 byte wide series of elements from 0..63 which later on would
     // be used as a compare targets with tail count contained in tmp1 register.
-    // Result would be a k1 register having tmp1 consecutive number or 1
+    // Result would be a k register having tmp1 consecutive number or 1
     // counting from least significant bit.
     address tmp = pc();
     emit_int64(0x0706050403020100);
@@ -6744,18 +6741,14 @@ void MacroAssembler::has_negatives(Register ary1, Register len,
     lea(len, InternalAddress(tmp));
     // create mask to test for negative byte inside a vector
     evpbroadcastb(vec1, tmp1, Assembler::AVX_512bit);
-    evpcmpgtb(k1, vec1, Address(len, 0), Assembler::AVX_512bit);
+    evpcmpgtb(k3, vec1, Address(len, 0), Assembler::AVX_512bit);
 
 #endif
-    evpcmpgtb(k2, k1, vec2, Address(ary1, 0), Assembler::AVX_512bit);
-    ktestq(k2, k1);
-    // Restore k1
-    kmovql(k1, k3);
+    evpcmpgtb(k2, k3, vec2, Address(ary1, 0), Assembler::AVX_512bit);
+    ktestq(k2, k3);
     jcc(Assembler::notZero, TRUE_LABEL);
 
     jmp(FALSE_LABEL);
-
-    clear_vector_masking(); // closing of the stub context for programming mask registers
   } else {
     movl(result, len); // copy
 
@@ -7197,10 +7190,6 @@ void MacroAssembler::generate_fill(BasicType t, bool aligned,
 {
   assert( UseSSE >= 2, "supported cpu only" );
   Label L_fill_32_bytes_loop, L_check_fill_8_bytes, L_fill_8_bytes_loop, L_fill_8_bytes;
-  if (UseAVX > 2) {
-    movl(rtmp, 0xffff);
-    kmovwl(k1, rtmp);
-  }
   movdl(xtmp, value);
   if (UseAVX > 2 && UseUnalignedLoadStores) {
     // Fill 64-byte chunks
@@ -7945,7 +7934,6 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
       VM_Version::supports_avx512vlbw()) {
     Label VECTOR64_LOOP, VECTOR64_NOT_EQUAL, VECTOR32_TAIL;
 
-    set_vector_masking(); // opening of the stub context for programming mask registers
     cmpq(length, 64);
     jcc(Assembler::less, VECTOR32_TAIL);
     movq(tmp1, length);
@@ -7968,19 +7956,15 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
 
     //bind(VECTOR64_TAIL);
     // AVX512 code to compare upto 63 byte vectors.
-    // Save k1
-    kmovql(k3, k1);
     mov64(tmp2, 0xFFFFFFFFFFFFFFFF);
     shlxq(tmp2, tmp2, tmp1);
     notq(tmp2);
-    kmovql(k1, tmp2);
+    kmovql(k3, tmp2);
 
-    evmovdqub(rymm0, k1, Address(obja, result), Assembler::AVX_512bit);
-    evpcmpeqb(k7, k1, rymm0, Address(objb, result), Assembler::AVX_512bit);
+    evmovdqub(rymm0, k3, Address(obja, result), Assembler::AVX_512bit);
+    evpcmpeqb(k7, k3, rymm0, Address(objb, result), Assembler::AVX_512bit);
 
-    ktestql(k7, k1);
-    // Restore k1
-    kmovql(k1, k3);
+    ktestql(k7, k3);
     jcc(Assembler::below, SAME_TILL_END); // not mismatch
 
     bind(VECTOR64_NOT_EQUAL);
@@ -7991,7 +7975,6 @@ void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register
     shrq(result);
     jmp(DONE);
     bind(VECTOR32_TAIL);
-    clear_vector_masking(); // closing of the stub context for programming mask registers
   }
 
   cmpq(length, 8);
@@ -8752,11 +8735,6 @@ void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, Regi
   // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
   // context for the registers used, where all instructions below are using 128-bit mode
   // On EVEX without VL and BW, these instructions will all be AVX.
-  if (VM_Version::supports_avx512vlbw()) {
-    movl(tmp, 0xffff);
-    kmovwl(k1, tmp);
-  }
-
   lea(table, ExternalAddress(StubRoutines::crc_table_addr()));
   notl(crc); // ~crc
   cmpl(len, 16);
@@ -9418,9 +9396,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
       VM_Version::supports_avx512vlbw() &&
       VM_Version::supports_bmi2()) {
 
-    set_vector_masking(); // opening of the stub context for programming mask registers
-
-    Label copy_32_loop, copy_loop_tail, restore_k1_return_zero, below_threshold;
+    Label copy_32_loop, copy_loop_tail, below_threshold;
 
     // alignment
     Label post_alignment;
@@ -9434,9 +9410,6 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
     movl(result, 0x00FF);
     evpbroadcastw(tmp2Reg, result, Assembler::AVX_512bit);
 
-    // Save k1
-    kmovql(k3, k1);
-
     testl(len, -64);
     jcc(Assembler::zero, post_alignment);
 
@@ -9453,14 +9426,14 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
     movl(result, 0xFFFFFFFF);
     shlxl(result, result, tmp5);
     notl(result);
-    kmovdl(k1, result);
+    kmovdl(k3, result);
 
-    evmovdquw(tmp1Reg, k1, Address(src, 0), Assembler::AVX_512bit);
-    evpcmpuw(k2, k1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
-    ktestd(k2, k1);
-    jcc(Assembler::carryClear, restore_k1_return_zero);
+    evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
+    evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
+    ktestd(k2, k3);
+    jcc(Assembler::carryClear, return_zero);
 
-    evpmovwb(Address(dst, 0), k1, tmp1Reg, Assembler::AVX_512bit);
+    evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
 
     addptr(src, tmp5);
     addptr(src, tmp5);
@@ -9483,7 +9456,7 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
     evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
     evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
     kortestdl(k2, k2);
-    jcc(Assembler::carryClear, restore_k1_return_zero);
+    jcc(Assembler::carryClear, return_zero);
 
     // All elements in current processed chunk are valid candidates for
     // compression. Write a truncated byte elements to the memory.
@@ -9494,8 +9467,6 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
     bind(copy_loop_tail);
     // bail out when there is nothing to be done
     testl(tmp5, 0xFFFFFFFF);
-    // Restore k1
-    kmovql(k1, k3);
     jcc(Assembler::zero, return_length);
 
     movl(len, tmp5);
@@ -9505,25 +9476,16 @@ void MacroAssembler::char_array_compress(Register src, Register dst, Register le
     shlxl(result, result, len);
     notl(result);
 
-    kmovdl(k1, result);
+    kmovdl(k3, result);
 
-    evmovdquw(tmp1Reg, k1, Address(src, 0), Assembler::AVX_512bit);
-    evpcmpuw(k2, k1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
-    ktestd(k2, k1);
-    jcc(Assembler::carryClear, restore_k1_return_zero);
+    evmovdquw(tmp1Reg, k3, Address(src, 0), Assembler::AVX_512bit);
+    evpcmpuw(k2, k3, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
+    ktestd(k2, k3);
+    jcc(Assembler::carryClear, return_zero);
 
-    evpmovwb(Address(dst, 0), k1, tmp1Reg, Assembler::AVX_512bit);
-    // Restore k1
-    kmovql(k1, k3);
+    evpmovwb(Address(dst, 0), k3, tmp1Reg, Assembler::AVX_512bit);
     jmp(return_length);
 
-    bind(restore_k1_return_zero);
-    // Restore k1
-    kmovql(k1, k3);
-    jmp(return_zero);
-
-    clear_vector_masking(); // closing of the stub context for programming mask registers
-
     bind(below_threshold);
   }
 
@@ -9637,8 +9599,6 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
       VM_Version::supports_avx512vlbw() &&
       VM_Version::supports_bmi2()) {
 
-    set_vector_masking(); // opening of the stub context for programming mask registers
-
     Label copy_32_loop, copy_tail;
     Register tmp3_aliased = len;
 
@@ -9670,22 +9630,15 @@ void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len
     testl(tmp2, -1); // we don't destroy the contents of tmp2 here
     jcc(Assembler::zero, done);
 
-    // Save k1
-    kmovql(k2, k1);
-
     // ~(~0 << length), where length is the # of remaining elements to process
     movl(tmp3_aliased, -1);
     shlxl(tmp3_aliased, tmp3_aliased, tmp2);
     notl(tmp3_aliased);
-    kmovdl(k1, tmp3_aliased);
-    evpmovzxbw(tmp1, k1, Address(src, 0), Assembler::AVX_512bit);
-    evmovdquw(Address(dst, 0), k1, tmp1, Assembler::AVX_512bit);
+    kmovdl(k2, tmp3_aliased);
+    evpmovzxbw(tmp1, k2, Address(src, 0), Assembler::AVX_512bit);
+    evmovdquw(Address(dst, 0), k2, tmp1, Assembler::AVX_512bit);
 
-    // Restore k1
-    kmovql(k1, k2);
     jmp(done);
-
-    clear_vector_masking(); // closing of the stub context for programming mask registers
   }
   if (UseSSE42Intrinsics) {
     Label copy_16_loop, copy_8_loop, copy_bytes, copy_new_tail, copy_tail;
@@ -153,12 +153,6 @@ class StubGenerator: public StubCodeGenerator {
     __ movptr(saved_rsi, rsi);
     __ movptr(saved_rbx, rbx);
 
-    // provide initial value for required masks
-    if (UseAVX > 2) {
-      __ movl(rbx, 0xffff);
-      __ kmovwl(k1, rbx);
-    }
-
     // save and initialize %mxcsr
     if (sse_save) {
       Label skip_ldmx;
@@ -679,12 +673,7 @@ class StubGenerator: public StubCodeGenerator {
   void xmm_copy_forward(Register from, Register to_from, Register qword_count) {
     assert( UseSSE >= 2, "supported cpu only" );
     Label L_copy_64_bytes_loop, L_copy_64_bytes, L_copy_8_bytes, L_exit;
-    if (UseAVX > 2) {
-      __ push(rbx);
-      __ movl(rbx, 0xffff);
-      __ kmovwl(k1, rbx);
-      __ pop(rbx);
-    }
 
     // Copy 64-byte chunks
     __ jmpb(L_copy_64_bytes);
     __ align(OptoLoopAlignment);
@@ -2115,14 +2104,6 @@ class StubGenerator: public StubCodeGenerator {
 
     __ enter(); // required for proper stackwalking of RuntimeStub frame
 
-    // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
-    // context for the registers used, where all instructions below are using 128-bit mode
-    // On EVEX without VL and BW, these instructions will all be AVX.
-    if (VM_Version::supports_avx512vlbw()) {
-      __ movl(rdx, 0xffff);
-      __ kmovdl(k1, rdx);
-    }
-
     __ movptr(from, from_param);
     __ movptr(key, key_param);
 
@@ -2222,14 +2203,6 @@ class StubGenerator: public StubCodeGenerator {
 
     __ enter(); // required for proper stackwalking of RuntimeStub frame
 
-    // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
-    // context for the registers used, where all instructions below are using 128-bit mode
-    // On EVEX without VL and BW, these instructions will all be AVX.
-    if (VM_Version::supports_avx512vlbw()) {
-      __ movl(rdx, 0xffff);
-      __ kmovdl(k1, rdx);
-    }
-
     __ movptr(from, from_param);
     __ movptr(key, key_param);
 
@@ -2356,14 +2329,6 @@ class StubGenerator: public StubCodeGenerator {
     __ enter(); // required for proper stackwalking of RuntimeStub frame
     handleSOERegisters(true /*saving*/);
 
-    // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
-    // context for the registers used, where all instructions below are using 128-bit mode
-    // On EVEX without VL and BW, these instructions will all be AVX.
-    if (VM_Version::supports_avx512vlbw()) {
-      __ movl(rdx, 0xffff);
-      __ kmovdl(k1, rdx);
-    }
-
     // load registers from incoming parameters
     const Address from_param(rbp, 8+0);
     const Address to_param (rbp, 8+4);
@@ -2532,14 +2497,6 @@ class StubGenerator: public StubCodeGenerator {
     __ enter(); // required for proper stackwalking of RuntimeStub frame
     handleSOERegisters(true /*saving*/);
 
-    // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
-    // context for the registers used, where all instructions below are using 128-bit mode
-    // On EVEX without VL and BW, these instructions will all be AVX.
-    if (VM_Version::supports_avx512vlbw()) {
-      __ movl(rdx, 0xffff);
-      __ kmovdl(k1, rdx);
-    }
-
     // load registers from incoming parameters
     const Address from_param(rbp, 8+0);
     const Address to_param (rbp, 8+4);
@@ -2693,14 +2650,6 @@ class StubGenerator: public StubCodeGenerator {
     __ enter(); // required for proper stackwalking of RuntimeStub frame
     handleSOERegisters(true /*saving*/); // save rbx, rsi, rdi
 
-    // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
-    // context for the registers used, where all instructions below are using 128-bit mode
-    // On EVEX without VL and BW, these instructions will all be AVX.
-    if (VM_Version::supports_avx512vlbw()) {
-      __ movl(rdx, 0xffff);
-      __ kmovdl(k1, rdx);
-    }
-
     // load registers from incoming parameters
     const Address from_param(rbp, 8+0);
     const Address to_param (rbp, 8+4);
@@ -3154,14 +3103,6 @@ class StubGenerator: public StubCodeGenerator {
     __ enter();
     handleSOERegisters(true); // Save registers
 
-    // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
-    // context for the registers used, where all instructions below are using 128-bit mode
-    // On EVEX without VL and BW, these instructions will all be AVX.
-    if (VM_Version::supports_avx512vlbw()) {
-      __ movl(rdx, 0xffff);
-      __ kmovdl(k1, rdx);
-    }
-
     __ movptr(state, state_param);
     __ movptr(subkeyH, subkeyH_param);
     __ movptr(data, data_param);
@@ -254,10 +254,7 @@ class StubGenerator: public StubCodeGenerator {
     __ movptr(r13_save, r13);
     __ movptr(r14_save, r14);
     __ movptr(r15_save, r15);
-    if (UseAVX > 2) {
-      __ movl(rbx, 0xffff);
-      __ kmovwl(k1, rbx);
-    }
 
 #ifdef _WIN64
     int last_reg = 15;
     if (UseAVX > 2) {
@@ -1257,10 +1254,6 @@ class StubGenerator: public StubCodeGenerator {
     __ align(OptoLoopAlignment);
     if (UseUnalignedLoadStores) {
       Label L_end;
-      if (UseAVX > 2) {
-        __ movl(to, 0xffff);
-        __ kmovwl(k1, to);
-      }
       // Copy 64-bytes per iteration
       __ BIND(L_loop);
       if (UseAVX > 2) {
@@ -1341,10 +1334,6 @@ class StubGenerator: public StubCodeGenerator {
     __ align(OptoLoopAlignment);
     if (UseUnalignedLoadStores) {
       Label L_end;
-      if (UseAVX > 2) {
-        __ movl(to, 0xffff);
-        __ kmovwl(k1, to);
-      }
       // Copy 64-bytes per iteration
       __ BIND(L_loop);
       if (UseAVX > 2) {
@@ -3005,14 +2994,6 @@ class StubGenerator: public StubCodeGenerator {
 
     __ enter(); // required for proper stackwalking of RuntimeStub frame
 
-    // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
-    // context for the registers used, where all instructions below are using 128-bit mode
-    // On EVEX without VL and BW, these instructions will all be AVX.
-    if (VM_Version::supports_avx512vlbw()) {
-      __ movl(rax, 0xffff);
-      __ kmovql(k1, rax);
-    }
-
     // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
     __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
 
@@ -3107,14 +3088,6 @@ class StubGenerator: public StubCodeGenerator {
 
     __ enter(); // required for proper stackwalking of RuntimeStub frame
 
-    // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
-    // context for the registers used, where all instructions below are using 128-bit mode
-    // On EVEX without VL and BW, these instructions will all be AVX.
-    if (VM_Version::supports_avx512vlbw()) {
-      __ movl(rax, 0xffff);
-      __ kmovql(k1, rax);
-    }
-
     // keylen could be only {11, 13, 15} * 4 = {44, 52, 60}
     __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
 
@@ -3227,14 +3200,6 @@ class StubGenerator: public StubCodeGenerator {
 
     __ enter(); // required for proper stackwalking of RuntimeStub frame
 
-    // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
-    // context for the registers used, where all instructions below are using 128-bit mode
-    // On EVEX without VL and BW, these instructions will all be AVX.
-    if (VM_Version::supports_avx512vlbw()) {
-      __ movl(rax, 0xffff);
-      __ kmovql(k1, rax);
-    }
-
 #ifdef _WIN64
     // on win64, fill len_reg from stack position
     __ movl(len_reg, len_mem);
@@ -3428,14 +3393,6 @@ class StubGenerator: public StubCodeGenerator {
 
    __ enter(); // required for proper stackwalking of RuntimeStub frame
 
-    // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
-    // context for the registers used, where all instructions below are using 128-bit mode
-    // On EVEX without VL and BW, these instructions will all be AVX.
-    if (VM_Version::supports_avx512vlbw()) {
-      __ movl(rax, 0xffff);
-      __ kmovql(k1, rax);
-    }
-
 #ifdef _WIN64
     // on win64, fill len_reg from stack position
     __ movl(len_reg, len_mem);
@@ -3902,14 +3859,6 @@ class StubGenerator: public StubCodeGenerator {
 
     __ enter(); // required for proper stackwalking of RuntimeStub frame
 
-    // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
-    // context for the registers used, where all instructions below are using 128-bit mode
-    // On EVEX without VL and BW, these instructions will all be AVX.
-    if (VM_Version::supports_avx512vlbw()) {
-      __ movl(rax, 0xffff);
-      __ kmovql(k1, rax);
-    }
-
 #ifdef _WIN64
     // allocate spill slots for r13, r14
     enum {
@@ -4484,14 +4433,6 @@ address generate_cipherBlockChaining_decryptVectorAESCrypt() {
 
     __ enter();
 
-    // For EVEX with VL and BW, provide a standard mask, VL = 128 will guide the merge
-    // context for the registers used, where all instructions below are using 128-bit mode
-    // On EVEX without VL and BW, these instructions will all be AVX.
-    if (VM_Version::supports_avx512vlbw()) {
-      __ movl(rax, 0xffff);
-      __ kmovql(k1, rax);
-    }
-
     __ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
 
     __ movdqu(xmm_temp0, Address(state, 0));
@@ -4761,7 +4702,6 @@ address generate_cipherBlockChaining_decryptVectorAESCrypt() {
     __ push(r13);
     __ push(r14);
     __ push(r15);
-    __ push(rbx);
 
     // arguments
     const Register source = c_rarg0; // Source Array
@@ -4790,8 +4730,6 @@ address generate_cipherBlockChaining_decryptVectorAESCrypt() {
     __ cmpl(length, 0);
     __ jcc(Assembler::lessEqual, L_exit);
 
-    // Save k1 value in rbx
-    __ kmovql(rbx, k1);
     __ lea(r11, ExternalAddress(StubRoutines::x86::base64_charset_addr()));
     // check if base64 charset(isURL=0) or base64 url charset(isURL=1) needs to be loaded
     __ cmpl(isURL, 0);
@@ -4802,7 +4740,7 @@ address generate_cipherBlockChaining_decryptVectorAESCrypt() {
     __ BIND(L_processdata);
     __ movdqu(xmm16, ExternalAddress(StubRoutines::x86::base64_gather_mask_addr()));
     // Set 64 bits of K register.
-    __ evpcmpeqb(k1, xmm16, xmm16, Assembler::AVX_512bit);
+    __ evpcmpeqb(k3, xmm16, xmm16, Assembler::AVX_512bit);
     __ evmovdquq(xmm12, ExternalAddress(StubRoutines::x86::base64_bswap_mask_addr()), Assembler::AVX_256bit, r13);
     __ evmovdquq(xmm13, ExternalAddress(StubRoutines::x86::base64_right_shift_mask_addr()), Assembler::AVX_512bit, r13);
     __ evmovdquq(xmm14, ExternalAddress(StubRoutines::x86::base64_left_shift_mask_addr()), Assembler::AVX_512bit, r13);
@@ -4881,17 +4819,17 @@ address generate_cipherBlockChaining_decryptVectorAESCrypt() {
     __ vextracti64x4(xmm4, xmm5, 1);
     __ vpmovzxwd(xmm7, xmm4, Assembler::AVX_512bit);
 
-    __ kmovql(k2, k1);
+    __ kmovql(k2, k3);
     __ evpgatherdd(xmm4, k2, Address(r11, xmm0, Address::times_4, 0), Assembler::AVX_512bit);
-    __ kmovql(k2, k1);
+    __ kmovql(k2, k3);
     __ evpgatherdd(xmm5, k2, Address(r11, xmm1, Address::times_4, 0), Assembler::AVX_512bit);
-    __ kmovql(k2, k1);
+    __ kmovql(k2, k3);
     __ evpgatherdd(xmm8, k2, Address(r11, xmm2, Address::times_4, 0), Assembler::AVX_512bit);
-    __ kmovql(k2, k1);
+    __ kmovql(k2, k3);
     __ evpgatherdd(xmm9, k2, Address(r11, xmm3, Address::times_4, 0), Assembler::AVX_512bit);
-    __ kmovql(k2, k1);
+    __ kmovql(k2, k3);
     __ evpgatherdd(xmm10, k2, Address(r11, xmm6, Address::times_4, 0), Assembler::AVX_512bit);
-    __ kmovql(k2, k1);
+    __ kmovql(k2, k3);
     __ evpgatherdd(xmm11, k2, Address(r11, xmm7, Address::times_4, 0), Assembler::AVX_512bit);
 
     //Down convert dword to byte. Final output is 16*6 = 96 bytes long
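Why k2 is reloaded before every gather above: an AVX-512 gather both reads and destroys its mask, with hardware clearing each mask bit as the corresponding element completes, so the all-ones mask (now held in the stub-local k3 instead of the old global default k1) must be re-materialized into k2 before each evpgatherdd. A scalar model of that consumption, as an illustrative sketch only:

    #include <cstdint>

    // Scalar model of vpgatherdd-style mask consumption: each completed
    // element load clears its mask bit, so the mask is all zero afterwards
    // and must be re-materialized (here: kmovql(k2, k3)) before the next use.
    void gather_model(uint32_t dst[16], const uint32_t* base,
                      const uint32_t idx[16], uint16_t& mask) {
      for (int i = 0; i < 16; i++) {
        if ((mask >> i) & 1) {
          dst[i] = base[idx[i]];
          mask = (uint16_t)(mask & ~(1u << i));  // element done: bit cleared
        }
      }
    }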
@@ -4927,9 +4865,9 @@ address generate_cipherBlockChaining_decryptVectorAESCrypt() {
     __ vpmovzxwd(xmm6, xmm9, Assembler::AVX_512bit);
     __ vextracti64x4(xmm9, xmm1, 1);
     __ vpmovzxwd(xmm5, xmm9, Assembler::AVX_512bit);
-    __ kmovql(k2, k1);
+    __ kmovql(k2, k3);
     __ evpgatherdd(xmm8, k2, Address(r11, xmm6, Address::times_4, 0), Assembler::AVX_512bit);
-    __ kmovql(k2, k1);
+    __ kmovql(k2, k3);
     __ evpgatherdd(xmm10, k2, Address(r11, xmm5, Address::times_4, 0), Assembler::AVX_512bit);
     __ evpmovdb(Address(dest, dp, Address::times_1, 0), xmm8, Assembler::AVX_512bit);
     __ evpmovdb(Address(dest, dp, Address::times_1, 16), xmm10, Assembler::AVX_512bit);
@@ -4985,9 +4923,6 @@ address generate_cipherBlockChaining_decryptVectorAESCrypt() {
     __ addq(source, 3);
     __ jmp(L_process3);
     __ BIND(L_exit);
-    // restore k1 register value
-    __ kmovql(k1, rbx);
-    __ pop(rbx);
     __ pop(r15);
     __ pop(r14);
     __ pop(r13);
@@ -401,8 +401,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
     // load value into all 64 bytes of zmm7 register
     __ movl(rcx, VM_Version::ymm_test_value());
     __ movdl(xmm0, rcx);
-    __ movl(rcx, 0xffff);
-    __ kmovwl(k1, rcx);
     __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
     __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
 #ifdef _LP64