8222074: Enhance auto vectorization for x86
Reviewed-by: kvn, vlivanov
This commit is contained in:
parent
0284208ab3
commit
707c30fae6
@ -1894,6 +1894,69 @@ void Assembler::cvttpd2dq(XMMRegister dst, XMMRegister src) {
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
// PABSB xmm, xmm (abs of packed byte values, register-to-register form).
// Encoding emitted here: 66-prefixed, 0F 38 opcode map, opcode byte 0x1C,
// followed by a ModRM byte with the top two bits set (register operands).
// Asserts SSSE3 support.
void Assembler::pabsb(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_ssse3(), "");
  InstructionAttr attrs(AVX_128bit,
                        /* rex_w */ false,
                        /* legacy_mode */ _legacy_mode_bw,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  const int reg_bits = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int8(0x1C);
  emit_int8(static_cast<unsigned char>(0xC0 | reg_bits));
}
|
||||
|
||||
// PABSW xmm, xmm (abs of packed word values, register-to-register form).
// Same shape as pabsb but with opcode byte 0x1D. Asserts SSSE3 support.
void Assembler::pabsw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_ssse3(), "");
  InstructionAttr attrs(AVX_128bit,
                        /* rex_w */ false,
                        /* legacy_mode */ _legacy_mode_bw,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  const int reg_bits = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int8(0x1D);
  emit_int8(static_cast<unsigned char>(0xC0 | reg_bits));
}
|
||||
|
||||
// PABSD xmm, xmm (abs of packed doubleword values, register-to-register form).
// Opcode byte 0x1E in the 66 / 0F 38 space. Unlike the byte/word variants,
// legacy_mode is passed as a plain 'false' here. Asserts SSSE3 support.
void Assembler::pabsd(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_ssse3(), "");
  InstructionAttr attrs(AVX_128bit,
                        /* rex_w */ false,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  const int reg_bits = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int8(0x1E);
  emit_int8(static_cast<unsigned char>(0xC0 | reg_bits));
}
|
||||
|
||||
// VPABSB dst, src for the given vector length (register-to-register form).
// The assert requires AVX for 128-bit, AVX2 for 256-bit and AVX512BW for
// 512-bit vectors; any other vector_len fails the assert.
// Emits opcode byte 0x1C after the 66 / 0F 38 VEX prefix.
void Assembler::vpabsb(XMMRegister dst, XMMRegister src, int vector_len) {
  assert((vector_len == AVX_128bit && VM_Version::supports_avx()) ||
         (vector_len == AVX_256bit && VM_Version::supports_avx2()) ||
         (vector_len == AVX_512bit && VM_Version::supports_avx512bw()), "");
  InstructionAttr attrs(vector_len,
                        /* rex_w */ false,
                        /* legacy_mode */ _legacy_mode_bw,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int8(static_cast<unsigned char>(0x1C));
  emit_int8(static_cast<unsigned char>(0xC0 | reg_bits));
}
|
||||
|
||||
// VPABSW dst, src for the given vector length (register-to-register form).
// The assert requires AVX for 128-bit, AVX2 for 256-bit and AVX512BW for
// 512-bit vectors; any other vector_len fails the assert.
// Emits opcode byte 0x1D after the 66 / 0F 38 VEX prefix.
void Assembler::vpabsw(XMMRegister dst, XMMRegister src, int vector_len) {
  assert((vector_len == AVX_128bit && VM_Version::supports_avx()) ||
         (vector_len == AVX_256bit && VM_Version::supports_avx2()) ||
         (vector_len == AVX_512bit && VM_Version::supports_avx512bw()), "");
  InstructionAttr attrs(vector_len,
                        /* rex_w */ false,
                        /* legacy_mode */ _legacy_mode_bw,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int8(static_cast<unsigned char>(0x1D));
  emit_int8(static_cast<unsigned char>(0xC0 | reg_bits));
}
|
||||
|
||||
// VPABSD dst, src for the given vector length (register-to-register form).
// The assert requires AVX for 128-bit, AVX2 for 256-bit and EVEX support for
// 512-bit vectors; any other vector_len fails the assert.
// Emits opcode byte 0x1E after the 66 / 0F 38 VEX prefix.
void Assembler::vpabsd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert((vector_len == AVX_128bit && VM_Version::supports_avx()) ||
         (vector_len == AVX_256bit && VM_Version::supports_avx2()) ||
         (vector_len == AVX_512bit && VM_Version::supports_evex()), "");
  InstructionAttr attrs(vector_len,
                        /* rex_w */ false,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int8(static_cast<unsigned char>(0x1E));
  emit_int8(static_cast<unsigned char>(0xC0 | reg_bits));
}
|
||||
|
||||
// VPABSQ dst, src (register-to-register form), always encoded as an EVEX
// instruction with the W bit set. Asserts UseAVX > 2.
// Emits opcode byte 0x1F after the 66 / 0F 38 prefix.
void Assembler::evpabsq(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "");
  InstructionAttr attrs(vector_len,
                        /* rex_w */ true,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int8(static_cast<unsigned char>(0x1F));
  emit_int8(static_cast<unsigned char>(0xC0 | reg_bits));
}
|
||||
|
||||
void Assembler::decl(Address dst) {
|
||||
// Don't use it directly. Use MacroAssembler::decrement() instead.
|
||||
InstructionMark im(this);
|
||||
@ -3416,10 +3479,19 @@ void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_le
|
||||
InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
emit_int8(0x00);
|
||||
emit_int8(0xC0 | encode);
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
emit_int8(imm8);
|
||||
}
|
||||
|
||||
// Three-register VPERMQ (dst, nds, src), forced to the EVEX encoding with the
// W bit set. Asserts UseAVX > 2 ("requires AVX512F").
// Emits opcode byte 0x36 after the 66 / 0F 38 prefix.
void Assembler::vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  assert(UseAVX > 2, "requires AVX512F");
  InstructionAttr attrs(vector_len,
                        /* rex_w */ true,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int8(static_cast<unsigned char>(0x36));
  emit_int8(static_cast<unsigned char>(0xC0 | reg_bits));
}
|
||||
|
||||
void Assembler::vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8) {
|
||||
assert(VM_Version::supports_avx2(), "");
|
||||
InstructionAttr attributes(AVX_256bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
@ -3884,6 +3956,14 @@ void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
// PMOVSXBW xmm, xmm (sign-extending byte-to-word move, register form).
// Asserts SSE4.1 support. Emits opcode byte 0x20 in the 66 / 0F 38 space.
void Assembler::pmovsxbw(XMMRegister dst, XMMRegister src) {
  assert(VM_Version::supports_sse4_1(), "");
  InstructionAttr attrs(AVX_128bit,
                        /* rex_w */ false,
                        /* legacy_mode */ _legacy_mode_bw,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  const int reg_bits = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int8(0x20);
  emit_int8(static_cast<unsigned char>(0xC0 | reg_bits));
}
|
||||
|
||||
void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
|
||||
assert(VM_Version::supports_avx(), "");
|
||||
InstructionMark im(this);
|
||||
@ -3905,6 +3985,15 @@ void Assembler::vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
emit_int8((unsigned char) (0xC0 | encode));
|
||||
}
|
||||
|
||||
// VPMOVSXBW dst, src for the given vector length (register form).
// The assert requires AVX for 128-bit, AVX2 for 256-bit and AVX512BW for
// 512-bit vectors; any other vector_len fails the assert.
// The prefix is produced through simd_prefix_and_encode with no second source (xnoreg).
void Assembler::vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len) {
  assert((vector_len == AVX_128bit && VM_Version::supports_avx()) ||
         (vector_len == AVX_256bit && VM_Version::supports_avx2()) ||
         (vector_len == AVX_512bit && VM_Version::supports_avx512bw()), "");
  InstructionAttr attrs(vector_len,
                        /* rex_w */ false,
                        /* legacy_mode */ _legacy_mode_bw,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  const int reg_bits = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int8(0x20);
  emit_int8(static_cast<unsigned char>(0xC0 | reg_bits));
}
|
||||
|
||||
void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len) {
|
||||
assert(VM_Version::supports_avx512vlbw(), "");
|
||||
@ -6277,6 +6366,26 @@ void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
// EVEX VPSRAQ with an immediate shift count.
// Asserts UseAVX > 2, and additionally that either AVX512VL is available or
// vector_len == 2.
// xmm4's encoding is passed in the first (register) slot of the prefix helper;
// NOTE(review): this presumably supplies the /4 opcode extension of the
// 0F 72 group - confirm against the instruction reference.
// Only the low 8 bits of 'shift' are emitted as the immediate.
void Assembler::evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
  assert(UseAVX > 2, "requires AVX512");
  assert ((VM_Version::supports_avx512vl() || vector_len == 2), "requires AVX512vl");
  InstructionAttr attrs(vector_len,
                        /* vex_w */ true,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  const int reg_bits = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  emit_int8(static_cast<unsigned char>(0x72));
  emit_int8(static_cast<unsigned char>(0xC0 | reg_bits));
  emit_int8(shift & 0xFF);
}
|
||||
|
||||
// EVEX VPSRAQ with the shift count held in an XMM register.
// Asserts UseAVX > 2, and additionally that either AVX512VL is available or
// vector_len == 2. Emits opcode byte 0xE2 after the 66 / 0F prefix.
void Assembler::evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(UseAVX > 2, "requires AVX512");
  assert ((VM_Version::supports_avx512vl() || vector_len == 2), "requires AVX512vl");
  InstructionAttr attrs(vector_len,
                        /* vex_w */ true,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  const int reg_bits = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(),
                                             VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  emit_int8(static_cast<unsigned char>(0xE2));
  emit_int8(static_cast<unsigned char>(0xC0 | reg_bits));
}
|
||||
|
||||
// logical operations packed integers
|
||||
void Assembler::pand(XMMRegister dst, XMMRegister src) {
|
||||
|
@ -1102,6 +1102,15 @@ private:
|
||||
|
||||
void cvttpd2dq(XMMRegister dst, XMMRegister src);
|
||||
|
||||
// Abs of packed integer values
|
||||
void pabsb(XMMRegister dst, XMMRegister src);
|
||||
void pabsw(XMMRegister dst, XMMRegister src);
|
||||
void pabsd(XMMRegister dst, XMMRegister src);
|
||||
void vpabsb(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vpabsw(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vpabsd(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void evpabsq(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
|
||||
// Divide Scalar Double-Precision Floating-Point Values
|
||||
void divsd(XMMRegister dst, Address src);
|
||||
void divsd(XMMRegister dst, XMMRegister src);
|
||||
@ -1589,6 +1598,7 @@ private:
|
||||
// Permutation of 64bit words
|
||||
void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
|
||||
void vpermq(XMMRegister dst, XMMRegister src, int imm8);
|
||||
void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
|
||||
void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
|
||||
void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
@ -1668,6 +1678,10 @@ private:
|
||||
|
||||
void evpmovdb(Address dst, XMMRegister src, int vector_len);
|
||||
|
||||
// Sign extend moves
|
||||
void pmovsxbw(XMMRegister dst, XMMRegister src);
|
||||
void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
|
||||
// Multiply add
|
||||
void pmaddwd(XMMRegister dst, XMMRegister src);
|
||||
void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
@ -2094,6 +2108,8 @@ private:
|
||||
void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
|
||||
void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
|
||||
void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
|
||||
// And packed integers
|
||||
void pand(XMMRegister dst, XMMRegister src);
|
||||
|
@ -1003,25 +1003,25 @@ void MacroAssembler::align(int modulus, int target) {
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src) {
|
||||
void MacroAssembler::andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
|
||||
// Used in sign-masking with aligned address.
|
||||
assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
|
||||
if (reachable(src)) {
|
||||
Assembler::andpd(dst, as_Address(src));
|
||||
} else {
|
||||
lea(rscratch1, src);
|
||||
Assembler::andpd(dst, Address(rscratch1, 0));
|
||||
lea(scratch_reg, src);
|
||||
Assembler::andpd(dst, Address(scratch_reg, 0));
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::andps(XMMRegister dst, AddressLiteral src) {
|
||||
void MacroAssembler::andps(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
|
||||
// Used in sign-masking with aligned address.
|
||||
assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
|
||||
if (reachable(src)) {
|
||||
Assembler::andps(dst, as_Address(src));
|
||||
} else {
|
||||
lea(rscratch1, src);
|
||||
Assembler::andps(dst, Address(rscratch1, 0));
|
||||
lea(scratch_reg, src);
|
||||
Assembler::andps(dst, Address(scratch_reg, 0));
|
||||
}
|
||||
}
|
||||
|
||||
@ -3340,13 +3340,13 @@ void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
|
||||
Assembler::vmovdqu(dst, src);
|
||||
}
|
||||
|
||||
void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src) {
|
||||
void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
|
||||
if (reachable(src)) {
|
||||
vmovdqu(dst, as_Address(src));
|
||||
}
|
||||
else {
|
||||
lea(rscratch1, src);
|
||||
vmovdqu(dst, Address(rscratch1, 0));
|
||||
lea(scratch_reg, src);
|
||||
vmovdqu(dst, Address(scratch_reg, 0));
|
||||
}
|
||||
}
|
||||
|
||||
@ -3698,14 +3698,14 @@ void MacroAssembler::ucomiss(XMMRegister dst, AddressLiteral src) {
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src) {
|
||||
void MacroAssembler::xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
|
||||
// Used in sign-bit flipping with aligned address.
|
||||
assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
|
||||
if (reachable(src)) {
|
||||
Assembler::xorpd(dst, as_Address(src));
|
||||
} else {
|
||||
lea(rscratch1, src);
|
||||
Assembler::xorpd(dst, Address(rscratch1, 0));
|
||||
lea(scratch_reg, src);
|
||||
Assembler::xorpd(dst, Address(scratch_reg, 0));
|
||||
}
|
||||
}
|
||||
|
||||
@ -3726,14 +3726,14 @@ void MacroAssembler::xorps(XMMRegister dst, XMMRegister src) {
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src) {
|
||||
void MacroAssembler::xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg) {
|
||||
// Used in sign-bit flipping with aligned address.
|
||||
assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
|
||||
if (reachable(src)) {
|
||||
Assembler::xorps(dst, as_Address(src));
|
||||
} else {
|
||||
lea(rscratch1, src);
|
||||
Assembler::xorps(dst, Address(rscratch1, 0));
|
||||
lea(scratch_reg, src);
|
||||
Assembler::xorps(dst, Address(scratch_reg, 0));
|
||||
}
|
||||
}
|
||||
|
||||
@ -3799,12 +3799,12 @@ void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int v
|
||||
Assembler::vpaddw(dst, nds, src, vector_len);
|
||||
}
|
||||
|
||||
void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
|
||||
void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
|
||||
if (reachable(src)) {
|
||||
Assembler::vpand(dst, nds, as_Address(src), vector_len);
|
||||
} else {
|
||||
lea(rscratch1, src);
|
||||
Assembler::vpand(dst, nds, Address(rscratch1, 0), vector_len);
|
||||
lea(scratch_reg, src);
|
||||
Assembler::vpand(dst, nds, Address(scratch_reg, 0), vector_len);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3873,6 +3873,22 @@ void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vec
|
||||
Assembler::vpsraw(dst, nds, shift, vector_len);
|
||||
}
|
||||
|
||||
// evpsraq with the shift count in an XMM register.
// Asserts UseAVX > 2. When AVX512VL is not supported and the requested
// vector_len is below 2, the instruction is emitted with vector_len 2 instead.
void MacroAssembler::evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
  assert(UseAVX > 2,"");
  const bool force_len_2 = !VM_Version::supports_avx512vl() && vector_len < 2;
  Assembler::evpsraq(dst, nds, shift, force_len_2 ? 2 : vector_len);
}
|
||||
|
||||
// evpsraq with an immediate shift count.
// Asserts UseAVX > 2. When AVX512VL is not supported and the requested
// vector_len is below 2, the instruction is emitted with vector_len 2 instead.
void MacroAssembler::evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
  assert(UseAVX > 2,"");
  const bool force_len_2 = !VM_Version::supports_avx512vl() && vector_len < 2;
  Assembler::evpsraq(dst, nds, shift, force_len_2 ? 2 : vector_len);
}
|
||||
|
||||
void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
|
||||
assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
|
||||
Assembler::vpsrlw(dst, nds, shift, vector_len);
|
||||
@ -3913,21 +3929,21 @@ void MacroAssembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
|
||||
Assembler::pshuflw(dst, src, mode);
|
||||
}
|
||||
|
||||
void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
|
||||
void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
|
||||
if (reachable(src)) {
|
||||
vandpd(dst, nds, as_Address(src), vector_len);
|
||||
} else {
|
||||
lea(rscratch1, src);
|
||||
vandpd(dst, nds, Address(rscratch1, 0), vector_len);
|
||||
lea(scratch_reg, src);
|
||||
vandpd(dst, nds, Address(scratch_reg, 0), vector_len);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
|
||||
void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
|
||||
if (reachable(src)) {
|
||||
vandps(dst, nds, as_Address(src), vector_len);
|
||||
} else {
|
||||
lea(rscratch1, src);
|
||||
vandps(dst, nds, Address(rscratch1, 0), vector_len);
|
||||
lea(scratch_reg, src);
|
||||
vandps(dst, nds, Address(scratch_reg, 0), vector_len);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3995,24 +4011,162 @@ void MacroAssembler::vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral
|
||||
vxorpd(dst, nds, src, Assembler::AVX_128bit);
|
||||
}
|
||||
|
||||
void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
|
||||
void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
|
||||
if (reachable(src)) {
|
||||
vxorpd(dst, nds, as_Address(src), vector_len);
|
||||
} else {
|
||||
lea(rscratch1, src);
|
||||
vxorpd(dst, nds, Address(rscratch1, 0), vector_len);
|
||||
lea(scratch_reg, src);
|
||||
vxorpd(dst, nds, Address(scratch_reg, 0), vector_len);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
|
||||
void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
|
||||
if (reachable(src)) {
|
||||
vxorps(dst, nds, as_Address(src), vector_len);
|
||||
} else {
|
||||
lea(rscratch1, src);
|
||||
vxorps(dst, nds, Address(rscratch1, 0), vector_len);
|
||||
lea(scratch_reg, src);
|
||||
vxorps(dst, nds, Address(scratch_reg, 0), vector_len);
|
||||
}
|
||||
}
|
||||
|
||||
// vpxor (dst = nds ^ [src]) with an AddressLiteral operand.
// When UseAVX <= 1 and vector_len >= 1 the integer form is not used and the
// request is forwarded to vxorpd instead. Otherwise vpxor is emitted, going
// through 'scratch_reg' (clobbered) when the literal is not reachable.
void MacroAssembler::vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg) {
  if (UseAVX <= 1 && vector_len >= 1) {
    MacroAssembler::vxorpd(dst, nds, src, vector_len, scratch_reg);
    return;
  }

  if (!reachable(src)) {
    lea(scratch_reg, src);
    Assembler::vpxor(dst, nds, Address(scratch_reg, 0), vector_len);
    return;
  }
  Assembler::vpxor(dst, nds, as_Address(src), vector_len);
}
|
||||
|
||||
//-------------------------------------------------------------------------------------------
|
||||
#ifdef COMPILER2
|
||||
// Generic instructions support for use in .ad files C2 code generation
|
||||
|
||||
void MacroAssembler::vabsnegd(int opcode, XMMRegister dst, Register scr) {
|
||||
if (opcode == Op_AbsVD) {
|
||||
andpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), scr);
|
||||
} else {
|
||||
assert((opcode == Op_NegVD),"opcode should be Op_NegD");
|
||||
xorpd(dst, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), scr);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) {
|
||||
if (opcode == Op_AbsVD) {
|
||||
vandpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_mask()), vector_len, scr);
|
||||
} else {
|
||||
assert((opcode == Op_NegVD),"opcode should be Op_NegD");
|
||||
vxorpd(dst, src, ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), vector_len, scr);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::vabsnegf(int opcode, XMMRegister dst, Register scr) {
|
||||
if (opcode == Op_AbsVF) {
|
||||
andps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), scr);
|
||||
} else {
|
||||
assert((opcode == Op_NegVF),"opcode should be Op_NegF");
|
||||
xorps(dst, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), scr);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr) {
|
||||
if (opcode == Op_AbsVF) {
|
||||
vandps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_mask()), vector_len, scr);
|
||||
} else {
|
||||
assert((opcode == Op_NegVF),"opcode should be Op_NegF");
|
||||
vxorps(dst, src, ExternalAddress(StubRoutines::x86::vector_float_sign_flip()), vector_len, scr);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src) {
|
||||
if (sign) {
|
||||
pmovsxbw(dst, src);
|
||||
} else {
|
||||
pmovzxbw(dst, src);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len) {
|
||||
if (sign) {
|
||||
vpmovsxbw(dst, src, vector_len);
|
||||
} else {
|
||||
vpmovzxbw(dst, src, vector_len);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister src) {
|
||||
if (opcode == Op_RShiftVI) {
|
||||
psrad(dst, src);
|
||||
} else if (opcode == Op_LShiftVI) {
|
||||
pslld(dst, src);
|
||||
} else {
|
||||
assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI");
|
||||
psrld(dst, src);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
if (opcode == Op_RShiftVI) {
|
||||
vpsrad(dst, nds, src, vector_len);
|
||||
} else if (opcode == Op_LShiftVI) {
|
||||
vpslld(dst, nds, src, vector_len);
|
||||
} else {
|
||||
assert((opcode == Op_URShiftVI),"opcode should be Op_URShiftVI");
|
||||
vpsrld(dst, nds, src, vector_len);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister src) {
|
||||
if ((opcode == Op_RShiftVS) || (opcode == Op_RShiftVB)) {
|
||||
psraw(dst, src);
|
||||
} else if ((opcode == Op_LShiftVS) || (opcode == Op_LShiftVB)) {
|
||||
psllw(dst, src);
|
||||
} else {
|
||||
assert(((opcode == Op_URShiftVS) || (opcode == Op_URShiftVB)),"opcode should be one of Op_URShiftVS or Op_URShiftVB");
|
||||
psrlw(dst, src);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
if ((opcode == Op_RShiftVS) || (opcode == Op_RShiftVB)) {
|
||||
vpsraw(dst, nds, src, vector_len);
|
||||
} else if ((opcode == Op_LShiftVS) || (opcode == Op_LShiftVB)) {
|
||||
vpsllw(dst, nds, src, vector_len);
|
||||
} else {
|
||||
assert(((opcode == Op_URShiftVS) || (opcode == Op_URShiftVB)),"opcode should be one of Op_URShiftVS or Op_URShiftVB");
|
||||
vpsrlw(dst, nds, src, vector_len);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister src) {
|
||||
if (opcode == Op_RShiftVL) {
|
||||
psrlq(dst, src); // using srl to implement sra on pre-avs512 systems
|
||||
} else if (opcode == Op_LShiftVL) {
|
||||
psllq(dst, src);
|
||||
} else {
|
||||
assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL");
|
||||
psrlq(dst, src);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
if (opcode == Op_RShiftVL) {
|
||||
evpsraq(dst, nds, src, vector_len);
|
||||
} else if (opcode == Op_LShiftVL) {
|
||||
vpsllq(dst, nds, src, vector_len);
|
||||
} else {
|
||||
assert((opcode == Op_URShiftVL),"opcode should be Op_URShiftVL");
|
||||
vpsrlq(dst, nds, src, vector_len);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
//-------------------------------------------------------------------------------------------
|
||||
|
||||
void MacroAssembler::clear_jweak_tag(Register possibly_jweak) {
|
||||
const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask);
|
||||
STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code
|
||||
|
@ -877,12 +877,12 @@ class MacroAssembler: public Assembler {
|
||||
// Floating
|
||||
|
||||
void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
|
||||
void andpd(XMMRegister dst, AddressLiteral src);
|
||||
void andpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
|
||||
void andpd(XMMRegister dst, XMMRegister src) { Assembler::andpd(dst, src); }
|
||||
|
||||
void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
|
||||
void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
|
||||
void andps(XMMRegister dst, AddressLiteral src);
|
||||
void andps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
|
||||
|
||||
void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
|
||||
void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
|
||||
@ -1066,8 +1066,8 @@ private:
|
||||
|
||||
// these are private because users should be doing movflt/movdbl
|
||||
|
||||
void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); }
|
||||
void movss(XMMRegister dst, XMMRegister src) { Assembler::movss(dst, src); }
|
||||
void movss(Address dst, XMMRegister src) { Assembler::movss(dst, src); }
|
||||
void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); }
|
||||
void movss(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
@ -1105,7 +1105,7 @@ public:
|
||||
void vmovdqu(Address dst, XMMRegister src);
|
||||
void vmovdqu(XMMRegister dst, Address src);
|
||||
void vmovdqu(XMMRegister dst, XMMRegister src);
|
||||
void vmovdqu(XMMRegister dst, AddressLiteral src);
|
||||
void vmovdqu(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
|
||||
void evmovdquq(XMMRegister dst, Address src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
|
||||
void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
|
||||
void evmovdquq(Address dst, XMMRegister src, int vector_len) { Assembler::evmovdquq(dst, src, vector_len); }
|
||||
@ -1183,12 +1183,12 @@ public:
|
||||
// Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
|
||||
void xorpd(XMMRegister dst, XMMRegister src);
|
||||
void xorpd(XMMRegister dst, Address src) { Assembler::xorpd(dst, src); }
|
||||
void xorpd(XMMRegister dst, AddressLiteral src);
|
||||
void xorpd(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
|
||||
|
||||
// Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
|
||||
void xorps(XMMRegister dst, XMMRegister src);
|
||||
void xorps(XMMRegister dst, Address src) { Assembler::xorps(dst, src); }
|
||||
void xorps(XMMRegister dst, AddressLiteral src);
|
||||
void xorps(XMMRegister dst, AddressLiteral src, Register scratch_reg = rscratch1);
|
||||
|
||||
// Shuffle Bytes
|
||||
void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); }
|
||||
@ -1215,7 +1215,7 @@ public:
|
||||
|
||||
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
|
||||
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
|
||||
void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len);
|
||||
void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
|
||||
|
||||
void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vpbroadcastw(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastw(dst, src, vector_len); }
|
||||
@ -1241,6 +1241,9 @@ public:
|
||||
void vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
|
||||
void vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
|
||||
|
||||
void evpsraq(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
|
||||
void evpsraq(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
|
||||
|
||||
void vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len);
|
||||
void vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len);
|
||||
|
||||
@ -1260,11 +1263,11 @@ public:
|
||||
|
||||
void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
|
||||
void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandpd(dst, nds, src, vector_len); }
|
||||
void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len);
|
||||
void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
|
||||
|
||||
void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
|
||||
void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vandps(dst, nds, src, vector_len); }
|
||||
void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len);
|
||||
void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
|
||||
|
||||
void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
|
||||
void vdivsd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vdivsd(dst, nds, src); }
|
||||
@ -1297,11 +1300,11 @@ public:
|
||||
|
||||
void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
|
||||
void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorpd(dst, nds, src, vector_len); }
|
||||
void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len);
|
||||
void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
|
||||
|
||||
void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
|
||||
void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vxorps(dst, nds, src, vector_len); }
|
||||
void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len);
|
||||
void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
|
||||
|
||||
void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
if (UseAVX > 1 || (vector_len < 1)) // vpxor 256 bit is available only in AVX2
|
||||
@ -1315,6 +1318,7 @@ public:
|
||||
else
|
||||
Assembler::vxorpd(dst, nds, src, vector_len);
|
||||
}
|
||||
void vpxor(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register scratch_reg = rscratch1);
|
||||
|
||||
// Simple version for AVX2 256bit vectors
|
||||
void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
|
||||
@ -1601,6 +1605,22 @@ public:
|
||||
// Loads a 32-bit value from memory into a pointer-sized register:
// movslq in the LP64_ONLY variant, plain movl in the NOT_LP64 variant.
void movl2ptr(Register dst, Address src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(movl(dst, src)); }
|
||||
// Register-to-register form: movslq in the LP64_ONLY variant; in the NOT_LP64
// variant a movl that is skipped when dst and src are the same register.
void movl2ptr(Register dst, Register src) { LP64_ONLY(movslq(dst, src)) NOT_LP64(if (dst != src) movl(dst, src)); }
|
||||
|
||||
#ifdef COMPILER2
|
||||
// Generic instructions support for use in .ad files C2 code generation
|
||||
void vabsnegd(int opcode, XMMRegister dst, Register scr);
|
||||
void vabsnegd(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
|
||||
void vabsnegf(int opcode, XMMRegister dst, Register scr);
|
||||
void vabsnegf(int opcode, XMMRegister dst, XMMRegister src, int vector_len, Register scr);
|
||||
void vextendbw(bool sign, XMMRegister dst, XMMRegister src, int vector_len);
|
||||
void vextendbw(bool sign, XMMRegister dst, XMMRegister src);
|
||||
void vshiftd(int opcode, XMMRegister dst, XMMRegister src);
|
||||
void vshiftd(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vshiftw(int opcode, XMMRegister dst, XMMRegister src);
|
||||
void vshiftw(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vshiftq(int opcode, XMMRegister dst, XMMRegister src);
|
||||
void vshiftq(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
#endif
|
||||
|
||||
// C2 compiled method's prolog code.
|
||||
void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b, bool is_stub);
|
||||
|
||||
|
@ -602,7 +602,59 @@ class StubGenerator: public StubCodeGenerator {
|
||||
|
||||
return start;
|
||||
}
|
||||
//---------------------------------------------------------------------------------------------------
|
||||
|
||||
// Emits a constant table made of sixteen copies of the 32-bit `mask` under the
// stub name `stub_name`, aligned to CodeEntryAlignment.
// Returns the address of the first emitted word.
address generate_vector_mask(const char *stub_name, int32_t mask) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", stub_name);
  address table_start = __ pc();

  int remaining = 16;
  while (remaining > 0) {
    __ emit_data(mask, relocInfo::none, 0);
    --remaining;
  }

  return table_start;
}
|
||||
|
||||
// Emits a constant table of eight (masklo, maskhi) pairs under the stub name
// `stub_name`, aligned to CodeEntryAlignment. The low word of each pair is
// emitted first. Returns the address of the first emitted word.
address generate_vector_mask_long_double(const char *stub_name, int32_t maskhi, int32_t masklo) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", stub_name);
  address table_start = __ pc();

  const int pair_count = 8;
  for (int pair = 0; pair != pair_count; ++pair) {
    __ emit_data(masklo, relocInfo::none, 0);
    __ emit_data(maskhi, relocInfo::none, 0);
  }

  return table_start;
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------
|
||||
|
||||
// Emits the byte-permutation index table under the stub name `stub_name`,
// aligned to CodeEntryAlignment. The table holds eight entries, each written
// as a low 32-bit word followed by a zero word; the low words are
// 1, 3, 5, 7, 0, 2, 4, 6. Returns the address of the first emitted word.
address generate_vector_byte_perm_mask(const char *stub_name) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", stub_name);
  address table_start = __ pc();

  const int low_words[8] = {
    0x00000001, 0x00000003, 0x00000005, 0x00000007,
    0x00000000, 0x00000002, 0x00000004, 0x00000006
  };
  for (int entry = 0; entry < 8; entry++) {
    __ emit_data(low_words[entry], relocInfo::none, 0);
    __ emit_data(0x00000000, relocInfo::none, 0);
  }

  return table_start;
}
|
||||
|
||||
//----------------------------------------------------------------------------------------------------
|
||||
// Non-destructive plausibility checks for oops
|
||||
@ -3823,6 +3875,14 @@ class StubGenerator: public StubCodeGenerator {
|
||||
//------------------------------------------------------------------------------------------------------------------------
|
||||
// entry points that are platform specific
|
||||
|
||||
StubRoutines::x86::_vector_float_sign_mask = generate_vector_mask("vector_float_sign_mask", 0x7FFFFFFF);
|
||||
StubRoutines::x86::_vector_float_sign_flip = generate_vector_mask("vector_float_sign_flip", 0x80000000);
|
||||
StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask_long_double("vector_double_sign_mask", 0x7FFFFFFF, 0xFFFFFFFF);
|
||||
StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask_long_double("vector_double_sign_flip", 0x80000000, 0x00000000);
|
||||
StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask("vector_short_to_byte_mask", 0x00ff00ff);
|
||||
StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask");
|
||||
StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask_long_double("vector_long_sign_mask", 0x80000000, 0x00000000);
|
||||
|
||||
// support for verify_oop (must happen after universe_init)
|
||||
StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
|
||||
|
||||
|
@ -979,6 +979,40 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return start;
|
||||
}
|
||||
|
||||
// Emits a constant table made of eight copies of the 64-bit `mask` under the
// stub name `stub_name`, aligned to CodeEntryAlignment.
// Returns the address of the first emitted quadword.
address generate_vector_mask(const char *stub_name, int64_t mask) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", stub_name);
  address table_start = __ pc();

  for (int copy = 0; copy < 8; copy++) {
    __ emit_data64(mask, relocInfo::none);
  }

  return table_start;
}
|
||||
|
||||
// Emits the byte-permutation index table under the stub name `stub_name`,
// aligned to CodeEntryAlignment: eight 64-bit entries with the values
// 1, 3, 5, 7, 0, 2, 4, 6. Returns the address of the first emitted quadword.
address generate_vector_byte_perm_mask(const char *stub_name) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", stub_name);
  address table_start = __ pc();

  const int64_t entries[8] = {
    0x0000000000000001, 0x0000000000000003,
    0x0000000000000005, 0x0000000000000007,
    0x0000000000000000, 0x0000000000000002,
    0x0000000000000004, 0x0000000000000006
  };
  for (int idx = 0; idx < 8; idx++) {
    __ emit_data64(entries[idx], relocInfo::none);
  }

  return table_start;
}
|
||||
|
||||
// Non-destructive plausibility checks for oops
|
||||
//
|
||||
// Arguments:
|
||||
@ -5871,6 +5905,13 @@ address generate_avx_ghash_processBlocks() {
|
||||
StubRoutines::x86::_float_sign_flip = generate_fp_mask("float_sign_flip", 0x8000000080000000);
|
||||
StubRoutines::x86::_double_sign_mask = generate_fp_mask("double_sign_mask", 0x7FFFFFFFFFFFFFFF);
|
||||
StubRoutines::x86::_double_sign_flip = generate_fp_mask("double_sign_flip", 0x8000000000000000);
|
||||
StubRoutines::x86::_vector_float_sign_mask = generate_vector_mask("vector_float_sign_mask", 0x7FFFFFFF7FFFFFFF);
|
||||
StubRoutines::x86::_vector_float_sign_flip = generate_vector_mask("vector_float_sign_flip", 0x8000000080000000);
|
||||
StubRoutines::x86::_vector_double_sign_mask = generate_vector_mask("vector_double_sign_mask", 0x7FFFFFFFFFFFFFFF);
|
||||
StubRoutines::x86::_vector_double_sign_flip = generate_vector_mask("vector_double_sign_flip", 0x8000000000000000);
|
||||
StubRoutines::x86::_vector_short_to_byte_mask = generate_vector_mask("vector_short_to_byte_mask", 0x00ff00ff00ff00ff);
|
||||
StubRoutines::x86::_vector_byte_perm_mask = generate_vector_byte_perm_mask("vector_byte_perm_mask");
|
||||
StubRoutines::x86::_vector_long_sign_mask = generate_vector_mask("vector_long_sign_mask", 0x8000000000000000);
|
||||
|
||||
// support for verify_oop (must happen after universe_init)
|
||||
StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop();
|
||||
|
@ -43,6 +43,13 @@ address StubRoutines::x86::_ghash_shuffmask_addr = NULL;
|
||||
address StubRoutines::x86::_upper_word_mask_addr = NULL;
|
||||
address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
|
||||
address StubRoutines::x86::_k256_adr = NULL;
|
||||
address StubRoutines::x86::_vector_short_to_byte_mask = NULL;
|
||||
address StubRoutines::x86::_vector_float_sign_mask = NULL;
|
||||
address StubRoutines::x86::_vector_float_sign_flip = NULL;
|
||||
address StubRoutines::x86::_vector_double_sign_mask = NULL;
|
||||
address StubRoutines::x86::_vector_double_sign_flip = NULL;
|
||||
address StubRoutines::x86::_vector_byte_perm_mask = NULL;
|
||||
address StubRoutines::x86::_vector_long_sign_mask = NULL;
|
||||
#ifdef _LP64
|
||||
address StubRoutines::x86::_k256_W_adr = NULL;
|
||||
address StubRoutines::x86::_k512_W_addr = NULL;
|
||||
|
@ -102,6 +102,7 @@ class x86 {
|
||||
static address double_sign_flip() {
|
||||
return _double_sign_flip;
|
||||
}
|
||||
|
||||
#else // !LP64
|
||||
|
||||
private:
|
||||
@ -139,6 +140,13 @@ class x86 {
|
||||
//k256 table for sha256
|
||||
static juint _k256[];
|
||||
static address _k256_adr;
|
||||
static address _vector_short_to_byte_mask;
|
||||
static address _vector_float_sign_mask;
|
||||
static address _vector_float_sign_flip;
|
||||
static address _vector_double_sign_mask;
|
||||
static address _vector_double_sign_flip;
|
||||
static address _vector_byte_perm_mask;
|
||||
static address _vector_long_sign_mask;
|
||||
#ifdef _LP64
|
||||
static juint _k256_W[];
|
||||
static address _k256_W_adr;
|
||||
@ -212,6 +220,33 @@ class x86 {
|
||||
static address upper_word_mask_addr() { return _upper_word_mask_addr; }
|
||||
static address shuffle_byte_flip_mask_addr() { return _shuffle_byte_flip_mask_addr; }
|
||||
static address k256_addr() { return _k256_adr; }
|
||||
|
||||
// Returns the stored address of the "vector_short_to_byte_mask" table.
static address vector_short_to_byte_mask() { return _vector_short_to_byte_mask; }
|
||||
// Returns the stored address of the "vector_float_sign_mask" table.
static address vector_float_sign_mask() { return _vector_float_sign_mask; }
|
||||
|
||||
// Returns the stored address of the "vector_float_sign_flip" table.
static address vector_float_sign_flip() { return _vector_float_sign_flip; }
|
||||
|
||||
// Returns the stored address of the "vector_double_sign_mask" table.
static address vector_double_sign_mask() { return _vector_double_sign_mask; }
|
||||
|
||||
// Returns the stored address of the "vector_double_sign_flip" table.
static address vector_double_sign_flip() { return _vector_double_sign_flip; }
|
||||
|
||||
// Returns the stored address of the "vector_byte_perm_mask" table.
static address vector_byte_perm_mask() { return _vector_byte_perm_mask; }
|
||||
|
||||
// Returns the stored address of the "vector_long_sign_mask" table.
static address vector_long_sign_mask() { return _vector_long_sign_mask; }
|
||||
#ifdef _LP64
|
||||
static address k256_W_addr() { return _k256_W_adr; }
|
||||
static address k512_W_addr() { return _k512_W_addr; }
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -8949,6 +8949,28 @@ instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
|
||||
ins_pipe(ialu_reg_reg_alu0);
|
||||
%}
|
||||
|
||||
// Integer Absolute Instructions
|
||||
// Branch-free AbsI: tmp = src >> 31 (arithmetic shift), then
// dst = (src ^ tmp) - tmp. dst and tmp are temporaries and the flags
// register is killed.
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, eFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
         %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Register Rtmp = $tmp$$Register;

    __ movl(Rtmp, Rsrc);
    __ sarl(Rtmp, 31);
    __ movl(Rdst, Rsrc);
    __ xorl(Rdst, Rtmp);
    __ subl(Rdst, Rtmp);
  %}

  ins_pipe(ialu_reg_reg);
%}
|
||||
|
||||
//----------Long Instructions------------------------------------------------
|
||||
// Add Long Register with Register
|
||||
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
|
||||
|
@ -8181,6 +8181,52 @@ instruct xchgN( memory mem, rRegN newval) %{
|
||||
ins_pipe( pipe_cmpxchg );
|
||||
%}
|
||||
|
||||
//----------Abs Instructions-------------------------------------------
|
||||
|
||||
// Integer Absolute Instructions
|
||||
// Branch-free AbsI: tmp = src >> 31 (arithmetic shift), then
// dst = (src ^ tmp) - tmp. dst and tmp are temporaries and the flags
// register is killed.
instruct absI_rReg(rRegI dst, rRegI src, rRegI tmp, rFlagsReg cr)
%{
  match(Set dst (AbsI src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movl $tmp, $src\n\t"
            "sarl $tmp, 31\n\t"
            "movl $dst, $src\n\t"
            "xorl $dst, $tmp\n\t"
            "subl $dst, $tmp\n"
         %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Register Rtmp = $tmp$$Register;

    __ movl(Rtmp, Rsrc);
    __ sarl(Rtmp, 31);
    __ movl(Rdst, Rsrc);
    __ xorl(Rdst, Rtmp);
    __ subl(Rdst, Rtmp);
  %}

  ins_pipe(ialu_reg_reg);
%}
|
||||
|
||||
// Long Absolute Instructions
|
||||
// Branch-free AbsL: tmp = src >> 63 (arithmetic shift), then
// dst = (src ^ tmp) - tmp. dst and tmp are temporaries and the flags
// register is killed.
instruct absL_rReg(rRegL dst, rRegL src, rRegL tmp, rFlagsReg cr)
%{
  match(Set dst (AbsL src));
  effect(TEMP dst, TEMP tmp, KILL cr);
  format %{ "movq $tmp, $src\n\t"
            "sarq $tmp, 63\n\t"
            "movq $dst, $src\n\t"
            "xorq $dst, $tmp\n\t"
            "subq $dst, $tmp\n"
         %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rsrc = $src$$Register;
    Register Rtmp = $tmp$$Register;

    __ movq(Rtmp, Rsrc);
    __ sarq(Rtmp, 63);
    __ movq(Rdst, Rsrc);
    __ xorq(Rdst, Rtmp);
    __ subq(Rdst, Rtmp);
  %}

  ins_pipe(ialu_reg_reg);
%}
|
||||
|
||||
//----------Subtraction Instructions-------------------------------------------
|
||||
|
||||
// Integer Subtraction Instructions
|
||||
|
@ -3808,7 +3808,7 @@ void MatchNode::count_commutative_op(int& count) {
|
||||
"MaxI","MinI","MaxF","MinF","MaxD","MinD",
|
||||
"MaxV", "MinV",
|
||||
"MulI","MulL","MulF","MulD",
|
||||
"MulVS","MulVI","MulVL","MulVF","MulVD",
|
||||
"MulVB","MulVS","MulVI","MulVL","MulVF","MulVD",
|
||||
"OrI","OrL",
|
||||
"OrV",
|
||||
"XorI","XorL",
|
||||
@ -4175,10 +4175,10 @@ bool MatchRule::is_vector() const {
|
||||
static const char *vector_list[] = {
|
||||
"AddVB","AddVS","AddVI","AddVL","AddVF","AddVD",
|
||||
"SubVB","SubVS","SubVI","SubVL","SubVF","SubVD",
|
||||
"MulVS","MulVI","MulVL","MulVF","MulVD",
|
||||
"MulVB","MulVS","MulVI","MulVL","MulVF","MulVD",
|
||||
"CMoveVD", "CMoveVF",
|
||||
"DivVF","DivVD",
|
||||
"AbsVF","AbsVD",
|
||||
"AbsVB","AbsVS","AbsVI","AbsVL","AbsVF","AbsVD",
|
||||
"NegVF","NegVD",
|
||||
"SqrtVD","SqrtVF",
|
||||
"AndV" ,"XorV" ,"OrV",
|
||||
|
@ -363,6 +363,9 @@ bool vmIntrinsics::preserves_state(vmIntrinsics::ID id) {
|
||||
case vmIntrinsics::_isInstance:
|
||||
case vmIntrinsics::_currentThread:
|
||||
case vmIntrinsics::_dabs:
|
||||
case vmIntrinsics::_fabs:
|
||||
case vmIntrinsics::_iabs:
|
||||
case vmIntrinsics::_labs:
|
||||
case vmIntrinsics::_dsqrt:
|
||||
case vmIntrinsics::_dsin:
|
||||
case vmIntrinsics::_dcos:
|
||||
@ -404,6 +407,9 @@ bool vmIntrinsics::can_trap(vmIntrinsics::ID id) {
|
||||
case vmIntrinsics::_longBitsToDouble:
|
||||
case vmIntrinsics::_currentThread:
|
||||
case vmIntrinsics::_dabs:
|
||||
case vmIntrinsics::_fabs:
|
||||
case vmIntrinsics::_iabs:
|
||||
case vmIntrinsics::_labs:
|
||||
case vmIntrinsics::_dsqrt:
|
||||
case vmIntrinsics::_dsin:
|
||||
case vmIntrinsics::_dcos:
|
||||
@ -567,6 +573,9 @@ bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) {
|
||||
case vmIntrinsics::_doubleToRawLongBits:
|
||||
case vmIntrinsics::_longBitsToDouble:
|
||||
case vmIntrinsics::_dabs:
|
||||
case vmIntrinsics::_fabs:
|
||||
case vmIntrinsics::_iabs:
|
||||
case vmIntrinsics::_labs:
|
||||
case vmIntrinsics::_dsqrt:
|
||||
case vmIntrinsics::_dsin:
|
||||
case vmIntrinsics::_dcos:
|
||||
|
@ -472,6 +472,7 @@
|
||||
template(float_int_signature, "(F)I") \
|
||||
template(double_long_signature, "(D)J") \
|
||||
template(double_double_signature, "(D)D") \
|
||||
template(float_float_signature, "(F)F") \
|
||||
template(int_float_signature, "(I)F") \
|
||||
template(long_int_signature, "(J)I") \
|
||||
template(long_long_signature, "(J)J") \
|
||||
@ -771,6 +772,9 @@
|
||||
do_name(fma_name, "fma") \
|
||||
\
|
||||
do_intrinsic(_dabs, java_lang_Math, abs_name, double_double_signature, F_S) \
|
||||
do_intrinsic(_fabs, java_lang_Math, abs_name, float_float_signature, F_S) \
|
||||
do_intrinsic(_iabs, java_lang_Math, abs_name, int_int_signature, F_S) \
|
||||
do_intrinsic(_labs, java_lang_Math, abs_name, long_long_signature, F_S) \
|
||||
do_intrinsic(_dsin, java_lang_Math, sin_name, double_double_signature, F_S) \
|
||||
do_intrinsic(_dcos, java_lang_Math, cos_name, double_double_signature, F_S) \
|
||||
do_intrinsic(_dtan, java_lang_Math, tan_name, double_double_signature, F_S) \
|
||||
|
@ -460,6 +460,9 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
|
||||
case vmIntrinsics::_dcos:
|
||||
case vmIntrinsics::_dtan:
|
||||
case vmIntrinsics::_dabs:
|
||||
case vmIntrinsics::_fabs:
|
||||
case vmIntrinsics::_iabs:
|
||||
case vmIntrinsics::_labs:
|
||||
case vmIntrinsics::_datan2:
|
||||
case vmIntrinsics::_dsqrt:
|
||||
case vmIntrinsics::_dexp:
|
||||
|
@ -30,6 +30,7 @@
|
||||
macro(AbsD)
|
||||
macro(AbsF)
|
||||
macro(AbsI)
|
||||
macro(AbsL)
|
||||
macro(AddD)
|
||||
macro(AddF)
|
||||
macro(AddI)
|
||||
@ -335,6 +336,7 @@ macro(SubVI)
|
||||
macro(SubVL)
|
||||
macro(SubVF)
|
||||
macro(SubVD)
|
||||
macro(MulVB)
|
||||
macro(MulVS)
|
||||
macro(MulVI)
|
||||
macro(MulReductionVI)
|
||||
@ -349,6 +351,10 @@ macro(FmaVD)
|
||||
macro(FmaVF)
|
||||
macro(DivVF)
|
||||
macro(DivVD)
|
||||
macro(AbsVB)
|
||||
macro(AbsVS)
|
||||
macro(AbsVI)
|
||||
macro(AbsVL)
|
||||
macro(AbsVF)
|
||||
macro(AbsVD)
|
||||
macro(NegVF)
|
||||
|
@ -227,6 +227,7 @@ class LibraryCallKit : public GraphKit {
|
||||
bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName);
|
||||
bool inline_math_native(vmIntrinsics::ID id);
|
||||
bool inline_math(vmIntrinsics::ID id);
|
||||
bool inline_double_math(vmIntrinsics::ID id);
|
||||
template <typename OverflowOp>
|
||||
bool inline_math_overflow(Node* arg1, Node* arg2);
|
||||
void inline_math_mathExact(Node* math, Node* test);
|
||||
@ -533,6 +534,9 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
||||
case vmIntrinsics::_dcos:
|
||||
case vmIntrinsics::_dtan:
|
||||
case vmIntrinsics::_dabs:
|
||||
case vmIntrinsics::_fabs:
|
||||
case vmIntrinsics::_iabs:
|
||||
case vmIntrinsics::_labs:
|
||||
case vmIntrinsics::_datan2:
|
||||
case vmIntrinsics::_dsqrt:
|
||||
case vmIntrinsics::_dexp:
|
||||
@ -1793,7 +1797,7 @@ Node* LibraryCallKit::round_double_node(Node* n) {
|
||||
// public static double Math.sqrt(double)
|
||||
// public static double Math.log(double)
|
||||
// public static double Math.log10(double)
|
||||
bool LibraryCallKit::inline_math(vmIntrinsics::ID id) {
|
||||
bool LibraryCallKit::inline_double_math(vmIntrinsics::ID id) {
|
||||
Node* arg = round_double_node(argument(0));
|
||||
Node* n = NULL;
|
||||
switch (id) {
|
||||
@ -1805,6 +1809,23 @@ bool LibraryCallKit::inline_math(vmIntrinsics::ID id) {
|
||||
return true;
|
||||
}
|
||||
|
||||
//------------------------------inline_math-----------------------------------
|
||||
// public static float Math.abs(float)
|
||||
// public static int Math.abs(int)
|
||||
// public static long Math.abs(long)
|
||||
bool LibraryCallKit::inline_math(vmIntrinsics::ID id) {
|
||||
Node* arg = argument(0);
|
||||
Node* n = NULL;
|
||||
switch (id) {
|
||||
case vmIntrinsics::_fabs: n = new AbsFNode( arg); break;
|
||||
case vmIntrinsics::_iabs: n = new AbsINode( arg); break;
|
||||
case vmIntrinsics::_labs: n = new AbsLNode( arg); break;
|
||||
default: fatal_unexpected_iid(id); break;
|
||||
}
|
||||
set_result(_gvn.transform(n));
|
||||
return true;
|
||||
}
|
||||
|
||||
//------------------------------runtime_math-----------------------------
|
||||
bool LibraryCallKit::runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName) {
|
||||
assert(call_type == OptoRuntime::Math_DD_D_Type() || call_type == OptoRuntime::Math_D_D_Type(),
|
||||
@ -1855,8 +1876,11 @@ bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
|
||||
runtime_math(OptoRuntime::Math_D_D_Type(), FN_PTR(SharedRuntime::dlog10), "LOG10");
|
||||
|
||||
// These intrinsics are supported on all hardware
|
||||
case vmIntrinsics::_dsqrt: return Matcher::match_rule_supported(Op_SqrtD) ? inline_math(id) : false;
|
||||
case vmIntrinsics::_dabs: return Matcher::has_match_rule(Op_AbsD) ? inline_math(id) : false;
|
||||
case vmIntrinsics::_dsqrt: return Matcher::match_rule_supported(Op_SqrtD) ? inline_double_math(id) : false;
|
||||
case vmIntrinsics::_dabs: return Matcher::has_match_rule(Op_AbsD) ? inline_double_math(id) : false;
|
||||
case vmIntrinsics::_fabs: return Matcher::match_rule_supported(Op_AbsF) ? inline_math(id) : false;
|
||||
case vmIntrinsics::_iabs: return Matcher::match_rule_supported(Op_AbsI) ? inline_math(id) : false;
|
||||
case vmIntrinsics::_labs: return Matcher::match_rule_supported(Op_AbsL) ? inline_math(id) : false;
|
||||
|
||||
case vmIntrinsics::_dexp:
|
||||
return StubRoutines::dexp() != NULL ?
|
||||
|
@ -350,6 +350,17 @@ public:
|
||||
virtual uint ideal_reg() const { return Op_RegI; }
|
||||
};
|
||||
|
||||
//------------------------------AbsLNode---------------------------------------
|
||||
// Absolute value a long. Since a naive graph involves control flow, we
|
||||
// "match" it in the ideal world (so the control flow can be removed).
|
||||
class AbsLNode : public AbsNode {
|
||||
public:
|
||||
AbsLNode( Node *in1 ) : AbsNode(in1) {}
|
||||
virtual int Opcode() const;
|
||||
const Type *bottom_type() const { return TypeLong::LONG; }
|
||||
virtual uint ideal_reg() const { return Op_RegL; }
|
||||
};
|
||||
|
||||
//------------------------------AbsFNode---------------------------------------
|
||||
// Absolute value of a float, a common floating-point idiom with a cheap hardware
|
||||
// implementation on most chips. Since a naive graph involves control flow, we
|
||||
|
@ -2453,6 +2453,7 @@ void SuperWord::output() {
|
||||
}
|
||||
} else if (opc == Op_SqrtF || opc == Op_SqrtD ||
|
||||
opc == Op_AbsF || opc == Op_AbsD ||
|
||||
opc == Op_AbsI || opc == Op_AbsL ||
|
||||
opc == Op_NegF || opc == Op_NegD ||
|
||||
opc == Op_PopCountI) {
|
||||
assert(n->req() == 2, "only one input expected");
|
||||
|
@ -70,8 +70,8 @@ int VectorNode::opcode(int sopc, BasicType bt) {
|
||||
return Op_SubVD;
|
||||
case Op_MulI:
|
||||
switch (bt) {
|
||||
case T_BOOLEAN:
|
||||
case T_BYTE: return 0; // Unimplemented
|
||||
case T_BOOLEAN:return 0;
|
||||
case T_BYTE: return Op_MulVB;
|
||||
case T_CHAR:
|
||||
case T_SHORT: return Op_MulVS;
|
||||
case T_INT: return Op_MulVI;
|
||||
@ -104,6 +104,18 @@ int VectorNode::opcode(int sopc, BasicType bt) {
|
||||
case Op_DivD:
|
||||
assert(bt == T_DOUBLE, "must be");
|
||||
return Op_DivVD;
|
||||
case Op_AbsI:
|
||||
switch (bt) {
|
||||
case T_BOOLEAN:
|
||||
case T_CHAR: return 0; // abs does not make sense for unsigned
|
||||
case T_BYTE: return Op_AbsVB;
|
||||
case T_SHORT: return Op_AbsVS;
|
||||
case T_INT: return Op_AbsVI;
|
||||
default: ShouldNotReachHere(); return 0;
|
||||
}
|
||||
case Op_AbsL:
|
||||
assert(bt == T_LONG, "must be");
|
||||
return Op_AbsVL;
|
||||
case Op_AbsF:
|
||||
assert(bt == T_FLOAT, "must be");
|
||||
return Op_AbsVF;
|
||||
@ -350,6 +362,7 @@ VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, uint vlen, BasicType b
|
||||
case Op_SubVF: return new SubVFNode(n1, n2, vt);
|
||||
case Op_SubVD: return new SubVDNode(n1, n2, vt);
|
||||
|
||||
case Op_MulVB: return new MulVBNode(n1, n2, vt);
|
||||
case Op_MulVS: return new MulVSNode(n1, n2, vt);
|
||||
case Op_MulVI: return new MulVINode(n1, n2, vt);
|
||||
case Op_MulVL: return new MulVLNode(n1, n2, vt);
|
||||
@ -359,6 +372,10 @@ VectorNode* VectorNode::make(int opc, Node* n1, Node* n2, uint vlen, BasicType b
|
||||
case Op_DivVF: return new DivVFNode(n1, n2, vt);
|
||||
case Op_DivVD: return new DivVDNode(n1, n2, vt);
|
||||
|
||||
case Op_AbsVB: return new AbsVBNode(n1, vt);
|
||||
case Op_AbsVS: return new AbsVSNode(n1, vt);
|
||||
case Op_AbsVI: return new AbsVINode(n1, vt);
|
||||
case Op_AbsVL: return new AbsVLNode(n1, vt);
|
||||
case Op_AbsVF: return new AbsVFNode(n1, vt);
|
||||
case Op_AbsVD: return new AbsVDNode(n1, vt);
|
||||
|
||||
|
@ -224,6 +224,14 @@ class SubVDNode : public VectorNode {
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
//------------------------------MulVBNode--------------------------------------
|
||||
// Vector multiply byte
|
||||
class MulVBNode : public VectorNode {
|
||||
public:
|
||||
MulVBNode(Node* in1, Node* in2, const TypeVect* vt) : VectorNode(in1, in2, vt) {}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
//------------------------------MulVSNode--------------------------------------
|
||||
// Vector multiply short
|
||||
class MulVSNode : public VectorNode {
|
||||
@ -360,6 +368,38 @@ class DivVDNode : public VectorNode {
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
//------------------------------AbsVBNode--------------------------------------
|
||||
// Vector Abs byte
|
||||
class AbsVBNode : public VectorNode {
|
||||
public:
|
||||
AbsVBNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
//------------------------------AbsVSNode--------------------------------------
|
||||
// Vector Abs short
|
||||
class AbsVSNode : public VectorNode {
|
||||
public:
|
||||
AbsVSNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
//------------------------------AbsVINode--------------------------------------
|
||||
// Vector Abs int
|
||||
class AbsVINode : public VectorNode {
|
||||
public:
|
||||
AbsVINode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
//------------------------------AbsVLNode--------------------------------------
|
||||
// Vector Abs long
|
||||
class AbsVLNode : public VectorNode {
|
||||
public:
|
||||
AbsVLNode(Node* in, const TypeVect* vt) : VectorNode(in, vt) {}
|
||||
virtual int Opcode() const;
|
||||
};
|
||||
|
||||
//------------------------------AbsVFNode--------------------------------------
|
||||
// Vector Abs float
|
||||
class AbsVFNode : public VectorNode {
|
||||
|
@ -1758,6 +1758,10 @@ typedef PaddedEnd<ObjectMonitor> PaddedObjectMonitor;
|
||||
declare_c2_type(ReverseBytesLNode, Node) \
|
||||
declare_c2_type(ReductionNode, Node) \
|
||||
declare_c2_type(VectorNode, Node) \
|
||||
declare_c2_type(AbsVBNode, VectorNode) \
|
||||
declare_c2_type(AbsVSNode, VectorNode) \
|
||||
declare_c2_type(AbsVINode, VectorNode) \
|
||||
declare_c2_type(AbsVLNode, VectorNode) \
|
||||
declare_c2_type(AddVBNode, VectorNode) \
|
||||
declare_c2_type(AddVSNode, VectorNode) \
|
||||
declare_c2_type(AddVINode, VectorNode) \
|
||||
@ -1774,6 +1778,7 @@ typedef PaddedEnd<ObjectMonitor> PaddedObjectMonitor;
|
||||
declare_c2_type(SubVLNode, VectorNode) \
|
||||
declare_c2_type(SubVFNode, VectorNode) \
|
||||
declare_c2_type(SubVDNode, VectorNode) \
|
||||
declare_c2_type(MulVBNode, VectorNode) \
|
||||
declare_c2_type(MulVSNode, VectorNode) \
|
||||
declare_c2_type(MulVLNode, VectorNode) \
|
||||
declare_c2_type(MulReductionVLNode, ReductionNode) \
|
||||
@ -1782,6 +1787,8 @@ typedef PaddedEnd<ObjectMonitor> PaddedObjectMonitor;
|
||||
declare_c2_type(MulVFNode, VectorNode) \
|
||||
declare_c2_type(MulReductionVFNode, ReductionNode) \
|
||||
declare_c2_type(MulVDNode, VectorNode) \
|
||||
declare_c2_type(NegVFNode, VectorNode) \
|
||||
declare_c2_type(NegVDNode, VectorNode) \
|
||||
declare_c2_type(FmaVDNode, VectorNode) \
|
||||
declare_c2_type(FmaVFNode, VectorNode) \
|
||||
declare_c2_type(CMoveVFNode, VectorNode) \
|
||||
|
@ -1353,6 +1353,7 @@ public final class Math {
|
||||
* @param a the argument whose absolute value is to be determined
|
||||
* @return the absolute value of the argument.
|
||||
*/
|
||||
@HotSpotIntrinsicCandidate
|
||||
public static int abs(int a) {
|
||||
return (a < 0) ? -a : a;
|
||||
}
|
||||
@ -1370,6 +1371,7 @@ public final class Math {
|
||||
* @param a the argument whose absolute value is to be determined
|
||||
* @return the absolute value of the argument.
|
||||
*/
|
||||
@HotSpotIntrinsicCandidate
|
||||
public static long abs(long a) {
|
||||
return (a < 0) ? -a : a;
|
||||
}
|
||||
@ -1394,6 +1396,7 @@ public final class Math {
|
||||
* @param a the argument whose absolute value is to be determined
|
||||
* @return the absolute value of the argument.
|
||||
*/
|
||||
@HotSpotIntrinsicCandidate
|
||||
public static float abs(float a) {
|
||||
return (a <= 0.0F) ? 0.0F - a : a;
|
||||
}
|
||||
|
@ -398,6 +398,9 @@ public class CheckGraalIntrinsics extends GraalTest {
|
||||
|
||||
if (isJDK13OrHigher()) {
|
||||
add(toBeInvestigated,
|
||||
"java/lang/Math.abs(F)F",
|
||||
"java/lang/Math.abs(I)I",
|
||||
"java/lang/Math.abs(J)J",
|
||||
"java/lang/Math.max(DD)D",
|
||||
"java/lang/Math.max(FF)F",
|
||||
"java/lang/Math.min(DD)D",
|
||||
|
@ -86,6 +86,7 @@ public class TestDoubleVect {
|
||||
test_divc_n(a0, a1);
|
||||
test_divv(a0, a1, -VALUE);
|
||||
test_diva(a0, a1, a3);
|
||||
test_negc(a0, a1);
|
||||
}
|
||||
// Test and verify results
|
||||
System.out.println("Verification");
|
||||
@ -339,6 +340,16 @@ public class TestDoubleVect {
|
||||
for (int i=12; i<ARRLEN; i++) {
|
||||
errn += verify("test_diva_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
|
||||
}
|
||||
test_negc(a0, a1);
|
||||
errn += verify("test_negc: ", 0, a0[0], (Double.NaN));
|
||||
errn += verify("test_negc: ", 1, a0[1], (Double.NEGATIVE_INFINITY));
|
||||
errn += verify("test_negc: ", 2, a0[2], (Double.POSITIVE_INFINITY));
|
||||
errn += verify("test_negc: ", 3, a0[3], (double)(-Double.MAX_VALUE));
|
||||
errn += verify("test_negc: ", 4, a0[4], (double)(-Double.MIN_VALUE));
|
||||
errn += verify("test_negc: ", 5, a0[5], (double)(-Double.MIN_NORMAL));
|
||||
for (int i=6; i<ARRLEN; i++) {
|
||||
errn += verify("test_negc: ", i, a0[i], (double)(-((double)(ADD_INIT+i))));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -469,6 +480,13 @@ public class TestDoubleVect {
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("test_diva_n: " + (end - start));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
for (int i=0; i<ITERS; i++) {
|
||||
test_negc(a0, a1);
|
||||
}
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("test_negc_n: " + (end - start));
|
||||
|
||||
return errn;
|
||||
}
|
||||
|
||||
@ -553,6 +571,11 @@ public class TestDoubleVect {
|
||||
a0[i] = (a1[i]/a2[i]);
|
||||
}
|
||||
}
|
||||
static void test_negc(double[] a0, double[] a1) {
|
||||
for (int i = 0; i < a0.length; i+=1) {
|
||||
a0[i] = (double)(-((double)a1[i]));
|
||||
}
|
||||
}
|
||||
|
||||
static int verify(String text, int i, double elem, double val) {
|
||||
if (elem != val && !(Double.isNaN(elem) && Double.isNaN(val))) {
|
||||
|
@ -86,6 +86,7 @@ public class TestFloatVect {
|
||||
test_divc_n(a0, a1);
|
||||
test_divv(a0, a1, -VALUE);
|
||||
test_diva(a0, a1, a3);
|
||||
test_negc(a0, a1);
|
||||
}
|
||||
// Test and verify results
|
||||
System.out.println("Verification");
|
||||
@ -340,6 +341,17 @@ public class TestFloatVect {
|
||||
errn += verify("test_diva_n: ", i, a0[i], ((ADD_INIT+i)/(-VALUE)));
|
||||
}
|
||||
|
||||
test_negc(a0, a1);
|
||||
errn += verify("test_negc: ", 0, a0[0], (Float.NaN));
|
||||
errn += verify("test_negc: ", 1, a0[1], (Float.NEGATIVE_INFINITY));
|
||||
errn += verify("test_negc: ", 2, a0[2], (Float.POSITIVE_INFINITY));
|
||||
errn += verify("test_negc: ", 3, a0[3], (float)(-Float.MAX_VALUE));
|
||||
errn += verify("test_negc: ", 4, a0[4], (float)(-Float.MIN_VALUE));
|
||||
errn += verify("test_negc: ", 5, a0[5], (float)(-Float.MIN_NORMAL));
|
||||
for (int i=6; i<ARRLEN; i++) {
|
||||
errn += verify("test_negc: ", i, a0[i], (float)(-((float)(ADD_INIT+i))));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (errn > 0)
|
||||
@ -469,6 +481,13 @@ public class TestFloatVect {
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("test_diva_n: " + (end - start));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
for (int i=0; i<ITERS; i++) {
|
||||
test_negc(a0, a1);
|
||||
}
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("test_negc_n: " + (end - start));
|
||||
|
||||
return errn;
|
||||
}
|
||||
|
||||
@ -554,6 +573,12 @@ public class TestFloatVect {
|
||||
}
|
||||
}
|
||||
|
||||
static void test_negc(float[] a0, float[] a1) {
|
||||
for (int i = 0; i < a0.length; i+=1) {
|
||||
a0[i] = (float)(-((float)a1[i]));
|
||||
}
|
||||
}
|
||||
|
||||
static int verify(String text, int i, float elem, float val) {
|
||||
if (elem != val && !(Float.isNaN(elem) && Float.isNaN(val))) {
|
||||
System.err.println(text + "[" + i + "] = " + elem + " != " + val);
|
||||
|
@ -102,6 +102,10 @@ public class TestIntVect {
|
||||
test_xorv(a0, a1, (int)BIT_MASK);
|
||||
test_xora(a0, a1, a4);
|
||||
|
||||
test_absc(a0, a1);
|
||||
test_negc(a0, a1);
|
||||
test_notc(a0, a1);
|
||||
|
||||
test_sllc(a0, a1);
|
||||
test_sllv(a0, a1, VALUE);
|
||||
test_srlc(a0, a1);
|
||||
@ -276,6 +280,21 @@ public class TestIntVect {
|
||||
errn += verify("test_xora: ", i, a0[i], (int)((int)(ADD_INIT+i)^BIT_MASK));
|
||||
}
|
||||
|
||||
test_absc(a0, a1);
|
||||
for (int i=0; i<ARRLEN; i++) {
|
||||
errn += verify("test_absc: ", i, a0[i], (int)(Math.abs((int)(ADD_INIT+i))));
|
||||
}
|
||||
|
||||
test_negc(a0, a1);
|
||||
for (int i=0; i<ARRLEN; i++) {
|
||||
errn += verify("test_negc: ", i, a0[i], (int)(-(int)(ADD_INIT+i)));
|
||||
}
|
||||
|
||||
test_notc(a0, a1);
|
||||
for (int i=0; i<ARRLEN; i++) {
|
||||
errn += verify("test_notc: ", i, a0[i], (int)(~(int)(ADD_INIT+i)));
|
||||
}
|
||||
|
||||
test_sllc(a0, a1);
|
||||
for (int i=0; i<ARRLEN; i++) {
|
||||
errn += verify("test_sllc: ", i, a0[i], (int)((int)(ADD_INIT+i)<<VALUE));
|
||||
@ -648,6 +667,27 @@ public class TestIntVect {
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("test_xora: " + (end - start));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
for (int i=0; i<ITERS; i++) {
|
||||
test_absc(a0, a1);
|
||||
}
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("test_absc: " + (end - start));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
for (int i=0; i<ITERS; i++) {
|
||||
test_negc(a0, a1);
|
||||
}
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("test_negc: " + (end - start));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
for (int i=0; i<ITERS; i++) {
|
||||
test_notc(a0, a1);
|
||||
}
|
||||
end = System.currentTimeMillis();
|
||||
System.out.println("test_notc: " + (end - start));
|
||||
|
||||
start = System.currentTimeMillis();
|
||||
for (int i=0; i<ITERS; i++) {
|
||||
test_sllc(a0, a1);
|
||||
@ -1040,6 +1080,24 @@ public class TestIntVect {
|
||||
}
|
||||
}
|
||||
|
||||
static void test_absc(int[] a0, int[] a1) {
|
||||
for (int i = 0; i < a0.length; i+=1) {
|
||||
a0[i] = (int)(Math.abs(a1[i]));
|
||||
}
|
||||
}
|
||||
|
||||
static void test_negc(int[] a0, int[] a1) {
|
||||
for (int i = 0; i < a0.length; i+=1) {
|
||||
a0[i] = (int)(-a1[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void test_notc(int[] a0, int[] a1) {
|
||||
for (int i = 0; i < a0.length; i+=1) {
|
||||
a0[i] = (int)(~a1[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void test_sllc(int[] a0, int[] a1) {
|
||||
for (int i = 0; i < a0.length; i+=1) {
|
||||
a0[i] = (int)(a1[i]<<VALUE);
|
||||
|
Loading…
Reference in New Issue
Block a user