Andrew Haley 2015-12-15 13:42:13 +00:00
commit ac6fa27965
40 changed files with 848 additions and 148 deletions

View File

@@ -46,6 +46,7 @@ BUILD_HOTSPOT_JTREG_NATIVE_SRC := \
$(HOTSPOT_TOPDIR)/test/runtime/jni/8033445 \
$(HOTSPOT_TOPDIR)/test/runtime/jni/ToStringInInterfaceTest \
$(HOTSPOT_TOPDIR)/test/runtime/SameObject \
$(HOTSPOT_TOPDIR)/test/compiler/floatingpoint/ \
#
# Add conditional directories here when needed.

View File

@@ -28,6 +28,10 @@
const int StackAlignmentInBytes = 16;
// Indicates whether the C calling conventions require that
// 32-bit integer argument values are extended to 64 bits.
const bool CCallingConventionRequiresIntsAsLongs = false;
#define SUPPORTS_NATIVE_CX8
// The maximum B/BL offset range on AArch64 is 128MB.
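For context: SUPPORTS_NATIVE_CX8 advertises a native 8-byte compare-and-exchange, and the same define shows up in the other platform globalDefinitions headers touched below. A minimal sketch of the usual consumer, assuming the conventional HotSpot vm_version.hpp pattern (illustrative, not part of this commit):

#ifdef SUPPORTS_NATIVE_CX8
static bool supports_cx8() { return true; }          // hardware 64-bit cmpxchg
#else
static bool supports_cx8() { return _supports_cx8; } // probed at startup
#endif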

View File

@@ -182,6 +182,11 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseAdler32Intrinsics, true);
}
if (UseVectorizedMismatchIntrinsic) {
warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
}
if (auxv & HWCAP_AES) {
UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
UseAESIntrinsics =

View File

@@ -31,6 +31,10 @@ const int BytesPerInstWord = 4;
const int StackAlignmentInBytes = 16;
// Indicates whether the C calling conventions require that
// 32-bit integer argument values are extended to 64 bits.
const bool CCallingConventionRequiresIntsAsLongs = true;
#define SUPPORTS_NATIVE_CX8
// The PPC CPUs are NOT multiple-copy-atomic.

View File

@@ -3486,6 +3486,7 @@ encode %{
call->_jvmadj = _jvmadj;
call->_in_rms = _in_rms;
call->_nesting = _nesting;
call->_override_symbolic_info = _override_symbolic_info;
// New call needs all inputs of old call.
// Req...

View File

@@ -223,6 +223,11 @@ void VM_Version::initialize() {
UseMultiplyToLenIntrinsic = true;
}
if (UseVectorizedMismatchIntrinsic) {
warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
}
// Adjust RTM (Restricted Transactional Memory) flags.
if (!has_tcheck() && UseRTMLocking) {
// Can't continue because UseRTMLocking affects UseBiasedLocking flag

View File

@@ -30,6 +30,10 @@ const int BytesPerInstWord = 4;
const int StackAlignmentInBytes = (2*wordSize);
// Indicates whether the C calling conventions require that
// 32-bit integer argument values are extended to 64 bits.
const bool CCallingConventionRequiresIntsAsLongs = false;
#define SUPPORTS_NATIVE_CX8
// The expected size in bytes of a cache line, used to pad data structures.

View File

@@ -356,6 +356,11 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
}
if (UseVectorizedMismatchIntrinsic) {
warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
}
if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
(cache_line_size > ContendedPaddingWidth))
ContendedPaddingWidth = cache_line_size;

View File

@@ -2152,33 +2152,64 @@ void Assembler::movddup(XMMRegister dst, XMMRegister src) {
emit_int8(0xC0 | encode);
}
void Assembler::kmovwl(KRegister dst, Register src) {
NOT_LP64(assert(VM_Version::supports_evex(), ""));
void Assembler::kmovbl(KRegister dst, Register src) {
assert(VM_Version::supports_avx512dq(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x92);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::kmovbl(Register dst, KRegister src) {
assert(VM_Version::supports_avx512dq(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x93);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::kmovwl(KRegister dst, Register src) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x92);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::kmovwl(Register dst, KRegister src) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x93);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::kmovdl(KRegister dst, Register src) {
NOT_LP64(assert(VM_Version::supports_evex(), ""));
VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, VEX_OPCODE_0F, &attributes);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x92);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::kmovdl(Register dst, KRegister src) {
assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x93);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::kmovql(KRegister dst, KRegister src) {
NOT_LP64(assert(VM_Version::supports_evex(), ""));
assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x90);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::kmovql(KRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_evex(), ""));
assert(VM_Version::supports_avx512bw(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
@@ -2187,7 +2218,7 @@ void Assembler::kmovql(KRegister dst, Address src) {
}
void Assembler::kmovql(Address dst, KRegister src) {
NOT_LP64(assert(VM_Version::supports_evex(), ""));
assert(VM_Version::supports_avx512bw(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
@@ -2196,46 +2227,53 @@ void Assembler::kmovql(Address dst, KRegister src) {
}
void Assembler::kmovql(KRegister dst, Register src) {
NOT_LP64(assert(VM_Version::supports_evex(), ""));
VexSimdPrefix pre = !_legacy_mode_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
InstructionAttr attributes(AVX_128bit, /* rex_w */ !_legacy_mode_bw, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, VEX_OPCODE_0F, &attributes);
assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x92);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::kmovql(Register dst, KRegister src) {
assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x93);
emit_int8((unsigned char)(0xC0 | encode));
}
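Taken together, the new kmov* entry points follow one regular encoding scheme: opcode 0x90 moves mask-to-mask (0x91 is the store-to-memory form), 0x92 loads a mask from a GPR, 0x93 moves a mask back to a GPR, and the operand width picks the SIMD prefix, VEX.W bit, and required CPU feature. A compilable sketch of that mapping, derived from the hunks above plus the Intel SDM (the names are illustrative, not HotSpot API):

#include <cassert>

enum SimdPrefix { PRE_NONE, PRE_66, PRE_F2 };
struct KmovEnc { SimdPrefix pre; bool vex_w; unsigned char opcode; };

// Mask <-> GPR forms: kmovb needs AVX-512DQ, kmovw only base EVEX/AVX-512F,
// kmovd and kmovq need AVX-512BW (kmovq additionally sets VEX.W).
KmovEnc kmov_gpr(int width_bits, bool to_mask) {
  const unsigned char op = to_mask ? 0x92 : 0x93;
  switch (width_bits) {
    case 8:  return { PRE_66,   false, op };
    case 16: return { PRE_NONE, false, op };
    case 32: return { PRE_F2,   false, op };
    case 64: return { PRE_F2,   true,  op };
    default: assert(false); return { PRE_NONE, false, 0 };
  }
}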
// This instruction produces ZF or CF flags
void Assembler::kortestbl(KRegister src1, KRegister src2) {
NOT_LP64(assert(VM_Version::supports_avx512dq(), ""));
assert(VM_Version::supports_avx512dq(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x98);
emit_int8((unsigned char)(0xC0 | encode));
}
// This instruction produces ZF or CF flags
void Assembler::kortestwl(KRegister src1, KRegister src2) {
NOT_LP64(assert(VM_Version::supports_evex(), ""));
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x98);
emit_int8((unsigned char)(0xC0 | encode));
}
// This instruction produces ZF or CF flags
void Assembler::kortestdl(KRegister src1, KRegister src2) {
NOT_LP64(assert(VM_Version::supports_avx512bw(), ""));
assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x98);
emit_int8((unsigned char)(0xC0 | encode));
}
// This instruction produces ZF or CF flags
void Assembler::kortestql(KRegister src1, KRegister src2) {
NOT_LP64(assert(VM_Version::supports_avx512bw(), ""));
assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = kreg_prefix_and_encode(src1, knoreg, src2, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0x98);
emit_int8((unsigned char)(0xC0 | encode));
}
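The kortest* family used below in string_compare ORs two masks and reports the result only in flags: per the Intel SDM, ZF is set when the OR is zero and CF when the OR is all ones, so jcc(aboveEqual, ...) after kortestql(k7, k7) fires exactly when some lane miscompared. A small model of those semantics (an annotation, not HotSpot code):

#include <cstdint>

struct Flags { bool zf, cf; };

// KORTESTQ k1, k2: OR the 64-bit masks; ZF = (result == 0), CF = (result == all ones).
Flags kortestq(uint64_t k1, uint64_t k2) {
  const uint64_t r = k1 | k2;
  return { r == 0, r == ~uint64_t(0) };
}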
@@ -2375,7 +2413,7 @@ void Assembler::vmovdqu(Address dst, XMMRegister src) {
// Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64)
void Assembler::evmovdqub(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_int8((unsigned char)(0xC0 | encode));
@@ -2395,7 +2433,7 @@ void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(src != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
@@ -2404,7 +2442,7 @@ void Assembler::evmovdqub(Address dst, XMMRegister src, int vector_len) {
void Assembler::evmovdquw(XMMRegister dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x6F);
emit_int8((unsigned char)(0xC0 | encode));
@@ -2424,7 +2462,7 @@ void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
assert(VM_Version::supports_evex(), "");
assert(src != xnoreg, "sanity");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x7F);
@@ -3069,7 +3107,7 @@ void Assembler::packuswb(XMMRegister dst, Address src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x67);
@@ -3078,7 +3116,7 @@ void Assembler::packuswb(XMMRegister dst, Address src) {
void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x67);
emit_int8((unsigned char)(0xC0 | encode));
@@ -3086,7 +3124,7 @@ void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "some form of AVX must be enabled");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x67);
@@ -3128,7 +3166,7 @@ void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
void Assembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert(VM_Version::supports_sse2(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x74);
@@ -3148,16 +3186,28 @@ void Assembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int
// In this context, the mask used to process the equal components is written to kdst
void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x74);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx512bw(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int dst_enc = kdst->encoding();
vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x74);
emit_operand(as_Register(dst_enc), src);
}
// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert(VM_Version::supports_sse2(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x75);
@@ -3177,16 +3227,28 @@ void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int
// In this context, the mask used to process the equal components is written to kdst
void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(VM_Version::supports_avx512bw(), "");
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(kdst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x75);
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_avx512bw(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int dst_enc = kdst->encoding();
vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x75);
emit_operand(as_Register(dst_enc), src);
}
// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert(VM_Version::supports_sse2(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x76);
@@ -3213,9 +3275,21 @@ void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int
emit_int8((unsigned char)(0xC0 | encode));
}
void Assembler::evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
assert(VM_Version::supports_evex(), "");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_32bit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int dst_enc = kdst->encoding();
vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x76);
emit_operand(as_Register(dst_enc), src);
}
// In this context, the dst vector contains the components that are equal; non-equal components are zeroed in dst
void Assembler::pcmpeqq(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse4_1(), ""));
assert(VM_Version::supports_sse4_1(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x29);
@@ -3328,7 +3402,7 @@ void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
void Assembler::pmovzxbw(XMMRegister dst, Address src) {
assert(VM_Version::supports_sse4_1(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x30);
@@ -3337,7 +3411,7 @@ void Assembler::pmovzxbw(XMMRegister dst, Address src) {
void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_sse4_1(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x30);
emit_int8((unsigned char)(0xC0 | encode));
@@ -3347,7 +3421,7 @@ void Assembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
assert(VM_Version::supports_avx(), "");
InstructionMark im(this);
assert(dst != xnoreg, "sanity");
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x30);
@@ -3452,7 +3526,7 @@ void Assembler::prefix(Prefix p) {
void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_ssse3(), "");
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x00);
emit_int8((unsigned char)(0xC0 | encode));
@@ -3461,7 +3535,7 @@ void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
void Assembler::pshufb(XMMRegister dst, Address src) {
assert(VM_Version::supports_ssse3(), "");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x00);
@@ -3495,7 +3569,7 @@ void Assembler::pshufd(XMMRegister dst, Address src, int mode) {
void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
assert(isByte(mode), "invalid value");
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x70);
emit_int8((unsigned char)(0xC0 | encode));
@@ -3507,7 +3581,7 @@ void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
InstructionMark im(this);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
emit_int8(0x70);
@@ -4723,7 +4797,7 @@ void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int v
void Assembler::paddb(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xFC);
emit_int8((unsigned char)(0xC0 | encode));
@@ -4731,7 +4805,7 @@ void Assembler::paddb(XMMRegister dst, XMMRegister src) {
void Assembler::paddw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xFD);
emit_int8((unsigned char)(0xC0 | encode));
@@ -4771,7 +4845,7 @@ void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xFC);
@@ -4780,7 +4854,7 @@ void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xFD);
@@ -4808,7 +4882,7 @@ void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
@@ -4819,7 +4893,7 @@ void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector
void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
@@ -4851,7 +4925,7 @@ void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector
void Assembler::psubb(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xF8);
emit_int8((unsigned char)(0xC0 | encode));
@@ -4859,7 +4933,7 @@ void Assembler::psubb(XMMRegister dst, XMMRegister src) {
void Assembler::psubw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xF9);
emit_int8((unsigned char)(0xC0 | encode));
@@ -4882,7 +4956,7 @@ void Assembler::psubq(XMMRegister dst, XMMRegister src) {
void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xF8);
@@ -4891,7 +4965,7 @@ void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xF9);
@@ -4919,7 +4993,7 @@ void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
@@ -4930,7 +5004,7 @@ void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector
void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
@@ -4962,7 +5036,7 @@ void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector
void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD5);
emit_int8((unsigned char)(0xC0 | encode));
@@ -4978,7 +5052,7 @@ void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD5);
@@ -5006,7 +5080,7 @@ void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int v
void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionMark im(this);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
@@ -5039,7 +5113,7 @@ void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vecto
// Shift packed integers left by specified number of bits.
void Assembler::psllw(XMMRegister dst, int shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
// XMM6 is for /6 encoding: 66 0F 71 /6 ib
int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x71);
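The "XMM6 is for /6 encoding" trick here (and the XMM2/XMM4 variants below) exploits the fact that in the immediate-shift forms the ModRM reg field is an opcode extension (/digit), not a register: the encoder passes a dummy register whose number equals the digit, and the real destination travels in the r/m field. A tiny runnable illustration, assuming nothing beyond the x86 ModRM layout:

#include <cstdio>

// Register-direct ModRM byte for a /digit-encoded instruction:
// mod = 11, reg = opcode extension, rm = operand register number.
unsigned char modrm_digit(int digit, int rm_reg) {
  return (unsigned char)(0xC0 | (digit << 3) | (rm_reg & 7));
}

int main() {
  // psllw xmm1, imm8 is 66 0F 71 /6 ib -> ModRM 0xF1
  std::printf("ModRM = 0x%02X\n", modrm_digit(6, 1));
}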
@@ -5069,7 +5143,7 @@ void Assembler::psllq(XMMRegister dst, int shift) {
void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xF1);
emit_int8((unsigned char)(0xC0 | encode));
@@ -5093,7 +5167,7 @@ void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
// XMM6 is for /6 encoding: 66 0F 71 /6 ib
int encode = vex_prefix_and_encode(xmm6->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x71);
@@ -5124,7 +5198,7 @@ void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_l
void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xF1);
emit_int8((unsigned char)(0xC0 | encode));
@@ -5149,7 +5223,7 @@ void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int
// Shift packed integers logically right by specified number of bits.
void Assembler::psrlw(XMMRegister dst, int shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
// XMM2 is for /2 encoding: 66 0F 71 /2 ib
int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x71);
@@ -5181,7 +5255,7 @@ void Assembler::psrlq(XMMRegister dst, int shift) {
void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD1);
emit_int8((unsigned char)(0xC0 | encode));
@@ -5205,7 +5279,7 @@ void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
// XMM2 is for /2 encoding: 66 0F 71 /2 ib
int encode = vex_prefix_and_encode(xmm2->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x71);
@@ -5235,7 +5309,7 @@ void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_l
void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xD1);
emit_int8((unsigned char)(0xC0 | encode));
@@ -5260,7 +5334,7 @@ void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int
// Shift packed integers arithmetically right by specified number of bits.
void Assembler::psraw(XMMRegister dst, int shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
// XMM4 is for /4 encoding: 66 0F 71 /4 ib
int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x71);
@@ -5280,7 +5354,7 @@ void Assembler::psrad(XMMRegister dst, int shift) {
void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = simd_prefix_and_encode(dst, dst, shift, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xE1);
emit_int8((unsigned char)(0xC0 | encode));
@@ -5296,7 +5370,7 @@ void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
// XMM4 is for /4 encoding: 66 0F 71 /4 ib
int encode = vex_prefix_and_encode(xmm4->encoding(), dst->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8(0x71);
@@ -5316,7 +5390,7 @@ void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_l
void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
assert(UseAVX > 0, "requires some form of AVX");
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
emit_int8((unsigned char)0xE1);
emit_int8((unsigned char)(0xC0 | encode));
@@ -5706,7 +5780,7 @@ void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
// duplicate 2-byte integer data from src into 16 locations in dest
void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
assert(VM_Version::supports_avx2(), "");
InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
emit_int8(0x79);
emit_int8((unsigned char)(0xC0 | encode));
@@ -6573,18 +6647,6 @@ int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegis
}
}
int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
VexOpcode opc, InstructionAttr *attributes) {
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
return vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), pre, opc, attributes);
}
int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
VexOpcode opc, InstructionAttr *attributes) {
int nds_enc = nds->is_valid() ? nds->encoding() : 0;
return vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), pre, opc, attributes);
}
void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
assert(VM_Version::supports_avx(), "");
assert(!VM_Version::supports_evex(), "");

View File

@@ -655,12 +655,6 @@ private:
int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
VexOpcode opc, InstructionAttr *attributes);
int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
VexOpcode opc, InstructionAttr *attributes);
int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
VexOpcode opc, InstructionAttr *attributes);
// Helper functions for groups of instructions
void emit_arith_b(int op1, int op2, Register dst, int imm8);
@@ -1331,12 +1325,17 @@ private:
void movddup(XMMRegister dst, XMMRegister src);
void kmovbl(KRegister dst, Register src);
void kmovbl(Register dst, KRegister src);
void kmovwl(KRegister dst, Register src);
void kmovwl(Register dst, KRegister src);
void kmovdl(KRegister dst, Register src);
void kmovdl(Register dst, KRegister src);
void kmovql(KRegister dst, KRegister src);
void kmovql(KRegister dst, Register src);
void kmovql(Address dst, KRegister src);
void kmovql(KRegister dst, Address src);
void kmovql(KRegister dst, Register src);
void kmovql(Register dst, KRegister src);
void kortestbl(KRegister dst, KRegister src);
void kortestwl(KRegister dst, KRegister src);
@@ -1521,14 +1520,17 @@ private:
void pcmpeqb(XMMRegister dst, XMMRegister src);
void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void pcmpeqw(XMMRegister dst, XMMRegister src);
void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void pcmpeqd(XMMRegister dst, XMMRegister src);
void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqd(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
void evpcmpeqd(KRegister kdst, XMMRegister nds, Address src, int vector_len);
void pcmpeqq(XMMRegister dst, XMMRegister src);
void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

View File

@@ -27,6 +27,10 @@
const int StackAlignmentInBytes = 16;
// Indicates whether the C calling conventions require that
// 32-bit integer argument values are extended to 64 bits.
const bool CCallingConventionRequiresIntsAsLongs = false;
#define SUPPORTS_NATIVE_CX8
// The expected size in bytes of a cache line, used to pad data structures.

View File

@@ -7999,9 +7999,15 @@ void MacroAssembler::string_compare(Register str1, Register str2,
XMMRegister vec1, int ae) {
ShortBranchVerifier sbv(this);
Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
Label COMPARE_WIDE_VECTORS_LOOP_FAILED; // used only when _LP64 && AVX3
int stride, stride2, adr_stride, adr_stride1, adr_stride2;
int stride2x2 = 0x40;
Address::ScaleFactor scale, scale1, scale2;
if (ae != StrIntrinsicNode::LL) {
stride2x2 = 0x20;
}
if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
shrl(cnt2, 1);
}
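stride2x2 is the element count consumed by one 64-byte iteration of the new AVX-512 loop: 0x40 when both strings are Latin-1 (LL, 64 one-byte elements per step) and 0x20 otherwise (32 two-byte elements, whether loaded directly or zero-extended from Latin-1). A one-line model of that accounting (illustrative only, not HotSpot code):

// Elements compared per 64-byte AVX-512 step.
inline int elements_per_64byte_step(bool both_latin1) {
  return both_latin1 ? 0x40 : 0x20;
}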
@@ -8011,15 +8017,15 @@ void MacroAssembler::string_compare(Register str1, Register str2,
movl(result, cnt1);
subl(cnt1, cnt2);
push(cnt1);
cmov32(Assembler::lessEqual, cnt2, result);
cmov32(Assembler::lessEqual, cnt2, result); // cnt2 = min(cnt1, cnt2)
// Is the minimum length zero?
testl(cnt2, cnt2);
jcc(Assembler::zero, LENGTH_DIFF_LABEL);
if (ae == StrIntrinsicNode::LL) {
// Load first bytes
load_unsigned_byte(result, Address(str1, 0));
load_unsigned_byte(cnt1, Address(str2, 0));
load_unsigned_byte(result, Address(str1, 0)); // result = str1[0]
load_unsigned_byte(cnt1, Address(str2, 0)); // cnt1 = str2[0]
} else if (ae == StrIntrinsicNode::UU) {
// Load first characters
load_unsigned_short(result, Address(str1, 0));
@@ -8060,7 +8066,10 @@ void MacroAssembler::string_compare(Register str1, Register str2,
assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
Label COMPARE_WIDE_VECTORS_LOOP_AVX2;
Label COMPARE_TAIL_LONG;
Label COMPARE_WIDE_VECTORS_LOOP_AVX3; // used only when _LP64 && AVX3
int pcmpmask = 0x19;
if (ae == StrIntrinsicNode::LL) {
pcmpmask &= ~0x01;
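The pcmpmask constant is the PCMPESTRI immediate. Decoding 0x19 with the Intel SDM field names (an annotation, not part of the commit): bits 1:0 = 01 select unsigned words, bits 3:2 = 10 select the "equal each" element-wise aggregation, bits 5:4 = 01 negate the result so the reported index is the first position that does NOT match, and bit 6 = 0 returns the least-significant such index. Clearing bit 0 for LL switches the element type to unsigned bytes:

// Hedged reconstruction of the immediate built above.
int pcmpestri_imm8(bool latin1_only) {
  int imm = 0x19;                // unsigned words | equal-each | negative polarity | LSB index
  if (latin1_only) imm &= ~0x01; // LL: compare unsigned bytes instead of words
  return imm;
}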
@@ -8123,11 +8132,40 @@ void MacroAssembler::string_compare(Register str1, Register str2,
}
subl(result, stride2);
subl(cnt2, stride2);
jccb(Assembler::zero, COMPARE_WIDE_TAIL);
jcc(Assembler::zero, COMPARE_WIDE_TAIL);
negptr(result);
// In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
bind(COMPARE_WIDE_VECTORS_LOOP);
#ifdef _LP64
if (VM_Version::supports_avx512vlbw()) { // trying 64 bytes fast loop
cmpl(cnt2, stride2x2);
jccb(Assembler::below, COMPARE_WIDE_VECTORS_LOOP_AVX2);
testl(cnt2, stride2x2-1); // cnt2 holds the vector count
jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP_AVX2); // means we cannot subtract 0x40 each iteration
bind(COMPARE_WIDE_VECTORS_LOOP_AVX3); // the hottest loop
if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
evmovdquq(vec1, Address(str1, result, scale), Assembler::AVX_512bit);
evpcmpeqb(k7, vec1, Address(str2, result, scale), Assembler::AVX_512bit); // k7 == 11..11 if operands are equal; otherwise k7 has some zero bits
} else {
vpmovzxbw(vec1, Address(str1, result, scale1), Assembler::AVX_512bit);
evpcmpeqb(k7, vec1, Address(str2, result, scale2), Assembler::AVX_512bit); // k7 == 11..11 if operands are equal; otherwise k7 has some zero bits
}
kortestql(k7, k7);
jcc(Assembler::aboveEqual, COMPARE_WIDE_VECTORS_LOOP_FAILED); // miscompare
addptr(result, stride2x2); // update since we already compared at this addr
subl(cnt2, stride2x2); // and sub the size too
jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP_AVX3);
vpxor(vec1, vec1);
jmpb(COMPARE_WIDE_TAIL);
}//if (VM_Version::supports_avx512vlbw())
#endif // _LP64
bind(COMPARE_WIDE_VECTORS_LOOP_AVX2);
if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
vmovdqu(vec1, Address(str1, result, scale));
vpxor(vec1, Address(str2, result, scale));
@@ -8136,7 +8174,7 @@ void MacroAssembler::string_compare(Register str1, Register str2,
vpxor(vec1, Address(str2, result, scale2));
}
vptest(vec1, vec1);
jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
jcc(Assembler::notZero, VECTOR_NOT_EQUAL);
addptr(result, stride2);
subl(cnt2, stride2);
jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
@@ -8151,7 +8189,7 @@ void MacroAssembler::string_compare(Register str1, Register str2,
movl(result, stride2);
movl(cnt2, result);
negptr(result);
jmpb(COMPARE_WIDE_VECTORS_LOOP);
jmp(COMPARE_WIDE_VECTORS_LOOP_AVX2);
// Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
bind(VECTOR_NOT_EQUAL);
@@ -8295,6 +8333,34 @@ void MacroAssembler::string_compare(Register str1, Register str2,
}
jmpb(DONE_LABEL);
#ifdef _LP64
if (VM_Version::supports_avx512vlbw()) {
bind(COMPARE_WIDE_VECTORS_LOOP_FAILED);
kmovql(cnt1, k7);
notq(cnt1);
bsfq(cnt2, cnt1);
if (ae != StrIntrinsicNode::LL) {
// Divide diff by 2 to get number of chars
sarl(cnt2, 1);
}
addq(result, cnt2);
if (ae == StrIntrinsicNode::LL) {
load_unsigned_byte(cnt1, Address(str2, result));
load_unsigned_byte(result, Address(str1, result));
} else if (ae == StrIntrinsicNode::UU) {
load_unsigned_short(cnt1, Address(str2, result, scale));
load_unsigned_short(result, Address(str1, result, scale));
} else {
load_unsigned_short(cnt1, Address(str2, result, scale2));
load_unsigned_byte(result, Address(str1, result, scale1));
}
subl(result, cnt1);
jmpb(POP_LABEL);
}//if (VM_Version::supports_avx512vlbw())
#endif // _LP64
// Discard the stored length difference
bind(POP_LABEL);
pop(cnt1);
@@ -8304,6 +8370,7 @@ void MacroAssembler::string_compare(Register str1, Register str2,
if (ae == StrIntrinsicNode::UL) {
negl(result);
}
}
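Putting the AVX-512 pieces of string_compare together: the hot loop compares 64 bytes per step through an equality mask in k7, kortestql's carry flag says "all lanes equal", and on failure the handler inverts the mask and bit-scans it so the first zero bit becomes the byte offset of the mismatch. A scalar model of that whole path (a sketch that assumes a GCC/Clang __builtin_ctzll; the names are illustrative):

#include <cstdint>
#include <cstring>
#include <cstdio>

// Byte index of the first difference within 64-byte blocks, -1 if none found.
long avx3_loop_model(const uint8_t* s1, const uint8_t* s2, size_t n) {
  for (size_t off = 0; off + 64 <= n; off += 64) {
    uint64_t k = 0;                                  // models k7
    for (int i = 0; i < 64; i++)
      if (s1[off + i] == s2[off + i]) k |= uint64_t(1) << i;
    if (k != ~uint64_t(0)) {                         // kortestql: CF == 0 -> miscompare
      const uint64_t diff = ~k;                      // notq(cnt1)
      return (long)(off + __builtin_ctzll(diff));    // bsfq(cnt2, cnt1)
    }
  }
  return -1;
}

int main() {
  uint8_t a[128], b[128];
  std::memset(a, 'x', 128); std::memset(b, 'x', 128);
  b[70] = 'y';
  std::printf("%ld\n", avx3_loop_model(a, b, 128));  // prints 70
}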
// Search for Non-ASCII character (Negative byte value) in a byte array,
@@ -9439,13 +9506,184 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
pop(tmp1);
}
void MacroAssembler::vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
Register result, Register tmp1, Register tmp2, XMMRegister rymm0, XMMRegister rymm1, XMMRegister rymm2){
assert(UseSSE42Intrinsics, "SSE4.2 must be enabled.");
Label VECTOR32_LOOP, VECTOR16_LOOP, VECTOR8_LOOP, VECTOR4_LOOP;
Label VECTOR16_TAIL, VECTOR8_TAIL, VECTOR4_TAIL;
Label VECTOR32_NOT_EQUAL, VECTOR16_NOT_EQUAL, VECTOR8_NOT_EQUAL, VECTOR4_NOT_EQUAL;
Label SAME_TILL_END, DONE;
Label BYTES_LOOP, BYTES_TAIL, BYTES_NOT_EQUAL;
//scale is in rcx in both Win64 and Unix
ShortBranchVerifier sbv(this);
shlq(length); // length in elements -> length in bytes; one-operand shlq shifts by cl (the scale)
xorq(result, result);
cmpq(length, 8);
jcc(Assembler::equal, VECTOR8_LOOP);
jcc(Assembler::less, VECTOR4_TAIL);
if (UseAVX >= 2){
cmpq(length, 16);
jcc(Assembler::equal, VECTOR16_LOOP);
jcc(Assembler::less, VECTOR8_LOOP);
cmpq(length, 32);
jccb(Assembler::less, VECTOR16_TAIL);
subq(length, 32);
bind(VECTOR32_LOOP);
vmovdqu(rymm0, Address(obja, result));
vmovdqu(rymm1, Address(objb, result));
vpxor(rymm2, rymm0, rymm1, Assembler::AVX_256bit);
vptest(rymm2, rymm2);
jcc(Assembler::notZero, VECTOR32_NOT_EQUAL);//mismatch found
addq(result, 32);
subq(length, 32);
jccb(Assembler::greaterEqual, VECTOR32_LOOP);
addq(length, 32);
jcc(Assembler::equal, SAME_TILL_END);
// falling through if less than 32 bytes left; close the branch here
bind(VECTOR16_TAIL);
cmpq(length, 16);
jccb(Assembler::less, VECTOR8_TAIL);
bind(VECTOR16_LOOP);
movdqu(rymm0, Address(obja, result));
movdqu(rymm1, Address(objb, result));
vpxor(rymm2, rymm0, rymm1, Assembler::AVX_128bit);
ptest(rymm2, rymm2);
jcc(Assembler::notZero, VECTOR16_NOT_EQUAL);//mismatch found
addq(result, 16);
subq(length, 16);
jcc(Assembler::equal, SAME_TILL_END);
//falling through if less than 16 bytes left
} else {//regular intrinsics
cmpq(length, 16);
jccb(Assembler::less, VECTOR8_TAIL);
subq(length, 16);
bind(VECTOR16_LOOP);
movdqu(rymm0, Address(obja, result));
movdqu(rymm1, Address(objb, result));
pxor(rymm0, rymm1);
ptest(rymm0, rymm0);
jcc(Assembler::notZero, VECTOR16_NOT_EQUAL);//mismatch found
addq(result, 16);
subq(length, 16);
jccb(Assembler::greaterEqual, VECTOR16_LOOP);
addq(length, 16);
jcc(Assembler::equal, SAME_TILL_END);
//falling through if less than 16 bytes left
}
bind(VECTOR8_TAIL);
cmpq(length, 8);
jccb(Assembler::less, VECTOR4_TAIL);
bind(VECTOR8_LOOP);
movq(tmp1, Address(obja, result));
movq(tmp2, Address(objb, result));
xorq(tmp1, tmp2);
testq(tmp1, tmp1);
jcc(Assembler::notZero, VECTOR8_NOT_EQUAL);//mismatch found
addq(result, 8);
subq(length, 8);
jcc(Assembler::equal, SAME_TILL_END);
//falling through if less than 8 bytes left
bind(VECTOR4_TAIL);
cmpq(length, 4);
jccb(Assembler::less, BYTES_TAIL);
bind(VECTOR4_LOOP);
movl(tmp1, Address(obja, result));
xorl(tmp1, Address(objb, result));
testl(tmp1, tmp1);
jcc(Assembler::notZero, VECTOR4_NOT_EQUAL);//mismatch found
addq(result, 4);
subq(length, 4);
jcc(Assembler::equal, SAME_TILL_END);
//falling through if less than 4 bytes left
bind(BYTES_TAIL);
bind(BYTES_LOOP);
load_unsigned_byte(tmp1, Address(obja, result));
load_unsigned_byte(tmp2, Address(objb, result));
xorl(tmp1, tmp2);
testl(tmp1, tmp1);
jccb(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
decq(length);
jccb(Assembler::zero, SAME_TILL_END);
incq(result);
load_unsigned_byte(tmp1, Address(obja, result));
load_unsigned_byte(tmp2, Address(objb, result));
xorl(tmp1, tmp2);
testl(tmp1, tmp1);
jccb(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
decq(length);
jccb(Assembler::zero, SAME_TILL_END);
incq(result);
load_unsigned_byte(tmp1, Address(obja, result));
load_unsigned_byte(tmp2, Address(objb, result));
xorl(tmp1, tmp2);
testl(tmp1, tmp1);
jccb(Assembler::notZero, BYTES_NOT_EQUAL);//mismatch found
jmpb(SAME_TILL_END);
if (UseAVX >= 2) {
bind(VECTOR32_NOT_EQUAL);
vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_256bit);
vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_256bit);
vpxor(rymm0, rymm0, rymm2, Assembler::AVX_256bit);
vpmovmskb(tmp1, rymm0);
bsfq(tmp1, tmp1);
addq(result, tmp1);
shrq(result);
jmpb(DONE);
}
bind(VECTOR16_NOT_EQUAL);
if (UseAVX >= 2) {
vpcmpeqb(rymm2, rymm2, rymm2, Assembler::AVX_128bit);
vpcmpeqb(rymm0, rymm0, rymm1, Assembler::AVX_128bit);
pxor(rymm0, rymm2);
} else {
pcmpeqb(rymm2, rymm2);
pxor(rymm0, rymm1);
pcmpeqb(rymm0, rymm1);
pxor(rymm0, rymm2);
}
pmovmskb(tmp1, rymm0);
bsfq(tmp1, tmp1);
addq(result, tmp1);
shrq(result);
jmpb(DONE);
bind(VECTOR8_NOT_EQUAL);
bind(VECTOR4_NOT_EQUAL);
bsfq(tmp1, tmp1);
shrq(tmp1, 3);
addq(result, tmp1);
bind(BYTES_NOT_EQUAL);
shrq(result);
jmpb(DONE);
bind(SAME_TILL_END);
mov64(result, -1);
bind(DONE);
}
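Note: behaviorally, the stub above reduces to the following scalar contract. This is a minimal Java sketch (the byte[] specialization and the names are assumptions, not JDK code): length arrives in elements, scale is log2 of the element size, and the result is the element index of the first mismatching byte, or -1 when all bytes compare equal.

    public final class MismatchContract {
        // Scalar model of MacroAssembler::vectorized_mismatch (sketch only).
        static int mismatch(byte[] obja, byte[] objb, int length, int log2Scale) {
            int lengthInBytes = length << log2Scale;   // mirrors shlq(length)
            for (int i = 0; i < lengthInBytes; i++) {
                if (obja[i] != objb[i]) {
                    return i >> log2Scale;             // mirrors shrq(result)
                }
            }
            return -1;                                 // mirrors SAME_TILL_END: mov64(result, -1)
        }
        public static void main(String[] args) {
            byte[] a = {1, 2, 3, 4};
            byte[] b = {1, 2, 7, 4};
            System.out.println(mismatch(a, b, 2, 1));  // 2 char-sized elements -> prints 1
        }
    }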
// Helper functions for square_to_len()
/**
* Store the squares of x[], right shifted one bit (divided by 2) into z[]
* Preserves x and z and modifies the rest of the registers.
*/
void MacroAssembler::square_rshift(Register x, Register xlen, Register z, Register tmp1, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
// Perform square and right shift by 1
// Handle odd xlen case first, then for even xlen do the following


@ -1346,7 +1346,6 @@ public:
Register carry2);
void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen,
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3,
Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
void multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry,
@ -1365,6 +1364,9 @@ public:
void mul_add(Register out, Register in, Register offset, Register len, Register k, Register tmp1,
Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
Register raxReg);
void vectorized_mismatch(Register obja, Register objb, Register length, Register log2_array_indxscale,
Register result, Register tmp1, Register tmp2,
XMMRegister vec1, XMMRegister vec2, XMMRegister vec3);
#endif
// CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.


@ -189,7 +189,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
}
// Save full ZMM registers (16..num_xmm_regs)
base_addr = XSAVE_AREA_UPPERBANK;
int off = 0;
off = 0;
int vector_len = Assembler::AVX_512bit;
for (int n = 16; n < num_xmm_regs; n++) {
__ evmovdqul(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n), vector_len);
@ -199,7 +199,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
if (VM_Version::supports_evex()) {
// Save upper bank of ZMM registers (16..31) for double/float usage
int base_addr = XSAVE_AREA_UPPERBANK;
int off = 0;
off = 0;
for (int n = 16; n < num_xmm_regs; n++) {
__ movsd(Address(rsp, base_addr+(off++*64)), as_XMMRegister(n));
}
@ -325,7 +325,7 @@ void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_ve
assert(MaxVectorSize == 64, "only 512bit vectors are supported now");
}
#else
assert(!save_vectors, "vectors are generated only by C2");
assert(!restore_vectors, "vectors are generated only by C2");
#endif
// On EVEX enabled targets everything is handled in pop fpu state


@ -170,7 +170,7 @@ class StubGenerator: public StubCodeGenerator {
// provide initial value for required masks
if (UseAVX > 2) {
__ movl(rbx, 0xffff);
__ kmovdl(k1, rbx);
__ kmovwl(k1, rbx);
}
// save and initialize %mxcsr
@ -798,7 +798,7 @@ class StubGenerator: public StubCodeGenerator {
if (UseAVX > 2) {
__ push(rbx);
__ movl(rbx, 0xffff);
__ kmovdl(k1, rbx);
__ kmovwl(k1, rbx);
__ pop(rbx);
}
// Copy 64-byte chunks


@ -266,7 +266,7 @@ class StubGenerator: public StubCodeGenerator {
__ movptr(r15_save, r15);
if (UseAVX > 2) {
__ movl(rbx, 0xffff);
__ kmovql(k1, rbx);
__ kmovwl(k1, rbx);
}
#ifdef _WIN64
int last_reg = 15;
@ -1350,7 +1350,7 @@ class StubGenerator: public StubCodeGenerator {
Label L_end;
if (UseAVX > 2) {
__ movl(to, 0xffff);
__ kmovql(k1, to);
__ kmovwl(k1, to);
}
// Copy 64-bytes per iteration
__ BIND(L_loop);
@ -1434,7 +1434,7 @@ class StubGenerator: public StubCodeGenerator {
Label L_end;
if (UseAVX > 2) {
__ movl(to, 0xffff);
__ kmovql(k1, to);
__ kmovwl(k1, to);
}
// Copy 64-bytes per iteration
__ BIND(L_loop);
@ -4054,6 +4054,54 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
/**
* Arguments:
*
* Input:
* c_rarg0 - obja address
* c_rarg1 - objb address
* c_rarg2 - length length
* c_rarg3 - scale log2_array_indxscale
*/
address generate_vectorizedMismatch() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");
address start = __ pc();
BLOCK_COMMENT("Entry:");
__ enter();
#ifdef _WIN64 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
const Register scale = c_rarg0; // rcx; exchanged with r9 below
const Register objb = c_rarg1; // rdx
const Register length = c_rarg2; // r8
const Register obja = c_rarg3; // r9
__ xchgq(obja, scale); // now obja and scale contain the correct contents
const Register tmp1 = r10;
const Register tmp2 = r11;
#endif
#ifndef _WIN64 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
const Register obja = c_rarg0; //U:rdi
const Register objb = c_rarg1; //U:rsi
const Register length = c_rarg2; //U:rdx
const Register scale = c_rarg3; //U:rcx
const Register tmp1 = r8;
const Register tmp2 = r9;
#endif
const Register result = rax; //return value
const XMMRegister vec0 = xmm0;
const XMMRegister vec1 = xmm1;
const XMMRegister vec2 = xmm2;
__ vectorized_mismatch(obja, objb, length, scale, result, tmp1, tmp2, vec0, vec1, vec2);
__ leave();
__ ret(0);
return start;
}
/**
* Arguments:
*
@ -4505,7 +4553,9 @@ class StubGenerator: public StubCodeGenerator {
if (UseMulAddIntrinsic) {
StubRoutines::_mulAdd = generate_mulAdd();
}
if (UseVectorizedMismatchIntrinsic) {
StubRoutines::_vectorizedMismatch = generate_vectorizedMismatch();
}
#ifndef _WINDOWS
if (UseMontgomeryMultiplyIntrinsic) {
StubRoutines::_montgomeryMultiply


@ -1039,6 +1039,25 @@ void VM_Version::get_processor_features() {
}
}
#ifdef _LP64
if (UseSSE42Intrinsics) {
if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
UseVectorizedMismatchIntrinsic = true;
}
} else if (UseVectorizedMismatchIntrinsic) {
if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
warning("vectorizedMismatch intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
}
#else
if (UseVectorizedMismatchIntrinsic) {
if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
}
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
}
#endif // _LP64
// Use count leading zeros count instruction if available.
if (supports_lzcnt()) {
if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {


@ -28,4 +28,8 @@
#include <ffi.h>
// Indicates whether the C calling conventions require that
// 32-bit integer argument values are extended to 64 bits.
const bool CCallingConventionRequiresIntsAsLongs = false;
#endif // CPU_ZERO_VM_GLOBALDEFINITIONS_ZERO_HPP


@ -3055,13 +3055,16 @@ void LIRGenerator::do_IfOp(IfOp* x) {
__ cmove(lir_cond(x->cond()), t_val.result(), f_val.result(), reg, as_BasicType(x->x()->type()));
}
void LIRGenerator::do_RuntimeCall(address routine, int expected_arguments, Intrinsic* x) {
assert(x->number_of_arguments() == expected_arguments, "wrong type");
LIR_Opr reg = result_register_for(x->type());
__ call_runtime_leaf(routine, getThreadTemp(),
reg, new LIR_OprList());
LIR_Opr result = rlock_result(x);
__ move(reg, result);
void LIRGenerator::do_RuntimeCall(address routine, Intrinsic* x) {
assert(x->number_of_arguments() == 0, "wrong type");
// Enforce computation of _reserved_argument_area_size which is required on some platforms.
BasicTypeList signature;
CallingConvention* cc = frame_map()->c_calling_convention(&signature);
LIR_Opr reg = result_register_for(x->type());
__ call_runtime_leaf(routine, getThreadTemp(),
reg, new LIR_OprList());
LIR_Opr result = rlock_result(x);
__ move(reg, result);
}
#ifdef TRACE_HAVE_INTRINSICS
@ -3115,16 +3118,16 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) {
case vmIntrinsics::_threadID: do_ThreadIDIntrinsic(x); break;
case vmIntrinsics::_classID: do_ClassIDIntrinsic(x); break;
case vmIntrinsics::_counterTime:
do_RuntimeCall(CAST_FROM_FN_PTR(address, TRACE_TIME_METHOD), 0, x);
do_RuntimeCall(CAST_FROM_FN_PTR(address, TRACE_TIME_METHOD), x);
break;
#endif
case vmIntrinsics::_currentTimeMillis:
do_RuntimeCall(CAST_FROM_FN_PTR(address, os::javaTimeMillis), 0, x);
do_RuntimeCall(CAST_FROM_FN_PTR(address, os::javaTimeMillis), x);
break;
case vmIntrinsics::_nanoTime:
do_RuntimeCall(CAST_FROM_FN_PTR(address, os::javaTimeNanos), 0, x);
do_RuntimeCall(CAST_FROM_FN_PTR(address, os::javaTimeNanos), x);
break;
case vmIntrinsics::_Object_init: do_RegisterFinalizer(x); break;


@ -157,8 +157,8 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {
private:
void* operator new(size_t size) throw();
void* operator new[](size_t size) throw();
void operator delete(void* p);
void operator delete[](void* p);
void operator delete(void* p) { ShouldNotReachHere(); }
void operator delete[](void* p) { ShouldNotReachHere(); }
Compilation* _compilation;
ciMethod* _method; // method that we are compiling
@ -439,7 +439,7 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure {
SwitchRangeArray* create_lookup_ranges(LookupSwitch* x);
void do_SwitchRanges(SwitchRangeArray* x, LIR_Opr value, BlockBegin* default_sux);
void do_RuntimeCall(address routine, int expected_arguments, Intrinsic* x);
void do_RuntimeCall(address routine, Intrinsic* x);
#ifdef TRACE_HAVE_INTRINSICS
void do_ThreadIDIntrinsic(Intrinsic* x);
void do_ClassIDIntrinsic(Intrinsic* x);


@ -50,8 +50,8 @@ private:
private:
void* operator new(size_t size) throw();
void* operator new[](size_t size) throw();
void operator delete(void* p);
void operator delete[](void* p);
void operator delete(void* p) { ShouldNotReachHere(); }
void operator delete[](void* p) { ShouldNotReachHere(); }
IR *_ir;
boolArray _used;


@ -681,6 +681,9 @@ bool vmIntrinsics::is_disabled_by_flags(const methodHandle& method) {
case vmIntrinsics::_montgomerySquare:
if (!UseMontgomerySquareIntrinsic) return true;
break;
case vmIntrinsics::_vectorizedMismatch:
if (!UseVectorizedMismatchIntrinsic) return true;
break;
case vmIntrinsics::_addExactI:
case vmIntrinsics::_addExactL:
case vmIntrinsics::_decrementExactI:


@ -957,6 +957,11 @@
do_name( montgomerySquare_name, "implMontgomerySquare") \
do_signature(montgomerySquare_signature, "([I[IIJ[I)[I") \
\
do_class(java_util_ArraysSupport, "java/util/ArraysSupport") \
do_intrinsic(_vectorizedMismatch, java_util_ArraysSupport, vectorizedMismatch_name, vectorizedMismatch_signature, F_S)\
do_name(vectorizedMismatch_name, "vectorizedMismatch") \
do_signature(vectorizedMismatch_signature, "(Ljava/lang/Object;JLjava/lang/Object;JII)I") \
\
/* java/lang/ref/Reference */ \
do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \
\
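For reference, the vectorizedMismatch descriptor above, "(Ljava/lang/Object;JLjava/lang/Object;JII)I", corresponds to a static method of the following shape. This is a sketch with assumed parameter names, not the JDK source:

    public final class VectorizedMismatchShape {
        // Shape implied by (Ljava/lang/Object;JLjava/lang/Object;JII)I.
        static int vectorizedMismatch(Object a, long aOffset,
                                      Object b, long bOffset,
                                      int length, int log2ArrayIndexScale) {
            throw new UnsupportedOperationException("illustrative stub only");
        }
    }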


@ -346,7 +346,6 @@ void Dependencies::assert_common_2(DepType dept,
}
}
} else {
assert(dep_implicit_context_arg(dept) == 0, "sanity");
if (note_dep_seen(dept, x0) && note_dep_seen(dept, x1)) {
// look in this bucket for redundant assertions
const int stride = 2;


@ -56,6 +56,7 @@
#if INCLUDE_JVMCI
#include "jvmci/jvmciCompiler.hpp"
#include "jvmci/jvmciRuntime.hpp"
#include "jvmci/jvmciJavaClasses.hpp"
#include "runtime/vframe.hpp"
#endif
#ifdef COMPILER2
@ -498,7 +499,7 @@ CompilerCounters::CompilerCounters() {
// CompileBroker::compilation_init
//
// Initialize the Compilation object
void CompileBroker::compilation_init() {
void CompileBroker::compilation_init(TRAPS) {
_last_method_compiled[0] = '\0';
// No need to initialize compilation system if we do not use it.
@ -529,6 +530,17 @@ void CompileBroker::compilation_init() {
} else {
c1_count = JVMCIHostThreads;
}
if (!UseInterpreter) {
// Force initialization of the JVMCI compiler; otherwise JVMCI
// compilations will not block until JVMCI is initialized.
ResourceMark rm;
TempNewSymbol getCompiler = SymbolTable::new_symbol("getCompiler", CHECK);
TempNewSymbol sig = SymbolTable::new_symbol("()Ljdk/vm/ci/runtime/JVMCICompiler;", CHECK);
Handle jvmciRuntime = JVMCIRuntime::get_HotSpotJVMCIRuntime(CHECK);
JavaValue result(T_OBJECT);
JavaCalls::call_virtual(&result, jvmciRuntime, HotSpotJVMCIRuntime::klass(), getCompiler, sig, CHECK);
}
}
}
#endif // INCLUDE_JVMCI
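In Java terms, the forced initialization above is roughly equivalent to the sketch below. It assumes the jdk.vm.ci.runtime API of this JDK 9 timeframe; the VM itself makes the call from C++ via JavaCalls::call_virtual rather than running this code:

    import jdk.vm.ci.runtime.JVMCI;
    import jdk.vm.ci.runtime.JVMCICompiler;

    public class ForceJVMCIInit {
        public static void main(String[] args) {
            // Asking the runtime for its compiler forces full JVMCI initialization.
            JVMCICompiler compiler = JVMCI.getRuntime().getCompiler();
            System.out.println(compiler != null);
        }
    }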


@ -276,7 +276,7 @@ public:
CompileQueue *q = compile_queue(comp_level);
return q != NULL ? q->size() : 0;
}
static void compilation_init();
static void compilation_init(TRAPS);
static void init_compiler_thread_log();
static nmethod* compile_method(const methodHandle& method,
int osr_bci,


@ -441,6 +441,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
case vmIntrinsics::_mulAdd:
case vmIntrinsics::_montgomeryMultiply:
case vmIntrinsics::_montgomerySquare:
case vmIntrinsics::_vectorizedMismatch:
case vmIntrinsics::_ghash_processBlocks:
case vmIntrinsics::_updateCRC32:
case vmIntrinsics::_updateBytesCRC32:


@ -987,7 +987,8 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
strcmp(call->as_CallLeaf()->_name, "squareToLen") == 0 ||
strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0 ||
strcmp(call->as_CallLeaf()->_name, "montgomery_multiply") == 0 ||
strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0)
strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0 ||
strcmp(call->as_CallLeaf()->_name, "vectorizedMismatch") == 0)
))) {
call->dump();
fatal("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name);


@ -72,16 +72,18 @@ void GraphKit::gen_stub(address C_function,
// Make up the parameters
uint i;
for( i = 0; i < parm_cnt; i++ )
for (i = 0; i < parm_cnt; i++) {
map()->init_req(i, _gvn.transform(new ParmNode(start, i)));
for( ; i<map()->req(); i++ )
}
for ( ; i<map()->req(); i++) {
map()->init_req(i, top()); // For nicer debugging
}
// GraphKit requires memory to be a MergeMemNode:
set_all_memory(map()->memory());
// Get base of thread-local storage area
Node* thread = _gvn.transform( new ThreadLocalNode() );
Node* thread = _gvn.transform(new ThreadLocalNode());
const int NoAlias = Compile::AliasIdxBot;
@ -113,21 +115,27 @@ void GraphKit::gen_stub(address C_function,
//-----------------------------
// Compute signature for C call. Varies from the Java signature!
const Type **fields = TypeTuple::fields(2*parm_cnt+2);
uint cnt = TypeFunc::Parms;
// The C routines gets the base of thread-local storage passed in as an
// extra argument. Not all calls need it, but its cheap to add here.
// extra argument. Not all calls need it, but it is cheap to add here.
for (uint pcnt = cnt; pcnt < parm_cnt; pcnt++, cnt++) {
fields[cnt] = jdomain->field_at(pcnt);
const Type *f = jdomain->field_at(pcnt);
if (CCallingConventionRequiresIntsAsLongs && f->isa_int()) {
fields[cnt++] = TypeLong::LONG;
fields[cnt] = Type::HALF; // Must add an additional half for a long.
} else {
fields[cnt] = f;
}
}
fields[cnt++] = TypeRawPtr::BOTTOM; // Thread-local storage
// Also pass in the caller's PC, if asked for.
if (return_pc) {
fields[cnt++] = TypeRawPtr::BOTTOM; // Return PC
}
const TypeTuple* domain = TypeTuple::make(cnt, fields);
const TypeTuple* domain = TypeTuple::make(cnt,fields);
// The C routine we are about to call cannot return an oop; it can block on
// exit and a GC will trash the oop while it sits in C-land. Instead, we
// return the oop through TLS for runtime calls.
@ -155,37 +163,44 @@ void GraphKit::gen_stub(address C_function,
rfields[TypeFunc::Parms+1] = jrange->field_at(TypeFunc::Parms+1);
}
}
const TypeTuple* range = TypeTuple::make(jrange->cnt(),rfields);
const TypeTuple* range = TypeTuple::make(jrange->cnt(), rfields);
// Final C signature
const TypeFunc *c_sig = TypeFunc::make(domain,range);
const TypeFunc *c_sig = TypeFunc::make(domain, range);
//-----------------------------
// Make the call node
// Make the call node.
CallRuntimeNode *call = new CallRuntimeNode(c_sig, C_function, name, TypePtr::BOTTOM);
//-----------------------------
// Fix-up the debug info for the call
call->set_jvms( new (C) JVMState(0) );
// Fix-up the debug info for the call.
call->set_jvms(new (C) JVMState(0));
call->jvms()->set_bci(0);
call->jvms()->set_offsets(cnt);
// Set fixed predefined input arguments
// Set fixed predefined input arguments.
cnt = 0;
for (i = 0; i < TypeFunc::Parms; i++)
call->init_req(cnt++, map()->in(i));
// A little too aggressive on the parm copy; return address is not an input
call->set_req(TypeFunc::ReturnAdr, top());
for (; i < parm_cnt; i++) { // Regular input arguments
for (i = 0; i < TypeFunc::Parms; i++) {
call->init_req(cnt++, map()->in(i));
}
// A little too aggressive on the parm copy; return address is not an input.
call->set_req(TypeFunc::ReturnAdr, top());
for (; i < parm_cnt; i++) { // Regular input arguments.
const Type *f = jdomain->field_at(i);
if (CCallingConventionRequiresIntsAsLongs && f->isa_int()) {
call->init_req(cnt++, _gvn.transform(new ConvI2LNode(map()->in(i))));
call->init_req(cnt++, top());
} else {
call->init_req(cnt++, map()->in(i));
}
}
call->init_req(cnt++, thread);
if (return_pc) { // Return PC, if asked for.
call->init_req(cnt++, returnadr());
}
call->init_req( cnt++, thread );
if( return_pc ) // Return PC, if asked for
call->init_req( cnt++, returnadr() );
_gvn.transform_no_reclaim(call);
//-----------------------------
// Now set up the return results
set_control( _gvn.transform( new ProjNode(call,TypeFunc::Control)) );


@ -312,6 +312,7 @@ class LibraryCallKit : public GraphKit {
bool inline_mulAdd();
bool inline_montgomeryMultiply();
bool inline_montgomerySquare();
bool inline_vectorizedMismatch();
bool inline_profileBoolean();
bool inline_isCompileConstant();
@ -720,6 +721,9 @@ bool LibraryCallKit::try_to_inline(int predicate) {
case vmIntrinsics::_montgomerySquare:
return inline_montgomerySquare();
case vmIntrinsics::_vectorizedMismatch:
return inline_vectorizedMismatch();
case vmIntrinsics::_ghash_processBlocks:
return inline_ghash_processBlocks();
@ -5581,6 +5585,50 @@ bool LibraryCallKit::inline_montgomerySquare() {
return true;
}
//-------------inline_vectorizedMismatch------------------------------
bool LibraryCallKit::inline_vectorizedMismatch() {
assert(UseVectorizedMismatchIntrinsic, "not implemented on this platform");
address stubAddr = StubRoutines::vectorizedMismatch();
if (stubAddr == NULL) {
return false; // Intrinsic's stub is not implemented on this platform
}
const char* stubName = "vectorizedMismatch";
assert(callee()->signature()->size() == 8, "vectorizedMismatch has 6 parameters (8 slots, longs use two)");
Node* obja = argument(0);
Node* aoffset = argument(1);
Node* objb = argument(3);
Node* boffset = argument(4);
Node* length = argument(6);
Node* scale = argument(7);
const Type* a_type = obja->Value(&_gvn);
const Type* b_type = objb->Value(&_gvn);
const TypeAryPtr* top_a = a_type->isa_aryptr();
const TypeAryPtr* top_b = b_type->isa_aryptr();
if (top_a == NULL || top_a->klass() == NULL ||
top_b == NULL || top_b->klass() == NULL) {
// failed array check
return false;
}
Node* call;
jvms()->set_should_reexecute(true);
Node* obja_adr = make_unsafe_address(obja, aoffset);
Node* objb_adr = make_unsafe_address(objb, boffset);
call = make_runtime_call(RC_LEAF,
OptoRuntime::vectorizedMismatch_Type(),
stubAddr, stubName, TypePtr::BOTTOM,
obja_adr, objb_adr, length, scale);
Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
set_result(result);
return true;
}
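The argument indices used above (0, 1, 3, 4, 6 and 7) skip slots 2 and 5 because each long parameter occupies two Java argument slots, which is also why the signature size is asserted to be 8 for six parameters. A small runnable sketch of that slot accounting (parameter names are assumptions):

    public final class SlotAccounting {
        public static void main(String[] args) {
            String[] params = {"Object obja", "long aoffset", "Object objb",
                               "long boffset", "int length", "int scale"};
            int slot = 0;
            for (String p : params) {
                System.out.println("slot " + slot + ": " + p);
                slot += p.startsWith("long") ? 2 : 1;  // a long takes two slots
            }
            System.out.println("total slots: " + slot); // prints 8
        }
    }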
/**
* Calculate CRC32 for byte.


@ -1103,6 +1103,26 @@ const TypeFunc* OptoRuntime::montgomerySquare_Type() {
return TypeFunc::make(domain, range);
}
const TypeFunc* OptoRuntime::vectorizedMismatch_Type() {
// create input type (domain)
int num_args = 4;
int argcnt = num_args;
const Type** fields = TypeTuple::fields(argcnt);
int argp = TypeFunc::Parms;
fields[argp++] = TypePtr::NOTNULL; // obja
fields[argp++] = TypePtr::NOTNULL; // objb
fields[argp++] = TypeInt::INT; // length, number of elements
fields[argp++] = TypeInt::INT; // log2scale, element size
assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, fields);
// return the mismatch index (int)
fields = TypeTuple::fields(1);
fields[TypeFunc::Parms + 0] = TypeInt::INT;
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 1, fields);
return TypeFunc::make(domain, range);
}
// GHASH block processing
const TypeFunc* OptoRuntime::ghash_processBlocks_Type() {
int argcnt = 4;


@ -299,6 +299,8 @@ private:
static const TypeFunc* mulAdd_Type();
static const TypeFunc* vectorizedMismatch_Type();
static const TypeFunc* ghash_processBlocks_Type();
static const TypeFunc* updateBytesCRC32_Type();


@ -855,6 +855,9 @@ public:
product(bool, UseAdler32Intrinsics, false, \
"use intrinsics for java.util.zip.Adler32") \
\
product(bool, UseVectorizedMismatchIntrinsic, false, \
"Enables intrinsification of ArraysSupport.vectorizedMismatch()") \
\
diagnostic(ccstrlist, DisableIntrinsic, "", \
"do not expand intrinsics whose (internal) names appear here") \
\


@ -148,6 +148,8 @@ address StubRoutines::_mulAdd = NULL;
address StubRoutines::_montgomeryMultiply = NULL;
address StubRoutines::_montgomerySquare = NULL;
address StubRoutines::_vectorizedMismatch = NULL;
address StubRoutines::_dexp = NULL;
address StubRoutines::_dlog = NULL;


@ -207,6 +207,8 @@ class StubRoutines: AllStatic {
static address _montgomeryMultiply;
static address _montgomerySquare;
static address _vectorizedMismatch;
static address _dexp;
static address _dlog;
@ -376,6 +378,8 @@ class StubRoutines: AllStatic {
static address montgomeryMultiply() { return _montgomeryMultiply; }
static address montgomerySquare() { return _montgomerySquare; }
static address vectorizedMismatch() { return _vectorizedMismatch; }
static address dexp() { return _dexp; }
static address dlog() { return _dlog; }


@ -3628,7 +3628,7 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) {
// initialize compiler(s)
#if defined(COMPILER1) || defined(COMPILER2) || defined(SHARK) || INCLUDE_JVMCI
CompileBroker::compilation_init();
CompileBroker::compilation_init(CHECK_JNI_ERR);
#endif
// Pre-initialize some JSR292 core classes to avoid deadlock during class loading.


@ -860,6 +860,7 @@ typedef CompactHashtable<Symbol*, char> SymbolCompactHashTable;
static_field(StubRoutines, _mulAdd, address) \
static_field(StubRoutines, _dexp, address) \
static_field(StubRoutines, _dlog, address) \
static_field(StubRoutines, _vectorizedMismatch, address) \
static_field(StubRoutines, _jbyte_arraycopy, address) \
static_field(StubRoutines, _jshort_arraycopy, address) \
static_field(StubRoutines, _jint_arraycopy, address) \


@ -0,0 +1,69 @@
/*
* Copyright (c) 2015 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/*
* @test
* @summary Test that overflowed integers passed to arraycopy don't do any harm. This might
* be the case on platforms where C code expects ints passed in a call to be
* properly sign extended to 64 bits (e.g., PPC64, s390x). This can fail
* if slow_arraycopy_C() is compiled by the C compiler without any implicit
* casts (e.g., if spill stores to the stack are done with 4-byte instructions).
* @run main/othervm -XX:-BackgroundCompilation -XX:-UseOnStackReplacement TestArrayCopyOverflowArguments
*
*/
public class TestArrayCopyOverflowArguments {
// Without volatile, the overflowing computation was hoisted and then
// spilled to the stack; the 32-bit spill store truncated the value, masking the bug.
static volatile int mod = Integer.MAX_VALUE;
public static int[] m1(Object src) {
if (src == null) return null;
int[] dest = new int[10];
try {
// PPC C calling conventions require that ints are properly expanded
// to longs when passed to a function.
int pos = 8 + mod + mod; // = 0x1_0000_0006.
int start = 2 + mod + mod; // = 0x1_0000_0000.
int len = 12 + mod + mod; // = 0x1_0000_000A.
// This is supposed to call SharedRuntime::slow_arraycopy_C().
System.arraycopy(src, pos, dest, 0, 10);
} catch (ArrayStoreException e) {
// Ignore; only the call into SharedRuntime::slow_arraycopy_C() matters.
}
return dest;
}
public static void main(String[] args) throws Exception {
int[] src = new int[20];
for (int i = 0; i < 20; ++i) {
src[i] = i * (i-1);
}
for (int i = 0; i < 20000; i++) {
m1(src);
}
}
}
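As a standalone illustration of the wraparound this test depends on (a sketch, not part of the test): Java int arithmetic wraps at 32 bits, so only the truncated low word can reach the native arraycopy helper.

    public class IntTruncationDemo {
        public static void main(String[] args) {
            int mod = Integer.MAX_VALUE;        // 0x7fff_ffff
            int pos = 8 + mod + mod;            // low 32 bits of 0x1_0000_0006 -> 6
            System.out.println(pos);            // prints 6
            System.out.println(8L + mod + mod); // prints 4294967302 (0x1_0000_0006)
        }
    }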


@ -0,0 +1,61 @@
/*
* Copyright (c) 2015 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/* @test
* @bug 8139258
* @summary Regression test for 8139258 which failed to properly pass float args
* to a jni function on ppc64le.
* @run main/othervm -Xint Test15FloatJNIArgs
* @run main/othervm -XX:+TieredCompilation -Xcomp Test15FloatJNIArgs
* @run main/othervm -XX:-TieredCompilation -Xcomp Test15FloatJNIArgs
*/
public class Test15FloatJNIArgs {
static {
try {
System.loadLibrary("Test15FloatJNIArgs");
} catch (UnsatisfiedLinkError e) {
System.out.println("could not load native lib: " + e);
}
}
public static native float add15floats(
float f1, float f2, float f3, float f4,
float f5, float f6, float f7, float f8,
float f9, float f10, float f11, float f12,
float f13, float f14, float f15);
static void test() throws Exception {
float sum = Test15FloatJNIArgs.add15floats(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f);
if (sum != 15.0f) {
throw new Error("Passed 1.0f fifteen times to a JNI function which didn't add them properly: " + sum);
}
}
public static void main(String[] args) throws Exception {
for (int i = 0; i < 200; ++i) {
test();
}
}
}


@ -0,0 +1,41 @@
/*
* Copyright (c) 2015. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
#include <jni.h>
#ifdef __cplusplus
extern "C" {
#endif
JNIEXPORT jfloat JNICALL Java_Test15FloatJNIArgs_add15floats
(JNIEnv *env, jclass cls,
jfloat f1, jfloat f2, jfloat f3, jfloat f4,
jfloat f5, jfloat f6, jfloat f7, jfloat f8,
jfloat f9, jfloat f10, jfloat f11, jfloat f12,
jfloat f13, jfloat f14, jfloat f15) {
return f1 + f2 + f3 + f4 + f5 + f6 + f7 + f8 + f9 + f10 + f11 + f12 + f13 + f14 + f15;
}
#ifdef __cplusplus
}
#endif