7116452: Add support for AVX instructions
Added support for AVX extension to the x86 instruction set. Reviewed-by: never
This commit is contained in:
parent
11dbaed93f
commit
b7f5d60a7e
File diff suppressed because it is too large
Load Diff
@ -503,7 +503,31 @@ class Assembler : public AbstractAssembler {
|
||||
REX_WR = 0x4C,
|
||||
REX_WRB = 0x4D,
|
||||
REX_WRX = 0x4E,
|
||||
REX_WRXB = 0x4F
|
||||
REX_WRXB = 0x4F,
|
||||
|
||||
VEX_3bytes = 0xC4,
|
||||
VEX_2bytes = 0xC5
|
||||
};
|
||||
|
||||
enum VexPrefix {
|
||||
VEX_B = 0x20,
|
||||
VEX_X = 0x40,
|
||||
VEX_R = 0x80,
|
||||
VEX_W = 0x80
|
||||
};
|
||||
|
||||
enum VexSimdPrefix {
|
||||
VEX_SIMD_NONE = 0x0,
|
||||
VEX_SIMD_66 = 0x1,
|
||||
VEX_SIMD_F3 = 0x2,
|
||||
VEX_SIMD_F2 = 0x3
|
||||
};
|
||||
|
||||
enum VexOpcode {
|
||||
VEX_OPCODE_NONE = 0x0,
|
||||
VEX_OPCODE_0F = 0x1,
|
||||
VEX_OPCODE_0F_38 = 0x2,
|
||||
VEX_OPCODE_0F_3A = 0x3
|
||||
};
|
||||
|
||||
enum WhichOperand {
|
||||
@ -546,12 +570,88 @@ private:
|
||||
void prefixq(Address adr);
|
||||
|
||||
void prefix(Address adr, Register reg, bool byteinst = false);
|
||||
void prefixq(Address adr, Register reg);
|
||||
|
||||
void prefix(Address adr, XMMRegister reg);
|
||||
void prefixq(Address adr, Register reg);
|
||||
void prefixq(Address adr, XMMRegister reg);
|
||||
|
||||
void prefetch_prefix(Address src);
|
||||
|
||||
void rex_prefix(Address adr, XMMRegister xreg,
|
||||
VexSimdPrefix pre, VexOpcode opc, bool rex_w);
|
||||
int rex_prefix_and_encode(int dst_enc, int src_enc,
|
||||
VexSimdPrefix pre, VexOpcode opc, bool rex_w);
|
||||
|
||||
void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
|
||||
int nds_enc, VexSimdPrefix pre, VexOpcode opc,
|
||||
bool vector256);
|
||||
|
||||
void vex_prefix(Address adr, int nds_enc, int xreg_enc,
|
||||
VexSimdPrefix pre, VexOpcode opc,
|
||||
bool vex_w, bool vector256);
|
||||
|
||||
int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
|
||||
VexSimdPrefix pre, VexOpcode opc,
|
||||
bool vex_w, bool vector256);
|
||||
|
||||
|
||||
void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
|
||||
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
|
||||
bool rex_w = false, bool vector256 = false);
|
||||
|
||||
void simd_prefix(XMMRegister dst, Address src,
|
||||
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
|
||||
simd_prefix(dst, xnoreg, src, pre, opc);
|
||||
}
|
||||
void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
|
||||
simd_prefix(src, dst, pre);
|
||||
}
|
||||
void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
|
||||
VexSimdPrefix pre) {
|
||||
bool rex_w = true;
|
||||
simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
|
||||
}
|
||||
|
||||
|
||||
int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
|
||||
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
|
||||
bool rex_w = false, bool vector256 = false);
|
||||
|
||||
int simd_prefix_and_encode(XMMRegister dst, XMMRegister src,
|
||||
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
|
||||
return simd_prefix_and_encode(dst, xnoreg, src, pre, opc);
|
||||
}
|
||||
|
||||
// Move/convert 32-bit integer value.
|
||||
int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
|
||||
VexSimdPrefix pre) {
|
||||
// It is OK to cast from Register to XMMRegister to pass argument here
|
||||
// since only encoding is used in simd_prefix_and_encode() and number of
|
||||
// Gen and Xmm registers are the same.
|
||||
return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre);
|
||||
}
|
||||
int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) {
|
||||
return simd_prefix_and_encode(dst, xnoreg, src, pre);
|
||||
}
|
||||
int simd_prefix_and_encode(Register dst, XMMRegister src,
|
||||
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
|
||||
return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc);
|
||||
}
|
||||
|
||||
// Move/convert 64-bit integer value.
|
||||
int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
|
||||
VexSimdPrefix pre) {
|
||||
bool rex_w = true;
|
||||
return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w);
|
||||
}
|
||||
int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) {
|
||||
return simd_prefix_and_encode_q(dst, xnoreg, src, pre);
|
||||
}
|
||||
int simd_prefix_and_encode_q(Register dst, XMMRegister src,
|
||||
VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
|
||||
bool rex_w = true;
|
||||
return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w);
|
||||
}
|
||||
|
||||
// Helper functions for groups of instructions
|
||||
void emit_arith_b(int op1, int op2, Register dst, int imm8);
|
||||
|
||||
@ -764,6 +864,7 @@ private:
|
||||
void addss(XMMRegister dst, Address src);
|
||||
void addss(XMMRegister dst, XMMRegister src);
|
||||
|
||||
void andl(Address dst, int32_t imm32);
|
||||
void andl(Register dst, int32_t imm32);
|
||||
void andl(Register dst, Address src);
|
||||
void andl(Register dst, Register src);
|
||||
@ -774,9 +875,11 @@ private:
|
||||
void andq(Register dst, Register src);
|
||||
|
||||
// Bitwise Logical AND of Packed Double-Precision Floating-Point Values
|
||||
void andpd(XMMRegister dst, Address src);
|
||||
void andpd(XMMRegister dst, XMMRegister src);
|
||||
|
||||
// Bitwise Logical AND of Packed Single-Precision Floating-Point Values
|
||||
void andps(XMMRegister dst, XMMRegister src);
|
||||
|
||||
void bsfl(Register dst, Register src);
|
||||
void bsrl(Register dst, Register src);
|
||||
|
||||
@ -837,9 +940,11 @@ private:
|
||||
|
||||
// Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
|
||||
void comisd(XMMRegister dst, Address src);
|
||||
void comisd(XMMRegister dst, XMMRegister src);
|
||||
|
||||
// Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
|
||||
void comiss(XMMRegister dst, Address src);
|
||||
void comiss(XMMRegister dst, XMMRegister src);
|
||||
|
||||
// Identify processor type and features
|
||||
void cpuid() {
|
||||
@ -849,14 +954,19 @@ private:
|
||||
|
||||
// Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
|
||||
void cvtsd2ss(XMMRegister dst, XMMRegister src);
|
||||
void cvtsd2ss(XMMRegister dst, Address src);
|
||||
|
||||
// Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
|
||||
void cvtsi2sdl(XMMRegister dst, Register src);
|
||||
void cvtsi2sdl(XMMRegister dst, Address src);
|
||||
void cvtsi2sdq(XMMRegister dst, Register src);
|
||||
void cvtsi2sdq(XMMRegister dst, Address src);
|
||||
|
||||
// Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
|
||||
void cvtsi2ssl(XMMRegister dst, Register src);
|
||||
void cvtsi2ssl(XMMRegister dst, Address src);
|
||||
void cvtsi2ssq(XMMRegister dst, Register src);
|
||||
void cvtsi2ssq(XMMRegister dst, Address src);
|
||||
|
||||
// Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
|
||||
void cvtdq2pd(XMMRegister dst, XMMRegister src);
|
||||
@ -866,6 +976,7 @@ private:
|
||||
|
||||
// Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
|
||||
void cvtss2sd(XMMRegister dst, XMMRegister src);
|
||||
void cvtss2sd(XMMRegister dst, Address src);
|
||||
|
||||
// Convert with Truncation Scalar Double-Precision Floating-Point Value to Doubleword Integer
|
||||
void cvttsd2sil(Register dst, Address src);
|
||||
@ -1140,8 +1251,6 @@ private:
|
||||
void movdq(Register dst, XMMRegister src);
|
||||
|
||||
// Move Aligned Double Quadword
|
||||
void movdqa(Address dst, XMMRegister src);
|
||||
void movdqa(XMMRegister dst, Address src);
|
||||
void movdqa(XMMRegister dst, XMMRegister src);
|
||||
|
||||
// Move Unaligned Double Quadword
|
||||
@ -1261,10 +1370,18 @@ private:
|
||||
void orq(Register dst, Address src);
|
||||
void orq(Register dst, Register src);
|
||||
|
||||
// Pack with unsigned saturation
|
||||
void packuswb(XMMRegister dst, XMMRegister src);
|
||||
void packuswb(XMMRegister dst, Address src);
|
||||
|
||||
// SSE4.2 string instructions
|
||||
void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
|
||||
void pcmpestri(XMMRegister xmm1, Address src, int imm8);
|
||||
|
||||
// SSE4.1 packed move
|
||||
void pmovzxbw(XMMRegister dst, XMMRegister src);
|
||||
void pmovzxbw(XMMRegister dst, Address src);
|
||||
|
||||
#ifndef _LP64 // no 32bit push/pop on amd64
|
||||
void popl(Address dst);
|
||||
#endif
|
||||
@ -1292,6 +1409,7 @@ private:
|
||||
|
||||
// POR - Bitwise logical OR
|
||||
void por(XMMRegister dst, XMMRegister src);
|
||||
void por(XMMRegister dst, Address src);
|
||||
|
||||
// Shuffle Packed Doublewords
|
||||
void pshufd(XMMRegister dst, XMMRegister src, int mode);
|
||||
@ -1313,6 +1431,11 @@ private:
|
||||
|
||||
// Interleave Low Bytes
|
||||
void punpcklbw(XMMRegister dst, XMMRegister src);
|
||||
void punpcklbw(XMMRegister dst, Address src);
|
||||
|
||||
// Interleave Low Doublewords
|
||||
void punpckldq(XMMRegister dst, XMMRegister src);
|
||||
void punpckldq(XMMRegister dst, Address src);
|
||||
|
||||
#ifndef _LP64 // no 32bit push/pop on amd64
|
||||
void pushl(Address src);
|
||||
@ -1429,6 +1552,13 @@ private:
|
||||
void xchgq(Register reg, Address adr);
|
||||
void xchgq(Register dst, Register src);
|
||||
|
||||
// Get Value of Extended Control Register
|
||||
void xgetbv() {
|
||||
emit_byte(0x0F);
|
||||
emit_byte(0x01);
|
||||
emit_byte(0xD0);
|
||||
}
|
||||
|
||||
void xorl(Register dst, int32_t imm32);
|
||||
void xorl(Register dst, Address src);
|
||||
void xorl(Register dst, Register src);
|
||||
@ -1437,14 +1567,21 @@ private:
|
||||
void xorq(Register dst, Register src);
|
||||
|
||||
// Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
|
||||
void xorpd(XMMRegister dst, Address src);
|
||||
void xorpd(XMMRegister dst, XMMRegister src);
|
||||
|
||||
// Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
|
||||
void xorps(XMMRegister dst, Address src);
|
||||
void xorps(XMMRegister dst, XMMRegister src);
|
||||
|
||||
void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
|
||||
|
||||
protected:
|
||||
// Next instructions require address alignment 16 bytes SSE mode.
|
||||
// They should be called only from corresponding MacroAssembler instructions.
|
||||
void andpd(XMMRegister dst, Address src);
|
||||
void andps(XMMRegister dst, Address src);
|
||||
void xorpd(XMMRegister dst, Address src);
|
||||
void xorps(XMMRegister dst, Address src);
|
||||
|
||||
};
|
||||
|
||||
|
||||
@ -2175,9 +2312,15 @@ class MacroAssembler: public Assembler {
|
||||
void andpd(XMMRegister dst, Address src) { Assembler::andpd(dst, src); }
|
||||
void andpd(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
void andps(XMMRegister dst, XMMRegister src) { Assembler::andps(dst, src); }
|
||||
void andps(XMMRegister dst, Address src) { Assembler::andps(dst, src); }
|
||||
void andps(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
void comiss(XMMRegister dst, XMMRegister src) { Assembler::comiss(dst, src); }
|
||||
void comiss(XMMRegister dst, Address src) { Assembler::comiss(dst, src); }
|
||||
void comiss(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
void comisd(XMMRegister dst, XMMRegister src) { Assembler::comisd(dst, src); }
|
||||
void comisd(XMMRegister dst, Address src) { Assembler::comisd(dst, src); }
|
||||
void comisd(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
@ -2211,62 +2354,62 @@ private:
|
||||
void movss(XMMRegister dst, Address src) { Assembler::movss(dst, src); }
|
||||
void movss(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); }
|
||||
void movlpd(XMMRegister dst, Address src) {Assembler::movlpd(dst, src); }
|
||||
void movlpd(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
public:
|
||||
|
||||
void addsd(XMMRegister dst, XMMRegister src) { Assembler::addsd(dst, src); }
|
||||
void addsd(XMMRegister dst, Address src) { Assembler::addsd(dst, src); }
|
||||
void addsd(XMMRegister dst, AddressLiteral src) { Assembler::addsd(dst, as_Address(src)); }
|
||||
void addsd(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
void addss(XMMRegister dst, XMMRegister src) { Assembler::addss(dst, src); }
|
||||
void addss(XMMRegister dst, Address src) { Assembler::addss(dst, src); }
|
||||
void addss(XMMRegister dst, AddressLiteral src) { Assembler::addss(dst, as_Address(src)); }
|
||||
void addss(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
void divsd(XMMRegister dst, XMMRegister src) { Assembler::divsd(dst, src); }
|
||||
void divsd(XMMRegister dst, Address src) { Assembler::divsd(dst, src); }
|
||||
void divsd(XMMRegister dst, AddressLiteral src) { Assembler::divsd(dst, as_Address(src)); }
|
||||
void divsd(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
void divss(XMMRegister dst, XMMRegister src) { Assembler::divss(dst, src); }
|
||||
void divss(XMMRegister dst, Address src) { Assembler::divss(dst, src); }
|
||||
void divss(XMMRegister dst, AddressLiteral src) { Assembler::divss(dst, as_Address(src)); }
|
||||
void divss(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
|
||||
void movsd(Address dst, XMMRegister src) { Assembler::movsd(dst, src); }
|
||||
void movsd(XMMRegister dst, Address src) { Assembler::movsd(dst, src); }
|
||||
void movsd(XMMRegister dst, AddressLiteral src) { Assembler::movsd(dst, as_Address(src)); }
|
||||
void movsd(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
void mulsd(XMMRegister dst, XMMRegister src) { Assembler::mulsd(dst, src); }
|
||||
void mulsd(XMMRegister dst, Address src) { Assembler::mulsd(dst, src); }
|
||||
void mulsd(XMMRegister dst, AddressLiteral src) { Assembler::mulsd(dst, as_Address(src)); }
|
||||
void mulsd(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
void mulss(XMMRegister dst, XMMRegister src) { Assembler::mulss(dst, src); }
|
||||
void mulss(XMMRegister dst, Address src) { Assembler::mulss(dst, src); }
|
||||
void mulss(XMMRegister dst, AddressLiteral src) { Assembler::mulss(dst, as_Address(src)); }
|
||||
void mulss(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
void sqrtsd(XMMRegister dst, XMMRegister src) { Assembler::sqrtsd(dst, src); }
|
||||
void sqrtsd(XMMRegister dst, Address src) { Assembler::sqrtsd(dst, src); }
|
||||
void sqrtsd(XMMRegister dst, AddressLiteral src) { Assembler::sqrtsd(dst, as_Address(src)); }
|
||||
void sqrtsd(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
void sqrtss(XMMRegister dst, XMMRegister src) { Assembler::sqrtss(dst, src); }
|
||||
void sqrtss(XMMRegister dst, Address src) { Assembler::sqrtss(dst, src); }
|
||||
void sqrtss(XMMRegister dst, AddressLiteral src) { Assembler::sqrtss(dst, as_Address(src)); }
|
||||
void sqrtss(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
void subsd(XMMRegister dst, XMMRegister src) { Assembler::subsd(dst, src); }
|
||||
void subsd(XMMRegister dst, Address src) { Assembler::subsd(dst, src); }
|
||||
void subsd(XMMRegister dst, AddressLiteral src) { Assembler::subsd(dst, as_Address(src)); }
|
||||
void subsd(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
void subss(XMMRegister dst, XMMRegister src) { Assembler::subss(dst, src); }
|
||||
void subss(XMMRegister dst, Address src) { Assembler::subss(dst, src); }
|
||||
void subss(XMMRegister dst, AddressLiteral src) { Assembler::subss(dst, as_Address(src)); }
|
||||
void subss(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
void ucomiss(XMMRegister dst, XMMRegister src) { Assembler::ucomiss(dst, src); }
|
||||
void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
|
||||
void ucomiss(XMMRegister dst, Address src) { Assembler::ucomiss(dst, src); }
|
||||
void ucomiss(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
void ucomisd(XMMRegister dst, XMMRegister src) { Assembler::ucomisd(dst, src); }
|
||||
void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
|
||||
void ucomisd(XMMRegister dst, Address src) { Assembler::ucomisd(dst, src); }
|
||||
void ucomisd(XMMRegister dst, AddressLiteral src);
|
||||
|
||||
// Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
|
||||
|
@ -86,6 +86,7 @@ inline void Assembler::prefix(Address adr, Register reg, bool byteinst) {}
|
||||
inline void Assembler::prefixq(Address adr, Register reg) {}
|
||||
|
||||
inline void Assembler::prefix(Address adr, XMMRegister reg) {}
|
||||
inline void Assembler::prefixq(Address adr, XMMRegister reg) {}
|
||||
#else
|
||||
inline void Assembler::emit_long64(jlong x) {
|
||||
*(jlong*) _code_pos = x;
|
||||
|
@ -237,9 +237,21 @@ int NativeMovRegMem::instruction_start() const {
|
||||
int off = 0;
|
||||
u_char instr_0 = ubyte_at(off);
|
||||
|
||||
// See comment in Assembler::locate_operand() about VEX prefixes.
|
||||
if (instr_0 == instruction_VEX_prefix_2bytes) {
|
||||
assert((UseAVX > 0), "shouldn't have VEX prefix");
|
||||
NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions"));
|
||||
return 2;
|
||||
}
|
||||
if (instr_0 == instruction_VEX_prefix_3bytes) {
|
||||
assert((UseAVX > 0), "shouldn't have VEX prefix");
|
||||
NOT_LP64(assert((0xC0 & ubyte_at(1)) == 0xC0, "shouldn't have LDS and LES instructions"));
|
||||
return 3;
|
||||
}
|
||||
|
||||
// First check to see if we have a (prefixed or not) xor
|
||||
if ( instr_0 >= instruction_prefix_wide_lo && // 0x40
|
||||
instr_0 <= instruction_prefix_wide_hi) { // 0x4f
|
||||
if (instr_0 >= instruction_prefix_wide_lo && // 0x40
|
||||
instr_0 <= instruction_prefix_wide_hi) { // 0x4f
|
||||
off++;
|
||||
instr_0 = ubyte_at(off);
|
||||
}
|
||||
@ -256,13 +268,13 @@ int NativeMovRegMem::instruction_start() const {
|
||||
instr_0 = ubyte_at(off);
|
||||
}
|
||||
|
||||
if ( instr_0 == instruction_code_xmm_ss_prefix || // 0xf3
|
||||
if ( instr_0 == instruction_code_xmm_ss_prefix || // 0xf3
|
||||
instr_0 == instruction_code_xmm_sd_prefix) { // 0xf2
|
||||
off++;
|
||||
instr_0 = ubyte_at(off);
|
||||
}
|
||||
|
||||
if ( instr_0 >= instruction_prefix_wide_lo && // 0x40
|
||||
if ( instr_0 >= instruction_prefix_wide_lo && // 0x40
|
||||
instr_0 <= instruction_prefix_wide_hi) { // 0x4f
|
||||
off++;
|
||||
instr_0 = ubyte_at(off);
|
||||
|
@ -287,6 +287,9 @@ class NativeMovRegMem: public NativeInstruction {
|
||||
instruction_code_xmm_store = 0x11,
|
||||
instruction_code_xmm_lpd = 0x12,
|
||||
|
||||
instruction_VEX_prefix_2bytes = Assembler::VEX_2bytes,
|
||||
instruction_VEX_prefix_3bytes = Assembler::VEX_3bytes,
|
||||
|
||||
instruction_size = 4,
|
||||
instruction_offset = 0,
|
||||
data_offset = 2,
|
||||
|
@ -53,6 +53,7 @@ REGISTER_DEFINITION(Register, r14);
|
||||
REGISTER_DEFINITION(Register, r15);
|
||||
#endif // AMD64
|
||||
|
||||
REGISTER_DEFINITION(XMMRegister, xnoreg);
|
||||
REGISTER_DEFINITION(XMMRegister, xmm0 );
|
||||
REGISTER_DEFINITION(XMMRegister, xmm1 );
|
||||
REGISTER_DEFINITION(XMMRegister, xmm2 );
|
||||
@ -115,6 +116,7 @@ REGISTER_DEFINITION(Register, r12_heapbase);
|
||||
REGISTER_DEFINITION(Register, r15_thread);
|
||||
#endif // AMD64
|
||||
|
||||
REGISTER_DEFINITION(MMXRegister, mnoreg );
|
||||
REGISTER_DEFINITION(MMXRegister, mmx0 );
|
||||
REGISTER_DEFINITION(MMXRegister, mmx1 );
|
||||
REGISTER_DEFINITION(MMXRegister, mmx2 );
|
||||
|
@ -50,7 +50,7 @@ const char* VM_Version::_features_str = "";
|
||||
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
|
||||
|
||||
static BufferBlob* stub_blob;
|
||||
static const int stub_size = 400;
|
||||
static const int stub_size = 500;
|
||||
|
||||
extern "C" {
|
||||
typedef void (*getPsrInfo_stub_t)(void*);
|
||||
@ -73,7 +73,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
|
||||
const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
|
||||
|
||||
Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
|
||||
Label ext_cpuid1, ext_cpuid5, done;
|
||||
Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, done;
|
||||
|
||||
StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub");
|
||||
# define __ _masm->
|
||||
@ -229,6 +229,41 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
|
||||
__ movl(Address(rsi, 8), rcx);
|
||||
__ movl(Address(rsi,12), rdx);
|
||||
|
||||
//
|
||||
// Check if OS has enabled XGETBV instruction to access XCR0
|
||||
// (OSXSAVE feature flag) and CPU supports AVX
|
||||
//
|
||||
__ andl(rcx, 0x18000000);
|
||||
__ cmpl(rcx, 0x18000000);
|
||||
__ jccb(Assembler::notEqual, sef_cpuid);
|
||||
|
||||
//
|
||||
// XCR0, XFEATURE_ENABLED_MASK register
|
||||
//
|
||||
__ xorl(rcx, rcx); // zero for XCR0 register
|
||||
__ xgetbv();
|
||||
__ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
|
||||
__ movl(Address(rsi, 0), rax);
|
||||
__ movl(Address(rsi, 4), rdx);
|
||||
|
||||
//
|
||||
// cpuid(0x7) Structured Extended Features
|
||||
//
|
||||
__ bind(sef_cpuid);
|
||||
__ movl(rax, 7);
|
||||
__ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
|
||||
__ jccb(Assembler::greater, ext_cpuid);
|
||||
|
||||
__ xorl(rcx, rcx);
|
||||
__ cpuid();
|
||||
__ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
|
||||
__ movl(Address(rsi, 0), rax);
|
||||
__ movl(Address(rsi, 4), rbx);
|
||||
|
||||
//
|
||||
// Extended cpuid(0x80000000)
|
||||
//
|
||||
__ bind(ext_cpuid);
|
||||
__ movl(rax, 0x80000000);
|
||||
__ cpuid();
|
||||
__ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
|
||||
@ -359,13 +394,19 @@ void VM_Version::get_processor_features() {
|
||||
if (UseSSE < 1)
|
||||
_cpuFeatures &= ~CPU_SSE;
|
||||
|
||||
if (UseAVX < 2)
|
||||
_cpuFeatures &= ~CPU_AVX2;
|
||||
|
||||
if (UseAVX < 1)
|
||||
_cpuFeatures &= ~CPU_AVX;
|
||||
|
||||
if (logical_processors_per_package() == 1) {
|
||||
// HT processor could be installed on a system which doesn't support HT.
|
||||
_cpuFeatures &= ~CPU_HT;
|
||||
}
|
||||
|
||||
char buf[256];
|
||||
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
cores_per_cpu(), threads_per_core(),
|
||||
cpu_family(), _model, _stepping,
|
||||
(supports_cmov() ? ", cmov" : ""),
|
||||
@ -379,6 +420,8 @@ void VM_Version::get_processor_features() {
|
||||
(supports_sse4_1() ? ", sse4.1" : ""),
|
||||
(supports_sse4_2() ? ", sse4.2" : ""),
|
||||
(supports_popcnt() ? ", popcnt" : ""),
|
||||
(supports_avx() ? ", avx" : ""),
|
||||
(supports_avx2() ? ", avx2" : ""),
|
||||
(supports_mmx_ext() ? ", mmxext" : ""),
|
||||
(supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
|
||||
(supports_lzcnt() ? ", lzcnt": ""),
|
||||
@ -389,17 +432,24 @@ void VM_Version::get_processor_features() {
|
||||
// UseSSE is set to the smaller of what hardware supports and what
|
||||
// the command line requires. I.e., you cannot set UseSSE to 2 on
|
||||
// older Pentiums which do not support it.
|
||||
if( UseSSE > 4 ) UseSSE=4;
|
||||
if( UseSSE < 0 ) UseSSE=0;
|
||||
if( !supports_sse4_1() ) // Drop to 3 if no SSE4 support
|
||||
if (UseSSE > 4) UseSSE=4;
|
||||
if (UseSSE < 0) UseSSE=0;
|
||||
if (!supports_sse4_1()) // Drop to 3 if no SSE4 support
|
||||
UseSSE = MIN2((intx)3,UseSSE);
|
||||
if( !supports_sse3() ) // Drop to 2 if no SSE3 support
|
||||
if (!supports_sse3()) // Drop to 2 if no SSE3 support
|
||||
UseSSE = MIN2((intx)2,UseSSE);
|
||||
if( !supports_sse2() ) // Drop to 1 if no SSE2 support
|
||||
if (!supports_sse2()) // Drop to 1 if no SSE2 support
|
||||
UseSSE = MIN2((intx)1,UseSSE);
|
||||
if( !supports_sse () ) // Drop to 0 if no SSE support
|
||||
if (!supports_sse ()) // Drop to 0 if no SSE support
|
||||
UseSSE = 0;
|
||||
|
||||
if (UseAVX > 2) UseAVX=2;
|
||||
if (UseAVX < 0) UseAVX=0;
|
||||
if (!supports_avx2()) // Drop to 1 if no AVX2 support
|
||||
UseAVX = MIN2((intx)1,UseAVX);
|
||||
if (!supports_avx ()) // Drop to 0 if no AVX support
|
||||
UseAVX = 0;
|
||||
|
||||
// On new cpus instructions which update whole XMM register should be used
|
||||
// to prevent partial register stall due to dependencies on high half.
|
||||
//
|
||||
@ -534,6 +584,9 @@ void VM_Version::get_processor_features() {
|
||||
if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
|
||||
UsePopCountInstruction = true;
|
||||
}
|
||||
} else if (UsePopCountInstruction) {
|
||||
warning("POPCNT instruction is not available on this CPU");
|
||||
FLAG_SET_DEFAULT(UsePopCountInstruction, false);
|
||||
}
|
||||
|
||||
#ifdef COMPILER2
|
||||
@ -605,7 +658,11 @@ void VM_Version::get_processor_features() {
|
||||
if (PrintMiscellaneous && Verbose) {
|
||||
tty->print_cr("Logical CPUs per core: %u",
|
||||
logical_processors_per_package());
|
||||
tty->print_cr("UseSSE=%d",UseSSE);
|
||||
tty->print("UseSSE=%d",UseSSE);
|
||||
if (UseAVX > 0) {
|
||||
tty->print(" UseAVX=%d",UseAVX);
|
||||
}
|
||||
tty->cr();
|
||||
tty->print("Allocation");
|
||||
if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
|
||||
tty->print_cr(": no prefetching");
|
||||
|
@ -78,7 +78,10 @@ public:
|
||||
sse4_2 : 1,
|
||||
: 2,
|
||||
popcnt : 1,
|
||||
: 8;
|
||||
: 3,
|
||||
osxsave : 1,
|
||||
avx : 1,
|
||||
: 3;
|
||||
} bits;
|
||||
};
|
||||
|
||||
@ -176,6 +179,34 @@ public:
|
||||
} bits;
|
||||
};
|
||||
|
||||
union SefCpuid7Eax {
|
||||
uint32_t value;
|
||||
};
|
||||
|
||||
union SefCpuid7Ebx {
|
||||
uint32_t value;
|
||||
struct {
|
||||
uint32_t fsgsbase : 1,
|
||||
: 2,
|
||||
bmi1 : 1,
|
||||
: 1,
|
||||
avx2 : 1,
|
||||
: 2,
|
||||
bmi2 : 1,
|
||||
: 23;
|
||||
} bits;
|
||||
};
|
||||
|
||||
union XemXcr0Eax {
|
||||
uint32_t value;
|
||||
struct {
|
||||
uint32_t x87 : 1,
|
||||
sse : 1,
|
||||
ymm : 1,
|
||||
: 29;
|
||||
} bits;
|
||||
};
|
||||
|
||||
protected:
|
||||
static int _cpu;
|
||||
static int _model;
|
||||
@ -200,7 +231,9 @@ protected:
|
||||
CPU_SSE4_1 = (1 << 11),
|
||||
CPU_SSE4_2 = (1 << 12),
|
||||
CPU_POPCNT = (1 << 13),
|
||||
CPU_LZCNT = (1 << 14)
|
||||
CPU_LZCNT = (1 << 14),
|
||||
CPU_AVX = (1 << 15),
|
||||
CPU_AVX2 = (1 << 16)
|
||||
} cpuFeatureFlags;
|
||||
|
||||
// cpuid information block. All info derived from executing cpuid with
|
||||
@ -228,6 +261,12 @@ protected:
|
||||
uint32_t dcp_cpuid4_ecx; // unused currently
|
||||
uint32_t dcp_cpuid4_edx; // unused currently
|
||||
|
||||
// cpuid function 7 (structured extended features)
|
||||
SefCpuid7Eax sef_cpuid7_eax;
|
||||
SefCpuid7Ebx sef_cpuid7_ebx;
|
||||
uint32_t sef_cpuid7_ecx; // unused currently
|
||||
uint32_t sef_cpuid7_edx; // unused currently
|
||||
|
||||
// cpuid function 0xB (processor topology)
|
||||
// ecx = 0
|
||||
uint32_t tpl_cpuidB0_eax;
|
||||
@ -275,6 +314,10 @@ protected:
|
||||
uint32_t ext_cpuid8_ebx; // reserved
|
||||
ExtCpuid8Ecx ext_cpuid8_ecx;
|
||||
uint32_t ext_cpuid8_edx; // reserved
|
||||
|
||||
// extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
|
||||
XemXcr0Eax xem_xcr0_eax;
|
||||
uint32_t xem_xcr0_edx; // reserved
|
||||
};
|
||||
|
||||
// The actual cpuid info block
|
||||
@ -328,6 +371,14 @@ protected:
|
||||
result |= CPU_SSE4_2;
|
||||
if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
|
||||
result |= CPU_POPCNT;
|
||||
if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
|
||||
_cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
|
||||
_cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
|
||||
_cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
|
||||
result |= CPU_AVX;
|
||||
if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
|
||||
result |= CPU_AVX2;
|
||||
}
|
||||
|
||||
// AMD features.
|
||||
if (is_amd()) {
|
||||
@ -350,12 +401,14 @@ public:
|
||||
static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
|
||||
static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
|
||||
static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
|
||||
static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); }
|
||||
static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
|
||||
static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
|
||||
static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
|
||||
static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
|
||||
static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
|
||||
static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
|
||||
static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
|
||||
|
||||
// Initialization
|
||||
static void initialize();
|
||||
@ -447,6 +500,8 @@ public:
|
||||
static bool supports_sse4_1() { return (_cpuFeatures & CPU_SSE4_1) != 0; }
|
||||
static bool supports_sse4_2() { return (_cpuFeatures & CPU_SSE4_2) != 0; }
|
||||
static bool supports_popcnt() { return (_cpuFeatures & CPU_POPCNT) != 0; }
|
||||
static bool supports_avx() { return (_cpuFeatures & CPU_AVX) != 0; }
|
||||
static bool supports_avx2() { return (_cpuFeatures & CPU_AVX2) != 0; }
|
||||
//
|
||||
// AMD features
|
||||
//
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -525,6 +525,9 @@ class CommandLineFlags {
|
||||
product(intx, UseSSE, 99, \
|
||||
"Highest supported SSE instructions set on x86/x64") \
|
||||
\
|
||||
product(intx, UseAVX, 99, \
|
||||
"Highest supported AVX instructions set on x86/x64") \
|
||||
\
|
||||
product(intx, UseVIS, 99, \
|
||||
"Highest supported VIS instructions set on Sparc") \
|
||||
\
|
||||
|
Loading…
Reference in New Issue
Block a user