8167065: Add intrinsic support for double precision shifting on x86_64
Reviewed-by: kvn
This commit is contained in:
parent
f4af0eadb6
commit
995da6eb2a
src
hotspot
cpu/x86
share
java.base/share/classes/java/math
jdk.aot/share/classes/jdk.tools.jaotc.binformat/src/jdk/tools/jaotc/binformat
jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.test/src/org/graalvm/compiler/hotspot/test
test
hotspot/jtreg/compiler/intrinsics/bigInteger
micro/org/openjdk/bench/java/math
@ -4257,8 +4257,8 @@ void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
|
||||
|
||||
void Assembler::vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len) {
|
||||
assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
|
||||
vector_len == AVX_256bit? VM_Version::supports_avx2() :
|
||||
0, "");
|
||||
(vector_len == AVX_256bit? VM_Version::supports_avx2() :
|
||||
(vector_len == AVX_512bit? VM_Version::supports_evex() : 0)), "");
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
@ -4737,6 +4737,36 @@ void Assembler::shrl(Register dst) {
|
||||
emit_int8((unsigned char)(0xE8 | encode));
|
||||
}
|
||||
|
||||
// Double-precision shift left, register-register form without an immediate.
// Emits the two opcode bytes 0F A5 followed by a register-direct ModRM byte.
// NOTE(review): per the Intel SDM this encoding takes its shift count from CL;
// confirm callers place the count in rcx before using this overload.
void Assembler::shldl(Register dst, Register src) {
  // src is passed first and dst second; the returned bits are OR-ed into the
  // 0xC0 (register-direct) ModRM byte below. Presumably prefix_and_encode
  // also emits any prefix byte the two encodings require -- TODO confirm.
  const int reg_rm_bits = prefix_and_encode(src->encoding(), dst->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA5);
  emit_int8((unsigned char)(0xC0 | reg_rm_bits));
}
|
||||
|
||||
// Double-precision shift left with an immediate shift count.
// Emits opcode bytes 0F A4, a register-direct ModRM byte, then imm8.
void Assembler::shldl(Register dst, Register src, int8_t imm8) {
  // src is passed first and dst second; the returned bits are OR-ed into the
  // 0xC0 (register-direct) ModRM byte below. Presumably prefix_and_encode
  // also emits any prefix byte the two encodings require -- TODO confirm.
  const int reg_rm_bits = prefix_and_encode(src->encoding(), dst->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xA4);
  emit_int8((unsigned char)(0xC0 | reg_rm_bits));
  // The shift count trails the ModRM byte as a single immediate byte.
  emit_int8(imm8);
}
|
||||
|
||||
// Double-precision shift right, register-register form without an immediate.
// Emits the two opcode bytes 0F AD followed by a register-direct ModRM byte.
// NOTE(review): per the Intel SDM this encoding takes its shift count from CL;
// confirm callers place the count in rcx before using this overload.
void Assembler::shrdl(Register dst, Register src) {
  // src is passed first and dst second; the returned bits are OR-ed into the
  // 0xC0 (register-direct) ModRM byte below. Presumably prefix_and_encode
  // also emits any prefix byte the two encodings require -- TODO confirm.
  const int reg_rm_bits = prefix_and_encode(src->encoding(), dst->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAD);
  emit_int8((unsigned char)(0xC0 | reg_rm_bits));
}
|
||||
|
||||
// Double-precision shift right with an immediate shift count.
// Emits opcode bytes 0F AC, a register-direct ModRM byte, then imm8.
void Assembler::shrdl(Register dst, Register src, int8_t imm8) {
  // src is passed first and dst second; the returned bits are OR-ed into the
  // 0xC0 (register-direct) ModRM byte below. Presumably prefix_and_encode
  // also emits any prefix byte the two encodings require -- TODO confirm.
  const int reg_rm_bits = prefix_and_encode(src->encoding(), dst->encoding());
  emit_int8(0x0F);
  emit_int8((unsigned char)0xAC);
  emit_int8((unsigned char)(0xC0 | reg_rm_bits));
  // The shift count trails the ModRM byte as a single immediate byte.
  emit_int8(imm8);
}
|
||||
|
||||
// copies a single word from [esi] to [edi]
|
||||
void Assembler::smovl() {
|
||||
emit_int8((unsigned char)0xA5);
|
||||
@ -6513,6 +6543,23 @@ void Assembler::vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int ve
|
||||
emit_int8((unsigned char)(0xC0 | encode));
|
||||
}
|
||||
|
||||
// Emits the packed-doubleword variable double-shift-left instruction
// (opcode byte 0x71 in the 0F 38 map, 66 prefix), forced to EVEX encoding.
// Asserts that the CPU reports VBMI2 support.
void Assembler::vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(VM_Version::supports_vbmi2(), "requires vbmi2");

  InstructionAttr attrs(vector_len,
                        /* vex_w */ false,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  attrs.set_is_evex_instruction();

  // Operand encodings are handed to the prefix encoder in dst, src, shift order.
  const int reg_rm_bits = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(),
                                                VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int8(0x71);
  emit_int8((unsigned char)(0xC0 | reg_rm_bits));
}
|
||||
|
||||
// Emits the packed-doubleword variable double-shift-right instruction
// (opcode byte 0x73 in the 0F 38 map, 66 prefix), forced to EVEX encoding.
// Asserts that the CPU reports VBMI2 support.
void Assembler::vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
  assert(VM_Version::supports_vbmi2(), "requires vbmi2");

  InstructionAttr attrs(vector_len,
                        /* vex_w */ false,
                        /* legacy_mode */ false,
                        /* no_mask_reg */ true,
                        /* uses_vl */ true);
  attrs.set_is_evex_instruction();

  // Operand encodings are handed to the prefix encoder in dst, src, shift order.
  const int reg_rm_bits = vex_prefix_and_encode(dst->encoding(), src->encoding(), shift->encoding(),
                                                VEX_SIMD_66, VEX_OPCODE_0F_38, &attrs);
  emit_int8(0x73);
  emit_int8((unsigned char)(0xC0 | reg_rm_bits));
}
|
||||
|
||||
void Assembler::pandn(XMMRegister dst, XMMRegister src) {
|
||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||
@ -8109,26 +8156,6 @@ void Assembler::set_byte_if_not_zero(Register dst) {
|
||||
emit_int8((unsigned char)(0xE0 | dst->encoding()));
|
||||
}
|
||||
|
||||
void Assembler::shldl(Register dst, Register src) {
|
||||
emit_int8(0x0F);
|
||||
emit_int8((unsigned char)0xA5);
|
||||
emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
|
||||
}
|
||||
|
||||
// 0F A4 / r ib
|
||||
void Assembler::shldl(Register dst, Register src, int8_t imm8) {
|
||||
emit_int8(0x0F);
|
||||
emit_int8((unsigned char)0xA4);
|
||||
emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
|
||||
emit_int8(imm8);
|
||||
}
|
||||
|
||||
void Assembler::shrdl(Register dst, Register src) {
|
||||
emit_int8(0x0F);
|
||||
emit_int8((unsigned char)0xAD);
|
||||
emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
|
||||
}
|
||||
|
||||
#else // LP64
|
||||
|
||||
void Assembler::set_byte_if_not_zero(Register dst) {
|
||||
|
@ -1838,6 +1838,8 @@ private:
|
||||
|
||||
void shldl(Register dst, Register src);
|
||||
void shldl(Register dst, Register src, int8_t imm8);
|
||||
void shrdl(Register dst, Register src);
|
||||
void shrdl(Register dst, Register src, int8_t imm8);
|
||||
|
||||
void shll(Register dst, int imm8);
|
||||
void shll(Register dst);
|
||||
@ -1845,8 +1847,6 @@ private:
|
||||
void shlq(Register dst, int imm8);
|
||||
void shlq(Register dst);
|
||||
|
||||
void shrdl(Register dst, Register src);
|
||||
|
||||
void shrl(Register dst, int imm8);
|
||||
void shrl(Register dst);
|
||||
|
||||
@ -2140,6 +2140,9 @@ private:
|
||||
void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
|
||||
void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
|
||||
void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
|
||||
|
||||
// And packed integers
|
||||
void pand(XMMRegister dst, XMMRegister src);
|
||||
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
|
@ -5694,6 +5694,247 @@ address generate_avx_ghash_processBlocks() {
|
||||
return start;
|
||||
}
|
||||
|
||||
/**
 * Generates the stub installed as StubRoutines::_bigIntegerRightShiftWorker,
 * the compiled counterpart of BigInteger.shiftRightImplWorker.
 *
 *  Arguments:
 *
 *  Input:
 *    c_rarg0   - newArr address
 *    c_rarg1   - oldArr address
 *    c_rarg2   - newIdx
 *    c_rarg3   - shiftCount
 *  not Win64
 *    c_rarg4   - numIter
 *  Win64
 *    rsp40     - numIter
 *
 * The array is processed from its end towards its start: first 16 ints per
 * iteration with 512-bit vectors (only when VBMI2 is available and numIter is
 * large enough), then two ints per iteration, then at most one final int.
 */
address generate_bigIntegerRightShift() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "bigIntegerRightShiftWorker");

  address start = __ pc();
  Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
  // For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
  const Register newArr = rdi;
  const Register oldArr = rsi;
  const Register newIdx = rdx;
  const Register shiftCount = rcx;  // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
  const Register totalNumIter = r8;

  // For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
  // For everything else, we prefer using r9 and r10 since we do not have to save them before use.
  const Register tmp1 = r11;                    // Caller save.
  const Register tmp2 = rax;                    // Caller save.
  const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9);   // Windows: Callee save. Linux: Caller save.
  const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10);  // Windows: Callee save. Linux: Caller save.
  const Register tmp5 = r14;                    // Callee save.

  const XMMRegister x0 = xmm0;
  const XMMRegister x1 = xmm1;
  const XMMRegister x2 = xmm2;

  BLOCK_COMMENT("Entry:");
  __ enter(); // required for proper stackwalking of RuntimeStub frame

#ifdef _WINDOWS
  setup_arg_regs(4);
  // For windows, since last argument is on stack, we need to move it to the appropriate register.
  __ movl(totalNumIter, Address(rsp, 6 * wordSize));
  // Save callee save registers.
  __ push(tmp3);
  __ push(tmp4);
#endif
  __ push(tmp5);

  // Rename temps used throughout the code.
  const Register idx = tmp1;
  const Register nIdx = tmp2;

  // Start right shift from end of the array.
  // For example, if #iteration = 4 and newIdx = 1
  // then dest[4] = src[4] >>> shiftCount | src[3] << (32 - shiftCount)
  // if #iteration = 4 and newIdx = 0
  // then dest[3] = src[4] >>> shiftCount | src[3] << (32 - shiftCount)
  //
  // The 32-bit move fully defines idx, so no separate clearing of idx is needed.
  __ movl(idx, totalNumIter);
  __ movl(nIdx, idx);
  __ addl(nIdx, newIdx);

  // If vectorization is enabled, take the 512-bit path only when the number of
  // iterations is at least AVX3Threshold/64 and at least 16 (one full vector).
  // If not, then go to ShiftTwo processing 2 iterations at a time.
  if (VM_Version::supports_vbmi2()) {
    // totalNumIter is a 32-bit int argument, so compare it as 32 bits
    // (as the left-shift stub does) rather than as a full 64-bit register.
    __ cmpl(totalNumIter, (AVX3Threshold/64));
    __ jcc(Assembler::less, ShiftTwo);

    if (AVX3Threshold < 16 * 64) {
      __ cmpl(totalNumIter, 16);
      __ jcc(Assembler::less, ShiftTwo);
    }
    __ evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit);
    __ subl(idx, 16);
    __ subl(nIdx, 16);
    __ BIND(Shift512Loop);
    // x2 holds src[idx+1 .. idx+16], x1 holds src[idx .. idx+15].
    __ evmovdqul(x2, Address(oldArr, idx, Address::times_4, 4), Assembler::AVX_512bit);
    __ evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit);
    __ vpshrdvd(x2, x1, x0, Assembler::AVX_512bit);
    __ evmovdqul(Address(newArr, nIdx, Address::times_4), x2, Assembler::AVX_512bit);
    __ subl(nIdx, 16);
    __ subl(idx, 16);   // sets the flags tested by the loop branch
    __ jcc(Assembler::greaterEqual, Shift512Loop);
    // Undo the final over-subtraction so idx/nIdx describe the remaining tail.
    __ addl(idx, 16);
    __ addl(nIdx, 16);
  }
  __ BIND(ShiftTwo);
  __ cmpl(idx, 2);
  __ jcc(Assembler::less, ShiftOne);
  __ subl(idx, 2);
  __ subl(nIdx, 2);
  __ BIND(ShiftTwoLoop);
  __ movl(tmp5, Address(oldArr, idx, Address::times_4, 8));
  __ movl(tmp4, Address(oldArr, idx, Address::times_4, 4));
  __ movl(tmp3, Address(oldArr, idx, Address::times_4));
  // The register-only shrdl form is used; shiftCount lives in rcx for it.
  __ shrdl(tmp5, tmp4);
  __ shrdl(tmp4, tmp3);
  __ movl(Address(newArr, nIdx, Address::times_4, 4), tmp5);
  __ movl(Address(newArr, nIdx, Address::times_4), tmp4);
  __ subl(nIdx, 2);
  __ subl(idx, 2);      // sets the flags tested by the loop branch
  __ jcc(Assembler::greaterEqual, ShiftTwoLoop);
  __ addl(idx, 2);
  __ addl(nIdx, 2);

  // Do the last iteration
  __ BIND(ShiftOne);
  __ cmpl(idx, 1);
  __ jcc(Assembler::less, Exit);
  __ subl(idx, 1);
  __ subl(nIdx, 1);
  __ movl(tmp4, Address(oldArr, idx, Address::times_4, 4));
  __ movl(tmp3, Address(oldArr, idx, Address::times_4));
  __ shrdl(tmp4, tmp3);
  __ movl(Address(newArr, nIdx, Address::times_4), tmp4);
  __ BIND(Exit);
  // Restore callee save registers.
  __ pop(tmp5);
#ifdef _WINDOWS
  __ pop(tmp4);
  __ pop(tmp3);
  restore_arg_regs();
#endif
  __ leave(); // required for proper stackwalking of RuntimeStub frame
  __ ret(0);
  return start;
}
|
||||
|
||||
/**
|
||||
* Arguments:
|
||||
*
|
||||
* Input:
|
||||
* c_rarg0 - newArr address
|
||||
* c_rarg1 - oldArr address
|
||||
* c_rarg2 - newIdx
|
||||
* c_rarg3 - shiftCount
|
||||
* not Win64
|
||||
* c_rarg4 - numIter
|
||||
* Win64
|
||||
* rsp40 - numIter
|
||||
*/
|
||||
address generate_bigIntegerLeftShift() {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", "bigIntegerLeftShiftWorker");
|
||||
address start = __ pc();
|
||||
Label Shift512Loop, ShiftTwo, ShiftTwoLoop, ShiftOne, Exit;
|
||||
// For Unix, the arguments are as follows: rdi, rsi, rdx, rcx, r8.
|
||||
const Register newArr = rdi;
|
||||
const Register oldArr = rsi;
|
||||
const Register newIdx = rdx;
|
||||
const Register shiftCount = rcx; // It was intentional to have shiftCount in rcx since it is used implicitly for shift.
|
||||
const Register totalNumIter = r8;
|
||||
// For windows, we use r9 and r10 as temps to save rdi and rsi. Thus we cannot allocate them for our temps.
|
||||
// For everything else, we prefer using r9 and r10 since we do not have to save them before use.
|
||||
const Register tmp1 = r11; // Caller save.
|
||||
const Register tmp2 = rax; // Caller save.
|
||||
const Register tmp3 = WINDOWS_ONLY(r12) NOT_WINDOWS(r9); // Windows: Callee save. Linux: Caller save.
|
||||
const Register tmp4 = WINDOWS_ONLY(r13) NOT_WINDOWS(r10); // Windows: Callee save. Linux: Caller save.
|
||||
const Register tmp5 = r14; // Callee save.
|
||||
|
||||
const XMMRegister x0 = xmm0;
|
||||
const XMMRegister x1 = xmm1;
|
||||
const XMMRegister x2 = xmm2;
|
||||
BLOCK_COMMENT("Entry:");
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
||||
#ifdef _WINDOWS
|
||||
setup_arg_regs(4);
|
||||
// For windows, since last argument is on stack, we need to move it to the appropriate register.
|
||||
__ movl(totalNumIter, Address(rsp, 6 * wordSize));
|
||||
// Save callee save registers.
|
||||
__ push(tmp3);
|
||||
__ push(tmp4);
|
||||
#endif
|
||||
__ push(tmp5);
|
||||
|
||||
// Rename temps used throughout the code
|
||||
const Register idx = tmp1;
|
||||
const Register numIterTmp = tmp2;
|
||||
|
||||
// Start idx from zero.
|
||||
__ xorl(idx, idx);
|
||||
// Compute interior pointer for new array. We do this so that we can use same index for both old and new arrays.
|
||||
__ lea(newArr, Address(newArr, newIdx, Address::times_4));
|
||||
__ movl(numIterTmp, totalNumIter);
|
||||
|
||||
// If vectorization is enabled, check if the number of iterations is at least 64
|
||||
// If not, then go to ShiftTwo shifting two numbers at a time
|
||||
if (VM_Version::supports_vbmi2()) {
|
||||
__ cmpl(totalNumIter, (AVX3Threshold/64));
|
||||
__ jcc(Assembler::less, ShiftTwo);
|
||||
|
||||
if (AVX3Threshold < 16 * 64) {
|
||||
__ cmpl(totalNumIter, 16);
|
||||
__ jcc(Assembler::less, ShiftTwo);
|
||||
}
|
||||
__ evpbroadcastd(x0, shiftCount, Assembler::AVX_512bit);
|
||||
__ subl(numIterTmp, 16);
|
||||
__ BIND(Shift512Loop);
|
||||
__ evmovdqul(x1, Address(oldArr, idx, Address::times_4), Assembler::AVX_512bit);
|
||||
__ evmovdqul(x2, Address(oldArr, idx, Address::times_4, 0x4), Assembler::AVX_512bit);
|
||||
__ vpshldvd(x1, x2, x0, Assembler::AVX_512bit);
|
||||
__ evmovdqul(Address(newArr, idx, Address::times_4), x1, Assembler::AVX_512bit);
|
||||
__ addl(idx, 16);
|
||||
__ subl(numIterTmp, 16);
|
||||
__ jcc(Assembler::greaterEqual, Shift512Loop);
|
||||
__ addl(numIterTmp, 16);
|
||||
}
|
||||
__ BIND(ShiftTwo);
|
||||
__ cmpl(totalNumIter, 1);
|
||||
__ jcc(Assembler::less, Exit);
|
||||
__ movl(tmp3, Address(oldArr, idx, Address::times_4));
|
||||
__ subl(numIterTmp, 2);
|
||||
__ jcc(Assembler::less, ShiftOne);
|
||||
|
||||
__ BIND(ShiftTwoLoop);
|
||||
__ movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4));
|
||||
__ movl(tmp5, Address(oldArr, idx, Address::times_4, 0x8));
|
||||
__ shldl(tmp3, tmp4);
|
||||
__ shldl(tmp4, tmp5);
|
||||
__ movl(Address(newArr, idx, Address::times_4), tmp3);
|
||||
__ movl(Address(newArr, idx, Address::times_4, 0x4), tmp4);
|
||||
__ movl(tmp3, tmp5);
|
||||
__ addl(idx, 2);
|
||||
__ subl(numIterTmp, 2);
|
||||
__ jcc(Assembler::greaterEqual, ShiftTwoLoop);
|
||||
|
||||
// Do the last iteration
|
||||
__ BIND(ShiftOne);
|
||||
__ addl(numIterTmp, 2);
|
||||
__ cmpl(numIterTmp, 1);
|
||||
__ jcc(Assembler::less, Exit);
|
||||
__ movl(tmp4, Address(oldArr, idx, Address::times_4, 0x4));
|
||||
__ shldl(tmp3, tmp4);
|
||||
__ movl(Address(newArr, idx, Address::times_4), tmp3);
|
||||
|
||||
__ BIND(Exit);
|
||||
// Restore callee save registers.
|
||||
__ pop(tmp5);
|
||||
#ifdef _WINDOWS
|
||||
__ pop(tmp4);
|
||||
__ pop(tmp3);
|
||||
restore_arg_regs();
|
||||
#endif
|
||||
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||
__ ret(0);
|
||||
return start;
|
||||
}
|
||||
|
||||
address generate_libmExp() {
|
||||
StubCodeMark mark(this, "StubRoutines", "libmExp");
|
||||
|
||||
@ -6314,6 +6555,10 @@ address generate_avx_ghash_processBlocks() {
|
||||
if (UseMulAddIntrinsic) {
|
||||
StubRoutines::_mulAdd = generate_mulAdd();
|
||||
}
|
||||
if (VM_Version::supports_vbmi2()) {
|
||||
StubRoutines::_bigIntegerRightShiftWorker = generate_bigIntegerRightShift();
|
||||
StubRoutines::_bigIntegerLeftShiftWorker = generate_bigIntegerLeftShift();
|
||||
}
|
||||
#ifndef _WINDOWS
|
||||
if (UseMontgomeryMultiplyIntrinsic) {
|
||||
StubRoutines::_montgomeryMultiply
|
||||
|
@ -694,6 +694,7 @@ void VM_Version::get_processor_features() {
|
||||
_features &= ~CPU_AVX512_VPCLMULQDQ;
|
||||
_features &= ~CPU_VAES;
|
||||
_features &= ~CPU_VNNI;
|
||||
_features &= ~CPU_VBMI2;
|
||||
}
|
||||
|
||||
if (UseAVX < 2)
|
||||
@ -716,7 +717,7 @@ void VM_Version::get_processor_features() {
|
||||
}
|
||||
|
||||
char buf[256];
|
||||
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
|
||||
cores_per_cpu(), threads_per_core(),
|
||||
cpu_family(), _model, _stepping,
|
||||
(supports_cmov() ? ", cmov" : ""),
|
||||
@ -749,7 +750,10 @@ void VM_Version::get_processor_features() {
|
||||
(supports_adx() ? ", adx" : ""),
|
||||
(supports_evex() ? ", evex" : ""),
|
||||
(supports_sha() ? ", sha" : ""),
|
||||
(supports_fma() ? ", fma" : ""));
|
||||
(supports_fma() ? ", fma" : ""),
|
||||
(supports_vbmi2() ? ", vbmi2" : ""),
|
||||
(supports_vaes() ? ", vaes" : ""),
|
||||
(supports_vnni() ? ", vnni" : ""));
|
||||
_features_string = os::strdup(buf);
|
||||
|
||||
// UseSSE is set to the smaller of what hardware supports and what
|
||||
|
@ -345,6 +345,8 @@ protected:
|
||||
#define CPU_FLUSH ((uint64_t)UCONST64(0x20000000000)) // flush instruction
|
||||
#define CPU_FLUSHOPT ((uint64_t)UCONST64(0x40000000000)) // flushopt instruction
|
||||
#define CPU_CLWB ((uint64_t)UCONST64(0x80000000000)) // clwb instruction
|
||||
#define CPU_VBMI2 ((uint64_t)UCONST64(0x100000000000)) // VBMI2 shift left double instructions
|
||||
|
||||
|
||||
enum Extended_Family {
|
||||
// AMD
|
||||
@ -567,6 +569,8 @@ enum Extended_Family {
|
||||
result |= CPU_VAES;
|
||||
if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
|
||||
result |= CPU_VNNI;
|
||||
if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
|
||||
result |= CPU_VBMI2;
|
||||
}
|
||||
}
|
||||
if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
|
||||
@ -858,6 +862,7 @@ public:
|
||||
static bool supports_avx512_vpclmulqdq() { return (_features & CPU_AVX512_VPCLMULQDQ) != 0; }
|
||||
static bool supports_vaes() { return (_features & CPU_VAES) != 0; }
|
||||
static bool supports_vnni() { return (_features & CPU_VNNI) != 0; }
|
||||
static bool supports_vbmi2() { return (_features & CPU_VBMI2) != 0; }
|
||||
|
||||
// Intel features
|
||||
static bool is_intel_family_core() { return is_intel() &&
|
||||
|
@ -555,6 +555,8 @@ void AOTCodeHeap::link_stub_routines_symbols() {
|
||||
SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_montgomeryMultiply", address, StubRoutines::_montgomeryMultiply);
|
||||
SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_montgomerySquare", address, StubRoutines::_montgomerySquare);
|
||||
SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_vectorizedMismatch", address, StubRoutines::_vectorizedMismatch);
|
||||
SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_bigIntegerRightShiftWorker", address, StubRoutines::_bigIntegerRightShiftWorker);
|
||||
SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_bigIntegerLeftShiftWorker", address, StubRoutines::_bigIntegerLeftShiftWorker);
|
||||
|
||||
SET_AOT_GLOBAL_SYMBOL_VALUE("_aot_stub_routines_throw_delayed_StackOverflowError_entry", address, StubRoutines::_throw_delayed_StackOverflowError_entry);
|
||||
|
||||
|
@ -837,6 +837,9 @@ bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) {
|
||||
case vmIntrinsics::_montgomerySquare:
|
||||
if (!UseMontgomerySquareIntrinsic) return true;
|
||||
break;
|
||||
case vmIntrinsics::_bigIntegerRightShiftWorker:
|
||||
case vmIntrinsics::_bigIntegerLeftShiftWorker:
|
||||
break;
|
||||
case vmIntrinsics::_addExactI:
|
||||
case vmIntrinsics::_addExactL:
|
||||
case vmIntrinsics::_decrementExactI:
|
||||
|
@ -565,6 +565,7 @@
|
||||
template(char_StringBuffer_signature, "(C)Ljava/lang/StringBuffer;") \
|
||||
template(int_String_signature, "(I)Ljava/lang/String;") \
|
||||
template(boolean_boolean_int_signature, "(ZZ)I") \
|
||||
template(big_integer_shift_worker_signature, "([I[IIII)V") \
|
||||
template(reflect_method_signature, "Ljava/lang/reflect/Method;") \
|
||||
/* signature symbols needed by intrinsics */ \
|
||||
VM_INTRINSICS_DO(VM_INTRINSIC_IGNORE, VM_SYMBOL_IGNORE, VM_SYMBOL_IGNORE, template, VM_ALIAS_IGNORE) \
|
||||
@ -1007,6 +1008,12 @@
|
||||
do_name( montgomerySquare_name, "implMontgomerySquare") \
|
||||
do_signature(montgomerySquare_signature, "([I[IIJ[I)[I") \
|
||||
\
|
||||
do_intrinsic(_bigIntegerRightShiftWorker, java_math_BigInteger, rightShift_name, big_integer_shift_worker_signature, F_S) \
|
||||
do_name( rightShift_name, "shiftRightImplWorker") \
|
||||
\
|
||||
do_intrinsic(_bigIntegerLeftShiftWorker, java_math_BigInteger, leftShift_name, big_integer_shift_worker_signature, F_S) \
|
||||
do_name( leftShift_name, "shiftLeftImplWorker") \
|
||||
\
|
||||
do_class(jdk_internal_util_ArraysSupport, "jdk/internal/util/ArraysSupport") \
|
||||
do_intrinsic(_vectorizedMismatch, jdk_internal_util_ArraysSupport, vectorizedMismatch_name, vectorizedMismatch_signature, F_S)\
|
||||
do_name(vectorizedMismatch_name, "vectorizedMismatch") \
|
||||
|
@ -322,6 +322,8 @@
|
||||
static_field(StubRoutines, _montgomeryMultiply, address) \
|
||||
static_field(StubRoutines, _montgomerySquare, address) \
|
||||
static_field(StubRoutines, _vectorizedMismatch, address) \
|
||||
static_field(StubRoutines, _bigIntegerRightShiftWorker, address) \
|
||||
static_field(StubRoutines, _bigIntegerLeftShiftWorker, address) \
|
||||
\
|
||||
nonstatic_field(Thread, _tlab, ThreadLocalAllocBuffer) \
|
||||
nonstatic_field(Thread, _allocated_bytes, jlong) \
|
||||
|
@ -628,6 +628,8 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
|
||||
case vmIntrinsics::_mulAdd:
|
||||
case vmIntrinsics::_montgomeryMultiply:
|
||||
case vmIntrinsics::_montgomerySquare:
|
||||
case vmIntrinsics::_bigIntegerRightShiftWorker:
|
||||
case vmIntrinsics::_bigIntegerLeftShiftWorker:
|
||||
case vmIntrinsics::_vectorizedMismatch:
|
||||
case vmIntrinsics::_ghash_processBlocks:
|
||||
case vmIntrinsics::_base64_encodeBlock:
|
||||
|
@ -1006,6 +1006,8 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
|
||||
strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "montgomery_multiply") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "bigIntegerRightShiftWorker") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "bigIntegerLeftShiftWorker") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "vectorizedMismatch") == 0)
|
||||
))) {
|
||||
call->dump();
|
||||
|
@ -327,6 +327,7 @@ class LibraryCallKit : public GraphKit {
|
||||
bool inline_mulAdd();
|
||||
bool inline_montgomeryMultiply();
|
||||
bool inline_montgomerySquare();
|
||||
bool inline_bigIntegerShift(bool isRightShift);
|
||||
bool inline_vectorizedMismatch();
|
||||
bool inline_fma(vmIntrinsics::ID id);
|
||||
bool inline_character_compare(vmIntrinsics::ID id);
|
||||
@ -845,6 +846,11 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
||||
case vmIntrinsics::_montgomerySquare:
|
||||
return inline_montgomerySquare();
|
||||
|
||||
case vmIntrinsics::_bigIntegerRightShiftWorker:
|
||||
return inline_bigIntegerShift(true);
|
||||
case vmIntrinsics::_bigIntegerLeftShiftWorker:
|
||||
return inline_bigIntegerShift(false);
|
||||
|
||||
case vmIntrinsics::_vectorizedMismatch:
|
||||
return inline_vectorizedMismatch();
|
||||
|
||||
@ -5253,6 +5259,60 @@ bool LibraryCallKit::inline_montgomerySquare() {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool LibraryCallKit::inline_bigIntegerShift(bool isRightShift) {
|
||||
address stubAddr = NULL;
|
||||
const char* stubName = NULL;
|
||||
|
||||
stubAddr = isRightShift? StubRoutines::bigIntegerRightShift(): StubRoutines::bigIntegerLeftShift();
|
||||
if (stubAddr == NULL) {
|
||||
return false; // Intrinsic's stub is not implemented on this platform
|
||||
}
|
||||
|
||||
stubName = isRightShift? "bigIntegerRightShiftWorker" : "bigIntegerLeftShiftWorker";
|
||||
|
||||
assert(callee()->signature()->size() == 5, "expected 5 arguments");
|
||||
|
||||
Node* newArr = argument(0);
|
||||
Node* oldArr = argument(1);
|
||||
Node* newIdx = argument(2);
|
||||
Node* shiftCount = argument(3);
|
||||
Node* numIter = argument(4);
|
||||
|
||||
const Type* newArr_type = newArr->Value(&_gvn);
|
||||
const TypeAryPtr* top_newArr = newArr_type->isa_aryptr();
|
||||
const Type* oldArr_type = oldArr->Value(&_gvn);
|
||||
const TypeAryPtr* top_oldArr = oldArr_type->isa_aryptr();
|
||||
if (top_newArr == NULL || top_newArr->klass() == NULL || top_oldArr == NULL
|
||||
|| top_oldArr->klass() == NULL) {
|
||||
return false;
|
||||
}
|
||||
|
||||
BasicType newArr_elem = newArr_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
|
||||
BasicType oldArr_elem = oldArr_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
|
||||
if (newArr_elem != T_INT || oldArr_elem != T_INT) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Make the call
|
||||
{
|
||||
Node* newArr_start = array_element_address(newArr, intcon(0), newArr_elem);
|
||||
Node* oldArr_start = array_element_address(oldArr, intcon(0), oldArr_elem);
|
||||
|
||||
Node* call = make_runtime_call(RC_LEAF,
|
||||
OptoRuntime::bigIntegerShift_Type(),
|
||||
stubAddr,
|
||||
stubName,
|
||||
TypePtr::BOTTOM,
|
||||
newArr_start,
|
||||
oldArr_start,
|
||||
newIdx,
|
||||
shiftCount,
|
||||
numIter);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
//-------------inline_vectorizedMismatch------------------------------
|
||||
bool LibraryCallKit::inline_vectorizedMismatch() {
|
||||
assert(UseVectorizedMismatchIntrinsic, "not implementated on this platform");
|
||||
|
@ -1111,6 +1111,25 @@ const TypeFunc* OptoRuntime::montgomerySquare_Type() {
|
||||
return TypeFunc::make(domain, range);
|
||||
}
|
||||
|
||||
// Builds the call signature used for the BigInteger shift stubs:
// (newArr, oldArr, newIdx, shiftCount, numIter) with no result.
const TypeFunc * OptoRuntime::bigIntegerShift_Type() {
  // Input types (domain): two non-null pointers followed by three ints.
  const int argcnt = 5;
  const Type** domain_fields = TypeTuple::fields(argcnt);
  int slot = TypeFunc::Parms;
  domain_fields[slot++] = TypePtr::NOTNULL;    // newArr
  domain_fields[slot++] = TypePtr::NOTNULL;    // oldArr
  domain_fields[slot++] = TypeInt::INT;        // newIdx
  domain_fields[slot++] = TypeInt::INT;        // shiftCount
  domain_fields[slot++] = TypeInt::INT;        // numIter
  assert(slot == TypeFunc::Parms + argcnt, "correct decoding");
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, domain_fields);

  // Result type (range): the tuple is made with TypeFunc::Parms entries,
  // i.e. no result slot; the single extra field is left NULL.
  const Type** range_fields = TypeTuple::fields(1);
  range_fields[TypeFunc::Parms] = NULL;
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, range_fields);

  return TypeFunc::make(domain, range);
}
|
||||
|
||||
const TypeFunc* OptoRuntime::vectorizedMismatch_Type() {
|
||||
// create input type (domain)
|
||||
int num_args = 4;
|
||||
|
@ -289,6 +289,8 @@ private:
|
||||
|
||||
static const TypeFunc* mulAdd_Type();
|
||||
|
||||
static const TypeFunc* bigIntegerShift_Type();
|
||||
|
||||
static const TypeFunc* vectorizedMismatch_Type();
|
||||
|
||||
static const TypeFunc* ghash_processBlocks_Type();
|
||||
|
@ -157,6 +157,8 @@ address StubRoutines::_squareToLen = NULL;
|
||||
address StubRoutines::_mulAdd = NULL;
|
||||
address StubRoutines::_montgomeryMultiply = NULL;
|
||||
address StubRoutines::_montgomerySquare = NULL;
|
||||
address StubRoutines::_bigIntegerRightShiftWorker = NULL;
|
||||
address StubRoutines::_bigIntegerLeftShiftWorker = NULL;
|
||||
|
||||
address StubRoutines::_vectorizedMismatch = NULL;
|
||||
|
||||
|
@ -239,6 +239,8 @@ class StubRoutines: AllStatic {
|
||||
static address _mulAdd;
|
||||
static address _montgomeryMultiply;
|
||||
static address _montgomerySquare;
|
||||
static address _bigIntegerRightShiftWorker;
|
||||
static address _bigIntegerLeftShiftWorker;
|
||||
|
||||
static address _vectorizedMismatch;
|
||||
|
||||
@ -414,6 +416,8 @@ class StubRoutines: AllStatic {
|
||||
static address mulAdd() { return _mulAdd; }
|
||||
static address montgomeryMultiply() { return _montgomeryMultiply; }
|
||||
static address montgomerySquare() { return _montgomerySquare; }
|
||||
static address bigIntegerRightShift() { return _bigIntegerRightShiftWorker; }
|
||||
static address bigIntegerLeftShift() { return _bigIntegerLeftShiftWorker; }
|
||||
|
||||
static address vectorizedMismatch() { return _vectorizedMismatch; }
|
||||
|
||||
|
@ -602,6 +602,8 @@ typedef HashtableEntry<InstanceKlass*, mtClass> KlassHashtableEntry;
|
||||
static_field(StubRoutines, _updateBytesCRC32C, address) \
|
||||
static_field(StubRoutines, _multiplyToLen, address) \
|
||||
static_field(StubRoutines, _squareToLen, address) \
|
||||
static_field(StubRoutines, _bigIntegerRightShiftWorker, address) \
|
||||
static_field(StubRoutines, _bigIntegerLeftShiftWorker, address) \
|
||||
static_field(StubRoutines, _mulAdd, address) \
|
||||
static_field(StubRoutines, _dexp, address) \
|
||||
static_field(StubRoutines, _dlog, address) \
|
||||
|
@ -42,6 +42,7 @@ import jdk.internal.math.DoubleConsts;
|
||||
import jdk.internal.math.FloatConsts;
|
||||
import jdk.internal.HotSpotIntrinsicCandidate;
|
||||
import jdk.internal.vm.annotation.Stable;
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
|
||||
/**
|
||||
* Immutable arbitrary-precision integers. All operations behave as if
|
||||
@ -2621,12 +2622,8 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
|
||||
|
||||
// shifts a up to len right n bits assumes no leading zeros, 0<n<32
|
||||
static void primitiveRightShift(int[] a, int len, int n) {
|
||||
int n2 = 32 - n;
|
||||
for (int i=len-1, c=a[i]; i > 0; i--) {
|
||||
int b = c;
|
||||
c = a[i-1];
|
||||
a[i] = (c << n2) | (b >>> n);
|
||||
}
|
||||
Objects.checkFromToIndex(0, len, a.length);
|
||||
shiftRightImplWorker(a, a, 1, n, len-1);
|
||||
a[0] >>>= n;
|
||||
}
|
||||
|
||||
@ -2634,13 +2631,8 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
|
||||
static void primitiveLeftShift(int[] a, int len, int n) {
|
||||
if (len == 0 || n == 0)
|
||||
return;
|
||||
|
||||
int n2 = 32 - n;
|
||||
for (int i=0, c=a[i], m=i+len-1; i < m; i++) {
|
||||
int b = c;
|
||||
c = a[i+1];
|
||||
a[i] = (b << n) | (c >>> n2);
|
||||
}
|
||||
Objects.checkFromToIndex(0, len, a.length);
|
||||
shiftLeftImplWorker(a, a, 0, n, len-1);
|
||||
a[len-1] <<= n;
|
||||
}
|
||||
|
||||
@ -3353,14 +3345,25 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
|
||||
} else {
|
||||
newMag = new int[magLen + nInts];
|
||||
}
|
||||
int j=0;
|
||||
while (j < magLen-1)
|
||||
newMag[i++] = mag[j++] << nBits | mag[j] >>> nBits2;
|
||||
newMag[i] = mag[j] << nBits;
|
||||
int numIter = magLen - 1;
|
||||
Objects.checkFromToIndex(0, numIter + 1, mag.length);
|
||||
Objects.checkFromToIndex(i, numIter + i + 1, newMag.length);
|
||||
shiftLeftImplWorker(newMag, mag, i, nBits, numIter);
|
||||
newMag[numIter + i] = mag[numIter] << nBits;
|
||||
}
|
||||
return newMag;
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@HotSpotIntrinsicCandidate
|
||||
private static void shiftLeftImplWorker(int[] newArr, int[] oldArr, int newIdx, int shiftCount, int numIter) {
|
||||
int shiftCountRight = 32 - shiftCount;
|
||||
int oldIdx = 0;
|
||||
while (oldIdx < numIter) {
|
||||
newArr[newIdx++] = (oldArr[oldIdx++] << shiftCount) | (oldArr[oldIdx] >>> shiftCountRight);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a BigInteger whose value is {@code (this >> n)}. Sign
|
||||
* extension is performed. The shift distance, {@code n}, may be
|
||||
@ -3415,11 +3418,10 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
|
||||
} else {
|
||||
newMag = new int[magLen - nInts -1];
|
||||
}
|
||||
|
||||
int nBits2 = 32 - nBits;
|
||||
int j=0;
|
||||
while (j < magLen - nInts - 1)
|
||||
newMag[i++] = (mag[j++] << nBits2) | (mag[j] >>> nBits);
|
||||
int numIter = magLen - nInts - 1;
|
||||
Objects.checkFromToIndex(0, numIter + 1, mag.length);
|
||||
Objects.checkFromToIndex(i, numIter + i, newMag.length);
|
||||
shiftRightImplWorker(newMag, mag, i, nBits, numIter);
|
||||
}
|
||||
|
||||
if (signum < 0) {
|
||||
@ -3437,6 +3439,17 @@ public class BigInteger extends Number implements Comparable<BigInteger> {
|
||||
return new BigInteger(newMag, signum);
|
||||
}
|
||||
|
||||
@ForceInline
|
||||
@HotSpotIntrinsicCandidate
|
||||
private static void shiftRightImplWorker(int[] newArr, int[] oldArr, int newIdx, int shiftCount, int numIter) {
|
||||
int shiftCountLeft = 32 - shiftCount;
|
||||
int idx = numIter;
|
||||
int nidx = (newIdx == 0) ? numIter - 1 : numIter;
|
||||
while (nidx >= newIdx) {
|
||||
newArr[nidx--] = (oldArr[idx--] >>> shiftCount) | (oldArr[idx] << shiftCountLeft);
|
||||
}
|
||||
}
|
||||
|
||||
int[] javaIncrement(int[] val) {
|
||||
int lastSum = 0;
|
||||
for (int i=val.length-1; i >= 0 && lastSum == 0; i--)
|
||||
|
@ -229,6 +229,8 @@ public final class BinaryContainer implements SymbolTable {
|
||||
{"StubRoutines::_montgomeryMultiply", "_aot_stub_routines_montgomeryMultiply" },
|
||||
{"StubRoutines::_montgomerySquare", "_aot_stub_routines_montgomerySquare" },
|
||||
{"StubRoutines::_vectorizedMismatch", "_aot_stub_routines_vectorizedMismatch" },
|
||||
{"StubRoutines::_bigIntegerRightShiftWorker", "_aot_stub_routines_bigIntegerRightShiftWorker" },
|
||||
{"StubRoutines::_bigIntegerLeftShiftWorker", "_aot_stub_routines_bigIntegerLeftShiftWorker" },
|
||||
|
||||
{"StubRoutines::_throw_delayed_StackOverflowError_entry", "_aot_stub_routines_throw_delayed_StackOverflowError_entry" },
|
||||
|
||||
|
@ -416,7 +416,9 @@ public class CheckGraalIntrinsics extends GraalTest {
|
||||
if (isJDK14OrHigher()) {
|
||||
add(toBeInvestigated,
|
||||
"com/sun/crypto/provider/ElectronicCodeBook.implECBDecrypt([BII[BI)I",
|
||||
"com/sun/crypto/provider/ElectronicCodeBook.implECBEncrypt([BII[BI)I");
|
||||
"com/sun/crypto/provider/ElectronicCodeBook.implECBEncrypt([BII[BI)I",
|
||||
"java/math/BigInteger.shiftLeftImplWorker([I[IIII)V",
|
||||
"java/math/BigInteger.shiftRightImplWorker([I[IIII)V");
|
||||
}
|
||||
|
||||
if (!config.inlineNotify()) {
|
||||
|
130
test/hotspot/jtreg/compiler/intrinsics/bigInteger/TestShift.java
Normal file
130
test/hotspot/jtreg/compiler/intrinsics/bigInteger/TestShift.java
Normal file
@ -0,0 +1,130 @@
|
||||
/*
|
||||
* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @test
|
||||
* @bug 8234692
|
||||
* @summary Add C2 x86 intrinsic for BigInteger::shiftLeft() and BigInteger::shiftRight() method
|
||||
* @requires vm.compiler2.enabled
|
||||
*
|
||||
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch
|
||||
* -XX:CompileCommand=exclude,compiler.intrinsics.bigInteger.TestShift::main
|
||||
* -XX:CompileCommand=option,compiler.intrinsics.bigInteger.TestShift::base_left_shift,ccstr,DisableIntrinsic,_bigIntegerLeftShiftWorker
|
||||
* -XX:CompileCommand=option,compiler.intrinsics.bigInteger.TestShift::base_right_shift,ccstr,DisableIntrinsic,_bigIntegerRightShiftWorker
|
||||
* -XX:CompileCommand=inline,java.math.BigInteger::shiftLeft
|
||||
* -XX:CompileCommand=inline,java.math.BigInteger::shiftRight
|
||||
* compiler.intrinsics.bigInteger.TestShift
|
||||
*
|
||||
* @run main/othervm/timeout=600
|
||||
* -XX:CompileCommand=exclude,compiler.intrinsics.bigInteger.TestShift::main
|
||||
* -XX:CompileCommand=option,compiler.intrinsics.bigInteger.TestShift::base_left_shift,ccstr,DisableIntrinsic,_bigIntegerLeftShiftWorker
|
||||
* -XX:CompileCommand=option,compiler.intrinsics.bigInteger.TestShift::base_right_shift,ccstr,DisableIntrinsic,_bigIntegerRightShiftWorker
|
||||
* -XX:CompileCommand=inline,java.math.BigInteger::shiftLeft
|
||||
* -XX:CompileCommand=inline,java.math.BigInteger::shiftRight
|
||||
* compiler.intrinsics.bigInteger.TestShift
|
||||
*
|
||||
*/
|
||||
|
||||
package compiler.intrinsics.bigInteger;
|
||||
|
||||
import java.math.BigInteger;
|
||||
import java.util.Arrays;
|
||||
import java.util.Random;
|
||||
|
||||
public class TestShift {
|
||||
|
||||
public static BigInteger base_left_shift(BigInteger op1, int shift) {
|
||||
return op1.shiftLeft(shift);
|
||||
}
|
||||
|
||||
public static BigInteger new_left_shift(BigInteger op1, int shift) {
|
||||
return op1.shiftLeft(shift);
|
||||
}
|
||||
|
||||
public static BigInteger base_right_shift(BigInteger op1, int shift) {
|
||||
return op1.shiftRight(shift);
|
||||
}
|
||||
|
||||
public static BigInteger new_right_shift(BigInteger op1, int shift) {
|
||||
return op1.shiftRight(shift);
|
||||
}
|
||||
|
||||
public static boolean bytecompare(BigInteger b1, BigInteger b2) {
|
||||
byte[] data1 = b1.toByteArray();
|
||||
byte[] data2 = b2.toByteArray();
|
||||
if (data1.length != data2.length)
|
||||
return false;
|
||||
for (int i = 0; i < data1.length; i++) {
|
||||
if (data1[i] != data2[i])
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public static String stringify(BigInteger b) {
|
||||
String strout= "";
|
||||
byte [] data = b.toByteArray();
|
||||
for (int i = 0; i < data.length; i++) {
|
||||
strout += (String.format("%02x",data[i]) + " ");
|
||||
}
|
||||
return strout;
|
||||
}
|
||||
|
||||
public static void main(String args[]) throws Exception {
|
||||
BigInteger [] inputbuffer = new BigInteger[10];
|
||||
BigInteger [] oldLeftShiftResult = new BigInteger[10];
|
||||
BigInteger [] newLeftShiftResult = new BigInteger[10];
|
||||
BigInteger [] oldRightShiftResult = new BigInteger[10];
|
||||
BigInteger [] newRightShiftResult = new BigInteger[10];
|
||||
|
||||
Random rand = new Random();
|
||||
long seed = System.nanoTime();
|
||||
rand.setSeed(seed);
|
||||
int shiftCount = rand.nextInt(30) + 1;
|
||||
|
||||
for(int i = 0; i < inputbuffer.length; i++) {
|
||||
int numbits = rand.nextInt(4096)+32;
|
||||
inputbuffer[i] = new BigInteger(numbits, rand);
|
||||
}
|
||||
|
||||
for (int j = 0; j < 100000; j++) {
|
||||
for(int i = 0; i < inputbuffer.length; i++) {
|
||||
oldLeftShiftResult[i] = base_left_shift(inputbuffer[i], shiftCount);
|
||||
newLeftShiftResult[i] = new_left_shift(inputbuffer[i], shiftCount);
|
||||
if (!bytecompare(oldLeftShiftResult[i], newLeftShiftResult[i])) {
|
||||
System.out.println("mismatch for input:" + stringify(inputbuffer[i]) + "\n" + "expected left shift result:" + stringify(oldLeftShiftResult[i]) + "\n" +
|
||||
"calculated left shift result:" + stringify(newLeftShiftResult[i]));
|
||||
throw new Exception("Failed");
|
||||
}
|
||||
|
||||
oldRightShiftResult[i] = base_right_shift(inputbuffer[i], shiftCount);
|
||||
newRightShiftResult[i] = new_right_shift(inputbuffer[i], shiftCount);
|
||||
if (!bytecompare(oldRightShiftResult[i], newRightShiftResult[i])) {
|
||||
System.out.println("mismatch for input:" + stringify(inputbuffer[i]) + "\n" + "expected right shift result:" + stringify(oldRightShiftResult[i]) + "\n" +
|
||||
"calculated right shift result:" + stringify(newRightShiftResult[i]));
|
||||
throw new Exception("Failed");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -45,7 +45,7 @@ import java.util.concurrent.TimeUnit;
|
||||
@State(Scope.Thread)
|
||||
public class BigIntegers {
|
||||
|
||||
private BigInteger[] hugeArray, largeArray, smallArray;
|
||||
private BigInteger[] hugeArray, largeArray, smallArray, shiftArray;
|
||||
public String[] dummyStringArray;
|
||||
public Object[] dummyArr;
|
||||
private static final int TESTSIZE = 1000;
|
||||
@ -53,6 +53,7 @@ public class BigIntegers {
|
||||
@Setup
|
||||
public void setup() {
|
||||
Random r = new Random(1123);
|
||||
int numbits = r.nextInt(16384);
|
||||
|
||||
hugeArray = new BigInteger[TESTSIZE]; /*
|
||||
* Huge numbers larger than
|
||||
@ -67,6 +68,10 @@ public class BigIntegers {
|
||||
* Small number less than
|
||||
* MAX_INT
|
||||
*/
|
||||
shiftArray = new BigInteger[TESTSIZE]; /*
|
||||
* Each array entry is at most 16k bits
|
||||
* in size
|
||||
*/
|
||||
|
||||
dummyStringArray = new String[TESTSIZE];
|
||||
dummyArr = new Object[TESTSIZE];
|
||||
@ -78,6 +83,7 @@ public class BigIntegers {
|
||||
+ ((long) value + (long) Integer.MAX_VALUE));
|
||||
largeArray[i] = new BigInteger("" + ((long) value + (long) Integer.MAX_VALUE));
|
||||
smallArray[i] = new BigInteger("" + ((long) value / 1000));
|
||||
shiftArray[i] = new BigInteger(numbits, r);
|
||||
}
|
||||
}
|
||||
|
||||
@ -137,4 +143,38 @@ public class BigIntegers {
|
||||
}
|
||||
bh.consume(tmp);
|
||||
}
|
||||
|
||||
/** Invokes the shiftLeft method of BigInteger with different values. */
|
||||
@Benchmark
|
||||
@OperationsPerInvocation(TESTSIZE)
|
||||
public void testLeftShift(Blackhole bh) {
|
||||
Random rand = new Random();
|
||||
int shift = rand.nextInt(30) + 1;
|
||||
BigInteger tmp = null;
|
||||
for (BigInteger s : shiftArray) {
|
||||
if (tmp == null) {
|
||||
tmp = s;
|
||||
continue;
|
||||
}
|
||||
tmp = tmp.shiftLeft(shift);
|
||||
}
|
||||
bh.consume(tmp);
|
||||
}
|
||||
|
||||
/** Invokes the shiftRight method of BigInteger with different values. */
|
||||
@Benchmark
|
||||
@OperationsPerInvocation(TESTSIZE)
|
||||
public void testRightShift(Blackhole bh) {
|
||||
Random rand = new Random();
|
||||
int shift = rand.nextInt(30) + 1;
|
||||
BigInteger tmp = null;
|
||||
for (BigInteger s : shiftArray) {
|
||||
if (tmp == null) {
|
||||
tmp = s;
|
||||
continue;
|
||||
}
|
||||
tmp = tmp.shiftRight(shift);
|
||||
}
|
||||
bh.consume(tmp);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user