Merge
This commit is contained in:
commit
95736933bd
@ -2813,6 +2813,13 @@ void Assembler::orl(Register dst, Register src) {
|
|||||||
emit_arith(0x0B, 0xC0, dst, src);
|
emit_arith(0x0B, 0xC0, dst, src);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits a 32-bit OR of register src into the memory operand dst:
// optional prefix, opcode byte 0x09, then the operand encoding for (src, dst).
void Assembler::orl(Address dst, Register src) {
  const int or_mem_reg_opcode = 0x09;

  InstructionMark im(this);
  prefix(dst, src);
  emit_int8(or_mem_reg_opcode);
  emit_operand(src, dst);
}
|
||||||
|
|
||||||
void Assembler::packuswb(XMMRegister dst, Address src) {
|
void Assembler::packuswb(XMMRegister dst, Address src) {
|
||||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||||
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
|
assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
|
||||||
@ -6907,6 +6914,19 @@ void Assembler::rclq(Register dst, int imm8) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits rcrq on a 64-bit register with an immediate count.
// A count of 1 uses the short form (opcode 0xD1, no immediate byte); any
// other count uses opcode 0xC1 followed by the count byte. Both forms share
// the same second byte, 0xD8 | encode.
void Assembler::rcrq(Register dst, int imm8) {
  assert(isShiftCount(imm8 >> 1), "illegal shift count");
  int encode = prefixq_and_encode(dst->encoding());
  const bool by_one = (imm8 == 1);

  emit_int8((unsigned char)(by_one ? 0xD1 : 0xC1));
  emit_int8((unsigned char)(0xD8 | encode));
  if (!by_one) {
    emit_int8(imm8);
  }
}
|
||||||
|
|
||||||
void Assembler::rorq(Register dst, int imm8) {
|
void Assembler::rorq(Register dst, int imm8) {
|
||||||
assert(isShiftCount(imm8 >> 1), "illegal shift count");
|
assert(isShiftCount(imm8 >> 1), "illegal shift count");
|
||||||
int encode = prefixq_and_encode(dst->encoding());
|
int encode = prefixq_and_encode(dst->encoding());
|
||||||
|
@ -1594,6 +1594,7 @@ private:
|
|||||||
void orl(Register dst, int32_t imm32);
|
void orl(Register dst, int32_t imm32);
|
||||||
void orl(Register dst, Address src);
|
void orl(Register dst, Address src);
|
||||||
void orl(Register dst, Register src);
|
void orl(Register dst, Register src);
|
||||||
|
// OR a 32-bit register into a memory operand (dst |= src).
void orl(Address dst, Register src);
|
||||||
|
|
||||||
void orq(Address dst, int32_t imm32);
|
void orq(Address dst, int32_t imm32);
|
||||||
void orq(Register dst, int32_t imm32);
|
void orq(Register dst, int32_t imm32);
|
||||||
@ -1694,6 +1695,8 @@ private:
|
|||||||
|
|
||||||
void rclq(Register dst, int imm8);
|
void rclq(Register dst, int imm8);
|
||||||
|
|
||||||
|
// 64-bit rcr (rotate right through the carry flag) by an immediate count.
void rcrq(Register dst, int imm8);
|
||||||
|
|
||||||
void rdtsc();
|
void rdtsc();
|
||||||
|
|
||||||
void ret(int imm16);
|
void ret(int imm16);
|
||||||
|
@ -7750,6 +7750,503 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
|
|||||||
pop(tmp2);
|
pop(tmp2);
|
||||||
pop(tmp1);
|
pop(tmp1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Helper functions for square_to_len()
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Store the squares of x[], right shifted one bit (divided by 2) into z[]
|
||||||
|
* Preserves x and z and modifies rest of the registers.
|
||||||
|
*/
|
||||||
|
|
||||||
|
void MacroAssembler::square_rshift(Register x, Register xlen, Register z, Register tmp1, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
|
||||||
|
// Perform square and right shift by 1
|
||||||
|
// Handle odd xlen case first, then for even xlen do the following
|
||||||
|
// jlong carry = 0;
|
||||||
|
// for (int j=0, i=0; j < xlen; j+=2, i+=4) {
|
||||||
|
// huge_128 product = x[j:j+1] * x[j:j+1];
|
||||||
|
// z[i:i+1] = (carry << 63) | (jlong)(product >>> 65);
|
||||||
|
// z[i+2:i+3] = (jlong)(product >>> 1);
|
||||||
|
// carry = (jlong)product;
|
||||||
|
// }
|
||||||
|
|
||||||
|
xorq(tmp5, tmp5); // carry
|
||||||
|
xorq(rdxReg, rdxReg);
|
||||||
|
xorl(tmp1, tmp1); // index for x
|
||||||
|
xorl(tmp4, tmp4); // index for z
|
||||||
|
|
||||||
|
Label L_first_loop, L_first_loop_exit;
|
||||||
|
|
||||||
|
testl(xlen, 1);
|
||||||
|
jccb(Assembler::zero, L_first_loop); //jump if xlen is even
|
||||||
|
|
||||||
|
// Square and right shift by 1 the odd element using 32 bit multiply
|
||||||
|
movl(raxReg, Address(x, tmp1, Address::times_4, 0));
|
||||||
|
imulq(raxReg, raxReg);
|
||||||
|
shrq(raxReg, 1);
|
||||||
|
adcq(tmp5, 0);
|
||||||
|
movq(Address(z, tmp4, Address::times_4, 0), raxReg);
|
||||||
|
incrementl(tmp1);
|
||||||
|
addl(tmp4, 2);
|
||||||
|
|
||||||
|
// Square and right shift by 1 the rest using 64 bit multiply
|
||||||
|
bind(L_first_loop);
|
||||||
|
cmpptr(tmp1, xlen);
|
||||||
|
jccb(Assembler::equal, L_first_loop_exit);
|
||||||
|
|
||||||
|
// Square
|
||||||
|
movq(raxReg, Address(x, tmp1, Address::times_4, 0));
|
||||||
|
rorq(raxReg, 32); // convert big-endian to little-endian
|
||||||
|
mulq(raxReg); // 64-bit multiply rax * rax -> rdx:rax
|
||||||
|
|
||||||
|
// Right shift by 1 and save carry
|
||||||
|
shrq(tmp5, 1); // rdx:rax:tmp5 = (tmp5:rdx:rax) >>> 1
|
||||||
|
rcrq(rdxReg, 1);
|
||||||
|
rcrq(raxReg, 1);
|
||||||
|
adcq(tmp5, 0);
|
||||||
|
|
||||||
|
// Store result in z
|
||||||
|
movq(Address(z, tmp4, Address::times_4, 0), rdxReg);
|
||||||
|
movq(Address(z, tmp4, Address::times_4, 8), raxReg);
|
||||||
|
|
||||||
|
// Update indices for x and z
|
||||||
|
addl(tmp1, 2);
|
||||||
|
addl(tmp4, 4);
|
||||||
|
jmp(L_first_loop);
|
||||||
|
|
||||||
|
bind(L_first_loop_exit);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Perform the following multiply add operation using BMI2 instructions
|
||||||
|
* carry:sum = sum + op1*op2 + carry
|
||||||
|
* op2 should be in rdx
|
||||||
|
* op2 is preserved, all other registers are modified
|
||||||
|
*/
|
||||||
|
void MacroAssembler::multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry, Register tmp2) {
|
||||||
|
// assert op2 is rdx
|
||||||
|
mulxq(tmp2, op1, op1); // op1 * op2 -> tmp2:op1
|
||||||
|
addq(sum, carry);
|
||||||
|
adcq(tmp2, 0);
|
||||||
|
addq(sum, op1);
|
||||||
|
adcq(tmp2, 0);
|
||||||
|
movq(carry, tmp2);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Perform the following multiply add operation:
|
||||||
|
* carry:sum = sum + op1*op2 + carry
|
||||||
|
* Preserves op1, op2 and modifies rest of registers
|
||||||
|
*/
|
||||||
|
void MacroAssembler::multiply_add_64(Register sum, Register op1, Register op2, Register carry, Register rdxReg, Register raxReg) {
|
||||||
|
// rdx:rax = op1 * op2
|
||||||
|
movq(raxReg, op2);
|
||||||
|
mulq(op1);
|
||||||
|
|
||||||
|
// rdx:rax = sum + carry + rdx:rax
|
||||||
|
addq(sum, carry);
|
||||||
|
adcq(rdxReg, 0);
|
||||||
|
addq(sum, raxReg);
|
||||||
|
adcq(rdxReg, 0);
|
||||||
|
|
||||||
|
// carry:sum = rdx:sum
|
||||||
|
movq(carry, rdxReg);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add 64 bit long carry into z[] with carry propogation.
|
||||||
|
* Preserves z and carry register values and modifies rest of registers.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void MacroAssembler::add_one_64(Register z, Register zlen, Register carry, Register tmp1) {
|
||||||
|
Label L_fourth_loop, L_fourth_loop_exit;
|
||||||
|
|
||||||
|
movl(tmp1, 1);
|
||||||
|
subl(zlen, 2);
|
||||||
|
addq(Address(z, zlen, Address::times_4, 0), carry);
|
||||||
|
|
||||||
|
bind(L_fourth_loop);
|
||||||
|
jccb(Assembler::carryClear, L_fourth_loop_exit);
|
||||||
|
subl(zlen, 2);
|
||||||
|
jccb(Assembler::negative, L_fourth_loop_exit);
|
||||||
|
addq(Address(z, zlen, Address::times_4, 0), tmp1);
|
||||||
|
jmp(L_fourth_loop);
|
||||||
|
bind(L_fourth_loop_exit);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shift z[] left by 1 bit.
|
||||||
|
* Preserves x, len, z and zlen registers and modifies rest of the registers.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void MacroAssembler::lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
|
||||||
|
|
||||||
|
Label L_fifth_loop, L_fifth_loop_exit;
|
||||||
|
|
||||||
|
// Fifth loop
|
||||||
|
// Perform primitiveLeftShift(z, zlen, 1)
|
||||||
|
|
||||||
|
const Register prev_carry = tmp1;
|
||||||
|
const Register new_carry = tmp4;
|
||||||
|
const Register value = tmp2;
|
||||||
|
const Register zidx = tmp3;
|
||||||
|
|
||||||
|
// int zidx, carry;
|
||||||
|
// long value;
|
||||||
|
// carry = 0;
|
||||||
|
// for (zidx = zlen-2; zidx >=0; zidx -= 2) {
|
||||||
|
// (carry:value) = (z[i] << 1) | carry ;
|
||||||
|
// z[i] = value;
|
||||||
|
// }
|
||||||
|
|
||||||
|
movl(zidx, zlen);
|
||||||
|
xorl(prev_carry, prev_carry); // clear carry flag and prev_carry register
|
||||||
|
|
||||||
|
bind(L_fifth_loop);
|
||||||
|
decl(zidx); // Use decl to preserve carry flag
|
||||||
|
decl(zidx);
|
||||||
|
jccb(Assembler::negative, L_fifth_loop_exit);
|
||||||
|
|
||||||
|
if (UseBMI2Instructions) {
|
||||||
|
movq(value, Address(z, zidx, Address::times_4, 0));
|
||||||
|
rclq(value, 1);
|
||||||
|
rorxq(value, value, 32);
|
||||||
|
movq(Address(z, zidx, Address::times_4, 0), value); // Store back in big endian form
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// clear new_carry
|
||||||
|
xorl(new_carry, new_carry);
|
||||||
|
|
||||||
|
// Shift z[i] by 1, or in previous carry and save new carry
|
||||||
|
movq(value, Address(z, zidx, Address::times_4, 0));
|
||||||
|
shlq(value, 1);
|
||||||
|
adcl(new_carry, 0);
|
||||||
|
|
||||||
|
orq(value, prev_carry);
|
||||||
|
rorq(value, 0x20);
|
||||||
|
movq(Address(z, zidx, Address::times_4, 0), value); // Store back in big endian form
|
||||||
|
|
||||||
|
// Set previous carry = new carry
|
||||||
|
movl(prev_carry, new_carry);
|
||||||
|
}
|
||||||
|
jmp(L_fifth_loop);
|
||||||
|
|
||||||
|
bind(L_fifth_loop_exit);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Code for BigInteger::squareToLen() intrinsic
|
||||||
|
*
|
||||||
|
* rdi: x
|
||||||
|
* rsi: len
|
||||||
|
* r8: z
|
||||||
|
* rcx: zlen
|
||||||
|
* r12: tmp1
|
||||||
|
* r13: tmp2
|
||||||
|
* r14: tmp3
|
||||||
|
* r15: tmp4
|
||||||
|
* rbx: tmp5
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void MacroAssembler::square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
|
||||||
|
|
||||||
|
Label L_second_loop, L_second_loop_exit, L_third_loop, L_third_loop_exit, fifth_loop, fifth_loop_exit, L_last_x, L_multiply;
|
||||||
|
push(tmp1);
|
||||||
|
push(tmp2);
|
||||||
|
push(tmp3);
|
||||||
|
push(tmp4);
|
||||||
|
push(tmp5);
|
||||||
|
|
||||||
|
// First loop
|
||||||
|
// Store the squares, right shifted one bit (i.e., divided by 2).
|
||||||
|
square_rshift(x, len, z, tmp1, tmp3, tmp4, tmp5, rdxReg, raxReg);
|
||||||
|
|
||||||
|
// Add in off-diagonal sums.
|
||||||
|
//
|
||||||
|
// Second, third (nested) and fourth loops.
|
||||||
|
// zlen +=2;
|
||||||
|
// for (int xidx=len-2,zidx=zlen-4; xidx > 0; xidx-=2,zidx-=4) {
|
||||||
|
// carry = 0;
|
||||||
|
// long op2 = x[xidx:xidx+1];
|
||||||
|
// for (int j=xidx-2,k=zidx; j >= 0; j-=2) {
|
||||||
|
// k -= 2;
|
||||||
|
// long op1 = x[j:j+1];
|
||||||
|
// long sum = z[k:k+1];
|
||||||
|
// carry:sum = multiply_add_64(sum, op1, op2, carry, tmp_regs);
|
||||||
|
// z[k:k+1] = sum;
|
||||||
|
// }
|
||||||
|
// add_one_64(z, k, carry, tmp_regs);
|
||||||
|
// }
|
||||||
|
|
||||||
|
const Register carry = tmp5;
|
||||||
|
const Register sum = tmp3;
|
||||||
|
const Register op1 = tmp4;
|
||||||
|
Register op2 = tmp2;
|
||||||
|
|
||||||
|
push(zlen);
|
||||||
|
push(len);
|
||||||
|
addl(zlen,2);
|
||||||
|
bind(L_second_loop);
|
||||||
|
xorq(carry, carry);
|
||||||
|
subl(zlen, 4);
|
||||||
|
subl(len, 2);
|
||||||
|
push(zlen);
|
||||||
|
push(len);
|
||||||
|
cmpl(len, 0);
|
||||||
|
jccb(Assembler::lessEqual, L_second_loop_exit);
|
||||||
|
|
||||||
|
// Multiply an array by one 64 bit long.
|
||||||
|
if (UseBMI2Instructions) {
|
||||||
|
op2 = rdxReg;
|
||||||
|
movq(op2, Address(x, len, Address::times_4, 0));
|
||||||
|
rorxq(op2, op2, 32);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
movq(op2, Address(x, len, Address::times_4, 0));
|
||||||
|
rorq(op2, 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
bind(L_third_loop);
|
||||||
|
decrementl(len);
|
||||||
|
jccb(Assembler::negative, L_third_loop_exit);
|
||||||
|
decrementl(len);
|
||||||
|
jccb(Assembler::negative, L_last_x);
|
||||||
|
|
||||||
|
movq(op1, Address(x, len, Address::times_4, 0));
|
||||||
|
rorq(op1, 32);
|
||||||
|
|
||||||
|
bind(L_multiply);
|
||||||
|
subl(zlen, 2);
|
||||||
|
movq(sum, Address(z, zlen, Address::times_4, 0));
|
||||||
|
|
||||||
|
// Multiply 64 bit by 64 bit and add 64 bits lower half and upper 64 bits as carry.
|
||||||
|
if (UseBMI2Instructions) {
|
||||||
|
multiply_add_64_bmi2(sum, op1, op2, carry, tmp2);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
|
||||||
|
}
|
||||||
|
|
||||||
|
movq(Address(z, zlen, Address::times_4, 0), sum);
|
||||||
|
|
||||||
|
jmp(L_third_loop);
|
||||||
|
bind(L_third_loop_exit);
|
||||||
|
|
||||||
|
// Fourth loop
|
||||||
|
// Add 64 bit long carry into z with carry propogation.
|
||||||
|
// Uses offsetted zlen.
|
||||||
|
add_one_64(z, zlen, carry, tmp1);
|
||||||
|
|
||||||
|
pop(len);
|
||||||
|
pop(zlen);
|
||||||
|
jmp(L_second_loop);
|
||||||
|
|
||||||
|
// Next infrequent code is moved outside loops.
|
||||||
|
bind(L_last_x);
|
||||||
|
movl(op1, Address(x, 0));
|
||||||
|
jmp(L_multiply);
|
||||||
|
|
||||||
|
bind(L_second_loop_exit);
|
||||||
|
pop(len);
|
||||||
|
pop(zlen);
|
||||||
|
pop(len);
|
||||||
|
pop(zlen);
|
||||||
|
|
||||||
|
// Fifth loop
|
||||||
|
// Shift z left 1 bit.
|
||||||
|
lshift_by_1(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4);
|
||||||
|
|
||||||
|
// z[zlen-1] |= x[len-1] & 1;
|
||||||
|
movl(tmp3, Address(x, len, Address::times_4, -4));
|
||||||
|
andl(tmp3, 1);
|
||||||
|
orl(Address(z, zlen, Address::times_4, -4), tmp3);
|
||||||
|
|
||||||
|
pop(tmp5);
|
||||||
|
pop(tmp4);
|
||||||
|
pop(tmp3);
|
||||||
|
pop(tmp2);
|
||||||
|
pop(tmp1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper function for mul_add()
|
||||||
|
* Multiply the in[] by int k and add to out[] starting at offset offs using
|
||||||
|
* 128 bit by 32 bit multiply and return the carry in tmp5.
|
||||||
|
* Only quad int aligned length of in[] is operated on in this function.
|
||||||
|
* k is in rdxReg for BMI2Instructions, for others it is in tmp2.
|
||||||
|
* This function preserves out, in and k registers.
|
||||||
|
* len and offset point to the appropriate index in "in" & "out" correspondingly
|
||||||
|
* tmp5 has the carry.
|
||||||
|
* other registers are temporary and are modified.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void MacroAssembler::mul_add_128_x_32_loop(Register out, Register in,
|
||||||
|
Register offset, Register len, Register tmp1, Register tmp2, Register tmp3,
|
||||||
|
Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
|
||||||
|
|
||||||
|
Label L_first_loop, L_first_loop_exit;
|
||||||
|
|
||||||
|
movl(tmp1, len);
|
||||||
|
shrl(tmp1, 2);
|
||||||
|
|
||||||
|
bind(L_first_loop);
|
||||||
|
subl(tmp1, 1);
|
||||||
|
jccb(Assembler::negative, L_first_loop_exit);
|
||||||
|
|
||||||
|
subl(len, 4);
|
||||||
|
subl(offset, 4);
|
||||||
|
|
||||||
|
Register op2 = tmp2;
|
||||||
|
const Register sum = tmp3;
|
||||||
|
const Register op1 = tmp4;
|
||||||
|
const Register carry = tmp5;
|
||||||
|
|
||||||
|
if (UseBMI2Instructions) {
|
||||||
|
op2 = rdxReg;
|
||||||
|
}
|
||||||
|
|
||||||
|
movq(op1, Address(in, len, Address::times_4, 8));
|
||||||
|
rorq(op1, 32);
|
||||||
|
movq(sum, Address(out, offset, Address::times_4, 8));
|
||||||
|
rorq(sum, 32);
|
||||||
|
if (UseBMI2Instructions) {
|
||||||
|
multiply_add_64_bmi2(sum, op1, op2, carry, raxReg);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
|
||||||
|
}
|
||||||
|
// Store back in big endian from little endian
|
||||||
|
rorq(sum, 0x20);
|
||||||
|
movq(Address(out, offset, Address::times_4, 8), sum);
|
||||||
|
|
||||||
|
movq(op1, Address(in, len, Address::times_4, 0));
|
||||||
|
rorq(op1, 32);
|
||||||
|
movq(sum, Address(out, offset, Address::times_4, 0));
|
||||||
|
rorq(sum, 32);
|
||||||
|
if (UseBMI2Instructions) {
|
||||||
|
multiply_add_64_bmi2(sum, op1, op2, carry, raxReg);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
|
||||||
|
}
|
||||||
|
// Store back in big endian from little endian
|
||||||
|
rorq(sum, 0x20);
|
||||||
|
movq(Address(out, offset, Address::times_4, 0), sum);
|
||||||
|
|
||||||
|
jmp(L_first_loop);
|
||||||
|
bind(L_first_loop_exit);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Code for BigInteger::mulAdd() intrinsic
|
||||||
|
*
|
||||||
|
* rdi: out
|
||||||
|
* rsi: in
|
||||||
|
* r11: offs (out.length - offset)
|
||||||
|
* rcx: len
|
||||||
|
* r8: k
|
||||||
|
* r12: tmp1
|
||||||
|
* r13: tmp2
|
||||||
|
* r14: tmp3
|
||||||
|
* r15: tmp4
|
||||||
|
* rbx: tmp5
|
||||||
|
* Multiply the in[] by word k and add to out[], return the carry in rax
|
||||||
|
*/
|
||||||
|
void MacroAssembler::mul_add(Register out, Register in, Register offs,
|
||||||
|
Register len, Register k, Register tmp1, Register tmp2, Register tmp3,
|
||||||
|
Register tmp4, Register tmp5, Register rdxReg, Register raxReg) {
|
||||||
|
|
||||||
|
Label L_carry, L_last_in, L_done;
|
||||||
|
|
||||||
|
// carry = 0;
|
||||||
|
// for (int j=len-1; j >= 0; j--) {
|
||||||
|
// long product = (in[j] & LONG_MASK) * kLong +
|
||||||
|
// (out[offs] & LONG_MASK) + carry;
|
||||||
|
// out[offs--] = (int)product;
|
||||||
|
// carry = product >>> 32;
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
push(tmp1);
|
||||||
|
push(tmp2);
|
||||||
|
push(tmp3);
|
||||||
|
push(tmp4);
|
||||||
|
push(tmp5);
|
||||||
|
|
||||||
|
Register op2 = tmp2;
|
||||||
|
const Register sum = tmp3;
|
||||||
|
const Register op1 = tmp4;
|
||||||
|
const Register carry = tmp5;
|
||||||
|
|
||||||
|
if (UseBMI2Instructions) {
|
||||||
|
op2 = rdxReg;
|
||||||
|
movl(op2, k);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
movl(op2, k);
|
||||||
|
}
|
||||||
|
|
||||||
|
xorq(carry, carry);
|
||||||
|
|
||||||
|
//First loop
|
||||||
|
|
||||||
|
//Multiply in[] by k in a 4 way unrolled loop using 128 bit by 32 bit multiply
|
||||||
|
//The carry is in tmp5
|
||||||
|
mul_add_128_x_32_loop(out, in, offs, len, tmp1, tmp2, tmp3, tmp4, tmp5, rdxReg, raxReg);
|
||||||
|
|
||||||
|
//Multiply the trailing in[] entry using 64 bit by 32 bit, if any
|
||||||
|
decrementl(len);
|
||||||
|
jccb(Assembler::negative, L_carry);
|
||||||
|
decrementl(len);
|
||||||
|
jccb(Assembler::negative, L_last_in);
|
||||||
|
|
||||||
|
movq(op1, Address(in, len, Address::times_4, 0));
|
||||||
|
rorq(op1, 32);
|
||||||
|
|
||||||
|
subl(offs, 2);
|
||||||
|
movq(sum, Address(out, offs, Address::times_4, 0));
|
||||||
|
rorq(sum, 32);
|
||||||
|
|
||||||
|
if (UseBMI2Instructions) {
|
||||||
|
multiply_add_64_bmi2(sum, op1, op2, carry, raxReg);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
multiply_add_64(sum, op1, op2, carry, rdxReg, raxReg);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Store back in big endian from little endian
|
||||||
|
rorq(sum, 0x20);
|
||||||
|
movq(Address(out, offs, Address::times_4, 0), sum);
|
||||||
|
|
||||||
|
testl(len, len);
|
||||||
|
jccb(Assembler::zero, L_carry);
|
||||||
|
|
||||||
|
//Multiply the last in[] entry, if any
|
||||||
|
bind(L_last_in);
|
||||||
|
movl(op1, Address(in, 0));
|
||||||
|
movl(sum, Address(out, offs, Address::times_4, -4));
|
||||||
|
|
||||||
|
movl(raxReg, k);
|
||||||
|
mull(op1); //tmp4 * eax -> edx:eax
|
||||||
|
addl(sum, carry);
|
||||||
|
adcl(rdxReg, 0);
|
||||||
|
addl(sum, raxReg);
|
||||||
|
adcl(rdxReg, 0);
|
||||||
|
movl(carry, rdxReg);
|
||||||
|
|
||||||
|
movl(Address(out, offs, Address::times_4, -4), sum);
|
||||||
|
|
||||||
|
bind(L_carry);
|
||||||
|
//return tmp5/carry as carry in rax
|
||||||
|
movl(rax, carry);
|
||||||
|
|
||||||
|
bind(L_done);
|
||||||
|
pop(tmp5);
|
||||||
|
pop(tmp4);
|
||||||
|
pop(tmp3);
|
||||||
|
pop(tmp2);
|
||||||
|
pop(tmp1);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1241,6 +1241,25 @@ public:
|
|||||||
Register carry2);
|
Register carry2);
|
||||||
void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen,
|
void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, Register zlen,
|
||||||
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
|
Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
|
||||||
|
|
||||||
|
void square_rshift(Register x, Register len, Register z, Register tmp1, Register tmp3,
|
||||||
|
Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
|
||||||
|
void multiply_add_64_bmi2(Register sum, Register op1, Register op2, Register carry,
|
||||||
|
Register tmp2);
|
||||||
|
void multiply_add_64(Register sum, Register op1, Register op2, Register carry,
|
||||||
|
Register rdxReg, Register raxReg);
|
||||||
|
void add_one_64(Register z, Register zlen, Register carry, Register tmp1);
|
||||||
|
void lshift_by_1(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2,
|
||||||
|
Register tmp3, Register tmp4);
|
||||||
|
void square_to_len(Register x, Register len, Register z, Register zlen, Register tmp1, Register tmp2,
|
||||||
|
Register tmp3, Register tmp4, Register tmp5, Register rdxReg, Register raxReg);
|
||||||
|
|
||||||
|
void mul_add_128_x_32_loop(Register out, Register in, Register offset, Register len, Register tmp1,
|
||||||
|
Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
|
||||||
|
Register raxReg);
|
||||||
|
void mul_add(Register out, Register in, Register offset, Register len, Register k, Register tmp1,
|
||||||
|
Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register rdxReg,
|
||||||
|
Register raxReg);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic.
|
// CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic.
|
||||||
|
@ -3785,6 +3785,107 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Arguments:
|
||||||
|
*
|
||||||
|
// Input:
|
||||||
|
// c_rarg0 - x address
|
||||||
|
// c_rarg1 - x length
|
||||||
|
// c_rarg2 - z address
|
||||||
|
// c_rarg3 - z lenth
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
address generate_squareToLen() {
|
||||||
|
|
||||||
|
__ align(CodeEntryAlignment);
|
||||||
|
StubCodeMark mark(this, "StubRoutines", "squareToLen");
|
||||||
|
|
||||||
|
address start = __ pc();
|
||||||
|
// Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
|
||||||
|
// Unix: rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...)
|
||||||
|
const Register x = rdi;
|
||||||
|
const Register len = rsi;
|
||||||
|
const Register z = r8;
|
||||||
|
const Register zlen = rcx;
|
||||||
|
|
||||||
|
const Register tmp1 = r12;
|
||||||
|
const Register tmp2 = r13;
|
||||||
|
const Register tmp3 = r14;
|
||||||
|
const Register tmp4 = r15;
|
||||||
|
const Register tmp5 = rbx;
|
||||||
|
|
||||||
|
BLOCK_COMMENT("Entry:");
|
||||||
|
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||||
|
|
||||||
|
setup_arg_regs(4); // x => rdi, len => rsi, z => rdx
|
||||||
|
// zlen => rcx
|
||||||
|
// r9 and r10 may be used to save non-volatile registers
|
||||||
|
__ movptr(r8, rdx);
|
||||||
|
__ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
|
||||||
|
|
||||||
|
restore_arg_regs();
|
||||||
|
|
||||||
|
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||||
|
__ ret(0);
|
||||||
|
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Arguments:
|
||||||
|
*
|
||||||
|
* Input:
|
||||||
|
* c_rarg0 - out address
|
||||||
|
* c_rarg1 - in address
|
||||||
|
* c_rarg2 - offset
|
||||||
|
* c_rarg3 - len
|
||||||
|
* not Win64
|
||||||
|
* c_rarg4 - k
|
||||||
|
* Win64
|
||||||
|
* rsp+40 - k
|
||||||
|
*/
|
||||||
|
address generate_mulAdd() {
|
||||||
|
__ align(CodeEntryAlignment);
|
||||||
|
StubCodeMark mark(this, "StubRoutines", "mulAdd");
|
||||||
|
|
||||||
|
address start = __ pc();
|
||||||
|
// Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
|
||||||
|
// Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
|
||||||
|
const Register out = rdi;
|
||||||
|
const Register in = rsi;
|
||||||
|
const Register offset = r11;
|
||||||
|
const Register len = rcx;
|
||||||
|
const Register k = r8;
|
||||||
|
|
||||||
|
// Next registers will be saved on stack in mul_add().
|
||||||
|
const Register tmp1 = r12;
|
||||||
|
const Register tmp2 = r13;
|
||||||
|
const Register tmp3 = r14;
|
||||||
|
const Register tmp4 = r15;
|
||||||
|
const Register tmp5 = rbx;
|
||||||
|
|
||||||
|
BLOCK_COMMENT("Entry:");
|
||||||
|
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||||
|
|
||||||
|
setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx
|
||||||
|
// len => rcx, k => r8
|
||||||
|
// r9 and r10 may be used to save non-volatile registers
|
||||||
|
#ifdef _WIN64
|
||||||
|
// last argument is on stack on Win64
|
||||||
|
__ movl(k, Address(rsp, 6 * wordSize));
|
||||||
|
#endif
|
||||||
|
__ movptr(r11, rdx); // move offset in rdx to offset(r11)
|
||||||
|
__ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax);
|
||||||
|
|
||||||
|
restore_arg_regs();
|
||||||
|
|
||||||
|
__ leave(); // required for proper stackwalking of RuntimeStub frame
|
||||||
|
__ ret(0);
|
||||||
|
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#undef __
|
#undef __
|
||||||
#define __ masm->
|
#define __ masm->
|
||||||
|
|
||||||
@ -4030,6 +4131,12 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
if (UseMultiplyToLenIntrinsic) {
|
if (UseMultiplyToLenIntrinsic) {
|
||||||
StubRoutines::_multiplyToLen = generate_multiplyToLen();
|
StubRoutines::_multiplyToLen = generate_multiplyToLen();
|
||||||
}
|
}
|
||||||
|
if (UseSquareToLenIntrinsic) {
|
||||||
|
StubRoutines::_squareToLen = generate_squareToLen();
|
||||||
|
}
|
||||||
|
if (UseMulAddIntrinsic) {
|
||||||
|
StubRoutines::_mulAdd = generate_mulAdd();
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -33,7 +33,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _
|
|||||||
|
|
||||||
enum platform_dependent_constants {
|
enum platform_dependent_constants {
|
||||||
code_size1 = 19000, // simply increase if too small (assembler will crash if too small)
|
code_size1 = 19000, // simply increase if too small (assembler will crash if too small)
|
||||||
code_size2 = 22000 // simply increase if too small (assembler will crash if too small)
|
code_size2 = 23000 // simply increase if too small (assembler will crash if too small)
|
||||||
};
|
};
|
||||||
|
|
||||||
class x86 {
|
class x86 {
|
||||||
|
@ -790,6 +790,12 @@ void VM_Version::get_processor_features() {
|
|||||||
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
|
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
|
||||||
UseMultiplyToLenIntrinsic = true;
|
UseMultiplyToLenIntrinsic = true;
|
||||||
}
|
}
|
||||||
|
if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
|
||||||
|
UseSquareToLenIntrinsic = true;
|
||||||
|
}
|
||||||
|
if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
|
||||||
|
UseMulAddIntrinsic = true;
|
||||||
|
}
|
||||||
#else
|
#else
|
||||||
if (UseMultiplyToLenIntrinsic) {
|
if (UseMultiplyToLenIntrinsic) {
|
||||||
if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
|
if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
|
||||||
@ -797,6 +803,18 @@ void VM_Version::get_processor_features() {
|
|||||||
}
|
}
|
||||||
FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
|
FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
|
||||||
}
|
}
|
||||||
|
if (UseSquareToLenIntrinsic) {
|
||||||
|
if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
|
||||||
|
warning("squareToLen intrinsic is not available in 32-bit VM");
|
||||||
|
}
|
||||||
|
FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
|
||||||
|
}
|
||||||
|
if (UseMulAddIntrinsic) {
|
||||||
|
if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
|
||||||
|
warning("mulAdd intrinsic is not available in 32-bit VM");
|
||||||
|
}
|
||||||
|
FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif // COMPILER2
|
#endif // COMPILER2
|
||||||
|
|
||||||
|
@ -799,6 +799,14 @@
|
|||||||
do_name( multiplyToLen_name, "multiplyToLen") \
|
do_name( multiplyToLen_name, "multiplyToLen") \
|
||||||
do_signature(multiplyToLen_signature, "([II[II[I)[I") \
|
do_signature(multiplyToLen_signature, "([II[II[I)[I") \
|
||||||
\
|
\
|
||||||
|
do_intrinsic(_squareToLen, java_math_BigInteger, squareToLen_name, squareToLen_signature, F_S) \
|
||||||
|
do_name( squareToLen_name, "implSquareToLen") \
|
||||||
|
do_signature(squareToLen_signature, "([II[II)[I") \
|
||||||
|
\
|
||||||
|
do_intrinsic(_mulAdd, java_math_BigInteger, mulAdd_name, mulAdd_signature, F_S) \
|
||||||
|
do_name( mulAdd_name, "implMulAdd") \
|
||||||
|
do_signature(mulAdd_signature, "([I[IIII)I") \
|
||||||
|
\
|
||||||
/* java/lang/ref/Reference */ \
|
/* java/lang/ref/Reference */ \
|
||||||
do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \
|
do_intrinsic(_Reference_get, java_lang_ref_Reference, get_name, void_object_signature, F_R) \
|
||||||
\
|
\
|
||||||
|
@ -665,6 +665,12 @@
|
|||||||
product(bool, UseMultiplyToLenIntrinsic, false, \
|
product(bool, UseMultiplyToLenIntrinsic, false, \
|
||||||
"Enables intrinsification of BigInteger.multiplyToLen()") \
|
"Enables intrinsification of BigInteger.multiplyToLen()") \
|
||||||
\
|
\
|
||||||
|
product(bool, UseSquareToLenIntrinsic, false, \
|
||||||
|
"Enables intrinsification of BigInteger.squareToLen()") \
|
||||||
|
\
|
||||||
|
product(bool, UseMulAddIntrinsic, false, \
|
||||||
|
"Enables intrinsification of BigInteger.mulAdd()") \
|
||||||
|
\
|
||||||
product(bool, UseTypeSpeculation, true, \
|
product(bool, UseTypeSpeculation, true, \
|
||||||
"Speculatively propagate types from profiles") \
|
"Speculatively propagate types from profiles") \
|
||||||
\
|
\
|
||||||
|
@ -972,7 +972,9 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
|
|||||||
strcmp(call->as_CallLeaf()->_name, "sha256_implCompressMB") == 0 ||
|
strcmp(call->as_CallLeaf()->_name, "sha256_implCompressMB") == 0 ||
|
||||||
strcmp(call->as_CallLeaf()->_name, "sha512_implCompress") == 0 ||
|
strcmp(call->as_CallLeaf()->_name, "sha512_implCompress") == 0 ||
|
||||||
strcmp(call->as_CallLeaf()->_name, "sha512_implCompressMB") == 0 ||
|
strcmp(call->as_CallLeaf()->_name, "sha512_implCompressMB") == 0 ||
|
||||||
strcmp(call->as_CallLeaf()->_name, "multiplyToLen") == 0)
|
strcmp(call->as_CallLeaf()->_name, "multiplyToLen") == 0 ||
|
||||||
|
strcmp(call->as_CallLeaf()->_name, "squareToLen") == 0 ||
|
||||||
|
strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0)
|
||||||
))) {
|
))) {
|
||||||
call->dump();
|
call->dump();
|
||||||
fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
|
fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
|
||||||
|
@ -817,19 +817,78 @@ bool IfNode::fold_compares_helper(ProjNode* proj, ProjNode* success, ProjNode* f
|
|||||||
BoolTest::mask hi_test = this_bool->_test._test;
|
BoolTest::mask hi_test = this_bool->_test._test;
|
||||||
BoolTest::mask cond = hi_test;
|
BoolTest::mask cond = hi_test;
|
||||||
|
|
||||||
|
// convert:
|
||||||
|
//
|
||||||
|
// dom_bool = x {<,<=,>,>=} a
|
||||||
|
// / \
|
||||||
|
// proj = {True,False} / \ otherproj = {False,True}
|
||||||
|
// /
|
||||||
|
// this_bool = x {<,<=} b
|
||||||
|
// / \
|
||||||
|
// fail = {True,False} / \ success = {False,True}
|
||||||
|
// /
|
||||||
|
//
|
||||||
|
// (Second test guaranteed canonicalized, first one may not have
|
||||||
|
// been canonicalized yet)
|
||||||
|
//
|
||||||
|
// into:
|
||||||
|
//
|
||||||
|
// cond = (x - lo) {<u,<=u,>u,>=u} adjusted_lim
|
||||||
|
// / \
|
||||||
|
// fail / \ success
|
||||||
|
// /
|
||||||
|
//
|
||||||
|
|
||||||
// Figure out which of the two tests sets the upper bound and which
|
// Figure out which of the two tests sets the upper bound and which
|
||||||
// sets the lower bound if any.
|
// sets the lower bound if any.
|
||||||
|
Node* adjusted_lim = NULL;
|
||||||
if (hi_type->_lo > lo_type->_hi && hi_type->_hi == max_jint && lo_type->_lo == min_jint) {
|
if (hi_type->_lo > lo_type->_hi && hi_type->_hi == max_jint && lo_type->_lo == min_jint) {
|
||||||
|
|
||||||
assert((dom_bool->_test.is_less() && !proj->_con) ||
|
assert((dom_bool->_test.is_less() && !proj->_con) ||
|
||||||
(dom_bool->_test.is_greater() && proj->_con), "incorrect test");
|
(dom_bool->_test.is_greater() && proj->_con), "incorrect test");
|
||||||
// this test was canonicalized
|
// this test was canonicalized
|
||||||
assert(this_bool->_test.is_less() && fail->_con, "incorrect test");
|
assert(this_bool->_test.is_less() && fail->_con, "incorrect test");
|
||||||
|
|
||||||
|
// this_bool = <
|
||||||
|
// dom_bool = >= (proj = True) or dom_bool = < (proj = False)
|
||||||
|
// x in [a, b[ on the fail (= True) projection, b > a-1 (because of hi_type->_lo > lo_type->_hi test above):
|
||||||
|
// lo = a, hi = b, adjusted_lim = b-a, cond = <u
|
||||||
|
// dom_bool = > (proj = True) or dom_bool = <= (proj = False)
|
||||||
|
// x in ]a, b[ on the fail (= True) projection, b > a:
|
||||||
|
// lo = a+1, hi = b, adjusted_lim = b-a-1, cond = <u
|
||||||
|
// this_bool = <=
|
||||||
|
// dom_bool = >= (proj = True) or dom_bool = < (proj = False)
|
||||||
|
// x in [a, b] on the fail (= True) projection, b+1 > a-1:
|
||||||
|
// lo = a, hi = b, adjusted_lim = b-a, cond = <=u
|
||||||
|
// dom_bool = > (proj = True) or dom_bool = <= (proj = False)
|
||||||
|
// x in ]a, b] on the fail (= True) projection b+1 > a:
|
||||||
|
// lo = a+1, hi = b, adjusted_lim = b-a, cond = <u
|
||||||
|
// lo = a+1, hi = b, adjusted_lim = b-a-1, cond = <=u doesn't work because a = b is possible, then hi-lo = -1
|
||||||
|
|
||||||
if (lo_test == BoolTest::gt || lo_test == BoolTest::le) {
|
if (lo_test == BoolTest::gt || lo_test == BoolTest::le) {
|
||||||
|
if (hi_test == BoolTest::le) {
|
||||||
|
adjusted_lim = igvn->transform(new SubINode(hi, lo));
|
||||||
|
cond = BoolTest::lt;
|
||||||
|
}
|
||||||
lo = igvn->transform(new AddINode(lo, igvn->intcon(1)));
|
lo = igvn->transform(new AddINode(lo, igvn->intcon(1)));
|
||||||
}
|
}
|
||||||
} else if (lo_type->_lo > hi_type->_hi && lo_type->_hi == max_jint && hi_type->_lo == min_jint) {
|
} else if (lo_type->_lo > hi_type->_hi && lo_type->_hi == max_jint && hi_type->_lo == min_jint) {
|
||||||
|
|
||||||
|
// this_bool = <
|
||||||
|
// dom_bool = < (proj = True) or dom_bool = >= (proj = False)
|
||||||
|
// x in [b, a[ on the fail (= False) projection, a > b-1 (because of lo_type->_lo > hi_type->_hi above):
|
||||||
|
// lo = b, hi = a, adjusted_lim = a-b, cond = >=u
|
||||||
|
// dom_bool = <= (proj = True) or dom_bool = > (proj = False)
|
||||||
|
// x in [b, a] on the fail (= False) projection, a+1 > b-1:
|
||||||
|
// lo = b, hi = a, adjusted_lim = a-b, cond = >u
|
||||||
|
// this_bool = <=
|
||||||
|
// dom_bool = < (proj = True) or dom_bool = >= (proj = False)
|
||||||
|
// x in ]b, a[ on the fail (= False) projection, a > b:
|
||||||
|
// lo = b+1, hi = a, adjusted_lim = a-b-1, cond = >=u
|
||||||
|
// dom_bool = <= (proj = True) or dom_bool = > (proj = False)
|
||||||
|
// x in ]b, a] on the fail (= False) projection, a+1 > b:
|
||||||
|
// lo = b+1, hi = a, adjusted_lim = a-b, cond = >=u
|
||||||
|
// lo = b+1, hi = a, adjusted_lim = a-b-1, cond = >u doesn't work because a = b is possible, then hi-lo = -1
|
||||||
|
|
||||||
swap(lo, hi);
|
swap(lo, hi);
|
||||||
swap(lo_type, hi_type);
|
swap(lo_type, hi_type);
|
||||||
swap(lo_test, hi_test);
|
swap(lo_test, hi_test);
|
||||||
@ -842,6 +901,10 @@ bool IfNode::fold_compares_helper(ProjNode* proj, ProjNode* success, ProjNode* f
|
|||||||
cond = (hi_test == BoolTest::le || hi_test == BoolTest::gt) ? BoolTest::gt : BoolTest::ge;
|
cond = (hi_test == BoolTest::le || hi_test == BoolTest::gt) ? BoolTest::gt : BoolTest::ge;
|
||||||
|
|
||||||
if (lo_test == BoolTest::le) {
|
if (lo_test == BoolTest::le) {
|
||||||
|
if (cond == BoolTest::gt) {
|
||||||
|
adjusted_lim = igvn->transform(new SubINode(hi, lo));
|
||||||
|
cond = BoolTest::ge;
|
||||||
|
}
|
||||||
lo = igvn->transform(new AddINode(lo, igvn->intcon(1)));
|
lo = igvn->transform(new AddINode(lo, igvn->intcon(1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -860,7 +923,6 @@ bool IfNode::fold_compares_helper(ProjNode* proj, ProjNode* success, ProjNode* f
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
lo = NULL;
|
lo = NULL;
|
||||||
hi = NULL;
|
hi = NULL;
|
||||||
}
|
}
|
||||||
@ -868,12 +930,13 @@ bool IfNode::fold_compares_helper(ProjNode* proj, ProjNode* success, ProjNode* f
|
|||||||
if (lo && hi) {
|
if (lo && hi) {
|
||||||
// Merge the two compares into a single unsigned compare by building (CmpU (n - lo) (hi - lo))
|
// Merge the two compares into a single unsigned compare by building (CmpU (n - lo) (hi - lo))
|
||||||
Node* adjusted_val = igvn->transform(new SubINode(n, lo));
|
Node* adjusted_val = igvn->transform(new SubINode(n, lo));
|
||||||
Node* adjusted_lim = igvn->transform(new SubINode(hi, lo));
|
if (adjusted_lim == NULL) {
|
||||||
|
adjusted_lim = igvn->transform(new SubINode(hi, lo));
|
||||||
|
}
|
||||||
Node* newcmp = igvn->transform(new CmpUNode(adjusted_val, adjusted_lim));
|
Node* newcmp = igvn->transform(new CmpUNode(adjusted_val, adjusted_lim));
|
||||||
Node* newbool = igvn->transform(new BoolNode(newcmp, cond));
|
Node* newbool = igvn->transform(new BoolNode(newcmp, cond));
|
||||||
|
|
||||||
igvn->is_IterGVN()->replace_input_of(dom_iff, 1, igvn->intcon(proj->_con));
|
igvn->replace_input_of(dom_iff, 1, igvn->intcon(proj->_con));
|
||||||
igvn->hash_delete(this);
|
|
||||||
set_req(1, newbool);
|
set_req(1, newbool);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
@ -291,6 +291,8 @@ class LibraryCallKit : public GraphKit {
|
|||||||
bool inline_updateBytesCRC32();
|
bool inline_updateBytesCRC32();
|
||||||
bool inline_updateByteBufferCRC32();
|
bool inline_updateByteBufferCRC32();
|
||||||
bool inline_multiplyToLen();
|
bool inline_multiplyToLen();
|
||||||
|
bool inline_squareToLen();
|
||||||
|
bool inline_mulAdd();
|
||||||
|
|
||||||
bool inline_profileBoolean();
|
bool inline_profileBoolean();
|
||||||
bool inline_isCompileConstant();
|
bool inline_isCompileConstant();
|
||||||
@ -494,6 +496,14 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
|
|||||||
if (!UseMultiplyToLenIntrinsic) return NULL;
|
if (!UseMultiplyToLenIntrinsic) return NULL;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case vmIntrinsics::_squareToLen:
|
||||||
|
if (!UseSquareToLenIntrinsic) return NULL;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case vmIntrinsics::_mulAdd:
|
||||||
|
if (!UseMulAddIntrinsic) return NULL;
|
||||||
|
break;
|
||||||
|
|
||||||
case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
|
case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
|
||||||
case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
|
case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
|
||||||
if (!UseAESIntrinsics) return NULL;
|
if (!UseAESIntrinsics) return NULL;
|
||||||
@ -913,6 +923,12 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
|||||||
case vmIntrinsics::_multiplyToLen:
|
case vmIntrinsics::_multiplyToLen:
|
||||||
return inline_multiplyToLen();
|
return inline_multiplyToLen();
|
||||||
|
|
||||||
|
case vmIntrinsics::_squareToLen:
|
||||||
|
return inline_squareToLen();
|
||||||
|
|
||||||
|
case vmIntrinsics::_mulAdd:
|
||||||
|
return inline_mulAdd();
|
||||||
|
|
||||||
case vmIntrinsics::_encodeISOArray:
|
case vmIntrinsics::_encodeISOArray:
|
||||||
return inline_encodeISOArray();
|
return inline_encodeISOArray();
|
||||||
|
|
||||||
@ -5306,6 +5322,100 @@ bool LibraryCallKit::inline_multiplyToLen() {
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//-------------inline_squareToLen------------------------------------
|
||||||
|
bool LibraryCallKit::inline_squareToLen() {
|
||||||
|
assert(UseSquareToLenIntrinsic, "not implementated on this platform");
|
||||||
|
|
||||||
|
address stubAddr = StubRoutines::squareToLen();
|
||||||
|
if (stubAddr == NULL) {
|
||||||
|
return false; // Intrinsic's stub is not implemented on this platform
|
||||||
|
}
|
||||||
|
const char* stubName = "squareToLen";
|
||||||
|
|
||||||
|
assert(callee()->signature()->size() == 4, "implSquareToLen has 4 parameters");
|
||||||
|
|
||||||
|
Node* x = argument(0);
|
||||||
|
Node* len = argument(1);
|
||||||
|
Node* z = argument(2);
|
||||||
|
Node* zlen = argument(3);
|
||||||
|
|
||||||
|
const Type* x_type = x->Value(&_gvn);
|
||||||
|
const Type* z_type = z->Value(&_gvn);
|
||||||
|
const TypeAryPtr* top_x = x_type->isa_aryptr();
|
||||||
|
const TypeAryPtr* top_z = z_type->isa_aryptr();
|
||||||
|
if (top_x == NULL || top_x->klass() == NULL ||
|
||||||
|
top_z == NULL || top_z->klass() == NULL) {
|
||||||
|
// failed array check
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
BasicType x_elem = x_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
|
||||||
|
BasicType z_elem = z_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
|
||||||
|
if (x_elem != T_INT || z_elem != T_INT) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
Node* x_start = array_element_address(x, intcon(0), x_elem);
|
||||||
|
Node* z_start = array_element_address(z, intcon(0), z_elem);
|
||||||
|
|
||||||
|
Node* call = make_runtime_call(RC_LEAF|RC_NO_FP,
|
||||||
|
OptoRuntime::squareToLen_Type(),
|
||||||
|
stubAddr, stubName, TypePtr::BOTTOM,
|
||||||
|
x_start, len, z_start, zlen);
|
||||||
|
|
||||||
|
set_result(z);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
//-------------inline_mulAdd------------------------------------------
|
||||||
|
bool LibraryCallKit::inline_mulAdd() {
|
||||||
|
assert(UseMulAddIntrinsic, "not implementated on this platform");
|
||||||
|
|
||||||
|
address stubAddr = StubRoutines::mulAdd();
|
||||||
|
if (stubAddr == NULL) {
|
||||||
|
return false; // Intrinsic's stub is not implemented on this platform
|
||||||
|
}
|
||||||
|
const char* stubName = "mulAdd";
|
||||||
|
|
||||||
|
assert(callee()->signature()->size() == 5, "mulAdd has 5 parameters");
|
||||||
|
|
||||||
|
Node* out = argument(0);
|
||||||
|
Node* in = argument(1);
|
||||||
|
Node* offset = argument(2);
|
||||||
|
Node* len = argument(3);
|
||||||
|
Node* k = argument(4);
|
||||||
|
|
||||||
|
const Type* out_type = out->Value(&_gvn);
|
||||||
|
const Type* in_type = in->Value(&_gvn);
|
||||||
|
const TypeAryPtr* top_out = out_type->isa_aryptr();
|
||||||
|
const TypeAryPtr* top_in = in_type->isa_aryptr();
|
||||||
|
if (top_out == NULL || top_out->klass() == NULL ||
|
||||||
|
top_in == NULL || top_in->klass() == NULL) {
|
||||||
|
// failed array check
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
BasicType out_elem = out_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
|
||||||
|
BasicType in_elem = in_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
|
||||||
|
if (out_elem != T_INT || in_elem != T_INT) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
Node* outlen = load_array_length(out);
|
||||||
|
Node* new_offset = _gvn.transform(new SubINode(outlen, offset));
|
||||||
|
Node* out_start = array_element_address(out, intcon(0), out_elem);
|
||||||
|
Node* in_start = array_element_address(in, intcon(0), in_elem);
|
||||||
|
|
||||||
|
Node* call = make_runtime_call(RC_LEAF|RC_NO_FP,
|
||||||
|
OptoRuntime::mulAdd_Type(),
|
||||||
|
stubAddr, stubName, TypePtr::BOTTOM,
|
||||||
|
out_start,in_start, new_offset, len, k);
|
||||||
|
Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms));
|
||||||
|
set_result(result);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculate CRC32 for byte.
|
* Calculate CRC32 for byte.
|
||||||
|
@ -475,7 +475,7 @@ void PhaseIdealLoop::do_peeling( IdealLoopTree *loop, Node_List &old_new ) {
|
|||||||
|
|
||||||
C->set_major_progress();
|
C->set_major_progress();
|
||||||
// Peeling a 'main' loop in a pre/main/post situation obfuscates the
|
// Peeling a 'main' loop in a pre/main/post situation obfuscates the
|
||||||
// 'pre' loop from the main and the 'pre' can no longer have it's
|
// 'pre' loop from the main and the 'pre' can no longer have its
|
||||||
// iterations adjusted. Therefore, we need to declare this loop as
|
// iterations adjusted. Therefore, we need to declare this loop as
|
||||||
// no longer a 'main' loop; it will need new pre and post loops before
|
// no longer a 'main' loop; it will need new pre and post loops before
|
||||||
// we can do further RCE.
|
// we can do further RCE.
|
||||||
@ -1911,10 +1911,13 @@ void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
|
|||||||
return;
|
return;
|
||||||
assert(opqzm->in(1) == main_limit, "do not understand situation");
|
assert(opqzm->in(1) == main_limit, "do not understand situation");
|
||||||
|
|
||||||
// Find the pre-loop limit; we will expand it's iterations to
|
// Find the pre-loop limit; we will expand its iterations to
|
||||||
// not ever trip low tests.
|
// not ever trip low tests.
|
||||||
Node *p_f = iffm->in(0);
|
Node *p_f = iffm->in(0);
|
||||||
assert(p_f->Opcode() == Op_IfFalse, "");
|
// pre loop may have been optimized out
|
||||||
|
if (p_f->Opcode() != Op_IfFalse) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
|
CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
|
||||||
assert(pre_end->loopnode()->is_pre_loop(), "");
|
assert(pre_end->loopnode()->is_pre_loop(), "");
|
||||||
Node *pre_opaq1 = pre_end->limit();
|
Node *pre_opaq1 = pre_end->limit();
|
||||||
@ -2215,6 +2218,56 @@ void IdealLoopTree::adjust_loop_exit_prob( PhaseIdealLoop *phase ) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ASSERT
|
||||||
|
// Debug-only helper: starting from a loop head, walk up its entry control
// (projection -> If -> IfFalse -> CountedLoopEnd) and return the loop that
// owns that CountedLoopEnd. Every step of the walk is checked with an assert,
// including that the loop found is a pre loop.
static CountedLoopNode* locate_pre_from_main(CountedLoopNode *cl) {
  Node* entry_proj = cl->in(LoopNode::EntryControl);
  assert(entry_proj->Opcode() == Op_IfTrue || entry_proj->Opcode() == Op_IfFalse, "");

  Node* entry_iff = entry_proj->in(0);
  assert(entry_iff->Opcode() == Op_If, "");

  Node* pre_exit_proj = entry_iff->in(0);
  assert(pre_exit_proj->Opcode() == Op_IfFalse, "");

  CountedLoopEndNode* pre_loop_end = pre_exit_proj->in(0)->as_CountedLoopEnd();
  assert(pre_loop_end->loopnode()->is_pre_loop(), "");
  return pre_loop_end->loopnode();
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Remove the main and post loops and make the pre loop execute all
|
||||||
|
// iterations. Useful when the pre loop is found empty.
|
||||||
|
void IdealLoopTree::remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase) {
|
||||||
|
CountedLoopEndNode* pre_end = cl->loopexit();
|
||||||
|
Node* pre_cmp = pre_end->cmp_node();
|
||||||
|
if (pre_cmp->in(2)->Opcode() != Op_Opaque1) {
|
||||||
|
// Only safe to remove the main loop if the compiler optimized it
|
||||||
|
// out based on an unknown number of iterations
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Can we find the main loop?
|
||||||
|
if (_next == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Node* next_head = _next->_head;
|
||||||
|
if (!next_head->is_CountedLoop()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
CountedLoopNode* main_head = next_head->as_CountedLoop();
|
||||||
|
if (!main_head->is_main_loop()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(locate_pre_from_main(main_head) == cl, "bad main loop");
|
||||||
|
Node* main_iff = main_head->in(LoopNode::EntryControl)->in(0);
|
||||||
|
|
||||||
|
// Remove the Opaque1Node of the pre loop and make it execute all iterations
|
||||||
|
phase->_igvn.replace_input_of(pre_cmp, 2, pre_cmp->in(2)->in(2));
|
||||||
|
// Remove the Opaque1Node of the main loop so it can be optimized out
|
||||||
|
Node* main_cmp = main_iff->in(1)->in(1);
|
||||||
|
assert(main_cmp->in(2)->Opcode() == Op_Opaque1, "main loop has no opaque node?");
|
||||||
|
phase->_igvn.replace_input_of(main_cmp, 2, main_cmp->in(2)->in(1));
|
||||||
|
}
|
||||||
|
|
||||||
//------------------------------policy_do_remove_empty_loop--------------------
|
//------------------------------policy_do_remove_empty_loop--------------------
|
||||||
// Micro-benchmark spamming. Policy is to always remove empty loops.
|
// Micro-benchmark spamming. Policy is to always remove empty loops.
|
||||||
@ -2233,6 +2286,12 @@ bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) {
|
|||||||
if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue))))
|
if (!phase->is_member(this, phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue))))
|
||||||
return false; // Infinite loop
|
return false; // Infinite loop
|
||||||
|
|
||||||
|
if (cl->is_pre_loop()) {
|
||||||
|
// If the loop we are removing is a pre-loop then the main and
|
||||||
|
// post loop can be removed as well
|
||||||
|
remove_main_post_loops(cl, phase);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef ASSERT
|
#ifdef ASSERT
|
||||||
// Ensure only one phi which is the iv.
|
// Ensure only one phi which is the iv.
|
||||||
Node* iv = NULL;
|
Node* iv = NULL;
|
||||||
|
@ -485,6 +485,8 @@ public:
|
|||||||
bool is_inner() { return is_loop() && _child == NULL; }
|
bool is_inner() { return is_loop() && _child == NULL; }
|
||||||
bool is_counted() { return is_loop() && _head != NULL && _head->is_CountedLoop(); }
|
bool is_counted() { return is_loop() && _head != NULL && _head->is_CountedLoop(); }
|
||||||
|
|
||||||
|
void remove_main_post_loops(CountedLoopNode *cl, PhaseIdealLoop *phase);
|
||||||
|
|
||||||
#ifndef PRODUCT
|
#ifndef PRODUCT
|
||||||
void dump_head( ) const; // Dump loop head only
|
void dump_head( ) const; // Dump loop head only
|
||||||
void dump() const; // Dump this loop recursively
|
void dump() const; // Dump this loop recursively
|
||||||
|
@ -945,6 +945,48 @@ const TypeFunc* OptoRuntime::multiplyToLen_Type() {
|
|||||||
return TypeFunc::make(domain, range);
|
return TypeFunc::make(domain, range);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Call signature for squareToLen: two non-null pointers and two ints in,
// no result slot.
const TypeFunc* OptoRuntime::squareToLen_Type() {
  // Input tuple (domain).
  const int arg_count = 4;
  const Type** in_fields = TypeTuple::fields(arg_count);
  int slot = TypeFunc::Parms;
  in_fields[slot++] = TypePtr::NOTNULL;    // x
  in_fields[slot++] = TypeInt::INT;        // len
  in_fields[slot++] = TypePtr::NOTNULL;    // z
  in_fields[slot++] = TypeInt::INT;        // zlen
  assert(slot == TypeFunc::Parms + arg_count, "correct decoding");
  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + arg_count, in_fields);

  // Output tuple (range): no result type needed.
  const Type** out_fields = TypeTuple::fields(1);
  out_fields[TypeFunc::Parms + 0] = NULL;
  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, out_fields);

  return TypeFunc::make(domain, range);
}
|
||||||
|
|
||||||
|
// for mulAdd calls, 2 pointers and 3 ints, returning int
|
||||||
|
const TypeFunc* OptoRuntime::mulAdd_Type() {
|
||||||
|
// create input type (domain)
|
||||||
|
int num_args = 5;
|
||||||
|
int argcnt = num_args;
|
||||||
|
const Type** fields = TypeTuple::fields(argcnt);
|
||||||
|
int argp = TypeFunc::Parms;
|
||||||
|
fields[argp++] = TypePtr::NOTNULL; // out
|
||||||
|
fields[argp++] = TypePtr::NOTNULL; // in
|
||||||
|
fields[argp++] = TypeInt::INT; // offset
|
||||||
|
fields[argp++] = TypeInt::INT; // len
|
||||||
|
fields[argp++] = TypeInt::INT; // k
|
||||||
|
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
|
||||||
|
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
|
||||||
|
|
||||||
|
// returning carry (int)
|
||||||
|
fields = TypeTuple::fields(1);
|
||||||
|
fields[TypeFunc::Parms+0] = TypeInt::INT;
|
||||||
|
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields);
|
||||||
|
return TypeFunc::make(domain, range);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//------------- Interpreter state access for on stack replacement
|
//------------- Interpreter state access for on stack replacement
|
||||||
|
@ -312,6 +312,10 @@ private:
|
|||||||
|
|
||||||
static const TypeFunc* multiplyToLen_Type();
|
static const TypeFunc* multiplyToLen_Type();
|
||||||
|
|
||||||
|
static const TypeFunc* squareToLen_Type();
|
||||||
|
|
||||||
|
static const TypeFunc* mulAdd_Type();
|
||||||
|
|
||||||
static const TypeFunc* updateBytesCRC32_Type();
|
static const TypeFunc* updateBytesCRC32_Type();
|
||||||
|
|
||||||
// leaf on stack replacement interpreter accessor types
|
// leaf on stack replacement interpreter accessor types
|
||||||
|
@ -137,6 +137,8 @@ address StubRoutines::_updateBytesCRC32 = NULL;
|
|||||||
address StubRoutines::_crc_table_adr = NULL;
|
address StubRoutines::_crc_table_adr = NULL;
|
||||||
|
|
||||||
address StubRoutines::_multiplyToLen = NULL;
|
address StubRoutines::_multiplyToLen = NULL;
|
||||||
|
address StubRoutines::_squareToLen = NULL;
|
||||||
|
address StubRoutines::_mulAdd = NULL;
|
||||||
|
|
||||||
double (* StubRoutines::_intrinsic_log )(double) = NULL;
|
double (* StubRoutines::_intrinsic_log )(double) = NULL;
|
||||||
double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
|
double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
|
||||||
|
@ -197,6 +197,8 @@ class StubRoutines: AllStatic {
|
|||||||
static address _crc_table_adr;
|
static address _crc_table_adr;
|
||||||
|
|
||||||
static address _multiplyToLen;
|
static address _multiplyToLen;
|
||||||
|
static address _squareToLen;
|
||||||
|
static address _mulAdd;
|
||||||
|
|
||||||
// These are versions of the java.lang.Math methods which perform
|
// These are versions of the java.lang.Math methods which perform
|
||||||
// the same operations as the intrinsic version. They are used for
|
// the same operations as the intrinsic version. They are used for
|
||||||
@ -356,6 +358,8 @@ class StubRoutines: AllStatic {
|
|||||||
static address crc_table_addr() { return _crc_table_adr; }
|
static address crc_table_addr() { return _crc_table_adr; }
|
||||||
|
|
||||||
static address multiplyToLen() {return _multiplyToLen; }
|
static address multiplyToLen() {return _multiplyToLen; }
|
||||||
|
static address squareToLen() {return _squareToLen; }
|
||||||
|
static address mulAdd() {return _mulAdd; }
|
||||||
|
|
||||||
static address select_fill_function(BasicType t, bool aligned, const char* &name);
|
static address select_fill_function(BasicType t, bool aligned, const char* &name);
|
||||||
|
|
||||||
|
@ -831,6 +831,8 @@ typedef CompactHashtable<Symbol*, char> SymbolCompactHashTable;
|
|||||||
static_field(StubRoutines, _updateBytesCRC32, address) \
|
static_field(StubRoutines, _updateBytesCRC32, address) \
|
||||||
static_field(StubRoutines, _crc_table_adr, address) \
|
static_field(StubRoutines, _crc_table_adr, address) \
|
||||||
static_field(StubRoutines, _multiplyToLen, address) \
|
static_field(StubRoutines, _multiplyToLen, address) \
|
||||||
|
static_field(StubRoutines, _squareToLen, address) \
|
||||||
|
static_field(StubRoutines, _mulAdd, address) \
|
||||||
\
|
\
|
||||||
/*****************/ \
|
/*****************/ \
|
||||||
/* SharedRuntime */ \
|
/* SharedRuntime */ \
|
||||||
|
117
hotspot/test/compiler/intrinsics/muladd/TestMulAdd.java
Normal file
117
hotspot/test/compiler/intrinsics/muladd/TestMulAdd.java
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @test
|
||||||
|
* @bug 8081778
|
||||||
|
* @summary Add C2 x86 intrinsic for BigInteger::mulAdd() method
|
||||||
|
*
|
||||||
|
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch
|
||||||
|
* -XX:+IgnoreUnrecognizedVMOptions -XX:-UseSquareToLenIntrinsic -XX:-UseMultiplyToLenIntrinsic
|
||||||
|
* -XX:CompileCommand=dontinline,TestMulAdd::main
|
||||||
|
* -XX:CompileCommand=option,TestMulAdd::base_multiply,ccstr,DisableIntrinsic,_mulAdd
|
||||||
|
* -XX:CompileCommand=option,java.math.BigInteger::multiply,ccstr,DisableIntrinsic,_mulAdd
|
||||||
|
* -XX:CompileCommand=option,java.math.BigInteger::square,ccstr,DisableIntrinsic,_mulAdd
|
||||||
|
* -XX:CompileCommand=option,java.math.BigInteger::squareToLen,ccstr,DisableIntrinsic,_mulAdd
|
||||||
|
* -XX:CompileCommand=option,java.math.BigInteger::mulAdd,ccstr,DisableIntrinsic,_mulAdd
|
||||||
|
* -XX:CompileCommand=inline,java.math.BigInteger::multiply
|
||||||
|
* -XX:CompileCommand=inline,java.math.BigInteger::square
|
||||||
|
* -XX:CompileCommand=inline,java.math.BigInteger::squareToLen
|
||||||
|
* -XX:CompileCommand=inline,java.math.BigInteger::mulAdd TestMulAdd
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
import java.math.*;
|
||||||
|
|
||||||
|
public class TestMulAdd {
|
||||||
|
|
||||||
|
// Avoid intrinsic by preventing inlining multiply() and mulAdd().
|
||||||
|
public static BigInteger base_multiply(BigInteger op1) {
|
||||||
|
return op1.multiply(op1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Generate mulAdd() intrinsic by inlining multiply().
|
||||||
|
public static BigInteger new_multiply(BigInteger op1) {
|
||||||
|
return op1.multiply(op1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean bytecompare(BigInteger b1, BigInteger b2) {
|
||||||
|
byte[] data1 = b1.toByteArray();
|
||||||
|
byte[] data2 = b2.toByteArray();
|
||||||
|
if (data1.length != data2.length)
|
||||||
|
return false;
|
||||||
|
for (int i = 0; i < data1.length; i++) {
|
||||||
|
if (data1[i] != data2[i])
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String stringify(BigInteger b) {
|
||||||
|
String strout= "";
|
||||||
|
byte [] data = b.toByteArray();
|
||||||
|
for (int i = 0; i < data.length; i++) {
|
||||||
|
strout += (String.format("%02x",data[i]) + " ");
|
||||||
|
}
|
||||||
|
return strout;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void main(String args[]) throws Exception {
|
||||||
|
|
||||||
|
BigInteger oldsum = new BigInteger("0");
|
||||||
|
BigInteger newsum = new BigInteger("0");
|
||||||
|
|
||||||
|
BigInteger b1, b2, oldres, newres;
|
||||||
|
|
||||||
|
Random rand = new Random();
|
||||||
|
long seed = System.nanoTime();
|
||||||
|
Random rand1 = new Random();
|
||||||
|
long seed1 = System.nanoTime();
|
||||||
|
rand.setSeed(seed);
|
||||||
|
rand1.setSeed(seed1);
|
||||||
|
|
||||||
|
for (int j = 0; j < 100000; j++) {
|
||||||
|
int rand_int = rand1.nextInt(3136)+32;
|
||||||
|
b1 = new BigInteger(rand_int, rand);
|
||||||
|
|
||||||
|
oldres = base_multiply(b1);
|
||||||
|
newres = new_multiply(b1);
|
||||||
|
|
||||||
|
oldsum = oldsum.add(oldres);
|
||||||
|
newsum = newsum.add(newres);
|
||||||
|
|
||||||
|
if (!bytecompare(oldres,newres)) {
|
||||||
|
System.out.print("mismatch for:b1:" + stringify(b1) + " :oldres:" + stringify(oldres) + " :newres:" + stringify(newres));
|
||||||
|
System.out.println(b1);
|
||||||
|
throw new Exception("Failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!bytecompare(oldsum,newsum)) {
|
||||||
|
System.out.println("Failure: oldsum:" + stringify(oldsum) + " newsum:" + stringify(newsum));
|
||||||
|
throw new Exception("Failed");
|
||||||
|
} else {
|
||||||
|
System.out.println("Success");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,114 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @test
|
||||||
|
* @bug 8081778
|
||||||
|
* @summary Add C2 x86 intrinsic for BigInteger::squareToLen() method
|
||||||
|
*
|
||||||
|
* @run main/othervm/timeout=600 -XX:-TieredCompilation -Xbatch
|
||||||
|
* -XX:CompileCommand=exclude,TestSquareToLen::main
|
||||||
|
* -XX:CompileCommand=option,TestSquareToLen::base_multiply,ccstr,DisableIntrinsic,_squareToLen
|
||||||
|
* -XX:CompileCommand=option,java.math.BigInteger::multiply,ccstr,DisableIntrinsic,_squareToLen
|
||||||
|
* -XX:CompileCommand=option,java.math.BigInteger::square,ccstr,DisableIntrinsic,_squareToLen
|
||||||
|
* -XX:CompileCommand=option,java.math.BigInteger::squareToLen,ccstr,DisableIntrinsic,_squareToLen
|
||||||
|
* -XX:CompileCommand=inline,java.math.BigInteger::multiply
|
||||||
|
* -XX:CompileCommand=inline,java.math.BigInteger::square
|
||||||
|
* -XX:CompileCommand=inline,java.math.BigInteger::squareToLen TestSquareToLen
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.Random;
|
||||||
|
import java.math.*;
|
||||||
|
|
||||||
|
/**
 * Compares BigInteger squaring computed through two structurally identical
 * wrappers, {@link #base_multiply} and {@link #new_multiply}, on random inputs.
 * The wrappers only differ in how the test's {@code @run} CompileCommand
 * options treat them (DisableIntrinsic,_squareToLen is requested for
 * base_multiply but not for new_multiply), so their names and bodies must
 * stay exactly as they are.
 */
public class TestSquareToLen {

    // Avoid intrinsic by preventing inlining multiply() and squareToLen().
    public static BigInteger base_multiply(BigInteger op1) {
        return op1.multiply(op1);
    }

    // Generate squareToLen() intrinsic by inlining multiply().
    public static BigInteger new_multiply(BigInteger op1) {
        return op1.multiply(op1);
    }

    /**
     * Returns true when both values have the same two's-complement byte
     * representation (same length and same content).
     */
    public static boolean bytecompare(BigInteger b1, BigInteger b2) {
        return java.util.Arrays.equals(b1.toByteArray(), b2.toByteArray());
    }

    /**
     * Renders the bytes of {@code b.toByteArray()} as two-digit lowercase hex
     * values, each followed by a single space (e.g. 255 becomes "00 ff ").
     */
    public static String stringify(BigInteger b) {
        byte[] data = b.toByteArray();
        // Each byte contributes two hex digits plus one separator.
        StringBuilder strout = new StringBuilder(3 * data.length);
        for (byte value : data) {
            strout.append(String.format("%02x", value)).append(' ');
        }
        return strout.toString();
    }

    /**
     * Squares 100000 random values (32 to 3167 bits) through both wrappers and
     * fails with an Exception on the first differing result, or if the running
     * sums of the results differ at the end.
     */
    public static void main(String args[]) throws Exception {

        BigInteger oldsum = BigInteger.ZERO;
        BigInteger newsum = BigInteger.ZERO;

        BigInteger b1, oldres, newres;

        long seed = System.nanoTime();
        Random rand = new Random(seed);
        long seed1 = System.nanoTime();
        Random rand1 = new Random(seed1);

        // Report the seeds before doing any work so that a failing (or
        // crashing) run can be reproduced with the same inputs.
        System.out.println("Random seeds: rand=" + seed + " rand1=" + seed1);

        for (int j = 0; j < 100000; j++) {
            int rand_int = rand1.nextInt(3136) + 32;
            b1 = new BigInteger(rand_int, rand);

            oldres = base_multiply(b1);
            newres = new_multiply(b1);

            oldsum = oldsum.add(oldres);
            newsum = newsum.add(newres);

            if (!bytecompare(oldres, newres)) {
                System.out.print("mismatch for:b1:" + stringify(b1) + " :oldres:" + stringify(oldres) + " :newres:" + stringify(newres));
                System.out.println(b1);
                throw new Exception("Failed");
            }
        }

        if (!bytecompare(oldsum, newsum)) {
            System.out.println("Failure: oldsum:" + stringify(oldsum) + " newsum:" + stringify(newsum));
            throw new Exception("Failed");
        } else {
            System.out.println("Success");
        }
    }
}
|
94
hotspot/test/compiler/rangechecks/TestBadFoldCompare.java
Normal file
94
hotspot/test/compiler/rangechecks/TestBadFoldCompare.java
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @test
|
||||||
|
* @bug 8085832
|
||||||
|
* @summary x <= 0 || x > 0 wrongly folded as (x-1) >u -1
|
||||||
|
* @run main/othervm -XX:-BackgroundCompilation -XX:-UseOnStackReplacement TestBadFoldCompare
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Regression test: calls helper1/helper2 many times with bounds (0, 10), then
// once through test1/test2 with both bounds equal to 0, and checks that the
// "taken" flag is set for i == 0. Per the @summary tag, the failure being
// guarded against is "x <= 0 || x > 0" being folded into a wrong unsigned
// comparison.
// NOTE(review): the source order of the two comparisons in each helper and the
// call pattern in main() look deliberate; do not restructure without
// confirming against the original bug.
public class TestBadFoldCompare {
|
||||||
|
|
||||||
|
// Set by helper1 when its condition holds; reset and inspected by main().
static boolean test1_taken;
|
||||||
|
|
||||||
|
// When flag is true, records in test1_taken that i lies outside (a, b],
// testing the lower bound first (i <= a) and the upper bound second (i > b).
static void helper1(int i, int a, int b, boolean flag) {
|
||||||
|
if (flag) {
|
||||||
|
if (i <= a || i > b) {
|
||||||
|
test1_taken = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calls helper1 with both bounds fixed to 0, so the condition becomes
// i <= 0 || i > 0, which holds for every int.
static void test1(int i, boolean flag) {
|
||||||
|
helper1(i, 0, 0, flag);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set by helper2 when its condition holds; reset and inspected by main().
static boolean test2_taken;
|
||||||
|
|
||||||
|
// Same check as helper1 but with the comparisons in the opposite order:
// upper bound first (i > b), lower bound second (i <= a).
static void helper2(int i, int a, int b, boolean flag) {
|
||||||
|
if (flag) {
|
||||||
|
if (i > b || i <= a) {
|
||||||
|
test2_taken = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calls helper2 with both bounds fixed to 0 (condition i > 0 || i <= 0).
static void test2(int i, boolean flag) {
|
||||||
|
helper2(i, 0, 0, flag);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Runs both scenarios and throws RuntimeException if either flag was not set.
static public void main(String[] args) {
|
||||||
|
boolean success = true;
|
||||||
|
|
||||||
|
// Exercise helper1 with values inside, below and above (0, 10], alternating
// flag, and call test1 only with flag == false.
// NOTE(review): presumably this is warm-up so the methods get compiled with
// this profile before the real check below - confirm.
for (int i = 0; i < 20000; i++) {
|
||||||
|
helper1(5, 0, 10, (i%2)==0);
|
||||||
|
helper1(-1, 0, 10, (i%2)==0);
|
||||||
|
helper1(15, 0, 10, (i%2)==0);
|
||||||
|
test1(0, false);
|
||||||
|
}
|
||||||
|
// Clear the flag (the loop above sets it via helper1), then run the real check.
test1_taken = false;
|
||||||
|
// 0 <= 0 is true, so helper1 must set test1_taken here.
test1(0, true);
|
||||||
|
if (!test1_taken) {
|
||||||
|
System.out.println("Test1 failed");
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Same sequence for the helper2/test2 variant.
for (int i = 0; i < 20000; i++) {
|
||||||
|
helper2(5, 0, 10, (i%2)==0);
|
||||||
|
helper2(-1, 0, 10, (i%2)==0);
|
||||||
|
helper2(15, 0, 10, (i%2)==0);
|
||||||
|
test2(0, false);
|
||||||
|
}
|
||||||
|
test2_taken = false;
|
||||||
|
// 0 <= 0 is true, so helper2 must set test2_taken here.
test2(0, true);
|
||||||
|
|
||||||
|
if (!test2_taken) {
|
||||||
|
System.out.println("Test2 failed");
|
||||||
|
success = false;
|
||||||
|
}
|
||||||
|
// Report both scenarios before failing so one run shows every failing case.
if (!success) {
|
||||||
|
throw new RuntimeException("Some tests failed");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user