8257815: Replace global log2 functions with efficient implementations
Reviewed-by: kbarrett, stefank
This commit is contained in:
parent
f80c63b380
commit
9d160aa1b7
@ -1772,7 +1772,7 @@ void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr
|
||||
// move lreg_lo to dreg if divisor is 1
|
||||
__ mov(dreg, lreg_lo);
|
||||
} else {
|
||||
unsigned int shift = exact_log2_long(c);
|
||||
unsigned int shift = log2i_exact(c);
|
||||
// use rscratch1 as intermediate result register
|
||||
__ asr(rscratch1, lreg_lo, 63);
|
||||
__ add(rscratch1, lreg_lo, rscratch1, Assembler::LSR, 64 - shift);
|
||||
|
@ -200,7 +200,7 @@ size_t ZPlatformAddressOffsetBits() {
|
||||
const size_t max_address_offset_bits = valid_max_address_offset_bits - 3;
|
||||
const size_t min_address_offset_bits = max_address_offset_bits - 2;
|
||||
const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
|
||||
const size_t address_offset_bits = log2_intptr(address_offset);
|
||||
const size_t address_offset_bits = log2i_exact(address_offset);
|
||||
return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits);
|
||||
}
|
||||
|
||||
|
@ -302,7 +302,7 @@ void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acqui
|
||||
cmp(in_nmethod ? sp : rfp, rscratch1);
|
||||
br(Assembler::HI, slow_path);
|
||||
} else {
|
||||
tbnz(rscratch1, exact_log2(SafepointMechanism::poll_bit()), slow_path);
|
||||
tbnz(rscratch1, log2i_exact(SafepointMechanism::poll_bit()), slow_path);
|
||||
}
|
||||
}
|
||||
|
||||
@ -3996,7 +3996,7 @@ MacroAssembler::KlassDecodeMode MacroAssembler::klass_decode_mode() {
|
||||
if (operand_valid_for_logical_immediate(
|
||||
/*is32*/false, (uint64_t)CompressedKlassPointers::base())) {
|
||||
const uint64_t range_mask =
|
||||
(1ULL << log2_intptr(CompressedKlassPointers::range())) - 1;
|
||||
(1ULL << log2i(CompressedKlassPointers::range())) - 1;
|
||||
if (((uint64_t)CompressedKlassPointers::base() & range_mask) == 0) {
|
||||
return (_klass_decode_mode = KlassDecodeXor);
|
||||
}
|
||||
|
@ -848,7 +848,7 @@ void LIR_Assembler::emit_op3(LIR_Op3* op) {
|
||||
__ add_32(dest, left, AsmOperand(left, lsr, 31));
|
||||
__ asr_32(dest, dest, 1);
|
||||
} else if (c != (int) 0x80000000) {
|
||||
int power = log2_intptr(c);
|
||||
int power = log2i_exact(c);
|
||||
__ asr_32(Rtemp, left, 31);
|
||||
__ add_32(dest, left, AsmOperand(Rtemp, lsr, 32-power)); // dest = left + (left < 0 ? 2^power - 1 : 0);
|
||||
__ asr_32(dest, dest, power); // dest = dest >>> power;
|
||||
|
@ -329,12 +329,12 @@ void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr bas
|
||||
bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) {
|
||||
assert(left != result, "should be different registers");
|
||||
if (is_power_of_2(c + 1)) {
|
||||
LIR_Address::Scale scale = (LIR_Address::Scale) log2_intptr(c + 1);
|
||||
LIR_Address::Scale scale = (LIR_Address::Scale) log2i_exact(c + 1);
|
||||
LIR_Address* addr = new LIR_Address(left, left, scale, 0, T_INT);
|
||||
__ sub(LIR_OprFact::address(addr), left, result); // rsb with shifted register
|
||||
return true;
|
||||
} else if (is_power_of_2(c - 1)) {
|
||||
LIR_Address::Scale scale = (LIR_Address::Scale) log2_intptr(c - 1);
|
||||
LIR_Address::Scale scale = (LIR_Address::Scale) log2i_exact(c - 1);
|
||||
LIR_Address* addr = new LIR_Address(left, left, scale, 0, T_INT);
|
||||
__ add(left, LIR_OprFact::address(addr), result); // add with shifted register
|
||||
return true;
|
||||
|
@ -81,13 +81,13 @@ int Assembler::branch_destination(int inst, int pos) {
|
||||
void Assembler::andi(Register a, Register s, const long ui16) {
|
||||
if (is_power_of_2(((jlong) ui16)+1)) {
|
||||
// pow2minus1
|
||||
clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
|
||||
clrldi(a, s, 64 - log2i_exact((((jlong) ui16)+1)));
|
||||
} else if (is_power_of_2((jlong) ui16)) {
|
||||
// pow2
|
||||
rlwinm(a, s, 0, 31-log2_long((jlong) ui16), 31-log2_long((jlong) ui16));
|
||||
rlwinm(a, s, 0, 31 - log2i_exact((jlong) ui16), 31 - log2i_exact((jlong) ui16));
|
||||
} else if (is_power_of_2((jlong)-ui16)) {
|
||||
// negpow2
|
||||
clrrdi(a, s, log2_long((jlong)-ui16));
|
||||
clrrdi(a, s, log2i_exact((jlong)-ui16));
|
||||
} else {
|
||||
assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
|
||||
andi_(a, s, ui16);
|
||||
|
@ -341,7 +341,7 @@ void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right,
|
||||
|
||||
} else if (is_power_of_2(divisor)) {
|
||||
// Convert division by a power of two into some shifts and logical operations.
|
||||
int log2 = log2_intptr(divisor);
|
||||
int log2 = log2i_exact(divisor);
|
||||
|
||||
// Round towards 0.
|
||||
if (divisor == 2) {
|
||||
|
@ -293,11 +293,11 @@ void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr bas
|
||||
bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) {
|
||||
assert(left != result, "should be different registers");
|
||||
if (is_power_of_2(c + 1)) {
|
||||
__ shift_left(left, log2_int(c + 1), result);
|
||||
__ shift_left(left, log2i_exact(c + 1), result);
|
||||
__ sub(result, left, result);
|
||||
return true;
|
||||
} else if (is_power_of_2(c - 1)) {
|
||||
__ shift_left(left, log2_int(c - 1), result);
|
||||
__ shift_left(left, log2i_exact(c - 1), result);
|
||||
__ add(result, left, result);
|
||||
return true;
|
||||
}
|
||||
|
@ -59,7 +59,7 @@ inline int MacroAssembler::get_ld_largeoffset_offset(address a) {
|
||||
inline void MacroAssembler::round_to(Register r, int modulus) {
|
||||
assert(is_power_of_2((jlong)modulus), "must be power of 2");
|
||||
addi(r, r, modulus-1);
|
||||
clrrdi(r, r, log2_long((jlong)modulus));
|
||||
clrrdi(r, r, log2i_exact((jlong)modulus));
|
||||
}
|
||||
|
||||
// Move register if destination register and target register are different.
|
||||
|
@ -4042,7 +4042,7 @@ operand immIhi16() %{
|
||||
%}
|
||||
|
||||
operand immInegpow2() %{
|
||||
predicate(is_power_of_2((jlong) (julong) (juint) (-(n->get_int()))));
|
||||
predicate(is_power_of_2(-(juint)(n->get_int())));
|
||||
match(ConI);
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
@ -4050,7 +4050,7 @@ operand immInegpow2() %{
|
||||
%}
|
||||
|
||||
operand immIpow2minus1() %{
|
||||
predicate(is_power_of_2((((jlong) (n->get_int()))+1)));
|
||||
predicate(is_power_of_2((juint)(n->get_int()) + 1u));
|
||||
match(ConI);
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
@ -4058,7 +4058,7 @@ operand immIpow2minus1() %{
|
||||
%}
|
||||
|
||||
operand immIpowerOf2() %{
|
||||
predicate(is_power_of_2((((jlong) (julong) (juint) (n->get_int())))));
|
||||
predicate(is_power_of_2((juint)(n->get_int())));
|
||||
match(ConI);
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
@ -4292,7 +4292,7 @@ operand immLhighest16() %{
|
||||
%}
|
||||
|
||||
operand immLnegpow2() %{
|
||||
predicate(is_power_of_2((jlong)-(n->get_long())));
|
||||
predicate(is_power_of_2(-(julong)(n->get_long())));
|
||||
match(ConL);
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
@ -4300,8 +4300,7 @@ operand immLnegpow2() %{
|
||||
%}
|
||||
|
||||
operand immLpow2minus1() %{
|
||||
predicate(is_power_of_2((((jlong) (n->get_long()))+1)) &&
|
||||
(n->get_long() != (jlong)0xffffffffffffffffL));
|
||||
predicate(is_power_of_2((julong)(n->get_long()) + 1ull));
|
||||
match(ConL);
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
@ -9076,9 +9075,8 @@ instruct lShiftI_andI_immInegpow2_imm5(iRegIdst dst, iRegIsrc src1, immInegpow2
|
||||
format %{ "RLWINM $dst, lShiftI(AndI($src1, $src2), $src3)" %}
|
||||
size(4);
|
||||
ins_encode %{
|
||||
long src2 = $src2$$constant;
|
||||
long src3 = $src3$$constant;
|
||||
long maskbits = src3 + log2_long((jlong) (julong) (juint) -src2);
|
||||
long maskbits = src3 + log2i_exact(-(juint)$src2$$constant);
|
||||
if (maskbits >= 32) {
|
||||
__ li($dst$$Register, 0); // addi
|
||||
} else {
|
||||
@ -9096,9 +9094,8 @@ instruct lShiftI_andI_immInegpow2_rShiftI_imm5(iRegIdst dst, iRegIsrc src1, immI
|
||||
format %{ "RLWINM $dst, lShiftI(AndI(RShiftI($src1, $src3), $src2), $src3)" %}
|
||||
size(4);
|
||||
ins_encode %{
|
||||
long src2 = $src2$$constant;
|
||||
long src3 = $src3$$constant;
|
||||
long maskbits = src3 + log2_long((jlong) (julong) (juint) -src2);
|
||||
long maskbits = src3 + log2i_exact(-(juint)$src2$$constant);
|
||||
if (maskbits >= 32) {
|
||||
__ li($dst$$Register, 0); // addi
|
||||
} else {
|
||||
@ -9366,7 +9363,7 @@ instruct andI_urShiftI_regI_immI_immIpow2minus1(iRegIdst dst, iRegIsrc src1, imm
|
||||
size(4);
|
||||
ins_encode %{
|
||||
int rshift = ($src2$$constant) & 0x1f;
|
||||
int length = log2_long(((jlong) $src3$$constant) + 1);
|
||||
int length = log2i_exact((juint)$src3$$constant + 1u);
|
||||
if (rshift + length > 32) {
|
||||
// if necessary, adjust mask to omit rotated bits.
|
||||
length = 32 - rshift;
|
||||
@ -9384,7 +9381,7 @@ instruct andL_urShiftL_regL_immI_immLpow2minus1(iRegLdst dst, iRegLsrc src1, imm
|
||||
size(4);
|
||||
ins_encode %{
|
||||
int rshift = ($src2$$constant) & 0x3f;
|
||||
int length = log2_long(((jlong) $src3$$constant) + 1);
|
||||
int length = log2i_exact((julong)$src3$$constant + 1ull);
|
||||
if (rshift + length > 64) {
|
||||
// if necessary, adjust mask to omit rotated bits.
|
||||
length = 64 - rshift;
|
||||
@ -9788,7 +9785,7 @@ instruct andI_reg_immInegpow2(iRegIdst dst, iRegIsrc src1, immInegpow2 src2) %{
|
||||
format %{ "ANDWI $dst, $src1, $src2" %}
|
||||
size(4);
|
||||
ins_encode %{
|
||||
__ clrrdi($dst$$Register, $src1$$Register, log2_long((jlong)(julong)(juint)-($src2$$constant)));
|
||||
__ clrrdi($dst$$Register, $src1$$Register, log2i_exact(-(juint)$src2$$constant));
|
||||
%}
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
@ -9798,7 +9795,7 @@ instruct andI_reg_immIpow2minus1(iRegIdst dst, iRegIsrc src1, immIpow2minus1 src
|
||||
format %{ "ANDWI $dst, $src1, $src2" %}
|
||||
size(4);
|
||||
ins_encode %{
|
||||
__ clrldi($dst$$Register, $src1$$Register, 64-log2_long((((jlong) $src2$$constant)+1)));
|
||||
__ clrldi($dst$$Register, $src1$$Register, 64 - log2i_exact((juint)$src2$$constant + 1u));
|
||||
%}
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
@ -9809,8 +9806,8 @@ instruct andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src1, immIpowerOf2 src2) %
|
||||
format %{ "ANDWI $dst, $src1, $src2" %}
|
||||
size(4);
|
||||
ins_encode %{
|
||||
__ rlwinm($dst$$Register, $src1$$Register, 0,
|
||||
(31-log2_long((jlong) $src2$$constant)) & 0x1f, (31-log2_long((jlong) $src2$$constant)) & 0x1f);
|
||||
int bitpos = 31 - log2i_exact((juint)$src2$$constant);
|
||||
__ rlwinm($dst$$Register, $src1$$Register, 0, bitpos, bitpos);
|
||||
%}
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
@ -9848,7 +9845,7 @@ instruct andL_reg_immLnegpow2(iRegLdst dst, iRegLsrc src1, immLnegpow2 src2) %{
|
||||
format %{ "ANDDI $dst, $src1, $src2" %}
|
||||
size(4);
|
||||
ins_encode %{
|
||||
__ clrrdi($dst$$Register, $src1$$Register, log2_long((jlong)-$src2$$constant));
|
||||
__ clrrdi($dst$$Register, $src1$$Register, log2i_exact(-(julong)$src2$$constant));
|
||||
%}
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
@ -9858,7 +9855,7 @@ instruct andL_reg_immLpow2minus1(iRegLdst dst, iRegLsrc src1, immLpow2minus1 src
|
||||
format %{ "ANDDI $dst, $src1, $src2" %}
|
||||
size(4);
|
||||
ins_encode %{
|
||||
__ clrldi($dst$$Register, $src1$$Register, 64-log2_long((((jlong) $src2$$constant)+1)));
|
||||
__ clrldi($dst$$Register, $src1$$Register, 64 - log2i_exact((julong)$src2$$constant + 1ull));
|
||||
%}
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
@ -9871,7 +9868,7 @@ instruct convL2I_andL_reg_immLpow2minus1(iRegIdst dst, iRegLsrc src1, immLpow2mi
|
||||
format %{ "ANDDI $dst, $src1, $src2 \t// long + l2i" %}
|
||||
size(4);
|
||||
ins_encode %{
|
||||
__ clrldi($dst$$Register, $src1$$Register, 64-log2_long((((jlong) $src2$$constant)+1)));
|
||||
__ clrldi($dst$$Register, $src1$$Register, 64 - log2i_exact((julong)$src2$$constant + 1ull));
|
||||
%}
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
@ -10459,7 +10456,7 @@ instruct convI2Bool_andI_reg_immIpowerOf2(iRegIdst dst, iRegIsrc src, immIpowerO
|
||||
format %{ "RLWINM $dst, $src, $mask \t// convI2B(AndI($src, $mask))" %}
|
||||
size(4);
|
||||
ins_encode %{
|
||||
__ rlwinm($dst$$Register, $src$$Register, (32-log2_long((jlong)$mask$$constant)) & 0x1f, 31, 31);
|
||||
__ rlwinm($dst$$Register, $src$$Register, 32 - log2i_exact((juint)($mask$$constant)), 31, 31);
|
||||
%}
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
@ -12154,7 +12151,7 @@ instruct align_addr(iRegPdst dst, iRegPsrc src, immLnegpow2 mask) %{
|
||||
format %{ "ANDDI $dst, $src, $mask \t// next aligned address" %}
|
||||
size(4);
|
||||
ins_encode %{
|
||||
__ clrrdi($dst$$Register, $src$$Register, log2_long((jlong)-$mask$$constant));
|
||||
__ clrrdi($dst$$Register, $src$$Register, log2i_exact(-(julong)$mask$$constant));
|
||||
%}
|
||||
ins_pipe(pipe_class_default);
|
||||
%}
|
||||
|
@ -1897,7 +1897,7 @@ void TemplateTable::tableswitch() {
|
||||
|
||||
// Align bcp.
|
||||
__ addi(Rdef_offset_addr, R14_bcp, BytesPerInt);
|
||||
__ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt));
|
||||
__ clrrdi(Rdef_offset_addr, Rdef_offset_addr, LogBytesPerInt);
|
||||
|
||||
// Load lo & hi.
|
||||
__ get_u4(Rlow_byte, Rdef_offset_addr, BytesPerInt, InterpreterMacroAssembler::Unsigned);
|
||||
@ -1956,7 +1956,7 @@ void TemplateTable::fast_linearswitch() {
|
||||
|
||||
// Align bcp.
|
||||
__ addi(Rdef_offset_addr, R14_bcp, BytesPerInt);
|
||||
__ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt));
|
||||
__ clrrdi(Rdef_offset_addr, Rdef_offset_addr, LogBytesPerInt);
|
||||
|
||||
// Setup loop counter and limit.
|
||||
__ get_u4(Rcount, Rdef_offset_addr, BytesPerInt, InterpreterMacroAssembler::Unsigned);
|
||||
@ -2044,7 +2044,7 @@ void TemplateTable::fast_binaryswitch() {
|
||||
|
||||
// Find Array start,
|
||||
__ addi(Rarray, R14_bcp, 3 * BytesPerInt);
|
||||
__ clrrdi(Rarray, Rarray, log2_long((jlong)BytesPerInt));
|
||||
__ clrrdi(Rarray, Rarray, LogBytesPerInt);
|
||||
|
||||
// initialize i & j
|
||||
__ li(Ri,0);
|
||||
|
@ -1802,7 +1802,7 @@ void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right,
|
||||
Register treg1 = Z_R0_scratch;
|
||||
Register treg2 = Z_R1_scratch;
|
||||
jlong divisor = right->as_jlong();
|
||||
jlong log_divisor = log2_long(right->as_jlong());
|
||||
jlong log_divisor = log2i_exact(right->as_jlong());
|
||||
|
||||
if (divisor == min_jlong) {
|
||||
// Min_jlong is special. Result is '0' except for min_jlong/min_jlong = 1.
|
||||
@ -1890,7 +1890,7 @@ void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right,
|
||||
Register treg1 = Z_R0_scratch;
|
||||
Register treg2 = Z_R1_scratch;
|
||||
jlong divisor = right->as_jint();
|
||||
jlong log_divisor = log2_long(right->as_jint());
|
||||
jlong log_divisor = log2i_exact(right->as_jint());
|
||||
__ move_reg_if_needed(dreg, T_LONG, lreg, T_INT); // sign extend
|
||||
if (divisor == 2) {
|
||||
__ z_srlg(treg2, dreg, 63); // dividend < 0 ? 1 : 0
|
||||
|
@ -228,12 +228,12 @@ bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result
|
||||
if (tmp->is_valid()) {
|
||||
if (is_power_of_2(c + 1)) {
|
||||
__ move(left, tmp);
|
||||
__ shift_left(left, log2_int(c + 1), left);
|
||||
__ shift_left(left, log2i_exact(c + 1), left);
|
||||
__ sub(left, tmp, result);
|
||||
return true;
|
||||
} else if (is_power_of_2(c - 1)) {
|
||||
__ move(left, tmp);
|
||||
__ shift_left(left, log2_int(c - 1), left);
|
||||
__ shift_left(left, log2i_exact(c - 1), left);
|
||||
__ add(left, tmp, result);
|
||||
return true;
|
||||
}
|
||||
|
@ -2630,7 +2630,7 @@ void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right,
|
||||
__ andl(rdx, divisor - 1);
|
||||
__ addl(lreg, rdx);
|
||||
}
|
||||
__ sarl(lreg, log2_jint(divisor));
|
||||
__ sarl(lreg, log2i_exact(divisor));
|
||||
move_regs(lreg, dreg);
|
||||
} else if (code == lir_irem) {
|
||||
Label done;
|
||||
|
@ -249,12 +249,12 @@ bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result
|
||||
if (tmp->is_valid() && c > 0 && c < max_jint) {
|
||||
if (is_power_of_2(c + 1)) {
|
||||
__ move(left, tmp);
|
||||
__ shift_left(left, log2_jint(c + 1), left);
|
||||
__ shift_left(left, log2i_exact(c + 1), left);
|
||||
__ sub(left, tmp, result);
|
||||
return true;
|
||||
} else if (is_power_of_2(c - 1)) {
|
||||
__ move(left, tmp);
|
||||
__ shift_left(left, log2_jint(c - 1), left);
|
||||
__ shift_left(left, log2i_exact(c - 1), left);
|
||||
__ add(left, tmp, result);
|
||||
return true;
|
||||
}
|
||||
|
@ -139,7 +139,7 @@ size_t ZPlatformAddressOffsetBits() {
|
||||
const size_t min_address_offset_bits = 42; // 4TB
|
||||
const size_t max_address_offset_bits = 44; // 16TB
|
||||
const size_t address_offset = round_up_power_of_2(MaxHeapSize * ZVirtualToPhysicalRatio);
|
||||
const size_t address_offset_bits = log2_intptr(address_offset);
|
||||
const size_t address_offset_bits = log2i_exact(address_offset);
|
||||
return clamp(address_offset_bits, min_address_offset_bits, max_address_offset_bits);
|
||||
}
|
||||
|
||||
|
@ -153,7 +153,7 @@ void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register ca
|
||||
// CallTypeData/VirtualCallTypeData to reach its end. Non null
|
||||
// if there's a return to profile.
|
||||
assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type");
|
||||
shll(tmp, exact_log2(DataLayout::cell_size));
|
||||
shll(tmp, log2i_exact((int)DataLayout::cell_size));
|
||||
addptr(mdp, tmp);
|
||||
}
|
||||
movptr(Address(rbp, frame::interpreter_frame_mdp_offset * wordSize), mdp);
|
||||
|
@ -782,7 +782,7 @@ void VM_Version::get_processor_features() {
|
||||
cores_per_cpu(), threads_per_core(),
|
||||
cpu_family(), _model, _stepping, os::cpu_microcode_revision());
|
||||
assert(res > 0, "not enough temporary space allocated");
|
||||
assert(exact_log2_long(CPU_MAX_FEATURE) + 1 == sizeof(_features_names) / sizeof(char*), "wrong size features_names");
|
||||
assert(log2i_exact((uint64_t)CPU_MAX_FEATURE) + 1 == sizeof(_features_names) / sizeof(char*), "wrong size features_names");
|
||||
insert_features_names(buf + res, sizeof(buf) - res, _features_names);
|
||||
|
||||
_features_string = os::strdup(buf);
|
||||
|
@ -9592,7 +9592,7 @@ instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
|
||||
%{
|
||||
// con should be a pure 64-bit immediate given that not(con) is a power of 2
|
||||
// because AND/OR works well enough for 8/32-bit values.
|
||||
predicate(log2_long(~n->in(3)->in(2)->get_long()) > 30);
|
||||
predicate(log2i_graceful(~n->in(3)->in(2)->get_long()) > 30);
|
||||
|
||||
match(Set dst (StoreL dst (AndL (LoadL dst) con)));
|
||||
effect(KILL cr);
|
||||
@ -9600,7 +9600,7 @@ instruct btrL_mem_imm(memory dst, immL_NotPow2 con, rFlagsReg cr)
|
||||
ins_cost(125);
|
||||
format %{ "btrq $dst, log2(not($con))\t# long" %}
|
||||
ins_encode %{
|
||||
__ btrq($dst$$Address, log2_long(~$con$$constant));
|
||||
__ btrq($dst$$Address, log2i_exact(~$con$$constant));
|
||||
%}
|
||||
ins_pipe(ialu_mem_imm);
|
||||
%}
|
||||
@ -9802,7 +9802,7 @@ instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
|
||||
%{
|
||||
// con should be a pure 64-bit power of 2 immediate
|
||||
// because AND/OR works well enough for 8/32-bit values.
|
||||
predicate(log2_long(n->in(3)->in(2)->get_long()) > 31);
|
||||
predicate(log2i_graceful(n->in(3)->in(2)->get_long()) > 31);
|
||||
|
||||
match(Set dst (StoreL dst (OrL (LoadL dst) con)));
|
||||
effect(KILL cr);
|
||||
@ -9810,7 +9810,7 @@ instruct btsL_mem_imm(memory dst, immL_Pow2 con, rFlagsReg cr)
|
||||
ins_cost(125);
|
||||
format %{ "btsq $dst, log2($con)\t# long" %}
|
||||
ins_encode %{
|
||||
__ btsq($dst$$Address, log2_long((julong)$con$$constant));
|
||||
__ btsq($dst$$Address, log2i_exact((julong)$con$$constant));
|
||||
%}
|
||||
ins_pipe(ialu_mem_imm);
|
||||
%}
|
||||
|
@ -251,7 +251,7 @@ void SimpleCompPolicy::initialize() {
|
||||
// Example: if CICompilerCountPerCPU is true, then we get
|
||||
// max(log2(8)-1,1) = 2 compiler threads on an 8-way machine.
|
||||
// May help big-app startup time.
|
||||
_compiler_count = MAX2(log2_int(os::active_processor_count())-1,1);
|
||||
_compiler_count = MAX2(log2i_graceful(os::active_processor_count()) - 1, 1);
|
||||
// Make sure there is enough space in the code cache to hold all the compiler buffers
|
||||
size_t buffer_size = 1;
|
||||
#ifdef COMPILER1
|
||||
|
@ -117,19 +117,14 @@ intx CompilerConfig::scaled_freq_log(intx freq_log, double scale) {
|
||||
// of length InvocationCounter::number_of_count_bits. Mask values are always
|
||||
// one bit shorter than the value of the notification frequency. Set
|
||||
// max_freq_bits accordingly.
|
||||
intx max_freq_bits = InvocationCounter::number_of_count_bits + 1;
|
||||
int max_freq_bits = InvocationCounter::number_of_count_bits + 1;
|
||||
intx scaled_freq = scaled_compile_threshold((intx)1 << freq_log, scale);
|
||||
|
||||
if (scaled_freq == 0) {
|
||||
// Return 0 right away to avoid calculating log2 of 0.
|
||||
return 0;
|
||||
} else {
|
||||
intx res = log2_intptr(scaled_freq);
|
||||
if (res > max_freq_bits) {
|
||||
return max_freq_bits;
|
||||
} else {
|
||||
return res;
|
||||
}
|
||||
return MIN2(log2i(scaled_freq), max_freq_bits);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -253,8 +253,8 @@ void TieredThresholdPolicy::initialize() {
|
||||
}
|
||||
if (CICompilerCountPerCPU) {
|
||||
// Simple log n seems to grow too slowly for tiered, try something faster: log n * log log n
|
||||
int log_cpu = log2_int(os::active_processor_count());
|
||||
int loglog_cpu = log2_int(MAX2(log_cpu, 1));
|
||||
int log_cpu = log2i(os::active_processor_count());
|
||||
int loglog_cpu = log2i(MAX2(log_cpu, 1));
|
||||
count = MAX2(log_cpu * loglog_cpu * 3 / 2, 2);
|
||||
// Make sure there is enough space in the code cache to hold all the compiler buffers
|
||||
size_t c1_size = Compiler::code_buffer_size();
|
||||
|
@ -82,7 +82,7 @@ protected:
|
||||
size_t num_target_elems = pointer_delta(end, bottom, mapping_granularity_in_bytes);
|
||||
idx_t bias = (uintptr_t)bottom / mapping_granularity_in_bytes;
|
||||
address base = create_new_base_array(num_target_elems, target_elem_size_in_bytes);
|
||||
initialize_base(base, num_target_elems, bias, target_elem_size_in_bytes, log2_intptr(mapping_granularity_in_bytes));
|
||||
initialize_base(base, num_target_elems, bias, target_elem_size_in_bytes, log2i_exact(mapping_granularity_in_bytes));
|
||||
}
|
||||
|
||||
size_t bias() const { return _bias; }
|
||||
|
@ -29,6 +29,7 @@
|
||||
#include "utilities/debug.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
#include "utilities/powerOfTwo.hpp"
|
||||
|
||||
G1FreeIdSet::G1FreeIdSet(uint start, uint size) :
|
||||
_sem(size), // counting semaphore for available ids
|
||||
@ -43,7 +44,7 @@ G1FreeIdSet::G1FreeIdSet(uint start, uint size) :
|
||||
"start (%u) + size (%u) overflow: ", start, size);
|
||||
// 2^shift must be greater than size. Equal is not permitted, because
|
||||
// size is the "end of list" value, and can be the index part of _head.
|
||||
uint shift = log2_intptr((uintptr_t)size) + 1;
|
||||
uint shift = log2i(size) + 1;
|
||||
assert(shift <= (BitsPerWord / 2), "excessive size %u", size);
|
||||
_head_index_mask = (uintx(1) << shift) - 1;
|
||||
assert(size <= _head_index_mask, "invariant");
|
||||
|
@ -57,6 +57,7 @@
|
||||
#include "runtime/os.hpp"
|
||||
#include "utilities/align.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "utilities/powerOfTwo.hpp"
|
||||
#include "utilities/stack.inline.hpp"
|
||||
#include "utilities/ticks.hpp"
|
||||
|
||||
@ -289,7 +290,7 @@ public:
|
||||
_collection_set_iter_state(NULL),
|
||||
_card_table_scan_state(NULL),
|
||||
_scan_chunks_per_region(get_chunks_per_region(HeapRegion::LogOfHRGrainBytes)),
|
||||
_log_scan_chunks_per_region(log2_uint(_scan_chunks_per_region)),
|
||||
_log_scan_chunks_per_region(log2i(_scan_chunks_per_region)),
|
||||
_region_scan_chunks(NULL),
|
||||
_num_total_scan_chunks(0),
|
||||
_scan_chunks_shift(0),
|
||||
@ -313,7 +314,7 @@ public:
|
||||
_num_total_scan_chunks = max_reserved_regions * _scan_chunks_per_region;
|
||||
_region_scan_chunks = NEW_C_HEAP_ARRAY(bool, _num_total_scan_chunks, mtGC);
|
||||
|
||||
_scan_chunks_shift = (uint8_t)log2_intptr(HeapRegion::CardsPerRegion / _scan_chunks_per_region);
|
||||
_scan_chunks_shift = (uint8_t)log2i(HeapRegion::CardsPerRegion / _scan_chunks_per_region);
|
||||
_scan_top = NEW_C_HEAP_ARRAY(HeapWord*, max_reserved_regions, mtGC);
|
||||
}
|
||||
|
||||
|
@ -78,7 +78,7 @@ void HeapRegion::setup_heap_region_size(size_t max_heap_size) {
|
||||
region_size = clamp(region_size, HeapRegionBounds::min_size(), HeapRegionBounds::max_size());
|
||||
|
||||
// Calculate the log for the region size.
|
||||
int region_size_log = exact_log2_long((jlong)region_size);
|
||||
int region_size_log = log2i_exact(region_size);
|
||||
|
||||
// Now, set up the globals.
|
||||
guarantee(LogOfHRGrainBytes == 0, "we should only set it once");
|
||||
@ -99,7 +99,7 @@ void HeapRegion::setup_heap_region_size(size_t max_heap_size) {
|
||||
guarantee(CardsPerRegion == 0, "we should only set it once");
|
||||
CardsPerRegion = GrainBytes >> G1CardTable::card_shift;
|
||||
|
||||
LogCardsPerRegion = log2_long((jlong) CardsPerRegion);
|
||||
LogCardsPerRegion = log2i(CardsPerRegion);
|
||||
|
||||
if (G1HeapRegionSize != GrainBytes) {
|
||||
FLAG_SET_ERGO(G1HeapRegionSize, GrainBytes);
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include "utilities/formatBuffer.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "utilities/growableArray.hpp"
|
||||
#include "utilities/powerOfTwo.hpp"
|
||||
|
||||
const char* HeapRegionRemSet::_state_strings[] = {"Untracked", "Updating", "Complete"};
|
||||
const char* HeapRegionRemSet::_short_state_strings[] = {"UNTRA", "UPDAT", "CMPLT"};
|
||||
@ -83,7 +84,7 @@ OtherRegionsTable::OtherRegionsTable(Mutex* m) :
|
||||
|
||||
if (_max_fine_entries == 0) {
|
||||
assert(_mod_max_fine_entries_mask == 0, "Both or none.");
|
||||
size_t max_entries_log = (size_t)log2_long((jlong)G1RSetRegionEntries);
|
||||
size_t max_entries_log = (size_t)log2i(G1RSetRegionEntries);
|
||||
_max_fine_entries = (size_t)1 << max_entries_log;
|
||||
_mod_max_fine_entries_mask = _max_fine_entries - 1;
|
||||
|
||||
|
@ -26,6 +26,7 @@
|
||||
#include "gc/shared/partialArrayTaskStepper.hpp"
|
||||
#include "oops/arrayOop.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include "utilities/powerOfTwo.hpp"
|
||||
|
||||
static uint compute_task_limit(uint n_workers) {
|
||||
// Don't need more than n_workers tasks at a time. But allowing up to
|
||||
@ -39,7 +40,7 @@ static uint compute_task_fanout(uint task_limit) {
|
||||
// number of enqueued tasks. A constant fanout may be too slow when
|
||||
// parallelism (and so task_limit) is large. A constant fraction might
|
||||
// be overly eager. Using log2 attempts to balance between those.
|
||||
uint result = log2_uint(task_limit);
|
||||
uint result = log2i(task_limit);
|
||||
// result must be > 0. result should be > 1 if task_limit > 1, to
|
||||
// provide some potentially parallel tasks. But don't just +1 to
|
||||
// avoid otherwise increasing rate of task generation.
|
||||
|
@ -36,6 +36,7 @@
|
||||
#include "oops/compressedOops.inline.hpp"
|
||||
#include "oops/oop.inline.hpp"
|
||||
#include "runtime/prefetch.inline.hpp"
|
||||
#include "utilities/powerOfTwo.hpp"
|
||||
|
||||
template <class T>
|
||||
void ShenandoahConcurrentMark::do_task(ShenandoahObjToScanQueue* q, T* cl, ShenandoahLiveData* live_data, ShenandoahMarkTask* task) {
|
||||
@ -120,7 +121,7 @@ inline void ShenandoahConcurrentMark::do_chunked_array_start(ShenandoahObjToScan
|
||||
// A few slices only, process directly
|
||||
array->oop_iterate_range(cl, 0, len);
|
||||
} else {
|
||||
int bits = log2_long((size_t) len);
|
||||
int bits = log2i_graceful(len);
|
||||
// Compensate for non-power-of-two arrays, cover the array in excess:
|
||||
if (len != (1 << bits)) bits++;
|
||||
|
||||
|
@ -40,6 +40,7 @@
|
||||
#include "runtime/mutexLocker.hpp"
|
||||
#include "runtime/os.hpp"
|
||||
#include "runtime/safepoint.hpp"
|
||||
#include "utilities/powerOfTwo.hpp"
|
||||
|
||||
size_t ShenandoahHeapRegion::RegionCount = 0;
|
||||
size_t ShenandoahHeapRegion::RegionSizeBytes = 0;
|
||||
@ -548,7 +549,7 @@ void ShenandoahHeapRegion::setup_sizes(size_t max_heap_size) {
|
||||
region_size = MAX2(region_size, os::large_page_size());
|
||||
}
|
||||
|
||||
int region_size_log = log2_long((jlong) region_size);
|
||||
int region_size_log = log2i(region_size);
|
||||
// Recalculate the region size to make sure it's a power of
|
||||
// 2. This means that region_size is the largest power of 2 that's
|
||||
// <= what we've calculated so far.
|
||||
|
@ -139,7 +139,7 @@ void BinaryMagnitudeSeq::clear() {
|
||||
void BinaryMagnitudeSeq::add(size_t val) {
|
||||
Atomic::add(&_sum, val);
|
||||
|
||||
int mag = log2_intptr(val) + 1;
|
||||
int mag = log2i_graceful(val) + 1;
|
||||
|
||||
// Defensively saturate for product bits:
|
||||
if (mag < 0) {
|
||||
|
@ -45,7 +45,7 @@ void ZHeuristics::set_medium_page_size() {
|
||||
if (size > ZPageSizeSmall) {
|
||||
// Enable medium pages
|
||||
ZPageSizeMedium = size;
|
||||
ZPageSizeMediumShift = log2_intptr(ZPageSizeMedium);
|
||||
ZPageSizeMediumShift = log2i_exact(ZPageSizeMedium);
|
||||
ZObjectSizeLimitMedium = ZPageSizeMedium / 8;
|
||||
ZObjectAlignmentMediumShift = (int)ZPageSizeMediumShift - 13;
|
||||
ZObjectAlignmentMedium = 1 << ZObjectAlignmentMediumShift;
|
||||
|
@ -134,7 +134,7 @@ static Node *transform_int_divide( PhaseGVN *phase, Node *dividend, jint divisor
|
||||
}
|
||||
|
||||
// Add rounding to the shift to handle the sign bit
|
||||
int l = log2_jint(d-1)+1;
|
||||
int l = log2i_graceful(d - 1) + 1;
|
||||
if (needs_rounding) {
|
||||
// Divide-by-power-of-2 can be made into a shift, but you have to do
|
||||
// more math for the rounding. You need to add 0 for positive
|
||||
@ -380,7 +380,7 @@ static Node *transform_long_divide( PhaseGVN *phase, Node *dividend, jlong divis
|
||||
}
|
||||
|
||||
// Add rounding to the shift to handle the sign bit
|
||||
int l = log2_long(d-1)+1;
|
||||
int l = log2i_graceful(d - 1) + 1;
|
||||
if (needs_rounding) {
|
||||
// Divide-by-power-of-2 can be made into a shift, but you have to do
|
||||
// more math for the rounding. You need to add 0 for positive
|
||||
@ -929,8 +929,8 @@ Node *ModINode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||
int log2_con = -1;
|
||||
|
||||
// If this is a power of two, then maybe we can mask it
|
||||
if( is_power_of_2(pos_con) ) {
|
||||
log2_con = log2_intptr((intptr_t)pos_con);
|
||||
if (is_power_of_2(pos_con)) {
|
||||
log2_con = log2i_exact(pos_con);
|
||||
|
||||
const Type *dt = phase->type(in(1));
|
||||
const TypeInt *dti = dt->isa_int();
|
||||
@ -1036,8 +1036,8 @@ Node *ModLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||
Node *hook = new Node(1);
|
||||
|
||||
// Expand mod
|
||||
if( con >= 0 && con < max_jlong && is_power_of_2(con+1) ) {
|
||||
uint k = exact_log2_long(con+1); // Extract k
|
||||
if(con >= 0 && con < max_jlong && is_power_of_2(con + 1)) {
|
||||
uint k = log2i_exact(con + 1); // Extract k
|
||||
|
||||
// Basic algorithm by David Detlefs. See fastmod_long.java for gory details.
|
||||
// Used to help a popular random number generator which does a long-mod
|
||||
@ -1096,8 +1096,8 @@ Node *ModLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||
int log2_con = -1;
|
||||
|
||||
// If this is a power of two, then maybe we can mask it
|
||||
if( is_power_of_2(pos_con) ) {
|
||||
log2_con = exact_log2_long(pos_con);
|
||||
if (is_power_of_2(pos_con)) {
|
||||
log2_con = log2i_exact(pos_con);
|
||||
|
||||
const Type *dt = phase->type(in(1));
|
||||
const TypeLong *dtl = dt->isa_long();
|
||||
|
@ -201,21 +201,19 @@ Node *MulINode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||
Node *res = NULL;
|
||||
unsigned int bit1 = abs_con & (0-abs_con); // Extract low bit
|
||||
if (bit1 == abs_con) { // Found a power of 2?
|
||||
res = new LShiftINode(in(1), phase->intcon(log2_uint(bit1)));
|
||||
res = new LShiftINode(in(1), phase->intcon(log2i_exact(bit1)));
|
||||
} else {
|
||||
|
||||
// Check for constant with 2 bits set
|
||||
unsigned int bit2 = abs_con-bit1;
|
||||
bit2 = bit2 & (0-bit2); // Extract 2nd bit
|
||||
unsigned int bit2 = abs_con - bit1;
|
||||
bit2 = bit2 & (0 - bit2); // Extract 2nd bit
|
||||
if (bit2 + bit1 == abs_con) { // Found all bits in con?
|
||||
Node *n1 = phase->transform( new LShiftINode(in(1), phase->intcon(log2_uint(bit1))));
|
||||
Node *n2 = phase->transform( new LShiftINode(in(1), phase->intcon(log2_uint(bit2))));
|
||||
Node *n1 = phase->transform(new LShiftINode(in(1), phase->intcon(log2i_exact(bit1))));
|
||||
Node *n2 = phase->transform(new LShiftINode(in(1), phase->intcon(log2i_exact(bit2))));
|
||||
res = new AddINode(n2, n1);
|
||||
|
||||
} else if (is_power_of_2(abs_con+1)) {
|
||||
// Sleazy: power-of-2 -1. Next time be generic.
|
||||
} else if (is_power_of_2(abs_con + 1)) {
|
||||
// Sleazy: power-of-2 - 1. Next time be generic.
|
||||
unsigned int temp = abs_con + 1;
|
||||
Node *n1 = phase->transform(new LShiftINode(in(1), phase->intcon(log2_uint(temp))));
|
||||
Node *n1 = phase->transform(new LShiftINode(in(1), phase->intcon(log2i_exact(temp))));
|
||||
res = new SubINode(n1, in(1));
|
||||
} else {
|
||||
return MulNode::Ideal(phase, can_reshape);
|
||||
@ -297,21 +295,21 @@ Node *MulLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
|
||||
Node *res = NULL;
|
||||
julong bit1 = abs_con & (0-abs_con); // Extract low bit
|
||||
if (bit1 == abs_con) { // Found a power of 2?
|
||||
res = new LShiftLNode(in(1), phase->intcon(log2_long(bit1)));
|
||||
res = new LShiftLNode(in(1), phase->intcon(log2i_exact(bit1)));
|
||||
} else {
|
||||
|
||||
// Check for constant with 2 bits set
|
||||
julong bit2 = abs_con-bit1;
|
||||
bit2 = bit2 & (0-bit2); // Extract 2nd bit
|
||||
if (bit2 + bit1 == abs_con) { // Found all bits in con?
|
||||
Node *n1 = phase->transform(new LShiftLNode(in(1), phase->intcon(log2_long(bit1))));
|
||||
Node *n2 = phase->transform(new LShiftLNode(in(1), phase->intcon(log2_long(bit2))));
|
||||
Node *n1 = phase->transform(new LShiftLNode(in(1), phase->intcon(log2i_exact(bit1))));
|
||||
Node *n2 = phase->transform(new LShiftLNode(in(1), phase->intcon(log2i_exact(bit2))));
|
||||
res = new AddLNode(n2, n1);
|
||||
|
||||
} else if (is_power_of_2(abs_con+1)) {
|
||||
// Sleazy: power-of-2 -1. Next time be generic.
|
||||
julong temp = abs_con + 1;
|
||||
Node *n1 = phase->transform( new LShiftLNode(in(1), phase->intcon(log2_long(temp))));
|
||||
Node *n1 = phase->transform( new LShiftLNode(in(1), phase->intcon(log2i_exact(temp))));
|
||||
res = new SubLNode(n1, in(1));
|
||||
} else {
|
||||
return MulNode::Ideal(phase, can_reshape);
|
||||
@ -447,9 +445,9 @@ Node* AndINode::Identity(PhaseGVN* phase) {
|
||||
if (t2 && t2->is_con()) {
|
||||
int con = t2->get_con();
|
||||
// Masking off high bits which are always zero is useless.
|
||||
const TypeInt* t1 = phase->type( in(1) )->isa_int();
|
||||
const TypeInt* t1 = phase->type(in(1))->isa_int();
|
||||
if (t1 != NULL && t1->_lo >= 0) {
|
||||
jint t1_support = right_n_bits(1 + log2_jint(t1->_hi));
|
||||
jint t1_support = right_n_bits(1 + log2i_graceful(t1->_hi));
|
||||
if ((t1_support & con) == t1_support)
|
||||
return in1;
|
||||
}
|
||||
@ -572,7 +570,7 @@ Node* AndLNode::Identity(PhaseGVN* phase) {
|
||||
// Masking off high bits which are always zero is useless.
|
||||
const TypeLong* t1 = phase->type( in(1) )->isa_long();
|
||||
if (t1 != NULL && t1->_lo >= 0) {
|
||||
int bit_count = log2_long(t1->_hi) + 1;
|
||||
int bit_count = log2i_graceful(t1->_hi) + 1;
|
||||
jlong t1_support = jlong(max_julong >> (BitsPerJavaLong - bit_count));
|
||||
if ((t1_support & con) == t1_support)
|
||||
return usr;
|
||||
|
@ -975,7 +975,7 @@ void Parse::jump_switch_ranges(Node* key_val, SwitchRange *lo, SwitchRange *hi,
|
||||
#ifndef PRODUCT
|
||||
if (switch_depth == 0) {
|
||||
_max_switch_depth = 0;
|
||||
_est_switch_depth = log2_intptr((hi-lo+1)-1)+1;
|
||||
_est_switch_depth = log2i_graceful((hi - lo + 1) - 1) + 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
@ -368,7 +368,7 @@ bool LibraryCallKit::inline_vector_shuffle_iota() {
|
||||
Node* bcast_step = gvn().transform(VectorNode::scalar2vector(step, num_elem, type_bt));
|
||||
res = gvn().transform(VectorNode::make(Op_MulI, res, bcast_step, num_elem, elem_bt));
|
||||
} else if (step_val->get_con() > 1) {
|
||||
Node* cnt = gvn().makecon(TypeInt::make(log2_int(step_val->get_con())));
|
||||
Node* cnt = gvn().makecon(TypeInt::make(log2i_exact(step_val->get_con())));
|
||||
res = gvn().transform(VectorNode::make(Op_LShiftVB, res, cnt, vt));
|
||||
}
|
||||
|
||||
|
@ -1042,59 +1042,6 @@ inline T clamp(T value, T min, T max) {
|
||||
return MIN2(MAX2(value, min), max);
|
||||
}
|
||||
|
||||
// Returns largest i such that 2^i <= x.
|
||||
// If x == 0, the function returns -1.
|
||||
inline int log2_intptr(uintptr_t x) {
|
||||
int i = -1;
|
||||
uintptr_t p = 1;
|
||||
while (p != 0 && p <= x) {
|
||||
// p = 2^(i+1) && p <= x (i.e., 2^(i+1) <= x)
|
||||
i++; p *= 2;
|
||||
}
|
||||
// p = 2^(i+1) && x < p (i.e., 2^i <= x < 2^(i+1))
|
||||
// If p = 0, overflow has occurred and i = 31 or i = 63 (depending on the machine word size).
|
||||
return i;
|
||||
}
|
||||
|
||||
// Returns largest i such that 2^i <= x. If x == 0, the function returns -1.
|
||||
inline int log2_long(julong x) {
|
||||
int i = -1;
|
||||
julong p = 1;
|
||||
while (p != 0 && p <= x) {
|
||||
// p = 2^(i+1) && p <= x (i.e., 2^(i+1) <= x)
|
||||
i++; p *= 2;
|
||||
}
|
||||
// p = 2^(i+1) && x < p (i.e., 2^i <= x < 2^(i+1))
|
||||
// (if p = 0 then overflow occurred and i = 63)
|
||||
return i;
|
||||
}
|
||||
|
||||
// If x < 0, the function returns 31 on a 32-bit machine and 63 on a 64-bit machine.
|
||||
inline int log2_intptr(intptr_t x) {
|
||||
return log2_intptr((uintptr_t)x);
|
||||
}
|
||||
|
||||
inline int log2_int(int x) {
|
||||
STATIC_ASSERT(sizeof(int) <= sizeof(uintptr_t));
|
||||
return log2_intptr((uintptr_t)(unsigned int)x);
|
||||
}
|
||||
|
||||
inline int log2_jint(jint x) {
|
||||
STATIC_ASSERT(sizeof(jint) <= sizeof(uintptr_t));
|
||||
return log2_intptr((uintptr_t)(juint)x);
|
||||
}
|
||||
|
||||
inline int log2_uint(uint x) {
|
||||
STATIC_ASSERT(sizeof(uint) <= sizeof(uintptr_t));
|
||||
return log2_intptr((uintptr_t)x);
|
||||
}
|
||||
|
||||
// A negative value of 'x' will return '63'
|
||||
inline int log2_jlong(jlong x) {
|
||||
STATIC_ASSERT(sizeof(jlong) <= sizeof(julong));
|
||||
return log2_long((julong)x);
|
||||
}
|
||||
|
||||
inline bool is_odd (intx x) { return x & 1; }
|
||||
inline bool is_even(intx x) { return !is_odd(x); }
|
||||
|
||||
|
@ -64,8 +64,7 @@ template <MEMFLAGS F> BasicHashtableEntry<F>* BasicHashtable<F>::new_entry(unsig
|
||||
if (_first_free_entry + _entry_size >= _end_block) {
|
||||
int block_size = MAX2((int)_table_size / 2, (int)_number_of_entries); // pick a reasonable value
|
||||
block_size = clamp(block_size, 2, 512); // but never go out of this range
|
||||
int len = _entry_size * block_size;
|
||||
len = 1 << log2_int(len); // round down to power of 2
|
||||
int len = round_down_power_of_2(_entry_size * block_size);
|
||||
assert(len >= _entry_size, "");
|
||||
_first_free_entry = NEW_C_HEAP_ARRAY2(char, len, F, CURRENT_PC);
|
||||
_entry_blocks.append(_first_free_entry);
|
||||
|
@ -27,6 +27,7 @@
|
||||
|
||||
#include "metaprogramming/enableIf.hpp"
|
||||
#include "utilities/count_leading_zeros.hpp"
|
||||
#include "utilities/count_trailing_zeros.hpp"
|
||||
#include "utilities/debug.hpp"
|
||||
#include "utilities/globalDefinitions.hpp"
|
||||
#include <limits>
|
||||
@ -40,26 +41,52 @@ constexpr T max_power_of_2() {
|
||||
return max_val - (max_val >> 1);
|
||||
}
|
||||
|
||||
// Returns true iff there exists integer i such that (T(1) << i) == x.
|
||||
// Returns true iff there exists integer i such that (T(1) << i) == value.
|
||||
template <typename T, ENABLE_IF(std::is_integral<T>::value)>
|
||||
constexpr bool is_power_of_2(T x) {
|
||||
return (x > T(0)) && ((x & (x - 1)) == T(0));
|
||||
constexpr bool is_power_of_2(T value) {
|
||||
return (value > T(0)) && ((value & (value - 1)) == T(0));
|
||||
}
|
||||
|
||||
// Log2 of a power of 2
|
||||
inline int exact_log2(intptr_t x) {
|
||||
assert(is_power_of_2((uintptr_t)x), "x must be a power of 2: " INTPTR_FORMAT, x);
|
||||
|
||||
const int bits = sizeof x * BitsPerByte;
|
||||
return bits - count_leading_zeros(x) - 1;
|
||||
// Log2 of a positive, integral value, i.e., largest i such that 2^i <= value
|
||||
// Precondition: value > 0
|
||||
template<typename T, ENABLE_IF(std::is_integral<T>::value)>
|
||||
inline int log2i(T value) {
|
||||
// Zero and negative inputs violate the precondition and trip this assert.
assert(value > T(0), "value must be > 0");
|
||||
// Width of T in bits.
const int bits = sizeof(value) * BitsPerByte;
|
||||
// Index of the highest set bit: bit width minus the leading zero count, minus one.
return bits - count_leading_zeros(value) - 1;
|
||||
}
|
||||
|
||||
// Log2 of a power of 2
|
||||
inline int exact_log2_long(jlong x) {
|
||||
assert(is_power_of_2((julong)x), "x must be a power of 2: " JLONG_FORMAT, x);
|
||||
// Log2 of positive, integral value, i.e., largest i such that 2^i <= value
|
||||
// Returns -1 if value is zero
|
||||
// For negative values this will return 63 for 64-bit types, 31 for
|
||||
// 32-bit types, and so on.
|
||||
// Unlike log2i, this variant does not assert on its input.
template<typename T, ENABLE_IF(std::is_integral<T>::value)>
|
||||
inline int log2i_graceful(T value) {
|
||||
// Zero has no set bit; report -1 instead of computing on it.
if (value == 0) {
|
||||
return -1;
|
||||
}
|
||||
// Width of T in bits.
const int bits = sizeof(value) * BitsPerByte;
|
||||
// Index of the highest set bit.
// NOTE(review): the documented result for negative values presumably relies on
// count_leading_zeros returning 0 when the sign bit is set - confirm.
return bits - count_leading_zeros(value) - 1;
|
||||
}
|
||||
|
||||
const int bits = sizeof x * BitsPerByte;
|
||||
return bits - count_leading_zeros(x) - 1;
|
||||
// Log2 of a power of 2, i.e., i such that 2^i == value
|
||||
// Preconditions: value > 0, value is a power of two
|
||||
template<typename T, ENABLE_IF(std::is_integral<T>::value)>
|
||||
inline int log2i_exact(T value) {
|
||||
// The offending value is cast to uint64_t so it matches the UINT64_FORMAT_X
// specifier in the failure message regardless of T.
assert(is_power_of_2(value),
|
||||
"value must be a power of 2: " UINT64_FORMAT_X,
|
||||
static_cast<uint64_t>(value));
|
||||
// A power of two has exactly one set bit, so its log2 equals the number of
// zero bits below that bit.
return count_trailing_zeros(value);
|
||||
}
|
||||
|
||||
// Log2 of an intptr_t whose bit pattern is a power of two.
// Preconditions: value != 0, and the unsigned representation of value is a power of two
|
||||
inline int exact_log2(intptr_t value) {
|
||||
// Cast to unsigned first: is_power_of_2 requires a value greater than zero, so a
// signed value with only the top bit set would otherwise be rejected.
return log2i_exact((uintptr_t)value);
|
||||
}
|
||||
|
||||
// Log2 of a jlong whose bit pattern is a power of two.
// Preconditions: value != 0, and the unsigned representation of value is a power of two
|
||||
inline int exact_log2_long(jlong value) {
|
||||
// Cast to unsigned first: is_power_of_2 requires a value greater than zero, so a
// signed value with only the top bit set would otherwise be rejected.
return log2i_exact((julong)value);
|
||||
}
|
||||
|
||||
// Round down to the closest power of two less than or equal to the given value.
|
||||
@ -67,8 +94,7 @@ inline int exact_log2_long(jlong x) {
|
||||
template<typename T, ENABLE_IF(std::is_integral<T>::value)>
|
||||
inline T round_down_power_of_2(T value) {
|
||||
assert(value > 0, "Invalid value");
|
||||
uint32_t lz = count_leading_zeros(value);
|
||||
return T(1) << (sizeof(T) * BitsPerByte - 1 - lz);
|
||||
return T(1) << log2i(value);
|
||||
}
|
||||
|
||||
// Round up to the closest power of two greater than or equal to the given value.
|
||||
@ -81,8 +107,7 @@ inline T round_up_power_of_2(T value) {
|
||||
if (is_power_of_2(value)) {
|
||||
return value;
|
||||
}
|
||||
uint32_t lz = count_leading_zeros(value);
|
||||
return T(1) << (sizeof(T) * BitsPerByte - lz);
|
||||
return T(1) << (log2i(value) + 1);
|
||||
}
|
||||
|
||||
// Calculate the next power of two greater than the given value.
|
||||
|
@ -190,41 +190,6 @@ TEST(globalDefinitions, byte_size_in_exact_unit) {
|
||||
#endif
|
||||
}
|
||||
|
||||
#define EXPECT_EQ_LOG2(fn, type) \
|
||||
{ \
|
||||
int limit = sizeof (type) * BitsPerByte; \
|
||||
if (std::is_signed<type>::value) { \
|
||||
EXPECT_EQ(limit - 1, fn(std::numeric_limits<type>::min())); \
|
||||
EXPECT_EQ(limit - 1, fn((type)-1)); \
|
||||
limit--; \
|
||||
} \
|
||||
{ \
|
||||
/* Test the all-1s bit patterns */ \
|
||||
type var = 1; \
|
||||
for (int i = 0; i < limit; i++, var = (var << 1) | 1) { \
|
||||
EXPECT_EQ(i, fn(var)); \
|
||||
} \
|
||||
} \
|
||||
{ \
|
||||
/* Test the powers of 2 and powers + 1*/ \
|
||||
type var = 1; \
|
||||
for (int i = 0; i < limit; i++, var <<= 1) { \
|
||||
EXPECT_EQ(i, fn(var)); \
|
||||
EXPECT_EQ(i, fn(var | 1)); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
TEST(globalDefinitions, log2) {
|
||||
EXPECT_EQ_LOG2(log2_intptr, uintptr_t);
|
||||
EXPECT_EQ_LOG2(log2_intptr, intptr_t);
|
||||
EXPECT_EQ_LOG2(log2_long, julong);
|
||||
EXPECT_EQ_LOG2(log2_int, int);
|
||||
EXPECT_EQ_LOG2(log2_jint, jint);
|
||||
EXPECT_EQ_LOG2(log2_uint, uint);
|
||||
EXPECT_EQ_LOG2(log2_jlong, jlong);
|
||||
}
|
||||
|
||||
TEST(globalDefinitions, array_size) {
|
||||
const size_t test_size = 10;
|
||||
|
||||
|
@ -262,3 +262,45 @@ TEST(power_of_2, max) {
|
||||
EXPECT_EQ(max_power_of_2<uint32_t>(), 0x80000000u);
|
||||
EXPECT_EQ(max_power_of_2<uint64_t>(), UCONST64(0x8000000000000000));
|
||||
}
|
||||
|
||||
// Checks log2i, log2i_graceful and log2i_exact for the integral type T.
// The argument's value is never read; passing a typed value only selects T.
template <typename T, ENABLE_IF(std::is_integral<T>::value)>
|
||||
void check_log2i_variants_for(T dummy) {
|
||||
// Number of bit positions to test; starts as the full width of T.
int limit = sizeof(T) * BitsPerByte;
|
||||
if (std::is_signed<T>::value) {
|
||||
// Negative inputs: log2i_graceful is expected to report the index of the top bit.
T min = std::numeric_limits<T>::min();
|
||||
EXPECT_EQ(limit - 1, log2i_graceful(min));
|
||||
EXPECT_EQ(limit - 1, log2i_graceful((T)-1));
|
||||
// Exclude the sign bit so the loops below only build positive values.
limit--;
|
||||
}
|
||||
{
|
||||
// Test log2i_graceful handles 0 input
|
||||
EXPECT_EQ(-1, log2i_graceful(T(0)));
|
||||
}
|
||||
{
|
||||
// Test the all-1s bit patterns
|
||||
// var takes the values 1, 3, 7, ...; after i steps its highest set bit is bit i.
T var = 1;
|
||||
for (int i = 0; i < limit; i++, var = (var << 1) | 1) {
|
||||
EXPECT_EQ(i, log2i(var));
|
||||
}
|
||||
}
|
||||
{
|
||||
// Test the powers of 2 and powers + 1
|
||||
// var is 1 << i on each iteration.
T var = 1;
|
||||
for (int i = 0; i < limit; i++, var <<= 1) {
|
||||
EXPECT_EQ(i, log2i(var));
|
||||
EXPECT_EQ(i, log2i_graceful(var));
|
||||
EXPECT_EQ(i, log2i_exact(var));
|
||||
// Setting the low bit must not change the result.
EXPECT_EQ(i, log2i(var | 1));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Runs the log2i variant checks once for each integral type listed below.
// The zero arguments only select the template type.
TEST(power_of_2, log2i) {
|
||||
check_log2i_variants_for((uintptr_t)0);
|
||||
check_log2i_variants_for((intptr_t)0);
|
||||
check_log2i_variants_for((julong)0);
|
||||
check_log2i_variants_for((int)0);
|
||||
check_log2i_variants_for((jint)0);
|
||||
check_log2i_variants_for((uint)0);
|
||||
check_log2i_variants_for((jlong)0);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user