diff --git a/hotspot/src/cpu/aarch64/vm/aarch64.ad b/hotspot/src/cpu/aarch64/vm/aarch64.ad index f555721eaa1..0fe321509f5 100644 --- a/hotspot/src/cpu/aarch64/vm/aarch64.ad +++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad @@ -865,6 +865,42 @@ reg_class double_reg( V31, V31_H ); +// Class for all 64bit vector registers +reg_class vectord_reg( + V0, V0_H, + V1, V1_H, + V2, V2_H, + V3, V3_H, + V4, V4_H, + V5, V5_H, + V6, V6_H, + V7, V7_H, + V8, V8_H, + V9, V9_H, + V10, V10_H, + V11, V11_H, + V12, V12_H, + V13, V13_H, + V14, V14_H, + V15, V15_H, + V16, V16_H, + V17, V17_H, + V18, V18_H, + V19, V19_H, + V20, V20_H, + V21, V21_H, + V22, V22_H, + V23, V23_H, + V24, V24_H, + V25, V25_H, + V26, V26_H, + V27, V27_H, + V28, V28_H, + V29, V29_H, + V30, V30_H, + V31, V31_H +); + // Class for all 128bit vector registers reg_class vectorx_reg( V0, V0_H, V0_J, V0_K, @@ -2133,40 +2169,48 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo if (bottom_type()->isa_vect() != NULL) { uint len = 4; + uint ireg = ideal_reg(); + assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector"); if (cbuf) { MacroAssembler _masm(cbuf); - uint ireg = ideal_reg(); assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity"); - assert(ireg == Op_VecX, "sanity"); if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) { // stack->stack int src_offset = ra_->reg2offset(src_lo); int dst_offset = ra_->reg2offset(dst_lo); assert((src_offset & 7) && (dst_offset & 7), "unaligned stack offset"); len = 8; - if (src_offset < 512) { - __ ldp(rscratch1, rscratch2, Address(sp, src_offset)); - } else { + if (ireg == Op_VecD) { __ ldr(rscratch1, Address(sp, src_offset)); - __ ldr(rscratch2, Address(sp, src_offset+4)); - len += 4; - } - if (dst_offset < 512) { - __ stp(rscratch1, rscratch2, Address(sp, dst_offset)); - } else { __ str(rscratch1, Address(sp, dst_offset)); - __ str(rscratch2, Address(sp, dst_offset+4)); - len += 4; + } else { + if (src_offset < 512) { + __ 
ldp(rscratch1, rscratch2, Address(sp, src_offset)); + } else { + __ ldr(rscratch1, Address(sp, src_offset)); + __ ldr(rscratch2, Address(sp, src_offset+8)); + len += 4; + } + if (dst_offset < 512) { + __ stp(rscratch1, rscratch2, Address(sp, dst_offset)); + } else { + __ str(rscratch1, Address(sp, dst_offset)); + __ str(rscratch2, Address(sp, dst_offset+8)); + len += 4; + } } } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { - __ orr(as_FloatRegister(Matcher::_regEncode[dst_lo]), __ T16B, + __ orr(as_FloatRegister(Matcher::_regEncode[dst_lo]), + ireg == Op_VecD ? __ T8B : __ T16B, as_FloatRegister(Matcher::_regEncode[src_lo]), as_FloatRegister(Matcher::_regEncode[src_lo])); } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { - __ str(as_FloatRegister(Matcher::_regEncode[src_lo]), __ Q, + __ str(as_FloatRegister(Matcher::_regEncode[src_lo]), + ireg == Op_VecD ? __ D : __ Q, Address(sp, ra_->reg2offset(dst_lo))); } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) { - __ ldr(as_FloatRegister(Matcher::_regEncode[dst_lo]), __ Q, + __ ldr(as_FloatRegister(Matcher::_regEncode[dst_lo]), + ireg == Op_VecD ? 
__ D : __ Q, Address(sp, ra_->reg2offset(src_lo))); } else { ShouldNotReachHere(); @@ -2176,17 +2220,22 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo // stack->stack int src_offset = ra_->reg2offset(src_lo); int dst_offset = ra_->reg2offset(dst_lo); - if (src_offset < 512) { - st->print("ldp rscratch1, rscratch2, [sp, #%d]", src_offset); - } else { + if (ireg == Op_VecD) { st->print("ldr rscratch1, [sp, #%d]", src_offset); - st->print("\nldr rscratch2, [sp, #%d]", src_offset+4); - } - if (dst_offset < 512) { - st->print("\nstp rscratch1, rscratch2, [sp, #%d]", dst_offset); + st->print("\nstr rscratch1, [sp, #%d]", dst_offset); } else { - st->print("\nstr rscratch1, [sp, #%d]", dst_offset); - st->print("\nstr rscratch2, [sp, #%d]", dst_offset+4); + if (src_offset < 512) { + st->print("ldp rscratch1, rscratch2, [sp, #%d]", src_offset); + } else { + st->print("ldr rscratch1, [sp, #%d]", src_offset); + st->print("\nldr rscratch2, [sp, #%d]", src_offset+8); + } + if (dst_offset < 512) { + st->print("\nstp rscratch1, rscratch2, [sp, #%d]", dst_offset); + } else { + st->print("\nstr rscratch1, [sp, #%d]", dst_offset); + st->print("\nstr rscratch2, [sp, #%d]", dst_offset+8); + } } st->print("\t# vector spill, stack to stack"); } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) { @@ -2638,17 +2687,22 @@ const int Matcher::max_vector_size(const BasicType bt) { return vector_width_in_bytes(bt)/type2aelembytes(bt); } const int Matcher::min_vector_size(const BasicType bt) { - //return (type2aelembytes(bt) == 1) ? 4 : 2; - // For the moment, only support 1 vector size, 128 bits - return max_vector_size(bt); +// For the moment limit the vector size to 8 bytes + int size = 8 / type2aelembytes(bt); + if (size < 2) size = 2; + return size; } // Vector ideal reg. 
const int Matcher::vector_ideal_reg(int len) { - return Op_VecX; + switch(len) { + case 8: return Op_VecD; + case 16: return Op_VecX; + } + ShouldNotReachHere(); + return 0; } -// Only lowest bits of xmm reg are used for vector shift count. const int Matcher::vector_shift_count_ideal_reg(int size) { return Op_VecX; } @@ -2660,9 +2714,7 @@ const bool Matcher::pass_original_key_for_aes() { // x86 supports misaligned vectors store/load. const bool Matcher::misaligned_vectors_ok() { - // TODO fixme - // return !AlignVector; // can be changed by flag - return false; + return !AlignVector; // can be changed by flag } // false => size gets scaled to BytesPerLong, ok. @@ -3073,13 +3125,13 @@ encode %{ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} - enc_class aarch64_enc_ldrvS(vecX dst, memory mem) %{ + enc_class aarch64_enc_ldrvS(vecD dst, memory mem) %{ FloatRegister dst_reg = as_FloatRegister($dst$$reg); loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::S, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} - enc_class aarch64_enc_ldrvD(vecX dst, memory mem) %{ + enc_class aarch64_enc_ldrvD(vecD dst, memory mem) %{ FloatRegister dst_reg = as_FloatRegister($dst$$reg); loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldr, dst_reg, MacroAssembler::D, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); @@ -3159,13 +3211,13 @@ encode %{ as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} - enc_class aarch64_enc_strvS(vecX src, memory mem) %{ + enc_class aarch64_enc_strvS(vecD src, memory mem) %{ FloatRegister src_reg = as_FloatRegister($src$$reg); loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::S, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); %} - enc_class aarch64_enc_strvD(vecX src, memory mem) %{ + enc_class aarch64_enc_strvD(vecD src, memory mem) %{ FloatRegister src_reg = 
as_FloatRegister($src$$reg); loadStore(MacroAssembler(&cbuf), &MacroAssembler::str, src_reg, MacroAssembler::D, $mem->opcode(), as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); @@ -5187,6 +5239,16 @@ operand vRegD() interface(REG_INTER); %} +operand vecD() +%{ + constraint(ALLOC_IN_RC(vectord_reg)); + match(VecD); + + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + operand vecX() %{ constraint(ALLOC_IN_RC(vectorx_reg)); @@ -7402,6 +7464,96 @@ instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{ ins_pipe(ialu_reg); %} +//---------- Population Count Instructions ------------------------------------- +// + +instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI src)); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "movw $src, $src\n\t" + "mov $tmp, $src\t# vector (1D)\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0 + __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + +instruct popCountI_mem(iRegINoSp dst, memory mem, vRegF tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountI (LoadI mem))); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "ldrs $tmp, $mem\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + FloatRegister tmp_reg = as_FloatRegister($tmp$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); 
+ __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + +// Note: Long.bitCount(long) returns an int. +instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL src)); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "mov $tmp, $src\t# vector (1D)\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + __ mov($tmp$$FloatRegister, __ T1D, 0, $src$$Register); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + +instruct popCountL_mem(iRegINoSp dst, memory mem, vRegD tmp) %{ + predicate(UsePopCountInstruction); + match(Set dst (PopCountL (LoadL mem))); + effect(TEMP tmp); + ins_cost(INSN_COST * 13); + + format %{ "ldrd $tmp, $mem\n\t" + "cnt $tmp, $tmp\t# vector (8B)\n\t" + "addv $tmp, $tmp\t# vector (8B)\n\t" + "mov $dst, $tmp\t# vector (1D)" %} + ins_encode %{ + FloatRegister tmp_reg = as_FloatRegister($tmp$$reg); + loadStore(MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(), + as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp); + __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister); + __ mov($dst$$Register, $tmp$$FloatRegister, __ T1D, 0); + %} + + ins_pipe(pipe_class_default); +%} + // ============================================================================ // MemBar Instruction @@ -13194,7 +13346,7 @@ instruct tlsLoadP(thread_RegP dst) // ====================VECTOR INSTRUCTIONS===================================== // Load vector (32 bits) -instruct loadV4(vecX dst, vmem mem) +instruct loadV4(vecD dst, vmem mem) %{ 
predicate(n->as_LoadVector()->memory_size() == 4); match(Set dst (LoadVector mem)); @@ -13205,7 +13357,7 @@ instruct loadV4(vecX dst, vmem mem) %} // Load vector (64 bits) -instruct loadV8(vecX dst, vmem mem) +instruct loadV8(vecD dst, vmem mem) %{ predicate(n->as_LoadVector()->memory_size() == 8); match(Set dst (LoadVector mem)); @@ -13227,7 +13379,7 @@ instruct loadV16(vecX dst, vmem mem) %} // Store Vector (32 bits) -instruct storeV4(vecX src, vmem mem) +instruct storeV4(vecD src, vmem mem) %{ predicate(n->as_StoreVector()->memory_size() == 4); match(Set mem (StoreVector mem src)); @@ -13238,7 +13390,7 @@ instruct storeV4(vecX src, vmem mem) %} // Store Vector (64 bits) -instruct storeV8(vecX src, vmem mem) +instruct storeV8(vecD src, vmem mem) %{ predicate(n->as_StoreVector()->memory_size() == 8); match(Set mem (StoreVector mem src)); @@ -13259,8 +13411,22 @@ instruct storeV16(vecX src, vmem mem) ins_pipe(pipe_class_memory); %} +instruct replicate8B(vecD dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (ReplicateB src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (8B)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T8B, as_Register($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct replicate16B(vecX dst, iRegIorL2I src) %{ + predicate(n->as_Vector()->length() == 16); match(Set dst (ReplicateB src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (16B)" %} @@ -13270,8 +13436,22 @@ instruct replicate16B(vecX dst, iRegIorL2I src) ins_pipe(pipe_class_default); %} +instruct replicate8B_imm(vecD dst, immI con) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (ReplicateB con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(8B)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T8B, $con$$constant & 0xff); + %} + ins_pipe(pipe_class_default); +%} + instruct 
replicate16B_imm(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 16); match(Set dst (ReplicateB con)); ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector(16B)" %} @@ -13281,8 +13461,22 @@ instruct replicate16B_imm(vecX dst, immI con) ins_pipe(pipe_class_default); %} +instruct replicate4S(vecD dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (ReplicateS src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (4S)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T4H, as_Register($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct replicate8S(vecX dst, iRegIorL2I src) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateS src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (8S)" %} @@ -13292,8 +13486,22 @@ instruct replicate8S(vecX dst, iRegIorL2I src) ins_pipe(pipe_class_default); %} +instruct replicate4S_imm(vecD dst, immI con) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (ReplicateS con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(4H)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T4H, $con$$constant & 0xffff); + %} + ins_pipe(pipe_class_default); +%} + instruct replicate8S_imm(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (ReplicateS con)); ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector(8H)" %} @@ -13303,8 +13511,21 @@ instruct replicate8S_imm(vecX dst, immI con) ins_pipe(pipe_class_default); %} +instruct replicate2I(vecD dst, iRegIorL2I src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2I)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T2S, as_Register($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct replicate4I(vecX dst, iRegIorL2I src) %{ + 
predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateI src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (4I)" %} @@ -13314,8 +13535,21 @@ instruct replicate4I(vecX dst, iRegIorL2I src) ins_pipe(pipe_class_default); %} +instruct replicate2I_imm(vecD dst, immI con) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateI con)); + ins_cost(INSN_COST); + format %{ "movi $dst, $con\t# vector(2I)" %} + ins_encode %{ + __ mov(as_FloatRegister($dst$$reg), __ T2S, $con$$constant); + %} + ins_pipe(pipe_class_default); +%} + instruct replicate4I_imm(vecX dst, immI con) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateI con)); ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector(4I)" %} @@ -13327,6 +13561,7 @@ instruct replicate4I_imm(vecX dst, immI con) instruct replicate2L(vecX dst, iRegL src) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateL src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (2L)" %} @@ -13338,6 +13573,7 @@ instruct replicate2L(vecX dst, iRegL src) instruct replicate2L_zero(vecX dst, immI0 zero) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateI zero)); ins_cost(INSN_COST); format %{ "movi $dst, $zero\t# vector(4I)" %} @@ -13349,8 +13585,22 @@ instruct replicate2L_zero(vecX dst, immI0 zero) ins_pipe(pipe_class_default); %} +instruct replicate2F(vecD dst, vRegF src) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (ReplicateF src)); + ins_cost(INSN_COST); + format %{ "dup $dst, $src\t# vector (2F)" %} + ins_encode %{ + __ dup(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct replicate4F(vecX dst, vRegF src) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (ReplicateF src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (4F)" %} @@ -13363,6 +13613,7 @@ instruct replicate4F(vecX dst, vRegF src) instruct replicate2D(vecX 
dst, vRegD src) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (ReplicateD src)); ins_cost(INSN_COST); format %{ "dup $dst, $src\t# vector (2D)" %} @@ -13375,6 +13626,25 @@ instruct replicate2D(vecX dst, vRegD src) // ====================REDUCTION ARITHMETIC==================================== +instruct reduce_add2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp, iRegI tmp2) +%{ + match(Set dst (AddReductionVI src1 src2)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP tmp2); + format %{ "umov $tmp, $src2, S, 0\n\t" + "umov $tmp2, $src2, S, 1\n\t" + "addw $dst, $src1, $tmp\n\t" + "addw $dst, $dst, $tmp2\t add reduction2i" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0); + __ umov($tmp2$$Register, as_FloatRegister($src2$$reg), __ S, 1); + __ addw($dst$$Register, $src1$$Register, $tmp$$Register); + __ addw($dst$$Register, $dst$$Register, $tmp2$$Register); + %} + ins_pipe(pipe_class_default); +%} + instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2) %{ match(Set dst (AddReductionVI src1 src2)); @@ -13393,6 +13663,25 @@ instruct reduce_add4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI ins_pipe(pipe_class_default); %} +instruct reduce_mul2I(iRegINoSp dst, iRegIorL2I src1, vecD src2, iRegI tmp) +%{ + match(Set dst (MulReductionVI src1 src2)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP dst); + format %{ "umov $tmp, $src2, S, 0\n\t" + "mul $dst, $tmp, $src1\n\t" + "umov $tmp, $src2, S, 1\n\t" + "mul $dst, $tmp, $dst\t mul reduction2i\n\t" + %} + ins_encode %{ + __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 0); + __ mul($dst$$Register, $tmp$$Register, $src1$$Register); + __ umov($tmp$$Register, as_FloatRegister($src2$$reg), __ S, 1); + __ mul($dst$$Register, $tmp$$Register, $dst$$Register); + %} + ins_pipe(pipe_class_default); +%} + instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI tmp2) %{ match(Set dst (MulReductionVI 
src1 src2)); @@ -13418,6 +13707,26 @@ instruct reduce_mul4I(iRegINoSp dst, iRegIorL2I src1, vecX src2, vecX tmp, iRegI ins_pipe(pipe_class_default); %} +instruct reduce_add2F(vRegF dst, vRegF src1, vecD src2, vecD tmp) +%{ + match(Set dst (AddReductionVF src1 src2)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP dst); + format %{ "fadds $dst, $src1, $src2\n\t" + "ins $tmp, S, $src2, 0, 1\n\t" + "fadds $dst, $dst, $tmp\t add reduction2f" + %} + ins_encode %{ + __ fadds(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($src2$$reg), 0, 1); + __ fadds(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp) %{ match(Set dst (AddReductionVF src1 src2)); @@ -13450,6 +13759,26 @@ instruct reduce_add4F(vRegF dst, vRegF src1, vecX src2, vecX tmp) ins_pipe(pipe_class_default); %} +instruct reduce_mul2F(vRegF dst, vRegF src1, vecD src2, vecD tmp) +%{ + match(Set dst (MulReductionVF src1 src2)); + ins_cost(INSN_COST); + effect(TEMP tmp, TEMP dst); + format %{ "fmuls $dst, $src1, $src2\n\t" + "ins $tmp, S, $src2, 0, 1\n\t" + "fmuls $dst, $dst, $tmp\t mul reduction2f" + %} + ins_encode %{ + __ fmuls(as_FloatRegister($dst$$reg), + as_FloatRegister($src1$$reg), as_FloatRegister($src2$$reg)); + __ ins(as_FloatRegister($tmp$$reg), __ S, + as_FloatRegister($src2$$reg), 0, 1); + __ fmuls(as_FloatRegister($dst$$reg), + as_FloatRegister($dst$$reg), as_FloatRegister($tmp$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct reduce_mul4F(vRegF dst, vRegF src1, vecX src2, vecX tmp) %{ match(Set dst (MulReductionVF src1 src2)); @@ -13526,8 +13855,24 @@ instruct reduce_mul2D(vRegD dst, vRegD src1, vecX src2, vecX tmp) // --------------------------------- ADD -------------------------------------- +instruct vadd8B(vecD dst, vecD src1, vecD 
src2) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (AddVB src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vadd16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16); match(Set dst (AddVB src1 src2)); ins_cost(INSN_COST); format %{ "addv $dst,$src1,$src2\t# vector (16B)" %} @@ -13539,8 +13884,24 @@ instruct vadd16B(vecX dst, vecX src1, vecX src2) ins_pipe(pipe_class_default); %} +instruct vadd4S(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (AddVS src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (4H)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vadd8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (AddVS src1 src2)); ins_cost(INSN_COST); format %{ "addv $dst,$src1,$src2\t# vector (8H)" %} @@ -13552,8 +13913,23 @@ instruct vadd8S(vecX dst, vecX src1, vecX src2) ins_pipe(pipe_class_default); %} +instruct vadd2I(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVI src1 src2)); + ins_cost(INSN_COST); + format %{ "addv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ addv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vadd4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (AddVI src1 src2)); ins_cost(INSN_COST); format %{ "addv $dst,$src1,$src2\t# vector (4S)" %} @@ 
-13567,6 +13943,7 @@ instruct vadd4I(vecX dst, vecX src1, vecX src2) instruct vadd2L(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (AddVL src1 src2)); ins_cost(INSN_COST); format %{ "addv $dst,$src1,$src2\t# vector (2L)" %} @@ -13578,8 +13955,23 @@ instruct vadd2L(vecX dst, vecX src1, vecX src2) ins_pipe(pipe_class_default); %} +instruct vadd2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (AddVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fadd $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ fadd(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vadd4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (AddVF src1 src2)); ins_cost(INSN_COST); format %{ "fadd $dst,$src1,$src2\t# vector (4S)" %} @@ -13606,8 +13998,24 @@ instruct vadd2D(vecX dst, vecX src1, vecX src2) // --------------------------------- SUB -------------------------------------- +instruct vsub8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (SubVB src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsub16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 16); match(Set dst (SubVB src1 src2)); ins_cost(INSN_COST); format %{ "subv $dst,$src1,$src2\t# vector (16B)" %} @@ -13619,8 +14027,24 @@ instruct vsub16B(vecX dst, vecX src1, vecX src2) ins_pipe(pipe_class_default); %} +instruct vsub4S(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (SubVS 
src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (4H)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsub8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (SubVS src1 src2)); ins_cost(INSN_COST); format %{ "subv $dst,$src1,$src2\t# vector (8H)" %} @@ -13632,8 +14056,23 @@ instruct vsub8S(vecX dst, vecX src1, vecX src2) ins_pipe(pipe_class_default); %} +instruct vsub2I(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVI src1 src2)); + ins_cost(INSN_COST); + format %{ "subv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ subv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsub4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (SubVI src1 src2)); ins_cost(INSN_COST); format %{ "subv $dst,$src1,$src2\t# vector (4S)" %} @@ -13647,6 +14086,7 @@ instruct vsub4I(vecX dst, vecX src1, vecX src2) instruct vsub2L(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (SubVL src1 src2)); ins_cost(INSN_COST); format %{ "subv $dst,$src1,$src2\t# vector (2L)" %} @@ -13658,8 +14098,23 @@ instruct vsub2L(vecX dst, vecX src1, vecX src2) ins_pipe(pipe_class_default); %} +instruct vsub2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (SubVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fsub $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ fsub(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsub4F(vecX dst, vecX src1, vecX src2) %{ + 
predicate(n->as_Vector()->length() == 4); match(Set dst (SubVF src1 src2)); ins_cost(INSN_COST); format %{ "fsub $dst,$src1,$src2\t# vector (4S)" %} @@ -13673,6 +14128,7 @@ instruct vsub4F(vecX dst, vecX src1, vecX src2) instruct vsub2D(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (SubVD src1 src2)); ins_cost(INSN_COST); format %{ "fsub $dst,$src1,$src2\t# vector (2D)" %} @@ -13686,8 +14142,24 @@ instruct vsub2D(vecX dst, vecX src1, vecX src2) // --------------------------------- MUL -------------------------------------- +instruct vmul4S(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (MulVS src1 src2)); + ins_cost(INSN_COST); + format %{ "mulv $dst,$src1,$src2\t# vector (4H)" %} + ins_encode %{ + __ mulv(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vmul8S(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (MulVS src1 src2)); ins_cost(INSN_COST); format %{ "mulv $dst,$src1,$src2\t# vector (8H)" %} @@ -13699,8 +14171,23 @@ instruct vmul8S(vecX dst, vecX src1, vecX src2) ins_pipe(pipe_class_default); %} +instruct vmul2I(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVI src1 src2)); + ins_cost(INSN_COST); + format %{ "mulv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ mulv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vmul4I(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (MulVI src1 src2)); ins_cost(INSN_COST); format %{ "mulv $dst,$src1,$src2\t# vector (4S)" %} @@ -13712,8 +14199,23 @@ instruct vmul4I(vecX dst, vecX src1, vecX src2) ins_pipe(pipe_class_default); %} +instruct 
vmul2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (MulVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fmul $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ fmul(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vmul4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (MulVF src1 src2)); ins_cost(INSN_COST); format %{ "fmul $dst,$src1,$src2\t# vector (4S)" %} @@ -13727,6 +14229,7 @@ instruct vmul4F(vecX dst, vecX src1, vecX src2) instruct vmul2D(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (MulVD src1 src2)); ins_cost(INSN_COST); format %{ "fmul $dst,$src1,$src2\t# vector (2D)" %} @@ -13740,8 +14243,23 @@ instruct vmul2D(vecX dst, vecX src1, vecX src2) // --------------------------------- DIV -------------------------------------- +instruct vdiv2F(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (DivVF src1 src2)); + ins_cost(INSN_COST); + format %{ "fdiv $dst,$src1,$src2\t# vector (2S)" %} + ins_encode %{ + __ fdiv(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vdiv4F(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (DivVF src1 src2)); ins_cost(INSN_COST); format %{ "fdiv $dst,$src1,$src2\t# vector (4S)" %} @@ -13755,6 +14273,7 @@ instruct vdiv4F(vecX dst, vecX src1, vecX src2) instruct vdiv2D(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (DivVD src1 src2)); ins_cost(INSN_COST); format %{ "fdiv $dst,$src1,$src2\t# vector (2D)" %} @@ -13768,8 +14287,24 @@ instruct vdiv2D(vecX dst, vecX src1, vecX src2) // --------------------------------- AND 
-------------------------------------- +instruct vand8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length_in_bytes() == 4 || + n->as_Vector()->length_in_bytes() == 8); + match(Set dst (AndV src1 src2)); + ins_cost(INSN_COST); + format %{ "and $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ andr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vand16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); match(Set dst (AndV src1 src2)); ins_cost(INSN_COST); format %{ "and $dst,$src1,$src2\t# vector (16B)" %} @@ -13783,8 +14318,24 @@ instruct vand16B(vecX dst, vecX src1, vecX src2) // --------------------------------- OR --------------------------------------- +instruct vor8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length_in_bytes() == 4 || + n->as_Vector()->length_in_bytes() == 8); + match(Set dst (OrV src1 src2)); + ins_cost(INSN_COST); + format %{ "orr $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ orr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vor16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); match(Set dst (OrV src1 src2)); ins_cost(INSN_COST); format %{ "orr $dst,$src1,$src2\t# vector (16B)" %} @@ -13798,8 +14349,24 @@ instruct vor16B(vecX dst, vecX src1, vecX src2) // --------------------------------- XOR -------------------------------------- +instruct vxor8B(vecD dst, vecD src1, vecD src2) +%{ + predicate(n->as_Vector()->length_in_bytes() == 4 || + n->as_Vector()->length_in_bytes() == 8); + match(Set dst (XorV src1 src2)); + ins_cost(INSN_COST); + format %{ "xor $dst,$src1,$src2\t# vector (8B)" %} + ins_encode %{ + __ eor(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src1$$reg), + 
as_FloatRegister($src2$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vxor16B(vecX dst, vecX src1, vecX src2) %{ + predicate(n->as_Vector()->length_in_bytes() == 16); match(Set dst (XorV src1 src2)); ins_cost(INSN_COST); format %{ "xor $dst,$src1,$src2\t# vector (16B)" %} @@ -13833,7 +14400,23 @@ instruct vshiftcntR(vecX dst, iRegIorL2I cnt) %{ ins_pipe(pipe_class_default); %} +instruct vsll8B(vecD dst, vecD src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (LShiftVB src shift)); + match(Set dst (RShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (8B)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsll16B(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 16); match(Set dst (LShiftVB src shift)); match(Set dst (RShiftVB src shift)); ins_cost(INSN_COST); @@ -13846,7 +14429,22 @@ instruct vsll16B(vecX dst, vecX src, vecX shift) %{ ins_pipe(pipe_class_default); %} +instruct vsrl8B(vecD dst, vecD src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (URShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "ushl $dst,$src,$shift\t# vector (8B)" %} + ins_encode %{ + __ ushl(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsrl16B(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 16); match(Set dst (URShiftVB src shift)); ins_cost(INSN_COST); format %{ "ushl $dst,$src,$shift\t# vector (16B)" %} @@ -13858,7 +14456,28 @@ instruct vsrl16B(vecX dst, vecX src, vecX shift) %{ ins_pipe(pipe_class_default); %} +instruct vsll8B_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4 || + 
n->as_Vector()->length() == 8); + match(Set dst (LShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (8B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) { + __ eor(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ shl(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(pipe_class_default); +%} + instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); match(Set dst (LShiftVB src shift)); ins_cost(INSN_COST); format %{ "shl $dst, $src, $shift\t# vector (16B)" %} @@ -13876,7 +14495,24 @@ instruct vsll16B_imm(vecX dst, vecX src, immI shift) %{ ins_pipe(pipe_class_default); %} +instruct vsra8B_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (RShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (8B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) sh = 7; + sh = -sh & 7; + __ sshr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(pipe_class_default); +%} + instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); match(Set dst (RShiftVB src shift)); ins_cost(INSN_COST); format %{ "sshr $dst, $src, $shift\t# vector (16B)" %} @@ -13890,7 +14526,28 @@ instruct vsra16B_imm(vecX dst, vecX src, immI shift) %{ ins_pipe(pipe_class_default); %} +instruct vsrl8B_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 4 || + n->as_Vector()->length() == 8); + match(Set dst (URShiftVB src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (8B)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 8) { + __ eor(as_FloatRegister($dst$$reg), __ T8B, + 
as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ ushr(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), -sh & 7); + } + %} + ins_pipe(pipe_class_default); +%} + instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 16); match(Set dst (URShiftVB src shift)); ins_cost(INSN_COST); format %{ "ushr $dst, $src, $shift\t# vector (16B)" %} @@ -13908,7 +14565,23 @@ instruct vsrl16B_imm(vecX dst, vecX src, immI shift) %{ ins_pipe(pipe_class_default); %} +instruct vsll4S(vecD dst, vecD src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + match(Set dst (RShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (4H)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsll8S(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); match(Set dst (RShiftVS src shift)); ins_cost(INSN_COST); @@ -13921,7 +14594,22 @@ instruct vsll8S(vecX dst, vecX src, vecX shift) %{ ins_pipe(pipe_class_default); %} +instruct vsrl4S(vecD dst, vecD src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (URShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "ushl $dst,$src,$shift\t# vector (4H)" %} + ins_encode %{ + __ ushl(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsrl8S(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (URShiftVS src shift)); ins_cost(INSN_COST); format %{ "ushl $dst,$src,$shift\t# vector (8H)" %} @@ -13933,7 +14621,28 @@ instruct vsrl8S(vecX dst, vecX src, vecX shift) %{ 
ins_pipe(pipe_class_default); %} +instruct vsll4S_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (LShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (4H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) { + __ eor(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ shl(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), sh); + } + %} + ins_pipe(pipe_class_default); +%} + instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (LShiftVS src shift)); ins_cost(INSN_COST); format %{ "shl $dst, $src, $shift\t# vector (8H)" %} @@ -13951,7 +14660,24 @@ instruct vsll8S_imm(vecX dst, vecX src, immI shift) %{ ins_pipe(pipe_class_default); %} +instruct vsra4S_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (RShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (4H)" %} + ins_encode %{ + int sh = (int)$shift$$constant & 31; + if (sh >= 16) sh = 15; + sh = -sh & 15; + __ sshr(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), sh); + %} + ins_pipe(pipe_class_default); +%} + instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (RShiftVS src shift)); ins_cost(INSN_COST); format %{ "sshr $dst, $src, $shift\t# vector (8H)" %} @@ -13965,7 +14691,28 @@ instruct vsra8S_imm(vecX dst, vecX src, immI shift) %{ ins_pipe(pipe_class_default); %} +instruct vsrl4S_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2 || + n->as_Vector()->length() == 4); + match(Set dst (URShiftVS src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (4H)" %} + ins_encode %{ 
+ int sh = (int)$shift$$constant & 31; + if (sh >= 16) { + __ eor(as_FloatRegister($dst$$reg), __ T8B, + as_FloatRegister($src$$reg), + as_FloatRegister($src$$reg)); + } else { + __ ushr(as_FloatRegister($dst$$reg), __ T4H, + as_FloatRegister($src$$reg), -sh & 15); + } + %} + ins_pipe(pipe_class_default); +%} + instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 8); match(Set dst (URShiftVS src shift)); ins_cost(INSN_COST); format %{ "ushr $dst, $src, $shift\t# vector (8H)" %} @@ -13983,7 +14730,22 @@ instruct vsrl8S_imm(vecX dst, vecX src, immI shift) %{ ins_pipe(pipe_class_default); %} +instruct vsll2I(vecD dst, vecD src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVI src shift)); + match(Set dst (RShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "sshl $dst,$src,$shift\t# vector (2S)" %} + ins_encode %{ + __ sshl(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsll4I(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (LShiftVI src shift)); match(Set dst (RShiftVI src shift)); ins_cost(INSN_COST); @@ -13996,7 +14758,21 @@ instruct vsll4I(vecX dst, vecX src, vecX shift) %{ ins_pipe(pipe_class_default); %} +instruct vsrl2I(vecD dst, vecD src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "ushl $dst,$src,$shift\t# vector (2S)" %} + ins_encode %{ + __ ushl(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + as_FloatRegister($shift$$reg)); + %} + ins_pipe(pipe_class_default); +%} + instruct vsrl4I(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (URShiftVI src shift)); ins_cost(INSN_COST); format %{ "ushl $dst,$src,$shift\t# vector (4S)" %} @@ -14008,7 +14784,21 @@ instruct vsrl4I(vecX dst, 
vecX src, vecX shift) %{ ins_pipe(pipe_class_default); %} +instruct vsll2I_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (LShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "shl $dst, $src, $shift\t# vector (2S)" %} + ins_encode %{ + __ shl(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + (int)$shift$$constant & 31); + %} + ins_pipe(pipe_class_default); +%} + instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (LShiftVI src shift)); ins_cost(INSN_COST); format %{ "shl $dst, $src, $shift\t# vector (4S)" %} @@ -14020,7 +14810,21 @@ instruct vsll4I_imm(vecX dst, vecX src, immI shift) %{ ins_pipe(pipe_class_default); %} +instruct vsra2I_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (RShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "sshr $dst, $src, $shift\t# vector (2S)" %} + ins_encode %{ + __ sshr(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + -(int)$shift$$constant & 31); + %} + ins_pipe(pipe_class_default); +%} + instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (RShiftVI src shift)); ins_cost(INSN_COST); format %{ "sshr $dst, $src, $shift\t# vector (4S)" %} @@ -14032,7 +14836,21 @@ instruct vsra4I_imm(vecX dst, vecX src, immI shift) %{ ins_pipe(pipe_class_default); %} +instruct vsrl2I_imm(vecD dst, vecD src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); + match(Set dst (URShiftVI src shift)); + ins_cost(INSN_COST); + format %{ "ushr $dst, $src, $shift\t# vector (2S)" %} + ins_encode %{ + __ ushr(as_FloatRegister($dst$$reg), __ T2S, + as_FloatRegister($src$$reg), + -(int)$shift$$constant & 31); + %} + ins_pipe(pipe_class_default); +%} + instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 4); match(Set dst (URShiftVI src 
shift)); ins_cost(INSN_COST); format %{ "ushr $dst, $src, $shift\t# vector (4S)" %} @@ -14045,6 +14863,7 @@ instruct vsrl4I_imm(vecX dst, vecX src, immI shift) %{ %} instruct vsll2L(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (LShiftVL src shift)); match(Set dst (RShiftVL src shift)); ins_cost(INSN_COST); @@ -14058,6 +14877,7 @@ instruct vsll2L(vecX dst, vecX src, vecX shift) %{ %} instruct vsrl2L(vecX dst, vecX src, vecX shift) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (URShiftVL src shift)); ins_cost(INSN_COST); format %{ "ushl $dst,$src,$shift\t# vector (2D)" %} @@ -14070,6 +14890,7 @@ instruct vsrl2L(vecX dst, vecX src, vecX shift) %{ %} instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (LShiftVL src shift)); ins_cost(INSN_COST); format %{ "shl $dst, $src, $shift\t# vector (2D)" %} @@ -14082,6 +14903,7 @@ instruct vsll2L_imm(vecX dst, vecX src, immI shift) %{ %} instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (RShiftVL src shift)); ins_cost(INSN_COST); format %{ "sshr $dst, $src, $shift\t# vector (2D)" %} @@ -14094,6 +14916,7 @@ instruct vsra2L_imm(vecX dst, vecX src, immI shift) %{ %} instruct vsrl2L_imm(vecX dst, vecX src, immI shift) %{ + predicate(n->as_Vector()->length() == 2); match(Set dst (URShiftVL src shift)); ins_cost(INSN_COST); format %{ "ushr $dst, $src, $shift\t# vector (2D)" %} diff --git a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp index 1de8ed8f1fb..85752e738c2 100644 --- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp +++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp @@ -2055,6 +2055,9 @@ public: INSN(negr, 1, 0b100000101110); INSN(notr, 1, 0b100000010110); INSN(addv, 0, 0b110001101110); + INSN(cls, 0, 0b100000010010); + INSN(clz, 1, 0b100000010010); + INSN(cnt, 0, 0b100000010110); 
#undef INSN diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp index 99187783be7..8a8c58e3955 100644 --- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp +++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp @@ -36,6 +36,7 @@ class MacroAssembler: public Assembler { friend class LIR_Assembler; + public: using Assembler::mov; using Assembler::movi; diff --git a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp index 759a4172897..cf76c0d1f07 100644 --- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp @@ -199,6 +199,12 @@ void VM_Version::get_processor_features() { UseCRC32Intrinsics = true; } + if (UseCRC32CIntrinsics) { + if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) + warning("CRC32C intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + if (auxv & (HWCAP_SHA1 | HWCAP_SHA2)) { if (FLAG_IS_DEFAULT(UseSHA)) { FLAG_SET_DEFAULT(UseSHA, true); @@ -251,6 +257,10 @@ void VM_Version::get_processor_features() { UseBarriersForVolatile = (_cpuFeatures & CPU_DMB_ATOMICS) != 0; } + if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { + UsePopCountInstruction = true; + } + #ifdef COMPILER2 if (FLAG_IS_DEFAULT(OptoScheduling)) { OptoScheduling = true; diff --git a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp index 8913546bd53..28d19fbe4e4 100644 --- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp +++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp @@ -191,6 +191,13 @@ void VM_Version::initialize() { FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } + + if (UseCRC32CIntrinsics) { + if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) + warning("CRC32C intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + // Adjust RTM (Restricted Transactional 
Memory) flags. if (!has_tcheck() && UseRTMLocking) { // Can't continue because UseRTMLocking affects UseBiasedLocking flag diff --git a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp index 55f338754dd..42ed9b6b992 100644 --- a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp +++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp @@ -128,8 +128,11 @@ class Assembler : public AbstractAssembler { faligndata_op3 = 0x36, flog3_op3 = 0x36, edge_op3 = 0x36, + fzero_op3 = 0x36, fsrc_op3 = 0x36, + fnot_op3 = 0x36, xmulx_op3 = 0x36, + crc32c_op3 = 0x36, impdep2_op3 = 0x37, stpartialf_op3 = 0x37, jmpl_op3 = 0x38, @@ -231,7 +234,9 @@ class Assembler : public AbstractAssembler { sha1_opf = 0x141, sha256_opf = 0x142, - sha512_opf = 0x143 + sha512_opf = 0x143, + + crc32c_opf = 0x147 }; enum op5s { @@ -600,6 +605,11 @@ class Assembler : public AbstractAssembler { return x & ((1 << 10) - 1); } + // create a low12 __value__ (not a field) for a given 32-bit constant + static int low12( int x ) { + return x & ((1 << 12) - 1); + } + // AES crypto instructions supported only on certain processors static void aes_only() { assert( VM_Version::has_aes(), "This instruction only works on SPARC with AES instructions support"); } @@ -608,6 +618,9 @@ class Assembler : public AbstractAssembler { static void sha256_only() { assert( VM_Version::has_sha256(), "This instruction only works on SPARC with SHA256"); } static void sha512_only() { assert( VM_Version::has_sha512(), "This instruction only works on SPARC with SHA512"); } + // CRC32C instruction supported only on certain processors + static void crc32c_only() { assert( VM_Version::has_crc32c(), "This instruction only works on SPARC with CRC32C"); } + // instruction only in VIS1 static void vis1_only() { assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); } @@ -1022,6 +1035,7 @@ public: void nop() { emit_int32( op(branch_op) | op2(sethi_op2) ); } + void sw_count() { 
emit_int32( op(branch_op) | op2(sethi_op2) | 0x3f0 ); } // pp 202 @@ -1198,8 +1212,14 @@ public: void faligndata( FloatRegister s1, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(faligndata_op3) | fs1(s1, FloatRegisterImpl::D) | opf(faligndata_opf) | fs2(s2, FloatRegisterImpl::D)); } + void fzero( FloatRegisterImpl::Width w, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(fzero_op3) | opf(0x62 - w)); } + void fsrc2( FloatRegisterImpl::Width w, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(fsrc_op3) | opf(0x7A - w) | fs2(s2, w)); } + void fnot1( FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, w) | op3(fnot_op3) | fs1(s1, w) | opf(0x6C - w)); } + + void fpmerge( FloatRegister s1, FloatRegister s2, FloatRegister d ) { vis1_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(0x36) | fs1(s1, FloatRegisterImpl::S) | opf(0x4b) | fs2(s2, FloatRegisterImpl::S)); } + void stpartialf( Register s1, Register s2, FloatRegister d, int ia = -1 ) { vis1_only(); emit_int32( op(ldst_op) | fd(d, FloatRegisterImpl::D) | op3(stpartialf_op3) | rs1(s1) | imm_asi(ia) | rs2(s2)); } // VIS2 instructions @@ -1224,6 +1244,10 @@ public: void sha256() { sha256_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha256_opf)); } void sha512() { sha512_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha512_opf)); } + // CRC32C instruction + + void crc32c( FloatRegister s1, FloatRegister s2, FloatRegister d ) { crc32c_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(crc32c_op3) | fs1(s1, FloatRegisterImpl::D) | opf(crc32c_opf) | fs2(s2, FloatRegisterImpl::D)); } + // Creation Assembler(CodeBuffer* code) : AbstractAssembler(code) { #ifdef CHECK_DELAY diff --git a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp 
b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp index d4507cf04b0..e9ce3f4ec0f 100644 --- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp @@ -956,6 +956,7 @@ void MacroAssembler::set64(jlong value, Register d, Register tmp) { int hi = (int)(value >> 32); int lo = (int)(value & ~0); + int bits_33to2 = (int)((value >> 2) & ~0); // (Matcher::isSimpleConstant64 knows about the following optimizations.) if (Assembler::is_simm13(lo) && value == lo) { or3(G0, lo, d); @@ -964,6 +965,12 @@ void MacroAssembler::set64(jlong value, Register d, Register tmp) { if (low10(lo) != 0) or3(d, low10(lo), d); } + else if ((hi >> 2) == 0) { + Assembler::sethi(bits_33to2, d); // hardware version zero-extends to upper 32 + sllx(d, 2, d); + if (low12(lo) != 0) + or3(d, low12(lo), d); + } else if (hi == -1) { Assembler::sethi(~lo, d); // hardware version zero-extends to upper 32 xor3(d, low10(lo) ^ ~low10(~0), d); @@ -4351,3 +4358,52 @@ void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Lab cmp_and_brx_short(to, end, Assembler::lessUnsigned, Assembler::pt, small_loop); nop(); // Separate short branches } + +/** + * Update CRC-32[C] with a byte value according to constants in table + * + * @param [in,out]crc Register containing the crc. + * @param [in]val Register containing the byte to fold into the CRC. + * @param [in]table Register containing the table of crc constants. 
+ * + * uint32_t crc; + * val = crc_table[(val ^ crc) & 0xFF]; + * crc = val ^ (crc >> 8); + */ +void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { + xor3(val, crc, val); + and3(val, 0xFF, val); + sllx(val, 2, val); + lduw(table, val, val); + srlx(crc, 8, crc); + xor3(val, crc, crc); +} + +// Reverse byte order of lower 32 bits, assuming upper 32 bits all zeros +void MacroAssembler::reverse_bytes_32(Register src, Register dst, Register tmp) { + srlx(src, 24, dst); + + sllx(src, 32+8, tmp); + srlx(tmp, 32+24, tmp); + sllx(tmp, 8, tmp); + or3(dst, tmp, dst); + + sllx(src, 32+16, tmp); + srlx(tmp, 32+24, tmp); + sllx(tmp, 16, tmp); + or3(dst, tmp, dst); + + sllx(src, 32+24, tmp); + srlx(tmp, 32, tmp); + or3(dst, tmp, dst); +} + +void MacroAssembler::movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2) { + reverse_bytes_32(src, tmp1, tmp2); + movxtod(tmp1, dst); +} + +void MacroAssembler::movftoi_revbytes(FloatRegister src, Register dst, Register tmp1, Register tmp2) { + movdtox(src, tmp1); + reverse_bytes_32(tmp1, dst, tmp2); +} diff --git a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp index 5fc0dd0632c..22f56b999bc 100644 --- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp +++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -903,6 +903,10 @@ public: inline void ldf(FloatRegisterImpl::Width w, Register s1, RegisterOrConstant s2, FloatRegister d); inline void ldf(FloatRegisterImpl::Width w, const Address& a, FloatRegister d, int offset = 0); + // little-endian + inline void ldxl(Register s1, Register s2, Register d) { ldxa(s1, s2, ASI_PRIMARY_LITTLE, d); } + inline void ldfl(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { ldfa(w, s1, s2, ASI_PRIMARY_LITTLE, d); } + // membar psuedo instruction. takes into account target memory model. inline void membar( Assembler::Membar_mask_bits const7a ); @@ -1436,6 +1440,14 @@ public: // Use BIS for zeroing void bis_zeroing(Register to, Register count, Register temp, Label& Ldone); + // Update CRC-32[C] with a byte value according to constants in table + void update_byte_crc32(Register crc, Register val, Register table); + + // Reverse byte order of lower 32 bits, assuming upper 32 bits all zeros + void reverse_bytes_32(Register src, Register dst, Register tmp); + void movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2); + void movftoi_revbytes(FloatRegister src, Register dst, Register tmp1, Register tmp2); + #undef VIRTUAL }; diff --git a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp index 471bd65d269..f0bfa8de742 100644 --- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp @@ -4910,6 +4910,206 @@ class StubGenerator: public StubCodeGenerator { return start; } +#define CHUNK_LEN 128 /* 128 x 8B = 1KB */ +#define CHUNK_K1 0x1307a0206 /* reverseBits(pow(x, CHUNK_LEN*8*8*3 - 32) mod P(x)) << 1 */ +#define CHUNK_K2 0x1a0f717c4 /* reverseBits(pow(x, CHUNK_LEN*8*8*2 - 32) mod P(x)) << 1 */ +#define CHUNK_K3 0x0170076fa /* reverseBits(pow(x, CHUNK_LEN*8*8*1 - 32) mod P(x)) << 1 */ + + /** + * Arguments: + * + * Inputs: + 
* O0 - int crc + * O1 - byte* buf + * O2 - int len + * O3 - int* table + * + * Output: + * O0 - int crc result + */ + address generate_updateBytesCRC32C() { + assert(UseCRC32CIntrinsics, "need CRC32C instruction"); + + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C"); + address start = __ pc(); + + const Register crc = O0; // crc + const Register buf = O1; // source java byte array address + const Register len = O2; // number of bytes + const Register table = O3; // byteTable + + Label L_crc32c_head, L_crc32c_aligned; + Label L_crc32c_parallel, L_crc32c_parallel_loop; + Label L_crc32c_serial, L_crc32c_x32_loop, L_crc32c_x8, L_crc32c_x8_loop; + Label L_crc32c_done, L_crc32c_tail, L_crc32c_return; + + __ cmp_and_br_short(len, 0, Assembler::lessEqual, Assembler::pn, L_crc32c_return); + + // clear upper 32 bits of crc + __ clruwu(crc); + + __ and3(buf, 7, G4); + __ cmp_and_brx_short(G4, 0, Assembler::equal, Assembler::pt, L_crc32c_aligned); + + __ mov(8, G1); + __ sub(G1, G4, G4); + + // ------ process the misaligned head (7 bytes or less) ------ + __ BIND(L_crc32c_head); + + // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF]; + __ ldub(buf, 0, G1); + __ update_byte_crc32(crc, G1, table); + + __ inc(buf); + __ dec(len); + __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pn, L_crc32c_return); + __ dec(G4); + __ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_head); + + // ------ process the 8-byte-aligned body ------ + __ BIND(L_crc32c_aligned); + __ nop(); + __ cmp_and_br_short(len, 8, Assembler::less, Assembler::pn, L_crc32c_tail); + + // reverse the byte order of lower 32 bits to big endian, and move to FP side + __ movitof_revbytes(crc, F0, G1, G3); + + __ set(CHUNK_LEN*8*4, G4); + __ cmp_and_br_short(len, G4, Assembler::less, Assembler::pt, L_crc32c_serial); + + // ------ process four 1KB chunks in parallel ------ + __ BIND(L_crc32c_parallel); + + __ fzero(FloatRegisterImpl::D, F2); + __ 
fzero(FloatRegisterImpl::D, F4); + __ fzero(FloatRegisterImpl::D, F6); + + __ mov(CHUNK_LEN - 1, G4); + __ BIND(L_crc32c_parallel_loop); + // schedule ldf's ahead of crc32c's to hide the load-use latency + __ ldf(FloatRegisterImpl::D, buf, 0, F8); + __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10); + __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12); + __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*24, F14); + __ crc32c(F0, F8, F0); + __ crc32c(F2, F10, F2); + __ crc32c(F4, F12, F4); + __ crc32c(F6, F14, F6); + __ inc(buf, 8); + __ dec(G4); + __ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_parallel_loop); + + __ ldf(FloatRegisterImpl::D, buf, 0, F8); + __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8, F10); + __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12); + __ crc32c(F0, F8, F0); + __ crc32c(F2, F10, F2); + __ crc32c(F4, F12, F4); + + __ inc(buf, CHUNK_LEN*24); + __ ldfl(FloatRegisterImpl::D, buf, G0, F14); // load in little endian + __ inc(buf, 8); + + __ prefetch(buf, 0, Assembler::severalReads); + __ prefetch(buf, CHUNK_LEN*8, Assembler::severalReads); + __ prefetch(buf, CHUNK_LEN*16, Assembler::severalReads); + __ prefetch(buf, CHUNK_LEN*24, Assembler::severalReads); + + // move to INT side, and reverse the byte order of lower 32 bits to little endian + __ movftoi_revbytes(F0, O4, G1, G4); + __ movftoi_revbytes(F2, O5, G1, G4); + __ movftoi_revbytes(F4, G5, G1, G4); + + // combine the results of 4 chunks + __ set64(CHUNK_K1, G3, G1); + __ xmulx(O4, G3, O4); + __ set64(CHUNK_K2, G3, G1); + __ xmulx(O5, G3, O5); + __ set64(CHUNK_K3, G3, G1); + __ xmulx(G5, G3, G5); + + __ movdtox(F14, G4); + __ xor3(O4, O5, O5); + __ xor3(G5, O5, O5); + __ xor3(G4, O5, O5); + + // reverse the byte order to big endian, via stack, and move to FP side + __ add(SP, -8, G1); + __ srlx(G1, 3, G1); + __ sllx(G1, 3, G1); + __ stx(O5, G1, G0); + __ ldfl(FloatRegisterImpl::D, G1, G0, F2); // load in little endian + + __ crc32c(F6, F2, F0); + + __ 
set(CHUNK_LEN*8*4, G4); + __ sub(len, G4, len); + __ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_parallel); + __ nop(); + __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_done); + + __ BIND(L_crc32c_serial); + + __ mov(32, G4); + __ cmp_and_br_short(len, G4, Assembler::less, Assembler::pn, L_crc32c_x8); + + // ------ process 32B chunks ------ + __ BIND(L_crc32c_x32_loop); + __ ldf(FloatRegisterImpl::D, buf, 0, F2); + __ inc(buf, 8); + __ crc32c(F0, F2, F0); + __ ldf(FloatRegisterImpl::D, buf, 0, F2); + __ inc(buf, 8); + __ crc32c(F0, F2, F0); + __ ldf(FloatRegisterImpl::D, buf, 0, F2); + __ inc(buf, 8); + __ crc32c(F0, F2, F0); + __ ldf(FloatRegisterImpl::D, buf, 0, F2); + __ inc(buf, 8); + __ crc32c(F0, F2, F0); + __ dec(len, 32); + __ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_x32_loop); + + __ BIND(L_crc32c_x8); + __ nop(); + __ cmp_and_br_short(len, 8, Assembler::less, Assembler::pt, L_crc32c_done); + + // ------ process 8B chunks ------ + __ BIND(L_crc32c_x8_loop); + __ ldf(FloatRegisterImpl::D, buf, 0, F2); + __ inc(buf, 8); + __ crc32c(F0, F2, F0); + __ dec(len, 8); + __ cmp_and_br_short(len, 8, Assembler::greaterEqual, Assembler::pt, L_crc32c_x8_loop); + + __ BIND(L_crc32c_done); + + // move to INT side, and reverse the byte order of lower 32 bits to little endian + __ movftoi_revbytes(F0, crc, G1, G3); + + __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_return); + + // ------ process the misaligned tail (7 bytes or less) ------ + __ BIND(L_crc32c_tail); + + // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF]; + __ ldub(buf, 0, G1); + __ update_byte_crc32(crc, G1, table); + + __ inc(buf); + __ dec(len); + __ cmp_and_br_short(len, 0, Assembler::greater, Assembler::pt, L_crc32c_tail); + + __ BIND(L_crc32c_return); + __ nop(); + __ retl(); + __ delayed()->nop(); + + return start; + } + void generate_initial() { // Generates all stubs and initializes the 
entry points @@ -5001,6 +5201,11 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress"); StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB"); } + + // generate CRC32C intrinsic code + if (UseCRC32CIntrinsics) { + StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); + } } diff --git a/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp b/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp index f3b30e884c3..2eab8d202be 100644 --- a/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp +++ b/hotspot/src/cpu/sparc/vm/stubRoutines_sparc.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -41,7 +41,7 @@ static bool returns_to_call_stub(address return_pc) { enum /* platform_dependent_constants */ { // %%%%%%%% May be able to shrink this a lot code_size1 = 20000, // simply increase if too small (assembler will crash if too small) - code_size2 = 23000 // simply increase if too small (assembler will crash if too small) + code_size2 = 24000 // simply increase if too small (assembler will crash if too small) }; class Sparc { diff --git a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp index a76f05e15f1..441b9e4eff6 100644 --- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp @@ -230,7 +230,7 @@ void VM_Version::initialize() { assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size"); char buf[512]; - jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", + jio_snprintf(buf, sizeof(buf), 
"%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", (has_v9() ? ", v9" : (has_v8() ? ", v8" : "")), (has_hardware_popc() ? ", popc" : ""), (has_vis1() ? ", vis1" : ""), @@ -242,6 +242,7 @@ void VM_Version::initialize() { (has_sha1() ? ", sha1" : ""), (has_sha256() ? ", sha256" : ""), (has_sha512() ? ", sha512" : ""), + (has_crc32c() ? ", crc32c" : ""), (is_ultra3() ? ", ultra3" : ""), (is_sun4v() ? ", sun4v" : ""), (is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")), @@ -363,6 +364,23 @@ void VM_Version::initialize() { } } + // SPARC T4 and above should have support for CRC32C instruction + if (has_crc32c()) { + if (UseVIS > 2) { // CRC32C intrinsics use VIS3 instructions + if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, true); + } + } else { + if (UseCRC32CIntrinsics) { + warning("SPARC CRC32C intrinsics require VIS3 instruction support. Intrinsics will be disabled."); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + } + } else if (UseCRC32CIntrinsics) { + warning("CRC32C instruction is not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && (cache_line_size > ContendedPaddingWidth)) ContendedPaddingWidth = cache_line_size; diff --git a/hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp b/hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp index 59969ed1089..d1c40e6488e 100644 --- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp +++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -53,7 +53,8 @@ protected: aes_instructions = 19, sha1_instruction = 20, sha256_instruction = 21, - sha512_instruction = 22 + sha512_instruction = 22, + crc32c_instruction = 23 }; enum Feature_Flag_Set { @@ -83,6 +84,7 @@ protected: sha1_instruction_m = 1 << sha1_instruction, sha256_instruction_m = 1 << sha256_instruction, sha512_instruction_m = 1 << sha512_instruction, + crc32c_instruction_m = 1 << crc32c_instruction, generic_v8_m = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m, generic_v9_m = generic_v8_m | v9_instructions_m, @@ -141,6 +143,7 @@ public: static bool has_sha1() { return (_features & sha1_instruction_m) != 0; } static bool has_sha256() { return (_features & sha256_instruction_m) != 0; } static bool has_sha512() { return (_features & sha512_instruction_m) != 0; } + static bool has_crc32c() { return (_features & crc32c_instruction_m) != 0; } static bool supports_compare_and_exchange() { return has_v9(); } diff --git a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp index cfa00dcb5c5..10a6a0c448d 100644 --- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp +++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp @@ -699,6 +699,12 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); } + if (UseCRC32CIntrinsics) { + if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) + warning("CRC32C intrinsics are not available on this CPU"); + FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); + } + // Adjust RTM (Restricted Transactional Memory) flags if (!supports_rtm() && UseRTMLocking) { // Can't continue because UseRTMLocking affects UseBiasedLocking flag diff --git a/hotspot/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp b/hotspot/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp index 03b2d4b1b29..b7cafd4c618 100644 --- a/hotspot/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp +++ 
b/hotspot/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2006, 2015, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -363,6 +363,11 @@ int VM_Version::platform_features(int features) { #endif if (av & AV_SPARC_CBCOND) features |= cbcond_instructions_m; +#ifndef AV_SPARC_CRC32C +#define AV_SPARC_CRC32C 0x20000000 /* crc32c instruction supported */ +#endif + if (av & AV_SPARC_CRC32C) features |= crc32c_instruction_m; + #ifndef AV_SPARC_AES #define AV_SPARC_AES 0x00020000 /* aes instrs supported */ #endif diff --git a/hotspot/src/share/vm/ci/ciField.cpp b/hotspot/src/share/vm/ci/ciField.cpp index 9fe63de27bf..78be2023ce8 100644 --- a/hotspot/src/share/vm/ci/ciField.cpp +++ b/hotspot/src/share/vm/ci/ciField.cpp @@ -186,6 +186,10 @@ static bool trust_final_non_static_fields(ciInstanceKlass* holder) { // Even if general trusting is disabled, trust system-built closures in these packages. if (holder->is_in_package("java/lang/invoke") || holder->is_in_package("sun/invoke")) return true; + // Trust VM anonymous classes. They are private API (sun.misc.Unsafe) and can't be serialized, + // so there is no hacking of finals going on with them. 
+ if (holder->is_anonymous()) + return true; return TrustFinalNonStaticFields; } diff --git a/hotspot/src/share/vm/ci/ciInstanceKlass.cpp b/hotspot/src/share/vm/ci/ciInstanceKlass.cpp index ec97e100d48..cfbc7b8c65f 100644 --- a/hotspot/src/share/vm/ci/ciInstanceKlass.cpp +++ b/hotspot/src/share/vm/ci/ciInstanceKlass.cpp @@ -58,6 +58,7 @@ ciInstanceKlass::ciInstanceKlass(KlassHandle h_k) : _nonstatic_field_size = ik->nonstatic_field_size(); _has_nonstatic_fields = ik->has_nonstatic_fields(); _has_default_methods = ik->has_default_methods(); + _is_anonymous = ik->is_anonymous(); _nonstatic_fields = NULL; // initialized lazily by compute_nonstatic_fields: _has_injected_fields = -1; _implementor = NULL; // we will fill these lazily @@ -101,6 +102,7 @@ ciInstanceKlass::ciInstanceKlass(ciSymbol* name, _has_nonstatic_fields = false; _nonstatic_fields = NULL; _has_injected_fields = -1; + _is_anonymous = false; _loader = loader; _protection_domain = protection_domain; _is_shared = false; diff --git a/hotspot/src/share/vm/ci/ciInstanceKlass.hpp b/hotspot/src/share/vm/ci/ciInstanceKlass.hpp index df9ed0f7fe9..080acd380e1 100644 --- a/hotspot/src/share/vm/ci/ciInstanceKlass.hpp +++ b/hotspot/src/share/vm/ci/ciInstanceKlass.hpp @@ -53,6 +53,7 @@ private: bool _has_subklass; bool _has_nonstatic_fields; bool _has_default_methods; + bool _is_anonymous; ciFlags _flags; jint _nonstatic_field_size; @@ -179,6 +180,10 @@ public: return _has_default_methods; } + bool is_anonymous() { + return _is_anonymous; + } + ciInstanceKlass* get_canonical_holder(int offset); ciField* get_field_by_offset(int field_offset, bool is_static); ciField* get_field_by_name(ciSymbol* name, ciSymbol* signature, bool is_static); diff --git a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp index a6390215eac..0844489c00f 100644 --- a/hotspot/src/share/vm/classfile/vmSymbols.hpp +++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp @@ -863,6 +863,12 @@ do_name( 
updateByteBuffer_name, "updateByteBuffer") \ do_signature(updateByteBuffer_signature, "(IJII)I") \ \ + /* support for java.util.zip.CRC32C */ \ + do_class(java_util_zip_CRC32C, "java/util/zip/CRC32C") \ + do_intrinsic(_updateBytesCRC32C, java_util_zip_CRC32C, updateBytes_name, updateBytes_signature, F_S) \ + do_intrinsic(_updateDirectByteBufferCRC32C, java_util_zip_CRC32C, updateDirectByteBuffer_name, updateByteBuffer_signature, F_S) \ + do_name( updateDirectByteBuffer_name, "updateDirectByteBuffer") \ + \ /* support for sun.misc.Unsafe */ \ do_class(sun_misc_Unsafe, "sun/misc/Unsafe") \ \ diff --git a/hotspot/src/share/vm/opto/escape.cpp b/hotspot/src/share/vm/opto/escape.cpp index 1d0f628892d..744952a1c10 100644 --- a/hotspot/src/share/vm/opto/escape.cpp +++ b/hotspot/src/share/vm/opto/escape.cpp @@ -962,6 +962,7 @@ void ConnectionGraph::process_call_arguments(CallNode *call) { (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre") == 0 || strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 || strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32") == 0 || + strcmp(call->as_CallLeaf()->_name, "updateBytesCRC32C") == 0 || strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 || strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 || strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 || diff --git a/hotspot/src/share/vm/opto/library_call.cpp b/hotspot/src/share/vm/opto/library_call.cpp index b4fd3aeba02..f87f52cfc29 100644 --- a/hotspot/src/share/vm/opto/library_call.cpp +++ b/hotspot/src/share/vm/opto/library_call.cpp @@ -197,7 +197,7 @@ class LibraryCallKit : public GraphKit { CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) { return generate_method_call(method_id, true, false); } - Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static); + Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * 
fieldTypeString, bool is_exact, bool is_static, ciInstanceKlass * fromKls); Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2); Node* make_string_method_node(int opcode, Node* str1, Node* str2); @@ -291,6 +291,9 @@ class LibraryCallKit : public GraphKit { bool inline_updateCRC32(); bool inline_updateBytesCRC32(); bool inline_updateByteBufferCRC32(); + Node* get_table_from_crc32c_class(ciInstanceKlass *crc32c_class); + bool inline_updateBytesCRC32C(); + bool inline_updateDirectByteBufferCRC32C(); bool inline_multiplyToLen(); bool inline_squareToLen(); bool inline_mulAdd(); @@ -539,6 +542,11 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) { if (!UseCRC32Intrinsics) return NULL; break; + case vmIntrinsics::_updateBytesCRC32C: + case vmIntrinsics::_updateDirectByteBufferCRC32C: + if (!UseCRC32CIntrinsics) return NULL; + break; + case vmIntrinsics::_incrementExactI: case vmIntrinsics::_addExactI: if (!Matcher::match_rule_supported(Op_OverflowAddI) || !UseMathExactIntrinsics) return NULL; @@ -947,6 +955,11 @@ bool LibraryCallKit::try_to_inline(int predicate) { case vmIntrinsics::_updateByteBufferCRC32: return inline_updateByteBufferCRC32(); + case vmIntrinsics::_updateBytesCRC32C: + return inline_updateBytesCRC32C(); + case vmIntrinsics::_updateDirectByteBufferCRC32C: + return inline_updateDirectByteBufferCRC32C(); + case vmIntrinsics::_profileBoolean: return inline_profileBoolean(); case vmIntrinsics::_isCompileConstant: @@ -5536,6 +5549,106 @@ bool LibraryCallKit::inline_updateByteBufferCRC32() { return true; } +//------------------------------get_table_from_crc32c_class----------------------- +Node * LibraryCallKit::get_table_from_crc32c_class(ciInstanceKlass *crc32c_class) { + Node* table = load_field_from_object(NULL, "byteTable", "[I", /*is_exact*/ false, /*is_static*/ true, crc32c_class); + assert (table != NULL, "wrong version of java.util.zip.CRC32C"); + + return table; +} + 
+//------------------------------inline_updateBytesCRC32C----------------------- +// +// Calculate CRC32C for byte[] array. +// int java.util.zip.CRC32C.updateBytes(int crc, byte[] buf, int off, int end) +// +bool LibraryCallKit::inline_updateBytesCRC32C() { + assert(UseCRC32CIntrinsics, "need CRC32C instruction support"); + assert(callee()->signature()->size() == 4, "updateBytes has 4 parameters"); + assert(callee()->holder()->is_loaded(), "CRC32C class must be loaded"); + // no receiver since it is a static method + Node* crc = argument(0); // type: int + Node* src = argument(1); // type: oop + Node* offset = argument(2); // type: int + Node* end = argument(3); // type: int + + Node* length = _gvn.transform(new SubINode(end, offset)); + + const Type* src_type = src->Value(&_gvn); + const TypeAryPtr* top_src = src_type->isa_aryptr(); + if (top_src == NULL || top_src->klass() == NULL) { + // failed array check + return false; + } + + // Figure out the size and type of the elements we will be copying. + BasicType src_elem = src_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); + if (src_elem != T_BYTE) { + return false; + } + + // 'src_start' points to src array + scaled offset + Node* src_start = array_element_address(src, offset, src_elem); + + // static final int[] byteTable in class CRC32C + Node* table = get_table_from_crc32c_class(callee()->holder()); + Node* table_start = array_element_address(table, intcon(0), T_INT); + + // We assume that range check is done by caller. + // TODO: generate range check (offset+length < src.length) in debug VM. + + // Call the stub. 
+ address stubAddr = StubRoutines::updateBytesCRC32C(); + const char *stubName = "updateBytesCRC32C"; + + Node* call = make_runtime_call(RC_LEAF, OptoRuntime::updateBytesCRC32C_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + crc, src_start, length, table_start); + Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms)); + set_result(result); + return true; +} + +//------------------------------inline_updateDirectByteBufferCRC32C----------------------- +// +// Calculate CRC32C for DirectByteBuffer. +// int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) +// +bool LibraryCallKit::inline_updateDirectByteBufferCRC32C() { + assert(UseCRC32CIntrinsics, "need CRC32C instruction support"); + assert(callee()->signature()->size() == 5, "updateDirectByteBuffer has 4 parameters and one is long"); + assert(callee()->holder()->is_loaded(), "CRC32C class must be loaded"); + // no receiver since it is a static method + Node* crc = argument(0); // type: int + Node* src = argument(1); // type: long + Node* offset = argument(3); // type: int + Node* end = argument(4); // type: int + + Node* length = _gvn.transform(new SubINode(end, offset)); + + src = ConvL2X(src); // adjust Java long to machine word + Node* base = _gvn.transform(new CastX2PNode(src)); + offset = ConvI2X(offset); + + // 'src_start' points to src array + scaled offset + Node* src_start = basic_plus_adr(top(), base, offset); + + // static final int[] byteTable in class CRC32C + Node* table = get_table_from_crc32c_class(callee()->holder()); + Node* table_start = array_element_address(table, intcon(0), T_INT); + + // Call the stub. 
+ address stubAddr = StubRoutines::updateBytesCRC32C(); + const char *stubName = "updateBytesCRC32C"; + + Node* call = make_runtime_call(RC_LEAF, OptoRuntime::updateBytesCRC32C_Type(), + stubAddr, stubName, TypePtr::BOTTOM, + crc, src_start, length, table_start); + Node* result = _gvn.transform(new ProjNode(call, TypeFunc::Parms)); + set_result(result); + return true; +} + //----------------------------inline_reference_get---------------------------- // public T java.lang.ref.Reference.get(); bool LibraryCallKit::inline_reference_get() { @@ -5571,18 +5684,28 @@ bool LibraryCallKit::inline_reference_get() { Node * LibraryCallKit::load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, - bool is_exact=true, bool is_static=false) { + bool is_exact=true, bool is_static=false, + ciInstanceKlass * fromKls=NULL) { + if (fromKls == NULL) { + const TypeInstPtr* tinst = _gvn.type(fromObj)->isa_instptr(); + assert(tinst != NULL, "obj is null"); + assert(tinst->klass()->is_loaded(), "obj is not loaded"); + assert(!is_exact || tinst->klass_is_exact(), "klass not exact"); + fromKls = tinst->klass()->as_instance_klass(); + } else { + assert(is_static, "only for static field access"); + } + ciField* field = fromKls->get_field_by_name(ciSymbol::make(fieldName), + ciSymbol::make(fieldTypeString), + is_static); - const TypeInstPtr* tinst = _gvn.type(fromObj)->isa_instptr(); - assert(tinst != NULL, "obj is null"); - assert(tinst->klass()->is_loaded(), "obj is not loaded"); - assert(!is_exact || tinst->klass_is_exact(), "klass not exact"); - - ciField* field = tinst->klass()->as_instance_klass()->get_field_by_name(ciSymbol::make(fieldName), - ciSymbol::make(fieldTypeString), - is_static); - if (field == NULL) return (Node *) NULL; assert (field != NULL, "undefined field"); + if (field == NULL) return (Node *) NULL; + + if (is_static) { + const TypeInstPtr* tip = TypeInstPtr::make(fromKls->java_mirror()); + fromObj = makecon(tip); + } // Next code 
copied from Parse::do_get_xxx(): diff --git a/hotspot/src/share/vm/opto/runtime.cpp b/hotspot/src/share/vm/opto/runtime.cpp index e0a4ee454a3..56bf22b964a 100644 --- a/hotspot/src/share/vm/opto/runtime.cpp +++ b/hotspot/src/share/vm/opto/runtime.cpp @@ -851,6 +851,29 @@ const TypeFunc* OptoRuntime::updateBytesCRC32_Type() { return TypeFunc::make(domain, range); } +/** + * int updateBytesCRC32C(int crc, byte* buf, int len, int* table) + */ +const TypeFunc* OptoRuntime::updateBytesCRC32C_Type() { + // create input type (domain) + int num_args = 4; + int argcnt = num_args; + const Type** fields = TypeTuple::fields(argcnt); + int argp = TypeFunc::Parms; + fields[argp++] = TypeInt::INT; // crc + fields[argp++] = TypePtr::NOTNULL; // buf + fields[argp++] = TypeInt::INT; // len + fields[argp++] = TypePtr::NOTNULL; // table + assert(argp == TypeFunc::Parms+argcnt, "correct decoding"); + const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields); + + // result type needed + fields = TypeTuple::fields(1); + fields[TypeFunc::Parms+0] = TypeInt::INT; // crc result + const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+1, fields); + return TypeFunc::make(domain, range); +} + // for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning int const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() { // create input type (domain) diff --git a/hotspot/src/share/vm/opto/runtime.hpp b/hotspot/src/share/vm/opto/runtime.hpp index e412dbc82f8..424bf822b0d 100644 --- a/hotspot/src/share/vm/opto/runtime.hpp +++ b/hotspot/src/share/vm/opto/runtime.hpp @@ -319,6 +319,7 @@ private: static const TypeFunc* ghash_processBlocks_Type(); static const TypeFunc* updateBytesCRC32_Type(); + static const TypeFunc* updateBytesCRC32C_Type(); // leaf on stack replacement interpreter accessor types static const TypeFunc* osr_end_Type(); diff --git a/hotspot/src/share/vm/opto/superword.cpp b/hotspot/src/share/vm/opto/superword.cpp index 
df06cc2577d..d7f035f7f92 100644 --- a/hotspot/src/share/vm/opto/superword.cpp +++ b/hotspot/src/share/vm/opto/superword.cpp @@ -183,13 +183,20 @@ void SuperWord::unrolling_analysis(CountedLoopNode *cl, int &local_loop_unroll_f break; } + // Ignore nodes with non-primitive type. + BasicType bt; + if (n->is_Mem()) { + bt = n->as_Mem()->memory_type(); + } else { + bt = n->bottom_type()->basic_type(); + } + if (is_java_primitive(bt) == false) { + ignored_loop_nodes[i] = n->_idx; + continue; + } + if (n->is_Mem()) { MemNode* current = n->as_Mem(); - BasicType bt = current->memory_type(); - if (is_java_primitive(bt) == false) { - ignored_loop_nodes[i] = n->_idx; - continue; - } Node* adr = n->in(MemNode::Address); Node* n_ctrl = _phase->get_ctrl(adr); @@ -231,11 +238,12 @@ void SuperWord::unrolling_analysis(CountedLoopNode *cl, int &local_loop_unroll_f BasicType bt; Node* n = lpt()->_body.at(i); - if (n->is_Store()) { + if (n->is_Mem()) { bt = n->as_Mem()->memory_type(); } else { bt = n->bottom_type()->basic_type(); } + if (is_java_primitive(bt) == false) continue; int cur_max_vector = Matcher::max_vector_size(bt); diff --git a/hotspot/src/share/vm/runtime/arguments.cpp b/hotspot/src/share/vm/runtime/arguments.cpp index 5d89e91b6a4..68901fbe84c 100644 --- a/hotspot/src/share/vm/runtime/arguments.cpp +++ b/hotspot/src/share/vm/runtime/arguments.cpp @@ -3753,8 +3753,12 @@ jint Arguments::apply_ergo() { if (TieredCompilation) { set_tiered_flags(); } else { - // Check if the policy is valid. Policies 0 and 1 are valid for non-tiered setup. - if (CompilationPolicyChoice >= 2) { + int max_compilation_policy_choice = 1; +#ifdef COMPILER2 + max_compilation_policy_choice = 2; +#endif + // Check if the policy is valid. 
+ if (CompilationPolicyChoice >= max_compilation_policy_choice) { vm_exit_during_initialization( "Incompatible compilation policy selected", NULL); } diff --git a/hotspot/src/share/vm/runtime/compilationPolicy.cpp b/hotspot/src/share/vm/runtime/compilationPolicy.cpp index db8122b754d..59c37428da4 100644 --- a/hotspot/src/share/vm/runtime/compilationPolicy.cpp +++ b/hotspot/src/share/vm/runtime/compilationPolicy.cpp @@ -512,7 +512,7 @@ void StackWalkCompPolicy::method_invocation_event(methodHandle m, JavaThread* th RegisterMap reg_map(thread, false); javaVFrame* triggerVF = thread->last_java_vframe(&reg_map); // triggerVF is the frame that triggered its counter - RFrame* first = new InterpretedRFrame(triggerVF->fr(), thread, m); + RFrame* first = new InterpretedRFrame(triggerVF->fr(), thread, m()); if (first->top_method()->code() != NULL) { // called obsolete method/nmethod -- no need to recompile @@ -557,8 +557,8 @@ RFrame* StackWalkCompPolicy::findTopInlinableFrame(GrowableArray<RFrame*>* stack if( !next ) // No next frame up the stack? 
break; // Then compile with current frame - methodHandle m = current->top_method(); - methodHandle next_m = next->top_method(); + Method* m = current->top_method(); + Method* next_m = next->top_method(); if (TraceCompilationPolicy && Verbose) { tty->print("[caller: "); @@ -644,7 +644,7 @@ RFrame* StackWalkCompPolicy::findTopInlinableFrame(GrowableArray<RFrame*>* stack if (TraceCompilationPolicy && Verbose) { tty->print("\n\t check caller: "); next_m->print_short_name(tty); - tty->print(" ( interpreted " INTPTR_FORMAT ", size=%d ) ", p2i((address)next_m()), next_m->code_size()); + tty->print(" ( interpreted " INTPTR_FORMAT ", size=%d ) ", p2i((address)next_m), next_m->code_size()); } current = next; diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp index 5bbefd9ecf2..be8b18e7ee5 100644 --- a/hotspot/src/share/vm/runtime/globals.hpp +++ b/hotspot/src/share/vm/runtime/globals.hpp @@ -848,6 +848,9 @@ public: product(bool, UseCRC32Intrinsics, false, \ "use intrinsics for java.util.zip.CRC32") \ \ + product(bool, UseCRC32CIntrinsics, false, \ + "use intrinsics for java.util.zip.CRC32C") \ + \ develop(bool, TraceCallFixup, false, \ "Trace all call fixups") \ \ diff --git a/hotspot/src/share/vm/runtime/rframe.cpp b/hotspot/src/share/vm/runtime/rframe.cpp index f3ef5504b37..1707bf13bb0 100644 --- a/hotspot/src/share/vm/runtime/rframe.cpp +++ b/hotspot/src/share/vm/runtime/rframe.cpp @@ -52,12 +52,12 @@ InterpretedRFrame::InterpretedRFrame(frame fr, JavaThread* thread, RFrame*const : RFrame(fr, thread, callee) { RegisterMap map(thread, false); _vf = javaVFrame::cast(vframe::new_vframe(&_fr, &map, thread)); - _method = methodHandle(thread, _vf->method()); + _method = _vf->method(); assert( _vf->is_interpreted_frame(), "must be interpreted"); init(); } -InterpretedRFrame::InterpretedRFrame(frame fr, JavaThread* thread, methodHandle m) +InterpretedRFrame::InterpretedRFrame(frame fr, JavaThread* thread, Method* m) : RFrame(fr, thread, NULL) 
{ RegisterMap map(thread, false); _vf = javaVFrame::cast(vframe::new_vframe(&_fr, &map, thread)); @@ -140,8 +140,8 @@ void CompiledRFrame::init() { _nm = compiledVFrame::cast(vf)->code(); vf = vf->top(); _vf = javaVFrame::cast(vf); - _method = methodHandle(thread(), CodeCache::find_nmethod(_fr.pc())->method()); - assert(_method(), "should have found a method"); + _method = CodeCache::find_nmethod(_fr.pc())->method(); + assert(_method, "should have found a method"); #ifndef PRODUCT _invocations = _method->compiled_invocation_count(); #endif diff --git a/hotspot/src/share/vm/runtime/rframe.hpp b/hotspot/src/share/vm/runtime/rframe.hpp index e70093b944b..af8fea3943e 100644 --- a/hotspot/src/share/vm/runtime/rframe.hpp +++ b/hotspot/src/share/vm/runtime/rframe.hpp @@ -60,7 +60,7 @@ class RFrame : public ResourceObj { frame fr() const { return _fr; } JavaThread* thread() const { return _thread; } virtual int cost() const = 0; // estimated inlining cost (size) - virtual methodHandle top_method() const = 0; + virtual Method* top_method() const = 0; virtual javaVFrame* top_vframe() const = 0; virtual nmethod* nm() const { ShouldNotCallThis(); return NULL; } @@ -79,7 +79,7 @@ class CompiledRFrame : public RFrame { // frame containing a compiled method protected: nmethod* _nm; javaVFrame* _vf; // top vframe; may be NULL (for most recent frame) - methodHandle _method; // top method + Method* _method; // top method CompiledRFrame(frame fr, JavaThread* thread, RFrame*const callee); void init(); @@ -88,7 +88,7 @@ class CompiledRFrame : public RFrame { // frame containing a compiled method public: CompiledRFrame(frame fr, JavaThread* thread); // for nmethod triggering its counter (callee == NULL) bool is_compiled() const { return true; } - methodHandle top_method() const { return _method; } + Method* top_method() const { return _method; } javaVFrame* top_vframe() const { return _vf; } nmethod* nm() const { return _nm; } int cost() const; @@ -98,16 +98,16 @@ class CompiledRFrame : 
public RFrame { // frame containing a compiled method class InterpretedRFrame : public RFrame { // interpreter frame protected: javaVFrame* _vf; // may be NULL (for most recent frame) - methodHandle _method; + Method* _method; InterpretedRFrame(frame fr, JavaThread* thread, RFrame*const callee); void init(); friend class RFrame; public: - InterpretedRFrame(frame fr, JavaThread* thread, methodHandle m); // constructor for method triggering its invocation counter + InterpretedRFrame(frame fr, JavaThread* thread, Method* m); // constructor for method triggering its invocation counter bool is_interpreted() const { return true; } - methodHandle top_method() const { return _method; } + Method* top_method() const { return _method; } javaVFrame* top_vframe() const { return _vf; } int cost() const; void print(); diff --git a/hotspot/src/share/vm/runtime/stubRoutines.cpp b/hotspot/src/share/vm/runtime/stubRoutines.cpp index 7e2ff57ecd1..f920d130ea9 100644 --- a/hotspot/src/share/vm/runtime/stubRoutines.cpp +++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp @@ -137,6 +137,8 @@ address StubRoutines::_sha512_implCompressMB = NULL; address StubRoutines::_updateBytesCRC32 = NULL; address StubRoutines::_crc_table_adr = NULL; +address StubRoutines::_updateBytesCRC32C = NULL; + address StubRoutines::_multiplyToLen = NULL; address StubRoutines::_squareToLen = NULL; address StubRoutines::_mulAdd = NULL; diff --git a/hotspot/src/share/vm/runtime/stubRoutines.hpp b/hotspot/src/share/vm/runtime/stubRoutines.hpp index 66723532ed6..22ad6c04d29 100644 --- a/hotspot/src/share/vm/runtime/stubRoutines.hpp +++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp @@ -197,6 +197,8 @@ class StubRoutines: AllStatic { static address _updateBytesCRC32; static address _crc_table_adr; + static address _updateBytesCRC32C; + static address _multiplyToLen; static address _squareToLen; static address _mulAdd; @@ -359,6 +361,8 @@ class StubRoutines: AllStatic { static address updateBytesCRC32() { return 
_updateBytesCRC32; } static address crc_table_addr() { return _crc_table_adr; } + static address updateBytesCRC32C() { return _updateBytesCRC32C; } + static address multiplyToLen() {return _multiplyToLen; } static address squareToLen() {return _squareToLen; } static address mulAdd() {return _mulAdd; } diff --git a/hotspot/src/share/vm/runtime/vmStructs.cpp b/hotspot/src/share/vm/runtime/vmStructs.cpp index a2943c206b2..d0da0c6e05a 100644 --- a/hotspot/src/share/vm/runtime/vmStructs.cpp +++ b/hotspot/src/share/vm/runtime/vmStructs.cpp @@ -830,6 +830,7 @@ typedef CompactHashtable SymbolCompactHashTable; static_field(StubRoutines, _ghash_processBlocks, address) \ static_field(StubRoutines, _updateBytesCRC32, address) \ static_field(StubRoutines, _crc_table_adr, address) \ + static_field(StubRoutines, _updateBytesCRC32C, address) \ static_field(StubRoutines, _multiplyToLen, address) \ static_field(StubRoutines, _squareToLen, address) \ static_field(StubRoutines, _mulAdd, address) \ diff --git a/hotspot/test/TEST.groups b/hotspot/test/TEST.groups index fa23c56f20b..6d77765c2b1 100644 --- a/hotspot/test/TEST.groups +++ b/hotspot/test/TEST.groups @@ -147,12 +147,16 @@ needs_compact3 = \ gc/survivorAlignment \ gc/TestGCLogRotationViaJcmd.java \ runtime/InternalApi/ThreadCpuTimesDeadlock.java \ + runtime/NMT/JcmdSummaryDiff.java \ + runtime/RedefineTests/RedefineAnnotations.java serviceability/sa/jmap-hashcode/Test8028623.java \ serviceability/threads/TestFalseDeadLock.java \ compiler/codecache/jmx \ compiler/jsr292/RedefineMethodUsedByMultipleMethodHandles.java \ compiler/rangechecks/TestRangeCheckSmearing.java \ - serviceability/dcmd + compiler/whitebox/DeoptimizeMultipleOSRTest.java \ + serviceability/dcmd \ + testlibrary_tests/whitebox/vm_flags # Compact 2 adds full VM tests compact2 = \ diff --git a/hotspot/test/compiler/codecache/jmx/CodeCacheUtils.java b/hotspot/test/compiler/codecache/jmx/CodeCacheUtils.java index 6b3313206cb..0adab36140c 100644 --- 
a/hotspot/test/compiler/codecache/jmx/CodeCacheUtils.java +++ b/hotspot/test/compiler/codecache/jmx/CodeCacheUtils.java @@ -21,6 +21,7 @@ * questions. */ +import jdk.test.lib.Asserts; import jdk.test.lib.Utils; import java.lang.management.MemoryPoolMXBean; import javax.management.Notification; @@ -80,19 +81,42 @@ public final class CodeCacheUtils { } /** - * A "non-nmethods" code heap is used by interpreter during bytecode - * execution, thus, it can't be predicted if this code heap usage will be - * increased or not. Same goes for 'All'. + * Checks if the usage of the code heap corresponding to 'btype' can be + * predicted at runtime if we disable compilation. The usage of the + * 'NonNMethod' code heap can not be predicted because we generate adapters + * and buffers at runtime. The 'MethodNonProfiled' code heap is also not + * predictable because we may generate compiled versions of method handle + * intrinsics while resolving methods at runtime. Same applies to 'All'. * * @param btype BlobType to be checked * @return boolean value, true if respective code heap is predictable */ public static boolean isCodeHeapPredictable(BlobType btype) { - return btype == BlobType.MethodNonProfiled - || btype == BlobType.MethodProfiled; + return btype == BlobType.MethodProfiled; } - public static void disableCollectionUsageThresholds(){ + /** + * Verifies that 'newValue' is equal to 'oldValue' if usage of the + * corresponding code heap is predictable. Checks the weaker condition + * 'newValue >= oldValue' if usage is not predictable because intermediate + * allocations may happen. 
+ * + * @param btype BlobType of the code heap to be checked + * @param newValue New value to be verified + * @param oldValue Old value to be verified + * @param msg Error message if verification fails + */ + public static void assertEQorGTE(BlobType btype, long newValue, long oldValue, String msg) { + if (CodeCacheUtils.isCodeHeapPredictable(btype)) { + // Usage is predictable, check strong == condition + Asserts.assertEQ(newValue, oldValue, msg); + } else { + // Usage is not predictable, check weaker >= condition + Asserts.assertGTE(newValue, oldValue, msg); + } + } + + public static void disableCollectionUsageThresholds() { BlobType.getAvailable().stream() .map(BlobType::getMemoryPool) .filter(MemoryPoolMXBean::isCollectionUsageThresholdSupported) diff --git a/hotspot/test/compiler/codecache/jmx/GetUsageTest.java b/hotspot/test/compiler/codecache/jmx/GetUsageTest.java index 5eaf5515f34..d657699395c 100644 --- a/hotspot/test/compiler/codecache/jmx/GetUsageTest.java +++ b/hotspot/test/compiler/codecache/jmx/GetUsageTest.java @@ -52,10 +52,8 @@ public class GetUsageTest { public static void main(String[] args) throws Exception { for (BlobType btype : BlobType.getAvailable()) { - if (CodeCacheUtils.isCodeHeapPredictable(btype)) { - for (int allocSize = 10; allocSize < 100000; allocSize *= 10) { - new GetUsageTest(btype, allocSize).runTest(); - } + for (int allocSize = 10; allocSize < 100000; allocSize *= 10) { + new GetUsageTest(btype, allocSize).runTest(); } } } @@ -87,13 +85,15 @@ public class GetUsageTest { for (MemoryPoolMXBean entry : predictableBeans) { long diff = current.get(entry) - initial.get(entry); if (entry.equals(btype.getMemoryPool())) { - Asserts.assertFalse(diff <= 0L || diff > usageUpperEstimate, - String.format("Pool %s usage increase was reported " - + "unexpectedly as increased by %d using " - + "allocation size %d", entry.getName(), - diff, allocateSize)); + if (CodeCacheUtils.isCodeHeapPredictable(btype)) { + Asserts.assertFalse(diff <= 0L || 
diff > usageUpperEstimate, + String.format("Pool %s usage increase was reported " + + "unexpectedly as increased by %d using " + + "allocation size %d", entry.getName(), + diff, allocateSize)); + } } else { - Asserts.assertEQ(diff, 0L, + CodeCacheUtils.assertEQorGTE(btype, diff, 0L, String.format("Pool %s usage changed unexpectedly while" + " trying to increase: %s using allocation " + "size %d", entry.getName(), diff --git a/hotspot/test/compiler/codecache/jmx/PeakUsageTest.java b/hotspot/test/compiler/codecache/jmx/PeakUsageTest.java index 3c596ba0726..e906b86f032 100644 --- a/hotspot/test/compiler/codecache/jmx/PeakUsageTest.java +++ b/hotspot/test/compiler/codecache/jmx/PeakUsageTest.java @@ -52,9 +52,7 @@ public class PeakUsageTest { public static void main(String[] args) { for (BlobType btype : BlobType.getAvailable()) { - if (CodeCacheUtils.isCodeHeapPredictable(btype)) { - new PeakUsageTest(btype).runTest(); - } + new PeakUsageTest(btype).runTest(); } } @@ -65,7 +63,7 @@ public class PeakUsageTest { CodeCacheUtils.ALLOCATION_SIZE, btype.id); long newPeakUsage = bean.getPeakUsage().getUsed(); try { - Asserts.assertEQ(newPeakUsage, bean.getUsage().getUsed(), + CodeCacheUtils.assertEQorGTE(btype, newPeakUsage, bean.getUsage().getUsed(), "Peak usage does not match usage after allocation for " + bean.getName()); } finally { @@ -73,18 +71,18 @@ public class PeakUsageTest { CodeCacheUtils.WB.freeCodeBlob(addr); } } - Asserts.assertEQ(newPeakUsage, bean.getPeakUsage().getUsed(), + CodeCacheUtils.assertEQorGTE(btype, newPeakUsage, bean.getPeakUsage().getUsed(), "Code cache peak usage has changed after usage decreased for " + bean.getName()); bean.resetPeakUsage(); - Asserts.assertEQ(bean.getPeakUsage().getUsed(), + CodeCacheUtils.assertEQorGTE(btype, bean.getPeakUsage().getUsed(), bean.getUsage().getUsed(), "Code cache peak usage is not equal to usage after reset for " + bean.getName()); long addr2 = CodeCacheUtils.WB.allocateCodeBlob( 
CodeCacheUtils.ALLOCATION_SIZE, btype.id); try { - Asserts.assertEQ(bean.getPeakUsage().getUsed(), + CodeCacheUtils.assertEQorGTE(btype, bean.getPeakUsage().getUsed(), bean.getUsage().getUsed(), "Code cache peak usage is not equal to usage after fresh " + "allocation for " + bean.getName()); diff --git a/hotspot/test/compiler/codecache/jmx/PoolsIndependenceTest.java b/hotspot/test/compiler/codecache/jmx/PoolsIndependenceTest.java index 5c7559346c9..ee0fce1106c 100644 --- a/hotspot/test/compiler/codecache/jmx/PoolsIndependenceTest.java +++ b/hotspot/test/compiler/codecache/jmx/PoolsIndependenceTest.java @@ -97,13 +97,11 @@ public class PoolsIndependenceTest implements NotificationListener { return false; }); for (BlobType bt : BlobType.getAvailable()) { - if (CodeCacheUtils.isCodeHeapPredictable(bt)) { - int expectedNotificationsAmount = bt.equals(btype) ? 1 : 0; - Asserts.assertEQ(counters.get(bt.getMemoryPool().getName()).get(), - expectedNotificationsAmount, String.format("Unexpected " - + "amount of notifications for pool: %s", - bt.getMemoryPool().getName())); - } + int expectedNotificationsAmount = bt.equals(btype) ? 1 : 0; + CodeCacheUtils.assertEQorGTE(btype, counters.get(bt.getMemoryPool().getName()).get(), + expectedNotificationsAmount, String.format("Unexpected " + + "amount of notifications for pool: %s", + bt.getMemoryPool().getName())); } try { ((NotificationEmitter) ManagementFactory.getMemoryMXBean()). 
diff --git a/hotspot/test/compiler/codecache/jmx/ThresholdNotificationsTest.java b/hotspot/test/compiler/codecache/jmx/ThresholdNotificationsTest.java index 65793eac327..c5b9223ac10 100644 --- a/hotspot/test/compiler/codecache/jmx/ThresholdNotificationsTest.java +++ b/hotspot/test/compiler/codecache/jmx/ThresholdNotificationsTest.java @@ -54,9 +54,7 @@ public class ThresholdNotificationsTest implements NotificationListener { public static void main(String[] args) { for (BlobType bt : BlobType.getAvailable()) { - if (CodeCacheUtils.isCodeHeapPredictable(bt)) { - new ThresholdNotificationsTest(bt).runTest(); - } + new ThresholdNotificationsTest(bt).runTest(); } } @@ -92,7 +90,9 @@ public class ThresholdNotificationsTest implements NotificationListener { } Asserts.assertTrue( Utils.waitForCondition( - () -> counter == iterationsCount, WAIT_TIME), + () -> (CodeCacheUtils.isCodeHeapPredictable(btype) ? + (counter == iterationsCount) : (counter >= iterationsCount)), + WAIT_TIME), "Couldn't receive expected notifications count"); try { ((NotificationEmitter) ManagementFactory.getMemoryMXBean()). 
diff --git a/hotspot/test/compiler/codecache/jmx/UsageThresholdExceededTest.java b/hotspot/test/compiler/codecache/jmx/UsageThresholdExceededTest.java index 4ebbadc1f7b..a699134c6cb 100644 --- a/hotspot/test/compiler/codecache/jmx/UsageThresholdExceededTest.java +++ b/hotspot/test/compiler/codecache/jmx/UsageThresholdExceededTest.java @@ -51,13 +51,9 @@ public class UsageThresholdExceededTest { } public static void main(String[] args) { - int iterationsCount = - Integer.getInteger("jdk.test.lib.iterations", 1); + int iterationsCount = Integer.getInteger("jdk.test.lib.iterations", 1); for (BlobType btype : BlobType.getAvailable()) { - if (CodeCacheUtils.isCodeHeapPredictable(btype)) { - new UsageThresholdExceededTest(btype, iterationsCount) - .runTest(); - } + new UsageThresholdExceededTest(btype, iterationsCount).runTest(); } } @@ -67,9 +63,8 @@ public class UsageThresholdExceededTest { for (int i = 0; i < iterations; i++) { CodeCacheUtils.hitUsageThreshold(bean, btype); } - Asserts.assertEQ(bean.getUsageThresholdCount(), oldValue + iterations, + CodeCacheUtils.assertEQorGTE(btype, bean.getUsageThresholdCount(), oldValue + iterations, "Unexpected threshold usage count"); - System.out.printf("INFO: Scenario finished successfully for %s%n", - bean.getName()); + System.out.printf("INFO: Scenario finished successfully for %s%n", bean.getName()); } } diff --git a/hotspot/test/compiler/codecache/jmx/UsageThresholdIncreasedTest.java b/hotspot/test/compiler/codecache/jmx/UsageThresholdIncreasedTest.java index ebb5d4b82cd..5fc45184b85 100644 --- a/hotspot/test/compiler/codecache/jmx/UsageThresholdIncreasedTest.java +++ b/hotspot/test/compiler/codecache/jmx/UsageThresholdIncreasedTest.java @@ -27,7 +27,6 @@ import sun.hotspot.code.BlobType; /* * @test UsageThresholdIncreasedTest - * @ignore 8129937 * @library /testlibrary /../../test/lib * @modules java.base/sun.misc * java.management @@ -54,14 +53,12 @@ public class UsageThresholdIncreasedTest { public static void 
main(String[] args) { for (BlobType btype : BlobType.getAvailable()) { - if (CodeCacheUtils.isCodeHeapPredictable(btype)) { - new UsageThresholdIncreasedTest(btype).runTest(); - } + new UsageThresholdIncreasedTest(btype).runTest(); } } private void checkUsageThresholdCount(MemoryPoolMXBean bean, long count){ - Asserts.assertEQ(bean.getUsageThresholdCount(), count, + CodeCacheUtils.assertEQorGTE(btype, bean.getUsageThresholdCount(), count, String.format("Usage threshold was hit: %d times for %s " + "Threshold value: %d with current usage: %d", bean.getUsageThresholdCount(), bean.getName(), diff --git a/hotspot/test/compiler/codecache/jmx/UsageThresholdNotExceededTest.java b/hotspot/test/compiler/codecache/jmx/UsageThresholdNotExceededTest.java index aabede08001..23036cc1938 100644 --- a/hotspot/test/compiler/codecache/jmx/UsageThresholdNotExceededTest.java +++ b/hotspot/test/compiler/codecache/jmx/UsageThresholdNotExceededTest.java @@ -50,9 +50,7 @@ public class UsageThresholdNotExceededTest { public static void main(String[] args) { for (BlobType btype : BlobType.getAvailable()) { - if (CodeCacheUtils.isCodeHeapPredictable(btype)) { - new UsageThresholdNotExceededTest(btype).runTest(); - } + new UsageThresholdNotExceededTest(btype).runTest(); } } @@ -65,13 +63,11 @@ public class UsageThresholdNotExceededTest { - CodeCacheUtils.getHeaderSize(btype), btype.id); // a gc cycle triggers usage threshold recalculation CodeCacheUtils.WB.fullGC(); - Asserts.assertEQ(bean.getUsageThresholdCount(), initialThresholdCount, - String.format("Usage threshold was hit: %d times for %s. " + CodeCacheUtils.assertEQorGTE(btype, bean.getUsageThresholdCount(), initialThresholdCount, + String.format("Usage threshold was hit: %d times for %s. 
" + "Threshold value: %d with current usage: %d", bean.getUsageThresholdCount(), bean.getName(), bean.getUsageThreshold(), bean.getUsage().getUsed())); - - System.out.println("INFO: Case finished successfully for " - + bean.getName()); + System.out.println("INFO: Case finished successfully for " + bean.getName()); } } diff --git a/hotspot/test/compiler/intrinsics/crc32c/TestCRC32C.java b/hotspot/test/compiler/intrinsics/crc32c/TestCRC32C.java new file mode 100644 index 00000000000..676f541937c --- /dev/null +++ b/hotspot/test/compiler/intrinsics/crc32c/TestCRC32C.java @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */
+
+/**
+ * @test
+ * @bug 8073583
+ * @summary C2 support for CRC32C on SPARC
+ *
+ * @run main/othervm/timeout=600 -Xbatch TestCRC32C -m
+ */
+
+import java.nio.ByteBuffer;
+import java.util.zip.Checksum;
+import java.util.zip.CRC32C;
+/** Usage: TestCRC32C [-m | iters] [warmupIters]. -m runs test_multi; otherwise update() is timed and checked using -Doffset/-DmsgSize. */
+public class TestCRC32C {
+    public static void main(String[] args) {
+        int offset = Integer.getInteger("offset", 0);
+        int msgSize = Integer.getInteger("msgSize", 512);
+        boolean multi = false;
+        int iters = 20000;
+        int warmupIters = 20000;
+
+        if (args.length > 0) {
+            if (args[0].equals("-m")) {
+                multi = true;
+            } else {
+                iters = Integer.parseInt(args[0]);
+            }
+            if (args.length > 1) {
+                warmupIters = Integer.parseInt(args[1]);
+            }
+        }
+
+        if (multi) {
+            test_multi(warmupIters);
+            return;
+        }
+
+        System.out.println(" offset = " + offset);
+        System.out.println("msgSize = " + msgSize + " bytes");
+        System.out.println(" iters = " + iters);
+
+        byte[] b = initializedBytes(msgSize, offset);
+
+        CRC32C crc0 = new CRC32C();
+        CRC32C crc1 = new CRC32C();
+        CRC32C crc2 = new CRC32C();
+
+        crc0.update(b, offset, msgSize);
+
+        System.out.println("-------------------------------------------------------");
+
+        /* warm up */
+        for (int i = 0; i < warmupIters; i++) {
+            crc1.reset();
+            crc1.update(b, offset, msgSize);
+        }
+
+        /* measure performance */
+        long start = System.nanoTime();
+        for (int i = 0; i < iters; i++) {
+            crc1.reset();
+            crc1.update(b, offset, msgSize);
+        }
+        long end = System.nanoTime();
+        double total = (double)(end - start)/1e9; // in seconds
+        double thruput = (double)msgSize*iters/1e6/total; // in MB/s
+        System.out.println("CRC32C.update(byte[]) runtime = " + total + " seconds");
+        System.out.println("CRC32C.update(byte[]) throughput = " + thruput + " MB/s");
+
+        /* check correctness; check() logs both values, the throw makes the run fail */
+        for (int i = 0; i < iters; i++) {
+            crc1.reset();
+            crc1.update(b, offset, msgSize);
+            if (!check(crc0, crc1)) throw new RuntimeException("CRC32C.update(byte[]) mismatch");
+        }
+        report("CRCs", crc0, crc1);
+
+        System.out.println("-------------------------------------------------------");
+
+        ByteBuffer buf = ByteBuffer.allocateDirect(msgSize);
+        buf.put(b, offset, msgSize);
+        buf.flip();
+
+        /* warm up */
+        for (int i = 0; i < warmupIters; i++) {
+            crc2.reset();
+            crc2.update(buf);
+            buf.rewind();
+        }
+
+        /* measure performance */
+        start = System.nanoTime();
+        for (int i = 0; i < iters; i++) {
+            crc2.reset();
+            crc2.update(buf);
+            buf.rewind();
+        }
+        end = System.nanoTime();
+        total = (double)(end - start)/1e9; // in seconds
+        thruput = (double)msgSize*iters/1e6/total; // in MB/s
+        System.out.println("CRC32C.update(ByteBuffer) runtime = " + total + " seconds");
+        System.out.println("CRC32C.update(ByteBuffer) throughput = " + thruput + " MB/s");
+
+        /* check correctness; check() logs both values, the throw makes the run fail */
+        for (int i = 0; i < iters; i++) {
+            crc2.reset();
+            crc2.update(buf);
+            buf.rewind();
+            if (!check(crc0, crc2)) throw new RuntimeException("CRC32C.update(ByteBuffer) mismatch");
+        }
+        report("CRCs", crc0, crc2);
+
+        System.out.println("-------------------------------------------------------");
+    }
+    /** Prints both checksum values to stdout, prefixed with s. */
+    private static void report(String s, Checksum crc0, Checksum crc1) {
+        System.out.printf("%s: crc0 = %08x, crc1 = %08x\n",
+                          s, crc0.getValue(), crc1.getValue());
+    }
+    /** Returns true when both checksums agree; otherwise logs both values to stderr and returns false. */
+    private static boolean check(Checksum crc0, Checksum crc1) {
+        if (crc0.getValue() != crc1.getValue()) {
+            System.err.printf("ERROR: crc0 = %08x, crc1 = %08x\n",
+                              crc0.getValue(), crc1.getValue());
+            return false;
+        }
+        return true;
+    }
+    /** Builds M + offset bytes: index i holds (byte) i below offset and (byte) (i - offset) from offset on. */
+    private static byte[] initializedBytes(int M, int offset) {
+        byte[] bytes = new byte[M + offset];
+        for (int i = 0; i < offset; i++) {
+            bytes[i] = (byte) i;
+        }
+        for (int i = offset; i < bytes.length; i++) {
+            bytes[i] = (byte) (i - offset);
+        }
+        return bytes;
+    }
+    /** Computes a reference CRC per offset/size pair, recomputes each one iters times, and throws on the first mismatch. */
+    private static void test_multi(int iters) {
+        int len1 = 8;    // the  8B/iteration loop
+        int len2 = 32;   // the 32B/iteration loop
+        int len3 = 4096; // the 4KB/iteration loop
+
+        byte[] b = initializedBytes(len3*16, 0);
+        int[] offsets = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, 64, 128, 256, 512 };
+        int[] sizes = { 0, 1, 2, 3, 4, 5, 6, 7,
+                        len1, len1+1, len1+2, len1+3, len1+4, len1+5, len1+6, len1+7,
+                        len1*2, len1*2+1, len1*2+3, len1*2+5, len1*2+7,
+                        len2, len2+1, len2+3, len2+5, len2+7,
+                        len2*2, len2*4, len2*8, len2*16, len2*32, len2*64,
+                        len3, len3+1, len3+3, len3+5, len3+7,
+                        len3*2, len3*4, len3*8,
+                        len1+len2, len1+len2+1, len1+len2+3, len1+len2+5, len1+len2+7,
+                        len1+len3, len1+len3+1, len1+len3+3, len1+len3+5, len1+len3+7,
+                        len2+len3, len2+len3+1, len2+len3+3, len2+len3+5, len2+len3+7,
+                        len1+len2+len3, len1+len2+len3+1, len1+len2+len3+3,
+                        len1+len2+len3+5, len1+len2+len3+7,
+                        (len1+len2+len3)*2, (len1+len2+len3)*2+1, (len1+len2+len3)*2+3,
+                        (len1+len2+len3)*2+5, (len1+len2+len3)*2+7,
+                        (len1+len2+len3)*3, (len1+len2+len3)*3-1, (len1+len2+len3)*3-3,
+                        (len1+len2+len3)*3-5, (len1+len2+len3)*3-7 };
+        CRC32C[] crc0 = new CRC32C[offsets.length*sizes.length];
+        CRC32C[] crc1 = new CRC32C[offsets.length*sizes.length];
+        int i, j, k;
+
+        System.out.printf("testing %d cases ...\n", offsets.length*sizes.length);
+
+        /* set the result from interpreter as reference */
+        for (i = 0; i < offsets.length; i++) {
+            for (j = 0; j < sizes.length; j++) {
+                crc0[i*sizes.length + j] = new CRC32C();
+                crc1[i*sizes.length + j] = new CRC32C();
+                crc0[i*sizes.length + j].update(b, offsets[i], sizes[j]);
+            }
+        }
+
+        /* warm up the JIT compiler and get result */
+        for (k = 0; k < iters; k++) {
+            for (i = 0; i < offsets.length; i++) {
+                for (j = 0; j < sizes.length; j++) {
+                    crc1[i*sizes.length + j].reset();
+                    crc1[i*sizes.length + j].update(b, offsets[i], sizes[j]);
+                }
+            }
+        }
+
+        /* check correctness; a mismatch must fail the test, not merely be printed */
+        for (i = 0; i < offsets.length; i++) {
+            for (j = 0; j < sizes.length; j++) {
+                if (!check(crc0[i*sizes.length + j], crc1[i*sizes.length + j])) {
+                    throw new RuntimeException(String.format("CRC32C mismatch at "
+                            + "offsets[%d] = %d, sizes[%d] = %d", i, offsets[i], j, sizes[j]));
+                }
+            }
+        }
+    }
+}