8288047: Accelerate Poly1305 on x86_64 using AVX512 instructions
Reviewed-by: sviswanathan, vlivanov
This commit is contained in:
parent
cd6a203a3e
commit
f12710e938
@ -5008,6 +5008,40 @@ assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
|
||||
emit_int16(0x04, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// Convenience overload of evpmadd52luq without an explicit opmask: forwards to
// the masked overload with k0 as the mask register and merge set to false.
void Assembler::evpmadd52luq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
  const bool merge = false;
  evpmadd52luq(dst, k0, src1, src2, merge, vector_len);
}
|
||||
|
||||
// Emits the register-register EVEX form of evpmadd52luq: a 66-prefixed opcode
// 0xB4 in the 0F 38 map with rex_w set, followed by a register-direct ModRM byte.
//
//   dst, src1, src2 - register encodings handed, in that order, to
//                     vex_prefix_and_encode()
//   mask            - opmask register recorded in the instruction attributes
//   merge           - when true, the clear-context attribute is reset
//                     (presumably selecting merge-masking over zero-masking;
//                     confirm against InstructionAttr)
//   vector_len      - vector length encoding passed to InstructionAttr
//
// Besides AVX512_IFMA, lengths other than AVX_512bit require AVX512VL; this is
// checked the same way as in the other masked EVEX emitters of this file.
void Assembler::evpmadd52luq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len) {
  assert(VM_Version::supports_avx512ifma(), "requires AVX512IFMA");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  if (merge) {
    attributes.reset_is_clear_context();
  }

  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  // Opcode byte, then ModRM with the top two bits set (register-direct operands).
  emit_int16((unsigned char)0xB4, (0xC0 | encode));
}
|
||||
|
||||
// Convenience overload of evpmadd52huq without an explicit opmask: forwards to
// the masked overload with k0 as the mask register and merge set to false.
void Assembler::evpmadd52huq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
  const bool merge = false;
  evpmadd52huq(dst, k0, src1, src2, merge, vector_len);
}
|
||||
|
||||
// Emits the register-register EVEX form of evpmadd52huq: a 66-prefixed opcode
// 0xB5 in the 0F 38 map with rex_w set, followed by a register-direct ModRM byte.
//
//   dst, src1, src2 - register encodings handed, in that order, to
//                     vex_prefix_and_encode()
//   mask            - opmask register recorded in the instruction attributes
//   merge           - when true, the clear-context attribute is reset
//                     (presumably selecting merge-masking over zero-masking;
//                     confirm against InstructionAttr)
//   vector_len      - vector length encoding passed to InstructionAttr
//
// Besides AVX512_IFMA, lengths other than AVX_512bit require AVX512VL; this is
// checked the same way as in the other masked EVEX emitters of this file.
void Assembler::evpmadd52huq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len) {
  assert(VM_Version::supports_avx512ifma(), "requires AVX512IFMA");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");
  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attributes.set_is_evex_instruction();
  attributes.set_embedded_opmask_register_specifier(mask);
  if (merge) {
    attributes.reset_is_clear_context();
  }

  int encode = vex_prefix_and_encode(dst->encoding(), src1->encoding(), src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
  // Opcode byte, then ModRM with the top two bits set (register-direct operands).
  emit_int16((unsigned char)0xB5, (0xC0 | encode));
}
|
||||
|
||||
void Assembler::evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(VM_Version::supports_avx512_vnni(), "must support vnni");
|
||||
@ -5425,6 +5459,42 @@ void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
|
||||
emit_int16(0x6C, (0xC0 | encode));
|
||||
}
|
||||
|
||||
// Opmask-free overload of evpunpcklqdq: hands off to the masked overload using
// k0 as the mask register and merge == false.
void Assembler::evpunpcklqdq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
  const bool merge = false;
  evpunpcklqdq(dst, k0, src1, src2, merge, vector_len);
}
|
||||
|
||||
// Masked, register-register EVEX emitter for evpunpcklqdq: 66-prefixed opcode
// 0x6C in the 0F map with rex_w set. Requires AVX512F, plus AVX512VL for any
// vector length other than AVX_512bit.
void Assembler::evpunpcklqdq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "requires AVX512F");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");

  // Describe the encoding: EVEX, opmask embedded, clear-context dropped on merge.
  InstructionAttr attrs(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  attrs.set_embedded_opmask_register_specifier(mask);
  if (merge) attrs.reset_is_clear_context();

  // Emit the prefix; the returned bits are OR-ed into a register-direct ModRM byte.
  const int modrm_bits = vex_prefix_and_encode(dst->encoding(),
                                               src1->encoding(),
                                               src2->encoding(),
                                               VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  emit_int16(0x6C, (0xC0 | modrm_bits));
}
|
||||
|
||||
// Opmask-free overload of evpunpckhqdq: hands off to the masked overload using
// k0 as the mask register and merge == false.
void Assembler::evpunpckhqdq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len) {
  const bool merge = false;
  evpunpckhqdq(dst, k0, src1, src2, merge, vector_len);
}
|
||||
|
||||
// Masked, register-register EVEX emitter for evpunpckhqdq: 66-prefixed opcode
// 0x6D in the 0F map with rex_w set. Requires AVX512F, plus AVX512VL for any
// vector length other than AVX_512bit.
void Assembler::evpunpckhqdq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len) {
  assert(VM_Version::supports_evex(), "requires AVX512F");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");

  // Describe the encoding: EVEX, opmask embedded, clear-context dropped on merge.
  InstructionAttr attrs(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  attrs.set_embedded_opmask_register_specifier(mask);
  if (merge) attrs.reset_is_clear_context();

  // Emit the prefix; the returned bits are OR-ed into a register-direct ModRM byte.
  const int modrm_bits = vex_prefix_and_encode(dst->encoding(),
                                               src1->encoding(),
                                               src2->encoding(),
                                               VEX_SIMD_66, VEX_OPCODE_0F, &attrs);
  emit_int16(0x6D, (0xC0 | modrm_bits));
}
|
||||
|
||||
void Assembler::push(int32_t imm32) {
|
||||
// in 64bits we push 64bits onto the stack but only
|
||||
// take a 32bit immediate
|
||||
@ -5869,6 +5939,18 @@ void Assembler::shrdl(Register dst, Register src, int8_t imm8) {
|
||||
emit_int32(0x0F, (unsigned char)0xAC, (0xC0 | encode), imm8);
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
// 64-bit shld with an immediate count. prefixq_and_encode() receives src first
// and dst second; its result is OR-ed into a register-direct ModRM byte that
// follows the two opcode bytes 0x0F 0xA4, with imm8 emitted last.
void Assembler::shldq(Register dst, Register src, int8_t imm8) {
  const int reg_bits = prefixq_and_encode(src->encoding(), dst->encoding());
  emit_int32(0x0F,
             (unsigned char)0xA4,
             (0xC0 | reg_bits),
             imm8);
}
|
||||
|
||||
// 64-bit shrd with an immediate count. prefixq_and_encode() receives src first
// and dst second; its result is OR-ed into a register-direct ModRM byte that
// follows the two opcode bytes 0x0F 0xAC, with imm8 emitted last.
void Assembler::shrdq(Register dst, Register src, int8_t imm8) {
  const int reg_bits = prefixq_and_encode(src->encoding(), dst->encoding());
  emit_int32(0x0F,
             (unsigned char)0xAC,
             (0xC0 | reg_bits),
             imm8);
}
|
||||
#endif
|
||||
|
||||
// copies a single word from [esi] to [edi]
|
||||
void Assembler::smovl() {
|
||||
emit_int8((unsigned char)0xA5);
|
||||
@ -7740,11 +7822,12 @@ void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_
|
||||
emit_operand(dst, src, 0);
|
||||
}
|
||||
|
||||
void Assembler::vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int16((unsigned char)0xDB, (0xC0 | encode));
|
||||
// Register-source evpandq without an explicit opmask: delegates to the masked
// overload with k0 as the mask register and merge == false.
void Assembler::evpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  const bool merge = false;
  evpandq(dst, k0, nds, src, merge, vector_len);
}
|
||||
|
||||
// Memory-source evpandq without an explicit opmask: delegates to the masked
// Address overload with k0 as the mask register and merge == false.
void Assembler::evpandq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  const bool merge = false;
  evpandq(dst, k0, nds, src, merge, vector_len);
}
|
||||
|
||||
//Variable Shift packed integers logically left.
|
||||
@ -7857,13 +7940,13 @@ void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_l
|
||||
emit_operand(dst, src, 0);
|
||||
}
|
||||
|
||||
void Assembler::vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
|
||||
emit_int16((unsigned char)0xEB, (0xC0 | encode));
|
||||
// Register-source evporq without an explicit opmask: delegates to the masked
// overload with k0 as the mask register and merge == false.
void Assembler::evporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
  const bool merge = false;
  evporq(dst, k0, nds, src, merge, vector_len);
}
|
||||
|
||||
// Memory-source evporq without an explicit opmask: delegates to the masked
// Address overload with k0 as the mask register and merge == false.
void Assembler::evporq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
  const bool merge = false;
  evporq(dst, k0, nds, src, merge, vector_len);
}
|
||||
|
||||
void Assembler::evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
@ -8004,7 +8087,8 @@ void Assembler::evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, Addres
|
||||
}
|
||||
|
||||
void Assembler::evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(VM_Version::supports_evex(), "requires AVX512F");
|
||||
assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
attributes.set_embedded_opmask_register_specifier(mask);
|
||||
@ -8016,7 +8100,8 @@ void Assembler::evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMReg
|
||||
}
|
||||
|
||||
void Assembler::evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
|
||||
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
|
||||
assert(VM_Version::supports_evex(), "requires AVX512F");
|
||||
assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit);
|
||||
@ -8031,7 +8116,8 @@ void Assembler::evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, Addres
|
||||
}
|
||||
|
||||
void Assembler::evporq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(VM_Version::supports_evex(), "requires AVX512F");
|
||||
assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
attributes.set_embedded_opmask_register_specifier(mask);
|
||||
@ -8043,7 +8129,8 @@ void Assembler::evporq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegi
|
||||
}
|
||||
|
||||
void Assembler::evporq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len) {
|
||||
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
|
||||
assert(VM_Version::supports_evex(), "requires AVX512F");
|
||||
assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");
|
||||
InstructionMark im(this);
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true,/* legacy_mode */ false, /* no_mask_reg */ false,/* uses_vl */ true);
|
||||
attributes.set_address_attributes(/* tuple_type */ EVEX_FV,/* input_size_in_bits */ EVEX_32bit);
|
||||
@ -8201,8 +8288,8 @@ void Assembler::vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address
|
||||
}
|
||||
|
||||
void Assembler::vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "requires EVEX support");
|
||||
assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
|
||||
assert(VM_Version::supports_evex(), "requires AVX512F");
|
||||
assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires AVX512VL");
|
||||
InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
|
||||
attributes.set_is_evex_instruction();
|
||||
int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src3->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
|
||||
@ -8211,6 +8298,20 @@ void Assembler::vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegis
|
||||
emit_int8(imm8);
|
||||
}
|
||||
|
||||
// Memory-operand form of vpternlogq: EVEX-encoded, 66-prefixed opcode 0x25 in
// the 0F 3A map with vex_w set, followed by the address operand and imm8.
// Requires EVEX support, and AVX512VL for vector lengths other than AVX_512bit;
// dst must be a real register.
void Assembler::vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len) {
  assert(VM_Version::supports_evex(), "requires EVEX support");
  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support");
  assert(dst != xnoreg, "sanity");

  InstructionMark im(this);

  // Encoding attributes: EVEX, no opmask register, EVEX_FV tuple with 64-bit input size.
  InstructionAttr attrs(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
  attrs.set_is_evex_instruction();
  attrs.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);

  // Byte stream order: prefix, opcode, memory operand, immediate.
  vex_prefix(src3, src2->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attrs);
  emit_int8(0x25);
  // NOTE(review): the trailing 1 presumably accounts for the one-byte immediate
  // emitted right after the operand - confirm against emit_operand().
  emit_operand(dst, src3, 1);
  emit_int8(imm8);
}
|
||||
|
||||
void Assembler::evexpandps(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len) {
|
||||
assert(VM_Version::supports_evex(), "");
|
||||
assert(vector_len == AVX_512bit || VM_Version::supports_avx512vl(), "");
|
||||
@ -13452,6 +13553,13 @@ void Assembler::vzeroupper() {
|
||||
emit_copy(code_section(), vzup_code, vzup_len);
|
||||
}
|
||||
|
||||
void Assembler::vzeroall() {
|
||||
assert(VM_Version::supports_avx(), "requires AVX");
|
||||
InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
|
||||
(void)vex_prefix_and_encode(0, 0, 0, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
|
||||
emit_int8(0x77);
|
||||
}
|
||||
|
||||
void Assembler::pushq(Address src) {
|
||||
InstructionMark im(this);
|
||||
emit_int16(get_prefixq(src), (unsigned char)0xFF);
|
||||
|
@ -1891,6 +1891,10 @@ private:
|
||||
void pmaddwd(XMMRegister dst, XMMRegister src);
|
||||
void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vpmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
|
||||
void evpmadd52luq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
|
||||
void evpmadd52luq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
|
||||
void evpmadd52huq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
|
||||
void evpmadd52huq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
|
||||
|
||||
// Multiply add accumulate
|
||||
void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
@ -1990,6 +1994,11 @@ private:
|
||||
// Interleave Low Quadwords
|
||||
void punpcklqdq(XMMRegister dst, XMMRegister src);
|
||||
|
||||
void evpunpcklqdq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
|
||||
void evpunpcklqdq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
|
||||
void evpunpckhqdq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
|
||||
void evpunpckhqdq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
|
||||
|
||||
// Vector sum of absolute difference.
|
||||
void vpsadbw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
|
||||
@ -2092,6 +2101,10 @@ private:
|
||||
void shldl(Register dst, Register src, int8_t imm8);
|
||||
void shrdl(Register dst, Register src);
|
||||
void shrdl(Register dst, Register src, int8_t imm8);
|
||||
#ifdef _LP64
|
||||
void shldq(Register dst, Register src, int8_t imm8);
|
||||
void shrdq(Register dst, Register src, int8_t imm8);
|
||||
#endif
|
||||
|
||||
void shll(Register dst, int imm8);
|
||||
void shll(Register dst);
|
||||
@ -2616,7 +2629,8 @@ private:
|
||||
void pand(XMMRegister dst, XMMRegister src);
|
||||
void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||
void vpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void evpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void evpandq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||
|
||||
// Andn packed integers
|
||||
void pandn(XMMRegister dst, XMMRegister src);
|
||||
@ -2626,7 +2640,8 @@ private:
|
||||
void por(XMMRegister dst, XMMRegister src);
|
||||
void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||
void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void evporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
|
||||
void evporq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
|
||||
|
||||
// Xor packed integers
|
||||
void pxor(XMMRegister dst, XMMRegister src);
|
||||
@ -2640,6 +2655,7 @@ private:
|
||||
void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
|
||||
void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);
|
||||
void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
|
||||
void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);
|
||||
|
||||
// Vector compress/expand instructions.
|
||||
void evpcompressb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
|
||||
@ -2753,6 +2769,8 @@ private:
|
||||
// runtime code and native libraries.
|
||||
void vzeroupper();
|
||||
|
||||
void vzeroall();
|
||||
|
||||
// Vector double compares
|
||||
void vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
|
||||
void evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
|
||||
|
@ -5255,7 +5255,7 @@ void C2_MacroAssembler::vector_reverse_bit(BasicType bt, XMMRegister dst, XMMReg
|
||||
// Get the reverse bit sequence of lower nibble of each byte.
|
||||
vmovdqu(xtmp1, ExternalAddress(StubRoutines::x86::vector_reverse_bit_lut()), vec_enc, noreg);
|
||||
vbroadcast(T_INT, xtmp2, 0x0F0F0F0F, rtmp, vec_enc);
|
||||
vpandq(dst, xtmp2, src, vec_enc);
|
||||
evpandq(dst, xtmp2, src, vec_enc);
|
||||
vpshufb(dst, xtmp1, dst, vec_enc);
|
||||
vpsllq(dst, dst, 4, vec_enc);
|
||||
|
||||
@ -5266,7 +5266,7 @@ void C2_MacroAssembler::vector_reverse_bit(BasicType bt, XMMRegister dst, XMMReg
|
||||
|
||||
// Perform logical OR operation b/w left shifted reverse bit sequence of lower nibble and
|
||||
// right shifted reverse bit sequence of upper nibble to obtain the reverse bit sequence of each byte.
|
||||
vporq(xtmp2, dst, xtmp2, vec_enc);
|
||||
evporq(xtmp2, dst, xtmp2, vec_enc);
|
||||
vector_reverse_byte(bt, dst, xtmp2, vec_enc);
|
||||
|
||||
} else if(vec_enc == Assembler::AVX_512bit) {
|
||||
@ -5321,11 +5321,11 @@ void C2_MacroAssembler::vector_reverse_bit_gfni(BasicType bt, XMMRegister dst, X
|
||||
void C2_MacroAssembler::vector_swap_nbits(int nbits, int bitmask, XMMRegister dst, XMMRegister src,
|
||||
XMMRegister xtmp1, Register rtmp, int vec_enc) {
|
||||
vbroadcast(T_INT, xtmp1, bitmask, rtmp, vec_enc);
|
||||
vpandq(dst, xtmp1, src, vec_enc);
|
||||
evpandq(dst, xtmp1, src, vec_enc);
|
||||
vpsllq(dst, dst, nbits, vec_enc);
|
||||
vpandn(xtmp1, xtmp1, src, vec_enc);
|
||||
vpsrlq(xtmp1, xtmp1, nbits, vec_enc);
|
||||
vporq(dst, dst, xtmp1, vec_enc);
|
||||
evporq(dst, dst, xtmp1, vec_enc);
|
||||
}
|
||||
|
||||
void C2_MacroAssembler::vector_reverse_byte64(BasicType bt, XMMRegister dst, XMMRegister src, XMMRegister xtmp1,
|
||||
|
@ -1217,6 +1217,19 @@ void MacroAssembler::andptr(Register dst, int32_t imm32) {
|
||||
LP64_ONLY(andq(dst, imm32)) NOT_LP64(andl(dst, imm32));
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
// andq with an AddressLiteral source. If the literal is reachable it is used
// directly as an Address; otherwise its address is first loaded into rscratch
// and the operation goes through [rscratch + 0]. A scratch register must be
// supplied unless the literal is always reachable.
void MacroAssembler::andq(Register dst, AddressLiteral src, Register rscratch) {
  assert(rscratch != noreg || always_reachable(src), "missing");

  if (!reachable(src)) {
    // Indirect path: materialize the address, then operate through it.
    lea(rscratch, src);
    andq(dst, Address(rscratch, 0));
    return;
  }
  andq(dst, as_Address(src));
}
|
||||
#endif
|
||||
|
||||
void MacroAssembler::atomic_incl(Address counter_addr) {
|
||||
lock();
|
||||
incrementl(counter_addr);
|
||||
@ -9105,6 +9118,40 @@ void MacroAssembler::evrord(BasicType type, XMMRegister dst, KRegister mask, XMM
|
||||
fatal("Unexpected type argument %s", type2name(type)); break;
|
||||
}
|
||||
}
|
||||
|
||||
// evpandq with an AddressLiteral source. If the literal is reachable it is used
// directly as an Address; otherwise its address is first loaded into rscratch
// and the operation goes through [rscratch + 0]. A scratch register must be
// supplied unless the literal is always reachable.
void MacroAssembler::evpandq(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) {
  assert(rscratch != noreg || always_reachable(src), "missing");

  if (!reachable(src)) {
    // Indirect path: materialize the address, then operate through it.
    lea(rscratch, src);
    evpandq(dst, nds, Address(rscratch, 0), vector_len);
    return;
  }
  evpandq(dst, nds, as_Address(src), vector_len);
}
|
||||
|
||||
// evporq with an AddressLiteral source. If the literal is reachable it is used
// directly as an Address; otherwise its address is first loaded into rscratch
// and the operation goes through [rscratch + 0]. A scratch register must be
// supplied unless the literal is always reachable.
void MacroAssembler::evporq(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch) {
  assert(rscratch != noreg || always_reachable(src), "missing");

  if (!reachable(src)) {
    // Indirect path: materialize the address, then operate through it.
    lea(rscratch, src);
    evporq(dst, nds, Address(rscratch, 0), vector_len);
    return;
  }
  evporq(dst, nds, as_Address(src), vector_len);
}
|
||||
|
||||
// vpternlogq whose third operand is an AddressLiteral. If the literal is
// reachable it is used directly as an Address; otherwise its address is first
// loaded into rscratch and the operation goes through [rscratch + 0]. A scratch
// register must be supplied unless the literal is always reachable.
void MacroAssembler::vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, AddressLiteral src3, int vector_len, Register rscratch) {
  assert(rscratch != noreg || always_reachable(src3), "missing");

  if (!reachable(src3)) {
    // Indirect path: materialize the address, then operate through it.
    lea(rscratch, src3);
    vpternlogq(dst, imm8, src2, Address(rscratch, 0), vector_len);
    return;
  }
  vpternlogq(dst, imm8, src2, as_Address(src3), vector_len);
}
|
||||
|
||||
#if COMPILER2_OR_JVMCI
|
||||
|
||||
void MacroAssembler::fill_masked(BasicType bt, Address dst, XMMRegister xmm, KRegister mask,
|
||||
|
@ -730,6 +730,11 @@ public:
|
||||
void andptr(Register dst, int32_t src);
|
||||
void andptr(Register src1, Register src2) { LP64_ONLY(andq(src1, src2)) NOT_LP64(andl(src1, src2)) ; }
|
||||
|
||||
#ifdef _LP64
|
||||
using Assembler::andq;
|
||||
void andq(Register dst, AddressLiteral src, Register rscratch = noreg);
|
||||
#endif
|
||||
|
||||
void cmp8(AddressLiteral src1, int imm, Register rscratch = noreg);
|
||||
|
||||
// renamed to drag out the casting of address to int32_t/intptr_t
|
||||
@ -1754,6 +1759,15 @@ public:
|
||||
void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vlen_enc);
|
||||
void evrord(BasicType type, XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc);
|
||||
|
||||
using Assembler::evpandq;
|
||||
void evpandq(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
|
||||
|
||||
using Assembler::evporq;
|
||||
void evporq(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len, Register rscratch = noreg);
|
||||
|
||||
using Assembler::vpternlogq;
|
||||
void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, AddressLiteral src3, int vector_len, Register rscratch = noreg);
|
||||
|
||||
void alltrue(Register dst, uint masklen, KRegister src1, KRegister src2, KRegister kscratch);
|
||||
void anytrue(Register dst, uint masklen, KRegister src, KRegister kscratch);
|
||||
|
||||
|
@ -2519,7 +2519,7 @@ address StubGenerator::generate_base64_decodeBlock() {
|
||||
// Decode all bytes within our merged input
|
||||
__ evmovdquq(tmp, lookup_lo, Assembler::AVX_512bit);
|
||||
__ evpermt2b(tmp, input_initial_valid_b64, lookup_hi, Assembler::AVX_512bit);
|
||||
__ vporq(mask, tmp, input_initial_valid_b64, Assembler::AVX_512bit);
|
||||
__ evporq(mask, tmp, input_initial_valid_b64, Assembler::AVX_512bit);
|
||||
|
||||
// Check for error. Compare (decoded | initial) to all invalid.
|
||||
// If any bytes have their high-order bit set, then we have an error.
|
||||
@ -3709,6 +3709,10 @@ void StubGenerator::generate_initial() {
|
||||
StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
|
||||
}
|
||||
|
||||
if (UsePolyIntrinsics) {
|
||||
StubRoutines::_poly1305_processBlocks = generate_poly1305_processBlocks();
|
||||
}
|
||||
|
||||
if (UseCRC32CIntrinsics) {
|
||||
bool supports_clmul = VM_Version::supports_clmul();
|
||||
StubRoutines::x86::generate_CRC32C_table(supports_clmul);
|
||||
|
@ -387,6 +387,24 @@ class StubGenerator: public StubCodeGenerator {
|
||||
// Ghash single and multi block operations using AVX instructions
|
||||
address generate_avx_ghash_processBlocks();
|
||||
|
||||
// Poly1305 multiblock using IFMA instructions
|
||||
address generate_poly1305_processBlocks();
|
||||
void poly1305_process_blocks_avx512(const Register input, const Register length,
|
||||
const Register A0, const Register A1, const Register A2,
|
||||
const Register R0, const Register R1, const Register C1);
|
||||
void poly1305_multiply_scalar(const Register a0, const Register a1, const Register a2,
|
||||
const Register r0, const Register r1, const Register c1, bool only128,
|
||||
const Register t0, const Register t1, const Register t2,
|
||||
const Register mulql, const Register mulqh);
|
||||
void poly1305_multiply8_avx512(const XMMRegister A0, const XMMRegister A1, const XMMRegister A2,
|
||||
const XMMRegister R0, const XMMRegister R1, const XMMRegister R2, const XMMRegister R1P, const XMMRegister R2P,
|
||||
const XMMRegister P0L, const XMMRegister P0H, const XMMRegister P1L, const XMMRegister P1H, const XMMRegister P2L, const XMMRegister P2H,
|
||||
const XMMRegister TMP, const Register rscratch);
|
||||
void poly1305_limbs(const Register limbs, const Register a0, const Register a1, const Register a2, const Register t0, const Register t1);
|
||||
void poly1305_limbs_out(const Register a0, const Register a1, const Register a2, const Register limbs, const Register t0, const Register t1);
|
||||
void poly1305_limbs_avx512(const XMMRegister D0, const XMMRegister D1,
|
||||
const XMMRegister L0, const XMMRegister L1, const XMMRegister L2, bool padMSG,
|
||||
const XMMRegister TMP, const Register rscratch);
|
||||
|
||||
// BASE64 stubs
|
||||
|
||||
|
1027
src/hotspot/cpu/x86/stubGenerator_x86_64_poly.cpp
Normal file
1027
src/hotspot/cpu/x86/stubGenerator_x86_64_poly.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -33,7 +33,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_
|
||||
|
||||
enum platform_dependent_constants {
|
||||
code_size1 = 20000 LP64_ONLY(+10000), // simply increase if too small (assembler will crash if too small)
|
||||
code_size2 = 35300 LP64_ONLY(+35000) WINDOWS_ONLY(+2048) // simply increase if too small (assembler will crash if too small)
|
||||
code_size2 = 35300 LP64_ONLY(+45000) WINDOWS_ONLY(+2048) // simply increase if too small (assembler will crash if too small)
|
||||
};
|
||||
|
||||
class x86 {
|
||||
|
@ -947,6 +947,7 @@ void VM_Version::get_processor_features() {
|
||||
_features &= ~CPU_AVX512_VBMI;
|
||||
_features &= ~CPU_AVX512_VBMI2;
|
||||
_features &= ~CPU_AVX512_BITALG;
|
||||
_features &= ~CPU_AVX512_IFMA;
|
||||
}
|
||||
|
||||
if (UseAVX < 2)
|
||||
@ -978,6 +979,7 @@ void VM_Version::get_processor_features() {
|
||||
_features &= ~CPU_FLUSHOPT;
|
||||
_features &= ~CPU_GFNI;
|
||||
_features &= ~CPU_AVX512_BITALG;
|
||||
_features &= ~CPU_AVX512_IFMA;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1330,6 +1332,18 @@ void VM_Version::get_processor_features() {
|
||||
}
|
||||
#endif // COMPILER2 && ASSERT
|
||||
|
||||
#ifdef _LP64
|
||||
if (supports_avx512ifma() && supports_avx512vlbw() && MaxVectorSize >= 64) {
|
||||
if (FLAG_IS_DEFAULT(UsePolyIntrinsics)) {
|
||||
FLAG_SET_DEFAULT(UsePolyIntrinsics, true);
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
if (UsePolyIntrinsics) {
|
||||
warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
|
||||
FLAG_SET_DEFAULT(UsePolyIntrinsics, false);
|
||||
}
|
||||
|
||||
#ifdef _LP64
|
||||
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
|
||||
UseMultiplyToLenIntrinsic = true;
|
||||
@ -2894,6 +2908,8 @@ uint64_t VM_Version::feature_flags() {
|
||||
result |= CPU_AVX512CD;
|
||||
if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
|
||||
result |= CPU_AVX512DQ;
|
||||
if (_cpuid_info.sef_cpuid7_ebx.bits.avx512ifma != 0)
|
||||
result |= CPU_AVX512_IFMA;
|
||||
if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
|
||||
result |= CPU_AVX512PF;
|
||||
if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
|
||||
|
@ -223,7 +223,9 @@ class VM_Version : public Abstract_VM_Version {
|
||||
avx512dq : 1,
|
||||
: 1,
|
||||
adx : 1,
|
||||
: 3,
|
||||
: 1,
|
||||
avx512ifma : 1,
|
||||
: 1,
|
||||
clflushopt : 1,
|
||||
clwb : 1,
|
||||
: 1,
|
||||
@ -387,7 +389,8 @@ protected:
|
||||
decl(PKU, "pku", 54) /* Protection keys for user-mode pages */ \
|
||||
decl(OSPKE, "ospke", 55) /* OS enables protection keys */ \
|
||||
decl(CET_IBT, "cet_ibt", 56) /* Control Flow Enforcement - Indirect Branch Tracking */ \
|
||||
decl(CET_SS, "cet_ss", 57) /* Control Flow Enforcement - Shadow Stack */
|
||||
decl(CET_SS, "cet_ss", 57) /* Control Flow Enforcement - Shadow Stack */ \
|
||||
decl(AVX512_IFMA, "avx512_ifma", 58) /* Integer Vector FMA instructions*/
|
||||
|
||||
#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit),
|
||||
CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
|
||||
@ -667,6 +670,7 @@ public:
|
||||
static bool supports_adx() { return (_features & CPU_ADX) != 0; }
|
||||
static bool supports_evex() { return (_features & CPU_AVX512F) != 0; }
|
||||
static bool supports_avx512dq() { return (_features & CPU_AVX512DQ) != 0; }
|
||||
static bool supports_avx512ifma() { return (_features & CPU_AVX512_IFMA) != 0; }
|
||||
static bool supports_avx512pf() { return (_features & CPU_AVX512PF) != 0; }
|
||||
static bool supports_avx512er() { return (_features & CPU_AVX512ER) != 0; }
|
||||
static bool supports_avx512cd() { return (_features & CPU_AVX512CD) != 0; }
|
||||
|
@ -479,6 +479,9 @@ bool vmIntrinsics::disabled_by_jvm_flags(vmIntrinsics::ID id) {
|
||||
case vmIntrinsics::_base64_decodeBlock:
|
||||
if (!UseBASE64Intrinsics) return true;
|
||||
break;
|
||||
case vmIntrinsics::_poly1305_processBlocks:
|
||||
if (!UsePolyIntrinsics) return true;
|
||||
break;
|
||||
case vmIntrinsics::_updateBytesCRC32C:
|
||||
case vmIntrinsics::_updateDirectByteBufferCRC32C:
|
||||
if (!UseCRC32CIntrinsics) return true;
|
||||
|
@ -519,7 +519,7 @@ class methodHandle;
|
||||
do_class(java_util_Base64_Decoder, "java/util/Base64$Decoder") \
|
||||
do_intrinsic(_base64_decodeBlock, java_util_Base64_Decoder, decodeBlock_name, decodeBlock_signature, F_R) \
|
||||
do_name(decodeBlock_name, "decodeBlock") \
|
||||
do_signature(decodeBlock_signature, "([BII[BIZZ)I") \
|
||||
do_signature(decodeBlock_signature, "([BII[BIZZ)I") \
|
||||
\
|
||||
/* support for com.sun.crypto.provider.GHASH */ \
|
||||
do_class(com_sun_crypto_provider_ghash, "com/sun/crypto/provider/GHASH") \
|
||||
@ -527,6 +527,11 @@ class methodHandle;
|
||||
do_name(processBlocks_name, "processBlocks") \
|
||||
do_signature(ghash_processBlocks_signature, "([BII[J[J)V") \
|
||||
\
|
||||
/* support for com.sun.crypto.provider.Poly1305 */ \
|
||||
do_class(com_sun_crypto_provider_Poly1305, "com/sun/crypto/provider/Poly1305") \
|
||||
do_intrinsic(_poly1305_processBlocks, com_sun_crypto_provider_Poly1305, processMultipleBlocks_name, ghash_processBlocks_signature, F_R) \
|
||||
do_name(processMultipleBlocks_name, "processMultipleBlocks") \
|
||||
\
|
||||
/* support for java.util.zip */ \
|
||||
do_class(java_util_zip_CRC32, "java/util/zip/CRC32") \
|
||||
do_intrinsic(_updateCRC32, java_util_zip_CRC32, update_name, int2_int_signature, F_SN) \
|
||||
|
@ -739,6 +739,7 @@ bool C2Compiler::is_intrinsic_supported(const methodHandle& method, bool is_virt
|
||||
case vmIntrinsics::_ghash_processBlocks:
|
||||
case vmIntrinsics::_base64_encodeBlock:
|
||||
case vmIntrinsics::_base64_decodeBlock:
|
||||
case vmIntrinsics::_poly1305_processBlocks:
|
||||
case vmIntrinsics::_updateCRC32:
|
||||
case vmIntrinsics::_updateBytesCRC32:
|
||||
case vmIntrinsics::_updateByteBufferCRC32:
|
||||
|
@ -1166,6 +1166,7 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
|
||||
strcmp(call->as_CallLeaf()->_name, "electronicCodeBook_decryptAESCrypt") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "counterMode_AESCrypt") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "galoisCounterMode_AESCrypt") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "poly1305_processBlocks") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "encodeBlock") == 0 ||
|
||||
strcmp(call->as_CallLeaf()->_name, "decodeBlock") == 0 ||
|
||||
|
@ -612,6 +612,8 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
||||
return inline_base64_encodeBlock();
|
||||
case vmIntrinsics::_base64_decodeBlock:
|
||||
return inline_base64_decodeBlock();
|
||||
case vmIntrinsics::_poly1305_processBlocks:
|
||||
return inline_poly1305_processBlocks();
|
||||
|
||||
case vmIntrinsics::_encodeISOArray:
|
||||
case vmIntrinsics::_encodeByteISOArray:
|
||||
@ -6962,6 +6964,42 @@ bool LibraryCallKit::inline_base64_decodeBlock() {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool LibraryCallKit::inline_poly1305_processBlocks() {
|
||||
address stubAddr;
|
||||
const char *stubName;
|
||||
assert(UsePolyIntrinsics, "need Poly intrinsics support");
|
||||
assert(callee()->signature()->size() == 5, "poly1305_processBlocks has %d parameters", callee()->signature()->size());
|
||||
stubAddr = StubRoutines::poly1305_processBlocks();
|
||||
stubName = "poly1305_processBlocks";
|
||||
|
||||
if (!stubAddr) return false;
|
||||
null_check_receiver(); // null-check receiver
|
||||
if (stopped()) return true;
|
||||
|
||||
Node* input = argument(1);
|
||||
Node* input_offset = argument(2);
|
||||
Node* len = argument(3);
|
||||
Node* alimbs = argument(4);
|
||||
Node* rlimbs = argument(5);
|
||||
|
||||
input = must_be_not_null(input, true);
|
||||
alimbs = must_be_not_null(alimbs, true);
|
||||
rlimbs = must_be_not_null(rlimbs, true);
|
||||
|
||||
Node* input_start = array_element_address(input, input_offset, T_BYTE);
|
||||
assert(input_start, "input array is NULL");
|
||||
Node* acc_start = array_element_address(alimbs, intcon(0), T_LONG);
|
||||
assert(acc_start, "acc array is NULL");
|
||||
Node* r_start = array_element_address(rlimbs, intcon(0), T_LONG);
|
||||
assert(r_start, "r array is NULL");
|
||||
|
||||
Node* call = make_runtime_call(RC_LEAF | RC_NO_FP,
|
||||
OptoRuntime::poly1305_processBlocks_Type(),
|
||||
stubAddr, stubName, TypePtr::BOTTOM,
|
||||
input_start, len, acc_start, r_start);
|
||||
return true;
|
||||
}
|
||||
|
||||
//------------------------------inline_digestBase_implCompress-----------------------
|
||||
//
|
||||
// Calculate MD5 for single-block byte[] array.
|
||||
|
@ -293,6 +293,7 @@ class LibraryCallKit : public GraphKit {
|
||||
bool inline_ghash_processBlocks();
|
||||
bool inline_base64_encodeBlock();
|
||||
bool inline_base64_decodeBlock();
|
||||
bool inline_poly1305_processBlocks();
|
||||
bool inline_digestBase_implCompress(vmIntrinsics::ID id);
|
||||
bool inline_digestBase_implCompressMB(int predicate);
|
||||
bool inline_digestBase_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass,
|
||||
|
@ -1266,6 +1266,26 @@ const TypeFunc* OptoRuntime::base64_decodeBlock_Type() {
|
||||
return TypeFunc::make(domain, range);
|
||||
}
|
||||
|
||||
// Poly1305 processMultipleBlocks function
|
||||
const TypeFunc* OptoRuntime::poly1305_processBlocks_Type() {
|
||||
int argcnt = 4;
|
||||
|
||||
const Type** fields = TypeTuple::fields(argcnt);
|
||||
int argp = TypeFunc::Parms;
|
||||
fields[argp++] = TypePtr::NOTNULL; // input array
|
||||
fields[argp++] = TypeInt::INT; // input length
|
||||
fields[argp++] = TypePtr::NOTNULL; // accumulator array
|
||||
fields[argp++] = TypePtr::NOTNULL; // r array
|
||||
assert(argp == TypeFunc::Parms + argcnt, "correct decoding");
|
||||
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
|
||||
|
||||
// result type needed
|
||||
fields = TypeTuple::fields(1);
|
||||
fields[TypeFunc::Parms + 0] = NULL; // void
|
||||
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
|
||||
return TypeFunc::make(domain, range);
|
||||
}
|
||||
|
||||
//------------- Interpreter state access for on stack replacement
|
||||
const TypeFunc* OptoRuntime::osr_end_Type() {
|
||||
// create input type (domain)
|
||||
|
@ -280,6 +280,7 @@ private:
|
||||
static const TypeFunc* ghash_processBlocks_Type();
|
||||
static const TypeFunc* base64_encodeBlock_Type();
|
||||
static const TypeFunc* base64_decodeBlock_Type();
|
||||
static const TypeFunc* poly1305_processBlocks_Type();
|
||||
|
||||
static const TypeFunc* updateBytesCRC32_Type();
|
||||
static const TypeFunc* updateBytesCRC32C_Type();
|
||||
|
@ -238,6 +238,9 @@ const int ObjectAlignmentInBytes = 8;
|
||||
product(bool, UseBASE64Intrinsics, false, \
|
||||
"Use intrinsics for java.util.Base64") \
|
||||
\
|
||||
product(bool, UsePolyIntrinsics, false, DIAGNOSTIC, \
|
||||
"Use intrinsics for sun.security.util.math.intpoly") \
|
||||
\
|
||||
product(size_t, LargePageSizeInBytes, 0, \
|
||||
"Maximum large page size used (0 will use the default large " \
|
||||
"page size for the environment as the maximum)") \
|
||||
|
@ -130,6 +130,7 @@ address StubRoutines::_galoisCounterMode_AESCrypt = NULL;
|
||||
address StubRoutines::_ghash_processBlocks = NULL;
|
||||
address StubRoutines::_base64_encodeBlock = NULL;
|
||||
address StubRoutines::_base64_decodeBlock = NULL;
|
||||
address StubRoutines::_poly1305_processBlocks = NULL;
|
||||
|
||||
address StubRoutines::_md5_implCompress = NULL;
|
||||
address StubRoutines::_md5_implCompressMB = NULL;
|
||||
|
@ -211,6 +211,7 @@ class StubRoutines: AllStatic {
|
||||
static address _ghash_processBlocks;
|
||||
static address _base64_encodeBlock;
|
||||
static address _base64_decodeBlock;
|
||||
static address _poly1305_processBlocks;
|
||||
|
||||
static address _md5_implCompress;
|
||||
static address _md5_implCompressMB;
|
||||
@ -384,6 +385,7 @@ class StubRoutines: AllStatic {
|
||||
static address cipherBlockChaining_decryptAESCrypt() { return _cipherBlockChaining_decryptAESCrypt; }
|
||||
static address electronicCodeBook_encryptAESCrypt() { return _electronicCodeBook_encryptAESCrypt; }
|
||||
static address electronicCodeBook_decryptAESCrypt() { return _electronicCodeBook_decryptAESCrypt; }
|
||||
static address poly1305_processBlocks() { return _poly1305_processBlocks; }
|
||||
static address counterMode_AESCrypt() { return _counterMode_AESCrypt; }
|
||||
static address ghash_processBlocks() { return _ghash_processBlocks; }
|
||||
static address base64_encodeBlock() { return _base64_encodeBlock; }
|
||||
|
@ -544,6 +544,7 @@
|
||||
static_field(StubRoutines, _ghash_processBlocks, address) \
|
||||
static_field(StubRoutines, _base64_encodeBlock, address) \
|
||||
static_field(StubRoutines, _base64_decodeBlock, address) \
|
||||
static_field(StubRoutines, _poly1305_processBlocks, address) \
|
||||
static_field(StubRoutines, _updateBytesCRC32, address) \
|
||||
static_field(StubRoutines, _crc_table_adr, address) \
|
||||
static_field(StubRoutines, _crc32c_table_addr, address) \
|
||||
|
@ -34,6 +34,8 @@ import java.util.Objects;
|
||||
|
||||
import sun.security.util.math.*;
|
||||
import sun.security.util.math.intpoly.*;
|
||||
import jdk.internal.vm.annotation.IntrinsicCandidate;
|
||||
import jdk.internal.vm.annotation.ForceInline;
|
||||
|
||||
/**
|
||||
* This class represents the Poly1305 function defined in RFC 7539.
|
||||
@ -59,8 +61,10 @@ final class Poly1305 {
|
||||
private IntegerModuloP s;
|
||||
private MutableIntegerModuloP a;
|
||||
private final MutableIntegerModuloP n = ipl1305.get1().mutable();
|
||||
private final boolean checkWeakKey;
|
||||
|
||||
Poly1305() { }
|
||||
Poly1305() { this(true); }
|
||||
Poly1305(boolean checkKey) { checkWeakKey = checkKey; }
|
||||
|
||||
/**
|
||||
* Initialize the Poly1305 object
|
||||
@ -165,11 +169,15 @@ final class Poly1305 {
|
||||
blockOffset = 0;
|
||||
}
|
||||
}
|
||||
while (len >= BLOCK_LENGTH) {
|
||||
processBlock(input, offset, BLOCK_LENGTH);
|
||||
offset += BLOCK_LENGTH;
|
||||
len -= BLOCK_LENGTH;
|
||||
}
|
||||
|
||||
int blockMultipleLength = len & (~(BLOCK_LENGTH-1));
|
||||
long[] aLimbs = a.getLimbs();
|
||||
long[] rLimbs = r.getLimbs();
|
||||
processMultipleBlocksCheck(input, offset, blockMultipleLength, aLimbs, rLimbs);
|
||||
processMultipleBlocks(input, offset, blockMultipleLength, aLimbs, rLimbs);
|
||||
offset += blockMultipleLength;
|
||||
len -= blockMultipleLength;
|
||||
|
||||
if (len > 0) { // and len < BLOCK_LENGTH
|
||||
System.arraycopy(input, offset, block, 0, len);
|
||||
blockOffset = len;
|
||||
@ -235,12 +243,35 @@ final class Poly1305 {
|
||||
a.setProduct(r); // a = (a * r) % p
|
||||
}
|
||||
|
||||
// This is an intrinsified method. The unused parameters aLimbs and rLimbs are used by the intrinsic.
|
||||
// They correspond to this.a and this.r respectively
|
||||
@ForceInline
|
||||
@IntrinsicCandidate
|
||||
private void processMultipleBlocks(byte[] input, int offset, int length, long[] aLimbs, long[] rLimbs) {
|
||||
while (length >= BLOCK_LENGTH) {
|
||||
processBlock(input, offset, BLOCK_LENGTH);
|
||||
offset += BLOCK_LENGTH;
|
||||
length -= BLOCK_LENGTH;
|
||||
}
|
||||
}
|
||||
|
||||
private static void processMultipleBlocksCheck(byte[] input, int offset, int length, long[] aLimbs, long[] rLimbs) {
|
||||
Objects.checkFromIndexSize(offset, length, input.length);
|
||||
final int numLimbs = 5; // Intrinsic expects exactly 5 limbs
|
||||
if (aLimbs.length != numLimbs) {
|
||||
throw new RuntimeException("invalid accumulator length: " + aLimbs.length);
|
||||
}
|
||||
if (rLimbs.length != numLimbs) {
|
||||
throw new RuntimeException("invalid R length: " + rLimbs.length);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Partition the authentication key into the R and S components, clamp
|
||||
* the R value, and instantiate IntegerModuloP objects to R and S's
|
||||
* numeric values.
|
||||
*/
|
||||
private void setRSVals() {
|
||||
private void setRSVals() throws InvalidKeyException {
|
||||
// Clamp the bytes in the "r" half of the key.
|
||||
keyBytes[3] &= 15;
|
||||
keyBytes[7] &= 15;
|
||||
@ -250,6 +281,24 @@ final class Poly1305 {
|
||||
keyBytes[8] &= (byte)252;
|
||||
keyBytes[12] &= (byte)252;
|
||||
|
||||
if (checkWeakKey) {
|
||||
byte keyIsZero = 0;
|
||||
for (int i = 0; i < RS_LENGTH; i++) {
|
||||
keyIsZero |= keyBytes[i];
|
||||
}
|
||||
if (keyIsZero == 0) {
|
||||
throw new InvalidKeyException("R is set to zero");
|
||||
}
|
||||
|
||||
keyIsZero = 0;
|
||||
for (int i = RS_LENGTH; i < 2*RS_LENGTH; i++) {
|
||||
keyIsZero |= keyBytes[i];
|
||||
}
|
||||
if (keyIsZero == 0) {
|
||||
throw new InvalidKeyException("S is set to zero");
|
||||
}
|
||||
}
|
||||
|
||||
// Create IntegerModuloP elements from the r and s values
|
||||
r = ipl1305.getElement(keyBytes, 0, RS_LENGTH, (byte)0);
|
||||
s = ipl1305.getElement(keyBytes, RS_LENGTH, RS_LENGTH, (byte)0);
|
||||
|
@ -153,6 +153,11 @@ public interface IntegerModuloP {
|
||||
*/
|
||||
void asByteArray(byte[] result);
|
||||
|
||||
/**
 * Break encapsulation, used for IntrinsicCandidate functions.
 *
 * @return the limbs of this element as a {@code long[]}
 *         (NOTE(review): presumably the internal array rather than a
 *         copy - confirm against the implementing classes)
 */
|
||||
long[] getLimbs();
|
||||
|
||||
/**
|
||||
* Compute the multiplicative inverse of this field element.
|
||||
*
|
||||
|
@ -626,6 +626,10 @@ public abstract sealed class IntegerPolynomial implements IntegerFieldModuloP
|
||||
}
|
||||
limbsToByteArray(limbs, result);
|
||||
}
|
||||
|
||||
/**
 * Returns the {@code limbs} field of this element directly; no copy is made.
 */
public long[] getLimbs() {
|
||||
return limbs;
|
||||
}
|
||||
}
|
||||
|
||||
protected class MutableElement extends Element
|
||||
|
@ -231,6 +231,7 @@ public class AMD64 extends Architecture {
|
||||
OSPKE,
|
||||
CET_IBT,
|
||||
CET_SS,
|
||||
AVX512_IFMA,
|
||||
}
|
||||
|
||||
private final EnumSet<CPUFeature> features;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -28,3 +28,41 @@
|
||||
* @run main java.base/com.sun.crypto.provider.Poly1305UnitTest
|
||||
* @summary Unit test for com.sun.crypto.provider.Poly1305.
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @key randomness
|
||||
* @modules java.base/com.sun.crypto.provider
|
||||
* @run main java.base/com.sun.crypto.provider.Poly1305IntrinsicFuzzTest
|
||||
* @summary Unit test for com.sun.crypto.provider.Poly1305.
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @modules java.base/com.sun.crypto.provider
|
||||
* @run main java.base/com.sun.crypto.provider.Poly1305KAT
|
||||
* @summary Unit test for com.sun.crypto.provider.Poly1305.
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @key randomness
|
||||
* @modules java.base/com.sun.crypto.provider
|
||||
* @summary Unit test for IntrinsicCandidate in com.sun.crypto.provider.Poly1305.
|
||||
* @run main/othervm -Xcomp -XX:-TieredCompilation java.base/com.sun.crypto.provider.Poly1305IntrinsicFuzzTest
|
||||
*/
|
||||
|
||||
/*
|
||||
* @test
|
||||
* @modules java.base/com.sun.crypto.provider
|
||||
* @summary Unit test for IntrinsicCandidate in com.sun.crypto.provider.Poly1305.
|
||||
* @run main/othervm -Xcomp -XX:-TieredCompilation java.base/com.sun.crypto.provider.Poly1305KAT
|
||||
*/
|
||||
|
||||
package com.sun.crypto.provider.Cipher.ChaCha20;
|
||||
|
||||
/**
 * Placeholder entry point for this jtreg file. Its own main method only
 * prints a confirmation line.
 */
public class Poly1305UnitTestDriver {
    public static void main(String[] args) {
        final String verdict = "Passed";
        System.out.println(verdict);
    }
}
|
||||
|
@ -0,0 +1,95 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Intel Corporation. All rights reserved.
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package com.sun.crypto.provider;
|
||||
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
|
||||
import javax.crypto.spec.SecretKeySpec;
|
||||
|
||||
// This test case relies on the fact that single-byte Poly1305.engineUpdate(byte) does not have an intrinsic
|
||||
// In this way we can compare if the intrinsic and pure java produce same result
|
||||
// This test case is NOT entirely deterministic, it uses a random seed for pseudo-random number generator
|
||||
// If a failure occurs, hardcode the seed to make the test case deterministic
|
||||
public class Poly1305IntrinsicFuzzTest {
|
||||
public static void main(String[] args) throws Exception {
|
||||
//Note: it might be useful to increase this number during development of new Poly1305 intrinsics
|
||||
final int repeat = 100;
|
||||
for (int i = 0; i < repeat; i++) {
|
||||
run();
|
||||
}
|
||||
System.out.println("Fuzz Success");
|
||||
}
|
||||
|
||||
public static void run() throws Exception {
|
||||
java.util.Random rnd = new java.util.Random();
|
||||
long seed = rnd.nextLong();
|
||||
rnd.setSeed(seed);
|
||||
|
||||
byte[] key = new byte[32];
|
||||
rnd.nextBytes(key);
|
||||
int msgLen = rnd.nextInt(128, 4096); // x86_64 intrinsic requires 256 bytes minimum
|
||||
byte[] message = new byte[msgLen];
|
||||
|
||||
Poly1305 authenticator = new Poly1305();
|
||||
Poly1305 authenticatorSlow = new Poly1305();
|
||||
if (authenticator.engineGetMacLength() != 16) {
|
||||
throw new RuntimeException("The length of Poly1305 MAC must be 16-bytes.");
|
||||
}
|
||||
|
||||
authenticator.engineInit(new SecretKeySpec(key, 0, 32, "Poly1305"), null);
|
||||
authenticatorSlow.engineInit(new SecretKeySpec(key, 0, 32, "Poly1305"), null);
|
||||
|
||||
if (rnd.nextBoolean()) {
|
||||
// Prime just the buffer and/or accumulator (buffer can keep at most 16 bytes from previous engineUpdate)
|
||||
int initDataLen = rnd.nextInt(8, 24);
|
||||
authenticator.engineUpdate(message, 0, initDataLen);
|
||||
slowUpdate(authenticatorSlow, message, 0, initDataLen);
|
||||
}
|
||||
|
||||
if (rnd.nextBoolean()) {
|
||||
// Multiple calls to engineUpdate
|
||||
authenticator.engineUpdate(message, 0, message.length);
|
||||
slowUpdate(authenticatorSlow, message, 0, message.length);
|
||||
}
|
||||
|
||||
authenticator.engineUpdate(message, 0, message.length);
|
||||
slowUpdate(authenticatorSlow, message, 0, message.length);
|
||||
|
||||
byte[] tag = authenticator.engineDoFinal();
|
||||
byte[] tagSlow = authenticatorSlow.engineDoFinal();
|
||||
|
||||
if (!Arrays.equals(tag, tagSlow)) {
|
||||
throw new RuntimeException("[Seed "+seed+"] Tag mismatch: " + Arrays.toString(tag) + " != " + Arrays.toString(tagSlow));
|
||||
}
|
||||
}
|
||||
|
||||
static void slowUpdate(Poly1305 authenticator, byte[] message, int offset, int len) {
|
||||
len = Math.min(message.length, offset + len);
|
||||
for (int i = offset; i < len; i++) {
|
||||
authenticator.engineUpdate(message[i]);
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,199 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Intel Corporation. All rights reserved.
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
package com.sun.crypto.provider;
|
||||
|
||||
import java.util.*;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Arrays;
|
||||
|
||||
import javax.crypto.spec.SecretKeySpec;
|
||||
|
||||
public class Poly1305KAT {
|
||||
public static class TestData {
|
||||
public TestData(String name, String keyStr, String inputStr, String outStr) {
|
||||
HexFormat hex = HexFormat.of();
|
||||
testName = Objects.requireNonNull(name);
|
||||
key = hex.parseHex(Objects.requireNonNull(keyStr));
|
||||
input = hex.parseHex(Objects.requireNonNull(inputStr));
|
||||
expOutput = hex.parseHex(Objects.requireNonNull(outStr));
|
||||
}
|
||||
|
||||
public final String testName;
|
||||
public final byte[] key;
|
||||
public final byte[] input;
|
||||
public final byte[] expOutput;
|
||||
}
|
||||
|
||||
public static final List<TestData> testList = new LinkedList<TestData>() {{
|
||||
add(new TestData("RFC 7539 A.3 Test Vector #1",
|
||||
"0000000000000000000000000000000000000000000000000000000000000000",
|
||||
"0000000000000000000000000000000000000000000000000000000000000000" +
|
||||
"0000000000000000000000000000000000000000000000000000000000000000",
|
||||
"00000000000000000000000000000000"));
|
||||
add(new TestData("RFC 7539 A.3 Test Vector #2",
|
||||
"0000000000000000000000000000000036e5f6b5c5e06070f0efca96227a863e",
|
||||
"416e79207375626d697373696f6e20746f20746865204945544620696e74656e" +
|
||||
"6465642062792074686520436f6e7472696275746f7220666f72207075626c69" +
|
||||
"636174696f6e20617320616c6c206f722070617274206f6620616e2049455446" +
|
||||
"20496e7465726e65742d4472616674206f722052464320616e6420616e792073" +
|
||||
"746174656d656e74206d6164652077697468696e2074686520636f6e74657874" +
|
||||
"206f6620616e204945544620616374697669747920697320636f6e7369646572" +
|
||||
"656420616e20224945544620436f6e747269627574696f6e222e205375636820" +
|
||||
"73746174656d656e747320696e636c756465206f72616c2073746174656d656e" +
|
||||
"747320696e20494554462073657373696f6e732c2061732077656c6c20617320" +
|
||||
"7772697474656e20616e6420656c656374726f6e696320636f6d6d756e696361" +
|
||||
"74696f6e73206d61646520617420616e792074696d65206f7220706c6163652c" +
|
||||
"207768696368206172652061646472657373656420746f",
|
||||
"36e5f6b5c5e06070f0efca96227a863e"));
|
||||
add(new TestData("RFC 7539 A.3 Test Vector #3",
|
||||
"36e5f6b5c5e06070f0efca96227a863e00000000000000000000000000000000",
|
||||
"416e79207375626d697373696f6e20746f20746865204945544620696e74656e" +
|
||||
"6465642062792074686520436f6e7472696275746f7220666f72207075626c69" +
|
||||
"636174696f6e20617320616c6c206f722070617274206f6620616e2049455446" +
|
||||
"20496e7465726e65742d4472616674206f722052464320616e6420616e792073" +
|
||||
"746174656d656e74206d6164652077697468696e2074686520636f6e74657874" +
|
||||
"206f6620616e204945544620616374697669747920697320636f6e7369646572" +
|
||||
"656420616e20224945544620436f6e747269627574696f6e222e205375636820" +
|
||||
"73746174656d656e747320696e636c756465206f72616c2073746174656d656e" +
|
||||
"747320696e20494554462073657373696f6e732c2061732077656c6c20617320" +
|
||||
"7772697474656e20616e6420656c656374726f6e696320636f6d6d756e696361" +
|
||||
"74696f6e73206d61646520617420616e792074696d65206f7220706c6163652c" +
|
||||
"207768696368206172652061646472657373656420746f",
|
||||
"f3477e7cd95417af89a6b8794c310cf0"));
|
||||
add(new TestData("RFC 7539 A.3 Test Vector #4",
|
||||
"1c9240a5eb55d38af333888604f6b5f0473917c1402b80099dca5cbc207075c0",
|
||||
"2754776173206272696c6c69672c20616e642074686520736c6974687920746f" +
|
||||
"7665730a446964206779726520616e642067696d626c6520696e207468652077" +
|
||||
"6162653a0a416c6c206d696d737920776572652074686520626f726f676f7665" +
|
||||
"732c0a416e6420746865206d6f6d65207261746873206f757467726162652e",
|
||||
"4541669a7eaaee61e708dc7cbcc5eb62"));
|
||||
add(new TestData("RFC 7539 A.3 Test Vector #5: If one uses 130-bit partial reduction, does the code handle the case where partially reducedfinal result is not fully reduced?",
|
||||
"0200000000000000000000000000000000000000000000000000000000000000",
|
||||
"FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF",
|
||||
"03000000000000000000000000000000"));
|
||||
add(new TestData("RFC 7539 A.3 Test Vector #6: What happens if addition of s overflows modulo 2^128?",
|
||||
"02000000000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF",
|
||||
"02000000000000000000000000000000",
|
||||
"03000000000000000000000000000000"));
|
||||
add(new TestData("RFC 7539 A.3 Test Vector #7: What happens if data limb is all ones and there is carry from lower limb?",
|
||||
"0100000000000000000000000000000000000000000000000000000000000000",
|
||||
"FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF0FFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" +
|
||||
"11000000000000000000000000000000",
|
||||
"05000000000000000000000000000000"));
|
||||
add(new TestData("RFC 7539 A.3 Test Vector #8: What happens if final result from polynomial part is exactly 2^130-5?",
|
||||
"0100000000000000000000000000000000000000000000000000000000000000",
|
||||
"FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFBFEFEFEFEFEFEFEFEFEFEFEFEFEFEFE" +
|
||||
"01010101010101010101010101010101",
|
||||
"00000000000000000000000000000000"));
|
||||
add(new TestData("RFC 7539 A.3 Test Vector #9: What happens if final result from polynomial part is exactly 2^130-6?",
|
||||
"0200000000000000000000000000000000000000000000000000000000000000",
|
||||
"FDFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF",
|
||||
"FAFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"));
|
||||
add(new TestData("RFC 7539 A.3 Test Vector #10: What happens if 5*H+L-type reduction produces 131-bit intermediate result?",
|
||||
"0100000000000000040000000000000000000000000000000000000000000000",
|
||||
"E33594D7505E43B900000000000000003394D7505E4379CD0100000000000000" +
|
||||
"0000000000000000000000000000000001000000000000000000000000000000",
|
||||
"14000000000000005500000000000000"));
|
||||
add(new TestData("RFC 7539 A.3 Test Vector #11: What happens if 5*H+L-type reduction produces 131-bit final result?",
|
||||
"0100000000000000040000000000000000000000000000000000000000000000",
|
||||
"E33594D7505E43B900000000000000003394D7505E4379CD0100000000000000" +
|
||||
"00000000000000000000000000000000",
|
||||
"13000000000000000000000000000000"));
|
||||
}};
|
||||
|
||||
public static void main(String args[]) throws Exception {
|
||||
int testsPassed = 0;
|
||||
int testNumber = 0;
|
||||
|
||||
for (TestData test : testList) {
|
||||
System.out.println("*** Test " + ++testNumber + ": " +
|
||||
test.testName);
|
||||
if (runSingleTest(test)) {
|
||||
testsPassed++;
|
||||
}
|
||||
}
|
||||
System.out.println();
|
||||
|
||||
if (testsPassed != testNumber) {
|
||||
throw new RuntimeException("One or more tests failed. " +
|
||||
"Check output for details");
|
||||
}
|
||||
}
|
||||
|
||||
private static boolean runSingleTest(TestData testData) throws Exception {
|
||||
Poly1305 authenticator = new Poly1305(false);
|
||||
authenticator.engineInit(new SecretKeySpec(testData.key, 0, testData.key.length, "Poly1305"), null);
|
||||
authenticator.engineUpdate(testData.input, 0, testData.input.length);
|
||||
byte[] tag = authenticator.engineDoFinal();
|
||||
if (!Arrays.equals(tag, testData.expOutput)) {
|
||||
System.out.println("ERROR - Output Mismatch!");
|
||||
System.out.println("Expected:\n" +
|
||||
dumpHexBytes(testData.expOutput, testData.expOutput.length, "\n", " "));
|
||||
System.out.println("Actual:\n" +
|
||||
dumpHexBytes(tag, tag.length, "\n", " "));
|
||||
System.out.println();
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Dump the hex bytes of a buffer into string form.
|
||||
*
|
||||
* @param data The array of bytes to dump to stdout.
|
||||
* @param itemsPerLine The number of bytes to display per line
|
||||
* if the {@code lineDelim} character is blank then all bytes
|
||||
* will be printed on a single line.
|
||||
* @param lineDelim The delimiter between lines
|
||||
* @param itemDelim The delimiter between bytes
|
||||
*
|
||||
* @return The hexdump of the byte array
|
||||
*/
|
||||
private static String dumpHexBytes(byte[] data, int itemsPerLine,
|
||||
String lineDelim, String itemDelim) {
|
||||
return dumpHexBytes(ByteBuffer.wrap(data), itemsPerLine, lineDelim,
|
||||
itemDelim);
|
||||
}
|
||||
|
||||
private static String dumpHexBytes(ByteBuffer data, int itemsPerLine,
|
||||
String lineDelim, String itemDelim) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
if (data != null) {
|
||||
data.mark();
|
||||
int i = 0;
|
||||
while (data.remaining() > 0) {
|
||||
if (i % itemsPerLine == 0 && i != 0) {
|
||||
sb.append(lineDelim);
|
||||
}
|
||||
sb.append(String.format("%02X", data.get())).append(itemDelim);
|
||||
i++;
|
||||
}
|
||||
data.reset();
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
@ -65,7 +65,7 @@ public class CPUInfoTest {
|
||||
"avx512_vbmi2", "avx512_vbmi", "rdtscp", "rdpid",
|
||||
"hv", "fsrm", "avx512_bitalg", "gfni",
|
||||
"f16c", "pku", "ospke", "cet_ibt",
|
||||
"cet_ss"
|
||||
"cet_ss", "avx512_ifma"
|
||||
);
|
||||
// @formatter:on
|
||||
// Checkstyle: resume
|
||||
|
@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright (c) 2022, Intel Corporation. All rights reserved.
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package org.openjdk.bench.javax.crypto.full;
|
||||
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.Param;
|
||||
import org.openjdk.jmh.annotations.Setup;
|
||||
|
||||
import java.lang.invoke.MethodHandle;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.lang.reflect.Method;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.security.Key;
|
||||
import java.security.spec.AlgorithmParameterSpec;
|
||||
import javax.crypto.spec.SecretKeySpec;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
|
||||
@Measurement(iterations = 3, time = 10)
|
||||
@Warmup(iterations = 3, time = 10)
|
||||
@Fork(value = 1, jvmArgsAppend = {"--add-opens", "java.base/com.sun.crypto.provider=ALL-UNNAMED"})
|
||||
public class Poly1305DigestBench extends CryptoBase {
|
||||
public static final int SET_SIZE = 128;
|
||||
|
||||
@Param({"64", "256", "1024", "" + 16*1024, "" + 1024*1024})
|
||||
int dataSize;
|
||||
|
||||
private byte[][] data;
|
||||
int index = 0;
|
||||
private static MethodHandle polyEngineInit, polyEngineUpdate, polyEngineFinal;
|
||||
private static Object polyObj;
|
||||
|
||||
static {
|
||||
try {
|
||||
MethodHandles.Lookup lookup = MethodHandles.lookup();
|
||||
Class<?> polyClazz = Class.forName("com.sun.crypto.provider.Poly1305");
|
||||
Constructor<?> constructor = polyClazz.getDeclaredConstructor();
|
||||
constructor.setAccessible(true);
|
||||
polyObj = constructor.newInstance();
|
||||
|
||||
Method m = polyClazz.getDeclaredMethod("engineInit", Key.class, AlgorithmParameterSpec.class);
|
||||
m.setAccessible(true);
|
||||
polyEngineInit = lookup.unreflect(m);
|
||||
|
||||
m = polyClazz.getDeclaredMethod("engineUpdate", byte[].class, int.class, int.class);
|
||||
m.setAccessible(true);
|
||||
polyEngineUpdate = lookup.unreflect(m);
|
||||
|
||||
m = polyClazz.getDeclaredMethod("engineDoFinal");
|
||||
m.setAccessible(true);
|
||||
polyEngineFinal = lookup.unreflect(m);
|
||||
} catch (Throwable ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
}
|
||||
|
||||
@Setup
|
||||
public void setup() {
|
||||
setupProvider();
|
||||
data = fillRandom(new byte[SET_SIZE][dataSize]);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public byte[] digest() {
|
||||
try {
|
||||
byte[] d = data[index];
|
||||
index = (index +1) % SET_SIZE;
|
||||
polyEngineInit.invoke(polyObj, new SecretKeySpec(d, 0, 32, "Poly1305"), null);
|
||||
polyEngineUpdate.invoke(polyObj, d, 0, d.length);
|
||||
return (byte[])polyEngineFinal.invoke(polyObj);
|
||||
} catch (Throwable ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user