8073108: Use x86 and SPARC CPU instructions for GHASH acceleration
Reviewed-by: kvn, jrose
This commit is contained in:
parent
7c7b91845f
commit
c4c528df14
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2015, Red Hat Inc. All rights reserved.
|
* Copyright (c) 2015, Red Hat Inc. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
@ -190,6 +190,11 @@ void VM_Version::get_processor_features() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (UseGHASHIntrinsics) {
|
||||||
|
warning("GHASH intrinsics are not available on this CPU");
|
||||||
|
FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
|
||||||
|
}
|
||||||
|
|
||||||
if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
|
if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
|
||||||
UseCRC32Intrinsics = true;
|
UseCRC32Intrinsics = true;
|
||||||
}
|
}
|
||||||
|
@ -176,6 +176,11 @@ void VM_Version::initialize() {
|
|||||||
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
|
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (UseGHASHIntrinsics) {
|
||||||
|
warning("GHASH intrinsics are not available on this CPU");
|
||||||
|
FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
|
||||||
|
}
|
||||||
|
|
||||||
if (UseSHA) {
|
if (UseSHA) {
|
||||||
warning("SHA instructions are not available on this CPU");
|
warning("SHA instructions are not available on this CPU");
|
||||||
FLAG_SET_DEFAULT(UseSHA, false);
|
FLAG_SET_DEFAULT(UseSHA, false);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -129,6 +129,7 @@ class Assembler : public AbstractAssembler {
|
|||||||
flog3_op3 = 0x36,
|
flog3_op3 = 0x36,
|
||||||
edge_op3 = 0x36,
|
edge_op3 = 0x36,
|
||||||
fsrc_op3 = 0x36,
|
fsrc_op3 = 0x36,
|
||||||
|
xmulx_op3 = 0x36,
|
||||||
impdep2_op3 = 0x37,
|
impdep2_op3 = 0x37,
|
||||||
stpartialf_op3 = 0x37,
|
stpartialf_op3 = 0x37,
|
||||||
jmpl_op3 = 0x38,
|
jmpl_op3 = 0x38,
|
||||||
@ -220,6 +221,8 @@ class Assembler : public AbstractAssembler {
|
|||||||
mdtox_opf = 0x110,
|
mdtox_opf = 0x110,
|
||||||
mstouw_opf = 0x111,
|
mstouw_opf = 0x111,
|
||||||
mstosw_opf = 0x113,
|
mstosw_opf = 0x113,
|
||||||
|
xmulx_opf = 0x115,
|
||||||
|
xmulxhi_opf = 0x116,
|
||||||
mxtod_opf = 0x118,
|
mxtod_opf = 0x118,
|
||||||
mwtos_opf = 0x119,
|
mwtos_opf = 0x119,
|
||||||
|
|
||||||
@ -1212,6 +1215,9 @@ public:
|
|||||||
void movwtos( Register s, FloatRegister d ) { vis3_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); }
|
void movwtos( Register s, FloatRegister d ) { vis3_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); }
|
||||||
void movxtod( Register s, FloatRegister d ) { vis3_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); }
|
void movxtod( Register s, FloatRegister d ) { vis3_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); }
|
||||||
|
|
||||||
|
// Emits the XMULX instruction word (op3 = xmulx_op3, opf = xmulx_opf) with
// sources s1/s2 and destination d. Guarded by vis3_only().
// NOTE(review): presumably the low 64 bits of the XOR (carry-less) product -- confirm against the VIS3 spec.
void xmulx(Register s1, Register s2, Register d) {
  vis3_only();
  emit_int32(op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulx_opf) | rs2(s2));
}
|
||||||
|
// Emits the XMULXHI instruction word (op3 = xmulx_op3, opf = xmulxhi_opf) with
// sources s1/s2 and destination d. Guarded by vis3_only().
// NOTE(review): presumably the high 64 bits of the XOR (carry-less) product -- confirm against the VIS3 spec.
void xmulxhi(Register s1, Register s2, Register d) {
  vis3_only();
  emit_int32(op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulxhi_opf) | rs2(s2));
}
|
||||||
|
|
||||||
// Crypto SHA instructions
|
// Crypto SHA instructions
|
||||||
|
|
||||||
void sha1() { sha1_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha1_opf)); }
|
void sha1() { sha1_only(); emit_int32( op(arith_op) | op3(sha_op3) | opf(sha1_opf)); }
|
||||||
|
@ -4786,6 +4786,130 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Single and multi-block ghash operations */
|
||||||
|
address generate_ghash_processBlocks() {
|
||||||
|
__ align(CodeEntryAlignment);
|
||||||
|
Label L_ghash_loop, L_aligned, L_main;
|
||||||
|
StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
|
||||||
|
address start = __ pc();
|
||||||
|
|
||||||
|
Register state = I0;
|
||||||
|
Register subkeyH = I1;
|
||||||
|
Register data = I2;
|
||||||
|
Register len = I3;
|
||||||
|
|
||||||
|
__ save_frame(0);
|
||||||
|
|
||||||
|
__ ldx(state, 0, O0);
|
||||||
|
__ ldx(state, 8, O1);
|
||||||
|
|
||||||
|
// Loop label for multiblock operations
|
||||||
|
__ BIND(L_ghash_loop);
|
||||||
|
|
||||||
|
// Check if 'data' is unaligned
|
||||||
|
__ andcc(data, 7, G1);
|
||||||
|
__ br(Assembler::zero, false, Assembler::pt, L_aligned);
|
||||||
|
__ delayed()->nop();
|
||||||
|
|
||||||
|
Register left_shift = L1;
|
||||||
|
Register right_shift = L2;
|
||||||
|
Register data_ptr = L3;
|
||||||
|
|
||||||
|
// Get left and right shift values in bits
|
||||||
|
__ sll(G1, LogBitsPerByte, left_shift);
|
||||||
|
__ mov(64, right_shift);
|
||||||
|
__ sub(right_shift, left_shift, right_shift);
|
||||||
|
|
||||||
|
// Align to read 'data'
|
||||||
|
__ sub(data, G1, data_ptr);
|
||||||
|
|
||||||
|
// Load first 8 bytes of 'data'
|
||||||
|
__ ldx(data_ptr, 0, O4);
|
||||||
|
__ sllx(O4, left_shift, O4);
|
||||||
|
__ ldx(data_ptr, 8, O5);
|
||||||
|
__ srlx(O5, right_shift, G4);
|
||||||
|
__ bset(G4, O4);
|
||||||
|
|
||||||
|
// Load second 8 bytes of 'data'
|
||||||
|
__ sllx(O5, left_shift, O5);
|
||||||
|
__ ldx(data_ptr, 16, G4);
|
||||||
|
__ srlx(G4, right_shift, G4);
|
||||||
|
__ ba(L_main);
|
||||||
|
__ delayed()->bset(G4, O5);
|
||||||
|
|
||||||
|
// If 'data' is aligned, load normally
|
||||||
|
__ BIND(L_aligned);
|
||||||
|
__ ldx(data, 0, O4);
|
||||||
|
__ ldx(data, 8, O5);
|
||||||
|
|
||||||
|
__ BIND(L_main);
|
||||||
|
__ ldx(subkeyH, 0, O2);
|
||||||
|
__ ldx(subkeyH, 8, O3);
|
||||||
|
|
||||||
|
__ xor3(O0, O4, O0);
|
||||||
|
__ xor3(O1, O5, O1);
|
||||||
|
|
||||||
|
__ xmulxhi(O0, O3, G3);
|
||||||
|
__ xmulx(O0, O2, O5);
|
||||||
|
__ xmulxhi(O1, O2, G4);
|
||||||
|
__ xmulxhi(O1, O3, G5);
|
||||||
|
__ xmulx(O0, O3, G1);
|
||||||
|
__ xmulx(O1, O3, G2);
|
||||||
|
__ xmulx(O1, O2, O3);
|
||||||
|
__ xmulxhi(O0, O2, O4);
|
||||||
|
|
||||||
|
__ mov(0xE1, O0);
|
||||||
|
__ sllx(O0, 56, O0);
|
||||||
|
|
||||||
|
__ xor3(O5, G3, O5);
|
||||||
|
__ xor3(O5, G4, O5);
|
||||||
|
__ xor3(G5, G1, G1);
|
||||||
|
__ xor3(G1, O3, G1);
|
||||||
|
__ srlx(G2, 63, O1);
|
||||||
|
__ srlx(G1, 63, G3);
|
||||||
|
__ sllx(G2, 63, O3);
|
||||||
|
__ sllx(G2, 58, O2);
|
||||||
|
__ xor3(O3, O2, O2);
|
||||||
|
|
||||||
|
__ sllx(G1, 1, G1);
|
||||||
|
__ or3(G1, O1, G1);
|
||||||
|
|
||||||
|
__ xor3(G1, O2, G1);
|
||||||
|
|
||||||
|
__ sllx(G2, 1, G2);
|
||||||
|
|
||||||
|
__ xmulxhi(G1, O0, O1);
|
||||||
|
__ xmulx(G1, O0, O2);
|
||||||
|
__ xmulxhi(G2, O0, O3);
|
||||||
|
__ xmulx(G2, O0, G1);
|
||||||
|
|
||||||
|
__ xor3(O4, O1, O4);
|
||||||
|
__ xor3(O5, O2, O5);
|
||||||
|
__ xor3(O5, O3, O5);
|
||||||
|
|
||||||
|
__ sllx(O4, 1, O2);
|
||||||
|
__ srlx(O5, 63, O3);
|
||||||
|
|
||||||
|
__ or3(O2, O3, O0);
|
||||||
|
|
||||||
|
__ sllx(O5, 1, O1);
|
||||||
|
__ srlx(G1, 63, O2);
|
||||||
|
__ or3(O1, O2, O1);
|
||||||
|
__ xor3(O1, G3, O1);
|
||||||
|
|
||||||
|
__ deccc(len);
|
||||||
|
__ br(Assembler::notZero, true, Assembler::pt, L_ghash_loop);
|
||||||
|
__ delayed()->add(data, 16, data);
|
||||||
|
|
||||||
|
__ stx(O0, I0, 0);
|
||||||
|
__ stx(O1, I0, 8);
|
||||||
|
|
||||||
|
__ ret();
|
||||||
|
__ delayed()->restore();
|
||||||
|
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
void generate_initial() {
|
void generate_initial() {
|
||||||
// Generates all stubs and initializes the entry points
|
// Generates all stubs and initializes the entry points
|
||||||
|
|
||||||
@ -4859,6 +4983,10 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
|
StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
|
||||||
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
|
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
|
||||||
}
|
}
|
||||||
|
// generate GHASH intrinsics code
|
||||||
|
if (UseGHASHIntrinsics) {
|
||||||
|
StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
|
||||||
|
}
|
||||||
|
|
||||||
// generate SHA1/SHA256/SHA512 intrinsics code
|
// generate SHA1/SHA256/SHA512 intrinsics code
|
||||||
if (UseSHA1Intrinsics) {
|
if (UseSHA1Intrinsics) {
|
||||||
|
@ -300,6 +300,17 @@ void VM_Version::initialize() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GHASH/GCM intrinsics
|
||||||
|
if (has_vis3() && (UseVIS > 2)) {
|
||||||
|
if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
|
||||||
|
UseGHASHIntrinsics = true;
|
||||||
|
}
|
||||||
|
} else if (UseGHASHIntrinsics) {
|
||||||
|
if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
|
||||||
|
warning("GHASH intrinsics require VIS3 insructions support. Intriniscs will be disabled");
|
||||||
|
FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
|
||||||
|
}
|
||||||
|
|
||||||
// SHA1, SHA256, and SHA512 instructions were added to SPARC T-series at different times
|
// SHA1, SHA256, and SHA512 instructions were added to SPARC T-series at different times
|
||||||
if (has_sha1() || has_sha256() || has_sha512()) {
|
if (has_sha1() || has_sha256() || has_sha512()) {
|
||||||
if (UseVIS > 0) { // SHA intrinsics use VIS1 instructions
|
if (UseVIS > 0) { // SHA intrinsics use VIS1 instructions
|
||||||
|
@ -3095,8 +3095,16 @@ void Assembler::pshuflw(XMMRegister dst, Address src, int mode) {
|
|||||||
void Assembler::psrldq(XMMRegister dst, int shift) {
|
void Assembler::psrldq(XMMRegister dst, int shift) {
|
||||||
// Shift 128 bit value in xmm register by number of bytes.
|
// Shift 128 bit value in xmm register by number of bytes.
|
||||||
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||||
int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F,
|
int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F, false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
|
||||||
false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
|
emit_int8(0x73);
|
||||||
|
emit_int8((unsigned char)(0xC0 | encode));
|
||||||
|
emit_int8(shift);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Assembler::pslldq(XMMRegister dst, int shift) {
|
||||||
|
// Shift left 128 bit value in xmm register by number of bytes.
|
||||||
|
NOT_LP64(assert(VM_Version::supports_sse2(), ""));
|
||||||
|
int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F, false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
|
||||||
emit_int8(0x73);
|
emit_int8(0x73);
|
||||||
emit_int8((unsigned char)(0xC0 | encode));
|
emit_int8((unsigned char)(0xC0 | encode));
|
||||||
emit_int8(shift);
|
emit_int8(shift);
|
||||||
|
@ -1666,6 +1666,8 @@ private:
|
|||||||
|
|
||||||
// Shift Right by bytes Logical DoubleQuadword Immediate
|
// Shift Right by bytes Logical DoubleQuadword Immediate
|
||||||
void psrldq(XMMRegister dst, int shift);
|
void psrldq(XMMRegister dst, int shift);
|
||||||
|
// Shift Left by bytes Logical DoubleQuadword Immediate
|
||||||
|
void pslldq(XMMRegister dst, int shift);
|
||||||
|
|
||||||
// Logical Compare 128bit
|
// Logical Compare 128bit
|
||||||
void ptest(XMMRegister dst, XMMRegister src);
|
void ptest(XMMRegister dst, XMMRegister src);
|
||||||
|
@ -2727,6 +2727,167 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// byte swap x86 long
|
||||||
|
address generate_ghash_long_swap_mask() {
|
||||||
|
__ align(CodeEntryAlignment);
|
||||||
|
StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
|
||||||
|
address start = __ pc();
|
||||||
|
__ emit_data(0x0b0a0908, relocInfo::none, 0);
|
||||||
|
__ emit_data(0x0f0e0d0c, relocInfo::none, 0);
|
||||||
|
__ emit_data(0x03020100, relocInfo::none, 0);
|
||||||
|
__ emit_data(0x07060504, relocInfo::none, 0);
|
||||||
|
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
|
// byte swap x86 byte array
|
||||||
|
address generate_ghash_byte_swap_mask() {
|
||||||
|
__ align(CodeEntryAlignment);
|
||||||
|
StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
|
||||||
|
address start = __ pc();
|
||||||
|
__ emit_data(0x0c0d0e0f, relocInfo::none, 0);
|
||||||
|
__ emit_data(0x08090a0b, relocInfo::none, 0);
|
||||||
|
__ emit_data(0x04050607, relocInfo::none, 0);
|
||||||
|
__ emit_data(0x00010203, relocInfo::none, 0);
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Single and multi-block ghash operations */
|
||||||
|
address generate_ghash_processBlocks() {
|
||||||
|
assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support");
|
||||||
|
__ align(CodeEntryAlignment);
|
||||||
|
Label L_ghash_loop, L_exit;
|
||||||
|
StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
|
||||||
|
address start = __ pc();
|
||||||
|
|
||||||
|
const Register state = rdi;
|
||||||
|
const Register subkeyH = rsi;
|
||||||
|
const Register data = rdx;
|
||||||
|
const Register blocks = rcx;
|
||||||
|
|
||||||
|
const Address state_param(rbp, 8+0);
|
||||||
|
const Address subkeyH_param(rbp, 8+4);
|
||||||
|
const Address data_param(rbp, 8+8);
|
||||||
|
const Address blocks_param(rbp, 8+12);
|
||||||
|
|
||||||
|
const XMMRegister xmm_temp0 = xmm0;
|
||||||
|
const XMMRegister xmm_temp1 = xmm1;
|
||||||
|
const XMMRegister xmm_temp2 = xmm2;
|
||||||
|
const XMMRegister xmm_temp3 = xmm3;
|
||||||
|
const XMMRegister xmm_temp4 = xmm4;
|
||||||
|
const XMMRegister xmm_temp5 = xmm5;
|
||||||
|
const XMMRegister xmm_temp6 = xmm6;
|
||||||
|
const XMMRegister xmm_temp7 = xmm7;
|
||||||
|
|
||||||
|
__ enter();
|
||||||
|
|
||||||
|
__ movptr(state, state_param);
|
||||||
|
__ movptr(subkeyH, subkeyH_param);
|
||||||
|
__ movptr(data, data_param);
|
||||||
|
__ movptr(blocks, blocks_param);
|
||||||
|
|
||||||
|
__ movdqu(xmm_temp0, Address(state, 0));
|
||||||
|
__ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
|
||||||
|
|
||||||
|
__ movdqu(xmm_temp1, Address(subkeyH, 0));
|
||||||
|
__ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
|
||||||
|
|
||||||
|
__ BIND(L_ghash_loop);
|
||||||
|
__ movdqu(xmm_temp2, Address(data, 0));
|
||||||
|
__ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
|
||||||
|
|
||||||
|
__ pxor(xmm_temp0, xmm_temp2);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Multiply with the hash key
|
||||||
|
//
|
||||||
|
__ movdqu(xmm_temp3, xmm_temp0);
|
||||||
|
__ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0
|
||||||
|
__ movdqu(xmm_temp4, xmm_temp0);
|
||||||
|
__ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1
|
||||||
|
|
||||||
|
__ movdqu(xmm_temp5, xmm_temp0);
|
||||||
|
__ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0
|
||||||
|
__ movdqu(xmm_temp6, xmm_temp0);
|
||||||
|
__ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1
|
||||||
|
|
||||||
|
__ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0
|
||||||
|
|
||||||
|
__ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5
|
||||||
|
__ psrldq(xmm_temp4, 8); // shift by xmm4 64 bits to the right
|
||||||
|
__ pslldq(xmm_temp5, 8); // shift by xmm5 64 bits to the left
|
||||||
|
__ pxor(xmm_temp3, xmm_temp5);
|
||||||
|
__ pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result
|
||||||
|
// of the carry-less multiplication of
|
||||||
|
// xmm0 by xmm1.
|
||||||
|
|
||||||
|
// We shift the result of the multiplication by one bit position
|
||||||
|
// to the left to cope for the fact that the bits are reversed.
|
||||||
|
__ movdqu(xmm_temp7, xmm_temp3);
|
||||||
|
__ movdqu(xmm_temp4, xmm_temp6);
|
||||||
|
__ pslld (xmm_temp3, 1);
|
||||||
|
__ pslld(xmm_temp6, 1);
|
||||||
|
__ psrld(xmm_temp7, 31);
|
||||||
|
__ psrld(xmm_temp4, 31);
|
||||||
|
__ movdqu(xmm_temp5, xmm_temp7);
|
||||||
|
__ pslldq(xmm_temp4, 4);
|
||||||
|
__ pslldq(xmm_temp7, 4);
|
||||||
|
__ psrldq(xmm_temp5, 12);
|
||||||
|
__ por(xmm_temp3, xmm_temp7);
|
||||||
|
__ por(xmm_temp6, xmm_temp4);
|
||||||
|
__ por(xmm_temp6, xmm_temp5);
|
||||||
|
|
||||||
|
//
|
||||||
|
// First phase of the reduction
|
||||||
|
//
|
||||||
|
// Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts
|
||||||
|
// independently.
|
||||||
|
__ movdqu(xmm_temp7, xmm_temp3);
|
||||||
|
__ movdqu(xmm_temp4, xmm_temp3);
|
||||||
|
__ movdqu(xmm_temp5, xmm_temp3);
|
||||||
|
__ pslld(xmm_temp7, 31); // packed right shift shifting << 31
|
||||||
|
__ pslld(xmm_temp4, 30); // packed right shift shifting << 30
|
||||||
|
__ pslld(xmm_temp5, 25); // packed right shift shifting << 25
|
||||||
|
__ pxor(xmm_temp7, xmm_temp4); // xor the shifted versions
|
||||||
|
__ pxor(xmm_temp7, xmm_temp5);
|
||||||
|
__ movdqu(xmm_temp4, xmm_temp7);
|
||||||
|
__ pslldq(xmm_temp7, 12);
|
||||||
|
__ psrldq(xmm_temp4, 4);
|
||||||
|
__ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete
|
||||||
|
|
||||||
|
//
|
||||||
|
// Second phase of the reduction
|
||||||
|
//
|
||||||
|
// Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these
|
||||||
|
// shift operations.
|
||||||
|
__ movdqu(xmm_temp2, xmm_temp3);
|
||||||
|
__ movdqu(xmm_temp7, xmm_temp3);
|
||||||
|
__ movdqu(xmm_temp5, xmm_temp3);
|
||||||
|
__ psrld(xmm_temp2, 1); // packed left shifting >> 1
|
||||||
|
__ psrld(xmm_temp7, 2); // packed left shifting >> 2
|
||||||
|
__ psrld(xmm_temp5, 7); // packed left shifting >> 7
|
||||||
|
__ pxor(xmm_temp2, xmm_temp7); // xor the shifted versions
|
||||||
|
__ pxor(xmm_temp2, xmm_temp5);
|
||||||
|
__ pxor(xmm_temp2, xmm_temp4);
|
||||||
|
__ pxor(xmm_temp3, xmm_temp2);
|
||||||
|
__ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6
|
||||||
|
|
||||||
|
__ decrement(blocks);
|
||||||
|
__ jcc(Assembler::zero, L_exit);
|
||||||
|
__ movdqu(xmm_temp0, xmm_temp6);
|
||||||
|
__ addptr(data, 16);
|
||||||
|
__ jmp(L_ghash_loop);
|
||||||
|
|
||||||
|
__ BIND(L_exit);
|
||||||
|
// Byte swap 16-byte result
|
||||||
|
__ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
|
||||||
|
__ movdqu(Address(state, 0), xmm_temp6); // store the result
|
||||||
|
|
||||||
|
__ leave();
|
||||||
|
__ ret(0);
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Arguments:
|
* Arguments:
|
||||||
*
|
*
|
||||||
@ -3026,6 +3187,13 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
|
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Generate GHASH intrinsics code
|
||||||
|
if (UseGHASHIntrinsics) {
|
||||||
|
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
|
||||||
|
StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
|
||||||
|
StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
|
||||||
|
}
|
||||||
|
|
||||||
// Safefetch stubs.
|
// Safefetch stubs.
|
||||||
generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
|
generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
|
||||||
&StubRoutines::_safefetch32_fault_pc,
|
&StubRoutines::_safefetch32_fault_pc,
|
||||||
|
@ -3681,6 +3681,175 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
return start;
|
return start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// byte swap x86 long
|
||||||
|
address generate_ghash_long_swap_mask() {
|
||||||
|
__ align(CodeEntryAlignment);
|
||||||
|
StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
|
||||||
|
address start = __ pc();
|
||||||
|
__ emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none );
|
||||||
|
__ emit_data64(0x0706050403020100, relocInfo::none );
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
|
// byte swap x86 byte array
|
||||||
|
address generate_ghash_byte_swap_mask() {
|
||||||
|
__ align(CodeEntryAlignment);
|
||||||
|
StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
|
||||||
|
address start = __ pc();
|
||||||
|
__ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none );
|
||||||
|
__ emit_data64(0x0001020304050607, relocInfo::none );
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Single and multi-block ghash operations */
|
||||||
|
address generate_ghash_processBlocks() {
|
||||||
|
__ align(CodeEntryAlignment);
|
||||||
|
Label L_ghash_loop, L_exit;
|
||||||
|
StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
|
||||||
|
address start = __ pc();
|
||||||
|
|
||||||
|
const Register state = c_rarg0;
|
||||||
|
const Register subkeyH = c_rarg1;
|
||||||
|
const Register data = c_rarg2;
|
||||||
|
const Register blocks = c_rarg3;
|
||||||
|
|
||||||
|
#ifdef _WIN64
|
||||||
|
const int XMM_REG_LAST = 10;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
const XMMRegister xmm_temp0 = xmm0;
|
||||||
|
const XMMRegister xmm_temp1 = xmm1;
|
||||||
|
const XMMRegister xmm_temp2 = xmm2;
|
||||||
|
const XMMRegister xmm_temp3 = xmm3;
|
||||||
|
const XMMRegister xmm_temp4 = xmm4;
|
||||||
|
const XMMRegister xmm_temp5 = xmm5;
|
||||||
|
const XMMRegister xmm_temp6 = xmm6;
|
||||||
|
const XMMRegister xmm_temp7 = xmm7;
|
||||||
|
const XMMRegister xmm_temp8 = xmm8;
|
||||||
|
const XMMRegister xmm_temp9 = xmm9;
|
||||||
|
const XMMRegister xmm_temp10 = xmm10;
|
||||||
|
|
||||||
|
__ enter();
|
||||||
|
|
||||||
|
#ifdef _WIN64
|
||||||
|
// save the xmm registers which must be preserved 6-10
|
||||||
|
__ subptr(rsp, -rsp_after_call_off * wordSize);
|
||||||
|
for (int i = 6; i <= XMM_REG_LAST; i++) {
|
||||||
|
__ movdqu(xmm_save(i), as_XMMRegister(i));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
__ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
|
||||||
|
|
||||||
|
__ movdqu(xmm_temp0, Address(state, 0));
|
||||||
|
__ pshufb(xmm_temp0, xmm_temp10);
|
||||||
|
|
||||||
|
|
||||||
|
__ BIND(L_ghash_loop);
|
||||||
|
__ movdqu(xmm_temp2, Address(data, 0));
|
||||||
|
__ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
|
||||||
|
|
||||||
|
__ movdqu(xmm_temp1, Address(subkeyH, 0));
|
||||||
|
__ pshufb(xmm_temp1, xmm_temp10);
|
||||||
|
|
||||||
|
__ pxor(xmm_temp0, xmm_temp2);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Multiply with the hash key
|
||||||
|
//
|
||||||
|
__ movdqu(xmm_temp3, xmm_temp0);
|
||||||
|
__ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0
|
||||||
|
__ movdqu(xmm_temp4, xmm_temp0);
|
||||||
|
__ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1
|
||||||
|
|
||||||
|
__ movdqu(xmm_temp5, xmm_temp0);
|
||||||
|
__ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0
|
||||||
|
__ movdqu(xmm_temp6, xmm_temp0);
|
||||||
|
__ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1
|
||||||
|
|
||||||
|
__ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0
|
||||||
|
|
||||||
|
__ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5
|
||||||
|
__ psrldq(xmm_temp4, 8); // shift by xmm4 64 bits to the right
|
||||||
|
__ pslldq(xmm_temp5, 8); // shift by xmm5 64 bits to the left
|
||||||
|
__ pxor(xmm_temp3, xmm_temp5);
|
||||||
|
__ pxor(xmm_temp6, xmm_temp4); // Register pair <xmm6:xmm3> holds the result
|
||||||
|
// of the carry-less multiplication of
|
||||||
|
// xmm0 by xmm1.
|
||||||
|
|
||||||
|
// We shift the result of the multiplication by one bit position
|
||||||
|
// to the left to cope for the fact that the bits are reversed.
|
||||||
|
__ movdqu(xmm_temp7, xmm_temp3);
|
||||||
|
__ movdqu(xmm_temp8, xmm_temp6);
|
||||||
|
__ pslld(xmm_temp3, 1);
|
||||||
|
__ pslld(xmm_temp6, 1);
|
||||||
|
__ psrld(xmm_temp7, 31);
|
||||||
|
__ psrld(xmm_temp8, 31);
|
||||||
|
__ movdqu(xmm_temp9, xmm_temp7);
|
||||||
|
__ pslldq(xmm_temp8, 4);
|
||||||
|
__ pslldq(xmm_temp7, 4);
|
||||||
|
__ psrldq(xmm_temp9, 12);
|
||||||
|
__ por(xmm_temp3, xmm_temp7);
|
||||||
|
__ por(xmm_temp6, xmm_temp8);
|
||||||
|
__ por(xmm_temp6, xmm_temp9);
|
||||||
|
|
||||||
|
//
|
||||||
|
// First phase of the reduction
|
||||||
|
//
|
||||||
|
// Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts
|
||||||
|
// independently.
|
||||||
|
__ movdqu(xmm_temp7, xmm_temp3);
|
||||||
|
__ movdqu(xmm_temp8, xmm_temp3);
|
||||||
|
__ movdqu(xmm_temp9, xmm_temp3);
|
||||||
|
__ pslld(xmm_temp7, 31); // packed right shift shifting << 31
|
||||||
|
__ pslld(xmm_temp8, 30); // packed right shift shifting << 30
|
||||||
|
__ pslld(xmm_temp9, 25); // packed right shift shifting << 25
|
||||||
|
__ pxor(xmm_temp7, xmm_temp8); // xor the shifted versions
|
||||||
|
__ pxor(xmm_temp7, xmm_temp9);
|
||||||
|
__ movdqu(xmm_temp8, xmm_temp7);
|
||||||
|
__ pslldq(xmm_temp7, 12);
|
||||||
|
__ psrldq(xmm_temp8, 4);
|
||||||
|
__ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete
|
||||||
|
|
||||||
|
//
|
||||||
|
// Second phase of the reduction
|
||||||
|
//
|
||||||
|
// Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these
|
||||||
|
// shift operations.
|
||||||
|
__ movdqu(xmm_temp2, xmm_temp3);
|
||||||
|
__ movdqu(xmm_temp4, xmm_temp3);
|
||||||
|
__ movdqu(xmm_temp5, xmm_temp3);
|
||||||
|
__ psrld(xmm_temp2, 1); // packed left shifting >> 1
|
||||||
|
__ psrld(xmm_temp4, 2); // packed left shifting >> 2
|
||||||
|
__ psrld(xmm_temp5, 7); // packed left shifting >> 7
|
||||||
|
__ pxor(xmm_temp2, xmm_temp4); // xor the shifted versions
|
||||||
|
__ pxor(xmm_temp2, xmm_temp5);
|
||||||
|
__ pxor(xmm_temp2, xmm_temp8);
|
||||||
|
__ pxor(xmm_temp3, xmm_temp2);
|
||||||
|
__ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6
|
||||||
|
|
||||||
|
__ decrement(blocks);
|
||||||
|
__ jcc(Assembler::zero, L_exit);
|
||||||
|
__ movdqu(xmm_temp0, xmm_temp6);
|
||||||
|
__ addptr(data, 16);
|
||||||
|
__ jmp(L_ghash_loop);
|
||||||
|
|
||||||
|
__ BIND(L_exit);
|
||||||
|
__ pshufb(xmm_temp6, xmm_temp10); // Byte swap 16-byte result
|
||||||
|
__ movdqu(Address(state, 0), xmm_temp6); // store the result
|
||||||
|
|
||||||
|
#ifdef _WIN64
|
||||||
|
// restore xmm regs belonging to calling function
|
||||||
|
for (int i = 6; i <= XMM_REG_LAST; i++) {
|
||||||
|
__ movdqu(as_XMMRegister(i), xmm_save(i));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
__ leave();
|
||||||
|
__ ret(0);
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Arguments:
|
* Arguments:
|
||||||
*
|
*
|
||||||
@ -4120,6 +4289,13 @@ class StubGenerator: public StubCodeGenerator {
|
|||||||
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
|
StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Generate GHASH intrinsics code
|
||||||
|
if (UseGHASHIntrinsics) {
|
||||||
|
StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
|
||||||
|
StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
|
||||||
|
StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
|
||||||
|
}
|
||||||
|
|
||||||
// Safefetch stubs.
|
// Safefetch stubs.
|
||||||
generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
|
generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
|
||||||
&StubRoutines::_safefetch32_fault_pc,
|
&StubRoutines::_safefetch32_fault_pc,
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -33,6 +33,8 @@
|
|||||||
|
|
||||||
address StubRoutines::x86::_verify_mxcsr_entry = NULL;
|
address StubRoutines::x86::_verify_mxcsr_entry = NULL;
|
||||||
address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
|
address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
|
||||||
|
// Address of the GHASH long-swap shuffle mask. Stays NULL unless the stub
// generator emits the mask (done only when UseGHASHIntrinsics is set).
address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
|
||||||
|
// Address of the GHASH byte-swap shuffle mask. Stays NULL unless the stub
// generator emits the mask (done only when UseGHASHIntrinsics is set).
address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
|
||||||
|
|
||||||
uint64_t StubRoutines::x86::_crc_by128_masks[] =
|
uint64_t StubRoutines::x86::_crc_by128_masks[] =
|
||||||
{
|
{
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -36,10 +36,15 @@
|
|||||||
// masks and table for CRC32
|
// masks and table for CRC32
|
||||||
static uint64_t _crc_by128_masks[];
|
static uint64_t _crc_by128_masks[];
|
||||||
static juint _crc_table[];
|
static juint _crc_table[];
|
||||||
|
// swap mask for ghash
|
||||||
|
static address _ghash_long_swap_mask_addr;
|
||||||
|
static address _ghash_byte_swap_mask_addr;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
|
static address verify_mxcsr_entry() { return _verify_mxcsr_entry; }
|
||||||
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
|
static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
|
||||||
static address crc_by128_masks_addr() { return (address)_crc_by128_masks; }
|
static address crc_by128_masks_addr() { return (address)_crc_by128_masks; }
|
||||||
|
// Accessor for the GHASH long-swap shuffle mask address.
static address ghash_long_swap_mask_addr() {
  return _ghash_long_swap_mask_addr;
}
|
||||||
|
// Accessor for the GHASH byte-swap shuffle mask address.
static address ghash_byte_swap_mask_addr() {
  return _ghash_byte_swap_mask_addr;
}
|
||||||
|
|
||||||
#endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
|
#endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
|
||||||
|
@ -677,6 +677,17 @@ void VM_Version::get_processor_features() {
|
|||||||
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
|
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GHASH/GCM intrinsics
|
||||||
|
if (UseCLMUL && (UseSSE > 2)) {
|
||||||
|
if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
|
||||||
|
UseGHASHIntrinsics = true;
|
||||||
|
}
|
||||||
|
} else if (UseGHASHIntrinsics) {
|
||||||
|
if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
|
||||||
|
warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
|
||||||
|
FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
|
||||||
|
}
|
||||||
|
|
||||||
if (UseSHA) {
|
if (UseSHA) {
|
||||||
warning("SHA instructions are not available on this CPU");
|
warning("SHA instructions are not available on this CPU");
|
||||||
FLAG_SET_DEFAULT(UseSHA, false);
|
FLAG_SET_DEFAULT(UseSHA, false);
|
||||||
|
@ -846,6 +846,12 @@
|
|||||||
do_name( implCompressMB_name, "implCompressMultiBlock") \
|
do_name( implCompressMB_name, "implCompressMultiBlock") \
|
||||||
do_signature(implCompressMB_signature, "([BII)I") \
|
do_signature(implCompressMB_signature, "([BII)I") \
|
||||||
\
|
\
|
||||||
|
/* support for com.sun.crypto.provider.GHASH */ \
|
||||||
|
do_class(com_sun_crypto_provider_ghash, "com/sun/crypto/provider/GHASH") \
|
||||||
|
do_intrinsic(_ghash_processBlocks, com_sun_crypto_provider_ghash, processBlocks_name, ghash_processBlocks_signature, F_S) \
|
||||||
|
do_name(processBlocks_name, "processBlocks") \
|
||||||
|
do_signature(ghash_processBlocks_signature, "([BII[J[J)V") \
|
||||||
|
\
|
||||||
/* support for java.util.zip */ \
|
/* support for java.util.zip */ \
|
||||||
do_class(java_util_zip_CRC32, "java/util/zip/CRC32") \
|
do_class(java_util_zip_CRC32, "java/util/zip/CRC32") \
|
||||||
do_intrinsic(_updateCRC32, java_util_zip_CRC32, update_name, int2_int_signature, F_SN) \
|
do_intrinsic(_updateCRC32, java_util_zip_CRC32, update_name, int2_int_signature, F_SN) \
|
||||||
|
@ -966,6 +966,7 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
|
|||||||
strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
|
strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
|
||||||
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
|
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
|
||||||
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0 ||
|
strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0 ||
|
||||||
|
strcmp(call->as_CallLeaf()->_name, "ghash_processBlocks") == 0 ||
|
||||||
strcmp(call->as_CallLeaf()->_name, "sha1_implCompress") == 0 ||
|
strcmp(call->as_CallLeaf()->_name, "sha1_implCompress") == 0 ||
|
||||||
strcmp(call->as_CallLeaf()->_name, "sha1_implCompressMB") == 0 ||
|
strcmp(call->as_CallLeaf()->_name, "sha1_implCompressMB") == 0 ||
|
||||||
strcmp(call->as_CallLeaf()->_name, "sha256_implCompress") == 0 ||
|
strcmp(call->as_CallLeaf()->_name, "sha256_implCompress") == 0 ||
|
||||||
|
@ -278,6 +278,7 @@ class LibraryCallKit : public GraphKit {
|
|||||||
Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
|
Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
|
||||||
Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
|
Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
|
||||||
Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
|
Node* get_original_key_start_from_aescrypt_object(Node* aescrypt_object);
|
||||||
|
bool inline_ghash_processBlocks();
|
||||||
bool inline_sha_implCompress(vmIntrinsics::ID id);
|
bool inline_sha_implCompress(vmIntrinsics::ID id);
|
||||||
bool inline_digestBase_implCompressMB(int predicate);
|
bool inline_digestBase_implCompressMB(int predicate);
|
||||||
bool inline_sha_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass_SHA,
|
bool inline_sha_implCompressMB(Node* digestBaseObj, ciInstanceKlass* instklass_SHA,
|
||||||
@ -528,6 +529,10 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
|
|||||||
predicates = 3;
|
predicates = 3;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case vmIntrinsics::_ghash_processBlocks:
|
||||||
|
if (!UseGHASHIntrinsics) return NULL;
|
||||||
|
break;
|
||||||
|
|
||||||
case vmIntrinsics::_updateCRC32:
|
case vmIntrinsics::_updateCRC32:
|
||||||
case vmIntrinsics::_updateBytesCRC32:
|
case vmIntrinsics::_updateBytesCRC32:
|
||||||
case vmIntrinsics::_updateByteBufferCRC32:
|
case vmIntrinsics::_updateByteBufferCRC32:
|
||||||
@ -929,6 +934,9 @@ bool LibraryCallKit::try_to_inline(int predicate) {
|
|||||||
case vmIntrinsics::_mulAdd:
|
case vmIntrinsics::_mulAdd:
|
||||||
return inline_mulAdd();
|
return inline_mulAdd();
|
||||||
|
|
||||||
|
case vmIntrinsics::_ghash_processBlocks:
|
||||||
|
return inline_ghash_processBlocks();
|
||||||
|
|
||||||
case vmIntrinsics::_encodeISOArray:
|
case vmIntrinsics::_encodeISOArray:
|
||||||
return inline_encodeISOArray();
|
return inline_encodeISOArray();
|
||||||
|
|
||||||
@ -5858,6 +5866,35 @@ Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypt
|
|||||||
return _gvn.transform(region);
|
return _gvn.transform(region);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//------------------------------inline_ghash_processBlocks
|
||||||
|
// Intrinsic expansion for the GHASH processBlocks method (Java signature
// "([BII[J[J)V"): replaces the call with a leaf runtime call to the
// platform stub returned by StubRoutines::ghash_processBlocks().
// Always returns true, i.e. reports the intrinsic as successfully inlined.
bool LibraryCallKit::inline_ghash_processBlocks() {
|
||||||
|
address stubAddr;
|
||||||
|
const char *stubName;
|
||||||
|
assert(UseGHASHIntrinsics, "need GHASH intrinsics support");
|
||||||
|
|
||||||
|
stubAddr = StubRoutines::ghash_processBlocks();
|
||||||
|
stubName = "ghash_processBlocks";
|
||||||
|
|
||||||
|
// Incoming Java arguments, in declaration order:
// byte[] data, int offset, int len, long[] state, long[] subkeyH.
Node* data = argument(0);
|
||||||
|
Node* offset = argument(1);
|
||||||
|
Node* len = argument(2);
|
||||||
|
Node* state = argument(3);
|
||||||
|
Node* subkeyH = argument(4);
|
||||||
|
|
||||||
|
// Raw element addresses handed to the stub: element 0 of each long[] and
// the byte at data[offset].
// NOTE(review): the asserts below test the compile-time Node* values only;
// they do not appear to guard against a null Java array at run time - confirm.
Node* state_start = array_element_address(state, intcon(0), T_LONG);
|
||||||
|
assert(state_start, "state is NULL");
|
||||||
|
Node* subkeyH_start = array_element_address(subkeyH, intcon(0), T_LONG);
|
||||||
|
assert(subkeyH_start, "subkeyH is NULL");
|
||||||
|
Node* data_start = array_element_address(data, offset, T_BYTE);
|
||||||
|
assert(data_start, "data is NULL");
|
||||||
|
|
||||||
|
// The stub takes (state, subkeyH, data, len), which is a different order
// from the Java parameter list. The returned node is not used afterwards.
Node* ghash = make_runtime_call(RC_LEAF|RC_NO_FP,
|
||||||
|
OptoRuntime::ghash_processBlocks_Type(),
|
||||||
|
stubAddr, stubName, TypePtr::BOTTOM,
|
||||||
|
state_start, subkeyH_start, data_start, len);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
//------------------------------inline_sha_implCompress-----------------------
|
//------------------------------inline_sha_implCompress-----------------------
|
||||||
//
|
//
|
||||||
// Calculate SHA (i.e., SHA-1) for single-block byte[] array.
|
// Calculate SHA (i.e., SHA-1) for single-block byte[] array.
|
||||||
|
@ -987,7 +987,25 @@ const TypeFunc* OptoRuntime::mulAdd_Type() {
|
|||||||
return TypeFunc::make(domain, range);
|
return TypeFunc::make(domain, range);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GHASH block processing
|
||||||
|
// Builds the TypeFunc describing the GHASH stub call: four arguments
// (three non-null pointers followed by one int) and no return value.
const TypeFunc* OptoRuntime::ghash_processBlocks_Type() {
|
||||||
|
int argcnt = 4;
|
||||||
|
|
||||||
|
// Argument (domain) tuple; slots are filled starting at TypeFunc::Parms.
const Type** fields = TypeTuple::fields(argcnt);
|
||||||
|
int argp = TypeFunc::Parms;
|
||||||
|
fields[argp++] = TypePtr::NOTNULL; // state
|
||||||
|
fields[argp++] = TypePtr::NOTNULL; // subkeyH
|
||||||
|
fields[argp++] = TypePtr::NOTNULL; // data
|
||||||
|
fields[argp++] = TypeInt::INT; // blocks
|
||||||
|
// Sanity check: exactly argcnt slots were filled above.
assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
|
||||||
|
const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
|
||||||
|
|
||||||
|
// result type needed
|
||||||
|
// The range tuple is made with TypeFunc::Parms entries only, so it carries
// no result slot; the single field allocated here is set to NULL for void.
fields = TypeTuple::fields(1);
|
||||||
|
fields[TypeFunc::Parms+0] = NULL; // void
|
||||||
|
const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
|
||||||
|
return TypeFunc::make(domain, range);
|
||||||
|
}
|
||||||
|
|
||||||
//------------- Interpreter state access for on stack replacement
|
//------------- Interpreter state access for on stack replacement
|
||||||
const TypeFunc* OptoRuntime::osr_end_Type() {
|
const TypeFunc* OptoRuntime::osr_end_Type() {
|
||||||
|
@ -316,6 +316,8 @@ private:
|
|||||||
|
|
||||||
static const TypeFunc* mulAdd_Type();
|
static const TypeFunc* mulAdd_Type();
|
||||||
|
|
||||||
|
static const TypeFunc* ghash_processBlocks_Type();
|
||||||
|
|
||||||
static const TypeFunc* updateBytesCRC32_Type();
|
static const TypeFunc* updateBytesCRC32_Type();
|
||||||
|
|
||||||
// leaf on stack replacement interpreter accessor types
|
// leaf on stack replacement interpreter accessor types
|
||||||
|
@ -641,6 +641,9 @@ class CommandLineFlags {
|
|||||||
product(bool, UseSHA, false, \
|
product(bool, UseSHA, false, \
|
||||||
"Control whether SHA instructions can be used on SPARC") \
|
"Control whether SHA instructions can be used on SPARC") \
|
||||||
\
|
\
|
||||||
|
product(bool, UseGHASHIntrinsics, false, \
|
||||||
|
"Use intrinsics for GHASH versions of crypto") \
|
||||||
|
\
|
||||||
product(size_t, LargePageSizeInBytes, 0, \
|
product(size_t, LargePageSizeInBytes, 0, \
|
||||||
"Large page size (0 to let VM choose the page size)") \
|
"Large page size (0 to let VM choose the page size)") \
|
||||||
\
|
\
|
||||||
|
@ -125,6 +125,7 @@ address StubRoutines::_aescrypt_encryptBlock = NULL;
|
|||||||
address StubRoutines::_aescrypt_decryptBlock = NULL;
|
address StubRoutines::_aescrypt_decryptBlock = NULL;
|
||||||
address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
|
address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
|
||||||
address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
|
address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
|
||||||
|
address StubRoutines::_ghash_processBlocks = NULL;
|
||||||
|
|
||||||
address StubRoutines::_sha1_implCompress = NULL;
|
address StubRoutines::_sha1_implCompress = NULL;
|
||||||
address StubRoutines::_sha1_implCompressMB = NULL;
|
address StubRoutines::_sha1_implCompressMB = NULL;
|
||||||
|
@ -185,6 +185,7 @@ class StubRoutines: AllStatic {
|
|||||||
static address _aescrypt_decryptBlock;
|
static address _aescrypt_decryptBlock;
|
||||||
static address _cipherBlockChaining_encryptAESCrypt;
|
static address _cipherBlockChaining_encryptAESCrypt;
|
||||||
static address _cipherBlockChaining_decryptAESCrypt;
|
static address _cipherBlockChaining_decryptAESCrypt;
|
||||||
|
static address _ghash_processBlocks;
|
||||||
|
|
||||||
static address _sha1_implCompress;
|
static address _sha1_implCompress;
|
||||||
static address _sha1_implCompressMB;
|
static address _sha1_implCompressMB;
|
||||||
@ -346,6 +347,7 @@ class StubRoutines: AllStatic {
|
|||||||
static address aescrypt_decryptBlock() { return _aescrypt_decryptBlock; }
|
static address aescrypt_decryptBlock() { return _aescrypt_decryptBlock; }
|
||||||
static address cipherBlockChaining_encryptAESCrypt() { return _cipherBlockChaining_encryptAESCrypt; }
|
static address cipherBlockChaining_encryptAESCrypt() { return _cipherBlockChaining_encryptAESCrypt; }
|
||||||
static address cipherBlockChaining_decryptAESCrypt() { return _cipherBlockChaining_decryptAESCrypt; }
|
static address cipherBlockChaining_decryptAESCrypt() { return _cipherBlockChaining_decryptAESCrypt; }
|
||||||
|
static address ghash_processBlocks() { return _ghash_processBlocks; }
|
||||||
|
|
||||||
static address sha1_implCompress() { return _sha1_implCompress; }
|
static address sha1_implCompress() { return _sha1_implCompress; }
|
||||||
static address sha1_implCompressMB() { return _sha1_implCompressMB; }
|
static address sha1_implCompressMB() { return _sha1_implCompressMB; }
|
||||||
|
@ -828,6 +828,7 @@ typedef CompactHashtable<Symbol*, char> SymbolCompactHashTable;
|
|||||||
static_field(StubRoutines, _aescrypt_decryptBlock, address) \
|
static_field(StubRoutines, _aescrypt_decryptBlock, address) \
|
||||||
static_field(StubRoutines, _cipherBlockChaining_encryptAESCrypt, address) \
|
static_field(StubRoutines, _cipherBlockChaining_encryptAESCrypt, address) \
|
||||||
static_field(StubRoutines, _cipherBlockChaining_decryptAESCrypt, address) \
|
static_field(StubRoutines, _cipherBlockChaining_decryptAESCrypt, address) \
|
||||||
|
static_field(StubRoutines, _ghash_processBlocks, address) \
|
||||||
static_field(StubRoutines, _updateBytesCRC32, address) \
|
static_field(StubRoutines, _updateBytesCRC32, address) \
|
||||||
static_field(StubRoutines, _crc_table_adr, address) \
|
static_field(StubRoutines, _crc_table_adr, address) \
|
||||||
static_field(StubRoutines, _multiplyToLen, address) \
|
static_field(StubRoutines, _multiplyToLen, address) \
|
||||||
|
@ -31,6 +31,7 @@ import java.security.AlgorithmParameters;
|
|||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
import javax.crypto.Cipher;
|
import javax.crypto.Cipher;
|
||||||
import javax.crypto.SecretKey;
|
import javax.crypto.SecretKey;
|
||||||
|
import javax.crypto.spec.GCMParameterSpec;
|
||||||
import javax.crypto.spec.IvParameterSpec;
|
import javax.crypto.spec.IvParameterSpec;
|
||||||
import javax.crypto.spec.SecretKeySpec;
|
import javax.crypto.spec.SecretKeySpec;
|
||||||
|
|
||||||
@ -62,6 +63,10 @@ abstract public class TestAESBase {
|
|||||||
Cipher dCipher;
|
Cipher dCipher;
|
||||||
AlgorithmParameters algParams;
|
AlgorithmParameters algParams;
|
||||||
SecretKey key;
|
SecretKey key;
|
||||||
|
GCMParameterSpec gcm_spec;
|
||||||
|
byte[] aad;
|
||||||
|
int tlen = 12;
|
||||||
|
byte[] iv;
|
||||||
|
|
||||||
static int numThreads = 0;
|
static int numThreads = 0;
|
||||||
int threadId;
|
int threadId;
|
||||||
@ -100,6 +105,12 @@ abstract public class TestAESBase {
|
|||||||
int ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0);
|
int ivLen = (algorithm.equals("AES") ? 16 : algorithm.equals("DES") ? 8 : 0);
|
||||||
IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]);
|
IvParameterSpec initVector = new IvParameterSpec(new byte[ivLen]);
|
||||||
cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
|
cipher.init(Cipher.ENCRYPT_MODE, key, initVector);
|
||||||
|
} else if (mode.equals("GCM")) {
|
||||||
|
iv = new byte[64];
|
||||||
|
random.nextBytes(iv);
|
||||||
|
aad = new byte[5];
|
||||||
|
random.nextBytes(aad);
|
||||||
|
gcm_init();
|
||||||
} else {
|
} else {
|
||||||
algParams = cipher.getParameters();
|
algParams = cipher.getParameters();
|
||||||
cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
|
cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
|
||||||
@ -186,4 +197,12 @@ abstract public class TestAESBase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
abstract void childShowCipher();
|
abstract void childShowCipher();
|
||||||
|
|
||||||
|
// Sets up the encrypting cipher for a GCM run: builds a GCMParameterSpec from
// the current iv, obtains a fresh SunJCE Cipher instance for
// algorithm/mode/paddingStr, initializes it for encryption with key, and then
// passes the aad bytes to it.
void gcm_init() throws Exception {
|
||||||
|
// Tag length in bytes; multiplied by 8 below to give the spec a length in bits.
tlen = 12;
|
||||||
|
gcm_spec = new GCMParameterSpec(tlen * 8, iv);
|
||||||
|
// A new Cipher object is obtained on every call rather than re-initializing
// the existing one.
// NOTE(review): presumably because GCM encryption rejects re-init with the
// same key/IV on one instance - confirm.
cipher = Cipher.getInstance(algorithm + "/" + mode + "/" + paddingStr, "SunJCE");
|
||||||
|
cipher.init(Cipher.ENCRYPT_MODE, key, gcm_spec);
|
||||||
|
// NOTE(review): update() feeds aad as ordinary cipher input and discards the
// returned bytes; if aad is meant to be additional authenticated data,
// updateAAD() would be the matching call - confirm against the decrypt path.
cipher.update(aad);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2012, 2014, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -32,7 +32,11 @@ public class TestAESEncode extends TestAESBase {
|
|||||||
@Override
|
@Override
|
||||||
public void run() {
|
public void run() {
|
||||||
try {
|
try {
|
||||||
if (!noReinit) cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
|
if (mode.equals("GCM")) {
|
||||||
|
gcm_init();
|
||||||
|
} else if (!noReinit) {
|
||||||
|
cipher.init(Cipher.ENCRYPT_MODE, key, algParams);
|
||||||
|
}
|
||||||
encode = new byte[encodeLength];
|
encode = new byte[encodeLength];
|
||||||
if (testingMisalignment) {
|
if (testingMisalignment) {
|
||||||
int tempSize = cipher.update(input, encInputOffset, (msgSize - lastChunkSize), encode, encOutputOffset);
|
int tempSize = cipher.update(input, encInputOffset, (msgSize - lastChunkSize), encode, encOutputOffset);
|
||||||
|
@ -44,6 +44,13 @@
|
|||||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 TestAESMain
|
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 TestAESMain
|
||||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain
|
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain
|
||||||
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain
|
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=ECB -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain
|
||||||
|
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM TestAESMain
|
||||||
|
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 TestAESMain
|
||||||
|
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencOutputOffset=1 TestAESMain
|
||||||
|
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DdecOutputOffset=1 TestAESMain
|
||||||
|
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 TestAESMain
|
||||||
|
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 TestAESMain
|
||||||
|
* @run main/othervm/timeout=600 -Xbatch -DcheckOutput=true -Dmode=GCM -DencInputOffset=1 -DencOutputOffset=1 -DdecOutputOffset=1 -DpaddingStr=NoPadding -DmsgSize=640 TestAESMain
|
||||||
*
|
*
|
||||||
* @author Tom Deneau
|
* @author Tom Deneau
|
||||||
*/
|
*/
|
||||||
|
Loading…
Reference in New Issue
Block a user