This commit is contained in:
Jesper Wilhelmsson 2017-10-04 21:02:13 +02:00
commit b708f0ddbd
239 changed files with 7625 additions and 2054 deletions

View File

@ -113,6 +113,7 @@ PLATFORM_MODULES += \
jdk.dynalink \
jdk.httpserver \
jdk.incubator.httpclient \
jdk.internal.vm.compiler.management \
jdk.jsobject \
jdk.localedata \
jdk.naming.dns \
@ -215,6 +216,7 @@ endif
ifeq ($(INCLUDE_GRAAL), false)
MODULES_FILTER += jdk.internal.vm.compiler
MODULES_FILTER += jdk.internal.vm.compiler.management
endif
################################################################################

View File

@ -54,15 +54,4 @@ $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/module/ModuleLoaderMap.java:
GENSRC_JAVA_BASE += $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/module/ModuleLoaderMap.java
$(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat: \
$(TOPDIR)/src/java.base/share/classes/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat \
$(VARDEPS_FILE) $(BUILD_TOOLS_JDK)
$(MKDIR) -p $(@D)
$(RM) $@ $@.tmp
$(TOOL_GENCLASSLOADERMAP) -boot $(BOOT_MODULES_LIST) \
-platform $(PLATFORM_MODULES_LIST) -o $@.tmp $<
$(MV) $@.tmp $@
GENSRC_JAVA_BASE += $(SUPPORT_OUTPUTDIR)/gensrc/java.base/jdk/internal/vm/cds/resources/ModuleLoaderMap.dat
################################################################################

View File

@ -47,6 +47,9 @@ endif
ifeq ($(call check-jvm-feature, zero), true)
JVM_CFLAGS_FEATURES += -DZERO -DCC_INTERP -DZERO_LIBARCH='"$(OPENJDK_TARGET_CPU_LEGACY_LIB)"' $(LIBFFI_CFLAGS)
JVM_LIBS_FEATURES += $(LIBFFI_LIBS)
ifeq ($(OPENJDK_TARGET_CPU), sparcv9)
BUILD_LIBJVM_EXTRA_FILES := $(TOPDIR)/src/hotspot/cpu/sparc/memset_with_concurrent_readers_sparc.cpp
endif
endif
ifeq ($(call check-jvm-feature, shark), true)

View File

@ -77,30 +77,22 @@ public class GenModuleLoaderMap {
throw new IllegalArgumentException(source + " not exist");
}
boolean needsQuotes = outfile.toString().contains(".java.tmp");
try (BufferedWriter bw = Files.newBufferedWriter(outfile, StandardCharsets.UTF_8);
PrintWriter writer = new PrintWriter(bw)) {
for (String line : Files.readAllLines(source)) {
if (line.contains("@@BOOT_MODULE_NAMES@@")) {
line = patch(line, "@@BOOT_MODULE_NAMES@@", bootModules, needsQuotes);
line = patch(line, "@@BOOT_MODULE_NAMES@@", bootModules);
} else if (line.contains("@@PLATFORM_MODULE_NAMES@@")) {
line = patch(line, "@@PLATFORM_MODULE_NAMES@@", platformModules, needsQuotes);
line = patch(line, "@@PLATFORM_MODULE_NAMES@@", platformModules);
}
writer.println(line);
}
}
}
private static String patch(String s, String tag, Stream<String> stream, boolean needsQuotes) {
String mns = null;
if (needsQuotes) {
mns = stream.sorted()
.collect(Collectors.joining("\",\n \""));
} else {
mns = stream.sorted()
.collect(Collectors.joining("\n"));
}
private static String patch(String s, String tag, Stream<String> stream) {
String mns = stream.sorted()
.collect(Collectors.joining("\",\n \""));
return s.replace(tag, mns);
}

View File

@ -2840,6 +2840,44 @@ void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Regi
bind(L_done);
}
// Code for BigInteger::mulAdd intrinsic
// out = r0
// in = r1
// offset = r2 (already out.length-offset)
// len = r3
// k = r4
//
// pseudo code from java implementation:
// carry = 0;
// offset = out.length-offset - 1;
// for (int j=len-1; j >= 0; j--) {
// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry;
// out[offset--] = (int)product;
// carry = product >>> 32;
// }
// return (int)carry;
//
// On exit the `out` register holds the final carry (zero when len == 0).
// The `in`, `offset` and `len` registers are modified by the emitted code,
// and rscratch1/rscratch2 are used as temporaries.
void MacroAssembler::mul_add(Register out, Register in, Register offset,
Register len, Register k) {
Label LOOP, END;
// pre-loop
cmp(len, zr); // cmp, not cbz/cbnz: to use condition twice => less branches
// len == 0: the returned carry is zero and the loop is skipped entirely.
csel(out, zr, out, Assembler::EQ);
br(Assembler::EQ, END);
// Point both cursors one element past the last one to process; the loop
// walks backwards using pre-decrement addressing.
add(in, in, len, LSL, 2); // in[j+1] address
add(offset, out, offset, LSL, 2); // out[offset + 1] address
mov(out, zr); // used to keep carry now
BIND(LOOP);
// rscratch1 = in[j] * k + carry
ldrw(rscratch1, Address(pre(in, -4)));
madd(rscratch1, rscratch1, k, out);
// rscratch1 += out[offset]; the low 32 bits are stored back in place
ldrw(rscratch2, Address(pre(offset, -4)));
add(rscratch1, rscratch1, rscratch2);
strw(rscratch1, Address(offset));
// the upper 32 bits of the sum become the carry for the next iteration
lsr(out, rscratch1, 32);
subs(len, len, 1);
br(Assembler::NE, LOOP);
BIND(END);
}
/**
* Emits code to update CRC-32 with a byte value according to constants in table
*
@ -3291,6 +3329,7 @@ void MacroAssembler::load_mirror(Register dst, Register method) {
ldr(dst, Address(dst, ConstMethod::constants_offset()));
ldr(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes()));
ldr(dst, Address(dst, mirror_offset));
resolve_oop_handle(dst);
}
void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) {

View File

@ -1265,6 +1265,7 @@ public:
void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
Register zlen, Register tmp1, Register tmp2, Register tmp3,
Register tmp4, Register tmp5, Register tmp6, Register tmp7);
void mul_add(Register out, Register in, Register offs, Register len, Register k);
// ISB may be needed because of a safepoint
void maybe_isb() { isb(); }

View File

@ -3607,6 +3607,63 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
// Generates the "squareToLen" stub and returns its entry address.
// Squaring is implemented by calling multiply_to_len with the second
// operand (y, ylen) set to the first one (x, xlen).
address generate_squareToLen() {
// squareToLen algorithm for sizes 1..127 described in java code works
// faster than multiply_to_len on some CPUs and slower on others, but
// multiply_to_len shows a bit better overall results
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "squareToLen");
address start = __ pc();
// Incoming arguments.
const Register x = r0;
const Register xlen = r1;
const Register z = r2;
const Register zlen = r3;
// Second multiplicand for multiply_to_len; filled in from x/xlen below.
const Register y = r4; // == x
const Register ylen = r5; // == xlen
// Temporaries handed to multiply_to_len.
const Register tmp1 = r10;
const Register tmp2 = r11;
const Register tmp3 = r12;
const Register tmp4 = r13;
const Register tmp5 = r14;
const Register tmp6 = r15;
const Register tmp7 = r16;
// y and ylen are overwritten below, so their previous contents are saved
// on the stack and restored before returning.
RegSet spilled_regs = RegSet::of(y, ylen);
BLOCK_COMMENT("Entry:");
__ enter();
__ push(spilled_regs, sp);
__ mov(y, x);
__ mov(ylen, xlen);
__ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
__ pop(spilled_regs, sp);
__ leave();
__ ret(lr);
return start;
}
// Generates the "mulAdd" stub and returns its entry address.
// The stub is a thin frame (enter/leave) around MacroAssembler::mul_add,
// with the arguments taken from r0..r4 in the order out, in, offset, len, k.
address generate_mulAdd() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "mulAdd");
address start = __ pc();
// Incoming arguments.
const Register out = r0;
const Register in = r1;
const Register offset = r2;
const Register len = r3;
const Register k = r4;
BLOCK_COMMENT("Entry:");
__ enter();
__ mul_add(out, in, offset, len, k);
__ leave();
__ ret(lr);
return start;
}
void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3, FloatRegister tmp4) {
@ -4913,6 +4970,14 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_multiplyToLen = generate_multiplyToLen();
}
if (UseSquareToLenIntrinsic) {
StubRoutines::_squareToLen = generate_squareToLen();
}
if (UseMulAddIntrinsic) {
StubRoutines::_mulAdd = generate_mulAdd();
}
if (UseMontgomeryMultiplyIntrinsic) {
StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);

View File

@ -2297,6 +2297,7 @@ void TemplateTable::load_field_cp_cache_entry(Register obj,
ConstantPoolCacheEntry::f1_offset())));
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ ldr(obj, Address(obj, mirror_offset));
__ resolve_oop_handle(obj);
}
}

View File

@ -340,6 +340,14 @@ void VM_Version::get_processor_features() {
UseMultiplyToLenIntrinsic = true;
}
if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
UseSquareToLenIntrinsic = true;
}
if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
UseMulAddIntrinsic = true;
}
if (FLAG_IS_DEFAULT(UseBarriersForVolatile)) {
UseBarriersForVolatile = (_features & CPU_DMB_ATOMICS) != 0;
}

View File

@ -2899,6 +2899,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp)
ldr(tmp, Address(tmp, ConstMethod::constants_offset()));
ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset_in_bytes()));
ldr(mirror, Address(tmp, mirror_offset));
resolve_oop_handle(mirror);
}

View File

@ -2963,6 +2963,7 @@ void TemplateTable::load_field_cp_cache_entry(Register Rcache,
cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ ldr(Robj, Address(Robj, mirror_offset));
__ resolve_oop_handle(Robj);
}
}

View File

@ -517,6 +517,9 @@ class Assembler : public AbstractAssembler {
XXPERMDI_OPCODE= (60u << OPCODE_SHIFT | 10u << 3),
XXMRGHW_OPCODE = (60u << OPCODE_SHIFT | 18u << 3),
XXMRGLW_OPCODE = (60u << OPCODE_SHIFT | 50u << 3),
XXSPLTW_OPCODE = (60u << OPCODE_SHIFT | 164u << 2),
XXLXOR_OPCODE = (60u << OPCODE_SHIFT | 154u << 3),
XXLEQV_OPCODE = (60u << OPCODE_SHIFT | 186u << 3),
// Vector Permute and Formatting
VPKPX_OPCODE = (4u << OPCODE_SHIFT | 782u ),
@ -1125,6 +1128,7 @@ class Assembler : public AbstractAssembler {
static int vsplti_sim(int x) { return opp_u_field(x, 15, 11); } // for vsplti* instructions
static int vsldoi_shb(int x) { return opp_u_field(x, 25, 22); } // for vsldoi instruction
static int vcmp_rc( int x) { return opp_u_field(x, 21, 21); } // for vcmp* instructions
static int xxsplt_uim(int x) { return opp_u_field(x, 15, 14); } // for xxsplt* instructions
//static int xo1( int x) { return opp_u_field(x, 29, 21); }// is contained in our opcodes
//static int xo2( int x) { return opp_u_field(x, 30, 21); }// is contained in our opcodes
@ -1308,6 +1312,7 @@ class Assembler : public AbstractAssembler {
inline void li( Register d, int si16);
inline void lis( Register d, int si16);
inline void addir(Register d, int si16, Register a);
inline void subi( Register d, Register a, int si16);
static bool is_addi(int x) {
return ADDI_OPCODE == (x & ADDI_OPCODE_MASK);
@ -2154,6 +2159,11 @@ class Assembler : public AbstractAssembler {
inline void xxpermdi( VectorSRegister d, VectorSRegister a, VectorSRegister b, int dm);
inline void xxmrghw( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void xxmrglw( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void mtvsrd( VectorSRegister d, Register a);
inline void mtvsrwz( VectorSRegister d, Register a);
inline void xxspltw( VectorSRegister d, VectorSRegister b, int ui2);
inline void xxlxor( VectorSRegister d, VectorSRegister a, VectorSRegister b);
inline void xxleqv( VectorSRegister d, VectorSRegister a, VectorSRegister b);
// VSX Extended Mnemonics
inline void xxspltd( VectorSRegister d, VectorSRegister a, int x);
@ -2174,7 +2184,8 @@ class Assembler : public AbstractAssembler {
inline void vsbox( VectorRegister d, VectorRegister a);
// SHA (introduced with Power 8)
// Not yet implemented.
inline void vshasigmad(VectorRegister d, VectorRegister a, bool st, int six);
inline void vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six);
// Vector Binary Polynomial Multiplication (introduced with Power 8)
inline void vpmsumb( VectorRegister d, VectorRegister a, VectorRegister b);
@ -2285,6 +2296,10 @@ class Assembler : public AbstractAssembler {
inline void lvsl( VectorRegister d, Register s2);
inline void lvsr( VectorRegister d, Register s2);
// Endianness-specific concatenation of 2 loaded vectors.
inline void load_perm(VectorRegister perm, Register addr);
inline void vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm);
// RegisterOrConstant versions.
// These emitters choose between the versions using two registers and
// those with register and immediate, depending on the content of roc.

View File

@ -164,6 +164,7 @@ inline void Assembler::divwo_( Register d, Register a, Register b) { emit_int32
inline void Assembler::li( Register d, int si16) { Assembler::addi_r0ok( d, R0, si16); }
inline void Assembler::lis( Register d, int si16) { Assembler::addis_r0ok(d, R0, si16); }
inline void Assembler::addir(Register d, int si16, Register a) { Assembler::addi(d, a, si16); }
inline void Assembler::subi( Register d, Register a, int si16) { Assembler::addi(d, a, -si16); }
// PPC 1, section 3.3.9, Fixed-Point Compare Instructions
inline void Assembler::cmpi( ConditionRegister f, int l, Register a, int si16) { emit_int32( CMPI_OPCODE | bf(f) | l10(l) | ra(a) | simm(si16,16)); }
@ -760,9 +761,14 @@ inline void Assembler::lvsr( VectorRegister d, Register s1, Register s2) { emit
// Vector-Scalar (VSX) instructions.
inline void Assembler::lxvd2x( VectorSRegister d, Register s1) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra(0) | rb(s1)); }
inline void Assembler::lxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( LXVD2X_OPCODE | vsrt(d) | ra0mem(s1) | rb(s2)); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1) { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra(0) | rb(s1)); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( STXVD2X_OPCODE | vsrt(d) | ra0mem(s1) | rb(s2)); }
inline void Assembler::mtvrd( VectorRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1) { emit_int32( STXVD2X_OPCODE | vsrs(d) | ra(0) | rb(s1)); }
inline void Assembler::stxvd2x( VectorSRegister d, Register s1, Register s2) { emit_int32( STXVD2X_OPCODE | vsrs(d) | ra0mem(s1) | rb(s2)); }
inline void Assembler::mtvsrd( VectorSRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vsrt(d) | ra(a)); }
inline void Assembler::mtvsrwz( VectorSRegister d, Register a) { emit_int32( MTVSRWZ_OPCODE | vsrt(d) | ra(a)); }
inline void Assembler::xxspltw( VectorSRegister d, VectorSRegister b, int ui2) { emit_int32( XXSPLTW_OPCODE | vsrt(d) | vsrb(b) | xxsplt_uim(uimm(ui2,2))); }
inline void Assembler::xxlxor( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXLXOR_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
inline void Assembler::xxleqv( VectorSRegister d, VectorSRegister a, VectorSRegister b) { emit_int32( XXLEQV_OPCODE | vsrt(d) | vsra(a) | vsrb(b)); }
inline void Assembler::mtvrd( VectorRegister d, Register a) { emit_int32( MTVSRD_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mfvrd( Register a, VectorRegister d) { emit_int32( MFVSRD_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mtvrwz( VectorRegister d, Register a) { emit_int32( MTVSRWZ_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
inline void Assembler::mfvrwz( Register a, VectorRegister d) { emit_int32( MFVSRWZ_OPCODE | vsrt(d->to_vsr()) | ra(a)); }
@ -925,7 +931,8 @@ inline void Assembler::vncipherlast(VectorRegister d, VectorRegister a, VectorRe
inline void Assembler::vsbox( VectorRegister d, VectorRegister a) { emit_int32( VSBOX_OPCODE | vrt(d) | vra(a) ); }
// SHA (introduced with Power 8)
// Not yet implemented.
inline void Assembler::vshasigmad(VectorRegister d, VectorRegister a, bool st, int six) { emit_int32( VSHASIGMAD_OPCODE | vrt(d) | vra(a) | vst(st) | vsix(six)); }
inline void Assembler::vshasigmaw(VectorRegister d, VectorRegister a, bool st, int six) { emit_int32( VSHASIGMAW_OPCODE | vrt(d) | vra(a) | vst(st) | vsix(six)); }
// Vector Binary Polynomial Multiplication (introduced with Power 8)
inline void Assembler::vpmsumb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPMSUMB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
@ -1034,6 +1041,22 @@ inline void Assembler::stvxl( VectorRegister d, Register s2) { emit_int32( STVXL
inline void Assembler::lvsl( VectorRegister d, Register s2) { emit_int32( LVSL_OPCODE | vrt(d) | rb(s2)); }
inline void Assembler::lvsr( VectorRegister d, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | rb(s2)); }
// Loads a permute control vector for `addr` into `perm`, choosing the
// instruction by target endianness: lvsr on little-endian builds, lvsl
// otherwise.
inline void Assembler::load_perm(VectorRegister perm, Register addr) {
#ifndef VM_LITTLE_ENDIAN
  lvsl(perm, addr);
#else
  lvsr(perm, addr);
#endif
}
// Permutes `first_dest` and `second` under control of `perm`, writing the
// result to `first_dest`. The order of the two source operands passed to
// vperm is swapped on little-endian builds.
inline void Assembler::vec_perm(VectorRegister first_dest, VectorRegister second, VectorRegister perm) {
#ifndef VM_LITTLE_ENDIAN
  vperm(first_dest, first_dest, second, perm);
#else
  vperm(first_dest, second, first_dest, perm);
#endif
}
inline void Assembler::load_const(Register d, void* x, Register tmp) {
load_const(d, (long)x, tmp);
}

View File

@ -32,7 +32,7 @@
// Sets the default values for platform dependent flags used by the runtime system.
// (see globals.hpp)
define_pd_global(bool, ShareVtableStubs, false); // Improves performance markedly for mtrt and compress.
define_pd_global(bool, ShareVtableStubs, true);
define_pd_global(bool, NeedsDeoptSuspend, false); // Only register window machines need this.

View File

@ -129,7 +129,7 @@ void MacroAssembler::calculate_address_from_global_toc(Register dst, address add
}
}
int MacroAssembler::patch_calculate_address_from_global_toc_at(address a, address bound, address addr) {
address MacroAssembler::patch_calculate_address_from_global_toc_at(address a, address bound, address addr) {
const int offset = MacroAssembler::offset_to_global_toc(addr);
const address inst2_addr = a;
@ -155,7 +155,7 @@ int MacroAssembler::patch_calculate_address_from_global_toc_at(address a, addres
assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC");
set_imm((int *)inst1_addr, MacroAssembler::largeoffset_si16_si16_hi(offset));
set_imm((int *)inst2_addr, MacroAssembler::largeoffset_si16_si16_lo(offset));
return (int)((intptr_t)addr - (intptr_t)inst1_addr);
return inst1_addr;
}
address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(address a, address bound) {
@ -201,7 +201,7 @@ address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(addr
// clrldi rx = rx & 0xFFFFffff // clearMS32b, optional
// ori rx = rx | const.lo
// Clrldi will be passed by.
int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
address MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
assert(UseCompressedOops, "Should only patch compressed oops");
const address inst2_addr = a;
@ -227,7 +227,7 @@ int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop dat
set_imm((int *)inst1_addr, (short)(xc)); // see enc_load_con_narrow_hi/_lo
set_imm((int *)inst2_addr, (xd)); // unsigned int
return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr);
return inst1_addr;
}
// Get compressed oop or klass constant.
@ -3382,6 +3382,7 @@ void MacroAssembler::load_mirror_from_const_method(Register mirror, Register con
ld(mirror, in_bytes(ConstMethod::constants_offset()), const_method);
ld(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
ld(mirror, in_bytes(Klass::java_mirror_offset()), mirror);
resolve_oop_handle(mirror);
}
// Clear Array
@ -5234,6 +5235,40 @@ void MacroAssembler::multiply_128_x_128_loop(Register x_xstart,
bind(L_post_third_loop_done);
} // multiply_128_x_128_loop
// Emits a multiply-accumulate loop over 32-bit words: for each of `len`
// words, walking backwards, a word of `in` is multiplied by `k`, added to
// the corresponding word of `out` plus the running carry, the low word is
// stored back to `out`, and the upper 32 bits become the next carry.
// On exit `carry` holds the final carry (zero when len <= 0).
// `offset`, `len`, `tmp1`, `tmp2`, CCR0 and CTR are modified.
// NOTE(review): `offset` is used directly as a byte index into `out`
// (decremented in steps of 4) -- confirm callers pass a byte offset.
void MacroAssembler::muladd(Register out, Register in,
Register offset, Register len, Register k,
Register tmp1, Register tmp2, Register carry) {
// Labels
Label LOOP, SKIP;
// Make sure length is positive.
cmpdi (CCR0, len, 0);
// Prepare variables
subi (offset, offset, 4);
li (carry, 0);
// len <= 0: nothing to do, carry stays zero.
ble (CCR0, SKIP);
// CTR counts the iterations; len is turned into the byte index of the
// last word of `in`, i.e. (len - 1) * 4.
mtctr (len);
subi (len, len, 1 );
sldi (len, len, 2 );
// Main loop
bind(LOOP);
lwzx (tmp1, len, in );
lwzx (tmp2, offset, out );
mulld (tmp1, tmp1, k );
add (tmp2, carry, tmp2 );
add (tmp2, tmp1, tmp2 );
stwx (tmp2, offset, out );
srdi (carry, tmp2, 32 );
// Step both indices back by one 32-bit word.
subi (offset, offset, 4 );
subi (len, len, 4 );
bdnz (LOOP);
bind(SKIP);
}
void MacroAssembler::multiply_to_len(Register x, Register xlen,
Register y, Register ylen,
Register z, Register zlen,

View File

@ -105,13 +105,15 @@ class MacroAssembler: public Assembler {
};
inline static bool is_calculate_address_from_global_toc_at(address a, address bound);
static int patch_calculate_address_from_global_toc_at(address a, address addr, address bound);
// Returns address of first instruction in sequence.
static address patch_calculate_address_from_global_toc_at(address a, address bound, address addr);
static address get_address_of_calculate_address_from_global_toc_at(address a, address addr);
#ifdef _LP64
// Patch narrow oop constant.
inline static bool is_set_narrow_oop(address a, address bound);
static int patch_set_narrow_oop(address a, address bound, narrowOop data);
// Returns address of first instruction in sequence.
static address patch_set_narrow_oop(address a, address bound, narrowOop data);
static narrowOop get_narrow_oop(address a, address bound);
#endif
@ -813,6 +815,8 @@ class MacroAssembler: public Assembler {
Register yz_idx, Register idx, Register carry,
Register product_high, Register product,
Register carry2, Register tmp);
void muladd(Register out, Register in, Register offset, Register len, Register k,
Register tmp1, Register tmp2, Register carry);
void multiply_to_len(Register x, Register xlen,
Register y, Register ylen,
Register z, Register zlen,
@ -862,6 +866,40 @@ class MacroAssembler: public Assembler {
void kernel_crc32_singleByteReg(Register crc, Register val, Register table,
bool invertCRC);
// SHA-2 auxiliary functions and public interfaces
private:
void sha256_deque(const VectorRegister src,
const VectorRegister dst1, const VectorRegister dst2, const VectorRegister dst3);
void sha256_load_h_vec(const VectorRegister a, const VectorRegister e, const Register hptr);
void sha256_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
void sha256_load_w_plus_k_vec(const Register buf_in, const VectorRegister* ws,
const int total_ws, const Register k, const VectorRegister* kpws,
const int total_kpws);
void sha256_calc_4w(const VectorRegister w0, const VectorRegister w1,
const VectorRegister w2, const VectorRegister w3, const VectorRegister kpw0,
const VectorRegister kpw1, const VectorRegister kpw2, const VectorRegister kpw3,
const Register j, const Register k);
void sha256_update_sha_state(const VectorRegister a, const VectorRegister b,
const VectorRegister c, const VectorRegister d, const VectorRegister e,
const VectorRegister f, const VectorRegister g, const VectorRegister h,
const Register hptr);
void sha512_load_w_vec(const Register buf_in, const VectorRegister* ws, const int total_ws);
void sha512_update_sha_state(const Register state, const VectorRegister* hs, const int total_hs);
void sha512_round(const VectorRegister* hs, const int total_hs, int& h_cnt, const VectorRegister kpw);
void sha512_load_h_vec(const Register state, const VectorRegister* hs, const int total_hs);
void sha512_calc_2w(const VectorRegister w0, const VectorRegister w1,
const VectorRegister w2, const VectorRegister w3,
const VectorRegister w4, const VectorRegister w5,
const VectorRegister w6, const VectorRegister w7,
const VectorRegister kpw0, const VectorRegister kpw1, const Register j,
const VectorRegister vRb, const Register k);
public:
void sha256(bool multi_block);
void sha512(bool multi_block);
//
// Debugging
//

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -221,13 +221,13 @@ address NativeMovConstReg::set_data_plain(intptr_t data, CodeBlob *cb) {
// A calculation relative to the global TOC.
if (MacroAssembler::get_address_of_calculate_address_from_global_toc_at(addr, cb->content_begin()) !=
(address)data) {
const int invalidated_range =
MacroAssembler::patch_calculate_address_from_global_toc_at(addr, cb->content_begin(),
const address inst2_addr = addr;
const address inst1_addr =
MacroAssembler::patch_calculate_address_from_global_toc_at(inst2_addr, cb->content_begin(),
(address)data);
const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
// FIXME:
const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
ICache::ppc64_flush_icache_bytes(start, range);
assert(inst1_addr != NULL && inst1_addr < inst2_addr, "first instruction must be found");
const int range = inst2_addr - inst1_addr + BytesPerInstWord;
ICache::ppc64_flush_icache_bytes(inst1_addr, range);
}
next_address = addr + 1 * BytesPerInstWord;
} else if (MacroAssembler::is_load_const_at(addr)) {
@ -288,15 +288,15 @@ void NativeMovConstReg::set_data(intptr_t data) {
}
void NativeMovConstReg::set_narrow_oop(narrowOop data, CodeBlob *code /* = NULL */) {
address addr = addr_at(0);
address inst2_addr = addr_at(0);
CodeBlob* cb = (code) ? code : CodeCache::find_blob(instruction_address());
if (MacroAssembler::get_narrow_oop(addr, cb->content_begin()) == (long)data) return;
const int invalidated_range =
MacroAssembler::patch_set_narrow_oop(addr, cb->content_begin(), (long)data);
const address start = invalidated_range < 0 ? addr + invalidated_range : addr;
// FIXME:
const int range = invalidated_range < 0 ? 4 - invalidated_range : 8;
ICache::ppc64_flush_icache_bytes(start, range);
if (MacroAssembler::get_narrow_oop(inst2_addr, cb->content_begin()) == (long)data)
return;
const address inst1_addr =
MacroAssembler::patch_set_narrow_oop(inst2_addr, cb->content_begin(), (long)data);
assert(inst1_addr != NULL && inst1_addr < inst2_addr, "first instruction must be found");
const int range = inst2_addr - inst1_addr + BytesPerInstWord;
ICache::ppc64_flush_icache_bytes(inst1_addr, range);
}
// Do not use an assertion here. Let clients decide whether they only

View File

@ -254,6 +254,73 @@ register %{
reg_def SR_SPEFSCR(SOC, SOC, Op_RegP, 4, SR_SPEFSCR->as_VMReg()); // v
reg_def SR_PPR( SOC, SOC, Op_RegP, 5, SR_PPR->as_VMReg()); // v
// ----------------------------
// Vector-Scalar Registers
// ----------------------------
reg_def VSR0 ( SOC, SOC, Op_VecX, 0, NULL);
reg_def VSR1 ( SOC, SOC, Op_VecX, 1, NULL);
reg_def VSR2 ( SOC, SOC, Op_VecX, 2, NULL);
reg_def VSR3 ( SOC, SOC, Op_VecX, 3, NULL);
reg_def VSR4 ( SOC, SOC, Op_VecX, 4, NULL);
reg_def VSR5 ( SOC, SOC, Op_VecX, 5, NULL);
reg_def VSR6 ( SOC, SOC, Op_VecX, 6, NULL);
reg_def VSR7 ( SOC, SOC, Op_VecX, 7, NULL);
reg_def VSR8 ( SOC, SOC, Op_VecX, 8, NULL);
reg_def VSR9 ( SOC, SOC, Op_VecX, 9, NULL);
reg_def VSR10 ( SOC, SOC, Op_VecX, 10, NULL);
reg_def VSR11 ( SOC, SOC, Op_VecX, 11, NULL);
reg_def VSR12 ( SOC, SOC, Op_VecX, 12, NULL);
reg_def VSR13 ( SOC, SOC, Op_VecX, 13, NULL);
reg_def VSR14 ( SOC, SOC, Op_VecX, 14, NULL);
reg_def VSR15 ( SOC, SOC, Op_VecX, 15, NULL);
reg_def VSR16 ( SOC, SOC, Op_VecX, 16, NULL);
reg_def VSR17 ( SOC, SOC, Op_VecX, 17, NULL);
reg_def VSR18 ( SOC, SOC, Op_VecX, 18, NULL);
reg_def VSR19 ( SOC, SOC, Op_VecX, 19, NULL);
reg_def VSR20 ( SOC, SOC, Op_VecX, 20, NULL);
reg_def VSR21 ( SOC, SOC, Op_VecX, 21, NULL);
reg_def VSR22 ( SOC, SOC, Op_VecX, 22, NULL);
reg_def VSR23 ( SOC, SOC, Op_VecX, 23, NULL);
reg_def VSR24 ( SOC, SOC, Op_VecX, 24, NULL);
reg_def VSR25 ( SOC, SOC, Op_VecX, 25, NULL);
reg_def VSR26 ( SOC, SOC, Op_VecX, 26, NULL);
reg_def VSR27 ( SOC, SOC, Op_VecX, 27, NULL);
reg_def VSR28 ( SOC, SOC, Op_VecX, 28, NULL);
reg_def VSR29 ( SOC, SOC, Op_VecX, 29, NULL);
reg_def VSR30 ( SOC, SOC, Op_VecX, 30, NULL);
reg_def VSR31 ( SOC, SOC, Op_VecX, 31, NULL);
reg_def VSR32 ( SOC, SOC, Op_VecX, 32, NULL);
reg_def VSR33 ( SOC, SOC, Op_VecX, 33, NULL);
reg_def VSR34 ( SOC, SOC, Op_VecX, 34, NULL);
reg_def VSR35 ( SOC, SOC, Op_VecX, 35, NULL);
reg_def VSR36 ( SOC, SOC, Op_VecX, 36, NULL);
reg_def VSR37 ( SOC, SOC, Op_VecX, 37, NULL);
reg_def VSR38 ( SOC, SOC, Op_VecX, 38, NULL);
reg_def VSR39 ( SOC, SOC, Op_VecX, 39, NULL);
reg_def VSR40 ( SOC, SOC, Op_VecX, 40, NULL);
reg_def VSR41 ( SOC, SOC, Op_VecX, 41, NULL);
reg_def VSR42 ( SOC, SOC, Op_VecX, 42, NULL);
reg_def VSR43 ( SOC, SOC, Op_VecX, 43, NULL);
reg_def VSR44 ( SOC, SOC, Op_VecX, 44, NULL);
reg_def VSR45 ( SOC, SOC, Op_VecX, 45, NULL);
reg_def VSR46 ( SOC, SOC, Op_VecX, 46, NULL);
reg_def VSR47 ( SOC, SOC, Op_VecX, 47, NULL);
reg_def VSR48 ( SOC, SOC, Op_VecX, 48, NULL);
reg_def VSR49 ( SOC, SOC, Op_VecX, 49, NULL);
reg_def VSR50 ( SOC, SOC, Op_VecX, 50, NULL);
reg_def VSR51 ( SOC, SOC, Op_VecX, 51, NULL);
reg_def VSR52 ( SOC, SOC, Op_VecX, 52, NULL);
reg_def VSR53 ( SOC, SOC, Op_VecX, 53, NULL);
reg_def VSR54 ( SOC, SOC, Op_VecX, 54, NULL);
reg_def VSR55 ( SOC, SOC, Op_VecX, 55, NULL);
reg_def VSR56 ( SOC, SOC, Op_VecX, 56, NULL);
reg_def VSR57 ( SOC, SOC, Op_VecX, 57, NULL);
reg_def VSR58 ( SOC, SOC, Op_VecX, 58, NULL);
reg_def VSR59 ( SOC, SOC, Op_VecX, 59, NULL);
reg_def VSR60 ( SOC, SOC, Op_VecX, 60, NULL);
reg_def VSR61 ( SOC, SOC, Op_VecX, 61, NULL);
reg_def VSR62 ( SOC, SOC, Op_VecX, 62, NULL);
reg_def VSR63 ( SOC, SOC, Op_VecX, 63, NULL);
// ----------------------------
// Specify priority of register selection within phases of register
@ -395,6 +462,73 @@ alloc_class chunk3 (
SR_PPR
);
// Allocation chunk listing all 64 vector-scalar registers, VSR0 through
// VSR63, in ascending order.
alloc_class chunk4 (
VSR0,
VSR1,
VSR2,
VSR3,
VSR4,
VSR5,
VSR6,
VSR7,
VSR8,
VSR9,
VSR10,
VSR11,
VSR12,
VSR13,
VSR14,
VSR15,
VSR16,
VSR17,
VSR18,
VSR19,
VSR20,
VSR21,
VSR22,
VSR23,
VSR24,
VSR25,
VSR26,
VSR27,
VSR28,
VSR29,
VSR30,
VSR31,
VSR32,
VSR33,
VSR34,
VSR35,
VSR36,
VSR37,
VSR38,
VSR39,
VSR40,
VSR41,
VSR42,
VSR43,
VSR44,
VSR45,
VSR46,
VSR47,
VSR48,
VSR49,
VSR50,
VSR51,
VSR52,
VSR53,
VSR54,
VSR55,
VSR56,
VSR57,
VSR58,
VSR59,
VSR60,
VSR61,
VSR62,
VSR63
);
//-------Architecture Description Register Classes-----------------------
// Several register classes are automatically defined based upon
@ -769,6 +903,45 @@ reg_class dbl_reg(
F31, F31_H // nv!
);
// ----------------------------
// Vector-Scalar Register Class
// ----------------------------
// Only VSR32..VSR51 are members of this class. VSR0..VSR31 are not listed,
// and VSR52..VSR63 are deliberately left commented out (tagged "nv!",
// presumably meaning non-volatile -- confirm against the platform ABI).
reg_class vs_reg(
VSR32,
VSR33,
VSR34,
VSR35,
VSR36,
VSR37,
VSR38,
VSR39,
VSR40,
VSR41,
VSR42,
VSR43,
VSR44,
VSR45,
VSR46,
VSR47,
VSR48,
VSR49,
VSR50,
VSR51
// VSR52, // nv!
// VSR53, // nv!
// VSR54, // nv!
// VSR55, // nv!
// VSR56, // nv!
// VSR57, // nv!
// VSR58, // nv!
// VSR59, // nv!
// VSR60, // nv!
// VSR61, // nv!
// VSR62, // nv!
// VSR63 // nv!
);
%}
//----------DEFINITION BLOCK---------------------------------------------------
@ -2048,14 +2221,24 @@ const bool Matcher::convL2FSupported(void) {
// Vector width in bytes.
const int Matcher::vector_width_in_bytes(BasicType bt) {
assert(MaxVectorSize == 8, "");
return 8;
if (VM_Version::has_vsx()) {
assert(MaxVectorSize == 16, "");
return 16;
} else {
assert(MaxVectorSize == 8, "");
return 8;
}
}
// Vector ideal reg.
const uint Matcher::vector_ideal_reg(int size) {
assert(MaxVectorSize == 8 && size == 8, "");
return Op_RegL;
if (VM_Version::has_vsx()) {
assert(MaxVectorSize == 16 && size == 16, "");
return Op_VecX;
} else {
assert(MaxVectorSize == 8 && size == 8, "");
return Op_RegL;
}
}
const uint Matcher::vector_shift_count_ideal_reg(int size) {
@ -2075,7 +2258,10 @@ const int Matcher::min_vector_size(const BasicType bt) {
// PPC doesn't support misaligned vectors store/load.
const bool Matcher::misaligned_vectors_ok() {
return false;
if (VM_Version::has_vsx())
return !AlignVector; // can be changed by flag
else
return false;
}
// PPC AES support not yet implemented
@ -2217,10 +2403,31 @@ const MachRegisterNumbers farg_reg[13] = {
F13_num
};
// Machine register numbers of the 64 vector-scalar registers, listed in
// ascending order (VSR0 first, VSR63 last), four per row.
const MachRegisterNumbers vsarg_reg[64] = {
  VSR0_num, VSR1_num, VSR2_num, VSR3_num,
  VSR4_num, VSR5_num, VSR6_num, VSR7_num,
  VSR8_num, VSR9_num, VSR10_num, VSR11_num,
  VSR12_num, VSR13_num, VSR14_num, VSR15_num,
  VSR16_num, VSR17_num, VSR18_num, VSR19_num,
  VSR20_num, VSR21_num, VSR22_num, VSR23_num,
  VSR24_num, VSR25_num, VSR26_num, VSR27_num,
  VSR28_num, VSR29_num, VSR30_num, VSR31_num,
  VSR32_num, VSR33_num, VSR34_num, VSR35_num,
  VSR36_num, VSR37_num, VSR38_num, VSR39_num,
  VSR40_num, VSR41_num, VSR42_num, VSR43_num,
  VSR44_num, VSR45_num, VSR46_num, VSR47_num,
  VSR48_num, VSR49_num, VSR50_num, VSR51_num,
  VSR52_num, VSR53_num, VSR54_num, VSR55_num,
  VSR56_num, VSR57_num, VSR58_num, VSR59_num,
  VSR60_num, VSR61_num, VSR62_num, VSR63_num
};
const int num_iarg_registers = sizeof(iarg_reg) / sizeof(iarg_reg[0]);
const int num_farg_registers = sizeof(farg_reg) / sizeof(farg_reg[0]);
const int num_vsarg_registers = sizeof(vsarg_reg) / sizeof(vsarg_reg[0]);
// Return whether or not this register is ever used as an argument. This
// function is used on startup to build the trampoline stubs in generateOptoStub.
// Registers not mentioned will be killed by the VM call in the trampoline, and
@ -2552,6 +2759,115 @@ loadConLNodesTuple loadConLNodesTuple_create(PhaseRegAlloc *ra_, Node *toc, immL
return nodes;
}
// Result of loadConLReplicatedNodesTuple_create(): the nodes of a
// "load long constant, move to vector register, replicate" sequence.
// Slots that do not apply to the chosen sequence are NULL.
typedef struct {
loadConL_hiNode *_large_hi; // first load node of the large-constant-pool sequence, else NULL
loadConL_loNode *_large_lo; // second load node of the large-constant-pool sequence, else NULL
mtvsrdNode *_moved; // mtvsrd node fed by the load
xxspltdNode *_replicated; // xxspltd node fed by _moved
loadConLNode *_small; // single load node of the small-constant-pool sequence, else NULL
MachNode *_last; // final node of the sequence (set to _replicated by the creator)
} loadConLReplicatedNodesTuple;
// Creates the node chain for a replicated 64-bit constant:
//   load the constant (loadConL_hi + loadConL_lo, or a single loadConL),
//   move it into a vector register (mtvsrd) and replicate it (xxspltd).
//
//   ra_      - register allocator; receives the register assignment of every new node.
//   toc      - input of the first load node.
//   immSrc   - constant operand shared by the load nodes.
//   zero     - immediate operand handed to the xxspltd node.
//   reg_second/reg_first         - register pair assigned to the load node(s).
//   reg_vec_second/reg_vec_first - register pair assigned to the xxspltd node.
//   C, dst   - part of the signature but not referenced by this function.
//
// Returns a tuple in which unused slots are NULL and _last is the xxspltd node.
loadConLReplicatedNodesTuple loadConLReplicatedNodesTuple_create(Compile *C, PhaseRegAlloc *ra_, Node *toc, immLOper *immSrc,
                                                                 vecXOper *dst, immI_0Oper *zero,
                                                                 OptoReg::Name reg_second, OptoReg::Name reg_first,
                                                                 OptoReg::Name reg_vec_second, OptoReg::Name reg_vec_first) {
  loadConLReplicatedNodesTuple nodes;

  const bool large_constant_pool = true; // TODO: PPC port C->cfg()->_consts_size > 4000;
  if (large_constant_pool) {
    // Create new nodes.
    loadConL_hiNode *m1 = new loadConL_hiNode();
    loadConL_loNode *m2 = new loadConL_loNode();
    mtvsrdNode *m3 = new mtvsrdNode();
    xxspltdNode *m4 = new xxspltdNode();

    // inputs for new nodes
    m1->add_req(NULL, toc);
    m2->add_req(NULL, m1);
    m3->add_req(NULL, m2);
    m4->add_req(NULL, m3);

    // operands for new nodes
    m1->_opnds[0] = new iRegLdstOper(); // dst
    m1->_opnds[1] = immSrc;             // src
    m1->_opnds[2] = new iRegPdstOper(); // toc

    m2->_opnds[0] = new iRegLdstOper(); // dst
    m2->_opnds[1] = immSrc;             // src
    m2->_opnds[2] = new iRegLdstOper(); // base

    m3->_opnds[0] = new vecXOper();     // dst
    m3->_opnds[1] = new iRegLdstOper(); // src

    m4->_opnds[0] = new vecXOper();     // dst
    m4->_opnds[1] = new vecXOper();     // src
    m4->_opnds[2] = zero;

    // Initialize ins_attrib TOC fields.
    m1->_const_toc_offset = -1;
    m2->_const_toc_offset_hi_node = m1;

    // Initialize ins_attrib instruction offset.
    m1->_cbuf_insts_offset = -1;

    // register allocation for new nodes
    ra_->set_pair(m1->_idx, reg_second, reg_first);
    ra_->set_pair(m2->_idx, reg_second, reg_first);
    ra_->set1(m3->_idx, reg_second);
    ra_->set2(m3->_idx, reg_vec_first);
    ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);

    // Create result.
    nodes._large_hi = m1;
    nodes._large_lo = m2;
    nodes._moved = m3;
    nodes._replicated = m4;
    nodes._small = NULL;
    nodes._last = nodes._replicated;
    assert(m2->bottom_type()->isa_long(), "must be long");
  } else {
    loadConLNode *m2 = new loadConLNode();
    mtvsrdNode *m3 = new mtvsrdNode();
    xxspltdNode *m4 = new xxspltdNode();

    // inputs for new nodes
    // The move and replicate nodes must be chained to the load exactly as in
    // the large-constant-pool case; otherwise they are left without inputs.
    m2->add_req(NULL, toc);
    m3->add_req(NULL, m2);
    m4->add_req(NULL, m3);

    // operands for new nodes
    m2->_opnds[0] = new iRegLdstOper(); // dst
    m2->_opnds[1] = immSrc;             // src
    m2->_opnds[2] = new iRegPdstOper(); // toc

    m3->_opnds[0] = new vecXOper();     // dst
    m3->_opnds[1] = new iRegLdstOper(); // src

    m4->_opnds[0] = new vecXOper();     // dst
    m4->_opnds[1] = new vecXOper();     // src
    m4->_opnds[2] = zero;

    // Initialize ins_attrib instruction offset.
    m2->_cbuf_insts_offset = -1;

    // register allocation for new nodes
    ra_->set_pair(m2->_idx, reg_second, reg_first);
    ra_->set1(m3->_idx, reg_second);
    ra_->set2(m3->_idx, reg_vec_first);
    ra_->set_pair(m4->_idx, reg_vec_second, reg_vec_first);

    // Create result.
    nodes._large_hi = NULL;
    nodes._large_lo = NULL;
    nodes._small = m2;
    nodes._moved = m3;
    nodes._replicated = m4;
    nodes._last = nodes._replicated;
    assert(m2->bottom_type()->isa_long(), "must be long");
  }

  return nodes;
}
%} // source
encode %{
@ -3212,6 +3528,27 @@ encode %{
assert(loadConLNodes._last->bottom_type()->isa_long(), "must be long");
%}
// Postalloc expansion of a replicated float constant into a vector register:
// the float bit pattern is replicated into a long constant, and the node
// chain built by loadConLReplicatedNodesTuple_create() is pushed in
// dependency order (load node(s), move, replicate).
enc_class postalloc_expand_load_replF_constant_vsx(vecX dst, immF src, iRegLdst toc) %{
  // Create new nodes.

  // Make an operand with the bit pattern to load as float.
  immLOper *op_repl = new immLOper((jlong)replicate_immF(op_src->constantF()));
  immI_0Oper *op_zero = new immI_0Oper(0);

  loadConLReplicatedNodesTuple loadConLNodes =
    loadConLReplicatedNodesTuple_create(C, ra_, n_toc, op_repl, op_dst, op_zero,
                                        OptoReg::Name(R20_H_num), OptoReg::Name(R20_num),
                                        OptoReg::Name(VSR11_num), OptoReg::Name(VSR10_num));

  // Push new nodes. Exactly one of the load variants is non-NULL: either
  // _large_hi/_large_lo or _small. _small must be pushed too, because _last
  // is the replicate node and does not cover the load.
  if (loadConLNodes._large_hi) { nodes->push(loadConLNodes._large_hi); }
  if (loadConLNodes._large_lo) { nodes->push(loadConLNodes._large_lo); }
  if (loadConLNodes._small)    { nodes->push(loadConLNodes._small); }
  if (loadConLNodes._moved)    { nodes->push(loadConLNodes._moved); }
  if (loadConLNodes._last)     { nodes->push(loadConLNodes._last); }

  assert(nodes->length() >= 1, "must have created at least 1 node");
%}
// This enc_class is needed so that scheduler gets proper
// input mapping for latency computation.
enc_class enc_poll(immI dst, iRegLdst poll) %{
@ -3840,6 +4177,14 @@ ins_attrib ins_field_load_ic_node(0);
//
// Formats are generated automatically for constants and base registers.
// Vector register operand: matches the ideal VecX type (used for 16-byte
// vectors) and is allocated from the vs_reg register class.
operand vecX() %{
constraint(ALLOC_IN_RC(vs_reg));
match(VecX);
format %{ %}
interface(REG_INTER);
%}
//----------Simple Operands----------------------------------------------------
// Immediate Operands
@ -5372,6 +5717,20 @@ instruct loadV8(iRegLdst dst, memoryAlg4 mem) %{
ins_pipe(pipe_class_memory);
%}
// Load a 16-byte vector from memory into a vector register with LXVD2X.
// Only matches LoadVector nodes whose memory size is 16 bytes; the address
// must be a plain register-indirect operand.
instruct loadV16(vecX dst, indirect mem) %{
predicate(n->as_LoadVector()->memory_size() == 16);
match(Set dst (LoadVector mem));
ins_cost(MEMORY_REF_COST);
format %{ "LXVD2X $dst, $mem \t// load 16-byte Vector" %}
size(4);
ins_encode %{
__ lxvd2x($dst$$VectorSRegister, $mem$$Register);
%}
ins_pipe(pipe_class_default);
%}
// Load Range, range = array length (=jint)
instruct loadRange(iRegIdst dst, memory mem) %{
match(Set dst (LoadRange mem));
@ -6368,6 +6727,20 @@ instruct storeA8B(memoryAlg4 mem, iRegLsrc src) %{
ins_pipe(pipe_class_memory);
%}
// Store a 16-byte vector from a vector register to memory with STXVD2X.
// Only matches StoreVector nodes whose memory size is 16 bytes; the address
// must be a plain register-indirect operand.
instruct storeV16(indirect mem, vecX src) %{
predicate(n->as_StoreVector()->memory_size() == 16);
match(Set mem (StoreVector mem src));
ins_cost(MEMORY_REF_COST);
format %{ "STXVD2X $mem, $src \t// store 16-byte Vector" %}
size(4);
ins_encode %{
__ stxvd2x($src$$VectorSRegister, $mem$$Register);
%}
ins_pipe(pipe_class_default);
%}
// Store Compressed Oop
instruct storeN(memory dst, iRegN_P2N src) %{
match(Set dst (StoreN dst src));
@ -13239,6 +13612,26 @@ instruct storeS_reversed(iRegIsrc src, indirect mem) %{
ins_pipe(pipe_class_default);
%}
instruct mtvsrwz(vecX temp1, iRegIsrc src) %{
effect(DEF temp1, USE src);
size(4);
ins_encode %{
__ mtvsrwz($temp1$$VectorSRegister, $src$$Register);
%}
ins_pipe(pipe_class_default);
%}
instruct xxspltw(vecX dst, vecX src, immI8 imm1) %{
effect(DEF dst, USE src, USE imm1);
size(4);
ins_encode %{
__ xxspltw($dst$$VectorSRegister, $src$$VectorSRegister, $imm1$$constant);
%}
ins_pipe(pipe_class_default);
%}
//---------- Replicate Vector Instructions ------------------------------------
// Insrdi does replicate if src == dst.
@ -13318,6 +13711,46 @@ instruct repl8B_immIminus1(iRegLdst dst, immI_minus1 src) %{
ins_pipe(pipe_class_default);
%}
instruct repl16B_reg_Ex(vecX dst, iRegIsrc src) %{
match(Set dst (ReplicateB src));
predicate(n->as_Vector()->length() == 16);
expand %{
iRegLdst tmpL;
vecX tmpV;
immI8 imm1 %{ (int) 1 %}
moveReg(tmpL, src);
repl56(tmpL);
repl48(tmpL);
mtvsrwz(tmpV, tmpL);
xxspltw(dst, tmpV, imm1);
%}
%}
instruct repl16B_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateB zero));
predicate(n->as_Vector()->length() == 16);
format %{ "XXLXOR $dst, $zero \t// replicate16B" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
instruct repl16B_immIminus1(vecX dst, immI_minus1 src) %{
match(Set dst (ReplicateB src));
predicate(n->as_Vector()->length() == 16);
format %{ "XXLEQV $dst, $src \t// replicate16B" %}
size(4);
ins_encode %{
__ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
instruct repl4S_reg_Ex(iRegLdst dst, iRegIsrc src) %{
match(Set dst (ReplicateS src));
predicate(n->as_Vector()->length() == 4);
@ -13352,6 +13785,46 @@ instruct repl4S_immIminus1(iRegLdst dst, immI_minus1 src) %{
ins_pipe(pipe_class_default);
%}
instruct repl8S_reg_Ex(vecX dst, iRegIsrc src) %{
match(Set dst (ReplicateS src));
predicate(n->as_Vector()->length() == 8);
expand %{
iRegLdst tmpL;
vecX tmpV;
immI8 zero %{ (int) 0 %}
moveReg(tmpL, src);
repl48(tmpL);
repl32(tmpL);
mtvsrd(tmpV, tmpL);
xxpermdi(dst, tmpV, tmpV, zero);
%}
%}
instruct repl8S_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateS zero));
predicate(n->as_Vector()->length() == 8);
format %{ "XXLXOR $dst, $zero \t// replicate8S" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
// Replicate the constant -1 into all 8 short lanes of a 16-byte vector.
// XXLEQV of a register with itself sets every bit, so no source register is needed.
instruct repl8S_immIminus1(vecX dst, immI_minus1 src) %{
  match(Set dst (ReplicateS src));
  predicate(n->as_Vector()->length() == 8);

  format %{ "XXLEQV $dst, $src \t// replicate8S" %}
  size(4);
  ins_encode %{
    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}
instruct repl2I_reg_Ex(iRegLdst dst, iRegIsrc src) %{
match(Set dst (ReplicateI src));
predicate(n->as_Vector()->length() == 2);
@ -13386,6 +13859,46 @@ instruct repl2I_immIminus1(iRegLdst dst, immI_minus1 src) %{
ins_pipe(pipe_class_default);
%}
instruct repl4I_reg_Ex(vecX dst, iRegIsrc src) %{
match(Set dst (ReplicateI src));
predicate(n->as_Vector()->length() == 4);
ins_cost(2 * DEFAULT_COST);
expand %{
iRegLdst tmpL;
vecX tmpV;
immI8 zero %{ (int) 0 %}
moveReg(tmpL, src);
repl32(tmpL);
mtvsrd(tmpV, tmpL);
xxpermdi(dst, tmpV, tmpV, zero);
%}
%}
instruct repl4I_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateI zero));
predicate(n->as_Vector()->length() == 4);
format %{ "XXLXOR $dst, $zero \t// replicate4I" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
instruct repl4I_immIminus1(vecX dst, immI_minus1 src) %{
match(Set dst (ReplicateI src));
predicate(n->as_Vector()->length() == 4);
format %{ "XXLEQV $dst, $dst, $dst \t// replicate4I" %}
size(4);
ins_encode %{
__ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
// Move float to int register via stack, replicate.
instruct repl2F_reg_Ex(iRegLdst dst, regF src) %{
match(Set dst (ReplicateF src));
@ -13484,6 +13997,154 @@ instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
%}
instruct repl4F_reg_Ex(vecX dst, regF src) %{
match(Set dst (ReplicateF src));
predicate(n->as_Vector()->length() == 4);
ins_cost(2 * MEMORY_REF_COST + DEFAULT_COST);
expand %{
stackSlotL tmpS;
iRegIdst tmpI;
iRegLdst tmpL;
vecX tmpV;
immI8 zero %{ (int) 0 %}
moveF2I_reg_stack(tmpS, src); // Move float to stack.
moveF2I_stack_reg(tmpI, tmpS); // Move stack to int reg.
moveReg(tmpL, tmpI); // Move int to long reg.
repl32(tmpL); // Replicate bitpattern.
mtvsrd(tmpV, tmpL);
xxpermdi(dst, tmpV, tmpV, zero);
%}
%}
instruct repl4F_immF_Ex(vecX dst, immF src) %{
match(Set dst (ReplicateF src));
predicate(n->as_Vector()->length() == 4);
ins_cost(10 * DEFAULT_COST);
postalloc_expand( postalloc_expand_load_replF_constant_vsx(dst, src, constanttablebase) );
%}
instruct repl4F_immF0(vecX dst, immF_0 zero) %{
match(Set dst (ReplicateF zero));
predicate(n->as_Vector()->length() == 4);
format %{ "XXLXOR $dst, $zero \t// replicate4F" %}
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
instruct repl2D_reg_Ex(vecX dst, regD src) %{
match(Set dst (ReplicateD src));
predicate(n->as_Vector()->length() == 2);
expand %{
stackSlotL tmpS;
iRegLdst tmpL;
iRegLdst tmp;
vecX tmpV;
immI8 zero %{ (int) 0 %}
moveD2L_reg_stack(tmpS, src);
moveD2L_stack_reg(tmpL, tmpS);
mtvsrd(tmpV, tmpL);
xxpermdi(dst, tmpV, tmpV, zero);
%}
%}
instruct repl2D_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateD zero));
predicate(n->as_Vector()->length() == 2);
format %{ "XXLXOR $dst, $zero \t// replicate2D" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
// Fill both 8-byte lanes of a 16-byte vector with all-one bits.
// XXLEQV of a register with itself sets every bit, so no source register is needed.
// NOTE(review): the rule matches ReplicateD with an int immediate operand
// (immI_minus1) — confirm that this operand type can actually match here.
instruct repl2D_immIminus1(vecX dst, immI_minus1 src) %{
  match(Set dst (ReplicateD src));
  predicate(n->as_Vector()->length() == 2);

  format %{ "XXLEQV $dst, $src \t// replicate2D" %}
  size(4);
  ins_encode %{
    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}
instruct mtvsrd(vecX dst, iRegLsrc src) %{
predicate(false);
effect(DEF dst, USE src);
format %{ "MTVSRD $dst, $src \t// Move to 16-byte register"%}
size(4);
ins_encode %{
__ mtvsrd($dst$$VectorSRegister, $src$$Register);
%}
ins_pipe(pipe_class_default);
%}
// Helper instruction without a match rule (effects only); the xxspltd node
// is created directly by loadConLReplicatedNodesTuple_create().
// Encoded as XXPERMDI with $src as both source operands and $zero as the
// immediate selector.
instruct xxspltd(vecX dst, vecX src, immI8 zero) %{
effect(DEF dst, USE src, USE zero);
format %{ "XXSPLATD $dst, $src, $zero \t// Permute 16-byte register"%}
size(4);
ins_encode %{
__ xxpermdi($dst$$VectorSRegister, $src$$VectorSRegister, $src$$VectorSRegister, $zero$$constant);
%}
ins_pipe(pipe_class_default);
%}
instruct xxpermdi(vecX dst, vecX src1, vecX src2, immI8 zero) %{
effect(DEF dst, USE src1, USE src2, USE zero);
format %{ "XXPERMDI $dst, $src1, $src2, $zero \t// Permute 16-byte register"%}
size(4);
ins_encode %{
__ xxpermdi($dst$$VectorSRegister, $src1$$VectorSRegister, $src2$$VectorSRegister, $zero$$constant);
%}
ins_pipe(pipe_class_default);
%}
instruct repl2L_reg_Ex(vecX dst, iRegLsrc src) %{
match(Set dst (ReplicateL src));
predicate(n->as_Vector()->length() == 2);
expand %{
vecX tmpV;
immI8 zero %{ (int) 0 %}
mtvsrd(tmpV, src);
xxpermdi(dst, tmpV, tmpV, zero);
%}
%}
instruct repl2L_immI0(vecX dst, immI_0 zero) %{
match(Set dst (ReplicateL zero));
predicate(n->as_Vector()->length() == 2);
format %{ "XXLXOR $dst, $zero \t// replicate2L" %}
size(4);
ins_encode %{
__ xxlxor($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
%}
ins_pipe(pipe_class_default);
%}
// Replicate the constant -1 into both long lanes of a 16-byte vector.
// XXLEQV of a register with itself sets every bit, so no source register is needed.
instruct repl2L_immIminus1(vecX dst, immI_minus1 src) %{
  match(Set dst (ReplicateL src));
  predicate(n->as_Vector()->length() == 2);

  format %{ "XXLEQV $dst, $src \t// replicate2L" %}
  size(4);
  ins_encode %{
    __ xxleqv($dst$$VectorSRegister, $dst$$VectorSRegister, $dst$$VectorSRegister);
  %}
  ins_pipe(pipe_class_default);
%}
// ============================================================================
// Safepoint Instruction

View File

@ -31,3 +31,5 @@
REGISTER_DEFINITION(Register, noreg);
REGISTER_DEFINITION(FloatRegister, fnoreg);
REGISTER_DEFINITION(VectorSRegister, vsnoreg);

View File

@ -677,7 +677,7 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
* 2 // register halves
+ ConditionRegisterImpl::number_of_registers // condition code registers
+ SpecialRegisterImpl::number_of_registers // special registers
+ VectorRegisterImpl::number_of_registers // VSX registers
+ VectorSRegisterImpl::number_of_registers // VSX registers
};
static const int max_gpr;

View File

@ -479,8 +479,12 @@ void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_siz
// Is vector's size (in bytes) bigger than a size saved by default?
// Vectors wider than 8 bytes count as wide. The asserts reject sizes above
// the supported maximum: 16 bytes with VSX, 8 bytes without.
bool SharedRuntime::is_wide_vector(int size) {
  // Note, MaxVectorSize == 8/16 on PPC64.
  if (VM_Version::has_vsx()) {
    assert(size <= 16, "%d bytes vectors are not supported", size);
  } else {
    assert(size <= 8, "%d bytes vectors are not supported", size);
  }
  return size > 8;
}
@ -2234,9 +2238,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ release();
// TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
__ stw(R0, thread_(thread_state));
if (UseMembar) {
__ fence();
}
// The JNI call
@ -2393,9 +2394,6 @@ nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
__ release();
// TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
__ stw(R0, thread_(thread_state));
if (UseMembar) {
__ fence();
}
__ bind(after_transition);
// Reguard any pages if necessary.

View File

@ -3095,6 +3095,28 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
// Emits the SHA-256 compression stub and returns its entry address.
//   multi_block - forwarded unchanged to MacroAssembler::sha256().
//   name        - stub name recorded by the StubCodeMark.
// Requires UseSHA to be set.
address generate_sha256_implCompress(bool multi_block, const char *name) {
  assert(UseSHA, "need SHA instructions");

  StubCodeMark stub_mark(this, "StubRoutines", name);
  const address entry = __ function_entry();

  // Stub body followed by the return to the caller.
  __ sha256(multi_block);
  __ blr();

  return entry;
}
// Emits the SHA-512 compression stub and returns its entry address.
//   multi_block - forwarded unchanged to MacroAssembler::sha512().
//   name        - stub name recorded by the StubCodeMark.
// Requires UseSHA to be set.
address generate_sha512_implCompress(bool multi_block, const char *name) {
  assert(UseSHA, "need SHA instructions");

  StubCodeMark stub_mark(this, "StubRoutines", name);
  const address entry = __ function_entry();

  // Stub body followed by the return to the caller.
  __ sha512(multi_block);
  __ blr();

  return entry;
}
void generate_arraycopy_stubs() {
// Note: the disjoint stubs must be generated first, some of
// the conjoint stubs use them.
@ -3306,6 +3328,267 @@ class StubGenerator: public StubCodeGenerator {
BLOCK_COMMENT("} Stub body");
}
/**
 * Stub for the mulAdd intrinsic.
 *
 * Arguments:
 *
 * Input:
 *   R3_ARG1    - out address
 *   R4_ARG2    - in address
 *   R5_ARG3    - offset
 *   R6_ARG4    - len
 *   R7_ARG5    - k
 * Output:
 *   R3_RET     - carry
 *
 * R8, R9 and R10 are handed to MacroAssembler::muladd() as its remaining
 * arguments; the value left in R10 is returned as the carry.
 */
address generate_mulAdd() {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", "mulAdd");
  const address entry = __ function_entry();

  const Register out    = R3_ARG1;
  const Register in     = R4_ARG2;
  const Register offset = R5_ARG3;
  const Register len    = R6_ARG4;
  const Register k      = R7_ARG5;
  const Register carry  = R10;

  // C2 does not sign extend signed parameters to full 64 bits registers:
  __ rldic (offset, offset, 2, 32);  // shift left by 2 and clear the upper word; always positive
  __ clrldi(len, len, 32);           // force zero bits on higher word
  __ clrldi(k, k, 32);               // force zero bits on higher word

  __ muladd(out, in, offset, len, k, R8, R9, carry);

  // Moves output carry to return register
  __ mr    (R3_RET, carry);

  __ blr();

  return entry;
}
/**
* Stub for the squareToLen intrinsic. The code runs in three phases, as
* marked by the comments in the body: store the squares of the input words
* shifted right by one bit, add in the off-diagonal products (via muladd),
* then shift the result left by one bit and set the low bit.
*
* Arguments:
*
* Input:
* R3_ARG1 - in address
* R4_ARG2 - in length
* R5_ARG3 - out address
* R6_ARG4 - out length
* Output:
* R3_RET - out address (copied from the 'out' register before returning)
*/
address generate_squareToLen() {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "squareToLen");
address start = __ function_entry();
// args - higher word is cleaned (unsignedly) due to int to long casting
const Register in = R3_ARG1;
const Register in_len = R4_ARG2;
__ clrldi(in_len, in_len, 32);
const Register out = R5_ARG3;
const Register out_len = R6_ARG4;
__ clrldi(out_len, out_len, 32);
// output
const Register ret = R3_RET;
// temporaries
const Register lplw_s = R7;
const Register in_aux = R8;
const Register out_aux = R9;
const Register piece = R10;
const Register product = R14;
const Register lplw = R15;
const Register i_minus1 = R16;
const Register carry = R17;
const Register offset = R18;
const Register off_aux = R19;
const Register t = R20;
const Register mlen = R21;
const Register len = R22;
const Register a = R23;
const Register b = R24;
const Register i = R25;
const Register c = R26;
const Register cs = R27;
// Labels
Label SKIP_LSHIFT, SKIP_DIAGONAL_SUM, SKIP_ADDONE, SKIP_MULADD, SKIP_LOOP_SQUARE;
Label LOOP_LSHIFT, LOOP_DIAGONAL_SUM, LOOP_ADDONE, LOOP_MULADD, LOOP_SQUARE;
// Save non-volatile regs (frameless).
// R28..R14 are stored at negative offsets from R1_SP; no frame is pushed.
// NOTE(review): R28 is saved and restored although no temporary above is
// bound to it — confirm whether it is needed (e.g. by muladd).
int current_offs = -8;
__ std(R28, current_offs, R1_SP); current_offs -= 8;
__ std(R27, current_offs, R1_SP); current_offs -= 8;
__ std(R26, current_offs, R1_SP); current_offs -= 8;
__ std(R25, current_offs, R1_SP); current_offs -= 8;
__ std(R24, current_offs, R1_SP); current_offs -= 8;
__ std(R23, current_offs, R1_SP); current_offs -= 8;
__ std(R22, current_offs, R1_SP); current_offs -= 8;
__ std(R21, current_offs, R1_SP); current_offs -= 8;
__ std(R20, current_offs, R1_SP); current_offs -= 8;
__ std(R19, current_offs, R1_SP); current_offs -= 8;
__ std(R18, current_offs, R1_SP); current_offs -= 8;
__ std(R17, current_offs, R1_SP); current_offs -= 8;
__ std(R16, current_offs, R1_SP); current_offs -= 8;
__ std(R15, current_offs, R1_SP); current_offs -= 8;
__ std(R14, current_offs, R1_SP);
// Store the squares, right shifted one bit (i.e., divided by 2)
// out_aux/in_aux start one element before the arrays because the loop uses
// the update forms stdu/lwzu, which advance the pointer before accessing.
__ subi (out_aux, out, 8);
__ subi (in_aux, in, 4);
__ cmpwi (CCR0, in_len, 0);
// Initialize lplw outside of the loop
__ xorr (lplw, lplw, lplw);
__ ble (CCR0, SKIP_LOOP_SQUARE); // in_len <= 0
__ mtctr (in_len);
__ bind(LOOP_SQUARE);
__ lwzu (piece, 4, in_aux);
__ mulld (product, piece, piece);
// shift left 63 bits and only keep the MSB
__ rldic (lplw_s, lplw, 63, 0);
__ mr (lplw, product);
// shift right 1 bit without sign extension
__ srdi (product, product, 1);
// join them to the same register and store it
__ orr (product, lplw_s, product);
#ifdef VM_LITTLE_ENDIAN
// Swap low and high words for little endian
__ rldicl (product, product, 32, 0);
#endif
__ stdu (product, 8, out_aux);
__ bdnz (LOOP_SQUARE);
__ bind(SKIP_LOOP_SQUARE);
// Add in off-diagonal sums
__ cmpwi (CCR0, in_len, 0);
__ ble (CCR0, SKIP_DIAGONAL_SUM);
// Avoid CTR usage here in order to use it at mulAdd
__ subi (i_minus1, in_len, 1);
__ li (offset, 4);
__ bind(LOOP_DIAGONAL_SUM);
__ sldi (off_aux, out_len, 2);
__ sub (off_aux, off_aux, offset);
__ mr (len, i_minus1);
__ sldi (mlen, i_minus1, 2);
__ lwzx (t, in, mlen);
__ muladd (out, in, off_aux, len, t, a, b, carry);
// begin<addOne>
// off_aux = out_len*4 - 4 - mlen - offset*4 - 4;
__ addi (mlen, mlen, 4);
__ sldi (a, out_len, 2);
__ subi (a, a, 4);
__ sub (a, a, mlen);
__ subi (off_aux, offset, 4);
__ sub (off_aux, a, off_aux);
__ lwzx (b, off_aux, out);
__ add (b, b, carry);
__ stwx (b, off_aux, out);
// if (((uint64_t)s >> 32) != 0) {
__ srdi_ (a, b, 32);
__ beq (CCR0, SKIP_ADDONE);
// while (--mlen >= 0) {
__ bind(LOOP_ADDONE);
__ subi (mlen, mlen, 4);
__ cmpwi (CCR0, mlen, 0);
__ beq (CCR0, SKIP_ADDONE);
// if (--offset_aux < 0) { // Carry out of number
__ subi (off_aux, off_aux, 4);
__ cmpwi (CCR0, off_aux, 0);
__ blt (CCR0, SKIP_ADDONE);
// } else {
__ lwzx (b, off_aux, out);
__ addi (b, b, 1);
__ stwx (b, off_aux, out);
__ cmpwi (CCR0, b, 0);
__ bne (CCR0, SKIP_ADDONE);
__ b (LOOP_ADDONE);
__ bind(SKIP_ADDONE);
// } } } end<addOne>
__ addi (offset, offset, 8);
__ subi (i_minus1, i_minus1, 1);
__ cmpwi (CCR0, i_minus1, 0);
__ bge (CCR0, LOOP_DIAGONAL_SUM);
__ bind(SKIP_DIAGONAL_SUM);
// Shift back up and set low bit
// Shifts 1 bit left up to len positions. Assumes no leading zeros
// begin<primitiveLeftShift>
__ cmpwi (CCR0, out_len, 0);
__ ble (CCR0, SKIP_LSHIFT);
__ li (i, 0);
__ lwz (c, 0, out);
__ subi (b, out_len, 1);
__ mtctr (b);
__ bind(LOOP_LSHIFT);
__ mr (b, c);
__ addi (cs, i, 4);
__ lwzx (c, out, cs);
__ sldi (b, b, 1);
__ srwi (cs, c, 31);
__ orr (b, b, cs);
__ stwx (b, i, out);
__ addi (i, i, 4);
__ bdnz (LOOP_LSHIFT);
// Last word (byte offset out_len*4 - 4): shifted without a carry-in bit.
__ sldi (c, out_len, 2);
__ subi (c, c, 4);
__ lwzx (b, out, c);
__ sldi (b, b, 1);
__ stwx (b, out, c);
__ bind(SKIP_LSHIFT);
// end<primitiveLeftShift>
// Set low bit
// ORs bit 0 of the last input word into the last output word.
__ sldi (i, in_len, 2);
__ subi (i, i, 4);
__ lwzx (i, in, i);
__ sldi (c, out_len, 2);
__ subi (c, c, 4);
__ lwzx (b, out, c);
__ andi (i, i, 1);
__ orr (i, b, i);
__ stwx (i, out, c);
// Restore non-volatile regs.
// Same order and offsets as the save sequence above.
current_offs = -8;
__ ld(R28, current_offs, R1_SP); current_offs -= 8;
__ ld(R27, current_offs, R1_SP); current_offs -= 8;
__ ld(R26, current_offs, R1_SP); current_offs -= 8;
__ ld(R25, current_offs, R1_SP); current_offs -= 8;
__ ld(R24, current_offs, R1_SP); current_offs -= 8;
__ ld(R23, current_offs, R1_SP); current_offs -= 8;
__ ld(R22, current_offs, R1_SP); current_offs -= 8;
__ ld(R21, current_offs, R1_SP); current_offs -= 8;
__ ld(R20, current_offs, R1_SP); current_offs -= 8;
__ ld(R19, current_offs, R1_SP); current_offs -= 8;
__ ld(R18, current_offs, R1_SP); current_offs -= 8;
__ ld(R17, current_offs, R1_SP); current_offs -= 8;
__ ld(R16, current_offs, R1_SP); current_offs -= 8;
__ ld(R15, current_offs, R1_SP); current_offs -= 8;
__ ld(R14, current_offs, R1_SP);
// Return the out address.
__ mr(ret, out);
__ blr();
return start;
}
/**
* Arguments:
@ -3500,6 +3783,12 @@ class StubGenerator: public StubCodeGenerator {
}
#endif
if (UseSquareToLenIntrinsic) {
StubRoutines::_squareToLen = generate_squareToLen();
}
if (UseMulAddIntrinsic) {
StubRoutines::_mulAdd = generate_mulAdd();
}
if (UseMontgomeryMultiplyIntrinsic) {
StubRoutines::_montgomeryMultiply
= CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
@ -3514,6 +3803,14 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
}
if (UseSHA256Intrinsics) {
StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
}
if (UseSHA512Intrinsics) {
StubRoutines::_sha512_implCompress = generate_sha512_implCompress(false, "sha512_implCompress");
StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true, "sha512_implCompressMB");
}
}
public:

View File

@ -34,7 +34,7 @@ static bool returns_to_call_stub(address return_pc) { return return_pc == _call_
// Sizes of the two stub code buffers on this platform.
enum platform_dependent_constants {
  code_size1 = 20000,          // simply increase if too small (assembler will crash if too small)
  code_size2 = 22000           // simply increase if too small (assembler will crash if too small)
};
// CRC32 Intrinsics.

View File

@ -1470,10 +1470,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// TODO PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
__ stw(R0, thread_(thread_state));
if (UseMembar) {
__ fence();
}
//=============================================================================
// Call the native method. Argument registers must not have been
// overwritten since "__ call_stub(signature_handler);" (except for
@ -1594,9 +1590,6 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
__ li(R0/*thread_state*/, _thread_in_Java);
__ release();
__ stw(R0/*thread_state*/, thread_(thread_state));
if (UseMembar) {
__ fence();
}
if (CheckJNICalls) {
// clear_pending_jni_exception_check

View File

@ -2224,6 +2224,7 @@ void TemplateTable::load_field_cp_cache_entry(Register Robj,
if (is_static) {
__ ld(Robj, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::f1_offset()), Rcache);
__ ld(Robj, in_bytes(Klass::java_mirror_offset()), Robj);
__ resolve_oop_handle(Robj);
// Acquire not needed here. Following access has an address dependency on this value.
}
}

View File

@ -107,13 +107,16 @@ void VM_Version::initialize() {
// TODO: PPC port PdScheduling::power6SectorSize = 0x20;
}
MaxVectorSize = 8;
if (VM_Version::has_vsx())
MaxVectorSize = 16;
else
MaxVectorSize = 8;
#endif
// Create and print feature-string.
char buf[(num_features+1) * 16]; // Max 16 chars per feature.
jio_snprintf(buf, sizeof(buf),
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
"ppc64%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
(has_fsqrt() ? " fsqrt" : ""),
(has_isel() ? " isel" : ""),
(has_lxarxeh() ? " lxarxeh" : ""),
@ -130,7 +133,8 @@ void VM_Version::initialize() {
(has_mfdscr() ? " mfdscr" : ""),
(has_vsx() ? " vsx" : ""),
(has_ldbrx() ? " ldbrx" : ""),
(has_stdbrx() ? " stdbrx" : "")
(has_stdbrx() ? " stdbrx" : ""),
(has_vshasig() ? " sha" : "")
// Make sure number of %s matches num_features!
);
_features_string = os::strdup(buf);
@ -247,17 +251,49 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseFMA, true);
}
if (UseSHA) {
warning("SHA instructions are not available on this CPU");
if (has_vshasig()) {
if (FLAG_IS_DEFAULT(UseSHA)) {
UseSHA = true;
}
} else if (UseSHA) {
if (!FLAG_IS_DEFAULT(UseSHA))
warning("SHA instructions are not available on this CPU");
FLAG_SET_DEFAULT(UseSHA, false);
}
if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
warning("SHA intrinsics are not available on this CPU");
if (UseSHA1Intrinsics) {
warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
}
if (UseSHA && has_vshasig()) {
if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
}
} else if (UseSHA256Intrinsics) {
warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
}
if (UseSHA && has_vshasig()) {
if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
}
} else if (UseSHA512Intrinsics) {
warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
FLAG_SET_DEFAULT(UseSHA, false);
}
if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
UseSquareToLenIntrinsic = true;
}
if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
UseMulAddIntrinsic = true;
}
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
UseMultiplyToLenIntrinsic = true;
}
@ -657,6 +693,7 @@ void VM_Version::determine_features() {
a->lxvd2x(VSR0, R3_ARG1); // code[14] -> vsx
a->ldbrx(R7, R3_ARG1, R4_ARG2); // code[15] -> ldbrx
a->stdbrx(R7, R3_ARG1, R4_ARG2); // code[16] -> stdbrx
a->vshasigmaw(VR0, VR1, 1, 0xF); // code[17] -> vshasig
a->blr();
// Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
@ -708,6 +745,7 @@ void VM_Version::determine_features() {
if (code[feature_cntr++]) features |= vsx_m;
if (code[feature_cntr++]) features |= ldbrx_m;
if (code[feature_cntr++]) features |= stdbrx_m;
if (code[feature_cntr++]) features |= vshasig_m;
// Print the detection code.
if (PrintAssembly) {

View File

@ -49,6 +49,7 @@ protected:
vsx,
ldbrx,
stdbrx,
vshasig,
num_features // last entry to count features
};
enum Feature_Flag_Set {
@ -64,6 +65,7 @@ protected:
vand_m = (1 << vand ),
lqarx_m = (1 << lqarx ),
vcipher_m = (1 << vcipher),
vshasig_m = (1 << vshasig),
vpmsumb_m = (1 << vpmsumb),
tcheck_m = (1 << tcheck ),
mfdscr_m = (1 << mfdscr ),
@ -106,6 +108,7 @@ public:
static bool has_vsx() { return (_features & vsx_m) != 0; }
static bool has_ldbrx() { return (_features & ldbrx_m) != 0; }
static bool has_stdbrx() { return (_features & stdbrx_m) != 0; }
static bool has_vshasig() { return (_features & vshasig_m) != 0; }
static bool has_mtfprd() { return has_vpmsumb(); } // alias for P8
// Assembler testing

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -250,7 +250,6 @@ class Address VALUE_OBJ_CLASS_SPEC {
bool is_RSform() { return has_base() && !has_index() && is_disp12(); }
bool is_RSYform() { return has_base() && !has_index() && is_disp20(); }
bool is_RXform() { return has_base() && has_index() && is_disp12(); }
bool is_RXEform() { return has_base() && has_index() && is_disp12(); }
bool is_RXYform() { return has_base() && has_index() && is_disp20(); }
bool uses(Register r) { return _base == r || _index == r; };
@ -1093,7 +1092,201 @@ class Assembler : public AbstractAssembler {
#define TRTT_ZOPC (unsigned int)(0xb9 << 24 | 0x90 << 16)
// Miscellaneous Operations
//---------------------------
//-- Vector Instructions --
//---------------------------
//---< Vector Support Instructions >---
//--- Load (memory) ---
// Layout shared by all vector opcode templates in this table: a 48-bit value with
// 0xe7 in bits 40..47 (0xe7L << 40) and an instruction-specific byte in bits 0..7.
#define VLM_ZOPC (unsigned long)(0xe7L << 40 | 0x36L << 0) // load full vreg range (n * 128 bit)
#define VL_ZOPC (unsigned long)(0xe7L << 40 | 0x06L << 0) // load full vreg (128 bit)
#define VLEB_ZOPC (unsigned long)(0xe7L << 40 | 0x00L << 0) // load vreg element (8 bit)
#define VLEH_ZOPC (unsigned long)(0xe7L << 40 | 0x01L << 0) // load vreg element (16 bit)
#define VLEF_ZOPC (unsigned long)(0xe7L << 40 | 0x03L << 0) // load vreg element (32 bit)
#define VLEG_ZOPC (unsigned long)(0xe7L << 40 | 0x02L << 0) // load vreg element (64 bit)
#define VLREP_ZOPC (unsigned long)(0xe7L << 40 | 0x05L << 0) // load and replicate into all vector elements
#define VLLEZ_ZOPC (unsigned long)(0xe7L << 40 | 0x04L << 0) // load logical element and zero.
// vector register gather
#define VGEF_ZOPC (unsigned long)(0xe7L << 40 | 0x13L << 0) // gather element (32 bit), V1(M3) = [D2(V2(M3),B2)]
#define VGEG_ZOPC (unsigned long)(0xe7L << 40 | 0x12L << 0) // gather element (64 bit), V1(M3) = [D2(V2(M3),B2)]
// vector register scatter
#define VSCEF_ZOPC (unsigned long)(0xe7L << 40 | 0x1bL << 0) // vector scatter element FW
#define VSCEG_ZOPC (unsigned long)(0xe7L << 40 | 0x1aL << 0) // vector scatter element DW
#define VLBB_ZOPC (unsigned long)(0xe7L << 40 | 0x07L << 0) // load vreg to block boundary (load to alignment).
#define VLL_ZOPC (unsigned long)(0xe7L << 40 | 0x37L << 0) // load vreg with length.
//--- Load (register) ---
#define VLR_ZOPC (unsigned long)(0xe7L << 40 | 0x56L << 0) // copy full vreg (128 bit)
#define VLGV_ZOPC (unsigned long)(0xe7L << 40 | 0x21L << 0) // copy vreg element -> GR
#define VLVG_ZOPC (unsigned long)(0xe7L << 40 | 0x22L << 0) // copy GR -> vreg element
#define VLVGP_ZOPC (unsigned long)(0xe7L << 40 | 0x62L << 0) // copy GR2, GR3 (disjoint pair) -> vreg
// vector register pack: cut in half the size of the source vector elements
#define VPK_ZOPC (unsigned long)(0xe7L << 40 | 0x94L << 0) // just cut
#define VPKS_ZOPC (unsigned long)(0xe7L << 40 | 0x97L << 0) // saturate as signed values
#define VPKLS_ZOPC (unsigned long)(0xe7L << 40 | 0x95L << 0) // saturate as unsigned values
// vector register unpack: double in size the source vector elements
#define VUPH_ZOPC (unsigned long)(0xe7L << 40 | 0xd7L << 0) // signed, left half of the source vector elements
#define VUPLH_ZOPC (unsigned long)(0xe7L << 40 | 0xd5L << 0) // unsigned, left half of the source vector elements
#define VUPL_ZOPC (unsigned long)(0xe7L << 40 | 0xd6L << 0) // signed, right half of the source vector elements
#define VUPLL_ZOPC (unsigned long)(0xe7L << 40 | 0xd4L << 0) // unsigned, right half of the source vector elements
// vector register merge
#define VMRH_ZOPC (unsigned long)(0xe7L << 40 | 0x61L << 0) // register merge high (left half of source registers)
#define VMRL_ZOPC (unsigned long)(0xe7L << 40 | 0x60L << 0) // register merge low (right half of source registers)
// vector register permute
#define VPERM_ZOPC (unsigned long)(0xe7L << 40 | 0x8cL << 0) // vector permute
#define VPDI_ZOPC (unsigned long)(0xe7L << 40 | 0x84L << 0) // vector permute DW immediate
// vector register replicate
#define VREP_ZOPC (unsigned long)(0xe7L << 40 | 0x4dL << 0) // vector replicate
#define VREPI_ZOPC (unsigned long)(0xe7L << 40 | 0x45L << 0) // vector replicate immediate
#define VSEL_ZOPC (unsigned long)(0xe7L << 40 | 0x8dL << 0) // vector select
#define VSEG_ZOPC (unsigned long)(0xe7L << 40 | 0x5fL << 0) // vector sign-extend to DW (rightmost element in each DW).
//--- Load (immediate) ---
#define VLEIB_ZOPC (unsigned long)(0xe7L << 40 | 0x40L << 0) // load vreg element (16 bit imm to 8 bit)
#define VLEIH_ZOPC (unsigned long)(0xe7L << 40 | 0x41L << 0) // load vreg element (16 bit imm to 16 bit)
#define VLEIF_ZOPC (unsigned long)(0xe7L << 40 | 0x43L << 0) // load vreg element (16 bit imm to 32 bit)
#define VLEIG_ZOPC (unsigned long)(0xe7L << 40 | 0x42L << 0) // load vreg element (16 bit imm to 64 bit)
//--- Store ---
#define VSTM_ZOPC (unsigned long)(0xe7L << 40 | 0x3eL << 0) // store full vreg range (n * 128 bit)
#define VST_ZOPC (unsigned long)(0xe7L << 40 | 0x0eL << 0) // store full vreg (128 bit)
#define VSTEB_ZOPC (unsigned long)(0xe7L << 40 | 0x08L << 0) // store vreg element (8 bit)
#define VSTEH_ZOPC (unsigned long)(0xe7L << 40 | 0x09L << 0) // store vreg element (16 bit)
#define VSTEF_ZOPC (unsigned long)(0xe7L << 40 | 0x0bL << 0) // store vreg element (32 bit)
#define VSTEG_ZOPC (unsigned long)(0xe7L << 40 | 0x0aL << 0) // store vreg element (64 bit)
#define VSTL_ZOPC (unsigned long)(0xe7L << 40 | 0x3fL << 0) // store vreg with length.
//--- Misc ---
#define VGM_ZOPC (unsigned long)(0xe7L << 40 | 0x46L << 0) // generate bit mask, [start..end] = '1', else '0'
#define VGBM_ZOPC (unsigned long)(0xe7L << 40 | 0x44L << 0) // generate byte mask, bits(imm16) -> bytes
//---< Vector Arithmetic Instructions >---
// Opcode templates for element-wise vector arithmetic. Same layout as the other
// vector opcodes: 0xe7 in bits 40..47, instruction-specific byte in bits 0..7.
// "element size = 2**m" in the comments refers to the element size operand (m).
// Load
#define VLC_ZOPC (unsigned long)(0xe7L << 40 | 0xdeL << 0) // V1 := -V2, element size = 2**m
#define VLP_ZOPC (unsigned long)(0xe7L << 40 | 0xdfL << 0) // V1 := |V2|, element size = 2**m
// ADD
#define VA_ZOPC (unsigned long)(0xe7L << 40 | 0xf3L << 0) // V1 := V2 + V3, element size = 2**m
#define VACC_ZOPC (unsigned long)(0xe7L << 40 | 0xf1L << 0) // V1 := carry(V2 + V3), element size = 2**m
// SUB
#define VS_ZOPC (unsigned long)(0xe7L << 40 | 0xf7L << 0) // V1 := V2 - V3, element size = 2**m
#define VSCBI_ZOPC (unsigned long)(0xe7L << 40 | 0xf5L << 0) // V1 := borrow(V2 - V3), element size = 2**m
// MUL
// NOTE(review): the comments on the multiply (and multiply & add) variants are
// identical although the opcodes differ. Presumably L/H select the low/high half
// of the product and E/O the even/odd source elements - confirm against the
// z/Architecture Principles of Operation before relying on these comments.
#define VML_ZOPC (unsigned long)(0xe7L << 40 | 0xa2L << 0) // V1 := V2 * V3, element size = 2**m
#define VMH_ZOPC (unsigned long)(0xe7L << 40 | 0xa3L << 0) // V1 := V2 * V3, element size = 2**m
#define VMLH_ZOPC (unsigned long)(0xe7L << 40 | 0xa1L << 0) // V1 := V2 * V3, element size = 2**m, unsigned
#define VME_ZOPC (unsigned long)(0xe7L << 40 | 0xa6L << 0) // V1 := V2 * V3, element size = 2**m
#define VMLE_ZOPC (unsigned long)(0xe7L << 40 | 0xa4L << 0) // V1 := V2 * V3, element size = 2**m, unsigned
#define VMO_ZOPC (unsigned long)(0xe7L << 40 | 0xa7L << 0) // V1 := V2 * V3, element size = 2**m
#define VMLO_ZOPC (unsigned long)(0xe7L << 40 | 0xa5L << 0) // V1 := V2 * V3, element size = 2**m, unsigned
// MUL & ADD
#define VMAL_ZOPC (unsigned long)(0xe7L << 40 | 0xaaL << 0) // V1 := V2 * V3 + V4, element size = 2**m
#define VMAH_ZOPC (unsigned long)(0xe7L << 40 | 0xabL << 0) // V1 := V2 * V3 + V4, element size = 2**m
#define VMALH_ZOPC (unsigned long)(0xe7L << 40 | 0xa9L << 0) // V1 := V2 * V3 + V4, element size = 2**m, unsigned
#define VMAE_ZOPC (unsigned long)(0xe7L << 40 | 0xaeL << 0) // V1 := V2 * V3 + V4, element size = 2**m
#define VMALE_ZOPC (unsigned long)(0xe7L << 40 | 0xacL << 0) // V1 := V2 * V3 + V4, element size = 2**m, unsigned
#define VMAO_ZOPC (unsigned long)(0xe7L << 40 | 0xafL << 0) // V1 := V2 * V3 + V4, element size = 2**m
#define VMALO_ZOPC (unsigned long)(0xe7L << 40 | 0xadL << 0) // V1 := V2 * V3 + V4, element size = 2**m, unsigned
// Vector SUM
#define VSUM_ZOPC (unsigned long)(0xe7L << 40 | 0x64L << 0) // V1[j] := toFW(sum(V2[i]) + V3[j]), subelements: byte or HW
#define VSUMG_ZOPC (unsigned long)(0xe7L << 40 | 0x65L << 0) // V1[j] := toDW(sum(V2[i]) + V3[j]), subelements: HW or FW
#define VSUMQ_ZOPC (unsigned long)(0xe7L << 40 | 0x67L << 0) // V1[j] := toQW(sum(V2[i]) + V3[j]), subelements: FW or DW
// Average
#define VAVG_ZOPC (unsigned long)(0xe7L << 40 | 0xf2L << 0) // V1 := (V2+V3+1)/2, signed, element size = 2**m
#define VAVGL_ZOPC (unsigned long)(0xe7L << 40 | 0xf0L << 0) // V1 := (V2+V3+1)/2, unsigned, element size = 2**m
// VECTOR Galois Field Multiply Sum
#define VGFM_ZOPC (unsigned long)(0xe7L << 40 | 0xb4L << 0)
#define VGFMA_ZOPC (unsigned long)(0xe7L << 40 | 0xbcL << 0)
//---< Vector Logical Instructions >---
// Opcode templates for bitwise, compare, min/max, bit-count and shift/rotate
// vector instructions. Same layout as the other vector opcodes: 0xe7 in bits
// 40..47, instruction-specific byte in bits 0..7.
// NOTE(review): the emitters z_vn/z_vnc/z_vx/z_vno/z_vo declared in this file take
// no element-size operand, so "element size = 2**m" on the five bitwise opcodes
// below looks like a copy/paste leftover - confirm.
// AND
#define VN_ZOPC (unsigned long)(0xe7L << 40 | 0x68L << 0) // V1 := V2 & V3, element size = 2**m
#define VNC_ZOPC (unsigned long)(0xe7L << 40 | 0x69L << 0) // V1 := V2 & ~V3, element size = 2**m
// XOR
#define VX_ZOPC (unsigned long)(0xe7L << 40 | 0x6dL << 0) // V1 := V2 ^ V3, element size = 2**m
// NOR
#define VNO_ZOPC (unsigned long)(0xe7L << 40 | 0x6bL << 0) // V1 := !(V2 | V3), element size = 2**m
// OR
#define VO_ZOPC (unsigned long)(0xe7L << 40 | 0x6aL << 0) // V1 := V2 | V3, element size = 2**m
// Comparison (element-wise)
#define VCEQ_ZOPC (unsigned long)(0xe7L << 40 | 0xf8L << 0) // V1 := (V2 == V3) ? 0xffff : 0x0000, element size = 2**m
#define VCH_ZOPC (unsigned long)(0xe7L << 40 | 0xfbL << 0) // V1 := (V2 > V3) ? 0xffff : 0x0000, element size = 2**m, signed
#define VCHL_ZOPC (unsigned long)(0xe7L << 40 | 0xf9L << 0) // V1 := (V2 > V3) ? 0xffff : 0x0000, element size = 2**m, unsigned
// Max/Min (element-wise)
#define VMX_ZOPC (unsigned long)(0xe7L << 40 | 0xffL << 0) // V1 := (V2 > V3) ? V2 : V3, element size = 2**m, signed
#define VMXL_ZOPC (unsigned long)(0xe7L << 40 | 0xfdL << 0) // V1 := (V2 > V3) ? V2 : V3, element size = 2**m, unsigned
#define VMN_ZOPC (unsigned long)(0xe7L << 40 | 0xfeL << 0) // V1 := (V2 < V3) ? V2 : V3, element size = 2**m, signed
#define VMNL_ZOPC (unsigned long)(0xe7L << 40 | 0xfcL << 0) // V1 := (V2 < V3) ? V2 : V3, element size = 2**m, unsigned
// Leading/Trailing Zeros, population count
#define VCLZ_ZOPC (unsigned long)(0xe7L << 40 | 0x53L << 0) // V1 := leadingzeros(V2), element size = 2**m
#define VCTZ_ZOPC (unsigned long)(0xe7L << 40 | 0x52L << 0) // V1 := trailingzeros(V2), element size = 2**m
#define VPOPCT_ZOPC (unsigned long)(0xe7L << 40 | 0x50L << 0) // V1 := popcount(V2), bytewise!!
// Rotate/Shift
#define VERLLV_ZOPC (unsigned long)(0xe7L << 40 | 0x73L << 0) // V1 := rotateleft(V2), rotate count in V3 element
#define VERLL_ZOPC (unsigned long)(0xe7L << 40 | 0x33L << 0) // V1 := rotateleft(V3), rotate count from d2(b2).
#define VERIM_ZOPC (unsigned long)(0xe7L << 40 | 0x72L << 0) // Rotate then insert under mask. Read Principles of Operation!!
#define VESLV_ZOPC (unsigned long)(0xe7L << 40 | 0x70L << 0) // V1 := SLL(V2, V3), unsigned, element-wise
#define VESL_ZOPC (unsigned long)(0xe7L << 40 | 0x30L << 0) // V1 := SLL(V3), unsigned, shift count from d2(b2).
#define VESRAV_ZOPC (unsigned long)(0xe7L << 40 | 0x7AL << 0) // V1 := SRA(V2, V3), signed, element-wise
#define VESRA_ZOPC (unsigned long)(0xe7L << 40 | 0x3AL << 0) // V1 := SRA(V3), signed, shift count from d2(b2).
#define VESRLV_ZOPC (unsigned long)(0xe7L << 40 | 0x78L << 0) // V1 := SRL(V2, V3), unsigned, element-wise
#define VESRL_ZOPC (unsigned long)(0xe7L << 40 | 0x38L << 0) // V1 := SRL(V3), unsigned, shift count from d2(b2).
#define VSL_ZOPC (unsigned long)(0xe7L << 40 | 0x74L << 0) // V1 := SLL(V2), unsigned, bit-count
#define VSLB_ZOPC (unsigned long)(0xe7L << 40 | 0x75L << 0) // V1 := SLL(V2), unsigned, byte-count
#define VSLDB_ZOPC (unsigned long)(0xe7L << 40 | 0x77L << 0) // V1 := SLL((V2,V3)), unsigned, byte-count
#define VSRA_ZOPC (unsigned long)(0xe7L << 40 | 0x7eL << 0) // V1 := SRA(V2), signed, bit-count
#define VSRAB_ZOPC (unsigned long)(0xe7L << 40 | 0x7fL << 0) // V1 := SRA(V2), signed, byte-count
#define VSRL_ZOPC (unsigned long)(0xe7L << 40 | 0x7cL << 0) // V1 := SRL(V2), unsigned, bit-count
#define VSRLB_ZOPC (unsigned long)(0xe7L << 40 | 0x7dL << 0) // V1 := SRL(V2), unsigned, byte-count
// Test under Mask
#define VTM_ZOPC (unsigned long)(0xe7L << 40 | 0xd8L << 0) // Like TM, set CC according to state of selected bits.
//---< Vector String Instructions >---
// Opcode templates for the vector string instructions. Same layout as the other
// vector opcodes: 0xe7 in bits 40..47, instruction-specific byte in bits 0..7.
#define VFAE_ZOPC (unsigned long)(0xe7L << 40 | 0x82L << 0) // Find any element
#define VFEE_ZOPC (unsigned long)(0xe7L << 40 | 0x80L << 0) // Find element equal
#define VFENE_ZOPC (unsigned long)(0xe7L << 40 | 0x81L << 0) // Find element not equal
#define VSTRC_ZOPC (unsigned long)(0xe7L << 40 | 0x8aL << 0) // String range compare
#define VISTR_ZOPC (unsigned long)(0xe7L << 40 | 0x5cL << 0) // Isolate String
//--------------------------------
//-- Miscellaneous Operations --
//--------------------------------
// Execute: 32-bit opcode template with 0x44 (decimal 68) in bits 24..31.
#define EX_ZOPC (unsigned int)(0x44L << 24)
@ -1280,6 +1473,29 @@ class Assembler : public AbstractAssembler {
to_minus_infinity = 7
};
// Vector Register Element Type.
// Consecutive size codes, starting at 0 for the smallest element:
// byte, halfword (HW), fullword (FW), doubleword (DW), quadword (QW).
enum VRegElemType {
  VRET_BYTE = 0,
  VRET_HW,        // 1
  VRET_FW,        // 2
  VRET_DW,        // 3
  VRET_QW         // 4
};
// Vector Operation Result Control.
// Flag set used by some vector instructions to control the result (side)
// effects of instruction execution. Each option is a single bit of a 4-bit
// field; the matching "off" constant is 0 and exists for readability only.
enum VOpRC {
  // bit 0: condition code handling
  VOPRC_CCSET    = 1 << 0, // set the CC.
  VOPRC_CCIGN    = 0,      // ignore, don't set CC.
  // bit 1: zero search
  VOPRC_ZS       = 1 << 1, // Zero Search. Additional, elementwise, comparison against zero.
  VOPRC_NOZS     = 0,      // No Zero Search.
  // bit 2: result type
  VOPRC_RTBYTEIX = 1 << 2, // generate byte index to lowest element with true comparison.
  VOPRC_RTBITVEC = 0,      // generate bit vector, all 1s for true, all 0s for false element comparisons.
  // bit 3: result inversion
  VOPRC_INVERT   = 1 << 3, // invert comparison results.
  VOPRC_NOINVERT = 0       // use comparison results as is, do not invert.
};
// Inverse condition code, i.e. determine "15 - cc" for a given condition code cc.
static branch_condition inverse_condition(branch_condition cc);
// NOTE(review): presumably the counterpart for condition codes produced by
// floating-point compares; the definition is not visible here - confirm.
static branch_condition inverse_float_condition(branch_condition cc);
@ -1376,6 +1592,65 @@ class Assembler : public AbstractAssembler {
return r;
}
// Encode an RS-form address (base register + 12-bit displacement) for a
// 48-bit instruction. Asserts that 'a' satisfies is_RSform().
static int64_t rsmask_48(Address a) {
  assert(a.is_RSform(), "bad address format");
  const int64_t disp = a.disp12();
  return rsmask_48(disp, a.base());
}
// Encode an address for an RX-type operand of a 48-bit instruction.
// An RX-form address is encoded with its index register; an RS-form address
// (no index register) is accepted too and encoded via rsmask_48().
// Any other address format fails a guarantee().
static int64_t rxmask_48(Address a) {
  if (a.is_RXform()) {
    return rxmask_48(a.disp12(), a.index(), a.base());
  }
  if (a.is_RSform()) {
    return rsmask_48(a.disp12(), a.base());
  }
  guarantee(false, "bad address format");
  return 0;
}
// Encode an RSY-form address (base register + 20-bit displacement) for a
// 48-bit instruction. Asserts that 'a' satisfies is_RSYform().
static int64_t rsymask_48(Address a) {
  assert(a.is_RSYform(), "bad address format");
  const int64_t disp = a.disp20();
  return rsymask_48(disp, a.base());
}
// Encode an address for an RXY-type operand of a 48-bit instruction.
// An RXY-form address is encoded with its index register; an RSY-form address
// (no index register) is accepted too and encoded via rsymask_48().
// Any other address format fails a guarantee().
static int64_t rxymask_48(Address a) {
  if (a.is_RXYform()) {
    return rxymask_48(a.disp20(), a.index(), a.base());
  }
  if (a.is_RSYform()) {
    return rsymask_48(a.disp20(), a.base());
  }
  guarantee(false, "bad address format");
  return 0;
}
// d2(b2) operand of a 48-bit instruction: displacement via uimm12() at
// position 20, base register via regz() at position 16.
static int64_t rsmask_48(int64_t d2, Register b2) {
  const int64_t disp_bits = uimm12(d2, 20, 48);
  const int64_t base_bits = regz(b2, 16, 48);
  return disp_bits | base_bits;
}
// d2(x2,b2) operand of a 48-bit instruction: displacement via uimm12() at
// position 20, index register via reg() at position 12, base register via
// regz() at position 16.
static int64_t rxmask_48(int64_t d2, Register x2, Register b2) {
  const int64_t disp_bits  = uimm12(d2, 20, 48);
  const int64_t index_bits = reg(x2, 12, 48);
  const int64_t base_bits  = regz(b2, 16, 48);
  return disp_bits | index_bits | base_bits;
}
// d2(b2) operand with a 20-bit displacement: displacement via simm20(),
// base register via regz() at position 16 of a 48-bit instruction.
static int64_t rsymask_48(int64_t d2, Register b2) {
  const int64_t disp_bits = simm20(d2);
  const int64_t base_bits = regz(b2, 16, 48);
  return disp_bits | base_bits;
}
// d2(x2,b2) operand with a 20-bit displacement: displacement via simm20(),
// index register via reg() at position 12, base register via regz() at
// position 16 of a 48-bit instruction.
static int64_t rxymask_48(int64_t d2, Register x2, Register b2) {
  const int64_t disp_bits  = simm20(d2);
  const int64_t index_bits = reg(x2, 12, 48);
  const int64_t base_bits  = regz(b2, 16, 48);
  return disp_bits | index_bits | base_bits;
}
// Address calculated from d12(vx,b) - vx is vector index register.
// Same field positions as rxmask_48(), except that the index field at
// position 12 is filled from a vector register via vreg().
static int64_t rvmask_48(int64_t d2, VectorRegister x2, Register b2) {
  const int64_t disp_bits   = uimm12(d2, 20, 48);
  const int64_t vindex_bits = vreg(x2, 12);
  const int64_t base_bits   = regz(b2, 16, 48);
  return disp_bits | vindex_bits | base_bits;
}
// Register field for vector register v at position pos, combined with the
// register's RXB_mask(pos) bits.
// NOTE(review): vreg() in this class already ORs in v->RXB_mask(pos), so the
// second OR here appears redundant (harmless, OR is idempotent) - confirm.
static int64_t vreg_mask(VectorRegister v, int pos) {
  const int64_t field_bits = vreg(v, pos);
  const int64_t rxb_bits   = v->RXB_mask(pos);
  return field_bits | rxb_bits;
}
// Vector Element Size Control. 4-bit field which indicates the size of the vector elements.
//   size     - element size code to encode.
//   min_size - smallest element size the instruction supports (not every
//              instruction starts at "byte").
//   max_size - largest element size the instruction supports (not every
//              instruction goes up to "QW").
//   pos      - field position, handed to uimm4() for a 48-bit instruction.
static int64_t vesc_mask(int64_t size, int min_size, int max_size, int pos) {
  assert((size >= min_size) && (size <= max_size), "element size control out of range");
  const int64_t field_bits = uimm4(size, pos, 48);
  return field_bits;
}
// Vector Element IndeX. 4-bit field which indexes the target vector element.
//   ix      - vector element index; must lie in [0, highest index for el_size].
//   el_size - size of the vector element. This is a VRegElemType enum value.
//   pos     - field position, handed to uimm4() for a 48-bit instruction.
// An unknown el_size fails a guarantee(); an out-of-range ix fails an assert().
static int64_t veix_mask(int64_t ix, int el_size, int pos) {
  // Highest valid element index for the given element size
  // (a 128-bit vector register holds 16 bytes, 8 HW, 4 FW, 2 DW or 1 QW).
  int max_ix = -1;
  switch (el_size) {
    case VRET_BYTE: max_ix = 15; break;
    case VRET_HW:   max_ix =  7; break;
    case VRET_FW:   max_ix =  3; break;
    case VRET_DW:   max_ix =  1; break;
    case VRET_QW:   max_ix =  0; break;
    default:        guarantee(false, "bad vector element size %d", el_size); break;
  }
  // The value being range-checked (and printed) is the element index, not the element size.
  assert((0 <= ix) && (ix <= max_ix), "element index out of range (0 <= %ld <= %d)", ix, max_ix);
  return uimm4(ix, pos, 48);
}
// Vector Operation Result Control. 4-bit field.
//   flags         - combination of VOPRC_* values to encode.
//   pos           - field position, handed to uimm4() for a 48-bit instruction.
//   allowed_flags - VOPRC_* bits the instruction accepts (default: all four).
// Asserts that flags contains no bit outside allowed_flags.
static int64_t voprc_any(int64_t flags, int pos, int64_t allowed_flags = 0x0f) {
  assert((flags & ~allowed_flags) == 0, "Invalid VOPRC_* flag combination: %d", (int)flags);
  const int64_t field_bits = uimm4(flags, pos, 48);
  return field_bits;
}
// Vector Operation Result Control. Condition code setting.
// Like voprc_any(), but only the CC-related flags (VOPRC_CCSET / VOPRC_CCIGN)
// are accepted.
static int64_t voprc_ccmask(int64_t flags, int pos) {
  const int64_t cc_flags_only = VOPRC_CCIGN | VOPRC_CCSET;
  return voprc_any(flags, pos, cc_flags_only);
}
public:
//--------------------------------------------------
@ -1453,6 +1728,8 @@ class Assembler : public AbstractAssembler {
// Place the 24-bit immediate produced by imm(i24, 24) into an instruction of
// 'len' bits, with the field starting at bit position s.
static long imm24(int64_t i24, int s, int len) {
  const int shift = len - s - 24;
  return imm(i24, 24) << shift;
}
// Place the 32-bit immediate produced by imm(i32, 32) into an instruction of
// 'len' bits, with the field starting at bit position s.
static long imm32(int64_t i32, int s, int len) {
  const int shift = len - s - 32;
  return imm(i32, 32) << shift;
}
// 4-bit register field for vector register v in a 48-bit instruction.
// Only the low four bits of the register encoding fit into the field at 'pos';
// v->RXB_mask(pos) is ORed in as well (presumably it carries the remaining
// register-number bit - confirm against VectorRegister::RXB_mask).
static long vreg(VectorRegister v, int pos) {
  const int len    = 48;
  const int hi_bit = (len - pos) - 1;
  const int lo_bit = (len - pos) - 4;
  return u_field(v->encoding() & 0x0f, hi_bit, lo_bit) | v->RXB_mask(pos);
}
// Float register field; simply forwards to freg() with the same arguments.
static long fregt(FloatRegister r, int s, int len) {
  return freg(r, s, len);
}
// 4-bit register field holding r's encoding, starting at bit position s of an
// instruction that is 'len' bits long.
static long freg(FloatRegister r, int s, int len) {
  const int hi_bit = (len - s) - 1;
  const int lo_bit = (len - s) - 4;
  return u_field(r->encoding(), hi_bit, lo_bit);
}
@ -2125,6 +2402,422 @@ class Assembler : public AbstractAssembler {
// Emitter declaration; presumably emits the instruction built from TRTT_ZOPC -
// the definition is not visible here, confirm in the inline implementation file.
inline void z_trtt(Register r1, Register r2, int64_t m3);
//---------------------------
//-- Vector Instructions --
//---------------------------
//---< Vector Support Instructions >---
// Load (transfer from memory)
// Declarations only; the bodies live outside this view. Operand names follow the
// numbering used in the opcode comments: v1/v3 vector registers, d2 displacement,
// x2 index register (vx2: vector index register), b2 base register, m3 mask field.
// NOTE(review): the b/h/f/g-suffixed variants take no m3 operand; presumably they
// fix the element size to byte/HW/FW/DW - confirm in the inline implementation.
inline void z_vlm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vl( VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vleb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vleh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vlef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vleg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
// Gather/Scatter
inline void z_vgef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
inline void z_vgeg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
inline void z_vscef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
inline void z_vsceg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3);
// load and replicate
inline void z_vlrep( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vlrepb(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vlreph(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vlrepf(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vlrepg(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vllez( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vllezb(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vllezh(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vllezf(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vllezg(VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vlbb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vll( VectorRegister v1, Register r3, int64_t d2, Register b2);
// Load (register to register)
// Copies between vector registers and general registers (see VLR/VLGV/VLVG/VLVGP
// opcode comments). Declarations only.
// NOTE(review): the b/h/f/g-suffixed variants omit the m4 operand of the generic
// form; presumably they fix the element size to byte/HW/FW/DW - confirm.
inline void z_vlr( VectorRegister v1, VectorRegister v2);
inline void z_vlgv( Register r1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_vlgvb( Register r1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vlgvh( Register r1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vlgvf( Register r1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vlgvg( Register r1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vlvg( VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4);
inline void z_vlvgb( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgh( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgf( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgg( VectorRegister v1, Register r3, int64_t d2, Register b2);
inline void z_vlvgp( VectorRegister v1, Register r2, Register r3);
// vector register pack
// Declarations only. The generic forms take the element size (m4) and, for the
// saturating packs, a cc5 operand.
// NOTE(review): presumably the h/f/g suffix fixes the element size and a trailing
// 's' (e.g. z_vpkshs) selects the CC-setting flavour - confirm in the inline file.
inline void z_vpk( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vpkh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpkf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpkg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpks( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vpksh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpksf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpksg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpkshs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpksfs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpksgs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpkls( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vpklsh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklsf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklsg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklshs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklsfs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vpklsgs(VectorRegister v1, VectorRegister v2, VectorRegister v3);
// vector register unpack (sign-extended)
// NOTE(review): z_vuplh is overloaded. The two-operand z_vuplh(v1, v2) in this group
// is presumably the halfword variant of z_vupl, while the three-operand
// z_vuplh(v1, v2, m3) in the zero-extended group below is the generic emitter for
// VUPLH_ZOPC. They differ only in arity, so a forgotten or extra m3 argument
// silently selects the other instruction - double-check call sites.
inline void z_vuph( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vuphb( VectorRegister v1, VectorRegister v2);
inline void z_vuphh( VectorRegister v1, VectorRegister v2);
inline void z_vuphf( VectorRegister v1, VectorRegister v2);
inline void z_vupl( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vuplb( VectorRegister v1, VectorRegister v2);
inline void z_vuplh( VectorRegister v1, VectorRegister v2);
inline void z_vuplf( VectorRegister v1, VectorRegister v2);
// vector register unpack (zero-extended)
inline void z_vuplh( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vuplhb( VectorRegister v1, VectorRegister v2);
inline void z_vuplhh( VectorRegister v1, VectorRegister v2);
inline void z_vuplhf( VectorRegister v1, VectorRegister v2);
inline void z_vupll( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vupllb( VectorRegister v1, VectorRegister v2);
inline void z_vupllh( VectorRegister v1, VectorRegister v2);
inline void z_vupllf( VectorRegister v1, VectorRegister v2);
// vector register merge high/low
// Declarations only. Generic forms take the element size as m4 (m3 for z_vrepi);
// NOTE(review): the b/h/f/g-suffixed variants presumably fix it to byte/HW/FW/DW - confirm.
inline void z_vmrh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmrhb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrhh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrhf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrhg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmrlb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrlh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrlf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmrlg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
// vector register permute
inline void z_vperm( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vpdi( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
// vector register replicate
inline void z_vrep( VectorRegister v1, VectorRegister v3, int64_t imm2, int64_t m4);
inline void z_vrepb( VectorRegister v1, VectorRegister v3, int64_t imm2);
inline void z_vreph( VectorRegister v1, VectorRegister v3, int64_t imm2);
inline void z_vrepf( VectorRegister v1, VectorRegister v3, int64_t imm2);
inline void z_vrepg( VectorRegister v1, VectorRegister v3, int64_t imm2);
inline void z_vrepi( VectorRegister v1, int64_t imm2, int64_t m3);
inline void z_vrepib(VectorRegister v1, int64_t imm2);
inline void z_vrepih(VectorRegister v1, int64_t imm2);
inline void z_vrepif(VectorRegister v1, int64_t imm2);
inline void z_vrepig(VectorRegister v1, int64_t imm2);
// vector select / vector sign-extend to DW (see VSEL_ZOPC / VSEG_ZOPC)
inline void z_vsel( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vseg( VectorRegister v1, VectorRegister v2, int64_t imm3);
// Load (immediate)
// Declarations only. Per the VLEI* opcode comments, a 16-bit immediate (imm2) is
// loaded into one vector element of 8/16/32/64 bit.
// NOTE(review): m3 presumably selects the target element - confirm.
inline void z_vleib( VectorRegister v1, int64_t imm2, int64_t m3);
inline void z_vleih( VectorRegister v1, int64_t imm2, int64_t m3);
inline void z_vleif( VectorRegister v1, int64_t imm2, int64_t m3);
inline void z_vleig( VectorRegister v1, int64_t imm2, int64_t m3);
// Store
// Operand layout mirrors the corresponding load emitters (z_vlm, z_vl, z_vle*, z_vll).
inline void z_vstm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vst( VectorRegister v1, int64_t d2, Register x2, Register b2);
inline void z_vsteb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vsteh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vstef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vsteg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3);
inline void z_vstl( VectorRegister v1, Register r3, int64_t d2, Register b2);
// Misc
// Mask generation (see VGM_ZOPC / VGBM_ZOPC opcode comments).
inline void z_vgm( VectorRegister v1, int64_t imm2, int64_t imm3, int64_t m4);
inline void z_vgmb( VectorRegister v1, int64_t imm2, int64_t imm3);
inline void z_vgmh( VectorRegister v1, int64_t imm2, int64_t imm3);
inline void z_vgmf( VectorRegister v1, int64_t imm2, int64_t imm3);
inline void z_vgmg( VectorRegister v1, int64_t imm2, int64_t imm3);
inline void z_vgbm( VectorRegister v1, int64_t imm2);
inline void z_vzero( VectorRegister v1); // preferred method to set vreg to all zeroes
inline void z_vone( VectorRegister v1); // preferred method to set vreg to all ones
//---< Vector Arithmetic Instructions >---
// Load
// Declarations only. Per the opcode comments, VLC computes V1 := -V2 and VLP
// computes V1 := |V2|; the generic forms take the element size as m3.
// NOTE(review): throughout this group the b/h/f/g/q-suffixed variants presumably
// fix the element size to byte/HW/FW/DW/QW - confirm in the inline implementation.
inline void z_vlc( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vlcb( VectorRegister v1, VectorRegister v2);
inline void z_vlch( VectorRegister v1, VectorRegister v2);
inline void z_vlcf( VectorRegister v1, VectorRegister v2);
inline void z_vlcg( VectorRegister v1, VectorRegister v2);
inline void z_vlp( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vlpb( VectorRegister v1, VectorRegister v2);
inline void z_vlph( VectorRegister v1, VectorRegister v2);
inline void z_vlpf( VectorRegister v1, VectorRegister v2);
inline void z_vlpg( VectorRegister v1, VectorRegister v2);
// ADD
// z_va*: V1 := V2 + V3; z_vacc*: V1 := carry(V2 + V3) (per VA/VACC opcode comments).
inline void z_va( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vab( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vah( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vag( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vacc( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vaccb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vacch( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaccf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaccg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vaccq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// SUB
// z_vs*: V1 := V2 - V3; z_vscbi*: V1 := borrow(V2 - V3) (per VS/VSCBI opcode comments).
inline void z_vs( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vsb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbi( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vscbib( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbih( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbif( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbig( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vscbiq( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// MULTIPLY
// Declarations only. Only generic forms exist here; the element size is always
// passed explicitly (m4, or m5 for the four-register multiply & add forms).
inline void z_vml( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmlh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vme( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmle( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmlo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
// MULTIPLY & ADD
inline void z_vmal( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmalh( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmae( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmale( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmao( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vmalo( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
// VECTOR SUM
// Per the VSUM/VSUMG/VSUMQ opcode comments the sums are widened to FW/DW/QW;
// NOTE(review): the suffix of the fixed-size variants presumably names the source
// sub-element size (b/h, h/f, f/g respectively) - confirm.
inline void z_vsum( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vsumb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vsumgh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumgf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumq( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vsumqf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsumqg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// Average
// z_vavg*: signed, z_vavgl*: unsigned (per VAVG/VAVGL opcode comments).
inline void z_vavg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vavgb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavgh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavgf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavgg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavgl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vavglb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavglh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavglf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vavglg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// VECTOR Galois Field Multiply Sum
inline void z_vgfm( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vgfmb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vgfmh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vgfmf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vgfmg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// VECTOR Galois Field Multiply Sum and Accumulate
inline void z_vgfma( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5);
inline void z_vgfmab( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vgfmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vgfmaf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
inline void z_vgfmag( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4);
//---< Vector Logical Instructions >---
// AND
// Declarations only. The bitwise emitters (AND, AND-complement, XOR, NOR, OR)
// take just the three vector registers - no element size operand.
inline void z_vn( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vnc( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// XOR
inline void z_vx( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// NOR
inline void z_vno( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// OR
inline void z_vo( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// Comparison (element-wise)
// Generic forms take the element size (m4) and a cc5 operand.
// NOTE(review): presumably cc5 is a VOpRC condition-code flag (VOPRC_CCSET /
// VOPRC_CCIGN), the b/h/f/g suffix fixes the element size, and a trailing 's'
// (e.g. z_vceqbs) selects the CC-setting flavour - confirm in the inline file.
inline void z_vceq( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vceqb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqbs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vceqgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vch( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vchb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchbs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5);
inline void z_vchlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlbs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlhs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlfs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vchlgs( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// Max/Min (element-wise)
inline void z_vmx( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmxb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmxlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxlh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxlf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmxlg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmn( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmnb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmng( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vmnlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnlh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnlf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vmnlg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// Leading/Trailing Zeros, population count
inline void z_vclz( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vclzb( VectorRegister v1, VectorRegister v2);
inline void z_vclzh( VectorRegister v1, VectorRegister v2);
inline void z_vclzf( VectorRegister v1, VectorRegister v2);
inline void z_vclzg( VectorRegister v1, VectorRegister v2);
inline void z_vctz( VectorRegister v1, VectorRegister v2, int64_t m3);
inline void z_vctzb( VectorRegister v1, VectorRegister v2);
inline void z_vctzh( VectorRegister v1, VectorRegister v2);
inline void z_vctzf( VectorRegister v1, VectorRegister v2);
inline void z_vctzg( VectorRegister v1, VectorRegister v2);
inline void z_vpopct( VectorRegister v1, VectorRegister v2, int64_t m3);
// Rotate/Shift
inline void z_verllv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_verllvb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_verllvh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_verllvf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_verllvg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_verll( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_verllb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_verllh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_verllf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_verllg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_verim( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t m5);
inline void z_verimb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
inline void z_verimh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
inline void z_verimf( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
inline void z_verimg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
inline void z_veslv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_veslvb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_veslvh( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_veslvf( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_veslvg( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesl( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_veslb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_veslh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_veslf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_veslg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrav( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vesravb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesravh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesravf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesravg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesra( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_vesrab( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrah( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesraf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrag( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrlv( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4);
inline void z_vesrlvb(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesrlvh(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesrlvf(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesrlvg(VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vesrl( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4);
inline void z_vesrlb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrlh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrlf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vesrlg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2);
inline void z_vsl( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vslb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsldb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4);
inline void z_vsra( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsrab( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsrl( VectorRegister v1, VectorRegister v2, VectorRegister v3);
inline void z_vsrlb( VectorRegister v1, VectorRegister v2, VectorRegister v3);
// Test under Mask
inline void z_vtm( VectorRegister v1, VectorRegister v2);
//---< Vector String Instructions >---
inline void z_vfae( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5); // Find any element
inline void z_vfaeb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfaeh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfaef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfee( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5); // Find element equal
inline void z_vfeeb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfeeh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfeef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfene( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5); // Find element not equal
inline void z_vfeneb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfeneh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vfenef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5);
inline void z_vstrc( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t imm5, int64_t cc6); // String range compare
inline void z_vstrcb( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6);
inline void z_vstrch( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6);
inline void z_vstrcf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6);
inline void z_vistr( VectorRegister v1, VectorRegister v2, int64_t imm3, int64_t cc5); // Isolate String
inline void z_vistrb( VectorRegister v1, VectorRegister v2, int64_t cc5);
inline void z_vistrh( VectorRegister v1, VectorRegister v2, int64_t cc5);
inline void z_vistrf( VectorRegister v1, VectorRegister v2, int64_t cc5);
inline void z_vistrbs(VectorRegister v1, VectorRegister v2);
inline void z_vistrhs(VectorRegister v1, VectorRegister v2);
inline void z_vistrfs(VectorRegister v1, VectorRegister v2);
// Floating-point instructions
// ===========================

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -702,6 +702,421 @@ inline void Assembler::z_cvd(Register r1, int64_t d2, Register x2, Register b2)
// Emit the 48-bit CVDG instruction: r1 is encoded with regt(), index x2 and base b2 with reg(),
// and the displacement d2 with simm20(); the pieces are OR-ed with the opcode and passed to emit_48().
inline void Assembler::z_cvdg(Register r1, int64_t d2, Register x2, Register b2) { emit_48( CVDG_ZOPC | regt(r1, 8, 48) | reg(x2, 12, 48) | reg(b2, 16, 48) | simm20(d2)); }
//---------------------------
//-- Vector Instructions --
//---------------------------
//---< Vector Support Instructions >---
// Load (transfer from memory)
// Vector load emitters. Each one ORs the opcode constant with the encoded operand fields and
// hands the resulting 48-bit instruction to emit_48().
// z_vlm: two vector register operands (v1, v3) and a d2(b2) address built by rsmask_48().
// z_vl:  one vector register operand and a d2(x2,b2) address built by rxmask_48().
inline void Assembler::z_vlm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {emit_48(VLM_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2)); }
inline void Assembler::z_vl( VectorRegister v1, int64_t d2, Register x2, Register b2) {emit_48(VL_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2)); }
// Element loads (B/H/F/G): same addressing as z_vl. m3 is passed to veix_mask() together with the
// element type of the instruction - presumably m3 is the element index, confirm in veix_mask().
inline void Assembler::z_vleb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEB_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_BYTE, 32)); }
inline void Assembler::z_vleh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEH_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_HW, 32)); }
inline void Assembler::z_vlef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEF_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_FW, 32)); }
inline void Assembler::z_vleg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLEG_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_DW, 32)); }
// Gather/Scatter
// Gather (z_vge*) and scatter (z_vsce*) emitters for FW and DW elements.
// The address field is built by rvmask_48() from d2, the vector index register vx2 and base b2.
// m3 goes through veix_mask() with the element type - presumably the element index, confirm in veix_mask().
inline void Assembler::z_vgef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VGEF_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_FW, 32)); }
inline void Assembler::z_vgeg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VGEG_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_DW, 32)); }
inline void Assembler::z_vscef( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VSCEF_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_FW, 32)); }
inline void Assembler::z_vsceg( VectorRegister v1, int64_t d2, VectorRegister vx2, Register b2, int64_t m3) {emit_48(VSCEG_ZOPC | vreg(v1, 8) | rvmask_48(d2, vx2, b2) | veix_mask(m3, VRET_DW, 32)); }
// load and replicate
// z_vlrep is the generic emitter: m3 is the element type code, handed to vesc_mask() with the
// bounds VRET_BYTE and VRET_DW (presumably the accepted range - confirm in vesc_mask()).
// The suffixed variants below only fix the element type.
inline void Assembler::z_vlrep( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLREP_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlrepb( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_BYTE); }// load byte and replicate to all vector elements of type 'B'
inline void Assembler::z_vlreph( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_HW); } // load HW and replicate to all vector elements of type 'H'
inline void Assembler::z_vlrepf( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_FW); } // load FW and replicate to all vector elements of type 'F'
inline void Assembler::z_vlrepg( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vlrep(v1, d2, x2, b2, VRET_DW); } // load DW and replicate to all vector elements of type 'G'
// z_vllez follows the same generic-plus-wrappers pattern as z_vlrep (element type code m3, BYTE..DW).
inline void Assembler::z_vllez( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLLEZ_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vllezb( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_BYTE); }// load logical byte into left DW of VR, zero all other bit positions.
inline void Assembler::z_vllezh( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_HW); } // load logical HW into left DW of VR, zero all other bit positions.
inline void Assembler::z_vllezf( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_FW); } // load logical FW into left DW of VR, zero all other bit positions.
inline void Assembler::z_vllezg( VectorRegister v1, int64_t d2, Register x2, Register b2) {z_vllez(v1, d2, x2, b2, VRET_DW); } // load logical DW into left DW of VR, zero all other bit positions.
// z_vlbb: m3 is encoded as a plain 4-bit unsigned immediate (uimm4), not as an element type code.
inline void Assembler::z_vlbb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VLBB_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | uimm4(m3, 32, 48)); }
// z_vll: takes a general register r3 in addition to the d2(b2) address.
inline void Assembler::z_vll( VectorRegister v1, Register r3, int64_t d2, Register b2) {emit_48(VLL_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2)); }
// Load (register to register)
// z_vlr: register-to-register form; only the two vector register fields are encoded.
inline void Assembler::z_vlr ( VectorRegister v1, VectorRegister v2) {emit_48(VLR_ZOPC | vreg(v1, 8) | vreg(v2, 12)); }
// z_vlgv: generic emitter with a general register r1, a vector register v3 and a d2(b2) field.
// m4 is the element type code (vesc_mask() bounds BYTE..DW); the suffixed variants fix it.
inline void Assembler::z_vlgv( Register r1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VLGV_ZOPC | reg(r1, 8, 48) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlgvb( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_BYTE); } // load byte from VR element (index d2(b2)) into GR (logical)
inline void Assembler::z_vlgvh( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_HW); } // load HW from VR element (index d2(b2)) into GR (logical)
inline void Assembler::z_vlgvf( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_FW); } // load FW from VR element (index d2(b2)) into GR (logical)
inline void Assembler::z_vlgvg( Register r1, VectorRegister v3, int64_t d2, Register b2) {z_vlgv(r1, v3, d2, b2, VRET_DW); } // load DW from VR element (index d2(b2)) into GR.
// z_vlvg: operand roles mirrored relative to z_vlgv (vector register v1 first, general register r3
// second) - presumably the GR -> VR element direction, confirm against the instruction reference.
inline void Assembler::z_vlvg( VectorRegister v1, Register r3, int64_t d2, Register b2, int64_t m4) {emit_48(VLVG_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlvgb( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vlvgh( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vlvgf( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vlvgg( VectorRegister v1, Register r3, int64_t d2, Register b2) {z_vlvg(v1, r3, d2, b2, VRET_DW); } // vector element type 'G'
// z_vlvgp: one vector register and two general registers (r2, r3); no element type field.
inline void Assembler::z_vlvgp( VectorRegister v1, Register r2, Register r3) {emit_48(VLVGP_ZOPC | vreg(v1, 8) | reg(r2, 12, 48) | reg(r3, 16, 48)); }
// vector register pack
// z_vpk: generic pack emitter. m4 is the element type code; vesc_mask() is called with the bounds
// VRET_HW and VRET_DW, so no byte variant exists in this family.
inline void Assembler::z_vpk( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VPK_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32)); }
inline void Assembler::z_vpkh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpk(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vpkf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpk(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vpkg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpk(v1, v2, v3, VRET_DW); } // vector element type 'G'
// z_vpks: like z_vpk, plus cc5, which voprc_ccmask() encodes. The wrappers pass VOPRC_CCIGN
// (plain mnemonic) or VOPRC_CCSET (mnemonic with trailing 's').
inline void Assembler::z_vpks( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VPKS_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vpksh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
inline void Assembler::z_vpksf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
inline void Assembler::z_vpksg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
inline void Assembler::z_vpkshs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
inline void Assembler::z_vpksfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
inline void Assembler::z_vpksgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpks(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
// z_vpkls: same operand layout and wrapper scheme as z_vpks, with opcode VPKLS_ZOPC.
inline void Assembler::z_vpkls( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VPKLS_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vpklsh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
inline void Assembler::z_vpklsf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
inline void Assembler::z_vpklsg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
inline void Assembler::z_vpklshs(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
inline void Assembler::z_vpklsfs(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
inline void Assembler::z_vpklsgs(VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vpkls(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
// vector register unpack (sign-extended)
// z_vuph / z_vupl: generic unpack emitters. m3 is the element type code; vesc_mask() is called
// with the bounds VRET_BYTE and VRET_FW, so there is no 'G' variant.
inline void Assembler::z_vuph( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VUPH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vuphb( VectorRegister v1, VectorRegister v2) {z_vuph(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vuphh( VectorRegister v1, VectorRegister v2) {z_vuph(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vuphf( VectorRegister v1, VectorRegister v2) {z_vuph(v1, v2, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vupl( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VUPL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vuplb( VectorRegister v1, VectorRegister v2) {z_vupl(v1, v2, VRET_BYTE); } // vector element type 'B'
// NOTE: this two-operand z_vuplh is z_vupl with element type 'H'. The three-operand overload
// z_vuplh(v1, v2, m3) is a different instruction (opcode VUPLH_ZOPC).
inline void Assembler::z_vuplh( VectorRegister v1, VectorRegister v2) {z_vupl(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vuplf( VectorRegister v1, VectorRegister v2) {z_vupl(v1, v2, VRET_FW); } // vector element type 'F'
// vector register unpack (zero-extended)
// NOTE: this three-operand z_vuplh emits opcode VUPLH_ZOPC. It shares its name with the two-operand
// convenience form z_vuplh(v1, v2), which expands to z_vupl(v1, v2, VRET_HW) - a different instruction.
// m3 is the element type code; vesc_mask() is called with the bounds VRET_BYTE and VRET_FW.
inline void Assembler::z_vuplh( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VUPLH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vuplhb( VectorRegister v1, VectorRegister v2) {z_vuplh(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vuplhh( VectorRegister v1, VectorRegister v2) {z_vuplh(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vuplhf( VectorRegister v1, VectorRegister v2) {z_vuplh(v1, v2, VRET_FW); } // vector element type 'F'
// z_vupll: same operand layout and wrapper scheme, with opcode VUPLL_ZOPC.
inline void Assembler::z_vupll( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VUPLL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vupllb( VectorRegister v1, VectorRegister v2) {z_vupll(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vupllh( VectorRegister v1, VectorRegister v2) {z_vupll(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vupllf( VectorRegister v1, VectorRegister v2) {z_vupll(v1, v2, VRET_FW); } // vector element type 'F'
// vector register merge high/low
// z_vmrh: generic merge-high emitter with three vector register operands. m4 is the element type
// code (vesc_mask() bounds BYTE..DW); the suffixed variants fix the element type.
inline void Assembler::z_vmrh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMRH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vmrhb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vmrhh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vmrhf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vmrhg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrh(v1, v2, v3, VRET_DW); } // vector element type 'G'
// z_vmrl: generic merge-low emitter with three vector register operands. m4 is the element type
// code (vesc_mask() bounds BYTE..DW); the suffixed variants fix the element type.
// The variants must delegate to z_vmrl: delegating to z_vmrh would emit the merge-high opcode
// (VMRH_ZOPC) under a merge-low mnemonic.
inline void Assembler::z_vmrl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMRL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vmrlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrl(v1, v2, v3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vmrlh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrl(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vmrlf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrl(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vmrlg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vmrl(v1, v2, v3, VRET_DW); } // vector element type 'G'
// vector register permute
// z_vperm: four vector register operands; v4 is encoded in the field starting at position 32.
inline void Assembler::z_vperm( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {emit_48(VPERM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32)); }
// z_vpdi: three vector register operands plus m4, encoded as a plain 4-bit unsigned immediate.
inline void Assembler::z_vpdi( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VPDI_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | uimm4(m4, 32, 48)); }
// vector register replicate
// z_vrep: generic replicate emitter. imm2 is encoded as a 16-bit immediate via simm16(); m4 is the
// element type code (vesc_mask() bounds BYTE..DW). The suffixed variants fix the element type.
inline void Assembler::z_vrep( VectorRegister v1, VectorRegister v3, int64_t imm2, int64_t m4) {emit_48(VREP_ZOPC | vreg(v1, 8) | vreg(v3, 12) | simm16(imm2, 16, 48) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vrepb( VectorRegister v1, VectorRegister v3, int64_t imm2) {z_vrep(v1, v3, imm2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vreph( VectorRegister v1, VectorRegister v3, int64_t imm2) {z_vrep(v1, v3, imm2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vrepf( VectorRegister v1, VectorRegister v3, int64_t imm2) {z_vrep(v1, v3, imm2, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vrepg( VectorRegister v1, VectorRegister v3, int64_t imm2) {z_vrep(v1, v3, imm2, VRET_DW); } // vector element type 'G'
// z_vrepi: replicate-immediate form without a source vector register; imm2 is a signed 16-bit immediate.
inline void Assembler::z_vrepi( VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VREPI_ZOPC | vreg(v1, 8) | simm16(imm2, 16, 48) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vrepib( VectorRegister v1, int64_t imm2) {z_vrepi(v1, imm2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vrepih( VectorRegister v1, int64_t imm2) {z_vrepi(v1, imm2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vrepif( VectorRegister v1, int64_t imm2) {z_vrepi(v1, imm2, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vrepig( VectorRegister v1, int64_t imm2) {z_vrepi(v1, imm2, VRET_DW); } // vector element type 'G'
// z_vsel: four vector register operands, same field layout as z_vperm.
inline void Assembler::z_vsel( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {emit_48(VSEL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32)); }
// z_vseg: m3 is encoded as a plain 4-bit unsigned immediate; no range check on the element type is visible here.
inline void Assembler::z_vseg( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VSEG_ZOPC | vreg(v1, 8) | vreg(v2, 12) | uimm4(m3, 32, 48)); }
// Load (immediate)
// Load element immediate (B/H/F/G). imm2 is a signed 16-bit immediate occupying the field that
// starts at bit 16 of the 48-bit instruction - the same placement z_vrepi uses for its immediate.
// m3 (element index, checked by veix_mask() against the element type) occupies the 4-bit field
// starting at bit 32. The immediate must not be placed at bit 32: a 16-bit field there would
// overlap both the m3 field and the trailing opcode byte.
inline void Assembler::z_vleib( VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VLEIB_ZOPC | vreg(v1, 8) | simm16(imm2, 16, 48) | veix_mask(m3, VRET_BYTE, 32)); }
inline void Assembler::z_vleih( VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VLEIH_ZOPC | vreg(v1, 8) | simm16(imm2, 16, 48) | veix_mask(m3, VRET_HW, 32)); }
inline void Assembler::z_vleif( VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VLEIF_ZOPC | vreg(v1, 8) | simm16(imm2, 16, 48) | veix_mask(m3, VRET_FW, 32)); }
inline void Assembler::z_vleig( VectorRegister v1, int64_t imm2, int64_t m3) {emit_48(VLEIG_ZOPC | vreg(v1, 8) | simm16(imm2, 16, 48) | veix_mask(m3, VRET_DW, 32)); }
// Store
// Vector store emitters; operand layouts parallel the corresponding load emitters.
// z_vstm: two vector register operands (v1, v3) and a d2(b2) address built by rsmask_48().
// z_vst:  one vector register operand and a d2(x2,b2) address built by rxmask_48().
inline void Assembler::z_vstm( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {emit_48(VSTM_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2)); }
inline void Assembler::z_vst( VectorRegister v1, int64_t d2, Register x2, Register b2) {emit_48(VST_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2)); }
// Element stores (B/H/F/G): m3 is passed to veix_mask() together with the element type -
// presumably the element index, confirm in veix_mask().
inline void Assembler::z_vsteb( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEB_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_BYTE, 32)); }
inline void Assembler::z_vsteh( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEH_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_HW, 32)); }
inline void Assembler::z_vstef( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEF_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_FW, 32)); }
inline void Assembler::z_vsteg( VectorRegister v1, int64_t d2, Register x2, Register b2, int64_t m3) {emit_48(VSTEG_ZOPC | vreg(v1, 8) | rxmask_48(d2, x2, b2) | veix_mask(m3, VRET_DW, 32)); }
// z_vstl: takes a general register r3 in addition to the d2(b2) address.
inline void Assembler::z_vstl( VectorRegister v1, Register r3, int64_t d2, Register b2) {emit_48(VSTL_ZOPC | vreg(v1, 8) | reg(r3, 12, 48) | rsmask_48(d2, b2)); }
// Misc
// z_vgm: generic emitter with two unsigned 8-bit immediates (imm2 at position 16, imm3 at
// position 24) and the element type code m4 (vesc_mask() bounds BYTE..DW).
inline void Assembler::z_vgm( VectorRegister v1, int64_t imm2, int64_t imm3, int64_t m4) {emit_48(VGM_ZOPC | vreg(v1, 8) | uimm8( imm2, 16, 48) | uimm8(imm3, 24, 48) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vgmb( VectorRegister v1, int64_t imm2, int64_t imm3) {z_vgm(v1, imm2, imm3, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vgmh( VectorRegister v1, int64_t imm2, int64_t imm3) {z_vgm(v1, imm2, imm3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vgmf( VectorRegister v1, int64_t imm2, int64_t imm3) {z_vgm(v1, imm2, imm3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vgmg( VectorRegister v1, int64_t imm2, int64_t imm3) {z_vgm(v1, imm2, imm3, VRET_DW); } // vector element type 'G'
// z_vgbm: one unsigned 16-bit immediate. z_vzero and z_vone are shorthands passing 0 and 0xffff.
inline void Assembler::z_vgbm( VectorRegister v1, int64_t imm2) {emit_48(VGBM_ZOPC | vreg(v1, 8) | uimm16(imm2, 16, 48)); }
inline void Assembler::z_vzero( VectorRegister v1) {z_vgbm(v1, 0); } // preferred method to set vreg to all zeroes
inline void Assembler::z_vone( VectorRegister v1) {z_vgbm(v1, 0xffff); } // preferred method to set vreg to all ones
//---< Vector Arithmetic Instructions >---
// Load
// z_vlc: generic two-register emitter (opcode VLC_ZOPC). m3 is the element type code
// (vesc_mask() bounds BYTE..DW); the suffixed variants fix the element type.
inline void Assembler::z_vlc( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VLC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlcb( VectorRegister v1, VectorRegister v2) {z_vlc(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vlch( VectorRegister v1, VectorRegister v2) {z_vlc(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vlcf( VectorRegister v1, VectorRegister v2) {z_vlc(v1, v2, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vlcg( VectorRegister v1, VectorRegister v2) {z_vlc(v1, v2, VRET_DW); } // vector element type 'G'
// z_vlp: same operand layout and wrapper scheme as z_vlc, with opcode VLP_ZOPC.
inline void Assembler::z_vlp( VectorRegister v1, VectorRegister v2, int64_t m3) {emit_48(VLP_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vlpb( VectorRegister v1, VectorRegister v2) {z_vlp(v1, v2, VRET_BYTE); } // vector element type 'B'
inline void Assembler::z_vlph( VectorRegister v1, VectorRegister v2) {z_vlp(v1, v2, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vlpf( VectorRegister v1, VectorRegister v2) {z_vlp(v1, v2, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vlpg( VectorRegister v1, VectorRegister v2) {z_vlp(v1, v2, VRET_DW); } // vector element type 'G'
// ADD
// Emit VA (vector add); m4 is the element-type code, bounds VRET_BYTE..VRET_QW.
inline void Assembler::z_va(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VA_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32));
}
// VA, vector element type 'B'.
inline void Assembler::z_vab(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_va(v1, v2, v3, VRET_BYTE);
}
// VA, vector element type 'H'.
inline void Assembler::z_vah(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_va(v1, v2, v3, VRET_HW);
}
// VA, vector element type 'F'.
inline void Assembler::z_vaf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_va(v1, v2, v3, VRET_FW);
}
// VA, vector element type 'G'.
inline void Assembler::z_vag(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_va(v1, v2, v3, VRET_DW);
}
// VA, vector element type 'Q'.
inline void Assembler::z_vaq(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_va(v1, v2, v3, VRET_QW);
}
// Emit VACC; m4 is the element-type code, bounds VRET_BYTE..VRET_QW.
inline void Assembler::z_vacc(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VACC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32));
}
// VACC, vector element type 'B'.
inline void Assembler::z_vaccb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vacc(v1, v2, v3, VRET_BYTE);
}
// VACC, vector element type 'H'.
inline void Assembler::z_vacch(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vacc(v1, v2, v3, VRET_HW);
}
// VACC, vector element type 'F'.
inline void Assembler::z_vaccf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vacc(v1, v2, v3, VRET_FW);
}
// VACC, vector element type 'G'.
inline void Assembler::z_vaccg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vacc(v1, v2, v3, VRET_DW);
}
// VACC, vector element type 'Q'.
inline void Assembler::z_vaccq(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vacc(v1, v2, v3, VRET_QW);
}
// SUB
// Emit VS (vector subtract); m4 is the element-type code, bounds VRET_BYTE..VRET_QW.
inline void Assembler::z_vs(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VS_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32));
}
// VS, vector element type 'B'.
inline void Assembler::z_vsb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vs(v1, v2, v3, VRET_BYTE);
}
// VS, vector element type 'H'.
inline void Assembler::z_vsh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vs(v1, v2, v3, VRET_HW);
}
// VS, vector element type 'F'.
inline void Assembler::z_vsf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vs(v1, v2, v3, VRET_FW);
}
// VS, vector element type 'G'.
inline void Assembler::z_vsg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vs(v1, v2, v3, VRET_DW);
}
// VS, vector element type 'Q'.
inline void Assembler::z_vsq(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vs(v1, v2, v3, VRET_QW);
}
// Emit VSCBI; m4 is the element-type code, bounds VRET_BYTE..VRET_QW.
inline void Assembler::z_vscbi(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VSCBI_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_QW, 32));
}
// VSCBI, vector element type 'B'.
inline void Assembler::z_vscbib(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vscbi(v1, v2, v3, VRET_BYTE);
}
// VSCBI, vector element type 'H'.
inline void Assembler::z_vscbih(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vscbi(v1, v2, v3, VRET_HW);
}
// VSCBI, vector element type 'F'.
inline void Assembler::z_vscbif(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vscbi(v1, v2, v3, VRET_FW);
}
// VSCBI, vector element type 'G'.
inline void Assembler::z_vscbig(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vscbi(v1, v2, v3, VRET_DW);
}
// VSCBI, vector element type 'Q'.
inline void Assembler::z_vscbiq(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vscbi(v1, v2, v3, VRET_QW);
}
// MULTIPLY
inline void Assembler::z_vml( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VML_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmlh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vme( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VME_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmle( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
inline void Assembler::z_vmlo( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VMLO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_FW, 32)); }
// MULTIPLY & ADD
inline void Assembler::z_vmal( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmah( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmalh( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmae( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmale( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmao( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMAO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
inline void Assembler::z_vmalo( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {emit_48(VMALO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_FW, 20)); }
// VECTOR SUM
// Emit VSUM; m4 is the element-type code, bounds VRET_BYTE..VRET_HW.
inline void Assembler::z_vsum(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VSUM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_HW, 32));
}
// VSUM, vector element type 'B'.
inline void Assembler::z_vsumb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vsum(v1, v2, v3, VRET_BYTE);
}
// VSUM, vector element type 'H'.
inline void Assembler::z_vsumh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vsum(v1, v2, v3, VRET_HW);
}
// VSUMG: the element-type code m4 is passed through vesc_mask with bounds VRET_HW..VRET_FW.
inline void Assembler::z_vsumg( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUMG_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_HW, VRET_FW, 32)); }
inline void Assembler::z_vsumgh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumg(v1, v2, v3, VRET_HW); } // vector element type 'H'
inline void Assembler::z_vsumgf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumg(v1, v2, v3, VRET_FW); } // vector element type 'F'
// VSUMQ: the element-type code m4 is passed through vesc_mask with bounds VRET_FW..VRET_DW.
inline void Assembler::z_vsumq( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {emit_48(VSUMQ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_FW, VRET_DW, 32)); }
inline void Assembler::z_vsumqf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumq(v1, v2, v3, VRET_FW); } // vector element type 'F'
inline void Assembler::z_vsumqg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vsumq(v1, v2, v3, VRET_DW); } // vector element type 'G'
// Average
// Emit VAVG; m4 is the element-type code, bounds VRET_BYTE..VRET_DW.
inline void Assembler::z_vavg(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VAVG_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32));
}
// VAVG, vector element type 'B'.
inline void Assembler::z_vavgb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vavg(v1, v2, v3, VRET_BYTE);
}
// VAVG, vector element type 'H'.
inline void Assembler::z_vavgh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vavg(v1, v2, v3, VRET_HW);
}
// VAVG, vector element type 'F'.
inline void Assembler::z_vavgf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vavg(v1, v2, v3, VRET_FW);
}
// VAVG, vector element type 'G'.
inline void Assembler::z_vavgg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vavg(v1, v2, v3, VRET_DW);
}
// Emit VAVGL; m4 is the element-type code, bounds VRET_BYTE..VRET_DW.
inline void Assembler::z_vavgl(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VAVGL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32));
}
// VAVGL, vector element type 'B'.
inline void Assembler::z_vavglb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vavgl(v1, v2, v3, VRET_BYTE);
}
// VAVGL, vector element type 'H'.
inline void Assembler::z_vavglh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vavgl(v1, v2, v3, VRET_HW);
}
// VAVGL, vector element type 'F'.
inline void Assembler::z_vavglf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vavgl(v1, v2, v3, VRET_FW);
}
// VAVGL, vector element type 'G'.
inline void Assembler::z_vavglg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vavgl(v1, v2, v3, VRET_DW);
}
// VECTOR Galois Field Multiply Sum
// Emit VGFM (Galois field multiply sum); m4 is the element-type code, bounds VRET_BYTE..VRET_DW.
inline void Assembler::z_vgfm(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VGFM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32));
}
// VGFM, vector element type 'B'.
inline void Assembler::z_vgfmb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vgfm(v1, v2, v3, VRET_BYTE);
}
// VGFM, vector element type 'H'.
inline void Assembler::z_vgfmh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vgfm(v1, v2, v3, VRET_HW);
}
// VGFM, vector element type 'F'.
inline void Assembler::z_vgfmf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vgfm(v1, v2, v3, VRET_FW);
}
// VGFM, vector element type 'G'.
inline void Assembler::z_vgfmg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vgfm(v1, v2, v3, VRET_DW);
}
// Emit VGFMA (Galois field multiply sum and accumulate).
//   v1, v2, v3 - encoded at bit offsets 8, 12 and 16
//   v4         - fourth register operand, encoded at bit offset 32
//   m5         - element-type code, bounds VRET_BYTE..VRET_DW, encoded at bit offset 20
// v4 must be encoded at offset 32, the same slot the other four-register emitters
// in this file use (e.g. z_vmal). The earlier code OR-ed vreg(v3, 16) a second
// time, so the v4 argument was silently dropped from the instruction.
inline void Assembler::z_vgfma(VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t m5) {
  emit_48(VGFMA_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(m5, VRET_BYTE, VRET_DW, 20));
}
// VGFMA, vector element type 'B'.
inline void Assembler::z_vgfmab(VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {
  z_vgfma(v1, v2, v3, v4, VRET_BYTE);
}
// VGFMA, vector element type 'H'.
inline void Assembler::z_vgfmah(VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {
  z_vgfma(v1, v2, v3, v4, VRET_HW);
}
// VGFMA, vector element type 'F'.
inline void Assembler::z_vgfmaf(VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {
  z_vgfma(v1, v2, v3, v4, VRET_FW);
}
// VGFMA, vector element type 'G'.
inline void Assembler::z_vgfmag(VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4) {
  z_vgfma(v1, v2, v3, v4, VRET_DW);
}
//---< Vector Logical Instructions >---
// AND
// Emit VN with register operands v1, v2, v3 (no element-type field).
inline void Assembler::z_vn(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  emit_48(VN_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16));
}
// Emit VNC with register operands v1, v2, v3 (no element-type field).
inline void Assembler::z_vnc(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  emit_48(VNC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16));
}
// XOR
// Emit VX with register operands v1, v2, v3 (no element-type field).
inline void Assembler::z_vx(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  emit_48(VX_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16));
}
// NOR
// Emit VNO with register operands v1, v2, v3 (no element-type field).
inline void Assembler::z_vno(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  emit_48(VNO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16));
}
// OR
// Emit VO with register operands v1, v2, v3 (no element-type field).
inline void Assembler::z_vo(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  emit_48(VO_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16));
}
// Comparison (element-wise)
// VCEQ: m4 is the element-type code (bounds VRET_BYTE..VRET_DW); cc5 is encoded via voprc_ccmask.
// The plain wrappers pass VOPRC_CCIGN, the '...s' wrappers pass VOPRC_CCSET.
inline void Assembler::z_vceq( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCEQ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vceqb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_BYTE, VOPRC_CCIGN); } // vector element type 'B', don't set CC
inline void Assembler::z_vceqh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
inline void Assembler::z_vceqf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
inline void Assembler::z_vceqg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
inline void Assembler::z_vceqbs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_BYTE, VOPRC_CCSET); } // vector element type 'B', set CC
inline void Assembler::z_vceqhs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
inline void Assembler::z_vceqfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
inline void Assembler::z_vceqgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vceq(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
// VCH: m4 is the element-type code (bounds VRET_BYTE..VRET_DW); cc5 is encoded via voprc_ccmask.
// The plain wrappers pass VOPRC_CCIGN, the '...s' wrappers pass VOPRC_CCSET.
inline void Assembler::z_vch( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCH_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vchb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_BYTE, VOPRC_CCIGN); } // vector element type 'B', don't set CC
inline void Assembler::z_vchh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
inline void Assembler::z_vchf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
inline void Assembler::z_vchg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
inline void Assembler::z_vchbs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_BYTE, VOPRC_CCSET); } // vector element type 'B', set CC
inline void Assembler::z_vchhs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
inline void Assembler::z_vchfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
inline void Assembler::z_vchgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vch(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
// VCHL: m4 is the element-type code (bounds VRET_BYTE..VRET_DW); cc5 is encoded via voprc_ccmask.
// The plain wrappers pass VOPRC_CCIGN, the '...s' wrappers pass VOPRC_CCSET.
inline void Assembler::z_vchl( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4, int64_t cc5) {emit_48(VCHL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32) | voprc_ccmask(cc5, 24)); }
inline void Assembler::z_vchlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_BYTE, VOPRC_CCIGN); } // vector element type 'B', don't set CC
inline void Assembler::z_vchlh( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_HW, VOPRC_CCIGN); } // vector element type 'H', don't set CC
inline void Assembler::z_vchlf( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_FW, VOPRC_CCIGN); } // vector element type 'F', don't set CC
inline void Assembler::z_vchlg( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_DW, VOPRC_CCIGN); } // vector element type 'G', don't set CC
inline void Assembler::z_vchlbs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_BYTE, VOPRC_CCSET); } // vector element type 'B', set CC
inline void Assembler::z_vchlhs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_HW, VOPRC_CCSET); } // vector element type 'H', set CC
inline void Assembler::z_vchlfs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_FW, VOPRC_CCSET); } // vector element type 'F', set CC
inline void Assembler::z_vchlgs( VectorRegister v1, VectorRegister v2, VectorRegister v3) {z_vchl(v1, v2, v3, VRET_DW, VOPRC_CCSET); } // vector element type 'G', set CC
// Max/Min (element-wise)
// Emit VMX (element-wise max); m4 is the element-type code, bounds VRET_BYTE..VRET_DW.
inline void Assembler::z_vmx(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VMX_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32));
}
// VMX, vector element type 'B'.
inline void Assembler::z_vmxb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmx(v1, v2, v3, VRET_BYTE);
}
// VMX, vector element type 'H'.
inline void Assembler::z_vmxh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmx(v1, v2, v3, VRET_HW);
}
// VMX, vector element type 'F'.
inline void Assembler::z_vmxf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmx(v1, v2, v3, VRET_FW);
}
// VMX, vector element type 'G'.
inline void Assembler::z_vmxg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmx(v1, v2, v3, VRET_DW);
}
// Emit VMXL; m4 is the element-type code, bounds VRET_BYTE..VRET_DW.
inline void Assembler::z_vmxl(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VMXL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32));
}
// VMXL, vector element type 'B'.
inline void Assembler::z_vmxlb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmxl(v1, v2, v3, VRET_BYTE);
}
// VMXL, vector element type 'H'.
inline void Assembler::z_vmxlh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmxl(v1, v2, v3, VRET_HW);
}
// VMXL, vector element type 'F'.
inline void Assembler::z_vmxlf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmxl(v1, v2, v3, VRET_FW);
}
// VMXL, vector element type 'G'.
inline void Assembler::z_vmxlg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmxl(v1, v2, v3, VRET_DW);
}
// Emit VMN (element-wise min); m4 is the element-type code, bounds VRET_BYTE..VRET_DW.
inline void Assembler::z_vmn(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VMN_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32));
}
// VMN, vector element type 'B'.
inline void Assembler::z_vmnb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmn(v1, v2, v3, VRET_BYTE);
}
// VMN, vector element type 'H'.
inline void Assembler::z_vmnh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmn(v1, v2, v3, VRET_HW);
}
// VMN, vector element type 'F'.
inline void Assembler::z_vmnf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmn(v1, v2, v3, VRET_FW);
}
// VMN, vector element type 'G'.
inline void Assembler::z_vmng(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmn(v1, v2, v3, VRET_DW);
}
// Emit VMNL; m4 is the element-type code, bounds VRET_BYTE..VRET_DW.
inline void Assembler::z_vmnl(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VMNL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32));
}
// VMNL, vector element type 'B'.
inline void Assembler::z_vmnlb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmnl(v1, v2, v3, VRET_BYTE);
}
// VMNL, vector element type 'H'.
inline void Assembler::z_vmnlh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmnl(v1, v2, v3, VRET_HW);
}
// VMNL, vector element type 'F'.
inline void Assembler::z_vmnlf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmnl(v1, v2, v3, VRET_FW);
}
// VMNL, vector element type 'G'.
inline void Assembler::z_vmnlg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vmnl(v1, v2, v3, VRET_DW);
}
// Leading/Trailing Zeros, population count
// Emit VCLZ (leading zeros); m3 is the element-type code, bounds VRET_BYTE..VRET_DW.
inline void Assembler::z_vclz(VectorRegister v1, VectorRegister v2, int64_t m3) {
  emit_48(VCLZ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32));
}
// VCLZ, vector element type 'B'.
inline void Assembler::z_vclzb(VectorRegister v1, VectorRegister v2) {
  z_vclz(v1, v2, VRET_BYTE);
}
// VCLZ, vector element type 'H'.
inline void Assembler::z_vclzh(VectorRegister v1, VectorRegister v2) {
  z_vclz(v1, v2, VRET_HW);
}
// VCLZ, vector element type 'F'.
inline void Assembler::z_vclzf(VectorRegister v1, VectorRegister v2) {
  z_vclz(v1, v2, VRET_FW);
}
// VCLZ, vector element type 'G'.
inline void Assembler::z_vclzg(VectorRegister v1, VectorRegister v2) {
  z_vclz(v1, v2, VRET_DW);
}
// Emit VCTZ (trailing zeros); m3 is the element-type code, bounds VRET_BYTE..VRET_DW.
inline void Assembler::z_vctz(VectorRegister v1, VectorRegister v2, int64_t m3) {
  emit_48(VCTZ_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32));
}
// VCTZ, vector element type 'B'.
inline void Assembler::z_vctzb(VectorRegister v1, VectorRegister v2) {
  z_vctz(v1, v2, VRET_BYTE);
}
// VCTZ, vector element type 'H'.
inline void Assembler::z_vctzh(VectorRegister v1, VectorRegister v2) {
  z_vctz(v1, v2, VRET_HW);
}
// VCTZ, vector element type 'F'.
inline void Assembler::z_vctzf(VectorRegister v1, VectorRegister v2) {
  z_vctz(v1, v2, VRET_FW);
}
// VCTZ, vector element type 'G'.
inline void Assembler::z_vctzg(VectorRegister v1, VectorRegister v2) {
  z_vctz(v1, v2, VRET_DW);
}
// Emit VPOPCT (population count); m3 is the element-type code, bounds VRET_BYTE..VRET_DW.
inline void Assembler::z_vpopct(VectorRegister v1, VectorRegister v2, int64_t m3) {
  emit_48(VPOPCT_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(m3, VRET_BYTE, VRET_DW, 32));
}
// Rotate/Shift
// Emit VERLLV (three vector register operands); m4 is the element-type code, bounds VRET_BYTE..VRET_DW.
inline void Assembler::z_verllv(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VERLLV_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32));
}
// VERLLV, vector element type 'B'.
inline void Assembler::z_verllvb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_verllv(v1, v2, v3, VRET_BYTE);
}
// VERLLV, vector element type 'H'.
inline void Assembler::z_verllvh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_verllv(v1, v2, v3, VRET_HW);
}
// VERLLV, vector element type 'F'.
inline void Assembler::z_verllvf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_verllv(v1, v2, v3, VRET_FW);
}
// VERLLV, vector element type 'G'.
inline void Assembler::z_verllvg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_verllv(v1, v2, v3, VRET_DW);
}
inline void Assembler::z_verll( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VERLL_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_verllb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
inline void Assembler::z_verllh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_HW);} // vector element type 'H'
inline void Assembler::z_verllf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_FW);} // vector element type 'F'
inline void Assembler::z_verllg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_verll(v1, v3, d2, b2, VRET_DW);} // vector element type 'G'
// Emit VERIM.
//   v1, v2, v3 - encoded at bit offsets 8, 12 and 16
//   imm4       - 8-bit immediate, encoded via uimm8(imm4, 24, 48)
//   m5         - element-type code, bounds VRET_BYTE..VRET_DW, encoded at bit offset 32
// Uses the VERIM opcode. The earlier code emitted VERLL_ZOPC (apparently copied
// from z_verll), which produced a different instruction than the one requested.
// NOTE(review): relies on VERIM_ZOPC being defined next to the other *_ZOPC
// constants in the assembler header - confirm.
inline void Assembler::z_verim(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t m5) {
  emit_48(VERIM_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | uimm8(imm4, 24, 48) | vesc_mask(m5, VRET_BYTE, VRET_DW, 32));
}
// z_verim, vector element type 'B'.
inline void Assembler::z_verimb(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {
  z_verim(v1, v2, v3, imm4, VRET_BYTE);
}
// z_verim, vector element type 'H'.
inline void Assembler::z_verimh(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {
  z_verim(v1, v2, v3, imm4, VRET_HW);
}
// z_verim, vector element type 'F'.
inline void Assembler::z_verimf(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {
  z_verim(v1, v2, v3, imm4, VRET_FW);
}
// z_verim, vector element type 'G'.
inline void Assembler::z_verimg(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {
  z_verim(v1, v2, v3, imm4, VRET_DW);
}
// Emit VESLV (three vector register operands); m4 is the element-type code, bounds VRET_BYTE..VRET_DW.
inline void Assembler::z_veslv(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VESLV_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32));
}
// VESLV, vector element type 'B'.
inline void Assembler::z_veslvb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_veslv(v1, v2, v3, VRET_BYTE);
}
// VESLV, vector element type 'H'.
inline void Assembler::z_veslvh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_veslv(v1, v2, v3, VRET_HW);
}
// VESLV, vector element type 'F'.
inline void Assembler::z_veslvf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_veslv(v1, v2, v3, VRET_FW);
}
// VESLV, vector element type 'G'.
inline void Assembler::z_veslvg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_veslv(v1, v2, v3, VRET_DW);
}
inline void Assembler::z_vesl( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VESL_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_veslb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_BYTE);} // vector element type 'B'
inline void Assembler::z_veslh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_HW);} // vector element type 'H'
inline void Assembler::z_veslf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_FW);} // vector element type 'F'
inline void Assembler::z_veslg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesl(v1, v3, d2, b2, VRET_DW);} // vector element type 'G'
// Emit VESRAV (three vector register operands); m4 is the element-type code, bounds VRET_BYTE..VRET_DW.
inline void Assembler::z_vesrav(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VESRAV_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32));
}
// VESRAV, vector element type 'B'.
inline void Assembler::z_vesravb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vesrav(v1, v2, v3, VRET_BYTE);
}
// VESRAV, vector element type 'H'.
inline void Assembler::z_vesravh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vesrav(v1, v2, v3, VRET_HW);
}
// VESRAV, vector element type 'F'.
inline void Assembler::z_vesravf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vesrav(v1, v2, v3, VRET_FW);
}
// VESRAV, vector element type 'G'.
inline void Assembler::z_vesravg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vesrav(v1, v2, v3, VRET_DW);
}
inline void Assembler::z_vesra( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VESRA_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vesrab( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
inline void Assembler::z_vesrah( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_HW);} // vector element type 'H'
inline void Assembler::z_vesraf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_FW);} // vector element type 'F'
inline void Assembler::z_vesrag( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesra(v1, v3, d2, b2, VRET_DW);} // vector element type 'G'
// Emit VESRLV (three vector register operands); m4 is the element-type code, bounds VRET_BYTE..VRET_DW.
inline void Assembler::z_vesrlv(VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t m4) {
  emit_48(VESRLV_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32));
}
// VESRLV, vector element type 'B'.
inline void Assembler::z_vesrlvb(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vesrlv(v1, v2, v3, VRET_BYTE);
}
// VESRLV, vector element type 'H'.
inline void Assembler::z_vesrlvh(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vesrlv(v1, v2, v3, VRET_HW);
}
// VESRLV, vector element type 'F'.
inline void Assembler::z_vesrlvf(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vesrlv(v1, v2, v3, VRET_FW);
}
// VESRLV, vector element type 'G'.
inline void Assembler::z_vesrlvg(VectorRegister v1, VectorRegister v2, VectorRegister v3) {
  z_vesrlv(v1, v2, v3, VRET_DW);
}
inline void Assembler::z_vesrl( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2, int64_t m4) {emit_48(VESRL_ZOPC | vreg(v1, 8) | vreg(v3, 12) | rsmask_48(d2, b2) | vesc_mask(m4, VRET_BYTE, VRET_DW, 32)); }
inline void Assembler::z_vesrlb( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_BYTE);}// vector element type 'B'
inline void Assembler::z_vesrlh( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_HW);} // vector element type 'H'
inline void Assembler::z_vesrlf( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_FW);} // vector element type 'F'
inline void Assembler::z_vesrlg( VectorRegister v1, VectorRegister v3, int64_t d2, Register b2) {z_vesrl(v1, v3, d2, b2, VRET_DW);} // vector element type 'G'
inline void Assembler::z_vsl( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vslb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSLB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vsldb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4) {emit_48(VSLDB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | uimm8(imm4, 24, 48)); }
inline void Assembler::z_vsra( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRA_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vsrab( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRAB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vsrl( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRL_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
inline void Assembler::z_vsrlb( VectorRegister v1, VectorRegister v2, VectorRegister v3) {emit_48(VSRLB_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16)); }
// Test under Mask
// Emit VTM (test under mask) with the two register operands v1 and v2.
inline void Assembler::z_vtm(VectorRegister v1, VectorRegister v2) {
  emit_48(VTM_ZOPC | vreg(v1, 8) | vreg(v2, 12));
}
//---< Vector String Instructions >---
// Pattern used throughout this group: the generic emitter takes an explicit
// element size code (passed through vesc_mask() with bounds VRET_BYTE..VRET_FW)
// and a cc code (passed through voprc_any() at position 24). The b/h/f suffixed
// variants delegate to the generic emitter with a fixed element size code.
inline void Assembler::z_vfae( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5) {emit_48(VFAE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(imm4, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); } // Find any element
inline void Assembler::z_vfaeb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfae(v1, v2, v3, VRET_BYTE, cc5); }
inline void Assembler::z_vfaeh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfae(v1, v2, v3, VRET_HW, cc5); }
inline void Assembler::z_vfaef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfae(v1, v2, v3, VRET_FW, cc5); }
inline void Assembler::z_vfee( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5) {emit_48(VFEE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(imm4, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); } // Find element equal
inline void Assembler::z_vfeeb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfee(v1, v2, v3, VRET_BYTE, cc5); }
inline void Assembler::z_vfeeh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfee(v1, v2, v3, VRET_HW, cc5); }
inline void Assembler::z_vfeef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfee(v1, v2, v3, VRET_FW, cc5); }
inline void Assembler::z_vfene( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t imm4, int64_t cc5) {emit_48(VFENE_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vesc_mask(imm4, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); } // Find element not equal
inline void Assembler::z_vfeneb( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfene(v1, v2, v3, VRET_BYTE, cc5); }
inline void Assembler::z_vfeneh( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfene(v1, v2, v3, VRET_HW, cc5); }
inline void Assembler::z_vfenef( VectorRegister v1, VectorRegister v2, VectorRegister v3, int64_t cc5) {z_vfene(v1, v2, v3, VRET_FW, cc5); }
// Four-register form: the fourth vector register (v4) is encoded at position 32,
// so the element size code moves to position 20 for this instruction.
inline void Assembler::z_vstrc( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t imm5, int64_t cc6) {emit_48(VSTRC_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vreg(v3, 16) | vreg(v4, 32) | vesc_mask(imm5, VRET_BYTE, VRET_FW, 20) | voprc_any(cc6, 24) ); } // String range compare
inline void Assembler::z_vstrcb( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6) {z_vstrc(v1, v2, v3, v4, VRET_BYTE, cc6); }
inline void Assembler::z_vstrch( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6) {z_vstrc(v1, v2, v3, v4, VRET_HW, cc6); }
inline void Assembler::z_vstrcf( VectorRegister v1, VectorRegister v2, VectorRegister v3, VectorRegister v4, int64_t cc6) {z_vstrc(v1, v2, v3, v4, VRET_FW, cc6); }
// Two-register form: v1 and v2 at positions 8 and 12, element size code at position 32.
inline void Assembler::z_vistr( VectorRegister v1, VectorRegister v2, int64_t imm3, int64_t cc5) {emit_48(VISTR_ZOPC | vreg(v1, 8) | vreg(v2, 12) | vesc_mask(imm3, VRET_BYTE, VRET_FW, 32) | voprc_any(cc5, 24) ); } // isolate string
inline void Assembler::z_vistrb( VectorRegister v1, VectorRegister v2, int64_t cc5) {z_vistr(v1, v2, VRET_BYTE, cc5); }
inline void Assembler::z_vistrh( VectorRegister v1, VectorRegister v2, int64_t cc5) {z_vistr(v1, v2, VRET_HW, cc5); }
inline void Assembler::z_vistrf( VectorRegister v1, VectorRegister v2, int64_t cc5) {z_vistr(v1, v2, VRET_FW, cc5); }
// The 's' suffixed variants pass VOPRC_CCSET as the cc code instead of taking it as a parameter.
inline void Assembler::z_vistrbs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_BYTE, VOPRC_CCSET); }
inline void Assembler::z_vistrhs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_HW, VOPRC_CCSET); }
inline void Assembler::z_vistrfs(VectorRegister v1, VectorRegister v2) {z_vistr(v1, v2, VRET_FW, VOPRC_CCSET); }
//-------------------------------
// FLOAT INSTRUCTIONS
//-------------------------------

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,7 +34,7 @@
// Sorted according to sparc.
// z/Architecture remembers branch targets, so don't share vtables.
define_pd_global(bool, ShareVtableStubs, false);
define_pd_global(bool, ShareVtableStubs, true);
define_pd_global(bool, NeedsDeoptSuspend, false); // Only register window machines need this.
define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks.

View File

@ -4671,6 +4671,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method) {
mem2reg_opt(mirror, Address(mirror, ConstMethod::constants_offset()));
mem2reg_opt(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes()));
mem2reg_opt(mirror, Address(mirror, Klass::java_mirror_offset()));
resolve_oop_handle(mirror);
}
//---------------------------------------------------------------

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -35,3 +35,5 @@
REGISTER_DEFINITION(Register, noreg);
REGISTER_DEFINITION(FloatRegister, fnoreg);
REGISTER_DEFINITION(VectorRegister, vnoreg);

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -46,3 +46,13 @@ const char* FloatRegisterImpl::name() const {
};
return is_valid() ? names[encoding()] : "fnoreg";
}
// Returns the printable name of this vector register ("Z_V0" .. "Z_V31").
// For a register that is not valid (e.g. the vnoreg placeholder, which is
// declared with encoding -1), the placeholder's own name "vnoreg" is returned
// so that it cannot be confused with the float register placeholder "fnoreg".
const char* VectorRegisterImpl::name() const {
  const char* names[number_of_registers] = {
    "Z_V0",  "Z_V1",  "Z_V2",  "Z_V3",  "Z_V4",  "Z_V5",  "Z_V6",  "Z_V7",
    "Z_V8",  "Z_V9",  "Z_V10", "Z_V11", "Z_V12", "Z_V13", "Z_V14", "Z_V15",
    "Z_V16", "Z_V17", "Z_V18", "Z_V19", "Z_V20", "Z_V21", "Z_V22", "Z_V23",
    "Z_V24", "Z_V25", "Z_V26", "Z_V27", "Z_V28", "Z_V29", "Z_V30", "Z_V31"
  };
  // is_valid() is checked first because encoding() asserts on invalid registers.
  return is_valid() ? names[encoding()] : "vnoreg";
}

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -34,11 +34,6 @@ class VMRegImpl;
typedef VMRegImpl* VMReg;
// Use Register as shortcut.
class RegisterImpl;
typedef RegisterImpl* Register;
// The implementation of integer registers for z/Architecture.
// z/Architecture registers, see "LINUX for zSeries ELF ABI Supplement", IBM March 2001
//
@ -57,6 +52,17 @@ typedef RegisterImpl* Register;
// f1,f3,f5,f7 General purpose (volatile)
// f8-f15 General purpose (nonvolatile)
//===========================
//=== Integer Registers ===
//===========================
// Use Register as shortcut.
class RegisterImpl;
typedef RegisterImpl* Register;
// The implementation of integer registers for z/Architecture.
// Converts a numeric register encoding into a Register handle.
// The handle is simply the encoding, widened to long and cast to the pointer type.
inline Register as_Register(int encoding) {
  const long raw = (long)encoding;
  return (Register)raw;
}
@ -110,6 +116,11 @@ CONSTANT_REGISTER_DECLARATION(Register, Z_R13, (13));
CONSTANT_REGISTER_DECLARATION(Register, Z_R14, (14));
CONSTANT_REGISTER_DECLARATION(Register, Z_R15, (15));
//=============================
//=== Condition Registers ===
//=============================
// Use ConditionRegister as shortcut
class ConditionRegisterImpl;
typedef ConditionRegisterImpl* ConditionRegister;
@ -159,7 +170,7 @@ CONSTANT_REGISTER_DECLARATION(ConditionRegister, Z_CR, (0));
// dangers of defines.
// If a particular file has a problem with these defines then it's possible
// to turn them off in that file by defining
// DONT_USE_REGISTER_DEFINES. Register_definition_s390.cpp does that
// DONT_USE_REGISTER_DEFINES. Register_definitions_s390.cpp does that
// so that it's able to provide real definitions of these registers
// for use in debuggers and such.
@ -186,6 +197,11 @@ CONSTANT_REGISTER_DECLARATION(ConditionRegister, Z_CR, (0));
#define Z_CR ((ConditionRegister)(Z_CR_ConditionRegisterEnumValue))
#endif // DONT_USE_REGISTER_DEFINES
//=========================
//=== Float Registers ===
//=========================
// Use FloatRegister as shortcut
class FloatRegisterImpl;
typedef FloatRegisterImpl* FloatRegister;
@ -263,22 +279,6 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, Z_F15, (15));
#define Z_F15 ((FloatRegister)( Z_F15_FloatRegisterEnumValue))
#endif // DONT_USE_REGISTER_DEFINES
// Need to know the total number of registers of all sorts for SharedInfo.
// Define a class that exports it.
class ConcreteRegisterImpl : public AbstractRegisterImpl {
public:
enum {
number_of_registers =
(RegisterImpl::number_of_registers +
FloatRegisterImpl::number_of_registers)
* 2 // register halves
+ 1 // condition code register
};
static const int max_gpr;
static const int max_fpr;
};
// Single, Double and Quad fp reg classes. These exist to map the ADLC
// encoding for a floating point register, to the FloatRegister number
// desired by the macroassembler. A FloatRegister is a number between
@ -329,6 +329,161 @@ class QuadFloatRegisterImpl {
};
//==========================
//=== Vector Registers ===
//==========================
// Use VectorRegister as shortcut
class VectorRegisterImpl;
typedef VectorRegisterImpl* VectorRegister;
// The implementation of vector registers for z/Architecture.
// Converts a numeric register encoding into a VectorRegister handle.
// The handle is simply the encoding, widened to long and cast to the pointer type.
inline VectorRegister as_VectorRegister(int encoding) {
  const long raw = (long)encoding;
  return (VectorRegister)raw;
}
// Implementation of the z/Architecture vector registers.
// The register set holds number_of_registers (32) registers; none of them
// is used for argument passing (number_of_arg_registers == 0).
class VectorRegisterImpl: public AbstractRegisterImpl {
 public:
  enum {
    number_of_registers     = 32,
    number_of_arg_registers = 0
  };

  // construction
  inline friend VectorRegister as_VectorRegister(int encoding);

  inline VMReg as_VMReg();

  // accessors
  // Returns the register number; asserts that the register is valid.
  int encoding() const {
    assert(is_valid(), "invalid register"); return value();
  }

  // A register is valid if its value lies in [0, number_of_registers).
  bool is_valid() const       { return 0 <= value() && value() < number_of_registers; }
  // All vector registers are reported as volatile.
  bool is_volatile() const    { return true; }
  bool is_nonvolatile() const { return false; }

  // Register fields in z/Architecture instructions are 4 bits wide, restricting the
  // addressable register set size to 16.
  // The vector register set size is 32, requiring an extension, by one bit, of the
  // register encoding. This is accomplished by the introduction of a RXB field in the
  // instruction. RXB = Register eXtension Bits.
  // The RXB field contains the MSBs (most significant bit) of the vector register numbers
  // used for this instruction. Assignment of MSB in RXB is by bit position of the
  // register field in the instruction.
  // Example:
  //   The register field starting at bit position 12 in the instruction is assigned RXB bit 0b0100.
  //
  // pos is the start bit position of the register field (8, 12, 16 or 32).
  // Returns the RXB bit for that field if this register is in the upper half of
  // the register set (encoding >= 16), and 0 otherwise. Any other pos value hits
  // ShouldNotReachHere() when the register is in the upper half.
  // Declared const, like the other accessors: it only reads the encoding.
  int64_t RXB_mask(int pos) const {
    if (encoding() >= number_of_registers/2) {
      switch (pos) {
        case 8:  return ((int64_t)0b1000) << 8; // actual bit pos: 36
        case 12: return ((int64_t)0b0100) << 8; // actual bit pos: 37
        case 16: return ((int64_t)0b0010) << 8; // actual bit pos: 38
        case 32: return ((int64_t)0b0001) << 8; // actual bit pos: 39
        default:
          ShouldNotReachHere();
      }
    }
    return 0;
  }

  const char* name() const;

  // Returns the register with the next higher encoding.
  VectorRegister successor() const { return as_VectorRegister(encoding() + 1); }
};
// The Vector registers of z/Architecture.
CONSTANT_REGISTER_DECLARATION(VectorRegister, vnoreg, (-1));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V0, (0));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V1, (1));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V2, (2));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V3, (3));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V4, (4));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V5, (5));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V6, (6));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V7, (7));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V8, (8));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V9, (9));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V10, (10));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V11, (11));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V12, (12));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V13, (13));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V14, (14));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V15, (15));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V16, (16));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V17, (17));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V18, (18));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V19, (19));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V20, (20));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V21, (21));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V22, (22));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V23, (23));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V24, (24));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V25, (25));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V26, (26));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V27, (27));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V28, (28));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V29, (29));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V30, (30));
CONSTANT_REGISTER_DECLARATION(VectorRegister, Z_V31, (31));
#ifndef DONT_USE_REGISTER_DEFINES
#define vnoreg ((VectorRegister)(vnoreg_VectorRegisterEnumValue))
#define Z_V0 ((VectorRegister)( Z_V0_VectorRegisterEnumValue))
#define Z_V1 ((VectorRegister)( Z_V1_VectorRegisterEnumValue))
#define Z_V2 ((VectorRegister)( Z_V2_VectorRegisterEnumValue))
#define Z_V3 ((VectorRegister)( Z_V3_VectorRegisterEnumValue))
#define Z_V4 ((VectorRegister)( Z_V4_VectorRegisterEnumValue))
#define Z_V5 ((VectorRegister)( Z_V5_VectorRegisterEnumValue))
#define Z_V6 ((VectorRegister)( Z_V6_VectorRegisterEnumValue))
#define Z_V7 ((VectorRegister)( Z_V7_VectorRegisterEnumValue))
#define Z_V8 ((VectorRegister)( Z_V8_VectorRegisterEnumValue))
#define Z_V9 ((VectorRegister)( Z_V9_VectorRegisterEnumValue))
#define Z_V10 ((VectorRegister)( Z_V10_VectorRegisterEnumValue))
#define Z_V11 ((VectorRegister)( Z_V11_VectorRegisterEnumValue))
#define Z_V12 ((VectorRegister)( Z_V12_VectorRegisterEnumValue))
#define Z_V13 ((VectorRegister)( Z_V13_VectorRegisterEnumValue))
#define Z_V14 ((VectorRegister)( Z_V14_VectorRegisterEnumValue))
#define Z_V15 ((VectorRegister)( Z_V15_VectorRegisterEnumValue))
#define Z_V16 ((VectorRegister)( Z_V16_VectorRegisterEnumValue))
#define Z_V17 ((VectorRegister)( Z_V17_VectorRegisterEnumValue))
#define Z_V18 ((VectorRegister)( Z_V18_VectorRegisterEnumValue))
#define Z_V19 ((VectorRegister)( Z_V19_VectorRegisterEnumValue))
#define Z_V20 ((VectorRegister)( Z_V20_VectorRegisterEnumValue))
#define Z_V21 ((VectorRegister)( Z_V21_VectorRegisterEnumValue))
#define Z_V22 ((VectorRegister)( Z_V22_VectorRegisterEnumValue))
#define Z_V23 ((VectorRegister)( Z_V23_VectorRegisterEnumValue))
#define Z_V24 ((VectorRegister)( Z_V24_VectorRegisterEnumValue))
#define Z_V25 ((VectorRegister)( Z_V25_VectorRegisterEnumValue))
#define Z_V26 ((VectorRegister)( Z_V26_VectorRegisterEnumValue))
#define Z_V27 ((VectorRegister)( Z_V27_VectorRegisterEnumValue))
#define Z_V28 ((VectorRegister)( Z_V28_VectorRegisterEnumValue))
#define Z_V29 ((VectorRegister)( Z_V29_VectorRegisterEnumValue))
#define Z_V30 ((VectorRegister)( Z_V30_VectorRegisterEnumValue))
#define Z_V31 ((VectorRegister)( Z_V31_VectorRegisterEnumValue))
#endif // DONT_USE_REGISTER_DEFINES
// Need to know the total number of registers of all sorts for SharedInfo.
// Define a class that exports it.
// Exports the combined register count of the integer, float and condition
// code register kinds.
class ConcreteRegisterImpl : public AbstractRegisterImpl {
 public:
  enum {
    // Integer and float registers are counted twice (register halves);
    // the condition code register contributes one additional entry.
    number_of_registers =
      2 * (RegisterImpl::number_of_registers + FloatRegisterImpl::number_of_registers) + 1
  };

  static const int max_gpr;
  static const int max_fpr;
};
// Common register declarations used in assembler code.
REGISTER_DECLARATION(Register, Z_EXC_OOP, Z_R2);
REGISTER_DECLARATION(Register, Z_EXC_PC, Z_R3);

View File

@ -2382,6 +2382,7 @@ void TemplateTable::load_field_cp_cache_entry(Register obj,
if (is_static) {
__ mem2reg_opt(obj, Address(cache, index, cp_base_offset + ConstantPoolCacheEntry::f1_offset()));
__ mem2reg_opt(obj, Address(obj, Klass::java_mirror_offset()));
__ resolve_oop_handle(obj);
}
}

View File

@ -122,6 +122,7 @@ class Assembler : public AbstractAssembler {
fpop1_op3 = 0x34,
fpop2_op3 = 0x35,
impdep1_op3 = 0x36,
addx_op3 = 0x36,
aes3_op3 = 0x36,
sha_op3 = 0x36,
bmask_op3 = 0x36,
@ -133,6 +134,8 @@ class Assembler : public AbstractAssembler {
fzero_op3 = 0x36,
fsrc_op3 = 0x36,
fnot_op3 = 0x36,
mpmul_op3 = 0x36,
umulx_op3 = 0x36,
xmulx_op3 = 0x36,
crc32c_op3 = 0x36,
impdep2_op3 = 0x37,
@ -195,6 +198,9 @@ class Assembler : public AbstractAssembler {
fnegs_opf = 0x05,
fnegd_opf = 0x06,
addxc_opf = 0x11,
addxccc_opf = 0x13,
umulxhi_opf = 0x16,
alignaddr_opf = 0x18,
bmask_opf = 0x19,
@ -240,7 +246,8 @@ class Assembler : public AbstractAssembler {
sha256_opf = 0x142,
sha512_opf = 0x143,
crc32c_opf = 0x147
crc32c_opf = 0x147,
mpmul_opf = 0x148
};
enum op5s {
@ -380,7 +387,7 @@ class Assembler : public AbstractAssembler {
assert_signed_range(x, nbits + 2);
}
static void assert_unsigned_const(int x, int nbits) {
static void assert_unsigned_range(int x, int nbits) {
assert(juint(x) < juint(1 << nbits), "unsigned constant out of range");
}
@ -534,6 +541,12 @@ class Assembler : public AbstractAssembler {
return x & ((1 << nbits) - 1);
}
// unsigned immediate, in low bits, at most nbits long.
static int uimm(int x, int nbits) {
  // Range-check first (debug builds), then keep only the low nbits bits.
  assert_unsigned_range(x, nbits);
  const int low_bits_mask = (1 << nbits) - 1;
  return x & low_bits_mask;
}
// compute inverse of wdisp16
static intptr_t inv_wdisp16(int x, intptr_t pos) {
int lo = x & ((1 << 14) - 1);
@ -631,6 +644,9 @@ class Assembler : public AbstractAssembler {
// FMAf instructions supported only on certain processors
static void fmaf_only() { assert(VM_Version::has_fmaf(), "This instruction only works on SPARC with FMAf"); }
// MPMUL instruction supported only on certain processors
static void mpmul_only() {
  // Guard used by emitters that require VM_Version::has_mpmul().
  assert(VM_Version::has_mpmul(), "This instruction only works on SPARC with MPMUL");
}
// instruction only in VIS1
static void vis1_only() { assert(VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
@ -772,11 +788,12 @@ class Assembler : public AbstractAssembler {
AbstractAssembler::flush();
}
inline void emit_int32(int); // shadows AbstractAssembler::emit_int32
inline void emit_data(int);
inline void emit_data(int, RelocationHolder const &rspec);
inline void emit_data(int, relocInfo::relocType rtype);
// helper for above functions
inline void emit_int32(int32_t); // shadows AbstractAssembler::emit_int32
inline void emit_data(int32_t);
inline void emit_data(int32_t, RelocationHolder const&);
inline void emit_data(int32_t, relocInfo::relocType rtype);
// Helper for the above functions.
inline void check_delay();
@ -929,6 +946,10 @@ class Assembler : public AbstractAssembler {
// fmaf instructions.
inline void fmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
inline void fmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
inline void fnmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
inline void fnmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
// pp 165
@ -960,6 +981,8 @@ class Assembler : public AbstractAssembler {
inline void ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d,
RelocationHolder const &rspec = RelocationHolder());
inline void ldd(Register s1, Register s2, FloatRegister d);
inline void ldd(Register s1, int simm13a, FloatRegister d);
inline void ldfsr(Register s1, Register s2);
inline void ldfsr(Register s1, int simm13a);
@ -987,8 +1010,6 @@ class Assembler : public AbstractAssembler {
inline void lduw(Register s1, int simm13a, Register d);
inline void ldx(Register s1, Register s2, Register d);
inline void ldx(Register s1, int simm13a, Register d);
inline void ldd(Register s1, Register s2, Register d);
inline void ldd(Register s1, int simm13a, Register d);
// pp 177
@ -1157,6 +1178,9 @@ class Assembler : public AbstractAssembler {
inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2);
inline void stf(FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a);
inline void std(FloatRegister d, Register s1, Register s2);
inline void std(FloatRegister d, Register s1, int simm13a);
inline void stfsr(Register s1, Register s2);
inline void stfsr(Register s1, int simm13a);
inline void stxfsr(Register s1, Register s2);
@ -1177,8 +1201,6 @@ class Assembler : public AbstractAssembler {
inline void stw(Register d, Register s1, int simm13a);
inline void stx(Register d, Register s1, Register s2);
inline void stx(Register d, Register s1, int simm13a);
inline void std(Register d, Register s1, Register s2);
inline void std(Register d, Register s1, int simm13a);
// pp 177
@ -1267,6 +1289,9 @@ class Assembler : public AbstractAssembler {
// VIS3 instructions
inline void addxc(Register s1, Register s2, Register d);
inline void addxccc(Register s1, Register s2, Register d);
inline void movstosw(FloatRegister s, Register d);
inline void movstouw(FloatRegister s, Register d);
inline void movdtox(FloatRegister s, Register d);
@ -1276,6 +1301,7 @@ class Assembler : public AbstractAssembler {
inline void xmulx(Register s1, Register s2, Register d);
inline void xmulxhi(Register s1, Register s2, Register d);
inline void umulxhi(Register s1, Register s2, Register d);
// Crypto SHA instructions
@ -1287,6 +1313,10 @@ class Assembler : public AbstractAssembler {
inline void crc32c(FloatRegister s1, FloatRegister s2, FloatRegister d);
// MPMUL instruction
inline void mpmul(int uimm5);
// Creation
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
#ifdef VALIDATE_PIPELINE

View File

@ -59,7 +59,7 @@ inline void Assembler::check_delay() {
#endif
}
inline void Assembler::emit_int32(int x) {
inline void Assembler::emit_int32(int32_t x) {
check_delay();
#ifdef VALIDATE_PIPELINE
_hazard_state = NoHazard;
@ -67,16 +67,16 @@ inline void Assembler::emit_int32(int x) {
AbstractAssembler::emit_int32(x);
}
inline void Assembler::emit_data(int x) {
inline void Assembler::emit_data(int32_t x) {
emit_int32(x);
}
inline void Assembler::emit_data(int x, relocInfo::relocType rtype) {
inline void Assembler::emit_data(int32_t x, relocInfo::relocType rtype) {
relocate(rtype);
emit_int32(x);
}
inline void Assembler::emit_data(int x, RelocationHolder const &rspec) {
inline void Assembler::emit_data(int32_t x, RelocationHolder const &rspec) {
relocate(rspec);
emit_int32(x);
}
@ -359,6 +359,19 @@ inline void Assembler::fmadd(FloatRegisterImpl::Width w, FloatRegister s1, Float
fmaf_only();
emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(w) | fs2(s2, w));
}
// Emits the fmsub form of the FMAf instruction group for width w.
inline void Assembler::fmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
  fmaf_only();
  // The op5 field distinguishes the FMAf variants: 0x4 plus the width code selects fmsub.
  const int op5_code = 0x4 + w;
  const int insn = op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(op5_code) | fs2(s2, w);
  emit_int32(insn);
}
// Emits the fnmadd form of the FMAf instruction group for width w.
inline void Assembler::fnmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
  fmaf_only();
  // The op5 field distinguishes the FMAf variants: 0xc plus the width code selects fnmadd.
  const int op5_code = 0xc + w;
  const int insn = op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(op5_code) | fs2(s2, w);
  emit_int32(insn);
}
// Emits the fnmsub form of the FMAf instruction group for width w.
inline void Assembler::fnmsub(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
  fmaf_only();
  // The op5 field distinguishes the FMAf variants: 0x8 plus the width code selects fnmsub.
  const int op5_code = 0x8 + w;
  const int insn = op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(op5_code) | fs2(s2, w);
  emit_int32(insn);
}
inline void Assembler::flush(Register s1, Register s2) {
emit_int32(op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2));
@ -402,6 +415,15 @@ inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, int simm13a,
emit_data(op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec);
}
// Double-width float load, register + register addressing.
// Shorthand for ldf() with width FloatRegisterImpl::D; d must be an even register.
inline void Assembler::ldd(Register s1, Register s2, FloatRegister d) {
  assert(d->is_even(), "not even");
  const FloatRegisterImpl::Width width = FloatRegisterImpl::D;
  ldf(width, s1, s2, d);
}
// Double-width float load, register + immediate addressing.
// Shorthand for ldf() with width FloatRegisterImpl::D; d must be an even register.
inline void Assembler::ldd(Register s1, int simm13a, FloatRegister d) {
  assert(d->is_even(), "not even");
  const FloatRegisterImpl::Width width = FloatRegisterImpl::D;
  ldf(width, s1, simm13a, d);
}
inline void Assembler::ldxfsr(Register s1, Register s2) {
emit_int32(op(ldst_op) | rd(G1) | op3(ldfsr_op3) | rs1(s1) | rs2(s2));
}
@ -460,16 +482,6 @@ inline void Assembler::ldx(Register s1, Register s2, Register d) {
inline void Assembler::ldx(Register s1, int simm13a, Register d) {
emit_data(op(ldst_op) | rd(d) | op3(ldx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
}
inline void Assembler::ldd(Register s1, Register s2, Register d) {
v9_dep();
assert(d->is_even(), "not even");
emit_int32(op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | rs2(s2));
}
inline void Assembler::ldd(Register s1, int simm13a, Register d) {
v9_dep();
assert(d->is_even(), "not even");
emit_data(op(ldst_op) | rd(d) | op3(ldd_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
}
inline void Assembler::ldsba(Register s1, Register s2, int ia, Register d) {
emit_int32(op(ldst_op) | rd(d) | op3(ldsb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
@ -806,6 +818,15 @@ inline void Assembler::stf(FloatRegisterImpl::Width w, FloatRegister d, Register
emit_data(op(ldst_op) | fd(d, w) | alt_op3(stf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13));
}
// Double-width float store, register + register addressing.
// Shorthand for stf() with width FloatRegisterImpl::D; d must be an even register.
inline void Assembler::std(FloatRegister d, Register s1, Register s2) {
  assert(d->is_even(), "not even");
  const FloatRegisterImpl::Width width = FloatRegisterImpl::D;
  stf(width, d, s1, s2);
}
// Double-width float store, register + immediate addressing.
// Shorthand for stf() with width FloatRegisterImpl::D; d must be an even register.
inline void Assembler::std(FloatRegister d, Register s1, int simm13a) {
  assert(d->is_even(), "not even");
  const FloatRegisterImpl::Width width = FloatRegisterImpl::D;
  stf(width, d, s1, simm13a);
}
inline void Assembler::stxfsr(Register s1, Register s2) {
emit_int32(op(ldst_op) | rd(G1) | op3(stfsr_op3) | rs1(s1) | rs2(s2));
}
@ -848,16 +869,6 @@ inline void Assembler::stx(Register d, Register s1, Register s2) {
inline void Assembler::stx(Register d, Register s1, int simm13a) {
emit_data(op(ldst_op) | rd(d) | op3(stx_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
}
inline void Assembler::std(Register d, Register s1, Register s2) {
v9_dep();
assert(d->is_even(), "not even");
emit_int32(op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | rs2(s2));
}
inline void Assembler::std(Register d, Register s1, int simm13a) {
v9_dep();
assert(d->is_even(), "not even");
emit_data(op(ldst_op) | rd(d) | op3(std_op3) | rs1(s1) | immed(true) | simm(simm13a, 13));
}
inline void Assembler::stba(Register d, Register s1, Register s2, int ia) {
emit_int32(op(ldst_op) | rd(d) | op3(stb_op3 | alt_bit_op3) | rs1(s1) | imm_asi(ia) | rs2(s2));
@ -1043,6 +1054,15 @@ inline void Assembler::bshuffle(FloatRegister s1, FloatRegister s2, FloatRegiste
// VIS3 instructions
// Emits addxc (requires VIS3): operands s1 and s2, destination d.
inline void Assembler::addxc(Register s1, Register s2, Register d) {
  vis3_only();
  const int insn = op(arith_op) | rd(d) | op3(addx_op3) | rs1(s1) | opf(addxc_opf) | rs2(s2);
  emit_int32(insn);
}
// Emits addxccc (requires VIS3): operands s1 and s2, destination d.
inline void Assembler::addxccc(Register s1, Register s2, Register d) {
  vis3_only();
  const int insn = op(arith_op) | rd(d) | op3(addx_op3) | rs1(s1) | opf(addxccc_opf) | rs2(s2);
  emit_int32(insn);
}
inline void Assembler::movstosw(FloatRegister s, Register d) {
vis3_only();
emit_int32(op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstosw_opf) | fs2(s, FloatRegisterImpl::S));
@ -1073,6 +1093,10 @@ inline void Assembler::xmulxhi(Register s1, Register s2, Register d) {
vis3_only();
emit_int32(op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulxhi_opf) | rs2(s2));
}
// Emits umulxhi (requires VIS3): operands s1 and s2, destination d.
inline void Assembler::umulxhi(Register s1, Register s2, Register d) {
  vis3_only();
  const int insn = op(arith_op) | rd(d) | op3(umulx_op3) | rs1(s1) | opf(umulxhi_opf) | rs2(s2);
  emit_int32(insn);
}
// Crypto SHA instructions
@ -1096,4 +1120,11 @@ inline void Assembler::crc32c(FloatRegister s1, FloatRegister s2, FloatRegister
emit_int32(op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(crc32c_op3) | fs1(s1, FloatRegisterImpl::D) | opf(crc32c_opf) | fs2(s2, FloatRegisterImpl::D));
}
// MPMUL instruction
// Emits mpmul (requires the MPMUL feature). The rd and rs1 fields are encoded
// as 0; uimm5 is range-checked by uimm() as a 5-bit unsigned immediate.
inline void Assembler::mpmul(int uimm5) {
  mpmul_only();
  const int insn = op(arith_op) | rd(0) | op3(mpmul_op3) | rs1(0) | opf(mpmul_opf) | uimm(uimm5, 5);
  emit_int32(insn);
}
#endif // CPU_SPARC_VM_ASSEMBLER_SPARC_INLINE_HPP

View File

@ -119,8 +119,8 @@ address RegisterMap::pd_location(VMReg regname) const {
reg = regname->as_Register();
}
if (reg->is_out()) {
assert(_younger_window != NULL, "Younger window should be available");
return second_word + (address)&_younger_window[reg->after_save()->sp_offset_in_saved_window()];
return _younger_window == NULL ? NULL :
second_word + (address)&_younger_window[reg->after_save()->sp_offset_in_saved_window()];
}
if (reg->is_local() || reg->is_in()) {
assert(_window != NULL, "Window should be available");

View File

@ -97,12 +97,15 @@ define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
writeable) \
\
product(intx, UseVIS, 99, \
"Highest supported VIS instructions set on Sparc") \
"Highest supported VIS instructions set on SPARC") \
range(0, 99) \
\
product(bool, UseCBCond, false, \
"Use compare and branch instruction on SPARC") \
\
product(bool, UseMPMUL, false, \
"Use multi-precision multiply instruction (mpmul) on SPARC") \
\
product(bool, UseBlockZeroing, false, \
"Use special cpu instructions for block zeroing") \
\

View File

@ -1574,29 +1574,39 @@ void MacroAssembler::br_null_short(Register s1, Predict p, Label& L) {
assert_not_delayed();
if (use_cbcond(L)) {
Assembler::cbcond(zero, ptr_cc, s1, 0, L);
return;
} else {
br_null(s1, false, p, L);
delayed()->nop();
}
br_null(s1, false, p, L);
delayed()->nop();
}
void MacroAssembler::br_notnull_short(Register s1, Predict p, Label& L) {
assert_not_delayed();
if (use_cbcond(L)) {
Assembler::cbcond(notZero, ptr_cc, s1, 0, L);
return;
} else {
br_notnull(s1, false, p, L);
delayed()->nop();
}
br_notnull(s1, false, p, L);
delayed()->nop();
}
// Unconditional short branch
void MacroAssembler::ba_short(Label& L) {
assert_not_delayed();
if (use_cbcond(L)) {
Assembler::cbcond(equal, icc, G0, G0, L);
return;
} else {
br(always, false, pt, L);
delayed()->nop();
}
br(always, false, pt, L);
}
// Branch if 'icc' says zero or not (i.e. icc.z == 1|0).
void MacroAssembler::br_icc_zero(bool iszero, Predict p, Label &L) {
assert_not_delayed();
Condition cf = (iszero ? Assembler::zero : Assembler::notZero);
br(cf, false, p, L);
delayed()->nop();
}
@ -3834,6 +3844,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method) {
ld_ptr(mirror, in_bytes(ConstMethod::constants_offset()), mirror);
ld_ptr(mirror, ConstantPool::pool_holder_offset_in_bytes(), mirror);
ld_ptr(mirror, mirror_offset, mirror);
resolve_oop_handle(mirror);
}
void MacroAssembler::load_klass(Register src_oop, Register klass) {

View File

@ -606,7 +606,7 @@ class MacroAssembler : public Assembler {
// offset. No explicit code generation is needed if the offset is within a certain
// range (0 <= offset <= page_size).
//
// %%%%%% Currently not done for SPARC
// FIXME: Currently not done for SPARC
void null_check(Register reg, int offset = -1);
static bool needs_explicit_null_check(intptr_t offset);
@ -648,6 +648,9 @@ class MacroAssembler : public Assembler {
// unconditional short branch
void ba_short(Label& L);
// Branch on icc.z (true or not).
void br_icc_zero(bool iszero, Predict p, Label &L);
inline void bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
inline void bp( Condition c, bool a, CC cc, Predict p, Label& L );
@ -663,19 +666,19 @@ class MacroAssembler : public Assembler {
inline void fbp( Condition c, bool a, CC cc, Predict p, Label& L );
// Sparc shorthands(pp 85, V8 manual, pp 289 V9 manual)
inline void cmp( Register s1, Register s2 );
inline void cmp( Register s1, int simm13a );
inline void cmp( Register s1, Register s2 );
inline void cmp( Register s1, int simm13a );
inline void jmp( Register s1, Register s2 );
inline void jmp( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() );
// Check if the call target is out of wdisp30 range (relative to the code cache)
static inline bool is_far_target(address d);
inline void call( address d, relocInfo::relocType rt = relocInfo::runtime_call_type );
inline void call( address d, RelocationHolder const& rspec);
inline void call( address d, relocInfo::relocType rt = relocInfo::runtime_call_type );
inline void call( address d, RelocationHolder const& rspec);
inline void call( Label& L, relocInfo::relocType rt = relocInfo::runtime_call_type );
inline void call( Label& L, RelocationHolder const& rspec);
inline void call( Label& L, relocInfo::relocType rt = relocInfo::runtime_call_type );
inline void call( Label& L, RelocationHolder const& rspec);
inline void callr( Register s1, Register s2 );
inline void callr( Register s1, int simm13a, RelocationHolder const& rspec = RelocationHolder() );

View File

@ -185,7 +185,7 @@ inline void MacroAssembler::br( Condition c, bool a, Predict p, address d, reloc
}
inline void MacroAssembler::br( Condition c, bool a, Predict p, Label& L ) {
// See note[+] on 'avoid_pipeline_stalls()', in "assembler_sparc.inline.hpp".
// See note[+] on 'avoid_pipeline_stall()', in "assembler_sparc.inline.hpp".
avoid_pipeline_stall();
br(c, a, p, target(L));
}

View File

@ -236,7 +236,7 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
inline VMReg as_VMReg( );
// accessors
int encoding() const { assert(is_valid(), "invalid register"); return value(); }
int encoding() const { assert(is_valid(), "invalid register"); return value(); }
public:
int encoding(Width w) const {
@ -258,10 +258,12 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
return -1;
}
bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
bool is_valid() const { return 0 <= value() && value() < number_of_registers; }
bool is_even() const { return (encoding() & 1) == 0; }
const char* name() const;
FloatRegister successor() const { return as_FloatRegister(encoding() + 1); }
FloatRegister successor() const { return as_FloatRegister(encoding() + 1); }
};

View File

@ -2628,7 +2628,6 @@ enc_class fsqrtd (dflt_reg dst, dflt_reg src) %{
%}
enc_class fmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
MacroAssembler _masm(&cbuf);
@ -2651,7 +2650,71 @@ enc_class fmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
__ fmadd(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
%}
// Encoding for the single precision fused multiply-subtract instruction.
// Maps the four operand register numbers to single precision FloatRegister
// objects and emits 'fmsub' with width S; the assembler call takes the
// operands in the order (a, b, c, dst).
enc_class fmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
__ fmsub(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
%}
// Encoding for the double precision fused multiply-subtract instruction.
// Maps the four operand register numbers to double precision FloatRegister
// objects and emits 'fmsub' with width D; the assembler call takes the
// operands in the order (a, b, c, dst).
enc_class fmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
__ fmsub(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
%}
// Encoding for the single precision fused negative multiply-add instruction.
// Maps the four operand register numbers to single precision FloatRegister
// objects and emits 'fnmadd' with width S; the assembler call takes the
// operands in the order (a, b, c, dst).
enc_class fnmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
__ fnmadd(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
%}
// Encoding for the double precision fused negative multiply-add instruction.
// Maps the four operand register numbers to double precision FloatRegister
// objects and emits 'fnmadd' with width D; the assembler call takes the
// operands in the order (a, b, c, dst).
enc_class fnmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
__ fnmadd(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
%}
// Encoding for the single precision fused negative multiply-subtract
// instruction. Maps the four operand register numbers to single precision
// FloatRegister objects and emits 'fnmsub' with width S; the assembler call
// takes the operands in the order (a, b, c, dst).
enc_class fnmsubs (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
__ fnmsub(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
%}
// Encoding for the double precision fused negative multiply-subtract
// instruction. Maps the four operand register numbers to double precision
// FloatRegister objects and emits 'fnmsub' with width D; the assembler call
// takes the operands in the order (a, b, c, dst).
enc_class fnmsubd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
__ fnmsub(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
%}
enc_class fmovs (dflt_reg dst, dflt_reg src) %{
@ -7597,7 +7660,7 @@ instruct sqrtD_reg_reg(regD dst, regD src) %{
ins_pipe(fdivD_reg_reg);
%}
// Single precision fused floating-point multiply-add (d = a * b + c).
// Single/Double precision fused floating-point multiply-add (d = a * b + c).
instruct fmaF_regx4(regF dst, regF a, regF b, regF c) %{
predicate(UseFMA);
match(Set dst (FmaF c (Binary a b)));
@ -7606,7 +7669,6 @@ instruct fmaF_regx4(regF dst, regF a, regF b, regF c) %{
ins_pipe(fmaF_regx4);
%}
// Double precision fused floating-point multiply-add (d = a * b + c).
instruct fmaD_regx4(regD dst, regD a, regD b, regD c) %{
predicate(UseFMA);
match(Set dst (FmaD c (Binary a b)));
@ -7615,6 +7677,66 @@ instruct fmaD_regx4(regD dst, regD a, regD b, regD c) %{
ins_pipe(fmaD_regx4);
%}
// Additional patterns matching complement versions that we can map directly to
// variants of the fused multiply-add instructions.
// Single/Double precision fused floating-point multiply-sub (d = a * b - c)
// Single precision variant. Only selected when UseFMA is set; matches an FmaF
// node whose addend is negated, i.e. dst = a * b + (-c), and encodes it with
// the 'fmsubs' encoding class.
instruct fmsubF_regx4(regF dst, regF a, regF b, regF c) %{
predicate(UseFMA);
match(Set dst (FmaF (NegF c) (Binary a b)));
format %{ "fmsubs $a,$b,$c,$dst\t# $dst = $a * $b - $c" %}
ins_encode(fmsubs(dst, a, b, c));
ins_pipe(fmaF_regx4);
%}
// Double precision variant. Only selected when UseFMA is set; matches an FmaD
// node whose addend is negated, i.e. dst = a * b + (-c), and encodes it with
// the 'fmsubd' encoding class.
instruct fmsubD_regx4(regD dst, regD a, regD b, regD c) %{
predicate(UseFMA);
match(Set dst (FmaD (NegD c) (Binary a b)));
format %{ "fmsubd $a,$b,$c,$dst\t# $dst = $a * $b - $c" %}
ins_encode(fmsubd(dst, a, b, c));
ins_pipe(fmaD_regx4);
%}
// Single/Double precision fused floating-point neg. multiply-add,
// d = -1 * a * b - c = -(a * b + c)
// Single precision variant. Only selected when UseFMA is set; matches an FmaF
// node with a negated addend and one negated multiplicand (either a or b, hence
// the two match rules), i.e. dst = (-a) * b + (-c) = -(a * b + c).
instruct fnmaddF_regx4(regF dst, regF a, regF b, regF c) %{
predicate(UseFMA);
match(Set dst (FmaF (NegF c) (Binary (NegF a) b)));
match(Set dst (FmaF (NegF c) (Binary a (NegF b))));
format %{ "fnmadds $a,$b,$c,$dst\t# $dst = -($a * $b + $c)" %}
ins_encode(fnmadds(dst, a, b, c));
ins_pipe(fmaF_regx4);
%}
// Double precision variant. Only selected when UseFMA is set; matches an FmaD
// node with a negated addend and one negated multiplicand (either a or b, hence
// the two match rules), i.e. dst = (-a) * b + (-c) = -(a * b + c).
instruct fnmaddD_regx4(regD dst, regD a, regD b, regD c) %{
predicate(UseFMA);
match(Set dst (FmaD (NegD c) (Binary (NegD a) b)));
match(Set dst (FmaD (NegD c) (Binary a (NegD b))));
format %{ "fnmaddd $a,$b,$c,$dst\t# $dst = -($a * $b + $c)" %}
ins_encode(fnmaddd(dst, a, b, c));
ins_pipe(fmaD_regx4);
%}
// Single/Double precision fused floating-point neg. multiply-sub,
// d = -1 * a * b + c = -(a * b - c)
// Single precision variant. Only selected when UseFMA is set; matches an FmaF
// node with a plain addend and one negated multiplicand (either a or b, hence
// the two match rules), i.e. dst = (-a) * b + c = -(a * b - c).
instruct fnmsubF_regx4(regF dst, regF a, regF b, regF c) %{
predicate(UseFMA);
match(Set dst (FmaF c (Binary (NegF a) b)));
match(Set dst (FmaF c (Binary a (NegF b))));
format %{ "fnmsubs $a,$b,$c,$dst\t# $dst = -($a * $b - $c)" %}
ins_encode(fnmsubs(dst, a, b, c));
ins_pipe(fmaF_regx4);
%}
// Double precision variant. Only selected when UseFMA is set; matches an FmaD
// node with a plain addend and one negated multiplicand (either a or b, hence
// the two match rules), i.e. dst = (-a) * b + c = -(a * b - c).
instruct fnmsubD_regx4(regD dst, regD a, regD b, regD c) %{
predicate(UseFMA);
match(Set dst (FmaD c (Binary (NegD a) b)));
match(Set dst (FmaD c (Binary a (NegD b))));
format %{ "fnmsubd $a,$b,$c,$dst\t# $dst = -($a * $b - $c)" %}
ins_encode(fnmsubd(dst, a, b, c));
ins_pipe(fmaD_regx4);
%}
//----------Logical Instructions-----------------------------------------------
// And Instructions
// Register And

View File

@ -58,7 +58,6 @@
// Note: The register L7 is used as L7_thread_cache, and may not be used
// any other way within this module.
static const Register& Lstub_temp = L2;
// -------------------------------------------------------------------------------------------------------------------------
@ -4943,7 +4942,7 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
/**
/**
* Arguments:
*
* Inputs:
@ -4975,6 +4974,773 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
/**
* Arguments:
*
* Inputs:
* I0 - int* x-addr
* I1 - int x-len
* I2 - int* y-addr
* I3 - int y-len
* I4 - int* z-addr (output vector)
* I5 - int z-len
*/
address generate_multiplyToLen() {
// Emits the multiplyToLen stub and returns its entry address. The generated
// code selects one of up to four multiplication kernels at run time, based on
// the parity of the two input lengths, the alignment of the three vectors and
// (only when UseMPMUL is set) whether the lengths are equal and in [16..64].
assert(UseMultiplyToLenIntrinsic, "need VIS3 instructions");
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
address start = __ pc();
__ save_frame(0);
const Register xptr = I0; // first input address
const Register xlen = I1; // ...and length in 32b-words
const Register yptr = I2; // second input address
const Register ylen = I3; // ...and length in 32b-words
const Register zptr = I4; // output address
const Register zlen = I5; // ...and length in 32b-words
/* The minimal "limb" representation suggests that odd length vectors are as
* likely as even length dittos. This in turn suggests that we need to cope
* with odd/even length arrays and data not aligned properly for 64-bit read
* and write operations. We thus use a number of different kernels:
*
* if (is_even(x.len) && is_even(y.len))
* if (is_align64(x) && is_align64(y) && is_align64(z))
* if (x.len == y.len && 16 <= x.len && x.len <= 64)
* memv_mult_mpmul(...)
* else
* memv_mult_64x64(...)
* else
* memv_mult_64x64u(...)
* else
* memv_mult_32x32(...)
*
* Here we assume VIS3 support (for 'umulxhi', 'addxc' and 'addxccc').
* In case CBCOND instructions are supported, we will use 'cxbX'. If the
* MPMUL instruction is supported, we will generate a kernel using 'mpmul'
* (for vectors with proper characteristics).
*/
const Register tmp0 = L0;
const Register tmp1 = L1;
Label L_mult_32x32;
Label L_mult_64x64u;
Label L_mult_64x64;
Label L_exit;
// Each test below is emitted in its negated form ('false' argument): control
// branches away to the less specialised kernel when the condition does NOT
// hold, and falls through otherwise.
if_both_even(xlen, ylen, tmp0, false, L_mult_32x32);
// NOTE(review): if_all3_aligned() masks with (align - 1), so passing 64 tests
// for 64-byte alignment, while the pseudo-code above speaks of 64-bit
// alignment. Confirm which one is intended.
if_all3_aligned(xptr, yptr, zptr, tmp1, 64, false, L_mult_64x64u);
if (UseMPMUL) {
if_eq(xlen, ylen, false, L_mult_64x64);
if_in_rng(xlen, 16, 64, tmp0, tmp1, false, L_mult_64x64);
// 1. Multiply naturally aligned 64b-datums using a generic 'mpmul' kernel,
// operating on equal length vectors of size [16..64].
gen_mult_mpmul(xlen, xptr, yptr, zptr, L_exit);
}
// Every kernel leaves through a branch to L_exit (or loops back onto itself),
// so none of them falls through into the kernel emitted after it.
// 2. Multiply naturally aligned 64-bit datums (64x64).
__ bind(L_mult_64x64);
gen_mult_64x64(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);
// 3. Multiply unaligned 64-bit datums (64x64).
__ bind(L_mult_64x64u);
gen_mult_64x64_unaligned(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);
// 4. Multiply naturally aligned 32-bit datums (32x32).
__ bind(L_mult_32x32);
gen_mult_32x32(xptr, xlen, yptr, ylen, zptr, zlen, L_exit);
// Common exit: return, with the 'restore' in the delay slot undoing the
// frame set up by save_frame(0) above.
__ bind(L_exit);
__ ret();
__ delayed()->restore();
return start;
}
// Additional helper functions used by multiplyToLen generation.
void if_both_even(Register r1, Register r2, Register tmp, bool iseven, Label &L)
{
  // Bit 0 of (r1 | r2) is clear exactly when both values are even, so one test
  // of the OR-ed value covers both registers. 'tmp' is overwritten.
  const int parity_bit = 0x1;

  __ or3(r1, r2, tmp);
  __ andcc(tmp, parity_bit, tmp);   // icc.z is set iff both are even

  // Branch to L when icc.z matches 'iseven' (true: both even, false: otherwise).
  __ br_icc_zero(iseven, Assembler::pn, L);
}
void if_all3_aligned(Register r1, Register r2, Register r3,
                     Register tmp, uint align, bool isalign, Label &L)
{
  // Tests the low bits selected by (align - 1) of all three values at once by
  // OR-ing them together first. 'tmp' is overwritten.
  // Presumably 'align' is a power of two expressed in bytes - confirm at callers.
  const uint low_bits = align - 1;

  __ or3(r1, r2, tmp);
  __ or3(r3, tmp, tmp);
  __ andcc(tmp, low_bits, tmp);     // icc.z is set iff no low bit is set

  // Branch to L when icc.z matches 'isalign'.
  __ br_icc_zero(isalign, Assembler::pn, L);
}
void if_eq(Register x, Register y, bool iseq, Label &L)
{
  // Compare x with y and branch to L on equality ('iseq' true) or on
  // inequality ('iseq' false).
  if (iseq) {
    __ cmp_and_br_short(x, y, Assembler::equal, Assembler::pt, L);
  } else {
    __ cmp_and_br_short(x, y, Assembler::notEqual, Assembler::pt, L);
  }
}
void if_in_rng(Register x, int lb, int ub, Register t1, Register t2, bool inrng, Label &L)
{
  // Range test lb <= x <= ub done with a single branch: the product
  // (x - lb) * (ub - x) is non-negative exactly when x lies inside the range.
  // Both bounds must fit in a 13-bit signed immediate; the local callers rely
  // on such small integers so that the multiplication cannot overflow.
  // t1 and t2 are overwritten.
  assert(Assembler::is_simm13(lb), "Small ints only!");
  assert(Assembler::is_simm13(ub), "Small ints only!");

  // Branch when the product is >= 0 ('inrng' true) or < 0 ('inrng' false).
  const Assembler::Condition taken =
      inrng ? Assembler::greaterEqual : Assembler::less;

  __ add(G0, ub, t2);               // t2 = ub
  __ sub(x, lb, t1);                // t1 = x - lb
  __ sub(t2, x, t2);                // t2 = ub - x
  __ mulx(t1, t2, t1);              // t1 = (x - lb) * (ub - x)
  __ cmp_and_br_short(t1, G0, taken, Assembler::pt, L);
}
// One slot of a load jump table: loads the double word at base + offs into the
// floating-point register 'dest' and advances offs by 8 bytes. The slot must
// stay exactly two instructions, since the dispatch code in gen_mult_mpmul()
// indexes the table with two instructions (2 * 4 bytes) per entry.
void ldd_entry(Register base, Register offs, FloatRegister dest)
{
__ ldd(base, offs, dest);
__ inc(offs, 8);
}
// One slot of a load jump table: loads the 64-bit word at base + offs into the
// integer register 'dest' and advances offs by 8 bytes. The slot must stay
// exactly two instructions, since the dispatch code in gen_mult_mpmul()
// indexes the table with two instructions (2 * 4 bytes) per entry.
void ldx_entry(Register base, Register offs, Register dest)
{
__ ldx(base, offs, dest);
__ inc(offs, 8);
}
// One slot of the 'mpmul' jump table: issues mpmul with size argument m and
// then branches to 'next'. The slot must stay exactly two instructions, since
// the dispatch code in gen_mult_mpmul() indexes the table with two
// instructions (2 * 4 bytes) per entry.
void mpmul_entry(int m, Label &next)
{
__ mpmul(m);
// Comparing G0 with itself for equality always succeeds, i.e. this is an
// unconditional compare-and-branch.
__ cbcond(Assembler::equal, Assembler::icc, G0, G0, next);
}
void stx_entry(Label &L, Register r1, Register r2, Register base, Register offs)
{
  // One slot of the z-store sequence: bind the slot's label, then store r1
  // followed by r2 as consecutive 64-bit words at base + offs, advancing offs
  // by 8 bytes after each store.
  __ bind(L);

  const Register sources[] = { r1, r2 };
  for (int i = 0; i < 2; i++) {
    __ stx(sources[i], base, offs);
    __ inc(offs, 8);
  }
}
void offs_entry(Label &Lbl0, Label &Lbl1)
{
  // Emits one data word holding the distance from Lbl1 to Lbl0, computed from
  // the labels' positions. Both labels therefore have to be bound already.
  assert(Lbl0.is_bound(), "must be");
  assert(Lbl1.is_bound(), "must be");

  __ emit_data(Lbl0.loc_pos() - Lbl1.loc_pos());
}
/* Generate the actual multiplication kernels for BigInteger vectors:
*
* 1. gen_mult_mpmul(...)
*
* 2. gen_mult_64x64(...)
*
* 3. gen_mult_64x64_unaligned(...)
*
* 4. gen_mult_32x32(...)
*/
// Kernel 1: multiplication using the 'mpmul' instruction.
//
// len    - vector length in 32-bit limbs (the caller only gets here for even,
//          equal x/y lengths within [16..64])
// xptr   - address of the x vector
// yptr   - address of the y vector
// zptr   - address of the z (output) vector
// L_exit - label the kernel branches to when done
//
// Outline of the emitted code:
//   a. copy the three addresses to global registers,
//   b. load x through a computed jump into a table of load slots,
//   c. open a new register window and load y the same way,
//   d. open five more register windows and run mpmul(m) via a third table,
//   e. unwind 0..4 register windows depending on the size and dispatch, via a
//      table of offsets, into the store sequence that writes z and unwinds the
//      remaining windows before branching to L_exit.
// NOTE(review): how 'mpmul' distributes operands and result over the register
// windows is not visible here - confirm against the instruction description.
void gen_mult_mpmul(Register len, Register xptr, Register yptr, Register zptr,
Label &L_exit)
{
const Register zero = G0;
const Register gxp = G1; // Need to use global registers across RWs.
const Register gyp = G2;
const Register gzp = G3;
const Register offs = G4;
const Register disp = G5;
__ mov(xptr, gxp);
__ mov(yptr, gyp);
__ mov(zptr, gzp);
/* Compute jump vector entry:
*
* 1. mpmul input size (0..31) x 64b
* 2. vector input size in 32b limbs (even number)
* 3. branch entries in reverse order (31..0), using two
* instructions per entry (2 * 4 bytes).
*
* displacement = byte_offset(bra_offset(len))
* = byte_offset((64 - len)/2)
* = 8 * (64 - len)/2
* = 4 * (64 - len)
*/
Register temp = I5; // Alright to use input regs. in first batch.
__ sub(zero, len, temp);
__ add(temp, 64, temp);
__ sllx(temp, 2, disp); // disp := (64 - len) << 2
// Dispatch relative current PC, into instruction table below.
// The table starts 16 bytes after the 'rdpc': rdpc, add, jmp and the delay
// slot take four instructions. The delay slot also zeroes the load offset.
__ rdpc(temp);
__ add(temp, 16, temp);
__ jmp(temp, disp);
__ delayed()->clr(offs);
// x-vector load table, 32 slots; entry is 'disp' bytes into the table.
ldd_entry(gxp, offs, F22);
ldd_entry(gxp, offs, F20);
ldd_entry(gxp, offs, F18);
ldd_entry(gxp, offs, F16);
ldd_entry(gxp, offs, F14);
ldd_entry(gxp, offs, F12);
ldd_entry(gxp, offs, F10);
ldd_entry(gxp, offs, F8);
ldd_entry(gxp, offs, F6);
ldd_entry(gxp, offs, F4);
ldx_entry(gxp, offs, I5);
ldx_entry(gxp, offs, I4);
ldx_entry(gxp, offs, I3);
ldx_entry(gxp, offs, I2);
ldx_entry(gxp, offs, I1);
ldx_entry(gxp, offs, I0);
ldx_entry(gxp, offs, L7);
ldx_entry(gxp, offs, L6);
ldx_entry(gxp, offs, L5);
ldx_entry(gxp, offs, L4);
ldx_entry(gxp, offs, L3);
ldx_entry(gxp, offs, L2);
ldx_entry(gxp, offs, L1);
ldx_entry(gxp, offs, L0);
ldd_entry(gxp, offs, F2);
ldd_entry(gxp, offs, F0);
ldx_entry(gxp, offs, O5);
ldx_entry(gxp, offs, O4);
ldx_entry(gxp, offs, O3);
ldx_entry(gxp, offs, O2);
ldx_entry(gxp, offs, O1);
ldx_entry(gxp, offs, O0);
// First additional register window (of six in total).
__ save(SP, -176, SP);
const Register addr = gxp; // Alright to reuse 'gxp'.
// Dispatch relative current PC, into instruction table below.
__ rdpc(addr);
__ add(addr, 16, addr);
__ jmp(addr, disp);
__ delayed()->clr(offs);
// y-vector load table, 32 slots; same entry displacement as for x.
ldd_entry(gyp, offs, F58);
ldd_entry(gyp, offs, F56);
ldd_entry(gyp, offs, F54);
ldd_entry(gyp, offs, F52);
ldd_entry(gyp, offs, F50);
ldd_entry(gyp, offs, F48);
ldd_entry(gyp, offs, F46);
ldd_entry(gyp, offs, F44);
ldd_entry(gyp, offs, F42);
ldd_entry(gyp, offs, F40);
ldd_entry(gyp, offs, F38);
ldd_entry(gyp, offs, F36);
ldd_entry(gyp, offs, F34);
ldd_entry(gyp, offs, F32);
ldd_entry(gyp, offs, F30);
ldd_entry(gyp, offs, F28);
ldd_entry(gyp, offs, F26);
ldd_entry(gyp, offs, F24);
ldx_entry(gyp, offs, O5);
ldx_entry(gyp, offs, O4);
ldx_entry(gyp, offs, O3);
ldx_entry(gyp, offs, O2);
ldx_entry(gyp, offs, O1);
ldx_entry(gyp, offs, O0);
ldx_entry(gyp, offs, L7);
ldx_entry(gyp, offs, L6);
ldx_entry(gyp, offs, L5);
ldx_entry(gyp, offs, L4);
ldx_entry(gyp, offs, L3);
ldx_entry(gyp, offs, L2);
ldx_entry(gyp, offs, L1);
ldx_entry(gyp, offs, L0);
// Five more register windows; they are unwound again by the 'restore'
// instructions at L_mpmul_restore_<n> and inside the store sequence.
__ save(SP, -176, SP);
__ save(SP, -176, SP);
__ save(SP, -176, SP);
__ save(SP, -176, SP);
__ save(SP, -176, SP);
Label L_mpmul_restore_4, L_mpmul_restore_3, L_mpmul_restore_2;
Label L_mpmul_restore_1, L_mpmul_restore_0;
// Dispatch relative current PC, into instruction table below.
__ rdpc(addr);
__ add(addr, 16, addr);
__ jmp(addr, disp);
__ delayed()->clr(offs);
// 'mpmul' table. Each slot continues at L_mpmul_restore_<n>, where n is the
// number of register windows to unwind before entering the store sequence;
// the grouping mirrors the 'restore' positions in the store sequence below.
mpmul_entry(31, L_mpmul_restore_0);
mpmul_entry(30, L_mpmul_restore_0);
mpmul_entry(29, L_mpmul_restore_0);
mpmul_entry(28, L_mpmul_restore_0);
mpmul_entry(27, L_mpmul_restore_1);
mpmul_entry(26, L_mpmul_restore_1);
mpmul_entry(25, L_mpmul_restore_1);
mpmul_entry(24, L_mpmul_restore_1);
mpmul_entry(23, L_mpmul_restore_1);
mpmul_entry(22, L_mpmul_restore_1);
mpmul_entry(21, L_mpmul_restore_1);
mpmul_entry(20, L_mpmul_restore_2);
mpmul_entry(19, L_mpmul_restore_2);
mpmul_entry(18, L_mpmul_restore_2);
mpmul_entry(17, L_mpmul_restore_2);
mpmul_entry(16, L_mpmul_restore_2);
mpmul_entry(15, L_mpmul_restore_2);
mpmul_entry(14, L_mpmul_restore_2);
mpmul_entry(13, L_mpmul_restore_3);
mpmul_entry(12, L_mpmul_restore_3);
mpmul_entry(11, L_mpmul_restore_3);
mpmul_entry(10, L_mpmul_restore_3);
mpmul_entry( 9, L_mpmul_restore_3);
mpmul_entry( 8, L_mpmul_restore_3);
mpmul_entry( 7, L_mpmul_restore_3);
mpmul_entry( 6, L_mpmul_restore_4);
mpmul_entry( 5, L_mpmul_restore_4);
mpmul_entry( 4, L_mpmul_restore_4);
mpmul_entry( 3, L_mpmul_restore_4);
mpmul_entry( 2, L_mpmul_restore_4);
mpmul_entry( 1, L_mpmul_restore_4);
mpmul_entry( 0, L_mpmul_restore_4);
Label L_z31, L_z30, L_z29, L_z28, L_z27, L_z26, L_z25, L_z24;
Label L_z23, L_z22, L_z21, L_z20, L_z19, L_z18, L_z17, L_z16;
Label L_z15, L_z14, L_z13, L_z12, L_z11, L_z10, L_z09, L_z08;
Label L_z07, L_z06, L_z05, L_z04, L_z03, L_z02, L_z01, L_z00;
Label L_zst_base; // Store sequence base address.
__ bind(L_zst_base);
// z store sequence: each slot stores two 64-bit words; the interleaved
// 'restore' instructions step back one register window at a time.
stx_entry(L_z31, L7, L6, gzp, offs);
stx_entry(L_z30, L5, L4, gzp, offs);
stx_entry(L_z29, L3, L2, gzp, offs);
stx_entry(L_z28, L1, L0, gzp, offs);
__ restore();
stx_entry(L_z27, O5, O4, gzp, offs);
stx_entry(L_z26, O3, O2, gzp, offs);
stx_entry(L_z25, O1, O0, gzp, offs);
stx_entry(L_z24, L7, L6, gzp, offs);
stx_entry(L_z23, L5, L4, gzp, offs);
stx_entry(L_z22, L3, L2, gzp, offs);
stx_entry(L_z21, L1, L0, gzp, offs);
__ restore();
stx_entry(L_z20, O5, O4, gzp, offs);
stx_entry(L_z19, O3, O2, gzp, offs);
stx_entry(L_z18, O1, O0, gzp, offs);
stx_entry(L_z17, L7, L6, gzp, offs);
stx_entry(L_z16, L5, L4, gzp, offs);
stx_entry(L_z15, L3, L2, gzp, offs);
stx_entry(L_z14, L1, L0, gzp, offs);
__ restore();
stx_entry(L_z13, O5, O4, gzp, offs);
stx_entry(L_z12, O3, O2, gzp, offs);
stx_entry(L_z11, O1, O0, gzp, offs);
stx_entry(L_z10, L7, L6, gzp, offs);
stx_entry(L_z09, L5, L4, gzp, offs);
stx_entry(L_z08, L3, L2, gzp, offs);
stx_entry(L_z07, L1, L0, gzp, offs);
__ restore();
stx_entry(L_z06, O5, O4, gzp, offs);
stx_entry(L_z05, O3, O2, gzp, offs);
stx_entry(L_z04, O1, O0, gzp, offs);
stx_entry(L_z03, L7, L6, gzp, offs);
stx_entry(L_z02, L5, L4, gzp, offs);
stx_entry(L_z01, L3, L2, gzp, offs);
stx_entry(L_z00, L1, L0, gzp, offs);
__ restore();
__ restore();
// Exit out of 'mpmul' routine, back to multiplyToLen.
__ ba_short(L_exit);
// Table of 32-bit offsets (relative to L_zst_base) of the store slots above,
// one data word per slot, in the same order as the 'mpmul' table.
Label L_zst_offs;
__ bind(L_zst_offs);
offs_entry(L_z31, L_zst_base); // index 31: 2048x2048
offs_entry(L_z30, L_zst_base);
offs_entry(L_z29, L_zst_base);
offs_entry(L_z28, L_zst_base);
offs_entry(L_z27, L_zst_base);
offs_entry(L_z26, L_zst_base);
offs_entry(L_z25, L_zst_base);
offs_entry(L_z24, L_zst_base);
offs_entry(L_z23, L_zst_base);
offs_entry(L_z22, L_zst_base);
offs_entry(L_z21, L_zst_base);
offs_entry(L_z20, L_zst_base);
offs_entry(L_z19, L_zst_base);
offs_entry(L_z18, L_zst_base);
offs_entry(L_z17, L_zst_base);
offs_entry(L_z16, L_zst_base);
offs_entry(L_z15, L_zst_base);
offs_entry(L_z14, L_zst_base);
offs_entry(L_z13, L_zst_base);
offs_entry(L_z12, L_zst_base);
offs_entry(L_z11, L_zst_base);
offs_entry(L_z10, L_zst_base);
offs_entry(L_z09, L_zst_base);
offs_entry(L_z08, L_zst_base);
offs_entry(L_z07, L_zst_base);
offs_entry(L_z06, L_zst_base);
offs_entry(L_z05, L_zst_base);
offs_entry(L_z04, L_zst_base);
offs_entry(L_z03, L_zst_base);
offs_entry(L_z02, L_zst_base);
offs_entry(L_z01, L_zst_base);
offs_entry(L_z00, L_zst_base); // index 0: 64x64
// Landing pads of the 'mpmul' table: entering at L_mpmul_restore_<n> executes
// n 'restore' instructions before falling into the dispatch code below.
__ bind(L_mpmul_restore_4);
__ restore();
__ bind(L_mpmul_restore_3);
__ restore();
__ bind(L_mpmul_restore_2);
__ restore();
__ bind(L_mpmul_restore_1);
__ restore();
__ bind(L_mpmul_restore_0);
// Dispatch via offset vector entry, into z-store sequence.
Label L_zst_rdpc;
__ bind(L_zst_rdpc);
assert(L_zst_base.is_bound(), "must be");
assert(L_zst_offs.is_bound(), "must be");
assert(L_zst_rdpc.is_bound(), "must be");
// Distances, known at code generation time, from the 'rdpc' below back to the
// store sequence and to the offset table.
int dbase = L_zst_rdpc.loc_pos() - L_zst_base.loc_pos();
int doffs = L_zst_rdpc.loc_pos() - L_zst_offs.loc_pos();
temp = gyp; // Alright to reuse 'gyp'.
__ rdpc(addr);
__ sub(addr, doffs, temp);
// The offset table has one 4-byte word per slot, whereas 'disp' counts 8 bytes
// per slot, hence the halving.
__ srlx(disp, 1, disp);
__ lduw(temp, disp, offs);
__ sub(addr, dbase, temp);
__ jmp(temp, offs);
__ delayed()->clr(offs);
}
// Kernel 2: multiplication on naturally aligned 64-bit datums, using 'ldx' and
// 'stx' for all vector accesses.
//
// xp, yp, zp - addresses of the x, y and z (output) vectors
// xn, yn, zn - the corresponding lengths, handed in as 32-bit word counts; the
//              three registers are overwritten (halved, then decremented)
// L_exit     - label branched to once the outer y-loop has finished
//
// A first pass writes z from x * y[yn] (last y datum) plus running carry. The
// remaining y data are then handled one by one, each pass accumulating
// x * y[j] into the z data already stored. All loops run from the highest
// index down to 0. The code never falls through at the end.
void gen_mult_64x64(Register xp, Register xn,
Register yp, Register yn,
Register zp, Register zn, Label &L_exit)
{
// Assuming that a stack frame has already been created, i.e. local and
// output registers are available for immediate use.
const Register ri = L0; // Outer loop index, xv[i]
const Register rj = L1; // Inner loop index, yv[j]
const Register rk = L2; // Output loop index, zv[k]
const Register rx = L4; // x-vector datum [i]
const Register ry = L5; // y-vector datum [j]
const Register rz = L6; // z-vector datum [k]
const Register rc = L7; // carry over (to z-vector datum [k-1])
const Register lop = O0; // lo-64b product
const Register hip = O1; // hi-64b product
const Register zero = G0;
Label L_loop_i, L_exit_loop_i;
Label L_loop_j;
Label L_loop_i2, L_exit_loop_i2;
__ srlx(xn, 1, xn); // index for u32 to u64 ditto
__ srlx(yn, 1, yn); // index for u32 to u64 ditto
__ srlx(zn, 1, zn); // index for u32 to u64 ditto
__ dec(xn); // Adjust [0..(N/2)-1]
__ dec(yn);
__ dec(zn);
__ clr(rc); // u64 c = 0
__ sllx(xn, 3, ri); // int i = xn (byte offset i = 8*xn)
__ sllx(yn, 3, rj); // int j = yn (byte offset j = 8*yn)
__ sllx(zn, 3, rk); // int k = zn (byte offset k = 8*zn)
__ ldx(yp, rj, ry); // u64 y = yp[yn]
// for (int i = xn; i >= 0; i--)
__ bind(L_loop_i);
__ cmp_and_br_short(ri, 0, // i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i);
__ ldx(xp, ri, rx); // x = xp[i]
__ mulx(rx, ry, lop); // lo-64b-part of result 64x64
__ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64
__ addcc(rc, lop, lop); // Accumulate lower order bits (producing carry)
__ addxc(hip, zero, rc); // carry over to next datum [k-1]
__ stx(lop, zp, rk); // z[k] = lop
__ dec(rk, 8); // k--
__ dec(ri, 8); // i--
__ ba_short(L_loop_i);
__ bind(L_exit_loop_i);
__ stx(rc, zp, rk); // z[k] = c
// for (int j = yn - 1; j >= 0; j--)
__ sllx(yn, 3, rj); // int j = yn - 1 (byte offset j = 8*yn)
__ dec(rj, 8);
__ bind(L_loop_j);
__ cmp_and_br_short(rj, 0, // j >= 0
Assembler::less, Assembler::pn, L_exit);
__ clr(rc); // u64 c = 0
__ ldx(yp, rj, ry); // u64 y = yp[j]
// for (int i = xn, k = --zn; i >= 0; i--)
__ dec(zn); // --zn
__ sllx(xn, 3, ri); // int i = xn (byte offset i = 8*xn)
__ sllx(zn, 3, rk); // int k = zn (byte offset k = 8*zn)
__ bind(L_loop_i2);
__ cmp_and_br_short(ri, 0, // i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i2);
__ ldx(xp, ri, rx); // x = xp[i]
__ ldx(zp, rk, rz); // z = zp[k], accumulator
__ mulx(rx, ry, lop); // lo-64b-part of result 64x64
__ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64
__ addcc(rz, rc, rz); // Accumulate lower order bits,
__ addxc(hip, zero, rc); // Accumulate higher order bits to carry
__ addcc(rz, lop, rz); // z += lo(p) + c
__ addxc(rc, zero, rc); // fold the carry of the second addition into c
__ stx(rz, zp, rk); // zp[k] = z
__ dec(rk, 8); // k--
__ dec(ri, 8); // i--
__ ba_short(L_loop_i2);
__ bind(L_exit_loop_i2);
__ stx(rc, zp, rk); // z[k] = c
__ dec(rj, 8); // j--
__ ba_short(L_loop_j);
}
// Kernel 3: 64x64-bit multiplication for vectors that are not suitably aligned
// for 64-bit accesses. Every 64-bit datum is read as two 32-bit halves (lduw at
// offsets 0 and 4, combined with sllx/or3) and written as two 32-bit halves
// (stw at offsets 0 and 4).
//
// xp, yp, zp - addresses of the x, y and z (output) vectors
// xn, yn, zn - the corresponding lengths, handed in as 32-bit word counts; the
//              three registers are overwritten (halved, then decremented)
// L_exit     - label branched to once the outer y-loop has finished
//
// Unlike the index-based kernels, the loops use cursor registers that are
// compared against the vector base addresses. The pass structure is the same:
// a first pass for the last y datum, then one accumulating pass per remaining
// y datum. The code never falls through at the end.
void gen_mult_64x64_unaligned(Register xp, Register xn,
Register yp, Register yn,
Register zp, Register zn, Label &L_exit)
{
// Assuming that a stack frame has already been created, i.e. local and
// output registers are available for use.
const Register xpc = L0; // Outer loop cursor, xp[i]
const Register ypc = L1; // Inner loop cursor, yp[j]
const Register zpc = L2; // Output loop cursor, zp[k]
const Register rx = L4; // x-vector datum [i]
const Register ry = L5; // y-vector datum [j]
const Register rz = L6; // z-vector datum [k]
const Register rc = L7; // carry over (to z-vector datum [k-1])
const Register rt = O2; // scratch: upper 32-bit half of a datum
const Register lop = O0; // lo-64b product
const Register hip = O1; // hi-64b product
const Register zero = G0;
Label L_loop_i, L_exit_loop_i;
Label L_loop_j;
Label L_loop_i2, L_exit_loop_i2;
__ srlx(xn, 1, xn); // index for u32 to u64 ditto
__ srlx(yn, 1, yn); // index for u32 to u64 ditto
__ srlx(zn, 1, zn); // index for u32 to u64 ditto
__ dec(xn); // Adjust [0..(N/2)-1]
__ dec(yn);
__ dec(zn);
__ clr(rc); // u64 c = 0
__ sllx(xn, 3, xpc); // u32* xpc = &xp[xn] (byte offset 8*xn)
__ add(xp, xpc, xpc);
__ sllx(yn, 3, ypc); // u32* ypc = &yp[yn] (byte offset 8*yn)
__ add(yp, ypc, ypc);
__ sllx(zn, 3, zpc); // u32* zpc = &zp[zn] (byte offset 8*zn)
__ add(zp, zpc, zpc);
__ lduw(ypc, 0, rt); // u64 y = yp[yn]
__ lduw(ypc, 4, ry); // ...
__ sllx(rt, 32, rt);
__ or3(rt, ry, ry);
// for (int i = xn; i >= 0; i--)
__ bind(L_loop_i);
__ cmp_and_br_short(xpc, xp,// i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i);
__ lduw(xpc, 0, rt); // u64 x = xp[i]
__ lduw(xpc, 4, rx); // ...
__ sllx(rt, 32, rt);
__ or3(rt, rx, rx);
__ mulx(rx, ry, lop); // lo-64b-part of result 64x64
__ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64
__ addcc(rc, lop, lop); // Accumulate lower order bits (producing carry)
__ addxc(hip, zero, rc); // carry over to next datum [k-1]
__ srlx(lop, 32, rt);
__ stw(rt, zpc, 0); // z[k] = lop
__ stw(lop, zpc, 4); // ...
__ dec(zpc, 8); // k-- (zpc--)
__ dec(xpc, 8); // i-- (xpc--)
__ ba_short(L_loop_i);
__ bind(L_exit_loop_i);
__ srlx(rc, 32, rt);
__ stw(rt, zpc, 0); // z[k] = c
__ stw(rc, zpc, 4);
// for (int j = yn - 1; j >= 0; j--)
__ sllx(yn, 3, ypc); // u32* ypc = &yp[yn] (byte offset 8*yn)
__ add(yp, ypc, ypc);
__ dec(ypc, 8); // yn - 1 (ypc--)
__ bind(L_loop_j);
__ cmp_and_br_short(ypc, yp,// j >= 0
Assembler::less, Assembler::pn, L_exit);
__ clr(rc); // u64 c = 0
__ lduw(ypc, 0, rt); // u64 y = yp[j] (= *ypc)
__ lduw(ypc, 4, ry); // ...
__ sllx(rt, 32, rt);
__ or3(rt, ry, ry);
// for (int i = xn, k = --zn; i >= 0; i--)
__ sllx(xn, 3, xpc); // u32* xpc = &xp[xn] (byte offset 8*xn)
__ add(xp, xpc, xpc);
__ dec(zn); // --zn
__ sllx(zn, 3, zpc); // u32* zpc = &zp[zn] (byte offset 8*zn)
__ add(zp, zpc, zpc);
__ bind(L_loop_i2);
__ cmp_and_br_short(xpc, xp,// i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i2);
__ lduw(xpc, 0, rt); // u64 x = xp[i] (= *xpc)
__ lduw(xpc, 4, rx); // ...
__ sllx(rt, 32, rt);
__ or3(rt, rx, rx);
__ lduw(zpc, 0, rt); // u64 z = zp[k] (= *zpc)
__ lduw(zpc, 4, rz); // ...
__ sllx(rt, 32, rt);
__ or3(rt, rz, rz);
__ mulx(rx, ry, lop); // lo-64b-part of result 64x64
__ umulxhi(rx, ry, hip); // hi-64b-part of result 64x64
__ addcc(rz, rc, rz); // Accumulate lower order bits...
__ addxc(hip, zero, rc); // Accumulate higher order bits to carry
__ addcc(rz, lop, rz); // ... z += lo(p) + c
// NOTE(review): the aligned 64x64 kernel uses 'addxc' at this point; the
// condition codes set by 'addxccc' do not appear to be consumed before the
// next compare - confirm the difference is intentional.
__ addxccc(rc, zero, rc);
__ srlx(rz, 32, rt);
__ stw(rt, zpc, 0); // zp[k] = z (*zpc = z)
__ stw(rz, zpc, 4);
__ dec(zpc, 8); // k-- (zpc--)
__ dec(xpc, 8); // i-- (xpc--)
__ ba_short(L_loop_i2);
__ bind(L_exit_loop_i2);
__ srlx(rc, 32, rt);
__ stw(rt, zpc, 0); // z[k] = c
__ stw(rc, zpc, 4);
__ dec(ypc, 8); // j-- (ypc--)
__ ba_short(L_loop_j);
}
// Kernel 4: multiplication on 32-bit datums, using 'lduw' and 'stw' for all
// vector accesses and a single 64-bit 'mulx' per 32x32-bit product.
//
// xp, yp, zp - addresses of the x, y and z (output) vectors
// xn, yn, zn - the corresponding lengths in 32-bit words; the three registers
//              are overwritten (decremented)
// L_exit     - label branched to once the outer y-loop has finished
//
// A first pass writes z from x * y[yn] (last y datum) plus running carry. The
// remaining y data are then handled one by one, each pass accumulating
// x * y[j] into the z data already stored. All loops run from the highest
// index down to 0. The code never falls through at the end.
void gen_mult_32x32(Register xp, Register xn,
Register yp, Register yn,
Register zp, Register zn, Label &L_exit)
{
// Assuming that a stack frame has already been created, i.e. local and
// output registers are available for use.
const Register ri = L0; // Outer loop index, xv[i]
const Register rj = L1; // Inner loop index, yv[j]
const Register rk = L2; // Output loop index, zv[k]
const Register rx = L4; // x-vector datum [i]
const Register ry = L5; // y-vector datum [j]
const Register rz = L6; // z-vector datum [k]
const Register rc = L7; // carry over (to z-vector datum [k-1])
const Register p64 = O0; // 64b product
const Register z65 = O1; // carry+64b accumulator
// c65 and c33 are two names for the same register (O2): the carry is
// materialised there and then shifted in place.
const Register c65 = O2; // carry at bit 65
const Register c33 = O2; // carry at bit 33 (after shift)
const Register zero = G0;
Label L_loop_i, L_exit_loop_i;
Label L_loop_j;
Label L_loop_i2, L_exit_loop_i2;
__ dec(xn); // Adjust [0..N-1]
__ dec(yn);
__ dec(zn);
__ clr(rc); // u32 c = 0
__ sllx(xn, 2, ri); // int i = xn (byte offset i = 4*xn)
__ sllx(yn, 2, rj); // int j = yn (byte offset j = 4*yn)
__ sllx(zn, 2, rk); // int k = zn (byte offset k = 4*zn)
__ lduw(yp, rj, ry); // u32 y = yp[yn]
// for (int i = xn; i >= 0; i--)
__ bind(L_loop_i);
__ cmp_and_br_short(ri, 0, // i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i);
__ lduw(xp, ri, rx); // x = xp[i]
__ mulx(rx, ry, p64); // 64b result of 32x32
__ addcc(rc, p64, z65); // Accumulate to 65 bits (producing carry)
__ addxc(zero, zero, c65); // Materialise carry (in bit 65) into lsb,
__ sllx(c65, 32, c33); // and shift into bit 33
__ srlx(z65, 32, rc); // carry = c33 | hi(z65) >> 32
__ add(c33, rc, rc); // carry over to next datum [k-1]
__ stw(z65, zp, rk); // z[k] = lo(z65)
__ dec(rk, 4); // k--
__ dec(ri, 4); // i--
__ ba_short(L_loop_i);
__ bind(L_exit_loop_i);
__ stw(rc, zp, rk); // z[k] = c
// for (int j = yn - 1; j >= 0; j--)
__ sllx(yn, 2, rj); // int j = yn - 1 (byte offset j = 4*yn)
__ dec(rj, 4);
__ bind(L_loop_j);
__ cmp_and_br_short(rj, 0, // j >= 0
Assembler::less, Assembler::pn, L_exit);
__ clr(rc); // u32 c = 0
__ lduw(yp, rj, ry); // u32 y = yp[j]
// for (int i = xn, k = --zn; i >= 0; i--)
__ dec(zn); // --zn
__ sllx(xn, 2, ri); // int i = xn (byte offset i = 4*xn)
__ sllx(zn, 2, rk); // int k = zn (byte offset k = 4*zn)
__ bind(L_loop_i2);
__ cmp_and_br_short(ri, 0, // i >= 0
Assembler::less, Assembler::pn, L_exit_loop_i2);
__ lduw(xp, ri, rx); // x = xp[i]
__ lduw(zp, rk, rz); // z = zp[k], accumulator
__ mulx(rx, ry, p64); // 64b result of 32x32
__ add(rz, rc, rz); // Accumulate lower order bits,
__ addcc(rz, p64, z65); // z += lo(p64) + c
__ addxc(zero, zero, c65); // Materialise carry (in bit 65) into lsb,
__ sllx(c65, 32, c33); // and shift into bit 33
__ srlx(z65, 32, rc); // carry = c33 | hi(z65) >> 32
__ add(c33, rc, rc); // carry over to next datum [k-1]
__ stw(z65, zp, rk); // zp[k] = lo(z65)
__ dec(rk, 4); // k--
__ dec(ri, 4); // i--
__ ba_short(L_loop_i2);
__ bind(L_exit_loop_i2);
__ stw(rc, zp, rk); // z[k] = c
__ dec(rj, 4); // j--
__ ba_short(L_loop_j);
}
void generate_initial() {
// Generates all stubs and initializes the entry points
@ -5073,8 +5839,14 @@ class StubGenerator: public StubCodeGenerator {
if (UseAdler32Intrinsics) {
StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
}
}
#ifdef COMPILER2
// Intrinsics supported by C2 only:
if (UseMultiplyToLenIntrinsic) {
StubRoutines::_multiplyToLen = generate_multiplyToLen();
}
#endif // COMPILER2
}
public:
StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) {

View File

@ -41,7 +41,7 @@ static bool returns_to_call_stub(address return_pc) {
enum /* platform_dependent_constants */ {
// %%%%%%%% May be able to shrink this a lot
code_size1 = 20000, // simply increase if too small (assembler will crash if too small)
code_size2 = 27000 // simply increase if too small (assembler will crash if too small)
code_size2 = 29000 // simply increase if too small (assembler will crash if too small)
};
class Sparc {

View File

@ -2049,6 +2049,7 @@ void TemplateTable::load_field_cp_cache_entry(Register Robj,
__ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::f1_offset(), Robj);
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ ld_ptr( Robj, mirror_offset, Robj);
__ resolve_oop_handle(Robj);
}
}

View File

@ -101,6 +101,14 @@
declare_constant(VM_Version::ISA_XMONT) \
declare_constant(VM_Version::ISA_PAUSE_NSEC) \
declare_constant(VM_Version::ISA_VAMASK) \
declare_constant(VM_Version::ISA_SPARC6) \
declare_constant(VM_Version::ISA_DICTUNP) \
declare_constant(VM_Version::ISA_FPCMPSHL) \
declare_constant(VM_Version::ISA_RLE) \
declare_constant(VM_Version::ISA_SHA3) \
declare_constant(VM_Version::ISA_VIS3C) \
declare_constant(VM_Version::ISA_SPARC5B) \
declare_constant(VM_Version::ISA_MME) \
declare_constant(VM_Version::CPU_FAST_IDIV) \
declare_constant(VM_Version::CPU_FAST_RDPC) \
declare_constant(VM_Version::CPU_FAST_BIS) \

View File

@ -103,7 +103,7 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 1);
}
else if (has_sparc5()) {
// Use prefetch instruction to avoid partial RAW issue on Core S4 processors,
// Use prefetch instruction to avoid partial RAW issue on Core C4 processors,
// also use prefetch style 3.
FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
@ -128,7 +128,7 @@ void VM_Version::initialize() {
// We increase the number of prefetched cache lines, to use just a bit more
// aggressive approach, when the L2-cache line size is small (32 bytes), or
// when running on newer processor implementations, such as the Core S4.
// when running on newer processor implementations, such as the Core C4.
bool inc_prefetch = cache_line_size > 0 && (cache_line_size < 64 || has_sparc5());
if (inc_prefetch) {
@ -168,6 +168,16 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseCBCond, false);
}
// Use 'mpmul' instruction if available.
if (has_mpmul()) {
if (FLAG_IS_DEFAULT(UseMPMUL)) {
FLAG_SET_DEFAULT(UseMPMUL, true);
}
} else if (UseMPMUL) {
warning("MPMUL instruction is not available on this CPU");
FLAG_SET_DEFAULT(UseMPMUL, false);
}
assert(BlockZeroingLowLimit > 0, "invalid value");
if (has_blk_zeroing() && cache_line_size > 0) {
@ -208,7 +218,9 @@ void VM_Version::initialize() {
char buf[512];
jio_snprintf(buf, sizeof(buf),
"%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
"%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s"
"%s%s%s%s%s%s%s%s%s" "%s%s%s%s%s%s%s%s%s"
"%s%s%s%s%s%s%s",
(has_v9() ? "v9" : ""),
(has_popc() ? ", popc" : ""),
(has_vis1() ? ", vis1" : ""),
@ -241,6 +253,16 @@ void VM_Version::initialize() {
(has_pause_nsec() ? ", pause_nsec" : ""),
(has_vamask() ? ", vamask" : ""),
(has_sparc6() ? ", sparc6" : ""),
(has_dictunp() ? ", dictunp" : ""),
(has_fpcmpshl() ? ", fpcmpshl" : ""),
(has_rle() ? ", rle" : ""),
(has_sha3() ? ", sha3" : ""),
(has_athena_plus2()? ", athena_plus2" : ""),
(has_vis3c() ? ", vis3c" : ""),
(has_sparc5b() ? ", sparc5b" : ""),
(has_mme() ? ", mme" : ""),
(has_fast_idiv() ? ", *idiv" : ""),
(has_fast_rdpc() ? ", *rdpc" : ""),
(has_fast_bis() ? ", *bis" : ""),
@ -409,6 +431,15 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
}
if (UseVIS > 2) {
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, true);
}
} else if (UseMultiplyToLenIntrinsic) {
warning("SPARC multiplyToLen intrinsics require VIS3 instructions support. Intrinsics will be disabled");
FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
}
if (UseVectorizedMismatchIntrinsic) {
warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);

View File

@ -67,6 +67,16 @@ protected:
ISA_PAUSE_NSEC,
ISA_VAMASK,
ISA_SPARC6,
ISA_DICTUNP,
ISA_FPCMPSHL,
ISA_RLE,
ISA_SHA3,
ISA_FJATHPLUS2,
ISA_VIS3C,
ISA_SPARC5B,
ISA_MME,
// Synthesised properties:
CPU_FAST_IDIV,
@ -79,7 +89,7 @@ protected:
};
private:
enum { ISA_last_feature = ISA_VAMASK,
enum { ISA_last_feature = ISA_MME,
CPU_last_feature = CPU_BLK_ZEROING };
enum {
@ -119,6 +129,16 @@ private:
ISA_pause_nsec_msk = UINT64_C(1) << ISA_PAUSE_NSEC,
ISA_vamask_msk = UINT64_C(1) << ISA_VAMASK,
ISA_sparc6_msk = UINT64_C(1) << ISA_SPARC6,
ISA_dictunp_msk = UINT64_C(1) << ISA_DICTUNP,
ISA_fpcmpshl_msk = UINT64_C(1) << ISA_FPCMPSHL,
ISA_rle_msk = UINT64_C(1) << ISA_RLE,
ISA_sha3_msk = UINT64_C(1) << ISA_SHA3,
ISA_fjathplus2_msk = UINT64_C(1) << ISA_FJATHPLUS2,
ISA_vis3c_msk = UINT64_C(1) << ISA_VIS3C,
ISA_sparc5b_msk = UINT64_C(1) << ISA_SPARC5B,
ISA_mme_msk = UINT64_C(1) << ISA_MME,
CPU_fast_idiv_msk = UINT64_C(1) << CPU_FAST_IDIV,
CPU_fast_rdpc_msk = UINT64_C(1) << CPU_FAST_RDPC,
CPU_fast_bis_msk = UINT64_C(1) << CPU_FAST_BIS,
@ -153,40 +173,51 @@ private:
* UltraSPARC T2+: (Victoria Falls, etc.)
* SPARC-V9, VIS, VIS2, ASI_BIS, POPC (Crypto/hash in SPU)
*
* UltraSPARC T3: (Rainbow Falls/S2)
* UltraSPARC T3: (Rainbow Falls/C2)
* SPARC-V9, VIS, VIS2, ASI_BIS, POPC (Crypto/hash in SPU)
*
* Oracle SPARC T4/T5/M5: (Core S3)
* Oracle SPARC T4/T5/M5: (Core C3)
* SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
* AES, DES, Kasumi, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL
*
* Oracle SPARC M7: (Core S4)
* Oracle SPARC M7: (Core C4)
* SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
* AES, DES, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL, VIS3b,
* ADI, SPARC5, MWAIT, XMPMUL, XMONT, PAUSE_NSEC, VAMASK
*
* Oracle SPARC M8: (Core C5)
* SPARC-V9, VIS, VIS2, VIS3, ASI_BIS, HPC, POPC, FMAF, IMA, PAUSE, CBCOND,
* AES, DES, Camellia, MD5, SHA1, SHA256, SHA512, CRC32C, MONT, MPMUL, VIS3b,
* ADI, SPARC5, MWAIT, XMPMUL, XMONT, PAUSE_NSEC, VAMASK, SPARC6, FPCMPSHL,
* DICTUNP, RLE, SHA3, MME
*
* NOTE: Oracle Number support ignored.
*/
enum {
niagara1_msk = ISA_v9_msk | ISA_vis1_msk | ISA_blk_init_msk,
niagara2_msk = niagara1_msk | ISA_popc_msk,
core_S2_msk = niagara2_msk | ISA_vis2_msk,
core_C2_msk = niagara2_msk | ISA_vis2_msk,
core_S3_msk = core_S2_msk | ISA_fmaf_msk | ISA_vis3_msk | ISA_hpc_msk |
core_C3_msk = core_C2_msk | ISA_fmaf_msk | ISA_vis3_msk | ISA_hpc_msk |
ISA_ima_msk | ISA_aes_msk | ISA_des_msk | ISA_kasumi_msk |
ISA_camellia_msk | ISA_md5_msk | ISA_sha1_msk | ISA_sha256_msk |
ISA_sha512_msk | ISA_mpmul_msk | ISA_mont_msk | ISA_pause_msk |
ISA_cbcond_msk | ISA_crc32c_msk,
core_S4_msk = core_S3_msk - ISA_kasumi_msk |
core_C4_msk = core_C3_msk - ISA_kasumi_msk |
ISA_vis3b_msk | ISA_adi_msk | ISA_sparc5_msk | ISA_mwait_msk |
ISA_xmpmul_msk | ISA_xmont_msk | ISA_pause_nsec_msk | ISA_vamask_msk,
core_C5_msk = core_C4_msk | ISA_sparc6_msk | ISA_dictunp_msk |
ISA_fpcmpshl_msk | ISA_rle_msk | ISA_sha3_msk | ISA_mme_msk,
ultra_sparc_t1_msk = niagara1_msk,
ultra_sparc_t2_msk = niagara2_msk,
ultra_sparc_t3_msk = core_S2_msk,
ultra_sparc_m5_msk = core_S3_msk, // NOTE: First out-of-order pipeline.
ultra_sparc_m7_msk = core_S4_msk
ultra_sparc_t3_msk = core_C2_msk,
ultra_sparc_m5_msk = core_C3_msk, // NOTE: First out-of-order pipeline.
ultra_sparc_m7_msk = core_C4_msk,
ultra_sparc_m8_msk = core_C5_msk
};
static uint _L2_data_cache_line_size;
@ -247,6 +278,16 @@ public:
static bool has_pause_nsec() { return (_features & ISA_pause_nsec_msk) != 0; }
static bool has_vamask() { return (_features & ISA_vamask_msk) != 0; }
static bool has_sparc6() { return (_features & ISA_sparc6_msk) != 0; }
static bool has_dictunp() { return (_features & ISA_dictunp_msk) != 0; }
static bool has_fpcmpshl() { return (_features & ISA_fpcmpshl_msk) != 0; }
static bool has_rle() { return (_features & ISA_rle_msk) != 0; }
static bool has_sha3() { return (_features & ISA_sha3_msk) != 0; }
static bool has_athena_plus2() { return (_features & ISA_fjathplus2_msk) != 0; }
static bool has_vis3c() { return (_features & ISA_vis3c_msk) != 0; }
static bool has_sparc5b() { return (_features & ISA_sparc5b_msk) != 0; }
static bool has_mme() { return (_features & ISA_mme_msk) != 0; }
static bool has_fast_idiv() { return (_features & CPU_fast_idiv_msk) != 0; }
static bool has_fast_rdpc() { return (_features & CPU_fast_rdpc_msk) != 0; }
static bool has_fast_bis() { return (_features & CPU_fast_bis_msk) != 0; }

View File

@ -6617,6 +6617,7 @@ void MacroAssembler::load_mirror(Register mirror, Register method) {
movptr(mirror, Address(mirror, ConstMethod::constants_offset()));
movptr(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes()));
movptr(mirror, Address(mirror, mirror_offset));
resolve_oop_handle(mirror);
}
void MacroAssembler::load_klass(Register dst, Register src) {

View File

@ -2665,6 +2665,7 @@ void TemplateTable::load_field_cp_cache_entry(Register obj,
ConstantPoolCacheEntry::f1_offset())));
const int mirror_offset = in_bytes(Klass::java_mirror_offset());
__ movptr(obj, Address(obj, mirror_offset));
__ resolve_oop_handle(obj);
}
}

View File

@ -46,7 +46,7 @@ address VM_Version::_cpuinfo_segv_addr = 0;
address VM_Version::_cpuinfo_cont_addr = 0;
static BufferBlob* stub_blob;
static const int stub_size = 1000;
static const int stub_size = 1100;
extern "C" {
typedef void (*get_cpu_info_stub_t)(void*);
@ -70,7 +70,7 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, done, wrapup;
Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
@ -267,14 +267,30 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
__ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
__ jcc(Assembler::belowEqual, done);
__ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
__ jccb(Assembler::belowEqual, ext_cpuid1);
__ jcc(Assembler::belowEqual, ext_cpuid1);
__ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
__ jccb(Assembler::belowEqual, ext_cpuid5);
__ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
__ jccb(Assembler::belowEqual, ext_cpuid7);
__ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
__ jccb(Assembler::belowEqual, ext_cpuid8);
__ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
__ jccb(Assembler::below, ext_cpuid8);
//
// Extended cpuid(0x8000001E)
//
__ movl(rax, 0x8000001E);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
__ movl(Address(rsi, 0), rax);
__ movl(Address(rsi, 4), rbx);
__ movl(Address(rsi, 8), rcx);
__ movl(Address(rsi,12), rdx);
//
// Extended cpuid(0x80000008)
//
__ bind(ext_cpuid8);
__ movl(rax, 0x80000008);
__ cpuid();
__ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
@ -1109,11 +1125,27 @@ void VM_Version::get_processor_features() {
}
#ifdef COMPILER2
if (MaxVectorSize > 16) {
// Limit vectors size to 16 bytes on current AMD cpus.
if (cpu_family() < 0x17 && MaxVectorSize > 16) {
// Limit vectors size to 16 bytes on AMD cpus < 17h.
FLAG_SET_DEFAULT(MaxVectorSize, 16);
}
#endif // COMPILER2
// Some defaults for AMD family 17h
if ( cpu_family() == 0x17 ) {
// On family 17h processors use XMM and UnalignedLoadStores for Array Copy
if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
}
if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
}
#ifdef COMPILER2
if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
FLAG_SET_DEFAULT(UseFPUForSpilling, true);
}
#endif
}
}
if( is_intel() ) { // Intel cpus specific settings

View File

@ -228,6 +228,15 @@ class VM_Version : public Abstract_VM_Version {
} bits;
};
// Overlay for the EBX word captured from cpuid function 0x8000001E.
// The same 32 bits can be read either as the raw register value or through
// the named bit-fields below.
union ExtCpuid1EEbx {
// Raw 32-bit register contents.
uint32_t value;
struct {
// First 8 declared bits: unnamed, not interpreted here.
uint32_t : 8,
// 8-bit field consumed by threads_per_core(), which adds 1 to it --
// presumably the hardware encodes (threads per core - 1); confirm against
// the AMD CPUID specification.
threads_per_core : 8,
// Remaining 16 declared bits: unnamed, not interpreted here.
: 16;
} bits;
};
union XemXcr0Eax {
uint32_t value;
struct {
@ -398,6 +407,12 @@ protected:
ExtCpuid8Ecx ext_cpuid8_ecx;
uint32_t ext_cpuid8_edx; // reserved
// cpuid function 0x8000001E // AMD 17h
uint32_t ext_cpuid1E_eax;
ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
uint32_t ext_cpuid1E_ecx;
uint32_t ext_cpuid1E_edx; // unused currently
// extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
XemXcr0Eax xem_xcr0_eax;
uint32_t xem_xcr0_edx; // reserved
@ -505,6 +520,14 @@ protected:
result |= CPU_CLMUL;
if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
result |= CPU_RTM;
if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
result |= CPU_ADX;
if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
result |= CPU_BMI2;
if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
result |= CPU_SHA;
if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
result |= CPU_FMA;
// AMD features.
if (is_amd()) {
@ -518,16 +541,8 @@ protected:
}
// Intel features.
if(is_intel()) {
if(_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
result |= CPU_ADX;
if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
result |= CPU_BMI2;
if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
result |= CPU_SHA;
if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
result |= CPU_LZCNT;
if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
result |= CPU_FMA;
// for Intel, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
if (_cpuid_info.ext_cpuid1_ecx.bits.misalignsse != 0) {
result |= CPU_3DNOW_PREFETCH;
@ -590,6 +605,7 @@ public:
static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
static ByteSize ext_cpuid1E_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
@ -673,8 +689,12 @@ public:
if (is_intel() && supports_processor_topology()) {
result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
} else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
cores_per_cpu();
if (cpu_family() >= 0x17) {
result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
} else {
result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
cores_per_cpu();
}
}
return (result == 0 ? 1 : result);
}

View File

@ -770,8 +770,15 @@ static void *thread_native_entry(Thread *thread) {
const pthread_t pthread_id = ::pthread_self();
const tid_t kernel_thread_id = ::thread_self();
log_info(os, thread)("Thread is alive (tid: " UINTX_FORMAT ", kernel thread id: " UINTX_FORMAT ").",
os::current_thread_id(), (uintx) kernel_thread_id);
LogTarget(Info, os, thread) lt;
if (lt.is_enabled()) {
address low_address = thread->stack_end();
address high_address = thread->stack_base();
lt.print("Thread is alive (tid: " UINTX_FORMAT ", kernel thread id: " UINTX_FORMAT
", stack [" PTR_FORMAT " - " PTR_FORMAT " (" SIZE_FORMAT "k using %uk pages)).",
os::current_thread_id(), (uintx) kernel_thread_id, low_address, high_address,
(high_address - low_address) / K, os::Aix::query_pagesize(low_address) / K);
}
// Normally, pthread stacks on AIX live in the data segment (are allocated with malloc()
// by the pthread library). In rare cases, this may not be the case, e.g. when third-party
@ -864,6 +871,14 @@ bool os::create_thread(Thread* thread, ThreadType thr_type,
// Calculate stack size if it's not specified by caller.
size_t stack_size = os::Posix::get_initial_stack_size(thr_type, req_stack_size);
// JDK-8187028: It was observed that on some configurations (4K backed thread stacks)
// the real thread stack size may be smaller than the requested stack size, by as much as 64K.
// This very much looks like a pthread lib error. As a workaround, increase the stack size
// by 64K for small thread stacks (arbitrarily chosen to be < 4MB)
if (stack_size < 4096 * K) {
stack_size += 64 * K;
}
// On Aix, pthread_attr_setstacksize fails with huge values and leaves the
// thread size in attr unchanged. If this is the minimal stack size as set
// by pthread_attr_init this leads to crashes after thread creation. E.g. the
@ -3443,8 +3458,6 @@ void os::init(void) {
init_random(1234567);
ThreadCritical::initialize();
// Main_thread points to the aboriginal thread.
Aix::_main_thread = pthread_self();

View File

@ -38,12 +38,6 @@ static pthread_t tc_owner = 0;
static pthread_mutex_t tc_mutex = PTHREAD_MUTEX_INITIALIZER;
static int tc_count = 0;
void ThreadCritical::initialize() {
}
void ThreadCritical::release() {
}
ThreadCritical::ThreadCritical() {
pthread_t self = pthread_self();
if (self != tc_owner) {

View File

@ -3353,8 +3353,6 @@ void os::init(void) {
init_random(1234567);
ThreadCritical::initialize();
Bsd::set_page_size(getpagesize());
if (Bsd::page_size() == -1) {
fatal("os_bsd.cpp: os::init: sysconf failed (%s)", os::strerror(errno));

View File

@ -37,12 +37,6 @@ static pthread_t tc_owner = 0;
static pthread_mutex_t tc_mutex = PTHREAD_MUTEX_INITIALIZER;
static int tc_count = 0;
void ThreadCritical::initialize() {
}
void ThreadCritical::release() {
}
ThreadCritical::ThreadCritical() {
pthread_t self = pthread_self();
if (self != tc_owner) {

View File

@ -4768,8 +4768,6 @@ void os::init(void) {
init_random(1234567);
ThreadCritical::initialize();
Linux::set_page_size(sysconf(_SC_PAGESIZE));
if (Linux::page_size() == -1) {
fatal("os_linux.cpp: os::init: sysconf failed (%s)",

View File

@ -37,12 +37,6 @@ static pthread_t tc_owner = 0;
static pthread_mutex_t tc_mutex = PTHREAD_MUTEX_INITIALIZER;
static int tc_count = 0;
void ThreadCritical::initialize() {
}
void ThreadCritical::release() {
}
ThreadCritical::ThreadCritical() {
pthread_t self = pthread_self();
if (self != tc_owner) {

View File

@ -4076,6 +4076,7 @@ int_fnP_cond_tP os::Solaris::_cond_broadcast;
int_fnP_cond_tP_i_vP os::Solaris::_cond_init;
int_fnP_cond_tP os::Solaris::_cond_destroy;
int os::Solaris::_cond_scope = USYNC_THREAD;
bool os::Solaris::_synchronization_initialized;
void os::Solaris::synchronization_init() {
if (UseLWPSynchronization) {
@ -4125,6 +4126,7 @@ void os::Solaris::synchronization_init() {
os::Solaris::set_cond_destroy(::cond_destroy);
}
}
_synchronization_initialized = true;
}
bool os::Solaris::liblgrp_init() {
@ -4198,9 +4200,6 @@ void os::init(void) {
dladdr1_func = CAST_TO_FN_PTR(dladdr1_func_type, dlsym(hdl, "dladdr1"));
}
// (Solaris only) this switches to calls that actually do locking.
ThreadCritical::initialize();
main_thread = thr_self();
// dynamic lookup of functions that may not be available in our lowest

View File

@ -65,6 +65,8 @@ class Solaris {
static int_fnP_cond_tP _cond_destroy;
static int _cond_scope;
static bool _synchronization_initialized;
typedef uintptr_t lgrp_cookie_t;
typedef id_t lgrp_id_t;
typedef int lgrp_rsrc_t;
@ -227,6 +229,8 @@ class Solaris {
static void set_cond_destroy(int_fnP_cond_tP func) { _cond_destroy = func; }
static void set_cond_scope(int scope) { _cond_scope = scope; }
static bool synchronization_initialized() { return _synchronization_initialized; }
static void set_lgrp_home(lgrp_home_func_t func) { _lgrp_home = func; }
static void set_lgrp_init(lgrp_init_func_t func) { _lgrp_init = func; }
static void set_lgrp_fini(lgrp_fini_func_t func) { _lgrp_fini = func; }

View File

@ -42,10 +42,9 @@
static mutex_t global_mut;
static thread_t global_mut_owner = -1;
static int global_mut_count = 0;
static bool initialized = false;
ThreadCritical::ThreadCritical() {
if (initialized) {
if (os::Solaris::synchronization_initialized()) {
thread_t owner = thr_self();
if (global_mut_owner != owner) {
if (os::Solaris::mutex_lock(&global_mut))
@ -62,7 +61,7 @@ ThreadCritical::ThreadCritical() {
}
ThreadCritical::~ThreadCritical() {
if (initialized) {
if (os::Solaris::synchronization_initialized()) {
assert(global_mut_owner == thr_self(), "must have correct owner");
assert(global_mut_count > 0, "must have correct count");
--global_mut_count;
@ -75,12 +74,3 @@ ThreadCritical::~ThreadCritical() {
assert (Threads::number_of_threads() == 0, "valid only during initialization");
}
}
void ThreadCritical::initialize() {
// This method is called at the end of os::init(). Until
// then, we don't do real locking.
initialized = true;
}
void ThreadCritical::release() {
}

View File

@ -428,7 +428,7 @@ static unsigned __stdcall thread_native_entry(Thread* thread) {
// When the VMThread gets here, the main thread may have already exited
// which frees the CodeHeap containing the Atomic::add code
if (thread != VMThread::vm_thread() && VMThread::vm_thread() != NULL) {
Atomic::dec_ptr((intptr_t*)&os::win32::_os_thread_count);
Atomic::dec(&os::win32::_os_thread_count);
}
// If a thread has not deleted itself ("delete this") as part of its
@ -634,7 +634,7 @@ bool os::create_thread(Thread* thread, ThreadType thr_type,
return NULL;
}
Atomic::inc_ptr((intptr_t*)&os::win32::_os_thread_count);
Atomic::inc(&os::win32::_os_thread_count);
// Store info on the Win32 thread into the OSThread
osthread->set_thread_handle(thread_handle);

View File

@ -51,16 +51,6 @@ static DWORD lock_owner = -1;
// and found them ~30 times slower than the critical region code.
//
void ThreadCritical::initialize() {
}
void ThreadCritical::release() {
assert(lock_owner == -1, "Mutex being deleted while owned.");
assert(lock_count == -1, "Mutex being deleted while recursively locked");
assert(lock_event != NULL, "Sanity check");
CloseHandle(lock_event);
}
ThreadCritical::ThreadCritical() {
DWORD current_thread = GetCurrentThreadId();

View File

@ -148,90 +148,15 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return result;
}
inline void Atomic::inc (volatile jint* dest) {
unsigned int temp;
__asm__ __volatile__ (
strasm_nobarrier
"1: lwarx %0, 0, %2 \n"
" addic %0, %0, 1 \n"
" stwcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);
}
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
long temp;
__asm__ __volatile__ (
strasm_nobarrier
"1: ldarx %0, 0, %2 \n"
" addic %0, %0, 1 \n"
" stdcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);
}
inline void Atomic::inc_ptr(volatile void* dest) {
inc_ptr((volatile intptr_t*)dest);
}
inline void Atomic::dec (volatile jint* dest) {
unsigned int temp;
__asm__ __volatile__ (
strasm_nobarrier
"1: lwarx %0, 0, %2 \n"
" addic %0, %0, -1 \n"
" stwcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);
}
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
long temp;
__asm__ __volatile__ (
strasm_nobarrier
"1: ldarx %0, 0, %2 \n"
" addic %0, %0, -1 \n"
" stdcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
: /*%0*/"=&r" (temp), "=m" (*dest)
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);
}
inline void Atomic::dec_ptr(volatile void* dest) {
dec_ptr((volatile intptr_t*)dest);
}
inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
// Note that xchg_ptr doesn't necessarily do an acquire
// (see synchronizer.cpp).
unsigned int old_value;
T old_value;
const uint64_t zero = 0;
__asm__ __volatile__ (
@ -259,15 +184,18 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
"memory"
);
return (jint) old_value;
return old_value;
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
// Note that xchg_ptr doesn't necessarily do an acquire
// (see synchronizer.cpp).
long old_value;
T old_value;
const uint64_t zero = 0;
__asm__ __volatile__ (
@ -295,11 +223,7 @@ inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* des
"memory"
);
return (intptr_t) old_value;
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
return old_value;
}
inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {

View File

@ -61,25 +61,11 @@ inline D Atomic::PlatformAdd<4>::fetch_and_add(I add_value, D volatile* dest) co
return old_value;
}
inline void Atomic::inc (volatile jint* dest) {
__asm__ volatile ( "lock addl $1,(%0)" :
: "r" (dest) : "cc", "memory");
}
inline void Atomic::inc_ptr(volatile void* dest) {
inc_ptr((volatile intptr_t*)dest);
}
inline void Atomic::dec (volatile jint* dest) {
__asm__ volatile ( "lock subl $1,(%0)" :
: "r" (dest) : "cc", "memory");
}
inline void Atomic::dec_ptr(volatile void* dest) {
dec_ptr((volatile intptr_t*)dest);
}
inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
__asm__ volatile ( "xchgl (%2),%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
@ -87,10 +73,6 @@ inline jint Atomic::xchg (jint exchange_value, volatile jint* des
return exchange_value;
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
@ -136,21 +118,11 @@ inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest) co
return old_value;
}
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
__asm__ __volatile__ ( "lock addq $1,(%0)"
:
: "r" (dest)
: "cc", "memory");
}
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
__asm__ __volatile__ ( "lock subq $1,(%0)"
:
: "r" (dest)
: "cc", "memory");
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
__asm__ __volatile__ ("xchgq (%2),%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
@ -176,18 +148,6 @@ inline jlong Atomic::load(const volatile jlong* src) { return *src; }
#else // !AMD64
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
inc((volatile jint*)dest);
}
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
dec((volatile jint*)dest);
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
}
extern "C" {
// defined in bsd_x86.s
jlong _Atomic_cmpxchg_long(jlong, volatile jlong*, jlong, bool);

View File

@ -87,7 +87,7 @@ static inline int m68k_add_and_fetch(int add_value, volatile int *ptr) {
/* Atomically writes NEWVAL into `*PTR' and returns the previous
contents of `*PTR'. */
static inline int m68k_lock_test_and_set(volatile int *ptr, int newval) {
static inline int m68k_lock_test_and_set(int newval, volatile int *ptr) {
for (;;) {
// Loop until success.
int prev = *ptr;
@ -148,7 +148,7 @@ static inline int arm_add_and_fetch(int add_value, volatile int *ptr) {
/* Atomically writes NEWVAL into `*PTR' and returns the previous
contents of `*PTR'. */
static inline int arm_lock_test_and_set(volatile int *ptr, int newval) {
static inline int arm_lock_test_and_set(int newval, volatile int *ptr) {
for (;;) {
// Loop until a __kernel_cmpxchg succeeds.
int prev = *ptr;
@ -207,42 +207,22 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return __sync_add_and_fetch(dest, add_value);
}
inline void Atomic::inc(volatile jint* dest) {
add(1, dest);
}
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
add_ptr(1, dest);
}
inline void Atomic::inc_ptr(volatile void* dest) {
add_ptr(1, dest);
}
inline void Atomic::dec(volatile jint* dest) {
add(-1, dest);
}
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
add_ptr(-1, dest);
}
inline void Atomic::dec_ptr(volatile void* dest) {
add_ptr(-1, dest);
}
inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
#ifdef ARM
return arm_lock_test_and_set(dest, exchange_value);
return xchg_using_helper<int>(arm_lock_test_and_set, exchange_value, dest);
#else
#ifdef M68K
return m68k_lock_test_and_set(dest, exchange_value);
return xchg_using_helper<int>(m68k_lock_test_and_set, exchange_value, dest);
#else
// __sync_lock_test_and_set is a bizarrely named atomic exchange
// operation. Note that some platforms only support this with the
// limitation that the only valid value to store is the immediate
// constant 1. There is a test for this in JNI_CreateJavaVM().
jint result = __sync_lock_test_and_set (dest, exchange_value);
T result = __sync_lock_test_and_set (dest, exchange_value);
// All atomic operations are expected to be full memory barriers
// (see atomic.hpp). However, __sync_lock_test_and_set is not
// a full memory barrier, but an acquire barrier. Hence, this added
@ -253,24 +233,14 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
#endif // ARM
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value,
volatile intptr_t* dest) {
#ifdef ARM
return arm_lock_test_and_set(dest, exchange_value);
#else
#ifdef M68K
return m68k_lock_test_and_set(dest, exchange_value);
#else
intptr_t result = __sync_lock_test_and_set (dest, exchange_value);
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
T result = __sync_lock_test_and_set (dest, exchange_value);
__sync_synchronize();
return result;
#endif // M68K
#endif // ARM
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void *) xchg_ptr((intptr_t) exchange_value,
(volatile intptr_t*) dest);
}
// No direct support for cmpxchg of bytes; emulate using int.

View File

@ -57,39 +57,16 @@ struct Atomic::PlatformAdd
}
};
inline void Atomic::inc(volatile jint* dest)
{
add(1, dest);
}
inline void Atomic::inc_ptr(volatile void* dest)
{
add_ptr(1, dest);
}
inline void Atomic::dec (volatile jint* dest)
{
add(-1, dest);
}
inline void Atomic::dec_ptr(volatile void* dest)
{
add_ptr(-1, dest);
}
inline jint Atomic::xchg (jint exchange_value, volatile jint* dest)
{
jint res = __sync_lock_test_and_set (dest, exchange_value);
// Generic atomic exchange used for every operand size on this platform.
//
// Stores exchange_value into *dest and returns the value that *dest held
// immediately before the store.
//
// exchange_value - the new value to publish.
// dest           - location to update; T must be exactly byte_size bytes wide.
template<size_t byte_size>
template<typename T>
inline T Atomic::PlatformXchg<byte_size>::operator()(T exchange_value,
T volatile* dest) const {
// Reject instantiations where the dispatched size and the operand disagree.
STATIC_ASSERT(byte_size == sizeof(T));
// GCC builtin: atomic exchange. On its own it is documented as an acquire
// barrier only, not a full barrier.
T res = __sync_lock_test_and_set(dest, exchange_value);
// NOTE(review): FULL_MEM_BARRIER is defined outside this view; presumably it
// upgrades the exchange to a full two-way fence -- confirm.
FULL_MEM_BARRIER;
return res;
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest)
{
return (void *) xchg_ptr((intptr_t) exchange_value,
(volatile intptr_t*) dest);
}
template<size_t byte_size>
template<typename T>
inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T exchange_value,
@ -110,23 +87,6 @@ inline T Atomic::PlatformCmpxchg<byte_size>::operator()(T exchange_value,
// jlong stores: both overloads are a plain assignment through dest; they
// differ only in whether the destination is volatile-qualified.
inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
// Adds 1 to the intptr_t at *dest by forwarding to add_ptr(); the value
// returned by add_ptr() is ignored.
inline void Atomic::inc_ptr(volatile intptr_t* dest)
{
add_ptr(1, dest);
}
// Adds -1 to the intptr_t at *dest by forwarding to add_ptr(); the value
// returned by add_ptr() is ignored.
inline void Atomic::dec_ptr(volatile intptr_t* dest)
{
add_ptr(-1, dest);
}
// Pointer-sized exchange: stores exchange_value into *dest and returns the
// value __sync_lock_test_and_set reports as the previous contents.
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest)
{
intptr_t res = __sync_lock_test_and_set (dest, exchange_value);
// Explicit barrier after the exchange; FULL_MEM_BARRIER is a macro defined
// outside this block (presumably a full fence - confirm at its definition).
FULL_MEM_BARRIER;
return res;
}
// jlong load: a single read through the volatile-qualified source pointer.
inline jlong Atomic::load(const volatile jlong* src) { return *src; }
#endif // OS_CPU_LINUX_AARCH64_VM_ATOMIC_LINUX_AARCH64_HPP

View File

@ -122,14 +122,6 @@ inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) co
#endif
}
// Adds 1 to the jint at *dest via Atomic::add(); the result is ignored.
// The cast is a no-op: dest already has type volatile jint*.
inline void Atomic::inc(volatile jint* dest) {
Atomic::add(1, (volatile jint *)dest);
}
// Adds -1 to the jint at *dest via Atomic::add(); the result is ignored.
inline void Atomic::dec(volatile jint* dest) {
Atomic::add(-1, (volatile jint *)dest);
}
#ifdef AARCH64
template<>
template<typename I, typename D>
@ -149,28 +141,15 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
: "memory");
return val;
}
#endif // AARCH64
#endif
// Pointer-sized increment/decrement. The intptr_t overloads forward to
// Atomic::add_ptr() with a constant addend (result ignored); the void*
// overloads cast the destination and forward to the intptr_t overloads.

// Adds 1 to the intptr_t at *dest.
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
Atomic::add_ptr(1, dest);
}
// Adds -1 to the intptr_t at *dest.
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
Atomic::add_ptr(-1, dest);
}
// void* flavour of inc_ptr: reinterprets dest as volatile intptr_t*.
inline void Atomic::inc_ptr(volatile void* dest) {
inc_ptr((volatile intptr_t*)dest);
}
// void* flavour of dec_ptr: reinterprets dest as volatile intptr_t*.
inline void Atomic::dec_ptr(volatile void* dest) {
dec_ptr((volatile intptr_t*)dest);
}
inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
#ifdef AARCH64
jint old_val;
T old_val;
int tmp;
__asm__ volatile(
"1:\n\t"
@ -182,13 +161,17 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
: "memory");
return old_val;
#else
return (*os::atomic_xchg_func)(exchange_value, dest);
return xchg_using_helper<jint>(os::atomic_xchg_func, exchange_value, dest);
#endif
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
#ifdef AARCH64
intptr_t old_val;
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
T old_val;
int tmp;
__asm__ volatile(
"1:\n\t"
@ -199,14 +182,8 @@ inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* des
: [new_val] "r" (exchange_value), [dest] "r" (dest)
: "memory");
return old_val;
#else
return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
#endif
}
// void* overload of xchg_ptr: casts the value and the destination to their
// intptr_t forms, forwards to the intptr_t overload, and casts the value
// that overload returns back to void*.
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}
#endif // AARCH64
// The memory_order parameter is ignored - we always provide the strongest/most-conservative ordering

View File

@ -146,90 +146,14 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return result;
}
// Adds 1 to the 32-bit value at *dest using a load-reserve /
// store-conditional retry loop; nothing is returned.
// NOTE(review): strasm_nobarrier and strasm_nobarrier_clobber_memory are
// macros defined outside this block; by their names they emit no barrier
// instruction - confirm at their definitions.
inline void Atomic::inc (volatile jint* dest) {
// Scratch register operand holding the loaded and incremented word.
unsigned int temp;
__asm__ __volatile__ (
strasm_nobarrier
// 1: load the word with reservation, add 1, try the conditional store;
// branch back to label 1 if the store did not succeed.
"1: lwarx %0, 0, %2 \n"
" addic %0, %0, 1 \n"
" stwcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
// Outputs: %0 = temp (early-clobber), %1 = *dest as memory.
: /*%0*/"=&r" (temp), "=m" (*dest)
// Inputs: %2 = address in a register, %3 = *dest as memory.
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);
}
// Adds 1 to the intptr_t at *dest. Same retry loop as the jint increment,
// but built from the ldarx/stdcx. (doubleword) instruction pair; nothing is
// returned.
// NOTE(review): strasm_nobarrier and strasm_nobarrier_clobber_memory are
// macros defined outside this block; by their names they emit no barrier
// instruction - confirm at their definitions.
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
// Scratch register operand holding the loaded and incremented value.
long temp;
__asm__ __volatile__ (
strasm_nobarrier
// 1: load with reservation, add 1, try the conditional store; branch back
// to label 1 if the store did not succeed.
"1: ldarx %0, 0, %2 \n"
" addic %0, %0, 1 \n"
" stdcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
// Outputs: %0 = temp (early-clobber), %1 = *dest as memory.
: /*%0*/"=&r" (temp), "=m" (*dest)
// Inputs: %2 = address in a register, %3 = *dest as memory.
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);
}
// void* flavour of inc_ptr: reinterprets dest as volatile intptr_t* and
// forwards to the intptr_t overload.
inline void Atomic::inc_ptr(volatile void* dest) {
inc_ptr((volatile intptr_t*)dest);
}
// Adds -1 to the 32-bit value at *dest using a load-reserve /
// store-conditional retry loop; nothing is returned.
// NOTE(review): strasm_nobarrier and strasm_nobarrier_clobber_memory are
// macros defined outside this block; by their names they emit no barrier
// instruction - confirm at their definitions.
inline void Atomic::dec (volatile jint* dest) {
// Scratch register operand holding the loaded and decremented word.
unsigned int temp;
__asm__ __volatile__ (
strasm_nobarrier
// 1: load the word with reservation, add -1, try the conditional store;
// branch back to label 1 if the store did not succeed.
"1: lwarx %0, 0, %2 \n"
" addic %0, %0, -1 \n"
" stwcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
// Outputs: %0 = temp (early-clobber), %1 = *dest as memory.
: /*%0*/"=&r" (temp), "=m" (*dest)
// Inputs: %2 = address in a register, %3 = *dest as memory.
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);
}
// Adds -1 to the intptr_t at *dest. Same retry loop as the jint decrement,
// but built from the ldarx/stdcx. (doubleword) instruction pair; nothing is
// returned.
// NOTE(review): strasm_nobarrier and strasm_nobarrier_clobber_memory are
// macros defined outside this block; by their names they emit no barrier
// instruction - confirm at their definitions.
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
// Scratch register operand holding the loaded and decremented value.
long temp;
__asm__ __volatile__ (
strasm_nobarrier
// 1: load with reservation, add -1, try the conditional store; branch back
// to label 1 if the store did not succeed.
"1: ldarx %0, 0, %2 \n"
" addic %0, %0, -1 \n"
" stdcx. %0, 0, %2 \n"
" bne- 1b \n"
strasm_nobarrier
// Outputs: %0 = temp (early-clobber), %1 = *dest as memory.
: /*%0*/"=&r" (temp), "=m" (*dest)
// Inputs: %2 = address in a register, %3 = *dest as memory.
: /*%2*/"r" (dest), "m" (*dest)
: "cc" strasm_nobarrier_clobber_memory);
}
// void* flavour of dec_ptr: reinterprets dest as volatile intptr_t* and
// forwards to the intptr_t overload.
inline void Atomic::dec_ptr(volatile void* dest) {
dec_ptr((volatile intptr_t*)dest);
}
inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
// Note that xchg_ptr doesn't necessarily do an acquire
// (see synchronizer.cpp).
unsigned int old_value;
T old_value;
const uint64_t zero = 0;
__asm__ __volatile__ (
@ -257,15 +181,18 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
"memory"
);
return (jint) old_value;
return old_value;
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
// Note that xchg_ptr doesn't necessarily do an acquire
// (see synchronizer.cpp).
long old_value;
T old_value;
const uint64_t zero = 0;
__asm__ __volatile__ (
@ -293,11 +220,7 @@ inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* des
"memory"
);
return (intptr_t) old_value;
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
return old_value;
}
inline void cmpxchg_pre_membar(cmpxchg_memory_order order) {

View File

@ -192,219 +192,6 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I inc, D volatile* dest) const {
}
//------------
// Atomic::inc
//------------
// These methods force the value in memory to be incremented (augmented by 1).
// Both the memory value and the increment are treated as 32-bit signed binary integers.
// No overflow exceptions are recognized, and the condition code does not hold
// information about the value in memory.
//
// The value in memory is updated by using a compare-and-swap instruction. The
// instruction is retried as often as required.
// Adds 1 to the 32-bit value at *dest; nothing is returned.
// Two implementations are selected at run time:
//  - if VM_Version::has_LoadAndALUAtomicV1() is true, a hand-encoded LAA
//    instruction (emitted byte by byte) that works on the hard-coded
//    registers 2 and 3, which is why r2/r3 appear in the clobber list;
//  - otherwise a load + CS (compare-and-swap) loop that retries until the
//    swap succeeds.
inline void Atomic::inc(volatile jint* dest) {
// old is only used by the CS loop; upd receives the new value in both paths
// but is not read after the asm statement.
unsigned int old, upd;
if (VM_Version::has_LoadAndALUAtomicV1()) {
// tty->print_cr("Atomic::inc called... dest @%p", dest);
__asm__ __volatile__ (
" LGHI 2,1 \n\t" // load increment
" LA 3,%[mem] \n\t" // force data address into ARG2
// " LAA %[upd],%[inc],%[mem] \n\t" // increment and get old value
// " LAA 2,2,0(3) \n\t" // actually coded instruction
" .byte 0xeb \n\t" // LAA main opcode
" .byte 0x22 \n\t" // R1,R3
" .byte 0x30 \n\t" // R2,disp1
" .byte 0x00 \n\t" // disp2,disp3
" .byte 0x00 \n\t" // disp4,disp5
" .byte 0xf8 \n\t" // LAA minor opcode
" AGHI 2,1 \n\t" // calc new value in register
" LR %[upd],2 \n\t" // move to result register
//---< outputs >---
: [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
// : [inc] "a" (inc) // read-only.
//---< clobbered >---
: "cc", "r2", "r3", "memory"
);
} else {
__asm__ __volatile__ (
" LLGF %[old],%[mem] \n\t" // get old value
"0: LA %[upd],1(,%[old]) \n\t" // calc result
" CS %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
" JNE 0b \n\t" // no success? -> retry
//---< outputs >---
: [old] "=&a" (old) // write-only, old counter value
, [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
//---< clobbered >---
: "cc", "memory"
);
}
}
// Adds 1 to the intptr_t at *dest; nothing is returned.
// Two implementations are selected at run time:
//  - if VM_Version::has_LoadAndALUAtomicV1() is true, a hand-encoded LAAG
//    instruction (emitted byte by byte) that works on the hard-coded
//    registers 2 and 3, which is why r2/r3 appear in the clobber list;
//  - otherwise a load + CSG (compare-and-swap) loop that retries until the
//    swap succeeds.
// NOTE(review): the LAAG path finishes with LR, which looks like a 32-bit
// register move (the 64-bit decrement fallback uses LGR), while upd is an
// unsigned long here. upd is never read after the asm statement, so this
// appears harmless - confirm.
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
// old is only used by the CSG loop; upd is written by both paths but is not
// read after the asm statement.
unsigned long old, upd;
if (VM_Version::has_LoadAndALUAtomicV1()) {
__asm__ __volatile__ (
" LGHI 2,1 \n\t" // load increment
" LA 3,%[mem] \n\t" // force data address into ARG2
// " LAAG %[upd],%[inc],%[mem] \n\t" // increment and get old value
// " LAAG 2,2,0(3) \n\t" // actually coded instruction
" .byte 0xeb \n\t" // LAAG main opcode
" .byte 0x22 \n\t" // R1,R3
" .byte 0x30 \n\t" // R2,disp1
" .byte 0x00 \n\t" // disp2,disp3
" .byte 0x00 \n\t" // disp4,disp5
" .byte 0xe8 \n\t" // LAAG minor opcode
" AGHI 2,1 \n\t" // calc new value in register
" LR %[upd],2 \n\t" // move to result register
//---< outputs >---
: [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
// : [inc] "a" (inc) // read-only.
//---< clobbered >---
: "cc", "r2", "r3", "memory"
);
} else {
__asm__ __volatile__ (
" LG %[old],%[mem] \n\t" // get old value
"0: LA %[upd],1(,%[old]) \n\t" // calc result
" CSG %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
" JNE 0b \n\t" // no success? -> retry
//---< outputs >---
: [old] "=&a" (old) // write-only, old counter value
, [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
//---< clobbered >---
: "cc", "memory"
);
}
}
// void* flavour of inc_ptr: reinterprets dest as volatile intptr_t* and
// forwards to the intptr_t overload.
inline void Atomic::inc_ptr(volatile void* dest) {
inc_ptr((volatile intptr_t*)dest);
}
//------------
// Atomic::dec
//------------
// These methods force the value in memory to be decremented (augmented by -1).
// Both the memory value and the decrement are treated as 32-bit signed binary integers.
// No overflow exceptions are recognized, and the condition code does not hold
// information about the value in memory.
//
// The value in memory is updated by using a compare-and-swap instruction. The
// instruction is retried as often as required.
// Adds -1 to the 32-bit value at *dest; nothing is returned.
// Two implementations are selected at run time:
//  - if VM_Version::has_LoadAndALUAtomicV1() is true, a hand-encoded LAA
//    instruction (emitted byte by byte) that works on the hard-coded
//    registers 2 and 3, which is why r2/r3 appear in the clobber list;
//  - otherwise a load + CS (compare-and-swap) loop that retries until the
//    swap succeeds.
inline void Atomic::dec(volatile jint* dest) {
// old is only used by the CS loop; upd receives the new value in both paths
// but is not read after the asm statement.
unsigned int old, upd;
if (VM_Version::has_LoadAndALUAtomicV1()) {
__asm__ __volatile__ (
" LGHI 2,-1 \n\t" // load addend (-1)
" LA 3,%[mem] \n\t" // force data address into ARG2
// " LAA %[upd],%[inc],%[mem] \n\t" // increment and get old value
// " LAA 2,2,0(3) \n\t" // actually coded instruction
" .byte 0xeb \n\t" // LAA main opcode
" .byte 0x22 \n\t" // R1,R3
" .byte 0x30 \n\t" // R2,disp1
" .byte 0x00 \n\t" // disp2,disp3
" .byte 0x00 \n\t" // disp4,disp5
" .byte 0xf8 \n\t" // LAA minor opcode
" AGHI 2,-1 \n\t" // calc new value in register
" LR %[upd],2 \n\t" // move to result register
//---< outputs >---
: [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
// : [inc] "a" (inc) // read-only.
//---< clobbered >---
: "cc", "r2", "r3", "memory"
);
} else {
__asm__ __volatile__ (
" LLGF %[old],%[mem] \n\t" // get old value
// LAY not supported by inline assembler
// "0: LAY %[upd],-1(,%[old]) \n\t" // calc result
// The two instructions below replace the LAY: copy old, then add -1.
"0: LR %[upd],%[old] \n\t" // calc result
" AHI %[upd],-1 \n\t"
" CS %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
" JNE 0b \n\t" // no success? -> retry
//---< outputs >---
: [old] "=&a" (old) // write-only, old counter value
, [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
//---< clobbered >---
: "cc", "memory"
);
}
}
// Adds -1 to the intptr_t at *dest; nothing is returned.
// Two implementations are selected at run time:
//  - if VM_Version::has_LoadAndALUAtomicV1() is true, a hand-encoded LAAG
//    instruction (emitted byte by byte) that works on the hard-coded
//    registers 2 and 3, which is why r2/r3 appear in the clobber list;
//  - otherwise a load + CSG (compare-and-swap) loop that retries until the
//    swap succeeds.
// NOTE(review): the LAAG path finishes with LR, which looks like a 32-bit
// register move (the fallback below uses LGR), while upd is an unsigned
// long here. upd is never read after the asm statement, so this appears
// harmless - confirm.
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
// old is only used by the CSG loop; upd is written by both paths but is not
// read after the asm statement.
unsigned long old, upd;
if (VM_Version::has_LoadAndALUAtomicV1()) {
__asm__ __volatile__ (
" LGHI 2,-1 \n\t" // load addend (-1)
" LA 3,%[mem] \n\t" // force data address into ARG2
// " LAAG %[upd],%[inc],%[mem] \n\t" // increment and get old value
// " LAAG 2,2,0(3) \n\t" // actually coded instruction
" .byte 0xeb \n\t" // LAAG main opcode
" .byte 0x22 \n\t" // R1,R3
" .byte 0x30 \n\t" // R2,disp1
" .byte 0x00 \n\t" // disp2,disp3
" .byte 0x00 \n\t" // disp4,disp5
" .byte 0xe8 \n\t" // LAAG minor opcode
" AGHI 2,-1 \n\t" // calc new value in register
" LR %[upd],2 \n\t" // move to result register
//---< outputs >---
: [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
// : [inc] "a" (inc) // read-only.
//---< clobbered >---
: "cc", "r2", "r3", "memory"
);
} else {
__asm__ __volatile__ (
" LG %[old],%[mem] \n\t" // get old value
// LAY not supported by inline assembler
// "0: LAY %[upd],-1(,%[old]) \n\t" // calc result
// The two instructions below replace the LAY: copy old, then add -1.
"0: LGR %[upd],%[old] \n\t" // calc result
" AGHI %[upd],-1 \n\t"
" CSG %[old],%[upd],%[mem] \n\t" // try to xchg res with mem
" JNE 0b \n\t" // no success? -> retry
//---< outputs >---
: [old] "=&a" (old) // write-only, old counter value
, [upd] "=&d" (upd) // write-only, updated counter value
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
:
//---< clobbered >---
: "cc", "memory"
);
}
}
// void* flavour of dec_ptr: reinterprets dest as volatile intptr_t* and
// forwards to the intptr_t overload.
inline void Atomic::dec_ptr(volatile void* dest) {
dec_ptr((volatile intptr_t*)dest);
}
//-------------
// Atomic::xchg
//-------------
@ -421,8 +208,12 @@ inline void Atomic::dec_ptr(volatile void* dest) {
//
// The return value is the (unchanged) value from memory as it was when the
// replacement succeeded.
inline jint Atomic::xchg (jint xchg_val, volatile jint* dest) {
unsigned int old;
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
T old;
__asm__ __volatile__ (
" LLGF %[old],%[mem] \n\t" // get old value
@ -432,16 +223,20 @@ inline jint Atomic::xchg (jint xchg_val, volatile jint* dest) {
: [old] "=&d" (old) // write-only, prev value irrelevant
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
: [upd] "d" (xchg_val) // read-only, value to be written to memory
: [upd] "d" (exchange_value) // read-only, value to be written to memory
//---< clobbered >---
: "cc", "memory"
);
return (jint)old;
return old;
}
inline intptr_t Atomic::xchg_ptr(intptr_t xchg_val, volatile intptr_t* dest) {
unsigned long old;
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
T old;
__asm__ __volatile__ (
" LG %[old],%[mem] \n\t" // get old value
@ -451,16 +246,12 @@ inline intptr_t Atomic::xchg_ptr(intptr_t xchg_val, volatile intptr_t* dest) {
: [old] "=&d" (old) // write-only, init from memory
, [mem] "+Q" (*dest) // read/write, memory to be updated atomically
//---< inputs >---
: [upd] "d" (xchg_val) // read-only, value to be written to memory
: [upd] "d" (exchange_value) // read-only, value to be written to memory
//---< clobbered >---
: "cc", "memory"
);
return (intptr_t)old;
}
inline void *Atomic::xchg_ptr(void *exchange_value, volatile void *dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
return old;
}
//----------------

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016 SAP SE. All rights reserved.
* Copyright (c) 2016, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2016, 2017 SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -471,7 +471,7 @@ JVM_handle_linux_signal(int sig,
// Info->si_addr need not be the exact address, it is only
// guaranteed to be on the same page as the address that caused
// the SIGSEGV.
if ((sig == SIGSEGV) &&
if ((sig == SIGSEGV) && !UseMembar &&
(os::get_memory_serialize_page() ==
(address)((uintptr_t)info->si_addr & ~(os::vm_page_size()-1)))) {
return true;

View File

@ -41,14 +41,6 @@ inline void Atomic::store (jlong store_value, volatile jlong* dest) { *
// Pointer-sized stores: a plain assignment through dest. The void* overload
// first views dest as a pointer to a volatile void*.
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
// Increment/decrement helpers: each forwards to add() or add_ptr() with a
// constant addend of 1 or -1 and explicitly discards the returned value.
inline void Atomic::inc (volatile jint* dest) { (void)add (1, dest); }
inline void Atomic::inc_ptr(volatile intptr_t* dest) { (void)add_ptr(1, dest); }
inline void Atomic::inc_ptr(volatile void* dest) { (void)add_ptr(1, dest); }
inline void Atomic::dec (volatile jint* dest) { (void)add (-1, dest); }
inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); }
inline void Atomic::dec_ptr(volatile void* dest) { (void)add_ptr(-1, dest); }
// jlong load: a single read through the volatile-qualified source pointer.
inline jlong Atomic::load(const volatile jlong* src) { return *src; }
template<size_t byte_size>
@ -103,9 +95,12 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return rv;
}
inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
intptr_t rv = exchange_value;
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
T rv = exchange_value;
__asm__ volatile(
" swap [%2],%1\n\t"
: "=r" (rv)
@ -114,8 +109,12 @@ inline jint Atomic::xchg (jint exchange_value, volatile jint* des
return rv;
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
intptr_t rv = exchange_value;
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
T rv = exchange_value;
__asm__ volatile(
"1:\n\t"
" mov %1, %%o3\n\t"
@ -131,10 +130,6 @@ inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* des
return rv;
}
// void* overload of xchg_ptr: casts the value and the destination to their
// intptr_t forms, forwards to the intptr_t overload, and casts the value
// that overload returns back to void*.
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}
// No direct support for cmpxchg of bytes; emulate using int.
// The 1-byte specialization adds nothing of its own: it inherits its whole
// implementation from Atomic::CmpxchgByteUsingInt.
template<>
struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};

View File

@ -61,25 +61,11 @@ inline D Atomic::PlatformAdd<4>::fetch_and_add(I add_value, D volatile* dest) co
return old_value;
}
// Adds 1 to the jint at *dest with a single lock-prefixed addl. The asm has
// no outputs; dest is passed in a register and the condition codes and
// memory are declared clobbered.
inline void Atomic::inc (volatile jint* dest) {
__asm__ volatile ( "lock addl $1,(%0)" :
: "r" (dest) : "cc", "memory");
}
// void* flavour of inc_ptr: reinterprets dest as volatile intptr_t* and
// forwards to the intptr_t overload.
inline void Atomic::inc_ptr(volatile void* dest) {
inc_ptr((volatile intptr_t*)dest);
}
// Subtracts 1 from the jint at *dest with a single lock-prefixed subl; same
// operand and clobber layout as the increment.
inline void Atomic::dec (volatile jint* dest) {
__asm__ volatile ( "lock subl $1,(%0)" :
: "r" (dest) : "cc", "memory");
}
// void* flavour of dec_ptr: reinterprets dest as volatile intptr_t* and
// forwards to the intptr_t overload.
inline void Atomic::dec_ptr(volatile void* dest) {
dec_ptr((volatile intptr_t*)dest);
}
inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
__asm__ volatile ( "xchgl (%2),%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
@ -87,10 +73,6 @@ inline jint Atomic::xchg (jint exchange_value, volatile jint* des
return exchange_value;
}
// void* overload of xchg_ptr: casts the value and the destination to their
// intptr_t forms, forwards to the intptr_t overload, and casts the value
// that overload returns back to void*.
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
}
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
@ -136,21 +118,11 @@ inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest) co
return old_value;
}
// Adds 1 to the intptr_t at *dest with a single lock-prefixed addq. The asm
// has no outputs; dest is passed in a register and the condition codes and
// memory are declared clobbered.
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
__asm__ __volatile__ ("lock addq $1,(%0)"
:
: "r" (dest)
: "cc", "memory");
}
// Subtracts 1 from the intptr_t at *dest with a single lock-prefixed subq;
// same operand and clobber layout as the increment.
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
__asm__ __volatile__ ("lock subq $1,(%0)"
:
: "r" (dest)
: "cc", "memory");
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
__asm__ __volatile__ ("xchgq (%2),%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
@ -176,18 +148,6 @@ inline jlong Atomic::load(const volatile jlong* src) { return *src; }
#else // !AMD64
// Variants for the `#else // !AMD64` branch: the pointer-sized operations are
// implemented by casting to the jint forms and forwarding to the jint
// functions.

// Increments *dest by treating it as a volatile jint.
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
inc((volatile jint*)dest);
}
// Decrements *dest by treating it as a volatile jint.
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
dec((volatile jint*)dest);
}
// Exchanges through the jint xchg and widens its result back to intptr_t.
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
}
extern "C" {
// defined in linux_x86.s
jlong _Atomic_cmpxchg_long(jlong, volatile jlong*, jlong);

View File

@ -87,7 +87,7 @@ static inline int m68k_add_and_fetch(int add_value, volatile int *ptr) {
/* Atomically writes NEWVAL into `*PTR' and returns the previous
contents of `*PTR'. */
static inline int m68k_lock_test_and_set(volatile int *ptr, int newval) {
static inline int m68k_lock_test_and_set(int newval, volatile int *ptr) {
for (;;) {
// Loop until success.
int prev = *ptr;
@ -148,7 +148,7 @@ static inline int arm_add_and_fetch(int add_value, volatile int *ptr) {
/* Atomically writes NEWVAL into `*PTR' and returns the previous
contents of `*PTR'. */
static inline int arm_lock_test_and_set(volatile int *ptr, int newval) {
static inline int arm_lock_test_and_set(int newval, volatile int *ptr) {
for (;;) {
// Loop until a __kernel_cmpxchg succeeds.
int prev = *ptr;
@ -201,42 +201,22 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return __sync_add_and_fetch(dest, add_value);
}
// Increment/decrement helpers. Each one is a thin forwarder to add() or
// add_ptr() with a constant addend; the value returned by the add is ignored.

// Adds 1 to the jint at *dest.
inline void Atomic::inc(volatile jint* dest) {
add(1, dest);
}
// Adds 1 to the intptr_t at *dest.
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
add_ptr(1, dest);
}
// Adds 1 to the pointer-sized value at *dest.
inline void Atomic::inc_ptr(volatile void* dest) {
add_ptr(1, dest);
}
// Adds -1 to the jint at *dest.
inline void Atomic::dec(volatile jint* dest) {
add(-1, dest);
}
// Adds -1 to the intptr_t at *dest.
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
add_ptr(-1, dest);
}
// Adds -1 to the pointer-sized value at *dest.
inline void Atomic::dec_ptr(volatile void* dest) {
add_ptr(-1, dest);
}
inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
#ifdef ARM
return arm_lock_test_and_set(dest, exchange_value);
return xchg_using_helper<int>(arm_lock_test_and_set, exchange_value, dest);
#else
#ifdef M68K
return m68k_lock_test_and_set(dest, exchange_value);
return xchg_using_helper<int>(m68k_lock_test_and_set, exchange_value, dest);
#else
// __sync_lock_test_and_set is a bizarrely named atomic exchange
// operation. Note that some platforms only support this with the
// limitation that the only valid value to store is the immediate
// constant 1. There is a test for this in JNI_CreateJavaVM().
jint result = __sync_lock_test_and_set (dest, exchange_value);
T result = __sync_lock_test_and_set (dest, exchange_value);
// All atomic operations are expected to be full memory barriers
// (see atomic.hpp). However, __sync_lock_test_and_set is not
// a full memory barrier, but an acquire barrier. Hence, this added
@ -247,24 +227,14 @@ inline jint Atomic::xchg(jint exchange_value, volatile jint* dest) {
#endif // ARM
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value,
volatile intptr_t* dest) {
#ifdef ARM
return arm_lock_test_and_set(dest, exchange_value);
#else
#ifdef M68K
return m68k_lock_test_and_set(dest, exchange_value);
#else
intptr_t result = __sync_lock_test_and_set (dest, exchange_value);
// 8-byte exchange: stores exchange_value into *dest and returns the value
// that __sync_lock_test_and_set reports as the previous contents.
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
// This specialization is only valid for operands that are exactly 8 bytes.
STATIC_ASSERT(8 == sizeof(T));
T result = __sync_lock_test_and_set (dest, exchange_value);
// As the comment on the 4-byte variant explains, __sync_lock_test_and_set
// is only an acquire barrier, so a full barrier is issued explicitly.
__sync_synchronize();
return result;
#endif // M68K
#endif // ARM
}
// void* overload of xchg_ptr: casts the value and the destination to their
// intptr_t forms, forwards to the intptr_t overload, and casts the value
// that overload returns back to void*.
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void *) xchg_ptr((intptr_t) exchange_value,
(volatile intptr_t*) dest);
}
// No direct support for cmpxchg of bytes; emulate using int.

View File

@ -56,8 +56,16 @@ typedef void (__kernel_dmb_t) (void);
#else // PPC
#ifdef ALPHA
#define LIGHT_MEM_BARRIER __sync_synchronize()
#else // ALPHA
#define LIGHT_MEM_BARRIER __asm __volatile ("":::"memory")
#endif // ALPHA
#endif // PPC
#endif // ARM

View File

@ -39,29 +39,10 @@ inline void Atomic::store (jint store_value, volatile jint* dest) { *
// Pointer-sized stores: a plain assignment through dest. The void* overload
// first views dest as a pointer to a volatile void*.
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
// Increment/decrement helpers: each forwards to add() or add_ptr() with a
// constant addend of 1 or -1 and explicitly discards the returned value.
inline void Atomic::inc (volatile jint* dest) { (void)add (1, dest); }
inline void Atomic::inc_ptr(volatile intptr_t* dest) { (void)add_ptr(1, dest); }
inline void Atomic::inc_ptr(volatile void* dest) { (void)add_ptr(1, dest); }
inline void Atomic::dec (volatile jint* dest) { (void)add (-1, dest); }
inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); }
inline void Atomic::dec_ptr(volatile void* dest) { (void)add_ptr(-1, dest); }
// jlong stores and load: plain assignments / a plain read through the given
// pointer; the two store overloads differ only in the volatile qualifier.
inline void Atomic::store(jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store(jlong store_value, volatile jlong* dest) { *dest = store_value; }
inline jlong Atomic::load(const volatile jlong* src) { return *src; }
// This is the interface to the atomic instructions in solaris_sparc.il.
// It's very messy because we need to support v8 and these instructions
// are illegal there. When sparc v8 is dropped, we can drop out lots of
// this code. Also compiler2 does not support v8 so the conditional code
// omits the instruction set check.
extern "C" jint _Atomic_swap32(jint exchange_value, volatile jint* dest);
extern "C" intptr_t _Atomic_swap64(intptr_t exchange_value, volatile intptr_t* dest);
// Implement ADD using a CAS loop.
template<size_t byte_size>
struct Atomic::PlatformAdd VALUE_OBJ_CLASS_SPEC {
@ -78,16 +59,30 @@ struct Atomic::PlatformAdd VALUE_OBJ_CLASS_SPEC {
}
};
inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
return _Atomic_swap32(exchange_value, dest);
// 4-byte exchange implemented with one `swap` instruction: stores
// exchange_value into *dest and returns what the instruction leaves in the
// value register afterwards.
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
// This specialization is only valid for operands that are exactly 4 bytes.
STATIC_ASSERT(4 == sizeof(T));
// %0 is exchange_value as both output ("=r") and input (tied by "0"), so
// the same register carries the new value in and the swapped-out value out.
// %2 is the address in a register; memory is declared clobbered.
__asm__ volatile ( "swap [%2],%0"
: "=r" (exchange_value)
: "0" (exchange_value), "r" (dest)
: "memory");
return exchange_value;
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
return _Atomic_swap64(exchange_value, dest);
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg_ptr((intptr_t)exchange_value, (volatile intptr_t*)dest);
// 8-byte exchange emulated with a compare-and-exchange retry loop: keep
// offering exchange_value to cmpxchg until the value it reports back matches
// the value we expected to replace, then return that replaced value.
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
                                             T volatile* dest) const {
  STATIC_ASSERT(8 == sizeof(T));
  // Start from whatever is currently stored at the destination.
  T expected = *dest;
  T witnessed = cmpxchg(exchange_value, dest, expected);
  while (!(witnessed == expected)) {
    // cmpxchg reported a different value; retry with that as the new guess.
    expected = witnessed;
    witnessed = cmpxchg(exchange_value, dest, expected);
  }
  return expected;
}
// No direct support for cmpxchg of bytes; emulate using int.

View File

@ -32,47 +32,6 @@
.end
// Support for jint Atomic::xchg(jint exchange_value, volatile jint* dest).
//
// Arguments:
// exchange_value: O0
// dest: O1
//
// Results:
// O0: the value previously stored in dest
//
// Implementation: a single `swap` between register %o0 and the word
// addressed by %o1, so %o0 carries the new value in and the old value out.
.inline _Atomic_swap32, 2
.volatile
swap [%o1],%o0
.nonvolatile
.end
// Support for intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t * dest).
//
// 64-bit
//
// Arguments:
// exchange_value: O0
// dest: O1
//
// Results:
// O0: the value previously stored in dest
//
// Implementation: compare-and-swap retry loop starting at label 1.
// Each pass copies the new value into %o3, loads the current contents of
// [%o1] into %o2, and issues `casx [%o1], %o2, %o3`. The loop repeats while
// %o2 and %o3 differ after the casx; on exit %o2 (the value that was
// replaced) is moved into %o0 as the result. The `nop` follows the branch
// (presumably filling its delay slot - confirm).
.inline _Atomic_swap64, 2
.volatile
1:
mov %o0, %o3
ldx [%o1], %o2
casx [%o1], %o2, %o3
cmp %o2, %o3
bne %xcc, 1b
nop
mov %o2, %o0
.nonvolatile
.end
// Support for jlong Atomic::load and Atomic::store on v9.
//
// void _Atomic_move_long_v9(volatile jlong* src, volatile jlong* dst)

View File

@ -380,7 +380,7 @@ void VM_Version::platform_features() {
if (av & AV_SPARC_CRC32C) features |= ISA_crc32c_msk;
#ifndef AV2_SPARC_FJATHPLUS
#define AV2_SPARC_FJATHPLUS 0x00000001 // Fujitsu Athena+
#define AV2_SPARC_FJATHPLUS 0x00000001 // Fujitsu Athena+ insns
#endif
#ifndef AV2_SPARC_VIS3B
#define AV2_SPARC_VIS3B 0x00000002 // VIS3 present on multiple chips
@ -405,6 +405,34 @@ void VM_Version::platform_features() {
#endif
#ifndef AV2_SPARC_VAMASK
#define AV2_SPARC_VAMASK 0x00000100 // Virtual Address masking
#endif
#ifndef AV2_SPARC_SPARC6
#define AV2_SPARC_SPARC6 0x00000200 // REVB*, FPSLL*, RDENTROPY, LDM* and STM*
#endif
#ifndef AV2_SPARC_DICTUNP
#define AV2_SPARC_DICTUNP 0x00002000 // Dictionary unpack instruction
#endif
#ifndef AV2_SPARC_FPCMPSHL
#define AV2_SPARC_FPCMPSHL 0x00004000 // Partition compare with shifted result
#endif
#ifndef AV2_SPARC_RLE
#define AV2_SPARC_RLE 0x00008000 // Run-length encoded burst and length
#endif
#ifndef AV2_SPARC_SHA3
#define AV2_SPARC_SHA3 0x00010000 // SHA3 instructions
#endif
#ifndef AV2_SPARC_FJATHPLUS2
#define AV2_SPARC_FJATHPLUS2 0x00020000 // Fujitsu Athena++ insns
#endif
#ifndef AV2_SPARC_VIS3C
#define AV2_SPARC_VIS3C 0x00040000 // Subset of VIS3 insns provided by Athena++
#endif
#ifndef AV2_SPARC_SPARC5B
#define AV2_SPARC_SPARC5B 0x00080000 // subset of SPARC5 insns (fpadd8, fpsub8)
#endif
#ifndef AV2_SPARC_MME
#define AV2_SPARC_MME 0x00100000 // Misaligned Mitigation Enable
#endif
if (avn > 1) {
@ -419,19 +447,30 @@ void VM_Version::platform_features() {
if (av2 & AV2_SPARC_XMONT) features |= ISA_xmont_msk;
if (av2 & AV2_SPARC_PAUSE_NSEC) features |= ISA_pause_nsec_msk;
if (av2 & AV2_SPARC_VAMASK) features |= ISA_vamask_msk;
if (av2 & AV2_SPARC_SPARC6) features |= ISA_sparc6_msk;
if (av2 & AV2_SPARC_DICTUNP) features |= ISA_dictunp_msk;
if (av2 & AV2_SPARC_FPCMPSHL) features |= ISA_fpcmpshl_msk;
if (av2 & AV2_SPARC_RLE) features |= ISA_rle_msk;
if (av2 & AV2_SPARC_SHA3) features |= ISA_sha3_msk;
if (av2 & AV2_SPARC_FJATHPLUS2) features |= ISA_fjathplus2_msk;
if (av2 & AV2_SPARC_VIS3C) features |= ISA_vis3c_msk;
if (av2 & AV2_SPARC_SPARC5B) features |= ISA_sparc5b_msk;
if (av2 & AV2_SPARC_MME) features |= ISA_mme_msk;
}
_features = features; // ISA feature set completed, update state.
Sysinfo machine(SI_MACHINE);
bool is_sun4v = machine.match("sun4v"); // All Oracle SPARC + Fujitsu Athena+
bool is_sun4v = machine.match("sun4v"); // All Oracle SPARC + Fujitsu Athena+/++
bool is_sun4u = machine.match("sun4u"); // All other Fujitsu
// Handle Athena+ conservatively (simply because we are lacking info.).
// Handle Athena+/++ conservatively (simply because we are lacking info.).
bool do_sun4v = is_sun4v && !has_athena_plus();
bool do_sun4u = is_sun4u || has_athena_plus();
bool an_athena = has_athena_plus() || has_athena_plus2();
bool do_sun4v = is_sun4v && !an_athena;
bool do_sun4u = is_sun4u || an_athena;
uint64_t synthetic = 0;
@ -441,16 +480,16 @@ void VM_Version::platform_features() {
// Fast IDIV, BIS and LD available on Niagara Plus.
if (has_vis2()) {
synthetic |= (CPU_fast_idiv_msk | CPU_fast_ld_msk);
// ...on Core S4 however, we prefer not to use BIS.
// ...on Core C4 however, we prefer not to use BIS.
if (!has_sparc5()) {
synthetic |= CPU_fast_bis_msk;
}
}
// Niagara Core S3 supports fast RDPC and block zeroing.
// SPARC Core C3 supports fast RDPC and block zeroing.
if (has_ima()) {
synthetic |= (CPU_fast_rdpc_msk | CPU_blk_zeroing_msk);
}
// Niagara Core S3 and S4 have slow CMOVE.
// SPARC Core C3 and C4 have slow CMOVE.
if (!has_ima()) {
synthetic |= CPU_fast_cmove_msk;
}

View File

@ -39,14 +39,6 @@ inline void Atomic::store (jint store_value, volatile jint* dest) { *
// Pointer-sized stores: a plain assignment through dest. The void* overload
// first views dest as a pointer to a volatile void*.
inline void Atomic::store_ptr(intptr_t store_value, volatile intptr_t* dest) { *dest = store_value; }
inline void Atomic::store_ptr(void* store_value, volatile void* dest) { *(void* volatile *)dest = store_value; }
// Increment/decrement helpers: each forwards to add() or add_ptr() with a
// constant addend of 1 or -1 and explicitly discards the returned value.
inline void Atomic::inc (volatile jint* dest) { (void)add (1, dest); }
inline void Atomic::inc_ptr(volatile intptr_t* dest) { (void)add_ptr(1, dest); }
inline void Atomic::inc_ptr(volatile void* dest) { (void)add_ptr(1, dest); }
inline void Atomic::dec (volatile jint* dest) { (void)add (-1, dest); }
inline void Atomic::dec_ptr(volatile intptr_t* dest) { (void)add_ptr(-1, dest); }
inline void Atomic::dec_ptr(volatile void* dest) { (void)add_ptr(-1, dest); }
// For Sun Studio - implementation is in solaris_x86_64.il.
extern "C" {
@ -92,8 +84,26 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
reinterpret_cast<jlong volatile*>(dest)));
}
inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
return _Atomic_xchg(exchange_value, dest);
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
return PrimitiveConversions::cast<T>(
_Atomic_xchg(PrimitiveConversions::cast<jint>(exchange_value),
reinterpret_cast<jint volatile*>(dest)));
}
extern "C" jlong _Atomic_xchg_long(jlong exchange_value, volatile jlong* dest);
template<>
template<typename T>
inline T Atomic::PlatformXchg<8>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(8 == sizeof(T));
return PrimitiveConversions::cast<T>(
_Atomic_xchg_long(PrimitiveConversions::cast<jlong>(exchange_value),
reinterpret_cast<jlong volatile*>(dest)));
}
// Not using cmpxchg_using_helper here, because some configurations of
@ -143,16 +153,6 @@ inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
inline void Atomic::store (jlong store_value, jlong* dest) { *dest = store_value; }
inline void Atomic::store (jlong store_value, volatile jlong* dest) { *dest = store_value; }
extern "C" jlong _Atomic_xchg_long(jlong exchange_value, volatile jlong* dest);
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
return (intptr_t)_Atomic_xchg_long((jlong)exchange_value, (volatile jlong*)dest);
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)_Atomic_xchg_long((jlong)exchange_value, (volatile jlong*)dest);
}
inline jlong Atomic::load(const volatile jlong* src) { return *src; }
#endif // OS_CPU_SOLARIS_X86_VM_ATOMIC_SOLARIS_X86_HPP

View File

@ -81,41 +81,19 @@ inline D Atomic::PlatformAdd<8>::add_and_fetch(I add_value, D volatile* dest) co
return add_using_helper<intptr_t>(os::atomic_add_ptr_func, add_value, dest);
}
inline void Atomic::inc (volatile jint* dest) {
(void)add (1, dest);
}
#define DEFINE_STUB_XCHG(ByteSize, StubType, StubName) \
template<> \
template<typename T> \
inline T Atomic::PlatformXchg<ByteSize>::operator()(T exchange_value, \
T volatile* dest) const { \
STATIC_ASSERT(ByteSize == sizeof(T)); \
return xchg_using_helper<StubType>(StubName, exchange_value, dest); \
}
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
(void)add_ptr(1, dest);
}
DEFINE_STUB_XCHG(4, jint, os::atomic_xchg_func)
DEFINE_STUB_XCHG(8, jlong, os::atomic_xchg_ptr_func)
inline void Atomic::inc_ptr(volatile void* dest) {
(void)add_ptr(1, dest);
}
inline void Atomic::dec (volatile jint* dest) {
(void)add (-1, dest);
}
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
(void)add_ptr(-1, dest);
}
inline void Atomic::dec_ptr(volatile void* dest) {
(void)add_ptr(-1, dest);
}
inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
return (jint)(*os::atomic_xchg_func)(exchange_value, dest);
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
return (intptr_t)(os::atomic_xchg_ptr_func)(exchange_value, dest);
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void *)(os::atomic_xchg_ptr_func)((intptr_t)exchange_value, (volatile intptr_t*)dest);
}
#undef DEFINE_STUB_XCHG
#define DEFINE_STUB_CMPXCHG(ByteSize, StubType, StubName) \
template<> \
@ -152,39 +130,11 @@ inline D Atomic::PlatformAdd<4>::add_and_fetch(I add_value, D volatile* dest) co
}
}
inline void Atomic::inc (volatile jint* dest) {
// alternative for InterlockedIncrement
__asm {
mov edx, dest;
lock add dword ptr [edx], 1;
}
}
inline void Atomic::inc_ptr(volatile intptr_t* dest) {
inc((volatile jint*)dest);
}
inline void Atomic::inc_ptr(volatile void* dest) {
inc((volatile jint*)dest);
}
inline void Atomic::dec (volatile jint* dest) {
// alternative for InterlockedDecrement
__asm {
mov edx, dest;
lock sub dword ptr [edx], 1;
}
}
inline void Atomic::dec_ptr(volatile intptr_t* dest) {
dec((volatile jint*)dest);
}
inline void Atomic::dec_ptr(volatile void* dest) {
dec((volatile jint*)dest);
}
inline jint Atomic::xchg (jint exchange_value, volatile jint* dest) {
template<>
template<typename T>
inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
T volatile* dest) const {
STATIC_ASSERT(4 == sizeof(T));
// alternative for InterlockedExchange
__asm {
mov eax, exchange_value;
@ -193,14 +143,6 @@ inline jint Atomic::xchg (jint exchange_value, volatile jint* des
}
}
inline intptr_t Atomic::xchg_ptr(intptr_t exchange_value, volatile intptr_t* dest) {
return (intptr_t)xchg((jint)exchange_value, (volatile jint*)dest);
}
inline void* Atomic::xchg_ptr(void* exchange_value, volatile void* dest) {
return (void*)xchg((jint)exchange_value, (volatile jint*)dest);
}
template<>
template<typename T>
inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,

View File

@ -2276,6 +2276,10 @@ private:
if (strcmp(rep_var,"$XMMRegister") == 0) return "as_XMMRegister";
#endif
if (strcmp(rep_var,"$CondRegister") == 0) return "as_ConditionRegister";
#if defined(PPC64)
if (strcmp(rep_var,"$VectorRegister") == 0) return "as_VectorRegister";
if (strcmp(rep_var,"$VectorSRegister") == 0) return "as_VectorSRegister";
#endif
return NULL;
}

View File

@ -1304,7 +1304,9 @@ void LIRGenerator::do_getClass(Intrinsic* x) {
// FIXME T_ADDRESS should actually be T_METADATA but it can't because the
// meaning of these two is mixed up (see JDK-8026837).
__ move(new LIR_Address(rcvr.result(), oopDesc::klass_offset_in_bytes(), T_ADDRESS), temp, info);
__ move_wide(new LIR_Address(temp, in_bytes(Klass::java_mirror_offset()), T_OBJECT), result);
__ move_wide(new LIR_Address(temp, in_bytes(Klass::java_mirror_offset()), T_ADDRESS), result);
// mirror = ((OopHandle)mirror)->resolve();
__ move_wide(new LIR_Address(result, T_OBJECT), result);
}
// java.lang.Class::isPrimitive()

View File

@ -665,9 +665,8 @@ class StaticFinalFieldPrinter : public FieldClosure {
_out->print_cr("null");
} else if (value->is_instance()) {
if (value->is_a(SystemDictionary::String_klass())) {
_out->print("\"");
_out->print_raw(java_lang_String::as_quoted_ascii(value));
_out->print_cr("\"");
const char* ascii_value = java_lang_String::as_quoted_ascii(value);
_out->print("\"%s\"", (ascii_value != NULL) ? ascii_value : "");
} else {
const char* klass_name = value->klass()->name()->as_quoted_ascii();
_out->print_cr("%s", klass_name);

View File

@ -802,7 +802,6 @@ void ClassLoader::setup_search_path(const char *class_path, bool bootstrap_searc
if (DumpSharedSpaces) {
JImageFile *jimage = _jrt_entry->jimage();
assert(jimage != NULL, "No java runtime image file present");
ClassLoader::initialize_module_loader_map(jimage);
}
#endif
}
@ -1144,61 +1143,6 @@ int ClassLoader::crc32(int crc, const char* buf, int len) {
return (*Crc32)(crc, (const jbyte*)buf, len);
}
#if INCLUDE_CDS
// Reads the MODULE_LOADER_MAP resource of JAVA_BASE_NAME from the given jimage
// and splits its contents into _boot_modules_array and _platform_modules_array.
//
// The resource is scanned as '\n'-terminated lines:
//   - a line starting with "BOOT" selects the boot array for following names,
//   - a line starting with "PLATFORM" selects the platform array,
//   - any other line is stored verbatim as a module name.
// Both header checks are prefix comparisons (strncmp), not exact matches.
// Names seen before any header line go to the platform array, because
// process_boot_modules starts out false.
//
// Does nothing unless DumpSharedSpaces is set. Calls
// vm_exit_during_initialization() if the resource cannot be located or if the
// number of bytes read differs from the reported size.
void ClassLoader::initialize_module_loader_map(JImageFile* jimage) {
if (!DumpSharedSpaces) {
return; // only needed for CDS dump time
}
ResourceMark rm;
jlong size;
JImageLocationRef location = (*JImageFindResource)(jimage, JAVA_BASE_NAME, get_jimage_version_string(), MODULE_LOADER_MAP, &size);
if (location == 0) {
vm_exit_during_initialization(
"Cannot find ModuleLoaderMap location from modules jimage.", NULL);
}
// One extra byte so the raw resource can be NUL-terminated and then scanned
// with strlen()/strchr() below.
char* buffer = NEW_RESOURCE_ARRAY(char, size + 1);
buffer[size] = '\0';
jlong read = (*JImageGetResource)(jimage, location, buffer, size);
if (read != size) {
vm_exit_during_initialization(
"Cannot find ModuleLoaderMap resource from modules jimage.", NULL);
}
char* char_buf = (char*)buffer;
int buflen = (int)strlen(char_buf);
// [begin_ptr, end_ptr) delimits the current line; end_ptr points at its '\n'.
char* begin_ptr = char_buf;
char* end_ptr = strchr(begin_ptr, '\n');
bool process_boot_modules = false;
_boot_modules_array = new (ResourceObj::C_HEAP, mtModule)
GrowableArray<char*>(INITIAL_BOOT_MODULES_ARRAY_SIZE, true);
_platform_modules_array = new (ResourceObj::C_HEAP, mtModule)
GrowableArray<char*>(INITIAL_PLATFORM_MODULES_ARRAY_SIZE, true);
// The loop only runs while a '\n' is found, so trailing text that is not
// newline-terminated is never turned into an entry.
while (end_ptr != NULL && (end_ptr - char_buf) < buflen) {
// Allocate a buffer from the C heap to be appended to the _boot_modules_array
// or the _platform_modules_array.
char* temp_name = NEW_C_HEAP_ARRAY(char, (size_t)(end_ptr - begin_ptr + 1), mtInternal);
// strncpy copies exactly the line's characters (no '\n'); the terminator is
// written explicitly on the next line.
strncpy(temp_name, begin_ptr, end_ptr - begin_ptr);
temp_name[end_ptr - begin_ptr] = '\0';
if (strncmp(temp_name, "BOOT", 4) == 0) {
// Section header: switch target array and discard the copy.
process_boot_modules = true;
FREE_C_HEAP_ARRAY(char, temp_name);
} else if (strncmp(temp_name, "PLATFORM", 8) == 0) {
// Section header: switch target array and discard the copy.
process_boot_modules = false;
FREE_C_HEAP_ARRAY(char, temp_name);
} else {
// module name
// The C-heap copy is handed to the array and is not freed here.
if (process_boot_modules) {
_boot_modules_array->append(temp_name);
} else {
_platform_modules_array->append(temp_name);
}
}
// Step past the '\n' and look for the end of the next line.
begin_ptr = ++end_ptr;
end_ptr = strchr(begin_ptr, '\n');
}
}
#endif
// Function add_package extracts the package from the fully qualified class name
// and checks if the package is in the boot loader's package entry table. If so,
// then it sets the classpath_index in the package entry record.
@ -1290,58 +1234,6 @@ objArrayOop ClassLoader::get_system_packages(TRAPS) {
return result();
}
#if INCLUDE_CDS
// Maps a module name to a class loader type by exact string comparison against
// the entries of _boot_modules_array and then _platform_modules_array.
//
// Returns BOOT_LOADER or PLATFORM_LOADER on the first match, and APP_LOADER
// when the name is in neither array. Asserts that DumpSharedSpaces is set and
// that both arrays have already been created.
s2 ClassLoader::module_to_classloader(const char* module_name) {
assert(DumpSharedSpaces, "dump time only");
assert(_boot_modules_array != NULL, "_boot_modules_array is NULL");
assert(_platform_modules_array != NULL, "_platform_modules_array is NULL");
// Linear scan of the boot modules first; a hit here wins over the platform list.
int array_size = _boot_modules_array->length();
for (int i = 0; i < array_size; i++) {
if (strcmp(module_name, _boot_modules_array->at(i)) == 0) {
return BOOT_LOADER;
}
}
// Not a boot module: try the platform modules.
array_size = _platform_modules_array->length();
for (int i = 0; i < array_size; i++) {
if (strcmp(module_name, _platform_modules_array->at(i)) == 0) {
return PLATFORM_LOADER;
}
}
// Unknown modules default to the application loader.
return APP_LOADER;
}
// Determines the class loader type (BOOT_LOADER, PLATFORM_LOADER or APP_LOADER)
// for class_name, given the class path entry e it was found in.
//
//   - e->is_jrt(): the class's package is mapped to a module through
//     JImagePackageToModule, and the module is mapped to a loader type with
//     module_to_classloader(). If no package or no module is found the result
//     stays APP_LOADER.
//   - otherwise: BOOT_LOADER if ClassLoaderExt::is_boot_classpath(classpath_index),
//     else APP_LOADER.
//
// classpath_index is only consulted in the non-jrt case.
// NOTE(review): CHECK_0 presumably makes this return 0 when package_from_name
// leaves a pending exception -- confirm against the macro definition.
s2 ClassLoader::classloader_type(Symbol* class_name, ClassPathEntry* e, int classpath_index, TRAPS) {
assert(DumpSharedSpaces, "Only used for CDS dump time");
// obtain the classloader type based on the class name.
// First obtain the package name based on the class name. Then obtain
// the classloader type based on the package name from the jimage using
// a jimage API. If the classloader type cannot be found from the
// jimage, it is determined by the class path entry.
jshort loader_type = ClassLoader::APP_LOADER;
if (e->is_jrt()) {
ResourceMark rm;
TempNewSymbol pkg_name = InstanceKlass::package_from_name(class_name, CHECK_0);
if (pkg_name != NULL) {
const char* pkg_name_C_string = (const char*)(pkg_name->as_C_string());
// e is treated as a ClassPathImageEntry here on the strength of is_jrt().
ClassPathImageEntry* cpie = (ClassPathImageEntry*)e;
JImageFile* jimage = cpie->jimage();
char* module_name = (char*)(*JImagePackageToModule)(jimage, pkg_name_C_string);
if (module_name != NULL) {
loader_type = ClassLoader::module_to_classloader(module_name);
}
}
} else if (ClassLoaderExt::is_boot_classpath(classpath_index)) {
loader_type = ClassLoader::BOOT_LOADER;
}
return loader_type;
}
#endif
// caller needs ResourceMark
const char* ClassLoader::file_name_for_class_name(const char* class_name,
int class_name_len) {

View File

@ -37,13 +37,6 @@
// Name of boot "modules" image
#define MODULES_IMAGE_NAME "modules"
// Name of the resource containing mapping from module names to defining class loader type
#define MODULE_LOADER_MAP "jdk/internal/vm/cds/resources/ModuleLoaderMap.dat"
// Initial sizes of the following arrays are based on the generated ModuleLoaderMap.dat
#define INITIAL_BOOT_MODULES_ARRAY_SIZE 30
#define INITIAL_PLATFORM_MODULES_ARRAY_SIZE 15
// Class path entry (directory or zip file)
class JImageFile;
@ -403,7 +396,8 @@ class ClassLoader: AllStatic {
static int compute_Object_vtable();
static ClassPathEntry* classpath_entry(int n) {
assert(n >= 0 && n < _num_entries, "sanity");
assert(n >= 0, "sanity");
assert(!has_jrt_entry() || n < _num_entries, "sanity");
if (n == 0) {
assert(has_jrt_entry(), "No class path entry at 0 for exploded module builds");
return ClassLoader::_jrt_entry;
@ -438,10 +432,6 @@ class ClassLoader: AllStatic {
static bool check_shared_paths_misc_info(void* info, int size);
static void exit_with_path_failure(const char* error, const char* message);
static s2 module_to_classloader(const char* module_name);
static void initialize_module_loader_map(JImageFile* jimage);
static s2 classloader_type(Symbol* class_name, ClassPathEntry* e,
int classpath_index, TRAPS);
static void record_shared_class_loader_type(InstanceKlass* ik, const ClassFileStream* stream);
#endif
static JImageLocationRef jimage_find_resource(JImageFile* jf, const char* module_name,

View File

@ -98,7 +98,8 @@ ClassLoaderData::ClassLoaderData(Handle h_class_loader, bool is_anonymous, Depen
_keep_alive((is_anonymous || h_class_loader.is_null()) ? 1 : 0),
_metaspace(NULL), _unloading(false), _klasses(NULL),
_modules(NULL), _packages(NULL),
_claimed(0), _jmethod_ids(NULL), _handles(), _deallocate_list(NULL),
_claimed(0), _modified_oops(true), _accumulated_modified_oops(false),
_jmethod_ids(NULL), _handles(), _deallocate_list(NULL),
_next(NULL), _dependencies(dependencies),
_metaspace_lock(new Mutex(Monitor::leaf+1, "Metaspace allocation lock", true,
Monitor::_safepoint_check_never)) {
@ -207,7 +208,7 @@ bool ClassLoaderData::ChunkedHandleList::contains(oop* p) {
oops_do(&cl);
return cl.found();
}
#endif
#endif // ASSERT
bool ClassLoaderData::claim() {
if (_claimed == 1) {
@ -236,19 +237,19 @@ void ClassLoaderData::dec_keep_alive() {
}
}
void ClassLoaderData::oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
void ClassLoaderData::oops_do(OopClosure* f, bool must_claim, bool clear_mod_oops) {
if (must_claim && !claim()) {
return;
}
// Only clear modified_oops after the ClassLoaderData is claimed.
if (clear_mod_oops) {
clear_modified_oops();
}
f->do_oop(&_class_loader);
_dependencies.oops_do(f);
_handles.oops_do(f);
if (klass_closure != NULL) {
classes_do(klass_closure);
}
}
void ClassLoaderData::Dependencies::oops_do(OopClosure* f) {
@ -368,6 +369,9 @@ void ClassLoaderData::record_dependency(const Klass* k, TRAPS) {
// Must handle over GC point.
Handle dependency(THREAD, to);
from_cld->_dependencies.add(dependency, CHECK);
// Added a potentially young gen oop to the ClassLoaderData
record_modified_oops();
}
@ -764,6 +768,7 @@ Metaspace* ClassLoaderData::metaspace_non_null() {
// Adds the oop held by h to this ClassLoaderData's _handles list and returns
// an OopHandle wrapping the value returned by _handles.add().
// The whole operation runs with metaspace_lock() held, acquired without a
// safepoint check.
OopHandle ClassLoaderData::add_handle(Handle h) {
MutexLockerEx ml(metaspace_lock(), Mutex::_no_safepoint_check_flag);
// Flag this ClassLoaderData as having modified oops before the new entry
// is added (record_modified_oops() sets _modified_oops).
record_modified_oops();
return OopHandle(_handles.add(h()));
}
@ -875,8 +880,7 @@ void ClassLoaderData::dump(outputStream * const out) {
if (Verbose) {
Klass* k = _klasses;
while (k != NULL) {
out->print_cr("klass " PTR_FORMAT ", %s, CT: %d, MUT: %d", k, k->name()->as_C_string(),
k->has_modified_oops(), k->has_accumulated_modified_oops());
out->print_cr("klass " PTR_FORMAT ", %s", p2i(k), k->name()->as_C_string());
assert(k != k->next_link(), "no loops!");
k = k->next_link();
}
@ -1003,25 +1007,25 @@ void ClassLoaderDataGraph::print_creation(outputStream* out, Handle loader, Clas
}
void ClassLoaderDataGraph::oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
void ClassLoaderDataGraph::oops_do(OopClosure* f, bool must_claim) {
for (ClassLoaderData* cld = _head; cld != NULL; cld = cld->next()) {
cld->oops_do(f, klass_closure, must_claim);
cld->oops_do(f, must_claim);
}
}
void ClassLoaderDataGraph::keep_alive_oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
void ClassLoaderDataGraph::keep_alive_oops_do(OopClosure* f, bool must_claim) {
for (ClassLoaderData* cld = _head; cld != NULL; cld = cld->next()) {
if (cld->keep_alive()) {
cld->oops_do(f, klass_closure, must_claim);
cld->oops_do(f, must_claim);
}
}
}
void ClassLoaderDataGraph::always_strong_oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
void ClassLoaderDataGraph::always_strong_oops_do(OopClosure* f, bool must_claim) {
if (ClassUnloading) {
keep_alive_oops_do(f, klass_closure, must_claim);
keep_alive_oops_do(f, must_claim);
} else {
oops_do(f, klass_closure, must_claim);
oops_do(f, must_claim);
}
}

View File

@ -87,9 +87,9 @@ class ClassLoaderDataGraph : public AllStatic {
static void purge();
static void clear_claimed_marks();
// oops do
static void oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim);
static void keep_alive_oops_do(OopClosure* blk, KlassClosure* klass_closure, bool must_claim);
static void always_strong_oops_do(OopClosure* blk, KlassClosure* klass_closure, bool must_claim);
static void oops_do(OopClosure* f, bool must_claim);
static void keep_alive_oops_do(OopClosure* blk, bool must_claim);
static void always_strong_oops_do(OopClosure* blk, bool must_claim);
// cld do
static void cld_do(CLDClosure* cl);
static void cld_unloading_do(CLDClosure* cl);
@ -230,10 +230,16 @@ class ClassLoaderData : public CHeapObj<mtClass> {
Mutex* _metaspace_lock; // Locks the metaspace for allocations and setup.
bool _unloading; // true if this class loader goes away
bool _is_anonymous; // if this CLD is for an anonymous class
// Remembered sets support for the oops in the class loader data.
bool _modified_oops; // Card Table Equivalent (YC/CMS support)
bool _accumulated_modified_oops; // Mod Union Equivalent (CMS support)
s2 _keep_alive; // if this CLD is kept alive without a keep_alive_object().
// Used for anonymous classes and the boot class
// loader. _keep_alive does not need to be volatile or
// atomic since there is one unique CLD per anonymous class.
volatile int _claimed; // true if claimed, for example during GC traces.
// To avoid applying oop closure more than once.
// Has to be an int because we cas it.
@ -276,6 +282,19 @@ class ClassLoaderData : public CHeapObj<mtClass> {
bool claimed() const { return _claimed == 1; }
bool claim();
// The CLD are not placed in the Heap, so the Card Table or
// the Mod Union Table can't be used to mark when CLD have modified oops.
// The CT and MUT bits save this information for the whole class loader data.
void clear_modified_oops() { _modified_oops = false; }
public:
void record_modified_oops() { _modified_oops = true; }
bool has_modified_oops() { return _modified_oops; }
void accumulate_modified_oops() { if (has_modified_oops()) _accumulated_modified_oops = true; }
void clear_accumulated_modified_oops() { _accumulated_modified_oops = false; }
bool has_accumulated_modified_oops() { return _accumulated_modified_oops; }
private:
void unload();
bool keep_alive() const { return _keep_alive > 0; }
void classes_do(void f(Klass*));
@ -346,8 +365,7 @@ class ClassLoaderData : public CHeapObj<mtClass> {
inline unsigned int identity_hash() const { return (unsigned int)(((intptr_t)this) >> 3); }
// Used when tracing from klasses.
void oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim);
void oops_do(OopClosure* f, bool must_claim, bool clear_modified_oops = false);
void classes_do(KlassClosure* klass_closure);
Klass* klasses() { return _klasses; }

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -767,15 +767,14 @@ static void create_default_methods( InstanceKlass* klass,
// This is the guts of the default methods implementation. This is called just
// after the classfile has been parsed if some ancestor has default methods.
//
// First if finds any name/signature slots that need any implementation (either
// First it finds any name/signature slots that need any implementation (either
// because they are miranda or a superclass's implementation is an overpass
// itself). For each slot, iterate over the hierarchy, to see if they contain a
// signature that matches the slot we are looking at.
//
// For each slot filled, we generate an overpass method that either calls the
// unique default method candidate using invokespecial, or throws an exception
// (in the case of no default method candidates, or more than one valid
// candidate). These methods are then added to the class's method list.
// For each slot filled, we either record the default method candidate in the
// klass default_methods list or, only to handle exception cases, we create an
// overpass method that throws an exception and add it to the klass methods list.
// The JVM does not create bridges nor handle generic signatures here.
void DefaultMethods::generate_default_methods(
InstanceKlass* klass, const GrowableArray<Method*>* mirandas, TRAPS) {
@ -901,6 +900,11 @@ static void switchover_constant_pool(BytecodeConstantPool* bpool,
// This allows virtual methods to override the overpass, but ensures
// that a local method search will find the exception rather than an abstract
// or default method that is not a valid candidate.
//
// Note that if overpass methods are ever created that are not exception
// throwing methods then the loader constraint checking logic for vtable and
// itable creation needs to be changed to check loader constraints for the
// overpass methods that do not throw exceptions.
static void create_defaults_and_exceptions(
GrowableArray<EmptyVtableSlot*>* slots,
InstanceKlass* klass, TRAPS) {

View File

@ -889,7 +889,7 @@ void java_lang_Class::create_mirror(Klass* k, Handle class_loader,
// Setup indirection from klass->mirror
// after any exceptions can happen during allocations.
k->set_java_mirror(mirror());
k->set_java_mirror(mirror);
// Set the module field in the java_lang_Class instance. This must be done
// after the mirror is set.

View File

@ -461,6 +461,8 @@
template(getProtectionDomain_signature, "(Ljava/security/CodeSource;)Ljava/security/ProtectionDomain;") \
template(url_code_signer_array_void_signature, "(Ljava/net/URL;[Ljava/security/CodeSigner;)V") \
template(module_entry_name, "module_entry") \
template(resolved_references_name, "<resolved_references>") \
template(init_lock_name, "<init_lock>") \
\
/* name symbols needed by intrinsics */ \
VM_INTRINSICS_DO(VM_INTRINSIC_IGNORE, VM_SYMBOL_IGNORE, template, VM_SYMBOL_IGNORE, VM_ALIAS_IGNORE) \

View File

@ -332,7 +332,7 @@ public:
static void disable_compilation_forever() {
UseCompiler = false;
AlwaysCompileLoopMethods = false;
Atomic::xchg(shutdown_compilation, &_should_compile_new_jobs);
Atomic::xchg(jint(shutdown_compilation), &_should_compile_new_jobs);
}
static bool is_compilation_disabled_forever() {

View File

@ -96,7 +96,7 @@ bool MethodMatcher::canonicalize(char * line, const char *& error_msg) {
bool have_colon = (colon != NULL);
if (have_colon) {
// Don't allow multiple '::'
if (colon + 2 != '\0') {
if (colon[2] != '\0') {
if (strstr(colon+2, "::")) {
error_msg = "Method pattern only allows one '::' allowed";
return false;

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -40,9 +40,6 @@
#ifdef COMPILER2
#include "opto/optoreg.hpp"
#endif
#ifdef SPARC
#include "vmreg_sparc.inline.hpp"
#endif
// OopMapStream

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2007, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -48,12 +48,7 @@ class ParMarkFromRootsClosure;
// because some CMS OopClosures derive from OopsInGenClosure. It would be
// good to get rid of them completely.
class MetadataAwareOopsInGenClosure: public OopsInGenClosure {
KlassToOopClosure _klass_closure;
public:
MetadataAwareOopsInGenClosure() {
_klass_closure.initialize(this);
}
virtual bool do_metadata() { return do_metadata_nv(); }
inline bool do_metadata_nv() { return true; }

View File

@ -40,10 +40,8 @@ inline void MetadataAwareOopsInGenClosure::do_klass_nv(Klass* k) {
inline void MetadataAwareOopsInGenClosure::do_klass(Klass* k) { do_klass_nv(k); }
inline void MetadataAwareOopsInGenClosure::do_cld_nv(ClassLoaderData* cld) {
assert(_klass_closure._oop_closure == this, "Must be");
bool claim = true; // Must claim the class loader data before processing.
cld->oops_do(_klass_closure._oop_closure, &_klass_closure, claim);
cld->oops_do(this, claim);
}
// Decode the oop and call do_oop on it.

View File

@ -1075,7 +1075,7 @@ ConcurrentMarkSweepGeneration::par_promote(int thread_num,
obj_ptr, old->is_objArray(), word_sz);
NOT_PRODUCT(
Atomic::inc_ptr(&_numObjectsPromoted);
Atomic::inc(&_numObjectsPromoted);
Atomic::add_ptr(alloc_sz, &_numWordsPromoted);
)
@ -1553,9 +1553,10 @@ void CMSCollector::do_compaction_work(bool clear_all_soft_refs) {
assert(_collectorState != Idling || _modUnionTable.isAllClear(),
"_modUnionTable should be clear if the baton was not passed");
_modUnionTable.clear_all();
assert(_collectorState != Idling || _ct->klass_rem_set()->mod_union_is_clear(),
assert(_collectorState != Idling || _ct->cld_rem_set()->mod_union_is_clear(),
"mod union for klasses should be clear if the baton was passed");
_ct->klass_rem_set()->clear_mod_union();
_ct->cld_rem_set()->clear_mod_union();
// We must adjust the allocation statistics being maintained
// in the free list space. We do so by reading and clearing
@ -2025,7 +2026,7 @@ void CMSCollector::gc_prologue(bool full) {
// that information. Tell the young collection to save the union of all
// modified klasses.
if (duringMarking) {
_ct->klass_rem_set()->set_accumulate_modified_oops(true);
_ct->cld_rem_set()->set_accumulate_modified_oops(true);
}
bool registerClosure = duringMarking;
@ -2101,7 +2102,7 @@ void CMSCollector::gc_epilogue(bool full) {
assert(haveFreelistLocks(), "must have freelist locks");
assert_lock_strong(bitMapLock());
_ct->klass_rem_set()->set_accumulate_modified_oops(false);
_ct->cld_rem_set()->set_accumulate_modified_oops(false);
_cmsGen->gc_epilogue_work(full);
@ -2380,18 +2381,18 @@ void CMSCollector::verify_after_remark_work_1() {
}
}
class VerifyKlassOopsKlassClosure : public KlassClosure {
class VerifyKlassOopsClosure : public OopClosure {
class VerifyCLDOopsCLDClosure : public CLDClosure {
class VerifyCLDOopsClosure : public OopClosure {
CMSBitMap* _bitmap;
public:
VerifyKlassOopsClosure(CMSBitMap* bitmap) : _bitmap(bitmap) { }
VerifyCLDOopsClosure(CMSBitMap* bitmap) : _bitmap(bitmap) { }
void do_oop(oop* p) { guarantee(*p == NULL || _bitmap->isMarked((HeapWord*) *p), "Should be marked"); }
void do_oop(narrowOop* p) { ShouldNotReachHere(); }
} _oop_closure;
public:
VerifyKlassOopsKlassClosure(CMSBitMap* bitmap) : _oop_closure(bitmap) {}
void do_klass(Klass* k) {
k->oops_do(&_oop_closure);
VerifyCLDOopsCLDClosure(CMSBitMap* bitmap) : _oop_closure(bitmap) {}
void do_cld(ClassLoaderData* cld) {
cld->oops_do(&_oop_closure, false, false);
}
};
@ -2437,8 +2438,8 @@ void CMSCollector::verify_after_remark_work_2() {
assert(verification_mark_stack()->isEmpty(), "Should have been drained");
verify_work_stacks_empty();
VerifyKlassOopsKlassClosure verify_klass_oops(verification_mark_bm());
ClassLoaderDataGraph::classes_do(&verify_klass_oops);
VerifyCLDOopsCLDClosure verify_cld_oops(verification_mark_bm());
ClassLoaderDataGraph::cld_do(&verify_cld_oops);
// Marking completed -- now verify that each bit marked in
// verification_mark_bm() is also marked in markBitMap(); flag all
@ -2911,7 +2912,7 @@ void CMSCollector::checkpointRootsInitialWork() {
" or no bits are set in the gc_prologue before the start of the next "
"subsequent marking phase.");
assert(_ct->klass_rem_set()->mod_union_is_clear(), "Must be");
assert(_ct->cld_rem_set()->mod_union_is_clear(), "Must be");
// Save the end of the used_region of the constituent generations
// to be used to limit the extent of sweep in each generation.
@ -3848,7 +3849,7 @@ size_t CMSCollector::preclean_work(bool clean_refs, bool clean_survivor) {
}
}
preclean_klasses(&mrias_cl, _cmsGen->freelistLock());
preclean_cld(&mrias_cl, _cmsGen->freelistLock());
curNumCards = preclean_card_table(_cmsGen, &smoac_cl);
cumNumCards += curNumCards;
@ -4067,21 +4068,21 @@ size_t CMSCollector::preclean_card_table(ConcurrentMarkSweepGeneration* old_gen,
return cumNumDirtyCards;
}
class PrecleanKlassClosure : public KlassClosure {
KlassToOopClosure _cm_klass_closure;
class PrecleanCLDClosure : public CLDClosure {
MetadataAwareOopsInGenClosure* _cm_closure;
public:
PrecleanKlassClosure(OopClosure* oop_closure) : _cm_klass_closure(oop_closure) {}
void do_klass(Klass* k) {
if (k->has_accumulated_modified_oops()) {
k->clear_accumulated_modified_oops();
PrecleanCLDClosure(MetadataAwareOopsInGenClosure* oop_closure) : _cm_closure(oop_closure) {}
void do_cld(ClassLoaderData* cld) {
if (cld->has_accumulated_modified_oops()) {
cld->clear_accumulated_modified_oops();
_cm_klass_closure.do_klass(k);
_cm_closure->do_cld(cld);
}
}
};
// The freelist lock is needed to prevent asserts, is it really needed?
void CMSCollector::preclean_klasses(MarkRefsIntoAndScanClosure* cl, Mutex* freelistLock) {
void CMSCollector::preclean_cld(MarkRefsIntoAndScanClosure* cl, Mutex* freelistLock) {
cl->set_freelistLock(freelistLock);
@ -4089,8 +4090,8 @@ void CMSCollector::preclean_klasses(MarkRefsIntoAndScanClosure* cl, Mutex* freel
// SSS: Add equivalent to ScanMarkedObjectsAgainCarefullyClosure::do_yield_check and should_abort_preclean?
// SSS: We should probably check if precleaning should be aborted, at suitable intervals?
PrecleanKlassClosure preclean_klass_closure(cl);
ClassLoaderDataGraph::classes_do(&preclean_klass_closure);
PrecleanCLDClosure preclean_closure(cl);
ClassLoaderDataGraph::cld_do(&preclean_closure);
verify_work_stacks_empty();
verify_overflow_empty();
@ -4250,7 +4251,7 @@ void CMSCollector::checkpointRootsFinalWork() {
// Call isAllClear() under bitMapLock
assert(_modUnionTable.isAllClear(),
"Should be clear by end of the final marking");
assert(_ct->klass_rem_set()->mod_union_is_clear(),
assert(_ct->cld_rem_set()->mod_union_is_clear(),
"Should be clear by end of the final marking");
}
@ -4332,26 +4333,26 @@ class CMSParRemarkTask: public CMSParMarkTask {
void do_work_steal(int i, ParMarkRefsIntoAndScanClosure* cl, int* seed);
};
class RemarkKlassClosure : public KlassClosure {
KlassToOopClosure _cm_klass_closure;
class RemarkCLDClosure : public CLDClosure {
CLDToOopClosure _cm_closure;
public:
RemarkKlassClosure(OopClosure* oop_closure) : _cm_klass_closure(oop_closure) {}
void do_klass(Klass* k) {
// Check if we have modified any oops in the Klass during the concurrent marking.
if (k->has_accumulated_modified_oops()) {
k->clear_accumulated_modified_oops();
RemarkCLDClosure(OopClosure* oop_closure) : _cm_closure(oop_closure) {}
void do_cld(ClassLoaderData* cld) {
// Check if we have modified any oops in the CLD during the concurrent marking.
if (cld->has_accumulated_modified_oops()) {
cld->clear_accumulated_modified_oops();
// We could have transferred the current modified marks to the accumulated marks,
// like we do with the Card Table to Mod Union Table. But it's not really necessary.
} else if (k->has_modified_oops()) {
} else if (cld->has_modified_oops()) {
// Don't clear anything, this info is needed by the next young collection.
} else {
// No modified oops in the Klass.
// No modified oops in the ClassLoaderData.
return;
}
// The klass has modified fields, need to scan the klass.
_cm_klass_closure.do_klass(k);
_cm_closure.do_cld(cld);
}
};
@ -4439,24 +4440,24 @@ void CMSParRemarkTask::work(uint worker_id) {
log_trace(gc, task)("Finished unhandled CLD scanning work in %dth thread: %3.3f sec", worker_id, _timer.seconds());
}
// ---------- dirty klass scanning ----------
// We might have added oops to ClassLoaderData::_handles during the
// concurrent marking phase. These oops do not always point to newly allocated objects
// that are guaranteed to be kept alive. Hence,
// we do have to revisit the _handles block during the remark phase.
// ---------- dirty CLD scanning ----------
if (worker_id == 0) { // Single threaded at the moment.
_timer.reset();
_timer.start();
// Scan all classes that were dirtied during the concurrent marking phase.
RemarkKlassClosure remark_klass_closure(&par_mrias_cl);
ClassLoaderDataGraph::classes_do(&remark_klass_closure);
RemarkCLDClosure remark_closure(&par_mrias_cl);
ClassLoaderDataGraph::cld_do(&remark_closure);
_timer.stop();
log_trace(gc, task)("Finished dirty klass scanning work in %dth thread: %3.3f sec", worker_id, _timer.seconds());
log_trace(gc, task)("Finished dirty CLD scanning work in %dth thread: %3.3f sec", worker_id, _timer.seconds());
}
// We might have added oops to ClassLoaderData::_handles during the
// concurrent marking phase. These oops point to newly allocated objects
// that are guaranteed to be kept alive. Either by the direct allocation
// code, or when the young collector processes the roots. Hence,
// we don't have to revisit the _handles block during the remark phase.
// ---------- rescan dirty cards ------------
_timer.reset();
@ -4981,23 +4982,21 @@ void CMSCollector::do_remark_non_parallel() {
verify_work_stacks_empty();
}
// We might have added oops to ClassLoaderData::_handles during the
// concurrent marking phase. These oops do not point to newly allocated objects
// that are guaranteed to be kept alive. Hence,
// we do have to revisit the _handles block during the remark phase.
{
GCTraceTime(Trace, gc, phases) t("Dirty Klass Scan", _gc_timer_cm);
GCTraceTime(Trace, gc, phases) t("Dirty CLD Scan", _gc_timer_cm);
verify_work_stacks_empty();
RemarkKlassClosure remark_klass_closure(&mrias_cl);
ClassLoaderDataGraph::classes_do(&remark_klass_closure);
RemarkCLDClosure remark_closure(&mrias_cl);
ClassLoaderDataGraph::cld_do(&remark_closure);
verify_work_stacks_empty();
}
// We might have added oops to ClassLoaderData::_handles during the
// concurrent marking phase. These oops point to newly allocated objects
// that are guaranteed to be kept alive. Either by the direct allocation
// code, or when the young collector processes the roots. Hence,
// we don't have to revisit the _handles block during the remark phase.
verify_work_stacks_empty();
// Restore evacuated mark words, if any, used for overflow list links
restore_preserved_marks_if_any();
@ -7974,7 +7973,7 @@ void CMSCollector::push_on_overflow_list(oop p) {
// Multi-threaded; use CAS to prepend to overflow list
void CMSCollector::par_push_on_overflow_list(oop p) {
NOT_PRODUCT(Atomic::inc_ptr(&_num_par_pushes);)
NOT_PRODUCT(Atomic::inc(&_num_par_pushes);)
assert(oopDesc::is_oop(p), "Not an oop");
par_preserve_mark_if_necessary(p);
oop observed_overflow_list = _overflow_list;

View File

@ -777,7 +777,7 @@ class CMSCollector: public CHeapObj<mtGC> {
// Does precleaning work, returning a quantity indicative of
// the amount of "useful work" done.
size_t preclean_work(bool clean_refs, bool clean_survivors);
void preclean_klasses(MarkRefsIntoAndScanClosure* cl, Mutex* freelistLock);
void preclean_cld(MarkRefsIntoAndScanClosure* cl, Mutex* freelistLock);
void abortable_preclean(); // Preclean while looking for possible abort
void initialize_sequential_subtasks_for_young_gen_rescan(int i);
// Helper function for above; merge-sorts the per-thread plab samples

Some files were not shown because too many files have changed in this diff Show More