8231441: AArch64: Initial SVE backend support

Co-authored-by: Joshua Zhu <joshua.zhu@arm.com>
Co-authored-by: Yang Zhang <yang.zhang@arm.com>
Reviewed-by: adinn, pli, ihse, vlivanov, eosterlund
This commit is contained in:
Ningsheng Jian 2020-09-02 09:45:44 +08:00
parent 0dadf81e14
commit 9b5a9b6189
42 changed files with 4881 additions and 498 deletions

View File

@ -129,6 +129,12 @@ ifeq ($(call check-jvm-feature, compiler2), true)
$d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \
)))
ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64)
AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
$d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \
)))
endif
ifeq ($(call check-jvm-feature, shenandoahgc), true)
AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
$d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \

View File

@ -68,6 +68,49 @@ class GeneralRegisterOrSp(Register):
else:
return self.astr("r")
class SVEVectorRegister(FloatRegister):
    """SVE vector register operand; rendered with a "z" prefix."""

    def __str__(self):
        prefix = "z"
        return self.astr(prefix)
class SVEPRegister(Register):
    """SVE predicate register operand; rendered with a "p" prefix."""

    def __str__(self):
        prefix = "p"
        return self.astr(prefix)

    def generate(self):
        """Pick a random register number in the inclusive range 0..15."""
        lowest, highest = 0, 15
        self.number = random.randint(lowest, highest)
        return self
class SVEGoverningPRegister(Register):
    """SVE predicate register operand restricted to numbers 0..7."""

    def __str__(self):
        prefix = "p"
        return self.astr(prefix)

    def generate(self):
        """Pick a random register number in the inclusive range 0..7."""
        lowest, highest = 0, 7
        self.number = random.randint(lowest, highest)
        return self
class RegVariant(object):
    """Randomly chosen element-size variant, stored as a number.

    The number is drawn once in the constructor from the inclusive range
    [low, high]. Numbers 0..4 map to the b/h/s/d/q variants; any other
    number makes astr() and cstr() return None.
    """

    # Assembly-syntax suffix for each variant number.
    _ASM_SUFFIX = {0: ".b", 1: ".h", 2: ".s", 3: ".d", 4: ".q"}
    # Assembler-call argument text for each variant number.
    _C_ARGUMENT = {0: "__ B", 1: "__ H", 2: "__ S", 3: "__ D", 4: "__ Q"}

    def __init__(self, low, high):
        self.number = random.randint(low, high)

    def astr(self):
        """Return the assembly suffix (for example ".s"), or None if unknown."""
        return self._ASM_SUFFIX.get(self.number)

    def cstr(self):
        """Return the assembler-call argument (for example "__ S"), or None."""
        return self._C_ARGUMENT.get(self.number)
class FloatZero(Operand):
def __str__(self):
@ -82,7 +125,10 @@ class OperandFactory:
'w' : GeneralRegister,
's' : FloatRegister,
'd' : FloatRegister,
'z' : FloatZero}
'z' : FloatZero,
'p' : SVEPRegister,
'P' : SVEGoverningPRegister,
'Z' : SVEVectorRegister}
@classmethod
def create(cls, mode):
@ -839,6 +885,100 @@ class FloatInstruction(Instruction):
% tuple([Instruction.astr(self)] +
[(self.reg[i].astr(self.modes[i])) for i in range(self.numRegs)]))
class SVEVectorOp(Instruction):
    """Test-case generator for an SVE vector instruction.

    args is [name, regTypes] or [name, regTypes, dnm]. regTypes has one
    operand-mode character per register; dnm == "dn" marks the forms whose
    assembly repeats the destination as the first source.
    """

    def __init__(self, args):
        name, regTypes = args[0], args[1]
        # One operand per mode character, generated in order.
        self.reg = [OperandFactory.create(mode).generate() for mode in regTypes]
        self.numRegs = len(self.reg)

        # Predicated forms carry a governing predicate as the second operand.
        self._isPredicated = regTypes[0] != "p" and regTypes[1] == 'P'
        self._merge = "/m" if self._isPredicated else ""

        # Unpredicated and/eor/orr take no size argument in the assembler call.
        self._bitwiseop = (not self._isPredicated) and name in ("and", "eor", "orr")
        if name[0] == 'f':
            self._width = RegVariant(2, 3)
        elif self._bitwiseop:
            self._width = RegVariant(3, 3)
        else:
            self._width = RegVariant(0, 3)

        self._dnm = args[2] if len(args) > 2 else None
        Instruction.__init__(self, name)

    def cstr(self):
        """Return the C++ assembler call for this instruction."""
        pieces = [str(self.reg[0])]
        if not self._bitwiseop:
            pieces.append(str(self._width.cstr()))
        pieces.extend(str(operand) for operand in self.reg[1:])
        return "%s%s" % ("__ sve_" + self._name + "(", ", ".join(pieces) + ");")

    def astr(self):
        """Return the expected assembly text for this instruction."""
        suffix = self._width.astr()
        destination = str(self.reg[0]) + suffix
        pieces = [destination]
        if self._isPredicated:
            pieces.append(str(self.reg[1]) + self._merge)
            firstSource = 2
        else:
            firstSource = 1
        if self._dnm == 'dn':
            # Destructive form: the destination is repeated as first source.
            pieces.append(destination)
        for operand in self.reg[firstSource:]:
            pieces.append(str(operand) + suffix)
        return "%s%s" % (Instruction.astr(self), ", ".join(pieces))

    def generate(self):
        """Operands are already generated in the constructor."""
        return self
class SVEReductionOp(Instruction):
    """Test-case generator for an SVE reduction instruction.

    args is [name, lowRegType]; lowRegType is the smallest element-size
    variant number allowed, the largest is always 3.
    """

    def __init__(self, args):
        name, lowRegType = args[0], args[1]
        self.reg = []
        Instruction.__init__(self, name)
        # Operand modes: 's' destination, 'P' governing predicate, 'Z' source.
        for mode in ('s', 'P', 'Z'):
            self.reg.append(OperandFactory.create(mode).generate())
        self._width = RegVariant(lowRegType, 3)

    def cstr(self):
        """Return the C++ assembler call for this instruction."""
        destination, predicate, source = self.reg
        return "__ sve_%s(%s, %s, %s, %s);" % (self.name(),
                                               str(destination),
                                               self._width.cstr(),
                                               str(predicate),
                                               str(source))

    def astr(self):
        """Return the expected assembly text for this instruction."""
        mnemonic = self.name()
        if mnemonic == "uaddv":
            # uaddv always names its destination with the "d" prefix.
            prefix = "d"
        else:
            # Character after the dot of the size suffix, e.g. "s" from ".s".
            prefix = self._width.astr()[1]
        dstRegName = prefix + str(self.reg[0].number)

        pieces = [dstRegName, str(self.reg[1])]
        if mnemonic == "fadda":
            # fadda repeats the destination as its initial-value operand.
            pieces.append(dstRegName)
        pieces.append(str(self.reg[2]) + self._width.astr())
        return "%s %s" % (mnemonic, ", ".join(pieces))
class LdStSIMDOp(Instruction):
def __init__(self, args):
self._name, self.regnum, self.arrangement, self.addresskind = args
@ -1160,7 +1300,42 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
["mov", "__ mov(v1, __ T2S, 1, zr);", "mov\tv1.s[1], wzr"],
["mov", "__ mov(v1, __ T4H, 2, zr);", "mov\tv1.h[2], wzr"],
["mov", "__ mov(v1, __ T8B, 3, zr);", "mov\tv1.b[3], wzr"],
["ld1", "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"]])
["ld1", "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"],
# SVE instructions
["cpy", "__ sve_cpy(z0, __ S, p0, v1);", "mov\tz0.s, p0/m, s1"],
["inc", "__ sve_inc(r0, __ S);", "incw\tx0"],
["dec", "__ sve_dec(r1, __ H);", "dech\tx1"],
["lsl", "__ sve_lsl(z0, __ B, z1, 7);", "lsl\tz0.b, z1.b, #7"],
["lsl", "__ sve_lsl(z21, __ H, z1, 15);", "lsl\tz21.h, z1.h, #15"],
["lsl", "__ sve_lsl(z0, __ S, z1, 31);", "lsl\tz0.s, z1.s, #31"],
["lsl", "__ sve_lsl(z0, __ D, z1, 63);", "lsl\tz0.d, z1.d, #63"],
["lsr", "__ sve_lsr(z0, __ B, z1, 7);", "lsr\tz0.b, z1.b, #7"],
["asr", "__ sve_asr(z0, __ H, z11, 15);", "asr\tz0.h, z11.h, #15"],
["lsr", "__ sve_lsr(z30, __ S, z1, 31);", "lsr\tz30.s, z1.s, #31"],
["asr", "__ sve_asr(z0, __ D, z1, 63);", "asr\tz0.d, z1.d, #63"],
["addvl", "__ sve_addvl(sp, r0, 31);", "addvl\tsp, x0, #31"],
["addpl", "__ sve_addpl(r1, sp, -32);", "addpl\tx1, sp, -32"],
["cntp", "__ sve_cntp(r8, __ B, p0, p1);", "cntp\tx8, p0, p1.b"],
["dup", "__ sve_dup(z0, __ B, 127);", "dup\tz0.b, 127"],
["dup", "__ sve_dup(z1, __ H, -128);", "dup\tz1.h, -128"],
["dup", "__ sve_dup(z2, __ S, 32512);", "dup\tz2.s, 32512"],
["dup", "__ sve_dup(z7, __ D, -32768);", "dup\tz7.d, -32768"],
["ld1b", "__ sve_ld1b(z0, __ B, p0, Address(sp));", "ld1b\t{z0.b}, p0/z, [sp]"],
["ld1h", "__ sve_ld1h(z10, __ H, p1, Address(sp, -8));", "ld1h\t{z10.h}, p1/z, [sp, #-8, MUL VL]"],
["ld1w", "__ sve_ld1w(z20, __ S, p2, Address(r0, 7));", "ld1w\t{z20.s}, p2/z, [x0, #7, MUL VL]"],
["ld1b", "__ sve_ld1b(z30, __ B, p3, Address(sp, r8));", "ld1b\t{z30.b}, p3/z, [sp, x8]"],
["ld1w", "__ sve_ld1w(z0, __ S, p4, Address(sp, r28));", "ld1w\t{z0.s}, p4/z, [sp, x28, LSL #2]"],
["ld1d", "__ sve_ld1d(z11, __ D, p5, Address(r0, r1));", "ld1d\t{z11.d}, p5/z, [x0, x1, LSL #3]"],
["st1b", "__ sve_st1b(z22, __ B, p6, Address(sp));", "st1b\t{z22.b}, p6, [sp]"],
["st1b", "__ sve_st1b(z31, __ B, p7, Address(sp, -8));", "st1b\t{z31.b}, p7, [sp, #-8, MUL VL]"],
["st1w", "__ sve_st1w(z0, __ S, p1, Address(r0, 7));", "st1w\t{z0.s}, p1, [x0, #7, MUL VL]"],
["st1b", "__ sve_st1b(z0, __ B, p2, Address(sp, r1));", "st1b\t{z0.b}, p2, [sp, x1]"],
["st1h", "__ sve_st1h(z0, __ H, p3, Address(sp, r8));", "st1h\t{z0.h}, p3, [sp, x8, LSL #1]"],
["st1d", "__ sve_st1d(z0, __ D, p4, Address(r0, r18));", "st1d\t{z0.d}, p4, [x0, x18, LSL #3]"],
["ldr", "__ sve_ldr(z0, Address(sp));", "ldr\tz0, [sp]"],
["ldr", "__ sve_ldr(z31, Address(sp, -256));", "ldr\tz31, [sp, #-256, MUL VL]"],
["str", "__ sve_str(z8, Address(r8, 255));", "str\tz8, [x8, #255, MUL VL]"],
])
print "\n// FloatImmediateOp"
for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125",
@ -1185,6 +1360,49 @@ for size in ("x", "w"):
["ldumin", "ldumin", size, suffix],
["ldumax", "ldumax", size, suffix]]);
# SVE vector instruction cases. Each entry is [name, operand modes] with an
# optional third element "dn" consumed by SVEVectorOp. Operand modes are
# OperandFactory keys: 'Z' vector register, 'P' governing predicate.
generate(SVEVectorOp, [["add", "ZZZ"],
["sub", "ZZZ"],
["fadd", "ZZZ"],
["fmul", "ZZZ"],
["fsub", "ZZZ"],
["abs", "ZPZ"],
["add", "ZPZ", "dn"],
["asr", "ZPZ", "dn"],
["cnt", "ZPZ"],
["lsl", "ZPZ", "dn"],
["lsr", "ZPZ", "dn"],
["mul", "ZPZ", "dn"],
["neg", "ZPZ"],
["not", "ZPZ"],
["smax", "ZPZ", "dn"],
["smin", "ZPZ", "dn"],
["sub", "ZPZ", "dn"],
["fabs", "ZPZ"],
["fadd", "ZPZ", "dn"],
["fdiv", "ZPZ", "dn"],
["fmax", "ZPZ", "dn"],
["fmin", "ZPZ", "dn"],
["fmul", "ZPZ", "dn"],
["fneg", "ZPZ"],
["frintm", "ZPZ"],
["frintn", "ZPZ"],
["frintp", "ZPZ"],
["fsqrt", "ZPZ"],
["fsub", "ZPZ", "dn"],
["fmla", "ZPZZ"],
["fmls", "ZPZZ"],
["fnmla", "ZPZZ"],
["fnmls", "ZPZZ"],
["mla", "ZPZZ"],
["mls", "ZPZZ"],
["and", "ZZZ"],
["eor", "ZZZ"],
["orr", "ZZZ"],
])
# SVE reduction cases. Each entry is [name, lowest element-size variant];
# SVEReductionOp draws the variant from that number up to 3.
generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0],
["fminv", 2], ["fmaxv", 2], ["fadda", 2], ["uaddv", 0]])
print "\n __ bind(forth);"
outfile.write("forth:\n")
@ -1193,8 +1411,8 @@ outfile.close()
import subprocess
import sys
# compile for 8.1 and sha2 because of lse atomics and sha512 crypto extension.
subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2", "aarch64ops.s", "-o", "aarch64ops.o"])
# compile for sve with 8.1 and sha2 because of lse atomics and sha512 crypto extension.
subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2+sve", "aarch64ops.s", "-o", "aarch64ops.o"])
print
print "/*",

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,767 @@
//
// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2020, Arm Limited. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//
dnl Generate the warning
// This file is automatically generated by running "m4 aarch64_sve_ad.m4". Do not edit ----
dnl
// AArch64 SVE Architecture Description File
dnl
dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET($1, $2, $3 )
dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET(imm_type_abbr, imm_type, imm_len)
dnl Emits a constant operand named vmemA_imm<abbr>Offset<len>. It matches a
dnl Con<abbr> node only when Address::offset_ok_for_sve_immed accepts the
dnl value for <len> bits and the T_BYTE scalable vector register size.
define(`OPERAND_VMEMORYA_IMMEDIATE_OFFSET', `
operand vmemA_imm$1Offset$3()
%{
predicate(Address::offset_ok_for_sve_immed(n->get_$2(), $3,
Matcher::scalable_vector_reg_size(T_BYTE)));
match(Con$1);
op_cost(0);
format %{ %}
interface(CONST_INTER);
%}')
dnl
// 4 bit signed offset -- for predicated load/store
OPERAND_VMEMORYA_IMMEDIATE_OFFSET(I, int, 4)
OPERAND_VMEMORYA_IMMEDIATE_OFFSET(L, long, 4)
dnl
dnl OPERAND_VMEMORYA_INDIRECT_OFFSET($1, $2 )
dnl OPERAND_VMEMORYA_INDIRECT_OFFSET(imm_type_abbr, imm_len)
dnl Emits a memory operand named vmemA_indOff<abbr><len>: a pointer register
dnl plus an immediate offset operand of the matching type. The memory
dnl interface uses no index register (index field 0xffffffff) and scale 0.
dnl The builtin name index is quoted below so m4 does not expand it.
define(`OPERAND_VMEMORYA_INDIRECT_OFFSET', `
operand vmemA_indOff$1$2(iRegP reg, vmemA_imm$1Offset$2 off)
%{
constraint(ALLOC_IN_RC(ptr_reg));
match(AddP reg off);
op_cost(0);
format %{ "[$reg, $off, MUL VL]" %}
interface(MEMORY_INTER) %{
base($reg);
`index'(0xffffffff);
scale(0x0);
disp($off);
%}
%}')
dnl
OPERAND_VMEMORYA_INDIRECT_OFFSET(I, 4)
OPERAND_VMEMORYA_INDIRECT_OFFSET(L, 4)
opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4);
source_hpp %{
bool op_sve_supported(int opcode);
%}
source %{
// Returns the element basic type of the vector type produced by node n.
static inline BasicType vector_element_basic_type(const MachNode* n) {
  return n->bottom_type()->is_vect()->element_basic_type();
}
// Returns the element basic type of the vector input that feeds operand
// opnd of node use.
static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) {
  Node* input = use->in(use->operand_index(opnd));
  return input->bottom_type()->is_vect()->element_basic_type();
}
typedef void (C2_MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T,
PRegister Pg, const Address &adr);
// Predicated load/store, with optional ptrue to all elements of given predicate register.
// Emits one predicated SVE load (is_store false) or store (is_store true)
// of vector register reg under predicate pg. The instruction and size
// variant are chosen from the element size in bytes of bt. Only the
// base-plus-displacement form is implemented: the index argument must be
// -1 and size must be 0. The displacement is divided by the scalable
// vector register size before it is passed to Address.
static void loadStoreA_predicate(C2_MacroAssembler masm, bool is_store,
                                 FloatRegister reg, PRegister pg, BasicType bt,
                                 int opcode, Register base, int index, int size, int disp) {
  sve_mem_insn_predicate insn;
  Assembler::SIMD_RegVariant type;
  int esize = type2aelembytes(bt);

  // Register-indexed addressing is not handled here.
  if (index != -1) {
    assert(false, "unimplemented");
    ShouldNotReachHere();
    return;
  }

  assert(size == 0, "unsupported address mode: scale size = %d", size);
  switch (esize) {
    case 1:
      type = Assembler::B;
      insn = is_store ? &C2_MacroAssembler::sve_st1b : &C2_MacroAssembler::sve_ld1b;
      break;
    case 2:
      type = Assembler::H;
      insn = is_store ? &C2_MacroAssembler::sve_st1h : &C2_MacroAssembler::sve_ld1h;
      break;
    case 4:
      type = Assembler::S;
      insn = is_store ? &C2_MacroAssembler::sve_st1w : &C2_MacroAssembler::sve_ld1w;
      break;
    case 8:
      type = Assembler::D;
      insn = is_store ? &C2_MacroAssembler::sve_st1d : &C2_MacroAssembler::sve_ld1d;
      break;
    default:
      assert(false, "unsupported");
      ShouldNotReachHere();
  }
  int vl_scaled_disp = disp / Matcher::scalable_vector_reg_size(T_BYTE);
  (masm.*insn)(reg, type, pg, Address(base, vl_scaled_disp));
}
// Returns false for the ideal opcodes listed below and true for every
// other opcode.
bool op_sve_supported(int opcode) {
  bool supported = true;
  switch (opcode) {
    case Op_MulAddVS2VI:
    // No multiply reduction instructions
    case Op_MulReductionVD:
    case Op_MulReductionVF:
    case Op_MulReductionVI:
    case Op_MulReductionVL:
    // Others
    case Op_Extract:
    case Op_ExtractB:
    case Op_ExtractC:
    case Op_ExtractD:
    case Op_ExtractF:
    case Op_ExtractI:
    case Op_ExtractL:
    case Op_ExtractS:
    case Op_ExtractUB:
      supported = false;
      break;
    default:
      break;
  }
  return supported;
}
%}
definitions %{
int_def SVE_COST (200, 200);
%}
dnl
dnl ELEMENT_SHORT_CHAR($1, $2)
dnl ELEMENT_SHORT_CHAR(etype, node)
dnl Expands to a C++ test of the vector element type of node. When etype is
dnl T_SHORT the test accepts T_SHORT or T_CHAR; for any other etype it is a
dnl plain equality test against etype.
define(`ELEMENT_SHORT_CHAR',`ifelse(`$1', `T_SHORT',
`($2->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
($2->bottom_type()->is_vect()->element_basic_type() == T_CHAR))',
`($2->bottom_type()->is_vect()->element_basic_type() == $1)')')
dnl
// All SVE instructions
// vector load/store
// Use predicated vector load/store
// Matches LoadVector when UseSVE > 0 and the memory size is at least 16.
// The encoding goes through loadStoreA_predicate with is_store false and
// the ptrue predicate; the element type is taken from this node.
instruct loadV(vReg dst, vmemA mem) %{
predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16);
match(Set dst (LoadVector mem));
ins_cost(SVE_COST);
format %{ "sve_ldr $dst, $mem\t # vector (sve)" %}
ins_encode %{
FloatRegister dst_reg = as_FloatRegister($dst$$reg);
loadStoreA_predicate(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue,
vector_element_basic_type(this), $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
%}
ins_pipe(pipe_slow);
%}
// Matches StoreVector when UseSVE > 0 and the memory size is at least 16.
// The encoding goes through loadStoreA_predicate with is_store true and
// the ptrue predicate; the element type is taken from the src input.
instruct storeV(vReg src, vmemA mem) %{
predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16);
match(Set mem (StoreVector mem src));
ins_cost(SVE_COST);
format %{ "sve_str $mem, $src\t # vector (sve)" %}
ins_encode %{
FloatRegister src_reg = as_FloatRegister($src$$reg);
loadStoreA_predicate(C2_MacroAssembler(&cbuf), true, src_reg, ptrue,
vector_element_basic_type(this, $src), $mem->opcode(),
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
%}
ins_pipe(pipe_slow);
%}
dnl
dnl UNARY_OP_TRUE_PREDICATE_ETYPE($1, $2, $3, $4, $5, $6 )
dnl UNARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn)
dnl Emits a dst/src instruct for op_name. It applies when UseSVE > 0, the
dnl vector has at least min_vec_len elements and its element type equals
dnl element_type. The encoding calls insn with the ptrue predicate.
define(`UNARY_OP_TRUE_PREDICATE_ETYPE', `
instruct $1(vReg dst, vReg src) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= $5 &&
n->bottom_type()->is_vect()->element_basic_type() == $3);
match(Set dst ($2 src));
ins_cost(SVE_COST);
format %{ "$6 $dst, $src\t# vector (sve) ($4)" %}
ins_encode %{
__ $6(as_FloatRegister($dst$$reg), __ $4,
ptrue, as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
// vector abs
UNARY_OP_TRUE_PREDICATE_ETYPE(vabsB, AbsVB, T_BYTE, B, 16, sve_abs)
UNARY_OP_TRUE_PREDICATE_ETYPE(vabsS, AbsVS, T_SHORT, H, 8, sve_abs)
UNARY_OP_TRUE_PREDICATE_ETYPE(vabsI, AbsVI, T_INT, S, 4, sve_abs)
UNARY_OP_TRUE_PREDICATE_ETYPE(vabsL, AbsVL, T_LONG, D, 2, sve_abs)
UNARY_OP_TRUE_PREDICATE_ETYPE(vabsF, AbsVF, T_FLOAT, S, 4, sve_fabs)
UNARY_OP_TRUE_PREDICATE_ETYPE(vabsD, AbsVD, T_DOUBLE, D, 2, sve_fabs)
dnl
dnl BINARY_OP_UNPREDICATED($1, $2 $3, $4 $5 )
dnl BINARY_OP_UNPREDICATED(insn_name, op_name, size, min_vec_len, insn)
dnl Emits a three-register instruct for op_name. It applies when UseSVE > 0
dnl and the vector has at least min_vec_len elements. The encoding calls
dnl insn with the size variant and no predicate register.
define(`BINARY_OP_UNPREDICATED', `
instruct $1(vReg dst, vReg src1, vReg src2) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
match(Set dst ($2 src1 src2));
ins_cost(SVE_COST);
format %{ "$5 $dst, $src1, $src2\t # vector (sve) ($3)" %}
ins_encode %{
__ $5(as_FloatRegister($dst$$reg), __ $3,
as_FloatRegister($src1$$reg),
as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
// vector add
BINARY_OP_UNPREDICATED(vaddB, AddVB, B, 16, sve_add)
BINARY_OP_UNPREDICATED(vaddS, AddVS, H, 8, sve_add)
BINARY_OP_UNPREDICATED(vaddI, AddVI, S, 4, sve_add)
BINARY_OP_UNPREDICATED(vaddL, AddVL, D, 2, sve_add)
BINARY_OP_UNPREDICATED(vaddF, AddVF, S, 4, sve_fadd)
BINARY_OP_UNPREDICATED(vaddD, AddVD, D, 2, sve_fadd)
dnl
dnl BINARY_OP_UNSIZED($1, $2, $3, $4 )
dnl BINARY_OP_UNSIZED(insn_name, op_name, min_vec_len, insn)
dnl Emits a three-register instruct for op_name. It applies when UseSVE > 0
dnl and length_in_bytes of the vector is at least min_vec_len. The encoding
dnl calls insn with neither a size variant nor a predicate register.
define(`BINARY_OP_UNSIZED', `
instruct $1(vReg dst, vReg src1, vReg src2) %{
predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= $3);
match(Set dst ($2 src1 src2));
ins_cost(SVE_COST);
format %{ "$4 $dst, $src1, $src2\t# vector (sve)" %}
ins_encode %{
__ $4(as_FloatRegister($dst$$reg),
as_FloatRegister($src1$$reg),
as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
// vector and
BINARY_OP_UNSIZED(vand, AndV, 16, sve_and)
// vector or
BINARY_OP_UNSIZED(vor, OrV, 16, sve_orr)
// vector xor
BINARY_OP_UNSIZED(vxor, XorV, 16, sve_eor)
dnl
dnl VDIVF($1, $2 , $3 )
dnl VDIVF(name_suffix, size, min_vec_len)
dnl Emits instruct vdiv<name_suffix> for DivV<name_suffix>. The first input
dnl is also the destination (dst_src1). The encoding calls sve_fdiv with
dnl the ptrue predicate.
define(`VDIVF', `
instruct vdiv$1(vReg dst_src1, vReg src2) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
match(Set dst_src1 (DivV$1 dst_src1 src2));
ins_cost(SVE_COST);
format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) ($2)" %}
ins_encode %{
__ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ $2,
ptrue, as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
// vector float div
VDIVF(F, S, 4)
VDIVF(D, D, 2)
dnl
dnl BINARY_OP_TRUE_PREDICATE_ETYPE($1, $2, $3, $4, $5, $6 )
dnl BINARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn)
dnl Emits a two-register instruct for op_name whose first input is also the
dnl destination (dst_src1). It applies when UseSVE > 0, the vector has at
dnl least min_vec_len elements and its element type equals element_type.
dnl The encoding calls insn with the ptrue predicate.
define(`BINARY_OP_TRUE_PREDICATE_ETYPE', `
instruct $1(vReg dst_src1, vReg src2) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= $5 &&
n->bottom_type()->is_vect()->element_basic_type() == $3);
match(Set dst_src1 ($2 dst_src1 src2));
ins_cost(SVE_COST);
format %{ "$6 $dst_src1, $dst_src1, $src2\t # vector (sve) ($4)" %}
ins_encode %{
__ $6(as_FloatRegister($dst_src1$$reg), __ $4,
ptrue, as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
// vector max
BINARY_OP_TRUE_PREDICATE_ETYPE(vmaxF, MaxV, T_FLOAT, S, 4, sve_fmax)
BINARY_OP_TRUE_PREDICATE_ETYPE(vmaxD, MaxV, T_DOUBLE, D, 2, sve_fmax)
BINARY_OP_TRUE_PREDICATE_ETYPE(vminF, MinV, T_FLOAT, S, 4, sve_fmin)
BINARY_OP_TRUE_PREDICATE_ETYPE(vminD, MinV, T_DOUBLE, D, 2, sve_fmin)
dnl
dnl VFMLA($1 $2 $3 )
dnl VFMLA(name_suffix, size, min_vec_len)
dnl Emits instruct vfmla<name_suffix>. It matches FmaV<name_suffix> with
dnl dst_src1 as the addend and requires UseFMA and UseSVE > 0. The encoding
dnl calls sve_fmla with the ptrue predicate.
define(`VFMLA', `
// dst_src1 = dst_src1 + src2 * src3
instruct vfmla$1(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 src3)));
ins_cost(SVE_COST);
format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
ins_encode %{
__ sve_fmla(as_FloatRegister($dst_src1$$reg), __ $2,
ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
// vector fmla
VFMLA(F, S, 4)
VFMLA(D, D, 2)
dnl
dnl VFMLS($1 $2 $3 )
dnl VFMLS(name_suffix, size, min_vec_len)
dnl Emits instruct vfmls<name_suffix>. It matches FmaV<name_suffix> where
dnl exactly one of the two multiplicands is wrapped in NegV<name_suffix>,
dnl and requires UseFMA and UseSVE > 0. The encoding calls sve_fmls with
dnl the ptrue predicate.
define(`VFMLS', `
// dst_src1 = dst_src1 + -src2 * src3
// dst_src1 = dst_src1 + src2 * -src3
instruct vfmls$1(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
match(Set dst_src1 (FmaV$1 dst_src1 (Binary (NegV$1 src2) src3)));
match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 (NegV$1 src3))));
ins_cost(SVE_COST);
format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
ins_encode %{
__ sve_fmls(as_FloatRegister($dst_src1$$reg), __ $2,
ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
// vector fmls
VFMLS(F, S, 4)
VFMLS(D, D, 2)
dnl
dnl VFNMLA($1 $2 $3 )
dnl VFNMLA(name_suffix, size, min_vec_len)
dnl Emits instruct vfnmla<name_suffix>. It matches FmaV<name_suffix> where
dnl the addend dst_src1 and exactly one multiplicand are wrapped in
dnl NegV<name_suffix>, and requires UseFMA and UseSVE > 0. The encoding
dnl calls sve_fnmla with the ptrue predicate.
define(`VFNMLA', `
// dst_src1 = -dst_src1 + -src2 * src3
// dst_src1 = -dst_src1 + src2 * -src3
instruct vfnmla$1(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary (NegV$1 src2) src3)));
match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 (NegV$1 src3))));
ins_cost(SVE_COST);
format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
ins_encode %{
__ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ $2,
ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
// vector fnmla
VFNMLA(F, S, 4)
VFNMLA(D, D, 2)
dnl
dnl VFNMLS($1 $2 $3 )
dnl VFNMLS(name_suffix, size, min_vec_len)
dnl Emits instruct vfnmls<name_suffix>. It matches FmaV<name_suffix> where
dnl only the addend dst_src1 is wrapped in NegV<name_suffix>, and requires
dnl UseFMA and UseSVE > 0. The encoding calls sve_fnmls with the ptrue
dnl predicate.
define(`VFNMLS', `
// dst_src1 = -dst_src1 + src2 * src3
instruct vfnmls$1(vReg dst_src1, vReg src2, vReg src3) %{
predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 src3)));
ins_cost(SVE_COST);
format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
ins_encode %{
__ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ $2,
ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
// vector fnmls
VFNMLS(F, S, 4)
VFNMLS(D, D, 2)
dnl
dnl VMLA($1 $2 $3 )
dnl VMLA(name_suffix, size, min_vec_len)
dnl Emits instruct vmla<name_suffix>. It matches AddV<name_suffix> of
dnl dst_src1 and a MulV<name_suffix> of src2 and src3 when UseSVE > 0 and
dnl the vector has at least min_vec_len elements. The encoding calls
dnl sve_mla with the ptrue predicate. The format string names all three
dnl operands with a dollar prefix so ADLC substitutes the real registers.
define(`VMLA', `
// dst_src1 = dst_src1 + src2 * src3
instruct vmla$1(vReg dst_src1, vReg src2, vReg src3)
%{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
  match(Set dst_src1 (AddV$1 dst_src1 (MulV$1 src2 src3)));
  ins_cost(SVE_COST);
  format %{ "sve_mla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
  ins_encode %{
    __ sve_mla(as_FloatRegister($dst_src1$$reg), __ $2,
      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
// vector mla
VMLA(B, B, 16)
VMLA(S, H, 8)
VMLA(I, S, 4)
VMLA(L, D, 2)
dnl
dnl VMLS($1 $2 $3 )
dnl VMLS(name_suffix, size, min_vec_len)
dnl Emits instruct vmls<name_suffix>. It matches SubV<name_suffix> of
dnl dst_src1 and a MulV<name_suffix> of src2 and src3 when UseSVE > 0 and
dnl the vector has at least min_vec_len elements. The encoding calls
dnl sve_mls with the ptrue predicate. The format string names all three
dnl operands with a dollar prefix so ADLC substitutes the real registers.
define(`VMLS', `
// dst_src1 = dst_src1 - src2 * src3
instruct vmls$1(vReg dst_src1, vReg src2, vReg src3)
%{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
  match(Set dst_src1 (SubV$1 dst_src1 (MulV$1 src2 src3)));
  ins_cost(SVE_COST);
  format %{ "sve_mls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
  ins_encode %{
    __ sve_mls(as_FloatRegister($dst_src1$$reg), __ $2,
      ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
// vector mls
VMLS(B, B, 16)
VMLS(S, H, 8)
VMLS(I, S, 4)
VMLS(L, D, 2)
dnl
dnl BINARY_OP_TRUE_PREDICATE($1, $2, $3, $4, $5 )
dnl BINARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
dnl Emits a two-register instruct for op_name whose first input is also the
dnl destination (dst_src1). It applies when UseSVE > 0 and the vector has
dnl at least min_vec_len elements. The encoding calls insn with the ptrue
dnl predicate.
define(`BINARY_OP_TRUE_PREDICATE', `
instruct $1(vReg dst_src1, vReg src2) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
match(Set dst_src1 ($2 dst_src1 src2));
ins_cost(SVE_COST);
format %{ "$5 $dst_src1, $dst_src1, $src2\t # vector (sve) ($3)" %}
ins_encode %{
__ $5(as_FloatRegister($dst_src1$$reg), __ $3,
ptrue, as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
// vector mul
BINARY_OP_TRUE_PREDICATE(vmulB, MulVB, B, 16, sve_mul)
BINARY_OP_TRUE_PREDICATE(vmulS, MulVS, H, 8, sve_mul)
BINARY_OP_TRUE_PREDICATE(vmulI, MulVI, S, 4, sve_mul)
BINARY_OP_TRUE_PREDICATE(vmulL, MulVL, D, 2, sve_mul)
BINARY_OP_UNPREDICATED(vmulF, MulVF, S, 4, sve_fmul)
BINARY_OP_UNPREDICATED(vmulD, MulVD, D, 2, sve_fmul)
dnl
dnl UNARY_OP_TRUE_PREDICATE($1, $2, $3, $4, $5 )
dnl UNARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_bytes, insn)
dnl Emits a dst/src instruct for op_name. It applies when UseSVE > 0 and
dnl length_in_bytes of the vector is at least min_vec_bytes. The encoding
dnl calls insn with the ptrue predicate.
define(`UNARY_OP_TRUE_PREDICATE', `
instruct $1(vReg dst, vReg src) %{
predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= $4);
match(Set dst ($2 src));
ins_cost(SVE_COST);
format %{ "$5 $dst, $src\t# vector (sve) ($3)" %}
ins_encode %{
__ $5(as_FloatRegister($dst$$reg), __ $3,
ptrue, as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
// vector fneg
UNARY_OP_TRUE_PREDICATE(vnegF, NegVF, S, 16, sve_fneg)
UNARY_OP_TRUE_PREDICATE(vnegD, NegVD, D, 16, sve_fneg)
// popcount vector
// Matches PopCountVI when UseSVE > 0 and the vector has at least 4
// elements; emits sve_cnt on S elements with the ptrue predicate.
// NOTE(review): unlike the neighbouring rules this one sets no ins_cost and
// its format string ends with a newline-tab sequence - confirm both are
// intended.
instruct vpopcountI(vReg dst, vReg src) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
match(Set dst (PopCountVI src));
format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %}
ins_encode %{
__ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}
dnl
dnl REDUCE_ADD($1, $2, $3, $4, $5, $6, $7 )
dnl REDUCE_ADD(insn_name, op_name, reg_dst, reg_src, size, elem_type, insn1)
define(`REDUCE_ADD', `
instruct $1($3 dst, $4 src1, vReg src2, vRegD tmp) %{
predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
ELEMENT_SHORT_CHAR($6, n->in(2)));
match(Set dst ($2 src1 src2));
effect(TEMP_DEF dst, TEMP tmp);
ins_cost(SVE_COST);
format %{ "sve_uaddv $tmp, $src2\t# vector (sve) ($5)\n\t"
"umov $dst, $tmp, $5, 0\n\t"
"$7 $dst, $dst, $src1\t # add reduction $5" %}
ins_encode %{
__ sve_uaddv(as_FloatRegister($tmp$$reg), __ $5,
ptrue, as_FloatRegister($src2$$reg));
__ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ $5, 0);
__ $7($dst$$Register, $dst$$Register, $src1$$Register);
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
dnl REDUCE_ADDF($1, $2, $3, $4 )
dnl REDUCE_ADDF(insn_name, op_name, reg_dst, size)
dnl Emits a floating-point add-reduction instruct whose scalar input is
dnl also the destination (src1_dst). It applies when UseSVE > 0 and the
dnl input vector is at least 16 bytes. The encoding calls sve_fadda with
dnl the ptrue predicate.
define(`REDUCE_ADDF', `
instruct $1($3 src1_dst, vReg src2) %{
predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
match(Set src1_dst ($2 src1_dst src2));
ins_cost(SVE_COST);
format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) ($4)" %}
ins_encode %{
__ sve_fadda(as_FloatRegister($src1_dst$$reg), __ $4,
ptrue, as_FloatRegister($src2$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
// vector add reduction
REDUCE_ADD(reduce_addI, AddReductionVI, iRegINoSp, iRegIorL2I, S, T_INT, addw)
REDUCE_ADD(reduce_addL, AddReductionVL, iRegLNoSp, iRegL, D, T_LONG, add)
REDUCE_ADDF(reduce_addF, AddReductionVF, vRegF, S)
REDUCE_ADDF(reduce_addD, AddReductionVD, vRegD, D)
dnl
dnl REDUCE_FMINMAX($1, $2, $3, $4, $5 )
dnl REDUCE_FMINMAX(min_max, name_suffix, element_type, size, reg_src_dst)
dnl Emits instruct reduce_<min_max><name_suffix> for MinReductionV or
dnl MaxReductionV on element_type. The encoding reduces src2 into dst with
dnl sve_f<min_max>v under the ptrue predicate, then combines dst with src1
dnl using the scalar f<min_max>s or f<min_max>d instruction. The format
dnl string derives the size tag and the scalar suffix from the size
dnl argument so the D variants are described correctly.
dnl NOTE(review): this rule uses INSN_COST while its neighbours use
dnl SVE_COST - confirm that is intended.
define(`REDUCE_FMINMAX', `
instruct reduce_$1$2($5 dst, $5 src1, vReg src2) %{
  predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 &&
            n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
  match(Set dst (translit($1, `m', `M')ReductionV src1 src2));
  ins_cost(INSN_COST);
  effect(TEMP_DEF dst);
  format %{ "sve_f$1v $dst, $src2 # vector (sve) ($4)\n\t"
            "f`$1'translit($4, `SD', `sd') $dst, $dst, $src1\t # $1 reduction $2" %}
  ins_encode %{
    __ sve_f$1v(as_FloatRegister($dst$$reg), __ $4,
         ptrue, as_FloatRegister($src2$$reg));
    __ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
  %}
  ins_pipe(pipe_slow);
%}')dnl
// vector max reduction
REDUCE_FMINMAX(max, F, T_FLOAT, S, vRegF)
REDUCE_FMINMAX(max, D, T_DOUBLE, D, vRegD)
// vector min reduction
REDUCE_FMINMAX(min, F, T_FLOAT, S, vRegF)
REDUCE_FMINMAX(min, D, T_DOUBLE, D, vRegD)
// vector Math.rint, floor, ceil
// Matches RoundDoubleModeV on T_DOUBLE vectors of at least 2 elements when
// UseSVE > 0. The constant rmode selects the instruction: rmode_rint uses
// sve_frintn, rmode_floor uses sve_frintm and rmode_ceil uses sve_frintp,
// each with the ptrue predicate.
// NOTE(review): the switch has no default case, so any other rmode value
// emits nothing - confirm no other value can reach this rule.
instruct vroundD(vReg dst, vReg src, immI rmode) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
match(Set dst (RoundDoubleModeV src rmode));
format %{ "sve_frint $dst, $src, $rmode\t# vector (sve) (D)" %}
ins_encode %{
switch ($rmode$$constant) {
case RoundDoubleModeNode::rmode_rint:
__ sve_frintn(as_FloatRegister($dst$$reg), __ D,
ptrue, as_FloatRegister($src$$reg));
break;
case RoundDoubleModeNode::rmode_floor:
__ sve_frintm(as_FloatRegister($dst$$reg), __ D,
ptrue, as_FloatRegister($src$$reg));
break;
case RoundDoubleModeNode::rmode_ceil:
__ sve_frintp(as_FloatRegister($dst$$reg), __ D,
ptrue, as_FloatRegister($src$$reg));
break;
}
%}
ins_pipe(pipe_slow);
%}
dnl
dnl REPLICATE($1, $2, $3, $4, $5 )
dnl REPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
dnl Emits a replicate instruct that takes its source from a general
dnl register operand of class reg_src. It applies when UseSVE > 0 and the
dnl vector has at least min_vec_len elements. The encoding calls sve_dup.
define(`REPLICATE', `
instruct $1(vReg dst, $3 src) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
match(Set dst ($2 src));
ins_cost(SVE_COST);
format %{ "sve_dup $dst, $src\t# vector (sve) ($4)" %}
ins_encode %{
__ sve_dup(as_FloatRegister($dst$$reg), __ $4, as_Register($src$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
dnl REPLICATE_IMM8($1, $2, $3, $4, $5 )
dnl REPLICATE_IMM8(insn_name, op_name, imm_type, size, min_vec_len)
dnl Emits a replicate instruct that takes its source from a constant
dnl operand of class imm_type. It applies when UseSVE > 0 and the vector
dnl has at least min_vec_len elements. The encoding passes the constant
dnl directly to sve_dup.
define(`REPLICATE_IMM8', `
instruct $1(vReg dst, $3 con) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
match(Set dst ($2 con));
ins_cost(SVE_COST);
format %{ "sve_dup $dst, $con\t# vector (sve) ($4)" %}
ins_encode %{
__ sve_dup(as_FloatRegister($dst$$reg), __ $4, $con$$constant);
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
dnl FREPLICATE($1, $2, $3, $4, $5 )
dnl FREPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
dnl Emits a replicate instruct that takes its source from a floating-point
dnl register operand of class reg_src. It applies when UseSVE > 0 and the
dnl vector has at least min_vec_len elements. The encoding calls sve_cpy
dnl with the ptrue predicate.
define(`FREPLICATE', `
instruct $1(vReg dst, $3 src) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
match(Set dst ($2 src));
ins_cost(SVE_COST);
format %{ "sve_cpy $dst, $src\t# vector (sve) ($4)" %}
ins_encode %{
__ sve_cpy(as_FloatRegister($dst$$reg), __ $4,
ptrue, as_FloatRegister($src$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
// vector replicate
REPLICATE(replicateB, ReplicateB, iRegIorL2I, B, 16)
REPLICATE(replicateS, ReplicateS, iRegIorL2I, H, 8)
REPLICATE(replicateI, ReplicateI, iRegIorL2I, S, 4)
REPLICATE(replicateL, ReplicateL, iRegL, D, 2)
REPLICATE_IMM8(replicateB_imm8, ReplicateB, immI8, B, 16)
REPLICATE_IMM8(replicateS_imm8, ReplicateS, immI8_shift8, H, 8)
REPLICATE_IMM8(replicateI_imm8, ReplicateI, immI8_shift8, S, 4)
REPLICATE_IMM8(replicateL_imm8, ReplicateL, immL8_shift8, D, 2)
FREPLICATE(replicateF, ReplicateF, vRegF, S, 4)
FREPLICATE(replicateD, ReplicateD, vRegD, D, 2)
dnl
dnl VSHIFT_TRUE_PREDICATE($1, $2, $3, $4, $5 )
dnl VSHIFT_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
dnl Emits a shift-by-vector instruct for op_name. The value being shifted
dnl is both input and destination (dst); the shift amounts come from the
dnl vector register shift. It applies when UseSVE > 0 and the vector has at
dnl least min_vec_len elements. The encoding calls insn with the ptrue
dnl predicate.
define(`VSHIFT_TRUE_PREDICATE', `
instruct $1(vReg dst, vReg shift) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
match(Set dst ($2 dst shift));
ins_cost(SVE_COST);
format %{ "$5 $dst, $dst, $shift\t# vector (sve) ($3)" %}
ins_encode %{
__ $5(as_FloatRegister($dst$$reg), __ $3,
ptrue, as_FloatRegister($shift$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
dnl
dnl VSHIFT_IMM_UNPREDICATE($1, $2, $3, $4, $5 )
dnl VSHIFT_IMM_UNPREDICATE(insn_name, op_name, size, min_vec_len, insn)
dnl Emits a shift-by-immediate instruct for op_name. It applies when
dnl UseSVE > 0 and the vector has at least min_vec_len elements. The
dnl generated encoding depends on the instruct name prefix and the size:
dnl  - vasr and vlsr: a shift count of 0 is emitted as sve_orr of src with
dnl    itself (a register move).
dnl  - vasr on B or H: a count at or above the element width (8 or 16) is
dnl    clamped to width minus one.
dnl  - vlsl and vlsr on B or H: a count at or above the element width
dnl    (8 for B, 16 for H) is emitted as sve_eor of src with itself, which
dnl    zeroes dst. The threshold is selected per size so that counts 8..15
dnl    on H elements still reach the real shift instruction.
define(`VSHIFT_IMM_UNPREDICATE', `
instruct $1(vReg dst, vReg src, immI shift) %{
  predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
  match(Set dst ($2 src shift));
  ins_cost(SVE_COST);
  format %{ "$5 $dst, $src, $shift\t# vector (sve) ($3)" %}
  ins_encode %{
    int con = (int)$shift$$constant;dnl
ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, `
    if (con == 0) {
      __ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
           as_FloatRegister($src$$reg));
      return;
    }')dnl
ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$3', `B') == 0), 1, `
    if (con >= 8) con = 7;')ifelse(eval(index(`$3', `H') == 0), 1, `
    if (con >= 16) con = 15;')')dnl
ifelse(eval((index(`$1', `vlsl') == 0 || index(`$1', `vlsr') == 0) && (index(`$3', `B') == 0 || index(`$3', `H') == 0)), 1, `
    if (con >= ifelse(index(`$3', `B'), 0, 8, 16)) {
      __ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
           as_FloatRegister($src$$reg));
      return;
    }')
    __ $5(as_FloatRegister($dst$$reg), __ $3,
         as_FloatRegister($src$$reg), con);
  %}
  ins_pipe(pipe_slow);
%}')dnl
dnl
dnl VSHIFT_COUNT($1, $2, $3, $4 )
dnl VSHIFT_COUNT(insn_name, size, min_vec_len, type)
dnl
dnl Emits an instruct named insn_name that turns a scalar shift count held in
dnl a general register into a vector of counts. One rule serves both
dnl (Set dst (LShiftCntV cnt)) and (Set dst (RShiftCntV cnt)). It applies
dnl only when UseSVE > 0, the vector node has at least min_vec_len elements
dnl and ELEMENT_SHORT_CHAR(type, n) holds. ELEMENT_SHORT_CHAR is defined
dnl outside this macro; presumably it checks the element type of n against
dnl type -- confirm at its definition.
dnl The encoding is a single sve_dup of cnt into dst with element size
dnl selector size.
define(`VSHIFT_COUNT', `
instruct $1(vReg dst, iRegIorL2I cnt) %{
predicate(UseSVE > 0 && n->as_Vector()->length() >= $3 &&
ELEMENT_SHORT_CHAR($4, n));
match(Set dst (LShiftCntV cnt));
match(Set dst (RShiftCntV cnt));
format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) ($2)" %}
ins_encode %{
__ sve_dup(as_FloatRegister($dst$$reg), __ $2, as_Register($cnt$$reg));
%}
ins_pipe(pipe_slow);
%}')dnl
// vector shift
dnl Instantiations of the three shift macros. Argument order:
dnl   VSHIFT_TRUE_PREDICATE and VSHIFT_IMM_UNPREDICATE:
dnl     (insn_name, op_name, size, min_vec_len, insn)
dnl   VSHIFT_COUNT:
dnl     (insn_name, size, min_vec_len, type)
dnl Each size B, H, S, D is paired with min_vec_len 16, 8, 4, 2.
VSHIFT_TRUE_PREDICATE(vasrB, RShiftVB, B, 16, sve_asr)
VSHIFT_TRUE_PREDICATE(vasrS, RShiftVS, H, 8, sve_asr)
VSHIFT_TRUE_PREDICATE(vasrI, RShiftVI, S, 4, sve_asr)
VSHIFT_TRUE_PREDICATE(vasrL, RShiftVL, D, 2, sve_asr)
VSHIFT_TRUE_PREDICATE(vlslB, LShiftVB, B, 16, sve_lsl)
VSHIFT_TRUE_PREDICATE(vlslS, LShiftVS, H, 8, sve_lsl)
VSHIFT_TRUE_PREDICATE(vlslI, LShiftVI, S, 4, sve_lsl)
VSHIFT_TRUE_PREDICATE(vlslL, LShiftVL, D, 2, sve_lsl)
VSHIFT_TRUE_PREDICATE(vlsrB, URShiftVB, B, 16, sve_lsr)
VSHIFT_TRUE_PREDICATE(vlsrS, URShiftVS, H, 8, sve_lsr)
VSHIFT_TRUE_PREDICATE(vlsrI, URShiftVI, S, 4, sve_lsr)
VSHIFT_TRUE_PREDICATE(vlsrL, URShiftVL, D, 2, sve_lsr)
VSHIFT_IMM_UNPREDICATE(vasrB_imm, RShiftVB, B, 16, sve_asr)
VSHIFT_IMM_UNPREDICATE(vasrS_imm, RShiftVS, H, 8, sve_asr)
VSHIFT_IMM_UNPREDICATE(vasrI_imm, RShiftVI, S, 4, sve_asr)
VSHIFT_IMM_UNPREDICATE(vasrL_imm, RShiftVL, D, 2, sve_asr)
VSHIFT_IMM_UNPREDICATE(vlsrB_imm, URShiftVB, B, 16, sve_lsr)
VSHIFT_IMM_UNPREDICATE(vlsrS_imm, URShiftVS, H, 8, sve_lsr)
VSHIFT_IMM_UNPREDICATE(vlsrI_imm, URShiftVI, S, 4, sve_lsr)
VSHIFT_IMM_UNPREDICATE(vlsrL_imm, URShiftVL, D, 2, sve_lsr)
VSHIFT_IMM_UNPREDICATE(vlslB_imm, LShiftVB, B, 16, sve_lsl)
VSHIFT_IMM_UNPREDICATE(vlslS_imm, LShiftVS, H, 8, sve_lsl)
VSHIFT_IMM_UNPREDICATE(vlslI_imm, LShiftVI, S, 4, sve_lsl)
VSHIFT_IMM_UNPREDICATE(vlslL_imm, LShiftVL, D, 2, sve_lsl)
VSHIFT_COUNT(vshiftcntB, B, 16, T_BYTE)
VSHIFT_COUNT(vshiftcntS, H, 8, T_SHORT)
VSHIFT_COUNT(vshiftcntI, S, 4, T_INT)
VSHIFT_COUNT(vshiftcntL, D, 2, T_LONG)
// vector sqrt
UNARY_OP_TRUE_PREDICATE(vsqrtF, SqrtVF, S, 16, sve_fsqrt)
UNARY_OP_TRUE_PREDICATE(vsqrtD, SqrtVD, D, 16, sve_fsqrt)
// vector sub
BINARY_OP_UNPREDICATED(vsubB, SubVB, B, 16, sve_sub)
BINARY_OP_UNPREDICATED(vsubS, SubVS, H, 8, sve_sub)
BINARY_OP_UNPREDICATED(vsubI, SubVI, S, 4, sve_sub)
BINARY_OP_UNPREDICATED(vsubL, SubVL, D, 2, sve_sub)
BINARY_OP_UNPREDICATED(vsubF, SubVF, S, 4, sve_fsub)
BINARY_OP_UNPREDICATED(vsubD, SubVD, D, 2, sve_fsub)

View File

@ -636,6 +636,39 @@ void entry(CodeBuffer *cb) {
__ mov(v1, __ T4H, 2, zr); // mov v1.h[2], wzr
__ mov(v1, __ T8B, 3, zr); // mov v1.b[3], wzr
__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0))); // ld1 {v31.2d, v0.2d}, [x1], x0
__ sve_cpy(z0, __ S, p0, v1); // mov z0.s, p0/m, s1
__ sve_inc(r0, __ S); // incw x0
__ sve_dec(r1, __ H); // dech x1
__ sve_lsl(z0, __ B, z1, 7); // lsl z0.b, z1.b, #7
__ sve_lsl(z21, __ H, z1, 15); // lsl z21.h, z1.h, #15
__ sve_lsl(z0, __ S, z1, 31); // lsl z0.s, z1.s, #31
__ sve_lsl(z0, __ D, z1, 63); // lsl z0.d, z1.d, #63
__ sve_lsr(z0, __ B, z1, 7); // lsr z0.b, z1.b, #7
__ sve_asr(z0, __ H, z11, 15); // asr z0.h, z11.h, #15
__ sve_lsr(z30, __ S, z1, 31); // lsr z30.s, z1.s, #31
__ sve_asr(z0, __ D, z1, 63); // asr z0.d, z1.d, #63
__ sve_addvl(sp, r0, 31); // addvl sp, x0, #31
__ sve_addpl(r1, sp, -32); // addpl x1, sp, -32
__ sve_cntp(r8, __ B, p0, p1); // cntp x8, p0, p1.b
__ sve_dup(z0, __ B, 127); // dup z0.b, 127
__ sve_dup(z1, __ H, -128); // dup z1.h, -128
__ sve_dup(z2, __ S, 32512); // dup z2.s, 32512
__ sve_dup(z7, __ D, -32768); // dup z7.d, -32768
__ sve_ld1b(z0, __ B, p0, Address(sp)); // ld1b {z0.b}, p0/z, [sp]
__ sve_ld1h(z10, __ H, p1, Address(sp, -8)); // ld1h {z10.h}, p1/z, [sp, #-8, MUL VL]
__ sve_ld1w(z20, __ S, p2, Address(r0, 7)); // ld1w {z20.s}, p2/z, [x0, #7, MUL VL]
__ sve_ld1b(z30, __ B, p3, Address(sp, r8)); // ld1b {z30.b}, p3/z, [sp, x8]
__ sve_ld1w(z0, __ S, p4, Address(sp, r28)); // ld1w {z0.s}, p4/z, [sp, x28, LSL #2]
__ sve_ld1d(z11, __ D, p5, Address(r0, r1)); // ld1d {z11.d}, p5/z, [x0, x1, LSL #3]
__ sve_st1b(z22, __ B, p6, Address(sp)); // st1b {z22.b}, p6, [sp]
__ sve_st1b(z31, __ B, p7, Address(sp, -8)); // st1b {z31.b}, p7, [sp, #-8, MUL VL]
__ sve_st1w(z0, __ S, p1, Address(r0, 7)); // st1w {z0.s}, p1, [x0, #7, MUL VL]
__ sve_st1b(z0, __ B, p2, Address(sp, r1)); // st1b {z0.b}, p2, [sp, x1]
__ sve_st1h(z0, __ H, p3, Address(sp, r8)); // st1h {z0.h}, p3, [sp, x8, LSL #1]
__ sve_st1d(z0, __ D, p4, Address(r0, r18)); // st1d {z0.d}, p4, [x0, x18, LSL #3]
__ sve_ldr(z0, Address(sp)); // ldr z0, [sp]
__ sve_ldr(z31, Address(sp, -256)); // ldr z31, [sp, #-256, MUL VL]
__ sve_str(z8, Address(r8, 255)); // str z8, [x8, #255, MUL VL]
// FloatImmediateOp
__ fmovd(v0, 2.0); // fmov d0, #2.0
@ -759,6 +792,57 @@ void entry(CodeBuffer *cb) {
__ lduminl(Assembler::word, r12, r15, r13); // lduminl w12, w15, [x13]
__ ldumaxl(Assembler::word, r2, r7, r20); // ldumaxl w2, w7, [x20]
// SVEVectorOp
__ sve_add(z25, __ B, z15, z4); // add z25.b, z15.b, z4.b
__ sve_sub(z4, __ S, z11, z17); // sub z4.s, z11.s, z17.s
__ sve_fadd(z16, __ D, z17, z10); // fadd z16.d, z17.d, z10.d
__ sve_fmul(z22, __ D, z12, z25); // fmul z22.d, z12.d, z25.d
__ sve_fsub(z28, __ D, z14, z10); // fsub z28.d, z14.d, z10.d
__ sve_abs(z1, __ H, p3, z30); // abs z1.h, p3/m, z30.h
__ sve_add(z15, __ B, p1, z2); // add z15.b, p1/m, z15.b, z2.b
__ sve_asr(z13, __ S, p4, z16); // asr z13.s, p4/m, z13.s, z16.s
__ sve_cnt(z3, __ D, p0, z11); // cnt z3.d, p0/m, z11.d
__ sve_lsl(z5, __ D, p2, z14); // lsl z5.d, p2/m, z5.d, z14.d
__ sve_lsr(z29, __ B, p0, z20); // lsr z29.b, p0/m, z29.b, z20.b
__ sve_mul(z20, __ S, p5, z27); // mul z20.s, p5/m, z20.s, z27.s
__ sve_neg(z26, __ B, p6, z4); // neg z26.b, p6/m, z4.b
__ sve_not(z22, __ B, p4, z30); // not z22.b, p4/m, z30.b
__ sve_smax(z11, __ H, p2, z27); // smax z11.h, p2/m, z11.h, z27.h
__ sve_smin(z28, __ S, p5, z30); // smin z28.s, p5/m, z28.s, z30.s
__ sve_sub(z30, __ S, p1, z13); // sub z30.s, p1/m, z30.s, z13.s
__ sve_fabs(z30, __ D, p4, z26); // fabs z30.d, p4/m, z26.d
__ sve_fadd(z15, __ S, p3, z11); // fadd z15.s, p3/m, z15.s, z11.s
__ sve_fdiv(z6, __ D, p7, z16); // fdiv z6.d, p7/m, z6.d, z16.d
__ sve_fmax(z27, __ S, p7, z7); // fmax z27.s, p7/m, z27.s, z7.s
__ sve_fmin(z19, __ D, p2, z4); // fmin z19.d, p2/m, z19.d, z4.d
__ sve_fmul(z17, __ S, p4, z22); // fmul z17.s, p4/m, z17.s, z22.s
__ sve_fneg(z28, __ D, p3, z21); // fneg z28.d, p3/m, z21.d
__ sve_frintm(z18, __ S, p5, z2); // frintm z18.s, p5/m, z2.s
__ sve_frintn(z6, __ S, p3, z15); // frintn z6.s, p3/m, z15.s
__ sve_frintp(z12, __ D, p5, z1); // frintp z12.d, p5/m, z1.d
__ sve_fsqrt(z18, __ S, p1, z17); // fsqrt z18.s, p1/m, z17.s
__ sve_fsub(z15, __ S, p5, z13); // fsub z15.s, p5/m, z15.s, z13.s
__ sve_fmla(z20, __ D, p7, z27, z11); // fmla z20.d, p7/m, z27.d, z11.d
__ sve_fmls(z3, __ D, p0, z30, z23); // fmls z3.d, p0/m, z30.d, z23.d
__ sve_fnmla(z17, __ S, p2, z27, z26); // fnmla z17.s, p2/m, z27.s, z26.s
__ sve_fnmls(z6, __ D, p5, z22, z30); // fnmls z6.d, p5/m, z22.d, z30.d
__ sve_mla(z2, __ H, p7, z26, z18); // mla z2.h, p7/m, z26.h, z18.h
__ sve_mls(z22, __ B, p4, z2, z17); // mls z22.b, p4/m, z2.b, z17.b
__ sve_and(z24, z25, z22); // and z24.d, z25.d, z22.d
__ sve_eor(z18, z12, z3); // eor z18.d, z12.d, z3.d
__ sve_orr(z29, z28, z16); // orr z29.d, z28.d, z16.d
// SVEReductionOp
__ sve_andv(v6, __ S, p2, z28); // andv s6, p2, z28.s
__ sve_orv(v7, __ H, p1, z7); // orv h7, p1, z7.h
__ sve_eorv(v9, __ B, p5, z8); // eorv b9, p5, z8.b
__ sve_smaxv(v27, __ B, p5, z30); // smaxv b27, p5, z30.b
__ sve_sminv(v26, __ H, p0, z16); // sminv h26, p0, z16.h
__ sve_fminv(v3, __ D, p6, z8); // fminv d3, p6, z8.d
__ sve_fmaxv(v21, __ D, p6, z26); // fmaxv d21, p6, z26.d
__ sve_fadda(v22, __ S, p0, z4); // fadda s22, p0, s22, z4.s
__ sve_uaddv(v17, __ H, p0, z3); // uaddv d17, p0, z3.h
__ bind(forth);
/*
@ -810,32 +894,32 @@ Disassembly of section .text:
9c: f26aad01 ands x1, x8, #0xffffffffffc00003
a0: 14000000 b a0 <back+0xa0>
a4: 17ffffd7 b 0 <back>
a8: 140001f2 b 870 <forth>
a8: 14000242 b 9b0 <forth>
ac: 94000000 bl ac <back+0xac>
b0: 97ffffd4 bl 0 <back>
b4: 940001ef bl 870 <forth>
b4: 9400023f bl 9b0 <forth>
b8: 3400000a cbz w10, b8 <back+0xb8>
bc: 34fffa2a cbz w10, 0 <back>
c0: 34003d8a cbz w10, 870 <forth>
c0: 3400478a cbz w10, 9b0 <forth>
c4: 35000008 cbnz w8, c4 <back+0xc4>
c8: 35fff9c8 cbnz w8, 0 <back>
cc: 35003d28 cbnz w8, 870 <forth>
cc: 35004728 cbnz w8, 9b0 <forth>
d0: b400000b cbz x11, d0 <back+0xd0>
d4: b4fff96b cbz x11, 0 <back>
d8: b4003ccb cbz x11, 870 <forth>
d8: b40046cb cbz x11, 9b0 <forth>
dc: b500001d cbnz x29, dc <back+0xdc>
e0: b5fff91d cbnz x29, 0 <back>
e4: b5003c7d cbnz x29, 870 <forth>
e4: b500467d cbnz x29, 9b0 <forth>
e8: 10000013 adr x19, e8 <back+0xe8>
ec: 10fff8b3 adr x19, 0 <back>
f0: 10003c13 adr x19, 870 <forth>
f0: 10004613 adr x19, 9b0 <forth>
f4: 90000013 adrp x19, 0 <back>
f8: 36300016 tbz w22, #6, f8 <back+0xf8>
fc: 3637f836 tbz w22, #6, 0 <back>
100: 36303b96 tbz w22, #6, 870 <forth>
100: 36304596 tbz w22, #6, 9b0 <forth>
104: 3758000c tbnz w12, #11, 104 <back+0x104>
108: 375ff7cc tbnz w12, #11, 0 <back>
10c: 37583b2c tbnz w12, #11, 870 <forth>
10c: 3758452c tbnz w12, #11, 9b0 <forth>
110: 128313a0 mov w0, #0xffffe762 // #-6302
114: 528a32c7 mov w7, #0x5196 // #20886
118: 7289173b movk w27, #0x48b9
@ -852,58 +936,58 @@ Disassembly of section .text:
144: 93c3dbc8 extr x8, x30, x3, #54
148: 54000000 b.eq 148 <back+0x148> // b.none
14c: 54fff5a0 b.eq 0 <back> // b.none
150: 54003900 b.eq 870 <forth> // b.none
150: 54004300 b.eq 9b0 <forth> // b.none
154: 54000001 b.ne 154 <back+0x154> // b.any
158: 54fff541 b.ne 0 <back> // b.any
15c: 540038a1 b.ne 870 <forth> // b.any
15c: 540042a1 b.ne 9b0 <forth> // b.any
160: 54000002 b.cs 160 <back+0x160> // b.hs, b.nlast
164: 54fff4e2 b.cs 0 <back> // b.hs, b.nlast
168: 54003842 b.cs 870 <forth> // b.hs, b.nlast
168: 54004242 b.cs 9b0 <forth> // b.hs, b.nlast
16c: 54000002 b.cs 16c <back+0x16c> // b.hs, b.nlast
170: 54fff482 b.cs 0 <back> // b.hs, b.nlast
174: 540037e2 b.cs 870 <forth> // b.hs, b.nlast
174: 540041e2 b.cs 9b0 <forth> // b.hs, b.nlast
178: 54000003 b.cc 178 <back+0x178> // b.lo, b.ul, b.last
17c: 54fff423 b.cc 0 <back> // b.lo, b.ul, b.last
180: 54003783 b.cc 870 <forth> // b.lo, b.ul, b.last
180: 54004183 b.cc 9b0 <forth> // b.lo, b.ul, b.last
184: 54000003 b.cc 184 <back+0x184> // b.lo, b.ul, b.last
188: 54fff3c3 b.cc 0 <back> // b.lo, b.ul, b.last
18c: 54003723 b.cc 870 <forth> // b.lo, b.ul, b.last
18c: 54004123 b.cc 9b0 <forth> // b.lo, b.ul, b.last
190: 54000004 b.mi 190 <back+0x190> // b.first
194: 54fff364 b.mi 0 <back> // b.first
198: 540036c4 b.mi 870 <forth> // b.first
198: 540040c4 b.mi 9b0 <forth> // b.first
19c: 54000005 b.pl 19c <back+0x19c> // b.nfrst
1a0: 54fff305 b.pl 0 <back> // b.nfrst
1a4: 54003665 b.pl 870 <forth> // b.nfrst
1a4: 54004065 b.pl 9b0 <forth> // b.nfrst
1a8: 54000006 b.vs 1a8 <back+0x1a8>
1ac: 54fff2a6 b.vs 0 <back>
1b0: 54003606 b.vs 870 <forth>
1b0: 54004006 b.vs 9b0 <forth>
1b4: 54000007 b.vc 1b4 <back+0x1b4>
1b8: 54fff247 b.vc 0 <back>
1bc: 540035a7 b.vc 870 <forth>
1bc: 54003fa7 b.vc 9b0 <forth>
1c0: 54000008 b.hi 1c0 <back+0x1c0> // b.pmore
1c4: 54fff1e8 b.hi 0 <back> // b.pmore
1c8: 54003548 b.hi 870 <forth> // b.pmore
1c8: 54003f48 b.hi 9b0 <forth> // b.pmore
1cc: 54000009 b.ls 1cc <back+0x1cc> // b.plast
1d0: 54fff189 b.ls 0 <back> // b.plast
1d4: 540034e9 b.ls 870 <forth> // b.plast
1d4: 54003ee9 b.ls 9b0 <forth> // b.plast
1d8: 5400000a b.ge 1d8 <back+0x1d8> // b.tcont
1dc: 54fff12a b.ge 0 <back> // b.tcont
1e0: 5400348a b.ge 870 <forth> // b.tcont
1e0: 54003e8a b.ge 9b0 <forth> // b.tcont
1e4: 5400000b b.lt 1e4 <back+0x1e4> // b.tstop
1e8: 54fff0cb b.lt 0 <back> // b.tstop
1ec: 5400342b b.lt 870 <forth> // b.tstop
1ec: 54003e2b b.lt 9b0 <forth> // b.tstop
1f0: 5400000c b.gt 1f0 <back+0x1f0>
1f4: 54fff06c b.gt 0 <back>
1f8: 540033cc b.gt 870 <forth>
1f8: 54003dcc b.gt 9b0 <forth>
1fc: 5400000d b.le 1fc <back+0x1fc>
200: 54fff00d b.le 0 <back>
204: 5400336d b.le 870 <forth>
204: 54003d6d b.le 9b0 <forth>
208: 5400000e b.al 208 <back+0x208>
20c: 54ffefae b.al 0 <back>
210: 5400330e b.al 870 <forth>
210: 54003d0e b.al 9b0 <forth>
214: 5400000f b.nv 214 <back+0x214>
218: 54ffef4f b.nv 0 <back>
21c: 540032af b.nv 870 <forth>
21c: 54003caf b.nv 9b0 <forth>
220: d40658e1 svc #0x32c7
224: d4014d22 hvc #0xa69
228: d4046543 smc #0x232a
@ -1029,7 +1113,7 @@ Disassembly of section .text:
408: bd5fa1d9 ldr s25, [x14, #8096]
40c: fd1d595a str d26, [x10, #15024]
410: bd1b1869 str s9, [x3, #6936]
414: 580022fb ldr x27, 870 <forth>
414: 58002cfb ldr x27, 9b0 <forth>
418: 1800000b ldr w11, 418 <back+0x418>
41c: f8945060 prfum pldl1keep, [x3, #-187]
420: d8000000 prfm pldl1keep, 420 <back+0x420>
@ -1204,110 +1288,190 @@ Disassembly of section .text:
6c4: 4e0a1fe1 mov v1.h[2], wzr
6c8: 4e071fe1 mov v1.b[3], wzr
6cc: 4cc0ac3f ld1 {v31.2d, v0.2d}, [x1], x0
6d0: 1e601000 fmov d0, #2.000000000000000000e+00
6d4: 1e603000 fmov d0, #2.125000000000000000e+00
6d8: 1e621000 fmov d0, #4.000000000000000000e+00
6dc: 1e623000 fmov d0, #4.250000000000000000e+00
6e0: 1e641000 fmov d0, #8.000000000000000000e+00
6e4: 1e643000 fmov d0, #8.500000000000000000e+00
6e8: 1e661000 fmov d0, #1.600000000000000000e+01
6ec: 1e663000 fmov d0, #1.700000000000000000e+01
6f0: 1e681000 fmov d0, #1.250000000000000000e-01
6f4: 1e683000 fmov d0, #1.328125000000000000e-01
6f8: 1e6a1000 fmov d0, #2.500000000000000000e-01
6fc: 1e6a3000 fmov d0, #2.656250000000000000e-01
700: 1e6c1000 fmov d0, #5.000000000000000000e-01
704: 1e6c3000 fmov d0, #5.312500000000000000e-01
708: 1e6e1000 fmov d0, #1.000000000000000000e+00
70c: 1e6e3000 fmov d0, #1.062500000000000000e+00
710: 1e701000 fmov d0, #-2.000000000000000000e+00
714: 1e703000 fmov d0, #-2.125000000000000000e+00
718: 1e721000 fmov d0, #-4.000000000000000000e+00
71c: 1e723000 fmov d0, #-4.250000000000000000e+00
720: 1e741000 fmov d0, #-8.000000000000000000e+00
724: 1e743000 fmov d0, #-8.500000000000000000e+00
728: 1e761000 fmov d0, #-1.600000000000000000e+01
72c: 1e763000 fmov d0, #-1.700000000000000000e+01
730: 1e781000 fmov d0, #-1.250000000000000000e-01
734: 1e783000 fmov d0, #-1.328125000000000000e-01
738: 1e7a1000 fmov d0, #-2.500000000000000000e-01
73c: 1e7a3000 fmov d0, #-2.656250000000000000e-01
740: 1e7c1000 fmov d0, #-5.000000000000000000e-01
744: 1e7c3000 fmov d0, #-5.312500000000000000e-01
748: 1e7e1000 fmov d0, #-1.000000000000000000e+00
74c: 1e7e3000 fmov d0, #-1.062500000000000000e+00
750: f8388098 swp x24, x24, [x4]
754: f8340010 ldadd x20, x16, [x0]
758: f8241175 ldclr x4, x21, [x11]
75c: f83e22d0 ldeor x30, x16, [x22]
760: f82432ef ldset x4, x15, [x23]
764: f83a5186 ldsmin x26, x6, [x12]
768: f82f41ee ldsmax x15, x14, [x15]
76c: f82973b9 ldumin x9, x25, [x29]
770: f82b6194 ldumax x11, x20, [x12]
774: f8b28216 swpa x18, x22, [x16]
778: f8b50358 ldadda x21, x24, [x26]
77c: f8a61206 ldclra x6, x6, [x16]
780: f8b02219 ldeora x16, x25, [x16]
784: f8bc3218 ldseta x28, x24, [x16]
788: f8ba514f ldsmina x26, x15, [x10]
78c: f8ad428e ldsmaxa x13, x14, [x20]
790: f8a173d7 ldumina x1, x23, [x30]
794: f8ae60c2 ldumaxa x14, x2, [x6]
798: f8e38328 swpal x3, x8, [x25]
79c: f8e003db ldaddal x0, x27, [x30]
7a0: f8e513c5 ldclral x5, x5, [x30]
7a4: f8eb2019 ldeoral x11, x25, [x0]
7a8: f8ff3260 ldsetal xzr, x0, [x19]
7ac: f8fd513a ldsminal x29, x26, [x9]
7b0: f8fa41ec ldsmaxal x26, x12, [x15]
7b4: f8eb724b lduminal x11, x11, [x18]
7b8: f8f96316 ldumaxal x25, x22, [x24]
7bc: f8608171 swpl x0, x17, [x11]
7c0: f86600dd ldaddl x6, x29, [x6]
7c4: f86512a5 ldclrl x5, x5, [x21]
7c8: f8732250 ldeorl x19, x16, [x18]
7cc: f87e339b ldsetl x30, x27, [x28]
7d0: f861503c ldsminl x1, x28, [x1]
7d4: f874421d ldsmaxl x20, x29, [x16]
7d8: f86d73aa lduminl x13, x10, [x29]
7dc: f87d62d3 ldumaxl x29, x19, [x22]
7e0: b82a83e4 swp w10, w4, [sp]
7e4: b83503e8 ldadd w21, w8, [sp]
7e8: b833138a ldclr w19, w10, [x28]
7ec: b82220b9 ldeor w2, w25, [x5]
7f0: b82332c8 ldset w3, w8, [x22]
7f4: b83350ad ldsmin w19, w13, [x5]
7f8: b83d42b8 ldsmax w29, w24, [x21]
7fc: b83a7078 ldumin w26, w24, [x3]
800: b83862fa ldumax w24, w26, [x23]
804: b8af8075 swpa w15, w21, [x3]
808: b8b80328 ldadda w24, w8, [x25]
80c: b8b41230 ldclra w20, w16, [x17]
810: b8a22001 ldeora w2, w1, [x0]
814: b8b83064 ldseta w24, w4, [x3]
818: b8ac539f ldsmina w12, wzr, [x28]
81c: b8aa405a ldsmaxa w10, w26, [x2]
820: b8ac73f2 ldumina w12, w18, [sp]
824: b8a163ad ldumaxa w1, w13, [x29]
828: b8e08193 swpal w0, w19, [x12]
82c: b8f101b6 ldaddal w17, w22, [x13]
830: b8fc13fe ldclral w28, w30, [sp]
834: b8e1239a ldeoral w1, w26, [x28]
838: b8e4309e ldsetal w4, w30, [x4]
83c: b8e6535e ldsminal w6, w30, [x26]
840: b8f24109 ldsmaxal w18, w9, [x8]
844: b8ec7280 lduminal w12, w0, [x20]
848: b8e16058 ldumaxal w1, w24, [x2]
84c: b8608309 swpl w0, w9, [x24]
850: b87a03d0 ldaddl w26, w16, [x30]
854: b86312ea ldclrl w3, w10, [x23]
858: b86a2244 ldeorl w10, w4, [x18]
85c: b862310b ldsetl w2, w11, [x8]
860: b86a522f ldsminl w10, w15, [x17]
864: b862418a ldsmaxl w2, w10, [x12]
868: b86c71af lduminl w12, w15, [x13]
86c: b8626287 ldumaxl w2, w7, [x20]
6d0: 05a08020 mov z0.s, p0/m, s1
6d4: 04b0e3e0 incw x0
6d8: 0470e7e1 dech x1
6dc: 042f9c20 lsl z0.b, z1.b, #7
6e0: 043f9c35 lsl z21.h, z1.h, #15
6e4: 047f9c20 lsl z0.s, z1.s, #31
6e8: 04ff9c20 lsl z0.d, z1.d, #63
6ec: 04299420 lsr z0.b, z1.b, #7
6f0: 04319160 asr z0.h, z11.h, #15
6f4: 0461943e lsr z30.s, z1.s, #31
6f8: 04a19020 asr z0.d, z1.d, #63
6fc: 042053ff addvl sp, x0, #31
700: 047f5401 addpl x1, sp, #-32
704: 25208028 cntp x8, p0, p1.b
708: 2538cfe0 mov z0.b, #127
70c: 2578d001 mov z1.h, #-128
710: 25b8efe2 mov z2.s, #32512
714: 25f8f007 mov z7.d, #-32768
718: a400a3e0 ld1b {z0.b}, p0/z, [sp]
71c: a4a8a7ea ld1h {z10.h}, p1/z, [sp, #-8, mul vl]
720: a547a814 ld1w {z20.s}, p2/z, [x0, #7, mul vl]
724: a4084ffe ld1b {z30.b}, p3/z, [sp, x8]
728: a55c53e0 ld1w {z0.s}, p4/z, [sp, x28, lsl #2]
72c: a5e1540b ld1d {z11.d}, p5/z, [x0, x1, lsl #3]
730: e400fbf6 st1b {z22.b}, p6, [sp]
734: e408ffff st1b {z31.b}, p7, [sp, #-8, mul vl]
738: e547e400 st1w {z0.s}, p1, [x0, #7, mul vl]
73c: e4014be0 st1b {z0.b}, p2, [sp, x1]
740: e4a84fe0 st1h {z0.h}, p3, [sp, x8, lsl #1]
744: e5f25000 st1d {z0.d}, p4, [x0, x18, lsl #3]
748: 858043e0 ldr z0, [sp]
74c: 85a043ff ldr z31, [sp, #-256, mul vl]
750: e59f5d08 str z8, [x8, #255, mul vl]
754: 1e601000 fmov d0, #2.000000000000000000e+00
758: 1e603000 fmov d0, #2.125000000000000000e+00
75c: 1e621000 fmov d0, #4.000000000000000000e+00
760: 1e623000 fmov d0, #4.250000000000000000e+00
764: 1e641000 fmov d0, #8.000000000000000000e+00
768: 1e643000 fmov d0, #8.500000000000000000e+00
76c: 1e661000 fmov d0, #1.600000000000000000e+01
770: 1e663000 fmov d0, #1.700000000000000000e+01
774: 1e681000 fmov d0, #1.250000000000000000e-01
778: 1e683000 fmov d0, #1.328125000000000000e-01
77c: 1e6a1000 fmov d0, #2.500000000000000000e-01
780: 1e6a3000 fmov d0, #2.656250000000000000e-01
784: 1e6c1000 fmov d0, #5.000000000000000000e-01
788: 1e6c3000 fmov d0, #5.312500000000000000e-01
78c: 1e6e1000 fmov d0, #1.000000000000000000e+00
790: 1e6e3000 fmov d0, #1.062500000000000000e+00
794: 1e701000 fmov d0, #-2.000000000000000000e+00
798: 1e703000 fmov d0, #-2.125000000000000000e+00
79c: 1e721000 fmov d0, #-4.000000000000000000e+00
7a0: 1e723000 fmov d0, #-4.250000000000000000e+00
7a4: 1e741000 fmov d0, #-8.000000000000000000e+00
7a8: 1e743000 fmov d0, #-8.500000000000000000e+00
7ac: 1e761000 fmov d0, #-1.600000000000000000e+01
7b0: 1e763000 fmov d0, #-1.700000000000000000e+01
7b4: 1e781000 fmov d0, #-1.250000000000000000e-01
7b8: 1e783000 fmov d0, #-1.328125000000000000e-01
7bc: 1e7a1000 fmov d0, #-2.500000000000000000e-01
7c0: 1e7a3000 fmov d0, #-2.656250000000000000e-01
7c4: 1e7c1000 fmov d0, #-5.000000000000000000e-01
7c8: 1e7c3000 fmov d0, #-5.312500000000000000e-01
7cc: 1e7e1000 fmov d0, #-1.000000000000000000e+00
7d0: 1e7e3000 fmov d0, #-1.062500000000000000e+00
7d4: f8388098 swp x24, x24, [x4]
7d8: f8340010 ldadd x20, x16, [x0]
7dc: f8241175 ldclr x4, x21, [x11]
7e0: f83e22d0 ldeor x30, x16, [x22]
7e4: f82432ef ldset x4, x15, [x23]
7e8: f83a5186 ldsmin x26, x6, [x12]
7ec: f82f41ee ldsmax x15, x14, [x15]
7f0: f82973b9 ldumin x9, x25, [x29]
7f4: f82b6194 ldumax x11, x20, [x12]
7f8: f8b28216 swpa x18, x22, [x16]
7fc: f8b50358 ldadda x21, x24, [x26]
800: f8a61206 ldclra x6, x6, [x16]
804: f8b02219 ldeora x16, x25, [x16]
808: f8bc3218 ldseta x28, x24, [x16]
80c: f8ba514f ldsmina x26, x15, [x10]
810: f8ad428e ldsmaxa x13, x14, [x20]
814: f8a173d7 ldumina x1, x23, [x30]
818: f8ae60c2 ldumaxa x14, x2, [x6]
81c: f8e38328 swpal x3, x8, [x25]
820: f8e003db ldaddal x0, x27, [x30]
824: f8e513c5 ldclral x5, x5, [x30]
828: f8eb2019 ldeoral x11, x25, [x0]
82c: f8ff3260 ldsetal xzr, x0, [x19]
830: f8fd513a ldsminal x29, x26, [x9]
834: f8fa41ec ldsmaxal x26, x12, [x15]
838: f8eb724b lduminal x11, x11, [x18]
83c: f8f96316 ldumaxal x25, x22, [x24]
840: f8608171 swpl x0, x17, [x11]
844: f86600dd ldaddl x6, x29, [x6]
848: f86512a5 ldclrl x5, x5, [x21]
84c: f8732250 ldeorl x19, x16, [x18]
850: f87e339b ldsetl x30, x27, [x28]
854: f861503c ldsminl x1, x28, [x1]
858: f874421d ldsmaxl x20, x29, [x16]
85c: f86d73aa lduminl x13, x10, [x29]
860: f87d62d3 ldumaxl x29, x19, [x22]
864: b82a83e4 swp w10, w4, [sp]
868: b83503e8 ldadd w21, w8, [sp]
86c: b833138a ldclr w19, w10, [x28]
870: b82220b9 ldeor w2, w25, [x5]
874: b82332c8 ldset w3, w8, [x22]
878: b83350ad ldsmin w19, w13, [x5]
87c: b83d42b8 ldsmax w29, w24, [x21]
880: b83a7078 ldumin w26, w24, [x3]
884: b83862fa ldumax w24, w26, [x23]
888: b8af8075 swpa w15, w21, [x3]
88c: b8b80328 ldadda w24, w8, [x25]
890: b8b41230 ldclra w20, w16, [x17]
894: b8a22001 ldeora w2, w1, [x0]
898: b8b83064 ldseta w24, w4, [x3]
89c: b8ac539f ldsmina w12, wzr, [x28]
8a0: b8aa405a ldsmaxa w10, w26, [x2]
8a4: b8ac73f2 ldumina w12, w18, [sp]
8a8: b8a163ad ldumaxa w1, w13, [x29]
8ac: b8e08193 swpal w0, w19, [x12]
8b0: b8f101b6 ldaddal w17, w22, [x13]
8b4: b8fc13fe ldclral w28, w30, [sp]
8b8: b8e1239a ldeoral w1, w26, [x28]
8bc: b8e4309e ldsetal w4, w30, [x4]
8c0: b8e6535e ldsminal w6, w30, [x26]
8c4: b8f24109 ldsmaxal w18, w9, [x8]
8c8: b8ec7280 lduminal w12, w0, [x20]
8cc: b8e16058 ldumaxal w1, w24, [x2]
8d0: b8608309 swpl w0, w9, [x24]
8d4: b87a03d0 ldaddl w26, w16, [x30]
8d8: b86312ea ldclrl w3, w10, [x23]
8dc: b86a2244 ldeorl w10, w4, [x18]
8e0: b862310b ldsetl w2, w11, [x8]
8e4: b86a522f ldsminl w10, w15, [x17]
8e8: b862418a ldsmaxl w2, w10, [x12]
8ec: b86c71af lduminl w12, w15, [x13]
8f0: b8626287 ldumaxl w2, w7, [x20]
8f4: 042401f9 add z25.b, z15.b, z4.b
8f8: 04b10564 sub z4.s, z11.s, z17.s
8fc: 65ca0230 fadd z16.d, z17.d, z10.d
900: 65d90996 fmul z22.d, z12.d, z25.d
904: 65ca05dc fsub z28.d, z14.d, z10.d
908: 0456afc1 abs z1.h, p3/m, z30.h
90c: 0400044f add z15.b, p1/m, z15.b, z2.b
910: 0490920d asr z13.s, p4/m, z13.s, z16.s
914: 04daa163 cnt z3.d, p0/m, z11.d
918: 04d389c5 lsl z5.d, p2/m, z5.d, z14.d
91c: 0411829d lsr z29.b, p0/m, z29.b, z20.b
920: 04901774 mul z20.s, p5/m, z20.s, z27.s
924: 0417b89a neg z26.b, p6/m, z4.b
928: 041eb3d6 not z22.b, p4/m, z30.b
92c: 04480b6b smax z11.h, p2/m, z11.h, z27.h
930: 048a17dc smin z28.s, p5/m, z28.s, z30.s
934: 048105be sub z30.s, p1/m, z30.s, z13.s
938: 04dcb35e fabs z30.d, p4/m, z26.d
93c: 65808d6f fadd z15.s, p3/m, z15.s, z11.s
940: 65cd9e06 fdiv z6.d, p7/m, z6.d, z16.d
944: 65869cfb fmax z27.s, p7/m, z27.s, z7.s
948: 65c78893 fmin z19.d, p2/m, z19.d, z4.d
94c: 658292d1 fmul z17.s, p4/m, z17.s, z22.s
950: 04ddaebc fneg z28.d, p3/m, z21.d
954: 6582b452 frintm z18.s, p5/m, z2.s
958: 6580ade6 frintn z6.s, p3/m, z15.s
95c: 65c1b42c frintp z12.d, p5/m, z1.d
960: 658da632 fsqrt z18.s, p1/m, z17.s
964: 658195af fsub z15.s, p5/m, z15.s, z13.s
968: 65eb1f74 fmla z20.d, p7/m, z27.d, z11.d
96c: 65f723c3 fmls z3.d, p0/m, z30.d, z23.d
970: 65ba4b71 fnmla z17.s, p2/m, z27.s, z26.s
974: 65fe76c6 fnmls z6.d, p5/m, z22.d, z30.d
978: 04525f42 mla z2.h, p7/m, z26.h, z18.h
97c: 04117056 mls z22.b, p4/m, z2.b, z17.b
980: 04363338 and z24.d, z25.d, z22.d
984: 04a33192 eor z18.d, z12.d, z3.d
988: 0470339d orr z29.d, z28.d, z16.d
98c: 049a2b86 andv s6, p2, z28.s
990: 045824e7 orv h7, p1, z7.h
994: 04193509 eorv b9, p5, z8.b
998: 040837db smaxv b27, p5, z30.b
99c: 044a221a sminv h26, p0, z16.h
9a0: 65c73903 fminv d3, p6, z8.d
9a4: 65c63b55 fmaxv d21, p6, z26.d
9a8: 65982096 fadda s22, p0, s22, z4.s
9ac: 04412071 uaddv d17, p0, z3.h
*/
static const unsigned int insns[] =
@ -1322,30 +1486,30 @@ Disassembly of section .text:
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
0x120cb166, 0x321764bc, 0x52174681, 0x720c0247,
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
0x14000000, 0x17ffffd7, 0x140001f2, 0x94000000,
0x97ffffd4, 0x940001ef, 0x3400000a, 0x34fffa2a,
0x34003d8a, 0x35000008, 0x35fff9c8, 0x35003d28,
0xb400000b, 0xb4fff96b, 0xb4003ccb, 0xb500001d,
0xb5fff91d, 0xb5003c7d, 0x10000013, 0x10fff8b3,
0x10003c13, 0x90000013, 0x36300016, 0x3637f836,
0x36303b96, 0x3758000c, 0x375ff7cc, 0x37583b2c,
0x14000000, 0x17ffffd7, 0x14000242, 0x94000000,
0x97ffffd4, 0x9400023f, 0x3400000a, 0x34fffa2a,
0x3400478a, 0x35000008, 0x35fff9c8, 0x35004728,
0xb400000b, 0xb4fff96b, 0xb40046cb, 0xb500001d,
0xb5fff91d, 0xb500467d, 0x10000013, 0x10fff8b3,
0x10004613, 0x90000013, 0x36300016, 0x3637f836,
0x36304596, 0x3758000c, 0x375ff7cc, 0x3758452c,
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
0x54003900, 0x54000001, 0x54fff541, 0x540038a1,
0x54000002, 0x54fff4e2, 0x54003842, 0x54000002,
0x54fff482, 0x540037e2, 0x54000003, 0x54fff423,
0x54003783, 0x54000003, 0x54fff3c3, 0x54003723,
0x54000004, 0x54fff364, 0x540036c4, 0x54000005,
0x54fff305, 0x54003665, 0x54000006, 0x54fff2a6,
0x54003606, 0x54000007, 0x54fff247, 0x540035a7,
0x54000008, 0x54fff1e8, 0x54003548, 0x54000009,
0x54fff189, 0x540034e9, 0x5400000a, 0x54fff12a,
0x5400348a, 0x5400000b, 0x54fff0cb, 0x5400342b,
0x5400000c, 0x54fff06c, 0x540033cc, 0x5400000d,
0x54fff00d, 0x5400336d, 0x5400000e, 0x54ffefae,
0x5400330e, 0x5400000f, 0x54ffef4f, 0x540032af,
0x54004300, 0x54000001, 0x54fff541, 0x540042a1,
0x54000002, 0x54fff4e2, 0x54004242, 0x54000002,
0x54fff482, 0x540041e2, 0x54000003, 0x54fff423,
0x54004183, 0x54000003, 0x54fff3c3, 0x54004123,
0x54000004, 0x54fff364, 0x540040c4, 0x54000005,
0x54fff305, 0x54004065, 0x54000006, 0x54fff2a6,
0x54004006, 0x54000007, 0x54fff247, 0x54003fa7,
0x54000008, 0x54fff1e8, 0x54003f48, 0x54000009,
0x54fff189, 0x54003ee9, 0x5400000a, 0x54fff12a,
0x54003e8a, 0x5400000b, 0x54fff0cb, 0x54003e2b,
0x5400000c, 0x54fff06c, 0x54003dcc, 0x5400000d,
0x54fff00d, 0x54003d6d, 0x5400000e, 0x54ffefae,
0x54003d0e, 0x5400000f, 0x54ffef4f, 0x54003caf,
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200,
@ -1377,7 +1541,7 @@ Disassembly of section .text:
0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176,
0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422,
0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a,
0xbd1b1869, 0x580022fb, 0x1800000b, 0xf8945060,
0xbd1b1869, 0x58002cfb, 0x1800000b, 0xf8945060,
0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035,
0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380,
0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b12,
@ -1421,32 +1585,52 @@ Disassembly of section .text:
0x7a42cbe2, 0x93df03ff, 0xc820ffff, 0x8822fc7f,
0xc8247cbf, 0x88267fff, 0x4e010fe0, 0x4e081fe1,
0x4e0c1fe1, 0x4e0a1fe1, 0x4e071fe1, 0x4cc0ac3f,
0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000,
0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000,
0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000,
0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000,
0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000,
0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000,
0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000,
0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000,
0xf8388098, 0xf8340010, 0xf8241175, 0xf83e22d0,
0xf82432ef, 0xf83a5186, 0xf82f41ee, 0xf82973b9,
0xf82b6194, 0xf8b28216, 0xf8b50358, 0xf8a61206,
0xf8b02219, 0xf8bc3218, 0xf8ba514f, 0xf8ad428e,
0xf8a173d7, 0xf8ae60c2, 0xf8e38328, 0xf8e003db,
0xf8e513c5, 0xf8eb2019, 0xf8ff3260, 0xf8fd513a,
0xf8fa41ec, 0xf8eb724b, 0xf8f96316, 0xf8608171,
0xf86600dd, 0xf86512a5, 0xf8732250, 0xf87e339b,
0xf861503c, 0xf874421d, 0xf86d73aa, 0xf87d62d3,
0xb82a83e4, 0xb83503e8, 0xb833138a, 0xb82220b9,
0xb82332c8, 0xb83350ad, 0xb83d42b8, 0xb83a7078,
0xb83862fa, 0xb8af8075, 0xb8b80328, 0xb8b41230,
0xb8a22001, 0xb8b83064, 0xb8ac539f, 0xb8aa405a,
0xb8ac73f2, 0xb8a163ad, 0xb8e08193, 0xb8f101b6,
0xb8fc13fe, 0xb8e1239a, 0xb8e4309e, 0xb8e6535e,
0xb8f24109, 0xb8ec7280, 0xb8e16058, 0xb8608309,
0xb87a03d0, 0xb86312ea, 0xb86a2244, 0xb862310b,
0xb86a522f, 0xb862418a, 0xb86c71af, 0xb8626287,
0x05a08020, 0x04b0e3e0, 0x0470e7e1, 0x042f9c20,
0x043f9c35, 0x047f9c20, 0x04ff9c20, 0x04299420,
0x04319160, 0x0461943e, 0x04a19020, 0x042053ff,
0x047f5401, 0x25208028, 0x2538cfe0, 0x2578d001,
0x25b8efe2, 0x25f8f007, 0xa400a3e0, 0xa4a8a7ea,
0xa547a814, 0xa4084ffe, 0xa55c53e0, 0xa5e1540b,
0xe400fbf6, 0xe408ffff, 0xe547e400, 0xe4014be0,
0xe4a84fe0, 0xe5f25000, 0x858043e0, 0x85a043ff,
0xe59f5d08, 0x1e601000, 0x1e603000, 0x1e621000,
0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000,
0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000,
0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000,
0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000,
0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000,
0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000,
0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000,
0x1e7e3000, 0xf8388098, 0xf8340010, 0xf8241175,
0xf83e22d0, 0xf82432ef, 0xf83a5186, 0xf82f41ee,
0xf82973b9, 0xf82b6194, 0xf8b28216, 0xf8b50358,
0xf8a61206, 0xf8b02219, 0xf8bc3218, 0xf8ba514f,
0xf8ad428e, 0xf8a173d7, 0xf8ae60c2, 0xf8e38328,
0xf8e003db, 0xf8e513c5, 0xf8eb2019, 0xf8ff3260,
0xf8fd513a, 0xf8fa41ec, 0xf8eb724b, 0xf8f96316,
0xf8608171, 0xf86600dd, 0xf86512a5, 0xf8732250,
0xf87e339b, 0xf861503c, 0xf874421d, 0xf86d73aa,
0xf87d62d3, 0xb82a83e4, 0xb83503e8, 0xb833138a,
0xb82220b9, 0xb82332c8, 0xb83350ad, 0xb83d42b8,
0xb83a7078, 0xb83862fa, 0xb8af8075, 0xb8b80328,
0xb8b41230, 0xb8a22001, 0xb8b83064, 0xb8ac539f,
0xb8aa405a, 0xb8ac73f2, 0xb8a163ad, 0xb8e08193,
0xb8f101b6, 0xb8fc13fe, 0xb8e1239a, 0xb8e4309e,
0xb8e6535e, 0xb8f24109, 0xb8ec7280, 0xb8e16058,
0xb8608309, 0xb87a03d0, 0xb86312ea, 0xb86a2244,
0xb862310b, 0xb86a522f, 0xb862418a, 0xb86c71af,
0xb8626287, 0x042401f9, 0x04b10564, 0x65ca0230,
0x65d90996, 0x65ca05dc, 0x0456afc1, 0x0400044f,
0x0490920d, 0x04daa163, 0x04d389c5, 0x0411829d,
0x04901774, 0x0417b89a, 0x041eb3d6, 0x04480b6b,
0x048a17dc, 0x048105be, 0x04dcb35e, 0x65808d6f,
0x65cd9e06, 0x65869cfb, 0x65c78893, 0x658292d1,
0x04ddaebc, 0x6582b452, 0x6580ade6, 0x65c1b42c,
0x658da632, 0x658195af, 0x65eb1f74, 0x65f723c3,
0x65ba4b71, 0x65fe76c6, 0x04525f42, 0x04117056,
0x04363338, 0x04a33192, 0x0470339d, 0x049a2b86,
0x045824e7, 0x04193509, 0x040837db, 0x044a221a,
0x65c73903, 0x65c63b55, 0x65982096, 0x04412071,
};
// END Generated code -- do not edit

View File

@ -139,6 +139,9 @@ REGISTER_DECLARATION(Register, rdispatch, r21);
// Java stack pointer
REGISTER_DECLARATION(Register, esp, r20);
// Preserved predicate register with all elements set TRUE.
REGISTER_DECLARATION(PRegister, ptrue, p7);
#define assert_cond(ARG1) assert(ARG1, #ARG1)
namespace asm_util {
@ -273,6 +276,14 @@ public:
f(r->encoding_nocheck(), lsb + 4, lsb);
}
// Insert the encoding of predicate register r into the 4-bit field
// occupying bits [lsb+3 : lsb] of the instruction being built.
void prf(PRegister r, int lsb) {
  const int msb = lsb + 3;
  f(r->encoding_nocheck(), msb, lsb);
}
// Insert the encoding of predicate register r into the 3-bit field
// occupying bits [lsb+2 : lsb] of the instruction being built. The narrower
// field is what the predicated SVE encoders in this file use for their
// governing predicate operand (Pg).
void pgrf(PRegister r, int lsb) {
  const int msb = lsb + 2;
  f(r->encoding_nocheck(), msb, lsb);
}
unsigned get(int msb = 31, int lsb = 0) {
int nbits = msb - lsb + 1;
unsigned mask = ((1U << nbits) - 1) << lsb;
@ -561,6 +572,18 @@ class Address {
void lea(MacroAssembler *, Register) const;
static bool offset_ok_for_immed(int64_t offset, uint shift);
// Returns true if the byte offset can be encoded as a signed, shift-bit wide
// SVE "MUL VL" immediate, i.e. when
//   - offset is an exact multiple of the vector length vl (in bytes), and
//   - offset / vl lies in [-2^(shift-1), 2^(shift-1)).
// Returns false otherwise.
static bool offset_ok_for_sve_immed(long offset, int shift, int vl /* sve vector length */) {
  if (offset % vl != 0) {
    // Not a whole number of vector registers: cannot be expressed as MUL VL.
    return false;
  }
  // Convert address offset into sve imm offset (MUL VL). Keep the quotient in
  // the same width as offset: narrowing it to int could wrap a huge offset
  // into the encodable range and wrongly report it as valid.
  const long sve_offset = offset / vl;
  const long range = 1L << (shift - 1);
  return (-range <= sve_offset) && (sve_offset < range);
}
};
// Convenience classes
@ -684,6 +707,12 @@ public:
void rf(FloatRegister reg, int lsb) {
current->rf(reg, lsb);
}
// Delegates to prf() on `current`: encodes predicate register reg into the
// instruction field whose least significant bit is lsb.
void prf(PRegister reg, int lsb) {
current->prf(reg, lsb);
}
// Delegates to pgrf() on `current`: encodes predicate register reg into the
// (narrower, governing-predicate) field whose least significant bit is lsb.
void pgrf(PRegister reg, int lsb) {
current->pgrf(reg, lsb);
}
void fixed(unsigned value, unsigned mask) {
current->fixed(value, mask);
}
@ -2473,13 +2502,18 @@ public:
f(sidx<<(int)T, 14, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0);
}
void umov(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) {
starti;
f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21);
f(((idx<<1)|1)<<(int)T, 20, 16), f(0b001111, 15, 10);
rf(Vn, 5), rf(Rd, 0);
#define INSN(NAME, op) \
void NAME(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) { \
starti; \
f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21); \
f(((idx<<1)|1)<<(int)T, 20, 16), f(op, 15, 10); \
rf(Vn, 5), rf(Rd, 0); \
}
INSN(umov, 0b001111);
INSN(smov, 0b001011);
#undef INSN
#define INSN(NAME, opc, opc2, isSHR) \
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
starti; \
@ -2711,7 +2745,7 @@ public:
#undef INSN
void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
{
starti;
assert(T == T8B || T == T16B, "invalid arrangement");
@ -2721,6 +2755,292 @@ void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister V
f(0, 10), rf(Vn, 5), rf(Vd, 0);
}
// SVE integer arithmetic - unpredicated
//
// Generates three-register vector encoders NAME(Zd, T, Zn, Zm).
// Field layout written by the macro body:
//   31:24 = 0b00000100, 23:22 = element size T, 21 = 1, 20:16 = Zm,
//   15:13 = 0, 12:10 = opcode, 9:5 = Zn, 4:0 = Zd.
// T must be B, H, S or D; the Q variant is rejected by the assert.
#define INSN(NAME, opcode) \
void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
starti; \
assert(T != Q, "invalid register variant"); \
f(0b00000100, 31, 24), f(T, 23, 22), f(1, 21), \
rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0); \
}
INSN(sve_add, 0b000);
INSN(sve_sub, 0b001);
#undef INSN
// SVE floating-point arithmetic - unpredicated
//
// Generates three-register vector encoders NAME(Zd, T, Zn, Zm).
// Field layout written by the macro body:
//   31:24 = 0b01100101, 23:22 = element size T, 21 = 0, 20:16 = Zm,
//   15:13 = 0, 12:10 = opcode, 9:5 = Zn, 4:0 = Zd.
// Only the S and D variants are accepted by the assert.
#define INSN(NAME, opcode) \
void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
starti; \
assert(T == S || T == D, "invalid register variant"); \
f(0b01100101, 31, 24), f(T, 23, 22), f(0, 21), \
rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0); \
}
INSN(sve_fadd, 0b000);
INSN(sve_fmul, 0b010);
INSN(sve_fsub, 0b001);
#undef INSN
private:
// Shared encoder for predicated SVE instructions that take one governing
// predicate and two vector/SIMD registers.
//   op24 -> bits 31:24, T -> bits 23:22, op13 -> bits 21:13,
//   Pg   -> 3-bit field starting at bit 10,
//   Zn_or_Vn -> field starting at bit 5, Zd_or_Vd -> field starting at bit 0.
void sve_predicate_reg_insn(unsigned op24, unsigned op13,
                            FloatRegister Zd_or_Vd, SIMD_RegVariant T,
                            PRegister Pg, FloatRegister Zn_or_Vn) {
  starti;
  // Opcode and element-size fields.
  f(op24, 31, 24);
  f(T, 23, 22);
  f(op13, 21, 13);
  // Register operands.
  pgrf(Pg, 10);
  rf(Zn_or_Vn, 5);
  rf(Zd_or_Vd, 0);
}
public:
// SVE integer arithmetic - predicated
//
// Generates encoders NAME(Zdn_or_Zd_or_Vd, T, Pg, Znm_or_Vn) that all funnel
// into sve_predicate_reg_insn(): op1 is placed in bits 31:24 and op2 in bits
// 21:13, Pg is the governing predicate. The first operand is, depending on
// the instruction, a destructive source/destination vector (Zdn), a plain
// destination vector (Zd) or, for the "reduction to scalar" forms, a SIMD&FP
// scalar destination (Vd). The Q variant is rejected by the assert.
#define INSN(NAME, op1, op2) \
void NAME(FloatRegister Zdn_or_Zd_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Znm_or_Vn) { \
assert(T != Q, "invalid register variant"); \
sve_predicate_reg_insn(op1, op2, Zdn_or_Zd_or_Vd, T, Pg, Znm_or_Vn); \
}
INSN(sve_abs, 0b00000100, 0b010110101); // vector abs, unary
INSN(sve_add, 0b00000100, 0b000000000); // vector add
INSN(sve_andv, 0b00000100, 0b011010001); // bitwise and reduction to scalar
INSN(sve_asr, 0b00000100, 0b010000100); // vector arithmetic shift right
INSN(sve_cnt, 0b00000100, 0b011010101) // count non-zero bits
INSN(sve_cpy, 0b00000101, 0b100000100); // copy scalar to each active vector element
INSN(sve_eorv, 0b00000100, 0b011001001); // bitwise xor reduction to scalar
INSN(sve_lsl, 0b00000100, 0b010011100); // vector logical shift left
INSN(sve_lsr, 0b00000100, 0b010001100); // vector logical shift right
INSN(sve_mul, 0b00000100, 0b010000000); // vector mul
INSN(sve_neg, 0b00000100, 0b010111101); // vector neg, unary
INSN(sve_not, 0b00000100, 0b011110101); // bitwise invert vector, unary
INSN(sve_orv, 0b00000100, 0b011000001); // bitwise or reduction to scalar
INSN(sve_smax, 0b00000100, 0b001000000); // signed maximum vectors
INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar
INSN(sve_smin, 0b00000100, 0b001010000); // signed minimum vectors
INSN(sve_sminv, 0b00000100, 0b001010001); // signed minimum reduction to scalar
INSN(sve_sub, 0b00000100, 0b000001000); // vector sub
INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar
#undef INSN
// SVE floating-point arithmetic - predicated
//
// Generates encoders NAME(Zd_or_Zdn_or_Vd, T, Pg, Zn_or_Zm) that all funnel
// into sve_predicate_reg_insn(): op1 is placed in bits 31:24 and op2 in bits
// 21:13, Pg is the governing predicate. Only the S and D variants are
// accepted by the assert.
#define INSN(NAME, op1, op2) \
void NAME(FloatRegister Zd_or_Zdn_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn_or_Zm) { \
assert(T == S || T == D, "invalid register variant"); \
sve_predicate_reg_insn(op1, op2, Zd_or_Zdn_or_Vd, T, Pg, Zn_or_Zm); \
}
INSN(sve_fabs, 0b00000100, 0b011100101);
INSN(sve_fadd, 0b01100101, 0b000000100);
INSN(sve_fadda, 0b01100101, 0b011000001); // add strictly-ordered reduction to scalar Vd
INSN(sve_fdiv, 0b01100101, 0b001101100);
INSN(sve_fmax, 0b01100101, 0b000110100); // floating-point maximum
INSN(sve_fmaxv, 0b01100101, 0b000110001); // floating-point maximum recursive reduction to scalar
INSN(sve_fmin, 0b01100101, 0b000111100); // floating-point minimum
INSN(sve_fminv, 0b01100101, 0b000111001); // floating-point minimum recursive reduction to scalar
INSN(sve_fmul, 0b01100101, 0b000010100);
INSN(sve_fneg, 0b00000100, 0b011101101);
INSN(sve_frintm, 0b01100101, 0b000010101); // floating-point round to integral value, toward minus infinity
INSN(sve_frintn, 0b01100101, 0b000000101); // floating-point round to integral value, nearest with ties to even
INSN(sve_frintp, 0b01100101, 0b000001101); // floating-point round to integral value, toward plus infinity
INSN(sve_fsqrt, 0b01100101, 0b001101101);
INSN(sve_fsub, 0b01100101, 0b000001100);
#undef INSN
// SVE multiply-add/sub - predicated
//
// Generates four-operand encoders NAME(Zda, T, Pg, Zn, Zm), where Zda is both
// the accumulator input and the destination (see the per-instruction formulas
// below). Field layout written by the macro body:
//   31:24 = op0, 23:22 = element size T, 21 = op1, 20:16 = Zm,
//   15:13 = op2, Pg in the 3-bit field at bit 10, 9:5 = Zn, 4:0 = Zda.
// The Q variant is rejected by the assert.
#define INSN(NAME, op0, op1, op2) \
void NAME(FloatRegister Zda, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn, FloatRegister Zm) { \
starti; \
assert(T != Q, "invalid size"); \
f(op0, 31, 24), f(T, 23, 22), f(op1, 21), rf(Zm, 16); \
f(op2, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zda, 0); \
}
INSN(sve_fmla, 0b01100101, 1, 0b000); // floating-point fused multiply-add: Zda = Zda + Zn * Zm
INSN(sve_fmls, 0b01100101, 1, 0b001); // floating-point fused multiply-subtract: Zda = Zda + -Zn * Zm
INSN(sve_fnmla, 0b01100101, 1, 0b010); // floating-point negated fused multiply-add: Zda = -Zda + -Zn * Zm
INSN(sve_fnmls, 0b01100101, 1, 0b011); // floating-point negated fused multiply-subtract: Zda = -Zda + Zn * Zm
INSN(sve_mla, 0b00000100, 0, 0b010); // multiply-add: Zda = Zda + Zn*Zm
INSN(sve_mls, 0b00000100, 0, 0b011); // multiply-subtract: Zda = Zda + -Zn*Zm
#undef INSN
// SVE bitwise logical - unpredicated
//
// Generates encoders NAME(Zd, Zn, Zm). These take no element-size argument;
// bits 23:22 carry the operation selector opc instead.
// Field layout written by the macro body:
//   31:24 = 0b00000100, 23:22 = opc, 21 = 1, 20:16 = Zm,
//   15:10 = 0b001100, 9:5 = Zn, 4:0 = Zd.
#define INSN(NAME, opc) \
void NAME(FloatRegister Zd, FloatRegister Zn, FloatRegister Zm) { \
starti; \
f(0b00000100, 31, 24), f(opc, 23, 22), f(1, 21), \
rf(Zm, 16), f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0); \
}
INSN(sve_and, 0b00);
INSN(sve_eor, 0b10);
INSN(sve_orr, 0b01);
#undef INSN
// SVE shift immediate - unpredicated
//
// Generates encoders NAME(Zd, T, Zn, shift). The element size and the shift
// amount are folded into one 7-bit value (tszh:tszl:imm3):
//   cVal         = element width in bits (1 << (T + 3)), doubled for right shifts
//   encodedShift = cVal - shift (right shifts) or cVal + shift (left shifts)
//   tszh         = encodedShift >> 5   -> bits 23:22
//   tszl_imm     = encodedShift & 0x1f -> bits 20:16
// Accepted shift range (checked by assert): 1..element width for right
// shifts, 0..element width - 1 for left shifts. The Q variant is rejected.
#define INSN(NAME, opc, isSHR) \
void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, int shift) { \
starti; \
/* The encodings for the tszh:tszl:imm3 fields (bits 23:22 20:19 18:16) \
* for shift right is calculated as: \
* 0001 xxx B, shift = 16 - UInt(tszh:tszl:imm3) \
* 001x xxx H, shift = 32 - UInt(tszh:tszl:imm3) \
* 01xx xxx S, shift = 64 - UInt(tszh:tszl:imm3) \
* 1xxx xxx D, shift = 128 - UInt(tszh:tszl:imm3) \
* for shift left is calculated as: \
* 0001 xxx B, shift = UInt(tszh:tszl:imm3) - 8 \
* 001x xxx H, shift = UInt(tszh:tszl:imm3) - 16 \
* 01xx xxx S, shift = UInt(tszh:tszl:imm3) - 32 \
* 1xxx xxx D, shift = UInt(tszh:tszl:imm3) - 64 \
*/ \
assert(T != Q, "Invalid register variant"); \
if (isSHR) { \
assert(((1 << (T + 3)) >= shift) && (shift > 0) , "Invalid shift value"); \
} else { \
assert(((1 << (T + 3)) > shift) && (shift >= 0) , "Invalid shift value"); \
} \
int cVal = (1 << ((T + 3) + (isSHR ? 1 : 0))); \
int encodedShift = isSHR ? cVal - shift : cVal + shift; \
int tszh = encodedShift >> 5; \
int tszl_imm = encodedShift & 0x1f; \
f(0b00000100, 31, 24); \
f(tszh, 23, 22), f(1,21), f(tszl_imm, 20, 16); \
f(0b100, 15, 13), f(opc, 12, 10), rf(Zn, 5), rf(Zd, 0); \
}
INSN(sve_asr, 0b100, /* isSHR = */ true);
INSN(sve_lsl, 0b111, /* isSHR = */ false);
INSN(sve_lsr, 0b101, /* isSHR = */ true);
#undef INSN
private:
// Predicated SVE load/store, addressing form: scalar base + immediate index.
//   op1 -> bits 31:25, type -> bits 24:23, T -> bits 22:21, bit 20 = 0,
//   imm -> signed 4-bit field 19:16, op2 -> bits 15:13,
//   Pg  -> 3-bit field at bit 10, Xn (base) at bit 5, Zt at bit 0.
// The element size T must not be smaller than `type`.
void sve_ld_st1(FloatRegister Zt, Register Xn, int imm, PRegister Pg,
                SIMD_RegVariant T, int op1, int type, int op2) {
  starti;
  assert_cond(T >= type);
  // Opcode / size fields.
  f(op1, 31, 25);
  f(type, 24, 23);
  f(T, 22, 21);
  // Immediate-index form.
  f(0, 20);
  sf(imm, 19, 16);
  f(op2, 15, 13);
  // Register operands.
  pgrf(Pg, 10);
  srf(Xn, 5);
  rf(Zt, 0);
}
// Predicated SVE load/store, addressing form: scalar base + scalar index.
//   op1 -> bits 31:25, type -> bits 24:23, T -> bits 22:21,
//   Xm (index) at bit 16, op2 -> bits 15:13,
//   Pg -> 3-bit field at bit 10, Xn (base) at bit 5, Zt at bit 0.
// The element size T must not be smaller than `type`.
void sve_ld_st1(FloatRegister Zt, Register Xn, Register Xm, PRegister Pg,
                SIMD_RegVariant T, int op1, int type, int op2) {
  starti;
  assert_cond(T >= type);
  // Opcode / size fields.
  f(op1, 31, 25);
  f(type, 24, 23);
  f(T, 22, 21);
  // Register-index form.
  rf(Xm, 16);
  f(op2, 15, 13);
  // Register operands.
  pgrf(Pg, 10);
  srf(Xn, 5);
  rf(Zt, 0);
}
// Dispatch a predicated SVE load/store on the addressing mode of `a`:
//   base_plus_offset     -> immediate-index form, using imm_op2
//   base_plus_offset_reg -> register-index form, using scalar_op2
// Any other addressing mode is a programming error.
void sve_ld_st1(FloatRegister Zt, PRegister Pg,
                SIMD_RegVariant T, const Address &a,
                int op1, int type, int imm_op2, int scalar_op2) {
  switch (a.getMode()) {
  case Address::base_plus_offset:
    sve_ld_st1(Zt, a.base(), a.offset(), Pg, T, op1, type, imm_op2);
    return;
  case Address::base_plus_offset_reg:
    sve_ld_st1(Zt, a.base(), a.index(), Pg, T, op1, type, scalar_op2);
    return;
  default:
    break;
  }
  // Unsupported addressing mode.
  ShouldNotReachHere();
}
public:
// SVE load/store - predicated
//
// Generates encoders NAME(Zt, T, Pg, a). Each forwards to the Address-based
// sve_ld_st1() helper, which selects the immediate-index encoding (imm_op2)
// or the register-index encoding (scalar_op2) from the addressing mode of a.
// `type` is the memory element size selector (0b00 for the b forms up to
// 0b11 for the d forms); the helper requires T >= type. The Q variant is
// rejected by the assert.
#define INSN(NAME, op1, type, imm_op2, scalar_op2) \
void NAME(FloatRegister Zt, SIMD_RegVariant T, PRegister Pg, const Address &a) { \
assert(T != Q, "invalid register variant"); \
sve_ld_st1(Zt, Pg, T, a, op1, type, imm_op2, scalar_op2); \
}
INSN(sve_ld1b, 0b1010010, 0b00, 0b101, 0b010);
INSN(sve_st1b, 0b1110010, 0b00, 0b111, 0b010);
INSN(sve_ld1h, 0b1010010, 0b01, 0b101, 0b010);
INSN(sve_st1h, 0b1110010, 0b01, 0b111, 0b010);
INSN(sve_ld1w, 0b1010010, 0b10, 0b101, 0b010);
INSN(sve_st1w, 0b1110010, 0b10, 0b111, 0b010);
INSN(sve_ld1d, 0b1010010, 0b11, 0b101, 0b010);
INSN(sve_st1d, 0b1110010, 0b11, 0b111, 0b010);
#undef INSN
// SVE load/store - unpredicated
//
// Generates encoders NAME(Zt, a) for whole-register vector load/store.
// Only base + immediate addresses are accepted (the assert rejects an index
// register). a.offset() is encoded as-is, split across two fields: the upper
// part (offset >> 3) goes into the signed field 21:16 and the low three bits
// into 12:10. Callers in this file pass the offset already divided by the
// vector register size (see sve_spill_address), so it is not a byte offset.
#define INSN(NAME, op1) \
void NAME(FloatRegister Zt, const Address &a) { \
starti; \
assert(a.index() == noreg, "invalid address variant"); \
f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16), \
f(0b010, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5), rf(Zt, 0); \
}
INSN(sve_ldr, 0b100); // LDR (vector)
INSN(sve_str, 0b111); // STR (vector)
#undef INSN
// Generates encoders NAME(Xd, Xn, imm6).
// Field layout written by the macro body:
//   31:23 = 0b000001000, 22:21 = op, Xn at bit 16, 15:11 = 0b01010,
//   imm6 as a signed field in 10:5, Xd at bit 0.
// NOTE(review): by their names these presumably add imm6 multiples of the
// vector (addvl) / predicate (addpl) register length to Xn - confirm against
// the Arm ARM.
#define INSN(NAME, op) \
void NAME(Register Xd, Register Xn, int imm6) { \
starti; \
f(0b000001000, 31, 23), f(op, 22, 21); \
srf(Xn, 16), f(0b01010, 15, 11), sf(imm6, 10, 5), srf(Xd, 0); \
}
INSN(sve_addvl, 0b01);
INSN(sve_addpl, 0b11);
#undef INSN
// SVE inc/dec register by element count
//
// Generates encoders NAME(Xdn, T, imm4 = 1, pattern = 0b11111).
// Field layout written by the macro body:
//   31:24 = 0b00000100, 23:22 = element size T, 21:20 = 0b11,
//   19:16 = imm4 - 1 (the multiplier is stored minus one),
//   15:11 = 0b11100, 10 = op (0 = inc, 1 = dec), 9:5 = pattern, 4:0 = Xdn.
// The Q variant is rejected by the assert.
// NOTE(review): imm4 is unsigned, so passing 0 makes imm4 - 1 wrap; callers
// are expected to pass a value >= 1 - confirm the intended range.
#define INSN(NAME, op) \
void NAME(Register Xdn, SIMD_RegVariant T, unsigned imm4 = 1, int pattern = 0b11111) { \
starti; \
assert(T != Q, "invalid size"); \
f(0b00000100,31, 24), f(T, 23, 22), f(0b11, 21, 20); \
f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(op, 10), f(pattern, 9, 5), rf(Xdn, 0); \
}
INSN(sve_inc, 0);
INSN(sve_dec, 1);
#undef INSN
// SVE predicate count: encodes CNTP Xd, Pg, Pn.T.
//   31:24 = 0b00100101, 23:22 = element size T, 21:14 = 0b10000010,
//   Pg in the 4-bit field at bit 10, bit 9 = 0,
//   Pn in the 4-bit field at bit 5, Xd at bit 0.
// The Q variant is rejected by the assert.
void sve_cntp(Register Xd, SIMD_RegVariant T, PRegister Pg, PRegister Pn) {
  starti;
  assert(T != Q, "invalid size");
  // Opcode and element size.
  f(0b00100101, 31, 24);
  f(T, 23, 22);
  f(0b10000010, 21, 14);
  // Operands.
  prf(Pg, 10);
  f(0, 9);
  prf(Pn, 5);
  rf(Xd, 0);
}
// SVE dup scalar: encodes DUP Zd.T, Rn (general-purpose register source).
//   31:24 = 0b00000101, 23:22 = element size T, 21:10 = 0b100000001110,
//   Rn at bit 5, Zd at bit 0.
// The Q variant is rejected by the assert.
void sve_dup(FloatRegister Zd, SIMD_RegVariant T, Register Rn) {
  starti;
  assert(T != Q, "invalid size");
  // Opcode and element size.
  f(0b00000101, 31, 24);
  f(T, 23, 22);
  f(0b100000001110, 21, 10);
  // Operands.
  srf(Rn, 5);
  rf(Zd, 0);
}
// SVE dup imm: encodes DUP Zd.T, #imm.
// Two immediate forms are accepted:
//   - any value in [-128, 127], encoded directly (sh = 0);
//   - for T != B, a multiple of 256 in [-32768, 32512], encoded as
//     (value >> 8) with the shift bit set (sh = 1).
// Anything else triggers guarantee(false, ...). The Q variant is rejected.
//   31:24 = 0b00100101, 23:22 = T, 21:14 = 0b11100011, 13 = sh,
//   12:5 = signed 8-bit immediate, 4:0 = Zd.
void sve_dup(FloatRegister Zd, SIMD_RegVariant T, int imm8) {
  starti;
  assert(T != Q, "invalid size");
  int sh = 0;
  const bool fits_unshifted = (imm8 >= -128) && (imm8 <= 127);
  if (!fits_unshifted) {
    const bool fits_shifted = (T != B) && (imm8 <= 32512) && (imm8 >= -32768) &&
                              ((imm8 & 0xff) == 0);
    if (fits_shifted) {
      // Encode the upper byte and request a left shift by 8.
      sh = 1;
      imm8 = (imm8 >> 8);
    } else {
      guarantee(false, "invalid immediate");
    }
  }
  f(0b00100101, 31, 24);
  f(T, 23, 22);
  f(0b11100011, 21, 14);
  f(sh, 13);
  sf(imm8, 12, 5);
  rf(Zd, 0);
}
// Encodes PTRUE pd.esize, pattern.
//   31:24 = 0b00100101, 23:22 = esize, 21:10 = 0b011000111000,
//   9:5 = pattern, bit 4 = 0, pd in the 4-bit field at bit 0.
// NOTE(review): the default pattern 0b11111 presumably selects "all
// elements" - confirm against the Arm ARM.
void sve_ptrue(PRegister pd, SIMD_RegVariant esize, int pattern = 0b11111) {
  starti;
  // Opcode and element size.
  f(0b00100101, 31, 24);
  f(esize, 23, 22);
  f(0b011000111000, 21, 10);
  // Pattern and destination predicate.
  f(pattern, 9, 5);
  f(0b0, 4);
  prf(pd, 0);
}
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
}

View File

@ -431,8 +431,12 @@ void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, Z
ZSetupArguments setup_arguments(masm, stub);
__ mov(rscratch1, stub->slow_path());
__ blr(rscratch1);
if (UseSVE > 0) {
// Reinitialize the ptrue predicate register, in case the external runtime
// call clobbers ptrue reg, as we may return to SVE compiled code.
__ reinitialize_ptrue();
}
}
// Stub exit
__ b(*stub->continuation());
}

View File

@ -99,6 +99,9 @@ define_pd_global(intx, InlineSmallCode, 1000);
"Avoid generating unaligned memory accesses") \
product(bool, UseLSE, false, \
"Use LSE instructions") \
product(uint, UseSVE, 0, \
"Highest supported SVE instruction set version") \
range(0, 2) \
product(bool, UseBlockZeroing, true, \
"Use DC ZVA for block zeroing") \
product(intx, BlockZeroingLowLimit, 256, \

View File

@ -2117,9 +2117,16 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) {
}
// Push lots of registers in the bit set supplied. Don't push sp.
// Return the number of words pushed
// Return the number of dwords pushed
int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
int words_pushed = 0;
bool use_sve = false;
int sve_vector_size_in_bytes = 0;
#ifdef COMPILER2
use_sve = Matcher::supports_scalable_vector();
sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
#endif
// Scan bitset to accumulate register pairs
unsigned char regs[32];
@ -2134,9 +2141,19 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
return 0;
}
// SVE
if (use_sve && sve_vector_size_in_bytes > 16) {
sub(stack, stack, sve_vector_size_in_bytes * count);
for (int i = 0; i < count; i++) {
sve_str(as_FloatRegister(regs[i]), Address(stack, i));
}
return count * sve_vector_size_in_bytes / 8;
}
// NEON
if (count == 1) {
strq(as_FloatRegister(regs[0]), Address(pre(stack, -wordSize * 2)));
return 1;
return 2;
}
bool odd = (count & 1) == 1;
@ -2157,12 +2174,19 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
}
assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
return count;
return count * 2;
}
// Return the number of dwords popped
int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
int words_pushed = 0;
bool use_sve = false;
int sve_vector_size_in_bytes = 0;
#ifdef COMPILER2
use_sve = Matcher::supports_scalable_vector();
sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
#endif
// Scan bitset to accumulate register pairs
unsigned char regs[32];
int count = 0;
@ -2176,9 +2200,19 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
return 0;
}
// SVE
if (use_sve && sve_vector_size_in_bytes > 16) {
for (int i = count - 1; i >= 0; i--) {
sve_ldr(as_FloatRegister(regs[i]), Address(stack, i));
}
add(stack, stack, sve_vector_size_in_bytes * count);
return count * sve_vector_size_in_bytes / 8;
}
// NEON
if (count == 1) {
ldrq(as_FloatRegister(regs[0]), Address(post(stack, wordSize * 2)));
return 1;
return 2;
}
bool odd = (count & 1) == 1;
@ -2199,7 +2233,7 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
return count;
return count * 2;
}
#ifdef ASSERT
@ -2647,23 +2681,39 @@ void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) {
pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2) - exclude, sp);
}
void MacroAssembler::push_CPU_state(bool save_vectors) {
int step = (save_vectors ? 8 : 4) * wordSize;
void MacroAssembler::push_CPU_state(bool save_vectors, bool use_sve,
int sve_vector_size_in_bytes) {
push(0x3fffffff, sp); // integer registers except lr & sp
mov(rscratch1, -step);
sub(sp, sp, step);
for (int i = 28; i >= 4; i -= 4) {
st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1)));
if (save_vectors && use_sve && sve_vector_size_in_bytes > 16) {
sub(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers);
for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
sve_str(as_FloatRegister(i), Address(sp, i));
}
} else {
int step = (save_vectors ? 8 : 4) * wordSize;
mov(rscratch1, -step);
sub(sp, sp, step);
for (int i = 28; i >= 4; i -= 4) {
st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1)));
}
st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
}
st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
}
void MacroAssembler::pop_CPU_state(bool restore_vectors) {
int step = (restore_vectors ? 8 : 4) * wordSize;
for (int i = 0; i <= 28; i += 4)
ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step)));
void MacroAssembler::pop_CPU_state(bool restore_vectors, bool use_sve,
int sve_vector_size_in_bytes) {
if (restore_vectors && use_sve && sve_vector_size_in_bytes > 16) {
for (int i = FloatRegisterImpl::number_of_registers - 1; i >= 0; i--) {
sve_ldr(as_FloatRegister(i), Address(sp, i));
}
add(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers);
} else {
int step = (restore_vectors ? 8 : 4) * wordSize;
for (int i = 0; i <= 28; i += 4)
ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step)));
}
pop(0x3fffffff, sp); // integer registers except lr & sp
}
@ -2712,6 +2762,21 @@ Address MacroAssembler::spill_address(int size, int offset, Register tmp)
return Address(base, offset);
}
// Returns an sp-relative address for spilling/unspilling an SVE register.
//
// If offset is a multiple of the register size and the resulting multiplier
// is below 256, the address is returned as (sp, multiplier), i.e. the
// immediate is expressed in units of sve_reg_size_in_bytes rather than bytes.
// Otherwise sp + offset is materialized into tmp (an add is emitted) and a
// plain Address(tmp) is returned.
Address MacroAssembler::sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp) {
  assert(offset >= 0, "spill to negative address?");

  Register base = sp;

  // An immediate offset in the range 0 to 255 which is multiplied
  // by the current vector or predicate register size in bytes.
  if (offset % sve_reg_size_in_bytes != 0 || offset >= ((1<<8)*sve_reg_size_in_bytes)) {
    // Not encodable as a scaled immediate: compute the address explicitly.
    add(tmp, base, offset);
    return Address(tmp);
  }

  return Address(base, offset / sve_reg_size_in_bytes);
}
// Checks whether offset is aligned.
// Returns true if it is, else false.
bool MacroAssembler::merge_alignment_check(Register base,
@ -5221,3 +5286,24 @@ void MacroAssembler::cache_wbsync(bool is_pre) {
membar(Assembler::AnyAny);
}
}
// Emits a runtime check that the current SVE vector length equals the value
// returned by VM_Version::get_initial_sve_vector_length() at code-generation
// time. On mismatch the generated code reaches stop() with an error message.
// Clobbers rscratch1 and the condition flags. Must only be used when
// UseSVE > 0.
void MacroAssembler::verify_sve_vector_length() {
Label verify_ok;
assert(UseSVE > 0, "should only be used for SVE");
// rscratch1 = 0, then add the byte-element count of one vector to it.
movw(rscratch1, zr);
sve_inc(rscratch1, B);
// Compare with the recorded length; the result is discarded into zr, only
// the flags are kept.
subsw(zr, rscratch1, VM_Version::get_initial_sve_vector_length());
br(EQ, verify_ok);
stop("Error: SVE vector length has changed since jvm startup");
bind(verify_ok);
}
// Emits a runtime check that every byte element of the preserved predicate
// register ptrue (p7) is still set. On failure the generated code reaches
// stop() with an error message. Clobbers rscratch1. Must only be used when
// UseSVE > 0.
void MacroAssembler::verify_ptrue() {
Label verify_ok;
assert(UseSVE > 0, "should only be used for SVE");
sve_cntp(rscratch1, B, ptrue, ptrue); // get true elements count.
// Subtract the byte-element count of one vector; the result is zero only if
// all elements were counted as true.
sve_dec(rscratch1, B);
cbz(rscratch1, verify_ok);
stop("Error: the preserved predicate register (p7) elements are not all true");
bind(verify_ok);
}

View File

@ -873,8 +873,10 @@ public:
DEBUG_ONLY(void verify_heapbase(const char* msg);)
void push_CPU_state(bool save_vectors = false);
void pop_CPU_state(bool restore_vectors = false) ;
void push_CPU_state(bool save_vectors = false, bool use_sve = false,
int sve_vector_size_in_bytes = 0);
void pop_CPU_state(bool restore_vectors = false, bool use_sve = false,
int sve_vector_size_in_bytes = 0);
// Round up to a power of two
void round_to(Register reg, int modulus);
@ -954,6 +956,11 @@ public:
Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
void verify_sve_vector_length();
// Emits a PTRUE on the preserved predicate register `ptrue` with byte
// element size, re-establishing it after code that may have clobbered it.
void reinitialize_ptrue() {
sve_ptrue(ptrue, B);
}
void verify_ptrue();
// Debugging
@ -1303,6 +1310,7 @@ private:
// Returns an address on the stack which is reachable with a ldr/str of size
// Uses rscratch2 if the address is not directly reachable
Address spill_address(int size, int offset, Register tmp=rscratch2);
Address sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp=rscratch2);
bool merge_alignment_check(Register base, size_t size, int64_t cur_offset, int64_t prev_offset) const;
@ -1326,6 +1334,9 @@ public:
void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
str(Vx, T, spill_address(1 << (int)T, offset));
}
// Stores SVE register Zx to the stack slot at byte `offset` from sp.
// vector_reg_size_in_bytes is the current SVE register size; it is used by
// sve_spill_address() to scale the offset (which may emit an extra add into
// its default temporary register when the offset is not directly encodable).
void spill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
sve_str(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
}
void unspill(Register Rx, bool is64, int offset) {
if (is64) {
ldr(Rx, spill_address(8, offset));
@ -1336,6 +1347,9 @@ public:
void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
ldr(Vx, T, spill_address(1 << (int)T, offset));
}
// Loads SVE register Zx from the stack slot at byte `offset` from sp.
// vector_reg_size_in_bytes is the current SVE register size; it is used by
// sve_spill_address() to scale the offset (which may emit an extra add into
// its default temporary register when the offset is not directly encodable).
void unspill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
sve_ldr(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
}
void spill_copy128(int src_offset, int dst_offset,
Register tmp1=rscratch1, Register tmp2=rscratch2) {
if (src_offset < 512 && (src_offset & 7) == 0 &&
@ -1349,7 +1363,15 @@ public:
spill(tmp1, true, dst_offset+8);
}
}
// Copies one SVE vector between two stack slots, 128 bits at a time, by
// issuing spill_copy128() for each consecutive 16-byte chunk.
// sve_vec_reg_size_in_bytes must be a multiple of 16.
void spill_copy_sve_vector_stack_to_stack(int src_offset, int dst_offset,
                                          int sve_vec_reg_size_in_bytes) {
  assert(sve_vec_reg_size_in_bytes % 16 == 0, "unexpected sve vector reg size");
  const int chunk_count = sve_vec_reg_size_in_bytes / 16;
  for (int chunk = 0; chunk < chunk_count; chunk++) {
    const int delta = chunk * 16;
    spill_copy128(src_offset + delta, dst_offset + delta);
  }
}
void cache_wb(Address line);
void cache_wbsync(bool is_pre);
};

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -33,6 +33,9 @@ const int ConcreteRegisterImpl::max_fpr
= ConcreteRegisterImpl::max_gpr +
FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register;
const int ConcreteRegisterImpl::max_pr
= ConcreteRegisterImpl::max_fpr + PRegisterImpl::number_of_registers;
const char* RegisterImpl::name() const {
const char* names[number_of_registers] = {
"c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7",
@ -54,3 +57,11 @@ const char* FloatRegisterImpl::name() const {
};
return is_valid() ? names[encoding()] : "noreg";
}
// Returns the assembler name of this predicate register ("p0" .. "p15"),
// or "noreg" when the register is not valid.
const char* PRegisterImpl::name() const {
  if (!is_valid()) {
    return "noreg";
  }
  const char* names[number_of_registers] = {
    "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7",
    "p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"
  };
  return names[encoding()];
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -129,9 +129,10 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
public:
enum {
number_of_registers = 32,
max_slots_per_register = 4,
max_slots_per_register = 8,
save_slots_per_register = 2,
extra_save_slots_per_register = max_slots_per_register - save_slots_per_register
slots_per_neon_register = 4,
extra_save_slots_per_neon_register = slots_per_neon_register - save_slots_per_register
};
// construction
@ -187,6 +188,88 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, v29 , (29));
CONSTANT_REGISTER_DECLARATION(FloatRegister, v30 , (30));
CONSTANT_REGISTER_DECLARATION(FloatRegister, v31 , (31));
// SVE vector registers, shared with the SIMD&FP v0-v31. Vn maps to Zn[127:0].
CONSTANT_REGISTER_DECLARATION(FloatRegister, z0 , ( 0));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z1 , ( 1));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z2 , ( 2));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z3 , ( 3));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z4 , ( 4));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z5 , ( 5));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z6 , ( 6));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z7 , ( 7));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z8 , ( 8));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z9 , ( 9));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z10 , (10));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z11 , (11));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z12 , (12));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z13 , (13));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z14 , (14));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z15 , (15));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z16 , (16));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z17 , (17));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z18 , (18));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z19 , (19));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z20 , (20));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z21 , (21));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z22 , (22));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z23 , (23));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z24 , (24));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z25 , (25));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z26 , (26));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z27 , (27));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z28 , (28));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z29 , (29));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z30 , (30));
CONSTANT_REGISTER_DECLARATION(FloatRegister, z31 , (31));
class PRegisterImpl;
typedef PRegisterImpl* PRegister;
// Converts a register number into a PRegister handle. The number is stored
// directly in the pointer value; the result is never dereferenced as a real
// object address. No range check is performed here.
inline PRegister as_PRegister(int encoding) {
return (PRegister)(intptr_t)encoding;
}
// The implementation of predicate registers for the architecture.
//
// A PRegister is a PRegisterImpl* whose pointer value is the register number
// itself (see as_PRegister()); all accessors below recover the number by
// casting `this` back to an integer.
class PRegisterImpl: public AbstractRegisterImpl {
public:
enum {
// Number of predicate registers (p0 .. p15).
number_of_registers = 16,
// Slots accounted per predicate register in ConcreteRegisterImpl.
max_slots_per_register = 1
};
// construction
inline friend PRegister as_PRegister(int encoding);
VMReg as_VMReg();
// derived registers, offsets, and addresses
// Next register by number; encoding() asserts that this register is valid.
PRegister successor() const { return as_PRegister(encoding() + 1); }
// accessors
// Register number; asserts validity.
int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; }
// Register number without the validity assert.
int encoding_nocheck() const { return (intptr_t)this; }
// True when the register number is in [0, number_of_registers).
bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
const char* name() const;
};
// The predicate registers of SVE.
CONSTANT_REGISTER_DECLARATION(PRegister, p0, ( 0));
CONSTANT_REGISTER_DECLARATION(PRegister, p1, ( 1));
CONSTANT_REGISTER_DECLARATION(PRegister, p2, ( 2));
CONSTANT_REGISTER_DECLARATION(PRegister, p3, ( 3));
CONSTANT_REGISTER_DECLARATION(PRegister, p4, ( 4));
CONSTANT_REGISTER_DECLARATION(PRegister, p5, ( 5));
CONSTANT_REGISTER_DECLARATION(PRegister, p6, ( 6));
CONSTANT_REGISTER_DECLARATION(PRegister, p7, ( 7));
CONSTANT_REGISTER_DECLARATION(PRegister, p8, ( 8));
CONSTANT_REGISTER_DECLARATION(PRegister, p9, ( 9));
CONSTANT_REGISTER_DECLARATION(PRegister, p10, (10));
CONSTANT_REGISTER_DECLARATION(PRegister, p11, (11));
CONSTANT_REGISTER_DECLARATION(PRegister, p12, (12));
CONSTANT_REGISTER_DECLARATION(PRegister, p13, (13));
CONSTANT_REGISTER_DECLARATION(PRegister, p14, (14));
CONSTANT_REGISTER_DECLARATION(PRegister, p15, (15));
// Need to know the total number of registers of all sorts for SharedInfo.
// Define a class that exports it.
class ConcreteRegisterImpl : public AbstractRegisterImpl {
@ -199,12 +282,14 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers +
FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers +
PRegisterImpl::max_slots_per_register * PRegisterImpl::number_of_registers +
1) // flags
};
// added to make it compile
static const int max_gpr;
static const int max_fpr;
static const int max_pr;
};
// A set of registers

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
@ -154,3 +154,55 @@ REGISTER_DEFINITION(Register, rthread);
REGISTER_DEFINITION(Register, rheapbase);
REGISTER_DEFINITION(Register, r31_sp);
REGISTER_DEFINITION(FloatRegister, z0);
REGISTER_DEFINITION(FloatRegister, z1);
REGISTER_DEFINITION(FloatRegister, z2);
REGISTER_DEFINITION(FloatRegister, z3);
REGISTER_DEFINITION(FloatRegister, z4);
REGISTER_DEFINITION(FloatRegister, z5);
REGISTER_DEFINITION(FloatRegister, z6);
REGISTER_DEFINITION(FloatRegister, z7);
REGISTER_DEFINITION(FloatRegister, z8);
REGISTER_DEFINITION(FloatRegister, z9);
REGISTER_DEFINITION(FloatRegister, z10);
REGISTER_DEFINITION(FloatRegister, z11);
REGISTER_DEFINITION(FloatRegister, z12);
REGISTER_DEFINITION(FloatRegister, z13);
REGISTER_DEFINITION(FloatRegister, z14);
REGISTER_DEFINITION(FloatRegister, z15);
REGISTER_DEFINITION(FloatRegister, z16);
REGISTER_DEFINITION(FloatRegister, z17);
REGISTER_DEFINITION(FloatRegister, z18);
REGISTER_DEFINITION(FloatRegister, z19);
REGISTER_DEFINITION(FloatRegister, z20);
REGISTER_DEFINITION(FloatRegister, z21);
REGISTER_DEFINITION(FloatRegister, z22);
REGISTER_DEFINITION(FloatRegister, z23);
REGISTER_DEFINITION(FloatRegister, z24);
REGISTER_DEFINITION(FloatRegister, z25);
REGISTER_DEFINITION(FloatRegister, z26);
REGISTER_DEFINITION(FloatRegister, z27);
REGISTER_DEFINITION(FloatRegister, z28);
REGISTER_DEFINITION(FloatRegister, z29);
REGISTER_DEFINITION(FloatRegister, z30);
REGISTER_DEFINITION(FloatRegister, z31);
REGISTER_DEFINITION(PRegister, p0);
REGISTER_DEFINITION(PRegister, p1);
REGISTER_DEFINITION(PRegister, p2);
REGISTER_DEFINITION(PRegister, p3);
REGISTER_DEFINITION(PRegister, p4);
REGISTER_DEFINITION(PRegister, p5);
REGISTER_DEFINITION(PRegister, p6);
REGISTER_DEFINITION(PRegister, p7);
REGISTER_DEFINITION(PRegister, p8);
REGISTER_DEFINITION(PRegister, p9);
REGISTER_DEFINITION(PRegister, p10);
REGISTER_DEFINITION(PRegister, p11);
REGISTER_DEFINITION(PRegister, p12);
REGISTER_DEFINITION(PRegister, p13);
REGISTER_DEFINITION(PRegister, p14);
REGISTER_DEFINITION(PRegister, p15);
REGISTER_DEFINITION(PRegister, ptrue);

View File

@ -115,11 +115,28 @@ class RegisterSaver {
};
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
bool use_sve = false;
int sve_vector_size_in_bytes = 0;
int sve_vector_size_in_slots = 0;
#ifdef COMPILER2
use_sve = Matcher::supports_scalable_vector();
sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
sve_vector_size_in_slots = Matcher::scalable_vector_reg_size(T_FLOAT);
#endif
#if COMPILER2_OR_JVMCI
if (save_vectors) {
int vect_words = 0;
int extra_save_slots_per_register = 0;
// Save upper half of vector registers
int vect_words = FloatRegisterImpl::number_of_registers * FloatRegisterImpl::extra_save_slots_per_register /
VMRegImpl::slots_per_word;
if (use_sve) {
extra_save_slots_per_register = sve_vector_size_in_slots - FloatRegisterImpl::save_slots_per_register;
} else {
extra_save_slots_per_register = FloatRegisterImpl::extra_save_slots_per_neon_register;
}
vect_words = FloatRegisterImpl::number_of_registers * extra_save_slots_per_register /
VMRegImpl::slots_per_word;
additional_frame_words += vect_words;
}
#else
@ -138,7 +155,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
// Save Integer and Float registers.
__ enter();
__ push_CPU_state(save_vectors);
__ push_CPU_state(save_vectors, use_sve, sve_vector_size_in_bytes);
// Set an oopmap for the call site. This oopmap will map all
// oop-registers and debug-info registers as callee-saved. This
@ -162,8 +179,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
FloatRegister r = as_FloatRegister(i);
int sp_offset = save_vectors ? (FloatRegisterImpl::max_slots_per_register * i) :
(FloatRegisterImpl::save_slots_per_register * i);
int sp_offset = 0;
if (save_vectors) {
sp_offset = use_sve ? (sve_vector_size_in_slots * i) :
(FloatRegisterImpl::slots_per_neon_register * i);
} else {
sp_offset = FloatRegisterImpl::save_slots_per_register * i;
}
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
r->as_VMReg());
}
@ -172,10 +194,15 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
}
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
#if !COMPILER2_OR_JVMCI
#ifdef COMPILER2
__ pop_CPU_state(restore_vectors, Matcher::supports_scalable_vector(),
Matcher::scalable_vector_reg_size(T_BYTE));
#else
#if !INCLUDE_JVMCI
assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
#endif
__ pop_CPU_state(restore_vectors);
#endif
__ leave();
}
@ -1842,6 +1869,11 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
// Force this write out before the read below
__ dmb(Assembler::ISH);
if (UseSVE > 0) {
// Make sure that jni code does not change SVE vector length.
__ verify_sve_vector_length();
}
// check for safepoint operation in progress and/or pending suspend requests
Label safepoint_in_progress, safepoint_in_progress_done;
{
@ -2774,6 +2806,12 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
__ maybe_isb();
__ membar(Assembler::LoadLoad | Assembler::LoadStore);
if (UseSVE > 0 && save_vectors) {
// Reinitialize the ptrue predicate register, in case the external runtime
// call clobbers ptrue reg, as we may return to SVE compiled code.
__ reinitialize_ptrue();
}
__ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
__ cbz(rscratch1, noException);

View File

@ -488,6 +488,11 @@ class StubGenerator: public StubCodeGenerator {
__ call_VM_leaf(CAST_FROM_FN_PTR(address,
SharedRuntime::exception_handler_for_return_address),
rthread, c_rarg1);
if (UseSVE > 0 ) {
// Reinitialize the ptrue predicate register, in case the external runtime
// call clobbers ptrue reg, as we may return to SVE compiled code.
__ reinitialize_ptrue();
}
// we should not really care that lr is no longer the callee
// address. we saved the value the handler needs in r19 so we can
// just copy it to r3. however, the C2 handler will push its own
@ -5018,6 +5023,12 @@ class StubGenerator: public StubCodeGenerator {
__ reset_last_Java_frame(true);
__ maybe_isb();
if (UseSVE > 0) {
// Reinitialize the ptrue predicate register, in case the external runtime
// call clobbers ptrue reg, as we may return to SVE compiled code.
__ reinitialize_ptrue();
}
__ leave();
// check for pending exceptions

View File

@ -1372,6 +1372,11 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
__ push(dtos);
__ push(ltos);
if (UseSVE > 0) {
// Make sure that jni code does not change SVE vector length.
__ verify_sve_vector_length();
}
// change thread state
__ mov(rscratch1, _thread_in_native_trans);
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));

View File

@ -32,12 +32,14 @@
#include "runtime/os.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/formatBuffer.hpp"
#include "utilities/macros.hpp"
#include OS_HEADER_INLINE(os)
#include <sys/auxv.h>
#include <asm/hwcap.h>
#include <sys/auxv.h>
#include <sys/prctl.h>
#ifndef HWCAP_AES
#define HWCAP_AES (1<<3)
@ -67,6 +69,20 @@
#define HWCAP_SHA512 (1 << 21)
#endif
#ifndef HWCAP_SVE
#define HWCAP_SVE (1 << 22)
#endif
#ifndef HWCAP2_SVE2
#define HWCAP2_SVE2 (1 << 1)
#endif
#ifndef PR_SVE_GET_VL
// For old toolchains which do not have SVE related macros defined.
#define PR_SVE_SET_VL 50
#define PR_SVE_GET_VL 51
#endif
int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_model2;
@ -74,6 +90,7 @@ int VM_Version::_variant;
int VM_Version::_revision;
int VM_Version::_stepping;
bool VM_Version::_dcpop;
int VM_Version::_initial_sve_vector_length;
VM_Version::PsrInfo VM_Version::_psr_info = { 0, };
static BufferBlob* stub_blob;
@ -116,7 +133,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
}
};
void VM_Version::get_processor_features() {
_supports_cx8 = true;
_supports_atomic_getset4 = true;
@ -167,6 +183,7 @@ void VM_Version::get_processor_features() {
}
uint64_t auxv = getauxval(AT_HWCAP);
uint64_t auxv2 = getauxval(AT_HWCAP2);
char buf[512];
@ -298,6 +315,8 @@ void VM_Version::get_processor_features() {
if (auxv & HWCAP_SHA2) strcat(buf, ", sha256");
if (auxv & HWCAP_SHA512) strcat(buf, ", sha512");
if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse");
if (auxv & HWCAP_SVE) strcat(buf, ", sve");
if (auxv2 & HWCAP2_SVE2) strcat(buf, ", sve2");
_features_string = os::strdup(buf);
@ -437,6 +456,18 @@ void VM_Version::get_processor_features() {
FLAG_SET_DEFAULT(UseBlockZeroing, false);
}
if (auxv & HWCAP_SVE) {
if (FLAG_IS_DEFAULT(UseSVE)) {
FLAG_SET_DEFAULT(UseSVE, (auxv2 & HWCAP2_SVE2) ? 2 : 1);
}
if (UseSVE > 0) {
_initial_sve_vector_length = prctl(PR_SVE_GET_VL);
}
} else if (UseSVE > 0) {
warning("UseSVE specified, but not supported on current CPU. Disabling SVE.");
FLAG_SET_DEFAULT(UseSVE, 0);
}
// This machine allows unaligned memory accesses
if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
@ -471,6 +502,50 @@ void VM_Version::get_processor_features() {
UseMontgomerySquareIntrinsic = true;
}
if (UseSVE > 0) {
if (FLAG_IS_DEFAULT(MaxVectorSize)) {
MaxVectorSize = _initial_sve_vector_length;
} else if (MaxVectorSize < 16) {
warning("SVE does not support vector length less than 16 bytes. Disabling SVE.");
UseSVE = 0;
} else if ((MaxVectorSize % 16) == 0 && is_power_of_2(MaxVectorSize)) {
int new_vl = prctl(PR_SVE_SET_VL, MaxVectorSize);
_initial_sve_vector_length = new_vl;
// If MaxVectorSize is larger than system largest supported SVE vector length, above prctl()
// call will set task vector length to the system largest supported value. So, we also update
// MaxVectorSize to that largest supported value.
if (new_vl < 0) {
vm_exit_during_initialization(
err_msg("Current system does not support SVE vector length for MaxVectorSize: %d",
(int)MaxVectorSize));
} else if (new_vl != MaxVectorSize) {
warning("Current system only supports max SVE vector length %d. Set MaxVectorSize to %d",
new_vl, new_vl);
}
MaxVectorSize = new_vl;
} else {
vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize));
}
}
if (UseSVE == 0) { // NEON
int min_vector_size = 8;
int max_vector_size = 16;
if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
if (!is_power_of_2(MaxVectorSize)) {
vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize));
} else if (MaxVectorSize < min_vector_size) {
warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
} else if (MaxVectorSize > max_vector_size) {
warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
}
} else {
FLAG_SET_DEFAULT(MaxVectorSize, 16);
}
}
if (FLAG_IS_DEFAULT(OptoScheduling)) {
OptoScheduling = true;
}

View File

@ -41,6 +41,8 @@ protected:
static int _revision;
static int _stepping;
static bool _dcpop;
static int _initial_sve_vector_length;
struct PsrInfo {
uint32_t dczid_el0;
uint32_t ctr_el0;
@ -106,6 +108,7 @@ public:
static int cpu_variant() { return _variant; }
static int cpu_revision() { return _revision; }
static bool supports_dcpop() { return _dcpop; }
// Returns the stored _initial_sve_vector_length unchanged.
// NOTE(review): the value appears to be filled in by get_processor_features()
// from the prctl(PR_SVE_GET_VL) / prctl(PR_SVE_SET_VL, ...) results, and only
// when UseSVE > 0 - confirm before relying on it elsewhere.
static int get_initial_sve_vector_length() { return _initial_sve_vector_length; };
static ByteSize dczid_el0_offset() { return byte_offset_of(PsrInfo, dczid_el0); }
static ByteSize ctr_el0_offset() { return byte_offset_of(PsrInfo, ctr_el0); }
static bool is_zva_enabled() {

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
* Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -36,4 +36,8 @@ inline VMReg FloatRegisterImpl::as_VMReg() {
ConcreteRegisterImpl::max_gpr);
}
// Returns the VMReg for this predicate register: its encoding() offset by
// ConcreteRegisterImpl::max_fpr.
inline VMReg PRegisterImpl::as_VMReg() {
return VMRegImpl::as_VMReg(encoding() + ConcreteRegisterImpl::max_fpr);
}
#endif // CPU_AARCH64_VMREG_AARCH64_INLINE_HPP

View File

@ -1006,6 +1006,14 @@ const int Matcher::vector_width_in_bytes(BasicType bt) {
return MaxVectorSize;
}
// Always reports that scalable vector registers are not supported.
const bool Matcher::supports_scalable_vector() {
return false;
}
// Always returns -1; the bt argument is ignored.
const int Matcher::scalable_vector_reg_size(const BasicType bt) {
return -1;
}
// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
assert(MaxVectorSize >= size, "");

View File

@ -2379,6 +2379,14 @@ const int Matcher::min_vector_size(const BasicType bt) {
return max_vector_size(bt); // Same as max.
}
// Always reports that scalable vector registers are not supported.
const bool Matcher::supports_scalable_vector() {
return false;
}
// Always returns -1; the bt argument is ignored.
const int Matcher::scalable_vector_reg_size(const BasicType bt) {
return -1;
}
// PPC implementation uses VSX load/store instructions (if
// SuperwordUseVSX) which support 4 byte but not arbitrary alignment
const bool Matcher::misaligned_vectors_ok() {

View File

@ -1610,6 +1610,14 @@ const int Matcher::min_vector_size(const BasicType bt) {
return max_vector_size(bt); // Same as max.
}
// Always reports that scalable vector registers are not supported.
const bool Matcher::supports_scalable_vector() {
return false;
}
// Always returns -1; the bt argument is ignored.
const int Matcher::scalable_vector_reg_size(const BasicType bt) {
return -1;
}
// z/Architecture does support misaligned store/load at minimal extra cost.
const bool Matcher::misaligned_vectors_ok() {
return true;

View File

@ -1,5 +1,5 @@
//
// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@ -1615,6 +1615,14 @@ const int Matcher::min_vector_size(const BasicType bt) {
return MIN2(size,max_size);
}
// Always reports that scalable vector registers are not supported.
const bool Matcher::supports_scalable_vector() {
return false;
}
// Always returns -1; the bt argument is ignored.
const int Matcher::scalable_vector_reg_size(const BasicType bt) {
return -1;
}
// Vector ideal reg corresponding to specified size in bytes
const uint Matcher::vector_ideal_reg(int size) {
assert(MaxVectorSize >= size, "");

View File

@ -2834,7 +2834,7 @@ frame
RAX_H_num // Op_RegL
};
// Excluded flags and vector registers.
assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type");
assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
%}
%}

View File

@ -1,5 +1,5 @@
//
// Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
// Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
@ -934,6 +934,7 @@ const char *ArchDesc::getIdealType(const char *idealOp) {
// Match Vector types.
if (strncmp(idealOp, "Vec",3)==0) {
switch(last_char) {
case 'A': return "TypeVect::VECTA";
case 'S': return "TypeVect::VECTS";
case 'D': return "TypeVect::VECTD";
case 'X': return "TypeVect::VECTX";
@ -944,6 +945,10 @@ const char *ArchDesc::getIdealType(const char *idealOp) {
}
}
if (strncmp(idealOp, "RegVMask", 8) == 0) {
return "Type::BOTTOM";
}
// !!!!!
switch(last_char) {
case 'I': return "TypeInt::INT";

View File

@ -3942,6 +3942,8 @@ bool MatchRule::is_base_register(FormDict &globals) const {
strcmp(opType,"RegL")==0 ||
strcmp(opType,"RegF")==0 ||
strcmp(opType,"RegD")==0 ||
strcmp(opType,"RegVMask")==0 ||
strcmp(opType,"VecA")==0 ||
strcmp(opType,"VecS")==0 ||
strcmp(opType,"VecD")==0 ||
strcmp(opType,"VecX")==0 ||

View File

@ -77,6 +77,7 @@ void LRG::dump() const {
if( _is_oop ) tty->print("Oop ");
if( _is_float ) tty->print("Float ");
if( _is_vector ) tty->print("Vector ");
if( _is_scalable ) tty->print("Scalable ");
if( _was_spilled1 ) tty->print("Spilled ");
if( _was_spilled2 ) tty->print("Spilled2 ");
if( _direct_conflict ) tty->print("Direct_conflict ");
@ -644,7 +645,15 @@ void PhaseChaitin::Register_Allocate() {
// Live ranges record the highest register in their mask.
// We want the low register for the AD file writer's convenience.
OptoReg::Name hi = lrg.reg(); // Get hi register
OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo
int num_regs = lrg.num_regs();
if (lrg.is_scalable() && OptoReg::is_stack(hi)) {
// For scalable vector registers, when they are allocated in physical
// registers, num_regs is RegMask::SlotsPerVecA for reg mask of scalable
// vector. If they are allocated on stack, we need to get the actual
// num_regs, which reflects the physical length of scalable registers.
num_regs = lrg.scalable_reg_slots();
}
OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo
// We have to use pair [lo,lo+1] even for wide vectors because
// the rest of code generation works only with pairs. It is safe
// since for registers encoding only 'lo' is used.
@ -802,8 +811,19 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
// Check for vector live range (only if vector register is used).
// On SPARC vector uses RegD which could be misaligned so it is not
// processed as vector in RA.
if (RegMask::is_vector(ireg))
if (RegMask::is_vector(ireg)) {
lrg._is_vector = 1;
if (ireg == Op_VecA) {
assert(Matcher::supports_scalable_vector(), "scalable vector should be supported");
lrg._is_scalable = 1;
// For scalable vector, when it is allocated in physical register,
// num_regs is RegMask::SlotsPerVecA for reg mask,
// which may not be the actual physical register size.
// If it is allocated in stack, we need to get the actual
// physical length of scalable vector register.
lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT));
}
}
assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL,
"vector must be in vector registers");
@ -905,6 +925,13 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
lrg.set_num_regs(1);
lrg.set_reg_pressure(1);
break;
case Op_VecA:
assert(Matcher::supports_scalable_vector(), "does not support scalable vector");
assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity");
assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned");
lrg.set_num_regs(RegMask::SlotsPerVecA);
lrg.set_reg_pressure(1);
break;
case Op_VecS:
assert(Matcher::vector_size_supported(T_BYTE,4), "sanity");
assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity");
@ -1305,6 +1332,46 @@ static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) {
return false;
}
// Pick the register (or set of stack slots) to assign to 'lrg' out of 'mask'.
//
// For a non-scalable live range this is simply
// mask.find_first_set(lrg, lrg.num_regs()).
// For a scalable live range:
//  - if the first hit is a physical register and chunk == 0, it is returned;
//  - otherwise the search is repeated for lrg.scalable_reg_slots() adjacent
//    bits, and OptoReg::Bad is returned when no such run is found.
//
// 'mask' is passed by value, so the Remove()/clear_to_sets() calls below only
// modify this function's private copy.
static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) {
int num_regs = lrg.num_regs();
OptoReg::Name assigned = mask.find_first_set(lrg, num_regs);
if (lrg.is_scalable()) {
// a physical register is found
if (chunk == 0 && OptoReg::is_reg(assigned)) {
return assigned;
}
// find available stack slots for scalable register
if (lrg._is_vector) {
// From here on, search with the actual slot count of the scalable register.
num_regs = lrg.scalable_reg_slots();
// if actual scalable vector register is exactly SlotsPerVecA * 32 bits
if (num_regs == RegMask::SlotsPerVecA) {
return assigned;
}
// mask has been cleared out by clear_to_sets(SlotsPerVecA) before choose_color, but it
// does not work for scalable size. We have to find adjacent scalable_reg_slots() bits
// instead of SlotsPerVecA bits.
assigned = mask.find_first_set(lrg, num_regs); // find highest valid reg
while (OptoReg::is_valid(assigned) && RegMask::can_represent(assigned)) {
// Verify the found reg has scalable_reg_slots() bits set.
if (mask.is_valid_reg(assigned, num_regs)) {
return assigned;
} else {
// Remove more for each iteration
mask.Remove(assigned - num_regs + 1); // Unmask the lowest reg
mask.clear_to_sets(RegMask::SlotsPerVecA); // Align by SlotsPerVecA bits
assigned = mask.find_first_set(lrg, num_regs);
}
}
return OptoReg::Bad; // will cause chunk change, and retry next chunk
}
}
// Non-scalable live range, or a scalable one with _is_vector unset: return the
// result of the initial search unchanged.
return assigned;
}
// Choose a color using the biasing heuristic
OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
@ -1338,7 +1405,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
RegMask tempmask = lrg.mask();
tempmask.AND(lrgs(copy_lrg).mask());
tempmask.clear_to_sets(lrg.num_regs());
OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs());
OptoReg::Name reg = find_first_set(lrg, tempmask, chunk);
if (OptoReg::is_valid(reg))
return reg;
}
@ -1347,7 +1414,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
// If no bias info exists, just go with the register selection ordering
if (lrg._is_vector || lrg.num_regs() == 2) {
// Find an aligned set
return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk);
return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk);
}
// CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate
@ -1402,7 +1469,6 @@ uint PhaseChaitin::Select( ) {
LRG *lrg = &lrgs(lidx);
_simplified = lrg->_next;
#ifndef PRODUCT
if (trace_spilling()) {
ttyLocker ttyl;
@ -1484,7 +1550,6 @@ uint PhaseChaitin::Select( ) {
// Bump register mask up to next stack chunk
chunk += RegMask::CHUNK_SIZE;
lrg->Set_All();
goto retry_next_chunk;
}
@ -1509,12 +1574,21 @@ uint PhaseChaitin::Select( ) {
int n_regs = lrg->num_regs();
assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
if (n_regs == 1 || !lrg->_fat_proj) {
assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity");
if (Matcher::supports_scalable_vector()) {
assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity");
} else {
assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity");
}
lrg->Clear(); // Clear the mask
lrg->Insert(reg); // Set regmask to match selected reg
// For vectors and pairs, also insert the low bit of the pair
for (int i = 1; i < n_regs; i++)
// We always choose the high bit, then mask the low bits by register size
if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack
n_regs = lrg->scalable_reg_slots();
}
for (int i = 1; i < n_regs; i++) {
lrg->Insert(OptoReg::add(reg,-i));
}
lrg->set_mask_size(n_regs);
} else { // Else fatproj
// mask must be equal to fatproj bits, by definition

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -114,7 +114,9 @@ public:
_msize_valid=1;
if (_is_vector) {
assert(!_fat_proj, "sanity");
assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets");
if (!(_is_scalable && OptoReg::is_stack(_reg))) {
assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets");
}
} else if (_num_regs == 2 && !_fat_proj) {
assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs");
}
@ -137,14 +139,37 @@ public:
void Remove( OptoReg::Name reg ) { _mask.Remove(reg); debug_only(_msize_valid=0;) }
void clear_to_sets() { _mask.clear_to_sets(_num_regs); debug_only(_msize_valid=0;) }
// Number of registers this live range uses when it colors
private:
// Number of registers this live range uses when it colors
uint16_t _num_regs; // 2 for Longs and Doubles, 1 for all else
// except _num_regs is kill count for fat_proj
// For scalable register, num_regs may not be the actual physical register size.
// We need to get the actual physical length of scalable register when scalable
// register is spilled. The size of one slot is 32-bit.
uint _scalable_reg_slots; // Actual scalable register length of slots.
// Meaningful only when _is_scalable is true.
public:
int num_regs() const { return _num_regs; }
void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; }
// Returns the slot count stored in _scalable_reg_slots.
uint scalable_reg_slots() { return _scalable_reg_slots; }
// Stores the slot count of a scalable register in _scalable_reg_slots.
// Asserts that _is_scalable is already set and that slots is non-zero.
void set_scalable_reg_slots(uint slots) {
assert(_is_scalable, "scalable register");
assert(slots > 0, "slots of scalable register is not valid");
_scalable_reg_slots = slots;
}
// Returns the _is_scalable flag. When ASSERT is defined, a set flag is also
// checked against _is_vector and _num_regs == RegMask::SlotsPerVecA.
bool is_scalable() {
#ifdef ASSERT
if (_is_scalable) {
// Should only be a vector for now, but it could also be a RegVMask in future.
assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg");
}
#endif
return _is_scalable;
}
private:
// Number of physical registers this live range uses when it colors
// Architecture and register-set dependent
@ -170,6 +195,8 @@ public:
uint _is_oop:1, // Live-range holds an oop
_is_float:1, // True if in float registers
_is_vector:1, // True if in vector registers
_is_scalable:1, // True if register size is scalable
// e.g. Arm SVE vector/predicate registers.
_was_spilled1:1, // True if prior spilling on def
_was_spilled2:1, // True if twice prior spilling on def
_is_bound:1, // live range starts life with no

View File

@ -88,6 +88,7 @@ Matcher::Matcher()
idealreg2spillmask [Op_RegF] = NULL;
idealreg2spillmask [Op_RegD] = NULL;
idealreg2spillmask [Op_RegP] = NULL;
idealreg2spillmask [Op_VecA] = NULL;
idealreg2spillmask [Op_VecS] = NULL;
idealreg2spillmask [Op_VecD] = NULL;
idealreg2spillmask [Op_VecX] = NULL;
@ -101,6 +102,7 @@ Matcher::Matcher()
idealreg2debugmask [Op_RegF] = NULL;
idealreg2debugmask [Op_RegD] = NULL;
idealreg2debugmask [Op_RegP] = NULL;
idealreg2debugmask [Op_VecA] = NULL;
idealreg2debugmask [Op_VecS] = NULL;
idealreg2debugmask [Op_VecD] = NULL;
idealreg2debugmask [Op_VecX] = NULL;
@ -114,6 +116,7 @@ Matcher::Matcher()
idealreg2mhdebugmask[Op_RegF] = NULL;
idealreg2mhdebugmask[Op_RegD] = NULL;
idealreg2mhdebugmask[Op_RegP] = NULL;
idealreg2mhdebugmask[Op_VecA] = NULL;
idealreg2mhdebugmask[Op_VecS] = NULL;
idealreg2mhdebugmask[Op_VecD] = NULL;
idealreg2mhdebugmask[Op_VecX] = NULL;
@ -427,7 +430,7 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
return rms;
}
#define NOF_STACK_MASKS (3*6+5)
#define NOF_STACK_MASKS (3*6+6)
// Create the initial stack mask used by values spilling to the stack.
// Disallow any debug info in outgoing argument areas by setting the
@ -463,11 +466,12 @@ void Matcher::init_first_stack_mask() {
idealreg2mhdebugmask[Op_RegD] = &rms[16];
idealreg2mhdebugmask[Op_RegP] = &rms[17];
idealreg2spillmask [Op_VecS] = &rms[18];
idealreg2spillmask [Op_VecD] = &rms[19];
idealreg2spillmask [Op_VecX] = &rms[20];
idealreg2spillmask [Op_VecY] = &rms[21];
idealreg2spillmask [Op_VecZ] = &rms[22];
idealreg2spillmask [Op_VecA] = &rms[18];
idealreg2spillmask [Op_VecS] = &rms[19];
idealreg2spillmask [Op_VecD] = &rms[20];
idealreg2spillmask [Op_VecX] = &rms[21];
idealreg2spillmask [Op_VecY] = &rms[22];
idealreg2spillmask [Op_VecZ] = &rms[23];
OptoReg::Name i;
@ -494,6 +498,7 @@ void Matcher::init_first_stack_mask() {
// Keep spill masks aligned.
aligned_stack_mask.clear_to_pairs();
assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
RegMask scalable_stack_mask = aligned_stack_mask;
*idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
#ifdef _LP64
@ -564,28 +569,48 @@ void Matcher::init_first_stack_mask() {
*idealreg2spillmask[Op_VecZ] = *idealreg2regmask[Op_VecZ];
idealreg2spillmask[Op_VecZ]->OR(aligned_stack_mask);
}
if (UseFPUForSpilling) {
// This mask logic assumes that the spill operations are
// symmetric and that the registers involved are the same size.
// On sparc for instance we may have to use 64 bit moves which will
// kill 2 registers when used with F0-F31.
idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
if (Matcher::supports_scalable_vector()) {
int k = 1;
OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
// Exclude last input arg stack slots to avoid spilling vector register there,
// otherwise vector spills could stomp over stack slots in caller frame.
for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) {
scalable_stack_mask.Remove(in);
in = OptoReg::add(in, -1);
}
// For VecA
scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA);
assert(scalable_stack_mask.is_AllStack(), "should be infinite stack");
*idealreg2spillmask[Op_VecA] = *idealreg2regmask[Op_VecA];
idealreg2spillmask[Op_VecA]->OR(scalable_stack_mask);
} else {
*idealreg2spillmask[Op_VecA] = RegMask::Empty;
}
if (UseFPUForSpilling) {
// This mask logic assumes that the spill operations are
// symmetric and that the registers involved are the same size.
// On sparc for instance we may have to use 64 bit moves which will
// kill 2 registers when used with F0-F31.
idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
#ifdef _LP64
idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
#else
idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
#ifdef ARM
// ARM has support for moving 64bit values between a pair of
// integer registers and a double register
idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
// ARM has support for moving 64bit values between a pair of
// integer registers and a double register
idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
#endif
#endif
}
}
// Make up debug masks. Any spill slot plus callee-save (SOE) registers.
// Caller-save (SOC, AS) registers are assumed to be trashable by the various
@ -878,6 +903,7 @@ void Matcher::init_spill_mask( Node *ret ) {
idealreg2regmask[Op_RegF] = regmask_for_ideal_register(Op_RegF, ret);
idealreg2regmask[Op_RegD] = regmask_for_ideal_register(Op_RegD, ret);
idealreg2regmask[Op_RegL] = regmask_for_ideal_register(Op_RegL, ret);
idealreg2regmask[Op_VecA] = regmask_for_ideal_register(Op_VecA, ret);
idealreg2regmask[Op_VecS] = regmask_for_ideal_register(Op_VecS, ret);
idealreg2regmask[Op_VecD] = regmask_for_ideal_register(Op_VecD, ret);
idealreg2regmask[Op_VecX] = regmask_for_ideal_register(Op_VecX, ret);
@ -1563,7 +1589,6 @@ Node* Matcher::Label_Root(const Node* n, State* svec, Node* control, Node*& mem)
}
}
// Call DFA to match this node, and return
svec->DFA( n->Opcode(), n );
@ -2421,7 +2446,7 @@ bool Matcher::gen_narrow_oop_implicit_null_checks() {
const RegMask* Matcher::regmask_for_ideal_register(uint ideal_reg, Node* ret) {
const Type* t = Type::mreg2type[ideal_reg];
if (t == NULL) {
assert(ideal_reg >= Op_VecS && ideal_reg <= Op_VecZ, "not a vector: %d", ideal_reg);
assert(ideal_reg >= Op_VecA && ideal_reg <= Op_VecZ, "not a vector: %d", ideal_reg);
return NULL; // not supported
}
Node* fp = ret->in(TypeFunc::FramePtr);
@ -2438,6 +2463,7 @@ const RegMask* Matcher::regmask_for_ideal_register(uint ideal_reg, Node* ret) {
case Op_RegD: spill = new LoadDNode(NULL, mem, fp, atp, t, mo); break;
case Op_RegL: spill = new LoadLNode(NULL, mem, fp, atp, t->is_long(), mo); break;
case Op_VecA: // fall-through
case Op_VecS: // fall-through
case Op_VecD: // fall-through
case Op_VecX: // fall-through

View File

@ -338,6 +338,10 @@ public:
Matcher::min_vector_size(bt) <= size);
}
static const bool supports_scalable_vector();
// Actual max scalable vector register length.
static const int scalable_vector_reg_size(const BasicType bt);
// Vector ideal reg
static const uint vector_ideal_reg(int len);

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -38,12 +38,14 @@ const char *NodeClassNames[] = {
"RegF",
"RegD",
"RegL",
"RegFlags",
"VecA",
"VecS",
"VecD",
"VecX",
"VecY",
"VecZ",
"RegVMask",
"RegFlags",
"_last_machine_leaf",
#include "classes.hpp"
"_last_class_name",

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -37,11 +37,13 @@ enum Opcodes {
macro(RegF) // Machine float register
macro(RegD) // Machine double register
macro(RegL) // Machine long register
macro(VecA) // Machine vectora register
macro(VecS) // Machine vectors register
macro(VecD) // Machine vectord register
macro(VecX) // Machine vectorx register
macro(VecY) // Machine vectory register
macro(VecZ) // Machine vectorz register
macro(RegVMask) // Vector mask/predicate register
macro(RegFlags) // Machine flags register
_last_machine_leaf, // Split between regular opcodes and machine
#include "classes.hpp"

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -266,9 +266,9 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
Node *val = skip_copies(n->in(k));
if (val == x) return blk_adjust; // No progress?
int n_regs = RegMask::num_registers(val->ideal_reg());
uint val_idx = _lrg_map.live_range_id(val);
OptoReg::Name val_reg = lrgs(val_idx).reg();
int n_regs = RegMask::num_registers(val->ideal_reg(), lrgs(val_idx));
// See if it happens to already be in the correct register!
// (either Phi's direct register, or the common case of the name
@ -305,8 +305,26 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
}
Node *vv = value[reg];
// For scalable register, number of registers may be inconsistent between
// "val_reg" and "reg". For example, when "val" resides in register
// but "reg" is located in stack.
if (lrgs(val_idx).is_scalable()) {
assert(val->ideal_reg() == Op_VecA, "scalable vector register");
if (OptoReg::is_stack(reg)) {
n_regs = lrgs(val_idx).scalable_reg_slots();
} else {
n_regs = RegMask::SlotsPerVecA;
}
}
if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set
uint last = (n_regs-1); // Looking for the last part of a set
uint last;
if (lrgs(val_idx).is_scalable()) {
assert(val->ideal_reg() == Op_VecA, "scalable vector register");
// For scalable vector register, regmask is always SlotsPerVecA bits aligned
last = RegMask::SlotsPerVecA - 1;
} else {
last = (n_regs-1); // Looking for the last part of a set
}
if ((reg&last) != last) continue; // Wrong part of a set
if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value
}
@ -591,7 +609,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
uint k;
Node *phi = block->get_node(j);
uint pidx = _lrg_map.live_range_id(phi);
OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg();
OptoReg::Name preg = lrgs(pidx).reg();
// Remove copies remaining on edges. Check for junk phi.
Node *u = NULL;
@ -619,7 +637,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
if( pidx ) {
value.map(preg,phi);
regnd.map(preg,phi);
int n_regs = RegMask::num_registers(phi->ideal_reg());
int n_regs = RegMask::num_registers(phi->ideal_reg(), lrgs(pidx));
for (int l = 1; l < n_regs; l++) {
OptoReg::Name preg_lo = OptoReg::add(preg,-l);
value.map(preg_lo,phi);
@ -663,7 +681,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
regnd.map(ureg, def);
// Record other half of doubles
uint def_ideal_reg = def->ideal_reg();
int n_regs = RegMask::num_registers(def_ideal_reg);
int n_regs = RegMask::num_registers(def_ideal_reg, lrgs(_lrg_map.live_range_id(def)));
for (int l = 1; l < n_regs; l++) {
OptoReg::Name ureg_lo = OptoReg::add(ureg,-l);
if (!value[ureg_lo] &&
@ -707,7 +725,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
}
uint n_ideal_reg = n->ideal_reg();
int n_regs = RegMask::num_registers(n_ideal_reg);
int n_regs = RegMask::num_registers(n_ideal_reg, lrgs(lidx));
if (n_regs == 1) {
// If Node 'n' does not change the value mapped by the register,
// then 'n' is a useless copy. Do not update the register->node

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -24,6 +24,7 @@
#include "precompiled.hpp"
#include "opto/ad.hpp"
#include "opto/chaitin.hpp"
#include "opto/compile.hpp"
#include "opto/matcher.hpp"
#include "opto/node.hpp"
@ -59,30 +60,47 @@ const RegMask RegMask::Empty(
//=============================================================================
bool RegMask::is_vector(uint ireg) {
return (ireg == Op_VecS || ireg == Op_VecD ||
return (ireg == Op_VecA || ireg == Op_VecS || ireg == Op_VecD ||
ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ );
}
int RegMask::num_registers(uint ireg) {
switch(ireg) {
case Op_VecZ:
return 16;
return SlotsPerVecZ;
case Op_VecY:
return 8;
return SlotsPerVecY;
case Op_VecX:
return 4;
return SlotsPerVecX;
case Op_VecD:
return SlotsPerVecD;
case Op_RegD:
case Op_RegL:
#ifdef _LP64
case Op_RegP:
#endif
return 2;
case Op_VecA:
assert(Matcher::supports_scalable_vector(), "does not support scalable vector");
return SlotsPerVecA;
}
// Op_VecS and the rest ideal registers.
return 1;
}
// Number of register-mask slots used by a value of ideal register type 'ireg'
// that belongs to live range 'lrg'.
// The count normally comes from num_registers(ireg). The one exception is a
// scalable live range whose assigned OptoReg is a stack slot: in that case
// the live range's own scalable_reg_slots() is reported instead.
int RegMask::num_registers(uint ireg, LRG &lrg) {
  const int ideal_slots = num_registers(ireg);
  // The location picked for this live range by the register allocator.
  OptoReg::Name assigned = lrg.reg();
  assert(OptoReg::is_valid(assigned), "should be valid opto register");
  if (lrg.is_scalable() && OptoReg::is_stack(assigned)) {
    return lrg.scalable_reg_slots();
  }
  return ideal_slots;
}
// Clear out partial bits; leave only bit pairs
void RegMask::clear_to_pairs() {
assert(valid_watermarks(), "sanity");
@ -157,6 +175,16 @@ bool RegMask::is_bound(uint ireg) const {
}
return false;
}
// Check whether the 'size' consecutive registers ending at 'reg' (reg is the
// highest-numbered one) are all members of this mask.
// Returns true when every one of reg, reg-1, ..., reg-(size-1) is a member,
// and also when size is not positive (nothing to check).
bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const {
  int offset = 0;
  // Walk downwards from 'reg' and stop at the first register not in the mask.
  while (offset < size && Member(reg - offset)) {
    offset++;
  }
  return offset >= size;
}
// Only indices of power 2 are accessed, so index 3 is only filled in for storage.
static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 };
@ -164,8 +192,13 @@ static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x000
// Find the lowest-numbered register set in the mask. Return the
// HIGHEST register number in the set, or BAD if no sets.
// Works also for size 1.
OptoReg::Name RegMask::find_first_set(const int size) const {
assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const {
if (lrg.is_scalable()) {
// For scalable vector register, regmask is SlotsPerVecA bits aligned.
assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets");
} else {
assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
}
assert(valid_watermarks(), "sanity");
for (int i = _lwm; i <= _hwm; i++) {
if (_A[i]) { // Found some bits
@ -245,12 +278,16 @@ bool RegMask::is_aligned_sets(const int size) const {
while (bits) { // Check bits for pairing
int bit = bits & -bits; // Extract low bit
// Low bit is not odd means its mis-aligned.
if ((bit & low_bits_mask) == 0) return false;
if ((bit & low_bits_mask) == 0) {
return false;
}
// Do extra work since (bit << size) may overflow.
int hi_bit = bit << (size-1); // high bit
int set = hi_bit + ((hi_bit-1) & ~(bit-1));
// Check for aligned adjacent bits in this set
if ((bits & set) != set) return false;
if ((bits & set) != set) {
return false;
}
bits -= set; // Remove this set
}
}

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -30,6 +30,8 @@
#include "utilities/count_leading_zeros.hpp"
#include "utilities/count_trailing_zeros.hpp"
class LRG;
//-------------Non-zero bit search methods used by RegMask---------------------
// Find lowest 1, undefined if empty/0
static int find_lowest_bit(uint32_t mask) {
@ -91,11 +93,13 @@ class RegMask {
// requirement is internal to the allocator, and independent of any
// particular platform.
enum { SlotsPerLong = 2,
SlotsPerVecA = 8,
SlotsPerVecS = 1,
SlotsPerVecD = 2,
SlotsPerVecX = 4,
SlotsPerVecY = 8,
SlotsPerVecZ = 16 };
SlotsPerVecZ = 16,
};
// A constructor only used by the ADLC output. All mask fields are filled
// in directly. Calls to this look something like RM(1,2,3,4);
@ -219,10 +223,14 @@ class RegMask {
// Test for a single adjacent set of ideal register's size.
bool is_bound(uint ireg) const;
// Check whether the given reg number with size is valid
// for the current regmask, where reg is the highest number.
bool is_valid_reg(OptoReg::Name reg, const int size) const;
// Find the lowest-numbered register set in the mask. Return the
// HIGHEST register number in the set, or BAD if no sets.
// Assert that the mask contains only bit sets.
OptoReg::Name find_first_set(const int size) const;
OptoReg::Name find_first_set(LRG &lrg, const int size) const;
// Clear out partial bits; leave only aligned adjacent bit sets of size.
void clear_to_sets(const int size);
@ -236,6 +244,7 @@ class RegMask {
static bool is_vector(uint ireg);
static int num_registers(uint ireg);
static int num_registers(uint ireg, LRG &lrg);
// Fast overlap test. Non-zero if any registers in common.
int overlap(const RegMask &rm) const {

View File

@ -94,8 +94,11 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) :
//------------------------------transform_loop---------------------------
void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
assert(UseSuperWord, "should be");
// Do vectors exist on this architecture?
if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
// SuperWord only works with power of two vector sizes.
int vector_width = Matcher::vector_width_in_bytes(T_BYTE);
if (vector_width < 2 || !is_power_of_2(vector_width)) {
return;
}
assert(lpt->_head->is_CountedLoop(), "must be");
CountedLoopNode *cl = lpt->_head->as_CountedLoop();

View File

@ -74,6 +74,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = {
{ Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY
{ Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ
#else // all other
{ Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA.
{ Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS
{ Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD
{ Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX
@ -646,6 +647,10 @@ void Type::Initialize_shared(Compile* current) {
// get_zero_type() should not happen for T_CONFLICT
_zero_type[T_CONFLICT]= NULL;
if (Matcher::supports_scalable_vector()) {
TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE));
}
// Vector predefined types, it needs initialized _const_basic_type[].
if (Matcher::vector_size_supported(T_BYTE,4)) {
TypeVect::VECTS = TypeVect::make(T_BYTE,4);
@ -662,6 +667,8 @@ void Type::Initialize_shared(Compile* current) {
if (Matcher::vector_size_supported(T_FLOAT,16)) {
TypeVect::VECTZ = TypeVect::make(T_FLOAT,16);
}
mreg2type[Op_VecA] = TypeVect::VECTA;
mreg2type[Op_VecS] = TypeVect::VECTS;
mreg2type[Op_VecD] = TypeVect::VECTD;
mreg2type[Op_VecX] = TypeVect::VECTX;
@ -981,6 +988,7 @@ const Type::TYPES Type::dual_type[Type::lastype] = {
Bad, // Tuple - handled in v-call
Bad, // Array - handled in v-call
Bad, // VectorA - handled in v-call
Bad, // VectorS - handled in v-call
Bad, // VectorD - handled in v-call
Bad, // VectorX - handled in v-call
@ -1881,7 +1889,6 @@ const TypeTuple *TypeTuple::LONG_PAIR;
const TypeTuple *TypeTuple::INT_CC_PAIR;
const TypeTuple *TypeTuple::LONG_CC_PAIR;
//------------------------------make-------------------------------------------
// Make a TypeTuple from the range of a method signature
const TypeTuple *TypeTuple::make_range(ciSignature* sig) {
@ -2252,6 +2259,7 @@ bool TypeAry::ary_must_be_exact() const {
//==============================TypeVect=======================================
// Convenience common pre-built types.
const TypeVect *TypeVect::VECTA = NULL; // vector length agnostic
const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors
const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors
const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors
@ -2262,10 +2270,11 @@ const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors
const TypeVect* TypeVect::make(const Type *elem, uint length) {
BasicType elem_bt = elem->array_element_basic_type();
assert(is_java_primitive(elem_bt), "only primitive types in vector");
assert(length > 1 && is_power_of_2(length), "vector length is power of 2");
assert(Matcher::vector_size_supported(elem_bt, length), "length in range");
int size = length * type2aelembytes(elem_bt);
switch (Matcher::vector_ideal_reg(size)) {
case Op_VecA:
return (TypeVect*)(new TypeVectA(elem, length))->hashcons();
case Op_VecS:
return (TypeVect*)(new TypeVectS(elem, length))->hashcons();
case Op_RegL:
@ -2297,7 +2306,7 @@ const Type *TypeVect::xmeet( const Type *t ) const {
default: // All else is a mistake
typerr(t);
case VectorA:
case VectorS:
case VectorD:
case VectorX:
@ -2352,6 +2361,8 @@ bool TypeVect::empty(void) const {
#ifndef PRODUCT
void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const {
switch (base()) {
case VectorA:
st->print("vectora["); break;
case VectorS:
st->print("vectors["); break;
case VectorD:

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -53,6 +53,7 @@ class TypeNarrowKlass;
class TypeAry;
class TypeTuple;
class TypeVect;
class TypeVectA;
class TypeVectS;
class TypeVectD;
class TypeVectX;
@ -87,6 +88,7 @@ public:
Tuple, // Method signature or object layout
Array, // Array types
VectorA, // (Scalable) Vector types for vector length agnostic
VectorS, // 32bit Vector types
VectorD, // 64bit Vector types
VectorX, // 128bit Vector types
@ -757,6 +759,7 @@ public:
virtual const Type *xmeet( const Type *t) const;
virtual const Type *xdual() const; // Compute dual right now.
static const TypeVect *VECTA;
static const TypeVect *VECTS;
static const TypeVect *VECTD;
static const TypeVect *VECTX;
@ -768,6 +771,11 @@ public:
#endif
};
// Vector type whose base is VectorA, the (scalable) vector type used for
// vector-length-agnostic code.
class TypeVectA : public TypeVect {
// TypeVect is a friend so that it can reach the non-public constructor below.
friend class TypeVect;
// Only forwards to TypeVect, tagging the new type with the VectorA base.
TypeVectA(const Type* elem, uint length) : TypeVect(VectorA, elem, length) {}
};
class TypeVectS : public TypeVect {
friend class TypeVect;
TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {}
@ -1622,12 +1630,12 @@ inline const TypeAry *Type::isa_ary() const {
}
inline const TypeVect *Type::is_vect() const {
assert( _base >= VectorS && _base <= VectorZ, "Not a Vector" );
assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" );
return (TypeVect*)this;
}
inline const TypeVect *Type::isa_vect() const {
return (_base >= VectorS && _base <= VectorZ) ? (TypeVect*)this : NULL;
return (_base >= VectorA && _base <= VectorZ) ? (TypeVect*)this : NULL;
}
inline const TypePtr *Type::is_ptr() const {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -818,7 +818,7 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) {
(vlen > 1) && is_power_of_2(vlen) &&
Matcher::vector_size_supported(bt, vlen)) {
int vopc = ReductionNode::opcode(opc, bt);
return vopc != opc && Matcher::match_rule_supported(vopc);
return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt);
}
return false;
}

View File

@ -0,0 +1,128 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
/**
* @test
*
* @requires os.arch == "aarch64" & vm.compiler2.enabled
* @summary Verify VM SVE checking behavior
* @library /test/lib
* @run main/othervm/native compiler.c2.aarch64.TestSVEWithJNI
*
*/
package compiler.c2.aarch64;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import jdk.test.lib.process.ProcessTools;
import jdk.test.lib.process.OutputAnalyzer;
/**
 * Driver and child-process payload for the SVE/JNI vector length test.
 *
 * Started without arguments, the class acts as the driver: it launches child
 * JVMs that run either the "normal" or the "abort" mode and checks their exit
 * status and output. Started with a mode argument, it runs that mode and then
 * exits with {@link #EXIT_CODE}.
 */
public class TestSVEWithJNI {
    // Exit status a child JVM reports once the selected mode ran to completion.
    static final int EXIT_CODE = 99;

    // Prefix of the line printed by testNormal(); the driver searches for it.
    public static final String MSG = "Current Vector Size: ";

    static {
        System.loadLibrary("TestSVEWithJNI");
    }

    // Returns a nonnegative value on success, or a negative value on error.
    public static native int setVectorLength(int arg);

    // Returns a nonnegative value on success, or a negative value on error.
    public static native int getVectorLength();

    /**
     * Prints the current vector length and sets it again to the same value.
     *
     * @throws Error if the native setter reports a negative result
     */
    public static void testNormal() {
        final int current = getVectorLength();
        System.out.println(MSG + current);
        // Should be fine if no vector length changed.
        final int status = setVectorLength(current);
        if (status < 0) {
            throw new Error("Error in setting vector length.");
        }
    }

    /**
     * Sets the vector length to 16 when the current length is larger.
     *
     * @throws Error if the current length is 16 or less, or if the native
     *         setter reports a negative result
     */
    public static void testAbort() {
        final int current = getVectorLength();
        if (current <= 16) {
            throw new Error("Error: unsupported vector length.");
        }
        final int status = setVectorLength(16);
        if (status < 0) {
            throw new Error("Error: setting vector length failed.");
        }
    }

    /**
     * Builds the command line for a child JVM running this class.
     *
     * @param args VM options placed before the class name
     * @param mode argument passed to the child's main method
     * @return a process builder for the child JVM
     */
    public static ProcessBuilder createProcessBuilder(String [] args, String mode) {
        List<String> command = new ArrayList<>();
        // Forward the test.jdk property of this JVM to the child.
        command.add("-Dtest.jdk=" + System.getProperty("test.jdk"));
        Collections.addAll(command, args);
        command.add(TestSVEWithJNI.class.getName());
        command.add(mode);
        return ProcessTools.createJavaProcessBuilder(command.toArray(new String[0]));
    }

    // Starts a child JVM with the given options and mode and captures its output.
    private static OutputAnalyzer launch(String [] opts, String mode) throws Exception {
        return new OutputAnalyzer(createProcessBuilder(opts, mode).start());
    }

    // Driver part: spawns the child JVMs and verifies their behavior.
    private static void runDriver() throws Exception {
        // A negative vector length is the native error result; nothing to test then.
        if (getVectorLength() < 0) {
            return;
        }

        String [][] testOpts = {
            {"-Xint", "-XX:UseSVE=1"},
            {"-Xcomp", "-XX:UseSVE=1"},
        };
        for (String [] opts : testOpts) {
            // Re-setting the same length must let the child finish normally.
            OutputAnalyzer normalRun = launch(opts, "normal");
            normalRun.shouldHaveExitValue(EXIT_CODE);

            // Changing the length must keep the child from reaching its normal exit.
            OutputAnalyzer abortRun = launch(opts, "abort");
            abortRun.shouldNotHaveExitValue(EXIT_CODE);
            abortRun.shouldMatch("(error|Error|ERROR)");
        }

        // Verify MaxVectorSize
        // Any SVE architecture should support 128-bit vector size.
        OutputAnalyzer smallest = launch(new String []{"-XX:UseSVE=1", "-XX:MaxVectorSize=16"}, "normal");
        smallest.shouldHaveExitValue(EXIT_CODE);
        smallest.shouldContain(MSG + 16);

        // An unsupported large vector size value.
        OutputAnalyzer oversized = launch(new String []{"-XX:UseSVE=1", "-XX:MaxVectorSize=512"}, "normal");
        oversized.shouldHaveExitValue(EXIT_CODE);
        oversized.shouldContain("warning");
    }

    /**
     * Entry point: no arguments selects the driver, "normal" and "abort"
     * select the matching child mode; any other argument does nothing.
     */
    public static void main(String [] args) throws Exception {
        if (args.length == 0) {
            runDriver();
            return;
        }
        switch (args[0]) {
            case "normal":
                testNormal();
                System.exit(EXIT_CODE);
                break;
            case "abort":
                testAbort();
                System.exit(EXIT_CODE);
                break;
            default:
                break;
        }
    }
}

View File

@ -0,0 +1,68 @@
/*
* Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, Arm Limited. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*
*/
#ifdef __aarch64__
#include <jni.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/prctl.h>
#include <unistd.h>
#ifndef PR_SVE_GET_VL
// For old toolchains which do not have SVE related macros defined.
#define PR_SVE_SET_VL 50
#define PR_SVE_GET_VL 51
#endif
#ifndef PR_SVE_VL_LEN_MASK
// For old toolchains which do not have SVE related macros defined.
#define PR_SVE_VL_LEN_MASK 0xffff
#endif
// Returns the SVE vector length of the calling thread in bytes, or the
// negative prctl result on error.
// PR_SVE_GET_VL may report flag bits (e.g. PR_SVE_VL_INHERIT) OR-ed into the
// length, so a successful result is masked down to the length field only.
int get_current_thread_vl() {
  int ret = prctl(PR_SVE_GET_VL);
  if (ret < 0) {
    // Keep the error value intact; masking would turn it into a bogus length.
    return ret;
  }
  return ret & PR_SVE_VL_LEN_MASK;
}
// Passes 'arg' to prctl(PR_SVE_SET_VL) for the calling thread and returns
// the prctl result unchanged.
int set_current_thread_vl(unsigned long arg) {
  const int status = prctl(PR_SVE_SET_VL, arg);
  return status;
}
#ifdef __cplusplus
extern "C" {
#endif
// JNI entry point for TestSVEWithJNI.setVectorLength(int): forwards 'length'
// to set_current_thread_vl() and returns its result to Java.
JNIEXPORT jint JNICALL Java_compiler_c2_aarch64_TestSVEWithJNI_setVectorLength
(JNIEnv * env, jclass clz, jint length) {
  const int status = set_current_thread_vl(length);
  return status;
}
// JNI entry point for TestSVEWithJNI.getVectorLength(): returns the result of
// get_current_thread_vl() to Java.
JNIEXPORT jint JNICALL Java_compiler_c2_aarch64_TestSVEWithJNI_getVectorLength
(JNIEnv *env, jclass clz) {
  const int vl = get_current_thread_vl();
  return vl;
}
#ifdef __cplusplus
}
#endif
#endif