8231441: AArch64: Initial SVE backend support
Co-authored-by: Joshua Zhu <joshua.zhu@arm.com>
Co-authored-by: Yang Zhang <yang.zhang@arm.com>
Reviewed-by: adinn, pli, ihse, vlivanov, eosterlund
parent 0dadf81e14
commit 9b5a9b6189
@@ -129,6 +129,12 @@ ifeq ($(call check-jvm-feature, compiler2), true)
      $d/os_cpu/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_OS)_$(HOTSPOT_TARGET_CPU_ARCH).ad \
    )))

  ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64)
    AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
        $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/$(HOTSPOT_TARGET_CPU_ARCH)_sve.ad \
      )))
  endif

  ifeq ($(call check-jvm-feature, shenandoahgc), true)
    AD_SRC_FILES += $(call uniq, $(wildcard $(foreach d, $(AD_SRC_ROOTS), \
      $d/cpu/$(HOTSPOT_TARGET_CPU_ARCH)/gc/shenandoah/shenandoah_$(HOTSPOT_TARGET_CPU).ad \
@@ -68,6 +68,49 @@ class GeneralRegisterOrSp(Register):
        else:
            return self.astr("r")

class SVEVectorRegister(FloatRegister):
    def __str__(self):
        return self.astr("z")

class SVEPRegister(Register):
    def __str__(self):
        return self.astr("p")

    def generate(self):
        self.number = random.randint(0, 15)
        return self

class SVEGoverningPRegister(Register):
    def __str__(self):
        return self.astr("p")
    def generate(self):
        self.number = random.randint(0, 7)
        return self

class RegVariant(object):
    def __init__(self, low, high):
        self.number = random.randint(low, high)

    def astr(self):
        nameMap = {
             0: ".b",
             1: ".h",
             2: ".s",
             3: ".d",
             4: ".q"
        }
        return nameMap.get(self.number)

    def cstr(self):
        nameMap = {
             0: "__ B",
             1: "__ H",
             2: "__ S",
             3: "__ D",
             4: "__ Q"
        }
        return nameMap.get(self.number)

class FloatZero(Operand):

    def __str__(self):
@@ -82,7 +125,10 @@ class OperandFactory:
             'w' : GeneralRegister,
             's' : FloatRegister,
             'd' : FloatRegister,
             'z' : FloatZero}
             'z' : FloatZero,
             'p' : SVEPRegister,
             'P' : SVEGoverningPRegister,
             'Z' : SVEVectorRegister}

    @classmethod
    def create(cls, mode):
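# The new mode characters map directly onto SVE register classes: 'Z' prints
# with a "z" prefix (vector register), 'p' draws a p0-p15 predicate, and 'P'
# draws a governing predicate restricted to p0-p7. For example,
# OperandFactory.create('P').generate() yields a register whose str() is
# something like "p3".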
@@ -839,6 +885,100 @@ class FloatInstruction(Instruction):
                % tuple([Instruction.astr(self)] +
                        [(self.reg[i].astr(self.modes[i])) for i in range(self.numRegs)]))

class SVEVectorOp(Instruction):
    def __init__(self, args):
        name = args[0]
        regTypes = args[1]
        regs = []
        for c in regTypes:
            regs.append(OperandFactory.create(c).generate())
        self.reg = regs
        self.numRegs = len(regs)
        if regTypes[0] != "p" and regTypes[1] == 'P':
            self._isPredicated = True
            self._merge = "/m"
        else:
            self._isPredicated = False
            self._merge =""

        self._bitwiseop = False
        if name[0] == 'f':
            self._width = RegVariant(2, 3)
        elif not self._isPredicated and (name == "and" or name == "eor" or name == "orr"):
            self._width = RegVariant(3, 3)
            self._bitwiseop = True
        else:
            self._width = RegVariant(0, 3)
        if len(args) > 2:
            self._dnm = args[2]
        else:
            self._dnm = None
        Instruction.__init__(self, name)

    def cstr(self):
        formatStr = "%s%s" + ''.join([", %s" for i in range(0, self.numRegs)] + [");"])
        if self._bitwiseop:
            width = []
            formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)] + [");"])
        else:
            width = [self._width.cstr()]
        return (formatStr
                % tuple(["__ sve_" + self._name + "("] +
                        [str(self.reg[0])] +
                        width +
                        [str(self.reg[i]) for i in range(1, self.numRegs)]))
    def astr(self):
        formatStr = "%s%s" + ''.join([", %s" for i in range(1, self.numRegs)])
        if self._dnm == 'dn':
            formatStr += ", %s"
            dnReg = [str(self.reg[0]) + self._width.astr()]
        else:
            dnReg = []

        if self._isPredicated:
            restRegs = [str(self.reg[1]) + self._merge] + dnReg + [str(self.reg[i]) + self._width.astr() for i in range(2, self.numRegs)]
        else:
            restRegs = dnReg + [str(self.reg[i]) + self._width.astr() for i in range(1, self.numRegs)]
        return (formatStr
                % tuple([Instruction.astr(self)] +
                        [str(self.reg[0]) + self._width.astr()] +
                        restRegs))
    def generate(self):
        return self

class SVEReductionOp(Instruction):
    def __init__(self, args):
        name = args[0]
        lowRegType = args[1]
        self.reg = []
        Instruction.__init__(self, name)
        self.reg.append(OperandFactory.create('s').generate())
        self.reg.append(OperandFactory.create('P').generate())
        self.reg.append(OperandFactory.create('Z').generate())
        self._width = RegVariant(lowRegType, 3)
    def cstr(self):
        return "__ sve_%s(%s, %s, %s, %s);" % (self.name(),
                                               str(self.reg[0]),
                                               self._width.cstr(),
                                               str(self.reg[1]),
                                               str(self.reg[2]))
    def astr(self):
        if self.name() == "uaddv":
            dstRegName = "d" + str(self.reg[0].number)
        else:
            dstRegName = self._width.astr()[1] + str(self.reg[0].number)
        formatStr = "%s %s, %s, %s"
        if self.name() == "fadda":
            formatStr += ", %s"
            moreReg = [dstRegName]
        else:
            moreReg = []
        return formatStr % tuple([self.name()] +
                                 [dstRegName] +
                                 [str(self.reg[1])] +
                                 moreReg +
                                 [str(self.reg[2]) + self._width.astr()])

class LdStSIMDOp(Instruction):
    def __init__(self, args):
        self._name, self.regnum, self.arrangement, self.addresskind = args
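# The type strings used in the SVEVectorOp lists below encode the operand
# layout: "ZZZ" is an unpredicated three-register form, "ZPZ" a predicated
# (merging, "/m") form, and "ZPZZ" a predicated form with two vector sources.
# A trailing "dn" marks destructive instructions whose first source is also
# the destination, so the expected assembly repeats that register, e.g.
# "fadd z15.s, p3/m, z15.s, z11.s".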
@@ -1160,7 +1300,42 @@ generate(SpecialCases, [["ccmn", "__ ccmn(zr, zr, 3u, Assembler::LE);",
                        ["mov", "__ mov(v1, __ T2S, 1, zr);", "mov\tv1.s[1], wzr"],
                        ["mov", "__ mov(v1, __ T4H, 2, zr);", "mov\tv1.h[2], wzr"],
                        ["mov", "__ mov(v1, __ T8B, 3, zr);", "mov\tv1.b[3], wzr"],
                        ["ld1", "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"]])
                        ["ld1", "__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0)));", "ld1\t{v31.2d, v0.2d}, [x1], x0"],
                        # SVE instructions
                        ["cpy", "__ sve_cpy(z0, __ S, p0, v1);", "mov\tz0.s, p0/m, s1"],
                        ["inc", "__ sve_inc(r0, __ S);", "incw\tx0"],
                        ["dec", "__ sve_dec(r1, __ H);", "dech\tx1"],
                        ["lsl", "__ sve_lsl(z0, __ B, z1, 7);", "lsl\tz0.b, z1.b, #7"],
                        ["lsl", "__ sve_lsl(z21, __ H, z1, 15);", "lsl\tz21.h, z1.h, #15"],
                        ["lsl", "__ sve_lsl(z0, __ S, z1, 31);", "lsl\tz0.s, z1.s, #31"],
                        ["lsl", "__ sve_lsl(z0, __ D, z1, 63);", "lsl\tz0.d, z1.d, #63"],
                        ["lsr", "__ sve_lsr(z0, __ B, z1, 7);", "lsr\tz0.b, z1.b, #7"],
                        ["asr", "__ sve_asr(z0, __ H, z11, 15);", "asr\tz0.h, z11.h, #15"],
                        ["lsr", "__ sve_lsr(z30, __ S, z1, 31);", "lsr\tz30.s, z1.s, #31"],
                        ["asr", "__ sve_asr(z0, __ D, z1, 63);", "asr\tz0.d, z1.d, #63"],
                        ["addvl", "__ sve_addvl(sp, r0, 31);", "addvl\tsp, x0, #31"],
                        ["addpl", "__ sve_addpl(r1, sp, -32);", "addpl\tx1, sp, -32"],
                        ["cntp", "__ sve_cntp(r8, __ B, p0, p1);", "cntp\tx8, p0, p1.b"],
                        ["dup", "__ sve_dup(z0, __ B, 127);", "dup\tz0.b, 127"],
                        ["dup", "__ sve_dup(z1, __ H, -128);", "dup\tz1.h, -128"],
                        ["dup", "__ sve_dup(z2, __ S, 32512);", "dup\tz2.s, 32512"],
                        ["dup", "__ sve_dup(z7, __ D, -32768);", "dup\tz7.d, -32768"],
                        ["ld1b", "__ sve_ld1b(z0, __ B, p0, Address(sp));", "ld1b\t{z0.b}, p0/z, [sp]"],
                        ["ld1h", "__ sve_ld1h(z10, __ H, p1, Address(sp, -8));", "ld1h\t{z10.h}, p1/z, [sp, #-8, MUL VL]"],
                        ["ld1w", "__ sve_ld1w(z20, __ S, p2, Address(r0, 7));", "ld1w\t{z20.s}, p2/z, [x0, #7, MUL VL]"],
                        ["ld1b", "__ sve_ld1b(z30, __ B, p3, Address(sp, r8));", "ld1b\t{z30.b}, p3/z, [sp, x8]"],
                        ["ld1w", "__ sve_ld1w(z0, __ S, p4, Address(sp, r28));", "ld1w\t{z0.s}, p4/z, [sp, x28, LSL #2]"],
                        ["ld1d", "__ sve_ld1d(z11, __ D, p5, Address(r0, r1));", "ld1d\t{z11.d}, p5/z, [x0, x1, LSL #3]"],
                        ["st1b", "__ sve_st1b(z22, __ B, p6, Address(sp));", "st1b\t{z22.b}, p6, [sp]"],
                        ["st1b", "__ sve_st1b(z31, __ B, p7, Address(sp, -8));", "st1b\t{z31.b}, p7, [sp, #-8, MUL VL]"],
                        ["st1w", "__ sve_st1w(z0, __ S, p1, Address(r0, 7));", "st1w\t{z0.s}, p1, [x0, #7, MUL VL]"],
                        ["st1b", "__ sve_st1b(z0, __ B, p2, Address(sp, r1));", "st1b\t{z0.b}, p2, [sp, x1]"],
                        ["st1h", "__ sve_st1h(z0, __ H, p3, Address(sp, r8));", "st1h\t{z0.h}, p3, [sp, x8, LSL #1]"],
                        ["st1d", "__ sve_st1d(z0, __ D, p4, Address(r0, r18));", "st1d\t{z0.d}, p4, [x0, x18, LSL #3]"],
                        ["ldr", "__ sve_ldr(z0, Address(sp));", "ldr\tz0, [sp]"],
                        ["ldr", "__ sve_ldr(z31, Address(sp, -256));", "ldr\tz31, [sp, #-256, MUL VL]"],
                        ["str", "__ sve_str(z8, Address(r8, 255));", "str\tz8, [x8, #255, MUL VL]"],
])

print "\n// FloatImmediateOp"
for float in ("2.0", "2.125", "4.0", "4.25", "8.0", "8.5", "16.0", "17.0", "0.125",
@@ -1185,6 +1360,49 @@ for size in ("x", "w"):
                      ["ldumin", "ldumin", size, suffix],
                      ["ldumax", "ldumax", size, suffix]]);

generate(SVEVectorOp, [["add", "ZZZ"],
                       ["sub", "ZZZ"],
                       ["fadd", "ZZZ"],
                       ["fmul", "ZZZ"],
                       ["fsub", "ZZZ"],
                       ["abs", "ZPZ"],
                       ["add", "ZPZ", "dn"],
                       ["asr", "ZPZ", "dn"],
                       ["cnt", "ZPZ"],
                       ["lsl", "ZPZ", "dn"],
                       ["lsr", "ZPZ", "dn"],
                       ["mul", "ZPZ", "dn"],
                       ["neg", "ZPZ"],
                       ["not", "ZPZ"],
                       ["smax", "ZPZ", "dn"],
                       ["smin", "ZPZ", "dn"],
                       ["sub", "ZPZ", "dn"],
                       ["fabs", "ZPZ"],
                       ["fadd", "ZPZ", "dn"],
                       ["fdiv", "ZPZ", "dn"],
                       ["fmax", "ZPZ", "dn"],
                       ["fmin", "ZPZ", "dn"],
                       ["fmul", "ZPZ", "dn"],
                       ["fneg", "ZPZ"],
                       ["frintm", "ZPZ"],
                       ["frintn", "ZPZ"],
                       ["frintp", "ZPZ"],
                       ["fsqrt", "ZPZ"],
                       ["fsub", "ZPZ", "dn"],
                       ["fmla", "ZPZZ"],
                       ["fmls", "ZPZZ"],
                       ["fnmla", "ZPZZ"],
                       ["fnmls", "ZPZZ"],
                       ["mla", "ZPZZ"],
                       ["mls", "ZPZZ"],
                       ["and", "ZZZ"],
                       ["eor", "ZZZ"],
                       ["orr", "ZZZ"],
])

generate(SVEReductionOp, [["andv", 0], ["orv", 0], ["eorv", 0], ["smaxv", 0], ["sminv", 0],
                          ["fminv", 2], ["fmaxv", 2], ["fadda", 2], ["uaddv", 0]])

print "\n __ bind(forth);"
outfile.write("forth:\n")

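# For SVEReductionOp the numeric argument is the smallest RegVariant index
# the instruction accepts: 0 allows .b through .d, while 2 restricts the
# floating-point reductions to .s/.d. uaddv is special-cased in astr() to
# always name a "d" scalar destination, matching e.g. "uaddv d17, p0, z3.h".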
@@ -1193,8 +1411,8 @@ outfile.close()
import subprocess
import sys

# compile for 8.1 and sha2 because of lse atomics and sha512 crypto extension.
subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2", "aarch64ops.s", "-o", "aarch64ops.o"])
# compile for sve with 8.1 and sha2 because of lse atomics and sha512 crypto extension.
subprocess.check_call([AARCH64_AS, "-march=armv8.1-a+sha2+sve", "aarch64ops.s", "-o", "aarch64ops.o"])

print
print "/*",
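# GNU as only accepts the SVE mnemonics emitted above when SVE is enabled on
# the command line, hence the -march string gains "+sve" alongside the
# existing 8.1/sha2 options kept for the LSE atomics and SHA-512 forms.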
src/hotspot/cpu/aarch64/aarch64_sve.ad (new file, 1637 lines; diff not shown because it is too large)

src/hotspot/cpu/aarch64/aarch64_sve_ad.m4 (new file, 767 lines)
@@ -0,0 +1,767 @@
|
||||
//
|
||||
// Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2020, Arm Limited. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
// This code is free software; you can redistribute it and/or modify it
|
||||
// under the terms of the GNU General Public License version 2 only, as
|
||||
// published by the Free Software Foundation.
|
||||
//
|
||||
// This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
// version 2 for more details (a copy is included in the LICENSE file that
|
||||
// accompanied this code).
|
||||
//
|
||||
// You should have received a copy of the GNU General Public License version
|
||||
// 2 along with this work; if not, write to the Free Software Foundation,
|
||||
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
//
|
||||
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
// or visit www.oracle.com if you need additional information or have any
|
||||
// questions.
|
||||
//
|
||||
//
|
||||
|
||||
dnl Generate the warning
|
||||
// This file is automatically generated by running "m4 aarch64_sve_ad.m4". Do not edit ----
|
||||
dnl
|
||||
|
||||
// AArch64 SVE Architecture Description File
|
||||
|
||||
dnl
|
||||
dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET($1, $2, $3 )
|
||||
dnl OPERAND_VMEMORYA_IMMEDIATE_OFFSET(imm_type_abbr, imm_type, imm_len)
|
||||
define(`OPERAND_VMEMORYA_IMMEDIATE_OFFSET', `
|
||||
operand vmemA_imm$1Offset$3()
|
||||
%{
|
||||
predicate(Address::offset_ok_for_sve_immed(n->get_$2(), $3,
|
||||
Matcher::scalable_vector_reg_size(T_BYTE)));
|
||||
match(Con$1);
|
||||
|
||||
op_cost(0);
|
||||
format %{ %}
|
||||
interface(CONST_INTER);
|
||||
%}')
|
||||
dnl
|
||||
// 4 bit signed offset -- for predicated load/store
|
||||
OPERAND_VMEMORYA_IMMEDIATE_OFFSET(I, int, 4)
|
||||
OPERAND_VMEMORYA_IMMEDIATE_OFFSET(L, long, 4)
|
||||
dnl
|
||||
dnl OPERAND_VMEMORYA_INDIRECT_OFFSET($1, $2 )
|
||||
dnl OPERAND_VMEMORYA_INDIRECT_OFFSET(imm_type_abbr, imm_len)
|
||||
define(`OPERAND_VMEMORYA_INDIRECT_OFFSET', `
|
||||
operand vmemA_indOff$1$2(iRegP reg, vmemA_imm$1Offset$2 off)
|
||||
%{
|
||||
constraint(ALLOC_IN_RC(ptr_reg));
|
||||
match(AddP reg off);
|
||||
op_cost(0);
|
||||
format %{ "[$reg, $off, MUL VL]" %}
|
||||
interface(MEMORY_INTER) %{
|
||||
base($reg);
|
||||
`index'(0xffffffff);
|
||||
scale(0x0);
|
||||
disp($off);
|
||||
%}
|
||||
%}')
|
||||
dnl
|
||||
OPERAND_VMEMORYA_INDIRECT_OFFSET(I, 4)
|
||||
OPERAND_VMEMORYA_INDIRECT_OFFSET(L, 4)
|
||||
|
||||
opclass vmemA(indirect, vmemA_indOffI4, vmemA_indOffL4);
|
||||
|
||||
source_hpp %{
|
||||
bool op_sve_supported(int opcode);
|
||||
%}
|
||||
|
||||
source %{
|
||||
|
||||
static inline BasicType vector_element_basic_type(const MachNode* n) {
|
||||
const TypeVect* vt = n->bottom_type()->is_vect();
|
||||
return vt->element_basic_type();
|
||||
}
|
||||
|
||||
static inline BasicType vector_element_basic_type(const MachNode* use, const MachOper* opnd) {
|
||||
int def_idx = use->operand_index(opnd);
|
||||
Node* def = use->in(def_idx);
|
||||
const TypeVect* vt = def->bottom_type()->is_vect();
|
||||
return vt->element_basic_type();
|
||||
}
|
||||
|
||||
typedef void (C2_MacroAssembler::* sve_mem_insn_predicate)(FloatRegister Rt, Assembler::SIMD_RegVariant T,
|
||||
PRegister Pg, const Address &adr);
|
||||
|
||||
// Predicated load/store, with optional ptrue to all elements of given predicate register.
|
||||
static void loadStoreA_predicate(C2_MacroAssembler masm, bool is_store,
|
||||
FloatRegister reg, PRegister pg, BasicType bt,
|
||||
int opcode, Register base, int index, int size, int disp) {
|
||||
sve_mem_insn_predicate insn;
|
||||
Assembler::SIMD_RegVariant type;
|
||||
int esize = type2aelembytes(bt);
|
||||
if (index == -1) {
|
||||
assert(size == 0, "unsupported address mode: scale size = %d", size);
|
||||
switch(esize) {
|
||||
case 1:
|
||||
insn = is_store ? &C2_MacroAssembler::sve_st1b : &C2_MacroAssembler::sve_ld1b;
|
||||
type = Assembler::B;
|
||||
break;
|
||||
case 2:
|
||||
insn = is_store ? &C2_MacroAssembler::sve_st1h : &C2_MacroAssembler::sve_ld1h;
|
||||
type = Assembler::H;
|
||||
break;
|
||||
case 4:
|
||||
insn = is_store ? &C2_MacroAssembler::sve_st1w : &C2_MacroAssembler::sve_ld1w;
|
||||
type = Assembler::S;
|
||||
break;
|
||||
case 8:
|
||||
insn = is_store ? &C2_MacroAssembler::sve_st1d : &C2_MacroAssembler::sve_ld1d;
|
||||
type = Assembler::D;
|
||||
break;
|
||||
default:
|
||||
assert(false, "unsupported");
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
(masm.*insn)(reg, type, pg, Address(base, disp / Matcher::scalable_vector_reg_size(T_BYTE)));
|
||||
} else {
|
||||
assert(false, "unimplemented");
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
}
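// Note on the addressing form above: the immediate variant divides the byte
// displacement by Matcher::scalable_vector_reg_size(T_BYTE), so the Address
// carries an offset in whole vector registers and the assembler prints it as
// "[base, #imm, MUL VL]", matching the ld1/st1 encodings exercised by
// aarch64-asmtest.py.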
|
||||
|
||||
bool op_sve_supported(int opcode) {
|
||||
switch (opcode) {
|
||||
case Op_MulAddVS2VI:
|
||||
// No multiply reduction instructions
|
||||
case Op_MulReductionVD:
|
||||
case Op_MulReductionVF:
|
||||
case Op_MulReductionVI:
|
||||
case Op_MulReductionVL:
|
||||
// Others
|
||||
case Op_Extract:
|
||||
case Op_ExtractB:
|
||||
case Op_ExtractC:
|
||||
case Op_ExtractD:
|
||||
case Op_ExtractF:
|
||||
case Op_ExtractI:
|
||||
case Op_ExtractL:
|
||||
case Op_ExtractS:
|
||||
case Op_ExtractUB:
|
||||
return false;
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
%}
|
||||
|
||||
definitions %{
|
||||
int_def SVE_COST (200, 200);
|
||||
%}
|
||||
|
||||
|
||||
dnl
|
||||
dnl ELEMENT_SHORT_CHAR($1, $2)
dnl ELEMENT_SHORT_CHAR(etype, node)
|
||||
define(`ELEMENT_SHORT_CHAR',`ifelse(`$1', `T_SHORT',
|
||||
`($2->bottom_type()->is_vect()->element_basic_type() == T_SHORT ||
|
||||
($2->bottom_type()->is_vect()->element_basic_type() == T_CHAR))',
|
||||
`($2->bottom_type()->is_vect()->element_basic_type() == $1)')')
|
||||
dnl
|
||||
|
||||
// All SVE instructions
|
||||
|
||||
// vector load/store
|
||||
|
||||
// Use predicated vector load/store
|
||||
instruct loadV(vReg dst, vmemA mem) %{
|
||||
predicate(UseSVE > 0 && n->as_LoadVector()->memory_size() >= 16);
|
||||
match(Set dst (LoadVector mem));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_ldr $dst, $mem\t # vector (sve)" %}
|
||||
ins_encode %{
|
||||
FloatRegister dst_reg = as_FloatRegister($dst$$reg);
|
||||
loadStoreA_predicate(C2_MacroAssembler(&cbuf), false, dst_reg, ptrue,
|
||||
vector_element_basic_type(this), $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
instruct storeV(vReg src, vmemA mem) %{
|
||||
predicate(UseSVE > 0 && n->as_StoreVector()->memory_size() >= 16);
|
||||
match(Set mem (StoreVector mem src));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_str $mem, $src\t # vector (sve)" %}
|
||||
ins_encode %{
|
||||
FloatRegister src_reg = as_FloatRegister($src$$reg);
|
||||
loadStoreA_predicate(C2_MacroAssembler(&cbuf), true, src_reg, ptrue,
|
||||
vector_element_basic_type(this, $src), $mem->opcode(),
|
||||
as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
dnl
|
||||
dnl UNARY_OP_TRUE_PREDICATE_ETYPE($1, $2, $3, $4, $5, $6 )
|
||||
dnl UNARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn)
|
||||
define(`UNARY_OP_TRUE_PREDICATE_ETYPE', `
|
||||
instruct $1(vReg dst, vReg src) %{
|
||||
predicate(UseSVE > 0 && n->as_Vector()->length() >= $5 &&
|
||||
n->bottom_type()->is_vect()->element_basic_type() == $3);
|
||||
match(Set dst ($2 src));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "$6 $dst, $src\t# vector (sve) ($4)" %}
|
||||
ins_encode %{
|
||||
__ $6(as_FloatRegister($dst$$reg), __ $4,
|
||||
ptrue, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
|
||||
// vector abs
|
||||
UNARY_OP_TRUE_PREDICATE_ETYPE(vabsB, AbsVB, T_BYTE, B, 16, sve_abs)
|
||||
UNARY_OP_TRUE_PREDICATE_ETYPE(vabsS, AbsVS, T_SHORT, H, 8, sve_abs)
|
||||
UNARY_OP_TRUE_PREDICATE_ETYPE(vabsI, AbsVI, T_INT, S, 4, sve_abs)
|
||||
UNARY_OP_TRUE_PREDICATE_ETYPE(vabsL, AbsVL, T_LONG, D, 2, sve_abs)
|
||||
UNARY_OP_TRUE_PREDICATE_ETYPE(vabsF, AbsVF, T_FLOAT, S, 4, sve_fabs)
|
||||
UNARY_OP_TRUE_PREDICATE_ETYPE(vabsD, AbsVD, T_DOUBLE, D, 2, sve_fabs)
|
||||
dnl
|
||||
dnl BINARY_OP_UNPREDICATED($1, $2 $3, $4 $5 )
|
||||
dnl BINARY_OP_UNPREDICATED(insn_name, op_name, size, min_vec_len, insn)
|
||||
define(`BINARY_OP_UNPREDICATED', `
|
||||
instruct $1(vReg dst, vReg src1, vReg src2) %{
|
||||
predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
|
||||
match(Set dst ($2 src1 src2));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "$5 $dst, $src1, $src2\t # vector (sve) ($3)" %}
|
||||
ins_encode %{
|
||||
__ $5(as_FloatRegister($dst$$reg), __ $3,
|
||||
as_FloatRegister($src1$$reg),
|
||||
as_FloatRegister($src2$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
|
||||
// vector add
|
||||
BINARY_OP_UNPREDICATED(vaddB, AddVB, B, 16, sve_add)
|
||||
BINARY_OP_UNPREDICATED(vaddS, AddVS, H, 8, sve_add)
|
||||
BINARY_OP_UNPREDICATED(vaddI, AddVI, S, 4, sve_add)
|
||||
BINARY_OP_UNPREDICATED(vaddL, AddVL, D, 2, sve_add)
|
||||
BINARY_OP_UNPREDICATED(vaddF, AddVF, S, 4, sve_fadd)
|
||||
BINARY_OP_UNPREDICATED(vaddD, AddVD, D, 2, sve_fadd)
|
||||
dnl
|
||||
dnl BINARY_OP_UNSIZED($1, $2, $3, $4 )
|
||||
dnl BINARY_OP_UNSIZED(insn_name, op_name, min_vec_len, insn)
|
||||
define(`BINARY_OP_UNSIZED', `
|
||||
instruct $1(vReg dst, vReg src1, vReg src2) %{
|
||||
predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= $3);
|
||||
match(Set dst ($2 src1 src2));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "$4 $dst, $src1, $src2\t# vector (sve)" %}
|
||||
ins_encode %{
|
||||
__ $4(as_FloatRegister($dst$$reg),
|
||||
as_FloatRegister($src1$$reg),
|
||||
as_FloatRegister($src2$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
|
||||
// vector and
|
||||
BINARY_OP_UNSIZED(vand, AndV, 16, sve_and)
|
||||
|
||||
// vector or
|
||||
BINARY_OP_UNSIZED(vor, OrV, 16, sve_orr)
|
||||
|
||||
// vector xor
|
||||
BINARY_OP_UNSIZED(vxor, XorV, 16, sve_eor)
|
||||
dnl
|
||||
dnl VDIVF($1, $2 , $3 )
|
||||
dnl VDIVF(name_suffix, size, min_vec_len)
|
||||
define(`VDIVF', `
|
||||
instruct vdiv$1(vReg dst_src1, vReg src2) %{
|
||||
predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
|
||||
match(Set dst_src1 (DivV$1 dst_src1 src2));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_fdiv $dst_src1, $dst_src1, $src2\t# vector (sve) ($2)" %}
|
||||
ins_encode %{
|
||||
__ sve_fdiv(as_FloatRegister($dst_src1$$reg), __ $2,
|
||||
ptrue, as_FloatRegister($src2$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
|
||||
// vector float div
|
||||
VDIVF(F, S, 4)
|
||||
VDIVF(D, D, 2)
|
||||
|
||||
dnl
|
||||
dnl BINARY_OP_TRUE_PREDICATE_ETYPE($1, $2, $3, $4, $5, $6 )
|
||||
dnl BINARY_OP_TRUE_PREDICATE_ETYPE(insn_name, op_name, element_type, size, min_vec_len, insn)
|
||||
define(`BINARY_OP_TRUE_PREDICATE_ETYPE', `
|
||||
instruct $1(vReg dst_src1, vReg src2) %{
|
||||
predicate(UseSVE > 0 && n->as_Vector()->length() >= $5 &&
|
||||
n->bottom_type()->is_vect()->element_basic_type() == $3);
|
||||
match(Set dst_src1 ($2 dst_src1 src2));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "$6 $dst_src1, $dst_src1, $src2\t # vector (sve) ($4)" %}
|
||||
ins_encode %{
|
||||
__ $6(as_FloatRegister($dst_src1$$reg), __ $4,
|
||||
ptrue, as_FloatRegister($src2$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
// vector max
|
||||
BINARY_OP_TRUE_PREDICATE_ETYPE(vmaxF, MaxV, T_FLOAT, S, 4, sve_fmax)
|
||||
BINARY_OP_TRUE_PREDICATE_ETYPE(vmaxD, MaxV, T_DOUBLE, D, 2, sve_fmax)
|
||||
BINARY_OP_TRUE_PREDICATE_ETYPE(vminF, MinV, T_FLOAT, S, 4, sve_fmin)
|
||||
BINARY_OP_TRUE_PREDICATE_ETYPE(vminD, MinV, T_DOUBLE, D, 2, sve_fmin)
|
||||
|
||||
dnl
|
||||
dnl VFMLA($1 $2 $3 )
|
||||
dnl VFMLA(name_suffix, size, min_vec_len)
|
||||
define(`VFMLA', `
|
||||
// dst_src1 = dst_src1 + src2 * src3
|
||||
instruct vfmla$1(vReg dst_src1, vReg src2, vReg src3) %{
|
||||
predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
|
||||
match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 src3)));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_fmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
|
||||
ins_encode %{
|
||||
__ sve_fmla(as_FloatRegister($dst_src1$$reg), __ $2,
|
||||
ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
// vector fmla
|
||||
VFMLA(F, S, 4)
|
||||
VFMLA(D, D, 2)
|
||||
|
||||
dnl
|
||||
dnl VFMLS($1 $2 $3 )
|
||||
dnl VFMLS(name_suffix, size, min_vec_len)
|
||||
define(`VFMLS', `
|
||||
// dst_src1 = dst_src1 + -src2 * src3
|
||||
// dst_src1 = dst_src1 + src2 * -src3
|
||||
instruct vfmls$1(vReg dst_src1, vReg src2, vReg src3) %{
|
||||
predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
|
||||
match(Set dst_src1 (FmaV$1 dst_src1 (Binary (NegV$1 src2) src3)));
|
||||
match(Set dst_src1 (FmaV$1 dst_src1 (Binary src2 (NegV$1 src3))));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_fmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
|
||||
ins_encode %{
|
||||
__ sve_fmls(as_FloatRegister($dst_src1$$reg), __ $2,
|
||||
ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
// vector fmls
|
||||
VFMLS(F, S, 4)
|
||||
VFMLS(D, D, 2)
|
||||
|
||||
dnl
|
||||
dnl VFNMLA($1 $2 $3 )
|
||||
dnl VFNMLA(name_suffix, size, min_vec_len)
|
||||
define(`VFNMLA', `
|
||||
// dst_src1 = -dst_src1 + -src2 * src3
|
||||
// dst_src1 = -dst_src1 + src2 * -src3
|
||||
instruct vfnmla$1(vReg dst_src1, vReg src2, vReg src3) %{
|
||||
predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
|
||||
match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary (NegV$1 src2) src3)));
|
||||
match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 (NegV$1 src3))));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_fnmla $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
|
||||
ins_encode %{
|
||||
__ sve_fnmla(as_FloatRegister($dst_src1$$reg), __ $2,
|
||||
ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
// vector fnmla
|
||||
VFNMLA(F, S, 4)
|
||||
VFNMLA(D, D, 2)
|
||||
|
||||
dnl
|
||||
dnl VFNMLS($1 $2 $3 )
|
||||
dnl VFNMLS(name_suffix, size, min_vec_len)
|
||||
define(`VFNMLS', `
|
||||
// dst_src1 = -dst_src1 + src2 * src3
|
||||
instruct vfnmls$1(vReg dst_src1, vReg src2, vReg src3) %{
|
||||
predicate(UseFMA && UseSVE > 0 && n->as_Vector()->length() >= $3);
|
||||
match(Set dst_src1 (FmaV$1 (NegV$1 dst_src1) (Binary src2 src3)));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_fnmls $dst_src1, $src2, $src3\t # vector (sve) ($2)" %}
|
||||
ins_encode %{
|
||||
__ sve_fnmls(as_FloatRegister($dst_src1$$reg), __ $2,
|
||||
ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
// vector fnmls
|
||||
VFNMLS(F, S, 4)
|
||||
VFNMLS(D, D, 2)
|
||||
|
||||
dnl
|
||||
dnl VMLA($1 $2 $3 )
|
||||
dnl VMLA(name_suffix, size, min_vec_len)
|
||||
define(`VMLA', `
|
||||
// dst_src1 = dst_src1 + src2 * src3
|
||||
instruct vmla$1(vReg dst_src1, vReg src2, vReg src3)
|
||||
%{
|
||||
predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
|
||||
match(Set dst_src1 (AddV$1 dst_src1 (MulV$1 src2 src3)));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_mla $dst_src1, src2, src3\t # vector (sve) ($2)" %}
|
||||
ins_encode %{
|
||||
__ sve_mla(as_FloatRegister($dst_src1$$reg), __ $2,
|
||||
ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
// vector mla
|
||||
VMLA(B, B, 16)
|
||||
VMLA(S, H, 8)
|
||||
VMLA(I, S, 4)
|
||||
VMLA(L, D, 2)
|
||||
|
||||
dnl
|
||||
dnl VMLS($1 $2 $3 )
|
||||
dnl VMLS(name_suffix, size, min_vec_len)
|
||||
define(`VMLS', `
|
||||
// dst_src1 = dst_src1 - src2 * src3
|
||||
instruct vmls$1(vReg dst_src1, vReg src2, vReg src3)
|
||||
%{
|
||||
predicate(UseSVE > 0 && n->as_Vector()->length() >= $3);
|
||||
match(Set dst_src1 (SubV$1 dst_src1 (MulV$1 src2 src3)));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_mls $dst_src1, src2, src3\t # vector (sve) ($2)" %}
|
||||
ins_encode %{
|
||||
__ sve_mls(as_FloatRegister($dst_src1$$reg), __ $2,
|
||||
ptrue, as_FloatRegister($src2$$reg), as_FloatRegister($src3$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
// vector mls
|
||||
VMLS(B, B, 16)
|
||||
VMLS(S, H, 8)
|
||||
VMLS(I, S, 4)
|
||||
VMLS(L, D, 2)
|
||||
|
||||
dnl
|
||||
dnl BINARY_OP_TRUE_PREDICATE($1, $2, $3, $4, $5 )
|
||||
dnl BINARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
|
||||
define(`BINARY_OP_TRUE_PREDICATE', `
|
||||
instruct $1(vReg dst_src1, vReg src2) %{
|
||||
predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
|
||||
match(Set dst_src1 ($2 dst_src1 src2));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "$5 $dst_src1, $dst_src1, $src2\t # vector (sve) ($3)" %}
|
||||
ins_encode %{
|
||||
__ $5(as_FloatRegister($dst_src1$$reg), __ $3,
|
||||
ptrue, as_FloatRegister($src2$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
|
||||
// vector mul
|
||||
BINARY_OP_TRUE_PREDICATE(vmulB, MulVB, B, 16, sve_mul)
|
||||
BINARY_OP_TRUE_PREDICATE(vmulS, MulVS, H, 8, sve_mul)
|
||||
BINARY_OP_TRUE_PREDICATE(vmulI, MulVI, S, 4, sve_mul)
|
||||
BINARY_OP_TRUE_PREDICATE(vmulL, MulVL, D, 2, sve_mul)
|
||||
BINARY_OP_UNPREDICATED(vmulF, MulVF, S, 4, sve_fmul)
|
||||
BINARY_OP_UNPREDICATED(vmulD, MulVD, D, 2, sve_fmul)
|
||||
|
||||
dnl
|
||||
dnl UNARY_OP_TRUE_PREDICATE($1, $2, $3, $4, $5 )
|
||||
dnl UNARY_OP_TRUE_PREDICATE(insn_name, op_name, size, min_vec_bytes, insn)
|
||||
define(`UNARY_OP_TRUE_PREDICATE', `
|
||||
instruct $1(vReg dst, vReg src) %{
|
||||
predicate(UseSVE > 0 && n->as_Vector()->length_in_bytes() >= $4);
|
||||
match(Set dst ($2 src));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "$5 $dst, $src\t# vector (sve) ($3)" %}
|
||||
ins_encode %{
|
||||
__ $5(as_FloatRegister($dst$$reg), __ $3,
|
||||
ptrue, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
// vector fneg
|
||||
UNARY_OP_TRUE_PREDICATE(vnegF, NegVF, S, 16, sve_fneg)
|
||||
UNARY_OP_TRUE_PREDICATE(vnegD, NegVD, D, 16, sve_fneg)
|
||||
|
||||
// popcount vector
|
||||
|
||||
instruct vpopcountI(vReg dst, vReg src) %{
|
||||
predicate(UseSVE > 0 && n->as_Vector()->length() >= 4);
|
||||
match(Set dst (PopCountVI src));
|
||||
format %{ "sve_cnt $dst, $src\t# vector (sve) (S)\n\t" %}
|
||||
ins_encode %{
|
||||
__ sve_cnt(as_FloatRegister($dst$$reg), __ S, ptrue, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
|
||||
dnl
|
||||
dnl REDUCE_ADD($1, $2, $3, $4, $5, $6, $7 )
|
||||
dnl REDUCE_ADD(insn_name, op_name, reg_dst, reg_src, size, elem_type, insn1)
|
||||
define(`REDUCE_ADD', `
|
||||
instruct $1($3 dst, $4 src1, vReg src2, vRegD tmp) %{
|
||||
predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16 &&
|
||||
ELEMENT_SHORT_CHAR($6, n->in(2)));
|
||||
match(Set dst ($2 src1 src2));
|
||||
effect(TEMP_DEF dst, TEMP tmp);
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_uaddv $tmp, $src2\t# vector (sve) ($5)\n\t"
|
||||
"umov $dst, $tmp, $5, 0\n\t"
|
||||
"$7 $dst, $dst, $src1\t # add reduction $5" %}
|
||||
ins_encode %{
|
||||
__ sve_uaddv(as_FloatRegister($tmp$$reg), __ $5,
|
||||
ptrue, as_FloatRegister($src2$$reg));
|
||||
__ umov($dst$$Register, as_FloatRegister($tmp$$reg), __ $5, 0);
|
||||
__ $7($dst$$Register, $dst$$Register, $src1$$Register);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
dnl REDUCE_ADDF($1, $2, $3, $4 )
|
||||
dnl REDUCE_ADDF(insn_name, op_name, reg_dst, size)
|
||||
define(`REDUCE_ADDF', `
|
||||
instruct $1($3 src1_dst, vReg src2) %{
|
||||
predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
|
||||
match(Set src1_dst ($2 src1_dst src2));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_fadda $src1_dst, $src1_dst, $src2\t# vector (sve) ($4)" %}
|
||||
ins_encode %{
|
||||
__ sve_fadda(as_FloatRegister($src1_dst$$reg), __ $4,
|
||||
ptrue, as_FloatRegister($src2$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
// vector add reduction
|
||||
REDUCE_ADD(reduce_addI, AddReductionVI, iRegINoSp, iRegIorL2I, S, T_INT, addw)
|
||||
REDUCE_ADD(reduce_addL, AddReductionVL, iRegLNoSp, iRegL, D, T_LONG, add)
|
||||
REDUCE_ADDF(reduce_addF, AddReductionVF, vRegF, S)
|
||||
REDUCE_ADDF(reduce_addD, AddReductionVD, vRegD, D)
|
||||
|
||||
dnl
|
||||
dnl REDUCE_FMINMAX($1, $2, $3, $4, $5 )
|
||||
dnl REDUCE_FMINMAX(min_max, name_suffix, element_type, size, reg_src_dst)
|
||||
define(`REDUCE_FMINMAX', `
|
||||
instruct reduce_$1$2($5 dst, $5 src1, vReg src2) %{
|
||||
predicate(UseSVE > 0 && n->in(2)->bottom_type()->is_vect()->element_basic_type() == $3 &&
|
||||
n->in(2)->bottom_type()->is_vect()->length_in_bytes() >= 16);
|
||||
match(Set dst (translit($1, `m', `M')ReductionV src1 src2));
|
||||
ins_cost(INSN_COST);
|
||||
effect(TEMP_DEF dst);
|
||||
format %{ "sve_f$1v $dst, $src2 # vector (sve) (S)\n\t"
|
||||
"f$1s $dst, $dst, $src1\t # $1 reduction $2" %}
|
||||
ins_encode %{
|
||||
__ sve_f$1v(as_FloatRegister($dst$$reg), __ $4,
|
||||
ptrue, as_FloatRegister($src2$$reg));
|
||||
__ f`$1'translit($4, `SD', `sd')(as_FloatRegister($dst$$reg), as_FloatRegister($dst$$reg), as_FloatRegister($src1$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
// vector max reduction
|
||||
REDUCE_FMINMAX(max, F, T_FLOAT, S, vRegF)
|
||||
REDUCE_FMINMAX(max, D, T_DOUBLE, D, vRegD)
|
||||
|
||||
// vector min reduction
|
||||
REDUCE_FMINMAX(min, F, T_FLOAT, S, vRegF)
|
||||
REDUCE_FMINMAX(min, D, T_DOUBLE, D, vRegD)
|
||||
|
||||
// vector Math.rint, floor, ceil
|
||||
|
||||
instruct vroundD(vReg dst, vReg src, immI rmode) %{
|
||||
predicate(UseSVE > 0 && n->as_Vector()->length() >= 2 &&
|
||||
n->bottom_type()->is_vect()->element_basic_type() == T_DOUBLE);
|
||||
match(Set dst (RoundDoubleModeV src rmode));
|
||||
format %{ "sve_frint $dst, $src, $rmode\t# vector (sve) (D)" %}
|
||||
ins_encode %{
|
||||
switch ($rmode$$constant) {
|
||||
case RoundDoubleModeNode::rmode_rint:
|
||||
__ sve_frintn(as_FloatRegister($dst$$reg), __ D,
|
||||
ptrue, as_FloatRegister($src$$reg));
|
||||
break;
|
||||
case RoundDoubleModeNode::rmode_floor:
|
||||
__ sve_frintm(as_FloatRegister($dst$$reg), __ D,
|
||||
ptrue, as_FloatRegister($src$$reg));
|
||||
break;
|
||||
case RoundDoubleModeNode::rmode_ceil:
|
||||
__ sve_frintp(as_FloatRegister($dst$$reg), __ D,
|
||||
ptrue, as_FloatRegister($src$$reg));
|
||||
break;
|
||||
}
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}
|
||||
dnl
|
||||
dnl REPLICATE($1, $2, $3, $4, $5 )
|
||||
dnl REPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
|
||||
define(`REPLICATE', `
|
||||
instruct $1(vReg dst, $3 src) %{
|
||||
predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
|
||||
match(Set dst ($2 src));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_dup $dst, $src\t# vector (sve) ($4)" %}
|
||||
ins_encode %{
|
||||
__ sve_dup(as_FloatRegister($dst$$reg), __ $4, as_Register($src$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
dnl REPLICATE_IMM8($1, $2, $3, $4, $5 )
|
||||
dnl REPLICATE_IMM8(insn_name, op_name, imm_type, size, min_vec_len)
|
||||
define(`REPLICATE_IMM8', `
|
||||
instruct $1(vReg dst, $3 con) %{
|
||||
predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
|
||||
match(Set dst ($2 con));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_dup $dst, $con\t# vector (sve) ($4)" %}
|
||||
ins_encode %{
|
||||
__ sve_dup(as_FloatRegister($dst$$reg), __ $4, $con$$constant);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
dnl FREPLICATE($1, $2, $3, $4, $5 )
|
||||
dnl FREPLICATE(insn_name, op_name, reg_src, size, min_vec_len)
|
||||
define(`FREPLICATE', `
|
||||
instruct $1(vReg dst, $3 src) %{
|
||||
predicate(UseSVE > 0 && n->as_Vector()->length() >= $5);
|
||||
match(Set dst ($2 src));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "sve_cpy $dst, $src\t# vector (sve) ($4)" %}
|
||||
ins_encode %{
|
||||
__ sve_cpy(as_FloatRegister($dst$$reg), __ $4,
|
||||
ptrue, as_FloatRegister($src$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
|
||||
// vector replicate
|
||||
REPLICATE(replicateB, ReplicateB, iRegIorL2I, B, 16)
|
||||
REPLICATE(replicateS, ReplicateS, iRegIorL2I, H, 8)
|
||||
REPLICATE(replicateI, ReplicateI, iRegIorL2I, S, 4)
|
||||
REPLICATE(replicateL, ReplicateL, iRegL, D, 2)
|
||||
|
||||
REPLICATE_IMM8(replicateB_imm8, ReplicateB, immI8, B, 16)
|
||||
REPLICATE_IMM8(replicateS_imm8, ReplicateS, immI8_shift8, H, 8)
|
||||
REPLICATE_IMM8(replicateI_imm8, ReplicateI, immI8_shift8, S, 4)
|
||||
REPLICATE_IMM8(replicateL_imm8, ReplicateL, immL8_shift8, D, 2)
|
||||
|
||||
FREPLICATE(replicateF, ReplicateF, vRegF, S, 4)
|
||||
FREPLICATE(replicateD, ReplicateD, vRegD, D, 2)
|
||||
dnl
|
||||
dnl VSHIFT_TRUE_PREDICATE($1, $2, $3, $4, $5 )
|
||||
dnl VSHIFT_TRUE_PREDICATE(insn_name, op_name, size, min_vec_len, insn)
|
||||
define(`VSHIFT_TRUE_PREDICATE', `
|
||||
instruct $1(vReg dst, vReg shift) %{
|
||||
predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
|
||||
match(Set dst ($2 dst shift));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "$5 $dst, $dst, $shift\t# vector (sve) ($3)" %}
|
||||
ins_encode %{
|
||||
__ $5(as_FloatRegister($dst$$reg), __ $3,
|
||||
ptrue, as_FloatRegister($shift$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
dnl VSHIFT_IMM_UNPREDICATE($1, $2, $3, $4, $5 )
|
||||
dnl VSHIFT_IMM_UNPREDICATE(insn_name, op_name, size, min_vec_len, insn)
|
||||
define(`VSHIFT_IMM_UNPREDICATE', `
|
||||
instruct $1(vReg dst, vReg src, immI shift) %{
|
||||
predicate(UseSVE > 0 && n->as_Vector()->length() >= $4);
|
||||
match(Set dst ($2 src shift));
|
||||
ins_cost(SVE_COST);
|
||||
format %{ "$5 $dst, $src, $shift\t# vector (sve) ($3)" %}
|
||||
ins_encode %{
|
||||
int con = (int)$shift$$constant;dnl
|
||||
ifelse(eval(index(`$1', `vasr') == 0 || index(`$1', `vlsr') == 0), 1, `
|
||||
if (con == 0) {
|
||||
__ sve_orr(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
|
||||
as_FloatRegister($src$$reg));
|
||||
return;
|
||||
}')dnl
|
||||
ifelse(eval(index(`$1', `vasr') == 0), 1, `ifelse(eval(index(`$3', `B') == 0), 1, `
|
||||
if (con >= 8) con = 7;')ifelse(eval(index(`$3', `H') == 0), 1, `
|
||||
if (con >= 16) con = 15;')')dnl
|
||||
ifelse(eval((index(`$1', `vlsl') == 0 || index(`$1', `vlsr') == 0) && (index(`$3', `B') == 0 || index(`$3', `H') == 0)), 1, `
|
||||
if (con >= 8) {
|
||||
__ sve_eor(as_FloatRegister($dst$$reg), as_FloatRegister($src$$reg),
|
||||
as_FloatRegister($src$$reg));
|
||||
return;
|
||||
}')
|
||||
__ $5(as_FloatRegister($dst$$reg), __ $3,
|
||||
as_FloatRegister($src$$reg), con);
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
dnl
|
||||
dnl VSHIFT_COUNT($1, $2, $3, $4 )
|
||||
dnl VSHIFT_COUNT(insn_name, size, min_vec_len, type)
|
||||
define(`VSHIFT_COUNT', `
|
||||
instruct $1(vReg dst, iRegIorL2I cnt) %{
|
||||
predicate(UseSVE > 0 && n->as_Vector()->length() >= $3 &&
|
||||
ELEMENT_SHORT_CHAR($4, n));
|
||||
match(Set dst (LShiftCntV cnt));
|
||||
match(Set dst (RShiftCntV cnt));
|
||||
format %{ "sve_dup $dst, $cnt\t# vector shift count (sve) ($2)" %}
|
||||
ins_encode %{
|
||||
__ sve_dup(as_FloatRegister($dst$$reg), __ $2, as_Register($cnt$$reg));
|
||||
%}
|
||||
ins_pipe(pipe_slow);
|
||||
%}')dnl
|
||||
|
||||
// vector shift
|
||||
VSHIFT_TRUE_PREDICATE(vasrB, RShiftVB, B, 16, sve_asr)
|
||||
VSHIFT_TRUE_PREDICATE(vasrS, RShiftVS, H, 8, sve_asr)
|
||||
VSHIFT_TRUE_PREDICATE(vasrI, RShiftVI, S, 4, sve_asr)
|
||||
VSHIFT_TRUE_PREDICATE(vasrL, RShiftVL, D, 2, sve_asr)
|
||||
VSHIFT_TRUE_PREDICATE(vlslB, LShiftVB, B, 16, sve_lsl)
|
||||
VSHIFT_TRUE_PREDICATE(vlslS, LShiftVS, H, 8, sve_lsl)
|
||||
VSHIFT_TRUE_PREDICATE(vlslI, LShiftVI, S, 4, sve_lsl)
|
||||
VSHIFT_TRUE_PREDICATE(vlslL, LShiftVL, D, 2, sve_lsl)
|
||||
VSHIFT_TRUE_PREDICATE(vlsrB, URShiftVB, B, 16, sve_lsr)
|
||||
VSHIFT_TRUE_PREDICATE(vlsrS, URShiftVS, H, 8, sve_lsr)
|
||||
VSHIFT_TRUE_PREDICATE(vlsrI, URShiftVI, S, 4, sve_lsr)
|
||||
VSHIFT_TRUE_PREDICATE(vlsrL, URShiftVL, D, 2, sve_lsr)
|
||||
VSHIFT_IMM_UNPREDICATE(vasrB_imm, RShiftVB, B, 16, sve_asr)
|
||||
VSHIFT_IMM_UNPREDICATE(vasrS_imm, RShiftVS, H, 8, sve_asr)
|
||||
VSHIFT_IMM_UNPREDICATE(vasrI_imm, RShiftVI, S, 4, sve_asr)
|
||||
VSHIFT_IMM_UNPREDICATE(vasrL_imm, RShiftVL, D, 2, sve_asr)
|
||||
VSHIFT_IMM_UNPREDICATE(vlsrB_imm, URShiftVB, B, 16, sve_lsr)
|
||||
VSHIFT_IMM_UNPREDICATE(vlsrS_imm, URShiftVS, H, 8, sve_lsr)
|
||||
VSHIFT_IMM_UNPREDICATE(vlsrI_imm, URShiftVI, S, 4, sve_lsr)
|
||||
VSHIFT_IMM_UNPREDICATE(vlsrL_imm, URShiftVL, D, 2, sve_lsr)
|
||||
VSHIFT_IMM_UNPREDICATE(vlslB_imm, LShiftVB, B, 16, sve_lsl)
|
||||
VSHIFT_IMM_UNPREDICATE(vlslS_imm, LShiftVS, H, 8, sve_lsl)
|
||||
VSHIFT_IMM_UNPREDICATE(vlslI_imm, LShiftVI, S, 4, sve_lsl)
|
||||
VSHIFT_IMM_UNPREDICATE(vlslL_imm, LShiftVL, D, 2, sve_lsl)
|
||||
VSHIFT_COUNT(vshiftcntB, B, 16, T_BYTE)
|
||||
VSHIFT_COUNT(vshiftcntS, H, 8, T_SHORT)
|
||||
VSHIFT_COUNT(vshiftcntI, S, 4, T_INT)
|
||||
VSHIFT_COUNT(vshiftcntL, D, 2, T_LONG)
|
||||
|
||||
// vector sqrt
|
||||
UNARY_OP_TRUE_PREDICATE(vsqrtF, SqrtVF, S, 16, sve_fsqrt)
|
||||
UNARY_OP_TRUE_PREDICATE(vsqrtD, SqrtVD, D, 16, sve_fsqrt)
|
||||
|
||||
// vector sub
|
||||
BINARY_OP_UNPREDICATED(vsubB, SubVB, B, 16, sve_sub)
|
||||
BINARY_OP_UNPREDICATED(vsubS, SubVS, H, 8, sve_sub)
|
||||
BINARY_OP_UNPREDICATED(vsubI, SubVI, S, 4, sve_sub)
|
||||
BINARY_OP_UNPREDICATED(vsubL, SubVL, D, 2, sve_sub)
|
||||
BINARY_OP_UNPREDICATED(vsubF, SubVF, S, 4, sve_fsub)
|
||||
BINARY_OP_UNPREDICATED(vsubD, SubVD, D, 2, sve_fsub)
|
||||
|
@@ -636,6 +636,39 @@ void entry(CodeBuffer *cb) {
|
||||
__ mov(v1, __ T4H, 2, zr); // mov v1.h[2], wzr
|
||||
__ mov(v1, __ T8B, 3, zr); // mov v1.b[3], wzr
|
||||
__ ld1(v31, v0, __ T2D, Address(__ post(r1, r0))); // ld1 {v31.2d, v0.2d}, [x1], x0
|
||||
__ sve_cpy(z0, __ S, p0, v1); // mov z0.s, p0/m, s1
|
||||
__ sve_inc(r0, __ S); // incw x0
|
||||
__ sve_dec(r1, __ H); // dech x1
|
||||
__ sve_lsl(z0, __ B, z1, 7); // lsl z0.b, z1.b, #7
|
||||
__ sve_lsl(z21, __ H, z1, 15); // lsl z21.h, z1.h, #15
|
||||
__ sve_lsl(z0, __ S, z1, 31); // lsl z0.s, z1.s, #31
|
||||
__ sve_lsl(z0, __ D, z1, 63); // lsl z0.d, z1.d, #63
|
||||
__ sve_lsr(z0, __ B, z1, 7); // lsr z0.b, z1.b, #7
|
||||
__ sve_asr(z0, __ H, z11, 15); // asr z0.h, z11.h, #15
|
||||
__ sve_lsr(z30, __ S, z1, 31); // lsr z30.s, z1.s, #31
|
||||
__ sve_asr(z0, __ D, z1, 63); // asr z0.d, z1.d, #63
|
||||
__ sve_addvl(sp, r0, 31); // addvl sp, x0, #31
|
||||
__ sve_addpl(r1, sp, -32); // addpl x1, sp, -32
|
||||
__ sve_cntp(r8, __ B, p0, p1); // cntp x8, p0, p1.b
|
||||
__ sve_dup(z0, __ B, 127); // dup z0.b, 127
|
||||
__ sve_dup(z1, __ H, -128); // dup z1.h, -128
|
||||
__ sve_dup(z2, __ S, 32512); // dup z2.s, 32512
|
||||
__ sve_dup(z7, __ D, -32768); // dup z7.d, -32768
|
||||
__ sve_ld1b(z0, __ B, p0, Address(sp)); // ld1b {z0.b}, p0/z, [sp]
|
||||
__ sve_ld1h(z10, __ H, p1, Address(sp, -8)); // ld1h {z10.h}, p1/z, [sp, #-8, MUL VL]
|
||||
__ sve_ld1w(z20, __ S, p2, Address(r0, 7)); // ld1w {z20.s}, p2/z, [x0, #7, MUL VL]
|
||||
__ sve_ld1b(z30, __ B, p3, Address(sp, r8)); // ld1b {z30.b}, p3/z, [sp, x8]
|
||||
__ sve_ld1w(z0, __ S, p4, Address(sp, r28)); // ld1w {z0.s}, p4/z, [sp, x28, LSL #2]
|
||||
__ sve_ld1d(z11, __ D, p5, Address(r0, r1)); // ld1d {z11.d}, p5/z, [x0, x1, LSL #3]
|
||||
__ sve_st1b(z22, __ B, p6, Address(sp)); // st1b {z22.b}, p6, [sp]
|
||||
__ sve_st1b(z31, __ B, p7, Address(sp, -8)); // st1b {z31.b}, p7, [sp, #-8, MUL VL]
|
||||
__ sve_st1w(z0, __ S, p1, Address(r0, 7)); // st1w {z0.s}, p1, [x0, #7, MUL VL]
|
||||
__ sve_st1b(z0, __ B, p2, Address(sp, r1)); // st1b {z0.b}, p2, [sp, x1]
|
||||
__ sve_st1h(z0, __ H, p3, Address(sp, r8)); // st1h {z0.h}, p3, [sp, x8, LSL #1]
|
||||
__ sve_st1d(z0, __ D, p4, Address(r0, r18)); // st1d {z0.d}, p4, [x0, x18, LSL #3]
|
||||
__ sve_ldr(z0, Address(sp)); // ldr z0, [sp]
|
||||
__ sve_ldr(z31, Address(sp, -256)); // ldr z31, [sp, #-256, MUL VL]
|
||||
__ sve_str(z8, Address(r8, 255)); // str z8, [x8, #255, MUL VL]
|
||||
|
||||
// FloatImmediateOp
|
||||
__ fmovd(v0, 2.0); // fmov d0, #2.0
|
||||
@@ -759,6 +792,57 @@ void entry(CodeBuffer *cb) {
|
||||
__ lduminl(Assembler::word, r12, r15, r13); // lduminl w12, w15, [x13]
|
||||
__ ldumaxl(Assembler::word, r2, r7, r20); // ldumaxl w2, w7, [x20]
|
||||
|
||||
// SVEVectorOp
|
||||
__ sve_add(z25, __ B, z15, z4); // add z25.b, z15.b, z4.b
|
||||
__ sve_sub(z4, __ S, z11, z17); // sub z4.s, z11.s, z17.s
|
||||
__ sve_fadd(z16, __ D, z17, z10); // fadd z16.d, z17.d, z10.d
|
||||
__ sve_fmul(z22, __ D, z12, z25); // fmul z22.d, z12.d, z25.d
|
||||
__ sve_fsub(z28, __ D, z14, z10); // fsub z28.d, z14.d, z10.d
|
||||
__ sve_abs(z1, __ H, p3, z30); // abs z1.h, p3/m, z30.h
|
||||
__ sve_add(z15, __ B, p1, z2); // add z15.b, p1/m, z15.b, z2.b
|
||||
__ sve_asr(z13, __ S, p4, z16); // asr z13.s, p4/m, z13.s, z16.s
|
||||
__ sve_cnt(z3, __ D, p0, z11); // cnt z3.d, p0/m, z11.d
|
||||
__ sve_lsl(z5, __ D, p2, z14); // lsl z5.d, p2/m, z5.d, z14.d
|
||||
__ sve_lsr(z29, __ B, p0, z20); // lsr z29.b, p0/m, z29.b, z20.b
|
||||
__ sve_mul(z20, __ S, p5, z27); // mul z20.s, p5/m, z20.s, z27.s
|
||||
__ sve_neg(z26, __ B, p6, z4); // neg z26.b, p6/m, z4.b
|
||||
__ sve_not(z22, __ B, p4, z30); // not z22.b, p4/m, z30.b
|
||||
__ sve_smax(z11, __ H, p2, z27); // smax z11.h, p2/m, z11.h, z27.h
|
||||
__ sve_smin(z28, __ S, p5, z30); // smin z28.s, p5/m, z28.s, z30.s
|
||||
__ sve_sub(z30, __ S, p1, z13); // sub z30.s, p1/m, z30.s, z13.s
|
||||
__ sve_fabs(z30, __ D, p4, z26); // fabs z30.d, p4/m, z26.d
|
||||
__ sve_fadd(z15, __ S, p3, z11); // fadd z15.s, p3/m, z15.s, z11.s
|
||||
__ sve_fdiv(z6, __ D, p7, z16); // fdiv z6.d, p7/m, z6.d, z16.d
|
||||
__ sve_fmax(z27, __ S, p7, z7); // fmax z27.s, p7/m, z27.s, z7.s
|
||||
__ sve_fmin(z19, __ D, p2, z4); // fmin z19.d, p2/m, z19.d, z4.d
|
||||
__ sve_fmul(z17, __ S, p4, z22); // fmul z17.s, p4/m, z17.s, z22.s
|
||||
__ sve_fneg(z28, __ D, p3, z21); // fneg z28.d, p3/m, z21.d
|
||||
__ sve_frintm(z18, __ S, p5, z2); // frintm z18.s, p5/m, z2.s
|
||||
__ sve_frintn(z6, __ S, p3, z15); // frintn z6.s, p3/m, z15.s
|
||||
__ sve_frintp(z12, __ D, p5, z1); // frintp z12.d, p5/m, z1.d
|
||||
__ sve_fsqrt(z18, __ S, p1, z17); // fsqrt z18.s, p1/m, z17.s
|
||||
__ sve_fsub(z15, __ S, p5, z13); // fsub z15.s, p5/m, z15.s, z13.s
|
||||
__ sve_fmla(z20, __ D, p7, z27, z11); // fmla z20.d, p7/m, z27.d, z11.d
|
||||
__ sve_fmls(z3, __ D, p0, z30, z23); // fmls z3.d, p0/m, z30.d, z23.d
|
||||
__ sve_fnmla(z17, __ S, p2, z27, z26); // fnmla z17.s, p2/m, z27.s, z26.s
|
||||
__ sve_fnmls(z6, __ D, p5, z22, z30); // fnmls z6.d, p5/m, z22.d, z30.d
|
||||
__ sve_mla(z2, __ H, p7, z26, z18); // mla z2.h, p7/m, z26.h, z18.h
|
||||
__ sve_mls(z22, __ B, p4, z2, z17); // mls z22.b, p4/m, z2.b, z17.b
|
||||
__ sve_and(z24, z25, z22); // and z24.d, z25.d, z22.d
|
||||
__ sve_eor(z18, z12, z3); // eor z18.d, z12.d, z3.d
|
||||
__ sve_orr(z29, z28, z16); // orr z29.d, z28.d, z16.d
|
||||
|
||||
// SVEReductionOp
|
||||
__ sve_andv(v6, __ S, p2, z28); // andv s6, p2, z28.s
|
||||
__ sve_orv(v7, __ H, p1, z7); // orv h7, p1, z7.h
|
||||
__ sve_eorv(v9, __ B, p5, z8); // eorv b9, p5, z8.b
|
||||
__ sve_smaxv(v27, __ B, p5, z30); // smaxv b27, p5, z30.b
|
||||
__ sve_sminv(v26, __ H, p0, z16); // sminv h26, p0, z16.h
|
||||
__ sve_fminv(v3, __ D, p6, z8); // fminv d3, p6, z8.d
|
||||
__ sve_fmaxv(v21, __ D, p6, z26); // fmaxv d21, p6, z26.d
|
||||
__ sve_fadda(v22, __ S, p0, z4); // fadda s22, p0, s22, z4.s
|
||||
__ sve_uaddv(v17, __ H, p0, z3); // uaddv d17, p0, z3.h
|
||||
|
||||
__ bind(forth);
|
||||
|
||||
/*
|
||||
@@ -810,32 +894,32 @@ Disassembly of section .text:
|
||||
9c: f26aad01 ands x1, x8, #0xffffffffffc00003
|
||||
a0: 14000000 b a0 <back+0xa0>
|
||||
a4: 17ffffd7 b 0 <back>
|
||||
a8: 140001f2 b 870 <forth>
|
||||
a8: 14000242 b 9b0 <forth>
|
||||
ac: 94000000 bl ac <back+0xac>
|
||||
b0: 97ffffd4 bl 0 <back>
|
||||
b4: 940001ef bl 870 <forth>
|
||||
b4: 9400023f bl 9b0 <forth>
|
||||
b8: 3400000a cbz w10, b8 <back+0xb8>
|
||||
bc: 34fffa2a cbz w10, 0 <back>
|
||||
c0: 34003d8a cbz w10, 870 <forth>
|
||||
c0: 3400478a cbz w10, 9b0 <forth>
|
||||
c4: 35000008 cbnz w8, c4 <back+0xc4>
|
||||
c8: 35fff9c8 cbnz w8, 0 <back>
|
||||
cc: 35003d28 cbnz w8, 870 <forth>
|
||||
cc: 35004728 cbnz w8, 9b0 <forth>
|
||||
d0: b400000b cbz x11, d0 <back+0xd0>
|
||||
d4: b4fff96b cbz x11, 0 <back>
|
||||
d8: b4003ccb cbz x11, 870 <forth>
|
||||
d8: b40046cb cbz x11, 9b0 <forth>
|
||||
dc: b500001d cbnz x29, dc <back+0xdc>
|
||||
e0: b5fff91d cbnz x29, 0 <back>
|
||||
e4: b5003c7d cbnz x29, 870 <forth>
|
||||
e4: b500467d cbnz x29, 9b0 <forth>
|
||||
e8: 10000013 adr x19, e8 <back+0xe8>
|
||||
ec: 10fff8b3 adr x19, 0 <back>
|
||||
f0: 10003c13 adr x19, 870 <forth>
|
||||
f0: 10004613 adr x19, 9b0 <forth>
|
||||
f4: 90000013 adrp x19, 0 <back>
|
||||
f8: 36300016 tbz w22, #6, f8 <back+0xf8>
|
||||
fc: 3637f836 tbz w22, #6, 0 <back>
|
||||
100: 36303b96 tbz w22, #6, 870 <forth>
|
||||
100: 36304596 tbz w22, #6, 9b0 <forth>
|
||||
104: 3758000c tbnz w12, #11, 104 <back+0x104>
|
||||
108: 375ff7cc tbnz w12, #11, 0 <back>
|
||||
10c: 37583b2c tbnz w12, #11, 870 <forth>
|
||||
10c: 3758452c tbnz w12, #11, 9b0 <forth>
|
||||
110: 128313a0 mov w0, #0xffffe762 // #-6302
|
||||
114: 528a32c7 mov w7, #0x5196 // #20886
|
||||
118: 7289173b movk w27, #0x48b9
|
||||
@@ -852,58 +936,58 @@ Disassembly of section .text:
|
||||
144: 93c3dbc8 extr x8, x30, x3, #54
|
||||
148: 54000000 b.eq 148 <back+0x148> // b.none
|
||||
14c: 54fff5a0 b.eq 0 <back> // b.none
|
||||
150: 54003900 b.eq 870 <forth> // b.none
|
||||
150: 54004300 b.eq 9b0 <forth> // b.none
|
||||
154: 54000001 b.ne 154 <back+0x154> // b.any
|
||||
158: 54fff541 b.ne 0 <back> // b.any
|
||||
15c: 540038a1 b.ne 870 <forth> // b.any
|
||||
15c: 540042a1 b.ne 9b0 <forth> // b.any
|
||||
160: 54000002 b.cs 160 <back+0x160> // b.hs, b.nlast
|
||||
164: 54fff4e2 b.cs 0 <back> // b.hs, b.nlast
|
||||
168: 54003842 b.cs 870 <forth> // b.hs, b.nlast
|
||||
168: 54004242 b.cs 9b0 <forth> // b.hs, b.nlast
|
||||
16c: 54000002 b.cs 16c <back+0x16c> // b.hs, b.nlast
|
||||
170: 54fff482 b.cs 0 <back> // b.hs, b.nlast
|
||||
174: 540037e2 b.cs 870 <forth> // b.hs, b.nlast
|
||||
174: 540041e2 b.cs 9b0 <forth> // b.hs, b.nlast
|
||||
178: 54000003 b.cc 178 <back+0x178> // b.lo, b.ul, b.last
|
||||
17c: 54fff423 b.cc 0 <back> // b.lo, b.ul, b.last
|
||||
180: 54003783 b.cc 870 <forth> // b.lo, b.ul, b.last
|
||||
180: 54004183 b.cc 9b0 <forth> // b.lo, b.ul, b.last
|
||||
184: 54000003 b.cc 184 <back+0x184> // b.lo, b.ul, b.last
|
||||
188: 54fff3c3 b.cc 0 <back> // b.lo, b.ul, b.last
|
||||
18c: 54003723 b.cc 870 <forth> // b.lo, b.ul, b.last
|
||||
18c: 54004123 b.cc 9b0 <forth> // b.lo, b.ul, b.last
|
||||
190: 54000004 b.mi 190 <back+0x190> // b.first
|
||||
194: 54fff364 b.mi 0 <back> // b.first
|
||||
198: 540036c4 b.mi 870 <forth> // b.first
|
||||
198: 540040c4 b.mi 9b0 <forth> // b.first
|
||||
19c: 54000005 b.pl 19c <back+0x19c> // b.nfrst
|
||||
1a0: 54fff305 b.pl 0 <back> // b.nfrst
|
||||
1a4: 54003665 b.pl 870 <forth> // b.nfrst
|
||||
1a4: 54004065 b.pl 9b0 <forth> // b.nfrst
|
||||
1a8: 54000006 b.vs 1a8 <back+0x1a8>
|
||||
1ac: 54fff2a6 b.vs 0 <back>
|
||||
1b0: 54003606 b.vs 870 <forth>
|
||||
1b0: 54004006 b.vs 9b0 <forth>
|
||||
1b4: 54000007 b.vc 1b4 <back+0x1b4>
|
||||
1b8: 54fff247 b.vc 0 <back>
|
||||
1bc: 540035a7 b.vc 870 <forth>
|
||||
1bc: 54003fa7 b.vc 9b0 <forth>
|
||||
1c0: 54000008 b.hi 1c0 <back+0x1c0> // b.pmore
|
||||
1c4: 54fff1e8 b.hi 0 <back> // b.pmore
|
||||
1c8: 54003548 b.hi 870 <forth> // b.pmore
|
||||
1c8: 54003f48 b.hi 9b0 <forth> // b.pmore
|
||||
1cc: 54000009 b.ls 1cc <back+0x1cc> // b.plast
|
||||
1d0: 54fff189 b.ls 0 <back> // b.plast
|
||||
1d4: 540034e9 b.ls 870 <forth> // b.plast
|
||||
1d4: 54003ee9 b.ls 9b0 <forth> // b.plast
|
||||
1d8: 5400000a b.ge 1d8 <back+0x1d8> // b.tcont
|
||||
1dc: 54fff12a b.ge 0 <back> // b.tcont
|
||||
1e0: 5400348a b.ge 870 <forth> // b.tcont
|
||||
1e0: 54003e8a b.ge 9b0 <forth> // b.tcont
|
||||
1e4: 5400000b b.lt 1e4 <back+0x1e4> // b.tstop
|
||||
1e8: 54fff0cb b.lt 0 <back> // b.tstop
|
||||
1ec: 5400342b b.lt 870 <forth> // b.tstop
|
||||
1ec: 54003e2b b.lt 9b0 <forth> // b.tstop
|
||||
1f0: 5400000c b.gt 1f0 <back+0x1f0>
|
||||
1f4: 54fff06c b.gt 0 <back>
|
||||
1f8: 540033cc b.gt 870 <forth>
|
||||
1f8: 54003dcc b.gt 9b0 <forth>
|
||||
1fc: 5400000d b.le 1fc <back+0x1fc>
|
||||
200: 54fff00d b.le 0 <back>
|
||||
204: 5400336d b.le 870 <forth>
|
||||
204: 54003d6d b.le 9b0 <forth>
|
||||
208: 5400000e b.al 208 <back+0x208>
|
||||
20c: 54ffefae b.al 0 <back>
|
||||
210: 5400330e b.al 870 <forth>
|
||||
210: 54003d0e b.al 9b0 <forth>
|
||||
214: 5400000f b.nv 214 <back+0x214>
|
||||
218: 54ffef4f b.nv 0 <back>
|
||||
21c: 540032af b.nv 870 <forth>
|
||||
21c: 54003caf b.nv 9b0 <forth>
|
||||
220: d40658e1 svc #0x32c7
|
||||
224: d4014d22 hvc #0xa69
|
||||
228: d4046543 smc #0x232a
|
||||
@@ -1029,7 +1113,7 @@ Disassembly of section .text:
|
||||
408: bd5fa1d9 ldr s25, [x14, #8096]
|
||||
40c: fd1d595a str d26, [x10, #15024]
|
||||
410: bd1b1869 str s9, [x3, #6936]
|
||||
414: 580022fb ldr x27, 870 <forth>
|
||||
414: 58002cfb ldr x27, 9b0 <forth>
|
||||
418: 1800000b ldr w11, 418 <back+0x418>
|
||||
41c: f8945060 prfum pldl1keep, [x3, #-187]
|
||||
420: d8000000 prfm pldl1keep, 420 <back+0x420>
|
||||
@@ -1204,110 +1288,190 @@ Disassembly of section .text:
|
||||
6c4: 4e0a1fe1 mov v1.h[2], wzr
|
||||
6c8: 4e071fe1 mov v1.b[3], wzr
|
||||
6cc: 4cc0ac3f ld1 {v31.2d, v0.2d}, [x1], x0
|
||||
6d0: 1e601000 fmov d0, #2.000000000000000000e+00
|
||||
6d4: 1e603000 fmov d0, #2.125000000000000000e+00
|
||||
6d8: 1e621000 fmov d0, #4.000000000000000000e+00
|
||||
6dc: 1e623000 fmov d0, #4.250000000000000000e+00
|
||||
6e0: 1e641000 fmov d0, #8.000000000000000000e+00
|
||||
6e4: 1e643000 fmov d0, #8.500000000000000000e+00
|
||||
6e8: 1e661000 fmov d0, #1.600000000000000000e+01
|
||||
6ec: 1e663000 fmov d0, #1.700000000000000000e+01
|
||||
6f0: 1e681000 fmov d0, #1.250000000000000000e-01
|
||||
6f4: 1e683000 fmov d0, #1.328125000000000000e-01
|
||||
6f8: 1e6a1000 fmov d0, #2.500000000000000000e-01
|
||||
6fc: 1e6a3000 fmov d0, #2.656250000000000000e-01
|
||||
700: 1e6c1000 fmov d0, #5.000000000000000000e-01
|
||||
704: 1e6c3000 fmov d0, #5.312500000000000000e-01
|
||||
708: 1e6e1000 fmov d0, #1.000000000000000000e+00
|
||||
70c: 1e6e3000 fmov d0, #1.062500000000000000e+00
|
||||
710: 1e701000 fmov d0, #-2.000000000000000000e+00
|
||||
714: 1e703000 fmov d0, #-2.125000000000000000e+00
|
||||
718: 1e721000 fmov d0, #-4.000000000000000000e+00
|
||||
71c: 1e723000 fmov d0, #-4.250000000000000000e+00
|
||||
720: 1e741000 fmov d0, #-8.000000000000000000e+00
|
||||
724: 1e743000 fmov d0, #-8.500000000000000000e+00
|
||||
728: 1e761000 fmov d0, #-1.600000000000000000e+01
|
||||
72c: 1e763000 fmov d0, #-1.700000000000000000e+01
|
||||
730: 1e781000 fmov d0, #-1.250000000000000000e-01
|
||||
734: 1e783000 fmov d0, #-1.328125000000000000e-01
|
||||
738: 1e7a1000 fmov d0, #-2.500000000000000000e-01
|
||||
73c: 1e7a3000 fmov d0, #-2.656250000000000000e-01
|
||||
740: 1e7c1000 fmov d0, #-5.000000000000000000e-01
|
||||
744: 1e7c3000 fmov d0, #-5.312500000000000000e-01
|
||||
748: 1e7e1000 fmov d0, #-1.000000000000000000e+00
|
||||
74c: 1e7e3000 fmov d0, #-1.062500000000000000e+00
|
||||
750: f8388098 swp x24, x24, [x4]
|
||||
754: f8340010 ldadd x20, x16, [x0]
|
||||
758: f8241175 ldclr x4, x21, [x11]
|
||||
75c: f83e22d0 ldeor x30, x16, [x22]
|
||||
760: f82432ef ldset x4, x15, [x23]
|
||||
764: f83a5186 ldsmin x26, x6, [x12]
|
||||
768: f82f41ee ldsmax x15, x14, [x15]
|
||||
76c: f82973b9 ldumin x9, x25, [x29]
|
||||
770: f82b6194 ldumax x11, x20, [x12]
|
||||
774: f8b28216 swpa x18, x22, [x16]
|
||||
778: f8b50358 ldadda x21, x24, [x26]
|
||||
77c: f8a61206 ldclra x6, x6, [x16]
|
||||
780: f8b02219 ldeora x16, x25, [x16]
|
||||
784: f8bc3218 ldseta x28, x24, [x16]
|
||||
788: f8ba514f ldsmina x26, x15, [x10]
|
||||
78c: f8ad428e ldsmaxa x13, x14, [x20]
|
||||
790: f8a173d7 ldumina x1, x23, [x30]
|
||||
794: f8ae60c2 ldumaxa x14, x2, [x6]
|
||||
798: f8e38328 swpal x3, x8, [x25]
|
||||
79c: f8e003db ldaddal x0, x27, [x30]
|
||||
7a0: f8e513c5 ldclral x5, x5, [x30]
|
||||
7a4: f8eb2019 ldeoral x11, x25, [x0]
|
||||
7a8: f8ff3260 ldsetal xzr, x0, [x19]
|
||||
7ac: f8fd513a ldsminal x29, x26, [x9]
|
||||
7b0: f8fa41ec ldsmaxal x26, x12, [x15]
|
||||
7b4: f8eb724b lduminal x11, x11, [x18]
|
||||
7b8: f8f96316 ldumaxal x25, x22, [x24]
|
||||
7bc: f8608171 swpl x0, x17, [x11]
|
||||
7c0: f86600dd ldaddl x6, x29, [x6]
|
||||
7c4: f86512a5 ldclrl x5, x5, [x21]
|
||||
7c8: f8732250 ldeorl x19, x16, [x18]
|
||||
7cc: f87e339b ldsetl x30, x27, [x28]
|
||||
7d0: f861503c ldsminl x1, x28, [x1]
|
||||
7d4: f874421d ldsmaxl x20, x29, [x16]
|
||||
7d8: f86d73aa lduminl x13, x10, [x29]
|
||||
7dc: f87d62d3 ldumaxl x29, x19, [x22]
|
||||
7e0: b82a83e4 swp w10, w4, [sp]
|
||||
7e4: b83503e8 ldadd w21, w8, [sp]
|
||||
7e8: b833138a ldclr w19, w10, [x28]
|
||||
7ec: b82220b9 ldeor w2, w25, [x5]
|
||||
7f0: b82332c8 ldset w3, w8, [x22]
|
||||
7f4: b83350ad ldsmin w19, w13, [x5]
|
||||
7f8: b83d42b8 ldsmax w29, w24, [x21]
|
||||
7fc: b83a7078 ldumin w26, w24, [x3]
|
||||
800: b83862fa ldumax w24, w26, [x23]
|
||||
804: b8af8075 swpa w15, w21, [x3]
|
||||
808: b8b80328 ldadda w24, w8, [x25]
|
||||
80c: b8b41230 ldclra w20, w16, [x17]
|
||||
810: b8a22001 ldeora w2, w1, [x0]
|
||||
814: b8b83064 ldseta w24, w4, [x3]
|
||||
818: b8ac539f ldsmina w12, wzr, [x28]
|
||||
81c: b8aa405a ldsmaxa w10, w26, [x2]
|
||||
820: b8ac73f2 ldumina w12, w18, [sp]
|
||||
824: b8a163ad ldumaxa w1, w13, [x29]
|
||||
828: b8e08193 swpal w0, w19, [x12]
|
||||
82c: b8f101b6 ldaddal w17, w22, [x13]
|
||||
830: b8fc13fe ldclral w28, w30, [sp]
|
||||
834: b8e1239a ldeoral w1, w26, [x28]
|
||||
838: b8e4309e ldsetal w4, w30, [x4]
|
||||
83c: b8e6535e ldsminal w6, w30, [x26]
|
||||
840: b8f24109 ldsmaxal w18, w9, [x8]
|
||||
844: b8ec7280 lduminal w12, w0, [x20]
|
||||
848: b8e16058 ldumaxal w1, w24, [x2]
|
||||
84c: b8608309 swpl w0, w9, [x24]
|
||||
850: b87a03d0 ldaddl w26, w16, [x30]
|
||||
854: b86312ea ldclrl w3, w10, [x23]
|
||||
858: b86a2244 ldeorl w10, w4, [x18]
|
||||
85c: b862310b ldsetl w2, w11, [x8]
|
||||
860: b86a522f ldsminl w10, w15, [x17]
|
||||
864: b862418a ldsmaxl w2, w10, [x12]
|
||||
868: b86c71af lduminl w12, w15, [x13]
|
||||
86c: b8626287 ldumaxl w2, w7, [x20]
|
||||
6d0: 05a08020 mov z0.s, p0/m, s1
|
||||
6d4: 04b0e3e0 incw x0
|
||||
6d8: 0470e7e1 dech x1
|
||||
6dc: 042f9c20 lsl z0.b, z1.b, #7
|
||||
6e0: 043f9c35 lsl z21.h, z1.h, #15
|
||||
6e4: 047f9c20 lsl z0.s, z1.s, #31
|
||||
6e8: 04ff9c20 lsl z0.d, z1.d, #63
|
||||
6ec: 04299420 lsr z0.b, z1.b, #7
|
||||
6f0: 04319160 asr z0.h, z11.h, #15
|
||||
6f4: 0461943e lsr z30.s, z1.s, #31
|
||||
6f8: 04a19020 asr z0.d, z1.d, #63
|
||||
6fc: 042053ff addvl sp, x0, #31
|
||||
700: 047f5401 addpl x1, sp, #-32
|
||||
704: 25208028 cntp x8, p0, p1.b
|
||||
708: 2538cfe0 mov z0.b, #127
|
||||
70c: 2578d001 mov z1.h, #-128
|
||||
710: 25b8efe2 mov z2.s, #32512
|
||||
714: 25f8f007 mov z7.d, #-32768
|
||||
718: a400a3e0 ld1b {z0.b}, p0/z, [sp]
|
||||
71c: a4a8a7ea ld1h {z10.h}, p1/z, [sp, #-8, mul vl]
|
||||
720: a547a814 ld1w {z20.s}, p2/z, [x0, #7, mul vl]
|
||||
724: a4084ffe ld1b {z30.b}, p3/z, [sp, x8]
|
||||
728: a55c53e0 ld1w {z0.s}, p4/z, [sp, x28, lsl #2]
|
||||
72c: a5e1540b ld1d {z11.d}, p5/z, [x0, x1, lsl #3]
|
||||
730: e400fbf6 st1b {z22.b}, p6, [sp]
|
||||
734: e408ffff st1b {z31.b}, p7, [sp, #-8, mul vl]
|
||||
738: e547e400 st1w {z0.s}, p1, [x0, #7, mul vl]
|
||||
73c: e4014be0 st1b {z0.b}, p2, [sp, x1]
|
||||
740: e4a84fe0 st1h {z0.h}, p3, [sp, x8, lsl #1]
|
||||
744: e5f25000 st1d {z0.d}, p4, [x0, x18, lsl #3]
|
||||
748: 858043e0 ldr z0, [sp]
|
||||
74c: 85a043ff ldr z31, [sp, #-256, mul vl]
|
||||
750: e59f5d08 str z8, [x8, #255, mul vl]
|
||||
754: 1e601000 fmov d0, #2.000000000000000000e+00
|
||||
758: 1e603000 fmov d0, #2.125000000000000000e+00
|
||||
75c: 1e621000 fmov d0, #4.000000000000000000e+00
|
||||
760: 1e623000 fmov d0, #4.250000000000000000e+00
|
||||
764: 1e641000 fmov d0, #8.000000000000000000e+00
|
||||
768: 1e643000 fmov d0, #8.500000000000000000e+00
|
||||
76c: 1e661000 fmov d0, #1.600000000000000000e+01
|
||||
770: 1e663000 fmov d0, #1.700000000000000000e+01
|
||||
774: 1e681000 fmov d0, #1.250000000000000000e-01
|
||||
778: 1e683000 fmov d0, #1.328125000000000000e-01
|
||||
77c: 1e6a1000 fmov d0, #2.500000000000000000e-01
|
||||
780: 1e6a3000 fmov d0, #2.656250000000000000e-01
|
||||
784: 1e6c1000 fmov d0, #5.000000000000000000e-01
|
||||
788: 1e6c3000 fmov d0, #5.312500000000000000e-01
|
||||
78c: 1e6e1000 fmov d0, #1.000000000000000000e+00
|
||||
790: 1e6e3000 fmov d0, #1.062500000000000000e+00
|
||||
794: 1e701000 fmov d0, #-2.000000000000000000e+00
|
||||
798: 1e703000 fmov d0, #-2.125000000000000000e+00
|
||||
79c: 1e721000 fmov d0, #-4.000000000000000000e+00
|
||||
7a0: 1e723000 fmov d0, #-4.250000000000000000e+00
|
||||
7a4: 1e741000 fmov d0, #-8.000000000000000000e+00
|
||||
7a8: 1e743000 fmov d0, #-8.500000000000000000e+00
|
||||
7ac: 1e761000 fmov d0, #-1.600000000000000000e+01
|
||||
7b0: 1e763000 fmov d0, #-1.700000000000000000e+01
|
||||
7b4: 1e781000 fmov d0, #-1.250000000000000000e-01
|
||||
7b8: 1e783000 fmov d0, #-1.328125000000000000e-01
|
||||
7bc: 1e7a1000 fmov d0, #-2.500000000000000000e-01
|
||||
7c0: 1e7a3000 fmov d0, #-2.656250000000000000e-01
|
||||
7c4: 1e7c1000 fmov d0, #-5.000000000000000000e-01
|
||||
7c8: 1e7c3000 fmov d0, #-5.312500000000000000e-01
|
||||
7cc: 1e7e1000 fmov d0, #-1.000000000000000000e+00
|
||||
7d0: 1e7e3000 fmov d0, #-1.062500000000000000e+00
|
||||
7d4: f8388098 swp x24, x24, [x4]
|
||||
7d8: f8340010 ldadd x20, x16, [x0]
|
||||
7dc: f8241175 ldclr x4, x21, [x11]
|
||||
7e0: f83e22d0 ldeor x30, x16, [x22]
|
||||
7e4: f82432ef ldset x4, x15, [x23]
|
||||
7e8: f83a5186 ldsmin x26, x6, [x12]
|
||||
7ec: f82f41ee ldsmax x15, x14, [x15]
|
||||
7f0: f82973b9 ldumin x9, x25, [x29]
|
||||
7f4: f82b6194 ldumax x11, x20, [x12]
|
||||
7f8: f8b28216 swpa x18, x22, [x16]
|
||||
7fc: f8b50358 ldadda x21, x24, [x26]
|
||||
800: f8a61206 ldclra x6, x6, [x16]
|
||||
804: f8b02219 ldeora x16, x25, [x16]
|
||||
808: f8bc3218 ldseta x28, x24, [x16]
|
||||
80c: f8ba514f ldsmina x26, x15, [x10]
|
||||
810: f8ad428e ldsmaxa x13, x14, [x20]
|
||||
814: f8a173d7 ldumina x1, x23, [x30]
|
||||
818: f8ae60c2 ldumaxa x14, x2, [x6]
|
||||
81c: f8e38328 swpal x3, x8, [x25]
|
||||
820: f8e003db ldaddal x0, x27, [x30]
|
||||
824: f8e513c5 ldclral x5, x5, [x30]
|
||||
828: f8eb2019 ldeoral x11, x25, [x0]
|
||||
82c: f8ff3260 ldsetal xzr, x0, [x19]
|
||||
830: f8fd513a ldsminal x29, x26, [x9]
|
||||
834: f8fa41ec ldsmaxal x26, x12, [x15]
|
||||
838: f8eb724b lduminal x11, x11, [x18]
|
||||
83c: f8f96316 ldumaxal x25, x22, [x24]
|
||||
840: f8608171 swpl x0, x17, [x11]
|
||||
844: f86600dd ldaddl x6, x29, [x6]
|
||||
848: f86512a5 ldclrl x5, x5, [x21]
|
||||
84c: f8732250 ldeorl x19, x16, [x18]
|
||||
850: f87e339b ldsetl x30, x27, [x28]
|
||||
854: f861503c ldsminl x1, x28, [x1]
|
||||
858: f874421d ldsmaxl x20, x29, [x16]
|
||||
85c: f86d73aa lduminl x13, x10, [x29]
|
||||
860: f87d62d3 ldumaxl x29, x19, [x22]
|
||||
864: b82a83e4 swp w10, w4, [sp]
|
||||
868: b83503e8 ldadd w21, w8, [sp]
|
||||
86c: b833138a ldclr w19, w10, [x28]
|
||||
870: b82220b9 ldeor w2, w25, [x5]
|
||||
874: b82332c8 ldset w3, w8, [x22]
|
||||
878: b83350ad ldsmin w19, w13, [x5]
|
||||
87c: b83d42b8 ldsmax w29, w24, [x21]
|
||||
880: b83a7078 ldumin w26, w24, [x3]
|
||||
884: b83862fa ldumax w24, w26, [x23]
|
||||
888: b8af8075 swpa w15, w21, [x3]
|
||||
88c: b8b80328 ldadda w24, w8, [x25]
|
||||
890: b8b41230 ldclra w20, w16, [x17]
|
||||
894: b8a22001 ldeora w2, w1, [x0]
|
||||
898: b8b83064 ldseta w24, w4, [x3]
|
||||
89c: b8ac539f ldsmina w12, wzr, [x28]
|
||||
8a0: b8aa405a ldsmaxa w10, w26, [x2]
|
||||
8a4: b8ac73f2 ldumina w12, w18, [sp]
|
||||
8a8: b8a163ad ldumaxa w1, w13, [x29]
|
||||
8ac: b8e08193 swpal w0, w19, [x12]
|
||||
8b0: b8f101b6 ldaddal w17, w22, [x13]
|
||||
8b4: b8fc13fe ldclral w28, w30, [sp]
|
||||
8b8: b8e1239a ldeoral w1, w26, [x28]
|
||||
8bc: b8e4309e ldsetal w4, w30, [x4]
|
||||
8c0: b8e6535e ldsminal w6, w30, [x26]
|
||||
8c4: b8f24109 ldsmaxal w18, w9, [x8]
|
||||
8c8: b8ec7280 lduminal w12, w0, [x20]
|
||||
8cc: b8e16058 ldumaxal w1, w24, [x2]
|
||||
8d0: b8608309 swpl w0, w9, [x24]
|
||||
8d4: b87a03d0 ldaddl w26, w16, [x30]
|
||||
8d8: b86312ea ldclrl w3, w10, [x23]
|
||||
8dc: b86a2244 ldeorl w10, w4, [x18]
|
||||
8e0: b862310b ldsetl w2, w11, [x8]
|
||||
8e4: b86a522f ldsminl w10, w15, [x17]
|
||||
8e8: b862418a ldsmaxl w2, w10, [x12]
|
||||
8ec: b86c71af lduminl w12, w15, [x13]
|
||||
8f0: b8626287 ldumaxl w2, w7, [x20]
|
||||
8f4: 042401f9 add z25.b, z15.b, z4.b
|
||||
8f8: 04b10564 sub z4.s, z11.s, z17.s
|
||||
8fc: 65ca0230 fadd z16.d, z17.d, z10.d
|
||||
900: 65d90996 fmul z22.d, z12.d, z25.d
|
||||
904: 65ca05dc fsub z28.d, z14.d, z10.d
|
||||
908: 0456afc1 abs z1.h, p3/m, z30.h
|
||||
90c: 0400044f add z15.b, p1/m, z15.b, z2.b
|
||||
910: 0490920d asr z13.s, p4/m, z13.s, z16.s
|
||||
914: 04daa163 cnt z3.d, p0/m, z11.d
|
||||
918: 04d389c5 lsl z5.d, p2/m, z5.d, z14.d
|
||||
91c: 0411829d lsr z29.b, p0/m, z29.b, z20.b
|
||||
920: 04901774 mul z20.s, p5/m, z20.s, z27.s
|
||||
924: 0417b89a neg z26.b, p6/m, z4.b
|
||||
928: 041eb3d6 not z22.b, p4/m, z30.b
|
||||
92c: 04480b6b smax z11.h, p2/m, z11.h, z27.h
|
||||
930: 048a17dc smin z28.s, p5/m, z28.s, z30.s
|
||||
934: 048105be sub z30.s, p1/m, z30.s, z13.s
|
||||
938: 04dcb35e fabs z30.d, p4/m, z26.d
|
||||
93c: 65808d6f fadd z15.s, p3/m, z15.s, z11.s
|
||||
940: 65cd9e06 fdiv z6.d, p7/m, z6.d, z16.d
|
||||
944: 65869cfb fmax z27.s, p7/m, z27.s, z7.s
|
||||
948: 65c78893 fmin z19.d, p2/m, z19.d, z4.d
|
||||
94c: 658292d1 fmul z17.s, p4/m, z17.s, z22.s
|
||||
950: 04ddaebc fneg z28.d, p3/m, z21.d
|
||||
954: 6582b452 frintm z18.s, p5/m, z2.s
|
||||
958: 6580ade6 frintn z6.s, p3/m, z15.s
|
||||
95c: 65c1b42c frintp z12.d, p5/m, z1.d
|
||||
960: 658da632 fsqrt z18.s, p1/m, z17.s
|
||||
964: 658195af fsub z15.s, p5/m, z15.s, z13.s
|
||||
968: 65eb1f74 fmla z20.d, p7/m, z27.d, z11.d
|
||||
96c: 65f723c3 fmls z3.d, p0/m, z30.d, z23.d
|
||||
970: 65ba4b71 fnmla z17.s, p2/m, z27.s, z26.s
|
||||
974: 65fe76c6 fnmls z6.d, p5/m, z22.d, z30.d
|
||||
978: 04525f42 mla z2.h, p7/m, z26.h, z18.h
|
||||
97c: 04117056 mls z22.b, p4/m, z2.b, z17.b
|
||||
980: 04363338 and z24.d, z25.d, z22.d
|
||||
984: 04a33192 eor z18.d, z12.d, z3.d
|
||||
988: 0470339d orr z29.d, z28.d, z16.d
|
||||
98c: 049a2b86 andv s6, p2, z28.s
|
||||
990: 045824e7 orv h7, p1, z7.h
|
||||
994: 04193509 eorv b9, p5, z8.b
|
||||
998: 040837db smaxv b27, p5, z30.b
|
||||
99c: 044a221a sminv h26, p0, z16.h
|
||||
9a0: 65c73903 fminv d3, p6, z8.d
|
||||
9a4: 65c63b55 fmaxv d21, p6, z26.d
|
||||
9a8: 65982096 fadda s22, p0, s22, z4.s
|
||||
9ac: 04412071 uaddv d17, p0, z3.h
|
||||
*/
|
||||
|
||||
static const unsigned int insns[] =
|
||||
@ -1322,30 +1486,30 @@ Disassembly of section .text:
|
||||
0x9101a1a0, 0xb10a5cc8, 0xd10810aa, 0xf10fd061,
|
||||
0x120cb166, 0x321764bc, 0x52174681, 0x720c0247,
|
||||
0x9241018e, 0xb25a2969, 0xd278b411, 0xf26aad01,
|
||||
0x14000000, 0x17ffffd7, 0x140001f2, 0x94000000,
|
||||
0x97ffffd4, 0x940001ef, 0x3400000a, 0x34fffa2a,
|
||||
0x34003d8a, 0x35000008, 0x35fff9c8, 0x35003d28,
|
||||
0xb400000b, 0xb4fff96b, 0xb4003ccb, 0xb500001d,
|
||||
0xb5fff91d, 0xb5003c7d, 0x10000013, 0x10fff8b3,
|
||||
0x10003c13, 0x90000013, 0x36300016, 0x3637f836,
|
||||
0x36303b96, 0x3758000c, 0x375ff7cc, 0x37583b2c,
|
||||
0x14000000, 0x17ffffd7, 0x14000242, 0x94000000,
|
||||
0x97ffffd4, 0x9400023f, 0x3400000a, 0x34fffa2a,
|
||||
0x3400478a, 0x35000008, 0x35fff9c8, 0x35004728,
|
||||
0xb400000b, 0xb4fff96b, 0xb40046cb, 0xb500001d,
|
||||
0xb5fff91d, 0xb500467d, 0x10000013, 0x10fff8b3,
|
||||
0x10004613, 0x90000013, 0x36300016, 0x3637f836,
|
||||
0x36304596, 0x3758000c, 0x375ff7cc, 0x3758452c,
|
||||
0x128313a0, 0x528a32c7, 0x7289173b, 0x92ab3acc,
|
||||
0xd2a0bf94, 0xf2c285e8, 0x9358722f, 0x330e652f,
|
||||
0x53067f3b, 0x93577c53, 0xb34a1aac, 0xd35a4016,
|
||||
0x13946c63, 0x93c3dbc8, 0x54000000, 0x54fff5a0,
|
||||
0x54003900, 0x54000001, 0x54fff541, 0x540038a1,
|
||||
0x54000002, 0x54fff4e2, 0x54003842, 0x54000002,
|
||||
0x54fff482, 0x540037e2, 0x54000003, 0x54fff423,
|
||||
0x54003783, 0x54000003, 0x54fff3c3, 0x54003723,
|
||||
0x54000004, 0x54fff364, 0x540036c4, 0x54000005,
|
||||
0x54fff305, 0x54003665, 0x54000006, 0x54fff2a6,
|
||||
0x54003606, 0x54000007, 0x54fff247, 0x540035a7,
|
||||
0x54000008, 0x54fff1e8, 0x54003548, 0x54000009,
|
||||
0x54fff189, 0x540034e9, 0x5400000a, 0x54fff12a,
|
||||
0x5400348a, 0x5400000b, 0x54fff0cb, 0x5400342b,
|
||||
0x5400000c, 0x54fff06c, 0x540033cc, 0x5400000d,
|
||||
0x54fff00d, 0x5400336d, 0x5400000e, 0x54ffefae,
|
||||
0x5400330e, 0x5400000f, 0x54ffef4f, 0x540032af,
|
||||
0x54004300, 0x54000001, 0x54fff541, 0x540042a1,
|
||||
0x54000002, 0x54fff4e2, 0x54004242, 0x54000002,
|
||||
0x54fff482, 0x540041e2, 0x54000003, 0x54fff423,
|
||||
0x54004183, 0x54000003, 0x54fff3c3, 0x54004123,
|
||||
0x54000004, 0x54fff364, 0x540040c4, 0x54000005,
|
||||
0x54fff305, 0x54004065, 0x54000006, 0x54fff2a6,
|
||||
0x54004006, 0x54000007, 0x54fff247, 0x54003fa7,
|
||||
0x54000008, 0x54fff1e8, 0x54003f48, 0x54000009,
|
||||
0x54fff189, 0x54003ee9, 0x5400000a, 0x54fff12a,
|
||||
0x54003e8a, 0x5400000b, 0x54fff0cb, 0x54003e2b,
|
||||
0x5400000c, 0x54fff06c, 0x54003dcc, 0x5400000d,
|
||||
0x54fff00d, 0x54003d6d, 0x5400000e, 0x54ffefae,
|
||||
0x54003d0e, 0x5400000f, 0x54ffef4f, 0x54003caf,
|
||||
0xd40658e1, 0xd4014d22, 0xd4046543, 0xd4273f60,
|
||||
0xd44cad80, 0xd503201f, 0xd69f03e0, 0xd6bf03e0,
|
||||
0xd5033fdf, 0xd5033e9f, 0xd50332bf, 0xd61f0200,
|
||||
@ -1377,7 +1541,7 @@ Disassembly of section .text:
|
||||
0x791f226d, 0xf95aa2f3, 0xb9587bb7, 0x395f7176,
|
||||
0x795d9143, 0x399e7e08, 0x799a2697, 0x79df3422,
|
||||
0xb99c2624, 0xfd5c2374, 0xbd5fa1d9, 0xfd1d595a,
|
||||
0xbd1b1869, 0x580022fb, 0x1800000b, 0xf8945060,
|
||||
0xbd1b1869, 0x58002cfb, 0x1800000b, 0xf8945060,
|
||||
0xd8000000, 0xf8ae6ba0, 0xf99a0080, 0x1a070035,
|
||||
0x3a0700a8, 0x5a0e0367, 0x7a11009b, 0x9a000380,
|
||||
0xba1e030c, 0xda0f0320, 0xfa030301, 0x0b340b12,
|
||||
@ -1421,32 +1585,52 @@ Disassembly of section .text:
|
||||
0x7a42cbe2, 0x93df03ff, 0xc820ffff, 0x8822fc7f,
|
||||
0xc8247cbf, 0x88267fff, 0x4e010fe0, 0x4e081fe1,
|
||||
0x4e0c1fe1, 0x4e0a1fe1, 0x4e071fe1, 0x4cc0ac3f,
|
||||
0x1e601000, 0x1e603000, 0x1e621000, 0x1e623000,
|
||||
0x1e641000, 0x1e643000, 0x1e661000, 0x1e663000,
|
||||
0x1e681000, 0x1e683000, 0x1e6a1000, 0x1e6a3000,
|
||||
0x1e6c1000, 0x1e6c3000, 0x1e6e1000, 0x1e6e3000,
|
||||
0x1e701000, 0x1e703000, 0x1e721000, 0x1e723000,
|
||||
0x1e741000, 0x1e743000, 0x1e761000, 0x1e763000,
|
||||
0x1e781000, 0x1e783000, 0x1e7a1000, 0x1e7a3000,
|
||||
0x1e7c1000, 0x1e7c3000, 0x1e7e1000, 0x1e7e3000,
|
||||
0xf8388098, 0xf8340010, 0xf8241175, 0xf83e22d0,
|
||||
0xf82432ef, 0xf83a5186, 0xf82f41ee, 0xf82973b9,
|
||||
0xf82b6194, 0xf8b28216, 0xf8b50358, 0xf8a61206,
|
||||
0xf8b02219, 0xf8bc3218, 0xf8ba514f, 0xf8ad428e,
|
||||
0xf8a173d7, 0xf8ae60c2, 0xf8e38328, 0xf8e003db,
|
||||
0xf8e513c5, 0xf8eb2019, 0xf8ff3260, 0xf8fd513a,
|
||||
0xf8fa41ec, 0xf8eb724b, 0xf8f96316, 0xf8608171,
|
||||
0xf86600dd, 0xf86512a5, 0xf8732250, 0xf87e339b,
|
||||
0xf861503c, 0xf874421d, 0xf86d73aa, 0xf87d62d3,
|
||||
0xb82a83e4, 0xb83503e8, 0xb833138a, 0xb82220b9,
|
||||
0xb82332c8, 0xb83350ad, 0xb83d42b8, 0xb83a7078,
|
||||
0xb83862fa, 0xb8af8075, 0xb8b80328, 0xb8b41230,
|
||||
0xb8a22001, 0xb8b83064, 0xb8ac539f, 0xb8aa405a,
|
||||
0xb8ac73f2, 0xb8a163ad, 0xb8e08193, 0xb8f101b6,
|
||||
0xb8fc13fe, 0xb8e1239a, 0xb8e4309e, 0xb8e6535e,
|
||||
0xb8f24109, 0xb8ec7280, 0xb8e16058, 0xb8608309,
|
||||
0xb87a03d0, 0xb86312ea, 0xb86a2244, 0xb862310b,
|
||||
0xb86a522f, 0xb862418a, 0xb86c71af, 0xb8626287,
|
||||
0x05a08020, 0x04b0e3e0, 0x0470e7e1, 0x042f9c20,
|
||||
0x043f9c35, 0x047f9c20, 0x04ff9c20, 0x04299420,
|
||||
0x04319160, 0x0461943e, 0x04a19020, 0x042053ff,
|
||||
0x047f5401, 0x25208028, 0x2538cfe0, 0x2578d001,
|
||||
0x25b8efe2, 0x25f8f007, 0xa400a3e0, 0xa4a8a7ea,
|
||||
0xa547a814, 0xa4084ffe, 0xa55c53e0, 0xa5e1540b,
|
||||
0xe400fbf6, 0xe408ffff, 0xe547e400, 0xe4014be0,
|
||||
0xe4a84fe0, 0xe5f25000, 0x858043e0, 0x85a043ff,
|
||||
0xe59f5d08, 0x1e601000, 0x1e603000, 0x1e621000,
|
||||
0x1e623000, 0x1e641000, 0x1e643000, 0x1e661000,
|
||||
0x1e663000, 0x1e681000, 0x1e683000, 0x1e6a1000,
|
||||
0x1e6a3000, 0x1e6c1000, 0x1e6c3000, 0x1e6e1000,
|
||||
0x1e6e3000, 0x1e701000, 0x1e703000, 0x1e721000,
|
||||
0x1e723000, 0x1e741000, 0x1e743000, 0x1e761000,
|
||||
0x1e763000, 0x1e781000, 0x1e783000, 0x1e7a1000,
|
||||
0x1e7a3000, 0x1e7c1000, 0x1e7c3000, 0x1e7e1000,
|
||||
0x1e7e3000, 0xf8388098, 0xf8340010, 0xf8241175,
|
||||
0xf83e22d0, 0xf82432ef, 0xf83a5186, 0xf82f41ee,
|
||||
0xf82973b9, 0xf82b6194, 0xf8b28216, 0xf8b50358,
|
||||
0xf8a61206, 0xf8b02219, 0xf8bc3218, 0xf8ba514f,
|
||||
0xf8ad428e, 0xf8a173d7, 0xf8ae60c2, 0xf8e38328,
|
||||
0xf8e003db, 0xf8e513c5, 0xf8eb2019, 0xf8ff3260,
|
||||
0xf8fd513a, 0xf8fa41ec, 0xf8eb724b, 0xf8f96316,
|
||||
0xf8608171, 0xf86600dd, 0xf86512a5, 0xf8732250,
|
||||
0xf87e339b, 0xf861503c, 0xf874421d, 0xf86d73aa,
|
||||
0xf87d62d3, 0xb82a83e4, 0xb83503e8, 0xb833138a,
|
||||
0xb82220b9, 0xb82332c8, 0xb83350ad, 0xb83d42b8,
|
||||
0xb83a7078, 0xb83862fa, 0xb8af8075, 0xb8b80328,
|
||||
0xb8b41230, 0xb8a22001, 0xb8b83064, 0xb8ac539f,
|
||||
0xb8aa405a, 0xb8ac73f2, 0xb8a163ad, 0xb8e08193,
|
||||
0xb8f101b6, 0xb8fc13fe, 0xb8e1239a, 0xb8e4309e,
|
||||
0xb8e6535e, 0xb8f24109, 0xb8ec7280, 0xb8e16058,
|
||||
0xb8608309, 0xb87a03d0, 0xb86312ea, 0xb86a2244,
|
||||
0xb862310b, 0xb86a522f, 0xb862418a, 0xb86c71af,
|
||||
0xb8626287, 0x042401f9, 0x04b10564, 0x65ca0230,
|
||||
0x65d90996, 0x65ca05dc, 0x0456afc1, 0x0400044f,
|
||||
0x0490920d, 0x04daa163, 0x04d389c5, 0x0411829d,
|
||||
0x04901774, 0x0417b89a, 0x041eb3d6, 0x04480b6b,
|
||||
0x048a17dc, 0x048105be, 0x04dcb35e, 0x65808d6f,
|
||||
0x65cd9e06, 0x65869cfb, 0x65c78893, 0x658292d1,
|
||||
0x04ddaebc, 0x6582b452, 0x6580ade6, 0x65c1b42c,
|
||||
0x658da632, 0x658195af, 0x65eb1f74, 0x65f723c3,
|
||||
0x65ba4b71, 0x65fe76c6, 0x04525f42, 0x04117056,
|
||||
0x04363338, 0x04a33192, 0x0470339d, 0x049a2b86,
|
||||
0x045824e7, 0x04193509, 0x040837db, 0x044a221a,
|
||||
0x65c73903, 0x65c63b55, 0x65982096, 0x04412071,
|
||||
|
||||
};
|
||||
// END Generated code -- do not edit
|
||||
|
@ -139,6 +139,9 @@ REGISTER_DECLARATION(Register, rdispatch, r21);
|
||||
// Java stack pointer
|
||||
REGISTER_DECLARATION(Register, esp, r20);
|
||||
|
||||
// Preserved predicate register with all elements set TRUE.
|
||||
REGISTER_DECLARATION(PRegister, ptrue, p7);
|
||||
|
||||
#define assert_cond(ARG1) assert(ARG1, #ARG1)
|
||||
|
||||
namespace asm_util {
|
||||
@ -273,6 +276,14 @@ public:
|
||||
f(r->encoding_nocheck(), lsb + 4, lsb);
|
||||
}
|
||||
|
||||
void prf(PRegister r, int lsb) {
|
||||
f(r->encoding_nocheck(), lsb + 3, lsb);
|
||||
}
|
||||
|
||||
void pgrf(PRegister r, int lsb) {
|
||||
f(r->encoding_nocheck(), lsb + 2, lsb);
|
||||
}
|
||||
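Note that prf packs the full 4-bit predicate register number, so any of p0-p15 can land in bits [lsb+3:lsb], while pgrf emits only three bits, since the governing predicate fields in these SVE encodings can only name p0-p7; prf(p9, 10), for instance, places the value 9 in bits 13:10.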
|
||||
unsigned get(int msb = 31, int lsb = 0) {
|
||||
int nbits = msb - lsb + 1;
|
||||
unsigned mask = ((1U << nbits) - 1) << lsb;
|
||||
@ -561,6 +572,18 @@ class Address {
|
||||
void lea(MacroAssembler *, Register) const;
|
||||
|
||||
static bool offset_ok_for_immed(int64_t offset, uint shift);
|
||||
|
||||
static bool offset_ok_for_sve_immed(long offset, int shift, int vl /* sve vector length */) {
|
||||
if (offset % vl == 0) {
|
||||
// Convert address offset into sve imm offset (MUL VL).
|
||||
int sve_offset = offset / vl;
|
||||
if (((-(1 << (shift - 1))) <= sve_offset) && (sve_offset < (1 << (shift - 1)))) {
|
||||
// sve_offset can be encoded
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
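A minimal standalone sketch of the MUL VL range check above (the 64-byte vector length, the sample offsets and the way the helper is exercised are assumptions for illustration; the body mirrors the check in the patch, and the 4-bit shift corresponds to the signed imm4 field used by the predicated ld1/st1 forms later in this file):

#include <cstdio>

// Mirror of offset_ok_for_sve_immed: the offset must be a multiple of the
// vector length, and the resulting MUL VL count must fit a signed field of
// 'shift' bits.
static bool offset_ok_for_sve_immed(long offset, int shift, int vl) {
  if (offset % vl == 0) {
    int sve_offset = offset / vl;
    return (-(1 << (shift - 1)) <= sve_offset) && (sve_offset < (1 << (shift - 1)));
  }
  return false;
}

int main() {
  const int vl = 64;  // assume a 512-bit vector length for the example
  const long offsets[] = {448, 512, -512, -576, 100};
  for (long off : offsets) {
    std::printf("offset %5ld -> %s\n", off,
                offset_ok_for_sve_immed(off, 4, vl) ? "encodable" : "not encodable");
  }
  return 0;
}

With these inputs, only the multiples of 64 that map to a MUL VL count in [-8, 7] (here 448 and -512) are reported encodable.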
|
||||
// Convenience classes
|
||||
@ -684,6 +707,12 @@ public:
|
||||
void rf(FloatRegister reg, int lsb) {
|
||||
current->rf(reg, lsb);
|
||||
}
|
||||
void prf(PRegister reg, int lsb) {
|
||||
current->prf(reg, lsb);
|
||||
}
|
||||
void pgrf(PRegister reg, int lsb) {
|
||||
current->pgrf(reg, lsb);
|
||||
}
|
||||
void fixed(unsigned value, unsigned mask) {
|
||||
current->fixed(value, mask);
|
||||
}
|
||||
@ -2473,13 +2502,18 @@ public:
|
||||
f(sidx<<(int)T, 14, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0);
|
||||
}
|
||||
|
||||
void umov(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) {
|
||||
starti;
|
||||
f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21);
|
||||
f(((idx<<1)|1)<<(int)T, 20, 16), f(0b001111, 15, 10);
|
||||
rf(Vn, 5), rf(Rd, 0);
|
||||
#define INSN(NAME, op) \
|
||||
void NAME(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) { \
|
||||
starti; \
|
||||
f(0, 31), f(T==D ? 1:0, 30), f(0b001110000, 29, 21); \
|
||||
f(((idx<<1)|1)<<(int)T, 20, 16), f(op, 15, 10); \
|
||||
rf(Vn, 5), rf(Rd, 0); \
|
||||
}
|
||||
|
||||
INSN(umov, 0b001111);
|
||||
INSN(smov, 0b001011);
|
||||
#undef INSN
|
||||
|
||||
#define INSN(NAME, opc, opc2, isSHR) \
|
||||
void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
|
||||
starti; \
|
||||
@ -2711,7 +2745,7 @@ public:
|
||||
|
||||
#undef INSN
|
||||
|
||||
void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
|
||||
void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
|
||||
{
|
||||
starti;
|
||||
assert(T == T8B || T == T16B, "invalid arrangement");
|
||||
@ -2721,6 +2755,292 @@ void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister V
|
||||
f(0, 10), rf(Vn, 5), rf(Vd, 0);
|
||||
}
|
||||
|
||||
// SVE arithmetics - unpredicated
|
||||
#define INSN(NAME, opcode) \
|
||||
void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
|
||||
starti; \
|
||||
assert(T != Q, "invalid register variant"); \
|
||||
f(0b00000100, 31, 24), f(T, 23, 22), f(1, 21), \
|
||||
rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0); \
|
||||
}
|
||||
INSN(sve_add, 0b000);
|
||||
INSN(sve_sub, 0b001);
|
||||
#undef INSN
|
||||
|
||||
// SVE floating-point arithmetic - unpredicated
|
||||
#define INSN(NAME, opcode) \
|
||||
void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
|
||||
starti; \
|
||||
assert(T == S || T == D, "invalid register variant"); \
|
||||
f(0b01100101, 31, 24), f(T, 23, 22), f(0, 21), \
|
||||
rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0); \
|
||||
}
|
||||
|
||||
INSN(sve_fadd, 0b000);
|
||||
INSN(sve_fmul, 0b010);
|
||||
INSN(sve_fsub, 0b001);
|
||||
#undef INSN
|
||||
|
||||
private:
|
||||
void sve_predicate_reg_insn(unsigned op24, unsigned op13,
|
||||
FloatRegister Zd_or_Vd, SIMD_RegVariant T,
|
||||
PRegister Pg, FloatRegister Zn_or_Vn) {
|
||||
starti;
|
||||
f(op24, 31, 24), f(T, 23, 22), f(op13, 21, 13);
|
||||
pgrf(Pg, 10), rf(Zn_or_Vn, 5), rf(Zd_or_Vd, 0);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
// SVE integer arithmetics - predicate
|
||||
#define INSN(NAME, op1, op2) \
|
||||
void NAME(FloatRegister Zdn_or_Zd_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Znm_or_Vn) { \
|
||||
assert(T != Q, "invalid register variant"); \
|
||||
sve_predicate_reg_insn(op1, op2, Zdn_or_Zd_or_Vd, T, Pg, Znm_or_Vn); \
|
||||
}
|
||||
|
||||
INSN(sve_abs, 0b00000100, 0b010110101); // vector abs, unary
|
||||
INSN(sve_add, 0b00000100, 0b000000000); // vector add
|
||||
INSN(sve_andv, 0b00000100, 0b011010001); // bitwise and reduction to scalar
|
||||
INSN(sve_asr, 0b00000100, 0b010000100); // vector arithmetic shift right
|
||||
INSN(sve_cnt, 0b00000100, 0b011010101); // count non-zero bits
|
||||
INSN(sve_cpy, 0b00000101, 0b100000100); // copy scalar to each active vector element
|
||||
INSN(sve_eorv, 0b00000100, 0b011001001); // bitwise xor reduction to scalar
|
||||
INSN(sve_lsl, 0b00000100, 0b010011100); // vector logical shift left
|
||||
INSN(sve_lsr, 0b00000100, 0b010001100); // vector logical shift right
|
||||
INSN(sve_mul, 0b00000100, 0b010000000); // vector mul
|
||||
INSN(sve_neg, 0b00000100, 0b010111101); // vector neg, unary
|
||||
INSN(sve_not, 0b00000100, 0b011110101); // bitwise invert vector, unary
|
||||
INSN(sve_orv, 0b00000100, 0b011000001); // bitwise or reduction to scalar
|
||||
INSN(sve_smax, 0b00000100, 0b001000000); // signed maximum vectors
|
||||
INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar
|
||||
INSN(sve_smin, 0b00000100, 0b001010000); // signed minimum vectors
|
||||
INSN(sve_sminv, 0b00000100, 0b001010001); // signed minimum reduction to scalar
|
||||
INSN(sve_sub, 0b00000100, 0b000001000); // vector sub
|
||||
INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar
|
||||
#undef INSN
|
||||
|
||||
// SVE floating-point arithmetics - predicate
|
||||
#define INSN(NAME, op1, op2) \
|
||||
void NAME(FloatRegister Zd_or_Zdn_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn_or_Zm) { \
|
||||
assert(T == S || T == D, "invalid register variant"); \
|
||||
sve_predicate_reg_insn(op1, op2, Zd_or_Zdn_or_Vd, T, Pg, Zn_or_Zm); \
|
||||
}
|
||||
|
||||
INSN(sve_fabs, 0b00000100, 0b011100101);
|
||||
INSN(sve_fadd, 0b01100101, 0b000000100);
|
||||
INSN(sve_fadda, 0b01100101, 0b011000001); // add strictly-ordered reduction to scalar Vd
|
||||
INSN(sve_fdiv, 0b01100101, 0b001101100);
|
||||
INSN(sve_fmax, 0b01100101, 0b000110100); // floating-point maximum
|
||||
INSN(sve_fmaxv, 0b01100101, 0b000110001); // floating-point maximum recursive reduction to scalar
|
||||
INSN(sve_fmin, 0b01100101, 0b000111100); // floating-point minimum
|
||||
INSN(sve_fminv, 0b01100101, 0b000111001); // floating-point minimum recursive reduction to scalar
|
||||
INSN(sve_fmul, 0b01100101, 0b000010100);
|
||||
INSN(sve_fneg, 0b00000100, 0b011101101);
|
||||
INSN(sve_frintm, 0b01100101, 0b000010101); // floating-point round to integral value, toward minus infinity
|
||||
INSN(sve_frintn, 0b01100101, 0b000000101); // floating-point round to integral value, nearest with ties to even
|
||||
INSN(sve_frintp, 0b01100101, 0b000001101); // floating-point round to integral value, toward plus infinity
|
||||
INSN(sve_fsqrt, 0b01100101, 0b001101101);
|
||||
INSN(sve_fsub, 0b01100101, 0b000001100);
|
||||
#undef INSN
|
||||
|
||||
// SVE multiple-add/sub - predicated
|
||||
#define INSN(NAME, op0, op1, op2) \
|
||||
void NAME(FloatRegister Zda, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn, FloatRegister Zm) { \
|
||||
starti; \
|
||||
assert(T != Q, "invalid size"); \
|
||||
f(op0, 31, 24), f(T, 23, 22), f(op1, 21), rf(Zm, 16); \
|
||||
f(op2, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zda, 0); \
|
||||
}
|
||||
|
||||
INSN(sve_fmla, 0b01100101, 1, 0b000); // floating-point fused multiply-add: Zda = Zda + Zn * Zm
|
||||
INSN(sve_fmls, 0b01100101, 1, 0b001); // floating-point fused multiply-subtract: Zda = Zda + -Zn * Zm
|
||||
INSN(sve_fnmla, 0b01100101, 1, 0b010); // floating-point negated fused multiply-add: Zda = -Zda + -Zn * Zm
|
||||
INSN(sve_fnmls, 0b01100101, 1, 0b011); // floating-point negated fused multiply-subtract: Zda = -Zda + Zn * Zm
|
||||
INSN(sve_mla, 0b00000100, 0, 0b010); // multiply-add: Zda = Zda + Zn*Zm
|
||||
INSN(sve_mls, 0b00000100, 0, 0b011); // multiply-subtract: Zda = Zda + -Zn*Zm
|
||||
#undef INSN
|
||||
|
||||
// SVE bitwise logical - unpredicated
|
||||
#define INSN(NAME, opc) \
|
||||
void NAME(FloatRegister Zd, FloatRegister Zn, FloatRegister Zm) { \
|
||||
starti; \
|
||||
f(0b00000100, 31, 24), f(opc, 23, 22), f(1, 21), \
|
||||
rf(Zm, 16), f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0); \
|
||||
}
|
||||
INSN(sve_and, 0b00);
|
||||
INSN(sve_eor, 0b10);
|
||||
INSN(sve_orr, 0b01);
|
||||
#undef INSN
|
||||
|
||||
// SVE shift immediate - unpredicated
|
||||
#define INSN(NAME, opc, isSHR) \
|
||||
void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, int shift) { \
|
||||
starti; \
|
||||
/* The encodings for the tszh:tszl:imm3 fields (bits 23:22 20:19 18:16) \
|
||||
* for shift right is calculated as: \
|
||||
* 0001 xxx B, shift = 16 - UInt(tszh:tszl:imm3) \
|
||||
* 001x xxx H, shift = 32 - UInt(tszh:tszl:imm3) \
|
||||
* 01xx xxx S, shift = 64 - UInt(tszh:tszl:imm3) \
|
||||
* 1xxx xxx D, shift = 128 - UInt(tszh:tszl:imm3) \
|
||||
* for shift left is calculated as: \
|
||||
* 0001 xxx B, shift = UInt(tszh:tszl:imm3) - 8 \
|
||||
* 001x xxx H, shift = UInt(tszh:tszl:imm3) - 16 \
|
||||
* 01xx xxx S, shift = UInt(tszh:tszl:imm3) - 32 \
|
||||
* 1xxx xxx D, shift = UInt(tszh:tszl:imm3) - 64 \
|
||||
*/ \
|
||||
assert(T != Q, "Invalid register variant"); \
|
||||
if (isSHR) { \
|
||||
assert(((1 << (T + 3)) >= shift) && (shift > 0) , "Invalid shift value"); \
|
||||
} else { \
|
||||
assert(((1 << (T + 3)) > shift) && (shift >= 0) , "Invalid shift value"); \
|
||||
} \
|
||||
int cVal = (1 << ((T + 3) + (isSHR ? 1 : 0))); \
|
||||
int encodedShift = isSHR ? cVal - shift : cVal + shift; \
|
||||
int tszh = encodedShift >> 5; \
|
||||
int tszl_imm = encodedShift & 0x1f; \
|
||||
f(0b00000100, 31, 24); \
|
||||
f(tszh, 23, 22), f(1,21), f(tszl_imm, 20, 16); \
|
||||
f(0b100, 15, 13), f(opc, 12, 10), rf(Zn, 5), rf(Zd, 0); \
|
||||
}
|
||||
|
||||
INSN(sve_asr, 0b100, /* isSHR = */ true);
|
||||
INSN(sve_lsl, 0b111, /* isSHR = */ false);
|
||||
INSN(sve_lsr, 0b101, /* isSHR = */ true);
|
||||
#undef INSN
|
||||
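The arithmetic in the comment above can be sanity-checked against the generated listing earlier in this change. A small standalone sketch (the helper name encode_shift and its packing expression are illustrative only, not HotSpot code), wired to the B-variant examples lsl z0.b, z1.b, #7 and lsr z0.b, z1.b, #7:

#include <cassert>
#include <cstdio>

// Reproduce the tszh:tszl:imm3 arithmetic from the macro above. Field layout
// follows the f(...) calls: opcode in bits 31:24, tszh in 23:22, bit 21 set,
// tszl:imm3 in 20:16, 0b100 in 15:13, opc in 12:10, Zn in 9:5, Zd in 4:0.
static unsigned encode_shift(unsigned opc, bool isSHR, int T, int shift,
                             int zd, int zn) {
  int cVal = 1 << ((T + 3) + (isSHR ? 1 : 0));
  int encodedShift = isSHR ? cVal - shift : cVal + shift;
  int tszh = encodedShift >> 5;
  int tszl_imm = encodedShift & 0x1f;
  return (0b00000100u << 24) | (unsigned(tszh) << 22) | (1u << 21) |
         (unsigned(tszl_imm) << 16) | (0b100u << 13) | (opc << 10) |
         (unsigned(zn) << 5) | unsigned(zd);
}

int main() {
  // Expected values taken from the generated listing earlier in this change.
  assert(encode_shift(0b111, false, 0, 7, 0, 1) == 0x042f9c20); // lsl z0.b, z1.b, #7
  assert(encode_shift(0b101, true,  0, 7, 0, 1) == 0x04299420); // lsr z0.b, z1.b, #7
  std::printf("shift-immediate encodings match the generated test\n");
  return 0;
}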
|
||||
private:
|
||||
|
||||
// Scalar base + immediate index
|
||||
void sve_ld_st1(FloatRegister Zt, Register Xn, int imm, PRegister Pg,
|
||||
SIMD_RegVariant T, int op1, int type, int op2) {
|
||||
starti;
|
||||
assert_cond(T >= type);
|
||||
f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21);
|
||||
f(0, 20), sf(imm, 19, 16), f(op2, 15, 13);
|
||||
pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);
|
||||
}
|
||||
|
||||
// Scalar base + scalar index
|
||||
void sve_ld_st1(FloatRegister Zt, Register Xn, Register Xm, PRegister Pg,
|
||||
SIMD_RegVariant T, int op1, int type, int op2) {
|
||||
starti;
|
||||
assert_cond(T >= type);
|
||||
f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21);
|
||||
rf(Xm, 16), f(op2, 15, 13);
|
||||
pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);
|
||||
}
|
||||
|
||||
void sve_ld_st1(FloatRegister Zt, PRegister Pg,
|
||||
SIMD_RegVariant T, const Address &a,
|
||||
int op1, int type, int imm_op2, int scalar_op2) {
|
||||
switch (a.getMode()) {
|
||||
case Address::base_plus_offset:
|
||||
sve_ld_st1(Zt, a.base(), a.offset(), Pg, T, op1, type, imm_op2);
|
||||
break;
|
||||
case Address::base_plus_offset_reg:
|
||||
sve_ld_st1(Zt, a.base(), a.index(), Pg, T, op1, type, scalar_op2);
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
// SVE load/store - predicated
|
||||
#define INSN(NAME, op1, type, imm_op2, scalar_op2) \
|
||||
void NAME(FloatRegister Zt, SIMD_RegVariant T, PRegister Pg, const Address &a) { \
|
||||
assert(T != Q, "invalid register variant"); \
|
||||
sve_ld_st1(Zt, Pg, T, a, op1, type, imm_op2, scalar_op2); \
|
||||
}
|
||||
|
||||
INSN(sve_ld1b, 0b1010010, 0b00, 0b101, 0b010);
|
||||
INSN(sve_st1b, 0b1110010, 0b00, 0b111, 0b010);
|
||||
INSN(sve_ld1h, 0b1010010, 0b01, 0b101, 0b010);
|
||||
INSN(sve_st1h, 0b1110010, 0b01, 0b111, 0b010);
|
||||
INSN(sve_ld1w, 0b1010010, 0b10, 0b101, 0b010);
|
||||
INSN(sve_st1w, 0b1110010, 0b10, 0b111, 0b010);
|
||||
INSN(sve_ld1d, 0b1010010, 0b11, 0b101, 0b010);
|
||||
INSN(sve_st1d, 0b1110010, 0b11, 0b111, 0b010);
|
||||
#undef INSN
|
||||
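The imm_op2/scalar_op2 pair simply selects between the two private sve_ld_st1 overloads above: a base register plus a signed 4-bit multiple-of-VL immediate, or a base register plus a scalar index that is scaled by the element size. Both shapes appear in the generated listing earlier in this change, e.g. ld1h {z10.h}, p1/z, [sp, #-8, mul vl] and ld1w {z0.s}, p4/z, [sp, x28, lsl #2].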
|
||||
// SVE load/store - unpredicated
|
||||
#define INSN(NAME, op1) \
|
||||
void NAME(FloatRegister Zt, const Address &a) { \
|
||||
starti; \
|
||||
assert(a.index() == noreg, "invalid address variant"); \
|
||||
f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16), \
|
||||
f(0b010, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5), rf(Zt, 0); \
|
||||
}
|
||||
|
||||
INSN(sve_ldr, 0b100); // LDR (vector)
|
||||
INSN(sve_str, 0b111); // STR (vector)
|
||||
#undef INSN
|
||||
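Here the 9-bit signed MUL VL offset is split across two fields: the high six bits go to bits 21:16 and the low three to bits 12:10. For str z8, [x8, #255, mul vl] in the generated listing, 255 splits into 31 and 7, giving 0xe59f5d08; for ldr z31, [sp, #-256, mul vl] the arithmetic shift yields -32 and 0, giving 0x85a043ff.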
|
||||
#define INSN(NAME, op) \
|
||||
void NAME(Register Xd, Register Xn, int imm6) { \
|
||||
starti; \
|
||||
f(0b000001000, 31, 23), f(op, 22, 21); \
|
||||
srf(Xn, 16), f(0b01010, 15, 11), sf(imm6, 10, 5), srf(Xd, 0); \
|
||||
}
|
||||
|
||||
INSN(sve_addvl, 0b01);
|
||||
INSN(sve_addpl, 0b11);
|
||||
#undef INSN
|
||||
|
||||
// SVE inc/dec register by element count
|
||||
#define INSN(NAME, op) \
|
||||
void NAME(Register Xdn, SIMD_RegVariant T, unsigned imm4 = 1, int pattern = 0b11111) { \
|
||||
starti; \
|
||||
assert(T != Q, "invalid size"); \
|
||||
f(0b00000100,31, 24), f(T, 23, 22), f(0b11, 21, 20); \
|
||||
f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(op, 10), f(pattern, 9, 5), rf(Xdn, 0); \
|
||||
}
|
||||
|
||||
INSN(sve_inc, 0);
|
||||
INSN(sve_dec, 1);
|
||||
#undef INSN
|
||||
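With the defaults (imm4 = 1 and the ALL pattern), sve_inc(x0, S) assembles to incw x0 (0x04b0e3e0) and sve_dec(x1, H) to dech x1 (0x0470e7e1), matching the generated listing; the register is adjusted by the number of elements of that size in one vector.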
|
||||
// SVE predicate count
|
||||
void sve_cntp(Register Xd, SIMD_RegVariant T, PRegister Pg, PRegister Pn) {
|
||||
starti;
|
||||
assert(T != Q, "invalid size");
|
||||
f(0b00100101, 31, 24), f(T, 23, 22), f(0b10000010, 21, 14);
|
||||
prf(Pg, 10), f(0, 9), prf(Pn, 5), rf(Xd, 0);
|
||||
}
|
||||
|
||||
// SVE dup scalar
|
||||
void sve_dup(FloatRegister Zd, SIMD_RegVariant T, Register Rn) {
|
||||
starti;
|
||||
assert(T != Q, "invalid size");
|
||||
f(0b00000101, 31, 24), f(T, 23, 22), f(0b100000001110, 21, 10);
|
||||
srf(Rn, 5), rf(Zd, 0);
|
||||
}
|
||||
|
||||
// SVE dup imm
|
||||
void sve_dup(FloatRegister Zd, SIMD_RegVariant T, int imm8) {
|
||||
starti;
|
||||
assert(T != Q, "invalid size");
|
||||
int sh = 0;
|
||||
if (imm8 <= 127 && imm8 >= -128) {
|
||||
sh = 0;
|
||||
} else if (T != B && imm8 <= 32512 && imm8 >= -32768 && (imm8 & 0xff) == 0) {
|
||||
sh = 1;
|
||||
imm8 = (imm8 >> 8);
|
||||
} else {
|
||||
guarantee(false, "invalid immediate");
|
||||
}
|
||||
f(0b00100101, 31, 24), f(T, 23, 22), f(0b11100011, 21, 14);
|
||||
f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0);
|
||||
}
|
||||
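For example, mov z2.s, #32512 in the generated listing takes the shifted path: the low byte of 32512 is zero, so sh becomes 1 and imm8 becomes 127, encoding to 0x25b8efe2; mov z7.d, #-32768 likewise ends up as sh = 1, imm8 = -128 (0x25f8f007).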
|
||||
void sve_ptrue(PRegister pd, SIMD_RegVariant esize, int pattern = 0b11111) {
|
||||
starti;
|
||||
f(0b00100101, 31, 24), f(esize, 23, 22), f(0b011000111000, 21, 10);
|
||||
f(pattern, 9, 5), f(0b0, 4), prf(pd, 0);
|
||||
}
|
||||
|
||||
Assembler(CodeBuffer* code) : AbstractAssembler(code) {
|
||||
}
|
||||
|
||||
|
@ -431,8 +431,12 @@ void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, Z
|
||||
ZSetupArguments setup_arguments(masm, stub);
|
||||
__ mov(rscratch1, stub->slow_path());
|
||||
__ blr(rscratch1);
|
||||
if (UseSVE > 0) {
|
||||
// Reinitialize the ptrue predicate register, in case the external runtime
|
||||
// call clobbers ptrue reg, as we may return to SVE compiled code.
|
||||
__ reinitialize_ptrue();
|
||||
}
|
||||
}
|
||||
|
||||
// Stub exit
|
||||
__ b(*stub->continuation());
|
||||
}
|
||||
|
@ -99,6 +99,9 @@ define_pd_global(intx, InlineSmallCode, 1000);
|
||||
"Avoid generating unaligned memory accesses") \
|
||||
product(bool, UseLSE, false, \
|
||||
"Use LSE instructions") \
|
||||
product(uint, UseSVE, 0, \
|
||||
"Highest supported SVE instruction set version") \
|
||||
range(0, 2) \
|
||||
product(bool, UseBlockZeroing, true, \
|
||||
"Use DC ZVA for block zeroing") \
|
||||
product(intx, BlockZeroingLowLimit, 256, \
|
||||
|
@ -2117,9 +2117,16 @@ int MacroAssembler::pop(unsigned int bitset, Register stack) {
|
||||
}
|
||||
|
||||
// Push lots of registers in the bit set supplied. Don't push sp.
|
||||
// Return the number of words pushed
|
||||
// Return the number of dwords pushed
|
||||
int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
|
||||
int words_pushed = 0;
|
||||
bool use_sve = false;
|
||||
int sve_vector_size_in_bytes = 0;
|
||||
|
||||
#ifdef COMPILER2
|
||||
use_sve = Matcher::supports_scalable_vector();
|
||||
sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
|
||||
#endif
|
||||
|
||||
// Scan bitset to accumulate register pairs
|
||||
unsigned char regs[32];
|
||||
@ -2134,9 +2141,19 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// SVE
|
||||
if (use_sve && sve_vector_size_in_bytes > 16) {
|
||||
sub(stack, stack, sve_vector_size_in_bytes * count);
|
||||
for (int i = 0; i < count; i++) {
|
||||
sve_str(as_FloatRegister(regs[i]), Address(stack, i));
|
||||
}
|
||||
return count * sve_vector_size_in_bytes / 8;
|
||||
}
|
||||
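As an illustration (the 48-byte length is just an example, not something the patch assumes), a 384-bit vector length with five registers in the bitset reserves 240 bytes here and reports 30 dwords, whereas the NEON path below stores 16 bytes per register and reports two dwords each.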
|
||||
// NEON
|
||||
if (count == 1) {
|
||||
strq(as_FloatRegister(regs[0]), Address(pre(stack, -wordSize * 2)));
|
||||
return 1;
|
||||
return 2;
|
||||
}
|
||||
|
||||
bool odd = (count & 1) == 1;
|
||||
@ -2157,12 +2174,19 @@ int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
|
||||
}
|
||||
|
||||
assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
|
||||
return count;
|
||||
return count * 2;
|
||||
}
|
||||
|
||||
// Return the number of dwords popped
|
||||
int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
|
||||
int words_pushed = 0;
|
||||
bool use_sve = false;
|
||||
int sve_vector_size_in_bytes = 0;
|
||||
|
||||
#ifdef COMPILER2
|
||||
use_sve = Matcher::supports_scalable_vector();
|
||||
sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
|
||||
#endif
|
||||
// Scan bitset to accumulate register pairs
|
||||
unsigned char regs[32];
|
||||
int count = 0;
|
||||
@ -2176,9 +2200,19 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// SVE
|
||||
if (use_sve && sve_vector_size_in_bytes > 16) {
|
||||
for (int i = count - 1; i >= 0; i--) {
|
||||
sve_ldr(as_FloatRegister(regs[i]), Address(stack, i));
|
||||
}
|
||||
add(stack, stack, sve_vector_size_in_bytes * count);
|
||||
return count * sve_vector_size_in_bytes / 8;
|
||||
}
|
||||
|
||||
// NEON
|
||||
if (count == 1) {
|
||||
ldrq(as_FloatRegister(regs[0]), Address(post(stack, wordSize * 2)));
|
||||
return 1;
|
||||
return 2;
|
||||
}
|
||||
|
||||
bool odd = (count & 1) == 1;
|
||||
@ -2199,7 +2233,7 @@ int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
|
||||
|
||||
assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
|
||||
|
||||
return count;
|
||||
return count * 2;
|
||||
}
|
||||
|
||||
#ifdef ASSERT
|
||||
@ -2647,23 +2681,39 @@ void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) {
|
||||
pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2) - exclude, sp);
|
||||
}
|
||||
|
||||
void MacroAssembler::push_CPU_state(bool save_vectors) {
|
||||
int step = (save_vectors ? 8 : 4) * wordSize;
|
||||
void MacroAssembler::push_CPU_state(bool save_vectors, bool use_sve,
|
||||
int sve_vector_size_in_bytes) {
|
||||
push(0x3fffffff, sp); // integer registers except lr & sp
|
||||
mov(rscratch1, -step);
|
||||
sub(sp, sp, step);
|
||||
for (int i = 28; i >= 4; i -= 4) {
|
||||
st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
|
||||
as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1)));
|
||||
if (save_vectors && use_sve && sve_vector_size_in_bytes > 16) {
|
||||
sub(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers);
|
||||
for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
|
||||
sve_str(as_FloatRegister(i), Address(sp, i));
|
||||
}
|
||||
} else {
|
||||
int step = (save_vectors ? 8 : 4) * wordSize;
|
||||
mov(rscratch1, -step);
|
||||
sub(sp, sp, step);
|
||||
for (int i = 28; i >= 4; i -= 4) {
|
||||
st1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
|
||||
as_FloatRegister(i+3), save_vectors ? T2D : T1D, Address(post(sp, rscratch1)));
|
||||
}
|
||||
st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
|
||||
}
|
||||
st1(v0, v1, v2, v3, save_vectors ? T2D : T1D, sp);
|
||||
}
|
||||
|
||||
void MacroAssembler::pop_CPU_state(bool restore_vectors) {
|
||||
int step = (restore_vectors ? 8 : 4) * wordSize;
|
||||
for (int i = 0; i <= 28; i += 4)
|
||||
ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
|
||||
as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step)));
|
||||
void MacroAssembler::pop_CPU_state(bool restore_vectors, bool use_sve,
|
||||
int sve_vector_size_in_bytes) {
|
||||
if (restore_vectors && use_sve && sve_vector_size_in_bytes > 16) {
|
||||
for (int i = FloatRegisterImpl::number_of_registers - 1; i >= 0; i--) {
|
||||
sve_ldr(as_FloatRegister(i), Address(sp, i));
|
||||
}
|
||||
add(sp, sp, sve_vector_size_in_bytes * FloatRegisterImpl::number_of_registers);
|
||||
} else {
|
||||
int step = (restore_vectors ? 8 : 4) * wordSize;
|
||||
for (int i = 0; i <= 28; i += 4)
|
||||
ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2),
|
||||
as_FloatRegister(i+3), restore_vectors ? T2D : T1D, Address(post(sp, step)));
|
||||
}
|
||||
pop(0x3fffffff, sp); // integer registers except lr & sp
|
||||
}
|
||||
|
||||
@ -2712,6 +2762,21 @@ Address MacroAssembler::spill_address(int size, int offset, Register tmp)
|
||||
return Address(base, offset);
|
||||
}
|
||||
|
||||
Address MacroAssembler::sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp) {
|
||||
assert(offset >= 0, "spill to negative address?");
|
||||
|
||||
Register base = sp;
|
||||
|
||||
// An immediate offset in the range 0 to 255 which is multiplied
|
||||
// by the current vector or predicate register size in bytes.
|
||||
if (offset % sve_reg_size_in_bytes == 0 && offset < ((1<<8)*sve_reg_size_in_bytes)) {
|
||||
return Address(base, offset / sve_reg_size_in_bytes);
|
||||
}
|
||||
|
||||
add(tmp, base, offset);
|
||||
return Address(tmp);
|
||||
}
|
||||
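For example, with a 32-byte vector length an offset of 8160 corresponds to 255 vector registers and fits the immediate form, so Address(sp, 255) is returned; 8192 would need 256 and falls back to materialising sp + offset in the scratch register.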
|
||||
// Checks whether offset is aligned.
|
||||
// Returns true if it is, else false.
|
||||
bool MacroAssembler::merge_alignment_check(Register base,
|
||||
@ -5221,3 +5286,24 @@ void MacroAssembler::cache_wbsync(bool is_pre) {
|
||||
membar(Assembler::AnyAny);
|
||||
}
|
||||
}
|
||||
|
||||
void MacroAssembler::verify_sve_vector_length() {
|
||||
Label verify_ok;
|
||||
assert(UseSVE > 0, "should only be used for SVE");
|
||||
movw(rscratch1, zr);
|
||||
sve_inc(rscratch1, B);
|
||||
subsw(zr, rscratch1, VM_Version::get_initial_sve_vector_length());
|
||||
br(EQ, verify_ok);
|
||||
stop("Error: SVE vector length has changed since jvm startup");
|
||||
bind(verify_ok);
|
||||
}
|
||||
|
||||
void MacroAssembler::verify_ptrue() {
|
||||
Label verify_ok;
|
||||
assert(UseSVE > 0, "should only be used for SVE");
|
||||
sve_cntp(rscratch1, B, ptrue, ptrue); // get true elements count.
|
||||
sve_dec(rscratch1, B);
|
||||
cbz(rscratch1, verify_ok);
|
||||
stop("Error: the preserved predicate register (p7) elements are not all true");
|
||||
bind(verify_ok);
|
||||
}
|
||||
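The check relies on sve_cntp counting the true byte lanes of p7 (using p7 itself as the governing predicate) and sve_dec then subtracting the total number of byte lanes in one vector, so the scratch register is zero only if every lane of p7 is still true.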
|
@ -873,8 +873,10 @@ public:
|
||||
|
||||
DEBUG_ONLY(void verify_heapbase(const char* msg);)
|
||||
|
||||
void push_CPU_state(bool save_vectors = false);
|
||||
void pop_CPU_state(bool restore_vectors = false) ;
|
||||
void push_CPU_state(bool save_vectors = false, bool use_sve = false,
|
||||
int sve_vector_size_in_bytes = 0);
|
||||
void pop_CPU_state(bool restore_vectors = false, bool use_sve = false,
|
||||
int sve_vector_size_in_bytes = 0);
|
||||
|
||||
// Round up to a power of two
|
||||
void round_to(Register reg, int modulus);
|
||||
@ -954,6 +956,11 @@ public:
|
||||
|
||||
Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
|
||||
|
||||
void verify_sve_vector_length();
|
||||
void reinitialize_ptrue() {
|
||||
sve_ptrue(ptrue, B);
|
||||
}
|
||||
void verify_ptrue();
|
||||
|
||||
// Debugging
|
||||
|
||||
@ -1303,6 +1310,7 @@ private:
|
||||
// Returns an address on the stack which is reachable with a ldr/str of the given size
|
||||
// Uses rscratch2 if the address is not directly reachable
|
||||
Address spill_address(int size, int offset, Register tmp=rscratch2);
|
||||
Address sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp=rscratch2);
|
||||
|
||||
bool merge_alignment_check(Register base, size_t size, int64_t cur_offset, int64_t prev_offset) const;
|
||||
|
||||
@ -1326,6 +1334,9 @@ public:
|
||||
void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
|
||||
str(Vx, T, spill_address(1 << (int)T, offset));
|
||||
}
|
||||
void spill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
|
||||
sve_str(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
|
||||
}
|
||||
void unspill(Register Rx, bool is64, int offset) {
|
||||
if (is64) {
|
||||
ldr(Rx, spill_address(8, offset));
|
||||
@ -1336,6 +1347,9 @@ public:
|
||||
void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
|
||||
ldr(Vx, T, spill_address(1 << (int)T, offset));
|
||||
}
|
||||
void unspill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
|
||||
sve_ldr(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
|
||||
}
|
||||
void spill_copy128(int src_offset, int dst_offset,
|
||||
Register tmp1=rscratch1, Register tmp2=rscratch2) {
|
||||
if (src_offset < 512 && (src_offset & 7) == 0 &&
|
||||
@ -1349,7 +1363,15 @@ public:
|
||||
spill(tmp1, true, dst_offset+8);
|
||||
}
|
||||
}
|
||||
|
||||
void spill_copy_sve_vector_stack_to_stack(int src_offset, int dst_offset,
|
||||
int sve_vec_reg_size_in_bytes) {
|
||||
assert(sve_vec_reg_size_in_bytes % 16 == 0, "unexpected sve vector reg size");
|
||||
for (int i = 0; i < sve_vec_reg_size_in_bytes / 16; i++) {
|
||||
spill_copy128(src_offset, dst_offset);
|
||||
src_offset += 16;
|
||||
dst_offset += 16;
|
||||
}
|
||||
}
|
||||
void cache_wb(Address line);
|
||||
void cache_wbsync(bool is_pre);
|
||||
};
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
||||
* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -33,6 +33,9 @@ const int ConcreteRegisterImpl::max_fpr
|
||||
= ConcreteRegisterImpl::max_gpr +
|
||||
FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register;
|
||||
|
||||
const int ConcreteRegisterImpl::max_pr
|
||||
= ConcreteRegisterImpl::max_fpr + PRegisterImpl::number_of_registers;
|
||||
|
||||
const char* RegisterImpl::name() const {
|
||||
const char* names[number_of_registers] = {
|
||||
"c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7",
|
||||
@ -54,3 +57,11 @@ const char* FloatRegisterImpl::name() const {
|
||||
};
|
||||
return is_valid() ? names[encoding()] : "noreg";
|
||||
}
|
||||
|
||||
const char* PRegisterImpl::name() const {
|
||||
const char* names[number_of_registers] = {
|
||||
"p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7",
|
||||
"p8", "p9", "p10", "p11", "p12", "p13", "p14", "p15"
|
||||
};
|
||||
return is_valid() ? names[encoding()] : "noreg";
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -129,9 +129,10 @@ class FloatRegisterImpl: public AbstractRegisterImpl {
|
||||
public:
|
||||
enum {
|
||||
number_of_registers = 32,
|
||||
max_slots_per_register = 4,
|
||||
max_slots_per_register = 8,
|
||||
save_slots_per_register = 2,
|
||||
extra_save_slots_per_register = max_slots_per_register - save_slots_per_register
|
||||
slots_per_neon_register = 4,
|
||||
extra_save_slots_per_neon_register = slots_per_neon_register - save_slots_per_register
|
||||
};
|
||||
|
||||
// construction
|
||||
@ -187,6 +188,88 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, v29 , (29));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, v30 , (30));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, v31 , (31));
|
||||
|
||||
// SVE vector registers, shared with the SIMD&FP v0-v31. Vn maps to Zn[127:0].
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z0 , ( 0));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z1 , ( 1));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z2 , ( 2));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z3 , ( 3));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z4 , ( 4));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z5 , ( 5));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z6 , ( 6));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z7 , ( 7));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z8 , ( 8));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z9 , ( 9));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z10 , (10));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z11 , (11));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z12 , (12));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z13 , (13));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z14 , (14));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z15 , (15));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z16 , (16));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z17 , (17));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z18 , (18));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z19 , (19));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z20 , (20));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z21 , (21));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z22 , (22));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z23 , (23));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z24 , (24));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z25 , (25));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z26 , (26));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z27 , (27));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z28 , (28));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z29 , (29));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z30 , (30));
|
||||
CONSTANT_REGISTER_DECLARATION(FloatRegister, z31 , (31));
|
||||
|
||||
|
||||
class PRegisterImpl;
|
||||
typedef PRegisterImpl* PRegister;
|
||||
inline PRegister as_PRegister(int encoding) {
|
||||
return (PRegister)(intptr_t)encoding;
|
||||
}
|
||||
|
||||
// The implementation of predicate registers for the architecture
|
||||
class PRegisterImpl: public AbstractRegisterImpl {
|
||||
public:
|
||||
enum {
|
||||
number_of_registers = 16,
|
||||
max_slots_per_register = 1
|
||||
};
|
||||
|
||||
// construction
|
||||
inline friend PRegister as_PRegister(int encoding);
|
||||
|
||||
VMReg as_VMReg();
|
||||
|
||||
// derived registers, offsets, and addresses
|
||||
PRegister successor() const { return as_PRegister(encoding() + 1); }
|
||||
|
||||
// accessors
|
||||
int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; }
|
||||
int encoding_nocheck() const { return (intptr_t)this; }
|
||||
bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
|
||||
const char* name() const;
|
||||
};
|
||||
|
||||
// The predicate registers of SVE.
|
||||
CONSTANT_REGISTER_DECLARATION(PRegister, p0, ( 0));
|
||||
CONSTANT_REGISTER_DECLARATION(PRegister, p1, ( 1));
|
||||
CONSTANT_REGISTER_DECLARATION(PRegister, p2, ( 2));
|
||||
CONSTANT_REGISTER_DECLARATION(PRegister, p3, ( 3));
|
||||
CONSTANT_REGISTER_DECLARATION(PRegister, p4, ( 4));
|
||||
CONSTANT_REGISTER_DECLARATION(PRegister, p5, ( 5));
|
||||
CONSTANT_REGISTER_DECLARATION(PRegister, p6, ( 6));
|
||||
CONSTANT_REGISTER_DECLARATION(PRegister, p7, ( 7));
|
||||
CONSTANT_REGISTER_DECLARATION(PRegister, p8, ( 8));
|
||||
CONSTANT_REGISTER_DECLARATION(PRegister, p9, ( 9));
|
||||
CONSTANT_REGISTER_DECLARATION(PRegister, p10, (10));
|
||||
CONSTANT_REGISTER_DECLARATION(PRegister, p11, (11));
|
||||
CONSTANT_REGISTER_DECLARATION(PRegister, p12, (12));
|
||||
CONSTANT_REGISTER_DECLARATION(PRegister, p13, (13));
|
||||
CONSTANT_REGISTER_DECLARATION(PRegister, p14, (14));
|
||||
CONSTANT_REGISTER_DECLARATION(PRegister, p15, (15));
|
||||
|
||||
// Need to know the total number of registers of all sorts for SharedInfo.
|
||||
// Define a class that exports it.
|
||||
class ConcreteRegisterImpl : public AbstractRegisterImpl {
|
||||
@ -199,12 +282,14 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl {
|
||||
|
||||
number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers +
|
||||
FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers +
|
||||
PRegisterImpl::max_slots_per_register * PRegisterImpl::number_of_registers +
|
||||
1) // flags
|
||||
};
|
||||
|
||||
// added to make it compile
|
||||
static const int max_gpr;
|
||||
static const int max_fpr;
|
||||
static const int max_pr;
|
||||
};
|
||||
|
||||
// A set of registers
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
@ -154,3 +154,55 @@ REGISTER_DEFINITION(Register, rthread);
|
||||
REGISTER_DEFINITION(Register, rheapbase);
|
||||
|
||||
REGISTER_DEFINITION(Register, r31_sp);
|
||||
|
||||
REGISTER_DEFINITION(FloatRegister, z0);
|
||||
REGISTER_DEFINITION(FloatRegister, z1);
|
||||
REGISTER_DEFINITION(FloatRegister, z2);
|
||||
REGISTER_DEFINITION(FloatRegister, z3);
|
||||
REGISTER_DEFINITION(FloatRegister, z4);
|
||||
REGISTER_DEFINITION(FloatRegister, z5);
|
||||
REGISTER_DEFINITION(FloatRegister, z6);
|
||||
REGISTER_DEFINITION(FloatRegister, z7);
|
||||
REGISTER_DEFINITION(FloatRegister, z8);
|
||||
REGISTER_DEFINITION(FloatRegister, z9);
|
||||
REGISTER_DEFINITION(FloatRegister, z10);
|
||||
REGISTER_DEFINITION(FloatRegister, z11);
|
||||
REGISTER_DEFINITION(FloatRegister, z12);
|
||||
REGISTER_DEFINITION(FloatRegister, z13);
|
||||
REGISTER_DEFINITION(FloatRegister, z14);
|
||||
REGISTER_DEFINITION(FloatRegister, z15);
|
||||
REGISTER_DEFINITION(FloatRegister, z16);
|
||||
REGISTER_DEFINITION(FloatRegister, z17);
|
||||
REGISTER_DEFINITION(FloatRegister, z18);
|
||||
REGISTER_DEFINITION(FloatRegister, z19);
|
||||
REGISTER_DEFINITION(FloatRegister, z20);
|
||||
REGISTER_DEFINITION(FloatRegister, z21);
|
||||
REGISTER_DEFINITION(FloatRegister, z22);
|
||||
REGISTER_DEFINITION(FloatRegister, z23);
|
||||
REGISTER_DEFINITION(FloatRegister, z24);
|
||||
REGISTER_DEFINITION(FloatRegister, z25);
|
||||
REGISTER_DEFINITION(FloatRegister, z26);
|
||||
REGISTER_DEFINITION(FloatRegister, z27);
|
||||
REGISTER_DEFINITION(FloatRegister, z28);
|
||||
REGISTER_DEFINITION(FloatRegister, z29);
|
||||
REGISTER_DEFINITION(FloatRegister, z30);
|
||||
REGISTER_DEFINITION(FloatRegister, z31);
|
||||
|
||||
REGISTER_DEFINITION(PRegister, p0);
|
||||
REGISTER_DEFINITION(PRegister, p1);
|
||||
REGISTER_DEFINITION(PRegister, p2);
|
||||
REGISTER_DEFINITION(PRegister, p3);
|
||||
REGISTER_DEFINITION(PRegister, p4);
|
||||
REGISTER_DEFINITION(PRegister, p5);
|
||||
REGISTER_DEFINITION(PRegister, p6);
|
||||
REGISTER_DEFINITION(PRegister, p7);
|
||||
REGISTER_DEFINITION(PRegister, p8);
|
||||
REGISTER_DEFINITION(PRegister, p9);
|
||||
REGISTER_DEFINITION(PRegister, p10);
|
||||
REGISTER_DEFINITION(PRegister, p11);
|
||||
REGISTER_DEFINITION(PRegister, p12);
|
||||
REGISTER_DEFINITION(PRegister, p13);
|
||||
REGISTER_DEFINITION(PRegister, p14);
|
||||
REGISTER_DEFINITION(PRegister, p15);
|
||||
|
||||
REGISTER_DEFINITION(PRegister, ptrue);
|
||||
|
@ -115,11 +115,28 @@ class RegisterSaver {
|
||||
};
|
||||
|
||||
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
|
||||
bool use_sve = false;
|
||||
int sve_vector_size_in_bytes = 0;
|
||||
int sve_vector_size_in_slots = 0;
|
||||
|
||||
#ifdef COMPILER2
|
||||
use_sve = Matcher::supports_scalable_vector();
|
||||
sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
|
||||
sve_vector_size_in_slots = Matcher::scalable_vector_reg_size(T_FLOAT);
|
||||
#endif
|
||||
|
||||
#if COMPILER2_OR_JVMCI
|
||||
if (save_vectors) {
|
||||
int vect_words = 0;
|
||||
int extra_save_slots_per_register = 0;
|
||||
// Save upper half of vector registers
|
||||
int vect_words = FloatRegisterImpl::number_of_registers * FloatRegisterImpl::extra_save_slots_per_register /
|
||||
VMRegImpl::slots_per_word;
|
||||
if (use_sve) {
|
||||
extra_save_slots_per_register = sve_vector_size_in_slots - FloatRegisterImpl::save_slots_per_register;
|
||||
} else {
|
||||
extra_save_slots_per_register = FloatRegisterImpl::extra_save_slots_per_neon_register;
|
||||
}
|
||||
vect_words = FloatRegisterImpl::number_of_registers * extra_save_slots_per_register /
|
||||
VMRegImpl::slots_per_word;
|
||||
additional_frame_words += vect_words;
|
||||
}
|
||||
#else
|
||||
@ -138,7 +155,7 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
|
||||
// Save Integer and Float registers.
|
||||
__ enter();
|
||||
__ push_CPU_state(save_vectors);
|
||||
__ push_CPU_state(save_vectors, use_sve, sve_vector_size_in_bytes);
|
||||
|
||||
// Set an oopmap for the call site. This oopmap will map all
|
||||
// oop-registers and debug-info registers as callee-saved. This
|
||||
@ -162,8 +179,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
|
||||
for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
|
||||
FloatRegister r = as_FloatRegister(i);
|
||||
int sp_offset = save_vectors ? (FloatRegisterImpl::max_slots_per_register * i) :
|
||||
(FloatRegisterImpl::save_slots_per_register * i);
|
||||
int sp_offset = 0;
|
||||
if (save_vectors) {
|
||||
sp_offset = use_sve ? (sve_vector_size_in_slots * i) :
|
||||
(FloatRegisterImpl::slots_per_neon_register * i);
|
||||
} else {
|
||||
sp_offset = FloatRegisterImpl::save_slots_per_register * i;
|
||||
}
|
||||
oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
|
||||
r->as_VMReg());
|
||||
}
|
||||
@ -172,10 +194,15 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_
|
||||
}
|
||||
|
||||
void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
|
||||
#if !COMPILER2_OR_JVMCI
|
||||
#ifdef COMPILER2
|
||||
__ pop_CPU_state(restore_vectors, Matcher::supports_scalable_vector(),
|
||||
Matcher::scalable_vector_reg_size(T_BYTE));
|
||||
#else
|
||||
#if !INCLUDE_JVMCI
|
||||
assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
|
||||
#endif
|
||||
__ pop_CPU_state(restore_vectors);
|
||||
#endif
|
||||
__ leave();
|
||||
|
||||
}
|
||||
@ -1842,6 +1869,11 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
|
||||
// Force this write out before the read below
|
||||
__ dmb(Assembler::ISH);
|
||||
|
||||
if (UseSVE > 0) {
|
||||
// Make sure that jni code does not change SVE vector length.
|
||||
__ verify_sve_vector_length();
|
||||
}
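Native code reached through JNI may issue its own prctl(PR_SVE_SET_VL) and silently change the thread's SVE vector length, while C2-compiled code assumes the length chosen at startup (MaxVectorSize). A plausible shape for verify_sve_vector_length(), sketched as an assumption rather than quoted from the patch, reads the live length and stops the VM on a mismatch:

    // hedged sketch of the check, in SVE assembly terms:
    //   rdvl  x8, #1                 // current vector length in bytes
    //   cmp   x8, #MaxVectorSize
    //   b.eq  ok
    //   stop("SVE vector length changed by native code")
    // ok: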
|
||||
|
||||
// check for safepoint operation in progress and/or pending suspend requests
|
||||
Label safepoint_in_progress, safepoint_in_progress_done;
|
||||
{
|
||||
@ -2774,6 +2806,12 @@ SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_t
|
||||
__ maybe_isb();
|
||||
__ membar(Assembler::LoadLoad | Assembler::LoadStore);
|
||||
|
||||
if (UseSVE > 0 && save_vectors) {
|
||||
// Reinitialize the ptrue predicate register, in case the external runtime
|
||||
// call clobbers ptrue reg, as we may return to SVE compiled code.
|
||||
__ reinitialize_ptrue();
|
||||
}
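The ptrue operand is the dedicated always-true predicate register declared in register_definitions_aarch64.cpp earlier in this change. SVE-compiled code relies on it holding an all-lanes-active predicate, and an external runtime call may clobber any predicate register, so it is re-created before control can return to compiled code. Re-materializing it is presumably a single instruction, on the assumption that ptrue is aliased to a fixed predicate register such as p7:

    //   ptrue  p7.b                  // every lane active; a hedged guess at the macro body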
|
||||
|
||||
__ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
|
||||
__ cbz(rscratch1, noException);
|
||||
|
||||
|
@ -488,6 +488,11 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ call_VM_leaf(CAST_FROM_FN_PTR(address,
|
||||
SharedRuntime::exception_handler_for_return_address),
|
||||
rthread, c_rarg1);
|
||||
if (UseSVE > 0) {
|
||||
// Reinitialize the ptrue predicate register, in case the external runtime
|
||||
// call clobbers ptrue reg, as we may return to SVE compiled code.
|
||||
__ reinitialize_ptrue();
|
||||
}
|
||||
// we should not really care that lr is no longer the callee
|
||||
// address. we saved the value the handler needs in r19 so we can
|
||||
// just copy it to r3. however, the C2 handler will push its own
|
||||
@ -5018,6 +5023,12 @@ class StubGenerator: public StubCodeGenerator {
|
||||
__ reset_last_Java_frame(true);
|
||||
__ maybe_isb();
|
||||
|
||||
if (UseSVE > 0) {
|
||||
// Reinitialize the ptrue predicate register, in case the external runtime
|
||||
// call clobbers ptrue reg, as we may return to SVE compiled code.
|
||||
__ reinitialize_ptrue();
|
||||
}
|
||||
|
||||
__ leave();
|
||||
|
||||
// check for pending exceptions
|
||||
|
@ -1372,6 +1372,11 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
|
||||
__ push(dtos);
|
||||
__ push(ltos);
|
||||
|
||||
if (UseSVE > 0) {
|
||||
// Make sure that jni code does not change SVE vector length.
|
||||
__ verify_sve_vector_length();
|
||||
}
|
||||
|
||||
// change thread state
|
||||
__ mov(rscratch1, _thread_in_native_trans);
|
||||
__ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
|
||||
|
@ -32,12 +32,14 @@
|
||||
#include "runtime/os.hpp"
|
||||
#include "runtime/stubCodeGenerator.hpp"
|
||||
#include "runtime/vm_version.hpp"
|
||||
#include "utilities/formatBuffer.hpp"
|
||||
#include "utilities/macros.hpp"
|
||||
|
||||
#include OS_HEADER_INLINE(os)
|
||||
|
||||
#include <sys/auxv.h>
|
||||
#include <asm/hwcap.h>
|
||||
#include <sys/auxv.h>
|
||||
#include <sys/prctl.h>
|
||||
|
||||
#ifndef HWCAP_AES
|
||||
#define HWCAP_AES (1<<3)
|
||||
@ -67,6 +69,20 @@
|
||||
#define HWCAP_SHA512 (1 << 21)
|
||||
#endif
|
||||
|
||||
#ifndef HWCAP_SVE
|
||||
#define HWCAP_SVE (1 << 22)
|
||||
#endif
|
||||
|
||||
#ifndef HWCAP2_SVE2
|
||||
#define HWCAP2_SVE2 (1 << 1)
|
||||
#endif
|
||||
|
||||
#ifndef PR_SVE_GET_VL
|
||||
// For old toolchains which do not have SVE related macros defined.
|
||||
#define PR_SVE_SET_VL 50
|
||||
#define PR_SVE_GET_VL 51
|
||||
#endif
|
||||
|
||||
int VM_Version::_cpu;
|
||||
int VM_Version::_model;
|
||||
int VM_Version::_model2;
|
||||
@ -74,6 +90,7 @@ int VM_Version::_variant;
|
||||
int VM_Version::_revision;
|
||||
int VM_Version::_stepping;
|
||||
bool VM_Version::_dcpop;
|
||||
int VM_Version::_initial_sve_vector_length;
|
||||
VM_Version::PsrInfo VM_Version::_psr_info = { 0, };
|
||||
|
||||
static BufferBlob* stub_blob;
|
||||
@ -116,7 +133,6 @@ class VM_Version_StubGenerator: public StubCodeGenerator {
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
void VM_Version::get_processor_features() {
|
||||
_supports_cx8 = true;
|
||||
_supports_atomic_getset4 = true;
|
||||
@ -167,6 +183,7 @@ void VM_Version::get_processor_features() {
|
||||
}
|
||||
|
||||
uint64_t auxv = getauxval(AT_HWCAP);
|
||||
uint64_t auxv2 = getauxval(AT_HWCAP2);
|
||||
|
||||
char buf[512];
|
||||
|
||||
@ -298,6 +315,8 @@ void VM_Version::get_processor_features() {
|
||||
if (auxv & HWCAP_SHA2) strcat(buf, ", sha256");
|
||||
if (auxv & HWCAP_SHA512) strcat(buf, ", sha512");
|
||||
if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse");
|
||||
if (auxv & HWCAP_SVE) strcat(buf, ", sve");
|
||||
if (auxv2 & HWCAP2_SVE2) strcat(buf, ", sve2");
|
||||
|
||||
_features_string = os::strdup(buf);
|
||||
|
||||
@ -437,6 +456,18 @@ void VM_Version::get_processor_features() {
|
||||
FLAG_SET_DEFAULT(UseBlockZeroing, false);
|
||||
}
|
||||
|
||||
  if (auxv & HWCAP_SVE) {
    if (FLAG_IS_DEFAULT(UseSVE)) {
      FLAG_SET_DEFAULT(UseSVE, (auxv2 & HWCAP2_SVE2) ? 2 : 1);
    }
    if (UseSVE > 0) {
      _initial_sve_vector_length = prctl(PR_SVE_GET_VL);
    }
  } else if (UseSVE > 0) {
    warning("UseSVE specified, but not supported on current CPU. Disabling SVE.");
    FLAG_SET_DEFAULT(UseSVE, 0);
  }
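The probe above uses two standard Linux interfaces: the HWCAP_SVE bit reported by getauxval(AT_HWCAP) and prctl(PR_SVE_GET_VL) for the vector length currently in effect. A minimal standalone sketch of the same probe outside the JVM (the fallback defines mirror the ones added earlier in this file; masking with 0xffff is the vector-length field documented by the kernel):

    #include <stdio.h>
    #include <sys/auxv.h>
    #include <sys/prctl.h>

    #ifndef HWCAP_SVE
    #define HWCAP_SVE (1 << 22)
    #endif
    #ifndef PR_SVE_GET_VL
    #define PR_SVE_GET_VL 51
    #endif

    int main() {
      if (getauxval(AT_HWCAP) & HWCAP_SVE) {
        // The vector length in bytes is carried in the low 16 bits of the result.
        int vl = prctl(PR_SVE_GET_VL) & 0xffff;
        printf("SVE available, current vector length: %d bytes\n", vl);
      } else {
        printf("SVE not available\n");
      }
      return 0;
    }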
// This machine allows unaligned memory accesses
|
||||
if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
|
||||
FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
|
||||
@ -471,6 +502,50 @@ void VM_Version::get_processor_features() {
|
||||
UseMontgomerySquareIntrinsic = true;
|
||||
}
|
||||
|
||||
  if (UseSVE > 0) {
    if (FLAG_IS_DEFAULT(MaxVectorSize)) {
      MaxVectorSize = _initial_sve_vector_length;
    } else if (MaxVectorSize < 16) {
      warning("SVE does not support vector length less than 16 bytes. Disabling SVE.");
      UseSVE = 0;
    } else if ((MaxVectorSize % 16) == 0 && is_power_of_2(MaxVectorSize)) {
      int new_vl = prctl(PR_SVE_SET_VL, MaxVectorSize);
      _initial_sve_vector_length = new_vl;
      // If MaxVectorSize is larger than system largest supported SVE vector length, above prctl()
      // call will set task vector length to the system largest supported value. So, we also update
      // MaxVectorSize to that largest supported value.
      if (new_vl < 0) {
        vm_exit_during_initialization(
          err_msg("Current system does not support SVE vector length for MaxVectorSize: %d",
                  (int)MaxVectorSize));
      } else if (new_vl != MaxVectorSize) {
        warning("Current system only supports max SVE vector length %d. Set MaxVectorSize to %d",
                new_vl, new_vl);
      }
      MaxVectorSize = new_vl;
    } else {
      vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize));
    }
  }
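The prctl(PR_SVE_SET_VL, ...) negotiation above leans on documented kernel behaviour: a request larger than the hardware maximum is not an error, the kernel clamps it to the largest supported length and returns the configuration that actually took effect, while a malformed request fails with -1. A hedged helper sketch of that contract, with an illustrative name that is not part of the patch:

    static int request_sve_vector_length(int requested_bytes) {
      int granted = prctl(PR_SVE_SET_VL, requested_bytes);
      if (granted < 0) {
        return -1;               // rejected outright, e.g. not a multiple of 16
      }
      // The kernel may have clamped the request; the low 16 bits hold the
      // vector length now in effect for this thread.
      return granted & 0xffff;
    }
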
if (UseSVE == 0) { // NEON
|
||||
int min_vector_size = 8;
|
||||
int max_vector_size = 16;
|
||||
if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
|
||||
if (!is_power_of_2(MaxVectorSize)) {
|
||||
vm_exit_during_initialization(err_msg("Unsupported MaxVectorSize: %d", (int)MaxVectorSize));
|
||||
} else if (MaxVectorSize < min_vector_size) {
|
||||
warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
|
||||
FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
|
||||
} else if (MaxVectorSize > max_vector_size) {
|
||||
warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
|
||||
FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
|
||||
}
|
||||
} else {
|
||||
FLAG_SET_DEFAULT(MaxVectorSize, 16);
|
||||
}
|
||||
}
|
||||
|
||||
if (FLAG_IS_DEFAULT(OptoScheduling)) {
|
||||
OptoScheduling = true;
|
||||
}
|
||||
|
@ -41,6 +41,8 @@ protected:
|
||||
static int _revision;
|
||||
static int _stepping;
|
||||
static bool _dcpop;
|
||||
static int _initial_sve_vector_length;
|
||||
|
||||
struct PsrInfo {
|
||||
uint32_t dczid_el0;
|
||||
uint32_t ctr_el0;
|
||||
@ -106,6 +108,7 @@ public:
|
||||
static int cpu_variant() { return _variant; }
|
||||
static int cpu_revision() { return _revision; }
|
||||
static bool supports_dcpop() { return _dcpop; }
|
||||
static int get_initial_sve_vector_length() { return _initial_sve_vector_length; };
|
||||
static ByteSize dczid_el0_offset() { return byte_offset_of(PsrInfo, dczid_el0); }
|
||||
static ByteSize ctr_el0_offset() { return byte_offset_of(PsrInfo, ctr_el0); }
|
||||
static bool is_zva_enabled() {
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, Red Hat Inc. All rights reserved.
|
||||
* Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -36,4 +36,8 @@ inline VMReg FloatRegisterImpl::as_VMReg() {
|
||||
ConcreteRegisterImpl::max_gpr);
|
||||
}
|
||||
|
||||
inline VMReg PRegisterImpl::as_VMReg() {
|
||||
return VMRegImpl::as_VMReg(encoding() + ConcreteRegisterImpl::max_fpr);
|
||||
}
|
||||
|
||||
#endif // CPU_AARCH64_VMREG_AARCH64_INLINE_HPP
|
||||
|
@ -1006,6 +1006,14 @@ const int Matcher::vector_width_in_bytes(BasicType bt) {
|
||||
return MaxVectorSize;
|
||||
}
|
||||
|
||||
const bool Matcher::supports_scalable_vector() {
  return false;
}

const int Matcher::scalable_vector_reg_size(const BasicType bt) {
  return -1;
}
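These two Matcher queries are the platform contract introduced by this change: supports_scalable_vector() gates everything VecA-related, and scalable_vector_reg_size(bt) reports how many elements of the given basic type fit in one scalable register. Every non-SVE backend stubs them out as above. On AArch64 the expectation is roughly the following, sketched here rather than quoted from the aarch64 ad file:

    const bool Matcher::supports_scalable_vector() {
      return UseSVE > 0;
    }

    const int Matcher::scalable_vector_reg_size(const BasicType bt) {
      return Matcher::max_vector_size(bt);   // elements per register at the current MaxVectorSize
    }
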
// Vector ideal reg corresponding to specified size in bytes
|
||||
const uint Matcher::vector_ideal_reg(int size) {
|
||||
assert(MaxVectorSize >= size, "");
|
||||
|
@ -2379,6 +2379,14 @@ const int Matcher::min_vector_size(const BasicType bt) {
|
||||
return max_vector_size(bt); // Same as max.
|
||||
}
|
||||
|
||||
const bool Matcher::supports_scalable_vector() {
|
||||
return false;
|
||||
}
|
||||
|
||||
const int Matcher::scalable_vector_reg_size(const BasicType bt) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// PPC implementation uses VSX load/store instructions (if
|
||||
// SuperwordUseVSX) which support 4 byte but not arbitrary alignment
|
||||
const bool Matcher::misaligned_vectors_ok() {
|
||||
|
@ -1610,6 +1610,14 @@ const int Matcher::min_vector_size(const BasicType bt) {
|
||||
return max_vector_size(bt); // Same as max.
|
||||
}
|
||||
|
||||
const bool Matcher::supports_scalable_vector() {
|
||||
return false;
|
||||
}
|
||||
|
||||
const int Matcher::scalable_vector_reg_size(const BasicType bt) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// z/Architecture does support misaligned store/load at minimal extra cost.
|
||||
const bool Matcher::misaligned_vectors_ok() {
|
||||
return true;
|
||||
|
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
// This code is free software; you can redistribute it and/or modify it
|
||||
@ -1615,6 +1615,14 @@ const int Matcher::min_vector_size(const BasicType bt) {
|
||||
return MIN2(size,max_size);
|
||||
}
|
||||
|
||||
const bool Matcher::supports_scalable_vector() {
|
||||
return false;
|
||||
}
|
||||
|
||||
const int Matcher::scalable_vector_reg_size(const BasicType bt) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Vector ideal reg corresponding to specified size in bytes
|
||||
const uint Matcher::vector_ideal_reg(int size) {
|
||||
assert(MaxVectorSize >= size, "");
|
||||
|
@ -2834,7 +2834,7 @@ frame
|
||||
RAX_H_num // Op_RegL
|
||||
};
|
||||
// Excluded flags and vector registers.
|
||||
assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type");
|
||||
assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type");
|
||||
return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
|
||||
%}
|
||||
%}
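The expected size drops from _last_machine_leaf - 6 to _last_machine_leaf - 8 because this change inserts two new machine leaf opcodes, VecA and RegVMask, ahead of _last_machine_leaf (see the opcodes.hpp and node.cpp hunks below); the hi[]/lo[] tables here still exclude the flags register and all vector registers.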
|
||||
|
@ -1,5 +1,5 @@
|
||||
//
|
||||
// Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved.
|
||||
// Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
//
|
||||
// This code is free software; you can redistribute it and/or modify it
|
||||
@ -934,6 +934,7 @@ const char *ArchDesc::getIdealType(const char *idealOp) {
|
||||
// Match Vector types.
|
||||
if (strncmp(idealOp, "Vec",3)==0) {
|
||||
switch(last_char) {
|
||||
case 'A': return "TypeVect::VECTA";
|
||||
case 'S': return "TypeVect::VECTS";
|
||||
case 'D': return "TypeVect::VECTD";
|
||||
case 'X': return "TypeVect::VECTX";
|
||||
@ -944,6 +945,10 @@ const char *ArchDesc::getIdealType(const char *idealOp) {
|
||||
}
|
||||
}
|
||||
|
||||
if (strncmp(idealOp, "RegVMask", 8) == 0) {
|
||||
return "Type::BOTTOM";
|
||||
}
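RegVMask is the new ideal type for SVE predicate (vector mask) registers. It has no Java-level value type behind it, so ADLC maps it to Type::BOTTOM here rather than to one of the TypeVect variants.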
|
||||
|
||||
// !!!!!
|
||||
switch(last_char) {
|
||||
case 'I': return "TypeInt::INT";
|
||||
|
@ -3942,6 +3942,8 @@ bool MatchRule::is_base_register(FormDict &globals) const {
|
||||
strcmp(opType,"RegL")==0 ||
|
||||
strcmp(opType,"RegF")==0 ||
|
||||
strcmp(opType,"RegD")==0 ||
|
||||
strcmp(opType,"RegVMask")==0 ||
|
||||
strcmp(opType,"VecA")==0 ||
|
||||
strcmp(opType,"VecS")==0 ||
|
||||
strcmp(opType,"VecD")==0 ||
|
||||
strcmp(opType,"VecX")==0 ||
|
||||
|
@ -77,6 +77,7 @@ void LRG::dump() const {
|
||||
if( _is_oop ) tty->print("Oop ");
|
||||
if( _is_float ) tty->print("Float ");
|
||||
if( _is_vector ) tty->print("Vector ");
|
||||
if( _is_scalable ) tty->print("Scalable ");
|
||||
if( _was_spilled1 ) tty->print("Spilled ");
|
||||
if( _was_spilled2 ) tty->print("Spilled2 ");
|
||||
if( _direct_conflict ) tty->print("Direct_conflict ");
|
||||
@ -644,7 +645,15 @@ void PhaseChaitin::Register_Allocate() {
|
||||
// Live ranges record the highest register in their mask.
|
||||
// We want the low register for the AD file writer's convenience.
|
||||
OptoReg::Name hi = lrg.reg(); // Get hi register
|
||||
OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo
|
||||
int num_regs = lrg.num_regs();
|
||||
if (lrg.is_scalable() && OptoReg::is_stack(hi)) {
|
||||
// For scalable vector registers, when they are allocated in physical
|
||||
// registers, num_regs is RegMask::SlotsPerVecA for reg mask of scalable
|
||||
// vector. If they are allocated on stack, we need to get the actual
|
||||
// num_regs, which reflects the physical length of scalable registers.
|
||||
num_regs = lrg.scalable_reg_slots();
|
||||
}
|
||||
OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo
|
||||
// We have to use pair [lo,lo+1] even for wide vectors because
|
||||
// the rest of code generation works only with pairs. It is safe
|
||||
// since for registers encoding only 'lo' is used.
|
||||
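A concrete example of the distinction drawn above, assuming a 512-bit SVE implementation (MaxVectorSize = 64 bytes):

    // RegMask::SlotsPerVecA    = 8    fixed reg-mask granularity for VecA live ranges
    // lrg.scalable_reg_slots() = 16   64 bytes / 4-byte slots, the real spill footprint
    //
    // While the value lives in a z register the allocator reasons in 8-slot units;
    // only when it is spilled to the stack do the full 16 slots matter.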
@ -802,8 +811,19 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
|
||||
// Check for vector live range (only if vector register is used).
|
||||
// On SPARC vector uses RegD which could be misaligned so it is not
|
||||
// processes as vector in RA.
|
||||
if (RegMask::is_vector(ireg))
|
||||
if (RegMask::is_vector(ireg)) {
|
||||
lrg._is_vector = 1;
|
||||
if (ireg == Op_VecA) {
|
||||
assert(Matcher::supports_scalable_vector(), "scalable vector should be supported");
|
||||
lrg._is_scalable = 1;
|
||||
// For scalable vector, when it is allocated in physical register,
|
||||
// num_regs is RegMask::SlotsPerVecA for reg mask,
|
||||
// which may not be the actual physical register size.
|
||||
// If it is allocated in stack, we need to get the actual
|
||||
// physical length of scalable vector register.
|
||||
lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT));
|
||||
}
|
||||
}
|
||||
assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL,
|
||||
"vector must be in vector registers");
|
||||
|
||||
@ -905,6 +925,13 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
|
||||
lrg.set_num_regs(1);
|
||||
lrg.set_reg_pressure(1);
|
||||
break;
|
||||
case Op_VecA:
|
||||
assert(Matcher::supports_scalable_vector(), "does not support scalable vector");
|
||||
assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity");
|
||||
assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned");
|
||||
lrg.set_num_regs(RegMask::SlotsPerVecA);
|
||||
lrg.set_reg_pressure(1);
|
||||
break;
|
||||
case Op_VecS:
|
||||
assert(Matcher::vector_size_supported(T_BYTE,4), "sanity");
|
||||
assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity");
|
||||
@ -1305,6 +1332,46 @@ static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) {
|
||||
return false;
|
||||
}
|
||||
|
||||
static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) {
|
||||
int num_regs = lrg.num_regs();
|
||||
OptoReg::Name assigned = mask.find_first_set(lrg, num_regs);
|
||||
|
||||
if (lrg.is_scalable()) {
|
||||
// a physical register is found
|
||||
if (chunk == 0 && OptoReg::is_reg(assigned)) {
|
||||
return assigned;
|
||||
}
|
||||
|
||||
// find available stack slots for scalable register
|
||||
if (lrg._is_vector) {
|
||||
num_regs = lrg.scalable_reg_slots();
|
||||
// if actual scalable vector register is exactly SlotsPerVecA * 32 bits
|
||||
if (num_regs == RegMask::SlotsPerVecA) {
|
||||
return assigned;
|
||||
}
|
||||
|
||||
// mask has been cleared out by clear_to_sets(SlotsPerVecA) before choose_color, but it
|
||||
// does not work for scalable size. We have to find adjacent scalable_reg_slots() bits
|
||||
// instead of SlotsPerVecA bits.
|
||||
assigned = mask.find_first_set(lrg, num_regs); // find highest valid reg
|
||||
while (OptoReg::is_valid(assigned) && RegMask::can_represent(assigned)) {
|
||||
// Verify the found reg has scalable_reg_slots() bits set.
|
||||
if (mask.is_valid_reg(assigned, num_regs)) {
|
||||
return assigned;
|
||||
} else {
|
||||
// Remove more for each iteration
|
||||
mask.Remove(assigned - num_regs + 1); // Unmask the lowest reg
|
||||
mask.clear_to_sets(RegMask::SlotsPerVecA); // Align by SlotsPerVecA bits
|
||||
assigned = mask.find_first_set(lrg, num_regs);
|
||||
}
|
||||
}
|
||||
return OptoReg::Bad; // will cause chunk change, and retry next chunk
|
||||
}
|
||||
}
|
||||
|
||||
return assigned;
|
||||
}
|
||||
|
||||
// Choose a color using the biasing heuristic
|
||||
OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
|
||||
|
||||
@ -1338,7 +1405,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
|
||||
RegMask tempmask = lrg.mask();
|
||||
tempmask.AND(lrgs(copy_lrg).mask());
|
||||
tempmask.clear_to_sets(lrg.num_regs());
|
||||
OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs());
|
||||
OptoReg::Name reg = find_first_set(lrg, tempmask, chunk);
|
||||
if (OptoReg::is_valid(reg))
|
||||
return reg;
|
||||
}
|
||||
@ -1347,7 +1414,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
|
||||
// If no bias info exists, just go with the register selection ordering
|
||||
if (lrg._is_vector || lrg.num_regs() == 2) {
|
||||
// Find an aligned set
|
||||
return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk);
|
||||
return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk);
|
||||
}
|
||||
|
||||
// CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate
|
||||
@ -1402,7 +1469,6 @@ uint PhaseChaitin::Select( ) {
|
||||
LRG *lrg = &lrgs(lidx);
|
||||
_simplified = lrg->_next;
|
||||
|
||||
|
||||
#ifndef PRODUCT
|
||||
if (trace_spilling()) {
|
||||
ttyLocker ttyl;
|
||||
@ -1484,7 +1550,6 @@ uint PhaseChaitin::Select( ) {
|
||||
// Bump register mask up to next stack chunk
|
||||
chunk += RegMask::CHUNK_SIZE;
|
||||
lrg->Set_All();
|
||||
|
||||
goto retry_next_chunk;
|
||||
}
|
||||
|
||||
@ -1509,12 +1574,21 @@ uint PhaseChaitin::Select( ) {
|
||||
int n_regs = lrg->num_regs();
|
||||
assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity");
|
||||
if (n_regs == 1 || !lrg->_fat_proj) {
|
||||
assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity");
|
||||
if (Matcher::supports_scalable_vector()) {
|
||||
assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity");
|
||||
} else {
|
||||
assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity");
|
||||
}
|
||||
lrg->Clear(); // Clear the mask
|
||||
lrg->Insert(reg); // Set regmask to match selected reg
|
||||
// For vectors and pairs, also insert the low bit of the pair
|
||||
for (int i = 1; i < n_regs; i++)
|
||||
// We always choose the high bit, then mask the low bits by register size
|
||||
if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack
|
||||
n_regs = lrg->scalable_reg_slots();
|
||||
}
|
||||
for (int i = 1; i < n_regs; i++) {
|
||||
lrg->Insert(OptoReg::add(reg,-i));
|
||||
}
|
||||
lrg->set_mask_size(n_regs);
|
||||
} else { // Else fatproj
|
||||
// mask must be equal to fatproj bits, by definition
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -114,7 +114,9 @@ public:
|
||||
_msize_valid=1;
|
||||
if (_is_vector) {
|
||||
assert(!_fat_proj, "sanity");
|
||||
assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets");
|
||||
if (!(_is_scalable && OptoReg::is_stack(_reg))) {
|
||||
assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets");
|
||||
}
|
||||
} else if (_num_regs == 2 && !_fat_proj) {
|
||||
assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs");
|
||||
}
|
||||
@ -137,14 +139,37 @@ public:
|
||||
void Remove( OptoReg::Name reg ) { _mask.Remove(reg); debug_only(_msize_valid=0;) }
|
||||
void clear_to_sets() { _mask.clear_to_sets(_num_regs); debug_only(_msize_valid=0;) }
|
||||
|
||||
// Number of registers this live range uses when it colors
|
||||
private:
|
||||
// Number of registers this live range uses when it colors
|
||||
uint16_t _num_regs; // 2 for Longs and Doubles, 1 for all else
|
||||
// except _num_regs is kill count for fat_proj
|
||||
|
||||
// For scalable register, num_regs may not be the actual physical register size.
|
||||
// We need to get the actual physical length of scalable register when scalable
|
||||
// register is spilled. The size of one slot is 32-bit.
|
||||
uint _scalable_reg_slots; // Actual scalable register length of slots.
|
||||
// Meaningful only when _is_scalable is true.
|
||||
public:
|
||||
int num_regs() const { return _num_regs; }
|
||||
void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; }
|
||||
|
||||
uint scalable_reg_slots() { return _scalable_reg_slots; }
|
||||
void set_scalable_reg_slots(uint slots) {
|
||||
assert(_is_scalable, "scalable register");
|
||||
assert(slots > 0, "slots of scalable register is not valid");
|
||||
_scalable_reg_slots = slots;
|
||||
}
|
||||
|
||||
bool is_scalable() {
|
||||
#ifdef ASSERT
|
||||
if (_is_scalable) {
|
||||
// Should only be a vector for now, but it could also be a RegVMask in future.
|
||||
assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg");
|
||||
}
|
||||
#endif
|
||||
return _is_scalable;
|
||||
}
|
||||
|
||||
private:
|
||||
// Number of physical registers this live range uses when it colors
|
||||
// Architecture and register-set dependent
|
||||
@ -170,6 +195,8 @@ public:
|
||||
uint _is_oop:1, // Live-range holds an oop
|
||||
_is_float:1, // True if in float registers
|
||||
_is_vector:1, // True if in vector registers
|
||||
_is_scalable:1, // True if register size is scalable
|
||||
// e.g. Arm SVE vector/predicate registers.
|
||||
_was_spilled1:1, // True if prior spilling on def
|
||||
_was_spilled2:1, // True if twice prior spilling on def
|
||||
_is_bound:1, // live range starts life with no
|
||||
|
@ -88,6 +88,7 @@ Matcher::Matcher()
|
||||
idealreg2spillmask [Op_RegF] = NULL;
|
||||
idealreg2spillmask [Op_RegD] = NULL;
|
||||
idealreg2spillmask [Op_RegP] = NULL;
|
||||
idealreg2spillmask [Op_VecA] = NULL;
|
||||
idealreg2spillmask [Op_VecS] = NULL;
|
||||
idealreg2spillmask [Op_VecD] = NULL;
|
||||
idealreg2spillmask [Op_VecX] = NULL;
|
||||
@ -101,6 +102,7 @@ Matcher::Matcher()
|
||||
idealreg2debugmask [Op_RegF] = NULL;
|
||||
idealreg2debugmask [Op_RegD] = NULL;
|
||||
idealreg2debugmask [Op_RegP] = NULL;
|
||||
idealreg2debugmask [Op_VecA] = NULL;
|
||||
idealreg2debugmask [Op_VecS] = NULL;
|
||||
idealreg2debugmask [Op_VecD] = NULL;
|
||||
idealreg2debugmask [Op_VecX] = NULL;
|
||||
@ -114,6 +116,7 @@ Matcher::Matcher()
|
||||
idealreg2mhdebugmask[Op_RegF] = NULL;
|
||||
idealreg2mhdebugmask[Op_RegD] = NULL;
|
||||
idealreg2mhdebugmask[Op_RegP] = NULL;
|
||||
idealreg2mhdebugmask[Op_VecA] = NULL;
|
||||
idealreg2mhdebugmask[Op_VecS] = NULL;
|
||||
idealreg2mhdebugmask[Op_VecD] = NULL;
|
||||
idealreg2mhdebugmask[Op_VecX] = NULL;
|
||||
@ -427,7 +430,7 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
|
||||
return rms;
|
||||
}
|
||||
|
||||
#define NOF_STACK_MASKS (3*6+5)
|
||||
#define NOF_STACK_MASKS (3*6+6)
|
||||
|
||||
// Create the initial stack mask used by values spilling to the stack.
|
||||
// Disallow any debug info in outgoing argument areas by setting the
|
||||
@ -463,11 +466,12 @@ void Matcher::init_first_stack_mask() {
|
||||
idealreg2mhdebugmask[Op_RegD] = &rms[16];
|
||||
idealreg2mhdebugmask[Op_RegP] = &rms[17];
|
||||
|
||||
idealreg2spillmask [Op_VecS] = &rms[18];
|
||||
idealreg2spillmask [Op_VecD] = &rms[19];
|
||||
idealreg2spillmask [Op_VecX] = &rms[20];
|
||||
idealreg2spillmask [Op_VecY] = &rms[21];
|
||||
idealreg2spillmask [Op_VecZ] = &rms[22];
|
||||
idealreg2spillmask [Op_VecA] = &rms[18];
|
||||
idealreg2spillmask [Op_VecS] = &rms[19];
|
||||
idealreg2spillmask [Op_VecD] = &rms[20];
|
||||
idealreg2spillmask [Op_VecX] = &rms[21];
|
||||
idealreg2spillmask [Op_VecY] = &rms[22];
|
||||
idealreg2spillmask [Op_VecZ] = &rms[23];
|
||||
|
||||
OptoReg::Name i;
|
||||
|
||||
@ -494,6 +498,7 @@ void Matcher::init_first_stack_mask() {
|
||||
// Keep spill masks aligned.
|
||||
aligned_stack_mask.clear_to_pairs();
|
||||
assert(aligned_stack_mask.is_AllStack(), "should be infinite stack");
|
||||
RegMask scalable_stack_mask = aligned_stack_mask;
|
||||
|
||||
*idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
|
||||
#ifdef _LP64
|
||||
@ -564,28 +569,48 @@ void Matcher::init_first_stack_mask() {
|
||||
*idealreg2spillmask[Op_VecZ] = *idealreg2regmask[Op_VecZ];
|
||||
idealreg2spillmask[Op_VecZ]->OR(aligned_stack_mask);
|
||||
}
|
||||
if (UseFPUForSpilling) {
|
||||
// This mask logic assumes that the spill operations are
|
||||
// symmetric and that the registers involved are the same size.
|
||||
// On sparc for instance we may have to use 64 bit moves will
|
||||
// kill 2 registers when used with F0-F31.
|
||||
idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
|
||||
idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
|
||||
|
||||
if (Matcher::supports_scalable_vector()) {
|
||||
int k = 1;
|
||||
OptoReg::Name in = OptoReg::add(_in_arg_limit, -1);
|
||||
// Exclude last input arg stack slots to avoid spilling vector register there,
|
||||
// otherwise vector spills could stomp over stack slots in caller frame.
|
||||
for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) {
|
||||
scalable_stack_mask.Remove(in);
|
||||
in = OptoReg::add(in, -1);
|
||||
}
|
||||
|
||||
// For VecA
|
||||
scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA);
|
||||
assert(scalable_stack_mask.is_AllStack(), "should be infinite stack");
|
||||
*idealreg2spillmask[Op_VecA] = *idealreg2regmask[Op_VecA];
|
||||
idealreg2spillmask[Op_VecA]->OR(scalable_stack_mask);
|
||||
} else {
|
||||
*idealreg2spillmask[Op_VecA] = RegMask::Empty;
|
||||
}
|
||||
|
||||
if (UseFPUForSpilling) {
|
||||
// This mask logic assumes that the spill operations are
|
||||
// symmetric and that the registers involved are the same size.
|
||||
// On sparc for instance we may have to use 64 bit moves will
|
||||
// kill 2 registers when used with F0-F31.
|
||||
idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]);
|
||||
idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]);
|
||||
#ifdef _LP64
|
||||
idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
|
||||
idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
|
||||
idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
|
||||
idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
|
||||
idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]);
|
||||
idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
|
||||
idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
|
||||
idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]);
|
||||
#else
|
||||
idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
|
||||
idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]);
|
||||
#ifdef ARM
|
||||
// ARM has support for moving 64bit values between a pair of
|
||||
// integer registers and a double register
|
||||
idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
|
||||
idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
|
||||
// ARM has support for moving 64bit values between a pair of
|
||||
// integer registers and a double register
|
||||
idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]);
|
||||
idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// Make up debug masks. Any spill slot plus callee-save (SOE) registers.
|
||||
// Caller-save (SOC, AS) registers are assumed to be trashable by the various
|
||||
@ -878,6 +903,7 @@ void Matcher::init_spill_mask( Node *ret ) {
|
||||
idealreg2regmask[Op_RegF] = regmask_for_ideal_register(Op_RegF, ret);
|
||||
idealreg2regmask[Op_RegD] = regmask_for_ideal_register(Op_RegD, ret);
|
||||
idealreg2regmask[Op_RegL] = regmask_for_ideal_register(Op_RegL, ret);
|
||||
idealreg2regmask[Op_VecA] = regmask_for_ideal_register(Op_VecA, ret);
|
||||
idealreg2regmask[Op_VecS] = regmask_for_ideal_register(Op_VecS, ret);
|
||||
idealreg2regmask[Op_VecD] = regmask_for_ideal_register(Op_VecD, ret);
|
||||
idealreg2regmask[Op_VecX] = regmask_for_ideal_register(Op_VecX, ret);
|
||||
@ -1563,7 +1589,6 @@ Node* Matcher::Label_Root(const Node* n, State* svec, Node* control, Node*& mem)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Call DFA to match this node, and return
|
||||
svec->DFA( n->Opcode(), n );
|
||||
|
||||
@ -2421,7 +2446,7 @@ bool Matcher::gen_narrow_oop_implicit_null_checks() {
|
||||
const RegMask* Matcher::regmask_for_ideal_register(uint ideal_reg, Node* ret) {
|
||||
const Type* t = Type::mreg2type[ideal_reg];
|
||||
if (t == NULL) {
|
||||
assert(ideal_reg >= Op_VecS && ideal_reg <= Op_VecZ, "not a vector: %d", ideal_reg);
|
||||
assert(ideal_reg >= Op_VecA && ideal_reg <= Op_VecZ, "not a vector: %d", ideal_reg);
|
||||
return NULL; // not supported
|
||||
}
|
||||
Node* fp = ret->in(TypeFunc::FramePtr);
|
||||
@ -2438,6 +2463,7 @@ const RegMask* Matcher::regmask_for_ideal_register(uint ideal_reg, Node* ret) {
|
||||
case Op_RegD: spill = new LoadDNode(NULL, mem, fp, atp, t, mo); break;
|
||||
case Op_RegL: spill = new LoadLNode(NULL, mem, fp, atp, t->is_long(), mo); break;
|
||||
|
||||
case Op_VecA: // fall-through
|
||||
case Op_VecS: // fall-through
|
||||
case Op_VecD: // fall-through
|
||||
case Op_VecX: // fall-through
|
||||
|
@ -338,6 +338,10 @@ public:
|
||||
Matcher::min_vector_size(bt) <= size);
|
||||
}
|
||||
|
||||
static const bool supports_scalable_vector();
|
||||
// Actual max scalable vector register length.
|
||||
static const int scalable_vector_reg_size(const BasicType bt);
|
||||
|
||||
// Vector ideal reg
|
||||
static const uint vector_ideal_reg(int len);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -38,12 +38,14 @@ const char *NodeClassNames[] = {
|
||||
"RegF",
|
||||
"RegD",
|
||||
"RegL",
|
||||
"RegFlags",
|
||||
"VecA",
|
||||
"VecS",
|
||||
"VecD",
|
||||
"VecX",
|
||||
"VecY",
|
||||
"VecZ",
|
||||
"RegVMask",
|
||||
"RegFlags",
|
||||
"_last_machine_leaf",
|
||||
#include "classes.hpp"
|
||||
"_last_class_name",
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -37,11 +37,13 @@ enum Opcodes {
|
||||
macro(RegF) // Machine float register
|
||||
macro(RegD) // Machine double register
|
||||
macro(RegL) // Machine long register
|
||||
macro(VecA) // Machine vectora register
|
||||
macro(VecS) // Machine vectors register
|
||||
macro(VecD) // Machine vectord register
|
||||
macro(VecX) // Machine vectorx register
|
||||
macro(VecY) // Machine vectory register
|
||||
macro(VecZ) // Machine vectorz register
|
||||
macro(RegVMask) // Vector mask/predicate register
|
||||
macro(RegFlags) // Machine flags register
|
||||
_last_machine_leaf, // Split between regular opcodes and machine
|
||||
#include "classes.hpp"
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -266,9 +266,9 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
|
||||
Node *val = skip_copies(n->in(k));
|
||||
if (val == x) return blk_adjust; // No progress?
|
||||
|
||||
int n_regs = RegMask::num_registers(val->ideal_reg());
|
||||
uint val_idx = _lrg_map.live_range_id(val);
|
||||
OptoReg::Name val_reg = lrgs(val_idx).reg();
|
||||
int n_regs = RegMask::num_registers(val->ideal_reg(), lrgs(val_idx));
|
||||
|
||||
// See if it happens to already be in the correct register!
|
||||
// (either Phi's direct register, or the common case of the name
|
||||
@ -305,8 +305,26 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v
|
||||
}
|
||||
|
||||
Node *vv = value[reg];
|
||||
// For scalable register, number of registers may be inconsistent between
|
||||
// "val_reg" and "reg". For example, when "val" resides in register
|
||||
// but "reg" is located in stack.
|
||||
if (lrgs(val_idx).is_scalable()) {
|
||||
assert(val->ideal_reg() == Op_VecA, "scalable vector register");
|
||||
if (OptoReg::is_stack(reg)) {
|
||||
n_regs = lrgs(val_idx).scalable_reg_slots();
|
||||
} else {
|
||||
n_regs = RegMask::SlotsPerVecA;
|
||||
}
|
||||
}
|
||||
if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set
|
||||
uint last = (n_regs-1); // Looking for the last part of a set
|
||||
uint last;
|
||||
if (lrgs(val_idx).is_scalable()) {
|
||||
assert(val->ideal_reg() == Op_VecA, "scalable vector register");
|
||||
// For scalable vector register, regmask is always SlotsPerVecA bits aligned
|
||||
last = RegMask::SlotsPerVecA - 1;
|
||||
} else {
|
||||
last = (n_regs-1); // Looking for the last part of a set
|
||||
}
|
||||
if ((reg&last) != last) continue; // Wrong part of a set
|
||||
if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value
|
||||
}
|
||||
@ -591,7 +609,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
||||
uint k;
|
||||
Node *phi = block->get_node(j);
|
||||
uint pidx = _lrg_map.live_range_id(phi);
|
||||
OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg();
|
||||
OptoReg::Name preg = lrgs(pidx).reg();
|
||||
|
||||
// Remove copies remaining on edges. Check for junk phi.
|
||||
Node *u = NULL;
|
||||
@ -619,7 +637,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
||||
if( pidx ) {
|
||||
value.map(preg,phi);
|
||||
regnd.map(preg,phi);
|
||||
int n_regs = RegMask::num_registers(phi->ideal_reg());
|
||||
int n_regs = RegMask::num_registers(phi->ideal_reg(), lrgs(pidx));
|
||||
for (int l = 1; l < n_regs; l++) {
|
||||
OptoReg::Name preg_lo = OptoReg::add(preg,-l);
|
||||
value.map(preg_lo,phi);
|
||||
@ -663,7 +681,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
||||
regnd.map(ureg, def);
|
||||
// Record other half of doubles
|
||||
uint def_ideal_reg = def->ideal_reg();
|
||||
int n_regs = RegMask::num_registers(def_ideal_reg);
|
||||
int n_regs = RegMask::num_registers(def_ideal_reg, lrgs(_lrg_map.live_range_id(def)));
|
||||
for (int l = 1; l < n_regs; l++) {
|
||||
OptoReg::Name ureg_lo = OptoReg::add(ureg,-l);
|
||||
if (!value[ureg_lo] &&
|
||||
@ -707,7 +725,7 @@ void PhaseChaitin::post_allocate_copy_removal() {
|
||||
}
|
||||
|
||||
uint n_ideal_reg = n->ideal_reg();
|
||||
int n_regs = RegMask::num_registers(n_ideal_reg);
|
||||
int n_regs = RegMask::num_registers(n_ideal_reg, lrgs(lidx));
|
||||
if (n_regs == 1) {
|
||||
// If Node 'n' does not change the value mapped by the register,
|
||||
// then 'n' is a useless copy. Do not update the register->node
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -24,6 +24,7 @@
|
||||
|
||||
#include "precompiled.hpp"
|
||||
#include "opto/ad.hpp"
|
||||
#include "opto/chaitin.hpp"
|
||||
#include "opto/compile.hpp"
|
||||
#include "opto/matcher.hpp"
|
||||
#include "opto/node.hpp"
|
||||
@ -59,30 +60,47 @@ const RegMask RegMask::Empty(
|
||||
|
||||
//=============================================================================
|
||||
bool RegMask::is_vector(uint ireg) {
|
||||
return (ireg == Op_VecS || ireg == Op_VecD ||
|
||||
return (ireg == Op_VecA || ireg == Op_VecS || ireg == Op_VecD ||
|
||||
ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ );
|
||||
}
|
||||
|
||||
int RegMask::num_registers(uint ireg) {
|
||||
switch(ireg) {
|
||||
case Op_VecZ:
|
||||
return 16;
|
||||
return SlotsPerVecZ;
|
||||
case Op_VecY:
|
||||
return 8;
|
||||
return SlotsPerVecY;
|
||||
case Op_VecX:
|
||||
return 4;
|
||||
return SlotsPerVecX;
|
||||
case Op_VecD:
|
||||
return SlotsPerVecD;
|
||||
case Op_RegD:
|
||||
case Op_RegL:
|
||||
#ifdef _LP64
|
||||
case Op_RegP:
|
||||
#endif
|
||||
return 2;
|
||||
case Op_VecA:
|
||||
assert(Matcher::supports_scalable_vector(), "does not support scalable vector");
|
||||
return SlotsPerVecA;
|
||||
}
|
||||
// Op_VecS and the rest ideal registers.
|
||||
return 1;
|
||||
}
|
||||
|
||||
int RegMask::num_registers(uint ireg, LRG &lrg) {
|
||||
int n_regs = num_registers(ireg);
|
||||
|
||||
// assigned is OptoReg which is selected by register allocator
|
||||
OptoReg::Name assigned = lrg.reg();
|
||||
assert(OptoReg::is_valid(assigned), "should be valid opto register");
|
||||
|
||||
if (lrg.is_scalable() && OptoReg::is_stack(assigned)) {
|
||||
n_regs = lrg.scalable_reg_slots();
|
||||
}
|
||||
return n_regs;
|
||||
}
|
||||
|
||||
// Clear out partial bits; leave only bit pairs
|
||||
void RegMask::clear_to_pairs() {
|
||||
assert(valid_watermarks(), "sanity");
|
||||
@ -157,6 +175,16 @@ bool RegMask::is_bound(uint ireg) const {
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Check whether the given reg number with size is valid
|
||||
// for current regmask, where reg is the highest number.
|
||||
bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const {
|
||||
for (int i = 0; i < size; i++) {
|
||||
if (!Member(reg - i)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// only indices of power 2 are accessed, so index 3 is only filled in for storage.
|
||||
static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 };
|
||||
@ -164,8 +192,13 @@ static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x000
|
||||
// Find the lowest-numbered register set in the mask. Return the
|
||||
// HIGHEST register number in the set, or BAD if no sets.
|
||||
// Works also for size 1.
|
||||
OptoReg::Name RegMask::find_first_set(const int size) const {
|
||||
assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
|
||||
OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const {
|
||||
if (lrg.is_scalable()) {
|
||||
// For scalable vector register, regmask is SlotsPerVecA bits aligned.
|
||||
assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets");
|
||||
} else {
|
||||
assert(is_aligned_sets(size), "mask is not aligned, adjacent sets");
|
||||
}
|
||||
assert(valid_watermarks(), "sanity");
|
||||
for (int i = _lwm; i <= _hwm; i++) {
|
||||
if (_A[i]) { // Found some bits
|
||||
@ -245,12 +278,16 @@ bool RegMask::is_aligned_sets(const int size) const {
|
||||
while (bits) { // Check bits for pairing
|
||||
int bit = bits & -bits; // Extract low bit
|
||||
// Low bit is not odd means its mis-aligned.
|
||||
if ((bit & low_bits_mask) == 0) return false;
|
||||
if ((bit & low_bits_mask) == 0) {
|
||||
return false;
|
||||
}
|
||||
// Do extra work since (bit << size) may overflow.
|
||||
int hi_bit = bit << (size-1); // high bit
|
||||
int set = hi_bit + ((hi_bit-1) & ~(bit-1));
|
||||
// Check for aligned adjacent bits in this set
|
||||
if ((bits & set) != set) return false;
|
||||
if ((bits & set) != set) {
|
||||
return false;
|
||||
}
|
||||
bits -= set; // Remove this set
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -30,6 +30,8 @@
|
||||
#include "utilities/count_leading_zeros.hpp"
|
||||
#include "utilities/count_trailing_zeros.hpp"
|
||||
|
||||
class LRG;
|
||||
|
||||
//-------------Non-zero bit search methods used by RegMask---------------------
|
||||
// Find lowest 1, undefined if empty/0
|
||||
static int find_lowest_bit(uint32_t mask) {
|
||||
@ -91,11 +93,13 @@ class RegMask {
|
||||
// requirement is internal to the allocator, and independent of any
|
||||
// particular platform.
|
||||
enum { SlotsPerLong = 2,
|
||||
SlotsPerVecA = 8,
|
||||
SlotsPerVecS = 1,
|
||||
SlotsPerVecD = 2,
|
||||
SlotsPerVecX = 4,
|
||||
SlotsPerVecY = 8,
|
||||
SlotsPerVecZ = 16 };
|
||||
SlotsPerVecZ = 16,
|
||||
};
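SlotsPerVecA = 8 is only the register-mask granularity for scalable vectors: a VecA live range always occupies 8 adjacent 32-bit slots in a RegMask regardless of the hardware vector length. The actual size is tracked per live range in LRG::scalable_reg_slots() and substituted whenever a scalable value is spilled to the stack (see num_registers(uint, LRG&) above).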
|
||||
|
||||
// A constructor only used by the ADLC output. All mask fields are filled
|
||||
// in directly. Calls to this look something like RM(1,2,3,4);
|
||||
@ -219,10 +223,14 @@ class RegMask {
|
||||
// Test for a single adjacent set of ideal register's size.
|
||||
bool is_bound(uint ireg) const;
|
||||
|
||||
// Check whether the given reg number with size is valid
|
||||
// for current regmask, where reg is the highest number.
|
||||
bool is_valid_reg(OptoReg::Name reg, const int size) const;
|
||||
|
||||
// Find the lowest-numbered register set in the mask. Return the
|
||||
// HIGHEST register number in the set, or BAD if no sets.
|
||||
// Assert that the mask contains only bit sets.
|
||||
OptoReg::Name find_first_set(const int size) const;
|
||||
OptoReg::Name find_first_set(LRG &lrg, const int size) const;
|
||||
|
||||
// Clear out partial bits; leave only aligned adjacent bit sets of size.
|
||||
void clear_to_sets(const int size);
|
||||
@ -236,6 +244,7 @@ class RegMask {
|
||||
|
||||
static bool is_vector(uint ireg);
|
||||
static int num_registers(uint ireg);
|
||||
static int num_registers(uint ireg, LRG &lrg);
|
||||
|
||||
// Fast overlap test. Non-zero if any registers in common.
|
||||
int overlap(const RegMask &rm) const {
|
||||
|
@ -94,8 +94,11 @@ SuperWord::SuperWord(PhaseIdealLoop* phase) :
|
||||
//------------------------------transform_loop---------------------------
|
||||
void SuperWord::transform_loop(IdealLoopTree* lpt, bool do_optimization) {
|
||||
assert(UseSuperWord, "should be");
|
||||
// Do vectors exist on this architecture?
|
||||
if (Matcher::vector_width_in_bytes(T_BYTE) < 2) return;
|
||||
// SuperWord only works with power of two vector sizes.
|
||||
int vector_width = Matcher::vector_width_in_bytes(T_BYTE);
|
||||
if (vector_width < 2 || !is_power_of_2(vector_width)) {
|
||||
return;
|
||||
}
|
||||
|
||||
assert(lpt->_head->is_CountedLoop(), "must be");
|
||||
CountedLoopNode *cl = lpt->_head->as_CountedLoop();
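The explicit is_power_of_2 guard matters for SVE: unlike NEON, a scalable implementation may legally report a vector length that is not a power of two (any multiple of 128 bits, e.g. 48 bytes, is architecturally valid), and SuperWord's packing logic assumes power-of-two widths.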
|
||||
|
@ -74,6 +74,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = {
|
||||
{ Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY
|
||||
{ Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ
|
||||
#else // all other
|
||||
{ Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA.
|
||||
{ Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS
|
||||
{ Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD
|
||||
{ Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX
|
||||
@ -646,6 +647,10 @@ void Type::Initialize_shared(Compile* current) {
|
||||
// get_zero_type() should not happen for T_CONFLICT
|
||||
_zero_type[T_CONFLICT]= NULL;
|
||||
|
||||
if (Matcher::supports_scalable_vector()) {
|
||||
TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE));
|
||||
}
|
||||
|
||||
// Vector predefined types, it needs initialized _const_basic_type[].
|
||||
if (Matcher::vector_size_supported(T_BYTE,4)) {
|
||||
TypeVect::VECTS = TypeVect::make(T_BYTE,4);
|
||||
@ -662,6 +667,8 @@ void Type::Initialize_shared(Compile* current) {
|
||||
if (Matcher::vector_size_supported(T_FLOAT,16)) {
|
||||
TypeVect::VECTZ = TypeVect::make(T_FLOAT,16);
|
||||
}
|
||||
|
||||
mreg2type[Op_VecA] = TypeVect::VECTA;
|
||||
mreg2type[Op_VecS] = TypeVect::VECTS;
|
||||
mreg2type[Op_VecD] = TypeVect::VECTD;
|
||||
mreg2type[Op_VecX] = TypeVect::VECTX;
|
||||
@ -981,6 +988,7 @@ const Type::TYPES Type::dual_type[Type::lastype] = {
|
||||
|
||||
Bad, // Tuple - handled in v-call
|
||||
Bad, // Array - handled in v-call
|
||||
Bad, // VectorA - handled in v-call
|
||||
Bad, // VectorS - handled in v-call
|
||||
Bad, // VectorD - handled in v-call
|
||||
Bad, // VectorX - handled in v-call
|
||||
@ -1881,7 +1889,6 @@ const TypeTuple *TypeTuple::LONG_PAIR;
|
||||
const TypeTuple *TypeTuple::INT_CC_PAIR;
|
||||
const TypeTuple *TypeTuple::LONG_CC_PAIR;
|
||||
|
||||
|
||||
//------------------------------make-------------------------------------------
|
||||
// Make a TypeTuple from the range of a method signature
|
||||
const TypeTuple *TypeTuple::make_range(ciSignature* sig) {
|
||||
@ -2252,6 +2259,7 @@ bool TypeAry::ary_must_be_exact() const {
|
||||
|
||||
//==============================TypeVect=======================================
|
||||
// Convenience common pre-built types.
|
||||
const TypeVect *TypeVect::VECTA = NULL; // vector length agnostic
|
||||
const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors
|
||||
const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors
|
||||
const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors
|
||||
@ -2262,10 +2270,11 @@ const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors
|
||||
const TypeVect* TypeVect::make(const Type *elem, uint length) {
|
||||
BasicType elem_bt = elem->array_element_basic_type();
|
||||
assert(is_java_primitive(elem_bt), "only primitive types in vector");
|
||||
assert(length > 1 && is_power_of_2(length), "vector length is power of 2");
|
||||
assert(Matcher::vector_size_supported(elem_bt, length), "length in range");
|
||||
int size = length * type2aelembytes(elem_bt);
|
||||
switch (Matcher::vector_ideal_reg(size)) {
|
||||
case Op_VecA:
|
||||
return (TypeVect*)(new TypeVectA(elem, length))->hashcons();
|
||||
case Op_VecS:
|
||||
return (TypeVect*)(new TypeVectS(elem, length))->hashcons();
|
||||
case Op_RegL:
|
||||
@ -2297,7 +2306,7 @@ const Type *TypeVect::xmeet( const Type *t ) const {
|
||||
|
||||
default: // All else is a mistake
|
||||
typerr(t);
|
||||
|
||||
case VectorA:
|
||||
case VectorS:
|
||||
case VectorD:
|
||||
case VectorX:
|
||||
@ -2352,6 +2361,8 @@ bool TypeVect::empty(void) const {
|
||||
#ifndef PRODUCT
|
||||
void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const {
|
||||
switch (base()) {
|
||||
case VectorA:
|
||||
st->print("vectora["); break;
|
||||
case VectorS:
|
||||
st->print("vectors["); break;
|
||||
case VectorD:
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -53,6 +53,7 @@ class TypeNarrowKlass;
|
||||
class TypeAry;
|
||||
class TypeTuple;
|
||||
class TypeVect;
|
||||
class TypeVectA;
|
||||
class TypeVectS;
|
||||
class TypeVectD;
|
||||
class TypeVectX;
|
||||
@ -87,6 +88,7 @@ public:
|
||||
|
||||
Tuple, // Method signature or object layout
|
||||
Array, // Array types
|
||||
VectorA, // (Scalable) Vector types for vector length agnostic
|
||||
VectorS, // 32bit Vector types
|
||||
VectorD, // 64bit Vector types
|
||||
VectorX, // 128bit Vector types
|
||||
@ -757,6 +759,7 @@ public:
|
||||
virtual const Type *xmeet( const Type *t) const;
|
||||
virtual const Type *xdual() const; // Compute dual right now.
|
||||
|
||||
static const TypeVect *VECTA;
|
||||
static const TypeVect *VECTS;
|
||||
static const TypeVect *VECTD;
|
||||
static const TypeVect *VECTX;
|
||||
@ -768,6 +771,11 @@ public:
|
||||
#endif
|
||||
};
|
||||
|
||||
class TypeVectA : public TypeVect {
|
||||
friend class TypeVect;
|
||||
TypeVectA(const Type* elem, uint length) : TypeVect(VectorA, elem, length) {}
|
||||
};
|
||||
|
||||
class TypeVectS : public TypeVect {
|
||||
friend class TypeVect;
|
||||
TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {}
|
||||
@ -1622,12 +1630,12 @@ inline const TypeAry *Type::isa_ary() const {
|
||||
}
|
||||
|
||||
inline const TypeVect *Type::is_vect() const {
|
||||
assert( _base >= VectorS && _base <= VectorZ, "Not a Vector" );
|
||||
assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" );
|
||||
return (TypeVect*)this;
|
||||
}
|
||||
|
||||
inline const TypeVect *Type::isa_vect() const {
|
||||
return (_base >= VectorS && _base <= VectorZ) ? (TypeVect*)this : NULL;
|
||||
return (_base >= VectorA && _base <= VectorZ) ? (TypeVect*)this : NULL;
|
||||
}
|
||||
|
||||
inline const TypePtr *Type::is_ptr() const {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2007, 2017, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2007, 2020, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -818,7 +818,7 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) {
|
||||
(vlen > 1) && is_power_of_2(vlen) &&
|
||||
Matcher::vector_size_supported(bt, vlen)) {
|
||||
int vopc = ReductionNode::opcode(opc, bt);
|
||||
return vopc != opc && Matcher::match_rule_supported(vopc);
|
||||
return vopc != opc && Matcher::match_rule_supported_vector(vopc, vlen, bt);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
test/hotspot/jtreg/compiler/c2/aarch64/TestSVEWithJNI.java (new file, 128 lines)
@ -0,0 +1,128 @@
/*
 * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2020, Arm Limited. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

/**
 * @test
 *
 * @requires os.arch == "aarch64" & vm.compiler2.enabled
 * @summary Verify VM SVE checking behavior
 * @library /test/lib
 * @run main/othervm/native compiler.c2.aarch64.TestSVEWithJNI
 *
 */

package compiler.c2.aarch64;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import jdk.test.lib.process.ProcessTools;
import jdk.test.lib.process.OutputAnalyzer;

public class TestSVEWithJNI {
    static {
        System.loadLibrary("TestSVEWithJNI");
    }

    static final int EXIT_CODE = 99;
    // Returns a nonnegative value on success, or a negative value on error.
    public static native int setVectorLength(int arg);
    // Returns a nonnegative value on success, or a negative value on error.
    public static native int getVectorLength();

    public static final String MSG = "Current Vector Size: ";
    public static void testNormal() {
        int vlen = getVectorLength();
        System.out.println(MSG + vlen);
        // Should be fine if no vector length changed.
        if (setVectorLength(vlen) < 0) {
            throw new Error("Error in setting vector length.");
        }
    }

    public static void testAbort() {
        int vlen = getVectorLength();
        if (vlen <= 16) {
            throw new Error("Error: unsupported vector length.");
        }
        if (setVectorLength(16) < 0) {
            throw new Error("Error: setting vector length failed.");
        }
    }

    public static ProcessBuilder createProcessBuilder(String [] args, String mode) {
        List<String> vmopts = new ArrayList<>();
        String testjdkPath = System.getProperty("test.jdk");
        Collections.addAll(vmopts, "-Dtest.jdk=" + testjdkPath);
        Collections.addAll(vmopts, args);
        Collections.addAll(vmopts, TestSVEWithJNI.class.getName(), mode);
        return ProcessTools.createJavaProcessBuilder(vmopts.toArray(new String[vmopts.size()]));
    }

    public static void main(String [] args) throws Exception {
        if (args.length == 0) {
            int vlen = getVectorLength();
            if (vlen < 0) {
                return;
            }
            String [][] testOpts = {
                {"-Xint", "-XX:UseSVE=1"},
                {"-Xcomp", "-XX:UseSVE=1"},
            };
            ProcessBuilder pb;
            OutputAnalyzer output;
            for (String [] opts : testOpts) {
                pb = createProcessBuilder(opts, "normal");
                output = new OutputAnalyzer(pb.start());
                output.shouldHaveExitValue(EXIT_CODE);

                pb = createProcessBuilder(opts, "abort");
                output = new OutputAnalyzer(pb.start());
                output.shouldNotHaveExitValue(EXIT_CODE);
                output.shouldMatch("(error|Error|ERROR)");
            }

            // Verify MaxVectorSize

            // Any SVE architecture should support 128-bit vector size.
            pb = createProcessBuilder(new String []{"-XX:UseSVE=1", "-XX:MaxVectorSize=16"}, "normal");
            output = new OutputAnalyzer(pb.start());
            output.shouldHaveExitValue(EXIT_CODE);
            output.shouldContain(MSG + 16);

            // An unsupported large vector size value.
            pb = createProcessBuilder(new String []{"-XX:UseSVE=1", "-XX:MaxVectorSize=512"}, "normal");
            output = new OutputAnalyzer(pb.start());
            output.shouldHaveExitValue(EXIT_CODE);
            output.shouldContain("warning");
        } else if (args[0].equals("normal")) {
            testNormal();
            System.exit(EXIT_CODE);
        } else if (args[0].equals("abort")) {
            testAbort();
            System.exit(EXIT_CODE);
        }
    }
}
68
test/hotspot/jtreg/compiler/c2/aarch64/libTestSVEWithJNI.c
Normal file
@ -0,0 +1,68 @@
/*
 * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2020, Arm Limited. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifdef __aarch64__

#include <jni.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/prctl.h>
#include <unistd.h>

#ifndef PR_SVE_GET_VL
// For old toolchains which do not have SVE related macros defined.
#define PR_SVE_SET_VL 50
#define PR_SVE_GET_VL 51
#endif

int get_current_thread_vl() {
  return prctl(PR_SVE_GET_VL);
}

int set_current_thread_vl(unsigned long arg) {
  return prctl(PR_SVE_SET_VL, arg);
}

#ifdef __cplusplus
extern "C" {
#endif

JNIEXPORT jint JNICALL Java_compiler_c2_aarch64_TestSVEWithJNI_setVectorLength
(JNIEnv * env, jclass clz, jint length) {
  return set_current_thread_vl(length);
}

JNIEXPORT jint JNICALL Java_compiler_c2_aarch64_TestSVEWithJNI_getVectorLength
(JNIEnv *env, jclass clz) {
  return get_current_thread_vl();
}

#ifdef __cplusplus
}
#endif

#endif
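Note: the JNI helpers above are thin wrappers around the Linux prctl() SVE vector-length controls. A standalone sketch of the same calls outside the JVM, assuming a Linux/AArch64 host; the 0xffff mask for the length bits is an assumption about the prctl return encoding, not something taken from this patch:

    // Illustrative standalone program; not part of the patch.
    #include <cstdio>
    #include <sys/prctl.h>

    #ifndef PR_SVE_GET_VL
    #define PR_SVE_SET_VL 50
    #define PR_SVE_GET_VL 51
    #endif

    int main() {
      int ret = prctl(PR_SVE_GET_VL);
      if (ret < 0) {
        std::printf("SVE vector length control not available\n");
        return 1;
      }
      // The low bits of the return value hold the vector length in bytes (assumed mask).
      std::printf("current vector length: %d bytes\n", ret & 0xffff);
      // Request the minimum SVE vector length: 16 bytes (128 bits).
      if (prctl(PR_SVE_SET_VL, 16) < 0) {
        std::printf("failed to set vector length\n");
        return 1;
      }
      std::printf("vector length now: %d bytes\n", prctl(PR_SVE_GET_VL) & 0xffff);
      return 0;
    }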