8294194: [AArch64] Create intrinsics compress and expand

Reviewed-by: xgong, adinn, haosun, aph
This commit is contained in:
Stuart Monteith 2023-02-13 11:07:11 +00:00 committed by Andrew Dinn
parent 4e327db1d1
commit bbd8ae7820
3 changed files with 165 additions and 6 deletions

View File

@ -2301,6 +2301,12 @@ const bool Matcher::match_rule_supported(int opcode) {
ret_value = false;
}
break;
case Op_ExpandBits:
case Op_CompressBits:
if (!(UseSVE > 1 && VM_Version::supports_svebitperm())) {
ret_value = false;
}
break;
}
return ret_value; // Per default match rules are supported.
@ -17350,6 +17356,157 @@ instruct encode_ascii_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
ins_pipe(pipe_class_memory);
%}
//----------------------------- CompressBits/ExpandBits ------------------------
// CompressBits on 32-bit register operands.
// Both general-purpose inputs are moved into element 0 of SIMD/FP temporaries,
// SVE BEXT is applied on S-sized elements, and element 0 of the result is moved
// back into the general-purpose destination register.
instruct compressBitsI_reg(iRegINoSp dst, iRegIorL2I src, iRegIorL2I mask,
                           vRegF tdst, vRegF tsrc, vRegF tmask) %{
  match(Set dst (CompressBits src mask));
  effect(TEMP tdst, TEMP tsrc, TEMP tmask);
  format %{ "mov $tsrc, $src\n\t"
            "mov $tmask, $mask\n\t"
            "bext $tdst, $tsrc, $tmask\n\t"
            "mov $dst, $tdst"
  %}
  ins_encode %{
    FloatRegister value_v  = $tsrc$$FloatRegister;
    FloatRegister mask_v   = $tmask$$FloatRegister;
    FloatRegister result_v = $tdst$$FloatRegister;

    // GPR -> vector element 0 for both operands.
    __ mov(value_v, __ S, 0, $src$$Register);
    __ mov(mask_v, __ S, 0, $mask$$Register);
    // Bit extract under mask, 32-bit element size.
    __ sve_bext(result_v, __ S, value_v, mask_v);
    // Vector element 0 -> GPR.
    __ mov($dst$$Register, result_v, __ S, 0);
  %}
  ins_pipe(pipe_slow);
%}
// CompressBits on a 32-bit value loaded from memory with an immediate mask.
// The source is loaded straight into a SIMD/FP temporary and the mask is loaded
// from the constant address of the immediate, avoiding the GPR -> vector moves
// of the register form.
instruct compressBitsI_memcon(iRegINoSp dst, memory4 mem, immI mask,
                              vRegF tdst, vRegF tsrc, vRegF tmask) %{
  match(Set dst (CompressBits (LoadI mem) mask));
  effect(TEMP tdst, TEMP tsrc, TEMP tmask);
  format %{ "ldrs $tsrc, $mem\n\t"
            "ldrs $tmask, $mask\n\t"
            "bext $tdst, $tsrc, $tmask\n\t"
            "mov $dst, $tdst"
  %}
  ins_encode %{
    FloatRegister value_v  = $tsrc$$FloatRegister;
    FloatRegister mask_v   = $tmask$$FloatRegister;
    FloatRegister result_v = $tdst$$FloatRegister;

    // 4-byte load of the source operand into the vector temporary.
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, value_v, $mem->opcode(),
              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
    // Mask comes from the constant address of the immediate.
    __ ldrs(mask_v, $constantaddress($mask));
    __ sve_bext(result_v, __ S, value_v, mask_v);
    // Vector element 0 -> GPR.
    __ mov($dst$$Register, result_v, __ S, 0);
  %}
  ins_pipe(pipe_slow);
%}
// CompressBits on 64-bit register operands.
// Same sequence as the 32-bit register form, but with D-sized elements and
// double-width SIMD/FP temporaries.
instruct compressBitsL_reg(iRegLNoSp dst, iRegL src, iRegL mask,
                           vRegD tdst, vRegD tsrc, vRegD tmask) %{
  match(Set dst (CompressBits src mask));
  effect(TEMP tdst, TEMP tsrc, TEMP tmask);
  format %{ "mov $tsrc, $src\n\t"
            "mov $tmask, $mask\n\t"
            "bext $tdst, $tsrc, $tmask\n\t"
            "mov $dst, $tdst"
  %}
  ins_encode %{
    FloatRegister value_v  = $tsrc$$FloatRegister;
    FloatRegister mask_v   = $tmask$$FloatRegister;
    FloatRegister result_v = $tdst$$FloatRegister;

    // GPR -> vector element 0 for both operands.
    __ mov(value_v, __ D, 0, $src$$Register);
    __ mov(mask_v, __ D, 0, $mask$$Register);
    // Bit extract under mask, 64-bit element size.
    __ sve_bext(result_v, __ D, value_v, mask_v);
    // Vector element 0 -> GPR.
    __ mov($dst$$Register, result_v, __ D, 0);
  %}
  ins_pipe(pipe_slow);
%}
// CompressBits on a 64-bit value loaded from memory with an immediate mask.
// The source is loaded with an 8-byte ldrd, the mask is loaded from the
// constant address of the immediate, BEXT runs on D-sized elements and
// element 0 of the result is moved into the long destination register.
//
// The temporaries carry 64-bit data, so they are declared vRegD, matching
// compressBitsL_reg, rather than the single-precision vRegF class.
instruct compressBitsL_memcon(iRegLNoSp dst, memory8 mem, immL mask,
                              vRegD tdst, vRegD tsrc, vRegD tmask) %{
  match(Set dst (CompressBits (LoadL mem) mask));
  effect(TEMP tdst, TEMP tsrc, TEMP tmask);
  format %{ "ldrd $tsrc, $mem\n\t"
            "ldrd $tmask, $mask\n\t"
            "bext $tdst, $tsrc, $tmask\n\t"
            "mov $dst, $tdst"
  %}
  ins_encode %{
    // 8-byte load of the source operand into the vector temporary.
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, $tsrc$$FloatRegister, $mem->opcode(),
              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
    // Mask comes from the constant address of the immediate.
    __ ldrd($tmask$$FloatRegister, $constantaddress($mask));
    __ sve_bext($tdst$$FloatRegister, __ D, $tsrc$$FloatRegister, $tmask$$FloatRegister);
    // Vector element 0 -> GPR.
    __ mov($dst$$Register, $tdst$$FloatRegister, __ D, 0);
  %}
  ins_pipe(pipe_slow);
%}
// ExpandBits on 32-bit register operands.
// Both general-purpose inputs are moved into element 0 of SIMD/FP temporaries,
// SVE BDEP is applied on S-sized elements, and element 0 of the result is moved
// back into the general-purpose destination register.
instruct expandBitsI_reg(iRegINoSp dst, iRegIorL2I src, iRegIorL2I mask,
                         vRegF tdst, vRegF tsrc, vRegF tmask) %{
  match(Set dst (ExpandBits src mask));
  effect(TEMP tdst, TEMP tsrc, TEMP tmask);
  format %{ "mov $tsrc, $src\n\t"
            "mov $tmask, $mask\n\t"
            "bdep $tdst, $tsrc, $tmask\n\t"
            "mov $dst, $tdst"
  %}
  ins_encode %{
    FloatRegister value_v  = $tsrc$$FloatRegister;
    FloatRegister mask_v   = $tmask$$FloatRegister;
    FloatRegister result_v = $tdst$$FloatRegister;

    // GPR -> vector element 0 for both operands.
    __ mov(value_v, __ S, 0, $src$$Register);
    __ mov(mask_v, __ S, 0, $mask$$Register);
    // Bit deposit under mask, 32-bit element size.
    __ sve_bdep(result_v, __ S, value_v, mask_v);
    // Vector element 0 -> GPR.
    __ mov($dst$$Register, result_v, __ S, 0);
  %}
  ins_pipe(pipe_slow);
%}
// ExpandBits on a 32-bit value loaded from memory with an immediate mask.
// The source is loaded straight into a SIMD/FP temporary and the mask is loaded
// from the constant address of the immediate, avoiding the GPR -> vector moves
// of the register form.
instruct expandBitsI_memcon(iRegINoSp dst, memory4 mem, immI mask,
                            vRegF tdst, vRegF tsrc, vRegF tmask) %{
  match(Set dst (ExpandBits (LoadI mem) mask));
  effect(TEMP tdst, TEMP tsrc, TEMP tmask);
  format %{ "ldrs $tsrc, $mem\n\t"
            "ldrs $tmask, $mask\n\t"
            "bdep $tdst, $tsrc, $tmask\n\t"
            "mov $dst, $tdst"
  %}
  ins_encode %{
    FloatRegister value_v  = $tsrc$$FloatRegister;
    FloatRegister mask_v   = $tmask$$FloatRegister;
    FloatRegister result_v = $tdst$$FloatRegister;

    // 4-byte load of the source operand into the vector temporary.
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, value_v, $mem->opcode(),
              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
    // Mask comes from the constant address of the immediate.
    __ ldrs(mask_v, $constantaddress($mask));
    __ sve_bdep(result_v, __ S, value_v, mask_v);
    // Vector element 0 -> GPR.
    __ mov($dst$$Register, result_v, __ S, 0);
  %}
  ins_pipe(pipe_slow);
%}
// ExpandBits on 64-bit register operands.
// Same sequence as the 32-bit register form, but with D-sized elements and
// double-width SIMD/FP temporaries.
instruct expandBitsL_reg(iRegLNoSp dst, iRegL src, iRegL mask,
                         vRegD tdst, vRegD tsrc, vRegD tmask) %{
  match(Set dst (ExpandBits src mask));
  effect(TEMP tdst, TEMP tsrc, TEMP tmask);
  format %{ "mov $tsrc, $src\n\t"
            "mov $tmask, $mask\n\t"
            "bdep $tdst, $tsrc, $tmask\n\t"
            "mov $dst, $tdst"
  %}
  ins_encode %{
    FloatRegister value_v  = $tsrc$$FloatRegister;
    FloatRegister mask_v   = $tmask$$FloatRegister;
    FloatRegister result_v = $tdst$$FloatRegister;

    // GPR -> vector element 0 for both operands.
    __ mov(value_v, __ D, 0, $src$$Register);
    __ mov(mask_v, __ D, 0, $mask$$Register);
    // Bit deposit under mask, 64-bit element size.
    __ sve_bdep(result_v, __ D, value_v, mask_v);
    // Vector element 0 -> GPR.
    __ mov($dst$$Register, result_v, __ D, 0);
  %}
  ins_pipe(pipe_slow);
%}
// ExpandBits on a 64-bit value loaded from memory with an immediate mask.
// The source is loaded with an 8-byte ldrd, the mask is loaded from the
// constant address of the immediate, BDEP runs on D-sized elements and
// element 0 of the result is moved into the destination register.
//
// The rule matches a LoadL input with an immL mask and writes dst with a
// D-sized move, so dst is a long register (iRegLNoSp) like the other long
// variants. The temporaries carry 64-bit data and are therefore vRegD,
// matching expandBitsL_reg.
instruct expandBitsL_memcon(iRegLNoSp dst, memory8 mem, immL mask,
                            vRegD tdst, vRegD tsrc, vRegD tmask) %{
  match(Set dst (ExpandBits (LoadL mem) mask));
  effect(TEMP tdst, TEMP tsrc, TEMP tmask);
  format %{ "ldrd $tsrc, $mem\n\t"
            "ldrd $tmask, $mask\n\t"
            "bdep $tdst, $tsrc, $tmask\n\t"
            "mov $dst, $tdst"
  %}
  ins_encode %{
    // 8-byte load of the source operand into the vector temporary.
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, $tsrc$$FloatRegister, $mem->opcode(),
              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
    // Mask comes from the constant address of the immediate.
    __ ldrd($tmask$$FloatRegister, $constantaddress($mask));
    __ sve_bdep($tdst$$FloatRegister, __ D, $tsrc$$FloatRegister, $tmask$$FloatRegister);
    // Vector element 0 -> GPR.
    __ mov($dst$$Register, $tdst$$FloatRegister, __ D, 0);
  %}
  ins_pipe(pipe_slow);
%}
// ============================================================================
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -269,6 +269,7 @@ ConstantTable::Constant ConstantTable::add(MachConstantNode* n, MachOper* oper)
BasicType type = oper->type()->basic_type();
switch (type) {
case T_LONG: value.j = oper->constantL(); break;
case T_INT: value.i = oper->constant(); break;
case T_FLOAT: value.f = oper->constantF(); break;
case T_DOUBLE: value.d = oper->constantD(); break;
case T_OBJECT:

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -27,13 +27,14 @@
* @key randomness
* @summary To test various transforms added for bit COMPRESS_BITS and EXPAND_BITS operations
* @requires vm.compiler2.enabled
* @requires vm.cpu.features ~= ".*bmi2.*"
* @requires vm.cpu.features ~= ".*bmi1.*"
* @requires vm.cpu.features ~= ".*sse2.*"
* @requires (((os.arch=="x86" | os.arch=="amd64" | os.arch=="x86_64") &
* (vm.cpu.features ~= ".*bmi2.*" & vm.cpu.features ~= ".*bmi1.*" &
* vm.cpu.features ~= ".*sse2.*")) |
* ((vm.opt.UseSVE == "null" | vm.opt.UseSVE > 1) &
* os.arch=="aarch64" & vm.cpu.features ~= ".*svebitperm.*"))
* @library /test/lib /
* @run driver compiler.intrinsics.TestBitShuffleOpers
*/
package compiler.intrinsics;
import java.util.concurrent.Callable;