From bbd8ae78200e4128d4eddf8694835956b5c5f142 Mon Sep 17 00:00:00 2001
From: Stuart Monteith
Date: Mon, 13 Feb 2023 11:07:11 +0000
Subject: [PATCH] 8294194: [AArch64] Create intrinsics compress and expand

Reviewed-by: xgong, adinn, haosun, aph
---
 src/hotspot/cpu/aarch64/aarch64.ad            | 163 ++++++++++++++++++
 src/hotspot/share/opto/constantTable.cpp      |   3 +-
 .../intrinsics/TestBitShuffleOpers.java       |  11 +-
 3 files changed, 171 insertions(+), 6 deletions(-)

diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad
index 79a7b016626..9d8339a9f27 100644
--- a/src/hotspot/cpu/aarch64/aarch64.ad
+++ b/src/hotspot/cpu/aarch64/aarch64.ad
@@ -2301,6 +2301,12 @@ const bool Matcher::match_rule_supported(int opcode) {
         ret_value = false;
       }
       break;
+    case Op_ExpandBits:
+    case Op_CompressBits:
+      if (!(UseSVE > 1 && VM_Version::supports_svebitperm())) {
+        ret_value = false;
+      }
+      break;
   }
 
   return ret_value; // Per default match rules are supported.
@@ -17350,6 +17356,163 @@ instruct encode_ascii_array(iRegP_R2 src, iRegP_R1 dst, iRegI_R3 len,
   ins_pipe(pipe_class_memory);
 %}
 
+//----------------------------- CompressBits/ExpandBits ------------------------
+//
+// Scalar CompressBits/ExpandBits are implemented with the SVE bit permute
+// instructions BEXT/BDEP. Source and mask are placed in element 0 of temporary
+// vector registers, the operation is performed there and element 0 of the
+// result is moved back to a general purpose register. The *_memcon rules load
+// the source straight from memory and the mask from the constant table.
+
+instruct compressBitsI_reg(iRegINoSp dst, iRegIorL2I src, iRegIorL2I mask,
+                           vRegF tdst, vRegF tsrc, vRegF tmask) %{
+  match(Set dst (CompressBits src mask));
+  effect(TEMP tdst, TEMP tsrc, TEMP tmask);
+  format %{ "mov $tsrc, $src\n\t"
+            "mov $tmask, $mask\n\t"
+            "bext $tdst, $tsrc, $tmask\n\t"
+            "mov $dst, $tdst"
+          %}
+  ins_encode %{
+    __ mov($tsrc$$FloatRegister, __ S, 0, $src$$Register);
+    __ mov($tmask$$FloatRegister, __ S, 0, $mask$$Register);
+    __ sve_bext($tdst$$FloatRegister, __ S, $tsrc$$FloatRegister, $tmask$$FloatRegister);
+    __ mov($dst$$Register, $tdst$$FloatRegister, __ S, 0);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct compressBitsI_memcon(iRegINoSp dst, memory4 mem, immI mask,
+                              vRegF tdst, vRegF tsrc, vRegF tmask) %{
+  match(Set dst (CompressBits (LoadI mem) mask));
+  effect(TEMP tdst, TEMP tsrc, TEMP tmask);
+  format %{ "ldrs $tsrc, $mem\n\t"
+            "ldrs $tmask, $mask\n\t"
+            "bext $tdst, $tsrc, $tmask\n\t"
+            "mov $dst, $tdst"
+          %}
+  ins_encode %{
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, $tsrc$$FloatRegister, $mem->opcode(),
+              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
+    __ ldrs($tmask$$FloatRegister, $constantaddress($mask));
+    __ sve_bext($tdst$$FloatRegister, __ S, $tsrc$$FloatRegister, $tmask$$FloatRegister);
+    __ mov($dst$$Register, $tdst$$FloatRegister, __ S, 0);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct compressBitsL_reg(iRegLNoSp dst, iRegL src, iRegL mask,
+                           vRegD tdst, vRegD tsrc, vRegD tmask) %{
+  match(Set dst (CompressBits src mask));
+  effect(TEMP tdst, TEMP tsrc, TEMP tmask);
+  format %{ "mov $tsrc, $src\n\t"
+            "mov $tmask, $mask\n\t"
+            "bext $tdst, $tsrc, $tmask\n\t"
+            "mov $dst, $tdst"
+          %}
+  ins_encode %{
+    __ mov($tsrc$$FloatRegister, __ D, 0, $src$$Register);
+    __ mov($tmask$$FloatRegister, __ D, 0, $mask$$Register);
+    __ sve_bext($tdst$$FloatRegister, __ D, $tsrc$$FloatRegister, $tmask$$FloatRegister);
+    __ mov($dst$$Register, $tdst$$FloatRegister, __ D, 0);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct compressBitsL_memcon(iRegLNoSp dst, memory8 mem, immL mask,
+                              vRegD tdst, vRegD tsrc, vRegD tmask) %{
+  match(Set dst (CompressBits (LoadL mem) mask));
+  effect(TEMP tdst, TEMP tsrc, TEMP tmask);
+  format %{ "ldrd $tsrc, $mem\n\t"
+            "ldrd $tmask, $mask\n\t"
+            "bext $tdst, $tsrc, $tmask\n\t"
+            "mov $dst, $tdst"
+          %}
+  ins_encode %{
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, $tsrc$$FloatRegister, $mem->opcode(),
+              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
+    __ ldrd($tmask$$FloatRegister, $constantaddress($mask));
+    __ sve_bext($tdst$$FloatRegister, __ D, $tsrc$$FloatRegister, $tmask$$FloatRegister);
+    __ mov($dst$$Register, $tdst$$FloatRegister, __ D, 0);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct expandBitsI_reg(iRegINoSp dst, iRegIorL2I src, iRegIorL2I mask,
+                         vRegF tdst, vRegF tsrc, vRegF tmask) %{
+  match(Set dst (ExpandBits src mask));
+  effect(TEMP tdst, TEMP tsrc, TEMP tmask);
+  format %{ "mov $tsrc, $src\n\t"
+            "mov $tmask, $mask\n\t"
+            "bdep $tdst, $tsrc, $tmask\n\t"
+            "mov $dst, $tdst"
+          %}
+  ins_encode %{
+    __ mov($tsrc$$FloatRegister, __ S, 0, $src$$Register);
+    __ mov($tmask$$FloatRegister, __ S, 0, $mask$$Register);
+    __ sve_bdep($tdst$$FloatRegister, __ S, $tsrc$$FloatRegister, $tmask$$FloatRegister);
+    __ mov($dst$$Register, $tdst$$FloatRegister, __ S, 0);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct expandBitsI_memcon(iRegINoSp dst, memory4 mem, immI mask,
+                            vRegF tdst, vRegF tsrc, vRegF tmask) %{
+  match(Set dst (ExpandBits (LoadI mem) mask));
+  effect(TEMP tdst, TEMP tsrc, TEMP tmask);
+  format %{ "ldrs $tsrc, $mem\n\t"
+            "ldrs $tmask, $mask\n\t"
+            "bdep $tdst, $tsrc, $tmask\n\t"
+            "mov $dst, $tdst"
+          %}
+  ins_encode %{
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, $tsrc$$FloatRegister, $mem->opcode(),
+              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
+    __ ldrs($tmask$$FloatRegister, $constantaddress($mask));
+    __ sve_bdep($tdst$$FloatRegister, __ S, $tsrc$$FloatRegister, $tmask$$FloatRegister);
+    __ mov($dst$$Register, $tdst$$FloatRegister, __ S, 0);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+instruct expandBitsL_reg(iRegLNoSp dst, iRegL src, iRegL mask,
+                         vRegD tdst, vRegD tsrc, vRegD tmask) %{
+  match(Set dst (ExpandBits src mask));
+  effect(TEMP tdst, TEMP tsrc, TEMP tmask);
+  format %{ "mov $tsrc, $src\n\t"
+            "mov $tmask, $mask\n\t"
+            "bdep $tdst, $tsrc, $tmask\n\t"
+            "mov $dst, $tdst"
+          %}
+  ins_encode %{
+    __ mov($tsrc$$FloatRegister, __ D, 0, $src$$Register);
+    __ mov($tmask$$FloatRegister, __ D, 0, $mask$$Register);
+    __ sve_bdep($tdst$$FloatRegister, __ D, $tsrc$$FloatRegister, $tmask$$FloatRegister);
+    __ mov($dst$$Register, $tdst$$FloatRegister, __ D, 0);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
+
+instruct expandBitsL_memcon(iRegLNoSp dst, memory8 mem, immL mask,
+                            vRegD tdst, vRegD tsrc, vRegD tmask) %{
+  match(Set dst (ExpandBits (LoadL mem) mask));
+  effect(TEMP tdst, TEMP tsrc, TEMP tmask);
+  format %{ "ldrd $tsrc, $mem\n\t"
+            "ldrd $tmask, $mask\n\t"
+            "bdep $tdst, $tsrc, $tmask\n\t"
+            "mov $dst, $tdst"
+          %}
+  ins_encode %{
+    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, $tsrc$$FloatRegister, $mem->opcode(),
+              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
+    __ ldrd($tmask$$FloatRegister, $constantaddress($mask));
+    __ sve_bdep($tdst$$FloatRegister, __ D, $tsrc$$FloatRegister, $tmask$$FloatRegister);
+    __ mov($dst$$Register, $tdst$$FloatRegister, __ D, 0);
+  %}
+  ins_pipe(pipe_slow);
+%}
+
 // ============================================================================
 // This name is KNOWN by the ADLC and cannot be changed.
 // The ADLC forces a 'TypeRawPtr::BOTTOM' output type
diff --git a/src/hotspot/share/opto/constantTable.cpp b/src/hotspot/share/opto/constantTable.cpp
index 91452408c2b..d5b20c3334a 100644
--- a/src/hotspot/share/opto/constantTable.cpp
+++ b/src/hotspot/share/opto/constantTable.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -269,6 +269,7 @@ ConstantTable::Constant ConstantTable::add(MachConstantNode* n, MachOper* oper)
   BasicType type = oper->type()->basic_type();
   switch (type) {
   case T_LONG:    value.j = oper->constantL(); break;
+  case T_INT:     value.i = oper->constant();  break;
   case T_FLOAT:   value.f = oper->constantF(); break;
   case T_DOUBLE:  value.d = oper->constantD(); break;
   case T_OBJECT:
diff --git a/test/hotspot/jtreg/compiler/intrinsics/TestBitShuffleOpers.java b/test/hotspot/jtreg/compiler/intrinsics/TestBitShuffleOpers.java
index f142bfaf775..46469777771 100644
--- a/test/hotspot/jtreg/compiler/intrinsics/TestBitShuffleOpers.java
+++ b/test/hotspot/jtreg/compiler/intrinsics/TestBitShuffleOpers.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2022, 2023, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,13 +27,14 @@
  * @key randomness
  * @summary To test various transforms added for bit COMPRESS_BITS and EXPAND_BITS operations
  * @requires vm.compiler2.enabled
- * @requires vm.cpu.features ~= ".*bmi2.*"
- * @requires vm.cpu.features ~= ".*bmi1.*"
- * @requires vm.cpu.features ~= ".*sse2.*"
+ * @requires (((os.arch=="x86" | os.arch=="amd64" | os.arch=="x86_64") &
+ *            (vm.cpu.features ~= ".*bmi2.*" & vm.cpu.features ~= ".*bmi1.*" &
+ *             vm.cpu.features ~= ".*sse2.*")) |
+ *            ((vm.opt.UseSVE == "null" | vm.opt.UseSVE > 1) &
+ *             os.arch=="aarch64" & vm.cpu.features ~= ".*svebitperm.*"))
  * @library /test/lib /
  * @run driver compiler.intrinsics.TestBitShuffleOpers
  */
-
 package compiler.intrinsics;
 
 import java.util.concurrent.Callable;