5365 lines
200 KiB
Plaintext
5365 lines
200 KiB
Plaintext
//
|
|
// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
|
|
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
//
|
|
// This code is free software; you can redistribute it and/or modify it
|
|
// under the terms of the GNU General Public License version 2 only, as
|
|
// published by the Free Software Foundation.
|
|
//
|
|
// This code is distributed in the hope that it will be useful, but WITHOUT
|
|
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
// version 2 for more details (a copy is included in the LICENSE file that
|
|
// accompanied this code).
|
|
//
|
|
// You should have received a copy of the GNU General Public License version
|
|
// 2 along with this work; if not, write to the Free Software Foundation,
|
|
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
//
|
|
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
// or visit www.oracle.com if you need additional information or have any
|
|
// questions.
|
|
//
|
|
//
|
|
|
|
// X86 Common Architecture Description File
|
|
|
|
//----------REGISTER DEFINITION BLOCK------------------------------------------
|
|
// This information is used by the matcher and the register allocator to
|
|
// describe individual registers and classes of registers within the target
|
|
// archtecture.
|
|
|
|
register %{
|
|
//----------Architecture Description Register Definitions----------------------
|
|
// General Registers
|
|
// "reg_def" name ( register save type, C convention save type,
|
|
// ideal register type, encoding );
|
|
// Register Save Types:
|
|
//
|
|
// NS = No-Save: The register allocator assumes that these registers
|
|
// can be used without saving upon entry to the method, &
|
|
// that they do not need to be saved at call sites.
|
|
//
|
|
// SOC = Save-On-Call: The register allocator assumes that these registers
|
|
// can be used without saving upon entry to the method,
|
|
// but that they must be saved at call sites.
|
|
//
|
|
// SOE = Save-On-Entry: The register allocator assumes that these registers
|
|
// must be saved before using them upon entry to the
|
|
// method, but they do not need to be saved at call
|
|
// sites.
|
|
//
|
|
// AS = Always-Save: The register allocator assumes that these registers
|
|
// must be saved before using them upon entry to the
|
|
// method, & that they must be saved at call sites.
|
|
//
|
|
// Ideal Register Type is used to determine how to save & restore a
|
|
// register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
|
|
// spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
|
|
//
|
|
// The encoding number is the actual bit-pattern placed into the opcodes.
|
|
|
|
// XMM registers. 512-bit registers or 8 words each, labeled (a)-p.
|
|
// Word a in each register holds a Float, words ab hold a Double.
|
|
// The whole registers are used in SSE4.2 version intrinsics,
|
|
// array copy stubs and superword operations (see UseSSE42Intrinsics,
|
|
// UseXMMForArrayCopy and UseSuperword flags).
|
|
// For pre EVEX enabled architectures:
|
|
// XMM8-XMM15 must be encoded with REX (VEX for UseAVX)
|
|
// For EVEX enabled architectures:
|
|
// XMM8-XMM31 must be encoded with REX (EVEX for UseAVX).
|
|
//
|
|
// Linux ABI: No register preserved across function calls
|
|
// XMM0-XMM7 might hold parameters
|
|
// Windows ABI: XMM6-XMM31 preserved across function calls
|
|
// XMM0-XMM3 might hold parameters
|
|
|
|
reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
|
|
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(1));
|
|
reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(2));
|
|
reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(3));
|
|
reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(4));
|
|
reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(5));
|
|
reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(6));
|
|
reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(7));
|
|
reg_def XMM0i( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(8));
|
|
reg_def XMM0j( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(9));
|
|
reg_def XMM0k( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(10));
|
|
reg_def XMM0l( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(11));
|
|
reg_def XMM0m( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(12));
|
|
reg_def XMM0n( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(13));
|
|
reg_def XMM0o( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(14));
|
|
reg_def XMM0p( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next(15));
|
|
|
|
reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
|
|
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(1));
|
|
reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(2));
|
|
reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(3));
|
|
reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(4));
|
|
reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(5));
|
|
reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(6));
|
|
reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(7));
|
|
reg_def XMM1i( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(8));
|
|
reg_def XMM1j( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(9));
|
|
reg_def XMM1k( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(10));
|
|
reg_def XMM1l( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(11));
|
|
reg_def XMM1m( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(12));
|
|
reg_def XMM1n( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(13));
|
|
reg_def XMM1o( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(14));
|
|
reg_def XMM1p( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next(15));
|
|
|
|
reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
|
|
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(1));
|
|
reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(2));
|
|
reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(3));
|
|
reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(4));
|
|
reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(5));
|
|
reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(6));
|
|
reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(7));
|
|
reg_def XMM2i( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(8));
|
|
reg_def XMM2j( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(9));
|
|
reg_def XMM2k( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(10));
|
|
reg_def XMM2l( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(11));
|
|
reg_def XMM2m( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(12));
|
|
reg_def XMM2n( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(13));
|
|
reg_def XMM2o( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(14));
|
|
reg_def XMM2p( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next(15));
|
|
|
|
reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
|
|
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(1));
|
|
reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(2));
|
|
reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(3));
|
|
reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(4));
|
|
reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(5));
|
|
reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(6));
|
|
reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(7));
|
|
reg_def XMM3i( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(8));
|
|
reg_def XMM3j( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(9));
|
|
reg_def XMM3k( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(10));
|
|
reg_def XMM3l( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(11));
|
|
reg_def XMM3m( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(12));
|
|
reg_def XMM3n( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(13));
|
|
reg_def XMM3o( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(14));
|
|
reg_def XMM3p( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next(15));
|
|
|
|
reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
|
|
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(1));
|
|
reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(2));
|
|
reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(3));
|
|
reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(4));
|
|
reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(5));
|
|
reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(6));
|
|
reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(7));
|
|
reg_def XMM4i( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(8));
|
|
reg_def XMM4j( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(9));
|
|
reg_def XMM4k( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(10));
|
|
reg_def XMM4l( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(11));
|
|
reg_def XMM4m( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(12));
|
|
reg_def XMM4n( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(13));
|
|
reg_def XMM4o( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(14));
|
|
reg_def XMM4p( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next(15));
|
|
|
|
reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
|
|
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(1));
|
|
reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(2));
|
|
reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(3));
|
|
reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(4));
|
|
reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(5));
|
|
reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(6));
|
|
reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(7));
|
|
reg_def XMM5i( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(8));
|
|
reg_def XMM5j( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(9));
|
|
reg_def XMM5k( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(10));
|
|
reg_def XMM5l( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(11));
|
|
reg_def XMM5m( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(12));
|
|
reg_def XMM5n( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(13));
|
|
reg_def XMM5o( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(14));
|
|
reg_def XMM5p( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next(15));
|
|
|
|
reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
|
|
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(1));
|
|
reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(2));
|
|
reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(3));
|
|
reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(4));
|
|
reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(5));
|
|
reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(6));
|
|
reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(7));
|
|
reg_def XMM6i( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(8));
|
|
reg_def XMM6j( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(9));
|
|
reg_def XMM6k( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(10));
|
|
reg_def XMM6l( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(11));
|
|
reg_def XMM6m( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(12));
|
|
reg_def XMM6n( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(13));
|
|
reg_def XMM6o( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(14));
|
|
reg_def XMM6p( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next(15));
|
|
|
|
reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
|
|
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(1));
|
|
reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(2));
|
|
reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(3));
|
|
reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(4));
|
|
reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(5));
|
|
reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(6));
|
|
reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(7));
|
|
reg_def XMM7i( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(8));
|
|
reg_def XMM7j( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(9));
|
|
reg_def XMM7k( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(10));
|
|
reg_def XMM7l( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(11));
|
|
reg_def XMM7m( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(12));
|
|
reg_def XMM7n( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(13));
|
|
reg_def XMM7o( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(14));
|
|
reg_def XMM7p( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next(15));
|
|
|
|
#ifdef _LP64
|
|
|
|
reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
|
|
reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(1));
|
|
reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(2));
|
|
reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(3));
|
|
reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(4));
|
|
reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(5));
|
|
reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(6));
|
|
reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(7));
|
|
reg_def XMM8i( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(8));
|
|
reg_def XMM8j( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(9));
|
|
reg_def XMM8k( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(10));
|
|
reg_def XMM8l( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(11));
|
|
reg_def XMM8m( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(12));
|
|
reg_def XMM8n( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(13));
|
|
reg_def XMM8o( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(14));
|
|
reg_def XMM8p( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next(15));
|
|
|
|
reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
|
|
reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(1));
|
|
reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(2));
|
|
reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(3));
|
|
reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(4));
|
|
reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(5));
|
|
reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(6));
|
|
reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(7));
|
|
reg_def XMM9i( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(8));
|
|
reg_def XMM9j( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(9));
|
|
reg_def XMM9k( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(10));
|
|
reg_def XMM9l( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(11));
|
|
reg_def XMM9m( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(12));
|
|
reg_def XMM9n( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(13));
|
|
reg_def XMM9o( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(14));
|
|
reg_def XMM9p( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next(15));
|
|
|
|
reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
|
|
reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(1));
|
|
reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(2));
|
|
reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(3));
|
|
reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(4));
|
|
reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(5));
|
|
reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(6));
|
|
reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(7));
|
|
reg_def XMM10i( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(8));
|
|
reg_def XMM10j( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(9));
|
|
reg_def XMM10k( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(10));
|
|
reg_def XMM10l( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(11));
|
|
reg_def XMM10m( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(12));
|
|
reg_def XMM10n( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(13));
|
|
reg_def XMM10o( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(14));
|
|
reg_def XMM10p( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next(15));
|
|
|
|
reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
|
|
reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(1));
|
|
reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(2));
|
|
reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(3));
|
|
reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(4));
|
|
reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(5));
|
|
reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(6));
|
|
reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(7));
|
|
reg_def XMM11i( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(8));
|
|
reg_def XMM11j( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(9));
|
|
reg_def XMM11k( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(10));
|
|
reg_def XMM11l( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(11));
|
|
reg_def XMM11m( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(12));
|
|
reg_def XMM11n( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(13));
|
|
reg_def XMM11o( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(14));
|
|
reg_def XMM11p( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next(15));
|
|
|
|
reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
|
|
reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(1));
|
|
reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(2));
|
|
reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(3));
|
|
reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(4));
|
|
reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(5));
|
|
reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(6));
|
|
reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(7));
|
|
reg_def XMM12i( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(8));
|
|
reg_def XMM12j( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(9));
|
|
reg_def XMM12k( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(10));
|
|
reg_def XMM12l( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(11));
|
|
reg_def XMM12m( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(12));
|
|
reg_def XMM12n( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(13));
|
|
reg_def XMM12o( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(14));
|
|
reg_def XMM12p( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next(15));
|
|
|
|
reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
|
|
reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(1));
|
|
reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(2));
|
|
reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(3));
|
|
reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(4));
|
|
reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(5));
|
|
reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(6));
|
|
reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(7));
|
|
reg_def XMM13i( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(8));
|
|
reg_def XMM13j( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(9));
|
|
reg_def XMM13k( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(10));
|
|
reg_def XMM13l( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(11));
|
|
reg_def XMM13m( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(12));
|
|
reg_def XMM13n( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(13));
|
|
reg_def XMM13o( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(14));
|
|
reg_def XMM13p( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next(15));
|
|
|
|
reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
|
|
reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(1));
|
|
reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(2));
|
|
reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(3));
|
|
reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(4));
|
|
reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(5));
|
|
reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(6));
|
|
reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(7));
|
|
reg_def XMM14i( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(8));
|
|
reg_def XMM14j( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(9));
|
|
reg_def XMM14k( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(10));
|
|
reg_def XMM14l( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(11));
|
|
reg_def XMM14m( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(12));
|
|
reg_def XMM14n( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(13));
|
|
reg_def XMM14o( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(14));
|
|
reg_def XMM14p( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next(15));
|
|
|
|
reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
|
|
reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(1));
|
|
reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(2));
|
|
reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(3));
|
|
reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(4));
|
|
reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(5));
|
|
reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(6));
|
|
reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(7));
|
|
reg_def XMM15i( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(8));
|
|
reg_def XMM15j( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(9));
|
|
reg_def XMM15k( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(10));
|
|
reg_def XMM15l( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(11));
|
|
reg_def XMM15m( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(12));
|
|
reg_def XMM15n( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(13));
|
|
reg_def XMM15o( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(14));
|
|
reg_def XMM15p( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next(15));
|
|
|
|
reg_def XMM16 ( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg());
|
|
reg_def XMM16b( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(1));
|
|
reg_def XMM16c( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(2));
|
|
reg_def XMM16d( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(3));
|
|
reg_def XMM16e( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(4));
|
|
reg_def XMM16f( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(5));
|
|
reg_def XMM16g( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(6));
|
|
reg_def XMM16h( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(7));
|
|
reg_def XMM16i( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(8));
|
|
reg_def XMM16j( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(9));
|
|
reg_def XMM16k( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(10));
|
|
reg_def XMM16l( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(11));
|
|
reg_def XMM16m( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(12));
|
|
reg_def XMM16n( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(13));
|
|
reg_def XMM16o( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(14));
|
|
reg_def XMM16p( SOC, SOC, Op_RegF, 16, xmm16->as_VMReg()->next(15));
|
|
|
|
reg_def XMM17 ( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg());
|
|
reg_def XMM17b( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(1));
|
|
reg_def XMM17c( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(2));
|
|
reg_def XMM17d( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(3));
|
|
reg_def XMM17e( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(4));
|
|
reg_def XMM17f( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(5));
|
|
reg_def XMM17g( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(6));
|
|
reg_def XMM17h( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(7));
|
|
reg_def XMM17i( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(8));
|
|
reg_def XMM17j( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(9));
|
|
reg_def XMM17k( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(10));
|
|
reg_def XMM17l( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(11));
|
|
reg_def XMM17m( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(12));
|
|
reg_def XMM17n( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(13));
|
|
reg_def XMM17o( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(14));
|
|
reg_def XMM17p( SOC, SOC, Op_RegF, 17, xmm17->as_VMReg()->next(15));
|
|
|
|
reg_def XMM18 ( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg());
|
|
reg_def XMM18b( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(1));
|
|
reg_def XMM18c( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(2));
|
|
reg_def XMM18d( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(3));
|
|
reg_def XMM18e( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(4));
|
|
reg_def XMM18f( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(5));
|
|
reg_def XMM18g( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(6));
|
|
reg_def XMM18h( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(7));
|
|
reg_def XMM18i( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(8));
|
|
reg_def XMM18j( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(9));
|
|
reg_def XMM18k( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(10));
|
|
reg_def XMM18l( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(11));
|
|
reg_def XMM18m( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(12));
|
|
reg_def XMM18n( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(13));
|
|
reg_def XMM18o( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(14));
|
|
reg_def XMM18p( SOC, SOC, Op_RegF, 18, xmm18->as_VMReg()->next(15));
|
|
|
|
reg_def XMM19 ( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg());
|
|
reg_def XMM19b( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(1));
|
|
reg_def XMM19c( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(2));
|
|
reg_def XMM19d( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(3));
|
|
reg_def XMM19e( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(4));
|
|
reg_def XMM19f( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(5));
|
|
reg_def XMM19g( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(6));
|
|
reg_def XMM19h( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(7));
|
|
reg_def XMM19i( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(8));
|
|
reg_def XMM19j( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(9));
|
|
reg_def XMM19k( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(10));
|
|
reg_def XMM19l( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(11));
|
|
reg_def XMM19m( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(12));
|
|
reg_def XMM19n( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(13));
|
|
reg_def XMM19o( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(14));
|
|
reg_def XMM19p( SOC, SOC, Op_RegF, 19, xmm19->as_VMReg()->next(15));
|
|
|
|
reg_def XMM20 ( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg());
|
|
reg_def XMM20b( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(1));
|
|
reg_def XMM20c( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(2));
|
|
reg_def XMM20d( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(3));
|
|
reg_def XMM20e( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(4));
|
|
reg_def XMM20f( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(5));
|
|
reg_def XMM20g( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(6));
|
|
reg_def XMM20h( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(7));
|
|
reg_def XMM20i( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(8));
|
|
reg_def XMM20j( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(9));
|
|
reg_def XMM20k( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(10));
|
|
reg_def XMM20l( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(11));
|
|
reg_def XMM20m( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(12));
|
|
reg_def XMM20n( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(13));
|
|
reg_def XMM20o( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(14));
|
|
reg_def XMM20p( SOC, SOC, Op_RegF, 20, xmm20->as_VMReg()->next(15));
|
|
|
|
reg_def XMM21 ( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg());
|
|
reg_def XMM21b( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(1));
|
|
reg_def XMM21c( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(2));
|
|
reg_def XMM21d( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(3));
|
|
reg_def XMM21e( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(4));
|
|
reg_def XMM21f( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(5));
|
|
reg_def XMM21g( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(6));
|
|
reg_def XMM21h( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(7));
|
|
reg_def XMM21i( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(8));
|
|
reg_def XMM21j( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(9));
|
|
reg_def XMM21k( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(10));
|
|
reg_def XMM21l( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(11));
|
|
reg_def XMM21m( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(12));
|
|
reg_def XMM21n( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(13));
|
|
reg_def XMM21o( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(14));
|
|
reg_def XMM21p( SOC, SOC, Op_RegF, 21, xmm21->as_VMReg()->next(15));
|
|
|
|
reg_def XMM22 ( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg());
|
|
reg_def XMM22b( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(1));
|
|
reg_def XMM22c( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(2));
|
|
reg_def XMM22d( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(3));
|
|
reg_def XMM22e( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(4));
|
|
reg_def XMM22f( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(5));
|
|
reg_def XMM22g( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(6));
|
|
reg_def XMM22h( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(7));
|
|
reg_def XMM22i( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(8));
|
|
reg_def XMM22j( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(9));
|
|
reg_def XMM22k( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(10));
|
|
reg_def XMM22l( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(11));
|
|
reg_def XMM22m( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(12));
|
|
reg_def XMM22n( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(13));
|
|
reg_def XMM22o( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(14));
|
|
reg_def XMM22p( SOC, SOC, Op_RegF, 22, xmm22->as_VMReg()->next(15));
|
|
|
|
reg_def XMM23 ( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg());
|
|
reg_def XMM23b( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(1));
|
|
reg_def XMM23c( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(2));
|
|
reg_def XMM23d( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(3));
|
|
reg_def XMM23e( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(4));
|
|
reg_def XMM23f( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(5));
|
|
reg_def XMM23g( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(6));
|
|
reg_def XMM23h( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(7));
|
|
reg_def XMM23i( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(8));
|
|
reg_def XMM23j( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(9));
|
|
reg_def XMM23k( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(10));
|
|
reg_def XMM23l( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(11));
|
|
reg_def XMM23m( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(12));
|
|
reg_def XMM23n( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(13));
|
|
reg_def XMM23o( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(14));
|
|
reg_def XMM23p( SOC, SOC, Op_RegF, 23, xmm23->as_VMReg()->next(15));
|
|
|
|
reg_def XMM24 ( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg());
|
|
reg_def XMM24b( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(1));
|
|
reg_def XMM24c( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(2));
|
|
reg_def XMM24d( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(3));
|
|
reg_def XMM24e( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(4));
|
|
reg_def XMM24f( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(5));
|
|
reg_def XMM24g( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(6));
|
|
reg_def XMM24h( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(7));
|
|
reg_def XMM24i( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(8));
|
|
reg_def XMM24j( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(9));
|
|
reg_def XMM24k( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(10));
|
|
reg_def XMM24l( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(11));
|
|
reg_def XMM24m( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(12));
|
|
reg_def XMM24n( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(13));
|
|
reg_def XMM24o( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(14));
|
|
reg_def XMM24p( SOC, SOC, Op_RegF, 24, xmm24->as_VMReg()->next(15));
|
|
|
|
reg_def XMM25 ( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg());
|
|
reg_def XMM25b( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(1));
|
|
reg_def XMM25c( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(2));
|
|
reg_def XMM25d( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(3));
|
|
reg_def XMM25e( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(4));
|
|
reg_def XMM25f( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(5));
|
|
reg_def XMM25g( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(6));
|
|
reg_def XMM25h( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(7));
|
|
reg_def XMM25i( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(8));
|
|
reg_def XMM25j( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(9));
|
|
reg_def XMM25k( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(10));
|
|
reg_def XMM25l( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(11));
|
|
reg_def XMM25m( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(12));
|
|
reg_def XMM25n( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(13));
|
|
reg_def XMM25o( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(14));
|
|
reg_def XMM25p( SOC, SOC, Op_RegF, 25, xmm25->as_VMReg()->next(15));
|
|
|
|
reg_def XMM26 ( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg());
|
|
reg_def XMM26b( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(1));
|
|
reg_def XMM26c( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(2));
|
|
reg_def XMM26d( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(3));
|
|
reg_def XMM26e( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(4));
|
|
reg_def XMM26f( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(5));
|
|
reg_def XMM26g( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(6));
|
|
reg_def XMM26h( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(7));
|
|
reg_def XMM26i( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(8));
|
|
reg_def XMM26j( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(9));
|
|
reg_def XMM26k( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(10));
|
|
reg_def XMM26l( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(11));
|
|
reg_def XMM26m( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(12));
|
|
reg_def XMM26n( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(13));
|
|
reg_def XMM26o( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(14));
|
|
reg_def XMM26p( SOC, SOC, Op_RegF, 26, xmm26->as_VMReg()->next(15));
|
|
|
|
reg_def XMM27 ( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg());
|
|
reg_def XMM27b( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(1));
|
|
reg_def XMM27c( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(2));
|
|
reg_def XMM27d( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(3));
|
|
reg_def XMM27e( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(4));
|
|
reg_def XMM27f( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(5));
|
|
reg_def XMM27g( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(6));
|
|
reg_def XMM27h( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(7));
|
|
reg_def XMM27i( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(8));
|
|
reg_def XMM27j( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(9));
|
|
reg_def XMM27k( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(10));
|
|
reg_def XMM27l( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(11));
|
|
reg_def XMM27m( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(12));
|
|
reg_def XMM27n( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(13));
|
|
reg_def XMM27o( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(14));
|
|
reg_def XMM27p( SOC, SOC, Op_RegF, 27, xmm27->as_VMReg()->next(15));
|
|
|
|
reg_def XMM28 ( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg());
|
|
reg_def XMM28b( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(1));
|
|
reg_def XMM28c( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(2));
|
|
reg_def XMM28d( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(3));
|
|
reg_def XMM28e( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(4));
|
|
reg_def XMM28f( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(5));
|
|
reg_def XMM28g( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(6));
|
|
reg_def XMM28h( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(7));
|
|
reg_def XMM28i( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(8));
|
|
reg_def XMM28j( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(9));
|
|
reg_def XMM28k( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(10));
|
|
reg_def XMM28l( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(11));
|
|
reg_def XMM28m( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(12));
|
|
reg_def XMM28n( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(13));
|
|
reg_def XMM28o( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(14));
|
|
reg_def XMM28p( SOC, SOC, Op_RegF, 28, xmm28->as_VMReg()->next(15));
|
|
|
|
reg_def XMM29 ( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg());
|
|
reg_def XMM29b( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(1));
|
|
reg_def XMM29c( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(2));
|
|
reg_def XMM29d( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(3));
|
|
reg_def XMM29e( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(4));
|
|
reg_def XMM29f( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(5));
|
|
reg_def XMM29g( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(6));
|
|
reg_def XMM29h( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(7));
|
|
reg_def XMM29i( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(8));
|
|
reg_def XMM29j( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(9));
|
|
reg_def XMM29k( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(10));
|
|
reg_def XMM29l( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(11));
|
|
reg_def XMM29m( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(12));
|
|
reg_def XMM29n( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(13));
|
|
reg_def XMM29o( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(14));
|
|
reg_def XMM29p( SOC, SOC, Op_RegF, 29, xmm29->as_VMReg()->next(15));
|
|
|
|
reg_def XMM30 ( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg());
|
|
reg_def XMM30b( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(1));
|
|
reg_def XMM30c( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(2));
|
|
reg_def XMM30d( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(3));
|
|
reg_def XMM30e( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(4));
|
|
reg_def XMM30f( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(5));
|
|
reg_def XMM30g( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(6));
|
|
reg_def XMM30h( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(7));
|
|
reg_def XMM30i( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(8));
|
|
reg_def XMM30j( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(9));
|
|
reg_def XMM30k( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(10));
|
|
reg_def XMM30l( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(11));
|
|
reg_def XMM30m( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(12));
|
|
reg_def XMM30n( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(13));
|
|
reg_def XMM30o( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(14));
|
|
reg_def XMM30p( SOC, SOC, Op_RegF, 30, xmm30->as_VMReg()->next(15));
|
|
|
|
reg_def XMM31 ( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg());
|
|
reg_def XMM31b( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(1));
|
|
reg_def XMM31c( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(2));
|
|
reg_def XMM31d( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(3));
|
|
reg_def XMM31e( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(4));
|
|
reg_def XMM31f( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(5));
|
|
reg_def XMM31g( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(6));
|
|
reg_def XMM31h( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(7));
|
|
reg_def XMM31i( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(8));
|
|
reg_def XMM31j( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(9));
|
|
reg_def XMM31k( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(10));
|
|
reg_def XMM31l( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(11));
|
|
reg_def XMM31m( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(12));
|
|
reg_def XMM31n( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(13));
|
|
reg_def XMM31o( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(14));
|
|
reg_def XMM31p( SOC, SOC, Op_RegF, 31, xmm31->as_VMReg()->next(15));
|
|
|
|
#endif // _LP64
|
|
|
|
#ifdef _LP64
|
|
reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
|
|
#else
|
|
reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
|
|
#endif // _LP64
|
|
|
|
alloc_class chunk1(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
|
|
XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
|
|
XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
|
|
XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
|
|
XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
|
|
XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
|
|
XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
|
|
XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
|
|
#ifdef _LP64
|
|
,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
|
|
XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
|
|
XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
|
|
XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
|
|
XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
|
|
XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
|
|
XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
|
|
XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
|
|
,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
|
|
XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
|
|
XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
|
|
XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
|
|
XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
|
|
XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
|
|
XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
|
|
XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
|
|
XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
|
|
XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
|
|
XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
|
|
XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
|
|
XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
|
|
XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
|
|
XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
|
|
XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
|
|
#endif
|
|
);
|
|
|
|
// flags allocation class should be last.
|
|
alloc_class chunk2(RFLAGS);
|
|
|
|
// Singleton class for condition codes
|
|
reg_class int_flags(RFLAGS);
|
|
|
|
// Class for pre evex float registers
|
|
reg_class float_reg_legacy(XMM0,
|
|
XMM1,
|
|
XMM2,
|
|
XMM3,
|
|
XMM4,
|
|
XMM5,
|
|
XMM6,
|
|
XMM7
|
|
#ifdef _LP64
|
|
,XMM8,
|
|
XMM9,
|
|
XMM10,
|
|
XMM11,
|
|
XMM12,
|
|
XMM13,
|
|
XMM14,
|
|
XMM15
|
|
#endif
|
|
);
|
|
|
|
// Class for evex float registers
|
|
reg_class float_reg_evex(XMM0,
|
|
XMM1,
|
|
XMM2,
|
|
XMM3,
|
|
XMM4,
|
|
XMM5,
|
|
XMM6,
|
|
XMM7
|
|
#ifdef _LP64
|
|
,XMM8,
|
|
XMM9,
|
|
XMM10,
|
|
XMM11,
|
|
XMM12,
|
|
XMM13,
|
|
XMM14,
|
|
XMM15,
|
|
XMM16,
|
|
XMM17,
|
|
XMM18,
|
|
XMM19,
|
|
XMM20,
|
|
XMM21,
|
|
XMM22,
|
|
XMM23,
|
|
XMM24,
|
|
XMM25,
|
|
XMM26,
|
|
XMM27,
|
|
XMM28,
|
|
XMM29,
|
|
XMM30,
|
|
XMM31
|
|
#endif
|
|
);
|
|
|
|
reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
|
|
reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
|
|
|
|
// Class for pre evex double registers
|
|
reg_class double_reg_legacy(XMM0, XMM0b,
|
|
XMM1, XMM1b,
|
|
XMM2, XMM2b,
|
|
XMM3, XMM3b,
|
|
XMM4, XMM4b,
|
|
XMM5, XMM5b,
|
|
XMM6, XMM6b,
|
|
XMM7, XMM7b
|
|
#ifdef _LP64
|
|
,XMM8, XMM8b,
|
|
XMM9, XMM9b,
|
|
XMM10, XMM10b,
|
|
XMM11, XMM11b,
|
|
XMM12, XMM12b,
|
|
XMM13, XMM13b,
|
|
XMM14, XMM14b,
|
|
XMM15, XMM15b
|
|
#endif
|
|
);
|
|
|
|
// Class for evex double registers
|
|
reg_class double_reg_evex(XMM0, XMM0b,
|
|
XMM1, XMM1b,
|
|
XMM2, XMM2b,
|
|
XMM3, XMM3b,
|
|
XMM4, XMM4b,
|
|
XMM5, XMM5b,
|
|
XMM6, XMM6b,
|
|
XMM7, XMM7b
|
|
#ifdef _LP64
|
|
,XMM8, XMM8b,
|
|
XMM9, XMM9b,
|
|
XMM10, XMM10b,
|
|
XMM11, XMM11b,
|
|
XMM12, XMM12b,
|
|
XMM13, XMM13b,
|
|
XMM14, XMM14b,
|
|
XMM15, XMM15b,
|
|
XMM16, XMM16b,
|
|
XMM17, XMM17b,
|
|
XMM18, XMM18b,
|
|
XMM19, XMM19b,
|
|
XMM20, XMM20b,
|
|
XMM21, XMM21b,
|
|
XMM22, XMM22b,
|
|
XMM23, XMM23b,
|
|
XMM24, XMM24b,
|
|
XMM25, XMM25b,
|
|
XMM26, XMM26b,
|
|
XMM27, XMM27b,
|
|
XMM28, XMM28b,
|
|
XMM29, XMM29b,
|
|
XMM30, XMM30b,
|
|
XMM31, XMM31b
|
|
#endif
|
|
);
|
|
|
|
reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
|
|
reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
|
|
|
|
// Class for pre evex 32bit vector registers
|
|
reg_class vectors_reg_legacy(XMM0,
|
|
XMM1,
|
|
XMM2,
|
|
XMM3,
|
|
XMM4,
|
|
XMM5,
|
|
XMM6,
|
|
XMM7
|
|
#ifdef _LP64
|
|
,XMM8,
|
|
XMM9,
|
|
XMM10,
|
|
XMM11,
|
|
XMM12,
|
|
XMM13,
|
|
XMM14,
|
|
XMM15
|
|
#endif
|
|
);
|
|
|
|
// Class for evex 32bit vector registers
|
|
reg_class vectors_reg_evex(XMM0,
|
|
XMM1,
|
|
XMM2,
|
|
XMM3,
|
|
XMM4,
|
|
XMM5,
|
|
XMM6,
|
|
XMM7
|
|
#ifdef _LP64
|
|
,XMM8,
|
|
XMM9,
|
|
XMM10,
|
|
XMM11,
|
|
XMM12,
|
|
XMM13,
|
|
XMM14,
|
|
XMM15,
|
|
XMM16,
|
|
XMM17,
|
|
XMM18,
|
|
XMM19,
|
|
XMM20,
|
|
XMM21,
|
|
XMM22,
|
|
XMM23,
|
|
XMM24,
|
|
XMM25,
|
|
XMM26,
|
|
XMM27,
|
|
XMM28,
|
|
XMM29,
|
|
XMM30,
|
|
XMM31
|
|
#endif
|
|
);
|
|
|
|
reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
|
|
reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
|
|
|
|
// Class for all 64bit vector registers
|
|
reg_class vectord_reg_legacy(XMM0, XMM0b,
|
|
XMM1, XMM1b,
|
|
XMM2, XMM2b,
|
|
XMM3, XMM3b,
|
|
XMM4, XMM4b,
|
|
XMM5, XMM5b,
|
|
XMM6, XMM6b,
|
|
XMM7, XMM7b
|
|
#ifdef _LP64
|
|
,XMM8, XMM8b,
|
|
XMM9, XMM9b,
|
|
XMM10, XMM10b,
|
|
XMM11, XMM11b,
|
|
XMM12, XMM12b,
|
|
XMM13, XMM13b,
|
|
XMM14, XMM14b,
|
|
XMM15, XMM15b
|
|
#endif
|
|
);
|
|
|
|
// Class for all 64bit vector registers
|
|
reg_class vectord_reg_evex(XMM0, XMM0b,
|
|
XMM1, XMM1b,
|
|
XMM2, XMM2b,
|
|
XMM3, XMM3b,
|
|
XMM4, XMM4b,
|
|
XMM5, XMM5b,
|
|
XMM6, XMM6b,
|
|
XMM7, XMM7b
|
|
#ifdef _LP64
|
|
,XMM8, XMM8b,
|
|
XMM9, XMM9b,
|
|
XMM10, XMM10b,
|
|
XMM11, XMM11b,
|
|
XMM12, XMM12b,
|
|
XMM13, XMM13b,
|
|
XMM14, XMM14b,
|
|
XMM15, XMM15b,
|
|
XMM16, XMM16b,
|
|
XMM17, XMM17b,
|
|
XMM18, XMM18b,
|
|
XMM19, XMM19b,
|
|
XMM20, XMM20b,
|
|
XMM21, XMM21b,
|
|
XMM22, XMM22b,
|
|
XMM23, XMM23b,
|
|
XMM24, XMM24b,
|
|
XMM25, XMM25b,
|
|
XMM26, XMM26b,
|
|
XMM27, XMM27b,
|
|
XMM28, XMM28b,
|
|
XMM29, XMM29b,
|
|
XMM30, XMM30b,
|
|
XMM31, XMM31b
|
|
#endif
|
|
);
|
|
|
|
reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
|
|
reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
|
|
|
|
// Class for all 128bit vector registers
|
|
reg_class vectorx_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d,
|
|
XMM1, XMM1b, XMM1c, XMM1d,
|
|
XMM2, XMM2b, XMM2c, XMM2d,
|
|
XMM3, XMM3b, XMM3c, XMM3d,
|
|
XMM4, XMM4b, XMM4c, XMM4d,
|
|
XMM5, XMM5b, XMM5c, XMM5d,
|
|
XMM6, XMM6b, XMM6c, XMM6d,
|
|
XMM7, XMM7b, XMM7c, XMM7d
|
|
#ifdef _LP64
|
|
,XMM8, XMM8b, XMM8c, XMM8d,
|
|
XMM9, XMM9b, XMM9c, XMM9d,
|
|
XMM10, XMM10b, XMM10c, XMM10d,
|
|
XMM11, XMM11b, XMM11c, XMM11d,
|
|
XMM12, XMM12b, XMM12c, XMM12d,
|
|
XMM13, XMM13b, XMM13c, XMM13d,
|
|
XMM14, XMM14b, XMM14c, XMM14d,
|
|
XMM15, XMM15b, XMM15c, XMM15d
|
|
#endif
|
|
);
|
|
|
|
// Class for all 128bit vector registers
|
|
reg_class vectorx_reg_evex(XMM0, XMM0b, XMM0c, XMM0d,
|
|
XMM1, XMM1b, XMM1c, XMM1d,
|
|
XMM2, XMM2b, XMM2c, XMM2d,
|
|
XMM3, XMM3b, XMM3c, XMM3d,
|
|
XMM4, XMM4b, XMM4c, XMM4d,
|
|
XMM5, XMM5b, XMM5c, XMM5d,
|
|
XMM6, XMM6b, XMM6c, XMM6d,
|
|
XMM7, XMM7b, XMM7c, XMM7d
|
|
#ifdef _LP64
|
|
,XMM8, XMM8b, XMM8c, XMM8d,
|
|
XMM9, XMM9b, XMM9c, XMM9d,
|
|
XMM10, XMM10b, XMM10c, XMM10d,
|
|
XMM11, XMM11b, XMM11c, XMM11d,
|
|
XMM12, XMM12b, XMM12c, XMM12d,
|
|
XMM13, XMM13b, XMM13c, XMM13d,
|
|
XMM14, XMM14b, XMM14c, XMM14d,
|
|
XMM15, XMM15b, XMM15c, XMM15d,
|
|
XMM16, XMM16b, XMM16c, XMM16d,
|
|
XMM17, XMM17b, XMM17c, XMM17d,
|
|
XMM18, XMM18b, XMM18c, XMM18d,
|
|
XMM19, XMM19b, XMM19c, XMM19d,
|
|
XMM20, XMM20b, XMM20c, XMM20d,
|
|
XMM21, XMM21b, XMM21c, XMM21d,
|
|
XMM22, XMM22b, XMM22c, XMM22d,
|
|
XMM23, XMM23b, XMM23c, XMM23d,
|
|
XMM24, XMM24b, XMM24c, XMM24d,
|
|
XMM25, XMM25b, XMM25c, XMM25d,
|
|
XMM26, XMM26b, XMM26c, XMM26d,
|
|
XMM27, XMM27b, XMM27c, XMM27d,
|
|
XMM28, XMM28b, XMM28c, XMM28d,
|
|
XMM29, XMM29b, XMM29c, XMM29d,
|
|
XMM30, XMM30b, XMM30c, XMM30d,
|
|
XMM31, XMM31b, XMM31c, XMM31d
|
|
#endif
|
|
);
|
|
|
|
reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
|
|
reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
|
|
|
|
// Class for all 256bit vector registers
|
|
reg_class vectory_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
|
|
XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
|
|
XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
|
|
XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
|
|
XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
|
|
XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
|
|
XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
|
|
XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
|
|
#ifdef _LP64
|
|
,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
|
|
XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
|
|
XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
|
|
XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
|
|
XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
|
|
XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
|
|
XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
|
|
XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
|
|
#endif
|
|
);
|
|
|
|
// Class for all 256bit vector registers
|
|
reg_class vectory_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h,
|
|
XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h,
|
|
XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h,
|
|
XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h,
|
|
XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h,
|
|
XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h,
|
|
XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h,
|
|
XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h
|
|
#ifdef _LP64
|
|
,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h,
|
|
XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h,
|
|
XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
|
|
XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
|
|
XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
|
|
XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
|
|
XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
|
|
XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h,
|
|
XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h,
|
|
XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h,
|
|
XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h,
|
|
XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h,
|
|
XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h,
|
|
XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h,
|
|
XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h,
|
|
XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h,
|
|
XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h,
|
|
XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h,
|
|
XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h,
|
|
XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h,
|
|
XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h,
|
|
XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h,
|
|
XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h,
|
|
XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h
|
|
#endif
|
|
);
|
|
|
|
reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
|
|
reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
|
|
|
|
// Class for all 512bit vector registers
|
|
reg_class vectorz_reg_evex(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
|
|
XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
|
|
XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
|
|
XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
|
|
XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
|
|
XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
|
|
XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
|
|
XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
|
|
#ifdef _LP64
|
|
,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
|
|
XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
|
|
XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
|
|
XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
|
|
XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
|
|
XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
|
|
XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
|
|
XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
|
|
,XMM16, XMM16b, XMM16c, XMM16d, XMM16e, XMM16f, XMM16g, XMM16h, XMM16i, XMM16j, XMM16k, XMM16l, XMM16m, XMM16n, XMM16o, XMM16p,
|
|
XMM17, XMM17b, XMM17c, XMM17d, XMM17e, XMM17f, XMM17g, XMM17h, XMM17i, XMM17j, XMM17k, XMM17l, XMM17m, XMM17n, XMM17o, XMM17p,
|
|
XMM18, XMM18b, XMM18c, XMM18d, XMM18e, XMM18f, XMM18g, XMM18h, XMM18i, XMM18j, XMM18k, XMM18l, XMM18m, XMM18n, XMM18o, XMM18p,
|
|
XMM19, XMM19b, XMM19c, XMM19d, XMM19e, XMM19f, XMM19g, XMM19h, XMM19i, XMM19j, XMM19k, XMM19l, XMM19m, XMM19n, XMM19o, XMM19p,
|
|
XMM20, XMM20b, XMM20c, XMM20d, XMM20e, XMM20f, XMM20g, XMM20h, XMM20i, XMM20j, XMM20k, XMM20l, XMM20m, XMM20n, XMM20o, XMM20p,
|
|
XMM21, XMM21b, XMM21c, XMM21d, XMM21e, XMM21f, XMM21g, XMM21h, XMM21i, XMM21j, XMM21k, XMM21l, XMM21m, XMM21n, XMM21o, XMM21p,
|
|
XMM22, XMM22b, XMM22c, XMM22d, XMM22e, XMM22f, XMM22g, XMM22h, XMM22i, XMM22j, XMM22k, XMM22l, XMM22m, XMM22n, XMM22o, XMM22p,
|
|
XMM23, XMM23b, XMM23c, XMM23d, XMM23e, XMM23f, XMM23g, XMM23h, XMM23i, XMM23j, XMM23k, XMM23l, XMM23m, XMM23n, XMM23o, XMM23p,
|
|
XMM24, XMM24b, XMM24c, XMM24d, XMM24e, XMM24f, XMM24g, XMM24h, XMM24i, XMM24j, XMM24k, XMM24l, XMM24m, XMM24n, XMM24o, XMM24p,
|
|
XMM25, XMM25b, XMM25c, XMM25d, XMM25e, XMM25f, XMM25g, XMM25h, XMM25i, XMM25j, XMM25k, XMM25l, XMM25m, XMM25n, XMM25o, XMM25p,
|
|
XMM26, XMM26b, XMM26c, XMM26d, XMM26e, XMM26f, XMM26g, XMM26h, XMM26i, XMM26j, XMM26k, XMM26l, XMM26m, XMM26n, XMM26o, XMM26p,
|
|
XMM27, XMM27b, XMM27c, XMM27d, XMM27e, XMM27f, XMM27g, XMM27h, XMM27i, XMM27j, XMM27k, XMM27l, XMM27m, XMM27n, XMM27o, XMM27p,
|
|
XMM28, XMM28b, XMM28c, XMM28d, XMM28e, XMM28f, XMM28g, XMM28h, XMM28i, XMM28j, XMM28k, XMM28l, XMM28m, XMM28n, XMM28o, XMM28p,
|
|
XMM29, XMM29b, XMM29c, XMM29d, XMM29e, XMM29f, XMM29g, XMM29h, XMM29i, XMM29j, XMM29k, XMM29l, XMM29m, XMM29n, XMM29o, XMM29p,
|
|
XMM30, XMM30b, XMM30c, XMM30d, XMM30e, XMM30f, XMM30g, XMM30h, XMM30i, XMM30j, XMM30k, XMM30l, XMM30m, XMM30n, XMM30o, XMM30p,
|
|
XMM31, XMM31b, XMM31c, XMM31d, XMM31e, XMM31f, XMM31g, XMM31h, XMM31i, XMM31j, XMM31k, XMM31l, XMM31m, XMM31n, XMM31o, XMM31p
|
|
#endif
|
|
);
|
|
|
|
// Class for restricted 512bit vector registers
|
|
reg_class vectorz_reg_legacy(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p,
|
|
XMM1, XMM1b, XMM1c, XMM1d, XMM1e, XMM1f, XMM1g, XMM1h, XMM1i, XMM1j, XMM1k, XMM1l, XMM1m, XMM1n, XMM1o, XMM1p,
|
|
XMM2, XMM2b, XMM2c, XMM2d, XMM2e, XMM2f, XMM2g, XMM2h, XMM2i, XMM2j, XMM2k, XMM2l, XMM2m, XMM2n, XMM2o, XMM2p,
|
|
XMM3, XMM3b, XMM3c, XMM3d, XMM3e, XMM3f, XMM3g, XMM3h, XMM3i, XMM3j, XMM3k, XMM3l, XMM3m, XMM3n, XMM3o, XMM3p,
|
|
XMM4, XMM4b, XMM4c, XMM4d, XMM4e, XMM4f, XMM4g, XMM4h, XMM4i, XMM4j, XMM4k, XMM4l, XMM4m, XMM4n, XMM4o, XMM4p,
|
|
XMM5, XMM5b, XMM5c, XMM5d, XMM5e, XMM5f, XMM5g, XMM5h, XMM5i, XMM5j, XMM5k, XMM5l, XMM5m, XMM5n, XMM5o, XMM5p,
|
|
XMM6, XMM6b, XMM6c, XMM6d, XMM6e, XMM6f, XMM6g, XMM6h, XMM6i, XMM6j, XMM6k, XMM6l, XMM6m, XMM6n, XMM6o, XMM6p,
|
|
XMM7, XMM7b, XMM7c, XMM7d, XMM7e, XMM7f, XMM7g, XMM7h, XMM7i, XMM7j, XMM7k, XMM7l, XMM7m, XMM7n, XMM7o, XMM7p
|
|
#ifdef _LP64
|
|
,XMM8, XMM8b, XMM8c, XMM8d, XMM8e, XMM8f, XMM8g, XMM8h, XMM8i, XMM8j, XMM8k, XMM8l, XMM8m, XMM8n, XMM8o, XMM8p,
|
|
XMM9, XMM9b, XMM9c, XMM9d, XMM9e, XMM9f, XMM9g, XMM9h, XMM9i, XMM9j, XMM9k, XMM9l, XMM9m, XMM9n, XMM9o, XMM9p,
|
|
XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
|
|
XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
|
|
XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
|
|
XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
|
|
XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
|
|
XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
|
|
#endif
|
|
);
|
|
|
|
reg_class_dynamic vectorz_reg (vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
|
|
reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
|
|
|
|
%}
|
|
|
|
|
|
//----------SOURCE BLOCK-------------------------------------------------------
|
|
// This is a block of C++ code which provides values, functions, and
|
|
// definitions necessary in the rest of the architecture description
|
|
|
|
source_hpp %{
|
|
// Header information of the source block.
|
|
// Method declarations/definitions which are used outside
|
|
// the ad-scope can conveniently be defined here.
|
|
//
|
|
// To keep related declarations/definitions/uses close together,
|
|
// we switch between source %{ }% and source_hpp %{ }% freely as needed.
|
|
|
|
class NativeJump;
|
|
|
|
class CallStubImpl {
|
|
|
|
//--------------------------------------------------------------
|
|
//---< Used for optimization in Compile::shorten_branches >---
|
|
//--------------------------------------------------------------
|
|
|
|
public:
|
|
// Size of call trampoline stub.
|
|
static uint size_call_trampoline() {
|
|
return 0; // no call trampolines on this platform
|
|
}
|
|
|
|
// number of relocations needed by a call trampoline stub
|
|
static uint reloc_call_trampoline() {
|
|
return 0; // no call trampolines on this platform
|
|
}
|
|
};
|
|
|
|
class HandlerImpl {
|
|
|
|
public:
|
|
|
|
static int emit_exception_handler(CodeBuffer &cbuf);
|
|
static int emit_deopt_handler(CodeBuffer& cbuf);
|
|
|
|
static uint size_exception_handler() {
|
|
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return NativeJump::instruction_size;
|
|
}
|
|
|
|
#ifdef _LP64
|
|
static uint size_deopt_handler() {
|
|
// three 5 byte instructions plus one move for unreachable address.
|
|
return 15+3;
|
|
}
|
|
#else
|
|
static uint size_deopt_handler() {
|
|
    // NativeCall instruction size is the same as NativeJump.
    // exception handler starts out as jump and can be patched to
    // a call by deoptimization. (4932387)
    // Note that this value is also credited (in output.cpp) to
    // the size of the code section.
    return 5 + NativeJump::instruction_size; // pushl(); jmp;
|
|
}
|
|
#endif
|
|
};
|
|
|
|
class Node::PD {
|
|
public:
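  // Flag_intel_jcc_erratum is set on MachNodes tagged by
  // IntelJccErratum::tag_affected_machnodes() (see
  // PhaseOutput::pd_perform_mach_node_analysis() below) and is consumed by
  // MachNode::compute_padding() to insert the extra padding those nodes need.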
|
|
enum NodeFlags {
|
|
Flag_intel_jcc_erratum = Node::_last_flag << 1,
|
|
_last_flag = Flag_intel_jcc_erratum
|
|
};
|
|
};
|
|
|
|
%} // end source_hpp
|
|
|
|
source %{
|
|
|
|
#include "opto/addnode.hpp"
|
|
#include "c2_intelJccErratum_x86.hpp"
|
|
|
|
void PhaseOutput::pd_perform_mach_node_analysis() {
|
|
if (VM_Version::has_intel_jcc_erratum()) {
|
|
int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
|
|
_buf_sizes._code += extra_padding;
|
|
}
|
|
}
|
|
|
|
int MachNode::pd_alignment_required() const {
|
|
PhaseOutput* output = Compile::current()->output();
|
|
Block* block = output->block();
|
|
int index = output->index();
|
|
if (VM_Version::has_intel_jcc_erratum() && IntelJccErratum::is_jcc_erratum_branch(block, this, index)) {
|
|
// Conservatively add worst case padding. We assume that relocInfo::addr_unit() is 1 on x86.
|
|
return IntelJccErratum::largest_jcc_size() + 1;
|
|
} else {
|
|
return 1;
|
|
}
|
|
}
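// Note: pd_alignment_required() above only reserves worst-case space for the
// erratum workaround; the exact number of pad bytes is decided later in
// compute_padding() below, once the final offset of the instruction is known.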
|
|
|
|
int MachNode::compute_padding(int current_offset) const {
|
|
if (flags() & Node::PD::Flag_intel_jcc_erratum) {
|
|
Compile* C = Compile::current();
|
|
PhaseOutput* output = C->output();
|
|
Block* block = output->block();
|
|
int index = output->index();
|
|
return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
|
|
} else {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
// Emit exception handler code.
|
|
// Stuff framesize into a register and call a VM stub routine.
|
|
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
|
|
|
|
// Note that the code buffer's insts_mark is always relative to insts.
|
|
// That's why we must use the macroassembler to generate a handler.
|
|
C2_MacroAssembler _masm(&cbuf);
|
|
address base = __ start_a_stub(size_exception_handler());
|
|
if (base == NULL) {
|
|
ciEnv::current()->record_failure("CodeCache is full");
|
|
return 0; // CodeBuffer::expand failed
|
|
}
|
|
int offset = __ offset();
|
|
__ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
|
|
assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
|
|
__ end_a_stub();
|
|
return offset;
|
|
}
|
|
|
|
// Emit deopt handler code.
|
|
int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
|
|
|
|
// Note that the code buffer's insts_mark is always relative to insts.
|
|
// That's why we must use the macroassembler to generate a handler.
|
|
C2_MacroAssembler _masm(&cbuf);
|
|
address base = __ start_a_stub(size_deopt_handler());
|
|
if (base == NULL) {
|
|
ciEnv::current()->record_failure("CodeCache is full");
|
|
return 0; // CodeBuffer::expand failed
|
|
}
|
|
int offset = __ offset();
|
|
|
|
#ifdef _LP64
|
|
address the_pc = (address) __ pc();
|
|
Label next;
|
|
// push a "the_pc" on the stack without destroying any registers
|
|
// as they all may be live.
|
|
|
|
// push address of "next"
|
|
__ call(next, relocInfo::none); // reloc none is fine since it is a disp32
|
|
__ bind(next);
|
|
// adjust it so it matches "the_pc"
|
|
__ subptr(Address(rsp, 0), __ offset() - offset);
|
|
#else
|
|
InternalAddress here(__ pc());
|
|
__ pushptr(here.addr());
|
|
#endif
|
|
|
|
__ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
|
|
assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow %d", (__ offset() - offset));
|
|
__ end_a_stub();
|
|
return offset;
|
|
}
|
|
|
|
|
|
//=============================================================================
|
|
|
|
// Float masks come from different places depending on platform.
|
|
#ifdef _LP64
|
|
static address float_signmask() { return StubRoutines::x86::float_sign_mask(); }
|
|
static address float_signflip() { return StubRoutines::x86::float_sign_flip(); }
|
|
static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
|
|
static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
|
|
#else
|
|
static address float_signmask() { return (address)float_signmask_pool; }
|
|
static address float_signflip() { return (address)float_signflip_pool; }
|
|
static address double_signmask() { return (address)double_signmask_pool; }
|
|
static address double_signflip() { return (address)double_signflip_pool; }
|
|
#endif
|
|
static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
|
|
static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
|
|
static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
|
|
|
|
//=============================================================================
|
|
const bool Matcher::match_rule_supported(int opcode) {
|
|
if (!has_match_rule(opcode)) {
|
|
return false; // no match rule present
|
|
}
|
|
switch (opcode) {
|
|
case Op_AbsVL:
|
|
if (UseAVX < 3) {
|
|
return false;
|
|
}
|
|
break;
|
|
case Op_PopCountI:
|
|
case Op_PopCountL:
|
|
if (!UsePopCountInstruction) {
|
|
return false;
|
|
}
|
|
break;
|
|
case Op_PopCountVI:
|
|
if (!UsePopCountInstruction || !VM_Version::supports_avx512_vpopcntdq()) {
|
|
return false;
|
|
}
|
|
break;
|
|
case Op_MulVI:
|
|
if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
|
|
return false;
|
|
}
|
|
break;
|
|
case Op_MulVL:
|
|
case Op_MulReductionVL:
|
|
if (VM_Version::supports_avx512dq() == false) {
|
|
return false;
|
|
}
|
|
break;
|
|
case Op_AbsVB:
|
|
case Op_AbsVS:
|
|
case Op_AbsVI:
|
|
case Op_AddReductionVI:
|
|
case Op_AndReductionV:
|
|
case Op_OrReductionV:
|
|
case Op_XorReductionV:
|
|
if (UseSSE < 3) { // requires at least SSSE3
|
|
return false;
|
|
}
|
|
break;
|
|
case Op_MulReductionVI:
|
|
if (UseSSE < 4) { // requires at least SSE4
|
|
return false;
|
|
}
|
|
break;
|
|
case Op_SqrtVD:
|
|
case Op_SqrtVF:
|
|
if (UseAVX < 1) { // enabled for AVX only
|
|
return false;
|
|
}
|
|
break;
|
|
case Op_CompareAndSwapL:
|
|
#ifdef _LP64
|
|
case Op_CompareAndSwapP:
|
|
#endif
|
|
if (!VM_Version::supports_cx8()) {
|
|
return false;
|
|
}
|
|
break;
|
|
case Op_CMoveVF:
|
|
case Op_CMoveVD:
|
|
if (UseAVX < 1 || UseAVX > 2) {
|
|
return false;
|
|
}
|
|
break;
|
|
case Op_StrIndexOf:
|
|
if (!UseSSE42Intrinsics) {
|
|
return false;
|
|
}
|
|
break;
|
|
case Op_StrIndexOfChar:
|
|
if (!UseSSE42Intrinsics) {
|
|
return false;
|
|
}
|
|
break;
|
|
case Op_OnSpinWait:
|
|
if (VM_Version::supports_on_spin_wait() == false) {
|
|
return false;
|
|
}
|
|
break;
|
|
case Op_MulVB:
|
|
case Op_LShiftVB:
|
|
case Op_RShiftVB:
|
|
case Op_URShiftVB:
|
|
if (UseSSE < 4) {
|
|
return false;
|
|
}
|
|
break;
|
|
#ifdef _LP64
|
|
case Op_MaxD:
|
|
case Op_MaxF:
|
|
case Op_MinD:
|
|
case Op_MinF:
|
|
if (UseAVX < 1) { // enabled for AVX only
|
|
return false;
|
|
}
|
|
break;
|
|
#endif
|
|
case Op_CacheWB:
|
|
case Op_CacheWBPreSync:
|
|
case Op_CacheWBPostSync:
|
|
if (!VM_Version::supports_data_cache_line_flush()) {
|
|
return false;
|
|
}
|
|
break;
|
|
case Op_RoundDoubleMode:
|
|
if (UseSSE < 4) {
|
|
return false;
|
|
}
|
|
break;
|
|
case Op_RoundDoubleModeV:
|
|
if (VM_Version::supports_avx() == false) {
|
|
return false; // 128bit vroundpd is not available
|
|
}
|
|
break;
|
|
case Op_MacroLogicV:
|
|
if (UseAVX < 3 || !UseVectorMacroLogic) {
|
|
return false;
|
|
}
|
|
break;
|
|
#ifndef _LP64
|
|
case Op_AddReductionVF:
|
|
case Op_AddReductionVD:
|
|
case Op_MulReductionVF:
|
|
case Op_MulReductionVD:
|
|
if (UseSSE < 1) { // requires at least SSE
|
|
return false;
|
|
}
|
|
break;
|
|
case Op_MulAddVS2VI:
|
|
case Op_RShiftVL:
|
|
case Op_AbsVD:
|
|
case Op_NegVD:
|
|
if (UseSSE < 2) {
|
|
return false;
|
|
}
|
|
break;
|
|
#endif // !LP64
|
|
}
|
|
return true; // Match rules are supported by default.
|
|
}
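// Note: the switch above only lists the exceptions; any opcode that has a
// match rule and is not mentioned is supported unconditionally.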
|
|
|
|
//------------------------------------------------------------------------
|
|
|
|
// Identify extra cases that we might want to provide match rules for vector nodes and
|
|
// other intrinsics guarded with vector length (vlen) and element type (bt).
|
|
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
|
|
if (!match_rule_supported(opcode)) {
|
|
return false;
|
|
}
|
|
// Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
|
|
// * SSE2 supports 128bit vectors for all types;
|
|
// * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
|
|
// * AVX2 supports 256bit vectors for all types;
|
|
// * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
|
|
// * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
|
|
// There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
|
|
// And MaxVectorSize is taken into account as well.
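  // For example (illustrative): a 64-element BYTE vector (512 bits) is only
  // accepted when AVX512BW is available and MaxVectorSize >= 64, whereas
  // plain AVX2 caps BYTE vectors at 32 elements (256 bits).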
|
|
if (!vector_size_supported(bt, vlen)) {
|
|
return false;
|
|
}
|
|
// Special cases which require vector length follow:
|
|
// * implementation limitations
|
|
// * some 512bit vector operations on FLOAT and DOUBLE types require AVX512DQ
|
|
// * 128bit vroundpd instruction is present only in AVX1
|
|
int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
|
|
switch (opcode) {
|
|
case Op_AbsVF:
|
|
case Op_NegVF:
|
|
if ((vlen == 16) && (VM_Version::supports_avx512dq() == false)) {
|
|
return false; // 512bit vandps and vxorps are not available
|
|
}
|
|
break;
|
|
case Op_AbsVD:
|
|
case Op_NegVD:
|
|
if ((vlen == 8) && (VM_Version::supports_avx512dq() == false)) {
|
|
return false; // 512bit vandpd and vxorpd are not available
|
|
}
|
|
break;
|
|
case Op_CMoveVF:
|
|
if (vlen != 8) {
|
|
return false; // implementation limitation (only vcmov8F_reg is present)
|
|
}
|
|
break;
|
|
case Op_MacroLogicV:
|
|
if (!VM_Version::supports_evex() ||
|
|
((size_in_bits != 512) && !VM_Version::supports_avx512vl())) {
|
|
return false;
|
|
}
|
|
break;
|
|
case Op_CMoveVD:
|
|
if (vlen != 4) {
|
|
return false; // implementation limitation (only vcmov4D_reg is present)
|
|
}
|
|
break;
|
|
}
|
|
  return true; // Match rules are supported by default.
|
|
}
|
|
|
|
// x86 supports generic vector operands: vec and legVec.
|
|
const bool Matcher::supports_generic_vector_operands = true;
|
|
|
|
MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
|
|
assert(Matcher::is_generic_vector(generic_opnd), "not generic");
|
|
bool legacy = (generic_opnd->opcode() == LEGVEC);
|
|
if (!VM_Version::supports_avx512vlbwdq() && // KNL
|
|
is_temp && !legacy && (ideal_reg == Op_VecZ)) {
|
|
// Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
|
|
return new legVecZOper();
|
|
}
|
|
if (legacy) {
|
|
switch (ideal_reg) {
|
|
case Op_VecS: return new legVecSOper();
|
|
case Op_VecD: return new legVecDOper();
|
|
case Op_VecX: return new legVecXOper();
|
|
case Op_VecY: return new legVecYOper();
|
|
case Op_VecZ: return new legVecZOper();
|
|
}
|
|
} else {
|
|
switch (ideal_reg) {
|
|
case Op_VecS: return new vecSOper();
|
|
case Op_VecD: return new vecDOper();
|
|
case Op_VecX: return new vecXOper();
|
|
case Op_VecY: return new vecYOper();
|
|
case Op_VecZ: return new vecZOper();
|
|
}
|
|
}
|
|
ShouldNotReachHere();
|
|
return NULL;
|
|
}
|
|
|
|
bool Matcher::is_generic_reg2reg_move(MachNode* m) {
|
|
switch (m->rule()) {
|
|
case MoveVec2Leg_rule:
|
|
case MoveLeg2Vec_rule:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
bool Matcher::is_generic_vector(MachOper* opnd) {
|
|
switch (opnd->opcode()) {
|
|
case VEC:
|
|
case LEGVEC:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
|
|
const bool Matcher::has_predicated_vectors(void) {
|
|
bool ret_value = false;
|
|
if (UseAVX > 2) {
|
|
ret_value = VM_Version::supports_avx512vl();
|
|
}
|
|
|
|
return ret_value;
|
|
}
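// Note: predicated (masked) vector support is only reported when AVX512VL is
// also available, presumably so that opmask-based masking can be used for
// 128- and 256-bit vector lengths as well as for full 512-bit operations.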
|
|
|
|
const int Matcher::float_pressure(int default_pressure_threshold) {
|
|
int float_pressure_threshold = default_pressure_threshold;
|
|
#ifdef _LP64
|
|
if (UseAVX > 2) {
|
|
// Increase pressure threshold on machines with AVX3 which have
|
|
// 2x more XMM registers.
|
|
float_pressure_threshold = default_pressure_threshold * 2;
|
|
}
|
|
#endif
|
|
return float_pressure_threshold;
|
|
}
|
|
|
|
// Max vector size in bytes. 0 if not supported.
|
|
const int Matcher::vector_width_in_bytes(BasicType bt) {
|
|
assert(is_java_primitive(bt), "only primitive type vectors");
|
|
if (UseSSE < 2) return 0;
|
|
  // SSE2 supports 128bit vectors for all types.
  // AVX2 supports 256bit vectors for all types.
  // AVX-512 (EVEX) supports 512bit vectors for all types.
|
|
int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
|
|
// AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
|
|
if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
|
|
size = (UseAVX > 2) ? 64 : 32;
|
|
if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
|
|
size = (VM_Version::supports_avx512bw()) ? 64 : 32;
|
|
// Use flag to limit vector size.
|
|
size = MIN2(size,(int)MaxVectorSize);
|
|
// Minimum 2 values in vector (or 4 for bytes).
|
|
switch (bt) {
|
|
case T_DOUBLE:
|
|
case T_LONG:
|
|
if (size < 16) return 0;
|
|
break;
|
|
case T_FLOAT:
|
|
case T_INT:
|
|
if (size < 8) return 0;
|
|
break;
|
|
case T_BOOLEAN:
|
|
if (size < 4) return 0;
|
|
break;
|
|
case T_CHAR:
|
|
if (size < 4) return 0;
|
|
break;
|
|
case T_BYTE:
|
|
if (size < 4) return 0;
|
|
break;
|
|
case T_SHORT:
|
|
if (size < 4) return 0;
|
|
break;
|
|
default:
|
|
ShouldNotReachHere();
|
|
}
|
|
return size;
|
|
}
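// For example (illustrative, assuming MaxVectorSize is large enough): with
// UseAVX == 2 this returns 32 for T_INT, while with UseAVX == 3 but no
// AVX512BW support T_BYTE/T_SHORT/T_CHAR are capped at 32 bytes even though
// T_INT/T_FLOAT/T_DOUBLE vectors can use the full 64 bytes.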
|
|
|
|
// Limits on vector size (number of elements) loaded into vector.
|
|
const int Matcher::max_vector_size(const BasicType bt) {
|
|
return vector_width_in_bytes(bt)/type2aelembytes(bt);
|
|
}
|
|
const int Matcher::min_vector_size(const BasicType bt) {
|
|
int max_size = max_vector_size(bt);
|
|
// Min size which can be loaded into vector is 4 bytes.
|
|
int size = (type2aelembytes(bt) == 1) ? 4 : 2;
|
|
return MIN2(size,max_size);
|
|
}
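// For example (illustrative): min_vector_size(T_INT) == 2 and
// min_vector_size(T_BYTE) == 4 elements, provided max_vector_size() for the
// type is at least that large.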
|
|
|
|
// Vector ideal reg corresponding to specified size in bytes
|
|
const uint Matcher::vector_ideal_reg(int size) {
|
|
assert(MaxVectorSize >= size, "");
|
|
switch(size) {
|
|
case 4: return Op_VecS;
|
|
case 8: return Op_VecD;
|
|
case 16: return Op_VecX;
|
|
case 32: return Op_VecY;
|
|
case 64: return Op_VecZ;
|
|
}
|
|
ShouldNotReachHere();
|
|
return 0;
|
|
}
|
|
|
|
// x86 supports misaligned vectors store/load.
|
|
const bool Matcher::misaligned_vectors_ok() {
|
|
return true;
|
|
}
|
|
|
|
// x86 AES instructions are compatible with SunJCE expanded
|
|
// keys, hence we do not need to pass the original key to stubs
|
|
const bool Matcher::pass_original_key_for_aes() {
|
|
return false;
|
|
}
|
|
|
|
|
|
const bool Matcher::convi2l_type_required = true;
|
|
|
|
// Check for shift by small constant as well
|
|
static bool clone_shift(Node* shift, Matcher* matcher, Matcher::MStack& mstack, VectorSet& address_visited) {
|
|
if (shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
|
|
shift->in(2)->get_int() <= 3 &&
|
|
// Are there other uses besides address expressions?
|
|
!matcher->is_visited(shift)) {
|
|
address_visited.set(shift->_idx); // Flag as address_visited
|
|
mstack.push(shift->in(2), Matcher::Visit);
|
|
Node *conv = shift->in(1);
|
|
#ifdef _LP64
|
|
// Allow Matcher to match the rule which bypass
|
|
// ConvI2L operation for an array index on LP64
|
|
// if the index value is positive.
|
|
if (conv->Opcode() == Op_ConvI2L &&
|
|
conv->as_Type()->type()->is_long()->_lo >= 0 &&
|
|
// Are there other uses besides address expressions?
|
|
!matcher->is_visited(conv)) {
|
|
address_visited.set(conv->_idx); // Flag as address_visited
|
|
mstack.push(conv->in(1), Matcher::Pre_Visit);
|
|
} else
|
|
#endif
|
|
mstack.push(conv, Matcher::Pre_Visit);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
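// Illustrative shape handled above: (AddP base (LShiftX (ConvI2L idx) 2)),
// i.e. a scaled array index. Cloning the small shift (and the known-positive
// ConvI2L on LP64) lets the match subsume it into a single
// [base + idx*4 + disp] addressing mode instead of computing the scaled
// index into a separate register.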
|
|
|
|
// This function identifies sub-graphs in which a 'load' node is
|
|
// input to two different nodes, and such that it can be matched
|
|
// with BMI instructions like blsi, blsr, etc.
|
|
// Example: b = -a[i] & a[i] can be matched to blsi r32, m32.
|
|
// The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
|
|
// refers to the same node.
|
|
//
|
|
// Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
|
|
// This is a temporary solution until we make DAGs expressible in ADL.
|
|
template<typename ConType>
|
|
class FusedPatternMatcher {
|
|
Node* _op1_node;
|
|
Node* _mop_node;
|
|
int _con_op;
|
|
|
|
static int match_next(Node* n, int next_op, int next_op_idx) {
|
|
if (n->in(1) == NULL || n->in(2) == NULL) {
|
|
return -1;
|
|
}
|
|
|
|
if (next_op_idx == -1) { // n is commutative, try rotations
|
|
if (n->in(1)->Opcode() == next_op) {
|
|
return 1;
|
|
} else if (n->in(2)->Opcode() == next_op) {
|
|
return 2;
|
|
}
|
|
} else {
|
|
assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
|
|
if (n->in(next_op_idx)->Opcode() == next_op) {
|
|
return next_op_idx;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
public:
|
|
FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
|
|
_op1_node(op1_node), _mop_node(mop_node), _con_op(con_op) { }
|
|
|
|
bool match(int op1, int op1_op2_idx, // op1 and the index of the op1->op2 edge, -1 if op1 is commutative
|
|
int op2, int op2_con_idx, // op2 and the index of the op2->con edge, -1 if op2 is commutative
|
|
typename ConType::NativeType con_value) {
|
|
if (_op1_node->Opcode() != op1) {
|
|
return false;
|
|
}
|
|
if (_mop_node->outcnt() > 2) {
|
|
return false;
|
|
}
|
|
op1_op2_idx = match_next(_op1_node, op2, op1_op2_idx);
|
|
if (op1_op2_idx == -1) {
|
|
return false;
|
|
}
|
|
// Memory operation must be the other edge
|
|
int op1_mop_idx = (op1_op2_idx & 1) + 1;
|
|
|
|
// Check that the mop node is really what we want
|
|
if (_op1_node->in(op1_mop_idx) == _mop_node) {
|
|
Node* op2_node = _op1_node->in(op1_op2_idx);
|
|
if (op2_node->outcnt() > 1) {
|
|
return false;
|
|
}
|
|
assert(op2_node->Opcode() == op2, "Should be");
|
|
op2_con_idx = match_next(op2_node, _con_op, op2_con_idx);
|
|
if (op2_con_idx == -1) {
|
|
return false;
|
|
}
|
|
// Memory operation must be the other edge
|
|
int op2_mop_idx = (op2_con_idx & 1) + 1;
|
|
// Check that the memory operation is the same node
|
|
if (op2_node->in(op2_mop_idx) == _mop_node) {
|
|
// Now check the constant
|
|
const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
|
|
if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
};
|
|
|
|
static bool is_bmi_pattern(Node* n, Node* m) {
|
|
assert(UseBMI1Instructions, "sanity");
|
|
if (n != NULL && m != NULL) {
|
|
if (m->Opcode() == Op_LoadI) {
|
|
FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
|
|
return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
|
|
bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
|
|
bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
|
|
} else if (m->Opcode() == Op_LoadL) {
|
|
FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
|
|
return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
|
|
bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
|
|
bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
|
|
}
|
|
}
|
|
return false;
|
|
}
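// The three shapes accepted above correspond (illustratively) to the BMI1
// instructions, with x being the shared LoadI/LoadL node:
//   x & (0 - x)  -> blsi   (isolate lowest set bit)
//   x & (x + -1) -> blsr   (reset lowest set bit)
//   x ^ (x + -1) -> blsmsk (mask up to and including lowest set bit)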
|
|
|
|
// Should the matcher clone input 'm' of node 'n'?
|
|
bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
|
|
// If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
|
|
if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
|
|
mstack.push(m, Visit);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Should the Matcher clone shifts on addressing modes, expecting them
|
|
// to be subsumed into complex addressing expressions or compute them
|
|
// into registers?
|
|
bool Matcher::pd_clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) {
|
|
Node *off = m->in(AddPNode::Offset);
|
|
if (off->is_Con()) {
|
|
address_visited.test_set(m->_idx); // Flag as address_visited
|
|
Node *adr = m->in(AddPNode::Address);
|
|
|
|
// Intel can handle 2 adds in addressing mode
|
|
// AtomicAdd is not an addressing expression.
|
|
// Cheap to find it by looking for screwy base.
|
|
if (adr->is_AddP() &&
|
|
!adr->in(AddPNode::Base)->is_top() &&
|
|
LP64_ONLY( off->get_long() == (int) (off->get_long()) && ) // immL32
|
|
// Are there other uses besides address expressions?
|
|
!is_visited(adr)) {
|
|
address_visited.set(adr->_idx); // Flag as address_visited
|
|
Node *shift = adr->in(AddPNode::Offset);
|
|
if (!clone_shift(shift, this, mstack, address_visited)) {
|
|
mstack.push(shift, Pre_Visit);
|
|
}
|
|
mstack.push(adr->in(AddPNode::Address), Pre_Visit);
|
|
mstack.push(adr->in(AddPNode::Base), Pre_Visit);
|
|
} else {
|
|
mstack.push(adr, Pre_Visit);
|
|
}
|
|
|
|
// Clone X+offset as it also folds into most addressing expressions
|
|
mstack.push(off, Visit);
|
|
mstack.push(m->in(AddPNode::Base), Pre_Visit);
|
|
return true;
|
|
} else if (clone_shift(off, this, mstack, address_visited)) {
|
|
address_visited.test_set(m->_idx); // Flag as address_visited
|
|
mstack.push(m->in(AddPNode::Address), Pre_Visit);
|
|
mstack.push(m->in(AddPNode::Base), Pre_Visit);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
void Compile::reshape_address(AddPNode* addp) {
|
|
}
|
|
|
|
static inline uint vector_length(const MachNode* n) {
|
|
const TypeVect* vt = n->bottom_type()->is_vect();
|
|
return vt->length();
|
|
}
|
|
|
|
static inline uint vector_length(const MachNode* use, MachOper* opnd) {
|
|
uint def_idx = use->operand_index(opnd);
|
|
Node* def = use->in(def_idx);
|
|
return def->bottom_type()->is_vect()->length();
|
|
}
|
|
|
|
static inline uint vector_length_in_bytes(const MachNode* n) {
|
|
const TypeVect* vt = n->bottom_type()->is_vect();
|
|
return vt->length_in_bytes();
|
|
}
|
|
|
|
static inline uint vector_length_in_bytes(const MachNode* use, MachOper* opnd) {
|
|
uint def_idx = use->operand_index(opnd);
|
|
Node* def = use->in(def_idx);
|
|
return def->bottom_type()->is_vect()->length_in_bytes();
|
|
}
|
|
|
|
static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* n) {
|
|
switch(vector_length_in_bytes(n)) {
|
|
case 4: // fall-through
|
|
case 8: // fall-through
|
|
case 16: return Assembler::AVX_128bit;
|
|
case 32: return Assembler::AVX_256bit;
|
|
case 64: return Assembler::AVX_512bit;
|
|
|
|
default: {
|
|
ShouldNotReachHere();
|
|
return Assembler::AVX_NoVec;
|
|
}
|
|
}
|
|
}
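// For example (illustrative): a 4-element FLOAT vector (16 bytes) encodes as
// AVX_128bit and a 16-element FLOAT vector (64 bytes) as AVX_512bit;
// sub-128-bit payloads (4 or 8 bytes) still use the 128-bit encoding.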
|
|
|
|
// Helper methods for MachSpillCopyNode::implementation().
|
|
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
|
|
int src_hi, int dst_hi, uint ireg, outputStream* st) {
|
|
  // In the 64-bit VM the size calculation is very complex, so the size is
  // determined by emitting the instructions into a scratch buffer.
|
|
LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
|
|
assert(ireg == Op_VecS || // 32bit vector
|
|
(src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
|
|
(dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
|
|
"no non-adjacent vector moves" );
|
|
if (cbuf) {
|
|
C2_MacroAssembler _masm(cbuf);
|
|
int offset = __ offset();
|
|
switch (ireg) {
|
|
case Op_VecS: // copy whole register
|
|
case Op_VecD:
|
|
case Op_VecX:
|
|
#ifndef _LP64
|
|
__ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
|
|
#else
|
|
if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
|
|
__ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
|
|
} else {
|
|
__ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
|
|
}
|
|
#endif
|
|
break;
|
|
case Op_VecY:
|
|
#ifndef _LP64
|
|
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
|
|
#else
|
|
if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
|
|
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
|
|
} else {
|
|
__ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
|
|
}
|
|
#endif
|
|
break;
|
|
case Op_VecZ:
|
|
__ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
|
|
break;
|
|
default:
|
|
ShouldNotReachHere();
|
|
}
|
|
int size = __ offset() - offset;
|
|
#ifdef ASSERT
|
|
// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
|
|
    assert(!do_size || size == 4, "incorrect size calculation");
|
|
#endif
|
|
return size;
|
|
#ifndef PRODUCT
|
|
} else if (!do_size) {
|
|
switch (ireg) {
|
|
case Op_VecS:
|
|
case Op_VecD:
|
|
case Op_VecX:
|
|
st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
|
|
break;
|
|
case Op_VecY:
|
|
case Op_VecZ:
|
|
st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
|
|
break;
|
|
default:
|
|
ShouldNotReachHere();
|
|
}
|
|
#endif
|
|
}
|
|
// VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
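  // The EVEX prefix is 4 bytes (vs. the 2-byte VEX form), which is where the
  // 6-byte estimate below comes from when UseAVX > 2.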
return (UseAVX > 2) ? 6 : 4;
|
|
}
|
|
|
|
int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
|
|
int stack_offset, int reg, uint ireg, outputStream* st) {
|
|
  // In the 64-bit VM the size calculation is very complex, so the size is
  // determined by emitting the instructions into a scratch buffer.
|
|
LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
|
|
if (cbuf) {
|
|
C2_MacroAssembler _masm(cbuf);
|
|
int offset = __ offset();
|
|
if (is_load) {
|
|
switch (ireg) {
|
|
case Op_VecS:
|
|
__ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
|
|
break;
|
|
case Op_VecD:
|
|
__ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
|
|
break;
|
|
case Op_VecX:
|
|
#ifndef _LP64
|
|
__ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
|
|
#else
|
|
if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
|
|
__ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
|
|
} else {
|
|
__ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
|
|
__ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
|
|
}
|
|
#endif
|
|
break;
|
|
case Op_VecY:
|
|
#ifndef _LP64
|
|
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
|
|
#else
|
|
if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
|
|
__ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
|
|
} else {
|
|
__ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
|
|
__ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
|
|
}
|
|
#endif
|
|
break;
|
|
case Op_VecZ:
|
|
__ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
|
|
break;
|
|
default:
|
|
ShouldNotReachHere();
|
|
}
|
|
} else { // store
|
|
switch (ireg) {
|
|
case Op_VecS:
|
|
__ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
|
|
break;
|
|
case Op_VecD:
|
|
__ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
|
|
break;
|
|
case Op_VecX:
|
|
#ifndef _LP64
|
|
__ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
|
|
#else
|
|
if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
|
|
__ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
|
|
}
|
|
else {
|
|
__ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
|
|
}
|
|
#endif
|
|
break;
|
|
case Op_VecY:
|
|
#ifndef _LP64
|
|
__ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
|
|
#else
|
|
if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
|
|
__ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
|
|
}
|
|
else {
|
|
__ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
|
|
}
|
|
#endif
|
|
break;
|
|
case Op_VecZ:
|
|
__ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
|
|
break;
|
|
default:
|
|
ShouldNotReachHere();
|
|
}
|
|
}
|
|
int size = __ offset() - offset;
|
|
#ifdef ASSERT
|
|
int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4);
|
|
// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
|
|
    assert(!do_size || size == (5+offset_size), "incorrect size calculation");
|
|
#endif
|
|
return size;
|
|
#ifndef PRODUCT
|
|
} else if (!do_size) {
|
|
if (is_load) {
|
|
switch (ireg) {
|
|
case Op_VecS:
|
|
st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
|
|
break;
|
|
case Op_VecD:
|
|
st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
|
|
break;
|
|
case Op_VecX:
|
|
st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
|
|
break;
|
|
case Op_VecY:
|
|
case Op_VecZ:
|
|
st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
|
|
break;
|
|
default:
|
|
ShouldNotReachHere();
|
|
}
|
|
} else { // store
|
|
switch (ireg) {
|
|
case Op_VecS:
|
|
st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
|
|
break;
|
|
case Op_VecD:
|
|
st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
|
|
break;
|
|
case Op_VecX:
|
|
st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
|
|
break;
|
|
case Op_VecY:
|
|
case Op_VecZ:
|
|
st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
|
|
break;
|
|
default:
|
|
ShouldNotReachHere();
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
bool is_single_byte = false;
|
|
int vec_len = 0;
|
|
if ((UseAVX > 2) && (stack_offset != 0)) {
|
|
int tuple_type = Assembler::EVEX_FVM;
|
|
int input_size = Assembler::EVEX_32bit;
|
|
switch (ireg) {
|
|
case Op_VecS:
|
|
tuple_type = Assembler::EVEX_T1S;
|
|
break;
|
|
case Op_VecD:
|
|
tuple_type = Assembler::EVEX_T1S;
|
|
input_size = Assembler::EVEX_64bit;
|
|
break;
|
|
case Op_VecX:
|
|
break;
|
|
case Op_VecY:
|
|
vec_len = 1;
|
|
break;
|
|
case Op_VecZ:
|
|
vec_len = 2;
|
|
break;
|
|
}
|
|
is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0);
|
|
}
|
|
int offset_size = 0;
|
|
int size = 5;
|
|
if (UseAVX > 2 ) {
|
|
if (VM_Version::supports_avx512novl() && (vec_len == 2)) {
|
|
offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
|
|
size += 2; // Need an additional two bytes for EVEX encoding
|
|
} else if (VM_Version::supports_avx512novl() && (vec_len < 2)) {
|
|
offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
|
|
} else {
|
|
offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
|
|
      size += 2; // Need an additional two bytes for EVEX encoding
|
|
}
|
|
} else {
|
|
offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4);
|
|
}
|
|
// VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
|
|
return size+offset_size;
|
|
}
|
|
|
|
static inline jint replicate4_imm(int con, int width) {
|
|
// Load a constant of "width" (in bytes) and replicate it to fill 32bit.
|
|
assert(width == 1 || width == 2, "only byte or short types here");
|
|
int bit_width = width * 8;
|
|
jint val = con;
|
|
val &= (1 << bit_width) - 1; // mask off sign bits
|
|
while(bit_width < 32) {
|
|
val |= (val << bit_width);
|
|
bit_width <<= 1;
|
|
}
|
|
return val;
|
|
}
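// For example (illustrative): replicate4_imm(0x8A, 1) masks the constant to
// 0x8A and doubles the pattern twice, yielding 0x8A8A8A8A;
// replicate4_imm(0x1234, 2) yields 0x12341234.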
|
|
|
|
static inline jlong replicate8_imm(int con, int width) {
|
|
// Load a constant of "width" (in bytes) and replicate it to fill 64bit.
|
|
assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
|
|
int bit_width = width * 8;
|
|
jlong val = con;
|
|
val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits
|
|
while(bit_width < 64) {
|
|
val |= (val << bit_width);
|
|
bit_width <<= 1;
|
|
}
|
|
return val;
|
|
}
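// For example (illustrative): replicate8_imm(0x7F, 1) yields
// 0x7F7F7F7F7F7F7F7F, and replicate8_imm(-1, 4) first masks the constant to
// 0xFFFFFFFF and then yields 0xFFFFFFFFFFFFFFFF.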
|
|
|
|
#ifndef PRODUCT
|
|
void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
|
|
st->print("nop \t# %d bytes pad for loops and calls", _count);
|
|
}
|
|
#endif
|
|
|
|
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc*) const {
|
|
C2_MacroAssembler _masm(&cbuf);
|
|
__ nop(_count);
|
|
}
|
|
|
|
uint MachNopNode::size(PhaseRegAlloc*) const {
|
|
return _count;
|
|
}
|
|
|
|
#ifndef PRODUCT
|
|
void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const {
|
|
st->print("# breakpoint");
|
|
}
|
|
#endif
|
|
|
|
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const {
|
|
C2_MacroAssembler _masm(&cbuf);
|
|
__ int3();
|
|
}
|
|
|
|
uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
|
|
return MachNode::size(ra_);
|
|
}
|
|
|
|
%}
|
|
|
|
encode %{
|
|
|
|
enc_class call_epilog %{
|
|
if (VerifyStackAtCalls) {
|
|
// Check that stack depth is unchanged: find majik cookie on stack
|
|
int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
|
|
C2_MacroAssembler _masm(&cbuf);
|
|
Label L;
|
|
__ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
|
|
__ jccb(Assembler::equal, L);
|
|
// Die if stack mismatch
|
|
__ int3();
|
|
__ bind(L);
|
|
}
|
|
%}
|
|
|
|
%}
|
|
|
|
|
|
//----------OPERANDS-----------------------------------------------------------
|
|
// Operand definitions must precede instruction definitions for correct parsing
|
|
// in the ADLC because operands constitute user defined types which are used in
|
|
// instruction definitions.
|
|
|
|
// Vectors
|
|
|
|
// Dummy generic vector class. Should be used for all vector operands.
|
|
// Replaced with vec[SDXYZ] during post-selection pass.
|
|
operand vec() %{
|
|
constraint(ALLOC_IN_RC(dynamic));
|
|
match(VecX);
|
|
match(VecY);
|
|
match(VecZ);
|
|
match(VecS);
|
|
match(VecD);
|
|
|
|
format %{ %}
|
|
interface(REG_INTER);
|
|
%}
|
|
|
|
// Dummy generic legacy vector class. Should be used for all legacy vector operands.
|
|
// Replaced with legVec[SDXYZ] during post-selection cleanup.
|
|
// Note: legacy register class is used to avoid extra (unneeded in 32-bit VM)
|
|
// runtime code generation via reg_class_dynamic.
|
|
operand legVec() %{
|
|
constraint(ALLOC_IN_RC(dynamic));
|
|
match(VecX);
|
|
match(VecY);
|
|
match(VecZ);
|
|
match(VecS);
|
|
match(VecD);
|
|
|
|
format %{ %}
|
|
interface(REG_INTER);
|
|
%}
|
|
|
|
// Replaces vec during post-selection cleanup. See above.
|
|
operand vecS() %{
|
|
constraint(ALLOC_IN_RC(vectors_reg_vlbwdq));
|
|
match(VecS);
|
|
|
|
format %{ %}
|
|
interface(REG_INTER);
|
|
%}
|
|
|
|
// Replaces legVec during post-selection cleanup. See above.
|
|
operand legVecS() %{
|
|
constraint(ALLOC_IN_RC(vectors_reg_legacy));
|
|
match(VecS);
|
|
|
|
format %{ %}
|
|
interface(REG_INTER);
|
|
%}
|
|
|
|
// Replaces vec during post-selection cleanup. See above.
|
|
operand vecD() %{
|
|
constraint(ALLOC_IN_RC(vectord_reg_vlbwdq));
|
|
match(VecD);
|
|
|
|
format %{ %}
|
|
interface(REG_INTER);
|
|
%}
|
|
|
|
// Replaces legVec during post-selection cleanup. See above.
|
|
operand legVecD() %{
|
|
constraint(ALLOC_IN_RC(vectord_reg_legacy));
|
|
match(VecD);
|
|
|
|
format %{ %}
|
|
interface(REG_INTER);
|
|
%}
|
|
|
|
// Replaces vec during post-selection cleanup. See above.
|
|
operand vecX() %{
|
|
constraint(ALLOC_IN_RC(vectorx_reg_vlbwdq));
|
|
match(VecX);
|
|
|
|
format %{ %}
|
|
interface(REG_INTER);
|
|
%}
|
|
|
|
// Replaces legVec during post-selection cleanup. See above.
|
|
operand legVecX() %{
|
|
constraint(ALLOC_IN_RC(vectorx_reg_legacy));
|
|
match(VecX);
|
|
|
|
format %{ %}
|
|
interface(REG_INTER);
|
|
%}
|
|
|
|
// Replaces vec during post-selection cleanup. See above.
|
|
operand vecY() %{
|
|
constraint(ALLOC_IN_RC(vectory_reg_vlbwdq));
|
|
match(VecY);
|
|
|
|
format %{ %}
|
|
interface(REG_INTER);
|
|
%}
|
|
|
|
// Replaces legVec during post-selection cleanup. See above.
|
|
operand legVecY() %{
|
|
constraint(ALLOC_IN_RC(vectory_reg_legacy));
|
|
match(VecY);
|
|
|
|
format %{ %}
|
|
interface(REG_INTER);
|
|
%}
|
|
|
|
// Replaces vec during post-selection cleanup. See above.
|
|
operand vecZ() %{
|
|
constraint(ALLOC_IN_RC(vectorz_reg));
|
|
match(VecZ);
|
|
|
|
format %{ %}
|
|
interface(REG_INTER);
|
|
%}
|
|
|
|
// Replaces legVec during post-selection cleanup. See above.
|
|
operand legVecZ() %{
|
|
constraint(ALLOC_IN_RC(vectorz_reg_legacy));
|
|
match(VecZ);
|
|
|
|
format %{ %}
|
|
interface(REG_INTER);
|
|
%}
|
|
|
|
// Comparison Code for FP conditional move
|
|
operand cmpOp_vcmppd() %{
|
|
match(Bool);
|
|
|
|
predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
|
|
n->as_Bool()->_test._test != BoolTest::no_overflow);
|
|
format %{ "" %}
|
|
interface(COND_INTER) %{
|
|
equal (0x0, "eq");
|
|
less (0x1, "lt");
|
|
less_equal (0x2, "le");
|
|
not_equal (0xC, "ne");
|
|
greater_equal(0xD, "ge");
|
|
greater (0xE, "gt");
|
|
//TODO cannot compile (adlc breaks) without two next lines with error:
|
|
// x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
|
|
// equal' for overflow.
|
|
overflow (0x20, "o"); // not really supported by the instruction
|
|
no_overflow (0x21, "no"); // not really supported by the instruction
|
|
%}
|
|
%}
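// Note (illustrative): the encodings above are the AVX packed-compare
// predicate immediates used by vcmppd (0x0 = EQ_OQ, 0x1 = LT_OS, 0x2 = LE_OS,
// 0xC = NEQ_OQ, 0xD = GE_OS, 0xE = GT_OS); the overflow/no_overflow entries
// are placeholders to keep the adlc happy (see the TODO above) and are not
// valid vcmppd predicates.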
|
|
|
|
|
|
// INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
|
|
|
|
// ============================================================================
|
|
|
|
instruct ShouldNotReachHere() %{
|
|
match(Halt);
|
|
format %{ "stop\t# ShouldNotReachHere" %}
|
|
ins_encode %{
|
|
if (is_reachable()) {
|
|
__ stop(_halt_reason);
|
|
}
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
// =================================EVEX special===============================
|
|
|
|
instruct setMask(rRegI dst, rRegI src) %{
|
|
predicate(Matcher::has_predicated_vectors());
|
|
match(Set dst (SetVectMaskI src));
|
|
effect(TEMP dst);
|
|
format %{ "setvectmask $dst, $src" %}
|
|
ins_encode %{
|
|
__ setvectmask($dst$$Register, $src$$Register);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
// ============================================================================
|
|
|
|
instruct addF_reg(regF dst, regF src) %{
|
|
predicate((UseSSE>=1) && (UseAVX == 0));
|
|
match(Set dst (AddF dst src));
|
|
|
|
format %{ "addss $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ addss($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct addF_mem(regF dst, memory src) %{
|
|
predicate((UseSSE>=1) && (UseAVX == 0));
|
|
match(Set dst (AddF dst (LoadF src)));
|
|
|
|
format %{ "addss $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ addss($dst$$XMMRegister, $src$$Address);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct addF_imm(regF dst, immF con) %{
|
|
predicate((UseSSE>=1) && (UseAVX == 0));
|
|
match(Set dst (AddF dst con));
|
|
format %{ "addss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ addss($dst$$XMMRegister, $constantaddress($con));
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (AddF src1 src2));
|
|
|
|
format %{ "vaddss $dst, $src1, $src2" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct addF_reg_mem(regF dst, regF src1, memory src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (AddF src1 (LoadF src2)));
|
|
|
|
format %{ "vaddss $dst, $src1, $src2" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vaddss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct addF_reg_imm(regF dst, regF src, immF con) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (AddF src con));
|
|
|
|
format %{ "vaddss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vaddss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct addD_reg(regD dst, regD src) %{
|
|
predicate((UseSSE>=2) && (UseAVX == 0));
|
|
match(Set dst (AddD dst src));
|
|
|
|
format %{ "addsd $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ addsd($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct addD_mem(regD dst, memory src) %{
|
|
predicate((UseSSE>=2) && (UseAVX == 0));
|
|
match(Set dst (AddD dst (LoadD src)));
|
|
|
|
format %{ "addsd $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ addsd($dst$$XMMRegister, $src$$Address);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct addD_imm(regD dst, immD con) %{
|
|
predicate((UseSSE>=2) && (UseAVX == 0));
|
|
match(Set dst (AddD dst con));
|
|
format %{ "addsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ addsd($dst$$XMMRegister, $constantaddress($con));
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (AddD src1 src2));
|
|
|
|
format %{ "vaddsd $dst, $src1, $src2" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct addD_reg_mem(regD dst, regD src1, memory src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (AddD src1 (LoadD src2)));
|
|
|
|
format %{ "vaddsd $dst, $src1, $src2" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vaddsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct addD_reg_imm(regD dst, regD src, immD con) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (AddD src con));
|
|
|
|
format %{ "vaddsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vaddsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct subF_reg(regF dst, regF src) %{
|
|
predicate((UseSSE>=1) && (UseAVX == 0));
|
|
match(Set dst (SubF dst src));
|
|
|
|
format %{ "subss $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ subss($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct subF_mem(regF dst, memory src) %{
|
|
predicate((UseSSE>=1) && (UseAVX == 0));
|
|
match(Set dst (SubF dst (LoadF src)));
|
|
|
|
format %{ "subss $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ subss($dst$$XMMRegister, $src$$Address);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct subF_imm(regF dst, immF con) %{
|
|
predicate((UseSSE>=1) && (UseAVX == 0));
|
|
match(Set dst (SubF dst con));
|
|
format %{ "subss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ subss($dst$$XMMRegister, $constantaddress($con));
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (SubF src1 src2));
|
|
|
|
format %{ "vsubss $dst, $src1, $src2" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct subF_reg_mem(regF dst, regF src1, memory src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (SubF src1 (LoadF src2)));
|
|
|
|
format %{ "vsubss $dst, $src1, $src2" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vsubss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct subF_reg_imm(regF dst, regF src, immF con) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (SubF src con));
|
|
|
|
format %{ "vsubss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vsubss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct subD_reg(regD dst, regD src) %{
|
|
predicate((UseSSE>=2) && (UseAVX == 0));
|
|
match(Set dst (SubD dst src));
|
|
|
|
format %{ "subsd $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ subsd($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct subD_mem(regD dst, memory src) %{
|
|
predicate((UseSSE>=2) && (UseAVX == 0));
|
|
match(Set dst (SubD dst (LoadD src)));
|
|
|
|
format %{ "subsd $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ subsd($dst$$XMMRegister, $src$$Address);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct subD_imm(regD dst, immD con) %{
|
|
predicate((UseSSE>=2) && (UseAVX == 0));
|
|
match(Set dst (SubD dst con));
|
|
format %{ "subsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ subsd($dst$$XMMRegister, $constantaddress($con));
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (SubD src1 src2));
|
|
|
|
format %{ "vsubsd $dst, $src1, $src2" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct subD_reg_mem(regD dst, regD src1, memory src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (SubD src1 (LoadD src2)));
|
|
|
|
format %{ "vsubsd $dst, $src1, $src2" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vsubsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct subD_reg_imm(regD dst, regD src, immD con) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (SubD src con));
|
|
|
|
format %{ "vsubsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vsubsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct mulF_reg(regF dst, regF src) %{
|
|
predicate((UseSSE>=1) && (UseAVX == 0));
|
|
match(Set dst (MulF dst src));
|
|
|
|
format %{ "mulss $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ mulss($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct mulF_mem(regF dst, memory src) %{
|
|
predicate((UseSSE>=1) && (UseAVX == 0));
|
|
match(Set dst (MulF dst (LoadF src)));
|
|
|
|
format %{ "mulss $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ mulss($dst$$XMMRegister, $src$$Address);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct mulF_imm(regF dst, immF con) %{
|
|
predicate((UseSSE>=1) && (UseAVX == 0));
|
|
match(Set dst (MulF dst con));
|
|
format %{ "mulss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ mulss($dst$$XMMRegister, $constantaddress($con));
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (MulF src1 src2));
|
|
|
|
format %{ "vmulss $dst, $src1, $src2" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (MulF src1 (LoadF src2)));
|
|
|
|
format %{ "vmulss $dst, $src1, $src2" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vmulss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct mulF_reg_imm(regF dst, regF src, immF con) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (MulF src con));
|
|
|
|
format %{ "vmulss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vmulss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct mulD_reg(regD dst, regD src) %{
|
|
predicate((UseSSE>=2) && (UseAVX == 0));
|
|
match(Set dst (MulD dst src));
|
|
|
|
format %{ "mulsd $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ mulsd($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct mulD_mem(regD dst, memory src) %{
|
|
predicate((UseSSE>=2) && (UseAVX == 0));
|
|
match(Set dst (MulD dst (LoadD src)));
|
|
|
|
format %{ "mulsd $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ mulsd($dst$$XMMRegister, $src$$Address);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct mulD_imm(regD dst, immD con) %{
|
|
predicate((UseSSE>=2) && (UseAVX == 0));
|
|
match(Set dst (MulD dst con));
|
|
format %{ "mulsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ mulsd($dst$$XMMRegister, $constantaddress($con));
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (MulD src1 src2));
|
|
|
|
format %{ "vmulsd $dst, $src1, $src2" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct mulD_reg_mem(regD dst, regD src1, memory src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (MulD src1 (LoadD src2)));
|
|
|
|
format %{ "vmulsd $dst, $src1, $src2" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vmulsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct mulD_reg_imm(regD dst, regD src, immD con) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (MulD src con));
|
|
|
|
format %{ "vmulsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vmulsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct divF_reg(regF dst, regF src) %{
|
|
predicate((UseSSE>=1) && (UseAVX == 0));
|
|
match(Set dst (DivF dst src));
|
|
|
|
format %{ "divss $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ divss($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct divF_mem(regF dst, memory src) %{
|
|
predicate((UseSSE>=1) && (UseAVX == 0));
|
|
match(Set dst (DivF dst (LoadF src)));
|
|
|
|
format %{ "divss $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ divss($dst$$XMMRegister, $src$$Address);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct divF_imm(regF dst, immF con) %{
|
|
predicate((UseSSE>=1) && (UseAVX == 0));
|
|
match(Set dst (DivF dst con));
|
|
format %{ "divss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ divss($dst$$XMMRegister, $constantaddress($con));
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (DivF src1 src2));
|
|
|
|
format %{ "vdivss $dst, $src1, $src2" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct divF_reg_mem(regF dst, regF src1, memory src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (DivF src1 (LoadF src2)));
|
|
|
|
format %{ "vdivss $dst, $src1, $src2" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vdivss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct divF_reg_imm(regF dst, regF src, immF con) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (DivF src con));
|
|
|
|
format %{ "vdivss $dst, $src, [$constantaddress]\t# load from constant table: float=$con" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vdivss($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct divD_reg(regD dst, regD src) %{
|
|
predicate((UseSSE>=2) && (UseAVX == 0));
|
|
match(Set dst (DivD dst src));
|
|
|
|
format %{ "divsd $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ divsd($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct divD_mem(regD dst, memory src) %{
|
|
predicate((UseSSE>=2) && (UseAVX == 0));
|
|
match(Set dst (DivD dst (LoadD src)));
|
|
|
|
format %{ "divsd $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ divsd($dst$$XMMRegister, $src$$Address);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct divD_imm(regD dst, immD con) %{
|
|
predicate((UseSSE>=2) && (UseAVX == 0));
|
|
match(Set dst (DivD dst con));
|
|
format %{ "divsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ divsd($dst$$XMMRegister, $constantaddress($con));
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (DivD src1 src2));
|
|
|
|
format %{ "vdivsd $dst, $src1, $src2" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct divD_reg_mem(regD dst, regD src1, memory src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (DivD src1 (LoadD src2)));
|
|
|
|
format %{ "vdivsd $dst, $src1, $src2" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vdivsd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$Address);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct divD_reg_imm(regD dst, regD src, immD con) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (DivD src con));
|
|
|
|
format %{ "vdivsd $dst, $src, [$constantaddress]\t# load from constant table: double=$con" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ vdivsd($dst$$XMMRegister, $src$$XMMRegister, $constantaddress($con));
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct absF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (AbsF dst));
  ins_cost(150);
  format %{ "andps $dst, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    __ andps($dst$$XMMRegister, ExternalAddress(float_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsF src));
  ins_cost(150);
  format %{ "vandps $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(float_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (AbsD dst));
  ins_cost(150);
  format %{ "andpd $dst, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    __ andpd($dst$$XMMRegister, ExternalAddress(double_signmask()));
  %}
  ins_pipe(pipe_slow);
%}

instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (AbsD src));
  ins_cost(150);
  format %{ "vandpd $dst, $src, [0x7fffffffffffffff]\t"
            "# abs double by sign masking" %}
  ins_encode %{
    int vector_len = 0;
    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
              ExternalAddress(double_signmask()), vector_len);
  %}
  ins_pipe(pipe_slow);
%}
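
// The sign-mask trick above relies on the IEEE-754 layout: clearing the top
// bit of the encoding yields |x| without a branch. A minimal, hypothetical
// Java sketch of the same idea (names chosen here for illustration only):
//
//   static float absBits(float x) {
//     // 0x7fffffff keeps exponent and mantissa, drops the sign bit
//     return Float.intBitsToFloat(Float.floatToRawIntBits(x) & 0x7fffffff);
//   }
//
// float_signmask()/double_signmask() point at in-memory copies of these
// masks so andps/andpd can consume them directly.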

instruct negF_reg(regF dst) %{
  predicate((UseSSE>=1) && (UseAVX == 0));
  match(Set dst (NegF dst));
  ins_cost(150);
  format %{ "xorps $dst, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ xorps($dst$$XMMRegister, ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegF src));
  ins_cost(150);
  format %{ "vnegatess $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
  ins_encode %{
    __ vnegatess($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(float_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg(regD dst) %{
  predicate((UseSSE>=2) && (UseAVX == 0));
  match(Set dst (NegD dst));
  ins_cost(150);
  format %{ "xorpd $dst, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ xorpd($dst$$XMMRegister, ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}

instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
  predicate(UseAVX > 0);
  match(Set dst (NegD src));
  ins_cost(150);
  format %{ "vnegatesd $dst, $src, [0x8000000000000000]\t"
            "# neg double by sign flipping" %}
  ins_encode %{
    __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
                 ExternalAddress(double_signflip()));
  %}
  ins_pipe(pipe_slow);
%}
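
// Negation follows the same pattern as the abs rules but flips the sign bit
// with XOR instead of clearing it, so -0.0 and NaN signs behave exactly as
// the IEEE encoding dictates. A hypothetical Java equivalent of the double
// variant, for illustration only:
//
//   static double negBits(double x) {
//     // 0x8000000000000000L toggles only the sign bit
//     return Double.longBitsToDouble(
//         Double.doubleToRawLongBits(x) ^ 0x8000000000000000L);
//   }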
|
|
|
|
instruct sqrtF_reg(regF dst, regF src) %{
|
|
predicate(UseSSE>=1);
|
|
match(Set dst (SqrtF src));
|
|
|
|
format %{ "sqrtss $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ sqrtss($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct sqrtF_mem(regF dst, memory src) %{
|
|
predicate(UseSSE>=1);
|
|
match(Set dst (SqrtF (LoadF src)));
|
|
|
|
format %{ "sqrtss $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ sqrtss($dst$$XMMRegister, $src$$Address);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct sqrtF_imm(regF dst, immF con) %{
|
|
predicate(UseSSE>=1);
|
|
match(Set dst (SqrtF con));
|
|
|
|
format %{ "sqrtss $dst, [$constantaddress]\t# load from constant table: float=$con" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ sqrtss($dst$$XMMRegister, $constantaddress($con));
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct sqrtD_reg(regD dst, regD src) %{
|
|
predicate(UseSSE>=2);
|
|
match(Set dst (SqrtD src));
|
|
|
|
format %{ "sqrtsd $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ sqrtsd($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct sqrtD_mem(regD dst, memory src) %{
|
|
predicate(UseSSE>=2);
|
|
match(Set dst (SqrtD (LoadD src)));
|
|
|
|
format %{ "sqrtsd $dst, $src" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ sqrtsd($dst$$XMMRegister, $src$$Address);
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
instruct sqrtD_imm(regD dst, immD con) %{
|
|
predicate(UseSSE>=2);
|
|
match(Set dst (SqrtD con));
|
|
format %{ "sqrtsd $dst, [$constantaddress]\t# load from constant table: double=$con" %}
|
|
ins_cost(150);
|
|
ins_encode %{
|
|
__ sqrtsd($dst$$XMMRegister, $constantaddress($con));
|
|
%}
|
|
ins_pipe(pipe_slow);
|
|
%}
|
|
|
|
|
|
#ifdef _LP64
instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
  match(Set dst (RoundDoubleMode src rmode));
  format %{ "roundsd $dst,$src" %}
  ins_cost(150);
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ roundsd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant);
  %}
  ins_pipe(pipe_slow);
%}

instruct roundD_mem(legRegD dst, memory src, immU8 rmode) %{
  match(Set dst (RoundDoubleMode (LoadD src) rmode));
  format %{ "roundsd $dst,$src" %}
  ins_cost(150);
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ roundsd($dst$$XMMRegister, $src$$Address, $rmode$$constant);
  %}
  ins_pipe(pipe_slow);
%}

instruct roundD_imm(legRegD dst, immD con, immU8 rmode, rRegI scratch_reg) %{
  match(Set dst (RoundDoubleMode con rmode));
  effect(TEMP scratch_reg);
  format %{ "roundsd $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_cost(150);
  ins_encode %{
    assert(UseSSE >= 4, "required");
    __ roundsd($dst$$XMMRegister, $constantaddress($con), $rmode$$constant, $scratch_reg$$Register);
  %}
  ins_pipe(pipe_slow);
%}

instruct vroundD_reg(legVec dst, legVec src, immU8 rmode) %{
  predicate(n->as_Vector()->length() < 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vroundpd $dst,$src,$rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vector_len = vector_length_encoding(this);
    __ vroundpd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vround8D_reg(vec dst, vec src, immU8 rmode) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RoundDoubleModeV src rmode));
  format %{ "vrndscalepd $dst,$src,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    __ vrndscalepd($dst$$XMMRegister, $src$$XMMRegister, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}

instruct vroundD_mem(legVec dst, memory mem, immU8 rmode) %{
  predicate(n->as_Vector()->length() < 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vroundpd $dst, $mem, $rmode\t! round packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vector_len = vector_length_encoding(this);
    __ vroundpd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vround8D_mem(vec dst, memory mem, immU8 rmode) %{
  predicate(n->as_Vector()->length() == 8);
  match(Set dst (RoundDoubleModeV (LoadVector mem) rmode));
  format %{ "vrndscalepd $dst,$mem,$rmode\t! round packed8D" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    __ vrndscalepd($dst$$XMMRegister, $mem$$Address, $rmode$$constant, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
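
// The $rmode constant is forwarded unchanged as the imm8 rounding control of
// roundsd/vroundpd/vrndscalepd, whose low two bits select round-to-nearest-
// even (00), round down (01), round up (10) or truncate (11). These
// RoundDoubleMode(V) nodes are the shape the Math.rint/floor/ceil style
// library intrinsics are expected to lower to; a hedged Java illustration
// with a hypothetical array a:
//
//   for (int i = 0; i < a.length; i++) {
//     a[i] = Math.floor(a[i]);   // round-down mode in $rmode on this platform
//   }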

#endif // _LP64

instruct onspinwait() %{
  match(OnSpinWait);
  ins_cost(200);

  format %{
    $$template
    $$emit$$"pause\t! membar_onspinwait"
  %}
  ins_encode %{
    __ pause();
  %}
  ins_pipe(pipe_slow);
%}
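
// OnSpinWait backs the java.lang.Thread.onSpinWait() intrinsic, which on x86
// is intended to become a single pause instruction. Illustrative use, assuming
// a typical busy-wait loop over a volatile flag defined elsewhere:
//
//   while (!flag) {
//     Thread.onSpinWait();   // matched by the onspinwait rule above
//   }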

// a * b + c
instruct fmaD_reg(regD a, regD b, regD c) %{
  predicate(UseFMA);
  match(Set c (FmaD c (Binary a b)));
  format %{ "fmasd $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

// a * b + c
instruct fmaF_reg(regF a, regF b, regF c) %{
  predicate(UseFMA);
  match(Set c (FmaF c (Binary a b)));
  format %{ "fmass $a,$b,$c\t# $c = $a * $b + $c" %}
  ins_cost(150);
  ins_encode %{
    __ fmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
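
// FmaD/FmaF come from fused multiply-add, i.e. Math.fma, which rounds once
// for the whole a * b + c instead of once per operation. A short Java
// illustration (dot-product accumulation, hypothetical names):
//
//   double acc = 0.0;
//   for (int i = 0; i < n; i++) {
//     acc = Math.fma(x[i], y[i], acc);   // matches fmaD_reg when UseFMA is set
//   }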
|
|
|
|
// ====================VECTOR INSTRUCTIONS=====================================
|
|
|
|
// Dummy reg-to-reg vector moves. Removed during post-selection cleanup.
|
|
instruct MoveVec2Leg(legVec dst, vec src) %{
|
|
match(Set dst src);
|
|
format %{ "" %}
|
|
ins_encode %{
|
|
ShouldNotReachHere();
|
|
%}
|
|
ins_pipe( fpu_reg_reg );
|
|
%}
|
|
|
|
instruct MoveLeg2Vec(vec dst, legVec src) %{
|
|
match(Set dst src);
|
|
format %{ "" %}
|
|
ins_encode %{
|
|
ShouldNotReachHere();
|
|
%}
|
|
ins_pipe( fpu_reg_reg );
|
|
%}
|
|
|
|
// ============================================================================

// Load vectors
instruct loadV(vec dst, memory mem) %{
  match(Set dst (LoadVector mem));
  ins_cost(125);
  format %{ "load_vector $dst,$mem" %}
  ins_encode %{
    switch (vector_length_in_bytes(this)) {
      case  4: __ movdl    ($dst$$XMMRegister, $mem$$Address); break;
      case  8: __ movq     ($dst$$XMMRegister, $mem$$Address); break;
      case 16: __ movdqu   ($dst$$XMMRegister, $mem$$Address); break;
      case 32: __ vmovdqu  ($dst$$XMMRegister, $mem$$Address); break;
      case 64: __ evmovdqul($dst$$XMMRegister, $mem$$Address, Assembler::AVX_512bit); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}

// Store vectors generic operand pattern.
instruct storeV(memory mem, vec src) %{
  match(Set mem (StoreVector mem src));
  ins_cost(145);
  format %{ "store_vector $mem,$src\n\t" %}
  ins_encode %{
    switch (vector_length_in_bytes(this, $src)) {
      case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
      case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
      case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
      case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
      case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
      default: ShouldNotReachHere();
    }
  %}
  ins_pipe( pipe_slow );
%}
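
// loadV/storeV are the generic vector memory patterns; the vector size in
// bytes selects the 4/8/16/32/64-byte move above. A hedged Java sketch of a
// loop shape that C2's auto-vectorizer (SuperWord) may turn into
// LoadVector/AddVI/StoreVector, and hence into these rules plus the vaddI*
// rules further down (int[] a, b, c of equal length, hypothetical names):
//
//   for (int i = 0; i < a.length; i++) {
//     c[i] = a[i] + b[i];
//   }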
|
|
|
|
// ====================REPLICATE=======================================
|
|
|
|
// Replicate byte scalar to be vector
|
|
instruct ReplB_reg(vec dst, rRegI src) %{
|
|
match(Set dst (ReplicateB src));
|
|
format %{ "replicateB $dst,$src" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
|
|
assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
|
|
int vlen_enc = vector_length_encoding(this);
|
|
__ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
|
|
} else {
|
|
__ movdl($dst$$XMMRegister, $src$$Register);
|
|
__ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
|
|
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
|
if (vlen >= 16) {
|
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
|
if (vlen >= 32) {
|
|
assert(vlen == 32, "sanity");
|
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
|
}
|
|
}
|
|
}
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct ReplB_mem(vec dst, memory mem) %{
|
|
predicate(VM_Version::supports_avx2());
|
|
match(Set dst (ReplicateB (LoadB mem)));
|
|
format %{ "replicateB $dst,$mem" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct ReplB_imm(vec dst, immI con) %{
|
|
match(Set dst (ReplicateB con));
|
|
format %{ "replicateB $dst,$con" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 1));
|
|
if (vlen == 4) {
|
|
__ movdl($dst$$XMMRegister, const_addr);
|
|
} else {
|
|
__ movq($dst$$XMMRegister, const_addr);
|
|
if (vlen >= 16) {
|
|
if (VM_Version::supports_avx2()) {
|
|
int vlen_enc = vector_length_encoding(this);
|
|
__ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
|
} else {
|
|
assert(vlen == 16, "sanity");
|
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
|
}
|
|
}
|
|
}
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
// Replicate byte scalar zero to be vector
|
|
instruct ReplB_zero(vec dst, immI0 zero) %{
|
|
match(Set dst (ReplicateB zero));
|
|
format %{ "replicateB $dst,$zero" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
if (vlen <= 16) {
|
|
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
|
|
} else {
|
|
// Use vpxor since AVX512F does not have 512bit vxorpd (requires AVX512DQ).
|
|
int vlen_enc = vector_length_encoding(this);
|
|
__ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
|
}
|
|
%}
|
|
ins_pipe( fpu_reg_reg );
|
|
%}
|
|
|
|
// ====================ReplicateS=======================================
|
|
|
|
instruct ReplS_reg(vec dst, rRegI src) %{
|
|
match(Set dst (ReplicateS src));
|
|
format %{ "replicateS $dst,$src" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
if (vlen == 32 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
|
|
assert(VM_Version::supports_avx512bw(), "required"); // 512-bit short vectors assume AVX512BW
|
|
int vlen_enc = vector_length_encoding(this);
|
|
__ evpbroadcastw($dst$$XMMRegister, $src$$Register, vlen_enc);
|
|
} else {
|
|
__ movdl($dst$$XMMRegister, $src$$Register);
|
|
__ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
|
if (vlen >= 8) {
|
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
|
if (vlen >= 16) {
|
|
assert(vlen == 16, "sanity");
|
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
|
}
|
|
}
|
|
}
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct ReplS_mem(vec dst, memory mem) %{
|
|
predicate(VM_Version::supports_avx2());
|
|
match(Set dst (ReplicateS (LoadS mem)));
|
|
format %{ "replicateS $dst,$mem" %}
|
|
ins_encode %{
|
|
int vlen_enc = vector_length_encoding(this);
|
|
__ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vlen_enc);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct ReplS_imm(vec dst, immI con) %{
|
|
match(Set dst (ReplicateS con));
|
|
format %{ "replicateS $dst,$con" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 2));
|
|
if (vlen == 2) {
|
|
__ movdl($dst$$XMMRegister, const_addr);
|
|
} else {
|
|
__ movq($dst$$XMMRegister, const_addr);
|
|
if (vlen >= 8) {
|
|
if (VM_Version::supports_avx2()) {
|
|
int vlen_enc = vector_length_encoding(this);
|
|
__ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
|
} else {
|
|
assert(vlen == 8, "sanity");
|
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
|
}
|
|
}
|
|
}
|
|
%}
|
|
ins_pipe( fpu_reg_reg );
|
|
%}
|
|
|
|
instruct ReplS_zero(vec dst, immI0 zero) %{
|
|
match(Set dst (ReplicateS zero));
|
|
format %{ "replicateS $dst,$zero" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
if (vlen <= 8) {
|
|
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
|
|
} else {
|
|
int vlen_enc = vector_length_encoding(this);
|
|
__ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
|
}
|
|
%}
|
|
ins_pipe( fpu_reg_reg );
|
|
%}
|
|
|
|
// ====================ReplicateI=======================================
|
|
|
|
instruct ReplI_reg(vec dst, rRegI src) %{
|
|
match(Set dst (ReplicateI src));
|
|
format %{ "replicateI $dst,$src" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
if (vlen == 16 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
|
int vlen_enc = vector_length_encoding(this);
|
|
__ evpbroadcastd($dst$$XMMRegister, $src$$Register, vlen_enc);
|
|
} else {
|
|
__ movdl($dst$$XMMRegister, $src$$Register);
|
|
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
|
if (vlen >= 8) {
|
|
assert(vlen == 8, "sanity");
|
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
|
}
|
|
}
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct ReplI_mem(vec dst, memory mem) %{
|
|
match(Set dst (ReplicateI (LoadI mem)));
|
|
format %{ "replicateI $dst,$mem" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
if (vlen <= 4) {
|
|
__ movdl($dst$$XMMRegister, $mem$$Address);
|
|
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
|
} else {
|
|
assert(VM_Version::supports_avx2(), "sanity");
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
|
|
}
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct ReplI_imm(vec dst, immI con) %{
|
|
match(Set dst (ReplicateI con));
|
|
format %{ "replicateI $dst,$con" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
InternalAddress const_addr = $constantaddress(replicate8_imm($con$$constant, 4));
|
|
if (vlen <= 4) {
|
|
__ movq($dst$$XMMRegister, const_addr);
|
|
if (vlen == 4) {
|
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
|
}
|
|
} else {
|
|
assert(VM_Version::supports_avx2(), "sanity");
|
|
int vector_len = vector_length_encoding(this);
|
|
__ movq($dst$$XMMRegister, const_addr);
|
|
__ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
|
|
}
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
// Replicate integer (4 byte) scalar zero to be vector
instruct ReplI_zero(vec dst, immI0 zero) %{
  match(Set dst (ReplicateI zero));
  format %{ "replicateI $dst,$zero" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen <= 4) {
      __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( fpu_reg_reg );
%}

instruct ReplI_M1(vec dst, immI_M1 con) %{
  predicate(UseAVX > 0);
  match(Set dst (ReplicateB con));
  match(Set dst (ReplicateS con));
  match(Set dst (ReplicateI con));
  effect(TEMP dst);
  format %{ "vallones $dst" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vallones($dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
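
// All of the Replicate* rules implement a broadcast: one scalar (register,
// memory operand or constant) is copied into every lane of the destination
// vector, using the widest broadcast the current CPU offers and falling back
// to shuffle/unpack sequences on older SSE levels. For example (hypothetical
// values), ReplicateI of 7 into a 256-bit vector produces the eight lanes
//
//   [7, 7, 7, 7, 7, 7, 7, 7]
//
// which is typically how a loop-invariant scalar reaches vector code, e.g.
// the constant in  b[i] = a[i] * 7;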
|
|
|
|
// ====================ReplicateL=======================================
|
|
|
|
#ifdef _LP64
|
|
// Replicate long (8 byte) scalar to be vector
|
|
instruct ReplL_reg(vec dst, rRegL src) %{
|
|
match(Set dst (ReplicateL src));
|
|
format %{ "replicateL $dst,$src" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
if (vlen == 2) {
|
|
__ movdq($dst$$XMMRegister, $src$$Register);
|
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
|
} else if (vlen == 8 || VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
|
int vlen_enc = vector_length_encoding(this);
|
|
__ evpbroadcastq($dst$$XMMRegister, $src$$Register, vlen_enc);
|
|
} else {
|
|
assert(vlen == 4, "sanity");
|
|
__ movdq($dst$$XMMRegister, $src$$Register);
|
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
|
}
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
#else // _LP64
|
|
// Replicate long (8 byte) scalar to be vector
|
|
instruct ReplL_reg(vec dst, eRegL src, vec tmp) %{
|
|
predicate(n->as_Vector()->length() <= 4);
|
|
match(Set dst (ReplicateL src));
|
|
effect(TEMP dst, USE src, TEMP tmp);
|
|
format %{ "replicateL $dst,$src" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
if (vlen == 2) {
|
|
__ movdl($dst$$XMMRegister, $src$$Register);
|
|
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
|
|
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
|
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
|
} else if (VM_Version::supports_avx512vl()) { // AVX512VL for <512bit operands
|
|
int vector_len = Assembler::AVX_256bit;
|
|
__ movdl($dst$$XMMRegister, $src$$Register);
|
|
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
|
|
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
|
|
__ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
|
|
} else {
|
|
__ movdl($dst$$XMMRegister, $src$$Register);
|
|
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
|
|
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
|
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
|
}
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct ReplL_reg_leg(legVec dst, eRegL src, legVec tmp) %{
|
|
predicate(n->as_Vector()->length() == 8);
|
|
match(Set dst (ReplicateL src));
|
|
effect(TEMP dst, USE src, TEMP tmp);
|
|
format %{ "replicateL $dst,$src" %}
|
|
ins_encode %{
|
|
if (VM_Version::supports_avx512vl()) {
|
|
__ movdl($dst$$XMMRegister, $src$$Register);
|
|
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
|
|
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
|
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
|
__ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
|
__ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
|
|
} else {
|
|
int vector_len = Assembler::AVX_512bit;
|
|
__ movdl($dst$$XMMRegister, $src$$Register);
|
|
__ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
|
|
__ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
|
|
__ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
|
|
}
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
#endif // _LP64
|
|
|
|
instruct ReplL_mem(vec dst, memory mem) %{
|
|
match(Set dst (ReplicateL (LoadL mem)));
|
|
format %{ "replicateL $dst,$mem" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
if (vlen == 2) {
|
|
__ movq($dst$$XMMRegister, $mem$$Address);
|
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
|
} else {
|
|
assert(VM_Version::supports_avx2(), "sanity");
|
|
int vlen_enc = vector_length_encoding(this);
|
|
__ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vlen_enc);
|
|
}
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
|
|
instruct ReplL_imm(vec dst, immL con) %{
|
|
match(Set dst (ReplicateL con));
|
|
format %{ "replicateL $dst,$con" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
InternalAddress const_addr = $constantaddress($con);
|
|
if (vlen == 2) {
|
|
__ movq($dst$$XMMRegister, const_addr);
|
|
__ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
|
|
} else {
|
|
assert(VM_Version::supports_avx2(), "sanity");
|
|
int vlen_enc = vector_length_encoding(this);
|
|
__ movq($dst$$XMMRegister, const_addr);
|
|
__ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
|
}
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct ReplL_zero(vec dst, immL0 zero) %{
|
|
match(Set dst (ReplicateL zero));
|
|
format %{ "replicateL $dst,$zero" %}
|
|
ins_encode %{
|
|
int vlen = vector_length(this);
|
|
if (vlen == 2) {
|
|
__ pxor($dst$$XMMRegister, $dst$$XMMRegister);
|
|
} else {
|
|
int vlen_enc = vector_length_encoding(this);
|
|
__ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
|
|
}
|
|
%}
|
|
ins_pipe( fpu_reg_reg );
|
|
%}
|
|
|
|
instruct ReplL_M1(vec dst, immL_M1 con) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (ReplicateL con));
|
|
effect(TEMP dst);
|
|
format %{ "vallones $dst" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vallones($dst$$XMMRegister, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
// ====================ReplicateF=======================================
|
|
|
|
instruct ReplF_reg(vec dst, vlRegF src) %{
|
|
match(Set dst (ReplicateF src));
|
|
format %{ "replicateF $dst,$src" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
if (vlen <= 4) {
|
|
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
|
|
} else if (VM_Version::supports_avx2()) {
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len); // reg-to-reg variant requires AVX2
|
|
} else {
|
|
assert(vlen == 8, "sanity");
|
|
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
|
|
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
|
}
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct ReplF_mem(vec dst, memory mem) %{
|
|
match(Set dst (ReplicateF (LoadF mem)));
|
|
format %{ "replicateF $dst,$mem" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
if (vlen <= 4) {
|
|
__ movdl($dst$$XMMRegister, $mem$$Address);
|
|
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
|
|
} else {
|
|
assert(VM_Version::supports_avx(), "sanity");
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
|
|
}
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct ReplF_zero(vec dst, immF0 zero) %{
|
|
match(Set dst (ReplicateF zero));
|
|
format %{ "replicateF $dst,$zero" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
if (vlen <= 4) {
|
|
__ xorps($dst$$XMMRegister, $dst$$XMMRegister);
|
|
} else {
|
|
int vlen_enc = vector_length_encoding(this);
|
|
__ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ
|
|
}
|
|
%}
|
|
ins_pipe( fpu_reg_reg );
|
|
%}
|
|
|
|
// ====================ReplicateD=======================================
|
|
|
|
// Replicate double (8 bytes) scalar to be vector
|
|
instruct ReplD_reg(vec dst, vlRegD src) %{
|
|
match(Set dst (ReplicateD src));
|
|
format %{ "replicateD $dst,$src" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
if (vlen == 2) {
|
|
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
|
|
} else if (VM_Version::supports_avx2()) {
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len); // reg-to-reg variant requires AVX2
|
|
} else {
|
|
assert(vlen == 4, "sanity");
|
|
__ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
|
|
__ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
|
|
}
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct ReplD_mem(vec dst, memory mem) %{
|
|
match(Set dst (ReplicateD (LoadD mem)));
|
|
format %{ "replicateD $dst,$mem" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
if (vlen == 2) {
|
|
__ movq($dst$$XMMRegister, $mem$$Address);
|
|
__ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x44);
|
|
} else {
|
|
assert(VM_Version::supports_avx(), "sanity");
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
|
|
}
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct ReplD_zero(vec dst, immD0 zero) %{
|
|
match(Set dst (ReplicateD zero));
|
|
format %{ "replicateD $dst,$zero" %}
|
|
ins_encode %{
|
|
uint vlen = vector_length(this);
|
|
if (vlen == 2) {
|
|
__ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
|
|
} else {
|
|
int vlen_enc = vector_length_encoding(this);
|
|
__ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc); // 512bit vxorps requires AVX512DQ
|
|
}
|
|
%}
|
|
ins_pipe( fpu_reg_reg );
|
|
%}
|
|
|
|
// ====================REDUCTION ARITHMETIC=======================================
// =======================Int Reduction==========================================

instruct reductionI(rRegI dst, rRegI src1, vec src2, vec vtmp1, vec vtmp2) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT &&
            n->in(2)->bottom_type()->is_vect()->length() < 16);
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src2);
    __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct reduction16I(rRegI dst, rRegI src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
  predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_INT &&
            n->in(2)->bottom_type()->is_vect()->length() == 16);
  match(Set dst (AddReductionVI src1 src2));
  match(Set dst (MulReductionVI src1 src2));
  match(Set dst (AndReductionV src1 src2));
  match(Set dst ( OrReductionV src1 src2));
  match(Set dst (XorReductionV src1 src2));
  effect(TEMP vtmp1, TEMP vtmp2);
  format %{ "vector_reduction_int $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this, $src2);
    __ reduceI(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}
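
// A reduction folds all lanes of $src2 into a single scalar and combines the
// result with the scalar input $src1 using the same operator (the ideal
// opcode selects add/mul/and/or/xor). Conceptually, for AddReductionVI:
//
//   dst = src1 + (src2[0] + src2[1] + ... + src2[vlen-1])
//
// which is the vectorized shape of an accumulation loop such as
//
//   int sum = 0;
//   for (int i = 0; i < a.length; i++) { sum += a[i]; }   // hypothetical int[] a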
|
|
|
|
// =======================Long Reduction==========================================
|
|
|
|
#ifdef _LP64
|
|
instruct reductionL(rRegL dst, rRegL src1, vec src2, vec vtmp1, vec vtmp2) %{
|
|
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
|
|
n->in(2)->bottom_type()->is_vect()->length() < 8);
|
|
match(Set dst (AddReductionVL src1 src2));
|
|
match(Set dst (MulReductionVL src1 src2));
|
|
match(Set dst (AndReductionV src1 src2));
|
|
match(Set dst ( OrReductionV src1 src2));
|
|
match(Set dst (XorReductionV src1 src2));
|
|
effect(TEMP vtmp1, TEMP vtmp2);
|
|
format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
|
|
ins_encode %{
|
|
int opcode = this->ideal_Opcode();
|
|
int vlen = vector_length(this, $src2);
|
|
__ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct reduction8L(rRegL dst, rRegL src1, legVec src2, legVec vtmp1, legVec vtmp2) %{
|
|
predicate(n->in(2)->bottom_type()->is_vect()->element_basic_type() == T_LONG &&
|
|
n->in(2)->bottom_type()->is_vect()->length() == 8);
|
|
match(Set dst (AddReductionVL src1 src2));
|
|
match(Set dst (MulReductionVL src1 src2));
|
|
match(Set dst (AndReductionV src1 src2));
|
|
match(Set dst ( OrReductionV src1 src2));
|
|
match(Set dst (XorReductionV src1 src2));
|
|
effect(TEMP vtmp1, TEMP vtmp2);
|
|
format %{ "vector_reduction_long $dst,$src1,$src2 ; using $vtmp1, $vtmp2 as TEMP" %}
|
|
ins_encode %{
|
|
int opcode = this->ideal_Opcode();
|
|
int vlen = vector_length(this, $src2);
|
|
__ reduceL(opcode, vlen, $dst$$Register, $src1$$Register, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
#endif // _LP64
|
|
|
|
// =======================Float Reduction==========================================
|
|
|
|
instruct reductionF128(regF dst, vec src, vec vtmp) %{
|
|
predicate(n->in(2)->bottom_type()->is_vect()->length() <= 4);
|
|
match(Set dst (AddReductionVF dst src));
|
|
match(Set dst (MulReductionVF dst src));
|
|
effect(TEMP dst, TEMP vtmp);
|
|
format %{ "vector_reduction_fp $dst,$src ; using $vtmp as TEMP" %}
|
|
ins_encode %{
|
|
int opcode = this->ideal_Opcode();
|
|
int vlen = vector_length(this, $src);
|
|
__ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct reduction8F(regF dst, vec src, vec vtmp1, vec vtmp2) %{
|
|
predicate(n->in(2)->bottom_type()->is_vect()->length() == 8);
|
|
match(Set dst (AddReductionVF dst src));
|
|
match(Set dst (MulReductionVF dst src));
|
|
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
|
|
format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
|
|
ins_encode %{
|
|
int opcode = this->ideal_Opcode();
|
|
int vlen = vector_length(this, $src);
|
|
__ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct reduction16F(regF dst, legVec src, legVec vtmp1, legVec vtmp2) %{
|
|
predicate(n->in(2)->bottom_type()->is_vect()->length() == 16);
|
|
match(Set dst (AddReductionVF dst src));
|
|
match(Set dst (MulReductionVF dst src));
|
|
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
|
|
format %{ "vector_reduction_float $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
|
|
ins_encode %{
|
|
int opcode = this->ideal_Opcode();
|
|
int vlen = vector_length(this, $src);
|
|
__ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
// =======================Double Reduction==========================================
|
|
|
|
instruct reduction2D(regD dst, vec src, vec vtmp) %{
|
|
predicate(n->in(2)->bottom_type()->is_vect()->length() == 2);
|
|
match(Set dst (AddReductionVD dst src));
|
|
match(Set dst (MulReductionVD dst src));
|
|
effect(TEMP dst, TEMP vtmp);
|
|
format %{ "vector_reduction_double $dst,$src ; using $vtmp as TEMP" %}
|
|
ins_encode %{
|
|
int opcode = this->ideal_Opcode();
|
|
int vlen = vector_length(this, $src);
|
|
__ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct reduction4D(regD dst, vec src, vec vtmp1, vec vtmp2) %{
|
|
predicate(n->in(2)->bottom_type()->is_vect()->length() == 4);
|
|
match(Set dst (AddReductionVD dst src));
|
|
match(Set dst (MulReductionVD dst src));
|
|
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
|
|
format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
|
|
ins_encode %{
|
|
int opcode = this->ideal_Opcode();
|
|
int vlen = vector_length(this, $src);
|
|
__ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct reduction8D(regD dst, legVec src, legVec vtmp1, legVec vtmp2) %{
|
|
predicate(n->in(2)->bottom_type()->is_vect()->length() == 8);
|
|
match(Set dst (AddReductionVD dst src));
|
|
match(Set dst (MulReductionVD dst src));
|
|
effect(TEMP dst, TEMP vtmp1, TEMP vtmp2);
|
|
format %{ "vector_reduction_double $dst,$src ; using $vtmp1, $vtmp2 as TEMP" %}
|
|
ins_encode %{
|
|
int opcode = this->ideal_Opcode();
|
|
int vlen = vector_length(this, $src);
|
|
__ reduce_fp(opcode, vlen, $dst$$XMMRegister, $src$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
// ====================VECTOR ARITHMETIC=======================================
|
|
|
|
// --------------------------------- ADD --------------------------------------
|
|
|
|
// Bytes vector add
|
|
instruct vaddB(vec dst, vec src) %{
|
|
predicate(UseAVX == 0);
|
|
match(Set dst (AddVB dst src));
|
|
format %{ "paddb $dst,$src\t! add packedB" %}
|
|
ins_encode %{
|
|
__ paddb($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vaddB_reg(vec dst, vec src1, vec src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (AddVB src1 src2));
|
|
format %{ "vpaddb $dst,$src1,$src2\t! add packedB" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vaddB_mem(vec dst, vec src, memory mem) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (AddVB src (LoadVector mem)));
|
|
format %{ "vpaddb $dst,$src,$mem\t! add packedB" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
// Shorts/Chars vector add
|
|
instruct vaddS(vec dst, vec src) %{
|
|
predicate(UseAVX == 0);
|
|
match(Set dst (AddVS dst src));
|
|
format %{ "paddw $dst,$src\t! add packedS" %}
|
|
ins_encode %{
|
|
__ paddw($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vaddS_reg(vec dst, vec src1, vec src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (AddVS src1 src2));
|
|
format %{ "vpaddw $dst,$src1,$src2\t! add packedS" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vaddS_mem(vec dst, vec src, memory mem) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (AddVS src (LoadVector mem)));
|
|
format %{ "vpaddw $dst,$src,$mem\t! add packedS" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
// Integers vector add
|
|
instruct vaddI(vec dst, vec src) %{
|
|
predicate(UseAVX == 0);
|
|
match(Set dst (AddVI dst src));
|
|
format %{ "paddd $dst,$src\t! add packedI" %}
|
|
ins_encode %{
|
|
__ paddd($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vaddI_reg(vec dst, vec src1, vec src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (AddVI src1 src2));
|
|
format %{ "vpaddd $dst,$src1,$src2\t! add packedI" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
|
|
instruct vaddI_mem(vec dst, vec src, memory mem) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (AddVI src (LoadVector mem)));
|
|
format %{ "vpaddd $dst,$src,$mem\t! add packedI" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
// Longs vector add
|
|
instruct vaddL(vec dst, vec src) %{
|
|
predicate(UseAVX == 0);
|
|
match(Set dst (AddVL dst src));
|
|
format %{ "paddq $dst,$src\t! add packedL" %}
|
|
ins_encode %{
|
|
__ paddq($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vaddL_reg(vec dst, vec src1, vec src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (AddVL src1 src2));
|
|
format %{ "vpaddq $dst,$src1,$src2\t! add packedL" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vaddL_mem(vec dst, vec src, memory mem) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (AddVL src (LoadVector mem)));
|
|
format %{ "vpaddq $dst,$src,$mem\t! add packedL" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
// Floats vector add
|
|
instruct vaddF(vec dst, vec src) %{
|
|
predicate(UseAVX == 0);
|
|
match(Set dst (AddVF dst src));
|
|
format %{ "addps $dst,$src\t! add packedF" %}
|
|
ins_encode %{
|
|
__ addps($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vaddF_reg(vec dst, vec src1, vec src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (AddVF src1 src2));
|
|
format %{ "vaddps $dst,$src1,$src2\t! add packedF" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vaddF_mem(vec dst, vec src, memory mem) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (AddVF src (LoadVector mem)));
|
|
format %{ "vaddps $dst,$src,$mem\t! add packedF" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
// Doubles vector add
|
|
instruct vaddD(vec dst, vec src) %{
|
|
predicate(UseAVX == 0);
|
|
match(Set dst (AddVD dst src));
|
|
format %{ "addpd $dst,$src\t! add packedD" %}
|
|
ins_encode %{
|
|
__ addpd($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vaddD_reg(vec dst, vec src1, vec src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (AddVD src1 src2));
|
|
format %{ "vaddpd $dst,$src1,$src2\t! add packedD" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vaddD_mem(vec dst, vec src, memory mem) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (AddVD src (LoadVector mem)));
|
|
format %{ "vaddpd $dst,$src,$mem\t! add packedD" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
// --------------------------------- SUB --------------------------------------
|
|
|
|
// Bytes vector sub
|
|
instruct vsubB(vec dst, vec src) %{
|
|
predicate(UseAVX == 0);
|
|
match(Set dst (SubVB dst src));
|
|
format %{ "psubb $dst,$src\t! sub packedB" %}
|
|
ins_encode %{
|
|
__ psubb($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vsubB_reg(vec dst, vec src1, vec src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (SubVB src1 src2));
|
|
format %{ "vpsubb $dst,$src1,$src2\t! sub packedB" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vsubB_mem(vec dst, vec src, memory mem) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (SubVB src (LoadVector mem)));
|
|
format %{ "vpsubb $dst,$src,$mem\t! sub packedB" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
// Shorts/Chars vector sub
|
|
instruct vsubS(vec dst, vec src) %{
|
|
predicate(UseAVX == 0);
|
|
match(Set dst (SubVS dst src));
|
|
format %{ "psubw $dst,$src\t! sub packedS" %}
|
|
ins_encode %{
|
|
__ psubw($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
|
|
instruct vsubS_reg(vec dst, vec src1, vec src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (SubVS src1 src2));
|
|
format %{ "vpsubw $dst,$src1,$src2\t! sub packedS" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vsubS_mem(vec dst, vec src, memory mem) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (SubVS src (LoadVector mem)));
|
|
format %{ "vpsubw $dst,$src,$mem\t! sub packedS" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
// Integers vector sub
|
|
instruct vsubI(vec dst, vec src) %{
|
|
predicate(UseAVX == 0);
|
|
match(Set dst (SubVI dst src));
|
|
format %{ "psubd $dst,$src\t! sub packedI" %}
|
|
ins_encode %{
|
|
__ psubd($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vsubI_reg(vec dst, vec src1, vec src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (SubVI src1 src2));
|
|
format %{ "vpsubd $dst,$src1,$src2\t! sub packedI" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vsubI_mem(vec dst, vec src, memory mem) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (SubVI src (LoadVector mem)));
|
|
format %{ "vpsubd $dst,$src,$mem\t! sub packedI" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
// Longs vector sub
|
|
instruct vsubL(vec dst, vec src) %{
|
|
predicate(UseAVX == 0);
|
|
match(Set dst (SubVL dst src));
|
|
format %{ "psubq $dst,$src\t! sub packedL" %}
|
|
ins_encode %{
|
|
__ psubq($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vsubL_reg(vec dst, vec src1, vec src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (SubVL src1 src2));
|
|
format %{ "vpsubq $dst,$src1,$src2\t! sub packedL" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
|
|
instruct vsubL_mem(vec dst, vec src, memory mem) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (SubVL src (LoadVector mem)));
|
|
format %{ "vpsubq $dst,$src,$mem\t! sub packedL" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
// Floats vector sub
|
|
instruct vsubF(vec dst, vec src) %{
|
|
predicate(UseAVX == 0);
|
|
match(Set dst (SubVF dst src));
|
|
format %{ "subps $dst,$src\t! sub packedF" %}
|
|
ins_encode %{
|
|
__ subps($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vsubF_reg(vec dst, vec src1, vec src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (SubVF src1 src2));
|
|
format %{ "vsubps $dst,$src1,$src2\t! sub packedF" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vsubF_mem(vec dst, vec src, memory mem) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (SubVF src (LoadVector mem)));
|
|
format %{ "vsubps $dst,$src,$mem\t! sub packedF" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
// Doubles vector sub
|
|
instruct vsubD(vec dst, vec src) %{
|
|
predicate(UseAVX == 0);
|
|
match(Set dst (SubVD dst src));
|
|
format %{ "subpd $dst,$src\t! sub packedD" %}
|
|
ins_encode %{
|
|
__ subpd($dst$$XMMRegister, $src$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vsubD_reg(vec dst, vec src1, vec src2) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (SubVD src1 src2));
|
|
format %{ "vsubpd $dst,$src1,$src2\t! sub packedD" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vsubD_mem(vec dst, vec src, memory mem) %{
|
|
predicate(UseAVX > 0);
|
|
match(Set dst (SubVD src (LoadVector mem)));
|
|
format %{ "vsubpd $dst,$src,$mem\t! sub packedD" %}
|
|
ins_encode %{
|
|
int vector_len = vector_length_encoding(this);
|
|
__ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
// --------------------------------- MUL --------------------------------------
|
|
|
|
// Byte vector mul
|
|
instruct mulB_reg(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{
|
|
predicate(n->as_Vector()->length() == 4 ||
|
|
n->as_Vector()->length() == 8);
|
|
match(Set dst (MulVB src1 src2));
|
|
effect(TEMP dst, TEMP tmp, TEMP scratch);
|
|
format %{"vector_mulB $dst,$src1,$src2" %}
|
|
ins_encode %{
|
|
assert(UseSSE > 3, "required");
|
|
__ pmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister);
|
|
__ pmovsxbw($dst$$XMMRegister, $src2$$XMMRegister);
|
|
__ pmullw($tmp$$XMMRegister, $dst$$XMMRegister);
|
|
__ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
|
|
__ pand($dst$$XMMRegister, $tmp$$XMMRegister);
|
|
__ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct mul16B_reg(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
|
|
predicate(n->as_Vector()->length() == 16 && UseAVX <= 1);
|
|
match(Set dst (MulVB src1 src2));
|
|
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
|
|
format %{"vector_mulB $dst,$src1,$src2" %}
|
|
ins_encode %{
|
|
assert(UseSSE > 3, "required");
|
|
__ pmovsxbw($tmp1$$XMMRegister, $src1$$XMMRegister);
|
|
__ pmovsxbw($tmp2$$XMMRegister, $src2$$XMMRegister);
|
|
__ pmullw($tmp1$$XMMRegister, $tmp2$$XMMRegister);
|
|
__ pshufd($tmp2$$XMMRegister, $src1$$XMMRegister, 0xEE);
|
|
__ pshufd($dst$$XMMRegister, $src2$$XMMRegister, 0xEE);
|
|
__ pmovsxbw($tmp2$$XMMRegister, $tmp2$$XMMRegister);
|
|
__ pmovsxbw($dst$$XMMRegister, $dst$$XMMRegister);
|
|
__ pmullw($tmp2$$XMMRegister, $dst$$XMMRegister);
|
|
__ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
|
|
__ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
|
|
__ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
|
|
__ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vmul16B_reg_avx(vec dst, vec src1, vec src2, vec tmp, rRegI scratch) %{
|
|
predicate(n->as_Vector()->length() == 16 && UseAVX > 1);
|
|
match(Set dst (MulVB src1 src2));
|
|
effect(TEMP dst, TEMP tmp, TEMP scratch);
|
|
format %{"vector_mulB $dst,$src1,$src2" %}
|
|
ins_encode %{
|
|
int vector_len = Assembler::AVX_256bit;
|
|
__ vpmovsxbw($tmp$$XMMRegister, $src1$$XMMRegister, vector_len);
|
|
__ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
|
|
__ vpmullw($tmp$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, vector_len);
|
|
__ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
|
|
__ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
|
|
__ vextracti128_high($tmp$$XMMRegister, $dst$$XMMRegister);
|
|
__ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, 0);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vmul32B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
|
|
predicate(n->as_Vector()->length() == 32);
|
|
match(Set dst (MulVB src1 src2));
|
|
effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
|
|
format %{"vector_mulB $dst,$src1,$src2" %}
|
|
ins_encode %{
|
|
assert(UseAVX > 1, "required");
|
|
int vector_len = Assembler::AVX_256bit;
|
|
__ vextracti128_high($tmp1$$XMMRegister, $src1$$XMMRegister);
|
|
__ vextracti128_high($dst$$XMMRegister, $src2$$XMMRegister);
|
|
__ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
|
|
__ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
|
|
__ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
|
|
__ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
|
|
__ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
|
|
__ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
|
|
__ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
|
|
__ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
|
|
__ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
|
|
__ vpand($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister, vector_len);
|
|
__ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp1$$XMMRegister, vector_len);
|
|
__ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
|
|
%}
|
|
ins_pipe( pipe_slow );
|
|
%}
|
|
|
|
instruct vmul64B_reg_avx(vec dst, vec src1, vec src2, vec tmp1, vec tmp2, rRegI scratch) %{
  predicate(n->as_Vector()->length() == 64);
  match(Set dst (MulVB src1 src2));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
  format %{"vector_mulB $dst,$src1,$src2\n\t" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vector_len = Assembler::AVX_512bit;
    __ vextracti64x4_high($tmp1$$XMMRegister, $src1$$XMMRegister);
    __ vextracti64x4_high($dst$$XMMRegister, $src2$$XMMRegister);
    __ vpmovsxbw($tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
    __ vpmovsxbw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpmullw($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpmovsxbw($tmp2$$XMMRegister, $src1$$XMMRegister, vector_len);
    __ vpmovsxbw($dst$$XMMRegister, $src2$$XMMRegister, vector_len);
    __ vpmullw($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
|
|
|
|
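// The 256-bit and 512-bit MulVB rules above multiply the low and high halves separately
// because vpackuswb packs within each 128-bit lane; the trailing vpermq (0xD8) or
// vector_byte_perm_mask permute puts the packed bytes back into element order.
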
// Shorts/Chars vector mul
instruct vmulS(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVS dst src));
  format %{ "pmullw $dst,$src\t! mul packedS" %}
  ins_encode %{
    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulS_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src1 src2));
  format %{ "vpmullw $dst,$src1,$src2\t! mul packedS" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulS_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVS src (LoadVector mem)));
  format %{ "vpmullw $dst,$src,$mem\t! mul packedS" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector mul
instruct vmulI(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVI dst src));
  format %{ "pmulld $dst,$src\t! mul packedI" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulI_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVI src1 src2));
  format %{ "vpmulld $dst,$src1,$src2\t! mul packedI" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulI_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVI src (LoadVector mem)));
  format %{ "vpmulld $dst,$src,$mem\t! mul packedI" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector mul
instruct vmulL_reg(vec dst, vec src1, vec src2) %{
  match(Set dst (MulVL src1 src2));
  format %{ "vpmullq $dst,$src1,$src2\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vector_len = vector_length_encoding(this);
    __ vpmullq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulL_mem(vec dst, vec src, memory mem) %{
  match(Set dst (MulVL src (LoadVector mem)));
  format %{ "vpmullq $dst,$src,$mem\t! mul packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vector_len = vector_length_encoding(this);
    __ vpmullq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

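// Note: there is no packed 64x64->64 bit multiply below AVX-512 (vpmullq), which is why
// the vmulL rules above assert UseAVX > 2 and have no SSE/AVX2 variant here.
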
// Floats vector mul
instruct vmulF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVF dst src));
  format %{ "mulps $dst,$src\t! mul packedF" %}
  ins_encode %{
    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src1 src2));
  format %{ "vmulps $dst,$src1,$src2\t! mul packedF" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVF src (LoadVector mem)));
  format %{ "vmulps $dst,$src,$mem\t! mul packedF" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector mul
instruct vmulD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (MulVD dst src));
  format %{ "mulpd $dst,$src\t! mul packedD" %}
  ins_encode %{
    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src1 src2));
  format %{ "vmulpd $dst,$src1,$src2\t! mul packedD" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmulD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0);
  match(Set dst (MulVD src (LoadVector mem)));
  format %{ "vmulpd $dst,$src,$mem\t! mul packedD" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov8F_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
  match(Set dst (CMoveVF (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmpps.$copnd $dst, $src1, $src2 ! vcmovevf, cond=$cop\n\t"
            "blendvps $dst,$src1,$src2,$dst ! vcmovevf\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcmov4D_reg(legVec dst, legVec src1, legVec src2, immI8 cop, cmpOp_vcmppd copnd) %{
  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
  effect(TEMP dst, USE src1, USE src2);
  format %{ "cmppd.$copnd $dst, $src1, $src2 ! vcmovevd, cond=$cop\n\t"
            "blendvpd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
         %}
  ins_encode %{
    int vector_len = 1;
    int cond = (Assembler::Condition)($copnd$$cmpcode);
    __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
    __ blendvpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

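// The CMoveV rules above implement a per-lane select: cmpps/cmppd leaves an all-ones or
// all-zeros mask in $dst for each lane, and blendvps/blendvpd then picks that lane from
// $src1 or $src2 according to the mask.
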
// --------------------------------- DIV --------------------------------------

// Floats vector div
instruct vdivF(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVF dst src));
  format %{ "divps $dst,$src\t! div packedF" %}
  ins_encode %{
    __ divps($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivF_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVF src1 src2));
  format %{ "vdivps $dst,$src1,$src2\t! div packedF" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivF_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVF src (LoadVector mem)));
  format %{ "vdivps $dst,$src,$mem\t! div packedF" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Doubles vector div
instruct vdivD(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (DivVD dst src));
  format %{ "divpd $dst,$src\t! div packedD" %}
  ins_encode %{
    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivD_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVD src1 src2));
  format %{ "vdivpd $dst,$src1,$src2\t! div packedD" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vdivD_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0);
  match(Set dst (DivVD src (LoadVector mem)));
  format %{ "vdivpd $dst,$src,$mem\t! div packedD" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Sqrt --------------------------------------

instruct vsqrtF_reg(vec dst, vec src) %{
  match(Set dst (SqrtVF src));
  format %{ "vsqrtps $dst,$src\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vector_len = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrtF_mem(vec dst, memory mem) %{
  match(Set dst (SqrtVF (LoadVector mem)));
  format %{ "vsqrtps $dst,$mem\t! sqrt packedF" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vector_len = vector_length_encoding(this);
    __ vsqrtps($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// Floating point vector sqrt
instruct vsqrtD_reg(vec dst, vec src) %{
  match(Set dst (SqrtVD src));
  format %{ "vsqrtpd $dst,$src\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vector_len = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vsqrtD_mem(vec dst, memory mem) %{
  match(Set dst (SqrtVD (LoadVector mem)));
  format %{ "vsqrtpd $dst,$mem\t! sqrt packedD" %}
  ins_encode %{
    assert(UseAVX > 0, "required");
    int vector_len = vector_length_encoding(this);
    __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// ------------------------------ Shift ---------------------------------------

// Left and right shift count vectors are the same on x86
// (only lowest bits of xmm reg are used for count).
instruct vshiftcnt(vec dst, rRegI cnt) %{
  match(Set dst (LShiftCntV cnt));
  match(Set dst (RShiftCntV cnt));
  format %{ "movdl $dst,$cnt\t! load shift count" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $cnt$$Register);
  %}
  ins_pipe( pipe_slow );
%}

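// Example: for a vector shift by a scalar count, C2 first materializes the count with the
// vshiftcnt rule above (movdl into the low element) and then feeds that XMM register to
// the vshift* rules below; the packed shift instructions read only the low bits of it.
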
// Byte vector shift
instruct vshiftB(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
  predicate(n->as_Vector()->length() <= 8);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp, TEMP scratch);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();
    __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ pand($dst$$XMMRegister, $tmp$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX <= 1);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, USE src, USE shift, TEMP tmp1, TEMP tmp2, TEMP scratch);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseSSE > 3, "required");
    int opcode = this->ideal_Opcode();

    __ vextendbw(opcode, $tmp1$$XMMRegister, $src$$XMMRegister);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $shift$$XMMRegister);
    __ pshufd($tmp2$$XMMRegister, $src$$XMMRegister, 0xE);
    __ vextendbw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $shift$$XMMRegister);
    __ movdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ pand($tmp2$$XMMRegister, $dst$$XMMRegister);
    __ pand($dst$$XMMRegister, $tmp1$$XMMRegister);
    __ packuswb($dst$$XMMRegister, $tmp2$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift16B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
  predicate(n->as_Vector()->length() == 16 && UseAVX > 1);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = Assembler::AVX_256bit;
    __ vextendbw(opcode, $tmp$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
    __ vextracti128_high($dst$$XMMRegister, $tmp$$XMMRegister);
    __ vpackuswb($dst$$XMMRegister, $tmp$$XMMRegister, $dst$$XMMRegister, 0);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift32B_avx(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
  predicate(n->as_Vector()->length() == 32);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 1, "required");
    int opcode = this->ideal_Opcode();
    int vector_len = Assembler::AVX_256bit;
    __ vextracti128_high($tmp$$XMMRegister, $src$$XMMRegister);
    __ vextendbw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vextendbw(opcode, $dst$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vshiftw(opcode, $tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vshiftw(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vpand($tmp$$XMMRegister, $tmp$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
    __ vpand($dst$$XMMRegister, $dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), vector_len, $scratch$$Register);
    __ vpackuswb($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 0xD8, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vshift64B_avx(vec dst, vec src, vec shift, vec tmp1, vec tmp2, rRegI scratch) %{
  predicate(n->as_Vector()->length() == 64);
  match(Set dst (LShiftVB src shift));
  match(Set dst (RShiftVB src shift));
  match(Set dst (URShiftVB src shift));
  effect(TEMP dst, TEMP tmp1, TEMP tmp2, TEMP scratch);
  format %{"vector_byte_shift $dst,$src,$shift" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int opcode = this->ideal_Opcode();
    int vector_len = Assembler::AVX_512bit;
    __ vextracti64x4($tmp1$$XMMRegister, $src$$XMMRegister, 1);
    __ vextendbw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, vector_len);
    __ vextendbw(opcode, $tmp2$$XMMRegister, $src$$XMMRegister, vector_len);
    __ vshiftw(opcode, $tmp1$$XMMRegister, $tmp1$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vshiftw(opcode, $tmp2$$XMMRegister, $tmp2$$XMMRegister, $shift$$XMMRegister, vector_len);
    __ vmovdqu($dst$$XMMRegister, ExternalAddress(vector_short_to_byte_mask()), $scratch$$Register);
    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpand($tmp1$$XMMRegister, $tmp1$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpand($tmp2$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
    __ vpackuswb($dst$$XMMRegister, $tmp1$$XMMRegister, $tmp2$$XMMRegister, vector_len);
    __ evmovdquq($tmp2$$XMMRegister, ExternalAddress(vector_byte_perm_mask()), vector_len, $scratch$$Register);
    __ vpermq($dst$$XMMRegister, $tmp2$$XMMRegister, $dst$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

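// As with the byte multiplies, x86 has no packed byte shift, so the byte shift rules above
// widen to 16-bit lanes (vextendbw sign- or zero-extends depending on the shift opcode),
// shift with vshiftw, mask the results back to bytes and re-pack, with an extra lane
// permute for the 256/512-bit cases.
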
// Shorts vector logical right shift produces an incorrect Java result
// for negative data because Java code converts short values into int with
// sign extension before a shift. But char vectors are fine since chars are
// unsigned values.
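// For example, a short lane holding -4 (0xFFFC) shifted by ">>> 1" in Java is evaluated on
// the sign-extended int and narrows back to 0xFFFE (-2), while a packed 16-bit logical
// shift would produce 0x7FFE (32766). Char data has no sign extension, so the packed shift
// matches the Java result.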
// Shorts/Chars vector shift
instruct vshiftS(vec dst, vec src, vec shift) %{
  match(Set dst (LShiftVS src shift));
  match(Set dst (RShiftVS src shift));
  match(Set dst (URShiftVS src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftw $dst,$src,$shift\t! shift packedS" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vlen_enc = vector_length_encoding(this);
      __ vshiftw(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vlen_enc);
    } else {
      int vlen = vector_length(this);
      if (vlen == 2) {
        __ movflt($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else if (vlen == 4) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 8, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftw(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Integers vector shift
instruct vshiftI(vec dst, vec src, vec shift) %{
  match(Set dst (LShiftVI src shift));
  match(Set dst (RShiftVI src shift));
  match(Set dst (URShiftVI src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftd $dst,$src,$shift\t! shift packedI" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vector_len = vector_length_encoding(this);
      __ vshiftd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
    } else {
      int vlen = vector_length(this);
      if (vlen == 2) {
        __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      } else {
        assert(vlen == 4, "sanity");
        __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
        __ vshiftd(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}

// Longs vector shift
instruct vshiftL(vec dst, vec src, vec shift) %{
  match(Set dst (LShiftVL src shift));
  match(Set dst (URShiftVL src shift));
  effect(TEMP dst, USE src, USE shift);
  format %{ "vshiftq $dst,$src,$shift\t! shift packedL" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    if (UseAVX > 0) {
      int vector_len = vector_length_encoding(this);
      __ vshiftq(opcode, $dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
    } else {
      assert(vector_length(this) == 2, "");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ vshiftq(opcode, $dst$$XMMRegister, $shift$$XMMRegister);
    }
  %}
  ins_pipe( pipe_slow );
%}

// -------------------ArithmeticRightShift -----------------------------------
// Long vector arithmetic right shift
instruct vshiftL_arith_reg(vec dst, vec src, vec shift, vec tmp, rRegI scratch) %{
  predicate(UseAVX <= 2);
  match(Set dst (RShiftVL src shift));
  effect(TEMP dst, TEMP tmp, TEMP scratch);
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen == 2) {
      assert(UseSSE >= 2, "required");
      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
      __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
      __ movdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
      __ psrlq($tmp$$XMMRegister, $shift$$XMMRegister);
      __ pxor($dst$$XMMRegister, $tmp$$XMMRegister);
      __ psubq($dst$$XMMRegister, $tmp$$XMMRegister);
    } else {
      assert(vlen == 4, "sanity");
      assert(UseAVX > 1, "required");
      int vector_len = Assembler::AVX_256bit;
      __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
      __ vmovdqu($tmp$$XMMRegister, ExternalAddress(vector_long_sign_mask()), $scratch$$Register);
      __ vpsrlq($tmp$$XMMRegister, $tmp$$XMMRegister, $shift$$XMMRegister, vector_len);
      __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
      __ vpsubq($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister, vector_len);
    }
  %}
  ins_pipe( pipe_slow );
%}

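// The rule above works because, per 64-bit lane, an arithmetic right shift can be
// emulated as  sra(x, n) == ((x >>> n) ^ m) - m  with  m = 0x8000000000000000 >>> n
// (vector_long_sign_mask shifted by the same count), which is exactly the
// psrlq/pxor/psubq sequence used for both the 128-bit and 256-bit cases.
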
instruct vshiftL_arith_reg_evex(vec dst, vec src, vec shift) %{
  predicate(UseAVX > 2);
  match(Set dst (RShiftVL src shift));
  format %{ "vshiftq $dst,$src,$shift" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ evpsraq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- AND --------------------------------------

instruct vand(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (AndV dst src));
  format %{ "pand $dst,$src\t! and vectors" %}
  ins_encode %{
    __ pand($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src1 src2));
  format %{ "vpand $dst,$src1,$src2\t! and vectors" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vand_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0);
  match(Set dst (AndV src (LoadVector mem)));
  format %{ "vpand $dst,$src,$mem\t! and vectors" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- OR ---------------------------------------

instruct vor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (OrV dst src));
  format %{ "por $dst,$src\t! or vectors" %}
  ins_encode %{
    __ por($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src1 src2));
  format %{ "vpor $dst,$src1,$src2\t! or vectors" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vor_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0);
  match(Set dst (OrV src (LoadVector mem)));
  format %{ "vpor $dst,$src,$mem\t! or vectors" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- XOR --------------------------------------

instruct vxor(vec dst, vec src) %{
  predicate(UseAVX == 0);
  match(Set dst (XorV dst src));
  format %{ "pxor $dst,$src\t! xor vectors" %}
  ins_encode %{
    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_reg(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (XorV src1 src2));
  format %{ "vpxor $dst,$src1,$src2\t! xor vectors" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vxor_mem(vec dst, vec src, memory mem) %{
  predicate(UseAVX > 0);
  match(Set dst (XorV src (LoadVector mem)));
  format %{ "vpxor $dst,$src,$mem\t! xor vectors" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABS --------------------------------------
// a = |a|
instruct vabsB_reg(vec dst, vec src) %{
  match(Set dst (AbsVB src));
  format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen <= 16) {
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsS_reg(vec dst, vec src) %{
  match(Set dst (AbsVS src));
  format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen <= 8) {
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsI_reg(vec dst, vec src) %{
  match(Set dst (AbsVI src));
  format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
  ins_encode %{
    uint vlen = vector_length(this);
    if (vlen <= 4) {
      __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsL_reg(vec dst, vec src) %{
  match(Set dst (AbsVL src));
  format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vector_len = vector_length_encoding(this);
    __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- ABSNEG --------------------------------------

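// AbsVF/AbsVD and NegVF/NegVD only touch the sign bit: the vabsnegf/vabsnegd helpers load
// a sign-bit mask constant through $scratch (the "[mask]" shown in the formats below) and
// either clear the sign bit with an AND (abs) or flip it with an XOR (neg).
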
instruct vabsnegF(vec dst, vec src, rRegI scratch) %{
  predicate(n->as_Vector()->length() != 4); // handled by 1-operand instruction vabsneg4F
  match(Set dst (AbsVF src));
  match(Set dst (NegVF src));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen = vector_length(this);
    if (vlen == 2) {
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register);
    } else {
      assert(vlen == 8 || vlen == 16, "required");
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegf(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsneg4F(vec dst, rRegI scratch) %{
  predicate(n->as_Vector()->length() == 4);
  match(Set dst (AbsVF dst));
  match(Set dst (NegVF dst));
  effect(TEMP scratch);
  format %{ "vabsnegf $dst,[mask]\t# absneg packed4F" %}
  ins_cost(150);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    __ vabsnegf(opcode, $dst$$XMMRegister, $dst$$XMMRegister, $scratch$$Register);
  %}
  ins_pipe( pipe_slow );
%}

instruct vabsnegD(vec dst, vec src, rRegI scratch) %{
  match(Set dst (AbsVD src));
  match(Set dst (NegVD src));
  effect(TEMP scratch);
  format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    uint vlen = vector_length(this);
    if (vlen == 2) {
      assert(UseSSE >= 2, "required");
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register);
    } else {
      int vlen_enc = vector_length_encoding(this);
      __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register);
    }
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- FMA --------------------------------------
// a * b + c
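// The rules below assert UseFMA. Note that a fused multiply-add rounds only once, so the
// result can differ in the last bit from a separate vector multiply followed by an add.
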
instruct vfmaF_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVF c (Binary a b)));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vector_len = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaF_mem(vec a, memory b, vec c) %{
  match(Set c (FmaVF c (Binary a (LoadVector b))));
  format %{ "fmaps $a,$b,$c\t# $c = $a * $b + $c fma packedF" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vector_len = vector_length_encoding(this);
    __ vfmaf($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_reg(vec a, vec b, vec c) %{
  match(Set c (FmaVD c (Binary a b)));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vector_len = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$XMMRegister, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vfmaD_mem(vec a, memory b, vec c) %{
  match(Set c (FmaVD c (Binary a (LoadVector b))));
  format %{ "fmapd $a,$b,$c\t# $c = $a * $b + $c fma packedD" %}
  ins_cost(150);
  ins_encode %{
    assert(UseFMA, "not enabled");
    int vector_len = vector_length_encoding(this);
    __ vfmad($c$$XMMRegister, $a$$XMMRegister, $b$$Address, $c$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add --------------------------------------

instruct vmuladdS2I_reg_sse(vec dst, vec src1) %{
  predicate(UseAVX == 0);
  match(Set dst (MulAddVS2VI dst src1));
  format %{ "pmaddwd $dst,$dst,$src1\t! muladd packedStoI" %}
  ins_encode %{
    __ pmaddwd($dst$$XMMRegister, $src1$$XMMRegister);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmuladdS2I_reg_avx(vec dst, vec src1, vec src2) %{
  predicate(UseAVX > 0);
  match(Set dst (MulAddVS2VI src1 src2));
  format %{ "vpmaddwd $dst,$src1,$src2\t! muladd packedStoI" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpmaddwd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

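// pmaddwd/vpmaddwd multiplies the signed 16-bit lanes and adds adjacent pairs of products
// into 32-bit lanes, i.e. per int lane:
//   dst[i] = src1[2*i] * src2[2*i] + src1[2*i+1] * src2[2*i+1]
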
// --------------------------------- Vector Multiply Add Add ----------------------------------

instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vector_len = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}

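// evpdpwssd (AVX-512 VNNI) fuses the vpmaddwd + vpaddd pair into one instruction,
// accumulating src1[2*i]*src2[2*i] + src1[2*i+1]*src2[2*i+1] into dst[i]; that is why the
// rule above matches the AddVI-of-MulAddVS2VI pattern and is given a low ins_cost.
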
// --------------------------------- PopCount --------------------------------------

instruct vpopcountI(vec dst, vec src) %{
  match(Set dst (PopCountVI src));
  format %{ "vpopcntd $dst,$src\t! vector popcount packedI" %}
  ins_encode %{
    assert(UsePopCountInstruction, "not enabled");

    int vector_len = vector_length_encoding(this);
    __ vpopcntd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

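// vpopcntd is provided by the AVX-512 VPOPCNTDQ extension; the assert above only checks
// UsePopCountInstruction, and the CPU-feature check is expected to happen where the
// matcher decides whether PopCountVI is supported at all.
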
// --------------------------------- Bitwise Ternary Logic ----------------------------------

instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$Address, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
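
// The 8-bit $func immediate of vpternlogd is a truth table: for each bit position, the
// bits taken from dst, src2 and src3 form a 3-bit index (dst is the most significant) and
// select one bit of the immediate. For example, 0x96 yields the three-input XOR and 0xE8
// the majority function.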