# Patch summary (free-text preamble; everything from the first "diff --git" onward is the unmodified patch).
#
# NOTE(review): in this copy the patch's line breaks have been folded into three physical
# lines. The "@@ -a,b +c,d @@" hunk headers still count one record per line (e.g. -13276,7
# +13276,7), so the original line structure has to be restored before a patch tool can apply it.
#
# Files touched and what each hunk does:
#
# 1. hotspot/src/cpu/aarch64/vm/aarch64.ad
#    In the hunks headed replicate16B_imm and replicate8S_imm, the constant handed to
#    `__ mov(...)` is now masked: `$con$$constant & 0xff` for T16B and
#    `$con$$constant & 0xffff` for T8H.
#    Presumably this keeps the new "extraneous bits" asserts in MacroAssembler::mov (item 3)
#    satisfied when the constant has higher bits set -- confirm against callers.
#
# 2. hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp (hunk headed `class Address`)
#    After `size` is read with `i->get(31, 30)`, a new check is added: when both
#    `i->get(26, 26)` and `i->get(23, 23)` are non-zero, it asserts `size == 0` and then
#    overrides `size` with 0b100. The added comment labels this case
#    "SIMD Q Type - Size = 128 bits". As a result the following `if (size == 0)` byte case
#    is not taken for such instructions and the `else` branch is used instead.
#
# 3. hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp
#    Adds an out-of-line definition of
#    `MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32)`:
#      - asserts T is neither T1D nor T2D;
#      - T8B/T16B: asserts no bits outside the low 8 are set, emits a single
#        `movi(Vd, T, imm32 & 0xff, 0)` and returns;
#      - T4H/T8H: asserts no bits outside the low 16 are set, then masks imm32 and its
#        complement to 16 bits;
#      - counts the non-zero bytes of imm32 (movi_cnt) and of ~imm32 (movn_cnt); if the
#        complement has fewer, it emits `mvni` for the lowest non-zero byte followed by
#        `bici` for each remaining non-zero byte, otherwise `movi` followed by `orri`;
#        `lsl` tracks the bit position (a multiple of 8) of the byte being emitted.
#
# 4. hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp
#    Removes the inline body of `mov` and leaves only its declaration. The removed version
#    silently masked imm32 to 0xff / 0xffff (no asserts) and ran T8B/T16B through the same
#    movi/mvni selection as the wider arrangements.
#
diff --git a/hotspot/src/cpu/aarch64/vm/aarch64.ad b/hotspot/src/cpu/aarch64/vm/aarch64.ad index a9cd41a7949..f555721eaa1 100644 --- a/hotspot/src/cpu/aarch64/vm/aarch64.ad +++ b/hotspot/src/cpu/aarch64/vm/aarch64.ad @@ -13276,7 +13276,7 @@ instruct replicate16B_imm(vecX dst, immI con) ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector(16B)" %} ins_encode %{ - __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant); + __ mov(as_FloatRegister($dst$$reg), __ T16B, $con$$constant & 0xff); %} ins_pipe(pipe_class_default); %} @@ -13298,7 +13298,7 @@ instruct replicate8S_imm(vecX dst, immI con) ins_cost(INSN_COST); format %{ "movi $dst, $con\t# vector(8H)" %} ins_encode %{ - __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant); + __ mov(as_FloatRegister($dst$$reg), __ T8H, $con$$constant & 0xffff); %} ins_pipe(pipe_class_default); %} diff --git a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp index 5150a172f48..1de8ed8f1fb 100644 --- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp +++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp @@ -491,6 +491,11 @@ class Address VALUE_OBJ_CLASS_SPEC { i->rf(_index, 16); i->f(_ext.option(), 15, 13); unsigned size = i->get(31, 30); + if (i->get(26, 26) && i->get(23, 23)) { + // SIMD Q Type - Size = 128 bits + assert(size == 0, "bad size"); + size = 0b100; + } if (size == 0) // It's a byte i->f(_ext.shift() >= 0, 12); else { diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp index 2440ecb3c7f..2e230f7bc42 100644 --- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp +++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp @@ -1408,6 +1408,52 @@ void MacroAssembler::movptr(Register r, uintptr_t imm64) { movk(r, imm64 & 0xffff, 32); } +// Macro to mov replicated immediate to vector register. 
+// Vd will get the following values for different arrangements in T +// imm32 == hex 000000gh T8B: Vd = ghghghghghghghgh +// imm32 == hex 000000gh T16B: Vd = ghghghghghghghghghghghghghghghgh +// imm32 == hex 0000efgh T4H: Vd = efghefghefghefgh +// imm32 == hex 0000efgh T8H: Vd = efghefghefghefghefghefghefghefgh +// imm32 == hex abcdefgh T2S: Vd = abcdefghabcdefgh +// imm32 == hex abcdefgh T4S: Vd = abcdefghabcdefghabcdefghabcdefgh +// T1D/T2D: invalid +void MacroAssembler::mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32) { + assert(T != T1D && T != T2D, "invalid arrangement"); + if (T == T8B || T == T16B) { + assert((imm32 & ~0xff) == 0, "extraneous bits in unsigned imm32 (T8B/T16B)"); + movi(Vd, T, imm32 & 0xff, 0); + return; + } + u_int32_t nimm32 = ~imm32; + if (T == T4H || T == T8H) { + assert((imm32 & ~0xffff) == 0, "extraneous bits in unsigned imm32 (T4H/T8H)"); + imm32 &= 0xffff; + nimm32 &= 0xffff; + } + u_int32_t x = imm32; + int movi_cnt = 0; + int movn_cnt = 0; + while (x) { if (x & 0xff) movi_cnt++; x >>= 8; } + x = nimm32; + while (x) { if (x & 0xff) movn_cnt++; x >>= 8; } + if (movn_cnt < movi_cnt) imm32 = nimm32; + unsigned lsl = 0; + while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; } + if (movn_cnt < movi_cnt) + mvni(Vd, T, imm32 & 0xff, lsl); + else + movi(Vd, T, imm32 & 0xff, lsl); + imm32 >>= 8; lsl += 8; + while (imm32) { + while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; } + if (movn_cnt < movi_cnt) + bici(Vd, T, imm32 & 0xff, lsl); + else + orri(Vd, T, imm32 & 0xff, lsl); + lsl += 8; imm32 >>= 8; + } +} + void MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64) { #ifndef PRODUCT diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp index 6c47590716b..00efdacee66 100644 --- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp +++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp @@ -465,44 +465,7 @@ public: void movptr(Register 
r, uintptr_t imm64); - // Macro to mov replicated immediate to vector register. - // Where imm32 == hex abcdefgh, Vd will get the following values - // for different arrangements in T - // T8B: Vd = ghghghghghghghgh - // T16B: Vd = ghghghghghghghghghghghghghghghgh - // T4H: Vd = efghefghefghefgh - // T8H: Vd = efghefghefghefghefghefghefghefgh - // T2S: Vd = abcdefghabcdefgh - // T4S: Vd = abcdefghabcdefghabcdefghabcdefgh - // T1D/T2D: invalid - void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32) { - assert(T != T1D && T != T2D, "invalid arrangement"); - u_int32_t nimm32 = ~imm32; - if (T == T8B || T == T16B) { imm32 &= 0xff; nimm32 &= 0xff; } - if (T == T4H || T == T8H) { imm32 &= 0xffff; nimm32 &= 0xffff; } - u_int32_t x = imm32; - int movi_cnt = 0; - int movn_cnt = 0; - while (x) { if (x & 0xff) movi_cnt++; x >>= 8; } - x = nimm32; - while (x) { if (x & 0xff) movn_cnt++; x >>= 8; } - if (movn_cnt < movi_cnt) imm32 = nimm32; - unsigned lsl = 0; - while (imm32 && (imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; } - if (movn_cnt < movi_cnt) - mvni(Vd, T, imm32 & 0xff, lsl); - else - movi(Vd, T, imm32 & 0xff, lsl); - imm32 >>= 8; lsl += 8; - while (imm32) { - while ((imm32 & 0xff) == 0) { lsl += 8; imm32 >>= 8; } - if (movn_cnt < movi_cnt) - bici(Vd, T, imm32 & 0xff, lsl); - else - orri(Vd, T, imm32 & 0xff, lsl); - lsl += 8; imm32 >>= 8; - } - } + void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32); // macro instructions for accessing and updating floating point // status register