8309531: Incorrect result with unwrapped iotaShuffle.
Reviewed-by: sviswanathan, xgong, thartmann
This commit is contained in:
parent
7b3c2dc5f4
commit
d6578bff1c
src/hotspot/share/opto
test/hotspot/jtreg/compiler/vectorapi
@ -592,16 +592,12 @@ bool LibraryCallKit::inline_vector_shuffle_iota() {
|
||||
const TypeInt* step_val = gvn().type(argument(5))->isa_int();
|
||||
const TypeInt* wrap = gvn().type(argument(6))->isa_int();
|
||||
|
||||
Node* start = argument(4);
|
||||
Node* step = argument(5);
|
||||
|
||||
if (shuffle_klass == nullptr || vlen == nullptr || start_val == nullptr || step_val == nullptr || wrap == nullptr) {
|
||||
return false; // dead code
|
||||
}
|
||||
if (!vlen->is_con() || !is_power_of_2(vlen->get_con()) ||
|
||||
shuffle_klass->const_oop() == nullptr || !wrap->is_con()) {
|
||||
if (shuffle_klass == nullptr || shuffle_klass->const_oop() == nullptr ||
|
||||
vlen == nullptr || !vlen->is_con() || start_val == nullptr || step_val == nullptr ||
|
||||
wrap == nullptr || !wrap->is_con()) {
|
||||
return false; // not enough info for intrinsification
|
||||
}
|
||||
|
||||
if (!is_klass_initialized(shuffle_klass)) {
|
||||
if (C->print_intrinsics()) {
|
||||
tty->print_cr(" ** klass argument not initialized");
|
||||
@ -613,33 +609,50 @@ bool LibraryCallKit::inline_vector_shuffle_iota() {
|
||||
int num_elem = vlen->get_con();
|
||||
BasicType elem_bt = T_BYTE;
|
||||
|
||||
if (!arch_supports_vector(VectorNode::replicate_opcode(elem_bt), num_elem, elem_bt, VecMaskNotUsed)) {
|
||||
bool effective_indices_in_range = false;
|
||||
if (start_val->is_con() && step_val->is_con()) {
|
||||
int effective_min_index = start_val->get_con();
|
||||
int effective_max_index = start_val->get_con() + step_val->get_con() * (num_elem - 1);
|
||||
effective_indices_in_range = effective_max_index >= effective_min_index && effective_min_index >= -128 && effective_max_index <= 127;
|
||||
}
|
||||
|
||||
if (!do_wrap && !effective_indices_in_range) {
|
||||
// Disable instrinsification for unwrapped shuffle iota if start/step
|
||||
// values are non-constant OR if intermediate result overflows byte value range.
|
||||
return false;
|
||||
}
|
||||
if (!arch_supports_vector(Op_AddVB, num_elem, elem_bt, VecMaskNotUsed)) {
|
||||
|
||||
if (!arch_supports_vector(Op_AddVB, num_elem, elem_bt, VecMaskNotUsed) ||
|
||||
!arch_supports_vector(Op_AndV, num_elem, elem_bt, VecMaskNotUsed) ||
|
||||
!arch_supports_vector(Op_VectorLoadConst, num_elem, elem_bt, VecMaskNotUsed) ||
|
||||
!arch_supports_vector(VectorNode::replicate_opcode(elem_bt), num_elem, elem_bt, VecMaskNotUsed)) {
|
||||
return false;
|
||||
}
|
||||
if (!arch_supports_vector(Op_AndV, num_elem, elem_bt, VecMaskNotUsed)) {
|
||||
|
||||
if (!do_wrap &&
|
||||
(!arch_supports_vector(Op_SubVB, num_elem, elem_bt, VecMaskNotUsed) ||
|
||||
!arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskNotUsed) ||
|
||||
!arch_supports_vector(Op_VectorMaskCmp, num_elem, elem_bt, VecMaskNotUsed))) {
|
||||
return false;
|
||||
}
|
||||
if (!arch_supports_vector(Op_VectorLoadConst, num_elem, elem_bt, VecMaskNotUsed)) {
|
||||
return false;
|
||||
}
|
||||
if (!arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskUseLoad)) {
|
||||
return false;
|
||||
}
|
||||
if (!arch_supports_vector(Op_VectorMaskCmp, num_elem, elem_bt, VecMaskUseStore)) {
|
||||
|
||||
bool step_multiply = !step_val->is_con() || !is_power_of_2(step_val->get_con());
|
||||
if ((step_multiply && !arch_supports_vector(Op_MulVB, num_elem, elem_bt, VecMaskNotUsed)) ||
|
||||
(!step_multiply && !arch_supports_vector(Op_LShiftVB, num_elem, elem_bt, VecMaskNotUsed))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const Type * type_bt = Type::get_const_basic_type(elem_bt);
|
||||
const TypeVect * vt = TypeVect::make(type_bt, num_elem);
|
||||
|
||||
Node* res = gvn().transform(new VectorLoadConstNode(gvn().makecon(TypeInt::ZERO), vt));
|
||||
Node* res = gvn().transform(new VectorLoadConstNode(gvn().makecon(TypeInt::ZERO), vt));
|
||||
|
||||
if(!step_val->is_con() || !is_power_of_2(step_val->get_con())) {
|
||||
Node* start = argument(4);
|
||||
Node* step = argument(5);
|
||||
|
||||
if (step_multiply) {
|
||||
Node* bcast_step = gvn().transform(VectorNode::scalar2vector(step, num_elem, type_bt));
|
||||
res = gvn().transform(VectorNode::make(Op_MulI, res, bcast_step, num_elem, elem_bt));
|
||||
res = gvn().transform(VectorNode::make(Op_MulVB, res, bcast_step, vt));
|
||||
} else if (step_val->get_con() > 1) {
|
||||
Node* cnt = gvn().makecon(TypeInt::make(log2i_exact(step_val->get_con())));
|
||||
Node* shift_cnt = vector_shift_count(cnt, Op_LShiftI, elem_bt, num_elem);
|
||||
@ -648,24 +661,25 @@ bool LibraryCallKit::inline_vector_shuffle_iota() {
|
||||
|
||||
if (!start_val->is_con() || start_val->get_con() != 0) {
|
||||
Node* bcast_start = gvn().transform(VectorNode::scalar2vector(start, num_elem, type_bt));
|
||||
res = gvn().transform(VectorNode::make(Op_AddI, res, bcast_start, num_elem, elem_bt));
|
||||
res = gvn().transform(VectorNode::make(Op_AddVB, res, bcast_start, vt));
|
||||
}
|
||||
|
||||
Node * mod_val = gvn().makecon(TypeInt::make(num_elem-1));
|
||||
Node * bcast_mod = gvn().transform(VectorNode::scalar2vector(mod_val, num_elem, type_bt));
|
||||
if(do_wrap) {
|
||||
|
||||
if (do_wrap) {
|
||||
// Wrap the indices greater than lane count.
|
||||
res = gvn().transform(VectorNode::make(Op_AndI, res, bcast_mod, num_elem, elem_bt));
|
||||
res = gvn().transform(VectorNode::make(Op_AndV, res, bcast_mod, vt));
|
||||
} else {
|
||||
ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(BoolTest::ge));
|
||||
ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(BoolTest::ugt));
|
||||
Node * lane_cnt = gvn().makecon(TypeInt::make(num_elem));
|
||||
Node * bcast_lane_cnt = gvn().transform(VectorNode::scalar2vector(lane_cnt, num_elem, type_bt));
|
||||
const TypeVect* vmask_type = TypeVect::makemask(elem_bt, num_elem);
|
||||
Node* mask = gvn().transform(new VectorMaskCmpNode(BoolTest::ge, bcast_lane_cnt, res, pred_node, vmask_type));
|
||||
Node* mask = gvn().transform(new VectorMaskCmpNode(BoolTest::ugt, bcast_lane_cnt, res, pred_node, vmask_type));
|
||||
|
||||
// Make the indices greater than lane count as -ve values. This matches the java side implementation.
|
||||
res = gvn().transform(VectorNode::make(Op_AndI, res, bcast_mod, num_elem, elem_bt));
|
||||
Node * biased_val = gvn().transform(VectorNode::make(Op_SubI, res, bcast_lane_cnt, num_elem, elem_bt));
|
||||
// Make the indices greater than lane count as -ve values to match the java side implementation.
|
||||
res = gvn().transform(VectorNode::make(Op_AndV, res, bcast_mod, vt));
|
||||
Node * biased_val = gvn().transform(VectorNode::make(Op_SubVB, res, bcast_lane_cnt, vt));
|
||||
res = gvn().transform(new VectorBlendNode(biased_val, res, mask));
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
/*
|
||||
* Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
|
||||
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -23,7 +24,9 @@
|
||||
|
||||
package compiler.vectorapi;
|
||||
|
||||
import jdk.incubator.vector.ByteVector;
|
||||
import jdk.incubator.vector.IntVector;
|
||||
import jdk.incubator.vector.ShortVector;
|
||||
import jdk.incubator.vector.VectorSpecies;
|
||||
import jdk.incubator.vector.VectorShuffle;
|
||||
|
||||
@ -31,16 +34,94 @@ import jdk.incubator.vector.VectorShuffle;
|
||||
* @test
|
||||
* @bug 8265907
|
||||
* @modules jdk.incubator.vector
|
||||
* @run main/othervm compiler.vectorapi.TestVectorShuffleIota
|
||||
* @run main/othervm -XX:CompileThresholdScaling=0.3 -XX:-TieredCompilation compiler.vectorapi.TestVectorShuffleIota
|
||||
*/
|
||||
|
||||
public class TestVectorShuffleIota {
|
||||
static final VectorSpecies<Integer> SPECIESi = IntVector.SPECIES_128;
|
||||
static final VectorSpecies<Short> SPECIESs = ShortVector.SPECIES_128;
|
||||
static final VectorSpecies<Byte> SPECIESb = ByteVector.SPECIES_128;
|
||||
|
||||
static final int INVOC_COUNT = 50000;
|
||||
static final int INVOC_COUNT = 5000;
|
||||
|
||||
static int[] ai = {87, 65, 78, 71};
|
||||
|
||||
static long expected_value(VectorSpecies<?> SPECIES, int start, int step, boolean wrap) {
|
||||
long res = 0;
|
||||
int lanesM1 = SPECIES.length() - 1;
|
||||
if (wrap) {
|
||||
res = (lanesM1 & (start + step * lanesM1));
|
||||
} else {
|
||||
int effective_index = start + step * lanesM1;
|
||||
int wrapped_effective_index = effective_index & lanesM1;
|
||||
res = (effective_index == wrapped_effective_index ?
|
||||
wrapped_effective_index :
|
||||
-SPECIES.length() + wrapped_effective_index);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static void validateTests(long actual, VectorSpecies<?> SPECIES, int start, int step, boolean wrap) {
|
||||
long expected = expected_value(SPECIES, start, step, wrap);
|
||||
if (actual != expected) {
|
||||
throw new AssertionError("Result Mismatch!, actual = " + actual + " expected = " + expected);
|
||||
}
|
||||
}
|
||||
|
||||
static void testShuffleIotaB128(int start, int step, boolean wrap) {
|
||||
long res = SPECIESb.iotaShuffle(start, step, wrap)
|
||||
.laneSource(SPECIESb.length()-1);
|
||||
validateTests(res, SPECIESb, start, step, wrap);
|
||||
}
|
||||
|
||||
static void testShuffleIotaS128(int start, int step, boolean wrap) {
|
||||
long res = SPECIESs.iotaShuffle(start, step, wrap)
|
||||
.laneSource(SPECIESs.length()-1);
|
||||
validateTests(res, SPECIESs, start, step, wrap);
|
||||
}
|
||||
|
||||
static void testShuffleIotaI128(int start, int step, boolean wrap) {
|
||||
long res = SPECIESi.iotaShuffle(start, step, wrap)
|
||||
.laneSource(SPECIESi.length()-1);
|
||||
validateTests(res, SPECIESi, start, step, wrap);
|
||||
}
|
||||
|
||||
static void testShuffleIotaConst0B128() {
|
||||
long res = SPECIESb.iotaShuffle(-32, 1, false)
|
||||
.laneSource(SPECIESb.length()-1);
|
||||
validateTests(res, SPECIESb, -32, 1, false);
|
||||
}
|
||||
|
||||
static void testShuffleIotaConst0S128() {
|
||||
long res = SPECIESs.iotaShuffle(-32, 1, false)
|
||||
.laneSource(SPECIESs.length()-1);
|
||||
validateTests(res, SPECIESs, -32, 1, false);
|
||||
}
|
||||
|
||||
static void testShuffleIotaConst0I128() {
|
||||
long res = SPECIESi.iotaShuffle(-32, 1, false)
|
||||
.laneSource(SPECIESi.length()-1);
|
||||
validateTests(res, SPECIESi, -32, 1, false);
|
||||
}
|
||||
|
||||
static void testShuffleIotaConst1B128() {
|
||||
long res = SPECIESb.iotaShuffle(-32, 1, true)
|
||||
.laneSource(SPECIESb.length()-1);
|
||||
validateTests(res, SPECIESb, -32, 1, true);
|
||||
}
|
||||
|
||||
static void testShuffleIotaConst1S128() {
|
||||
long res = SPECIESs.iotaShuffle(-32, 1, true)
|
||||
.laneSource(SPECIESs.length()-1);
|
||||
validateTests(res, SPECIESs, -32, 1, true);
|
||||
}
|
||||
|
||||
static void testShuffleIotaConst1I128() {
|
||||
long res = SPECIESi.iotaShuffle(-32, 1, true)
|
||||
.laneSource(SPECIESi.length()-1);
|
||||
validateTests(res, SPECIESi, -32, 1, true);
|
||||
}
|
||||
|
||||
static void testShuffleI() {
|
||||
IntVector iv = (IntVector) VectorShuffle.iota(SPECIESi, 0, 2, false).toVector();
|
||||
iv.intoArray(ai, 0);
|
||||
@ -53,6 +134,34 @@ public class TestVectorShuffleIota {
|
||||
for (int i = 0; i < ai.length; i++) {
|
||||
System.out.print(ai[i] + ", ");
|
||||
}
|
||||
for (int i = 0; i < INVOC_COUNT; i++) {
|
||||
testShuffleIotaI128(128, 1, true);
|
||||
testShuffleIotaI128(128, 1, false);
|
||||
testShuffleIotaI128(-128, 1, true);
|
||||
testShuffleIotaI128(-128, 1, false);
|
||||
testShuffleIotaI128(1, 1, true);
|
||||
testShuffleIotaI128(1, 1, false);
|
||||
|
||||
testShuffleIotaS128(128, 1, true);
|
||||
testShuffleIotaS128(128, 1, false);
|
||||
testShuffleIotaS128(-128, 1, true);
|
||||
testShuffleIotaS128(-128, 1, false);
|
||||
testShuffleIotaS128(1, 1, true);
|
||||
testShuffleIotaS128(1, 1, false);
|
||||
|
||||
testShuffleIotaB128(128, 1, true);
|
||||
testShuffleIotaB128(128, 1, false);
|
||||
testShuffleIotaB128(-128, 1, true);
|
||||
testShuffleIotaB128(-128, 1, false);
|
||||
testShuffleIotaB128(1, 1, true);
|
||||
testShuffleIotaB128(1, 1, false);
|
||||
testShuffleIotaConst0B128();
|
||||
testShuffleIotaConst0S128();
|
||||
testShuffleIotaConst0I128();
|
||||
testShuffleIotaConst1B128();
|
||||
testShuffleIotaConst1S128();
|
||||
testShuffleIotaConst1I128();
|
||||
}
|
||||
System.out.println();
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user