8309531: Incorrect result with unwrapped iotaShuffle.

Reviewed-by: sviswanathan, xgong, thartmann
This commit is contained in:
Jatin Bhateja 2023-07-05 05:46:24 +00:00
parent 7b3c2dc5f4
commit d6578bff1c
2 changed files with 154 additions and 31 deletions
src/hotspot/share/opto
test/hotspot/jtreg/compiler/vectorapi

@ -592,16 +592,12 @@ bool LibraryCallKit::inline_vector_shuffle_iota() {
const TypeInt* step_val = gvn().type(argument(5))->isa_int();
const TypeInt* wrap = gvn().type(argument(6))->isa_int();
Node* start = argument(4);
Node* step = argument(5);
if (shuffle_klass == nullptr || vlen == nullptr || start_val == nullptr || step_val == nullptr || wrap == nullptr) {
return false; // dead code
}
if (!vlen->is_con() || !is_power_of_2(vlen->get_con()) ||
shuffle_klass->const_oop() == nullptr || !wrap->is_con()) {
if (shuffle_klass == nullptr || shuffle_klass->const_oop() == nullptr ||
vlen == nullptr || !vlen->is_con() || start_val == nullptr || step_val == nullptr ||
wrap == nullptr || !wrap->is_con()) {
return false; // not enough info for intrinsification
}
if (!is_klass_initialized(shuffle_klass)) {
if (C->print_intrinsics()) {
tty->print_cr(" ** klass argument not initialized");
@ -613,33 +609,50 @@ bool LibraryCallKit::inline_vector_shuffle_iota() {
int num_elem = vlen->get_con();
BasicType elem_bt = T_BYTE;
if (!arch_supports_vector(VectorNode::replicate_opcode(elem_bt), num_elem, elem_bt, VecMaskNotUsed)) {
bool effective_indices_in_range = false;
if (start_val->is_con() && step_val->is_con()) {
int effective_min_index = start_val->get_con();
int effective_max_index = start_val->get_con() + step_val->get_con() * (num_elem - 1);
effective_indices_in_range = effective_max_index >= effective_min_index && effective_min_index >= -128 && effective_max_index <= 127;
}
if (!do_wrap && !effective_indices_in_range) {
// Disable instrinsification for unwrapped shuffle iota if start/step
// values are non-constant OR if intermediate result overflows byte value range.
return false;
}
if (!arch_supports_vector(Op_AddVB, num_elem, elem_bt, VecMaskNotUsed)) {
if (!arch_supports_vector(Op_AddVB, num_elem, elem_bt, VecMaskNotUsed) ||
!arch_supports_vector(Op_AndV, num_elem, elem_bt, VecMaskNotUsed) ||
!arch_supports_vector(Op_VectorLoadConst, num_elem, elem_bt, VecMaskNotUsed) ||
!arch_supports_vector(VectorNode::replicate_opcode(elem_bt), num_elem, elem_bt, VecMaskNotUsed)) {
return false;
}
if (!arch_supports_vector(Op_AndV, num_elem, elem_bt, VecMaskNotUsed)) {
if (!do_wrap &&
(!arch_supports_vector(Op_SubVB, num_elem, elem_bt, VecMaskNotUsed) ||
!arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskNotUsed) ||
!arch_supports_vector(Op_VectorMaskCmp, num_elem, elem_bt, VecMaskNotUsed))) {
return false;
}
if (!arch_supports_vector(Op_VectorLoadConst, num_elem, elem_bt, VecMaskNotUsed)) {
return false;
}
if (!arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskUseLoad)) {
return false;
}
if (!arch_supports_vector(Op_VectorMaskCmp, num_elem, elem_bt, VecMaskUseStore)) {
bool step_multiply = !step_val->is_con() || !is_power_of_2(step_val->get_con());
if ((step_multiply && !arch_supports_vector(Op_MulVB, num_elem, elem_bt, VecMaskNotUsed)) ||
(!step_multiply && !arch_supports_vector(Op_LShiftVB, num_elem, elem_bt, VecMaskNotUsed))) {
return false;
}
const Type * type_bt = Type::get_const_basic_type(elem_bt);
const TypeVect * vt = TypeVect::make(type_bt, num_elem);
Node* res = gvn().transform(new VectorLoadConstNode(gvn().makecon(TypeInt::ZERO), vt));
Node* res = gvn().transform(new VectorLoadConstNode(gvn().makecon(TypeInt::ZERO), vt));
if(!step_val->is_con() || !is_power_of_2(step_val->get_con())) {
Node* start = argument(4);
Node* step = argument(5);
if (step_multiply) {
Node* bcast_step = gvn().transform(VectorNode::scalar2vector(step, num_elem, type_bt));
res = gvn().transform(VectorNode::make(Op_MulI, res, bcast_step, num_elem, elem_bt));
res = gvn().transform(VectorNode::make(Op_MulVB, res, bcast_step, vt));
} else if (step_val->get_con() > 1) {
Node* cnt = gvn().makecon(TypeInt::make(log2i_exact(step_val->get_con())));
Node* shift_cnt = vector_shift_count(cnt, Op_LShiftI, elem_bt, num_elem);
@ -648,24 +661,25 @@ bool LibraryCallKit::inline_vector_shuffle_iota() {
if (!start_val->is_con() || start_val->get_con() != 0) {
Node* bcast_start = gvn().transform(VectorNode::scalar2vector(start, num_elem, type_bt));
res = gvn().transform(VectorNode::make(Op_AddI, res, bcast_start, num_elem, elem_bt));
res = gvn().transform(VectorNode::make(Op_AddVB, res, bcast_start, vt));
}
Node * mod_val = gvn().makecon(TypeInt::make(num_elem-1));
Node * bcast_mod = gvn().transform(VectorNode::scalar2vector(mod_val, num_elem, type_bt));
if(do_wrap) {
if (do_wrap) {
// Wrap the indices greater than lane count.
res = gvn().transform(VectorNode::make(Op_AndI, res, bcast_mod, num_elem, elem_bt));
res = gvn().transform(VectorNode::make(Op_AndV, res, bcast_mod, vt));
} else {
ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(BoolTest::ge));
ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(BoolTest::ugt));
Node * lane_cnt = gvn().makecon(TypeInt::make(num_elem));
Node * bcast_lane_cnt = gvn().transform(VectorNode::scalar2vector(lane_cnt, num_elem, type_bt));
const TypeVect* vmask_type = TypeVect::makemask(elem_bt, num_elem);
Node* mask = gvn().transform(new VectorMaskCmpNode(BoolTest::ge, bcast_lane_cnt, res, pred_node, vmask_type));
Node* mask = gvn().transform(new VectorMaskCmpNode(BoolTest::ugt, bcast_lane_cnt, res, pred_node, vmask_type));
// Make the indices greater than lane count as -ve values. This matches the java side implementation.
res = gvn().transform(VectorNode::make(Op_AndI, res, bcast_mod, num_elem, elem_bt));
Node * biased_val = gvn().transform(VectorNode::make(Op_SubI, res, bcast_lane_cnt, num_elem, elem_bt));
// Make the indices greater than lane count as -ve values to match the java side implementation.
res = gvn().transform(VectorNode::make(Op_AndV, res, bcast_mod, vt));
Node * biased_val = gvn().transform(VectorNode::make(Op_SubVB, res, bcast_lane_cnt, vt));
res = gvn().transform(new VectorBlendNode(biased_val, res, mask));
}

@ -1,5 +1,6 @@
/*
* Copyright (c) 2021, Huawei Technologies Co., Ltd. All rights reserved.
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -23,7 +24,9 @@
package compiler.vectorapi;
import jdk.incubator.vector.ByteVector;
import jdk.incubator.vector.IntVector;
import jdk.incubator.vector.ShortVector;
import jdk.incubator.vector.VectorSpecies;
import jdk.incubator.vector.VectorShuffle;
@ -31,16 +34,94 @@ import jdk.incubator.vector.VectorShuffle;
* @test
* @bug 8265907
* @modules jdk.incubator.vector
* @run main/othervm compiler.vectorapi.TestVectorShuffleIota
* @run main/othervm -XX:CompileThresholdScaling=0.3 -XX:-TieredCompilation compiler.vectorapi.TestVectorShuffleIota
*/
public class TestVectorShuffleIota {
static final VectorSpecies<Integer> SPECIESi = IntVector.SPECIES_128;
static final VectorSpecies<Short> SPECIESs = ShortVector.SPECIES_128;
static final VectorSpecies<Byte> SPECIESb = ByteVector.SPECIES_128;
static final int INVOC_COUNT = 50000;
static final int INVOC_COUNT = 5000;
static int[] ai = {87, 65, 78, 71};
static long expected_value(VectorSpecies<?> SPECIES, int start, int step, boolean wrap) {
long res = 0;
int lanesM1 = SPECIES.length() - 1;
if (wrap) {
res = (lanesM1 & (start + step * lanesM1));
} else {
int effective_index = start + step * lanesM1;
int wrapped_effective_index = effective_index & lanesM1;
res = (effective_index == wrapped_effective_index ?
wrapped_effective_index :
-SPECIES.length() + wrapped_effective_index);
}
return res;
}
static void validateTests(long actual, VectorSpecies<?> SPECIES, int start, int step, boolean wrap) {
long expected = expected_value(SPECIES, start, step, wrap);
if (actual != expected) {
throw new AssertionError("Result Mismatch!, actual = " + actual + " expected = " + expected);
}
}
static void testShuffleIotaB128(int start, int step, boolean wrap) {
long res = SPECIESb.iotaShuffle(start, step, wrap)
.laneSource(SPECIESb.length()-1);
validateTests(res, SPECIESb, start, step, wrap);
}
static void testShuffleIotaS128(int start, int step, boolean wrap) {
long res = SPECIESs.iotaShuffle(start, step, wrap)
.laneSource(SPECIESs.length()-1);
validateTests(res, SPECIESs, start, step, wrap);
}
static void testShuffleIotaI128(int start, int step, boolean wrap) {
long res = SPECIESi.iotaShuffle(start, step, wrap)
.laneSource(SPECIESi.length()-1);
validateTests(res, SPECIESi, start, step, wrap);
}
static void testShuffleIotaConst0B128() {
long res = SPECIESb.iotaShuffle(-32, 1, false)
.laneSource(SPECIESb.length()-1);
validateTests(res, SPECIESb, -32, 1, false);
}
static void testShuffleIotaConst0S128() {
long res = SPECIESs.iotaShuffle(-32, 1, false)
.laneSource(SPECIESs.length()-1);
validateTests(res, SPECIESs, -32, 1, false);
}
static void testShuffleIotaConst0I128() {
long res = SPECIESi.iotaShuffle(-32, 1, false)
.laneSource(SPECIESi.length()-1);
validateTests(res, SPECIESi, -32, 1, false);
}
static void testShuffleIotaConst1B128() {
long res = SPECIESb.iotaShuffle(-32, 1, true)
.laneSource(SPECIESb.length()-1);
validateTests(res, SPECIESb, -32, 1, true);
}
static void testShuffleIotaConst1S128() {
long res = SPECIESs.iotaShuffle(-32, 1, true)
.laneSource(SPECIESs.length()-1);
validateTests(res, SPECIESs, -32, 1, true);
}
static void testShuffleIotaConst1I128() {
long res = SPECIESi.iotaShuffle(-32, 1, true)
.laneSource(SPECIESi.length()-1);
validateTests(res, SPECIESi, -32, 1, true);
}
static void testShuffleI() {
IntVector iv = (IntVector) VectorShuffle.iota(SPECIESi, 0, 2, false).toVector();
iv.intoArray(ai, 0);
@ -53,6 +134,34 @@ public class TestVectorShuffleIota {
for (int i = 0; i < ai.length; i++) {
System.out.print(ai[i] + ", ");
}
for (int i = 0; i < INVOC_COUNT; i++) {
testShuffleIotaI128(128, 1, true);
testShuffleIotaI128(128, 1, false);
testShuffleIotaI128(-128, 1, true);
testShuffleIotaI128(-128, 1, false);
testShuffleIotaI128(1, 1, true);
testShuffleIotaI128(1, 1, false);
testShuffleIotaS128(128, 1, true);
testShuffleIotaS128(128, 1, false);
testShuffleIotaS128(-128, 1, true);
testShuffleIotaS128(-128, 1, false);
testShuffleIotaS128(1, 1, true);
testShuffleIotaS128(1, 1, false);
testShuffleIotaB128(128, 1, true);
testShuffleIotaB128(128, 1, false);
testShuffleIotaB128(-128, 1, true);
testShuffleIotaB128(-128, 1, false);
testShuffleIotaB128(1, 1, true);
testShuffleIotaB128(1, 1, false);
testShuffleIotaConst0B128();
testShuffleIotaConst0S128();
testShuffleIotaConst0I128();
testShuffleIotaConst1B128();
testShuffleIotaConst1S128();
testShuffleIotaConst1I128();
}
System.out.println();
}
}