8154943: AArch64: redundant address computation instructions with vectorization

Duplicated i2l nodes can be eliminated to optimize redundant address computations

Reviewed-by: kvn, dlong
This commit is contained in:
Roland Westrelin 2016-04-29 17:24:16 +02:00
parent 0ab61a6e28
commit 160798c7e7
7 changed files with 50 additions and 1 deletions

View File

@ -3566,6 +3566,8 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() {
return FP_REG_mask(); return FP_REG_mask();
} }
const bool Matcher::convi2l_type_required = false;
// helper for encoding java_to_runtime calls on sim // helper for encoding java_to_runtime calls on sim
// //
// this is needed to compute the extra arguments required when // this is needed to compute the extra arguments required when

View File

@ -2313,6 +2313,8 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() {
return RegMask(); return RegMask();
} }
const bool Matcher::convi2l_type_required = true;
%} %}
//----------ENCODING BLOCK----------------------------------------------------- //----------ENCODING BLOCK-----------------------------------------------------

View File

@ -2133,6 +2133,8 @@ const RegMask Matcher::method_handle_invoke_SP_save_mask() {
return L7_REGP_mask(); return L7_REGP_mask();
} }
const bool Matcher::convi2l_type_required = true;
%} %}

View File

@ -1861,6 +1861,8 @@ const bool Matcher::pass_original_key_for_aes() {
return false; return false;
} }
const bool Matcher::convi2l_type_required = true;
// Helper methods for MachSpillCopyNode::implementation(). // Helper methods for MachSpillCopyNode::implementation().
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
int src_hi, int dst_hi, uint ireg, outputStream* st) { int src_hi, int dst_hi, uint ireg, outputStream* st) {

View File

@ -3263,6 +3263,43 @@ void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
frc._tests.push(iff); frc._tests.push(iff);
break; break;
} }
case Op_ConvI2L: {
if (!Matcher::convi2l_type_required) {
// Code generation on some platforms doesn't need accurate
// ConvI2L types. Widening the type can help remove redundant
// address computations.
n->as_Type()->set_type(TypeLong::INT);
ResourceMark rm;
Node_List wq;
wq.push(n);
for (uint next = 0; next < wq.size(); next++) {
Node *m = wq.at(next);
for(;;) {
// Loop over all nodes with identical input edges as m
Node* k = m->find_similar(m->Opcode());
if (k == NULL) {
break;
}
// Push their uses so we get a chance to remove nodes made
// redundant
for (DUIterator_Fast imax, i = k->fast_outs(imax); i < imax; i++) {
Node* u = k->fast_out(i);
assert(!wq.contains(u), "shouldn't process one node several times");
if (u->Opcode() == Op_LShiftL ||
u->Opcode() == Op_AddL ||
u->Opcode() == Op_SubL ||
u->Opcode() == Op_AddP) {
wq.push(u);
}
}
// Replace all nodes with identical edges as m with m
k->subsume_by(m, this);
}
}
}
break;
}
default: default:
assert( !n->is_Call(), "" ); assert( !n->is_Call(), "" );
assert( !n->is_Mem(), "" ); assert( !n->is_Mem(), "" );

View File

@ -488,6 +488,9 @@ public:
// ourselves. // ourselves.
static const bool need_masked_shift_count; static const bool need_masked_shift_count;
// Whether code generation needs accurate ConvI2L types.
static const bool convi2l_type_required;
// This routine is run whenever a graph fails to match. // This routine is run whenever a graph fails to match.
// If it returns, the compiler should bailout to interpreter without error. // If it returns, the compiler should bailout to interpreter without error.
// In non-product mode, SoftMatchFailure is false to detect non-canonical // In non-product mode, SoftMatchFailure is false to detect non-canonical

View File

@ -2297,7 +2297,8 @@ Node* Node::find_similar(int opc) {
if (def && def->outcnt() >= 2) { if (def && def->outcnt() >= 2) {
for (DUIterator_Fast dmax, i = def->fast_outs(dmax); i < dmax; i++) { for (DUIterator_Fast dmax, i = def->fast_outs(dmax); i < dmax; i++) {
Node* use = def->fast_out(i); Node* use = def->fast_out(i);
if (use->Opcode() == opc && if (use != this &&
use->Opcode() == opc &&
use->req() == req()) { use->req() == req()) {
uint j; uint j;
for (j = 0; j < use->req(); j++) { for (j = 0; j < use->req(); j++) {