diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad
index 0850bec9a45..6139b352f9f 100644
--- a/src/hotspot/cpu/x86/x86.ad
+++ b/src/hotspot/cpu/x86/x86.ad
@@ -9936,6 +9936,7 @@ instruct vmuladdadd4S2I_reg(vecD dst, vecD src1, vecD src2) %{
     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
+  ins_cost(10);
 %}
 
 instruct vmuladdadd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
@@ -9947,6 +9948,7 @@ instruct vmuladdadd8S4I_reg(vecX dst, vecX src1, vecX src2) %{
     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
+  ins_cost(10);
 %}
 
 instruct vmuladdadd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
@@ -9958,6 +9960,7 @@ instruct vmuladdadd16S8I_reg(vecY dst, vecY src1, vecY src2) %{
     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
+  ins_cost(10);
 %}
 
 instruct vmuladdadd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
@@ -9969,6 +9972,7 @@ instruct vmuladdadd32S16I_reg(vecZ dst, vecZ src1, vecZ src2) %{
     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
+  ins_cost(10);
 %}
 
 // --------------------------------- PopCount --------------------------------------
diff --git a/src/hotspot/share/opto/loopopts.cpp b/src/hotspot/share/opto/loopopts.cpp
index 91eee740fb0..5b955cb8775 100644
--- a/src/hotspot/share/opto/loopopts.cpp
+++ b/src/hotspot/share/opto/loopopts.cpp
@@ -532,6 +532,12 @@ Node *PhaseIdealLoop::convert_add_to_muladd(Node* n) {
               register_new_node(nn, get_ctrl(n));
               _igvn.replace_node(n, nn);
               return nn;
+            } else if ((adr1->in(AddPNode::Base) == adr4->in(AddPNode::Base)) && // cross-paired bases: 1 with 4, 2 with 3
+                       (adr2->in(AddPNode::Base) == adr3->in(AddPNode::Base))) {
+              nn = new MulAddS2INode(mul_in1, mul_in2, mul_in4, mul_in3); // mul_in4 precedes mul_in3; presumably so inputs 1/3 and 2/4 share a base -- confirm
+              register_new_node(nn, get_ctrl(n));
+              _igvn.replace_node(n, nn);
+              return nn;
             }
           }
         }
diff --git a/src/hotspot/share/opto/mulnode.cpp b/src/hotspot/share/opto/mulnode.cpp
index 2cb6008a976..691e0f5a8ef 100644
--- a/src/hotspot/share/opto/mulnode.cpp
+++ b/src/hotspot/share/opto/mulnode.cpp
@@ -1401,3 +1401,12 @@ const Type* FmaFNode::Value(PhaseGVN* phase) const {
   return TypeF::make(fma(f1, f2, f3));
 #endif
 }
+
+//=============================================================================
+//------------------------------hash-------------------------------------------
+// Hash function for MulAddS2INode (I_OUT = S1 * S2 + S3 * S4). Inputs may be reordered
+// by edge swapping, so the hash is an order-independent sum of in(1)..in(4) and Opcode().
+uint MulAddS2INode::hash() const {
+  return (uintptr_t)in(1) + (uintptr_t)in(2) + (uintptr_t)in(3) + (uintptr_t)in(4) + Opcode();
+}
+
diff --git a/src/hotspot/share/opto/mulnode.hpp b/src/hotspot/share/opto/mulnode.hpp
index 1520ab4f00d..b519b04eb6b 100644
--- a/src/hotspot/share/opto/mulnode.hpp
+++ b/src/hotspot/share/opto/mulnode.hpp
@@ -289,6 +289,7 @@ public:
 // Multiply shorts into integers and add them.
 // Semantics: I_OUT = S1 * S2 + S3 * S4
 class MulAddS2INode : public Node {
+  virtual uint hash() const; // order-independent over in(1)..in(4); defined in mulnode.cpp
 public:
   MulAddS2INode(Node* in1, Node *in2, Node *in3, Node* in4) : Node(0, in1, in2, in3, in4) {}
   virtual int Opcode() const;
diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp
index 90dbef57c62..c5ee934ae7c 100644
--- a/src/hotspot/share/opto/superword.cpp
+++ b/src/hotspot/share/opto/superword.cpp
@@ -1445,7 +1445,7 @@ void SuperWord::order_def_uses(Node_List* p) {
       Node* t1 = s1->fast_out(i);
 
       // Only allow operand swap on commuting operations
-      if (!t1->is_Add() && !t1->is_Mul()) {
+      if (!t1->is_Add() && !t1->is_Mul() && !VectorNode::is_muladds2i(t1)) {
         break;
       }
 
@@ -1500,9 +1500,23 @@ bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) {
       if ((i1 == (3-i2)) && (u2->is_Add() || u2->is_Mul())) {
         // Further analysis relies on operands position matching.
         u2->swap_edges(i1, i2);
+      } else if (VectorNode::is_muladds2i(u2) && u1 != u2) {
+        if (i1 == 5 - i2) { // i1 + i2 == 5, i.e. (i1,i2) is (3,2), (2,3), (1,4) or (4,1): swap the operands within each product
+          u2->swap_edges(1, 2);
+          u2->swap_edges(3, 4);
+        }
+        if (i1 == 3 - i2 || i1 == 7 - i2) { // i1 + i2 == 3 or 7, i.e. (i1,i2) is (1,2), (2,1), (3,4) or (4,3): reverse the order of all four inputs
+          u2->swap_edges(2, 3);
+          u2->swap_edges(1, 4);
+        }
+        return false; // Only reorder the edges and report no match; the muladds2i nodes get packed in follow_use_defs
       } else {
         return false;
       }
+    } else if (i1 == i2 && VectorNode::is_muladds2i(u2) && u1 != u2) { // i1 == i2 always holds in this else-branch; exchange the two products
+      u2->swap_edges(1, 3);
+      u2->swap_edges(2, 4);
+      return false; // Only reorder the edges and report no match; the muladds2i nodes get packed in follow_use_defs
     }
   } while (i1 < ct);
   return true;