8273322: Enhance macro logic optimization for masked logic operations.

Reviewed-by: kvn, sviswanathan
2022-01-06 18:41:06 +00:00 · 2022-01-06 18:41:06 +00:00 · 8703f14808
commit 8703f14808
parent bc12381105
12 changed files with 1413 additions and 51 deletions
--- a/src/hotspot/cpu/x86/assembler_x86.cpp
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -9725,6 +9725,68 @@ void Assembler::evpmaxsq(XMMRegister dst, KRegister mask, XMMRegister nds, Addre
  emit_operand(dst, src);
 }

+void Assembler::evpternlogd(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, int vector_len) { // opmask-predicated ternary-logic instruction, dword variant, all operands in registers
+  assert(VM_Version::supports_evex(), "requires EVEX support");
+  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support"); // anything other than 512-bit needs AVX512VL
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // vex_w is false here; the evpternlogq overloads pass true
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask); // record 'mask' as the embedded opmask register
+  if (merge) {
+    attributes.reset_is_clear_context(); // NOTE(review): presumably selects merge-masking over zero-masking -- confirm against InstructionAttr
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src3->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int24(0x25, (unsigned char)(0xC0 | encode), imm8); // three bytes: opcode 0x25, then 0xC0|encode, then imm8
+}
+
+void Assembler::evpternlogd(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, Address src3, bool merge, int vector_len) { // opmask-predicated ternary-logic instruction, dword variant, src3 given as an Address
+  assert(VM_Version::supports_evex(), "requires EVEX support");
+  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support"); // anything other than 512-bit needs AVX512VL
+  assert(dst != xnoreg, "sanity");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // vex_w is false here; the evpternlogq overloads pass true
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask); // record 'mask' as the embedded opmask register
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit); // NOTE(review): same EVEX_64bit as the qword overload -- confirm this is intended for the dword variant
+  if (merge) {
+    attributes.reset_is_clear_context(); // NOTE(review): presumably selects merge-masking over zero-masking -- confirm against InstructionAttr
+  }
+  vex_prefix(src3, src2->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int8(0x25); // same opcode byte as the register form
+  emit_operand(dst, src3); // operand encoding for dst and the src3 address
+  emit_int8(imm8); // imm8 is emitted last, after the operand
+}
+
+void Assembler::evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, int vector_len) { // opmask-predicated ternary-logic instruction, qword variant, all operands in registers
+  assert(VM_Version::supports_evex(), "requires EVEX support");
+  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support"); // anything other than 512-bit needs AVX512VL
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // vex_w is true here; the evpternlogd overloads pass false
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask); // record 'mask' as the embedded opmask register
+  if (merge) {
+    attributes.reset_is_clear_context(); // NOTE(review): presumably selects merge-masking over zero-masking -- confirm against InstructionAttr
+  }
+  int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src3->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int24(0x25, (unsigned char)(0xC0 | encode), imm8); // three bytes: opcode 0x25, then 0xC0|encode, then imm8
+}
+
+void Assembler::evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, Address src3, bool merge, int vector_len) { // opmask-predicated ternary-logic instruction, qword variant, src3 given as an Address
+  assert(VM_Version::supports_evex(), "requires EVEX support");
+  assert(vector_len == Assembler::AVX_512bit || VM_Version::supports_avx512vl(), "requires VL support"); // anything other than 512-bit needs AVX512VL
+  assert(dst != xnoreg, "sanity");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // vex_w is true here; the evpternlogd overloads pass false
+  attributes.set_is_evex_instruction();
+  attributes.set_embedded_opmask_register_specifier(mask); // record 'mask' as the embedded opmask register
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FV, /* input_size_in_bits */ EVEX_64bit);
+  if (merge) {
+    attributes.reset_is_clear_context(); // NOTE(review): presumably selects merge-masking over zero-masking -- confirm against InstructionAttr
+  }
+  vex_prefix(src3, src2->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int8(0x25); // same opcode byte as the register form
+  emit_operand(dst, src3); // operand encoding for dst and the src3 address
+  emit_int8(imm8); // imm8 is emitted last, after the operand
+}
+
 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
  assert(UseAVX >= 2, "");
--- a/src/hotspot/cpu/x86/assembler_x86.hpp
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -2413,6 +2413,12 @@ private:
  void evprorvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evprorvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);

+  void evpternlogd(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, int vector_len);
+  void evpternlogd(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, Address src3, bool merge, int vector_len);
+  void evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, int vector_len);
+  void evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, Address src3, bool merge, int vector_len);
+
+
  // Sub packed integers
  void psubb(XMMRegister dst, XMMRegister src);
  void psubw(XMMRegister dst, XMMRegister src);
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -4152,6 +4152,26 @@ void C2_MacroAssembler::vector_castF2I_evex(XMMRegister dst, XMMRegister src, XM
  bind(done);
 }

+void C2_MacroAssembler::evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
+                                   bool merge, BasicType bt, int vlen_enc) {
+  if (bt != T_INT) { // every non-T_INT type is expected to be T_LONG and takes the qword form
+    assert(bt == T_LONG, "");
+    evpternlogq(dst, func, mask, src2, src3, merge, vlen_enc);
+    return;
+  }
+  evpternlogd(dst, func, mask, src2, src3, merge, vlen_enc); // T_INT: dword form
+}
+
+void C2_MacroAssembler::evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3,
+                                   bool merge, BasicType bt, int vlen_enc) {
+  if (bt != T_INT) { // every non-T_INT type is expected to be T_LONG and takes the qword form
+    assert(bt == T_LONG, "");
+    evpternlogq(dst, func, mask, src2, src3, merge, vlen_enc);
+    return;
+  }
+  evpternlogd(dst, func, mask, src2, src3, merge, vlen_enc); // T_INT: dword form
+}
+
 #ifdef _LP64
 void C2_MacroAssembler::vector_long_to_maskvec(XMMRegister dst, Register src, Register rtmp1,
                                               Register rtmp2, XMMRegister xtmp, int mask_len,
--- a/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
+++ b/src/hotspot/cpu/x86/c2_MacroAssembler_x86.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -311,4 +311,11 @@ public:
  void vector_castD2L_evex(XMMRegister dst, XMMRegister src, XMMRegister xtmp1, XMMRegister xtmp2,
                           KRegister ktmp1, KRegister ktmp2, AddressLiteral double_sign_flip,
                           Register scratch, int vec_enc);
+
+  void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, XMMRegister src3,
+                  bool merge, BasicType bt, int vlen_enc);
+
+  void evpternlog(XMMRegister dst, int func, KRegister mask, XMMRegister src2, Address src3,
+                  bool merge, BasicType bt, int vlen_enc);
+
 #endif // CPU_X86_C2_MACROASSEMBLER_X86_HPP
--- a/src/hotspot/cpu/x86/x86.ad
+++ b/src/hotspot/cpu/x86/x86.ad
@ -1888,6 +1888,12 @@ const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, Bas
    case Op_FmaVD:
      return true;

+    case Op_MacroLogicV:
+      if(bt != T_INT && bt != T_LONG) {
+        return false;
+      }
+      return true;
+
    // Binary masked operations
    case Op_AddVB:
    case Op_AddVS:
@ -9560,6 +9566,29 @@ instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
  ins_pipe( pipe_slow );
 %}

+instruct vternlog_reg_masked(vec dst, vec src2, vec src3, immU8 func, kReg mask) %{
+  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
+  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
+  ins_encode %{
+    int vlen_enc = vector_length_encoding(this);
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
+                  $src2$$XMMRegister, $src3$$XMMRegister, true, bt, vlen_enc);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vternlogd_mem_masked(vec dst, vec src2, memory src3, immU8 func, kReg mask) %{
+  match(Set dst (MacroLogicV dst (Binary src2 (Binary src3 (Binary func mask)))));
+  format %{ "vternlog_masked $dst,$src2,$src3,$func,$mask\t! vternlog masked operation" %}
+  ins_encode %{
+    int vlen_enc = vector_length_encoding(this);
+    BasicType bt = Matcher::vector_element_basic_type(this);
+    __ evpternlog($dst$$XMMRegister, $func$$constant, $mask$$KRegister,
+                  $src2$$XMMRegister, $src3$$Address, true, bt, vlen_enc);
+  %}
+  ins_pipe( pipe_slow );
+%}

 instruct castMM(kReg dst)
 %{
--- a/src/hotspot/share/opto/compile.cpp
+++ b/src/hotspot/share/opto/compile.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -2375,7 +2375,6 @@ bool Compile::has_vbox_nodes() {

 static bool is_vector_unary_bitwise_op(Node* n) {
  return n->Opcode() == Op_XorV &&
-         n->req() == 2 &&
         VectorNode::is_vector_bitwise_not_pattern(n);
 }

@ -2383,7 +2382,7 @@ static bool is_vector_binary_bitwise_op(Node* n) {
  switch (n->Opcode()) {
    case Op_AndV:
    case Op_OrV:
-      return n->req() == 2;
+      return true;

    case Op_XorV:
      return !is_vector_unary_bitwise_op(n);
@ -2415,11 +2414,12 @@ static bool is_vector_bitwise_cone_root(Node* n) {
  return true;
 }

-static uint collect_unique_inputs(Node* n, Unique_Node_List& partition, Unique_Node_List& inputs) {
+static uint collect_unique_inputs(Node* n, Unique_Node_List& inputs) {
  uint cnt = 0;
  if (is_vector_bitwise_op(n)) {
+    uint inp_cnt = n->is_predicated_vector() ? n->req()-1 : n->req();
    if (VectorNode::is_vector_bitwise_not_pattern(n)) {
-      for (uint i = 1; i < n->req(); i++) {
+      for (uint i = 1; i < inp_cnt; i++) {
        Node* in = n->in(i);
        bool skip = VectorNode::is_all_ones_vector(in);
        if (!skip && !inputs.member(in)) {
@ -2429,9 +2429,9 @@ static uint collect_unique_inputs(Node* n, Unique_Node_List& partition, Unique_N
      }
      assert(cnt <= 1, "not unary");
    } else {
-      uint last_req = n->req();
+      uint last_req = inp_cnt;
      if (is_vector_ternary_bitwise_op(n)) {
-        last_req = n->req() - 1; // skip last input
+        last_req = inp_cnt - 1; // skip last input
      }
      for (uint i = 1; i < last_req; i++) {
        Node* def = n->in(i);
@ -2441,7 +2441,6 @@ static uint collect_unique_inputs(Node* n, Unique_Node_List& partition, Unique_N
        }
      }
    }
-    partition.push(n);
  } else { // not a bitwise operations
    if (!inputs.member(n)) {
      inputs.push(n);
@ -2476,7 +2475,10 @@ Node* Compile::xform_to_MacroLogicV(PhaseIterGVN& igvn,
  Node* in3 = (inputs.size() == 3 ? inputs.at(2) : in2);

  uint func = compute_truth_table(partition, inputs);
-  return igvn.transform(MacroLogicVNode::make(igvn, in3, in2, in1, func, vt));
+
+  Node* pn = partition.at(partition.size() - 1);
+  Node* mask = pn->is_predicated_vector() ? pn->in(pn->req()-1) : NULL;
+  return igvn.transform(MacroLogicVNode::make(igvn, in1, in2, in3, mask, func, vt));
 }

 static uint extract_bit(uint func, uint pos) {
@ -2556,11 +2558,11 @@ uint Compile::compute_truth_table(Unique_Node_List& partition, Unique_Node_List&

  // Populate precomputed functions for inputs.
  // Each input corresponds to one column of 3 input truth-table.
-  uint input_funcs[] = { 0xAA,   // (_, _, a) -> a
+  uint input_funcs[] = { 0xAA,   // (_, _, c) -> c
                         0xCC,   // (_, b, _) -> b
-                         0xF0 }; // (c, _, _) -> c
+                         0xF0 }; // (a, _, _) -> a
  for (uint i = 0; i < inputs.size(); i++) {
-    eval_map.put(inputs.at(i), input_funcs[i]);
+    eval_map.put(inputs.at(i), input_funcs[2-i]);
  }

  for (uint i = 0; i < partition.size(); i++) {
@ -2603,6 +2605,14 @@ uint Compile::compute_truth_table(Unique_Node_List& partition, Unique_Node_List&
  return res;
 }

+// Criteria under which nodes get packed into a macro logic node:
+//  1) Parent and both child nodes are either all unmasked or all masked
+//     with the same predicate.
+//  2) A masked parent can be packed with its left child if that child is
+//     predicated and both have the same predicate.
+//  3) A masked parent can be packed with its right child if that child is
+//     un-predicated or has a matching predicate.
+//  4) An unmasked parent can be packed with an unmasked child.
 bool Compile::compute_logic_cone(Node* n, Unique_Node_List& partition, Unique_Node_List& inputs) {
  assert(partition.size() == 0, "not empty");
  assert(inputs.size() == 0, "not empty");
@ -2612,37 +2622,65 @@ bool Compile::compute_logic_cone(Node* n, Unique_Node_List& partition, Unique_No

  bool is_unary_op = is_vector_unary_bitwise_op(n);
  if (is_unary_op) {
-    assert(collect_unique_inputs(n, partition, inputs) == 1, "not unary");
+    assert(collect_unique_inputs(n, inputs) == 1, "not unary");
    return false; // too few inputs
  }

-  assert(is_vector_binary_bitwise_op(n), "not binary");
-  Node* in1 = n->in(1);
-  Node* in2 = n->in(2);
+  bool pack_left_child = true;
+  bool pack_right_child = true;

-  int in1_unique_inputs_cnt = collect_unique_inputs(in1, partition, inputs);
-  int in2_unique_inputs_cnt = collect_unique_inputs(in2, partition, inputs);
-  partition.push(n);
+  bool left_child_LOP = is_vector_bitwise_op(n->in(1));
+  bool right_child_LOP = is_vector_bitwise_op(n->in(2));
+
+  int left_child_input_cnt = 0;
+  int right_child_input_cnt = 0;
+
+  bool parent_is_predicated = n->is_predicated_vector();
+  bool left_child_predicated = n->in(1)->is_predicated_vector();
+  bool right_child_predicated = n->in(2)->is_predicated_vector();
+
+  Node* parent_pred = parent_is_predicated ? n->in(n->req()-1) : NULL; // a predicated node carries its mask as the last input
+  Node* left_child_pred = left_child_predicated ? n->in(1)->in(n->in(1)->req()-1) : NULL;
+  Node* right_child_pred = right_child_predicated ? n->in(2)->in(n->in(2)->req()-1) : NULL; // must come from in(2); using in(1) would compare the parent against the left child's mask
+
+  do {
+    if (pack_left_child && left_child_LOP &&
+        ((!parent_is_predicated && !left_child_predicated) ||
+        ((parent_is_predicated && left_child_predicated &&
+          parent_pred == left_child_pred)))) {
+       partition.push(n->in(1));
+       left_child_input_cnt = collect_unique_inputs(n->in(1), inputs);
+    } else {
+       inputs.push(n->in(1));
+       left_child_input_cnt = 1;
+    }
+
+    if (pack_right_child && right_child_LOP &&
+        (!right_child_predicated ||
+         (right_child_predicated && parent_is_predicated &&
+          parent_pred == right_child_pred))) {
+       partition.push(n->in(2));
+       right_child_input_cnt = collect_unique_inputs(n->in(2), inputs);
+    } else {
+       inputs.push(n->in(2));
+       right_child_input_cnt = 1;
+    }

-  // Too many inputs?
    if (inputs.size() > 3) {
-    partition.clear();
+      assert(partition.size() > 0, "");
      inputs.clear();
-    { // Recompute in2 inputs
-      Unique_Node_List not_used;
-      in2_unique_inputs_cnt = collect_unique_inputs(in2, not_used, not_used);
+      partition.clear();
+      if (left_child_input_cnt > right_child_input_cnt) {
+        pack_left_child = false;
+      } else {
+        pack_right_child = false;
      }
-    // Pick the node with minimum number of inputs.
-    if (in1_unique_inputs_cnt >= 3 && in2_unique_inputs_cnt >= 3) {
-      return false; // still too many inputs
+    } else {
+      break;
    }
-    // Recompute partition & inputs.
-    Node* child       = (in1_unique_inputs_cnt < in2_unique_inputs_cnt ? in1 : in2);
-    collect_unique_inputs(child, partition, inputs);
-
-    Node* other_input = (in1_unique_inputs_cnt < in2_unique_inputs_cnt ? in2 : in1);
-    inputs.push(other_input);
+  } while(true);

+  if(partition.size()) {
    partition.push(n);
  }

@ -2650,7 +2688,6 @@ bool Compile::compute_logic_cone(Node* n, Unique_Node_List& partition, Unique_No
         (inputs.size()    == 2 || inputs.size()    == 3);
 }

-
 void Compile::process_logic_cone_root(PhaseIterGVN &igvn, Node *n, VectorSet &visited) {
  assert(is_vector_bitwise_op(n), "not a root");

@ -2670,9 +2707,20 @@ void Compile::process_logic_cone_root(PhaseIterGVN &igvn, Node *n, VectorSet &vi
  Unique_Node_List inputs;
  if (compute_logic_cone(n, partition, inputs)) {
    const TypeVect* vt = n->bottom_type()->is_vect();
+    Node* pn = partition.at(partition.size() - 1);
+    Node* mask = pn->is_predicated_vector() ? pn->in(pn->req()-1) : NULL;
+    if (mask == NULL ||
+        Matcher::match_rule_supported_vector_masked(Op_MacroLogicV, vt->length(), vt->element_basic_type())) {
      Node* macro_logic = xform_to_MacroLogicV(igvn, vt, partition, inputs);
+#ifdef ASSERT
+      if (TraceNewVectors) {
+        tty->print("new Vector node: ");
+        macro_logic->dump();
+      }
+#endif
      igvn.replace_node(n, macro_logic);
    }
+  }
 }

 void Compile::optimize_logic_cones(PhaseIterGVN &igvn) {
--- a/src/hotspot/share/opto/matcher.cpp
+++ b/src/hotspot/share/opto/matcher.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -2320,6 +2320,14 @@ void Matcher::find_shared_post_visit(Node* n, uint opcode) {
      n->set_req(2, new BinaryNode(n->in(3), n->in(4)));
      n->del_req(4);
      n->del_req(3);
+    } else if (n->req() == 6) {
+      Node* b3 = new BinaryNode(n->in(4), n->in(5));
+      Node* b2 = new BinaryNode(n->in(3), b3);
+      Node* b1 = new BinaryNode(n->in(2), b2);
+      n->set_req(2, b1);
+      n->del_req(5);
+      n->del_req(4);
+      n->del_req(3);
    }
    return;
  }
--- a/src/hotspot/share/opto/vectornode.cpp
+++ b/src/hotspot/share/opto/vectornode.cpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -1201,13 +1201,14 @@ bool ReductionNode::implemented(int opc, uint vlen, BasicType bt) {
 }

 MacroLogicVNode* MacroLogicVNode::make(PhaseGVN& gvn, Node* in1, Node* in2, Node* in3,
-                                       uint truth_table, const TypeVect* vt) {
+                                       Node* mask, uint truth_table, const TypeVect* vt) {
  assert(truth_table <= 0xFF, "invalid");
  assert(in1->bottom_type()->is_vect()->length_in_bytes() == vt->length_in_bytes(), "mismatch");
  assert(in2->bottom_type()->is_vect()->length_in_bytes() == vt->length_in_bytes(), "mismatch");
  assert(in3->bottom_type()->is_vect()->length_in_bytes() == vt->length_in_bytes(), "mismatch");
+  assert(!mask || mask->bottom_type()->isa_vectmask(), "predicated register type expected");
  Node* fn = gvn.intcon(truth_table);
-  return new MacroLogicVNode(in1, in2, in3, fn, vt);
+  return new MacroLogicVNode(in1, in2, in3, fn, mask, vt);
 }

 Node* VectorNode::degenerate_vector_rotate(Node* src, Node* cnt, bool is_rotate_left,
--- a/src/hotspot/share/opto/vectornode.hpp
+++ b/src/hotspot/share/opto/vectornode.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2021, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -1275,13 +1275,19 @@ public:
 // Vector logical operations packing node.
 class MacroLogicVNode : public VectorNode {
 private:
-  MacroLogicVNode(Node* in1, Node* in2, Node* in3, Node* fn, const TypeVect* vt)
-  : VectorNode(in1, in2, in3, fn, vt) {}
+  MacroLogicVNode(Node* in1, Node* in2, Node* in3, Node* fn, Node* mask, const TypeVect* vt)
+  : VectorNode(in1, in2, in3, fn, vt) {
+     if (mask) {
+       this->add_req(mask);
+       this->add_flag(Node::Flag_is_predicated_vector);
+     }
+  }

 public:
  virtual int Opcode() const;

-  static MacroLogicVNode* make(PhaseGVN& igvn, Node* in1, Node* in2, Node* in3, uint truth_table, const TypeVect* vt);
+  static MacroLogicVNode* make(PhaseGVN& igvn, Node* in1, Node* in2, Node* in3,
+                               Node* mask, uint truth_table, const TypeVect* vt);
 };

 class VectorMaskCmpNode : public VectorNode {
--- a/test/hotspot/jtreg/compiler/vectorapi/TestMaskedMacroLogicVector.java
+++ b/test/hotspot/jtreg/compiler/vectorapi/TestMaskedMacroLogicVector.java
@ -0,0 +1,844 @@
+/*
+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8273322
+ * @key randomness
+ * @summary Enhance macro logic optimization for masked logic operations.
+ * @modules jdk.incubator.vector
+ * @requires vm.compiler2.enabled
+ * @requires os.simpleArch == "x64"
+ * @library /test/lib /
+ * @run driver compiler.vectorapi.TestMaskedMacroLogicVector
+ */
+
+package compiler.vectorapi;
+
+import java.util.concurrent.Callable;
+import compiler.lib.ir_framework.*;
+import compiler.lib.ir_framework.driver.IRViolationException;
+import jdk.test.lib.Asserts;
+import jdk.test.lib.Asserts;
+import jdk.test.lib.Utils;
+import java.util.Random;
+
+import jdk.incubator.vector.*;
+
+public class TestMaskedMacroLogicVector {
+    boolean [] br;
+    boolean [] ba;
+    boolean [] bb;
+
+    short [] sr;
+    char  [] ca;
+    char  [] cb;
+
+    int [] r;
+    int [] a;
+    int [] b;
+    int [] c;
+    int [] d;
+    int [] e;
+    int [] f;
+
+    long [] rl;
+    long [] al;
+    long [] bl;
+    long [] cl;
+
+    boolean [] mask;
+
+    static boolean booleanFunc1(boolean a, boolean b) {
+        return a & b;
+    }
+
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV" , " > 0 "})
+    public void testSubWordBoolean(boolean[] r, boolean[] a, boolean[] b) {
+        for (int i = 0; i < r.length; i++) {
+            r[i] = booleanFunc1(a[i], b[i]);
+        }
+    }
+    public void verifySubWordBoolean(boolean[] r, boolean[] a, boolean[] b) {
+        for (int i = 0; i < r.length; i++) {
+            boolean expected = booleanFunc1(a[i], b[i]);
+            if (r[i] != expected) {
+                throw new AssertionError(
+                        String.format("at #%d: r=%b, expected = %b = booleanFunc1(%b,%b)",
+                                      i, r[i], expected, a[i], b[i]));
+            }
+        }
+    }
+
+
+    static short charFunc1(char a, char b) {
+        return (short)((a & b) & 1);
+    }
+
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV" , " > 0 "})
+    public void testSubWordChar(short[] r, char[] a, char[] b) {
+        for (int i = 0; i < r.length; i++) {
+            r[i] = charFunc1(a[i], b[i]);
+        }
+    }
+    public void verifySubWordChar(short[] r, char[] a, char[] b) { // checks every r[i] against charFunc1(a[i], b[i]); throws AssertionError on the first mismatch
+        for (int i = 0; i < r.length; i++) {
+            short expected = charFunc1(a[i], b[i]);
+            if (r[i] != expected) {
+                throw new AssertionError(
+                        String.format("testSubWordChar: at #%d: r=%d, expected = %d = charFunc1(%d,%d)", // name the function that actually produced 'expected'
+                                      i, r[i], expected, (int)a[i], (int)b[i]));
+            }
+        }
+    }
+
+    // Case 1): Unmasked expression tree.
+    //        P_LOP
+    //   L_LOP     R_LOP
+
+    static int intFunc1(int a, int b, int c) {
+        return (a & b) ^ (a & c);
+    }
+
+    @ForceInline
+    public void testInt1Kernel(VectorSpecies SPECIES, int [] r, int [] a, int [] b, int [] c) {
+        for (int i = 0; i < SPECIES.loopBound(r.length); i += SPECIES.length()) {
+            IntVector va = IntVector.fromArray(SPECIES, a, i);
+            IntVector vb = IntVector.fromArray(SPECIES, b, i);
+            IntVector vc = IntVector.fromArray(SPECIES, c, i);
+            va.lanewise(VectorOperators.AND, vc)
+            .lanewise(VectorOperators.XOR, va.lanewise(VectorOperators.AND, vb))
+            .intoArray(r, i);
+        }
+    }
+
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt1_Int128(int[] r, int[] a, int[] b, int[] c) {
+        testInt1Kernel(IntVector.SPECIES_128, r, a, b, c);
+    }
+
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt1_Int256(int[] r, int[] a, int[] b, int[] c) {
+        testInt1Kernel(IntVector.SPECIES_256, r, a, b, c);
+    }
+
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt1_Int512(int[] r, int[] a, int[] b, int[] c) {
+        testInt1Kernel(IntVector.SPECIES_512, r, a, b, c);
+    }
+
+    public void verifyInt1(int[] r, int[] a, int[] b, int[] c) {
+        for (int i = 0; i < r.length; i++) {
+            int expected = intFunc1(a[i], b[i], c[i]);
+            if (r[i] != expected) {
+                throw new AssertionError(String.format("testInt1: at #%d: r=%d, expected = %d = intFunc1(%d,%d,%d)",
+                                                       i, r[i], expected, a[i], b[i], c[i]));
+            }
+        }
+    }
+
+    // Case 2): Only right child is masked.
+    //        P_LOP
+    //   L_LOP    R_LOP(mask)
+
+    static int intFunc2(int a, int b, int c, boolean mask) {
+        return (a & b) ^ (mask == true ? a & c : a);
+    }
+
+    @ForceInline
+    public void testInt2Kernel(VectorSpecies SPECIES, int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        for (int i = 0; i < SPECIES.loopBound(r.length); i += SPECIES.length()) {
+            VectorMask<Integer> vmask = VectorMask.fromArray(SPECIES, mask , i);
+            IntVector va = IntVector.fromArray(SPECIES, a, i);
+            IntVector vb = IntVector.fromArray(SPECIES, b, i);
+            IntVector vc = IntVector.fromArray(SPECIES, c, i);
+            va.lanewise(VectorOperators.AND, vb)
+            .lanewise(VectorOperators.XOR,
+                      va.lanewise(VectorOperators.AND, vc, vmask))
+           .intoArray(r, i);
+        }
+    }
+
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt2_Int128(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt2Kernel(IntVector.SPECIES_128, r, a, b, c, mask);
+    }
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt2_Int256(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt2Kernel(IntVector.SPECIES_256, r, a, b, c, mask);
+    }
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt2_Int512(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt2Kernel(IntVector.SPECIES_512, r, a, b, c, mask);
+    }
+
+    public void verifyInt2(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        for (int i = 0; i < r.length; i++) {
+            int expected = intFunc2(a[i], b[i], c[i], mask[i]);
+            if (r[i] != expected) {
+                throw new AssertionError(String.format("testInt2: at #%d: r=%d, expected = %d = intFunc2(%d,%d,%d,%b)",
+                                                       i, r[i], expected, a[i], b[i], c[i], mask[i]));
+            }
+        }
+    }
+
+    // Case 3): Only left child is masked.
+    //             P_LOP
+    //   L_LOP(mask)    R_LOP
+
+    static int intFunc3(int a, int b, int c, boolean mask) {
+        return (mask == true ? a & b : a) ^ (a & c);
+    }
+
+    @ForceInline
+    public void testInt3Kernel(VectorSpecies SPECIES, int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        for (int i = 0; i < SPECIES.loopBound(r.length); i += SPECIES.length()) {
+            VectorMask<Integer> vmask = VectorMask.fromArray(SPECIES, mask , i);
+            IntVector va = IntVector.fromArray(SPECIES, a, i);
+            IntVector vb = IntVector.fromArray(SPECIES, b, i);
+            IntVector vc = IntVector.fromArray(SPECIES, c, i);
+            va.lanewise(VectorOperators.AND, vb, vmask)
+            .lanewise(VectorOperators.XOR,
+                      va.lanewise(VectorOperators.AND, vc))
+           .intoArray(r, i);
+        }
+    }
+
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt3_Int128(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt3Kernel(IntVector.SPECIES_128, r, a, b, c, mask);
+    }
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt3_Int256(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt3Kernel(IntVector.SPECIES_256, r, a, b, c, mask);
+    }
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt3_Int512(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt3Kernel(IntVector.SPECIES_512, r, a, b, c, mask);
+    }
+
+
+    /**
+     * Compares every element of {@code r} with {@code intFunc3} applied to the
+     * matching inputs and throws {@link AssertionError} on the first mismatch.
+     */
+    @ForceInline
+    public void verifyInt3(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        for (int idx = 0; idx < r.length; idx++) {
+            final int want = intFunc3(a[idx], b[idx], c[idx], mask[idx]);
+            if (r[idx] == want) {
+                continue;
+            }
+            final String msg = String.format("testInt3: at #%d: r=%d, expected = %d = intFunc3(%d,%d,%d,%b)",
+                                             idx, r[idx], want, a[idx], b[idx], c[idx], mask[idx]);
+            throw new AssertionError(msg);
+        }
+    }
+
+    // Case 4): Both child nodes are masked.
+    //             P_LOP
+    //   L_LOP(mask)    R_LOP(mask)
+
+    /**
+     * Scalar reference for case 4: both AND children are masked, so with the
+     * mask unset the result is simply {@code b ^ c}.
+     */
+    static int intFunc4(int a, int b, int c, boolean mask) {
+        if (!mask) {
+            return b ^ c;
+        }
+        final int left = b & a;
+        final int right = c & a;
+        return left ^ right;
+    }
+
+    /**
+     * Vector counterpart of {@code intFunc4}: for each whole vector of {@code r}
+     * stores (b AND a, masked) XOR (c AND a, masked).
+     * Only indices below {@code SPECIES.loopBound(r.length)} are written.
+     *
+     * @param SPECIES int species selecting the vector width
+     * @param r       destination array
+     * @param a       first operand array
+     * @param b       second operand array
+     * @param c       third operand array
+     * @param mask    per-element mask bits
+     */
+    @ForceInline
+    public void testInt4Kernel(VectorSpecies<Integer> SPECIES, int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        for (int i = 0; i < SPECIES.loopBound(r.length); i += SPECIES.length()) {
+            VectorMask<Integer> vmask = VectorMask.fromArray(SPECIES, mask , i);
+            IntVector va = IntVector.fromArray(SPECIES, a, i);
+            IntVector vb = IntVector.fromArray(SPECIES, b, i);
+            IntVector vc = IntVector.fromArray(SPECIES, c, i);
+            // Both children are masked; the parent XOR is not.
+            vb.lanewise(VectorOperators.AND, va, vmask)
+            .lanewise(VectorOperators.XOR,
+                      vc.lanewise(VectorOperators.AND, va, vmask))
+           .intoArray(r, i);
+        }
+    }
+
+    // Case 4 at 128-bit width; unlike the other cases the IR rule here counts AndV and XorV nodes.
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"AndV", " > 0 ", "XorV", " > 0 "})
+    public void testInt4_Int128(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt4Kernel(IntVector.SPECIES_128, r, a, b, c, mask);
+    }
+    // Case 4 at 256-bit width, same IR rule.
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"AndV", " > 0 ", "XorV", " > 0 "})
+    public void testInt4_Int256(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt4Kernel(IntVector.SPECIES_256, r, a, b, c, mask);
+    }
+    // Case 4 at 512-bit width, same IR rule.
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"AndV", " > 0 ", "XorV", " > 0 "})
+    public void testInt4_Int512(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt4Kernel(IntVector.SPECIES_512, r, a, b, c, mask);
+    }
+
+    /**
+     * Compares every element of {@code r} with {@code intFunc4} applied to the
+     * matching inputs and throws {@link AssertionError} on the first mismatch.
+     */
+    public void verifyInt4(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        for (int idx = 0; idx < r.length; idx++) {
+            final int want = intFunc4(a[idx], b[idx], c[idx], mask[idx]);
+            if (r[idx] == want) {
+                continue;
+            }
+            final String msg = String.format("testInt4: at #%d: r=%d, expected = %d = intFunc4(%d,%d,%d,%b)",
+                                             idx, r[idx], want, a[idx], b[idx], c[idx], mask[idx]);
+            throw new AssertionError(msg);
+        }
+    }
+
+    // Case 5): Parent is masked with unmasked child expressions.
+    //        P_LOP(mask)
+    //   L_LOP     R_LOP
+
+    /**
+     * Scalar reference for case 5: only the parent XOR is masked, so with the
+     * mask unset the left child {@code a & b} is returned as is.
+     */
+    static int intFunc5(int a, int b, int c, boolean mask) {
+        final int left = a & b;
+        if (!mask) {
+            return left;
+        }
+        return left ^ (a & c);
+    }
+
+    /**
+     * Vector counterpart of {@code intFunc5}: for each whole vector of {@code r}
+     * stores (a AND b) XOR (a AND c), with the XOR applied under the lane mask.
+     * Only indices below {@code SPECIES.loopBound(r.length)} are written.
+     *
+     * @param SPECIES int species selecting the vector width
+     * @param r       destination array
+     * @param a       first operand array
+     * @param b       second operand array
+     * @param c       third operand array
+     * @param mask    per-element mask bits
+     */
+    @ForceInline
+    public void testInt5Kernel(VectorSpecies<Integer> SPECIES, int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        for (int i = 0; i < SPECIES.loopBound(r.length); i += SPECIES.length()) {
+            VectorMask<Integer> vmask = VectorMask.fromArray(SPECIES, mask , i);
+            IntVector va = IntVector.fromArray(SPECIES, a, i);
+            IntVector vb = IntVector.fromArray(SPECIES, b, i);
+            IntVector vc = IntVector.fromArray(SPECIES, c, i);
+            // Only the parent XOR carries the mask.
+            va.lanewise(VectorOperators.AND, vb)
+            .lanewise(VectorOperators.XOR,
+                      va.lanewise(VectorOperators.AND, vc), vmask)
+           .intoArray(r, i);
+        }
+    }
+
+    // Case 5 at 128-bit width; with UseAVX == 3 the IR rule requires at least one MacroLogicV node.
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt5_Int128(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt5Kernel(IntVector.SPECIES_128, r, a, b, c, mask);
+    }
+    // Case 5 at 256-bit width, same IR rule.
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt5_Int256(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt5Kernel(IntVector.SPECIES_256, r, a, b, c, mask);
+    }
+    // Case 5 at 512-bit width, same IR rule.
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt5_Int512(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt5Kernel(IntVector.SPECIES_512, r, a, b, c, mask);
+    }
+
+    /**
+     * Compares every element of {@code r} with {@code intFunc5} applied to the
+     * matching inputs and throws {@link AssertionError} on the first mismatch.
+     */
+    @ForceInline
+    public void verifyInt5(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        for (int idx = 0; idx < r.length; idx++) {
+            final int want = intFunc5(a[idx], b[idx], c[idx], mask[idx]);
+            if (r[idx] == want) {
+                continue;
+            }
+            final String msg = String.format("testInt5: at #%d: r=%d, expected = %d = intFunc5(%d,%d,%d,%b)",
+                                             idx, r[idx], want, a[idx], b[idx], c[idx], mask[idx]);
+            throw new AssertionError(msg);
+        }
+    }
+
+    // Case 6): Parent and right child are masked.
+    //        P_LOP(mask)
+    //   L_LOP     R_LOP(mask)
+
+    /**
+     * Scalar reference for case 6: parent XOR and right AND share one mask.
+     * With the mask unset the left child {@code a & b} is returned; with it set
+     * the right child is necessarily {@code a & c}.
+     */
+    static int intFunc6(int a, int b, int c, boolean mask) {
+        final int left = a & b;
+        if (!mask) {
+            return left;
+        }
+        return left ^ (a & c);
+    }
+
+    /**
+     * Vector counterpart of {@code intFunc6}: for each whole vector of {@code r}
+     * stores (a AND b) XOR (a AND c, masked), with the XOR itself also masked.
+     * Only indices below {@code SPECIES.loopBound(r.length)} are written.
+     *
+     * @param SPECIES int species selecting the vector width
+     * @param r       destination array
+     * @param a       first operand array
+     * @param b       second operand array
+     * @param c       third operand array
+     * @param mask    per-element mask bits
+     */
+    @ForceInline
+    public void testInt6Kernel(VectorSpecies<Integer> SPECIES, int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        for (int i = 0; i < SPECIES.loopBound(r.length); i += SPECIES.length()) {
+            VectorMask<Integer> vmask = VectorMask.fromArray(SPECIES, mask , i);
+            IntVector va = IntVector.fromArray(SPECIES, a, i);
+            IntVector vb = IntVector.fromArray(SPECIES, b, i);
+            IntVector vc = IntVector.fromArray(SPECIES, c, i);
+            // Parent and right child are masked; left child is not.
+            va.lanewise(VectorOperators.AND, vb)
+            .lanewise(VectorOperators.XOR,
+                      va.lanewise(VectorOperators.AND, vc, vmask), vmask)
+           .intoArray(r, i);
+        }
+    }
+    // Case 6 at 128-bit width; with UseAVX == 3 the IR rule requires at least one MacroLogicV node.
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt6_Int128(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt6Kernel(IntVector.SPECIES_128, r, a, b, c, mask);
+    }
+    // Case 6 at 256-bit width, same IR rule.
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt6_Int256(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt6Kernel(IntVector.SPECIES_256, r, a, b, c, mask);
+    }
+    // Case 6 at 512-bit width, same IR rule.
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt6_Int512(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt6Kernel(IntVector.SPECIES_512, r, a, b, c, mask);
+    }
+
+
+    /**
+     * Compares every element of {@code r} with {@code intFunc6} applied to the
+     * matching inputs and throws {@link AssertionError} on the first mismatch.
+     */
+    public void verifyInt6(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        for (int idx = 0; idx < r.length; idx++) {
+            final int want = intFunc6(a[idx], b[idx], c[idx], mask[idx]);
+            if (r[idx] == want) {
+                continue;
+            }
+            final String msg = String.format("testInt6: at #%d: r=%d, expected = %d = intFunc6(%d,%d,%d,%b)",
+                                             idx, r[idx], want, a[idx], b[idx], c[idx], mask[idx]);
+            throw new AssertionError(msg);
+        }
+    }
+
+    // Case 7): Parent and left child are masked.
+    //            P_LOP(mask)
+    //   L_LOP(mask)       R_LOP
+
+    /**
+     * Scalar reference for case 7: parent XOR and left AND share one mask.
+     * With the mask unset {@code a} is returned untouched; with it set the
+     * result is {@code (a & b) ^ (a & c)}.
+     */
+    static int intFunc7(int a, int b, int c, boolean mask) {
+        if (!mask) {
+            return a;
+        }
+        final int left = a & b;
+        final int right = a & c;
+        return left ^ right;
+    }
+
+    /**
+     * Vector counterpart of {@code intFunc7}: for each whole vector of {@code r}
+     * stores (a AND b, masked) XOR (a AND c), with the XOR itself also masked.
+     * Only indices below {@code SPECIES.loopBound(r.length)} are written.
+     *
+     * @param SPECIES int species selecting the vector width
+     * @param r       destination array
+     * @param a       first operand array
+     * @param b       second operand array
+     * @param c       third operand array
+     * @param mask    per-element mask bits
+     */
+    @ForceInline
+    public void testInt7Kernel(VectorSpecies<Integer> SPECIES, int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        for (int i = 0; i < SPECIES.loopBound(r.length); i += SPECIES.length()) {
+            VectorMask<Integer> vmask = VectorMask.fromArray(SPECIES, mask , i);
+            IntVector va = IntVector.fromArray(SPECIES, a, i);
+            IntVector vb = IntVector.fromArray(SPECIES, b, i);
+            IntVector vc = IntVector.fromArray(SPECIES, c, i);
+            // Parent and left child are masked; right child is not.
+            va.lanewise(VectorOperators.AND, vb, vmask)
+            .lanewise(VectorOperators.XOR,
+                      va.lanewise(VectorOperators.AND, vc), vmask)
+           .intoArray(r, i);
+        }
+    }
+
+    // Case 7 at 128-bit width; with UseAVX == 3 the IR rule requires at least one MacroLogicV node.
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt7_Int128(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt7Kernel(IntVector.SPECIES_128, r, a, b, c, mask);
+    }
+    // Case 7 at 256-bit width, same IR rule.
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt7_Int256(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt7Kernel(IntVector.SPECIES_256, r, a, b, c, mask);
+    }
+    // Case 7 at 512-bit width, same IR rule.
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt7_Int512(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt7Kernel(IntVector.SPECIES_512, r, a, b, c, mask);
+    }
+
+    /**
+     * Compares every element of {@code r} with {@code intFunc7} applied to the
+     * matching inputs and throws {@link AssertionError} on the first mismatch.
+     */
+    public void verifyInt7(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        for (int idx = 0; idx < r.length; idx++) {
+            final int want = intFunc7(a[idx], b[idx], c[idx], mask[idx]);
+            if (r[idx] == want) {
+                continue;
+            }
+            final String msg = String.format("testInt7: at #%d: r=%d, expected = %d = intFunc7(%d,%d,%d,%b)",
+                                             idx, r[idx], want, a[idx], b[idx], c[idx], mask[idx]);
+            throw new AssertionError(msg);
+        }
+    }
+
+    // Case 8): Parent and both child expressions are masked.
+    //            P_LOP(mask)
+    //   L_LOP(mask)       R_LOP (mask)
+
+    /**
+     * Scalar reference for case 8: parent and both children share one mask.
+     * With the mask unset {@code b} is returned untouched; with it set the
+     * result is {@code (b & a) ^ (c & a)}.
+     */
+    static int intFunc8(int a, int b, int c, boolean mask) {
+        if (!mask) {
+            return b;
+        }
+        final int left = b & a;
+        final int right = c & a;
+        return left ^ right;
+    }
+
+    /**
+     * Vector counterpart of {@code intFunc8}: for each whole vector of {@code r}
+     * stores (b AND a, masked) XOR (c AND a, masked), with the XOR also masked.
+     * Only indices below {@code SPECIES.loopBound(r.length)} are written.
+     *
+     * @param SPECIES int species selecting the vector width
+     * @param r       destination array
+     * @param a       first operand array
+     * @param b       second operand array
+     * @param c       third operand array
+     * @param mask    per-element mask bits
+     */
+    @ForceInline
+    public void testInt8Kernel(VectorSpecies<Integer> SPECIES, int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        for (int i = 0; i < SPECIES.loopBound(r.length); i += SPECIES.length()) {
+            VectorMask<Integer> vmask = VectorMask.fromArray(SPECIES, mask , i);
+            IntVector va = IntVector.fromArray(SPECIES, a, i);
+            IntVector vb = IntVector.fromArray(SPECIES, b, i);
+            IntVector vc = IntVector.fromArray(SPECIES, c, i);
+            // Parent and both children are masked.
+            vb.lanewise(VectorOperators.AND, va, vmask)
+            .lanewise(VectorOperators.XOR,
+                      vc.lanewise(VectorOperators.AND, va, vmask), vmask)
+           .intoArray(r, i);
+        }
+    }
+
+    // Case 8 at 128-bit width; with UseAVX == 3 the IR rule requires at least one MacroLogicV node.
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt8_Int128(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt8Kernel(IntVector.SPECIES_128, r, a, b, c, mask);
+    }
+    // Case 8 at 256-bit width, same IR rule.
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt8_Int256(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt8Kernel(IntVector.SPECIES_256, r, a, b, c, mask);
+    }
+    // Case 8 at 512-bit width, same IR rule.
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testInt8_Int512(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        testInt8Kernel(IntVector.SPECIES_512, r, a, b, c, mask);
+    }
+
+    /**
+     * Compares every element of {@code r} with {@code intFunc8} applied to the
+     * matching inputs and throws {@link AssertionError} on the first mismatch.
+     */
+    public void verifyInt8(int[] r, int[] a, int[] b, int[] c, boolean [] mask) {
+        for (int idx = 0; idx < r.length; idx++) {
+            final int want = intFunc8(a[idx], b[idx], c[idx], mask[idx]);
+            if (r[idx] == want) {
+                continue;
+            }
+            final String msg = String.format("testInt8: at #%d: r=%d, expected = %d = intFunc8(%d,%d,%d,%b)",
+                                             idx, r[idx], want, a[idx], b[idx], c[idx], mask[idx]);
+            throw new AssertionError(msg);
+        }
+    }
+
+
+    // ===================================================== //
+
+    /**
+     * Scalar reference for the unmasked long test: ANDs the XOR of the three
+     * pairwise ANDs with the OR of the three "one set, next clear" terms.
+     */
+    static long longFunc(long a, long b, long c) {
+        final long pairwiseXor = (a & b) ^ (a & c) ^ (b & c);
+        final long mixedOr = (~a & b) | (~b & c) | (~c & a);
+        return pairwiseXor & mixedOr;
+    }
+
+    /**
+     * Vector counterpart of {@code longFunc}: for each whole vector of {@code r}
+     * stores ((a&b) ^ (a&c) ^ (b&c)) & ((~a&b) | (~b&c) | (~c&a)).
+     * Only indices below {@code SPECIES.loopBound(r.length)} are written.
+     *
+     * @param SPECIES long species selecting the vector width
+     * @param r       destination array
+     * @param a       first operand array
+     * @param b       second operand array
+     * @param c       third operand array
+     */
+    @ForceInline
+    public void testLongKernel(VectorSpecies<Long> SPECIES, long[] r, long[] a, long[] b, long[] c) {
+        for (int i = 0; i < SPECIES.loopBound(r.length); i  +=  SPECIES.length()) {
+            LongVector va = LongVector.fromArray(SPECIES, a, i);
+            LongVector vb = LongVector.fromArray(SPECIES, b, i);
+            LongVector vc = LongVector.fromArray(SPECIES, c, i);
+
+            va.lanewise(VectorOperators.AND, vb)
+            .lanewise(VectorOperators.XOR, va.lanewise(VectorOperators.AND, vc))
+            .lanewise(VectorOperators.XOR, vb.lanewise(VectorOperators.AND, vc))
+            .lanewise(VectorOperators.AND,
+                       va.lanewise(VectorOperators.NOT).lanewise(VectorOperators.AND, vb)
+                      .lanewise(VectorOperators.OR, vb.lanewise(VectorOperators.NOT).lanewise(VectorOperators.AND, vc))
+                      .lanewise(VectorOperators.OR, vc.lanewise(VectorOperators.NOT).lanewise(VectorOperators.AND, va)))
+            .intoArray(r, i);
+        }
+    }
+
+    // Unmasked long test at 256-bit width; with UseAVX == 3 the IR rule requires at least one MacroLogicV node.
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testLong_Long256(long[] r, long[] a, long[] b, long[] c) {
+        testLongKernel(LongVector.SPECIES_256, r, a, b, c);
+    }
+    // Unmasked long test at 512-bit width, same IR rule.
+    @Test
+    @IR(applyIf = {"UseAVX", "3"}, counts = {"MacroLogicV", " > 0 "})
+    public void testLong_Long512(long[] r, long[] a, long[] b, long[] c) {
+        testLongKernel(LongVector.SPECIES_512, r, a, b, c);
+    }
+
+    /**
+     * Compares every element of {@code r} with {@code longFunc} applied to the
+     * matching inputs and throws {@link AssertionError} on the first mismatch.
+     */
+    public void verifyLong(long[] r, long[] a, long[] b, long[] c) {
+        for (int idx = 0; idx < r.length; idx++) {
+            final long want = longFunc(a[idx], b[idx], c[idx]);
+            if (r[idx] == want) {
+                continue;
+            }
+            final String msg = String.format("testLong: at #%d: r=%d, expected = %d = longFunc(%d,%d,%d)",
+                                             idx, r[idx], want, a[idx], b[idx], c[idx]);
+            throw new AssertionError(msg);
+        }
+    }
+
+    // ===================================================== //
+
+    // Shared random source for all fill*Random helpers, obtained from the test library's Utils.
+    private static final Random R = Utils.getRandomInstance();
+
+    /**
+     * Obtains an array from {@code factory} and overwrites every element with a
+     * random boolean drawn from {@code R}. Any exception is rethrown wrapped in
+     * an {@link InternalError}.
+     */
+    static boolean[] fillBooleanRandom(Callable<boolean[]> factory) {
+        try {
+            final boolean[] data = factory.call();
+            int idx = 0;
+            while (idx < data.length) {
+                data[idx++] = R.nextBoolean();
+            }
+            return data;
+        } catch (Exception ex) {
+            throw new InternalError(ex);
+        }
+    }
+    /**
+     * Obtains an array from {@code factory} and overwrites every element with a
+     * random int from {@code R} narrowed to {@code char}. Any exception is
+     * rethrown wrapped in an {@link InternalError}.
+     */
+    static char[] fillCharRandom(Callable<char[]> factory) {
+        try {
+            final char[] data = factory.call();
+            int idx = 0;
+            while (idx < data.length) {
+                data[idx++] = (char)R.nextInt();
+            }
+            return data;
+        } catch (Exception ex) {
+            throw new InternalError(ex);
+        }
+    }
+    /**
+     * Obtains an array from {@code factory} and overwrites every element with a
+     * random int drawn from {@code R}. Any exception is rethrown wrapped in an
+     * {@link InternalError}.
+     */
+    static int[] fillIntRandom(Callable<int[]> factory) {
+        try {
+            final int[] data = factory.call();
+            int idx = 0;
+            while (idx < data.length) {
+                data[idx++] = R.nextInt();
+            }
+            return data;
+        } catch (Exception ex) {
+            throw new InternalError(ex);
+        }
+    }
+    /**
+     * Obtains an array from {@code factory} and overwrites every element with a
+     * random long drawn from {@code R}. Any exception is rethrown wrapped in an
+     * {@link InternalError}.
+     */
+    static long[] fillLongRandom(Callable<long[]> factory) {
+        try {
+            final long[] data = factory.call();
+            int idx = 0;
+            while (idx < data.length) {
+                data[idx++] = R.nextLong();
+            }
+            return data;
+        } catch (Exception ex) {
+            throw new InternalError(ex);
+        }
+    }
+
+    // ===================================================== //
+
+    // Element count of every input/output array allocated by the constructor.
+    static final int SIZE = 512;
+
+    // Standalone driver for testInt4_Int128: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt4_Int128"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt4_Int128() {
+        for (int i = 0; i < 10000; i++) {
+            testInt4_Int128(r, a, b, c, mask);
+            verifyInt4(r, a, b, c, mask);
+        }
+    }
+    // Standalone driver for testInt4_Int256: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt4_Int256"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt4_Int256() {
+        for (int i = 0; i < 10000; i++) {
+            testInt4_Int256(r, a, b, c, mask);
+            verifyInt4(r, a, b, c, mask);
+        }
+    }
+    // Standalone driver for testInt4_Int512: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt4_Int512"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt4_Int512() {
+        for (int i = 0; i < 10000; i++) {
+            testInt4_Int512(r, a, b, c, mask);
+            verifyInt4(r, a, b, c, mask);
+        }
+    }
+
+    // Standalone driver for testSubWordBoolean on the boolean arrays: 10000 rounds, each verified.
+    @Run(test = {"testSubWordBoolean"}, mode = RunMode.STANDALONE)
+    public void kernel_test_SubWordBoolean() {
+        for (int i = 0; i < 10000; i++) {
+            testSubWordBoolean(br, ba, bb);
+            verifySubWordBoolean(br, ba, bb);
+        }
+    }
+
+    // Standalone driver for testSubWordChar (short result, char inputs): 10000 rounds, each verified.
+    @Run(test = {"testSubWordChar"}, mode = RunMode.STANDALONE)
+    public void kernel_test_SubWordChar() {
+        for (int i = 0; i < 10000; i++) {
+            testSubWordChar(sr, ca, cb);
+            verifySubWordChar(sr, ca, cb);
+        }
+    }
+
+    // Standalone driver for testInt1_Int128 (no mask argument): 10000 rounds, each verified.
+    @Run(test = {"testInt1_Int128"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt1_Int128() {
+        for (int i = 0; i < 10000; i++) {
+            testInt1_Int128(r, a, b, c);
+            verifyInt1(r, a, b, c);
+        }
+    }
+    // Standalone driver for testInt1_Int256 (no mask argument): 10000 rounds, each verified.
+    @Run(test = {"testInt1_Int256"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt1_Int256() {
+        for (int i = 0; i < 10000; i++) {
+            testInt1_Int256(r, a, b, c);
+            verifyInt1(r, a, b, c);
+        }
+    }
+    // Standalone driver for testInt1_Int512 (no mask argument): 10000 rounds, each verified.
+    @Run(test = {"testInt1_Int512"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt1_Int512() {
+        for (int i = 0; i < 10000; i++) {
+            testInt1_Int512(r, a, b, c);
+            verifyInt1(r, a, b, c);
+        }
+    }
+
+    // Standalone driver for testInt2_Int128: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt2_Int128"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt2_Int128() {
+        for (int i = 0; i < 10000; i++) {
+            testInt2_Int128(r, a, b, c, mask);
+            verifyInt2(r, a, b, c, mask);
+        }
+    }
+    // Standalone driver for testInt2_Int256: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt2_Int256"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt2_Int256() {
+        for (int i = 0; i < 10000; i++) {
+            testInt2_Int256(r, a, b, c, mask);
+            verifyInt2(r, a, b, c, mask);
+        }
+    }
+    // Standalone driver for testInt2_Int512: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt2_Int512"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt2_Int512() {
+        for (int i = 0; i < 10000; i++) {
+            testInt2_Int512(r, a, b, c, mask);
+            verifyInt2(r, a, b, c, mask);
+        }
+    }
+
+    // Standalone driver for testInt3_Int128: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt3_Int128"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt3_Int128() {
+        for (int i = 0; i < 10000; i++) {
+            testInt3_Int128(r, a, b, c, mask);
+            verifyInt3(r, a, b, c, mask);
+        }
+    }
+    // Standalone driver for testInt3_Int256: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt3_Int256"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt3_Int256() {
+        for (int i = 0; i < 10000; i++) {
+            testInt3_Int256(r, a, b, c, mask);
+            verifyInt3(r, a, b, c, mask);
+        }
+    }
+    // Standalone driver for testInt3_Int512: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt3_Int512"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt3_Int512() {
+        for (int i = 0; i < 10000; i++) {
+            testInt3_Int512(r, a, b, c, mask);
+            verifyInt3(r, a, b, c, mask);
+        }
+    }
+
+    // Standalone driver for testInt5_Int128: 10000 rounds, each followed by a scalar check.
+    // NOTE(review): method name omits the "Int" infix used by the sibling drivers.
+    @Run(test = {"testInt5_Int128"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt5_128() {
+        for (int i = 0; i < 10000; i++) {
+            testInt5_Int128(r, a, b, c, mask);
+            verifyInt5(r, a, b, c, mask);
+        }
+    }
+    // Standalone driver for testInt5_Int256: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt5_Int256"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt5_256() {
+        for (int i = 0; i < 10000; i++) {
+            testInt5_Int256(r, a, b, c, mask);
+            verifyInt5(r, a, b, c, mask);
+        }
+    }
+    // Standalone driver for testInt5_Int512: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt5_Int512"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt5_Int512() {
+        for (int i = 0; i < 10000; i++) {
+            testInt5_Int512(r, a, b, c, mask);
+            verifyInt5(r, a, b, c, mask);
+        }
+    }
+
+    // Standalone driver for testInt6_Int128: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt6_Int128"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt6_Int128() {
+        for (int i = 0; i < 10000; i++) {
+            testInt6_Int128(r, a, b, c, mask);
+            verifyInt6(r, a, b, c, mask);
+        }
+    }
+    // Standalone driver for testInt6_Int256: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt6_Int256"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt6_Int256() {
+        for (int i = 0; i < 10000; i++) {
+            testInt6_Int256(r, a, b, c, mask);
+            verifyInt6(r, a, b, c, mask);
+        }
+    }
+    // Standalone driver for testInt6_Int512: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt6_Int512"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt6_Int512() {
+        for (int i = 0; i < 10000; i++) {
+            testInt6_Int512(r, a, b, c, mask);
+            verifyInt6(r, a, b, c, mask);
+        }
+    }
+
+    // Standalone driver for testInt7_Int128: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt7_Int128"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt7_Int128() {
+        for (int i = 0; i < 10000; i++) {
+            testInt7_Int128(r, a, b, c, mask);
+            verifyInt7(r, a, b, c, mask);
+        }
+    }
+    // Standalone driver for testInt7_Int256: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt7_Int256"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt7_Int256() {
+        for (int i = 0; i < 10000; i++) {
+            testInt7_Int256(r, a, b, c, mask);
+            verifyInt7(r, a, b, c, mask);
+        }
+    }
+    // Standalone driver for testInt7_Int512: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt7_Int512"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt7_Int512() {
+        for (int i = 0; i < 10000; i++) {
+            testInt7_Int512(r, a, b, c, mask);
+            verifyInt7(r, a, b, c, mask);
+        }
+    }
+
+    // Standalone driver for testInt8_Int128: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt8_Int128"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt8_Int128() {
+        for (int i = 0; i < 10000; i++) {
+            testInt8_Int128(r, a, b, c, mask);
+            verifyInt8(r, a, b, c, mask);
+        }
+    }
+    // Standalone driver for testInt8_Int256: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt8_Int256"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt8_Int256() {
+        for (int i = 0; i < 10000; i++) {
+            testInt8_Int256(r, a, b, c, mask);
+            verifyInt8(r, a, b, c, mask);
+        }
+    }
+    // Standalone driver for testInt8_Int512: 10000 rounds, each followed by a scalar check.
+    @Run(test = {"testInt8_Int512"}, mode = RunMode.STANDALONE)
+    public void kernel_testInt8_Int512() {
+        for (int i = 0; i < 10000; i++) {
+            testInt8_Int512(r, a, b, c, mask);
+            verifyInt8(r, a, b, c, mask);
+        }
+    }
+
+    // Standalone driver for testLong_Long256 on the long arrays: 10000 rounds, each verified.
+    @Run(test = {"testLong_Long256"}, mode = RunMode.STANDALONE)
+    public void kernel_testLong_Long256() {
+        for (int i = 0; i < 10000; i++) {
+            testLong_Long256(rl, al, bl, cl);
+            verifyLong(rl, al, bl, cl);
+        }
+    }
+    // Standalone driver for testLong_Long512 on the long arrays: 10000 rounds, each verified.
+    @Run(test = {"testLong_Long512"}, mode = RunMode.STANDALONE)
+    public void kernel_testLong_Long512() {
+        for (int i = 0; i < 10000; i++) {
+            testLong_Long512(rl, al, bl, cl);
+            verifyLong(rl, al, bl, cl);
+        }
+    }
+
+    /**
+     * Allocates the result arrays and fills every input array (and the lane
+     * mask) with random data. Each array holds {@code SIZE} elements.
+     */
+    public TestMaskedMacroLogicVector() {
+        // Sub-word boolean operands.
+        br = new boolean[SIZE];
+        ba = fillBooleanRandom(() -> new boolean[SIZE]);
+        bb = fillBooleanRandom(() -> new boolean[SIZE]);
+
+        // Sub-word char operands with a short result array.
+        sr = new short[SIZE];
+        ca = fillCharRandom(() -> new char[SIZE]);
+        cb = fillCharRandom(() -> new char[SIZE]);
+
+        // Int operands.
+        r = new int[SIZE];
+        a = fillIntRandom(() -> new int[SIZE]);
+        b = fillIntRandom(() -> new int[SIZE]);
+        c = fillIntRandom(() -> new int[SIZE]);
+        d = fillIntRandom(() -> new int[SIZE]);
+        e = fillIntRandom(() -> new int[SIZE]);
+        f = fillIntRandom(() -> new int[SIZE]);
+
+        // Long operands.
+        rl = new long[SIZE];
+        al = fillLongRandom(() -> new long[SIZE]);
+        bl = fillLongRandom(() -> new long[SIZE]);
+        cl = fillLongRandom(() -> new long[SIZE]);
+
+        // Lane mask shared by all masked tests.
+        mask = fillBooleanRandom(() -> new boolean[SIZE]);
+    }
+
+    // Entry point: hands control to the IR test framework with the VM flags listed below
+    // (tiered compilation off, UseAVX=3, incubator vector module added, scaled compile threshold).
+    public static void main(String[] args) {
+        TestFramework.runWithFlags("-XX:-TieredCompilation",
+                                   "-XX:UseAVX=3",
+                                   "--add-modules=jdk.incubator.vector",
+                                   "-XX:CompileThresholdScaling=0.3");
+    }
+}
--- a/test/micro/org/openjdk/bench/jdk/incubator/vector/MaskedLogicOpts.java
+++ b/test/micro/org/openjdk/bench/jdk/incubator/vector/MaskedLogicOpts.java
@ -0,0 +1,331 @@
+/*
+ * Copyright (c) 2022, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.jdk.incubator.vector;
+
+import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.infra.*;
+
+import jdk.incubator.vector.*;
+import java.util.concurrent.TimeUnit;
+import java.util.Random;
+
+@BenchmarkMode(Mode.Throughput)
+@OutputTimeUnit(TimeUnit.SECONDS)
+@State(Scope.Thread)
+public class MaskedLogicOpts {
+    // Benchmark parameter: length of every operand array.
+    @Param({"256","512","1024"})
+    private int ARRAYLEN;
+
+    // Fixed 64-entry lane mask pattern; kernels load it from offset 0.
+    boolean [] mask_arr = {
+        false, false, false, true, false, false, false, false,
+        false, false, false, true, false, false, false, false,
+        false, false, false, true, false, false, false, false,
+        true, true, true, true, true, true, true, true,
+        true, true, true, true, true, true, true, true,
+        false, false, false, true, false, false, false, false,
+        false, false, false, true, false, false, false, false,
+        false, false, false, true, false, false, false, false
+    };
+
+    // Number of outer-loop repetitions performed by each kernel per benchmark call.
+    int INVOC_COUNTER = 4096;
+
+    // NOTE(review): ARRAYLEN has no initializer, so it is still 0 when these field
+    // initializers run and the arrays below start out empty; init() reallocates them.
+    int [] i1 = new int[ARRAYLEN];
+    int [] i2 = new int[ARRAYLEN];
+    int [] i3 = new int[ARRAYLEN];
+    int [] i4 = new int[ARRAYLEN];
+    int [] i5 = new int[ARRAYLEN];
+
+    long [] l1 = new long[ARRAYLEN];
+    long [] l2 = new long[ARRAYLEN];
+    long [] l3 = new long[ARRAYLEN];
+    long [] l4 = new long[ARRAYLEN];
+    long [] l5 = new long[ARRAYLEN];
+
+    // Loop-invariant int operand vectors, assigned by the int kernels.
+    Vector<Integer> iv1;
+    Vector<Integer> iv2;
+    Vector<Integer> iv3;
+    Vector<Integer> iv4;
+    Vector<Integer> iv5;
+
+    // Loop-invariant long operand vectors, assigned by the long kernels.
+    Vector<Long> lv1;
+    Vector<Long> lv2;
+    Vector<Long> lv3;
+    Vector<Long> lv4;
+    Vector<Long> lv5;
+
+    // Lane masks built from mask_arr by the kernels.
+    VectorMask<Integer> imask;
+    VectorMask<Long> lmask;
+
+    VectorSpecies<Integer> ispecies;
+    VectorSpecies<Long> lspecies;
+
+    // Rotating load offsets; reset in init() and advanced in init_per_invoc().
+    int int512_arr_idx;
+    int int256_arr_idx;
+    int int128_arr_idx;
+    int long256_arr_idx;
+    int long512_arr_idx;
+
+    // Unseeded random source used by init() to fill the operand arrays.
+    private Random r = new Random();
+
+    /**
+     * Per-trial setup: zeroes the rotating load offsets, allocates all operand
+     * arrays with {@code ARRAYLEN} elements and fills them with random values.
+     */
+    @Setup(Level.Trial)
+    public void init() {
+        int512_arr_idx = int256_arr_idx = int128_arr_idx = 0;
+        long256_arr_idx = long512_arr_idx = 0;
+
+        final int len = ARRAYLEN;
+        i1 = new int[len];
+        i2 = new int[len];
+        i3 = new int[len];
+        i4 = new int[len];
+        i5 = new int[len];
+
+        l1 = new long[len];
+        l2 = new long[len];
+        l3 = new long[len];
+        l4 = new long[len];
+        l5 = new long[len];
+
+        // Draw order per index is kept: five ints first, then five longs.
+        for (int idx = 0; idx < len; idx++) {
+            i1[idx] = r.nextInt();
+            i2[idx] = r.nextInt();
+            i3[idx] = r.nextInt();
+            i4[idx] = r.nextInt();
+            i5[idx] = r.nextInt();
+
+            l1[idx] = r.nextLong();
+            l2[idx] = r.nextLong();
+            l3[idx] = r.nextLong();
+            l4[idx] = r.nextLong();
+            l5[idx] = r.nextLong();
+        }
+
+    }
+
+    /**
+     * Per-invocation setup: advances each rotating load offset by its own step
+     * and wraps it by AND-ing with {@code ARRAYLEN - 1}.
+     */
+    @Setup(Level.Invocation)
+    public void init_per_invoc() {
+        final int wrap = ARRAYLEN - 1;
+        int512_arr_idx = (int512_arr_idx + 16) & wrap;
+        int256_arr_idx = (int256_arr_idx + 8) & wrap;
+        int128_arr_idx = (int128_arr_idx + 4) & wrap;
+        long512_arr_idx = (long512_arr_idx + 8) & wrap;
+        long256_arr_idx = (long256_arr_idx + 4) & wrap;
+    }
+
+    // Fully masked int kernel: every lanewise step in the chain carries imask.
+    // iv2..iv5 are loaded once at int512_arr_idx (for every species) and reused;
+    // i1 is read and written back in place, INVOC_COUNTER passes over the whole array.
+    @CompilerControl(CompilerControl.Mode.INLINE)
+    public void maskedLogicKernel(VectorSpecies<Integer> SPECIES) {
+        imask = VectorMask.fromArray(SPECIES, mask_arr, 0);
+        iv2 = IntVector.fromArray(SPECIES, i2, int512_arr_idx);
+        iv3 = IntVector.fromArray(SPECIES, i3, int512_arr_idx);
+        iv4 = IntVector.fromArray(SPECIES, i4, int512_arr_idx);
+        iv5 = IntVector.fromArray(SPECIES, i5, int512_arr_idx);
+        for(int i = 0; i < INVOC_COUNTER; i++) {
+            for(int j = 0 ; j < ARRAYLEN; j+= SPECIES.length()) {
+                IntVector.fromArray(SPECIES, i1, j)
+                    .lanewise(VectorOperators.AND, iv2, imask)
+                    .lanewise(VectorOperators.OR,  iv2, imask)
+                    .lanewise(VectorOperators.AND, iv3, imask)
+                    .lanewise(VectorOperators.OR,  iv3, imask)
+                    .lanewise(VectorOperators.AND, iv4, imask)
+                    .lanewise(VectorOperators.OR,  iv4, imask)
+                    .lanewise(VectorOperators.AND, iv5, imask)
+                    .lanewise(VectorOperators.XOR, iv5, imask)
+                    .intoArray(i1, j);
+            }
+        }
+    }
+
+    // Fully masked int chain at 512-bit width.
+    @Benchmark
+    public void maskedLogicOperationsInt512() {
+       maskedLogicKernel(IntVector.SPECIES_512);
+    }
+
+    // Fully masked int chain at 256-bit width.
+    @Benchmark
+    public void maskedLogicOperationsInt256() {
+       maskedLogicKernel(IntVector.SPECIES_256);
+    }
+
+    // Fully masked int chain at 128-bit width.
+    @Benchmark
+    public void maskedLogicOperationsInt128() {
+       maskedLogicKernel(IntVector.SPECIES_128);
+    }
+
+    // Partially masked int kernel: the iv3 steps are unmasked, all other steps carry imask.
+    // iv2..iv5 are loaded once at int512_arr_idx (for every species) and reused;
+    // i1 is read and written back in place, INVOC_COUNTER passes over the whole array.
+    @CompilerControl(CompilerControl.Mode.INLINE)
+    public void partiallyMaskedLogicOperationsIntKernel(VectorSpecies<Integer> SPECIES) {
+       imask = VectorMask.fromArray(SPECIES, mask_arr, 0);
+       iv2 = IntVector.fromArray(SPECIES, i2, int512_arr_idx);
+       iv3 = IntVector.fromArray(SPECIES, i3, int512_arr_idx);
+       iv4 = IntVector.fromArray(SPECIES, i4, int512_arr_idx);
+       iv5 = IntVector.fromArray(SPECIES, i5, int512_arr_idx);
+       for(int i = 0; i < INVOC_COUNTER; i++) {
+           for(int j = 0 ; j < ARRAYLEN; j+= SPECIES.length()) {
+               IntVector.fromArray(SPECIES, i1, j)
+                   .lanewise(VectorOperators.AND, iv2, imask)
+                   .lanewise(VectorOperators.OR,  iv2, imask)
+                   .lanewise(VectorOperators.AND, iv3)
+                   .lanewise(VectorOperators.OR,  iv3)
+                   .lanewise(VectorOperators.OR,  iv4, imask)
+                   .lanewise(VectorOperators.AND, iv4, imask)
+                   .lanewise(VectorOperators.XOR, iv5, imask)
+                   .intoArray(i1, j);
+           }
+       }
+    }
+
+    // Partially masked int chain at 512-bit width.
+    @Benchmark
+    public void partiallyMaskedLogicOperationsInt512() {
+        partiallyMaskedLogicOperationsIntKernel(IntVector.SPECIES_512);
+    }
+
+    // Partially masked int chain at 256-bit width.
+    @Benchmark
+    public void partiallyMaskedLogicOperationsInt256() {
+        partiallyMaskedLogicOperationsIntKernel(IntVector.SPECIES_256);
+    }
+
+    // Partially masked int chain at 128-bit width.
+    @Benchmark
+    public void partiallyMaskedLogicOperationsInt128() {
+        partiallyMaskedLogicOperationsIntKernel(IntVector.SPECIES_128);
+    }
+
+    // Masked BITWISE_BLEND kernel: three chained ternary blends, each under imask.
+    // iv2..iv5 are loaded once at int512_arr_idx (for every species) and reused;
+    // i1 is read and written back in place, INVOC_COUNTER passes over the whole array.
+    @CompilerControl(CompilerControl.Mode.INLINE)
+    public void bitwiseBlendOperationIntKernel(VectorSpecies<Integer> SPECIES) {
+       imask = VectorMask.fromArray(SPECIES, mask_arr, 0);
+       iv2 = IntVector.fromArray(SPECIES, i2, int512_arr_idx);
+       iv3 = IntVector.fromArray(SPECIES, i3, int512_arr_idx);
+       iv4 = IntVector.fromArray(SPECIES, i4, int512_arr_idx);
+       iv5 = IntVector.fromArray(SPECIES, i5, int512_arr_idx);
+       for(int i = 0; i < INVOC_COUNTER; i++) {
+           for(int j = 0 ; j < ARRAYLEN; j+= SPECIES.length()) {
+               IntVector.fromArray(SPECIES, i1, j)
+                   .lanewise(VectorOperators.BITWISE_BLEND, iv2, iv3, imask)
+                   .lanewise(VectorOperators.BITWISE_BLEND, iv3, iv4, imask)
+                   .lanewise(VectorOperators.BITWISE_BLEND, iv4, iv5, imask)
+                   .intoArray(i1, j);
+           }
+       }
+    }
+
+    /**
+     * Benchmark entry point: runs the int BITWISE_BLEND kernel with the
+     * 512-bit IntVector species.
+     */
+    @Benchmark
+    public void bitwiseBlendOperationInt512() {
+       bitwiseBlendOperationIntKernel(IntVector.SPECIES_512);
+    }
+
+    /**
+     * Benchmark entry point: runs the int BITWISE_BLEND kernel with the
+     * 256-bit IntVector species.
+     */
+    @Benchmark
+    public void bitwiseBlendOperationInt256() {
+       bitwiseBlendOperationIntKernel(IntVector.SPECIES_256);
+    }
+
+    /**
+     * Benchmark entry point: runs the int BITWISE_BLEND kernel with the
+     * 128-bit IntVector species.
+     */
+    @Benchmark
+    public void bitwiseBlendOperationInt128() {
+       bitwiseBlendOperationIntKernel(IntVector.SPECIES_128);
+    }
+
+    /**
+     * Shared kernel for the fully masked long logic benchmarks.
+     *
+     * Loads the mask from mask_arr (offset 0) and the operand vectors
+     * lv2..lv5 from l2..l5 (offset long256_arr_idx) once, then makes
+     * INVOC_COUNTER passes over l1 in steps of SPECIES.length(). Each step
+     * applies a chain of AND/OR/XOR operations, every one of them under
+     * lmask, and stores the result back into l1 at the same index.
+     *
+     * @param SPECIES vector species used for the mask, all loads and the store
+     */
+    @CompilerControl(CompilerControl.Mode.INLINE)
+    public void maskedLogicOperationsLongKernel(VectorSpecies<Long> SPECIES) {
+       lmask = VectorMask.fromArray(SPECIES, mask_arr, 0);
+       // NOTE(review): this kernel reads its operands at long256_arr_idx, while
+       // the other Long kernels in this file use long512_arr_idx -- confirm
+       // which offset is intended.
+       lv2 = LongVector.fromArray(SPECIES, l2, long256_arr_idx);
+       lv3 = LongVector.fromArray(SPECIES, l3, long256_arr_idx);
+       lv4 = LongVector.fromArray(SPECIES, l4, long256_arr_idx);
+       lv5 = LongVector.fromArray(SPECIES, l5, long256_arr_idx);
+       for(int i = 0; i < INVOC_COUNTER; i++) {
+           for(int j = 0 ; j < ARRAYLEN; j+= SPECIES.length()) {
+               // The result overwrites l1, so every outer pass consumes the
+               // output of the previous pass.
+               LongVector.fromArray(SPECIES, l1, j)
+                   .lanewise(VectorOperators.AND, lv2, lmask)
+                   .lanewise(VectorOperators.OR,  lv3, lmask)
+                   .lanewise(VectorOperators.AND, lv3, lmask)
+                   .lanewise(VectorOperators.OR,  lv4, lmask)
+                   .lanewise(VectorOperators.AND, lv4, lmask)
+                   .lanewise(VectorOperators.XOR, lv5, lmask)
+                   .intoArray(l1, j);
+           }
+       }
+    }
+
+    /**
+     * Benchmark entry point: runs the fully masked long logic kernel with
+     * the 512-bit LongVector species.
+     */
+    @Benchmark
+    public void maskedLogicOperationsLong512() {
+       maskedLogicOperationsLongKernel(LongVector.SPECIES_512);
+    }
+    /**
+     * Benchmark entry point: runs the fully masked long logic kernel with
+     * the 256-bit LongVector species.
+     */
+    @Benchmark
+    public void maskedLogicOperationsLong256() {
+       maskedLogicOperationsLongKernel(LongVector.SPECIES_256);
+    }
+
+    /**
+     * Shared kernel for the partially masked long logic benchmarks.
+     *
+     * Loads the mask from mask_arr (offset 0) and the operand vectors
+     * lv2..lv5 from l2..l5 (offset long512_arr_idx) once, then makes
+     * INVOC_COUNTER passes over l1 in steps of SPECIES.length(). Each step
+     * applies a chain of AND/OR/XOR operations in which only some links
+     * are executed under lmask, and stores the result back into l1 at the
+     * same index.
+     *
+     * @param SPECIES vector species used for the mask, all loads and the store
+     */
+    @CompilerControl(CompilerControl.Mode.INLINE)
+    public void partiallyMaskedLogicOperationsLongKernel(VectorSpecies<Long> SPECIES) {
+       lmask = VectorMask.fromArray(SPECIES, mask_arr, 0);
+       // NOTE(review): operands are read at long512_arr_idx for every species,
+       // including the 256-bit caller -- confirm this is intended.
+       lv2 = LongVector.fromArray(SPECIES, l2, long512_arr_idx);
+       lv3 = LongVector.fromArray(SPECIES, l3, long512_arr_idx);
+       lv4 = LongVector.fromArray(SPECIES, l4, long512_arr_idx);
+       lv5 = LongVector.fromArray(SPECIES, l5, long512_arr_idx);
+       for(int i = 0; i < INVOC_COUNTER; i++) {
+           for(int j = 0 ; j < ARRAYLEN; j+= SPECIES.length()) {
+               // Mixed chain: the lv2 pair and the last two operations are
+               // masked; the lv3 pair and the AND with lv4 are unmasked.
+               // The result overwrites l1, so every outer pass consumes the
+               // output of the previous pass.
+               LongVector.fromArray(SPECIES, l1, j)
+                   .lanewise(VectorOperators.AND, lv2, lmask)
+                   .lanewise(VectorOperators.OR,  lv2, lmask)
+                   .lanewise(VectorOperators.AND, lv3)
+                   .lanewise(VectorOperators.OR,  lv3)
+                   .lanewise(VectorOperators.AND, lv4)
+                   .lanewise(VectorOperators.OR,  lv4, lmask)
+                   .lanewise(VectorOperators.XOR, lv5, lmask)
+                   .intoArray(l1, j);
+           }
+       }
+    }
+
+    /**
+     * Benchmark entry point: runs the partially masked long logic kernel
+     * with the 512-bit LongVector species.
+     */
+    @Benchmark
+    public void partiallyMaskedLogicOperationsLong512() {
+       partiallyMaskedLogicOperationsLongKernel(LongVector.SPECIES_512);
+    }
+
+    /**
+     * Benchmark entry point: runs the partially masked long logic kernel
+     * with the 256-bit LongVector species.
+     */
+    @Benchmark
+    public void partiallyMaskedLogicOperationsLong256() {
+       partiallyMaskedLogicOperationsLongKernel(LongVector.SPECIES_256);
+    }
+
+    /**
+     * Shared kernel for the long BITWISE_BLEND benchmarks.
+     *
+     * Loads the mask from mask_arr (offset 0) and the operand vectors
+     * lv2..lv5 from l2..l5 (offset long512_arr_idx) once, then makes
+     * INVOC_COUNTER passes over l1 in steps of SPECIES.length(). Each step
+     * applies three chained masked BITWISE_BLEND operations and stores the
+     * result back into l1 at the same index.
+     *
+     * @param SPECIES vector species used for the mask, all loads and the store
+     */
+    @CompilerControl(CompilerControl.Mode.INLINE)
+    public void bitwiseBlendOperationLongKernel(VectorSpecies<Long> SPECIES) {
+       lmask = VectorMask.fromArray(SPECIES, mask_arr, 0);
+       // NOTE(review): operands are read at long512_arr_idx for every species,
+       // including the 256-bit caller -- confirm this is intended.
+       lv2 = LongVector.fromArray(SPECIES, l2, long512_arr_idx);
+       lv3 = LongVector.fromArray(SPECIES, l3, long512_arr_idx);
+       lv4 = LongVector.fromArray(SPECIES, l4, long512_arr_idx);
+       lv5 = LongVector.fromArray(SPECIES, l5, long512_arr_idx);
+       for(int i = 0; i < INVOC_COUNTER; i++) {
+           for(int j = 0 ; j < ARRAYLEN; j+= SPECIES.length()) {
+               // The result overwrites l1, so every outer pass consumes the
+               // output of the previous pass.
+               LongVector.fromArray(SPECIES, l1, j)
+                   .lanewise(VectorOperators.BITWISE_BLEND, lv2, lv3, lmask)
+                   .lanewise(VectorOperators.BITWISE_BLEND, lv3, lv4, lmask)
+                   .lanewise(VectorOperators.BITWISE_BLEND, lv4, lv5, lmask)
+                   .intoArray(l1, j);
+           }
+       }
+    }
+
+    /**
+     * Benchmark entry point: runs the long BITWISE_BLEND kernel with the
+     * 512-bit LongVector species.
+     */
+    @Benchmark
+    public void bitwiseBlendOperationLong512() {
+       bitwiseBlendOperationLongKernel(LongVector.SPECIES_512);
+    }
+
+    /**
+     * Benchmark entry point: runs the long BITWISE_BLEND kernel with the
+     * 256-bit LongVector species.
+     */
+    @Benchmark
+    public void bitwiseBlendOperationLong256() {
+       bitwiseBlendOperationLongKernel(LongVector.SPECIES_256);
+    }
+}
--- a/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java
+++ b/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -32,7 +32,7 @@ import java.util.Random;
@OutputTimeUnit(TimeUnit.SECONDS)
@State(Scope.Thread)
 public class MacroLogicOpt {
-  @Param({"64","128","256","512","1024","2048","4096"}) private int VECLEN;
+  @Param({"64","128","256","512","1024"}) private int VECLEN;

  private  int [] ai = new int[VECLEN];
  private  int [] bi = new int[VECLEN];