diff --git a/src/hotspot/share/opto/superword.cpp b/src/hotspot/share/opto/superword.cpp
index cd9b76eb44d..f41e8a7b8b3 100644
--- a/src/hotspot/share/opto/superword.cpp
+++ b/src/hotspot/share/opto/superword.cpp
@@ -45,12 +45,13 @@ SuperWord::SuperWord(const VLoopAnalyzer &vloop_analyzer) :
   _arena(mtCompiler),
   _node_info(arena(), _vloop.estimated_body_length(), 0, SWNodeInfo::initial), // info needed per node
   _clone_map(phase()->C->clone_map()),                      // map of nodes created in cloning
-  _align_to_ref(nullptr),                                   // memory reference to align vectors to
   _pairset(&_arena, _vloop_analyzer),
   _packset(&_arena, _vloop_analyzer
            NOT_PRODUCT(COMMA is_trace_superword_packset())
            NOT_PRODUCT(COMMA is_trace_superword_rejections())
            ),
+  _mem_ref_for_main_loop_alignment(nullptr),
+  _aw_for_main_loop_alignment(0),
   _do_vector_loop(phase()->C->do_vector_loop()),            // whether to do vectorization/simd style
   _num_work_vecs(0),                                        // amount of vector work we have
   _num_reductions(0)                                        // amount of reduction work we have
@@ -516,22 +517,12 @@ void SuperWord::find_adjacent_refs() {
 
   int max_idx;
 
-  // Take the first mem_ref as the reference to align to. The pre-loop trip count is
-  // modified to align this reference to a vector-aligned address. If strict alignment
-  // is required, we may change the reference later (see filter_packs_for_alignment()).
-  MemNode* align_to_mem_ref = nullptr;
-
   while (memops.size() != 0) {
     // Find a memory reference to align to.
     MemNode* mem_ref = find_align_to_ref(memops, max_idx);
     if (mem_ref == nullptr) break;
     int iv_adjustment = get_iv_adjustment(mem_ref);
 
-    if (align_to_mem_ref == nullptr) {
-      align_to_mem_ref = mem_ref;
-      set_align_to_ref(align_to_mem_ref);
-    }
-
     const VPointer& align_to_ref_p = vpointer(mem_ref);
     // Set alignment relative to "align_to_ref" for all related memory operations.
     for (int i = memops.size() - 1; i >= 0; i--) {
@@ -573,9 +564,6 @@ void SuperWord::find_adjacent_refs() {
     }
   } // while (memops.size() != 0)
 
-  assert(_pairset.is_empty() || align_to_mem_ref != nullptr,
-         "pairset empty or we find the alignment reference");
-
 #ifndef PRODUCT
   if (is_trace_superword_packset()) {
     tty->print_cr("\nAfter Superword::find_adjacent_refs");
@@ -1723,7 +1711,11 @@ void SuperWord::filter_packs_for_alignment() {
   if (current->is_constrained()) {
     // Solution is constrained (not trivial)
     // -> must change pre-limit to achieve alignment
-    set_align_to_ref(current->as_constrained()->mem_ref());
+    MemNode const* mem = current->as_constrained()->mem_ref();
+    Node_List* pack = get_pack(mem);
+    assert(pack != nullptr, "memop of final solution must still be packed");
+    _mem_ref_for_main_loop_alignment = mem;
+    _aw_for_main_loop_alignment = pack->size() * mem->memory_size();
   }
 }
 
@@ -3397,6 +3389,32 @@ LoadNode::ControlDependency SuperWord::control_dependency(Node_List* p) {
   return dep;
 }
 
+// Find the memop pack with the maximum vector width, unless they were already
+// determined by SuperWord::filter_packs_for_alignment().
+void SuperWord::determine_mem_ref_and_aw_for_main_loop_alignment() {
+  if (_mem_ref_for_main_loop_alignment != nullptr) {
+    assert(vectors_should_be_aligned(), "mem_ref only set if filtered for alignment");
+    return;
+  }
+
+  MemNode const* mem_ref = nullptr;
+  int max_aw = 0;
+  for (int i = 0; i < _packset.length(); i++) {
+    Node_List* pack = _packset.at(i);
+    MemNode* first = pack->at(0)->isa_Mem();
+    if (first == nullptr) { continue; }
+
+    int vw = first->memory_size() * pack->size();
+    if (vw > max_aw) {
+      max_aw = vw;
+      mem_ref = first;
+    }
+  }
+  assert(mem_ref != nullptr && max_aw > 0, "found mem_ref and aw");
+  _mem_ref_for_main_loop_alignment = mem_ref;
+  _aw_for_main_loop_alignment = max_aw;
+}
+
 #define TRACE_ALIGN_VECTOR_NODE(node) { \
   DEBUG_ONLY(                           \
     if (is_trace_align_vector()) {      \
@@ -3407,11 +3425,14 @@ LoadNode::ControlDependency SuperWord::control_dependency(Node_List* p) {
 }                                       \
 
 // Ensure that the main loop vectors are aligned by adjusting the pre loop limit. We memory-align
-// the address of "align_to_ref" to the maximal possible vector width. We adjust the pre-loop
-// iteration count by adjusting the pre-loop limit.
+// the address of "_mem_ref_for_main_loop_alignment" to "_aw_for_main_loop_alignment", which is a
+// sufficiently large alignment width. We adjust the pre-loop iteration count by adjusting the
+// pre-loop limit.
 void SuperWord::adjust_pre_loop_limit_to_align_main_loop_vectors() {
-  const MemNode* align_to_ref = _align_to_ref;
-  assert(align_to_ref != nullptr, "align_to_ref must be set");
+  determine_mem_ref_and_aw_for_main_loop_alignment();
+  const MemNode* align_to_ref = _mem_ref_for_main_loop_alignment;
+  const int aw                = _aw_for_main_loop_alignment;
+  assert(align_to_ref != nullptr && aw > 0, "must have alignment reference and aw");
   assert(cl()->is_main_loop(), "can only do alignment for main loop");
 
   // The opaque node for the limit, where we adjust the input
@@ -3556,10 +3577,7 @@ void SuperWord::adjust_pre_loop_limit_to_align_main_loop_vectors() {
   //                   = MIN(new_limit,                   orig_limit)         (15a, stride > 0)
   // constrained_limit = MAX(old_limit - adjust_pre_iter, orig_limit)
   //                   = MAX(new_limit,                   orig_limit)         (15a, stride < 0)
-
-  // We chose an aw that is the maximal possible vector width for the type of
-  // align_to_ref.
-  const int aw       = vector_width_in_bytes(align_to_ref);
+  //
   const int stride   = iv_stride();
   const int scale    = align_to_ref_p.scale_in_bytes();
   const int offset   = align_to_ref_p.offset_in_bytes();
diff --git a/src/hotspot/share/opto/superword.hpp b/src/hotspot/share/opto/superword.hpp
index 7d1ba1131f3..159032d94b9 100644
--- a/src/hotspot/share/opto/superword.hpp
+++ b/src/hotspot/share/opto/superword.hpp
@@ -411,11 +411,15 @@ class SuperWord : public ResourceObj {
 
   GrowableArray<SWNodeInfo> _node_info;  // Info needed per node
   CloneMap&            _clone_map;       // map of nodes created in cloning
-  MemNode const* _align_to_ref;          // Memory reference that pre-loop will align to
 
   PairSet _pairset;
   PackSet _packset;
 
+  // Memory reference, and the alignment width (aw) for which we align the main-loop,
+  // by adjusting the pre-loop limit.
+  MemNode const* _mem_ref_for_main_loop_alignment;
+  int _aw_for_main_loop_alignment;
+
  public:
   SuperWord(const VLoopAnalyzer &vloop_analyzer);
 
@@ -563,8 +567,6 @@ class SuperWord : public ResourceObj {
   Arena* arena()                   { return &_arena; }
 
   int get_vw_bytes_special(MemNode* s);
-  const MemNode* align_to_ref() const { return _align_to_ref; }
-  void set_align_to_ref(const MemNode* m) { _align_to_ref = m; }
 
   // Ensure node_info contains element "i"
   void grow_node_info(int i) { if (i >= _node_info.length()) _node_info.at_put_grow(i, SWNodeInfo::initial); }
@@ -670,6 +672,7 @@ private:
   // Alignment within a vector memory reference
   int memory_alignment(MemNode* s, int iv_adjust);
   // Ensure that the main loop vectors are aligned by adjusting the pre loop limit.
+  void determine_mem_ref_and_aw_for_main_loop_alignment();
   void adjust_pre_loop_limit_to_align_main_loop_vectors();
 };