Igor Veresov 2008-05-04 03:29:31 -07:00
commit 035662396c
27 changed files with 425 additions and 153 deletions

View File

@ -279,7 +279,9 @@ SUNWprivate_1.1 {
jio_snprintf;
jio_vfprintf;
jio_vsnprintf;
fork1;
numa_warn;
numa_error;
# Needed because there is no JVM interface for this.
sysThreadAvailableStackWithSlack;

View File

@ -274,7 +274,9 @@ SUNWprivate_1.1 {
jio_snprintf;
jio_vfprintf;
jio_vsnprintf;
fork1;
numa_warn;
numa_error;
# Needed because there is no JVM interface for this.
sysThreadAvailableStackWithSlack;

View File

@ -2228,20 +2228,42 @@ bool os::commit_memory(char* addr, size_t size, size_t alignment_hint) {
}
void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
void os::free_memory(char *addr, size_t bytes) { }
void os::free_memory(char *addr, size_t bytes) {
uncommit_memory(addr, bytes);
}
void os::numa_make_global(char *addr, size_t bytes) { }
void os::numa_make_local(char *addr, size_t bytes) { }
bool os::numa_topology_changed() { return false; }
size_t os::numa_get_groups_num() { return 1; }
int os::numa_get_group_id() { return 0; }
size_t os::numa_get_leaf_groups(int *ids, size_t size) {
if (size > 0) {
ids[0] = 0;
return 1;
void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
Linux::numa_tonode_memory(addr, bytes, lgrp_hint);
}
bool os::numa_topology_changed() { return false; }
size_t os::numa_get_groups_num() {
int max_node = Linux::numa_max_node();
return max_node > 0 ? max_node + 1 : 1;
}
int os::numa_get_group_id() {
int cpu_id = Linux::sched_getcpu();
if (cpu_id != -1) {
int lgrp_id = Linux::get_node_by_cpu(cpu_id);
if (lgrp_id != -1) {
return lgrp_id;
}
}
return 0;
}
size_t os::numa_get_leaf_groups(int *ids, size_t size) {
for (size_t i = 0; i < size; i++) {
ids[i] = i;
}
return size;
}
bool os::get_page_info(char *start, page_info* info) {
return false;
}
@ -2250,6 +2272,74 @@ char *os::scan_pages(char *start, char* end, page_info* page_expected, page_info
return end;
}
extern "C" void numa_warn(int number, char *where, ...) { }
extern "C" void numa_error(char *where) { }
void os::Linux::libnuma_init() {
// sched_getcpu() should be in libc.
set_sched_getcpu(CAST_TO_FN_PTR(sched_getcpu_func_t,
dlsym(RTLD_DEFAULT, "sched_getcpu")));
if (sched_getcpu() != -1) { // Does it work?
void *handle = dlopen("libnuma.so", RTLD_LAZY);
if (handle != NULL) {
set_numa_node_to_cpus(CAST_TO_FN_PTR(numa_node_to_cpus_func_t,
dlsym(handle, "numa_node_to_cpus")));
set_numa_max_node(CAST_TO_FN_PTR(numa_max_node_func_t,
dlsym(handle, "numa_max_node")));
set_numa_available(CAST_TO_FN_PTR(numa_available_func_t,
dlsym(handle, "numa_available")));
set_numa_tonode_memory(CAST_TO_FN_PTR(numa_tonode_memory_func_t,
dlsym(handle, "numa_tonode_memory")));
if (numa_available() != -1) {
// Create a cpu -> node mapping
_cpu_to_node = new (ResourceObj::C_HEAP) GrowableArray<int>(0, true);
rebuild_cpu_to_node_map();
}
}
}
}
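
Note on the hunk above: libnuma_init() binds to libnuma lazily through dlopen/dlsym, so the VM still starts on machines without the library installed. A minimal standalone sketch of the same lazy-binding pattern (illustrative only, not HotSpot code; it relies only on the libnuma entry points named above and links with -ldl):

#include <dlfcn.h>
#include <stdio.h>

typedef int (*numa_available_func_t)(void);
typedef int (*numa_max_node_func_t)(void);

int main() {
  // Mirrors the VM's dlopen("libnuma.so", RTLD_LAZY) above.
  void* handle = dlopen("libnuma.so", RTLD_LAZY);
  if (handle == NULL) {
    printf("libnuma not found, running without NUMA awareness\n");
    return 0;
  }
  numa_available_func_t numa_available_fn =
      (numa_available_func_t)dlsym(handle, "numa_available");
  numa_max_node_func_t numa_max_node_fn =
      (numa_max_node_func_t)dlsym(handle, "numa_max_node");
  if (numa_available_fn == NULL || numa_max_node_fn == NULL ||
      numa_available_fn() == -1) {
    printf("NUMA API unavailable\n");
    return 0;
  }
  // numa_max_node() returns the highest node number, hence the +1.
  printf("NUMA nodes: %d\n", numa_max_node_fn() + 1);
  return 0;
}
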
// rebuild_cpu_to_node_map() constructs a table mapping cpu id to node id.
// The table is later used in get_node_by_cpu().
void os::Linux::rebuild_cpu_to_node_map() {
int cpu_num = os::active_processor_count();
cpu_to_node()->clear();
cpu_to_node()->at_grow(cpu_num - 1);
int node_num = numa_get_groups_num();
int cpu_map_size = (cpu_num + BitsPerLong - 1) / BitsPerLong;
unsigned long *cpu_map = NEW_C_HEAP_ARRAY(unsigned long, cpu_map_size);
for (int i = 0; i < node_num; i++) {
if (numa_node_to_cpus(i, cpu_map, cpu_map_size * sizeof(unsigned long)) != -1) {
for (int j = 0; j < cpu_map_size; j++) {
if (cpu_map[j] != 0) {
for (int k = 0; k < BitsPerLong; k++) {
if (cpu_map[j] & (1UL << k)) {
cpu_to_node()->at_put(j * BitsPerLong + k, i);
}
}
}
}
}
}
FREE_C_HEAP_ARRAY(unsigned long, cpu_map);
}
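
The decoding step above treats each unsigned long returned by numa_node_to_cpus() as a bitmask of CPU ids (note that the buffer length is passed in bytes). A tiny isolated sketch of that decoding with a concrete mask (illustrative, not HotSpot code):

#include <stdio.h>

int main() {
  const int BitsPerLong = 8 * (int)sizeof(unsigned long);
  unsigned long cpu_map[1] = { 0x30UL };  // bits 4 and 5 set
  int node = 1;                           // assumed node id for the example
  for (int j = 0; j < 1; j++) {
    if (cpu_map[j] == 0) continue;
    for (int k = 0; k < BitsPerLong; k++) {
      if (cpu_map[j] & (1UL << k)) {
        printf("cpu %d -> node %d\n", j * BitsPerLong + k, node);  // prints cpu 4 and cpu 5
      }
    }
  }
  return 0;
}
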
int os::Linux::get_node_by_cpu(int cpu_id) {
if (cpu_to_node() != NULL && cpu_id >= 0 && cpu_id < cpu_to_node()->length()) {
return cpu_to_node()->at(cpu_id);
}
return -1;
}
GrowableArray<int>* os::Linux::_cpu_to_node;
os::Linux::sched_getcpu_func_t os::Linux::_sched_getcpu;
os::Linux::numa_node_to_cpus_func_t os::Linux::_numa_node_to_cpus;
os::Linux::numa_max_node_func_t os::Linux::_numa_max_node;
os::Linux::numa_available_func_t os::Linux::_numa_available;
os::Linux::numa_tonode_memory_func_t os::Linux::_numa_tonode_memory;
bool os::uncommit_memory(char* addr, size_t size) {
return ::mmap(addr, size,
PROT_READ|PROT_WRITE|PROT_EXEC,
@ -3552,6 +3642,10 @@ jint os::init_2(void)
Linux::is_floating_stack() ? "floating stack" : "fixed stack");
}
if (UseNUMA) {
Linux::libnuma_init();
}
if (MaxFDLimit) {
// set the number of file descriptors to max. print out error
// if getrlimit/setrlimit fails but continue regardless.

View File

@ -59,6 +59,8 @@ class Linux {
static bool _is_NPTL;
static bool _supports_fast_thread_cpu_time;
static GrowableArray<int>* _cpu_to_node;
protected:
static julong _physical_memory;
@ -79,8 +81,9 @@ class Linux {
static void set_is_LinuxThreads() { _is_NPTL = false; }
static void set_is_floating_stack() { _is_floating_stack = true; }
static void rebuild_cpu_to_node_map();
static GrowableArray<int>* cpu_to_node() { return _cpu_to_node; }
public:
static void init_thread_fpu_state();
static int get_fpu_control_word();
static void set_fpu_control_word(int fpu_control);
@ -143,6 +146,7 @@ class Linux {
static bool is_floating_stack() { return _is_floating_stack; }
static void libpthread_init();
static void libnuma_init();
// Minimum stack size a thread can be created with (allowing
// the VM to completely create the thread and enter user code)
@ -229,6 +233,38 @@ class Linux {
#undef SR_SUSPENDED
};
private:
typedef int (*sched_getcpu_func_t)(void);
typedef int (*numa_node_to_cpus_func_t)(int node, unsigned long *buffer, int bufferlen);
typedef int (*numa_max_node_func_t)(void);
typedef int (*numa_available_func_t)(void);
typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node);
static sched_getcpu_func_t _sched_getcpu;
static numa_node_to_cpus_func_t _numa_node_to_cpus;
static numa_max_node_func_t _numa_max_node;
static numa_available_func_t _numa_available;
static numa_tonode_memory_func_t _numa_tonode_memory;
static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; }
static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; }
static void set_numa_max_node(numa_max_node_func_t func) { _numa_max_node = func; }
static void set_numa_available(numa_available_func_t func) { _numa_available = func; }
static void set_numa_tonode_memory(numa_tonode_memory_func_t func) { _numa_tonode_memory = func; }
public:
static int sched_getcpu() { return _sched_getcpu != NULL ? _sched_getcpu() : -1; }
static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen) {
return _numa_node_to_cpus != NULL ? _numa_node_to_cpus(node, buffer, bufferlen) : -1;
}
static int numa_max_node() { return _numa_max_node != NULL ? _numa_max_node() : -1; }
static int numa_available() { return _numa_available != NULL ? _numa_available() : -1; }
static int numa_tonode_memory(void *start, size_t size, int node) {
return _numa_tonode_memory != NULL ? _numa_tonode_memory(start, size, node) : -1;
}
static int get_node_by_cpu(int cpu_id);
};
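
Note the structure of the class above: the raw function pointers and their setters stay private (only libnuma_init() installs them), while the public wrappers NULL-check every call, so the rest of the VM never has to ask whether libnuma was actually loaded. A self-contained sketch of the same guarded-wrapper idiom (illustrative, not HotSpot code):

#include <stddef.h>
#include <stdio.h>

typedef int (*numa_max_node_func_t)(void);
static numa_max_node_func_t _numa_max_node = NULL;  // would be set via dlsym(); may stay NULL

// Degrades to a harmless default when the library was never resolved.
static int numa_max_node() { return _numa_max_node != NULL ? _numa_max_node() : -1; }

int main() {
  // Mirrors os::numa_get_groups_num(): without libnuma the caller simply sees one group.
  int max_node = numa_max_node();
  int groups = max_node > 0 ? max_node + 1 : 1;
  printf("groups: %d\n", groups);
  return 0;
}
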

View File

@ -120,3 +120,6 @@ inline int os::closedir(DIR *dirp)
RESTARTABLE(_cmd, _result); \
return _result; \
} while(false)
inline bool os::numa_has_static_binding() { return true; }
inline bool os::numa_has_group_homing() { return false; }

View File

@ -2602,7 +2602,7 @@ void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) {
}
// Tell the OS to make the range local to the first-touching LWP
void os::numa_make_local(char *addr, size_t bytes) {
void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) {
assert((intptr_t)addr % os::vm_page_size() == 0, "Address should be page-aligned.");
if (madvise(addr, bytes, MADV_ACCESS_LWP) < 0) {
debug_only(warning("MADV_ACCESS_LWP failed."));

View File

@ -204,3 +204,6 @@ do { \
RESTARTABLE(_cmd, _result); \
return _result; \
} while(false)
inline bool os::numa_has_static_binding() { return false; }
inline bool os::numa_has_group_homing() { return true; }

View File

@ -2581,7 +2581,7 @@ bool os::unguard_memory(char* addr, size_t bytes) {
void os::realign_memory(char *addr, size_t bytes, size_t alignment_hint) { }
void os::free_memory(char *addr, size_t bytes) { }
void os::numa_make_global(char *addr, size_t bytes) { }
void os::numa_make_local(char *addr, size_t bytes) { }
void os::numa_make_local(char *addr, size_t bytes, int lgrp_hint) { }
bool os::numa_topology_changed() { return false; }
size_t os::numa_get_groups_num() { return 1; }
int os::numa_get_group_id() { return 0; }

View File

@ -69,3 +69,6 @@ inline void os::bang_stack_shadow_pages() {
*((int *)(sp - (pages * vm_page_size()))) = 0;
}
}
inline bool os::numa_has_static_binding() { return true; }
inline bool os::numa_has_group_homing() { return false; }

View File

@ -44,52 +44,12 @@ HeapWord* CMSPermGen::mem_allocate(size_t size) {
bool lock_owned = lock->owned_by_self();
if (lock_owned) {
MutexUnlocker mul(lock);
return mem_allocate_work(size);
return mem_allocate_in_gen(size, _gen);
} else {
return mem_allocate_work(size);
return mem_allocate_in_gen(size, _gen);
}
}
HeapWord* CMSPermGen::mem_allocate_work(size_t size) {
assert(!_gen->freelistLock()->owned_by_self(), "Potential deadlock");
MutexLocker ml(Heap_lock);
HeapWord* obj = NULL;
obj = _gen->allocate(size, false);
// Since we want to minimize pause times, we will prefer
// expanding the perm gen rather than doing a stop-world
// collection to satisfy the allocation request.
if (obj == NULL) {
// Try to expand the perm gen and allocate space.
obj = _gen->expand_and_allocate(size, false, false);
if (obj == NULL) {
// Let's see if a normal stop-world full collection will
// free up enough space.
SharedHeap::heap()->collect_locked(GCCause::_permanent_generation_full);
obj = _gen->allocate(size, false);
if (obj == NULL) {
// The collection above may have shrunk the space, so try
// to expand again and allocate space.
obj = _gen->expand_and_allocate(size, false, false);
}
if (obj == NULL) {
// We have not been able to allocate space despite a
// full stop-world collection. We now make a last-ditch collection
// attempt (in which soft refs are all aggressively freed)
// that will try to reclaim as much space as possible.
SharedHeap::heap()->collect_locked(GCCause::_last_ditch_collection);
obj = _gen->allocate(size, false);
if (obj == NULL) {
// Expand generation in case it was shrunk following the collection.
obj = _gen->expand_and_allocate(size, false, false);
}
}
}
}
return obj;
}
void CMSPermGen::compute_new_size() {
_gen->compute_new_size();
}

View File

@ -29,7 +29,6 @@ class ConcurrentMarkSweepGeneration;
class CMSPermGen: public PermGen {
friend class VMStructs;
HeapWord* mem_allocate_work(size_t size);
protected:
// The "generation" view.
ConcurrentMarkSweepGeneration* _gen;

View File

@ -590,6 +590,31 @@ HeapWord* ParallelScavengeHeap::permanent_mem_allocate(size_t size) {
full_gc_count = Universe::heap()->total_full_collections();
result = perm_gen()->allocate_permanent(size);
if (result != NULL) {
return result;
}
if (GC_locker::is_active_and_needs_gc()) {
// If this thread is not in a jni critical section, we stall
// the requestor until the critical section has cleared and
// GC allowed. When the critical section clears, a GC is
// initiated by the last thread exiting the critical section; so
// we retry the allocation sequence from the beginning of the loop,
// rather than causing more, now probably unnecessary, GC attempts.
JavaThread* jthr = JavaThread::current();
if (!jthr->in_critical()) {
MutexUnlocker mul(Heap_lock);
GC_locker::stall_until_clear();
continue;
} else {
if (CheckJNICalls) {
fatal("Possible deadlock due to allocating while"
" in jni critical section");
}
return NULL;
}
}
}
if (result == NULL) {
@ -622,6 +647,12 @@ HeapWord* ParallelScavengeHeap::permanent_mem_allocate(size_t size) {
if (op.prologue_succeeded()) {
assert(Universe::heap()->is_in_permanent_or_null(op.result()),
"result not in heap");
// If GC was locked out during VM operation then retry allocation
// and/or stall as necessary.
if (op.gc_locked()) {
assert(op.result() == NULL, "must be NULL if gc_locked() is true");
continue; // retry and/or stall as necessary
}
// If a NULL result is being returned, an out-of-memory
// will be thrown now. Clear the gc_time_limit_exceeded
// flag to avoid the following situation.

View File

@ -169,8 +169,9 @@ class ParallelScavengeHeap : public CollectedHeap {
size_t large_typearray_limit() { return FastAllocateSizeLimit; }
bool supports_inline_contig_alloc() const { return !UseNUMA; }
HeapWord** top_addr() const { return !UseNUMA ? young_gen()->top_addr() : NULL; }
HeapWord** end_addr() const { return !UseNUMA ? young_gen()->end_addr() : NULL; }
HeapWord** top_addr() const { return !UseNUMA ? young_gen()->top_addr() : (HeapWord**)-1; }
HeapWord** end_addr() const { return !UseNUMA ? young_gen()->end_addr() : (HeapWord**)-1; }
void ensure_parsability(bool retire_tlabs);
void accumulate_statistics_all_tlabs();

View File

@ -976,7 +976,7 @@ void PSParallelCompact::pre_compact(PreGCValues* pre_gc_values)
DEBUG_ONLY(mark_bitmap_count = mark_bitmap_size = 0;)
// Increment the invocation count
heap->increment_total_collections();
heap->increment_total_collections(true);
// We need to track unique mark sweep invocations as well.
_total_invocations++;
@ -1941,7 +1941,7 @@ void PSParallelCompact::invoke_no_policy(bool maximum_heap_compaction) {
assert(SafepointSynchronize::is_at_safepoint(), "must be at a safepoint");
assert(ref_processor() != NULL, "Sanity");
if (GC_locker::is_active()) {
if (GC_locker::check_active_before_gc()) {
return;
}

View File

@ -69,6 +69,9 @@ void VM_ParallelGCFailedPermanentAllocation::doit() {
GCCauseSetter gccs(heap, _gc_cause);
_result = heap->failed_permanent_mem_allocate(_size);
if (_result == NULL && GC_locker::is_active_and_needs_gc()) {
set_gc_locked();
}
notify_gc_end();
}

View File

@ -46,9 +46,11 @@ void MutableNUMASpace::mangle_unused_area() {
for (int i = 0; i < lgrp_spaces()->length(); i++) {
LGRPSpace *ls = lgrp_spaces()->at(i);
MutableSpace *s = ls->space();
HeapWord *top = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
if (top < s->end()) {
ls->add_invalid_region(MemRegion(top, s->end()));
if (!os::numa_has_static_binding()) {
HeapWord *top = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
if (top < s->end()) {
ls->add_invalid_region(MemRegion(top, s->end()));
}
}
s->mangle_unused_area();
}
@ -70,32 +72,36 @@ void MutableNUMASpace::ensure_parsability() {
area_touched_words);
}
#endif
MemRegion invalid;
HeapWord *crossing_start = (HeapWord*)round_to((intptr_t)s->top(), os::vm_page_size());
HeapWord *crossing_end = (HeapWord*)round_to((intptr_t)(s->top() + area_touched_words),
os::vm_page_size());
if (crossing_start != crossing_end) {
// If object header crossed a small page boundary we mark the area
// as invalid rounding it to a page_size().
HeapWord *start = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
HeapWord *end = MIN2((HeapWord*)round_to((intptr_t)(s->top() + area_touched_words), page_size()),
s->end());
invalid = MemRegion(start, end);
}
if (!os::numa_has_static_binding()) {
MemRegion invalid;
HeapWord *crossing_start = (HeapWord*)round_to((intptr_t)s->top(), os::vm_page_size());
HeapWord *crossing_end = (HeapWord*)round_to((intptr_t)(s->top() + area_touched_words),
os::vm_page_size());
if (crossing_start != crossing_end) {
// If the object header crossed a small page boundary we mark the area
// as invalid, rounding it out to page_size().
HeapWord *start = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
HeapWord *end = MIN2((HeapWord*)round_to((intptr_t)(s->top() + area_touched_words), page_size()),
s->end());
invalid = MemRegion(start, end);
}
ls->add_invalid_region(invalid);
ls->add_invalid_region(invalid);
}
s->set_top(s->end());
}
} else {
if (!os::numa_has_static_binding()) {
#ifdef ASSERT
MemRegion invalid(s->top(), s->end());
ls->add_invalid_region(invalid);
#else
if (ZapUnusedHeapArea) {
MemRegion invalid(s->top(), s->end());
ls->add_invalid_region(invalid);
} else break;
#else
if (ZapUnusedHeapArea) {
MemRegion invalid(s->top(), s->end());
ls->add_invalid_region(invalid);
} else break;
#endif
}
}
}
}
@ -194,7 +200,7 @@ bool MutableNUMASpace::update_layout(bool force) {
}
// Bias region towards the first-touching lgrp. Set the right page sizes.
void MutableNUMASpace::bias_region(MemRegion mr) {
void MutableNUMASpace::bias_region(MemRegion mr, int lgrp_id) {
HeapWord *start = (HeapWord*)round_to((intptr_t)mr.start(), page_size());
HeapWord *end = (HeapWord*)round_down((intptr_t)mr.end(), page_size());
if (end > start) {
@ -202,9 +208,13 @@ void MutableNUMASpace::bias_region(MemRegion mr) {
assert((intptr_t)aligned_region.start() % page_size() == 0 &&
(intptr_t)aligned_region.byte_size() % page_size() == 0, "Bad alignment");
assert(region().contains(aligned_region), "Sanity");
os::free_memory((char*)aligned_region.start(), aligned_region.byte_size());
// First we tell the OS which page size we want in the given range. The underlying
// large page can be broken down if we require small pages.
os::realign_memory((char*)aligned_region.start(), aligned_region.byte_size(), page_size());
os::numa_make_local((char*)aligned_region.start(), aligned_region.byte_size());
// Then we uncommit the pages in the range.
os::free_memory((char*)aligned_region.start(), aligned_region.byte_size());
// And make them local/first-touch biased.
os::numa_make_local((char*)aligned_region.start(), aligned_region.byte_size(), lgrp_id);
}
}
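
Since os::realign_memory() is a no-op on Linux in this change, the Linux path of bias_region() effectively reduces to "uncommit the range, then bind it to the lgrp". A rough standalone analogue using raw mmap and libnuma (a sketch under those assumptions, not the VM implementation; the helper name bias_range is mine; link with -lnuma):

#include <sys/mman.h>
#include <numa.h>   // numa_available(), numa_tonode_memory()
#include <stdio.h>

// Uncommit by replacing the mapping, then bind future pages of the range to a node.
static int bias_range(char* addr, size_t bytes, int node) {
  void* p = mmap(addr, bytes, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_NORESERVE, -1, 0);
  if (p == MAP_FAILED) return -1;
  numa_tonode_memory(addr, bytes, node);  // placement hint; pages materialize on first touch
  return 0;
}

int main() {
  if (numa_available() == -1) { printf("no NUMA support\n"); return 0; }
  size_t len = 1 << 20;
  char* base = (char*)mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (base == MAP_FAILED) return 1;
  bias_range(base, len, 0);
  base[0] = 1;  // first touch: the page should now land on node 0
  munmap(base, len);
  return 0;
}
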
@ -233,10 +243,12 @@ void MutableNUMASpace::update() {
initialize(region(), true);
} else {
bool should_initialize = false;
for (int i = 0; i < lgrp_spaces()->length(); i++) {
if (!lgrp_spaces()->at(i)->invalid_region().is_empty()) {
should_initialize = true;
break;
if (!os::numa_has_static_binding()) {
for (int i = 0; i < lgrp_spaces()->length(); i++) {
if (!lgrp_spaces()->at(i)->invalid_region().is_empty()) {
should_initialize = true;
break;
}
}
}
@ -472,8 +484,8 @@ void MutableNUMASpace::initialize(MemRegion mr, bool clear_space) {
intersection = MemRegion(new_region.start(), new_region.start());
}
select_tails(new_region, intersection, &bottom_region, &top_region);
bias_region(bottom_region);
bias_region(top_region);
bias_region(bottom_region, lgrp_spaces()->at(0)->lgrp_id());
bias_region(top_region, lgrp_spaces()->at(lgrp_spaces()->length() - 1)->lgrp_id());
}
// Check if the space layout has changed significantly?
@ -545,22 +557,37 @@ void MutableNUMASpace::initialize(MemRegion mr, bool clear_space) {
intersection = MemRegion(new_region.start(), new_region.start());
}
MemRegion invalid_region = ls->invalid_region().intersection(new_region);
if (!invalid_region.is_empty()) {
merge_regions(new_region, &intersection, &invalid_region);
free_region(invalid_region);
if (!os::numa_has_static_binding()) {
MemRegion invalid_region = ls->invalid_region().intersection(new_region);
// An invalid region is a range of memory that could have been
// allocated on another node. That's relevant only on Solaris, where
// there is no static memory binding.
if (!invalid_region.is_empty()) {
merge_regions(new_region, &intersection, &invalid_region);
free_region(invalid_region);
ls->set_invalid_region(MemRegion());
}
}
select_tails(new_region, intersection, &bottom_region, &top_region);
free_region(bottom_region);
free_region(top_region);
if (!os::numa_has_static_binding()) {
// If that's a system with the first-touch policy then it's enough
// to free the pages.
free_region(bottom_region);
free_region(top_region);
} else {
// In a system with static binding we have to change the bias whenever
// we reshape the heap.
bias_region(bottom_region, ls->lgrp_id());
bias_region(top_region, ls->lgrp_id());
}
// If we cleared the region, debug builds would mangle it, which would cause page
// allocation in a different place. Hence we set the top directly.
s->initialize(new_region, false);
s->set_top(s->bottom());
ls->set_invalid_region(MemRegion());
set_adaptation_cycles(samples_count());
}
}
@ -575,7 +602,7 @@ void MutableNUMASpace::set_top(HeapWord* value) {
HeapWord *top = MAX2((HeapWord*)round_down((intptr_t)s->top(), page_size()), s->bottom());
if (s->contains(value)) {
if (top < value && top < s->end()) {
if (!os::numa_has_static_binding() && top < value && top < s->end()) {
ls->add_invalid_region(MemRegion(top, value));
}
s->set_top(value);
@ -584,10 +611,10 @@ void MutableNUMASpace::set_top(HeapWord* value) {
if (found_top) {
s->set_top(s->bottom());
} else {
if (top < s->end()) {
ls->add_invalid_region(MemRegion(top, s->end()));
}
s->set_top(s->end());
if (!os::numa_has_static_binding() && top < s->end()) {
ls->add_invalid_region(MemRegion(top, s->end()));
}
s->set_top(s->end());
}
}
}
@ -601,11 +628,23 @@ void MutableNUMASpace::clear() {
}
}
/*
Linux supports static memory binding, so most of the logic dealing with
possibly invalid page allocation is effectively disabled. There is also
no notion of a home node in Linux: a thread may migrate freely, although
the scheduler is rather reluctant to move threads between nodes. We
therefore check the current node on every allocation; with high
probability a thread stays on the same node for a while, allowing local
access to recently allocated objects.
*/
HeapWord* MutableNUMASpace::allocate(size_t size) {
int lgrp_id = Thread::current()->lgrp_id();
if (lgrp_id == -1) {
Thread* thr = Thread::current();
int lgrp_id = thr->lgrp_id();
if (lgrp_id == -1 || !os::numa_has_group_homing()) {
lgrp_id = os::numa_get_group_id();
Thread::current()->set_lgrp_id(lgrp_id);
thr->set_lgrp_id(lgrp_id);
}
int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
@ -628,22 +667,22 @@ HeapWord* MutableNUMASpace::allocate(size_t size) {
MutableSpace::set_top(s->top());
}
}
// Make the page allocation happen here.
if (p != NULL) {
// Make the page allocation happen here if there is no static binding.
if (p != NULL && !os::numa_has_static_binding()) {
for (HeapWord *i = p; i < p + size; i += os::vm_page_size() >> LogHeapWordSize) {
*(int*)i = 0;
}
}
return p;
}
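
The page-touching loop above runs only when there is no static binding (i.e. on Solaris in this commit): placement there follows the first-touch policy, so writing one word per page makes this thread the first toucher and pulls the pages onto its locality group. An isolated sketch of the idiom (illustrative, not HotSpot code; touch_pages is a hypothetical helper):

#include <stdlib.h>

// Touch one word per page so the current thread triggers the physical allocation.
static void touch_pages(char* p, size_t bytes, size_t page_size) {
  for (char* cur = p; cur < p + bytes; cur += page_size) {
    *(volatile int*)cur = 0;
  }
}

int main() {
  const size_t page = 4096, len = 64 * page;
  char* buf = (char*)malloc(len);
  if (buf != NULL) touch_pages(buf, len, page);
  free(buf);
  return 0;
}
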
// This version is lock-free.
HeapWord* MutableNUMASpace::cas_allocate(size_t size) {
int lgrp_id = Thread::current()->lgrp_id();
if (lgrp_id == -1) {
Thread* thr = Thread::current();
int lgrp_id = thr->lgrp_id();
if (lgrp_id == -1 || !os::numa_has_group_homing()) {
lgrp_id = os::numa_get_group_id();
Thread::current()->set_lgrp_id(lgrp_id);
thr->set_lgrp_id(lgrp_id);
}
int i = lgrp_spaces()->find(&lgrp_id, LGRPSpace::equals);
@ -670,8 +709,8 @@ HeapWord* MutableNUMASpace::cas_allocate(size_t size) {
}
}
// Make the page allocation happen here.
if (p != NULL) {
// Make the page allocation happen here if there is no static binding.
if (p != NULL && !os::numa_has_static_binding() ) {
for (HeapWord *i = p; i < p + size; i += os::vm_page_size() >> LogHeapWordSize) {
*(int*)i = 0;
}

View File

@ -139,8 +139,8 @@ class MutableNUMASpace : public MutableSpace {
// Check if the NUMA topology has changed. Add and remove spaces if needed.
// The update can be forced by setting the force parameter equal to true.
bool update_layout(bool force);
// Bias region towards the first-touching lgrp.
void bias_region(MemRegion mr);
// Bias region towards the lgrp.
void bias_region(MemRegion mr, int lgrp_id);
// Free pages in a given region.
void free_region(MemRegion mr);
// Get current chunk size.

View File

@ -144,3 +144,18 @@ void VM_GenCollectFull::doit() {
gch->do_full_collection(gch->must_clear_all_soft_refs(), _max_level);
notify_gc_end();
}
void VM_GenCollectForPermanentAllocation::doit() {
JvmtiGCForAllocationMarker jgcm;
notify_gc_begin(true);
GenCollectedHeap* gch = GenCollectedHeap::heap();
GCCauseSetter gccs(gch, _gc_cause);
gch->do_full_collection(gch->must_clear_all_soft_refs(),
gch->n_gens() - 1);
_res = gch->perm_gen()->allocate(_size, false);
assert(gch->is_in_reserved_or_null(_res), "result not in heap");
if (_res == NULL && GC_locker::is_active_and_needs_gc()) {
set_gc_locked();
}
notify_gc_end();
}

View File

@ -43,6 +43,7 @@
// is specified; and also the attach "inspectheap" operation
//
// VM_GenCollectForAllocation
// VM_GenCollectForPermanentAllocation
// VM_ParallelGCFailedAllocation
// VM_ParallelGCFailedPermanentAllocation
// - this operation is invoked when allocation is failed;
@ -166,3 +167,23 @@ class VM_GenCollectFull: public VM_GC_Operation {
virtual VMOp_Type type() const { return VMOp_GenCollectFull; }
virtual void doit();
};
class VM_GenCollectForPermanentAllocation: public VM_GC_Operation {
private:
HeapWord* _res;
size_t _size; // size of object to be allocated
public:
VM_GenCollectForPermanentAllocation(size_t size,
unsigned int gc_count_before,
unsigned int full_gc_count_before,
GCCause::Cause gc_cause)
: VM_GC_Operation(gc_count_before, full_gc_count_before, true),
_size(size) {
_res = NULL;
_gc_cause = gc_cause;
}
~VM_GenCollectForPermanentAllocation() {}
virtual VMOp_Type type() const { return VMOp_GenCollectForPermanentAllocation; }
virtual void doit();
HeapWord* result() const { return _res; }
};
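
The new operation is driven from PermGen::mem_allocate_in_gen() (see the permGen.cpp hunk later in this commit). Condensed from that code, the calling pattern looks like this (not a standalone example):

// Condensed from PermGen::mem_allocate_in_gen() below.
unsigned int gc_count_before      = SharedHeap::heap()->total_collections();
unsigned int full_gc_count_before = SharedHeap::heap()->total_full_collections();
{
  MutexUnlocker mu(Heap_lock);  // give up the heap lock; the VM operation re-acquires it
  VM_GenCollectForPermanentAllocation op(size, gc_count_before, full_gc_count_before,
                                         GCCause::_permanent_generation_full);
  VMThread::execute(&op);
  if (op.prologue_succeeded() && !op.gc_locked()) {
    obj = op.result();          // may still be NULL if the collection freed too little
  }
}
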

View File

@ -718,6 +718,11 @@ ciObjArray.cpp ciNullObject.hpp
ciObjArray.cpp ciUtilities.hpp
ciObjArray.cpp objArrayOop.hpp
ciObjArray.cpp ciObjArray.hpp
ciObjArray.cpp ciNullObject.hpp
ciObjArray.cpp ciUtilities.hpp
ciObjArray.cpp objArrayOop.hpp
ciObjArrayKlass.cpp ciInstanceKlass.hpp
ciObjArrayKlass.cpp ciObjArrayKlass.hpp
ciObjArrayKlass.cpp ciObjArrayKlassKlass.hpp
@ -1662,6 +1667,7 @@ frame_<arch>.inline.hpp generate_platform_dependent_include
gcLocker.cpp gcLocker.inline.hpp
gcLocker.cpp sharedHeap.hpp
gcLocker.cpp resourceArea.hpp
gcLocker.hpp collectedHeap.hpp
gcLocker.hpp genCollectedHeap.hpp
@ -3094,13 +3100,14 @@ oopMap.cpp scopeDesc.hpp
oopMap.cpp signature.hpp
oopMap.hpp allocation.hpp
oopMapCache.cpp jvmtiRedefineClassesTrace.hpp
oopMap.hpp compressedStream.hpp
oopMap.hpp growableArray.hpp
oopMap.hpp vmreg.hpp
oopMapCache.cpp allocation.inline.hpp
oopMapCache.cpp handles.inline.hpp
oopMapCache.cpp jvmtiRedefineClassesTrace.hpp
oopMapCache.cpp handles.inline.hpp
oopMapCache.cpp oop.inline.hpp
oopMapCache.cpp oopMapCache.hpp
oopMapCache.cpp resourceArea.hpp
@ -3207,6 +3214,7 @@ os_<os_family>.cpp events.hpp
os_<os_family>.cpp extendedPC.hpp
os_<os_family>.cpp filemap.hpp
os_<os_family>.cpp globals.hpp
os_<os_family>.cpp growableArray.hpp
os_<os_family>.cpp hpi.hpp
os_<os_family>.cpp icBuffer.hpp
os_<os_family>.cpp interfaceSupport.hpp
@ -3348,6 +3356,10 @@ permGen.cpp java.hpp
permGen.cpp oop.inline.hpp
permGen.cpp permGen.hpp
permGen.cpp universe.hpp
permGen.cpp gcLocker.hpp
permGen.cpp gcLocker.inline.hpp
permGen.cpp vmGCOperations.hpp
permGen.cpp vmThread.hpp
permGen.hpp gcCause.hpp
permGen.hpp generation.hpp

View File

@ -32,6 +32,12 @@ volatile bool GC_locker::_doing_gc = false;
void GC_locker::stall_until_clear() {
assert(!JavaThread::current()->in_critical(), "Would deadlock");
if (PrintJNIGCStalls && PrintGCDetails) {
ResourceMark rm; // JavaThread::name() allocates to convert to UTF8
gclog_or_tty->print_cr(
"Allocation failed. Thread \"%s\" is stalled by JNI critical section.",
JavaThread::current()->name());
}
MutexLocker ml(JNICritical_lock);
// Wait for _needs_gc to be cleared
while (GC_locker::needs_gc()) {

View File

@ -35,6 +35,7 @@ class GenCollectedHeap : public SharedHeap {
friend class CMSCollector;
friend class GenMarkSweep;
friend class VM_GenCollectForAllocation;
friend class VM_GenCollectForPermanentAllocation;
friend class VM_GenCollectFull;
friend class VM_GenCollectFullConcurrent;
friend class VM_GC_HeapInspection;

View File

@ -25,6 +25,70 @@
#include "incls/_precompiled.incl"
#include "incls/_permGen.cpp.incl"
HeapWord* PermGen::mem_allocate_in_gen(size_t size, Generation* gen) {
MutexLocker ml(Heap_lock);
GCCause::Cause next_cause = GCCause::_permanent_generation_full;
GCCause::Cause prev_cause = GCCause::_no_gc;
for (;;) {
HeapWord* obj = gen->allocate(size, false);
if (obj != NULL) {
return obj;
}
if (gen->capacity() < _capacity_expansion_limit ||
prev_cause != GCCause::_no_gc) {
obj = gen->expand_and_allocate(size, false);
}
if (obj == NULL && prev_cause != GCCause::_last_ditch_collection) {
if (GC_locker::is_active_and_needs_gc()) {
// If this thread is not in a jni critical section, we stall
// the requestor until the critical section has cleared and
// GC allowed. When the critical section clears, a GC is
// initiated by the last thread exiting the critical section; so
// we retry the allocation sequence from the beginning of the loop,
// rather than causing more, now probably unnecessary, GC attempts.
JavaThread* jthr = JavaThread::current();
if (!jthr->in_critical()) {
MutexUnlocker mul(Heap_lock);
// Wait for JNI critical section to be exited
GC_locker::stall_until_clear();
continue;
} else {
if (CheckJNICalls) {
fatal("Possible deadlock due to allocating while"
" in jni critical section");
}
return NULL;
}
}
// Read the GC count while holding the Heap_lock
unsigned int gc_count_before = SharedHeap::heap()->total_collections();
unsigned int full_gc_count_before = SharedHeap::heap()->total_full_collections();
{
MutexUnlocker mu(Heap_lock); // give up heap lock, execute gets it back
VM_GenCollectForPermanentAllocation op(size, gc_count_before, full_gc_count_before,
next_cause);
VMThread::execute(&op);
if (!op.prologue_succeeded() || op.gc_locked()) {
assert(op.result() == NULL, "must be NULL if gc_locked() is true");
continue; // retry and/or stall as necessary
}
obj = op.result();
assert(obj == NULL || SharedHeap::heap()->is_in_reserved(obj),
"result not in heap");
if (obj != NULL) {
return obj;
}
}
prev_cause = next_cause;
next_cause = GCCause::_last_ditch_collection;
} else {
return obj;
}
}
}
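
Taken together, this loop preserves the strategy the removed per-collector allocators implemented separately: prefer expanding the perm gen (while under _capacity_expansion_limit, and again after any collection) before resorting to a stop-world collection, stall instead of collecting while a JNI critical section holds the GC_locker, and escalate from a _permanent_generation_full collection to a single _last_ditch_collection before giving up.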
CompactingPermGen::CompactingPermGen(ReservedSpace rs,
ReservedSpace shared_rs,
size_t initial_byte_size,
@ -44,40 +108,7 @@ CompactingPermGen::CompactingPermGen(ReservedSpace rs,
}
HeapWord* CompactingPermGen::mem_allocate(size_t size) {
MutexLocker ml(Heap_lock);
HeapWord* obj = _gen->allocate(size, false);
bool tried_collection = false;
bool tried_expansion = false;
while (obj == NULL) {
if (_gen->capacity() >= _capacity_expansion_limit || tried_expansion) {
// Expansion limit reached, try collection before expanding further
// For now we force a full collection, this could be changed
SharedHeap::heap()->collect_locked(GCCause::_permanent_generation_full);
obj = _gen->allocate(size, false);
tried_collection = true;
tried_expansion = false; // ... following the collection:
// the collection may have shrunk the space.
}
if (obj == NULL && !tried_expansion) {
obj = _gen->expand_and_allocate(size, false);
tried_expansion = true;
}
if (obj == NULL && tried_collection && tried_expansion) {
// We have not been able to allocate despite a collection and
// an attempted space expansion. We now make a last-ditch collection
// attempt that will try to reclaim as much space as possible (for
// example by aggressively clearing all soft refs).
SharedHeap::heap()->collect_locked(GCCause::_last_ditch_collection);
obj = _gen->allocate(size, false);
if (obj == NULL) {
// An expansion attempt is necessary since the previous
// collection may have shrunk the space.
obj = _gen->expand_and_allocate(size, false);
}
break;
}
}
return obj;
return mem_allocate_in_gen(size, _gen);
}
void CompactingPermGen::compute_new_size() {

View File

@ -38,6 +38,8 @@ class PermGen : public CHeapObj {
size_t _capacity_expansion_limit; // maximum expansion allowed without a
// full gc occurring
HeapWord* mem_allocate_in_gen(size_t size, Generation* gen);
public:
enum Name {
MarkSweepCompact, MarkSweep, ConcurrentMarkSweep

View File

@ -1928,6 +1928,10 @@ class CommandLineFlags {
develop(bool, IgnoreLibthreadGPFault, false, \
"Suppress workaround for libthread GP fault") \
\
product(bool, PrintJNIGCStalls, false, \
"Print diagnostic message when GC is stalled" \
"by JNI critical section") \
\
/* JVMTI heap profiling */ \
\
diagnostic(bool, TraceJVMTIObjectTagging, false, \
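
The message added in gcLocker.cpp above is printed only when both PrintJNIGCStalls and PrintGCDetails are enabled; with the standard -XX flag syntax that would be something like -XX:+PrintGCDetails -XX:+PrintJNIGCStalls on the java command line.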

View File

@ -33,6 +33,7 @@ class JavaThread;
class Event;
class DLL;
class FileHandle;
template<class E> class GrowableArray;
// %%%%% Moved ThreadState, START_FN, OSThread to new osThread.hpp. -- Rose
@ -206,7 +207,9 @@ class os: AllStatic {
static void realign_memory(char *addr, size_t bytes, size_t alignment_hint);
// NUMA-specific interface
static void numa_make_local(char *addr, size_t bytes);
static bool numa_has_static_binding();
static bool numa_has_group_homing();
static void numa_make_local(char *addr, size_t bytes, int lgrp_hint);
static void numa_make_global(char *addr, size_t bytes);
static size_t numa_get_groups_num();
static size_t numa_get_leaf_groups(int *ids, size_t size);
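
As the platform inline files in this commit show, the two new predicates split the NUMA models: Linux and Windows report numa_has_static_binding() (pages can be bound to a node explicitly, with no notion of a home lgroup), while Solaris reports numa_has_group_homing() (threads have a home lgroup and pages follow the first touch). MutableNUMASpace uses them to decide, respectively, whether to re-bias regions when the space is reshaped or to track invalid regions and touch pages after allocation.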

View File

@ -49,6 +49,7 @@
template(GenCollectFull) \
template(GenCollectFullConcurrent) \
template(GenCollectForAllocation) \
template(GenCollectForPermanentAllocation) \
template(ParallelGCFailedAllocation) \
template(ParallelGCFailedPermanentAllocation) \
template(ParallelGCSystemGC) \