From cfb08c72ba7f1fc038e01334e8a57c670aef0ef7 Mon Sep 17 00:00:00 2001 From: John R Rose Date: Fri, 20 Mar 2009 23:19:36 -0700 Subject: [PATCH 01/15] 6814659: separable cleanups and subroutines for 6655638 Preparatory but separable changes for method handles Reviewed-by: kvn, never --- hotspot/src/cpu/sparc/vm/assembler_sparc.cpp | 34 +++---- hotspot/src/cpu/sparc/vm/assembler_sparc.hpp | 60 ++++++------ .../cpu/sparc/vm/assembler_sparc.inline.hpp | 42 ++++---- .../cpu/sparc/vm/c1_LIRAssembler_sparc.cpp | 8 +- hotspot/src/cpu/x86/vm/assembler_x86.cpp | 16 +-- hotspot/src/cpu/x86/vm/assembler_x86.hpp | 14 +-- hotspot/src/share/vm/asm/assembler.hpp | 20 ++-- .../src/share/vm/classfile/javaClasses.cpp | 98 ++++++++++++++++--- .../src/share/vm/classfile/javaClasses.hpp | 6 ++ .../share/vm/classfile/loaderConstraints.hpp | 8 +- .../src/share/vm/classfile/symbolTable.cpp | 45 +++++++-- .../src/share/vm/classfile/symbolTable.hpp | 15 ++- .../share/vm/classfile/systemDictionary.cpp | 54 +++++++++- .../share/vm/classfile/systemDictionary.hpp | 16 +++ .../parallelScavenge/parallelScavengeHeap.cpp | 3 +- hotspot/src/share/vm/oops/instanceKlass.cpp | 96 ++++++++++++++++-- hotspot/src/share/vm/oops/instanceKlass.hpp | 18 +++- .../src/share/vm/oops/instanceKlassKlass.cpp | 74 +++++++------- hotspot/src/share/vm/oops/klassVtable.cpp | 23 ++++- hotspot/src/share/vm/oops/klassVtable.hpp | 4 +- hotspot/src/share/vm/oops/methodKlass.cpp | 15 ++- hotspot/src/share/vm/oops/objArrayKlass.cpp | 19 +++- hotspot/src/share/vm/oops/oop.cpp | 8 +- hotspot/src/share/vm/prims/jvm.cpp | 59 ++++++++--- .../src/share/vm/runtime/fieldDescriptor.cpp | 14 +-- hotspot/src/share/vm/runtime/handles.hpp | 10 +- hotspot/src/share/vm/runtime/reflection.cpp | 21 +++- hotspot/src/share/vm/runtime/reflection.hpp | 10 +- .../src/share/vm/runtime/sharedRuntime.cpp | 42 -------- .../src/share/vm/runtime/sharedRuntime.hpp | 5 +- 30 files changed, 598 insertions(+), 259 deletions(-) diff --git 
a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp index 4a61d2f2c5d..f95b12a3a12 100644 --- a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2615,12 +2615,12 @@ void MacroAssembler::cas_under_lock(Register top_ptr_reg, Register top_reg, Regi } } -RegisterConstant MacroAssembler::delayed_value(intptr_t* delayed_value_addr, - Register tmp, - int offset) { +RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset) { intptr_t value = *delayed_value_addr; if (value != 0) - return RegisterConstant(value + offset); + return RegisterOrConstant(value + offset); // load indirectly to solve generation ordering problem Address a(tmp, (address) delayed_value_addr); @@ -2634,11 +2634,11 @@ RegisterConstant MacroAssembler::delayed_value(intptr_t* delayed_value_addr, if (offset != 0) add(tmp, offset, tmp); - return RegisterConstant(tmp); + return RegisterOrConstant(tmp); } -void MacroAssembler::regcon_inc_ptr( RegisterConstant& dest, RegisterConstant src, Register temp ) { +void MacroAssembler::regcon_inc_ptr( RegisterOrConstant& dest, RegisterOrConstant src, Register temp ) { assert(dest.register_or_noreg() != G0, "lost side effect"); if ((src.is_constant() && src.as_constant() == 0) || (src.is_register() && src.as_register() == G0)) { @@ -2647,15 +2647,15 @@ void MacroAssembler::regcon_inc_ptr( RegisterConstant& dest, RegisterConstant sr add(dest.as_register(), ensure_rs2(src, temp), dest.as_register()); } else if (src.is_constant()) { intptr_t res = dest.as_constant() + src.as_constant(); - dest = RegisterConstant(res); // side effect seen 
by caller + dest = RegisterOrConstant(res); // side effect seen by caller } else { assert(temp != noreg, "cannot handle constant += register"); add(src.as_register(), ensure_rs2(dest, temp), temp); - dest = RegisterConstant(temp); // side effect seen by caller + dest = RegisterOrConstant(temp); // side effect seen by caller } } -void MacroAssembler::regcon_sll_ptr( RegisterConstant& dest, RegisterConstant src, Register temp ) { +void MacroAssembler::regcon_sll_ptr( RegisterOrConstant& dest, RegisterOrConstant src, Register temp ) { assert(dest.register_or_noreg() != G0, "lost side effect"); if (!is_simm13(src.constant_or_zero())) src = (src.as_constant() & 0xFF); @@ -2666,12 +2666,12 @@ void MacroAssembler::regcon_sll_ptr( RegisterConstant& dest, RegisterConstant sr sll_ptr(dest.as_register(), src, dest.as_register()); } else if (src.is_constant()) { intptr_t res = dest.as_constant() << src.as_constant(); - dest = RegisterConstant(res); // side effect seen by caller + dest = RegisterOrConstant(res); // side effect seen by caller } else { assert(temp != noreg, "cannot handle constant <<= register"); set(dest.as_constant(), temp); sll_ptr(temp, src, temp); - dest = RegisterConstant(temp); // side effect seen by caller + dest = RegisterOrConstant(temp); // side effect seen by caller } } @@ -2683,7 +2683,7 @@ void MacroAssembler::regcon_sll_ptr( RegisterConstant& dest, RegisterConstant sr // On failure, execution transfers to the given label. void MacroAssembler::lookup_interface_method(Register recv_klass, Register intf_klass, - RegisterConstant itable_index, + RegisterOrConstant itable_index, Register method_result, Register scan_temp, Register sethi_temp, @@ -2720,7 +2720,7 @@ void MacroAssembler::lookup_interface_method(Register recv_klass, add(recv_klass, scan_temp, scan_temp); // Adjust recv_klass by scaled itable_index, so we can free itable_index. 
- RegisterConstant itable_offset = itable_index; + RegisterOrConstant itable_offset = itable_index; regcon_sll_ptr(itable_offset, exact_log2(itableMethodEntry::size() * wordSize)); regcon_inc_ptr(itable_offset, itableMethodEntry::method_offset_in_bytes()); add(recv_klass, ensure_rs2(itable_offset, sethi_temp), recv_klass); @@ -2805,7 +2805,7 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, Label* L_success, Label* L_failure, Label* L_slow_path, - RegisterConstant super_check_offset, + RegisterOrConstant super_check_offset, Register instanceof_hack) { int sc_offset = (klassOopDesc::header_size() * HeapWordSize + Klass::secondary_super_cache_offset_in_bytes()); @@ -2867,7 +2867,7 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, if (must_load_sco) { // The super check offset is always positive... lduw(super_klass, sco_offset, temp2_reg); - super_check_offset = RegisterConstant(temp2_reg); + super_check_offset = RegisterOrConstant(temp2_reg); } ld_ptr(sub_klass, super_check_offset, temp_reg); cmp(super_klass, temp_reg); @@ -4472,7 +4472,7 @@ void MacroAssembler::card_write_barrier_post(Register store_addr, Register new_v } // Loading values by size and signed-ness -void MacroAssembler::load_sized_value(Register s1, RegisterConstant s2, Register d, +void MacroAssembler::load_sized_value(Register s1, RegisterOrConstant s2, Register d, int size_in_bytes, bool is_signed) { switch (size_in_bytes ^ (is_signed ? -1 : 0)) { case ~8: // fall through: diff --git a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp index fc05cef68a4..5c756c4b6b1 100644 --- a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp +++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -1088,8 +1088,8 @@ public: inline void add( Register s1, Register s2, Register d ); inline void add( Register s1, int simm13a, Register d, relocInfo::relocType rtype = relocInfo::none); inline void add( Register s1, int simm13a, Register d, RelocationHolder const& rspec); - inline void add( Register s1, RegisterConstant s2, Register d, int offset = 0); - inline void add( const Address& a, Register d, int offset = 0); + inline void add( Register s1, RegisterOrConstant s2, Register d, int offset = 0); + inline void add( const Address& a, Register d, int offset = 0); void addcc( Register s1, Register s2, Register d ) { emit_long( op(arith_op) | rd(d) | op3(add_op3 | cc_bit_op3) | rs1(s1) | rs2(s2) ); } void addcc( Register s1, int simm13a, Register d ) { emit_long( op(arith_op) | rd(d) | op3(add_op3 | cc_bit_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); } @@ -1305,15 +1305,15 @@ public: inline void ld( const Address& a, Register d, int offset = 0 ); inline void ldd( const Address& a, Register d, int offset = 0 ); - inline void ldub( Register s1, RegisterConstant s2, Register d ); - inline void ldsb( Register s1, RegisterConstant s2, Register d ); - inline void lduh( Register s1, RegisterConstant s2, Register d ); - inline void ldsh( Register s1, RegisterConstant s2, Register d ); - inline void lduw( Register s1, RegisterConstant s2, Register d ); - inline void ldsw( Register s1, RegisterConstant s2, Register d ); - inline void ldx( Register s1, RegisterConstant s2, Register d ); - inline void ld( Register s1, RegisterConstant s2, Register d ); - inline void ldd( Register s1, RegisterConstant s2, Register d ); + inline void ldub( Register s1, RegisterOrConstant s2, Register d ); + inline void ldsb( Register s1, RegisterOrConstant s2, Register d ); + inline void lduh( Register s1, RegisterOrConstant s2, Register d ); + inline void ldsh( Register s1, RegisterOrConstant s2, Register d ); + 
inline void lduw( Register s1, RegisterOrConstant s2, Register d ); + inline void ldsw( Register s1, RegisterOrConstant s2, Register d ); + inline void ldx( Register s1, RegisterOrConstant s2, Register d ); + inline void ld( Register s1, RegisterOrConstant s2, Register d ); + inline void ldd( Register s1, RegisterOrConstant s2, Register d ); // pp 177 @@ -1535,12 +1535,12 @@ public: inline void st( Register d, const Address& a, int offset = 0 ); inline void std( Register d, const Address& a, int offset = 0 ); - inline void stb( Register d, Register s1, RegisterConstant s2 ); - inline void sth( Register d, Register s1, RegisterConstant s2 ); - inline void stw( Register d, Register s1, RegisterConstant s2 ); - inline void stx( Register d, Register s1, RegisterConstant s2 ); - inline void std( Register d, Register s1, RegisterConstant s2 ); - inline void st( Register d, Register s1, RegisterConstant s2 ); + inline void stb( Register d, Register s1, RegisterOrConstant s2 ); + inline void sth( Register d, Register s1, RegisterOrConstant s2 ); + inline void stw( Register d, Register s1, RegisterOrConstant s2 ); + inline void stx( Register d, Register s1, RegisterOrConstant s2 ); + inline void std( Register d, Register s1, RegisterOrConstant s2 ); + inline void st( Register d, Register s1, RegisterOrConstant s2 ); // pp 177 @@ -1859,7 +1859,7 @@ class MacroAssembler: public Assembler { // Functions for isolating 64 bit shifts for LP64 inline void sll_ptr( Register s1, Register s2, Register d ); inline void sll_ptr( Register s1, int imm6a, Register d ); - inline void sll_ptr( Register s1, RegisterConstant s2, Register d ); + inline void sll_ptr( Register s1, RegisterOrConstant s2, Register d ); inline void srl_ptr( Register s1, Register s2, Register d ); inline void srl_ptr( Register s1, int imm6a, Register d ); @@ -1965,26 +1965,26 @@ class MacroAssembler: public Assembler { // st_ptr will perform st for 32 bit VM's and stx for 64 bit VM's inline void ld_ptr( Register s1, 
Register s2, Register d ); inline void ld_ptr( Register s1, int simm13a, Register d); - inline void ld_ptr( Register s1, RegisterConstant s2, Register d ); + inline void ld_ptr( Register s1, RegisterOrConstant s2, Register d ); inline void ld_ptr( const Address& a, Register d, int offset = 0 ); inline void st_ptr( Register d, Register s1, Register s2 ); inline void st_ptr( Register d, Register s1, int simm13a); - inline void st_ptr( Register d, Register s1, RegisterConstant s2 ); + inline void st_ptr( Register d, Register s1, RegisterOrConstant s2 ); inline void st_ptr( Register d, const Address& a, int offset = 0 ); // ld_long will perform ld for 32 bit VM's and ldx for 64 bit VM's // st_long will perform st for 32 bit VM's and stx for 64 bit VM's inline void ld_long( Register s1, Register s2, Register d ); inline void ld_long( Register s1, int simm13a, Register d ); - inline void ld_long( Register s1, RegisterConstant s2, Register d ); + inline void ld_long( Register s1, RegisterOrConstant s2, Register d ); inline void ld_long( const Address& a, Register d, int offset = 0 ); inline void st_long( Register d, Register s1, Register s2 ); inline void st_long( Register d, Register s1, int simm13a ); - inline void st_long( Register d, Register s1, RegisterConstant s2 ); + inline void st_long( Register d, Register s1, RegisterOrConstant s2 ); inline void st_long( Register d, const Address& a, int offset = 0 ); // Loading values by size and signed-ness - void load_sized_value(Register s1, RegisterConstant s2, Register d, + void load_sized_value(Register s1, RegisterOrConstant s2, Register d, int size_in_bytes, bool is_signed); // Helpers for address formation. @@ -1994,11 +1994,11 @@ class MacroAssembler: public Assembler { // is required, and becomes the result. // If dest is a register and src is a non-simm13 constant, // the temp argument is required, and is used to materialize the constant. 
- void regcon_inc_ptr( RegisterConstant& dest, RegisterConstant src, + void regcon_inc_ptr( RegisterOrConstant& dest, RegisterOrConstant src, Register temp = noreg ); - void regcon_sll_ptr( RegisterConstant& dest, RegisterConstant src, + void regcon_sll_ptr( RegisterOrConstant& dest, RegisterOrConstant src, Register temp = noreg ); - RegisterConstant ensure_rs2(RegisterConstant rs2, Register sethi_temp) { + RegisterOrConstant ensure_rs2(RegisterOrConstant rs2, Register sethi_temp) { guarantee(sethi_temp != noreg, "constant offset overflow"); if (is_simm13(rs2.constant_or_zero())) return rs2; // register or short constant @@ -2322,7 +2322,7 @@ class MacroAssembler: public Assembler { // interface method calling void lookup_interface_method(Register recv_klass, Register intf_klass, - RegisterConstant itable_index, + RegisterOrConstant itable_index, Register method_result, Register temp_reg, Register temp2_reg, Label& no_such_interface); @@ -2341,7 +2341,7 @@ class MacroAssembler: public Assembler { Label* L_success, Label* L_failure, Label* L_slow_path, - RegisterConstant super_check_offset = RegisterConstant(-1), + RegisterOrConstant super_check_offset = RegisterOrConstant(-1), Register instanceof_hack = noreg); // The rest of the type check; must be wired to a corresponding fast path. @@ -2381,7 +2381,7 @@ class MacroAssembler: public Assembler { // stack overflow + shadow pages. Clobbers tsp and scratch registers. 
void bang_stack_size(Register Rsize, Register Rtsp, Register Rscratch); - virtual RegisterConstant delayed_value(intptr_t* delayed_value_addr, Register tmp, int offset); + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, int offset); void verify_tlab(); diff --git a/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp b/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp index d31ab55f3f4..d9053f7f6b7 100644 --- a/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp +++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -143,45 +143,45 @@ inline void Assembler::ld( Register s1, Register s2, Register d) { lduw( s1, s2 inline void Assembler::ld( Register s1, int simm13a, Register d) { lduw( s1, simm13a, d); } #endif -inline void Assembler::ldub( Register s1, RegisterConstant s2, Register d) { +inline void Assembler::ldub( Register s1, RegisterOrConstant s2, Register d) { if (s2.is_register()) ldsb(s1, s2.as_register(), d); else ldsb(s1, s2.as_constant(), d); } -inline void Assembler::ldsb( Register s1, RegisterConstant s2, Register d) { +inline void Assembler::ldsb( Register s1, RegisterOrConstant s2, Register d) { if (s2.is_register()) ldsb(s1, s2.as_register(), d); else ldsb(s1, s2.as_constant(), d); } -inline void Assembler::lduh( Register s1, RegisterConstant s2, Register d) { +inline void Assembler::lduh( Register s1, RegisterOrConstant s2, Register d) { if (s2.is_register()) ldsh(s1, s2.as_register(), d); else ldsh(s1, s2.as_constant(), d); } -inline void Assembler::ldsh( Register s1, RegisterConstant s2, Register d) { +inline void Assembler::ldsh( Register s1, RegisterOrConstant s2, Register d) { if (s2.is_register()) ldsh(s1, 
s2.as_register(), d); else ldsh(s1, s2.as_constant(), d); } -inline void Assembler::lduw( Register s1, RegisterConstant s2, Register d) { +inline void Assembler::lduw( Register s1, RegisterOrConstant s2, Register d) { if (s2.is_register()) ldsw(s1, s2.as_register(), d); else ldsw(s1, s2.as_constant(), d); } -inline void Assembler::ldsw( Register s1, RegisterConstant s2, Register d) { +inline void Assembler::ldsw( Register s1, RegisterOrConstant s2, Register d) { if (s2.is_register()) ldsw(s1, s2.as_register(), d); else ldsw(s1, s2.as_constant(), d); } -inline void Assembler::ldx( Register s1, RegisterConstant s2, Register d) { +inline void Assembler::ldx( Register s1, RegisterOrConstant s2, Register d) { if (s2.is_register()) ldx(s1, s2.as_register(), d); else ldx(s1, s2.as_constant(), d); } -inline void Assembler::ld( Register s1, RegisterConstant s2, Register d) { +inline void Assembler::ld( Register s1, RegisterOrConstant s2, Register d) { if (s2.is_register()) ld(s1, s2.as_register(), d); else ld(s1, s2.as_constant(), d); } -inline void Assembler::ldd( Register s1, RegisterConstant s2, Register d) { +inline void Assembler::ldd( Register s1, RegisterOrConstant s2, Register d) { if (s2.is_register()) ldd(s1, s2.as_register(), d); else ldd(s1, s2.as_constant(), d); } // form effective addresses this way: -inline void Assembler::add( Register s1, RegisterConstant s2, Register d, int offset) { +inline void Assembler::add( Register s1, RegisterOrConstant s2, Register d, int offset) { if (s2.is_register()) add(s1, s2.as_register(), d); else { add(s1, s2.as_constant() + offset, d); offset = 0; } if (offset != 0) add(d, offset, d); @@ -243,23 +243,23 @@ inline void Assembler::std( Register d, Register s1, int simm13a) { v9_dep(); a inline void Assembler::st( Register d, Register s1, Register s2) { stw(d, s1, s2); } inline void Assembler::st( Register d, Register s1, int simm13a) { stw(d, s1, simm13a); } -inline void Assembler::stb( Register d, Register s1, 
RegisterConstant s2) { +inline void Assembler::stb( Register d, Register s1, RegisterOrConstant s2) { if (s2.is_register()) stb(d, s1, s2.as_register()); else stb(d, s1, s2.as_constant()); } -inline void Assembler::sth( Register d, Register s1, RegisterConstant s2) { +inline void Assembler::sth( Register d, Register s1, RegisterOrConstant s2) { if (s2.is_register()) sth(d, s1, s2.as_register()); else sth(d, s1, s2.as_constant()); } -inline void Assembler::stx( Register d, Register s1, RegisterConstant s2) { +inline void Assembler::stx( Register d, Register s1, RegisterOrConstant s2) { if (s2.is_register()) stx(d, s1, s2.as_register()); else stx(d, s1, s2.as_constant()); } -inline void Assembler::std( Register d, Register s1, RegisterConstant s2) { +inline void Assembler::std( Register d, Register s1, RegisterOrConstant s2) { if (s2.is_register()) std(d, s1, s2.as_register()); else std(d, s1, s2.as_constant()); } -inline void Assembler::st( Register d, Register s1, RegisterConstant s2) { +inline void Assembler::st( Register d, Register s1, RegisterOrConstant s2) { if (s2.is_register()) st(d, s1, s2.as_register()); else st(d, s1, s2.as_constant()); } @@ -308,7 +308,7 @@ inline void MacroAssembler::ld_ptr( Register s1, int simm13a, Register d ) { #endif } -inline void MacroAssembler::ld_ptr( Register s1, RegisterConstant s2, Register d ) { +inline void MacroAssembler::ld_ptr( Register s1, RegisterOrConstant s2, Register d ) { #ifdef _LP64 Assembler::ldx( s1, s2, d); #else @@ -340,7 +340,7 @@ inline void MacroAssembler::st_ptr( Register d, Register s1, int simm13a ) { #endif } -inline void MacroAssembler::st_ptr( Register d, Register s1, RegisterConstant s2 ) { +inline void MacroAssembler::st_ptr( Register d, Register s1, RegisterOrConstant s2 ) { #ifdef _LP64 Assembler::stx( d, s1, s2); #else @@ -373,7 +373,7 @@ inline void MacroAssembler::ld_long( Register s1, int simm13a, Register d ) { #endif } -inline void MacroAssembler::ld_long( Register s1, RegisterConstant s2, 
Register d ) { +inline void MacroAssembler::ld_long( Register s1, RegisterOrConstant s2, Register d ) { #ifdef _LP64 Assembler::ldx(s1, s2, d); #else @@ -405,7 +405,7 @@ inline void MacroAssembler::st_long( Register d, Register s1, int simm13a ) { #endif } -inline void MacroAssembler::st_long( Register d, Register s1, RegisterConstant s2 ) { +inline void MacroAssembler::st_long( Register d, Register s1, RegisterOrConstant s2 ) { #ifdef _LP64 Assembler::stx(d, s1, s2); #else @@ -455,7 +455,7 @@ inline void MacroAssembler::srl_ptr( Register s1, int imm6a, Register d ) { #endif } -inline void MacroAssembler::sll_ptr( Register s1, RegisterConstant s2, Register d ) { +inline void MacroAssembler::sll_ptr( Register s1, RegisterOrConstant s2, Register d ) { if (s2.is_register()) sll_ptr(s1, s2.as_register(), d); else sll_ptr(s1, s2.as_constant(), d); } diff --git a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp index 389acd2ee26..1e8c190c0bf 100644 --- a/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2000-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 2000-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -2489,7 +2489,7 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, noreg, (need_slow_path ? 
&done : NULL), stub->entry(), NULL, - RegisterConstant(k->super_check_offset())); + RegisterOrConstant(k->super_check_offset())); } else { // perform the fast part of the checking logic __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, O7, @@ -2550,14 +2550,14 @@ void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, noreg, (need_slow_path ? &done : NULL), (need_slow_path ? &done : NULL), NULL, - RegisterConstant(k->super_check_offset()), + RegisterOrConstant(k->super_check_offset()), dst); } else { assert(dst != klass_RInfo && dst != k_RInfo, "need 3 registers"); // perform the fast part of the checking logic __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, O7, dst, &done, &done, NULL, - RegisterConstant(-1), + RegisterOrConstant(-1), dst); } if (need_slow_path) { diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.cpp b/hotspot/src/cpu/x86/vm/assembler_x86.cpp index b043c9d3506..351ae044728 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp @@ -7218,7 +7218,7 @@ void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { // On failure, execution transfers to the given label. 
void MacroAssembler::lookup_interface_method(Register recv_klass, Register intf_klass, - RegisterConstant itable_index, + RegisterOrConstant itable_index, Register method_result, Register scan_temp, Label& L_no_such_interface) { @@ -7303,7 +7303,7 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, Label* L_success, Label* L_failure, Label* L_slow_path, - RegisterConstant super_check_offset) { + RegisterOrConstant super_check_offset) { assert_different_registers(sub_klass, super_klass, temp_reg); bool must_load_sco = (super_check_offset.constant_or_zero() == -1); if (super_check_offset.is_register()) { @@ -7352,7 +7352,7 @@ void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, if (must_load_sco) { // Positive movl does right thing on LP64. movl(temp_reg, super_check_offset_addr); - super_check_offset = RegisterConstant(temp_reg); + super_check_offset = RegisterOrConstant(temp_reg); } Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); cmpptr(super_klass, super_check_addr); // load displayed supertype @@ -7550,12 +7550,12 @@ void MacroAssembler::verify_oop(Register reg, const char* s) { } -RegisterConstant MacroAssembler::delayed_value(intptr_t* delayed_value_addr, - Register tmp, - int offset) { +RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset) { intptr_t value = *delayed_value_addr; if (value != 0) - return RegisterConstant(value + offset); + return RegisterOrConstant(value + offset); // load indirectly to solve generation ordering problem movptr(tmp, ExternalAddress((address) delayed_value_addr)); @@ -7571,7 +7571,7 @@ RegisterConstant MacroAssembler::delayed_value(intptr_t* delayed_value_addr, if (offset != 0) addptr(tmp, offset); - return RegisterConstant(tmp); + return RegisterOrConstant(tmp); } diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.hpp b/hotspot/src/cpu/x86/vm/assembler_x86.hpp index 9b54b800ba1..4dfe7fec22e 100644 --- 
a/hotspot/src/cpu/x86/vm/assembler_x86.hpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp @@ -212,7 +212,7 @@ class Address VALUE_OBJ_CLASS_SPEC { "inconsistent address"); } - Address(Register base, RegisterConstant index, ScaleFactor scale = times_1, int disp = 0) + Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0) : _base (base), _index(index.register_or_noreg()), _scale(scale), @@ -256,7 +256,7 @@ class Address VALUE_OBJ_CLASS_SPEC { "inconsistent address"); } - Address(Register base, RegisterConstant index, ScaleFactor scale, ByteSize disp) + Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp) : _base (base), _index(index.register_or_noreg()), _scale(scale), @@ -1802,7 +1802,7 @@ class MacroAssembler: public Assembler { // interface method calling void lookup_interface_method(Register recv_klass, Register intf_klass, - RegisterConstant itable_index, + RegisterOrConstant itable_index, Register method_result, Register scan_temp, Label& no_such_interface); @@ -1819,7 +1819,7 @@ class MacroAssembler: public Assembler { Label* L_success, Label* L_failure, Label* L_slow_path, - RegisterConstant super_check_offset = RegisterConstant(-1)); + RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); // The rest of the type check; must be wired to a corresponding fast path. // It does not repeat the fast path logic, so don't use it standalone. @@ -1883,9 +1883,9 @@ class MacroAssembler: public Assembler { // stack overflow + shadow pages. 
Also, clobbers tmp void bang_stack_size(Register size, Register tmp); - virtual RegisterConstant delayed_value(intptr_t* delayed_value_addr, - Register tmp, - int offset); + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, + Register tmp, + int offset); // Support for serializing memory accesses between threads void serialize_memory(Register thread, Register tmp); diff --git a/hotspot/src/share/vm/asm/assembler.hpp b/hotspot/src/share/vm/asm/assembler.hpp index 13a4c6dfad6..8027e4e1322 100644 --- a/hotspot/src/share/vm/asm/assembler.hpp +++ b/hotspot/src/share/vm/asm/assembler.hpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -143,15 +143,15 @@ class Label VALUE_OBJ_CLASS_SPEC { // A union type for code which has to assemble both constant and // non-constant operands, when the distinction cannot be made // statically. -class RegisterConstant VALUE_OBJ_CLASS_SPEC { +class RegisterOrConstant VALUE_OBJ_CLASS_SPEC { private: Register _r; intptr_t _c; public: - RegisterConstant(): _r(noreg), _c(0) {} - RegisterConstant(Register r): _r(r), _c(0) {} - RegisterConstant(intptr_t c): _r(noreg), _c(c) {} + RegisterOrConstant(): _r(noreg), _c(0) {} + RegisterOrConstant(Register r): _r(r), _c(0) {} + RegisterOrConstant(intptr_t c): _r(noreg), _c(c) {} Register as_register() const { assert(is_register(),""); return _r; } intptr_t as_constant() const { assert(is_constant(),""); return _c; } @@ -310,13 +310,13 @@ class AbstractAssembler : public ResourceObj { // offsets in code which must be generated before the object class is loaded. // Field offsets are never zero, since an object's header (mark word) // is located at offset zero. 
- RegisterConstant delayed_value(int(*value_fn)(), Register tmp, int offset = 0) { - return delayed_value(delayed_value_addr(value_fn), tmp, offset); + RegisterOrConstant delayed_value(int(*value_fn)(), Register tmp, int offset = 0) { + return delayed_value_impl(delayed_value_addr(value_fn), tmp, offset); } - RegisterConstant delayed_value(address(*value_fn)(), Register tmp, int offset = 0) { - return delayed_value(delayed_value_addr(value_fn), tmp, offset); + RegisterOrConstant delayed_value(address(*value_fn)(), Register tmp, int offset = 0) { + return delayed_value_impl(delayed_value_addr(value_fn), tmp, offset); } - virtual RegisterConstant delayed_value(intptr_t* delayed_value_addr, Register tmp, int offset) = 0; + virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, Register tmp, int offset) = 0; // Last overloading is platform-dependent; look in assembler_<arch>.cpp. static intptr_t* delayed_value_addr(int(*constant_fn)()); static intptr_t* delayed_value_addr(address(*constant_fn)()); diff --git a/hotspot/src/share/vm/classfile/javaClasses.cpp b/hotspot/src/share/vm/classfile/javaClasses.cpp index 4b8b9892b6c..cb6b41f3bb9 100644 --- a/hotspot/src/share/vm/classfile/javaClasses.cpp +++ b/hotspot/src/share/vm/classfile/javaClasses.cpp @@ -239,22 +239,20 @@ symbolHandle java_lang_String::as_symbol(Handle java_string, TRAPS) { typeArrayOop value = java_lang_String::value(obj); int offset = java_lang_String::offset(obj); int length = java_lang_String::length(obj); - - ResourceMark rm(THREAD); - symbolHandle result; - - if (length > 0) { - int utf8_length = UNICODE::utf8_length(value->char_at_addr(offset), length); - char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1); - UNICODE::convert_to_utf8(value->char_at_addr(offset), length, chars); - // Allocate the symbol - result = oopFactory::new_symbol_handle(chars, utf8_length, CHECK_(symbolHandle())); - } else { - result = oopFactory::new_symbol_handle("", 0, CHECK_(symbolHandle())); - } - return 
result; + jchar* base = value->char_at_addr(offset); + symbolOop sym = SymbolTable::lookup_unicode(base, length, THREAD); + return symbolHandle(THREAD, sym); } +symbolOop java_lang_String::as_symbol_or_null(oop java_string) { + typeArrayOop value = java_lang_String::value(java_string); + int offset = java_lang_String::offset(java_string); + int length = java_lang_String::length(java_string); + jchar* base = value->char_at_addr(offset); + return SymbolTable::probe_unicode(base, length); +} + + int java_lang_String::utf8_length(oop java_string) { typeArrayOop value = java_lang_String::value(java_string); int offset = java_lang_String::offset(java_string); @@ -385,6 +383,48 @@ klassOop java_lang_Class::as_klassOop(oop java_class) { } +void java_lang_Class::print_signature(oop java_class, outputStream* st) { + assert(java_lang_Class::is_instance(java_class), "must be a Class object"); + symbolOop name = NULL; + bool is_instance = false; + if (is_primitive(java_class)) { + name = vmSymbols::type_signature(primitive_type(java_class)); + } else { + klassOop k = as_klassOop(java_class); + is_instance = Klass::cast(k)->oop_is_instance(); + name = Klass::cast(k)->name(); + } + if (name == NULL) { + st->print(""); + return; + } + if (is_instance) st->print("L"); + st->write((char*) name->base(), (int) name->utf8_length()); + if (is_instance) st->print(";"); +} + +symbolOop java_lang_Class::as_signature(oop java_class, bool intern_if_not_found, TRAPS) { + assert(java_lang_Class::is_instance(java_class), "must be a Class object"); + symbolOop name = NULL; + if (is_primitive(java_class)) { + return vmSymbols::type_signature(primitive_type(java_class)); + } else { + klassOop k = as_klassOop(java_class); + if (!Klass::cast(k)->oop_is_instance()) { + return Klass::cast(k)->name(); + } else { + ResourceMark rm; + const char* sigstr = Klass::cast(k)->signature_name(); + int siglen = (int) strlen(sigstr); + if (!intern_if_not_found) + return SymbolTable::probe(sigstr, siglen); + else 
+ return oopFactory::new_symbol(sigstr, siglen, THREAD); + } + } +} + + klassOop java_lang_Class::array_klass(oop java_class) { klassOop k = klassOop(java_class->obj_field(array_klass_offset)); assert(k == NULL || k->is_klass() && Klass::cast(k)->oop_is_javaArray(), "should be array klass"); @@ -412,6 +452,8 @@ void java_lang_Class::set_resolved_constructor(oop java_class, methodOop constru bool java_lang_Class::is_primitive(oop java_class) { + // should assert: + //assert(java_lang_Class::is_instance(java_class), "must be a Class object"); klassOop k = klassOop(java_class->obj_field(klass_offset)); return k == NULL; } @@ -431,6 +473,19 @@ BasicType java_lang_Class::primitive_type(oop java_class) { return type; } +BasicType java_lang_Class::as_BasicType(oop java_class, klassOop* reference_klass) { + assert(java_lang_Class::is_instance(java_class), "must be a Class object"); + if (is_primitive(java_class)) { + if (reference_klass != NULL) + (*reference_klass) = NULL; + return primitive_type(java_class); + } else { + if (reference_klass != NULL) + (*reference_klass) = as_klassOop(java_class); + return T_OBJECT; + } +} + oop java_lang_Class::primitive_mirror(BasicType t) { oop mirror = Universe::java_mirror(t); @@ -1988,6 +2043,21 @@ BasicType java_lang_boxing_object::set_value(oop box, jvalue* value) { } +void java_lang_boxing_object::print(BasicType type, jvalue* value, outputStream* st) { + switch (type) { + case T_BOOLEAN: st->print("%s", value->z ? 
"true" : "false"); break; + case T_CHAR: st->print("%d", value->c); break; + case T_BYTE: st->print("%d", value->b); break; + case T_SHORT: st->print("%d", value->s); break; + case T_INT: st->print("%d", value->i); break; + case T_LONG: st->print(INT64_FORMAT, value->j); break; + case T_FLOAT: st->print("%f", value->f); break; + case T_DOUBLE: st->print("%lf", value->d); break; + default: st->print("type %d?", type); break; + } +} + + // Support for java_lang_ref_Reference oop java_lang_ref_Reference::pending_list_lock() { instanceKlass* ik = instanceKlass::cast(SystemDictionary::reference_klass()); diff --git a/hotspot/src/share/vm/classfile/javaClasses.hpp b/hotspot/src/share/vm/classfile/javaClasses.hpp index 64b0d4768f9..3ae5b5337f8 100644 --- a/hotspot/src/share/vm/classfile/javaClasses.hpp +++ b/hotspot/src/share/vm/classfile/javaClasses.hpp @@ -107,6 +107,7 @@ class java_lang_String : AllStatic { // Conversion static symbolHandle as_symbol(Handle java_string, TRAPS); + static symbolOop as_symbol_or_null(oop java_string); // Testers static bool is_instance(oop obj) { @@ -149,6 +150,9 @@ class java_lang_Class : AllStatic { static oop create_basic_type_mirror(const char* basic_type_name, BasicType type, TRAPS); // Conversion static klassOop as_klassOop(oop java_class); + static BasicType as_BasicType(oop java_class, klassOop* reference_klass = NULL); + static symbolOop as_signature(oop java_class, bool intern_if_not_found, TRAPS); + static void print_signature(oop java_class, outputStream *st); // Testing static bool is_instance(oop obj) { return obj != NULL && obj->klass() == SystemDictionary::class_klass(); @@ -668,6 +672,8 @@ class java_lang_boxing_object: AllStatic { static BasicType basic_type(oop box); static bool is_instance(oop box) { return basic_type(box) != T_ILLEGAL; } static bool is_instance(oop box, BasicType type) { return basic_type(box) == type; } + static void print(oop box, outputStream* st) { jvalue value; print(get_value(box, &value), 
&value, st); } + static void print(BasicType type, jvalue* value, outputStream* st); static int value_offset_in_bytes(BasicType type) { return ( type == T_LONG || type == T_DOUBLE ) ? long_value_offset : diff --git a/hotspot/src/share/vm/classfile/loaderConstraints.hpp b/hotspot/src/share/vm/classfile/loaderConstraints.hpp index 9d1a6880a89..6928180d22a 100644 --- a/hotspot/src/share/vm/classfile/loaderConstraints.hpp +++ b/hotspot/src/share/vm/classfile/loaderConstraints.hpp @@ -1,5 +1,5 @@ /* - * Copyright 2003-2006 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 2003-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -60,8 +60,10 @@ public: bool add_entry(symbolHandle name, klassOop klass1, Handle loader1, klassOop klass2, Handle loader2); - void check_signature_loaders(symbolHandle signature, Handle loader1, - Handle loader2, bool is_method, TRAPS); + // Note: The main entry point for this module is via SystemDictionary. + // SystemDictionary::check_signature_loaders(symbolHandle signature, + // Handle loader1, Handle loader2, + // bool is_method, TRAPS) klassOop find_constrained_klass(symbolHandle name, Handle loader); klassOop find_constrained_elem_klass(symbolHandle name, symbolHandle elem_name, diff --git a/hotspot/src/share/vm/classfile/symbolTable.cpp b/hotspot/src/share/vm/classfile/symbolTable.cpp index b77db6bd952..076e29e3009 100644 --- a/hotspot/src/share/vm/classfile/symbolTable.cpp +++ b/hotspot/src/share/vm/classfile/symbolTable.cpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -109,6 +109,40 @@ symbolOop SymbolTable::lookup_only(const char* name, int len, return the_table()->lookup(index, name, len, hash); } +// Suggestion: Push unicode-based lookup all the way into the hashing +// and probing logic, so there is no need for convert_to_utf8 until +// an actual new symbolOop is created. +symbolOop SymbolTable::lookup_unicode(const jchar* name, int utf16_length, TRAPS) { + int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length); + char stack_buf[128]; + if (utf8_length < (int) sizeof(stack_buf)) { + char* chars = stack_buf; + UNICODE::convert_to_utf8(name, utf16_length, chars); + return lookup(chars, utf8_length, THREAD); + } else { + ResourceMark rm(THREAD); + char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);; + UNICODE::convert_to_utf8(name, utf16_length, chars); + return lookup(chars, utf8_length, THREAD); + } +} + +symbolOop SymbolTable::lookup_only_unicode(const jchar* name, int utf16_length, + unsigned int& hash) { + int utf8_length = UNICODE::utf8_length((jchar*) name, utf16_length); + char stack_buf[128]; + if (utf8_length < (int) sizeof(stack_buf)) { + char* chars = stack_buf; + UNICODE::convert_to_utf8(name, utf16_length, chars); + return lookup_only(chars, utf8_length, hash); + } else { + ResourceMark rm; + char* chars = NEW_RESOURCE_ARRAY(char, utf8_length + 1);; + UNICODE::convert_to_utf8(name, utf16_length, chars); + return lookup_only(chars, utf8_length, hash); + } +} + void SymbolTable::add(constantPoolHandle cp, int names_count, const char** names, int* lengths, int* cp_indices, unsigned int* hashValues, TRAPS) { @@ -126,15 +160,6 @@ void SymbolTable::add(constantPoolHandle cp, int names_count, } } -// Needed for preloading classes in signatures when compiling. 
- -symbolOop SymbolTable::probe(const char* name, int len) { - unsigned int hashValue = hash_symbol(name, len); - int index = the_table()->hash_to_index(hashValue); - return the_table()->lookup(index, name, len, hashValue); -} - - symbolOop SymbolTable::basic_add(int index, u1 *name, int len, unsigned int hashValue, TRAPS) { assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(), diff --git a/hotspot/src/share/vm/classfile/symbolTable.hpp b/hotspot/src/share/vm/classfile/symbolTable.hpp index 828512780bf..bb0f67d1cdd 100644 --- a/hotspot/src/share/vm/classfile/symbolTable.hpp +++ b/hotspot/src/share/vm/classfile/symbolTable.hpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -91,6 +91,10 @@ public: // Only copy to C string to be added if lookup failed. static symbolOop lookup(symbolHandle sym, int begin, int end, TRAPS); + // jchar (utf16) version of lookups + static symbolOop lookup_unicode(const jchar* name, int len, TRAPS); + static symbolOop lookup_only_unicode(const jchar* name, int len, unsigned int& hash); + static void add(constantPoolHandle cp, int names_count, const char** names, int* lengths, int* cp_indices, unsigned int* hashValues, TRAPS); @@ -112,7 +116,14 @@ public: // Needed for preloading classes in signatures when compiling. // Returns the symbol is already present in symbol table, otherwise // NULL. NO ALLOCATION IS GUARANTEED! 
- static symbolOop probe(const char* name, int len); + static symbolOop probe(const char* name, int len) { + unsigned int ignore_hash; + return lookup_only(name, len, ignore_hash); + } + static symbolOop probe_unicode(const jchar* name, int len) { + unsigned int ignore_hash; + return lookup_only_unicode(name, len, ignore_hash); + } // Histogram static void print_histogram() PRODUCT_RETURN; diff --git a/hotspot/src/share/vm/classfile/systemDictionary.cpp b/hotspot/src/share/vm/classfile/systemDictionary.cpp index 95551841633..b6af53d2d27 100644 --- a/hotspot/src/share/vm/classfile/systemDictionary.cpp +++ b/hotspot/src/share/vm/classfile/systemDictionary.cpp @@ -1964,6 +1964,13 @@ BasicType SystemDictionary::box_klass_type(klassOop k) { return T_OBJECT; } +KlassHandle SystemDictionaryHandles::box_klass(BasicType t) { + if (t >= T_BOOLEAN && t <= T_VOID) + return KlassHandle(&SystemDictionary::_box_klasses[t], true); + else + return KlassHandle(); +} + // Constraints on class loaders. The details of the algorithm can be // found in the OOPSLA'98 paper "Dynamic Class Loading in the Java // Virtual Machine" by Sheng Liang and Gilad Bracha. The basic idea is @@ -2174,11 +2181,56 @@ symbolOop SystemDictionary::find_resolution_error(constantPoolHandle pool, int w } +// Signature constraints ensure that callers and callees agree about +// the meaning of type names in their signatures. This routine is the +// intake for constraints. It collects them from several places: +// +// * LinkResolver::resolve_method (if check_access is true) requires +// that the resolving class (the caller) and the defining class of +// the resolved method (the callee) agree on each type in the +// method's signature. +// +// * LinkResolver::resolve_interface_method performs exactly the same +// checks. +// +// * LinkResolver::resolve_field requires that the constant pool +// attempting to link to a field agree with the field's defining +// class about the type of the field signature. 
+// +// * klassVtable::initialize_vtable requires that, when a class +// overrides a vtable entry allocated by a superclass, that the +// overriding method (i.e., the callee) agree with the superclass +// on each type in the method's signature. +// +// * klassItable::initialize_itable requires that, when a class fills +// in its itables, for each non-abstract method installed in an +// itable, the method (i.e., the callee) agree with the interface +// on each type in the method's signature. +// +// All those methods have a boolean (check_access, checkconstraints) +// which turns off the checks. This is used from specialized contexts +// such as bootstrapping, dumping, and debugging. +// +// No direct constraint is placed between the class and its +// supertypes. Constraints are only placed along linked relations +// between callers and callees. When a method overrides or implements +// an abstract method in a supertype (superclass or interface), the +// constraints are placed as if the supertype were the caller to the +// overriding method. (This works well, since callers to the +// supertype have already established agreement between themselves and +// the supertype.) As a result of all this, a class can disagree with +// its supertype about the meaning of a type name, as long as that +// class neither calls a relevant method of the supertype, nor is +// called (perhaps via an override) from the supertype. +// +// +// SystemDictionary::check_signature_loaders(sig, l1, l2) +// // Make sure all class components (including arrays) in the given // signature will be resolved to the same class in both loaders. // Returns the name of the type that failed a loader constraint check, or // NULL if no constraint failed. The returned C string needs cleaning up -// with a ResourceMark in the caller +// with a ResourceMark in the caller. No exception except OOME is thrown. 
char* SystemDictionary::check_signature_loaders(symbolHandle signature, Handle loader1, Handle loader2, bool is_method, TRAPS) { diff --git a/hotspot/src/share/vm/classfile/systemDictionary.hpp b/hotspot/src/share/vm/classfile/systemDictionary.hpp index 7ee23212587..6444709dd62 100644 --- a/hotspot/src/share/vm/classfile/systemDictionary.hpp +++ b/hotspot/src/share/vm/classfile/systemDictionary.hpp @@ -161,6 +161,7 @@ class ResolutionErrorTable; class SystemDictionary : AllStatic { friend class VMStructs; friend class CompactingPermGenGen; + friend class SystemDictionaryHandles; NOT_PRODUCT(friend class instanceKlassKlass;) public: @@ -595,3 +596,18 @@ private: static bool _has_loadClassInternal; static bool _has_checkPackageAccess; }; + +// Cf. vmSymbols vs. vmSymbolHandles +class SystemDictionaryHandles : AllStatic { +public: + #define WK_KLASS_HANDLE_DECLARE(name, ignore_symbol, option) \ + static KlassHandle name() { \ + SystemDictionary::name(); \ + klassOop* loc = &SystemDictionary::_well_known_klasses[SystemDictionary::WK_KLASS_ENUM_NAME(name)]; \ + return KlassHandle(loc, true); \ + } + WK_KLASSES_DO(WK_KLASS_HANDLE_DECLARE); + #undef WK_KLASS_HANDLE_DECLARE + + static KlassHandle box_klass(BasicType t); +}; diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp index 195847a66e2..d0fa3a4eecf 100644 --- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp +++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/parallelScavengeHeap.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2001-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 2001-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -825,6 +825,7 @@ HeapWord* ParallelScavengeHeap::block_start(const void* addr) const { if (young_gen()->is_in_reserved(addr)) { assert(young_gen()->is_in(addr), "addr should be in allocated part of young gen"); + if (Debugging) return NULL; // called from find() in debug.cpp Unimplemented(); } else if (old_gen()->is_in_reserved(addr)) { assert(old_gen()->is_in(addr), diff --git a/hotspot/src/share/vm/oops/instanceKlass.cpp b/hotspot/src/share/vm/oops/instanceKlass.cpp index 12adb12aba6..c52a00184cc 100644 --- a/hotspot/src/share/vm/oops/instanceKlass.cpp +++ b/hotspot/src/share/vm/oops/instanceKlass.cpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1813,6 +1813,8 @@ bool instanceKlass::is_same_class_package(oop class_loader1, symbolOop class_nam oop class_loader2, symbolOop class_name2) { if (class_loader1 != class_loader2) { return false; + } else if (class_name1 == class_name2) { + return true; // skip painful bytewise comparison } else { ResourceMark rm; @@ -1859,6 +1861,55 @@ bool instanceKlass::is_same_class_package(oop class_loader1, symbolOop class_nam } } +/* defined for now in jvm.cpp, for historical reasons *-- +klassOop instanceKlass::compute_enclosing_class_impl(instanceKlassHandle self, + symbolOop& simple_name_result, TRAPS) { + ... 
+} +*/ + +// tell if two classes have the same enclosing class (at package level) +bool instanceKlass::is_same_package_member_impl(instanceKlassHandle class1, + klassOop class2_oop, TRAPS) { + if (class2_oop == class1->as_klassOop()) return true; + if (!Klass::cast(class2_oop)->oop_is_instance()) return false; + instanceKlassHandle class2(THREAD, class2_oop); + + // must be in same package before we try anything else + if (!class1->is_same_class_package(class2->class_loader(), class2->name())) + return false; + + // As long as there is an outer1.getEnclosingClass, + // shift the search outward. + instanceKlassHandle outer1 = class1; + for (;;) { + // As we walk along, look for equalities between outer1 and class2. + // Eventually, the walks will terminate as outer1 stops + // at the top-level class around the original class. + symbolOop ignore_name; + klassOop next = outer1->compute_enclosing_class(ignore_name, CHECK_false); + if (next == NULL) break; + if (next == class2()) return true; + outer1 = instanceKlassHandle(THREAD, next); + } + + // Now do the same for class2. + instanceKlassHandle outer2 = class2; + for (;;) { + symbolOop ignore_name; + klassOop next = outer2->compute_enclosing_class(ignore_name, CHECK_false); + if (next == NULL) break; + // Might as well check the new outer against all available values. + if (next == class1()) return true; + if (next == outer1()) return true; + outer2 = instanceKlassHandle(THREAD, next); + } + + // If by this point we have not found an equality between the + // two classes, we know they are in separate package members. 
+ return false; +} + jint instanceKlass::compute_modifier_flags(TRAPS) const { klassOop k = as_klassOop(); @@ -1996,9 +2047,11 @@ nmethod* instanceKlass::lookup_osr_nmethod(const methodOop m, int bci) const { // Printing +#define BULLET " - " + void FieldPrinter::do_field(fieldDescriptor* fd) { - if (fd->is_static() == (_obj == NULL)) { - _st->print(" - "); + _st->print(BULLET); + if (fd->is_static() || (_obj == NULL)) { fd->print_on(_st); _st->cr(); } else { @@ -2019,7 +2072,7 @@ void instanceKlass::oop_print_on(oop obj, outputStream* st) { value->is_typeArray() && offset <= (juint) value->length() && offset + length <= (juint) value->length()) { - st->print("string: "); + st->print(BULLET"string: "); Handle h_obj(obj); java_lang_String::print(h_obj, st); st->cr(); @@ -2027,22 +2080,25 @@ void instanceKlass::oop_print_on(oop obj, outputStream* st) { } } - st->print_cr("fields:"); + st->print_cr(BULLET"---- fields (total size %d words):", oop_size(obj)); FieldPrinter print_nonstatic_field(st, obj); do_nonstatic_fields(&print_nonstatic_field); if (as_klassOop() == SystemDictionary::class_klass()) { + st->print(BULLET"signature: "); + java_lang_Class::print_signature(obj, st); + st->cr(); klassOop mirrored_klass = java_lang_Class::as_klassOop(obj); - st->print(" - fake entry for mirror: "); + st->print(BULLET"fake entry for mirror: "); mirrored_klass->print_value_on(st); st->cr(); - st->print(" - fake entry resolved_constructor: "); + st->print(BULLET"fake entry resolved_constructor: "); methodOop ctor = java_lang_Class::resolved_constructor(obj); ctor->print_value_on(st); klassOop array_klass = java_lang_Class::array_klass(obj); - st->print(" - fake entry for array: "); - array_klass->print_value_on(st); st->cr(); + st->print(BULLET"fake entry for array: "); + array_klass->print_value_on(st); st->cr(); } } @@ -2051,6 +2107,28 @@ void instanceKlass::oop_print_value_on(oop obj, outputStream* st) { st->print("a "); name()->print_value_on(st); obj->print_address_on(st); 
+ if (as_klassOop() == SystemDictionary::string_klass() + && java_lang_String::value(obj) != NULL) { + ResourceMark rm; + int len = java_lang_String::length(obj); + int plen = (len < 24 ? len : 12); + char* str = java_lang_String::as_utf8_string(obj, 0, plen); + st->print(" = \"%s\"", str); + if (len > plen) + st->print("...[%d]", len); + } else if (as_klassOop() == SystemDictionary::class_klass()) { + klassOop k = java_lang_Class::as_klassOop(obj); + st->print(" = "); + if (k != NULL) { + k->print_value_on(st); + } else { + const char* tname = type2name(java_lang_Class::primitive_type(obj)); + st->print("%s", tname ? tname : "type?"); + } + } else if (java_lang_boxing_object::is_instance(obj)) { + st->print(" = "); + java_lang_boxing_object::print(obj, st); + } } #endif // ndef PRODUCT diff --git a/hotspot/src/share/vm/oops/instanceKlass.hpp b/hotspot/src/share/vm/oops/instanceKlass.hpp index 8586ecba03b..5e6eddbe78d 100644 --- a/hotspot/src/share/vm/oops/instanceKlass.hpp +++ b/hotspot/src/share/vm/oops/instanceKlass.hpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -308,6 +308,22 @@ class instanceKlass: public Klass { bool is_same_class_package(oop classloader2, symbolOop classname2); static bool is_same_class_package(oop class_loader1, symbolOop class_name1, oop class_loader2, symbolOop class_name2); + // find an enclosing class (defined where original code was, in jvm.cpp!) 
+ klassOop compute_enclosing_class(symbolOop& simple_name_result, TRAPS) { + instanceKlassHandle self(THREAD, this->as_klassOop()); + return compute_enclosing_class_impl(self, simple_name_result, THREAD); + } + static klassOop compute_enclosing_class_impl(instanceKlassHandle self, + symbolOop& simple_name_result, TRAPS); + + // tell if two classes have the same enclosing class (at package level) + bool is_same_package_member(klassOop class2, TRAPS) { + instanceKlassHandle self(THREAD, this->as_klassOop()); + return is_same_package_member_impl(self, class2, THREAD); + } + static bool is_same_package_member_impl(instanceKlassHandle self, + klassOop class2, TRAPS); + // initialization state bool is_loaded() const { return _init_state >= loaded; } bool is_linked() const { return _init_state >= linked; } diff --git a/hotspot/src/share/vm/oops/instanceKlassKlass.cpp b/hotspot/src/share/vm/oops/instanceKlassKlass.cpp index 94a6bee03ec..23ebfaba967 100644 --- a/hotspot/src/share/vm/oops/instanceKlassKlass.cpp +++ b/hotspot/src/share/vm/oops/instanceKlassKlass.cpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -487,6 +487,8 @@ klassOop instanceKlassKlass::allocate_instance_klass(int vtable_len, int itable_ // Printing +#define BULLET " - " + static const char* state_names[] = { "unparseable_by_gc", "allocated", "loaded", "linked", "being_initialized", "fully_initialized", "initialization_error" }; @@ -497,13 +499,13 @@ void instanceKlassKlass::oop_print_on(oop obj, outputStream* st) { instanceKlass* ik = instanceKlass::cast(klassOop(obj)); klassKlass::oop_print_on(obj, st); - st->print(" - instance size: %d", ik->size_helper()); st->cr(); - st->print(" - klass size: %d", ik->object_size()); st->cr(); - st->print(" - access: "); ik->access_flags().print_on(st); st->cr(); - st->print(" - state: "); st->print_cr(state_names[ik->_init_state]); - st->print(" - name: "); ik->name()->print_value_on(st); st->cr(); - st->print(" - super: "); ik->super()->print_value_on(st); st->cr(); - st->print(" - sub: "); + st->print(BULLET"instance size: %d", ik->size_helper()); st->cr(); + st->print(BULLET"klass size: %d", ik->object_size()); st->cr(); + st->print(BULLET"access: "); ik->access_flags().print_on(st); st->cr(); + st->print(BULLET"state: "); st->print_cr(state_names[ik->_init_state]); + st->print(BULLET"name: "); ik->name()->print_value_on(st); st->cr(); + st->print(BULLET"super: "); ik->super()->print_value_on(st); st->cr(); + st->print(BULLET"sub: "); Klass* sub = ik->subklass(); int n; for (n = 0; sub != NULL; n++, sub = sub->next_sibling()) { @@ -516,12 +518,12 @@ void instanceKlassKlass::oop_print_on(oop obj, outputStream* st) { st->cr(); if (ik->is_interface()) { - st->print_cr(" - nof implementors: %d", ik->nof_implementors()); + st->print_cr(BULLET"nof implementors: %d", ik->nof_implementors()); int print_impl = 0; for (int i = 0; i < instanceKlass::implementors_limit; i++) { if (ik->implementor(i) != NULL) { if (++print_impl == 1) - st->print_cr(" - implementor: "); + 
st->print_cr(BULLET"implementor: "); st->print(" "); ik->implementor(i)->print_value_on(st); } @@ -529,34 +531,33 @@ void instanceKlassKlass::oop_print_on(oop obj, outputStream* st) { if (print_impl > 0) st->cr(); } - st->print(" - arrays: "); ik->array_klasses()->print_value_on(st); st->cr(); - st->print(" - methods: "); ik->methods()->print_value_on(st); st->cr(); + st->print(BULLET"arrays: "); ik->array_klasses()->print_value_on(st); st->cr(); + st->print(BULLET"methods: "); ik->methods()->print_value_on(st); st->cr(); if (Verbose) { objArrayOop methods = ik->methods(); for(int i = 0; i < methods->length(); i++) { tty->print("%d : ", i); methods->obj_at(i)->print_value(); tty->cr(); } } - st->print(" - method ordering: "); ik->method_ordering()->print_value_on(st); st->cr(); - st->print(" - local interfaces: "); ik->local_interfaces()->print_value_on(st); st->cr(); - st->print(" - trans. interfaces: "); ik->transitive_interfaces()->print_value_on(st); st->cr(); - st->print(" - constants: "); ik->constants()->print_value_on(st); st->cr(); - st->print(" - class loader: "); ik->class_loader()->print_value_on(st); st->cr(); - st->print(" - protection domain: "); ik->protection_domain()->print_value_on(st); st->cr(); - st->print(" - host class: "); ik->host_klass()->print_value_on(st); st->cr(); - st->print(" - signers: "); ik->signers()->print_value_on(st); st->cr(); + st->print(BULLET"method ordering: "); ik->method_ordering()->print_value_on(st); st->cr(); + st->print(BULLET"local interfaces: "); ik->local_interfaces()->print_value_on(st); st->cr(); + st->print(BULLET"trans. 
interfaces: "); ik->transitive_interfaces()->print_value_on(st); st->cr(); + st->print(BULLET"constants: "); ik->constants()->print_value_on(st); st->cr(); + st->print(BULLET"class loader: "); ik->class_loader()->print_value_on(st); st->cr(); + st->print(BULLET"protection domain: "); ik->protection_domain()->print_value_on(st); st->cr(); + st->print(BULLET"host class: "); ik->host_klass()->print_value_on(st); st->cr(); + st->print(BULLET"signers: "); ik->signers()->print_value_on(st); st->cr(); if (ik->source_file_name() != NULL) { - st->print(" - source file: "); + st->print(BULLET"source file: "); ik->source_file_name()->print_value_on(st); st->cr(); } if (ik->source_debug_extension() != NULL) { - st->print(" - source debug extension: "); + st->print(BULLET"source debug extension: "); ik->source_debug_extension()->print_value_on(st); st->cr(); } - st->print_cr(" - previous version: "); { ResourceMark rm; // PreviousVersionInfo objects returned via PreviousVersionWalker @@ -564,38 +565,43 @@ void instanceKlassKlass::oop_print_on(oop obj, outputStream* st) { // GrowableArray _after_ the PreviousVersionWalker destructor // has destroyed the handles. 
{ + bool have_pv = false; PreviousVersionWalker pvw(ik); for (PreviousVersionInfo * pv_info = pvw.next_previous_version(); pv_info != NULL; pv_info = pvw.next_previous_version()) { + if (!have_pv) + st->print(BULLET"previous version: "); + have_pv = true; pv_info->prev_constant_pool_handle()()->print_value_on(st); } - st->cr(); + if (have_pv) st->cr(); } // pvw is cleaned up } // rm is cleaned up if (ik->generic_signature() != NULL) { - st->print(" - generic signature: "); + st->print(BULLET"generic signature: "); ik->generic_signature()->print_value_on(st); + st->cr(); } - st->print(" - inner classes: "); ik->inner_classes()->print_value_on(st); st->cr(); - st->print(" - java mirror: "); ik->java_mirror()->print_value_on(st); st->cr(); - st->print(" - vtable length %d (start addr: " INTPTR_FORMAT ")", ik->vtable_length(), ik->start_of_vtable()); st->cr(); - st->print(" - itable length %d (start addr: " INTPTR_FORMAT ")", ik->itable_length(), ik->start_of_itable()); st->cr(); - st->print_cr(" - static fields:"); + st->print(BULLET"inner classes: "); ik->inner_classes()->print_value_on(st); st->cr(); + st->print(BULLET"java mirror: "); ik->java_mirror()->print_value_on(st); st->cr(); + st->print(BULLET"vtable length %d (start addr: " INTPTR_FORMAT ")", ik->vtable_length(), ik->start_of_vtable()); st->cr(); + st->print(BULLET"itable length %d (start addr: " INTPTR_FORMAT ")", ik->itable_length(), ik->start_of_itable()); st->cr(); + st->print_cr(BULLET"---- static fields (%d words):", ik->static_field_size()); FieldPrinter print_static_field(st); ik->do_local_static_fields(&print_static_field); - st->print_cr(" - non-static fields:"); - FieldPrinter print_nonstatic_field(st, obj); + st->print_cr(BULLET"---- non-static fields (%d words):", ik->nonstatic_field_size()); + FieldPrinter print_nonstatic_field(st); ik->do_nonstatic_fields(&print_nonstatic_field); - st->print(" - static oop maps: "); + st->print(BULLET"static oop maps: "); if (ik->static_oop_field_size() > 0) 
{ int first_offset = ik->offset_of_static_fields(); st->print("%d-%d", first_offset, first_offset + ik->static_oop_field_size() - 1); } st->cr(); - st->print(" - non-static oop maps: "); + st->print(BULLET"non-static oop maps: "); OopMapBlock* map = ik->start_of_nonstatic_oop_maps(); OopMapBlock* end_map = map + ik->nonstatic_oop_map_size(); while (map < end_map) { diff --git a/hotspot/src/share/vm/oops/klassVtable.cpp b/hotspot/src/share/vm/oops/klassVtable.cpp index 9411e76509b..48511fe25ba 100644 --- a/hotspot/src/share/vm/oops/klassVtable.cpp +++ b/hotspot/src/share/vm/oops/klassVtable.cpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -1153,6 +1153,27 @@ int klassItable::compute_itable_index(methodOop m) { return index; } + +// inverse to compute_itable_index +methodOop klassItable::method_for_itable_index(klassOop intf, int itable_index) { + assert(instanceKlass::cast(intf)->is_interface(), "sanity check"); + objArrayOop methods = instanceKlass::cast(intf)->methods(); + + int index = itable_index; + // Adjust for <clinit>, which is left out of table if first method + if (methods->length() > 0 && ((methodOop)methods->obj_at(0))->name() == vmSymbols::class_initializer_name()) { + index++; + } + + if (itable_index < 0 || index >= methods->length()) + return NULL; // help caller defend against bad indexes + + methodOop m = (methodOop)methods->obj_at(index); + assert(compute_itable_index(m) == itable_index, "correct inverse"); + + return m; +} + void klassVtable::verify(outputStream* st, bool forced) { // make sure table is initialized if (!Universe::is_fully_initialized()) return; diff --git a/hotspot/src/share/vm/oops/klassVtable.hpp b/hotspot/src/share/vm/oops/klassVtable.hpp index 2ef1848f59e..d73af3761d8 100644 ---
a/hotspot/src/share/vm/oops/klassVtable.hpp +++ b/hotspot/src/share/vm/oops/klassVtable.hpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -298,6 +298,8 @@ class klassItable : public ResourceObj { // Resolving of method to index static int compute_itable_index(methodOop m); + // ...and back again: + static methodOop method_for_itable_index(klassOop klass, int itable_index); // Debugging/Statistics static void print_statistics() PRODUCT_RETURN; diff --git a/hotspot/src/share/vm/oops/methodKlass.cpp b/hotspot/src/share/vm/oops/methodKlass.cpp index bcc9afb4f31..3144312ef6c 100644 --- a/hotspot/src/share/vm/oops/methodKlass.cpp +++ b/hotspot/src/share/vm/oops/methodKlass.cpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -247,9 +247,14 @@ void methodKlass::oop_print_on(oop obj, outputStream* st) { st->print_cr(" - size of params: %d", m->size_of_parameters()); st->print_cr(" - method size: %d", m->method_size()); st->print_cr(" - vtable index: %d", m->_vtable_index); + st->print_cr(" - i2i entry: " INTPTR_FORMAT, m->interpreter_entry()); + st->print_cr(" - adapter: " INTPTR_FORMAT, m->adapter()); + st->print_cr(" - compiled entry " INTPTR_FORMAT, m->from_compiled_entry()); st->print_cr(" - code size: %d", m->code_size()); - st->print_cr(" - code start: " INTPTR_FORMAT, m->code_base()); - st->print_cr(" - code end (excl): " INTPTR_FORMAT, m->code_base() + m->code_size()); + if (m->code_size() != 0) { + st->print_cr(" - code start: " INTPTR_FORMAT, m->code_base()); + st->print_cr(" - code end (excl): " INTPTR_FORMAT, m->code_base() + m->code_size()); + } if (m->method_data() != NULL) { st->print_cr(" - method data: " INTPTR_FORMAT, (address)m->method_data()); } @@ -293,6 +298,10 @@ void methodKlass::oop_print_on(oop obj, outputStream* st) { m->code()->print_value_on(st); st->cr(); } + if (m->is_native()) { + st->print_cr(" - native function: " INTPTR_FORMAT, m->native_function()); + st->print_cr(" - signature handler: " INTPTR_FORMAT, m->signature_handler()); + } } diff --git a/hotspot/src/share/vm/oops/objArrayKlass.cpp b/hotspot/src/share/vm/oops/objArrayKlass.cpp index 1f2574b9c43..3f56d9c1ff6 100644 --- a/hotspot/src/share/vm/oops/objArrayKlass.cpp +++ b/hotspot/src/share/vm/oops/objArrayKlass.cpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -502,12 +502,25 @@ void objArrayKlass::oop_print_on(oop obj, outputStream* st) { } } +static int max_objArray_print_length = 4; void objArrayKlass::oop_print_value_on(oop obj, outputStream* st) { assert(obj->is_objArray(), "must be objArray"); + st->print("a "); element_klass()->print_value_on(st); - st->print("a [%d] ", objArrayOop(obj)->length()); - as_klassOop()->klass()->print_value_on(st); + int len = objArrayOop(obj)->length(); + st->print("[%d] ", len); + obj->print_address_on(st); + if (PrintOopAddress || PrintMiscellaneous && (WizardMode || Verbose)) { + st->print("{"); + for (int i = 0; i < len; i++) { + if (i > max_objArray_print_length) { + st->print("..."); break; + } + st->print(" "INTPTR_FORMAT, (intptr_t)(void*)objArrayOop(obj)->obj_at(i)); + } + st->print(" }"); + } } #endif // PRODUCT diff --git a/hotspot/src/share/vm/oops/oop.cpp b/hotspot/src/share/vm/oops/oop.cpp index 505a81f263e..da787bed038 100644 --- a/hotspot/src/share/vm/oops/oop.cpp +++ b/hotspot/src/share/vm/oops/oop.cpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -65,11 +65,7 @@ void oopDesc::print_value_on(outputStream* st) const { void oopDesc::print_address_on(outputStream* st) const { if (PrintOopAddress) { - st->print("{"); - if (PrintOopAddress) { - st->print(INTPTR_FORMAT, this); - } - st->print("}"); + st->print("{"INTPTR_FORMAT"}", this); } } diff --git a/hotspot/src/share/vm/prims/jvm.cpp b/hotspot/src/share/vm/prims/jvm.cpp index cf866df9f61..507759940df 100644 --- a/hotspot/src/share/vm/prims/jvm.cpp +++ b/hotspot/src/share/vm/prims/jvm.cpp @@ -1242,7 +1242,7 @@ JVM_ENTRY(jobjectArray, JVM_GetDeclaredClasses(JNIEnv *env, jclass ofClass)) // Throws an exception if outer klass has not declared k as // an inner klass - Reflection::check_for_inner_class(k, inner_klass, CHECK_NULL); + Reflection::check_for_inner_class(k, inner_klass, true, CHECK_NULL); result->obj_at_put(members, inner_klass->java_mirror()); members++; @@ -1265,16 +1265,29 @@ JVM_END JVM_ENTRY(jclass, JVM_GetDeclaringClass(JNIEnv *env, jclass ofClass)) - const int inner_class_info_index = 0; - const int outer_class_info_index = 1; - +{ // ofClass is a reference to a java_lang_Class object. if (java_lang_Class::is_primitive(JNIHandles::resolve_non_null(ofClass)) || ! 
Klass::cast(java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(ofClass)))->oop_is_instance()) { return NULL; } - instanceKlassHandle k(thread, java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(ofClass))); + symbolOop simple_name = NULL; + klassOop outer_klass + = instanceKlass::cast(java_lang_Class::as_klassOop(JNIHandles::resolve_non_null(ofClass)) + )->compute_enclosing_class(simple_name, CHECK_NULL); + if (outer_klass == NULL) return NULL; // already a top-level class + if (simple_name == NULL) return NULL; // an anonymous class (inside a method) + return (jclass) JNIHandles::make_local(env, Klass::cast(outer_klass)->java_mirror()); +} +JVM_END + +// should be in instanceKlass.cpp, but is here for historical reasons +klassOop instanceKlass::compute_enclosing_class_impl(instanceKlassHandle k, + symbolOop& simple_name_result, TRAPS) { + Thread* thread = THREAD; + const int inner_class_info_index = inner_class_inner_class_info_offset; + const int outer_class_info_index = inner_class_outer_class_info_offset; if (k->inner_classes()->length() == 0) { // No inner class info => no declaring class @@ -1288,35 +1301,51 @@ JVM_ENTRY(jclass, JVM_GetDeclaringClass(JNIEnv *env, jclass ofClass)) bool found = false; klassOop ok; instanceKlassHandle outer_klass; + bool inner_is_member = false; + int simple_name_index = 0; // Find inner_klass attribute - for(int i = 0; i < i_length && !found; i+= 4) { + for (int i = 0; i < i_length && !found; i += inner_class_next_offset) { int ioff = i_icls->ushort_at(i + inner_class_info_index); int ooff = i_icls->ushort_at(i + outer_class_info_index); - - if (ioff != 0 && ooff != 0) { + int noff = i_icls->ushort_at(i + inner_class_inner_name_offset); + if (ioff != 0) { // Check to see if the name matches the class we're looking for // before attempting to find the class. 
if (i_cp->klass_name_at_matches(k, ioff)) { klassOop inner_klass = i_cp->klass_at(ioff, CHECK_NULL); - if (k() == inner_klass) { - found = true; + found = (k() == inner_klass); + if (found && ooff != 0) { ok = i_cp->klass_at(ooff, CHECK_NULL); outer_klass = instanceKlassHandle(thread, ok); + simple_name_index = noff; + inner_is_member = true; } } } } + if (found && outer_klass.is_null()) { + // It may be anonymous; try for that. + int encl_method_class_idx = k->enclosing_method_class_index(); + if (encl_method_class_idx != 0) { + ok = i_cp->klass_at(encl_method_class_idx, CHECK_NULL); + outer_klass = instanceKlassHandle(thread, ok); + inner_is_member = false; + } + } + // If no inner class attribute found for this class. - if (!found) return NULL; + if (outer_klass.is_null()) return NULL; // Throws an exception if outer klass has not declared k as an inner klass - Reflection::check_for_inner_class(outer_klass, k, CHECK_NULL); - - return (jclass)JNIHandles::make_local(env, outer_klass->java_mirror()); -JVM_END + // We need evidence that each klass knows about the other, or else + // the system could allow a spoof of an inner class to gain access rights. + Reflection::check_for_inner_class(outer_klass, k, inner_is_member, CHECK_NULL); + simple_name_result = (inner_is_member ? i_cp->symbol_at(simple_name_index) : symbolOop(NULL)); + return outer_klass(); +} JVM_ENTRY(jstring, JVM_GetClassSignature(JNIEnv *env, jclass cls)) assert (cls != NULL, "illegal class"); diff --git a/hotspot/src/share/vm/runtime/fieldDescriptor.cpp b/hotspot/src/share/vm/runtime/fieldDescriptor.cpp index d750981a4c3..a5e40ad8e26 100644 --- a/hotspot/src/share/vm/runtime/fieldDescriptor.cpp +++ b/hotspot/src/share/vm/runtime/fieldDescriptor.cpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -107,13 +107,14 @@ void fieldDescriptor::print_on(outputStream* st) const { void fieldDescriptor::print_on_for(outputStream* st, oop obj) { print_on(st); BasicType ft = field_type(); - jint as_int; + jint as_int = 0; switch (ft) { case T_BYTE: as_int = (jint)obj->byte_field(offset()); st->print(" %d", obj->byte_field(offset())); break; case T_CHAR: + as_int = (jint)obj->char_field(offset()); { jchar c = obj->char_field(offset()); as_int = c; @@ -128,6 +129,7 @@ void fieldDescriptor::print_on_for(outputStream* st, oop obj) { st->print(" %f", obj->float_field(offset())); break; case T_INT: + as_int = obj->int_field(offset()); st->print(" %d", obj->int_field(offset())); break; case T_LONG: @@ -144,12 +146,12 @@ void fieldDescriptor::print_on_for(outputStream* st, oop obj) { break; case T_ARRAY: st->print(" "); - as_int = obj->int_field(offset()); + NOT_LP64(as_int = obj->int_field(offset())); obj->obj_field(offset())->print_value_on(st); break; case T_OBJECT: st->print(" "); - as_int = obj->int_field(offset()); + NOT_LP64(as_int = obj->int_field(offset())); obj->obj_field(offset())->print_value_on(st); break; default: @@ -158,9 +160,9 @@ void fieldDescriptor::print_on_for(outputStream* st, oop obj) { } // Print a hint as to the underlying integer representation. This can be wrong for // pointers on an LP64 machine - if (ft == T_LONG || ft == T_DOUBLE) { + if (ft == T_LONG || ft == T_DOUBLE LP64_ONLY(|| !is_java_primitive(ft)) ) { st->print(" (%x %x)", obj->int_field(offset()), obj->int_field(offset()+sizeof(jint))); - } else { + } else if (as_int < 0 || as_int > 9) { st->print(" (%x)", as_int); } } diff --git a/hotspot/src/share/vm/runtime/handles.hpp b/hotspot/src/share/vm/runtime/handles.hpp index 55e9b41fa20..c44c6ac3d22 100644 --- a/hotspot/src/share/vm/runtime/handles.hpp +++ b/hotspot/src/share/vm/runtime/handles.hpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2007 Sun Microsystems, Inc. 
All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -137,6 +137,14 @@ class KlassHandle: public Handle { assert(is_null() || obj()->is_klass(), "not a klassOop"); } + // Direct interface, use very sparingly. + // Used by SystemDictionaryHandles to create handles on existing WKKs. + // The obj of such a klass handle may be null, because the handle is formed + // during system bootstrapping. + KlassHandle(klassOop *handle, bool dummy) : Handle((oop*)handle, dummy) { + assert(SharedSkipVerify || is_null() || obj() == NULL || obj()->is_klass(), "not a klassOop"); + } + // General access klassOop operator () () const { return obj(); } Klass* operator -> () const { return as_klass(); } diff --git a/hotspot/src/share/vm/runtime/reflection.cpp b/hotspot/src/share/vm/runtime/reflection.cpp index 3bc1b029d4c..6c7fe33ee6b 100644 --- a/hotspot/src/share/vm/runtime/reflection.cpp +++ b/hotspot/src/share/vm/runtime/reflection.cpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -554,10 +554,18 @@ bool Reflection::is_same_class_package(klassOop class1, klassOop class2) { return instanceKlass::cast(class1)->is_same_class_package(class2); } +bool Reflection::is_same_package_member(klassOop class1, klassOop class2, TRAPS) { + return instanceKlass::cast(class1)->is_same_package_member(class2, THREAD); +} + // Checks that the 'outer' klass has declared 'inner' as being an inner klass. 
If not, // throw an incompatible class change exception -void Reflection::check_for_inner_class(instanceKlassHandle outer, instanceKlassHandle inner, TRAPS) { +// If inner_is_member, require the inner to be a member of the outer. +// If !inner_is_member, require the inner to be anonymous (a non-member). +// Caller is responsible for figuring out in advance which case must be true. +void Reflection::check_for_inner_class(instanceKlassHandle outer, instanceKlassHandle inner, + bool inner_is_member, TRAPS) { const int inner_class_info_index = 0; const int outer_class_info_index = 1; @@ -567,7 +575,7 @@ void Reflection::check_for_inner_class(instanceKlassHandle outer, instanceKlassH int ioff = icls->ushort_at(i + inner_class_info_index); int ooff = icls->ushort_at(i + outer_class_info_index); - if (ioff != 0 && ooff != 0) { + if (inner_is_member && ioff != 0 && ooff != 0) { klassOop o = cp->klass_at(ooff, CHECK); if (o == outer()) { klassOop i = cp->klass_at(ioff, CHECK); @@ -576,6 +584,13 @@ void Reflection::check_for_inner_class(instanceKlassHandle outer, instanceKlassH } } } + if (!inner_is_member && ioff != 0 && ooff == 0 && + cp->klass_name_at_matches(inner, ioff)) { + klassOop i = cp->klass_at(ioff, CHECK); + if (i == inner()) { + return; + } + } } // 'inner' not declared as an inner klass in outer diff --git a/hotspot/src/share/vm/runtime/reflection.hpp b/hotspot/src/share/vm/runtime/reflection.hpp index 4e8054af5cd..56a54cd0e32 100644 --- a/hotspot/src/share/vm/runtime/reflection.hpp +++ b/hotspot/src/share/vm/runtime/reflection.hpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -87,12 +87,18 @@ class Reflection: public AllStatic { bool classloader_only, bool protected_restriction = false); static bool is_same_class_package(klassOop class1, klassOop class2); + static bool is_same_package_member(klassOop class1, klassOop class2, TRAPS); static bool can_relax_access_check_for( klassOop accessor, klassOop accesee, bool classloader_only); // inner class reflection - static void check_for_inner_class(instanceKlassHandle outer, instanceKlassHandle inner, TRAPS); + // raise an ICCE unless the required relationship can be proven to hold + // If inner_is_member, require the inner to be a member of the outer. + // If !inner_is_member, require the inner to be anonymous (a non-member). + // Caller is responsible for figuring out in advance which case must be true. + static void check_for_inner_class(instanceKlassHandle outer, instanceKlassHandle inner, + bool inner_is_member, TRAPS); // // Support for reflection based on dynamic bytecode generation (JDK 1.4) diff --git a/hotspot/src/share/vm/runtime/sharedRuntime.cpp b/hotspot/src/share/vm/runtime/sharedRuntime.cpp index 54c94ba0358..787674569e7 100644 --- a/hotspot/src/share/vm/runtime/sharedRuntime.cpp +++ b/hotspot/src/share/vm/runtime/sharedRuntime.cpp @@ -675,48 +675,6 @@ JRT_ENTRY(void, SharedRuntime::yield_all(JavaThread* thread, int attempts)) JRT_END -// --------------------------------------------------------------------------------------------------------- -// Non-product code -#ifndef PRODUCT - -void SharedRuntime::verify_caller_frame(frame caller_frame, methodHandle callee_method) { - ResourceMark rm; - assert (caller_frame.is_interpreted_frame(), "sanity check"); - assert (callee_method->has_compiled_code(), "callee must be compiled"); - methodHandle caller_method (Thread::current(), caller_frame.interpreter_frame_method()); - jint bci = caller_frame.interpreter_frame_bci(); - methodHandle method = 
find_callee_method_inside_interpreter(caller_frame, caller_method, bci); - assert (callee_method == method, "incorrect method"); -} - -methodHandle SharedRuntime::find_callee_method_inside_interpreter(frame caller_frame, methodHandle caller_method, int bci) { - EXCEPTION_MARK; - Bytecode_invoke* bytecode = Bytecode_invoke_at(caller_method, bci); - methodHandle staticCallee = bytecode->static_target(CATCH); // Non-product code - - bytecode = Bytecode_invoke_at(caller_method, bci); - int bytecode_index = bytecode->index(); - Bytecodes::Code bc = bytecode->adjusted_invoke_code(); - - Handle receiver; - if (bc == Bytecodes::_invokeinterface || - bc == Bytecodes::_invokevirtual || - bc == Bytecodes::_invokespecial) { - symbolHandle signature (THREAD, staticCallee->signature()); - receiver = Handle(THREAD, retrieve_receiver(signature, caller_frame)); - } else { - receiver = Handle(); - } - CallInfo result; - constantPoolHandle constants (THREAD, caller_method->constants()); - LinkResolver::resolve_invoke(result, receiver, constants, bytecode_index, bc, CATCH); // Non-product code - methodHandle calleeMethod = result.selected_method(); - return calleeMethod; -} - -#endif // PRODUCT - - JRT_ENTRY_NO_ASYNC(void, SharedRuntime::register_finalizer(JavaThread* thread, oopDesc* obj)) assert(obj->is_oop(), "must be a valid oop"); assert(obj->klass()->klass_part()->has_finalizer(), "shouldn't be here otherwise"); diff --git a/hotspot/src/share/vm/runtime/sharedRuntime.hpp b/hotspot/src/share/vm/runtime/sharedRuntime.hpp index bea176a9dce..e98f71d1ce6 100644 --- a/hotspot/src/share/vm/runtime/sharedRuntime.hpp +++ b/hotspot/src/share/vm/runtime/sharedRuntime.hpp @@ -1,5 +1,5 @@ /* - * Copyright 1997-2008 Sun Microsystems, Inc. All Rights Reserved. + * Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -180,9 +180,6 @@ class SharedRuntime: AllStatic { static oop retrieve_receiver( symbolHandle sig, frame caller ); - static void verify_caller_frame(frame caller_frame, methodHandle callee_method) PRODUCT_RETURN; - static methodHandle find_callee_method_inside_interpreter(frame caller_frame, methodHandle caller_method, int bci) PRODUCT_RETURN_(return methodHandle();); - static void register_finalizer(JavaThread* thread, oopDesc* obj); // dtrace notifications From 1cafadfd3e83bae47284d0a3b43b8df592976461 Mon Sep 17 00:00:00 2001 From: Tom Rodriguez Date: Mon, 23 Mar 2009 13:58:58 -0700 Subject: [PATCH 02/15] 6805522: Server VM fails with assertion (block1->start() != block2->start(),"successors have unique bcis") Reviewed-by: kvn --- hotspot/src/share/vm/ci/ciTypeFlow.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/hotspot/src/share/vm/ci/ciTypeFlow.cpp b/hotspot/src/share/vm/ci/ciTypeFlow.cpp index 51f49132b61..9bf831ef9f3 100644 --- a/hotspot/src/share/vm/ci/ciTypeFlow.cpp +++ b/hotspot/src/share/vm/ci/ciTypeFlow.cpp @@ -2237,7 +2237,6 @@ ciTypeFlow::Block* ciTypeFlow::clone_loop_head(Loop* lp, StateVector* temp_vecto for (SuccIter iter(tail); !iter.done(); iter.next()) { if (iter.succ() == head) { iter.set_succ(clone); - break; } } flow_block(tail, temp_vector, temp_set); From fface2e4ca7b42bc98bea879a2ff7fe48172f760 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Tue, 24 Mar 2009 12:19:47 -0700 Subject: [PATCH 03/15] 6636138: UseSuperWord enabled failure Fixed SuperWord scheduling of memory operations. 
Reviewed-by: kvn, never --- hotspot/src/share/vm/opto/superword.cpp | 175 +++++++++++++++++++---- hotspot/src/share/vm/opto/superword.hpp | 7 +- hotspot/test/compiler/6636138/Test1.java | 67 +++++++++ hotspot/test/compiler/6636138/Test2.java | 70 +++++++++ 4 files changed, 292 insertions(+), 27 deletions(-) create mode 100644 hotspot/test/compiler/6636138/Test1.java create mode 100644 hotspot/test/compiler/6636138/Test2.java diff --git a/hotspot/src/share/vm/opto/superword.cpp b/hotspot/src/share/vm/opto/superword.cpp index d64d2e5ec21..c8198392c33 100644 --- a/hotspot/src/share/vm/opto/superword.cpp +++ b/hotspot/src/share/vm/opto/superword.cpp @@ -454,9 +454,13 @@ void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &p // or need to run igvn.optimize() again before SLP } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) { // Ditto. Not sure what else to check further. - } else if (out->Opcode() == Op_StoreCM && out->in(4) == n) { + } else if (out->Opcode() == Op_StoreCM && out->in(MemNode::OopStore) == n) { // StoreCM has an input edge used as a precedence edge. // Maybe an issue when oop stores are vectorized. + } else if( out->is_MergeMem() && prev && + prev->Opcode() == Op_StoreCM && out == prev->in(MemNode::OopStore)) { + // Oop store is a MergeMem! This should not happen. Temporarily remove the assertion + // for this case because it could not be superwordized anyway. } else { assert(out == prev || prev == NULL, "no branches off of store slice"); } @@ -912,54 +916,175 @@ void SuperWord::schedule() { } } -//------------------------------co_locate_pack--------------------------- -// Within a pack, move stores down to the last executed store, -// and move loads up to the first executed load.
+//-------------------------------remove_and_insert------------------- +//remove "current" from its current position in the memory graph and insert +//it after the appropriate insertion point (lip or uip) +void SuperWord::remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip, + Node *uip, Unique_Node_List &sched_before) { + Node* my_mem = current->in(MemNode::Memory); + _igvn.hash_delete(current); + _igvn.hash_delete(my_mem); + + //remove current_store from its current position in the memmory graph + for (DUIterator i = current->outs(); current->has_out(i); i++) { + Node* use = current->out(i); + if (use->is_Mem()) { + assert(use->in(MemNode::Memory) == current, "must be"); + _igvn.hash_delete(use); + if (use == prev) { // connect prev to my_mem + use->set_req(MemNode::Memory, my_mem); + } else if (sched_before.member(use)) { + _igvn.hash_delete(uip); + use->set_req(MemNode::Memory, uip); + } else { + _igvn.hash_delete(lip); + use->set_req(MemNode::Memory, lip); + } + _igvn._worklist.push(use); + --i; //deleted this edge; rescan position + } + } + + bool sched_up = sched_before.member(current); + Node *insert_pt = sched_up ? 
uip : lip; + _igvn.hash_delete(insert_pt); + + // all uses of insert_pt's memory state should use current's instead + for (DUIterator i = insert_pt->outs(); insert_pt->has_out(i); i++) { + Node* use = insert_pt->out(i); + if (use->is_Mem()) { + assert(use->in(MemNode::Memory) == insert_pt, "must be"); + _igvn.hash_delete(use); + use->set_req(MemNode::Memory, current); + _igvn._worklist.push(use); + --i; //deleted this edge; rescan position + } else if (!sched_up && use->is_Phi() && use->bottom_type() == Type::MEMORY) { + uint pos; //lip (lower insert point) must be the last one in the memory slice + _igvn.hash_delete(use); + for (pos=1; pos < use->req(); pos++) { + if (use->in(pos) == insert_pt) break; + } + use->set_req(pos, current); + _igvn._worklist.push(use); + --i; + } + } + + //connect current to insert_pt + current->set_req(MemNode::Memory, insert_pt); + _igvn._worklist.push(current); +} + +//------------------------------co_locate_pack---------------------------------- +// To schedule a store pack, we need to move any sandwiched memory ops either before +// or after the pack, based upon dependence information: +// (1) If any store in the pack depends on the sandwiched memory op, the +// sandwiched memory op must be scheduled BEFORE the pack; +// (2) If a sandwiched memory op depends on any store in the pack, the +// sandwiched memory op must be scheduled AFTER the pack; +// (3) If a sandwiched memory op (say, memA) depends on another sandwiched +// memory op (say memB), memB must be scheduled before memA. So, if memA is +// scheduled before the pack, memB must also be scheduled before the pack; +// (4) If there is no dependence restriction for a sandwiched memory op, we simply +// schedule this store AFTER the pack +// (5) We know there is no dependence cycle, so there in no other case; +// (6) Finally, all memory ops in another single pack should be moved in the same direction. 
+// +// To schedule a load pack: the memory edge of every loads in the pack must be +// the same as the memory edge of the last executed load in the pack void SuperWord::co_locate_pack(Node_List* pk) { if (pk->at(0)->is_Store()) { - // Push Stores down towards last executed pack member MemNode* first = executed_first(pk)->as_Mem(); MemNode* last = executed_last(pk)->as_Mem(); - MemNode* insert_pt = last; + Unique_Node_List schedule_before_pack; + Unique_Node_List memops; + MemNode* current = last->in(MemNode::Memory)->as_Mem(); + MemNode* previous = last; while (true) { assert(in_bb(current), "stay in block"); + memops.push(previous); + for (DUIterator i = current->outs(); current->has_out(i); i++) { + Node* use = current->out(i); + if (use->is_Mem() && use != previous) + memops.push(use); + } + if(current == first) break; + previous = current; + current = current->in(MemNode::Memory)->as_Mem(); + } + + // determine which memory operations should be scheduled before the pack + for (uint i = 1; i < memops.size(); i++) { + Node *s1 = memops.at(i); + if (!in_pack(s1, pk) && !schedule_before_pack.member(s1)) { + for (uint j = 0; j< i; j++) { + Node *s2 = memops.at(j); + if (!independent(s1, s2)) { + if (in_pack(s2, pk) || schedule_before_pack.member(s2)) { + schedule_before_pack.push(s1); //s1 must be scheduled before + Node_List* mem_pk = my_pack(s1); + if (mem_pk != NULL) { + for (uint ii = 0; ii < mem_pk->size(); ii++) { + Node* s = mem_pk->at(ii); // follow partner + if (memops.member(s) && !schedule_before_pack.member(s)) + schedule_before_pack.push(s); + } + } + } + } + } + } + } + + MemNode* lower_insert_pt = last; + Node* upper_insert_pt = first->in(MemNode::Memory); + previous = last; //previous store in pk + current = last->in(MemNode::Memory)->as_Mem(); + + //start scheduling from "last" to "first" + while (true) { + assert(in_bb(current), "stay in block"); + assert(in_pack(previous, pk), "previous stays in pack"); Node* my_mem = 
current->in(MemNode::Memory); + if (in_pack(current, pk)) { - // Forward users of my memory state to my input memory state + // Forward users of my memory state (except "previous) to my input memory state _igvn.hash_delete(current); - _igvn.hash_delete(my_mem); for (DUIterator i = current->outs(); current->has_out(i); i++) { Node* use = current->out(i); - if (use->is_Mem()) { + if (use->is_Mem() && use != previous) { assert(use->in(MemNode::Memory) == current, "must be"); _igvn.hash_delete(use); - use->set_req(MemNode::Memory, my_mem); + if (schedule_before_pack.member(use)) { + _igvn.hash_delete(upper_insert_pt); + use->set_req(MemNode::Memory, upper_insert_pt); + } else { + _igvn.hash_delete(lower_insert_pt); + use->set_req(MemNode::Memory, lower_insert_pt); + } _igvn._worklist.push(use); --i; // deleted this edge; rescan position } } - // put current immediately before insert_pt - current->set_req(MemNode::Memory, insert_pt->in(MemNode::Memory)); - _igvn.hash_delete(insert_pt); - insert_pt->set_req(MemNode::Memory, current); - _igvn._worklist.push(insert_pt); - _igvn._worklist.push(current); - insert_pt = current; + previous = current; + } else { // !in_pack(current, pk) ==> a sandwiched store + remove_and_insert(current, previous, lower_insert_pt, upper_insert_pt, schedule_before_pack); } + if (current == first) break; current = my_mem->as_Mem(); - } - } else if (pk->at(0)->is_Load()) { - // Pull Loads up towards first executed pack member - LoadNode* first = executed_first(pk)->as_Load(); - Node* first_mem = first->in(MemNode::Memory); - _igvn.hash_delete(first_mem); - // Give each load same memory state as first + } // end while + } else if (pk->at(0)->is_Load()) { //load + // all use the memory state that the last executed load uses + LoadNode* last_load = executed_last(pk)->as_Load(); + Node* last_mem = last_load->in(MemNode::Memory); + _igvn.hash_delete(last_mem); + // Give each load same memory state as last for (uint i = 0; i < pk->size(); i++) { 
LoadNode* ld = pk->at(i)->as_Load(); _igvn.hash_delete(ld); - ld->set_req(MemNode::Memory, first_mem); + ld->set_req(MemNode::Memory, last_mem); _igvn._worklist.push(ld); } } diff --git a/hotspot/src/share/vm/opto/superword.hpp b/hotspot/src/share/vm/opto/superword.hpp index 1c09607ed7d..4c11ff639b8 100644 --- a/hotspot/src/share/vm/opto/superword.hpp +++ b/hotspot/src/share/vm/opto/superword.hpp @@ -341,8 +341,11 @@ class SuperWord : public ResourceObj { void filter_packs(); // Adjust the memory graph for the packed operations void schedule(); - // Within a pack, move stores down to the last executed store, - // and move loads up to the first executed load. + // Remove "current" from its current position in the memory graph and insert + // it after the appropriate insert points (lip or uip); + void remove_and_insert(MemNode *current, MemNode *prev, MemNode *lip, Node *uip, Unique_Node_List &schd_before); + // Within a store pack, schedule stores together by moving out the sandwiched memory ops according + // to dependence info; and within a load pack, move loads down to the last executed load. void co_locate_pack(Node_List* p); // Convert packs into vector node operations void output(); diff --git a/hotspot/test/compiler/6636138/Test1.java b/hotspot/test/compiler/6636138/Test1.java new file mode 100644 index 00000000000..e01ab7f1e8d --- /dev/null +++ b/hotspot/test/compiler/6636138/Test1.java @@ -0,0 +1,67 @@ +/* + * Copyright 2009 Sun Microsystems, Inc. All Rights Reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, + * CA 95054 USA or visit www.sun.com if you need additional information or + * have any questions. + */ + +/** + * @test + * @bug 6636138 + * @summary SuperWord::co_locate_pack(Node_List* p) generates memory graph that leads to memory order violation. + * + * @run main/othervm -server -Xbatch -XX:CompileOnly=Test1.init -XX:+UseSuperword Test1 + */ + +class Test1 { + + public static void init(int src[], int [] dst, int[] ref) { + // initialize the arrays + for (int i =0; i 0; i--){ + int tmp = src[i]; + src[i] = src[i-1]; + src[i-1] = tmp; + } + } + + public static void verify(int src[]) { + for (int i = 0; i < src.length; i++){ + int value = (i-1 + src.length)%src.length; // correct value after shifting + if (src[i] != value) { + System.out.println("Error: src["+i+"] should be "+ value + " instead of " + src[i]); + System.exit(-1); + } + } + } + + public static void test() { + int[] src = new int[10]; + init(src); + shift(src); + verify(src); + } + + public static void main(String[] args) { + for (int i=0; i< 2000; i++) + test(); + } +} From a5396e1107cf5e3b154a1f52cafbb31a4a244574 Mon Sep 17 00:00:00 2001 From: Tom Rodriguez Date: Tue, 24 Mar 2009 15:09:52 -0700 Subject: [PATCH 04/15] 6820510: assertion failure with unloaded class in subnode.cpp Reviewed-by: kvn --- hotspot/src/share/vm/opto/subnode.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hotspot/src/share/vm/opto/subnode.cpp b/hotspot/src/share/vm/opto/subnode.cpp index 260b5dc8af8..d7c3cc87e2a 100644 --- a/hotspot/src/share/vm/opto/subnode.cpp 
+++ b/hotspot/src/share/vm/opto/subnode.cpp @@ -639,8 +639,8 @@ const Type *CmpPNode::sub( const Type *t1, const Type *t2 ) const { int kps = (p0->isa_klassptr()?1:0) + (p1->isa_klassptr()?1:0); if (klass0 && klass1 && kps != 1 && // both or neither are klass pointers - !klass0->is_interface() && // do not trust interfaces - !klass1->is_interface()) { + klass0->is_loaded() && !klass0->is_interface() && // do not trust interfaces + klass1->is_loaded() && !klass1->is_interface()) { bool unrelated_classes = false; // See if neither subclasses the other, or if the class on top // is precise. In either of these cases, the compare is known From 5231fcdc40072a17f40916d95b25e00b7740c16e Mon Sep 17 00:00:00 2001 From: Paul Hohensee Date: Tue, 24 Mar 2009 21:56:53 -0400 Subject: [PATCH 05/15] 6810653: Change String cache class used by Hotspot from String to StringValue Change create_vm() to load and initialize StringValue rather than String. Reviewed-by: kvn --- hotspot/src/share/vm/classfile/vmSymbols.hpp | 1 + hotspot/src/share/vm/runtime/thread.cpp | 22 +++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp index b9c17c2e2d2..1d5b68f0da6 100644 --- a/hotspot/src/share/vm/classfile/vmSymbols.hpp +++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp @@ -49,6 +49,7 @@ template(java_lang_Object, "java/lang/Object") \ template(java_lang_Class, "java/lang/Class") \ template(java_lang_String, "java/lang/String") \ + template(java_lang_StringValue, "java/lang/StringValue") \ template(java_lang_Thread, "java/lang/Thread") \ template(java_lang_ThreadGroup, "java/lang/ThreadGroup") \ template(java_lang_Cloneable, "java/lang/Cloneable") \ diff --git a/hotspot/src/share/vm/runtime/thread.cpp b/hotspot/src/share/vm/runtime/thread.cpp index 2e4c6360143..547ea7fe64c 100644 --- a/hotspot/src/share/vm/runtime/thread.cpp +++ b/hotspot/src/share/vm/runtime/thread.cpp @@ 
-3007,17 +3007,19 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) { } if (UseStringCache) { - // Forcibly initialize java/lang/String and mutate the private + // Forcibly initialize java/lang/StringValue and mutate the private // static final "stringCacheEnabled" field before we start creating instances - klassOop k_o = SystemDictionary::resolve_or_null(vmSymbolHandles::java_lang_String(), Handle(), Handle(), CHECK_0); - KlassHandle k = KlassHandle(THREAD, k_o); - guarantee(k.not_null(), "Must find java/lang/String"); - instanceKlassHandle ik = instanceKlassHandle(THREAD, k()); - ik->initialize(CHECK_0); - fieldDescriptor fd; - // Possible we might not find this field; if so, don't break - if (ik->find_local_field(vmSymbols::stringCacheEnabled_name(), vmSymbols::bool_signature(), &fd)) { - k()->bool_field_put(fd.offset(), true); + klassOop k_o = SystemDictionary::resolve_or_null(vmSymbolHandles::java_lang_StringValue(), Handle(), Handle(), CHECK_0); + // Possible that StringValue isn't present: if so, silently don't break + if (k_o != NULL) { + KlassHandle k = KlassHandle(THREAD, k_o); + instanceKlassHandle ik = instanceKlassHandle(THREAD, k()); + ik->initialize(CHECK_0); + fieldDescriptor fd; + // Possible we might not find this field: if so, silently don't break + if (ik->find_local_field(vmSymbols::stringCacheEnabled_name(), vmSymbols::bool_signature(), &fd)) { + k()->bool_field_put(fd.offset(), true); + } } } } From cf2ae8d98d2d77d64aebe95b4c325da96c3f8bf9 Mon Sep 17 00:00:00 2001 From: Andrey Petrusenko Date: Wed, 25 Mar 2009 13:10:54 -0700 Subject: [PATCH 06/15] 6543938: G1: remove the concept of popularity Reviewed-by: iveresov, tonyp --- hotspot/src/cpu/sparc/vm/assembler_sparc.cpp | 11 +- .../g1/collectionSetChooser.cpp | 2 - .../gc_implementation/g1/g1CollectedHeap.cpp | 499 +----------------- .../gc_implementation/g1/g1CollectedHeap.hpp | 84 +-- .../g1/g1CollectorPolicy.cpp | 297 ++--------- .../g1/g1CollectorPolicy.hpp | 79 +-- 
.../vm/gc_implementation/g1/g1MarkSweep.cpp | 45 +- .../vm/gc_implementation/g1/g1RemSet.cpp | 7 +- .../gc_implementation/g1/g1RemSet.inline.hpp | 10 +- .../vm/gc_implementation/g1/g1_globals.hpp | 16 - .../vm/gc_implementation/g1/heapRegion.cpp | 4 - .../vm/gc_implementation/g1/heapRegion.hpp | 30 -- .../gc_implementation/g1/heapRegionRemSet.hpp | 46 +- .../vm/gc_implementation/g1/heapRegionSeq.cpp | 14 +- .../vm/gc_implementation/g1/heapRegionSeq.hpp | 5 +- .../gc_implementation/g1/vm_operations_g1.cpp | 9 +- .../gc_implementation/g1/vm_operations_g1.hpp | 14 - hotspot/src/share/vm/gc_interface/gcCause.hpp | 2 +- .../src/share/vm/runtime/vm_operations.hpp | 1 - 19 files changed, 101 insertions(+), 1074 deletions(-) diff --git a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp index 4a61d2f2c5d..82b03a7ccee 100644 --- a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp @@ -4234,7 +4234,6 @@ void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offs static jint num_ct_writes = 0; static jint num_ct_writes_filtered_in_hr = 0; static jint num_ct_writes_filtered_null = 0; -static jint num_ct_writes_filtered_pop = 0; static G1CollectedHeap* g1 = NULL; static Thread* count_ct_writes(void* filter_val, void* new_val) { @@ -4247,25 +4246,19 @@ static Thread* count_ct_writes(void* filter_val, void* new_val) { if (g1 == NULL) { g1 = G1CollectedHeap::heap(); } - if ((HeapWord*)new_val < g1->popular_object_boundary()) { - Atomic::inc(&num_ct_writes_filtered_pop); - } } if ((num_ct_writes % 1000000) == 0) { jint num_ct_writes_filtered = num_ct_writes_filtered_in_hr + - num_ct_writes_filtered_null + - num_ct_writes_filtered_pop; + num_ct_writes_filtered_null; tty->print_cr("%d potential CT writes: %5.2f%% filtered\n" - " (%5.2f%% intra-HR, %5.2f%% null, %5.2f%% popular).", + " (%5.2f%% intra-HR, %5.2f%% null).", num_ct_writes, 
100.0*(float)num_ct_writes_filtered/(float)num_ct_writes, 100.0*(float)num_ct_writes_filtered_in_hr/ (float)num_ct_writes, 100.0*(float)num_ct_writes_filtered_null/ - (float)num_ct_writes, - 100.0*(float)num_ct_writes_filtered_pop/ (float)num_ct_writes); } return Thread::current(); diff --git a/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp index fbc5f4f151b..418dd584954 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/collectionSetChooser.cpp @@ -277,8 +277,6 @@ printHeapRegion(HeapRegion *hr) { gclog_or_tty->print("H: "); if (hr->in_collection_set()) gclog_or_tty->print("CS: "); - if (hr->popular()) - gclog_or_tty->print("pop: "); gclog_or_tty->print_cr("Region " PTR_FORMAT " (%s%s) " "[" PTR_FORMAT ", " PTR_FORMAT"] " "Used: " SIZE_FORMAT "K, garbage: " SIZE_FORMAT "K.", diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp index 1966a7c59b4..a1e0262382c 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @@ -42,21 +42,6 @@ // Local to this file. -// Finds the first HeapRegion. -// No longer used, but might be handy someday. 
- -class FindFirstRegionClosure: public HeapRegionClosure { - HeapRegion* _a_region; -public: - FindFirstRegionClosure() : _a_region(NULL) {} - bool doHeapRegion(HeapRegion* r) { - _a_region = r; - return true; - } - HeapRegion* result() { return _a_region; } -}; - - class RefineCardTableEntryClosure: public CardTableEntryClosure { SuspendibleThreadSet* _sts; G1RemSet* _g1rs; @@ -1207,13 +1192,12 @@ G1CollectedHeap::free_region_if_totally_empty_work(HeapRegion* hr, bool par) { assert(!hr->continuesHumongous(), "should have filtered these out"); size_t res = 0; - if (!hr->popular() && hr->used() > 0 && hr->garbage_bytes() == hr->used()) { - if (!hr->is_young()) { - if (G1PolicyVerbose > 0) - gclog_or_tty->print_cr("Freeing empty region "PTR_FORMAT "(" SIZE_FORMAT " bytes)" - " during cleanup", hr, hr->used()); - free_region_work(hr, pre_used, cleared_h, freed_regions, list, par); - } + if (hr->used() > 0 && hr->garbage_bytes() == hr->used() && + !hr->is_young()) { + if (G1PolicyVerbose > 0) + gclog_or_tty->print_cr("Freeing empty region "PTR_FORMAT "(" SIZE_FORMAT " bytes)" + " during cleanup", hr, hr->used()); + free_region_work(hr, pre_used, cleared_h, freed_regions, list, par); } } @@ -1342,10 +1326,6 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) : _refine_cte_cl(NULL), _free_region_list(NULL), _free_region_list_size(0), _free_regions(0), - _popular_object_boundary(NULL), - _cur_pop_hr_index(0), - _popular_regions_to_be_evacuated(NULL), - _pop_obj_rc_at_copy(), _full_collection(false), _unclean_region_list(), _unclean_regions_coming(false), @@ -1520,26 +1500,11 @@ jint G1CollectedHeap::initialize() { _czft = new ConcurrentZFThread(); } - - - // Allocate the popular regions; take them off free lists. 
- size_t pop_byte_size = G1NumPopularRegions * HeapRegion::GrainBytes; - expand(pop_byte_size); - _popular_object_boundary = - _g1_reserved.start() + (G1NumPopularRegions * HeapRegion::GrainWords); - for (int i = 0; i < G1NumPopularRegions; i++) { - HeapRegion* hr = newAllocRegion(HeapRegion::GrainWords); - // assert(hr != NULL && hr->bottom() < _popular_object_boundary, - // "Should be enough, and all should be below boundary."); - hr->set_popular(true); - } - assert(_cur_pop_hr_index == 0, "Start allocating at the first region."); - // Initialize the from_card cache structure of HeapRegionRemSet. HeapRegionRemSet::init_heap(max_regions()); - // Now expand into the rest of the initial heap size. - expand(init_byte_size - pop_byte_size); + // Now expand into the initial heap size. + expand(init_byte_size); // Perform any initialization actions delegated to the policy. g1_policy()->init(); @@ -1654,8 +1619,7 @@ size_t G1CollectedHeap::recalculate_used() const { class SumUsedRegionsClosure: public HeapRegionClosure { size_t _num; public: - // _num is set to 1 to account for the popular region - SumUsedRegionsClosure() : _num(G1NumPopularRegions) {} + SumUsedRegionsClosure() : _num(0) {} bool doHeapRegion(HeapRegion* r) { if (r->continuesHumongous() || r->used() > 0 || r->is_gc_alloc_region()) { _num += 1; @@ -2318,9 +2282,6 @@ void G1CollectedHeap::print_tracing_info() const { if (SummarizeG1ZFStats) { ConcurrentZFThread::print_summary_info(); } - if (G1SummarizePopularity) { - print_popularity_summary_info(); - } g1_policy()->print_yg_surv_rate_info(); GCOverheadReporter::printGCOverhead(); @@ -2495,30 +2456,19 @@ G1CollectedHeap::cleanup_surviving_young_words() { // void -G1CollectedHeap::do_collection_pause_at_safepoint(HeapRegion* popular_region) { +G1CollectedHeap::do_collection_pause_at_safepoint() { char verbose_str[128]; sprintf(verbose_str, "GC pause "); - if (popular_region != NULL) - strcat(verbose_str, "(popular)"); - else if 
(g1_policy()->in_young_gc_mode()) { + if (g1_policy()->in_young_gc_mode()) { if (g1_policy()->full_young_gcs()) strcat(verbose_str, "(young)"); else strcat(verbose_str, "(partial)"); } - bool reset_should_initiate_conc_mark = false; - if (popular_region != NULL && g1_policy()->should_initiate_conc_mark()) { - // we currently do not allow an initial mark phase to be piggy-backed - // on a popular pause - reset_should_initiate_conc_mark = true; - g1_policy()->unset_should_initiate_conc_mark(); - } if (g1_policy()->should_initiate_conc_mark()) strcat(verbose_str, " (initial-mark)"); - GCCauseSetter x(this, (popular_region == NULL ? - GCCause::_g1_inc_collection_pause : - GCCause::_g1_pop_region_collection_pause)); + GCCauseSetter x(this, GCCause::_g1_inc_collection_pause); // if PrintGCDetails is on, we'll print long statistics information // in the collector policy code, so let's not print this as the output @@ -2609,7 +2559,7 @@ G1CollectedHeap::do_collection_pause_at_safepoint(HeapRegion* popular_region) { save_marks(); // We must do this before any possible evacuation that should propagate - // marks, including evacuation of popular objects in a popular pause. + // marks. if (mark_in_progress()) { double start_time_sec = os::elapsedTime(); @@ -2626,29 +2576,15 @@ G1CollectedHeap::do_collection_pause_at_safepoint(HeapRegion* popular_region) { assert(regions_accounted_for(), "Region leakage."); - bool abandoned = false; - if (mark_in_progress()) concurrent_mark()->newCSet(); // Now choose the CS. - if (popular_region == NULL) { - g1_policy()->choose_collection_set(); - } else { - // We may be evacuating a single region (for popularity). - g1_policy()->record_popular_pause_preamble_start(); - popularity_pause_preamble(popular_region); - g1_policy()->record_popular_pause_preamble_end(); - abandoned = (g1_policy()->collection_set() == NULL); - // Now we allow more regions to be added (we have to collect - // all popular regions). 
- if (!abandoned) { - g1_policy()->choose_collection_set(popular_region); - } - } + g1_policy()->choose_collection_set(); + // We may abandon a pause if we find no region that will fit in the MMU // pause. - abandoned = (g1_policy()->collection_set() == NULL); + bool abandoned = (g1_policy()->collection_set() == NULL); // Nothing to do if we were unable to choose a collection set. if (!abandoned) { @@ -2673,12 +2609,6 @@ G1CollectedHeap::do_collection_pause_at_safepoint(HeapRegion* popular_region) { _in_cset_fast_test = NULL; _in_cset_fast_test_base = NULL; - if (popular_region != NULL) { - // We have to wait until now, because we don't want the region to - // be rescheduled for pop-evac during RS update. - popular_region->set_popular_pending(false); - } - release_gc_alloc_regions(false /* totally */); cleanup_surviving_young_words(); @@ -2724,8 +2654,7 @@ G1CollectedHeap::do_collection_pause_at_safepoint(HeapRegion* popular_region) { double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS; g1_policy()->record_pause_time_ms(pause_time_ms); GCOverheadReporter::recordSTWEnd(end_time_sec); - g1_policy()->record_collection_pause_end(popular_region != NULL, - abandoned); + g1_policy()->record_collection_pause_end(abandoned); assert(regions_accounted_for(), "Region leakage."); @@ -2759,9 +2688,6 @@ G1CollectedHeap::do_collection_pause_at_safepoint(HeapRegion* popular_region) { assert(verify_region_lists(), "Bad region lists."); - if (reset_should_initiate_conc_mark) - g1_policy()->set_should_initiate_conc_mark(); - if (ExitAfterGCNum > 0 && total_collections() == ExitAfterGCNum) { gclog_or_tty->print_cr("Stopping after GC #%d", ExitAfterGCNum); print_tracing_info(); @@ -4707,7 +4633,6 @@ G1CollectedHeap::free_region_work(HeapRegion* hr, size_t& freed_regions, UncleanRegionList* list, bool par) { - assert(!hr->popular(), "should not free popular regions"); pre_used += hr->used(); if (hr->isHumongous()) { assert(hr->startsHumongous(), @@ -4791,12 +4716,6 @@ 
void G1CollectedHeap::cleanUpCardTable() { void G1CollectedHeap::do_collection_pause_if_appropriate(size_t word_size) { - // First do any popular regions. - HeapRegion* hr; - while ((hr = popular_region_to_evac()) != NULL) { - evac_popular_region(hr); - } - // Now do heuristic pauses. if (g1_policy()->should_do_collection_pause(word_size)) { do_collection_pause(); } @@ -5192,7 +5111,7 @@ class RegionCounter: public HeapRegionClosure { public: RegionCounter() : _n(0) {} bool doHeapRegion(HeapRegion* r) { - if (r->is_empty() && !r->popular()) { + if (r->is_empty()) { assert(!r->isHumongous(), "H regions should not be empty."); _n++; } @@ -5336,14 +5255,8 @@ public: r->set_zero_fill_allocated(); } else { assert(r->is_empty(), "tautology"); - if (r->popular()) { - if (r->zero_fill_state() != HeapRegion::Allocated) { - r->ensure_zero_filled_locked(); - r->set_zero_fill_allocated(); - } - } else { - _n++; - switch (r->zero_fill_state()) { + _n++; + switch (r->zero_fill_state()) { case HeapRegion::NotZeroFilled: case HeapRegion::ZeroFilling: _g1->put_region_on_unclean_list_locked(r); @@ -5354,7 +5267,6 @@ public: case HeapRegion::ZeroFilled: _g1->put_free_region_on_list_locked(r); break; - } } } return false; @@ -5402,376 +5314,6 @@ void G1CollectedHeap::set_used_regions_to_need_zero_fill() { heap_region_iterate(&rs); } -class CountObjClosure: public ObjectClosure { - size_t _n; -public: - CountObjClosure() : _n(0) {} - void do_object(oop obj) { _n++; } - size_t n() { return _n; } -}; - -size_t G1CollectedHeap::pop_object_used_objs() { - size_t sum_objs = 0; - for (int i = 0; i < G1NumPopularRegions; i++) { - CountObjClosure cl; - _hrs->at(i)->object_iterate(&cl); - sum_objs += cl.n(); - } - return sum_objs; -} - -size_t G1CollectedHeap::pop_object_used_bytes() { - size_t sum_bytes = 0; - for (int i = 0; i < G1NumPopularRegions; i++) { - sum_bytes += _hrs->at(i)->used(); - } - return sum_bytes; -} - - -static int nq = 0; - -HeapWord* 
G1CollectedHeap::allocate_popular_object(size_t word_size) { - while (_cur_pop_hr_index < G1NumPopularRegions) { - HeapRegion* cur_pop_region = _hrs->at(_cur_pop_hr_index); - HeapWord* res = cur_pop_region->allocate(word_size); - if (res != NULL) { - // We account for popular objs directly in the used summary: - _summary_bytes_used += (word_size * HeapWordSize); - return res; - } - // Otherwise, try the next region (first making sure that we remember - // the last "top" value as the "next_top_at_mark_start", so that - // objects made popular during markings aren't automatically considered - // live). - cur_pop_region->note_end_of_copying(); - // Otherwise, try the next region. - _cur_pop_hr_index++; - } - // XXX: For now !!! - vm_exit_out_of_memory(word_size, - "Not enough pop obj space (To Be Fixed)"); - return NULL; -} - -class HeapRegionList: public CHeapObj { - public: - HeapRegion* hr; - HeapRegionList* next; -}; - -void G1CollectedHeap::schedule_popular_region_evac(HeapRegion* r) { - // This might happen during parallel GC, so protect by this lock. - MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); - // We don't schedule regions whose evacuations are already pending, or - // are already being evacuated. 
- if (!r->popular_pending() && !r->in_collection_set()) { - r->set_popular_pending(true); - if (G1TracePopularity) { - gclog_or_tty->print_cr("Scheduling region "PTR_FORMAT" " - "["PTR_FORMAT", "PTR_FORMAT") for pop-object evacuation.", - r, r->bottom(), r->end()); - } - HeapRegionList* hrl = new HeapRegionList; - hrl->hr = r; - hrl->next = _popular_regions_to_be_evacuated; - _popular_regions_to_be_evacuated = hrl; - } -} - -HeapRegion* G1CollectedHeap::popular_region_to_evac() { - MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); - HeapRegion* res = NULL; - while (_popular_regions_to_be_evacuated != NULL && res == NULL) { - HeapRegionList* hrl = _popular_regions_to_be_evacuated; - _popular_regions_to_be_evacuated = hrl->next; - res = hrl->hr; - // The G1RSPopLimit may have increased, so recheck here... - if (res->rem_set()->occupied() < (size_t) G1RSPopLimit) { - // Hah: don't need to schedule. - if (G1TracePopularity) { - gclog_or_tty->print_cr("Unscheduling region "PTR_FORMAT" " - "["PTR_FORMAT", "PTR_FORMAT") " - "for pop-object evacuation (size %d < limit %d)", - res, res->bottom(), res->end(), - res->rem_set()->occupied(), G1RSPopLimit); - } - res->set_popular_pending(false); - res = NULL; - } - // We do not reset res->popular() here; if we did so, it would allow - // the region to be "rescheduled" for popularity evacuation. Instead, - // this is done in the collection pause, with the world stopped. - // So the invariant is that the regions in the list have the popularity - // boolean set, but having the boolean set does not imply membership - // on the list (though there can at most one such pop-pending region - // not on the list at any time). - delete hrl; - } - return res; -} - -void G1CollectedHeap::evac_popular_region(HeapRegion* hr) { - while (true) { - // Don't want to do a GC pause while cleanup is being completed! 
- wait_for_cleanup_complete(); - - // Read the GC count while holding the Heap_lock - int gc_count_before = SharedHeap::heap()->total_collections(); - g1_policy()->record_stop_world_start(); - - { - MutexUnlocker mu(Heap_lock); // give up heap lock, execute gets it back - VM_G1PopRegionCollectionPause op(gc_count_before, hr); - VMThread::execute(&op); - - // If the prolog succeeded, we didn't do a GC for this. - if (op.prologue_succeeded()) break; - } - // Otherwise we didn't. We should recheck the size, though, since - // the limit may have increased... - if (hr->rem_set()->occupied() < (size_t) G1RSPopLimit) { - hr->set_popular_pending(false); - break; - } - } -} - -void G1CollectedHeap::atomic_inc_obj_rc(oop obj) { - Atomic::inc(obj_rc_addr(obj)); -} - -class CountRCClosure: public OopsInHeapRegionClosure { - G1CollectedHeap* _g1h; - bool _parallel; -public: - CountRCClosure(G1CollectedHeap* g1h) : - _g1h(g1h), _parallel(ParallelGCThreads > 0) - {} - void do_oop(narrowOop* p) { - guarantee(false, "NYI"); - } - void do_oop(oop* p) { - oop obj = *p; - assert(obj != NULL, "Precondition."); - if (_parallel) { - // We go sticky at the limit to avoid excess contention. - // If we want to track the actual RC's further, we'll need to keep a - // per-thread hash table or something for the popular objects. 
- if (_g1h->obj_rc(obj) < G1ObjPopLimit) { - _g1h->atomic_inc_obj_rc(obj); - } - } else { - _g1h->inc_obj_rc(obj); - } - } -}; - -class EvacPopObjClosure: public ObjectClosure { - G1CollectedHeap* _g1h; - size_t _pop_objs; - size_t _max_rc; -public: - EvacPopObjClosure(G1CollectedHeap* g1h) : - _g1h(g1h), _pop_objs(0), _max_rc(0) {} - - void do_object(oop obj) { - size_t rc = _g1h->obj_rc(obj); - _max_rc = MAX2(rc, _max_rc); - if (rc >= (size_t) G1ObjPopLimit) { - _g1h->_pop_obj_rc_at_copy.add((double)rc); - size_t word_sz = obj->size(); - HeapWord* new_pop_loc = _g1h->allocate_popular_object(word_sz); - oop new_pop_obj = (oop)new_pop_loc; - Copy::aligned_disjoint_words((HeapWord*)obj, new_pop_loc, word_sz); - obj->forward_to(new_pop_obj); - G1ScanAndBalanceClosure scan_and_balance(_g1h); - new_pop_obj->oop_iterate_backwards(&scan_and_balance); - // preserve "next" mark bit if marking is in progress. - if (_g1h->mark_in_progress() && !_g1h->is_obj_ill(obj)) { - _g1h->concurrent_mark()->markAndGrayObjectIfNecessary(new_pop_obj); - } - - if (G1TracePopularity) { - gclog_or_tty->print_cr("Found obj " PTR_FORMAT " of word size " SIZE_FORMAT - " pop (%d), move to " PTR_FORMAT, - (void*) obj, word_sz, - _g1h->obj_rc(obj), (void*) new_pop_obj); - } - _pop_objs++; - } - } - size_t pop_objs() { return _pop_objs; } - size_t max_rc() { return _max_rc; } -}; - -class G1ParCountRCTask : public AbstractGangTask { - G1CollectedHeap* _g1h; - BitMap _bm; - - size_t getNCards() { - return (_g1h->capacity() + G1BlockOffsetSharedArray::N_bytes - 1) - / G1BlockOffsetSharedArray::N_bytes; - } - CountRCClosure _count_rc_closure; -public: - G1ParCountRCTask(G1CollectedHeap* g1h) : - AbstractGangTask("G1 Par RC Count task"), - _g1h(g1h), _bm(getNCards()), _count_rc_closure(g1h) - {} - - void work(int i) { - ResourceMark rm; - HandleMark hm; - _g1h->g1_rem_set()->oops_into_collection_set_do(&_count_rc_closure, i); - } -}; - -void G1CollectedHeap::popularity_pause_preamble(HeapRegion* 
popular_region) { - // We're evacuating a single region (for popularity). - if (G1TracePopularity) { - gclog_or_tty->print_cr("Doing pop region pause for ["PTR_FORMAT", "PTR_FORMAT")", - popular_region->bottom(), popular_region->end()); - } - g1_policy()->set_single_region_collection_set(popular_region); - size_t max_rc; - if (!compute_reference_counts_and_evac_popular(popular_region, - &max_rc)) { - // We didn't evacuate any popular objects. - // We increase the RS popularity limit, to prevent this from - // happening in the future. - if (G1RSPopLimit < (1 << 30)) { - G1RSPopLimit *= 2; - } - // For now, interesting enough for a message: -#if 1 - gclog_or_tty->print_cr("In pop region pause for ["PTR_FORMAT", "PTR_FORMAT"), " - "failed to find a pop object (max = %d).", - popular_region->bottom(), popular_region->end(), - max_rc); - gclog_or_tty->print_cr("Increased G1RSPopLimit to %d.", G1RSPopLimit); -#endif // 0 - // Also, we reset the collection set to NULL, to make the rest of - // the collection do nothing. - assert(popular_region->next_in_collection_set() == NULL, - "should be single-region."); - popular_region->set_in_collection_set(false); - popular_region->set_popular_pending(false); - g1_policy()->clear_collection_set(); - } -} - -bool G1CollectedHeap:: -compute_reference_counts_and_evac_popular(HeapRegion* popular_region, - size_t* max_rc) { - HeapWord* rc_region_bot; - HeapWord* rc_region_end; - - // Set up the reference count region. 
- HeapRegion* rc_region = newAllocRegion(HeapRegion::GrainWords); - if (rc_region != NULL) { - rc_region_bot = rc_region->bottom(); - rc_region_end = rc_region->end(); - } else { - rc_region_bot = NEW_C_HEAP_ARRAY(HeapWord, HeapRegion::GrainWords); - if (rc_region_bot == NULL) { - vm_exit_out_of_memory(HeapRegion::GrainWords, - "No space for RC region."); - } - rc_region_end = rc_region_bot + HeapRegion::GrainWords; - } - - if (G1TracePopularity) - gclog_or_tty->print_cr("RC region is ["PTR_FORMAT", "PTR_FORMAT")", - rc_region_bot, rc_region_end); - if (rc_region_bot > popular_region->bottom()) { - _rc_region_above = true; - _rc_region_diff = - pointer_delta(rc_region_bot, popular_region->bottom(), 1); - } else { - assert(rc_region_bot < popular_region->bottom(), "Can't be equal."); - _rc_region_above = false; - _rc_region_diff = - pointer_delta(popular_region->bottom(), rc_region_bot, 1); - } - g1_policy()->record_pop_compute_rc_start(); - // Count external references. - g1_rem_set()->prepare_for_oops_into_collection_set_do(); - if (ParallelGCThreads > 0) { - - set_par_threads(workers()->total_workers()); - G1ParCountRCTask par_count_rc_task(this); - workers()->run_task(&par_count_rc_task); - set_par_threads(0); - - } else { - CountRCClosure count_rc_closure(this); - g1_rem_set()->oops_into_collection_set_do(&count_rc_closure, 0); - } - g1_rem_set()->cleanup_after_oops_into_collection_set_do(); - g1_policy()->record_pop_compute_rc_end(); - - // Now evacuate popular objects. - g1_policy()->record_pop_evac_start(); - EvacPopObjClosure evac_pop_obj_cl(this); - popular_region->object_iterate(&evac_pop_obj_cl); - *max_rc = evac_pop_obj_cl.max_rc(); - - // Make sure the last "top" value of the current popular region is copied - // as the "next_top_at_mark_start", so that objects made popular during - // markings aren't automatically considered live. 
- HeapRegion* cur_pop_region = _hrs->at(_cur_pop_hr_index); - cur_pop_region->note_end_of_copying(); - - if (rc_region != NULL) { - free_region(rc_region); - } else { - FREE_C_HEAP_ARRAY(HeapWord, rc_region_bot); - } - g1_policy()->record_pop_evac_end(); - - return evac_pop_obj_cl.pop_objs() > 0; -} - -class CountPopObjInfoClosure: public HeapRegionClosure { - size_t _objs; - size_t _bytes; - - class CountObjClosure: public ObjectClosure { - int _n; - public: - CountObjClosure() : _n(0) {} - void do_object(oop obj) { _n++; } - size_t n() { return _n; } - }; - -public: - CountPopObjInfoClosure() : _objs(0), _bytes(0) {} - bool doHeapRegion(HeapRegion* r) { - _bytes += r->used(); - CountObjClosure blk; - r->object_iterate(&blk); - _objs += blk.n(); - return false; - } - size_t objs() { return _objs; } - size_t bytes() { return _bytes; } -}; - - -void G1CollectedHeap::print_popularity_summary_info() const { - CountPopObjInfoClosure blk; - for (int i = 0; i <= _cur_pop_hr_index; i++) { - blk.doHeapRegion(_hrs->at(i)); - } - gclog_or_tty->print_cr("\nPopular objects: %d objs, %d bytes.", - blk.objs(), blk.bytes()); - gclog_or_tty->print_cr(" RC at copy = [avg = %5.2f, max = %5.2f, sd = %5.2f].", - _pop_obj_rc_at_copy.avg(), - _pop_obj_rc_at_copy.maximum(), - _pop_obj_rc_at_copy.sd()); -} - void G1CollectedHeap::set_refine_cte_cl_concurrency(bool concurrent) { _refine_cte_cl->set_concurrent(concurrent); } @@ -5845,7 +5387,6 @@ bool G1CollectedHeap::regions_accounted_for() { } bool G1CollectedHeap::print_region_accounting_info() { - gclog_or_tty->print_cr("P regions: %d.", G1NumPopularRegions); gclog_or_tty->print_cr("Free regions: %d (count: %d count list %d) (clean: %d unclean: %d).", free_regions(), count_free_regions(), count_free_regions_list(), diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp index e67e4d4cab3..9a3b0b6d9e9 100644 --- 
a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp @@ -29,7 +29,6 @@ class HeapRegion; class HeapRegionSeq; -class HeapRegionList; class PermanentGenerationSpec; class GenerationSpec; class OopsInHeapRegionClosure; @@ -143,7 +142,6 @@ class G1CollectedHeap : public SharedHeap { friend class VM_GenCollectForPermanentAllocation; friend class VM_G1CollectFull; friend class VM_G1IncCollectionPause; - friend class VM_G1PopRegionCollectionPause; friend class VMStructs; // Closures used in implementation. @@ -253,10 +251,6 @@ private: // than the current allocation region. size_t _summary_bytes_used; - // Summary information about popular objects; method to print it. - NumberSeq _pop_obj_rc_at_copy; - void print_popularity_summary_info() const; - // This is used for a quick test on whether a reference points into // the collection set or not. Basically, we have an array, with one // byte per region, and that byte denotes whether the corresponding @@ -447,10 +441,8 @@ protected: virtual void do_collection_pause(); // The guts of the incremental collection pause, executed by the vm - // thread. If "popular_region" is non-NULL, this pause should evacuate - // this single region whose remembered set has gotten large, moving - // any popular objects to one of the popular regions. - virtual void do_collection_pause_at_safepoint(HeapRegion* popular_region); + // thread. + virtual void do_collection_pause_at_safepoint(); // Actually do the work of evacuating the collection set. virtual void evacuate_collection_set(); @@ -625,67 +617,10 @@ protected: SubTasksDone* _process_strong_tasks; - // Allocate space to hold a popular object. Result is guaranteed below - // "popular_object_boundary()". Note: CURRENTLY halts the system if we - // run out of space to hold popular objects. - HeapWord* allocate_popular_object(size_t word_size); - - // The boundary between popular and non-popular objects. 
- HeapWord* _popular_object_boundary; - - HeapRegionList* _popular_regions_to_be_evacuated; - - // Compute which objects in "single_region" are popular. If any are, - // evacuate them to a popular region, leaving behind forwarding pointers, - // and select "popular_region" as the single collection set region. - // Otherwise, leave the collection set null. - void popularity_pause_preamble(HeapRegion* populer_region); - - // Compute which objects in "single_region" are popular, and evacuate - // them to a popular region, leaving behind forwarding pointers. - // Returns "true" if at least one popular object is discovered and - // evacuated. In any case, "*max_rc" is set to the maximum reference - // count of an object in the region. - bool compute_reference_counts_and_evac_popular(HeapRegion* populer_region, - size_t* max_rc); - // Subroutines used in the above. - bool _rc_region_above; - size_t _rc_region_diff; - jint* obj_rc_addr(oop obj) { - uintptr_t obj_addr = (uintptr_t)obj; - if (_rc_region_above) { - jint* res = (jint*)(obj_addr + _rc_region_diff); - assert((uintptr_t)res > obj_addr, "RC region is above."); - return res; - } else { - jint* res = (jint*)(obj_addr - _rc_region_diff); - assert((uintptr_t)res < obj_addr, "RC region is below."); - return res; - } - } - jint obj_rc(oop obj) { - return *obj_rc_addr(obj); - } - void inc_obj_rc(oop obj) { - (*obj_rc_addr(obj))++; - } - void atomic_inc_obj_rc(oop obj); - - - // Number of popular objects and bytes (latter is cheaper!). - size_t pop_object_used_objs(); - size_t pop_object_used_bytes(); - - // Index of the popular region in which allocation is currently being - // done. - int _cur_pop_hr_index; - // List of regions which require zero filling. UncleanRegionList _unclean_region_list; bool _unclean_regions_coming; - bool check_age_cohort_well_formed_work(int a, HeapRegion* hr); - public: void set_refine_cte_cl_concurrency(bool concurrent); @@ -1066,21 +1001,6 @@ public: // words. 
virtual size_t large_typearray_limit(); - // All popular objects are guaranteed to have addresses below this - // boundary. - HeapWord* popular_object_boundary() { - return _popular_object_boundary; - } - - // Declare the region as one that should be evacuated because its - // remembered set is too large. - void schedule_popular_region_evac(HeapRegion* r); - // If there is a popular region to evacuate it, remove it from the list - // and return it. - HeapRegion* popular_region_to_evac(); - // Evacuate the given popular region. - void evac_popular_region(HeapRegion* r); - // Returns "true" iff the given word_size is "very large". static bool isHumongous(size_t word_size) { return word_size >= VeryLargeInWords; diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp index 6147c8b6c67..d259ad38ea0 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp @@ -91,10 +91,8 @@ G1CollectorPolicy::G1CollectorPolicy() : _all_mod_union_times_ms(new NumberSeq()), - _non_pop_summary(new NonPopSummary()), - _pop_summary(new PopSummary()), - _non_pop_abandoned_summary(new NonPopAbandonedSummary()), - _pop_abandoned_summary(new PopAbandonedSummary()), + _summary(new Summary()), + _abandoned_summary(new AbandonedSummary()), _cur_clear_ct_time_ms(0.0), @@ -109,9 +107,6 @@ G1CollectorPolicy::G1CollectorPolicy() : _cur_aux_times_ms(new double[_aux_num]), _cur_aux_times_set(new bool[_aux_num]), - _pop_compute_rc_start(0.0), - _pop_evac_start(0.0), - _concurrent_mark_init_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), _concurrent_mark_remark_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), _concurrent_mark_cleanup_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)), @@ -224,16 +219,6 @@ G1CollectorPolicy::G1CollectorPolicy() : _par_last_termination_times_ms = new double[_parallel_gc_threads]; - 
// we store the data from the first pass during popularity pauses - _pop_par_last_update_rs_start_times_ms = new double[_parallel_gc_threads]; - _pop_par_last_update_rs_times_ms = new double[_parallel_gc_threads]; - _pop_par_last_update_rs_processed_buffers = new double[_parallel_gc_threads]; - - _pop_par_last_scan_rs_start_times_ms = new double[_parallel_gc_threads]; - _pop_par_last_scan_rs_times_ms = new double[_parallel_gc_threads]; - - _pop_par_last_closure_app_times_ms = new double[_parallel_gc_threads]; - // start conservatively _expensive_region_limit_ms = 0.5 * (double) G1MaxPauseTimeMS; @@ -1047,23 +1032,6 @@ void G1CollectorPolicy::record_full_collection_end() { calculate_young_list_target_config(); } -void G1CollectorPolicy::record_pop_compute_rc_start() { - _pop_compute_rc_start = os::elapsedTime(); -} -void G1CollectorPolicy::record_pop_compute_rc_end() { - double ms = (os::elapsedTime() - _pop_compute_rc_start)*1000.0; - _cur_popular_compute_rc_time_ms = ms; - _pop_compute_rc_start = 0.0; -} -void G1CollectorPolicy::record_pop_evac_start() { - _pop_evac_start = os::elapsedTime(); -} -void G1CollectorPolicy::record_pop_evac_end() { - double ms = (os::elapsedTime() - _pop_evac_start)*1000.0; - _cur_popular_evac_time_ms = ms; - _pop_evac_start = 0.0; -} - void G1CollectorPolicy::record_before_bytes(size_t bytes) { _bytes_in_to_space_before_gc += bytes; } @@ -1120,13 +1088,6 @@ void G1CollectorPolicy::record_collection_pause_start(double start_time_sec, _par_last_scan_new_refs_times_ms[i] = -666.0; _par_last_obj_copy_times_ms[i] = -666.0; _par_last_termination_times_ms[i] = -666.0; - - _pop_par_last_update_rs_start_times_ms[i] = -666.0; - _pop_par_last_update_rs_times_ms[i] = -666.0; - _pop_par_last_update_rs_processed_buffers[i] = -666.0; - _pop_par_last_scan_rs_start_times_ms[i] = -666.0; - _pop_par_last_scan_rs_times_ms[i] = -666.0; - _pop_par_last_closure_app_times_ms[i] = -666.0; } #endif @@ -1185,25 +1146,6 @@ void 
G1CollectorPolicy::tag_scan_only(size_t short_lived_scan_only_length) { guarantee( false, "we should never reach here" ); } -void G1CollectorPolicy::record_popular_pause_preamble_start() { - _cur_popular_preamble_start_ms = os::elapsedTime() * 1000.0; -} - -void G1CollectorPolicy::record_popular_pause_preamble_end() { - _cur_popular_preamble_time_ms = - (os::elapsedTime() * 1000.0) - _cur_popular_preamble_start_ms; - - // copy the recorded statistics of the first pass to temporary arrays - for (int i = 0; i < _parallel_gc_threads; ++i) { - _pop_par_last_update_rs_start_times_ms[i] = _par_last_update_rs_start_times_ms[i]; - _pop_par_last_update_rs_times_ms[i] = _par_last_update_rs_times_ms[i]; - _pop_par_last_update_rs_processed_buffers[i] = _par_last_update_rs_processed_buffers[i]; - _pop_par_last_scan_rs_start_times_ms[i] = _par_last_scan_rs_start_times_ms[i]; - _pop_par_last_scan_rs_times_ms[i] = _par_last_scan_rs_times_ms[i]; - _pop_par_last_closure_app_times_ms[i] = _par_last_obj_copy_times_ms[i]; - } -} - void G1CollectorPolicy::record_mark_closure_time(double mark_closure_time_ms) { _mark_closure_time_ms = mark_closure_time_ms; } @@ -1465,8 +1407,7 @@ double G1CollectorPolicy::max_sum (double* data1, // Anything below that is considered to be zero #define MIN_TIMER_GRANULARITY 0.0000001 -void G1CollectorPolicy::record_collection_pause_end(bool popular, - bool abandoned) { +void G1CollectorPolicy::record_collection_pause_end(bool abandoned) { double end_time_sec = os::elapsedTime(); double elapsed_ms = _last_pause_time_ms; bool parallel = ParallelGCThreads > 0; @@ -1587,42 +1528,10 @@ void G1CollectorPolicy::record_collection_pause_end(bool popular, } PauseSummary* summary; - if (!abandoned && !popular) - summary = _non_pop_summary; - else if (!abandoned && popular) - summary = _pop_summary; - else if (abandoned && !popular) - summary = _non_pop_abandoned_summary; - else if (abandoned && popular) - summary = _pop_abandoned_summary; - else - guarantee(false, 
"should not get here!"); - - double pop_update_rs_time; - double pop_update_rs_processed_buffers; - double pop_scan_rs_time; - double pop_closure_app_time; - double pop_other_time; - - if (popular) { - PopPreambleSummary* preamble_summary = summary->pop_preamble_summary(); - guarantee(preamble_summary != NULL, "should not be null!"); - - pop_update_rs_time = avg_value(_pop_par_last_update_rs_times_ms); - pop_update_rs_processed_buffers = - sum_of_values(_pop_par_last_update_rs_processed_buffers); - pop_scan_rs_time = avg_value(_pop_par_last_scan_rs_times_ms); - pop_closure_app_time = avg_value(_pop_par_last_closure_app_times_ms); - pop_other_time = _cur_popular_preamble_time_ms - - (pop_update_rs_time + pop_scan_rs_time + pop_closure_app_time + - _cur_popular_evac_time_ms); - - preamble_summary->record_pop_preamble_time_ms(_cur_popular_preamble_time_ms); - preamble_summary->record_pop_update_rs_time_ms(pop_update_rs_time); - preamble_summary->record_pop_scan_rs_time_ms(pop_scan_rs_time); - preamble_summary->record_pop_closure_app_time_ms(pop_closure_app_time); - preamble_summary->record_pop_evacuation_time_ms(_cur_popular_evac_time_ms); - preamble_summary->record_pop_other_time_ms(pop_other_time); + if (abandoned) { + summary = _abandoned_summary; + } else { + summary = _summary; } double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms); @@ -1694,8 +1603,6 @@ void G1CollectorPolicy::record_collection_pause_end(bool popular, } double other_time_ms = elapsed_ms; - if (popular) - other_time_ms -= _cur_popular_preamble_time_ms; if (!abandoned) { if (_satb_drain_time_set) @@ -1712,41 +1619,24 @@ void G1CollectorPolicy::record_collection_pause_end(bool popular, if (PrintGCDetails) { gclog_or_tty->print_cr("%s%s, %1.8lf secs]", - (popular && !abandoned) ? " (popular)" : - (!popular && abandoned) ? " (abandoned)" : - (popular && abandoned) ? " (popular/abandoned)" : "", + abandoned ? " (abandoned)" : "", (last_pause_included_initial_mark) ? 
" (initial-mark)" : "", elapsed_ms / 1000.0); if (!abandoned) { - if (_satb_drain_time_set) + if (_satb_drain_time_set) { print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms); - if (_last_satb_drain_processed_buffers >= 0) + } + if (_last_satb_drain_processed_buffers >= 0) { print_stats(2, "Processed Buffers", _last_satb_drain_processed_buffers); - } - if (popular) - print_stats(1, "Popularity Preamble", _cur_popular_preamble_time_ms); - if (parallel) { - if (popular) { - print_par_stats(2, "Update RS (Start)", _pop_par_last_update_rs_start_times_ms, false); - print_par_stats(2, "Update RS", _pop_par_last_update_rs_times_ms); + } + if (parallel) { + print_stats(1, "Parallel Time", _cur_collection_par_time_ms); + print_par_stats(2, "Update RS (Start)", _par_last_update_rs_start_times_ms, false); + print_par_stats(2, "Update RS", _par_last_update_rs_times_ms); if (G1RSBarrierUseQueue) print_par_buffers(3, "Processed Buffers", - _pop_par_last_update_rs_processed_buffers, true); - print_par_stats(2, "Scan RS", _pop_par_last_scan_rs_times_ms); - print_par_stats(2, "Closure app", _pop_par_last_closure_app_times_ms); - print_stats(2, "Evacuation", _cur_popular_evac_time_ms); - print_stats(2, "Other", pop_other_time); - } - if (!abandoned) { - print_stats(1, "Parallel Time", _cur_collection_par_time_ms); - if (!popular) { - print_par_stats(2, "Update RS (Start)", _par_last_update_rs_start_times_ms, false); - print_par_stats(2, "Update RS", _par_last_update_rs_times_ms); - if (G1RSBarrierUseQueue) - print_par_buffers(3, "Processed Buffers", - _par_last_update_rs_processed_buffers, true); - } + _par_last_update_rs_processed_buffers, true); print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms); print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms); print_par_stats(2, "Scan-Only Scanning", _par_last_scan_only_times_ms); @@ -1757,25 +1647,11 @@ void G1CollectorPolicy::record_collection_pause_end(bool popular, print_par_stats(2, 
"Termination", _par_last_termination_times_ms); print_stats(2, "Other", parallel_other_time); print_stats(1, "Clear CT", _cur_clear_ct_time_ms); - } - } else { - if (popular) { - print_stats(2, "Update RS", pop_update_rs_time); + } else { + print_stats(1, "Update RS", update_rs_time); if (G1RSBarrierUseQueue) - print_stats(3, "Processed Buffers", - (int)pop_update_rs_processed_buffers); - print_stats(2, "Scan RS", pop_scan_rs_time); - print_stats(2, "Closure App", pop_closure_app_time); - print_stats(2, "Evacuation", _cur_popular_evac_time_ms); - print_stats(2, "Other", pop_other_time); - } - if (!abandoned) { - if (!popular) { - print_stats(1, "Update RS", update_rs_time); - if (G1RSBarrierUseQueue) - print_stats(2, "Processed Buffers", - (int)update_rs_processed_buffers); - } + print_stats(2, "Processed Buffers", + (int)update_rs_processed_buffers); print_stats(1, "Ext Root Scanning", ext_root_scan_time); print_stats(1, "Mark Stack Scanning", mark_stack_scan_time); print_stats(1, "Scan-Only Scanning", scan_only_time); @@ -1855,7 +1731,7 @@ void G1CollectorPolicy::record_collection_pause_end(bool popular, // - if (!popular && update_stats) { + if (update_stats) { double pause_time_ms = elapsed_ms; size_t diff = 0; @@ -2454,36 +2330,8 @@ void G1CollectorPolicy::check_other_times(int level, void G1CollectorPolicy::print_summary(PauseSummary* summary) const { bool parallel = ParallelGCThreads > 0; MainBodySummary* body_summary = summary->main_body_summary(); - PopPreambleSummary* preamble_summary = summary->pop_preamble_summary(); - if (summary->get_total_seq()->num() > 0) { - print_summary_sd(0, - (preamble_summary == NULL) ? 
"Non-Popular Pauses" : - "Popular Pauses", - summary->get_total_seq()); - if (preamble_summary != NULL) { - print_summary(1, "Popularity Preamble", - preamble_summary->get_pop_preamble_seq()); - print_summary(2, "Update RS", preamble_summary->get_pop_update_rs_seq()); - print_summary(2, "Scan RS", preamble_summary->get_pop_scan_rs_seq()); - print_summary(2, "Closure App", - preamble_summary->get_pop_closure_app_seq()); - print_summary(2, "Evacuation", - preamble_summary->get_pop_evacuation_seq()); - print_summary(2, "Other", preamble_summary->get_pop_other_seq()); - { - NumberSeq* other_parts[] = { - preamble_summary->get_pop_update_rs_seq(), - preamble_summary->get_pop_scan_rs_seq(), - preamble_summary->get_pop_closure_app_seq(), - preamble_summary->get_pop_evacuation_seq() - }; - NumberSeq calc_other_times_ms(preamble_summary->get_pop_preamble_seq(), - 4, other_parts); - check_other_times(2, preamble_summary->get_pop_other_seq(), - &calc_other_times_ms); - } - } + print_summary_sd(0, "Evacuation Pauses", summary->get_total_seq()); if (body_summary != NULL) { print_summary(1, "SATB Drain", body_summary->get_satb_drain_seq()); if (parallel) { @@ -2537,19 +2385,15 @@ void G1CollectorPolicy::print_summary(PauseSummary* summary) const { // parallel NumberSeq* other_parts[] = { body_summary->get_satb_drain_seq(), - (preamble_summary == NULL) ? NULL : - preamble_summary->get_pop_preamble_seq(), body_summary->get_parallel_seq(), body_summary->get_clear_ct_seq() }; - calc_other_times_ms = NumberSeq (summary->get_total_seq(), - 4, other_parts); + calc_other_times_ms = NumberSeq(summary->get_total_seq(), + 3, other_parts); } else { // serial NumberSeq* other_parts[] = { body_summary->get_satb_drain_seq(), - (preamble_summary == NULL) ? 
NULL : - preamble_summary->get_pop_preamble_seq(), body_summary->get_update_rs_seq(), body_summary->get_ext_root_scan_seq(), body_summary->get_mark_stack_scan_seq(), @@ -2558,16 +2402,11 @@ void G1CollectorPolicy::print_summary(PauseSummary* summary) const { body_summary->get_obj_copy_seq() }; calc_other_times_ms = NumberSeq(summary->get_total_seq(), - 8, other_parts); + 7, other_parts); } } else { // abandoned - NumberSeq* other_parts[] = { - (preamble_summary == NULL) ? NULL : - preamble_summary->get_pop_preamble_seq() - }; - calc_other_times_ms = NumberSeq(summary->get_total_seq(), - 1, other_parts); + calc_other_times_ms = NumberSeq(); } check_other_times(1, summary->get_other_seq(), &calc_other_times_ms); } @@ -2579,18 +2418,12 @@ void G1CollectorPolicy::print_summary(PauseSummary* summary) const { } void -G1CollectorPolicy::print_abandoned_summary(PauseSummary* non_pop_summary, - PauseSummary* pop_summary) const { +G1CollectorPolicy::print_abandoned_summary(PauseSummary* summary) const { bool printed = false; - if (non_pop_summary->get_total_seq()->num() > 0) { + if (summary->get_total_seq()->num() > 0) { printed = true; - print_summary(non_pop_summary); + print_summary(summary); } - if (pop_summary->get_total_seq()->num() > 0) { - printed = true; - print_summary(pop_summary); - } - if (!printed) { print_indent(0); gclog_or_tty->print_cr("none"); @@ -2608,15 +2441,11 @@ void G1CollectorPolicy::print_tracing_info() const { gclog_or_tty->print_cr(" Partial Young GC Pauses: %8d", _partial_young_pause_num); gclog_or_tty->print_cr(""); - gclog_or_tty->print_cr("NON-POPULAR PAUSES"); - print_summary(_non_pop_summary); - - gclog_or_tty->print_cr("POPULAR PAUSES"); - print_summary(_pop_summary); + gclog_or_tty->print_cr("EVACUATION PAUSES"); + print_summary(_summary); gclog_or_tty->print_cr("ABANDONED PAUSES"); - print_abandoned_summary(_non_pop_abandoned_summary, - _pop_abandoned_summary); + print_abandoned_summary(_abandoned_summary); 
gclog_or_tty->print_cr("MISC"); print_summary_sd(0, "Stop World", _all_stop_world_times_ms); @@ -2702,14 +2531,6 @@ void G1CollectorPolicy::update_conc_refine_data() { _conc_refine_enabled++; } -void G1CollectorPolicy::set_single_region_collection_set(HeapRegion* hr) { - assert(collection_set() == NULL, "Must be no current CS."); - _collection_set_size = 0; - _collection_set_bytes_used_before = 0; - add_to_collection_set(hr); - count_CS_bytes_used(); -} - bool G1CollectorPolicy::should_add_next_region_to_young_list() { assert(in_young_gc_mode(), "should be in young GC mode"); @@ -2787,15 +2608,6 @@ void G1CollectorPolicy::calculate_survivors_policy() } } - -void -G1CollectorPolicy_BestRegionsFirst:: -set_single_region_collection_set(HeapRegion* hr) { - G1CollectorPolicy::set_single_region_collection_set(hr); - _collectionSetChooser->removeRegion(hr); -} - - bool G1CollectorPolicy_BestRegionsFirst::should_do_collection_pause(size_t word_size) { @@ -3061,19 +2873,13 @@ add_to_collection_set(HeapRegion* hr) { void G1CollectorPolicy_BestRegionsFirst:: -choose_collection_set(HeapRegion* pop_region) { +choose_collection_set() { double non_young_start_time_sec; start_recording_regions(); - if (pop_region != NULL) { - _target_pause_time_ms = (double) G1MaxPauseTimeMS; - } else { - guarantee(_target_pause_time_ms > -1.0, - "_target_pause_time_ms should have been set!"); - } - - // pop region is either null (and so is CS), or else it *is* the CS. 
- assert(_collection_set == pop_region, "Precondition"); + guarantee(_target_pause_time_ms > -1.0, + "_target_pause_time_ms should have been set!"); + assert(_collection_set == NULL, "Precondition"); double base_time_ms = predict_base_elapsed_time_ms(_pending_cards); double predicted_pause_time_ms = base_time_ms; @@ -3100,15 +2906,13 @@ choose_collection_set(HeapRegion* pop_region) { size_t expansion_bytes = _g1->expansion_regions() * HeapRegion::GrainBytes; - if (pop_region == NULL) { - _collection_set_bytes_used_before = 0; - _collection_set_size = 0; - } + _collection_set_bytes_used_before = 0; + _collection_set_size = 0; // Adjust for expansion and slop. max_live_bytes = max_live_bytes + expansion_bytes; - assert(pop_region != NULL || _g1->regions_accounted_for(), "Region leakage!"); + assert(_g1->regions_accounted_for(), "Region leakage!"); HeapRegion* hr; if (in_young_gc_mode()) { @@ -3135,14 +2939,9 @@ choose_collection_set(HeapRegion* pop_region) { double predicted_time_ms = predict_region_elapsed_time_ms(hr, true); time_remaining_ms -= predicted_time_ms; predicted_pause_time_ms += predicted_time_ms; - if (hr == pop_region) { - // The popular region was young. Skip over it. - assert(hr->in_collection_set(), "It's the pop region."); - } else { - assert(!hr->in_collection_set(), "It's not the pop region."); - add_to_collection_set(hr); - record_cset_region(hr, true); - } + assert(!hr->in_collection_set(), "invariant"); + add_to_collection_set(hr); + record_cset_region(hr, true); max_live_bytes -= MIN2(hr->max_live_bytes(), max_live_bytes); if (G1PolicyVerbose > 0) { gclog_or_tty->print_cr(" Added [" PTR_FORMAT ", " PTR_FORMAT") to CS.", @@ -3165,10 +2964,6 @@ choose_collection_set(HeapRegion* pop_region) { // don't bother adding more regions... goto choose_collection_set_end; } - } else if (pop_region != NULL) { - // We're not in young mode, and we chose a popular region; don't choose - // any more. 
- return; } if (!in_young_gc_mode() || !full_young_gcs()) { @@ -3178,7 +2973,7 @@ choose_collection_set(HeapRegion* pop_region) { do { hr = _collectionSetChooser->getNextMarkedRegion(time_remaining_ms, avg_prediction); - if (hr != NULL && !hr->popular()) { + if (hr != NULL) { double predicted_time_ms = predict_region_elapsed_time_ms(hr, false); time_remaining_ms -= predicted_time_ms; predicted_pause_time_ms += predicted_time_ms; @@ -3225,8 +3020,8 @@ expand_if_possible(size_t numRegions) { } void G1CollectorPolicy_BestRegionsFirst:: -record_collection_pause_end(bool popular, bool abandoned) { - G1CollectorPolicy::record_collection_pause_end(popular, abandoned); +record_collection_pause_end(bool abandoned) { + G1CollectorPolicy::record_collection_pause_end(abandoned); assert(assertMarkedBytesDataOK(), "Marked regions not OK at pause end."); } diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp index 598a4018109..3043b7b674e 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp @@ -47,7 +47,6 @@ public: \ } class MainBodySummary; -class PopPreambleSummary; class PauseSummary: public CHeapObj { define_num_seq(total) @@ -55,7 +54,6 @@ class PauseSummary: public CHeapObj { public: virtual MainBodySummary* main_body_summary() { return NULL; } - virtual PopPreambleSummary* pop_preamble_summary() { return NULL; } }; class MainBodySummary: public CHeapObj { @@ -75,36 +73,13 @@ class MainBodySummary: public CHeapObj { define_num_seq(clear_ct) // parallel only }; -class PopPreambleSummary: public CHeapObj { - define_num_seq(pop_preamble) - define_num_seq(pop_update_rs) - define_num_seq(pop_scan_rs) - define_num_seq(pop_closure_app) - define_num_seq(pop_evacuation) - define_num_seq(pop_other) -}; - -class NonPopSummary: public PauseSummary, - public MainBodySummary { +class Summary: public 
PauseSummary, + public MainBodySummary { public: virtual MainBodySummary* main_body_summary() { return this; } }; -class PopSummary: public PauseSummary, - public MainBodySummary, - public PopPreambleSummary { -public: - virtual MainBodySummary* main_body_summary() { return this; } - virtual PopPreambleSummary* pop_preamble_summary() { return this; } -}; - -class NonPopAbandonedSummary: public PauseSummary { -}; - -class PopAbandonedSummary: public PauseSummary, - public PopPreambleSummary { -public: - virtual PopPreambleSummary* pop_preamble_summary() { return this; } +class AbandonedSummary: public PauseSummary { }; class G1CollectorPolicy: public CollectorPolicy { @@ -146,10 +121,6 @@ protected: double _cur_satb_drain_time_ms; double _cur_clear_ct_time_ms; bool _satb_drain_time_set; - double _cur_popular_preamble_start_ms; - double _cur_popular_preamble_time_ms; - double _cur_popular_compute_rc_time_ms; - double _cur_popular_evac_time_ms; double _cur_CH_strong_roots_end_sec; double _cur_CH_strong_roots_dur_ms; @@ -173,10 +144,8 @@ protected: TruncatedSeq* _concurrent_mark_remark_times_ms; TruncatedSeq* _concurrent_mark_cleanup_times_ms; - NonPopSummary* _non_pop_summary; - PopSummary* _pop_summary; - NonPopAbandonedSummary* _non_pop_abandoned_summary; - PopAbandonedSummary* _pop_abandoned_summary; + Summary* _summary; + AbandonedSummary* _abandoned_summary; NumberSeq* _all_pause_times_ms; NumberSeq* _all_full_gc_times_ms; @@ -210,18 +179,6 @@ protected: double* _par_last_obj_copy_times_ms; double* _par_last_termination_times_ms; - // there are two pases during popular pauses, so we need to store - // somewhere the results of the first pass - double* _pop_par_last_update_rs_start_times_ms; - double* _pop_par_last_update_rs_times_ms; - double* _pop_par_last_update_rs_processed_buffers; - double* _pop_par_last_scan_rs_start_times_ms; - double* _pop_par_last_scan_rs_times_ms; - double* _pop_par_last_closure_app_times_ms; - - double _pop_compute_rc_start; - double 
_pop_evac_start; - // indicates that we are in young GC mode bool _in_young_gc_mode; @@ -634,8 +591,7 @@ protected: NumberSeq* calc_other_times_ms) const; void print_summary (PauseSummary* stats) const; - void print_abandoned_summary(PauseSummary* non_pop_summary, - PauseSummary* pop_summary) const; + void print_abandoned_summary(PauseSummary* summary) const; void print_summary (int level, const char* str, NumberSeq* seq) const; void print_summary_sd (int level, const char* str, NumberSeq* seq) const; @@ -856,9 +812,6 @@ public: virtual void record_collection_pause_start(double start_time_sec, size_t start_used); - virtual void record_popular_pause_preamble_start(); - virtual void record_popular_pause_preamble_end(); - // Must currently be called while the world is stopped. virtual void record_concurrent_mark_init_start(); virtual void record_concurrent_mark_init_end(); @@ -881,7 +834,7 @@ public: virtual void record_collection_pause_end_CH_strong_roots(); virtual void record_collection_pause_end_G1_strong_roots(); - virtual void record_collection_pause_end(bool popular, bool abandoned); + virtual void record_collection_pause_end(bool abandoned); // Record the fact that a full collection occurred. virtual void record_full_collection_start(); @@ -990,12 +943,6 @@ public: _cur_aux_times_ms[i] += ms; } - void record_pop_compute_rc_start(); - void record_pop_compute_rc_end(); - - void record_pop_evac_start(); - void record_pop_evac_end(); - // Record the fact that "bytes" bytes allocated in a region. void record_before_bytes(size_t bytes); void record_after_bytes(size_t bytes); @@ -1008,9 +955,7 @@ public: // Choose a new collection set. Marks the chosen regions as being // "in_collection_set", and links them together. The head and number of // the collection set are available via access methods. - // If "pop_region" is non-NULL, it is a popular region that has already - // been added to the collection set. 
- virtual void choose_collection_set(HeapRegion* pop_region = NULL) = 0; + virtual void choose_collection_set() = 0; void clear_collection_set() { _collection_set = NULL; } @@ -1018,9 +963,6 @@ public: // current collection set. HeapRegion* collection_set() { return _collection_set; } - // Sets the collection set to the given single region. - virtual void set_single_region_collection_set(HeapRegion* hr); - // The number of elements in the current collection set. size_t collection_set_size() { return _collection_set_size; } @@ -1203,7 +1145,7 @@ class G1CollectorPolicy_BestRegionsFirst: public G1CollectorPolicy { // If the estimated is less then desirable, resize if possible. void expand_if_possible(size_t numRegions); - virtual void choose_collection_set(HeapRegion* pop_region = NULL); + virtual void choose_collection_set(); virtual void record_collection_pause_start(double start_time_sec, size_t start_used); virtual void record_concurrent_mark_cleanup_end(size_t freed_bytes, @@ -1214,9 +1156,8 @@ public: G1CollectorPolicy_BestRegionsFirst() { _collectionSetChooser = new CollectionSetChooser(); } - void record_collection_pause_end(bool popular, bool abandoned); + void record_collection_pause_end(bool abandoned); bool should_do_collection_pause(size_t word_size); - virtual void set_single_region_collection_set(HeapRegion* hr); // This is not needed any more, after the CSet choosing code was // changed to use the pause prediction work. But let's leave the // hook in just in case. 
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp index 42d177a1e59..2e4ba2f9f48 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp @@ -157,7 +157,6 @@ void G1MarkSweep::mark_sweep_phase1(bool& marked_for_unloading, class G1PrepareCompactClosure: public HeapRegionClosure { ModRefBarrierSet* _mrbs; CompactPoint _cp; - bool _popular_only; void free_humongous_region(HeapRegion* hr) { HeapWord* bot = hr->bottom(); @@ -172,17 +171,11 @@ class G1PrepareCompactClosure: public HeapRegionClosure { } public: - G1PrepareCompactClosure(CompactibleSpace* cs, bool popular_only) : + G1PrepareCompactClosure(CompactibleSpace* cs) : _cp(NULL, cs, cs->initialize_threshold()), - _mrbs(G1CollectedHeap::heap()->mr_bs()), - _popular_only(popular_only) + _mrbs(G1CollectedHeap::heap()->mr_bs()) {} bool doHeapRegion(HeapRegion* hr) { - if (_popular_only && !hr->popular()) - return true; // terminate early - else if (!_popular_only && hr->popular()) - return false; // skip this one. - if (hr->isHumongous()) { if (hr->startsHumongous()) { oop obj = oop(hr->bottom()); @@ -203,20 +196,15 @@ public: return false; } }; -// Stolen verbatim from g1CollectedHeap.cpp + +// Finds the first HeapRegion. 
class FindFirstRegionClosure: public HeapRegionClosure { HeapRegion* _a_region; - bool _find_popular; public: - FindFirstRegionClosure(bool find_popular) : - _a_region(NULL), _find_popular(find_popular) {} + FindFirstRegionClosure() : _a_region(NULL) {} bool doHeapRegion(HeapRegion* r) { - if (r->popular() == _find_popular) { - _a_region = r; - return true; - } else { - return false; - } + _a_region = r; + return true; } HeapRegion* result() { return _a_region; } }; @@ -242,30 +230,15 @@ void G1MarkSweep::mark_sweep_phase2() { TraceTime tm("phase 2", PrintGC && Verbose, true, gclog_or_tty); GenMarkSweep::trace("2"); - // First we compact the popular regions. - if (G1NumPopularRegions > 0) { - CompactibleSpace* sp = g1h->first_compactible_space(); - FindFirstRegionClosure cl(true /*find_popular*/); - g1h->heap_region_iterate(&cl); - HeapRegion *r = cl.result(); - assert(r->popular(), "should have found a popular region."); - assert(r == sp, "first popular heap region should " - "== first compactible space"); - G1PrepareCompactClosure blk(sp, true/*popular_only*/); - g1h->heap_region_iterate(&blk); - } - - // Now we do the regular regions. 
- FindFirstRegionClosure cl(false /*find_popular*/); + FindFirstRegionClosure cl; g1h->heap_region_iterate(&cl); HeapRegion *r = cl.result(); - assert(!r->popular(), "should have founda non-popular region."); CompactibleSpace* sp = r; if (r->isHumongous() && oop(r->bottom())->is_gc_marked()) { sp = r->next_compaction_space(); } - G1PrepareCompactClosure blk(sp, false/*popular_only*/); + G1PrepareCompactClosure blk(sp); g1h->heap_region_iterate(&blk); CompactPoint perm_cp(pg, NULL, NULL); diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp index d0482ea1054..f8674dd16c5 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.cpp @@ -580,9 +580,7 @@ public: virtual void do_oop(oop* p) { HeapRegion* to = _g1->heap_region_containing(*p); if (to->in_collection_set()) { - if (to->rem_set()->add_reference(p, 0)) { - _g1->schedule_popular_region_evac(to); - } + to->rem_set()->add_reference(p, 0); } } }; @@ -1024,9 +1022,8 @@ void HRInto_G1RemSet::print_summary_info() { gclog_or_tty->print_cr(" %d occupied cards represented.", blk.occupied()); gclog_or_tty->print_cr(" Max sz region = [" PTR_FORMAT ", " PTR_FORMAT " )" - " %s, cap = " SIZE_FORMAT "K, occ = " SIZE_FORMAT "K.", + ", cap = " SIZE_FORMAT "K, occ = " SIZE_FORMAT "K.", blk.max_mem_sz_region()->bottom(), blk.max_mem_sz_region()->end(), - (blk.max_mem_sz_region()->popular() ? 
"POP" : ""), (blk.max_mem_sz_region()->rem_set()->mem_size() + K - 1)/K, (blk.max_mem_sz_region()->rem_set()->occupied() + K - 1)/K); gclog_or_tty->print_cr(" Did %d coarsenings.", diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp index 6a29f1775ec..00aa14452c2 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1RemSet.inline.hpp @@ -65,7 +65,6 @@ inline void HRInto_G1RemSet::par_write_ref(HeapRegion* from, oop* p, int tid) { HeapRegion* to = _g1->heap_region_containing(obj); // The test below could be optimized by applying a bit op to to and from. if (to != NULL && from != NULL && from != to) { - bool update_delayed = false; // There is a tricky infinite loop if we keep pushing // self forwarding pointers onto our _new_refs list. // The _par_traversal_in_progress flag is true during the collection pause, @@ -77,10 +76,7 @@ inline void HRInto_G1RemSet::par_write_ref(HeapRegion* from, oop* p, int tid) { // or processed (if an evacuation failure occurs) at the end // of the collection. // See HRInto_G1RemSet::cleanup_after_oops_into_collection_set_do(). 
- update_delayed = true; - } - - if (!to->popular() && !update_delayed) { + } else { #if G1_REM_SET_LOGGING gclog_or_tty->print_cr("Adding " PTR_FORMAT " (" PTR_FORMAT ") to RS" " for region [" PTR_FORMAT ", " PTR_FORMAT ")", @@ -88,9 +84,7 @@ inline void HRInto_G1RemSet::par_write_ref(HeapRegion* from, oop* p, int tid) { to->bottom(), to->end()); #endif assert(to->rem_set() != NULL, "Need per-region 'into' remsets."); - if (to->rem_set()->add_reference(p, tid)) { - _g1->schedule_popular_region_evac(to); - } + to->rem_set()->add_reference(p, tid); } } } diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp index 2b7a984a3fd..f6589e75c78 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1_globals.hpp @@ -185,15 +185,9 @@ product(intx, G1InefficientPausePct, 80, \ "Threshold of an 'inefficient' pauses (as % of cum efficiency.") \ \ - product(intx, G1RSPopLimit, 32768, \ - "Limit that defines popularity. Should go away! XXX") \ - \ develop(bool, G1RSCountHisto, false, \ "If true, print a histogram of RS occupancies after each pause") \ \ - product(intx, G1ObjPopLimit, 256, \ - "Limit that defines popularity for an object.") \ - \ product(bool, G1TraceFileOverwrite, false, \ "Allow the trace file to be overwritten") \ \ @@ -201,16 +195,6 @@ "When > 0, print the occupancies of the best and worst" \ "regions.") \ \ - develop(bool, G1TracePopularity, false, \ - "When true, provide detailed tracing of popularity.") \ - \ - product(bool, G1SummarizePopularity, false, \ - "When true, provide end-of-run-summarization of popularity.") \ - \ - product(intx, G1NumPopularRegions, 1, \ - "Number of regions reserved to hold popular objects. 
" \ - "Should go away later.") \ - \ develop(bool, G1PrintParCleanupStats, false, \ "When true, print extra stats about parallel cleanup.") \ \ diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp index 26817660e60..ee578bb2c4c 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp @@ -104,7 +104,6 @@ public: HeapRegion* to = _g1h->heap_region_containing(*p); if (from != NULL && to != NULL && from != to && - !to->popular() && !to->isHumongous()) { jbyte cv_obj = *_bs->byte_for_const(_containing_obj); jbyte cv_field = *_bs->byte_for_const(p); @@ -285,8 +284,6 @@ void HeapRegion::hr_clear(bool par, bool clear_space) { } zero_marked_bytes(); set_sort_index(-1); - if ((uintptr_t)bottom() >= (uintptr_t)g1h->popular_object_boundary()) - set_popular(false); _offsets.resize(HeapRegion::GrainWords); init_top_at_mark_start(); @@ -371,7 +368,6 @@ HeapRegion(G1BlockOffsetSharedArray* sharedOffsetArray, _next_in_special_set(NULL), _orig_end(NULL), _claimed(InitialClaimValue), _evacuation_failed(false), _prev_marked_bytes(0), _next_marked_bytes(0), _sort_index(-1), - _popularity(NotPopular), _young_type(NotYoung), _next_young_region(NULL), _young_index_in_cset(-1), _surv_rate_group(NULL), _age_index(-1), _rem_set(NULL), _zfs(NotZeroFilled) diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp index 0e71ead7c28..e0a801af30d 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp @@ -238,15 +238,6 @@ class HeapRegion: public G1OffsetTableContigSpace { // See "sort_index" method. -1 means is not in the array. int _sort_index; - // Means it has (or at least had) a very large RS, and should not be - // considered for membership in a collection set. 
- enum PopularityState { - NotPopular, - PopularPending, - Popular - }; - PopularityState _popularity; - // double _gc_efficiency; // @@ -433,10 +424,6 @@ class HeapRegion: public G1OffsetTableContigSpace { _next_in_special_set = r; } - bool is_reserved() { - return popular(); - } - bool is_on_free_list() { return _is_on_free_list; } @@ -609,23 +596,6 @@ class HeapRegion: public G1OffsetTableContigSpace { init_top_at_mark_start(); } - bool popular() { return _popularity == Popular; } - void set_popular(bool b) { - if (b) { - _popularity = Popular; - } else { - _popularity = NotPopular; - } - } - bool popular_pending() { return _popularity == PopularPending; } - void set_popular_pending(bool b) { - if (b) { - _popularity = PopularPending; - } else { - _popularity = NotPopular; - } - } - // void calc_gc_efficiency(void); double gc_efficiency() { return _gc_efficiency;} diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp index 042588458ef..aa8d3346fa9 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp @@ -188,32 +188,6 @@ private: // the _outgoing_region_map. void clear_outgoing_entries(); -#if MAYBE - // Audit the given card index. - void audit_card(size_t card_num, HeapRegion* hr, u2* rc_arr, - HeapRegionRemSet* empty_cards, size_t* one_obj_cards); - - // Assumes that "audit_stage1" has been called for "hr", to set up - // "shadow" and "new_rs" appropriately. Identifies individual popular - // objects; returns "true" if any are found. - bool audit_find_pop(HeapRegion* hr, u2* rc_arr); - - // Assumes that "audit_stage1" has been called for "hr", to set up - // "shadow" and "new_rs" appropriately. Identifies individual popular - // objects, and determines the number of entries in "new_rs" if any such - // popular objects are ignored. 
If this is sufficiently small, returns - // "false" to indicate that a constraint should not be introduced. - // Otherwise, returns "true" to indicate that we should go ahead with - // adding the constraint. - bool audit_stag(HeapRegion* hr, u2* rc_arr); - - - u2* alloc_rc_array(); - - SeqHeapRegionRemSet* audit_post(u2* rc_arr, size_t multi_obj_crds, - SeqHeapRegionRemSet* empty_cards); -#endif - enum ParIterState { Unclaimed, Claimed, Complete }; ParIterState _iter_state; @@ -261,16 +235,14 @@ public: /* Used in the sequential case. Returns "true" iff this addition causes the size limit to be reached. */ - bool add_reference(oop* from) { + void add_reference(oop* from) { _other_regions.add_reference(from); - return false; } /* Used in the parallel case. Returns "true" iff this addition causes the size limit to be reached. */ - bool add_reference(oop* from, int tid) { + void add_reference(oop* from, int tid) { _other_regions.add_reference(from, tid); - return false; } // Records the fact that the current region contains an outgoing @@ -338,20 +310,6 @@ public: } void print() const; -#if MAYBE - // We are about to introduce a constraint, requiring the collection time - // of the region owning this RS to be <= "hr", and forgetting pointers - // from the owning region to "hr." Before doing so, examines this rem - // set for pointers to "hr", possibly identifying some popular objects., - // and possibly finding some cards to no longer contain pointers to "hr", - // - // These steps may prevent the the constraint from being necessary; in - // which case returns a set of cards now thought to contain no pointers - // into HR. In the normal (I assume) case, returns NULL, indicating that - // we should go ahead and add the constraint. - virtual SeqHeapRegionRemSet* audit(HeapRegion* hr) = 0; -#endif - // Called during a stop-world phase to perform any deferred cleanups. // The second version may be called by parallel threads after then finish // collection work. 
diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp index 915cd439336..4e89c8cf979 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.cpp @@ -74,7 +74,6 @@ HeapRegionSeq::alloc_obj_from_region_index(int ind, size_t word_size) { // [first, cur) HeapRegion* curhr = _regions.at(cur); if (curhr->is_empty() - && !curhr->is_reserved() && (first == cur || (_regions.at(cur-1)->end() == curhr->bottom()))) { @@ -121,35 +120,27 @@ HeapRegionSeq::alloc_obj_from_region_index(int ind, size_t word_size) { } } -void HeapRegionSeq::print_empty_runs(bool reserved_are_empty) { +void HeapRegionSeq::print_empty_runs() { int empty_run = 0; int n_empty = 0; - bool at_least_one_reserved = false; int empty_run_start; for (int i = 0; i < _regions.length(); i++) { HeapRegion* r = _regions.at(i); if (r->continuesHumongous()) continue; - if (r->is_empty() && (reserved_are_empty || !r->is_reserved())) { + if (r->is_empty()) { assert(!r->isHumongous(), "H regions should not be empty."); if (empty_run == 0) empty_run_start = i; empty_run++; n_empty++; - if (r->is_reserved()) { - at_least_one_reserved = true; - } } else { if (empty_run > 0) { gclog_or_tty->print(" %d:%d", empty_run_start, empty_run); - if (reserved_are_empty && at_least_one_reserved) - gclog_or_tty->print("(R)"); empty_run = 0; - at_least_one_reserved = false; } } } if (empty_run > 0) { gclog_or_tty->print(" %d:%d", empty_run_start, empty_run); - if (reserved_are_empty && at_least_one_reserved) gclog_or_tty->print("(R)"); } gclog_or_tty->print_cr(" [tot = %d]", n_empty); } @@ -193,7 +184,6 @@ size_t HeapRegionSeq::free_suffix() { int cur = first; while (cur >= 0 && (_regions.at(cur)->is_empty() - && !_regions.at(cur)->is_reserved() && (first == cur || (_regions.at(cur+1)->bottom() == _regions.at(cur)->end())))) { diff --git 
a/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.hpp index 6ddec8d3fc4..6eee58edcd3 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSeq.hpp @@ -104,8 +104,7 @@ class HeapRegionSeq: public CHeapObj { void print(); - // Prints out runs of empty regions. If the arg is "true" reserved - // (popular regions are considered "empty". - void print_empty_runs(bool reserved_are_empty); + // Prints out runs of empty regions. + void print_empty_runs(); }; diff --git a/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp b/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp index e5753d53b42..40af9313c33 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.cpp @@ -43,16 +43,9 @@ void VM_G1IncCollectionPause::doit() { JvmtiGCForAllocationMarker jgcm; G1CollectedHeap* g1h = G1CollectedHeap::heap(); GCCauseSetter x(g1h, GCCause::_g1_inc_collection_pause); - g1h->do_collection_pause_at_safepoint(NULL); + g1h->do_collection_pause_at_safepoint(); } -void VM_G1PopRegionCollectionPause::doit() { - JvmtiGCForAllocationMarker jgcm; - G1CollectedHeap* g1h = G1CollectedHeap::heap(); - g1h->do_collection_pause_at_safepoint(_pop_region); -} - - void VM_CGC_Operation::doit() { gclog_or_tty->date_stamp(PrintGC && PrintGCDateStamps); TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty); diff --git a/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp b/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp index a914cea3b48..47eb146e7de 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/vm_operations_g1.hpp @@ -77,20 +77,6 @@ class VM_G1IncCollectionPause: public VM_GC_Operation { } }; -class VM_G1PopRegionCollectionPause: public VM_GC_Operation 
{ - HeapRegion* _pop_region; - public: - VM_G1PopRegionCollectionPause(int gc_count_before, HeapRegion* pop_region) : - VM_GC_Operation(gc_count_before), - _pop_region(pop_region) - {} - virtual VMOp_Type type() const { return VMOp_G1PopRegionCollectionPause; } - virtual void doit(); - virtual const char* name() const { - return "garbage-first popular region collection pause"; - } -}; - // Concurrent GC stop-the-world operations such as initial and final mark; // consider sharing these with CMS's counterparts. class VM_CGC_Operation: public VM_Operation { diff --git a/hotspot/src/share/vm/gc_interface/gcCause.hpp b/hotspot/src/share/vm/gc_interface/gcCause.hpp index bae001c9ca7..da23c8e70d4 100644 --- a/hotspot/src/share/vm/gc_interface/gcCause.hpp +++ b/hotspot/src/share/vm/gc_interface/gcCause.hpp @@ -60,7 +60,7 @@ class GCCause : public AllStatic { _old_generation_too_full_to_scavenge, _adaptive_size_policy, - _g1_inc_collection_pause, _g1_pop_region_collection_pause, + _g1_inc_collection_pause, _last_ditch_collection, _last_gc_cause diff --git a/hotspot/src/share/vm/runtime/vm_operations.hpp b/hotspot/src/share/vm/runtime/vm_operations.hpp index a2ed20b3b02..8f6e114623b 100644 --- a/hotspot/src/share/vm/runtime/vm_operations.hpp +++ b/hotspot/src/share/vm/runtime/vm_operations.hpp @@ -59,7 +59,6 @@ template(G1CollectFull) \ template(G1CollectForAllocation) \ template(G1IncCollectionPause) \ - template(G1PopRegionCollectionPause) \ template(EnableBiasedLocking) \ template(RevokeBias) \ template(BulkRevokeBias) \ From 74e0691df55a1820a3f8de3f782b642b22b4611d Mon Sep 17 00:00:00 2001 From: Igor Veresov Date: Thu, 26 Mar 2009 08:51:32 -0700 Subject: [PATCH 07/15] 6822263: G1: JVMTI heap iteration fails Make object_iterate() traverse the perm gen Reviewed-by: apetrusenko, tonyp --- .../gc_implementation/g1/g1CollectedHeap.cpp | 17 +++++++++++++---- .../gc_implementation/g1/g1CollectedHeap.hpp | 19 +++++++++++++++---- 2 files changed, 28 insertions(+), 8 deletions(-) 
diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp index a1e0262382c..fb4a0c79868 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp @@ -1722,14 +1722,20 @@ public: } }; -void G1CollectedHeap::oop_iterate(OopClosure* cl) { +void G1CollectedHeap::oop_iterate(OopClosure* cl, bool do_perm) { IterateOopClosureRegionClosure blk(_g1_committed, cl); _hrs->iterate(&blk); + if (do_perm) { + perm_gen()->oop_iterate(cl); + } } -void G1CollectedHeap::oop_iterate(MemRegion mr, OopClosure* cl) { +void G1CollectedHeap::oop_iterate(MemRegion mr, OopClosure* cl, bool do_perm) { IterateOopClosureRegionClosure blk(mr, cl); _hrs->iterate(&blk); + if (do_perm) { + perm_gen()->oop_iterate(cl); + } } // Iterates an ObjectClosure over all objects within a HeapRegion. @@ -1746,9 +1752,12 @@ public: } }; -void G1CollectedHeap::object_iterate(ObjectClosure* cl) { +void G1CollectedHeap::object_iterate(ObjectClosure* cl, bool do_perm) { IterateObjectClosureRegionClosure blk(cl); _hrs->iterate(&blk); + if (do_perm) { + perm_gen()->object_iterate(cl); + } } void G1CollectedHeap::object_iterate_since_last_GC(ObjectClosure* cl) { @@ -2375,7 +2384,7 @@ G1CollectedHeap::checkConcurrentMark() { VerifyMarkedObjsClosure verifycl(this); // MutexLockerEx x(getMarkBitMapLock(), // Mutex::_no_safepoint_check_flag); - object_iterate(&verifycl); + object_iterate(&verifycl, false); } void G1CollectedHeap::do_sync_mark() { diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp index 9a3b0b6d9e9..c0eca678db8 100644 --- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp +++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp @@ -865,14 +865,25 @@ public: // Iterate over all the ref-containing fields of all objects, calling // 
"cl.do_oop" on each. - virtual void oop_iterate(OopClosure* cl); + virtual void oop_iterate(OopClosure* cl) { + oop_iterate(cl, true); + } + void oop_iterate(OopClosure* cl, bool do_perm); // Same as above, restricted to a memory region. - virtual void oop_iterate(MemRegion mr, OopClosure* cl); + virtual void oop_iterate(MemRegion mr, OopClosure* cl) { + oop_iterate(mr, cl, true); + } + void oop_iterate(MemRegion mr, OopClosure* cl, bool do_perm); // Iterate over all objects, calling "cl.do_object" on each. - virtual void object_iterate(ObjectClosure* cl); - virtual void safe_object_iterate(ObjectClosure* cl) { object_iterate(cl); } + virtual void object_iterate(ObjectClosure* cl) { + object_iterate(cl, true); + } + virtual void safe_object_iterate(ObjectClosure* cl) { + object_iterate(cl, true); + } + void object_iterate(ObjectClosure* cl, bool do_perm); // Iterate over all objects allocated since the last collection, calling // "cl.do_object" on each. The heap must have been initialized properly From 134debb0ba7e772ea8c54f500d052a22f869a67f Mon Sep 17 00:00:00 2001 From: Tom Rodriguez Date: Thu, 26 Mar 2009 14:31:45 -0700 Subject: [PATCH 08/15] 6822204: volatile fences should prefer lock:addl to actual mfence instructions Reviewed-by: kvn, phh --- .../src/cpu/sparc/vm/stubGenerator_sparc.cpp | 16 ------- hotspot/src/cpu/x86/vm/assembler_x86.cpp | 24 ++-------- hotspot/src/cpu/x86/vm/assembler_x86.hpp | 24 ++++++---- .../src/cpu/x86/vm/stubGenerator_x86_64.cpp | 2 +- hotspot/src/cpu/x86/vm/x86_32.ad | 46 ++++++++---------- hotspot/src/cpu/x86/vm/x86_64.ad | 48 ++++++------------- .../os_cpu/linux_sparc/vm/os_linux_sparc.hpp | 2 - .../vm/orderAccess_linux_x86.inline.hpp | 5 +- .../vm/orderAccess_solaris_sparc.inline.hpp | 12 ----- .../solaris_sparc/vm/os_solaris_sparc.cpp | 16 ------- .../solaris_sparc/vm/os_solaris_sparc.hpp | 2 - .../vm/orderAccess_solaris_x86.inline.hpp | 5 +- .../os_cpu/solaris_x86/vm/os_solaris_x86.cpp | 16 ------- 
.../os_cpu/solaris_x86/vm/os_solaris_x86.hpp | 2 - .../vm/orderAccess_windows_x86.inline.hpp | 2 +- .../os_cpu/windows_x86/vm/os_windows_x86.cpp | 17 ------- .../os_cpu/windows_x86/vm/os_windows_x86.hpp | 5 -- hotspot/src/share/vm/includeDB_core | 2 + hotspot/src/share/vm/runtime/orderAccess.cpp | 12 +++++ hotspot/src/share/vm/runtime/orderAccess.hpp | 6 +++ 20 files changed, 81 insertions(+), 183 deletions(-) diff --git a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp index e4a3806da86..62c201605e5 100644 --- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp @@ -817,21 +817,6 @@ class StubGenerator: public StubCodeGenerator { Label _atomic_add_stub; // called from other stubs - // Support for void OrderAccess::fence(). - // - address generate_fence() { - StubCodeMark mark(this, "StubRoutines", "fence"); - address start = __ pc(); - - __ membar(Assembler::Membar_mask_bits(Assembler::LoadLoad | Assembler::LoadStore | - Assembler::StoreLoad | Assembler::StoreStore)); - __ retl(false); - __ delayed()->nop(); - - return start; - } - - //------------------------------------------------------------------------------------------------------------------------ // The following routine generates a subroutine to throw an asynchronous // UnknownError when an unsafe access gets a fault that could not be @@ -2861,7 +2846,6 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_atomic_cmpxchg_ptr_entry = StubRoutines::_atomic_cmpxchg_entry; StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long(); StubRoutines::_atomic_add_ptr_entry = StubRoutines::_atomic_add_entry; - StubRoutines::_fence_entry = generate_fence(); #endif // COMPILER2 !=> _LP64 } diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.cpp b/hotspot/src/cpu/x86/vm/assembler_x86.cpp index 351ae044728..dbf2f2b664a 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp +++ 
b/hotspot/src/cpu/x86/vm/assembler_x86.cpp @@ -1438,26 +1438,12 @@ void Assembler::lock() { } } -// Serializes memory. +// Emit mfence instruction void Assembler::mfence() { - // Memory barriers are only needed on multiprocessors - if (os::is_MP()) { - if( LP64_ONLY(true ||) VM_Version::supports_sse2() ) { - emit_byte( 0x0F ); // MFENCE; faster blows no regs - emit_byte( 0xAE ); - emit_byte( 0xF0 ); - } else { - // All usable chips support "locked" instructions which suffice - // as barriers, and are much faster than the alternative of - // using cpuid instruction. We use here a locked add [esp],0. - // This is conveniently otherwise a no-op except for blowing - // flags (which we save and restore.) - pushf(); // Save eflags register - lock(); - addl(Address(rsp, 0), 0);// Assert the lock# signal here - popf(); // Restore eflags register - } - } + NOT_LP64(assert(VM_Version::supports_sse2(), "unsupported");) + emit_byte( 0x0F ); + emit_byte( 0xAE ); + emit_byte( 0xF0 ); } void Assembler::mov(Register dst, Register src) { diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.hpp b/hotspot/src/cpu/x86/vm/assembler_x86.hpp index 4dfe7fec22e..a5efad8d22b 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp @@ -1068,15 +1068,23 @@ private: LoadLoad = 1 << 0 }; - // Serializes memory. + // Serializes memory and blows flags void membar(Membar_mask_bits order_constraint) { - // We only have to handle StoreLoad and LoadLoad - if (order_constraint & StoreLoad) { - // MFENCE subsumes LFENCE - mfence(); - } /* [jk] not needed currently: else if (order_constraint & LoadLoad) { - lfence(); - } */ + if (os::is_MP()) { + // We only have to handle StoreLoad + if (order_constraint & StoreLoad) { + // All usable chips support "locked" instructions which suffice + // as barriers, and are much faster than the alternative of + // using cpuid instruction. We use here a locked add [esp],0. 
+ // This is conveniently otherwise a no-op except for blowing + // flags. + // Any change to this code may need to revisit other places in + // the code where this idiom is used, in particular the + // orderAccess code. + lock(); + addl(Address(rsp, 0), 0);// Assert the lock# signal here + } + } } void mfence(); diff --git a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp index 73d60542a4d..ec322b527d1 100644 --- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp @@ -637,7 +637,7 @@ class StubGenerator: public StubCodeGenerator { address generate_orderaccess_fence() { StubCodeMark mark(this, "StubRoutines", "orderaccess_fence"); address start = __ pc(); - __ mfence(); + __ membar(Assembler::StoreLoad); __ ret(0); return start; diff --git a/hotspot/src/cpu/x86/vm/x86_32.ad b/hotspot/src/cpu/x86/vm/x86_32.ad index 479adb1cbfd..cd64cfbf9f2 100644 --- a/hotspot/src/cpu/x86/vm/x86_32.ad +++ b/hotspot/src/cpu/x86/vm/x86_32.ad @@ -4288,24 +4288,6 @@ encode %{ emit_opcode(cbuf, 0xC8 + $src2$$reg); %} - enc_class enc_membar_acquire %{ - // Doug Lea believes this is not needed with current Sparcs and TSO. - // MacroAssembler masm(&cbuf); - // masm.membar(); - %} - - enc_class enc_membar_release %{ - // Doug Lea believes this is not needed with current Sparcs and TSO. - // MacroAssembler masm(&cbuf); - // masm.membar(); - %} - - enc_class enc_membar_volatile %{ - MacroAssembler masm(&cbuf); - masm.membar(Assembler::Membar_mask_bits(Assembler::StoreLoad | - Assembler::StoreStore)); - %} - // Atomically load the volatile long enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{ emit_opcode(cbuf,0xDF); @@ -7498,9 +7480,9 @@ instruct membar_acquire() %{ ins_cost(400); size(0); - format %{ "MEMBAR-acquire" %} - ins_encode( enc_membar_acquire ); - ins_pipe(pipe_slow); + format %{ "MEMBAR-acquire ! 
(empty encoding)" %} + ins_encode(); + ins_pipe(empty); %} instruct membar_acquire_lock() %{ @@ -7519,9 +7501,9 @@ instruct membar_release() %{ ins_cost(400); size(0); - format %{ "MEMBAR-release" %} - ins_encode( enc_membar_release ); - ins_pipe(pipe_slow); + format %{ "MEMBAR-release ! (empty encoding)" %} + ins_encode( ); + ins_pipe(empty); %} instruct membar_release_lock() %{ @@ -7535,12 +7517,22 @@ instruct membar_release_lock() %{ ins_pipe(empty); %} -instruct membar_volatile() %{ +instruct membar_volatile(eFlagsReg cr) %{ match(MemBarVolatile); + effect(KILL cr); ins_cost(400); - format %{ "MEMBAR-volatile" %} - ins_encode( enc_membar_volatile ); + format %{ + $$template + if (os::is_MP()) { + $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile" + } else { + $$emit$$"MEMBAR-volatile ! (empty encoding)" + } + %} + ins_encode %{ + __ membar(Assembler::StoreLoad); + %} ins_pipe(pipe_slow); %} diff --git a/hotspot/src/cpu/x86/vm/x86_64.ad b/hotspot/src/cpu/x86/vm/x86_64.ad index 0705c2a009e..ae23fef1114 100644 --- a/hotspot/src/cpu/x86/vm/x86_64.ad +++ b/hotspot/src/cpu/x86/vm/x86_64.ad @@ -4162,33 +4162,6 @@ encode %{ // done: %} - enc_class enc_membar_acquire - %{ - // [jk] not needed currently, if you enable this and it really - // emits code don't forget to the remove the "size(0)" line in - // membar_acquire() - // MacroAssembler masm(&cbuf); - // masm.membar(Assembler::Membar_mask_bits(Assembler::LoadStore | - // Assembler::LoadLoad)); - %} - - enc_class enc_membar_release - %{ - // [jk] not needed currently, if you enable this and it really - // emits code don't forget to the remove the "size(0)" line in - // membar_release() - // MacroAssembler masm(&cbuf); - // masm.membar(Assembler::Membar_mask_bits(Assembler::LoadStore | - // Assembler::StoreStore)); - %} - - enc_class enc_membar_volatile - %{ - MacroAssembler masm(&cbuf); - masm.membar(Assembler::Membar_mask_bits(Assembler::StoreLoad | - Assembler::StoreStore)); - %} - // Safepoint Poll. 
This polls the safepoint page, and causes an // exception if it is not readable. Unfortunately, it kills // RFLAGS in the process. @@ -7458,7 +7431,7 @@ instruct membar_acquire() ins_cost(0); size(0); - format %{ "MEMBAR-acquire" %} + format %{ "MEMBAR-acquire ! (empty encoding)" %} ins_encode(); ins_pipe(empty); %} @@ -7481,7 +7454,7 @@ instruct membar_release() ins_cost(0); size(0); - format %{ "MEMBAR-release" %} + format %{ "MEMBAR-release ! (empty encoding)" %} ins_encode(); ins_pipe(empty); %} @@ -7498,13 +7471,22 @@ instruct membar_release_lock() ins_pipe(empty); %} -instruct membar_volatile() -%{ +instruct membar_volatile(rFlagsReg cr) %{ match(MemBarVolatile); + effect(KILL cr); ins_cost(400); - format %{ "MEMBAR-volatile" %} - ins_encode(enc_membar_volatile); + format %{ + $$template + if (os::is_MP()) { + $$emit$$"lock addl [rsp + #0], 0\t! membar_volatile" + } else { + $$emit$$"MEMBAR-volatile ! (empty encoding)" + } + %} + ins_encode %{ + __ membar(Assembler::StoreLoad); + %} ins_pipe(pipe_slow); %} diff --git a/hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.hpp b/hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.hpp index 4c74a6af6ab..865cd1ce0f1 100644 --- a/hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.hpp +++ b/hotspot/src/os_cpu/linux_sparc/vm/os_linux_sparc.hpp @@ -29,13 +29,11 @@ static jint (*atomic_cmpxchg_func) (jint, volatile jint*, jint); static jlong (*atomic_cmpxchg_long_func)(jlong, volatile jlong*, jlong); static jint (*atomic_add_func) (jint, volatile jint*); - static void (*fence_func) (); static jint atomic_xchg_bootstrap (jint, volatile jint*); static jint atomic_cmpxchg_bootstrap (jint, volatile jint*, jint); static jlong atomic_cmpxchg_long_bootstrap(jlong, volatile jlong*, jlong); static jint atomic_add_bootstrap (jint, volatile jint*); - static void fence_bootstrap (); static void setup_fpu() {} diff --git a/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp 
b/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp index 2b5f3ec0ac5..9777b0a131d 100644 --- a/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp +++ b/hotspot/src/os_cpu/linux_x86/vm/orderAccess_linux_x86.inline.hpp @@ -44,11 +44,12 @@ inline void OrderAccess::release() { inline void OrderAccess::fence() { if (os::is_MP()) { + // always use locked addl since mfence is sometimes expensive #ifdef AMD64 - __asm__ __volatile__ ("mfence":::"memory"); + __asm__ volatile ("lock; addl $0,0(%%rsp)" : : : "cc", "memory"); #else __asm__ volatile ("lock; addl $0,0(%%esp)" : : : "cc", "memory"); -#endif // AMD64 +#endif } } diff --git a/hotspot/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp b/hotspot/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp index c80d89157e4..5e27aefb977 100644 --- a/hotspot/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp +++ b/hotspot/src/os_cpu/solaris_sparc/vm/orderAccess_solaris_sparc.inline.hpp @@ -60,22 +60,10 @@ inline void OrderAccess::release() { dummy = 0; } -#if defined(COMPILER2) || defined(_LP64) - inline void OrderAccess::fence() { _OrderAccess_fence(); } -#else // defined(COMPILER2) || defined(_LP64) - -inline void OrderAccess::fence() { - if (os::is_MP()) { - (*os::fence_func)(); - } -} - -#endif // defined(COMPILER2) || defined(_LP64) - #endif // _GNU_SOURCE inline jbyte OrderAccess::load_acquire(volatile jbyte* p) { return *p; } diff --git a/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp b/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp index 802934511af..44b67e9d035 100644 --- a/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp +++ b/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.cpp @@ -619,7 +619,6 @@ typedef jint xchg_func_t (jint, volatile jint*); typedef jint cmpxchg_func_t (jint, volatile jint*, jint); typedef jlong cmpxchg_long_func_t(jlong, volatile jlong*, jlong); typedef jint add_func_t (jint, volatile jint*); 
-typedef void fence_func_t (); jint os::atomic_xchg_bootstrap(jint exchange_value, volatile jint* dest) { // try to use the stub: @@ -681,25 +680,10 @@ jint os::atomic_add_bootstrap(jint add_value, volatile jint* dest) { return (*dest) += add_value; } -void os::fence_bootstrap() { - // try to use the stub: - fence_func_t* func = CAST_TO_FN_PTR(fence_func_t*, StubRoutines::fence_entry()); - - if (func != NULL) { - os::fence_func = func; - (*func)(); - return; - } - assert(Threads::number_of_threads() == 0, "for bootstrap only"); - - // don't have to do anything for a single thread -} - xchg_func_t* os::atomic_xchg_func = os::atomic_xchg_bootstrap; cmpxchg_func_t* os::atomic_cmpxchg_func = os::atomic_cmpxchg_bootstrap; cmpxchg_long_func_t* os::atomic_cmpxchg_long_func = os::atomic_cmpxchg_long_bootstrap; add_func_t* os::atomic_add_func = os::atomic_add_bootstrap; -fence_func_t* os::fence_func = os::fence_bootstrap; #endif // !_LP64 && !COMPILER2 diff --git a/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.hpp b/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.hpp index f522b038507..62fee83dd25 100644 --- a/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.hpp +++ b/hotspot/src/os_cpu/solaris_sparc/vm/os_solaris_sparc.hpp @@ -29,13 +29,11 @@ static jint (*atomic_cmpxchg_func) (jint, volatile jint*, jint); static jlong (*atomic_cmpxchg_long_func)(jlong, volatile jlong*, jlong); static jint (*atomic_add_func) (jint, volatile jint*); - static void (*fence_func) (); static jint atomic_xchg_bootstrap (jint, volatile jint*); static jint atomic_cmpxchg_bootstrap (jint, volatile jint*, jint); static jlong atomic_cmpxchg_long_bootstrap(jlong, volatile jlong*, jlong); static jint atomic_add_bootstrap (jint, volatile jint*); - static void fence_bootstrap (); static void setup_fpu() {} diff --git a/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp b/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp index ed8486f746f..bf4d97d21b4 
100644 --- a/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp +++ b/hotspot/src/os_cpu/solaris_x86/vm/orderAccess_solaris_x86.inline.hpp @@ -61,11 +61,8 @@ extern "C" { #endif // AMD64 } inline void _OrderAccess_fence() { -#ifdef AMD64 - __asm__ __volatile__ ("mfence":::"memory"); -#else + // Always use locked addl since mfence is sometimes expensive __asm__ volatile ("lock; addl $0,0(%%esp)" : : : "cc", "memory"); -#endif // AMD64 } } diff --git a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp index e6b1edca5a4..59ed458b23f 100644 --- a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp +++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.cpp @@ -794,7 +794,6 @@ typedef jint xchg_func_t (jint, volatile jint*); typedef jint cmpxchg_func_t (jint, volatile jint*, jint); typedef jlong cmpxchg_long_func_t(jlong, volatile jlong*, jlong); typedef jint add_func_t (jint, volatile jint*); -typedef void fence_func_t (); jint os::atomic_xchg_bootstrap(jint exchange_value, volatile jint* dest) { // try to use the stub: @@ -856,25 +855,10 @@ jint os::atomic_add_bootstrap(jint add_value, volatile jint* dest) { return (*dest) += add_value; } -void os::fence_bootstrap() { - // try to use the stub: - fence_func_t* func = CAST_TO_FN_PTR(fence_func_t*, StubRoutines::fence_entry()); - - if (func != NULL) { - os::fence_func = func; - (*func)(); - return; - } - assert(Threads::number_of_threads() == 0, "for bootstrap only"); - - // don't have to do anything for a single thread -} - xchg_func_t* os::atomic_xchg_func = os::atomic_xchg_bootstrap; cmpxchg_func_t* os::atomic_cmpxchg_func = os::atomic_cmpxchg_bootstrap; cmpxchg_long_func_t* os::atomic_cmpxchg_long_func = os::atomic_cmpxchg_long_bootstrap; add_func_t* os::atomic_add_func = os::atomic_add_bootstrap; -fence_func_t* os::fence_func = os::fence_bootstrap; extern "C" _solaris_raw_setup_fpu(address ptr); void os::setup_fpu() { diff --git 
a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp index fd5707cbe37..3a02d11965d 100644 --- a/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp +++ b/hotspot/src/os_cpu/solaris_x86/vm/os_solaris_x86.hpp @@ -32,13 +32,11 @@ static jint (*atomic_cmpxchg_func) (jint, volatile jint*, jint); static jlong (*atomic_cmpxchg_long_func)(jlong, volatile jlong*, jlong); static jint (*atomic_add_func) (jint, volatile jint*); - static void (*fence_func) (); static jint atomic_xchg_bootstrap (jint, volatile jint*); static jint atomic_cmpxchg_bootstrap (jint, volatile jint*, jint); static jlong atomic_cmpxchg_long_bootstrap(jlong, volatile jlong*, jlong); static jint atomic_add_bootstrap (jint, volatile jint*); - static void fence_bootstrap (); static void setup_fpu(); #endif // AMD64 diff --git a/hotspot/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp b/hotspot/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp index b0a98bb0bab..1e53ed1aaa1 100644 --- a/hotspot/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp +++ b/hotspot/src/os_cpu/windows_x86/vm/orderAccess_windows_x86.inline.hpp @@ -46,7 +46,7 @@ inline void OrderAccess::release() { inline void OrderAccess::fence() { #ifdef AMD64 - (*os::fence_func)(); + StubRoutines_fence(); #else if (os::is_MP()) { __asm { diff --git a/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.cpp b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.cpp index 27b6af946d6..e322f5fd1e8 100644 --- a/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.cpp +++ b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.cpp @@ -196,7 +196,6 @@ typedef jint cmpxchg_func_t (jint, volatile jint*, jint); typedef jlong cmpxchg_long_func_t (jlong, volatile jlong*, jlong); typedef jint add_func_t (jint, volatile jint*); typedef intptr_t add_ptr_func_t (intptr_t, volatile intptr_t*); -typedef void fence_func_t (); #ifdef AMD64 @@ -292,27 +291,11 @@ intptr_t 
os::atomic_add_ptr_bootstrap(intptr_t add_value, volatile intptr_t* des return (*dest) += add_value; } -void os::fence_bootstrap() { - // try to use the stub: - fence_func_t* func = CAST_TO_FN_PTR(fence_func_t*, StubRoutines::fence_entry()); - - if (func != NULL) { - os::fence_func = func; - (*func)(); - return; - } - assert(Threads::number_of_threads() == 0, "for bootstrap only"); - - // don't have to do anything for a single thread -} - - xchg_func_t* os::atomic_xchg_func = os::atomic_xchg_bootstrap; xchg_ptr_func_t* os::atomic_xchg_ptr_func = os::atomic_xchg_ptr_bootstrap; cmpxchg_func_t* os::atomic_cmpxchg_func = os::atomic_cmpxchg_bootstrap; add_func_t* os::atomic_add_func = os::atomic_add_bootstrap; add_ptr_func_t* os::atomic_add_ptr_func = os::atomic_add_ptr_bootstrap; -fence_func_t* os::fence_func = os::fence_bootstrap; #endif // AMD64 diff --git a/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.hpp b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.hpp index d7578101677..1e0c6b334b5 100644 --- a/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.hpp +++ b/hotspot/src/os_cpu/windows_x86/vm/os_windows_x86.hpp @@ -35,9 +35,6 @@ static jint (*atomic_add_func) (jint, volatile jint*); static intptr_t (*atomic_add_ptr_func) (intptr_t, volatile intptr_t*); - static void (*fence_func) (); - - static jint atomic_xchg_bootstrap (jint, volatile jint*); static intptr_t atomic_xchg_ptr_bootstrap (intptr_t, volatile intptr_t*); @@ -53,8 +50,6 @@ #ifdef AMD64 static jint atomic_add_bootstrap (jint, volatile jint*); static intptr_t atomic_add_ptr_bootstrap (intptr_t, volatile intptr_t*); - - static void fence_bootstrap (); #endif // AMD64 static void setup_fpu(); diff --git a/hotspot/src/share/vm/includeDB_core b/hotspot/src/share/vm/includeDB_core index b75a1ab39a8..a88800f29f0 100644 --- a/hotspot/src/share/vm/includeDB_core +++ b/hotspot/src/share/vm/includeDB_core @@ -3154,6 +3154,8 @@ oopsHierarchy.cpp thread.hpp oopsHierarchy.cpp thread_.inline.hpp orderAccess.cpp 
orderAccess.hpp +orderAccess.cpp stubRoutines.hpp +orderAccess.cpp thread.hpp orderAccess.hpp allocation.hpp orderAccess.hpp os.hpp diff --git a/hotspot/src/share/vm/runtime/orderAccess.cpp b/hotspot/src/share/vm/runtime/orderAccess.cpp index 392b5978126..1e66d6258f5 100644 --- a/hotspot/src/share/vm/runtime/orderAccess.cpp +++ b/hotspot/src/share/vm/runtime/orderAccess.cpp @@ -26,3 +26,15 @@ # include "incls/_orderAccess.cpp.incl" volatile intptr_t OrderAccess::dummy = 0; + +void OrderAccess::StubRoutines_fence() { + // Use a stub if it exists. It may not exist during bootstrap so do + // nothing in that case but assert if no fence code exists after threads have been created + void (*func)() = CAST_TO_FN_PTR(void (*)(), StubRoutines::fence_entry()); + + if (func != NULL) { + (*func)(); + return; + } + assert(Threads::number_of_threads() == 0, "for bootstrap only"); +} diff --git a/hotspot/src/share/vm/runtime/orderAccess.hpp b/hotspot/src/share/vm/runtime/orderAccess.hpp index c51a9229735..d6b83466da5 100644 --- a/hotspot/src/share/vm/runtime/orderAccess.hpp +++ b/hotspot/src/share/vm/runtime/orderAccess.hpp @@ -300,4 +300,10 @@ class OrderAccess : AllStatic { // In order to force a memory access, implementations may // need a volatile externally visible dummy variable. static volatile intptr_t dummy; + + private: + // This is a helper that invokes the StubRoutines::fence_entry() + // routine if it exists. It should only be used by platforms that + // don't have another way to do the inline assembly. + static void StubRoutines_fence(); }; From 68cf08d2c36e55b68aa5aea0b30fedceba4582c6 Mon Sep 17 00:00:00 2001 From: Vladimir Kozlov Date: Thu, 26 Mar 2009 15:04:55 -0700 Subject: [PATCH 09/15] 6810845: Performance regression in mpegaudio on x64 Used the outer loop frequency in frequencies checks in RA.
Reviewed-by: never, twisti --- hotspot/src/share/vm/opto/block.hpp | 2 ++ hotspot/src/share/vm/opto/c2_globals.hpp | 4 ++-- hotspot/src/share/vm/opto/chaitin.cpp | 3 +++ hotspot/src/share/vm/opto/chaitin.hpp | 4 ++++ hotspot/src/share/vm/opto/coalesce.cpp | 4 ++-- hotspot/src/share/vm/opto/gcm.cpp | 12 ++++++++++++ hotspot/src/share/vm/opto/machnode.cpp | 5 +++++ 7 files changed, 30 insertions(+), 4 deletions(-) diff --git a/hotspot/src/share/vm/opto/block.hpp b/hotspot/src/share/vm/opto/block.hpp index f4c46ba2a50..aac5105d66a 100644 --- a/hotspot/src/share/vm/opto/block.hpp +++ b/hotspot/src/share/vm/opto/block.hpp @@ -371,6 +371,7 @@ class PhaseCFG : public Phase { Block *_broot; // Basic block of root uint _rpo_ctr; CFGLoop* _root_loop; + float _outer_loop_freq; // Outmost loop frequency // Per node latency estimation, valid only during GCM GrowableArray _node_latency; @@ -537,6 +538,7 @@ class CFGLoop : public CFGElement { void compute_loop_depth(int depth); void compute_freq(); // compute frequency with loop assuming head freq 1.0f void scale_freq(); // scale frequency by loop trip count (including outer loops) + float outer_loop_freq() const; // frequency of outer loop bool in_loop_nest(Block* b); float trip_count() const { return 1.0f / _exit_prob; } virtual bool is_loop() { return true; } diff --git a/hotspot/src/share/vm/opto/c2_globals.hpp b/hotspot/src/share/vm/opto/c2_globals.hpp index 6734321d8da..425a66a42c2 100644 --- a/hotspot/src/share/vm/opto/c2_globals.hpp +++ b/hotspot/src/share/vm/opto/c2_globals.hpp @@ -391,7 +391,7 @@ product(intx, EliminateAllocationArraySizeLimit, 64, \ "Array size (number of elements) limit for scalar replacement") \ \ - product(bool, UseOptoBiasInlining, true, \ + product(bool, UseOptoBiasInlining, true, \ "Generate biased locking code in C2 ideal graph") \ \ product(intx, ValueSearchLimit, 1000, \ @@ -410,7 +410,7 @@ "Miniumum %% of a successor (predecessor) for which block layout "\ "a will allow a fork (join) in a 
single chain") \ \ - product(bool, BlockLayoutRotateLoops, false, \ + product(bool, BlockLayoutRotateLoops, true, \ "Allow back branches to be fall throughs in the block layour") \ C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG) diff --git a/hotspot/src/share/vm/opto/chaitin.cpp b/hotspot/src/share/vm/opto/chaitin.cpp index e890c4ae3ce..4ad220d41f5 100644 --- a/hotspot/src/share/vm/opto/chaitin.cpp +++ b/hotspot/src/share/vm/opto/chaitin.cpp @@ -149,6 +149,9 @@ PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher) #endif { NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); ) + + _high_frequency_lrg = MIN2(float(OPTO_LRG_HIGH_FREQ), _cfg._outer_loop_freq); + uint i,j; // Build a list of basic blocks, sorted by frequency _blks = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks ); diff --git a/hotspot/src/share/vm/opto/chaitin.hpp b/hotspot/src/share/vm/opto/chaitin.hpp index 0de7dd41eaa..1cea37e34d1 100644 --- a/hotspot/src/share/vm/opto/chaitin.hpp +++ b/hotspot/src/share/vm/opto/chaitin.hpp @@ -338,6 +338,8 @@ class PhaseChaitin : public PhaseRegAlloc { Block **_blks; // Array of blocks sorted by frequency for coalescing + float _high_frequency_lrg; // Frequency at which LRG will be spilled for debug info + #ifndef PRODUCT bool _trace_spilling; #endif @@ -360,6 +362,8 @@ public: uint n2lidx( const Node *n ) const { return _names[n->_idx]; } + float high_frequency_lrg() const { return _high_frequency_lrg; } + #ifndef PRODUCT bool trace_spilling() const { return _trace_spilling; } #endif diff --git a/hotspot/src/share/vm/opto/coalesce.cpp b/hotspot/src/share/vm/opto/coalesce.cpp index 52c00992719..99584ca165c 100644 --- a/hotspot/src/share/vm/opto/coalesce.cpp +++ b/hotspot/src/share/vm/opto/coalesce.cpp @@ -473,7 +473,7 @@ void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) { } // End of is two-adr 
// Insert a copy at a debug use for a lrg which has high frequency - if( (b->_freq < OPTO_DEBUG_SPLIT_FREQ) && n->is_MachSafePoint() ) { + if( b->_freq < OPTO_DEBUG_SPLIT_FREQ || b->is_uncommon(_phc._cfg._bbs) ) { // Walk the debug inputs to the node and check for lrg freq JVMState* jvms = n->jvms(); uint debug_start = jvms ? jvms->debug_start() : 999999; @@ -487,7 +487,7 @@ void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) { LRG &lrg = lrgs(nidx); // If this lrg has a high frequency use/def - if( lrg._maxfreq >= OPTO_LRG_HIGH_FREQ ) { + if( lrg._maxfreq >= _phc.high_frequency_lrg() ) { // If the live range is also live out of this block (like it // would be for a fast/slow idiom), the normal spill mechanism // does an excellent job. If it is not live out of this block diff --git a/hotspot/src/share/vm/opto/gcm.cpp b/hotspot/src/share/vm/opto/gcm.cpp index 85263fcb389..df648455117 100644 --- a/hotspot/src/share/vm/opto/gcm.cpp +++ b/hotspot/src/share/vm/opto/gcm.cpp @@ -1374,6 +1374,9 @@ void PhaseCFG::Estimate_Block_Frequency() { _root_loop->_freq = 1.0; _root_loop->scale_freq(); + // Save outmost loop frequency for LRG frequency threshold + _outer_loop_freq = _root_loop->outer_loop_freq(); + // force paths ending at uncommon traps to be infrequent if (!C->do_freq_based_layout()) { Block_List worklist; @@ -1898,6 +1901,7 @@ bool CFGLoop::in_loop_nest(Block* b) { // Do a top down traversal of loop tree (visit outer loops first.) 
void CFGLoop::scale_freq() { float loop_freq = _freq * trip_count(); + _freq = loop_freq; for (int i = 0; i < _members.length(); i++) { CFGElement* s = _members.at(i); float block_freq = s->_freq * loop_freq; @@ -1912,6 +1916,14 @@ void CFGLoop::scale_freq() { } } +// Frequency of outer loop +float CFGLoop::outer_loop_freq() const { + if (_child != NULL) { + return _child->_freq; + } + return _freq; +} + #ifndef PRODUCT //------------------------------dump_tree-------------------------------------- void CFGLoop::dump_tree() const { diff --git a/hotspot/src/share/vm/opto/machnode.cpp b/hotspot/src/share/vm/opto/machnode.cpp index adb7ecb98ba..b7357f86ab6 100644 --- a/hotspot/src/share/vm/opto/machnode.cpp +++ b/hotspot/src/share/vm/opto/machnode.cpp @@ -340,6 +340,10 @@ const class TypePtr *MachNode::adr_type() const { if (base == NodeSentinel) return TypePtr::BOTTOM; const Type* t = base->bottom_type(); + if (UseCompressedOops && Universe::narrow_oop_shift() == 0) { + // 32-bit unscaled narrow oop can be the base of any address expression + t = t->make_ptr(); + } if (t->isa_intptr_t() && offset != 0 && offset != Type::OffsetBot) { // We cannot assert that the offset does not look oop-ish here. 
// Depending on the heap layout the cardmark base could land @@ -353,6 +357,7 @@ const class TypePtr *MachNode::adr_type() const { // be conservative if we do not recognize the type if (tp == NULL) { + assert(false, "this path may produce not optimal code"); return TypePtr::BOTTOM; } assert(tp->base() != Type::AnyPtr, "not a bare pointer"); From 99aa7292a36916c12dc4d1560f7f659d975d2b24 Mon Sep 17 00:00:00 2001 From: Tom Rodriguez Date: Fri, 27 Mar 2009 14:37:42 -0700 Subject: [PATCH 10/15] 6822333: _call_stub_compiled_return address handling in SA is broken causing jstack to hang occasionally Reviewed-by: kvn, twisti --- .../classes/sun/jvm/hotspot/runtime/StubRoutines.java | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/StubRoutines.java b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/StubRoutines.java index be55ca524c7..fad351b495d 100644 --- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/StubRoutines.java +++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/StubRoutines.java @@ -46,12 +46,18 @@ public class StubRoutines { Type type = db.lookupType("StubRoutines"); callStubReturnAddressField = type.getAddressField("_call_stub_return_address"); - // Only some platforms have specif return from compiled to call_stub + // Only some platforms have specific return from compiled to call_stub try { - callStubCompiledReturnAddressField = type.getAddressField("_call_stub_compiled_return"); + type = db.lookupType("StubRoutines::x86"); + if (type != null) { + callStubCompiledReturnAddressField = type.getAddressField("_call_stub_compiled_return"); + } } catch (RuntimeException re) { callStubCompiledReturnAddressField = null; } + if (callStubCompiledReturnAddressField == null && VM.getVM().getCPU().equals("x86")) { + throw new InternalError("Missing definition for _call_stub_compiled_return"); + } } public StubRoutines() { From 
137a679446093b7afd169f9a2ee3d785e4f97af0 Mon Sep 17 00:00:00 2001 From: "Y. Srinivas Ramakrishna" Date: Sat, 28 Mar 2009 15:47:29 -0700 Subject: [PATCH 11/15] 6819891: ParNew: Fix work queue overflow code to deal correctly with +UseCompressedOops When using compressed oops, rather than chaining the overflowed grey objects' pre-images through their klass words, we use GC-worker thread-local overflow stacks. Reviewed-by: jcoomes, jmasa --- .../concurrentMarkSweepGeneration.cpp | 14 +- .../parNew/parNewGeneration.cpp | 176 +++++++++++++----- .../parNew/parNewGeneration.hpp | 17 +- hotspot/src/share/vm/runtime/globals.hpp | 3 + 4 files changed, 154 insertions(+), 56 deletions(-) diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp index de9ee7869d9..40714825b7d 100644 --- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp +++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp @@ -3847,7 +3847,7 @@ bool CMSConcMarkingTask::get_work_from_overflow_stack(CMSMarkStack* ovflw_stk, MutexLockerEx ml(ovflw_stk->par_lock(), Mutex::_no_safepoint_check_flag); // Grab up to 1/4 the size of the work queue - size_t num = MIN2((size_t)work_q->max_elems()/4, + size_t num = MIN2((size_t)(work_q->max_elems() - work_q->size())/4, (size_t)ParGCDesiredObjsFromOverflowList); num = MIN2(num, ovflw_stk->length()); for (int i = (int) num; i > 0; i--) { @@ -5204,13 +5204,12 @@ CMSParRemarkTask::do_work_steal(int i, Par_MarkRefsIntoAndScanClosure* cl, NOT_PRODUCT(int num_steals = 0;) oop obj_to_scan; CMSBitMap* bm = &(_collector->_markBitMap); - size_t num_from_overflow_list = - MIN2((size_t)work_q->max_elems()/4, - (size_t)ParGCDesiredObjsFromOverflowList); while (true) { // Completely finish any left over work from (an) earlier round(s) cl->trim_queue(0); + 
size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4, + (size_t)ParGCDesiredObjsFromOverflowList); // Now check if there's any work in the overflow list if (_collector->par_take_from_overflow_list(num_from_overflow_list, work_q)) { @@ -5622,13 +5621,12 @@ void CMSRefProcTaskProxy::do_work_steal(int i, OopTaskQueue* work_q = work_queue(i); NOT_PRODUCT(int num_steals = 0;) oop obj_to_scan; - size_t num_from_overflow_list = - MIN2((size_t)work_q->max_elems()/4, - (size_t)ParGCDesiredObjsFromOverflowList); while (true) { // Completely finish any left over work from (an) earlier round(s) drain->trim_queue(0); + size_t num_from_overflow_list = MIN2((size_t)(work_q->max_elems() - work_q->size())/4, + (size_t)ParGCDesiredObjsFromOverflowList); // Now check if there's any work in the overflow list if (_collector->par_take_from_overflow_list(num_from_overflow_list, work_q)) { @@ -9021,7 +9019,7 @@ void ASConcurrentMarkSweepGeneration::shrink_by(size_t desired_bytes) { // Transfer some number of overflown objects to usual marking // stack. Return true if some objects were transferred. 
bool MarkRefsIntoAndScanClosure::take_from_overflow_list() { - size_t num = MIN2((size_t)_mark_stack->capacity()/4, + size_t num = MIN2((size_t)(_mark_stack->capacity() - _mark_stack->length())/4, (size_t)ParGCDesiredObjsFromOverflowList); bool res = _collector->take_from_overflow_list(num, _mark_stack); diff --git a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp index 7bafe50aa36..bec5507ef76 100644 --- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp +++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp @@ -36,7 +36,7 @@ ParScanThreadState::ParScanThreadState(Space* to_space_, ObjToScanQueueSet* work_queue_set_, size_t desired_plab_sz_, ParallelTaskTerminator& term_) : - _to_space(to_space_), _old_gen(old_gen_), _thread_num(thread_num_), + _to_space(to_space_), _old_gen(old_gen_), _young_gen(gen_), _thread_num(thread_num_), _work_queue(work_queue_set_->queue(thread_num_)), _to_space_full(false), _ageTable(false), // false ==> not the global age table, no perf data. 
_to_space_alloc_buffer(desired_plab_sz_), @@ -57,6 +57,11 @@ ParScanThreadState::ParScanThreadState(Space* to_space_, _start = os::elapsedTime(); _old_gen_closure.set_generation(old_gen_); _old_gen_root_closure.set_generation(old_gen_); + if (UseCompressedOops) { + _overflow_stack = new (ResourceObj::C_HEAP) GrowableArray(512, true); + } else { + _overflow_stack = NULL; + } } #ifdef _MSC_VER #pragma warning( pop ) @@ -81,7 +86,7 @@ void ParScanThreadState::scan_partial_array_and_push_remainder(oop old) { assert(old->is_objArray(), "must be obj array"); assert(old->is_forwarded(), "must be forwarded"); assert(Universe::heap()->is_in_reserved(old), "must be in heap."); - assert(!_old_gen->is_in(old), "must be in young generation."); + assert(!old_gen()->is_in(old), "must be in young generation."); objArrayOop obj = objArrayOop(old->forwardee()); // Process ParGCArrayScanChunk elements now @@ -119,26 +124,68 @@ void ParScanThreadState::scan_partial_array_and_push_remainder(oop old) { void ParScanThreadState::trim_queues(int max_size) { ObjToScanQueue* queue = work_queue(); - while (queue->size() > (juint)max_size) { - oop obj_to_scan; - if (queue->pop_local(obj_to_scan)) { - note_pop(); - - if ((HeapWord *)obj_to_scan < young_old_boundary()) { - if (obj_to_scan->is_objArray() && - obj_to_scan->is_forwarded() && - obj_to_scan->forwardee() != obj_to_scan) { - scan_partial_array_and_push_remainder(obj_to_scan); + do { + while (queue->size() > (juint)max_size) { + oop obj_to_scan; + if (queue->pop_local(obj_to_scan)) { + note_pop(); + if ((HeapWord *)obj_to_scan < young_old_boundary()) { + if (obj_to_scan->is_objArray() && + obj_to_scan->is_forwarded() && + obj_to_scan->forwardee() != obj_to_scan) { + scan_partial_array_and_push_remainder(obj_to_scan); + } else { + // object is in to_space + obj_to_scan->oop_iterate(&_to_space_closure); + } } else { - // object is in to_space - obj_to_scan->oop_iterate(&_to_space_closure); + // object is in old generation + 
obj_to_scan->oop_iterate(&_old_gen_closure); } - } else { - // object is in old generation - obj_to_scan->oop_iterate(&_old_gen_closure); } } + // For the case of compressed oops, we have a private, non-shared + // overflow stack, so we eagerly drain it so as to more evenly + // distribute load early. Note: this may be good to do in + // general rather than delay for the final stealing phase. + // If applicable, we'll transfer a set of objects over to our + // work queue, allowing them to be stolen and draining our + // private overflow stack. + } while (ParGCTrimOverflow && young_gen()->take_from_overflow_list(this)); +} + +bool ParScanThreadState::take_from_overflow_stack() { + assert(UseCompressedOops, "Else should not call"); + assert(young_gen()->overflow_list() == NULL, "Error"); + ObjToScanQueue* queue = work_queue(); + GrowableArray* of_stack = overflow_stack(); + uint num_overflow_elems = of_stack->length(); + uint num_take_elems = MIN2(MIN2((queue->max_elems() - queue->size())/4, + (juint)ParGCDesiredObjsFromOverflowList), + num_overflow_elems); + // Transfer the most recent num_take_elems from the overflow + // stack to our work queue. + for (size_t i = 0; i != num_take_elems; i++) { + oop cur = of_stack->pop(); + oop obj_to_push = cur->forwardee(); + assert(Universe::heap()->is_in_reserved(cur), "Should be in heap"); + assert(!old_gen()->is_in_reserved(cur), "Should be in young gen"); + assert(Universe::heap()->is_in_reserved(obj_to_push), "Should be in heap"); + if (should_be_partially_scanned(obj_to_push, cur)) { + assert(arrayOop(cur)->length() == 0, "entire array remaining to be scanned"); + obj_to_push = cur; + } + bool ok = queue->push(obj_to_push); + assert(ok, "Should have succeeded"); } + assert(young_gen()->overflow_list() == NULL, "Error"); + return num_take_elems > 0; // was something transferred? 
+} + +void ParScanThreadState::push_on_overflow_stack(oop p) { + assert(UseCompressedOops, "Else should not call"); + overflow_stack()->push(p); + assert(young_gen()->overflow_list() == NULL, "Error"); } HeapWord* ParScanThreadState::alloc_in_to_space_slow(size_t word_sz) { @@ -425,8 +472,7 @@ void ParNewGenTask::work(int i) { ResourceMark rm; HandleMark hm; // We would need multiple old-gen queues otherwise. - guarantee(gch->n_gens() == 2, - "Par young collection currently only works with one older gen."); + assert(gch->n_gens() == 2, "Par young collection currently only works with one older gen."); Generation* old_gen = gch->next_gen(_gen); @@ -1169,36 +1215,75 @@ bool ParNewGeneration::should_simulate_overflow() { } #endif +// In case we are using compressed oops, we need to be careful. +// If the object being pushed is an object array, then its length +// field keeps track of the "grey boundary" at which the next +// incremental scan will be done (see ParGCArrayScanChunk). +// When using compressed oops, this length field is kept in the +// lower 32 bits of the erstwhile klass word and cannot be used +// for the overflow chaining pointer (OCP below). As such the OCP +// would itself need to be compressed into the top 32-bits in this +// case. Unfortunately, see below, in the event that we have a +// promotion failure, the node to be pushed on the list can be +// outside of the Java heap, so the heap-based pointer compression +// would not work (we would have potential aliasing between C-heap +// and Java-heap pointers). For this reason, when using compressed +// oops, we simply use a worker-thread-local, non-shared overflow +// list in the form of a growable array, with a slightly different +// overflow stack draining strategy. 
If/when we start using fat +// stacks here, we can go back to using (fat) pointer chains +// (although some performance comparisons would be useful since +// single global lists have their own performance disadvantages +// as we were made painfully aware not long ago, see 6786503). #define BUSY (oop(0x1aff1aff)) void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state) { - // if the object has been forwarded to itself, then we cannot - // use the klass pointer for the linked list. Instead we have - // to allocate an oopDesc in the C-Heap and use that for the linked list. - // XXX This is horribly inefficient when a promotion failure occurs - // and should be fixed. XXX FIX ME !!! + assert(is_in_reserved(from_space_obj), "Should be from this generation"); + if (UseCompressedOops) { + // In the case of compressed oops, we use a private, not-shared + // overflow stack. + par_scan_state->push_on_overflow_stack(from_space_obj); + } else { + // if the object has been forwarded to itself, then we cannot + // use the klass pointer for the linked list. Instead we have + // to allocate an oopDesc in the C-Heap and use that for the linked list. + // XXX This is horribly inefficient when a promotion failure occurs + // and should be fixed. XXX FIX ME !!! 
#ifndef PRODUCT - Atomic::inc_ptr(&_num_par_pushes); - assert(_num_par_pushes > 0, "Tautology"); + Atomic::inc_ptr(&_num_par_pushes); + assert(_num_par_pushes > 0, "Tautology"); #endif - if (from_space_obj->forwardee() == from_space_obj) { - oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1); - listhead->forward_to(from_space_obj); - from_space_obj = listhead; - } - oop observed_overflow_list = _overflow_list; - oop cur_overflow_list; - do { - cur_overflow_list = observed_overflow_list; - if (cur_overflow_list != BUSY) { - from_space_obj->set_klass_to_list_ptr(cur_overflow_list); - } else { - from_space_obj->set_klass_to_list_ptr(NULL); + if (from_space_obj->forwardee() == from_space_obj) { + oopDesc* listhead = NEW_C_HEAP_ARRAY(oopDesc, 1); + listhead->forward_to(from_space_obj); + from_space_obj = listhead; } - observed_overflow_list = - (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list); - } while (cur_overflow_list != observed_overflow_list); + oop observed_overflow_list = _overflow_list; + oop cur_overflow_list; + do { + cur_overflow_list = observed_overflow_list; + if (cur_overflow_list != BUSY) { + from_space_obj->set_klass_to_list_ptr(cur_overflow_list); + } else { + from_space_obj->set_klass_to_list_ptr(NULL); + } + observed_overflow_list = + (oop)Atomic::cmpxchg_ptr(from_space_obj, &_overflow_list, cur_overflow_list); + } while (cur_overflow_list != observed_overflow_list); + } } +bool ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) { + bool res; + + if (UseCompressedOops) { + res = par_scan_state->take_from_overflow_stack(); + } else { + res = take_from_overflow_list_work(par_scan_state); + } + return res; +} + + // *NOTE*: The overflow list manipulation code here and // in CMSCollector:: are very similar in shape, // except that in the CMS case we thread the objects @@ -1213,14 +1298,13 @@ void ParNewGeneration::push_on_overflow_list(oop from_space_obj, ParScanThreadSt // similar changes might be 
needed. // See CMSCollector::par_take_from_overflow_list() for // more extensive documentation comments. -bool -ParNewGeneration::take_from_overflow_list(ParScanThreadState* par_scan_state) { +bool ParNewGeneration::take_from_overflow_list_work(ParScanThreadState* par_scan_state) { ObjToScanQueue* work_q = par_scan_state->work_queue(); - assert(work_q->size() == 0, "Should first empty local work queue"); // How many to take? - size_t objsFromOverflow = MIN2((size_t)work_q->max_elems()/4, + size_t objsFromOverflow = MIN2((size_t)(work_q->max_elems() - work_q->size())/4, (size_t)ParGCDesiredObjsFromOverflowList); + assert(par_scan_state->overflow_stack() == NULL, "Error"); if (_overflow_list == NULL) return false; // Otherwise, there was something there; try claiming the list. diff --git a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp index 51e4c5f39f1..7f0015bd676 100644 --- a/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp +++ b/hotspot/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp @@ -55,6 +55,7 @@ class ParScanThreadState { friend class ParScanThreadStateSet; private: ObjToScanQueue *_work_queue; + GrowableArray* _overflow_stack; ParGCAllocBuffer _to_space_alloc_buffer; @@ -79,6 +80,9 @@ class ParScanThreadState { Space* _to_space; Space* to_space() { return _to_space; } + ParNewGeneration* _young_gen; + ParNewGeneration* young_gen() const { return _young_gen; } + Generation* _old_gen; Generation* old_gen() { return _old_gen; } @@ -134,6 +138,11 @@ class ParScanThreadState { // Decrease queue size below "max_size". void trim_queues(int max_size); + // Private overflow stack usage + GrowableArray* overflow_stack() { return _overflow_stack; } + bool take_from_overflow_stack(); + void push_on_overflow_stack(oop p); + // Is new_obj a candidate for scan_partial_array_and_push_remainder method. 
inline bool should_be_partially_scanned(oop new_obj, oop old_obj) const; @@ -378,13 +387,17 @@ class ParNewGeneration: public DefNewGeneration { NOT_PRODUCT(int _overflow_counter;) NOT_PRODUCT(bool should_simulate_overflow();) + // Accessor for overflow list + oop overflow_list() { return _overflow_list; } + // Push the given (from-space) object on the global overflow list. void push_on_overflow_list(oop from_space_obj, ParScanThreadState* par_scan_state); // If the global overflow list is non-empty, move some tasks from it - // onto "work_q" (which must be empty). No more than 1/4 of the - // max_elems of "work_q" are moved. + // onto "work_q" (which need not be empty). No more than 1/4 of the + // available space on "work_q" is used. bool take_from_overflow_list(ParScanThreadState* par_scan_state); + bool take_from_overflow_list_work(ParScanThreadState* par_scan_state); // The task queues to be used by parallel GC threads. ObjToScanQueueSet* task_queues() { diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp index 2d33287fcd4..b80ec6fbda2 100644 --- a/hotspot/src/share/vm/runtime/globals.hpp +++ b/hotspot/src/share/vm/runtime/globals.hpp @@ -1316,6 +1316,9 @@ class CommandLineFlags { product(intx, ParGCArrayScanChunk, 50, \ "Scan a subset and push remainder, if array is bigger than this") \ \ + product(bool, ParGCTrimOverflow, true, \ + "Eagerly trim the overflow lists (useful for UseCompressedOops") \ + \ notproduct(bool, ParGCWorkQueueOverflowALot, false, \ "Whether we should simulate work queue overflow in ParNew") \ \ From 705188bb7b441bde57dad82910e0d47da1e9ca7c Mon Sep 17 00:00:00 2001 From: Vladimir Kozlov Date: Mon, 30 Mar 2009 18:19:31 -0700 Subject: [PATCH 12/15] 6821700: tune VM flags for peak performance Tune C2 flags default values for performance. 
Reviewed-by: never, phh, iveresov, jmasa, ysr --- hotspot/src/cpu/sparc/vm/globals_sparc.hpp | 1 + hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp | 14 +++++++++----- hotspot/src/cpu/x86/vm/globals_x86.hpp | 1 + hotspot/src/share/vm/classfile/vmSymbols.hpp | 2 ++ hotspot/src/share/vm/opto/bytecodeInfo.cpp | 8 ++++++++ hotspot/src/share/vm/runtime/globals.hpp | 3 ++- 6 files changed, 23 insertions(+), 6 deletions(-) diff --git a/hotspot/src/cpu/sparc/vm/globals_sparc.hpp b/hotspot/src/cpu/sparc/vm/globals_sparc.hpp index 9d1bd7ac26f..98b3230ec7b 100644 --- a/hotspot/src/cpu/sparc/vm/globals_sparc.hpp +++ b/hotspot/src/cpu/sparc/vm/globals_sparc.hpp @@ -46,6 +46,7 @@ define_pd_global(uintx, TLABSize, 0); define_pd_global(uintx, NewSize, ScaleForWordSize((2048 * K) + (2 * (64 * K)))); define_pd_global(intx, SurvivorRatio, 8); define_pd_global(intx, InlineFrequencyCount, 50); // we can use more inlining on the SPARC +define_pd_global(intx, InlineSmallCode, 1500); #ifdef _LP64 // Stack slots are 2X larger in LP64 than in the 32 bit VM. 
define_pd_global(intx, ThreadStackSize, 1024); diff --git a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp index dc7f6b5bdbe..17666c0a7fc 100644 --- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp +++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp @@ -62,7 +62,7 @@ void VM_Version::initialize() { if (is_niagara1()) { // Indirect branch is the same cost as direct if (FLAG_IS_DEFAULT(UseInlineCaches)) { - UseInlineCaches = false; + FLAG_SET_DEFAULT(UseInlineCaches, false); } #ifdef _LP64 // Single issue niagara1 is slower for CompressedOops @@ -79,15 +79,19 @@ void VM_Version::initialize() { #ifdef COMPILER2 // Indirect branch is the same cost as direct if (FLAG_IS_DEFAULT(UseJumpTables)) { - UseJumpTables = true; + FLAG_SET_DEFAULT(UseJumpTables, true); } // Single-issue, so entry and loop tops are // aligned on a single instruction boundary if (FLAG_IS_DEFAULT(InteriorEntryAlignment)) { - InteriorEntryAlignment = 4; + FLAG_SET_DEFAULT(InteriorEntryAlignment, 4); } if (FLAG_IS_DEFAULT(OptoLoopAlignment)) { - OptoLoopAlignment = 4; + FLAG_SET_DEFAULT(OptoLoopAlignment, 4); + } + if (is_niagara1_plus() && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { + // Use smaller prefetch distance on N2 + FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256); } #endif } @@ -95,7 +99,7 @@ void VM_Version::initialize() { // Use hardware population count instruction if available. 
if (has_hardware_popc()) { if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { - UsePopCountInstruction = true; + FLAG_SET_DEFAULT(UsePopCountInstruction, true); } } diff --git a/hotspot/src/cpu/x86/vm/globals_x86.hpp b/hotspot/src/cpu/x86/vm/globals_x86.hpp index 67f27d68125..9d6d0292eab 100644 --- a/hotspot/src/cpu/x86/vm/globals_x86.hpp +++ b/hotspot/src/cpu/x86/vm/globals_x86.hpp @@ -60,6 +60,7 @@ define_pd_global(uintx, NewSize, 1024 * K); define_pd_global(intx, StackShadowPages, 3 DEBUG_ONLY(+1)); #endif // AMD64 define_pd_global(intx, InlineFrequencyCount, 100); +define_pd_global(intx, InlineSmallCode, 1000); define_pd_global(intx, PreInflateSpin, 10); define_pd_global(intx, StackYellowPages, 2); diff --git a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp index 1d5b68f0da6..4678ba95175 100644 --- a/hotspot/src/share/vm/classfile/vmSymbols.hpp +++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp @@ -50,6 +50,7 @@ template(java_lang_Class, "java/lang/Class") \ template(java_lang_String, "java/lang/String") \ template(java_lang_StringValue, "java/lang/StringValue") \ + template(java_lang_StringCache, "java/lang/StringValue$StringCache") \ template(java_lang_Thread, "java/lang/Thread") \ template(java_lang_ThreadGroup, "java/lang/ThreadGroup") \ template(java_lang_Cloneable, "java/lang/Cloneable") \ @@ -286,6 +287,7 @@ template(frontCacheEnabled_name, "frontCacheEnabled") \ template(stringCacheEnabled_name, "stringCacheEnabled") \ template(bitCount_name, "bitCount") \ + template(profile_name, "profile") \ \ /* non-intrinsic name/signature pairs: */ \ template(register_method_name, "register") \ diff --git a/hotspot/src/share/vm/opto/bytecodeInfo.cpp b/hotspot/src/share/vm/opto/bytecodeInfo.cpp index 38623fd5f76..a66c873e282 100644 --- a/hotspot/src/share/vm/opto/bytecodeInfo.cpp +++ b/hotspot/src/share/vm/opto/bytecodeInfo.cpp @@ -232,6 +232,14 @@ const char* InlineTree::shouldNotInline(ciMethod *callee_method, ciMethod* 
calle return "disallowed by CompilerOracle"; } + if (UseStringCache) { + // Do not inline StringCache::profile() method used only at the beginning. + if (callee_method->name() == ciSymbol::profile_name() && + callee_method->holder()->name() == ciSymbol::java_lang_StringCache()) { + return "profiling method"; + } + } + return NULL; } diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp index 2d33287fcd4..9af4074d76f 100644 --- a/hotspot/src/share/vm/runtime/globals.hpp +++ b/hotspot/src/share/vm/runtime/globals.hpp @@ -47,6 +47,7 @@ define_pd_global(intx, Tier4BackEdgeThreshold, 0); define_pd_global(intx, OnStackReplacePercentage, 0); define_pd_global(bool, ResizeTLAB, false); define_pd_global(intx, FreqInlineSize, 0); +define_pd_global(intx, InlineSmallCode, 0); define_pd_global(intx, NewSizeThreadIncrease, 4*K); define_pd_global(intx, NewRatio, 4); define_pd_global(intx, InlineClassNatives, true); @@ -2616,7 +2617,7 @@ class CommandLineFlags { develop(intx, MaxRecursiveInlineLevel, 1, \ "maximum number of nested recursive calls that are inlined") \ \ - product(intx, InlineSmallCode, 1000, \ + product_pd(intx, InlineSmallCode, \ "Only inline already compiled methods if their code size is " \ "less than this") \ \ From c0d62ad9e6b0783f73b795c82626254e2239d2f0 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Tue, 31 Mar 2009 14:07:08 -0700 Subject: [PATCH 13/15] 6761600: Use sse 4.2 in intrinsics Use SSE 4.2 in intrinsics for String.{compareTo/equals/indexOf} and Arrays.equals. 
Reviewed-by: kvn, never, jrose --- hotspot/src/cpu/sparc/vm/sparc.ad | 215 ++++++++++ hotspot/src/cpu/x86/vm/assembler_x86.cpp | 48 +++ hotspot/src/cpu/x86/vm/assembler_x86.hpp | 8 + hotspot/src/cpu/x86/vm/vm_version_x86.cpp | 5 + hotspot/src/cpu/x86/vm/x86_32.ad | 427 +++++++++++++++--- hotspot/src/cpu/x86/vm/x86_64.ad | 428 ++++++++++++++++--- hotspot/src/share/vm/adlc/formssel.cpp | 14 +- hotspot/src/share/vm/classfile/vmSymbols.hpp | 3 +- hotspot/src/share/vm/opto/classes.hpp | 2 + hotspot/src/share/vm/opto/gcm.cpp | 6 + hotspot/src/share/vm/opto/lcm.cpp | 2 + hotspot/src/share/vm/opto/library_call.cpp | 240 ++++++++--- hotspot/src/share/vm/opto/loopnode.cpp | 2 + hotspot/src/share/vm/opto/matcher.cpp | 4 + hotspot/src/share/vm/opto/memnode.cpp | 26 +- hotspot/src/share/vm/opto/memnode.hpp | 48 +++ hotspot/src/share/vm/runtime/arguments.cpp | 3 - hotspot/src/share/vm/runtime/globals.hpp | 8 +- 18 files changed, 1297 insertions(+), 192 deletions(-) diff --git a/hotspot/src/cpu/sparc/vm/sparc.ad b/hotspot/src/cpu/sparc/vm/sparc.ad index df6e9049ebb..c65cc5495e6 100644 --- a/hotspot/src/cpu/sparc/vm/sparc.ad +++ b/hotspot/src/cpu/sparc/vm/sparc.ad @@ -3003,6 +3003,202 @@ enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{ __ bind(Ldone); %} +enc_class enc_String_Equals(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result) %{ + Label Lword, Lword_loop, Lpost_word, Lchar, Lchar_loop, Ldone; + MacroAssembler _masm(&cbuf); + + Register str1_reg = reg_to_register_object($str1$$reg); + Register str2_reg = reg_to_register_object($str2$$reg); + Register tmp1_reg = reg_to_register_object($tmp1$$reg); + Register tmp2_reg = reg_to_register_object($tmp2$$reg); + Register result_reg = reg_to_register_object($result$$reg); + + // Get the first character position in both strings + // [8] char array, [12] offset, [16] count + int value_offset = java_lang_String:: value_offset_in_bytes(); + int offset_offset = 
java_lang_String::offset_offset_in_bytes(); + int count_offset = java_lang_String:: count_offset_in_bytes(); + + // load str1 (jchar*) base address into tmp1_reg + __ load_heap_oop(Address(str1_reg, 0, value_offset), tmp1_reg); + __ ld(Address(str1_reg, 0, offset_offset), result_reg); + __ add(tmp1_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp1_reg); + __ ld(Address(str1_reg, 0, count_offset), str1_reg); // hoisted + __ sll(result_reg, exact_log2(sizeof(jchar)), result_reg); + __ load_heap_oop(Address(str2_reg, 0, value_offset), tmp2_reg); // hoisted + __ add(result_reg, tmp1_reg, tmp1_reg); + + // load str2 (jchar*) base address into tmp2_reg + // __ ld_ptr(Address(str2_reg, 0, value_offset), tmp2_reg); // hoisted + __ ld(Address(str2_reg, 0, offset_offset), result_reg); + __ add(tmp2_reg, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp2_reg); + __ ld(Address(str2_reg, 0, count_offset), str2_reg); // hoisted + __ sll(result_reg, exact_log2(sizeof(jchar)), result_reg); + __ cmp(str1_reg, str2_reg); // hoisted + __ add(result_reg, tmp2_reg, tmp2_reg); + + __ sll(str1_reg, exact_log2(sizeof(jchar)), str1_reg); + __ br(Assembler::notEqual, true, Assembler::pt, Ldone); + __ delayed()->mov(G0, result_reg); // not equal + + __ br_zero(Assembler::equal, true, Assembler::pn, str1_reg, Ldone); + __ delayed()->add(G0, 1, result_reg); //equals + + __ cmp(tmp1_reg, tmp2_reg); //same string ? 
+ __ brx(Assembler::equal, true, Assembler::pn, Ldone); + __ delayed()->add(G0, 1, result_reg); + + //rename registers + Register limit_reg = str1_reg; + Register chr2_reg = str2_reg; + Register chr1_reg = result_reg; + // tmp{12} are the base pointers + + //check for alignment and position the pointers to the ends + __ or3(tmp1_reg, tmp2_reg, chr1_reg); + __ andcc(chr1_reg, 0x3, chr1_reg); // notZero means at least one not 4-byte aligned + __ br(Assembler::notZero, false, Assembler::pn, Lchar); + __ delayed()->nop(); + + __ bind(Lword); + __ and3(limit_reg, 0x2, O7); //remember the remainder (either 0 or 2) + __ andn(limit_reg, 0x3, limit_reg); + __ br_zero(Assembler::zero, false, Assembler::pn, limit_reg, Lpost_word); + __ delayed()->nop(); + + __ add(tmp1_reg, limit_reg, tmp1_reg); + __ add(tmp2_reg, limit_reg, tmp2_reg); + __ neg(limit_reg); + + __ lduw(tmp1_reg, limit_reg, chr1_reg); + __ bind(Lword_loop); + __ lduw(tmp2_reg, limit_reg, chr2_reg); + __ cmp(chr1_reg, chr2_reg); + __ br(Assembler::notEqual, true, Assembler::pt, Ldone); + __ delayed()->mov(G0, result_reg); + __ inccc(limit_reg, 2*sizeof(jchar)); + // annul LDUW if branch is not taken to prevent access past end of string + __ br(Assembler::notZero, true, Assembler::pt, Lword_loop); //annul on taken + __ delayed()->lduw(tmp1_reg, limit_reg, chr1_reg); // hoisted + + __ bind(Lpost_word); + __ br_zero(Assembler::zero, true, Assembler::pt, O7, Ldone); + __ delayed()->add(G0, 1, result_reg); + + __ lduh(tmp1_reg, 0, chr1_reg); + __ lduh(tmp2_reg, 0, chr2_reg); + __ cmp (chr1_reg, chr2_reg); + __ br(Assembler::notEqual, true, Assembler::pt, Ldone); + __ delayed()->mov(G0, result_reg); + __ ba(false,Ldone); + __ delayed()->add(G0, 1, result_reg); + + __ bind(Lchar); + __ add(tmp1_reg, limit_reg, tmp1_reg); + __ add(tmp2_reg, limit_reg, tmp2_reg); + __ neg(limit_reg); //negate count + + __ lduh(tmp1_reg, limit_reg, chr1_reg); + __ bind(Lchar_loop); + __ lduh(tmp2_reg, limit_reg, chr2_reg); + __ 
cmp(chr1_reg, chr2_reg); + __ br(Assembler::notEqual, true, Assembler::pt, Ldone); + __ delayed()->mov(G0, result_reg); //not equal + __ inccc(limit_reg, sizeof(jchar)); + // annul LDUH if branch is not taken to prevent access past end of string + __ br(Assembler::notZero, true, Assembler::pt, Lchar_loop); //annul on taken + __ delayed()->lduh(tmp1_reg, limit_reg, chr1_reg); // hoisted + + __ add(G0, 1, result_reg); //equal + + __ bind(Ldone); + %} + +enc_class enc_Array_Equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result) %{ + Label Lvector, Ldone, Lloop; + MacroAssembler _masm(&cbuf); + + Register ary1_reg = reg_to_register_object($ary1$$reg); + Register ary2_reg = reg_to_register_object($ary2$$reg); + Register tmp1_reg = reg_to_register_object($tmp1$$reg); + Register tmp2_reg = reg_to_register_object($tmp2$$reg); + Register result_reg = reg_to_register_object($result$$reg); + + int length_offset = arrayOopDesc::length_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); + + // return true if the same array + __ cmp(ary1_reg, ary2_reg); + __ br(Assembler::equal, true, Assembler::pn, Ldone); + __ delayed()->add(G0, 1, result_reg); // equal + + __ br_null(ary1_reg, true, Assembler::pn, Ldone); + __ delayed()->mov(G0, result_reg); // not equal + + __ br_null(ary2_reg, true, Assembler::pn, Ldone); + __ delayed()->mov(G0, result_reg); // not equal + + //load the lengths of arrays + __ ld(Address(ary1_reg, 0, length_offset), tmp1_reg); + __ ld(Address(ary2_reg, 0, length_offset), tmp2_reg); + + // return false if the two arrays are not equal length + __ cmp(tmp1_reg, tmp2_reg); + __ br(Assembler::notEqual, true, Assembler::pn, Ldone); + __ delayed()->mov(G0, result_reg); // not equal + + __ br_zero(Assembler::zero, true, Assembler::pn, tmp1_reg, Ldone); + __ delayed()->add(G0, 1, result_reg); // zero-length arrays are equal + + // load array addresses + __ add(ary1_reg, base_offset, ary1_reg); + __ add(ary2_reg, 
base_offset, ary2_reg); + + // renaming registers + Register chr1_reg = tmp2_reg; // for characters in ary1 + Register chr2_reg = result_reg; // for characters in ary2 + Register limit_reg = tmp1_reg; // length + + // set byte count + __ sll(limit_reg, exact_log2(sizeof(jchar)), limit_reg); + __ andcc(limit_reg, 0x2, chr1_reg); //trailing character ? + __ br(Assembler::zero, false, Assembler::pt, Lvector); + __ delayed()->nop(); + + //compare the trailing char + __ sub(limit_reg, sizeof(jchar), limit_reg); + __ lduh(ary1_reg, limit_reg, chr1_reg); + __ lduh(ary2_reg, limit_reg, chr2_reg); + __ cmp(chr1_reg, chr2_reg); + __ br(Assembler::notEqual, true, Assembler::pt, Ldone); + __ delayed()->mov(G0, result_reg); // not equal + + // only one char ? + __ br_zero(Assembler::zero, true, Assembler::pn, limit_reg, Ldone); + __ delayed()->add(G0, 1, result_reg); // the only char matched: arrays are equal + + __ bind(Lvector); + // Shift ary1_reg and ary2_reg to the end of the arrays, negate limit + __ add(ary1_reg, limit_reg, ary1_reg); + __ add(ary2_reg, limit_reg, ary2_reg); + __ neg(limit_reg, limit_reg); + + __ lduw(ary1_reg, limit_reg, chr1_reg); + __ bind(Lloop); + __ lduw(ary2_reg, limit_reg, chr2_reg); + __ cmp(chr1_reg, chr2_reg); + __ br(Assembler::notEqual, false, Assembler::pt, Ldone); + __ delayed()->mov(G0, result_reg); // not equal + __ inccc(limit_reg, 2*sizeof(jchar)); + // annul LDUW if branch is not taken to prevent access past end of array + __ br(Assembler::notZero, true, Assembler::pt, Lloop); //annul on taken + __ delayed()->lduw(ary1_reg, limit_reg, chr1_reg); // hoisted + + __ add(G0, 1, result_reg); // equals + + __ bind(Ldone); + %} + enc_class enc_rethrow() %{ cbuf.set_inst_mark(); Register temp_reg = G3; @@ -9015,6 +9211,25 @@ instruct string_compare(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, note ins_pipe(long_memory_op); %} +instruct string_equals(o0RegP str1, o1RegP str2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result, + o7RegI tmp3, flagsReg 
ccr) %{ + match(Set result (StrEquals str1 str2)); + effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL ccr, KILL tmp3); + ins_cost(300); + format %{ "String Equals $str1,$str2 -> $result" %} + ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, result) ); + ins_pipe(long_memory_op); +%} + +instruct array_equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, g4RegP tmp2, notemp_iRegI result, + flagsReg ccr) %{ + match(Set result (AryEq ary1 ary2)); + effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL ccr); + ins_cost(300); + format %{ "Array Equals $ary1,$ary2 -> $result" %} + ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result)); + ins_pipe(long_memory_op); +%} //---------- Population Count Instructions ------------------------------------- diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.cpp b/hotspot/src/cpu/x86/vm/assembler_x86.cpp index dbf2f2b664a..35ee7e54019 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp @@ -2173,6 +2173,31 @@ void Assembler::orl(Register dst, Register src) { emit_arith(0x0B, 0xC0, dst, src); } +void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) { + assert(VM_Version::supports_sse4_2(), ""); + + InstructionMark im(this); + emit_byte(0x66); + prefix(src, dst); + emit_byte(0x0F); + emit_byte(0x3A); + emit_byte(0x61); + emit_operand(dst, src); + emit_byte(imm8); +} + +void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) { + assert(VM_Version::supports_sse4_2(), ""); + + emit_byte(0x66); + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0x3A); + emit_byte(0x61); + emit_byte(0xC0 | encode); + emit_byte(imm8); +} + // generic void Assembler::pop(Register dst) { int encode = prefix_and_encode(dst->encoding()); @@ -2330,6 +2355,29 @@ void Assembler::psrlq(XMMRegister dst, int shift) { emit_byte(shift); } +void Assembler::ptest(XMMRegister dst, Address src) { + 
assert(VM_Version::supports_sse4_1(), ""); + + InstructionMark im(this); + emit_byte(0x66); + prefix(src, dst); + emit_byte(0x0F); + emit_byte(0x38); + emit_byte(0x17); + emit_operand(dst, src); +} + +void Assembler::ptest(XMMRegister dst, XMMRegister src) { + assert(VM_Version::supports_sse4_1(), ""); + + emit_byte(0x66); + int encode = prefixq_and_encode(dst->encoding(), src->encoding()); + emit_byte(0x0F); + emit_byte(0x38); + emit_byte(0x17); + emit_byte(0xC0 | encode); +} + void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) { NOT_LP64(assert(VM_Version::supports_sse2(), "")); emit_byte(0x66); diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.hpp b/hotspot/src/cpu/x86/vm/assembler_x86.hpp index a5efad8d22b..89ac6dd5326 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp @@ -1226,6 +1226,10 @@ private: void orq(Register dst, Address src); void orq(Register dst, Register src); + // SSE4.2 string instructions + void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8); + void pcmpestri(XMMRegister xmm1, Address src, int imm8); + void popl(Address dst); #ifdef _LP64 @@ -1260,6 +1264,10 @@ private: // Shift Right Logical Quadword Immediate void psrlq(XMMRegister dst, int shift); + // Logical Compare Double Quadword + void ptest(XMMRegister dst, XMMRegister src); + void ptest(XMMRegister dst, Address src); + // Interleave Low Bytes void punpcklbw(XMMRegister dst, XMMRegister src); diff --git a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp index 1307dd1e54f..ccf6b429282 100644 --- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp +++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp @@ -408,6 +408,11 @@ void VM_Version::get_processor_features() { UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus } } + if( supports_sse4_2() && UseSSE >= 4 ) { + if( FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { + UseSSE42Intrinsics = true; + } + } } } diff --git 
a/hotspot/src/cpu/x86/vm/x86_32.ad b/hotspot/src/cpu/x86/vm/x86_32.ad index cd64cfbf9f2..509a39972fe 100644 --- a/hotspot/src/cpu/x86/vm/x86_32.ad +++ b/hotspot/src/cpu/x86/vm/x86_32.ad @@ -3694,12 +3694,16 @@ encode %{ } %} - enc_class enc_String_Compare() %{ + enc_class enc_String_Compare(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2, + eAXRegI tmp3, eBXRegI tmp4, eCXRegI result) %{ Label ECX_GOOD_LABEL, LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, CONT_LABEL, WHILE_HEAD_LABEL; MacroAssembler masm(&cbuf); + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); + // Get the first character position in both strings // [8] char array, [12] offset, [16] count int value_offset = java_lang_String::value_offset_in_bytes(); @@ -3717,7 +3721,6 @@ encode %{ // Compute the minimum of the string lengths(rsi) and the // difference of the string lengths (stack) - if (VM_Version::supports_cmov()) { masm.movl(rdi, Address(rdi, count_offset)); masm.movl(rsi, Address(rsi, count_offset)); @@ -3731,7 +3734,7 @@ encode %{ masm.movl(rsi, rdi); masm.subl(rdi, rcx); masm.push(rdi); - masm.jcc(Assembler::lessEqual, ECX_GOOD_LABEL); + masm.jccb(Assembler::lessEqual, ECX_GOOD_LABEL); masm.movl(rsi, rcx); // rsi holds min, rcx is unused } @@ -3756,7 +3759,7 @@ encode %{ Label LSkip2; // Check if the strings start at same location masm.cmpptr(rbx,rax); - masm.jcc(Assembler::notEqual, LSkip2); + masm.jccb(Assembler::notEqual, LSkip2); // Check if the length difference is zero (from stack) masm.cmpl(Address(rsp, 0), 0x0); @@ -3766,9 +3769,52 @@ encode %{ masm.bind(LSkip2); } - // Shift rax, and rbx, to the end of the arrays, negate min - masm.lea(rax, Address(rax, rsi, Address::times_2, 2)); - masm.lea(rbx, Address(rbx, rsi, Address::times_2, 2)); + // Advance to next character + masm.addptr(rax, 2); + masm.addptr(rbx, 2); + + if (UseSSE42Intrinsics) { + // With SSE4.2, use double quad vector compare + Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, 
COMPARE_TAIL; + // Setup to compare 16-byte vectors + masm.movl(rdi, rsi); + masm.andl(rsi, 0xfffffff8); // rsi holds the vector count + masm.andl(rdi, 0x00000007); // rdi holds the tail count + masm.testl(rsi, rsi); + masm.jccb(Assembler::zero, COMPARE_TAIL); + + masm.lea(rax, Address(rax, rsi, Address::times_2)); + masm.lea(rbx, Address(rbx, rsi, Address::times_2)); + masm.negl(rsi); + + masm.bind(COMPARE_VECTORS); + masm.movdqu(tmp1Reg, Address(rax, rsi, Address::times_2)); + masm.movdqu(tmp2Reg, Address(rbx, rsi, Address::times_2)); + masm.pxor(tmp1Reg, tmp2Reg); + masm.ptest(tmp1Reg, tmp1Reg); + masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL); + masm.addl(rsi, 8); + masm.jcc(Assembler::notZero, COMPARE_VECTORS); + masm.jmpb(COMPARE_TAIL); + + // Mismatched characters in the vectors + masm.bind(VECTOR_NOT_EQUAL); + masm.lea(rax, Address(rax, rsi, Address::times_2)); + masm.lea(rbx, Address(rbx, rsi, Address::times_2)); + masm.movl(rdi, 8); + + // Compare tail (< 8 chars), or rescan last vectors to + // find 1st mismatched characters + masm.bind(COMPARE_TAIL); + masm.testl(rdi, rdi); + masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL); + masm.movl(rsi, rdi); + // Fallthru to tail compare + } + + //Shift rax, and rbx, to the end of the arrays, negate min + masm.lea(rax, Address(rax, rsi, Address::times_2, 0)); + masm.lea(rbx, Address(rbx, rsi, Address::times_2, 0)); masm.negl(rsi); // Compare the rest of the characters @@ -3776,93 +3822,329 @@ encode %{ masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0)); masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0)); masm.subl(rcx, rdi); - masm.jcc(Assembler::notZero, POP_LABEL); + masm.jccb(Assembler::notZero, POP_LABEL); masm.incrementl(rsi); masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL); // Strings are equal up to min length. Return the length difference. 
masm.bind(LENGTH_DIFF_LABEL); masm.pop(rcx); - masm.jmp(DONE_LABEL); + masm.jmpb(DONE_LABEL); // Discard the stored length difference masm.bind(POP_LABEL); masm.addptr(rsp, 4); - + // That's it masm.bind(DONE_LABEL); %} - enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result) %{ - Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP; + enc_class enc_String_Equals(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2, + eBXRegI tmp3, eCXRegI tmp4, eAXRegI result) %{ + Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR; MacroAssembler masm(&cbuf); - Register ary1Reg = as_Register($ary1$$reg); - Register ary2Reg = as_Register($ary2$$reg); - Register tmp1Reg = as_Register($tmp1$$reg); - Register tmp2Reg = as_Register($tmp2$$reg); - Register resultReg = as_Register($result$$reg); + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); + + int value_offset = java_lang_String::value_offset_in_bytes(); + int offset_offset = java_lang_String::offset_offset_in_bytes(); + int count_offset = java_lang_String::count_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); + + // does source == target string? 
+ masm.cmpptr(rdi, rsi); + masm.jcc(Assembler::equal, RET_TRUE); + + // get and compare counts + masm.movl(rcx, Address(rdi, count_offset)); + masm.movl(rax, Address(rsi, count_offset)); + masm.cmpl(rcx, rax); + masm.jcc(Assembler::notEqual, RET_FALSE); + masm.testl(rax, rax); + masm.jcc(Assembler::zero, RET_TRUE); + + // get source string offset and value + masm.movptr(rbx, Address(rsi, value_offset)); + masm.movl(rax, Address(rsi, offset_offset)); + masm.leal(rsi, Address(rbx, rax, Address::times_2, base_offset)); + + // get compare string offset and value + masm.movptr(rbx, Address(rdi, value_offset)); + masm.movl(rax, Address(rdi, offset_offset)); + masm.leal(rdi, Address(rbx, rax, Address::times_2, base_offset)); + + // Set byte count + masm.shll(rcx, 1); + masm.movl(rax, rcx); + + if (UseSSE42Intrinsics) { + // With SSE4.2, use double quad vector compare + Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; + // Compare 16-byte vectors + masm.andl(rcx, 0xfffffff0); // vector count (in bytes) + masm.andl(rax, 0x0000000e); // tail count (in bytes) + masm.testl(rcx, rcx); + masm.jccb(Assembler::zero, COMPARE_TAIL); + masm.lea(rdi, Address(rdi, rcx, Address::times_1)); + masm.lea(rsi, Address(rsi, rcx, Address::times_1)); + masm.negl(rcx); + + masm.bind(COMPARE_WIDE_VECTORS); + masm.movdqu(tmp1Reg, Address(rdi, rcx, Address::times_1)); + masm.movdqu(tmp2Reg, Address(rsi, rcx, Address::times_1)); + masm.pxor(tmp1Reg, tmp2Reg); + masm.ptest(tmp1Reg, tmp1Reg); + masm.jccb(Assembler::notZero, RET_FALSE); + masm.addl(rcx, 16); + masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); + masm.bind(COMPARE_TAIL); + masm.movl(rcx, rax); + // Fallthru to tail compare + } + + // Compare 4-byte vectors + masm.andl(rcx, 0xfffffffc); // vector count (in bytes) + masm.andl(rax, 0x00000002); // tail char (in bytes) + masm.testl(rcx, rcx); + masm.jccb(Assembler::zero, COMPARE_CHAR); + masm.lea(rdi, Address(rdi, rcx, Address::times_1)); + masm.lea(rsi, Address(rsi, rcx, Address::times_1)); + 
masm.negl(rcx); + + masm.bind(COMPARE_VECTORS); + masm.movl(rbx, Address(rdi, rcx, Address::times_1)); + masm.cmpl(rbx, Address(rsi, rcx, Address::times_1)); + masm.jccb(Assembler::notEqual, RET_FALSE); + masm.addl(rcx, 4); + masm.jcc(Assembler::notZero, COMPARE_VECTORS); + + // Compare trailing char (final 2 bytes), if any + masm.bind(COMPARE_CHAR); + masm.testl(rax, rax); + masm.jccb(Assembler::zero, RET_TRUE); + masm.load_unsigned_short(rbx, Address(rdi, 0)); + masm.load_unsigned_short(rcx, Address(rsi, 0)); + masm.cmpl(rbx, rcx); + masm.jccb(Assembler::notEqual, RET_FALSE); + + masm.bind(RET_TRUE); + masm.movl(rax, 1); // return true + masm.jmpb(DONE); + + masm.bind(RET_FALSE); + masm.xorl(rax, rax); // return false + + masm.bind(DONE); + %} + + enc_class enc_String_IndexOf(eSIRegP str1, eDIRegP str2, regXD tmp1, eAXRegI tmp2, + eCXRegI tmp3, eDXRegI tmp4, eBXRegI result) %{ + // SSE4.2 version + Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR, + SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE; + MacroAssembler masm(&cbuf); + + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); + + // Get the first character position in both strings + // [8] char array, [12] offset, [16] count + int value_offset = java_lang_String::value_offset_in_bytes(); + int offset_offset = java_lang_String::offset_offset_in_bytes(); + int count_offset = java_lang_String::count_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); + + // Get counts for string and substr + masm.movl(rdx, Address(rsi, count_offset)); + masm.movl(rax, Address(rdi, count_offset)); + // Check for substr count > string count + masm.cmpl(rax, rdx); + masm.jcc(Assembler::greater, RET_NEG_ONE); + + // Start the indexOf operation + // Get start addr of string + masm.movptr(rbx, Address(rsi, value_offset)); + masm.movl(rcx, Address(rsi, offset_offset)); + masm.lea(rsi, Address(rbx, rcx, Address::times_2, base_offset)); + masm.push(rsi); + + // Get start addr of substr + 
masm.movptr(rbx, Address(rdi, value_offset)); + masm.movl(rcx, Address(rdi, offset_offset)); + masm.lea(rdi, Address(rbx, rcx, Address::times_2, base_offset)); + masm.push(rdi); + masm.push(rax); + masm.jmpb(PREP_FOR_SCAN); + + // Substr count saved at sp + // Substr saved at sp+4 + // String saved at sp+8 + + // Prep to load substr for scan + masm.bind(LOAD_SUBSTR); + masm.movptr(rdi, Address(rsp, 4)); + masm.movl(rax, Address(rsp, 0)); + + // Load substr + masm.bind(PREP_FOR_SCAN); + masm.movdqu(tmp1Reg, Address(rdi, 0)); + masm.addl(rdx, 8); // prime the loop + masm.subptr(rsi, 16); + + // Scan string for substr in 16-byte vectors + masm.bind(SCAN_TO_SUBSTR); + masm.subl(rdx, 8); + masm.addptr(rsi, 16); + masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d); + masm.jcc(Assembler::above, SCAN_TO_SUBSTR); // CF == 0 && ZF == 0 + masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND); // CF == 0 + + // Fallthru: found a potential substr + + // Make sure string is still long enough + masm.subl(rdx, rcx); + masm.cmpl(rdx, rax); + masm.jccb(Assembler::negative, RET_NOT_FOUND); + // Compute start addr of substr + masm.lea(rsi, Address(rsi, rcx, Address::times_2)); + masm.movptr(rbx, rsi); + + // Compare potential substr + masm.addl(rdx, 8); // prime the loop + masm.addl(rax, 8); + masm.subptr(rsi, 16); + masm.subptr(rdi, 16); + + // Scan 16-byte vectors of string and substr + masm.bind(SCAN_SUBSTR); + masm.subl(rax, 8); + masm.subl(rdx, 8); + masm.addptr(rsi, 16); + masm.addptr(rdi, 16); + masm.movdqu(tmp1Reg, Address(rdi, 0)); + masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d); + masm.jcc(Assembler::noOverflow, LOAD_SUBSTR); // OF == 0 + masm.jcc(Assembler::positive, SCAN_SUBSTR); // SF == 0 + + // Compute substr offset + masm.movptr(rsi, Address(rsp, 8)); + masm.subptr(rbx, rsi); + masm.shrl(rbx, 1); + masm.jmpb(CLEANUP); + + masm.bind(RET_NEG_ONE); + masm.movl(rbx, -1); + masm.jmpb(DONE); + + masm.bind(RET_NOT_FOUND); + masm.movl(rbx, -1); + + masm.bind(CLEANUP); + 
masm.addptr(rsp, 12); + + masm.bind(DONE); + %} + + enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, regXD tmp1, regXD tmp2, + eBXRegI tmp3, eDXRegI tmp4, eAXRegI result) %{ + Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; + MacroAssembler masm(&cbuf); + + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); + Register ary1Reg = as_Register($ary1$$reg); + Register ary2Reg = as_Register($ary2$$reg); + Register tmp3Reg = as_Register($tmp3$$reg); + Register tmp4Reg = as_Register($tmp4$$reg); + Register resultReg = as_Register($result$$reg); int length_offset = arrayOopDesc::length_offset_in_bytes(); int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); // Check the input args - masm.cmpl(ary1Reg, ary2Reg); + masm.cmpptr(ary1Reg, ary2Reg); masm.jcc(Assembler::equal, TRUE_LABEL); - masm.testl(ary1Reg, ary1Reg); + masm.testptr(ary1Reg, ary1Reg); masm.jcc(Assembler::zero, FALSE_LABEL); - masm.testl(ary2Reg, ary2Reg); + masm.testptr(ary2Reg, ary2Reg); masm.jcc(Assembler::zero, FALSE_LABEL); // Check the lengths - masm.movl(tmp2Reg, Address(ary1Reg, length_offset)); + masm.movl(tmp4Reg, Address(ary1Reg, length_offset)); masm.movl(resultReg, Address(ary2Reg, length_offset)); - masm.cmpl(tmp2Reg, resultReg); + masm.cmpl(tmp4Reg, resultReg); masm.jcc(Assembler::notEqual, FALSE_LABEL); masm.testl(resultReg, resultReg); masm.jcc(Assembler::zero, TRUE_LABEL); - // Get the number of 4 byte vectors to compare - masm.shrl(resultReg, 1); + // Load array addrs + masm.lea(ary1Reg, Address(ary1Reg, base_offset)); + masm.lea(ary2Reg, Address(ary2Reg, base_offset)); - // Check for odd-length arrays - masm.andl(tmp2Reg, 1); - masm.testl(tmp2Reg, tmp2Reg); - masm.jcc(Assembler::zero, COMPARE_LOOP_HDR); + // Set byte count + masm.shll(tmp4Reg, 1); + masm.movl(resultReg, tmp4Reg); - // Compare 2-byte "tail" at end of arrays - masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, 
base_offset)); - masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); - masm.cmpl(tmp1Reg, tmp2Reg); - masm.jcc(Assembler::notEqual, FALSE_LABEL); + if (UseSSE42Intrinsics) { + // With SSE4.2, use double quad vector compare + Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; + // Compare 16-byte vectors + masm.andl(tmp4Reg, 0xfffffff0); // vector count (in bytes) + masm.andl(resultReg, 0x0000000e); // tail count (in bytes) + masm.testl(tmp4Reg, tmp4Reg); + masm.jccb(Assembler::zero, COMPARE_TAIL); + masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); + masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); + masm.negl(tmp4Reg); + + masm.bind(COMPARE_WIDE_VECTORS); + masm.movdqu(tmp1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); + masm.movdqu(tmp2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); + masm.pxor(tmp1Reg, tmp2Reg); + masm.ptest(tmp1Reg, tmp1Reg); + + masm.jccb(Assembler::notZero, FALSE_LABEL); + masm.addl(tmp4Reg, 16); + masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); + masm.bind(COMPARE_TAIL); + masm.movl(tmp4Reg, resultReg); + // Fallthru to tail compare + } + + // Compare 4-byte vectors + masm.andl(tmp4Reg, 0xfffffffc); // vector count (in bytes) + masm.andl(resultReg, 0x00000002); // tail char (in bytes) + masm.testl(tmp4Reg, tmp4Reg); + masm.jccb(Assembler::zero, COMPARE_CHAR); + masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); + masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); + masm.negl(tmp4Reg); + + masm.bind(COMPARE_VECTORS); + masm.movl(tmp3Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); + masm.cmpl(tmp3Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); + masm.jccb(Assembler::notEqual, FALSE_LABEL); + masm.addl(tmp4Reg, 4); + masm.jcc(Assembler::notZero, COMPARE_VECTORS); + + // Compare trailing char (final 2 bytes), if any + masm.bind(COMPARE_CHAR); masm.testl(resultReg, resultReg); - masm.jcc(Assembler::zero, TRUE_LABEL); - - // Setup compare loop 
- masm.bind(COMPARE_LOOP_HDR); - // Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays - masm.leal(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); - masm.leal(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); - masm.negl(resultReg); - - // 4-byte-wide compare loop - masm.bind(COMPARE_LOOP); - masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0)); - masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0)); - masm.cmpl(ary1Reg, ary2Reg); - masm.jcc(Assembler::notEqual, FALSE_LABEL); - masm.increment(resultReg); - masm.jcc(Assembler::notZero, COMPARE_LOOP); + masm.jccb(Assembler::zero, TRUE_LABEL); + masm.load_unsigned_short(tmp3Reg, Address(ary1Reg, 0)); + masm.load_unsigned_short(tmp4Reg, Address(ary2Reg, 0)); + masm.cmpl(tmp3Reg, tmp4Reg); + masm.jccb(Assembler::notEqual, FALSE_LABEL); masm.bind(TRUE_LABEL); masm.movl(resultReg, 1); // return true - masm.jmp(DONE_LABEL); + masm.jmpb(DONE); masm.bind(FALSE_LABEL); masm.xorl(resultReg, resultReg); // return false // That's it - masm.bind(DONE_LABEL); + masm.bind(DONE); %} enc_class enc_pop_rdx() %{ @@ -12074,11 +12356,8 @@ instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{ ins_pipe( fpu_reg_reg ); %} - - // ======================================================================= // fast clearing of an array - instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{ match(Set dummy (ClearArray cnt base)); effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr); @@ -12092,24 +12371,48 @@ instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlag ins_pipe( pipe_slow ); %} -instruct string_compare(eDIRegP str1, eSIRegP str2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{ +instruct string_compare(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2, + eAXRegI tmp3, eBXRegI tmp4, eCXRegI result, eFlagsReg cr) %{ match(Set result (StrComp str1 str2)); - effect(USE_KILL 
str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL cr); + effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr); //ins_cost(300); format %{ "String Compare $str1,$str2 -> $result // KILL EAX, EBX" %} - ins_encode( enc_String_Compare() ); + ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, tmp3, tmp4, result) ); + ins_pipe( pipe_slow ); +%} + +// fast string equals +instruct string_equals(eDIRegP str1, eSIRegP str2, regXD tmp1, regXD tmp2, + eBXRegI tmp3, eCXRegI tmp4, eAXRegI result, eFlagsReg cr) %{ + match(Set result (StrEquals str1 str2)); + effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr); + + format %{ "String Equals $str1,$str2 -> $result // KILL EBX, ECX" %} + ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, tmp3, tmp4, result) ); + ins_pipe( pipe_slow ); +%} + +instruct string_indexof(eSIRegP str1, eDIRegP str2, regXD tmp1, eAXRegI tmp2, + eCXRegI tmp3, eDXRegI tmp4, eBXRegI result, eFlagsReg cr) %{ + predicate(UseSSE42Intrinsics); + match(Set result (StrIndexOf str1 str2)); + effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr); + + format %{ "String IndexOf $str1,$str2 -> $result // KILL EAX, ECX, EDX" %} + ins_encode( enc_String_IndexOf(str1, str2, tmp1, tmp2, tmp3, tmp4, result) ); ins_pipe( pipe_slow ); %} // fast array equals -instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{ +instruct array_equals(eDIRegP ary1, eSIRegP ary2, regXD tmp1, regXD tmp2, eBXRegI tmp3, + eDXRegI tmp4, eAXRegI result, eFlagsReg cr) %{ match(Set result (AryEq ary1 ary2)); - effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL cr); + effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); //ins_cost(300); - format %{ "Array Equals $ary1,$ary2 -> $result // KILL EAX, EBX" %} - ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) ); + format %{ 
"Array Equals $ary1,$ary2 -> $result // KILL EBX, EDX" %} + ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) ); ins_pipe( pipe_slow ); %} diff --git a/hotspot/src/cpu/x86/vm/x86_64.ad b/hotspot/src/cpu/x86/vm/x86_64.ad index ae23fef1114..892b0863128 100644 --- a/hotspot/src/cpu/x86/vm/x86_64.ad +++ b/hotspot/src/cpu/x86/vm/x86_64.ad @@ -3694,13 +3694,16 @@ encode %{ } %} - enc_class enc_String_Compare() - %{ + enc_class enc_String_Compare(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2, + rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{ Label RCX_GOOD_LABEL, LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, CONT_LABEL, WHILE_HEAD_LABEL; MacroAssembler masm(&cbuf); + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); + // Get the first character position in both strings // [8] char array, [12] offset, [16] count int value_offset = java_lang_String::value_offset_in_bytes(); @@ -3718,6 +3721,7 @@ encode %{ // Compute the minimum of the string lengths(rsi) and the // difference of the string lengths (stack) + // do the conditional move stuff masm.movl(rdi, Address(rdi, count_offset)); masm.movl(rsi, Address(rsi, count_offset)); masm.movl(rcx, rdi); @@ -3745,7 +3749,7 @@ encode %{ Label LSkip2; // Check if the strings start at same location masm.cmpptr(rbx, rax); - masm.jcc(Assembler::notEqual, LSkip2); + masm.jccb(Assembler::notEqual, LSkip2); // Check if the length difference is zero (from stack) masm.cmpl(Address(rsp, 0), 0x0); @@ -3755,9 +3759,52 @@ encode %{ masm.bind(LSkip2); } + // Advance to next character + masm.addptr(rax, 2); + masm.addptr(rbx, 2); + + if (UseSSE42Intrinsics) { + // With SSE4.2, use double quad vector compare + Label COMPARE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_TAIL; + // Setup to compare 16-byte vectors + masm.movl(rdi, rsi); + masm.andl(rsi, 0xfffffff8); // rsi holds the vector count + masm.andl(rdi, 0x00000007); // rdi holds the tail count + masm.testl(rsi, rsi); + 
masm.jccb(Assembler::zero, COMPARE_TAIL); + + masm.lea(rax, Address(rax, rsi, Address::times_2)); + masm.lea(rbx, Address(rbx, rsi, Address::times_2)); + masm.negptr(rsi); + + masm.bind(COMPARE_VECTORS); + masm.movdqu(tmp1Reg, Address(rax, rsi, Address::times_2)); + masm.movdqu(tmp2Reg, Address(rbx, rsi, Address::times_2)); + masm.pxor(tmp1Reg, tmp2Reg); + masm.ptest(tmp1Reg, tmp1Reg); + masm.jccb(Assembler::notZero, VECTOR_NOT_EQUAL); + masm.addptr(rsi, 8); + masm.jcc(Assembler::notZero, COMPARE_VECTORS); + masm.jmpb(COMPARE_TAIL); + + // Mismatched characters in the vectors + masm.bind(VECTOR_NOT_EQUAL); + masm.lea(rax, Address(rax, rsi, Address::times_2)); + masm.lea(rbx, Address(rbx, rsi, Address::times_2)); + masm.movl(rdi, 8); + + // Compare tail (< 8 chars), or rescan last vectors to + // find 1st mismatched characters + masm.bind(COMPARE_TAIL); + masm.testl(rdi, rdi); + masm.jccb(Assembler::zero, LENGTH_DIFF_LABEL); + masm.movl(rsi, rdi); + // Fallthru to tail compare + } + // Shift RAX and RBX to the end of the arrays, negate min - masm.lea(rax, Address(rax, rsi, Address::times_2, 2)); - masm.lea(rbx, Address(rbx, rsi, Address::times_2, 2)); + masm.lea(rax, Address(rax, rsi, Address::times_2, 0)); + masm.lea(rbx, Address(rbx, rsi, Address::times_2, 0)); masm.negptr(rsi); // Compare the rest of the characters @@ -3765,93 +3812,329 @@ encode %{ masm.load_unsigned_short(rcx, Address(rbx, rsi, Address::times_2, 0)); masm.load_unsigned_short(rdi, Address(rax, rsi, Address::times_2, 0)); masm.subl(rcx, rdi); - masm.jcc(Assembler::notZero, POP_LABEL); + masm.jccb(Assembler::notZero, POP_LABEL); masm.increment(rsi); masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL); // Strings are equal up to min length. Return the length difference. 
masm.bind(LENGTH_DIFF_LABEL); masm.pop(rcx); - masm.jmp(DONE_LABEL); + masm.jmpb(DONE_LABEL); // Discard the stored length difference masm.bind(POP_LABEL); masm.addptr(rsp, 8); - + // That's it masm.bind(DONE_LABEL); %} - enc_class enc_Array_Equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI tmp1, rbx_RegI tmp2, rcx_RegI result) %{ - Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP; + enc_class enc_String_IndexOf(rsi_RegP str1, rdi_RegP str2, regD tmp1, rax_RegI tmp2, + rcx_RegI tmp3, rdx_RegI tmp4, rbx_RegI result) %{ + // SSE4.2 version + Label LOAD_SUBSTR, PREP_FOR_SCAN, SCAN_TO_SUBSTR, + SCAN_SUBSTR, RET_NEG_ONE, RET_NOT_FOUND, CLEANUP, DONE; MacroAssembler masm(&cbuf); - Register ary1Reg = as_Register($ary1$$reg); - Register ary2Reg = as_Register($ary2$$reg); - Register tmp1Reg = as_Register($tmp1$$reg); - Register tmp2Reg = as_Register($tmp2$$reg); - Register resultReg = as_Register($result$$reg); + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); + + // Get the first character position in both strings + // [8] char array, [12] offset, [16] count + int value_offset = java_lang_String::value_offset_in_bytes(); + int offset_offset = java_lang_String::offset_offset_in_bytes(); + int count_offset = java_lang_String::count_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); + + // Get counts for string and substr + masm.movl(rdx, Address(rsi, count_offset)); + masm.movl(rax, Address(rdi, count_offset)); + // Check for substr count > string count + masm.cmpl(rax, rdx); + masm.jcc(Assembler::greater, RET_NEG_ONE); + + // Start the indexOf operation + // Get start addr of string + masm.load_heap_oop(rbx, Address(rsi, value_offset)); + masm.movl(rcx, Address(rsi, offset_offset)); + masm.lea(rsi, Address(rbx, rcx, Address::times_2, base_offset)); + masm.push(rsi); + + // Get start addr of substr + masm.load_heap_oop(rbx, Address(rdi, value_offset)); + masm.movl(rcx, Address(rdi, offset_offset)); + masm.lea(rdi, 
Address(rbx, rcx, Address::times_2, base_offset)); + masm.push(rdi); + masm.push(rax); + masm.jmpb(PREP_FOR_SCAN); + + // Substr count saved at sp + // Substr saved at sp+8 + // String saved at sp+16 + + // Prep to load substr for scan + masm.bind(LOAD_SUBSTR); + masm.movptr(rdi, Address(rsp, 8)); + masm.movl(rax, Address(rsp, 0)); + + // Load substr + masm.bind(PREP_FOR_SCAN); + masm.movdqu(tmp1Reg, Address(rdi, 0)); + masm.addq(rdx, 8); // prime the loop + masm.subptr(rsi, 16); + + // Scan string for substr in 16-byte vectors + masm.bind(SCAN_TO_SUBSTR); + masm.subq(rdx, 8); + masm.addptr(rsi, 16); + masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d); + masm.jcc(Assembler::above, SCAN_TO_SUBSTR); + masm.jccb(Assembler::aboveEqual, RET_NOT_FOUND); + + // Fallthru: found a potential substr + + //Make sure string is still long enough + masm.subl(rdx, rcx); + masm.cmpl(rdx, rax); + masm.jccb(Assembler::negative, RET_NOT_FOUND); + // Compute start addr of substr + masm.lea(rsi, Address(rsi, rcx, Address::times_2)); + masm.movptr(rbx, rsi); + + // Compare potential substr + masm.addq(rdx, 8); // prime the loop + masm.addq(rax, 8); + masm.subptr(rsi, 16); + masm.subptr(rdi, 16); + + // Scan 16-byte vectors of string and substr + masm.bind(SCAN_SUBSTR); + masm.subq(rax, 8); + masm.subq(rdx, 8); + masm.addptr(rsi, 16); + masm.addptr(rdi, 16); + masm.movdqu(tmp1Reg, Address(rdi, 0)); + masm.pcmpestri(tmp1Reg, Address(rsi, 0), 0x0d); + masm.jcc(Assembler::noOverflow, LOAD_SUBSTR); // OF == 0 + masm.jcc(Assembler::positive, SCAN_SUBSTR); // SF == 0 + + // Compute substr offset + masm.movptr(rsi, Address(rsp, 16)); + masm.subptr(rbx, rsi); + masm.shrl(rbx, 1); + masm.jmpb(CLEANUP); + + masm.bind(RET_NEG_ONE); + masm.movl(rbx, -1); + masm.jmpb(DONE); + + masm.bind(RET_NOT_FOUND); + masm.movl(rbx, -1); + + masm.bind(CLEANUP); + masm.addptr(rsp, 24); + + masm.bind(DONE); + %} + + enc_class enc_String_Equals(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2, + rbx_RegI tmp3, 
rcx_RegI tmp2, rax_RegI result) %{ + Label RET_TRUE, RET_FALSE, DONE, COMPARE_VECTORS, COMPARE_CHAR; + MacroAssembler masm(&cbuf); + + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); + + int value_offset = java_lang_String::value_offset_in_bytes(); + int offset_offset = java_lang_String::offset_offset_in_bytes(); + int count_offset = java_lang_String::count_offset_in_bytes(); + int base_offset = arrayOopDesc::base_offset_in_bytes(T_CHAR); + + // does source == target string? + masm.cmpptr(rdi, rsi); + masm.jcc(Assembler::equal, RET_TRUE); + + // get and compare counts + masm.movl(rcx, Address(rdi, count_offset)); + masm.movl(rax, Address(rsi, count_offset)); + masm.cmpl(rcx, rax); + masm.jcc(Assembler::notEqual, RET_FALSE); + masm.testl(rax, rax); + masm.jcc(Assembler::zero, RET_TRUE); + + // get source string offset and value + masm.load_heap_oop(rbx, Address(rsi, value_offset)); + masm.movl(rax, Address(rsi, offset_offset)); + masm.lea(rsi, Address(rbx, rax, Address::times_2, base_offset)); + + // get compare string offset and value + masm.load_heap_oop(rbx, Address(rdi, value_offset)); + masm.movl(rax, Address(rdi, offset_offset)); + masm.lea(rdi, Address(rbx, rax, Address::times_2, base_offset)); + + // Set byte count + masm.shll(rcx, 1); + masm.movl(rax, rcx); + + if (UseSSE42Intrinsics) { + // With SSE4.2, use double quad vector compare + Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; + // Compare 16-byte vectors + masm.andl(rcx, 0xfffffff0); // vector count (in bytes) + masm.andl(rax, 0x0000000e); // tail count (in bytes) + masm.testl(rcx, rcx); + masm.jccb(Assembler::zero, COMPARE_TAIL); + masm.lea(rdi, Address(rdi, rcx, Address::times_1)); + masm.lea(rsi, Address(rsi, rcx, Address::times_1)); + masm.negptr(rcx); + + masm.bind(COMPARE_WIDE_VECTORS); + masm.movdqu(tmp1Reg, Address(rdi, rcx, Address::times_1)); + masm.movdqu(tmp2Reg, Address(rsi, rcx, Address::times_1)); + masm.pxor(tmp1Reg, tmp2Reg); + 
masm.ptest(tmp1Reg, tmp1Reg); + masm.jccb(Assembler::notZero, RET_FALSE); + masm.addptr(rcx, 16); + masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); + masm.bind(COMPARE_TAIL); + masm.movl(rcx, rax); + // Fallthru to tail compare + } + + // Compare 4-byte vectors + masm.andl(rcx, 0xfffffffc); // vector count (in bytes) + masm.andl(rax, 0x00000002); // tail char (in bytes) + masm.testl(rcx, rcx); + masm.jccb(Assembler::zero, COMPARE_CHAR); + masm.lea(rdi, Address(rdi, rcx, Address::times_1)); + masm.lea(rsi, Address(rsi, rcx, Address::times_1)); + masm.negptr(rcx); + + masm.bind(COMPARE_VECTORS); + masm.movl(rbx, Address(rdi, rcx, Address::times_1)); + masm.cmpl(rbx, Address(rsi, rcx, Address::times_1)); + masm.jccb(Assembler::notEqual, RET_FALSE); + masm.addptr(rcx, 4); + masm.jcc(Assembler::notZero, COMPARE_VECTORS); + + // Compare trailing char (final 2 bytes), if any + masm.bind(COMPARE_CHAR); + masm.testl(rax, rax); + masm.jccb(Assembler::zero, RET_TRUE); + masm.load_unsigned_short(rbx, Address(rdi, 0)); + masm.load_unsigned_short(rcx, Address(rsi, 0)); + masm.cmpl(rbx, rcx); + masm.jccb(Assembler::notEqual, RET_FALSE); + + masm.bind(RET_TRUE); + masm.movl(rax, 1); // return true + masm.jmpb(DONE); + + masm.bind(RET_FALSE); + masm.xorl(rax, rax); // return false + + masm.bind(DONE); + %} + + enc_class enc_Array_Equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2, + rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result) %{ + Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR; + MacroAssembler masm(&cbuf); + + XMMRegister tmp1Reg = as_XMMRegister($tmp1$$reg); + XMMRegister tmp2Reg = as_XMMRegister($tmp2$$reg); + Register ary1Reg = as_Register($ary1$$reg); + Register ary2Reg = as_Register($ary2$$reg); + Register tmp3Reg = as_Register($tmp3$$reg); + Register tmp4Reg = as_Register($tmp4$$reg); + Register resultReg = as_Register($result$$reg); int length_offset = arrayOopDesc::length_offset_in_bytes(); int base_offset = 
arrayOopDesc::base_offset_in_bytes(T_CHAR); // Check the input args - masm.cmpq(ary1Reg, ary2Reg); + masm.cmpq(ary1Reg, ary2Reg); masm.jcc(Assembler::equal, TRUE_LABEL); - masm.testq(ary1Reg, ary1Reg); + masm.testq(ary1Reg, ary1Reg); masm.jcc(Assembler::zero, FALSE_LABEL); - masm.testq(ary2Reg, ary2Reg); + masm.testq(ary2Reg, ary2Reg); masm.jcc(Assembler::zero, FALSE_LABEL); // Check the lengths - masm.movl(tmp2Reg, Address(ary1Reg, length_offset)); + masm.movl(tmp4Reg, Address(ary1Reg, length_offset)); masm.movl(resultReg, Address(ary2Reg, length_offset)); - masm.cmpl(tmp2Reg, resultReg); + masm.cmpl(tmp4Reg, resultReg); masm.jcc(Assembler::notEqual, FALSE_LABEL); masm.testl(resultReg, resultReg); masm.jcc(Assembler::zero, TRUE_LABEL); - // Get the number of 4 byte vectors to compare - masm.shrl(resultReg, 1); + //load array address + masm.lea(ary1Reg, Address(ary1Reg, base_offset)); + masm.lea(ary2Reg, Address(ary2Reg, base_offset)); - // Check for odd-length arrays - masm.andl(tmp2Reg, 1); - masm.testl(tmp2Reg, tmp2Reg); - masm.jcc(Assembler::zero, COMPARE_LOOP_HDR); + //set byte count + masm.shll(tmp4Reg, 1); + masm.movl(resultReg,tmp4Reg); - // Compare 2-byte "tail" at end of arrays - masm.load_unsigned_short(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); - masm.load_unsigned_short(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); - masm.cmpl(tmp1Reg, tmp2Reg); - masm.jcc(Assembler::notEqual, FALSE_LABEL); + if (UseSSE42Intrinsics){ + // With SSE4.2, use double quad vector compare + Label COMPARE_WIDE_VECTORS, COMPARE_TAIL; + // Compare 16-byte vectors + masm.andl(tmp4Reg, 0xfffffff0); // vector count (in bytes) + masm.andl(resultReg, 0x0000000e); // tail count (in bytes) + masm.testl(tmp4Reg, tmp4Reg); + masm.jccb(Assembler::zero, COMPARE_TAIL); + masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); + masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); + masm.negptr(tmp4Reg); + + 
masm.bind(COMPARE_WIDE_VECTORS); + masm.movdqu(tmp1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); + masm.movdqu(tmp2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); + masm.pxor(tmp1Reg, tmp2Reg); + masm.ptest(tmp1Reg, tmp1Reg); + + masm.jccb(Assembler::notZero, FALSE_LABEL); + masm.addptr(tmp4Reg, 16); + masm.jcc(Assembler::notZero, COMPARE_WIDE_VECTORS); + masm.bind(COMPARE_TAIL); + masm.movl(tmp4Reg, resultReg); + // Fallthru to tail compare + } + + // Compare 4-byte vectors + masm.andl(tmp4Reg, 0xfffffffc); // vector count (in bytes) + masm.andl(resultReg, 0x00000002); // tail char (in bytes) + masm.testl(tmp4Reg, tmp4Reg); //if tmp2 == 0, only compare char + masm.jccb(Assembler::zero, COMPARE_CHAR); + masm.lea(ary1Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); + masm.lea(ary2Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); + masm.negptr(tmp4Reg); + + masm.bind(COMPARE_VECTORS); + masm.movl(tmp3Reg, Address(ary1Reg, tmp4Reg, Address::times_1)); + masm.cmpl(tmp3Reg, Address(ary2Reg, tmp4Reg, Address::times_1)); + masm.jccb(Assembler::notEqual, FALSE_LABEL); + masm.addptr(tmp4Reg, 4); + masm.jcc(Assembler::notZero, COMPARE_VECTORS); + + // Compare trailing char (final 2 bytes), if any + masm.bind(COMPARE_CHAR); masm.testl(resultReg, resultReg); - masm.jcc(Assembler::zero, TRUE_LABEL); - - // Setup compare loop - masm.bind(COMPARE_LOOP_HDR); - // Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays - masm.leaq(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset)); - masm.leaq(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset)); - masm.negq(resultReg); - - // 4-byte-wide compare loop - masm.bind(COMPARE_LOOP); - masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0)); - masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0)); - masm.cmpl(ary1Reg, ary2Reg); - masm.jcc(Assembler::notEqual, FALSE_LABEL); - masm.incrementq(resultReg); - masm.jcc(Assembler::notZero, COMPARE_LOOP); + 
masm.jccb(Assembler::zero, TRUE_LABEL); + masm.load_unsigned_short(tmp3Reg, Address(ary1Reg, 0)); + masm.load_unsigned_short(tmp4Reg, Address(ary2Reg, 0)); + masm.cmpl(tmp3Reg, tmp4Reg); + masm.jccb(Assembler::notEqual, FALSE_LABEL); masm.bind(TRUE_LABEL); masm.movl(resultReg, 1); // return true - masm.jmp(DONE_LABEL); + masm.jmpb(DONE); masm.bind(FALSE_LABEL); masm.xorl(resultReg, resultReg); // return false // That's it - masm.bind(DONE_LABEL); + masm.bind(DONE); %} enc_class enc_rethrow() @@ -5087,7 +5370,7 @@ operand regF() %} // Double register operands -operand regD() +operand regD() %{ constraint(ALLOC_IN_RC(double_reg)); match(RegD); @@ -11540,27 +11823,52 @@ instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy, ins_pipe(pipe_slow); %} -instruct string_compare(rdi_RegP str1, rsi_RegP str2, rax_RegI tmp1, - rbx_RegI tmp2, rcx_RegI result, rFlagsReg cr) +instruct string_compare(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2, + rax_RegI tmp3, rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr) %{ match(Set result (StrComp str1 str2)); - effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL cr); + effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr); //ins_cost(300); format %{ "String Compare $str1, $str2 -> $result // XXX KILL RAX, RBX" %} - ins_encode( enc_String_Compare() ); + ins_encode( enc_String_Compare(str1, str2, tmp1, tmp2, tmp3, tmp4, result) ); + ins_pipe( pipe_slow ); +%} + +instruct string_indexof(rsi_RegP str1, rdi_RegP str2, regD tmp1, rax_RegI tmp2, + rcx_RegI tmp3, rdx_RegI tmp4, rbx_RegI result, rFlagsReg cr) +%{ + predicate(UseSSE42Intrinsics); + match(Set result (StrIndexOf str1 str2)); + effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, KILL tmp2, KILL tmp3, KILL tmp4, KILL cr); + + format %{ "String IndexOf $str1,$str2 -> $result // KILL RAX, RCX, RDX" %} + ins_encode( enc_String_IndexOf(str1, str2, tmp1, tmp2, tmp3, tmp4, result) ); + ins_pipe( pipe_slow ); +%} + +// fast 
string equals +instruct string_equals(rdi_RegP str1, rsi_RegP str2, regD tmp1, regD tmp2, rbx_RegI tmp3, + rcx_RegI tmp4, rax_RegI result, rFlagsReg cr) +%{ + match(Set result (StrEquals str1 str2)); + effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, KILL tmp3, KILL tmp4, KILL cr); + + format %{ "String Equals $str1,$str2 -> $result // KILL RBX, RCX" %} + ins_encode( enc_String_Equals(str1, str2, tmp1, tmp2, tmp3, tmp4, result) ); ins_pipe( pipe_slow ); %} // fast array equals -instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI tmp1, - rbx_RegI tmp2, rcx_RegI result, rFlagsReg cr) %{ +instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, regD tmp1, regD tmp2, rax_RegI tmp3, + rbx_RegI tmp4, rcx_RegI result, rFlagsReg cr) +%{ match(Set result (AryEq ary1 ary2)); - effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL cr); + effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr); //ins_cost(300); - format %{ "Array Equals $ary1,$ary2 -> $result // KILL RAX, RBX" %} - ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) ); + format %{ "Array Equals $ary1,$ary2 -> $result // KILL RAX, RBX" %} + ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, tmp3, tmp4, result) ); ins_pipe( pipe_slow ); %} diff --git a/hotspot/src/share/vm/adlc/formssel.cpp b/hotspot/src/share/vm/adlc/formssel.cpp index c573511a66e..1f80e2d0e16 100644 --- a/hotspot/src/share/vm/adlc/formssel.cpp +++ b/hotspot/src/share/vm/adlc/formssel.cpp @@ -574,9 +574,13 @@ bool InstructForm::needs_anti_dependence_check(FormDict &globals) const { // TEMPORARY // if( is_simple_chain_rule(globals) ) return false; - // String-compare uses many memorys edges, but writes none + // String.(compareTo/equals/indexOf) and Arrays.equals use many memorys edges, + // but writes none if( _matrule && _matrule->_rChild && - strcmp(_matrule->_rChild->_opType,"StrComp")==0 ) + ( strcmp(_matrule->_rChild->_opType,"StrComp" )==0 || + 
strcmp(_matrule->_rChild->_opType,"StrEquals" )==0 || + strcmp(_matrule->_rChild->_opType,"StrIndexOf" )==0 || + strcmp(_matrule->_rChild->_opType,"AryEq" )==0 )) return true; // Check if instruction has a USE of a memory operand class, but no defs @@ -815,8 +819,10 @@ uint InstructForm::oper_input_base(FormDict &globals) { return AdlcVMDeps::Parms; // Skip the machine-state edges if( _matrule->_rChild && - strcmp(_matrule->_rChild->_opType,"StrComp")==0 ) { - // String compare takes 1 control and 4 memory edges. + ( strcmp(_matrule->_rChild->_opType,"StrComp" )==0 || + strcmp(_matrule->_rChild->_opType,"StrEquals" )==0 || + strcmp(_matrule->_rChild->_opType,"StrIndexOf")==0 )) { + // String.(compareTo/equals/indexOf) take 1 control and 4 memory edges. return 5; } diff --git a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp index cbf24f4bc89..fc0601aee3a 100644 --- a/hotspot/src/share/vm/classfile/vmSymbols.hpp +++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp @@ -288,6 +288,7 @@ template(stringCacheEnabled_name, "stringCacheEnabled") \ template(bitCount_name, "bitCount") \ template(profile_name, "profile") \ + template(equals_name, "equals") \ \ /* non-intrinsic name/signature pairs: */ \ template(register_method_name, "register") \ @@ -579,7 +580,6 @@ do_signature(copyOfRange_signature, "([Ljava/lang/Object;IILjava/lang/Class;)[Ljava/lang/Object;") \ \ do_intrinsic(_equalsC, java_util_Arrays, equals_name, equalsC_signature, F_S) \ - do_name( equals_name, "equals") \ do_signature(equalsC_signature, "([C[C)Z") \ \ do_intrinsic(_invoke, java_lang_reflect_Method, invoke_name, object_array_object_object_signature, F_R) \ @@ -589,6 +589,7 @@ do_name( compareTo_name, "compareTo") \ do_intrinsic(_indexOf, java_lang_String, indexOf_name, string_int_signature, F_R) \ do_name( indexOf_name, "indexOf") \ + do_intrinsic(_equals, java_lang_String, equals_name, object_boolean_signature, F_R) \ \ do_class(java_nio_Buffer, 
"java/nio/Buffer") \ do_intrinsic(_checkIndex, java_nio_Buffer, checkIndex_name, int_int_signature, F_R) \ diff --git a/hotspot/src/share/vm/opto/classes.hpp b/hotspot/src/share/vm/opto/classes.hpp index 87adb737cb7..b854447d75d 100644 --- a/hotspot/src/share/vm/opto/classes.hpp +++ b/hotspot/src/share/vm/opto/classes.hpp @@ -218,6 +218,8 @@ macro(StoreL) macro(StoreP) macro(StoreN) macro(StrComp) +macro(StrEquals) +macro(StrIndexOf) macro(SubD) macro(SubF) macro(SubI) diff --git a/hotspot/src/share/vm/opto/gcm.cpp b/hotspot/src/share/vm/opto/gcm.cpp index df648455117..92d5371153d 100644 --- a/hotspot/src/share/vm/opto/gcm.cpp +++ b/hotspot/src/share/vm/opto/gcm.cpp @@ -438,6 +438,12 @@ Block* PhaseCFG::insert_anti_dependences(Block* LCA, Node* load, bool verify) { #endif assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrComp), "String compare is only known 'load' that does not conflict with any stores"); + assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrEquals), + "String equals is a 'load' that does not conflict with any stores"); + assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrIndexOf), + "String indexOf is a 'load' that does not conflict with any stores"); + assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_AryEq), + "Arrays equals is a 'load' that do not conflict with any stores"); if (!C->alias_type(load_alias_idx)->is_rewritable()) { // It is impossible to spoil this load by putting stores before it, diff --git a/hotspot/src/share/vm/opto/lcm.cpp b/hotspot/src/share/vm/opto/lcm.cpp index 4c83f0af66d..31de55a5435 100644 --- a/hotspot/src/share/vm/opto/lcm.cpp +++ b/hotspot/src/share/vm/opto/lcm.cpp @@ -137,6 +137,8 @@ void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowe if( mach->in(2) != val ) continue; break; // Found a memory op? 
case Op_StrComp: + case Op_StrEquals: + case Op_StrIndexOf: case Op_AryEq: // Not a legit memory op for implicit null check regardless of // embedded loads diff --git a/hotspot/src/share/vm/opto/library_call.cpp b/hotspot/src/share/vm/opto/library_call.cpp index b5b7136a375..a57333fd5a0 100644 --- a/hotspot/src/share/vm/opto/library_call.cpp +++ b/hotspot/src/share/vm/opto/library_call.cpp @@ -136,6 +136,7 @@ class LibraryCallKit : public GraphKit { bool inline_string_compareTo(); bool inline_string_indexOf(); Node* string_indexOf(Node* string_object, ciTypeArray* target_array, jint offset, jint cache_i, jint md2_i); + bool inline_string_equals(); Node* pop_math_arg(); bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName); bool inline_math_native(vmIntrinsics::ID id); @@ -261,6 +262,7 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) { switch (id) { case vmIntrinsics::_indexOf: case vmIntrinsics::_compareTo: + case vmIntrinsics::_equals: case vmIntrinsics::_equalsC: break; // InlineNatives does not control String.compareTo default: @@ -275,6 +277,9 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) { case vmIntrinsics::_indexOf: if (!SpecialStringIndexOf) return NULL; break; + case vmIntrinsics::_equals: + if (!SpecialStringEquals) return NULL; + break; case vmIntrinsics::_equalsC: if (!SpecialArraysEquals) return NULL; break; @@ -442,6 +447,8 @@ bool LibraryCallKit::try_to_inline() { return inline_string_compareTo(); case vmIntrinsics::_indexOf: return inline_string_indexOf(); + case vmIntrinsics::_equals: + return inline_string_equals(); case vmIntrinsics::_getObject: return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, false); @@ -793,6 +800,8 @@ Node* LibraryCallKit::generate_current_thread(Node* &tls_output) { //------------------------------inline_string_compareTo------------------------ bool LibraryCallKit::inline_string_compareTo() { + if 
(!Matcher::has_match_rule(Op_StrComp)) return false; + const int value_offset = java_lang_String::value_offset_in_bytes(); const int count_offset = java_lang_String::count_offset_in_bytes(); const int offset_offset = java_lang_String::offset_offset_in_bytes(); @@ -830,6 +839,82 @@ bool LibraryCallKit::inline_string_compareTo() { return true; } +//------------------------------inline_string_equals------------------------ +bool LibraryCallKit::inline_string_equals() { + + if (!Matcher::has_match_rule(Op_StrEquals)) return false; + + const int value_offset = java_lang_String::value_offset_in_bytes(); + const int count_offset = java_lang_String::count_offset_in_bytes(); + const int offset_offset = java_lang_String::offset_offset_in_bytes(); + + _sp += 2; + Node* argument = pop(); // pop non-receiver first: it was pushed second + Node* receiver = pop(); + + // Null check on self without removing any arguments. The argument + // null check technically happens in the wrong place, which can lead to + // invalid stack traces when string compare is inlined into a method + // which handles NullPointerExceptions. + _sp += 2; + receiver = do_null_check(receiver, T_OBJECT); + //should not do null check for argument for String.equals(), because spec + //allows to specify NULL as argument. 
+ _sp -= 2; + + if (stopped()) { + return true; + } + + // get String klass for instanceOf + ciInstanceKlass* klass = env()->String_klass(); + + // two paths (plus control) merge + RegionNode* region = new (C, 3) RegionNode(3); + Node* phi = new (C, 3) PhiNode(region, TypeInt::BOOL); + + Node* inst = gen_instanceof(argument, makecon(TypeKlassPtr::make(klass))); + Node* cmp = _gvn.transform(new (C, 3) CmpINode(inst, intcon(1))); + Node* bol = _gvn.transform(new (C, 2) BoolNode(cmp, BoolTest::eq)); + + IfNode* iff = create_and_map_if(control(), bol, PROB_MAX, COUNT_UNKNOWN); + + Node* if_true = _gvn.transform(new (C, 1) IfTrueNode(iff)); + set_control(if_true); + + const TypeInstPtr* string_type = + TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0); + + // instanceOf == true + Node* equals = + _gvn.transform(new (C, 7) StrEqualsNode( + control(), + memory(TypeAryPtr::CHARS), + memory(string_type->add_offset(value_offset)), + memory(string_type->add_offset(count_offset)), + memory(string_type->add_offset(offset_offset)), + receiver, + argument)); + + phi->init_req(1, _gvn.transform(equals)); + region->init_req(1, if_true); + + //instanceOf == false, fallthrough + Node* if_false = _gvn.transform(new (C, 1) IfFalseNode(iff)); + set_control(if_false); + + phi->init_req(2, _gvn.transform(intcon(0))); + region->init_req(2, if_false); + + // post merge + set_control(_gvn.transform(region)); + record_for_igvn(region); + + push(_gvn.transform(phi)); + + return true; +} + //------------------------------inline_array_equals---------------------------- bool LibraryCallKit::inline_array_equals() { @@ -994,80 +1079,115 @@ Node* LibraryCallKit::string_indexOf(Node* string_object, ciTypeArray* target_ar return result; } - //------------------------------inline_string_indexOf------------------------ bool LibraryCallKit::inline_string_indexOf() { - _sp += 2; - Node *argument = pop(); // pop non-receiver first: it was pushed second - Node *receiver = pop(); - - // don't 
intrinsify if argument isn't a constant string. - if (!argument->is_Con()) { - return false; - } - const TypeOopPtr* str_type = _gvn.type(argument)->isa_oopptr(); - if (str_type == NULL) { - return false; - } - ciInstanceKlass* klass = env()->String_klass(); - ciObject* str_const = str_type->const_oop(); - if (str_const == NULL || str_const->klass() != klass) { - return false; - } - ciInstance* str = str_const->as_instance(); - assert(str != NULL, "must be instance"); - const int value_offset = java_lang_String::value_offset_in_bytes(); const int count_offset = java_lang_String::count_offset_in_bytes(); const int offset_offset = java_lang_String::offset_offset_in_bytes(); - ciObject* v = str->field_value_by_offset(value_offset).as_object(); - int o = str->field_value_by_offset(offset_offset).as_int(); - int c = str->field_value_by_offset(count_offset).as_int(); - ciTypeArray* pat = v->as_type_array(); // pattern (argument) character array - - // constant strings have no offset and count == length which - // simplifies the resulting code somewhat so lets optimize for that. - if (o != 0 || c != pat->length()) { - return false; - } - - // Null check on self without removing any arguments. The argument - // null check technically happens in the wrong place, which can lead to - // invalid stack traces when string compare is inlined into a method - // which handles NullPointerExceptions. _sp += 2; - receiver = do_null_check(receiver, T_OBJECT); - // No null check on the argument is needed since it's a constant String oop. 
- _sp -= 2; - if (stopped()) { - return true; - } + Node *argument = pop(); // pop non-receiver first: it was pushed second + Node *receiver = pop(); - // The null string as a pattern always returns 0 (match at beginning of string) - if (c == 0) { - push(intcon(0)); - return true; - } + Node* result; + if (Matcher::has_match_rule(Op_StrIndexOf) && + UseSSE42Intrinsics) { + // Generate SSE4.2 version of indexOf + // We currently only have match rules that use SSE4.2 - jchar lastChar = pat->char_at(o + (c - 1)); - int cache = 0; - int i; - for (i = 0; i < c - 1; i++) { - assert(i < pat->length(), "out of range"); - cache |= (1 << (pat->char_at(o + i) & (sizeof(cache) * BitsPerByte - 1))); - } + // Null check on self without removing any arguments. The argument + // null check technically happens in the wrong place, which can lead to + // invalid stack traces when string compare is inlined into a method + // which handles NullPointerExceptions. + _sp += 2; + receiver = do_null_check(receiver, T_OBJECT); + argument = do_null_check(argument, T_OBJECT); + _sp -= 2; - int md2 = c; - for (i = 0; i < c - 1; i++) { - assert(i < pat->length(), "out of range"); - if (pat->char_at(o + i) == lastChar) { - md2 = (c - 1) - i; + if (stopped()) { + return true; } + + ciInstanceKlass* klass = env()->String_klass(); + const TypeInstPtr* string_type = + TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0); + + result = + _gvn.transform(new (C, 7) + StrIndexOfNode(control(), + memory(TypeAryPtr::CHARS), + memory(string_type->add_offset(value_offset)), + memory(string_type->add_offset(count_offset)), + memory(string_type->add_offset(offset_offset)), + receiver, + argument)); + } else { //Use LibraryCallKit::string_indexOf + // don't intrinsify is argument isn't a constant string. 
+ if (!argument->is_Con()) { + return false; + } + const TypeOopPtr* str_type = _gvn.type(argument)->isa_oopptr(); + if (str_type == NULL) { + return false; + } + ciInstanceKlass* klass = env()->String_klass(); + ciObject* str_const = str_type->const_oop(); + if (str_const == NULL || str_const->klass() != klass) { + return false; + } + ciInstance* str = str_const->as_instance(); + assert(str != NULL, "must be instance"); + + ciObject* v = str->field_value_by_offset(value_offset).as_object(); + int o = str->field_value_by_offset(offset_offset).as_int(); + int c = str->field_value_by_offset(count_offset).as_int(); + ciTypeArray* pat = v->as_type_array(); // pattern (argument) character array + + // constant strings have no offset and count == length which + // simplifies the resulting code somewhat so lets optimize for that. + if (o != 0 || c != pat->length()) { + return false; + } + + // Null check on self without removing any arguments. The argument + // null check technically happens in the wrong place, which can lead to + // invalid stack traces when string compare is inlined into a method + // which handles NullPointerExceptions. + _sp += 2; + receiver = do_null_check(receiver, T_OBJECT); + // No null check on the argument is needed since it's a constant String oop. 
+ _sp -= 2; + if (stopped()) { + return true; + } + + // The null string as a pattern always returns 0 (match at beginning of string) + if (c == 0) { + push(intcon(0)); + return true; + } + + // Generate default indexOf + jchar lastChar = pat->char_at(o + (c - 1)); + int cache = 0; + int i; + for (i = 0; i < c - 1; i++) { + assert(i < pat->length(), "out of range"); + cache |= (1 << (pat->char_at(o + i) & (sizeof(cache) * BitsPerByte - 1))); + } + + int md2 = c; + for (i = 0; i < c - 1; i++) { + assert(i < pat->length(), "out of range"); + if (pat->char_at(o + i) == lastChar) { + md2 = (c - 1) - i; + } + } + + result = string_indexOf(receiver, pat, o, cache, md2); } - Node* result = string_indexOf(receiver, pat, o, cache, md2); push(result); return true; } diff --git a/hotspot/src/share/vm/opto/loopnode.cpp b/hotspot/src/share/vm/opto/loopnode.cpp index bb372e0d3f6..a36d0a534fc 100644 --- a/hotspot/src/share/vm/opto/loopnode.cpp +++ b/hotspot/src/share/vm/opto/loopnode.cpp @@ -2668,6 +2668,8 @@ void PhaseIdealLoop::build_loop_late_post( Node *n, const PhaseIdealLoop *verify case Op_LoadD_unaligned: case Op_LoadL_unaligned: case Op_StrComp: // Does a bunch of load-like effects + case Op_StrEquals: + case Op_StrIndexOf: case Op_AryEq: pinned = false; } diff --git a/hotspot/src/share/vm/opto/matcher.cpp b/hotspot/src/share/vm/opto/matcher.cpp index 100f79fd9e1..e230480b22a 100644 --- a/hotspot/src/share/vm/opto/matcher.cpp +++ b/hotspot/src/share/vm/opto/matcher.cpp @@ -746,6 +746,8 @@ static void match_alias_type(Compile* C, Node* n, Node* m) { if (nidx == Compile::AliasIdxBot && midx == Compile::AliasIdxTop) { switch (n->Opcode()) { case Op_StrComp: + case Op_StrEquals: + case Op_StrIndexOf: case Op_AryEq: case Op_MemBarVolatile: case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type? 
@@ -1788,6 +1790,8 @@ void Matcher::find_shared( Node *n ) { mstack.push(n->in(0), Pre_Visit); // Visit Control input continue; // while (mstack.is_nonempty()) case Op_StrComp: + case Op_StrEquals: + case Op_StrIndexOf: case Op_AryEq: set_shared(n); // Force result into register (it will be anyways) break; diff --git a/hotspot/src/share/vm/opto/memnode.cpp b/hotspot/src/share/vm/opto/memnode.cpp index 570e813e2fa..b2af60d27dc 100644 --- a/hotspot/src/share/vm/opto/memnode.cpp +++ b/hotspot/src/share/vm/opto/memnode.cpp @@ -2481,6 +2481,31 @@ Node *StrCompNode::Ideal(PhaseGVN *phase, bool can_reshape){ return remove_dead_region(phase, can_reshape) ? this : NULL; } +// Do we match on this edge? No memory edges +uint StrEqualsNode::match_edge(uint idx) const { + return idx == 5 || idx == 6; +} + +//------------------------------Ideal------------------------------------------ +// Return a node which is more "ideal" than the current node. Strip out +// control copies +Node *StrEqualsNode::Ideal(PhaseGVN *phase, bool can_reshape){ + return remove_dead_region(phase, can_reshape) ? this : NULL; +} + +//============================================================================= +// Do we match on this edge? No memory edges +uint StrIndexOfNode::match_edge(uint idx) const { + return idx == 5 || idx == 6; +} + +//------------------------------Ideal------------------------------------------ +// Return a node which is more "ideal" than the current node. Strip out +// control copies +Node *StrIndexOfNode::Ideal(PhaseGVN *phase, bool can_reshape){ + return remove_dead_region(phase, can_reshape) ? this : NULL; +} + //------------------------------Ideal------------------------------------------ // Return a node which is more "ideal" than the current node. Strip out // control copies @@ -2488,7 +2513,6 @@ Node *AryEqNode::Ideal(PhaseGVN *phase, bool can_reshape){ return remove_dead_region(phase, can_reshape) ? 
this : NULL; } - //============================================================================= MemBarNode::MemBarNode(Compile* C, int alias_idx, Node* precedent) : MultiNode(TypeFunc::Parms + (precedent == NULL? 0: 1)), diff --git a/hotspot/src/share/vm/opto/memnode.hpp b/hotspot/src/share/vm/opto/memnode.hpp index e318f3079f6..1d4f499da13 100644 --- a/hotspot/src/share/vm/opto/memnode.hpp +++ b/hotspot/src/share/vm/opto/memnode.hpp @@ -765,6 +765,54 @@ public: virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); }; +//------------------------------StrEquals------------------------------------- +class StrEqualsNode: public Node { +public: + StrEqualsNode(Node *control, + Node* char_array_mem, + Node* value_mem, + Node* count_mem, + Node* offset_mem, + Node* s1, Node* s2): Node(control, + char_array_mem, + value_mem, + count_mem, + offset_mem, + s1, s2) {}; + virtual int Opcode() const; + virtual bool depends_only_on_test() const { return false; } + virtual const Type* bottom_type() const { return TypeInt::BOOL; } + // a StrEqualsNode (conservatively) aliases with everything: + virtual const TypePtr* adr_type() const { return TypePtr::BOTTOM; } + virtual uint match_edge(uint idx) const; + virtual uint ideal_reg() const { return Op_RegI; } + virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); +}; + +//------------------------------StrIndexOf------------------------------------- +class StrIndexOfNode: public Node { +public: + StrIndexOfNode(Node *control, + Node* char_array_mem, + Node* value_mem, + Node* count_mem, + Node* offset_mem, + Node* s1, Node* s2): Node(control, + char_array_mem, + value_mem, + count_mem, + offset_mem, + s1, s2) {}; + virtual int Opcode() const; + virtual bool depends_only_on_test() const { return false; } + virtual const Type* bottom_type() const { return TypeInt::INT; } + // a StrIndexOfNode (conservatively) aliases with everything: + virtual const TypePtr* adr_type() const { return TypePtr::BOTTOM; } + virtual uint 
match_edge(uint idx) const; + virtual uint ideal_reg() const { return Op_RegI; } + virtual Node *Ideal(PhaseGVN *phase, bool can_reshape); +}; + //------------------------------AryEq--------------------------------------- class AryEqNode: public Node { public: diff --git a/hotspot/src/share/vm/runtime/arguments.cpp b/hotspot/src/share/vm/runtime/arguments.cpp index 05a84d581e9..567e6073e28 100644 --- a/hotspot/src/share/vm/runtime/arguments.cpp +++ b/hotspot/src/share/vm/runtime/arguments.cpp @@ -1366,9 +1366,6 @@ void Arguments::set_aggressive_opts_flags() { if (AggressiveOpts && FLAG_IS_DEFAULT(DoEscapeAnalysis)) { FLAG_SET_DEFAULT(DoEscapeAnalysis, true); } - if (AggressiveOpts && FLAG_IS_DEFAULT(SpecialArraysEquals)) { - FLAG_SET_DEFAULT(SpecialArraysEquals, true); - } if (AggressiveOpts && FLAG_IS_DEFAULT(BiasedLockingStartupDelay)) { FLAG_SET_DEFAULT(BiasedLockingStartupDelay, 500); } diff --git a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp index 9af4074d76f..fc85089e624 100644 --- a/hotspot/src/share/vm/runtime/globals.hpp +++ b/hotspot/src/share/vm/runtime/globals.hpp @@ -491,9 +491,15 @@ class CommandLineFlags { develop(bool, SpecialStringIndexOf, true, \ "special version of string indexOf") \ \ - product(bool, SpecialArraysEquals, false, \ + develop(bool, SpecialStringEquals, true, \ + "special version of string equals") \ + \ + develop(bool, SpecialArraysEquals, true, \ "special version of Arrays.equals(char[],char[])") \ \ + product(bool, UseSSE42Intrinsics, false, \ + "SSE4.2 versions of intrinsics") \ + \ develop(bool, TraceCallFixup, false, \ "traces all call fixups") \ \ From b857081608c1f9cf158cf253143fb704acb106a5 Mon Sep 17 00:00:00 2001 From: Tom Rodriguez Date: Tue, 31 Mar 2009 15:09:45 -0700 Subject: [PATCH 14/15] 6824463: deopt blob is testing wrong register on 64-bit x86 Reviewed-by: jrose, phh, kvn --- hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp index 0de47d37fe3..57fa4a480ca 100644 --- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp @@ -2691,7 +2691,7 @@ void SharedRuntime::generate_deopt_blob() { __ mov(rdi, rax); Label noException; - __ cmpl(r12, Deoptimization::Unpack_exception); // Was exception pending? + __ cmpl(r14, Deoptimization::Unpack_exception); // Was exception pending? __ jcc(Assembler::notEqual, noException); __ movptr(rax, Address(r15_thread, JavaThread::exception_oop_offset())); // QQQ this is useless it was NULL above From 4d34a77121f9d9869b08416bb3c63afd84a365a7 Mon Sep 17 00:00:00 2001 From: Tom Rodriguez Date: Wed, 1 Apr 2009 11:45:01 -0700 Subject: [PATCH 15/15] 6823454: Oop-typed loadP yields invalid pointer (0x1) on SPECjbb2005 at OSRed method entry Reviewed-by: kvn, jrose --- hotspot/src/share/vm/opto/parse1.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hotspot/src/share/vm/opto/parse1.cpp b/hotspot/src/share/vm/opto/parse1.cpp index 12b75fb327f..da9537eb3c4 100644 --- a/hotspot/src/share/vm/opto/parse1.cpp +++ b/hotspot/src/share/vm/opto/parse1.cpp @@ -95,7 +95,7 @@ Node *Parse::fetch_interpreter_state(int index, switch( bt ) { // Signature is flattened case T_INT: l = new (C, 3) LoadINode( 0, mem, adr, TypeRawPtr::BOTTOM ); break; case T_FLOAT: l = new (C, 3) LoadFNode( 0, mem, adr, TypeRawPtr::BOTTOM ); break; - case T_ADDRESS: + case T_ADDRESS: l = new (C, 3) LoadPNode( 0, mem, adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM ); break; case T_OBJECT: l = new (C, 3) LoadPNode( 0, mem, adr, TypeRawPtr::BOTTOM, TypeInstPtr::BOTTOM ); break; case T_LONG: case T_DOUBLE: {