8144448: Avoid placing CTI immediately following or preceding RDPC instruction

Best practice for new SPARC CPUs Reviewed-by: kvn
2017-06-27 15:46:16 +02:00 · 2017-06-27 15:46:16 +02:00 · 6a9aa18f63
commit 6a9aa18f63
parent 065a8981f5
9 changed files with 1546 additions and 852 deletions
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.cpp
@ -26,6 +26,36 @@
 #include "asm/assembler.hpp"
 #include "asm/assembler.inline.hpp"
 #include "assembler_sparc.hpp"
 // Returns the code fill byte, 0x00. Per the note kept from the original,
 // the word 0x00000000 is an illegal instruction.
 int AbstractAssembler::code_fill_byte() {
  const int fill = 0x00;
  return fill;
 }
 #ifdef VALIDATE_PIPELINE
 /* Walk over the current code section and verify that there are no obvious
 * pipeline hazards exposed in the code generated.
 */
 void Assembler::validate_no_pipeline_hazards() {
  const CodeSection* csect = code_section();
  address addr0 = csect->start();
  address addrN = csect->end();
  uint32_t prev = 0;
  assert((addrN - addr0) % BytesPerInstWord == 0, "must be");
  for (address pc = addr0; pc != addrN; pc += BytesPerInstWord) {
    uint32_t insn = *reinterpret_cast<uint32_t*>(pc);
    // 1. General case: No CTI immediately after other CTI
    assert(!(is_cti(prev) && is_cti(insn)), "CTI-CTI not allowed.");
    // 2. Special case: No CTI immediately after/before RDPC
    assert(!(is_cti(prev) && is_rdpc(insn)), "CTI-RDPC not allowed.");
    assert(!(is_rdpc(prev) && is_cti(insn)), "RDPC-CTI not allowed.");
    prev = insn;
  }
 }
 #endif
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -28,8 +28,8 @@
 #include "asm/register.hpp"
 // The SPARC Assembler: Pure assembler doing NO optimizations on the instruction
-// level; i.e., what you write
+// level; i.e., what you write is what you get. The Assembler is generating code
-// is what you get. The Assembler is generating code into a CodeBuffer.
+// into a CodeBuffer.
 class Assembler : public AbstractAssembler {
  friend class AbstractAssembler;
@ -278,26 +278,6 @@ class Assembler : public AbstractAssembler  {
    f_unorderedOrLessOrEqual    = 14,
    f_ordered                   = 15,
    // V8 coproc, pp 123 v8 manual
    cp_always  = 8,
    cp_never   = 0,
    cp_3       = 7,
    cp_2       = 6,
    cp_2or3    = 5,
    cp_1       = 4,
    cp_1or3    = 3,
    cp_1or2    = 2,
    cp_1or2or3 = 1,
    cp_0       = 9,
    cp_0or3    = 10,
    cp_0or2    = 11,
    cp_0or2or3 = 12,
    cp_0or1    = 13,
    cp_0or1or3 = 14,
    cp_0or1or2 = 15,
    // for integers
    never                = 0,
@ -323,9 +303,8 @@ class Assembler : public AbstractAssembler  {
  };
  enum CC {
    icc  = 0,  xcc  = 2,
    // ptr_cc is the correct condition code for a pointer or intptr_t:
-    ptr_cc = NOT_LP64(icc) LP64_ONLY(xcc),
+    icc  = 0, xcc  = 2, ptr_cc = xcc,
    fcc0 = 0, fcc1 = 1, fcc2 = 2, fcc3 = 3
  };
@ -405,8 +384,7 @@ class Assembler : public AbstractAssembler  {
    assert(juint(x) < juint(1 << nbits), "unsigned constant out of range");
  }
-  // fields: note bits numbered from LSB = 0,
+  // fields: note bits numbered from LSB = 0, fields known by inclusive bit range
  //  fields known by inclusive bit range
  static int fmask(juint hi_bit, juint lo_bit) {
    assert(hi_bit >= lo_bit && 0 <= lo_bit && hi_bit < 32, "bad bits");
@ -421,7 +399,6 @@ class Assembler : public AbstractAssembler  {
    return int(r);
  }
  // signed version: extract from field and sign-extend
  static int inv_s_field(int x, int hi_bit, int lo_bit) {
@ -488,6 +465,36 @@ class Assembler : public AbstractAssembler  {
    assert(is_cbcond(x), "wrong instruction");
    return (x & (1 << 21)) != 0;
  }
  // True if x carries the branch_op opcode and its op2 field selects one of
  // the branch formats compared below (bpr, bp, br, fb or fbp).
  static bool is_branch(int x) {
    if (inv_op(x) != Assembler::branch_op) {
      return false;
    }
    // Decode the op2 field once and compare it against each branch format.
    const int fmt = inv_op2(x);
    return fmt == Assembler::bpr_op2 ||
           fmt == Assembler::bp_op2  ||
           fmt == Assembler::br_op2  ||
           fmt == Assembler::fb_op2  ||
           fmt == Assembler::fbp_op2;
  }
  // True if the op field of x equals call_op.
  static bool is_call(int x) {
    const bool has_call_op = (inv_op(x) == Assembler::call_op);
    return has_call_op;
  }
  // True if x carries the arith_op opcode and its op3 field is either
  // jmpl_op3 or rett_op3.
  static bool is_jump(int x) {
    if (inv_op(x) == Assembler::arith_op) {
      const int kind = inv_op3(x);
      return kind == Assembler::jmpl_op3 || kind == Assembler::rett_op3;
    }
    return false;
  }
  // True if x is an arith_op instruction with op3 == rdreg_op3 whose
  // bit field 18..14 holds the value 5.
  static bool is_rdpc(int x) {
    if (inv_op(x) != Assembler::arith_op) return false;
    if (inv_op3(x) != Assembler::rdreg_op3) return false;
    return inv_u_field(x, 18, 14) == 5;
  }
  // True if x is a control-transfer instruction, i.e. a branch, a call or a
  // jump as recognized by the predicates above. done/retry are ignored.
  static bool is_cti(int x) {
    if (is_branch(x)) return true;
    if (is_call(x)) return true;
    return is_jump(x);
  }
  static int cond_cbcond(int x) { return  u_field((((x & 8) << 1) + 8 + (x & 7)), 29, 25); }
  static int inv_cond_cbcond(int x) {
    assert(is_cbcond(x), "wrong instruction");
@ -514,7 +521,6 @@ class Assembler : public AbstractAssembler  {
    return op3(r);
  }
  // compute inverse of simm
  static int inv_simm(int x, int nbits) {
    return (int)(x << (32 - nbits)) >> (32 - nbits);
@ -540,8 +546,7 @@ class Assembler : public AbstractAssembler  {
  static int wdisp16(intptr_t x, intptr_t off) {
    intptr_t xx = x - off;
    assert_signed_word_disp_range(xx, 16);
-    int r =  (xx >> 2) & ((1 << 14) - 1)
+    int r = (xx >> 2) & ((1 << 14) - 1) | (((xx >> (2+14)) & 3) << 20);
           |  (  ( (xx>>(2+14)) & 3 )  <<  20 );
    assert(inv_wdisp16(r, off) == x, "inverse is not inverse");
    return r;
  }
@ -560,8 +565,7 @@ class Assembler : public AbstractAssembler  {
    assert(VM_Version::has_cbcond(), "This CPU does not have CBCOND instruction");
    intptr_t xx = x - off;
    assert_signed_word_disp_range(xx, 10);
-    int r =  ( ( (xx >>  2   ) & ((1 << 8) - 1) ) <<  5 )
+    int r = (((xx >> 2) & ((1 << 8) - 1)) << 5) | (((xx >> (2+8)) & 3) << 19);
           | ( ( (xx >> (2+8)) & 3              ) << 19 );
    // Have to fake cbcond instruction to pass assert in inv_wdisp10()
    assert(inv_wdisp10((r | op(branch_op) | cond_cbcond(rc_last+1) | op2(bpr_op2)), off) == x, "inverse is not inverse");
    return r;
@ -571,9 +575,8 @@ class Assembler : public AbstractAssembler  {
  static intptr_t inv_wdisp(int x, intptr_t pos, int nbits) {
    int pre_sign_extend = x & ((1 << nbits) - 1);
-    int r =  pre_sign_extend >= ( 1 << (nbits-1) )
+    int r = (pre_sign_extend >= (1 << (nbits - 1)) ?
-       ?   pre_sign_extend | ~(( 1 << nbits ) - 1)
+             pre_sign_extend | ~((1 << nbits) - 1) : pre_sign_extend);
       :   pre_sign_extend;
    return (r << 2) + pos;
  }
@ -638,76 +641,112 @@ class Assembler : public AbstractAssembler  {
  static void v9_dep() { } // do nothing for now
 protected:
  // Simple delay-slot scheme:
  // In order to check the programmer, the assembler keeps track of delay slots.
  // It forbids CTIs in delay slots (conservative, but should be OK).
  // Also, when putting an instruction into a delay slot, you must say
  // asm->delayed()->add(...), in order to check that you don't omit
  // delay-slot instructions.
  // To implement this, we use a simple FSA
 #ifdef ASSERT
-  #define CHECK_DELAY
+#define VALIDATE_PIPELINE
 #endif
-#ifdef CHECK_DELAY
+
-  enum Delay_state { no_delay, at_delay_slot, filling_delay_slot } delay_state;
+#ifdef VALIDATE_PIPELINE
  // A simple delay-slot scheme:
  // In order to check the programmer, the assembler keeps track of delay-slots.
  // It forbids CTIs in delay-slots (conservative, but should be OK). Also, when
  // emitting an instruction into a delay-slot, you must do so using delayed(),
  // e.g. asm->delayed()->add(...), in order to check that you do not omit the
  // delay-slot instruction. To implement this, we use a simple FSA.
  enum { NoDelay, AtDelay, FillDelay } _delay_state;
  // A simple hazard scheme:
  // In order to avoid pipeline stalls, due to single cycle pipeline hazards, we
  // adopt a simplistic state tracking mechanism that will enforce an additional
  // 'nop' instruction to be inserted prior to emitting an instruction that can
  // expose a given hazard (currently, PC-related hazards only).
  enum { NoHazard, PcHazard } _hazard_state;
 #endif
 public:
-  // Tells assembler next instruction must NOT be in delay slot.
+  // Tell the assembler that the next instruction must NOT be in delay-slot.
-  // Use at start of multinstruction macros.
+  // Use at start of multi-instruction macros.
  void assert_not_delayed() {
-    // This is a separate overloading to avoid creation of string constants
+    // This is a separate entry to avoid the creation of string constants in
-    // in non-asserted code--with some compilers this pollutes the object code.
+    // non-asserted code, with some compilers this pollutes the object code.
-#ifdef CHECK_DELAY
+#ifdef VALIDATE_PIPELINE
-    assert_not_delayed("next instruction should not be a delay slot");
+    assert_no_delay("Next instruction should not be in a delay-slot.");
 #endif
  }
  void assert_not_delayed(const char* msg) {
 #ifdef CHECK_DELAY
    assert(delay_state == no_delay, msg);
 #endif
  }
 protected:
-  // Insert a nop if the previous is cbcond
+  void assert_no_delay(const char* msg) {
-  inline void insert_nop_after_cbcond();
+#ifdef VALIDATE_PIPELINE
    assert(_delay_state == NoDelay, msg);
 #endif
  }
-  // Delay slot helpers
+  void assert_no_hazard() {
-  // cti is called when emitting control-transfer instruction,
+#ifdef VALIDATE_PIPELINE
-  // BEFORE doing the emitting.
+    assert(_hazard_state == NoHazard, "Unsolicited pipeline hazard.");
-  // Only effective when assertion-checking is enabled.
+#endif
  }
 private:
  // Returns the 32-bit word located immediately before pc(). Callers must
  // ensure offset() > 0; this is asserted.
  inline int32_t prev_insn() {
    assert(offset() > 0, "Interface violation.");
    return *(reinterpret_cast<int32_t*>(pc()) - 1);
  }
 #ifdef VALIDATE_PIPELINE
  void validate_no_pipeline_hazards();
 #endif
 protected:
  // Avoid possible pipeline stall by inserting an additional 'nop' instruction,
  // if the previous instruction is a 'cbcond' or a 'rdpc'.
  inline void avoid_pipeline_stall();
  // A call to cti() is made before emitting a control-transfer instruction (CTI)
  // in order to assert a CTI is not emitted right after a 'cbcond' or 'rdpc',
  // nor in the delay-slot of another CTI. Only effective when assertions are
  // enabled.
  void cti() {
-    // A cbcond instruction immediately followed by a CTI
+    // A 'cbcond' or 'rdpc' instruction immediately followed by a CTI introduces
-    // instruction introduces pipeline stalls, we need to avoid that.
+    // a pipeline stall, which we make sure to prohibit.
-    no_cbcond_before();
+    assert_no_cbcond_before();
-#ifdef CHECK_DELAY
+    assert_no_rdpc_before();
-    assert_not_delayed("cti should not be in delay slot");
+#ifdef VALIDATE_PIPELINE
    assert_no_hazard();
    assert_no_delay("CTI in delay-slot.");
 #endif
  }
-  // called when emitting cti with a delay slot, AFTER emitting
+  // Called when emitting CTI with a delay-slot, AFTER emitting.
-  void has_delay_slot() {
+  inline void induce_delay_slot() {
-#ifdef CHECK_DELAY
+#ifdef VALIDATE_PIPELINE
-    assert_not_delayed("just checking");
+    assert_no_delay("Already in delay-slot.");
-    delay_state = at_delay_slot;
+    _delay_state = AtDelay;
 #endif
  }
-  // cbcond instruction should not be generated one after an other
+  inline void induce_pc_hazard() {
-  bool cbcond_before() {
+#ifdef VALIDATE_PIPELINE
-    if (offset() == 0) return false; // it is first instruction
+    assert_no_hazard();
-    int x = *(int*)(intptr_t(pc()) - 4); // previous instruction
+    _hazard_state = PcHazard;
-    return is_cbcond(x);
+#endif
  }
-  void no_cbcond_before() {
+  bool is_cbcond_before() { return offset() > 0 ? is_cbcond(prev_insn()) : false; }
-    assert(offset() == 0 || !cbcond_before(), "cbcond should not follow an other cbcond");
+
  // True if offset() > 0 and the word just before the current position
  // decodes as an 'rdpc'; false when offset() is not positive.
  bool is_rdpc_before() {
    return offset() > 0 && is_rdpc(prev_insn());
  }
  // Asserts that the instruction just before the current position is not a
  // 'cbcond'. Holds trivially when offset() == 0, i.e. there is no previous
  // instruction to inspect.
  void assert_no_cbcond_before() {
    assert(offset() == 0 || !is_cbcond_before(), "CBCOND should not be followed by CTI.");
  }
  // Asserts that the instruction just before the current position is not an
  // 'rdpc'. Holds trivially when offset() == 0, i.e. there is no previous
  // instruction to inspect.
  void assert_no_rdpc_before() {
    assert(offset() == 0 || !is_rdpc_before(), "RDPC should not be followed by CTI.");
  }
 public:
  bool use_cbcond(Label &L) {
-    if (!UseCBCond || cbcond_before()) return false;
+    if (!UseCBCond || is_cbcond_before()) return false;
    intptr_t x = intptr_t(target_distance(L)) - intptr_t(pc());
    assert((x & 3) == 0, "not word aligned");
    return is_simm12(x);
@ -715,32 +754,33 @@ public:
  // Tells assembler you know that next instruction is delayed
  Assembler* delayed() {
-#ifdef CHECK_DELAY
+#ifdef VALIDATE_PIPELINE
-    assert ( delay_state == at_delay_slot, "delayed instruction is not in delay slot");
+    assert(_delay_state == AtDelay, "Delayed instruction not in delay-slot.");
-    delay_state = filling_delay_slot;
+    _delay_state = FillDelay;
 #endif
    return this;
  }
  void flush() {
-#ifdef CHECK_DELAY
+#ifdef VALIDATE_PIPELINE
-    assert ( delay_state == no_delay, "ending code with a delay slot");
+    assert(_delay_state == NoDelay, "Ending code with a delay-slot.");
    validate_no_pipeline_hazards();
 #endif
    AbstractAssembler::flush();
  }
  inline void emit_int32(int);  // shadows AbstractAssembler::emit_int32
-  inline void emit_data(int x);
+  inline void emit_data(int);
-  inline void emit_data(int, RelocationHolder const&);
+  inline void emit_data(int, RelocationHolder const &rspec);
  inline void emit_data(int, relocInfo::relocType rtype);
-  // helper for above fcns
+  // helper for above functions
  inline void check_delay();
 public:
  // instructions, refer to page numbers in the SPARC Architecture Manual, V9
-  // pp 135 (addc was addx in v8)
+  // pp 135
  inline void add(Register s1, Register s2, Register d);
  inline void add(Register s1, int simm13a, Register d);
@ -895,7 +935,6 @@ public:
  // pp 168
  void illtrap(int const22a);
  // v8 unimp == illtrap(0)
  // pp 169
@ -905,12 +944,14 @@ public:
  // pp 170
  void jmpl(Register s1, Register s2, Register d);
-  void jmpl( Register s1, int simm13a, Register d, RelocationHolder const& rspec = RelocationHolder() );
+  void jmpl(Register s1, int simm13a, Register d,
            RelocationHolder const &rspec = RelocationHolder());
  // 171
  inline void ldf(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d);
-  inline void ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d, RelocationHolder const& rspec = RelocationHolder());
+  inline void ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d,
                  RelocationHolder const &rspec = RelocationHolder());
  inline void ldfsr(Register s1, Register s2);
@ -923,7 +964,7 @@ public:
  inline void ldfa(FloatRegisterImpl::Width w, Register s1, Register s2, int ia, FloatRegister d);
  inline void ldfa(FloatRegisterImpl::Width w, Register s1, int simm13a,         FloatRegister d);
-  // pp 175, lduw is ld on v8
+  // pp 175
  inline void ldsb(Register s1, Register s2, Register d);
  inline void ldsb(Register s1, int simm13a, Register d);
@ -1079,6 +1120,7 @@ public:
  // pp 217
  inline void sethi(int imm22a, Register d, RelocationHolder const &rspec = RelocationHolder());
  // pp 218
  inline void sll(Register s1, Register s2, Register d);
@ -1118,7 +1160,7 @@ public:
  inline void stfa(FloatRegisterImpl::Width w, FloatRegister d, Register s1, Register s2, int ia);
  inline void stfa(FloatRegisterImpl::Width w, FloatRegister d, Register s1, int simm13a);
-  // p 226
+  // pp 226
  inline void stb(Register d, Register s1, Register s2);
  inline void stb(Register d, Register s1, int simm13a);
@ -1240,8 +1282,9 @@ public:
  // Creation
  Assembler(CodeBuffer* code) : AbstractAssembler(code) {
-#ifdef CHECK_DELAY
+#ifdef VALIDATE_PIPELINE
-    delay_state = no_delay;
+    _delay_state  = NoDelay;
    _hazard_state = NoHazard;
 #endif
  }
 };
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.inline.hpp
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.cpp
@ -651,8 +651,8 @@ void MacroAssembler::card_table_write(jbyte* byte_map_base,
 void MacroAssembler::internal_sethi(const AddressLiteral& addrlit, Register d, bool ForceRelocatable) {
  address save_pc;
  int shiftcnt;
-# ifdef CHECK_DELAY
+#ifdef VALIDATE_PIPELINE
-  assert_not_delayed((char*) "cannot put two instructions in delay slot");
+  assert_no_delay("Cannot put two instructions in delay-slot.");
 #endif
  v9_dep();
  save_pc = pc();
@ -752,7 +752,7 @@ void MacroAssembler::internal_set(const AddressLiteral& addrlit, Register d, boo
      return;
    }
  }
-  assert_not_delayed((char*) "cannot put two instructions in delay slot");
+  assert_no_delay("Cannot put two instructions in delay-slot.");
  internal_sethi(addrlit, d, ForceRelocatable);
  if (ForceRelocatable || addrlit.rspec().type() != relocInfo::none || addrlit.low10() != 0) {
    add(d, addrlit.low10(), d, addrlit.rspec());
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.hpp
@ -662,9 +662,6 @@ class MacroAssembler : public Assembler {
  inline void fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt = relocInfo::none );
  inline void fbp( Condition c, bool a, CC cc, Predict p, Label& L );
  // get PC the best way
  inline int get_pc( Register d );
  // Sparc shorthands(pp 85, V8 manual, pp 289 V9 manual)
  inline void cmp(  Register s1, Register s2 );
  inline void cmp(  Register s1, int simm13a );
@ -1396,7 +1393,7 @@ public:
  void movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2);
  void movftoi_revbytes(FloatRegister src, Register dst, Register tmp1, Register tmp2);
-  // CRC32 code for java.util.zip.CRC32::updateBytes0() instrinsic.
+  // CRC32 code for java.util.zip.CRC32::updateBytes0() intrinsic.
  void kernel_crc32(Register crc, Register buf, Register len, Register table);
  // Fold 128-bit data chunk
  void fold_128bit_crc32(Register xcrc_hi, Register xcrc_lo, Register xK_hi, Register xK_lo, Register xtmp_hi, Register xtmp_lo, Register buf, int offset);
@ -1404,7 +1401,7 @@ public:
  // Fold 8-bit data
  void fold_8bit_crc32(Register xcrc, Register table, Register xtmp, Register tmp);
  void fold_8bit_crc32(Register crc, Register table, Register tmp);
-  // CRC32C code for java.util.zip.CRC32C::updateBytes/updateDirectByteBuffer instrinsic.
+  // CRC32C code for java.util.zip.CRC32C::updateBytes/updateDirectByteBuffer intrinsic.
  void kernel_crc32c(Register crc, Register buf, Register len, Register table);
 };
--- a/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp
+++ b/hotspot/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp
@ -185,7 +185,8 @@ inline void MacroAssembler::br( Condition c, bool a, Predict p, address d, reloc
 }
 inline void MacroAssembler::br( Condition c, bool a, Predict p, Label& L ) {
-  insert_nop_after_cbcond();
+  // See note[+] on 'avoid_pipeline_stall()', in "assembler_sparc.inline.hpp".
  avoid_pipeline_stall();
  br(c, a, p, target(L));
 }
@ -197,7 +198,7 @@ inline void MacroAssembler::brx( Condition c, bool a, Predict p, address d, relo
 }
 inline void MacroAssembler::brx( Condition c, bool a, Predict p, Label& L ) {
-  insert_nop_after_cbcond();
+  avoid_pipeline_stall();
  brx(c, a, p, target(L));
 }
@ -219,7 +220,7 @@ inline void MacroAssembler::fb( Condition c, bool a, Predict p, address d, reloc
 }
 inline void MacroAssembler::fb( Condition c, bool a, Predict p, Label& L ) {
-  insert_nop_after_cbcond();
+  avoid_pipeline_stall();
  fb(c, a, p, target(L));
 }
@ -269,12 +270,11 @@ inline void MacroAssembler::call( address d, RelocationHolder const& rspec ) {
 }
 inline void MacroAssembler::call( Label& L, relocInfo::relocType rt ) {
-  insert_nop_after_cbcond();
+  avoid_pipeline_stall();
  MacroAssembler::call(target(L), rt);
 }
 inline void MacroAssembler::callr( Register s1, Register s2 ) { jmpl( s1, s2, O7 ); }
 inline void MacroAssembler::callr( Register s1, int simm13a, RelocationHolder const& rspec ) { jmpl( s1, simm13a, O7, rspec); }
@ -304,13 +304,6 @@ inline void MacroAssembler::retl( bool trace ) {
  }
 }
 // clobbers o7 on V8!!
 // returns delta from gotten pc to addr after
 inline int MacroAssembler::get_pc( Register d ) {
  // Sample offset() on both sides of the rdpc emission and report the
  // difference.
  const int before = offset();
  rdpc(d);
  const int after = offset();
  return after - before;
 }
 inline void MacroAssembler::cmp(  Register s1, Register s2 ) { subcc( s1, s2, G0 ); }
 inline void MacroAssembler::cmp(  Register s1, int simm13a ) { subcc( s1, simm13a, G0 ); }
--- a/hotspot/src/cpu/sparc/vm/methodHandles_sparc.hpp
+++ b/hotspot/src/cpu/sparc/vm/methodHandles_sparc.hpp
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2017, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -27,7 +27,7 @@
 // Adapters
 enum /* platform_dependent_constants */ {
-  adapter_code_size = NOT_LP64(23000 DEBUG_ONLY(+ 40000)) LP64_ONLY(35000 DEBUG_ONLY(+ 50000))
+  adapter_code_size = 35000 DEBUG_ONLY(+ 50000)
 };
 // Additional helper methods for MethodHandles code generation:
--- a/hotspot/src/cpu/sparc/vm/nativeInst_sparc.hpp
+++ b/hotspot/src/cpu/sparc/vm/nativeInst_sparc.hpp
@ -67,11 +67,8 @@ class NativeInstruction VALUE_OBJ_CLASS_SPEC {
  bool is_illegal();
  bool is_zombie() {
    int x = long_at(0);
-    return is_op3(x,
+    return (is_op3(x, Assembler::ldsw_op3, Assembler::ldst_op) &&
-                  Assembler::ldsw_op3,
+            inv_rs1(x) == G0 && inv_rd(x) == O7);
                  Assembler::ldst_op)
        && Assembler::inv_rs1(x) == G0
        && Assembler::inv_rd(x) == O7;
  }
  bool is_ic_miss_trap();       // Inline-cache uses a trap to detect a miss
  bool is_return() {
@ -129,28 +126,10 @@ class NativeInstruction VALUE_OBJ_CLASS_SPEC {
  bool is_load_store_with_small_offset(Register reg);
 public:
 #ifdef ASSERT
  static int rdpc_instruction()        { return Assembler::op(Assembler::arith_op ) | Assembler::op3(Assembler::rdreg_op3) | Assembler::u_field(5, 18, 14) | Assembler::rd(O7); }
 #else
  // Temporary fix: in optimized mode, u_field is a macro for efficiency reasons (see Assembler::u_field) - needs to be fixed
  static int rdpc_instruction()        { return Assembler::op(Assembler::arith_op ) | Assembler::op3(Assembler::rdreg_op3) |            u_field(5, 18, 14) | Assembler::rd(O7); }
 #endif
  static int nop_instruction()         { return Assembler::op(Assembler::branch_op) | Assembler::op2(Assembler::sethi_op2); }
  static int illegal_instruction();    // the output of __ breakpoint_trap()
  static int call_instruction(address destination, address pc) { return Assembler::op(Assembler::call_op) | Assembler::wdisp((intptr_t)destination, (intptr_t)pc, 30); }
  // Builds a branch_op instruction word by OR-ing together the op, op2,
  // annul and condition fields produced by the Assembler field encoders.
  static int branch_instruction(Assembler::op2s op2val, Assembler::Condition c, bool a) {
    int insn = Assembler::op(Assembler::branch_op);
    insn |= Assembler::op2(op2val);
    insn |= Assembler::annul(a);
    insn |= Assembler::cond(c);
    return insn;
  }
  // Builds an instruction word for the given op/op3 pair in its immediate
  // form: rd, rs1, the immediate flag and a 13-bit signed immediate are
  // OR-ed in using the Assembler field encoders.
  static int op3_instruction(Assembler::ops opval, Register rd, Assembler::op3s op3val, Register rs1, int simm13a) {
    int insn = Assembler::op(opval);
    insn |= Assembler::rd(rd);
    insn |= Assembler::op3(op3val);
    insn |= Assembler::rs1(rs1);
    insn |= Assembler::immed(true);
    insn |= Assembler::simm(simm13a, 13);
    return insn;
  }
  // Builds a sethi instruction word: branch_op opcode with sethi_op2, the
  // destination register rd and the hi22 encoding of imm22a.
  static int sethi_instruction(Register rd, int imm22a) {
    int insn = Assembler::op(Assembler::branch_op);
    insn |= Assembler::rd(rd);
    insn |= Assembler::op2(Assembler::sethi_op2);
    insn |= Assembler::hi22(imm22a);
    return insn;
  }
 protected:
  address  addr_at(int offset) const    { return address(this) + offset; }
  int      long_at(int offset) const    { return *(int*)addr_at(offset); }
--- a/hotspot/src/cpu/sparc/vm/sparc.ad
+++ b/hotspot/src/cpu/sparc/vm/sparc.ad
@ -1072,7 +1072,13 @@ void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
    __ rdpc(r);
-    if (disp != 0) {
+    if (disp == 0) {
      // Emitting an additional 'nop' instruction in order not to cause a code
      // size adjustment in the code following the table setup (if the instruction
      // immediately following after this section is a CTI).
      __ nop();
    }
    else {
      assert(r != O7, "need temporary");
      __ sub(r, __ ensure_simm13_or_reg(disp, O7), r);
    }
@ -8624,7 +8630,7 @@ instruct branch_short(label labl) %{
  predicate(UseCBCond);
  effect(USE labl);
-  size(4);
+  size(4); // Assuming no NOP inserted.
  ins_cost(BRANCH_COST);
  format %{ "BA     $labl\t! short branch" %}
  ins_encode %{
@ -8965,7 +8971,7 @@ instruct cmpI_reg_branch_short(cmpOp cmp, iRegI op1, iRegI op2, label labl, flag
  predicate(UseCBCond);
  effect(USE labl, KILL icc);
-  size(4);
+  size(4); // Assuming no NOP inserted.
  ins_cost(BRANCH_COST);
  format %{ "CWB$cmp  $op1,$op2,$labl\t! int" %}
  ins_encode %{
@ -8983,7 +8989,7 @@ instruct cmpI_imm_branch_short(cmpOp cmp, iRegI op1, immI5 op2, label labl, flag
  predicate(UseCBCond);
  effect(USE labl, KILL icc);
-  size(4);
+  size(4); // Assuming no NOP inserted.
  ins_cost(BRANCH_COST);
  format %{ "CWB$cmp  $op1,$op2,$labl\t! int" %}
  ins_encode %{
@ -9001,7 +9007,7 @@ instruct cmpU_reg_branch_short(cmpOpU cmp, iRegI op1, iRegI op2, label labl, fla
  predicate(UseCBCond);
  effect(USE labl, KILL icc);
-  size(4);
+  size(4); // Assuming no NOP inserted.
  ins_cost(BRANCH_COST);
  format %{ "CWB$cmp $op1,$op2,$labl\t! unsigned" %}
  ins_encode %{
@ -9019,7 +9025,7 @@ instruct cmpU_imm_branch_short(cmpOpU cmp, iRegI op1, immI5 op2, label labl, fla
  predicate(UseCBCond);
  effect(USE labl, KILL icc);
-  size(4);
+  size(4); // Assuming no NOP inserted.
  ins_cost(BRANCH_COST);
  format %{ "CWB$cmp $op1,$op2,$labl\t! unsigned" %}
  ins_encode %{
@ -9037,7 +9043,7 @@ instruct cmpL_reg_branch_short(cmpOp cmp, iRegL op1, iRegL op2, label labl, flag
  predicate(UseCBCond);
  effect(USE labl, KILL xcc);
-  size(4);
+  size(4); // Assuming no NOP inserted.
  ins_cost(BRANCH_COST);
  format %{ "CXB$cmp  $op1,$op2,$labl\t! long" %}
  ins_encode %{
@ -9055,7 +9061,7 @@ instruct cmpL_imm_branch_short(cmpOp cmp, iRegL op1, immL5 op2, label labl, flag
  predicate(UseCBCond);
  effect(USE labl, KILL xcc);
-  size(4);
+  size(4); // Assuming no NOP inserted.
  ins_cost(BRANCH_COST);
  format %{ "CXB$cmp  $op1,$op2,$labl\t! long" %}
  ins_encode %{
@ -9074,7 +9080,7 @@ instruct cmpP_reg_branch_short(cmpOpP cmp, iRegP op1, iRegP op2, label labl, fla
  predicate(UseCBCond);
  effect(USE labl, KILL pcc);
-  size(4);
+  size(4); // Assuming no NOP inserted.
  ins_cost(BRANCH_COST);
  format %{ "CXB$cmp $op1,$op2,$labl\t! ptr" %}
  ins_encode %{
@ -9092,7 +9098,7 @@ instruct cmpP_null_branch_short(cmpOpP cmp, iRegP op1, immP0 null, label labl, f
  predicate(UseCBCond);
  effect(USE labl, KILL pcc);
-  size(4);
+  size(4); // Assuming no NOP inserted.
  ins_cost(BRANCH_COST);
  format %{ "CXB$cmp $op1,0,$labl\t! ptr" %}
  ins_encode %{
@ -9110,7 +9116,7 @@ instruct cmpN_reg_branch_short(cmpOp cmp, iRegN op1, iRegN op2, label labl, flag
  predicate(UseCBCond);
  effect(USE labl, KILL icc);
-  size(4);
+  size(4); // Assuming no NOP inserted.
  ins_cost(BRANCH_COST);
  format %{ "CWB$cmp  $op1,$op2,$labl\t! compressed ptr" %}
  ins_encode %{
@ -9128,7 +9134,7 @@ instruct cmpN_null_branch_short(cmpOp cmp, iRegN op1, immN0 null, label labl, fl
  predicate(UseCBCond);
  effect(USE labl, KILL icc);
-  size(4);
+  size(4); // Assuming no NOP inserted.
  ins_cost(BRANCH_COST);
  format %{ "CWB$cmp  $op1,0,$labl\t! compressed ptr" %}
  ins_encode %{
@ -9147,7 +9153,7 @@ instruct cmpI_reg_branchLoopEnd_short(cmpOp cmp, iRegI op1, iRegI op2, label lab
  predicate(UseCBCond);
  effect(USE labl, KILL icc);
-  size(4);
+  size(4); // Assuming no NOP inserted.
  ins_cost(BRANCH_COST);
  format %{ "CWB$cmp  $op1,$op2,$labl\t! Loop end" %}
  ins_encode %{
@ -9165,7 +9171,7 @@ instruct cmpI_imm_branchLoopEnd_short(cmpOp cmp, iRegI op1, immI5 op2, label lab
  predicate(UseCBCond);
  effect(USE labl, KILL icc);
-  size(4);
+  size(4); // Assuming no NOP inserted.
  ins_cost(BRANCH_COST);
  format %{ "CWB$cmp  $op1,$op2,$labl\t! Loop end" %}
  ins_encode %{