8164888: Intrinsify fused mac operations on SPARC


Reviewed-by: kvn
Patric Hedlin 2017-06-27 15:50:09 +02:00 committed by Nils Eliasson
parent 6a9aa18f63
commit 431063deba
10 changed files with 218 additions and 45 deletions

View File

@@ -52,8 +52,16 @@ int AbstractInterpreter::BasicType_as_index(BasicType type) {
return i;
}
// These should never be compiled, since if they were, the interpreter
// would prefer the compiled version to the intrinsic version.
bool AbstractInterpreter::can_be_compiled(methodHandle m) {
// No special entry points that preclude compilation
switch (method_kind(m)) {
case Interpreter::java_lang_math_fmaD:
case Interpreter::java_lang_math_fmaF:
return false;
default:
break;
}
return true;
}
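
(Editorial sketch, not part of the patch: Math.fma must return a * b + c rounded exactly once, which is what the SPARC FMAf instruction provides and what makes the intrinsic entry worth protecting here. std::fma stands in for the hardware instruction below; the inputs are chosen so the fused and double-rounded results differ.)

#include <cfloat>
#include <cmath>
#include <cstdio>

int main() {
  double a = 1.0 + DBL_EPSILON;       // 1 + 2^-52, so a*a = 1 + 2^-51 + 2^-104
  double p = a * a;                   // plain product rounds away the 2^-104 term
  double fused = std::fma(a, a, -p);  // single rounding keeps it: 0x1p-104
  double plain = a * a - p;           // two roundings lose it: 0.0
  std::printf("fused residual = %a, plain residual = %a\n", fused, plain);
  return 0;
}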

View File

@@ -628,6 +628,9 @@ class Assembler : public AbstractAssembler {
// CRC32C instruction supported only on certain processors
static void crc32c_only() { assert(VM_Version::has_crc32c(), "This instruction only works on SPARC with CRC32C"); }
// FMAf instructions supported only on certain processors
static void fmaf_only() { assert(VM_Version::has_fmaf(), "This instruction only works on SPARC with FMAf"); }
// instruction only in VIS1
static void vis1_only() { assert(VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
@@ -923,6 +926,10 @@ class Assembler : public AbstractAssembler {
inline void fsqrt(FloatRegisterImpl::Width w, FloatRegister s, FloatRegister d);
// fmaf instructions.
inline void fmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d);
// pp 165
inline void flush(Register s1, Register s2);

View File

@@ -355,6 +355,11 @@ inline void Assembler::fsqrt(FloatRegisterImpl::Width w, FloatRegister s, FloatR
emit_int32(op(arith_op) | fd(d, w) | op3(fpop1_op3) | opf(0x28 + w) | fs2(s, w));
}
inline void Assembler::fmadd(FloatRegisterImpl::Width w, FloatRegister s1, FloatRegister s2, FloatRegister s3, FloatRegister d) {
fmaf_only();
emit_int32(op(arith_op) | fd(d, w) | op3(stpartialf_op3) | fs1(s1, w) | fs3(s3, w) | op5(w) | fs2(s2, w));
}
inline void Assembler::flush(Register s1, Register s2) {
emit_int32(op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2));
}
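
(As a cross-check on the emit above; an editorial sketch, not part of the patch. Per the SPARC architecture manual, the FMAf word packs op=2 in bits 31:30, rd in 29:25, op3=0x37 in 24:19 — the same constant HotSpot's opcode tables name stpartialf_op3 — rs1 in 18:14, rs3 in 13:9, op5 in 8:5 with 1 = FMADDs and 2 = FMADDd matching FloatRegisterImpl::S/D, and rs2 in 4:0. Register arguments below are raw 5-bit encodings; real double registers go through HotSpot's register-encoding helpers.)

#include <cstdint>
#include <cstdio>

static uint32_t fmadd_word(uint32_t w, uint32_t rs1, uint32_t rs2,
                           uint32_t rs3, uint32_t rd) {
  return (2u << 30)      // op: arithmetic
       | (rd << 25)      // destination register
       | (0x37u << 19)   // op3: FMAf group
       | (rs1 << 14)
       | (rs3 << 9)
       | (w << 5)        // op5: width selects FMADDs (1) / FMADDd (2)
       | rs2;
}

int main() {
  // fmadds %f1, %f2, %f3, %f0
  std::printf("fmadds %%f1,%%f2,%%f3,%%f0 = 0x%08x\n", fmadd_word(1, 1, 2, 3, 0));
  return 0;
}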

View File

@@ -440,6 +440,31 @@ void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo *info) {
}
void LIR_Assembler::emit_op3(LIR_Op3* op) {
switch (op->code()) {
case lir_idiv:
case lir_irem: // Both idiv & irem are handled after the switch (below).
break;
case lir_fmaf:
__ fmadd(FloatRegisterImpl::S,
op->in_opr1()->as_float_reg(),
op->in_opr2()->as_float_reg(),
op->in_opr3()->as_float_reg(),
op->result_opr()->as_float_reg());
return;
case lir_fmad:
__ fmadd(FloatRegisterImpl::D,
op->in_opr1()->as_double_reg(),
op->in_opr2()->as_double_reg(),
op->in_opr3()->as_double_reg(),
op->result_opr()->as_double_reg());
return;
default:
ShouldNotReachHere();
break;
}
// Handle idiv & irem:
Register Rdividend = op->in_opr1()->as_register();
Register Rdivisor = noreg;
Register Rscratch = op->in_opr3()->as_register();

View File

@@ -953,7 +953,29 @@ void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
}
void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) {
fatal("FMA intrinsic is not implemented on this platform");
assert(x->number_of_arguments() == 3, "wrong type");
assert(UseFMA, "Needs FMA instructions support.");
LIRItem a(x->argument_at(0), this);
LIRItem b(x->argument_at(1), this);
LIRItem c(x->argument_at(2), this);
a.load_item();
b.load_item();
c.load_item();
LIR_Opr ina = a.result();
LIR_Opr inb = b.result();
LIR_Opr inc = c.result();
LIR_Opr res = rlock_result(x);
switch (x->id()) {
case vmIntrinsics::_fmaF: __ fmaf(ina, inb, inc, res); break;
case vmIntrinsics::_fmaD: __ fmad(ina, inb, inc, res); break;
default:
ShouldNotReachHere();
break;
}
}
void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {

View File

@@ -33,7 +33,7 @@
define_pd_global(bool, BackgroundCompilation, true);
define_pd_global(bool, CICompileOSR, true);
define_pd_global(bool, InlineIntrinsics, false);
define_pd_global(bool, InlineIntrinsics, true);
define_pd_global(bool, PreferInterpreterNativeStubs, false);
define_pd_global(bool, ProfileTraps, true);
define_pd_global(bool, UseOnStackReplacement, true);

View File

@@ -2627,6 +2627,33 @@ enc_class fsqrtd (dflt_reg dst, dflt_reg src) %{
__ fsqrt(FloatRegisterImpl::D, Fsrc, Fdst);
%}
enc_class fmadds (sflt_reg dst, sflt_reg a, sflt_reg b, sflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_SingleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_SingleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_SingleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_SingleFloatRegister_object($c$$reg);
__ fmadd(FloatRegisterImpl::S, Fra, Frb, Frc, Frd);
%}
enc_class fmaddd (dflt_reg dst, dflt_reg a, dflt_reg b, dflt_reg c) %{
MacroAssembler _masm(&cbuf);
FloatRegister Frd = reg_to_DoubleFloatRegister_object($dst$$reg);
FloatRegister Fra = reg_to_DoubleFloatRegister_object($a$$reg);
FloatRegister Frb = reg_to_DoubleFloatRegister_object($b$$reg);
FloatRegister Frc = reg_to_DoubleFloatRegister_object($c$$reg);
__ fmadd(FloatRegisterImpl::D, Fra, Frb, Frc, Frd);
%}
enc_class fmovs (dflt_reg dst, dflt_reg src) %{
MacroAssembler _masm(&cbuf);
@@ -4540,6 +4567,26 @@ pipe_class fdivD_reg_reg(regD dst, regD src1, regD src2) %{
FDIV : C(17);
%}
// Fused floating-point multiply-add float.
pipe_class fmaF_regx4(regF dst, regF src1, regF src2, regF src3) %{
single_instruction;
dst : X(write);
src1 : E(read);
src2 : E(read);
src3 : E(read);
FM : R;
%}
// Fused floating-point multiply-add double.
pipe_class fmaD_regx4(regD dst, regD src1, regD src2, regD src3) %{
single_instruction;
dst : X(write);
src1 : E(read);
src2 : E(read);
src3 : E(read);
FM : R;
%}
// Floating Point Move/Negate/Abs Float
pipe_class faddF_reg(regF dst, regF src) %{
single_instruction;
@@ -7531,6 +7578,24 @@ instruct sqrtD_reg_reg(regD dst, regD src) %{
ins_pipe(fdivD_reg_reg);
%}
// Single precision fused floating-point multiply-add (d = a * b + c).
instruct fmaF_regx4(regF dst, regF a, regF b, regF c) %{
predicate(UseFMA);
match(Set dst (FmaF c (Binary a b)));
format %{ "fmadds $a,$b,$c,$dst\t# $dst = $a * $b + $c" %}
ins_encode(fmadds(dst, a, b, c));
ins_pipe(fmaF_regx4);
%}
// Double precision fused floating-point multiply-add (d = a * b + c).
instruct fmaD_regx4(regD dst, regD a, regD b, regD c) %{
predicate(UseFMA);
match(Set dst (FmaD c (Binary a b)));
format %{ "fmaddd $a,$b,$c,$dst\t# $dst = $a * $b + $c" %}
ins_encode(fmaddd(dst, a, b, c));
ins_pipe(fmaD_regx4);
%}
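
(A note on the operand order in the match rules above; editorial. The ideal Fma node takes the addend first and packs the product operands in a Binary pair, so (FmaF c (Binary a b)) denotes dst = a * b + c, exactly as the format strings spell out. A trivial C++ sketch of that shape:)

#include <cstdio>

// Mirrors the matcher rule: c is the first input; a and b arrive as a
// Binary pair. The value computed is a * b + c.
struct Binary { double a, b; };
static double fma_node(double c, Binary ab) {
  return ab.a * ab.b + c;  // a single fused instruction on FMAf hardware
}

int main() {
  Binary ab = {2.0, 3.0};
  std::printf("%.1f\n", fma_node(1.0, ab));  // 2 * 3 + 1 = 7
  return 0;
}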
//----------Logical Instructions-----------------------------------------------
// And Instructions
// Register And

View File

@@ -153,13 +153,12 @@ address TemplateInterpreterGenerator::generate_slow_signature_handler() {
__ delayed()->srl( G4_scratch, 2, G4_scratch );
__ bind(NextArg);
}
__ bind(done);
__ ret();
__ delayed()->
restore(O0, 0, Lscratch); // caller's Lscratch gets the result handler
__ delayed()->restore(O0, 0, Lscratch); // caller's Lscratch gets the result handler
return entry;
}
@@ -177,7 +176,6 @@ void TemplateInterpreterGenerator::generate_counter_overflow(Label& Lcontinue) {
// returns verified_entry_point or NULL
// we ignore it in any case
__ ba_short(Lcontinue);
}
@@ -196,7 +194,6 @@ address TemplateInterpreterGenerator::generate_abstract_entry(void) {
// the call_VM checks for exception, so we should never return here.
__ should_not_reach_here();
return entry;
}
void TemplateInterpreterGenerator::save_native_result(void) {
@@ -474,7 +471,6 @@ void TemplateInterpreterGenerator::generate_counter_incr(Label* overflow, Label*
__ delayed()->nop();
__ bind(done);
}
}
// Allocate monitor and lock method (asm interpreter)
@@ -590,7 +586,7 @@ void TemplateInterpreterGenerator::generate_stack_overflow_check(Register Rframe
// pop parameters from the caller's stack by adjusting Lesp
// set O0 to Lesp
// compute X = (max_locals - num_parameters)
// bump SP up by X to accomadate the extra locals
// bump SP up by X to accommodate the extra locals
// compute X = max_expression_stack
// + vm_local_words
// + 16 words of register save area
@@ -688,7 +684,7 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
// 1) Increase caller's SP by the extra local space needed:
// (check for overflow)
// Efficient implementation of xload/xstore bytecodes requires
// that arguments and non-argument locals are in a contigously
// that arguments and non-argument locals are in a contiguously
// addressable memory block => non-argument locals must be
// allocated in the caller's frame.
//
@@ -782,7 +778,7 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
__ sub(Gframe_size, Glocals_size, Gframe_size);
//
// bump SP to accomodate the extra locals
// bump SP to accommodate the extra locals
//
__ sub(SP, Glocals_size, SP);
}
@@ -810,9 +806,9 @@ void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {
Register mirror = LcpoolCache;
__ load_mirror(mirror, Lmethod);
__ st_ptr(mirror, FP, (frame::interpreter_frame_mirror_offset * wordSize) + STACK_BIAS);
__ get_constant_pool_cache( LcpoolCache ); // set LcpoolCache
__ get_constant_pool_cache(LcpoolCache); // set LcpoolCache
__ sub(FP, rounded_vm_local_words * BytesPerWord, Lmonitors ); // set Lmonitors
__ add( Lmonitors, STACK_BIAS, Lmonitors ); // Account for 64 bit stack bias
__ add(Lmonitors, STACK_BIAS, Lmonitors); // Account for 64 bit stack bias
__ sub(Lmonitors, BytesPerWord, Lesp); // set Lesp
// setup interpreter activation registers
@@ -984,7 +980,7 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
__ ldx( Gargs, 16, buf);
__ lduw(Gargs, 24, crc);
__ add(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE), buf); // account for the header size
__ add(buf ,offset, buf);
__ add(buf, offset, buf);
}
// Call the crc32 kernel
@@ -1057,8 +1053,58 @@ address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(Abstract
return NULL;
}
// Not supported
address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
/* Math routines only partially supported.
*
* Providing support for fma (float/double) only.
*/
address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind)
{
if (!InlineIntrinsics) return NULL; // Generate a vanilla entry
address entry = __ pc();
switch (kind) {
case Interpreter::java_lang_math_fmaF:
if (UseFMA) {
// float .fma(float a, float b, float c)
const FloatRegister ra = F1;
const FloatRegister rb = F2;
const FloatRegister rc = F3;
const FloatRegister rd = F0; // Result.
__ ldf(FloatRegisterImpl::S, Gargs, 0, rc);
__ ldf(FloatRegisterImpl::S, Gargs, 8, rb);
__ ldf(FloatRegisterImpl::S, Gargs, 16, ra);
__ fmadd(FloatRegisterImpl::S, ra, rb, rc, rd);
__ retl(); // Result in F0 (rd).
__ delayed()->mov(O5_savedSP, SP);
return entry;
}
break;
case Interpreter::java_lang_math_fmaD:
if (UseFMA) {
// double .fma(double a, double b, double c)
const FloatRegister ra = F2; // D1
const FloatRegister rb = F4; // D2
const FloatRegister rc = F6; // D3
const FloatRegister rd = F0; // D0 Result.
__ ldf(FloatRegisterImpl::D, Gargs, 0, rc);
__ ldf(FloatRegisterImpl::D, Gargs, 16, rb);
__ ldf(FloatRegisterImpl::D, Gargs, 32, ra);
__ fmadd(FloatRegisterImpl::D, ra, rb, rc, rd);
__ retl(); // Result in D0 (rd).
__ delayed()->mov(O5_savedSP, SP);
return entry;
}
break;
default:
break;
}
return NULL;
}
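
(The Gargs offsets above follow from the interpreter's argument layout. An editorial sketch, assuming 8-byte expression-stack slots on 64-bit SPARC with the last argument, c, at offset 0, one slot per float and two per double — which reproduces the 0/8/16 and 0/16/32 offsets in the ldf calls:)

#include <cstdio>

int main() {
  const int slot = 8;  // assumed bytes per interpreter stack slot (64-bit)
  // fma(float a, float b, float c): c was pushed last, so it sits lowest
  std::printf("float:  c=%d b=%d a=%d\n", 0 * slot, 1 * slot, 2 * slot);
  // fma(double a, double b, double c): each double spans two slots
  std::printf("double: c=%d b=%d a=%d\n", 0 * slot, 2 * slot, 4 * slot);
  return 0;
}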
@@ -1071,7 +1117,7 @@ void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
// Doing the banging earlier fails if the caller frame is not an interpreter
// frame.
// (Also, the exception throwing code expects to unlock any synchronized
// method receiever, so do the banging after locking the receiver.)
// method receiver, so do the banging after locking the receiver.)
// Bang each page in the shadow zone. We can't assume it's been done for
// an interpreter frame with greater than a page of locals, so each page
@@ -1112,8 +1158,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// rethink these assertions - they can be simplified and shared (gri 2/25/2000)
#ifdef ASSERT
__ ld(G5_method, Method::access_flags_offset(), Gtmp1);
{
Label L;
{ Label L;
__ btst(JVM_ACC_NATIVE, Gtmp1);
__ br(Assembler::notZero, false, Assembler::pt, L);
__ delayed()->nop();
@@ -1362,7 +1407,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// didn't see any synchronization in progress, and escapes.
__ set(_thread_in_native_trans, G3_scratch);
__ st(G3_scratch, thread_state);
if(os::is_MP()) {
if (os::is_MP()) {
if (UseMembar) {
// Force this write out before the read below
__ membar(Assembler::StoreLoad);
@@ -1425,8 +1470,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// If we have an oop result store it where it will be safe for any further gc
// until we return now that we've released the handle it might be protected by
{
Label no_oop, store_result;
{ Label no_oop, store_result;
__ set((intptr_t)AbstractInterpreter::result_handler(T_OBJECT), G3_scratch);
__ cmp_and_brx_short(G3_scratch, Lscratch, Assembler::notEqual, Assembler::pt, no_oop);
@@ -1484,8 +1528,7 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
// dispose of return address and remove activation
#ifdef ASSERT
{
Label ok;
{ Label ok;
__ cmp_and_brx_short(I5_savedSP, FP, Assembler::greaterEqualUnsigned, Assembler::pt, ok);
__ stop("bad I5_savedSP value");
__ should_not_reach_here();
@@ -1495,15 +1538,12 @@ address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {
__ jmp(Lscratch, 0);
__ delayed()->nop();
if (inc_counter) {
// handle invocation counter overflow
__ bind(invocation_counter_overflow);
generate_counter_overflow(Lcontinue);
}
return entry;
}
@@ -1533,8 +1573,7 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
// rethink these assertions - they can be simplified and shared (gri 2/25/2000)
#ifdef ASSERT
__ ld(G5_method, Method::access_flags_offset(), Gtmp1);
{
Label L;
{ Label L;
__ btst(JVM_ACC_NATIVE, Gtmp1);
__ br(Assembler::zero, false, Assembler::pt, L);
__ delayed()->nop();
@@ -1666,7 +1705,6 @@ address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
generate_counter_overflow(Lcontinue);
}
return entry;
}
@@ -1786,8 +1824,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
}
#if INCLUDE_JVMTI
{
Label L_done;
{ Label L_done;
__ ldub(Address(Lbcp, 0), G1_scratch); // Load current bytecode
__ cmp_and_br_short(G1_scratch, Bytecodes::_invokestatic, Assembler::notEqual, Assembler::pn, L_done);
@@ -1827,7 +1864,7 @@ void TemplateInterpreterGenerator::generate_throw_exception() {
__ get_vm_result(Oexception);
__ verify_oop(Oexception);
const int return_reg_adjustment = frame::pc_return_offset;
Address issuing_pc_addr(I7, return_reg_adjustment);
// We are done with this activation frame; find out where to go next.

View File

@@ -317,7 +317,11 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
}
if (UseFMA) {
if (has_fmaf()) {
if (FLAG_IS_DEFAULT(UseFMA)) {
UseFMA = true;
}
} else if (UseFMA) {
warning("FMA instructions are not available on this CPU");
FLAG_SET_DEFAULT(UseFMA, false);
}
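
(The control flow above is the usual capability gate: default the flag on when the CPU supports the feature, warn and override an explicit request when it does not. A standalone editorial sketch with stand-in variables, not HotSpot's FLAG_* macros or the VM_Version API:)

#include <cstdio>

static bool cpu_has_fmaf    = false;  // stand-in for VM_Version::has_fmaf()
static bool use_fma         = true;   // stand-in for the UseFMA flag value
static bool flag_is_default = false;  // stand-in for FLAG_IS_DEFAULT(UseFMA)

int main() {
  if (cpu_has_fmaf) {
    if (flag_is_default) {
      use_fma = true;   // capable CPU: opt in by default
    }
  } else if (use_fma) {
    std::puts("warning: FMA instructions are not available on this CPU");
    use_fma = false;    // incapable CPU: override the explicit request
  }
  std::printf("UseFMA = %s\n", use_fma ? "true" : "false");
  return 0;
}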

View File

@@ -248,7 +248,7 @@ class StubRoutines: AllStatic {
static jint verify_oop_count() { return _verify_oop_count; }
static jint* verify_oop_count_addr() { return &_verify_oop_count; }
// a subroutine for debugging the GC
static address verify_oop_subroutine_entry_address() { return (address)&_verify_oop_subroutine_entry; }
static address catch_exception_entry() { return _catch_exception_entry; }
@@ -335,8 +335,8 @@ class StubRoutines: AllStatic {
static address checkcast_arraycopy(bool dest_uninitialized = false) {
return dest_uninitialized ? _checkcast_arraycopy_uninit : _checkcast_arraycopy;
}
static address unsafe_arraycopy() { return _unsafe_arraycopy; }
static address generic_arraycopy() { return _generic_arraycopy; }
static address jbyte_fill() { return _jbyte_fill; }
static address jshort_fill() { return _jshort_fill; }
@@ -349,8 +349,8 @@ class StubRoutines: AllStatic {
static address aescrypt_decryptBlock() { return _aescrypt_decryptBlock; }
static address cipherBlockChaining_encryptAESCrypt() { return _cipherBlockChaining_encryptAESCrypt; }
static address cipherBlockChaining_decryptAESCrypt() { return _cipherBlockChaining_decryptAESCrypt; }
static address counterMode_AESCrypt() { return _counterMode_AESCrypt; }
static address ghash_processBlocks() { return _ghash_processBlocks; }
static address sha1_implCompress() { return _sha1_implCompress; }
static address sha1_implCompressMB() { return _sha1_implCompressMB; }
@@ -366,9 +366,9 @@ class StubRoutines: AllStatic {
static address updateBytesCRC32C() { return _updateBytesCRC32C; }
static address updateBytesAdler32() { return _updateBytesAdler32; }
static address multiplyToLen() {return _multiplyToLen; }
static address squareToLen() {return _squareToLen; }
static address mulAdd() {return _mulAdd; }
static address multiplyToLen() { return _multiplyToLen; }
static address squareToLen() { return _squareToLen; }
static address mulAdd() { return _mulAdd; }
static address montgomeryMultiply() { return _montgomeryMultiply; }
static address montgomerySquare() { return _montgomerySquare; }
@@ -376,7 +376,7 @@ class StubRoutines: AllStatic {
static address dexp() { return _dexp; }
static address dlog() { return _dlog; }
static address dlog10() { return _dlog10; }
static address dpow() { return _dpow; }
static address dsin() { return _dsin; }
static address dcos() { return _dcos; }
@@ -387,7 +387,7 @@ class StubRoutines: AllStatic {
static address select_fill_function(BasicType t, bool aligned, const char* &name);
static address zero_aligned_words() { return _zero_aligned_words; }
static double intrinsic_log10(double d) {
assert(_intrinsic_log10 != NULL, "must be defined");