8175369: [ppc] Provide intrinsic implementation for CRC32C

Reviewed-by: mdoerr, simonis, kvn
Lutz Schmidt 2017-03-08 17:01:13 -08:00
parent 99a554c5ad
commit 0171aad88e
10 changed files with 963 additions and 547 deletions

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 SAP SE. All rights reserved.
* Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -3177,9 +3177,8 @@ void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
assert_different_registers(val, crc, res);
__ load_const_optimized(res, StubRoutines::crc_table_addr(), R0);
__ nand(crc, crc, crc); // ~crc
__ update_byte_crc32(crc, val, res);
__ nand(res, crc, crc); // ~crc
__ kernel_crc32_singleByteReg(crc, val, res, true);
__ mr(res, crc);
}
#undef __
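
For reference, a minimal standalone C++ sketch of what the new kernel_crc32_singleByteReg(crc, val, table, true) computes in place of the open-coded nand/update/nand sequence above. The table layout and helper are illustrative, not the HotSpot emitters; the point is the pre- and post-inversion around a single table-driven byte step.

#include <cstdint>

// One step of a reflected, table-driven CRC-32 (illustrative; mirrors what
// MacroAssembler::update_byte_crc32 computes for a 256-entry byte table).
static inline uint32_t byte_step(uint32_t crc, uint8_t val, const uint32_t table[256]) {
  return table[(crc ^ val) & 0xffu] ^ (crc >> 8);
}

// Rough equivalent of kernel_crc32_singleByteReg(crc, val, table, /*invertCRC=*/true):
// java.util.zip.CRC32 stores the externally visible value, so the state is
// bit-flipped before and after the update (the two nand instructions).
static inline uint32_t crc32_update_single_byte(uint32_t crc, uint8_t val, const uint32_t table[256]) {
  crc = ~crc;                        // to internal (updating) representation
  crc = byte_step(crc, val, table);
  return ~crc;                       // back to the externally visible representation
}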

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2005, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 SAP SE. All rights reserved.
* Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -63,18 +63,6 @@ void LIRItem::load_nonconstant() {
}
inline void load_int_as_long(LIR_List *ll, LIRItem &li, LIR_Opr dst) {
LIR_Opr r = li.value()->operand();
if (r->is_register()) {
LIR_Opr dst_l = FrameMap::as_long_opr(dst->as_register());
ll->convert(Bytecodes::_i2l, li.result(), dst_l); // Convert.
} else {
// Constants or memory get loaded with sign extend on this platform.
ll->move(li.result(), dst);
}
}
//--------------------------------------------------------------
// LIRGenerator
//--------------------------------------------------------------
@ -1419,10 +1407,9 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) {
arg2 = cc->at(1),
arg3 = cc->at(2);
// CCallingConventionRequiresIntsAsLongs
crc.load_item_force(arg1); // We skip int->long conversion here, because CRC32 stub doesn't care about high bits.
__ leal(LIR_OprFact::address(a), arg2);
load_int_as_long(gen()->lir(), len, arg3);
len.load_item_force(arg3); // We skip int->long conversion here, because CRC32 stub expects int.
__ call_runtime_leaf(StubRoutines::updateBytesCRC32(), LIR_OprFact::illegalOpr, result_reg, cc->args());
__ move(result_reg, result);
@ -1434,6 +1421,66 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) {
}
}
void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
assert(UseCRC32CIntrinsics, "or should not be here");
LIR_Opr result = rlock_result(x);
switch (x->id()) {
case vmIntrinsics::_updateBytesCRC32C:
case vmIntrinsics::_updateDirectByteBufferCRC32C: {
bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C);
LIRItem crc(x->argument_at(0), this);
LIRItem buf(x->argument_at(1), this);
LIRItem off(x->argument_at(2), this);
LIRItem len(x->argument_at(3), this);
buf.load_item();
off.load_nonconstant();
LIR_Opr index = off.result();
int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0;
if (off.result()->is_constant()) {
index = LIR_OprFact::illegalOpr;
offset += off.result()->as_jint();
}
LIR_Opr base_op = buf.result();
LIR_Address* a = NULL;
if (index->is_valid()) {
LIR_Opr tmp = new_register(T_LONG);
__ convert(Bytecodes::_i2l, index, tmp);
index = tmp;
__ add(index, LIR_OprFact::intptrConst(offset), index);
a = new LIR_Address(base_op, index, T_BYTE);
} else {
a = new LIR_Address(base_op, offset, T_BYTE);
}
BasicTypeList signature(3);
signature.append(T_INT);
signature.append(T_ADDRESS);
signature.append(T_INT);
CallingConvention* cc = frame_map()->c_calling_convention(&signature);
const LIR_Opr result_reg = result_register_for(x->type());
LIR_Opr arg1 = cc->at(0),
arg2 = cc->at(1),
arg3 = cc->at(2);
crc.load_item_force(arg1); // We skip int->long conversion here, because CRC32 stub doesn't care about high bits.
__ leal(LIR_OprFact::address(a), arg2);
len.load_item_force(arg3); // We skip int->long conversion here, because CRC32 stub expects int.
__ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), LIR_OprFact::illegalOpr, result_reg, cc->args());
__ move(result_reg, result);
break;
}
default: {
ShouldNotReachHere();
}
}
}
void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) {
assert(x->number_of_arguments() == 3, "wrong type");
assert(UseFMA, "Needs FMA instructions support.");
@ -1460,7 +1507,3 @@ void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) {
void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {
fatal("vectorizedMismatch intrinsic is not implemented on this platform");
}
void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
Unimplemented();
}
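
The Unimplemented() placeholder above is what the new do_update_CRC32C lowering replaces. To make that lowering's address arithmetic concrete, a plain C++ sketch (illustrative only; header stands in for arrayOopDesc::base_offset_in_bytes(T_BYTE) and is 0 for the DirectByteBuffer case, where the buffer argument is already a raw address):

#include <cstdint>
#include <cstddef>

// Effective address passed to the CRC32C stub. Both branches describe the same
// byte; they differ only in whether the offset is known at compile time.
static const uint8_t* crc32c_buf_address(const uint8_t* base, int32_t off,
                                         bool off_is_constant, ptrdiff_t header) {
  if (off_is_constant) {
    return base + header + off;                        // folded into the displacement
  }
  return base + (static_cast<int64_t>(off) + header);  // i2l the index, then add the header
}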

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 SAP SE. All rights reserved.
* Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -4092,7 +4092,7 @@ void MacroAssembler::update_byte_crc32(Register crc, Register val, Register tabl
* @param table register pointing to CRC table
*/
void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
Register data, bool loopAlignment, bool invertCRC) {
Register data, bool loopAlignment) {
assert_different_registers(crc, buf, len, table, data);
Label L_mainLoop, L_done;
@ -4103,10 +4103,6 @@ void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register
clrldi_(len, len, 32); // Enforce 32 bit. Anything to do?
beq(CCR0, L_done);
if (invertCRC) {
nand(crc, crc, crc); // ~c
}
mtctr(len);
align(mainLoop_alignment);
BIND(L_mainLoop);
@ -4115,10 +4111,6 @@ void MacroAssembler::update_byteLoop_crc32(Register crc, Register buf, Register
update_byte_crc32(crc, data, table);
bdnz(L_mainLoop); // Iterate.
if (invertCRC) {
nand(crc, crc, crc); // ~c
}
bind(L_done);
}
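
With the invertCRC parameter gone, update_byteLoop_crc32 is now a pure byte loop; any required bit-flipping is done by its kernel_crc32_* callers. Conceptually (a sketch, not the emitted PPC code) the loop computes:

#include <cstdint>
#include <cstddef>

// Byte-at-a-time, table-driven CRC update with no inversion inside the loop.
static uint32_t byte_loop(uint32_t crc, const uint8_t* buf, size_t len, const uint32_t table[256]) {
  while (len-- != 0) {
    crc = table[(crc ^ *buf++) & 0xffu] ^ (crc >> 8);
  }
  return crc;
}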
@ -4175,7 +4167,8 @@ void MacroAssembler::update_1word_crc32(Register crc, Register buf, Register tab
*/
void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3,
Register tc0, Register tc1, Register tc2, Register tc3) {
Register tc0, Register tc1, Register tc2, Register tc3,
bool invertCRC) {
assert_different_registers(crc, buf, len, table);
Label L_mainLoop, L_tail;
@ -4189,14 +4182,16 @@ void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len
const int complexThreshold = 2*mainLoop_stepping;
// Don't test for len <= 0 here. This pathological case should not occur anyway.
// Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
// The situation itself is detected and handled correctly by the conditional branches
// following aghi(len, -stepping) and aghi(len, +stepping).
// Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles
// for all well-behaved cases. The situation itself is detected and handled correctly
// within update_byteLoop_crc32.
assert(tailLoop_stepping == 1, "check tailLoop_stepping!");
BLOCK_COMMENT("kernel_crc32_2word {");
nand(crc, crc, crc); // ~c
if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
// Check for short (<mainLoop_stepping) buffer.
cmpdi(CCR0, len, complexThreshold);
@ -4217,7 +4212,7 @@ void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len
blt(CCR0, L_tail); // For less than one mainloop_stepping left, do only tail processing
mr(len, tmp); // remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
}
update_byteLoop_crc32(crc, buf, tmp2, table, data, false, false);
update_byteLoop_crc32(crc, buf, tmp2, table, data, false);
}
srdi(tmp2, len, log_stepping); // #iterations for mainLoop
@ -4253,9 +4248,11 @@ void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len
// Process last few (<complexThreshold) bytes of buffer.
BIND(L_tail);
update_byteLoop_crc32(crc, buf, len, table, data, false, false);
update_byteLoop_crc32(crc, buf, len, table, data, false);
nand(crc, crc, crc); // ~c
if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
BLOCK_COMMENT("} kernel_crc32_2word");
}
@ -4269,7 +4266,8 @@ void MacroAssembler::kernel_crc32_2word(Register crc, Register buf, Register len
*/
void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3,
Register tc0, Register tc1, Register tc2, Register tc3) {
Register tc0, Register tc1, Register tc2, Register tc3,
bool invertCRC) {
assert_different_registers(crc, buf, len, table);
Label L_mainLoop, L_tail;
@ -4283,14 +4281,16 @@ void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len
const int complexThreshold = 2*mainLoop_stepping;
// Don't test for len <= 0 here. This pathological case should not occur anyway.
// Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles.
// The situation itself is detected and handled correctly by the conditional branches
// following aghi(len, -stepping) and aghi(len, +stepping).
// Optimizing for it by adding a test and a branch seems to be a waste of CPU cycles
// for all well-behaved cases. The situation itself is detected and handled correctly
// within update_byteLoop_crc32.
assert(tailLoop_stepping == 1, "check tailLoop_stepping!");
BLOCK_COMMENT("kernel_crc32_1word {");
nand(crc, crc, crc); // ~c
if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
// Check for short (<mainLoop_stepping) buffer.
cmpdi(CCR0, len, complexThreshold);
@ -4311,7 +4311,7 @@ void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len
blt(CCR0, L_tail); // For less than one mainloop_stepping left, do only tail processing
mr(len, tmp); // remaining bytes for main loop (>=mainLoop_stepping is guaranteed).
}
update_byteLoop_crc32(crc, buf, tmp2, table, data, false, false);
update_byteLoop_crc32(crc, buf, tmp2, table, data, false);
}
srdi(tmp2, len, log_stepping); // #iterations for mainLoop
@ -4346,9 +4346,11 @@ void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len
// Process last few (<complexThreshold) bytes of buffer.
BIND(L_tail);
update_byteLoop_crc32(crc, buf, len, table, data, false, false);
update_byteLoop_crc32(crc, buf, len, table, data, false);
nand(crc, crc, crc); // ~c
if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
BLOCK_COMMENT("} kernel_crc32_1word");
}
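
The word-oriented kernels (kernel_crc32_1word/_2word) consume the buffer one or two 32-bit words per main-loop iteration using four pre-computed table columns whose addresses are kept in tc0..tc3. A minimal C++ sketch of one such word step, under the assumptions that the load is little-endian (the VM_LITTLE_ENDIAN path) and that the four tables follow the usual slicing-by-4 construction (t[0] is the ordinary byte table and t[k][i] extends t[k-1][i] by one zero byte); the real column tables and the big-endian variant differ in layout:

#include <cstdint>
#include <cstring>

// One main-loop step of a slicing-by-4 CRC kernel (illustrative).
static uint32_t word_step(uint32_t crc, const uint8_t* buf, const uint32_t t[4][256]) {
  uint32_t w;
  std::memcpy(&w, buf, 4);   // unaligned little-endian load
  crc ^= w;
  return t[3][ crc        & 0xffu] ^
         t[2][(crc >>  8) & 0xffu] ^
         t[1][(crc >> 16) & 0xffu] ^
         t[0][(crc >> 24) & 0xffu];
}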
@ -4361,16 +4363,24 @@ void MacroAssembler::kernel_crc32_1word(Register crc, Register buf, Register len
* Uses R7_ARG5, R8_ARG6 as work registers.
*/
void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3) {
Register t0, Register t1, Register t2, Register t3,
bool invertCRC) {
assert_different_registers(crc, buf, len, table);
Register data = t0; // Holds the current byte to be folded into crc.
BLOCK_COMMENT("kernel_crc32_1byte {");
// Process all bytes in a single-byte loop.
update_byteLoop_crc32(crc, buf, len, table, data, true, true);
if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
// Process all bytes in a single-byte loop.
update_byteLoop_crc32(crc, buf, len, table, data, true);
if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
BLOCK_COMMENT("} kernel_crc32_1byte");
}
@ -4388,7 +4398,8 @@ void MacroAssembler::kernel_crc32_1byte(Register crc, Register buf, Register len
*/
void MacroAssembler::kernel_crc32_1word_vpmsumd(Register crc, Register buf, Register len, Register table,
Register constants, Register barretConstants,
Register t0, Register t1, Register t2, Register t3, Register t4) {
Register t0, Register t1, Register t2, Register t3, Register t4,
bool invertCRC) {
assert_different_registers(crc, buf, len, table);
Label L_alignedHead, L_tail, L_alignTail, L_start, L_end;
@ -4406,13 +4417,15 @@ void MacroAssembler::kernel_crc32_1word_vpmsumd(Register crc, Register buf, Regi
Register tc0 = t4;
Register tc1 = constants;
Register tc2 = barretConstants;
kernel_crc32_1word(crc, buf, len, table,t0, t1, t2, t3, tc0, tc1, tc2, table);
kernel_crc32_1word(crc, buf, len, table,t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
b(L_end);
BIND(L_start);
// 2. ~c
nand(crc, crc, crc);
if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
// 3. calculate from 0 to first 128bit-aligned address
clrldi_(prealign, buf, 57);
@ -4421,7 +4434,7 @@ void MacroAssembler::kernel_crc32_1word_vpmsumd(Register crc, Register buf, Regi
subfic(prealign, prealign, 128);
subf(len, prealign, len);
update_byteLoop_crc32(crc, buf, prealign, table, t2, false, false);
update_byteLoop_crc32(crc, buf, prealign, table, t2, false);
// 4. calculate from first 128bit-aligned address to last 128bit-aligned address
BIND(L_alignedHead);
@ -4436,12 +4449,14 @@ void MacroAssembler::kernel_crc32_1word_vpmsumd(Register crc, Register buf, Regi
cmpdi(CCR0, postalign, 0);
beq(CCR0, L_tail);
update_byteLoop_crc32(crc, buf, postalign, table, t2, false, false);
update_byteLoop_crc32(crc, buf, postalign, table, t2, false);
BIND(L_tail);
// 6. ~c
nand(crc, crc, crc);
if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
BIND(L_end);
@ -4933,16 +4948,35 @@ void MacroAssembler::kernel_crc32_1word_aligned(Register crc, Register buf, Regi
offsetInt -= 8; ld(R31, offsetInt, R1_SP);
}
void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp) {
void MacroAssembler::kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp, bool invertCRC) {
assert_different_registers(crc, buf, /* len, not used!! */ table, tmp);
BLOCK_COMMENT("kernel_crc32_singleByte:");
nand(crc, crc, crc); // ~c
if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
lbz(tmp, 0, buf); // Byte from buffer, zero-extended.
update_byte_crc32(crc, tmp, table);
nand(crc, crc, crc); // ~c
if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
}
void MacroAssembler::kernel_crc32_singleByteReg(Register crc, Register val, Register table, bool invertCRC) {
assert_different_registers(crc, val, table);
BLOCK_COMMENT("kernel_crc32_singleByteReg:");
if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
update_byte_crc32(crc, val, table);
if (invertCRC) {
nand(crc, crc, crc); // 1s complement of crc
}
}
// dest_lo += src1 + src2

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 SAP SE. All rights reserved.
* Copyright (c) 2002, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -817,33 +817,47 @@ class MacroAssembler: public Assembler {
Register tmp6, Register tmp7, Register tmp8, Register tmp9, Register tmp10,
Register tmp11, Register tmp12, Register tmp13);
// CRC32 Intrinsics.
// Emitters for CRC32 calculation.
// A note on invertCRC:
// Unfortunately, internal representation of crc differs between CRC32 and CRC32C.
// CRC32 holds its current crc value in the externally visible representation.
// CRC32C holds its current crc value in internal format, ready for updating.
// Thus, the crc value must be bit-flipped before updating it in the CRC32 case.
// In the CRC32C case, it must be bit-flipped when it is given to the outside world (getValue()).
// The bool invertCRC parameter indicates whether bit-flipping is required before updates.
void load_reverse_32(Register dst, Register src);
int crc32_table_columns(Register table, Register tc0, Register tc1, Register tc2, Register tc3);
void fold_byte_crc32(Register crc, Register val, Register table, Register tmp);
void fold_8bit_crc32(Register crc, Register table, Register tmp);
void update_byte_crc32(Register crc, Register val, Register table);
void update_byteLoop_crc32(Register crc, Register buf, Register len, Register table,
Register data, bool loopAlignment, bool invertCRC);
Register data, bool loopAlignment);
void update_1word_crc32(Register crc, Register buf, Register table, int bufDisp, int bufInc,
Register t0, Register t1, Register t2, Register t3,
Register tc0, Register tc1, Register tc2, Register tc3);
void kernel_crc32_2word(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3,
Register tc0, Register tc1, Register tc2, Register tc3);
Register tc0, Register tc1, Register tc2, Register tc3,
bool invertCRC);
void kernel_crc32_1word(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3,
Register tc0, Register tc1, Register tc2, Register tc3);
Register tc0, Register tc1, Register tc2, Register tc3,
bool invertCRC);
void kernel_crc32_1byte(Register crc, Register buf, Register len, Register table,
Register t0, Register t1, Register t2, Register t3);
Register t0, Register t1, Register t2, Register t3,
bool invertCRC);
void kernel_crc32_1word_vpmsumd(Register crc, Register buf, Register len, Register table,
Register constants, Register barretConstants,
Register t0, Register t1, Register t2, Register t3, Register t4);
Register t0, Register t1, Register t2, Register t3, Register t4,
bool invertCRC);
void kernel_crc32_1word_aligned(Register crc, Register buf, Register len,
Register constants, Register barretConstants,
Register t0, Register t1, Register t2);
void kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp);
void kernel_crc32_singleByte(Register crc, Register buf, Register len, Register table, Register tmp,
bool invertCRC);
void kernel_crc32_singleByteReg(Register crc, Register val, Register table,
bool invertCRC);
//
// Debugging
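
To make the invertCRC note above concrete, a small C++ sketch of the two conventions, assuming a raw kernel that works on the inverted (internal) state; the names are illustrative, not JDK APIs:

#include <cstdint>
#include <cstddef>

// Raw kernel: operates on the internal (inverted) state.
static uint32_t kernel(uint32_t state, const uint8_t* buf, size_t len, const uint32_t table[256]) {
  while (len-- != 0) state = table[(state ^ *buf++) & 0xffu] ^ (state >> 8);
  return state;
}

// CRC32 convention: the stored value is the externally visible one,
// so it is flipped on the way in and out (invertCRC == true).
static uint32_t crc32_update(uint32_t visible, const uint8_t* buf, size_t len, const uint32_t table[256]) {
  return ~kernel(~visible, buf, len, table);
}

// CRC32C convention: the stored value is already the internal state,
// so the kernel runs as-is (invertCRC == false) and only getValue() flips it.
static uint32_t crc32c_update(uint32_t state, const uint8_t* buf, size_t len, const uint32_t table[256]) {
  return kernel(state, buf, len, table);
}
static uint32_t crc32c_get_value(uint32_t state) { return ~state; }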

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 SAP SE. All rights reserved.
* Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -3276,6 +3276,36 @@ class StubGenerator: public StubCodeGenerator {
return start;
}
// Compute CRC32/CRC32C function.
void generate_CRC_updateBytes(const char* name, Register table, bool invertCRC) {
// arguments to kernel_crc32:
const Register crc = R3_ARG1; // Current checksum, preset by caller or result from previous call.
const Register data = R4_ARG2; // source byte array
const Register dataLen = R5_ARG3; // #bytes to process
const Register t0 = R2;
const Register t1 = R7;
const Register t2 = R8;
const Register t3 = R9;
const Register tc0 = R10;
const Register tc1 = R11;
const Register tc2 = R12;
BLOCK_COMMENT("Stub body {");
assert_different_registers(crc, data, dataLen, table);
__ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table, invertCRC);
BLOCK_COMMENT("return");
__ mr_if_needed(R3_RET, crc); // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
__ blr();
BLOCK_COMMENT("} Stub body");
}
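
The new generate_CRC_updateBytes emitter is shared by CRC32 and CRC32C: the code path is identical, and only the table (that is, the polynomial) and the inversion convention differ. A self-contained C++ sketch of that sharing, using the standard reflected polynomials and the well-known check values for the input "123456789"; the single 256-entry table is a simplification of the multi-column tables the stub actually loads:

#include <cassert>
#include <cstdint>
#include <cstddef>

// Build the ordinary 256-entry byte table for a reflected polynomial.
static void make_table(uint32_t poly, uint32_t table[256]) {
  for (uint32_t i = 0; i < 256; i++) {
    uint32_t c = i;
    for (int k = 0; k < 8; k++) c = (c & 1) ? (poly ^ (c >> 1)) : (c >> 1);
    table[i] = c;
  }
}

// Shared kernel, analogous to generate_CRC_updateBytes(name, table, invertCRC):
// one code path, parameterized only by the table and the inversion convention.
static uint32_t crc_update_bytes(uint32_t crc, const uint8_t* buf, size_t len,
                                 const uint32_t table[256], bool invertCRC) {
  if (invertCRC) crc = ~crc;
  while (len-- != 0) crc = table[(crc ^ *buf++) & 0xffu] ^ (crc >> 8);
  if (invertCRC) crc = ~crc;
  return crc;
}

int main() {
  uint32_t crc32_table[256], crc32c_table[256];
  make_table(0xEDB88320u, crc32_table);   // CRC-32 (zlib polynomial, reflected)
  make_table(0x82F63B78u, crc32c_table);  // CRC-32C (Castagnoli polynomial, reflected)
  const uint8_t* msg = reinterpret_cast<const uint8_t*>("123456789");

  // CRC32 convention: the state is the externally visible value, initial value 0.
  assert(crc_update_bytes(0, msg, 9, crc32_table, /*invertCRC=*/true) == 0xCBF43926u);

  // CRC32C convention: the state is the internal value, initial value ~0;
  // getValue() flips it when handing it to the outside world.
  uint32_t state = ~0u;
  state = crc_update_bytes(state, msg, 9, crc32c_table, /*invertCRC=*/false);
  assert(~state == 0xE3069283u);
  return 0;
}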
/**
* Arguments:
*
@ -3296,14 +3326,14 @@ class StubGenerator: public StubCodeGenerator {
StubCodeMark mark(this, "StubRoutines", name);
address start = __ function_entry(); // Remember stub start address (is rtn value).
const Register table = R6; // crc table address
#ifdef VM_LITTLE_ENDIAN
// arguments to kernel_crc32:
const Register crc = R3_ARG1; // Current checksum, preset by caller or result from previous call.
const Register data = R4_ARG2; // source byte array
const Register dataLen = R5_ARG3; // #bytes to process
const Register table = R6; // crc table address
#ifdef VM_LITTLE_ENDIAN
if (VM_Version::has_vpmsumb()) {
const Register constants = R2; // constants address
const Register bconstants = R8; // barret table address
@ -3321,7 +3351,7 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::ppc64::generate_load_crc_constants_addr(_masm, constants);
StubRoutines::ppc64::generate_load_crc_barret_constants_addr(_masm, bconstants);
__ kernel_crc32_1word_vpmsumd(crc, data, dataLen, table, constants, bconstants, t0, t1, t2, t3, t4);
__ kernel_crc32_1word_vpmsumd(crc, data, dataLen, table, constants, bconstants, t0, t1, t2, t3, t4, true);
BLOCK_COMMENT("return");
__ mr_if_needed(R3_RET, crc); // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
@ -3331,31 +3361,79 @@ class StubGenerator: public StubCodeGenerator {
} else
#endif
{
const Register t0 = R2;
const Register t1 = R7;
const Register t2 = R8;
const Register t3 = R9;
const Register tc0 = R10;
const Register tc1 = R11;
const Register tc2 = R12;
StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table);
generate_CRC_updateBytes(name, table, true);
}
return start;
}
/**
* Arguments:
*
* Inputs:
* R3_ARG1 - int crc
* R4_ARG2 - byte* buf
* R5_ARG3 - int length (of buffer)
*
* scratch:
* R2, R6-R12
*
* Output:
* R3_RET - int crc result
*/
// Compute CRC32C function.
address generate_CRC32C_updateBytes(const char* name) {
__ align(CodeEntryAlignment);
StubCodeMark mark(this, "StubRoutines", name);
address start = __ function_entry(); // Remember stub start address (is rtn value).
const Register table = R6; // crc table address
#if 0 // no vector support yet for CRC32C
#ifdef VM_LITTLE_ENDIAN
// arguments to kernel_crc32:
const Register crc = R3_ARG1; // Current checksum, preset by caller or result from previous call.
const Register data = R4_ARG2; // source byte array
const Register dataLen = R5_ARG3; // #bytes to process
if (VM_Version::has_vpmsumb()) {
const Register constants = R2; // constants address
const Register bconstants = R8; // barret table address
const Register t0 = R9;
const Register t1 = R10;
const Register t2 = R11;
const Register t3 = R12;
const Register t4 = R7;
BLOCK_COMMENT("Stub body {");
assert_different_registers(crc, data, dataLen, table);
StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table);
StubRoutines::ppc64::generate_load_crc32c_table_addr(_masm, table);
StubRoutines::ppc64::generate_load_crc32c_constants_addr(_masm, constants);
StubRoutines::ppc64::generate_load_crc32c_barret_constants_addr(_masm, bconstants);
__ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, table);
__ kernel_crc32_1word_vpmsumd(crc, data, dataLen, table, constants, bconstants, t0, t1, t2, t3, t4, false);
BLOCK_COMMENT("return");
__ mr_if_needed(R3_RET, crc); // Updated crc is function result. No copying required (R3_ARG1 == R3_RET).
__ blr();
BLOCK_COMMENT("} Stub body");
} else
#endif
#endif
{
StubRoutines::ppc64::generate_load_crc32c_table_addr(_masm, table);
generate_CRC_updateBytes(name, table, false);
}
return start;
}
// Initialization
void generate_initial() {
// Generates all stubs and initializes the entry points
@ -3383,6 +3461,12 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::_crc_table_adr = (address)StubRoutines::ppc64::_crc_table;
StubRoutines::_updateBytesCRC32 = generate_CRC32_updateBytes("CRC32_updateBytes");
}
// CRC32C Intrinsics.
if (UseCRC32CIntrinsics) {
StubRoutines::_crc32c_table_addr = (address)StubRoutines::ppc64::_crc32c_table;
StubRoutines::_updateBytesCRC32C = generate_CRC32C_updateBytes("CRC32C_updateBytes");
}
}
void generate_all() {

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2002, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 SAP SE. All rights reserved.
* Copyright (c) 2002, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -55,13 +55,16 @@ class ppc64 {
// CRC32 Intrinsics.
static juint _crc_table[CRC32_TABLES][CRC32_COLUMN_SIZE];
static juint _crc32c_table[CRC32_TABLES][CRC32_COLUMN_SIZE];
static juint* _constants;
static juint* _barret_constants;
public:
// CRC32 Intrinsics.
static void generate_load_table_addr(MacroAssembler* masm, Register table, address table_addr, uint64_t table_contents);
static void generate_load_crc_table_addr(MacroAssembler* masm, Register table);
static void generate_load_crc32c_table_addr(MacroAssembler* masm, Register table);
static void generate_load_crc_constants_addr(MacroAssembler* masm, Register table);
static void generate_load_crc_barret_constants_addr(MacroAssembler* masm, Register table);
static juint* generate_crc_constants();

File diff suppressed because it is too large.

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2015, 2016 SAP SE. All rights reserved.
* Copyright (c) 2015, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -1895,7 +1895,7 @@ address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
__ lwz(crc, 2*wordSize, argP); // Current crc state, zero extend to 64 bit to have a clean register.
StubRoutines::ppc64::generate_load_crc_table_addr(_masm, table);
__ kernel_crc32_singleByte(crc, data, dataLen, table, tmp);
__ kernel_crc32_singleByte(crc, data, dataLen, table, tmp, true);
// Restore caller sp for c2i case and return.
__ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
@ -1911,6 +1911,10 @@ address TemplateInterpreterGenerator::generate_CRC32_update_entry() {
return NULL;
}
// TODO: generate_CRC32_updateBytes_entry and generate_CRC32C_updateBytes_entry are identical
// except for using different crc tables and some block comment strings.
// We should provide a common implementation.
// CRC32 Intrinsics.
/**
* Method entry for static native methods:
@ -1987,7 +1991,7 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
// Performance measurements show the 1word and 2word variants to be almost equivalent,
// with very light advantages for the 1word variant. We chose the 1word variant for
// code compactness.
__ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, tc3);
__ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, tc3, true);
// Restore caller sp for c2i case and return.
__ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
@ -2003,8 +2007,84 @@ address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractI
return NULL;
}
// Not supported
// CRC32C Intrinsics.
/**
* Method entry for static native methods:
* int java.util.zip.CRC32C.updateBytes( int crc, byte[] b, int off, int len)
* int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long* buf, int off, int len)
**/
address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
if (UseCRC32CIntrinsics) {
address start = __ pc(); // Remember stub start address (is rtn value).
// We don't generate local frame and don't align stack because
// we do not even call stub code (we generate the code inline)
// and there is no safepoint on this path.
// Load parameters.
// R15_esp is the caller's operand stack pointer, i.e. it points to the parameters.
const Register argP = R15_esp;
const Register crc = R3_ARG1; // crc value
const Register data = R4_ARG2; // address of java byte array
const Register dataLen = R5_ARG3; // source data len
const Register table = R6_ARG4; // address of crc32c table
const Register t0 = R9; // scratch registers for crc calculation
const Register t1 = R10;
const Register t2 = R11;
const Register t3 = R12;
const Register tc0 = R2; // registers to hold pre-calculated column addresses
const Register tc1 = R7;
const Register tc2 = R8;
const Register tc3 = table; // table address is reconstructed at the end of kernel_crc32_* emitters
const Register tmp = t0; // Only used very locally to calculate byte buffer address.
// Arguments are reversed on java expression stack.
// Calculate address of start element.
if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) { // Used for "updateDirectByteBuffer".
BLOCK_COMMENT("CRC32C_updateDirectByteBuffer {");
// crc @ (SP + 5W) (32bit)
// buf @ (SP + 3W) (64bit ptr to long array)
// off @ (SP + 2W) (32bit)
// dataLen @ (SP + 1W) (32bit)
// data = buf + off
__ ld( data, 3*wordSize, argP); // start of byte buffer
__ lwa( tmp, 2*wordSize, argP); // byte buffer offset
__ lwa( dataLen, 1*wordSize, argP); // #bytes to process
__ lwz( crc, 5*wordSize, argP); // current crc state
__ add( data, data, tmp); // Add byte buffer offset.
} else { // Used for "updateBytes".
BLOCK_COMMENT("CRC32C_updateBytes {");
// crc @ (SP + 4W) (32bit)
// buf @ (SP + 3W) (64bit ptr to byte array)
// off @ (SP + 2W) (32bit)
// dataLen @ (SP + 1W) (32bit)
// data = buf + off + base_offset
__ ld( data, 3*wordSize, argP); // start of byte buffer
__ lwa( tmp, 2*wordSize, argP); // byte buffer offset
__ lwa( dataLen, 1*wordSize, argP); // #bytes to process
__ add( data, data, tmp); // add byte buffer offset
__ lwz( crc, 4*wordSize, argP); // current crc state
__ addi(data, data, arrayOopDesc::base_offset_in_bytes(T_BYTE));
}
StubRoutines::ppc64::generate_load_crc32c_table_addr(_masm, table);
// Performance measurements show the 1word and 2word variants to be almost equivalent,
// with very light advantages for the 1word variant. We chose the 1word variant for
// code compactness.
__ kernel_crc32_1word(crc, data, dataLen, table, t0, t1, t2, t3, tc0, tc1, tc2, tc3, false);
// Restore caller sp for c2i case and return.
__ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
__ blr();
BLOCK_COMMENT("} CRC32C_update{Bytes|DirectByteBuffer}");
return start;
}
return NULL;
}
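
The interpreter entry reads its arguments directly off the Java expression stack (argP == R15_esp). Arguments were pushed left to right, so they sit in reverse order, and the raw buffer address of the DirectByteBuffer variant is a long occupying two slots, which is why crc moves from 4*wordSize up to 5*wordSize there. A C++ sketch of that slot map (illustrative only; slot indices mirror the block comments above):

#include <cstdint>

struct Crc32cEntryArgs {
  uint32_t       crc;
  const uint8_t* buf;
  int32_t        off;
  int32_t        len;
};

// argP points at word-sized expression-stack slots, topmost argument at index 1.
static Crc32cEntryArgs read_args(const uintptr_t* argP, bool direct_byte_buffer) {
  Crc32cEntryArgs a;
  a.len = static_cast<int32_t>(argP[1]);                            // dataLen @ SP + 1W
  a.off = static_cast<int32_t>(argP[2]);                            // off     @ SP + 2W
  a.buf = reinterpret_cast<const uint8_t*>(argP[3]);                // buf     @ SP + 3W
  a.crc = static_cast<uint32_t>(argP[direct_byte_buffer ? 5 : 4]);  // crc     @ SP + 5W or 4W
  return a;
}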

View File

@ -1,6 +1,6 @@
/*
* Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2016 SAP SE. All rights reserved.
* Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2017, SAP SE. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@ -172,18 +172,27 @@ void VM_Version::initialize() {
assert(AllocatePrefetchStyle >= 0, "AllocatePrefetchStyle should be positive");
// Implementation does not use any of the vector instructions
// available with Power8. Their exploitation is still pending.
// If defined(VM_LITTLE_ENDIAN) and running on Power8 or newer hardware,
// the implementation uses the vector instructions available with Power8.
// In all other cases, the implementation uses only generally available instructions.
if (!UseCRC32Intrinsics) {
if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
FLAG_SET_DEFAULT(UseCRC32Intrinsics, true);
}
}
if (UseCRC32CIntrinsics) {
if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics))
warning("CRC32C intrinsics are not available on this CPU");
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
// Implementation does not use any of the vector instructions available with Power8.
// Their exploitation is still pending (aka "work in progress").
if (!UseCRC32CIntrinsics) {
if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, true);
}
}
// TODO: Provide implementation.
if (UseAdler32Intrinsics) {
warning("Adler32Intrinsics not available on this CPU.");
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
}
// The AES intrinsic stubs require AES instruction support.
@ -245,11 +254,6 @@ void VM_Version::initialize() {
FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
}
if (UseAdler32Intrinsics) {
warning("Adler32Intrinsics not available on this CPU.");
FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
}
if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
UseMultiplyToLenIntrinsic = true;
}

View File

@ -212,7 +212,7 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) {
case vmIntrinsics::_updateCRC32:
case vmIntrinsics::_updateBytesCRC32:
case vmIntrinsics::_updateByteBufferCRC32:
#if defined(SPARC) || defined(S390)
#if defined(SPARC) || defined(S390) || defined(PPC64)
case vmIntrinsics::_updateBytesCRC32C:
case vmIntrinsics::_updateDirectByteBufferCRC32C:
#endif